| author | 2021-07-25 15:31:33 -0400 |
|---|---|
| committer | 2021-07-25 15:31:33 -0400 |
| commit | 09d6cc99435322c5f480eaa2b0967e33f4966ba6 (patch) |
| tree | 72cdf06f6b7d77fdf5826104fea691f3ea450f54 |
| parent | configuration: Use combobox apply template where possible (diff) |
| parent | Merge pull request #6575 from FernandoS27/new_settings (diff) |
Merge branch 'master' into fullscreen-enum
453 files changed, 49784 insertions, 27358 deletions
diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake index 311ba1c2e..43ca730ec 100644 --- a/CMakeModules/GenerateSCMRev.cmake +++ b/CMakeModules/GenerateSCMRev.cmake | |||
| @@ -48,69 +48,6 @@ if (BUILD_REPOSITORY) | |||
| 48 | endif() | 48 | endif() |
| 49 | endif() | 49 | endif() |
| 50 | 50 | ||
| 51 | # The variable SRC_DIR must be passed into the script (since it uses the current build directory for all values of CMAKE_*_DIR) | 51 | # The variable SRC_DIR must be passed into the script |
| 52 | set(VIDEO_CORE "${SRC_DIR}/src/video_core") | 52 | # (since it uses the current build directory for all values of CMAKE_*_DIR) |
| 53 | set(HASH_FILES | ||
| 54 | "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp" | ||
| 55 | "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h" | ||
| 56 | "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp" | ||
| 57 | "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h" | ||
| 58 | "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp" | ||
| 59 | "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h" | ||
| 60 | "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp" | ||
| 61 | "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h" | ||
| 62 | "${VIDEO_CORE}/shader/decode/arithmetic.cpp" | ||
| 63 | "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp" | ||
| 64 | "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp" | ||
| 65 | "${VIDEO_CORE}/shader/decode/arithmetic_immediate.cpp" | ||
| 66 | "${VIDEO_CORE}/shader/decode/arithmetic_integer.cpp" | ||
| 67 | "${VIDEO_CORE}/shader/decode/arithmetic_integer_immediate.cpp" | ||
| 68 | "${VIDEO_CORE}/shader/decode/bfe.cpp" | ||
| 69 | "${VIDEO_CORE}/shader/decode/bfi.cpp" | ||
| 70 | "${VIDEO_CORE}/shader/decode/conversion.cpp" | ||
| 71 | "${VIDEO_CORE}/shader/decode/ffma.cpp" | ||
| 72 | "${VIDEO_CORE}/shader/decode/float_set.cpp" | ||
| 73 | "${VIDEO_CORE}/shader/decode/float_set_predicate.cpp" | ||
| 74 | "${VIDEO_CORE}/shader/decode/half_set.cpp" | ||
| 75 | "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp" | ||
| 76 | "${VIDEO_CORE}/shader/decode/hfma2.cpp" | ||
| 77 | "${VIDEO_CORE}/shader/decode/image.cpp" | ||
| 78 | "${VIDEO_CORE}/shader/decode/integer_set.cpp" | ||
| 79 | "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp" | ||
| 80 | "${VIDEO_CORE}/shader/decode/memory.cpp" | ||
| 81 | "${VIDEO_CORE}/shader/decode/texture.cpp" | ||
| 82 | "${VIDEO_CORE}/shader/decode/other.cpp" | ||
| 83 | "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp" | ||
| 84 | "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp" | ||
| 85 | "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp" | ||
| 86 | "${VIDEO_CORE}/shader/decode/shift.cpp" | ||
| 87 | "${VIDEO_CORE}/shader/decode/video.cpp" | ||
| 88 | "${VIDEO_CORE}/shader/decode/warp.cpp" | ||
| 89 | "${VIDEO_CORE}/shader/decode/xmad.cpp" | ||
| 90 | "${VIDEO_CORE}/shader/ast.cpp" | ||
| 91 | "${VIDEO_CORE}/shader/ast.h" | ||
| 92 | "${VIDEO_CORE}/shader/compiler_settings.cpp" | ||
| 93 | "${VIDEO_CORE}/shader/compiler_settings.h" | ||
| 94 | "${VIDEO_CORE}/shader/control_flow.cpp" | ||
| 95 | "${VIDEO_CORE}/shader/control_flow.h" | ||
| 96 | "${VIDEO_CORE}/shader/decode.cpp" | ||
| 97 | "${VIDEO_CORE}/shader/expr.cpp" | ||
| 98 | "${VIDEO_CORE}/shader/expr.h" | ||
| 99 | "${VIDEO_CORE}/shader/node.h" | ||
| 100 | "${VIDEO_CORE}/shader/node_helper.cpp" | ||
| 101 | "${VIDEO_CORE}/shader/node_helper.h" | ||
| 102 | "${VIDEO_CORE}/shader/registry.cpp" | ||
| 103 | "${VIDEO_CORE}/shader/registry.h" | ||
| 104 | "${VIDEO_CORE}/shader/shader_ir.cpp" | ||
| 105 | "${VIDEO_CORE}/shader/shader_ir.h" | ||
| 106 | "${VIDEO_CORE}/shader/track.cpp" | ||
| 107 | "${VIDEO_CORE}/shader/transform_feedback.cpp" | ||
| 108 | "${VIDEO_CORE}/shader/transform_feedback.h" | ||
| 109 | ) | ||
| 110 | set(COMBINED "") | ||
| 111 | foreach (F IN LISTS HASH_FILES) | ||
| 112 | file(READ ${F} TMP) | ||
| 113 | set(COMBINED "${COMBINED}${TMP}") | ||
| 114 | endforeach() | ||
| 115 | string(MD5 SHADER_CACHE_VERSION "${COMBINED}") | ||
| 116 | configure_file("${SRC_DIR}/src/common/scm_rev.cpp.in" "scm_rev.cpp" @ONLY) | 53 | configure_file("${SRC_DIR}/src/common/scm_rev.cpp.in" "scm_rev.cpp" @ONLY) |
| @@ -35,7 +35,7 @@ It is written in C++ with portability in mind, and we actively maintain builds f | |||
| 35 | 35 | ||
| 36 | The emulator is capable of running most commercial games at full speed, provided you meet the [necessary hardware requirements](https://yuzu-emu.org/help/quickstart/#hardware-requirements). | 36 | The emulator is capable of running most commercial games at full speed, provided you meet the [necessary hardware requirements](https://yuzu-emu.org/help/quickstart/#hardware-requirements). |
| 37 | 37 | ||
| 38 | For a full list of games yuzu supports, please visit our [Compatibility page](https://yuzu-emu.org/game/). | 38 | For a full list of games yuzu supports, please visit our [Compatibility page](https://yuzu-emu.org/game/). |
| 39 | 39 | ||
| 40 | Check out our [website](https://yuzu-emu.org/) for the latest news on exciting features, monthly progress reports, and more! | 40 | Check out our [website](https://yuzu-emu.org/) for the latest news on exciting features, monthly progress reports, and more! |
| 41 | 41 | ||
| @@ -43,7 +43,7 @@ Check out our [website](https://yuzu-emu.org/) for the latest news on exciting f | |||
| 43 | 43 | ||
| 44 | Most of the development happens on GitHub. It's also where [our central repository](https://github.com/yuzu-emu/yuzu) is hosted. For development discussion, please join us on [Discord](https://discord.com/invite/u77vRWY). | 44 | Most of the development happens on GitHub. It's also where [our central repository](https://github.com/yuzu-emu/yuzu) is hosted. For development discussion, please join us on [Discord](https://discord.com/invite/u77vRWY). |
| 45 | 45 | ||
| 46 | If you want to contribute, please take a look at the [Contributor's Guide](https://github.com/yuzu-emu/yuzu/wiki/Contributing) and [Developer Information](https://github.com/yuzu-emu/yuzu/wiki/Developer-Information). | 46 | If you want to contribute, please take a look at the [Contributor's Guide](https://github.com/yuzu-emu/yuzu/wiki/Contributing) and [Developer Information](https://github.com/yuzu-emu/yuzu/wiki/Developer-Information). |
| 47 | You can also contact any of the developers on Discord to learn about the current state of the emulator. | 47 | You can also contact any of the developers on Discord to learn about the current state of the emulator. |
| 48 | 48 | ||
| 49 | If you want to contribute to the user interface translation project, please check out the [yuzu project on transifex](https://www.transifex.com/yuzu-emulator/yuzu). We centralize translation work there, and periodically upstream translations. | 49 | If you want to contribute to the user interface translation project, please check out the [yuzu project on transifex](https://www.transifex.com/yuzu-emulator/yuzu). We centralize translation work there, and periodically upstream translations. |
| @@ -78,3 +78,5 @@ If you wish to support us a different way, please join our [Discord](https://dis | |||
| 78 | ## License | 78 | ## License |
| 79 | 79 | ||
| 80 | yuzu is licensed under the GPLv2 (or any later version). Refer to the [license.txt](https://github.com/yuzu-emu/yuzu/blob/master/license.txt) file. | 80 | yuzu is licensed under the GPLv2 (or any later version). Refer to the [license.txt](https://github.com/yuzu-emu/yuzu/blob/master/license.txt) file. |
| 81 | |||
| 82 | The [Skyline-Emulator Team](https://github.com/skyline-emu/skyline) is exempt from the GPLv2 for contributions from the following contributors: [FernandoS27](https://github.com/FernandoS27), [lioncash](https://github.com/lioncash), [bunnei](https://github.com/bunnei), [ReinUsesLisp](https://github.com/ReinUsesLisp), [Morph1984](https://github.com/Morph1984), [ogniK5377](https://github.com/ogniK5377), [german77](https://github.com/german77), [ameerj](https://github.com/ameerj), [Kelebek1](https://github.com/Kelebek1), and [lat9nq](https://github.com/lat9nq). They may use the code from these contributors only under the Mozilla Public License, version 2.0. | ||
diff --git a/dist/qt_themes/default/style.qss b/dist/qt_themes/default/style.qss index cee219374..9915a40ba 100644 --- a/dist/qt_themes/default/style.qss +++ b/dist/qt_themes/default/style.qss | |||
| @@ -38,6 +38,26 @@ QPushButton#RendererStatusBarButton:!checked { | |||
| 38 | color: #0066ff; | 38 | color: #0066ff; |
| 39 | } | 39 | } |
| 40 | 40 | ||
| 41 | QPushButton#GPUStatusBarButton { | ||
| 42 | color: #656565; | ||
| 43 | border: 1px solid transparent; | ||
| 44 | background-color: transparent; | ||
| 45 | padding: 0px 3px 0px 3px; | ||
| 46 | text-align: center; | ||
| 47 | } | ||
| 48 | |||
| 49 | QPushButton#GPUStatusBarButton:hover { | ||
| 50 | border: 1px solid #76797C; | ||
| 51 | } | ||
| 52 | |||
| 53 | QPushButton#GPUStatusBarButton:checked { | ||
| 54 | color: #ff8040; | ||
| 55 | } | ||
| 56 | |||
| 57 | QPushButton#GPUStatusBarButton:!checked { | ||
| 58 | color: #40dd40; | ||
| 59 | } | ||
| 60 | |||
| 41 | QPushButton#buttonRefreshDevices { | 61 | QPushButton#buttonRefreshDevices { |
| 42 | min-width: 21px; | 62 | min-width: 21px; |
| 43 | min-height: 21px; | 63 | min-height: 21px; |
diff --git a/dist/qt_themes/qdarkstyle/style.qss b/dist/qt_themes/qdarkstyle/style.qss index 3d0ccbb9e..dac2dba86 100644 --- a/dist/qt_themes/qdarkstyle/style.qss +++ b/dist/qt_themes/qdarkstyle/style.qss | |||
| @@ -1283,6 +1283,27 @@ QPushButton#RendererStatusBarButton:!checked { | |||
| 1283 | color: #00ccdd; | 1283 | color: #00ccdd; |
| 1284 | } | 1284 | } |
| 1285 | 1285 | ||
| 1286 | QPushButton#GPUStatusBarButton { | ||
| 1287 | min-width: 0px; | ||
| 1288 | color: #656565; | ||
| 1289 | border: 1px solid transparent; | ||
| 1290 | background-color: transparent; | ||
| 1291 | padding: 0px 3px 0px 3px; | ||
| 1292 | text-align: center; | ||
| 1293 | } | ||
| 1294 | |||
| 1295 | QPushButton#GPUStatusBarButton:hover { | ||
| 1296 | border: 1px solid #76797C; | ||
| 1297 | } | ||
| 1298 | |||
| 1299 | QPushButton#GPUStatusBarButton:checked { | ||
| 1300 | color: #ff8040; | ||
| 1301 | } | ||
| 1302 | |||
| 1303 | QPushButton#GPUStatusBarButton:!checked { | ||
| 1304 | color: #40dd40; | ||
| 1305 | } | ||
| 1306 | |||
| 1286 | QPushButton#buttonRefreshDevices { | 1307 | QPushButton#buttonRefreshDevices { |
| 1287 | min-width: 23px; | 1308 | min-width: 23px; |
| 1288 | min-height: 23px; | 1309 | min-height: 23px; |
diff --git a/dist/qt_themes/qdarkstyle_midnight_blue/style.qss b/dist/qt_themes/qdarkstyle_midnight_blue/style.qss index 51bec2fd7..032d05ec6 100644 --- a/dist/qt_themes/qdarkstyle_midnight_blue/style.qss +++ b/dist/qt_themes/qdarkstyle_midnight_blue/style.qss | |||
| @@ -2186,6 +2186,27 @@ QPushButton#RendererStatusBarButton:!checked { | |||
| 2186 | color: #00ccdd; | 2186 | color: #00ccdd; |
| 2187 | } | 2187 | } |
| 2188 | 2188 | ||
| 2189 | QPushButton#GPUStatusBarButton { | ||
| 2190 | min-width: 0px; | ||
| 2191 | color: #656565; | ||
| 2192 | border: 1px solid transparent; | ||
| 2193 | background-color: transparent; | ||
| 2194 | padding: 0px 3px 0px 3px; | ||
| 2195 | text-align: center; | ||
| 2196 | } | ||
| 2197 | |||
| 2198 | QPushButton#GPUStatusBarButton:hover { | ||
| 2199 | border: 1px solid #76797C; | ||
| 2200 | } | ||
| 2201 | |||
| 2202 | QPushButton#GPUStatusBarButton:checked { | ||
| 2203 | color: #ff8040; | ||
| 2204 | } | ||
| 2205 | |||
| 2206 | QPushButton#GPUStatusBarButton:!checked { | ||
| 2207 | color: #40dd40; | ||
| 2208 | } | ||
| 2209 | |||
| 2189 | QPushButton#buttonRefreshDevices { | 2210 | QPushButton#buttonRefreshDevices { |
| 2190 | min-width: 19px; | 2211 | min-width: 19px; |
| 2191 | min-height: 19px; | 2212 | min-height: 19px; |
diff --git a/externals/Vulkan-Headers b/externals/Vulkan-Headers | |||
| Subproject 8188e3fbbc105591064093440f88081fb957d4f | Subproject 07c4a37bcf41ea50aef6e98236abdfe8089fb4c | ||
diff --git a/externals/sirit b/externals/sirit | |||
| Subproject eefca56afd49379bdebc97ded8b480839f93088 | Subproject a39596358a3a5488c06554c0c15184a6af71e43 | ||
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f8ec8fea8..6e66dc1df 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt | |||
| @@ -142,6 +142,7 @@ add_subdirectory(core) | |||
| 142 | add_subdirectory(audio_core) | 142 | add_subdirectory(audio_core) |
| 143 | add_subdirectory(video_core) | 143 | add_subdirectory(video_core) |
| 144 | add_subdirectory(input_common) | 144 | add_subdirectory(input_common) |
| 145 | add_subdirectory(shader_recompiler) | ||
| 145 | add_subdirectory(tests) | 146 | add_subdirectory(tests) |
| 146 | 147 | ||
| 147 | if (ENABLE_SDL2) | 148 | if (ENABLE_SDL2) |
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index e03fffd8d..57922b51c 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt | |||
| @@ -1,8 +1,3 @@ | |||
| 1 | # Add a custom command to generate a new shader_cache_version hash when any of the following files change | ||
| 2 | # NOTE: This is an approximation of what files affect shader generation, its possible something else | ||
| 3 | # could affect the result, but much more unlikely than the following files. Keeping a list of files | ||
| 4 | # like this allows for much better caching since it doesn't force the user to recompile binary shaders every update | ||
| 5 | set(VIDEO_CORE "${CMAKE_SOURCE_DIR}/src/video_core") | ||
| 6 | if (DEFINED ENV{AZURECIREPO}) | 1 | if (DEFINED ENV{AZURECIREPO}) |
| 7 | set(BUILD_REPOSITORY $ENV{AZURECIREPO}) | 2 | set(BUILD_REPOSITORY $ENV{AZURECIREPO}) |
| 8 | endif() | 3 | endif() |
| @@ -30,64 +25,7 @@ add_custom_command(OUTPUT scm_rev.cpp | |||
| 30 | -DGIT_EXECUTABLE=${GIT_EXECUTABLE} | 25 | -DGIT_EXECUTABLE=${GIT_EXECUTABLE} |
| 31 | -P ${CMAKE_SOURCE_DIR}/CMakeModules/GenerateSCMRev.cmake | 26 | -P ${CMAKE_SOURCE_DIR}/CMakeModules/GenerateSCMRev.cmake |
| 32 | DEPENDS | 27 | DEPENDS |
| 33 | # WARNING! It was too much work to try and make a common location for this list, | 28 | # Check that the scm_rev files haven't changed |
| 34 | # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well | ||
| 35 | "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp" | ||
| 36 | "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h" | ||
| 37 | "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp" | ||
| 38 | "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h" | ||
| 39 | "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp" | ||
| 40 | "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h" | ||
| 41 | "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp" | ||
| 42 | "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h" | ||
| 43 | "${VIDEO_CORE}/shader/decode/arithmetic.cpp" | ||
| 44 | "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp" | ||
| 45 | "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp" | ||
| 46 | "${VIDEO_CORE}/shader/decode/arithmetic_immediate.cpp" | ||
| 47 | "${VIDEO_CORE}/shader/decode/arithmetic_integer.cpp" | ||
| 48 | "${VIDEO_CORE}/shader/decode/arithmetic_integer_immediate.cpp" | ||
| 49 | "${VIDEO_CORE}/shader/decode/bfe.cpp" | ||
| 50 | "${VIDEO_CORE}/shader/decode/bfi.cpp" | ||
| 51 | "${VIDEO_CORE}/shader/decode/conversion.cpp" | ||
| 52 | "${VIDEO_CORE}/shader/decode/ffma.cpp" | ||
| 53 | "${VIDEO_CORE}/shader/decode/float_set.cpp" | ||
| 54 | "${VIDEO_CORE}/shader/decode/float_set_predicate.cpp" | ||
| 55 | "${VIDEO_CORE}/shader/decode/half_set.cpp" | ||
| 56 | "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp" | ||
| 57 | "${VIDEO_CORE}/shader/decode/hfma2.cpp" | ||
| 58 | "${VIDEO_CORE}/shader/decode/image.cpp" | ||
| 59 | "${VIDEO_CORE}/shader/decode/integer_set.cpp" | ||
| 60 | "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp" | ||
| 61 | "${VIDEO_CORE}/shader/decode/memory.cpp" | ||
| 62 | "${VIDEO_CORE}/shader/decode/texture.cpp" | ||
| 63 | "${VIDEO_CORE}/shader/decode/other.cpp" | ||
| 64 | "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp" | ||
| 65 | "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp" | ||
| 66 | "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp" | ||
| 67 | "${VIDEO_CORE}/shader/decode/shift.cpp" | ||
| 68 | "${VIDEO_CORE}/shader/decode/video.cpp" | ||
| 69 | "${VIDEO_CORE}/shader/decode/warp.cpp" | ||
| 70 | "${VIDEO_CORE}/shader/decode/xmad.cpp" | ||
| 71 | "${VIDEO_CORE}/shader/ast.cpp" | ||
| 72 | "${VIDEO_CORE}/shader/ast.h" | ||
| 73 | "${VIDEO_CORE}/shader/compiler_settings.cpp" | ||
| 74 | "${VIDEO_CORE}/shader/compiler_settings.h" | ||
| 75 | "${VIDEO_CORE}/shader/control_flow.cpp" | ||
| 76 | "${VIDEO_CORE}/shader/control_flow.h" | ||
| 77 | "${VIDEO_CORE}/shader/decode.cpp" | ||
| 78 | "${VIDEO_CORE}/shader/expr.cpp" | ||
| 79 | "${VIDEO_CORE}/shader/expr.h" | ||
| 80 | "${VIDEO_CORE}/shader/node.h" | ||
| 81 | "${VIDEO_CORE}/shader/node_helper.cpp" | ||
| 82 | "${VIDEO_CORE}/shader/node_helper.h" | ||
| 83 | "${VIDEO_CORE}/shader/registry.cpp" | ||
| 84 | "${VIDEO_CORE}/shader/registry.h" | ||
| 85 | "${VIDEO_CORE}/shader/shader_ir.cpp" | ||
| 86 | "${VIDEO_CORE}/shader/shader_ir.h" | ||
| 87 | "${VIDEO_CORE}/shader/track.cpp" | ||
| 88 | "${VIDEO_CORE}/shader/transform_feedback.cpp" | ||
| 89 | "${VIDEO_CORE}/shader/transform_feedback.h" | ||
| 90 | # and also check that the scm_rev files haven't changed | ||
| 91 | "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in" | 29 | "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in" |
| 92 | "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h" | 30 | "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h" |
| 93 | # technically we should regenerate if the git version changed, but it's not worth the effort imo | 31 | # technically we should regenerate if the git version changed, but it's not worth the effort imo |
| @@ -231,7 +169,7 @@ endif() | |||
| 231 | 169 | ||
| 232 | create_target_directory_groups(common) | 170 | create_target_directory_groups(common) |
| 233 | 171 | ||
| 234 | target_link_libraries(common PUBLIC ${Boost_LIBRARIES} fmt::fmt microprofile) | 172 | target_link_libraries(common PUBLIC ${Boost_LIBRARIES} fmt::fmt microprofile Threads::Threads) |
| 235 | target_link_libraries(common PRIVATE lz4::lz4 xbyak) | 173 | target_link_libraries(common PRIVATE lz4::lz4 xbyak) |
| 236 | if (MSVC) | 174 | if (MSVC) |
| 237 | target_link_libraries(common PRIVATE zstd::zstd) | 175 | target_link_libraries(common PRIVATE zstd::zstd) |
diff --git a/src/common/logging/filter.cpp b/src/common/logging/filter.cpp index 4f2cc29e1..f055f0e11 100644 --- a/src/common/logging/filter.cpp +++ b/src/common/logging/filter.cpp | |||
| @@ -144,6 +144,10 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) { | |||
| 144 | SUB(Render, Software) \ | 144 | SUB(Render, Software) \ |
| 145 | SUB(Render, OpenGL) \ | 145 | SUB(Render, OpenGL) \ |
| 146 | SUB(Render, Vulkan) \ | 146 | SUB(Render, Vulkan) \ |
| 147 | CLS(Shader) \ | ||
| 148 | SUB(Shader, SPIRV) \ | ||
| 149 | SUB(Shader, GLASM) \ | ||
| 150 | SUB(Shader, GLSL) \ | ||
| 147 | CLS(Audio) \ | 151 | CLS(Audio) \ |
| 148 | SUB(Audio, DSP) \ | 152 | SUB(Audio, DSP) \ |
| 149 | SUB(Audio, Sink) \ | 153 | SUB(Audio, Sink) \ |
diff --git a/src/common/logging/types.h b/src/common/logging/types.h index 88b0e9c01..7ad0334fc 100644 --- a/src/common/logging/types.h +++ b/src/common/logging/types.h | |||
| @@ -114,6 +114,10 @@ enum class Class : u8 { | |||
| 114 | Render_Software, ///< Software renderer backend | 114 | Render_Software, ///< Software renderer backend |
| 115 | Render_OpenGL, ///< OpenGL backend | 115 | Render_OpenGL, ///< OpenGL backend |
| 116 | Render_Vulkan, ///< Vulkan backend | 116 | Render_Vulkan, ///< Vulkan backend |
| 117 | Shader, ///< Shader recompiler | ||
| 118 | Shader_SPIRV, ///< Shader SPIR-V code generation | ||
| 119 | Shader_GLASM, ///< Shader GLASM code generation | ||
| 120 | Shader_GLSL, ///< Shader GLSL code generation | ||
| 117 | Audio, ///< Audio emulation | 121 | Audio, ///< Audio emulation |
| 118 | Audio_DSP, ///< The HLE implementation of the DSP | 122 | Audio_DSP, ///< The HLE implementation of the DSP |
| 119 | Audio_Sink, ///< Emulator audio output backend | 123 | Audio_Sink, ///< Emulator audio output backend |
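The new `Shader` log class and its `SPIRV`/`GLASM`/`GLSL` subclasses let the recompiler's output be filtered independently of the renderer backends. A minimal usage sketch, assuming the `LOG_*` macros from `common/logging/log.h` and the `class.subclass:level` filter syntax used elsewhere in yuzu; the call site itself is illustrative:

```cpp
#include <cstddef>

#include "common/logging/log.h"

// Hypothetical call site; only the Shader_SPIRV class comes from this
// diff. A filter rule such as "Shader.SPIRV:Debug" would enable it.
void EmitModule(std::size_t num_instructions) {
    LOG_DEBUG(Shader_SPIRV, "emitting SPIR-V module with {} instructions",
              num_instructions);
}
```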
diff --git a/src/common/scm_rev.cpp.in b/src/common/scm_rev.cpp.in index 5f126f324..cc88994c6 100644 --- a/src/common/scm_rev.cpp.in +++ b/src/common/scm_rev.cpp.in | |||
| @@ -14,7 +14,6 @@ | |||
| 14 | #define BUILD_ID "@BUILD_ID@" | 14 | #define BUILD_ID "@BUILD_ID@" |
| 15 | #define TITLE_BAR_FORMAT_IDLE "@TITLE_BAR_FORMAT_IDLE@" | 15 | #define TITLE_BAR_FORMAT_IDLE "@TITLE_BAR_FORMAT_IDLE@" |
| 16 | #define TITLE_BAR_FORMAT_RUNNING "@TITLE_BAR_FORMAT_RUNNING@" | 16 | #define TITLE_BAR_FORMAT_RUNNING "@TITLE_BAR_FORMAT_RUNNING@" |
| 17 | #define SHADER_CACHE_VERSION "@SHADER_CACHE_VERSION@" | ||
| 18 | 17 | ||
| 19 | namespace Common { | 18 | namespace Common { |
| 20 | 19 | ||
| @@ -28,7 +27,6 @@ const char g_build_version[] = BUILD_VERSION; | |||
| 28 | const char g_build_id[] = BUILD_ID; | 27 | const char g_build_id[] = BUILD_ID; |
| 29 | const char g_title_bar_format_idle[] = TITLE_BAR_FORMAT_IDLE; | 28 | const char g_title_bar_format_idle[] = TITLE_BAR_FORMAT_IDLE; |
| 30 | const char g_title_bar_format_running[] = TITLE_BAR_FORMAT_RUNNING; | 29 | const char g_title_bar_format_running[] = TITLE_BAR_FORMAT_RUNNING; |
| 31 | const char g_shader_cache_version[] = SHADER_CACHE_VERSION; | ||
| 32 | 30 | ||
| 33 | } // namespace | 31 | } // namespace |
| 34 | 32 | ||
diff --git a/src/common/settings.cpp b/src/common/settings.cpp index bf5514386..66268ea0f 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp | |||
| @@ -57,7 +57,7 @@ void LogSettings() { | |||
| 57 | log_setting("Renderer_UseNvdecEmulation", values.use_nvdec_emulation.GetValue()); | 57 | log_setting("Renderer_UseNvdecEmulation", values.use_nvdec_emulation.GetValue()); |
| 58 | log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue()); | 58 | log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue()); |
| 59 | log_setting("Renderer_UseVsync", values.use_vsync.GetValue()); | 59 | log_setting("Renderer_UseVsync", values.use_vsync.GetValue()); |
| 60 | log_setting("Renderer_UseAssemblyShaders", values.use_assembly_shaders.GetValue()); | 60 | log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue()); |
| 61 | log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue()); | 61 | log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue()); |
| 62 | log_setting("Renderer_UseGarbageCollection", values.use_caches_gc.GetValue()); | 62 | log_setting("Renderer_UseGarbageCollection", values.use_caches_gc.GetValue()); |
| 63 | log_setting("Renderer_AnisotropicFilteringLevel", values.max_anisotropy.GetValue()); | 63 | log_setting("Renderer_AnisotropicFilteringLevel", values.max_anisotropy.GetValue()); |
| @@ -140,7 +140,7 @@ void RestoreGlobalState(bool is_powered_on) { | |||
| 140 | values.use_nvdec_emulation.SetGlobal(true); | 140 | values.use_nvdec_emulation.SetGlobal(true); |
| 141 | values.accelerate_astc.SetGlobal(true); | 141 | values.accelerate_astc.SetGlobal(true); |
| 142 | values.use_vsync.SetGlobal(true); | 142 | values.use_vsync.SetGlobal(true); |
| 143 | values.use_assembly_shaders.SetGlobal(true); | 143 | values.shader_backend.SetGlobal(true); |
| 144 | values.use_asynchronous_shaders.SetGlobal(true); | 144 | values.use_asynchronous_shaders.SetGlobal(true); |
| 145 | values.use_fast_gpu_time.SetGlobal(true); | 145 | values.use_fast_gpu_time.SetGlobal(true); |
| 146 | values.use_caches_gc.SetGlobal(true); | 146 | values.use_caches_gc.SetGlobal(true); |
diff --git a/src/common/settings.h b/src/common/settings.h index fd2a263ec..801bed603 100644 --- a/src/common/settings.h +++ b/src/common/settings.h | |||
| @@ -24,6 +24,12 @@ enum class RendererBackend : u32 { | |||
| 24 | Vulkan = 1, | 24 | Vulkan = 1, |
| 25 | }; | 25 | }; |
| 26 | 26 | ||
| 27 | enum class ShaderBackend : u32 { | ||
| 28 | GLSL = 0, | ||
| 29 | GLASM = 1, | ||
| 30 | SPIRV = 2, | ||
| 31 | }; | ||
| 32 | |||
| 27 | enum class GPUAccuracy : u32 { | 33 | enum class GPUAccuracy : u32 { |
| 28 | Normal = 0, | 34 | Normal = 0, |
| 29 | High = 1, | 35 | High = 1, |
| @@ -313,6 +319,9 @@ struct Values { | |||
| 313 | // Renderer | 319 | // Renderer |
| 314 | Setting<RendererBackend> renderer_backend{RendererBackend::OpenGL, "backend"}; | 320 | Setting<RendererBackend> renderer_backend{RendererBackend::OpenGL, "backend"}; |
| 315 | BasicSetting<bool> renderer_debug{false, "debug"}; | 321 | BasicSetting<bool> renderer_debug{false, "debug"}; |
| 322 | BasicSetting<bool> enable_nsight_aftermath{false, "nsight_aftermath"}; | ||
| 323 | BasicSetting<bool> disable_shader_loop_safety_checks{false, | ||
| 324 | "disable_shader_loop_safety_checks"}; | ||
| 316 | Setting<int> vulkan_device{0, "vulkan_device"}; | 325 | Setting<int> vulkan_device{0, "vulkan_device"}; |
| 317 | 326 | ||
| 318 | Setting<u16> resolution_factor{1, "resolution_factor"}; | 327 | Setting<u16> resolution_factor{1, "resolution_factor"}; |
| @@ -336,7 +345,7 @@ struct Values { | |||
| 336 | Setting<bool> accelerate_astc{true, "accelerate_astc"}; | 345 | Setting<bool> accelerate_astc{true, "accelerate_astc"}; |
| 337 | Setting<bool> use_vsync{true, "use_vsync"}; | 346 | Setting<bool> use_vsync{true, "use_vsync"}; |
| 338 | BasicSetting<bool> disable_fps_limit{false, "disable_fps_limit"}; | 347 | BasicSetting<bool> disable_fps_limit{false, "disable_fps_limit"}; |
| 339 | Setting<bool> use_assembly_shaders{false, "use_assembly_shaders"}; | 348 | Setting<ShaderBackend> shader_backend{ShaderBackend::GLASM, "shader_backend"}; |
| 340 | Setting<bool> use_asynchronous_shaders{false, "use_asynchronous_shaders"}; | 349 | Setting<bool> use_asynchronous_shaders{false, "use_asynchronous_shaders"}; |
| 341 | Setting<bool> use_fast_gpu_time{true, "use_fast_gpu_time"}; | 350 | Setting<bool> use_fast_gpu_time{true, "use_fast_gpu_time"}; |
| 342 | Setting<bool> use_caches_gc{false, "use_caches_gc"}; | 351 | Setting<bool> use_caches_gc{false, "use_caches_gc"}; |
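Replacing the boolean `use_assembly_shaders` with the three-valued `shader_backend` setting turns call sites from a flag check into a dispatch. A hedged sketch of what selection code can now look like; `SelectBackend` is illustrative and not part of this diff:

```cpp
#include "common/settings.h"

// Illustrative dispatch on the new setting. ShaderBackend and
// shader_backend.GetValue() are from the diff; SelectBackend is not.
void SelectBackend() {
    switch (Settings::values.shader_backend.GetValue()) {
    case Settings::ShaderBackend::GLSL:
        // High-level GLSL emission
        break;
    case Settings::ShaderBackend::GLASM:
        // NV assembly shaders; note this is the new default value
        break;
    case Settings::ShaderBackend::SPIRV:
        // SPIR-V emission
        break;
    }
}
```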
diff --git a/src/common/thread_worker.h b/src/common/thread_worker.h index 8272985ff..cd0017726 100644 --- a/src/common/thread_worker.h +++ b/src/common/thread_worker.h | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <atomic> | 7 | #include <atomic> |
| 8 | #include <condition_variable> | ||
| 8 | #include <functional> | 9 | #include <functional> |
| 9 | #include <mutex> | 10 | #include <mutex> |
| 10 | #include <stop_token> | 11 | #include <stop_token> |
| @@ -39,7 +40,7 @@ public: | |||
| 39 | const auto lambda = [this, func](std::stop_token stop_token) { | 40 | const auto lambda = [this, func](std::stop_token stop_token) { |
| 40 | Common::SetCurrentThreadName(thread_name.c_str()); | 41 | Common::SetCurrentThreadName(thread_name.c_str()); |
| 41 | { | 42 | { |
| 42 | std::conditional_t<with_state, StateType, int> state{func()}; | 43 | [[maybe_unused]] std::conditional_t<with_state, StateType, int> state{func()}; |
| 43 | while (!stop_token.stop_requested()) { | 44 | while (!stop_token.stop_requested()) { |
| 44 | Task task; | 45 | Task task; |
| 45 | { | 46 | { |
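The `[[maybe_unused]]` attribute is needed because the stateless instantiation of the worker still declares the placeholder variable; when `with_state` is false the `int` is written once and never read, which compilers flag. A reduced sketch of the pattern, with names simplified from the original:

```cpp
#include <type_traits>

template <bool with_state, typename StateType, typename Func>
void WorkerBody(Func&& func) {
    // For with_state == false this is a plain int that nothing reads;
    // without [[maybe_unused]] that instantiation warns under -Wall.
    [[maybe_unused]] std::conditional_t<with_state, StateType, int> state{func()};
    // ... the task loop consumes `state` only when with_state is true.
}
```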
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index c7b899131..5c99c00f5 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt | |||
| @@ -517,6 +517,8 @@ add_library(core STATIC | |||
| 517 | hle/service/psc/psc.h | 517 | hle/service/psc/psc.h |
| 518 | hle/service/ptm/psm.cpp | 518 | hle/service/ptm/psm.cpp |
| 519 | hle/service/ptm/psm.h | 519 | hle/service/ptm/psm.h |
| 520 | hle/service/kernel_helpers.cpp | ||
| 521 | hle/service/kernel_helpers.h | ||
| 520 | hle/service/service.cpp | 522 | hle/service/service.cpp |
| 521 | hle/service/service.h | 523 | hle/service/service.h |
| 522 | hle/service/set/set.cpp | 524 | hle/service/set/set.cpp |
diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp index 28ed6265a..ca68fc325 100644 --- a/src/core/hle/kernel/hle_ipc.cpp +++ b/src/core/hle/kernel/hle_ipc.cpp | |||
| @@ -58,6 +58,9 @@ bool SessionRequestManager::HasSessionRequestHandler(const HLERequestContext& co | |||
| 58 | 58 | ||
| 59 | void SessionRequestHandler::ClientConnected(KServerSession* session) { | 59 | void SessionRequestHandler::ClientConnected(KServerSession* session) { |
| 60 | session->ClientConnected(shared_from_this()); | 60 | session->ClientConnected(shared_from_this()); |
| 61 | |||
| 62 | // Ensure our server session is tracked globally. | ||
| 63 | kernel.RegisterServerSession(session); | ||
| 61 | } | 64 | } |
| 62 | 65 | ||
| 63 | void SessionRequestHandler::ClientDisconnected(KServerSession* session) { | 66 | void SessionRequestHandler::ClientDisconnected(KServerSession* session) { |
diff --git a/src/core/hle/kernel/k_auto_object.cpp b/src/core/hle/kernel/k_auto_object.cpp index dbe237f09..c99a9ebb7 100644 --- a/src/core/hle/kernel/k_auto_object.cpp +++ b/src/core/hle/kernel/k_auto_object.cpp | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "core/hle/kernel/k_auto_object.h" | 5 | #include "core/hle/kernel/k_auto_object.h" |
| 6 | #include "core/hle/kernel/kernel.h" | ||
| 6 | 7 | ||
| 7 | namespace Kernel { | 8 | namespace Kernel { |
| 8 | 9 | ||
| @@ -11,4 +12,12 @@ KAutoObject* KAutoObject::Create(KAutoObject* obj) { | |||
| 11 | return obj; | 12 | return obj; |
| 12 | } | 13 | } |
| 13 | 14 | ||
| 15 | void KAutoObject::RegisterWithKernel() { | ||
| 16 | kernel.RegisterKernelObject(this); | ||
| 17 | } | ||
| 18 | |||
| 19 | void KAutoObject::UnregisterWithKernel() { | ||
| 20 | kernel.UnregisterKernelObject(this); | ||
| 21 | } | ||
| 22 | |||
| 14 | } // namespace Kernel | 23 | } // namespace Kernel |
diff --git a/src/core/hle/kernel/k_auto_object.h b/src/core/hle/kernel/k_auto_object.h index 88a052f65..e4fcdbc67 100644 --- a/src/core/hle/kernel/k_auto_object.h +++ b/src/core/hle/kernel/k_auto_object.h | |||
| @@ -85,8 +85,12 @@ private: | |||
| 85 | KERNEL_AUTOOBJECT_TRAITS(KAutoObject, KAutoObject); | 85 | KERNEL_AUTOOBJECT_TRAITS(KAutoObject, KAutoObject); |
| 86 | 86 | ||
| 87 | public: | 87 | public: |
| 88 | explicit KAutoObject(KernelCore& kernel_) : kernel(kernel_) {} | 88 | explicit KAutoObject(KernelCore& kernel_) : kernel(kernel_) { |
| 89 | virtual ~KAutoObject() = default; | 89 | RegisterWithKernel(); |
| 90 | } | ||
| 91 | virtual ~KAutoObject() { | ||
| 92 | UnregisterWithKernel(); | ||
| 93 | } | ||
| 90 | 94 | ||
| 91 | static KAutoObject* Create(KAutoObject* ptr); | 95 | static KAutoObject* Create(KAutoObject* ptr); |
| 92 | 96 | ||
| @@ -166,6 +170,10 @@ public: | |||
| 166 | } | 170 | } |
| 167 | } | 171 | } |
| 168 | 172 | ||
| 173 | private: | ||
| 174 | void RegisterWithKernel(); | ||
| 175 | void UnregisterWithKernel(); | ||
| 176 | |||
| 169 | protected: | 177 | protected: |
| 170 | KernelCore& kernel; | 178 | KernelCore& kernel; |
| 171 | std::string name; | 179 | std::string name; |
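Moving registration into the `KAutoObject` constructor and destructor ties tracking to object lifetime itself: every kernel object is in the registry exactly while it exists, so whatever remains at shutdown is by definition a leak. A standalone sketch of the same RAII idea, with `Registry` standing in for `KernelCore`:

```cpp
#include <cstddef>
#include <mutex>
#include <unordered_set>

// Stand-in for the KernelCore tracking added in this diff.
class Registry {
public:
    void Register(void* obj) {
        std::lock_guard lk{mutex};
        objects.insert(obj);
    }
    void Unregister(void* obj) {
        std::lock_guard lk{mutex};
        objects.erase(obj);
    }
    // A nonzero result at shutdown indicates dangling kernel objects.
    std::size_t DanglingCount() {
        std::lock_guard lk{mutex};
        return objects.size();
    }

private:
    std::mutex mutex;
    std::unordered_set<void*> objects;
};

class TrackedObject {
public:
    explicit TrackedObject(Registry& registry_) : registry{registry_} {
        registry.Register(this); // mirrors RegisterWithKernel()
    }
    virtual ~TrackedObject() {
        registry.Unregister(this); // mirrors UnregisterWithKernel()
    }

private:
    Registry& registry;
};
```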
diff --git a/src/core/hle/kernel/k_process.cpp b/src/core/hle/kernel/k_process.cpp index d1bd98051..8ead1a769 100644 --- a/src/core/hle/kernel/k_process.cpp +++ b/src/core/hle/kernel/k_process.cpp | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include "common/alignment.h" | 10 | #include "common/alignment.h" |
| 11 | #include "common/assert.h" | 11 | #include "common/assert.h" |
| 12 | #include "common/logging/log.h" | 12 | #include "common/logging/log.h" |
| 13 | #include "common/scope_exit.h" | ||
| 13 | #include "common/settings.h" | 14 | #include "common/settings.h" |
| 14 | #include "core/core.h" | 15 | #include "core/core.h" |
| 15 | #include "core/device_memory.h" | 16 | #include "core/device_memory.h" |
| @@ -43,6 +44,8 @@ void SetupMainThread(Core::System& system, KProcess& owner_process, u32 priority | |||
| 43 | ASSERT(owner_process.GetResourceLimit()->Reserve(LimitableResource::Threads, 1)); | 44 | ASSERT(owner_process.GetResourceLimit()->Reserve(LimitableResource::Threads, 1)); |
| 44 | 45 | ||
| 45 | KThread* thread = KThread::Create(system.Kernel()); | 46 | KThread* thread = KThread::Create(system.Kernel()); |
| 47 | SCOPE_EXIT({ thread->Close(); }); | ||
| 48 | |||
| 46 | ASSERT(KThread::InitializeUserThread(system, thread, entry_point, 0, stack_top, priority, | 49 | ASSERT(KThread::InitializeUserThread(system, thread, entry_point, 0, stack_top, priority, |
| 47 | owner_process.GetIdealCoreId(), &owner_process) | 50 | owner_process.GetIdealCoreId(), &owner_process) |
| 48 | .IsSuccess()); | 51 | .IsSuccess()); |
| @@ -162,7 +165,7 @@ void KProcess::DecrementThreadCount() { | |||
| 162 | ASSERT(num_threads > 0); | 165 | ASSERT(num_threads > 0); |
| 163 | 166 | ||
| 164 | if (const auto count = --num_threads; count == 0) { | 167 | if (const auto count = --num_threads; count == 0) { |
| 165 | UNIMPLEMENTED_MSG("Process termination is not implemented!"); | 168 | LOG_WARNING(Kernel, "Process termination is not fully implemented."); |
| 166 | } | 169 | } |
| 167 | } | 170 | } |
| 168 | 171 | ||
| @@ -406,6 +409,9 @@ void KProcess::Finalize() { | |||
| 406 | resource_limit->Close(); | 409 | resource_limit->Close(); |
| 407 | } | 410 | } |
| 408 | 411 | ||
| 412 | // Finalize the handle table and close any open handles. | ||
| 413 | handle_table.Finalize(); | ||
| 414 | |||
| 409 | // Perform inherited finalization. | 415 | // Perform inherited finalization. |
| 410 | KAutoObjectWithSlabHeapAndContainer<KProcess, KSynchronizationObject>::Finalize(); | 416 | KAutoObjectWithSlabHeapAndContainer<KProcess, KSynchronizationObject>::Finalize(); |
| 411 | } | 417 | } |
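`SCOPE_EXIT` guarantees the `Close()` runs on every path out of `SetupMainThread`, releasing the extra reference from `KThread::Create`. The macro lives in `common/scope_exit.h`; a minimal stand-in showing the behaviour it provides, with `Handle` as an illustrative type:

```cpp
#include <utility>

// Reduced equivalent of the SCOPE_EXIT machinery: run a callable when
// the guard leaves scope, regardless of how the scope is exited.
template <typename Fn>
class ScopeGuard {
public:
    explicit ScopeGuard(Fn&& fn_) : fn{std::forward<Fn>(fn_)} {}
    ~ScopeGuard() {
        fn();
    }

private:
    Fn fn;
};

struct Handle {
    void Close() {}
};

void Example(Handle* handle) {
    ScopeGuard guard{[&] { handle->Close(); }};
    // ... early returns or failures here still end up calling Close().
}
```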
diff --git a/src/core/hle/kernel/k_server_session.cpp b/src/core/hle/kernel/k_server_session.cpp index 5c3c13ce6..b9f24475c 100644 --- a/src/core/hle/kernel/k_server_session.cpp +++ b/src/core/hle/kernel/k_server_session.cpp | |||
| @@ -28,7 +28,10 @@ namespace Kernel { | |||
| 28 | 28 | ||
| 29 | KServerSession::KServerSession(KernelCore& kernel_) : KSynchronizationObject{kernel_} {} | 29 | KServerSession::KServerSession(KernelCore& kernel_) : KSynchronizationObject{kernel_} {} |
| 30 | 30 | ||
| 31 | KServerSession::~KServerSession() {} | 31 | KServerSession::~KServerSession() { |
| 32 | // Ensure that the global list tracking server sessions does not hold on to a reference. | ||
| 33 | kernel.UnregisterServerSession(this); | ||
| 34 | } | ||
| 32 | 35 | ||
| 33 | void KServerSession::Initialize(KSession* parent_session_, std::string&& name_, | 36 | void KServerSession::Initialize(KSession* parent_session_, std::string&& name_, |
| 34 | std::shared_ptr<SessionRequestManager> manager_) { | 37 | std::shared_ptr<SessionRequestManager> manager_) { |
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 64bd0c494..92fbc5532 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp | |||
| @@ -61,6 +61,7 @@ struct KernelCore::Impl { | |||
| 61 | void Initialize(KernelCore& kernel) { | 61 | void Initialize(KernelCore& kernel) { |
| 62 | global_scheduler_context = std::make_unique<Kernel::GlobalSchedulerContext>(kernel); | 62 | global_scheduler_context = std::make_unique<Kernel::GlobalSchedulerContext>(kernel); |
| 63 | global_handle_table = std::make_unique<Kernel::KHandleTable>(kernel); | 63 | global_handle_table = std::make_unique<Kernel::KHandleTable>(kernel); |
| 64 | global_handle_table->Initialize(KHandleTable::MaxTableSize); | ||
| 64 | 65 | ||
| 65 | is_phantom_mode_for_singlecore = false; | 66 | is_phantom_mode_for_singlecore = false; |
| 66 | 67 | ||
| @@ -90,9 +91,39 @@ struct KernelCore::Impl { | |||
| 90 | } | 91 | } |
| 91 | 92 | ||
| 92 | void Shutdown() { | 93 | void Shutdown() { |
| 94 | // Shutdown all processes. | ||
| 95 | if (current_process) { | ||
| 96 | current_process->Finalize(); | ||
| 97 | current_process->Close(); | ||
| 98 | current_process = nullptr; | ||
| 99 | } | ||
| 93 | process_list.clear(); | 100 | process_list.clear(); |
| 94 | 101 | ||
| 95 | // Ensures all service threads gracefully shutdown | 102 | // Close all open server ports. |
| 103 | std::unordered_set<KServerPort*> server_ports_; | ||
| 104 | { | ||
| 105 | std::lock_guard lk(server_ports_lock); | ||
| 106 | server_ports_ = server_ports; | ||
| 107 | server_ports.clear(); | ||
| 108 | } | ||
| 109 | for (auto* server_port : server_ports_) { | ||
| 110 | server_port->Close(); | ||
| 111 | } | ||
| 112 | // Close all open server sessions. | ||
| 113 | std::unordered_set<KServerSession*> server_sessions_; | ||
| 114 | { | ||
| 115 | std::lock_guard lk(server_sessions_lock); | ||
| 116 | server_sessions_ = server_sessions; | ||
| 117 | server_sessions.clear(); | ||
| 118 | } | ||
| 119 | for (auto* server_session : server_sessions_) { | ||
| 120 | server_session->Close(); | ||
| 121 | } | ||
| 122 | |||
| 123 | // Ensure that the object list container is finalized and properly shut down. | ||
| 124 | object_list_container.Finalize(); | ||
| 125 | |||
| 126 | // Ensures all service threads gracefully shut down. | ||
| 96 | service_threads.clear(); | 127 | service_threads.clear(); |
| 97 | 128 | ||
| 98 | next_object_id = 0; | 129 | next_object_id = 0; |
| @@ -111,11 +142,7 @@ struct KernelCore::Impl { | |||
| 111 | 142 | ||
| 112 | cores.clear(); | 143 | cores.clear(); |
| 113 | 144 | ||
| 114 | if (current_process) { | 145 | global_handle_table->Finalize(); |
| 115 | current_process->Close(); | ||
| 116 | current_process = nullptr; | ||
| 117 | } | ||
| 118 | |||
| 119 | global_handle_table.reset(); | 146 | global_handle_table.reset(); |
| 120 | 147 | ||
| 121 | preemption_event = nullptr; | 148 | preemption_event = nullptr; |
| @@ -142,6 +169,16 @@ struct KernelCore::Impl { | |||
| 142 | 169 | ||
| 143 | // Next host thread ID to use, 0-3 IDs represent core threads, >3 represent others | 170 | // Next host thread ID to use, 0-3 IDs represent core threads, >3 represent others |
| 144 | next_host_thread_id = Core::Hardware::NUM_CPU_CORES; | 171 | next_host_thread_id = Core::Hardware::NUM_CPU_CORES; |
| 172 | |||
| 173 | // Track kernel objects that were not freed on shutdown | ||
| 174 | { | ||
| 175 | std::lock_guard lk(registered_objects_lock); | ||
| 176 | if (registered_objects.size()) { | ||
| 177 | LOG_WARNING(Kernel, "{} kernel objects were dangling on shutdown!", | ||
| 178 | registered_objects.size()); | ||
| 179 | registered_objects.clear(); | ||
| 180 | } | ||
| 181 | } | ||
| 145 | } | 182 | } |
| 146 | 183 | ||
| 147 | void InitializePhysicalCores() { | 184 | void InitializePhysicalCores() { |
| @@ -630,6 +667,21 @@ struct KernelCore::Impl { | |||
| 630 | user_slab_heap_size); | 667 | user_slab_heap_size); |
| 631 | } | 668 | } |
| 632 | 669 | ||
| 670 | KClientPort* CreateNamedServicePort(std::string name) { | ||
| 671 | auto search = service_interface_factory.find(name); | ||
| 672 | if (search == service_interface_factory.end()) { | ||
| 673 | UNIMPLEMENTED(); | ||
| 674 | return {}; | ||
| 675 | } | ||
| 676 | |||
| 677 | KClientPort* port = &search->second(system.ServiceManager(), system); | ||
| 678 | { | ||
| 679 | std::lock_guard lk(server_ports_lock); | ||
| 680 | server_ports.insert(&port->GetParent()->GetServerPort()); | ||
| 681 | } | ||
| 682 | return port; | ||
| 683 | } | ||
| 684 | |||
| 633 | std::atomic<u32> next_object_id{0}; | 685 | std::atomic<u32> next_object_id{0}; |
| 634 | std::atomic<u64> next_kernel_process_id{KProcess::InitialKIPIDMin}; | 686 | std::atomic<u64> next_kernel_process_id{KProcess::InitialKIPIDMin}; |
| 635 | std::atomic<u64> next_user_process_id{KProcess::ProcessIDMin}; | 687 | std::atomic<u64> next_user_process_id{KProcess::ProcessIDMin}; |
| @@ -656,6 +708,12 @@ struct KernelCore::Impl { | |||
| 656 | /// the ConnectToPort SVC. | 708 | /// the ConnectToPort SVC. |
| 657 | std::unordered_map<std::string, ServiceInterfaceFactory> service_interface_factory; | 709 | std::unordered_map<std::string, ServiceInterfaceFactory> service_interface_factory; |
| 658 | NamedPortTable named_ports; | 710 | NamedPortTable named_ports; |
| 711 | std::unordered_set<KServerPort*> server_ports; | ||
| 712 | std::unordered_set<KServerSession*> server_sessions; | ||
| 713 | std::unordered_set<KAutoObject*> registered_objects; | ||
| 714 | std::mutex server_ports_lock; | ||
| 715 | std::mutex server_sessions_lock; | ||
| 716 | std::mutex registered_objects_lock; | ||
| 659 | 717 | ||
| 660 | std::unique_ptr<Core::ExclusiveMonitor> exclusive_monitor; | 718 | std::unique_ptr<Core::ExclusiveMonitor> exclusive_monitor; |
| 661 | std::vector<Kernel::PhysicalCore> cores; | 719 | std::vector<Kernel::PhysicalCore> cores; |
| @@ -844,12 +902,27 @@ void KernelCore::RegisterNamedService(std::string name, ServiceInterfaceFactory& | |||
| 844 | } | 902 | } |
| 845 | 903 | ||
| 846 | KClientPort* KernelCore::CreateNamedServicePort(std::string name) { | 904 | KClientPort* KernelCore::CreateNamedServicePort(std::string name) { |
| 847 | auto search = impl->service_interface_factory.find(name); | 905 | return impl->CreateNamedServicePort(std::move(name)); |
| 848 | if (search == impl->service_interface_factory.end()) { | 906 | } |
| 849 | UNIMPLEMENTED(); | 907 | |
| 850 | return {}; | 908 | void KernelCore::RegisterServerSession(KServerSession* server_session) { |
| 851 | } | 909 | std::lock_guard lk(impl->server_sessions_lock); |
| 852 | return &search->second(impl->system.ServiceManager(), impl->system); | 910 | impl->server_sessions.insert(server_session); |
| 911 | } | ||
| 912 | |||
| 913 | void KernelCore::UnregisterServerSession(KServerSession* server_session) { | ||
| 914 | std::lock_guard lk(impl->server_sessions_lock); | ||
| 915 | impl->server_sessions.erase(server_session); | ||
| 916 | } | ||
| 917 | |||
| 918 | void KernelCore::RegisterKernelObject(KAutoObject* object) { | ||
| 919 | std::lock_guard lk(impl->registered_objects_lock); | ||
| 920 | impl->registered_objects.insert(object); | ||
| 921 | } | ||
| 922 | |||
| 923 | void KernelCore::UnregisterKernelObject(KAutoObject* object) { | ||
| 924 | std::lock_guard lk(impl->registered_objects_lock); | ||
| 925 | impl->registered_objects.erase(object); | ||
| 853 | } | 926 | } |
| 854 | 927 | ||
| 855 | bool KernelCore::IsValidNamedPort(NamedPortTable::const_iterator port) const { | 928 | bool KernelCore::IsValidNamedPort(NamedPortTable::const_iterator port) const { |
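Worth noting in `Shutdown()` above: each tracking set is copied out under its lock and the `Close()` calls happen on the local copy. That ordering matters because closing a session can destroy it, which re-enters `UnregisterServerSession` and would deadlock on `server_sessions_lock` if it were still held. A reduced sketch of the idiom, with types simplified:

```cpp
#include <mutex>
#include <unordered_set>

struct Session;

std::mutex sessions_lock;
std::unordered_set<Session*> sessions;

void Unregister(Session* session) {
    std::lock_guard lk{sessions_lock};
    sessions.erase(session);
}

struct Session {
    void Close() {
        Unregister(this); // re-enters the registry, taking sessions_lock
    }
};

void Shutdown() {
    // Snapshot under the lock, then close outside of it; closing while
    // holding sessions_lock would deadlock on the Unregister() above.
    std::unordered_set<Session*> local;
    {
        std::lock_guard lk{sessions_lock};
        local.swap(sessions);
    }
    for (auto* session : local) {
        session->Close();
    }
}
```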
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h index 2d01e1ae0..3a6db0b1c 100644 --- a/src/core/hle/kernel/kernel.h +++ b/src/core/hle/kernel/kernel.h | |||
| @@ -45,6 +45,7 @@ class KPort; | |||
| 45 | class KProcess; | 45 | class KProcess; |
| 46 | class KResourceLimit; | 46 | class KResourceLimit; |
| 47 | class KScheduler; | 47 | class KScheduler; |
| 48 | class KServerSession; | ||
| 48 | class KSession; | 49 | class KSession; |
| 49 | class KSharedMemory; | 50 | class KSharedMemory; |
| 50 | class KThread; | 51 | class KThread; |
| @@ -185,6 +186,22 @@ public: | |||
| 185 | /// Opens a port to a service previously registered with RegisterNamedService. | 186 | /// Opens a port to a service previously registered with RegisterNamedService. |
| 186 | KClientPort* CreateNamedServicePort(std::string name); | 187 | KClientPort* CreateNamedServicePort(std::string name); |
| 187 | 188 | ||
| 189 | /// Registers a server session with the global emulation state, to be freed on shutdown. This is | ||
| 190 | /// necessary because we do not emulate processes for HLE sessions. | ||
| 191 | void RegisterServerSession(KServerSession* server_session); | ||
| 192 | |||
| 193 | /// Unregisters a server session previously registered with RegisterServerSession when it was | ||
| 194 | /// destroyed during the current emulation session. | ||
| 195 | void UnregisterServerSession(KServerSession* server_session); | ||
| 196 | |||
| 197 | /// Registers a kernel object with the global emulation state; this is purely for tracking | ||
| 198 | /// leaks after emulation has shut down. | ||
| 199 | void RegisterKernelObject(KAutoObject* object); | ||
| 200 | |||
| 201 | /// Unregisters a kernel object previously registered with RegisterKernelObject when it was | ||
| 202 | /// destroyed during the current emulation session. | ||
| 203 | void UnregisterKernelObject(KAutoObject* object); | ||
| 204 | |||
| 188 | /// Determines whether or not the given port is a valid named port. | 205 | /// Determines whether or not the given port is a valid named port. |
| 189 | bool IsValidNamedPort(NamedPortTable::const_iterator port) const; | 206 | bool IsValidNamedPort(NamedPortTable::const_iterator port) const; |
| 190 | 207 | ||
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 8339e11a0..2eb532472 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp | |||
| @@ -298,6 +298,7 @@ static ResultCode ConnectToNamedPort(Core::System& system, Handle* out, VAddr po | |||
| 298 | // Create a session. | 298 | // Create a session. |
| 299 | KClientSession* session{}; | 299 | KClientSession* session{}; |
| 300 | R_TRY(port->CreateSession(std::addressof(session))); | 300 | R_TRY(port->CreateSession(std::addressof(session))); |
| 301 | port->Close(); | ||
| 301 | 302 | ||
| 302 | // Register the session in the table, close the extra reference. | 303 | // Register the session in the table, close the extra reference. |
| 303 | handle_table.Register(*out, session); | 304 | handle_table.Register(*out, session); |
| @@ -1439,11 +1440,6 @@ static void ExitProcess(Core::System& system) { | |||
| 1439 | LOG_INFO(Kernel_SVC, "Process {} exiting", current_process->GetProcessID()); | 1440 | LOG_INFO(Kernel_SVC, "Process {} exiting", current_process->GetProcessID()); |
| 1440 | ASSERT_MSG(current_process->GetStatus() == ProcessStatus::Running, | 1441 | ASSERT_MSG(current_process->GetStatus() == ProcessStatus::Running, |
| 1441 | "Process has already exited"); | 1442 | "Process has already exited"); |
| 1442 | |||
| 1443 | current_process->PrepareForTermination(); | ||
| 1444 | |||
| 1445 | // Kill the current thread | ||
| 1446 | system.Kernel().CurrentScheduler()->GetCurrentThread()->Exit(); | ||
| 1447 | } | 1443 | } |
| 1448 | 1444 | ||
| 1449 | static void ExitProcess32(Core::System& system) { | 1445 | static void ExitProcess32(Core::System& system) { |
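The added `port->Close()` drops the reference handed back by `CreateNamedServicePort` once the session has been created, matching the open/close reference counting `KAutoObject` uses throughout. A generic sketch of that convention, with `RefCounted` as an illustrative stand-in rather than the real class:

```cpp
#include <atomic>

// Illustrative open/close reference counting in the style of KAutoObject.
class RefCounted {
public:
    void Open() {
        ++ref_count;
    }
    void Close() {
        // The final Close() destroys the object, so holding on to a
        // pointer after closing it is a use-after-free.
        if (--ref_count == 0) {
            Destroy();
        }
    }

private:
    void Destroy() { /* delete this, return to a slab heap, etc. */ }
    std::atomic<int> ref_count{1};
};
```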
diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp index 6ce1360e3..b7f551e40 100644 --- a/src/core/hle/service/hid/controllers/npad.cpp +++ b/src/core/hle/service/hid/controllers/npad.cpp | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | #include "core/hle/kernel/k_writable_event.h" | 18 | #include "core/hle/kernel/k_writable_event.h" |
| 19 | #include "core/hle/kernel/kernel.h" | 19 | #include "core/hle/kernel/kernel.h" |
| 20 | #include "core/hle/service/hid/controllers/npad.h" | 20 | #include "core/hle/service/hid/controllers/npad.h" |
| 21 | #include "core/hle/service/kernel_helpers.h" | ||
| 21 | 22 | ||
| 22 | namespace Service::HID { | 23 | namespace Service::HID { |
| 23 | constexpr s32 HID_JOYSTICK_MAX = 0x7fff; | 24 | constexpr s32 HID_JOYSTICK_MAX = 0x7fff; |
| @@ -147,7 +148,9 @@ bool Controller_NPad::IsDeviceHandleValid(const DeviceHandle& device_handle) { | |||
| 147 | device_handle.device_index < DeviceIndex::MaxDeviceIndex; | 148 | device_handle.device_index < DeviceIndex::MaxDeviceIndex; |
| 148 | } | 149 | } |
| 149 | 150 | ||
| 150 | Controller_NPad::Controller_NPad(Core::System& system_) : ControllerBase{system_} { | 151 | Controller_NPad::Controller_NPad(Core::System& system_, |
| 152 | KernelHelpers::ServiceContext& service_context_) | ||
| 153 | : ControllerBase{system_}, service_context{service_context_} { | ||
| 151 | latest_vibration_values.fill({DEFAULT_VIBRATION_VALUE, DEFAULT_VIBRATION_VALUE}); | 154 | latest_vibration_values.fill({DEFAULT_VIBRATION_VALUE, DEFAULT_VIBRATION_VALUE}); |
| 152 | } | 155 | } |
| 153 | 156 | ||
| @@ -251,10 +254,9 @@ void Controller_NPad::InitNewlyAddedController(std::size_t controller_idx) { | |||
| 251 | } | 254 | } |
| 252 | 255 | ||
| 253 | void Controller_NPad::OnInit() { | 256 | void Controller_NPad::OnInit() { |
| 254 | auto& kernel = system.Kernel(); | ||
| 255 | for (std::size_t i = 0; i < styleset_changed_events.size(); ++i) { | 257 | for (std::size_t i = 0; i < styleset_changed_events.size(); ++i) { |
| 256 | styleset_changed_events[i] = Kernel::KEvent::Create(kernel); | 258 | styleset_changed_events[i] = |
| 257 | styleset_changed_events[i]->Initialize(fmt::format("npad:NpadStyleSetChanged_{}", i)); | 259 | service_context.CreateEvent(fmt::format("npad:NpadStyleSetChanged_{}", i)); |
| 258 | } | 260 | } |
| 259 | 261 | ||
| 260 | if (!IsControllerActivated()) { | 262 | if (!IsControllerActivated()) { |
| @@ -344,8 +346,7 @@ void Controller_NPad::OnRelease() { | |||
| 344 | } | 346 | } |
| 345 | 347 | ||
| 346 | for (std::size_t i = 0; i < styleset_changed_events.size(); ++i) { | 348 | for (std::size_t i = 0; i < styleset_changed_events.size(); ++i) { |
| 347 | styleset_changed_events[i]->Close(); | 349 | service_context.CloseEvent(styleset_changed_events[i]); |
| 348 | styleset_changed_events[i] = nullptr; | ||
| 349 | } | 350 | } |
| 350 | } | 351 | } |
| 351 | 352 | ||
diff --git a/src/core/hle/service/hid/controllers/npad.h b/src/core/hle/service/hid/controllers/npad.h index 1409d82a2..4fcc6f93a 100644 --- a/src/core/hle/service/hid/controllers/npad.h +++ b/src/core/hle/service/hid/controllers/npad.h | |||
| @@ -20,6 +20,10 @@ class KEvent; | |||
| 20 | class KReadableEvent; | 20 | class KReadableEvent; |
| 21 | } // namespace Kernel | 21 | } // namespace Kernel |
| 22 | 22 | ||
| 23 | namespace Service::KernelHelpers { | ||
| 24 | class ServiceContext; | ||
| 25 | } | ||
| 26 | |||
| 23 | namespace Service::HID { | 27 | namespace Service::HID { |
| 24 | 28 | ||
| 25 | constexpr u32 NPAD_HANDHELD = 32; | 29 | constexpr u32 NPAD_HANDHELD = 32; |
| @@ -27,7 +31,8 @@ constexpr u32 NPAD_UNKNOWN = 16; // TODO(ogniK): What is this? | |||
| 27 | 31 | ||
| 28 | class Controller_NPad final : public ControllerBase { | 32 | class Controller_NPad final : public ControllerBase { |
| 29 | public: | 33 | public: |
| 30 | explicit Controller_NPad(Core::System& system_); | 34 | explicit Controller_NPad(Core::System& system_, |
| 35 | KernelHelpers::ServiceContext& service_context_); | ||
| 31 | ~Controller_NPad() override; | 36 | ~Controller_NPad() override; |
| 32 | 37 | ||
| 33 | // Called when the controller is initialized | 38 | // Called when the controller is initialized |
| @@ -566,6 +571,7 @@ private: | |||
| 566 | std::array<std::unique_ptr<Input::MotionDevice>, Settings::NativeMotion::NUM_MOTIONS_HID>, | 571 | std::array<std::unique_ptr<Input::MotionDevice>, Settings::NativeMotion::NUM_MOTIONS_HID>, |
| 567 | 10>; | 572 | 10>; |
| 568 | 573 | ||
| 574 | KernelHelpers::ServiceContext& service_context; | ||
| 569 | std::mutex mutex; | 575 | std::mutex mutex; |
| 570 | ButtonArray buttons; | 576 | ButtonArray buttons; |
| 571 | StickArray sticks; | 577 | StickArray sticks; |
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp index d68b023d0..b8b80570d 100644 --- a/src/core/hle/service/hid/hid.cpp +++ b/src/core/hle/service/hid/hid.cpp | |||
| @@ -46,8 +46,9 @@ constexpr auto pad_update_ns = std::chrono::nanoseconds{1000 * 1000}; // | |||
| 46 | constexpr auto motion_update_ns = std::chrono::nanoseconds{15 * 1000 * 1000}; // (15ms, 66.666Hz) | 46 | constexpr auto motion_update_ns = std::chrono::nanoseconds{15 * 1000 * 1000}; // (15ms, 66.666Hz) |
| 47 | constexpr std::size_t SHARED_MEMORY_SIZE = 0x40000; | 47 | constexpr std::size_t SHARED_MEMORY_SIZE = 0x40000; |
| 48 | 48 | ||
| 49 | IAppletResource::IAppletResource(Core::System& system_) | 49 | IAppletResource::IAppletResource(Core::System& system_, |
| 50 | : ServiceFramework{system_, "IAppletResource"} { | 50 | KernelHelpers::ServiceContext& service_context_) |
| 51 | : ServiceFramework{system_, "IAppletResource"}, service_context{service_context_} { | ||
| 51 | static const FunctionInfo functions[] = { | 52 | static const FunctionInfo functions[] = { |
| 52 | {0, &IAppletResource::GetSharedMemoryHandle, "GetSharedMemoryHandle"}, | 53 | {0, &IAppletResource::GetSharedMemoryHandle, "GetSharedMemoryHandle"}, |
| 53 | }; | 54 | }; |
| @@ -63,7 +64,7 @@ IAppletResource::IAppletResource(Core::System& system_) | |||
| 63 | MakeController<Controller_Stubbed>(HidController::CaptureButton); | 64 | MakeController<Controller_Stubbed>(HidController::CaptureButton); |
| 64 | MakeController<Controller_Stubbed>(HidController::InputDetector); | 65 | MakeController<Controller_Stubbed>(HidController::InputDetector); |
| 65 | MakeController<Controller_Stubbed>(HidController::UniquePad); | 66 | MakeController<Controller_Stubbed>(HidController::UniquePad); |
| 66 | MakeController<Controller_NPad>(HidController::NPad); | 67 | MakeControllerWithServiceContext<Controller_NPad>(HidController::NPad); |
| 67 | MakeController<Controller_Gesture>(HidController::Gesture); | 68 | MakeController<Controller_Gesture>(HidController::Gesture); |
| 68 | MakeController<Controller_ConsoleSixAxis>(HidController::ConsoleSixAxisSensor); | 69 | MakeController<Controller_ConsoleSixAxis>(HidController::ConsoleSixAxisSensor); |
| 69 | 70 | ||
| @@ -191,13 +192,14 @@ private: | |||
| 191 | 192 | ||
| 192 | std::shared_ptr<IAppletResource> Hid::GetAppletResource() { | 193 | std::shared_ptr<IAppletResource> Hid::GetAppletResource() { |
| 193 | if (applet_resource == nullptr) { | 194 | if (applet_resource == nullptr) { |
| 194 | applet_resource = std::make_shared<IAppletResource>(system); | 195 | applet_resource = std::make_shared<IAppletResource>(system, service_context); |
| 195 | } | 196 | } |
| 196 | 197 | ||
| 197 | return applet_resource; | 198 | return applet_resource; |
| 198 | } | 199 | } |
| 199 | 200 | ||
| 200 | Hid::Hid(Core::System& system_) : ServiceFramework{system_, "hid"} { | 201 | Hid::Hid(Core::System& system_) |
| 202 | : ServiceFramework{system_, "hid"}, service_context{system_, service_name} { | ||
| 201 | // clang-format off | 203 | // clang-format off |
| 202 | static const FunctionInfo functions[] = { | 204 | static const FunctionInfo functions[] = { |
| 203 | {0, &Hid::CreateAppletResource, "CreateAppletResource"}, | 205 | {0, &Hid::CreateAppletResource, "CreateAppletResource"}, |
| @@ -347,7 +349,7 @@ void Hid::CreateAppletResource(Kernel::HLERequestContext& ctx) { | |||
| 347 | LOG_DEBUG(Service_HID, "called, applet_resource_user_id={}", applet_resource_user_id); | 349 | LOG_DEBUG(Service_HID, "called, applet_resource_user_id={}", applet_resource_user_id); |
| 348 | 350 | ||
| 349 | if (applet_resource == nullptr) { | 351 | if (applet_resource == nullptr) { |
| 350 | applet_resource = std::make_shared<IAppletResource>(system); | 352 | applet_resource = std::make_shared<IAppletResource>(system, service_context); |
| 351 | } | 353 | } |
| 352 | 354 | ||
| 353 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | 355 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; |
diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h index 83fc2ea1d..9c5c7f252 100644 --- a/src/core/hle/service/hid/hid.h +++ b/src/core/hle/service/hid/hid.h | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include <chrono> | 7 | #include <chrono> |
| 8 | 8 | ||
| 9 | #include "core/hle/service/hid/controllers/controller_base.h" | 9 | #include "core/hle/service/hid/controllers/controller_base.h" |
| 10 | #include "core/hle/service/kernel_helpers.h" | ||
| 10 | #include "core/hle/service/service.h" | 11 | #include "core/hle/service/service.h" |
| 11 | 12 | ||
| 12 | namespace Core::Timing { | 13 | namespace Core::Timing { |
| @@ -39,7 +40,8 @@ enum class HidController : std::size_t { | |||
| 39 | 40 | ||
| 40 | class IAppletResource final : public ServiceFramework<IAppletResource> { | 41 | class IAppletResource final : public ServiceFramework<IAppletResource> { |
| 41 | public: | 42 | public: |
| 42 | explicit IAppletResource(Core::System& system_); | 43 | explicit IAppletResource(Core::System& system_, |
| 44 | KernelHelpers::ServiceContext& service_context_); | ||
| 43 | ~IAppletResource() override; | 45 | ~IAppletResource() override; |
| 44 | 46 | ||
| 45 | void ActivateController(HidController controller); | 47 | void ActivateController(HidController controller); |
| @@ -60,11 +62,18 @@ private: | |||
| 60 | void MakeController(HidController controller) { | 62 | void MakeController(HidController controller) { |
| 61 | controllers[static_cast<std::size_t>(controller)] = std::make_unique<T>(system); | 63 | controllers[static_cast<std::size_t>(controller)] = std::make_unique<T>(system); |
| 62 | } | 64 | } |
| 65 | template <typename T> | ||
| 66 | void MakeControllerWithServiceContext(HidController controller) { | ||
| 67 | controllers[static_cast<std::size_t>(controller)] = | ||
| 68 | std::make_unique<T>(system, service_context); | ||
| 69 | } | ||
| 63 | 70 | ||
| 64 | void GetSharedMemoryHandle(Kernel::HLERequestContext& ctx); | 71 | void GetSharedMemoryHandle(Kernel::HLERequestContext& ctx); |
| 65 | void UpdateControllers(std::uintptr_t user_data, std::chrono::nanoseconds ns_late); | 72 | void UpdateControllers(std::uintptr_t user_data, std::chrono::nanoseconds ns_late); |
| 66 | void UpdateMotion(std::uintptr_t user_data, std::chrono::nanoseconds ns_late); | 73 | void UpdateMotion(std::uintptr_t user_data, std::chrono::nanoseconds ns_late); |
| 67 | 74 | ||
| 75 | KernelHelpers::ServiceContext& service_context; | ||
| 76 | |||
| 68 | std::shared_ptr<Core::Timing::EventType> pad_update_event; | 77 | std::shared_ptr<Core::Timing::EventType> pad_update_event; |
| 69 | std::shared_ptr<Core::Timing::EventType> motion_update_event; | 78 | std::shared_ptr<Core::Timing::EventType> motion_update_event; |
| 70 | 79 | ||
| @@ -176,6 +185,8 @@ private: | |||
| 176 | static_assert(sizeof(VibrationDeviceInfo) == 0x8, "VibrationDeviceInfo has incorrect size."); | 185 | static_assert(sizeof(VibrationDeviceInfo) == 0x8, "VibrationDeviceInfo has incorrect size."); |
| 177 | 186 | ||
| 178 | std::shared_ptr<IAppletResource> applet_resource; | 187 | std::shared_ptr<IAppletResource> applet_resource; |
| 188 | |||
| 189 | KernelHelpers::ServiceContext service_context; | ||
| 179 | }; | 190 | }; |
| 180 | 191 | ||
| 181 | /// Reload input devices. Used when input configuration changed | 192 | /// Reload input devices. Used when input configuration changed |
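Only Controller_NPad currently takes the service context, which is why hid.h grows a second factory template instead of widening every controller constructor. A minimal sketch of what a context-aware controller looks like; the class name, its event, and the member layout are illustrative assumptions, not part of this diff:

    // Hypothetical controller that owns a kernel event via the shared context.
    class Controller_Example {
    public:
        explicit Controller_Example(Core::System& system_,
                                    KernelHelpers::ServiceContext& service_context_)
            : system{system_}, service_context{service_context_} {
            // Reserved against the service's own process, not the running game.
            update_event = service_context.CreateEvent("Example:UpdateEvent");
        }

        ~Controller_Example() {
            // Everything CreateEvent() hands out goes back through CloseEvent().
            service_context.CloseEvent(update_event);
        }

    private:
        Core::System& system;
        KernelHelpers::ServiceContext& service_context;
        Kernel::KEvent* update_event{};
    };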
diff --git a/src/core/hle/service/kernel_helpers.cpp b/src/core/hle/service/kernel_helpers.cpp new file mode 100644 index 000000000..62f4cdfb2 --- /dev/null +++ b/src/core/hle/service/kernel_helpers.cpp | |||
| @@ -0,0 +1,62 @@ | |||
| 1 | // Copyright 2021 yuzu emulator team | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "core/core.h" | ||
| 6 | #include "core/hle/kernel/k_event.h" | ||
| 7 | #include "core/hle/kernel/k_process.h" | ||
| 8 | #include "core/hle/kernel/k_readable_event.h" | ||
| 9 | #include "core/hle/kernel/k_resource_limit.h" | ||
| 10 | #include "core/hle/kernel/k_scoped_resource_reservation.h" | ||
| 11 | #include "core/hle/kernel/k_writable_event.h" | ||
| 12 | #include "core/hle/service/kernel_helpers.h" | ||
| 13 | |||
| 14 | namespace Service::KernelHelpers { | ||
| 15 | |||
| 16 | ServiceContext::ServiceContext(Core::System& system_, std::string name_) | ||
| 17 | : kernel(system_.Kernel()) { | ||
| 18 | process = Kernel::KProcess::Create(kernel); | ||
| 19 | ASSERT(Kernel::KProcess::Initialize(process, system_, std::move(name_), | ||
| 20 | Kernel::KProcess::ProcessType::Userland) | ||
| 21 | .IsSuccess()); | ||
| 22 | } | ||
| 23 | |||
| 24 | ServiceContext::~ServiceContext() { | ||
| 25 | process->Close(); | ||
| 26 | process = nullptr; | ||
| 27 | } | ||
| 28 | |||
| 29 | Kernel::KEvent* ServiceContext::CreateEvent(std::string&& name) { | ||
| 30 | // Reserve a new event from the process resource limit | ||
| 31 | Kernel::KScopedResourceReservation event_reservation(process, | ||
| 32 | Kernel::LimitableResource::Events); | ||
| 33 | if (!event_reservation.Succeeded()) { | ||
| 34 | LOG_CRITICAL(Service, "Resource limit reached!"); | ||
| 35 | return {}; | ||
| 36 | } | ||
| 37 | |||
| 38 | // Create a new event. | ||
| 39 | auto* event = Kernel::KEvent::Create(kernel); | ||
| 40 | if (!event) { | ||
| 41 | LOG_CRITICAL(Service, "Unable to create event!"); | ||
| 42 | return {}; | ||
| 43 | } | ||
| 44 | |||
| 45 | // Initialize the event. | ||
| 46 | event->Initialize(std::move(name)); | ||
| 47 | |||
| 48 | // Commit the event reservation. | ||
| 49 | event_reservation.Commit(); | ||
| 50 | |||
| 51 | // Register the event. | ||
| 52 | Kernel::KEvent::Register(kernel, event); | ||
| 53 | |||
| 54 | return event; | ||
| 55 | } | ||
| 56 | |||
| 57 | void ServiceContext::CloseEvent(Kernel::KEvent* event) { | ||
| 58 | event->GetReadableEvent().Close(); | ||
| 59 | event->GetWritableEvent().Close(); | ||
| 60 | } | ||
| 61 | |||
| 62 | } // namespace Service::KernelHelpers | ||
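kernel_helpers.cpp is the heart of the change: ServiceContext owns a dedicated KProcess so that event reservations are charged to the service itself rather than whichever process happens to be current, and CreateEvent/CloseEvent form a strict pair. A minimal usage sketch; the Signal() call assumes the existing KWritableEvent interface used elsewhere in the kernel:

    Service::KernelHelpers::ServiceContext ctx{system, "example:service"};

    Kernel::KEvent* event = ctx.CreateEvent("Example:InterruptEvent");
    if (event != nullptr) {
        event->GetWritableEvent().Signal();  // behaves like any other KEvent
        ctx.CloseEvent(event);               // closes both readable and writable ends
    }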
diff --git a/src/core/hle/service/kernel_helpers.h b/src/core/hle/service/kernel_helpers.h new file mode 100644 index 000000000..4f3e95f67 --- /dev/null +++ b/src/core/hle/service/kernel_helpers.h | |||
| @@ -0,0 +1,35 @@ | |||
| 1 | // Copyright 2021 yuzu emulator team | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <string> | ||
| 8 | |||
| 9 | namespace Core { | ||
| 10 | class System; | ||
| 11 | } | ||
| 12 | |||
| 13 | namespace Kernel { | ||
| 14 | class KernelCore; | ||
| 15 | class KEvent; | ||
| 16 | class KProcess; | ||
| 17 | } // namespace Kernel | ||
| 18 | |||
| 19 | namespace Service::KernelHelpers { | ||
| 20 | |||
| 21 | class ServiceContext { | ||
| 22 | public: | ||
| 23 | ServiceContext(Core::System& system_, std::string name_); | ||
| 24 | ~ServiceContext(); | ||
| 25 | |||
| 26 | Kernel::KEvent* CreateEvent(std::string&& name); | ||
| 27 | |||
| 28 | void CloseEvent(Kernel::KEvent* event); | ||
| 29 | |||
| 30 | private: | ||
| 31 | Kernel::KernelCore& kernel; | ||
| 32 | Kernel::KProcess* process{}; | ||
| 33 | }; | ||
| 34 | |||
| 35 | } // namespace Service::KernelHelpers | ||
diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp index 03992af5e..ff405099a 100644 --- a/src/core/hle/service/nvdrv/nvdrv.cpp +++ b/src/core/hle/service/nvdrv/nvdrv.cpp | |||
| @@ -39,11 +39,11 @@ void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger | |||
| 39 | nvflinger.SetNVDrvInstance(module_); | 39 | nvflinger.SetNVDrvInstance(module_); |
| 40 | } | 40 | } |
| 41 | 41 | ||
| 42 | Module::Module(Core::System& system) : syncpoint_manager{system.GPU()} { | 42 | Module::Module(Core::System& system) |
| 43 | auto& kernel = system.Kernel(); | 43 | : syncpoint_manager{system.GPU()}, service_context{system, "nvdrv"} { |
| 44 | for (u32 i = 0; i < MaxNvEvents; i++) { | 44 | for (u32 i = 0; i < MaxNvEvents; i++) { |
| 45 | events_interface.events[i].event = Kernel::KEvent::Create(kernel); | 45 | events_interface.events[i].event = |
| 46 | events_interface.events[i].event->Initialize(fmt::format("NVDRV::NvEvent_{}", i)); | 46 | service_context.CreateEvent(fmt::format("NVDRV::NvEvent_{}", i)); |
| 47 | events_interface.status[i] = EventState::Free; | 47 | events_interface.status[i] = EventState::Free; |
| 48 | events_interface.registered[i] = false; | 48 | events_interface.registered[i] = false; |
| 49 | } | 49 | } |
| @@ -65,8 +65,7 @@ Module::Module(Core::System& system) : syncpoint_manager{system.GPU()} { | |||
| 65 | 65 | ||
| 66 | Module::~Module() { | 66 | Module::~Module() { |
| 67 | for (u32 i = 0; i < MaxNvEvents; i++) { | 67 | for (u32 i = 0; i < MaxNvEvents; i++) { |
| 68 | events_interface.events[i].event->Close(); | 68 | service_context.CloseEvent(events_interface.events[i].event); |
| 69 | events_interface.events[i].event = nullptr; | ||
| 70 | } | 69 | } |
| 71 | } | 70 | } |
| 72 | 71 | ||
diff --git a/src/core/hle/service/nvdrv/nvdrv.h b/src/core/hle/service/nvdrv/nvdrv.h index a43ceb7ae..e2a1dde5b 100644 --- a/src/core/hle/service/nvdrv/nvdrv.h +++ b/src/core/hle/service/nvdrv/nvdrv.h | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include <vector> | 9 | #include <vector> |
| 10 | 10 | ||
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "core/hle/service/kernel_helpers.h" | ||
| 12 | #include "core/hle/service/nvdrv/nvdata.h" | 13 | #include "core/hle/service/nvdrv/nvdata.h" |
| 13 | #include "core/hle/service/nvdrv/syncpoint_manager.h" | 14 | #include "core/hle/service/nvdrv/syncpoint_manager.h" |
| 14 | #include "core/hle/service/service.h" | 15 | #include "core/hle/service/service.h" |
| @@ -154,6 +155,8 @@ private: | |||
| 154 | std::unordered_map<std::string, std::shared_ptr<Devices::nvdevice>> devices; | 155 | std::unordered_map<std::string, std::shared_ptr<Devices::nvdevice>> devices; |
| 155 | 156 | ||
| 156 | EventInterface events_interface; | 157 | EventInterface events_interface; |
| 158 | |||
| 159 | KernelHelpers::ServiceContext service_context; | ||
| 157 | }; | 160 | }; |
| 158 | 161 | ||
| 159 | /// Registers all NVDRV services with the specified service manager. | 162 | /// Registers all NVDRV services with the specified service manager. |
diff --git a/src/core/hle/service/service.cpp b/src/core/hle/service/service.cpp index e6fba88b2..b3e50433b 100644 --- a/src/core/hle/service/service.cpp +++ b/src/core/hle/service/service.cpp | |||
| @@ -104,23 +104,22 @@ ServiceFrameworkBase::~ServiceFrameworkBase() { | |||
| 104 | void ServiceFrameworkBase::InstallAsService(SM::ServiceManager& service_manager) { | 104 | void ServiceFrameworkBase::InstallAsService(SM::ServiceManager& service_manager) { |
| 105 | const auto guard = LockService(); | 105 | const auto guard = LockService(); |
| 106 | 106 | ||
| 107 | ASSERT(!port_installed); | 107 | ASSERT(!service_registered); |
| 108 | 108 | ||
| 109 | auto port = service_manager.RegisterService(service_name, max_sessions).Unwrap(); | 109 | service_manager.RegisterService(service_name, max_sessions, shared_from_this()); |
| 110 | port->SetSessionHandler(shared_from_this()); | 110 | service_registered = true; |
| 111 | port_installed = true; | ||
| 112 | } | 111 | } |
| 113 | 112 | ||
| 114 | Kernel::KClientPort& ServiceFrameworkBase::CreatePort() { | 113 | Kernel::KClientPort& ServiceFrameworkBase::CreatePort() { |
| 115 | const auto guard = LockService(); | 114 | const auto guard = LockService(); |
| 116 | 115 | ||
| 117 | ASSERT(!port_installed); | 116 | ASSERT(!service_registered); |
| 118 | 117 | ||
| 119 | auto* port = Kernel::KPort::Create(kernel); | 118 | auto* port = Kernel::KPort::Create(kernel); |
| 120 | port->Initialize(max_sessions, false, service_name); | 119 | port->Initialize(max_sessions, false, service_name); |
| 121 | port->GetServerPort().SetSessionHandler(shared_from_this()); | 120 | port->GetServerPort().SetSessionHandler(shared_from_this()); |
| 122 | 121 | ||
| 123 | port_installed = true; | 122 | service_registered = true; |
| 124 | 123 | ||
| 125 | return port->GetClientPort(); | 124 | return port->GetClientPort(); |
| 126 | } | 125 | } |
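For service authors nothing changes at the call site; InstallAsService() now hands the framework's own session handler to the service manager instead of wiring it onto an eagerly created port. A sketch of the unchanged usage, assuming the Core::System::ServiceManager() accessor used elsewhere in the tree:

    // Registration stores a handler; no KPort exists until a client connects.
    auto hid = std::make_shared<Hid>(system);
    hid->InstallAsService(system.ServiceManager());
    // A second InstallAsService() would trip ASSERT(!service_registered).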
diff --git a/src/core/hle/service/service.h b/src/core/hle/service/service.h index e078ac176..c9d6b879d 100644 --- a/src/core/hle/service/service.h +++ b/src/core/hle/service/service.h | |||
| @@ -96,6 +96,9 @@ protected: | |||
| 96 | /// System context that the service operates under. | 96 | /// System context that the service operates under. |
| 97 | Core::System& system; | 97 | Core::System& system; |
| 98 | 98 | ||
| 99 | /// Identifier string used to connect to the service. | ||
| 100 | std::string service_name; | ||
| 101 | |||
| 99 | private: | 102 | private: |
| 100 | template <typename T> | 103 | template <typename T> |
| 101 | friend class ServiceFramework; | 104 | friend class ServiceFramework; |
| @@ -117,14 +120,12 @@ private: | |||
| 117 | void RegisterHandlersBaseTipc(const FunctionInfoBase* functions, std::size_t n); | 120 | void RegisterHandlersBaseTipc(const FunctionInfoBase* functions, std::size_t n); |
| 118 | void ReportUnimplementedFunction(Kernel::HLERequestContext& ctx, const FunctionInfoBase* info); | 121 | void ReportUnimplementedFunction(Kernel::HLERequestContext& ctx, const FunctionInfoBase* info); |
| 119 | 122 | ||
| 120 | /// Identifier string used to connect to the service. | ||
| 121 | std::string service_name; | ||
| 122 | /// Maximum number of concurrent sessions that this service can handle. | 123 | /// Maximum number of concurrent sessions that this service can handle. |
| 123 | u32 max_sessions; | 124 | u32 max_sessions; |
| 124 | 125 | ||
| 125 | /// Flag to store if a port was already created/installed to detect multiple install attempts, | 126 | /// Flag to store if a port was already created/installed to detect multiple install attempts, |
| 126 | /// which is not supported. | 127 | /// which is not supported. |
| 127 | bool port_installed = false; | 128 | bool service_registered = false; |
| 128 | 129 | ||
| 129 | /// Function used to safely up-cast pointers to the derived class before invoking a handler. | 130 | /// Function used to safely up-cast pointers to the derived class before invoking a handler. |
| 130 | InvokerFn* handler_invoker; | 131 | InvokerFn* handler_invoker; |
diff --git a/src/core/hle/service/sm/sm.cpp b/src/core/hle/service/sm/sm.cpp index 15034abed..ae4dc4a75 100644 --- a/src/core/hle/service/sm/sm.cpp +++ b/src/core/hle/service/sm/sm.cpp | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #include <tuple> | 5 | #include <tuple> |
| 6 | #include "common/assert.h" | 6 | #include "common/assert.h" |
| 7 | #include "common/scope_exit.h" | ||
| 7 | #include "core/core.h" | 8 | #include "core/core.h" |
| 8 | #include "core/hle/ipc_helpers.h" | 9 | #include "core/hle/ipc_helpers.h" |
| 9 | #include "core/hle/kernel/k_client_port.h" | 10 | #include "core/hle/kernel/k_client_port.h" |
| @@ -40,17 +41,13 @@ static ResultCode ValidateServiceName(const std::string& name) { | |||
| 40 | } | 41 | } |
| 41 | 42 | ||
| 42 | Kernel::KClientPort& ServiceManager::InterfaceFactory(ServiceManager& self, Core::System& system) { | 43 | Kernel::KClientPort& ServiceManager::InterfaceFactory(ServiceManager& self, Core::System& system) { |
| 43 | ASSERT(self.sm_interface.expired()); | 44 | self.sm_interface = std::make_shared<SM>(self, system); |
| 44 | |||
| 45 | auto sm = std::make_shared<SM>(self, system); | ||
| 46 | self.sm_interface = sm; | ||
| 47 | self.controller_interface = std::make_unique<Controller>(system); | 45 | self.controller_interface = std::make_unique<Controller>(system); |
| 48 | 46 | return self.sm_interface->CreatePort(); | |
| 49 | return sm->CreatePort(); | ||
| 50 | } | 47 | } |
| 51 | 48 | ||
| 52 | ResultVal<Kernel::KServerPort*> ServiceManager::RegisterService(std::string name, | 49 | ResultCode ServiceManager::RegisterService(std::string name, u32 max_sessions, |
| 53 | u32 max_sessions) { | 50 | Kernel::SessionRequestHandlerPtr handler) { |
| 54 | 51 | ||
| 55 | CASCADE_CODE(ValidateServiceName(name)); | 52 | CASCADE_CODE(ValidateServiceName(name)); |
| 56 | 53 | ||
| @@ -59,12 +56,9 @@ ResultVal<Kernel::KServerPort*> ServiceManager::RegisterService(std::string name | |||
| 59 | return ERR_ALREADY_REGISTERED; | 56 | return ERR_ALREADY_REGISTERED; |
| 60 | } | 57 | } |
| 61 | 58 | ||
| 62 | auto* port = Kernel::KPort::Create(kernel); | 59 | registered_services.emplace(std::move(name), handler); |
| 63 | port->Initialize(max_sessions, false, name); | ||
| 64 | 60 | ||
| 65 | registered_services.emplace(std::move(name), port); | 61 | return ResultSuccess; |
| 66 | |||
| 67 | return MakeResult(&port->GetServerPort()); | ||
| 68 | } | 62 | } |
| 69 | 63 | ||
| 70 | ResultCode ServiceManager::UnregisterService(const std::string& name) { | 64 | ResultCode ServiceManager::UnregisterService(const std::string& name) { |
| @@ -76,14 +70,11 @@ ResultCode ServiceManager::UnregisterService(const std::string& name) { | |||
| 76 | return ERR_SERVICE_NOT_REGISTERED; | 70 | return ERR_SERVICE_NOT_REGISTERED; |
| 77 | } | 71 | } |
| 78 | 72 | ||
| 79 | iter->second->Close(); | ||
| 80 | |||
| 81 | registered_services.erase(iter); | 73 | registered_services.erase(iter); |
| 82 | return ResultSuccess; | 74 | return ResultSuccess; |
| 83 | } | 75 | } |
| 84 | 76 | ||
| 85 | ResultVal<Kernel::KPort*> ServiceManager::GetServicePort(const std::string& name) { | 77 | ResultVal<Kernel::KPort*> ServiceManager::GetServicePort(const std::string& name) { |
| 86 | |||
| 87 | CASCADE_CODE(ValidateServiceName(name)); | 78 | CASCADE_CODE(ValidateServiceName(name)); |
| 88 | auto it = registered_services.find(name); | 79 | auto it = registered_services.find(name); |
| 89 | if (it == registered_services.end()) { | 80 | if (it == registered_services.end()) { |
| @@ -91,10 +82,13 @@ ResultVal<Kernel::KPort*> ServiceManager::GetServicePort(const std::string& name | |||
| 91 | return ERR_SERVICE_NOT_REGISTERED; | 82 | return ERR_SERVICE_NOT_REGISTERED; |
| 92 | } | 83 | } |
| 93 | 84 | ||
| 94 | return MakeResult(it->second); | 85 | auto* port = Kernel::KPort::Create(kernel); |
| 95 | } | 86 | port->Initialize(ServerSessionCountMax, false, name); |
| 87 | auto handler = it->second; | ||
| 88 | port->GetServerPort().SetSessionHandler(std::move(handler)); | ||
| 96 | 89 | ||
| 97 | SM::~SM() = default; | 90 | return MakeResult(port); |
| 91 | } | ||
| 98 | 92 | ||
| 99 | /** | 93 | /** |
| 100 | * SM::Initialize service function | 94 | * SM::Initialize service function |
| @@ -156,11 +150,15 @@ ResultVal<Kernel::KClientSession*> SM::GetServiceImpl(Kernel::HLERequestContext& | |||
| 156 | LOG_ERROR(Service_SM, "called service={} -> error 0x{:08X}", name, port_result.Code().raw); | 150 | LOG_ERROR(Service_SM, "called service={} -> error 0x{:08X}", name, port_result.Code().raw); |
| 157 | return port_result.Code(); | 151 | return port_result.Code(); |
| 158 | } | 152 | } |
| 159 | auto& port = port_result.Unwrap()->GetClientPort(); | 153 | auto& port = port_result.Unwrap(); |
| 154 | SCOPE_EXIT({ port->GetClientPort().Close(); }); | ||
| 155 | |||
| 156 | server_ports.emplace_back(&port->GetServerPort()); | ||
| 160 | 157 | ||
| 161 | // Create a new session. | 158 | // Create a new session. |
| 162 | Kernel::KClientSession* session{}; | 159 | Kernel::KClientSession* session{}; |
| 163 | if (const auto result = port.CreateSession(std::addressof(session)); result.IsError()) { | 160 | if (const auto result = port->GetClientPort().CreateSession(std::addressof(session)); |
| 161 | result.IsError()) { | ||
| 164 | LOG_ERROR(Service_SM, "called service={} -> error 0x{:08X}", name, result.raw); | 162 | LOG_ERROR(Service_SM, "called service={} -> error 0x{:08X}", name, result.raw); |
| 165 | return result; | 163 | return result; |
| 166 | } | 164 | } |
| @@ -180,20 +178,21 @@ void SM::RegisterService(Kernel::HLERequestContext& ctx) { | |||
| 180 | LOG_DEBUG(Service_SM, "called with name={}, max_session_count={}, is_light={}", name, | 178 | LOG_DEBUG(Service_SM, "called with name={}, max_session_count={}, is_light={}", name, |
| 181 | max_session_count, is_light); | 179 | max_session_count, is_light); |
| 182 | 180 | ||
| 183 | auto handle = service_manager.RegisterService(name, max_session_count); | 181 | if (const auto result = service_manager.RegisterService(name, max_session_count, nullptr); |
| 184 | if (handle.Failed()) { | 182 | result.IsError()) { |
| 185 | LOG_ERROR(Service_SM, "failed to register service with error_code={:08X}", | 183 | LOG_ERROR(Service_SM, "failed to register service with error_code={:08X}", result.raw); |
| 186 | handle.Code().raw); | ||
| 187 | IPC::ResponseBuilder rb{ctx, 2}; | 184 | IPC::ResponseBuilder rb{ctx, 2}; |
| 188 | rb.Push(handle.Code()); | 185 | rb.Push(result); |
| 189 | return; | 186 | return; |
| 190 | } | 187 | } |
| 191 | 188 | ||
| 192 | IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles}; | 189 | auto* port = Kernel::KPort::Create(kernel); |
| 193 | rb.Push(handle.Code()); | 190 | port->Initialize(ServerSessionCountMax, is_light, name); |
| 191 | SCOPE_EXIT({ port->GetClientPort().Close(); }); | ||
| 194 | 192 | ||
| 195 | auto server_port = handle.Unwrap(); | 193 | IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles}; |
| 196 | rb.PushMoveObjects(server_port); | 194 | rb.Push(ResultSuccess); |
| 195 | rb.PushMoveObjects(port->GetServerPort()); | ||
| 197 | } | 196 | } |
| 198 | 197 | ||
| 199 | void SM::UnregisterService(Kernel::HLERequestContext& ctx) { | 198 | void SM::UnregisterService(Kernel::HLERequestContext& ctx) { |
| @@ -225,4 +224,10 @@ SM::SM(ServiceManager& service_manager_, Core::System& system_) | |||
| 225 | }); | 224 | }); |
| 226 | } | 225 | } |
| 227 | 226 | ||
| 227 | SM::~SM() { | ||
| 228 | for (auto& server_port : server_ports) { | ||
| 229 | server_port->Close(); | ||
| 230 | } | ||
| 231 | } | ||
| 232 | |||
| 228 | } // namespace Service::SM | 233 | } // namespace Service::SM |
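Port lifetime is the subtle part: every GetServicePort() call now materializes a fresh KPort with the stored handler attached, the client end is closed as soon as the session exists, and the server end lives in SM::server_ports until the new destructor runs. Condensed sketch of the flow in GetServiceImpl, error handling elided:

    auto* port = service_manager.GetServicePort(name).Unwrap();  // fresh KPort per lookup
    server_ports.emplace_back(&port->GetServerPort());           // closed later, in ~SM()

    Kernel::KClientSession* session{};
    ASSERT(port->GetClientPort().CreateSession(std::addressof(session)).IsSuccess());
    port->GetClientPort().Close();  // done via SCOPE_EXIT in the real code
    // `session` is what is returned to the guest.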
diff --git a/src/core/hle/service/sm/sm.h b/src/core/hle/service/sm/sm.h index ea37f11d4..068c78588 100644 --- a/src/core/hle/service/sm/sm.h +++ b/src/core/hle/service/sm/sm.h | |||
| @@ -49,6 +49,7 @@ private: | |||
| 49 | ServiceManager& service_manager; | 49 | ServiceManager& service_manager; |
| 50 | bool is_initialized{}; | 50 | bool is_initialized{}; |
| 51 | Kernel::KernelCore& kernel; | 51 | Kernel::KernelCore& kernel; |
| 52 | std::vector<Kernel::KServerPort*> server_ports; | ||
| 52 | }; | 53 | }; |
| 53 | 54 | ||
| 54 | class ServiceManager { | 55 | class ServiceManager { |
| @@ -58,7 +59,8 @@ public: | |||
| 58 | explicit ServiceManager(Kernel::KernelCore& kernel_); | 59 | explicit ServiceManager(Kernel::KernelCore& kernel_); |
| 59 | ~ServiceManager(); | 60 | ~ServiceManager(); |
| 60 | 61 | ||
| 61 | ResultVal<Kernel::KServerPort*> RegisterService(std::string name, u32 max_sessions); | 62 | ResultCode RegisterService(std::string name, u32 max_sessions, |
| 63 | Kernel::SessionRequestHandlerPtr handler); | ||
| 62 | ResultCode UnregisterService(const std::string& name); | 64 | ResultCode UnregisterService(const std::string& name); |
| 63 | ResultVal<Kernel::KPort*> GetServicePort(const std::string& name); | 65 | ResultVal<Kernel::KPort*> GetServicePort(const std::string& name); |
| 64 | 66 | ||
| @@ -69,21 +71,17 @@ public: | |||
| 69 | LOG_DEBUG(Service, "Can't find service: {}", service_name); | 71 | LOG_DEBUG(Service, "Can't find service: {}", service_name); |
| 70 | return nullptr; | 72 | return nullptr; |
| 71 | } | 73 | } |
| 72 | auto* port = service->second; | 74 | return std::static_pointer_cast<T>(service->second); |
| 73 | if (port == nullptr) { | ||
| 74 | return nullptr; | ||
| 75 | } | ||
| 76 | return std::static_pointer_cast<T>(port->GetServerPort().GetSessionRequestHandler()); | ||
| 77 | } | 75 | } |
| 78 | 76 | ||
| 79 | void InvokeControlRequest(Kernel::HLERequestContext& context); | 77 | void InvokeControlRequest(Kernel::HLERequestContext& context); |
| 80 | 78 | ||
| 81 | private: | 79 | private: |
| 82 | std::weak_ptr<SM> sm_interface; | 80 | std::shared_ptr<SM> sm_interface; |
| 83 | std::unique_ptr<Controller> controller_interface; | 81 | std::unique_ptr<Controller> controller_interface; |
| 84 | 82 | ||
| 85 | /// Map of registered services, retrieved using GetServicePort. | 83 | /// Map of registered services, retrieved using GetServicePort. |
| 86 | std::unordered_map<std::string, Kernel::KPort*> registered_services; | 84 | std::unordered_map<std::string, Kernel::SessionRequestHandlerPtr> registered_services; |
| 87 | 85 | ||
| 88 | /// Kernel context | 86 | /// Kernel context |
| 89 | Kernel::KernelCore& kernel; | 87 | Kernel::KernelCore& kernel; |
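Because registered_services now maps names straight to session handlers, GetService<T> reduces to a map lookup plus an unchecked static_pointer_cast, so the caller must pair the right T with the right name. Typical use (sketch; the hid lookup is illustrative):

    auto hid = service_manager.GetService<Service::HID::Hid>("hid");
    if (hid != nullptr) {
        // Valid only because "hid" was registered with a Service::HID::Hid handler.
        auto applet_resource = hid->GetAppletResource();
    }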
diff --git a/src/core/reporter.cpp b/src/core/reporter.cpp index cfaf50105..365b8f906 100644 --- a/src/core/reporter.cpp +++ b/src/core/reporter.cpp | |||
| @@ -62,7 +62,6 @@ json GetYuzuVersionData() { | |||
| 62 | {"build_date", std::string(Common::g_build_date)}, | 62 | {"build_date", std::string(Common::g_build_date)}, |
| 63 | {"build_fullname", std::string(Common::g_build_fullname)}, | 63 | {"build_fullname", std::string(Common::g_build_fullname)}, |
| 64 | {"build_version", std::string(Common::g_build_version)}, | 64 | {"build_version", std::string(Common::g_build_version)}, |
| 65 | {"shader_cache_version", std::string(Common::g_shader_cache_version)}, | ||
| 66 | }; | 65 | }; |
| 67 | } | 66 | } |
| 68 | 67 | ||
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index 066cb23e4..422de3a7d 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp | |||
| @@ -233,8 +233,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader, | |||
| 233 | Settings::values.use_nvdec_emulation.GetValue()); | 233 | Settings::values.use_nvdec_emulation.GetValue()); |
| 234 | AddField(field_type, "Renderer_AccelerateASTC", Settings::values.accelerate_astc.GetValue()); | 234 | AddField(field_type, "Renderer_AccelerateASTC", Settings::values.accelerate_astc.GetValue()); |
| 235 | AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync.GetValue()); | 235 | AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync.GetValue()); |
| 236 | AddField(field_type, "Renderer_UseAssemblyShaders", | 236 | AddField(field_type, "Renderer_ShaderBackend", |
| 237 | Settings::values.use_assembly_shaders.GetValue()); | 237 | static_cast<u32>(Settings::values.shader_backend.GetValue())); |
| 238 | AddField(field_type, "Renderer_UseAsynchronousShaders", | 238 | AddField(field_type, "Renderer_UseAsynchronousShaders", |
| 239 | Settings::values.use_asynchronous_shaders.GetValue()); | 239 | Settings::values.use_asynchronous_shaders.GetValue()); |
| 240 | AddField(field_type, "System_UseDockedMode", Settings::values.use_docked_mode.GetValue()); | 240 | AddField(field_type, "System_UseDockedMode", Settings::values.use_docked_mode.GetValue()); |
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt new file mode 100644 index 000000000..b5b7e5e83 --- /dev/null +++ b/src/shader_recompiler/CMakeLists.txt | |||
| @@ -0,0 +1,268 @@ | |||
| 1 | add_library(shader_recompiler STATIC | ||
| 2 | backend/bindings.h | ||
| 3 | backend/glasm/emit_context.cpp | ||
| 4 | backend/glasm/emit_context.h | ||
| 5 | backend/glasm/emit_glasm.cpp | ||
| 6 | backend/glasm/emit_glasm.h | ||
| 7 | backend/glasm/emit_glasm_barriers.cpp | ||
| 8 | backend/glasm/emit_glasm_bitwise_conversion.cpp | ||
| 9 | backend/glasm/emit_glasm_composite.cpp | ||
| 10 | backend/glasm/emit_glasm_context_get_set.cpp | ||
| 11 | backend/glasm/emit_glasm_control_flow.cpp | ||
| 12 | backend/glasm/emit_glasm_convert.cpp | ||
| 13 | backend/glasm/emit_glasm_floating_point.cpp | ||
| 14 | backend/glasm/emit_glasm_image.cpp | ||
| 15 | backend/glasm/emit_glasm_instructions.h | ||
| 16 | backend/glasm/emit_glasm_integer.cpp | ||
| 17 | backend/glasm/emit_glasm_logical.cpp | ||
| 18 | backend/glasm/emit_glasm_memory.cpp | ||
| 19 | backend/glasm/emit_glasm_not_implemented.cpp | ||
| 20 | backend/glasm/emit_glasm_select.cpp | ||
| 21 | backend/glasm/emit_glasm_shared_memory.cpp | ||
| 22 | backend/glasm/emit_glasm_special.cpp | ||
| 23 | backend/glasm/emit_glasm_undefined.cpp | ||
| 24 | backend/glasm/emit_glasm_warp.cpp | ||
| 25 | backend/glasm/reg_alloc.cpp | ||
| 26 | backend/glasm/reg_alloc.h | ||
| 27 | backend/glsl/emit_context.cpp | ||
| 28 | backend/glsl/emit_context.h | ||
| 29 | backend/glsl/emit_glsl.cpp | ||
| 30 | backend/glsl/emit_glsl.h | ||
| 31 | backend/glsl/emit_glsl_atomic.cpp | ||
| 32 | backend/glsl/emit_glsl_barriers.cpp | ||
| 33 | backend/glsl/emit_glsl_bitwise_conversion.cpp | ||
| 34 | backend/glsl/emit_glsl_composite.cpp | ||
| 35 | backend/glsl/emit_glsl_context_get_set.cpp | ||
| 36 | backend/glsl/emit_glsl_control_flow.cpp | ||
| 37 | backend/glsl/emit_glsl_convert.cpp | ||
| 38 | backend/glsl/emit_glsl_floating_point.cpp | ||
| 39 | backend/glsl/emit_glsl_image.cpp | ||
| 40 | backend/glsl/emit_glsl_instructions.h | ||
| 41 | backend/glsl/emit_glsl_integer.cpp | ||
| 42 | backend/glsl/emit_glsl_logical.cpp | ||
| 43 | backend/glsl/emit_glsl_memory.cpp | ||
| 44 | backend/glsl/emit_glsl_not_implemented.cpp | ||
| 45 | backend/glsl/emit_glsl_select.cpp | ||
| 46 | backend/glsl/emit_glsl_shared_memory.cpp | ||
| 47 | backend/glsl/emit_glsl_special.cpp | ||
| 48 | backend/glsl/emit_glsl_undefined.cpp | ||
| 49 | backend/glsl/emit_glsl_warp.cpp | ||
| 50 | backend/glsl/var_alloc.cpp | ||
| 51 | backend/glsl/var_alloc.h | ||
| 52 | backend/spirv/emit_context.cpp | ||
| 53 | backend/spirv/emit_context.h | ||
| 54 | backend/spirv/emit_spirv.cpp | ||
| 55 | backend/spirv/emit_spirv.h | ||
| 56 | backend/spirv/emit_spirv_atomic.cpp | ||
| 57 | backend/spirv/emit_spirv_barriers.cpp | ||
| 58 | backend/spirv/emit_spirv_bitwise_conversion.cpp | ||
| 59 | backend/spirv/emit_spirv_composite.cpp | ||
| 60 | backend/spirv/emit_spirv_context_get_set.cpp | ||
| 61 | backend/spirv/emit_spirv_control_flow.cpp | ||
| 62 | backend/spirv/emit_spirv_convert.cpp | ||
| 63 | backend/spirv/emit_spirv_floating_point.cpp | ||
| 64 | backend/spirv/emit_spirv_image.cpp | ||
| 65 | backend/spirv/emit_spirv_image_atomic.cpp | ||
| 66 | backend/spirv/emit_spirv_instructions.h | ||
| 67 | backend/spirv/emit_spirv_integer.cpp | ||
| 68 | backend/spirv/emit_spirv_logical.cpp | ||
| 69 | backend/spirv/emit_spirv_memory.cpp | ||
| 70 | backend/spirv/emit_spirv_select.cpp | ||
| 71 | backend/spirv/emit_spirv_shared_memory.cpp | ||
| 72 | backend/spirv/emit_spirv_special.cpp | ||
| 73 | backend/spirv/emit_spirv_undefined.cpp | ||
| 74 | backend/spirv/emit_spirv_warp.cpp | ||
| 75 | environment.h | ||
| 76 | exception.h | ||
| 77 | frontend/ir/abstract_syntax_list.h | ||
| 78 | frontend/ir/attribute.cpp | ||
| 79 | frontend/ir/attribute.h | ||
| 80 | frontend/ir/basic_block.cpp | ||
| 81 | frontend/ir/basic_block.h | ||
| 82 | frontend/ir/breadth_first_search.h | ||
| 83 | frontend/ir/condition.cpp | ||
| 84 | frontend/ir/condition.h | ||
| 85 | frontend/ir/flow_test.cpp | ||
| 86 | frontend/ir/flow_test.h | ||
| 87 | frontend/ir/ir_emitter.cpp | ||
| 88 | frontend/ir/ir_emitter.h | ||
| 89 | frontend/ir/microinstruction.cpp | ||
| 90 | frontend/ir/modifiers.h | ||
| 91 | frontend/ir/opcodes.cpp | ||
| 92 | frontend/ir/opcodes.h | ||
| 93 | frontend/ir/opcodes.inc | ||
| 94 | frontend/ir/patch.cpp | ||
| 95 | frontend/ir/patch.h | ||
| 96 | frontend/ir/post_order.cpp | ||
| 97 | frontend/ir/post_order.h | ||
| 98 | frontend/ir/pred.h | ||
| 99 | frontend/ir/program.cpp | ||
| 100 | frontend/ir/program.h | ||
| 101 | frontend/ir/reg.h | ||
| 102 | frontend/ir/type.cpp | ||
| 103 | frontend/ir/type.h | ||
| 104 | frontend/ir/value.cpp | ||
| 105 | frontend/ir/value.h | ||
| 106 | frontend/maxwell/control_flow.cpp | ||
| 107 | frontend/maxwell/control_flow.h | ||
| 108 | frontend/maxwell/decode.cpp | ||
| 109 | frontend/maxwell/decode.h | ||
| 110 | frontend/maxwell/indirect_branch_table_track.cpp | ||
| 111 | frontend/maxwell/indirect_branch_table_track.h | ||
| 112 | frontend/maxwell/instruction.h | ||
| 113 | frontend/maxwell/location.h | ||
| 114 | frontend/maxwell/maxwell.inc | ||
| 115 | frontend/maxwell/opcodes.cpp | ||
| 116 | frontend/maxwell/opcodes.h | ||
| 117 | frontend/maxwell/structured_control_flow.cpp | ||
| 118 | frontend/maxwell/structured_control_flow.h | ||
| 119 | frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp | ||
| 120 | frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp | ||
| 121 | frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp | ||
| 122 | frontend/maxwell/translate/impl/barrier_operations.cpp | ||
| 123 | frontend/maxwell/translate/impl/bitfield_extract.cpp | ||
| 124 | frontend/maxwell/translate/impl/bitfield_insert.cpp | ||
| 125 | frontend/maxwell/translate/impl/branch_indirect.cpp | ||
| 126 | frontend/maxwell/translate/impl/common_encoding.h | ||
| 127 | frontend/maxwell/translate/impl/common_funcs.cpp | ||
| 128 | frontend/maxwell/translate/impl/common_funcs.h | ||
| 129 | frontend/maxwell/translate/impl/condition_code_set.cpp | ||
| 130 | frontend/maxwell/translate/impl/double_add.cpp | ||
| 131 | frontend/maxwell/translate/impl/double_compare_and_set.cpp | ||
| 132 | frontend/maxwell/translate/impl/double_fused_multiply_add.cpp | ||
| 133 | frontend/maxwell/translate/impl/double_min_max.cpp | ||
| 134 | frontend/maxwell/translate/impl/double_multiply.cpp | ||
| 135 | frontend/maxwell/translate/impl/double_set_predicate.cpp | ||
| 136 | frontend/maxwell/translate/impl/exit_program.cpp | ||
| 137 | frontend/maxwell/translate/impl/find_leading_one.cpp | ||
| 138 | frontend/maxwell/translate/impl/floating_point_add.cpp | ||
| 139 | frontend/maxwell/translate/impl/floating_point_compare.cpp | ||
| 140 | frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp | ||
| 141 | frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp | ||
| 142 | frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp | ||
| 143 | frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp | ||
| 144 | frontend/maxwell/translate/impl/floating_point_min_max.cpp | ||
| 145 | frontend/maxwell/translate/impl/floating_point_multi_function.cpp | ||
| 146 | frontend/maxwell/translate/impl/floating_point_multiply.cpp | ||
| 147 | frontend/maxwell/translate/impl/floating_point_range_reduction.cpp | ||
| 148 | frontend/maxwell/translate/impl/floating_point_set_predicate.cpp | ||
| 149 | frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp | ||
| 150 | frontend/maxwell/translate/impl/half_floating_point_add.cpp | ||
| 151 | frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp | ||
| 152 | frontend/maxwell/translate/impl/half_floating_point_helper.cpp | ||
| 153 | frontend/maxwell/translate/impl/half_floating_point_helper.h | ||
| 154 | frontend/maxwell/translate/impl/half_floating_point_multiply.cpp | ||
| 155 | frontend/maxwell/translate/impl/half_floating_point_set.cpp | ||
| 156 | frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp | ||
| 157 | frontend/maxwell/translate/impl/impl.cpp | ||
| 158 | frontend/maxwell/translate/impl/impl.h | ||
| 159 | frontend/maxwell/translate/impl/integer_add.cpp | ||
| 160 | frontend/maxwell/translate/impl/integer_add_three_input.cpp | ||
| 161 | frontend/maxwell/translate/impl/integer_compare.cpp | ||
| 162 | frontend/maxwell/translate/impl/integer_compare_and_set.cpp | ||
| 163 | frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp | ||
| 164 | frontend/maxwell/translate/impl/integer_funnel_shift.cpp | ||
| 165 | frontend/maxwell/translate/impl/integer_minimum_maximum.cpp | ||
| 166 | frontend/maxwell/translate/impl/integer_popcount.cpp | ||
| 167 | frontend/maxwell/translate/impl/integer_scaled_add.cpp | ||
| 168 | frontend/maxwell/translate/impl/integer_set_predicate.cpp | ||
| 169 | frontend/maxwell/translate/impl/integer_shift_left.cpp | ||
| 170 | frontend/maxwell/translate/impl/integer_shift_right.cpp | ||
| 171 | frontend/maxwell/translate/impl/integer_short_multiply_add.cpp | ||
| 172 | frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp | ||
| 173 | frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp | ||
| 174 | frontend/maxwell/translate/impl/load_constant.cpp | ||
| 175 | frontend/maxwell/translate/impl/load_constant.h | ||
| 176 | frontend/maxwell/translate/impl/load_effective_address.cpp | ||
| 177 | frontend/maxwell/translate/impl/load_store_attribute.cpp | ||
| 178 | frontend/maxwell/translate/impl/load_store_local_shared.cpp | ||
| 179 | frontend/maxwell/translate/impl/load_store_memory.cpp | ||
| 180 | frontend/maxwell/translate/impl/logic_operation.cpp | ||
| 181 | frontend/maxwell/translate/impl/logic_operation_three_input.cpp | ||
| 182 | frontend/maxwell/translate/impl/move_predicate_to_register.cpp | ||
| 183 | frontend/maxwell/translate/impl/move_register.cpp | ||
| 184 | frontend/maxwell/translate/impl/move_register_to_predicate.cpp | ||
| 185 | frontend/maxwell/translate/impl/move_special_register.cpp | ||
| 186 | frontend/maxwell/translate/impl/not_implemented.cpp | ||
| 187 | frontend/maxwell/translate/impl/output_geometry.cpp | ||
| 188 | frontend/maxwell/translate/impl/pixel_load.cpp | ||
| 189 | frontend/maxwell/translate/impl/predicate_set_predicate.cpp | ||
| 190 | frontend/maxwell/translate/impl/predicate_set_register.cpp | ||
| 191 | frontend/maxwell/translate/impl/select_source_with_predicate.cpp | ||
| 192 | frontend/maxwell/translate/impl/surface_atomic_operations.cpp | ||
| 193 | frontend/maxwell/translate/impl/surface_load_store.cpp | ||
| 194 | frontend/maxwell/translate/impl/texture_fetch.cpp | ||
| 195 | frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp | ||
| 196 | frontend/maxwell/translate/impl/texture_gather.cpp | ||
| 197 | frontend/maxwell/translate/impl/texture_gather_swizzled.cpp | ||
| 198 | frontend/maxwell/translate/impl/texture_gradient.cpp | ||
| 199 | frontend/maxwell/translate/impl/texture_load.cpp | ||
| 200 | frontend/maxwell/translate/impl/texture_load_swizzled.cpp | ||
| 201 | frontend/maxwell/translate/impl/texture_mipmap_level.cpp | ||
| 202 | frontend/maxwell/translate/impl/texture_query.cpp | ||
| 203 | frontend/maxwell/translate/impl/video_helper.cpp | ||
| 204 | frontend/maxwell/translate/impl/video_helper.h | ||
| 205 | frontend/maxwell/translate/impl/video_minimum_maximum.cpp | ||
| 206 | frontend/maxwell/translate/impl/video_multiply_add.cpp | ||
| 207 | frontend/maxwell/translate/impl/video_set_predicate.cpp | ||
| 208 | frontend/maxwell/translate/impl/vote.cpp | ||
| 209 | frontend/maxwell/translate/impl/warp_shuffle.cpp | ||
| 210 | frontend/maxwell/translate/translate.cpp | ||
| 211 | frontend/maxwell/translate/translate.h | ||
| 212 | frontend/maxwell/translate_program.cpp | ||
| 213 | frontend/maxwell/translate_program.h | ||
| 214 | host_translate_info.h | ||
| 215 | ir_opt/collect_shader_info_pass.cpp | ||
| 216 | ir_opt/constant_propagation_pass.cpp | ||
| 217 | ir_opt/dead_code_elimination_pass.cpp | ||
| 218 | ir_opt/dual_vertex_pass.cpp | ||
| 219 | ir_opt/global_memory_to_storage_buffer_pass.cpp | ||
| 220 | ir_opt/identity_removal_pass.cpp | ||
| 221 | ir_opt/lower_fp16_to_fp32.cpp | ||
| 222 | ir_opt/lower_int64_to_int32.cpp | ||
| 223 | ir_opt/passes.h | ||
| 224 | ir_opt/ssa_rewrite_pass.cpp | ||
| 225 | ir_opt/texture_pass.cpp | ||
| 226 | ir_opt/verification_pass.cpp | ||
| 227 | object_pool.h | ||
| 228 | profile.h | ||
| 229 | program_header.h | ||
| 230 | runtime_info.h | ||
| 231 | shader_info.h | ||
| 232 | varying_state.h | ||
| 233 | ) | ||
| 234 | |||
| 235 | target_link_libraries(shader_recompiler PUBLIC common fmt::fmt sirit) | ||
| 236 | |||
| 237 | if (MSVC) | ||
| 238 | target_compile_options(shader_recompiler PRIVATE | ||
| 239 | /W4 | ||
| 240 | /WX | ||
| 241 | /we4018 # 'expression' : signed/unsigned mismatch | ||
| 242 | /we4244 # 'argument' : conversion from 'type1' to 'type2', possible loss of data (floating-point) | ||
| 243 | /we4245 # 'conversion' : conversion from 'type1' to 'type2', signed/unsigned mismatch | ||
| 244 | /we4254 # 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data | ||
| 245 | /we4267 # 'var' : conversion from 'size_t' to 'type', possible loss of data | ||
| 246 | /we4305 # 'context' : truncation from 'type1' to 'type2' | ||
| 247 | /we4800 # Implicit conversion from 'type' to bool. Possible information loss | ||
| 248 | /we4826 # Conversion from 'type1' to 'type2' is sign-extended. This may cause unexpected runtime behavior. | ||
| 249 | ) | ||
| 250 | else() | ||
| 251 | target_compile_options(shader_recompiler PRIVATE | ||
| 252 | -Werror | ||
| 253 | -Werror=conversion | ||
| 254 | -Werror=ignored-qualifiers | ||
| 255 | -Werror=implicit-fallthrough | ||
| 256 | -Werror=shadow | ||
| 257 | -Werror=sign-compare | ||
| 258 | $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter> | ||
| 259 | $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable> | ||
| 260 | -Werror=unused-variable | ||
| 261 | |||
| 262 | # Bracket depth determines the maximum size of a fold expression in Clang since 9c9974c3ccb6. | ||
| 263 | # And this in turn limits the size of a std::array. | ||
| 264 | $<$<CXX_COMPILER_ID:Clang>:-fbracket-depth=1024> | ||
| 265 | ) | ||
| 266 | endif() | ||
| 267 | |||
| 268 | create_target_directory_groups(shader_recompiler) | ||
diff --git a/src/shader_recompiler/backend/bindings.h b/src/shader_recompiler/backend/bindings.h new file mode 100644 index 000000000..35503000c --- /dev/null +++ b/src/shader_recompiler/backend/bindings.h | |||
| @@ -0,0 +1,19 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | |||
| 9 | namespace Shader::Backend { | ||
| 10 | |||
| 11 | struct Bindings { | ||
| 12 | u32 unified{}; | ||
| 13 | u32 uniform_buffer{}; | ||
| 14 | u32 storage_buffer{}; | ||
| 15 | u32 texture{}; | ||
| 16 | u32 image{}; | ||
| 17 | }; | ||
| 18 | |||
| 19 | } // namespace Shader::Backend | ||
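Bindings is a bundle of running counters shared across a pipeline's stages: each backend emitter consumes as many slots as its descriptors need and leaves the counters at the next free slot for the following stage, which is exactly how the GLASM context below advances them. Sketch of the pattern (the descriptor names follow the Info fields used in emit_context.cpp; vertex_info is illustrative):

    Shader::Backend::Bindings bindings{};
    // The first stage reserves its texture slots...
    for (const auto& desc : vertex_info.texture_descriptors) {
        bindings.texture += desc.count;  // arrays take one slot per element
    }
    // ...so the next stage naturally starts after them.
    const u32 first_fragment_texture = bindings.texture;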
diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp new file mode 100644 index 000000000..069c019ad --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp | |||
| @@ -0,0 +1,154 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/bindings.h" | ||
| 8 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 10 | #include "shader_recompiler/profile.h" | ||
| 11 | #include "shader_recompiler/runtime_info.h" | ||
| 12 | |||
| 13 | namespace Shader::Backend::GLASM { | ||
| 14 | namespace { | ||
| 15 | std::string_view InterpDecorator(Interpolation interp) { | ||
| 16 | switch (interp) { | ||
| 17 | case Interpolation::Smooth: | ||
| 18 | return ""; | ||
| 19 | case Interpolation::Flat: | ||
| 20 | return "FLAT "; | ||
| 21 | case Interpolation::NoPerspective: | ||
| 22 | return "NOPERSPECTIVE "; | ||
| 23 | } | ||
| 24 | throw InvalidArgument("Invalid interpolation {}", interp); | ||
| 25 | } | ||
| 26 | |||
| 27 | bool IsInputArray(Stage stage) { | ||
| 28 | return stage == Stage::Geometry || stage == Stage::TessellationControl || | ||
| 29 | stage == Stage::TessellationEval; | ||
| 30 | } | ||
| 31 | } // Anonymous namespace | ||
| 32 | |||
| 33 | EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, | ||
| 34 | const RuntimeInfo& runtime_info_) | ||
| 35 | : info{program.info}, profile{profile_}, runtime_info{runtime_info_} { | ||
| 36 | // FIXME: Temporary partial implementation | ||
| 37 | u32 cbuf_index{}; | ||
| 38 | for (const auto& desc : info.constant_buffer_descriptors) { | ||
| 39 | if (desc.count != 1) { | ||
| 40 | throw NotImplementedException("Constant buffer descriptor array"); | ||
| 41 | } | ||
| 42 | Add("CBUFFER c{}[]={{program.buffer[{}]}};", desc.index, cbuf_index); | ||
| 43 | ++cbuf_index; | ||
| 44 | } | ||
| 45 | u32 ssbo_index{}; | ||
| 46 | for (const auto& desc : info.storage_buffers_descriptors) { | ||
| 47 | if (desc.count != 1) { | ||
| 48 | throw NotImplementedException("Storage buffer descriptor array"); | ||
| 49 | } | ||
| 50 | if (runtime_info.glasm_use_storage_buffers) { | ||
| 51 | Add("STORAGE ssbo{}[]={{program.storage[{}]}};", ssbo_index, bindings.storage_buffer); | ||
| 52 | ++bindings.storage_buffer; | ||
| 53 | ++ssbo_index; | ||
| 54 | } | ||
| 55 | } | ||
| 56 | if (!runtime_info.glasm_use_storage_buffers) { | ||
| 57 | if (const size_t num = info.storage_buffers_descriptors.size(); num > 0) { | ||
| 58 | Add("PARAM c[{}]={{program.local[0..{}]}};", num, num - 1); | ||
| 59 | } | ||
| 60 | } | ||
| 61 | stage = program.stage; | ||
| 62 | switch (program.stage) { | ||
| 63 | case Stage::VertexA: | ||
| 64 | case Stage::VertexB: | ||
| 65 | stage_name = "vertex"; | ||
| 66 | attrib_name = "vertex"; | ||
| 67 | break; | ||
| 68 | case Stage::TessellationControl: | ||
| 69 | case Stage::TessellationEval: | ||
| 70 | stage_name = "primitive"; | ||
| 71 | attrib_name = "primitive"; | ||
| 72 | break; | ||
| 73 | case Stage::Geometry: | ||
| 74 | stage_name = "primitive"; | ||
| 75 | attrib_name = "vertex"; | ||
| 76 | break; | ||
| 77 | case Stage::Fragment: | ||
| 78 | stage_name = "fragment"; | ||
| 79 | attrib_name = "fragment"; | ||
| 80 | break; | ||
| 81 | case Stage::Compute: | ||
| 82 | stage_name = "invocation"; | ||
| 83 | break; | ||
| 84 | } | ||
| 85 | const std::string_view attr_stage{stage == Stage::Fragment ? "fragment" : "vertex"}; | ||
| 86 | const VaryingState loads{info.loads.mask | info.passthrough.mask}; | ||
| 87 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { | ||
| 88 | if (loads.Generic(index)) { | ||
| 89 | Add("{}ATTRIB in_attr{}[]={{{}.attrib[{}..{}]}};", | ||
| 90 | InterpDecorator(info.interpolation[index]), index, attr_stage, index, index); | ||
| 91 | } | ||
| 92 | } | ||
| 93 | if (IsInputArray(stage) && loads.AnyComponent(IR::Attribute::PositionX)) { | ||
| 94 | Add("ATTRIB vertex_position=vertex.position;"); | ||
| 95 | } | ||
| 96 | if (info.uses_invocation_id) { | ||
| 97 | Add("ATTRIB primitive_invocation=primitive.invocation;"); | ||
| 98 | } | ||
| 99 | if (info.stores_tess_level_outer) { | ||
| 100 | Add("OUTPUT result_patch_tessouter[]={{result.patch.tessouter[0..3]}};"); | ||
| 101 | } | ||
| 102 | if (info.stores_tess_level_inner) { | ||
| 103 | Add("OUTPUT result_patch_tessinner[]={{result.patch.tessinner[0..1]}};"); | ||
| 104 | } | ||
| 105 | if (info.stores.ClipDistances()) { | ||
| 106 | Add("OUTPUT result_clip[]={{result.clip[0..7]}};"); | ||
| 107 | } | ||
| 108 | for (size_t index = 0; index < info.uses_patches.size(); ++index) { | ||
| 109 | if (!info.uses_patches[index]) { | ||
| 110 | continue; | ||
| 111 | } | ||
| 112 | if (stage == Stage::TessellationControl) { | ||
| 113 | Add("OUTPUT result_patch_attrib{}[]={{result.patch.attrib[{}..{}]}};" | ||
| 114 | "ATTRIB primitive_out_patch_attrib{}[]={{primitive.out.patch.attrib[{}..{}]}};", | ||
| 115 | index, index, index, index, index, index); | ||
| 116 | } else { | ||
| 117 | Add("ATTRIB primitive_patch_attrib{}[]={{primitive.patch.attrib[{}..{}]}};", index, | ||
| 118 | index, index); | ||
| 119 | } | ||
| 120 | } | ||
| 121 | if (stage == Stage::Fragment) { | ||
| 122 | Add("OUTPUT frag_color0=result.color;"); | ||
| 123 | for (size_t index = 1; index < info.stores_frag_color.size(); ++index) { | ||
| 124 | Add("OUTPUT frag_color{}=result.color[{}];", index, index); | ||
| 125 | } | ||
| 126 | } | ||
| 127 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { | ||
| 128 | if (info.stores.Generic(index)) { | ||
| 129 | Add("OUTPUT out_attr{}[]={{result.attrib[{}..{}]}};", index, index, index); | ||
| 130 | } | ||
| 131 | } | ||
| 132 | image_buffer_bindings.reserve(info.image_buffer_descriptors.size()); | ||
| 133 | for (const auto& desc : info.image_buffer_descriptors) { | ||
| 134 | image_buffer_bindings.push_back(bindings.image); | ||
| 135 | bindings.image += desc.count; | ||
| 136 | } | ||
| 137 | image_bindings.reserve(info.image_descriptors.size()); | ||
| 138 | for (const auto& desc : info.image_descriptors) { | ||
| 139 | image_bindings.push_back(bindings.image); | ||
| 140 | bindings.image += desc.count; | ||
| 141 | } | ||
| 142 | texture_buffer_bindings.reserve(info.texture_buffer_descriptors.size()); | ||
| 143 | for (const auto& desc : info.texture_buffer_descriptors) { | ||
| 144 | texture_buffer_bindings.push_back(bindings.texture); | ||
| 145 | bindings.texture += desc.count; | ||
| 146 | } | ||
| 147 | texture_bindings.reserve(info.texture_descriptors.size()); | ||
| 148 | for (const auto& desc : info.texture_descriptors) { | ||
| 149 | texture_bindings.push_back(bindings.texture); | ||
| 150 | bindings.texture += desc.count; | ||
| 151 | } | ||
| 152 | } | ||
| 153 | |||
| 154 | } // namespace Shader::Backend::GLASM | ||
diff --git a/src/shader_recompiler/backend/glasm/emit_context.h b/src/shader_recompiler/backend/glasm/emit_context.h new file mode 100644 index 000000000..8433e5c00 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_context.h | |||
| @@ -0,0 +1,80 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <string> | ||
| 8 | #include <utility> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include <fmt/format.h> | ||
| 12 | |||
| 13 | #include "shader_recompiler/backend/glasm/reg_alloc.h" | ||
| 14 | #include "shader_recompiler/stage.h" | ||
| 15 | |||
| 16 | namespace Shader { | ||
| 17 | struct Info; | ||
| 18 | struct Profile; | ||
| 19 | struct RuntimeInfo; | ||
| 20 | } // namespace Shader | ||
| 21 | |||
| 22 | namespace Shader::Backend { | ||
| 23 | struct Bindings; | ||
| 24 | } | ||
| 25 | |||
| 26 | namespace Shader::IR { | ||
| 27 | class Inst; | ||
| 28 | struct Program; | ||
| 29 | } // namespace Shader::IR | ||
| 30 | |||
| 31 | namespace Shader::Backend::GLASM { | ||
| 32 | |||
| 33 | class EmitContext { | ||
| 34 | public: | ||
| 35 | explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, | ||
| 36 | const RuntimeInfo& runtime_info_); | ||
| 37 | |||
| 38 | template <typename... Args> | ||
| 39 | void Add(const char* format_str, IR::Inst& inst, Args&&... args) { | ||
| 40 | code += fmt::format(fmt::runtime(format_str), reg_alloc.Define(inst), | ||
| 41 | std::forward<Args>(args)...); | ||
| 42 | // TODO: Remove this | ||
| 43 | code += '\n'; | ||
| 44 | } | ||
| 45 | |||
| 46 | template <typename... Args> | ||
| 47 | void LongAdd(const char* format_str, IR::Inst& inst, Args&&... args) { | ||
| 48 | code += fmt::format(fmt::runtime(format_str), reg_alloc.LongDefine(inst), | ||
| 49 | std::forward<Args>(args)...); | ||
| 50 | // TODO: Remove this | ||
| 51 | code += '\n'; | ||
| 52 | } | ||
| 53 | |||
| 54 | template <typename... Args> | ||
| 55 | void Add(const char* format_str, Args&&... args) { | ||
| 56 | code += fmt::format(fmt::runtime(format_str), std::forward<Args>(args)...); | ||
| 57 | // TODO: Remove this | ||
| 58 | code += '\n'; | ||
| 59 | } | ||
| 60 | |||
| 61 | std::string code; | ||
| 62 | RegAlloc reg_alloc{}; | ||
| 63 | const Info& info; | ||
| 64 | const Profile& profile; | ||
| 65 | const RuntimeInfo& runtime_info; | ||
| 66 | |||
| 67 | std::vector<u32> texture_buffer_bindings; | ||
| 68 | std::vector<u32> image_buffer_bindings; | ||
| 69 | std::vector<u32> texture_bindings; | ||
| 70 | std::vector<u32> image_bindings; | ||
| 71 | |||
| 72 | Stage stage{}; | ||
| 73 | std::string_view stage_name = "invalid"; | ||
| 74 | std::string_view attrib_name = "invalid"; | ||
| 75 | |||
| 76 | u32 num_safety_loop_vars{}; | ||
| 77 | bool uses_y_direction{}; | ||
| 78 | }; | ||
| 79 | |||
| 80 | } // namespace Shader::Backend::GLASM | ||
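The three Add() overloads differ only in whether they define a result register: the overloads taking IR::Inst& substitute reg_alloc.Define(inst) (or LongDefine for 64-bit results) as the first format argument, so the leading "{}" in the format string is the destination. A typical per-instruction emitter then looks like the following sketch (the signature style matches emit_glasm_instructions.h; the exact ADD.S encoding is illustrative):

    void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
        // The first "{}" becomes the register allocated for inst's result;
        // the remaining placeholders receive the operands.
        ctx.Add("ADD.S {}.x,{},{};", inst, a, b);
    }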
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp new file mode 100644 index 000000000..a5e8c9b6e --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp | |||
| @@ -0,0 +1,492 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <string> | ||
| 7 | #include <tuple> | ||
| 8 | |||
| 9 | #include "common/div_ceil.h" | ||
| 10 | #include "common/settings.h" | ||
| 11 | #include "shader_recompiler/backend/bindings.h" | ||
| 12 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 13 | #include "shader_recompiler/backend/glasm/emit_glasm.h" | ||
| 14 | #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" | ||
| 15 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 16 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 17 | #include "shader_recompiler/profile.h" | ||
| 18 | #include "shader_recompiler/runtime_info.h" | ||
| 19 | |||
| 20 | namespace Shader::Backend::GLASM { | ||
| 21 | namespace { | ||
| 22 | template <class Func> | ||
| 23 | struct FuncTraits {}; | ||
| 24 | |||
| 25 | template <class ReturnType_, class... Args> | ||
| 26 | struct FuncTraits<ReturnType_ (*)(Args...)> { | ||
| 27 | using ReturnType = ReturnType_; | ||
| 28 | |||
| 29 | static constexpr size_t NUM_ARGS = sizeof...(Args); | ||
| 30 | |||
| 31 | template <size_t I> | ||
| 32 | using ArgType = std::tuple_element_t<I, std::tuple<Args...>>; | ||
| 33 | }; | ||
| 34 | |||
| 35 | template <typename T> | ||
| 36 | struct Identity { | ||
| 37 | Identity(T data_) : data{data_} {} | ||
| 38 | |||
| 39 | T Extract() { | ||
| 40 | return data; | ||
| 41 | } | ||
| 42 | |||
| 43 | T data; | ||
| 44 | }; | ||
| 45 | |||
| 46 | template <bool scalar> | ||
| 47 | class RegWrapper { | ||
| 48 | public: | ||
| 49 | RegWrapper(EmitContext& ctx, const IR::Value& ir_value) : reg_alloc{ctx.reg_alloc} { | ||
| 50 | const Value value{reg_alloc.Peek(ir_value)}; | ||
| 51 | if (value.type == Type::Register) { | ||
| 52 | inst = ir_value.InstRecursive(); | ||
| 53 | reg = Register{value}; | ||
| 54 | } else { | ||
| 55 | reg = value.type == Type::U64 ? reg_alloc.AllocLongReg() : reg_alloc.AllocReg(); | ||
| 56 | } | ||
| 57 | switch (value.type) { | ||
| 58 | case Type::Register: | ||
| 59 | case Type::Void: | ||
| 60 | break; | ||
| 61 | case Type::U32: | ||
| 62 | ctx.Add("MOV.U {}.x,{};", reg, value.imm_u32); | ||
| 63 | break; | ||
| 64 | case Type::U64: | ||
| 65 | ctx.Add("MOV.U64 {}.x,{};", reg, value.imm_u64); | ||
| 66 | break; | ||
| 67 | } | ||
| 68 | } | ||
| 69 | |||
| 70 | auto Extract() { | ||
| 71 | if (inst) { | ||
| 72 | reg_alloc.Unref(*inst); | ||
| 73 | } else { | ||
| 74 | reg_alloc.FreeReg(reg); | ||
| 75 | } | ||
| 76 | return std::conditional_t<scalar, ScalarRegister, Register>{Value{reg}}; | ||
| 77 | } | ||
| 78 | |||
| 79 | private: | ||
| 80 | RegAlloc& reg_alloc; | ||
| 81 | IR::Inst* inst{}; | ||
| 82 | Register reg{}; | ||
| 83 | }; | ||
| 84 | |||
| 85 | template <typename ArgType> | ||
| 86 | class ValueWrapper { | ||
| 87 | public: | ||
| 88 | ValueWrapper(EmitContext& ctx, const IR::Value& ir_value_) | ||
| 89 | : reg_alloc{ctx.reg_alloc}, ir_value{ir_value_}, value{reg_alloc.Peek(ir_value)} {} | ||
| 90 | |||
| 91 | ArgType Extract() { | ||
| 92 | if (!ir_value.IsImmediate()) { | ||
| 93 | reg_alloc.Unref(*ir_value.InstRecursive()); | ||
| 94 | } | ||
| 95 | return value; | ||
| 96 | } | ||
| 97 | |||
| 98 | private: | ||
| 99 | RegAlloc& reg_alloc; | ||
| 100 | const IR::Value& ir_value; | ||
| 101 | ArgType value; | ||
| 102 | }; | ||
| 103 | |||
| 104 | template <typename ArgType> | ||
| 105 | auto Arg(EmitContext& ctx, const IR::Value& arg) { | ||
| 106 | if constexpr (std::is_same_v<ArgType, Register>) { | ||
| 107 | return RegWrapper<false>{ctx, arg}; | ||
| 108 | } else if constexpr (std::is_same_v<ArgType, ScalarRegister>) { | ||
| 109 | return RegWrapper<true>{ctx, arg}; | ||
| 110 | } else if constexpr (std::is_base_of_v<Value, ArgType>) { | ||
| 111 | return ValueWrapper<ArgType>{ctx, arg}; | ||
| 112 | } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) { | ||
| 113 | return Identity<const IR::Value&>{arg}; | ||
| 114 | } else if constexpr (std::is_same_v<ArgType, u32>) { | ||
| 115 | return Identity{arg.U32()}; | ||
| 116 | } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) { | ||
| 117 | return Identity{arg.Attribute()}; | ||
| 118 | } else if constexpr (std::is_same_v<ArgType, IR::Patch>) { | ||
| 119 | return Identity{arg.Patch()}; | ||
| 120 | } else if constexpr (std::is_same_v<ArgType, IR::Reg>) { | ||
| 121 | return Identity{arg.Reg()}; | ||
| 122 | } | ||
| 123 | } | ||
| 124 | |||
| 125 | template <auto func, bool is_first_arg_inst> | ||
| 126 | struct InvokeCall { | ||
| 127 | template <typename... Args> | ||
| 128 | InvokeCall(EmitContext& ctx, IR::Inst* inst, Args&&... args) { | ||
| 129 | if constexpr (is_first_arg_inst) { | ||
| 130 | func(ctx, *inst, args.Extract()...); | ||
| 131 | } else { | ||
| 132 | func(ctx, args.Extract()...); | ||
| 133 | } | ||
| 134 | } | ||
| 135 | }; | ||
| 136 | |||
| 137 | template <auto func, bool is_first_arg_inst, size_t... I> | ||
| 138 | void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) { | ||
| 139 | using Traits = FuncTraits<decltype(func)>; | ||
| 140 | if constexpr (is_first_arg_inst) { | ||
| 141 | InvokeCall<func, is_first_arg_inst>{ | ||
| 142 | ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...}; | ||
| 143 | } else { | ||
| 144 | InvokeCall<func, is_first_arg_inst>{ | ||
| 145 | ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...}; | ||
| 146 | } | ||
| 147 | } | ||
| 148 | |||
| 149 | template <auto func> | ||
| 150 | void Invoke(EmitContext& ctx, IR::Inst* inst) { | ||
| 151 | using Traits = FuncTraits<decltype(func)>; | ||
| 152 | static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments"); | ||
| 153 | if constexpr (Traits::NUM_ARGS == 1) { | ||
| 154 | Invoke<func, false>(ctx, inst, std::make_index_sequence<0>{}); | ||
| 155 | } else { | ||
| 156 | using FirstArgType = typename Traits::template ArgType<1>; | ||
| 157 | static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst&>; | ||
| 158 | using Indices = std::make_index_sequence<Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1)>; | ||
| 159 | Invoke<func, is_first_arg_inst>(ctx, inst, Indices{}); | ||
| 160 | } | ||
| 161 | } | ||
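The FuncTraits specialization above is the linchpin of the dispatch below: slot 0 of an Emit* function is always EmitContext&, slot 1 may be IR::Inst&, and the remaining slots are the operand types that Arg() wraps. A minimal standalone sketch of the same trait, using empty stand-ins for the real EmitContext and IR::Inst plus a hypothetical EmitExample callback (none of these definitions come from the diff):

    #include <cstddef>
    #include <tuple>
    #include <type_traits>

    template <typename Func>
    struct FuncTraits;

    template <typename ReturnType_, typename... Args>
    struct FuncTraits<ReturnType_ (*)(Args...)> {
        using ReturnType = ReturnType_;
        static constexpr std::size_t NUM_ARGS = sizeof...(Args);
        template <std::size_t I>
        using ArgType = std::tuple_element_t<I, std::tuple<Args...>>;
    };

    // Hypothetical stand-ins, for illustration only.
    struct EmitContext {};
    struct Inst {};
    void EmitExample(EmitContext&, Inst&, int, float) {}

    using Traits = FuncTraits<decltype(&EmitExample)>;
    static_assert(Traits::NUM_ARGS == 4);                      // ctx + inst + 2 operands
    static_assert(std::is_same_v<Traits::ArgType<1>, Inst&>);  // makes is_first_arg_inst true
    static_assert(std::is_same_v<Traits::ArgType<2>, int>);    // wrapped via Arg<ArgType<I + 2>>

For such a signature the Invoke overloads build std::make_index_sequence<2> and feed inst->Arg(0) and inst->Arg(1) through Arg<ArgType<2>> and Arg<ArgType<3>>.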
| 162 | |||
| 163 | void EmitInst(EmitContext& ctx, IR::Inst* inst) { | ||
| 164 | switch (inst->GetOpcode()) { | ||
| 165 | #define OPCODE(name, result_type, ...) \ | ||
| 166 | case IR::Opcode::name: \ | ||
| 167 | return Invoke<&Emit##name>(ctx, inst); | ||
| 168 | #include "shader_recompiler/frontend/ir/opcodes.inc" | ||
| 169 | #undef OPCODE | ||
| 170 | } | ||
| 171 | throw LogicError("Invalid opcode {}", inst->GetOpcode()); | ||
| 172 | } | ||
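EmitInst is generated by an X-macro: each OPCODE(...) row in opcodes.inc expands into one case label that forwards to the matching Emit* function through Invoke. A self-contained toy of the same pattern, with made-up opcodes standing in for the real list:

    #include <cstdio>

    enum class Opcode { Add, Mul };

    void EmitAdd() { std::printf("add\n"); }
    void EmitMul() { std::printf("mul\n"); }

    // One X(...) row per opcode, playing the role of opcodes.inc.
    #define OPCODE_LIST(X) X(Add) X(Mul)

    void Dispatch(Opcode op) {
        switch (op) {
    #define OPCODE(name)                                                       \
        case Opcode::name:                                                     \
            return Emit##name();
            OPCODE_LIST(OPCODE)
    #undef OPCODE
        }
    }

    int main() {
        Dispatch(Opcode::Mul); // prints "mul"
    }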
| 173 | |||
| 174 | bool IsReference(IR::Inst& inst) { | ||
| 175 | return inst.GetOpcode() == IR::Opcode::Reference; | ||
| 176 | } | ||
| 177 | |||
| 178 | void PrecolorInst(IR::Inst& phi) { | ||
| 179 | // Insert phi moves before references to avoid overwriting other phis | ||
| 180 | const size_t num_args{phi.NumArgs()}; | ||
| 181 | for (size_t i = 0; i < num_args; ++i) { | ||
| 182 | IR::Block& phi_block{*phi.PhiBlock(i)}; | ||
| 183 | auto it{std::find_if_not(phi_block.rbegin(), phi_block.rend(), IsReference).base()}; | ||
| 184 | IR::IREmitter ir{phi_block, it}; | ||
| 185 | const IR::Value arg{phi.Arg(i)}; | ||
| 186 | if (arg.IsImmediate()) { | ||
| 187 | ir.PhiMove(phi, arg); | ||
| 188 | } else { | ||
| 189 | ir.PhiMove(phi, IR::Value{&RegAlloc::AliasInst(*arg.Inst())}); | ||
| 190 | } | ||
| 191 | } | ||
| 192 | for (size_t i = 0; i < num_args; ++i) { | ||
| 193 | IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi}); | ||
| 194 | } | ||
| 195 | } | ||
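One detail of PrecolorInst worth spelling out: std::find_if_not over rbegin()/rend() skips the Reference instructions that trail the block, and .base() converts the reverse iterator back into the forward iterator sitting right before them, which is where the phi moves are inserted. A standalone sketch of the idiom, with ints standing in for IR::Inst:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    int main() {
        // 0 = ordinary instruction, 1 = trailing Reference marker.
        const std::vector<int> block{0, 0, 1, 1};
        const auto is_reference = [](int inst) { return inst == 1; };
        // .base() points at the first trailing reference, so insertions
        // land before every reference but after the real instructions.
        const auto it =
            std::find_if_not(block.rbegin(), block.rend(), is_reference).base();
        std::printf("insertion index: %td\n", it - block.begin()); // prints 2
    }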
| 196 | |||
| 197 | void Precolor(const IR::Program& program) { | ||
| 198 | for (IR::Block* const block : program.blocks) { | ||
| 199 | for (IR::Inst& phi : block->Instructions()) { | ||
| 200 | if (!IR::IsPhi(phi)) { | ||
| 201 | break; | ||
| 202 | } | ||
| 203 | PrecolorInst(phi); | ||
| 204 | } | ||
| 205 | } | ||
| 206 | } | ||
| 207 | |||
| 208 | void EmitCode(EmitContext& ctx, const IR::Program& program) { | ||
| 209 | const auto eval{ | ||
| 210 | [&](const IR::U1& cond) { return ScalarS32{ctx.reg_alloc.Consume(IR::Value{cond})}; }}; | ||
| 211 | for (const IR::AbstractSyntaxNode& node : program.syntax_list) { | ||
| 212 | switch (node.type) { | ||
| 213 | case IR::AbstractSyntaxNode::Type::Block: | ||
| 214 | for (IR::Inst& inst : node.data.block->Instructions()) { | ||
| 215 | EmitInst(ctx, &inst); | ||
| 216 | } | ||
| 217 | break; | ||
| 218 | case IR::AbstractSyntaxNode::Type::If: | ||
| 219 | ctx.Add("MOV.S.CC RC,{};" | ||
| 220 | "IF NE.x;", | ||
| 221 | eval(node.data.if_node.cond)); | ||
| 222 | break; | ||
| 223 | case IR::AbstractSyntaxNode::Type::EndIf: | ||
| 224 | ctx.Add("ENDIF;"); | ||
| 225 | break; | ||
| 226 | case IR::AbstractSyntaxNode::Type::Loop: | ||
| 227 | ctx.Add("REP;"); | ||
| 228 | break; | ||
| 229 | case IR::AbstractSyntaxNode::Type::Repeat: | ||
| 230 | if (!Settings::values.disable_shader_loop_safety_checks) { | ||
| 231 | const u32 loop_index{ctx.num_safety_loop_vars++}; | ||
| 232 | const u32 vector_index{loop_index / 4}; | ||
| 233 | const char component{"xyzw"[loop_index % 4]}; | ||
| 234 | ctx.Add("SUB.S.CC loop{}.{},loop{}.{},1;" | ||
| 235 | "BRK(LT.{});", | ||
| 236 | vector_index, component, vector_index, component, component); | ||
| 237 | } | ||
| 238 | if (node.data.repeat.cond.IsImmediate()) { | ||
| 239 | if (node.data.repeat.cond.U1()) { | ||
| 240 | ctx.Add("ENDREP;"); | ||
| 241 | } else { | ||
| 242 | ctx.Add("BRK;" | ||
| 243 | "ENDREP;"); | ||
| 244 | } | ||
| 245 | } else { | ||
| 246 | ctx.Add("MOV.S.CC RC,{};" | ||
| 247 | "BRK(EQ.x);" | ||
| 248 | "ENDREP;", | ||
| 249 | eval(node.data.repeat.cond)); | ||
| 250 | } | ||
| 251 | break; | ||
| 252 | case IR::AbstractSyntaxNode::Type::Break: | ||
| 253 | if (node.data.break_node.cond.IsImmediate()) { | ||
| 254 | if (node.data.break_node.cond.U1()) { | ||
| 255 | ctx.Add("BRK;"); | ||
| 256 | } | ||
| 257 | } else { | ||
| 258 | ctx.Add("MOV.S.CC RC,{};" | ||
| 259 | "BRK (NE.x);", | ||
| 260 | eval(node.data.break_node.cond)); | ||
| 261 | } | ||
| 262 | break; | ||
| 263 | case IR::AbstractSyntaxNode::Type::Return: | ||
| 264 | case IR::AbstractSyntaxNode::Type::Unreachable: | ||
| 265 | ctx.Add("RET;"); | ||
| 266 | break; | ||
| 267 | } | ||
| 268 | } | ||
| 269 | if (!ctx.reg_alloc.IsEmpty()) { | ||
| 270 | LOG_WARNING(Shader_GLASM, "Register leak after generating code"); | ||
| 271 | } | ||
| 272 | } | ||
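The safety check in the Repeat case packs four loop counters into every loop{} vector register: counter N lives in component "xyzw"[N % 4] of register loop(N / 4), is seeded with 0x2000 (8192 iterations) by the header emitted in EmitGLASM below, and forces a BRK once it underflows. A sketch of the counter-to-register mapping:

    #include <cstdio>

    int main() {
        // Counter N decrements loop(N / 4).("xyzw"[N % 4]) once per iteration.
        for (unsigned loop_index = 0; loop_index < 6; ++loop_index) {
            std::printf("counter %u -> loop%u.%c\n", loop_index,
                        loop_index / 4, "xyzw"[loop_index % 4]);
        }
    }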
| 273 | |||
| 274 | void SetupOptions(const IR::Program& program, const Profile& profile, | ||
| 275 | const RuntimeInfo& runtime_info, std::string& header) { | ||
| 276 | const Info& info{program.info}; | ||
| 277 | const Stage stage{program.stage}; | ||
| 278 | |||
| 279 | // TODO: Track the shared atomic ops | ||
| 280 | header += "OPTION NV_internal;" | ||
| 281 | "OPTION NV_shader_storage_buffer;" | ||
| 282 | "OPTION NV_gpu_program_fp64;"; | ||
| 283 | if (info.uses_int64_bit_atomics) { | ||
| 284 | header += "OPTION NV_shader_atomic_int64;"; | ||
| 285 | } | ||
| 286 | if (info.uses_atomic_f32_add) { | ||
| 287 | header += "OPTION NV_shader_atomic_float;"; | ||
| 288 | } | ||
| 289 | if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) { | ||
| 290 | header += "OPTION NV_shader_atomic_fp16_vector;"; | ||
| 291 | } | ||
| 292 | if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote || | ||
| 293 | info.uses_fswzadd) { | ||
| 294 | header += "OPTION NV_shader_thread_group;"; | ||
| 295 | } | ||
| 296 | if (info.uses_subgroup_shuffles) { | ||
| 297 | header += "OPTION NV_shader_thread_shuffle;"; | ||
| 298 | } | ||
| 299 | if (info.uses_sparse_residency) { | ||
| 300 | header += "OPTION EXT_sparse_texture2;"; | ||
| 301 | } | ||
| 302 | const bool stores_viewport_layer{info.stores[IR::Attribute::ViewportIndex] || | ||
| 303 | info.stores[IR::Attribute::Layer]}; | ||
| 304 | if ((stage != Stage::Geometry && stores_viewport_layer) || | ||
| 305 | info.stores[IR::Attribute::ViewportMask]) { | ||
| 306 | if (profile.support_viewport_index_layer_non_geometry) { | ||
| 307 | header += "OPTION NV_viewport_array2;"; | ||
| 308 | } | ||
| 309 | } | ||
| 310 | if (program.is_geometry_passthrough && profile.support_geometry_shader_passthrough) { | ||
| 311 | header += "OPTION NV_geometry_shader_passthrough;"; | ||
| 312 | } | ||
| 313 | if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) { | ||
| 314 | header += "OPTION EXT_shader_image_load_formatted;"; | ||
| 315 | } | ||
| 316 | if (profile.support_derivative_control) { | ||
| 317 | header += "OPTION ARB_derivative_control;"; | ||
| 318 | } | ||
| 319 | if (stage == Stage::Fragment && runtime_info.force_early_z != 0) { | ||
| 320 | header += "OPTION NV_early_fragment_tests;"; | ||
| 321 | } | ||
| 322 | if (stage == Stage::Fragment) { | ||
| 323 | header += "OPTION ARB_draw_buffers;"; | ||
| 324 | } | ||
| 325 | } | ||
| 326 | |||
| 327 | std::string_view StageHeader(Stage stage) { | ||
| 328 | switch (stage) { | ||
| 329 | case Stage::VertexA: | ||
| 330 | case Stage::VertexB: | ||
| 331 | return "!!NVvp5.0\n"; | ||
| 332 | case Stage::TessellationControl: | ||
| 333 | return "!!NVtcp5.0\n"; | ||
| 334 | case Stage::TessellationEval: | ||
| 335 | return "!!NVtep5.0\n"; | ||
| 336 | case Stage::Geometry: | ||
| 337 | return "!!NVgp5.0\n"; | ||
| 338 | case Stage::Fragment: | ||
| 339 | return "!!NVfp5.0\n"; | ||
| 340 | case Stage::Compute: | ||
| 341 | return "!!NVcp5.0\n"; | ||
| 342 | } | ||
| 343 | throw InvalidArgument("Invalid stage {}", stage); | ||
| 344 | } | ||
| 345 | |||
| 346 | std::string_view InputPrimitive(InputTopology topology) { | ||
| 347 | switch (topology) { | ||
| 348 | case InputTopology::Points: | ||
| 349 | return "POINTS"; | ||
| 350 | case InputTopology::Lines: | ||
| 351 | return "LINES"; | ||
| 352 | case InputTopology::LinesAdjacency: | ||
| 353 | return "LINESS_ADJACENCY"; | ||
| 354 | case InputTopology::Triangles: | ||
| 355 | return "TRIANGLES"; | ||
| 356 | case InputTopology::TrianglesAdjacency: | ||
| 357 | return "TRIANGLES_ADJACENCY"; | ||
| 358 | } | ||
| 359 | throw InvalidArgument("Invalid input topology {}", topology); | ||
| 360 | } | ||
| 361 | |||
| 362 | std::string_view OutputPrimitive(OutputTopology topology) { | ||
| 363 | switch (topology) { | ||
| 364 | case OutputTopology::PointList: | ||
| 365 | return "POINTS"; | ||
| 366 | case OutputTopology::LineStrip: | ||
| 367 | return "LINE_STRIP"; | ||
| 368 | case OutputTopology::TriangleStrip: | ||
| 369 | return "TRIANGLE_STRIP"; | ||
| 370 | } | ||
| 371 | throw InvalidArgument("Invalid output topology {}", topology); | ||
| 372 | } | ||
| 373 | |||
| 374 | std::string_view GetTessMode(TessPrimitive primitive) { | ||
| 375 | switch (primitive) { | ||
| 376 | case TessPrimitive::Triangles: | ||
| 377 | return "TRIANGLES"; | ||
| 378 | case TessPrimitive::Quads: | ||
| 379 | return "QUADS"; | ||
| 380 | case TessPrimitive::Isolines: | ||
| 381 | return "ISOLINES"; | ||
| 382 | } | ||
| 383 | throw InvalidArgument("Invalid tessellation primitive {}", primitive); | ||
| 384 | } | ||
| 385 | |||
| 386 | std::string_view GetTessSpacing(TessSpacing spacing) { | ||
| 387 | switch (spacing) { | ||
| 388 | case TessSpacing::Equal: | ||
| 389 | return "EQUAL"; | ||
| 390 | case TessSpacing::FractionalOdd: | ||
| 391 | return "FRACTIONAL_ODD"; | ||
| 392 | case TessSpacing::FractionalEven: | ||
| 393 | return "FRACTIONAL_EVEN"; | ||
| 394 | } | ||
| 395 | throw InvalidArgument("Invalid tessellation spacing {}", spacing); | ||
| 396 | } | ||
| 397 | } // Anonymous namespace | ||
| 398 | |||
| 399 | std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program, | ||
| 400 | Bindings& bindings) { | ||
| 401 | EmitContext ctx{program, bindings, profile, runtime_info}; | ||
| 402 | Precolor(program); | ||
| 403 | EmitCode(ctx, program); | ||
| 404 | std::string header{StageHeader(program.stage)}; | ||
| 405 | SetupOptions(program, profile, runtime_info, header); | ||
| 406 | switch (program.stage) { | ||
| 407 | case Stage::TessellationControl: | ||
| 408 | header += fmt::format("VERTICES_OUT {};", program.invocations); | ||
| 409 | break; | ||
| 410 | case Stage::TessellationEval: | ||
| 411 | header += fmt::format("TESS_MODE {};" | ||
| 412 | "TESS_SPACING {};" | ||
| 413 | "TESS_VERTEX_ORDER {};", | ||
| 414 | GetTessMode(runtime_info.tess_primitive), | ||
| 415 | GetTessSpacing(runtime_info.tess_spacing), | ||
| 416 | runtime_info.tess_clockwise ? "CW" : "CCW"); | ||
| 417 | break; | ||
| 418 | case Stage::Geometry: | ||
| 419 | header += fmt::format("PRIMITIVE_IN {};", InputPrimitive(runtime_info.input_topology)); | ||
| 420 | if (program.is_geometry_passthrough) { | ||
| 421 | if (profile.support_geometry_shader_passthrough) { | ||
| 422 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { | ||
| 423 | if (program.info.passthrough.Generic(index)) { | ||
| 424 | header += fmt::format("PASSTHROUGH result.attrib[{}];", index); | ||
| 425 | } | ||
| 426 | } | ||
| 427 | if (program.info.passthrough.AnyComponent(IR::Attribute::PositionX)) { | ||
| 428 | header += "PASSTHROUGH result.position;"; | ||
| 429 | } | ||
| 430 | } else { | ||
| 431 | LOG_WARNING(Shader_GLASM, "Passthrough geometry program used but not supported"); | ||
| 432 | } | ||
| 433 | } else { | ||
| 434 | header += | ||
| 435 | fmt::format("VERTICES_OUT {};" | ||
| 436 | "PRIMITIVE_OUT {};", | ||
| 437 | program.output_vertices, OutputPrimitive(program.output_topology)); | ||
| 438 | } | ||
| 439 | break; | ||
| 440 | case Stage::Compute: | ||
| 441 | header += fmt::format("GROUP_SIZE {} {} {};", program.workgroup_size[0], | ||
| 442 | program.workgroup_size[1], program.workgroup_size[2]); | ||
| 443 | break; | ||
| 444 | default: | ||
| 445 | break; | ||
| 446 | } | ||
| 447 | if (program.shared_memory_size > 0) { | ||
| 448 | header += fmt::format("SHARED_MEMORY {};", program.shared_memory_size); | ||
| 449 | header += fmt::format("SHARED shared_mem[]={{program.sharedmem}};"); | ||
| 450 | } | ||
| 451 | header += "TEMP "; | ||
| 452 | for (size_t index = 0; index < ctx.reg_alloc.NumUsedRegisters(); ++index) { | ||
| 453 | header += fmt::format("R{},", index); | ||
| 454 | } | ||
| 455 | if (program.local_memory_size > 0) { | ||
| 456 | header += fmt::format("lmem[{}],", program.local_memory_size); | ||
| 457 | } | ||
| 458 | if (program.info.uses_fswzadd) { | ||
| 459 | header += "FSWZA[4],FSWZB[4],"; | ||
| 460 | } | ||
| 461 | const u32 num_safety_loop_vectors{Common::DivCeil(ctx.num_safety_loop_vars, 4u)}; | ||
| 462 | for (u32 index = 0; index < num_safety_loop_vectors; ++index) { | ||
| 463 | header += fmt::format("loop{},", index); | ||
| 464 | } | ||
| 465 | header += "RC;" | ||
| 466 | "LONG TEMP "; | ||
| 467 | for (size_t index = 0; index < ctx.reg_alloc.NumUsedLongRegisters(); ++index) { | ||
| 468 | header += fmt::format("D{},", index); | ||
| 469 | } | ||
| 470 | header += "DC;"; | ||
| 471 | if (program.info.uses_fswzadd) { | ||
| 472 | header += "MOV.F FSWZA[0],-1;" | ||
| 473 | "MOV.F FSWZA[1],1;" | ||
| 474 | "MOV.F FSWZA[2],-1;" | ||
| 475 | "MOV.F FSWZA[3],0;" | ||
| 476 | "MOV.F FSWZB[0],-1;" | ||
| 477 | "MOV.F FSWZB[1],-1;" | ||
| 478 | "MOV.F FSWZB[2],1;" | ||
| 479 | "MOV.F FSWZB[3],-1;"; | ||
| 480 | } | ||
| 481 | for (u32 index = 0; index < num_safety_loop_vectors; ++index) { | ||
| 482 | header += fmt::format("MOV.S loop{},{{0x2000,0x2000,0x2000,0x2000}};", index); | ||
| 483 | } | ||
| 484 | if (ctx.uses_y_direction) { | ||
| 485 | header += "PARAM y_direction[1]={state.material.front.ambient};"; | ||
| 486 | } | ||
| 487 | ctx.code.insert(0, header); | ||
| 488 | ctx.code += "END"; | ||
| 489 | return ctx.code; | ||
| 490 | } | ||
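For orientation, the declaration prologue assembled above would come out roughly as follows for a hypothetical program with three temporaries, one long temporary, one safety-loop vector, and neither local memory nor FSWZADD (an illustrative reconstruction, not a captured dump):

    #include <cstdio>

    int main() {
        std::printf("TEMP R0,R1,R2,loop0,RC;"
                    "LONG TEMP D0,DC;"
                    "MOV.S loop0,{0x2000,0x2000,0x2000,0x2000};\n");
    }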
| 491 | |||
| 492 | } // namespace Shader::Backend::GLASM | ||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.h b/src/shader_recompiler/backend/glasm/emit_glasm.h new file mode 100644 index 000000000..bcb55f062 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm.h | |||
| @@ -0,0 +1,25 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <string> | ||
| 8 | |||
| 9 | #include "shader_recompiler/backend/bindings.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 11 | #include "shader_recompiler/profile.h" | ||
| 12 | #include "shader_recompiler/runtime_info.h" | ||
| 13 | |||
| 14 | namespace Shader::Backend::GLASM { | ||
| 15 | |||
| 16 | [[nodiscard]] std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, | ||
| 17 | IR::Program& program, Bindings& bindings); | ||
| 18 | |||
| 19 | [[nodiscard]] inline std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, | ||
| 20 | IR::Program& program) { | ||
| 21 | Bindings binding; | ||
| 22 | return EmitGLASM(profile, runtime_info, program, binding); | ||
| 23 | } | ||
| 24 | |||
| 25 | } // namespace Shader::Backend::GLASM | ||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp | |||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp new file mode 100644 index 000000000..9201ccd39 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp | |||
| @@ -0,0 +1,91 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 6 | #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" | ||
| 7 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 8 | |||
| 9 | namespace Shader::Backend::GLASM { | ||
| 10 | |||
| 11 | static void Alias(IR::Inst& inst, const IR::Value& value) { | ||
| 12 | if (value.IsImmediate()) { | ||
| 13 | return; | ||
| 14 | } | ||
| 15 | IR::Inst& value_inst{RegAlloc::AliasInst(*value.Inst())}; | ||
| 16 | value_inst.DestructiveAddUsage(inst.UseCount()); | ||
| 17 | value_inst.DestructiveRemoveUsage(); | ||
| 18 | inst.SetDefinition(value_inst.Definition<Id>()); | ||
| 19 | } | ||
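Alias is why the bit-cast emitters below generate no code at all: GLASM registers are typeless, so the cast's result can simply reuse the producer's register. A toy model of the bookkeeping, with simplified stand-ins for IR::Inst and assuming DestructiveRemoveUsage drops the single usage the alias itself held:

    #include <cassert>

    struct Inst {
        int use_count = 0;
        int def_register = -1; // GLASM register id holding the result
    };

    void Alias(Inst& inst, Inst& producer) {
        producer.use_count += inst.use_count; // DestructiveAddUsage(inst.UseCount())
        producer.use_count -= 1;              // DestructiveRemoveUsage(): the alias's own read
        inst.def_register = producer.def_register; // SetDefinition(...)
    }

    int main() {
        Inst producer{1, 3}; // one pending read (the cast itself), result in R3
        Inst bitcast{2, -1}; // two readers of the cast's result
        Alias(bitcast, producer);
        assert(bitcast.def_register == 3); // readers now consume R3 directly
        assert(producer.use_count == 2);   // producer stays live for both reads
    }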
| 20 | |||
| 21 | void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) { | ||
| 22 | Alias(inst, value); | ||
| 23 | } | ||
| 24 | |||
| 25 | void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) { | ||
| 26 | // Fake one usage to get a real register out of the condition | ||
| 27 | inst.DestructiveAddUsage(1); | ||
| 28 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 29 | const ScalarS32 input{ctx.reg_alloc.Consume(value)}; | ||
| 30 | if (ret != input) { | ||
| 31 | ctx.Add("MOV.S {},{};", ret, input); | ||
| 32 | } | ||
| 33 | } | ||
| 34 | |||
| 35 | void EmitBitCastU16F16(EmitContext&, IR::Inst& inst, const IR::Value& value) { | ||
| 36 | Alias(inst, value); | ||
| 37 | } | ||
| 38 | |||
| 39 | void EmitBitCastU32F32(EmitContext&, IR::Inst& inst, const IR::Value& value) { | ||
| 40 | Alias(inst, value); | ||
| 41 | } | ||
| 42 | |||
| 43 | void EmitBitCastU64F64(EmitContext&, IR::Inst& inst, const IR::Value& value) { | ||
| 44 | Alias(inst, value); | ||
| 45 | } | ||
| 46 | |||
| 47 | void EmitBitCastF16U16(EmitContext&, IR::Inst& inst, const IR::Value& value) { | ||
| 48 | Alias(inst, value); | ||
| 49 | } | ||
| 50 | |||
| 51 | void EmitBitCastF32U32(EmitContext&, IR::Inst& inst, const IR::Value& value) { | ||
| 52 | Alias(inst, value); | ||
| 53 | } | ||
| 54 | |||
| 55 | void EmitBitCastF64U64(EmitContext&, IR::Inst& inst, const IR::Value& value) { | ||
| 56 | Alias(inst, value); | ||
| 57 | } | ||
| 58 | |||
| 59 | void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 60 | ctx.LongAdd("PK64.U {}.x,{};", inst, value); | ||
| 61 | } | ||
| 62 | |||
| 63 | void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 64 | ctx.Add("UP64.U {}.xy,{}.x;", inst, value); | ||
| 65 | } | ||
| 66 | |||
| 67 | void EmitPackFloat2x16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { | ||
| 68 | throw NotImplementedException("GLASM instruction"); | ||
| 69 | } | ||
| 70 | |||
| 71 | void EmitUnpackFloat2x16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { | ||
| 72 | throw NotImplementedException("GLASM instruction"); | ||
| 73 | } | ||
| 74 | |||
| 75 | void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 76 | ctx.Add("PK2H {}.x,{};", inst, value); | ||
| 77 | } | ||
| 78 | |||
| 79 | void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 80 | ctx.Add("UP2H {}.xy,{}.x;", inst, value); | ||
| 81 | } | ||
| 82 | |||
| 83 | void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 84 | ctx.LongAdd("PK64 {}.x,{};", inst, value); | ||
| 85 | } | ||
| 86 | |||
| 87 | void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 88 | ctx.Add("UP64 {}.xy,{}.x;", inst, value); | ||
| 89 | } | ||
| 90 | |||
| 91 | } // namespace Shader::Backend::GLASM | ||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp new file mode 100644 index 000000000..bff0b7c1c --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp | |||
| @@ -0,0 +1,244 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 6 | #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" | ||
| 7 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 8 | |||
| 9 | namespace Shader::Backend::GLASM { | ||
| 10 | namespace { | ||
| 11 | template <auto read_imm, char type, typename... Values> | ||
| 12 | void CompositeConstruct(EmitContext& ctx, IR::Inst& inst, Values&&... elements) { | ||
| 13 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 14 | if (std::ranges::any_of(std::array{elements...}, | ||
| 15 | [](const IR::Value& value) { return value.IsImmediate(); })) { | ||
| 16 | using Type = std::invoke_result_t<decltype(read_imm), IR::Value>; | ||
| 17 | const std::array<Type, 4> values{(elements.IsImmediate() ? (elements.*read_imm)() : 0)...}; | ||
| 18 | ctx.Add("MOV.{} {},{{{},{},{},{}}};", type, ret, fmt::to_string(values[0]), | ||
| 19 | fmt::to_string(values[1]), fmt::to_string(values[2]), fmt::to_string(values[3])); | ||
| 20 | } | ||
| 21 | size_t index{}; | ||
| 22 | for (const IR::Value& element : {elements...}) { | ||
| 23 | if (!element.IsImmediate()) { | ||
| 24 | const ScalarU32 value{ctx.reg_alloc.Consume(element)}; | ||
| 25 | ctx.Add("MOV.{} {}.{},{};", type, ret, "xyzw"[index], value); | ||
| 26 | } | ||
| 27 | ++index; | ||
| 28 | } | ||
| 29 | } | ||
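CompositeConstruct thus emits in two phases: one vector MOV carrying every immediate element (with the remaining slots written as 0), then a per-component MOV for each register-backed element. A standalone re-creation of the output for a hypothetical two-element construct of the immediate 5 and a register R2, assuming the result is allocated to R0:

    #include <array>
    #include <cstddef>
    #include <cstdio>

    int main() {
        struct Element {
            bool is_immediate;
            unsigned imm;
            int reg; // register index when not immediate
        };
        const std::array<Element, 2> elements{{{true, 5, -1}, {false, 0, 2}}};

        // Phase 1: one vector MOV with every immediate (other slots as 0).
        std::printf("MOV.U R0,{%u,%u,0,0};\n",
                    elements[0].is_immediate ? elements[0].imm : 0u,
                    elements[1].is_immediate ? elements[1].imm : 0u);
        // Phase 2: per-component MOV for each register-backed element.
        for (std::size_t i = 0; i < elements.size(); ++i) {
            if (!elements[i].is_immediate) {
                std::printf("MOV.U R0.%c,R%d.x;\n", "xyzw"[i], elements[i].reg);
            }
        }
    }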
| 30 | |||
| 31 | void CompositeExtract(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index, char type) { | ||
| 32 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 33 | if (ret == composite && index == 0) { | ||
| 34 | // No need to do anything here; the source and destination are the same register | ||
| 35 | return; | ||
| 36 | } | ||
| 37 | ctx.Add("MOV.{} {}.x,{}.{};", type, ret, composite, "xyzw"[index]); | ||
| 38 | } | ||
| 39 | |||
| 40 | template <typename ObjectType> | ||
| 41 | void CompositeInsert(EmitContext& ctx, IR::Inst& inst, Register composite, ObjectType object, | ||
| 42 | u32 index, char type) { | ||
| 43 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 44 | const char swizzle{"xyzw"[index]}; | ||
| 45 | if (ret != composite && ret == object) { | ||
| 46 | // The object is aliased with the return value, so we have to use a temporary to insert | ||
| 47 | ctx.Add("MOV.{} RC,{};" | ||
| 48 | "MOV.{} RC.{},{};" | ||
| 49 | "MOV.{} {},RC;", | ||
| 50 | type, composite, type, swizzle, object, type, ret); | ||
| 51 | } else if (ret != composite) { | ||
| 52 | // The input composite is not aliased with the return value, so we have to copy it | ||
| 53 | // beforehand. The insert object is not aliased with the return value either, so we | ||
| 54 | // don't have to worry about overwriting it | ||
| 55 | ctx.Add("MOV.{} {},{};" | ||
| 56 | "MOV.{} {}.{},{};", | ||
| 57 | type, ret, composite, type, ret, swizzle, object); | ||
| 58 | } else { | ||
| 59 | // The return value is aliased with the composite, so we can just insert the object; | ||
| 60 | // it doesn't matter whether the object itself is aliased | ||
| 61 | ctx.Add("MOV.{} {}.{},{};", type, ret, swizzle, object); | ||
| 62 | } | ||
| 63 | } | ||
| 64 | } // Anonymous namespace | ||
| 65 | |||
| 66 | void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, | ||
| 67 | const IR::Value& e2) { | ||
| 68 | CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2); | ||
| 69 | } | ||
| 70 | |||
| 71 | void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, | ||
| 72 | const IR::Value& e2, const IR::Value& e3) { | ||
| 73 | CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2, e3); | ||
| 74 | } | ||
| 75 | |||
| 76 | void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, | ||
| 77 | const IR::Value& e2, const IR::Value& e3, const IR::Value& e4) { | ||
| 78 | CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2, e3, e4); | ||
| 79 | } | ||
| 80 | |||
| 81 | void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { | ||
| 82 | CompositeExtract(ctx, inst, composite, index, 'U'); | ||
| 83 | } | ||
| 84 | |||
| 85 | void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { | ||
| 86 | CompositeExtract(ctx, inst, composite, index, 'U'); | ||
| 87 | } | ||
| 88 | |||
| 89 | void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { | ||
| 90 | CompositeExtract(ctx, inst, composite, index, 'U'); | ||
| 91 | } | ||
| 92 | |||
| 93 | void EmitCompositeInsertU32x2([[maybe_unused]] EmitContext& ctx, | ||
| 94 | [[maybe_unused]] Register composite, | ||
| 95 | [[maybe_unused]] ScalarU32 object, [[maybe_unused]] u32 index) { | ||
| 96 | throw NotImplementedException("GLASM instruction"); | ||
| 97 | } | ||
| 98 | |||
| 99 | void EmitCompositeInsertU32x3([[maybe_unused]] EmitContext& ctx, | ||
| 100 | [[maybe_unused]] Register composite, | ||
| 101 | [[maybe_unused]] ScalarU32 object, [[maybe_unused]] u32 index) { | ||
| 102 | throw NotImplementedException("GLASM instruction"); | ||
| 103 | } | ||
| 104 | |||
| 105 | void EmitCompositeInsertU32x4([[maybe_unused]] EmitContext& ctx, | ||
| 106 | [[maybe_unused]] Register composite, | ||
| 107 | [[maybe_unused]] ScalarU32 object, [[maybe_unused]] u32 index) { | ||
| 108 | throw NotImplementedException("GLASM instruction"); | ||
| 109 | } | ||
| 110 | |||
| 111 | void EmitCompositeConstructF16x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register e1, | ||
| 112 | [[maybe_unused]] Register e2) { | ||
| 113 | throw NotImplementedException("GLASM instruction"); | ||
| 114 | } | ||
| 115 | |||
| 116 | void EmitCompositeConstructF16x3([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register e1, | ||
| 117 | [[maybe_unused]] Register e2, [[maybe_unused]] Register e3) { | ||
| 118 | throw NotImplementedException("GLASM instruction"); | ||
| 119 | } | ||
| 120 | |||
| 121 | void EmitCompositeConstructF16x4([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register e1, | ||
| 122 | [[maybe_unused]] Register e2, [[maybe_unused]] Register e3, | ||
| 123 | [[maybe_unused]] Register e4) { | ||
| 124 | throw NotImplementedException("GLASM instruction"); | ||
| 125 | } | ||
| 126 | |||
| 127 | void EmitCompositeExtractF16x2([[maybe_unused]] EmitContext& ctx, | ||
| 128 | [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) { | ||
| 129 | throw NotImplementedException("GLASM instruction"); | ||
| 130 | } | ||
| 131 | |||
| 132 | void EmitCompositeExtractF16x3([[maybe_unused]] EmitContext& ctx, | ||
| 133 | [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) { | ||
| 134 | throw NotImplementedException("GLASM instruction"); | ||
| 135 | } | ||
| 136 | |||
| 137 | void EmitCompositeExtractF16x4([[maybe_unused]] EmitContext& ctx, | ||
| 138 | [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) { | ||
| 139 | throw NotImplementedException("GLASM instruction"); | ||
| 140 | } | ||
| 141 | |||
| 142 | void EmitCompositeInsertF16x2([[maybe_unused]] EmitContext& ctx, | ||
| 143 | [[maybe_unused]] Register composite, [[maybe_unused]] Register object, | ||
| 144 | [[maybe_unused]] u32 index) { | ||
| 145 | throw NotImplementedException("GLASM instruction"); | ||
| 146 | } | ||
| 147 | |||
| 148 | void EmitCompositeInsertF16x3([[maybe_unused]] EmitContext& ctx, | ||
| 149 | [[maybe_unused]] Register composite, [[maybe_unused]] Register object, | ||
| 150 | [[maybe_unused]] u32 index) { | ||
| 151 | throw NotImplementedException("GLASM instruction"); | ||
| 152 | } | ||
| 153 | |||
| 154 | void EmitCompositeInsertF16x4([[maybe_unused]] EmitContext& ctx, | ||
| 155 | [[maybe_unused]] Register composite, [[maybe_unused]] Register object, | ||
| 156 | [[maybe_unused]] u32 index) { | ||
| 157 | throw NotImplementedException("GLASM instruction"); | ||
| 158 | } | ||
| 159 | |||
| 160 | void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, | ||
| 161 | const IR::Value& e2) { | ||
| 162 | CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2); | ||
| 163 | } | ||
| 164 | |||
| 165 | void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, | ||
| 166 | const IR::Value& e2, const IR::Value& e3) { | ||
| 167 | CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2, e3); | ||
| 168 | } | ||
| 169 | |||
| 170 | void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, | ||
| 171 | const IR::Value& e2, const IR::Value& e3, const IR::Value& e4) { | ||
| 172 | CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2, e3, e4); | ||
| 173 | } | ||
| 174 | |||
| 175 | void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { | ||
| 176 | CompositeExtract(ctx, inst, composite, index, 'F'); | ||
| 177 | } | ||
| 178 | |||
| 179 | void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { | ||
| 180 | CompositeExtract(ctx, inst, composite, index, 'F'); | ||
| 181 | } | ||
| 182 | |||
| 183 | void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { | ||
| 184 | CompositeExtract(ctx, inst, composite, index, 'F'); | ||
| 185 | } | ||
| 186 | |||
| 187 | void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, | ||
| 188 | ScalarF32 object, u32 index) { | ||
| 189 | CompositeInsert(ctx, inst, composite, object, index, 'F'); | ||
| 190 | } | ||
| 191 | |||
| 192 | void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, | ||
| 193 | ScalarF32 object, u32 index) { | ||
| 194 | CompositeInsert(ctx, inst, composite, object, index, 'F'); | ||
| 195 | } | ||
| 196 | |||
| 197 | void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, | ||
| 198 | ScalarF32 object, u32 index) { | ||
| 199 | CompositeInsert(ctx, inst, composite, object, index, 'F'); | ||
| 200 | } | ||
| 201 | |||
| 202 | void EmitCompositeConstructF64x2([[maybe_unused]] EmitContext& ctx) { | ||
| 203 | throw NotImplementedException("GLASM instruction"); | ||
| 204 | } | ||
| 205 | |||
| 206 | void EmitCompositeConstructF64x3([[maybe_unused]] EmitContext& ctx) { | ||
| 207 | throw NotImplementedException("GLASM instruction"); | ||
| 208 | } | ||
| 209 | |||
| 210 | void EmitCompositeConstructF64x4([[maybe_unused]] EmitContext& ctx) { | ||
| 211 | throw NotImplementedException("GLASM instruction"); | ||
| 212 | } | ||
| 213 | |||
| 214 | void EmitCompositeExtractF64x2([[maybe_unused]] EmitContext& ctx) { | ||
| 215 | throw NotImplementedException("GLASM instruction"); | ||
| 216 | } | ||
| 217 | |||
| 218 | void EmitCompositeExtractF64x3([[maybe_unused]] EmitContext& ctx) { | ||
| 219 | throw NotImplementedException("GLASM instruction"); | ||
| 220 | } | ||
| 221 | |||
| 222 | void EmitCompositeExtractF64x4([[maybe_unused]] EmitContext& ctx) { | ||
| 223 | throw NotImplementedException("GLASM instruction"); | ||
| 224 | } | ||
| 225 | |||
| 226 | void EmitCompositeInsertF64x2([[maybe_unused]] EmitContext& ctx, | ||
| 227 | [[maybe_unused]] Register composite, [[maybe_unused]] Register object, | ||
| 228 | [[maybe_unused]] u32 index) { | ||
| 229 | throw NotImplementedException("GLASM instruction"); | ||
| 230 | } | ||
| 231 | |||
| 232 | void EmitCompositeInsertF64x3([[maybe_unused]] EmitContext& ctx, | ||
| 233 | [[maybe_unused]] Register composite, [[maybe_unused]] Register object, | ||
| 234 | [[maybe_unused]] u32 index) { | ||
| 235 | throw NotImplementedException("GLASM instruction"); | ||
| 236 | } | ||
| 237 | |||
| 238 | void EmitCompositeInsertF64x4([[maybe_unused]] EmitContext& ctx, | ||
| 239 | [[maybe_unused]] Register composite, [[maybe_unused]] Register object, | ||
| 240 | [[maybe_unused]] u32 index) { | ||
| 241 | throw NotImplementedException("GLASM instruction"); | ||
| 242 | } | ||
| 243 | |||
| 244 | } // namespace Shader::Backend::GLASM | ||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp new file mode 100644 index 000000000..02c9dc6d7 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp | |||
| @@ -0,0 +1,346 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 10 | #include "shader_recompiler/profile.h" | ||
| 11 | #include "shader_recompiler/shader_info.h" | ||
| 12 | |||
| 13 | namespace Shader::Backend::GLASM { | ||
| 14 | namespace { | ||
| 15 | void GetCbuf(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, | ||
| 16 | std::string_view size) { | ||
| 17 | if (!binding.IsImmediate()) { | ||
| 18 | throw NotImplementedException("Indirect constant buffer loading"); | ||
| 19 | } | ||
| 20 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 21 | if (offset.type == Type::U32) { | ||
| 22 | // Avoid reading arrays out of bounds, matching hardware's behavior | ||
| 23 | if (offset.imm_u32 >= 0x10'000) { | ||
| 24 | ctx.Add("MOV.S {},0;", ret); | ||
| 25 | return; | ||
| 26 | } | ||
| 27 | } | ||
| 28 | ctx.Add("LDC.{} {},c{}[{}];", size, ret, binding.U32(), offset); | ||
| 29 | } | ||
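The early-out above reflects the hardware's 64 KiB constant buffer size: an immediate offset at or past 0x10'000 bytes can never address valid data, so the result reads back as zero instead of emitting an out-of-bounds LDC. The rule in one function:

    #include <cstdint>
    #include <cstdio>

    // Immediate cbuf offsets at or past 64 KiB read as zero (no LDC emitted).
    bool CbufOffsetInBounds(std::uint32_t offset) {
        return offset < 0x10'000;
    }

    int main() {
        std::printf("%d %d\n", CbufOffsetInBounds(0xFFFC),
                    CbufOffsetInBounds(0x10'000)); // prints 1 0
    }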
| 30 | |||
| 31 | bool IsInputArray(Stage stage) { | ||
| 32 | return stage == Stage::Geometry || stage == Stage::TessellationControl || | ||
| 33 | stage == Stage::TessellationEval; | ||
| 34 | } | ||
| 35 | |||
| 36 | std::string VertexIndex(EmitContext& ctx, ScalarU32 vertex) { | ||
| 37 | return IsInputArray(ctx.stage) ? fmt::format("[{}]", vertex) : ""; | ||
| 38 | } | ||
| 39 | |||
| 40 | u32 TexCoordIndex(IR::Attribute attr) { | ||
| 41 | return (static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::FixedFncTexture0S)) / 4; | ||
| 42 | } | ||
| 43 | } // Anonymous namespace | ||
| 44 | |||
| 45 | void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) { | ||
| 46 | GetCbuf(ctx, inst, binding, offset, "U8"); | ||
| 47 | } | ||
| 48 | |||
| 49 | void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) { | ||
| 50 | GetCbuf(ctx, inst, binding, offset, "S8"); | ||
| 51 | } | ||
| 52 | |||
| 53 | void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) { | ||
| 54 | GetCbuf(ctx, inst, binding, offset, "U16"); | ||
| 55 | } | ||
| 56 | |||
| 57 | void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) { | ||
| 58 | GetCbuf(ctx, inst, binding, offset, "S16"); | ||
| 59 | } | ||
| 60 | |||
| 61 | void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) { | ||
| 62 | GetCbuf(ctx, inst, binding, offset, "U32"); | ||
| 63 | } | ||
| 64 | |||
| 65 | void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) { | ||
| 66 | GetCbuf(ctx, inst, binding, offset, "F32"); | ||
| 67 | } | ||
| 68 | |||
| 69 | void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 70 | ScalarU32 offset) { | ||
| 71 | GetCbuf(ctx, inst, binding, offset, "U32X2"); | ||
| 72 | } | ||
| 73 | |||
| 74 | void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex) { | ||
| 75 | const u32 element{static_cast<u32>(attr) % 4}; | ||
| 76 | const char swizzle{"xyzw"[element]}; | ||
| 77 | if (IR::IsGeneric(attr)) { | ||
| 78 | const u32 index{IR::GenericAttributeIndex(attr)}; | ||
| 79 | ctx.Add("MOV.F {}.x,in_attr{}{}[0].{};", inst, index, VertexIndex(ctx, vertex), swizzle); | ||
| 80 | return; | ||
| 81 | } | ||
| 82 | if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) { | ||
| 83 | const u32 index{TexCoordIndex(attr)}; | ||
| 84 | ctx.Add("MOV.F {}.x,{}.texcoord[{}].{};", inst, ctx.attrib_name, index, swizzle); | ||
| 85 | return; | ||
| 86 | } | ||
| 87 | switch (attr) { | ||
| 88 | case IR::Attribute::PrimitiveId: | ||
| 89 | ctx.Add("MOV.S {}.x,primitive.id;", inst); | ||
| 90 | break; | ||
| 91 | case IR::Attribute::PositionX: | ||
| 92 | case IR::Attribute::PositionY: | ||
| 93 | case IR::Attribute::PositionZ: | ||
| 94 | case IR::Attribute::PositionW: | ||
| 95 | if (IsInputArray(ctx.stage)) { | ||
| 96 | ctx.Add("MOV.F {}.x,vertex_position{}.{};", inst, VertexIndex(ctx, vertex), swizzle); | ||
| 97 | } else { | ||
| 98 | ctx.Add("MOV.F {}.x,{}.position.{};", inst, ctx.attrib_name, swizzle); | ||
| 99 | } | ||
| 100 | break; | ||
| 101 | case IR::Attribute::ColorFrontDiffuseR: | ||
| 102 | case IR::Attribute::ColorFrontDiffuseG: | ||
| 103 | case IR::Attribute::ColorFrontDiffuseB: | ||
| 104 | case IR::Attribute::ColorFrontDiffuseA: | ||
| 105 | ctx.Add("MOV.F {}.x,{}.color.{};", inst, ctx.attrib_name, swizzle); | ||
| 106 | break; | ||
| 107 | case IR::Attribute::PointSpriteS: | ||
| 108 | case IR::Attribute::PointSpriteT: | ||
| 109 | ctx.Add("MOV.F {}.x,{}.pointcoord.{};", inst, ctx.attrib_name, swizzle); | ||
| 110 | break; | ||
| 111 | case IR::Attribute::TessellationEvaluationPointU: | ||
| 112 | case IR::Attribute::TessellationEvaluationPointV: | ||
| 113 | ctx.Add("MOV.F {}.x,vertex.tesscoord.{};", inst, swizzle); | ||
| 114 | break; | ||
| 115 | case IR::Attribute::InstanceId: | ||
| 116 | ctx.Add("MOV.S {}.x,{}.instance;", inst, ctx.attrib_name); | ||
| 117 | break; | ||
| 118 | case IR::Attribute::VertexId: | ||
| 119 | ctx.Add("MOV.S {}.x,{}.id;", inst, ctx.attrib_name); | ||
| 120 | break; | ||
| 121 | case IR::Attribute::FrontFace: | ||
| 122 | ctx.Add("CMP.S {}.x,{}.facing.x,0,-1;", inst, ctx.attrib_name); | ||
| 123 | break; | ||
| 124 | default: | ||
| 125 | throw NotImplementedException("Get attribute {}", attr); | ||
| 126 | } | ||
| 127 | } | ||
| 128 | |||
| 129 | void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, | ||
| 130 | [[maybe_unused]] ScalarU32 vertex) { | ||
| 131 | const u32 element{static_cast<u32>(attr) % 4}; | ||
| 132 | const char swizzle{"xyzw"[element]}; | ||
| 133 | if (IR::IsGeneric(attr)) { | ||
| 134 | const u32 index{IR::GenericAttributeIndex(attr)}; | ||
| 135 | ctx.Add("MOV.F out_attr{}[0].{},{};", index, swizzle, value); | ||
| 136 | return; | ||
| 137 | } | ||
| 138 | if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9R) { | ||
| 139 | const u32 index{TexCoordIndex(attr)}; | ||
| 140 | ctx.Add("MOV.F result.texcoord[{}].{},{};", index, swizzle, value); | ||
| 141 | return; | ||
| 142 | } | ||
| 143 | switch (attr) { | ||
| 144 | case IR::Attribute::Layer: | ||
| 145 | if (ctx.stage == Stage::Geometry || ctx.profile.support_viewport_index_layer_non_geometry) { | ||
| 146 | ctx.Add("MOV.F result.layer.x,{};", value); | ||
| 147 | } else { | ||
| 148 | LOG_WARNING(Shader_GLASM, | ||
| 149 | "Layer stored outside of geometry shader not supported by device"); | ||
| 150 | } | ||
| 151 | break; | ||
| 152 | case IR::Attribute::ViewportIndex: | ||
| 153 | if (ctx.stage == Stage::Geometry || ctx.profile.support_viewport_index_layer_non_geometry) { | ||
| 154 | ctx.Add("MOV.F result.viewport.x,{};", value); | ||
| 155 | } else { | ||
| 156 | LOG_WARNING(Shader_GLASM, | ||
| 157 | "Viewport stored outside of geometry shader not supported by device"); | ||
| 158 | } | ||
| 159 | break; | ||
| 160 | case IR::Attribute::ViewportMask: | ||
| 161 | // NV_viewport_array2 is required to access result.viewportmask, regardless of shader stage. | ||
| 162 | if (ctx.profile.support_viewport_index_layer_non_geometry) { | ||
| 163 | ctx.Add("MOV.F result.viewportmask[0].x,{};", value); | ||
| 164 | } else { | ||
| 165 | LOG_WARNING(Shader_GLASM, "Device does not support storing to ViewportMask"); | ||
| 166 | } | ||
| 167 | break; | ||
| 168 | case IR::Attribute::PointSize: | ||
| 169 | ctx.Add("MOV.F result.pointsize.x,{};", value); | ||
| 170 | break; | ||
| 171 | case IR::Attribute::PositionX: | ||
| 172 | case IR::Attribute::PositionY: | ||
| 173 | case IR::Attribute::PositionZ: | ||
| 174 | case IR::Attribute::PositionW: | ||
| 175 | ctx.Add("MOV.F result.position.{},{};", swizzle, value); | ||
| 176 | break; | ||
| 177 | case IR::Attribute::ColorFrontDiffuseR: | ||
| 178 | case IR::Attribute::ColorFrontDiffuseG: | ||
| 179 | case IR::Attribute::ColorFrontDiffuseB: | ||
| 180 | case IR::Attribute::ColorFrontDiffuseA: | ||
| 181 | ctx.Add("MOV.F result.color.{},{};", swizzle, value); | ||
| 182 | break; | ||
| 183 | case IR::Attribute::ColorFrontSpecularR: | ||
| 184 | case IR::Attribute::ColorFrontSpecularG: | ||
| 185 | case IR::Attribute::ColorFrontSpecularB: | ||
| 186 | case IR::Attribute::ColorFrontSpecularA: | ||
| 187 | ctx.Add("MOV.F result.color.secondary.{},{};", swizzle, value); | ||
| 188 | break; | ||
| 189 | case IR::Attribute::ColorBackDiffuseR: | ||
| 190 | case IR::Attribute::ColorBackDiffuseG: | ||
| 191 | case IR::Attribute::ColorBackDiffuseB: | ||
| 192 | case IR::Attribute::ColorBackDiffuseA: | ||
| 193 | ctx.Add("MOV.F result.color.back.{},{};", swizzle, value); | ||
| 194 | break; | ||
| 195 | case IR::Attribute::ColorBackSpecularR: | ||
| 196 | case IR::Attribute::ColorBackSpecularG: | ||
| 197 | case IR::Attribute::ColorBackSpecularB: | ||
| 198 | case IR::Attribute::ColorBackSpecularA: | ||
| 199 | ctx.Add("MOV.F result.color.back.secondary.{},{};", swizzle, value); | ||
| 200 | break; | ||
| 201 | case IR::Attribute::FogCoordinate: | ||
| 202 | ctx.Add("MOV.F result.fogcoord.x,{};", value); | ||
| 203 | break; | ||
| 204 | case IR::Attribute::ClipDistance0: | ||
| 205 | case IR::Attribute::ClipDistance1: | ||
| 206 | case IR::Attribute::ClipDistance2: | ||
| 207 | case IR::Attribute::ClipDistance3: | ||
| 208 | case IR::Attribute::ClipDistance4: | ||
| 209 | case IR::Attribute::ClipDistance5: | ||
| 210 | case IR::Attribute::ClipDistance6: | ||
| 211 | case IR::Attribute::ClipDistance7: { | ||
| 212 | const u32 index{static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::ClipDistance0)}; | ||
| 213 | ctx.Add("MOV.F result.clip[{}].x,{};", index, value); | ||
| 214 | break; | ||
| 215 | } | ||
| 216 | default: | ||
| 217 | throw NotImplementedException("Set attribute {}", attr); | ||
| 218 | } | ||
| 219 | } | ||
| 220 | |||
| 221 | void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, ScalarU32 vertex) { | ||
| 222 | // RC.x = base_index | ||
| 223 | // RC.y = masked_index | ||
| 224 | // RC.z = compare_index | ||
| 225 | ctx.Add("SHR.S RC.x,{},2;" | ||
| 226 | "AND.S RC.y,RC.x,3;" | ||
| 227 | "SHR.S RC.z,{},4;", | ||
| 228 | offset, offset); | ||
| 229 | |||
| 230 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 231 | u32 num_endifs{}; | ||
| 232 | const auto read{[&](u32 compare_index, const std::array<std::string, 4>& values) { | ||
| 233 | ++num_endifs; | ||
| 234 | ctx.Add("SEQ.S.CC RC.w,RC.z,{};" // compare_index | ||
| 235 | "IF NE.w;" | ||
| 236 | // X | ||
| 237 | "SEQ.S.CC RC.w,RC.y,0;" | ||
| 238 | "IF NE.w;" | ||
| 239 | "MOV {}.x,{};" | ||
| 240 | "ELSE;" | ||
| 241 | // Y | ||
| 242 | "SEQ.S.CC RC.w,RC.y,1;" | ||
| 243 | "IF NE.w;" | ||
| 244 | "MOV {}.x,{};" | ||
| 245 | "ELSE;" | ||
| 246 | // Z | ||
| 247 | "SEQ.S.CC RC.w,RC.y,2;" | ||
| 248 | "IF NE.w;" | ||
| 249 | "MOV {}.x,{};" | ||
| 250 | "ELSE;" | ||
| 251 | // W | ||
| 252 | "MOV {}.x,{};" | ||
| 253 | "ENDIF;" | ||
| 254 | "ENDIF;" | ||
| 255 | "ENDIF;" | ||
| 256 | "ELSE;", | ||
| 257 | compare_index, ret, values[0], ret, values[1], ret, values[2], ret, values[3]); | ||
| 258 | }}; | ||
| 259 | const auto read_swizzled{[&](u32 compare_index, std::string_view value) { | ||
| 260 | const std::array values{fmt::format("{}.x", value), fmt::format("{}.y", value), | ||
| 261 | fmt::format("{}.z", value), fmt::format("{}.w", value)}; | ||
| 262 | read(compare_index, values); | ||
| 263 | }}; | ||
| 264 | if (ctx.info.loads.AnyComponent(IR::Attribute::PositionX)) { | ||
| 265 | const u32 index{static_cast<u32>(IR::Attribute::PositionX)}; | ||
| 266 | if (IsInputArray(ctx.stage)) { | ||
| 267 | read_swizzled(index, fmt::format("vertex_position{}", VertexIndex(ctx, vertex))); | ||
| 268 | } else { | ||
| 269 | read_swizzled(index, fmt::format("{}.position", ctx.attrib_name)); | ||
| 270 | } | ||
| 271 | } | ||
| 272 | for (u32 index = 0; index < static_cast<u32>(IR::NUM_GENERICS); ++index) { | ||
| 273 | if (!ctx.info.loads.Generic(index)) { | ||
| 274 | continue; | ||
| 275 | } | ||
| 276 | read_swizzled(index, fmt::format("in_attr{}{}[0]", index, VertexIndex(ctx, vertex))); | ||
| 277 | } | ||
| 278 | for (u32 i = 0; i < num_endifs; ++i) { | ||
| 279 | ctx.Add("ENDIF;"); | ||
| 280 | } | ||
| 281 | } | ||
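The decomposition commented at the top of EmitGetAttributeIndexed works on a byte offset: shifting right by 2 yields the 32-bit word index, masking with 3 picks the component within a vec4, and shifting right by 4 picks the vec4 attribute slot that the IF chain compares against. The same arithmetic in plain C++:

    #include <cstdio>

    int main() {
        const unsigned offset = 0x1C;                 // byte offset of some component
        const unsigned base_index = offset >> 2;      // RC.x: 32-bit word index
        const unsigned masked_index = base_index & 3; // RC.y: component within the vec4
        const unsigned compare_index = offset >> 4;   // RC.z: vec4 / attribute slot
        std::printf("attrib[%u].%c\n", compare_index,
                    "xyzw"[masked_index]); // prints attrib[1].w
    }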
| 282 | |||
| 283 | void EmitSetAttributeIndexed([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 offset, | ||
| 284 | [[maybe_unused]] ScalarF32 value, [[maybe_unused]] ScalarU32 vertex) { | ||
| 285 | throw NotImplementedException("GLASM instruction"); | ||
| 286 | } | ||
| 287 | |||
| 288 | void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch) { | ||
| 289 | if (!IR::IsGeneric(patch)) { | ||
| 290 | throw NotImplementedException("Non-generic patch load"); | ||
| 291 | } | ||
| 292 | const u32 index{IR::GenericPatchIndex(patch)}; | ||
| 293 | const u32 element{IR::GenericPatchElement(patch)}; | ||
| 294 | const char swizzle{"xyzw"[element]}; | ||
| 295 | const std::string_view out{ctx.stage == Stage::TessellationControl ? ".out" : ""}; | ||
| 296 | ctx.Add("MOV.F {},primitive{}.patch.attrib[{}].{};", inst, out, index, swizzle); | ||
| 297 | } | ||
| 298 | |||
| 299 | void EmitSetPatch(EmitContext& ctx, IR::Patch patch, ScalarF32 value) { | ||
| 300 | if (IR::IsGeneric(patch)) { | ||
| 301 | const u32 index{IR::GenericPatchIndex(patch)}; | ||
| 302 | const u32 element{IR::GenericPatchElement(patch)}; | ||
| 303 | ctx.Add("MOV.F result.patch.attrib[{}].{},{};", index, "xyzw"[element], value); | ||
| 304 | return; | ||
| 305 | } | ||
| 306 | switch (patch) { | ||
| 307 | case IR::Patch::TessellationLodLeft: | ||
| 308 | case IR::Patch::TessellationLodRight: | ||
| 309 | case IR::Patch::TessellationLodTop: | ||
| 310 | case IR::Patch::TessellationLodBottom: { | ||
| 311 | const u32 index{static_cast<u32>(patch) - static_cast<u32>(IR::Patch::TessellationLodLeft)}; | ||
| 312 | ctx.Add("MOV.F result.patch.tessouter[{}].x,{};", index, value); | ||
| 313 | break; | ||
| 314 | } | ||
| 315 | case IR::Patch::TessellationLodInteriorU: | ||
| 316 | ctx.Add("MOV.F result.patch.tessinner[0].x,{};", value); | ||
| 317 | break; | ||
| 318 | case IR::Patch::TessellationLodInteriorV: | ||
| 319 | ctx.Add("MOV.F result.patch.tessinner[1].x,{};", value); | ||
| 320 | break; | ||
| 321 | default: | ||
| 322 | throw NotImplementedException("Patch {}", patch); | ||
| 323 | } | ||
| 324 | } | ||
| 325 | |||
| 326 | void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, ScalarF32 value) { | ||
| 327 | ctx.Add("MOV.F frag_color{}.{},{};", index, "xyzw"[component], value); | ||
| 328 | } | ||
| 329 | |||
| 330 | void EmitSetSampleMask(EmitContext& ctx, ScalarS32 value) { | ||
| 331 | ctx.Add("MOV.S result.samplemask.x,{};", value); | ||
| 332 | } | ||
| 333 | |||
| 334 | void EmitSetFragDepth(EmitContext& ctx, ScalarF32 value) { | ||
| 335 | ctx.Add("MOV.F result.depth.z,{};", value); | ||
| 336 | } | ||
| 337 | |||
| 338 | void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset) { | ||
| 339 | ctx.Add("MOV.U {},lmem[{}].x;", inst, word_offset); | ||
| 340 | } | ||
| 341 | |||
| 342 | void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value) { | ||
| 343 | ctx.Add("MOV.U lmem[{}].x,{};", word_offset, value); | ||
| 344 | } | ||
| 345 | |||
| 346 | } // namespace Shader::Backend::GLASM | ||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp | |||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp new file mode 100644 index 000000000..ccdf1cbc8 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp | |||
| @@ -0,0 +1,231 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 11 | |||
| 12 | namespace Shader::Backend::GLASM { | ||
| 13 | namespace { | ||
| 14 | std::string_view FpRounding(IR::FpRounding fp_rounding) { | ||
| 15 | switch (fp_rounding) { | ||
| 16 | case IR::FpRounding::DontCare: | ||
| 17 | return ""; | ||
| 18 | case IR::FpRounding::RN: | ||
| 19 | return ".ROUND"; | ||
| 20 | case IR::FpRounding::RZ: | ||
| 21 | return ".TRUNC"; | ||
| 22 | case IR::FpRounding::RM: | ||
| 23 | return ".FLR"; | ||
| 24 | case IR::FpRounding::RP: | ||
| 25 | return ".CEIL"; | ||
| 26 | } | ||
| 27 | throw InvalidArgument("Invalid floating-point rounding {}", fp_rounding); | ||
| 28 | } | ||
| 29 | |||
| 30 | template <typename InputType> | ||
| 31 | void Convert(EmitContext& ctx, IR::Inst& inst, InputType value, std::string_view dest, | ||
| 32 | std::string_view src, bool is_long_result) { | ||
| 33 | const std::string_view fp_rounding{FpRounding(inst.Flags<IR::FpControl>().rounding)}; | ||
| 34 | const auto ret{is_long_result ? ctx.reg_alloc.LongDefine(inst) : ctx.reg_alloc.Define(inst)}; | ||
| 35 | ctx.Add("CVT.{}.{}{} {}.x,{};", dest, src, fp_rounding, ret, value); | ||
| 36 | } | ||
| 37 | } // Anonymous namespace | ||
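The FpRounding table maps IR rounding modes onto GLASM CVT suffixes: round-to-nearest becomes .ROUND, toward zero .TRUNC, toward negative infinity .FLR, toward positive infinity .CEIL, and DontCare adds nothing. A standalone restatement, printing the instruction a hypothetical S32-from-F32 conversion with RZ rounding would produce:

    #include <cstdio>
    #include <cstring>

    const char* FpRounding(const char* ir_mode) {
        if (std::strcmp(ir_mode, "RN") == 0) return ".ROUND"; // to nearest
        if (std::strcmp(ir_mode, "RZ") == 0) return ".TRUNC"; // toward zero
        if (std::strcmp(ir_mode, "RM") == 0) return ".FLR";   // toward -infinity
        if (std::strcmp(ir_mode, "RP") == 0) return ".CEIL";  // toward +infinity
        return "";                                            // DontCare
    }

    int main() {
        std::printf("CVT.S32.F32%s R0.x,R1.x;\n", FpRounding("RZ"));
    }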
| 38 | |||
| 39 | void EmitConvertS16F16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 40 | Convert(ctx, inst, value, "S16", "F16", false); | ||
| 41 | } | ||
| 42 | |||
| 43 | void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 44 | Convert(ctx, inst, value, "S16", "F32", false); | ||
| 45 | } | ||
| 46 | |||
| 47 | void EmitConvertS16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { | ||
| 48 | Convert(ctx, inst, value, "S16", "F64", false); | ||
| 49 | } | ||
| 50 | |||
| 51 | void EmitConvertS32F16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 52 | Convert(ctx, inst, value, "S32", "F16", false); | ||
| 53 | } | ||
| 54 | |||
| 55 | void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 56 | Convert(ctx, inst, value, "S32", "F32", false); | ||
| 57 | } | ||
| 58 | |||
| 59 | void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { | ||
| 60 | Convert(ctx, inst, value, "S32", "F64", false); | ||
| 61 | } | ||
| 62 | |||
| 63 | void EmitConvertS64F16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 64 | Convert(ctx, inst, value, "S64", "F16", true); | ||
| 65 | } | ||
| 66 | |||
| 67 | void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 68 | Convert(ctx, inst, value, "S64", "F32", true); | ||
| 69 | } | ||
| 70 | |||
| 71 | void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { | ||
| 72 | Convert(ctx, inst, value, "S64", "F64", true); | ||
| 73 | } | ||
| 74 | |||
| 75 | void EmitConvertU16F16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 76 | Convert(ctx, inst, value, "U16", "F16", false); | ||
| 77 | } | ||
| 78 | |||
| 79 | void EmitConvertU16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 80 | Convert(ctx, inst, value, "U16", "F32", false); | ||
| 81 | } | ||
| 82 | |||
| 83 | void EmitConvertU16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { | ||
| 84 | Convert(ctx, inst, value, "U16", "F64", false); | ||
| 85 | } | ||
| 86 | |||
| 87 | void EmitConvertU32F16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 88 | Convert(ctx, inst, value, "U32", "F16", false); | ||
| 89 | } | ||
| 90 | |||
| 91 | void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 92 | Convert(ctx, inst, value, "U32", "F32", false); | ||
| 93 | } | ||
| 94 | |||
| 95 | void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { | ||
| 96 | Convert(ctx, inst, value, "U32", "F64", false); | ||
| 97 | } | ||
| 98 | |||
| 99 | void EmitConvertU64F16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 100 | Convert(ctx, inst, value, "U64", "F16", true); | ||
| 101 | } | ||
| 102 | |||
| 103 | void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 104 | Convert(ctx, inst, value, "U64", "F32", true); | ||
| 105 | } | ||
| 106 | |||
| 107 | void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { | ||
| 108 | Convert(ctx, inst, value, "U64", "F64", true); | ||
| 109 | } | ||
| 110 | |||
| 111 | void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) { | ||
| 112 | Convert(ctx, inst, value, "U64", "U32", true); | ||
| 113 | } | ||
| 114 | |||
| 115 | void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 116 | Convert(ctx, inst, value, "U32", "U64", false); | ||
| 117 | } | ||
| 118 | |||
| 119 | void EmitConvertF16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 120 | Convert(ctx, inst, value, "F16", "F32", false); | ||
| 121 | } | ||
| 122 | |||
| 123 | void EmitConvertF32F16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 124 | Convert(ctx, inst, value, "F32", "F16", false); | ||
| 125 | } | ||
| 126 | |||
| 127 | void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { | ||
| 128 | Convert(ctx, inst, value, "F32", "F64", false); | ||
| 129 | } | ||
| 130 | |||
| 131 | void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 132 | Convert(ctx, inst, value, "F64", "F32", true); | ||
| 133 | } | ||
| 134 | |||
| 135 | void EmitConvertF16S8(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 136 | Convert(ctx, inst, value, "F16", "S8", false); | ||
| 137 | } | ||
| 138 | |||
| 139 | void EmitConvertF16S16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 140 | Convert(ctx, inst, value, "F16", "S16", false); | ||
| 141 | } | ||
| 142 | |||
| 143 | void EmitConvertF16S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { | ||
| 144 | Convert(ctx, inst, value, "F16", "S32", false); | ||
| 145 | } | ||
| 146 | |||
| 147 | void EmitConvertF16S64(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 148 | Convert(ctx, inst, value, "F16", "S64", false); | ||
| 149 | } | ||
| 150 | |||
| 151 | void EmitConvertF16U8(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 152 | Convert(ctx, inst, value, "F16", "U8", false); | ||
| 153 | } | ||
| 154 | |||
| 155 | void EmitConvertF16U16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 156 | Convert(ctx, inst, value, "F16", "U16", false); | ||
| 157 | } | ||
| 158 | |||
| 159 | void EmitConvertF16U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) { | ||
| 160 | Convert(ctx, inst, value, "F16", "U32", false); | ||
| 161 | } | ||
| 162 | |||
| 163 | void EmitConvertF16U64(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 164 | Convert(ctx, inst, value, "F16", "U64", false); | ||
| 165 | } | ||
| 166 | |||
| 167 | void EmitConvertF32S8(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 168 | Convert(ctx, inst, value, "F32", "S8", false); | ||
| 169 | } | ||
| 170 | |||
| 171 | void EmitConvertF32S16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 172 | Convert(ctx, inst, value, "F32", "S16", false); | ||
| 173 | } | ||
| 174 | |||
| 175 | void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { | ||
| 176 | Convert(ctx, inst, value, "F32", "S32", false); | ||
| 177 | } | ||
| 178 | |||
| 179 | void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 180 | Convert(ctx, inst, value, "F32", "S64", false); | ||
| 181 | } | ||
| 182 | |||
| 183 | void EmitConvertF32U8(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 184 | Convert(ctx, inst, value, "F32", "U8", false); | ||
| 185 | } | ||
| 186 | |||
| 187 | void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 188 | Convert(ctx, inst, value, "F32", "U16", false); | ||
| 189 | } | ||
| 190 | |||
| 191 | void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) { | ||
| 192 | Convert(ctx, inst, value, "F32", "U32", false); | ||
| 193 | } | ||
| 194 | |||
| 195 | void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 196 | Convert(ctx, inst, value, "F32", "U64", false); | ||
| 197 | } | ||
| 198 | |||
| 199 | void EmitConvertF64S8(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 200 | Convert(ctx, inst, value, "F64", "S8", true); | ||
| 201 | } | ||
| 202 | |||
| 203 | void EmitConvertF64S16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 204 | Convert(ctx, inst, value, "F64", "S16", true); | ||
| 205 | } | ||
| 206 | |||
| 207 | void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { | ||
| 208 | Convert(ctx, inst, value, "F64", "S32", true); | ||
| 209 | } | ||
| 210 | |||
| 211 | void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 212 | Convert(ctx, inst, value, "F64", "S64", true); | ||
| 213 | } | ||
| 214 | |||
| 215 | void EmitConvertF64U8(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 216 | Convert(ctx, inst, value, "F64", "U8", true); | ||
| 217 | } | ||
| 218 | |||
| 219 | void EmitConvertF64U16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 220 | Convert(ctx, inst, value, "F64", "U16", true); | ||
| 221 | } | ||
| 222 | |||
| 223 | void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) { | ||
| 224 | Convert(ctx, inst, value, "F64", "U32", true); | ||
| 225 | } | ||
| 226 | |||
| 227 | void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 228 | Convert(ctx, inst, value, "F64", "U64", true); | ||
| 229 | } | ||
| 230 | |||
| 231 | } // namespace Shader::Backend::GLASM | ||
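Every 64-bit destination above (S64, U64, F64) passes true as the final argument, which selects a long result register. The Convert helper itself is defined near the top of this file; as a rough sketch of its plausible shape (the LongDefine routing matches the calls above, but the CVT.<dst>.<src> mnemonic is an assumption, not quoted from the diff):

    template <typename InputType>
    void Convert(EmitContext& ctx, IR::Inst& inst, InputType value,
                 std::string_view dest, std::string_view src, bool is_long_result) {
        // 64-bit results (S64/U64/F64) need a long register; see the calls above
        const Register ret{is_long_result ? ctx.reg_alloc.LongDefine(inst)
                                          : ctx.reg_alloc.Define(inst)};
        ctx.Add("CVT.{}.{} {}.x,{};", dest, src, ret, value);
    }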
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp new file mode 100644 index 000000000..4ed58619d --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp | |||
| @@ -0,0 +1,414 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 11 | |||
| 12 | namespace Shader::Backend::GLASM { | ||
| 13 | namespace { | ||
| 14 | template <typename InputType> | ||
| 15 | void Compare(EmitContext& ctx, IR::Inst& inst, InputType lhs, InputType rhs, std::string_view op, | ||
| 16 | std::string_view type, bool ordered, bool inequality = false) { | ||
| 17 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 18 | ctx.Add("{}.{} RC.x,{},{};", op, type, lhs, rhs); | ||
| 19 | if (ordered && inequality) { | ||
| 20 | ctx.Add("SEQ.{} RC.y,{},{};" | ||
| 21 | "SEQ.{} RC.z,{},{};" | ||
| 22 | "AND.U RC.x,RC.x,RC.y;" | ||
| 23 | "AND.U RC.x,RC.x,RC.z;" | ||
| 24 | "SNE.S {}.x,RC.x,0;", | ||
| 25 | type, lhs, lhs, type, rhs, rhs, ret); | ||
| 26 | } else if (ordered) { | ||
| 27 | ctx.Add("SNE.S {}.x,RC.x,0;", ret); | ||
| 28 | } else { | ||
| 29 | ctx.Add("SNE.{} RC.y,{},{};" | ||
| 30 | "SNE.{} RC.z,{},{};" | ||
| 31 | "OR.U RC.x,RC.x,RC.y;" | ||
| 32 | "OR.U RC.x,RC.x,RC.z;" | ||
| 33 | "SNE.S {}.x,RC.x,0;", | ||
| 34 | type, lhs, lhs, type, rhs, rhs, ret); | ||
| 35 | } | ||
| 36 | } | ||
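The ordered/unordered split follows IEEE-754: an ordered comparison must evaluate to false when either operand is NaN, while an unordered one must evaluate to true. A standalone scalar model of the two fixup paths (plain C++, illustrative only; the middle branch needs no fixup because the plain comparison is already false for NaN):

    #include <cmath>

    // Ordered inequality: the SNE result is ANDed with two self-equality
    // checks, so a NaN on either side forces the result to false.
    bool OrdNotEqual(float lhs, float rhs) {
        return lhs != rhs && !std::isnan(lhs) && !std::isnan(rhs);
    }

    // Unordered comparison: the result is ORed with two self-inequality
    // checks, so a NaN on either side forces the result to true.
    bool UnordEqual(float lhs, float rhs) {
        return lhs == rhs || std::isnan(lhs) || std::isnan(rhs);
    }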
| 37 | |||
| 38 | template <typename InputType> | ||
| 39 | void Clamp(EmitContext& ctx, Register ret, InputType value, InputType min_value, | ||
| 40 | InputType max_value, std::string_view type) { | ||
| 41 | // Call MAX first so that a NaN input is clamped to min_value rather than propagated | ||
| 42 | ctx.Add("MAX.{} RC.x,{},{};" | ||
| 43 | "MIN.{} {}.x,RC.x,{};", | ||
| 44 | type, min_value, value, type, ret, max_value); | ||
| 45 | } | ||
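The operand order only matters for NaN inputs: assuming the GLASM MAX/MIN follow the fmax/fmin convention of returning the non-NaN operand, running MAX first pins a NaN value to min_value before MIN applies the upper bound. A scalar model:

    #include <cmath>

    // std::fmax(min_value, NaN) == min_value, so a NaN input is clamped
    // to min_value instead of leaking through the MIN stage.
    float ClampModel(float value, float min_value, float max_value) {
        return std::fmin(std::fmax(min_value, value), max_value);
    }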
| 46 | |||
| 47 | std::string_view Precise(IR::Inst& inst) { | ||
| 48 | const bool precise{inst.Flags<IR::FpControl>().no_contraction}; | ||
| 49 | return precise ? ".PREC" : ""; | ||
| 50 | } | ||
| 51 | } // Anonymous namespace | ||
| 52 | |||
| 53 | void EmitFPAbs16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 54 | [[maybe_unused]] Register value) { | ||
| 55 | throw NotImplementedException("GLASM instruction"); | ||
| 56 | } | ||
| 57 | |||
| 58 | void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 59 | ctx.Add("MOV.F {}.x,|{}|;", inst, value); | ||
| 60 | } | ||
| 61 | |||
| 62 | void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { | ||
| 63 | ctx.LongAdd("MOV.F64 {}.x,|{}|;", inst, value); | ||
| 64 | } | ||
| 65 | |||
| 66 | void EmitFPAdd16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 67 | [[maybe_unused]] Register a, [[maybe_unused]] Register b) { | ||
| 68 | throw NotImplementedException("GLASM instruction"); | ||
| 69 | } | ||
| 70 | |||
| 71 | void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) { | ||
| 72 | ctx.Add("ADD.F{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.Define(inst), a, b); | ||
| 73 | } | ||
| 74 | |||
| 75 | void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) { | ||
| 76 | ctx.Add("ADD.F64{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.LongDefine(inst), a, b); | ||
| 77 | } | ||
| 78 | |||
| 79 | void EmitFPFma16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 80 | [[maybe_unused]] Register a, [[maybe_unused]] Register b, | ||
| 81 | [[maybe_unused]] Register c) { | ||
| 82 | throw NotImplementedException("GLASM instruction"); | ||
| 83 | } | ||
| 84 | |||
| 85 | void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b, ScalarF32 c) { | ||
| 86 | ctx.Add("MAD.F{} {}.x,{},{},{};", Precise(inst), ctx.reg_alloc.Define(inst), a, b, c); | ||
| 87 | } | ||
| 88 | |||
| 89 | void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b, ScalarF64 c) { | ||
| 90 | ctx.Add("MAD.F64{} {}.x,{},{},{};", Precise(inst), ctx.reg_alloc.LongDefine(inst), a, b, c); | ||
| 91 | } | ||
| 92 | |||
| 93 | void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) { | ||
| 94 | ctx.Add("MAX.F {}.x,{},{};", inst, a, b); | ||
| 95 | } | ||
| 96 | |||
| 97 | void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) { | ||
| 98 | ctx.LongAdd("MAX.F64 {}.x,{},{};", inst, a, b); | ||
| 99 | } | ||
| 100 | |||
| 101 | void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) { | ||
| 102 | ctx.Add("MIN.F {}.x,{},{};", inst, a, b); | ||
| 103 | } | ||
| 104 | |||
| 105 | void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) { | ||
| 106 | ctx.LongAdd("MIN.F64 {}.x,{},{};", inst, a, b); | ||
| 107 | } | ||
| 108 | |||
| 109 | void EmitFPMul16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 110 | [[maybe_unused]] Register a, [[maybe_unused]] Register b) { | ||
| 111 | throw NotImplementedException("GLASM instruction"); | ||
| 112 | } | ||
| 113 | |||
| 114 | void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) { | ||
| 115 | ctx.Add("MUL.F{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.Define(inst), a, b); | ||
| 116 | } | ||
| 117 | |||
| 118 | void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) { | ||
| 119 | ctx.Add("MUL.F64{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.LongDefine(inst), a, b); | ||
| 120 | } | ||
| 121 | |||
| 122 | void EmitFPNeg16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { | ||
| 123 | throw NotImplementedException("GLASM instruction"); | ||
| 124 | } | ||
| 125 | |||
| 126 | void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, ScalarRegister value) { | ||
| 127 | ctx.Add("MOV.F {}.x,-{};", inst, value); | ||
| 128 | } | ||
| 129 | |||
| 130 | void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 131 | ctx.LongAdd("MOV.F64 {}.x,-{};", inst, value); | ||
| 132 | } | ||
| 133 | |||
| 134 | void EmitFPSin(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 135 | ctx.Add("SIN {}.x,{};", inst, value); | ||
| 136 | } | ||
| 137 | |||
| 138 | void EmitFPCos(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 139 | ctx.Add("COS {}.x,{};", inst, value); | ||
| 140 | } | ||
| 141 | |||
| 142 | void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 143 | ctx.Add("EX2 {}.x,{};", inst, value); | ||
| 144 | } | ||
| 145 | |||
| 146 | void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 147 | ctx.Add("LG2 {}.x,{};", inst, value); | ||
| 148 | } | ||
| 149 | |||
| 150 | void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 151 | ctx.Add("RCP {}.x,{};", inst, value); | ||
| 152 | } | ||
| 153 | |||
| 154 | void EmitFPRecip64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { | ||
| 155 | throw NotImplementedException("GLASM instruction"); | ||
| 156 | } | ||
| 157 | |||
| 158 | void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 159 | ctx.Add("RSQ {}.x,{};", inst, value); | ||
| 160 | } | ||
| 161 | |||
| 162 | void EmitFPRecipSqrt64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { | ||
| 163 | throw NotImplementedException("GLASM instruction"); | ||
| 164 | } | ||
| 165 | |||
| 166 | void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 167 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 168 | ctx.Add("RSQ RC.x,{};RCP {}.x,RC.x;", value, ret); | ||
| 169 | } | ||
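The square root is synthesized from the reciprocal square root, presumably because the target instruction set exposes RSQ and RCP but no direct scalar SQRT here. A scalar model of the emitted pair; note the identity also maps 0 to 0 under IEEE rules, since 1/sqrt(0) is +inf and 1/+inf is 0:

    #include <cmath>

    // sqrt(x) computed as 1 / (1 / sqrt(x)), mirroring RSQ then RCP.
    float SqrtModel(float x) {
        const float rsq = 1.0f / std::sqrt(x); // RSQ RC.x,{value};
        return 1.0f / rsq;                     // RCP {ret}.x,RC.x;
    }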
| 170 | |||
| 171 | void EmitFPSaturate16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { | ||
| 172 | throw NotImplementedException("GLASM instruction"); | ||
| 173 | } | ||
| 174 | |||
| 175 | void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 176 | ctx.Add("MOV.F.SAT {}.x,{};", inst, value); | ||
| 177 | } | ||
| 178 | |||
| 179 | void EmitFPSaturate64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { | ||
| 180 | throw NotImplementedException("GLASM instruction"); | ||
| 181 | } | ||
| 182 | |||
| 183 | void EmitFPClamp16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value, | ||
| 184 | [[maybe_unused]] Register min_value, [[maybe_unused]] Register max_value) { | ||
| 185 | throw NotImplementedException("GLASM instruction"); | ||
| 186 | } | ||
| 187 | |||
| 188 | void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value, ScalarF32 min_value, | ||
| 189 | ScalarF32 max_value) { | ||
| 190 | Clamp(ctx, ctx.reg_alloc.Define(inst), value, min_value, max_value, "F"); | ||
| 191 | } | ||
| 192 | |||
| 193 | void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value, ScalarF64 min_value, | ||
| 194 | ScalarF64 max_value) { | ||
| 195 | Clamp(ctx, ctx.reg_alloc.LongDefine(inst), value, min_value, max_value, "F64"); | ||
| 196 | } | ||
| 197 | |||
| 198 | void EmitFPRoundEven16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { | ||
| 199 | throw NotImplementedException("GLASM instruction"); | ||
| 200 | } | ||
| 201 | |||
| 202 | void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 203 | ctx.Add("ROUND.F {}.x,{};", inst, value); | ||
| 204 | } | ||
| 205 | |||
| 206 | void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { | ||
| 207 | ctx.LongAdd("ROUND.F64 {}.x,{};", inst, value); | ||
| 208 | } | ||
| 209 | |||
| 210 | void EmitFPFloor16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { | ||
| 211 | throw NotImplementedException("GLASM instruction"); | ||
| 212 | } | ||
| 213 | |||
| 214 | void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 215 | ctx.Add("FLR.F {}.x,{};", inst, value); | ||
| 216 | } | ||
| 217 | |||
| 218 | void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { | ||
| 219 | ctx.LongAdd("FLR.F64 {}.x,{};", inst, value); | ||
| 220 | } | ||
| 221 | |||
| 222 | void EmitFPCeil16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { | ||
| 223 | throw NotImplementedException("GLASM instruction"); | ||
| 224 | } | ||
| 225 | |||
| 226 | void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 227 | ctx.Add("CEIL.F {}.x,{};", inst, value); | ||
| 228 | } | ||
| 229 | |||
| 230 | void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { | ||
| 231 | ctx.LongAdd("CEIL.F64 {}.x,{};", inst, value); | ||
| 232 | } | ||
| 233 | |||
| 234 | void EmitFPTrunc16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { | ||
| 235 | throw NotImplementedException("GLASM instruction"); | ||
| 236 | } | ||
| 237 | |||
| 238 | void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 239 | ctx.Add("TRUNC.F {}.x,{};", inst, value); | ||
| 240 | } | ||
| 241 | |||
| 242 | void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { | ||
| 243 | ctx.LongAdd("TRUNC.F64 {}.x,{};", inst, value); | ||
| 244 | } | ||
| 245 | |||
| 246 | void EmitFPOrdEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, | ||
| 247 | [[maybe_unused]] Register rhs) { | ||
| 248 | throw NotImplementedException("GLASM instruction"); | ||
| 249 | } | ||
| 250 | |||
| 251 | void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { | ||
| 252 | Compare(ctx, inst, lhs, rhs, "SEQ", "F", true); | ||
| 253 | } | ||
| 254 | |||
| 255 | void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { | ||
| 256 | Compare(ctx, inst, lhs, rhs, "SEQ", "F64", true); | ||
| 257 | } | ||
| 258 | |||
| 259 | void EmitFPUnordEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, | ||
| 260 | [[maybe_unused]] Register rhs) { | ||
| 261 | throw NotImplementedException("GLASM instruction"); | ||
| 262 | } | ||
| 263 | |||
| 264 | void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { | ||
| 265 | Compare(ctx, inst, lhs, rhs, "SEQ", "F", false); | ||
| 266 | } | ||
| 267 | |||
| 268 | void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { | ||
| 269 | Compare(ctx, inst, lhs, rhs, "SEQ", "F64", false); | ||
| 270 | } | ||
| 271 | |||
| 272 | void EmitFPOrdNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, | ||
| 273 | [[maybe_unused]] Register rhs) { | ||
| 274 | throw NotImplementedException("GLASM instruction"); | ||
| 275 | } | ||
| 276 | |||
| 277 | void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { | ||
| 278 | Compare(ctx, inst, lhs, rhs, "SNE", "F", true, true); | ||
| 279 | } | ||
| 280 | |||
| 281 | void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { | ||
| 282 | Compare(ctx, inst, lhs, rhs, "SNE", "F64", true, true); | ||
| 283 | } | ||
| 284 | |||
| 285 | void EmitFPUnordNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, | ||
| 286 | [[maybe_unused]] Register rhs) { | ||
| 287 | throw NotImplementedException("GLASM instruction"); | ||
| 288 | } | ||
| 289 | |||
| 290 | void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { | ||
| 291 | Compare(ctx, inst, lhs, rhs, "SNE", "F", false, true); | ||
| 292 | } | ||
| 293 | |||
| 294 | void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { | ||
| 295 | Compare(ctx, inst, lhs, rhs, "SNE", "F64", false, true); | ||
| 296 | } | ||
| 297 | |||
| 298 | void EmitFPOrdLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, | ||
| 299 | [[maybe_unused]] Register rhs) { | ||
| 300 | throw NotImplementedException("GLASM instruction"); | ||
| 301 | } | ||
| 302 | |||
| 303 | void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { | ||
| 304 | Compare(ctx, inst, lhs, rhs, "SLT", "F", true); | ||
| 305 | } | ||
| 306 | |||
| 307 | void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { | ||
| 308 | Compare(ctx, inst, lhs, rhs, "SLT", "F64", true); | ||
| 309 | } | ||
| 310 | |||
| 311 | void EmitFPUnordLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, | ||
| 312 | [[maybe_unused]] Register rhs) { | ||
| 313 | throw NotImplementedException("GLASM instruction"); | ||
| 314 | } | ||
| 315 | |||
| 316 | void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { | ||
| 317 | Compare(ctx, inst, lhs, rhs, "SLT", "F", false); | ||
| 318 | } | ||
| 319 | |||
| 320 | void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { | ||
| 321 | Compare(ctx, inst, lhs, rhs, "SLT", "F64", false); | ||
| 322 | } | ||
| 323 | |||
| 324 | void EmitFPOrdGreaterThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, | ||
| 325 | [[maybe_unused]] Register rhs) { | ||
| 326 | throw NotImplementedException("GLASM instruction"); | ||
| 327 | } | ||
| 328 | |||
| 329 | void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { | ||
| 330 | Compare(ctx, inst, lhs, rhs, "SGT", "F", true); | ||
| 331 | } | ||
| 332 | |||
| 333 | void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { | ||
| 334 | Compare(ctx, inst, lhs, rhs, "SGT", "F64", true); | ||
| 335 | } | ||
| 336 | |||
| 337 | void EmitFPUnordGreaterThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, | ||
| 338 | [[maybe_unused]] Register rhs) { | ||
| 339 | throw NotImplementedException("GLASM instruction"); | ||
| 340 | } | ||
| 341 | |||
| 342 | void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { | ||
| 343 | Compare(ctx, inst, lhs, rhs, "SGT", "F", false); | ||
| 344 | } | ||
| 345 | |||
| 346 | void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { | ||
| 347 | Compare(ctx, inst, lhs, rhs, "SGT", "F64", false); | ||
| 348 | } | ||
| 349 | |||
| 350 | void EmitFPOrdLessThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, | ||
| 351 | [[maybe_unused]] Register rhs) { | ||
| 352 | throw NotImplementedException("GLASM instruction"); | ||
| 353 | } | ||
| 354 | |||
| 355 | void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { | ||
| 356 | Compare(ctx, inst, lhs, rhs, "SLE", "F", true); | ||
| 357 | } | ||
| 358 | |||
| 359 | void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { | ||
| 360 | Compare(ctx, inst, lhs, rhs, "SLE", "F64", true); | ||
| 361 | } | ||
| 362 | |||
| 363 | void EmitFPUnordLessThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, | ||
| 364 | [[maybe_unused]] Register rhs) { | ||
| 365 | throw NotImplementedException("GLASM instruction"); | ||
| 366 | } | ||
| 367 | |||
| 368 | void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { | ||
| 369 | Compare(ctx, inst, lhs, rhs, "SLE", "F", false); | ||
| 370 | } | ||
| 371 | |||
| 372 | void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { | ||
| 373 | Compare(ctx, inst, lhs, rhs, "SLE", "F64", false); | ||
| 374 | } | ||
| 375 | |||
| 376 | void EmitFPOrdGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, | ||
| 377 | [[maybe_unused]] Register rhs) { | ||
| 378 | throw NotImplementedException("GLASM instruction"); | ||
| 379 | } | ||
| 380 | |||
| 381 | void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { | ||
| 382 | Compare(ctx, inst, lhs, rhs, "SGE", "F", true); | ||
| 383 | } | ||
| 384 | |||
| 385 | void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { | ||
| 386 | Compare(ctx, inst, lhs, rhs, "SGE", "F64", true); | ||
| 387 | } | ||
| 388 | |||
| 389 | void EmitFPUnordGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, | ||
| 390 | [[maybe_unused]] Register rhs) { | ||
| 391 | throw NotImplementedException("GLASM instruction"); | ||
| 392 | } | ||
| 393 | |||
| 394 | void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { | ||
| 395 | Compare(ctx, inst, lhs, rhs, "SGE", "F", false); | ||
| 396 | } | ||
| 397 | |||
| 398 | void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { | ||
| 399 | Compare(ctx, inst, lhs, rhs, "SGE", "F64", false); | ||
| 400 | } | ||
| 401 | |||
| 402 | void EmitFPIsNan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { | ||
| 403 | throw NotImplementedException("GLASM instruction"); | ||
| 404 | } | ||
| 405 | |||
| 406 | void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 407 | Compare(ctx, inst, value, value, "SNE", "F", true, false); | ||
| 408 | } | ||
| 409 | |||
| 410 | void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { | ||
| 411 | Compare(ctx, inst, value, value, "SNE", "F64", true, false); | ||
| 412 | } | ||
| 413 | |||
| 414 | } // namespace Shader::Backend::GLASM | ||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp new file mode 100644 index 000000000..09e3a9b82 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp | |||
| @@ -0,0 +1,850 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <utility> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 11 | |||
| 12 | namespace Shader::Backend::GLASM { | ||
| 13 | namespace { | ||
| 14 | struct ScopedRegister { | ||
| 15 | ScopedRegister() = default; | ||
| 16 | ScopedRegister(RegAlloc& reg_alloc_) : reg_alloc{®_alloc_}, reg{reg_alloc->AllocReg()} {} | ||
| 17 | |||
| 18 | ~ScopedRegister() { | ||
| 19 | if (reg_alloc) { | ||
| 20 | reg_alloc->FreeReg(reg); | ||
| 21 | } | ||
| 22 | } | ||
| 23 | |||
| 24 | ScopedRegister& operator=(ScopedRegister&& rhs) noexcept { | ||
| 25 | if (reg_alloc) { | ||
| 26 | reg_alloc->FreeReg(reg); | ||
| 27 | } | ||
| 28 | reg_alloc = std::exchange(rhs.reg_alloc, nullptr); | ||
| 29 | reg = rhs.reg; | ||
| 30 | return *this; | ||
| 31 | } | ||
| 32 | |||
| 33 | ScopedRegister(ScopedRegister&& rhs) noexcept | ||
| 34 | : reg_alloc{std::exchange(rhs.reg_alloc, nullptr)}, reg{rhs.reg} {} | ||
| 35 | |||
| 36 | ScopedRegister& operator=(const ScopedRegister&) = delete; | ||
| 37 | ScopedRegister(const ScopedRegister&) = delete; | ||
| 38 | |||
| 39 | RegAlloc* reg_alloc{}; | ||
| 40 | Register reg; | ||
| 41 | }; | ||
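ScopedRegister is a move-only RAII guard: it takes a scratch register from the allocator on construction and frees it when it goes out of scope, while the default-constructed state (null reg_alloc) makes the destructor a no-op. That no-op state is what lets the emitters below declare a staging register unconditionally and arm it only on the paths that need it. A hypothetical usage sketch:

    // Hypothetical: hold a scratch register for one emission scope.
    void Example(EmitContext& ctx) {
        ScopedRegister tmp{ctx.reg_alloc};  // calls AllocReg()
        ctx.Add("MOV.F {}.x,0;", tmp.reg);
    }                                       // destructor calls FreeReg()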
| 42 | |||
| 43 | std::string Texture(EmitContext& ctx, IR::TextureInstInfo info, | ||
| 44 | [[maybe_unused]] const IR::Value& index) { | ||
| 45 | // FIXME: indexed reads | ||
| 46 | if (info.type == TextureType::Buffer) { | ||
| 47 | return fmt::format("texture[{}]", ctx.texture_buffer_bindings.at(info.descriptor_index)); | ||
| 48 | } else { | ||
| 49 | return fmt::format("texture[{}]", ctx.texture_bindings.at(info.descriptor_index)); | ||
| 50 | } | ||
| 51 | } | ||
| 52 | |||
| 53 | std::string Image(EmitContext& ctx, IR::TextureInstInfo info, | ||
| 54 | [[maybe_unused]] const IR::Value& index) { | ||
| 55 | // FIXME: indexed reads | ||
| 56 | if (info.type == TextureType::Buffer) { | ||
| 57 | return fmt::format("image[{}]", ctx.image_buffer_bindings.at(info.descriptor_index)); | ||
| 58 | } else { | ||
| 59 | return fmt::format("image[{}]", ctx.image_bindings.at(info.descriptor_index)); | ||
| 60 | } | ||
| 61 | } | ||
| 62 | |||
| 63 | std::string_view TextureType(IR::TextureInstInfo info) { | ||
| 64 | if (info.is_depth) { | ||
| 65 | switch (info.type) { | ||
| 66 | case TextureType::Color1D: | ||
| 67 | return "SHADOW1D"; | ||
| 68 | case TextureType::ColorArray1D: | ||
| 69 | return "SHADOWARRAY1D"; | ||
| 70 | case TextureType::Color2D: | ||
| 71 | return "SHADOW2D"; | ||
| 72 | case TextureType::ColorArray2D: | ||
| 73 | return "SHADOWARRAY2D"; | ||
| 74 | case TextureType::Color3D: | ||
| 75 | return "SHADOW3D"; | ||
| 76 | case TextureType::ColorCube: | ||
| 77 | return "SHADOWCUBE"; | ||
| 78 | case TextureType::ColorArrayCube: | ||
| 79 | return "SHADOWARRAYCUBE"; | ||
| 80 | case TextureType::Buffer: | ||
| 81 | return "SHADOWBUFFER"; | ||
| 82 | } | ||
| 83 | } else { | ||
| 84 | switch (info.type) { | ||
| 85 | case TextureType::Color1D: | ||
| 86 | return "1D"; | ||
| 87 | case TextureType::ColorArray1D: | ||
| 88 | return "ARRAY1D"; | ||
| 89 | case TextureType::Color2D: | ||
| 90 | return "2D"; | ||
| 91 | case TextureType::ColorArray2D: | ||
| 92 | return "ARRAY2D"; | ||
| 93 | case TextureType::Color3D: | ||
| 94 | return "3D"; | ||
| 95 | case TextureType::ColorCube: | ||
| 96 | return "CUBE"; | ||
| 97 | case TextureType::ColorArrayCube: | ||
| 98 | return "ARRAYCUBE"; | ||
| 99 | case TextureType::Buffer: | ||
| 100 | return "BUFFER"; | ||
| 101 | } | ||
| 102 | } | ||
| 103 | throw InvalidArgument("Invalid texture type {}", info.type.Value()); | ||
| 104 | } | ||
| 105 | |||
| 106 | std::string Offset(EmitContext& ctx, const IR::Value& offset) { | ||
| 107 | if (offset.IsEmpty()) { | ||
| 108 | return ""; | ||
| 109 | } | ||
| 110 | return fmt::format(",offset({})", Register{ctx.reg_alloc.Consume(offset)}); | ||
| 111 | } | ||
| 112 | |||
| 113 | std::pair<ScopedRegister, ScopedRegister> AllocOffsetsRegs(EmitContext& ctx, | ||
| 114 | const IR::Value& offset2) { | ||
| 115 | if (offset2.IsEmpty()) { | ||
| 116 | return {}; | ||
| 117 | } else { | ||
| 118 | return {ctx.reg_alloc, ctx.reg_alloc}; | ||
| 119 | } | ||
| 120 | } | ||
| 121 | |||
| 122 | void SwizzleOffsets(EmitContext& ctx, Register off_x, Register off_y, const IR::Value& offset1, | ||
| 123 | const IR::Value& offset2) { | ||
| 124 | const Register offsets_a{ctx.reg_alloc.Consume(offset1)}; | ||
| 125 | const Register offsets_b{ctx.reg_alloc.Consume(offset2)}; | ||
| 126 | // Input swizzle: [XYXY] [XYXY] | ||
| 127 | // Output swizzle: [XXXX] [YYYY] | ||
| 128 | ctx.Add("MOV {}.x,{}.x;" | ||
| 129 | "MOV {}.y,{}.z;" | ||
| 130 | "MOV {}.z,{}.x;" | ||
| 131 | "MOV {}.w,{}.z;" | ||
| 132 | "MOV {}.x,{}.y;" | ||
| 133 | "MOV {}.y,{}.w;" | ||
| 134 | "MOV {}.z,{}.y;" | ||
| 135 | "MOV {}.w,{}.w;", | ||
| 136 | off_x, offsets_a, off_x, offsets_a, off_x, offsets_b, off_x, offsets_b, off_y, | ||
| 137 | offsets_a, off_y, offsets_a, off_y, offsets_b, off_y, offsets_b); | ||
| 138 | } | ||
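As the comments note, each input register packs two (x,y) offset pairs, and the eight MOVs regroup them so that off_x carries the four X components and off_y the four Y components. A standalone model of the permutation, with indices matching the MOV swizzles above:

    #include <array>
    #include <utility>

    // [XYXY],[XYXY] -> [XXXX],[YYYY]
    std::pair<std::array<int, 4>, std::array<int, 4>>
    RegroupOffsets(const std::array<int, 4>& a, const std::array<int, 4>& b) {
        const std::array<int, 4> xs{a[0], a[2], b[0], b[2]};
        const std::array<int, 4> ys{a[1], a[3], b[1], b[3]};
        return {xs, ys};
    }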
| 139 | |||
| 140 | std::string GradOffset(const IR::Value& offset) { | ||
| 141 | if (offset.IsImmediate()) { | ||
| 142 | LOG_WARNING(Shader_GLASM, "Gradient offset is a scalar immediate"); | ||
| 143 | return ""; | ||
| 144 | } | ||
| 145 | IR::Inst* const vector{offset.InstRecursive()}; | ||
| 146 | if (!vector->AreAllArgsImmediates()) { | ||
| 147 | LOG_WARNING(Shader_GLASM, "Gradient offset vector is not immediate"); | ||
| 148 | return ""; | ||
| 149 | } | ||
| 150 | switch (vector->NumArgs()) { | ||
| 151 | case 1: | ||
| 152 | return fmt::format(",({})", static_cast<s32>(vector->Arg(0).U32())); | ||
| 153 | case 2: | ||
| 154 | return fmt::format(",({},{})", static_cast<s32>(vector->Arg(0).U32()), | ||
| 155 | static_cast<s32>(vector->Arg(1).U32())); | ||
| 156 | default: | ||
| 157 | throw LogicError("Invalid number of gradient offsets {}", vector->NumArgs()); | ||
| 158 | } | ||
| 159 | } | ||
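Immediate offset components are stored as U32 in the IR, so negative offsets only survive through the static_cast<s32> reinterpretation: a raw 0xFFFFFFFC component formats as -4. A model of the two-component case (the function name is illustrative):

    #include <cstdint>
    #include <string>
    #include <fmt/format.h>

    // U32 payloads reinterpreted as signed before formatting, as above.
    // GradOffsetModel(0xFFFFFFFCu, 4u) == ",(-4,4)"
    std::string GradOffsetModel(std::uint32_t x, std::uint32_t y) {
        return fmt::format(",({},{})", static_cast<std::int32_t>(x),
                           static_cast<std::int32_t>(y));
    }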
| 160 | |||
| 161 | std::pair<std::string, ScopedRegister> Coord(EmitContext& ctx, const IR::Value& coord) { | ||
| 162 | if (coord.IsImmediate()) { | ||
| 163 | ScopedRegister scoped_reg(ctx.reg_alloc); | ||
| 164 | ctx.Add("MOV.U {}.x,{};", scoped_reg.reg, ScalarU32{ctx.reg_alloc.Consume(coord)}); | ||
| 165 | return {fmt::to_string(scoped_reg.reg), std::move(scoped_reg)}; | ||
| 166 | } | ||
| 167 | std::string coord_vec{fmt::to_string(Register{ctx.reg_alloc.Consume(coord)})}; | ||
| 168 | if (coord.InstRecursive()->HasUses()) { | ||
| 169 | // Move coords that still have uses to a separate register; this should never happen | ||
| 170 | // in practice because vectors are only assembled for immediate texture instructions | ||
| 171 | ctx.Add("MOV.F RC,{};", coord_vec); | ||
| 172 | coord_vec = "RC"; | ||
| 173 | } | ||
| 174 | return {std::move(coord_vec), ScopedRegister{}}; | ||
| 175 | } | ||
| 176 | |||
| 177 | void StoreSparse(EmitContext& ctx, IR::Inst* sparse_inst) { | ||
| 178 | if (!sparse_inst) { | ||
| 179 | return; | ||
| 180 | } | ||
| 181 | const Register sparse_ret{ctx.reg_alloc.Define(*sparse_inst)}; | ||
| 182 | ctx.Add("MOV.S {},-1;" | ||
| 183 | "MOV.S {}(NONRESIDENT),0;", | ||
| 184 | sparse_ret, sparse_ret); | ||
| 185 | } | ||
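The sparse result register is seeded with -1 (all pages resident, i.e. true) and the conditional (NONRESIDENT) write then zeroes it when the preceding .SPARSE fetch touched unmapped memory. A scalar model, assuming that reading of the condition-code semantics:

    // -1 unless the previous sparse fetch reported non-resident pages.
    int SparseResidencyModel(bool nonresident) {
        int ret = -1;       // MOV.S {ret},-1;
        if (nonresident) {  // the (NONRESIDENT) write mask
            ret = 0;        // MOV.S {ret}(NONRESIDENT),0;
        }
        return ret;
    }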
| 186 | |||
| 187 | std::string_view FormatStorage(ImageFormat format) { | ||
| 188 | switch (format) { | ||
| 189 | case ImageFormat::Typeless: | ||
| 190 | return "U"; | ||
| 191 | case ImageFormat::R8_UINT: | ||
| 192 | return "U8"; | ||
| 193 | case ImageFormat::R8_SINT: | ||
| 194 | return "S8"; | ||
| 195 | case ImageFormat::R16_UINT: | ||
| 196 | return "U16"; | ||
| 197 | case ImageFormat::R16_SINT: | ||
| 198 | return "S16"; | ||
| 199 | case ImageFormat::R32_UINT: | ||
| 200 | return "U32"; | ||
| 201 | case ImageFormat::R32G32_UINT: | ||
| 202 | return "U32X2"; | ||
| 203 | case ImageFormat::R32G32B32A32_UINT: | ||
| 204 | return "U32X4"; | ||
| 205 | } | ||
| 206 | throw InvalidArgument("Invalid image format {}", format); | ||
| 207 | } | ||
| 208 | |||
| 209 | template <typename T> | ||
| 210 | void ImageAtomic(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, T value, | ||
| 211 | std::string_view op) { | ||
| 212 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 213 | const std::string_view type{TextureType(info)}; | ||
| 214 | const std::string image{Image(ctx, info, index)}; | ||
| 215 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 216 | ctx.Add("ATOMIM.{} {},{},{},{},{};", op, ret, value, coord, image, type); | ||
| 217 | } | ||
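All of the image atomics below funnel through this helper; only the opcode suffix changes. As an illustration, assuming the result register is R0, the value R1, the coordinates R2, and image binding 0 on a 2D image, the format string expands as follows:

    #include <string>
    #include <fmt/format.h>

    // Reproduces the ATOMIM format string with assumed operands; yields
    // "ATOMIM.ADD.U32 R0,R1,R2,image[0],2D;"
    const std::string example =
        fmt::format("ATOMIM.{} {},{},{},{},{};",
                    "ADD.U32", "R0", "R1", "R2", "image[0]", "2D");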
| 218 | |||
| 219 | IR::Inst* PrepareSparse(IR::Inst& inst) { | ||
| 220 | const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; | ||
| 221 | if (sparse_inst) { | ||
| 222 | sparse_inst->Invalidate(); | ||
| 223 | } | ||
| 224 | return sparse_inst; | ||
| 225 | } | ||
| 226 | } // Anonymous namespace | ||
| 227 | |||
| 228 | void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 229 | const IR::Value& coord, Register bias_lc, const IR::Value& offset) { | ||
| 230 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 231 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 232 | const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; | ||
| 233 | const std::string_view lod_clamp_mod{info.has_lod_clamp ? ".LODCLAMP" : ""}; | ||
| 234 | const std::string_view type{TextureType(info)}; | ||
| 235 | const std::string texture{Texture(ctx, info, index)}; | ||
| 236 | const std::string offset_vec{Offset(ctx, offset)}; | ||
| 237 | const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; | ||
| 238 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 239 | if (info.has_bias) { | ||
| 240 | if (info.type == TextureType::ColorArrayCube) { | ||
| 241 | ctx.Add("TXB.F{}{} {},{},{},{},ARRAYCUBE{};", lod_clamp_mod, sparse_mod, ret, coord_vec, | ||
| 242 | bias_lc, texture, offset_vec); | ||
| 243 | } else { | ||
| 244 | if (info.has_lod_clamp) { | ||
| 245 | ctx.Add("MOV.F {}.w,{}.x;" | ||
| 246 | "TXB.F.LODCLAMP{} {},{},{}.y,{},{}{};", | ||
| 247 | coord_vec, bias_lc, sparse_mod, ret, coord_vec, bias_lc, texture, type, | ||
| 248 | offset_vec); | ||
| 249 | } else { | ||
| 250 | ctx.Add("MOV.F {}.w,{}.x;" | ||
| 251 | "TXB.F{} {},{},{},{}{};", | ||
| 252 | coord_vec, bias_lc, sparse_mod, ret, coord_vec, texture, type, offset_vec); | ||
| 253 | } | ||
| 254 | } | ||
| 255 | } else { | ||
| 256 | if (info.has_lod_clamp && info.type == TextureType::ColorArrayCube) { | ||
| 257 | ctx.Add("TEX.F.LODCLAMP{} {},{},{},{},ARRAYCUBE{};", sparse_mod, ret, coord_vec, | ||
| 258 | bias_lc, texture, offset_vec); | ||
| 259 | } else { | ||
| 260 | ctx.Add("TEX.F{}{} {},{},{},{}{};", lod_clamp_mod, sparse_mod, ret, coord_vec, texture, | ||
| 261 | type, offset_vec); | ||
| 262 | } | ||
| 263 | } | ||
| 264 | StoreSparse(ctx, sparse_inst); | ||
| 265 | } | ||
| 266 | |||
| 267 | void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 268 | const IR::Value& coord, ScalarF32 lod, const IR::Value& offset) { | ||
| 269 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 270 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 271 | const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; | ||
| 272 | const std::string_view type{TextureType(info)}; | ||
| 273 | const std::string texture{Texture(ctx, info, index)}; | ||
| 274 | const std::string offset_vec{Offset(ctx, offset)}; | ||
| 275 | const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; | ||
| 276 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 277 | if (info.type == TextureType::ColorArrayCube) { | ||
| 278 | ctx.Add("TXL.F{} {},{},{},{},ARRAYCUBE{};", sparse_mod, ret, coord_vec, lod, texture, | ||
| 279 | offset_vec); | ||
| 280 | } else { | ||
| 281 | ctx.Add("MOV.F {}.w,{};" | ||
| 282 | "TXL.F{} {},{},{},{}{};", | ||
| 283 | coord_vec, lod, sparse_mod, ret, coord_vec, texture, type, offset_vec); | ||
| 284 | } | ||
| 285 | StoreSparse(ctx, sparse_inst); | ||
| 286 | } | ||
| 287 | |||
| 288 | void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 289 | const IR::Value& coord, const IR::Value& dref, | ||
| 290 | const IR::Value& bias_lc, const IR::Value& offset) { | ||
| 291 | // Allocate the staging register early to avoid aliasing registers consumed below | ||
| 292 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 293 | ScopedRegister staging; | ||
| 294 | if (info.type == TextureType::ColorArrayCube) { | ||
| 295 | staging = ScopedRegister{ctx.reg_alloc}; | ||
| 296 | } | ||
| 297 | const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)}; | ||
| 298 | const Register bias_lc_vec{ctx.reg_alloc.Consume(bias_lc)}; | ||
| 299 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 300 | const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; | ||
| 301 | const std::string_view type{TextureType(info)}; | ||
| 302 | const std::string texture{Texture(ctx, info, index)}; | ||
| 303 | const std::string offset_vec{Offset(ctx, offset)}; | ||
| 304 | const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; | ||
| 305 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 306 | if (info.has_bias) { | ||
| 307 | if (info.has_lod_clamp) { | ||
| 308 | switch (info.type) { | ||
| 309 | case TextureType::Color1D: | ||
| 310 | case TextureType::ColorArray1D: | ||
| 311 | case TextureType::Color2D: | ||
| 312 | ctx.Add("MOV.F {}.z,{};" | ||
| 313 | "MOV.F {}.w,{}.x;" | ||
| 314 | "TXB.F.LODCLAMP{} {},{},{}.y,{},{}{};", | ||
| 315 | coord_vec, dref_val, coord_vec, bias_lc_vec, sparse_mod, ret, coord_vec, | ||
| 316 | bias_lc_vec, texture, type, offset_vec); | ||
| 317 | break; | ||
| 318 | case TextureType::ColorArray2D: | ||
| 319 | case TextureType::ColorCube: | ||
| 320 | ctx.Add("MOV.F {}.w,{};" | ||
| 321 | "TXB.F.LODCLAMP{} {},{},{},{},{}{};", | ||
| 322 | coord_vec, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec, texture, type, | ||
| 323 | offset_vec); | ||
| 324 | break; | ||
| 325 | default: | ||
| 326 | throw NotImplementedException("Invalid type {} with bias and lod clamp", | ||
| 327 | info.type.Value()); | ||
| 328 | } | ||
| 329 | } else { | ||
| 330 | switch (info.type) { | ||
| 331 | case TextureType::Color1D: | ||
| 332 | case TextureType::ColorArray1D: | ||
| 333 | case TextureType::Color2D: | ||
| 334 | ctx.Add("MOV.F {}.z,{};" | ||
| 335 | "MOV.F {}.w,{}.x;" | ||
| 336 | "TXB.F{} {},{},{},{}{};", | ||
| 337 | coord_vec, dref_val, coord_vec, bias_lc_vec, sparse_mod, ret, coord_vec, | ||
| 338 | texture, type, offset_vec); | ||
| 339 | break; | ||
| 340 | case TextureType::ColorArray2D: | ||
| 341 | case TextureType::ColorCube: | ||
| 342 | ctx.Add("MOV.F {}.w,{};" | ||
| 343 | "TXB.F{} {},{},{},{},{}{};", | ||
| 344 | coord_vec, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec, texture, type, | ||
| 345 | offset_vec); | ||
| 346 | break; | ||
| 347 | case TextureType::ColorArrayCube: | ||
| 348 | ctx.Add("MOV.F {}.x,{};" | ||
| 349 | "MOV.F {}.y,{}.x;" | ||
| 350 | "TXB.F{} {},{},{},{},{}{};", | ||
| 351 | staging.reg, dref_val, staging.reg, bias_lc_vec, sparse_mod, ret, coord_vec, | ||
| 352 | staging.reg, texture, type, offset_vec); | ||
| 353 | break; | ||
| 354 | default: | ||
| 355 | throw NotImplementedException("Invalid type {}", info.type.Value()); | ||
| 356 | } | ||
| 357 | } | ||
| 358 | } else { | ||
| 359 | if (info.has_lod_clamp) { | ||
| 360 | if (info.type != TextureType::ColorArrayCube) { | ||
| 361 | const bool w_swizzle{info.type == TextureType::ColorArray2D || | ||
| 362 | info.type == TextureType::ColorCube}; | ||
| 363 | const char dref_swizzle{w_swizzle ? 'w' : 'z'}; | ||
| 364 | ctx.Add("MOV.F {}.{},{};" | ||
| 365 | "TEX.F.LODCLAMP{} {},{},{},{},{}{};", | ||
| 366 | coord_vec, dref_swizzle, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec, | ||
| 367 | texture, type, offset_vec); | ||
| 368 | } else { | ||
| 369 | ctx.Add("MOV.F {}.x,{};" | ||
| 370 | "MOV.F {}.y,{};" | ||
| 371 | "TEX.F.LODCLAMP{} {},{},{},{},{}{};", | ||
| 372 | staging.reg, dref_val, staging.reg, bias_lc_vec, sparse_mod, ret, coord_vec, | ||
| 373 | staging.reg, texture, type, offset_vec); | ||
| 374 | } | ||
| 375 | } else { | ||
| 376 | if (info.type != TextureType::ColorArrayCube) { | ||
| 377 | const bool w_swizzle{info.type == TextureType::ColorArray2D || | ||
| 378 | info.type == TextureType::ColorCube}; | ||
| 379 | const char dref_swizzle{w_swizzle ? 'w' : 'z'}; | ||
| 380 | ctx.Add("MOV.F {}.{},{};" | ||
| 381 | "TEX.F{} {},{},{},{}{};", | ||
| 382 | coord_vec, dref_swizzle, dref_val, sparse_mod, ret, coord_vec, texture, | ||
| 383 | type, offset_vec); | ||
| 384 | } else { | ||
| 385 | ctx.Add("TEX.F{} {},{},{},{},{}{};", sparse_mod, ret, coord_vec, dref_val, texture, | ||
| 386 | type, offset_vec); | ||
| 387 | } | ||
| 388 | } | ||
| 389 | } | ||
| 390 | StoreSparse(ctx, sparse_inst); | ||
| 391 | } | ||
| 392 | |||
| 393 | void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 394 | const IR::Value& coord, const IR::Value& dref, | ||
| 395 | const IR::Value& lod, const IR::Value& offset) { | ||
| 396 | // Allocate the staging register early to avoid aliasing registers consumed below | ||
| 397 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 398 | ScopedRegister staging; | ||
| 399 | if (info.type == TextureType::ColorArrayCube) { | ||
| 400 | staging = ScopedRegister{ctx.reg_alloc}; | ||
| 401 | } | ||
| 402 | const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)}; | ||
| 403 | const ScalarF32 lod_val{ctx.reg_alloc.Consume(lod)}; | ||
| 404 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 405 | const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; | ||
| 406 | const std::string_view type{TextureType(info)}; | ||
| 407 | const std::string texture{Texture(ctx, info, index)}; | ||
| 408 | const std::string offset_vec{Offset(ctx, offset)}; | ||
| 409 | const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; | ||
| 410 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 411 | switch (info.type) { | ||
| 412 | case TextureType::Color1D: | ||
| 413 | case TextureType::ColorArray1D: | ||
| 414 | case TextureType::Color2D: | ||
| 415 | ctx.Add("MOV.F {}.z,{};" | ||
| 416 | "MOV.F {}.w,{};" | ||
| 417 | "TXL.F{} {},{},{},{}{};", | ||
| 418 | coord_vec, dref_val, coord_vec, lod_val, sparse_mod, ret, coord_vec, texture, type, | ||
| 419 | offset_vec); | ||
| 420 | break; | ||
| 421 | case TextureType::ColorArray2D: | ||
| 422 | case TextureType::ColorCube: | ||
| 423 | ctx.Add("MOV.F {}.w,{};" | ||
| 424 | "TXL.F{} {},{},{},{},{}{};", | ||
| 425 | coord_vec, dref_val, sparse_mod, ret, coord_vec, lod_val, texture, type, | ||
| 426 | offset_vec); | ||
| 427 | break; | ||
| 428 | case TextureType::ColorArrayCube: | ||
| 429 | ctx.Add("MOV.F {}.x,{};" | ||
| 430 | "MOV.F {}.y,{};" | ||
| 431 | "TXL.F{} {},{},{},{},{}{};", | ||
| 432 | staging.reg, dref_val, staging.reg, lod_val, sparse_mod, ret, coord_vec, | ||
| 433 | staging.reg, texture, type, offset_vec); | ||
| 434 | break; | ||
| 435 | default: | ||
| 436 | throw NotImplementedException("Invalid type {}", info.type.Value()); | ||
| 437 | } | ||
| 438 | StoreSparse(ctx, sparse_inst); | ||
| 439 | } | ||
| 440 | |||
| 441 | void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 442 | const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2) { | ||
| 443 | // Allocate offsets early so they don't overwrite any consumed register | ||
| 444 | const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)}; | ||
| 445 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 446 | const char comp{"xyzw"[info.gather_component]}; | ||
| 447 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 448 | const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; | ||
| 449 | const std::string_view type{TextureType(info)}; | ||
| 450 | const std::string texture{Texture(ctx, info, index)}; | ||
| 451 | const Register coord_vec{ctx.reg_alloc.Consume(coord)}; | ||
| 452 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 453 | if (offset2.IsEmpty()) { | ||
| 454 | const std::string offset_vec{Offset(ctx, offset)}; | ||
| 455 | ctx.Add("TXG.F{} {},{},{}.{},{}{};", sparse_mod, ret, coord_vec, texture, comp, type, | ||
| 456 | offset_vec); | ||
| 457 | } else { | ||
| 458 | SwizzleOffsets(ctx, off_x.reg, off_y.reg, offset, offset2); | ||
| 459 | ctx.Add("TXGO.F{} {},{},{},{},{}.{},{};", sparse_mod, ret, coord_vec, off_x.reg, off_y.reg, | ||
| 460 | texture, comp, type); | ||
| 461 | } | ||
| 462 | StoreSparse(ctx, sparse_inst); | ||
| 463 | } | ||
| 464 | |||
| 465 | void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 466 | const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2, | ||
| 467 | const IR::Value& dref) { | ||
| 468 | // FIXME: This instruction is not working as expected | ||
| 469 | |||
| 470 | // Allocate offsets early so they don't overwrite any consumed register | ||
| 471 | const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)}; | ||
| 472 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 473 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 474 | const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; | ||
| 475 | const std::string_view type{TextureType(info)}; | ||
| 476 | const std::string texture{Texture(ctx, info, index)}; | ||
| 477 | const Register coord_vec{ctx.reg_alloc.Consume(coord)}; | ||
| 478 | const ScalarF32 dref_value{ctx.reg_alloc.Consume(dref)}; | ||
| 479 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 480 | std::string args; | ||
| 481 | switch (info.type) { | ||
| 482 | case TextureType::Color2D: | ||
| 483 | ctx.Add("MOV.F {}.z,{};", coord_vec, dref_value); | ||
| 484 | args = fmt::to_string(coord_vec); | ||
| 485 | break; | ||
| 486 | case TextureType::ColorArray2D: | ||
| 487 | case TextureType::ColorCube: | ||
| 488 | ctx.Add("MOV.F {}.w,{};", coord_vec, dref_value); | ||
| 489 | args = fmt::to_string(coord_vec); | ||
| 490 | break; | ||
| 491 | case TextureType::ColorArrayCube: | ||
| 492 | args = fmt::format("{},{}", coord_vec, dref_value); | ||
| 493 | break; | ||
| 494 | default: | ||
| 495 | throw NotImplementedException("Invalid type {}", info.type.Value()); | ||
| 496 | } | ||
| 497 | if (offset2.IsEmpty()) { | ||
| 498 | const std::string offset_vec{Offset(ctx, offset)}; | ||
| 499 | ctx.Add("TXG.F{} {},{},{},{}{};", sparse_mod, ret, args, texture, type, offset_vec); | ||
| 500 | } else { | ||
| 501 | SwizzleOffsets(ctx, off_x.reg, off_y.reg, offset, offset2); | ||
| 502 | ctx.Add("TXGO.F{} {},{},{},{},{},{};", sparse_mod, ret, args, off_x.reg, off_y.reg, texture, | ||
| 503 | type); | ||
| 504 | } | ||
| 505 | StoreSparse(ctx, sparse_inst); | ||
| 506 | } | ||
| 507 | |||
| 508 | void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 509 | const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms) { | ||
| 510 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 511 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 512 | const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; | ||
| 513 | const std::string_view type{TextureType(info)}; | ||
| 514 | const std::string texture{Texture(ctx, info, index)}; | ||
| 515 | const std::string offset_vec{Offset(ctx, offset)}; | ||
| 516 | const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; | ||
| 517 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 518 | if (info.type == TextureType::Buffer) { | ||
| 519 | ctx.Add("TXF.F{} {},{},{},{}{};", sparse_mod, ret, coord_vec, texture, type, offset_vec); | ||
| 520 | } else if (ms.type != Type::Void) { | ||
| 521 | ctx.Add("MOV.S {}.w,{};" | ||
| 522 | "TXFMS.F{} {},{},{},{}{};", | ||
| 523 | coord_vec, ms, sparse_mod, ret, coord_vec, texture, type, offset_vec); | ||
| 524 | } else { | ||
| 525 | ctx.Add("MOV.S {}.w,{};" | ||
| 526 | "TXF.F{} {},{},{},{}{};", | ||
| 527 | coord_vec, lod, sparse_mod, ret, coord_vec, texture, type, offset_vec); | ||
| 528 | } | ||
| 529 | StoreSparse(ctx, sparse_inst); | ||
| 530 | } | ||
| 531 | |||
| 532 | void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 533 | ScalarS32 lod) { | ||
| 534 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 535 | const std::string texture{Texture(ctx, info, index)}; | ||
| 536 | const std::string_view type{TextureType(info)}; | ||
| 537 | ctx.Add("TXQ {},{},{},{};", inst, lod, texture, type); | ||
| 538 | } | ||
| 539 | |||
| 540 | void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord) { | ||
| 541 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 542 | const std::string texture{Texture(ctx, info, index)}; | ||
| 543 | const std::string_view type{TextureType(info)}; | ||
| 544 | ctx.Add("LOD.F {},{},{},{};", inst, coord, texture, type); | ||
| 545 | } | ||
| 546 | |||
| 547 | void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 548 | const IR::Value& coord, const IR::Value& derivatives, | ||
| 549 | const IR::Value& offset, const IR::Value& lod_clamp) { | ||
| 550 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 551 | ScopedRegister dpdx, dpdy; | ||
| 552 | const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp}; | ||
| 553 | if (multi_component) { | ||
| 554 | // Allocate this early to avoid aliasing other registers | ||
| 555 | dpdx = ScopedRegister{ctx.reg_alloc}; | ||
| 556 | dpdy = ScopedRegister{ctx.reg_alloc}; | ||
| 557 | } | ||
| 558 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 559 | const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; | ||
| 560 | const std::string_view type{TextureType(info)}; | ||
| 561 | const std::string texture{Texture(ctx, info, index)}; | ||
| 562 | const std::string offset_vec{GradOffset(offset)}; | ||
| 563 | const Register coord_vec{ctx.reg_alloc.Consume(coord)}; | ||
| 564 | const Register derivatives_vec{ctx.reg_alloc.Consume(derivatives)}; | ||
| 565 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 566 | if (multi_component) { | ||
| 567 | ctx.Add("MOV.F {}.x,{}.x;" | ||
| 568 | "MOV.F {}.y,{}.z;" | ||
| 569 | "MOV.F {}.x,{}.y;" | ||
| 570 | "MOV.F {}.y,{}.w;", | ||
| 571 | dpdx.reg, derivatives_vec, dpdx.reg, derivatives_vec, dpdy.reg, derivatives_vec, | ||
| 572 | dpdy.reg, derivatives_vec); | ||
| 573 | if (info.has_lod_clamp) { | ||
| 574 | const ScalarF32 lod_clamp_value{ctx.reg_alloc.Consume(lod_clamp)}; | ||
| 575 | ctx.Add("MOV.F {}.w,{};" | ||
| 576 | "TXD.F.LODCLAMP{} {},{},{},{},{},{}{};", | ||
| 577 | dpdy.reg, lod_clamp_value, sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg, | ||
| 578 | texture, type, offset_vec); | ||
| 579 | } else { | ||
| 580 | ctx.Add("TXD.F{} {},{},{},{},{},{}{};", sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg, | ||
| 581 | texture, type, offset_vec); | ||
| 582 | } | ||
| 583 | } else { | ||
| 584 | ctx.Add("TXD.F{} {},{},{}.x,{}.y,{},{}{};", sparse_mod, ret, coord_vec, derivatives_vec, | ||
| 585 | derivatives_vec, texture, type, offset_vec); | ||
| 586 | } | ||
| 587 | StoreSparse(ctx, sparse_inst); | ||
| 588 | } | ||
| 589 | |||
| 590 | void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord) { | ||
| 591 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 592 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 593 | const std::string_view format{FormatStorage(info.image_format)}; | ||
| 594 | const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; | ||
| 595 | const std::string_view type{TextureType(info)}; | ||
| 596 | const std::string image{Image(ctx, info, index)}; | ||
| 597 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 598 | ctx.Add("LOADIM.{}{} {},{},{},{};", format, sparse_mod, ret, coord, image, type); | ||
| 599 | StoreSparse(ctx, sparse_inst); | ||
| 600 | } | ||
| 601 | |||
| 602 | void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 603 | Register color) { | ||
| 604 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 605 | const std::string_view format{FormatStorage(info.image_format)}; | ||
| 606 | const std::string_view type{TextureType(info)}; | ||
| 607 | const std::string image{Image(ctx, info, index)}; | ||
| 608 | ctx.Add("STOREIM.{} {},{},{},{};", format, image, color, coord, type); | ||
| 609 | } | ||
| 610 | |||
| 611 | void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 612 | ScalarU32 value) { | ||
| 613 | ImageAtomic(ctx, inst, index, coord, value, "ADD.U32"); | ||
| 614 | } | ||
| 615 | |||
| 616 | void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 617 | ScalarS32 value) { | ||
| 618 | ImageAtomic(ctx, inst, index, coord, value, "MIN.S32"); | ||
| 619 | } | ||
| 620 | |||
| 621 | void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 622 | ScalarU32 value) { | ||
| 623 | ImageAtomic(ctx, inst, index, coord, value, "MIN.U32"); | ||
| 624 | } | ||
| 625 | |||
| 626 | void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 627 | ScalarS32 value) { | ||
| 628 | ImageAtomic(ctx, inst, index, coord, value, "MAX.S32"); | ||
| 629 | } | ||
| 630 | |||
| 631 | void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 632 | ScalarU32 value) { | ||
| 633 | ImageAtomic(ctx, inst, index, coord, value, "MAX.U32"); | ||
| 634 | } | ||
| 635 | |||
| 636 | void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 637 | ScalarU32 value) { | ||
| 638 | ImageAtomic(ctx, inst, index, coord, value, "IWRAP.U32"); | ||
| 639 | } | ||
| 640 | |||
| 641 | void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 642 | ScalarU32 value) { | ||
| 643 | ImageAtomic(ctx, inst, index, coord, value, "DWRAP.U32"); | ||
| 644 | } | ||
| 645 | |||
| 646 | void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 647 | ScalarU32 value) { | ||
| 648 | ImageAtomic(ctx, inst, index, coord, value, "AND.U32"); | ||
| 649 | } | ||
| 650 | |||
| 651 | void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 652 | ScalarU32 value) { | ||
| 653 | ImageAtomic(ctx, inst, index, coord, value, "OR.U32"); | ||
| 654 | } | ||
| 655 | |||
| 656 | void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 657 | ScalarU32 value) { | ||
| 658 | ImageAtomic(ctx, inst, index, coord, value, "XOR.U32"); | ||
| 659 | } | ||
| 660 | |||
| 661 | void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 662 | Register coord, ScalarU32 value) { | ||
| 663 | ImageAtomic(ctx, inst, index, coord, value, "EXCH.U32"); | ||
| 664 | } | ||
| 665 | |||
| 666 | void EmitBindlessImageSampleImplicitLod(EmitContext&) { | ||
| 667 | throw LogicError("Unreachable instruction"); | ||
| 668 | } | ||
| 669 | |||
| 670 | void EmitBindlessImageSampleExplicitLod(EmitContext&) { | ||
| 671 | throw LogicError("Unreachable instruction"); | ||
| 672 | } | ||
| 673 | |||
| 674 | void EmitBindlessImageSampleDrefImplicitLod(EmitContext&) { | ||
| 675 | throw LogicError("Unreachable instruction"); | ||
| 676 | } | ||
| 677 | |||
| 678 | void EmitBindlessImageSampleDrefExplicitLod(EmitContext&) { | ||
| 679 | throw LogicError("Unreachable instruction"); | ||
| 680 | } | ||
| 681 | |||
| 682 | void EmitBindlessImageGather(EmitContext&) { | ||
| 683 | throw LogicError("Unreachable instruction"); | ||
| 684 | } | ||
| 685 | |||
| 686 | void EmitBindlessImageGatherDref(EmitContext&) { | ||
| 687 | throw LogicError("Unreachable instruction"); | ||
| 688 | } | ||
| 689 | |||
| 690 | void EmitBindlessImageFetch(EmitContext&) { | ||
| 691 | throw LogicError("Unreachable instruction"); | ||
| 692 | } | ||
| 693 | |||
| 694 | void EmitBindlessImageQueryDimensions(EmitContext&) { | ||
| 695 | throw LogicError("Unreachable instruction"); | ||
| 696 | } | ||
| 697 | |||
| 698 | void EmitBindlessImageQueryLod(EmitContext&) { | ||
| 699 | throw LogicError("Unreachable instruction"); | ||
| 700 | } | ||
| 701 | |||
| 702 | void EmitBindlessImageGradient(EmitContext&) { | ||
| 703 | throw LogicError("Unreachable instruction"); | ||
| 704 | } | ||
| 705 | |||
| 706 | void EmitBindlessImageRead(EmitContext&) { | ||
| 707 | throw LogicError("Unreachable instruction"); | ||
| 708 | } | ||
| 709 | |||
| 710 | void EmitBindlessImageWrite(EmitContext&) { | ||
| 711 | throw LogicError("Unreachable instruction"); | ||
| 712 | } | ||
| 713 | |||
| 714 | void EmitBoundImageSampleImplicitLod(EmitContext&) { | ||
| 715 | throw LogicError("Unreachable instruction"); | ||
| 716 | } | ||
| 717 | |||
| 718 | void EmitBoundImageSampleExplicitLod(EmitContext&) { | ||
| 719 | throw LogicError("Unreachable instruction"); | ||
| 720 | } | ||
| 721 | |||
| 722 | void EmitBoundImageSampleDrefImplicitLod(EmitContext&) { | ||
| 723 | throw LogicError("Unreachable instruction"); | ||
| 724 | } | ||
| 725 | |||
| 726 | void EmitBoundImageSampleDrefExplicitLod(EmitContext&) { | ||
| 727 | throw LogicError("Unreachable instruction"); | ||
| 728 | } | ||
| 729 | |||
| 730 | void EmitBoundImageGather(EmitContext&) { | ||
| 731 | throw LogicError("Unreachable instruction"); | ||
| 732 | } | ||
| 733 | |||
| 734 | void EmitBoundImageGatherDref(EmitContext&) { | ||
| 735 | throw LogicError("Unreachable instruction"); | ||
| 736 | } | ||
| 737 | |||
| 738 | void EmitBoundImageFetch(EmitContext&) { | ||
| 739 | throw LogicError("Unreachable instruction"); | ||
| 740 | } | ||
| 741 | |||
| 742 | void EmitBoundImageQueryDimensions(EmitContext&) { | ||
| 743 | throw LogicError("Unreachable instruction"); | ||
| 744 | } | ||
| 745 | |||
| 746 | void EmitBoundImageQueryLod(EmitContext&) { | ||
| 747 | throw LogicError("Unreachable instruction"); | ||
| 748 | } | ||
| 749 | |||
| 750 | void EmitBoundImageGradient(EmitContext&) { | ||
| 751 | throw LogicError("Unreachable instruction"); | ||
| 752 | } | ||
| 753 | |||
| 754 | void EmitBoundImageRead(EmitContext&) { | ||
| 755 | throw LogicError("Unreachable instruction"); | ||
| 756 | } | ||
| 757 | |||
| 758 | void EmitBoundImageWrite(EmitContext&) { | ||
| 759 | throw LogicError("Unreachable instruction"); | ||
| 760 | } | ||
| 761 | |||
| 762 | void EmitBindlessImageAtomicIAdd32(EmitContext&) { | ||
| 763 | throw LogicError("Unreachable instruction"); | ||
| 764 | } | ||
| 765 | |||
| 766 | void EmitBindlessImageAtomicSMin32(EmitContext&) { | ||
| 767 | throw LogicError("Unreachable instruction"); | ||
| 768 | } | ||
| 769 | |||
| 770 | void EmitBindlessImageAtomicUMin32(EmitContext&) { | ||
| 771 | throw LogicError("Unreachable instruction"); | ||
| 772 | } | ||
| 773 | |||
| 774 | void EmitBindlessImageAtomicSMax32(EmitContext&) { | ||
| 775 | throw LogicError("Unreachable instruction"); | ||
| 776 | } | ||
| 777 | |||
| 778 | void EmitBindlessImageAtomicUMax32(EmitContext&) { | ||
| 779 | throw LogicError("Unreachable instruction"); | ||
| 780 | } | ||
| 781 | |||
| 782 | void EmitBindlessImageAtomicInc32(EmitContext&) { | ||
| 783 | throw LogicError("Unreachable instruction"); | ||
| 784 | } | ||
| 785 | |||
| 786 | void EmitBindlessImageAtomicDec32(EmitContext&) { | ||
| 787 | throw LogicError("Unreachable instruction"); | ||
| 788 | } | ||
| 789 | |||
| 790 | void EmitBindlessImageAtomicAnd32(EmitContext&) { | ||
| 791 | throw LogicError("Unreachable instruction"); | ||
| 792 | } | ||
| 793 | |||
| 794 | void EmitBindlessImageAtomicOr32(EmitContext&) { | ||
| 795 | throw LogicError("Unreachable instruction"); | ||
| 796 | } | ||
| 797 | |||
| 798 | void EmitBindlessImageAtomicXor32(EmitContext&) { | ||
| 799 | throw LogicError("Unreachable instruction"); | ||
| 800 | } | ||
| 801 | |||
| 802 | void EmitBindlessImageAtomicExchange32(EmitContext&) { | ||
| 803 | throw LogicError("Unreachable instruction"); | ||
| 804 | } | ||
| 805 | |||
| 806 | void EmitBoundImageAtomicIAdd32(EmitContext&) { | ||
| 807 | throw LogicError("Unreachable instruction"); | ||
| 808 | } | ||
| 809 | |||
| 810 | void EmitBoundImageAtomicSMin32(EmitContext&) { | ||
| 811 | throw LogicError("Unreachable instruction"); | ||
| 812 | } | ||
| 813 | |||
| 814 | void EmitBoundImageAtomicUMin32(EmitContext&) { | ||
| 815 | throw LogicError("Unreachable instruction"); | ||
| 816 | } | ||
| 817 | |||
| 818 | void EmitBoundImageAtomicSMax32(EmitContext&) { | ||
| 819 | throw LogicError("Unreachable instruction"); | ||
| 820 | } | ||
| 821 | |||
| 822 | void EmitBoundImageAtomicUMax32(EmitContext&) { | ||
| 823 | throw LogicError("Unreachable instruction"); | ||
| 824 | } | ||
| 825 | |||
| 826 | void EmitBoundImageAtomicInc32(EmitContext&) { | ||
| 827 | throw LogicError("Unreachable instruction"); | ||
| 828 | } | ||
| 829 | |||
| 830 | void EmitBoundImageAtomicDec32(EmitContext&) { | ||
| 831 | throw LogicError("Unreachable instruction"); | ||
| 832 | } | ||
| 833 | |||
| 834 | void EmitBoundImageAtomicAnd32(EmitContext&) { | ||
| 835 | throw LogicError("Unreachable instruction"); | ||
| 836 | } | ||
| 837 | |||
| 838 | void EmitBoundImageAtomicOr32(EmitContext&) { | ||
| 839 | throw LogicError("Unreachable instruction"); | ||
| 840 | } | ||
| 841 | |||
| 842 | void EmitBoundImageAtomicXor32(EmitContext&) { | ||
| 843 | throw LogicError("Unreachable instruction"); | ||
| 844 | } | ||
| 845 | |||
| 846 | void EmitBoundImageAtomicExchange32(EmitContext&) { | ||
| 847 | throw LogicError("Unreachable instruction"); | ||
| 848 | } | ||
| 849 | |||
| 850 | } // namespace Shader::Backend::GLASM | ||
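All of the `Bindless*`/`Bound*` stubs above throw instead of emitting code. My reading (an inference, not stated in this diff) is that an earlier IR pass is expected to lower bindless and bound image accesses into the descriptor-indexed `EmitImage*` forms before the GLASM backend runs, so a stub that is actually reached signals a broken pipeline rather than a missing feature:

```cpp
// Sketch under the stated assumption; not the project's actual lowering code.
// After the (assumed) texture-lowering pass, only indexed ops should survive:
//   BindlessImageFetch(handle, ...)  -->  ImageFetch(index, ...)
// so the backend treats any surviving Bindless*/Bound* op as a logic error
// and throws LogicError("Unreachable instruction") to make the bug loud.
```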
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h new file mode 100644 index 000000000..12afda43b --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h | |||
| @@ -0,0 +1,625 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "shader_recompiler/backend/glasm/reg_alloc.h" | ||
| 9 | |||
| 10 | namespace Shader::IR { | ||
| 11 | enum class Attribute : u64; | ||
| 12 | enum class Patch : u64; | ||
| 13 | class Inst; | ||
| 14 | class Value; | ||
| 15 | } // namespace Shader::IR | ||
| 16 | |||
| 17 | namespace Shader::Backend::GLASM { | ||
| 18 | |||
| 19 | class EmitContext; | ||
| 20 | |||
| 21 | // Microinstruction emitters | ||
| 22 | void EmitPhi(EmitContext& ctx, IR::Inst& inst); | ||
| 23 | void EmitVoid(EmitContext& ctx); | ||
| 24 | void EmitIdentity(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); | ||
| 25 | void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); | ||
| 26 | void EmitReference(EmitContext&, const IR::Value& value); | ||
| 27 | void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value); | ||
| 28 | void EmitJoin(EmitContext& ctx); | ||
| 29 | void EmitDemoteToHelperInvocation(EmitContext& ctx); | ||
| 30 | void EmitBarrier(EmitContext& ctx); | ||
| 31 | void EmitWorkgroupMemoryBarrier(EmitContext& ctx); | ||
| 32 | void EmitDeviceMemoryBarrier(EmitContext& ctx); | ||
| 33 | void EmitPrologue(EmitContext& ctx); | ||
| 34 | void EmitEpilogue(EmitContext& ctx); | ||
| 35 | void EmitEmitVertex(EmitContext& ctx, ScalarS32 stream); | ||
| 36 | void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream); | ||
| 37 | void EmitGetRegister(EmitContext& ctx); | ||
| 38 | void EmitSetRegister(EmitContext& ctx); | ||
| 39 | void EmitGetPred(EmitContext& ctx); | ||
| 40 | void EmitSetPred(EmitContext& ctx); | ||
| 41 | void EmitSetGotoVariable(EmitContext& ctx); | ||
| 42 | void EmitGetGotoVariable(EmitContext& ctx); | ||
| 43 | void EmitSetIndirectBranchVariable(EmitContext& ctx); | ||
| 44 | void EmitGetIndirectBranchVariable(EmitContext& ctx); | ||
| 45 | void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); | ||
| 46 | void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); | ||
| 47 | void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); | ||
| 48 | void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); | ||
| 49 | void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); | ||
| 50 | void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); | ||
| 51 | void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); | ||
| 52 | void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex); | ||
| 53 | void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, ScalarU32 vertex); | ||
| 54 | void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, ScalarU32 vertex); | ||
| 55 | void EmitSetAttributeIndexed(EmitContext& ctx, ScalarU32 offset, ScalarF32 value, ScalarU32 vertex); | ||
| 56 | void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch); | ||
| 57 | void EmitSetPatch(EmitContext& ctx, IR::Patch patch, ScalarF32 value); | ||
| 58 | void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, ScalarF32 value); | ||
| 59 | void EmitSetSampleMask(EmitContext& ctx, ScalarS32 value); | ||
| 60 | void EmitSetFragDepth(EmitContext& ctx, ScalarF32 value); | ||
| 61 | void EmitGetZFlag(EmitContext& ctx); | ||
| 62 | void EmitGetSFlag(EmitContext& ctx); | ||
| 63 | void EmitGetCFlag(EmitContext& ctx); | ||
| 64 | void EmitGetOFlag(EmitContext& ctx); | ||
| 65 | void EmitSetZFlag(EmitContext& ctx); | ||
| 66 | void EmitSetSFlag(EmitContext& ctx); | ||
| 67 | void EmitSetCFlag(EmitContext& ctx); | ||
| 68 | void EmitSetOFlag(EmitContext& ctx); | ||
| 69 | void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst); | ||
| 70 | void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst); | ||
| 71 | void EmitInvocationId(EmitContext& ctx, IR::Inst& inst); | ||
| 72 | void EmitSampleId(EmitContext& ctx, IR::Inst& inst); | ||
| 73 | void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst); | ||
| 74 | void EmitYDirection(EmitContext& ctx, IR::Inst& inst); | ||
| 75 | void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset); | ||
| 76 | void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value); | ||
| 77 | void EmitUndefU1(EmitContext& ctx, IR::Inst& inst); | ||
| 78 | void EmitUndefU8(EmitContext& ctx, IR::Inst& inst); | ||
| 79 | void EmitUndefU16(EmitContext& ctx, IR::Inst& inst); | ||
| 80 | void EmitUndefU32(EmitContext& ctx, IR::Inst& inst); | ||
| 81 | void EmitUndefU64(EmitContext& ctx, IR::Inst& inst); | ||
| 82 | void EmitLoadGlobalU8(EmitContext& ctx, IR::Inst& inst, Register address); | ||
| 83 | void EmitLoadGlobalS8(EmitContext& ctx, IR::Inst& inst, Register address); | ||
| 84 | void EmitLoadGlobalU16(EmitContext& ctx, IR::Inst& inst, Register address); | ||
| 85 | void EmitLoadGlobalS16(EmitContext& ctx, IR::Inst& inst, Register address); | ||
| 86 | void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, Register address); | ||
| 87 | void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, Register address); | ||
| 88 | void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, Register address); | ||
| 89 | void EmitWriteGlobalU8(EmitContext& ctx, Register address, Register value); | ||
| 90 | void EmitWriteGlobalS8(EmitContext& ctx, Register address, Register value); | ||
| 91 | void EmitWriteGlobalU16(EmitContext& ctx, Register address, Register value); | ||
| 92 | void EmitWriteGlobalS16(EmitContext& ctx, Register address, Register value); | ||
| 93 | void EmitWriteGlobal32(EmitContext& ctx, Register address, ScalarU32 value); | ||
| 94 | void EmitWriteGlobal64(EmitContext& ctx, Register address, Register value); | ||
| 95 | void EmitWriteGlobal128(EmitContext& ctx, Register address, Register value); | ||
| 96 | void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 97 | ScalarU32 offset); | ||
| 98 | void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 99 | ScalarU32 offset); | ||
| 100 | void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 101 | ScalarU32 offset); | ||
| 102 | void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 103 | ScalarU32 offset); | ||
| 104 | void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 105 | ScalarU32 offset); | ||
| 106 | void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 107 | ScalarU32 offset); | ||
| 108 | void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 109 | ScalarU32 offset); | ||
| 110 | void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 111 | ScalarU32 value); | ||
| 112 | void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 113 | ScalarS32 value); | ||
| 114 | void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 115 | ScalarU32 value); | ||
| 116 | void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 117 | ScalarS32 value); | ||
| 118 | void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 119 | ScalarU32 value); | ||
| 120 | void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 121 | Register value); | ||
| 122 | void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 123 | Register value); | ||
| 124 | void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); | ||
| 125 | void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); | ||
| 126 | void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); | ||
| 127 | void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); | ||
| 128 | void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); | ||
| 129 | void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); | ||
| 130 | void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); | ||
| 131 | void EmitWriteSharedU8(EmitContext& ctx, ScalarU32 offset, ScalarU32 value); | ||
| 132 | void EmitWriteSharedU16(EmitContext& ctx, ScalarU32 offset, ScalarU32 value); | ||
| 133 | void EmitWriteSharedU32(EmitContext& ctx, ScalarU32 offset, ScalarU32 value); | ||
| 134 | void EmitWriteSharedU64(EmitContext& ctx, ScalarU32 offset, Register value); | ||
| 135 | void EmitWriteSharedU128(EmitContext& ctx, ScalarU32 offset, Register value); | ||
| 136 | void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, | ||
| 137 | const IR::Value& e2); | ||
| 138 | void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, | ||
| 139 | const IR::Value& e2, const IR::Value& e3); | ||
| 140 | void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, | ||
| 141 | const IR::Value& e2, const IR::Value& e3, const IR::Value& e4); | ||
| 142 | void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index); | ||
| 143 | void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index); | ||
| 144 | void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index); | ||
| 145 | void EmitCompositeInsertU32x2(EmitContext& ctx, Register composite, ScalarU32 object, u32 index); | ||
| 146 | void EmitCompositeInsertU32x3(EmitContext& ctx, Register composite, ScalarU32 object, u32 index); | ||
| 147 | void EmitCompositeInsertU32x4(EmitContext& ctx, Register composite, ScalarU32 object, u32 index); | ||
| 148 | void EmitCompositeConstructF16x2(EmitContext& ctx, Register e1, Register e2); | ||
| 149 | void EmitCompositeConstructF16x3(EmitContext& ctx, Register e1, Register e2, Register e3); | ||
| 150 | void EmitCompositeConstructF16x4(EmitContext& ctx, Register e1, Register e2, Register e3, | ||
| 151 | Register e4); | ||
| 152 | void EmitCompositeExtractF16x2(EmitContext& ctx, Register composite, u32 index); | ||
| 153 | void EmitCompositeExtractF16x3(EmitContext& ctx, Register composite, u32 index); | ||
| 154 | void EmitCompositeExtractF16x4(EmitContext& ctx, Register composite, u32 index); | ||
| 155 | void EmitCompositeInsertF16x2(EmitContext& ctx, Register composite, Register object, u32 index); | ||
| 156 | void EmitCompositeInsertF16x3(EmitContext& ctx, Register composite, Register object, u32 index); | ||
| 157 | void EmitCompositeInsertF16x4(EmitContext& ctx, Register composite, Register object, u32 index); | ||
| 158 | void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, | ||
| 159 | const IR::Value& e2); | ||
| 160 | void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, | ||
| 161 | const IR::Value& e2, const IR::Value& e3); | ||
| 162 | void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, | ||
| 163 | const IR::Value& e2, const IR::Value& e3, const IR::Value& e4); | ||
| 164 | void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index); | ||
| 165 | void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index); | ||
| 166 | void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index); | ||
| 167 | void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, | ||
| 168 | ScalarF32 object, u32 index); | ||
| 169 | void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, | ||
| 170 | ScalarF32 object, u32 index); | ||
| 171 | void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, | ||
| 172 | ScalarF32 object, u32 index); | ||
| 173 | void EmitCompositeConstructF64x2(EmitContext& ctx); | ||
| 174 | void EmitCompositeConstructF64x3(EmitContext& ctx); | ||
| 175 | void EmitCompositeConstructF64x4(EmitContext& ctx); | ||
| 176 | void EmitCompositeExtractF64x2(EmitContext& ctx); | ||
| 177 | void EmitCompositeExtractF64x3(EmitContext& ctx); | ||
| 178 | void EmitCompositeExtractF64x4(EmitContext& ctx); | ||
| 179 | void EmitCompositeInsertF64x2(EmitContext& ctx, Register composite, Register object, u32 index); | ||
| 180 | void EmitCompositeInsertF64x3(EmitContext& ctx, Register composite, Register object, u32 index); | ||
| 181 | void EmitCompositeInsertF64x4(EmitContext& ctx, Register composite, Register object, u32 index); | ||
| 182 | void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, | ||
| 183 | ScalarS32 false_value); | ||
| 184 | void EmitSelectU8(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value); | ||
| 185 | void EmitSelectU16(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value); | ||
| 186 | void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, | ||
| 187 | ScalarS32 false_value); | ||
| 188 | void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, Register true_value, | ||
| 189 | Register false_value); | ||
| 190 | void EmitSelectF16(EmitContext& ctx, ScalarS32 cond, Register true_value, Register false_value); | ||
| 191 | void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, | ||
| 192 | ScalarS32 false_value); | ||
| 193 | void EmitSelectF64(EmitContext& ctx, ScalarS32 cond, Register true_value, Register false_value); | ||
| 194 | void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); | ||
| 195 | void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); | ||
| 196 | void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); | ||
| 197 | void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); | ||
| 198 | void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); | ||
| 199 | void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); | ||
| 200 | void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 201 | void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 202 | void EmitPackFloat2x16(EmitContext& ctx, Register value); | ||
| 203 | void EmitUnpackFloat2x16(EmitContext& ctx, Register value); | ||
| 204 | void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 205 | void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 206 | void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 207 | void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 208 | void EmitGetZeroFromOp(EmitContext& ctx); | ||
| 209 | void EmitGetSignFromOp(EmitContext& ctx); | ||
| 210 | void EmitGetCarryFromOp(EmitContext& ctx); | ||
| 211 | void EmitGetOverflowFromOp(EmitContext& ctx); | ||
| 212 | void EmitGetSparseFromOp(EmitContext& ctx); | ||
| 213 | void EmitGetInBoundsFromOp(EmitContext& ctx); | ||
| 214 | void EmitFPAbs16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 215 | void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 216 | void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); | ||
| 217 | void EmitFPAdd16(EmitContext& ctx, IR::Inst& inst, Register a, Register b); | ||
| 218 | void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b); | ||
| 219 | void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b); | ||
| 220 | void EmitFPFma16(EmitContext& ctx, IR::Inst& inst, Register a, Register b, Register c); | ||
| 221 | void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b, ScalarF32 c); | ||
| 222 | void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b, ScalarF64 c); | ||
| 223 | void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b); | ||
| 224 | void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b); | ||
| 225 | void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b); | ||
| 226 | void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b); | ||
| 227 | void EmitFPMul16(EmitContext& ctx, IR::Inst& inst, Register a, Register b); | ||
| 228 | void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b); | ||
| 229 | void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b); | ||
| 230 | void EmitFPNeg16(EmitContext& ctx, Register value); | ||
| 231 | void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, ScalarRegister value); | ||
| 232 | void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 233 | void EmitFPSin(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 234 | void EmitFPCos(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 235 | void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 236 | void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 237 | void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 238 | void EmitFPRecip64(EmitContext& ctx, Register value); | ||
| 239 | void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 240 | void EmitFPRecipSqrt64(EmitContext& ctx, Register value); | ||
| 241 | void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 242 | void EmitFPSaturate16(EmitContext& ctx, Register value); | ||
| 243 | void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 244 | void EmitFPSaturate64(EmitContext& ctx, Register value); | ||
| 245 | void EmitFPClamp16(EmitContext& ctx, Register value, Register min_value, Register max_value); | ||
| 246 | void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value, ScalarF32 min_value, | ||
| 247 | ScalarF32 max_value); | ||
| 248 | void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value, ScalarF64 min_value, | ||
| 249 | ScalarF64 max_value); | ||
| 250 | void EmitFPRoundEven16(EmitContext& ctx, Register value); | ||
| 251 | void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 252 | void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); | ||
| 253 | void EmitFPFloor16(EmitContext& ctx, Register value); | ||
| 254 | void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 255 | void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); | ||
| 256 | void EmitFPCeil16(EmitContext& ctx, Register value); | ||
| 257 | void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 258 | void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); | ||
| 259 | void EmitFPTrunc16(EmitContext& ctx, Register value); | ||
| 260 | void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 261 | void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); | ||
| 262 | void EmitFPOrdEqual16(EmitContext& ctx, Register lhs, Register rhs); | ||
| 263 | void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); | ||
| 264 | void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); | ||
| 265 | void EmitFPUnordEqual16(EmitContext& ctx, Register lhs, Register rhs); | ||
| 266 | void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); | ||
| 267 | void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); | ||
| 268 | void EmitFPOrdNotEqual16(EmitContext& ctx, Register lhs, Register rhs); | ||
| 269 | void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); | ||
| 270 | void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); | ||
| 271 | void EmitFPUnordNotEqual16(EmitContext& ctx, Register lhs, Register rhs); | ||
| 272 | void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); | ||
| 273 | void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); | ||
| 274 | void EmitFPOrdLessThan16(EmitContext& ctx, Register lhs, Register rhs); | ||
| 275 | void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); | ||
| 276 | void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); | ||
| 277 | void EmitFPUnordLessThan16(EmitContext& ctx, Register lhs, Register rhs); | ||
| 278 | void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); | ||
| 279 | void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); | ||
| 280 | void EmitFPOrdGreaterThan16(EmitContext& ctx, Register lhs, Register rhs); | ||
| 281 | void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); | ||
| 282 | void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); | ||
| 283 | void EmitFPUnordGreaterThan16(EmitContext& ctx, Register lhs, Register rhs); | ||
| 284 | void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); | ||
| 285 | void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); | ||
| 286 | void EmitFPOrdLessThanEqual16(EmitContext& ctx, Register lhs, Register rhs); | ||
| 287 | void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); | ||
| 288 | void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); | ||
| 289 | void EmitFPUnordLessThanEqual16(EmitContext& ctx, Register lhs, Register rhs); | ||
| 290 | void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); | ||
| 291 | void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); | ||
| 292 | void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Register lhs, Register rhs); | ||
| 293 | void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); | ||
| 294 | void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); | ||
| 295 | void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Register lhs, Register rhs); | ||
| 296 | void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); | ||
| 297 | void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); | ||
| 298 | void EmitFPIsNan16(EmitContext& ctx, Register value); | ||
| 299 | void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 300 | void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); | ||
| 301 | void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); | ||
| 302 | void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, Register a, Register b); | ||
| 303 | void EmitISub32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); | ||
| 304 | void EmitISub64(EmitContext& ctx, IR::Inst& inst, Register a, Register b); | ||
| 305 | void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); | ||
| 306 | void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); | ||
| 307 | void EmitINeg64(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 308 | void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); | ||
| 309 | void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift); | ||
| 310 | void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, ScalarU32 shift); | ||
| 311 | void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift); | ||
| 312 | void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, | ||
| 313 | ScalarU32 shift); | ||
| 314 | void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 shift); | ||
| 315 | void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, | ||
| 316 | ScalarS32 shift); | ||
| 317 | void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); | ||
| 318 | void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); | ||
| 319 | void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); | ||
| 320 | void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 insert, | ||
| 321 | ScalarS32 offset, ScalarS32 count); | ||
| 322 | void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 offset, | ||
| 323 | ScalarS32 count); | ||
| 324 | void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 offset, | ||
| 325 | ScalarU32 count); | ||
| 326 | void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); | ||
| 327 | void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); | ||
| 328 | void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); | ||
| 329 | void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); | ||
| 330 | void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value); | ||
| 331 | void EmitSMin32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); | ||
| 332 | void EmitUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b); | ||
| 333 | void EmitSMax32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); | ||
| 334 | void EmitUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b); | ||
| 335 | void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value, ScalarS32 min, ScalarS32 max); | ||
| 336 | void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 min, ScalarU32 max); | ||
| 337 | void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); | ||
| 338 | void EmitULessThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs); | ||
| 339 | void EmitIEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); | ||
| 340 | void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); | ||
| 341 | void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs); | ||
| 342 | void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); | ||
| 343 | void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs); | ||
| 344 | void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); | ||
| 345 | void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); | ||
| 346 | void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs); | ||
| 347 | void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 348 | ScalarU32 value); | ||
| 349 | void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 350 | ScalarS32 value); | ||
| 351 | void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 352 | ScalarU32 value); | ||
| 353 | void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 354 | ScalarS32 value); | ||
| 355 | void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 356 | ScalarU32 value); | ||
| 357 | void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 358 | ScalarU32 value); | ||
| 359 | void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 360 | ScalarU32 value); | ||
| 361 | void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 362 | ScalarU32 value); | ||
| 363 | void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 364 | ScalarU32 value); | ||
| 365 | void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 366 | ScalarU32 value); | ||
| 367 | void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 368 | ScalarU32 value); | ||
| 369 | void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 370 | Register value); | ||
| 371 | void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 372 | ScalarU32 offset, ScalarU32 value); | ||
| 373 | void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 374 | ScalarU32 offset, ScalarS32 value); | ||
| 375 | void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 376 | ScalarU32 offset, ScalarU32 value); | ||
| 377 | void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 378 | ScalarU32 offset, ScalarS32 value); | ||
| 379 | void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 380 | ScalarU32 offset, ScalarU32 value); | ||
| 381 | void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 382 | ScalarU32 offset, ScalarU32 value); | ||
| 383 | void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 384 | ScalarU32 offset, ScalarU32 value); | ||
| 385 | void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 386 | ScalarU32 offset, ScalarU32 value); | ||
| 387 | void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 388 | ScalarU32 offset, ScalarU32 value); | ||
| 389 | void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 390 | ScalarU32 offset, ScalarU32 value); | ||
| 391 | void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 392 | ScalarU32 offset, ScalarU32 value); | ||
| 393 | void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 394 | ScalarU32 offset, Register value); | ||
| 395 | void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 396 | ScalarU32 offset, Register value); | ||
| 397 | void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 398 | ScalarU32 offset, Register value); | ||
| 399 | void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 400 | ScalarU32 offset, Register value); | ||
| 401 | void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 402 | ScalarU32 offset, Register value); | ||
| 403 | void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 404 | ScalarU32 offset, Register value); | ||
| 405 | void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 406 | ScalarU32 offset, Register value); | ||
| 407 | void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 408 | ScalarU32 offset, Register value); | ||
| 409 | void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 410 | ScalarU32 offset, Register value); | ||
| 411 | void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 412 | ScalarU32 offset, ScalarF32 value); | ||
| 413 | void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 414 | ScalarU32 offset, Register value); | ||
| 415 | void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 416 | ScalarU32 offset, Register value); | ||
| 417 | void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 418 | ScalarU32 offset, Register value); | ||
| 419 | void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 420 | ScalarU32 offset, Register value); | ||
| 421 | void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 422 | ScalarU32 offset, Register value); | ||
| 423 | void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 424 | ScalarU32 offset, Register value); | ||
| 425 | void EmitGlobalAtomicIAdd32(EmitContext& ctx); | ||
| 426 | void EmitGlobalAtomicSMin32(EmitContext& ctx); | ||
| 427 | void EmitGlobalAtomicUMin32(EmitContext& ctx); | ||
| 428 | void EmitGlobalAtomicSMax32(EmitContext& ctx); | ||
| 429 | void EmitGlobalAtomicUMax32(EmitContext& ctx); | ||
| 430 | void EmitGlobalAtomicInc32(EmitContext& ctx); | ||
| 431 | void EmitGlobalAtomicDec32(EmitContext& ctx); | ||
| 432 | void EmitGlobalAtomicAnd32(EmitContext& ctx); | ||
| 433 | void EmitGlobalAtomicOr32(EmitContext& ctx); | ||
| 434 | void EmitGlobalAtomicXor32(EmitContext& ctx); | ||
| 435 | void EmitGlobalAtomicExchange32(EmitContext& ctx); | ||
| 436 | void EmitGlobalAtomicIAdd64(EmitContext& ctx); | ||
| 437 | void EmitGlobalAtomicSMin64(EmitContext& ctx); | ||
| 438 | void EmitGlobalAtomicUMin64(EmitContext& ctx); | ||
| 439 | void EmitGlobalAtomicSMax64(EmitContext& ctx); | ||
| 440 | void EmitGlobalAtomicUMax64(EmitContext& ctx); | ||
| 441 | void EmitGlobalAtomicInc64(EmitContext& ctx); | ||
| 442 | void EmitGlobalAtomicDec64(EmitContext& ctx); | ||
| 443 | void EmitGlobalAtomicAnd64(EmitContext& ctx); | ||
| 444 | void EmitGlobalAtomicOr64(EmitContext& ctx); | ||
| 445 | void EmitGlobalAtomicXor64(EmitContext& ctx); | ||
| 446 | void EmitGlobalAtomicExchange64(EmitContext& ctx); | ||
| 447 | void EmitGlobalAtomicAddF32(EmitContext& ctx); | ||
| 448 | void EmitGlobalAtomicAddF16x2(EmitContext& ctx); | ||
| 449 | void EmitGlobalAtomicAddF32x2(EmitContext& ctx); | ||
| 450 | void EmitGlobalAtomicMinF16x2(EmitContext& ctx); | ||
| 451 | void EmitGlobalAtomicMinF32x2(EmitContext& ctx); | ||
| 452 | void EmitGlobalAtomicMaxF16x2(EmitContext& ctx); | ||
| 453 | void EmitGlobalAtomicMaxF32x2(EmitContext& ctx); | ||
| 454 | void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); | ||
| 455 | void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); | ||
| 456 | void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); | ||
| 457 | void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); | ||
| 458 | void EmitConvertS16F16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 459 | void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 460 | void EmitConvertS16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); | ||
| 461 | void EmitConvertS32F16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 462 | void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 463 | void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); | ||
| 464 | void EmitConvertS64F16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 465 | void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 466 | void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); | ||
| 467 | void EmitConvertU16F16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 468 | void EmitConvertU16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 469 | void EmitConvertU16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); | ||
| 470 | void EmitConvertU32F16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 471 | void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 472 | void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); | ||
| 473 | void EmitConvertU64F16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 474 | void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 475 | void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); | ||
| 476 | void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value); | ||
| 477 | void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 478 | void EmitConvertF16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 479 | void EmitConvertF32F16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 480 | void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); | ||
| 481 | void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 482 | void EmitConvertF16S8(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 483 | void EmitConvertF16S16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 484 | void EmitConvertF16S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); | ||
| 485 | void EmitConvertF16S64(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 486 | void EmitConvertF16U8(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 487 | void EmitConvertF16U16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 488 | void EmitConvertF16U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value); | ||
| 489 | void EmitConvertF16U64(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 490 | void EmitConvertF32S8(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 491 | void EmitConvertF32S16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 492 | void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); | ||
| 493 | void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 494 | void EmitConvertF32U8(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 495 | void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 496 | void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value); | ||
| 497 | void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 498 | void EmitConvertF64S8(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 499 | void EmitConvertF64S16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 500 | void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); | ||
| 501 | void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 502 | void EmitConvertF64U8(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 503 | void EmitConvertF64U16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 504 | void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value); | ||
| 505 | void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 506 | void EmitBindlessImageSampleImplicitLod(EmitContext&); | ||
| 507 | void EmitBindlessImageSampleExplicitLod(EmitContext&); | ||
| 508 | void EmitBindlessImageSampleDrefImplicitLod(EmitContext&); | ||
| 509 | void EmitBindlessImageSampleDrefExplicitLod(EmitContext&); | ||
| 510 | void EmitBindlessImageGather(EmitContext&); | ||
| 511 | void EmitBindlessImageGatherDref(EmitContext&); | ||
| 512 | void EmitBindlessImageFetch(EmitContext&); | ||
| 513 | void EmitBindlessImageQueryDimensions(EmitContext&); | ||
| 514 | void EmitBindlessImageQueryLod(EmitContext&); | ||
| 515 | void EmitBindlessImageGradient(EmitContext&); | ||
| 516 | void EmitBindlessImageRead(EmitContext&); | ||
| 517 | void EmitBindlessImageWrite(EmitContext&); | ||
| 518 | void EmitBoundImageSampleImplicitLod(EmitContext&); | ||
| 519 | void EmitBoundImageSampleExplicitLod(EmitContext&); | ||
| 520 | void EmitBoundImageSampleDrefImplicitLod(EmitContext&); | ||
| 521 | void EmitBoundImageSampleDrefExplicitLod(EmitContext&); | ||
| 522 | void EmitBoundImageGather(EmitContext&); | ||
| 523 | void EmitBoundImageGatherDref(EmitContext&); | ||
| 524 | void EmitBoundImageFetch(EmitContext&); | ||
| 525 | void EmitBoundImageQueryDimensions(EmitContext&); | ||
| 526 | void EmitBoundImageQueryLod(EmitContext&); | ||
| 527 | void EmitBoundImageGradient(EmitContext&); | ||
| 528 | void EmitBoundImageRead(EmitContext&); | ||
| 529 | void EmitBoundImageWrite(EmitContext&); | ||
| 530 | void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 531 | const IR::Value& coord, Register bias_lc, const IR::Value& offset); | ||
| 532 | void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 533 | const IR::Value& coord, ScalarF32 lod, const IR::Value& offset); | ||
| 534 | void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 535 | const IR::Value& coord, const IR::Value& dref, | ||
| 536 | const IR::Value& bias_lc, const IR::Value& offset); | ||
| 537 | void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 538 | const IR::Value& coord, const IR::Value& dref, | ||
| 539 | const IR::Value& lod, const IR::Value& offset); | ||
| 540 | void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 541 | const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2); | ||
| 542 | void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 543 | const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2, | ||
| 544 | const IR::Value& dref); | ||
| 545 | void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 546 | const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms); | ||
| 547 | void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 548 | ScalarS32 lod); | ||
| 549 | void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord); | ||
| 550 | void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 551 | const IR::Value& coord, const IR::Value& derivatives, | ||
| 552 | const IR::Value& offset, const IR::Value& lod_clamp); | ||
| 553 | void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord); | ||
| 554 | void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 555 | Register color); | ||
| 556 | void EmitBindlessImageAtomicIAdd32(EmitContext&); | ||
| 557 | void EmitBindlessImageAtomicSMin32(EmitContext&); | ||
| 558 | void EmitBindlessImageAtomicUMin32(EmitContext&); | ||
| 559 | void EmitBindlessImageAtomicSMax32(EmitContext&); | ||
| 560 | void EmitBindlessImageAtomicUMax32(EmitContext&); | ||
| 561 | void EmitBindlessImageAtomicInc32(EmitContext&); | ||
| 562 | void EmitBindlessImageAtomicDec32(EmitContext&); | ||
| 563 | void EmitBindlessImageAtomicAnd32(EmitContext&); | ||
| 564 | void EmitBindlessImageAtomicOr32(EmitContext&); | ||
| 565 | void EmitBindlessImageAtomicXor32(EmitContext&); | ||
| 566 | void EmitBindlessImageAtomicExchange32(EmitContext&); | ||
| 567 | void EmitBoundImageAtomicIAdd32(EmitContext&); | ||
| 568 | void EmitBoundImageAtomicSMin32(EmitContext&); | ||
| 569 | void EmitBoundImageAtomicUMin32(EmitContext&); | ||
| 570 | void EmitBoundImageAtomicSMax32(EmitContext&); | ||
| 571 | void EmitBoundImageAtomicUMax32(EmitContext&); | ||
| 572 | void EmitBoundImageAtomicInc32(EmitContext&); | ||
| 573 | void EmitBoundImageAtomicDec32(EmitContext&); | ||
| 574 | void EmitBoundImageAtomicAnd32(EmitContext&); | ||
| 575 | void EmitBoundImageAtomicOr32(EmitContext&); | ||
| 576 | void EmitBoundImageAtomicXor32(EmitContext&); | ||
| 577 | void EmitBoundImageAtomicExchange32(EmitContext&); | ||
| 578 | void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 579 | ScalarU32 value); | ||
| 580 | void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 581 | ScalarS32 value); | ||
| 582 | void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 583 | ScalarU32 value); | ||
| 584 | void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 585 | ScalarS32 value); | ||
| 586 | void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 587 | ScalarU32 value); | ||
| 588 | void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 589 | ScalarU32 value); | ||
| 590 | void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 591 | ScalarU32 value); | ||
| 592 | void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 593 | ScalarU32 value); | ||
| 594 | void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 595 | ScalarU32 value); | ||
| 596 | void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 597 | ScalarU32 value); | ||
| 598 | void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 599 | Register coord, ScalarU32 value); | ||
| 600 | void EmitLaneId(EmitContext& ctx, IR::Inst& inst); | ||
| 601 | void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred); | ||
| 602 | void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred); | ||
| 603 | void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred); | ||
| 604 | void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred); | ||
| 605 | void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst); | ||
| 606 | void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst); | ||
| 607 | void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst); | ||
| 608 | void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst); | ||
| 609 | void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst); | ||
| 610 | void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, | ||
| 611 | const IR::Value& clamp, const IR::Value& segmentation_mask); | ||
| 612 | void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, | ||
| 613 | const IR::Value& clamp, const IR::Value& segmentation_mask); | ||
| 614 | void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, | ||
| 615 | const IR::Value& clamp, const IR::Value& segmentation_mask); | ||
| 616 | void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, | ||
| 617 | const IR::Value& clamp, const IR::Value& segmentation_mask); | ||
| 618 | void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a, ScalarF32 op_b, | ||
| 619 | ScalarU32 swizzle); | ||
| 620 | void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a); | ||
| 621 | void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a); | ||
| 622 | void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a); | ||
| 623 | void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a); | ||
| 624 | |||
| 625 | } // namespace Shader::Backend::GLASM | ||
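A pattern worth noting in this header (my inference from the signatures, not documented in the diff): the operand types appear to encode how the register allocator may materialize each value. `ScalarS32`/`ScalarU32`/`ScalarF32` operands can presumably format either as a register or as an inline immediate, while plain `Register` forces a register, and the `Inst&` parameter is where the result is defined. A hypothetical emitter following that convention, patterned on the definitions later in this diff:

```cpp
// Hypothetical sketch matching the declared signature of EmitUMin32; the
// format-string style mirrors emitters shown elsewhere in this diff.
void EmitUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b) {
    // ScalarU32 operands render as either a register ("R1.x") or an
    // immediate ("7"), so one template covers both operand kinds.
    ctx.Add("MIN.U {}.x,{},{};", inst, a, b);
}
```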
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp new file mode 100644 index 000000000..f55c26b76 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp | |||
| @@ -0,0 +1,294 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 6 | #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" | ||
| 7 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 8 | |||
| 9 | namespace Shader::Backend::GLASM { | ||
| 10 | namespace { | ||
| 11 | void BitwiseLogicalOp(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b, | ||
| 12 | std::string_view lop) { | ||
| 13 | const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp); | ||
| 14 | const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp); | ||
| 15 | if (zero) { | ||
| 16 | zero->Invalidate(); | ||
| 17 | } | ||
| 18 | if (sign) { | ||
| 19 | sign->Invalidate(); | ||
| 20 | } | ||
| 21 | if (zero || sign) { | ||
| 22 | ctx.reg_alloc.InvalidateConditionCodes(); | ||
| 23 | } | ||
| 24 | const auto ret{ctx.reg_alloc.Define(inst)}; | ||
| 25 | ctx.Add("{}.S {}.x,{},{};", lop, ret, a, b); | ||
| 26 | if (zero) { | ||
| 27 | ctx.Add("SEQ.S {},{},0;", *zero, ret); | ||
| 28 | } | ||
| 29 | if (sign) { | ||
| 30 | ctx.Add("SLT.S {},{},0;", *sign, ret); | ||
| 31 | } | ||
| 32 | } | ||
| 33 | } // Anonymous namespace | ||
| 34 | |||
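`BitwiseLogicalOp` folds the IR's zero and sign pseudo-operations into the emitter itself: it invalidates the pseudo-instructions so they are not emitted on their own, then recomputes both flags from the result with compare instructions rather than reading hardware condition codes. A sketch of the text produced for a `BitwiseAnd32` whose zero flag is consumed (register names invented):

```cpp
// AND.S R0.x,R1,R2;   // the logical op itself; "AND" is the lop argument
// SEQ.S R3,R0,0;      // zero flag recomputed: R3 set iff the result is zero
```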
| 35 | void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { | ||
| 36 | const std::array flags{ | ||
| 37 | inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp), | ||
| 38 | inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp), | ||
| 39 | inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp), | ||
| 40 | inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp), | ||
| 41 | }; | ||
| 42 | for (IR::Inst* const flag_inst : flags) { | ||
| 43 | if (flag_inst) { | ||
| 44 | flag_inst->Invalidate(); | ||
| 45 | } | ||
| 46 | } | ||
| 47 | const bool cc{inst.HasAssociatedPseudoOperation()}; | ||
| 48 | const std::string_view cc_mod{cc ? ".CC" : ""}; | ||
| 49 | if (cc) { | ||
| 50 | ctx.reg_alloc.InvalidateConditionCodes(); | ||
| 51 | } | ||
| 52 | const auto ret{ctx.reg_alloc.Define(inst)}; | ||
| 53 | ctx.Add("ADD.S{} {}.x,{},{};", cc_mod, ret, a, b); | ||
| 54 | if (!cc) { | ||
| 55 | return; | ||
| 56 | } | ||
| 57 | static constexpr std::array<std::string_view, 4> masks{"", "SF", "CF", "OF"}; | ||
| 58 | for (size_t flag_index = 0; flag_index < flags.size(); ++flag_index) { | ||
| 59 | if (!flags[flag_index]) { | ||
| 60 | continue; | ||
| 61 | } | ||
| 62 | const auto flag_ret{ctx.reg_alloc.Define(*flags[flag_index])}; | ||
| 63 | if (flag_index == 0) { | ||
| 64 | ctx.Add("SEQ.S {}.x,{}.x,0;", flag_ret, ret); | ||
| 65 | } else { | ||
| 66 | // We could use conditional execution here, but it's broken on Nvidia's compiler | ||
| 67 | ctx.Add("IF {}.x;" | ||
| 68 | "MOV.S {}.x,-1;" | ||
| 69 | "ELSE;" | ||
| 70 | "MOV.S {}.x,0;" | ||
| 71 | "ENDIF;", | ||
| 72 | masks[flag_index], flag_ret, flag_ret); | ||
| 73 | } | ||
| 74 | } | ||
| 75 | } | ||
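When any flag consumer exists, the addition is emitted with the .CC modifier and each requested flag is materialized afterwards; the zero flag is recomputed from the result, while sign/carry/overflow are read back from the condition codes. A sketch for an IAdd32 whose zero and overflow flags are read (illustrative registers):

    ADD.S.CC R2.x,R0.x,R1.x;                        # add and update condition codes
    SEQ.S R3.x,R2.x,0;                              # zero flag from the result itself
    IF OF.x;MOV.S R4.x,-1;ELSE;MOV.S R4.x,0;ENDIF;  # overflow flag read from CC.OF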
| 76 | |||
| 77 | void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, Register a, Register b) { | ||
| 78 | ctx.LongAdd("ADD.S64 {}.x,{}.x,{}.x;", inst, a, b); | ||
| 79 | } | ||
| 80 | |||
| 81 | void EmitISub32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { | ||
| 82 | ctx.Add("SUB.S {}.x,{},{};", inst, a, b); | ||
| 83 | } | ||
| 84 | |||
| 85 | void EmitISub64(EmitContext& ctx, IR::Inst& inst, Register a, Register b) { | ||
| 86 | ctx.LongAdd("SUB.S64 {}.x,{}.x,{}.x;", inst, a, b); | ||
| 87 | } | ||
| 88 | |||
| 89 | void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { | ||
| 90 | ctx.Add("MUL.S {}.x,{},{};", inst, a, b); | ||
| 91 | } | ||
| 92 | |||
| 93 | void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { | ||
| 94 | if (value.type != Type::Register && static_cast<s32>(value.imm_u32) < 0) { | ||
| 95 | ctx.Add("MOV.S {},{};", inst, -static_cast<s32>(value.imm_u32)); | ||
| 96 | } else { | ||
| 97 | ctx.Add("MOV.S {},-{};", inst, value); | ||
| 98 | } | ||
| 99 | } | ||
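The immediate special case exists because formatting a negative immediate after the '-' in "MOV.S {},-{};" would print a double minus sign (e.g. "MOV.S R0,--5;"), which is not valid GLASM, so the negation is folded on the host instead. Illustratively:

    MOV.S R0,5;      # INeg32 of the immediate -5, negated at compile time
    MOV.S R0,-R1.x;  # INeg32 of a register (or non-negative immediate) operand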
| 100 | |||
| 101 | void EmitINeg64(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 102 | ctx.LongAdd("MOV.S64 {},-{};", inst, value); | ||
| 103 | } | ||
| 104 | |||
| 105 | void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { | ||
| 106 | ctx.Add("ABS.S {},{};", inst, value); | ||
| 107 | } | ||
| 108 | |||
| 109 | void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift) { | ||
| 110 | ctx.Add("SHL.U {}.x,{},{};", inst, base, shift); | ||
| 111 | } | ||
| 112 | |||
| 113 | void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, | ||
| 114 | ScalarU32 shift) { | ||
| 115 | ctx.LongAdd("SHL.U64 {}.x,{},{};", inst, base, shift); | ||
| 116 | } | ||
| 117 | |||
| 118 | void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift) { | ||
| 119 | ctx.Add("SHR.U {}.x,{},{};", inst, base, shift); | ||
| 120 | } | ||
| 121 | |||
| 122 | void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, | ||
| 123 | ScalarU32 shift) { | ||
| 124 | ctx.LongAdd("SHR.U64 {}.x,{},{};", inst, base, shift); | ||
| 125 | } | ||
| 126 | |||
| 127 | void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 shift) { | ||
| 128 | ctx.Add("SHR.S {}.x,{},{};", inst, base, shift); | ||
| 129 | } | ||
| 130 | |||
| 131 | void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, | ||
| 132 | ScalarS32 shift) { | ||
| 133 | ctx.LongAdd("SHR.S64 {}.x,{},{};", inst, base, shift); | ||
| 134 | } | ||
| 135 | |||
| 136 | void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { | ||
| 137 | BitwiseLogicalOp(ctx, inst, a, b, "AND"); | ||
| 138 | } | ||
| 139 | |||
| 140 | void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { | ||
| 141 | BitwiseLogicalOp(ctx, inst, a, b, "OR"); | ||
| 142 | } | ||
| 143 | |||
| 144 | void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { | ||
| 145 | BitwiseLogicalOp(ctx, inst, a, b, "XOR"); | ||
| 146 | } | ||
| 147 | |||
| 148 | void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 insert, | ||
| 149 | ScalarS32 offset, ScalarS32 count) { | ||
| 150 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 151 | if (count.type != Type::Register && offset.type != Type::Register) { | ||
| 152 | ctx.Add("BFI.S {},{{{},{},0,0}},{},{};", ret, count, offset, insert, base); | ||
| 153 | } else { | ||
| 154 | ctx.Add("MOV.S RC.x,{};" | ||
| 155 | "MOV.S RC.y,{};" | ||
| 156 | "BFI.S {},RC,{},{};", | ||
| 157 | count, offset, ret, insert, base); | ||
| 158 | } | ||
| 159 | } | ||
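BFI expects the field width and offset packed into the .x and .y components of its first operand, so immediates can be packed inline in a vector literal while register operands are staged through the RC scratch register. For width 8 at offset 4, the two paths look roughly like (illustrative registers):

    BFI.S R3,{8,4,0,0},R1.x,R2.x;  # immediate width/offset packed inline
    MOV.S RC.x,R4.x;               # register width into RC.x
    MOV.S RC.y,R5.x;               # register offset into RC.y
    BFI.S R3,RC,R1.x,R2.x;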
| 160 | |||
| 161 | void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 offset, | ||
| 162 | ScalarS32 count) { | ||
| 163 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 164 | if (count.type != Type::Register && offset.type != Type::Register) { | ||
| 165 | ctx.Add("BFE.S {},{{{},{},0,0}},{};", ret, count, offset, base); | ||
| 166 | } else { | ||
| 167 | ctx.Add("MOV.S RC.x,{};" | ||
| 168 | "MOV.S RC.y,{};" | ||
| 169 | "BFE.S {},RC,{};", | ||
| 170 | count, offset, ret, base); | ||
| 171 | } | ||
| 172 | } | ||
| 173 | |||
| 174 | void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 offset, | ||
| 175 | ScalarU32 count) { | ||
| 176 | const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp); | ||
| 177 | const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp); | ||
| 178 | if (zero) { | ||
| 179 | zero->Invalidate(); | ||
| 180 | } | ||
| 181 | if (sign) { | ||
| 182 | sign->Invalidate(); | ||
| 183 | } | ||
| 184 | if (zero || sign) { | ||
| 185 | ctx.reg_alloc.InvalidateConditionCodes(); | ||
| 186 | } | ||
| 187 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 188 | if (count.type != Type::Register && offset.type != Type::Register) { | ||
| 189 | ctx.Add("BFE.U {},{{{},{},0,0}},{};", ret, count, offset, base); | ||
| 190 | } else { | ||
| 191 | ctx.Add("MOV.U RC.x,{};" | ||
| 192 | "MOV.U RC.y,{};" | ||
| 193 | "BFE.U {},RC,{};", | ||
| 194 | count, offset, ret, base); | ||
| 195 | } | ||
| 196 | if (zero) { | ||
| 197 | ctx.Add("SEQ.S {},{},0;", *zero, ret); | ||
| 198 | } | ||
| 199 | if (sign) { | ||
| 200 | ctx.Add("SLT.S {},{},0;", *sign, ret); | ||
| 201 | } | ||
| 202 | } | ||
| 203 | |||
| 204 | void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { | ||
| 205 | ctx.Add("BFR {},{};", inst, value); | ||
| 206 | } | ||
| 207 | |||
| 208 | void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { | ||
| 209 | ctx.Add("BTC {},{};", inst, value); | ||
| 210 | } | ||
| 211 | |||
| 212 | void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { | ||
| 213 | ctx.Add("NOT.S {},{};", inst, value); | ||
| 214 | } | ||
| 215 | |||
| 216 | void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { | ||
| 217 | ctx.Add("BTFM.S {},{};", inst, value); | ||
| 218 | } | ||
| 219 | |||
| 220 | void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) { | ||
| 221 | ctx.Add("BTFM.U {},{};", inst, value); | ||
| 222 | } | ||
| 223 | |||
| 224 | void EmitSMin32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { | ||
| 225 | ctx.Add("MIN.S {},{},{};", inst, a, b); | ||
| 226 | } | ||
| 227 | |||
| 228 | void EmitUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b) { | ||
| 229 | ctx.Add("MIN.U {},{},{};", inst, a, b); | ||
| 230 | } | ||
| 231 | |||
| 232 | void EmitSMax32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { | ||
| 233 | ctx.Add("MAX.S {},{},{};", inst, a, b); | ||
| 234 | } | ||
| 235 | |||
| 236 | void EmitUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b) { | ||
| 237 | ctx.Add("MAX.U {},{},{};", inst, a, b); | ||
| 238 | } | ||
| 239 | |||
| 240 | void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value, ScalarS32 min, ScalarS32 max) { | ||
| 241 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 242 | ctx.Add("MIN.S RC.x,{},{};" | ||
| 243 | "MAX.S {}.x,RC.x,{};", | ||
| 244 | max, value, ret, min); | ||
| 245 | } | ||
| 246 | |||
| 247 | void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 min, ScalarU32 max) { | ||
| 248 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 249 | ctx.Add("MIN.U RC.x,{},{};" | ||
| 250 | "MAX.U {}.x,RC.x,{};", | ||
| 251 | max, value, ret, min); | ||
| 252 | } | ||
| 253 | |||
| 254 | void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { | ||
| 255 | ctx.Add("SLT.S {}.x,{},{};", inst, lhs, rhs); | ||
| 256 | } | ||
| 257 | |||
| 258 | void EmitULessThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) { | ||
| 259 | ctx.Add("SLT.U {}.x,{},{};", inst, lhs, rhs); | ||
| 260 | } | ||
| 261 | |||
| 262 | void EmitIEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { | ||
| 263 | ctx.Add("SEQ.S {}.x,{},{};", inst, lhs, rhs); | ||
| 264 | } | ||
| 265 | |||
| 266 | void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { | ||
| 267 | ctx.Add("SLE.S {}.x,{},{};", inst, lhs, rhs); | ||
| 268 | } | ||
| 269 | |||
| 270 | void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) { | ||
| 271 | ctx.Add("SLE.U {}.x,{},{};", inst, lhs, rhs); | ||
| 272 | } | ||
| 273 | |||
| 274 | void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { | ||
| 275 | ctx.Add("SGT.S {}.x,{},{};", inst, lhs, rhs); | ||
| 276 | } | ||
| 277 | |||
| 278 | void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) { | ||
| 279 | ctx.Add("SGT.U {}.x,{},{};", inst, lhs, rhs); | ||
| 280 | } | ||
| 281 | |||
| 282 | void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { | ||
| 283 | ctx.Add("SNE.U {}.x,{},{};", inst, lhs, rhs); | ||
| 284 | } | ||
| 285 | |||
| 286 | void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { | ||
| 287 | ctx.Add("SGE.S {}.x,{},{};", inst, lhs, rhs); | ||
| 288 | } | ||
| 289 | |||
| 290 | void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) { | ||
| 291 | ctx.Add("SGE.U {}.x,{},{};", inst, lhs, rhs); | ||
| 292 | } | ||
| 293 | |||
| 294 | } // namespace Shader::Backend::GLASM | ||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp | |||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp new file mode 100644 index 000000000..af9fac7c1 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp | |||
| @@ -0,0 +1,568 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 11 | #include "shader_recompiler/runtime_info.h" | ||
| 12 | |||
| 13 | namespace Shader::Backend::GLASM { | ||
| 14 | namespace { | ||
| 15 | void StorageOp(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 16 | std::string_view then_expr, std::string_view else_expr = {}) { | ||
| 17 | // Operate on a bindless SSBO, calling the expression with bounds checking | ||
| 18 | // address = c[binding].xy | ||
| 19 | // length = c[binding].z | ||
| 20 | const u32 sb_binding{binding.U32()}; | ||
| 21 | ctx.Add("PK64.U DC,c[{}];" // pointer = address | ||
| 22 | "CVT.U64.U32 DC.z,{};" // offset = uint64_t(offset) | ||
| 23 | "ADD.U64 DC.x,DC.x,DC.z;" // pointer += offset | ||
| 24 | "SLT.U.CC RC.x,{},c[{}].z;", // cc = offset < length | ||
| 25 | sb_binding, offset, offset, sb_binding); | ||
| 26 | if (else_expr.empty()) { | ||
| 27 | ctx.Add("IF NE.x;{}ENDIF;", then_expr); | ||
| 28 | } else { | ||
| 29 | ctx.Add("IF NE.x;{}ELSE;{}ENDIF;", then_expr, else_expr); | ||
| 30 | } | ||
| 31 | } | ||
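As a sketch of this fallback path, a 32-bit storage load through StorageOp with binding 0, the offset in R0.x, and destination R1 (registers illustrative) expands to:

    PK64.U DC,c[0];             # pack the 64-bit base address from the constant buffer
    CVT.U64.U32 DC.z,R0.x;      # widen the 32-bit offset
    ADD.U64 DC.x,DC.x,DC.z;     # pointer += offset
    SLT.U.CC RC.x,R0.x,c[0].z;  # bounds check against the buffer length
    IF NE.x;LOAD.U32 R1,DC.x;ELSE;MOV.U R1,{0,0,0,0};ENDIF;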
| 32 | |||
| 33 | void GlobalStorageOp(EmitContext& ctx, Register address, bool pointer_based, std::string_view expr, | ||
| 34 | std::string_view else_expr = {}) { | ||
| 35 | const size_t num_buffers{ctx.info.storage_buffers_descriptors.size()}; | ||
| 36 | for (size_t index = 0; index < num_buffers; ++index) { | ||
| 37 | if (!ctx.info.nvn_buffer_used[index]) { | ||
| 38 | continue; | ||
| 39 | } | ||
| 40 | const auto& ssbo{ctx.info.storage_buffers_descriptors[index]}; | ||
| 41 | ctx.Add("LDC.U64 DC.x,c{}[{}];" // ssbo_addr | ||
| 42 | "LDC.U32 RC.x,c{}[{}];" // ssbo_size_u32 | ||
| 43 | "CVT.U64.U32 DC.y,RC.x;" // ssbo_size = ssbo_size_u32 | ||
| 44 | "ADD.U64 DC.y,DC.y,DC.x;" // ssbo_end = ssbo_addr + ssbo_size | ||
| 45 | "SGE.U64 RC.x,{}.x,DC.x;" // a = input_addr >= ssbo_addr ? -1 : 0 | ||
| 46 | "SLT.U64 RC.y,{}.x,DC.y;" // b = input_addr < ssbo_end ? -1 : 0 | ||
| 47 | "AND.U.CC RC.x,RC.x,RC.y;" // cond = a && b | ||
| 48 | "IF NE.x;" // if cond | ||
| 49 | "SUB.U64 DC.x,{}.x,DC.x;", // offset = input_addr - ssbo_addr | ||
| 50 | ssbo.cbuf_index, ssbo.cbuf_offset, ssbo.cbuf_index, ssbo.cbuf_offset + 8, address, | ||
| 51 | address, address); | ||
| 52 | if (pointer_based) { | ||
| 53 | ctx.Add("PK64.U DC.y,c[{}];" // host_ssbo = cbuf | ||
| 54 | "ADD.U64 DC.x,DC.x,DC.y;" // host_addr = host_ssbo + offset | ||
| 55 | "{}" | ||
| 56 | "ELSE;", | ||
| 57 | index, expr); | ||
| 58 | } else { | ||
| 59 | ctx.Add("CVT.U32.U64 RC.x,DC.x;" | ||
| 60 | "{},ssbo{}[RC.x];" | ||
| 61 | "ELSE;", | ||
| 62 | expr, index); | ||
| 63 | } | ||
| 64 | } | ||
| 65 | if (!else_expr.empty()) { | ||
| 66 | ctx.Add("{}", else_expr); | ||
| 67 | } | ||
| 68 | const size_t num_used_buffers{ctx.info.nvn_buffer_used.count()}; | ||
| 69 | for (size_t index = 0; index < num_used_buffers; ++index) { | ||
| 70 | ctx.Add("ENDIF;"); | ||
| 71 | } | ||
| 72 | } | ||
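GlobalStorageOp emits one address-range test per NVN buffer actually in use, producing a ladder of IF/ELSE blocks closed by one ENDIF per buffer. For a single used buffer whose descriptor sits at c0[272] (offsets illustrative) and a pointer-based 32-bit store, the body is roughly:

    LDC.U64 DC.x,c0[272];    # ssbo_addr
    LDC.U32 RC.x,c0[280];    # ssbo_size
    CVT.U64.U32 DC.y,RC.x;
    ADD.U64 DC.y,DC.y,DC.x;  # ssbo_end
    SGE.U64 RC.x,R0.x,DC.x;  # input_addr >= ssbo_addr
    SLT.U64 RC.y,R0.x,DC.y;  # input_addr < ssbo_end
    AND.U.CC RC.x,RC.x,RC.y;
    IF NE.x;
    SUB.U64 DC.x,R0.x,DC.x;  # offset into the buffer
    PK64.U DC.y,c[0];        # host SSBO base pointer
    ADD.U64 DC.x,DC.x,DC.y;  # host address
    STORE.U32 R1.x,DC.x;
    ELSE;
    ENDIF;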
| 73 | |||
| 74 | template <typename ValueType> | ||
| 75 | void Write(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, ValueType value, | ||
| 76 | std::string_view size) { | ||
| 77 | if (ctx.runtime_info.glasm_use_storage_buffers) { | ||
| 78 | ctx.Add("STB.{} {},ssbo{}[{}];", size, value, binding.U32(), offset); | ||
| 79 | } else { | ||
| 80 | StorageOp(ctx, binding, offset, fmt::format("STORE.{} {},DC.x;", size, value)); | ||
| 81 | } | ||
| 82 | } | ||
| 83 | |||
| 84 | void Load(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, | ||
| 85 | std::string_view size) { | ||
| 86 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 87 | if (ctx.runtime_info.glasm_use_storage_buffers) { | ||
| 88 | ctx.Add("LDB.{} {},ssbo{}[{}];", size, ret, binding.U32(), offset); | ||
| 89 | } else { | ||
| 90 | StorageOp(ctx, binding, offset, fmt::format("LOAD.{} {},DC.x;", size, ret), | ||
| 91 | fmt::format("MOV.U {},{{0,0,0,0}};", ret)); | ||
| 92 | } | ||
| 93 | } | ||
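When the runtime reports native storage-buffer support (glasm_use_storage_buffers), both helpers bypass the pointer arithmetic entirely and address the buffer directly. Illustratively:

    LDB.U32 R1,ssbo0[R0.x];    # Load: direct storage-buffer read
    STB.U32 R2.x,ssbo0[R0.x];  # Write: direct storage-buffer write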
| 94 | |||
| 95 | template <typename ValueType> | ||
| 96 | void GlobalWrite(EmitContext& ctx, Register address, ValueType value, std::string_view size) { | ||
| 97 | if (ctx.runtime_info.glasm_use_storage_buffers) { | ||
| 98 | GlobalStorageOp(ctx, address, false, fmt::format("STB.{} {}", size, value)); | ||
| 99 | } else { | ||
| 100 | GlobalStorageOp(ctx, address, true, fmt::format("STORE.{} {},DC.x;", size, value)); | ||
| 101 | } | ||
| 102 | } | ||
| 103 | |||
| 104 | void GlobalLoad(EmitContext& ctx, IR::Inst& inst, Register address, std::string_view size) { | ||
| 105 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 106 | if (ctx.runtime_info.glasm_use_storage_buffers) { | ||
| 107 | GlobalStorageOp(ctx, address, false, fmt::format("LDB.{} {}", size, ret)); | ||
| 108 | } else { | ||
| 109 | GlobalStorageOp(ctx, address, true, fmt::format("LOAD.{} {},DC.x;", size, ret), | ||
| 110 | fmt::format("MOV.S {},0;", ret)); | ||
| 111 | } | ||
| 112 | } | ||
| 113 | |||
| 114 | template <typename ValueType> | ||
| 115 | void Atom(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, | ||
| 116 | ValueType value, std::string_view operation, std::string_view size) { | ||
| 117 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 118 | if (ctx.runtime_info.glasm_use_storage_buffers) { | ||
| 119 | ctx.Add("ATOMB.{}.{} {},{},ssbo{}[{}];", operation, size, ret, value, binding.U32(), | ||
| 120 | offset); | ||
| 121 | } else { | ||
| 122 | StorageOp(ctx, binding, offset, | ||
| 123 | fmt::format("ATOM.{}.{} {},{},DC.x;", operation, size, ret, value)); | ||
| 124 | } | ||
| 125 | } | ||
| 126 | } // Anonymous namespace | ||
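Atom follows the same split as Load/Write: a direct buffer atomic when storage buffers are available, otherwise the global-memory atomic wrapped in StorageOp's bounds check. A sketch for a 32-bit atomic add (illustrative registers):

    ATOMB.ADD.U32 R1,R2.x,ssbo0[R0.x];  # storage-buffer path
    ATOM.ADD.U32 R1,R2.x,DC.x;          # fallback path, inside StorageOp's IF/ENDIF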
| 127 | |||
| 128 | void EmitLoadGlobalU8(EmitContext& ctx, IR::Inst& inst, Register address) { | ||
| 129 | GlobalLoad(ctx, inst, address, "U8"); | ||
| 130 | } | ||
| 131 | |||
| 132 | void EmitLoadGlobalS8(EmitContext& ctx, IR::Inst& inst, Register address) { | ||
| 133 | GlobalLoad(ctx, inst, address, "S8"); | ||
| 134 | } | ||
| 135 | |||
| 136 | void EmitLoadGlobalU16(EmitContext& ctx, IR::Inst& inst, Register address) { | ||
| 137 | GlobalLoad(ctx, inst, address, "U16"); | ||
| 138 | } | ||
| 139 | |||
| 140 | void EmitLoadGlobalS16(EmitContext& ctx, IR::Inst& inst, Register address) { | ||
| 141 | GlobalLoad(ctx, inst, address, "S16"); | ||
| 142 | } | ||
| 143 | |||
| 144 | void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, Register address) { | ||
| 145 | GlobalLoad(ctx, inst, address, "U32"); | ||
| 146 | } | ||
| 147 | |||
| 148 | void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, Register address) { | ||
| 149 | GlobalLoad(ctx, inst, address, "U32X2"); | ||
| 150 | } | ||
| 151 | |||
| 152 | void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, Register address) { | ||
| 153 | GlobalLoad(ctx, inst, address, "U32X4"); | ||
| 154 | } | ||
| 155 | |||
| 156 | void EmitWriteGlobalU8(EmitContext& ctx, Register address, Register value) { | ||
| 157 | GlobalWrite(ctx, address, value, "U8"); | ||
| 158 | } | ||
| 159 | |||
| 160 | void EmitWriteGlobalS8(EmitContext& ctx, Register address, Register value) { | ||
| 161 | GlobalWrite(ctx, address, value, "S8"); | ||
| 162 | } | ||
| 163 | |||
| 164 | void EmitWriteGlobalU16(EmitContext& ctx, Register address, Register value) { | ||
| 165 | GlobalWrite(ctx, address, value, "U16"); | ||
| 166 | } | ||
| 167 | |||
| 168 | void EmitWriteGlobalS16(EmitContext& ctx, Register address, Register value) { | ||
| 169 | GlobalWrite(ctx, address, value, "S16"); | ||
| 170 | } | ||
| 171 | |||
| 172 | void EmitWriteGlobal32(EmitContext& ctx, Register address, ScalarU32 value) { | ||
| 173 | GlobalWrite(ctx, address, value, "U32"); | ||
| 174 | } | ||
| 175 | |||
| 176 | void EmitWriteGlobal64(EmitContext& ctx, Register address, Register value) { | ||
| 177 | GlobalWrite(ctx, address, value, "U32X2"); | ||
| 178 | } | ||
| 179 | |||
| 180 | void EmitWriteGlobal128(EmitContext& ctx, Register address, Register value) { | ||
| 181 | GlobalWrite(ctx, address, value, "U32X4"); | ||
| 182 | } | ||
| 183 | |||
| 184 | void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 185 | ScalarU32 offset) { | ||
| 186 | Load(ctx, inst, binding, offset, "U8"); | ||
| 187 | } | ||
| 188 | |||
| 189 | void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 190 | ScalarU32 offset) { | ||
| 191 | Load(ctx, inst, binding, offset, "S8"); | ||
| 192 | } | ||
| 193 | |||
| 194 | void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 195 | ScalarU32 offset) { | ||
| 196 | Load(ctx, inst, binding, offset, "U16"); | ||
| 197 | } | ||
| 198 | |||
| 199 | void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 200 | ScalarU32 offset) { | ||
| 201 | Load(ctx, inst, binding, offset, "S16"); | ||
| 202 | } | ||
| 203 | |||
| 204 | void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 205 | ScalarU32 offset) { | ||
| 206 | Load(ctx, inst, binding, offset, "U32"); | ||
| 207 | } | ||
| 208 | |||
| 209 | void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 210 | ScalarU32 offset) { | ||
| 211 | Load(ctx, inst, binding, offset, "U32X2"); | ||
| 212 | } | ||
| 213 | |||
| 214 | void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 215 | ScalarU32 offset) { | ||
| 216 | Load(ctx, inst, binding, offset, "U32X4"); | ||
| 217 | } | ||
| 218 | |||
| 219 | void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 220 | ScalarU32 value) { | ||
| 221 | Write(ctx, binding, offset, value, "U8"); | ||
| 222 | } | ||
| 223 | |||
| 224 | void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 225 | ScalarS32 value) { | ||
| 226 | Write(ctx, binding, offset, value, "S8"); | ||
| 227 | } | ||
| 228 | |||
| 229 | void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 230 | ScalarU32 value) { | ||
| 231 | Write(ctx, binding, offset, value, "U16"); | ||
| 232 | } | ||
| 233 | |||
| 234 | void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 235 | ScalarS32 value) { | ||
| 236 | Write(ctx, binding, offset, value, "S16"); | ||
| 237 | } | ||
| 238 | |||
| 239 | void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 240 | ScalarU32 value) { | ||
| 241 | Write(ctx, binding, offset, value, "U32"); | ||
| 242 | } | ||
| 243 | |||
| 244 | void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 245 | Register value) { | ||
| 246 | Write(ctx, binding, offset, value, "U32X2"); | ||
| 247 | } | ||
| 248 | |||
| 249 | void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 250 | Register value) { | ||
| 251 | Write(ctx, binding, offset, value, "U32X4"); | ||
| 252 | } | ||
| 253 | |||
| 254 | void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 255 | ScalarU32 value) { | ||
| 256 | ctx.Add("ATOMS.ADD.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 257 | } | ||
| 258 | |||
| 259 | void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 260 | ScalarS32 value) { | ||
| 261 | ctx.Add("ATOMS.MIN.S32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 262 | } | ||
| 263 | |||
| 264 | void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 265 | ScalarU32 value) { | ||
| 266 | ctx.Add("ATOMS.MIN.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 267 | } | ||
| 268 | |||
| 269 | void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 270 | ScalarS32 value) { | ||
| 271 | ctx.Add("ATOMS.MAX.S32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 272 | } | ||
| 273 | |||
| 274 | void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 275 | ScalarU32 value) { | ||
| 276 | ctx.Add("ATOMS.MAX.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 277 | } | ||
| 278 | |||
| 279 | void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 280 | ScalarU32 value) { | ||
| 281 | ctx.Add("ATOMS.IWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 282 | } | ||
| 283 | |||
| 284 | void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 285 | ScalarU32 value) { | ||
| 286 | ctx.Add("ATOMS.DWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 287 | } | ||
| 288 | |||
| 289 | void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 290 | ScalarU32 value) { | ||
| 291 | ctx.Add("ATOMS.AND.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 292 | } | ||
| 293 | |||
| 294 | void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 295 | ScalarU32 value) { | ||
| 296 | ctx.Add("ATOMS.OR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 297 | } | ||
| 298 | |||
| 299 | void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 300 | ScalarU32 value) { | ||
| 301 | ctx.Add("ATOMS.XOR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 302 | } | ||
| 303 | |||
| 304 | void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 305 | ScalarU32 value) { | ||
| 306 | ctx.Add("ATOMS.EXCH.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 307 | } | ||
| 308 | |||
| 309 | void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 310 | Register value) { | ||
| 311 | ctx.LongAdd("ATOMS.EXCH.U64 {}.x,{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 312 | } | ||
| 313 | |||
| 314 | void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 315 | ScalarU32 offset, ScalarU32 value) { | ||
| 316 | Atom(ctx, inst, binding, offset, value, "ADD", "U32"); | ||
| 317 | } | ||
| 318 | |||
| 319 | void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 320 | ScalarU32 offset, ScalarS32 value) { | ||
| 321 | Atom(ctx, inst, binding, offset, value, "MIN", "S32"); | ||
| 322 | } | ||
| 323 | |||
| 324 | void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 325 | ScalarU32 offset, ScalarU32 value) { | ||
| 326 | Atom(ctx, inst, binding, offset, value, "MIN", "U32"); | ||
| 327 | } | ||
| 328 | |||
| 329 | void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 330 | ScalarU32 offset, ScalarS32 value) { | ||
| 331 | Atom(ctx, inst, binding, offset, value, "MAX", "S32"); | ||
| 332 | } | ||
| 333 | |||
| 334 | void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 335 | ScalarU32 offset, ScalarU32 value) { | ||
| 336 | Atom(ctx, inst, binding, offset, value, "MAX", "U32"); | ||
| 337 | } | ||
| 338 | |||
| 339 | void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 340 | ScalarU32 offset, ScalarU32 value) { | ||
| 341 | Atom(ctx, inst, binding, offset, value, "IWRAP", "U32"); | ||
| 342 | } | ||
| 343 | |||
| 344 | void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 345 | ScalarU32 offset, ScalarU32 value) { | ||
| 346 | Atom(ctx, inst, binding, offset, value, "DWRAP", "U32"); | ||
| 347 | } | ||
| 348 | |||
| 349 | void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 350 | ScalarU32 offset, ScalarU32 value) { | ||
| 351 | Atom(ctx, inst, binding, offset, value, "AND", "U32"); | ||
| 352 | } | ||
| 353 | |||
| 354 | void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 355 | ScalarU32 offset, ScalarU32 value) { | ||
| 356 | Atom(ctx, inst, binding, offset, value, "OR", "U32"); | ||
| 357 | } | ||
| 358 | |||
| 359 | void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 360 | ScalarU32 offset, ScalarU32 value) { | ||
| 361 | Atom(ctx, inst, binding, offset, value, "XOR", "U32"); | ||
| 362 | } | ||
| 363 | |||
| 364 | void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 365 | ScalarU32 offset, ScalarU32 value) { | ||
| 366 | Atom(ctx, inst, binding, offset, value, "EXCH", "U32"); | ||
| 367 | } | ||
| 368 | |||
| 369 | void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 370 | ScalarU32 offset, Register value) { | ||
| 371 | Atom(ctx, inst, binding, offset, value, "ADD", "U64"); | ||
| 372 | } | ||
| 373 | |||
| 374 | void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 375 | ScalarU32 offset, Register value) { | ||
| 376 | Atom(ctx, inst, binding, offset, value, "MIN", "S64"); | ||
| 377 | } | ||
| 378 | |||
| 379 | void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 380 | ScalarU32 offset, Register value) { | ||
| 381 | Atom(ctx, inst, binding, offset, value, "MIN", "U64"); | ||
| 382 | } | ||
| 383 | |||
| 384 | void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 385 | ScalarU32 offset, Register value) { | ||
| 386 | Atom(ctx, inst, binding, offset, value, "MAX", "S64"); | ||
| 387 | } | ||
| 388 | |||
| 389 | void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 390 | ScalarU32 offset, Register value) { | ||
| 391 | Atom(ctx, inst, binding, offset, value, "MAX", "U64"); | ||
| 392 | } | ||
| 393 | |||
| 394 | void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 395 | ScalarU32 offset, Register value) { | ||
| 396 | Atom(ctx, inst, binding, offset, value, "AND", "U64"); | ||
| 397 | } | ||
| 398 | |||
| 399 | void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 400 | ScalarU32 offset, Register value) { | ||
| 401 | Atom(ctx, inst, binding, offset, value, "OR", "U64"); | ||
| 402 | } | ||
| 403 | |||
| 404 | void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 405 | ScalarU32 offset, Register value) { | ||
| 406 | Atom(ctx, inst, binding, offset, value, "XOR", "U64"); | ||
| 407 | } | ||
| 408 | |||
| 409 | void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 410 | ScalarU32 offset, Register value) { | ||
| 411 | Atom(ctx, inst, binding, offset, value, "EXCH", "U64"); | ||
| 412 | } | ||
| 413 | |||
| 414 | void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 415 | ScalarU32 offset, ScalarF32 value) { | ||
| 416 | Atom(ctx, inst, binding, offset, value, "ADD", "F32"); | ||
| 417 | } | ||
| 418 | |||
| 419 | void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 420 | ScalarU32 offset, Register value) { | ||
| 421 | Atom(ctx, inst, binding, offset, value, "ADD", "F16x2"); | ||
| 422 | } | ||
| 423 | |||
| 424 | void EmitStorageAtomicAddF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 425 | [[maybe_unused]] const IR::Value& binding, | ||
| 426 | [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { | ||
| 427 | throw NotImplementedException("GLASM instruction"); | ||
| 428 | } | ||
| 429 | |||
| 430 | void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 431 | ScalarU32 offset, Register value) { | ||
| 432 | Atom(ctx, inst, binding, offset, value, "MIN", "F16x2"); | ||
| 433 | } | ||
| 434 | |||
| 435 | void EmitStorageAtomicMinF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 436 | [[maybe_unused]] const IR::Value& binding, | ||
| 437 | [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { | ||
| 438 | throw NotImplementedException("GLASM instruction"); | ||
| 439 | } | ||
| 440 | |||
| 441 | void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 442 | ScalarU32 offset, Register value) { | ||
| 443 | Atom(ctx, inst, binding, offset, value, "MAX", "F16x2"); | ||
| 444 | } | ||
| 445 | |||
| 446 | void EmitStorageAtomicMaxF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 447 | [[maybe_unused]] const IR::Value& binding, | ||
| 448 | [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { | ||
| 449 | throw NotImplementedException("GLASM instruction"); | ||
| 450 | } | ||
| 451 | |||
| 452 | void EmitGlobalAtomicIAdd32(EmitContext&) { | ||
| 453 | throw NotImplementedException("GLASM instruction"); | ||
| 454 | } | ||
| 455 | |||
| 456 | void EmitGlobalAtomicSMin32(EmitContext&) { | ||
| 457 | throw NotImplementedException("GLASM instruction"); | ||
| 458 | } | ||
| 459 | |||
| 460 | void EmitGlobalAtomicUMin32(EmitContext&) { | ||
| 461 | throw NotImplementedException("GLASM instruction"); | ||
| 462 | } | ||
| 463 | |||
| 464 | void EmitGlobalAtomicSMax32(EmitContext&) { | ||
| 465 | throw NotImplementedException("GLASM instruction"); | ||
| 466 | } | ||
| 467 | |||
| 468 | void EmitGlobalAtomicUMax32(EmitContext&) { | ||
| 469 | throw NotImplementedException("GLASM instruction"); | ||
| 470 | } | ||
| 471 | |||
| 472 | void EmitGlobalAtomicInc32(EmitContext&) { | ||
| 473 | throw NotImplementedException("GLASM instruction"); | ||
| 474 | } | ||
| 475 | |||
| 476 | void EmitGlobalAtomicDec32(EmitContext&) { | ||
| 477 | throw NotImplementedException("GLASM instruction"); | ||
| 478 | } | ||
| 479 | |||
| 480 | void EmitGlobalAtomicAnd32(EmitContext&) { | ||
| 481 | throw NotImplementedException("GLASM instruction"); | ||
| 482 | } | ||
| 483 | |||
| 484 | void EmitGlobalAtomicOr32(EmitContext&) { | ||
| 485 | throw NotImplementedException("GLASM instruction"); | ||
| 486 | } | ||
| 487 | |||
| 488 | void EmitGlobalAtomicXor32(EmitContext&) { | ||
| 489 | throw NotImplementedException("GLASM instruction"); | ||
| 490 | } | ||
| 491 | |||
| 492 | void EmitGlobalAtomicExchange32(EmitContext&) { | ||
| 493 | throw NotImplementedException("GLASM instruction"); | ||
| 494 | } | ||
| 495 | |||
| 496 | void EmitGlobalAtomicIAdd64(EmitContext&) { | ||
| 497 | throw NotImplementedException("GLASM instruction"); | ||
| 498 | } | ||
| 499 | |||
| 500 | void EmitGlobalAtomicSMin64(EmitContext&) { | ||
| 501 | throw NotImplementedException("GLASM instruction"); | ||
| 502 | } | ||
| 503 | |||
| 504 | void EmitGlobalAtomicUMin64(EmitContext&) { | ||
| 505 | throw NotImplementedException("GLASM instruction"); | ||
| 506 | } | ||
| 507 | |||
| 508 | void EmitGlobalAtomicSMax64(EmitContext&) { | ||
| 509 | throw NotImplementedException("GLASM instruction"); | ||
| 510 | } | ||
| 511 | |||
| 512 | void EmitGlobalAtomicUMax64(EmitContext&) { | ||
| 513 | throw NotImplementedException("GLASM instruction"); | ||
| 514 | } | ||
| 515 | |||
| 516 | void EmitGlobalAtomicInc64(EmitContext&) { | ||
| 517 | throw NotImplementedException("GLASM instruction"); | ||
| 518 | } | ||
| 519 | |||
| 520 | void EmitGlobalAtomicDec64(EmitContext&) { | ||
| 521 | throw NotImplementedException("GLASM instruction"); | ||
| 522 | } | ||
| 523 | |||
| 524 | void EmitGlobalAtomicAnd64(EmitContext&) { | ||
| 525 | throw NotImplementedException("GLASM instruction"); | ||
| 526 | } | ||
| 527 | |||
| 528 | void EmitGlobalAtomicOr64(EmitContext&) { | ||
| 529 | throw NotImplementedException("GLASM instruction"); | ||
| 530 | } | ||
| 531 | |||
| 532 | void EmitGlobalAtomicXor64(EmitContext&) { | ||
| 533 | throw NotImplementedException("GLASM instruction"); | ||
| 534 | } | ||
| 535 | |||
| 536 | void EmitGlobalAtomicExchange64(EmitContext&) { | ||
| 537 | throw NotImplementedException("GLASM instruction"); | ||
| 538 | } | ||
| 539 | |||
| 540 | void EmitGlobalAtomicAddF32(EmitContext&) { | ||
| 541 | throw NotImplementedException("GLASM instruction"); | ||
| 542 | } | ||
| 543 | |||
| 544 | void EmitGlobalAtomicAddF16x2(EmitContext&) { | ||
| 545 | throw NotImplementedException("GLASM instruction"); | ||
| 546 | } | ||
| 547 | |||
| 548 | void EmitGlobalAtomicAddF32x2(EmitContext&) { | ||
| 549 | throw NotImplementedException("GLASM instruction"); | ||
| 550 | } | ||
| 551 | |||
| 552 | void EmitGlobalAtomicMinF16x2(EmitContext&) { | ||
| 553 | throw NotImplementedException("GLASM instruction"); | ||
| 554 | } | ||
| 555 | |||
| 556 | void EmitGlobalAtomicMinF32x2(EmitContext&) { | ||
| 557 | throw NotImplementedException("GLASM instruction"); | ||
| 558 | } | ||
| 559 | |||
| 560 | void EmitGlobalAtomicMaxF16x2(EmitContext&) { | ||
| 561 | throw NotImplementedException("GLASM instruction"); | ||
| 562 | } | ||
| 563 | |||
| 564 | void EmitGlobalAtomicMaxF32x2(EmitContext&) { | ||
| 565 | throw NotImplementedException("GLASM instruction"); | ||
| 566 | } | ||
| 567 | |||
| 568 | } // namespace Shader::Backend::GLASM | ||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp new file mode 100644 index 000000000..ff64c6924 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp | |||
| @@ -0,0 +1,273 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 11 | |||
| 12 | #ifdef _MSC_VER | ||
| 13 | #pragma warning(disable : 4100) | ||
| 14 | #endif | ||
| 15 | |||
| 16 | namespace Shader::Backend::GLASM { | ||
| 17 | |||
| 18 | #define NotImplemented() throw NotImplementedException("GLASM instruction {}", __LINE__) | ||
| 19 | |||
| 20 | static void DefinePhi(EmitContext& ctx, IR::Inst& phi) { | ||
| 21 | switch (phi.Arg(0).Type()) { | ||
| 22 | case IR::Type::U1: | ||
| 23 | case IR::Type::U32: | ||
| 24 | case IR::Type::F32: | ||
| 25 | ctx.reg_alloc.Define(phi); | ||
| 26 | break; | ||
| 27 | case IR::Type::U64: | ||
| 28 | case IR::Type::F64: | ||
| 29 | ctx.reg_alloc.LongDefine(phi); | ||
| 30 | break; | ||
| 31 | default: | ||
| 32 | throw NotImplementedException("Phi node type {}", phi.Type()); | ||
| 33 | } | ||
| 34 | } | ||
| 35 | |||
| 36 | void EmitPhi(EmitContext& ctx, IR::Inst& phi) { | ||
| 37 | const size_t num_args{phi.NumArgs()}; | ||
| 38 | for (size_t i = 0; i < num_args; ++i) { | ||
| 39 | ctx.reg_alloc.Consume(phi.Arg(i)); | ||
| 40 | } | ||
| 41 | if (!phi.Definition<Id>().is_valid) { | ||
| 42 | // The phi node wasn't forward-defined | ||
| 43 | DefinePhi(ctx, phi); | ||
| 44 | } | ||
| 45 | } | ||
| 46 | |||
| 47 | void EmitVoid(EmitContext&) {} | ||
| 48 | |||
| 49 | void EmitReference(EmitContext& ctx, const IR::Value& value) { | ||
| 50 | ctx.reg_alloc.Consume(value); | ||
| 51 | } | ||
| 52 | |||
| 53 | void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& value) { | ||
| 54 | IR::Inst& phi{RegAlloc::AliasInst(*phi_value.Inst())}; | ||
| 55 | if (!phi.Definition<Id>().is_valid) { | ||
| 56 | // The phi node wasn't forward-defined | ||
| 57 | DefinePhi(ctx, phi); | ||
| 58 | } | ||
| 59 | const Register phi_reg{ctx.reg_alloc.Consume(IR::Value{&phi})}; | ||
| 60 | const Value eval_value{ctx.reg_alloc.Consume(value)}; | ||
| 61 | |||
| 62 | if (phi_reg == eval_value) { | ||
| 63 | return; | ||
| 64 | } | ||
| 65 | switch (phi.Flags<IR::Type>()) { | ||
| 66 | case IR::Type::U1: | ||
| 67 | case IR::Type::U32: | ||
| 68 | case IR::Type::F32: | ||
| 69 | ctx.Add("MOV.S {}.x,{};", phi_reg, ScalarS32{eval_value}); | ||
| 70 | break; | ||
| 71 | case IR::Type::U64: | ||
| 72 | case IR::Type::F64: | ||
| 73 | ctx.Add("MOV.U64 {}.x,{};", phi_reg, ScalarRegister{eval_value}); | ||
| 74 | break; | ||
| 75 | default: | ||
| 76 | throw NotImplementedException("Phi node type {}", phi.Type()); | ||
| 77 | } | ||
| 78 | } | ||
| 79 | |||
| 80 | void EmitJoin(EmitContext& ctx) { | ||
| 81 | NotImplemented(); | ||
| 82 | } | ||
| 83 | |||
| 84 | void EmitDemoteToHelperInvocation(EmitContext& ctx) { | ||
| 85 | ctx.Add("KIL TR.x;"); | ||
| 86 | } | ||
| 87 | |||
| 88 | void EmitBarrier(EmitContext& ctx) { | ||
| 89 | ctx.Add("BAR;"); | ||
| 90 | } | ||
| 91 | |||
| 92 | void EmitWorkgroupMemoryBarrier(EmitContext& ctx) { | ||
| 93 | ctx.Add("MEMBAR.CTA;"); | ||
| 94 | } | ||
| 95 | |||
| 96 | void EmitDeviceMemoryBarrier(EmitContext& ctx) { | ||
| 97 | ctx.Add("MEMBAR;"); | ||
| 98 | } | ||
| 99 | |||
| 100 | void EmitPrologue(EmitContext& ctx) { | ||
| 101 | // TODO | ||
| 102 | } | ||
| 103 | |||
| 104 | void EmitEpilogue(EmitContext& ctx) { | ||
| 105 | // TODO | ||
| 106 | } | ||
| 107 | |||
| 108 | void EmitEmitVertex(EmitContext& ctx, ScalarS32 stream) { | ||
| 109 | if (stream.type == Type::U32 && stream.imm_u32 == 0) { | ||
| 110 | ctx.Add("EMIT;"); | ||
| 111 | } else { | ||
| 112 | ctx.Add("EMITS {};", stream); | ||
| 113 | } | ||
| 114 | } | ||
| 115 | |||
| 116 | void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { | ||
| 117 | if (!stream.IsImmediate()) { | ||
| 118 | LOG_WARNING(Shader_GLASM, "Stream is not immediate"); | ||
| 119 | } | ||
| 120 | ctx.reg_alloc.Consume(stream); | ||
| 121 | ctx.Add("ENDPRIM;"); | ||
| 122 | } | ||
| 123 | |||
| 124 | void EmitGetRegister(EmitContext& ctx) { | ||
| 125 | NotImplemented(); | ||
| 126 | } | ||
| 127 | |||
| 128 | void EmitSetRegister(EmitContext& ctx) { | ||
| 129 | NotImplemented(); | ||
| 130 | } | ||
| 131 | |||
| 132 | void EmitGetPred(EmitContext& ctx) { | ||
| 133 | NotImplemented(); | ||
| 134 | } | ||
| 135 | |||
| 136 | void EmitSetPred(EmitContext& ctx) { | ||
| 137 | NotImplemented(); | ||
| 138 | } | ||
| 139 | |||
| 140 | void EmitSetGotoVariable(EmitContext& ctx) { | ||
| 141 | NotImplemented(); | ||
| 142 | } | ||
| 143 | |||
| 144 | void EmitGetGotoVariable(EmitContext& ctx) { | ||
| 145 | NotImplemented(); | ||
| 146 | } | ||
| 147 | |||
| 148 | void EmitSetIndirectBranchVariable(EmitContext& ctx) { | ||
| 149 | NotImplemented(); | ||
| 150 | } | ||
| 151 | |||
| 152 | void EmitGetIndirectBranchVariable(EmitContext& ctx) { | ||
| 153 | NotImplemented(); | ||
| 154 | } | ||
| 155 | |||
| 156 | void EmitGetZFlag(EmitContext& ctx) { | ||
| 157 | NotImplemented(); | ||
| 158 | } | ||
| 159 | |||
| 160 | void EmitGetSFlag(EmitContext& ctx) { | ||
| 161 | NotImplemented(); | ||
| 162 | } | ||
| 163 | |||
| 164 | void EmitGetCFlag(EmitContext& ctx) { | ||
| 165 | NotImplemented(); | ||
| 166 | } | ||
| 167 | |||
| 168 | void EmitGetOFlag(EmitContext& ctx) { | ||
| 169 | NotImplemented(); | ||
| 170 | } | ||
| 171 | |||
| 172 | void EmitSetZFlag(EmitContext& ctx) { | ||
| 173 | NotImplemented(); | ||
| 174 | } | ||
| 175 | |||
| 176 | void EmitSetSFlag(EmitContext& ctx) { | ||
| 177 | NotImplemented(); | ||
| 178 | } | ||
| 179 | |||
| 180 | void EmitSetCFlag(EmitContext& ctx) { | ||
| 181 | NotImplemented(); | ||
| 182 | } | ||
| 183 | |||
| 184 | void EmitSetOFlag(EmitContext& ctx) { | ||
| 185 | NotImplemented(); | ||
| 186 | } | ||
| 187 | |||
| 188 | void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst) { | ||
| 189 | ctx.Add("MOV.S {},invocation.groupid;", inst); | ||
| 190 | } | ||
| 191 | |||
| 192 | void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst) { | ||
| 193 | ctx.Add("MOV.S {},invocation.localid;", inst); | ||
| 194 | } | ||
| 195 | |||
| 196 | void EmitInvocationId(EmitContext& ctx, IR::Inst& inst) { | ||
| 197 | ctx.Add("MOV.S {}.x,primitive_invocation.x;", inst); | ||
| 198 | } | ||
| 199 | |||
| 200 | void EmitSampleId(EmitContext& ctx, IR::Inst& inst) { | ||
| 201 | ctx.Add("MOV.S {}.x,fragment.sampleid.x;", inst); | ||
| 202 | } | ||
| 203 | |||
| 204 | void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst) { | ||
| 205 | ctx.Add("MOV.S {}.x,fragment.helperthread.x;", inst); | ||
| 206 | } | ||
| 207 | |||
| 208 | void EmitYDirection(EmitContext& ctx, IR::Inst& inst) { | ||
| 209 | ctx.uses_y_direction = true; | ||
| 210 | ctx.Add("MOV.F {}.x,y_direction[0].w;", inst); | ||
| 211 | } | ||
| 212 | |||
| 213 | void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) { | ||
| 214 | ctx.Add("MOV.S {}.x,0;", inst); | ||
| 215 | } | ||
| 216 | |||
| 217 | void EmitUndefU8(EmitContext& ctx, IR::Inst& inst) { | ||
| 218 | ctx.Add("MOV.S {}.x,0;", inst); | ||
| 219 | } | ||
| 220 | |||
| 221 | void EmitUndefU16(EmitContext& ctx, IR::Inst& inst) { | ||
| 222 | ctx.Add("MOV.S {}.x,0;", inst); | ||
| 223 | } | ||
| 224 | |||
| 225 | void EmitUndefU32(EmitContext& ctx, IR::Inst& inst) { | ||
| 226 | ctx.Add("MOV.S {}.x,0;", inst); | ||
| 227 | } | ||
| 228 | |||
| 229 | void EmitUndefU64(EmitContext& ctx, IR::Inst& inst) { | ||
| 230 | ctx.LongAdd("MOV.S64 {}.x,0;", inst); | ||
| 231 | } | ||
| 232 | |||
| 233 | void EmitGetZeroFromOp(EmitContext& ctx) { | ||
| 234 | NotImplemented(); | ||
| 235 | } | ||
| 236 | |||
| 237 | void EmitGetSignFromOp(EmitContext& ctx) { | ||
| 238 | NotImplemented(); | ||
| 239 | } | ||
| 240 | |||
| 241 | void EmitGetCarryFromOp(EmitContext& ctx) { | ||
| 242 | NotImplemented(); | ||
| 243 | } | ||
| 244 | |||
| 245 | void EmitGetOverflowFromOp(EmitContext& ctx) { | ||
| 246 | NotImplemented(); | ||
| 247 | } | ||
| 248 | |||
| 249 | void EmitGetSparseFromOp(EmitContext& ctx) { | ||
| 250 | NotImplemented(); | ||
| 251 | } | ||
| 252 | |||
| 253 | void EmitGetInBoundsFromOp(EmitContext& ctx) { | ||
| 254 | NotImplemented(); | ||
| 255 | } | ||
| 256 | |||
| 257 | void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { | ||
| 258 | ctx.Add("OR.S {},{},{};", inst, a, b); | ||
| 259 | } | ||
| 260 | |||
| 261 | void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { | ||
| 262 | ctx.Add("AND.S {},{},{};", inst, a, b); | ||
| 263 | } | ||
| 264 | |||
| 265 | void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { | ||
| 266 | ctx.Add("XOR.S {},{},{};", inst, a, b); | ||
| 267 | } | ||
| 268 | |||
| 269 | void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { | ||
| 270 | ctx.Add("SEQ.S {},{},0;", inst, value); | ||
| 271 | } | ||
| 272 | |||
| 273 | } // namespace Shader::Backend::GLASM | ||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp new file mode 100644 index 000000000..68fff613c --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp | |||
| @@ -0,0 +1,67 @@ | |||
| 1 | |||
| 2 | // Copyright 2021 yuzu Emulator Project | ||
| 3 | // Licensed under GPLv2 or any later version | ||
| 4 | // Refer to the license.txt file included. | ||
| 5 | |||
| 6 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 7 | #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 9 | |||
| 10 | namespace Shader::Backend::GLASM { | ||
| 11 | |||
| 12 | void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, | ||
| 13 | ScalarS32 false_value) { | ||
| 14 | ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value); | ||
| 15 | } | ||
| 16 | |||
| 17 | void EmitSelectU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond, | ||
| 18 | [[maybe_unused]] ScalarS32 true_value, [[maybe_unused]] ScalarS32 false_value) { | ||
| 19 | throw NotImplementedException("GLASM instruction"); | ||
| 20 | } | ||
| 21 | |||
| 22 | void EmitSelectU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond, | ||
| 23 | [[maybe_unused]] ScalarS32 true_value, [[maybe_unused]] ScalarS32 false_value) { | ||
| 24 | throw NotImplementedException("GLASM instruction"); | ||
| 25 | } | ||
| 26 | |||
| 27 | void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, | ||
| 28 | ScalarS32 false_value) { | ||
| 29 | ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value); | ||
| 30 | } | ||
| 31 | |||
| 32 | void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, Register true_value, | ||
| 33 | Register false_value) { | ||
| 34 | ctx.reg_alloc.InvalidateConditionCodes(); | ||
| 35 | const Register ret{ctx.reg_alloc.LongDefine(inst)}; | ||
| 36 | if (ret == true_value) { | ||
| 37 | ctx.Add("MOV.S.CC RC.x,{};" | ||
| 38 | "MOV.U64 {}.x(EQ.x),{};", | ||
| 39 | cond, ret, false_value); | ||
| 40 | } else if (ret == false_value) { | ||
| 41 | ctx.Add("MOV.S.CC RC.x,{};" | ||
| 42 | "MOV.U64 {}.x(NE.x),{};", | ||
| 43 | cond, ret, true_value); | ||
| 44 | } else { | ||
| 45 | ctx.Add("MOV.S.CC RC.x,{};" | ||
| 46 | "MOV.U64 {}.x,{};" | ||
| 47 | "MOV.U64 {}.x(NE.x),{};", | ||
| 48 | cond, ret, false_value, ret, true_value); | ||
| 49 | } | ||
| 50 | } | ||
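Since CMP cannot select between 64-bit pairs, EmitSelectU64 loads the condition into the condition codes and uses predicated 64-bit moves, with special cases when the destination aliases one of the inputs. The general (no-alias) case, assuming illustrative long registers D0-D2:

    MOV.S.CC RC.x,R0.x;       # set condition codes from cond
    MOV.U64 D2.x,D1.x;        # start from the false value
    MOV.U64 D2.x(NE.x),D0.x;  # overwrite with the true value when cond != 0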
| 51 | |||
| 52 | void EmitSelectF16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond, | ||
| 53 | [[maybe_unused]] Register true_value, [[maybe_unused]] Register false_value) { | ||
| 54 | throw NotImplementedException("GLASM instruction"); | ||
| 55 | } | ||
| 56 | |||
| 57 | void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, | ||
| 58 | ScalarS32 false_value) { | ||
| 59 | ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value); | ||
| 60 | } | ||
| 61 | |||
| 62 | void EmitSelectF64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond, | ||
| 63 | [[maybe_unused]] Register true_value, [[maybe_unused]] Register false_value) { | ||
| 64 | throw NotImplementedException("GLASM instruction"); | ||
| 65 | } | ||
| 66 | |||
| 67 | } // namespace Shader::Backend::GLASM | ||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp new file mode 100644 index 000000000..c1498f449 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp | |||
| @@ -0,0 +1,58 @@ | |||
| 1 | |||
| 2 | // Copyright 2021 yuzu Emulator Project | ||
| 3 | // Licensed under GPLv2 or any later version | ||
| 4 | // Refer to the license.txt file included. | ||
| 5 | |||
| 6 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 7 | #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 9 | |||
| 10 | namespace Shader::Backend::GLASM { | ||
| 11 | void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { | ||
| 12 | ctx.Add("LDS.U8 {},shared_mem[{}];", inst, offset); | ||
| 13 | } | ||
| 14 | |||
| 15 | void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { | ||
| 16 | ctx.Add("LDS.S8 {},shared_mem[{}];", inst, offset); | ||
| 17 | } | ||
| 18 | |||
| 19 | void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { | ||
| 20 | ctx.Add("LDS.U16 {},shared_mem[{}];", inst, offset); | ||
| 21 | } | ||
| 22 | |||
| 23 | void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { | ||
| 24 | ctx.Add("LDS.S16 {},shared_mem[{}];", inst, offset); | ||
| 25 | } | ||
| 26 | |||
| 27 | void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { | ||
| 28 | ctx.Add("LDS.U32 {},shared_mem[{}];", inst, offset); | ||
| 29 | } | ||
| 30 | |||
| 31 | void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { | ||
| 32 | ctx.Add("LDS.U32X2 {},shared_mem[{}];", inst, offset); | ||
| 33 | } | ||
| 34 | |||
| 35 | void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { | ||
| 36 | ctx.Add("LDS.U32X4 {},shared_mem[{}];", inst, offset); | ||
| 37 | } | ||
| 38 | |||
| 39 | void EmitWriteSharedU8(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) { | ||
| 40 | ctx.Add("STS.U8 {},shared_mem[{}];", value, offset); | ||
| 41 | } | ||
| 42 | |||
| 43 | void EmitWriteSharedU16(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) { | ||
| 44 | ctx.Add("STS.U16 {},shared_mem[{}];", value, offset); | ||
| 45 | } | ||
| 46 | |||
| 47 | void EmitWriteSharedU32(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) { | ||
| 48 | ctx.Add("STS.U32 {},shared_mem[{}];", value, offset); | ||
| 49 | } | ||
| 50 | |||
| 51 | void EmitWriteSharedU64(EmitContext& ctx, ScalarU32 offset, Register value) { | ||
| 52 | ctx.Add("STS.U32X2 {},shared_mem[{}];", value, offset); | ||
| 53 | } | ||
| 54 | |||
| 55 | void EmitWriteSharedU128(EmitContext& ctx, ScalarU32 offset, Register value) { | ||
| 56 | ctx.Add("STS.U32X4 {},shared_mem[{}];", value, offset); | ||
| 57 | } | ||
| 58 | } // namespace Shader::Backend::GLASM | ||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_special.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_special.cpp new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_special.cpp | |||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp | |||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp new file mode 100644 index 000000000..544d475b4 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp | |||
| @@ -0,0 +1,150 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 6 | #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" | ||
| 7 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 8 | #include "shader_recompiler/profile.h" | ||
| 9 | |||
| 10 | namespace Shader::Backend::GLASM { | ||
| 11 | |||
| 12 | void EmitLaneId(EmitContext& ctx, IR::Inst& inst) { | ||
| 13 | ctx.Add("MOV.S {}.x,{}.threadid;", inst, ctx.stage_name); | ||
| 14 | } | ||
| 15 | |||
| 16 | void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) { | ||
| 17 | ctx.Add("TGALL.S {}.x,{};", inst, pred); | ||
| 18 | } | ||
| 19 | |||
| 20 | void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) { | ||
| 21 | ctx.Add("TGANY.S {}.x,{};", inst, pred); | ||
| 22 | } | ||
| 23 | |||
| 24 | void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) { | ||
| 25 | ctx.Add("TGEQ.S {}.x,{};", inst, pred); | ||
| 26 | } | ||
| 27 | |||
| 28 | void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) { | ||
| 29 | ctx.Add("TGBALLOT {}.x,{};", inst, pred); | ||
| 30 | } | ||
| 31 | |||
| 32 | void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) { | ||
| 33 | ctx.Add("MOV.U {},{}.threadeqmask;", inst, ctx.stage_name); | ||
| 34 | } | ||
| 35 | |||
| 36 | void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst) { | ||
| 37 | ctx.Add("MOV.U {},{}.threadltmask;", inst, ctx.stage_name); | ||
| 38 | } | ||
| 39 | |||
| 40 | void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst) { | ||
| 41 | ctx.Add("MOV.U {},{}.threadlemask;", inst, ctx.stage_name); | ||
| 42 | } | ||
| 43 | |||
| 44 | void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst) { | ||
| 45 | ctx.Add("MOV.U {},{}.threadgtmask;", inst, ctx.stage_name); | ||
| 46 | } | ||
| 47 | |||
| 48 | void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) { | ||
| 49 | ctx.Add("MOV.U {},{}.threadgemask;", inst, ctx.stage_name); | ||
| 50 | } | ||
| 51 | |||
| 52 | static void Shuffle(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, | ||
| 53 | const IR::Value& clamp, const IR::Value& segmentation_mask, | ||
| 54 | std::string_view op) { | ||
| 55 | IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)}; | ||
| 56 | if (in_bounds) { | ||
| 57 | in_bounds->Invalidate(); | ||
| 58 | } | ||
| 59 | std::string mask; | ||
| 60 | if (clamp.IsImmediate() && segmentation_mask.IsImmediate()) { | ||
| 61 | mask = fmt::to_string(clamp.U32() | (segmentation_mask.U32() << 8)); | ||
| 62 | } else { | ||
| 63 | mask = "RC"; | ||
| 64 | ctx.Add("BFI.U RC.x,{{5,8,0,0}},{},{};", | ||
| 65 | ScalarU32{ctx.reg_alloc.Consume(segmentation_mask)}, | ||
| 66 | ScalarU32{ctx.reg_alloc.Consume(clamp)}); | ||
| 67 | } | ||
| 68 | const Register value_ret{ctx.reg_alloc.Define(inst)}; | ||
| 69 | if (in_bounds) { | ||
| 70 | const Register bounds_ret{ctx.reg_alloc.Define(*in_bounds)}; | ||
| 71 | ctx.Add("SHF{}.U {},{},{},{};" | ||
| 72 | "MOV.U {}.x,{}.y;", | ||
| 73 | op, bounds_ret, value, index, mask, value_ret, bounds_ret); | ||
| 74 | } else { | ||
| 75 | ctx.Add("SHF{}.U {},{},{},{};" | ||
| 76 | "MOV.U {}.x,{}.y;", | ||
| 77 | op, value_ret, value, index, mask, value_ret, value_ret); | ||
| 78 | } | ||
| 79 | } | ||
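The packed mask places the clamp in the low bits and the segmentation mask at bit 8, matching the {5,8,0,0} BFI pattern used for the register path above. With an immediate clamp of 31, a segmentation mask of 0, and no GetInBoundsFromOp consumer, an index shuffle reduces to (illustrative registers):

    SHFIDX.U R1,R0.x,R2.x,31;  # value, source lane, packed clamp/segment mask
    MOV.U R1.x,R1.y;           # the shuffled value is returned in the .y component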
| 80 | |||
| 81 | void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, | ||
| 82 | const IR::Value& clamp, const IR::Value& segmentation_mask) { | ||
| 83 | Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "IDX"); | ||
| 84 | } | ||
| 85 | |||
| 86 | void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, | ||
| 87 | const IR::Value& clamp, const IR::Value& segmentation_mask) { | ||
| 88 | Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "UP"); | ||
| 89 | } | ||
| 90 | |||
| 91 | void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, | ||
| 92 | const IR::Value& clamp, const IR::Value& segmentation_mask) { | ||
| 93 | Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "DOWN"); | ||
| 94 | } | ||
| 95 | |||
| 96 | void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, | ||
| 97 | const IR::Value& clamp, const IR::Value& segmentation_mask) { | ||
| 98 | Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "XOR"); | ||
| 99 | } | ||
| 100 | |||
| 101 | void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a, ScalarF32 op_b, | ||
| 102 | ScalarU32 swizzle) { | ||
| 103 | const auto ret{ctx.reg_alloc.Define(inst)}; | ||
| 104 | ctx.Add("AND.U RC.z,{}.threadid,3;" | ||
| 105 | "SHL.U RC.z,RC.z,1;" | ||
| 106 | "SHR.U RC.z,{},RC.z;" | ||
| 107 | "AND.U RC.z,RC.z,3;" | ||
| 108 | "MUL.F RC.x,{},FSWZA[RC.z];" | ||
| 109 | "MUL.F RC.y,{},FSWZB[RC.z];" | ||
| 110 | "ADD.F {}.x,RC.x,RC.y;", | ||
| 111 | ctx.stage_name, swizzle, op_a, op_b, ret); | ||
| 112 | } | ||
| 113 | |||
| 114 | void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { | ||
| 115 | if (ctx.profile.support_derivative_control) { | ||
| 116 | ctx.Add("DDX.FINE {}.x,{};", inst, p); | ||
| 117 | } else { | ||
| 118 | LOG_WARNING(Shader_GLASM, "Fine derivatives not supported by device"); | ||
| 119 | ctx.Add("DDX {}.x,{};", inst, p); | ||
| 120 | } | ||
| 121 | } | ||
| 122 | |||
| 123 | void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { | ||
| 124 | if (ctx.profile.support_derivative_control) { | ||
| 125 | ctx.Add("DDY.FINE {}.x,{};", inst, p); | ||
| 126 | } else { | ||
| 127 | LOG_WARNING(Shader_GLASM, "Fine derivatives not supported by device"); | ||
| 128 | ctx.Add("DDY {}.x,{};", inst, p); | ||
| 129 | } | ||
| 130 | } | ||
| 131 | |||
| 132 | void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { | ||
| 133 | if (ctx.profile.support_derivative_control) { | ||
| 134 | ctx.Add("DDX.COARSE {}.x,{};", inst, p); | ||
| 135 | } else { | ||
| 136 | LOG_WARNING(Shader_GLASM, "Coarse derivatives not supported by device"); | ||
| 137 | ctx.Add("DDX {}.x,{};", inst, p); | ||
| 138 | } | ||
| 139 | } | ||
| 140 | |||
| 141 | void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { | ||
| 142 | if (ctx.profile.support_derivative_control) { | ||
| 143 | ctx.Add("DDY.COARSE {}.x,{};", inst, p); | ||
| 144 | } else { | ||
| 145 | LOG_WARNING(Shader_GLASM, "Coarse derivatives not supported by device"); | ||
| 146 | ctx.Add("DDY {}.x,{};", inst, p); | ||
| 147 | } | ||
| 148 | } | ||
| 149 | |||
| 150 | } // namespace Shader::Backend::GLASM | ||
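Note on the emitters above: they append NV_gpu_program5-style GLASM statements through the context formatters. As an illustration only (register numbers are hypothetical), with the predicate in R1 and the result defined to R0, EmitVoteAll produces:

    TGALL.S R0.x,R1.x;

and Shuffle with op "IDX", an immediate clamp of 31, and a zero segmentation mask produces:

    SHFIDX.U R0,R2,R3,31;
    MOV.U R0.x,R0.y;

where the trailing MOV copies the shuffled value out of the .y component written by SHF.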
diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.cpp b/src/shader_recompiler/backend/glasm/reg_alloc.cpp new file mode 100644 index 000000000..4c046db6e --- /dev/null +++ b/src/shader_recompiler/backend/glasm/reg_alloc.cpp | |||
| @@ -0,0 +1,186 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string> | ||
| 6 | |||
| 7 | #include <fmt/format.h> | ||
| 8 | |||
| 9 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 10 | #include "shader_recompiler/backend/glasm/reg_alloc.h" | ||
| 11 | #include "shader_recompiler/exception.h" | ||
| 12 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 13 | |||
| 14 | namespace Shader::Backend::GLASM { | ||
| 15 | |||
| 16 | Register RegAlloc::Define(IR::Inst& inst) { | ||
| 17 | return Define(inst, false); | ||
| 18 | } | ||
| 19 | |||
| 20 | Register RegAlloc::LongDefine(IR::Inst& inst) { | ||
| 21 | return Define(inst, true); | ||
| 22 | } | ||
| 23 | |||
| 24 | Value RegAlloc::Peek(const IR::Value& value) { | ||
| 25 | if (value.IsImmediate()) { | ||
| 26 | return MakeImm(value); | ||
| 27 | } else { | ||
| 28 | return PeekInst(*value.Inst()); | ||
| 29 | } | ||
| 30 | } | ||
| 31 | |||
| 32 | Value RegAlloc::Consume(const IR::Value& value) { | ||
| 33 | if (value.IsImmediate()) { | ||
| 34 | return MakeImm(value); | ||
| 35 | } else { | ||
| 36 | return ConsumeInst(*value.Inst()); | ||
| 37 | } | ||
| 38 | } | ||
| 39 | |||
| 40 | void RegAlloc::Unref(IR::Inst& inst) { | ||
| 41 | IR::Inst& value_inst{AliasInst(inst)}; | ||
| 42 | value_inst.DestructiveRemoveUsage(); | ||
| 43 | if (!value_inst.HasUses()) { | ||
| 44 | Free(value_inst.Definition<Id>()); | ||
| 45 | } | ||
| 46 | } | ||
| 47 | |||
| 48 | Register RegAlloc::AllocReg() { | ||
| 49 | Register ret; | ||
| 50 | ret.type = Type::Register; | ||
| 51 | ret.id = Alloc(false); | ||
| 52 | return ret; | ||
| 53 | } | ||
| 54 | |||
| 55 | Register RegAlloc::AllocLongReg() { | ||
| 56 | Register ret; | ||
| 57 | ret.type = Type::Register; | ||
| 58 | ret.id = Alloc(true); | ||
| 59 | return ret; | ||
| 60 | } | ||
| 61 | |||
| 62 | void RegAlloc::FreeReg(Register reg) { | ||
| 63 | Free(reg.id); | ||
| 64 | } | ||
| 65 | |||
| 66 | Value RegAlloc::MakeImm(const IR::Value& value) { | ||
| 67 | Value ret; | ||
| 68 | switch (value.Type()) { | ||
| 69 | case IR::Type::Void: | ||
| 70 | ret.type = Type::Void; | ||
| 71 | break; | ||
| 72 | case IR::Type::U1: | ||
| 73 | ret.type = Type::U32; | ||
| 74 | ret.imm_u32 = value.U1() ? 0xffffffff : 0; | ||
| 75 | break; | ||
| 76 | case IR::Type::U32: | ||
| 77 | ret.type = Type::U32; | ||
| 78 | ret.imm_u32 = value.U32(); | ||
| 79 | break; | ||
| 80 | case IR::Type::F32: | ||
| 81 | ret.type = Type::U32; | ||
| 82 | ret.imm_u32 = Common::BitCast<u32>(value.F32()); | ||
| 83 | break; | ||
| 84 | case IR::Type::U64: | ||
| 85 | ret.type = Type::U64; | ||
| 86 | ret.imm_u64 = value.U64(); | ||
| 87 | break; | ||
| 88 | case IR::Type::F64: | ||
| 89 | ret.type = Type::U64; | ||
| 90 | ret.imm_u64 = Common::BitCast<u64>(value.F64()); | ||
| 91 | break; | ||
| 92 | default: | ||
| 93 | throw NotImplementedException("Immediate type {}", value.Type()); | ||
| 94 | } | ||
| 95 | return ret; | ||
| 96 | } | ||
| 97 | |||
| 98 | Register RegAlloc::Define(IR::Inst& inst, bool is_long) { | ||
| 99 | if (inst.HasUses()) { | ||
| 100 | inst.SetDefinition<Id>(Alloc(is_long)); | ||
| 101 | } else { | ||
| 102 | Id id{}; | ||
| 103 | id.is_long.Assign(is_long ? 1 : 0); | ||
| 104 | id.is_null.Assign(1); | ||
| 105 | inst.SetDefinition<Id>(id); | ||
| 106 | } | ||
| 107 | return Register{PeekInst(inst)}; | ||
| 108 | } | ||
| 109 | |||
| 110 | Value RegAlloc::PeekInst(IR::Inst& inst) { | ||
| 111 | Value ret; | ||
| 112 | ret.type = Type::Register; | ||
| 113 | ret.id = inst.Definition<Id>(); | ||
| 114 | return ret; | ||
| 115 | } | ||
| 116 | |||
| 117 | Value RegAlloc::ConsumeInst(IR::Inst& inst) { | ||
| 118 | Unref(inst); | ||
| 119 | return PeekInst(inst); | ||
| 120 | } | ||
| 121 | |||
| 122 | Id RegAlloc::Alloc(bool is_long) { | ||
| 123 | size_t& num_regs{is_long ? num_used_long_registers : num_used_registers}; | ||
| 124 | std::bitset<NUM_REGS>& use{is_long ? long_register_use : register_use}; | ||
| 125 | if (num_used_registers + num_used_long_registers < NUM_REGS) { | ||
| 126 | for (size_t reg = 0; reg < NUM_REGS; ++reg) { | ||
| 127 | if (use[reg]) { | ||
| 128 | continue; | ||
| 129 | } | ||
| 130 | num_regs = std::max(num_regs, reg + 1); | ||
| 131 | use[reg] = true; | ||
| 132 | Id ret{}; | ||
| 133 | ret.is_valid.Assign(1); | ||
| 134 | ret.is_long.Assign(is_long ? 1 : 0); | ||
| 135 | ret.is_spill.Assign(0); | ||
| 136 | ret.is_condition_code.Assign(0); | ||
| 137 | ret.is_null.Assign(0); | ||
| 138 | ret.index.Assign(static_cast<u32>(reg)); | ||
| 139 | return ret; | ||
| 140 | } | ||
| 141 | } | ||
| 142 | throw NotImplementedException("Register spilling"); | ||
| 143 | } | ||
| 144 | |||
| 145 | void RegAlloc::Free(Id id) { | ||
| 146 | if (id.is_valid == 0) { | ||
| 147 | throw LogicError("Freeing invalid register"); | ||
| 148 | } | ||
| 149 | if (id.is_spill != 0) { | ||
| 150 | throw NotImplementedException("Free spill"); | ||
| 151 | } | ||
| 152 | if (id.is_long != 0) { | ||
| 153 | long_register_use[id.index] = false; | ||
| 154 | } else { | ||
| 155 | register_use[id.index] = false; | ||
| 156 | } | ||
| 157 | } | ||
| 158 | |||
| 159 | /*static*/ bool RegAlloc::IsAliased(const IR::Inst& inst) { | ||
| 160 | switch (inst.GetOpcode()) { | ||
| 161 | case IR::Opcode::Identity: | ||
| 162 | case IR::Opcode::BitCastU16F16: | ||
| 163 | case IR::Opcode::BitCastU32F32: | ||
| 164 | case IR::Opcode::BitCastU64F64: | ||
| 165 | case IR::Opcode::BitCastF16U16: | ||
| 166 | case IR::Opcode::BitCastF32U32: | ||
| 167 | case IR::Opcode::BitCastF64U64: | ||
| 168 | return true; | ||
| 169 | default: | ||
| 170 | return false; | ||
| 171 | } | ||
| 172 | } | ||
| 173 | |||
| 174 | /*static*/ IR::Inst& RegAlloc::AliasInst(IR::Inst& inst) { | ||
| 175 | IR::Inst* it{&inst}; | ||
| 176 | while (IsAliased(*it)) { | ||
| 177 | const IR::Value arg{it->Arg(0)}; | ||
| 178 | if (arg.IsImmediate()) { | ||
| 179 | break; | ||
| 180 | } | ||
| 181 | it = arg.InstRecursive(); | ||
| 182 | } | ||
| 183 | return *it; | ||
| 184 | } | ||
| 185 | |||
| 186 | } // namespace Shader::Backend::GLASM | ||
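Note: RegAlloc is reference-counted rather than scope-based. Consume drops one use from the defining instruction (resolving bit-cast/identity chains through AliasInst) and frees the register once the last use is gone, while Define hands back a null RC/DC scratch id when the result has no uses. A minimal sketch of the expected call pattern in an emitter (hypothetical instruction, not part of this change):

    void EmitExampleAdd(EmitContext& ctx, IR::Inst& inst, const IR::Value& a, const IR::Value& b) {
        // Consume operands before defining the result so their registers can be reused
        const ScalarS32 lhs{ctx.reg_alloc.Consume(a)};
        const ScalarS32 rhs{ctx.reg_alloc.Consume(b)};
        const Register ret{ctx.reg_alloc.Define(inst)};
        ctx.Add("ADD.S {}.x,{},{};", ret, lhs, rhs);
    }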
diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.h b/src/shader_recompiler/backend/glasm/reg_alloc.h new file mode 100644 index 000000000..82aec66c6 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/reg_alloc.h | |||
| @@ -0,0 +1,303 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <bitset> | ||
| 8 | |||
| 9 | #include <fmt/format.h> | ||
| 10 | |||
| 11 | #include "common/bit_cast.h" | ||
| 12 | #include "common/bit_field.h" | ||
| 13 | #include "common/common_types.h" | ||
| 14 | #include "shader_recompiler/exception.h" | ||
| 15 | |||
| 16 | namespace Shader::IR { | ||
| 17 | class Inst; | ||
| 18 | class Value; | ||
| 19 | } // namespace Shader::IR | ||
| 20 | |||
| 21 | namespace Shader::Backend::GLASM { | ||
| 22 | |||
| 23 | class EmitContext; | ||
| 24 | |||
| 25 | enum class Type : u32 { | ||
| 26 | Void, | ||
| 27 | Register, | ||
| 28 | U32, | ||
| 29 | U64, | ||
| 30 | }; | ||
| 31 | |||
| 32 | struct Id { | ||
| 33 | union { | ||
| 34 | u32 raw; | ||
| 35 | BitField<0, 1, u32> is_valid; | ||
| 36 | BitField<1, 1, u32> is_long; | ||
| 37 | BitField<2, 1, u32> is_spill; | ||
| 38 | BitField<3, 1, u32> is_condition_code; | ||
| 39 | BitField<4, 1, u32> is_null; | ||
| 40 | BitField<5, 27, u32> index; | ||
| 41 | }; | ||
| 42 | |||
| 43 | bool operator==(Id rhs) const noexcept { | ||
| 44 | return raw == rhs.raw; | ||
| 45 | } | ||
| 46 | bool operator!=(Id rhs) const noexcept { | ||
| 47 | return !operator==(rhs); | ||
| 48 | } | ||
| 49 | }; | ||
| 50 | static_assert(sizeof(Id) == sizeof(u32)); | ||
| 51 | |||
| 52 | struct Value { | ||
| 53 | Type type; | ||
| 54 | union { | ||
| 55 | Id id; | ||
| 56 | u32 imm_u32; | ||
| 57 | u64 imm_u64; | ||
| 58 | }; | ||
| 59 | |||
| 60 | bool operator==(const Value& rhs) const noexcept { | ||
| 61 | if (type != rhs.type) { | ||
| 62 | return false; | ||
| 63 | } | ||
| 64 | switch (type) { | ||
| 65 | case Type::Void: | ||
| 66 | return true; | ||
| 67 | case Type::Register: | ||
| 68 | return id == rhs.id; | ||
| 69 | case Type::U32: | ||
| 70 | return imm_u32 == rhs.imm_u32; | ||
| 71 | case Type::U64: | ||
| 72 | return imm_u64 == rhs.imm_u64; | ||
| 73 | } | ||
| 74 | return false; | ||
| 75 | } | ||
| 76 | bool operator!=(const Value& rhs) const noexcept { | ||
| 77 | return !operator==(rhs); | ||
| 78 | } | ||
| 79 | }; | ||
| 80 | struct Register : Value {}; | ||
| 81 | struct ScalarRegister : Value {}; | ||
| 82 | struct ScalarU32 : Value {}; | ||
| 83 | struct ScalarS32 : Value {}; | ||
| 84 | struct ScalarF32 : Value {}; | ||
| 85 | struct ScalarF64 : Value {}; | ||
| 86 | |||
| 87 | class RegAlloc { | ||
| 88 | public: | ||
| 89 | RegAlloc() = default; | ||
| 90 | |||
| 91 | Register Define(IR::Inst& inst); | ||
| 92 | |||
| 93 | Register LongDefine(IR::Inst& inst); | ||
| 94 | |||
| 95 | [[nodiscard]] Value Peek(const IR::Value& value); | ||
| 96 | |||
| 97 | Value Consume(const IR::Value& value); | ||
| 98 | |||
| 99 | void Unref(IR::Inst& inst); | ||
| 100 | |||
| 101 | [[nodiscard]] Register AllocReg(); | ||
| 102 | |||
| 103 | [[nodiscard]] Register AllocLongReg(); | ||
| 104 | |||
| 105 | void FreeReg(Register reg); | ||
| 106 | |||
| 107 | void InvalidateConditionCodes() { | ||
| 108 | // This does nothing for now | ||
| 109 | } | ||
| 110 | |||
| 111 | [[nodiscard]] size_t NumUsedRegisters() const noexcept { | ||
| 112 | return num_used_registers; | ||
| 113 | } | ||
| 114 | |||
| 115 | [[nodiscard]] size_t NumUsedLongRegisters() const noexcept { | ||
| 116 | return num_used_long_registers; | ||
| 117 | } | ||
| 118 | |||
| 119 | [[nodiscard]] bool IsEmpty() const noexcept { | ||
| 120 | return register_use.none() && long_register_use.none(); | ||
| 121 | } | ||
| 122 | |||
| 123 | /// Returns true if the instruction is expected to be aliased to another instruction | ||
| 124 | static bool IsAliased(const IR::Inst& inst); | ||
| 125 | |||
| 126 | /// Returns the underlying value out of an alias sequence | ||
| 127 | static IR::Inst& AliasInst(IR::Inst& inst); | ||
| 128 | |||
| 129 | private: | ||
| 130 | static constexpr size_t NUM_REGS = 4096; | ||
| 131 | static constexpr size_t NUM_ELEMENTS = 4; | ||
| 132 | |||
| 133 | Value MakeImm(const IR::Value& value); | ||
| 134 | |||
| 135 | Register Define(IR::Inst& inst, bool is_long); | ||
| 136 | |||
| 137 | Value PeekInst(IR::Inst& inst); | ||
| 138 | |||
| 139 | Value ConsumeInst(IR::Inst& inst); | ||
| 140 | |||
| 141 | Id Alloc(bool is_long); | ||
| 142 | |||
| 143 | void Free(Id id); | ||
| 144 | |||
| 145 | size_t num_used_registers{}; | ||
| 146 | size_t num_used_long_registers{}; | ||
| 147 | std::bitset<NUM_REGS> register_use{}; | ||
| 148 | std::bitset<NUM_REGS> long_register_use{}; | ||
| 149 | }; | ||
| 150 | |||
| 151 | template <bool scalar, typename FormatContext> | ||
| 152 | auto FormatTo(FormatContext& ctx, Id id) { | ||
| 153 | if (id.is_condition_code != 0) { | ||
| 154 | throw NotImplementedException("Condition code emission"); | ||
| 155 | } | ||
| 156 | if (id.is_spill != 0) { | ||
| 157 | throw NotImplementedException("Spill emission"); | ||
| 158 | } | ||
| 159 | if constexpr (scalar) { | ||
| 160 | if (id.is_null != 0) { | ||
| 161 | return fmt::format_to(ctx.out(), "{}", id.is_long != 0 ? "DC.x" : "RC.x"); | ||
| 162 | } | ||
| 163 | if (id.is_long != 0) { | ||
| 164 | return fmt::format_to(ctx.out(), "D{}.x", id.index.Value()); | ||
| 165 | } else { | ||
| 166 | return fmt::format_to(ctx.out(), "R{}.x", id.index.Value()); | ||
| 167 | } | ||
| 168 | } else { | ||
| 169 | if (id.is_null != 0) { | ||
| 170 | return fmt::format_to(ctx.out(), "{}", id.is_long != 0 ? "DC" : "RC"); | ||
| 171 | } | ||
| 172 | if (id.is_long != 0) { | ||
| 173 | return fmt::format_to(ctx.out(), "D{}", id.index.Value()); | ||
| 174 | } else { | ||
| 175 | return fmt::format_to(ctx.out(), "R{}", id.index.Value()); | ||
| 176 | } | ||
| 177 | } | ||
| 178 | } | ||
| 179 | |||
| 180 | } // namespace Shader::Backend::GLASM | ||
| 181 | |||
| 182 | template <> | ||
| 183 | struct fmt::formatter<Shader::Backend::GLASM::Id> { | ||
| 184 | constexpr auto parse(format_parse_context& ctx) { | ||
| 185 | return ctx.begin(); | ||
| 186 | } | ||
| 187 | template <typename FormatContext> | ||
| 188 | auto format(Shader::Backend::GLASM::Id id, FormatContext& ctx) { | ||
| 189 | return Shader::Backend::GLASM::FormatTo<true>(ctx, id); | ||
| 190 | } | ||
| 191 | }; | ||
| 192 | |||
| 193 | template <> | ||
| 194 | struct fmt::formatter<Shader::Backend::GLASM::Register> { | ||
| 195 | constexpr auto parse(format_parse_context& ctx) { | ||
| 196 | return ctx.begin(); | ||
| 197 | } | ||
| 198 | template <typename FormatContext> | ||
| 199 | auto format(const Shader::Backend::GLASM::Register& value, FormatContext& ctx) { | ||
| 200 | if (value.type != Shader::Backend::GLASM::Type::Register) { | ||
| 201 | throw Shader::InvalidArgument("Register value type is not register"); | ||
| 202 | } | ||
| 203 | return Shader::Backend::GLASM::FormatTo<false>(ctx, value.id); | ||
| 204 | } | ||
| 205 | }; | ||
| 206 | |||
| 207 | template <> | ||
| 208 | struct fmt::formatter<Shader::Backend::GLASM::ScalarRegister> { | ||
| 209 | constexpr auto parse(format_parse_context& ctx) { | ||
| 210 | return ctx.begin(); | ||
| 211 | } | ||
| 212 | template <typename FormatContext> | ||
| 213 | auto format(const Shader::Backend::GLASM::ScalarRegister& value, FormatContext& ctx) { | ||
| 214 | if (value.type != Shader::Backend::GLASM::Type::Register) { | ||
| 215 | throw Shader::InvalidArgument("Register value type is not register"); | ||
| 216 | } | ||
| 217 | return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id); | ||
| 218 | } | ||
| 219 | }; | ||
| 220 | |||
| 221 | template <> | ||
| 222 | struct fmt::formatter<Shader::Backend::GLASM::ScalarU32> { | ||
| 223 | constexpr auto parse(format_parse_context& ctx) { | ||
| 224 | return ctx.begin(); | ||
| 225 | } | ||
| 226 | template <typename FormatContext> | ||
| 227 | auto format(const Shader::Backend::GLASM::ScalarU32& value, FormatContext& ctx) { | ||
| 228 | switch (value.type) { | ||
| 229 | case Shader::Backend::GLASM::Type::Void: | ||
| 230 | break; | ||
| 231 | case Shader::Backend::GLASM::Type::Register: | ||
| 232 | return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id); | ||
| 233 | case Shader::Backend::GLASM::Type::U32: | ||
| 234 | return fmt::format_to(ctx.out(), "{}", value.imm_u32); | ||
| 235 | case Shader::Backend::GLASM::Type::U64: | ||
| 236 | break; | ||
| 237 | } | ||
| 238 | throw Shader::InvalidArgument("Invalid value type {}", value.type); | ||
| 239 | } | ||
| 240 | }; | ||
| 241 | |||
| 242 | template <> | ||
| 243 | struct fmt::formatter<Shader::Backend::GLASM::ScalarS32> { | ||
| 244 | constexpr auto parse(format_parse_context& ctx) { | ||
| 245 | return ctx.begin(); | ||
| 246 | } | ||
| 247 | template <typename FormatContext> | ||
| 248 | auto format(const Shader::Backend::GLASM::ScalarS32& value, FormatContext& ctx) { | ||
| 249 | switch (value.type) { | ||
| 250 | case Shader::Backend::GLASM::Type::Void: | ||
| 251 | break; | ||
| 252 | case Shader::Backend::GLASM::Type::Register: | ||
| 253 | return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id); | ||
| 254 | case Shader::Backend::GLASM::Type::U32: | ||
| 255 | return fmt::format_to(ctx.out(), "{}", static_cast<s32>(value.imm_u32)); | ||
| 256 | case Shader::Backend::GLASM::Type::U64: | ||
| 257 | break; | ||
| 258 | } | ||
| 259 | throw Shader::InvalidArgument("Invalid value type {}", value.type); | ||
| 260 | } | ||
| 261 | }; | ||
| 262 | |||
| 263 | template <> | ||
| 264 | struct fmt::formatter<Shader::Backend::GLASM::ScalarF32> { | ||
| 265 | constexpr auto parse(format_parse_context& ctx) { | ||
| 266 | return ctx.begin(); | ||
| 267 | } | ||
| 268 | template <typename FormatContext> | ||
| 269 | auto format(const Shader::Backend::GLASM::ScalarF32& value, FormatContext& ctx) { | ||
| 270 | switch (value.type) { | ||
| 271 | case Shader::Backend::GLASM::Type::Void: | ||
| 272 | break; | ||
| 273 | case Shader::Backend::GLASM::Type::Register: | ||
| 274 | return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id); | ||
| 275 | case Shader::Backend::GLASM::Type::U32: | ||
| 276 | return fmt::format_to(ctx.out(), "{}", Common::BitCast<f32>(value.imm_u32)); | ||
| 277 | case Shader::Backend::GLASM::Type::U64: | ||
| 278 | break; | ||
| 279 | } | ||
| 280 | throw Shader::InvalidArgument("Invalid value type {}", value.type); | ||
| 281 | } | ||
| 282 | }; | ||
| 283 | |||
| 284 | template <> | ||
| 285 | struct fmt::formatter<Shader::Backend::GLASM::ScalarF64> { | ||
| 286 | constexpr auto parse(format_parse_context& ctx) { | ||
| 287 | return ctx.begin(); | ||
| 288 | } | ||
| 289 | template <typename FormatContext> | ||
| 290 | auto format(const Shader::Backend::GLASM::ScalarF64& value, FormatContext& ctx) { | ||
| 291 | switch (value.type) { | ||
| 292 | case Shader::Backend::GLASM::Type::Void: | ||
| 293 | break; | ||
| 294 | case Shader::Backend::GLASM::Type::Register: | ||
| 295 | return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id); | ||
| 296 | case Shader::Backend::GLASM::Type::U32: | ||
| 297 | break; | ||
| 298 | case Shader::Backend::GLASM::Type::U64: | ||
| 299 | return fmt::format_to(ctx.out(), "{}", Common::BitCast<f64>(value.imm_u64)); | ||
| 300 | } | ||
| 301 | throw Shader::InvalidArgument("Invalid value type {}", value.type); | ||
| 302 | } | ||
| 303 | }; | ||
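Note: as an illustration of the formatters above (index chosen arbitrarily), a valid short register id with index 7 renders as "R7" through the Register formatter and "R7.x" through the scalar formatters; setting is_long selects the 64-bit "D7"/"D7.x" names, and a null id falls back to the RC/DC scratch registers:

    Shader::Backend::GLASM::Id id{};
    id.is_valid.Assign(1);
    id.index.Assign(7);
    fmt::format("{}", id); // "R7.x" (the Id formatter is scalar)
    id.is_long.Assign(1);
    fmt::format("{}", id); // "D7.x"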
diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp new file mode 100644 index 000000000..4e6f2c0fe --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp | |||
| @@ -0,0 +1,715 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/bindings.h" | ||
| 6 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 7 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 8 | #include "shader_recompiler/profile.h" | ||
| 9 | #include "shader_recompiler/runtime_info.h" | ||
| 10 | |||
| 11 | namespace Shader::Backend::GLSL { | ||
| 12 | namespace { | ||
| 13 | u32 CbufIndex(size_t offset) { | ||
| 14 | return (offset / 4) % 4; | ||
| 15 | } | ||
| 16 | |||
| 17 | char Swizzle(size_t offset) { | ||
| 18 | return "xyzw"[CbufIndex(offset)]; | ||
| 19 | } | ||
| 20 | |||
| 21 | std::string_view InterpDecorator(Interpolation interp) { | ||
| 22 | switch (interp) { | ||
| 23 | case Interpolation::Smooth: | ||
| 24 | return ""; | ||
| 25 | case Interpolation::Flat: | ||
| 26 | return "flat "; | ||
| 27 | case Interpolation::NoPerspective: | ||
| 28 | return "noperspective "; | ||
| 29 | } | ||
| 30 | throw InvalidArgument("Invalid interpolation {}", interp); | ||
| 31 | } | ||
| 32 | |||
| 33 | std::string_view InputArrayDecorator(Stage stage) { | ||
| 34 | switch (stage) { | ||
| 35 | case Stage::Geometry: | ||
| 36 | case Stage::TessellationControl: | ||
| 37 | case Stage::TessellationEval: | ||
| 38 | return "[]"; | ||
| 39 | default: | ||
| 40 | return ""; | ||
| 41 | } | ||
| 42 | } | ||
| 43 | |||
| 44 | bool StoresPerVertexAttributes(Stage stage) { | ||
| 45 | switch (stage) { | ||
| 46 | case Stage::VertexA: | ||
| 47 | case Stage::VertexB: | ||
| 48 | case Stage::Geometry: | ||
| 49 | case Stage::TessellationEval: | ||
| 50 | return true; | ||
| 51 | default: | ||
| 52 | return false; | ||
| 53 | } | ||
| 54 | } | ||
| 55 | |||
| 56 | std::string OutputDecorator(Stage stage, u32 size) { | ||
| 57 | switch (stage) { | ||
| 58 | case Stage::TessellationControl: | ||
| 59 | return fmt::format("[{}]", size); | ||
| 60 | default: | ||
| 61 | return ""; | ||
| 62 | } | ||
| 63 | } | ||
| 64 | |||
| 65 | std::string_view SamplerType(TextureType type, bool is_depth) { | ||
| 66 | if (is_depth) { | ||
| 67 | switch (type) { | ||
| 68 | case TextureType::Color1D: | ||
| 69 | return "sampler1DShadow"; | ||
| 70 | case TextureType::ColorArray1D: | ||
| 71 | return "sampler1DArrayShadow"; | ||
| 72 | case TextureType::Color2D: | ||
| 73 | return "sampler2DShadow"; | ||
| 74 | case TextureType::ColorArray2D: | ||
| 75 | return "sampler2DArrayShadow"; | ||
| 76 | case TextureType::ColorCube: | ||
| 77 | return "samplerCubeShadow"; | ||
| 78 | case TextureType::ColorArrayCube: | ||
| 79 | return "samplerCubeArrayShadow"; | ||
| 80 | default: | ||
| 81 | throw NotImplementedException("Texture type: {}", type); | ||
| 82 | } | ||
| 83 | } | ||
| 84 | switch (type) { | ||
| 85 | case TextureType::Color1D: | ||
| 86 | return "sampler1D"; | ||
| 87 | case TextureType::ColorArray1D: | ||
| 88 | return "sampler1DArray"; | ||
| 89 | case TextureType::Color2D: | ||
| 90 | return "sampler2D"; | ||
| 91 | case TextureType::ColorArray2D: | ||
| 92 | return "sampler2DArray"; | ||
| 93 | case TextureType::Color3D: | ||
| 94 | return "sampler3D"; | ||
| 95 | case TextureType::ColorCube: | ||
| 96 | return "samplerCube"; | ||
| 97 | case TextureType::ColorArrayCube: | ||
| 98 | return "samplerCubeArray"; | ||
| 99 | case TextureType::Buffer: | ||
| 100 | return "samplerBuffer"; | ||
| 101 | default: | ||
| 102 | throw NotImplementedException("Texture type: {}", type); | ||
| 103 | } | ||
| 104 | } | ||
| 105 | |||
| 106 | std::string_view ImageType(TextureType type) { | ||
| 107 | switch (type) { | ||
| 108 | case TextureType::Color1D: | ||
| 109 | return "uimage1D"; | ||
| 110 | case TextureType::ColorArray1D: | ||
| 111 | return "uimage1DArray"; | ||
| 112 | case TextureType::Color2D: | ||
| 113 | return "uimage2D"; | ||
| 114 | case TextureType::ColorArray2D: | ||
| 115 | return "uimage2DArray"; | ||
| 116 | case TextureType::Color3D: | ||
| 117 | return "uimage3D"; | ||
| 118 | case TextureType::ColorCube: | ||
| 119 | return "uimageCube"; | ||
| 120 | case TextureType::ColorArrayCube: | ||
| 121 | return "uimageCubeArray"; | ||
| 122 | case TextureType::Buffer: | ||
| 123 | return "uimageBuffer"; | ||
| 124 | default: | ||
| 125 | throw NotImplementedException("Image type: {}", type); | ||
| 126 | } | ||
| 127 | } | ||
| 128 | |||
| 129 | std::string_view ImageFormatString(ImageFormat format) { | ||
| 130 | switch (format) { | ||
| 131 | case ImageFormat::Typeless: | ||
| 132 | return ""; | ||
| 133 | case ImageFormat::R8_UINT: | ||
| 134 | return ",r8ui"; | ||
| 135 | case ImageFormat::R8_SINT: | ||
| 136 | return ",r8i"; | ||
| 137 | case ImageFormat::R16_UINT: | ||
| 138 | return ",r16ui"; | ||
| 139 | case ImageFormat::R16_SINT: | ||
| 140 | return ",r16i"; | ||
| 141 | case ImageFormat::R32_UINT: | ||
| 142 | return ",r32ui"; | ||
| 143 | case ImageFormat::R32G32_UINT: | ||
| 144 | return ",rg32ui"; | ||
| 145 | case ImageFormat::R32G32B32A32_UINT: | ||
| 146 | return ",rgba32ui"; | ||
| 147 | default: | ||
| 148 | throw NotImplementedException("Image format: {}", format); | ||
| 149 | } | ||
| 150 | } | ||
| 151 | |||
| 152 | std::string_view ImageAccessQualifier(bool is_written, bool is_read) { | ||
| 153 | if (is_written && !is_read) { | ||
| 154 | return "writeonly "; | ||
| 155 | } | ||
| 156 | if (is_read && !is_written) { | ||
| 157 | return "readonly "; | ||
| 158 | } | ||
| 159 | return ""; | ||
| 160 | } | ||
| 161 | |||
| 162 | std::string_view GetTessMode(TessPrimitive primitive) { | ||
| 163 | switch (primitive) { | ||
| 164 | case TessPrimitive::Triangles: | ||
| 165 | return "triangles"; | ||
| 166 | case TessPrimitive::Quads: | ||
| 167 | return "quads"; | ||
| 168 | case TessPrimitive::Isolines: | ||
| 169 | return "isolines"; | ||
| 170 | } | ||
| 171 | throw InvalidArgument("Invalid tessellation primitive {}", primitive); | ||
| 172 | } | ||
| 173 | |||
| 174 | std::string_view GetTessSpacing(TessSpacing spacing) { | ||
| 175 | switch (spacing) { | ||
| 176 | case TessSpacing::Equal: | ||
| 177 | return "equal_spacing"; | ||
| 178 | case TessSpacing::FractionalOdd: | ||
| 179 | return "fractional_odd_spacing"; | ||
| 180 | case TessSpacing::FractionalEven: | ||
| 181 | return "fractional_even_spacing"; | ||
| 182 | } | ||
| 183 | throw InvalidArgument("Invalid tessellation spacing {}", spacing); | ||
| 184 | } | ||
| 185 | |||
| 186 | std::string_view InputPrimitive(InputTopology topology) { | ||
| 187 | switch (topology) { | ||
| 188 | case InputTopology::Points: | ||
| 189 | return "points"; | ||
| 190 | case InputTopology::Lines: | ||
| 191 | return "lines"; | ||
| 192 | case InputTopology::LinesAdjacency: | ||
| 193 | return "lines_adjacency"; | ||
| 194 | case InputTopology::Triangles: | ||
| 195 | return "triangles"; | ||
| 196 | case InputTopology::TrianglesAdjacency: | ||
| 197 | return "triangles_adjacency"; | ||
| 198 | } | ||
| 199 | throw InvalidArgument("Invalid input topology {}", topology); | ||
| 200 | } | ||
| 201 | |||
| 202 | std::string_view OutputPrimitive(OutputTopology topology) { | ||
| 203 | switch (topology) { | ||
| 204 | case OutputTopology::PointList: | ||
| 205 | return "points"; | ||
| 206 | case OutputTopology::LineStrip: | ||
| 207 | return "line_strip"; | ||
| 208 | case OutputTopology::TriangleStrip: | ||
| 209 | return "triangle_strip"; | ||
| 210 | } | ||
| 211 | throw InvalidArgument("Invalid output topology {}", topology); | ||
| 212 | } | ||
| 213 | |||
| 214 | void SetupLegacyOutPerVertex(EmitContext& ctx, std::string& header) { | ||
| 215 | if (!ctx.info.stores.Legacy()) { | ||
| 216 | return; | ||
| 217 | } | ||
| 218 | if (ctx.info.stores.FixedFunctionTexture()) { | ||
| 219 | header += "vec4 gl_TexCoord[8];"; | ||
| 220 | } | ||
| 221 | if (ctx.info.stores.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { | ||
| 222 | header += "vec4 gl_FrontColor;"; | ||
| 223 | } | ||
| 224 | if (ctx.info.stores.AnyComponent(IR::Attribute::ColorFrontSpecularR)) { | ||
| 225 | header += "vec4 gl_FrontSecondaryColor;"; | ||
| 226 | } | ||
| 227 | if (ctx.info.stores.AnyComponent(IR::Attribute::ColorBackDiffuseR)) { | ||
| 228 | header += "vec4 gl_BackColor;"; | ||
| 229 | } | ||
| 230 | if (ctx.info.stores.AnyComponent(IR::Attribute::ColorBackSpecularR)) { | ||
| 231 | header += "vec4 gl_BackSecondaryColor;"; | ||
| 232 | } | ||
| 233 | } | ||
| 234 | |||
| 235 | void SetupOutPerVertex(EmitContext& ctx, std::string& header) { | ||
| 236 | if (!StoresPerVertexAttributes(ctx.stage)) { | ||
| 237 | return; | ||
| 238 | } | ||
| 239 | if (ctx.uses_geometry_passthrough) { | ||
| 240 | return; | ||
| 241 | } | ||
| 242 | header += "out gl_PerVertex{vec4 gl_Position;"; | ||
| 243 | if (ctx.info.stores[IR::Attribute::PointSize]) { | ||
| 244 | header += "float gl_PointSize;"; | ||
| 245 | } | ||
| 246 | if (ctx.info.stores.ClipDistances()) { | ||
| 247 | header += "float gl_ClipDistance[];"; | ||
| 248 | } | ||
| 249 | if (ctx.info.stores[IR::Attribute::ViewportIndex] && | ||
| 250 | ctx.profile.support_viewport_index_layer_non_geometry && ctx.stage != Stage::Geometry) { | ||
| 251 | header += "int gl_ViewportIndex;"; | ||
| 252 | } | ||
| 253 | SetupLegacyOutPerVertex(ctx, header); | ||
| 254 | header += "};"; | ||
| 255 | if (ctx.info.stores[IR::Attribute::ViewportIndex] && ctx.stage == Stage::Geometry) { | ||
| 256 | header += "out int gl_ViewportIndex;"; | ||
| 257 | } | ||
| 258 | } | ||
| 259 | |||
| 260 | void SetupInPerVertex(EmitContext& ctx, std::string& header) { | ||
| 261 | // Currently only required for TessellationControl to adhere to | ||
| 262 | // ARB_separate_shader_objects requirements | ||
| 263 | if (ctx.stage != Stage::TessellationControl) { | ||
| 264 | return; | ||
| 265 | } | ||
| 266 | const bool loads_position{ctx.info.loads.AnyComponent(IR::Attribute::PositionX)}; | ||
| 267 | const bool loads_point_size{ctx.info.loads[IR::Attribute::PointSize]}; | ||
| 268 | const bool loads_clip_distance{ctx.info.loads.ClipDistances()}; | ||
| 269 | const bool loads_per_vertex{loads_position || loads_point_size || loads_clip_distance}; | ||
| 270 | if (!loads_per_vertex) { | ||
| 271 | return; | ||
| 272 | } | ||
| 273 | header += "in gl_PerVertex{"; | ||
| 274 | if (loads_position) { | ||
| 275 | header += "vec4 gl_Position;"; | ||
| 276 | } | ||
| 277 | if (loads_point_size) { | ||
| 278 | header += "float gl_PointSize;"; | ||
| 279 | } | ||
| 280 | if (loads_clip_distance) { | ||
| 281 | header += "float gl_ClipDistance[];"; | ||
| 282 | } | ||
| 283 | header += "}gl_in[gl_MaxPatchVertices];"; | ||
| 284 | } | ||
| 285 | |||
| 286 | void SetupLegacyInPerFragment(EmitContext& ctx, std::string& header) { | ||
| 287 | if (!ctx.info.loads.Legacy()) { | ||
| 288 | return; | ||
| 289 | } | ||
| 290 | header += "in gl_PerFragment{"; | ||
| 291 | if (ctx.info.loads.FixedFunctionTexture()) { | ||
| 292 | header += "vec4 gl_TexCoord[8];"; | ||
| 293 | } | ||
| 294 | if (ctx.info.loads.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { | ||
| 295 | header += "vec4 gl_Color;"; | ||
| 296 | } | ||
| 297 | header += "};"; | ||
| 298 | } | ||
| 299 | |||
| 300 | } // Anonymous namespace | ||
| 301 | |||
| 302 | EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, | ||
| 303 | const RuntimeInfo& runtime_info_) | ||
| 304 | : info{program.info}, profile{profile_}, runtime_info{runtime_info_}, stage{program.stage}, | ||
| 305 | uses_geometry_passthrough{program.is_geometry_passthrough && | ||
| 306 | profile.support_geometry_shader_passthrough} { | ||
| 307 | if (profile.need_fastmath_off) { | ||
| 308 | header += "#pragma optionNV(fastmath off)\n"; | ||
| 309 | } | ||
| 310 | SetupExtensions(); | ||
| 311 | switch (program.stage) { | ||
| 312 | case Stage::VertexA: | ||
| 313 | case Stage::VertexB: | ||
| 314 | stage_name = "vs"; | ||
| 315 | break; | ||
| 316 | case Stage::TessellationControl: | ||
| 317 | stage_name = "tcs"; | ||
| 318 | header += fmt::format("layout(vertices={})out;", program.invocations); | ||
| 319 | break; | ||
| 320 | case Stage::TessellationEval: | ||
| 321 | stage_name = "tes"; | ||
| 322 | header += fmt::format("layout({},{},{})in;", GetTessMode(runtime_info.tess_primitive), | ||
| 323 | GetTessSpacing(runtime_info.tess_spacing), | ||
| 324 | runtime_info.tess_clockwise ? "cw" : "ccw"); | ||
| 325 | break; | ||
| 326 | case Stage::Geometry: | ||
| 327 | stage_name = "gs"; | ||
| 328 | header += fmt::format("layout({})in;", InputPrimitive(runtime_info.input_topology)); | ||
| 329 | if (uses_geometry_passthrough) { | ||
| 330 | header += "layout(passthrough)in gl_PerVertex{vec4 gl_Position;};"; | ||
| 331 | break; | ||
| 332 | } else if (program.is_geometry_passthrough && | ||
| 333 | !profile.support_geometry_shader_passthrough) { | ||
| 334 | LOG_WARNING(Shader_GLSL, "Passthrough geometry program used but not supported"); | ||
| 335 | } | ||
| 336 | header += fmt::format( | ||
| 337 | "layout({},max_vertices={})out;in gl_PerVertex{{vec4 gl_Position;}}gl_in[];", | ||
| 338 | OutputPrimitive(program.output_topology), program.output_vertices); | ||
| 339 | break; | ||
| 340 | case Stage::Fragment: | ||
| 341 | stage_name = "fs"; | ||
| 342 | position_name = "gl_FragCoord"; | ||
| 343 | if (runtime_info.force_early_z) { | ||
| 344 | header += "layout(early_fragment_tests)in;"; | ||
| 345 | } | ||
| 346 | if (info.uses_sample_id) { | ||
| 347 | header += "in int gl_SampleID;"; | ||
| 348 | } | ||
| 349 | if (info.stores_sample_mask) { | ||
| 350 | header += "out int gl_SampleMask[];"; | ||
| 351 | } | ||
| 352 | break; | ||
| 353 | case Stage::Compute: | ||
| 354 | stage_name = "cs"; | ||
| 355 | const u32 local_x{std::max(program.workgroup_size[0], 1u)}; | ||
| 356 | const u32 local_y{std::max(program.workgroup_size[1], 1u)}; | ||
| 357 | const u32 local_z{std::max(program.workgroup_size[2], 1u)}; | ||
| 358 | header += fmt::format("layout(local_size_x={},local_size_y={},local_size_z={}) in;", | ||
| 359 | local_x, local_y, local_z); | ||
| 360 | break; | ||
| 361 | } | ||
| 362 | SetupOutPerVertex(*this, header); | ||
| 363 | SetupInPerVertex(*this, header); | ||
| 364 | SetupLegacyInPerFragment(*this, header); | ||
| 365 | |||
| 366 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { | ||
| 367 | if (!info.loads.Generic(index) || !runtime_info.previous_stage_stores.Generic(index)) { | ||
| 368 | continue; | ||
| 369 | } | ||
| 370 | const auto qualifier{uses_geometry_passthrough ? "passthrough" | ||
| 371 | : fmt::format("location={}", index)}; | ||
| 372 | header += fmt::format("layout({}){}in vec4 in_attr{}{};", qualifier, | ||
| 373 | InterpDecorator(info.interpolation[index]), index, | ||
| 374 | InputArrayDecorator(stage)); | ||
| 375 | } | ||
| 376 | for (size_t index = 0; index < info.uses_patches.size(); ++index) { | ||
| 377 | if (!info.uses_patches[index]) { | ||
| 378 | continue; | ||
| 379 | } | ||
| 380 | const auto qualifier{stage == Stage::TessellationControl ? "out" : "in"}; | ||
| 381 | header += fmt::format("layout(location={})patch {} vec4 patch{};", index, qualifier, index); | ||
| 382 | } | ||
| 383 | if (stage == Stage::Fragment) { | ||
| 384 | for (size_t index = 0; index < info.stores_frag_color.size(); ++index) { | ||
| 385 | if (!info.stores_frag_color[index] && !profile.need_declared_frag_colors) { | ||
| 386 | continue; | ||
| 387 | } | ||
| 388 | header += fmt::format("layout(location={})out vec4 frag_color{};", index, index); | ||
| 389 | } | ||
| 390 | } | ||
| 391 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { | ||
| 392 | if (info.stores.Generic(index)) { | ||
| 393 | DefineGenericOutput(index, program.invocations); | ||
| 394 | } | ||
| 395 | } | ||
| 396 | DefineConstantBuffers(bindings); | ||
| 397 | DefineStorageBuffers(bindings); | ||
| 398 | SetupImages(bindings); | ||
| 399 | SetupTextures(bindings); | ||
| 400 | DefineHelperFunctions(); | ||
| 401 | DefineConstants(); | ||
| 402 | } | ||
| 403 | |||
| 404 | void EmitContext::SetupExtensions() { | ||
| 405 | header += "#extension GL_ARB_separate_shader_objects : enable\n"; | ||
| 406 | if (info.uses_shadow_lod && profile.support_gl_texture_shadow_lod) { | ||
| 407 | header += "#extension GL_EXT_texture_shadow_lod : enable\n"; | ||
| 408 | } | ||
| 409 | if (info.uses_int64 && profile.support_int64) { | ||
| 410 | header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; | ||
| 411 | } | ||
| 412 | if (info.uses_int64_bit_atomics) { | ||
| 413 | header += "#extension GL_NV_shader_atomic_int64 : enable\n"; | ||
| 414 | } | ||
| 415 | if (info.uses_atomic_f32_add) { | ||
| 416 | header += "#extension GL_NV_shader_atomic_float : enable\n"; | ||
| 417 | } | ||
| 418 | if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) { | ||
| 419 | header += "#extension GL_NV_shader_atomic_fp16_vector : enable\n"; | ||
| 420 | } | ||
| 421 | if (info.uses_fp16) { | ||
| 422 | if (profile.support_gl_nv_gpu_shader_5) { | ||
| 423 | header += "#extension GL_NV_gpu_shader5 : enable\n"; | ||
| 424 | } | ||
| 425 | if (profile.support_gl_amd_gpu_shader_half_float) { | ||
| 426 | header += "#extension GL_AMD_gpu_shader_half_float : enable\n"; | ||
| 427 | } | ||
| 428 | } | ||
| 429 | if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote || | ||
| 430 | info.uses_subgroup_shuffles || info.uses_fswzadd) { | ||
| 431 | header += "#extension GL_ARB_shader_ballot : enable\n" | ||
| 432 | "#extension GL_ARB_shader_group_vote : enable\n"; | ||
| 433 | if (!info.uses_int64 && profile.support_int64) { | ||
| 434 | header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; | ||
| 435 | } | ||
| 436 | if (profile.support_gl_warp_intrinsics) { | ||
| 437 | header += "#extension GL_NV_shader_thread_shuffle : enable\n"; | ||
| 438 | } | ||
| 439 | } | ||
| 440 | if ((info.stores[IR::Attribute::ViewportIndex] || info.stores[IR::Attribute::Layer]) && | ||
| 441 | profile.support_viewport_index_layer_non_geometry && stage != Stage::Geometry) { | ||
| 442 | header += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; | ||
| 443 | } | ||
| 444 | if (info.uses_sparse_residency && profile.support_gl_sparse_textures) { | ||
| 445 | header += "#extension GL_ARB_sparse_texture2 : enable\n"; | ||
| 446 | } | ||
| 447 | if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) { | ||
| 448 | header += "#extension GL_NV_viewport_array2 : enable\n"; | ||
| 449 | } | ||
| 450 | if (info.uses_typeless_image_reads) { | ||
| 451 | header += "#extension GL_EXT_shader_image_load_formatted : enable\n"; | ||
| 452 | } | ||
| 453 | if (info.uses_derivatives && profile.support_gl_derivative_control) { | ||
| 454 | header += "#extension GL_ARB_derivative_control : enable\n"; | ||
| 455 | } | ||
| 456 | if (uses_geometry_passthrough) { | ||
| 457 | header += "#extension GL_NV_geometry_shader_passthrough : enable\n"; | ||
| 458 | } | ||
| 459 | } | ||
| 460 | |||
| 461 | void EmitContext::DefineConstantBuffers(Bindings& bindings) { | ||
| 462 | if (info.constant_buffer_descriptors.empty()) { | ||
| 463 | return; | ||
| 464 | } | ||
| 465 | for (const auto& desc : info.constant_buffer_descriptors) { | ||
| 466 | header += fmt::format( | ||
| 467 | "layout(std140,binding={}) uniform {}_cbuf_{}{{vec4 {}_cbuf{}[{}];}};", | ||
| 468 | bindings.uniform_buffer, stage_name, desc.index, stage_name, desc.index, 4 * 1024); | ||
| 469 | bindings.uniform_buffer += desc.count; | ||
| 470 | } | ||
| 471 | } | ||
| 472 | |||
| 473 | void EmitContext::DefineStorageBuffers(Bindings& bindings) { | ||
| 474 | if (info.storage_buffers_descriptors.empty()) { | ||
| 475 | return; | ||
| 476 | } | ||
| 477 | u32 index{}; | ||
| 478 | for (const auto& desc : info.storage_buffers_descriptors) { | ||
| 479 | header += fmt::format("layout(std430,binding={}) buffer {}_ssbo_{}{{uint {}_ssbo{}[];}};", | ||
| 480 | bindings.storage_buffer, stage_name, bindings.storage_buffer, | ||
| 481 | stage_name, index); | ||
| 482 | bindings.storage_buffer += desc.count; | ||
| 483 | index += desc.count; | ||
| 484 | } | ||
| 485 | } | ||
| 486 | |||
| 487 | void EmitContext::DefineGenericOutput(size_t index, u32 invocations) { | ||
| 488 | static constexpr std::string_view swizzle{"xyzw"}; | ||
| 489 | const size_t base_index{static_cast<size_t>(IR::Attribute::Generic0X) + index * 4}; | ||
| 490 | u32 element{0}; | ||
| 491 | while (element < 4) { | ||
| 492 | std::string definition{fmt::format("layout(location={}", index)}; | ||
| 493 | const u32 remainder{4 - element}; | ||
| 494 | const TransformFeedbackVarying* xfb_varying{}; | ||
| 495 | if (!runtime_info.xfb_varyings.empty()) { | ||
| 496 | xfb_varying = &runtime_info.xfb_varyings[base_index + element]; | ||
| 497 | xfb_varying = xfb_varying && xfb_varying->components > 0 ? xfb_varying : nullptr; | ||
| 498 | } | ||
| 499 | const u32 num_components{xfb_varying ? xfb_varying->components : remainder}; | ||
| 500 | if (element > 0) { | ||
| 501 | definition += fmt::format(",component={}", element); | ||
| 502 | } | ||
| 503 | if (xfb_varying) { | ||
| 504 | definition += | ||
| 505 | fmt::format(",xfb_buffer={},xfb_stride={},xfb_offset={}", xfb_varying->buffer, | ||
| 506 | xfb_varying->stride, xfb_varying->offset); | ||
| 507 | } | ||
| 508 | std::string name{fmt::format("out_attr{}", index)}; | ||
| 509 | if (num_components < 4 || element > 0) { | ||
| 510 | name += fmt::format("_{}", swizzle.substr(element, num_components)); | ||
| 511 | } | ||
| 512 | const auto type{num_components == 1 ? "float" : fmt::format("vec{}", num_components)}; | ||
| 513 | definition += fmt::format(")out {} {}{};", type, name, OutputDecorator(stage, invocations)); | ||
| 514 | header += definition; | ||
| 515 | |||
| 516 | const GenericElementInfo element_info{ | ||
| 517 | .name = name, | ||
| 518 | .first_element = element, | ||
| 519 | .num_components = num_components, | ||
| 520 | }; | ||
| 521 | std::fill_n(output_generics[index].begin() + element, num_components, element_info); | ||
| 522 | element += num_components; | ||
| 523 | } | ||
| 524 | } | ||
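| | // Illustration (attribute index and xfb values hypothetical): a transform | ||
| | // feedback varying covering components z/w of attribute 1 yields a split | ||
| | // declaration such as | ||
| | //   layout(location=1,component=2,xfb_buffer=0,xfb_stride=16,xfb_offset=8)out vec2 out_attr1_zw; | ||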
| 525 | |||
| 526 | void EmitContext::DefineHelperFunctions() { | ||
| 527 | header += "\n#define ftoi floatBitsToInt\n#define ftou floatBitsToUint\n" | ||
| 528 | "#define itof intBitsToFloat\n#define utof uintBitsToFloat\n"; | ||
| 529 | if (info.uses_global_increment || info.uses_shared_increment) { | ||
| 530 | header += "uint CasIncrement(uint op_a,uint op_b){return op_a>=op_b?0u:(op_a+1u);}"; | ||
| 531 | } | ||
| 532 | if (info.uses_global_decrement || info.uses_shared_decrement) { | ||
| 533 | header += "uint CasDecrement(uint op_a,uint op_b){" | ||
| 534 | "return op_a==0||op_a>op_b?op_b:(op_a-1u);}"; | ||
| 535 | } | ||
| 536 | if (info.uses_atomic_f32_add) { | ||
| 537 | header += "uint CasFloatAdd(uint op_a,float op_b){" | ||
| 538 | "return ftou(utof(op_a)+op_b);}"; | ||
| 539 | } | ||
| 540 | if (info.uses_atomic_f32x2_add) { | ||
| 541 | header += "uint CasFloatAdd32x2(uint op_a,vec2 op_b){" | ||
| 542 | "return packHalf2x16(unpackHalf2x16(op_a)+op_b);}"; | ||
| 543 | } | ||
| 544 | if (info.uses_atomic_f32x2_min) { | ||
| 545 | header += "uint CasFloatMin32x2(uint op_a,vec2 op_b){return " | ||
| 546 | "packHalf2x16(min(unpackHalf2x16(op_a),op_b));}"; | ||
| 547 | } | ||
| 548 | if (info.uses_atomic_f32x2_max) { | ||
| 549 | header += "uint CasFloatMax32x2(uint op_a,vec2 op_b){return " | ||
| 550 | "packHalf2x16(max(unpackHalf2x16(op_a),op_b));}"; | ||
| 551 | } | ||
| 552 | if (info.uses_atomic_f16x2_add) { | ||
| 553 | header += "uint CasFloatAdd16x2(uint op_a,f16vec2 op_b){return " | ||
| 554 | "packFloat2x16(unpackFloat2x16(op_a)+op_b);}"; | ||
| 555 | } | ||
| 556 | if (info.uses_atomic_f16x2_min) { | ||
| 557 | header += "uint CasFloatMin16x2(uint op_a,f16vec2 op_b){return " | ||
| 558 | "packFloat2x16(min(unpackFloat2x16(op_a),op_b));}"; | ||
| 559 | } | ||
| 560 | if (info.uses_atomic_f16x2_max) { | ||
| 561 | header += "uint CasFloatMax16x2(uint op_a,f16vec2 op_b){return " | ||
| 562 | "packFloat2x16(max(unpackFloat2x16(op_a),op_b));}"; | ||
| 563 | } | ||
| 564 | if (info.uses_atomic_s32_min) { | ||
| 565 | header += "uint CasMinS32(uint op_a,uint op_b){return uint(min(int(op_a),int(op_b)));}"; | ||
| 566 | } | ||
| 567 | if (info.uses_atomic_s32_max) { | ||
| 568 | header += "uint CasMaxS32(uint op_a,uint op_b){return uint(max(int(op_a),int(op_b)));}"; | ||
| 569 | } | ||
| 570 | if (info.uses_global_memory && profile.support_int64) { | ||
| 571 | header += DefineGlobalMemoryFunctions(); | ||
| 572 | } | ||
| 573 | if (info.loads_indexed_attributes) { | ||
| 574 | const bool is_array{stage == Stage::Geometry}; | ||
| 575 | const auto vertex_arg{is_array ? ",uint vertex" : ""}; | ||
| 576 | std::string func{ | ||
| 577 | fmt::format("float IndexedAttrLoad(int offset{}){{int base_index=offset>>2;uint " | ||
| 578 | "masked_index=uint(base_index)&3u;switch(base_index>>2){{", | ||
| 579 | vertex_arg)}; | ||
| 580 | if (info.loads.AnyComponent(IR::Attribute::PositionX)) { | ||
| 581 | const auto position_idx{is_array ? "gl_in[vertex]." : ""}; | ||
| 582 | func += fmt::format("case {}:return {}{}[masked_index];", | ||
| 583 | static_cast<u32>(IR::Attribute::PositionX) >> 2, position_idx, | ||
| 584 | position_name); | ||
| 585 | } | ||
| 586 | const u32 base_attribute_value = static_cast<u32>(IR::Attribute::Generic0X) >> 2; | ||
| 587 | for (u32 index = 0; index < IR::NUM_GENERICS; ++index) { | ||
| 588 | if (!info.loads.Generic(index)) { | ||
| 589 | continue; | ||
| 590 | } | ||
| 591 | const auto vertex_idx{is_array ? "[vertex]" : ""}; | ||
| 592 | func += fmt::format("case {}:return in_attr{}{}[masked_index];", | ||
| 593 | base_attribute_value + index, index, vertex_idx); | ||
| 594 | } | ||
| 595 | func += "default: return 0.0;}}"; | ||
| 596 | header += func; | ||
| 597 | } | ||
| 598 | if (info.stores_indexed_attributes) { | ||
| 599 | // TODO: Implement indexed attribute stores. | ||
| 600 | } | ||
| 601 | } | ||
| 602 | |||
| 603 | std::string EmitContext::DefineGlobalMemoryFunctions() { | ||
| 604 | const auto define_body{[&](std::string& func, size_t index, std::string_view return_statement) { | ||
| 605 | const auto& ssbo{info.storage_buffers_descriptors[index]}; | ||
| 606 | const u32 size_cbuf_offset{ssbo.cbuf_offset + 8}; | ||
| 607 | const auto ssbo_addr{fmt::format("ssbo_addr{}", index)}; | ||
| 608 | const auto cbuf{fmt::format("{}_cbuf{}", stage_name, ssbo.cbuf_index)}; | ||
| 609 | std::array<std::string, 2> addr_xy; | ||
| 610 | std::array<std::string, 2> size_xy; | ||
| 611 | for (size_t i = 0; i < addr_xy.size(); ++i) { | ||
| 612 | const auto addr_loc{ssbo.cbuf_offset + 4 * i}; | ||
| 613 | const auto size_loc{size_cbuf_offset + 4 * i}; | ||
| 614 | addr_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, addr_loc / 16, Swizzle(addr_loc)); | ||
| 615 | size_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, size_loc / 16, Swizzle(size_loc)); | ||
| 616 | } | ||
| 617 | const auto addr_pack{fmt::format("packUint2x32(uvec2({},{}))", addr_xy[0], addr_xy[1])}; | ||
| 618 | const auto addr_statement{fmt::format("uint64_t {}={};", ssbo_addr, addr_pack)}; | ||
| 619 | func += addr_statement; | ||
| 620 | |||
| 621 | const auto size_vec{fmt::format("uvec2({},{})", size_xy[0], size_xy[1])}; | ||
| 622 | const auto comp_lhs{fmt::format("(addr>={})", ssbo_addr)}; | ||
| 623 | const auto comp_rhs{fmt::format("(addr<({}+uint64_t({})))", ssbo_addr, size_vec)}; | ||
| 624 | const auto comparison{fmt::format("if({}&&{}){{", comp_lhs, comp_rhs)}; | ||
| 625 | func += comparison; | ||
| 626 | |||
| 627 | const auto ssbo_name{fmt::format("{}_ssbo{}", stage_name, index)}; | ||
| 628 | func += fmt::format(fmt::runtime(return_statement), ssbo_name, ssbo_addr); | ||
| 629 | }}; | ||
| 630 | std::string write_func{"void WriteGlobal32(uint64_t addr,uint data){"}; | ||
| 631 | std::string write_func_64{"void WriteGlobal64(uint64_t addr,uvec2 data){"}; | ||
| 632 | std::string write_func_128{"void WriteGlobal128(uint64_t addr,uvec4 data){"}; | ||
| 633 | std::string load_func{"uint LoadGlobal32(uint64_t addr){"}; | ||
| 634 | std::string load_func_64{"uvec2 LoadGlobal64(uint64_t addr){"}; | ||
| 635 | std::string load_func_128{"uvec4 LoadGlobal128(uint64_t addr){"}; | ||
| 636 | const size_t num_buffers{info.storage_buffers_descriptors.size()}; | ||
| 637 | for (size_t index = 0; index < num_buffers; ++index) { | ||
| 638 | if (!info.nvn_buffer_used[index]) { | ||
| 639 | continue; | ||
| 640 | } | ||
| 641 | define_body(write_func, index, "{0}[uint(addr-{1})>>2]=data;return;}}"); | ||
| 642 | define_body(write_func_64, index, | ||
| 643 | "{0}[uint(addr-{1})>>2]=data.x;{0}[uint(addr-{1}+4)>>2]=data.y;return;}}"); | ||
| 644 | define_body(write_func_128, index, | ||
| 645 | "{0}[uint(addr-{1})>>2]=data.x;{0}[uint(addr-{1}+4)>>2]=data.y;{0}[uint(" | ||
| 646 | "addr-{1}+8)>>2]=data.z;{0}[uint(addr-{1}+12)>>2]=data.w;return;}}"); | ||
| 647 | define_body(load_func, index, "return {0}[uint(addr-{1})>>2];}}"); | ||
| 648 | define_body(load_func_64, index, | ||
| 649 | "return uvec2({0}[uint(addr-{1})>>2],{0}[uint(addr-{1}+4)>>2]);}}"); | ||
| 650 | define_body(load_func_128, index, | ||
| 651 | "return uvec4({0}[uint(addr-{1})>>2],{0}[uint(addr-{1}+4)>>2],{0}[" | ||
| 652 | "uint(addr-{1}+8)>>2],{0}[uint(addr-{1}+12)>>2]);}}"); | ||
| 653 | } | ||
| 654 | write_func += '}'; | ||
| 655 | write_func_64 += '}'; | ||
| 656 | write_func_128 += '}'; | ||
| 657 | load_func += "return 0u;}"; | ||
| 658 | load_func_64 += "return uvec2(0);}"; | ||
| 659 | load_func_128 += "return uvec4(0);}"; | ||
| 660 | return write_func + write_func_64 + write_func_128 + load_func + load_func_64 + load_func_128; | ||
| 661 | } | ||
| 662 | |||
| 663 | void EmitContext::SetupImages(Bindings& bindings) { | ||
| 664 | image_buffers.reserve(info.image_buffer_descriptors.size()); | ||
| 665 | for (const auto& desc : info.image_buffer_descriptors) { | ||
| 666 | image_buffers.push_back({bindings.image, desc.count}); | ||
| 667 | const auto format{ImageFormatString(desc.format)}; | ||
| 668 | const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)}; | ||
| 669 | const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; | ||
| 670 | header += fmt::format("layout(binding={}{}) uniform {}uimageBuffer img{}{};", | ||
| 671 | bindings.image, format, qualifier, bindings.image, array_decorator); | ||
| 672 | bindings.image += desc.count; | ||
| 673 | } | ||
| 674 | images.reserve(info.image_descriptors.size()); | ||
| 675 | for (const auto& desc : info.image_descriptors) { | ||
| 676 | images.push_back({bindings.image, desc.count}); | ||
| 677 | const auto format{ImageFormatString(desc.format)}; | ||
| 678 | const auto image_type{ImageType(desc.type)}; | ||
| 679 | const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)}; | ||
| 680 | const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; | ||
| 681 | header += fmt::format("layout(binding={}{})uniform {}{} img{}{};", bindings.image, format, | ||
| 682 | qualifier, image_type, bindings.image, array_decorator); | ||
| 683 | bindings.image += desc.count; | ||
| 684 | } | ||
| 685 | } | ||
| 686 | |||
| 687 | void EmitContext::SetupTextures(Bindings& bindings) { | ||
| 688 | texture_buffers.reserve(info.texture_buffer_descriptors.size()); | ||
| 689 | for (const auto& desc : info.texture_buffer_descriptors) { | ||
| 690 | texture_buffers.push_back({bindings.texture, desc.count}); | ||
| 691 | const auto sampler_type{SamplerType(TextureType::Buffer, false)}; | ||
| 692 | const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; | ||
| 693 | header += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture, | ||
| 694 | sampler_type, bindings.texture, array_decorator); | ||
| 695 | bindings.texture += desc.count; | ||
| 696 | } | ||
| 697 | textures.reserve(info.texture_descriptors.size()); | ||
| 698 | for (const auto& desc : info.texture_descriptors) { | ||
| 699 | textures.push_back({bindings.texture, desc.count}); | ||
| 700 | const auto sampler_type{SamplerType(desc.type, desc.is_depth)}; | ||
| 701 | const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; | ||
| 702 | header += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture, | ||
| 703 | sampler_type, bindings.texture, array_decorator); | ||
| 704 | bindings.texture += desc.count; | ||
| 705 | } | ||
| 706 | } | ||
| 707 | |||
| 708 | void EmitContext::DefineConstants() { | ||
| 709 | if (info.uses_fswzadd) { | ||
| 710 | header += "const float FSWZ_A[]=float[4](-1.f,1.f,-1.f,0.f);" | ||
| 711 | "const float FSWZ_B[]=float[4](-1.f,-1.f,1.f,-1.f);"; | ||
| 712 | } | ||
| 713 | } | ||
| 714 | |||
| 715 | } // namespace Shader::Backend::GLSL | ||
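Note: for reference (binding and workgroup sizes hypothetical), a compute shader with an 8x8x1 workgroup reading one constant buffer ends up with a header along these lines, the bit-cast defines coming from DefineHelperFunctions:

    #extension GL_ARB_separate_shader_objects : enable
    layout(local_size_x=8,local_size_y=8,local_size_z=1) in;
    layout(std140,binding=0) uniform cs_cbuf_0{vec4 cs_cbuf0[4096];};
    #define ftoi floatBitsToInt
    #define ftou floatBitsToUint
    #define itof intBitsToFloat
    #define utof uintBitsToFloat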
diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h new file mode 100644 index 000000000..d9b639d29 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_context.h | |||
| @@ -0,0 +1,174 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <string> | ||
| 8 | #include <utility> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include <fmt/format.h> | ||
| 12 | |||
| 13 | #include "shader_recompiler/backend/glsl/var_alloc.h" | ||
| 14 | #include "shader_recompiler/stage.h" | ||
| 15 | |||
| 16 | namespace Shader { | ||
| 17 | struct Info; | ||
| 18 | struct Profile; | ||
| 19 | struct RuntimeInfo; | ||
| 20 | } // namespace Shader | ||
| 21 | |||
| 22 | namespace Shader::Backend { | ||
| 23 | struct Bindings; | ||
| 24 | } | ||
| 25 | |||
| 26 | namespace Shader::IR { | ||
| 27 | class Inst; | ||
| 28 | struct Program; | ||
| 29 | } // namespace Shader::IR | ||
| 30 | |||
| 31 | namespace Shader::Backend::GLSL { | ||
| 32 | |||
| 33 | struct GenericElementInfo { | ||
| 34 | std::string name; | ||
| 35 | u32 first_element{}; | ||
| 36 | u32 num_components{}; | ||
| 37 | }; | ||
| 38 | |||
| 39 | struct TextureImageDefinition { | ||
| 40 | u32 binding; | ||
| 41 | u32 count; | ||
| 42 | }; | ||
| 43 | |||
| 44 | class EmitContext { | ||
| 45 | public: | ||
| 46 | explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, | ||
| 47 | const RuntimeInfo& runtime_info_); | ||
| 48 | |||
| 49 | template <GlslVarType type, typename... Args> | ||
| 50 | void Add(const char* format_str, IR::Inst& inst, Args&&... args) { | ||
| 51 | const auto var_def{var_alloc.AddDefine(inst, type)}; | ||
| 52 | if (var_def.empty()) { | ||
| 53 | // Skip the "{}=" assignment prefix; this instruction's result is unused. | ||
| 54 | code += fmt::format(fmt::runtime(format_str + 3), std::forward<Args>(args)...); | ||
| 55 | } else { | ||
| 56 | code += fmt::format(fmt::runtime(format_str), var_def, std::forward<Args>(args)...); | ||
| 57 | } | ||
| 58 | // TODO: Remove this | ||
| 59 | code += '\n'; | ||
| 60 | } | ||
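| | // Illustration (variable name hypothetical): AddU32("{}=ftou({});",inst,val) | ||
| | // normally emits "u32_x=ftou(val);", but when the instruction has no uses the | ||
| | // "{}=" prefix is skipped and only the expression "ftou(val);" is emitted. | ||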
| 61 | |||
| 62 | template <typename... Args> | ||
| 63 | void AddU1(const char* format_str, IR::Inst& inst, Args&&... args) { | ||
| 64 | Add<GlslVarType::U1>(format_str, inst, args...); | ||
| 65 | } | ||
| 66 | |||
| 67 | template <typename... Args> | ||
| 68 | void AddF16x2(const char* format_str, IR::Inst& inst, Args&&... args) { | ||
| 69 | Add<GlslVarType::F16x2>(format_str, inst, args...); | ||
| 70 | } | ||
| 71 | |||
| 72 | template <typename... Args> | ||
| 73 | void AddU32(const char* format_str, IR::Inst& inst, Args&&... args) { | ||
| 74 | Add<GlslVarType::U32>(format_str, inst, args...); | ||
| 75 | } | ||
| 76 | |||
| 77 | template <typename... Args> | ||
| 78 | void AddF32(const char* format_str, IR::Inst& inst, Args&&... args) { | ||
| 79 | Add<GlslVarType::F32>(format_str, inst, args...); | ||
| 80 | } | ||
| 81 | |||
| 82 | template <typename... Args> | ||
| 83 | void AddU64(const char* format_str, IR::Inst& inst, Args&&... args) { | ||
| 84 | Add<GlslVarType::U64>(format_str, inst, args...); | ||
| 85 | } | ||
| 86 | |||
| 87 | template <typename... Args> | ||
| 88 | void AddF64(const char* format_str, IR::Inst& inst, Args&&... args) { | ||
| 89 | Add<GlslVarType::F64>(format_str, inst, args...); | ||
| 90 | } | ||
| 91 | |||
| 92 | template <typename... Args> | ||
| 93 | void AddU32x2(const char* format_str, IR::Inst& inst, Args&&... args) { | ||
| 94 | Add<GlslVarType::U32x2>(format_str, inst, args...); | ||
| 95 | } | ||
| 96 | |||
| 97 | template <typename... Args> | ||
| 98 | void AddF32x2(const char* format_str, IR::Inst& inst, Args&&... args) { | ||
| 99 | Add<GlslVarType::F32x2>(format_str, inst, args...); | ||
| 100 | } | ||
| 101 | |||
| 102 | template <typename... Args> | ||
| 103 | void AddU32x3(const char* format_str, IR::Inst& inst, Args&&... args) { | ||
| 104 | Add<GlslVarType::U32x3>(format_str, inst, args...); | ||
| 105 | } | ||
| 106 | |||
| 107 | template <typename... Args> | ||
| 108 | void AddF32x3(const char* format_str, IR::Inst& inst, Args&&... args) { | ||
| 109 | Add<GlslVarType::F32x3>(format_str, inst, args...); | ||
| 110 | } | ||
| 111 | |||
| 112 | template <typename... Args> | ||
| 113 | void AddU32x4(const char* format_str, IR::Inst& inst, Args&&... args) { | ||
| 114 | Add<GlslVarType::U32x4>(format_str, inst, args...); | ||
| 115 | } | ||
| 116 | |||
| 117 | template <typename... Args> | ||
| 118 | void AddF32x4(const char* format_str, IR::Inst& inst, Args&&... args) { | ||
| 119 | Add<GlslVarType::F32x4>(format_str, inst, args...); | ||
| 120 | } | ||
| 121 | |||
| 122 | template <typename... Args> | ||
| 123 | void AddPrecF32(const char* format_str, IR::Inst& inst, Args&&... args) { | ||
| 124 | Add<GlslVarType::PrecF32>(format_str, inst, args...); | ||
| 125 | } | ||
| 126 | |||
| 127 | template <typename... Args> | ||
| 128 | void AddPrecF64(const char* format_str, IR::Inst& inst, Args&&... args) { | ||
| 129 | Add<GlslVarType::PrecF64>(format_str, inst, args...); | ||
| 130 | } | ||
| 131 | |||
| 132 | template <typename... Args> | ||
| 133 | void Add(const char* format_str, Args&&... args) { | ||
| 134 | code += fmt::format(fmt::runtime(format_str), std::forward<Args>(args)...); | ||
| 135 | // TODO: Remove this | ||
| 136 | code += '\n'; | ||
| 137 | } | ||
| 138 | |||
| 139 | std::string header; | ||
| 140 | std::string code; | ||
| 141 | VarAlloc var_alloc; | ||
| 142 | const Info& info; | ||
| 143 | const Profile& profile; | ||
| 144 | const RuntimeInfo& runtime_info; | ||
| 145 | |||
| 146 | Stage stage{}; | ||
| 147 | std::string_view stage_name = "invalid"; | ||
| 148 | std::string_view position_name = "gl_Position"; | ||
| 149 | |||
| 150 | std::vector<TextureImageDefinition> texture_buffers; | ||
| 151 | std::vector<TextureImageDefinition> image_buffers; | ||
| 152 | std::vector<TextureImageDefinition> textures; | ||
| 153 | std::vector<TextureImageDefinition> images; | ||
| 154 | std::array<std::array<GenericElementInfo, 4>, 32> output_generics{}; | ||
| 155 | |||
| 156 | u32 num_safety_loop_vars{}; | ||
| 157 | |||
| 158 | bool uses_y_direction{}; | ||
| 159 | bool uses_cc_carry{}; | ||
| 160 | bool uses_geometry_passthrough{}; | ||
| 161 | |||
| 162 | private: | ||
| 163 | void SetupExtensions(); | ||
| 164 | void DefineConstantBuffers(Bindings& bindings); | ||
| 165 | void DefineStorageBuffers(Bindings& bindings); | ||
| 166 | void DefineGenericOutput(size_t index, u32 invocations); | ||
| 167 | void DefineHelperFunctions(); | ||
| 168 | void DefineConstants(); | ||
| 169 | std::string DefineGlobalMemoryFunctions(); | ||
| 170 | void SetupImages(Bindings& bindings); | ||
| 171 | void SetupTextures(Bindings& bindings); | ||
| 172 | }; | ||
| 173 | |||
| 174 | } // namespace Shader::Backend::GLSL | ||
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp new file mode 100644 index 000000000..8a430d573 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp | |||
| @@ -0,0 +1,252 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <string> | ||
| 7 | #include <tuple> | ||
| 8 | #include <type_traits> | ||
| 9 | |||
| 10 | #include "common/div_ceil.h" | ||
| 11 | #include "common/settings.h" | ||
| 12 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 13 | #include "shader_recompiler/backend/glsl/emit_glsl.h" | ||
| 14 | #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" | ||
| 15 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 16 | |||
| 17 | namespace Shader::Backend::GLSL { | ||
| 18 | namespace { | ||
| 19 | template <class Func> | ||
| 20 | struct FuncTraits {}; | ||
| 21 | |||
| 22 | template <class ReturnType_, class... Args> | ||
| 23 | struct FuncTraits<ReturnType_ (*)(Args...)> { | ||
| 24 | using ReturnType = ReturnType_; | ||
| 25 | |||
| 26 | static constexpr size_t NUM_ARGS = sizeof...(Args); | ||
| 27 | |||
| 28 | template <size_t I> | ||
| 29 | using ArgType = std::tuple_element_t<I, std::tuple<Args...>>; | ||
| 30 | }; | ||
| 31 | |||
| 32 | template <auto func, typename... Args> | ||
| 33 | void SetDefinition(EmitContext& ctx, IR::Inst* inst, Args... args) { | ||
| 34 | inst->SetDefinition<Id>(func(ctx, std::forward<Args>(args)...)); | ||
| 35 | } | ||
| 36 | |||
| 37 | template <typename ArgType> | ||
| 38 | auto Arg(EmitContext& ctx, const IR::Value& arg) { | ||
| 39 | if constexpr (std::is_same_v<ArgType, std::string_view>) { | ||
| 40 | return ctx.var_alloc.Consume(arg); | ||
| 41 | } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) { | ||
| 42 | return arg; | ||
| 43 | } else if constexpr (std::is_same_v<ArgType, u32>) { | ||
| 44 | return arg.U32(); | ||
| 45 | } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) { | ||
| 46 | return arg.Attribute(); | ||
| 47 | } else if constexpr (std::is_same_v<ArgType, IR::Patch>) { | ||
| 48 | return arg.Patch(); | ||
| 49 | } else if constexpr (std::is_same_v<ArgType, IR::Reg>) { | ||
| 50 | return arg.Reg(); | ||
| 51 | } | ||
| 52 | } | ||
| 53 | |||
| 54 | template <auto func, bool is_first_arg_inst, size_t... I> | ||
| 55 | void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) { | ||
| 56 | using Traits = FuncTraits<decltype(func)>; | ||
| 57 | if constexpr (std::is_same_v<typename Traits::ReturnType, Id>) { | ||
| 58 | if constexpr (is_first_arg_inst) { | ||
| 59 | SetDefinition<func>( | ||
| 60 | ctx, inst, *inst, | ||
| 61 | Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...); | ||
| 62 | } else { | ||
| 63 | SetDefinition<func>( | ||
| 64 | ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...); | ||
| 65 | } | ||
| 66 | } else { | ||
| 67 | if constexpr (is_first_arg_inst) { | ||
| 68 | func(ctx, *inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...); | ||
| 69 | } else { | ||
| 70 | func(ctx, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...); | ||
| 71 | } | ||
| 72 | } | ||
| 73 | } | ||
| 74 | |||
| 75 | template <auto func> | ||
| 76 | void Invoke(EmitContext& ctx, IR::Inst* inst) { | ||
| 77 | using Traits = FuncTraits<decltype(func)>; | ||
| 78 | static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments"); | ||
| 79 | if constexpr (Traits::NUM_ARGS == 1) { | ||
| 80 | Invoke<func, false>(ctx, inst, std::make_index_sequence<0>{}); | ||
| 81 | } else { | ||
| 82 | using FirstArgType = typename Traits::template ArgType<1>; | ||
| 83 | static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst&>; | ||
| 84 | using Indices = std::make_index_sequence<Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1)>; | ||
| 85 | Invoke<func, is_first_arg_inst>(ctx, inst, Indices{}); | ||
| 86 | } | ||
| 87 | } | ||
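The FuncTraits machinery above powers the opcode dispatch in EmitInst below: parameter types are recovered from each Emit* function pointer so every IR argument can be converted to the form the emitter expects (a consumed variable name, an immediate u32, an attribute, ...) before the call. A reduced, compilable sketch of the trick with illustrative names:

#include <cstddef>
#include <tuple>
#include <type_traits>

template <class Func>
struct Traits {};

template <class R, class... Args>
struct Traits<R (*)(Args...)> {
    using ReturnType = R;
    static constexpr std::size_t NUM_ARGS = sizeof...(Args);
    template <std::size_t I>
    using ArgType = std::tuple_element_t<I, std::tuple<Args...>>;
};

void EmitExample(int& ctx, const char* value);

// The dispatcher can ask, at compile time, how EmitExample wants each
// argument delivered:
static_assert(Traits<decltype(&EmitExample)>::NUM_ARGS == 2);
static_assert(std::is_same_v<Traits<decltype(&EmitExample)>::ArgType<1>,
                             const char*>);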
| 88 | |||
| 89 | void EmitInst(EmitContext& ctx, IR::Inst* inst) { | ||
| 90 | switch (inst->GetOpcode()) { | ||
| 91 | #define OPCODE(name, result_type, ...) \ | ||
| 92 | case IR::Opcode::name: \ | ||
| 93 | return Invoke<&Emit##name>(ctx, inst); | ||
| 94 | #include "shader_recompiler/frontend/ir/opcodes.inc" | ||
| 95 | #undef OPCODE | ||
| 96 | } | ||
| 97 | throw LogicError("Invalid opcode {}", inst->GetOpcode()); | ||
| 98 | } | ||
| 99 | |||
| 100 | bool IsReference(IR::Inst& inst) { | ||
| 101 | return inst.GetOpcode() == IR::Opcode::Reference; | ||
| 102 | } | ||
| 103 | |||
| 104 | void PrecolorInst(IR::Inst& phi) { | ||
| 105 | // Insert phi moves before references to avoid overwriting other phis | ||
| 106 | const size_t num_args{phi.NumArgs()}; | ||
| 107 | for (size_t i = 0; i < num_args; ++i) { | ||
| 108 | IR::Block& phi_block{*phi.PhiBlock(i)}; | ||
| 109 | auto it{std::find_if_not(phi_block.rbegin(), phi_block.rend(), IsReference).base()}; | ||
| 110 | IR::IREmitter ir{phi_block, it}; | ||
| 111 | const IR::Value arg{phi.Arg(i)}; | ||
| 112 | if (arg.IsImmediate()) { | ||
| 113 | ir.PhiMove(phi, arg); | ||
| 114 | } else { | ||
| 115 | ir.PhiMove(phi, IR::Value{arg.InstRecursive()}); | ||
| 116 | } | ||
| 117 | } | ||
| 118 | for (size_t i = 0; i < num_args; ++i) { | ||
| 119 | IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi}); | ||
| 120 | } | ||
| 121 | } | ||
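In effect, PrecolorInst lowers each phi into explicit moves, one per predecessor block, placed before that block's trailing Reference instructions so a move cannot clobber a value that another phi in the same block still reads. A hedged sketch of the transformation (SSA names hypothetical):

// Before:                      After:
//   blk1: ...                    blk1: ...; move(x, a); Reference(x)
//   blk2: ...                    blk2: ...; move(x, b); Reference(x)
//   blk3: x = phi(a, b)          blk3: x = phi(a, b)  // now just reads x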
| 122 | |||
| 123 | void Precolor(const IR::Program& program) { | ||
| 124 | for (IR::Block* const block : program.blocks) { | ||
| 125 | for (IR::Inst& phi : block->Instructions()) { | ||
| 126 | if (!IR::IsPhi(phi)) { | ||
| 127 | break; | ||
| 128 | } | ||
| 129 | PrecolorInst(phi); | ||
| 130 | } | ||
| 131 | } | ||
| 132 | } | ||
| 133 | |||
| 134 | void EmitCode(EmitContext& ctx, const IR::Program& program) { | ||
| 135 | for (const IR::AbstractSyntaxNode& node : program.syntax_list) { | ||
| 136 | switch (node.type) { | ||
| 137 | case IR::AbstractSyntaxNode::Type::Block: | ||
| 138 | for (IR::Inst& inst : node.data.block->Instructions()) { | ||
| 139 | EmitInst(ctx, &inst); | ||
| 140 | } | ||
| 141 | break; | ||
| 142 | case IR::AbstractSyntaxNode::Type::If: | ||
| 143 | ctx.Add("if({}){{", ctx.var_alloc.Consume(node.data.if_node.cond)); | ||
| 144 | break; | ||
| 145 | case IR::AbstractSyntaxNode::Type::EndIf: | ||
| 146 | ctx.Add("}}"); | ||
| 147 | break; | ||
| 148 | case IR::AbstractSyntaxNode::Type::Break: | ||
| 149 | if (node.data.break_node.cond.IsImmediate()) { | ||
| 150 | if (node.data.break_node.cond.U1()) { | ||
| 151 | ctx.Add("break;"); | ||
| 152 | } | ||
| 153 | } else { | ||
| 154 | ctx.Add("if({}){{break;}}", ctx.var_alloc.Consume(node.data.break_node.cond)); | ||
| 155 | } | ||
| 156 | break; | ||
| 157 | case IR::AbstractSyntaxNode::Type::Return: | ||
| 158 | case IR::AbstractSyntaxNode::Type::Unreachable: | ||
| 159 | ctx.Add("return;"); | ||
| 160 | break; | ||
| 161 | case IR::AbstractSyntaxNode::Type::Loop: | ||
| 162 | ctx.Add("for(;;){{"); | ||
| 163 | break; | ||
| 164 | case IR::AbstractSyntaxNode::Type::Repeat: | ||
| 165 | if (Settings::values.disable_shader_loop_safety_checks) { | ||
| 166 | ctx.Add("if(!{}){{break;}}}}", ctx.var_alloc.Consume(node.data.repeat.cond)); | ||
| 167 | } else { | ||
| 168 | ctx.Add("if(--loop{}<0 || !{}){{break;}}}}", ctx.num_safety_loop_vars++, | ||
| 169 | ctx.var_alloc.Consume(node.data.repeat.cond)); | ||
| 170 | } | ||
| 171 | break; | ||
| 172 | default: | ||
| 173 | throw NotImplementedException("AbstractSyntaxNode Type {}", node.type); | ||
| 174 | } | ||
| 175 | } | ||
| 176 | } | ||
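EmitCode is a single pass over the structured syntax list: each node type appends its GLSL token, and Repeat closes the for(;;) opened by Loop. As a hedged illustration, one Loop/Repeat pair with safety checks enabled yields output of this shape (identifiers hypothetical):

// for(;;){
//   ...loop body instructions...
//   if(--loop0<0 || !b1){break;}
// }
//
// loop0 is declared later by DefineVariables as "int loop0=0x2000;",
// capping iterations so a malformed shader cannot hang the driver.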
| 177 | |||
| 178 | std::string GlslVersionSpecifier(const EmitContext& ctx) { | ||
| 179 | if (ctx.uses_y_direction || ctx.info.stores.Legacy() || ctx.info.loads.Legacy()) { | ||
| 180 | return " compatibility"; | ||
| 181 | } | ||
| 182 | return ""; | ||
| 183 | } | ||
| 184 | |||
| 185 | bool IsPreciseType(GlslVarType type) { | ||
| 186 | switch (type) { | ||
| 187 | case GlslVarType::PrecF32: | ||
| 188 | case GlslVarType::PrecF64: | ||
| 189 | return true; | ||
| 190 | default: | ||
| 191 | return false; | ||
| 192 | } | ||
| 193 | } | ||
| 194 | |||
| 195 | void DefineVariables(const EmitContext& ctx, std::string& header) { | ||
| 196 | for (u32 i = 0; i < static_cast<u32>(GlslVarType::Void); ++i) { | ||
| 197 | const auto type{static_cast<GlslVarType>(i)}; | ||
| 198 | const auto& tracker{ctx.var_alloc.GetUseTracker(type)}; | ||
| 199 | const auto type_name{ctx.var_alloc.GetGlslType(type)}; | ||
| 200 | const bool has_precise_bug{ctx.stage == Stage::Fragment && ctx.profile.has_gl_precise_bug}; | ||
| 201 | const auto precise{!has_precise_bug && IsPreciseType(type) ? "precise " : ""}; | ||
| 202 | // Temps/return types that are never used are stored at index 0 | ||
| 203 | if (tracker.uses_temp) { | ||
| 204 | header += fmt::format("{}{} t{}={}(0);", precise, type_name, | ||
| 205 | ctx.var_alloc.Representation(0, type), type_name); | ||
| 206 | } | ||
| 207 | for (u32 index = 0; index < tracker.num_used; ++index) { | ||
| 208 | header += fmt::format("{}{} {}={}(0);", precise, type_name, | ||
| 209 | ctx.var_alloc.Representation(index, type), type_name); | ||
| 210 | } | ||
| 211 | } | ||
| 212 | for (u32 i = 0; i < ctx.num_safety_loop_vars; ++i) { | ||
| 213 | header += fmt::format("int loop{}=0x2000;", i); | ||
| 214 | } | ||
| 215 | } | ||
| 216 | } // Anonymous namespace | ||
| 217 | |||
| 218 | std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program, | ||
| 219 | Bindings& bindings) { | ||
| 220 | EmitContext ctx{program, bindings, profile, runtime_info}; | ||
| 221 | Precolor(program); | ||
| 222 | EmitCode(ctx, program); | ||
| 223 | const std::string version{fmt::format("#version 450{}\n", GlslVersionSpecifier(ctx))}; | ||
| 224 | ctx.header.insert(0, version); | ||
| 225 | if (program.shared_memory_size > 0) { | ||
| 226 | const auto requested_size{program.shared_memory_size}; | ||
| 227 | const auto max_size{profile.gl_max_compute_smem_size}; | ||
| 228 | const bool needs_clamp{requested_size > max_size}; | ||
| 229 | if (needs_clamp) { | ||
| 230 | LOG_WARNING(Shader_GLSL, "Requested shared memory size ({}) exceeds device limit ({})", | ||
| 231 | requested_size, max_size); | ||
| 232 | } | ||
| 233 | const auto smem_size{needs_clamp ? max_size : requested_size}; | ||
| 234 | ctx.header += fmt::format("shared uint smem[{}];", Common::DivCeil(smem_size, 4U)); | ||
| 235 | } | ||
| 236 | ctx.header += "void main(){\n"; | ||
| 237 | if (program.local_memory_size > 0) { | ||
| 238 | ctx.header += fmt::format("uint lmem[{}];", Common::DivCeil(program.local_memory_size, 4U)); | ||
| 239 | } | ||
| 240 | DefineVariables(ctx, ctx.header); | ||
| 241 | if (ctx.uses_cc_carry) { | ||
| 242 | ctx.header += "uint carry;"; | ||
| 243 | } | ||
| 244 | if (program.info.uses_subgroup_shuffles) { | ||
| 245 | ctx.header += "bool shfl_in_bounds;"; | ||
| 246 | } | ||
| 247 | ctx.code.insert(0, ctx.header); | ||
| 248 | ctx.code += '}'; | ||
| 249 | return ctx.code; | ||
| 250 | } | ||
| 251 | |||
| 252 | } // namespace Shader::Backend::GLSL | ||
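Putting EmitGLSL together: the final string is header + code + '}', with the version line prepended last so it still comes first. A sketch of the assembled output for a compute shader using shared and local memory (sizes and identifiers illustrative):

// #version 450
// ...extensions, buffer/texture declarations, helper functions...
// shared uint smem[256];   // only when shared_memory_size > 0
// void main(){
// uint lmem[64];           // only when local_memory_size > 0
// ...temporary declarations from DefineVariables, then ctx.code...
// }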
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.h b/src/shader_recompiler/backend/glsl/emit_glsl.h new file mode 100644 index 000000000..20e5719e6 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl.h | |||
| @@ -0,0 +1,24 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <string> | ||
| 8 | |||
| 9 | #include "shader_recompiler/backend/bindings.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 11 | #include "shader_recompiler/profile.h" | ||
| 12 | #include "shader_recompiler/runtime_info.h" | ||
| 13 | |||
| 14 | namespace Shader::Backend::GLSL { | ||
| 15 | |||
| 16 | [[nodiscard]] std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, | ||
| 17 | IR::Program& program, Bindings& bindings); | ||
| 18 | |||
| 19 | [[nodiscard]] inline std::string EmitGLSL(const Profile& profile, IR::Program& program) { | ||
| 20 | Bindings binding; | ||
| 21 | return EmitGLSL(profile, {}, program, binding); | ||
| 22 | } | ||
| 23 | |||
| 24 | } // namespace Shader::Backend::GLSL | ||
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp new file mode 100644 index 000000000..772acc5a4 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp | |||
| @@ -0,0 +1,418 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 10 | |||
| 11 | namespace Shader::Backend::GLSL { | ||
| 12 | namespace { | ||
| 13 | constexpr char cas_loop[]{ | ||
| 14 | "for (;;){{uint old={};{}=atomicCompSwap({},old,{}({},{}));if({}==old){{break;}}}}"}; | ||
| 15 | |||
| 16 | void SharedCasFunction(EmitContext& ctx, IR::Inst& inst, std::string_view offset, | ||
| 17 | std::string_view value, std::string_view function) { | ||
| 18 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; | ||
| 19 | const std::string smem{fmt::format("smem[{}>>2]", offset)}; | ||
| 20 | ctx.Add(cas_loop, smem, ret, smem, function, smem, value, ret); | ||
| 21 | } | ||
| 22 | |||
| 23 | void SsboCasFunction(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 24 | const IR::Value& offset, std::string_view value, std::string_view function) { | ||
| 25 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; | ||
| 26 | const std::string ssbo{fmt::format("{}_ssbo{}[{}>>2]", ctx.stage_name, binding.U32(), | ||
| 27 | ctx.var_alloc.Consume(offset))}; | ||
| 28 | ctx.Add(cas_loop, ssbo, ret, ssbo, function, ssbo, value, ret); | ||
| 29 | } | ||
| 30 | |||
| 31 | void SsboCasFunctionF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 32 | const IR::Value& offset, std::string_view value, | ||
| 33 | std::string_view function) { | ||
| 34 | const std::string ssbo{fmt::format("{}_ssbo{}[{}>>2]", ctx.stage_name, binding.U32(), | ||
| 35 | ctx.var_alloc.Consume(offset))}; | ||
| 36 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; | ||
| 37 | ctx.Add(cas_loop, ssbo, ret, ssbo, function, ssbo, value, ret); | ||
| 38 | ctx.AddF32("{}=utof({});", inst, ret); | ||
| 39 | } | ||
| 40 | } // Anonymous namespace | ||
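The cas_loop template above emulates read-modify-write operations that GLSL lacks natively: snapshot the word, compute the new value with a helper (CasMinS32, CasIncrement, ...), and retry atomicCompSwap until the snapshot was not raced. A runnable check of the expansion, reusing the same format string (identifiers illustrative):

#include <cstdio>
#include <string>

#include <fmt/format.h>

int main() {
    constexpr char cas_loop[] =
        "for (;;){{uint old={};{}=atomicCompSwap({},old,{}({},{}));"
        "if({}==old){{break;}}}}";
    const std::string word = "smem[o>>2]";
    const std::string s = fmt::format(fmt::runtime(cas_loop), word, "r", word,
                                      "CasMinS32", word, "v", "r");
    std::puts(s.c_str());
    // for (;;){uint old=smem[o>>2];r=atomicCompSwap(smem[o>>2],old,
    //          CasMinS32(smem[o>>2],v));if(r==old){break;}}
}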
| 41 | |||
| 42 | void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 43 | std::string_view value) { | ||
| 44 | ctx.AddU32("{}=atomicAdd(smem[{}>>2],{});", inst, pointer_offset, value); | ||
| 45 | } | ||
| 46 | |||
| 47 | void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 48 | std::string_view value) { | ||
| 49 | const std::string u32_value{fmt::format("uint({})", value)}; | ||
| 50 | SharedCasFunction(ctx, inst, pointer_offset, u32_value, "CasMinS32"); | ||
| 51 | } | ||
| 52 | |||
| 53 | void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 54 | std::string_view value) { | ||
| 55 | ctx.AddU32("{}=atomicMin(smem[{}>>2],{});", inst, pointer_offset, value); | ||
| 56 | } | ||
| 57 | |||
| 58 | void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 59 | std::string_view value) { | ||
| 60 | const std::string u32_value{fmt::format("uint({})", value)}; | ||
| 61 | SharedCasFunction(ctx, inst, pointer_offset, u32_value, "CasMaxS32"); | ||
| 62 | } | ||
| 63 | |||
| 64 | void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 65 | std::string_view value) { | ||
| 66 | ctx.AddU32("{}=atomicMax(smem[{}>>2],{});", inst, pointer_offset, value); | ||
| 67 | } | ||
| 68 | |||
| 69 | void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 70 | std::string_view value) { | ||
| 71 | SharedCasFunction(ctx, inst, pointer_offset, value, "CasIncrement"); | ||
| 72 | } | ||
| 73 | |||
| 74 | void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 75 | std::string_view value) { | ||
| 76 | SharedCasFunction(ctx, inst, pointer_offset, value, "CasDecrement"); | ||
| 77 | } | ||
| 78 | |||
| 79 | void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 80 | std::string_view value) { | ||
| 81 | ctx.AddU32("{}=atomicAnd(smem[{}>>2],{});", inst, pointer_offset, value); | ||
| 82 | } | ||
| 83 | |||
| 84 | void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 85 | std::string_view value) { | ||
| 86 | ctx.AddU32("{}=atomicOr(smem[{}>>2],{});", inst, pointer_offset, value); | ||
| 87 | } | ||
| 88 | |||
| 89 | void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 90 | std::string_view value) { | ||
| 91 | ctx.AddU32("{}=atomicXor(smem[{}>>2],{});", inst, pointer_offset, value); | ||
| 92 | } | ||
| 93 | |||
| 94 | void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 95 | std::string_view value) { | ||
| 96 | ctx.AddU32("{}=atomicExchange(smem[{}>>2],{});", inst, pointer_offset, value); | ||
| 97 | } | ||
| 98 | |||
| 99 | void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 100 | std::string_view value) { | ||
| 101 | LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); | ||
| 102 | ctx.AddU64("{}=packUint2x32(uvec2(smem[{}>>2],smem[({}+4)>>2]));", inst, pointer_offset, | ||
| 103 | pointer_offset); | ||
| 104 | ctx.Add("smem[{}>>2]=unpackUint2x32({}).x;smem[({}+4)>>2]=unpackUint2x32({}).y;", | ||
| 105 | pointer_offset, value, pointer_offset, value); | ||
| 106 | } | ||
| 107 | |||
| 108 | void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 109 | const IR::Value& offset, std::string_view value) { | ||
| 110 | ctx.AddU32("{}=atomicAdd({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), | ||
| 111 | ctx.var_alloc.Consume(offset), value); | ||
| 112 | } | ||
| 113 | |||
| 114 | void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 115 | const IR::Value& offset, std::string_view value) { | ||
| 116 | const std::string u32_value{fmt::format("uint({})", value)}; | ||
| 117 | SsboCasFunction(ctx, inst, binding, offset, u32_value, "CasMinS32"); | ||
| 118 | } | ||
| 119 | |||
| 120 | void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 121 | const IR::Value& offset, std::string_view value) { | ||
| 122 | ctx.AddU32("{}=atomicMin({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), | ||
| 123 | ctx.var_alloc.Consume(offset), value); | ||
| 124 | } | ||
| 125 | |||
| 126 | void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 127 | const IR::Value& offset, std::string_view value) { | ||
| 128 | const std::string u32_value{fmt::format("uint({})", value)}; | ||
| 129 | SsboCasFunction(ctx, inst, binding, offset, u32_value, "CasMaxS32"); | ||
| 130 | } | ||
| 131 | |||
| 132 | void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 133 | const IR::Value& offset, std::string_view value) { | ||
| 134 | ctx.AddU32("{}=atomicMax({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), | ||
| 135 | ctx.var_alloc.Consume(offset), value); | ||
| 136 | } | ||
| 137 | |||
| 138 | void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 139 | const IR::Value& offset, std::string_view value) { | ||
| 140 | SsboCasFunction(ctx, inst, binding, offset, value, "CasIncrement"); | ||
| 141 | } | ||
| 142 | |||
| 143 | void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 144 | const IR::Value& offset, std::string_view value) { | ||
| 145 | SsboCasFunction(ctx, inst, binding, offset, value, "CasDecrement"); | ||
| 146 | } | ||
| 147 | |||
| 148 | void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 149 | const IR::Value& offset, std::string_view value) { | ||
| 150 | ctx.AddU32("{}=atomicAnd({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), | ||
| 151 | ctx.var_alloc.Consume(offset), value); | ||
| 152 | } | ||
| 153 | |||
| 154 | void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 155 | const IR::Value& offset, std::string_view value) { | ||
| 156 | ctx.AddU32("{}=atomicOr({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), | ||
| 157 | ctx.var_alloc.Consume(offset), value); | ||
| 158 | } | ||
| 159 | |||
| 160 | void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 161 | const IR::Value& offset, std::string_view value) { | ||
| 162 | ctx.AddU32("{}=atomicXor({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), | ||
| 163 | ctx.var_alloc.Consume(offset), value); | ||
| 164 | } | ||
| 165 | |||
| 166 | void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 167 | const IR::Value& offset, std::string_view value) { | ||
| 168 | ctx.AddU32("{}=atomicExchange({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), | ||
| 169 | ctx.var_alloc.Consume(offset), value); | ||
| 170 | } | ||
| 171 | |||
| 172 | void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 173 | const IR::Value& offset, std::string_view value) { | ||
| 174 | LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); | ||
| 175 | ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, | ||
| 176 | ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, | ||
| 177 | binding.U32(), ctx.var_alloc.Consume(offset)); | ||
| 178 | ctx.Add("{}_ssbo{}[{}>>2]+=unpackUint2x32({}).x;{}_ssbo{}[({}>>2)+1]+=unpackUint2x32({}).y;", | ||
| 179 | ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name, | ||
| 180 | binding.U32(), ctx.var_alloc.Consume(offset), value); | ||
| 181 | } | ||
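As the warnings state, these 64-bit paths are best-effort: the old value is reconstructed by packing two adjacent 32-bit SSBO words, then the update is applied per word without atomicity. Note also that the IAdd64 fallback above adds the low and high halves independently, so a carry out of the low word is not propagated; a sketch of the limitation:

// desired: *p64 += v;          (single 64-bit atomic add)
// emitted: p32[0] += low(v);   // carry out of p32[0] is lost
//          p32[1] += high(v);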
| 182 | |||
| 183 | void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 184 | const IR::Value& offset, std::string_view value) { | ||
| 185 | LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); | ||
| 186 | ctx.AddU64("{}=packInt2x32(ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, | ||
| 187 | ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, | ||
| 188 | binding.U32(), ctx.var_alloc.Consume(offset)); | ||
| 189 | ctx.Add("for(int i=0;i<2;++i){{ " | ||
| 190 | "{}_ssbo{}[({}>>2)+i]=uint(min(int({}_ssbo{}[({}>>2)+i]),unpackInt2x32(int64_t({}))[i])" | ||
| 191 | ");}}", | ||
| 192 | ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, | ||
| 193 | binding.U32(), ctx.var_alloc.Consume(offset), value); | ||
| 194 | } | ||
| 195 | |||
| 196 | void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 197 | const IR::Value& offset, std::string_view value) { | ||
| 198 | LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); | ||
| 199 | ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, | ||
| 200 | ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, | ||
| 201 | binding.U32(), ctx.var_alloc.Consume(offset)); | ||
| 202 | ctx.Add("for(int i=0;i<2;++i){{ " | ||
| 203 | "{}_ssbo{}[({}>>2)+i]=min({}_ssbo{}[({}>>2)+i],unpackUint2x32(uint64_t({}))[i]);}}", | ||
| 204 | ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, | ||
| 205 | binding.U32(), ctx.var_alloc.Consume(offset), value); | ||
| 206 | } | ||
| 207 | |||
| 208 | void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 209 | const IR::Value& offset, std::string_view value) { | ||
| 210 | LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); | ||
| 211 | ctx.AddU64("{}=packInt2x32(ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, | ||
| 212 | ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, | ||
| 213 | binding.U32(), ctx.var_alloc.Consume(offset)); | ||
| 214 | ctx.Add("for(int i=0;i<2;++i){{ " | ||
| 215 | "{}_ssbo{}[({}>>2)+i]=uint(max(int({}_ssbo{}[({}>>2)+i]),unpackInt2x32(int64_t({}))[i])" | ||
| 216 | ");}}", | ||
| 217 | ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, | ||
| 218 | binding.U32(), ctx.var_alloc.Consume(offset), value); | ||
| 219 | } | ||
| 220 | |||
| 221 | void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 222 | const IR::Value& offset, std::string_view value) { | ||
| 223 | LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); | ||
| 224 | ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, | ||
| 225 | ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, | ||
| 226 | binding.U32(), ctx.var_alloc.Consume(offset)); | ||
| 227 | ctx.Add("for(int " | ||
| 228 | "i=0;i<2;++i){{{}_ssbo{}[({}>>2)+i]=max({}_ssbo{}[({}>>2)+i],unpackUint2x32(uint64_t({}" | ||
| 229 | "))[i]);}}", | ||
| 230 | ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, | ||
| 231 | binding.U32(), ctx.var_alloc.Consume(offset), value); | ||
| 232 | } | ||
| 233 | |||
| 234 | void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 235 | const IR::Value& offset, std::string_view value) { | ||
| 236 | ctx.AddU64( | ||
| 237 | "{}=packUint2x32(uvec2(atomicAnd({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicAnd({}_" | ||
| 238 | "ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));", | ||
| 239 | inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name, | ||
| 240 | binding.U32(), ctx.var_alloc.Consume(offset), value); | ||
| 241 | } | ||
| 242 | |||
| 243 | void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 244 | const IR::Value& offset, std::string_view value) { | ||
| 245 | ctx.AddU64("{}=packUint2x32(uvec2(atomicOr({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicOr({}_" | ||
| 246 | "ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));", | ||
| 247 | inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, | ||
| 248 | ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value); | ||
| 249 | } | ||
| 250 | |||
| 251 | void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 252 | const IR::Value& offset, std::string_view value) { | ||
| 253 | ctx.AddU64( | ||
| 254 | "{}=packUint2x32(uvec2(atomicXor({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicXor({}_" | ||
| 255 | "ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));", | ||
| 256 | inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name, | ||
| 257 | binding.U32(), ctx.var_alloc.Consume(offset), value); | ||
| 258 | } | ||
| 259 | |||
| 260 | void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 261 | const IR::Value& offset, std::string_view value) { | ||
| 262 | ctx.AddU64("{}=packUint2x32(uvec2(atomicExchange({}_ssbo{}[{}>>2],unpackUint2x32({}).x)," | ||
| 263 | "atomicExchange({}_ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));", | ||
| 264 | inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, | ||
| 265 | ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value); | ||
| 266 | } | ||
| 267 | |||
| 268 | void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 269 | const IR::Value& offset, std::string_view value) { | ||
| 270 | SsboCasFunctionF32(ctx, inst, binding, offset, value, "CasFloatAdd"); | ||
| 271 | } | ||
| 272 | |||
| 273 | void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 274 | const IR::Value& offset, std::string_view value) { | ||
| 275 | SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatAdd16x2"); | ||
| 276 | } | ||
| 277 | |||
| 278 | void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 279 | const IR::Value& offset, std::string_view value) { | ||
| 280 | SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatAdd32x2"); | ||
| 281 | } | ||
| 282 | |||
| 283 | void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 284 | const IR::Value& offset, std::string_view value) { | ||
| 285 | SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMin16x2"); | ||
| 286 | } | ||
| 287 | |||
| 288 | void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 289 | const IR::Value& offset, std::string_view value) { | ||
| 290 | SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMin32x2"); | ||
| 291 | } | ||
| 292 | |||
| 293 | void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 294 | const IR::Value& offset, std::string_view value) { | ||
| 295 | SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMax16x2"); | ||
| 296 | } | ||
| 297 | |||
| 298 | void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 299 | const IR::Value& offset, std::string_view value) { | ||
| 300 | SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMax32x2"); | ||
| 301 | } | ||
| 302 | |||
| 303 | void EmitGlobalAtomicIAdd32(EmitContext&) { | ||
| 304 | throw NotImplementedException("GLSL Instruction"); | ||
| 305 | } | ||
| 306 | |||
| 307 | void EmitGlobalAtomicSMin32(EmitContext&) { | ||
| 308 | throw NotImplementedException("GLSL Instruction"); | ||
| 309 | } | ||
| 310 | |||
| 311 | void EmitGlobalAtomicUMin32(EmitContext&) { | ||
| 312 | throw NotImplementedException("GLSL Instruction"); | ||
| 313 | } | ||
| 314 | |||
| 315 | void EmitGlobalAtomicSMax32(EmitContext&) { | ||
| 316 | throw NotImplementedException("GLSL Instruction"); | ||
| 317 | } | ||
| 318 | |||
| 319 | void EmitGlobalAtomicUMax32(EmitContext&) { | ||
| 320 | throw NotImplementedException("GLSL Instruction"); | ||
| 321 | } | ||
| 322 | |||
| 323 | void EmitGlobalAtomicInc32(EmitContext&) { | ||
| 324 | throw NotImplementedException("GLSL Instruction"); | ||
| 325 | } | ||
| 326 | |||
| 327 | void EmitGlobalAtomicDec32(EmitContext&) { | ||
| 328 | throw NotImplementedException("GLSL Instruction"); | ||
| 329 | } | ||
| 330 | |||
| 331 | void EmitGlobalAtomicAnd32(EmitContext&) { | ||
| 332 | throw NotImplementedException("GLSL Instruction"); | ||
| 333 | } | ||
| 334 | |||
| 335 | void EmitGlobalAtomicOr32(EmitContext&) { | ||
| 336 | throw NotImplementedException("GLSL Instruction"); | ||
| 337 | } | ||
| 338 | |||
| 339 | void EmitGlobalAtomicXor32(EmitContext&) { | ||
| 340 | throw NotImplementedException("GLSL Instruction"); | ||
| 341 | } | ||
| 342 | |||
| 343 | void EmitGlobalAtomicExchange32(EmitContext&) { | ||
| 344 | throw NotImplementedException("GLSL Instruction"); | ||
| 345 | } | ||
| 346 | |||
| 347 | void EmitGlobalAtomicIAdd64(EmitContext&) { | ||
| 348 | throw NotImplementedException("GLSL Instruction"); | ||
| 349 | } | ||
| 350 | |||
| 351 | void EmitGlobalAtomicSMin64(EmitContext&) { | ||
| 352 | throw NotImplementedException("GLSL Instruction"); | ||
| 353 | } | ||
| 354 | |||
| 355 | void EmitGlobalAtomicUMin64(EmitContext&) { | ||
| 356 | throw NotImplementedException("GLSL Instruction"); | ||
| 357 | } | ||
| 358 | |||
| 359 | void EmitGlobalAtomicSMax64(EmitContext&) { | ||
| 360 | throw NotImplementedException("GLSL Instruction"); | ||
| 361 | } | ||
| 362 | |||
| 363 | void EmitGlobalAtomicUMax64(EmitContext&) { | ||
| 364 | throw NotImplementedException("GLSL Instruction"); | ||
| 365 | } | ||
| 366 | |||
| 367 | void EmitGlobalAtomicInc64(EmitContext&) { | ||
| 368 | throw NotImplementedException("GLSL Instruction"); | ||
| 369 | } | ||
| 370 | |||
| 371 | void EmitGlobalAtomicDec64(EmitContext&) { | ||
| 372 | throw NotImplementedException("GLSL Instruction"); | ||
| 373 | } | ||
| 374 | |||
| 375 | void EmitGlobalAtomicAnd64(EmitContext&) { | ||
| 376 | throw NotImplementedException("GLSL Instruction"); | ||
| 377 | } | ||
| 378 | |||
| 379 | void EmitGlobalAtomicOr64(EmitContext&) { | ||
| 380 | throw NotImplementedException("GLSL Instruction"); | ||
| 381 | } | ||
| 382 | |||
| 383 | void EmitGlobalAtomicXor64(EmitContext&) { | ||
| 384 | throw NotImplementedException("GLSL Instruction"); | ||
| 385 | } | ||
| 386 | |||
| 387 | void EmitGlobalAtomicExchange64(EmitContext&) { | ||
| 388 | throw NotImplementedException("GLSL Instruction"); | ||
| 389 | } | ||
| 390 | |||
| 391 | void EmitGlobalAtomicAddF32(EmitContext&) { | ||
| 392 | throw NotImplementedException("GLSL Instruction"); | ||
| 393 | } | ||
| 394 | |||
| 395 | void EmitGlobalAtomicAddF16x2(EmitContext&) { | ||
| 396 | throw NotImplementedException("GLSL Instruction"); | ||
| 397 | } | ||
| 398 | |||
| 399 | void EmitGlobalAtomicAddF32x2(EmitContext&) { | ||
| 400 | throw NotImplementedException("GLSL Instruction"); | ||
| 401 | } | ||
| 402 | |||
| 403 | void EmitGlobalAtomicMinF16x2(EmitContext&) { | ||
| 404 | throw NotImplementedException("GLSL Instruction"); | ||
| 405 | } | ||
| 406 | |||
| 407 | void EmitGlobalAtomicMinF32x2(EmitContext&) { | ||
| 408 | throw NotImplementedException("GLSL Instruction"); | ||
| 409 | } | ||
| 410 | |||
| 411 | void EmitGlobalAtomicMaxF16x2(EmitContext&) { | ||
| 412 | throw NotImplementedException("GLSL Instruction"); | ||
| 413 | } | ||
| 414 | |||
| 415 | void EmitGlobalAtomicMaxF32x2(EmitContext&) { | ||
| 416 | throw NotImplementedException("GLSL Instruction"); | ||
| 417 | } | ||
| 418 | } // namespace Shader::Backend::GLSL | ||
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp new file mode 100644 index 000000000..e1d1b558e --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp | |||
| @@ -0,0 +1,21 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 6 | #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" | ||
| 7 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 8 | |||
| 9 | namespace Shader::Backend::GLSL { | ||
| 10 | void EmitBarrier(EmitContext& ctx) { | ||
| 11 | ctx.Add("barrier();"); | ||
| 12 | } | ||
| 13 | |||
| 14 | void EmitWorkgroupMemoryBarrier(EmitContext& ctx) { | ||
| 15 | ctx.Add("groupMemoryBarrier();"); | ||
| 16 | } | ||
| 17 | |||
| 18 | void EmitDeviceMemoryBarrier(EmitContext& ctx) { | ||
| 19 | ctx.Add("memoryBarrier();"); | ||
| 20 | } | ||
| 21 | } // namespace Shader::Backend::GLSL | ||
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp new file mode 100644 index 000000000..3c1714e89 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp | |||
| @@ -0,0 +1,94 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 10 | |||
| 11 | namespace Shader::Backend::GLSL { | ||
| 12 | namespace { | ||
| 13 | void Alias(IR::Inst& inst, const IR::Value& value) { | ||
| 14 | if (value.IsImmediate()) { | ||
| 15 | return; | ||
| 16 | } | ||
| 17 | IR::Inst& value_inst{*value.InstRecursive()}; | ||
| 18 | value_inst.DestructiveAddUsage(inst.UseCount()); | ||
| 19 | value_inst.DestructiveRemoveUsage(); | ||
| 20 | inst.SetDefinition(value_inst.Definition<Id>()); | ||
| 21 | } | ||
| 22 | } // Anonymous namespace | ||
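Alias makes bitcasts and identities free: instead of emitting a copy, the aliasing instruction adopts the source's definition while the source inherits the alias's pending uses, keeping it alive long enough. A simplified model with hypothetical types:

// Conceptual model only; the real code transfers IR use counts.
struct Node {
    int use_count = 0;
    int var_id = -1; // GLSL variable backing this SSA value
};

void AliasModel(Node& inst, Node& value) {
    value.use_count += inst.use_count; // keep the source alive for the alias
    inst.var_id = value.var_id;        // both names refer to one variable
}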
| 23 | |||
| 24 | void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) { | ||
| 25 | Alias(inst, value); | ||
| 26 | } | ||
| 27 | |||
| 28 | void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) { | ||
| 29 | // Fake one usage to get a real variable out of the condition | ||
| 30 | inst.DestructiveAddUsage(1); | ||
| 31 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U1)}; | ||
| 32 | const auto input{ctx.var_alloc.Consume(value)}; | ||
| 33 | if (ret != input) { | ||
| 34 | ctx.Add("{}={};", ret, input); | ||
| 35 | } | ||
| 36 | } | ||
| 37 | |||
| 38 | void EmitBitCastU16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) { | ||
| 39 | NotImplemented(); | ||
| 40 | } | ||
| 41 | |||
| 42 | void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 43 | ctx.AddU32("{}=ftou({});", inst, value); | ||
| 44 | } | ||
| 45 | |||
| 46 | void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 47 | ctx.AddU64("{}=doubleBitsToUint64({});", inst, value); | ||
| 48 | } | ||
| 49 | |||
| 50 | void EmitBitCastF16U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) { | ||
| 51 | NotImplemented(); | ||
| 52 | } | ||
| 53 | |||
| 54 | void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 55 | ctx.AddF32("{}=utof({});", inst, value); | ||
| 56 | } | ||
| 57 | |||
| 58 | void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 59 | ctx.AddF64("{}=uint64BitsToDouble({});", inst, value); | ||
| 60 | } | ||
| 61 | |||
| 62 | void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 63 | ctx.AddU64("{}=packUint2x32({});", inst, value); | ||
| 64 | } | ||
| 65 | |||
| 66 | void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 67 | ctx.AddU32x2("{}=unpackUint2x32({});", inst, value); | ||
| 68 | } | ||
| 69 | |||
| 70 | void EmitPackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 71 | ctx.AddU32("{}=packFloat2x16({});", inst, value); | ||
| 72 | } | ||
| 73 | |||
| 74 | void EmitUnpackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 75 | ctx.AddF16x2("{}=unpackFloat2x16({});", inst, value); | ||
| 76 | } | ||
| 77 | |||
| 78 | void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 79 | ctx.AddU32("{}=packHalf2x16({});", inst, value); | ||
| 80 | } | ||
| 81 | |||
| 82 | void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 83 | ctx.AddF32x2("{}=unpackHalf2x16({});", inst, value); | ||
| 84 | } | ||
| 85 | |||
| 86 | void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 87 | ctx.AddF64("{}=packDouble2x32({});", inst, value); | ||
| 88 | } | ||
| 89 | |||
| 90 | void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 91 | ctx.AddU32x2("{}=unpackDouble2x32({});", inst, value); | ||
| 92 | } | ||
| 93 | |||
| 94 | } // namespace Shader::Backend::GLSL | ||
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp new file mode 100644 index 000000000..49a66e3ec --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp | |||
| @@ -0,0 +1,219 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 10 | |||
| 11 | namespace Shader::Backend::GLSL { | ||
| 12 | namespace { | ||
| 13 | constexpr std::string_view SWIZZLE{"xyzw"}; | ||
| 14 | void CompositeInsert(EmitContext& ctx, std::string_view result, std::string_view composite, | ||
| 15 | std::string_view object, u32 index) { | ||
| 16 | if (result == composite) { | ||
| 17 | // The result is aliased with the composite | ||
| 18 | ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); | ||
| 19 | } else { | ||
| 20 | ctx.Add("{}={};{}.{}={};", result, composite, result, SWIZZLE[index], object); | ||
| 21 | } | ||
| 22 | } | ||
| 23 | } // Anonymous namespace | ||
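CompositeInsert special-cases aliasing so a composite is never copied onto itself. Illustrative emitted GLSL for inserting object o at component index 1 (names hypothetical):

// ret != composite: t2=v1;t2.y=o;   (copy, then overwrite one component)
// ret == composite: v1.y=o;         (the copy would be a self-assignment)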
| 24 | |||
| 25 | void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1, | ||
| 26 | std::string_view e2) { | ||
| 27 | ctx.AddU32x2("{}=uvec2({},{});", inst, e1, e2); | ||
| 28 | } | ||
| 29 | |||
| 30 | void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1, | ||
| 31 | std::string_view e2, std::string_view e3) { | ||
| 32 | ctx.AddU32x3("{}=uvec3({},{},{});", inst, e1, e2, e3); | ||
| 33 | } | ||
| 34 | |||
| 35 | void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1, | ||
| 36 | std::string_view e2, std::string_view e3, std::string_view e4) { | ||
| 37 | ctx.AddU32x4("{}=uvec4({},{},{},{});", inst, e1, e2, e3, e4); | ||
| 38 | } | ||
| 39 | |||
| 40 | void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 41 | u32 index) { | ||
| 42 | ctx.AddU32("{}={}.{};", inst, composite, SWIZZLE[index]); | ||
| 43 | } | ||
| 44 | |||
| 45 | void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 46 | u32 index) { | ||
| 47 | ctx.AddU32("{}={}.{};", inst, composite, SWIZZLE[index]); | ||
| 48 | } | ||
| 49 | |||
| 50 | void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 51 | u32 index) { | ||
| 52 | ctx.AddU32("{}={}.{};", inst, composite, SWIZZLE[index]); | ||
| 53 | } | ||
| 54 | |||
| 55 | void EmitCompositeInsertU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 56 | std::string_view object, u32 index) { | ||
| 57 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x2)}; | ||
| 58 | CompositeInsert(ctx, ret, composite, object, index); | ||
| 59 | } | ||
| 60 | |||
| 61 | void EmitCompositeInsertU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 62 | std::string_view object, u32 index) { | ||
| 63 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x3)}; | ||
| 64 | CompositeInsert(ctx, ret, composite, object, index); | ||
| 65 | } | ||
| 66 | |||
| 67 | void EmitCompositeInsertU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 68 | std::string_view object, u32 index) { | ||
| 69 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x4)}; | ||
| 70 | CompositeInsert(ctx, ret, composite, object, index); | ||
| 71 | } | ||
| 72 | |||
| 73 | void EmitCompositeConstructF16x2([[maybe_unused]] EmitContext& ctx, | ||
| 74 | [[maybe_unused]] std::string_view e1, | ||
| 75 | [[maybe_unused]] std::string_view e2) { | ||
| 76 | NotImplemented(); | ||
| 77 | } | ||
| 78 | |||
| 79 | void EmitCompositeConstructF16x3([[maybe_unused]] EmitContext& ctx, | ||
| 80 | [[maybe_unused]] std::string_view e1, | ||
| 81 | [[maybe_unused]] std::string_view e2, | ||
| 82 | [[maybe_unused]] std::string_view e3) { | ||
| 83 | NotImplemented(); | ||
| 84 | } | ||
| 85 | |||
| 86 | void EmitCompositeConstructF16x4([[maybe_unused]] EmitContext& ctx, | ||
| 87 | [[maybe_unused]] std::string_view e1, | ||
| 88 | [[maybe_unused]] std::string_view e2, | ||
| 89 | [[maybe_unused]] std::string_view e3, | ||
| 90 | [[maybe_unused]] std::string_view e4) { | ||
| 91 | NotImplemented(); | ||
| 92 | } | ||
| 93 | |||
| 94 | void EmitCompositeExtractF16x2([[maybe_unused]] EmitContext& ctx, | ||
| 95 | [[maybe_unused]] std::string_view composite, | ||
| 96 | [[maybe_unused]] u32 index) { | ||
| 97 | NotImplemented(); | ||
| 98 | } | ||
| 99 | |||
| 100 | void EmitCompositeExtractF16x3([[maybe_unused]] EmitContext& ctx, | ||
| 101 | [[maybe_unused]] std::string_view composite, | ||
| 102 | [[maybe_unused]] u32 index) { | ||
| 103 | NotImplemented(); | ||
| 104 | } | ||
| 105 | |||
| 106 | void EmitCompositeExtractF16x4([[maybe_unused]] EmitContext& ctx, | ||
| 107 | [[maybe_unused]] std::string_view composite, | ||
| 108 | [[maybe_unused]] u32 index) { | ||
| 109 | NotImplemented(); | ||
| 110 | } | ||
| 111 | |||
| 112 | void EmitCompositeInsertF16x2([[maybe_unused]] EmitContext& ctx, | ||
| 113 | [[maybe_unused]] std::string_view composite, | ||
| 114 | [[maybe_unused]] std::string_view object, | ||
| 115 | [[maybe_unused]] u32 index) { | ||
| 116 | NotImplemented(); | ||
| 117 | } | ||
| 118 | |||
| 119 | void EmitCompositeInsertF16x3([[maybe_unused]] EmitContext& ctx, | ||
| 120 | [[maybe_unused]] std::string_view composite, | ||
| 121 | [[maybe_unused]] std::string_view object, | ||
| 122 | [[maybe_unused]] u32 index) { | ||
| 123 | NotImplemented(); | ||
| 124 | } | ||
| 125 | |||
| 126 | void EmitCompositeInsertF16x4([[maybe_unused]] EmitContext& ctx, | ||
| 127 | [[maybe_unused]] std::string_view composite, | ||
| 128 | [[maybe_unused]] std::string_view object, | ||
| 129 | [[maybe_unused]] u32 index) { | ||
| 130 | NotImplemented(); | ||
| 131 | } | ||
| 132 | |||
| 133 | void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1, | ||
| 134 | std::string_view e2) { | ||
| 135 | ctx.AddF32x2("{}=vec2({},{});", inst, e1, e2); | ||
| 136 | } | ||
| 137 | |||
| 138 | void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1, | ||
| 139 | std::string_view e2, std::string_view e3) { | ||
| 140 | ctx.AddF32x3("{}=vec3({},{},{});", inst, e1, e2, e3); | ||
| 141 | } | ||
| 142 | |||
| 143 | void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1, | ||
| 144 | std::string_view e2, std::string_view e3, std::string_view e4) { | ||
| 145 | ctx.AddF32x4("{}=vec4({},{},{},{});", inst, e1, e2, e3, e4); | ||
| 146 | } | ||
| 147 | |||
| 148 | void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 149 | u32 index) { | ||
| 150 | ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]); | ||
| 151 | } | ||
| 152 | |||
| 153 | void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 154 | u32 index) { | ||
| 155 | ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]); | ||
| 156 | } | ||
| 157 | |||
| 158 | void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 159 | u32 index) { | ||
| 160 | ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]); | ||
| 161 | } | ||
| 162 | |||
| 163 | void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 164 | std::string_view object, u32 index) { | ||
| 165 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32x2)}; | ||
| 166 | CompositeInsert(ctx, ret, composite, object, index); | ||
| 167 | } | ||
| 168 | |||
| 169 | void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 170 | std::string_view object, u32 index) { | ||
| 171 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32x3)}; | ||
| 172 | CompositeInsert(ctx, ret, composite, object, index); | ||
| 173 | } | ||
| 174 | |||
| 175 | void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 176 | std::string_view object, u32 index) { | ||
| 177 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; | ||
| 178 | CompositeInsert(ctx, ret, composite, object, index); | ||
| 179 | } | ||
| 180 | |||
| 181 | void EmitCompositeConstructF64x2([[maybe_unused]] EmitContext& ctx) { | ||
| 182 | NotImplemented(); | ||
| 183 | } | ||
| 184 | |||
| 185 | void EmitCompositeConstructF64x3([[maybe_unused]] EmitContext& ctx) { | ||
| 186 | NotImplemented(); | ||
| 187 | } | ||
| 188 | |||
| 189 | void EmitCompositeConstructF64x4([[maybe_unused]] EmitContext& ctx) { | ||
| 190 | NotImplemented(); | ||
| 191 | } | ||
| 192 | |||
| 193 | void EmitCompositeExtractF64x2([[maybe_unused]] EmitContext& ctx) { | ||
| 194 | NotImplemented(); | ||
| 195 | } | ||
| 196 | |||
| 197 | void EmitCompositeExtractF64x3([[maybe_unused]] EmitContext& ctx) { | ||
| 198 | NotImplemented(); | ||
| 199 | } | ||
| 200 | |||
| 201 | void EmitCompositeExtractF64x4([[maybe_unused]] EmitContext& ctx) { | ||
| 202 | NotImplemented(); | ||
| 203 | } | ||
| 204 | |||
| 205 | void EmitCompositeInsertF64x2(EmitContext& ctx, std::string_view composite, std::string_view object, | ||
| 206 | u32 index) { | ||
| 207 | ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); | ||
| 208 | } | ||
| 209 | |||
| 210 | void EmitCompositeInsertF64x3(EmitContext& ctx, std::string_view composite, std::string_view object, | ||
| 211 | u32 index) { | ||
| 212 | ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); | ||
| 213 | } | ||
| 214 | |||
| 215 | void EmitCompositeInsertF64x4(EmitContext& ctx, std::string_view composite, std::string_view object, | ||
| 216 | u32 index) { | ||
| 217 | ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); | ||
| 218 | } | ||
| 219 | } // namespace Shader::Backend::GLSL | ||
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp new file mode 100644 index 000000000..580063fa9 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp | |||
| @@ -0,0 +1,456 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 10 | #include "shader_recompiler/profile.h" | ||
| 11 | #include "shader_recompiler/runtime_info.h" | ||
| 12 | |||
| 13 | namespace Shader::Backend::GLSL { | ||
| 14 | namespace { | ||
| 15 | constexpr char SWIZZLE[]{"xyzw"}; | ||
| 16 | |||
| 17 | u32 CbufIndex(u32 offset) { | ||
| 18 | return (offset / 4) % 4; | ||
| 19 | } | ||
| 20 | |||
| 21 | char OffsetSwizzle(u32 offset) { | ||
| 22 | return SWIZZLE[CbufIndex(offset)]; | ||
| 23 | } | ||
| 24 | |||
| 25 | bool IsInputArray(Stage stage) { | ||
| 26 | return stage == Stage::Geometry || stage == Stage::TessellationControl || | ||
| 27 | stage == Stage::TessellationEval; | ||
| 28 | } | ||
| 29 | |||
| 30 | std::string InputVertexIndex(EmitContext& ctx, std::string_view vertex) { | ||
| 31 | return IsInputArray(ctx.stage) ? fmt::format("[{}]", vertex) : ""; | ||
| 32 | } | ||
| 33 | |||
| 34 | std::string_view OutputVertexIndex(EmitContext& ctx) { | ||
| 35 | return ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : ""; | ||
| 36 | } | ||
| 37 | |||
| 38 | void GetCbuf(EmitContext& ctx, std::string_view ret, const IR::Value& binding, | ||
| 39 | const IR::Value& offset, u32 num_bits, std::string_view cast = {}, | ||
| 40 | std::string_view bit_offset = {}) { | ||
| 41 | const bool is_immediate{offset.IsImmediate()}; | ||
| 42 | const bool component_indexing_bug{!is_immediate && ctx.profile.has_gl_component_indexing_bug}; | ||
| 43 | if (is_immediate) { | ||
| 44 | const s32 signed_offset{static_cast<s32>(offset.U32())}; | ||
| 45 | static constexpr u32 cbuf_size{0x10000}; | ||
| 46 | if (signed_offset < 0 || offset.U32() > cbuf_size) { | ||
| 47 | LOG_WARNING(Shader_GLSL, "Immediate constant buffer offset is out of bounds"); | ||
| 48 | ctx.Add("{}=0u;", ret); | ||
| 49 | return; | ||
| 50 | } | ||
| 51 | } | ||
| 52 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||
| 53 | const auto index{is_immediate ? fmt::format("{}", offset.U32() / 16) | ||
| 54 | : fmt::format("{}>>4", offset_var)}; | ||
| 55 | const auto swizzle{is_immediate ? fmt::format(".{}", OffsetSwizzle(offset.U32())) | ||
| 56 | : fmt::format("[({}>>2)%4]", offset_var)}; | ||
| 57 | |||
| 58 | const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; | ||
| 59 | const auto cbuf_cast{fmt::format("{}({}[{}]{{}})", cast, cbuf, index)}; | ||
| 60 | const auto extraction{num_bits == 32 ? cbuf_cast | ||
| 61 | : fmt::format("bitfieldExtract({},int({}),{})", cbuf_cast, | ||
| 62 | bit_offset, num_bits)}; | ||
| 63 | if (!component_indexing_bug) { | ||
| 64 | const auto result{fmt::format(fmt::runtime(extraction), swizzle)}; | ||
| 65 | ctx.Add("{}={};", ret, result); | ||
| 66 | return; | ||
| 67 | } | ||
| 68 | const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; | ||
| 69 | for (u32 i = 0; i < 4; ++i) { | ||
| 70 | const auto swizzle_string{fmt::format(".{}", "xyzw"[i])}; | ||
| 71 | const auto result{fmt::format(fmt::runtime(extraction), swizzle_string)}; | ||
| 72 | ctx.Add("if(({}&3)=={}){}={};", cbuf_offset, i, ret, result); | ||
| 73 | } | ||
| 74 | } | ||
| 75 | |||
| 76 | void GetCbuf8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, | ||
| 77 | std::string_view cast) { | ||
| 78 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; | ||
| 79 | if (offset.IsImmediate()) { | ||
| 80 | const auto bit_offset{fmt::format("{}", (offset.U32() % 4) * 8)}; | ||
| 81 | GetCbuf(ctx, ret, binding, offset, 8, cast, bit_offset); | ||
| 82 | } else { | ||
| 83 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||
| 84 | const auto bit_offset{fmt::format("({}%4)*8", offset_var)}; | ||
| 85 | GetCbuf(ctx, ret, binding, offset, 8, cast, bit_offset); | ||
| 86 | } | ||
| 87 | } | ||
| 88 | |||
| 89 | void GetCbuf16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, | ||
| 90 | std::string_view cast) { | ||
| 91 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; | ||
| 92 | if (offset.IsImmediate()) { | ||
| 93 | const auto bit_offset{fmt::format("{}", ((offset.U32() / 2) % 2) * 16)}; | ||
| 94 | GetCbuf(ctx, ret, binding, offset, 16, cast, bit_offset); | ||
| 95 | } else { | ||
| 96 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||
| 97 | const auto bit_offset{fmt::format("(({}>>1)%2)*16", offset_var)}; | ||
| 98 | GetCbuf(ctx, ret, binding, offset, 16, cast, bit_offset); | ||
| 99 | } | ||
| 100 | } | ||
| 101 | |||
| 102 | u32 TexCoordIndex(IR::Attribute attr) { | ||
| 103 | return (static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::FixedFncTexture0S)) / 4; | ||
| 104 | } | ||
| 105 | } // Anonymous namespace | ||
| 106 | |||
| 107 | void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 108 | const IR::Value& offset) { | ||
| 109 | GetCbuf8(ctx, inst, binding, offset, "ftou"); | ||
| 110 | } | ||
| 111 | |||
| 112 | void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 113 | const IR::Value& offset) { | ||
| 114 | GetCbuf8(ctx, inst, binding, offset, "ftoi"); | ||
| 115 | } | ||
| 116 | |||
| 117 | void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 118 | const IR::Value& offset) { | ||
| 119 | GetCbuf16(ctx, inst, binding, offset, "ftou"); | ||
| 120 | } | ||
| 121 | |||
| 122 | void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 123 | const IR::Value& offset) { | ||
| 124 | GetCbuf16(ctx, inst, binding, offset, "ftoi"); | ||
| 125 | } | ||
| 126 | |||
| 127 | void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 128 | const IR::Value& offset) { | ||
| 129 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; | ||
| 130 | GetCbuf(ctx, ret, binding, offset, 32, "ftou"); | ||
| 131 | } | ||
| 132 | |||
| 133 | void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 134 | const IR::Value& offset) { | ||
| 135 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32)}; | ||
| 136 | GetCbuf(ctx, ret, binding, offset, 32); | ||
| 137 | } | ||
| 138 | |||
| 139 | void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 140 | const IR::Value& offset) { | ||
| 141 | const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; | ||
| 142 | if (offset.IsImmediate()) { | ||
| 143 | static constexpr u32 cbuf_size{0x10000}; | ||
| 144 | const u32 u32_offset{offset.U32()}; | ||
| 145 | const s32 signed_offset{static_cast<s32>(offset.U32())}; | ||
| 146 | if (signed_offset < 0 || u32_offset > cbuf_size) { | ||
| 147 | LOG_WARNING(Shader_GLSL, "Immediate constant buffer offset is out of bounds"); | ||
| 148 | ctx.AddU32x2("{}=uvec2(0u);", inst); | ||
| 149 | return; | ||
| 150 | } | ||
| 151 | if (u32_offset % 2 == 0) { | ||
| 152 | ctx.AddU32x2("{}=ftou({}[{}].{}{});", inst, cbuf, u32_offset / 16, | ||
| 153 | OffsetSwizzle(u32_offset), OffsetSwizzle(u32_offset + 4)); | ||
| 154 | } else { | ||
| 155 | ctx.AddU32x2("{}=uvec2(ftou({}[{}].{}),ftou({}[{}].{}));", inst, cbuf, u32_offset / 16, | ||
| 156 | OffsetSwizzle(u32_offset), cbuf, (u32_offset + 4) / 16, | ||
| 157 | OffsetSwizzle(u32_offset + 4)); | ||
| 158 | } | ||
| 159 | return; | ||
| 160 | } | ||
| 161 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||
| 162 | if (!ctx.profile.has_gl_component_indexing_bug) { | ||
| 163 | ctx.AddU32x2("{}=uvec2(ftou({}[{}>>4][({}>>2)%4]),ftou({}[({}+4)>>4][(({}+4)>>2)%4]));", | ||
| 164 | inst, cbuf, offset_var, offset_var, cbuf, offset_var, offset_var); | ||
| 165 | return; | ||
| 166 | } | ||
| 167 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x2)}; | ||
| 168 | const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; | ||
| 169 | for (u32 swizzle = 0; swizzle < 4; ++swizzle) { | ||
| 170 | ctx.Add("if(({}&3)=={}){}=uvec2(ftou({}[{}>>4].{}),ftou({}[({}+4)>>4].{}));", cbuf_offset, | ||
| 171 | swizzle, ret, cbuf, offset_var, "xyzw"[swizzle], cbuf, offset_var, | ||
| 172 | "xyzw"[(swizzle + 1) % 4]); | ||
| 173 | } | ||
| 174 | } | ||
| 175 | |||
| 176 | void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, | ||
| 177 | std::string_view vertex) { | ||
| 178 | const u32 element{static_cast<u32>(attr) % 4}; | ||
| 179 | const char swizzle{"xyzw"[element]}; | ||
| 180 | if (IR::IsGeneric(attr)) { | ||
| 181 | const u32 index{IR::GenericAttributeIndex(attr)}; | ||
| 182 | if (!ctx.runtime_info.previous_stage_stores.Generic(index, element)) { | ||
| 183 | if (element == 3) { | ||
| 184 | ctx.AddF32("{}=1.f;", inst, attr); | ||
| 185 | } else { | ||
| 186 | ctx.AddF32("{}=0.f;", inst, attr); | ||
| 187 | } | ||
| 188 | return; | ||
| 189 | } | ||
| 190 | ctx.AddF32("{}=in_attr{}{}.{};", inst, index, InputVertexIndex(ctx, vertex), swizzle); | ||
| 191 | return; | ||
| 192 | } | ||
| 193 | // GLSL only exposes 8 legacy texcoords | ||
| 194 | if (attr >= IR::Attribute::FixedFncTexture8S && attr <= IR::Attribute::FixedFncTexture9Q) { | ||
| 195 | LOG_WARNING(Shader_GLSL, "GLSL does not allow access to gl_TexCoord[{}]", | ||
| 196 | TexCoordIndex(attr)); | ||
| 197 | ctx.AddF32("{}=0.f;", inst); | ||
| 198 | return; | ||
| 199 | } | ||
| 200 | if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture7Q) { | ||
| 201 | const u32 index{TexCoordIndex(attr)}; | ||
| 202 | ctx.AddF32("{}=gl_TexCoord[{}].{};", inst, index, swizzle); | ||
| 203 | return; | ||
| 204 | } | ||
| 205 | switch (attr) { | ||
| 206 | case IR::Attribute::PrimitiveId: | ||
| 207 | ctx.AddF32("{}=itof(gl_PrimitiveID);", inst); | ||
| 208 | break; | ||
| 209 | case IR::Attribute::PositionX: | ||
| 210 | case IR::Attribute::PositionY: | ||
| 211 | case IR::Attribute::PositionZ: | ||
| 212 | case IR::Attribute::PositionW: { | ||
| 213 | const bool is_array{IsInputArray(ctx.stage)}; | ||
| 214 | const auto input_decorator{is_array ? fmt::format("gl_in[{}].", vertex) : ""}; | ||
| 215 | ctx.AddF32("{}={}{}.{};", inst, input_decorator, ctx.position_name, swizzle); | ||
| 216 | break; | ||
| 217 | } | ||
| 218 | case IR::Attribute::ColorFrontDiffuseR: | ||
| 219 | case IR::Attribute::ColorFrontDiffuseG: | ||
| 220 | case IR::Attribute::ColorFrontDiffuseB: | ||
| 221 | case IR::Attribute::ColorFrontDiffuseA: | ||
| 222 | if (ctx.stage == Stage::Fragment) { | ||
| 223 | ctx.AddF32("{}=gl_Color.{};", inst, swizzle); | ||
| 224 | } else { | ||
| 225 | ctx.AddF32("{}=gl_FrontColor.{};", inst, swizzle); | ||
| 226 | } | ||
| 227 | break; | ||
| 228 | case IR::Attribute::PointSpriteS: | ||
| 229 | case IR::Attribute::PointSpriteT: | ||
| 230 | ctx.AddF32("{}=gl_PointCoord.{};", inst, swizzle); | ||
| 231 | break; | ||
| 232 | case IR::Attribute::TessellationEvaluationPointU: | ||
| 233 | case IR::Attribute::TessellationEvaluationPointV: | ||
| 234 | ctx.AddF32("{}=gl_TessCoord.{};", inst, swizzle); | ||
| 235 | break; | ||
| 236 | case IR::Attribute::InstanceId: | ||
| 237 | ctx.AddF32("{}=itof(gl_InstanceID);", inst); | ||
| 238 | break; | ||
| 239 | case IR::Attribute::VertexId: | ||
| 240 | ctx.AddF32("{}=itof(gl_VertexID);", inst); | ||
| 241 | break; | ||
| 242 | case IR::Attribute::FrontFace: | ||
| 243 | ctx.AddF32("{}=itof(gl_FrontFacing?-1:0);", inst); | ||
| 244 | break; | ||
| 245 | default: | ||
| 246 | throw NotImplementedException("Get attribute {}", attr); | ||
| 247 | } | ||
| 248 | } | ||
| 249 | |||
| 250 | void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, | ||
| 251 | [[maybe_unused]] std::string_view vertex) { | ||
| 252 | if (IR::IsGeneric(attr)) { | ||
| 253 | const u32 index{IR::GenericAttributeIndex(attr)}; | ||
| 254 | const u32 attr_element{IR::GenericAttributeElement(attr)}; | ||
| 255 | const GenericElementInfo& info{ctx.output_generics.at(index).at(attr_element)}; | ||
| 256 | const auto output_decorator{OutputVertexIndex(ctx)}; | ||
| 257 | if (info.num_components == 1) { | ||
| 258 | ctx.Add("{}{}={};", info.name, output_decorator, value); | ||
| 259 | } else { | ||
| 260 | const u32 index_element{attr_element - info.first_element}; | ||
| 261 | ctx.Add("{}{}.{}={};", info.name, output_decorator, "xyzw"[index_element], value); | ||
| 262 | } | ||
| 263 | return; | ||
| 264 | } | ||
| 265 | const u32 element{static_cast<u32>(attr) % 4}; | ||
| 266 | const char swizzle{"xyzw"[element]}; | ||
| 267 | // GLSL only exposes 8 legacy texcoords | ||
| 268 | if (attr >= IR::Attribute::FixedFncTexture8S && attr <= IR::Attribute::FixedFncTexture9Q) { | ||
| 269 | LOG_WARNING(Shader_GLSL, "GLSL does not allow access to gl_TexCoord[{}]", | ||
| 270 | TexCoordIndex(attr)); | ||
| 271 | return; | ||
| 272 | } | ||
| 273 | if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture7Q) { | ||
| 274 | const u32 index{TexCoordIndex(attr)}; | ||
| 275 | ctx.Add("gl_TexCoord[{}].{}={};", index, swizzle, value); | ||
| 276 | return; | ||
| 277 | } | ||
| 278 | switch (attr) { | ||
| 279 | case IR::Attribute::Layer: | ||
| 280 | if (ctx.stage != Stage::Geometry && | ||
| 281 | !ctx.profile.support_viewport_index_layer_non_geometry) { | ||
| 282 | LOG_WARNING(Shader_GLSL, "Shader stores viewport layer but device does not support " | ||
| 283 | "viewport layer extension"); | ||
| 284 | break; | ||
| 285 | } | ||
| 286 | ctx.Add("gl_Layer=ftoi({});", value); | ||
| 287 | break; | ||
| 288 | case IR::Attribute::ViewportIndex: | ||
| 289 | if (ctx.stage != Stage::Geometry && | ||
| 290 | !ctx.profile.support_viewport_index_layer_non_geometry) { | ||
| 291 | LOG_WARNING(Shader_GLSL, "Shader stores viewport index but device does not support " | ||
| 292 | "viewport layer extension"); | ||
| 293 | break; | ||
| 294 | } | ||
| 295 | ctx.Add("gl_ViewportIndex=ftoi({});", value); | ||
| 296 | break; | ||
| 297 | case IR::Attribute::ViewportMask: | ||
| 298 | if (ctx.stage != Stage::Geometry && !ctx.profile.support_viewport_mask) { | ||
| 299 | LOG_WARNING( | ||
| 300 | Shader_GLSL, | ||
| 301 | "Shader stores viewport mask but device does not support viewport mask extension"); | ||
| 302 | break; | ||
| 303 | } | ||
| 304 | ctx.Add("gl_ViewportMask[0]=ftoi({});", value); | ||
| 305 | break; | ||
| 306 | case IR::Attribute::PointSize: | ||
| 307 | ctx.Add("gl_PointSize={};", value); | ||
| 308 | break; | ||
| 309 | case IR::Attribute::PositionX: | ||
| 310 | case IR::Attribute::PositionY: | ||
| 311 | case IR::Attribute::PositionZ: | ||
| 312 | case IR::Attribute::PositionW: | ||
| 313 | ctx.Add("gl_Position.{}={};", swizzle, value); | ||
| 314 | break; | ||
| 315 | case IR::Attribute::ColorFrontDiffuseR: | ||
| 316 | case IR::Attribute::ColorFrontDiffuseG: | ||
| 317 | case IR::Attribute::ColorFrontDiffuseB: | ||
| 318 | case IR::Attribute::ColorFrontDiffuseA: | ||
| 319 | ctx.Add("gl_FrontColor.{}={};", swizzle, value); | ||
| 320 | break; | ||
| 321 | case IR::Attribute::ColorFrontSpecularR: | ||
| 322 | case IR::Attribute::ColorFrontSpecularG: | ||
| 323 | case IR::Attribute::ColorFrontSpecularB: | ||
| 324 | case IR::Attribute::ColorFrontSpecularA: | ||
| 325 | ctx.Add("gl_FrontSecondaryColor.{}={};", swizzle, value); | ||
| 326 | break; | ||
| 327 | case IR::Attribute::ColorBackDiffuseR: | ||
| 328 | case IR::Attribute::ColorBackDiffuseG: | ||
| 329 | case IR::Attribute::ColorBackDiffuseB: | ||
| 330 | case IR::Attribute::ColorBackDiffuseA: | ||
| 331 | ctx.Add("gl_BackColor.{}={};", swizzle, value); | ||
| 332 | break; | ||
| 333 | case IR::Attribute::ColorBackSpecularR: | ||
| 334 | case IR::Attribute::ColorBackSpecularG: | ||
| 335 | case IR::Attribute::ColorBackSpecularB: | ||
| 336 | case IR::Attribute::ColorBackSpecularA: | ||
| 337 | ctx.Add("gl_BackSecondaryColor.{}={};", swizzle, value); | ||
| 338 | break; | ||
| 339 | case IR::Attribute::FogCoordinate: | ||
| 340 | ctx.Add("gl_FogFragCoord={};", value); | ||
| 341 | break; | ||
| 342 | case IR::Attribute::ClipDistance0: | ||
| 343 | case IR::Attribute::ClipDistance1: | ||
| 344 | case IR::Attribute::ClipDistance2: | ||
| 345 | case IR::Attribute::ClipDistance3: | ||
| 346 | case IR::Attribute::ClipDistance4: | ||
| 347 | case IR::Attribute::ClipDistance5: | ||
| 348 | case IR::Attribute::ClipDistance6: | ||
| 349 | case IR::Attribute::ClipDistance7: { | ||
| 350 | const u32 index{static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::ClipDistance0)}; | ||
| 351 | ctx.Add("gl_ClipDistance[{}]={};", index, value); | ||
| 352 | break; | ||
| 353 | } | ||
| 354 | default: | ||
| 355 | throw NotImplementedException("Set attribute {}", attr); | ||
| 356 | } | ||
| 357 | } | ||
| 358 | |||
| 359 | void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, std::string_view offset, | ||
| 360 | std::string_view vertex) { | ||
| 361 | const bool is_array{ctx.stage == Stage::Geometry}; | ||
| 362 | const auto vertex_arg{is_array ? fmt::format(",{}", vertex) : ""}; | ||
| 363 | ctx.AddF32("{}=IndexedAttrLoad(int({}){});", inst, offset, vertex_arg); | ||
| 364 | } | ||
| 365 | |||
| 366 | void EmitSetAttributeIndexed([[maybe_unused]] EmitContext& ctx, | ||
| 367 | [[maybe_unused]] std::string_view offset, | ||
| 368 | [[maybe_unused]] std::string_view value, | ||
| 369 | [[maybe_unused]] std::string_view vertex) { | ||
| 370 | NotImplemented(); | ||
| 371 | } | ||
| 372 | |||
| 373 | void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch) { | ||
| 374 | if (!IR::IsGeneric(patch)) { | ||
| 375 | throw NotImplementedException("Non-generic patch load"); | ||
| 376 | } | ||
| 377 | const u32 index{IR::GenericPatchIndex(patch)}; | ||
| 378 | const u32 element{IR::GenericPatchElement(patch)}; | ||
| 379 | const char swizzle{"xyzw"[element]}; | ||
| 380 | ctx.AddF32("{}=patch{}.{};", inst, index, swizzle); | ||
| 381 | } | ||
| 382 | |||
| 383 | void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string_view value) { | ||
| 384 | if (IR::IsGeneric(patch)) { | ||
| 385 | const u32 index{IR::GenericPatchIndex(patch)}; | ||
| 386 | const u32 element{IR::GenericPatchElement(patch)}; | ||
| 387 | ctx.Add("patch{}.{}={};", index, "xyzw"[element], value); | ||
| 388 | return; | ||
| 389 | } | ||
| 390 | switch (patch) { | ||
| 391 | case IR::Patch::TessellationLodLeft: | ||
| 392 | case IR::Patch::TessellationLodRight: | ||
| 393 | case IR::Patch::TessellationLodTop: | ||
| 394 | case IR::Patch::TessellationLodBottom: { | ||
| 395 | const u32 index{static_cast<u32>(patch) - static_cast<u32>(IR::Patch::TessellationLodLeft)}; | ||
| 396 | ctx.Add("gl_TessLevelOuter[{}]={};", index, value); | ||
| 397 | break; | ||
| 398 | } | ||
| 399 | case IR::Patch::TessellationLodInteriorU: | ||
| 400 | ctx.Add("gl_TessLevelInner[0]={};", value); | ||
| 401 | break; | ||
| 402 | case IR::Patch::TessellationLodInteriorV: | ||
| 403 | ctx.Add("gl_TessLevelInner[1]={};", value); | ||
| 404 | break; | ||
| 405 | default: | ||
| 406 | throw NotImplementedException("Patch {}", patch); | ||
| 407 | } | ||
| 408 | } | ||
| 409 | |||
| 410 | void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value) { | ||
| 411 | const char swizzle{"xyzw"[component]}; | ||
| 412 | ctx.Add("frag_color{}.{}={};", index, swizzle, value); | ||
| 413 | } | ||
| 414 | |||
| 415 | void EmitSetSampleMask(EmitContext& ctx, std::string_view value) { | ||
| 416 | ctx.Add("gl_SampleMask[0]=int({});", value); | ||
| 417 | } | ||
| 418 | |||
| 419 | void EmitSetFragDepth(EmitContext& ctx, std::string_view value) { | ||
| 420 | ctx.Add("gl_FragDepth={};", value); | ||
| 421 | } | ||
| 422 | |||
| 423 | void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst) { | ||
| 424 | ctx.AddU32x3("{}=gl_LocalInvocationID;", inst); | ||
| 425 | } | ||
| 426 | |||
| 427 | void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst) { | ||
| 428 | ctx.AddU32x3("{}=gl_WorkGroupID;", inst); | ||
| 429 | } | ||
| 430 | |||
| 431 | void EmitInvocationId(EmitContext& ctx, IR::Inst& inst) { | ||
| 432 | ctx.AddU32("{}=uint(gl_InvocationID);", inst); | ||
| 433 | } | ||
| 434 | |||
| 435 | void EmitSampleId(EmitContext& ctx, IR::Inst& inst) { | ||
| 436 | ctx.AddU32("{}=uint(gl_SampleID);", inst); | ||
| 437 | } | ||
| 438 | |||
| 439 | void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst) { | ||
| 440 | ctx.AddU1("{}=gl_HelperInvocation;", inst); | ||
| 441 | } | ||
| 442 | |||
| 443 | void EmitYDirection(EmitContext& ctx, IR::Inst& inst) { | ||
| 444 | ctx.uses_y_direction = true; | ||
| 445 | ctx.AddF32("{}=gl_FrontMaterial.ambient.a;", inst); | ||
| 446 | } | ||
| 447 | |||
| 448 | void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset) { | ||
| 449 | ctx.AddU32("{}=lmem[{}];", inst, word_offset); | ||
| 450 | } | ||
| 451 | |||
| 452 | void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value) { | ||
| 453 | ctx.Add("lmem[{}]={};", word_offset, value); | ||
| 454 | } | ||
| 455 | |||
| 456 | } // namespace Shader::Backend::GLSL | ||
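A worked example of the constant-buffer addressing in GetCbuf above may help; the vs_cbuf0 name assumes the vertex-stage prefix and binding 0, and off/ret are illustrative. Each cbuf is declared as an array of vec4 and ftou is the backend's float-to-uint bit-cast helper, so a byte offset decomposes into a vec4 index and a component:

    // Immediate byte offset 36 (0x24):
    //   vec4 index: 36 / 16      == 2
    //   component : (36 / 4) % 4 == 1  -> OffsetSwizzle(36) == 'y'
    // so EmitGetCbufU32 produces:
    //   ret=ftou(vs_cbuf0[2].y);
    // With a dynamic offset on a driver that has the component-indexing bug,
    // the same read is unrolled into four guarded assignments:
    //   if((off>>2&3)==0)ret=ftou(vs_cbuf0[off>>4].x);
    //   if((off>>2&3)==1)ret=ftou(vs_cbuf0[off>>4].y);
    //   ...and likewise for .z and .w.
    // The 8- and 16-bit loads reuse this path, carving the value out with
    // bitfieldExtract at the bit offset computed in GetCbuf8/GetCbuf16.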
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp new file mode 100644 index 000000000..53f8896be --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp | |||
| @@ -0,0 +1,21 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" | ||
| 9 | #include "shader_recompiler/exception.h" | ||
| 10 | |||
| 11 | namespace Shader::Backend::GLSL { | ||
| 12 | |||
| 13 | void EmitJoin(EmitContext&) { | ||
| 14 | throw NotImplementedException("Join shouldn't be emitted"); | ||
| 15 | } | ||
| 16 | |||
| 17 | void EmitDemoteToHelperInvocation(EmitContext& ctx) { | ||
| 18 | ctx.Add("discard;"); | ||
| 19 | } | ||
| 20 | |||
| 21 | } // namespace Shader::Backend::GLSL | ||
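One note on the lowering above: GLSL has no direct equivalent of the IR's demote-to-helper operation, so the emitted code is an approximation:

    // EmitDemoteToHelperInvocation appends:
    //   discard;
    // which stops the fragment's output like a demote does, but GLSL does not
    // guarantee the invocation continues executing as a helper afterwards, so
    // this approximates the IR semantics rather than matching them exactly.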
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp new file mode 100644 index 000000000..eeae6562c --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp | |||
| @@ -0,0 +1,230 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 10 | |||
| 11 | namespace Shader::Backend::GLSL { | ||
| 12 | void EmitConvertS16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 13 | [[maybe_unused]] std::string_view value) { | ||
| 14 | NotImplemented(); | ||
| 15 | } | ||
| 16 | |||
| 17 | void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 18 | ctx.AddU32("{}=(int({})&0xffff)|(bitfieldExtract(int({}),31,1)<<15);", inst, value, value); | ||
| 19 | } | ||
| 20 | |||
| 21 | void EmitConvertS16F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 22 | [[maybe_unused]] std::string_view value) { | ||
| 23 | NotImplemented(); | ||
| 24 | } | ||
| 25 | |||
| 26 | void EmitConvertS32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 27 | [[maybe_unused]] std::string_view value) { | ||
| 28 | NotImplemented(); | ||
| 29 | } | ||
| 30 | |||
| 31 | void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 32 | ctx.AddU32("{}=int({});", inst, value); | ||
| 33 | } | ||
| 34 | |||
| 35 | void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 36 | ctx.AddU32("{}=int({});", inst, value); | ||
| 37 | } | ||
| 38 | |||
| 39 | void EmitConvertS64F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 40 | [[maybe_unused]] std::string_view value) { | ||
| 41 | NotImplemented(); | ||
| 42 | } | ||
| 43 | |||
| 44 | void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 45 | ctx.AddU64("{}=int64_t({});", inst, value); | ||
| 46 | } | ||
| 47 | |||
| 48 | void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 49 | ctx.AddU64("{}=int64_t({});", inst, value); | ||
| 50 | } | ||
| 51 | |||
| 52 | void EmitConvertU16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 53 | [[maybe_unused]] std::string_view value) { | ||
| 54 | NotImplemented(); | ||
| 55 | } | ||
| 56 | |||
| 57 | void EmitConvertU16F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 58 | [[maybe_unused]] std::string_view value) { | ||
| 59 | NotImplemented(); | ||
| 60 | } | ||
| 61 | |||
| 62 | void EmitConvertU16F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 63 | [[maybe_unused]] std::string_view value) { | ||
| 64 | NotImplemented(); | ||
| 65 | } | ||
| 66 | |||
| 67 | void EmitConvertU32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 68 | [[maybe_unused]] std::string_view value) { | ||
| 69 | NotImplemented(); | ||
| 70 | } | ||
| 71 | |||
| 72 | void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 73 | ctx.AddU32("{}=uint({});", inst, value); | ||
| 74 | } | ||
| 75 | |||
| 76 | void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 77 | ctx.AddU32("{}=uint({});", inst, value); | ||
| 78 | } | ||
| 79 | |||
| 80 | void EmitConvertU64F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 81 | [[maybe_unused]] std::string_view value) { | ||
| 82 | NotImplemented(); | ||
| 83 | } | ||
| 84 | |||
| 85 | void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 86 | ctx.AddU64("{}=uint64_t({});", inst, value); | ||
| 87 | } | ||
| 88 | |||
| 89 | void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 90 | ctx.AddU64("{}=uint64_t({});", inst, value); | ||
| 91 | } | ||
| 92 | |||
| 93 | void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 94 | ctx.AddU64("{}=uint64_t({});", inst, value); | ||
| 95 | } | ||
| 96 | |||
| 97 | void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 98 | ctx.AddU32("{}=uint({});", inst, value); | ||
| 99 | } | ||
| 100 | |||
| 101 | void EmitConvertF16F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 102 | [[maybe_unused]] std::string_view value) { | ||
| 103 | NotImplemented(); | ||
| 104 | } | ||
| 105 | |||
| 106 | void EmitConvertF32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 107 | [[maybe_unused]] std::string_view value) { | ||
| 108 | NotImplemented(); | ||
| 109 | } | ||
| 110 | |||
| 111 | void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 112 | ctx.AddF32("{}=float({});", inst, value); | ||
| 113 | } | ||
| 114 | |||
| 115 | void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 116 | ctx.AddF64("{}=double({});", inst, value); | ||
| 117 | } | ||
| 118 | |||
| 119 | void EmitConvertF16S8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 120 | [[maybe_unused]] std::string_view value) { | ||
| 121 | NotImplemented(); | ||
| 122 | } | ||
| 123 | |||
| 124 | void EmitConvertF16S16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 125 | [[maybe_unused]] std::string_view value) { | ||
| 126 | NotImplemented(); | ||
| 127 | } | ||
| 128 | |||
| 129 | void EmitConvertF16S32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 130 | [[maybe_unused]] std::string_view value) { | ||
| 131 | NotImplemented(); | ||
| 132 | } | ||
| 133 | |||
| 134 | void EmitConvertF16S64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 135 | [[maybe_unused]] std::string_view value) { | ||
| 136 | NotImplemented(); | ||
| 137 | } | ||
| 138 | |||
| 139 | void EmitConvertF16U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 140 | [[maybe_unused]] std::string_view value) { | ||
| 141 | NotImplemented(); | ||
| 142 | } | ||
| 143 | |||
| 144 | void EmitConvertF16U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 145 | [[maybe_unused]] std::string_view value) { | ||
| 146 | NotImplemented(); | ||
| 147 | } | ||
| 148 | |||
| 149 | void EmitConvertF16U32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 150 | [[maybe_unused]] std::string_view value) { | ||
| 151 | NotImplemented(); | ||
| 152 | } | ||
| 153 | |||
| 154 | void EmitConvertF16U64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 155 | [[maybe_unused]] std::string_view value) { | ||
| 156 | NotImplemented(); | ||
| 157 | } | ||
| 158 | |||
| 159 | void EmitConvertF32S8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 160 | [[maybe_unused]] std::string_view value) { | ||
| 161 | NotImplemented(); | ||
| 162 | } | ||
| 163 | |||
| 164 | void EmitConvertF32S16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 165 | [[maybe_unused]] std::string_view value) { | ||
| 166 | NotImplemented(); | ||
| 167 | } | ||
| 168 | |||
| 169 | void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 170 | ctx.AddF32("{}=float(int({}));", inst, value); | ||
| 171 | } | ||
| 172 | |||
| 173 | void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 174 | ctx.AddF32("{}=float(int64_t({}));", inst, value); | ||
| 175 | } | ||
| 176 | |||
| 177 | void EmitConvertF32U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 178 | [[maybe_unused]] std::string_view value) { | ||
| 179 | NotImplemented(); | ||
| 180 | } | ||
| 181 | |||
| 182 | void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 183 | ctx.AddF32("{}=float({}&0xffff);", inst, value); | ||
| 184 | } | ||
| 185 | |||
| 186 | void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 187 | ctx.AddF32("{}=float({});", inst, value); | ||
| 188 | } | ||
| 189 | |||
| 190 | void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 191 | ctx.AddF32("{}=float({});", inst, value); | ||
| 192 | } | ||
| 193 | |||
| 194 | void EmitConvertF64S8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 195 | [[maybe_unused]] std::string_view value) { | ||
| 196 | NotImplemented(); | ||
| 197 | } | ||
| 198 | |||
| 199 | void EmitConvertF64S16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 200 | [[maybe_unused]] std::string_view value) { | ||
| 201 | NotImplemented(); | ||
| 202 | } | ||
| 203 | |||
| 204 | void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 205 | ctx.AddF64("{}=double(int({}));", inst, value); | ||
| 206 | } | ||
| 207 | |||
| 208 | void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 209 | ctx.AddF64("{}=double(int64_t({}));", inst, value); | ||
| 210 | } | ||
| 211 | |||
| 212 | void EmitConvertF64U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 213 | [[maybe_unused]] std::string_view value) { | ||
| 214 | NotImplemented(); | ||
| 215 | } | ||
| 216 | |||
| 217 | void EmitConvertF64U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 218 | [[maybe_unused]] std::string_view value) { | ||
| 219 | NotImplemented(); | ||
| 220 | } | ||
| 221 | |||
| 222 | void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 223 | ctx.AddF64("{}=double({});", inst, value); | ||
| 224 | } | ||
| 225 | |||
| 226 | void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 227 | ctx.AddF64("{}=double({});", inst, value); | ||
| 228 | } | ||
| 229 | |||
| 230 | } // namespace Shader::Backend::GLSL | ||
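The sign handling in EmitConvertS16F32 above is the least obvious conversion in this file, so a worked example (v is the F32 input, r an illustrative result name):

    // r=(int(v)&0xffff)|(bitfieldExtract(int(v),31,1)<<15);
    // For v = -3.0f:
    //   int(v)                       == 0xFFFFFFFD  (-3)
    //   int(v)&0xffff                == 0xFFFD
    //   bitfieldExtract(int(v),31,1) == 1, shifted left by 15 -> 0x8000
    //   r == 0xFFFD|0x8000           == 0xFFFD  (-3 in 16-bit two's complement,
    //                                            held in the low half of a u32)
    // The OR only matters when truncating to 16 bits would drop the sign bit;
    // it forces bit 15 to agree with the sign of the 32-bit integer.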
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp new file mode 100644 index 000000000..d423bfb1b --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp | |||
| @@ -0,0 +1,456 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 11 | |||
| 12 | namespace Shader::Backend::GLSL { | ||
| 13 | namespace { | ||
| 14 | void Compare(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs, | ||
| 15 | std::string_view op, bool ordered) { | ||
| 16 | const auto nan_op{ordered ? "&&!" : "||"}; | ||
| 17 | ctx.AddU1("{}={}{}{}" | ||
| 18 | "{}isnan({}){}isnan({});", | ||
| 19 | inst, lhs, op, rhs, nan_op, lhs, nan_op, rhs); | ||
| 20 | } | ||
| 21 | |||
| 22 | bool IsPrecise(const IR::Inst& inst) { | ||
| 23 | return inst.Flags<IR::FpControl>().no_contraction; | ||
| 24 | } | ||
| 25 | } // Anonymous namespace | ||
| 26 | |||
| 27 | void EmitFPAbs16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 28 | [[maybe_unused]] std::string_view value) { | ||
| 29 | NotImplemented(); | ||
| 30 | } | ||
| 31 | |||
| 32 | void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 33 | ctx.AddF32("{}=abs({});", inst, value); | ||
| 34 | } | ||
| 35 | |||
| 36 | void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 37 | ctx.AddF64("{}=abs({});", inst, value); | ||
| 38 | } | ||
| 39 | |||
| 40 | void EmitFPAdd16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 41 | [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { | ||
| 42 | NotImplemented(); | ||
| 43 | } | ||
| 44 | |||
| 45 | void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 46 | if (IsPrecise(inst)) { | ||
| 47 | ctx.AddPrecF32("{}={}+{};", inst, a, b); | ||
| 48 | } else { | ||
| 49 | ctx.AddF32("{}={}+{};", inst, a, b); | ||
| 50 | } | ||
| 51 | } | ||
| 52 | |||
| 53 | void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 54 | if (IsPrecise(inst)) { | ||
| 55 | ctx.AddPrecF64("{}={}+{};", inst, a, b); | ||
| 56 | } else { | ||
| 57 | ctx.AddF64("{}={}+{};", inst, a, b); | ||
| 58 | } | ||
| 59 | } | ||
| 60 | |||
| 61 | void EmitFPFma16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 62 | [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b, | ||
| 63 | [[maybe_unused]] std::string_view c) { | ||
| 64 | NotImplemented(); | ||
| 65 | } | ||
| 66 | |||
| 67 | void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, | ||
| 68 | std::string_view c) { | ||
| 69 | if (IsPrecise(inst)) { | ||
| 70 | ctx.AddPrecF32("{}=fma({},{},{});", inst, a, b, c); | ||
| 71 | } else { | ||
| 72 | ctx.AddF32("{}=fma({},{},{});", inst, a, b, c); | ||
| 73 | } | ||
| 74 | } | ||
| 75 | |||
| 76 | void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, | ||
| 77 | std::string_view c) { | ||
| 78 | if (IsPrecise(inst)) { | ||
| 79 | ctx.AddPrecF64("{}=fma({},{},{});", inst, a, b, c); | ||
| 80 | } else { | ||
| 81 | ctx.AddF64("{}=fma({},{},{});", inst, a, b, c); | ||
| 82 | } | ||
| 83 | } | ||
| 84 | |||
| 85 | void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 86 | ctx.AddF32("{}=max({},{});", inst, a, b); | ||
| 87 | } | ||
| 88 | |||
| 89 | void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 90 | ctx.AddF64("{}=max({},{});", inst, a, b); | ||
| 91 | } | ||
| 92 | |||
| 93 | void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 94 | ctx.AddF32("{}=min({},{});", inst, a, b); | ||
| 95 | } | ||
| 96 | |||
| 97 | void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 98 | ctx.AddF64("{}=min({},{});", inst, a, b); | ||
| 99 | } | ||
| 100 | |||
| 101 | void EmitFPMul16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 102 | [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { | ||
| 103 | NotImplemented(); | ||
| 104 | } | ||
| 105 | |||
| 106 | void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 107 | if (IsPrecise(inst)) { | ||
| 108 | ctx.AddPrecF32("{}={}*{};", inst, a, b); | ||
| 109 | } else { | ||
| 110 | ctx.AddF32("{}={}*{};", inst, a, b); | ||
| 111 | } | ||
| 112 | } | ||
| 113 | |||
| 114 | void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 115 | if (IsPrecise(inst)) { | ||
| 116 | ctx.AddPrecF64("{}={}*{};", inst, a, b); | ||
| 117 | } else { | ||
| 118 | ctx.AddF64("{}={}*{};", inst, a, b); | ||
| 119 | } | ||
| 120 | } | ||
| 121 | |||
| 122 | void EmitFPNeg16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 123 | [[maybe_unused]] std::string_view value) { | ||
| 124 | NotImplemented(); | ||
| 125 | } | ||
| 126 | |||
| 127 | void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 128 | ctx.AddF32("{}=-({});", inst, value); | ||
| 129 | } | ||
| 130 | |||
| 131 | void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 132 | ctx.AddF64("{}=-({});", inst, value); | ||
| 133 | } | ||
| 134 | |||
| 135 | void EmitFPSin(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 136 | ctx.AddF32("{}=sin({});", inst, value); | ||
| 137 | } | ||
| 138 | |||
| 139 | void EmitFPCos(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 140 | ctx.AddF32("{}=cos({});", inst, value); | ||
| 141 | } | ||
| 142 | |||
| 143 | void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 144 | ctx.AddF32("{}=exp2({});", inst, value); | ||
| 145 | } | ||
| 146 | |||
| 147 | void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 148 | ctx.AddF32("{}=log2({});", inst, value); | ||
| 149 | } | ||
| 150 | |||
| 151 | void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 152 | ctx.AddF32("{}=(1.0f)/{};", inst, value); | ||
| 153 | } | ||
| 154 | |||
| 155 | void EmitFPRecip64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 156 | ctx.AddF64("{}=1.0/{};", inst, value); | ||
| 157 | } | ||
| 158 | |||
| 159 | void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, | ||
| 160 | std::string_view value) { | ||
| 161 | ctx.AddF32("{}=inversesqrt({});", inst, value); | ||
| 162 | } | ||
| 163 | |||
| 164 | void EmitFPRecipSqrt64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 165 | [[maybe_unused]] std::string_view value) { | ||
| 166 | NotImplemented(); | ||
| 167 | } | ||
| 168 | |||
| 169 | void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 170 | ctx.AddF32("{}=sqrt({});", inst, value); | ||
| 171 | } | ||
| 172 | |||
| 173 | void EmitFPSaturate16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 174 | [[maybe_unused]] std::string_view value) { | ||
| 175 | NotImplemented(); | ||
| 176 | } | ||
| 177 | |||
| 178 | void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 179 | ctx.AddF32("{}=min(max({},0.0),1.0);", inst, value); | ||
| 180 | } | ||
| 181 | |||
| 182 | void EmitFPSaturate64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 183 | ctx.AddF64("{}=min(max({},0.0),1.0);", inst, value); | ||
| 184 | } | ||
| 185 | |||
| 186 | void EmitFPClamp16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 187 | [[maybe_unused]] std::string_view value, | ||
| 188 | [[maybe_unused]] std::string_view min_value, | ||
| 189 | [[maybe_unused]] std::string_view max_value) { | ||
| 190 | NotImplemented(); | ||
| 191 | } | ||
| 192 | |||
| 193 | void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, | ||
| 194 | std::string_view min_value, std::string_view max_value) { | ||
| 195 | // GLSL's clamp is undefined when min_value > max_value, so use min(max()) instead | ||
| 196 | ctx.AddF32("{}=min(max({},float({})),float({}));", inst, value, min_value, max_value); | ||
| 197 | } | ||
| 198 | |||
| 199 | void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, std::string_view value, | ||
| 200 | std::string_view min_value, std::string_view max_value) { | ||
| 201 | // GLSL's clamp is undefined when min_value > max_value, so use min(max()) instead | ||
| 202 | ctx.AddF64("{}=min(max({},double({})),double({}));", inst, value, min_value, max_value); | ||
| 203 | } | ||
| 204 | |||
| 205 | void EmitFPRoundEven16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 206 | [[maybe_unused]] std::string_view value) { | ||
| 207 | NotImplemented(); | ||
| 208 | } | ||
| 209 | |||
| 210 | void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 211 | ctx.AddF32("{}=roundEven({});", inst, value); | ||
| 212 | } | ||
| 213 | |||
| 214 | void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 215 | ctx.AddF64("{}=roundEven({});", inst, value); | ||
| 216 | } | ||
| 217 | |||
| 218 | void EmitFPFloor16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 219 | [[maybe_unused]] std::string_view value) { | ||
| 220 | NotImplemented(); | ||
| 221 | } | ||
| 222 | |||
| 223 | void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 224 | ctx.AddF32("{}=floor({});", inst, value); | ||
| 225 | } | ||
| 226 | |||
| 227 | void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 228 | ctx.AddF64("{}=floor({});", inst, value); | ||
| 229 | } | ||
| 230 | |||
| 231 | void EmitFPCeil16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 232 | [[maybe_unused]] std::string_view value) { | ||
| 233 | NotImplemented(); | ||
| 234 | } | ||
| 235 | |||
| 236 | void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 237 | ctx.AddF32("{}=ceil({});", inst, value); | ||
| 238 | } | ||
| 239 | |||
| 240 | void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 241 | ctx.AddF64("{}=ceil({});", inst, value); | ||
| 242 | } | ||
| 243 | |||
| 244 | void EmitFPTrunc16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 245 | [[maybe_unused]] std::string_view value) { | ||
| 246 | NotImplemented(); | ||
| 247 | } | ||
| 248 | |||
| 249 | void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 250 | ctx.AddF32("{}=trunc({});", inst, value); | ||
| 251 | } | ||
| 252 | |||
| 253 | void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 254 | ctx.AddF64("{}=trunc({});", inst, value); | ||
| 255 | } | ||
| 256 | |||
| 257 | void EmitFPOrdEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, | ||
| 258 | [[maybe_unused]] std::string_view rhs) { | ||
| 259 | NotImplemented(); | ||
| 260 | } | ||
| 261 | |||
| 262 | void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 263 | std::string_view rhs) { | ||
| 264 | Compare(ctx, inst, lhs, rhs, "==", true); | ||
| 265 | } | ||
| 266 | |||
| 267 | void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 268 | std::string_view rhs) { | ||
| 269 | Compare(ctx, inst, lhs, rhs, "==", true); | ||
| 270 | } | ||
| 271 | |||
| 272 | void EmitFPUnordEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, | ||
| 273 | [[maybe_unused]] std::string_view rhs) { | ||
| 274 | NotImplemented(); | ||
| 275 | } | ||
| 276 | |||
| 277 | void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 278 | std::string_view rhs) { | ||
| 279 | Compare(ctx, inst, lhs, rhs, "==", false); | ||
| 280 | } | ||
| 281 | |||
| 282 | void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 283 | std::string_view rhs) { | ||
| 284 | Compare(ctx, inst, lhs, rhs, "==", false); | ||
| 285 | } | ||
| 286 | |||
| 287 | void EmitFPOrdNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, | ||
| 288 | [[maybe_unused]] std::string_view rhs) { | ||
| 289 | NotImplemented(); | ||
| 290 | } | ||
| 291 | |||
| 292 | void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 293 | std::string_view rhs) { | ||
| 294 | Compare(ctx, inst, lhs, rhs, "!=", true); | ||
| 295 | } | ||
| 296 | |||
| 297 | void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 298 | std::string_view rhs) { | ||
| 299 | Compare(ctx, inst, lhs, rhs, "!=", true); | ||
| 300 | } | ||
| 301 | |||
| 302 | void EmitFPUnordNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, | ||
| 303 | [[maybe_unused]] std::string_view rhs) { | ||
| 304 | NotImplemented(); | ||
| 305 | } | ||
| 306 | |||
| 307 | void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 308 | std::string_view rhs) { | ||
| 309 | Compare(ctx, inst, lhs, rhs, "!=", false); | ||
| 310 | } | ||
| 311 | |||
| 312 | void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 313 | std::string_view rhs) { | ||
| 314 | Compare(ctx, inst, lhs, rhs, "!=", false); | ||
| 315 | } | ||
| 316 | |||
| 317 | void EmitFPOrdLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, | ||
| 318 | [[maybe_unused]] std::string_view rhs) { | ||
| 319 | NotImplemented(); | ||
| 320 | } | ||
| 321 | |||
| 322 | void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 323 | std::string_view rhs) { | ||
| 324 | Compare(ctx, inst, lhs, rhs, "<", true); | ||
| 325 | } | ||
| 326 | |||
| 327 | void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 328 | std::string_view rhs) { | ||
| 329 | Compare(ctx, inst, lhs, rhs, "<", true); | ||
| 330 | } | ||
| 331 | |||
| 332 | void EmitFPUnordLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, | ||
| 333 | [[maybe_unused]] std::string_view rhs) { | ||
| 334 | NotImplemented(); | ||
| 335 | } | ||
| 336 | |||
| 337 | void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 338 | std::string_view rhs) { | ||
| 339 | Compare(ctx, inst, lhs, rhs, "<", false); | ||
| 340 | } | ||
| 341 | |||
| 342 | void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 343 | std::string_view rhs) { | ||
| 344 | Compare(ctx, inst, lhs, rhs, "<", false); | ||
| 345 | } | ||
| 346 | |||
| 347 | void EmitFPOrdGreaterThan16([[maybe_unused]] EmitContext& ctx, | ||
| 348 | [[maybe_unused]] std::string_view lhs, | ||
| 349 | [[maybe_unused]] std::string_view rhs) { | ||
| 350 | NotImplemented(); | ||
| 351 | } | ||
| 352 | |||
| 353 | void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 354 | std::string_view rhs) { | ||
| 355 | Compare(ctx, inst, lhs, rhs, ">", true); | ||
| 356 | } | ||
| 357 | |||
| 358 | void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 359 | std::string_view rhs) { | ||
| 360 | Compare(ctx, inst, lhs, rhs, ">", true); | ||
| 361 | } | ||
| 362 | |||
| 363 | void EmitFPUnordGreaterThan16([[maybe_unused]] EmitContext& ctx, | ||
| 364 | [[maybe_unused]] std::string_view lhs, | ||
| 365 | [[maybe_unused]] std::string_view rhs) { | ||
| 366 | NotImplemented(); | ||
| 367 | } | ||
| 368 | |||
| 369 | void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 370 | std::string_view rhs) { | ||
| 371 | Compare(ctx, inst, lhs, rhs, ">", false); | ||
| 372 | } | ||
| 373 | |||
| 374 | void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 375 | std::string_view rhs) { | ||
| 376 | Compare(ctx, inst, lhs, rhs, ">", false); | ||
| 377 | } | ||
| 378 | |||
| 379 | void EmitFPOrdLessThanEqual16([[maybe_unused]] EmitContext& ctx, | ||
| 380 | [[maybe_unused]] std::string_view lhs, | ||
| 381 | [[maybe_unused]] std::string_view rhs) { | ||
| 382 | NotImplemented(); | ||
| 383 | } | ||
| 384 | |||
| 385 | void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 386 | std::string_view rhs) { | ||
| 387 | Compare(ctx, inst, lhs, rhs, "<=", true); | ||
| 388 | } | ||
| 389 | |||
| 390 | void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 391 | std::string_view rhs) { | ||
| 392 | Compare(ctx, inst, lhs, rhs, "<=", true); | ||
| 393 | } | ||
| 394 | |||
| 395 | void EmitFPUnordLessThanEqual16([[maybe_unused]] EmitContext& ctx, | ||
| 396 | [[maybe_unused]] std::string_view lhs, | ||
| 397 | [[maybe_unused]] std::string_view rhs) { | ||
| 398 | NotImplemented(); | ||
| 399 | } | ||
| 400 | |||
| 401 | void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 402 | std::string_view rhs) { | ||
| 403 | Compare(ctx, inst, lhs, rhs, "<=", false); | ||
| 404 | } | ||
| 405 | |||
| 406 | void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 407 | std::string_view rhs) { | ||
| 408 | Compare(ctx, inst, lhs, rhs, "<=", false); | ||
| 409 | } | ||
| 410 | |||
| 411 | void EmitFPOrdGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, | ||
| 412 | [[maybe_unused]] std::string_view lhs, | ||
| 413 | [[maybe_unused]] std::string_view rhs) { | ||
| 414 | NotImplemented(); | ||
| 415 | } | ||
| 416 | |||
| 417 | void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 418 | std::string_view rhs) { | ||
| 419 | Compare(ctx, inst, lhs, rhs, ">=", true); | ||
| 420 | } | ||
| 421 | |||
| 422 | void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 423 | std::string_view rhs) { | ||
| 424 | Compare(ctx, inst, lhs, rhs, ">=", true); | ||
| 425 | } | ||
| 426 | |||
| 427 | void EmitFPUnordGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, | ||
| 428 | [[maybe_unused]] std::string_view lhs, | ||
| 429 | [[maybe_unused]] std::string_view rhs) { | ||
| 430 | NotImplemented(); | ||
| 431 | } | ||
| 432 | |||
| 433 | void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 434 | std::string_view rhs) { | ||
| 435 | Compare(ctx, inst, lhs, rhs, ">=", false); | ||
| 436 | } | ||
| 437 | |||
| 438 | void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 439 | std::string_view rhs) { | ||
| 440 | Compare(ctx, inst, lhs, rhs, ">=", false); | ||
| 441 | } | ||
| 442 | |||
| 443 | void EmitFPIsNan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 444 | [[maybe_unused]] std::string_view value) { | ||
| 445 | NotImplemented(); | ||
| 446 | } | ||
| 447 | |||
| 448 | void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 449 | ctx.AddU1("{}=isnan({});", inst, value); | ||
| 450 | } | ||
| 451 | |||
| 452 | void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 453 | ctx.AddU1("{}=isnan({});", inst, value); | ||
| 454 | } | ||
| 455 | |||
| 456 | } // namespace Shader::Backend::GLSL | ||
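The Compare helper at the top of this file encodes IEEE ordered versus unordered semantics with a single format string; a sketch of the two expansions (a, b and r are illustrative):

    // ordered == true  (EmitFPOrdEqual32 and friends):
    //   r=a==b&&!isnan(a)&&!isnan(b);  // NaN on either side yields false
    // ordered == false (EmitFPUnordEqual32 and friends):
    //   r=a==b||isnan(a)||isnan(b);    // NaN on either side yields true

IsPrecise plays a similar role for the arithmetic emitters: when the IR sets no_contraction, results are routed through AddPrecF32/AddPrecF64 so they land in precise-qualified variables, preventing the driver from contracting the expression into an FMA.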
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp new file mode 100644 index 000000000..447eb8e0a --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp | |||
| @@ -0,0 +1,799 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 11 | #include "shader_recompiler/profile.h" | ||
| 12 | |||
| 13 | namespace Shader::Backend::GLSL { | ||
| 14 | namespace { | ||
| 15 | std::string Texture(EmitContext& ctx, const IR::TextureInstInfo& info, const IR::Value& index) { | ||
| 16 | const auto def{info.type == TextureType::Buffer ? ctx.texture_buffers.at(info.descriptor_index) | ||
| 17 | : ctx.textures.at(info.descriptor_index)}; | ||
| 18 | const auto index_offset{def.count > 1 ? fmt::format("[{}]", ctx.var_alloc.Consume(index)) : ""}; | ||
| 19 | return fmt::format("tex{}{}", def.binding, index_offset); | ||
| 20 | } | ||
| 21 | |||
| 22 | std::string Image(EmitContext& ctx, const IR::TextureInstInfo& info, const IR::Value& index) { | ||
| 23 | const auto def{info.type == TextureType::Buffer ? ctx.image_buffers.at(info.descriptor_index) | ||
| 24 | : ctx.images.at(info.descriptor_index)}; | ||
| 25 | const auto index_offset{def.count > 1 ? fmt::format("[{}]", ctx.var_alloc.Consume(index)) : ""}; | ||
| 26 | return fmt::format("img{}{}", def.binding, index_offset); | ||
| 27 | } | ||
| 28 | |||
| 29 | std::string CastToIntVec(std::string_view value, const IR::TextureInstInfo& info) { | ||
| 30 | switch (info.type) { | ||
| 31 | case TextureType::Color1D: | ||
| 32 | case TextureType::Buffer: | ||
| 33 | return fmt::format("int({})", value); | ||
| 34 | case TextureType::ColorArray1D: | ||
| 35 | case TextureType::Color2D: | ||
| 36 | case TextureType::ColorArray2D: | ||
| 37 | return fmt::format("ivec2({})", value); | ||
| 38 | case TextureType::Color3D: | ||
| 39 | case TextureType::ColorCube: | ||
| 40 | return fmt::format("ivec3({})", value); | ||
| 41 | case TextureType::ColorArrayCube: | ||
| 42 | return fmt::format("ivec4({})", value); | ||
| 43 | default: | ||
| 44 | throw NotImplementedException("Integer cast for TextureType {}", info.type.Value()); | ||
| 45 | } | ||
| 46 | } | ||
| 47 | |||
| 48 | std::string CoordsCastToInt(std::string_view value, const IR::TextureInstInfo& info) { | ||
| 49 | switch (info.type) { | ||
| 50 | case TextureType::Color1D: | ||
| 51 | case TextureType::Buffer: | ||
| 52 | return fmt::format("int({})", value); | ||
| 53 | case TextureType::ColorArray1D: | ||
| 54 | case TextureType::Color2D: | ||
| 55 | return fmt::format("ivec2({})", value); | ||
| 56 | case TextureType::ColorArray2D: | ||
| 57 | case TextureType::Color3D: | ||
| 58 | case TextureType::ColorCube: | ||
| 59 | return fmt::format("ivec3({})", value); | ||
| 60 | case TextureType::ColorArrayCube: | ||
| 61 | return fmt::format("ivec4({})", value); | ||
| 62 | default: | ||
| 63 | throw NotImplementedException("TexelFetchCast type {}", info.type.Value()); | ||
| 64 | } | ||
| 65 | } | ||
| 66 | |||
| 67 | bool NeedsShadowLodExt(TextureType type) { | ||
| 68 | switch (type) { | ||
| 69 | case TextureType::ColorArray2D: | ||
| 70 | case TextureType::ColorCube: | ||
| 71 | case TextureType::ColorArrayCube: | ||
| 72 | return true; | ||
| 73 | default: | ||
| 74 | return false; | ||
| 75 | } | ||
| 76 | } | ||
| 77 | |||
| 78 | std::string GetOffsetVec(EmitContext& ctx, const IR::Value& offset) { | ||
| 79 | if (offset.IsImmediate()) { | ||
| 80 | return fmt::format("int({})", offset.U32()); | ||
| 81 | } | ||
| 82 | IR::Inst* const inst{offset.InstRecursive()}; | ||
| 83 | if (inst->AreAllArgsImmediates()) { | ||
| 84 | switch (inst->GetOpcode()) { | ||
| 85 | case IR::Opcode::CompositeConstructU32x2: | ||
| 86 | return fmt::format("ivec2({},{})", inst->Arg(0).U32(), inst->Arg(1).U32()); | ||
| 87 | case IR::Opcode::CompositeConstructU32x3: | ||
| 88 | return fmt::format("ivec3({},{},{})", inst->Arg(0).U32(), inst->Arg(1).U32(), | ||
| 89 | inst->Arg(2).U32()); | ||
| 90 | case IR::Opcode::CompositeConstructU32x4: | ||
| 91 | return fmt::format("ivec4({},{},{},{})", inst->Arg(0).U32(), inst->Arg(1).U32(), | ||
| 92 | inst->Arg(2).U32(), inst->Arg(3).U32()); | ||
| 93 | default: | ||
| 94 | break; | ||
| 95 | } | ||
| 96 | } | ||
| 97 | const bool has_var_aoffi{ctx.profile.support_gl_variable_aoffi}; | ||
| 98 | if (!has_var_aoffi) { | ||
| 99 | LOG_WARNING(Shader_GLSL, "Device does not support variable texture offsets, STUBBING"); | ||
| 100 | } | ||
| 101 | const auto offset_str{has_var_aoffi ? ctx.var_alloc.Consume(offset) : "0"}; | ||
| 102 | switch (offset.Type()) { | ||
| 103 | case IR::Type::U32: | ||
| 104 | return fmt::format("int({})", offset_str); | ||
| 105 | case IR::Type::U32x2: | ||
| 106 | return fmt::format("ivec2({})", offset_str); | ||
| 107 | case IR::Type::U32x3: | ||
| 108 | return fmt::format("ivec3({})", offset_str); | ||
| 109 | case IR::Type::U32x4: | ||
| 110 | return fmt::format("ivec4({})", offset_str); | ||
| 111 | default: | ||
| 112 | throw NotImplementedException("Offset type {}", offset.Type()); | ||
| 113 | } | ||
| 114 | } | ||
| 115 | |||
| 116 | std::string PtpOffsets(const IR::Value& offset, const IR::Value& offset2) { | ||
| 117 | const std::array values{offset.InstRecursive(), offset2.InstRecursive()}; | ||
| 118 | if (!values[0]->AreAllArgsImmediates() || !values[1]->AreAllArgsImmediates()) { | ||
| 119 | LOG_WARNING(Shader_GLSL, "Not all arguments in PTP are immediate, STUBBING"); | ||
| 120 | return "ivec2[](ivec2(0), ivec2(1), ivec2(2), ivec2(3))"; | ||
| 121 | } | ||
| 122 | const IR::Opcode opcode{values[0]->GetOpcode()}; | ||
| 123 | if (opcode != values[1]->GetOpcode() || opcode != IR::Opcode::CompositeConstructU32x4) { | ||
| 124 | throw LogicError("Invalid PTP arguments"); | ||
| 125 | } | ||
| 126 | auto read{[&](unsigned int a, unsigned int b) { return values[a]->Arg(b).U32(); }}; | ||
| 127 | |||
| 128 | return fmt::format("ivec2[](ivec2({},{}),ivec2({},{}),ivec2({},{}),ivec2({},{}))", read(0, 0), | ||
| 129 | read(0, 1), read(0, 2), read(0, 3), read(1, 0), read(1, 1), read(1, 2), | ||
| 130 | read(1, 3)); | ||
| 131 | } | ||
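// PTP gathers carry four independent texel offsets: the two U32x4 composites
// are flattened into the ivec2[4] initializer consumed by
// textureGatherOffsets, e.g.
// "ivec2[](ivec2(0,0),ivec2(1,0),ivec2(0,1),ivec2(1,1))".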
| 132 | |||
| 133 | IR::Inst* PrepareSparse(IR::Inst& inst) { | ||
| 134 | const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; | ||
| 135 | if (sparse_inst) { | ||
| 136 | sparse_inst->Invalidate(); | ||
| 137 | } | ||
| 138 | return sparse_inst; | ||
| 139 | } | ||
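// Invalidating GetSparseFromOp here claims the residency result, so the
// emitters below can write it directly via sparseTexelsResidentARB instead
// of leaving a dangling pseudo-instruction.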
| 140 | } // Anonymous namespace | ||
| 141 | |||
| 142 | void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 143 | std::string_view coords, std::string_view bias_lc, | ||
| 144 | const IR::Value& offset) { | ||
| 145 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 146 | if (info.has_lod_clamp) { | ||
| 147 | throw NotImplementedException("EmitImageSampleImplicitLod Lod clamp samples"); | ||
| 148 | } | ||
| 149 | const auto texture{Texture(ctx, info, index)}; | ||
| 150 | const auto bias{info.has_bias ? fmt::format(",{}", bias_lc) : ""}; | ||
| 151 | const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; | ||
| 152 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 153 | const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; | ||
| 154 | if (sparse_inst && !supports_sparse) { | ||
| 155 | LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING"); | ||
| 156 | ctx.AddU1("{}=true;", *sparse_inst); | ||
| 157 | } | ||
| 158 | if (!sparse_inst || !supports_sparse) { | ||
| 159 | if (!offset.IsEmpty()) { | ||
| 160 | const auto offset_str{GetOffsetVec(ctx, offset)}; | ||
| 161 | if (ctx.stage == Stage::Fragment) { | ||
| 162 | ctx.Add("{}=textureOffset({},{},{}{});", texel, texture, coords, offset_str, bias); | ||
| 163 | } else { | ||
| 164 | ctx.Add("{}=textureLodOffset({},{},0.0,{});", texel, texture, coords, offset_str); | ||
| 165 | } | ||
| 166 | } else { | ||
| 167 | if (ctx.stage == Stage::Fragment) { | ||
| 168 | ctx.Add("{}=texture({},{}{});", texel, texture, coords, bias); | ||
| 169 | } else { | ||
| 170 | ctx.Add("{}=textureLod({},{},0.0);", texel, texture, coords); | ||
| 171 | } | ||
| 172 | } | ||
| 173 | return; | ||
| 174 | } | ||
| 175 | if (!offset.IsEmpty()) { | ||
| 176 | ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureOffsetARB({},{},{},{}{}));", | ||
| 177 | *sparse_inst, texture, coords, GetOffsetVec(ctx, offset), texel, bias); | ||
| 178 | } else { | ||
| 179 | ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureARB({},{},{}{}));", *sparse_inst, | ||
| 180 | texture, coords, texel, bias); | ||
| 181 | } | ||
| 182 | } | ||
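// Implicit derivatives are undefined outside fragment shaders in GLSL, so
// the non-fragment paths above substitute an explicit textureLod at level
// 0.0 for the implicit-LOD sample.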
| 183 | |||
| 184 | void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 185 | std::string_view coords, std::string_view lod_lc, | ||
| 186 | const IR::Value& offset) { | ||
| 187 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 188 | if (info.has_bias) { | ||
| 189 | throw NotImplementedException("EmitImageSampleExplicitLod Bias texture samples"); | ||
| 190 | } | ||
| 191 | if (info.has_lod_clamp) { | ||
| 192 | throw NotImplementedException("EmitImageSampleExplicitLod Lod clamp samples"); | ||
| 193 | } | ||
| 194 | const auto texture{Texture(ctx, info, index)}; | ||
| 195 | const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; | ||
| 196 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 197 | const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; | ||
| 198 | if (sparse_inst && !supports_sparse) { | ||
| 199 | LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING"); | ||
| 200 | ctx.AddU1("{}=true;", *sparse_inst); | ||
| 201 | } | ||
| 202 | if (!sparse_inst || !supports_sparse) { | ||
| 203 | if (!offset.IsEmpty()) { | ||
| 204 | ctx.Add("{}=textureLodOffset({},{},{},{});", texel, texture, coords, lod_lc, | ||
| 205 | GetOffsetVec(ctx, offset)); | ||
| 206 | } else { | ||
| 207 | ctx.Add("{}=textureLod({},{},{});", texel, texture, coords, lod_lc); | ||
| 208 | } | ||
| 209 | return; | ||
| 210 | } | ||
| 211 | if (!offset.IsEmpty()) { | ||
| 212 | ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));", | ||
| 213 | *sparse_inst, texture, CastToIntVec(coords, info), lod_lc, | ||
| 214 | GetOffsetVec(ctx, offset), texel); | ||
| 215 | } else { | ||
| 216 | ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureLodARB({},{},{},{}));", *sparse_inst, | ||
| 217 | texture, coords, lod_lc, texel); | ||
| 218 | } | ||
| 219 | } | ||
| 220 | |||
| 221 | void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 222 | std::string_view coords, std::string_view dref, | ||
| 223 | std::string_view bias_lc, const IR::Value& offset) { | ||
| 224 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 225 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 226 | if (sparse_inst) { | ||
| 227 | throw NotImplementedException("EmitImageSampleDrefImplicitLod Sparse texture samples"); | ||
| 228 | } | ||
| 229 | if (info.has_bias) { | ||
| 230 | throw NotImplementedException("EmitImageSampleDrefImplicitLod Bias texture samples"); | ||
| 231 | } | ||
| 232 | if (info.has_lod_clamp) { | ||
| 233 | throw NotImplementedException("EmitImageSampleDrefImplicitLod Lod clamp samples"); | ||
| 234 | } | ||
| 235 | const auto texture{Texture(ctx, info, index)}; | ||
| 236 | const auto bias{info.has_bias ? fmt::format(",{}", bias_lc) : ""}; | ||
| 237 | const bool needs_shadow_ext{NeedsShadowLodExt(info.type)}; | ||
| 238 | const auto cast{needs_shadow_ext ? "vec4" : "vec3"}; | ||
| 239 | const bool use_grad{!ctx.profile.support_gl_texture_shadow_lod && | ||
| 240 | ctx.stage != Stage::Fragment && needs_shadow_ext}; | ||
| 241 | if (use_grad) { | ||
| 242 | LOG_WARNING(Shader_GLSL, | ||
| 243 | "Device lacks GL_EXT_texture_shadow_lod. Using textureGrad fallback"); | ||
| 244 | if (info.type == TextureType::ColorArrayCube) { | ||
| 245 | LOG_WARNING(Shader_GLSL, "textureGrad does not support ColorArrayCube. Stubbing"); | ||
| 246 | ctx.AddF32("{}=0.0f;", inst); | ||
| 247 | return; | ||
| 248 | } | ||
| 249 | const auto d_cast{info.type == TextureType::ColorArray2D ? "vec2" : "vec3"}; | ||
| 250 | ctx.AddF32("{}=textureGrad({},{}({},{}),{}(0),{}(0));", inst, texture, cast, coords, dref, | ||
| 251 | d_cast, d_cast); | ||
| 252 | return; | ||
| 253 | } | ||
| 254 | if (!offset.IsEmpty()) { | ||
| 255 | const auto offset_str{GetOffsetVec(ctx, offset)}; | ||
| 256 | if (ctx.stage == Stage::Fragment) { | ||
| 257 | ctx.AddF32("{}=textureOffset({},{}({},{}),{}{});", inst, texture, cast, coords, dref, | ||
| 258 | offset_str, bias); | ||
| 259 | } else { | ||
| 260 | ctx.AddF32("{}=textureLodOffset({},{}({},{}),0.0,{});", inst, texture, cast, coords, | ||
| 261 | dref, offset_str); | ||
| 262 | } | ||
| 263 | } else { | ||
| 264 | if (ctx.stage == Stage::Fragment) { | ||
| 265 | if (info.type == TextureType::ColorArrayCube) { | ||
| 266 | ctx.AddF32("{}=texture({},vec4({}),{});", inst, texture, coords, dref); | ||
| 267 | } else { | ||
| 268 | ctx.AddF32("{}=texture({},{}({},{}){});", inst, texture, cast, coords, dref, bias); | ||
| 269 | } | ||
| 270 | } else { | ||
| 271 | ctx.AddF32("{}=textureLod({},{}({},{}),0.0);", inst, texture, cast, coords, dref); | ||
| 272 | } | ||
| 273 | } | ||
| 274 | } | ||
| 275 | |||
| 276 | void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 277 | std::string_view coords, std::string_view dref, | ||
| 278 | std::string_view lod_lc, const IR::Value& offset) { | ||
| 279 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 280 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 281 | if (sparse_inst) { | ||
| 282 | throw NotImplementedException("EmitImageSampleDrefExplicitLod Sparse texture samples"); | ||
| 283 | } | ||
| 284 | if (info.has_bias) { | ||
| 285 | throw NotImplementedException("EmitImageSampleDrefExplicitLod Bias texture samples"); | ||
| 286 | } | ||
| 287 | if (info.has_lod_clamp) { | ||
| 288 | throw NotImplementedException("EmitImageSampleDrefExplicitLod Lod clamp samples"); | ||
| 289 | } | ||
| 290 | const auto texture{Texture(ctx, info, index)}; | ||
| 291 | const bool needs_shadow_ext{NeedsShadowLodExt(info.type)}; | ||
| 292 | const bool use_grad{!ctx.profile.support_gl_texture_shadow_lod && needs_shadow_ext}; | ||
| 293 | const auto cast{needs_shadow_ext ? "vec4" : "vec3"}; | ||
| 294 | if (use_grad) { | ||
| 295 | LOG_WARNING(Shader_GLSL, | ||
| 296 | "Device lacks GL_EXT_texture_shadow_lod. Using textureGrad fallback"); | ||
| 297 | if (info.type == TextureType::ColorArrayCube) { | ||
| 298 | LOG_WARNING(Shader_GLSL, "textureGrad does not support ColorArrayCube. Stubbing"); | ||
| 299 | ctx.AddF32("{}=0.0f;", inst); | ||
| 300 | return; | ||
| 301 | } | ||
| 302 | const auto d_cast{info.type == TextureType::ColorArray2D ? "vec2" : "vec3"}; | ||
| 303 | ctx.AddF32("{}=textureGrad({},{}({},{}),{}(0),{}(0));", inst, texture, cast, coords, dref, | ||
| 304 | d_cast, d_cast); | ||
| 305 | return; | ||
| 306 | } | ||
| 307 | if (!offset.IsEmpty()) { | ||
| 308 | const auto offset_str{GetOffsetVec(ctx, offset)}; | ||
| 309 | if (info.type == TextureType::ColorArrayCube) { | ||
| 310 | ctx.AddF32("{}=textureLodOffset({},{},{},{},{});", inst, texture, coords, dref, lod_lc, | ||
| 311 | offset_str); | ||
| 312 | } else { | ||
| 313 | ctx.AddF32("{}=textureLodOffset({},{}({},{}),{},{});", inst, texture, cast, coords, | ||
| 314 | dref, lod_lc, offset_str); | ||
| 315 | } | ||
| 316 | } else { | ||
| 317 | if (info.type == TextureType::ColorArrayCube) { | ||
| 318 | ctx.AddF32("{}=textureLod({},{},{},{});", inst, texture, coords, dref, lod_lc); | ||
| 319 | } else { | ||
| 320 | ctx.AddF32("{}=textureLod({},{}({},{}),{});", inst, texture, cast, coords, dref, | ||
| 321 | lod_lc); | ||
| 322 | } | ||
| 323 | } | ||
| 324 | } | ||
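// ColorArrayCube is special-cased above because samplerCubeArrayShadow
// already uses all four coordinate components; GLSL passes the reference
// value as a separate parameter instead of packing it into the coordinate
// vector, so no vec3/vec4 cast is applied.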
| 325 | |||
| 326 | void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 327 | std::string_view coords, const IR::Value& offset, const IR::Value& offset2) { | ||
| 328 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 329 | const auto texture{Texture(ctx, info, index)}; | ||
| 330 | const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; | ||
| 331 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 332 | const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; | ||
| 333 | if (sparse_inst && !supports_sparse) { | ||
| 334 | LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING"); | ||
| 335 | ctx.AddU1("{}=true;", *sparse_inst); | ||
| 336 | } | ||
| 337 | if (!sparse_inst || !supports_sparse) { | ||
| 338 | if (offset.IsEmpty()) { | ||
| 339 | ctx.Add("{}=textureGather({},{},int({}));", texel, texture, coords, | ||
| 340 | info.gather_component); | ||
| 341 | return; | ||
| 342 | } | ||
| 343 | if (offset2.IsEmpty()) { | ||
| 344 | ctx.Add("{}=textureGatherOffset({},{},{},int({}));", texel, texture, coords, | ||
| 345 | GetOffsetVec(ctx, offset), info.gather_component); | ||
| 346 | return; | ||
| 347 | } | ||
| 348 | // PTP | ||
| 349 | const auto offsets{PtpOffsets(offset, offset2)}; | ||
| 350 | ctx.Add("{}=textureGatherOffsets({},{},{},int({}));", texel, texture, coords, offsets, | ||
| 351 | info.gather_component); | ||
| 352 | return; | ||
| 353 | } | ||
| 354 | if (offset.IsEmpty()) { | ||
| 355 | ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherARB({},{},{},int({})));", | ||
| 356 | *sparse_inst, texture, coords, texel, info.gather_component); | ||
| 357 | return; | ||
| 358 | } | ||
| 359 | if (offset2.IsEmpty()) { | ||
| 360 | ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},{},int({})));", | ||
| 361 | *sparse_inst, texture, CastToIntVec(coords, info), GetOffsetVec(ctx, offset), | ||
| 362 | texel, info.gather_component); | ||
| 363 | return; | ||
| 364 | } | ||
| 365 | // PTP | ||
| 366 | const auto offsets{PtpOffsets(offset, offset2)}; | ||
| 367 | ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},{},int({})));", | ||
| 368 | *sparse_inst, texture, CastToIntVec(coords, info), offsets, texel, | ||
| 369 | info.gather_component); | ||
| 370 | } | ||
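// int(info.gather_component) selects which channel (r/g/b/a) textureGather
// collects from each texel of the 2x2 footprint.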
| 371 | |||
| 372 | void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 373 | std::string_view coords, const IR::Value& offset, const IR::Value& offset2, | ||
| 374 | std::string_view dref) { | ||
| 375 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 376 | const auto texture{Texture(ctx, info, index)}; | ||
| 377 | const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; | ||
| 378 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 379 | const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; | ||
| 380 | if (sparse_inst && !supports_sparse) { | ||
| 381 | LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING"); | ||
| 382 | ctx.AddU1("{}=true;", *sparse_inst); | ||
| 383 | } | ||
| 384 | if (!sparse_inst || !supports_sparse) { | ||
| 385 | if (offset.IsEmpty()) { | ||
| 386 | ctx.Add("{}=textureGather({},{},{});", texel, texture, coords, dref); | ||
| 387 | return; | ||
| 388 | } | ||
| 389 | if (offset2.IsEmpty()) { | ||
| 390 | ctx.Add("{}=textureGatherOffset({},{},{},{});", texel, texture, coords, dref, | ||
| 391 | GetOffsetVec(ctx, offset)); | ||
| 392 | return; | ||
| 393 | } | ||
| 394 | // PTP | ||
| 395 | const auto offsets{PtpOffsets(offset, offset2)}; | ||
| 396 | ctx.Add("{}=textureGatherOffsets({},{},{},{});", texel, texture, coords, dref, offsets); | ||
| 397 | return; | ||
| 398 | } | ||
| 399 | if (offset.IsEmpty()) { | ||
| 400 | ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherARB({},{},{},{}));", *sparse_inst, | ||
| 401 | texture, coords, dref, texel); | ||
| 402 | return; | ||
| 403 | } | ||
| 404 | if (offset2.IsEmpty()) { | ||
| 405 | ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},{},{}));", | ||
| 406 | *sparse_inst, texture, CastToIntVec(coords, info), dref, | ||
| 407 | GetOffsetVec(ctx, offset), texel); | ||
| 408 | return; | ||
| 409 | } | ||
| 410 | // PTP | ||
| 411 | const auto offsets{PtpOffsets(offset, offset2)}; | ||
| 412 | ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},{},{}));", | ||
| 413 | *sparse_inst, texture, CastToIntVec(coords, info), dref, offsets, texel); | ||
| 414 | } | ||
| 415 | |||
| 416 | void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 417 | std::string_view coords, std::string_view offset, std::string_view lod, | ||
| 418 | [[maybe_unused]] std::string_view ms) { | ||
| 419 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 420 | if (info.has_bias) { | ||
| 421 | throw NotImplementedException("EmitImageFetch Bias texture samples"); | ||
| 422 | } | ||
| 423 | if (info.has_lod_clamp) { | ||
| 424 | throw NotImplementedException("EmitImageFetch Lod clamp samples"); | ||
| 425 | } | ||
| 426 | const auto texture{Texture(ctx, info, index)}; | ||
| 427 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 428 | const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; | ||
| 429 | const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; | ||
| 430 | if (sparse_inst && !supports_sparse) { | ||
| 431 | LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING"); | ||
| 432 | ctx.AddU1("{}=true;", *sparse_inst); | ||
| 433 | } | ||
| 434 | if (!sparse_inst || !supports_sparse) { | ||
| 435 | if (!offset.empty()) { | ||
| 436 | ctx.Add("{}=texelFetchOffset({},{},int({}),{});", texel, texture, | ||
| 437 | CoordsCastToInt(coords, info), lod, CoordsCastToInt(offset, info)); | ||
| 438 | } else { | ||
| 439 | if (info.type == TextureType::Buffer) { | ||
| 440 | ctx.Add("{}=texelFetch({},int({}));", texel, texture, coords); | ||
| 441 | } else { | ||
| 442 | ctx.Add("{}=texelFetch({},{},int({}));", texel, texture, | ||
| 443 | CoordsCastToInt(coords, info), lod); | ||
| 444 | } | ||
| 445 | } | ||
| 446 | return; | ||
| 447 | } | ||
| 448 | if (!offset.empty()) { | ||
| 449 | ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));", | ||
| 450 | *sparse_inst, texture, CastToIntVec(coords, info), lod, | ||
| 451 | CastToIntVec(offset, info), texel); | ||
| 452 | } else { | ||
| 453 | ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchARB({},{},int({}),{}));", | ||
| 454 | *sparse_inst, texture, CastToIntVec(coords, info), lod, texel); | ||
| 455 | } | ||
| 456 | } | ||
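// Buffer textures take the texelFetch(samplerBuffer, int) form above with no
// LOD argument, since buffer textures are not mipmapped.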
| 457 | |||
| 458 | void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 459 | std::string_view lod) { | ||
| 460 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 461 | const auto texture{Texture(ctx, info, index)}; | ||
| 462 | switch (info.type) { | ||
| 463 | case TextureType::Color1D: | ||
| 464 | return ctx.AddU32x4( | ||
| 465 | "{}=uvec4(uint(textureSize({},int({}))),0u,0u,uint(textureQueryLevels({})));", inst, | ||
| 466 | texture, lod, texture); | ||
| 467 | case TextureType::ColorArray1D: | ||
| 468 | case TextureType::Color2D: | ||
| 469 | case TextureType::ColorCube: | ||
| 470 | return ctx.AddU32x4( | ||
| 471 | "{}=uvec4(uvec2(textureSize({},int({}))),0u,uint(textureQueryLevels({})));", inst, | ||
| 472 | texture, lod, texture); | ||
| 473 | case TextureType::ColorArray2D: | ||
| 474 | case TextureType::Color3D: | ||
| 475 | case TextureType::ColorArrayCube: | ||
| 476 | return ctx.AddU32x4( | ||
| 477 | "{}=uvec4(uvec3(textureSize({},int({}))),uint(textureQueryLevels({})));", inst, texture, | ||
| 478 | lod, texture); | ||
| 479 | case TextureType::Buffer: | ||
| 480 | throw NotImplementedException("EmitImageQueryDimensions Texture buffers"); | ||
| 481 | } | ||
| 482 | throw LogicError("Unspecified image type {}", info.type.Value()); | ||
| 483 | } | ||
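// Packing convention: .xyz carry the size at the requested LOD (unused
// components zeroed) and .w carries the mip count from textureQueryLevels.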
| 484 | |||
| 485 | void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 486 | std::string_view coords) { | ||
| 487 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 488 | const auto texture{Texture(ctx, info, index)}; | ||
| 489 | return ctx.AddF32x4("{}=vec4(textureQueryLod({},{}),0.0,0.0);", inst, texture, coords); | ||
| 490 | } | ||
| 491 | |||
| 492 | void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 493 | std::string_view coords, const IR::Value& derivatives, | ||
| 494 | const IR::Value& offset, [[maybe_unused]] const IR::Value& lod_clamp) { | ||
| 495 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 496 | if (info.has_lod_clamp) { | ||
| 497 | throw NotImplementedException("EmitImageGradient Lod clamp samples"); | ||
| 498 | } | ||
| 499 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 500 | if (sparse_inst) { | ||
| 501 | throw NotImplementedException("EmitImageGradient Sparse"); | ||
| 502 | } | ||
| 503 | if (!offset.IsEmpty()) { | ||
| 504 | throw NotImplementedException("EmitImageGradient offset"); | ||
| 505 | } | ||
| 506 | const auto texture{Texture(ctx, info, index)}; | ||
| 507 | const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; | ||
| 508 | const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp}; | ||
| 509 | const auto derivatives_vec{ctx.var_alloc.Consume(derivatives)}; | ||
| 510 | if (multi_component) { | ||
| 511 | ctx.Add("{}=textureGrad({},{},vec2({}.xz),vec2({}.yz));", texel, texture, coords, | ||
| 512 | derivatives_vec, derivatives_vec); | ||
| 513 | } else { | ||
| 514 | ctx.Add("{}=textureGrad({},{},float({}.x),float({}.y));", texel, texture, coords, | ||
| 515 | derivatives_vec, derivatives_vec); | ||
| 516 | } | ||
| 517 | } | ||
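// The derivative vector appears to pack the x/y gradients interleaved, so in
// the multi-component case above .xz forms dPdx and .yz forms dPdy.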
| 518 | |||
| 519 | void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 520 | std::string_view coords) { | ||
| 521 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 522 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 523 | if (sparse_inst) { | ||
| 524 | throw NotImplementedException("EmitImageRead Sparse"); | ||
| 525 | } | ||
| 526 | const auto image{Image(ctx, info, index)}; | ||
| 527 | ctx.AddU32x4("{}=uvec4(imageLoad({},{}));", inst, image, CoordsCastToInt(coords, info)); | ||
| 528 | } | ||
| 529 | |||
| 530 | void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 531 | std::string_view coords, std::string_view color) { | ||
| 532 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 533 | const auto image{Image(ctx, info, index)}; | ||
| 534 | ctx.Add("imageStore({},{},{});", image, CoordsCastToInt(coords, info), color); | ||
| 535 | } | ||
| 536 | |||
| 537 | void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 538 | std::string_view coords, std::string_view value) { | ||
| 539 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 540 | const auto image{Image(ctx, info, index)}; | ||
| 541 | ctx.AddU32("{}=imageAtomicAdd({},{},{});", inst, image, CoordsCastToInt(coords, info), value); | ||
| 542 | } | ||
| 543 | |||
| 544 | void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 545 | std::string_view coords, std::string_view value) { | ||
| 546 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 547 | const auto image{Image(ctx, info, index)}; | ||
| 548 | ctx.AddU32("{}=imageAtomicMin({},{},int({}));", inst, image, CoordsCastToInt(coords, info), | ||
| 549 | value); | ||
| 550 | } | ||
| 551 | |||
| 552 | void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 553 | std::string_view coords, std::string_view value) { | ||
| 554 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 555 | const auto image{Image(ctx, info, index)}; | ||
| 556 | ctx.AddU32("{}=imageAtomicMin({},{},uint({}));", inst, image, CoordsCastToInt(coords, info), | ||
| 557 | value); | ||
| 558 | } | ||
| 559 | |||
| 560 | void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 561 | std::string_view coords, std::string_view value) { | ||
| 562 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 563 | const auto image{Image(ctx, info, index)}; | ||
| 564 | ctx.AddU32("{}=imageAtomicMax({},{},int({}));", inst, image, CoordsCastToInt(coords, info), | ||
| 565 | value); | ||
| 566 | } | ||
| 567 | |||
| 568 | void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 569 | std::string_view coords, std::string_view value) { | ||
| 570 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 571 | const auto image{Image(ctx, info, index)}; | ||
| 572 | ctx.AddU32("{}=imageAtomicMax({},{},uint({}));", inst, image, CoordsCastToInt(coords, info), | ||
| 573 | value); | ||
| 574 | } | ||
| 575 | |||
| 576 | void EmitImageAtomicInc32(EmitContext&, IR::Inst&, const IR::Value&, std::string_view, | ||
| 577 | std::string_view) { | ||
| 578 | NotImplemented(); | ||
| 579 | } | ||
| 580 | |||
| 581 | void EmitImageAtomicDec32(EmitContext&, IR::Inst&, const IR::Value&, std::string_view, | ||
| 582 | std::string_view) { | ||
| 583 | NotImplemented(); | ||
| 584 | } | ||
| 585 | |||
| 586 | void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 587 | std::string_view coords, std::string_view value) { | ||
| 588 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 589 | const auto image{Image(ctx, info, index)}; | ||
| 590 | ctx.AddU32("{}=imageAtomicAnd({},{},{});", inst, image, CoordsCastToInt(coords, info), value); | ||
| 591 | } | ||
| 592 | |||
| 593 | void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 594 | std::string_view coords, std::string_view value) { | ||
| 595 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 596 | const auto image{Image(ctx, info, index)}; | ||
| 597 | ctx.AddU32("{}=imageAtomicOr({},{},{});", inst, image, CoordsCastToInt(coords, info), value); | ||
| 598 | } | ||
| 599 | |||
| 600 | void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 601 | std::string_view coords, std::string_view value) { | ||
| 602 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 603 | const auto image{Image(ctx, info, index)}; | ||
| 604 | ctx.AddU32("{}=imageAtomicXor({},{},{});", inst, image, CoordsCastToInt(coords, info), value); | ||
| 605 | } | ||
| 606 | |||
| 607 | void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 608 | std::string_view coords, std::string_view value) { | ||
| 609 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 610 | const auto image{Image(ctx, info, index)}; | ||
| 611 | ctx.AddU32("{}=imageAtomicExchange({},{},{});", inst, image, CoordsCastToInt(coords, info), | ||
| 612 | value); | ||
| 613 | } | ||
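// The 32-bit image atomics above map directly onto the GLSL imageAtomic*
// builtins; signed and unsigned min/max share one builtin each,
// disambiguated by casting the value to int or uint.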
| 614 | |||
| 615 | void EmitBindlessImageSampleImplicitLod(EmitContext&) { | ||
| 616 | NotImplemented(); | ||
| 617 | } | ||
| 618 | |||
| 619 | void EmitBindlessImageSampleExplicitLod(EmitContext&) { | ||
| 620 | NotImplemented(); | ||
| 621 | } | ||
| 622 | |||
| 623 | void EmitBindlessImageSampleDrefImplicitLod(EmitContext&) { | ||
| 624 | NotImplemented(); | ||
| 625 | } | ||
| 626 | |||
| 627 | void EmitBindlessImageSampleDrefExplicitLod(EmitContext&) { | ||
| 628 | NotImplemented(); | ||
| 629 | } | ||
| 630 | |||
| 631 | void EmitBindlessImageGather(EmitContext&) { | ||
| 632 | NotImplemented(); | ||
| 633 | } | ||
| 634 | |||
| 635 | void EmitBindlessImageGatherDref(EmitContext&) { | ||
| 636 | NotImplemented(); | ||
| 637 | } | ||
| 638 | |||
| 639 | void EmitBindlessImageFetch(EmitContext&) { | ||
| 640 | NotImplemented(); | ||
| 641 | } | ||
| 642 | |||
| 643 | void EmitBindlessImageQueryDimensions(EmitContext&) { | ||
| 644 | NotImplemented(); | ||
| 645 | } | ||
| 646 | |||
| 647 | void EmitBindlessImageQueryLod(EmitContext&) { | ||
| 648 | NotImplemented(); | ||
| 649 | } | ||
| 650 | |||
| 651 | void EmitBindlessImageGradient(EmitContext&) { | ||
| 652 | NotImplemented(); | ||
| 653 | } | ||
| 654 | |||
| 655 | void EmitBindlessImageRead(EmitContext&) { | ||
| 656 | NotImplemented(); | ||
| 657 | } | ||
| 658 | |||
| 659 | void EmitBindlessImageWrite(EmitContext&) { | ||
| 660 | NotImplemented(); | ||
| 661 | } | ||
| 662 | |||
| 663 | void EmitBoundImageSampleImplicitLod(EmitContext&) { | ||
| 664 | NotImplemented(); | ||
| 665 | } | ||
| 666 | |||
| 667 | void EmitBoundImageSampleExplicitLod(EmitContext&) { | ||
| 668 | NotImplemented(); | ||
| 669 | } | ||
| 670 | |||
| 671 | void EmitBoundImageSampleDrefImplicitLod(EmitContext&) { | ||
| 672 | NotImplemented(); | ||
| 673 | } | ||
| 674 | |||
| 675 | void EmitBoundImageSampleDrefExplicitLod(EmitContext&) { | ||
| 676 | NotImplemented(); | ||
| 677 | } | ||
| 678 | |||
| 679 | void EmitBoundImageGather(EmitContext&) { | ||
| 680 | NotImplemented(); | ||
| 681 | } | ||
| 682 | |||
| 683 | void EmitBoundImageGatherDref(EmitContext&) { | ||
| 684 | NotImplemented(); | ||
| 685 | } | ||
| 686 | |||
| 687 | void EmitBoundImageFetch(EmitContext&) { | ||
| 688 | NotImplemented(); | ||
| 689 | } | ||
| 690 | |||
| 691 | void EmitBoundImageQueryDimensions(EmitContext&) { | ||
| 692 | NotImplemented(); | ||
| 693 | } | ||
| 694 | |||
| 695 | void EmitBoundImageQueryLod(EmitContext&) { | ||
| 696 | NotImplemented(); | ||
| 697 | } | ||
| 698 | |||
| 699 | void EmitBoundImageGradient(EmitContext&) { | ||
| 700 | NotImplemented(); | ||
| 701 | } | ||
| 702 | |||
| 703 | void EmitBoundImageRead(EmitContext&) { | ||
| 704 | NotImplemented(); | ||
| 705 | } | ||
| 706 | |||
| 707 | void EmitBoundImageWrite(EmitContext&) { | ||
| 708 | NotImplemented(); | ||
| 709 | } | ||
| 710 | |||
| 711 | void EmitBindlessImageAtomicIAdd32(EmitContext&) { | ||
| 712 | NotImplemented(); | ||
| 713 | } | ||
| 714 | |||
| 715 | void EmitBindlessImageAtomicSMin32(EmitContext&) { | ||
| 716 | NotImplemented(); | ||
| 717 | } | ||
| 718 | |||
| 719 | void EmitBindlessImageAtomicUMin32(EmitContext&) { | ||
| 720 | NotImplemented(); | ||
| 721 | } | ||
| 722 | |||
| 723 | void EmitBindlessImageAtomicSMax32(EmitContext&) { | ||
| 724 | NotImplemented(); | ||
| 725 | } | ||
| 726 | |||
| 727 | void EmitBindlessImageAtomicUMax32(EmitContext&) { | ||
| 728 | NotImplemented(); | ||
| 729 | } | ||
| 730 | |||
| 731 | void EmitBindlessImageAtomicInc32(EmitContext&) { | ||
| 732 | NotImplemented(); | ||
| 733 | } | ||
| 734 | |||
| 735 | void EmitBindlessImageAtomicDec32(EmitContext&) { | ||
| 736 | NotImplemented(); | ||
| 737 | } | ||
| 738 | |||
| 739 | void EmitBindlessImageAtomicAnd32(EmitContext&) { | ||
| 740 | NotImplemented(); | ||
| 741 | } | ||
| 742 | |||
| 743 | void EmitBindlessImageAtomicOr32(EmitContext&) { | ||
| 744 | NotImplemented(); | ||
| 745 | } | ||
| 746 | |||
| 747 | void EmitBindlessImageAtomicXor32(EmitContext&) { | ||
| 748 | NotImplemented(); | ||
| 749 | } | ||
| 750 | |||
| 751 | void EmitBindlessImageAtomicExchange32(EmitContext&) { | ||
| 752 | NotImplemented(); | ||
| 753 | } | ||
| 754 | |||
| 755 | void EmitBoundImageAtomicIAdd32(EmitContext&) { | ||
| 756 | NotImplemented(); | ||
| 757 | } | ||
| 758 | |||
| 759 | void EmitBoundImageAtomicSMin32(EmitContext&) { | ||
| 760 | NotImplemented(); | ||
| 761 | } | ||
| 762 | |||
| 763 | void EmitBoundImageAtomicUMin32(EmitContext&) { | ||
| 764 | NotImplemented(); | ||
| 765 | } | ||
| 766 | |||
| 767 | void EmitBoundImageAtomicSMax32(EmitContext&) { | ||
| 768 | NotImplemented(); | ||
| 769 | } | ||
| 770 | |||
| 771 | void EmitBoundImageAtomicUMax32(EmitContext&) { | ||
| 772 | NotImplemented(); | ||
| 773 | } | ||
| 774 | |||
| 775 | void EmitBoundImageAtomicInc32(EmitContext&) { | ||
| 776 | NotImplemented(); | ||
| 777 | } | ||
| 778 | |||
| 779 | void EmitBoundImageAtomicDec32(EmitContext&) { | ||
| 780 | NotImplemented(); | ||
| 781 | } | ||
| 782 | |||
| 783 | void EmitBoundImageAtomicAnd32(EmitContext&) { | ||
| 784 | NotImplemented(); | ||
| 785 | } | ||
| 786 | |||
| 787 | void EmitBoundImageAtomicOr32(EmitContext&) { | ||
| 788 | NotImplemented(); | ||
| 789 | } | ||
| 790 | |||
| 791 | void EmitBoundImageAtomicXor32(EmitContext&) { | ||
| 792 | NotImplemented(); | ||
| 793 | } | ||
| 794 | |||
| 795 | void EmitBoundImageAtomicExchange32(EmitContext&) { | ||
| 796 | NotImplemented(); | ||
| 797 | } | ||
| 798 | |||
| 799 | } // namespace Shader::Backend::GLSL | ||
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h new file mode 100644 index 000000000..5936d086f --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h | |||
| @@ -0,0 +1,702 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <string_view> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | |||
| 11 | namespace Shader::IR { | ||
| 12 | enum class Attribute : u64; | ||
| 13 | enum class Patch : u64; | ||
| 14 | class Inst; | ||
| 15 | class Value; | ||
| 16 | } // namespace Shader::IR | ||
| 17 | |||
| 18 | namespace Shader::Backend::GLSL { | ||
| 19 | class EmitContext; | ||
| 20 | |||
| 21 | #define NotImplemented() throw NotImplementedException("GLSL instruction {}", __func__) | ||
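// __func__ expands at each use site to the enclosing emitter's name, so
// every stub reports exactly which GLSL instruction is unimplemented.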
| 22 | |||
| 23 | // Microinstruction emitters | ||
| 24 | void EmitPhi(EmitContext& ctx, IR::Inst& inst); | ||
| 25 | void EmitVoid(EmitContext& ctx); | ||
| 26 | void EmitIdentity(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); | ||
| 27 | void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); | ||
| 28 | void EmitReference(EmitContext& ctx, const IR::Value& value); | ||
| 29 | void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value); | ||
| 30 | void EmitJoin(EmitContext& ctx); | ||
| 31 | void EmitDemoteToHelperInvocation(EmitContext& ctx); | ||
| 32 | void EmitBarrier(EmitContext& ctx); | ||
| 33 | void EmitWorkgroupMemoryBarrier(EmitContext& ctx); | ||
| 34 | void EmitDeviceMemoryBarrier(EmitContext& ctx); | ||
| 35 | void EmitPrologue(EmitContext& ctx); | ||
| 36 | void EmitEpilogue(EmitContext& ctx); | ||
| 37 | void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream); | ||
| 38 | void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream); | ||
| 39 | void EmitGetRegister(EmitContext& ctx); | ||
| 40 | void EmitSetRegister(EmitContext& ctx); | ||
| 41 | void EmitGetPred(EmitContext& ctx); | ||
| 42 | void EmitSetPred(EmitContext& ctx); | ||
| 43 | void EmitSetGotoVariable(EmitContext& ctx); | ||
| 44 | void EmitGetGotoVariable(EmitContext& ctx); | ||
| 45 | void EmitSetIndirectBranchVariable(EmitContext& ctx); | ||
| 46 | void EmitGetIndirectBranchVariable(EmitContext& ctx); | ||
| 47 | void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 48 | const IR::Value& offset); | ||
| 49 | void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 50 | const IR::Value& offset); | ||
| 51 | void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 52 | const IR::Value& offset); | ||
| 53 | void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 54 | const IR::Value& offset); | ||
| 55 | void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 56 | const IR::Value& offset); | ||
| 57 | void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 58 | const IR::Value& offset); | ||
| 59 | void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 60 | const IR::Value& offset); | ||
| 61 | void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, | ||
| 62 | std::string_view vertex); | ||
| 63 | void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, | ||
| 64 | std::string_view vertex); | ||
| 65 | void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, std::string_view offset, | ||
| 66 | std::string_view vertex); | ||
| 67 | void EmitSetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view value, | ||
| 68 | std::string_view vertex); | ||
| 69 | void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch); | ||
| 70 | void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string_view value); | ||
| 71 | void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value); | ||
| 72 | void EmitSetSampleMask(EmitContext& ctx, std::string_view value); | ||
| 73 | void EmitSetFragDepth(EmitContext& ctx, std::string_view value); | ||
| 74 | void EmitGetZFlag(EmitContext& ctx); | ||
| 75 | void EmitGetSFlag(EmitContext& ctx); | ||
| 76 | void EmitGetCFlag(EmitContext& ctx); | ||
| 77 | void EmitGetOFlag(EmitContext& ctx); | ||
| 78 | void EmitSetZFlag(EmitContext& ctx); | ||
| 79 | void EmitSetSFlag(EmitContext& ctx); | ||
| 80 | void EmitSetCFlag(EmitContext& ctx); | ||
| 81 | void EmitSetOFlag(EmitContext& ctx); | ||
| 82 | void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst); | ||
| 83 | void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst); | ||
| 84 | void EmitInvocationId(EmitContext& ctx, IR::Inst& inst); | ||
| 85 | void EmitSampleId(EmitContext& ctx, IR::Inst& inst); | ||
| 86 | void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst); | ||
| 87 | void EmitYDirection(EmitContext& ctx, IR::Inst& inst); | ||
| 88 | void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset); | ||
| 89 | void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value); | ||
| 90 | void EmitUndefU1(EmitContext& ctx, IR::Inst& inst); | ||
| 91 | void EmitUndefU8(EmitContext& ctx, IR::Inst& inst); | ||
| 92 | void EmitUndefU16(EmitContext& ctx, IR::Inst& inst); | ||
| 93 | void EmitUndefU32(EmitContext& ctx, IR::Inst& inst); | ||
| 94 | void EmitUndefU64(EmitContext& ctx, IR::Inst& inst); | ||
| 95 | void EmitLoadGlobalU8(EmitContext& ctx); | ||
| 96 | void EmitLoadGlobalS8(EmitContext& ctx); | ||
| 97 | void EmitLoadGlobalU16(EmitContext& ctx); | ||
| 98 | void EmitLoadGlobalS16(EmitContext& ctx); | ||
| 99 | void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, std::string_view address); | ||
| 100 | void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, std::string_view address); | ||
| 101 | void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, std::string_view address); | ||
| 102 | void EmitWriteGlobalU8(EmitContext& ctx); | ||
| 103 | void EmitWriteGlobalS8(EmitContext& ctx); | ||
| 104 | void EmitWriteGlobalU16(EmitContext& ctx); | ||
| 105 | void EmitWriteGlobalS16(EmitContext& ctx); | ||
| 106 | void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value); | ||
| 107 | void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value); | ||
| 108 | void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value); | ||
| 109 | void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 110 | const IR::Value& offset); | ||
| 111 | void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 112 | const IR::Value& offset); | ||
| 113 | void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 114 | const IR::Value& offset); | ||
| 115 | void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 116 | const IR::Value& offset); | ||
| 117 | void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 118 | const IR::Value& offset); | ||
| 119 | void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 120 | const IR::Value& offset); | ||
| 121 | void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 122 | const IR::Value& offset); | ||
| 123 | void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 124 | std::string_view value); | ||
| 125 | void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 126 | std::string_view value); | ||
| 127 | void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 128 | std::string_view value); | ||
| 129 | void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 130 | std::string_view value); | ||
| 131 | void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 132 | std::string_view value); | ||
| 133 | void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 134 | std::string_view value); | ||
| 135 | void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 136 | std::string_view value); | ||
| 137 | void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, std::string_view offset); | ||
| 138 | void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, std::string_view offset); | ||
| 139 | void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, std::string_view offset); | ||
| 140 | void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, std::string_view offset); | ||
| 141 | void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, std::string_view offset); | ||
| 142 | void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, std::string_view offset); | ||
| 143 | void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, std::string_view offset); | ||
| 144 | void EmitWriteSharedU8(EmitContext& ctx, std::string_view offset, std::string_view value); | ||
| 145 | void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_view value); | ||
| 146 | void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value); | ||
| 147 | void EmitWriteSharedU64(EmitContext& ctx, std::string_view offset, std::string_view value); | ||
| 148 | void EmitWriteSharedU128(EmitContext& ctx, std::string_view offset, std::string_view value); | ||
| 149 | void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1, | ||
| 150 | std::string_view e2); | ||
| 151 | void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1, | ||
| 152 | std::string_view e2, std::string_view e3); | ||
| 153 | void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1, | ||
| 154 | std::string_view e2, std::string_view e3, std::string_view e4); | ||
| 155 | void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 156 | u32 index); | ||
| 157 | void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 158 | u32 index); | ||
| 159 | void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 160 | u32 index); | ||
| 161 | void EmitCompositeInsertU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 162 | std::string_view object, u32 index); | ||
| 163 | void EmitCompositeInsertU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 164 | std::string_view object, u32 index); | ||
| 165 | void EmitCompositeInsertU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 166 | std::string_view object, u32 index); | ||
| 167 | void EmitCompositeConstructF16x2(EmitContext& ctx, std::string_view e1, std::string_view e2); | ||
| 168 | void EmitCompositeConstructF16x3(EmitContext& ctx, std::string_view e1, std::string_view e2, | ||
| 169 | std::string_view e3); | ||
| 170 | void EmitCompositeConstructF16x4(EmitContext& ctx, std::string_view e1, std::string_view e2, | ||
| 171 | std::string_view e3, std::string_view e4); | ||
| 172 | void EmitCompositeExtractF16x2(EmitContext& ctx, std::string_view composite, u32 index); | ||
| 173 | void EmitCompositeExtractF16x3(EmitContext& ctx, std::string_view composite, u32 index); | ||
| 174 | void EmitCompositeExtractF16x4(EmitContext& ctx, std::string_view composite, u32 index); | ||
| 175 | void EmitCompositeInsertF16x2(EmitContext& ctx, std::string_view composite, std::string_view object, | ||
| 176 | u32 index); | ||
| 177 | void EmitCompositeInsertF16x3(EmitContext& ctx, std::string_view composite, std::string_view object, | ||
| 178 | u32 index); | ||
| 179 | void EmitCompositeInsertF16x4(EmitContext& ctx, std::string_view composite, std::string_view object, | ||
| 180 | u32 index); | ||
| 181 | void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1, | ||
| 182 | std::string_view e2); | ||
| 183 | void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1, | ||
| 184 | std::string_view e2, std::string_view e3); | ||
| 185 | void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1, | ||
| 186 | std::string_view e2, std::string_view e3, std::string_view e4); | ||
| 187 | void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 188 | u32 index); | ||
| 189 | void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 190 | u32 index); | ||
| 191 | void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 192 | u32 index); | ||
| 193 | void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 194 | std::string_view object, u32 index); | ||
| 195 | void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 196 | std::string_view object, u32 index); | ||
| 197 | void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 198 | std::string_view object, u32 index); | ||
| 199 | void EmitCompositeConstructF64x2(EmitContext& ctx); | ||
| 200 | void EmitCompositeConstructF64x3(EmitContext& ctx); | ||
| 201 | void EmitCompositeConstructF64x4(EmitContext& ctx); | ||
| 202 | void EmitCompositeExtractF64x2(EmitContext& ctx); | ||
| 203 | void EmitCompositeExtractF64x3(EmitContext& ctx); | ||
| 204 | void EmitCompositeExtractF64x4(EmitContext& ctx); | ||
| 205 | void EmitCompositeInsertF64x2(EmitContext& ctx, std::string_view composite, std::string_view object, | ||
| 206 | u32 index); | ||
| 207 | void EmitCompositeInsertF64x3(EmitContext& ctx, std::string_view composite, std::string_view object, | ||
| 208 | u32 index); | ||
| 209 | void EmitCompositeInsertF64x4(EmitContext& ctx, std::string_view composite, std::string_view object, | ||
| 210 | u32 index); | ||
| 211 | void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, std::string_view cond, | ||
| 212 | std::string_view true_value, std::string_view false_value); | ||
| 213 | void EmitSelectU8(EmitContext& ctx, std::string_view cond, std::string_view true_value, | ||
| 214 | std::string_view false_value); | ||
| 215 | void EmitSelectU16(EmitContext& ctx, std::string_view cond, std::string_view true_value, | ||
| 216 | std::string_view false_value); | ||
| 217 | void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, | ||
| 218 | std::string_view true_value, std::string_view false_value); | ||
| 219 | void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, std::string_view cond, | ||
| 220 | std::string_view true_value, std::string_view false_value); | ||
| 221 | void EmitSelectF16(EmitContext& ctx, std::string_view cond, std::string_view true_value, | ||
| 222 | std::string_view false_value); | ||
| 223 | void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, | ||
| 224 | std::string_view true_value, std::string_view false_value); | ||
| 225 | void EmitSelectF64(EmitContext& ctx, IR::Inst& inst, std::string_view cond, | ||
| 226 | std::string_view true_value, std::string_view false_value); | ||
| 227 | void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst); | ||
| 228 | void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 229 | void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 230 | void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst); | ||
| 231 | void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 232 | void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 233 | void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 234 | void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 235 | void EmitPackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 236 | void EmitUnpackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 237 | void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 238 | void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 239 | void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 240 | void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 241 | void EmitGetZeroFromOp(EmitContext& ctx); | ||
| 242 | void EmitGetSignFromOp(EmitContext& ctx); | ||
| 243 | void EmitGetCarryFromOp(EmitContext& ctx); | ||
| 244 | void EmitGetOverflowFromOp(EmitContext& ctx); | ||
| 245 | void EmitGetSparseFromOp(EmitContext& ctx); | ||
| 246 | void EmitGetInBoundsFromOp(EmitContext& ctx); | ||
| 247 | void EmitFPAbs16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 248 | void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 249 | void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 250 | void EmitFPAdd16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 251 | void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 252 | void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 253 | void EmitFPFma16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, | ||
| 254 | std::string_view c); | ||
| 255 | void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, | ||
| 256 | std::string_view c); | ||
| 257 | void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, | ||
| 258 | std::string_view c); | ||
| 259 | void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 260 | void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 261 | void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 262 | void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 263 | void EmitFPMul16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 264 | void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 265 | void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 266 | void EmitFPNeg16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 267 | void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 268 | void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 269 | void EmitFPSin(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 270 | void EmitFPCos(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 271 | void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 272 | void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 273 | void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 274 | void EmitFPRecip64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 275 | void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 276 | void EmitFPRecipSqrt64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 277 | void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 278 | void EmitFPSaturate16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 279 | void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 280 | void EmitFPSaturate64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 281 | void EmitFPClamp16(EmitContext& ctx, IR::Inst& inst, std::string_view value, | ||
| 282 | std::string_view min_value, std::string_view max_value); | ||
| 283 | void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, | ||
| 284 | std::string_view min_value, std::string_view max_value); | ||
| 285 | void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, std::string_view value, | ||
| 286 | std::string_view min_value, std::string_view max_value); | ||
| 287 | void EmitFPRoundEven16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 288 | void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 289 | void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 290 | void EmitFPFloor16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 291 | void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 292 | void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 293 | void EmitFPCeil16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 294 | void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 295 | void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 296 | void EmitFPTrunc16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 297 | void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 298 | void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 299 | void EmitFPOrdEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); | ||
| 300 | void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); | ||
| 301 | void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); | ||
| 302 | void EmitFPUnordEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); | ||
| 303 | void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 304 | std::string_view rhs); | ||
| 305 | void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 306 | std::string_view rhs); | ||
| 307 | void EmitFPOrdNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); | ||
| 308 | void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 309 | std::string_view rhs); | ||
| 310 | void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 311 | std::string_view rhs); | ||
| 312 | void EmitFPUnordNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); | ||
| 313 | void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 314 | std::string_view rhs); | ||
| 315 | void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 316 | std::string_view rhs); | ||
| 317 | void EmitFPOrdLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); | ||
| 318 | void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 319 | std::string_view rhs); | ||
| 320 | void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 321 | std::string_view rhs); | ||
| 322 | void EmitFPUnordLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); | ||
| 323 | void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 324 | std::string_view rhs); | ||
| 325 | void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 326 | std::string_view rhs); | ||
| 327 | void EmitFPOrdGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); | ||
| 328 | void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 329 | std::string_view rhs); | ||
| 330 | void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 331 | std::string_view rhs); | ||
| 332 | void EmitFPUnordGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); | ||
| 333 | void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 334 | std::string_view rhs); | ||
| 335 | void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 336 | std::string_view rhs); | ||
| 337 | void EmitFPOrdLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); | ||
| 338 | void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 339 | std::string_view rhs); | ||
| 340 | void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 341 | std::string_view rhs); | ||
| 342 | void EmitFPUnordLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); | ||
| 343 | void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 344 | std::string_view rhs); | ||
| 345 | void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 346 | std::string_view rhs); | ||
| 347 | void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); | ||
| 348 | void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 349 | std::string_view rhs); | ||
| 350 | void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 351 | std::string_view rhs); | ||
| 352 | void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); | ||
| 353 | void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 354 | std::string_view rhs); | ||
| 355 | void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 356 | std::string_view rhs); | ||
| 357 | void EmitFPIsNan16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 358 | void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 359 | void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 360 | void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 361 | void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 362 | void EmitISub32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 363 | void EmitISub64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 364 | void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 365 | void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 366 | void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 367 | void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 368 | void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base, | ||
| 369 | std::string_view shift); | ||
| 370 | void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base, | ||
| 371 | std::string_view shift); | ||
| 372 | void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base, | ||
| 373 | std::string_view shift); | ||
| 374 | void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base, | ||
| 375 | std::string_view shift); | ||
| 376 | void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, std::string_view base, | ||
| 377 | std::string_view shift); | ||
| 378 | void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, std::string_view base, | ||
| 379 | std::string_view shift); | ||
| 380 | void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 381 | void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 382 | void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 383 | void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, std::string_view base, | ||
| 384 | std::string_view insert, std::string_view offset, std::string_view count); | ||
| 385 | void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, | ||
| 386 | std::string_view offset, std::string_view count); | ||
| 387 | void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, | ||
| 388 | std::string_view offset, std::string_view count); | ||
| 389 | void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 390 | void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 391 | void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 392 | void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 393 | void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 394 | void EmitSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 395 | void EmitUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 396 | void EmitSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 397 | void EmitUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 398 | void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min, | ||
| 399 | std::string_view max); | ||
| 400 | void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min, | ||
| 401 | std::string_view max); | ||
| 402 | void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); | ||
| 403 | void EmitULessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); | ||
| 404 | void EmitIEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); | ||
| 405 | void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 406 | std::string_view rhs); | ||
| 407 | void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 408 | std::string_view rhs); | ||
| 409 | void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); | ||
| 410 | void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); | ||
| 411 | void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); | ||
| 412 | void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 413 | std::string_view rhs); | ||
| 414 | void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 415 | std::string_view rhs); | ||
| 416 | void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 417 | std::string_view value); | ||
| 418 | void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 419 | std::string_view value); | ||
| 420 | void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 421 | std::string_view value); | ||
| 422 | void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 423 | std::string_view value); | ||
| 424 | void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 425 | std::string_view value); | ||
| 426 | void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 427 | std::string_view value); | ||
| 428 | void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 429 | std::string_view value); | ||
| 430 | void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 431 | std::string_view value); | ||
| 432 | void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 433 | std::string_view value); | ||
| 434 | void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 435 | std::string_view value); | ||
| 436 | void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 437 | std::string_view value); | ||
| 438 | void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 439 | std::string_view value); | ||
| 440 | void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 441 | const IR::Value& offset, std::string_view value); | ||
| 442 | void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 443 | const IR::Value& offset, std::string_view value); | ||
| 444 | void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 445 | const IR::Value& offset, std::string_view value); | ||
| 446 | void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 447 | const IR::Value& offset, std::string_view value); | ||
| 448 | void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 449 | const IR::Value& offset, std::string_view value); | ||
| 450 | void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 451 | const IR::Value& offset, std::string_view value); | ||
| 452 | void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 453 | const IR::Value& offset, std::string_view value); | ||
| 454 | void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 455 | const IR::Value& offset, std::string_view value); | ||
| 456 | void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 457 | const IR::Value& offset, std::string_view value); | ||
| 458 | void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 459 | const IR::Value& offset, std::string_view value); | ||
| 460 | void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 461 | const IR::Value& offset, std::string_view value); | ||
| 462 | void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 463 | const IR::Value& offset, std::string_view value); | ||
| 464 | void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 465 | const IR::Value& offset, std::string_view value); | ||
| 466 | void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 467 | const IR::Value& offset, std::string_view value); | ||
| 468 | void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 469 | const IR::Value& offset, std::string_view value); | ||
| 470 | void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 471 | const IR::Value& offset, std::string_view value); | ||
| 472 | void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 473 | const IR::Value& offset, std::string_view value); | ||
| 474 | void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 475 | const IR::Value& offset, std::string_view value); | ||
| 476 | void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 477 | const IR::Value& offset, std::string_view value); | ||
| 478 | void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 479 | const IR::Value& offset, std::string_view value); | ||
| 480 | void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 481 | const IR::Value& offset, std::string_view value); | ||
| 482 | void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 483 | const IR::Value& offset, std::string_view value); | ||
| 484 | void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 485 | const IR::Value& offset, std::string_view value); | ||
| 486 | void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 487 | const IR::Value& offset, std::string_view value); | ||
| 488 | void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 489 | const IR::Value& offset, std::string_view value); | ||
| 490 | void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 491 | const IR::Value& offset, std::string_view value); | ||
| 492 | void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 493 | const IR::Value& offset, std::string_view value); | ||
| 494 | void EmitGlobalAtomicIAdd32(EmitContext& ctx); | ||
| 495 | void EmitGlobalAtomicSMin32(EmitContext& ctx); | ||
| 496 | void EmitGlobalAtomicUMin32(EmitContext& ctx); | ||
| 497 | void EmitGlobalAtomicSMax32(EmitContext& ctx); | ||
| 498 | void EmitGlobalAtomicUMax32(EmitContext& ctx); | ||
| 499 | void EmitGlobalAtomicInc32(EmitContext& ctx); | ||
| 500 | void EmitGlobalAtomicDec32(EmitContext& ctx); | ||
| 501 | void EmitGlobalAtomicAnd32(EmitContext& ctx); | ||
| 502 | void EmitGlobalAtomicOr32(EmitContext& ctx); | ||
| 503 | void EmitGlobalAtomicXor32(EmitContext& ctx); | ||
| 504 | void EmitGlobalAtomicExchange32(EmitContext& ctx); | ||
| 505 | void EmitGlobalAtomicIAdd64(EmitContext& ctx); | ||
| 506 | void EmitGlobalAtomicSMin64(EmitContext& ctx); | ||
| 507 | void EmitGlobalAtomicUMin64(EmitContext& ctx); | ||
| 508 | void EmitGlobalAtomicSMax64(EmitContext& ctx); | ||
| 509 | void EmitGlobalAtomicUMax64(EmitContext& ctx); | ||
| 510 | void EmitGlobalAtomicInc64(EmitContext& ctx); | ||
| 511 | void EmitGlobalAtomicDec64(EmitContext& ctx); | ||
| 512 | void EmitGlobalAtomicAnd64(EmitContext& ctx); | ||
| 513 | void EmitGlobalAtomicOr64(EmitContext& ctx); | ||
| 514 | void EmitGlobalAtomicXor64(EmitContext& ctx); | ||
| 515 | void EmitGlobalAtomicExchange64(EmitContext& ctx); | ||
| 516 | void EmitGlobalAtomicAddF32(EmitContext& ctx); | ||
| 517 | void EmitGlobalAtomicAddF16x2(EmitContext& ctx); | ||
| 518 | void EmitGlobalAtomicAddF32x2(EmitContext& ctx); | ||
| 519 | void EmitGlobalAtomicMinF16x2(EmitContext& ctx); | ||
| 520 | void EmitGlobalAtomicMinF32x2(EmitContext& ctx); | ||
| 521 | void EmitGlobalAtomicMaxF16x2(EmitContext& ctx); | ||
| 522 | void EmitGlobalAtomicMaxF32x2(EmitContext& ctx); | ||
| 523 | void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 524 | void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 525 | void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 526 | void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 527 | void EmitConvertS16F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 528 | void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 529 | void EmitConvertS16F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 530 | void EmitConvertS32F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 531 | void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 532 | void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 533 | void EmitConvertS64F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 534 | void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 535 | void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 536 | void EmitConvertU16F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 537 | void EmitConvertU16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 538 | void EmitConvertU16F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 539 | void EmitConvertU32F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 540 | void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 541 | void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 542 | void EmitConvertU64F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 543 | void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 544 | void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 545 | void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 546 | void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 547 | void EmitConvertF16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 548 | void EmitConvertF32F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 549 | void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 550 | void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 551 | void EmitConvertF16S8(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 552 | void EmitConvertF16S16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 553 | void EmitConvertF16S32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 554 | void EmitConvertF16S64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 555 | void EmitConvertF16U8(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 556 | void EmitConvertF16U16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 557 | void EmitConvertF16U32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 558 | void EmitConvertF16U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 559 | void EmitConvertF32S8(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 560 | void EmitConvertF32S16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 561 | void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 562 | void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 563 | void EmitConvertF32U8(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 564 | void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 565 | void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 566 | void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 567 | void EmitConvertF64S8(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 568 | void EmitConvertF64S16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 569 | void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 570 | void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 571 | void EmitConvertF64U8(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 572 | void EmitConvertF64U16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 573 | void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 574 | void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 575 | void EmitBindlessImageSampleImplicitLod(EmitContext&); | ||
| 576 | void EmitBindlessImageSampleExplicitLod(EmitContext&); | ||
| 577 | void EmitBindlessImageSampleDrefImplicitLod(EmitContext&); | ||
| 578 | void EmitBindlessImageSampleDrefExplicitLod(EmitContext&); | ||
| 579 | void EmitBindlessImageGather(EmitContext&); | ||
| 580 | void EmitBindlessImageGatherDref(EmitContext&); | ||
| 581 | void EmitBindlessImageFetch(EmitContext&); | ||
| 582 | void EmitBindlessImageQueryDimensions(EmitContext&); | ||
| 583 | void EmitBindlessImageQueryLod(EmitContext&); | ||
| 584 | void EmitBindlessImageGradient(EmitContext&); | ||
| 585 | void EmitBindlessImageRead(EmitContext&); | ||
| 586 | void EmitBindlessImageWrite(EmitContext&); | ||
| 587 | void EmitBoundImageSampleImplicitLod(EmitContext&); | ||
| 588 | void EmitBoundImageSampleExplicitLod(EmitContext&); | ||
| 589 | void EmitBoundImageSampleDrefImplicitLod(EmitContext&); | ||
| 590 | void EmitBoundImageSampleDrefExplicitLod(EmitContext&); | ||
| 591 | void EmitBoundImageGather(EmitContext&); | ||
| 592 | void EmitBoundImageGatherDref(EmitContext&); | ||
| 593 | void EmitBoundImageFetch(EmitContext&); | ||
| 594 | void EmitBoundImageQueryDimensions(EmitContext&); | ||
| 595 | void EmitBoundImageQueryLod(EmitContext&); | ||
| 596 | void EmitBoundImageGradient(EmitContext&); | ||
| 597 | void EmitBoundImageRead(EmitContext&); | ||
| 598 | void EmitBoundImageWrite(EmitContext&); | ||
| 599 | void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 600 | std::string_view coords, std::string_view bias_lc, | ||
| 601 | const IR::Value& offset); | ||
| 602 | void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 603 | std::string_view coords, std::string_view lod_lc, | ||
| 604 | const IR::Value& offset); | ||
| 605 | void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 606 | std::string_view coords, std::string_view dref, | ||
| 607 | std::string_view bias_lc, const IR::Value& offset); | ||
| 608 | void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 609 | std::string_view coords, std::string_view dref, | ||
| 610 | std::string_view lod_lc, const IR::Value& offset); | ||
| 611 | void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 612 | std::string_view coords, const IR::Value& offset, const IR::Value& offset2); | ||
| 613 | void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 614 | std::string_view coords, const IR::Value& offset, const IR::Value& offset2, | ||
| 615 | std::string_view dref); | ||
| 616 | void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 617 | std::string_view coords, std::string_view offset, std::string_view lod, | ||
| 618 | std::string_view ms); | ||
| 619 | void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 620 | std::string_view lod); | ||
| 621 | void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 622 | std::string_view coords); | ||
| 623 | void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 624 | std::string_view coords, const IR::Value& derivatives, | ||
| 625 | const IR::Value& offset, const IR::Value& lod_clamp); | ||
| 626 | void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 627 | std::string_view coords); | ||
| 628 | void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 629 | std::string_view coords, std::string_view color); | ||
| 630 | void EmitBindlessImageAtomicIAdd32(EmitContext&); | ||
| 631 | void EmitBindlessImageAtomicSMin32(EmitContext&); | ||
| 632 | void EmitBindlessImageAtomicUMin32(EmitContext&); | ||
| 633 | void EmitBindlessImageAtomicSMax32(EmitContext&); | ||
| 634 | void EmitBindlessImageAtomicUMax32(EmitContext&); | ||
| 635 | void EmitBindlessImageAtomicInc32(EmitContext&); | ||
| 636 | void EmitBindlessImageAtomicDec32(EmitContext&); | ||
| 637 | void EmitBindlessImageAtomicAnd32(EmitContext&); | ||
| 638 | void EmitBindlessImageAtomicOr32(EmitContext&); | ||
| 639 | void EmitBindlessImageAtomicXor32(EmitContext&); | ||
| 640 | void EmitBindlessImageAtomicExchange32(EmitContext&); | ||
| 641 | void EmitBoundImageAtomicIAdd32(EmitContext&); | ||
| 642 | void EmitBoundImageAtomicSMin32(EmitContext&); | ||
| 643 | void EmitBoundImageAtomicUMin32(EmitContext&); | ||
| 644 | void EmitBoundImageAtomicSMax32(EmitContext&); | ||
| 645 | void EmitBoundImageAtomicUMax32(EmitContext&); | ||
| 646 | void EmitBoundImageAtomicInc32(EmitContext&); | ||
| 647 | void EmitBoundImageAtomicDec32(EmitContext&); | ||
| 648 | void EmitBoundImageAtomicAnd32(EmitContext&); | ||
| 649 | void EmitBoundImageAtomicOr32(EmitContext&); | ||
| 650 | void EmitBoundImageAtomicXor32(EmitContext&); | ||
| 651 | void EmitBoundImageAtomicExchange32(EmitContext&); | ||
| 652 | void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 653 | std::string_view coords, std::string_view value); | ||
| 654 | void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 655 | std::string_view coords, std::string_view value); | ||
| 656 | void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 657 | std::string_view coords, std::string_view value); | ||
| 658 | void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 659 | std::string_view coords, std::string_view value); | ||
| 660 | void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 661 | std::string_view coords, std::string_view value); | ||
| 662 | void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 663 | std::string_view coords, std::string_view value); | ||
| 664 | void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 665 | std::string_view coords, std::string_view value); | ||
| 666 | void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 667 | std::string_view coords, std::string_view value); | ||
| 668 | void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 669 | std::string_view coords, std::string_view value); | ||
| 670 | void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 671 | std::string_view coords, std::string_view value); | ||
| 672 | void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 673 | std::string_view coords, std::string_view value); | ||
| 674 | void EmitLaneId(EmitContext& ctx, IR::Inst& inst); | ||
| 675 | void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred); | ||
| 676 | void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred); | ||
| 677 | void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred); | ||
| 678 | void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred); | ||
| 679 | void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst); | ||
| 680 | void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst); | ||
| 681 | void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst); | ||
| 682 | void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst); | ||
| 683 | void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst); | ||
| 684 | void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, | ||
| 685 | std::string_view index, std::string_view clamp, | ||
| 686 | std::string_view segmentation_mask); | ||
| 687 | void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, | ||
| 688 | std::string_view clamp, std::string_view segmentation_mask); | ||
| 689 | void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, | ||
| 690 | std::string_view index, std::string_view clamp, | ||
| 691 | std::string_view segmentation_mask); | ||
| 692 | void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, | ||
| 693 | std::string_view index, std::string_view clamp, | ||
| 694 | std::string_view segmentation_mask); | ||
| 695 | void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, std::string_view op_a, std::string_view op_b, | ||
| 696 | std::string_view swizzle); | ||
| 697 | void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a); | ||
| 698 | void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a); | ||
| 699 | void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a); | ||
| 700 | void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a); | ||
| 701 | |||
| 702 | } // namespace Shader::Backend::GLSL | ||
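Taken together, these declarations are the GLSL backend's per-IR-opcode surface: operand values arrive either as pre-formatted GLSL expression strings (std::string_view) or as raw IR::Value bindings/offsets, and each emitter appends statements to the shader through EmitContext. A minimal sketch of how a caller could route an instruction to them; the switch-based dispatch below is an illustrative assumption, not yuzu's actual invoker, and it presumes the project headers this file ships with:

    // Illustrative dispatch only; the real invoker is structured differently.
    void EmitInst(EmitContext& ctx, IR::Inst* inst) {
        switch (inst->GetOpcode()) {
        case IR::Opcode::IAdd32:
            // Operands are consumed from the allocator as GLSL expressions.
            EmitIAdd32(ctx, *inst, ctx.var_alloc.Consume(inst->Arg(0)),
                       ctx.var_alloc.Consume(inst->Arg(1)));
            break;
        case IR::Opcode::LogicalOr:
            EmitLogicalOr(ctx, *inst, ctx.var_alloc.Consume(inst->Arg(0)),
                          ctx.var_alloc.Consume(inst->Arg(1)));
            break;
        default:
            throw NotImplementedException("Unhandled opcode");
        }
    }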
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp new file mode 100644 index 000000000..38419f88f --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp | |||
| @@ -0,0 +1,253 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 10 | |||
| 11 | namespace Shader::Backend::GLSL { | ||
| 12 | namespace { | ||
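// When a GetZeroFromOp/GetSignFromOp pseudo-instruction observes a result,
// these helpers append a boolean assignment for it and invalidate the
// pseudo-op so the backend does not visit it again as a standalone op.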
| 13 | void SetZeroFlag(EmitContext& ctx, IR::Inst& inst, std::string_view result) { | ||
| 14 | IR::Inst* const zero{inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)}; | ||
| 15 | if (!zero) { | ||
| 16 | return; | ||
| 17 | } | ||
| 18 | ctx.AddU1("{}={}==0;", *zero, result); | ||
| 19 | zero->Invalidate(); | ||
| 20 | } | ||
| 21 | |||
| 22 | void SetSignFlag(EmitContext& ctx, IR::Inst& inst, std::string_view result) { | ||
| 23 | IR::Inst* const sign{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp)}; | ||
| 24 | if (!sign) { | ||
| 25 | return; | ||
| 26 | } | ||
| 27 | ctx.AddU1("{}=int({})<0;", *sign, result); | ||
| 28 | sign->Invalidate(); | ||
| 29 | } | ||
| 30 | |||
| 31 | void BitwiseLogicalOp(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, | ||
| 32 | char lop) { | ||
| 33 | const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; | ||
| 34 | ctx.Add("{}={}{}{};", result, a, lop, b); | ||
| 35 | SetZeroFlag(ctx, inst, result); | ||
| 36 | SetSignFlag(ctx, inst, result); | ||
| 37 | } | ||
| 38 | } // Anonymous namespace | ||
| 39 | |||
| 40 | void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 41 | // Compute the overflow CC first as it requires the original operand values, | ||
| 42 | // which may be overwritten by the result of the addition | ||
| 43 | if (IR::Inst* overflow{inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp)}) { | ||
| 44 | // https://stackoverflow.com/questions/55468823/how-to-detect-integer-overflow-in-c | ||
| 45 | constexpr u32 s32_max{static_cast<u32>(std::numeric_limits<s32>::max())}; | ||
| 46 | const auto sub_a{fmt::format("{}u-{}", s32_max, a)}; | ||
| 47 | const auto positive_result{fmt::format("int({})>int({})", b, sub_a)}; | ||
| 48 | const auto negative_result{fmt::format("int({})<int({})", b, sub_a)}; | ||
| 49 | ctx.AddU1("{}=int({})>=0?{}:{};", *overflow, a, positive_result, negative_result); | ||
| 50 | overflow->Invalidate(); | ||
| 51 | } | ||
| 52 | const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; | ||
| 53 | if (IR::Inst* const carry{inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp)}) { | ||
| 54 | ctx.uses_cc_carry = true; | ||
| 55 | ctx.Add("{}=uaddCarry({},{},carry);", result, a, b); | ||
| 56 | ctx.AddU1("{}=carry!=0;", *carry); | ||
| 57 | carry->Invalidate(); | ||
| 58 | } else { | ||
| 59 | ctx.Add("{}={}+{};", result, a, b); | ||
| 60 | } | ||
| 61 | SetZeroFlag(ctx, inst, result); | ||
| 62 | SetSignFlag(ctx, inst, result); | ||
| 63 | } | ||
| 64 | |||
| 65 | void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 66 | ctx.AddU64("{}={}+{};", inst, a, b); | ||
| 67 | } | ||
| 68 | |||
| 69 | void EmitISub32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 70 | ctx.AddU32("{}={}-{};", inst, a, b); | ||
| 71 | } | ||
| 72 | |||
| 73 | void EmitISub64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 74 | ctx.AddU64("{}={}-{};", inst, a, b); | ||
| 75 | } | ||
| 76 | |||
| 77 | void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 78 | ctx.AddU32("{}=uint({}*{});", inst, a, b); | ||
| 79 | } | ||
| 80 | |||
| 81 | void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 82 | ctx.AddU32("{}=uint(-({}));", inst, value); | ||
| 83 | } | ||
| 84 | |||
| 85 | void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 86 | ctx.AddU64("{}=-({});", inst, value); | ||
| 87 | } | ||
| 88 | |||
| 89 | void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 90 | ctx.AddU32("{}=abs(int({}));", inst, value); | ||
| 91 | } | ||
| 92 | |||
| 93 | void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base, | ||
| 94 | std::string_view shift) { | ||
| 95 | ctx.AddU32("{}={}<<{};", inst, base, shift); | ||
| 96 | } | ||
| 97 | |||
| 98 | void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base, | ||
| 99 | std::string_view shift) { | ||
| 100 | ctx.AddU64("{}={}<<{};", inst, base, shift); | ||
| 101 | } | ||
| 102 | |||
| 103 | void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base, | ||
| 104 | std::string_view shift) { | ||
| 105 | ctx.AddU32("{}={}>>{};", inst, base, shift); | ||
| 106 | } | ||
| 107 | |||
| 108 | void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base, | ||
| 109 | std::string_view shift) { | ||
| 110 | ctx.AddU64("{}={}>>{};", inst, base, shift); | ||
| 111 | } | ||
| 112 | |||
| 113 | void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, std::string_view base, | ||
| 114 | std::string_view shift) { | ||
| 115 | ctx.AddU32("{}=int({})>>{};", inst, base, shift); | ||
| 116 | } | ||
| 117 | |||
| 118 | void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, std::string_view base, | ||
| 119 | std::string_view shift) { | ||
| 120 | ctx.AddU64("{}=int64_t({})>>{};", inst, base, shift); | ||
| 121 | } | ||
| 122 | |||
| 123 | void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 124 | BitwiseLogicalOp(ctx, inst, a, b, '&'); | ||
| 125 | } | ||
| 126 | |||
| 127 | void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 128 | BitwiseLogicalOp(ctx, inst, a, b, '|'); | ||
| 129 | } | ||
| 130 | |||
| 131 | void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 132 | BitwiseLogicalOp(ctx, inst, a, b, '^'); | ||
| 133 | } | ||
| 134 | |||
| 135 | void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, std::string_view base, | ||
| 136 | std::string_view insert, std::string_view offset, std::string_view count) { | ||
| 137 | ctx.AddU32("{}=bitfieldInsert({},{},int({}),int({}));", inst, base, insert, offset, count); | ||
| 138 | } | ||
| 139 | |||
| 140 | void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, | ||
| 141 | std::string_view offset, std::string_view count) { | ||
| 142 | const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; | ||
| 143 | ctx.Add("{}=uint(bitfieldExtract(int({}),int({}),int({})));", result, base, offset, count); | ||
| 144 | SetZeroFlag(ctx, inst, result); | ||
| 145 | SetSignFlag(ctx, inst, result); | ||
| 146 | } | ||
| 147 | |||
| 148 | void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, | ||
| 149 | std::string_view offset, std::string_view count) { | ||
| 150 | const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; | ||
| 151 | ctx.Add("{}=uint(bitfieldExtract(uint({}),int({}),int({})));", result, base, offset, count); | ||
| 152 | SetZeroFlag(ctx, inst, result); | ||
| 153 | SetSignFlag(ctx, inst, result); | ||
| 154 | } | ||
| 155 | |||
| 156 | void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 157 | ctx.AddU32("{}=bitfieldReverse({});", inst, value); | ||
| 158 | } | ||
| 159 | |||
| 160 | void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 161 | ctx.AddU32("{}=bitCount({});", inst, value); | ||
| 162 | } | ||
| 163 | |||
| 164 | void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 165 | ctx.AddU32("{}=~{};", inst, value); | ||
| 166 | } | ||
| 167 | |||
| 168 | void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 169 | ctx.AddU32("{}=findMSB(int({}));", inst, value); | ||
| 170 | } | ||
| 171 | |||
| 172 | void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 173 | ctx.AddU32("{}=findMSB(uint({}));", inst, value); | ||
| 174 | } | ||
| 175 | |||
| 176 | void EmitSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 177 | ctx.AddU32("{}=min(int({}),int({}));", inst, a, b); | ||
| 178 | } | ||
| 179 | |||
| 180 | void EmitUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 181 | ctx.AddU32("{}=min(uint({}),uint({}));", inst, a, b); | ||
| 182 | } | ||
| 183 | |||
| 184 | void EmitSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 185 | ctx.AddU32("{}=max(int({}),int({}));", inst, a, b); | ||
| 186 | } | ||
| 187 | |||
| 188 | void EmitUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 189 | ctx.AddU32("{}=max(uint({}),uint({}));", inst, a, b); | ||
| 190 | } | ||
| 191 | |||
| 192 | void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min, | ||
| 193 | std::string_view max) { | ||
| 194 | const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; | ||
| 195 | ctx.Add("{}=clamp(int({}),int({}),int({}));", result, value, min, max); | ||
| 196 | SetZeroFlag(ctx, inst, result); | ||
| 197 | SetSignFlag(ctx, inst, result); | ||
| 198 | } | ||
| 199 | |||
| 200 | void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min, | ||
| 201 | std::string_view max) { | ||
| 202 | const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; | ||
| 203 | ctx.Add("{}=clamp(uint({}),uint({}),uint({}));", result, value, min, max); | ||
| 204 | SetZeroFlag(ctx, inst, result); | ||
| 205 | SetSignFlag(ctx, inst, result); | ||
| 206 | } | ||
| 207 | |||
| 208 | void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { | ||
| 209 | ctx.AddU1("{}=int({})<int({});", inst, lhs, rhs); | ||
| 210 | } | ||
| 211 | |||
| 212 | void EmitULessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { | ||
| 213 | ctx.AddU1("{}=uint({})<uint({});", inst, lhs, rhs); | ||
| 214 | } | ||
| 215 | |||
| 216 | void EmitIEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { | ||
| 217 | ctx.AddU1("{}={}=={};", inst, lhs, rhs); | ||
| 218 | } | ||
| 219 | |||
| 220 | void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 221 | std::string_view rhs) { | ||
| 222 | ctx.AddU1("{}=int({})<=int({});", inst, lhs, rhs); | ||
| 223 | } | ||
| 224 | |||
| 225 | void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 226 | std::string_view rhs) { | ||
| 227 | ctx.AddU1("{}=uint({})<=uint({});", inst, lhs, rhs); | ||
| 228 | } | ||
| 229 | |||
| 230 | void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 231 | std::string_view rhs) { | ||
| 232 | ctx.AddU1("{}=int({})>int({});", inst, lhs, rhs); | ||
| 233 | } | ||
| 234 | |||
| 235 | void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 236 | std::string_view rhs) { | ||
| 237 | ctx.AddU1("{}=uint({})>uint({});", inst, lhs, rhs); | ||
| 238 | } | ||
| 239 | |||
| 240 | void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { | ||
| 241 | ctx.AddU1("{}={}!={};", inst, lhs, rhs); | ||
| 242 | } | ||
| 243 | |||
| 244 | void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 245 | std::string_view rhs) { | ||
| 246 | ctx.AddU1("{}=int({})>=int({});", inst, lhs, rhs); | ||
| 247 | } | ||
| 248 | |||
| 249 | void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 250 | std::string_view rhs) { | ||
| 251 | ctx.AddU1("{}=uint({})>=uint({});", inst, lhs, rhs); | ||
| 252 | } | ||
| 253 | } // namespace Shader::Backend::GLSL | ||
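The overflow path in EmitIAdd32 leans on a classic identity (per the linked Stack Overflow answer): for non-negative a, the signed sum a+b exceeds INT32_MAX exactly when b > INT32_MAX - a, so overflow is detected without ever performing a wrapping add. A small host-side check of that positive-operand half of the identity (illustrative only, not backend code):

    #include <cassert>
    #include <cstdint>
    #include <limits>

    int main() {
        constexpr int32_t s32_max = std::numeric_limits<int32_t>::max();
        const int32_t a_samples[] = {0, 1, 41, s32_max / 2, s32_max};
        const int32_t b_samples[] = {std::numeric_limits<int32_t>::min(), -1, 0, 1, s32_max};
        for (const int32_t a : a_samples) {
            for (const int32_t b : b_samples) {
                // Emitted predicate (a >= 0 case): no wrapping add is needed.
                const bool emitted = b > s32_max - a;
                // 64-bit reference computation of "a + b overflows int32".
                const bool reference = int64_t{a} + int64_t{b} > int64_t{s32_max};
                assert(emitted == reference);
            }
        }
        return 0;
    }

The emitted GLSL additionally covers negative a with the mirrored `negative_result` comparison, selecting between the two with `int(a)>=0?...:...`.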
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp new file mode 100644 index 000000000..338ff4bd6 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp | |||
| @@ -0,0 +1,28 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 10 | |||
| 11 | namespace Shader::Backend::GLSL { | ||
| 12 | |||
| 13 | void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 14 | ctx.AddU1("{}={}||{};", inst, a, b); | ||
| 15 | } | ||
| 16 | |||
| 17 | void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 18 | ctx.AddU1("{}={}&&{};", inst, a, b); | ||
| 19 | } | ||
| 20 | |||
| 21 | void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 22 | ctx.AddU1("{}={}^^{};", inst, a, b); | ||
| 23 | } | ||
| 24 | |||
| 25 | void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 26 | ctx.AddU1("{}=!{};", inst, value); | ||
| 27 | } | ||
| 28 | } // namespace Shader::Backend::GLSL | ||
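One detail worth calling out in EmitLogicalXor: `^^` is GLSL's logical exclusive-or operator on booleans, which has no direct C or C++ counterpart. A quick host-side equivalence sketch (illustrative only; `!=` on bool is the usual C++ spelling):

    #include <cassert>

    int main() {
        for (bool a : {false, true}) {
            for (bool b : {false, true}) {
                const bool logical_xor = (a && !b) || (!a && b); // what GLSL's a ^^ b computes
                assert(logical_xor == (a != b));                 // the C++ equivalent
            }
        }
        return 0;
    }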
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp new file mode 100644 index 000000000..e3957491f --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp | |||
| @@ -0,0 +1,202 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 10 | #include "shader_recompiler/profile.h" | ||
| 11 | |||
| 12 | namespace Shader::Backend::GLSL { | ||
| 13 | namespace { | ||
| 14 | constexpr char cas_loop[]{"for(;;){{uint old_value={};uint " | ||
| 15 | "cas_result=atomicCompSwap({},old_value,bitfieldInsert({},{},{},{}));" | ||
| 16 | "if(cas_result==old_value){{break;}}}}"}; | ||
| 17 | |||
| 18 | void SsboWriteCas(EmitContext& ctx, const IR::Value& binding, std::string_view offset_var, | ||
| 19 | std::string_view value, std::string_view bit_offset, u32 num_bits) { | ||
| 20 | const auto ssbo{fmt::format("{}_ssbo{}[{}>>2]", ctx.stage_name, binding.U32(), offset_var)}; | ||
| 21 | ctx.Add(cas_loop, ssbo, ssbo, ssbo, value, bit_offset, num_bits); | ||
| 22 | } | ||
| 23 | } // Anonymous namespace | ||
| 24 | |||
| 25 | void EmitLoadGlobalU8(EmitContext&) { | ||
| 26 | NotImplemented(); | ||
| 27 | } | ||
| 28 | |||
| 29 | void EmitLoadGlobalS8(EmitContext&) { | ||
| 30 | NotImplemented(); | ||
| 31 | } | ||
| 32 | |||
| 33 | void EmitLoadGlobalU16(EmitContext&) { | ||
| 34 | NotImplemented(); | ||
| 35 | } | ||
| 36 | |||
| 37 | void EmitLoadGlobalS16(EmitContext&) { | ||
| 38 | NotImplemented(); | ||
| 39 | } | ||
| 40 | |||
| 41 | void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, std::string_view address) { | ||
| 42 | if (ctx.profile.support_int64) { | ||
| 43 | return ctx.AddU32("{}=LoadGlobal32({});", inst, address); | ||
| 44 | } | ||
| 45 | LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); | ||
| 46 | ctx.AddU32("{}=0u;", inst); | ||
| 47 | } | ||
| 48 | |||
| 49 | void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, std::string_view address) { | ||
| 50 | if (ctx.profile.support_int64) { | ||
| 51 | return ctx.AddU32x2("{}=LoadGlobal64({});", inst, address); | ||
| 52 | } | ||
| 53 | LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); | ||
| 54 | ctx.AddU32x2("{}=uvec2(0);", inst); | ||
| 55 | } | ||
| 56 | |||
| 57 | void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, std::string_view address) { | ||
| 58 | if (ctx.profile.support_int64) { | ||
| 59 | return ctx.AddU32x4("{}=LoadGlobal128({});", inst, address); | ||
| 60 | } | ||
| 61 | LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); | ||
| 62 | ctx.AddU32x4("{}=uvec4(0);", inst); | ||
| 63 | } | ||
| 64 | |||
| 65 | void EmitWriteGlobalU8(EmitContext&) { | ||
| 66 | NotImplemented(); | ||
| 67 | } | ||
| 68 | |||
| 69 | void EmitWriteGlobalS8(EmitContext&) { | ||
| 70 | NotImplemented(); | ||
| 71 | } | ||
| 72 | |||
| 73 | void EmitWriteGlobalU16(EmitContext&) { | ||
| 74 | NotImplemented(); | ||
| 75 | } | ||
| 76 | |||
| 77 | void EmitWriteGlobalS16(EmitContext&) { | ||
| 78 | NotImplemented(); | ||
| 79 | } | ||
| 80 | |||
| 81 | void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value) { | ||
| 82 | if (ctx.profile.support_int64) { | ||
| 83 | return ctx.Add("WriteGlobal32({},{});", address, value); | ||
| 84 | } | ||
| 85 | LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); | ||
| 86 | } | ||
| 87 | |||
| 88 | void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value) { | ||
| 89 | if (ctx.profile.support_int64) { | ||
| 90 | return ctx.Add("WriteGlobal64({},{});", address, value); | ||
| 91 | } | ||
| 92 | LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); | ||
| 93 | } | ||
| 94 | |||
| 95 | void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value) { | ||
| 96 | if (ctx.profile.support_int64) { | ||
| 97 | return ctx.Add("WriteGlobal128({},{});", address, value); | ||
| 98 | } | ||
| 99 | LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); | ||
| 100 | } | ||
| 101 | |||
| 102 | void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 103 | const IR::Value& offset) { | ||
| 104 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||
| 105 | ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}>>2],int({}%4)*8,8);", inst, ctx.stage_name, | ||
| 106 | binding.U32(), offset_var, offset_var); | ||
| 107 | } | ||
| 108 | |||
| 109 | void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 110 | const IR::Value& offset) { | ||
| 111 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||
| 112 | ctx.AddU32("{}=bitfieldExtract(int({}_ssbo{}[{}>>2]),int({}%4)*8,8);", inst, ctx.stage_name, | ||
| 113 | binding.U32(), offset_var, offset_var); | ||
| 114 | } | ||
| 115 | |||
| 116 | void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 117 | const IR::Value& offset) { | ||
| 118 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||
| 119 | ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}>>2],int(({}>>1)%2)*16,16);", inst, ctx.stage_name, | ||
| 120 | binding.U32(), offset_var, offset_var); | ||
| 121 | } | ||
| 122 | |||
| 123 | void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 124 | const IR::Value& offset) { | ||
| 125 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||
| 126 | ctx.AddU32("{}=bitfieldExtract(int({}_ssbo{}[{}>>2]),int(({}>>1)%2)*16,16);", inst, | ||
| 127 | ctx.stage_name, binding.U32(), offset_var, offset_var); | ||
| 128 | } | ||
| 129 | |||
| 130 | void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 131 | const IR::Value& offset) { | ||
| 132 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||
| 133 | ctx.AddU32("{}={}_ssbo{}[{}>>2];", inst, ctx.stage_name, binding.U32(), offset_var); | ||
| 134 | } | ||
| 135 | |||
| 136 | void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 137 | const IR::Value& offset) { | ||
| 138 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||
| 139 | ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}+4)>>2]);", inst, ctx.stage_name, | ||
| 140 | binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var); | ||
| 141 | } | ||
| 142 | |||
| 143 | void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 144 | const IR::Value& offset) { | ||
| 145 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||
| 146 | ctx.AddU32x4("{}=uvec4({}_ssbo{}[{}>>2],{}_ssbo{}[({}+4)>>2],{}_ssbo{}[({}+8)>>2],{}_ssbo{}[({}" | ||
| 147 | "+12)>>2]);", | ||
| 148 | inst, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), | ||
| 149 | offset_var, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, | ||
| 150 | binding.U32(), offset_var); | ||
| 151 | } | ||
| 152 | |||
| 153 | void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 154 | std::string_view value) { | ||
| 155 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||
| 156 | const auto bit_offset{fmt::format("int({}%4)*8", offset_var)}; | ||
| 157 | SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 8); | ||
| 158 | } | ||
| 159 | |||
| 160 | void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 161 | std::string_view value) { | ||
| 162 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||
| 163 | const auto bit_offset{fmt::format("int({}%4)*8", offset_var)}; | ||
| 164 | SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 8); | ||
| 165 | } | ||
| 166 | |||
| 167 | void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 168 | std::string_view value) { | ||
| 169 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||
| 170 | const auto bit_offset{fmt::format("int(({}>>1)%2)*16", offset_var)}; | ||
| 171 | SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 16); | ||
| 172 | } | ||
| 173 | |||
| 174 | void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 175 | std::string_view value) { | ||
| 176 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||
| 177 | const auto bit_offset{fmt::format("int(({}>>1)%2)*16", offset_var)}; | ||
| 178 | SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 16); | ||
| 179 | } | ||
| 180 | |||
| 181 | void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 182 | std::string_view value) { | ||
| 183 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||
| 184 | ctx.Add("{}_ssbo{}[{}>>2]={};", ctx.stage_name, binding.U32(), offset_var, value); | ||
| 185 | } | ||
| 186 | |||
| 187 | void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 188 | std::string_view value) { | ||
| 189 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||
| 190 | ctx.Add("{}_ssbo{}[{}>>2]={}.x;", ctx.stage_name, binding.U32(), offset_var, value); | ||
| 191 | ctx.Add("{}_ssbo{}[({}+4)>>2]={}.y;", ctx.stage_name, binding.U32(), offset_var, value); | ||
| 192 | } | ||
| 193 | |||
| 194 | void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 195 | std::string_view value) { | ||
| 196 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||
| 197 | ctx.Add("{}_ssbo{}[{}>>2]={}.x;", ctx.stage_name, binding.U32(), offset_var, value); | ||
| 198 | ctx.Add("{}_ssbo{}[({}+4)>>2]={}.y;", ctx.stage_name, binding.U32(), offset_var, value); | ||
| 199 | ctx.Add("{}_ssbo{}[({}+8)>>2]={}.z;", ctx.stage_name, binding.U32(), offset_var, value); | ||
| 200 | ctx.Add("{}_ssbo{}[({}+12)>>2]={}.w;", ctx.stage_name, binding.U32(), offset_var, value); | ||
| 201 | } | ||
| 202 | } // namespace Shader::Backend::GLSL | ||
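The sub-word SSBO writes above emulate byte and halfword stores on a 32-bit-element buffer with an atomicCompSwap retry loop. The same read-modify-write shape, sketched host-side with std::atomic (illustrative only; BitfieldInsert mirrors GLSL's bitfieldInsert built-in):

    #include <atomic>
    #include <cassert>
    #include <cstdint>

    // Equivalent of GLSL bitfieldInsert(base, insert, offset, bits).
    static uint32_t BitfieldInsert(uint32_t base, uint32_t insert, int offset, int bits) {
        const uint32_t mask = ((bits < 32 ? (1u << bits) : 0u) - 1u) << offset;
        return (base & ~mask) | ((insert << offset) & mask);
    }

    // Store one byte into a word-addressed atomic buffer, retrying until no
    // other writer has touched the word between the read and the swap.
    static void StoreByte(std::atomic<uint32_t>& word, uint32_t byte_offset, uint8_t value) {
        uint32_t old_value = word.load();
        for (;;) {
            const uint32_t desired =
                BitfieldInsert(old_value, value, int(byte_offset % 4) * 8, 8);
            if (word.compare_exchange_weak(old_value, desired)) {
                break; // The word was unchanged; the narrow store is committed.
            }
            // compare_exchange_weak reloaded old_value; retry with the fresh word.
        }
    }

    int main() {
        std::atomic<uint32_t> word{0xaabbccdd};
        StoreByte(word, 1, 0x11);
        assert(word.load() == 0xaabb11dd);
        return 0;
    }

One difference from the sketch: the emitted GLSL re-reads the SSBO word as bitfieldInsert's base rather than reusing old_value; an interleaved write still forces a retry because atomicCompSwap compares against old_value.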
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp new file mode 100644 index 000000000..f420fe388 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp | |||
| @@ -0,0 +1,105 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 10 | |||
| 11 | #ifdef _MSC_VER | ||
| 12 | #pragma warning(disable : 4100) | ||
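// C4100 ("unreferenced formal parameter"): the stubs below deliberately ignore their arguments.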
| 13 | #endif | ||
| 14 | |||
| 15 | namespace Shader::Backend::GLSL { | ||
| 16 | |||
| 17 | void EmitGetRegister(EmitContext& ctx) { | ||
| 18 | NotImplemented(); | ||
| 19 | } | ||
| 20 | |||
| 21 | void EmitSetRegister(EmitContext& ctx) { | ||
| 22 | NotImplemented(); | ||
| 23 | } | ||
| 24 | |||
| 25 | void EmitGetPred(EmitContext& ctx) { | ||
| 26 | NotImplemented(); | ||
| 27 | } | ||
| 28 | |||
| 29 | void EmitSetPred(EmitContext& ctx) { | ||
| 30 | NotImplemented(); | ||
| 31 | } | ||
| 32 | |||
| 33 | void EmitSetGotoVariable(EmitContext& ctx) { | ||
| 34 | NotImplemented(); | ||
| 35 | } | ||
| 36 | |||
| 37 | void EmitGetGotoVariable(EmitContext& ctx) { | ||
| 38 | NotImplemented(); | ||
| 39 | } | ||
| 40 | |||
| 41 | void EmitSetIndirectBranchVariable(EmitContext& ctx) { | ||
| 42 | NotImplemented(); | ||
| 43 | } | ||
| 44 | |||
| 45 | void EmitGetIndirectBranchVariable(EmitContext& ctx) { | ||
| 46 | NotImplemented(); | ||
| 47 | } | ||
| 48 | |||
| 49 | void EmitGetZFlag(EmitContext& ctx) { | ||
| 50 | NotImplemented(); | ||
| 51 | } | ||
| 52 | |||
| 53 | void EmitGetSFlag(EmitContext& ctx) { | ||
| 54 | NotImplemented(); | ||
| 55 | } | ||
| 56 | |||
| 57 | void EmitGetCFlag(EmitContext& ctx) { | ||
| 58 | NotImplemented(); | ||
| 59 | } | ||
| 60 | |||
| 61 | void EmitGetOFlag(EmitContext& ctx) { | ||
| 62 | NotImplemented(); | ||
| 63 | } | ||
| 64 | |||
| 65 | void EmitSetZFlag(EmitContext& ctx) { | ||
| 66 | NotImplemented(); | ||
| 67 | } | ||
| 68 | |||
| 69 | void EmitSetSFlag(EmitContext& ctx) { | ||
| 70 | NotImplemented(); | ||
| 71 | } | ||
| 72 | |||
| 73 | void EmitSetCFlag(EmitContext& ctx) { | ||
| 74 | NotImplemented(); | ||
| 75 | } | ||
| 76 | |||
| 77 | void EmitSetOFlag(EmitContext& ctx) { | ||
| 78 | NotImplemented(); | ||
| 79 | } | ||
| 80 | |||
| 81 | void EmitGetZeroFromOp(EmitContext& ctx) { | ||
| 82 | NotImplemented(); | ||
| 83 | } | ||
| 84 | |||
| 85 | void EmitGetSignFromOp(EmitContext& ctx) { | ||
| 86 | NotImplemented(); | ||
| 87 | } | ||
| 88 | |||
| 89 | void EmitGetCarryFromOp(EmitContext& ctx) { | ||
| 90 | NotImplemented(); | ||
| 91 | } | ||
| 92 | |||
| 93 | void EmitGetOverflowFromOp(EmitContext& ctx) { | ||
| 94 | NotImplemented(); | ||
| 95 | } | ||
| 96 | |||
| 97 | void EmitGetSparseFromOp(EmitContext& ctx) { | ||
| 98 | NotImplemented(); | ||
| 99 | } | ||
| 100 | |||
| 101 | void EmitGetInBoundsFromOp(EmitContext& ctx) { | ||
| 102 | NotImplemented(); | ||
| 103 | } | ||
| 104 | |||
| 105 | } // namespace Shader::Backend::GLSL | ||
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp new file mode 100644 index 000000000..49fba9073 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp | |||
| @@ -0,0 +1,55 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 10 | |||
| 11 | namespace Shader::Backend::GLSL { | ||
| 12 | void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, std::string_view cond, | ||
| 13 | std::string_view true_value, std::string_view false_value) { | ||
| 14 | ctx.AddU1("{}={}?{}:{};", inst, cond, true_value, false_value); | ||
| 15 | } | ||
| 16 | |||
| 17 | void EmitSelectU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, | ||
| 18 | [[maybe_unused]] std::string_view true_value, | ||
| 19 | [[maybe_unused]] std::string_view false_value) { | ||
| 20 | NotImplemented(); | ||
| 21 | } | ||
| 22 | |||
| 23 | void EmitSelectU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, | ||
| 24 | [[maybe_unused]] std::string_view true_value, | ||
| 25 | [[maybe_unused]] std::string_view false_value) { | ||
| 26 | NotImplemented(); | ||
| 27 | } | ||
| 28 | |||
| 29 | void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, | ||
| 30 | std::string_view true_value, std::string_view false_value) { | ||
| 31 | ctx.AddU32("{}={}?{}:{};", inst, cond, true_value, false_value); | ||
| 32 | } | ||
| 33 | |||
| 34 | void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, std::string_view cond, | ||
| 35 | std::string_view true_value, std::string_view false_value) { | ||
| 36 | ctx.AddU64("{}={}?{}:{};", inst, cond, true_value, false_value); | ||
| 37 | } | ||
| 38 | |||
| 39 | void EmitSelectF16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, | ||
| 40 | [[maybe_unused]] std::string_view true_value, | ||
| 41 | [[maybe_unused]] std::string_view false_value) { | ||
| 42 | NotImplemented(); | ||
| 43 | } | ||
| 44 | |||
| 45 | void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, | ||
| 46 | std::string_view true_value, std::string_view false_value) { | ||
| 47 | ctx.AddF32("{}={}?{}:{};", inst, cond, true_value, false_value); | ||
| 48 | } | ||
| 49 | |||
| 50 | void EmitSelectF64(EmitContext& ctx, IR::Inst& inst, std::string_view cond, | ||
| 51 | std::string_view true_value, std::string_view false_value) { | ||
| 52 | ctx.AddF64("{}={}?{}:{};", inst, cond, true_value, false_value); | ||
| 53 | } | ||
| 54 | |||
| 55 | } // namespace Shader::Backend::GLSL | ||
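Each implemented select forwards to a typed Add helper that allocates a destination variable for the instruction and formats a single ternary into the shader body. A standalone sketch of that string assembly follows; all variable names are invented for the illustration (the u_/b_ prefixes match the allocator conventions in var_alloc.cpp later in this diff), and the buffer is a plain std::string rather than the real EmitContext:

    #include <fmt/format.h>

    #include <iostream>
    #include <string>

    int main() {
        std::string shader_body;
        // What ctx.AddU32 ends up doing for EmitSelectU32, minus the allocation.
        shader_body += fmt::format("{}={}?{}:{};", "u_2", "b_0", "u_0", "u_1");
        std::cout << shader_body << '\n'; // prints: u_2=b_0?u_0:u_1;
    }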
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp new file mode 100644 index 000000000..518b78f06 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp | |||
| @@ -0,0 +1,79 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 10 | |||
| 11 | namespace Shader::Backend::GLSL { | ||
| 12 | namespace { | ||
| 13 | constexpr char cas_loop[]{"for(;;){{uint old_value={};uint " | ||
| 14 | "cas_result=atomicCompSwap({},old_value,bitfieldInsert({},{},{},{}));" | ||
| 15 | "if(cas_result==old_value){{break;}}}}"}; | ||
| 16 | |||
| 17 | void SharedWriteCas(EmitContext& ctx, std::string_view offset, std::string_view value, | ||
| 18 | std::string_view bit_offset, u32 num_bits) { | ||
| 19 | const auto smem{fmt::format("smem[{}>>2]", offset)}; | ||
| 20 | ctx.Add(cas_loop, smem, smem, smem, value, bit_offset, num_bits); | ||
| 21 | } | ||
| 22 | } // Anonymous namespace | ||
| 23 | |||
| 24 | void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { | ||
| 25 | ctx.AddU32("{}=bitfieldExtract(smem[{}>>2],int({}%4)*8,8);", inst, offset, offset); | ||
| 26 | } | ||
| 27 | |||
| 28 | void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { | ||
| 29 | ctx.AddU32("{}=bitfieldExtract(int(smem[{}>>2]),int({}%4)*8,8);", inst, offset, offset); | ||
| 30 | } | ||
| 31 | |||
| 32 | void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { | ||
| 33 | ctx.AddU32("{}=bitfieldExtract(smem[{}>>2],int(({}>>1)%2)*16,16);", inst, offset, offset); | ||
| 34 | } | ||
| 35 | |||
| 36 | void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { | ||
| 37 | ctx.AddU32("{}=bitfieldExtract(int(smem[{}>>2]),int(({}>>1)%2)*16,16);", inst, offset, offset); | ||
| 38 | } | ||
| 39 | |||
| 40 | void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { | ||
| 41 | ctx.AddU32("{}=smem[{}>>2];", inst, offset); | ||
| 42 | } | ||
| 43 | |||
| 44 | void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { | ||
| 45 | ctx.AddU32x2("{}=uvec2(smem[{}>>2],smem[({}+4)>>2]);", inst, offset, offset); | ||
| 46 | } | ||
| 47 | |||
| 48 | void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { | ||
| 49 | ctx.AddU32x4("{}=uvec4(smem[{}>>2],smem[({}+4)>>2],smem[({}+8)>>2],smem[({}+12)>>2]);", inst, | ||
| 50 | offset, offset, offset, offset); | ||
| 51 | } | ||
| 52 | |||
| 53 | void EmitWriteSharedU8(EmitContext& ctx, std::string_view offset, std::string_view value) { | ||
| 54 | const auto bit_offset{fmt::format("int({}%4)*8", offset)}; | ||
| 55 | SharedWriteCas(ctx, offset, value, bit_offset, 8); | ||
| 56 | } | ||
| 57 | |||
| 58 | void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_view value) { | ||
| 59 | const auto bit_offset{fmt::format("int(({}>>1)%2)*16", offset)}; | ||
| 60 | SharedWriteCas(ctx, offset, value, bit_offset, 16); | ||
| 61 | } | ||
| 62 | |||
| 63 | void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value) { | ||
| 64 | ctx.Add("smem[{}>>2]={};", offset, value); | ||
| 65 | } | ||
| 66 | |||
| 67 | void EmitWriteSharedU64(EmitContext& ctx, std::string_view offset, std::string_view value) { | ||
| 68 | ctx.Add("smem[{}>>2]={}.x;", offset, value); | ||
| 69 | ctx.Add("smem[({}+4)>>2]={}.y;", offset, value); | ||
| 70 | } | ||
| 71 | |||
| 72 | void EmitWriteSharedU128(EmitContext& ctx, std::string_view offset, std::string_view value) { | ||
| 73 | ctx.Add("smem[{}>>2]={}.x;", offset, value); | ||
| 74 | ctx.Add("smem[({}+4)>>2]={}.y;", offset, value); | ||
| 75 | ctx.Add("smem[({}+8)>>2]={}.z;", offset, value); | ||
| 76 | ctx.Add("smem[({}+12)>>2]={}.w;", offset, value); | ||
| 77 | } | ||
| 78 | |||
| 79 | } // namespace Shader::Backend::GLSL | ||
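Because smem is declared as an array of 32-bit uints, sub-word stores cannot be written directly; the cas_loop template retries a bitfieldInsert through atomicCompSwap so that two lanes storing different bytes of the same word cannot lose each other's writes. For EmitWriteSharedU8 the emitted GLSL expands approximately as below, with whitespace added for readability and off/v standing in for whatever offset and value expressions are passed:

    for (;;) {
        uint old_value = smem[off>>2];
        uint cas_result = atomicCompSwap(smem[off>>2], old_value,
                                         bitfieldInsert(smem[off>>2], v, int(off%4)*8, 8));
        if (cas_result == old_value) { break; }
    }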
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp new file mode 100644 index 000000000..9b866f889 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp | |||
| @@ -0,0 +1,111 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 11 | #include "shader_recompiler/profile.h" | ||
| 12 | |||
| 13 | namespace Shader::Backend::GLSL { | ||
| 14 | namespace { | ||
| 15 | std::string_view OutputVertexIndex(EmitContext& ctx) { | ||
| 16 | return ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : ""; | ||
| 17 | } | ||
| 18 | |||
| 19 | void InitializeOutputVaryings(EmitContext& ctx) { | ||
| 20 | if (ctx.uses_geometry_passthrough) { | ||
| 21 | return; | ||
| 22 | } | ||
| 23 | if (ctx.stage == Stage::VertexB || ctx.stage == Stage::Geometry) { | ||
| 24 | ctx.Add("gl_Position=vec4(0,0,0,1);"); | ||
| 25 | } | ||
| 26 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { | ||
| 27 | if (!ctx.info.stores.Generic(index)) { | ||
| 28 | continue; | ||
| 29 | } | ||
| 30 | const auto& info_array{ctx.output_generics.at(index)}; | ||
| 31 | const auto output_decorator{OutputVertexIndex(ctx)}; | ||
| 32 | size_t element{}; | ||
| 33 | while (element < info_array.size()) { | ||
| 34 | const auto& info{info_array.at(element)}; | ||
| 35 | const auto varying_name{fmt::format("{}{}", info.name, output_decorator)}; | ||
| 36 | switch (info.num_components) { | ||
| 37 | case 1: { | ||
| 38 | const char value{element == 3 ? '1' : '0'}; | ||
| 39 | ctx.Add("{}={}.f;", varying_name, value); | ||
| 40 | break; | ||
| 41 | } | ||
| 42 | case 2: | ||
| 43 | case 3: | ||
| 44 | if (element + info.num_components < 4) { | ||
| 45 | ctx.Add("{}=vec{}(0);", varying_name, info.num_components); | ||
| 46 | } else { | ||
| 47 | // last element is the w component, must be initialized to 1 | ||
| 48 | const auto zeros{info.num_components == 3 ? "0,0," : "0,"}; | ||
| 49 | ctx.Add("{}=vec{}({}1);", varying_name, info.num_components, zeros); | ||
| 50 | } | ||
| 51 | break; | ||
| 52 | case 4: | ||
| 53 | ctx.Add("{}=vec4(0,0,0,1);", varying_name); | ||
| 54 | break; | ||
| 55 | default: | ||
| 56 | break; | ||
| 57 | } | ||
| 58 | element += info.num_components; | ||
| 59 | } | ||
| 60 | } | ||
| 61 | } | ||
| 62 | } // Anonymous namespace | ||
| 63 | |||
| 64 | void EmitPhi(EmitContext& ctx, IR::Inst& phi) { | ||
| 65 | const size_t num_args{phi.NumArgs()}; | ||
| 66 | for (size_t i = 0; i < num_args; ++i) { | ||
| 67 | ctx.var_alloc.Consume(phi.Arg(i)); | ||
| 68 | } | ||
| 69 | if (!phi.Definition<Id>().is_valid) { | ||
| 70 | // The phi node wasn't forward defined | ||
| 71 | ctx.var_alloc.PhiDefine(phi, phi.Arg(0).Type()); | ||
| 72 | } | ||
| 73 | } | ||
| 74 | |||
| 75 | void EmitVoid(EmitContext&) {} | ||
| 76 | |||
| 77 | void EmitReference(EmitContext& ctx, const IR::Value& value) { | ||
| 78 | ctx.var_alloc.Consume(value); | ||
| 79 | } | ||
| 80 | |||
| 81 | void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& value) { | ||
| 82 | IR::Inst& phi{*phi_value.InstRecursive()}; | ||
| 83 | const auto phi_type{phi.Arg(0).Type()}; | ||
| 84 | if (!phi.Definition<Id>().is_valid) { | ||
| 85 | // The phi node wasn't forward defined | ||
| 86 | ctx.var_alloc.PhiDefine(phi, phi_type); | ||
| 87 | } | ||
| 88 | const auto phi_reg{ctx.var_alloc.Consume(IR::Value{&phi})}; | ||
| 89 | const auto val_reg{ctx.var_alloc.Consume(value)}; | ||
| 90 | if (phi_reg == val_reg) { | ||
| 91 | return; | ||
| 92 | } | ||
| 93 | ctx.Add("{}={};", phi_reg, val_reg); | ||
| 94 | } | ||
| 95 | |||
| 96 | void EmitPrologue(EmitContext& ctx) { | ||
| 97 | InitializeOutputVaryings(ctx); | ||
| 98 | } | ||
| 99 | |||
| 100 | void EmitEpilogue(EmitContext&) {} | ||
| 101 | |||
| 102 | void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { | ||
| 103 | ctx.Add("EmitStreamVertex(int({}));", ctx.var_alloc.Consume(stream)); | ||
| 104 | InitializeOutputVaryings(ctx); | ||
| 105 | } | ||
| 106 | |||
| 107 | void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { | ||
| 108 | ctx.Add("EndStreamPrimitive(int({}));", ctx.var_alloc.Consume(stream)); | ||
| 109 | } | ||
| 110 | |||
| 111 | } // namespace Shader::Backend::GLSL | ||
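EmitPhiMove is where SSA form is dismantled: GLSL has no phi nodes, so each incoming control-flow edge assigns into the phi's variable, which is defined lazily by whichever of EmitPhi or EmitPhiMove encounters the node first. Conceptually, for a two-predecessor phi (the IR syntax below is only illustrative):

    // IR:   %2 = Phi(%0 from block A, %1 from block B)
    // GLSL: u_2=u_0;   emitted on the A -> merge edge
    //       u_2=u_1;   emitted on the B -> merge edge
    //       ...the merge block then reads u_2 directly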
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp new file mode 100644 index 000000000..15bf02dd6 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp | |||
| @@ -0,0 +1,32 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" | ||
| 9 | |||
| 10 | namespace Shader::Backend::GLSL { | ||
| 11 | |||
| 12 | void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) { | ||
| 13 | ctx.AddU1("{}=false;", inst); | ||
| 14 | } | ||
| 15 | |||
| 16 | void EmitUndefU8(EmitContext& ctx, IR::Inst& inst) { | ||
| 17 | ctx.AddU32("{}=0u;", inst); | ||
| 18 | } | ||
| 19 | |||
| 20 | void EmitUndefU16(EmitContext& ctx, IR::Inst& inst) { | ||
| 21 | ctx.AddU32("{}=0u;", inst); | ||
| 22 | } | ||
| 23 | |||
| 24 | void EmitUndefU32(EmitContext& ctx, IR::Inst& inst) { | ||
| 25 | ctx.AddU32("{}=0u;", inst); | ||
| 26 | } | ||
| 27 | |||
| 28 | void EmitUndefU64(EmitContext& ctx, IR::Inst& inst) { | ||
| 29 | ctx.AddU64("{}=0u;", inst); | ||
| 30 | } | ||
| 31 | |||
| 32 | } // namespace Shader::Backend::GLSL | ||
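Undefined IR values are pinned to concrete constants (false or zero) so the generated GLSL stays deterministic and warning-free; U8 and U16 share the 32-bit path because the variable allocator introduced later in this diff defines no narrower integer types. The emitted statements are as small as they look, roughly:

    b_0=false;   // EmitUndefU1
    u_1=0u;      // EmitUndefU8/U16/U32 (u64_2=0u; for EmitUndefU64)

with the declarations themselves hoisted separately by the allocator.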
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp new file mode 100644 index 000000000..a982dd8a2 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp | |||
| @@ -0,0 +1,217 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 10 | #include "shader_recompiler/profile.h" | ||
| 11 | |||
| 12 | namespace Shader::Backend::GLSL { | ||
| 13 | namespace { | ||
| 14 | void SetInBoundsFlag(EmitContext& ctx, IR::Inst& inst) { | ||
| 15 | IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)}; | ||
| 16 | if (!in_bounds) { | ||
| 17 | return; | ||
| 18 | } | ||
| 19 | ctx.AddU1("{}=shfl_in_bounds;", *in_bounds); | ||
| 20 | in_bounds->Invalidate(); | ||
| 21 | } | ||
| 22 | |||
| 23 | std::string ComputeMinThreadId(std::string_view thread_id, std::string_view segmentation_mask) { | ||
| 24 | return fmt::format("({}&{})", thread_id, segmentation_mask); | ||
| 25 | } | ||
| 26 | |||
| 27 | std::string ComputeMaxThreadId(std::string_view min_thread_id, std::string_view clamp, | ||
| 28 | std::string_view not_seg_mask) { | ||
| 29 | return fmt::format("({})|({}&{})", min_thread_id, clamp, not_seg_mask); | ||
| 30 | } | ||
| 31 | |||
| 32 | std::string GetMaxThreadId(std::string_view thread_id, std::string_view clamp, | ||
| 33 | std::string_view segmentation_mask) { | ||
| 34 | const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)}; | ||
| 35 | const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)}; | ||
| 36 | return ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask); | ||
| 37 | } | ||
| 38 | |||
| 39 | void UseShuffleNv(EmitContext& ctx, IR::Inst& inst, std::string_view shfl_op, | ||
| 40 | std::string_view value, std::string_view index, | ||
| 41 | [[maybe_unused]] std::string_view clamp, std::string_view segmentation_mask) { | ||
| 42 | const auto width{fmt::format("32u>>(bitCount({}&31u))", segmentation_mask)}; | ||
| 43 | ctx.AddU32("{}={}({},{},{},shfl_in_bounds);", inst, shfl_op, value, index, width); | ||
| 44 | SetInBoundsFlag(ctx, inst); | ||
| 45 | } | ||
| 46 | } // Anonymous namespace | ||
| 47 | |||
| 48 | void EmitLaneId(EmitContext& ctx, IR::Inst& inst) { | ||
| 49 | ctx.AddU32("{}=gl_SubGroupInvocationARB&31u;", inst); | ||
| 50 | } | ||
| 51 | |||
| 52 | void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { | ||
| 53 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 54 | ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); | ||
| 55 | } else { | ||
| 56 | const auto active_mask{"uvec2(ballotARB(true))[gl_SubGroupInvocationARB]"}; | ||
| 57 | const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)}; | ||
| 58 | ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask); | ||
| 59 | } | ||
| 60 | } | ||
| 61 | |||
| 62 | void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { | ||
| 63 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 64 | ctx.AddU1("{}=anyInvocationARB({});", inst, pred); | ||
| 65 | } else { | ||
| 66 | const auto active_mask{"uvec2(ballotARB(true))[gl_SubGroupInvocationARB]"}; | ||
| 67 | const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)}; | ||
| 68 | ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask); | ||
| 69 | } | ||
| 70 | } | ||
| 71 | |||
| 72 | void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { | ||
| 73 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 74 | ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); | ||
| 75 | } else { | ||
| 76 | const auto active_mask{"uvec2(ballotARB(true))[gl_SubGroupInvocationARB]"}; | ||
| 77 | const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)}; | ||
| 78 | const auto value{fmt::format("({}^{})", ballot, active_mask)}; | ||
| 79 | ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask); | ||
| 80 | } | ||
| 81 | } | ||
| 82 | |||
| 83 | void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { | ||
| 84 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 85 | ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred); | ||
| 86 | } else { | ||
| 87 | ctx.AddU32("{}=uvec2(ballotARB({}))[gl_SubGroupInvocationARB];", inst, pred); | ||
| 88 | } | ||
| 89 | } | ||
| 90 | |||
| 91 | void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) { | ||
| 92 | ctx.AddU32("{}=uint(gl_SubGroupEqMaskARB.x);", inst); | ||
| 93 | } | ||
| 94 | |||
| 95 | void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst) { | ||
| 96 | ctx.AddU32("{}=uint(gl_SubGroupLtMaskARB.x);", inst); | ||
| 97 | } | ||
| 98 | |||
| 99 | void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst) { | ||
| 100 | ctx.AddU32("{}=uint(gl_SubGroupLeMaskARB.x);", inst); | ||
| 101 | } | ||
| 102 | |||
| 103 | void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst) { | ||
| 104 | ctx.AddU32("{}=uint(gl_SubGroupGtMaskARB.x);", inst); | ||
| 105 | } | ||
| 106 | |||
| 107 | void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) { | ||
| 108 | ctx.AddU32("{}=uint(gl_SubGroupGeMaskARB.x);", inst); | ||
| 109 | } | ||
| 110 | |||
| 111 | void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, | ||
| 112 | std::string_view index, std::string_view clamp, | ||
| 113 | std::string_view segmentation_mask) { | ||
| 114 | if (ctx.profile.support_gl_warp_intrinsics) { | ||
| 115 | UseShuffleNv(ctx, inst, "shuffleNV", value, index, clamp, segmentation_mask); | ||
| 116 | return; | ||
| 117 | } | ||
| 118 | const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)}; | ||
| 119 | const auto thread_id{"gl_SubGroupInvocationARB"}; | ||
| 120 | const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)}; | ||
| 121 | const auto max_thread_id{ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask)}; | ||
| 122 | |||
| 123 | const auto lhs{fmt::format("({}&{})", index, not_seg_mask)}; | ||
| 124 | const auto src_thread_id{fmt::format("({})|({})", lhs, min_thread_id)}; | ||
| 125 | ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); | ||
| 126 | SetInBoundsFlag(ctx, inst); | ||
| 127 | ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); | ||
| 128 | } | ||
| 129 | |||
| 130 | void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, | ||
| 131 | std::string_view clamp, std::string_view segmentation_mask) { | ||
| 132 | if (ctx.profile.support_gl_warp_intrinsics) { | ||
| 133 | UseShuffleNv(ctx, inst, "shuffleUpNV", value, index, clamp, segmentation_mask); | ||
| 134 | return; | ||
| 135 | } | ||
| 136 | const auto thread_id{"gl_SubGroupInvocationARB"}; | ||
| 137 | const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; | ||
| 138 | const auto src_thread_id{fmt::format("({}-{})", thread_id, index)}; | ||
| 139 | ctx.Add("shfl_in_bounds=int({})>=int({});", src_thread_id, max_thread_id); | ||
| 140 | SetInBoundsFlag(ctx, inst); | ||
| 141 | ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); | ||
| 142 | } | ||
| 143 | |||
| 144 | void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, | ||
| 145 | std::string_view index, std::string_view clamp, | ||
| 146 | std::string_view segmentation_mask) { | ||
| 147 | if (ctx.profile.support_gl_warp_intrinsics) { | ||
| 148 | UseShuffleNv(ctx, inst, "shuffleDownNV", value, index, clamp, segmentation_mask); | ||
| 149 | return; | ||
| 150 | } | ||
| 151 | const auto thread_id{"gl_SubGroupInvocationARB"}; | ||
| 152 | const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; | ||
| 153 | const auto src_thread_id{fmt::format("({}+{})", thread_id, index)}; | ||
| 154 | ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); | ||
| 155 | SetInBoundsFlag(ctx, inst); | ||
| 156 | ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); | ||
| 157 | } | ||
| 158 | |||
| 159 | void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, | ||
| 160 | std::string_view index, std::string_view clamp, | ||
| 161 | std::string_view segmentation_mask) { | ||
| 162 | if (ctx.profile.support_gl_warp_intrinsics) { | ||
| 163 | UseShuffleNv(ctx, inst, "shuffleXorNV", value, index, clamp, segmentation_mask); | ||
| 164 | return; | ||
| 165 | } | ||
| 166 | const auto thread_id{"gl_SubGroupInvocationARB"}; | ||
| 167 | const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; | ||
| 168 | const auto src_thread_id{fmt::format("({}^{})", thread_id, index)}; | ||
| 169 | ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); | ||
| 170 | SetInBoundsFlag(ctx, inst); | ||
| 171 | ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); | ||
| 172 | } | ||
| 173 | |||
| 174 | void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, std::string_view op_a, std::string_view op_b, | ||
| 175 | std::string_view swizzle) { | ||
| 176 | const auto mask{fmt::format("({}>>((gl_SubGroupInvocationARB&3)<<1))&3", swizzle)}; | ||
| 177 | const auto modifier_a{fmt::format("FSWZ_A[{}]", mask)}; | ||
| 178 | const auto modifier_b{fmt::format("FSWZ_B[{}]", mask)}; | ||
| 179 | ctx.AddF32("{}=({}*{})+({}*{});", inst, op_a, modifier_a, op_b, modifier_b); | ||
| 180 | } | ||
| 181 | |||
| 182 | void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { | ||
| 183 | if (ctx.profile.support_gl_derivative_control) { | ||
| 184 | ctx.AddF32("{}=dFdxFine({});", inst, op_a); | ||
| 185 | } else { | ||
| 186 | LOG_WARNING(Shader_GLSL, "Device does not support dFdxFine, falling back to dFdx"); | ||
| 187 | ctx.AddF32("{}=dFdx({});", inst, op_a); | ||
| 188 | } | ||
| 189 | } | ||
| 190 | |||
| 191 | void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { | ||
| 192 | if (ctx.profile.support_gl_derivative_control) { | ||
| 193 | ctx.AddF32("{}=dFdyFine({});", inst, op_a); | ||
| 194 | } else { | ||
| 195 | LOG_WARNING(Shader_GLSL, "Device does not support dFdyFine, falling back to dFdy"); | ||
| 196 | ctx.AddF32("{}=dFdy({});", inst, op_a); | ||
| 197 | } | ||
| 198 | } | ||
| 199 | |||
| 200 | void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { | ||
| 201 | if (ctx.profile.support_gl_derivative_control) { | ||
| 202 | ctx.AddF32("{}=dFdxCoarse({});", inst, op_a); | ||
| 203 | } else { | ||
| 204 | LOG_WARNING(Shader_GLSL, "Device does not support dFdxCoarse, falling back to dFdx"); | ||
| 205 | ctx.AddF32("{}=dFdx({});", inst, op_a); | ||
| 206 | } | ||
| 207 | } | ||
| 208 | |||
| 209 | void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { | ||
| 210 | if (ctx.profile.support_gl_derivative_control) { | ||
| 211 | ctx.AddF32("{}=dFdyCoarse({});", inst, op_a); | ||
| 212 | } else { | ||
| 213 | LOG_WARNING(Shader_GLSL, "Device does not support dFdyCoarse, falling back to dFdy"); | ||
| 214 | ctx.AddF32("{}=dFdy({});", inst, op_a); | ||
| 215 | } | ||
| 216 | } | ||
| 217 | } // namespace Shader::Backend::GLSL | ||
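When the NV shuffle intrinsics are unavailable, the shuffles above are emulated with readInvocationARB plus explicit bounds arithmetic. The sketch below mirrors that arithmetic on the CPU for one EmitShuffleUp lane; every value and name is invented for the illustration:

    #include <cstdint>
    #include <cstdio>

    int main() {
        const std::uint32_t thread_id = 5;   // stands in for gl_SubGroupInvocationARB
        const std::uint32_t index = 2;       // shuffle-up distance
        const std::uint32_t clamp = 0;       // lowest permitted source lane
        const std::uint32_t seg_mask = 0x1c; // segmentation mask
        // GetMaxThreadId: min = tid & mask, max = min | (clamp & ~mask)
        const std::uint32_t min_tid = thread_id & seg_mask;
        const std::uint32_t max_tid = min_tid | (clamp & ~seg_mask);
        // EmitShuffleUp: source lane and the signed bounds test, as emitted
        const std::uint32_t src_tid = thread_id - index;
        const bool in_bounds =
            static_cast<std::int32_t>(src_tid) >= static_cast<std::int32_t>(max_tid);
        std::printf("src=%u in_bounds=%d\n", static_cast<unsigned>(src_tid),
                    in_bounds ? 1 : 0); // prints: src=3 in_bounds=0
    }

The signed comparison mirrors the int(...) casts in the emitted GLSL; an out-of-bounds lane falls back to returning its own value, matching the ternary in the emitters above.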
diff --git a/src/shader_recompiler/backend/glsl/var_alloc.cpp b/src/shader_recompiler/backend/glsl/var_alloc.cpp new file mode 100644 index 000000000..194f926ca --- /dev/null +++ b/src/shader_recompiler/backend/glsl/var_alloc.cpp | |||
| @@ -0,0 +1,308 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string> | ||
| 6 | #include <string_view> | ||
| 7 | |||
| 8 | #include <fmt/format.h> | ||
| 9 | |||
| 10 | #include "shader_recompiler/backend/glsl/var_alloc.h" | ||
| 11 | #include "shader_recompiler/exception.h" | ||
| 12 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 13 | |||
| 14 | namespace Shader::Backend::GLSL { | ||
| 15 | namespace { | ||
| 16 | std::string TypePrefix(GlslVarType type) { | ||
| 17 | switch (type) { | ||
| 18 | case GlslVarType::U1: | ||
| 19 | return "b_"; | ||
| 20 | case GlslVarType::F16x2: | ||
| 21 | return "f16x2_"; | ||
| 22 | case GlslVarType::U32: | ||
| 23 | return "u_"; | ||
| 24 | case GlslVarType::F32: | ||
| 25 | return "f_"; | ||
| 26 | case GlslVarType::U64: | ||
| 27 | return "u64_"; | ||
| 28 | case GlslVarType::F64: | ||
| 29 | return "d_"; | ||
| 30 | case GlslVarType::U32x2: | ||
| 31 | return "u2_"; | ||
| 32 | case GlslVarType::F32x2: | ||
| 33 | return "f2_"; | ||
| 34 | case GlslVarType::U32x3: | ||
| 35 | return "u3_"; | ||
| 36 | case GlslVarType::F32x3: | ||
| 37 | return "f3_"; | ||
| 38 | case GlslVarType::U32x4: | ||
| 39 | return "u4_"; | ||
| 40 | case GlslVarType::F32x4: | ||
| 41 | return "f4_"; | ||
| 42 | case GlslVarType::PrecF32: | ||
| 43 | return "pf_"; | ||
| 44 | case GlslVarType::PrecF64: | ||
| 45 | return "pd_"; | ||
| 46 | case GlslVarType::Void: | ||
| 47 | return ""; | ||
| 48 | default: | ||
| 49 | throw NotImplementedException("Type {}", type); | ||
| 50 | } | ||
| 51 | } | ||
| 52 | |||
| 53 | std::string FormatFloat(std::string_view value, IR::Type type) { | ||
| 54 | // TODO: Confirm FP64 nan/inf | ||
| 55 | if (type == IR::Type::F32) { | ||
| 56 | if (value == "nan") { | ||
| 57 | return "utof(0x7fc00000)"; | ||
| 58 | } | ||
| 59 | if (value == "inf") { | ||
| 60 | return "utof(0x7f800000)"; | ||
| 61 | } | ||
| 62 | if (value == "-inf") { | ||
| 63 | return "utof(0xff800000)"; | ||
| 64 | } | ||
| 65 | } | ||
| 66 | if (value.find_first_of('e') != std::string_view::npos) { | ||
| 67 | // scientific notation | ||
| 68 | const auto cast{type == IR::Type::F32 ? "float" : "double"}; | ||
| 69 | return fmt::format("{}({})", cast, value); | ||
| 70 | } | ||
| 71 | const bool needs_dot{value.find_first_of('.') == std::string_view::npos}; | ||
| 72 | const bool needs_suffix{!value.ends_with('f')}; | ||
| 73 | const auto suffix{type == IR::Type::F32 ? "f" : "lf"}; | ||
| 74 | return fmt::format("{}{}{}", value, needs_dot ? "." : "", needs_suffix ? suffix : ""); | ||
| 75 | } | ||
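// Worked examples of the rewriting above: "2" becomes "2.f", "1.5" becomes
// "1.5f" ("1.5lf" for F64), and "1e-07" becomes "float(1e-07)". NaN and the
// infinities have no GLSL literals, so they are emitted as bit-exact casts:
// 0x7fc00000 is the canonical single-precision quiet NaN, and 0x7f800000 /
// 0xff800000 are +/-infinity. utof is presumably a uintBitsToFloat shorthand
// defined in the emitted shader preamble (outside this hunk).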
| 76 | |||
| 77 | std::string MakeImm(const IR::Value& value) { | ||
| 78 | switch (value.Type()) { | ||
| 79 | case IR::Type::U1: | ||
| 80 | return fmt::format("{}", value.U1() ? "true" : "false"); | ||
| 81 | case IR::Type::U32: | ||
| 82 | return fmt::format("{}u", value.U32()); | ||
| 83 | case IR::Type::F32: | ||
| 84 | return FormatFloat(fmt::format("{}", value.F32()), IR::Type::F32); | ||
| 85 | case IR::Type::U64: | ||
| 86 | return fmt::format("{}ul", value.U64()); | ||
| 87 | case IR::Type::F64: | ||
| 88 | return FormatFloat(fmt::format("{}", value.F64()), IR::Type::F64); | ||
| 89 | case IR::Type::Void: | ||
| 90 | return ""; | ||
| 91 | default: | ||
| 92 | throw NotImplementedException("Immediate type {}", value.Type()); | ||
| 93 | } | ||
| 94 | } | ||
| 95 | } // Anonymous namespace | ||
| 96 | |||
| 97 | std::string VarAlloc::Representation(u32 index, GlslVarType type) const { | ||
| 98 | const auto prefix{TypePrefix(type)}; | ||
| 99 | return fmt::format("{}{}", prefix, index); | ||
| 100 | } | ||
| 101 | |||
| 102 | std::string VarAlloc::Representation(Id id) const { | ||
| 103 | return Representation(id.index, id.type); | ||
| 104 | } | ||
| 105 | |||
| 106 | std::string VarAlloc::Define(IR::Inst& inst, GlslVarType type) { | ||
| 107 | if (inst.HasUses()) { | ||
| 108 | inst.SetDefinition<Id>(Alloc(type)); | ||
| 109 | return Representation(inst.Definition<Id>()); | ||
| 110 | } else { | ||
| 111 | Id id{}; | ||
| 112 | id.type.Assign(type); | ||
| 113 | GetUseTracker(type).uses_temp = true; | ||
| 114 | inst.SetDefinition<Id>(id); | ||
| 115 | return 't' + Representation(inst.Definition<Id>()); | ||
| 116 | } | ||
| 117 | } | ||
| 118 | |||
| 119 | std::string VarAlloc::Define(IR::Inst& inst, IR::Type type) { | ||
| 120 | return Define(inst, RegType(type)); | ||
| 121 | } | ||
| 122 | |||
| 123 | std::string VarAlloc::PhiDefine(IR::Inst& inst, IR::Type type) { | ||
| 124 | return AddDefine(inst, RegType(type)); | ||
| 125 | } | ||
| 126 | |||
| 127 | std::string VarAlloc::AddDefine(IR::Inst& inst, GlslVarType type) { | ||
| 128 | if (inst.HasUses()) { | ||
| 129 | inst.SetDefinition<Id>(Alloc(type)); | ||
| 130 | } else { | ||
| 131 | return ""; | ||
| 132 | } | ||
| 133 | return Representation(inst.Definition<Id>()); | ||
| 134 | } | ||
| 136 | |||
| 137 | std::string VarAlloc::Consume(const IR::Value& value) { | ||
| 138 | return value.IsImmediate() ? MakeImm(value) : ConsumeInst(*value.InstRecursive()); | ||
| 139 | } | ||
| 140 | |||
| 141 | std::string VarAlloc::ConsumeInst(IR::Inst& inst) { | ||
| 142 | inst.DestructiveRemoveUsage(); | ||
| 143 | if (!inst.HasUses()) { | ||
| 144 | Free(inst.Definition<Id>()); | ||
| 145 | } | ||
| 146 | return Representation(inst.Definition<Id>()); | ||
| 147 | } | ||
| 148 | |||
| 149 | std::string VarAlloc::GetGlslType(IR::Type type) const { | ||
| 150 | return GetGlslType(RegType(type)); | ||
| 151 | } | ||
| 152 | |||
| 153 | Id VarAlloc::Alloc(GlslVarType type) { | ||
| 154 | auto& use_tracker{GetUseTracker(type)}; | ||
| 155 | const auto num_vars{use_tracker.var_use.size()}; | ||
| 156 | for (size_t var = 0; var < num_vars; ++var) { | ||
| 157 | if (use_tracker.var_use[var]) { | ||
| 158 | continue; | ||
| 159 | } | ||
| 160 | use_tracker.num_used = std::max(use_tracker.num_used, var + 1); | ||
| 161 | use_tracker.var_use[var] = true; | ||
| 162 | Id ret{}; | ||
| 163 | ret.is_valid.Assign(1); | ||
| 164 | ret.type.Assign(type); | ||
| 165 | ret.index.Assign(static_cast<u32>(var)); | ||
| 166 | return ret; | ||
| 167 | } | ||
| 168 | // Allocate a new variable | ||
| 169 | use_tracker.var_use.push_back(true); | ||
| 170 | Id ret{}; | ||
| 171 | ret.is_valid.Assign(1); | ||
| 172 | ret.type.Assign(type); | ||
| 173 | ret.index.Assign(static_cast<u32>(use_tracker.num_used)); | ||
| 174 | ++use_tracker.num_used; | ||
| 175 | return ret; | ||
| 176 | } | ||
| 177 | |||
| 178 | void VarAlloc::Free(Id id) { | ||
| 179 | if (id.is_valid == 0) { | ||
| 180 | throw LogicError("Freeing invalid variable"); | ||
| 181 | } | ||
| 182 | auto& use_tracker{GetUseTracker(id.type)}; | ||
| 183 | use_tracker.var_use[id.index] = false; | ||
| 184 | } | ||
| 185 | |||
| 186 | GlslVarType VarAlloc::RegType(IR::Type type) const { | ||
| 187 | switch (type) { | ||
| 188 | case IR::Type::U1: | ||
| 189 | return GlslVarType::U1; | ||
| 190 | case IR::Type::U32: | ||
| 191 | return GlslVarType::U32; | ||
| 192 | case IR::Type::F32: | ||
| 193 | return GlslVarType::F32; | ||
| 194 | case IR::Type::U64: | ||
| 195 | return GlslVarType::U64; | ||
| 196 | case IR::Type::F64: | ||
| 197 | return GlslVarType::F64; | ||
| 198 | default: | ||
| 199 | throw NotImplementedException("IR type {}", type); | ||
| 200 | } | ||
| 201 | } | ||
| 202 | |||
| 203 | std::string VarAlloc::GetGlslType(GlslVarType type) const { | ||
| 204 | switch (type) { | ||
| 205 | case GlslVarType::U1: | ||
| 206 | return "bool"; | ||
| 207 | case GlslVarType::F16x2: | ||
| 208 | return "f16vec2"; | ||
| 209 | case GlslVarType::U32: | ||
| 210 | return "uint"; | ||
| 211 | case GlslVarType::F32: | ||
| 212 | case GlslVarType::PrecF32: | ||
| 213 | return "float"; | ||
| 214 | case GlslVarType::U64: | ||
| 215 | return "uint64_t"; | ||
| 216 | case GlslVarType::F64: | ||
| 217 | case GlslVarType::PrecF64: | ||
| 218 | return "double"; | ||
| 219 | case GlslVarType::U32x2: | ||
| 220 | return "uvec2"; | ||
| 221 | case GlslVarType::F32x2: | ||
| 222 | return "vec2"; | ||
| 223 | case GlslVarType::U32x3: | ||
| 224 | return "uvec3"; | ||
| 225 | case GlslVarType::F32x3: | ||
| 226 | return "vec3"; | ||
| 227 | case GlslVarType::U32x4: | ||
| 228 | return "uvec4"; | ||
| 229 | case GlslVarType::F32x4: | ||
| 230 | return "vec4"; | ||
| 231 | case GlslVarType::Void: | ||
| 232 | return ""; | ||
| 233 | default: | ||
| 234 | throw NotImplementedException("Type {}", type); | ||
| 235 | } | ||
| 236 | } | ||
| 237 | |||
| 238 | VarAlloc::UseTracker& VarAlloc::GetUseTracker(GlslVarType type) { | ||
| 239 | switch (type) { | ||
| 240 | case GlslVarType::U1: | ||
| 241 | return var_bool; | ||
| 242 | case GlslVarType::F16x2: | ||
| 243 | return var_f16x2; | ||
| 244 | case GlslVarType::U32: | ||
| 245 | return var_u32; | ||
| 246 | case GlslVarType::F32: | ||
| 247 | return var_f32; | ||
| 248 | case GlslVarType::U64: | ||
| 249 | return var_u64; | ||
| 250 | case GlslVarType::F64: | ||
| 251 | return var_f64; | ||
| 252 | case GlslVarType::U32x2: | ||
| 253 | return var_u32x2; | ||
| 254 | case GlslVarType::F32x2: | ||
| 255 | return var_f32x2; | ||
| 256 | case GlslVarType::U32x3: | ||
| 257 | return var_u32x3; | ||
| 258 | case GlslVarType::F32x3: | ||
| 259 | return var_f32x3; | ||
| 260 | case GlslVarType::U32x4: | ||
| 261 | return var_u32x4; | ||
| 262 | case GlslVarType::F32x4: | ||
| 263 | return var_f32x4; | ||
| 264 | case GlslVarType::PrecF32: | ||
| 265 | return var_precf32; | ||
| 266 | case GlslVarType::PrecF64: | ||
| 267 | return var_precf64; | ||
| 268 | default: | ||
| 269 | throw NotImplementedException("Type {}", type); | ||
| 270 | } | ||
| 271 | } | ||
| 272 | |||
| 273 | const VarAlloc::UseTracker& VarAlloc::GetUseTracker(GlslVarType type) const { | ||
| 274 | switch (type) { | ||
| 275 | case GlslVarType::U1: | ||
| 276 | return var_bool; | ||
| 277 | case GlslVarType::F16x2: | ||
| 278 | return var_f16x2; | ||
| 279 | case GlslVarType::U32: | ||
| 280 | return var_u32; | ||
| 281 | case GlslVarType::F32: | ||
| 282 | return var_f32; | ||
| 283 | case GlslVarType::U64: | ||
| 284 | return var_u64; | ||
| 285 | case GlslVarType::F64: | ||
| 286 | return var_f64; | ||
| 287 | case GlslVarType::U32x2: | ||
| 288 | return var_u32x2; | ||
| 289 | case GlslVarType::F32x2: | ||
| 290 | return var_f32x2; | ||
| 291 | case GlslVarType::U32x3: | ||
| 292 | return var_u32x3; | ||
| 293 | case GlslVarType::F32x3: | ||
| 294 | return var_f32x3; | ||
| 295 | case GlslVarType::U32x4: | ||
| 296 | return var_u32x4; | ||
| 297 | case GlslVarType::F32x4: | ||
| 298 | return var_f32x4; | ||
| 299 | case GlslVarType::PrecF32: | ||
| 300 | return var_precf32; | ||
| 301 | case GlslVarType::PrecF64: | ||
| 302 | return var_precf64; | ||
| 303 | default: | ||
| 304 | throw NotImplementedException("Type {}", type); | ||
| 305 | } | ||
| 306 | } | ||
| 307 | |||
| 308 | } // namespace Shader::Backend::GLSL | ||
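A standalone model of the first-fit recycling implemented by Alloc and Free above: ConsumeInst frees a slot at an instruction's last use, later allocations of the same type reuse the lowest free slot, and the final num_used is what the emitter declares at the top of the shader. All names below are local to the sketch:

    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <vector>

    struct Tracker {
        std::size_t num_used{};
        std::vector<bool> var_use;
    };

    std::size_t Alloc(Tracker& t) {
        for (std::size_t var = 0; var < t.var_use.size(); ++var) {
            if (t.var_use[var]) {
                continue;
            }
            t.var_use[var] = true;
            t.num_used = std::max(t.num_used, var + 1);
            return var;
        }
        t.var_use.push_back(true); // no free slot: grow, as the original does
        return t.num_used++;
    }

    int main() {
        Tracker t;
        const std::size_t a = Alloc(t); // 0
        const std::size_t b = Alloc(t); // 1
        t.var_use[a] = false;           // Free(a) after its last use
        const std::size_t c = Alloc(t); // 0 again: the slot is recycled
        std::cout << a << ' ' << b << ' ' << c << " num_used=" << t.num_used << '\n';
    }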
diff --git a/src/shader_recompiler/backend/glsl/var_alloc.h b/src/shader_recompiler/backend/glsl/var_alloc.h new file mode 100644 index 000000000..8b49f32a6 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/var_alloc.h | |||
| @@ -0,0 +1,105 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <bitset> | ||
| 8 | #include <string> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include "common/bit_field.h" | ||
| 12 | #include "common/common_types.h" | ||
| 13 | |||
| 14 | namespace Shader::IR { | ||
| 15 | class Inst; | ||
| 16 | class Value; | ||
| 17 | enum class Type; | ||
| 18 | } // namespace Shader::IR | ||
| 19 | |||
| 20 | namespace Shader::Backend::GLSL { | ||
| 21 | enum class GlslVarType : u32 { | ||
| 22 | U1, | ||
| 23 | F16x2, | ||
| 24 | U32, | ||
| 25 | F32, | ||
| 26 | U64, | ||
| 27 | F64, | ||
| 28 | U32x2, | ||
| 29 | F32x2, | ||
| 30 | U32x3, | ||
| 31 | F32x3, | ||
| 32 | U32x4, | ||
| 33 | F32x4, | ||
| 34 | PrecF32, | ||
| 35 | PrecF64, | ||
| 36 | Void, | ||
| 37 | }; | ||
| 38 | |||
| 39 | struct Id { | ||
| 40 | union { | ||
| 41 | u32 raw; | ||
| 42 | BitField<0, 1, u32> is_valid; | ||
| 43 | BitField<1, 4, GlslVarType> type; | ||
| 44 | BitField<6, 26, u32> index; | ||
| 45 | }; | ||
| 46 | |||
| 47 | bool operator==(Id rhs) const noexcept { | ||
| 48 | return raw == rhs.raw; | ||
| 49 | } | ||
| 50 | bool operator!=(Id rhs) const noexcept { | ||
| 51 | return !operator==(rhs); | ||
| 52 | } | ||
| 53 | }; | ||
| 54 | static_assert(sizeof(Id) == sizeof(u32)); | ||
| 55 | |||
| 56 | class VarAlloc { | ||
| 57 | public: | ||
| 58 | struct UseTracker { | ||
| 59 | bool uses_temp{}; | ||
| 60 | size_t num_used{}; | ||
| 61 | std::vector<bool> var_use; | ||
| 62 | }; | ||
| 63 | |||
| 64 | /// Used for explicit usages of variables, may revert to temporaries | ||
| 65 | std::string Define(IR::Inst& inst, GlslVarType type); | ||
| 66 | std::string Define(IR::Inst& inst, IR::Type type); | ||
| 67 | |||
| 68 | /// Used to assign variables used by the IR. May return a blank string if | ||
| 69 | /// the instruction's result is unused in the IR. | ||
| 70 | std::string AddDefine(IR::Inst& inst, GlslVarType type); | ||
| 71 | std::string PhiDefine(IR::Inst& inst, IR::Type type); | ||
| 72 | |||
| 73 | std::string Consume(const IR::Value& value); | ||
| 74 | std::string ConsumeInst(IR::Inst& inst); | ||
| 75 | |||
| 76 | std::string GetGlslType(GlslVarType type) const; | ||
| 77 | std::string GetGlslType(IR::Type type) const; | ||
| 78 | |||
| 79 | const UseTracker& GetUseTracker(GlslVarType type) const; | ||
| 80 | std::string Representation(u32 index, GlslVarType type) const; | ||
| 81 | |||
| 82 | private: | ||
| 83 | GlslVarType RegType(IR::Type type) const; | ||
| 84 | Id Alloc(GlslVarType type); | ||
| 85 | void Free(Id id); | ||
| 86 | UseTracker& GetUseTracker(GlslVarType type); | ||
| 87 | std::string Representation(Id id) const; | ||
| 88 | |||
| 89 | UseTracker var_bool{}; | ||
| 90 | UseTracker var_f16x2{}; | ||
| 91 | UseTracker var_u32{}; | ||
| 92 | UseTracker var_u32x2{}; | ||
| 93 | UseTracker var_u32x3{}; | ||
| 94 | UseTracker var_u32x4{}; | ||
| 95 | UseTracker var_f32{}; | ||
| 96 | UseTracker var_f32x2{}; | ||
| 97 | UseTracker var_f32x3{}; | ||
| 98 | UseTracker var_f32x4{}; | ||
| 99 | UseTracker var_u64{}; | ||
| 100 | UseTracker var_f64{}; | ||
| 101 | UseTracker var_precf32{}; | ||
| 102 | UseTracker var_precf64{}; | ||
| 103 | }; | ||
| 104 | |||
| 105 | } // namespace Shader::Backend::GLSL | ||
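The Id handle packs everything into one u32 so it can be copied and compared as a plain integer: bit 0 is validity, bits 1 through 4 hold the GlslVarType, and bits 6 through 31 hold a 26-bit index (bit 5 is left unused by the declarations above). A standalone illustration of the layout, with the masks written out by hand rather than via BitField:

    #include <cstdint>
    #include <cstdio>

    int main() {
        const std::uint32_t is_valid = 1u;
        const std::uint32_t type = 2u;  // e.g. the enumerator value of GlslVarType::U32
        const std::uint32_t index = 7u;
        const std::uint32_t raw =
            (is_valid & 0x1u) | ((type & 0xfu) << 1) | ((index & 0x3ffffffu) << 6);
        // Recover the index the same way BitField<6, 26, u32> would.
        std::printf("raw=0x%08x index=%u\n", static_cast<unsigned>(raw),
                    static_cast<unsigned>((raw >> 6) & 0x3ffffffu));
    }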
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp new file mode 100644 index 000000000..2d29d8c14 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp | |||
| @@ -0,0 +1,1368 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <climits> | ||
| 8 | #include <string_view> | ||
| 9 | |||
| 10 | #include <fmt/format.h> | ||
| 11 | |||
| 12 | #include "common/common_types.h" | ||
| 13 | #include "common/div_ceil.h" | ||
| 14 | #include "shader_recompiler/backend/spirv/emit_context.h" | ||
| 15 | |||
| 16 | namespace Shader::Backend::SPIRV { | ||
| 17 | namespace { | ||
| 18 | enum class Operation { | ||
| 19 | Increment, | ||
| 20 | Decrement, | ||
| 21 | FPAdd, | ||
| 22 | FPMin, | ||
| 23 | FPMax, | ||
| 24 | }; | ||
| 25 | |||
| 26 | struct AttrInfo { | ||
| 27 | Id pointer; | ||
| 28 | Id id; | ||
| 29 | bool needs_cast; | ||
| 30 | }; | ||
| 31 | |||
| 32 | Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) { | ||
| 33 | const spv::ImageFormat format{spv::ImageFormat::Unknown}; | ||
| 34 | const Id type{ctx.F32[1]}; | ||
| 35 | const bool depth{desc.is_depth}; | ||
| 36 | switch (desc.type) { | ||
| 37 | case TextureType::Color1D: | ||
| 38 | return ctx.TypeImage(type, spv::Dim::Dim1D, depth, false, false, 1, format); | ||
| 39 | case TextureType::ColorArray1D: | ||
| 40 | return ctx.TypeImage(type, spv::Dim::Dim1D, depth, true, false, 1, format); | ||
| 41 | case TextureType::Color2D: | ||
| 42 | return ctx.TypeImage(type, spv::Dim::Dim2D, depth, false, false, 1, format); | ||
| 43 | case TextureType::ColorArray2D: | ||
| 44 | return ctx.TypeImage(type, spv::Dim::Dim2D, depth, true, false, 1, format); | ||
| 45 | case TextureType::Color3D: | ||
| 46 | return ctx.TypeImage(type, spv::Dim::Dim3D, depth, false, false, 1, format); | ||
| 47 | case TextureType::ColorCube: | ||
| 48 | return ctx.TypeImage(type, spv::Dim::Cube, depth, false, false, 1, format); | ||
| 49 | case TextureType::ColorArrayCube: | ||
| 50 | return ctx.TypeImage(type, spv::Dim::Cube, depth, true, false, 1, format); | ||
| 51 | case TextureType::Buffer: | ||
| 52 | break; | ||
| 53 | } | ||
| 54 | throw InvalidArgument("Invalid texture type {}", desc.type); | ||
| 55 | } | ||
| 56 | |||
| 57 | spv::ImageFormat GetImageFormat(ImageFormat format) { | ||
| 58 | switch (format) { | ||
| 59 | case ImageFormat::Typeless: | ||
| 60 | return spv::ImageFormat::Unknown; | ||
| 61 | case ImageFormat::R8_UINT: | ||
| 62 | return spv::ImageFormat::R8ui; | ||
| 63 | case ImageFormat::R8_SINT: | ||
| 64 | return spv::ImageFormat::R8i; | ||
| 65 | case ImageFormat::R16_UINT: | ||
| 66 | return spv::ImageFormat::R16ui; | ||
| 67 | case ImageFormat::R16_SINT: | ||
| 68 | return spv::ImageFormat::R16i; | ||
| 69 | case ImageFormat::R32_UINT: | ||
| 70 | return spv::ImageFormat::R32ui; | ||
| 71 | case ImageFormat::R32G32_UINT: | ||
| 72 | return spv::ImageFormat::Rg32ui; | ||
| 73 | case ImageFormat::R32G32B32A32_UINT: | ||
| 74 | return spv::ImageFormat::Rgba32ui; | ||
| 75 | } | ||
| 76 | throw InvalidArgument("Invalid image format {}", format); | ||
| 77 | } | ||
| 78 | |||
| 79 | Id ImageType(EmitContext& ctx, const ImageDescriptor& desc) { | ||
| 80 | const spv::ImageFormat format{GetImageFormat(desc.format)}; | ||
| 81 | const Id type{ctx.U32[1]}; | ||
| 82 | switch (desc.type) { | ||
| 83 | case TextureType::Color1D: | ||
| 84 | return ctx.TypeImage(type, spv::Dim::Dim1D, false, false, false, 2, format); | ||
| 85 | case TextureType::ColorArray1D: | ||
| 86 | return ctx.TypeImage(type, spv::Dim::Dim1D, false, true, false, 2, format); | ||
| 87 | case TextureType::Color2D: | ||
| 88 | return ctx.TypeImage(type, spv::Dim::Dim2D, false, false, false, 2, format); | ||
| 89 | case TextureType::ColorArray2D: | ||
| 90 | return ctx.TypeImage(type, spv::Dim::Dim2D, false, true, false, 2, format); | ||
| 91 | case TextureType::Color3D: | ||
| 92 | return ctx.TypeImage(type, spv::Dim::Dim3D, false, false, false, 2, format); | ||
| 93 | case TextureType::Buffer: | ||
| 94 | throw NotImplementedException("Image buffer"); | ||
| 95 | default: | ||
| 96 | break; | ||
| 97 | } | ||
| 98 | throw InvalidArgument("Invalid texture type {}", desc.type); | ||
| 99 | } | ||
| 100 | |||
| 101 | Id DefineVariable(EmitContext& ctx, Id type, std::optional<spv::BuiltIn> builtin, | ||
| 102 | spv::StorageClass storage_class) { | ||
| 103 | const Id pointer_type{ctx.TypePointer(storage_class, type)}; | ||
| 104 | const Id id{ctx.AddGlobalVariable(pointer_type, storage_class)}; | ||
| 105 | if (builtin) { | ||
| 106 | ctx.Decorate(id, spv::Decoration::BuiltIn, *builtin); | ||
| 107 | } | ||
| 108 | ctx.interfaces.push_back(id); | ||
| 109 | return id; | ||
| 110 | } | ||
| 111 | |||
| 112 | u32 NumVertices(InputTopology input_topology) { | ||
| 113 | switch (input_topology) { | ||
| 114 | case InputTopology::Points: | ||
| 115 | return 1; | ||
| 116 | case InputTopology::Lines: | ||
| 117 | return 2; | ||
| 118 | case InputTopology::LinesAdjacency: | ||
| 119 | return 4; | ||
| 120 | case InputTopology::Triangles: | ||
| 121 | return 3; | ||
| 122 | case InputTopology::TrianglesAdjacency: | ||
| 123 | return 6; | ||
| 124 | } | ||
| 125 | throw InvalidArgument("Invalid input topology {}", input_topology); | ||
| 126 | } | ||
| 127 | |||
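// Per-invocation inputs become arrays in stages that read several vertices:
// geometry shaders use the topology-derived count from NumVertices above
// (e.g. Triangles -> 3, so a vec4 input is the SPIR-V equivalent of GLSL's
// `in vec4 attr[3];`), while the tessellation stages use a fixed bound of
// 32, the smallest gl_MaxPatchVertices the specification guarantees.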
| 128 | Id DefineInput(EmitContext& ctx, Id type, bool per_invocation, | ||
| 129 | std::optional<spv::BuiltIn> builtin = std::nullopt) { | ||
| 130 | switch (ctx.stage) { | ||
| 131 | case Stage::TessellationControl: | ||
| 132 | case Stage::TessellationEval: | ||
| 133 | if (per_invocation) { | ||
| 134 | type = ctx.TypeArray(type, ctx.Const(32u)); | ||
| 135 | } | ||
| 136 | break; | ||
| 137 | case Stage::Geometry: | ||
| 138 | if (per_invocation) { | ||
| 139 | const u32 num_vertices{NumVertices(ctx.runtime_info.input_topology)}; | ||
| 140 | type = ctx.TypeArray(type, ctx.Const(num_vertices)); | ||
| 141 | } | ||
| 142 | break; | ||
| 143 | default: | ||
| 144 | break; | ||
| 145 | } | ||
| 146 | return DefineVariable(ctx, type, builtin, spv::StorageClass::Input); | ||
| 147 | } | ||
| 148 | |||
| 149 | Id DefineOutput(EmitContext& ctx, Id type, std::optional<u32> invocations, | ||
| 150 | std::optional<spv::BuiltIn> builtin = std::nullopt) { | ||
| 151 | if (invocations && ctx.stage == Stage::TessellationControl) { | ||
| 152 | type = ctx.TypeArray(type, ctx.Const(*invocations)); | ||
| 153 | } | ||
| 154 | return DefineVariable(ctx, type, builtin, spv::StorageClass::Output); | ||
| 155 | } | ||
| 156 | |||
| 157 | void DefineGenericOutput(EmitContext& ctx, size_t index, std::optional<u32> invocations) { | ||
| 158 | static constexpr std::string_view swizzle{"xyzw"}; | ||
| 159 | const size_t base_attr_index{static_cast<size_t>(IR::Attribute::Generic0X) + index * 4}; | ||
| 160 | u32 element{0}; | ||
| 161 | while (element < 4) { | ||
| 162 | const u32 remainder{4 - element}; | ||
| 163 | const TransformFeedbackVarying* xfb_varying{}; | ||
| 164 | if (!ctx.runtime_info.xfb_varyings.empty()) { | ||
| 165 | xfb_varying = &ctx.runtime_info.xfb_varyings[base_attr_index + element]; | ||
| 166 | xfb_varying = xfb_varying && xfb_varying->components > 0 ? xfb_varying : nullptr; | ||
| 167 | } | ||
| 168 | const u32 num_components{xfb_varying ? xfb_varying->components : remainder}; | ||
| 169 | |||
| 170 | const Id id{DefineOutput(ctx, ctx.F32[num_components], invocations)}; | ||
| 171 | ctx.Decorate(id, spv::Decoration::Location, static_cast<u32>(index)); | ||
| 172 | if (element > 0) { | ||
| 173 | ctx.Decorate(id, spv::Decoration::Component, element); | ||
| 174 | } | ||
| 175 | if (xfb_varying) { | ||
| 176 | ctx.Decorate(id, spv::Decoration::XfbBuffer, xfb_varying->buffer); | ||
| 177 | ctx.Decorate(id, spv::Decoration::XfbStride, xfb_varying->stride); | ||
| 178 | ctx.Decorate(id, spv::Decoration::Offset, xfb_varying->offset); | ||
| 179 | } | ||
| 180 | if (num_components < 4 || element > 0) { | ||
| 181 | const std::string_view subswizzle{swizzle.substr(element, num_components)}; | ||
| 182 | ctx.Name(id, fmt::format("out_attr{}_{}", index, subswizzle)); | ||
| 183 | } else { | ||
| 184 | ctx.Name(id, fmt::format("out_attr{}", index)); | ||
| 185 | } | ||
| 186 | const GenericElementInfo info{ | ||
| 187 | .id = id, | ||
| 188 | .first_element = element, | ||
| 189 | .num_components = num_components, | ||
| 190 | }; | ||
| 191 | std::fill_n(ctx.output_generics[index].begin() + element, num_components, info); | ||
| 192 | element += num_components; | ||
| 193 | } | ||
| 194 | } | ||
| 195 | |||
| 196 | Id GetAttributeType(EmitContext& ctx, AttributeType type) { | ||
| 197 | switch (type) { | ||
| 198 | case AttributeType::Float: | ||
| 199 | return ctx.F32[4]; | ||
| 200 | case AttributeType::SignedInt: | ||
| 201 | return ctx.TypeVector(ctx.TypeInt(32, true), 4); | ||
| 202 | case AttributeType::UnsignedInt: | ||
| 203 | return ctx.U32[4]; | ||
| 204 | case AttributeType::Disabled: | ||
| 205 | break; | ||
| 206 | } | ||
| 207 | throw InvalidArgument("Invalid attribute type {}", type); | ||
| 208 | } | ||
| 209 | |||
| 210 | std::optional<AttrInfo> AttrTypes(EmitContext& ctx, u32 index) { | ||
| 211 | const AttributeType type{ctx.runtime_info.generic_input_types.at(index)}; | ||
| 212 | switch (type) { | ||
| 213 | case AttributeType::Float: | ||
| 214 | return AttrInfo{ctx.input_f32, ctx.F32[1], false}; | ||
| 215 | case AttributeType::UnsignedInt: | ||
| 216 | return AttrInfo{ctx.input_u32, ctx.U32[1], true}; | ||
| 217 | case AttributeType::SignedInt: | ||
| 218 | return AttrInfo{ctx.input_s32, ctx.TypeInt(32, true), true}; | ||
| 219 | case AttributeType::Disabled: | ||
| 220 | return std::nullopt; | ||
| 221 | } | ||
| 222 | throw InvalidArgument("Invalid attribute type {}", type); | ||
| 223 | } | ||
| 224 | |||
| 225 | std::string_view StageName(Stage stage) { | ||
| 226 | switch (stage) { | ||
| 227 | case Stage::VertexA: | ||
| 228 | return "vs_a"; | ||
| 229 | case Stage::VertexB: | ||
| 230 | return "vs"; | ||
| 231 | case Stage::TessellationControl: | ||
| 232 | return "tcs"; | ||
| 233 | case Stage::TessellationEval: | ||
| 234 | return "tes"; | ||
| 235 | case Stage::Geometry: | ||
| 236 | return "gs"; | ||
| 237 | case Stage::Fragment: | ||
| 238 | return "fs"; | ||
| 239 | case Stage::Compute: | ||
| 240 | return "cs"; | ||
| 241 | } | ||
| 242 | throw InvalidArgument("Invalid stage {}", stage); | ||
| 243 | } | ||
| 244 | |||
| 245 | template <typename... Args> | ||
| 246 | void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... args) { | ||
| 247 | ctx.Name(object, fmt::format(fmt::runtime(format_str), StageName(ctx.stage), | ||
| 248 | std::forward<Args>(args)...) | ||
| 249 | .c_str()); | ||
| 250 | } | ||
| 251 | |||
| 252 | void DefineConstBuffers(EmitContext& ctx, const Info& info, Id UniformDefinitions::*member_type, | ||
| 253 | u32 binding, Id type, char type_char, u32 element_size) { | ||
| 254 | const Id array_type{ctx.TypeArray(type, ctx.Const(65536U / element_size))}; | ||
| 255 | ctx.Decorate(array_type, spv::Decoration::ArrayStride, element_size); | ||
| 256 | |||
| 257 | const Id struct_type{ctx.TypeStruct(array_type)}; | ||
| 258 | Name(ctx, struct_type, "{}_cbuf_block_{}{}", type_char, element_size * CHAR_BIT); | ||
| 259 | ctx.Decorate(struct_type, spv::Decoration::Block); | ||
| 260 | ctx.MemberName(struct_type, 0, "data"); | ||
| 261 | ctx.MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); | ||
| 262 | |||
| 263 | const Id struct_pointer_type{ctx.TypePointer(spv::StorageClass::Uniform, struct_type)}; | ||
| 264 | const Id uniform_type{ctx.TypePointer(spv::StorageClass::Uniform, type)}; | ||
| 265 | ctx.uniform_types.*member_type = uniform_type; | ||
| 266 | |||
| 267 | for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { | ||
| 268 | const Id id{ctx.AddGlobalVariable(struct_pointer_type, spv::StorageClass::Uniform)}; | ||
| 269 | ctx.Decorate(id, spv::Decoration::Binding, binding); | ||
| 270 | ctx.Decorate(id, spv::Decoration::DescriptorSet, 0U); | ||
| 271 | ctx.Name(id, fmt::format("c{}", desc.index)); | ||
| 272 | for (size_t i = 0; i < desc.count; ++i) { | ||
| 273 | ctx.cbufs[desc.index + i].*member_type = id; | ||
| 274 | } | ||
| 275 | if (ctx.profile.supported_spirv >= 0x00010400) { | ||
| 276 | ctx.interfaces.push_back(id); | ||
| 277 | } | ||
| 278 | binding += desc.count; | ||
| 279 | } | ||
| 280 | } | ||
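// The 65536 above is the 64 KiB constant buffer limit of the guest GPU:
// each uniform block is typed as 65536 / element_size elements with a
// matching ArrayStride (16384 elements for 4-byte accesses, for example).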
| 281 | |||
| 282 | void DefineSsbos(EmitContext& ctx, StorageTypeDefinition& type_def, | ||
| 283 | Id StorageDefinitions::*member_type, const Info& info, u32 binding, Id type, | ||
| 284 | u32 stride) { | ||
| 285 | const Id array_type{ctx.TypeRuntimeArray(type)}; | ||
| 286 | ctx.Decorate(array_type, spv::Decoration::ArrayStride, stride); | ||
| 287 | |||
| 288 | const Id struct_type{ctx.TypeStruct(array_type)}; | ||
| 289 | ctx.Decorate(struct_type, spv::Decoration::Block); | ||
| 290 | ctx.MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); | ||
| 291 | |||
| 292 | const Id struct_pointer{ctx.TypePointer(spv::StorageClass::StorageBuffer, struct_type)}; | ||
| 293 | type_def.array = struct_pointer; | ||
| 294 | type_def.element = ctx.TypePointer(spv::StorageClass::StorageBuffer, type); | ||
| 295 | |||
| 296 | u32 index{}; | ||
| 297 | for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { | ||
| 298 | const Id id{ctx.AddGlobalVariable(struct_pointer, spv::StorageClass::StorageBuffer)}; | ||
| 299 | ctx.Decorate(id, spv::Decoration::Binding, binding); | ||
| 300 | ctx.Decorate(id, spv::Decoration::DescriptorSet, 0U); | ||
| 301 | ctx.Name(id, fmt::format("ssbo{}", index)); | ||
| 302 | if (ctx.profile.supported_spirv >= 0x00010400) { | ||
| 303 | ctx.interfaces.push_back(id); | ||
| 304 | } | ||
| 305 | for (size_t i = 0; i < desc.count; ++i) { | ||
| 306 | ctx.ssbos[index + i].*member_type = id; | ||
| 307 | } | ||
| 308 | index += desc.count; | ||
| 309 | binding += desc.count; | ||
| 310 | } | ||
| 311 | } | ||
| 312 | |||
| 313 | Id CasFunction(EmitContext& ctx, Operation operation, Id value_type) { | ||
| 314 | const Id func_type{ctx.TypeFunction(value_type, value_type, value_type)}; | ||
| 315 | const Id func{ctx.OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)}; | ||
| 316 | const Id op_a{ctx.OpFunctionParameter(value_type)}; | ||
| 317 | const Id op_b{ctx.OpFunctionParameter(value_type)}; | ||
| 318 | ctx.AddLabel(); | ||
| 319 | Id result{}; | ||
| 320 | switch (operation) { | ||
| 321 | case Operation::Increment: { | ||
| 322 | const Id pred{ctx.OpUGreaterThanEqual(ctx.U1, op_a, op_b)}; | ||
| 323 | const Id incr{ctx.OpIAdd(value_type, op_a, ctx.Constant(value_type, 1))}; | ||
| 324 | result = ctx.OpSelect(value_type, pred, ctx.u32_zero_value, incr); | ||
| 325 | break; | ||
| 326 | } | ||
| 327 | case Operation::Decrement: { | ||
| 328 | const Id lhs{ctx.OpIEqual(ctx.U1, op_a, ctx.Constant(value_type, 0u))}; | ||
| 329 | const Id rhs{ctx.OpUGreaterThan(ctx.U1, op_a, op_b)}; | ||
| 330 | const Id pred{ctx.OpLogicalOr(ctx.U1, lhs, rhs)}; | ||
| 331 | const Id decr{ctx.OpISub(value_type, op_a, ctx.Constant(value_type, 1))}; | ||
| 332 | result = ctx.OpSelect(value_type, pred, op_b, decr); | ||
| 333 | break; | ||
| 334 | } | ||
| 335 | case Operation::FPAdd: | ||
| 336 | result = ctx.OpFAdd(value_type, op_a, op_b); | ||
| 337 | break; | ||
| 338 | case Operation::FPMin: | ||
| 339 | result = ctx.OpFMin(value_type, op_a, op_b); | ||
| 340 | break; | ||
| 341 | case Operation::FPMax: | ||
| 342 | result = ctx.OpFMax(value_type, op_a, op_b); | ||
| 343 | break; | ||
| 344 | default: | ||
| 345 | break; | ||
| 346 | } | ||
| 347 | ctx.OpReturnValue(result); | ||
| 348 | ctx.OpFunctionEnd(); | ||
| 349 | return func; | ||
| 350 | } | ||
| 351 | |||
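// CasLoop wraps the combining function built by CasFunction in a retry loop:
// load the word, apply the operation, then OpAtomicCompareExchange; if some
// other invocation wrote the word in between, the exchange fails and control
// branches back to the loop header. F16x2 values take the F32[2] path below,
// round-tripping through OpUnpackHalf2x16/OpPackHalf2x16 because the
// underlying memory word is a plain 32-bit uint.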
| 352 | Id CasLoop(EmitContext& ctx, Operation operation, Id array_pointer, Id element_pointer, | ||
| 353 | Id value_type, Id memory_type, spv::Scope scope) { | ||
| 354 | const bool is_shared{scope == spv::Scope::Workgroup}; | ||
| 355 | const bool is_struct{!is_shared || ctx.profile.support_explicit_workgroup_layout}; | ||
| 356 | const Id cas_func{CasFunction(ctx, operation, value_type)}; | ||
| 357 | const Id zero{ctx.u32_zero_value}; | ||
| 358 | const Id scope_id{ctx.Const(static_cast<u32>(scope))}; | ||
| 359 | |||
| 360 | const Id loop_header{ctx.OpLabel()}; | ||
| 361 | const Id continue_block{ctx.OpLabel()}; | ||
| 362 | const Id merge_block{ctx.OpLabel()}; | ||
| 363 | const Id func_type{is_shared | ||
| 364 | ? ctx.TypeFunction(value_type, ctx.U32[1], value_type) | ||
| 365 | : ctx.TypeFunction(value_type, ctx.U32[1], value_type, array_pointer)}; | ||
| 366 | |||
| 367 | const Id func{ctx.OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)}; | ||
| 368 | const Id index{ctx.OpFunctionParameter(ctx.U32[1])}; | ||
| 369 | const Id op_b{ctx.OpFunctionParameter(value_type)}; | ||
| 370 | const Id base{is_shared ? ctx.shared_memory_u32 : ctx.OpFunctionParameter(array_pointer)}; | ||
| 371 | ctx.AddLabel(); | ||
| 372 | ctx.OpBranch(loop_header); | ||
| 373 | ctx.AddLabel(loop_header); | ||
| 374 | |||
| 375 | ctx.OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone); | ||
| 376 | ctx.OpBranch(continue_block); | ||
| 377 | |||
| 378 | ctx.AddLabel(continue_block); | ||
| 379 | const Id word_pointer{is_struct ? ctx.OpAccessChain(element_pointer, base, zero, index) | ||
| 380 | : ctx.OpAccessChain(element_pointer, base, index)}; | ||
| 381 | if (value_type.value == ctx.F32[2].value) { | ||
| 382 | const Id u32_value{ctx.OpLoad(ctx.U32[1], word_pointer)}; | ||
| 383 | const Id value{ctx.OpUnpackHalf2x16(ctx.F32[2], u32_value)}; | ||
| 384 | const Id new_value{ctx.OpFunctionCall(value_type, cas_func, value, op_b)}; | ||
| 385 | const Id u32_new_value{ctx.OpPackHalf2x16(ctx.U32[1], new_value)}; | ||
| 386 | const Id atomic_res{ctx.OpAtomicCompareExchange(ctx.U32[1], word_pointer, scope_id, zero, | ||
| 387 | zero, u32_new_value, u32_value)}; | ||
| 388 | const Id success{ctx.OpIEqual(ctx.U1, atomic_res, u32_value)}; | ||
| 389 | ctx.OpBranchConditional(success, merge_block, loop_header); | ||
| 390 | |||
| 391 | ctx.AddLabel(merge_block); | ||
| 392 | ctx.OpReturnValue(ctx.OpUnpackHalf2x16(ctx.F32[2], atomic_res)); | ||
| 393 | } else { | ||
| 394 | const Id value{ctx.OpLoad(memory_type, word_pointer)}; | ||
| 395 | const bool matching_type{value_type.value == memory_type.value}; | ||
| 396 | const Id bitcast_value{matching_type ? value : ctx.OpBitcast(value_type, value)}; | ||
| 397 | const Id cal_res{ctx.OpFunctionCall(value_type, cas_func, bitcast_value, op_b)}; | ||
| 398 | const Id new_value{matching_type ? cal_res : ctx.OpBitcast(memory_type, cal_res)}; | ||
| 399 | const Id atomic_res{ctx.OpAtomicCompareExchange(ctx.U32[1], word_pointer, scope_id, zero, | ||
| 400 | zero, new_value, value)}; | ||
| 401 | const Id success{ctx.OpIEqual(ctx.U1, atomic_res, value)}; | ||
| 402 | ctx.OpBranchConditional(success, merge_block, loop_header); | ||
| 403 | |||
| 404 | ctx.AddLabel(merge_block); | ||
| 405 | ctx.OpReturnValue(ctx.OpBitcast(value_type, atomic_res)); | ||
| 406 | } | ||
| 407 | ctx.OpFunctionEnd(); | ||
| 408 | return func; | ||
| 409 | } | ||
| 410 | |||
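| | // Returns a debug name of the form "<stage>_<prefix><cbuf_index>_<cbuf_offset>", with an | ||
| | // "x<count>" suffix appended for descriptor arrays. | ||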
| 411 | template <typename Desc> | ||
| 412 | std::string NameOf(Stage stage, const Desc& desc, std::string_view prefix) { | ||
| 413 | if (desc.count > 1) { | ||
| 414 | return fmt::format("{}_{}{}_{:02x}x{}", StageName(stage), prefix, desc.cbuf_index, | ||
| 415 | desc.cbuf_offset, desc.count); | ||
| 416 | } else { | ||
| 417 | return fmt::format("{}_{}{}_{:02x}", StageName(stage), prefix, desc.cbuf_index, | ||
| 418 | desc.cbuf_offset); | ||
| 419 | } | ||
| 420 | } | ||
| 421 | |||
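| | // Returns the pointer type used to declare a descriptor: a pointer to an array of 'count' | ||
| | // elements for descriptor arrays, otherwise the plain pointer type. | ||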
| 422 | Id DescType(EmitContext& ctx, Id sampled_type, Id pointer_type, u32 count) { | ||
| 423 | if (count > 1) { | ||
| 424 | const Id array_type{ctx.TypeArray(sampled_type, ctx.Const(count))}; | ||
| 425 | return ctx.TypePointer(spv::StorageClass::UniformConstant, array_type); | ||
| 426 | } else { | ||
| 427 | return pointer_type; | ||
| 428 | } | ||
| 429 | } | ||
| 430 | } // Anonymous namespace | ||
| 431 | |||
| 432 | void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { | ||
| 433 | defs[0] = sirit_ctx.Name(base_type, name); | ||
| 434 | |||
| 435 | std::array<char, 6> def_name; | ||
| 436 | for (int i = 1; i < 4; ++i) { | ||
| 437 | const std::string_view def_name_view( | ||
| 438 | def_name.data(), | ||
| 439 | fmt::format_to_n(def_name.data(), def_name.size(), "{}x{}", name, i + 1).size); | ||
| 440 | defs[static_cast<size_t>(i)] = | ||
| 441 | sirit_ctx.Name(sirit_ctx.TypeVector(base_type, i + 1), def_name_view); | ||
| 442 | } | ||
| 443 | } | ||
| 444 | |||
| 445 | EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_, | ||
| 446 | IR::Program& program, Bindings& bindings) | ||
| 447 | : Sirit::Module(profile_.supported_spirv), profile{profile_}, | ||
| 448 | runtime_info{runtime_info_}, stage{program.stage} { | ||
| 449 | const bool is_unified{profile.unified_descriptor_binding}; | ||
| 450 | u32& uniform_binding{is_unified ? bindings.unified : bindings.uniform_buffer}; | ||
| 451 | u32& storage_binding{is_unified ? bindings.unified : bindings.storage_buffer}; | ||
| 452 | u32& texture_binding{is_unified ? bindings.unified : bindings.texture}; | ||
| 453 | u32& image_binding{is_unified ? bindings.unified : bindings.image}; | ||
| 454 | AddCapability(spv::Capability::Shader); | ||
| 455 | DefineCommonTypes(program.info); | ||
| 456 | DefineCommonConstants(); | ||
| 457 | DefineInterfaces(program); | ||
| 458 | DefineLocalMemory(program); | ||
| 459 | DefineSharedMemory(program); | ||
| 460 | DefineSharedMemoryFunctions(program); | ||
| 461 | DefineConstantBuffers(program.info, uniform_binding); | ||
| 462 | DefineStorageBuffers(program.info, storage_binding); | ||
| 463 | DefineTextureBuffers(program.info, texture_binding); | ||
| 464 | DefineImageBuffers(program.info, image_binding); | ||
| 465 | DefineTextures(program.info, texture_binding); | ||
| 466 | DefineImages(program.info, image_binding); | ||
| 467 | DefineAttributeMemAccess(program.info); | ||
| 468 | DefineGlobalMemoryFunctions(program.info); | ||
| 469 | } | ||
| 470 | |||
| 471 | EmitContext::~EmitContext() = default; | ||
| 472 | |||
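| | // Returns the SPIR-V id of an IR value, materializing immediates as constants. | ||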
| 473 | Id EmitContext::Def(const IR::Value& value) { | ||
| 474 | if (!value.IsImmediate()) { | ||
| 475 | return value.InstRecursive()->Definition<Id>(); | ||
| 476 | } | ||
| 477 | switch (value.Type()) { | ||
| 478 | case IR::Type::Void: | ||
| 479 | // Void values are used for optional arguments (e.g. texture offsets) | ||
| 480 | // and are not meant to appear in the emitted SPIR-V module | ||
| 481 | return Id{}; | ||
| 482 | case IR::Type::U1: | ||
| 483 | return value.U1() ? true_value : false_value; | ||
| 484 | case IR::Type::U32: | ||
| 485 | return Const(value.U32()); | ||
| 486 | case IR::Type::U64: | ||
| 487 | return Constant(U64, value.U64()); | ||
| 488 | case IR::Type::F32: | ||
| 489 | return Const(value.F32()); | ||
| 490 | case IR::Type::F64: | ||
| 491 | return Constant(F64[1], value.F64()); | ||
| 492 | default: | ||
| 493 | throw NotImplementedException("Immediate type {}", value.Type()); | ||
| 494 | } | ||
| 495 | } | ||
| 496 | |||
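| | // BitOffset8 and BitOffset16 return the bit offset of a byte or halfword within its | ||
| | // containing 32-bit word, folding the computation when the offset is an immediate. | ||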
| 497 | Id EmitContext::BitOffset8(const IR::Value& offset) { | ||
| 498 | if (offset.IsImmediate()) { | ||
| 499 | return Const((offset.U32() % 4) * 8); | ||
| 500 | } | ||
| 501 | return OpBitwiseAnd(U32[1], OpShiftLeftLogical(U32[1], Def(offset), Const(3u)), Const(24u)); | ||
| 502 | } | ||
| 503 | |||
| 504 | Id EmitContext::BitOffset16(const IR::Value& offset) { | ||
| 505 | if (offset.IsImmediate()) { | ||
| 506 | return Const(((offset.U32() / 2) % 2) * 16); | ||
| 507 | } | ||
| 508 | return OpBitwiseAnd(U32[1], OpShiftLeftLogical(U32[1], Def(offset), Const(3u)), Const(16u)); | ||
| 509 | } | ||
| 510 | |||
| 511 | void EmitContext::DefineCommonTypes(const Info& info) { | ||
| 512 | void_id = TypeVoid(); | ||
| 513 | |||
| 514 | U1 = Name(TypeBool(), "u1"); | ||
| 515 | |||
| 516 | F32.Define(*this, TypeFloat(32), "f32"); | ||
| 517 | U32.Define(*this, TypeInt(32, false), "u32"); | ||
| 518 | S32.Define(*this, TypeInt(32, true), "s32"); | ||
| 519 | |||
| 520 | private_u32 = Name(TypePointer(spv::StorageClass::Private, U32[1]), "private_u32"); | ||
| 521 | |||
| 522 | input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32"); | ||
| 523 | input_u32 = Name(TypePointer(spv::StorageClass::Input, U32[1]), "input_u32"); | ||
| 524 | input_s32 = Name(TypePointer(spv::StorageClass::Input, TypeInt(32, true)), "input_s32"); | ||
| 525 | |||
| 526 | output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32"); | ||
| 527 | output_u32 = Name(TypePointer(spv::StorageClass::Output, U32[1]), "output_u32"); | ||
| 528 | |||
| 529 | if (info.uses_int8 && profile.support_int8) { | ||
| 530 | AddCapability(spv::Capability::Int8); | ||
| 531 | U8 = Name(TypeInt(8, false), "u8"); | ||
| 532 | S8 = Name(TypeInt(8, true), "s8"); | ||
| 533 | } | ||
| 534 | if (info.uses_int16 && profile.support_int16) { | ||
| 535 | AddCapability(spv::Capability::Int16); | ||
| 536 | U16 = Name(TypeInt(16, false), "u16"); | ||
| 537 | S16 = Name(TypeInt(16, true), "s16"); | ||
| 538 | } | ||
| 539 | if (info.uses_int64) { | ||
| 540 | AddCapability(spv::Capability::Int64); | ||
| 541 | U64 = Name(TypeInt(64, false), "u64"); | ||
| 542 | } | ||
| 543 | if (info.uses_fp16) { | ||
| 544 | AddCapability(spv::Capability::Float16); | ||
| 545 | F16.Define(*this, TypeFloat(16), "f16"); | ||
| 546 | } | ||
| 547 | if (info.uses_fp64) { | ||
| 548 | AddCapability(spv::Capability::Float64); | ||
| 549 | F64.Define(*this, TypeFloat(64), "f64"); | ||
| 550 | } | ||
| 551 | } | ||
| 552 | |||
| 553 | void EmitContext::DefineCommonConstants() { | ||
| 554 | true_value = ConstantTrue(U1); | ||
| 555 | false_value = ConstantFalse(U1); | ||
| 556 | u32_zero_value = Const(0U); | ||
| 557 | f32_zero_value = Const(0.0f); | ||
| 558 | } | ||
| 559 | |||
| 560 | void EmitContext::DefineInterfaces(const IR::Program& program) { | ||
| 561 | DefineInputs(program); | ||
| 562 | DefineOutputs(program); | ||
| 563 | } | ||
| 564 | |||
| 565 | void EmitContext::DefineLocalMemory(const IR::Program& program) { | ||
| 566 | if (program.local_memory_size == 0) { | ||
| 567 | return; | ||
| 568 | } | ||
| 569 | const u32 num_elements{Common::DivCeil(program.local_memory_size, 4U)}; | ||
| 570 | const Id type{TypeArray(U32[1], Const(num_elements))}; | ||
| 571 | const Id pointer{TypePointer(spv::StorageClass::Private, type)}; | ||
| 572 | local_memory = AddGlobalVariable(pointer, spv::StorageClass::Private); | ||
| 573 | if (profile.supported_spirv >= 0x00010400) { | ||
| 574 | interfaces.push_back(local_memory); | ||
| 575 | } | ||
| 576 | } | ||
| 577 | |||
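| | // Shared memory is declared as explicitly laid out blocks when | ||
| | // SPV_KHR_workgroup_memory_explicit_layout is supported; otherwise it falls back to a plain | ||
| | // u32 array and emulates 8-bit and 16-bit stores with OpBitFieldInsert inside a CAS retry loop. | ||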
| 578 | void EmitContext::DefineSharedMemory(const IR::Program& program) { | ||
| 579 | if (program.shared_memory_size == 0) { | ||
| 580 | return; | ||
| 581 | } | ||
| 582 | const auto make{[&](Id element_type, u32 element_size) { | ||
| 583 | const u32 num_elements{Common::DivCeil(program.shared_memory_size, element_size)}; | ||
| 584 | const Id array_type{TypeArray(element_type, Const(num_elements))}; | ||
| 585 | Decorate(array_type, spv::Decoration::ArrayStride, element_size); | ||
| 586 | |||
| 587 | const Id struct_type{TypeStruct(array_type)}; | ||
| 588 | MemberDecorate(struct_type, 0U, spv::Decoration::Offset, 0U); | ||
| 589 | Decorate(struct_type, spv::Decoration::Block); | ||
| 590 | |||
| 591 | const Id pointer{TypePointer(spv::StorageClass::Workgroup, struct_type)}; | ||
| 592 | const Id element_pointer{TypePointer(spv::StorageClass::Workgroup, element_type)}; | ||
| 593 | const Id variable{AddGlobalVariable(pointer, spv::StorageClass::Workgroup)}; | ||
| 594 | Decorate(variable, spv::Decoration::Aliased); | ||
| 595 | interfaces.push_back(variable); | ||
| 596 | |||
| 597 | return std::make_tuple(variable, element_pointer, pointer); | ||
| 598 | }}; | ||
| 599 | if (profile.support_explicit_workgroup_layout) { | ||
| 600 | AddExtension("SPV_KHR_workgroup_memory_explicit_layout"); | ||
| 601 | AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR); | ||
| 602 | if (program.info.uses_int8) { | ||
| 603 | AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR); | ||
| 604 | std::tie(shared_memory_u8, shared_u8, std::ignore) = make(U8, 1); | ||
| 605 | } | ||
| 606 | if (program.info.uses_int16) { | ||
| 607 | AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR); | ||
| 608 | std::tie(shared_memory_u16, shared_u16, std::ignore) = make(U16, 2); | ||
| 609 | } | ||
| 610 | if (program.info.uses_int64) { | ||
| 611 | std::tie(shared_memory_u64, shared_u64, std::ignore) = make(U64, 8); | ||
| 612 | } | ||
| 613 | std::tie(shared_memory_u32, shared_u32, shared_memory_u32_type) = make(U32[1], 4); | ||
| 614 | std::tie(shared_memory_u32x2, shared_u32x2, std::ignore) = make(U32[2], 8); | ||
| 615 | std::tie(shared_memory_u32x4, shared_u32x4, std::ignore) = make(U32[4], 16); | ||
| 616 | return; | ||
| 617 | } | ||
| 618 | const u32 num_elements{Common::DivCeil(program.shared_memory_size, 4U)}; | ||
| 619 | const Id type{TypeArray(U32[1], Const(num_elements))}; | ||
| 620 | shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type); | ||
| 621 | |||
| 622 | shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]); | ||
| 623 | shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup); | ||
| 624 | interfaces.push_back(shared_memory_u32); | ||
| 625 | |||
| 626 | const Id func_type{TypeFunction(void_id, U32[1], U32[1])}; | ||
| 627 | const auto make_function{[&](u32 mask, u32 size) { | ||
| 628 | const Id loop_header{OpLabel()}; | ||
| 629 | const Id continue_block{OpLabel()}; | ||
| 630 | const Id merge_block{OpLabel()}; | ||
| 631 | |||
| 632 | const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type)}; | ||
| 633 | const Id offset{OpFunctionParameter(U32[1])}; | ||
| 634 | const Id insert_value{OpFunctionParameter(U32[1])}; | ||
| 635 | AddLabel(); | ||
| 636 | OpBranch(loop_header); | ||
| 637 | |||
| 638 | AddLabel(loop_header); | ||
| 639 | const Id word_offset{OpShiftRightArithmetic(U32[1], offset, Const(2U))}; | ||
| 640 | const Id shift_offset{OpShiftLeftLogical(U32[1], offset, Const(3U))}; | ||
| 641 | const Id bit_offset{OpBitwiseAnd(U32[1], shift_offset, Const(mask))}; | ||
| 642 | const Id count{Const(size)}; | ||
| 643 | OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone); | ||
| 644 | OpBranch(continue_block); | ||
| 645 | |||
| 646 | AddLabel(continue_block); | ||
| 647 | const Id word_pointer{OpAccessChain(shared_u32, shared_memory_u32, word_offset)}; | ||
| 648 | const Id old_value{OpLoad(U32[1], word_pointer)}; | ||
| 649 | const Id new_value{OpBitFieldInsert(U32[1], old_value, insert_value, bit_offset, count)}; | ||
| 650 | const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, Const(1U), u32_zero_value, | ||
| 651 | u32_zero_value, new_value, old_value)}; | ||
| 652 | const Id success{OpIEqual(U1, atomic_res, old_value)}; | ||
| 653 | OpBranchConditional(success, merge_block, loop_header); | ||
| 654 | |||
| 655 | AddLabel(merge_block); | ||
| 656 | OpReturn(); | ||
| 657 | OpFunctionEnd(); | ||
| 658 | return func; | ||
| 659 | }}; | ||
| 660 | if (program.info.uses_int8) { | ||
| 661 | shared_store_u8_func = make_function(24, 8); | ||
| 662 | } | ||
| 663 | if (program.info.uses_int16) { | ||
| 664 | shared_store_u16_func = make_function(16, 16); | ||
| 665 | } | ||
| 666 | } | ||
| 667 | |||
| 668 | void EmitContext::DefineSharedMemoryFunctions(const IR::Program& program) { | ||
| 669 | if (program.info.uses_shared_increment) { | ||
| 670 | increment_cas_shared = CasLoop(*this, Operation::Increment, shared_memory_u32_type, | ||
| 671 | shared_u32, U32[1], U32[1], spv::Scope::Workgroup); | ||
| 672 | } | ||
| 673 | if (program.info.uses_shared_decrement) { | ||
| 674 | decrement_cas_shared = CasLoop(*this, Operation::Decrement, shared_memory_u32_type, | ||
| 675 | shared_u32, U32[1], U32[1], spv::Scope::Workgroup); | ||
| 676 | } | ||
| 677 | } | ||
| 678 | |||
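| | // Builds the helper functions used for dynamically indexed attribute accesses: a switch on | ||
| | // offset / 16 selects the attribute and (offset / 4) % 4 selects its component, with | ||
| | // geometry shader loads taking an extra vertex index argument. | ||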
| 679 | void EmitContext::DefineAttributeMemAccess(const Info& info) { | ||
| 680 | const auto make_load{[&] { | ||
| 681 | const bool is_array{stage == Stage::Geometry}; | ||
| 682 | const Id end_block{OpLabel()}; | ||
| 683 | const Id default_label{OpLabel()}; | ||
| 684 | |||
| 685 | const Id func_type_load{is_array ? TypeFunction(F32[1], U32[1], U32[1]) | ||
| 686 | : TypeFunction(F32[1], U32[1])}; | ||
| 687 | const Id func{OpFunction(F32[1], spv::FunctionControlMask::MaskNone, func_type_load)}; | ||
| 688 | const Id offset{OpFunctionParameter(U32[1])}; | ||
| 689 | const Id vertex{is_array ? OpFunctionParameter(U32[1]) : Id{}}; | ||
| 690 | |||
| 691 | AddLabel(); | ||
| 692 | const Id base_index{OpShiftRightArithmetic(U32[1], offset, Const(2U))}; | ||
| 693 | const Id masked_index{OpBitwiseAnd(U32[1], base_index, Const(3U))}; | ||
| 694 | const Id compare_index{OpShiftRightArithmetic(U32[1], base_index, Const(2U))}; | ||
| 695 | std::vector<Sirit::Literal> literals; | ||
| 696 | std::vector<Id> labels; | ||
| 697 | if (info.loads.AnyComponent(IR::Attribute::PositionX)) { | ||
| 698 | literals.push_back(static_cast<u32>(IR::Attribute::PositionX) >> 2); | ||
| 699 | labels.push_back(OpLabel()); | ||
| 700 | } | ||
| 701 | const u32 base_attribute_value = static_cast<u32>(IR::Attribute::Generic0X) >> 2; | ||
| 702 | for (u32 index = 0; index < static_cast<u32>(IR::NUM_GENERICS); ++index) { | ||
| 703 | if (!info.loads.Generic(index)) { | ||
| 704 | continue; | ||
| 705 | } | ||
| 706 | literals.push_back(base_attribute_value + index); | ||
| 707 | labels.push_back(OpLabel()); | ||
| 708 | } | ||
| 709 | OpSelectionMerge(end_block, spv::SelectionControlMask::MaskNone); | ||
| 710 | OpSwitch(compare_index, default_label, literals, labels); | ||
| 711 | AddLabel(default_label); | ||
| 712 | OpReturnValue(Const(0.0f)); | ||
| 713 | size_t label_index{0}; | ||
| 714 | if (info.loads.AnyComponent(IR::Attribute::PositionX)) { | ||
| 715 | AddLabel(labels[label_index]); | ||
| 716 | const Id pointer{is_array | ||
| 717 | ? OpAccessChain(input_f32, input_position, vertex, masked_index) | ||
| 718 | : OpAccessChain(input_f32, input_position, masked_index)}; | ||
| 719 | const Id result{OpLoad(F32[1], pointer)}; | ||
| 720 | OpReturnValue(result); | ||
| 721 | ++label_index; | ||
| 722 | } | ||
| 723 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { | ||
| 724 | if (!info.loads.Generic(index)) { | ||
| 725 | continue; | ||
| 726 | } | ||
| 727 | AddLabel(labels[label_index]); | ||
| 728 | const auto type{AttrTypes(*this, static_cast<u32>(index))}; | ||
| 729 | if (!type) { | ||
| 730 | OpReturnValue(Const(0.0f)); | ||
| 731 | ++label_index; | ||
| 732 | continue; | ||
| 733 | } | ||
| 734 | const Id generic_id{input_generics.at(index)}; | ||
| 735 | const Id pointer{is_array | ||
| 736 | ? OpAccessChain(type->pointer, generic_id, vertex, masked_index) | ||
| 737 | : OpAccessChain(type->pointer, generic_id, masked_index)}; | ||
| 738 | const Id value{OpLoad(type->id, pointer)}; | ||
| 739 | const Id result{type->needs_cast ? OpBitcast(F32[1], value) : value}; | ||
| 740 | OpReturnValue(result); | ||
| 741 | ++label_index; | ||
| 742 | } | ||
| 743 | AddLabel(end_block); | ||
| 744 | OpUnreachable(); | ||
| 745 | OpFunctionEnd(); | ||
| 746 | return func; | ||
| 747 | }}; | ||
| 748 | const auto make_store{[&] { | ||
| 749 | const Id end_block{OpLabel()}; | ||
| 750 | const Id default_label{OpLabel()}; | ||
| 751 | |||
| 752 | const Id func_type_store{TypeFunction(void_id, U32[1], F32[1])}; | ||
| 753 | const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type_store)}; | ||
| 754 | const Id offset{OpFunctionParameter(U32[1])}; | ||
| 755 | const Id store_value{OpFunctionParameter(F32[1])}; | ||
| 756 | AddLabel(); | ||
| 757 | const Id base_index{OpShiftRightArithmetic(U32[1], offset, Const(2U))}; | ||
| 758 | const Id masked_index{OpBitwiseAnd(U32[1], base_index, Const(3U))}; | ||
| 759 | const Id compare_index{OpShiftRightArithmetic(U32[1], base_index, Const(2U))}; | ||
| 760 | std::vector<Sirit::Literal> literals; | ||
| 761 | std::vector<Id> labels; | ||
| 762 | if (info.stores.AnyComponent(IR::Attribute::PositionX)) { | ||
| 763 | literals.push_back(static_cast<u32>(IR::Attribute::PositionX) >> 2); | ||
| 764 | labels.push_back(OpLabel()); | ||
| 765 | } | ||
| 766 | const u32 base_attribute_value = static_cast<u32>(IR::Attribute::Generic0X) >> 2; | ||
| 767 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { | ||
| 768 | if (!info.stores.Generic(index)) { | ||
| 769 | continue; | ||
| 770 | } | ||
| 771 | literals.push_back(base_attribute_value + static_cast<u32>(index)); | ||
| 772 | labels.push_back(OpLabel()); | ||
| 773 | } | ||
| 774 | if (info.stores.ClipDistances()) { | ||
| 775 | literals.push_back(static_cast<u32>(IR::Attribute::ClipDistance0) >> 2); | ||
| 776 | labels.push_back(OpLabel()); | ||
| 777 | literals.push_back(static_cast<u32>(IR::Attribute::ClipDistance4) >> 2); | ||
| 778 | labels.push_back(OpLabel()); | ||
| 779 | } | ||
| 780 | OpSelectionMerge(end_block, spv::SelectionControlMask::MaskNone); | ||
| 781 | OpSwitch(compare_index, default_label, literals, labels); | ||
| 782 | AddLabel(default_label); | ||
| 783 | OpReturn(); | ||
| 784 | size_t label_index{0}; | ||
| 785 | if (info.stores.AnyComponent(IR::Attribute::PositionX)) { | ||
| 786 | AddLabel(labels[label_index]); | ||
| 787 | const Id pointer{OpAccessChain(output_f32, output_position, masked_index)}; | ||
| 788 | OpStore(pointer, store_value); | ||
| 789 | OpReturn(); | ||
| 790 | ++label_index; | ||
| 791 | } | ||
| 792 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { | ||
| 793 | if (!info.stores.Generic(index)) { | ||
| 794 | continue; | ||
| 795 | } | ||
| 796 | if (output_generics[index][0].num_components != 4) { | ||
| 797 | throw NotImplementedException("Physical stores and transform feedbacks"); | ||
| 798 | } | ||
| 799 | AddLabel(labels[label_index]); | ||
| 800 | const Id generic_id{output_generics[index][0].id}; | ||
| 801 | const Id pointer{OpAccessChain(output_f32, generic_id, masked_index)}; | ||
| 802 | OpStore(pointer, store_value); | ||
| 803 | OpReturn(); | ||
| 804 | ++label_index; | ||
| 805 | } | ||
| 806 | if (info.stores.ClipDistances()) { | ||
| 807 | AddLabel(labels[label_index]); | ||
| 808 | const Id pointer{OpAccessChain(output_f32, clip_distances, masked_index)}; | ||
| 809 | OpStore(pointer, store_value); | ||
| 810 | OpReturn(); | ||
| 811 | ++label_index; | ||
| 812 | AddLabel(labels[label_index]); | ||
| 813 | const Id fixed_index{OpIAdd(U32[1], masked_index, Const(4U))}; | ||
| 814 | const Id pointer2{OpAccessChain(output_f32, clip_distances, fixed_index)}; | ||
| 815 | OpStore(pointer2, store_value); | ||
| 816 | OpReturn(); | ||
| 817 | ++label_index; | ||
| 818 | } | ||
| 819 | AddLabel(end_block); | ||
| 820 | OpUnreachable(); | ||
| 821 | OpFunctionEnd(); | ||
| 822 | return func; | ||
| 823 | }}; | ||
| 824 | if (info.loads_indexed_attributes) { | ||
| 825 | indexed_load_func = make_load(); | ||
| 826 | } | ||
| 827 | if (info.stores_indexed_attributes) { | ||
| 828 | indexed_store_func = make_store(); | ||
| 829 | } | ||
| 830 | } | ||
| 831 | |||
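| | // Defines load and store helpers for global memory accesses: each helper compares the 64-bit | ||
| | // address against the [base, base + size) range of every tracked storage buffer, read from | ||
| | // the constant buffer, and performs the access through the matching SSBO when it hits. | ||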
| 832 | void EmitContext::DefineGlobalMemoryFunctions(const Info& info) { | ||
| 833 | if (!info.uses_global_memory || !profile.support_int64) { | ||
| 834 | return; | ||
| 835 | } | ||
| 836 | using DefPtr = Id StorageDefinitions::*; | ||
| 837 | const Id zero{u32_zero_value}; | ||
| 838 | const auto define_body{[&](DefPtr ssbo_member, Id addr, Id element_pointer, u32 shift, | ||
| 839 | auto&& callback) { | ||
| 840 | AddLabel(); | ||
| 841 | const size_t num_buffers{info.storage_buffers_descriptors.size()}; | ||
| 842 | for (size_t index = 0; index < num_buffers; ++index) { | ||
| 843 | if (!info.nvn_buffer_used[index]) { | ||
| 844 | continue; | ||
| 845 | } | ||
| 846 | const auto& ssbo{info.storage_buffers_descriptors[index]}; | ||
| 847 | const Id ssbo_addr_cbuf_offset{Const(ssbo.cbuf_offset / 8)}; | ||
| 848 | const Id ssbo_size_cbuf_offset{Const(ssbo.cbuf_offset / 4 + 2)}; | ||
| 849 | const Id ssbo_addr_pointer{OpAccessChain( | ||
| 850 | uniform_types.U32x2, cbufs[ssbo.cbuf_index].U32x2, zero, ssbo_addr_cbuf_offset)}; | ||
| 851 | const Id ssbo_size_pointer{OpAccessChain(uniform_types.U32, cbufs[ssbo.cbuf_index].U32, | ||
| 852 | zero, ssbo_size_cbuf_offset)}; | ||
| 853 | |||
| 854 | const Id ssbo_addr{OpBitcast(U64, OpLoad(U32[2], ssbo_addr_pointer))}; | ||
| 855 | const Id ssbo_size{OpUConvert(U64, OpLoad(U32[1], ssbo_size_pointer))}; | ||
| 856 | const Id ssbo_end{OpIAdd(U64, ssbo_addr, ssbo_size)}; | ||
| 857 | const Id cond{OpLogicalAnd(U1, OpUGreaterThanEqual(U1, addr, ssbo_addr), | ||
| 858 | OpULessThan(U1, addr, ssbo_end))}; | ||
| 859 | const Id then_label{OpLabel()}; | ||
| 860 | const Id else_label{OpLabel()}; | ||
| 861 | OpSelectionMerge(else_label, spv::SelectionControlMask::MaskNone); | ||
| 862 | OpBranchConditional(cond, then_label, else_label); | ||
| 863 | AddLabel(then_label); | ||
| 864 | const Id ssbo_id{ssbos[index].*ssbo_member}; | ||
| 865 | const Id ssbo_offset{OpUConvert(U32[1], OpISub(U64, addr, ssbo_addr))}; | ||
| 866 | const Id ssbo_index{OpShiftRightLogical(U32[1], ssbo_offset, Const(shift))}; | ||
| 867 | const Id ssbo_pointer{OpAccessChain(element_pointer, ssbo_id, zero, ssbo_index)}; | ||
| 868 | callback(ssbo_pointer); | ||
| 869 | AddLabel(else_label); | ||
| 870 | } | ||
| 871 | }}; | ||
| 872 | const auto define_load{[&](DefPtr ssbo_member, Id element_pointer, Id type, u32 shift) { | ||
| 873 | const Id function_type{TypeFunction(type, U64)}; | ||
| 874 | const Id func_id{OpFunction(type, spv::FunctionControlMask::MaskNone, function_type)}; | ||
| 875 | const Id addr{OpFunctionParameter(U64)}; | ||
| 876 | define_body(ssbo_member, addr, element_pointer, shift, | ||
| 877 | [&](Id ssbo_pointer) { OpReturnValue(OpLoad(type, ssbo_pointer)); }); | ||
| 878 | OpReturnValue(ConstantNull(type)); | ||
| 879 | OpFunctionEnd(); | ||
| 880 | return func_id; | ||
| 881 | }}; | ||
| 882 | const auto define_write{[&](DefPtr ssbo_member, Id element_pointer, Id type, u32 shift) { | ||
| 883 | const Id function_type{TypeFunction(void_id, U64, type)}; | ||
| 884 | const Id func_id{OpFunction(void_id, spv::FunctionControlMask::MaskNone, function_type)}; | ||
| 885 | const Id addr{OpFunctionParameter(U64)}; | ||
| 886 | const Id data{OpFunctionParameter(type)}; | ||
| 887 | define_body(ssbo_member, addr, element_pointer, shift, [&](Id ssbo_pointer) { | ||
| 888 | OpStore(ssbo_pointer, data); | ||
| 889 | OpReturn(); | ||
| 890 | }); | ||
| 891 | OpReturn(); | ||
| 892 | OpFunctionEnd(); | ||
| 893 | return func_id; | ||
| 894 | }}; | ||
| 895 | const auto define{ | ||
| 896 | [&](DefPtr ssbo_member, const StorageTypeDefinition& type_def, Id type, size_t size) { | ||
| 897 | const Id element_type{type_def.element}; | ||
| 898 | const u32 shift{static_cast<u32>(std::countr_zero(size))}; | ||
| 899 | const Id load_func{define_load(ssbo_member, element_type, type, shift)}; | ||
| 900 | const Id write_func{define_write(ssbo_member, element_type, type, shift)}; | ||
| 901 | return std::make_pair(load_func, write_func); | ||
| 902 | }}; | ||
| 903 | std::tie(load_global_func_u32, write_global_func_u32) = | ||
| 904 | define(&StorageDefinitions::U32, storage_types.U32, U32[1], sizeof(u32)); | ||
| 905 | std::tie(load_global_func_u32x2, write_global_func_u32x2) = | ||
| 906 | define(&StorageDefinitions::U32x2, storage_types.U32x2, U32[2], sizeof(u32[2])); | ||
| 907 | std::tie(load_global_func_u32x4, write_global_func_u32x4) = | ||
| 908 | define(&StorageDefinitions::U32x4, storage_types.U32x4, U32[4], sizeof(u32[4])); | ||
| 909 | } | ||
| 910 | |||
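| | // Without descriptor aliasing each constant buffer is declared as a single u32x4 view; | ||
| | // otherwise one view per used element type is declared, widening 8-bit and 16-bit accesses | ||
| | // to u32 when the Int8/Int16 capabilities are unavailable. | ||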
| 911 | void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { | ||
| 912 | if (info.constant_buffer_descriptors.empty()) { | ||
| 913 | return; | ||
| 914 | } | ||
| 915 | if (!profile.support_descriptor_aliasing) { | ||
| 916 | DefineConstBuffers(*this, info, &UniformDefinitions::U32x4, binding, U32[4], 'u', | ||
| 917 | sizeof(u32[4])); | ||
| 918 | for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { | ||
| 919 | binding += desc.count; | ||
| 920 | } | ||
| 921 | return; | ||
| 922 | } | ||
| 923 | IR::Type types{info.used_constant_buffer_types}; | ||
| 924 | if (True(types & IR::Type::U8)) { | ||
| 925 | if (profile.support_int8) { | ||
| 926 | DefineConstBuffers(*this, info, &UniformDefinitions::U8, binding, U8, 'u', sizeof(u8)); | ||
| 927 | DefineConstBuffers(*this, info, &UniformDefinitions::S8, binding, S8, 's', sizeof(s8)); | ||
| 928 | } else { | ||
| 929 | types |= IR::Type::U32; | ||
| 930 | } | ||
| 931 | } | ||
| 932 | if (True(types & IR::Type::U16)) { | ||
| 933 | if (profile.support_int16) { | ||
| 934 | DefineConstBuffers(*this, info, &UniformDefinitions::U16, binding, U16, 'u', | ||
| 935 | sizeof(u16)); | ||
| 936 | DefineConstBuffers(*this, info, &UniformDefinitions::S16, binding, S16, 's', | ||
| 937 | sizeof(s16)); | ||
| 938 | } else { | ||
| 939 | types |= IR::Type::U32; | ||
| 940 | } | ||
| 941 | } | ||
| 942 | if (True(types & IR::Type::U32)) { | ||
| 943 | DefineConstBuffers(*this, info, &UniformDefinitions::U32, binding, U32[1], 'u', | ||
| 944 | sizeof(u32)); | ||
| 945 | } | ||
| 946 | if (True(types & IR::Type::F32)) { | ||
| 947 | DefineConstBuffers(*this, info, &UniformDefinitions::F32, binding, F32[1], 'f', | ||
| 948 | sizeof(f32)); | ||
| 949 | } | ||
| 950 | if (True(types & IR::Type::U32x2)) { | ||
| 951 | DefineConstBuffers(*this, info, &UniformDefinitions::U32x2, binding, U32[2], 'u', | ||
| 952 | sizeof(u32[2])); | ||
| 953 | } | ||
| 954 | binding += static_cast<u32>(info.constant_buffer_descriptors.size()); | ||
| 955 | } | ||
| 956 | |||
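| | // Declares storage buffers for every used element type (only u32 without descriptor | ||
| | // aliasing) and builds the CAS loops required by the atomic operations the shader uses. | ||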
| 957 | void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { | ||
| 958 | if (info.storage_buffers_descriptors.empty()) { | ||
| 959 | return; | ||
| 960 | } | ||
| 961 | AddExtension("SPV_KHR_storage_buffer_storage_class"); | ||
| 962 | |||
| 963 | const IR::Type used_types{profile.support_descriptor_aliasing ? info.used_storage_buffer_types | ||
| 964 | : IR::Type::U32}; | ||
| 965 | if (profile.support_int8 && True(used_types & IR::Type::U8)) { | ||
| 966 | DefineSsbos(*this, storage_types.U8, &StorageDefinitions::U8, info, binding, U8, | ||
| 967 | sizeof(u8)); | ||
| 968 | DefineSsbos(*this, storage_types.S8, &StorageDefinitions::S8, info, binding, S8, | ||
| 969 | sizeof(u8)); | ||
| 970 | } | ||
| 971 | if (profile.support_int16 && True(used_types & IR::Type::U16)) { | ||
| 972 | DefineSsbos(*this, storage_types.U16, &StorageDefinitions::U16, info, binding, U16, | ||
| 973 | sizeof(u16)); | ||
| 974 | DefineSsbos(*this, storage_types.S16, &StorageDefinitions::S16, info, binding, S16, | ||
| 975 | sizeof(u16)); | ||
| 976 | } | ||
| 977 | if (True(used_types & IR::Type::U32)) { | ||
| 978 | DefineSsbos(*this, storage_types.U32, &StorageDefinitions::U32, info, binding, U32[1], | ||
| 979 | sizeof(u32)); | ||
| 980 | } | ||
| 981 | if (True(used_types & IR::Type::F32)) { | ||
| 982 | DefineSsbos(*this, storage_types.F32, &StorageDefinitions::F32, info, binding, F32[1], | ||
| 983 | sizeof(f32)); | ||
| 984 | } | ||
| 985 | if (True(used_types & IR::Type::U64)) { | ||
| 986 | DefineSsbos(*this, storage_types.U64, &StorageDefinitions::U64, info, binding, U64, | ||
| 987 | sizeof(u64)); | ||
| 988 | } | ||
| 989 | if (True(used_types & IR::Type::U32x2)) { | ||
| 990 | DefineSsbos(*this, storage_types.U32x2, &StorageDefinitions::U32x2, info, binding, U32[2], | ||
| 991 | sizeof(u32[2])); | ||
| 992 | } | ||
| 993 | if (True(used_types & IR::Type::U32x4)) { | ||
| 994 | DefineSsbos(*this, storage_types.U32x4, &StorageDefinitions::U32x4, info, binding, U32[4], | ||
| 995 | sizeof(u32[4])); | ||
| 996 | } | ||
| 997 | for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { | ||
| 998 | binding += desc.count; | ||
| 999 | } | ||
| 1000 | const bool needs_function{ | ||
| 1001 | info.uses_global_increment || info.uses_global_decrement || info.uses_atomic_f32_add || | ||
| 1002 | info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max || | ||
| 1003 | info.uses_atomic_f32x2_add || info.uses_atomic_f32x2_min || info.uses_atomic_f32x2_max}; | ||
| 1004 | if (needs_function) { | ||
| 1005 | AddCapability(spv::Capability::VariablePointersStorageBuffer); | ||
| 1006 | } | ||
| 1007 | if (info.uses_global_increment) { | ||
| 1008 | increment_cas_ssbo = CasLoop(*this, Operation::Increment, storage_types.U32.array, | ||
| 1009 | storage_types.U32.element, U32[1], U32[1], spv::Scope::Device); | ||
| 1010 | } | ||
| 1011 | if (info.uses_global_decrement) { | ||
| 1012 | decrement_cas_ssbo = CasLoop(*this, Operation::Decrement, storage_types.U32.array, | ||
| 1013 | storage_types.U32.element, U32[1], U32[1], spv::Scope::Device); | ||
| 1014 | } | ||
| 1015 | if (info.uses_atomic_f32_add) { | ||
| 1016 | f32_add_cas = CasLoop(*this, Operation::FPAdd, storage_types.U32.array, | ||
| 1017 | storage_types.U32.element, F32[1], U32[1], spv::Scope::Device); | ||
| 1018 | } | ||
| 1019 | if (info.uses_atomic_f16x2_add) { | ||
| 1020 | f16x2_add_cas = CasLoop(*this, Operation::FPAdd, storage_types.U32.array, | ||
| 1021 | storage_types.U32.element, F16[2], U32[1], spv::Scope::Device); | ||
| 1022 | } | ||
| 1023 | if (info.uses_atomic_f16x2_min) { | ||
| 1024 | f16x2_min_cas = CasLoop(*this, Operation::FPMin, storage_types.U32.array, | ||
| 1025 | storage_types.U32.element, F16[2], U32[1], spv::Scope::Device); | ||
| 1026 | } | ||
| 1027 | if (info.uses_atomic_f16x2_max) { | ||
| 1028 | f16x2_max_cas = CasLoop(*this, Operation::FPMax, storage_types.U32.array, | ||
| 1029 | storage_types.U32.element, F16[2], U32[1], spv::Scope::Device); | ||
| 1030 | } | ||
| 1031 | if (info.uses_atomic_f32x2_add) { | ||
| 1032 | f32x2_add_cas = CasLoop(*this, Operation::FPAdd, storage_types.U32.array, | ||
| 1033 | storage_types.U32.element, F32[2], F32[2], spv::Scope::Device); | ||
| 1034 | } | ||
| 1035 | if (info.uses_atomic_f32x2_min) { | ||
| 1036 | f32x2_min_cas = CasLoop(*this, Operation::FPMin, storage_types.U32.array, | ||
| 1037 | storage_types.U32.element, F32[2], F32[2], spv::Scope::Device); | ||
| 1038 | } | ||
| 1039 | if (info.uses_atomic_f32x2_max) { | ||
| 1040 | f32x2_max_cas = CasLoop(*this, Operation::FPMax, storage_types.U32.array, | ||
| 1041 | storage_types.U32.element, F32[2], F32[2], spv::Scope::Device); | ||
| 1042 | } | ||
| 1043 | } | ||
| 1044 | |||
| 1045 | void EmitContext::DefineTextureBuffers(const Info& info, u32& binding) { | ||
| 1046 | if (info.texture_buffer_descriptors.empty()) { | ||
| 1047 | return; | ||
| 1048 | } | ||
| 1049 | const spv::ImageFormat format{spv::ImageFormat::Unknown}; | ||
| 1050 | image_buffer_type = TypeImage(F32[1], spv::Dim::Buffer, 0U, false, false, 1, format); | ||
| 1051 | sampled_texture_buffer_type = TypeSampledImage(image_buffer_type); | ||
| 1052 | |||
| 1053 | const Id type{TypePointer(spv::StorageClass::UniformConstant, sampled_texture_buffer_type)}; | ||
| 1054 | texture_buffers.reserve(info.texture_buffer_descriptors.size()); | ||
| 1055 | for (const TextureBufferDescriptor& desc : info.texture_buffer_descriptors) { | ||
| 1056 | if (desc.count != 1) { | ||
| 1057 | throw NotImplementedException("Array of texture buffers"); | ||
| 1058 | } | ||
| 1059 | const Id id{AddGlobalVariable(type, spv::StorageClass::UniformConstant)}; | ||
| 1060 | Decorate(id, spv::Decoration::Binding, binding); | ||
| 1061 | Decorate(id, spv::Decoration::DescriptorSet, 0U); | ||
| 1062 | Name(id, NameOf(stage, desc, "texbuf")); | ||
| 1063 | texture_buffers.push_back({ | ||
| 1064 | .id = id, | ||
| 1065 | .count = desc.count, | ||
| 1066 | }); | ||
| 1067 | if (profile.supported_spirv >= 0x00010400) { | ||
| 1068 | interfaces.push_back(id); | ||
| 1069 | } | ||
| 1070 | ++binding; | ||
| 1071 | } | ||
| 1072 | } | ||
| 1073 | |||
| 1074 | void EmitContext::DefineImageBuffers(const Info& info, u32& binding) { | ||
| 1075 | image_buffers.reserve(info.image_buffer_descriptors.size()); | ||
| 1076 | for (const ImageBufferDescriptor& desc : info.image_buffer_descriptors) { | ||
| 1077 | if (desc.count != 1) { | ||
| 1078 | throw NotImplementedException("Array of image buffers"); | ||
| 1079 | } | ||
| 1080 | const spv::ImageFormat format{GetImageFormat(desc.format)}; | ||
| 1081 | const Id image_type{TypeImage(U32[1], spv::Dim::Buffer, false, false, false, 2, format)}; | ||
| 1082 | const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)}; | ||
| 1083 | const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; | ||
| 1084 | Decorate(id, spv::Decoration::Binding, binding); | ||
| 1085 | Decorate(id, spv::Decoration::DescriptorSet, 0U); | ||
| 1086 | Name(id, NameOf(stage, desc, "imgbuf")); | ||
| 1087 | image_buffers.push_back({ | ||
| 1088 | .id = id, | ||
| 1089 | .image_type = image_type, | ||
| 1090 | .count = desc.count, | ||
| 1091 | }); | ||
| 1092 | if (profile.supported_spirv >= 0x00010400) { | ||
| 1093 | interfaces.push_back(id); | ||
| 1094 | } | ||
| 1095 | ++binding; | ||
| 1096 | } | ||
| 1097 | } | ||
| 1098 | |||
| 1099 | void EmitContext::DefineTextures(const Info& info, u32& binding) { | ||
| 1100 | textures.reserve(info.texture_descriptors.size()); | ||
| 1101 | for (const TextureDescriptor& desc : info.texture_descriptors) { | ||
| 1102 | const Id image_type{ImageType(*this, desc)}; | ||
| 1103 | const Id sampled_type{TypeSampledImage(image_type)}; | ||
| 1104 | const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, sampled_type)}; | ||
| 1105 | const Id desc_type{DescType(*this, sampled_type, pointer_type, desc.count)}; | ||
| 1106 | const Id id{AddGlobalVariable(desc_type, spv::StorageClass::UniformConstant)}; | ||
| 1107 | Decorate(id, spv::Decoration::Binding, binding); | ||
| 1108 | Decorate(id, spv::Decoration::DescriptorSet, 0U); | ||
| 1109 | Name(id, NameOf(stage, desc, "tex")); | ||
| 1110 | textures.push_back({ | ||
| 1111 | .id = id, | ||
| 1112 | .sampled_type = sampled_type, | ||
| 1113 | .pointer_type = pointer_type, | ||
| 1114 | .image_type = image_type, | ||
| 1115 | .count = desc.count, | ||
| 1116 | }); | ||
| 1117 | if (profile.supported_spirv >= 0x00010400) { | ||
| 1118 | interfaces.push_back(id); | ||
| 1119 | } | ||
| 1120 | ++binding; | ||
| 1121 | } | ||
| 1122 | if (info.uses_atomic_image_u32) { | ||
| 1123 | image_u32 = TypePointer(spv::StorageClass::Image, U32[1]); | ||
| 1124 | } | ||
| 1125 | } | ||
| 1126 | |||
| 1127 | void EmitContext::DefineImages(const Info& info, u32& binding) { | ||
| 1128 | images.reserve(info.image_descriptors.size()); | ||
| 1129 | for (const ImageDescriptor& desc : info.image_descriptors) { | ||
| 1130 | if (desc.count != 1) { | ||
| 1131 | throw NotImplementedException("Array of images"); | ||
| 1132 | } | ||
| 1133 | const Id image_type{ImageType(*this, desc)}; | ||
| 1134 | const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)}; | ||
| 1135 | const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; | ||
| 1136 | Decorate(id, spv::Decoration::Binding, binding); | ||
| 1137 | Decorate(id, spv::Decoration::DescriptorSet, 0U); | ||
| 1138 | Name(id, NameOf(stage, desc, "img")); | ||
| 1139 | images.push_back({ | ||
| 1140 | .id = id, | ||
| 1141 | .image_type = image_type, | ||
| 1142 | .count = desc.count, | ||
| 1143 | }); | ||
| 1144 | if (profile.supported_spirv >= 0x00010400) { | ||
| 1145 | interfaces.push_back(id); | ||
| 1146 | } | ||
| 1147 | ++binding; | ||
| 1148 | } | ||
| 1149 | } | ||
| 1150 | |||
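| | // Declares the built-in and generic attribute inputs the shader reads, applying | ||
| | // interpolation decorations to fragment inputs. | ||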
| 1151 | void EmitContext::DefineInputs(const IR::Program& program) { | ||
| 1152 | const Info& info{program.info}; | ||
| 1153 | const VaryingState loads{info.loads.mask | info.passthrough.mask}; | ||
| 1154 | |||
| 1155 | if (info.uses_workgroup_id) { | ||
| 1156 | workgroup_id = DefineInput(*this, U32[3], false, spv::BuiltIn::WorkgroupId); | ||
| 1157 | } | ||
| 1158 | if (info.uses_local_invocation_id) { | ||
| 1159 | local_invocation_id = DefineInput(*this, U32[3], false, spv::BuiltIn::LocalInvocationId); | ||
| 1160 | } | ||
| 1161 | if (info.uses_invocation_id) { | ||
| 1162 | invocation_id = DefineInput(*this, U32[1], false, spv::BuiltIn::InvocationId); | ||
| 1163 | } | ||
| 1164 | if (info.uses_sample_id) { | ||
| 1165 | sample_id = DefineInput(*this, U32[1], false, spv::BuiltIn::SampleId); | ||
| 1166 | } | ||
| 1167 | if (info.uses_is_helper_invocation) { | ||
| 1168 | is_helper_invocation = DefineInput(*this, U1, false, spv::BuiltIn::HelperInvocation); | ||
| 1169 | } | ||
| 1170 | if (info.uses_subgroup_mask) { | ||
| 1171 | subgroup_mask_eq = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupEqMaskKHR); | ||
| 1172 | subgroup_mask_lt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLtMaskKHR); | ||
| 1173 | subgroup_mask_le = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLeMaskKHR); | ||
| 1174 | subgroup_mask_gt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGtMaskKHR); | ||
| 1175 | subgroup_mask_ge = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGeMaskKHR); | ||
| 1176 | } | ||
| 1177 | if (info.uses_subgroup_invocation_id || info.uses_subgroup_shuffles || | ||
| 1178 | (profile.warp_size_potentially_larger_than_guest && | ||
| 1179 | (info.uses_subgroup_vote || info.uses_subgroup_mask))) { | ||
| 1180 | subgroup_local_invocation_id = | ||
| 1181 | DefineInput(*this, U32[1], false, spv::BuiltIn::SubgroupLocalInvocationId); | ||
| 1182 | } | ||
| 1183 | if (info.uses_fswzadd) { | ||
| 1184 | const Id f32_one{Const(1.0f)}; | ||
| 1185 | const Id f32_minus_one{Const(-1.0f)}; | ||
| 1186 | const Id f32_zero{Const(0.0f)}; | ||
| 1187 | fswzadd_lut_a = ConstantComposite(F32[4], f32_minus_one, f32_one, f32_minus_one, f32_zero); | ||
| 1188 | fswzadd_lut_b = | ||
| 1189 | ConstantComposite(F32[4], f32_minus_one, f32_minus_one, f32_one, f32_minus_one); | ||
| 1190 | } | ||
| 1191 | if (loads[IR::Attribute::PrimitiveId]) { | ||
| 1192 | primitive_id = DefineInput(*this, U32[1], false, spv::BuiltIn::PrimitiveId); | ||
| 1193 | } | ||
| 1194 | if (loads.AnyComponent(IR::Attribute::PositionX)) { | ||
| 1195 | const bool is_fragment{stage == Stage::Fragment}; | ||
| 1196 | const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::FragCoord : spv::BuiltIn::Position}; | ||
| 1197 | input_position = DefineInput(*this, F32[4], true, built_in); | ||
| 1198 | if (profile.support_geometry_shader_passthrough) { | ||
| 1199 | if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) { | ||
| 1200 | Decorate(input_position, spv::Decoration::PassthroughNV); | ||
| 1201 | } | ||
| 1202 | } | ||
| 1203 | } | ||
| 1204 | if (loads[IR::Attribute::InstanceId]) { | ||
| 1205 | if (profile.support_vertex_instance_id) { | ||
| 1206 | instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId); | ||
| 1207 | } else { | ||
| 1208 | instance_index = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceIndex); | ||
| 1209 | base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance); | ||
| 1210 | } | ||
| 1211 | } | ||
| 1212 | if (loads[IR::Attribute::VertexId]) { | ||
| 1213 | if (profile.support_vertex_instance_id) { | ||
| 1214 | vertex_id = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexId); | ||
| 1215 | } else { | ||
| 1216 | vertex_index = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexIndex); | ||
| 1217 | base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); | ||
| 1218 | } | ||
| 1219 | } | ||
| 1220 | if (loads[IR::Attribute::FrontFace]) { | ||
| 1221 | front_face = DefineInput(*this, U1, true, spv::BuiltIn::FrontFacing); | ||
| 1222 | } | ||
| 1223 | if (loads[IR::Attribute::PointSpriteS] || loads[IR::Attribute::PointSpriteT]) { | ||
| 1224 | point_coord = DefineInput(*this, F32[2], true, spv::BuiltIn::PointCoord); | ||
| 1225 | } | ||
| 1226 | if (loads[IR::Attribute::TessellationEvaluationPointU] || | ||
| 1227 | loads[IR::Attribute::TessellationEvaluationPointV]) { | ||
| 1228 | tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord); | ||
| 1229 | } | ||
| 1230 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { | ||
| 1231 | const AttributeType input_type{runtime_info.generic_input_types[index]}; | ||
| 1232 | if (!runtime_info.previous_stage_stores.Generic(index)) { | ||
| 1233 | continue; | ||
| 1234 | } | ||
| 1235 | if (!loads.Generic(index)) { | ||
| 1236 | continue; | ||
| 1237 | } | ||
| 1238 | if (input_type == AttributeType::Disabled) { | ||
| 1239 | continue; | ||
| 1240 | } | ||
| 1241 | const Id type{GetAttributeType(*this, input_type)}; | ||
| 1242 | const Id id{DefineInput(*this, type, true)}; | ||
| 1243 | Decorate(id, spv::Decoration::Location, static_cast<u32>(index)); | ||
| 1244 | Name(id, fmt::format("in_attr{}", index)); | ||
| 1245 | input_generics[index] = id; | ||
| 1246 | |||
| 1247 | if (info.passthrough.Generic(index) && profile.support_geometry_shader_passthrough) { | ||
| 1248 | Decorate(id, spv::Decoration::PassthroughNV); | ||
| 1249 | } | ||
| 1250 | if (stage != Stage::Fragment) { | ||
| 1251 | continue; | ||
| 1252 | } | ||
| 1253 | switch (info.interpolation[index]) { | ||
| 1254 | case Interpolation::Smooth: | ||
| 1255 | // Smooth is the implicit default interpolation, no decoration is emitted | ||
| 1256 | // Decorate(id, spv::Decoration::Smooth); | ||
| 1257 | break; | ||
| 1258 | case Interpolation::NoPerspective: | ||
| 1259 | Decorate(id, spv::Decoration::NoPerspective); | ||
| 1260 | break; | ||
| 1261 | case Interpolation::Flat: | ||
| 1262 | Decorate(id, spv::Decoration::Flat); | ||
| 1263 | break; | ||
| 1264 | } | ||
| 1265 | } | ||
| 1266 | if (stage == Stage::TessellationEval) { | ||
| 1267 | for (size_t index = 0; index < info.uses_patches.size(); ++index) { | ||
| 1268 | if (!info.uses_patches[index]) { | ||
| 1269 | continue; | ||
| 1270 | } | ||
| 1271 | const Id id{DefineInput(*this, F32[4], false)}; | ||
| 1272 | Decorate(id, spv::Decoration::Patch); | ||
| 1273 | Decorate(id, spv::Decoration::Location, static_cast<u32>(index)); | ||
| 1274 | patches[index] = id; | ||
| 1275 | } | ||
| 1276 | } | ||
| 1277 | } | ||
| 1278 | |||
| 1279 | void EmitContext::DefineOutputs(const IR::Program& program) { | ||
| 1280 | const Info& info{program.info}; | ||
| 1281 | const std::optional<u32> invocations{program.invocations}; | ||
| 1282 | if (info.stores.AnyComponent(IR::Attribute::PositionX) || stage == Stage::VertexB) { | ||
| 1283 | output_position = DefineOutput(*this, F32[4], invocations, spv::BuiltIn::Position); | ||
| 1284 | } | ||
| 1285 | if (info.stores[IR::Attribute::PointSize] || runtime_info.fixed_state_point_size) { | ||
| 1286 | if (stage == Stage::Fragment) { | ||
| 1287 | throw NotImplementedException("Storing PointSize in fragment stage"); | ||
| 1288 | } | ||
| 1289 | output_point_size = DefineOutput(*this, F32[1], invocations, spv::BuiltIn::PointSize); | ||
| 1290 | } | ||
| 1291 | if (info.stores.ClipDistances()) { | ||
| 1292 | if (stage == Stage::Fragment) { | ||
| 1293 | throw NotImplementedException("Storing ClipDistance in fragment stage"); | ||
| 1294 | } | ||
| 1295 | const Id type{TypeArray(F32[1], Const(8U))}; | ||
| 1296 | clip_distances = DefineOutput(*this, type, invocations, spv::BuiltIn::ClipDistance); | ||
| 1297 | } | ||
| 1298 | if (info.stores[IR::Attribute::Layer] && | ||
| 1299 | (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) { | ||
| 1300 | if (stage == Stage::Fragment) { | ||
| 1301 | throw NotImplementedException("Storing Layer in fragment stage"); | ||
| 1302 | } | ||
| 1303 | layer = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::Layer); | ||
| 1304 | } | ||
| 1305 | if (info.stores[IR::Attribute::ViewportIndex] && | ||
| 1306 | (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) { | ||
| 1307 | if (stage == Stage::Fragment) { | ||
| 1308 | throw NotImplementedException("Storing ViewportIndex in fragment stage"); | ||
| 1309 | } | ||
| 1310 | viewport_index = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::ViewportIndex); | ||
| 1311 | } | ||
| 1312 | if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) { | ||
| 1313 | viewport_mask = DefineOutput(*this, TypeArray(U32[1], Const(1u)), std::nullopt, | ||
| 1314 | spv::BuiltIn::ViewportMaskNV); | ||
| 1315 | } | ||
| 1316 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { | ||
| 1317 | if (info.stores.Generic(index)) { | ||
| 1318 | DefineGenericOutput(*this, index, invocations); | ||
| 1319 | } | ||
| 1320 | } | ||
| 1321 | switch (stage) { | ||
| 1322 | case Stage::TessellationControl: | ||
| 1323 | if (info.stores_tess_level_outer) { | ||
| 1324 | const Id type{TypeArray(F32[1], Const(4U))}; | ||
| 1325 | output_tess_level_outer = | ||
| 1326 | DefineOutput(*this, type, std::nullopt, spv::BuiltIn::TessLevelOuter); | ||
| 1327 | Decorate(output_tess_level_outer, spv::Decoration::Patch); | ||
| 1328 | } | ||
| 1329 | if (info.stores_tess_level_inner) { | ||
| 1330 | const Id type{TypeArray(F32[1], Const(2U))}; | ||
| 1331 | output_tess_level_inner = | ||
| 1332 | DefineOutput(*this, type, std::nullopt, spv::BuiltIn::TessLevelInner); | ||
| 1333 | Decorate(output_tess_level_inner, spv::Decoration::Patch); | ||
| 1334 | } | ||
| 1335 | for (size_t index = 0; index < info.uses_patches.size(); ++index) { | ||
| 1336 | if (!info.uses_patches[index]) { | ||
| 1337 | continue; | ||
| 1338 | } | ||
| 1339 | const Id id{DefineOutput(*this, F32[4], std::nullopt)}; | ||
| 1340 | Decorate(id, spv::Decoration::Patch); | ||
| 1341 | Decorate(id, spv::Decoration::Location, static_cast<u32>(index)); | ||
| 1342 | patches[index] = id; | ||
| 1343 | } | ||
| 1344 | break; | ||
| 1345 | case Stage::Fragment: | ||
| 1346 | for (u32 index = 0; index < 8; ++index) { | ||
| 1347 | if (!info.stores_frag_color[index] && !profile.need_declared_frag_colors) { | ||
| 1348 | continue; | ||
| 1349 | } | ||
| 1350 | frag_color[index] = DefineOutput(*this, F32[4], std::nullopt); | ||
| 1351 | Decorate(frag_color[index], spv::Decoration::Location, index); | ||
| 1352 | Name(frag_color[index], fmt::format("frag_color{}", index)); | ||
| 1353 | } | ||
| 1354 | if (info.stores_frag_depth) { | ||
| 1355 | frag_depth = DefineOutput(*this, F32[1], std::nullopt); | ||
| 1356 | Decorate(frag_depth, spv::Decoration::BuiltIn, spv::BuiltIn::FragDepth); | ||
| 1357 | } | ||
| 1358 | if (info.stores_sample_mask) { | ||
| 1359 | sample_mask = DefineOutput(*this, U32[1], std::nullopt); | ||
| 1360 | Decorate(sample_mask, spv::Decoration::BuiltIn, spv::BuiltIn::SampleMask); | ||
| 1361 | } | ||
| 1362 | break; | ||
| 1363 | default: | ||
| 1364 | break; | ||
| 1365 | } | ||
| 1366 | } | ||
| 1367 | |||
| 1368 | } // namespace Shader::Backend::SPIRV | ||
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h new file mode 100644 index 000000000..e277bc358 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_context.h | |||
| @@ -0,0 +1,307 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <string_view> | ||
| 9 | |||
| 10 | #include <sirit/sirit.h> | ||
| 11 | |||
| 12 | #include "shader_recompiler/backend/bindings.h" | ||
| 13 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 14 | #include "shader_recompiler/profile.h" | ||
| 15 | #include "shader_recompiler/runtime_info.h" | ||
| 16 | #include "shader_recompiler/shader_info.h" | ||
| 17 | |||
| 18 | namespace Shader::Backend::SPIRV { | ||
| 19 | |||
| 20 | using Sirit::Id; | ||
| 21 | |||
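| | // Caches a scalar type and its 2, 3 and 4 component vector forms; operator[] returns the | ||
| | // type with the given number of components. | ||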
| 22 | class VectorTypes { | ||
| 23 | public: | ||
| 24 | void Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name); | ||
| 25 | |||
| 26 | [[nodiscard]] Id operator[](size_t size) const noexcept { | ||
| 27 | return defs[size - 1]; | ||
| 28 | } | ||
| 29 | |||
| 30 | private: | ||
| 31 | std::array<Id, 4> defs{}; | ||
| 32 | }; | ||
| 33 | |||
| 34 | struct TextureDefinition { | ||
| 35 | Id id; | ||
| 36 | Id sampled_type; | ||
| 37 | Id pointer_type; | ||
| 38 | Id image_type; | ||
| 39 | u32 count; | ||
| 40 | }; | ||
| 41 | |||
| 42 | struct TextureBufferDefinition { | ||
| 43 | Id id; | ||
| 44 | u32 count; | ||
| 45 | }; | ||
| 46 | |||
| 47 | struct ImageBufferDefinition { | ||
| 48 | Id id; | ||
| 49 | Id image_type; | ||
| 50 | u32 count; | ||
| 51 | }; | ||
| 52 | |||
| 53 | struct ImageDefinition { | ||
| 54 | Id id; | ||
| 55 | Id image_type; | ||
| 56 | u32 count; | ||
| 57 | }; | ||
| 58 | |||
| 59 | struct UniformDefinitions { | ||
| 60 | Id U8{}; | ||
| 61 | Id S8{}; | ||
| 62 | Id U16{}; | ||
| 63 | Id S16{}; | ||
| 64 | Id U32{}; | ||
| 65 | Id F32{}; | ||
| 66 | Id U32x2{}; | ||
| 67 | Id U32x4{}; | ||
| 68 | }; | ||
| 69 | |||
| 70 | struct StorageTypeDefinition { | ||
| 71 | Id array{}; | ||
| 72 | Id element{}; | ||
| 73 | }; | ||
| 74 | |||
| 75 | struct StorageTypeDefinitions { | ||
| 76 | StorageTypeDefinition U8{}; | ||
| 77 | StorageTypeDefinition S8{}; | ||
| 78 | StorageTypeDefinition U16{}; | ||
| 79 | StorageTypeDefinition S16{}; | ||
| 80 | StorageTypeDefinition U32{}; | ||
| 81 | StorageTypeDefinition U64{}; | ||
| 82 | StorageTypeDefinition F32{}; | ||
| 83 | StorageTypeDefinition U32x2{}; | ||
| 84 | StorageTypeDefinition U32x4{}; | ||
| 85 | }; | ||
| 86 | |||
| 87 | struct StorageDefinitions { | ||
| 88 | Id U8{}; | ||
| 89 | Id S8{}; | ||
| 90 | Id U16{}; | ||
| 91 | Id S16{}; | ||
| 92 | Id U32{}; | ||
| 93 | Id F32{}; | ||
| 94 | Id U64{}; | ||
| 95 | Id U32x2{}; | ||
| 96 | Id U32x4{}; | ||
| 97 | }; | ||
| 98 | |||
| 99 | struct GenericElementInfo { | ||
| 100 | Id id{}; | ||
| 101 | u32 first_element{}; | ||
| 102 | u32 num_components{}; | ||
| 103 | }; | ||
| 104 | |||
| 105 | class EmitContext final : public Sirit::Module { | ||
| 106 | public: | ||
| 107 | explicit EmitContext(const Profile& profile, const RuntimeInfo& runtime_info, | ||
| 108 | IR::Program& program, Bindings& bindings); | ||
| 109 | ~EmitContext(); | ||
| 110 | |||
| 111 | [[nodiscard]] Id Def(const IR::Value& value); | ||
| 112 | |||
| 113 | [[nodiscard]] Id BitOffset8(const IR::Value& offset); | ||
| 114 | [[nodiscard]] Id BitOffset16(const IR::Value& offset); | ||
| 115 | |||
| 116 | Id Const(u32 value) { | ||
| 117 | return Constant(U32[1], value); | ||
| 118 | } | ||
| 119 | |||
| 120 | Id Const(u32 element_1, u32 element_2) { | ||
| 121 | return ConstantComposite(U32[2], Const(element_1), Const(element_2)); | ||
| 122 | } | ||
| 123 | |||
| 124 | Id Const(u32 element_1, u32 element_2, u32 element_3) { | ||
| 125 | return ConstantComposite(U32[3], Const(element_1), Const(element_2), Const(element_3)); | ||
| 126 | } | ||
| 127 | |||
| 128 | Id Const(u32 element_1, u32 element_2, u32 element_3, u32 element_4) { | ||
| 129 | return ConstantComposite(U32[4], Const(element_1), Const(element_2), Const(element_3), | ||
| 130 | Const(element_4)); | ||
| 131 | } | ||
| 132 | |||
| 133 | Id SConst(s32 value) { | ||
| 134 | return Constant(S32[1], value); | ||
| 135 | } | ||
| 136 | |||
| 137 | Id SConst(s32 element_1, s32 element_2) { | ||
| 138 | return ConstantComposite(S32[2], SConst(element_1), SConst(element_2)); | ||
| 139 | } | ||
| 140 | |||
| 141 | Id SConst(s32 element_1, s32 element_2, s32 element_3) { | ||
| 142 | return ConstantComposite(S32[3], SConst(element_1), SConst(element_2), SConst(element_3)); | ||
| 143 | } | ||
| 144 | |||
| 145 | Id SConst(s32 element_1, s32 element_2, s32 element_3, s32 element_4) { | ||
| 146 | return ConstantComposite(S32[4], SConst(element_1), SConst(element_2), SConst(element_3), | ||
| 147 | SConst(element_4)); | ||
| 148 | } | ||
| 149 | |||
| 150 | Id Const(f32 value) { | ||
| 151 | return Constant(F32[1], value); | ||
| 152 | } | ||
| 153 | |||
| 154 | const Profile& profile; | ||
| 155 | const RuntimeInfo& runtime_info; | ||
| 156 | Stage stage{}; | ||
| 157 | |||
| 158 | Id void_id{}; | ||
| 159 | Id U1{}; | ||
| 160 | Id U8{}; | ||
| 161 | Id S8{}; | ||
| 162 | Id U16{}; | ||
| 163 | Id S16{}; | ||
| 164 | Id U64{}; | ||
| 165 | VectorTypes F32; | ||
| 166 | VectorTypes U32; | ||
| 167 | VectorTypes S32; | ||
| 168 | VectorTypes F16; | ||
| 169 | VectorTypes F64; | ||
| 170 | |||
| 171 | Id true_value{}; | ||
| 172 | Id false_value{}; | ||
| 173 | Id u32_zero_value{}; | ||
| 174 | Id f32_zero_value{}; | ||
| 175 | |||
| 176 | UniformDefinitions uniform_types; | ||
| 177 | StorageTypeDefinitions storage_types; | ||
| 178 | |||
| 179 | Id private_u32{}; | ||
| 180 | |||
| 181 | Id shared_u8{}; | ||
| 182 | Id shared_u16{}; | ||
| 183 | Id shared_u32{}; | ||
| 184 | Id shared_u64{}; | ||
| 185 | Id shared_u32x2{}; | ||
| 186 | Id shared_u32x4{}; | ||
| 187 | |||
| 188 | Id input_f32{}; | ||
| 189 | Id input_u32{}; | ||
| 190 | Id input_s32{}; | ||
| 191 | |||
| 192 | Id output_f32{}; | ||
| 193 | Id output_u32{}; | ||
| 194 | |||
| 195 | Id image_buffer_type{}; | ||
| 196 | Id sampled_texture_buffer_type{}; | ||
| 197 | Id image_u32{}; | ||
| 198 | |||
| 199 | std::array<UniformDefinitions, Info::MAX_CBUFS> cbufs{}; | ||
| 200 | std::array<StorageDefinitions, Info::MAX_SSBOS> ssbos{}; | ||
| 201 | std::vector<TextureBufferDefinition> texture_buffers; | ||
| 202 | std::vector<ImageBufferDefinition> image_buffers; | ||
| 203 | std::vector<TextureDefinition> textures; | ||
| 204 | std::vector<ImageDefinition> images; | ||
| 205 | |||
| 206 | Id workgroup_id{}; | ||
| 207 | Id local_invocation_id{}; | ||
| 208 | Id invocation_id{}; | ||
| 209 | Id sample_id{}; | ||
| 210 | Id is_helper_invocation{}; | ||
| 211 | Id subgroup_local_invocation_id{}; | ||
| 212 | Id subgroup_mask_eq{}; | ||
| 213 | Id subgroup_mask_lt{}; | ||
| 214 | Id subgroup_mask_le{}; | ||
| 215 | Id subgroup_mask_gt{}; | ||
| 216 | Id subgroup_mask_ge{}; | ||
| 217 | Id instance_id{}; | ||
| 218 | Id instance_index{}; | ||
| 219 | Id base_instance{}; | ||
| 220 | Id vertex_id{}; | ||
| 221 | Id vertex_index{}; | ||
| 222 | Id base_vertex{}; | ||
| 223 | Id front_face{}; | ||
| 224 | Id point_coord{}; | ||
| 225 | Id tess_coord{}; | ||
| 226 | Id clip_distances{}; | ||
| 227 | Id layer{}; | ||
| 228 | Id viewport_index{}; | ||
| 229 | Id viewport_mask{}; | ||
| 230 | Id primitive_id{}; | ||
| 231 | |||
| 232 | Id fswzadd_lut_a{}; | ||
| 233 | Id fswzadd_lut_b{}; | ||
| 234 | |||
| 235 | Id indexed_load_func{}; | ||
| 236 | Id indexed_store_func{}; | ||
| 237 | |||
| 238 | Id local_memory{}; | ||
| 239 | |||
| 240 | Id shared_memory_u8{}; | ||
| 241 | Id shared_memory_u16{}; | ||
| 242 | Id shared_memory_u32{}; | ||
| 243 | Id shared_memory_u64{}; | ||
| 244 | Id shared_memory_u32x2{}; | ||
| 245 | Id shared_memory_u32x4{}; | ||
| 246 | |||
| 247 | Id shared_memory_u32_type{}; | ||
| 248 | |||
| 249 | Id shared_store_u8_func{}; | ||
| 250 | Id shared_store_u16_func{}; | ||
| 251 | Id increment_cas_shared{}; | ||
| 252 | Id increment_cas_ssbo{}; | ||
| 253 | Id decrement_cas_shared{}; | ||
| 254 | Id decrement_cas_ssbo{}; | ||
| 255 | Id f32_add_cas{}; | ||
| 256 | Id f16x2_add_cas{}; | ||
| 257 | Id f16x2_min_cas{}; | ||
| 258 | Id f16x2_max_cas{}; | ||
| 259 | Id f32x2_add_cas{}; | ||
| 260 | Id f32x2_min_cas{}; | ||
| 261 | Id f32x2_max_cas{}; | ||
| 262 | |||
| 263 | Id load_global_func_u32{}; | ||
| 264 | Id load_global_func_u32x2{}; | ||
| 265 | Id load_global_func_u32x4{}; | ||
| 266 | Id write_global_func_u32{}; | ||
| 267 | Id write_global_func_u32x2{}; | ||
| 268 | Id write_global_func_u32x4{}; | ||
| 269 | |||
| 270 | Id input_position{}; | ||
| 271 | std::array<Id, 32> input_generics{}; | ||
| 272 | |||
| 273 | Id output_point_size{}; | ||
| 274 | Id output_position{}; | ||
| 275 | std::array<std::array<GenericElementInfo, 4>, 32> output_generics{}; | ||
| 276 | |||
| 277 | Id output_tess_level_outer{}; | ||
| 278 | Id output_tess_level_inner{}; | ||
| 279 | std::array<Id, 30> patches{}; | ||
| 280 | |||
| 281 | std::array<Id, 8> frag_color{}; | ||
| 282 | Id sample_mask{}; | ||
| 283 | Id frag_depth{}; | ||
| 284 | |||
| 285 | std::vector<Id> interfaces; | ||
| 286 | |||
| 287 | private: | ||
| 288 | void DefineCommonTypes(const Info& info); | ||
| 289 | void DefineCommonConstants(); | ||
| 290 | void DefineInterfaces(const IR::Program& program); | ||
| 291 | void DefineLocalMemory(const IR::Program& program); | ||
| 292 | void DefineSharedMemory(const IR::Program& program); | ||
| 293 | void DefineSharedMemoryFunctions(const IR::Program& program); | ||
| 294 | void DefineConstantBuffers(const Info& info, u32& binding); | ||
| 295 | void DefineStorageBuffers(const Info& info, u32& binding); | ||
| 296 | void DefineTextureBuffers(const Info& info, u32& binding); | ||
| 297 | void DefineImageBuffers(const Info& info, u32& binding); | ||
| 298 | void DefineTextures(const Info& info, u32& binding); | ||
| 299 | void DefineImages(const Info& info, u32& binding); | ||
| 300 | void DefineAttributeMemAccess(const Info& info); | ||
| 301 | void DefineGlobalMemoryFunctions(const Info& info); | ||
| 302 | |||
| 303 | void DefineInputs(const IR::Program& program); | ||
| 304 | void DefineOutputs(const IR::Program& program); | ||
| 305 | }; | ||
| 306 | |||
| 307 | } // namespace Shader::Backend::SPIRV | ||
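For orientation, the Const/SConst helpers declared in this header compose scalar constants into vector constants via ConstantComposite. A minimal usage sketch, assuming an EmitContext `ctx` already constructed for the current shader (variable names here are illustrative, not from the header):

    // Scalar and composite constants through the helpers above.
    const Id zero{ctx.Const(0u)};              // %uint 0
    const Id extent{ctx.Const(16u, 16u, 1u)};  // uvec3(16, 16, 1) via ConstantComposite
    const Id half{ctx.Const(0.5f)};            // %float 0.5
    const Id offset{ctx.SConst(-1)};           // signed constant of type S32[1]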
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp new file mode 100644 index 000000000..d7a86e270 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp | |||
| @@ -0,0 +1,541 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <span> | ||
| 6 | #include <tuple> | ||
| 7 | #include <type_traits> | ||
| 8 | #include <utility> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include "common/settings.h" | ||
| 12 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 13 | #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" | ||
| 14 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 15 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 16 | |||
| 17 | namespace Shader::Backend::SPIRV { | ||
| 18 | namespace { | ||
| 19 | template <class Func> | ||
| 20 | struct FuncTraits {}; | ||
| 21 | |||
| 22 | template <class ReturnType_, class... Args> | ||
| 23 | struct FuncTraits<ReturnType_ (*)(Args...)> { | ||
| 24 | using ReturnType = ReturnType_; | ||
| 25 | |||
| 26 | static constexpr size_t NUM_ARGS = sizeof...(Args); | ||
| 27 | |||
| 28 | template <size_t I> | ||
| 29 | using ArgType = std::tuple_element_t<I, std::tuple<Args...>>; | ||
| 30 | }; | ||
| 31 | |||
| 32 | template <auto func, typename... Args> | ||
| 33 | void SetDefinition(EmitContext& ctx, IR::Inst* inst, Args... args) { | ||
| 34 | inst->SetDefinition<Id>(func(ctx, std::forward<Args>(args)...)); | ||
| 35 | } | ||
| 36 | |||
| 37 | template <typename ArgType> | ||
| 38 | ArgType Arg(EmitContext& ctx, const IR::Value& arg) { | ||
| 39 | if constexpr (std::is_same_v<ArgType, Id>) { | ||
| 40 | return ctx.Def(arg); | ||
| 41 | } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) { | ||
| 42 | return arg; | ||
| 43 | } else if constexpr (std::is_same_v<ArgType, u32>) { | ||
| 44 | return arg.U32(); | ||
| 45 | } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) { | ||
| 46 | return arg.Attribute(); | ||
| 47 | } else if constexpr (std::is_same_v<ArgType, IR::Patch>) { | ||
| 48 | return arg.Patch(); | ||
| 49 | } else if constexpr (std::is_same_v<ArgType, IR::Reg>) { | ||
| 50 | return arg.Reg(); | ||
| 51 | } | ||
| 52 | } | ||
| 53 | |||
| 54 | template <auto func, bool is_first_arg_inst, size_t... I> | ||
| 55 | void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) { | ||
| 56 | using Traits = FuncTraits<decltype(func)>; | ||
| 57 | if constexpr (std::is_same_v<typename Traits::ReturnType, Id>) { | ||
| 58 | if constexpr (is_first_arg_inst) { | ||
| 59 | SetDefinition<func>( | ||
| 60 | ctx, inst, inst, | ||
| 61 | Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...); | ||
| 62 | } else { | ||
| 63 | SetDefinition<func>( | ||
| 64 | ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...); | ||
| 65 | } | ||
| 66 | } else { | ||
| 67 | if constexpr (is_first_arg_inst) { | ||
| 68 | func(ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...); | ||
| 69 | } else { | ||
| 70 | func(ctx, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...); | ||
| 71 | } | ||
| 72 | } | ||
| 73 | } | ||
| 74 | |||
| 75 | template <auto func> | ||
| 76 | void Invoke(EmitContext& ctx, IR::Inst* inst) { | ||
| 77 | using Traits = FuncTraits<decltype(func)>; | ||
| 78 | static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments"); | ||
| 79 | if constexpr (Traits::NUM_ARGS == 1) { | ||
| 80 | Invoke<func, false>(ctx, inst, std::make_index_sequence<0>{}); | ||
| 81 | } else { | ||
| 82 | using FirstArgType = typename Traits::template ArgType<1>; | ||
| 83 | static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst*>; | ||
| 84 | using Indices = std::make_index_sequence<Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1)>; | ||
| 85 | Invoke<func, is_first_arg_inst>(ctx, inst, Indices{}); | ||
| 86 | } | ||
| 87 | } | ||
| 88 | |||
| 89 | void EmitInst(EmitContext& ctx, IR::Inst* inst) { | ||
| 90 | switch (inst->GetOpcode()) { | ||
| 91 | #define OPCODE(name, result_type, ...) \ | ||
| 92 | case IR::Opcode::name: \ | ||
| 93 | return Invoke<&Emit##name>(ctx, inst); | ||
| 94 | #include "shader_recompiler/frontend/ir/opcodes.inc" | ||
| 95 | #undef OPCODE | ||
| 96 | } | ||
| 97 | throw LogicError("Invalid opcode {}", inst->GetOpcode()); | ||
| 98 | } | ||
| 99 | |||
| 100 | Id TypeId(const EmitContext& ctx, IR::Type type) { | ||
| 101 | switch (type) { | ||
| 102 | case IR::Type::U1: | ||
| 103 | return ctx.U1; | ||
| 104 | case IR::Type::U32: | ||
| 105 | return ctx.U32[1]; | ||
| 106 | default: | ||
| 107 | throw NotImplementedException("Phi node type {}", type); | ||
| 108 | } | ||
| 109 | } | ||
| 110 | |||
| 111 | void Traverse(EmitContext& ctx, IR::Program& program) { | ||
| 112 | IR::Block* current_block{}; | ||
| 113 | for (const IR::AbstractSyntaxNode& node : program.syntax_list) { | ||
| 114 | switch (node.type) { | ||
| 115 | case IR::AbstractSyntaxNode::Type::Block: { | ||
| 116 | const Id label{node.data.block->Definition<Id>()}; | ||
| 117 | if (current_block) { | ||
| 118 | ctx.OpBranch(label); | ||
| 119 | } | ||
| 120 | current_block = node.data.block; | ||
| 121 | ctx.AddLabel(label); | ||
| 122 | for (IR::Inst& inst : node.data.block->Instructions()) { | ||
| 123 | EmitInst(ctx, &inst); | ||
| 124 | } | ||
| 125 | break; | ||
| 126 | } | ||
| 127 | case IR::AbstractSyntaxNode::Type::If: { | ||
| 128 | const Id if_label{node.data.if_node.body->Definition<Id>()}; | ||
| 129 | const Id endif_label{node.data.if_node.merge->Definition<Id>()}; | ||
| 130 | ctx.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone); | ||
| 131 | ctx.OpBranchConditional(ctx.Def(node.data.if_node.cond), if_label, endif_label); | ||
| 132 | break; | ||
| 133 | } | ||
| 134 | case IR::AbstractSyntaxNode::Type::Loop: { | ||
| 135 | const Id body_label{node.data.loop.body->Definition<Id>()}; | ||
| 136 | const Id continue_label{node.data.loop.continue_block->Definition<Id>()}; | ||
| 137 | const Id endloop_label{node.data.loop.merge->Definition<Id>()}; | ||
| 138 | |||
| 139 | ctx.OpLoopMerge(endloop_label, continue_label, spv::LoopControlMask::MaskNone); | ||
| 140 | ctx.OpBranch(body_label); | ||
| 141 | break; | ||
| 142 | } | ||
| 143 | case IR::AbstractSyntaxNode::Type::Break: { | ||
| 144 | const Id break_label{node.data.break_node.merge->Definition<Id>()}; | ||
| 145 | const Id skip_label{node.data.break_node.skip->Definition<Id>()}; | ||
| 146 | ctx.OpBranchConditional(ctx.Def(node.data.break_node.cond), break_label, skip_label); | ||
| 147 | break; | ||
| 148 | } | ||
| 149 | case IR::AbstractSyntaxNode::Type::EndIf: | ||
| 150 | if (current_block) { | ||
| 151 | ctx.OpBranch(node.data.end_if.merge->Definition<Id>()); | ||
| 152 | } | ||
| 153 | break; | ||
| 154 | case IR::AbstractSyntaxNode::Type::Repeat: { | ||
| 155 | Id cond{ctx.Def(node.data.repeat.cond)}; | ||
| 156 | if (!Settings::values.disable_shader_loop_safety_checks) { | ||
| 157 | const Id pointer_type{ctx.TypePointer(spv::StorageClass::Private, ctx.U32[1])}; | ||
| 158 | const Id safety_counter{ctx.AddGlobalVariable( | ||
| 159 | pointer_type, spv::StorageClass::Private, ctx.Const(0x2000u))}; | ||
| 160 | if (ctx.profile.supported_spirv >= 0x00010400) { | ||
| 161 | ctx.interfaces.push_back(safety_counter); | ||
| 162 | } | ||
| 163 | const Id old_counter{ctx.OpLoad(ctx.U32[1], safety_counter)}; | ||
| 164 | const Id new_counter{ctx.OpISub(ctx.U32[1], old_counter, ctx.Const(1u))}; | ||
| 165 | ctx.OpStore(safety_counter, new_counter); | ||
| 166 | |||
| 167 | const Id safety_cond{ | ||
| 168 | ctx.OpSGreaterThanEqual(ctx.U1, new_counter, ctx.u32_zero_value)}; | ||
| 169 | cond = ctx.OpLogicalAnd(ctx.U1, cond, safety_cond); | ||
| 170 | } | ||
| 171 | const Id loop_header_label{node.data.repeat.loop_header->Definition<Id>()}; | ||
| 172 | const Id merge_label{node.data.repeat.merge->Definition<Id>()}; | ||
| 173 | ctx.OpBranchConditional(cond, loop_header_label, merge_label); | ||
| 174 | break; | ||
| 175 | } | ||
| 176 | case IR::AbstractSyntaxNode::Type::Return: | ||
| 177 | ctx.OpReturn(); | ||
| 178 | break; | ||
| 179 | case IR::AbstractSyntaxNode::Type::Unreachable: | ||
| 180 | ctx.OpUnreachable(); | ||
| 181 | break; | ||
| 182 | } | ||
| 183 | if (node.type != IR::AbstractSyntaxNode::Type::Block) { | ||
| 184 | current_block = nullptr; | ||
| 185 | } | ||
| 186 | } | ||
| 187 | } | ||
| 188 | |||
| 189 | Id DefineMain(EmitContext& ctx, IR::Program& program) { | ||
| 190 | const Id void_function{ctx.TypeFunction(ctx.void_id)}; | ||
| 191 | const Id main{ctx.OpFunction(ctx.void_id, spv::FunctionControlMask::MaskNone, void_function)}; | ||
| 192 | for (IR::Block* const block : program.blocks) { | ||
| 193 | block->SetDefinition(ctx.OpLabel()); | ||
| 194 | } | ||
| 195 | Traverse(ctx, program); | ||
| 196 | ctx.OpFunctionEnd(); | ||
| 197 | return main; | ||
| 198 | } | ||
| 199 | |||
| 200 | spv::ExecutionMode ExecutionMode(TessPrimitive primitive) { | ||
| 201 | switch (primitive) { | ||
| 202 | case TessPrimitive::Isolines: | ||
| 203 | return spv::ExecutionMode::Isolines; | ||
| 204 | case TessPrimitive::Triangles: | ||
| 205 | return spv::ExecutionMode::Triangles; | ||
| 206 | case TessPrimitive::Quads: | ||
| 207 | return spv::ExecutionMode::Quads; | ||
| 208 | } | ||
| 209 | throw InvalidArgument("Tessellation primitive {}", primitive); | ||
| 210 | } | ||
| 211 | |||
| 212 | spv::ExecutionMode ExecutionMode(TessSpacing spacing) { | ||
| 213 | switch (spacing) { | ||
| 214 | case TessSpacing::Equal: | ||
| 215 | return spv::ExecutionMode::SpacingEqual; | ||
| 216 | case TessSpacing::FractionalOdd: | ||
| 217 | return spv::ExecutionMode::SpacingFractionalOdd; | ||
| 218 | case TessSpacing::FractionalEven: | ||
| 219 | return spv::ExecutionMode::SpacingFractionalEven; | ||
| 220 | } | ||
| 221 | throw InvalidArgument("Tessellation spacing {}", spacing); | ||
| 222 | } | ||
| 223 | |||
| 224 | void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { | ||
| 225 | const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size()); | ||
| 226 | spv::ExecutionModel execution_model{}; | ||
| 227 | switch (program.stage) { | ||
| 228 | case Stage::Compute: { | ||
| 229 | const std::array<u32, 3> workgroup_size{program.workgroup_size}; | ||
| 230 | execution_model = spv::ExecutionModel::GLCompute; | ||
| 231 | ctx.AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0], | ||
| 232 | workgroup_size[1], workgroup_size[2]); | ||
| 233 | break; | ||
| 234 | } | ||
| 235 | case Stage::VertexB: | ||
| 236 | execution_model = spv::ExecutionModel::Vertex; | ||
| 237 | break; | ||
| 238 | case Stage::TessellationControl: | ||
| 239 | execution_model = spv::ExecutionModel::TessellationControl; | ||
| 240 | ctx.AddCapability(spv::Capability::Tessellation); | ||
| 241 | ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, program.invocations); | ||
| 242 | break; | ||
| 243 | case Stage::TessellationEval: | ||
| 244 | execution_model = spv::ExecutionModel::TessellationEvaluation; | ||
| 245 | ctx.AddCapability(spv::Capability::Tessellation); | ||
| 246 | ctx.AddExecutionMode(main, ExecutionMode(ctx.runtime_info.tess_primitive)); | ||
| 247 | ctx.AddExecutionMode(main, ExecutionMode(ctx.runtime_info.tess_spacing)); | ||
| 248 | ctx.AddExecutionMode(main, ctx.runtime_info.tess_clockwise | ||
| 249 | ? spv::ExecutionMode::VertexOrderCw | ||
| 250 | : spv::ExecutionMode::VertexOrderCcw); | ||
| 251 | break; | ||
| 252 | case Stage::Geometry: | ||
| 253 | execution_model = spv::ExecutionModel::Geometry; | ||
| 254 | ctx.AddCapability(spv::Capability::Geometry); | ||
| 255 | ctx.AddCapability(spv::Capability::GeometryStreams); | ||
| 256 | switch (ctx.runtime_info.input_topology) { | ||
| 257 | case InputTopology::Points: | ||
| 258 | ctx.AddExecutionMode(main, spv::ExecutionMode::InputPoints); | ||
| 259 | break; | ||
| 260 | case InputTopology::Lines: | ||
| 261 | ctx.AddExecutionMode(main, spv::ExecutionMode::InputLines); | ||
| 262 | break; | ||
| 263 | case InputTopology::LinesAdjacency: | ||
| 264 | ctx.AddExecutionMode(main, spv::ExecutionMode::InputLinesAdjacency); | ||
| 265 | break; | ||
| 266 | case InputTopology::Triangles: | ||
| 267 | ctx.AddExecutionMode(main, spv::ExecutionMode::Triangles); | ||
| 268 | break; | ||
| 269 | case InputTopology::TrianglesAdjacency: | ||
| 270 | ctx.AddExecutionMode(main, spv::ExecutionMode::InputTrianglesAdjacency); | ||
| 271 | break; | ||
| 272 | } | ||
| 273 | switch (program.output_topology) { | ||
| 274 | case OutputTopology::PointList: | ||
| 275 | ctx.AddExecutionMode(main, spv::ExecutionMode::OutputPoints); | ||
| 276 | break; | ||
| 277 | case OutputTopology::LineStrip: | ||
| 278 | ctx.AddExecutionMode(main, spv::ExecutionMode::OutputLineStrip); | ||
| 279 | break; | ||
| 280 | case OutputTopology::TriangleStrip: | ||
| 281 | ctx.AddExecutionMode(main, spv::ExecutionMode::OutputTriangleStrip); | ||
| 282 | break; | ||
| 283 | } | ||
| 284 | if (program.info.stores[IR::Attribute::PointSize]) { | ||
| 285 | ctx.AddCapability(spv::Capability::GeometryPointSize); | ||
| 286 | } | ||
| 287 | ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, program.output_vertices); | ||
| 288 | ctx.AddExecutionMode(main, spv::ExecutionMode::Invocations, program.invocations); | ||
| 289 | if (program.is_geometry_passthrough) { | ||
| 290 | if (ctx.profile.support_geometry_shader_passthrough) { | ||
| 291 | ctx.AddExtension("SPV_NV_geometry_shader_passthrough"); | ||
| 292 | ctx.AddCapability(spv::Capability::GeometryShaderPassthroughNV); | ||
| 293 | } else { | ||
| 294 | LOG_WARNING(Shader_SPIRV, "Geometry shader passthrough used without host support"); | ||
| 295 | } | ||
| 296 | } | ||
| 297 | break; | ||
| 298 | case Stage::Fragment: | ||
| 299 | execution_model = spv::ExecutionModel::Fragment; | ||
| 300 | if (ctx.profile.lower_left_origin_mode) { | ||
| 301 | ctx.AddExecutionMode(main, spv::ExecutionMode::OriginLowerLeft); | ||
| 302 | } else { | ||
| 303 | ctx.AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); | ||
| 304 | } | ||
| 305 | if (program.info.stores_frag_depth) { | ||
| 306 | ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing); | ||
| 307 | } | ||
| 308 | if (ctx.runtime_info.force_early_z) { | ||
| 309 | ctx.AddExecutionMode(main, spv::ExecutionMode::EarlyFragmentTests); | ||
| 310 | } | ||
| 311 | break; | ||
| 312 | default: | ||
| 313 | throw NotImplementedException("Stage {}", program.stage); | ||
| 314 | } | ||
| 315 | ctx.AddEntryPoint(execution_model, main, "main", interfaces); | ||
| 316 | } | ||
| 317 | |||
| 318 | void SetupDenormControl(const Profile& profile, const IR::Program& program, EmitContext& ctx, | ||
| 319 | Id main_func) { | ||
| 320 | const Info& info{program.info}; | ||
| 321 | if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) { | ||
| 322 | LOG_DEBUG(Shader_SPIRV, "Fp32 denorm flush and preserve in the same shader"); | ||
| 323 | } else if (info.uses_fp32_denorms_flush) { | ||
| 324 | if (profile.support_fp32_denorm_flush) { | ||
| 325 | ctx.AddCapability(spv::Capability::DenormFlushToZero); | ||
| 326 | ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 32U); | ||
| 327 | } else { | ||
| 328 | // Drivers will most likely flush denorms by default; no need to warn | ||
| 329 | } | ||
| 330 | } else if (info.uses_fp32_denorms_preserve) { | ||
| 331 | if (profile.support_fp32_denorm_preserve) { | ||
| 332 | ctx.AddCapability(spv::Capability::DenormPreserve); | ||
| 333 | ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 32U); | ||
| 334 | } else { | ||
| 335 | LOG_DEBUG(Shader_SPIRV, "Fp32 denorm preserve used in a shader without host support"); | ||
| 336 | } | ||
| 337 | } | ||
| 338 | if (!profile.support_separate_denorm_behavior || profile.has_broken_fp16_float_controls) { | ||
| 339 | // No separate denorm behavior | ||
| 340 | return; | ||
| 341 | } | ||
| 342 | if (info.uses_fp16_denorms_flush && info.uses_fp16_denorms_preserve) { | ||
| 343 | LOG_DEBUG(Shader_SPIRV, "Fp16 denorm flush and preserve in the same shader"); | ||
| 344 | } else if (info.uses_fp16_denorms_flush) { | ||
| 345 | if (profile.support_fp16_denorm_flush) { | ||
| 346 | ctx.AddCapability(spv::Capability::DenormFlushToZero); | ||
| 347 | ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 16U); | ||
| 348 | } else { | ||
| 349 | // Same as fp32, no need to warn as most drivers will flush by default | ||
| 350 | } | ||
| 351 | } else if (info.uses_fp16_denorms_preserve) { | ||
| 352 | if (profile.support_fp16_denorm_preserve) { | ||
| 353 | ctx.AddCapability(spv::Capability::DenormPreserve); | ||
| 354 | ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 16U); | ||
| 355 | } else { | ||
| 356 | LOG_DEBUG(Shader_SPIRV, "Fp16 denorm preserve used in a shader without host support"); | ||
| 357 | } | ||
| 358 | } | ||
| 359 | } | ||
| 360 | |||
| 361 | void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& program, | ||
| 362 | EmitContext& ctx, Id main_func) { | ||
| 363 | if (profile.has_broken_fp16_float_controls && program.info.uses_fp16) { | ||
| 364 | return; | ||
| 365 | } | ||
| 366 | if (program.info.uses_fp16 && profile.support_fp16_signed_zero_nan_preserve) { | ||
| 367 | ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); | ||
| 368 | ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 16U); | ||
| 369 | } | ||
| 370 | if (profile.support_fp32_signed_zero_nan_preserve) { | ||
| 371 | ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); | ||
| 372 | ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 32U); | ||
| 373 | } | ||
| 374 | if (program.info.uses_fp64 && profile.support_fp64_signed_zero_nan_preserve) { | ||
| 375 | ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); | ||
| 376 | ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 64U); | ||
| 377 | } | ||
| 378 | } | ||
| 379 | |||
| 380 | void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ctx) { | ||
| 381 | if (info.uses_sampled_1d) { | ||
| 382 | ctx.AddCapability(spv::Capability::Sampled1D); | ||
| 383 | } | ||
| 384 | if (info.uses_sparse_residency) { | ||
| 385 | ctx.AddCapability(spv::Capability::SparseResidency); | ||
| 386 | } | ||
| 387 | if (info.uses_demote_to_helper_invocation && profile.support_demote_to_helper_invocation) { | ||
| 388 | ctx.AddExtension("SPV_EXT_demote_to_helper_invocation"); | ||
| 389 | ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT); | ||
| 390 | } | ||
| 391 | if (info.stores[IR::Attribute::ViewportIndex]) { | ||
| 392 | ctx.AddCapability(spv::Capability::MultiViewport); | ||
| 393 | } | ||
| 394 | if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) { | ||
| 395 | ctx.AddExtension("SPV_NV_viewport_array2"); | ||
| 396 | ctx.AddCapability(spv::Capability::ShaderViewportMaskNV); | ||
| 397 | } | ||
| 398 | if (info.stores[IR::Attribute::Layer] || info.stores[IR::Attribute::ViewportIndex]) { | ||
| 399 | if (profile.support_viewport_index_layer_non_geometry && ctx.stage != Stage::Geometry) { | ||
| 400 | ctx.AddExtension("SPV_EXT_shader_viewport_index_layer"); | ||
| 401 | ctx.AddCapability(spv::Capability::ShaderViewportIndexLayerEXT); | ||
| 402 | } | ||
| 403 | } | ||
| 404 | if (!profile.support_vertex_instance_id && | ||
| 405 | (info.loads[IR::Attribute::InstanceId] || info.loads[IR::Attribute::VertexId])) { | ||
| 406 | ctx.AddExtension("SPV_KHR_shader_draw_parameters"); | ||
| 407 | ctx.AddCapability(spv::Capability::DrawParameters); | ||
| 408 | } | ||
| 409 | if ((info.uses_subgroup_vote || info.uses_subgroup_invocation_id || | ||
| 410 | info.uses_subgroup_shuffles) && | ||
| 411 | profile.support_vote) { | ||
| 412 | ctx.AddExtension("SPV_KHR_shader_ballot"); | ||
| 413 | ctx.AddCapability(spv::Capability::SubgroupBallotKHR); | ||
| 414 | if (!profile.warp_size_potentially_larger_than_guest) { | ||
| 415 | // Vote ops are only used when not taking the long path | ||
| 416 | ctx.AddExtension("SPV_KHR_subgroup_vote"); | ||
| 417 | ctx.AddCapability(spv::Capability::SubgroupVoteKHR); | ||
| 418 | } | ||
| 419 | } | ||
| 420 | if (info.uses_int64_bit_atomics && profile.support_int64_atomics) { | ||
| 421 | ctx.AddCapability(spv::Capability::Int64Atomics); | ||
| 422 | } | ||
| 423 | if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) { | ||
| 424 | ctx.AddCapability(spv::Capability::StorageImageReadWithoutFormat); | ||
| 425 | } | ||
| 426 | if (info.uses_typeless_image_writes) { | ||
| 427 | ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat); | ||
| 428 | } | ||
| 429 | if (info.uses_image_buffers) { | ||
| 430 | ctx.AddCapability(spv::Capability::ImageBuffer); | ||
| 431 | } | ||
| 432 | if (info.uses_sample_id) { | ||
| 433 | ctx.AddCapability(spv::Capability::SampleRateShading); | ||
| 434 | } | ||
| 435 | if (!ctx.runtime_info.xfb_varyings.empty()) { | ||
| 436 | ctx.AddCapability(spv::Capability::TransformFeedback); | ||
| 437 | } | ||
| 438 | if (info.uses_derivatives) { | ||
| 439 | ctx.AddCapability(spv::Capability::DerivativeControl); | ||
| 440 | } | ||
| 441 | // TODO: Track this usage | ||
| 442 | ctx.AddCapability(spv::Capability::ImageGatherExtended); | ||
| 443 | ctx.AddCapability(spv::Capability::ImageQuery); | ||
| 444 | ctx.AddCapability(spv::Capability::SampledBuffer); | ||
| 445 | } | ||
| 446 | |||
| 447 | void PatchPhiNodes(IR::Program& program, EmitContext& ctx) { | ||
| 448 | auto inst{program.blocks.front()->begin()}; | ||
| 449 | size_t block_index{0}; | ||
| 450 | ctx.PatchDeferredPhi([&](size_t phi_arg) { | ||
| 451 | if (phi_arg == 0) { | ||
| 452 | ++inst; | ||
| 453 | if (inst == program.blocks[block_index]->end() || | ||
| 454 | inst->GetOpcode() != IR::Opcode::Phi) { | ||
| 455 | do { | ||
| 456 | ++block_index; | ||
| 457 | inst = program.blocks[block_index]->begin(); | ||
| 458 | } while (inst->GetOpcode() != IR::Opcode::Phi); | ||
| 459 | } | ||
| 460 | } | ||
| 461 | return ctx.Def(inst->Arg(phi_arg)); | ||
| 462 | }); | ||
| 463 | } | ||
| 464 | } // Anonymous namespace | ||
| 465 | |||
| 466 | std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info, | ||
| 467 | IR::Program& program, Bindings& bindings) { | ||
| 468 | EmitContext ctx{profile, runtime_info, program, bindings}; | ||
| 469 | const Id main{DefineMain(ctx, program)}; | ||
| 470 | DefineEntryPoint(program, ctx, main); | ||
| 471 | if (profile.support_float_controls) { | ||
| 472 | ctx.AddExtension("SPV_KHR_float_controls"); | ||
| 473 | SetupDenormControl(profile, program, ctx, main); | ||
| 474 | SetupSignedNanCapabilities(profile, program, ctx, main); | ||
| 475 | } | ||
| 476 | SetupCapabilities(profile, program.info, ctx); | ||
| 477 | PatchPhiNodes(program, ctx); | ||
| 478 | return ctx.Assemble(); | ||
| 479 | } | ||
| 480 | |||
| 481 | Id EmitPhi(EmitContext& ctx, IR::Inst* inst) { | ||
| 482 | const size_t num_args{inst->NumArgs()}; | ||
| 483 | boost::container::small_vector<Id, 32> blocks; | ||
| 484 | blocks.reserve(num_args); | ||
| 485 | for (size_t index = 0; index < num_args; ++index) { | ||
| 486 | blocks.push_back(inst->PhiBlock(index)->Definition<Id>()); | ||
| 487 | } | ||
| 488 | // The type of a phi instruction is stored in its flags | ||
| 489 | const Id result_type{TypeId(ctx, inst->Flags<IR::Type>())}; | ||
| 490 | return ctx.DeferredOpPhi(result_type, std::span(blocks.data(), blocks.size())); | ||
| 491 | } | ||
| 492 | |||
| 493 | void EmitVoid(EmitContext&) {} | ||
| 494 | |||
| 495 | Id EmitIdentity(EmitContext& ctx, const IR::Value& value) { | ||
| 496 | const Id id{ctx.Def(value)}; | ||
| 497 | if (!Sirit::ValidId(id)) { | ||
| 498 | throw NotImplementedException("Forward identity declaration"); | ||
| 499 | } | ||
| 500 | return id; | ||
| 501 | } | ||
| 502 | |||
| 503 | Id EmitConditionRef(EmitContext& ctx, const IR::Value& value) { | ||
| 504 | const Id id{ctx.Def(value)}; | ||
| 505 | if (!Sirit::ValidId(id)) { | ||
| 506 | throw NotImplementedException("Forward identity declaration"); | ||
| 507 | } | ||
| 508 | return id; | ||
| 509 | } | ||
| 510 | |||
| 511 | void EmitReference(EmitContext&) {} | ||
| 512 | |||
| 513 | void EmitPhiMove(EmitContext&) { | ||
| 514 | throw LogicError("Unreachable instruction"); | ||
| 515 | } | ||
| 516 | |||
| 517 | void EmitGetZeroFromOp(EmitContext&) { | ||
| 518 | throw LogicError("Unreachable instruction"); | ||
| 519 | } | ||
| 520 | |||
| 521 | void EmitGetSignFromOp(EmitContext&) { | ||
| 522 | throw LogicError("Unreachable instruction"); | ||
| 523 | } | ||
| 524 | |||
| 525 | void EmitGetCarryFromOp(EmitContext&) { | ||
| 526 | throw LogicError("Unreachable instruction"); | ||
| 527 | } | ||
| 528 | |||
| 529 | void EmitGetOverflowFromOp(EmitContext&) { | ||
| 530 | throw LogicError("Unreachable instruction"); | ||
| 531 | } | ||
| 532 | |||
| 533 | void EmitGetSparseFromOp(EmitContext&) { | ||
| 534 | throw LogicError("Unreachable instruction"); | ||
| 535 | } | ||
| 536 | |||
| 537 | void EmitGetInBoundsFromOp(EmitContext&) { | ||
| 538 | throw LogicError("Unreachable instruction"); | ||
| 539 | } | ||
| 540 | |||
| 541 | } // namespace Shader::Backend::SPIRV | ||
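The EmitInst dispatcher in this file expands an X-macro over opcodes.inc and uses FuncTraits to deduce each Emit* function's return type, arity, and argument types at compile time. A reduced, self-contained sketch of the same technique (EmitAdd/EmitNop are hypothetical stand-ins for the real opcode handlers):

    #include <cstddef>
    #include <tuple>
    #include <type_traits>

    template <class Func>
    struct FuncTraits;

    template <class R, class... Args>
    struct FuncTraits<R (*)(Args...)> {
        using ReturnType = R;
        static constexpr size_t NUM_ARGS = sizeof...(Args);
        template <size_t I>
        using ArgType = std::tuple_element_t<I, std::tuple<Args...>>;
    };

    struct Ctx {};
    int EmitAdd(Ctx&, int a, int b) { return a + b; }
    void EmitNop(Ctx&) {}

    // The traits drive how an Invoke-style wrapper unpacks arguments for
    // each heterogeneous Emit* signature.
    static_assert(FuncTraits<decltype(&EmitAdd)>::NUM_ARGS == 3);
    static_assert(std::is_same_v<FuncTraits<decltype(&EmitAdd)>::ArgType<1>, int>);
    static_assert(std::is_same_v<FuncTraits<decltype(&EmitNop)>::ReturnType, void>);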
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h new file mode 100644 index 000000000..db0c935fe --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h | |||
| @@ -0,0 +1,27 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include <sirit/sirit.h> | ||
| 10 | |||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "shader_recompiler/backend/bindings.h" | ||
| 13 | #include "shader_recompiler/backend/spirv/emit_context.h" | ||
| 14 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 15 | #include "shader_recompiler/profile.h" | ||
| 16 | |||
| 17 | namespace Shader::Backend::SPIRV { | ||
| 18 | |||
| 19 | [[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info, | ||
| 20 | IR::Program& program, Bindings& bindings); | ||
| 21 | |||
| 22 | [[nodiscard]] inline std::vector<u32> EmitSPIRV(const Profile& profile, IR::Program& program) { | ||
| 23 | Bindings binding; | ||
| 24 | return EmitSPIRV(profile, {}, program, binding); | ||
| 25 | } | ||
| 26 | |||
| 27 | } // namespace Shader::Backend::SPIRV | ||
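A caller-side sketch of the interface above; `profile`, `runtime_info`, and `program` are assumed to come from the recompiler's frontend passes and are not constructed here:

    // Hypothetical call site for the entry point declared above.
    Shader::Backend::Bindings bindings{};
    const std::vector<u32> code{
        Shader::Backend::SPIRV::EmitSPIRV(profile, runtime_info, program, bindings)};
    // `code` is a complete SPIR-V module; a Vulkan host would pass it to
    // vkCreateShaderModule.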
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp new file mode 100644 index 000000000..9af8bb9e1 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp | |||
| @@ -0,0 +1,448 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 6 | #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" | ||
| 7 | |||
| 8 | namespace Shader::Backend::SPIRV { | ||
| 9 | namespace { | ||
| 10 | Id SharedPointer(EmitContext& ctx, Id offset, u32 index_offset = 0) { | ||
| 11 | const Id shift_id{ctx.Const(2U)}; | ||
| 12 | Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; | ||
| 13 | if (index_offset > 0) { | ||
| 14 | index = ctx.OpIAdd(ctx.U32[1], index, ctx.Const(index_offset)); | ||
| 15 | } | ||
| 16 | return ctx.profile.support_explicit_workgroup_layout | ||
| 17 | ? ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index) | ||
| 18 | : ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index); | ||
| 19 | } | ||
| 20 | |||
| 21 | Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size) { | ||
| 22 | if (offset.IsImmediate()) { | ||
| 23 | const u32 imm_offset{static_cast<u32>(offset.U32() / element_size)}; | ||
| 24 | return ctx.Const(imm_offset); | ||
| 25 | } | ||
| 26 | const u32 shift{static_cast<u32>(std::countr_zero(element_size))}; | ||
| 27 | const Id index{ctx.Def(offset)}; | ||
| 28 | if (shift == 0) { | ||
| 29 | return index; | ||
| 30 | } | ||
| 31 | const Id shift_id{ctx.Const(shift)}; | ||
| 32 | return ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id); | ||
| 33 | } | ||
| 34 | |||
| 35 | Id StoragePointer(EmitContext& ctx, const StorageTypeDefinition& type_def, | ||
| 36 | Id StorageDefinitions::*member_ptr, const IR::Value& binding, | ||
| 37 | const IR::Value& offset, size_t element_size) { | ||
| 38 | if (!binding.IsImmediate()) { | ||
| 39 | throw NotImplementedException("Dynamic storage buffer indexing"); | ||
| 40 | } | ||
| 41 | const Id ssbo{ctx.ssbos[binding.U32()].*member_ptr}; | ||
| 42 | const Id index{StorageIndex(ctx, offset, element_size)}; | ||
| 43 | return ctx.OpAccessChain(type_def.element, ssbo, ctx.u32_zero_value, index); | ||
| 44 | } | ||
| 45 | |||
| 46 | std::pair<Id, Id> AtomicArgs(EmitContext& ctx) { | ||
| 47 | const Id scope{ctx.Const(static_cast<u32>(spv::Scope::Device))}; | ||
| 48 | const Id semantics{ctx.u32_zero_value}; | ||
| 49 | return {scope, semantics}; | ||
| 50 | } | ||
| 51 | |||
| 52 | Id SharedAtomicU32(EmitContext& ctx, Id offset, Id value, | ||
| 53 | Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { | ||
| 54 | const Id pointer{SharedPointer(ctx, offset)}; | ||
| 55 | const auto [scope, semantics]{AtomicArgs(ctx)}; | ||
| 56 | return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value); | ||
| 57 | } | ||
| 58 | |||
| 59 | Id StorageAtomicU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value, | ||
| 60 | Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { | ||
| 61 | const Id pointer{StoragePointer(ctx, ctx.storage_types.U32, &StorageDefinitions::U32, binding, | ||
| 62 | offset, sizeof(u32))}; | ||
| 63 | const auto [scope, semantics]{AtomicArgs(ctx)}; | ||
| 64 | return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value); | ||
| 65 | } | ||
| 66 | |||
| 67 | Id StorageAtomicU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value, | ||
| 68 | Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id), | ||
| 69 | Id (Sirit::Module::*non_atomic_func)(Id, Id, Id)) { | ||
| 70 | if (ctx.profile.support_int64_atomics) { | ||
| 71 | const Id pointer{StoragePointer(ctx, ctx.storage_types.U64, &StorageDefinitions::U64, | ||
| 72 | binding, offset, sizeof(u64))}; | ||
| 73 | const auto [scope, semantics]{AtomicArgs(ctx)}; | ||
| 74 | return (ctx.*atomic_func)(ctx.U64, pointer, scope, semantics, value); | ||
| 75 | } | ||
| 76 | LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, falling back to a non-atomic path"); | ||
| 77 | const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2, | ||
| 78 | binding, offset, sizeof(u32[2]))}; | ||
| 79 | const Id original_value{ctx.OpBitcast(ctx.U64, ctx.OpLoad(ctx.U32[2], pointer))}; | ||
| 80 | const Id result{(ctx.*non_atomic_func)(ctx.U64, value, original_value)}; | ||
| 81 | ctx.OpStore(pointer, ctx.OpBitcast(ctx.U32[2], result)); | ||
| 82 | return original_value; | ||
| 83 | } | ||
| 84 | } // Anonymous namespace | ||
| 85 | |||
| 86 | Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value) { | ||
| 87 | return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicIAdd); | ||
| 88 | } | ||
| 89 | |||
| 90 | Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value) { | ||
| 91 | return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMin); | ||
| 92 | } | ||
| 93 | |||
| 94 | Id EmitSharedAtomicUMin32(EmitContext& ctx, Id offset, Id value) { | ||
| 95 | return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMin); | ||
| 96 | } | ||
| 97 | |||
| 98 | Id EmitSharedAtomicSMax32(EmitContext& ctx, Id offset, Id value) { | ||
| 99 | return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMax); | ||
| 100 | } | ||
| 101 | |||
| 102 | Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value) { | ||
| 103 | return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMax); | ||
| 104 | } | ||
| 105 | |||
| 106 | Id EmitSharedAtomicInc32(EmitContext& ctx, Id offset, Id value) { | ||
| 107 | const Id shift_id{ctx.Const(2U)}; | ||
| 108 | const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; | ||
| 109 | return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_shared, index, value); | ||
| 110 | } | ||
| 111 | |||
| 112 | Id EmitSharedAtomicDec32(EmitContext& ctx, Id offset, Id value) { | ||
| 113 | const Id shift_id{ctx.Const(2U)}; | ||
| 114 | const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; | ||
| 115 | return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_shared, index, value); | ||
| 116 | } | ||
| 117 | |||
| 118 | Id EmitSharedAtomicAnd32(EmitContext& ctx, Id offset, Id value) { | ||
| 119 | return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicAnd); | ||
| 120 | } | ||
| 121 | |||
| 122 | Id EmitSharedAtomicOr32(EmitContext& ctx, Id offset, Id value) { | ||
| 123 | return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicOr); | ||
| 124 | } | ||
| 125 | |||
| 126 | Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value) { | ||
| 127 | return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicXor); | ||
| 128 | } | ||
| 129 | |||
| 130 | Id EmitSharedAtomicExchange32(EmitContext& ctx, Id offset, Id value) { | ||
| 131 | return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicExchange); | ||
| 132 | } | ||
| 133 | |||
| 134 | Id EmitSharedAtomicExchange64(EmitContext& ctx, Id offset, Id value) { | ||
| 135 | if (ctx.profile.support_int64_atomics && ctx.profile.support_explicit_workgroup_layout) { | ||
| 136 | const Id shift_id{ctx.Const(3U)}; | ||
| 137 | const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; | ||
| 138 | const Id pointer{ | ||
| 139 | ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)}; | ||
| 140 | const auto [scope, semantics]{AtomicArgs(ctx)}; | ||
| 141 | return ctx.OpAtomicExchange(ctx.U64, pointer, scope, semantics, value); | ||
| 142 | } | ||
| 143 | LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, falling back to a non-atomic path"); | ||
| 144 | const Id pointer_1{SharedPointer(ctx, offset, 0)}; | ||
| 145 | const Id pointer_2{SharedPointer(ctx, offset, 1)}; | ||
| 146 | const Id value_1{ctx.OpLoad(ctx.U32[1], pointer_1)}; | ||
| 147 | const Id value_2{ctx.OpLoad(ctx.U32[1], pointer_2)}; | ||
| 148 | const Id new_vector{ctx.OpBitcast(ctx.U32[2], value)}; | ||
| 149 | ctx.OpStore(pointer_1, ctx.OpCompositeExtract(ctx.U32[1], new_vector, 0U)); | ||
| 150 | ctx.OpStore(pointer_2, ctx.OpCompositeExtract(ctx.U32[1], new_vector, 1U)); | ||
| 151 | return ctx.OpBitcast(ctx.U64, ctx.OpCompositeConstruct(ctx.U32[2], value_1, value_2)); | ||
| 152 | } | ||
| 153 | |||
| 154 | Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 155 | Id value) { | ||
| 156 | return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicIAdd); | ||
| 157 | } | ||
| 158 | |||
| 159 | Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 160 | Id value) { | ||
| 161 | return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMin); | ||
| 162 | } | ||
| 163 | |||
| 164 | Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 165 | Id value) { | ||
| 166 | return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMin); | ||
| 167 | } | ||
| 168 | |||
| 169 | Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 170 | Id value) { | ||
| 171 | return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMax); | ||
| 172 | } | ||
| 173 | |||
| 174 | Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 175 | Id value) { | ||
| 176 | return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMax); | ||
| 177 | } | ||
| 178 | |||
| 179 | Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 180 | Id value) { | ||
| 181 | const Id ssbo{ctx.ssbos[binding.U32()].U32}; | ||
| 182 | const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; | ||
| 183 | return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_ssbo, base_index, value, ssbo); | ||
| 184 | } | ||
| 185 | |||
| 186 | Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 187 | Id value) { | ||
| 188 | const Id ssbo{ctx.ssbos[binding.U32()].U32}; | ||
| 189 | const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; | ||
| 190 | return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_ssbo, base_index, value, ssbo); | ||
| 191 | } | ||
| 192 | |||
| 193 | Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 194 | Id value) { | ||
| 195 | return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicAnd); | ||
| 196 | } | ||
| 197 | |||
| 198 | Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 199 | Id value) { | ||
| 200 | return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicOr); | ||
| 201 | } | ||
| 202 | |||
| 203 | Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 204 | Id value) { | ||
| 205 | return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicXor); | ||
| 206 | } | ||
| 207 | |||
| 208 | Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 209 | Id value) { | ||
| 210 | return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicExchange); | ||
| 211 | } | ||
| 212 | |||
| 213 | Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 214 | Id value) { | ||
| 215 | return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicIAdd, | ||
| 216 | &Sirit::Module::OpIAdd); | ||
| 217 | } | ||
| 218 | |||
| 219 | Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 220 | Id value) { | ||
| 221 | return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMin, | ||
| 222 | &Sirit::Module::OpSMin); | ||
| 223 | } | ||
| 224 | |||
| 225 | Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 226 | Id value) { | ||
| 227 | return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMin, | ||
| 228 | &Sirit::Module::OpUMin); | ||
| 229 | } | ||
| 230 | |||
| 231 | Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 232 | Id value) { | ||
| 233 | return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMax, | ||
| 234 | &Sirit::Module::OpSMax); | ||
| 235 | } | ||
| 236 | |||
| 237 | Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 238 | Id value) { | ||
| 239 | return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMax, | ||
| 240 | &Sirit::Module::OpUMax); | ||
| 241 | } | ||
| 242 | |||
| 243 | Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 244 | Id value) { | ||
| 245 | return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicAnd, | ||
| 246 | &Sirit::Module::OpBitwiseAnd); | ||
| 247 | } | ||
| 248 | |||
| 249 | Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 250 | Id value) { | ||
| 251 | return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicOr, | ||
| 252 | &Sirit::Module::OpBitwiseOr); | ||
| 253 | } | ||
| 254 | |||
| 255 | Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 256 | Id value) { | ||
| 257 | return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicXor, | ||
| 258 | &Sirit::Module::OpBitwiseXor); | ||
| 259 | } | ||
| 260 | |||
| 261 | Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 262 | Id value) { | ||
| 263 | if (ctx.profile.support_int64_atomics) { | ||
| 264 | const Id pointer{StoragePointer(ctx, ctx.storage_types.U64, &StorageDefinitions::U64, | ||
| 265 | binding, offset, sizeof(u64))}; | ||
| 266 | const auto [scope, semantics]{AtomicArgs(ctx)}; | ||
| 267 | return ctx.OpAtomicExchange(ctx.U64, pointer, scope, semantics, value); | ||
| 268 | } | ||
| 269 | LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, falling back to a non-atomic path"); | ||
| 270 | const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2, | ||
| 271 | binding, offset, sizeof(u32[2]))}; | ||
| 272 | const Id original{ctx.OpBitcast(ctx.U64, ctx.OpLoad(ctx.U32[2], pointer))}; | ||
| 273 | ctx.OpStore(pointer, value); | ||
| 274 | return original; | ||
| 275 | } | ||
| 276 | |||
| 277 | Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 278 | Id value) { | ||
| 279 | const Id ssbo{ctx.ssbos[binding.U32()].U32}; | ||
| 280 | const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; | ||
| 281 | return ctx.OpFunctionCall(ctx.F32[1], ctx.f32_add_cas, base_index, value, ssbo); | ||
| 282 | } | ||
| 283 | |||
| 284 | Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 285 | Id value) { | ||
| 286 | const Id ssbo{ctx.ssbos[binding.U32()].U32}; | ||
| 287 | const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; | ||
| 288 | const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_add_cas, base_index, value, ssbo)}; | ||
| 289 | return ctx.OpBitcast(ctx.U32[1], result); | ||
| 290 | } | ||
| 291 | |||
| 292 | Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 293 | Id value) { | ||
| 294 | const Id ssbo{ctx.ssbos[binding.U32()].U32}; | ||
| 295 | const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; | ||
| 296 | const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_add_cas, base_index, value, ssbo)}; | ||
| 297 | return ctx.OpPackHalf2x16(ctx.U32[1], result); | ||
| 298 | } | ||
| 299 | |||
| 300 | Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 301 | Id value) { | ||
| 302 | const Id ssbo{ctx.ssbos[binding.U32()].U32}; | ||
| 303 | const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; | ||
| 304 | const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_min_cas, base_index, value, ssbo)}; | ||
| 305 | return ctx.OpBitcast(ctx.U32[1], result); | ||
| 306 | } | ||
| 307 | |||
| 308 | Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 309 | Id value) { | ||
| 310 | const Id ssbo{ctx.ssbos[binding.U32()].U32}; | ||
| 311 | const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; | ||
| 312 | const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_min_cas, base_index, value, ssbo)}; | ||
| 313 | return ctx.OpPackHalf2x16(ctx.U32[1], result); | ||
| 314 | } | ||
| 315 | |||
| 316 | Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 317 | Id value) { | ||
| 318 | const Id ssbo{ctx.ssbos[binding.U32()].U32}; | ||
| 319 | const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; | ||
| 320 | const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_max_cas, base_index, value, ssbo)}; | ||
| 321 | return ctx.OpBitcast(ctx.U32[1], result); | ||
| 322 | } | ||
| 323 | |||
| 324 | Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 325 | Id value) { | ||
| 326 | const Id ssbo{ctx.ssbos[binding.U32()].U32}; | ||
| 327 | const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; | ||
| 328 | const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_max_cas, base_index, value, ssbo)}; | ||
| 329 | return ctx.OpPackHalf2x16(ctx.U32[1], result); | ||
| 330 | } | ||
| 331 | |||
| 332 | Id EmitGlobalAtomicIAdd32(EmitContext&) { | ||
| 333 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 334 | } | ||
| 335 | |||
| 336 | Id EmitGlobalAtomicSMin32(EmitContext&) { | ||
| 337 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 338 | } | ||
| 339 | |||
| 340 | Id EmitGlobalAtomicUMin32(EmitContext&) { | ||
| 341 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 342 | } | ||
| 343 | |||
| 344 | Id EmitGlobalAtomicSMax32(EmitContext&) { | ||
| 345 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 346 | } | ||
| 347 | |||
| 348 | Id EmitGlobalAtomicUMax32(EmitContext&) { | ||
| 349 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 350 | } | ||
| 351 | |||
| 352 | Id EmitGlobalAtomicInc32(EmitContext&) { | ||
| 353 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 354 | } | ||
| 355 | |||
| 356 | Id EmitGlobalAtomicDec32(EmitContext&) { | ||
| 357 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 358 | } | ||
| 359 | |||
| 360 | Id EmitGlobalAtomicAnd32(EmitContext&) { | ||
| 361 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 362 | } | ||
| 363 | |||
| 364 | Id EmitGlobalAtomicOr32(EmitContext&) { | ||
| 365 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 366 | } | ||
| 367 | |||
| 368 | Id EmitGlobalAtomicXor32(EmitContext&) { | ||
| 369 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 370 | } | ||
| 371 | |||
| 372 | Id EmitGlobalAtomicExchange32(EmitContext&) { | ||
| 373 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 374 | } | ||
| 375 | |||
| 376 | Id EmitGlobalAtomicIAdd64(EmitContext&) { | ||
| 377 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 378 | } | ||
| 379 | |||
| 380 | Id EmitGlobalAtomicSMin64(EmitContext&) { | ||
| 381 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 382 | } | ||
| 383 | |||
| 384 | Id EmitGlobalAtomicUMin64(EmitContext&) { | ||
| 385 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 386 | } | ||
| 387 | |||
| 388 | Id EmitGlobalAtomicSMax64(EmitContext&) { | ||
| 389 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 390 | } | ||
| 391 | |||
| 392 | Id EmitGlobalAtomicUMax64(EmitContext&) { | ||
| 393 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 394 | } | ||
| 395 | |||
| 396 | Id EmitGlobalAtomicInc64(EmitContext&) { | ||
| 397 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 398 | } | ||
| 399 | |||
| 400 | Id EmitGlobalAtomicDec64(EmitContext&) { | ||
| 401 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 402 | } | ||
| 403 | |||
| 404 | Id EmitGlobalAtomicAnd64(EmitContext&) { | ||
| 405 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 406 | } | ||
| 407 | |||
| 408 | Id EmitGlobalAtomicOr64(EmitContext&) { | ||
| 409 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 410 | } | ||
| 411 | |||
| 412 | Id EmitGlobalAtomicXor64(EmitContext&) { | ||
| 413 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 414 | } | ||
| 415 | |||
| 416 | Id EmitGlobalAtomicExchange64(EmitContext&) { | ||
| 417 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 418 | } | ||
| 419 | |||
| 420 | Id EmitGlobalAtomicAddF32(EmitContext&) { | ||
| 421 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 422 | } | ||
| 423 | |||
| 424 | Id EmitGlobalAtomicAddF16x2(EmitContext&) { | ||
| 425 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 426 | } | ||
| 427 | |||
| 428 | Id EmitGlobalAtomicAddF32x2(EmitContext&) { | ||
| 429 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 430 | } | ||
| 431 | |||
| 432 | Id EmitGlobalAtomicMinF16x2(EmitContext&) { | ||
| 433 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 434 | } | ||
| 435 | |||
| 436 | Id EmitGlobalAtomicMinF32x2(EmitContext&) { | ||
| 437 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 438 | } | ||
| 439 | |||
| 440 | Id EmitGlobalAtomicMaxF16x2(EmitContext&) { | ||
| 441 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 442 | } | ||
| 443 | |||
| 444 | Id EmitGlobalAtomicMaxF32x2(EmitContext&) { | ||
| 445 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 446 | } | ||
| 447 | |||
| 448 | } // namespace Shader::Backend::SPIRV | ||
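When Int64Atomics is unavailable, StorageAtomicU64 above degrades to a plain load/modify/store over a uvec2 view, which is why it logs an error: the sequence is not actually atomic. A CPU-side analogue of the emitted sequence, for intuition only (the real code emits SPIR-V ops, not C++):

    #include <cstdint>
    #include <cstring>

    // Equivalent of the non-atomic fallback path.
    uint64_t NonAtomicIAdd64(uint32_t words[2], uint64_t value) {
        uint64_t original;
        std::memcpy(&original, words, sizeof(original)); // OpLoad + OpBitcast
        const uint64_t result = original + value;        // non_atomic_func (OpIAdd)
        std::memcpy(words, &result, sizeof(result));     // OpBitcast + OpStore
        return original; // atomic RMW ops return the previous value
    }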
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp new file mode 100644 index 000000000..e0b52a001 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp | |||
| @@ -0,0 +1,38 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 6 | #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" | ||
| 7 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 8 | |||
| 9 | namespace Shader::Backend::SPIRV { | ||
| 10 | namespace { | ||
| 11 | void MemoryBarrier(EmitContext& ctx, spv::Scope scope) { | ||
| 12 | const auto semantics{ | ||
| 13 | spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory | | ||
| 14 | spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AtomicCounterMemory | | ||
| 15 | spv::MemorySemanticsMask::ImageMemory}; | ||
| 16 | ctx.OpMemoryBarrier(ctx.Const(static_cast<u32>(scope)), ctx.Const(static_cast<u32>(semantics))); | ||
| 17 | } | ||
| 18 | } // Anonymous namespace | ||
| 19 | |||
| 20 | void EmitBarrier(EmitContext& ctx) { | ||
| 21 | const auto execution{spv::Scope::Workgroup}; | ||
| 22 | const auto memory{spv::Scope::Workgroup}; | ||
| 23 | const auto memory_semantics{spv::MemorySemanticsMask::AcquireRelease | | ||
| 24 | spv::MemorySemanticsMask::WorkgroupMemory}; | ||
| 25 | ctx.OpControlBarrier(ctx.Const(static_cast<u32>(execution)), | ||
| 26 | ctx.Const(static_cast<u32>(memory)), | ||
| 27 | ctx.Const(static_cast<u32>(memory_semantics))); | ||
| 28 | } | ||
| 29 | |||
| 30 | void EmitWorkgroupMemoryBarrier(EmitContext& ctx) { | ||
| 31 | MemoryBarrier(ctx, spv::Scope::Workgroup); | ||
| 32 | } | ||
| 33 | |||
| 34 | void EmitDeviceMemoryBarrier(EmitContext& ctx) { | ||
| 35 | MemoryBarrier(ctx, spv::Scope::Device); | ||
| 36 | } | ||
| 37 | |||
| 38 | } // namespace Shader::Backend::SPIRV | ||
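The barrier emitters above encode scope and semantics as plain u32 constants; the semantics operand is a bitwise OR of spv::MemorySemanticsMask flags. For reference, the mask EmitBarrier builds corresponds to these bit values from the SPIR-V specification (matching the spv.hpp enum):

    #include <cstdint>

    // MemorySemantics bit values per the SPIR-V spec.
    constexpr uint32_t AcquireRelease  = 0x0008;
    constexpr uint32_t WorkgroupMemory = 0x0100;

    // EmitBarrier's memory_semantics constant:
    constexpr uint32_t semantics = AcquireRelease | WorkgroupMemory; // 0x0108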
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp new file mode 100644 index 000000000..bb11f4f4e --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp | |||
| @@ -0,0 +1,66 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 6 | #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" | ||
| 7 | |||
| 8 | namespace Shader::Backend::SPIRV { | ||
| 9 | |||
| 10 | void EmitBitCastU16F16(EmitContext&) { | ||
| 11 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 12 | } | ||
| 13 | |||
| 14 | Id EmitBitCastU32F32(EmitContext& ctx, Id value) { | ||
| 15 | return ctx.OpBitcast(ctx.U32[1], value); | ||
| 16 | } | ||
| 17 | |||
| 18 | void EmitBitCastU64F64(EmitContext&) { | ||
| 19 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 20 | } | ||
| 21 | |||
| 22 | void EmitBitCastF16U16(EmitContext&) { | ||
| 23 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 24 | } | ||
| 25 | |||
| 26 | Id EmitBitCastF32U32(EmitContext& ctx, Id value) { | ||
| 27 | return ctx.OpBitcast(ctx.F32[1], value); | ||
| 28 | } | ||
| 29 | |||
| 30 | void EmitBitCastF64U64(EmitContext&) { | ||
| 31 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 32 | } | ||
| 33 | |||
| 34 | Id EmitPackUint2x32(EmitContext& ctx, Id value) { | ||
| 35 | return ctx.OpBitcast(ctx.U64, value); | ||
| 36 | } | ||
| 37 | |||
| 38 | Id EmitUnpackUint2x32(EmitContext& ctx, Id value) { | ||
| 39 | return ctx.OpBitcast(ctx.U32[2], value); | ||
| 40 | } | ||
| 41 | |||
| 42 | Id EmitPackFloat2x16(EmitContext& ctx, Id value) { | ||
| 43 | return ctx.OpBitcast(ctx.U32[1], value); | ||
| 44 | } | ||
| 45 | |||
| 46 | Id EmitUnpackFloat2x16(EmitContext& ctx, Id value) { | ||
| 47 | return ctx.OpBitcast(ctx.F16[2], value); | ||
| 48 | } | ||
| 49 | |||
| 50 | Id EmitPackHalf2x16(EmitContext& ctx, Id value) { | ||
| 51 | return ctx.OpPackHalf2x16(ctx.U32[1], value); | ||
| 52 | } | ||
| 53 | |||
| 54 | Id EmitUnpackHalf2x16(EmitContext& ctx, Id value) { | ||
| 55 | return ctx.OpUnpackHalf2x16(ctx.F32[2], value); | ||
| 56 | } | ||
| 57 | |||
| 58 | Id EmitPackDouble2x32(EmitContext& ctx, Id value) { | ||
| 59 | return ctx.OpBitcast(ctx.F64[1], value); | ||
| 60 | } | ||
| 61 | |||
| 62 | Id EmitUnpackDouble2x32(EmitContext& ctx, Id value) { | ||
| 63 | return ctx.OpBitcast(ctx.U32[2], value); | ||
| 64 | } | ||
| 65 | |||
| 66 | } // namespace Shader::Backend::SPIRV | ||
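Note the split between the two pack families above: EmitPackFloat2x16 is a raw OpBitcast (two f16 lanes reinterpreted as one u32), while EmitPackHalf2x16 performs a real f32-to-f16 conversion through OpPackHalf2x16. A host-side sketch of the bitcast variant's lane layout, assuming the first component lands in the low 16 bits as GLSL's packHalf2x16 specifies:

#include <cstdint>
#include <cstdio>

// Reinterpret two raw 16-bit half patterns as one 32-bit word; this is the
// host-side analogue of the OpBitcast used by EmitPackFloat2x16.
std::uint32_t pack_2x16(std::uint16_t lo, std::uint16_t hi) {
    return static_cast<std::uint32_t>(lo) | (static_cast<std::uint32_t>(hi) << 16);
}

int main() {
    // 0x3c00 is 1.0 in IEEE binary16; no conversion happens, only repacking.
    std::printf("0x%08x\n", pack_2x16(0x3c00, 0x3c00)); // prints 0x3c003c00
}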
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp new file mode 100644 index 000000000..10ff4ecab --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp | |||
| @@ -0,0 +1,155 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 6 | #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" | ||
| 7 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 8 | |||
| 9 | namespace Shader::Backend::SPIRV { | ||
| 10 | |||
| 11 | Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2) { | ||
| 12 | return ctx.OpCompositeConstruct(ctx.U32[2], e1, e2); | ||
| 13 | } | ||
| 14 | |||
| 15 | Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3) { | ||
| 16 | return ctx.OpCompositeConstruct(ctx.U32[3], e1, e2, e3); | ||
| 17 | } | ||
| 18 | |||
| 19 | Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) { | ||
| 20 | return ctx.OpCompositeConstruct(ctx.U32[4], e1, e2, e3, e4); | ||
| 21 | } | ||
| 22 | |||
| 23 | Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index) { | ||
| 24 | return ctx.OpCompositeExtract(ctx.U32[1], composite, index); | ||
| 25 | } | ||
| 26 | |||
| 27 | Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index) { | ||
| 28 | return ctx.OpCompositeExtract(ctx.U32[1], composite, index); | ||
| 29 | } | ||
| 30 | |||
| 31 | Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index) { | ||
| 32 | return ctx.OpCompositeExtract(ctx.U32[1], composite, index); | ||
| 33 | } | ||
| 34 | |||
| 35 | Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index) { | ||
| 36 | return ctx.OpCompositeInsert(ctx.U32[2], object, composite, index); | ||
| 37 | } | ||
| 38 | |||
| 39 | Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index) { | ||
| 40 | return ctx.OpCompositeInsert(ctx.U32[3], object, composite, index); | ||
| 41 | } | ||
| 42 | |||
| 43 | Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index) { | ||
| 44 | return ctx.OpCompositeInsert(ctx.U32[4], object, composite, index); | ||
| 45 | } | ||
| 46 | |||
| 47 | Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2) { | ||
| 48 | return ctx.OpCompositeConstruct(ctx.F16[2], e1, e2); | ||
| 49 | } | ||
| 50 | |||
| 51 | Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3) { | ||
| 52 | return ctx.OpCompositeConstruct(ctx.F16[3], e1, e2, e3); | ||
| 53 | } | ||
| 54 | |||
| 55 | Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) { | ||
| 56 | return ctx.OpCompositeConstruct(ctx.F16[4], e1, e2, e3, e4); | ||
| 57 | } | ||
| 58 | |||
| 59 | Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index) { | ||
| 60 | return ctx.OpCompositeExtract(ctx.F16[1], composite, index); | ||
| 61 | } | ||
| 62 | |||
| 63 | Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index) { | ||
| 64 | return ctx.OpCompositeExtract(ctx.F16[1], composite, index); | ||
| 65 | } | ||
| 66 | |||
| 67 | Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index) { | ||
| 68 | return ctx.OpCompositeExtract(ctx.F16[1], composite, index); | ||
| 69 | } | ||
| 70 | |||
| 71 | Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index) { | ||
| 72 | return ctx.OpCompositeInsert(ctx.F16[2], object, composite, index); | ||
| 73 | } | ||
| 74 | |||
| 75 | Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index) { | ||
| 76 | return ctx.OpCompositeInsert(ctx.F16[3], object, composite, index); | ||
| 77 | } | ||
| 78 | |||
| 79 | Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index) { | ||
| 80 | return ctx.OpCompositeInsert(ctx.F16[4], object, composite, index); | ||
| 81 | } | ||
| 82 | |||
| 83 | Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2) { | ||
| 84 | return ctx.OpCompositeConstruct(ctx.F32[2], e1, e2); | ||
| 85 | } | ||
| 86 | |||
| 87 | Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3) { | ||
| 88 | return ctx.OpCompositeConstruct(ctx.F32[3], e1, e2, e3); | ||
| 89 | } | ||
| 90 | |||
| 91 | Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) { | ||
| 92 | return ctx.OpCompositeConstruct(ctx.F32[4], e1, e2, e3, e4); | ||
| 93 | } | ||
| 94 | |||
| 95 | Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index) { | ||
| 96 | return ctx.OpCompositeExtract(ctx.F32[1], composite, index); | ||
| 97 | } | ||
| 98 | |||
| 99 | Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index) { | ||
| 100 | return ctx.OpCompositeExtract(ctx.F32[1], composite, index); | ||
| 101 | } | ||
| 102 | |||
| 103 | Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index) { | ||
| 104 | return ctx.OpCompositeExtract(ctx.F32[1], composite, index); | ||
| 105 | } | ||
| 106 | |||
| 107 | Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index) { | ||
| 108 | return ctx.OpCompositeInsert(ctx.F32[2], object, composite, index); | ||
| 109 | } | ||
| 110 | |||
| 111 | Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index) { | ||
| 112 | return ctx.OpCompositeInsert(ctx.F32[3], object, composite, index); | ||
| 113 | } | ||
| 114 | |||
| 115 | Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index) { | ||
| 116 | return ctx.OpCompositeInsert(ctx.F32[4], object, composite, index); | ||
| 117 | } | ||
| 118 | |||
| 119 | void EmitCompositeConstructF64x2(EmitContext&) { | ||
| 120 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 121 | } | ||
| 122 | |||
| 123 | void EmitCompositeConstructF64x3(EmitContext&) { | ||
| 124 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 125 | } | ||
| 126 | |||
| 127 | void EmitCompositeConstructF64x4(EmitContext&) { | ||
| 128 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 129 | } | ||
| 130 | |||
| 131 | void EmitCompositeExtractF64x2(EmitContext&) { | ||
| 132 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 133 | } | ||
| 134 | |||
| 135 | void EmitCompositeExtractF64x3(EmitContext&) { | ||
| 136 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 137 | } | ||
| 138 | |||
| 139 | void EmitCompositeExtractF64x4(EmitContext&) { | ||
| 140 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 141 | } | ||
| 142 | |||
| 143 | Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index) { | ||
| 144 | return ctx.OpCompositeInsert(ctx.F64[2], object, composite, index); | ||
| 145 | } | ||
| 146 | |||
| 147 | Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index) { | ||
| 148 | return ctx.OpCompositeInsert(ctx.F64[3], object, composite, index); | ||
| 149 | } | ||
| 150 | |||
| 151 | Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index) { | ||
| 152 | return ctx.OpCompositeInsert(ctx.F64[4], object, composite, index); | ||
| 153 | } | ||
| 154 | |||
| 155 | } // namespace Shader::Backend::SPIRV | ||
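A detail worth flagging in the insert emitters: SPIR-V's OpCompositeInsert takes the object first and the composite second, while the IR-level helpers take (composite, object, index), hence the swapped first two arguments in every wrapper above. The operation also produces a new value rather than mutating its input; a minimal host-side analogue using std::array as a stand-in for a SPIR-V vector:

#include <array>
#include <cstdio>

// Value-semantics analogue of OpCompositeInsert: returns a new composite
// with one element replaced; the input composite is left untouched.
template <typename T, std::size_t N>
std::array<T, N> composite_insert(std::array<T, N> composite, T object, std::size_t index) {
    composite[index] = object;
    return composite;
}

int main() {
    const std::array<float, 4> v{0.0f, 1.0f, 2.0f, 3.0f};
    const auto w = composite_insert(v, 9.0f, 2);
    std::printf("%g %g\n", v[2], w[2]); // prints "2 9": the original is unchanged
}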
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp new file mode 100644 index 000000000..fb8c02a77 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp | |||
| @@ -0,0 +1,505 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <tuple> | ||
| 6 | #include <utility> | ||
| 7 | |||
| 8 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 9 | #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" | ||
| 10 | |||
| 11 | namespace Shader::Backend::SPIRV { | ||
| 12 | namespace { | ||
| 13 | struct AttrInfo { | ||
| 14 | Id pointer; | ||
| 15 | Id id; | ||
| 16 | bool needs_cast; | ||
| 17 | }; | ||
| 18 | |||
| 19 | std::optional<AttrInfo> AttrTypes(EmitContext& ctx, u32 index) { | ||
| 20 | const AttributeType type{ctx.runtime_info.generic_input_types.at(index)}; | ||
| 21 | switch (type) { | ||
| 22 | case AttributeType::Float: | ||
| 23 | return AttrInfo{ctx.input_f32, ctx.F32[1], false}; | ||
| 24 | case AttributeType::UnsignedInt: | ||
| 25 | return AttrInfo{ctx.input_u32, ctx.U32[1], true}; | ||
| 26 | case AttributeType::SignedInt: | ||
| 27 | return AttrInfo{ctx.input_s32, ctx.TypeInt(32, true), true}; | ||
| 28 | case AttributeType::Disabled: | ||
| 29 | return std::nullopt; | ||
| 30 | } | ||
| 31 | throw InvalidArgument("Invalid attribute type {}", type); | ||
| 32 | } | ||
| 33 | |||
| 34 | template <typename... Args> | ||
| 35 | Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&... args) { | ||
| 36 | switch (ctx.stage) { | ||
| 37 | case Stage::TessellationControl: | ||
| 38 | case Stage::TessellationEval: | ||
| 39 | case Stage::Geometry: | ||
| 40 | return ctx.OpAccessChain(pointer_type, base, vertex, std::forward<Args>(args)...); | ||
| 41 | default: | ||
| 42 | return ctx.OpAccessChain(pointer_type, base, std::forward<Args>(args)...); | ||
| 43 | } | ||
| 44 | } | ||
| 45 | |||
| 46 | template <typename... Args> | ||
| 47 | Id OutputAccessChain(EmitContext& ctx, Id result_type, Id base, Args&&... args) { | ||
| 48 | if (ctx.stage == Stage::TessellationControl) { | ||
| 49 | const Id invocation_id{ctx.OpLoad(ctx.U32[1], ctx.invocation_id)}; | ||
| 50 | return ctx.OpAccessChain(result_type, base, invocation_id, std::forward<Args>(args)...); | ||
| 51 | } else { | ||
| 52 | return ctx.OpAccessChain(result_type, base, std::forward<Args>(args)...); | ||
| 53 | } | ||
| 54 | } | ||
| 55 | |||
| 56 | struct OutAttr { | ||
| 57 | OutAttr(Id pointer_) : pointer{pointer_} {} | ||
| 58 | OutAttr(Id pointer_, Id type_) : pointer{pointer_}, type{type_} {} | ||
| 59 | |||
| 60 | Id pointer{}; | ||
| 61 | Id type{}; | ||
| 62 | }; | ||
| 63 | |||
| 64 | std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { | ||
| 65 | if (IR::IsGeneric(attr)) { | ||
| 66 | const u32 index{IR::GenericAttributeIndex(attr)}; | ||
| 67 | const u32 element{IR::GenericAttributeElement(attr)}; | ||
| 68 | const GenericElementInfo& info{ctx.output_generics.at(index).at(element)}; | ||
| 69 | if (info.num_components == 1) { | ||
| 70 | return info.id; | ||
| 71 | } else { | ||
| 72 | const u32 index_element{element - info.first_element}; | ||
| 73 | const Id index_id{ctx.Const(index_element)}; | ||
| 74 | return OutputAccessChain(ctx, ctx.output_f32, info.id, index_id); | ||
| 75 | } | ||
| 76 | } | ||
| 77 | switch (attr) { | ||
| 78 | case IR::Attribute::PointSize: | ||
| 79 | return ctx.output_point_size; | ||
| 80 | case IR::Attribute::PositionX: | ||
| 81 | case IR::Attribute::PositionY: | ||
| 82 | case IR::Attribute::PositionZ: | ||
| 83 | case IR::Attribute::PositionW: { | ||
| 84 | const u32 element{static_cast<u32>(attr) % 4}; | ||
| 85 | const Id element_id{ctx.Const(element)}; | ||
| 86 | return OutputAccessChain(ctx, ctx.output_f32, ctx.output_position, element_id); | ||
| 87 | } | ||
| 88 | case IR::Attribute::ClipDistance0: | ||
| 89 | case IR::Attribute::ClipDistance1: | ||
| 90 | case IR::Attribute::ClipDistance2: | ||
| 91 | case IR::Attribute::ClipDistance3: | ||
| 92 | case IR::Attribute::ClipDistance4: | ||
| 93 | case IR::Attribute::ClipDistance5: | ||
| 94 | case IR::Attribute::ClipDistance6: | ||
| 95 | case IR::Attribute::ClipDistance7: { | ||
| 96 | const u32 base{static_cast<u32>(IR::Attribute::ClipDistance0)}; | ||
| 97 | const u32 index{static_cast<u32>(attr) - base}; | ||
| 98 | const Id clip_num{ctx.Const(index)}; | ||
| 99 | return OutputAccessChain(ctx, ctx.output_f32, ctx.clip_distances, clip_num); | ||
| 100 | } | ||
| 101 | case IR::Attribute::Layer: | ||
| 102 | if (ctx.profile.support_viewport_index_layer_non_geometry || | ||
| 103 | ctx.stage == Shader::Stage::Geometry) { | ||
| 104 | return OutAttr{ctx.layer, ctx.U32[1]}; | ||
| 105 | } | ||
| 106 | return std::nullopt; | ||
| 107 | case IR::Attribute::ViewportIndex: | ||
| 108 | if (ctx.profile.support_viewport_index_layer_non_geometry || | ||
| 109 | ctx.stage == Shader::Stage::Geometry) { | ||
| 110 | return OutAttr{ctx.viewport_index, ctx.U32[1]}; | ||
| 111 | } | ||
| 112 | return std::nullopt; | ||
| 113 | case IR::Attribute::ViewportMask: | ||
| 114 | if (!ctx.profile.support_viewport_mask) { | ||
| 115 | return std::nullopt; | ||
| 116 | } | ||
| 117 | return OutAttr{ctx.OpAccessChain(ctx.output_u32, ctx.viewport_mask, ctx.u32_zero_value), | ||
| 118 | ctx.U32[1]}; | ||
| 119 | default: | ||
| 120 | throw NotImplementedException("Write attribute {}", attr); | ||
| 121 | } | ||
| 122 | } | ||
| 123 | |||
| 124 | Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, u32 element_size, | ||
| 125 | const IR::Value& binding, const IR::Value& offset) { | ||
| 126 | if (!binding.IsImmediate()) { | ||
| 127 | throw NotImplementedException("Constant buffer indexing"); | ||
| 128 | } | ||
| 129 | const Id cbuf{ctx.cbufs[binding.U32()].*member_ptr}; | ||
| 130 | const Id uniform_type{ctx.uniform_types.*member_ptr}; | ||
| 131 | if (!offset.IsImmediate()) { | ||
| 132 | Id index{ctx.Def(offset)}; | ||
| 133 | if (element_size > 1) { | ||
| 134 | const u32 log2_element_size{static_cast<u32>(std::countr_zero(element_size))}; | ||
| 135 | const Id shift{ctx.Const(log2_element_size)}; | ||
| 136 | index = ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), shift); | ||
| 137 | } | ||
| 138 | const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, index)}; | ||
| 139 | return ctx.OpLoad(result_type, access_chain); | ||
| 140 | } | ||
| 141 | // Hardware has been proven to read the aligned offset (e.g. LDC.U32 at offset 6 reads offset 4) | ||
| 142 | const Id imm_offset{ctx.Const(offset.U32() / element_size)}; | ||
| 143 | const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, imm_offset)}; | ||
| 144 | return ctx.OpLoad(result_type, access_chain); | ||
| 145 | } | ||
| 146 | |||
| 147 | Id GetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { | ||
| 148 | return GetCbuf(ctx, ctx.U32[1], &UniformDefinitions::U32, sizeof(u32), binding, offset); | ||
| 149 | } | ||
| 150 | |||
| 151 | Id GetCbufU32x4(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { | ||
| 152 | return GetCbuf(ctx, ctx.U32[4], &UniformDefinitions::U32x4, sizeof(u32[4]), binding, offset); | ||
| 153 | } | ||
| 154 | |||
| 155 | Id GetCbufElement(EmitContext& ctx, Id vector, const IR::Value& offset, u32 index_offset) { | ||
| 156 | if (offset.IsImmediate()) { | ||
| 157 | const u32 element{(offset.U32() / 4) % 4 + index_offset}; | ||
| 158 | return ctx.OpCompositeExtract(ctx.U32[1], vector, element); | ||
| 159 | } | ||
| 160 | const Id shift{ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), ctx.Const(2u))}; | ||
| 161 | Id element{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Const(3u))}; | ||
| 162 | if (index_offset > 0) { | ||
| 163 | element = ctx.OpIAdd(ctx.U32[1], element, ctx.Const(index_offset)); | ||
| 164 | } | ||
| 165 | return ctx.OpVectorExtractDynamic(ctx.U32[1], vector, element); | ||
| 166 | } | ||
| 167 | } // Anonymous namespace | ||
| 168 | |||
| 169 | void EmitGetRegister(EmitContext&) { | ||
| 170 | throw LogicError("Unreachable instruction"); | ||
| 171 | } | ||
| 172 | |||
| 173 | void EmitSetRegister(EmitContext&) { | ||
| 174 | throw LogicError("Unreachable instruction"); | ||
| 175 | } | ||
| 176 | |||
| 177 | void EmitGetPred(EmitContext&) { | ||
| 178 | throw LogicError("Unreachable instruction"); | ||
| 179 | } | ||
| 180 | |||
| 181 | void EmitSetPred(EmitContext&) { | ||
| 182 | throw LogicError("Unreachable instruction"); | ||
| 183 | } | ||
| 184 | |||
| 185 | void EmitSetGotoVariable(EmitContext&) { | ||
| 186 | throw LogicError("Unreachable instruction"); | ||
| 187 | } | ||
| 188 | |||
| 189 | void EmitGetGotoVariable(EmitContext&) { | ||
| 190 | throw LogicError("Unreachable instruction"); | ||
| 191 | } | ||
| 192 | |||
| 193 | void EmitSetIndirectBranchVariable(EmitContext&) { | ||
| 194 | throw LogicError("Unreachable instruction"); | ||
| 195 | } | ||
| 196 | |||
| 197 | void EmitGetIndirectBranchVariable(EmitContext&) { | ||
| 198 | throw LogicError("Unreachable instruction"); | ||
| 199 | } | ||
| 200 | |||
| 201 | Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { | ||
| 202 | if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int8) { | ||
| 203 | const Id load{GetCbuf(ctx, ctx.U8, &UniformDefinitions::U8, sizeof(u8), binding, offset)}; | ||
| 204 | return ctx.OpUConvert(ctx.U32[1], load); | ||
| 205 | } | ||
| 206 | Id element{}; | ||
| 207 | if (ctx.profile.support_descriptor_aliasing) { | ||
| 208 | element = GetCbufU32(ctx, binding, offset); | ||
| 209 | } else { | ||
| 210 | const Id vector{GetCbufU32x4(ctx, binding, offset)}; | ||
| 211 | element = GetCbufElement(ctx, vector, offset, 0u); | ||
| 212 | } | ||
| 213 | const Id bit_offset{ctx.BitOffset8(offset)}; | ||
| 214 | return ctx.OpBitFieldUExtract(ctx.U32[1], element, bit_offset, ctx.Const(8u)); | ||
| 215 | } | ||
| 216 | |||
| 217 | Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { | ||
| 218 | if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int8) { | ||
| 219 | const Id load{GetCbuf(ctx, ctx.S8, &UniformDefinitions::S8, sizeof(s8), binding, offset)}; | ||
| 220 | return ctx.OpSConvert(ctx.U32[1], load); | ||
| 221 | } | ||
| 222 | Id element{}; | ||
| 223 | if (ctx.profile.support_descriptor_aliasing) { | ||
| 224 | element = GetCbufU32(ctx, binding, offset); | ||
| 225 | } else { | ||
| 226 | const Id vector{GetCbufU32x4(ctx, binding, offset)}; | ||
| 227 | element = GetCbufElement(ctx, vector, offset, 0u); | ||
| 228 | } | ||
| 229 | const Id bit_offset{ctx.BitOffset8(offset)}; | ||
| 230 | return ctx.OpBitFieldSExtract(ctx.U32[1], element, bit_offset, ctx.Const(8u)); | ||
| 231 | } | ||
| 232 | |||
| 233 | Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { | ||
| 234 | if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int16) { | ||
| 235 | const Id load{ | ||
| 236 | GetCbuf(ctx, ctx.U16, &UniformDefinitions::U16, sizeof(u16), binding, offset)}; | ||
| 237 | return ctx.OpUConvert(ctx.U32[1], load); | ||
| 238 | } | ||
| 239 | Id element{}; | ||
| 240 | if (ctx.profile.support_descriptor_aliasing) { | ||
| 241 | element = GetCbufU32(ctx, binding, offset); | ||
| 242 | } else { | ||
| 243 | const Id vector{GetCbufU32x4(ctx, binding, offset)}; | ||
| 244 | element = GetCbufElement(ctx, vector, offset, 0u); | ||
| 245 | } | ||
| 246 | const Id bit_offset{ctx.BitOffset16(offset)}; | ||
| 247 | return ctx.OpBitFieldUExtract(ctx.U32[1], element, bit_offset, ctx.Const(16u)); | ||
| 248 | } | ||
| 249 | |||
| 250 | Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { | ||
| 251 | if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int16) { | ||
| 252 | const Id load{ | ||
| 253 | GetCbuf(ctx, ctx.S16, &UniformDefinitions::S16, sizeof(s16), binding, offset)}; | ||
| 254 | return ctx.OpSConvert(ctx.U32[1], load); | ||
| 255 | } | ||
| 256 | Id element{}; | ||
| 257 | if (ctx.profile.support_descriptor_aliasing) { | ||
| 258 | element = GetCbufU32(ctx, binding, offset); | ||
| 259 | } else { | ||
| 260 | const Id vector{GetCbufU32x4(ctx, binding, offset)}; | ||
| 261 | element = GetCbufElement(ctx, vector, offset, 0u); | ||
| 262 | } | ||
| 263 | const Id bit_offset{ctx.BitOffset16(offset)}; | ||
| 264 | return ctx.OpBitFieldSExtract(ctx.U32[1], element, bit_offset, ctx.Const(16u)); | ||
| 265 | } | ||
| 266 | |||
| 267 | Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { | ||
| 268 | if (ctx.profile.support_descriptor_aliasing) { | ||
| 269 | return GetCbufU32(ctx, binding, offset); | ||
| 270 | } else { | ||
| 271 | const Id vector{GetCbufU32x4(ctx, binding, offset)}; | ||
| 272 | return GetCbufElement(ctx, vector, offset, 0u); | ||
| 273 | } | ||
| 274 | } | ||
| 275 | |||
| 276 | Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { | ||
| 277 | if (ctx.profile.support_descriptor_aliasing) { | ||
| 278 | return GetCbuf(ctx, ctx.F32[1], &UniformDefinitions::F32, sizeof(f32), binding, offset); | ||
| 279 | } else { | ||
| 280 | const Id vector{GetCbufU32x4(ctx, binding, offset)}; | ||
| 281 | return ctx.OpBitcast(ctx.F32[1], GetCbufElement(ctx, vector, offset, 0u)); | ||
| 282 | } | ||
| 283 | } | ||
| 284 | |||
| 285 | Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { | ||
| 286 | if (ctx.profile.support_descriptor_aliasing) { | ||
| 287 | return GetCbuf(ctx, ctx.U32[2], &UniformDefinitions::U32x2, sizeof(u32[2]), binding, | ||
| 288 | offset); | ||
| 289 | } else { | ||
| 290 | const Id vector{GetCbufU32x4(ctx, binding, offset)}; | ||
| 291 | return ctx.OpCompositeConstruct(ctx.U32[2], GetCbufElement(ctx, vector, offset, 0u), | ||
| 292 | GetCbufElement(ctx, vector, offset, 1u)); | ||
| 293 | } | ||
| 294 | } | ||
| 295 | |||
| 296 | Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { | ||
| 297 | const u32 element{static_cast<u32>(attr) % 4}; | ||
| 298 | if (IR::IsGeneric(attr)) { | ||
| 299 | const u32 index{IR::GenericAttributeIndex(attr)}; | ||
| 300 | const std::optional<AttrInfo> type{AttrTypes(ctx, index)}; | ||
| 301 | if (!type) { | ||
| 302 | // Attribute is disabled | ||
| 303 | return ctx.Const(element == 3 ? 1.0f : 0.0f); | ||
| 304 | } | ||
| 305 | if (!ctx.runtime_info.previous_stage_stores.Generic(index, element)) { | ||
| 306 | // Varying component is not written | ||
| 307 | return ctx.Const(element == 3 ? 1.0f : 0.0f); | ||
| 308 | } | ||
| 309 | const Id generic_id{ctx.input_generics.at(index)}; | ||
| 310 | const Id pointer{AttrPointer(ctx, type->pointer, vertex, generic_id, ctx.Const(element))}; | ||
| 311 | const Id value{ctx.OpLoad(type->id, pointer)}; | ||
| 312 | return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value; | ||
| 313 | } | ||
| 314 | switch (attr) { | ||
| 315 | case IR::Attribute::PrimitiveId: | ||
| 316 | return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.primitive_id)); | ||
| 317 | case IR::Attribute::PositionX: | ||
| 318 | case IR::Attribute::PositionY: | ||
| 319 | case IR::Attribute::PositionZ: | ||
| 320 | case IR::Attribute::PositionW: | ||
| 321 | return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, | ||
| 322 | ctx.Const(element))); | ||
| 323 | case IR::Attribute::InstanceId: | ||
| 324 | if (ctx.profile.support_vertex_instance_id) { | ||
| 325 | return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id)); | ||
| 326 | } else { | ||
| 327 | const Id index{ctx.OpLoad(ctx.U32[1], ctx.instance_index)}; | ||
| 328 | const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_instance)}; | ||
| 329 | return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base)); | ||
| 330 | } | ||
| 331 | case IR::Attribute::VertexId: | ||
| 332 | if (ctx.profile.support_vertex_instance_id) { | ||
| 333 | return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.vertex_id)); | ||
| 334 | } else { | ||
| 335 | const Id index{ctx.OpLoad(ctx.U32[1], ctx.vertex_index)}; | ||
| 336 | const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)}; | ||
| 337 | return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base)); | ||
| 338 | } | ||
| 339 | case IR::Attribute::FrontFace: | ||
| 340 | return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1, ctx.front_face), | ||
| 341 | ctx.Const(std::numeric_limits<u32>::max()), ctx.u32_zero_value); | ||
| 342 | case IR::Attribute::PointSpriteS: | ||
| 343 | return ctx.OpLoad(ctx.F32[1], | ||
| 344 | ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.u32_zero_value)); | ||
| 345 | case IR::Attribute::PointSpriteT: | ||
| 346 | return ctx.OpLoad(ctx.F32[1], | ||
| 347 | ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.Const(1U))); | ||
| 348 | case IR::Attribute::TessellationEvaluationPointU: | ||
| 349 | return ctx.OpLoad(ctx.F32[1], | ||
| 350 | ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.u32_zero_value)); | ||
| 351 | case IR::Attribute::TessellationEvaluationPointV: | ||
| 352 | return ctx.OpLoad(ctx.F32[1], | ||
| 353 | ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.Const(1U))); | ||
| 354 | |||
| 355 | default: | ||
| 356 | throw NotImplementedException("Read attribute {}", attr); | ||
| 357 | } | ||
| 358 | } | ||
| 359 | |||
| 360 | void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, [[maybe_unused]] Id vertex) { | ||
| 361 | const std::optional<OutAttr> output{OutputAttrPointer(ctx, attr)}; | ||
| 362 | if (!output) { | ||
| 363 | return; | ||
| 364 | } | ||
| 365 | if (Sirit::ValidId(output->type)) { | ||
| 366 | value = ctx.OpBitcast(output->type, value); | ||
| 367 | } | ||
| 368 | ctx.OpStore(output->pointer, value); | ||
| 369 | } | ||
| 370 | |||
| 371 | Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex) { | ||
| 372 | switch (ctx.stage) { | ||
| 373 | case Stage::TessellationControl: | ||
| 374 | case Stage::TessellationEval: | ||
| 375 | case Stage::Geometry: | ||
| 376 | return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset, vertex); | ||
| 377 | default: | ||
| 378 | return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset); | ||
| 379 | } | ||
| 380 | } | ||
| 381 | |||
| 382 | void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, [[maybe_unused]] Id vertex) { | ||
| 383 | ctx.OpFunctionCall(ctx.void_id, ctx.indexed_store_func, offset, value); | ||
| 384 | } | ||
| 385 | |||
| 386 | Id EmitGetPatch(EmitContext& ctx, IR::Patch patch) { | ||
| 387 | if (!IR::IsGeneric(patch)) { | ||
| 388 | throw NotImplementedException("Non-generic patch load"); | ||
| 389 | } | ||
| 390 | const u32 index{IR::GenericPatchIndex(patch)}; | ||
| 391 | const Id element{ctx.Const(IR::GenericPatchElement(patch))}; | ||
| 392 | const Id type{ctx.stage == Stage::TessellationControl ? ctx.output_f32 : ctx.input_f32}; | ||
| 393 | const Id pointer{ctx.OpAccessChain(type, ctx.patches.at(index), element)}; | ||
| 394 | return ctx.OpLoad(ctx.F32[1], pointer); | ||
| 395 | } | ||
| 396 | |||
| 397 | void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) { | ||
| 398 | const Id pointer{[&] { | ||
| 399 | if (IR::IsGeneric(patch)) { | ||
| 400 | const u32 index{IR::GenericPatchIndex(patch)}; | ||
| 401 | const Id element{ctx.Const(IR::GenericPatchElement(patch))}; | ||
| 402 | return ctx.OpAccessChain(ctx.output_f32, ctx.patches.at(index), element); | ||
| 403 | } | ||
| 404 | switch (patch) { | ||
| 405 | case IR::Patch::TessellationLodLeft: | ||
| 406 | case IR::Patch::TessellationLodRight: | ||
| 407 | case IR::Patch::TessellationLodTop: | ||
| 408 | case IR::Patch::TessellationLodBottom: { | ||
| 409 | const u32 index{static_cast<u32>(patch) - static_cast<u32>(IR::Patch::TessellationLodLeft)}; | ||
| 410 | const Id index_id{ctx.Const(index)}; | ||
| 411 | return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_outer, index_id); | ||
| 412 | } | ||
| 413 | case IR::Patch::TessellationLodInteriorU: | ||
| 414 | return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, | ||
| 415 | ctx.u32_zero_value); | ||
| 416 | case IR::Patch::TessellationLodInteriorV: | ||
| 417 | return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, ctx.Const(1u)); | ||
| 418 | default: | ||
| 419 | throw NotImplementedException("Patch {}", patch); | ||
| 420 | } | ||
| 421 | }()}; | ||
| 422 | ctx.OpStore(pointer, value); | ||
| 423 | } | ||
| 424 | |||
| 425 | void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) { | ||
| 426 | const Id component_id{ctx.Const(component)}; | ||
| 427 | const Id pointer{ctx.OpAccessChain(ctx.output_f32, ctx.frag_color.at(index), component_id)}; | ||
| 428 | ctx.OpStore(pointer, value); | ||
| 429 | } | ||
| 430 | |||
| 431 | void EmitSetSampleMask(EmitContext& ctx, Id value) { | ||
| 432 | ctx.OpStore(ctx.sample_mask, value); | ||
| 433 | } | ||
| 434 | |||
| 435 | void EmitSetFragDepth(EmitContext& ctx, Id value) { | ||
| 436 | ctx.OpStore(ctx.frag_depth, value); | ||
| 437 | } | ||
| 438 | |||
| 439 | void EmitGetZFlag(EmitContext&) { | ||
| 440 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 441 | } | ||
| 442 | |||
| 443 | void EmitGetSFlag(EmitContext&) { | ||
| 444 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 445 | } | ||
| 446 | |||
| 447 | void EmitGetCFlag(EmitContext&) { | ||
| 448 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 449 | } | ||
| 450 | |||
| 451 | void EmitGetOFlag(EmitContext&) { | ||
| 452 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 453 | } | ||
| 454 | |||
| 455 | void EmitSetZFlag(EmitContext&) { | ||
| 456 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 457 | } | ||
| 458 | |||
| 459 | void EmitSetSFlag(EmitContext&) { | ||
| 460 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 461 | } | ||
| 462 | |||
| 463 | void EmitSetCFlag(EmitContext&) { | ||
| 464 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 465 | } | ||
| 466 | |||
| 467 | void EmitSetOFlag(EmitContext&) { | ||
| 468 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 469 | } | ||
| 470 | |||
| 471 | Id EmitWorkgroupId(EmitContext& ctx) { | ||
| 472 | return ctx.OpLoad(ctx.U32[3], ctx.workgroup_id); | ||
| 473 | } | ||
| 474 | |||
| 475 | Id EmitLocalInvocationId(EmitContext& ctx) { | ||
| 476 | return ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id); | ||
| 477 | } | ||
| 478 | |||
| 479 | Id EmitInvocationId(EmitContext& ctx) { | ||
| 480 | return ctx.OpLoad(ctx.U32[1], ctx.invocation_id); | ||
| 481 | } | ||
| 482 | |||
| 483 | Id EmitSampleId(EmitContext& ctx) { | ||
| 484 | return ctx.OpLoad(ctx.U32[1], ctx.sample_id); | ||
| 485 | } | ||
| 486 | |||
| 487 | Id EmitIsHelperInvocation(EmitContext& ctx) { | ||
| 488 | return ctx.OpLoad(ctx.U1, ctx.is_helper_invocation); | ||
| 489 | } | ||
| 490 | |||
| 491 | Id EmitYDirection(EmitContext& ctx) { | ||
| 492 | return ctx.Const(ctx.runtime_info.y_negate ? -1.0f : 1.0f); | ||
| 493 | } | ||
| 494 | |||
| 495 | Id EmitLoadLocal(EmitContext& ctx, Id word_offset) { | ||
| 496 | const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)}; | ||
| 497 | return ctx.OpLoad(ctx.U32[1], pointer); | ||
| 498 | } | ||
| 499 | |||
| 500 | void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value) { | ||
| 501 | const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)}; | ||
| 502 | ctx.OpStore(pointer, value); | ||
| 503 | } | ||
| 504 | |||
| 505 | } // namespace Shader::Backend::SPIRV | ||
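The constant buffer path above encodes two quirks worth a worked example: sub-word LDC reads fetch from the aligned element offset (per the GetCbuf comment, LDC.U32 at byte offset 6 reads the word at offset 4), and when descriptor aliasing is unsupported the buffer is loaded as a uvec4 row and GetCbufElement picks the word with (offset / 4) % 4. A small sketch of the index arithmetic, assuming byte offsets as in the immediate path:

#include <cassert>
#include <cstdint>

int main() {
    // Immediate path of GetCbuf: an aligned element index, so a 4-byte load
    // at byte offset 6 collapses to word index 1 (bytes 4..7).
    const std::uint32_t offset = 6, element_size = 4;
    assert(offset / element_size == 1);

    // Dynamic path: the same division expressed as a right shift by
    // log2(element_size), which is what the emitted shift computes.
    assert((offset >> 2) == offset / element_size);

    // GetCbufElement with a uvec4-typed buffer: select the word inside the
    // 16-byte row, i.e. (offset / 4) % 4 == (offset >> 2) & 3.
    assert(((offset / 4) % 4) == ((offset >> 2) & 3));
}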
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp new file mode 100644 index 000000000..d33486f28 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp | |||
| @@ -0,0 +1,28 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 6 | #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" | ||
| 7 | |||
| 8 | namespace Shader::Backend::SPIRV { | ||
| 9 | |||
| 10 | void EmitJoin(EmitContext&) { | ||
| 11 | throw NotImplementedException("Join shouldn't be emitted"); | ||
| 12 | } | ||
| 13 | |||
| 14 | void EmitDemoteToHelperInvocation(EmitContext& ctx) { | ||
| 15 | if (ctx.profile.support_demote_to_helper_invocation) { | ||
| 16 | ctx.OpDemoteToHelperInvocationEXT(); | ||
| 17 | } else { | ||
| 18 | const Id kill_label{ctx.OpLabel()}; | ||
| 19 | const Id impossible_label{ctx.OpLabel()}; | ||
| 20 | ctx.OpSelectionMerge(impossible_label, spv::SelectionControlMask::MaskNone); | ||
| 21 | ctx.OpBranchConditional(ctx.true_value, kill_label, impossible_label); | ||
| 22 | ctx.AddLabel(kill_label); | ||
| 23 | ctx.OpKill(); | ||
| 24 | ctx.AddLabel(impossible_label); | ||
| 25 | } | ||
| 26 | } | ||
| 27 | |||
| 28 | } // namespace Shader::Backend::SPIRV | ||
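When OpDemoteToHelperInvocationEXT is unavailable, the fallback above wraps OpKill in a branch whose condition is constant true. OpKill terminates the block it appears in, so routing it through a selection with a merge label keeps the control flow graph structured and leaves a well-formed continuation point. A structural analogue in plain C++:

#include <cstdio>

// Structural analogue of the fallback in EmitDemoteToHelperInvocation: the
// branch condition is constant true, so the kill path is always taken, yet
// a merge point still exists and keeps the selection structured.
int main() {
    const bool always_true = true;   // ctx.true_value
    if (always_true) {               // OpSelectionMerge + OpBranchConditional
        std::puts("kill");           // %kill_label: OpKill ends the invocation
        return 0;
    }
    std::puts("merge");              // %impossible_label: never reached, but
                                     // required as the selection's merge block
}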
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp new file mode 100644 index 000000000..fd42b7a16 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp | |||
| @@ -0,0 +1,269 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 6 | #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" | ||
| 7 | |||
| 8 | namespace Shader::Backend::SPIRV { | ||
| 9 | namespace { | ||
| 10 | Id ExtractU16(EmitContext& ctx, Id value) { | ||
| 11 | if (ctx.profile.support_int16) { | ||
| 12 | return ctx.OpUConvert(ctx.U16, value); | ||
| 13 | } else { | ||
| 14 | return ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(16u)); | ||
| 15 | } | ||
| 16 | } | ||
| 17 | |||
| 18 | Id ExtractS16(EmitContext& ctx, Id value) { | ||
| 19 | if (ctx.profile.support_int16) { | ||
| 20 | return ctx.OpSConvert(ctx.S16, value); | ||
| 21 | } else { | ||
| 22 | return ctx.OpBitFieldSExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(16u)); | ||
| 23 | } | ||
| 24 | } | ||
| 25 | |||
| 26 | Id ExtractU8(EmitContext& ctx, Id value) { | ||
| 27 | if (ctx.profile.support_int8) { | ||
| 28 | return ctx.OpUConvert(ctx.U8, value); | ||
| 29 | } else { | ||
| 30 | return ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(8u)); | ||
| 31 | } | ||
| 32 | } | ||
| 33 | |||
| 34 | Id ExtractS8(EmitContext& ctx, Id value) { | ||
| 35 | if (ctx.profile.support_int8) { | ||
| 36 | return ctx.OpSConvert(ctx.S8, value); | ||
| 37 | } else { | ||
| 38 | return ctx.OpBitFieldSExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(8u)); | ||
| 39 | } | ||
| 40 | } | ||
| 41 | } // Anonymous namespace | ||
| 42 | |||
| 43 | Id EmitConvertS16F16(EmitContext& ctx, Id value) { | ||
| 44 | if (ctx.profile.support_int16) { | ||
| 45 | return ctx.OpSConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); | ||
| 46 | } else { | ||
| 47 | return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value)); | ||
| 48 | } | ||
| 49 | } | ||
| 50 | |||
| 51 | Id EmitConvertS16F32(EmitContext& ctx, Id value) { | ||
| 52 | if (ctx.profile.support_int16) { | ||
| 53 | return ctx.OpSConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); | ||
| 54 | } else { | ||
| 55 | return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value)); | ||
| 56 | } | ||
| 57 | } | ||
| 58 | |||
| 59 | Id EmitConvertS16F64(EmitContext& ctx, Id value) { | ||
| 60 | if (ctx.profile.support_int16) { | ||
| 61 | return ctx.OpSConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); | ||
| 62 | } else { | ||
| 63 | return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value)); | ||
| 64 | } | ||
| 65 | } | ||
| 66 | |||
| 67 | Id EmitConvertS32F16(EmitContext& ctx, Id value) { | ||
| 68 | return ctx.OpConvertFToS(ctx.U32[1], value); | ||
| 69 | } | ||
| 70 | |||
| 71 | Id EmitConvertS32F32(EmitContext& ctx, Id value) { | ||
| 72 | if (ctx.profile.has_broken_signed_operations) { | ||
| 73 | return ctx.OpBitcast(ctx.U32[1], ctx.OpConvertFToS(ctx.S32[1], value)); | ||
| 74 | } else { | ||
| 75 | return ctx.OpConvertFToS(ctx.U32[1], value); | ||
| 76 | } | ||
| 77 | } | ||
| 78 | |||
| 79 | Id EmitConvertS32F64(EmitContext& ctx, Id value) { | ||
| 80 | return ctx.OpConvertFToS(ctx.U32[1], value); | ||
| 81 | } | ||
| 82 | |||
| 83 | Id EmitConvertS64F16(EmitContext& ctx, Id value) { | ||
| 84 | return ctx.OpConvertFToS(ctx.U64, value); | ||
| 85 | } | ||
| 86 | |||
| 87 | Id EmitConvertS64F32(EmitContext& ctx, Id value) { | ||
| 88 | return ctx.OpConvertFToS(ctx.U64, value); | ||
| 89 | } | ||
| 90 | |||
| 91 | Id EmitConvertS64F64(EmitContext& ctx, Id value) { | ||
| 92 | return ctx.OpConvertFToS(ctx.U64, value); | ||
| 93 | } | ||
| 94 | |||
| 95 | Id EmitConvertU16F16(EmitContext& ctx, Id value) { | ||
| 96 | if (ctx.profile.support_int16) { | ||
| 97 | return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); | ||
| 98 | } else { | ||
| 99 | return ExtractU16(ctx, ctx.OpConvertFToU(ctx.U32[1], value)); | ||
| 100 | } | ||
| 101 | } | ||
| 102 | |||
| 103 | Id EmitConvertU16F32(EmitContext& ctx, Id value) { | ||
| 104 | if (ctx.profile.support_int16) { | ||
| 105 | return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); | ||
| 106 | } else { | ||
| 107 | return ExtractU16(ctx, ctx.OpConvertFToU(ctx.U32[1], value)); | ||
| 108 | } | ||
| 109 | } | ||
| 110 | |||
| 111 | Id EmitConvertU16F64(EmitContext& ctx, Id value) { | ||
| 112 | if (ctx.profile.support_int16) { | ||
| 113 | return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); | ||
| 114 | } else { | ||
| 115 | return ExtractU16(ctx, ctx.OpConvertFToU(ctx.U32[1], value)); | ||
| 116 | } | ||
| 117 | } | ||
| 118 | |||
| 119 | Id EmitConvertU32F16(EmitContext& ctx, Id value) { | ||
| 120 | return ctx.OpConvertFToU(ctx.U32[1], value); | ||
| 121 | } | ||
| 122 | |||
| 123 | Id EmitConvertU32F32(EmitContext& ctx, Id value) { | ||
| 124 | return ctx.OpConvertFToU(ctx.U32[1], value); | ||
| 125 | } | ||
| 126 | |||
| 127 | Id EmitConvertU32F64(EmitContext& ctx, Id value) { | ||
| 128 | return ctx.OpConvertFToU(ctx.U32[1], value); | ||
| 129 | } | ||
| 130 | |||
| 131 | Id EmitConvertU64F16(EmitContext& ctx, Id value) { | ||
| 132 | return ctx.OpConvertFToU(ctx.U64, value); | ||
| 133 | } | ||
| 134 | |||
| 135 | Id EmitConvertU64F32(EmitContext& ctx, Id value) { | ||
| 136 | return ctx.OpConvertFToU(ctx.U64, value); | ||
| 137 | } | ||
| 138 | |||
| 139 | Id EmitConvertU64F64(EmitContext& ctx, Id value) { | ||
| 140 | return ctx.OpConvertFToU(ctx.U64, value); | ||
| 141 | } | ||
| 142 | |||
| 143 | Id EmitConvertU64U32(EmitContext& ctx, Id value) { | ||
| 144 | return ctx.OpUConvert(ctx.U64, value); | ||
| 145 | } | ||
| 146 | |||
| 147 | Id EmitConvertU32U64(EmitContext& ctx, Id value) { | ||
| 148 | return ctx.OpUConvert(ctx.U32[1], value); | ||
| 149 | } | ||
| 150 | |||
| 151 | Id EmitConvertF16F32(EmitContext& ctx, Id value) { | ||
| 152 | return ctx.OpFConvert(ctx.F16[1], value); | ||
| 153 | } | ||
| 154 | |||
| 155 | Id EmitConvertF32F16(EmitContext& ctx, Id value) { | ||
| 156 | return ctx.OpFConvert(ctx.F32[1], value); | ||
| 157 | } | ||
| 158 | |||
| 159 | Id EmitConvertF32F64(EmitContext& ctx, Id value) { | ||
| 160 | return ctx.OpFConvert(ctx.F32[1], value); | ||
| 161 | } | ||
| 162 | |||
| 163 | Id EmitConvertF64F32(EmitContext& ctx, Id value) { | ||
| 164 | return ctx.OpFConvert(ctx.F64[1], value); | ||
| 165 | } | ||
| 166 | |||
| 167 | Id EmitConvertF16S8(EmitContext& ctx, Id value) { | ||
| 168 | return ctx.OpConvertSToF(ctx.F16[1], ExtractS8(ctx, value)); | ||
| 169 | } | ||
| 170 | |||
| 171 | Id EmitConvertF16S16(EmitContext& ctx, Id value) { | ||
| 172 | return ctx.OpConvertSToF(ctx.F16[1], ExtractS16(ctx, value)); | ||
| 173 | } | ||
| 174 | |||
| 175 | Id EmitConvertF16S32(EmitContext& ctx, Id value) { | ||
| 176 | return ctx.OpConvertSToF(ctx.F16[1], value); | ||
| 177 | } | ||
| 178 | |||
| 179 | Id EmitConvertF16S64(EmitContext& ctx, Id value) { | ||
| 180 | return ctx.OpConvertSToF(ctx.F16[1], value); | ||
| 181 | } | ||
| 182 | |||
| 183 | Id EmitConvertF16U8(EmitContext& ctx, Id value) { | ||
| 184 | return ctx.OpConvertUToF(ctx.F16[1], ExtractU8(ctx, value)); | ||
| 185 | } | ||
| 186 | |||
| 187 | Id EmitConvertF16U16(EmitContext& ctx, Id value) { | ||
| 188 | return ctx.OpConvertUToF(ctx.F16[1], ExtractU16(ctx, value)); | ||
| 189 | } | ||
| 190 | |||
| 191 | Id EmitConvertF16U32(EmitContext& ctx, Id value) { | ||
| 192 | return ctx.OpConvertUToF(ctx.F16[1], value); | ||
| 193 | } | ||
| 194 | |||
| 195 | Id EmitConvertF16U64(EmitContext& ctx, Id value) { | ||
| 196 | return ctx.OpConvertUToF(ctx.F16[1], value); | ||
| 197 | } | ||
| 198 | |||
| 199 | Id EmitConvertF32S8(EmitContext& ctx, Id value) { | ||
| 200 | return ctx.OpConvertSToF(ctx.F32[1], ExtractS8(ctx, value)); | ||
| 201 | } | ||
| 202 | |||
| 203 | Id EmitConvertF32S16(EmitContext& ctx, Id value) { | ||
| 204 | return ctx.OpConvertSToF(ctx.F32[1], ExtractS16(ctx, value)); | ||
| 205 | } | ||
| 206 | |||
| 207 | Id EmitConvertF32S32(EmitContext& ctx, Id value) { | ||
| 208 | if (ctx.profile.has_broken_signed_operations) { | ||
| 209 | value = ctx.OpBitcast(ctx.S32[1], value); | ||
| 210 | } | ||
| 211 | return ctx.OpConvertSToF(ctx.F32[1], value); | ||
| 212 | } | ||
| 213 | |||
| 214 | Id EmitConvertF32S64(EmitContext& ctx, Id value) { | ||
| 215 | return ctx.OpConvertSToF(ctx.F32[1], value); | ||
| 216 | } | ||
| 217 | |||
| 218 | Id EmitConvertF32U8(EmitContext& ctx, Id value) { | ||
| 219 | return ctx.OpConvertUToF(ctx.F32[1], ExtractU8(ctx, value)); | ||
| 220 | } | ||
| 221 | |||
| 222 | Id EmitConvertF32U16(EmitContext& ctx, Id value) { | ||
| 223 | return ctx.OpConvertUToF(ctx.F32[1], ExtractU16(ctx, value)); | ||
| 224 | } | ||
| 225 | |||
| 226 | Id EmitConvertF32U32(EmitContext& ctx, Id value) { | ||
| 227 | return ctx.OpConvertUToF(ctx.F32[1], value); | ||
| 228 | } | ||
| 229 | |||
| 230 | Id EmitConvertF32U64(EmitContext& ctx, Id value) { | ||
| 231 | return ctx.OpConvertUToF(ctx.F32[1], value); | ||
| 232 | } | ||
| 233 | |||
| 234 | Id EmitConvertF64S8(EmitContext& ctx, Id value) { | ||
| 235 | return ctx.OpConvertSToF(ctx.F64[1], ExtractS8(ctx, value)); | ||
| 236 | } | ||
| 237 | |||
| 238 | Id EmitConvertF64S16(EmitContext& ctx, Id value) { | ||
| 239 | return ctx.OpConvertSToF(ctx.F64[1], ExtractS16(ctx, value)); | ||
| 240 | } | ||
| 241 | |||
| 242 | Id EmitConvertF64S32(EmitContext& ctx, Id value) { | ||
| 243 | if (ctx.profile.has_broken_signed_operations) { | ||
| 244 | value = ctx.OpBitcast(ctx.S32[1], value); | ||
| 245 | } | ||
| 246 | return ctx.OpConvertSToF(ctx.F64[1], value); | ||
| 247 | } | ||
| 248 | |||
| 249 | Id EmitConvertF64S64(EmitContext& ctx, Id value) { | ||
| 250 | return ctx.OpConvertSToF(ctx.F64[1], value); | ||
| 251 | } | ||
| 252 | |||
| 253 | Id EmitConvertF64U8(EmitContext& ctx, Id value) { | ||
| 254 | return ctx.OpConvertUToF(ctx.F64[1], ExtractU8(ctx, value)); | ||
| 255 | } | ||
| 256 | |||
| 257 | Id EmitConvertF64U16(EmitContext& ctx, Id value) { | ||
| 258 | return ctx.OpConvertUToF(ctx.F64[1], ExtractU16(ctx, value)); | ||
| 259 | } | ||
| 260 | |||
| 261 | Id EmitConvertF64U32(EmitContext& ctx, Id value) { | ||
| 262 | return ctx.OpConvertUToF(ctx.F64[1], value); | ||
| 263 | } | ||
| 264 | |||
| 265 | Id EmitConvertF64U64(EmitContext& ctx, Id value) { | ||
| 266 | return ctx.OpConvertUToF(ctx.F64[1], value); | ||
| 267 | } | ||
| 268 | |||
| 269 | } // namespace Shader::Backend::SPIRV | ||
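The Extract helpers at the top of this file carry the portability story for the conversions above: without native 8-bit or 16-bit integer support, narrow results stay in 32-bit registers and are cut down with OpBitFieldUExtract or OpBitFieldSExtract at bit offset zero. A host-side sketch of those two extract semantics (bit widths below 32 assumed, matching the emitter's uses):

#include <cassert>
#include <cstdint>

// Analogue of OpBitFieldUExtract: zero-extend `bits` bits taken at `offset`.
std::uint32_t ufield(std::uint32_t v, unsigned offset, unsigned bits) {
    return (v >> offset) & ((1u << bits) - 1u);
}

// Analogue of OpBitFieldSExtract: the same field, sign-extended to 32 bits.
std::int32_t sfield(std::uint32_t v, unsigned offset, unsigned bits) {
    const unsigned shift = 32u - offset - bits; // move the field's MSB to bit 31
    return static_cast<std::int32_t>(v << shift) >> (32u - bits);
}

int main() {
    assert(ufield(0x0000'80ffu, 0, 16) == 0x80ffu); // ExtractU16 fallback
    assert(sfield(0x0000'80ffu, 0, 16) == -32513);  // ExtractS16 fallback
    assert(ufield(0x0000'00f0u, 0, 8) == 0xf0u);    // ExtractU8 fallback
    assert(sfield(0x0000'00f0u, 0, 8) == -16);      // ExtractS8 fallback
}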
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp new file mode 100644 index 000000000..61cf25f9c --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp | |||
| @@ -0,0 +1,396 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 6 | #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" | ||
| 7 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 8 | |||
| 9 | namespace Shader::Backend::SPIRV { | ||
| 10 | namespace { | ||
| 11 | Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) { | ||
| 12 | const auto flags{inst->Flags<IR::FpControl>()}; | ||
| 13 | if (flags.no_contraction) { | ||
| 14 | ctx.Decorate(op, spv::Decoration::NoContraction); | ||
| 15 | } | ||
| 16 | return op; | ||
| 17 | } | ||
| 18 | |||
| 19 | Id Clamp(EmitContext& ctx, Id type, Id value, Id zero, Id one) { | ||
| 20 | if (ctx.profile.has_broken_spirv_clamp) { | ||
| 21 | return ctx.OpFMin(type, ctx.OpFMax(type, value, zero), one); | ||
| 22 | } else { | ||
| 23 | return ctx.OpFClamp(type, value, zero, one); | ||
| 24 | } | ||
| 25 | } | ||
| 26 | |||
| 27 | Id FPOrdNotEqual(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 28 | if (ctx.profile.ignore_nan_fp_comparisons) { | ||
| 29 | const Id comp{ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs)}; | ||
| 30 | const Id lhs_not_nan{ctx.OpLogicalNot(ctx.U1, ctx.OpIsNan(ctx.U1, lhs))}; | ||
| 31 | const Id rhs_not_nan{ctx.OpLogicalNot(ctx.U1, ctx.OpIsNan(ctx.U1, rhs))}; | ||
| 32 | return ctx.OpLogicalAnd(ctx.U1, ctx.OpLogicalAnd(ctx.U1, comp, lhs_not_nan), rhs_not_nan); | ||
| 33 | } else { | ||
| 34 | return ctx.OpFOrdNotEqual(ctx.U1, lhs, rhs); | ||
| 35 | } | ||
| 36 | } | ||
| 37 | |||
| 38 | Id FPUnordCompare(Id (EmitContext::*comp_func)(Id, Id, Id), EmitContext& ctx, Id lhs, Id rhs) { | ||
| 39 | if (ctx.profile.ignore_nan_fp_comparisons) { | ||
| 40 | const Id lhs_nan{ctx.OpIsNan(ctx.U1, lhs)}; | ||
| 41 | const Id rhs_nan{ctx.OpIsNan(ctx.U1, rhs)}; | ||
| 42 | const Id comp{(ctx.*comp_func)(ctx.U1, lhs, rhs)}; | ||
| 43 | return ctx.OpLogicalOr(ctx.U1, ctx.OpLogicalOr(ctx.U1, comp, lhs_nan), rhs_nan); | ||
| 44 | } else { | ||
| 45 | return (ctx.*comp_func)(ctx.U1, lhs, rhs); | ||
| 46 | } | ||
| 47 | } | ||
| 48 | } // Anonymous namespace | ||
| 49 | |||
| 50 | Id EmitFPAbs16(EmitContext& ctx, Id value) { | ||
| 51 | return ctx.OpFAbs(ctx.F16[1], value); | ||
| 52 | } | ||
| 53 | |||
| 54 | Id EmitFPAbs32(EmitContext& ctx, Id value) { | ||
| 55 | return ctx.OpFAbs(ctx.F32[1], value); | ||
| 56 | } | ||
| 57 | |||
| 58 | Id EmitFPAbs64(EmitContext& ctx, Id value) { | ||
| 59 | return ctx.OpFAbs(ctx.F64[1], value); | ||
| 60 | } | ||
| 61 | |||
| 62 | Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { | ||
| 63 | return Decorate(ctx, inst, ctx.OpFAdd(ctx.F16[1], a, b)); | ||
| 64 | } | ||
| 65 | |||
| 66 | Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { | ||
| 67 | return Decorate(ctx, inst, ctx.OpFAdd(ctx.F32[1], a, b)); | ||
| 68 | } | ||
| 69 | |||
| 70 | Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { | ||
| 71 | return Decorate(ctx, inst, ctx.OpFAdd(ctx.F64[1], a, b)); | ||
| 72 | } | ||
| 73 | |||
| 74 | Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { | ||
| 75 | return Decorate(ctx, inst, ctx.OpFma(ctx.F16[1], a, b, c)); | ||
| 76 | } | ||
| 77 | |||
| 78 | Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { | ||
| 79 | return Decorate(ctx, inst, ctx.OpFma(ctx.F32[1], a, b, c)); | ||
| 80 | } | ||
| 81 | |||
| 82 | Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { | ||
| 83 | return Decorate(ctx, inst, ctx.OpFma(ctx.F64[1], a, b, c)); | ||
| 84 | } | ||
| 85 | |||
| 86 | Id EmitFPMax32(EmitContext& ctx, Id a, Id b) { | ||
| 87 | return ctx.OpFMax(ctx.F32[1], a, b); | ||
| 88 | } | ||
| 89 | |||
| 90 | Id EmitFPMax64(EmitContext& ctx, Id a, Id b) { | ||
| 91 | return ctx.OpFMax(ctx.F64[1], a, b); | ||
| 92 | } | ||
| 93 | |||
| 94 | Id EmitFPMin32(EmitContext& ctx, Id a, Id b) { | ||
| 95 | return ctx.OpFMin(ctx.F32[1], a, b); | ||
| 96 | } | ||
| 97 | |||
| 98 | Id EmitFPMin64(EmitContext& ctx, Id a, Id b) { | ||
| 99 | return ctx.OpFMin(ctx.F64[1], a, b); | ||
| 100 | } | ||
| 101 | |||
| 102 | Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { | ||
| 103 | return Decorate(ctx, inst, ctx.OpFMul(ctx.F16[1], a, b)); | ||
| 104 | } | ||
| 105 | |||
| 106 | Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { | ||
| 107 | return Decorate(ctx, inst, ctx.OpFMul(ctx.F32[1], a, b)); | ||
| 108 | } | ||
| 109 | |||
| 110 | Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { | ||
| 111 | return Decorate(ctx, inst, ctx.OpFMul(ctx.F64[1], a, b)); | ||
| 112 | } | ||
| 113 | |||
| 114 | Id EmitFPNeg16(EmitContext& ctx, Id value) { | ||
| 115 | return ctx.OpFNegate(ctx.F16[1], value); | ||
| 116 | } | ||
| 117 | |||
| 118 | Id EmitFPNeg32(EmitContext& ctx, Id value) { | ||
| 119 | return ctx.OpFNegate(ctx.F32[1], value); | ||
| 120 | } | ||
| 121 | |||
| 122 | Id EmitFPNeg64(EmitContext& ctx, Id value) { | ||
| 123 | return ctx.OpFNegate(ctx.F64[1], value); | ||
| 124 | } | ||
| 125 | |||
| 126 | Id EmitFPSin(EmitContext& ctx, Id value) { | ||
| 127 | return ctx.OpSin(ctx.F32[1], value); | ||
| 128 | } | ||
| 129 | |||
| 130 | Id EmitFPCos(EmitContext& ctx, Id value) { | ||
| 131 | return ctx.OpCos(ctx.F32[1], value); | ||
| 132 | } | ||
| 133 | |||
| 134 | Id EmitFPExp2(EmitContext& ctx, Id value) { | ||
| 135 | return ctx.OpExp2(ctx.F32[1], value); | ||
| 136 | } | ||
| 137 | |||
| 138 | Id EmitFPLog2(EmitContext& ctx, Id value) { | ||
| 139 | return ctx.OpLog2(ctx.F32[1], value); | ||
| 140 | } | ||
| 141 | |||
| 142 | Id EmitFPRecip32(EmitContext& ctx, Id value) { | ||
| 143 | return ctx.OpFDiv(ctx.F32[1], ctx.Const(1.0f), value); | ||
| 144 | } | ||
| 145 | |||
| 146 | Id EmitFPRecip64(EmitContext& ctx, Id value) { | ||
| 147 | return ctx.OpFDiv(ctx.F64[1], ctx.Constant(ctx.F64[1], f64{1.0}), value); | ||
| 148 | } | ||
| 149 | |||
| 150 | Id EmitFPRecipSqrt32(EmitContext& ctx, Id value) { | ||
| 151 | return ctx.OpInverseSqrt(ctx.F32[1], value); | ||
| 152 | } | ||
| 153 | |||
| 154 | Id EmitFPRecipSqrt64(EmitContext& ctx, Id value) { | ||
| 155 | return ctx.OpInverseSqrt(ctx.F64[1], value); | ||
| 156 | } | ||
| 157 | |||
| 158 | Id EmitFPSqrt(EmitContext& ctx, Id value) { | ||
| 159 | return ctx.OpSqrt(ctx.F32[1], value); | ||
| 160 | } | ||
| 161 | |||
| 162 | Id EmitFPSaturate16(EmitContext& ctx, Id value) { | ||
| 163 | const Id zero{ctx.Constant(ctx.F16[1], u16{0})}; | ||
| 164 | const Id one{ctx.Constant(ctx.F16[1], u16{0x3c00})}; | ||
| 165 | return Clamp(ctx, ctx.F16[1], value, zero, one); | ||
| 166 | } | ||
| 167 | |||
| 168 | Id EmitFPSaturate32(EmitContext& ctx, Id value) { | ||
| 169 | const Id zero{ctx.Const(f32{0.0})}; | ||
| 170 | const Id one{ctx.Const(f32{1.0})}; | ||
| 171 | return Clamp(ctx, ctx.F32[1], value, zero, one); | ||
| 172 | } | ||
| 173 | |||
| 174 | Id EmitFPSaturate64(EmitContext& ctx, Id value) { | ||
| 175 | const Id zero{ctx.Constant(ctx.F64[1], f64{0.0})}; | ||
| 176 | const Id one{ctx.Constant(ctx.F64[1], f64{1.0})}; | ||
| 177 | return Clamp(ctx, ctx.F64[1], value, zero, one); | ||
| 178 | } | ||
| 179 | |||
| 180 | Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value) { | ||
| 181 | return Clamp(ctx, ctx.F16[1], value, min_value, max_value); | ||
| 182 | } | ||
| 183 | |||
| 184 | Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value) { | ||
| 185 | return Clamp(ctx, ctx.F32[1], value, min_value, max_value); | ||
| 186 | } | ||
| 187 | |||
| 188 | Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value) { | ||
| 189 | return Clamp(ctx, ctx.F64[1], value, min_value, max_value); | ||
| 190 | } | ||
| 191 | |||
| 192 | Id EmitFPRoundEven16(EmitContext& ctx, Id value) { | ||
| 193 | return ctx.OpRoundEven(ctx.F16[1], value); | ||
| 194 | } | ||
| 195 | |||
| 196 | Id EmitFPRoundEven32(EmitContext& ctx, Id value) { | ||
| 197 | return ctx.OpRoundEven(ctx.F32[1], value); | ||
| 198 | } | ||
| 199 | |||
| 200 | Id EmitFPRoundEven64(EmitContext& ctx, Id value) { | ||
| 201 | return ctx.OpRoundEven(ctx.F64[1], value); | ||
| 202 | } | ||
| 203 | |||
| 204 | Id EmitFPFloor16(EmitContext& ctx, Id value) { | ||
| 205 | return ctx.OpFloor(ctx.F16[1], value); | ||
| 206 | } | ||
| 207 | |||
| 208 | Id EmitFPFloor32(EmitContext& ctx, Id value) { | ||
| 209 | return ctx.OpFloor(ctx.F32[1], value); | ||
| 210 | } | ||
| 211 | |||
| 212 | Id EmitFPFloor64(EmitContext& ctx, Id value) { | ||
| 213 | return ctx.OpFloor(ctx.F64[1], value); | ||
| 214 | } | ||
| 215 | |||
| 216 | Id EmitFPCeil16(EmitContext& ctx, Id value) { | ||
| 217 | return ctx.OpCeil(ctx.F16[1], value); | ||
| 218 | } | ||
| 219 | |||
| 220 | Id EmitFPCeil32(EmitContext& ctx, Id value) { | ||
| 221 | return ctx.OpCeil(ctx.F32[1], value); | ||
| 222 | } | ||
| 223 | |||
| 224 | Id EmitFPCeil64(EmitContext& ctx, Id value) { | ||
| 225 | return ctx.OpCeil(ctx.F64[1], value); | ||
| 226 | } | ||
| 227 | |||
| 228 | Id EmitFPTrunc16(EmitContext& ctx, Id value) { | ||
| 229 | return ctx.OpTrunc(ctx.F16[1], value); | ||
| 230 | } | ||
| 231 | |||
| 232 | Id EmitFPTrunc32(EmitContext& ctx, Id value) { | ||
| 233 | return ctx.OpTrunc(ctx.F32[1], value); | ||
| 234 | } | ||
| 235 | |||
| 236 | Id EmitFPTrunc64(EmitContext& ctx, Id value) { | ||
| 237 | return ctx.OpTrunc(ctx.F64[1], value); | ||
| 238 | } | ||
| 239 | |||
| 240 | Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 241 | return ctx.OpFOrdEqual(ctx.U1, lhs, rhs); | ||
| 242 | } | ||
| 243 | |||
| 244 | Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 245 | return ctx.OpFOrdEqual(ctx.U1, lhs, rhs); | ||
| 246 | } | ||
| 247 | |||
| 248 | Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 249 | return ctx.OpFOrdEqual(ctx.U1, lhs, rhs); | ||
| 250 | } | ||
| 251 | |||
| 252 | Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 253 | return FPUnordCompare(&EmitContext::OpFUnordEqual, ctx, lhs, rhs); | ||
| 254 | } | ||
| 255 | |||
| 256 | Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 257 | return FPUnordCompare(&EmitContext::OpFUnordEqual, ctx, lhs, rhs); | ||
| 258 | } | ||
| 259 | |||
| 260 | Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 261 | return FPUnordCompare(&EmitContext::OpFUnordEqual, ctx, lhs, rhs); | ||
| 262 | } | ||
| 263 | |||
| 264 | Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 265 | return FPOrdNotEqual(ctx, lhs, rhs); | ||
| 266 | } | ||
| 267 | |||
| 268 | Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 269 | return FPOrdNotEqual(ctx, lhs, rhs); | ||
| 270 | } | ||
| 271 | |||
| 272 | Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 273 | return FPOrdNotEqual(ctx, lhs, rhs); | ||
| 274 | } | ||
| 275 | |||
| 276 | Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 277 | return ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs); | ||
| 278 | } | ||
| 279 | |||
| 280 | Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 281 | return ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs); | ||
| 282 | } | ||
| 283 | |||
| 284 | Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 285 | return ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs); | ||
| 286 | } | ||
| 287 | |||
| 288 | Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 289 | return ctx.OpFOrdLessThan(ctx.U1, lhs, rhs); | ||
| 290 | } | ||
| 291 | |||
| 292 | Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 293 | return ctx.OpFOrdLessThan(ctx.U1, lhs, rhs); | ||
| 294 | } | ||
| 295 | |||
| 296 | Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 297 | return ctx.OpFOrdLessThan(ctx.U1, lhs, rhs); | ||
| 298 | } | ||
| 299 | |||
| 300 | Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 301 | return FPUnordCompare(&EmitContext::OpFUnordLessThan, ctx, lhs, rhs); | ||
| 302 | } | ||
| 303 | |||
| 304 | Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 305 | return FPUnordCompare(&EmitContext::OpFUnordLessThan, ctx, lhs, rhs); | ||
| 306 | } | ||
| 307 | |||
| 308 | Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 309 | return FPUnordCompare(&EmitContext::OpFUnordLessThan, ctx, lhs, rhs); | ||
| 310 | } | ||
| 311 | |||
| 312 | Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 313 | return ctx.OpFOrdGreaterThan(ctx.U1, lhs, rhs); | ||
| 314 | } | ||
| 315 | |||
| 316 | Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 317 | return ctx.OpFOrdGreaterThan(ctx.U1, lhs, rhs); | ||
| 318 | } | ||
| 319 | |||
| 320 | Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 321 | return ctx.OpFOrdGreaterThan(ctx.U1, lhs, rhs); | ||
| 322 | } | ||
| 323 | |||
| 324 | Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 325 | return FPUnordCompare(&EmitContext::OpFUnordGreaterThan, ctx, lhs, rhs); | ||
| 326 | } | ||
| 327 | |||
| 328 | Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 329 | return FPUnordCompare(&EmitContext::OpFUnordGreaterThan, ctx, lhs, rhs); | ||
| 330 | } | ||
| 331 | |||
| 332 | Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 333 | return FPUnordCompare(&EmitContext::OpFUnordGreaterThan, ctx, lhs, rhs); | ||
| 334 | } | ||
| 335 | |||
| 336 | Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 337 | return ctx.OpFOrdLessThanEqual(ctx.U1, lhs, rhs); | ||
| 338 | } | ||
| 339 | |||
| 340 | Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 341 | return ctx.OpFOrdLessThanEqual(ctx.U1, lhs, rhs); | ||
| 342 | } | ||
| 343 | |||
| 344 | Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 345 | return ctx.OpFOrdLessThanEqual(ctx.U1, lhs, rhs); | ||
| 346 | } | ||
| 347 | |||
| 348 | Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 349 | return FPUnordCompare(&EmitContext::OpFUnordLessThanEqual, ctx, lhs, rhs); | ||
| 350 | } | ||
| 351 | |||
| 352 | Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 353 | return FPUnordCompare(&EmitContext::OpFUnordLessThanEqual, ctx, lhs, rhs); | ||
| 354 | } | ||
| 355 | |||
| 356 | Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 357 | return FPUnordCompare(&EmitContext::OpFUnordLessThanEqual, ctx, lhs, rhs); | ||
| 358 | } | ||
| 359 | |||
| 360 | Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 361 | return ctx.OpFOrdGreaterThanEqual(ctx.U1, lhs, rhs); | ||
| 362 | } | ||
| 363 | |||
| 364 | Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 365 | return ctx.OpFOrdGreaterThanEqual(ctx.U1, lhs, rhs); | ||
| 366 | } | ||
| 367 | |||
| 368 | Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 369 | return ctx.OpFOrdGreaterThanEqual(ctx.U1, lhs, rhs); | ||
| 370 | } | ||
| 371 | |||
| 372 | Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 373 | return FPUnordCompare(&EmitContext::OpFUnordGreaterThanEqual, ctx, lhs, rhs); | ||
| 374 | } | ||
| 375 | |||
| 376 | Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 377 | return FPUnordCompare(&EmitContext::OpFUnordGreaterThanEqual, ctx, lhs, rhs); | ||
| 378 | } | ||
| 379 | |||
| 380 | Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 381 | return FPUnordCompare(&EmitContext::OpFUnordGreaterThanEqual, ctx, lhs, rhs); | ||
| 382 | } | ||
| 383 | |||
| 384 | Id EmitFPIsNan16(EmitContext& ctx, Id value) { | ||
| 385 | return ctx.OpIsNan(ctx.U1, value); | ||
| 386 | } | ||
| 387 | |||
| 388 | Id EmitFPIsNan32(EmitContext& ctx, Id value) { | ||
| 389 | return ctx.OpIsNan(ctx.U1, value); | ||
| 390 | } | ||
| 391 | |||
| 392 | Id EmitFPIsNan64(EmitContext& ctx, Id value) { | ||
| 393 | return ctx.OpIsNan(ctx.U1, value); | ||
| 394 | } | ||
| 395 | |||
| 396 | } // namespace Shader::Backend::SPIRV | ||
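The ordered/unordered pairs above follow IEEE-754 NaN semantics: an ordered comparison is false whenever either operand is NaN, while its unordered dual is true, which is why the `FPUnord*` emitters route through the `FPUnordCompare` helper defined earlier in this file. A standalone host-side C++ analogue, for illustration only:

    #include <cmath>

    // Ordered: a NaN on either side forces the result to false.
    bool OrdLessThan(float lhs, float rhs) {
        return !std::isnan(lhs) && !std::isnan(rhs) && lhs < rhs;
    }

    // Unordered: a NaN on either side forces the result to true.
    bool UnordLessThan(float lhs, float rhs) {
        return std::isnan(lhs) || std::isnan(rhs) || lhs < rhs;
    }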
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp new file mode 100644 index 000000000..3588f052b --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp | |||
| @@ -0,0 +1,462 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <boost/container/static_vector.hpp> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 8 | #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | |||
| 11 | namespace Shader::Backend::SPIRV { | ||
| 12 | namespace { | ||
| 13 | class ImageOperands { | ||
| 14 | public: | ||
| 15 | explicit ImageOperands(EmitContext& ctx, bool has_bias, bool has_lod, bool has_lod_clamp, | ||
| 16 | Id lod, const IR::Value& offset) { | ||
| 17 | if (has_bias) { | ||
| 18 | const Id bias{has_lod_clamp ? ctx.OpCompositeExtract(ctx.F32[1], lod, 0) : lod}; | ||
| 19 | Add(spv::ImageOperandsMask::Bias, bias); | ||
| 20 | } | ||
| 21 | if (has_lod) { | ||
| 22 | const Id lod_value{has_lod_clamp ? ctx.OpCompositeExtract(ctx.F32[1], lod, 0) : lod}; | ||
| 23 | Add(spv::ImageOperandsMask::Lod, lod_value); | ||
| 24 | } | ||
| 25 | AddOffset(ctx, offset); | ||
| 26 | if (has_lod_clamp) { | ||
| 27 | const Id lod_clamp{has_bias ? ctx.OpCompositeExtract(ctx.F32[1], lod, 1) : lod}; | ||
| 28 | Add(spv::ImageOperandsMask::MinLod, lod_clamp); | ||
| 29 | } | ||
| 30 | } | ||
| 31 | |||
| 32 | explicit ImageOperands(EmitContext& ctx, const IR::Value& offset, const IR::Value& offset2) { | ||
| 33 | if (offset2.IsEmpty()) { | ||
| 34 | if (offset.IsEmpty()) { | ||
| 35 | return; | ||
| 36 | } | ||
| 37 | Add(spv::ImageOperandsMask::Offset, ctx.Def(offset)); | ||
| 38 | return; | ||
| 39 | } | ||
| 40 | const std::array values{offset.InstRecursive(), offset2.InstRecursive()}; | ||
| 41 | if (!values[0]->AreAllArgsImmediates() || !values[1]->AreAllArgsImmediates()) { | ||
| 42 | LOG_WARNING(Shader_SPIRV, "Not all arguments in PTP are immediate, ignoring"); | ||
| 43 | return; | ||
| 44 | } | ||
| 45 | const IR::Opcode opcode{values[0]->GetOpcode()}; | ||
| 46 | if (opcode != values[1]->GetOpcode() || opcode != IR::Opcode::CompositeConstructU32x4) { | ||
| 47 | throw LogicError("Invalid PTP arguments"); | ||
| 48 | } | ||
| 49 | auto read{[&](unsigned int a, unsigned int b) { return values[a]->Arg(b).U32(); }}; | ||
| 50 | |||
| 51 | const Id offsets{ctx.ConstantComposite( | ||
| 52 | ctx.TypeArray(ctx.U32[2], ctx.Const(4U)), ctx.Const(read(0, 0), read(0, 1)), | ||
| 53 | ctx.Const(read(0, 2), read(0, 3)), ctx.Const(read(1, 0), read(1, 1)), | ||
| 54 | ctx.Const(read(1, 2), read(1, 3)))}; | ||
| 55 | Add(spv::ImageOperandsMask::ConstOffsets, offsets); | ||
| 56 | } | ||
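When the PTP path is taken, the eight immediates pulled out by `read` are repacked into the four (x, y) texel offsets that the ConstOffsets gather operand expects. A standalone model of that shuffle (names hypothetical):

    #include <array>
    #include <cstdint>

    // v0 holds (x0, y0, x1, y1) and v1 holds (x2, y2, x3, y3); the result is
    // the four (x, y) offsets fed to ConstOffsets, in gather order.
    std::array<std::array<std::uint32_t, 2>, 4> RepackPtpOffsets(
        const std::array<std::uint32_t, 4>& v0, const std::array<std::uint32_t, 4>& v1) {
        return {{{v0[0], v0[1]}, {v0[2], v0[3]}, {v1[0], v1[1]}, {v1[2], v1[3]}}};
    }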
| 57 | |||
| 58 | explicit ImageOperands(Id offset, Id lod, Id ms) { | ||
| 59 | if (Sirit::ValidId(lod)) { | ||
| 60 | Add(spv::ImageOperandsMask::Lod, lod); | ||
| 61 | } | ||
| 62 | if (Sirit::ValidId(offset)) { | ||
| 63 | Add(spv::ImageOperandsMask::Offset, offset); | ||
| 64 | } | ||
| 65 | if (Sirit::ValidId(ms)) { | ||
| 66 | Add(spv::ImageOperandsMask::Sample, ms); | ||
| 67 | } | ||
| 68 | } | ||
| 69 | |||
| 70 | explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivates, u32 num_derivates, | ||
| 71 | Id offset, Id lod_clamp) { | ||
| 72 | if (!Sirit::ValidId(derivates)) { | ||
| 73 | throw LogicError("Derivates must be present"); | ||
| 74 | } | ||
| 75 | boost::container::static_vector<Id, 3> deriv_x_accum; | ||
| 76 | boost::container::static_vector<Id, 3> deriv_y_accum; | ||
| 77 | for (u32 i = 0; i < num_derivates; ++i) { | ||
| 78 | deriv_x_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2)); | ||
| 79 | deriv_y_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2 + 1)); | ||
| 80 | } | ||
| 81 | const Id derivates_X{ctx.OpCompositeConstruct( | ||
| 82 | ctx.F32[num_derivates], std::span{deriv_x_accum.data(), deriv_x_accum.size()})}; | ||
| 83 | const Id derivates_Y{ctx.OpCompositeConstruct( | ||
| 84 | ctx.F32[num_derivates], std::span{deriv_y_accum.data(), deriv_y_accum.size()})}; | ||
| 85 | Add(spv::ImageOperandsMask::Grad, derivates_X, derivates_Y); | ||
| 86 | if (Sirit::ValidId(offset)) { | ||
| 87 | Add(spv::ImageOperandsMask::Offset, offset); | ||
| 88 | } | ||
| 89 | if (has_lod_clamp) { | ||
| 90 | Add(spv::ImageOperandsMask::MinLod, lod_clamp); | ||
| 91 | } | ||
| 92 | } | ||
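The loop above deinterleaves the packed derivative vector: components arrive as (dx0, dy0, dx1, dy1, ...) and are split into one per-axis composite each for the Grad image operand. A standalone model (names hypothetical):

    #include <cstddef>
    #include <vector>

    // packed = (dx0, dy0, dx1, dy1, ...); outputs are one vector per axis.
    void DeinterleaveDerivatives(const std::vector<float>& packed,
                                 std::vector<float>& ddx, std::vector<float>& ddy) {
        for (std::size_t i = 0; i < packed.size() / 2; ++i) {
            ddx.push_back(packed[i * 2]);
            ddy.push_back(packed[i * 2 + 1]);
        }
    }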
| 93 | |||
| 94 | std::span<const Id> Span() const noexcept { | ||
| 95 | return std::span{operands.data(), operands.size()}; | ||
| 96 | } | ||
| 97 | |||
| 98 | std::optional<spv::ImageOperandsMask> MaskOptional() const noexcept { | ||
| 99 | return mask != spv::ImageOperandsMask{} ? std::make_optional(mask) : std::nullopt; | ||
| 100 | } | ||
| 101 | |||
| 102 | spv::ImageOperandsMask Mask() const noexcept { | ||
| 103 | return mask; | ||
| 104 | } | ||
| 105 | |||
| 106 | private: | ||
| 107 | void AddOffset(EmitContext& ctx, const IR::Value& offset) { | ||
| 108 | if (offset.IsEmpty()) { | ||
| 109 | return; | ||
| 110 | } | ||
| 111 | if (offset.IsImmediate()) { | ||
| 112 | Add(spv::ImageOperandsMask::ConstOffset, ctx.SConst(static_cast<s32>(offset.U32()))); | ||
| 113 | return; | ||
| 114 | } | ||
| 115 | IR::Inst* const inst{offset.InstRecursive()}; | ||
| 116 | if (inst->AreAllArgsImmediates()) { | ||
| 117 | switch (inst->GetOpcode()) { | ||
| 118 | case IR::Opcode::CompositeConstructU32x2: | ||
| 119 | Add(spv::ImageOperandsMask::ConstOffset, | ||
| 120 | ctx.SConst(static_cast<s32>(inst->Arg(0).U32()), | ||
| 121 | static_cast<s32>(inst->Arg(1).U32()))); | ||
| 122 | return; | ||
| 123 | case IR::Opcode::CompositeConstructU32x3: | ||
| 124 | Add(spv::ImageOperandsMask::ConstOffset, | ||
| 125 | ctx.SConst(static_cast<s32>(inst->Arg(0).U32()), | ||
| 126 | static_cast<s32>(inst->Arg(1).U32()), | ||
| 127 | static_cast<s32>(inst->Arg(2).U32()))); | ||
| 128 | return; | ||
| 129 | case IR::Opcode::CompositeConstructU32x4: | ||
| 130 | Add(spv::ImageOperandsMask::ConstOffset, | ||
| 131 | ctx.SConst(static_cast<s32>(inst->Arg(0).U32()), | ||
| 132 | static_cast<s32>(inst->Arg(1).U32()), | ||
| 133 | static_cast<s32>(inst->Arg(2).U32()), | ||
| 134 | static_cast<s32>(inst->Arg(3).U32()))); | ||
| 135 | return; | ||
| 136 | default: | ||
| 137 | break; | ||
| 138 | } | ||
| 139 | } | ||
| 140 | Add(spv::ImageOperandsMask::Offset, ctx.Def(offset)); | ||
| 141 | } | ||
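AddOffset prefers ConstOffset, which SPIR-V requires to be a compile-time constant, so it folds immediates and all-immediate composite constructs into `SConst` values; only a genuinely dynamic value falls through to the runtime Offset operand, which carries extra capability requirements on some instructions. Condensed to its two outcomes (a sketch reusing the calls above):

    // Immediate scalar or all-immediate composite: fold into a constant offset.
    Add(spv::ImageOperandsMask::ConstOffset, ctx.SConst(static_cast<s32>(offset.U32())));
    // Anything dynamic: emit the runtime offset operand instead.
    Add(spv::ImageOperandsMask::Offset, ctx.Def(offset));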
| 142 | |||
| 143 | void Add(spv::ImageOperandsMask new_mask, Id value) { | ||
| 144 | mask = static_cast<spv::ImageOperandsMask>(static_cast<unsigned>(mask) | | ||
| 145 | static_cast<unsigned>(new_mask)); | ||
| 146 | operands.push_back(value); | ||
| 147 | } | ||
| 148 | |||
| 149 | void Add(spv::ImageOperandsMask new_mask, Id value_1, Id value_2) { | ||
| 150 | mask = static_cast<spv::ImageOperandsMask>(static_cast<unsigned>(mask) | | ||
| 151 | static_cast<unsigned>(new_mask)); | ||
| 152 | operands.push_back(value_1); | ||
| 153 | operands.push_back(value_2); | ||
| 154 | } | ||
| 155 | |||
| 156 | boost::container::static_vector<Id, 4> operands; | ||
| 157 | spv::ImageOperandsMask mask{}; | ||
| 158 | }; | ||
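Taken together, each constructor ORs bits into `mask` and pushes the matching Ids into `operands` in the same order; because the masks are added in ascending bit order, the trailing Ids end up sorted the way the SPIR-V image-operands encoding requires. A hedged usage sketch (`sampler`, `coords`, and `bias_id` are assumed Ids in scope, `offset` an IR::Value):

    const ImageOperands operands(ctx, /*has_bias=*/true, /*has_lod=*/false,
                                 /*has_lod_clamp=*/false, bias_id, offset);
    ctx.OpImageSampleImplicitLod(ctx.F32[4], sampler, coords,
                                 operands.MaskOptional(), operands.Span());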
| 159 | |||
| 160 | Id Texture(EmitContext& ctx, IR::TextureInstInfo info, const IR::Value& index) { | ||
| 161 | const TextureDefinition& def{ctx.textures.at(info.descriptor_index)}; | ||
| 162 | if (def.count > 1) { | ||
| 163 | const Id pointer{ctx.OpAccessChain(def.pointer_type, def.id, ctx.Def(index))}; | ||
| 164 | return ctx.OpLoad(def.sampled_type, pointer); | ||
| 165 | } else { | ||
| 166 | return ctx.OpLoad(def.sampled_type, def.id); | ||
| 167 | } | ||
| 168 | } | ||
| 169 | |||
| 170 | Id TextureImage(EmitContext& ctx, IR::TextureInstInfo info, const IR::Value& index) { | ||
| 171 | if (!index.IsImmediate() || index.U32() != 0) { | ||
| 172 | throw NotImplementedException("Indirect image indexing"); | ||
| 173 | } | ||
| 174 | if (info.type == TextureType::Buffer) { | ||
| 175 | const TextureBufferDefinition& def{ctx.texture_buffers.at(info.descriptor_index)}; | ||
| 176 | if (def.count > 1) { | ||
| 177 | throw NotImplementedException("Indirect texture sample"); | ||
| 178 | } | ||
| 179 | const Id sampler_id{def.id}; | ||
| 180 | const Id id{ctx.OpLoad(ctx.sampled_texture_buffer_type, sampler_id)}; | ||
| 181 | return ctx.OpImage(ctx.image_buffer_type, id); | ||
| 182 | } else { | ||
| 183 | const TextureDefinition& def{ctx.textures.at(info.descriptor_index)}; | ||
| 184 | if (def.count > 1) { | ||
| 185 | throw NotImplementedException("Indirect texture sample"); | ||
| 186 | } | ||
| 187 | return ctx.OpImage(def.image_type, ctx.OpLoad(def.sampled_type, def.id)); | ||
| 188 | } | ||
| 189 | } | ||
| 190 | |||
| 191 | Id Image(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo info) { | ||
| 192 | if (!index.IsImmediate() || index.U32() != 0) { | ||
| 193 | throw NotImplementedException("Indirect image indexing"); | ||
| 194 | } | ||
| 195 | if (info.type == TextureType::Buffer) { | ||
| 196 | const ImageBufferDefinition def{ctx.image_buffers.at(info.descriptor_index)}; | ||
| 197 | return ctx.OpLoad(def.image_type, def.id); | ||
| 198 | } else { | ||
| 199 | const ImageDefinition def{ctx.images.at(info.descriptor_index)}; | ||
| 200 | return ctx.OpLoad(def.image_type, def.id); | ||
| 201 | } | ||
| 202 | } | ||
| 203 | |||
| 204 | Id Decorate(EmitContext& ctx, IR::Inst* inst, Id sample) { | ||
| 205 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||
| 206 | if (info.relaxed_precision != 0) { | ||
| 207 | ctx.Decorate(sample, spv::Decoration::RelaxedPrecision); | ||
| 208 | } | ||
| 209 | return sample; | ||
| 210 | } | ||
| 211 | |||
| 212 | template <typename MethodPtrType, typename... Args> | ||
| 213 | Id Emit(MethodPtrType sparse_ptr, MethodPtrType non_sparse_ptr, EmitContext& ctx, IR::Inst* inst, | ||
| 214 | Id result_type, Args&&... args) { | ||
| 215 | IR::Inst* const sparse{inst->GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; | ||
| 216 | if (!sparse) { | ||
| 217 | return Decorate(ctx, inst, (ctx.*non_sparse_ptr)(result_type, std::forward<Args>(args)...)); | ||
| 218 | } | ||
| 219 | const Id struct_type{ctx.TypeStruct(ctx.U32[1], result_type)}; | ||
| 220 | const Id sample{(ctx.*sparse_ptr)(struct_type, std::forward<Args>(args)...)}; | ||
| 221 | const Id resident_code{ctx.OpCompositeExtract(ctx.U32[1], sample, 0U)}; | ||
| 222 | sparse->SetDefinition(ctx.OpImageSparseTexelsResident(ctx.U1, resident_code)); | ||
| 223 | sparse->Invalidate(); | ||
| 224 | Decorate(ctx, inst, sample); | ||
| 225 | return ctx.OpCompositeExtract(result_type, sample, 1U); | ||
| 226 | } | ||
| 227 | } // Anonymous namespace | ||
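When an instruction has a GetSparseFromOp pseudo-op attached, `Emit` swaps in the OpImageSparse* variant, whose result is a struct of (residency code, texel): the code feeds OpImageSparseTexelsResident to satisfy the pseudo-op, and the texel is extracted as the instruction's real result. A hedged expansion of that path, using only calls already present above (`sampler` and `coords` assumed in scope):

    const Id struct_type{ctx.TypeStruct(ctx.U32[1], ctx.F32[4])};
    const Id sample{ctx.OpImageSparseSampleImplicitLod(struct_type, sampler, coords)};
    const Id code{ctx.OpCompositeExtract(ctx.U32[1], sample, 0U)};      // residency code
    const Id resident{ctx.OpImageSparseTexelsResident(ctx.U1, code)};   // boolean pseudo-op result
    const Id texel{ctx.OpCompositeExtract(ctx.F32[4], sample, 1U)};     // the sampled value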
| 228 | |||
| 229 | Id EmitBindlessImageSampleImplicitLod(EmitContext&) { | ||
| 230 | throw LogicError("Unreachable instruction"); | ||
| 231 | } | ||
| 232 | |||
| 233 | Id EmitBindlessImageSampleExplicitLod(EmitContext&) { | ||
| 234 | throw LogicError("Unreachable instruction"); | ||
| 235 | } | ||
| 236 | |||
| 237 | Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&) { | ||
| 238 | throw LogicError("Unreachable instruction"); | ||
| 239 | } | ||
| 240 | |||
| 241 | Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&) { | ||
| 242 | throw LogicError("Unreachable instruction"); | ||
| 243 | } | ||
| 244 | |||
| 245 | Id EmitBindlessImageGather(EmitContext&) { | ||
| 246 | throw LogicError("Unreachable instruction"); | ||
| 247 | } | ||
| 248 | |||
| 249 | Id EmitBindlessImageGatherDref(EmitContext&) { | ||
| 250 | throw LogicError("Unreachable instruction"); | ||
| 251 | } | ||
| 252 | |||
| 253 | Id EmitBindlessImageFetch(EmitContext&) { | ||
| 254 | throw LogicError("Unreachable instruction"); | ||
| 255 | } | ||
| 256 | |||
| 257 | Id EmitBindlessImageQueryDimensions(EmitContext&) { | ||
| 258 | throw LogicError("Unreachable instruction"); | ||
| 259 | } | ||
| 260 | |||
| 261 | Id EmitBindlessImageQueryLod(EmitContext&) { | ||
| 262 | throw LogicError("Unreachable instruction"); | ||
| 263 | } | ||
| 264 | |||
| 265 | Id EmitBindlessImageGradient(EmitContext&) { | ||
| 266 | throw LogicError("Unreachable instruction"); | ||
| 267 | } | ||
| 268 | |||
| 269 | Id EmitBindlessImageRead(EmitContext&) { | ||
| 270 | throw LogicError("Unreachable instruction"); | ||
| 271 | } | ||
| 272 | |||
| 273 | Id EmitBindlessImageWrite(EmitContext&) { | ||
| 274 | throw LogicError("Unreachable instruction"); | ||
| 275 | } | ||
| 276 | |||
| 277 | Id EmitBoundImageSampleImplicitLod(EmitContext&) { | ||
| 278 | throw LogicError("Unreachable instruction"); | ||
| 279 | } | ||
| 280 | |||
| 281 | Id EmitBoundImageSampleExplicitLod(EmitContext&) { | ||
| 282 | throw LogicError("Unreachable instruction"); | ||
| 283 | } | ||
| 284 | |||
| 285 | Id EmitBoundImageSampleDrefImplicitLod(EmitContext&) { | ||
| 286 | throw LogicError("Unreachable instruction"); | ||
| 287 | } | ||
| 288 | |||
| 289 | Id EmitBoundImageSampleDrefExplicitLod(EmitContext&) { | ||
| 290 | throw LogicError("Unreachable instruction"); | ||
| 291 | } | ||
| 292 | |||
| 293 | Id EmitBoundImageGather(EmitContext&) { | ||
| 294 | throw LogicError("Unreachable instruction"); | ||
| 295 | } | ||
| 296 | |||
| 297 | Id EmitBoundImageGatherDref(EmitContext&) { | ||
| 298 | throw LogicError("Unreachable instruction"); | ||
| 299 | } | ||
| 300 | |||
| 301 | Id EmitBoundImageFetch(EmitContext&) { | ||
| 302 | throw LogicError("Unreachable instruction"); | ||
| 303 | } | ||
| 304 | |||
| 305 | Id EmitBoundImageQueryDimensions(EmitContext&) { | ||
| 306 | throw LogicError("Unreachable instruction"); | ||
| 307 | } | ||
| 308 | |||
| 309 | Id EmitBoundImageQueryLod(EmitContext&) { | ||
| 310 | throw LogicError("Unreachable instruction"); | ||
| 311 | } | ||
| 312 | |||
| 313 | Id EmitBoundImageGradient(EmitContext&) { | ||
| 314 | throw LogicError("Unreachable instruction"); | ||
| 315 | } | ||
| 316 | |||
| 317 | Id EmitBoundImageRead(EmitContext&) { | ||
| 318 | throw LogicError("Unreachable instruction"); | ||
| 319 | } | ||
| 320 | |||
| 321 | Id EmitBoundImageWrite(EmitContext&) { | ||
| 322 | throw LogicError("Unreachable instruction"); | ||
| 323 | } | ||
| 324 | |||
| 325 | Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 326 | Id bias_lc, const IR::Value& offset) { | ||
| 327 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||
| 328 | if (ctx.stage == Stage::Fragment) { | ||
| 329 | const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, | ||
| 330 | bias_lc, offset); | ||
| 331 | return Emit(&EmitContext::OpImageSparseSampleImplicitLod, | ||
| 332 | &EmitContext::OpImageSampleImplicitLod, ctx, inst, ctx.F32[4], | ||
| 333 | Texture(ctx, info, index), coords, operands.MaskOptional(), operands.Span()); | ||
| 334 | } else { | ||
| 335 | // We can't use implicit LODs on non-fragment stages in SPIR-V. Maxwell hardware behaves as | ||
| 336 | // if the LOD were explicitly zero. This may change on Turing with implicit compute | ||
| 337 | // derivatives. | ||
| 338 | const Id lod{ctx.Const(0.0f)}; | ||
| 339 | const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod, offset); | ||
| 340 | return Emit(&EmitContext::OpImageSparseSampleExplicitLod, | ||
| 341 | &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], | ||
| 342 | Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); | ||
| 343 | } | ||
| 344 | } | ||
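The split exists because SPIR-V permits OpImageSampleImplicitLod only under the Fragment execution model (compute-derivative extensions aside); every other stage substitutes an explicit LOD of zero, matching the observed Maxwell behaviour described in the comment. Reduced to the two emissions (a sketch with `sampler`, `coords`, and `operands` assumed in scope):

    // Fragment stage: implicit-LOD sampling is legal.
    ctx.OpImageSampleImplicitLod(ctx.F32[4], sampler, coords,
                                 operands.MaskOptional(), operands.Span());
    // Any other stage: same sample, but with the LOD pinned to 0.0f.
    ctx.OpImageSampleExplicitLod(ctx.F32[4], sampler, coords,
                                 operands.Mask(), operands.Span());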
| 345 | |||
| 346 | Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 347 | Id lod, const IR::Value& offset) { | ||
| 348 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||
| 349 | const ImageOperands operands(ctx, false, true, false, lod, offset); | ||
| 350 | return Emit(&EmitContext::OpImageSparseSampleExplicitLod, | ||
| 351 | &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], | ||
| 352 | Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); | ||
| 353 | } | ||
| 354 | |||
| 355 | Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, | ||
| 356 | Id coords, Id dref, Id bias_lc, const IR::Value& offset) { | ||
| 357 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||
| 358 | const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, bias_lc, | ||
| 359 | offset); | ||
| 360 | return Emit(&EmitContext::OpImageSparseSampleDrefImplicitLod, | ||
| 361 | &EmitContext::OpImageSampleDrefImplicitLod, ctx, inst, ctx.F32[1], | ||
| 362 | Texture(ctx, info, index), coords, dref, operands.MaskOptional(), operands.Span()); | ||
| 363 | } | ||
| 364 | |||
| 365 | Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, | ||
| 366 | Id coords, Id dref, Id lod, const IR::Value& offset) { | ||
| 367 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||
| 368 | const ImageOperands operands(ctx, false, true, false, lod, offset); | ||
| 369 | return Emit(&EmitContext::OpImageSparseSampleDrefExplicitLod, | ||
| 370 | &EmitContext::OpImageSampleDrefExplicitLod, ctx, inst, ctx.F32[1], | ||
| 371 | Texture(ctx, info, index), coords, dref, operands.Mask(), operands.Span()); | ||
| 372 | } | ||
| 373 | |||
| 374 | Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 375 | const IR::Value& offset, const IR::Value& offset2) { | ||
| 376 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||
| 377 | const ImageOperands operands(ctx, offset, offset2); | ||
| 378 | return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst, | ||
| 379 | ctx.F32[4], Texture(ctx, info, index), coords, ctx.Const(info.gather_component), | ||
| 380 | operands.MaskOptional(), operands.Span()); | ||
| 381 | } | ||
| 382 | |||
| 383 | Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 384 | const IR::Value& offset, const IR::Value& offset2, Id dref) { | ||
| 385 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||
| 386 | const ImageOperands operands(ctx, offset, offset2); | ||
| 387 | return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst, | ||
| 388 | ctx.F32[4], Texture(ctx, info, index), coords, dref, operands.MaskOptional(), | ||
| 389 | operands.Span()); | ||
| 390 | } | ||
| 391 | |||
| 392 | Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, | ||
| 393 | Id lod, Id ms) { | ||
| 394 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||
| 395 | if (info.type == TextureType::Buffer) { | ||
| 396 | lod = Id{}; | ||
| 397 | } | ||
| 398 | const ImageOperands operands(offset, lod, ms); | ||
| 399 | return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4], | ||
| 400 | TextureImage(ctx, info, index), coords, operands.MaskOptional(), operands.Span()); | ||
| 401 | } | ||
| 402 | |||
| 403 | Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod) { | ||
| 404 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||
| 405 | const Id image{TextureImage(ctx, info, index)}; | ||
| 406 | const Id zero{ctx.u32_zero_value}; | ||
| 407 | const auto mips{[&] { return ctx.OpImageQueryLevels(ctx.U32[1], image); }}; | ||
| 408 | switch (info.type) { | ||
| 409 | case TextureType::Color1D: | ||
| 410 | return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[1], image, lod), | ||
| 411 | zero, zero, mips()); | ||
| 412 | case TextureType::ColorArray1D: | ||
| 413 | case TextureType::Color2D: | ||
| 414 | case TextureType::ColorCube: | ||
| 415 | return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[2], image, lod), | ||
| 416 | zero, mips()); | ||
| 417 | case TextureType::ColorArray2D: | ||
| 418 | case TextureType::Color3D: | ||
| 419 | case TextureType::ColorArrayCube: | ||
| 420 | return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[3], image, lod), | ||
| 421 | mips()); | ||
| 422 | case TextureType::Buffer: | ||
| 423 | return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySize(ctx.U32[1], image), zero, | ||
| 424 | zero, mips()); | ||
| 425 | } | ||
| 426 | throw LogicError("Unspecified image type {}", info.type.Value()); | ||
| 427 | } | ||
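Whatever the image type, the query result is normalized to a uvec4: size components first, zero-padded, with the mip level count always in the last slot (for arrayed types the layer count rides in the last size component, as OpImageQuerySizeLod defines). Summarizing the cases above:

    Color1D                            -> (width, 0, 0, mips)
    ColorArray1D, Color2D, ColorCube   -> (x, y, 0, mips)
    ColorArray2D, Color3D, ColorArrayCube -> (x, y, z, mips)
    Buffer                             -> (texel count, 0, 0, mips)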
| 428 | |||
| 429 | Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) { | ||
| 430 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||
| 431 | const Id zero{ctx.f32_zero_value}; | ||
| 432 | const Id sampler{Texture(ctx, info, index)}; | ||
| 433 | return ctx.OpCompositeConstruct(ctx.F32[4], ctx.OpImageQueryLod(ctx.F32[2], sampler, coords), | ||
| 434 | zero, zero); | ||
| 435 | } | ||
| 436 | |||
| 437 | Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 438 | Id derivates, Id offset, Id lod_clamp) { | ||
| 439 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||
| 440 | const ImageOperands operands(ctx, info.has_lod_clamp != 0, derivates, info.num_derivates, | ||
| 441 | offset, lod_clamp); | ||
| 442 | return Emit(&EmitContext::OpImageSparseSampleExplicitLod, | ||
| 443 | &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], | ||
| 444 | Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); | ||
| 445 | } | ||
| 446 | |||
| 447 | Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) { | ||
| 448 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||
| 449 | if (info.image_format == ImageFormat::Typeless && !ctx.profile.support_typeless_image_loads) { | ||
| 450 | LOG_WARNING(Shader_SPIRV, "Typeless image read not supported by host"); | ||
| 451 | return ctx.ConstantNull(ctx.U32[4]); | ||
| 452 | } | ||
| 453 | return Emit(&EmitContext::OpImageSparseRead, &EmitContext::OpImageRead, ctx, inst, ctx.U32[4], | ||
| 454 | Image(ctx, index, info), coords, std::nullopt, std::span<const Id>{}); | ||
| 455 | } | ||
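The `support_typeless_image_loads` profile flag corresponds to SPIR-V's StorageImageReadWithoutFormat capability; on a Vulkan host it would come from the shaderStorageImageReadWithoutFormat device feature. The wiring below is an assumption for illustration, not this codebase's actual plumbing:

    #include <vulkan/vulkan.h>

    // Hypothetical helper: query whether the device can read storage images
    // without a declared format (SPIR-V StorageImageReadWithoutFormat).
    bool SupportsTypelessImageLoads(VkPhysicalDevice gpu) {
        VkPhysicalDeviceFeatures features{};
        vkGetPhysicalDeviceFeatures(gpu, &features);
        return features.shaderStorageImageReadWithoutFormat == VK_TRUE;
    }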
| 456 | |||
| 457 | void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color) { | ||
| 458 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||
| 459 | ctx.OpImageWrite(Image(ctx, index, info), coords, color); | ||
| 460 | } | ||
| 461 | |||
| 462 | } // namespace Shader::Backend::SPIRV | ||
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp new file mode 100644 index 000000000..d7f1a365a --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp | |||
| @@ -0,0 +1,183 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 6 | #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" | ||
| 7 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 8 | |||
| 9 | namespace Shader::Backend::SPIRV { | ||
| 10 | namespace { | ||
| 11 | Id Image(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo info) { | ||
| 12 | if (!index.IsImmediate()) { | ||
| 13 | throw NotImplementedException("Indirect image indexing"); | ||
| 14 | } | ||
| 15 | if (info.type == TextureType::Buffer) { | ||
| 16 | const ImageBufferDefinition def{ctx.image_buffers.at(index.U32())}; | ||
| 17 | return def.id; | ||
| 18 | } else { | ||
| 19 | const ImageDefinition def{ctx.images.at(index.U32())}; | ||
| 20 | return def.id; | ||
| 21 | } | ||
| 22 | } | ||
| 23 | |||
| 24 | std::pair<Id, Id> AtomicArgs(EmitContext& ctx) { | ||
| 25 | const Id scope{ctx.Const(static_cast<u32>(spv::Scope::Device))}; | ||
| 26 | const Id semantics{ctx.u32_zero_value}; | ||
| 27 | return {scope, semantics}; | ||
| 28 | } | ||
| 29 | |||
| 30 | Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id value, | ||
| 31 | Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { | ||
| 32 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||
| 33 | const Id image{Image(ctx, index, info)}; | ||
| 34 | const Id pointer{ctx.OpImageTexelPointer(ctx.image_u32, image, coords, ctx.Const(0U))}; | ||
| 35 | const auto [scope, semantics]{AtomicArgs(ctx)}; | ||
| 36 | return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value); | ||
| 37 | } | ||
| 38 | } // Anonymous namespace | ||
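Every 32-bit image atomic below shares the lowering in `ImageAtomicU32`: OpImageTexelPointer turns (image, coords, sample 0) into a texel pointer, then an ordinary OpAtomic* is applied with Device scope and relaxed memory semantics; this is the same shape GLSL's imageAtomic* builtins typically compile to. In one place (`image`, `coords`, and `value` assumed in scope):

    const Id pointer{ctx.OpImageTexelPointer(ctx.image_u32, image, coords, ctx.Const(0U))};
    const Id scope{ctx.Const(static_cast<u32>(spv::Scope::Device))};
    const Id semantics{ctx.u32_zero_value};  // relaxed: no acquire/release ordering requested
    const Id previous{ctx.OpAtomicIAdd(ctx.U32[1], pointer, scope, semantics, value)};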
| 39 | |||
| 40 | Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 41 | Id value) { | ||
| 42 | return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicIAdd); | ||
| 43 | } | ||
| 44 | |||
| 45 | Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 46 | Id value) { | ||
| 47 | return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicSMin); | ||
| 48 | } | ||
| 49 | |||
| 50 | Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 51 | Id value) { | ||
| 52 | return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicUMin); | ||
| 53 | } | ||
| 54 | |||
| 55 | Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 56 | Id value) { | ||
| 57 | return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicSMax); | ||
| 58 | } | ||
| 59 | |||
| 60 | Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 61 | Id value) { | ||
| 62 | return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicUMax); | ||
| 63 | } | ||
| 64 | |||
| 65 | Id EmitImageAtomicInc32(EmitContext&, IR::Inst*, const IR::Value&, Id, Id) { | ||
| 66 | // TODO: This is not yet implemented | ||
| 67 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 68 | } | ||
| 69 | |||
| 70 | Id EmitImageAtomicDec32(EmitContext&, IR::Inst*, const IR::Value&, Id, Id) { | ||
| 71 | // TODO: This is not yet implemented | ||
| 72 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 73 | } | ||
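A plausible reason Inc32 and Dec32 stay unimplemented (an assumption, not something stated in this file): SPIR-V's OpAtomicIIncrement is a plain +1, while the hardware operation these IR instructions model wraps at a limit operand, as CUDA's atomicInc does:

    #include <cstdint>

    // Hypothetical model of wrapping-increment semantics; not actual emitter code.
    std::uint32_t WrappingInc(std::uint32_t old, std::uint32_t limit) {
        return old >= limit ? 0u : old + 1u;
    }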
| 74 | |||
| 75 | Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 76 | Id value) { | ||
| 77 | return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicAnd); | ||
| 78 | } | ||
| 79 | |||
| 80 | Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 81 | Id value) { | ||
| 82 | return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicOr); | ||
| 83 | } | ||
| 84 | |||
| 85 | Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 86 | Id value) { | ||
| 87 | return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicXor); | ||
| 88 | } | ||
| 89 | |||
| 90 | Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 91 | Id value) { | ||
| 92 | return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicExchange); | ||
| 93 | } | ||
| 94 | |||
| 95 | Id EmitBindlessImageAtomicIAdd32(EmitContext&) { | ||
| 96 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 97 | } | ||
| 98 | |||
| 99 | Id EmitBindlessImageAtomicSMin32(EmitContext&) { | ||
| 100 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 101 | } | ||
| 102 | |||
| 103 | Id EmitBindlessImageAtomicUMin32(EmitContext&) { | ||
| 104 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 105 | } | ||
| 106 | |||
| 107 | Id EmitBindlessImageAtomicSMax32(EmitContext&) { | ||
| 108 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 109 | } | ||
| 110 | |||
| 111 | Id EmitBindlessImageAtomicUMax32(EmitContext&) { | ||
| 112 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 113 | } | ||
| 114 | |||
| 115 | Id EmitBindlessImageAtomicInc32(EmitContext&) { | ||
| 116 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 117 | } | ||
| 118 | |||
| 119 | Id EmitBindlessImageAtomicDec32(EmitContext&) { | ||
| 120 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 121 | } | ||
| 122 | |||
| 123 | Id EmitBindlessImageAtomicAnd32(EmitContext&) { | ||
| 124 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 125 | } | ||
| 126 | |||
| 127 | Id EmitBindlessImageAtomicOr32(EmitContext&) { | ||
| 128 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 129 | } | ||
| 130 | |||
| 131 | Id EmitBindlessImageAtomicXor32(EmitContext&) { | ||
| 132 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 133 | } | ||
| 134 | |||
| 135 | Id EmitBindlessImageAtomicExchange32(EmitContext&) { | ||
| 136 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 137 | } | ||
| 138 | |||
| 139 | Id EmitBoundImageAtomicIAdd32(EmitContext&) { | ||
| 140 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 141 | } | ||
| 142 | |||
| 143 | Id EmitBoundImageAtomicSMin32(EmitContext&) { | ||
| 144 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 145 | } | ||
| 146 | |||
| 147 | Id EmitBoundImageAtomicUMin32(EmitContext&) { | ||
| 148 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 149 | } | ||
| 150 | |||
| 151 | Id EmitBoundImageAtomicSMax32(EmitContext&) { | ||
| 152 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 153 | } | ||
| 154 | |||
| 155 | Id EmitBoundImageAtomicUMax32(EmitContext&) { | ||
| 156 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 157 | } | ||
| 158 | |||
| 159 | Id EmitBoundImageAtomicInc32(EmitContext&) { | ||
| 160 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 161 | } | ||
| 162 | |||
| 163 | Id EmitBoundImageAtomicDec32(EmitContext&) { | ||
| 164 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 165 | } | ||
| 166 | |||
| 167 | Id EmitBoundImageAtomicAnd32(EmitContext&) { | ||
| 168 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 169 | } | ||
| 170 | |||
| 171 | Id EmitBoundImageAtomicOr32(EmitContext&) { | ||
| 172 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 173 | } | ||
| 174 | |||
| 175 | Id EmitBoundImageAtomicXor32(EmitContext&) { | ||
| 176 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 177 | } | ||
| 178 | |||
| 179 | Id EmitBoundImageAtomicExchange32(EmitContext&) { | ||
| 180 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 181 | } | ||
| 182 | |||
| 183 | } // namespace Shader::Backend::SPIRV | ||
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h new file mode 100644 index 000000000..f99c02848 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h | |||
| @@ -0,0 +1,579 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <sirit/sirit.h> | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | |||
| 9 | namespace Shader::IR { | ||
| 10 | enum class Attribute : u64; | ||
| 11 | enum class Patch : u64; | ||
| 12 | class Inst; | ||
| 13 | class Value; | ||
| 14 | } // namespace Shader::IR | ||
| 15 | |||
| 16 | namespace Shader::Backend::SPIRV { | ||
| 17 | |||
| 18 | using Sirit::Id; | ||
| 19 | |||
| 20 | class EmitContext; | ||
| 21 | |||
| 22 | // Microinstruction emitters | ||
| 23 | Id EmitPhi(EmitContext& ctx, IR::Inst* inst); | ||
| 24 | void EmitVoid(EmitContext& ctx); | ||
| 25 | Id EmitIdentity(EmitContext& ctx, const IR::Value& value); | ||
| 26 | Id EmitConditionRef(EmitContext& ctx, const IR::Value& value); | ||
| 27 | void EmitReference(EmitContext&); | ||
| 28 | void EmitPhiMove(EmitContext&); | ||
| 29 | void EmitJoin(EmitContext& ctx); | ||
| 30 | void EmitDemoteToHelperInvocation(EmitContext& ctx); | ||
| 31 | void EmitBarrier(EmitContext& ctx); | ||
| 32 | void EmitWorkgroupMemoryBarrier(EmitContext& ctx); | ||
| 33 | void EmitDeviceMemoryBarrier(EmitContext& ctx); | ||
| 34 | void EmitPrologue(EmitContext& ctx); | ||
| 35 | void EmitEpilogue(EmitContext& ctx); | ||
| 36 | void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream); | ||
| 37 | void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream); | ||
| 38 | void EmitGetRegister(EmitContext& ctx); | ||
| 39 | void EmitSetRegister(EmitContext& ctx); | ||
| 40 | void EmitGetPred(EmitContext& ctx); | ||
| 41 | void EmitSetPred(EmitContext& ctx); | ||
| 42 | void EmitSetGotoVariable(EmitContext& ctx); | ||
| 43 | void EmitGetGotoVariable(EmitContext& ctx); | ||
| 44 | void EmitSetIndirectBranchVariable(EmitContext& ctx); | ||
| 45 | void EmitGetIndirectBranchVariable(EmitContext& ctx); | ||
| 46 | Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); | ||
| 47 | Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); | ||
| 48 | Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); | ||
| 49 | Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); | ||
| 50 | Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); | ||
| 51 | Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); | ||
| 52 | Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); | ||
| 53 | Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex); | ||
| 54 | void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, Id vertex); | ||
| 55 | Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex); | ||
| 56 | void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, Id vertex); | ||
| 57 | Id EmitGetPatch(EmitContext& ctx, IR::Patch patch); | ||
| 58 | void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value); | ||
| 59 | void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); | ||
| 60 | void EmitSetSampleMask(EmitContext& ctx, Id value); | ||
| 61 | void EmitSetFragDepth(EmitContext& ctx, Id value); | ||
| 62 | void EmitGetZFlag(EmitContext& ctx); | ||
| 63 | void EmitGetSFlag(EmitContext& ctx); | ||
| 64 | void EmitGetCFlag(EmitContext& ctx); | ||
| 65 | void EmitGetOFlag(EmitContext& ctx); | ||
| 66 | void EmitSetZFlag(EmitContext& ctx); | ||
| 67 | void EmitSetSFlag(EmitContext& ctx); | ||
| 68 | void EmitSetCFlag(EmitContext& ctx); | ||
| 69 | void EmitSetOFlag(EmitContext& ctx); | ||
| 70 | Id EmitWorkgroupId(EmitContext& ctx); | ||
| 71 | Id EmitLocalInvocationId(EmitContext& ctx); | ||
| 72 | Id EmitInvocationId(EmitContext& ctx); | ||
| 73 | Id EmitSampleId(EmitContext& ctx); | ||
| 74 | Id EmitIsHelperInvocation(EmitContext& ctx); | ||
| 75 | Id EmitYDirection(EmitContext& ctx); | ||
| 76 | Id EmitLoadLocal(EmitContext& ctx, Id word_offset); | ||
| 77 | void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value); | ||
| 78 | Id EmitUndefU1(EmitContext& ctx); | ||
| 79 | Id EmitUndefU8(EmitContext& ctx); | ||
| 80 | Id EmitUndefU16(EmitContext& ctx); | ||
| 81 | Id EmitUndefU32(EmitContext& ctx); | ||
| 82 | Id EmitUndefU64(EmitContext& ctx); | ||
| 83 | void EmitLoadGlobalU8(EmitContext& ctx); | ||
| 84 | void EmitLoadGlobalS8(EmitContext& ctx); | ||
| 85 | void EmitLoadGlobalU16(EmitContext& ctx); | ||
| 86 | void EmitLoadGlobalS16(EmitContext& ctx); | ||
| 87 | Id EmitLoadGlobal32(EmitContext& ctx, Id address); | ||
| 88 | Id EmitLoadGlobal64(EmitContext& ctx, Id address); | ||
| 89 | Id EmitLoadGlobal128(EmitContext& ctx, Id address); | ||
| 90 | void EmitWriteGlobalU8(EmitContext& ctx); | ||
| 91 | void EmitWriteGlobalS8(EmitContext& ctx); | ||
| 92 | void EmitWriteGlobalU16(EmitContext& ctx); | ||
| 93 | void EmitWriteGlobalS16(EmitContext& ctx); | ||
| 94 | void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value); | ||
| 95 | void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value); | ||
| 96 | void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value); | ||
| 97 | Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); | ||
| 98 | Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); | ||
| 99 | Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); | ||
| 100 | Id EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); | ||
| 101 | Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); | ||
| 102 | Id EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); | ||
| 103 | Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); | ||
| 104 | void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 105 | Id value); | ||
| 106 | void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 107 | Id value); | ||
| 108 | void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 109 | Id value); | ||
| 110 | void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 111 | Id value); | ||
| 112 | void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 113 | Id value); | ||
| 114 | void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 115 | Id value); | ||
| 116 | void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 117 | Id value); | ||
| 118 | Id EmitLoadSharedU8(EmitContext& ctx, Id offset); | ||
| 119 | Id EmitLoadSharedS8(EmitContext& ctx, Id offset); | ||
| 120 | Id EmitLoadSharedU16(EmitContext& ctx, Id offset); | ||
| 121 | Id EmitLoadSharedS16(EmitContext& ctx, Id offset); | ||
| 122 | Id EmitLoadSharedU32(EmitContext& ctx, Id offset); | ||
| 123 | Id EmitLoadSharedU64(EmitContext& ctx, Id offset); | ||
| 124 | Id EmitLoadSharedU128(EmitContext& ctx, Id offset); | ||
| 125 | void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value); | ||
| 126 | void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value); | ||
| 127 | void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value); | ||
| 128 | void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value); | ||
| 129 | void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value); | ||
| 130 | Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); | ||
| 131 | Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); | ||
| 132 | Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); | ||
| 133 | Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index); | ||
| 134 | Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index); | ||
| 135 | Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index); | ||
| 136 | Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index); | ||
| 137 | Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index); | ||
| 138 | Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index); | ||
| 139 | Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2); | ||
| 140 | Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3); | ||
| 141 | Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); | ||
| 142 | Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index); | ||
| 143 | Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index); | ||
| 144 | Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index); | ||
| 145 | Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index); | ||
| 146 | Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index); | ||
| 147 | Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index); | ||
| 148 | Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2); | ||
| 149 | Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3); | ||
| 150 | Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); | ||
| 151 | Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index); | ||
| 152 | Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index); | ||
| 153 | Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index); | ||
| 154 | Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index); | ||
| 155 | Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index); | ||
| 156 | Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index); | ||
| 157 | void EmitCompositeConstructF64x2(EmitContext& ctx); | ||
| 158 | void EmitCompositeConstructF64x3(EmitContext& ctx); | ||
| 159 | void EmitCompositeConstructF64x4(EmitContext& ctx); | ||
| 160 | void EmitCompositeExtractF64x2(EmitContext& ctx); | ||
| 161 | void EmitCompositeExtractF64x3(EmitContext& ctx); | ||
| 162 | void EmitCompositeExtractF64x4(EmitContext& ctx); | ||
| 163 | Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index); | ||
| 164 | Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index); | ||
| 165 | Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index); | ||
| 166 | Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value); | ||
| 167 | Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value); | ||
| 168 | Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value); | ||
| 169 | Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value); | ||
| 170 | Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value); | ||
| 171 | Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value); | ||
| 172 | Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value); | ||
| 173 | Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value); | ||
| 174 | void EmitBitCastU16F16(EmitContext& ctx); | ||
| 175 | Id EmitBitCastU32F32(EmitContext& ctx, Id value); | ||
| 176 | void EmitBitCastU64F64(EmitContext& ctx); | ||
| 177 | void EmitBitCastF16U16(EmitContext& ctx); | ||
| 178 | Id EmitBitCastF32U32(EmitContext& ctx, Id value); | ||
| 179 | void EmitBitCastF64U64(EmitContext& ctx); | ||
| 180 | Id EmitPackUint2x32(EmitContext& ctx, Id value); | ||
| 181 | Id EmitUnpackUint2x32(EmitContext& ctx, Id value); | ||
| 182 | Id EmitPackFloat2x16(EmitContext& ctx, Id value); | ||
| 183 | Id EmitUnpackFloat2x16(EmitContext& ctx, Id value); | ||
| 184 | Id EmitPackHalf2x16(EmitContext& ctx, Id value); | ||
| 185 | Id EmitUnpackHalf2x16(EmitContext& ctx, Id value); | ||
| 186 | Id EmitPackDouble2x32(EmitContext& ctx, Id value); | ||
| 187 | Id EmitUnpackDouble2x32(EmitContext& ctx, Id value); | ||
| 188 | void EmitGetZeroFromOp(EmitContext& ctx); | ||
| 189 | void EmitGetSignFromOp(EmitContext& ctx); | ||
| 190 | void EmitGetCarryFromOp(EmitContext& ctx); | ||
| 191 | void EmitGetOverflowFromOp(EmitContext& ctx); | ||
| 192 | void EmitGetSparseFromOp(EmitContext& ctx); | ||
| 193 | void EmitGetInBoundsFromOp(EmitContext& ctx); | ||
| 194 | Id EmitFPAbs16(EmitContext& ctx, Id value); | ||
| 195 | Id EmitFPAbs32(EmitContext& ctx, Id value); | ||
| 196 | Id EmitFPAbs64(EmitContext& ctx, Id value); | ||
| 197 | Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); | ||
| 198 | Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); | ||
| 199 | Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); | ||
| 200 | Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); | ||
| 201 | Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); | ||
| 202 | Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); | ||
| 203 | Id EmitFPMax32(EmitContext& ctx, Id a, Id b); | ||
| 204 | Id EmitFPMax64(EmitContext& ctx, Id a, Id b); | ||
| 205 | Id EmitFPMin32(EmitContext& ctx, Id a, Id b); | ||
| 206 | Id EmitFPMin64(EmitContext& ctx, Id a, Id b); | ||
| 207 | Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); | ||
| 208 | Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); | ||
| 209 | Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); | ||
| 210 | Id EmitFPNeg16(EmitContext& ctx, Id value); | ||
| 211 | Id EmitFPNeg32(EmitContext& ctx, Id value); | ||
| 212 | Id EmitFPNeg64(EmitContext& ctx, Id value); | ||
| 213 | Id EmitFPSin(EmitContext& ctx, Id value); | ||
| 214 | Id EmitFPCos(EmitContext& ctx, Id value); | ||
| 215 | Id EmitFPExp2(EmitContext& ctx, Id value); | ||
| 216 | Id EmitFPLog2(EmitContext& ctx, Id value); | ||
| 217 | Id EmitFPRecip32(EmitContext& ctx, Id value); | ||
| 218 | Id EmitFPRecip64(EmitContext& ctx, Id value); | ||
| 219 | Id EmitFPRecipSqrt32(EmitContext& ctx, Id value); | ||
| 220 | Id EmitFPRecipSqrt64(EmitContext& ctx, Id value); | ||
| 221 | Id EmitFPSqrt(EmitContext& ctx, Id value); | ||
| 222 | Id EmitFPSaturate16(EmitContext& ctx, Id value); | ||
| 223 | Id EmitFPSaturate32(EmitContext& ctx, Id value); | ||
| 224 | Id EmitFPSaturate64(EmitContext& ctx, Id value); | ||
| 225 | Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value); | ||
| 226 | Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value); | ||
| 227 | Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value); | ||
| 228 | Id EmitFPRoundEven16(EmitContext& ctx, Id value); | ||
| 229 | Id EmitFPRoundEven32(EmitContext& ctx, Id value); | ||
| 230 | Id EmitFPRoundEven64(EmitContext& ctx, Id value); | ||
| 231 | Id EmitFPFloor16(EmitContext& ctx, Id value); | ||
| 232 | Id EmitFPFloor32(EmitContext& ctx, Id value); | ||
| 233 | Id EmitFPFloor64(EmitContext& ctx, Id value); | ||
| 234 | Id EmitFPCeil16(EmitContext& ctx, Id value); | ||
| 235 | Id EmitFPCeil32(EmitContext& ctx, Id value); | ||
| 236 | Id EmitFPCeil64(EmitContext& ctx, Id value); | ||
| 237 | Id EmitFPTrunc16(EmitContext& ctx, Id value); | ||
| 238 | Id EmitFPTrunc32(EmitContext& ctx, Id value); | ||
| 239 | Id EmitFPTrunc64(EmitContext& ctx, Id value); | ||
| 240 | Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs); | ||
| 241 | Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs); | ||
| 242 | Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs); | ||
| 243 | Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs); | ||
| 244 | Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs); | ||
| 245 | Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs); | ||
| 246 | Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs); | ||
| 247 | Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs); | ||
| 248 | Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs); | ||
| 249 | Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs); | ||
| 250 | Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs); | ||
| 251 | Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs); | ||
| 252 | Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs); | ||
| 253 | Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs); | ||
| 254 | Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs); | ||
| 255 | Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs); | ||
| 256 | Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs); | ||
| 257 | Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs); | ||
| 258 | Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs); | ||
| 259 | Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs); | ||
| 260 | Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs); | ||
| 261 | Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs); | ||
| 262 | Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs); | ||
| 263 | Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs); | ||
| 264 | Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs); | ||
| 265 | Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs); | ||
| 266 | Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs); | ||
| 267 | Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs); | ||
| 268 | Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs); | ||
| 269 | Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs); | ||
| 270 | Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); | ||
| 271 | Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); | ||
| 272 | Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); | ||
| 273 | Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); | ||
| 274 | Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); | ||
| 275 | Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); | ||
| 276 | Id EmitFPIsNan16(EmitContext& ctx, Id value); | ||
| 277 | Id EmitFPIsNan32(EmitContext& ctx, Id value); | ||
| 278 | Id EmitFPIsNan64(EmitContext& ctx, Id value); | ||
| 279 | Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); | ||
| 280 | Id EmitIAdd64(EmitContext& ctx, Id a, Id b); | ||
| 281 | Id EmitISub32(EmitContext& ctx, Id a, Id b); | ||
| 282 | Id EmitISub64(EmitContext& ctx, Id a, Id b); | ||
| 283 | Id EmitIMul32(EmitContext& ctx, Id a, Id b); | ||
| 284 | Id EmitINeg32(EmitContext& ctx, Id value); | ||
| 285 | Id EmitINeg64(EmitContext& ctx, Id value); | ||
| 286 | Id EmitIAbs32(EmitContext& ctx, Id value); | ||
| 287 | Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift); | ||
| 288 | Id EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift); | ||
| 289 | Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift); | ||
| 290 | Id EmitShiftRightLogical64(EmitContext& ctx, Id base, Id shift); | ||
| 291 | Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift); | ||
| 292 | Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift); | ||
| 293 | Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); | ||
| 294 | Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); | ||
| 295 | Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); | ||
| 296 | Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count); | ||
| 297 | Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count); | ||
| 298 | Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count); | ||
| 299 | Id EmitBitReverse32(EmitContext& ctx, Id value); | ||
| 300 | Id EmitBitCount32(EmitContext& ctx, Id value); | ||
| 301 | Id EmitBitwiseNot32(EmitContext& ctx, Id value); | ||
| 302 | Id EmitFindSMsb32(EmitContext& ctx, Id value); | ||
| 303 | Id EmitFindUMsb32(EmitContext& ctx, Id value); | ||
| 304 | Id EmitSMin32(EmitContext& ctx, Id a, Id b); | ||
| 305 | Id EmitUMin32(EmitContext& ctx, Id a, Id b); | ||
| 306 | Id EmitSMax32(EmitContext& ctx, Id a, Id b); | ||
| 307 | Id EmitUMax32(EmitContext& ctx, Id a, Id b); | ||
| 308 | Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max); | ||
| 309 | Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max); | ||
| 310 | Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); | ||
| 311 | Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs); | ||
| 312 | Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs); | ||
| 313 | Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs); | ||
| 314 | Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs); | ||
| 315 | Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs); | ||
| 316 | Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs); | ||
| 317 | Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs); | ||
| 318 | Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); | ||
| 319 | Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); | ||
| 320 | Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id pointer_offset, Id value); | ||
| 321 | Id EmitSharedAtomicSMin32(EmitContext& ctx, Id pointer_offset, Id value); | ||
| 322 | Id EmitSharedAtomicUMin32(EmitContext& ctx, Id pointer_offset, Id value); | ||
| 323 | Id EmitSharedAtomicSMax32(EmitContext& ctx, Id pointer_offset, Id value); | ||
| 324 | Id EmitSharedAtomicUMax32(EmitContext& ctx, Id pointer_offset, Id value); | ||
| 325 | Id EmitSharedAtomicInc32(EmitContext& ctx, Id pointer_offset, Id value); | ||
| 326 | Id EmitSharedAtomicDec32(EmitContext& ctx, Id pointer_offset, Id value); | ||
| 327 | Id EmitSharedAtomicAnd32(EmitContext& ctx, Id pointer_offset, Id value); | ||
| 328 | Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value); | ||
| 329 | Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value); | ||
| 330 | Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value); | ||
| 331 | Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value); | ||
| 332 | Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 333 | Id value); | ||
| 334 | Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 335 | Id value); | ||
| 336 | Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 337 | Id value); | ||
| 338 | Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 339 | Id value); | ||
| 340 | Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 341 | Id value); | ||
| 342 | Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 343 | Id value); | ||
| 344 | Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 345 | Id value); | ||
| 346 | Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 347 | Id value); | ||
| 348 | Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 349 | Id value); | ||
| 350 | Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 351 | Id value); | ||
| 352 | Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 353 | Id value); | ||
| 354 | Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 355 | Id value); | ||
| 356 | Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 357 | Id value); | ||
| 358 | Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 359 | Id value); | ||
| 360 | Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 361 | Id value); | ||
| 362 | Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 363 | Id value); | ||
| 364 | Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 365 | Id value); | ||
| 366 | Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 367 | Id value); | ||
| 368 | Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 369 | Id value); | ||
| 370 | Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 371 | Id value); | ||
| 372 | Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 373 | Id value); | ||
| 374 | Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 375 | Id value); | ||
| 376 | Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 377 | Id value); | ||
| 378 | Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 379 | Id value); | ||
| 380 | Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 381 | Id value); | ||
| 382 | Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 383 | Id value); | ||
| 384 | Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 385 | Id value); | ||
| 386 | Id EmitGlobalAtomicIAdd32(EmitContext& ctx); | ||
| 387 | Id EmitGlobalAtomicSMin32(EmitContext& ctx); | ||
| 388 | Id EmitGlobalAtomicUMin32(EmitContext& ctx); | ||
| 389 | Id EmitGlobalAtomicSMax32(EmitContext& ctx); | ||
| 390 | Id EmitGlobalAtomicUMax32(EmitContext& ctx); | ||
| 391 | Id EmitGlobalAtomicInc32(EmitContext& ctx); | ||
| 392 | Id EmitGlobalAtomicDec32(EmitContext& ctx); | ||
| 393 | Id EmitGlobalAtomicAnd32(EmitContext& ctx); | ||
| 394 | Id EmitGlobalAtomicOr32(EmitContext& ctx); | ||
| 395 | Id EmitGlobalAtomicXor32(EmitContext& ctx); | ||
| 396 | Id EmitGlobalAtomicExchange32(EmitContext& ctx); | ||
| 397 | Id EmitGlobalAtomicIAdd64(EmitContext& ctx); | ||
| 398 | Id EmitGlobalAtomicSMin64(EmitContext& ctx); | ||
| 399 | Id EmitGlobalAtomicUMin64(EmitContext& ctx); | ||
| 400 | Id EmitGlobalAtomicSMax64(EmitContext& ctx); | ||
| 401 | Id EmitGlobalAtomicUMax64(EmitContext& ctx); | ||
| 402 | Id EmitGlobalAtomicInc64(EmitContext& ctx); | ||
| 403 | Id EmitGlobalAtomicDec64(EmitContext& ctx); | ||
| 404 | Id EmitGlobalAtomicAnd64(EmitContext& ctx); | ||
| 405 | Id EmitGlobalAtomicOr64(EmitContext& ctx); | ||
| 406 | Id EmitGlobalAtomicXor64(EmitContext& ctx); | ||
| 407 | Id EmitGlobalAtomicExchange64(EmitContext& ctx); | ||
| 408 | Id EmitGlobalAtomicAddF32(EmitContext& ctx); | ||
| 409 | Id EmitGlobalAtomicAddF16x2(EmitContext& ctx); | ||
| 410 | Id EmitGlobalAtomicAddF32x2(EmitContext& ctx); | ||
| 411 | Id EmitGlobalAtomicMinF16x2(EmitContext& ctx); | ||
| 412 | Id EmitGlobalAtomicMinF32x2(EmitContext& ctx); | ||
| 413 | Id EmitGlobalAtomicMaxF16x2(EmitContext& ctx); | ||
| 414 | Id EmitGlobalAtomicMaxF32x2(EmitContext& ctx); | ||
| 415 | Id EmitLogicalOr(EmitContext& ctx, Id a, Id b); | ||
| 416 | Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b); | ||
| 417 | Id EmitLogicalXor(EmitContext& ctx, Id a, Id b); | ||
| 418 | Id EmitLogicalNot(EmitContext& ctx, Id value); | ||
| 419 | Id EmitConvertS16F16(EmitContext& ctx, Id value); | ||
| 420 | Id EmitConvertS16F32(EmitContext& ctx, Id value); | ||
| 421 | Id EmitConvertS16F64(EmitContext& ctx, Id value); | ||
| 422 | Id EmitConvertS32F16(EmitContext& ctx, Id value); | ||
| 423 | Id EmitConvertS32F32(EmitContext& ctx, Id value); | ||
| 424 | Id EmitConvertS32F64(EmitContext& ctx, Id value); | ||
| 425 | Id EmitConvertS64F16(EmitContext& ctx, Id value); | ||
| 426 | Id EmitConvertS64F32(EmitContext& ctx, Id value); | ||
| 427 | Id EmitConvertS64F64(EmitContext& ctx, Id value); | ||
| 428 | Id EmitConvertU16F16(EmitContext& ctx, Id value); | ||
| 429 | Id EmitConvertU16F32(EmitContext& ctx, Id value); | ||
| 430 | Id EmitConvertU16F64(EmitContext& ctx, Id value); | ||
| 431 | Id EmitConvertU32F16(EmitContext& ctx, Id value); | ||
| 432 | Id EmitConvertU32F32(EmitContext& ctx, Id value); | ||
| 433 | Id EmitConvertU32F64(EmitContext& ctx, Id value); | ||
| 434 | Id EmitConvertU64F16(EmitContext& ctx, Id value); | ||
| 435 | Id EmitConvertU64F32(EmitContext& ctx, Id value); | ||
| 436 | Id EmitConvertU64F64(EmitContext& ctx, Id value); | ||
| 437 | Id EmitConvertU64U32(EmitContext& ctx, Id value); | ||
| 438 | Id EmitConvertU32U64(EmitContext& ctx, Id value); | ||
| 439 | Id EmitConvertF16F32(EmitContext& ctx, Id value); | ||
| 440 | Id EmitConvertF32F16(EmitContext& ctx, Id value); | ||
| 441 | Id EmitConvertF32F64(EmitContext& ctx, Id value); | ||
| 442 | Id EmitConvertF64F32(EmitContext& ctx, Id value); | ||
| 443 | Id EmitConvertF16S8(EmitContext& ctx, Id value); | ||
| 444 | Id EmitConvertF16S16(EmitContext& ctx, Id value); | ||
| 445 | Id EmitConvertF16S32(EmitContext& ctx, Id value); | ||
| 446 | Id EmitConvertF16S64(EmitContext& ctx, Id value); | ||
| 447 | Id EmitConvertF16U8(EmitContext& ctx, Id value); | ||
| 448 | Id EmitConvertF16U16(EmitContext& ctx, Id value); | ||
| 449 | Id EmitConvertF16U32(EmitContext& ctx, Id value); | ||
| 450 | Id EmitConvertF16U64(EmitContext& ctx, Id value); | ||
| 451 | Id EmitConvertF32S8(EmitContext& ctx, Id value); | ||
| 452 | Id EmitConvertF32S16(EmitContext& ctx, Id value); | ||
| 453 | Id EmitConvertF32S32(EmitContext& ctx, Id value); | ||
| 454 | Id EmitConvertF32S64(EmitContext& ctx, Id value); | ||
| 455 | Id EmitConvertF32U8(EmitContext& ctx, Id value); | ||
| 456 | Id EmitConvertF32U16(EmitContext& ctx, Id value); | ||
| 457 | Id EmitConvertF32U32(EmitContext& ctx, Id value); | ||
| 458 | Id EmitConvertF32U64(EmitContext& ctx, Id value); | ||
| 459 | Id EmitConvertF64S8(EmitContext& ctx, Id value); | ||
| 460 | Id EmitConvertF64S16(EmitContext& ctx, Id value); | ||
| 461 | Id EmitConvertF64S32(EmitContext& ctx, Id value); | ||
| 462 | Id EmitConvertF64S64(EmitContext& ctx, Id value); | ||
| 463 | Id EmitConvertF64U8(EmitContext& ctx, Id value); | ||
| 464 | Id EmitConvertF64U16(EmitContext& ctx, Id value); | ||
| 465 | Id EmitConvertF64U32(EmitContext& ctx, Id value); | ||
| 466 | Id EmitConvertF64U64(EmitContext& ctx, Id value); | ||
| 467 | Id EmitBindlessImageSampleImplicitLod(EmitContext&); | ||
| 468 | Id EmitBindlessImageSampleExplicitLod(EmitContext&); | ||
| 469 | Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&); | ||
| 470 | Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&); | ||
| 471 | Id EmitBindlessImageGather(EmitContext&); | ||
| 472 | Id EmitBindlessImageGatherDref(EmitContext&); | ||
| 473 | Id EmitBindlessImageFetch(EmitContext&); | ||
| 474 | Id EmitBindlessImageQueryDimensions(EmitContext&); | ||
| 475 | Id EmitBindlessImageQueryLod(EmitContext&); | ||
| 476 | Id EmitBindlessImageGradient(EmitContext&); | ||
| 477 | Id EmitBindlessImageRead(EmitContext&); | ||
| 478 | Id EmitBindlessImageWrite(EmitContext&); | ||
| 479 | Id EmitBoundImageSampleImplicitLod(EmitContext&); | ||
| 480 | Id EmitBoundImageSampleExplicitLod(EmitContext&); | ||
| 481 | Id EmitBoundImageSampleDrefImplicitLod(EmitContext&); | ||
| 482 | Id EmitBoundImageSampleDrefExplicitLod(EmitContext&); | ||
| 483 | Id EmitBoundImageGather(EmitContext&); | ||
| 484 | Id EmitBoundImageGatherDref(EmitContext&); | ||
| 485 | Id EmitBoundImageFetch(EmitContext&); | ||
| 486 | Id EmitBoundImageQueryDimensions(EmitContext&); | ||
| 487 | Id EmitBoundImageQueryLod(EmitContext&); | ||
| 488 | Id EmitBoundImageGradient(EmitContext&); | ||
| 489 | Id EmitBoundImageRead(EmitContext&); | ||
| 490 | Id EmitBoundImageWrite(EmitContext&); | ||
| 491 | Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 492 | Id bias_lc, const IR::Value& offset); | ||
| 493 | Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 494 | Id lod, const IR::Value& offset); | ||
| 495 | Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, | ||
| 496 | Id coords, Id dref, Id bias_lc, const IR::Value& offset); | ||
| 497 | Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, | ||
| 498 | Id coords, Id dref, Id lod, const IR::Value& offset); | ||
| 499 | Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 500 | const IR::Value& offset, const IR::Value& offset2); | ||
| 501 | Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 502 | const IR::Value& offset, const IR::Value& offset2, Id dref); | ||
| 503 | Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, | ||
| 504 | Id lod, Id ms); | ||
| 505 | Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod); | ||
| 506 | Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); | ||
| 507 | Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 508 | Id derivates, Id offset, Id lod_clamp); | ||
| 509 | Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); | ||
| 510 | void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color); | ||
| 511 | Id EmitBindlessImageAtomicIAdd32(EmitContext&); | ||
| 512 | Id EmitBindlessImageAtomicSMin32(EmitContext&); | ||
| 513 | Id EmitBindlessImageAtomicUMin32(EmitContext&); | ||
| 514 | Id EmitBindlessImageAtomicSMax32(EmitContext&); | ||
| 515 | Id EmitBindlessImageAtomicUMax32(EmitContext&); | ||
| 516 | Id EmitBindlessImageAtomicInc32(EmitContext&); | ||
| 517 | Id EmitBindlessImageAtomicDec32(EmitContext&); | ||
| 518 | Id EmitBindlessImageAtomicAnd32(EmitContext&); | ||
| 519 | Id EmitBindlessImageAtomicOr32(EmitContext&); | ||
| 520 | Id EmitBindlessImageAtomicXor32(EmitContext&); | ||
| 521 | Id EmitBindlessImageAtomicExchange32(EmitContext&); | ||
| 522 | Id EmitBoundImageAtomicIAdd32(EmitContext&); | ||
| 523 | Id EmitBoundImageAtomicSMin32(EmitContext&); | ||
| 524 | Id EmitBoundImageAtomicUMin32(EmitContext&); | ||
| 525 | Id EmitBoundImageAtomicSMax32(EmitContext&); | ||
| 526 | Id EmitBoundImageAtomicUMax32(EmitContext&); | ||
| 527 | Id EmitBoundImageAtomicInc32(EmitContext&); | ||
| 528 | Id EmitBoundImageAtomicDec32(EmitContext&); | ||
| 529 | Id EmitBoundImageAtomicAnd32(EmitContext&); | ||
| 530 | Id EmitBoundImageAtomicOr32(EmitContext&); | ||
| 531 | Id EmitBoundImageAtomicXor32(EmitContext&); | ||
| 532 | Id EmitBoundImageAtomicExchange32(EmitContext&); | ||
| 533 | Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 534 | Id value); | ||
| 535 | Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 536 | Id value); | ||
| 537 | Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 538 | Id value); | ||
| 539 | Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 540 | Id value); | ||
| 541 | Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 542 | Id value); | ||
| 543 | Id EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 544 | Id value); | ||
| 545 | Id EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 546 | Id value); | ||
| 547 | Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 548 | Id value); | ||
| 549 | Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 550 | Id value); | ||
| 551 | Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 552 | Id value); | ||
| 553 | Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 554 | Id value); | ||
| 555 | Id EmitLaneId(EmitContext& ctx); | ||
| 556 | Id EmitVoteAll(EmitContext& ctx, Id pred); | ||
| 557 | Id EmitVoteAny(EmitContext& ctx, Id pred); | ||
| 558 | Id EmitVoteEqual(EmitContext& ctx, Id pred); | ||
| 559 | Id EmitSubgroupBallot(EmitContext& ctx, Id pred); | ||
| 560 | Id EmitSubgroupEqMask(EmitContext& ctx); | ||
| 561 | Id EmitSubgroupLtMask(EmitContext& ctx); | ||
| 562 | Id EmitSubgroupLeMask(EmitContext& ctx); | ||
| 563 | Id EmitSubgroupGtMask(EmitContext& ctx); | ||
| 564 | Id EmitSubgroupGeMask(EmitContext& ctx); | ||
| 565 | Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||
| 566 | Id segmentation_mask); | ||
| 567 | Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||
| 568 | Id segmentation_mask); | ||
| 569 | Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||
| 570 | Id segmentation_mask); | ||
| 571 | Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||
| 572 | Id segmentation_mask); | ||
| 573 | Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle); | ||
| 574 | Id EmitDPdxFine(EmitContext& ctx, Id op_a); | ||
| 575 | Id EmitDPdyFine(EmitContext& ctx, Id op_a); | ||
| 576 | Id EmitDPdxCoarse(EmitContext& ctx, Id op_a); | ||
| 577 | Id EmitDPdyCoarse(EmitContext& ctx, Id op_a); | ||
| 578 | |||
| 579 | } // namespace Shader::Backend::SPIRV | ||
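These declarations are the full per-instruction surface of the SPIR-V backend: one overload per IR opcode, suffixed with the operand width where several widths exist. The backend selects them through a dispatch generated from its opcode tables; purely as an illustration (a hypothetical hand-written EmitInst, not code from this commit), the selection boils down to a switch over IR::Opcode:

// Illustrative only: the real backend generates this dispatch rather
// than writing the switch by hand.
Id EmitInst(EmitContext& ctx, IR::Inst* inst) {
    switch (inst->GetOpcode()) {
    case IR::Opcode::IAdd32:
        return EmitIAdd32(ctx, inst, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1)));
    case IR::Opcode::LogicalOr:
        return EmitLogicalOr(ctx, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1)));
    // ... one case per declaration above ...
    default:
        throw NotImplementedException("Opcode {}", inst->GetOpcode());
    }
}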
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp new file mode 100644 index 000000000..3501d7495 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp | |||
| @@ -0,0 +1,270 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 6 | #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" | ||
| 7 | |||
| 8 | namespace Shader::Backend::SPIRV { | ||
| 9 | namespace { | ||
| 10 | void SetZeroFlag(EmitContext& ctx, IR::Inst* inst, Id result) { | ||
| 11 | IR::Inst* const zero{inst->GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)}; | ||
| 12 | if (!zero) { | ||
| 13 | return; | ||
| 14 | } | ||
| 15 | zero->SetDefinition(ctx.OpIEqual(ctx.U1, result, ctx.u32_zero_value)); | ||
| 16 | zero->Invalidate(); | ||
| 17 | } | ||
| 18 | |||
| 19 | void SetSignFlag(EmitContext& ctx, IR::Inst* inst, Id result) { | ||
| 20 | IR::Inst* const sign{inst->GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp)}; | ||
| 21 | if (!sign) { | ||
| 22 | return; | ||
| 23 | } | ||
| 24 | sign->SetDefinition(ctx.OpSLessThan(ctx.U1, result, ctx.u32_zero_value)); | ||
| 25 | sign->Invalidate(); | ||
| 26 | } | ||
| 27 | } // Anonymous namespace | ||
| 28 | |||
| 29 | Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { | ||
| 30 | Id result{}; | ||
| 31 | if (IR::Inst* const carry{inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp)}) { | ||
| 32 | const Id carry_type{ctx.TypeStruct(ctx.U32[1], ctx.U32[1])}; | ||
| 33 | const Id carry_result{ctx.OpIAddCarry(carry_type, a, b)}; | ||
| 34 | result = ctx.OpCompositeExtract(ctx.U32[1], carry_result, 0U); | ||
| 35 | |||
| 36 | const Id carry_value{ctx.OpCompositeExtract(ctx.U32[1], carry_result, 1U)}; | ||
| 37 | carry->SetDefinition(ctx.OpINotEqual(ctx.U1, carry_value, ctx.u32_zero_value)); | ||
| 38 | carry->Invalidate(); | ||
| 39 | } else { | ||
| 40 | result = ctx.OpIAdd(ctx.U32[1], a, b); | ||
| 41 | } | ||
| 42 | SetZeroFlag(ctx, inst, result); | ||
| 43 | SetSignFlag(ctx, inst, result); | ||
| 44 | if (IR::Inst* const overflow{inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp)}) { | ||
| 45 | // https://stackoverflow.com/questions/55468823/how-to-detect-integer-overflow-in-c | ||
| 46 | constexpr u32 s32_max{static_cast<u32>(std::numeric_limits<s32>::max())}; | ||
| 47 | const Id is_positive{ctx.OpSGreaterThanEqual(ctx.U1, a, ctx.u32_zero_value)}; | ||
| 48 | const Id sub_a{ctx.OpISub(ctx.U32[1], ctx.Const(s32_max), a)}; | ||
| 49 | |||
| 50 | const Id positive_test{ctx.OpSGreaterThan(ctx.U1, b, sub_a)}; | ||
| 51 | const Id negative_test{ctx.OpSLessThan(ctx.U1, b, sub_a)}; | ||
| 52 | const Id overflow_flag{ctx.OpSelect(ctx.U1, is_positive, positive_test, negative_test)}; | ||
| 53 | overflow->SetDefinition(overflow_flag); | ||
| 54 | overflow->Invalidate(); | ||
| 55 | } | ||
| 56 | return result; | ||
| 57 | } | ||
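// The carry/overflow recovery above is easiest to read against a scalar
// reference model. The helper below is hypothetical (not part of this
// commit) and uses 64-bit widening instead of the wrapped 32-bit
// arithmetic the emitted SPIR-V performs, but it defines the same flags:
// carry is the unsigned wrap of a + b, and overflow is the signed wrap
// that the OpSelect between positive_test and negative_test approximates.

#include <cstdint>
#include <limits>

struct IAddFlags {
    uint32_t result;
    bool carry;    // unsigned wrap-around, as recovered from OpIAddCarry
    bool overflow; // signed wrap-around
};

inline IAddFlags IAdd32Flags(uint32_t a, uint32_t b) {
    const uint64_t wide = uint64_t{a} + uint64_t{b};
    const int64_t swide =
        int64_t{static_cast<int32_t>(a)} + int64_t{static_cast<int32_t>(b)};
    return {
        .result = static_cast<uint32_t>(wide),
        .carry = wide > std::numeric_limits<uint32_t>::max(),
        .overflow = swide > std::numeric_limits<int32_t>::max() ||
                    swide < std::numeric_limits<int32_t>::min(),
    };
}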
| 58 | |||
| 59 | Id EmitIAdd64(EmitContext& ctx, Id a, Id b) { | ||
| 60 | return ctx.OpIAdd(ctx.U64, a, b); | ||
| 61 | } | ||
| 62 | |||
| 63 | Id EmitISub32(EmitContext& ctx, Id a, Id b) { | ||
| 64 | return ctx.OpISub(ctx.U32[1], a, b); | ||
| 65 | } | ||
| 66 | |||
| 67 | Id EmitISub64(EmitContext& ctx, Id a, Id b) { | ||
| 68 | return ctx.OpISub(ctx.U64, a, b); | ||
| 69 | } | ||
| 70 | |||
| 71 | Id EmitIMul32(EmitContext& ctx, Id a, Id b) { | ||
| 72 | return ctx.OpIMul(ctx.U32[1], a, b); | ||
| 73 | } | ||
| 74 | |||
| 75 | Id EmitINeg32(EmitContext& ctx, Id value) { | ||
| 76 | return ctx.OpSNegate(ctx.U32[1], value); | ||
| 77 | } | ||
| 78 | |||
| 79 | Id EmitINeg64(EmitContext& ctx, Id value) { | ||
| 80 | return ctx.OpSNegate(ctx.U64, value); | ||
| 81 | } | ||
| 82 | |||
| 83 | Id EmitIAbs32(EmitContext& ctx, Id value) { | ||
| 84 | return ctx.OpSAbs(ctx.U32[1], value); | ||
| 85 | } | ||
| 86 | |||
| 87 | Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift) { | ||
| 88 | return ctx.OpShiftLeftLogical(ctx.U32[1], base, shift); | ||
| 89 | } | ||
| 90 | |||
| 91 | Id EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift) { | ||
| 92 | return ctx.OpShiftLeftLogical(ctx.U64, base, shift); | ||
| 93 | } | ||
| 94 | |||
| 95 | Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift) { | ||
| 96 | return ctx.OpShiftRightLogical(ctx.U32[1], base, shift); | ||
| 97 | } | ||
| 98 | |||
| 99 | Id EmitShiftRightLogical64(EmitContext& ctx, Id base, Id shift) { | ||
| 100 | return ctx.OpShiftRightLogical(ctx.U64, base, shift); | ||
| 101 | } | ||
| 102 | |||
| 103 | Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift) { | ||
| 104 | return ctx.OpShiftRightArithmetic(ctx.U32[1], base, shift); | ||
| 105 | } | ||
| 106 | |||
| 107 | Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift) { | ||
| 108 | return ctx.OpShiftRightArithmetic(ctx.U64, base, shift); | ||
| 109 | } | ||
| 110 | |||
| 111 | Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { | ||
| 112 | const Id result{ctx.OpBitwiseAnd(ctx.U32[1], a, b)}; | ||
| 113 | SetZeroFlag(ctx, inst, result); | ||
| 114 | SetSignFlag(ctx, inst, result); | ||
| 115 | return result; | ||
| 116 | } | ||
| 117 | |||
| 118 | Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { | ||
| 119 | const Id result{ctx.OpBitwiseOr(ctx.U32[1], a, b)}; | ||
| 120 | SetZeroFlag(ctx, inst, result); | ||
| 121 | SetSignFlag(ctx, inst, result); | ||
| 122 | return result; | ||
| 123 | } | ||
| 124 | |||
| 125 | Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { | ||
| 126 | const Id result{ctx.OpBitwiseXor(ctx.U32[1], a, b)}; | ||
| 127 | SetZeroFlag(ctx, inst, result); | ||
| 128 | SetSignFlag(ctx, inst, result); | ||
| 129 | return result; | ||
| 130 | } | ||
| 131 | |||
| 132 | Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count) { | ||
| 133 | return ctx.OpBitFieldInsert(ctx.U32[1], base, insert, offset, count); | ||
| 134 | } | ||
| 135 | |||
| 136 | Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count) { | ||
| 137 | const Id result{ctx.OpBitFieldSExtract(ctx.U32[1], base, offset, count)}; | ||
| 138 | SetZeroFlag(ctx, inst, result); | ||
| 139 | SetSignFlag(ctx, inst, result); | ||
| 140 | return result; | ||
| 141 | } | ||
| 142 | |||
| 143 | Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count) { | ||
| 144 | const Id result{ctx.OpBitFieldUExtract(ctx.U32[1], base, offset, count)}; | ||
| 145 | SetZeroFlag(ctx, inst, result); | ||
| 146 | SetSignFlag(ctx, inst, result); | ||
| 147 | return result; | ||
| 148 | } | ||
| 149 | |||
| 150 | Id EmitBitReverse32(EmitContext& ctx, Id value) { | ||
| 151 | return ctx.OpBitReverse(ctx.U32[1], value); | ||
| 152 | } | ||
| 153 | |||
| 154 | Id EmitBitCount32(EmitContext& ctx, Id value) { | ||
| 155 | return ctx.OpBitCount(ctx.U32[1], value); | ||
| 156 | } | ||
| 157 | |||
| 158 | Id EmitBitwiseNot32(EmitContext& ctx, Id value) { | ||
| 159 | return ctx.OpNot(ctx.U32[1], value); | ||
| 160 | } | ||
| 161 | |||
| 162 | Id EmitFindSMsb32(EmitContext& ctx, Id value) { | ||
| 163 | return ctx.OpFindSMsb(ctx.U32[1], value); | ||
| 164 | } | ||
| 165 | |||
| 166 | Id EmitFindUMsb32(EmitContext& ctx, Id value) { | ||
| 167 | return ctx.OpFindUMsb(ctx.U32[1], value); | ||
| 168 | } | ||
| 169 | |||
| 170 | Id EmitSMin32(EmitContext& ctx, Id a, Id b) { | ||
| 171 | const bool is_broken{ctx.profile.has_broken_signed_operations}; | ||
| 172 | if (is_broken) { | ||
| 173 | a = ctx.OpBitcast(ctx.S32[1], a); | ||
| 174 | b = ctx.OpBitcast(ctx.S32[1], b); | ||
| 175 | } | ||
| 176 | const Id result{ctx.OpSMin(ctx.U32[1], a, b)}; | ||
| 177 | return is_broken ? ctx.OpBitcast(ctx.U32[1], result) : result; | ||
| 178 | } | ||
| 179 | |||
| 180 | Id EmitUMin32(EmitContext& ctx, Id a, Id b) { | ||
| 181 | return ctx.OpUMin(ctx.U32[1], a, b); | ||
| 182 | } | ||
| 183 | |||
| 184 | Id EmitSMax32(EmitContext& ctx, Id a, Id b) { | ||
| 185 | const bool is_broken{ctx.profile.has_broken_signed_operations}; | ||
| 186 | if (is_broken) { | ||
| 187 | a = ctx.OpBitcast(ctx.S32[1], a); | ||
| 188 | b = ctx.OpBitcast(ctx.S32[1], b); | ||
| 189 | } | ||
| 190 | const Id result{ctx.OpSMax(ctx.U32[1], a, b)}; | ||
| 191 | return is_broken ? ctx.OpBitcast(ctx.U32[1], result) : result; | ||
| 192 | } | ||
| 193 | |||
| 194 | Id EmitUMax32(EmitContext& ctx, Id a, Id b) { | ||
| 195 | return ctx.OpUMax(ctx.U32[1], a, b); | ||
| 196 | } | ||
| 197 | |||
| 198 | Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max) { | ||
| 199 | Id result{}; | ||
| 200 | if (ctx.profile.has_broken_signed_operations || ctx.profile.has_broken_spirv_clamp) { | ||
| 201 | value = ctx.OpBitcast(ctx.S32[1], value); | ||
| 202 | min = ctx.OpBitcast(ctx.S32[1], min); | ||
| 203 | max = ctx.OpBitcast(ctx.S32[1], max); | ||
| 204 | if (ctx.profile.has_broken_spirv_clamp) { | ||
| 205 | result = ctx.OpSMax(ctx.S32[1], ctx.OpSMin(ctx.S32[1], value, max), min); | ||
| 206 | } else { | ||
| 207 | result = ctx.OpSClamp(ctx.S32[1], value, min, max); | ||
| 208 | } | ||
| 209 | result = ctx.OpBitcast(ctx.U32[1], result); | ||
| 210 | } else { | ||
| 211 | result = ctx.OpSClamp(ctx.U32[1], value, min, max); | ||
| 212 | } | ||
| 213 | SetZeroFlag(ctx, inst, result); | ||
| 214 | SetSignFlag(ctx, inst, result); | ||
| 215 | return result; | ||
| 216 | } | ||
| 217 | |||
| 218 | Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max) { | ||
| 219 | Id result{}; | ||
| 220 | if (ctx.profile.has_broken_spirv_clamp) { | ||
| 221 | result = ctx.OpUMax(ctx.U32[1], ctx.OpUMin(ctx.U32[1], value, max), min); | ||
| 222 | } else { | ||
| 223 | result = ctx.OpUClamp(ctx.U32[1], value, min, max); | ||
| 224 | } | ||
| 225 | SetZeroFlag(ctx, inst, result); | ||
| 226 | SetSignFlag(ctx, inst, result); | ||
| 227 | return result; | ||
| 228 | } | ||
| 229 | |||
| 230 | Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 231 | return ctx.OpSLessThan(ctx.U1, lhs, rhs); | ||
| 232 | } | ||
| 233 | |||
| 234 | Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 235 | return ctx.OpULessThan(ctx.U1, lhs, rhs); | ||
| 236 | } | ||
| 237 | |||
| 238 | Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 239 | return ctx.OpIEqual(ctx.U1, lhs, rhs); | ||
| 240 | } | ||
| 241 | |||
| 242 | Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 243 | return ctx.OpSLessThanEqual(ctx.U1, lhs, rhs); | ||
| 244 | } | ||
| 245 | |||
| 246 | Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 247 | return ctx.OpULessThanEqual(ctx.U1, lhs, rhs); | ||
| 248 | } | ||
| 249 | |||
| 250 | Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 251 | return ctx.OpSGreaterThan(ctx.U1, lhs, rhs); | ||
| 252 | } | ||
| 253 | |||
| 254 | Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 255 | return ctx.OpUGreaterThan(ctx.U1, lhs, rhs); | ||
| 256 | } | ||
| 257 | |||
| 258 | Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 259 | return ctx.OpINotEqual(ctx.U1, lhs, rhs); | ||
| 260 | } | ||
| 261 | |||
| 262 | Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 263 | return ctx.OpSGreaterThanEqual(ctx.U1, lhs, rhs); | ||
| 264 | } | ||
| 265 | |||
| 266 | Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 267 | return ctx.OpUGreaterThanEqual(ctx.U1, lhs, rhs); | ||
| 268 | } | ||
| 269 | |||
| 270 | } // namespace Shader::Backend::SPIRV | ||
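Two profile flags shape the min/max/clamp emitters above: has_broken_signed_operations routes operands through explicit S32 bitcasts so drivers that mishandle signed operations on unsigned types see properly typed operands, and has_broken_spirv_clamp lowers OpSClamp/OpUClamp to a min/max pair. A scalar model of the latter (a hypothetical helper, assuming min <= max as the clamp contract does):

#include <algorithm>
#include <cstdint>

// Scalar model of the has_broken_spirv_clamp fallback in EmitSClamp32 /
// EmitUClamp32: clamp(value, lo, hi) lowered to max(min(value, hi), lo),
// matching the OpSMax(OpSMin(...)) order emitted above.
inline int32_t ClampWithoutOpClamp(int32_t value, int32_t lo, int32_t hi) {
    return std::max(std::min(value, hi), lo);
}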
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp new file mode 100644 index 000000000..b9a9500fc --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp | |||
| @@ -0,0 +1,26 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 6 | #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" | ||
| 7 | |||
| 8 | namespace Shader::Backend::SPIRV { | ||
| 9 | |||
| 10 | Id EmitLogicalOr(EmitContext& ctx, Id a, Id b) { | ||
| 11 | return ctx.OpLogicalOr(ctx.U1, a, b); | ||
| 12 | } | ||
| 13 | |||
| 14 | Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b) { | ||
| 15 | return ctx.OpLogicalAnd(ctx.U1, a, b); | ||
| 16 | } | ||
| 17 | |||
| 18 | Id EmitLogicalXor(EmitContext& ctx, Id a, Id b) { | ||
| 19 | return ctx.OpLogicalNotEqual(ctx.U1, a, b); | ||
| 20 | } | ||
| 21 | |||
| 22 | Id EmitLogicalNot(EmitContext& ctx, Id value) { | ||
| 23 | return ctx.OpLogicalNot(ctx.U1, value); | ||
| 24 | } | ||
| 25 | |||
| 26 | } // namespace Shader::Backend::SPIRV | ||
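SPIR-V has no dedicated logical-XOR instruction, so EmitLogicalXor lowers to OpLogicalNotEqual: for booleans, inequality and exclusive-or coincide. A compile-time check of that identity (illustrative only):

// For booleans, a != b is exactly a XOR b, which is why OpLogicalNotEqual
// can stand in for the missing OpLogicalXor.
static_assert((true != true) == (true ^ true));
static_assert((true != false) == (true ^ false));
static_assert((false != false) == (false ^ false));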
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp new file mode 100644 index 000000000..679ee2684 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp | |||
| @@ -0,0 +1,275 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <bit> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 8 | #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" | ||
| 9 | |||
| 10 | namespace Shader::Backend::SPIRV { | ||
| 11 | namespace { | ||
| 12 | Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size, | ||
| 13 | u32 index_offset = 0) { | ||
| 14 | if (offset.IsImmediate()) { | ||
| 15 | const u32 imm_offset{static_cast<u32>(offset.U32() / element_size) + index_offset}; | ||
| 16 | return ctx.Const(imm_offset); | ||
| 17 | } | ||
| 18 | const u32 shift{static_cast<u32>(std::countr_zero(element_size))}; | ||
| 19 | Id index{ctx.Def(offset)}; | ||
| 20 | if (shift != 0) { | ||
| 21 | const Id shift_id{ctx.Const(shift)}; | ||
| 22 | index = ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id); | ||
| 23 | } | ||
| 24 | if (index_offset != 0) { | ||
| 25 | index = ctx.OpIAdd(ctx.U32[1], index, ctx.Const(index_offset)); | ||
| 26 | } | ||
| 27 | return index; | ||
| 28 | } | ||
| 29 | |||
| 30 | Id StoragePointer(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 31 | const StorageTypeDefinition& type_def, size_t element_size, | ||
| 32 | Id StorageDefinitions::*member_ptr, u32 index_offset = 0) { | ||
| 33 | if (!binding.IsImmediate()) { | ||
| 34 | throw NotImplementedException("Dynamic storage buffer indexing"); | ||
| 35 | } | ||
| 36 | const Id ssbo{ctx.ssbos[binding.U32()].*member_ptr}; | ||
| 37 | const Id index{StorageIndex(ctx, offset, element_size, index_offset)}; | ||
| 38 | return ctx.OpAccessChain(type_def.element, ssbo, ctx.u32_zero_value, index); | ||
| 39 | } | ||
| 40 | |||
| 41 | Id LoadStorage(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id result_type, | ||
| 42 | const StorageTypeDefinition& type_def, size_t element_size, | ||
| 43 | Id StorageDefinitions::*member_ptr, u32 index_offset = 0) { | ||
| 44 | const Id pointer{ | ||
| 45 | StoragePointer(ctx, binding, offset, type_def, element_size, member_ptr, index_offset)}; | ||
| 46 | return ctx.OpLoad(result_type, pointer); | ||
| 47 | } | ||
| 48 | |||
| 49 | Id LoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 50 | u32 index_offset = 0) { | ||
| 51 | return LoadStorage(ctx, binding, offset, ctx.U32[1], ctx.storage_types.U32, sizeof(u32), | ||
| 52 | &StorageDefinitions::U32, index_offset); | ||
| 53 | } | ||
| 54 | |||
| 55 | void WriteStorage(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value, | ||
| 56 | const StorageTypeDefinition& type_def, size_t element_size, | ||
| 57 | Id StorageDefinitions::*member_ptr, u32 index_offset = 0) { | ||
| 58 | const Id pointer{ | ||
| 59 | StoragePointer(ctx, binding, offset, type_def, element_size, member_ptr, index_offset)}; | ||
| 60 | ctx.OpStore(pointer, value); | ||
| 61 | } | ||
| 62 | |||
| 63 | void WriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value, | ||
| 64 | u32 index_offset = 0) { | ||
| 65 | WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32, sizeof(u32), | ||
| 66 | &StorageDefinitions::U32, index_offset); | ||
| 67 | } | ||
| 68 | } // Anonymous namespace | ||
| 69 | |||
| 70 | void EmitLoadGlobalU8(EmitContext&) { | ||
| 71 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 72 | } | ||
| 73 | |||
| 74 | void EmitLoadGlobalS8(EmitContext&) { | ||
| 75 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 76 | } | ||
| 77 | |||
| 78 | void EmitLoadGlobalU16(EmitContext&) { | ||
| 79 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 80 | } | ||
| 81 | |||
| 82 | void EmitLoadGlobalS16(EmitContext&) { | ||
| 83 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 84 | } | ||
| 85 | |||
| 86 | Id EmitLoadGlobal32(EmitContext& ctx, Id address) { | ||
| 87 | if (ctx.profile.support_int64) { | ||
| 88 | return ctx.OpFunctionCall(ctx.U32[1], ctx.load_global_func_u32, address); | ||
| 89 | } | ||
| 90 | LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); | ||
| 91 | return ctx.Const(0u); | ||
| 92 | } | ||
| 93 | |||
| 94 | Id EmitLoadGlobal64(EmitContext& ctx, Id address) { | ||
| 95 | if (ctx.profile.support_int64) { | ||
| 96 | return ctx.OpFunctionCall(ctx.U32[2], ctx.load_global_func_u32x2, address); | ||
| 97 | } | ||
| 98 | LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); | ||
| 99 | return ctx.Const(0u, 0u); | ||
| 100 | } | ||
| 101 | |||
| 102 | Id EmitLoadGlobal128(EmitContext& ctx, Id address) { | ||
| 103 | if (ctx.profile.support_int64) { | ||
| 104 | return ctx.OpFunctionCall(ctx.U32[4], ctx.load_global_func_u32x4, address); | ||
| 105 | } | ||
| 106 | LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); | ||
| 107 | return ctx.Const(0u, 0u, 0u, 0u); | ||
| 108 | } | ||
| 109 | |||
| 110 | void EmitWriteGlobalU8(EmitContext&) { | ||
| 111 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 112 | } | ||
| 113 | |||
| 114 | void EmitWriteGlobalS8(EmitContext&) { | ||
| 115 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 116 | } | ||
| 117 | |||
| 118 | void EmitWriteGlobalU16(EmitContext&) { | ||
| 119 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 120 | } | ||
| 121 | |||
| 122 | void EmitWriteGlobalS16(EmitContext&) { | ||
| 123 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 124 | } | ||
| 125 | |||
| 126 | void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value) { | ||
| 127 | if (ctx.profile.support_int64) { | ||
| 128 | ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32, address, value); | ||
| 129 | return; | ||
| 130 | } | ||
| 131 | LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); | ||
| 132 | } | ||
| 133 | |||
| 134 | void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value) { | ||
| 135 | if (ctx.profile.support_int64) { | ||
| 136 | ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x2, address, value); | ||
| 137 | return; | ||
| 138 | } | ||
| 139 | LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); | ||
| 140 | } | ||
| 141 | |||
| 142 | void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value) { | ||
| 143 | if (ctx.profile.support_int64) { | ||
| 144 | ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x4, address, value); | ||
| 145 | return; | ||
| 146 | } | ||
| 147 | LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); | ||
| 148 | } | ||
| 149 | |||
| 150 | Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { | ||
| 151 | if (ctx.profile.support_int8 && ctx.profile.support_descriptor_aliasing) { | ||
| 152 | return ctx.OpUConvert(ctx.U32[1], | ||
| 153 | LoadStorage(ctx, binding, offset, ctx.U8, ctx.storage_types.U8, | ||
| 154 | sizeof(u8), &StorageDefinitions::U8)); | ||
| 155 | } else { | ||
| 156 | return ctx.OpBitFieldUExtract(ctx.U32[1], LoadStorage32(ctx, binding, offset), | ||
| 157 | ctx.BitOffset8(offset), ctx.Const(8u)); | ||
| 158 | } | ||
| 159 | } | ||
| 160 | |||
| 161 | Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { | ||
| 162 | if (ctx.profile.support_int8 && ctx.profile.support_descriptor_aliasing) { | ||
| 163 | return ctx.OpSConvert(ctx.U32[1], | ||
| 164 | LoadStorage(ctx, binding, offset, ctx.S8, ctx.storage_types.S8, | ||
| 165 | sizeof(s8), &StorageDefinitions::S8)); | ||
| 166 | } else { | ||
| 167 | return ctx.OpBitFieldSExtract(ctx.U32[1], LoadStorage32(ctx, binding, offset), | ||
| 168 | ctx.BitOffset8(offset), ctx.Const(8u)); | ||
| 169 | } | ||
| 170 | } | ||
| 171 | |||
| 172 | Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { | ||
| 173 | if (ctx.profile.support_int16 && ctx.profile.support_descriptor_aliasing) { | ||
| 174 | return ctx.OpUConvert(ctx.U32[1], | ||
| 175 | LoadStorage(ctx, binding, offset, ctx.U16, ctx.storage_types.U16, | ||
| 176 | sizeof(u16), &StorageDefinitions::U16)); | ||
| 177 | } else { | ||
| 178 | return ctx.OpBitFieldUExtract(ctx.U32[1], LoadStorage32(ctx, binding, offset), | ||
| 179 | ctx.BitOffset16(offset), ctx.Const(16u)); | ||
| 180 | } | ||
| 181 | } | ||
| 182 | |||
| 183 | Id EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { | ||
| 184 | if (ctx.profile.support_int16 && ctx.profile.support_descriptor_aliasing) { | ||
| 185 | return ctx.OpSConvert(ctx.U32[1], | ||
| 186 | LoadStorage(ctx, binding, offset, ctx.S16, ctx.storage_types.S16, | ||
| 187 | sizeof(s16), &StorageDefinitions::S16)); | ||
| 188 | } else { | ||
| 189 | return ctx.OpBitFieldSExtract(ctx.U32[1], LoadStorage32(ctx, binding, offset), | ||
| 190 | ctx.BitOffset16(offset), ctx.Const(16u)); | ||
| 191 | } | ||
| 192 | } | ||
| 193 | |||
| 194 | Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { | ||
| 195 | return LoadStorage32(ctx, binding, offset); | ||
| 196 | } | ||
| 197 | |||
| 198 | Id EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { | ||
| 199 | if (ctx.profile.support_descriptor_aliasing) { | ||
| 200 | return LoadStorage(ctx, binding, offset, ctx.U32[2], ctx.storage_types.U32x2, | ||
| 201 | sizeof(u32[2]), &StorageDefinitions::U32x2); | ||
| 202 | } else { | ||
| 203 | return ctx.OpCompositeConstruct(ctx.U32[2], LoadStorage32(ctx, binding, offset, 0), | ||
| 204 | LoadStorage32(ctx, binding, offset, 1)); | ||
| 205 | } | ||
| 206 | } | ||
| 207 | |||
| 208 | Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { | ||
| 209 | if (ctx.profile.support_descriptor_aliasing) { | ||
| 210 | return LoadStorage(ctx, binding, offset, ctx.U32[4], ctx.storage_types.U32x4, | ||
| 211 | sizeof(u32[4]), &StorageDefinitions::U32x4); | ||
| 212 | } else { | ||
| 213 | return ctx.OpCompositeConstruct(ctx.U32[4], LoadStorage32(ctx, binding, offset, 0), | ||
| 214 | LoadStorage32(ctx, binding, offset, 1), | ||
| 215 | LoadStorage32(ctx, binding, offset, 2), | ||
| 216 | LoadStorage32(ctx, binding, offset, 3)); | ||
| 217 | } | ||
| 218 | } | ||
| 219 | |||
| 220 | void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 221 | Id value) { | ||
| 222 | WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U8, value), ctx.storage_types.U8, | ||
| 223 | sizeof(u8), &StorageDefinitions::U8); | ||
| 224 | } | ||
| 225 | |||
| 226 | void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 227 | Id value) { | ||
| 228 | WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S8, value), ctx.storage_types.S8, | ||
| 229 | sizeof(s8), &StorageDefinitions::S8); | ||
| 230 | } | ||
| 231 | |||
| 232 | void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 233 | Id value) { | ||
| 234 | WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U16, value), ctx.storage_types.U16, | ||
| 235 | sizeof(u16), &StorageDefinitions::U16); | ||
| 236 | } | ||
| 237 | |||
| 238 | void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 239 | Id value) { | ||
| 240 | WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S16, value), ctx.storage_types.S16, | ||
| 241 | sizeof(s16), &StorageDefinitions::S16); | ||
| 242 | } | ||
| 243 | |||
| 244 | void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 245 | Id value) { | ||
| 246 | WriteStorage32(ctx, binding, offset, value); | ||
| 247 | } | ||
| 248 | |||
| 249 | void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 250 | Id value) { | ||
| 251 | if (ctx.profile.support_descriptor_aliasing) { | ||
| 252 | WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32x2, sizeof(u32[2]), | ||
| 253 | &StorageDefinitions::U32x2); | ||
| 254 | } else { | ||
| 255 | for (u32 index = 0; index < 2; ++index) { | ||
| 256 | const Id element{ctx.OpCompositeExtract(ctx.U32[1], value, index)}; | ||
| 257 | WriteStorage32(ctx, binding, offset, element, index); | ||
| 258 | } | ||
| 259 | } | ||
| 260 | } | ||
| 261 | |||
| 262 | void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 263 | Id value) { | ||
| 264 | if (ctx.profile.support_descriptor_aliasing) { | ||
| 265 | WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32x4, sizeof(u32[4]), | ||
| 266 | &StorageDefinitions::U32x4); | ||
| 267 | } else { | ||
| 268 | for (u32 index = 0; index < 4; ++index) { | ||
| 269 | const Id element{ctx.OpCompositeExtract(ctx.U32[1], value, index)}; | ||
| 270 | WriteStorage32(ctx, binding, offset, element, index); | ||
| 271 | } | ||
| 272 | } | ||
| 273 | } | ||
| 274 | |||
| 275 | } // namespace Shader::Backend::SPIRV | ||
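The helpers at the top of this file carry the address math for every storage access: StorageIndex converts a byte offset into an element index by shifting with countr_zero(element_size), which works because every element size here is a power of two, and index_offset lets the non-aliasing fallbacks address consecutive 32-bit words. A hypothetical scalar model:

#include <bit>
#include <cstdint>

// Same math as StorageIndex: divide a byte offset by a power-of-two
// element size using a right shift, then step by index_offset elements.
inline uint32_t ElementIndex(uint32_t byte_offset, size_t element_size,
                             uint32_t index_offset = 0) {
    const auto shift = static_cast<uint32_t>(std::countr_zero(element_size));
    return (byte_offset >> shift) + index_offset;
}

So a 128-bit load at byte offset 16 without descriptor aliasing becomes four 32-bit loads of words ElementIndex(16, 4, 0) through ElementIndex(16, 4, 3), i.e. words 4 to 7, exactly what EmitLoadStorage128's OpCompositeConstruct path emits.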
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp new file mode 100644 index 000000000..c5b4f4720 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp | |||
| @@ -0,0 +1,42 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 6 | #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" | ||
| 7 | |||
| 8 | namespace Shader::Backend::SPIRV { | ||
| 9 | |||
| 10 | Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value) { | ||
| 11 | return ctx.OpSelect(ctx.U1, cond, true_value, false_value); | ||
| 12 | } | ||
| 13 | |||
| 14 | Id EmitSelectU8(EmitContext&, Id, Id, Id) { | ||
| 15 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 16 | } | ||
| 17 | |||
| 18 | Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value) { | ||
| 19 | return ctx.OpSelect(ctx.U16, cond, true_value, false_value); | ||
| 20 | } | ||
| 21 | |||
| 22 | Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value) { | ||
| 23 | return ctx.OpSelect(ctx.U32[1], cond, true_value, false_value); | ||
| 24 | } | ||
| 25 | |||
| 26 | Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value) { | ||
| 27 | return ctx.OpSelect(ctx.U64, cond, true_value, false_value); | ||
| 28 | } | ||
| 29 | |||
| 30 | Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value) { | ||
| 31 | return ctx.OpSelect(ctx.F16[1], cond, true_value, false_value); | ||
| 32 | } | ||
| 33 | |||
| 34 | Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value) { | ||
| 35 | return ctx.OpSelect(ctx.F32[1], cond, true_value, false_value); | ||
| 36 | } | ||
| 37 | |||
| 38 | Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value) { | ||
| 39 | return ctx.OpSelect(ctx.F64[1], cond, true_value, false_value); | ||
| 40 | } | ||
| 41 | |||
| 42 | } // namespace Shader::Backend::SPIRV | ||
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp new file mode 100644 index 000000000..9a79fc7a2 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp | |||
| @@ -0,0 +1,174 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 6 | #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" | ||
| 7 | |||
| 8 | namespace Shader::Backend::SPIRV { | ||
| 9 | namespace { | ||
| 10 | Id Pointer(EmitContext& ctx, Id pointer_type, Id array, Id offset, u32 shift) { | ||
| 11 | const Id shift_id{ctx.Const(shift)}; | ||
| 12 | const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; | ||
| 13 | return ctx.OpAccessChain(pointer_type, array, ctx.u32_zero_value, index); | ||
| 14 | } | ||
| 15 | |||
| 16 | Id Word(EmitContext& ctx, Id offset) { | ||
| 17 | const Id shift_id{ctx.Const(2U)}; | ||
| 18 | const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; | ||
| 19 | const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; | ||
| 20 | return ctx.OpLoad(ctx.U32[1], pointer); | ||
| 21 | } | ||
| 22 | |||
| 23 | std::pair<Id, Id> ExtractArgs(EmitContext& ctx, Id offset, u32 mask, u32 count) { | ||
| 24 | const Id shift{ctx.OpShiftLeftLogical(ctx.U32[1], offset, ctx.Const(3U))}; | ||
| 25 | const Id bit{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Const(mask))}; | ||
| 26 | const Id count_id{ctx.Const(count)}; | ||
| 27 | return {bit, count_id}; | ||
| 28 | } | ||
| 29 | } // Anonymous namespace | ||
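// Without VK_KHR_workgroup_memory_explicit_layout, workgroup memory only
// exists as an array of 32-bit words, so the 8- and 16-bit loads below
// read the containing word and extract a bitfield. A hypothetical scalar
// model of the Word/ExtractArgs fallback (mask is 24 for byte loads and
// 16 for halfword loads, i.e. the bit position stays aligned to the
// access size):

#include <cstdint>

inline uint32_t LoadSharedBits(const uint32_t* shared, uint32_t byte_offset,
                               uint32_t mask, uint32_t count) {
    const uint32_t word = shared[byte_offset >> 2]; // Word(ctx, offset)
    const uint32_t bit = (byte_offset << 3) & mask; // ExtractArgs
    return (word >> bit) & ((1u << count) - 1u);    // OpBitFieldUExtract
}

// e.g. LoadSharedBits(mem, 5, 24, 8) extracts bits [8, 16) of word 1.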
| 30 | |||
| 31 | Id EmitLoadSharedU8(EmitContext& ctx, Id offset) { | ||
| 32 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 33 | const Id pointer{ | ||
| 34 | ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)}; | ||
| 35 | return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer)); | ||
| 36 | } else { | ||
| 37 | const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)}; | ||
| 38 | return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count); | ||
| 39 | } | ||
| 40 | } | ||
| 41 | |||
| 42 | Id EmitLoadSharedS8(EmitContext& ctx, Id offset) { | ||
| 43 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 44 | const Id pointer{ | ||
| 45 | ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)}; | ||
| 46 | return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer)); | ||
| 47 | } else { | ||
| 48 | const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)}; | ||
| 49 | return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count); | ||
| 50 | } | ||
| 51 | } | ||
| 52 | |||
| 53 | Id EmitLoadSharedU16(EmitContext& ctx, Id offset) { | ||
| 54 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 55 | const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)}; | ||
| 56 | return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer)); | ||
| 57 | } else { | ||
| 58 | const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)}; | ||
| 59 | return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count); | ||
| 60 | } | ||
| 61 | } | ||
| 62 | |||
| 63 | Id EmitLoadSharedS16(EmitContext& ctx, Id offset) { | ||
| 64 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 65 | const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)}; | ||
| 66 | return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer)); | ||
| 67 | } else { | ||
| 68 | const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)}; | ||
| 69 | return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count); | ||
| 70 | } | ||
| 71 | } | ||
| 72 | |||
| 73 | Id EmitLoadSharedU32(EmitContext& ctx, Id offset) { | ||
| 74 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 75 | const Id pointer{Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2)}; | ||
| 76 | return ctx.OpLoad(ctx.U32[1], pointer); | ||
| 77 | } else { | ||
| 78 | return Word(ctx, offset); | ||
| 79 | } | ||
| 80 | } | ||
| 81 | |||
| 82 | Id EmitLoadSharedU64(EmitContext& ctx, Id offset) { | ||
| 83 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 84 | const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)}; | ||
| 85 | return ctx.OpLoad(ctx.U32[2], pointer); | ||
| 86 | } else { | ||
| 87 | const Id shift_id{ctx.Const(2U)}; | ||
| 88 | const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; | ||
| 89 | const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(1U))}; | ||
| 90 | const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)}; | ||
| 91 | const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)}; | ||
| 92 | return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer), | ||
| 93 | ctx.OpLoad(ctx.U32[1], rhs_pointer)); | ||
| 94 | } | ||
| 95 | } | ||
| 96 | |||
| 97 | Id EmitLoadSharedU128(EmitContext& ctx, Id offset) { | ||
| 98 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 99 | const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)}; | ||
| 100 | return ctx.OpLoad(ctx.U32[4], pointer); | ||
| 101 | } | ||
| 102 | const Id shift_id{ctx.Const(2U)}; | ||
| 103 | const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; | ||
| 104 | std::array<Id, 4> values{}; | ||
| 105 | for (u32 i = 0; i < 4; ++i) { | ||
| 106 | const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(i))}; | ||
| 107 | const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; | ||
| 108 | values[i] = ctx.OpLoad(ctx.U32[1], pointer); | ||
| 109 | } | ||
| 110 | return ctx.OpCompositeConstruct(ctx.U32[4], values); | ||
| 111 | } | ||
| 112 | |||
| 113 | void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) { | ||
| 114 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 115 | const Id pointer{ | ||
| 116 | ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)}; | ||
| 117 | ctx.OpStore(pointer, ctx.OpUConvert(ctx.U8, value)); | ||
| 118 | } else { | ||
| 119 | ctx.OpFunctionCall(ctx.void_id, ctx.shared_store_u8_func, offset, value); | ||
| 120 | } | ||
| 121 | } | ||
| 122 | |||
| 123 | void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) { | ||
| 124 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 125 | const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)}; | ||
| 126 | ctx.OpStore(pointer, ctx.OpUConvert(ctx.U16, value)); | ||
| 127 | } else { | ||
| 128 | ctx.OpFunctionCall(ctx.void_id, ctx.shared_store_u16_func, offset, value); | ||
| 129 | } | ||
| 130 | } | ||
| 131 | |||
| 132 | void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) { | ||
| 133 | Id pointer{}; | ||
| 134 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 135 | pointer = Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2); | ||
| 136 | } else { | ||
| 137 | const Id shift{ctx.Const(2U)}; | ||
| 138 | const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; | ||
| 139 | pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset); | ||
| 140 | } | ||
| 141 | ctx.OpStore(pointer, value); | ||
| 142 | } | ||
| 143 | |||
| 144 | void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) { | ||
| 145 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 146 | const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)}; | ||
| 147 | ctx.OpStore(pointer, value); | ||
| 148 | return; | ||
| 149 | } | ||
| 150 | const Id shift{ctx.Const(2U)}; | ||
| 151 | const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; | ||
| 152 | const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.Const(1U))}; | ||
| 153 | const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)}; | ||
| 154 | const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)}; | ||
| 155 | ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U)); | ||
| 156 | ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U)); | ||
| 157 | } | ||
| 158 | |||
| 159 | void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) { | ||
| 160 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 161 | const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)}; | ||
| 162 | ctx.OpStore(pointer, value); | ||
| 163 | return; | ||
| 164 | } | ||
| 165 | const Id shift{ctx.Const(2U)}; | ||
| 166 | const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; | ||
| 167 | for (u32 i = 0; i < 4; ++i) { | ||
| 168 | const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(i))}; | ||
| 169 | const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; | ||
| 170 | ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, i)); | ||
| 171 | } | ||
| 172 | } | ||
| 173 | |||
| 174 | } // namespace Shader::Backend::SPIRV | ||
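The wide shared-memory stores follow the same split: with the explicit-layout extension, the aliased u32x2/u32x4 views make a 64- or 128-bit access a single OpStore, while the fallback decomposes the value into words written at consecutive 32-bit indices. A hypothetical scalar model of EmitWriteSharedU64's fallback path:

#include <cstdint>

// Split the 64-bit value into two words stored at consecutive 32-bit
// indices, mirroring the lhs_pointer/rhs_pointer stores above.
inline void WriteSharedU64(uint32_t* shared, uint32_t byte_offset, uint64_t value) {
    const uint32_t index = byte_offset >> 2;
    shared[index] = static_cast<uint32_t>(value);            // CompositeExtract 0
    shared[index + 1] = static_cast<uint32_t>(value >> 32);  // CompositeExtract 1
}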
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp new file mode 100644 index 000000000..9e7eb3cb1 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp | |||
| @@ -0,0 +1,150 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 6 | #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" | ||
| 7 | |||
| 8 | namespace Shader::Backend::SPIRV { | ||
| 9 | namespace { | ||
| 10 | void ConvertDepthMode(EmitContext& ctx) { | ||
| 11 | const Id type{ctx.F32[1]}; | ||
| 12 | const Id position{ctx.OpLoad(ctx.F32[4], ctx.output_position)}; | ||
| 13 | const Id z{ctx.OpCompositeExtract(type, position, 2u)}; | ||
| 14 | const Id w{ctx.OpCompositeExtract(type, position, 3u)}; | ||
| 15 | const Id screen_depth{ctx.OpFMul(type, ctx.OpFAdd(type, z, w), ctx.Constant(type, 0.5f))}; | ||
| 16 | const Id vector{ctx.OpCompositeInsert(ctx.F32[4], screen_depth, position, 2u)}; | ||
| 17 | ctx.OpStore(ctx.output_position, vector); | ||
| 18 | } | ||
| 19 | |||
| 20 | void SetFixedPipelinePointSize(EmitContext& ctx) { | ||
| 21 | if (ctx.runtime_info.fixed_state_point_size) { | ||
| 22 | const float point_size{*ctx.runtime_info.fixed_state_point_size}; | ||
| 23 | ctx.OpStore(ctx.output_point_size, ctx.Const(point_size)); | ||
| 24 | } | ||
| 25 | } | ||
| 26 | |||
| 27 | Id DefaultVarying(EmitContext& ctx, u32 num_components, u32 element, Id zero, Id one, | ||
| 28 | Id default_vector) { | ||
| 29 | switch (num_components) { | ||
| 30 | case 1: | ||
| 31 | return element == 3 ? one : zero; | ||
| 32 | case 2: | ||
| 33 | return ctx.ConstantComposite(ctx.F32[2], zero, element + 1 == 3 ? one : zero); | ||
| 34 | case 3: | ||
| 35 | return ctx.ConstantComposite(ctx.F32[3], zero, zero, element + 2 == 3 ? one : zero); | ||
| 36 | case 4: | ||
| 37 | return default_vector; | ||
| 38 | } | ||
| 39 | throw InvalidArgument("Bad element"); | ||
| 40 | } | ||
| 41 | |||
| 42 | Id ComparisonFunction(EmitContext& ctx, CompareFunction comparison, Id operand_1, Id operand_2) { | ||
| 43 | switch (comparison) { | ||
| 44 | case CompareFunction::Never: | ||
| 45 | return ctx.false_value; | ||
| 46 | case CompareFunction::Less: | ||
| 47 | return ctx.OpFOrdLessThan(ctx.U1, operand_1, operand_2); | ||
| 48 | case CompareFunction::Equal: | ||
| 49 | return ctx.OpFOrdEqual(ctx.U1, operand_1, operand_2); | ||
| 50 | case CompareFunction::LessThanEqual: | ||
| 51 | return ctx.OpFOrdLessThanEqual(ctx.U1, operand_1, operand_2); | ||
| 52 | case CompareFunction::Greater: | ||
| 53 | return ctx.OpFOrdGreaterThan(ctx.U1, operand_1, operand_2); | ||
| 54 | case CompareFunction::NotEqual: | ||
| 55 | return ctx.OpFOrdNotEqual(ctx.U1, operand_1, operand_2); | ||
| 56 | case CompareFunction::GreaterThanEqual: | ||
| 57 | return ctx.OpFOrdGreaterThanEqual(ctx.U1, operand_1, operand_2); | ||
| 58 | case CompareFunction::Always: | ||
| 59 | return ctx.true_value; | ||
| 60 | } | ||
| 61 | throw InvalidArgument("Comparison function {}", comparison); | ||
| 62 | } | ||
| 63 | |||
| 64 | void AlphaTest(EmitContext& ctx) { | ||
| 65 | if (!ctx.runtime_info.alpha_test_func) { | ||
| 66 | return; | ||
| 67 | } | ||
| 68 | const auto comparison{*ctx.runtime_info.alpha_test_func}; | ||
| 69 | if (comparison == CompareFunction::Always) { | ||
| 70 | return; | ||
| 71 | } | ||
| 72 | if (!Sirit::ValidId(ctx.frag_color[0])) { | ||
| 73 | return; | ||
| 74 | } | ||
| 75 | |||
| 76 | const Id type{ctx.F32[1]}; | ||
| 77 | const Id rt0_color{ctx.OpLoad(ctx.F32[4], ctx.frag_color[0])}; | ||
| 78 | const Id alpha{ctx.OpCompositeExtract(type, rt0_color, 3u)}; | ||
| 79 | |||
| 80 | const Id true_label{ctx.OpLabel()}; | ||
| 81 | const Id discard_label{ctx.OpLabel()}; | ||
| 82 | const Id alpha_reference{ctx.Const(ctx.runtime_info.alpha_test_reference)}; | ||
| 83 | const Id condition{ComparisonFunction(ctx, comparison, alpha, alpha_reference)}; | ||
| 84 | |||
| 85 | ctx.OpSelectionMerge(true_label, spv::SelectionControlMask::MaskNone); | ||
| 86 | ctx.OpBranchConditional(condition, true_label, discard_label); | ||
| 87 | ctx.AddLabel(discard_label); | ||
| 88 | ctx.OpKill(); | ||
| 89 | ctx.AddLabel(true_label); | ||
| 90 | } | ||
| 91 | } // Anonymous namespace | ||
| 92 | |||
| 93 | void EmitPrologue(EmitContext& ctx) { | ||
| 94 | if (ctx.stage == Stage::VertexB) { | ||
| 95 | const Id zero{ctx.Const(0.0f)}; | ||
| 96 | const Id one{ctx.Const(1.0f)}; | ||
| 97 | const Id default_vector{ctx.ConstantComposite(ctx.F32[4], zero, zero, zero, one)}; | ||
| 98 | ctx.OpStore(ctx.output_position, default_vector); | ||
| 99 | for (const auto& info : ctx.output_generics) { | ||
| 100 | if (info[0].num_components == 0) { | ||
| 101 | continue; | ||
| 102 | } | ||
| 103 | u32 element{0}; | ||
| 104 | while (element < 4) { | ||
| 105 | const auto& element_info{info[element]}; | ||
| 106 | const u32 num{element_info.num_components}; | ||
| 107 | const Id value{DefaultVarying(ctx, num, element, zero, one, default_vector)}; | ||
| 108 | ctx.OpStore(element_info.id, value); | ||
| 109 | element += num; | ||
| 110 | } | ||
| 111 | } | ||
| 112 | } | ||
| 113 | if (ctx.stage == Stage::VertexB || ctx.stage == Stage::Geometry) { | ||
| 114 | SetFixedPipelinePointSize(ctx); | ||
| 115 | } | ||
| 116 | } | ||
| 117 | |||
| 118 | void EmitEpilogue(EmitContext& ctx) { | ||
| 119 | if (ctx.stage == Stage::VertexB && ctx.runtime_info.convert_depth_mode) { | ||
| 120 | ConvertDepthMode(ctx); | ||
| 121 | } | ||
| 122 | if (ctx.stage == Stage::Fragment) { | ||
| 123 | AlphaTest(ctx); | ||
| 124 | } | ||
| 125 | } | ||
| 126 | |||
| 127 | void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { | ||
| 128 | if (ctx.runtime_info.convert_depth_mode) { | ||
| 129 | ConvertDepthMode(ctx); | ||
| 130 | } | ||
| 131 | if (stream.IsImmediate()) { | ||
| 132 | ctx.OpEmitStreamVertex(ctx.Def(stream)); | ||
| 133 | } else { | ||
| 134 | LOG_WARNING(Shader_SPIRV, "Stream is not immediate"); | ||
| 135 | ctx.OpEmitStreamVertex(ctx.u32_zero_value); | ||
| 136 | } | ||
| 137 | // Restore fixed pipeline point size after emitting the vertex | ||
| 138 | SetFixedPipelinePointSize(ctx); | ||
| 139 | } | ||
| 140 | |||
| 141 | void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { | ||
| 142 | if (stream.IsImmediate()) { | ||
| 143 | ctx.OpEndStreamPrimitive(ctx.Def(stream)); | ||
| 144 | } else { | ||
| 145 | LOG_WARNING(Shader_SPIRV, "Stream is not immediate"); | ||
| 146 | ctx.OpEndStreamPrimitive(ctx.u32_zero_value); | ||
| 147 | } | ||
| 148 | } | ||
| 149 | |||
| 150 | } // namespace Shader::Backend::SPIRV | ||
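
For orientation, the `AlphaTest` epilogue above reproduces the fixed-function alpha test: load render target 0, extract its alpha component, compare against the reference value, and `OpKill` the fragment when the comparison fails. A scalar model of the comparison table it emits (a sketch only; the enum is restated locally so the snippet stands alone):

```cpp
// Local restatement for self-containment; the real enum lives in the
// recompiler's runtime info, not in this snippet.
enum class CompareFunction {
    Never, Less, Equal, LessThanEqual, Greater, NotEqual, GreaterThanEqual, Always
};

bool PassesAlphaTest(CompareFunction func, float alpha, float reference) {
    // Caveat: the emitted OpFOrd* comparisons all fail on NaN operands,
    // whereas plain C++ operator!= succeeds on NaN.
    switch (func) {
    case CompareFunction::Never:            return false;
    case CompareFunction::Less:             return alpha < reference;
    case CompareFunction::Equal:            return alpha == reference;
    case CompareFunction::LessThanEqual:    return alpha <= reference;
    case CompareFunction::Greater:          return alpha > reference;
    case CompareFunction::NotEqual:         return alpha != reference;
    case CompareFunction::GreaterThanEqual: return alpha >= reference;
    case CompareFunction::Always:           return true;
    }
    return true;
}
// A fragment for which PassesAlphaTest(...) is false takes the OpKill branch.
```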
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp new file mode 100644 index 000000000..c9f469e90 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp | |||
| @@ -0,0 +1,30 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 6 | #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" | ||
| 7 | |||
| 8 | namespace Shader::Backend::SPIRV { | ||
| 9 | |||
| 10 | Id EmitUndefU1(EmitContext& ctx) { | ||
| 11 | return ctx.OpUndef(ctx.U1); | ||
| 12 | } | ||
| 13 | |||
| 14 | Id EmitUndefU8(EmitContext&) { | ||
| 15 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 16 | } | ||
| 17 | |||
| 18 | Id EmitUndefU16(EmitContext&) { | ||
| 19 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 20 | } | ||
| 21 | |||
| 22 | Id EmitUndefU32(EmitContext& ctx) { | ||
| 23 | return ctx.OpUndef(ctx.U32[1]); | ||
| 24 | } | ||
| 25 | |||
| 26 | Id EmitUndefU64(EmitContext&) { | ||
| 27 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 28 | } | ||
| 29 | |||
| 30 | } // namespace Shader::Backend::SPIRV | ||
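
Only the 1-bit and 32-bit undefs are needed so far; the 8-, 16- and 64-bit variants would require the context to declare integer types gated behind the SPIR-V Int8/Int16/Int64 capabilities, so they throw until something uses them. A hypothetical completion of the 64-bit case, assuming the context exposes a `U64` type id:

```cpp
// Sketch only: mirrors EmitUndefU32 once an Int64 type id is declared.
Id EmitUndefU64(EmitContext& ctx) {
    return ctx.OpUndef(ctx.U64); // assumes ctx.U64 exists and Int64 is enabled
}
```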
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp new file mode 100644 index 000000000..78b1e1ba7 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp | |||
| @@ -0,0 +1,203 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 6 | #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" | ||
| 7 | |||
| 8 | namespace Shader::Backend::SPIRV { | ||
| 9 | namespace { | ||
| 10 | Id WarpExtract(EmitContext& ctx, Id value) { | ||
| 11 | const Id word_index{ctx.OpShiftRightLogical(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id), ctx.Const(5U))}; | ||
| 12 | return ctx.OpVectorExtractDynamic(ctx.U32[1], value, word_index); // lane / 32 selects this warp's ballot word | ||
| 13 | } | ||
| 14 | |||
| 15 | Id LoadMask(EmitContext& ctx, Id mask) { | ||
| 16 | const Id value{ctx.OpLoad(ctx.U32[4], mask)}; | ||
| 17 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 18 | return ctx.OpCompositeExtract(ctx.U32[1], value, 0U); | ||
| 19 | } | ||
| 20 | return WarpExtract(ctx, value); | ||
| 21 | } | ||
| 22 | |||
| 23 | void SetInBoundsFlag(IR::Inst* inst, Id result) { | ||
| 24 | IR::Inst* const in_bounds{inst->GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)}; | ||
| 25 | if (!in_bounds) { | ||
| 26 | return; | ||
| 27 | } | ||
| 28 | in_bounds->SetDefinition(result); | ||
| 29 | in_bounds->Invalidate(); | ||
| 30 | } | ||
| 31 | |||
| 32 | Id ComputeMinThreadId(EmitContext& ctx, Id thread_id, Id segmentation_mask) { | ||
| 33 | return ctx.OpBitwiseAnd(ctx.U32[1], thread_id, segmentation_mask); | ||
| 34 | } | ||
| 35 | |||
| 36 | Id ComputeMaxThreadId(EmitContext& ctx, Id min_thread_id, Id clamp, Id not_seg_mask) { | ||
| 37 | return ctx.OpBitwiseOr(ctx.U32[1], min_thread_id, | ||
| 38 | ctx.OpBitwiseAnd(ctx.U32[1], clamp, not_seg_mask)); | ||
| 39 | } | ||
| 40 | |||
| 41 | Id GetMaxThreadId(EmitContext& ctx, Id thread_id, Id clamp, Id segmentation_mask) { | ||
| 42 | const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; | ||
| 43 | const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; | ||
| 44 | return ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask); | ||
| 45 | } | ||
| 46 | |||
| 47 | Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) { | ||
| 48 | return ctx.OpSelect(ctx.U32[1], in_range, | ||
| 49 | ctx.OpSubgroupReadInvocationKHR(ctx.U32[1], value, src_thread_id), value); | ||
| 50 | } | ||
| 51 | } // Anonymous namespace | ||
| 52 | |||
| 53 | Id EmitLaneId(EmitContext& ctx) { | ||
| 54 | const Id id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||
| 55 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 56 | return id; | ||
| 57 | } | ||
| 58 | return ctx.OpBitwiseAnd(ctx.U32[1], id, ctx.Const(31U)); | ||
| 59 | } | ||
| 60 | |||
| 61 | Id EmitVoteAll(EmitContext& ctx, Id pred) { | ||
| 62 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 63 | return ctx.OpSubgroupAllKHR(ctx.U1, pred); | ||
| 64 | } | ||
| 65 | const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; | ||
| 66 | const Id active_mask{WarpExtract(ctx, mask_ballot)}; | ||
| 67 | const Id ballot{WarpExtract(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; | ||
| 68 | const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; | ||
| 69 | return ctx.OpIEqual(ctx.U1, lhs, active_mask); | ||
| 70 | } | ||
| 71 | |||
| 72 | Id EmitVoteAny(EmitContext& ctx, Id pred) { | ||
| 73 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 74 | return ctx.OpSubgroupAnyKHR(ctx.U1, pred); | ||
| 75 | } | ||
| 76 | const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; | ||
| 77 | const Id active_mask{WarpExtract(ctx, mask_ballot)}; | ||
| 78 | const Id ballot{WarpExtract(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; | ||
| 79 | const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; | ||
| 80 | return ctx.OpINotEqual(ctx.U1, lhs, ctx.u32_zero_value); | ||
| 81 | } | ||
| 82 | |||
| 83 | Id EmitVoteEqual(EmitContext& ctx, Id pred) { | ||
| 84 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 85 | return ctx.OpSubgroupAllEqualKHR(ctx.U1, pred); | ||
| 86 | } | ||
| 87 | const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; | ||
| 88 | const Id active_mask{WarpExtract(ctx, mask_ballot)}; | ||
| 89 | const Id ballot{WarpExtract(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; | ||
| 90 | const Id lhs{ctx.OpBitwiseXor(ctx.U32[1], ballot, active_mask)}; | ||
| 91 | return ctx.OpLogicalOr(ctx.U1, ctx.OpIEqual(ctx.U1, lhs, ctx.u32_zero_value), | ||
| 92 | ctx.OpIEqual(ctx.U1, lhs, active_mask)); | ||
| 93 | } | ||
| 94 | |||
| 95 | Id EmitSubgroupBallot(EmitContext& ctx, Id pred) { | ||
| 96 | const Id ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], pred)}; | ||
| 97 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 98 | return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0U); | ||
| 99 | } | ||
| 100 | return WarpExtract(ctx, ballot); | ||
| 101 | } | ||
| 102 | |||
| 103 | Id EmitSubgroupEqMask(EmitContext& ctx) { | ||
| 104 | return LoadMask(ctx, ctx.subgroup_mask_eq); | ||
| 105 | } | ||
| 106 | |||
| 107 | Id EmitSubgroupLtMask(EmitContext& ctx) { | ||
| 108 | return LoadMask(ctx, ctx.subgroup_mask_lt); | ||
| 109 | } | ||
| 110 | |||
| 111 | Id EmitSubgroupLeMask(EmitContext& ctx) { | ||
| 112 | return LoadMask(ctx, ctx.subgroup_mask_le); | ||
| 113 | } | ||
| 114 | |||
| 115 | Id EmitSubgroupGtMask(EmitContext& ctx) { | ||
| 116 | return LoadMask(ctx, ctx.subgroup_mask_gt); | ||
| 117 | } | ||
| 118 | |||
| 119 | Id EmitSubgroupGeMask(EmitContext& ctx) { | ||
| 120 | return LoadMask(ctx, ctx.subgroup_mask_ge); | ||
| 121 | } | ||
| 122 | |||
| 123 | Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||
| 124 | Id segmentation_mask) { | ||
| 125 | const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; | ||
| 126 | const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||
| 127 | const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; | ||
| 128 | const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)}; | ||
| 129 | |||
| 130 | const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)}; | ||
| 131 | const Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)}; | ||
| 132 | const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | ||
| 133 | |||
| 134 | SetInBoundsFlag(inst, in_range); | ||
| 135 | return SelectValue(ctx, in_range, value, src_thread_id); | ||
| 136 | } | ||
| 137 | |||
| 138 | Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||
| 139 | Id segmentation_mask) { | ||
| 140 | const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||
| 141 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; | ||
| 142 | const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)}; | ||
| 143 | const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | ||
| 144 | |||
| 145 | SetInBoundsFlag(inst, in_range); | ||
| 146 | return SelectValue(ctx, in_range, value, src_thread_id); | ||
| 147 | } | ||
| 148 | |||
| 149 | Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||
| 150 | Id segmentation_mask) { | ||
| 151 | const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||
| 152 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; | ||
| 153 | const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)}; | ||
| 154 | const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | ||
| 155 | |||
| 156 | SetInBoundsFlag(inst, in_range); | ||
| 157 | return SelectValue(ctx, in_range, value, src_thread_id); | ||
| 158 | } | ||
| 159 | |||
| 160 | Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||
| 161 | Id segmentation_mask) { | ||
| 162 | const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||
| 163 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; | ||
| 164 | const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)}; | ||
| 165 | const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | ||
| 166 | |||
| 167 | SetInBoundsFlag(inst, in_range); | ||
| 168 | return SelectValue(ctx, in_range, value, src_thread_id); | ||
| 169 | } | ||
| 170 | |||
| 171 | Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle) { | ||
| 172 | const Id three{ctx.Const(3U)}; | ||
| 173 | Id mask{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||
| 174 | mask = ctx.OpBitwiseAnd(ctx.U32[1], mask, three); | ||
| 175 | mask = ctx.OpShiftLeftLogical(ctx.U32[1], mask, ctx.Const(1U)); | ||
| 176 | mask = ctx.OpShiftRightLogical(ctx.U32[1], swizzle, mask); | ||
| 177 | mask = ctx.OpBitwiseAnd(ctx.U32[1], mask, three); | ||
| 178 | |||
| 179 | const Id modifier_a{ctx.OpVectorExtractDynamic(ctx.F32[1], ctx.fswzadd_lut_a, mask)}; | ||
| 180 | const Id modifier_b{ctx.OpVectorExtractDynamic(ctx.F32[1], ctx.fswzadd_lut_b, mask)}; | ||
| 181 | |||
| 182 | const Id result_a{ctx.OpFMul(ctx.F32[1], op_a, modifier_a)}; | ||
| 183 | const Id result_b{ctx.OpFMul(ctx.F32[1], op_b, modifier_b)}; | ||
| 184 | return ctx.OpFAdd(ctx.F32[1], result_a, result_b); | ||
| 185 | } | ||
| 186 | |||
| 187 | Id EmitDPdxFine(EmitContext& ctx, Id op_a) { | ||
| 188 | return ctx.OpDPdxFine(ctx.F32[1], op_a); | ||
| 189 | } | ||
| 190 | |||
| 191 | Id EmitDPdyFine(EmitContext& ctx, Id op_a) { | ||
| 192 | return ctx.OpDPdyFine(ctx.F32[1], op_a); | ||
| 193 | } | ||
| 194 | |||
| 195 | Id EmitDPdxCoarse(EmitContext& ctx, Id op_a) { | ||
| 196 | return ctx.OpDPdxCoarse(ctx.F32[1], op_a); | ||
| 197 | } | ||
| 198 | |||
| 199 | Id EmitDPdyCoarse(EmitContext& ctx, Id op_a) { | ||
| 200 | return ctx.OpDPdyCoarse(ctx.F32[1], op_a); | ||
| 201 | } | ||
| 202 | |||
| 203 | } // namespace Shader::Backend::SPIRV | ||
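
The `warp_size_potentially_larger_than_guest` paths above handle hosts whose subgroup is wider than the guest's 32-thread warp (64-lane hardware, for instance): a plain `OpSubgroupAllKHR` would vote across two guest warps at once, so the votes are rebuilt from a ballot restricted to the caller's 32-bit ballot word. A scalar model of that logic (illustrative only):

```cpp
#include <cstdint>

// 'ballot' and 'active' stand in for the uvec4 results of OpSubgroupBallotKHR;
// 'lane' is the subgroup-local invocation id.
std::uint32_t ExtractWord(const std::uint32_t words[4], unsigned lane) {
    return words[lane / 32]; // the 32-bit word covering this guest warp
}

bool VoteAll(const std::uint32_t ballot[4], const std::uint32_t active[4],
             unsigned lane) {
    const std::uint32_t b = ExtractWord(ballot, lane);
    const std::uint32_t m = ExtractWord(active, lane);
    return (b & m) == m; // every active lane in the guest warp voted true
}

bool VoteAny(const std::uint32_t ballot[4], const std::uint32_t active[4],
             unsigned lane) {
    return (ExtractWord(ballot, lane) & ExtractWord(active, lane)) != 0;
}

bool VoteEqual(const std::uint32_t ballot[4], const std::uint32_t active[4],
               unsigned lane) {
    // XOR leaves the active lanes that voted false: all-true gives 0,
    // all-false gives the active mask itself.
    const std::uint32_t x = ExtractWord(ballot, lane) ^ ExtractWord(active, lane);
    return x == 0 || x == ExtractWord(active, lane);
}
```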
diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h new file mode 100644 index 000000000..8369d0d84 --- /dev/null +++ b/src/shader_recompiler/environment.h | |||
| @@ -0,0 +1,53 @@ | |||
| 1 | #pragma once | ||
| 2 | |||
| 3 | #include <array> | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/program_header.h" | ||
| 7 | #include "shader_recompiler/shader_info.h" | ||
| 8 | #include "shader_recompiler/stage.h" | ||
| 9 | |||
| 10 | namespace Shader { | ||
| 11 | |||
| 12 | class Environment { | ||
| 13 | public: | ||
| 14 | virtual ~Environment() = default; | ||
| 15 | |||
| 16 | [[nodiscard]] virtual u64 ReadInstruction(u32 address) = 0; | ||
| 17 | |||
| 18 | [[nodiscard]] virtual u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) = 0; | ||
| 19 | |||
| 20 | [[nodiscard]] virtual TextureType ReadTextureType(u32 raw_handle) = 0; | ||
| 21 | |||
| 22 | [[nodiscard]] virtual u32 TextureBoundBuffer() const = 0; | ||
| 23 | |||
| 24 | [[nodiscard]] virtual u32 LocalMemorySize() const = 0; | ||
| 25 | |||
| 26 | [[nodiscard]] virtual u32 SharedMemorySize() const = 0; | ||
| 27 | |||
| 28 | [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() const = 0; | ||
| 29 | |||
| 30 | [[nodiscard]] const ProgramHeader& SPH() const noexcept { | ||
| 31 | return sph; | ||
| 32 | } | ||
| 33 | |||
| 34 | [[nodiscard]] const std::array<u32, 8>& GpPassthroughMask() const noexcept { | ||
| 35 | return gp_passthrough_mask; | ||
| 36 | } | ||
| 37 | |||
| 38 | [[nodiscard]] Stage ShaderStage() const noexcept { | ||
| 39 | return stage; | ||
| 40 | } | ||
| 41 | |||
| 42 | [[nodiscard]] u32 StartAddress() const noexcept { | ||
| 43 | return start_address; | ||
| 44 | } | ||
| 45 | |||
| 46 | protected: | ||
| 47 | ProgramHeader sph{}; | ||
| 48 | std::array<u32, 8> gp_passthrough_mask{}; | ||
| 49 | Stage stage{}; | ||
| 50 | u32 start_address{}; | ||
| 51 | }; | ||
| 52 | |||
| 53 | } // namespace Shader | ||
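
`Environment` is the recompiler's only window into guest code and pipeline state, which keeps the frontend testable in isolation. A minimal sketch of a concrete implementation backed by a flat buffer (a hypothetical test double; `TextureType::Color2D` and a program start offset of zero are assumptions):

```cpp
#include <array>
#include <vector>

#include "shader_recompiler/environment.h"

// Hypothetical test double, not part of the diff.
class DummyEnvironment final : public Shader::Environment {
public:
    u64 ReadInstruction(u32 address) override {
        // Maxwell instructions are 64-bit words; assumes the program starts
        // at byte offset 0 of 'code'.
        return code.at(address / sizeof(u64));
    }
    u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override {
        return 0; // no constant buffers in this sketch
    }
    Shader::TextureType ReadTextureType(u32 raw_handle) override {
        return Shader::TextureType::Color2D; // assumed enumerator
    }
    u32 TextureBoundBuffer() const override { return 0; }
    u32 LocalMemorySize() const override { return 0; }
    u32 SharedMemorySize() const override { return 0; }
    std::array<u32, 3> WorkgroupSize() const override { return {1, 1, 1}; }

    std::vector<u64> code;
};
```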
diff --git a/src/shader_recompiler/exception.h b/src/shader_recompiler/exception.h new file mode 100644 index 000000000..337e7f0c8 --- /dev/null +++ b/src/shader_recompiler/exception.h | |||
| @@ -0,0 +1,66 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <stdexcept> | ||
| 8 | #include <string> | ||
| 9 | #include <string_view> | ||
| 10 | #include <utility> | ||
| 11 | |||
| 12 | #include <fmt/format.h> | ||
| 13 | |||
| 14 | namespace Shader { | ||
| 15 | |||
| 16 | class Exception : public std::exception { | ||
| 17 | public: | ||
| 18 | explicit Exception(std::string message) noexcept : err_message{std::move(message)} {} | ||
| 19 | |||
| 20 | const char* what() const noexcept override { | ||
| 21 | return err_message.c_str(); | ||
| 22 | } | ||
| 23 | |||
| 24 | void Prepend(std::string_view prepend) { | ||
| 25 | err_message.insert(0, prepend); | ||
| 26 | } | ||
| 27 | |||
| 28 | void Append(std::string_view append) { | ||
| 29 | err_message += append; | ||
| 30 | } | ||
| 31 | |||
| 32 | private: | ||
| 33 | std::string err_message; | ||
| 34 | }; | ||
| 35 | |||
| 36 | class LogicError : public Exception { | ||
| 37 | public: | ||
| 38 | template <typename... Args> | ||
| 39 | LogicError(const char* message, Args&&... args) | ||
| 40 | : Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {} | ||
| 41 | }; | ||
| 42 | |||
| 43 | class RuntimeError : public Exception { | ||
| 44 | public: | ||
| 45 | template <typename... Args> | ||
| 46 | RuntimeError(const char* message, Args&&... args) | ||
| 47 | : Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {} | ||
| 48 | }; | ||
| 49 | |||
| 50 | class NotImplementedException : public Exception { | ||
| 51 | public: | ||
| 52 | template <typename... Args> | ||
| 53 | NotImplementedException(const char* message, Args&&... args) | ||
| 54 | : Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} { | ||
| 55 | Append(" is not implemented"); | ||
| 56 | } | ||
| 57 | }; | ||
| 58 | |||
| 59 | class InvalidArgument : public Exception { | ||
| 60 | public: | ||
| 61 | template <typename... Args> | ||
| 62 | InvalidArgument(const char* message, Args&&... args) | ||
| 63 | : Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {} | ||
| 64 | }; | ||
| 65 | |||
| 66 | } // namespace Shader | ||
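
The constructors format eagerly through fmt, and `Prepend`/`Append` let callers attach context while an exception unwinds; `NotImplementedException` appends its suffix automatically. A usage sketch:

```cpp
#include <cstdio>

#include "shader_recompiler/exception.h"

void Example() {
    try {
        throw Shader::NotImplementedException("Opcode {}", 0x12);
    } catch (Shader::Exception& e) {
        e.Prepend("0x0048 "); // e.g. the offending program counter
        std::puts(e.what());  // prints "0x0048 Opcode 18 is not implemented"
    }
}
```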
diff --git a/src/shader_recompiler/frontend/ir/abstract_syntax_list.h b/src/shader_recompiler/frontend/ir/abstract_syntax_list.h new file mode 100644 index 000000000..b61773487 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/abstract_syntax_list.h | |||
| @@ -0,0 +1,58 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 10 | |||
| 11 | namespace Shader::IR { | ||
| 12 | |||
| 13 | class Block; | ||
| 14 | |||
| 15 | struct AbstractSyntaxNode { | ||
| 16 | enum class Type { | ||
| 17 | Block, | ||
| 18 | If, | ||
| 19 | EndIf, | ||
| 20 | Loop, | ||
| 21 | Repeat, | ||
| 22 | Break, | ||
| 23 | Return, | ||
| 24 | Unreachable, | ||
| 25 | }; | ||
| 26 | union Data { | ||
| 27 | Block* block; | ||
| 28 | struct { | ||
| 29 | U1 cond; | ||
| 30 | Block* body; | ||
| 31 | Block* merge; | ||
| 32 | } if_node; | ||
| 33 | struct { | ||
| 34 | Block* merge; | ||
| 35 | } end_if; | ||
| 36 | struct { | ||
| 37 | Block* body; | ||
| 38 | Block* continue_block; | ||
| 39 | Block* merge; | ||
| 40 | } loop; | ||
| 41 | struct { | ||
| 42 | U1 cond; | ||
| 43 | Block* loop_header; | ||
| 44 | Block* merge; | ||
| 45 | } repeat; | ||
| 46 | struct { | ||
| 47 | U1 cond; | ||
| 48 | Block* merge; | ||
| 49 | Block* skip; | ||
| 50 | } break_node; | ||
| 51 | }; | ||
| 52 | |||
| 53 | Data data{}; | ||
| 54 | Type type{}; | ||
| 55 | }; | ||
| 56 | using AbstractSyntaxList = std::vector<AbstractSyntaxNode>; | ||
| 57 | |||
| 58 | } // namespace Shader::IR | ||
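
`AbstractSyntaxList` is a flat encoding of structured control flow: consumers dispatch on `node.type` and read the matching union member, trusting the type tag to pick the active field. An illustrative walk (sketch only):

```cpp
#include "shader_recompiler/frontend/ir/abstract_syntax_list.h"

void Walk(const Shader::IR::AbstractSyntaxList& list) {
    using Type = Shader::IR::AbstractSyntaxNode::Type;
    for (const Shader::IR::AbstractSyntaxNode& node : list) {
        switch (node.type) {
        case Type::Block:
            // node.data.block holds straight-line IR instructions
            break;
        case Type::If:
            // node.data.if_node.cond guards body; control rejoins at merge
            break;
        case Type::Loop:
            // node.data.loop.body iterates via continue_block; the matching
            // Repeat node's cond decides whether to branch back to loop_header
            break;
        default:
            break; // EndIf, Repeat, Break, Return, Unreachable
        }
    }
}
```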
diff --git a/src/shader_recompiler/frontend/ir/attribute.cpp b/src/shader_recompiler/frontend/ir/attribute.cpp new file mode 100644 index 000000000..4d0b8b8e5 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/attribute.cpp | |||
| @@ -0,0 +1,454 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <fmt/format.h> | ||
| 6 | |||
| 7 | #include "shader_recompiler/exception.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/attribute.h" | ||
| 9 | |||
| 10 | namespace Shader::IR { | ||
| 11 | |||
| 12 | bool IsGeneric(Attribute attribute) noexcept { | ||
| 13 | return attribute >= Attribute::Generic0X && attribute <= Attribute::Generic31X; | ||
| 14 | } | ||
| 15 | |||
| 16 | u32 GenericAttributeIndex(Attribute attribute) { | ||
| 17 | if (!IsGeneric(attribute)) { | ||
| 18 | throw InvalidArgument("Attribute is not generic {}", attribute); | ||
| 19 | } | ||
| 20 | return (static_cast<u32>(attribute) - static_cast<u32>(Attribute::Generic0X)) / 4u; | ||
| 21 | } | ||
| 22 | |||
| 23 | u32 GenericAttributeElement(Attribute attribute) { | ||
| 24 | if (!IsGeneric(attribute)) { | ||
| 25 | throw InvalidArgument("Attribute is not generic {}", attribute); | ||
| 26 | } | ||
| 27 | return static_cast<u32>(attribute) % 4; | ||
| 28 | } | ||
| 29 | |||
| 30 | std::string NameOf(Attribute attribute) { | ||
| 31 | switch (attribute) { | ||
| 32 | case Attribute::PrimitiveId: | ||
| 33 | return "PrimitiveId"; | ||
| 34 | case Attribute::Layer: | ||
| 35 | return "Layer"; | ||
| 36 | case Attribute::ViewportIndex: | ||
| 37 | return "ViewportIndex"; | ||
| 38 | case Attribute::PointSize: | ||
| 39 | return "PointSize"; | ||
| 40 | case Attribute::PositionX: | ||
| 41 | return "Position.X"; | ||
| 42 | case Attribute::PositionY: | ||
| 43 | return "Position.Y"; | ||
| 44 | case Attribute::PositionZ: | ||
| 45 | return "Position.Z"; | ||
| 46 | case Attribute::PositionW: | ||
| 47 | return "Position.W"; | ||
| 48 | case Attribute::Generic0X: | ||
| 49 | return "Generic[0].X"; | ||
| 50 | case Attribute::Generic0Y: | ||
| 51 | return "Generic[0].Y"; | ||
| 52 | case Attribute::Generic0Z: | ||
| 53 | return "Generic[0].Z"; | ||
| 54 | case Attribute::Generic0W: | ||
| 55 | return "Generic[0].W"; | ||
| 56 | case Attribute::Generic1X: | ||
| 57 | return "Generic[1].X"; | ||
| 58 | case Attribute::Generic1Y: | ||
| 59 | return "Generic[1].Y"; | ||
| 60 | case Attribute::Generic1Z: | ||
| 61 | return "Generic[1].Z"; | ||
| 62 | case Attribute::Generic1W: | ||
| 63 | return "Generic[1].W"; | ||
| 64 | case Attribute::Generic2X: | ||
| 65 | return "Generic[2].X"; | ||
| 66 | case Attribute::Generic2Y: | ||
| 67 | return "Generic[2].Y"; | ||
| 68 | case Attribute::Generic2Z: | ||
| 69 | return "Generic[2].Z"; | ||
| 70 | case Attribute::Generic2W: | ||
| 71 | return "Generic[2].W"; | ||
| 72 | case Attribute::Generic3X: | ||
| 73 | return "Generic[3].X"; | ||
| 74 | case Attribute::Generic3Y: | ||
| 75 | return "Generic[3].Y"; | ||
| 76 | case Attribute::Generic3Z: | ||
| 77 | return "Generic[3].Z"; | ||
| 78 | case Attribute::Generic3W: | ||
| 79 | return "Generic[3].W"; | ||
| 80 | case Attribute::Generic4X: | ||
| 81 | return "Generic[4].X"; | ||
| 82 | case Attribute::Generic4Y: | ||
| 83 | return "Generic[4].Y"; | ||
| 84 | case Attribute::Generic4Z: | ||
| 85 | return "Generic[4].Z"; | ||
| 86 | case Attribute::Generic4W: | ||
| 87 | return "Generic[4].W"; | ||
| 88 | case Attribute::Generic5X: | ||
| 89 | return "Generic[5].X"; | ||
| 90 | case Attribute::Generic5Y: | ||
| 91 | return "Generic[5].Y"; | ||
| 92 | case Attribute::Generic5Z: | ||
| 93 | return "Generic[5].Z"; | ||
| 94 | case Attribute::Generic5W: | ||
| 95 | return "Generic[5].W"; | ||
| 96 | case Attribute::Generic6X: | ||
| 97 | return "Generic[6].X"; | ||
| 98 | case Attribute::Generic6Y: | ||
| 99 | return "Generic[6].Y"; | ||
| 100 | case Attribute::Generic6Z: | ||
| 101 | return "Generic[6].Z"; | ||
| 102 | case Attribute::Generic6W: | ||
| 103 | return "Generic[6].W"; | ||
| 104 | case Attribute::Generic7X: | ||
| 105 | return "Generic[7].X"; | ||
| 106 | case Attribute::Generic7Y: | ||
| 107 | return "Generic[7].Y"; | ||
| 108 | case Attribute::Generic7Z: | ||
| 109 | return "Generic[7].Z"; | ||
| 110 | case Attribute::Generic7W: | ||
| 111 | return "Generic[7].W"; | ||
| 112 | case Attribute::Generic8X: | ||
| 113 | return "Generic[8].X"; | ||
| 114 | case Attribute::Generic8Y: | ||
| 115 | return "Generic[8].Y"; | ||
| 116 | case Attribute::Generic8Z: | ||
| 117 | return "Generic[8].Z"; | ||
| 118 | case Attribute::Generic8W: | ||
| 119 | return "Generic[8].W"; | ||
| 120 | case Attribute::Generic9X: | ||
| 121 | return "Generic[9].X"; | ||
| 122 | case Attribute::Generic9Y: | ||
| 123 | return "Generic[9].Y"; | ||
| 124 | case Attribute::Generic9Z: | ||
| 125 | return "Generic[9].Z"; | ||
| 126 | case Attribute::Generic9W: | ||
| 127 | return "Generic[9].W"; | ||
| 128 | case Attribute::Generic10X: | ||
| 129 | return "Generic[10].X"; | ||
| 130 | case Attribute::Generic10Y: | ||
| 131 | return "Generic[10].Y"; | ||
| 132 | case Attribute::Generic10Z: | ||
| 133 | return "Generic[10].Z"; | ||
| 134 | case Attribute::Generic10W: | ||
| 135 | return "Generic[10].W"; | ||
| 136 | case Attribute::Generic11X: | ||
| 137 | return "Generic[11].X"; | ||
| 138 | case Attribute::Generic11Y: | ||
| 139 | return "Generic[11].Y"; | ||
| 140 | case Attribute::Generic11Z: | ||
| 141 | return "Generic[11].Z"; | ||
| 142 | case Attribute::Generic11W: | ||
| 143 | return "Generic[11].W"; | ||
| 144 | case Attribute::Generic12X: | ||
| 145 | return "Generic[12].X"; | ||
| 146 | case Attribute::Generic12Y: | ||
| 147 | return "Generic[12].Y"; | ||
| 148 | case Attribute::Generic12Z: | ||
| 149 | return "Generic[12].Z"; | ||
| 150 | case Attribute::Generic12W: | ||
| 151 | return "Generic[12].W"; | ||
| 152 | case Attribute::Generic13X: | ||
| 153 | return "Generic[13].X"; | ||
| 154 | case Attribute::Generic13Y: | ||
| 155 | return "Generic[13].Y"; | ||
| 156 | case Attribute::Generic13Z: | ||
| 157 | return "Generic[13].Z"; | ||
| 158 | case Attribute::Generic13W: | ||
| 159 | return "Generic[13].W"; | ||
| 160 | case Attribute::Generic14X: | ||
| 161 | return "Generic[14].X"; | ||
| 162 | case Attribute::Generic14Y: | ||
| 163 | return "Generic[14].Y"; | ||
| 164 | case Attribute::Generic14Z: | ||
| 165 | return "Generic[14].Z"; | ||
| 166 | case Attribute::Generic14W: | ||
| 167 | return "Generic[14].W"; | ||
| 168 | case Attribute::Generic15X: | ||
| 169 | return "Generic[15].X"; | ||
| 170 | case Attribute::Generic15Y: | ||
| 171 | return "Generic[15].Y"; | ||
| 172 | case Attribute::Generic15Z: | ||
| 173 | return "Generic[15].Z"; | ||
| 174 | case Attribute::Generic15W: | ||
| 175 | return "Generic[15].W"; | ||
| 176 | case Attribute::Generic16X: | ||
| 177 | return "Generic[16].X"; | ||
| 178 | case Attribute::Generic16Y: | ||
| 179 | return "Generic[16].Y"; | ||
| 180 | case Attribute::Generic16Z: | ||
| 181 | return "Generic[16].Z"; | ||
| 182 | case Attribute::Generic16W: | ||
| 183 | return "Generic[16].W"; | ||
| 184 | case Attribute::Generic17X: | ||
| 185 | return "Generic[17].X"; | ||
| 186 | case Attribute::Generic17Y: | ||
| 187 | return "Generic[17].Y"; | ||
| 188 | case Attribute::Generic17Z: | ||
| 189 | return "Generic[17].Z"; | ||
| 190 | case Attribute::Generic17W: | ||
| 191 | return "Generic[17].W"; | ||
| 192 | case Attribute::Generic18X: | ||
| 193 | return "Generic[18].X"; | ||
| 194 | case Attribute::Generic18Y: | ||
| 195 | return "Generic[18].Y"; | ||
| 196 | case Attribute::Generic18Z: | ||
| 197 | return "Generic[18].Z"; | ||
| 198 | case Attribute::Generic18W: | ||
| 199 | return "Generic[18].W"; | ||
| 200 | case Attribute::Generic19X: | ||
| 201 | return "Generic[19].X"; | ||
| 202 | case Attribute::Generic19Y: | ||
| 203 | return "Generic[19].Y"; | ||
| 204 | case Attribute::Generic19Z: | ||
| 205 | return "Generic[19].Z"; | ||
| 206 | case Attribute::Generic19W: | ||
| 207 | return "Generic[19].W"; | ||
| 208 | case Attribute::Generic20X: | ||
| 209 | return "Generic[20].X"; | ||
| 210 | case Attribute::Generic20Y: | ||
| 211 | return "Generic[20].Y"; | ||
| 212 | case Attribute::Generic20Z: | ||
| 213 | return "Generic[20].Z"; | ||
| 214 | case Attribute::Generic20W: | ||
| 215 | return "Generic[20].W"; | ||
| 216 | case Attribute::Generic21X: | ||
| 217 | return "Generic[21].X"; | ||
| 218 | case Attribute::Generic21Y: | ||
| 219 | return "Generic[21].Y"; | ||
| 220 | case Attribute::Generic21Z: | ||
| 221 | return "Generic[21].Z"; | ||
| 222 | case Attribute::Generic21W: | ||
| 223 | return "Generic[21].W"; | ||
| 224 | case Attribute::Generic22X: | ||
| 225 | return "Generic[22].X"; | ||
| 226 | case Attribute::Generic22Y: | ||
| 227 | return "Generic[22].Y"; | ||
| 228 | case Attribute::Generic22Z: | ||
| 229 | return "Generic[22].Z"; | ||
| 230 | case Attribute::Generic22W: | ||
| 231 | return "Generic[22].W"; | ||
| 232 | case Attribute::Generic23X: | ||
| 233 | return "Generic[23].X"; | ||
| 234 | case Attribute::Generic23Y: | ||
| 235 | return "Generic[23].Y"; | ||
| 236 | case Attribute::Generic23Z: | ||
| 237 | return "Generic[23].Z"; | ||
| 238 | case Attribute::Generic23W: | ||
| 239 | return "Generic[23].W"; | ||
| 240 | case Attribute::Generic24X: | ||
| 241 | return "Generic[24].X"; | ||
| 242 | case Attribute::Generic24Y: | ||
| 243 | return "Generic[24].Y"; | ||
| 244 | case Attribute::Generic24Z: | ||
| 245 | return "Generic[24].Z"; | ||
| 246 | case Attribute::Generic24W: | ||
| 247 | return "Generic[24].W"; | ||
| 248 | case Attribute::Generic25X: | ||
| 249 | return "Generic[25].X"; | ||
| 250 | case Attribute::Generic25Y: | ||
| 251 | return "Generic[25].Y"; | ||
| 252 | case Attribute::Generic25Z: | ||
| 253 | return "Generic[25].Z"; | ||
| 254 | case Attribute::Generic25W: | ||
| 255 | return "Generic[25].W"; | ||
| 256 | case Attribute::Generic26X: | ||
| 257 | return "Generic[26].X"; | ||
| 258 | case Attribute::Generic26Y: | ||
| 259 | return "Generic[26].Y"; | ||
| 260 | case Attribute::Generic26Z: | ||
| 261 | return "Generic[26].Z"; | ||
| 262 | case Attribute::Generic26W: | ||
| 263 | return "Generic[26].W"; | ||
| 264 | case Attribute::Generic27X: | ||
| 265 | return "Generic[27].X"; | ||
| 266 | case Attribute::Generic27Y: | ||
| 267 | return "Generic[27].Y"; | ||
| 268 | case Attribute::Generic27Z: | ||
| 269 | return "Generic[27].Z"; | ||
| 270 | case Attribute::Generic27W: | ||
| 271 | return "Generic[27].W"; | ||
| 272 | case Attribute::Generic28X: | ||
| 273 | return "Generic[28].X"; | ||
| 274 | case Attribute::Generic28Y: | ||
| 275 | return "Generic[28].Y"; | ||
| 276 | case Attribute::Generic28Z: | ||
| 277 | return "Generic[28].Z"; | ||
| 278 | case Attribute::Generic28W: | ||
| 279 | return "Generic[28].W"; | ||
| 280 | case Attribute::Generic29X: | ||
| 281 | return "Generic[29].X"; | ||
| 282 | case Attribute::Generic29Y: | ||
| 283 | return "Generic[29].Y"; | ||
| 284 | case Attribute::Generic29Z: | ||
| 285 | return "Generic[29].Z"; | ||
| 286 | case Attribute::Generic29W: | ||
| 287 | return "Generic[29].W"; | ||
| 288 | case Attribute::Generic30X: | ||
| 289 | return "Generic[30].X"; | ||
| 290 | case Attribute::Generic30Y: | ||
| 291 | return "Generic[30].Y"; | ||
| 292 | case Attribute::Generic30Z: | ||
| 293 | return "Generic[30].Z"; | ||
| 294 | case Attribute::Generic30W: | ||
| 295 | return "Generic[30].W"; | ||
| 296 | case Attribute::Generic31X: | ||
| 297 | return "Generic[31].X"; | ||
| 298 | case Attribute::Generic31Y: | ||
| 299 | return "Generic[31].Y"; | ||
| 300 | case Attribute::Generic31Z: | ||
| 301 | return "Generic[31].Z"; | ||
| 302 | case Attribute::Generic31W: | ||
| 303 | return "Generic[31].W"; | ||
| 304 | case Attribute::ColorFrontDiffuseR: | ||
| 305 | return "ColorFrontDiffuse.R"; | ||
| 306 | case Attribute::ColorFrontDiffuseG: | ||
| 307 | return "ColorFrontDiffuse.G"; | ||
| 308 | case Attribute::ColorFrontDiffuseB: | ||
| 309 | return "ColorFrontDiffuse.B"; | ||
| 310 | case Attribute::ColorFrontDiffuseA: | ||
| 311 | return "ColorFrontDiffuse.A"; | ||
| 312 | case Attribute::ColorFrontSpecularR: | ||
| 313 | return "ColorFrontSpecular.R"; | ||
| 314 | case Attribute::ColorFrontSpecularG: | ||
| 315 | return "ColorFrontSpecular.G"; | ||
| 316 | case Attribute::ColorFrontSpecularB: | ||
| 317 | return "ColorFrontSpecular.B"; | ||
| 318 | case Attribute::ColorFrontSpecularA: | ||
| 319 | return "ColorFrontSpecular.A"; | ||
| 320 | case Attribute::ColorBackDiffuseR: | ||
| 321 | return "ColorBackDiffuse.R"; | ||
| 322 | case Attribute::ColorBackDiffuseG: | ||
| 323 | return "ColorBackDiffuse.G"; | ||
| 324 | case Attribute::ColorBackDiffuseB: | ||
| 325 | return "ColorBackDiffuse.B"; | ||
| 326 | case Attribute::ColorBackDiffuseA: | ||
| 327 | return "ColorBackDiffuse.A"; | ||
| 328 | case Attribute::ColorBackSpecularR: | ||
| 329 | return "ColorBackSpecular.R"; | ||
| 330 | case Attribute::ColorBackSpecularG: | ||
| 331 | return "ColorBackSpecular.G"; | ||
| 332 | case Attribute::ColorBackSpecularB: | ||
| 333 | return "ColorBackSpecular.B"; | ||
| 334 | case Attribute::ColorBackSpecularA: | ||
| 335 | return "ColorBackSpecular.A"; | ||
| 336 | case Attribute::ClipDistance0: | ||
| 337 | return "ClipDistance[0]"; | ||
| 338 | case Attribute::ClipDistance1: | ||
| 339 | return "ClipDistance[1]"; | ||
| 340 | case Attribute::ClipDistance2: | ||
| 341 | return "ClipDistance[2]"; | ||
| 342 | case Attribute::ClipDistance3: | ||
| 343 | return "ClipDistance[3]"; | ||
| 344 | case Attribute::ClipDistance4: | ||
| 345 | return "ClipDistance[4]"; | ||
| 346 | case Attribute::ClipDistance5: | ||
| 347 | return "ClipDistance[5]"; | ||
| 348 | case Attribute::ClipDistance6: | ||
| 349 | return "ClipDistance[6]"; | ||
| 350 | case Attribute::ClipDistance7: | ||
| 351 | return "ClipDistance[7]"; | ||
| 352 | case Attribute::PointSpriteS: | ||
| 353 | return "PointSprite.S"; | ||
| 354 | case Attribute::PointSpriteT: | ||
| 355 | return "PointSprite.T"; | ||
| 356 | case Attribute::FogCoordinate: | ||
| 357 | return "FogCoordinate"; | ||
| 358 | case Attribute::TessellationEvaluationPointU: | ||
| 359 | return "TessellationEvaluationPoint.U"; | ||
| 360 | case Attribute::TessellationEvaluationPointV: | ||
| 361 | return "TessellationEvaluationPoint.V"; | ||
| 362 | case Attribute::InstanceId: | ||
| 363 | return "InstanceId"; | ||
| 364 | case Attribute::VertexId: | ||
| 365 | return "VertexId"; | ||
| 366 | case Attribute::FixedFncTexture0S: | ||
| 367 | return "FixedFncTexture[0].S"; | ||
| 368 | case Attribute::FixedFncTexture0T: | ||
| 369 | return "FixedFncTexture[0].T"; | ||
| 370 | case Attribute::FixedFncTexture0R: | ||
| 371 | return "FixedFncTexture[0].R"; | ||
| 372 | case Attribute::FixedFncTexture0Q: | ||
| 373 | return "FixedFncTexture[0].Q"; | ||
| 374 | case Attribute::FixedFncTexture1S: | ||
| 375 | return "FixedFncTexture[1].S"; | ||
| 376 | case Attribute::FixedFncTexture1T: | ||
| 377 | return "FixedFncTexture[1].T"; | ||
| 378 | case Attribute::FixedFncTexture1R: | ||
| 379 | return "FixedFncTexture[1].R"; | ||
| 380 | case Attribute::FixedFncTexture1Q: | ||
| 381 | return "FixedFncTexture[1].Q"; | ||
| 382 | case Attribute::FixedFncTexture2S: | ||
| 383 | return "FixedFncTexture[2].S"; | ||
| 384 | case Attribute::FixedFncTexture2T: | ||
| 385 | return "FixedFncTexture[2].T"; | ||
| 386 | case Attribute::FixedFncTexture2R: | ||
| 387 | return "FixedFncTexture[2].R"; | ||
| 388 | case Attribute::FixedFncTexture2Q: | ||
| 389 | return "FixedFncTexture[2].Q"; | ||
| 390 | case Attribute::FixedFncTexture3S: | ||
| 391 | return "FixedFncTexture[3].S"; | ||
| 392 | case Attribute::FixedFncTexture3T: | ||
| 393 | return "FixedFncTexture[3].T"; | ||
| 394 | case Attribute::FixedFncTexture3R: | ||
| 395 | return "FixedFncTexture[3].R"; | ||
| 396 | case Attribute::FixedFncTexture3Q: | ||
| 397 | return "FixedFncTexture[3].Q"; | ||
| 398 | case Attribute::FixedFncTexture4S: | ||
| 399 | return "FixedFncTexture[4].S"; | ||
| 400 | case Attribute::FixedFncTexture4T: | ||
| 401 | return "FixedFncTexture[4].T"; | ||
| 402 | case Attribute::FixedFncTexture4R: | ||
| 403 | return "FixedFncTexture[4].R"; | ||
| 404 | case Attribute::FixedFncTexture4Q: | ||
| 405 | return "FixedFncTexture[4].Q"; | ||
| 406 | case Attribute::FixedFncTexture5S: | ||
| 407 | return "FixedFncTexture[5].S"; | ||
| 408 | case Attribute::FixedFncTexture5T: | ||
| 409 | return "FixedFncTexture[5].T"; | ||
| 410 | case Attribute::FixedFncTexture5R: | ||
| 411 | return "FixedFncTexture[5].R"; | ||
| 412 | case Attribute::FixedFncTexture5Q: | ||
| 413 | return "FixedFncTexture[5].Q"; | ||
| 414 | case Attribute::FixedFncTexture6S: | ||
| 415 | return "FixedFncTexture[6].S"; | ||
| 416 | case Attribute::FixedFncTexture6T: | ||
| 417 | return "FixedFncTexture[6].T"; | ||
| 418 | case Attribute::FixedFncTexture6R: | ||
| 419 | return "FixedFncTexture[6].R"; | ||
| 420 | case Attribute::FixedFncTexture6Q: | ||
| 421 | return "FixedFncTexture[6].Q"; | ||
| 422 | case Attribute::FixedFncTexture7S: | ||
| 423 | return "FixedFncTexture[7].S"; | ||
| 424 | case Attribute::FixedFncTexture7T: | ||
| 425 | return "FixedFncTexture[7].T"; | ||
| 426 | case Attribute::FixedFncTexture7R: | ||
| 427 | return "FixedFncTexture[7].R"; | ||
| 428 | case Attribute::FixedFncTexture7Q: | ||
| 429 | return "FixedFncTexture[7].Q"; | ||
| 430 | case Attribute::FixedFncTexture8S: | ||
| 431 | return "FixedFncTexture[8].S"; | ||
| 432 | case Attribute::FixedFncTexture8T: | ||
| 433 | return "FixedFncTexture[8].T"; | ||
| 434 | case Attribute::FixedFncTexture8R: | ||
| 435 | return "FixedFncTexture[8].R"; | ||
| 436 | case Attribute::FixedFncTexture8Q: | ||
| 437 | return "FixedFncTexture[8].Q"; | ||
| 438 | case Attribute::FixedFncTexture9S: | ||
| 439 | return "FixedFncTexture[9].S"; | ||
| 440 | case Attribute::FixedFncTexture9T: | ||
| 441 | return "FixedFncTexture[9].T"; | ||
| 442 | case Attribute::FixedFncTexture9R: | ||
| 443 | return "FixedFncTexture[9].R"; | ||
| 444 | case Attribute::FixedFncTexture9Q: | ||
| 445 | return "FixedFncTexture[9].Q"; | ||
| 446 | case Attribute::ViewportMask: | ||
| 447 | return "ViewportMask"; | ||
| 448 | case Attribute::FrontFace: | ||
| 449 | return "FrontFace"; | ||
| 450 | } | ||
| 451 | return fmt::format("<reserved attribute {}>", static_cast<int>(attribute)); | ||
| 452 | } | ||
| 453 | |||
| 454 | } // namespace Shader::IR | ||
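
The arithmetic above hinges on `Generic0X` having raw value 32, itself a multiple of 4: that is why `GenericAttributeElement` can take the value modulo 4 without first subtracting the base. A worked example against the enumerators declared in attribute.h below:

```cpp
#include "shader_recompiler/frontend/ir/attribute.h"

using Shader::IR::Attribute;
static_assert(static_cast<u32>(Attribute::Generic3Y) == 45);
// GenericAttributeIndex(Attribute::Generic3Y)   == (45 - 32) / 4 == 3 -> Generic[3]
// GenericAttributeElement(Attribute::Generic3Y) ==  45 % 4       == 1 -> .Y
```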
diff --git a/src/shader_recompiler/frontend/ir/attribute.h b/src/shader_recompiler/frontend/ir/attribute.h new file mode 100644 index 000000000..ca1199494 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/attribute.h | |||
| @@ -0,0 +1,250 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <fmt/format.h> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | |||
| 11 | namespace Shader::IR { | ||
| 12 | |||
| 13 | enum class Attribute : u64 { | ||
| 14 | PrimitiveId = 24, | ||
| 15 | Layer = 25, | ||
| 16 | ViewportIndex = 26, | ||
| 17 | PointSize = 27, | ||
| 18 | PositionX = 28, | ||
| 19 | PositionY = 29, | ||
| 20 | PositionZ = 30, | ||
| 21 | PositionW = 31, | ||
| 22 | Generic0X = 32, | ||
| 23 | Generic0Y = 33, | ||
| 24 | Generic0Z = 34, | ||
| 25 | Generic0W = 35, | ||
| 26 | Generic1X = 36, | ||
| 27 | Generic1Y = 37, | ||
| 28 | Generic1Z = 38, | ||
| 29 | Generic1W = 39, | ||
| 30 | Generic2X = 40, | ||
| 31 | Generic2Y = 41, | ||
| 32 | Generic2Z = 42, | ||
| 33 | Generic2W = 43, | ||
| 34 | Generic3X = 44, | ||
| 35 | Generic3Y = 45, | ||
| 36 | Generic3Z = 46, | ||
| 37 | Generic3W = 47, | ||
| 38 | Generic4X = 48, | ||
| 39 | Generic4Y = 49, | ||
| 40 | Generic4Z = 50, | ||
| 41 | Generic4W = 51, | ||
| 42 | Generic5X = 52, | ||
| 43 | Generic5Y = 53, | ||
| 44 | Generic5Z = 54, | ||
| 45 | Generic5W = 55, | ||
| 46 | Generic6X = 56, | ||
| 47 | Generic6Y = 57, | ||
| 48 | Generic6Z = 58, | ||
| 49 | Generic6W = 59, | ||
| 50 | Generic7X = 60, | ||
| 51 | Generic7Y = 61, | ||
| 52 | Generic7Z = 62, | ||
| 53 | Generic7W = 63, | ||
| 54 | Generic8X = 64, | ||
| 55 | Generic8Y = 65, | ||
| 56 | Generic8Z = 66, | ||
| 57 | Generic8W = 67, | ||
| 58 | Generic9X = 68, | ||
| 59 | Generic9Y = 69, | ||
| 60 | Generic9Z = 70, | ||
| 61 | Generic9W = 71, | ||
| 62 | Generic10X = 72, | ||
| 63 | Generic10Y = 73, | ||
| 64 | Generic10Z = 74, | ||
| 65 | Generic10W = 75, | ||
| 66 | Generic11X = 76, | ||
| 67 | Generic11Y = 77, | ||
| 68 | Generic11Z = 78, | ||
| 69 | Generic11W = 79, | ||
| 70 | Generic12X = 80, | ||
| 71 | Generic12Y = 81, | ||
| 72 | Generic12Z = 82, | ||
| 73 | Generic12W = 83, | ||
| 74 | Generic13X = 84, | ||
| 75 | Generic13Y = 85, | ||
| 76 | Generic13Z = 86, | ||
| 77 | Generic13W = 87, | ||
| 78 | Generic14X = 88, | ||
| 79 | Generic14Y = 89, | ||
| 80 | Generic14Z = 90, | ||
| 81 | Generic14W = 91, | ||
| 82 | Generic15X = 92, | ||
| 83 | Generic15Y = 93, | ||
| 84 | Generic15Z = 94, | ||
| 85 | Generic15W = 95, | ||
| 86 | Generic16X = 96, | ||
| 87 | Generic16Y = 97, | ||
| 88 | Generic16Z = 98, | ||
| 89 | Generic16W = 99, | ||
| 90 | Generic17X = 100, | ||
| 91 | Generic17Y = 101, | ||
| 92 | Generic17Z = 102, | ||
| 93 | Generic17W = 103, | ||
| 94 | Generic18X = 104, | ||
| 95 | Generic18Y = 105, | ||
| 96 | Generic18Z = 106, | ||
| 97 | Generic18W = 107, | ||
| 98 | Generic19X = 108, | ||
| 99 | Generic19Y = 109, | ||
| 100 | Generic19Z = 110, | ||
| 101 | Generic19W = 111, | ||
| 102 | Generic20X = 112, | ||
| 103 | Generic20Y = 113, | ||
| 104 | Generic20Z = 114, | ||
| 105 | Generic20W = 115, | ||
| 106 | Generic21X = 116, | ||
| 107 | Generic21Y = 117, | ||
| 108 | Generic21Z = 118, | ||
| 109 | Generic21W = 119, | ||
| 110 | Generic22X = 120, | ||
| 111 | Generic22Y = 121, | ||
| 112 | Generic22Z = 122, | ||
| 113 | Generic22W = 123, | ||
| 114 | Generic23X = 124, | ||
| 115 | Generic23Y = 125, | ||
| 116 | Generic23Z = 126, | ||
| 117 | Generic23W = 127, | ||
| 118 | Generic24X = 128, | ||
| 119 | Generic24Y = 129, | ||
| 120 | Generic24Z = 130, | ||
| 121 | Generic24W = 131, | ||
| 122 | Generic25X = 132, | ||
| 123 | Generic25Y = 133, | ||
| 124 | Generic25Z = 134, | ||
| 125 | Generic25W = 135, | ||
| 126 | Generic26X = 136, | ||
| 127 | Generic26Y = 137, | ||
| 128 | Generic26Z = 138, | ||
| 129 | Generic26W = 139, | ||
| 130 | Generic27X = 140, | ||
| 131 | Generic27Y = 141, | ||
| 132 | Generic27Z = 142, | ||
| 133 | Generic27W = 143, | ||
| 134 | Generic28X = 144, | ||
| 135 | Generic28Y = 145, | ||
| 136 | Generic28Z = 146, | ||
| 137 | Generic28W = 147, | ||
| 138 | Generic29X = 148, | ||
| 139 | Generic29Y = 149, | ||
| 140 | Generic29Z = 150, | ||
| 141 | Generic29W = 151, | ||
| 142 | Generic30X = 152, | ||
| 143 | Generic30Y = 153, | ||
| 144 | Generic30Z = 154, | ||
| 145 | Generic30W = 155, | ||
| 146 | Generic31X = 156, | ||
| 147 | Generic31Y = 157, | ||
| 148 | Generic31Z = 158, | ||
| 149 | Generic31W = 159, | ||
| 150 | ColorFrontDiffuseR = 160, | ||
| 151 | ColorFrontDiffuseG = 161, | ||
| 152 | ColorFrontDiffuseB = 162, | ||
| 153 | ColorFrontDiffuseA = 163, | ||
| 154 | ColorFrontSpecularR = 164, | ||
| 155 | ColorFrontSpecularG = 165, | ||
| 156 | ColorFrontSpecularB = 166, | ||
| 157 | ColorFrontSpecularA = 167, | ||
| 158 | ColorBackDiffuseR = 168, | ||
| 159 | ColorBackDiffuseG = 169, | ||
| 160 | ColorBackDiffuseB = 170, | ||
| 161 | ColorBackDiffuseA = 171, | ||
| 162 | ColorBackSpecularR = 172, | ||
| 163 | ColorBackSpecularG = 173, | ||
| 164 | ColorBackSpecularB = 174, | ||
| 165 | ColorBackSpecularA = 175, | ||
| 166 | ClipDistance0 = 176, | ||
| 167 | ClipDistance1 = 177, | ||
| 168 | ClipDistance2 = 178, | ||
| 169 | ClipDistance3 = 179, | ||
| 170 | ClipDistance4 = 180, | ||
| 171 | ClipDistance5 = 181, | ||
| 172 | ClipDistance6 = 182, | ||
| 173 | ClipDistance7 = 183, | ||
| 174 | PointSpriteS = 184, | ||
| 175 | PointSpriteT = 185, | ||
| 176 | FogCoordinate = 186, | ||
| 177 | TessellationEvaluationPointU = 188, | ||
| 178 | TessellationEvaluationPointV = 189, | ||
| 179 | InstanceId = 190, | ||
| 180 | VertexId = 191, | ||
| 181 | FixedFncTexture0S = 192, | ||
| 182 | FixedFncTexture0T = 193, | ||
| 183 | FixedFncTexture0R = 194, | ||
| 184 | FixedFncTexture0Q = 195, | ||
| 185 | FixedFncTexture1S = 196, | ||
| 186 | FixedFncTexture1T = 197, | ||
| 187 | FixedFncTexture1R = 198, | ||
| 188 | FixedFncTexture1Q = 199, | ||
| 189 | FixedFncTexture2S = 200, | ||
| 190 | FixedFncTexture2T = 201, | ||
| 191 | FixedFncTexture2R = 202, | ||
| 192 | FixedFncTexture2Q = 203, | ||
| 193 | FixedFncTexture3S = 204, | ||
| 194 | FixedFncTexture3T = 205, | ||
| 195 | FixedFncTexture3R = 206, | ||
| 196 | FixedFncTexture3Q = 207, | ||
| 197 | FixedFncTexture4S = 208, | ||
| 198 | FixedFncTexture4T = 209, | ||
| 199 | FixedFncTexture4R = 210, | ||
| 200 | FixedFncTexture4Q = 211, | ||
| 201 | FixedFncTexture5S = 212, | ||
| 202 | FixedFncTexture5T = 213, | ||
| 203 | FixedFncTexture5R = 214, | ||
| 204 | FixedFncTexture5Q = 215, | ||
| 205 | FixedFncTexture6S = 216, | ||
| 206 | FixedFncTexture6T = 217, | ||
| 207 | FixedFncTexture6R = 218, | ||
| 208 | FixedFncTexture6Q = 219, | ||
| 209 | FixedFncTexture7S = 220, | ||
| 210 | FixedFncTexture7T = 221, | ||
| 211 | FixedFncTexture7R = 222, | ||
| 212 | FixedFncTexture7Q = 223, | ||
| 213 | FixedFncTexture8S = 224, | ||
| 214 | FixedFncTexture8T = 225, | ||
| 215 | FixedFncTexture8R = 226, | ||
| 216 | FixedFncTexture8Q = 227, | ||
| 217 | FixedFncTexture9S = 228, | ||
| 218 | FixedFncTexture9T = 229, | ||
| 219 | FixedFncTexture9R = 230, | ||
| 220 | FixedFncTexture9Q = 231, | ||
| 221 | ViewportMask = 232, | ||
| 222 | FrontFace = 255, | ||
| 223 | }; | ||
| 224 | |||
| 225 | constexpr size_t NUM_GENERICS = 32; | ||
| 226 | |||
| 227 | [[nodiscard]] bool IsGeneric(Attribute attribute) noexcept; | ||
| 228 | |||
| 229 | [[nodiscard]] u32 GenericAttributeIndex(Attribute attribute); | ||
| 230 | |||
| 231 | [[nodiscard]] u32 GenericAttributeElement(Attribute attribute); | ||
| 232 | |||
| 233 | [[nodiscard]] std::string NameOf(Attribute attribute); | ||
| 234 | |||
| 235 | [[nodiscard]] constexpr IR::Attribute operator+(IR::Attribute attribute, size_t value) noexcept { | ||
| 236 | return static_cast<IR::Attribute>(static_cast<size_t>(attribute) + value); | ||
| 237 | } | ||
| 238 | |||
| 239 | } // namespace Shader::IR | ||
| 240 | |||
| 241 | template <> | ||
| 242 | struct fmt::formatter<Shader::IR::Attribute> { | ||
| 243 | constexpr auto parse(format_parse_context& ctx) { | ||
| 244 | return ctx.begin(); | ||
| 245 | } | ||
| 246 | template <typename FormatContext> | ||
| 247 | auto format(const Shader::IR::Attribute& attribute, FormatContext& ctx) { | ||
| 248 | return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(attribute)); | ||
| 249 | } | ||
| 250 | }; | ||
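
A short usage sketch for the helpers above: `operator+` steps an attribute by raw components and the `fmt::formatter` specialization prints the symbolic name via `NameOf`:

```cpp
#include <fmt/format.h>

#include "shader_recompiler/frontend/ir/attribute.h"

void PrintAttributes() {
    const auto attr = Shader::IR::Attribute::Generic0X + 2; // raw 32 + 2 = Generic0Z
    fmt::print("{}\n", attr);                               // prints "Generic[0].Z"
    fmt::print("{}\n", Shader::IR::Attribute::PositionW);   // prints "Position.W"
}
```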
diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp new file mode 100644 index 000000000..7c08b25ce --- /dev/null +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp | |||
| @@ -0,0 +1,149 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <initializer_list> | ||
| 7 | #include <map> | ||
| 8 | #include <memory> | ||
| 9 | |||
| 10 | #include "common/bit_cast.h" | ||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 13 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 14 | |||
| 15 | namespace Shader::IR { | ||
| 16 | |||
| 17 | Block::Block(ObjectPool<Inst>& inst_pool_) : inst_pool{&inst_pool_} {} | ||
| 18 | |||
| 19 | Block::~Block() = default; | ||
| 20 | |||
| 21 | void Block::AppendNewInst(Opcode op, std::initializer_list<Value> args) { | ||
| 22 | PrependNewInst(end(), op, args); | ||
| 23 | } | ||
| 24 | |||
| 25 | Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, | ||
| 26 | std::initializer_list<Value> args, u32 flags) { | ||
| 27 | Inst* const inst{inst_pool->Create(op, flags)}; | ||
| 28 | const auto result_it{instructions.insert(insertion_point, *inst)}; | ||
| 29 | |||
| 30 | if (inst->NumArgs() != args.size()) { | ||
| 31 | throw InvalidArgument("Invalid number of arguments {} in {}", args.size(), op); | ||
| 32 | } | ||
| 33 | std::ranges::for_each(args, [inst, index = size_t{0}](const Value& arg) mutable { | ||
| 34 | inst->SetArg(index, arg); | ||
| 35 | ++index; | ||
| 36 | }); | ||
| 37 | return result_it; | ||
| 38 | } | ||
| 39 | |||
| 40 | void Block::AddBranch(Block* block) { | ||
| 41 | if (std::ranges::find(imm_successors, block) != imm_successors.end()) { | ||
| 42 | throw LogicError("Successor already inserted"); | ||
| 43 | } | ||
| 44 | if (std::ranges::find(block->imm_predecessors, this) != block->imm_predecessors.end()) { | ||
| 45 | throw LogicError("Predecessor already inserted"); | ||
| 46 | } | ||
| 47 | imm_successors.push_back(block); | ||
| 48 | block->imm_predecessors.push_back(this); | ||
| 49 | } | ||
| 50 | |||
| 51 | static std::string BlockToIndex(const std::map<const Block*, size_t>& block_to_index, | ||
| 52 | Block* block) { | ||
| 53 | if (const auto it{block_to_index.find(block)}; it != block_to_index.end()) { | ||
| 54 | return fmt::format("{{Block ${}}}", it->second); | ||
| 55 | } | ||
| 56 | return fmt::format("$<unknown block {:016x}>", reinterpret_cast<u64>(block)); | ||
| 57 | } | ||
| 58 | |||
| 59 | static size_t InstIndex(std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index, | ||
| 60 | const Inst* inst) { | ||
| 61 | const auto [it, is_inserted]{inst_to_index.emplace(inst, inst_index + 1)}; | ||
| 62 | if (is_inserted) { | ||
| 63 | ++inst_index; | ||
| 64 | } | ||
| 65 | return it->second; | ||
| 66 | } | ||
| 67 | |||
| 68 | static std::string ArgToIndex(std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index, | ||
| 69 | const Value& arg) { | ||
| 70 | if (arg.IsEmpty()) { | ||
| 71 | return "<null>"; | ||
| 72 | } | ||
| 73 | if (!arg.IsImmediate() || arg.IsIdentity()) { | ||
| 74 | return fmt::format("%{}", InstIndex(inst_to_index, inst_index, arg.Inst())); | ||
| 75 | } | ||
| 76 | switch (arg.Type()) { | ||
| 77 | case Type::U1: | ||
| 78 | return fmt::format("#{}", arg.U1() ? "true" : "false"); | ||
| 79 | case Type::U8: | ||
| 80 | return fmt::format("#{}", arg.U8()); | ||
| 81 | case Type::U16: | ||
| 82 | return fmt::format("#{}", arg.U16()); | ||
| 83 | case Type::U32: | ||
| 84 | return fmt::format("#{}", arg.U32()); | ||
| 85 | case Type::U64: | ||
| 86 | return fmt::format("#{}", arg.U64()); | ||
| 87 | case Type::F32: | ||
| 88 | return fmt::format("#{}", arg.F32()); | ||
| 89 | case Type::Reg: | ||
| 90 | return fmt::format("{}", arg.Reg()); | ||
| 91 | case Type::Pred: | ||
| 92 | return fmt::format("{}", arg.Pred()); | ||
| 93 | case Type::Attribute: | ||
| 94 | return fmt::format("{}", arg.Attribute()); | ||
| 95 | default: | ||
| 96 | return "<unknown immediate type>"; | ||
| 97 | } | ||
| 98 | } | ||
| 99 | |||
| 100 | std::string DumpBlock(const Block& block) { | ||
| 101 | size_t inst_index{0}; | ||
| 102 | std::map<const Inst*, size_t> inst_to_index; | ||
| 103 | return DumpBlock(block, {}, inst_to_index, inst_index); | ||
| 104 | } | ||
| 105 | |||
| 106 | std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>& block_to_index, | ||
| 107 | std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index) { | ||
| 108 | std::string ret{"Block"}; | ||
| 109 | if (const auto it{block_to_index.find(&block)}; it != block_to_index.end()) { | ||
| 110 | ret += fmt::format(" ${}", it->second); | ||
| 111 | } | ||
| 112 | ret += '\n'; | ||
| 113 | for (const Inst& inst : block) { | ||
| 114 | const Opcode op{inst.GetOpcode()}; | ||
| 115 | ret += fmt::format("[{:016x}] ", reinterpret_cast<u64>(&inst)); | ||
| 116 | if (TypeOf(op) != Type::Void) { | ||
| 117 | ret += fmt::format("%{:<5} = {}", InstIndex(inst_to_index, inst_index, &inst), op); | ||
| 118 | } else { | ||
| 119 | ret += fmt::format(" {}", op); // '%00000 = ' -> 1 + 5 + 3 = 9 spaces | ||
| 120 | } | ||
| 121 | const size_t arg_count{inst.NumArgs()}; | ||
| 122 | for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) { | ||
| 123 | const Value arg{inst.Arg(arg_index)}; | ||
| 124 | const std::string arg_str{ArgToIndex(inst_to_index, inst_index, arg)}; | ||
| 125 | ret += arg_index != 0 ? ", " : " "; | ||
| 126 | if (op == Opcode::Phi) { | ||
| 127 | ret += fmt::format("[ {}, {} ]", arg_str, | ||
| 128 | BlockToIndex(block_to_index, inst.PhiBlock(arg_index))); | ||
| 129 | } else { | ||
| 130 | ret += arg_str; | ||
| 131 | } | ||
| 132 | if (op != Opcode::Phi) { | ||
| 133 | const Type actual_type{arg.Type()}; | ||
| 134 | const Type expected_type{ArgTypeOf(op, arg_index)}; | ||
| 135 | if (!AreTypesCompatible(actual_type, expected_type)) { | ||
| 136 | ret += fmt::format("<type error: {} != {}>", actual_type, expected_type); | ||
| 137 | } | ||
| 138 | } | ||
| 139 | } | ||
| 140 | if (TypeOf(op) != Type::Void) { | ||
| 141 | ret += fmt::format(" (uses: {})\n", inst.UseCount()); | ||
| 142 | } else { | ||
| 143 | ret += '\n'; | ||
| 144 | } | ||
| 145 | } | ||
| 146 | return ret; | ||
| 147 | } | ||
| 148 | |||
| 149 | } // namespace Shader::IR | ||
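For reference, each line DumpBlock emits has the shape "[inst address] %index = opcode args (uses: n)", with void-typed instructions padded to align with the "%index = " column per the comment above. A minimal sketch of driving it from debugging code, assuming the shader_recompiler headers are on the include path:

    #include <fmt/format.h>
    #include "shader_recompiler/frontend/ir/basic_block.h"

    // Sketch only: print the textual IR of a single block while debugging.
    // DumpBlock assigns fresh %indices on every call, so two separate dumps
    // are each internally consistent but not consistent with one another.
    void PrintBlock(const Shader::IR::Block& block) {
        fmt::print("{}", Shader::IR::DumpBlock(block));
    }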
diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h new file mode 100644 index 000000000..7e134b4c7 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/basic_block.h | |||
| @@ -0,0 +1,185 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <initializer_list> | ||
| 8 | #include <map> | ||
| 9 | #include <span> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include <boost/intrusive/list.hpp> | ||
| 13 | |||
| 14 | #include "common/bit_cast.h" | ||
| 15 | #include "common/common_types.h" | ||
| 16 | #include "shader_recompiler/frontend/ir/condition.h" | ||
| 17 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 18 | #include "shader_recompiler/object_pool.h" | ||
| 19 | |||
| 20 | namespace Shader::IR { | ||
| 21 | |||
| 22 | class Block { | ||
| 23 | public: | ||
| 24 | using InstructionList = boost::intrusive::list<Inst>; | ||
| 25 | using size_type = InstructionList::size_type; | ||
| 26 | using iterator = InstructionList::iterator; | ||
| 27 | using const_iterator = InstructionList::const_iterator; | ||
| 28 | using reverse_iterator = InstructionList::reverse_iterator; | ||
| 29 | using const_reverse_iterator = InstructionList::const_reverse_iterator; | ||
| 30 | |||
| 31 | explicit Block(ObjectPool<Inst>& inst_pool_); | ||
| 32 | ~Block(); | ||
| 33 | |||
| 34 | Block(const Block&) = delete; | ||
| 35 | Block& operator=(const Block&) = delete; | ||
| 36 | |||
| 37 | Block(Block&&) = default; | ||
| 38 | Block& operator=(Block&&) = default; | ||
| 39 | |||
| 40 | /// Appends a new instruction to the end of this basic block. | ||
| 41 | void AppendNewInst(Opcode op, std::initializer_list<Value> args); | ||
| 42 | |||
| 43 | /// Inserts a new instruction into this basic block before the insertion point. | ||
| 44 | iterator PrependNewInst(iterator insertion_point, Opcode op, | ||
| 45 | std::initializer_list<Value> args = {}, u32 flags = 0); | ||
| 46 | |||
| 47 | /// Adds a new branch to this basic block. | ||
| 48 | void AddBranch(Block* block); | ||
| 49 | |||
| 50 | /// Gets a mutable reference to the instruction list for this basic block. | ||
| 51 | [[nodiscard]] InstructionList& Instructions() noexcept { | ||
| 52 | return instructions; | ||
| 53 | } | ||
| 54 | /// Gets an immutable reference to the instruction list for this basic block. | ||
| 55 | [[nodiscard]] const InstructionList& Instructions() const noexcept { | ||
| 56 | return instructions; | ||
| 57 | } | ||
| 58 | |||
| 59 | /// Gets an immutable span of the immediate predecessors. | ||
| 60 | [[nodiscard]] std::span<Block* const> ImmPredecessors() const noexcept { | ||
| 61 | return imm_predecessors; | ||
| 62 | } | ||
| 63 | /// Gets an immutable span of the immediate successors. | ||
| 64 | [[nodiscard]] std::span<Block* const> ImmSuccessors() const noexcept { | ||
| 65 | return imm_successors; | ||
| 66 | } | ||
| 67 | |||
| 68 | /// Intrusively store the host definition of this block. | ||
| 69 | template <typename DefinitionType> | ||
| 70 | void SetDefinition(DefinitionType def) { | ||
| 71 | definition = Common::BitCast<u32>(def); | ||
| 72 | } | ||
| 73 | |||
| 74 | /// Return the intrusively stored host definition of this block. | ||
| 75 | template <typename DefinitionType> | ||
| 76 | [[nodiscard]] DefinitionType Definition() const noexcept { | ||
| 77 | return Common::BitCast<DefinitionType>(definition); | ||
| 78 | } | ||
| 79 | |||
| 80 | void SetSsaRegValue(IR::Reg reg, const Value& value) noexcept { | ||
| 81 | ssa_reg_values[RegIndex(reg)] = value; | ||
| 82 | } | ||
| 83 | const Value& SsaRegValue(IR::Reg reg) const noexcept { | ||
| 84 | return ssa_reg_values[RegIndex(reg)]; | ||
| 85 | } | ||
| 86 | |||
| 87 | void SsaSeal() noexcept { | ||
| 88 | is_ssa_sealed = true; | ||
| 89 | } | ||
| 90 | [[nodiscard]] bool IsSsaSealed() const noexcept { | ||
| 91 | return is_ssa_sealed; | ||
| 92 | } | ||
| 93 | |||
| 94 | [[nodiscard]] bool empty() const { | ||
| 95 | return instructions.empty(); | ||
| 96 | } | ||
| 97 | [[nodiscard]] size_type size() const { | ||
| 98 | return instructions.size(); | ||
| 99 | } | ||
| 100 | |||
| 101 | [[nodiscard]] Inst& front() { | ||
| 102 | return instructions.front(); | ||
| 103 | } | ||
| 104 | [[nodiscard]] const Inst& front() const { | ||
| 105 | return instructions.front(); | ||
| 106 | } | ||
| 107 | |||
| 108 | [[nodiscard]] Inst& back() { | ||
| 109 | return instructions.back(); | ||
| 110 | } | ||
| 111 | [[nodiscard]] const Inst& back() const { | ||
| 112 | return instructions.back(); | ||
| 113 | } | ||
| 114 | |||
| 115 | [[nodiscard]] iterator begin() { | ||
| 116 | return instructions.begin(); | ||
| 117 | } | ||
| 118 | [[nodiscard]] const_iterator begin() const { | ||
| 119 | return instructions.begin(); | ||
| 120 | } | ||
| 121 | [[nodiscard]] iterator end() { | ||
| 122 | return instructions.end(); | ||
| 123 | } | ||
| 124 | [[nodiscard]] const_iterator end() const { | ||
| 125 | return instructions.end(); | ||
| 126 | } | ||
| 127 | |||
| 128 | [[nodiscard]] reverse_iterator rbegin() { | ||
| 129 | return instructions.rbegin(); | ||
| 130 | } | ||
| 131 | [[nodiscard]] const_reverse_iterator rbegin() const { | ||
| 132 | return instructions.rbegin(); | ||
| 133 | } | ||
| 134 | [[nodiscard]] reverse_iterator rend() { | ||
| 135 | return instructions.rend(); | ||
| 136 | } | ||
| 137 | [[nodiscard]] const_reverse_iterator rend() const { | ||
| 138 | return instructions.rend(); | ||
| 139 | } | ||
| 140 | |||
| 141 | [[nodiscard]] const_iterator cbegin() const { | ||
| 142 | return instructions.cbegin(); | ||
| 143 | } | ||
| 144 | [[nodiscard]] const_iterator cend() const { | ||
| 145 | return instructions.cend(); | ||
| 146 | } | ||
| 147 | |||
| 148 | [[nodiscard]] const_reverse_iterator crbegin() const { | ||
| 149 | return instructions.crbegin(); | ||
| 150 | } | ||
| 151 | [[nodiscard]] const_reverse_iterator crend() const { | ||
| 152 | return instructions.crend(); | ||
| 153 | } | ||
| 154 | |||
| 155 | private: | ||
| 156 | /// Memory pool for instruction list | ||
| 157 | ObjectPool<Inst>* inst_pool; | ||
| 158 | |||
| 159 | /// List of instructions in this block | ||
| 160 | InstructionList instructions; | ||
| 161 | |||
| 162 | /// Block immediate predecessors | ||
| 163 | std::vector<Block*> imm_predecessors; | ||
| 164 | /// Block immediate successors | ||
| 165 | std::vector<Block*> imm_successors; | ||
| 166 | |||
| 167 | /// Intrusively store the value of a register in the block. | ||
| 168 | std::array<Value, NUM_REGS> ssa_reg_values; | ||
| 169 | /// Intrusively store whether the block is sealed in the SSA pass. | ||
| 170 | bool is_ssa_sealed{false}; | ||
| 171 | |||
| 172 | /// Intrusively stored host definition of this block. | ||
| 173 | u32 definition{}; | ||
| 174 | }; | ||
| 175 | |||
| 176 | using BlockList = std::vector<Block*>; | ||
| 177 | |||
| 178 | [[nodiscard]] std::string DumpBlock(const Block& block); | ||
| 179 | |||
| 180 | [[nodiscard]] std::string DumpBlock(const Block& block, | ||
| 181 | const std::map<const Block*, size_t>& block_to_index, | ||
| 182 | std::map<const Inst*, size_t>& inst_to_index, | ||
| 183 | size_t& inst_index); | ||
| 184 | |||
| 185 | } // namespace Shader::IR | ||
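Two small observations on the header above: ssa_reg_values is a std::array, yet <array> is not included directly (assumed to arrive transitively through value.h), and instructions are owned by the ObjectPool while the Block merely links them into its intrusive list. A minimal usage sketch under those assumptions:

    #include "shader_recompiler/frontend/ir/basic_block.h"
    #include "shader_recompiler/object_pool.h"

    // Sketch: the pool owns the Inst storage; the block is just the list.
    void BuildTinyBlock() {
        Shader::ObjectPool<Shader::IR::Inst> inst_pool;
        Shader::IR::Block block{inst_pool};
        block.AppendNewInst(Shader::IR::Opcode::Prologue, {});
        block.AppendNewInst(Shader::IR::Opcode::Epilogue, {});
        // Range-for walks the intrusive list via begin()/end()
        for (const Shader::IR::Inst& inst : block) {
            (void)inst; // inspect instructions here
        }
    }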
diff --git a/src/shader_recompiler/frontend/ir/breadth_first_search.h b/src/shader_recompiler/frontend/ir/breadth_first_search.h new file mode 100644 index 000000000..a52ccbd58 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/breadth_first_search.h | |||
| @@ -0,0 +1,56 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <optional> | ||
| 8 | #include <queue> | ||
| 9 | #include <type_traits> | ||
| 10 | |||
| 11 | #include <boost/container/small_vector.hpp> | ||
| 12 | |||
| 13 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 14 | |||
| 15 | namespace Shader::IR { | ||
| 16 | |||
| 17 | template <typename Pred> | ||
| 18 | auto BreadthFirstSearch(const Value& value, Pred&& pred) | ||
| 19 | -> std::invoke_result_t<Pred, const Inst*> { | ||
| 20 | if (value.IsImmediate()) { | ||
| 21 | // Nothing to do with immediates | ||
| 22 | return std::nullopt; | ||
| 23 | } | ||
| 24 | // Breadth-first search visiting the rightmost arguments first | ||
| 25 | // The small vector size was tuned against shaders from Super Smash Bros. Ultimate | ||
| 26 | boost::container::small_vector<const Inst*, 2> visited; | ||
| 27 | std::queue<const Inst*> queue; | ||
| 28 | queue.push(value.InstRecursive()); | ||
| 29 | |||
| 30 | while (!queue.empty()) { | ||
| 31 | // Pop one instruction from the queue | ||
| 32 | const Inst* const inst{queue.front()}; | ||
| 33 | queue.pop(); | ||
| 34 | if (const std::optional result = pred(inst)) { | ||
| 35 | // This is the instruction we were looking for | ||
| 36 | return result; | ||
| 37 | } | ||
| 38 | // Visit the rightmost arguments first | ||
| 39 | for (size_t arg = inst->NumArgs(); arg--;) { | ||
| 40 | const Value arg_value{inst->Arg(arg)}; | ||
| 41 | if (arg_value.IsImmediate()) { | ||
| 42 | continue; | ||
| 43 | } | ||
| 44 | // Queue instruction if it hasn't been visited | ||
| 45 | const Inst* const arg_inst{arg_value.InstRecursive()}; | ||
| 46 | if (std::ranges::find(visited, arg_inst) == visited.end()) { | ||
| 47 | visited.push_back(arg_inst); | ||
| 48 | queue.push(arg_inst); | ||
| 49 | } | ||
| 50 | } | ||
| 51 | } | ||
| 52 | // The whole SSA tree has been traversed and the result was not found | ||
| 53 | return std::nullopt; | ||
| 54 | } | ||
| 55 | |||
| 56 | } // namespace Shader::IR | ||
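Note that the search above calls std::ranges::find, which formally lives in <algorithm>; it is assumed to come in transitively through value.h. The intended call pattern is a predicate that receives instructions in breadth-first order and ends the walk by returning an engaged optional, as in this hypothetical tracking helper:

    #include <optional>
    #include "shader_recompiler/frontend/ir/breadth_first_search.h"

    // Sketch: walk the SSA producers of `value` looking for a constant-buffer read.
    std::optional<const Shader::IR::Inst*> FindCbufRead(const Shader::IR::Value& value) {
        const auto pred{[](const Shader::IR::Inst* inst) -> std::optional<const Shader::IR::Inst*> {
            if (inst->GetOpcode() == Shader::IR::Opcode::GetCbufU32) {
                return inst; // engaged optional ends the search
            }
            return std::nullopt; // keep walking the SSA tree
        }};
        return Shader::IR::BreadthFirstSearch(value, pred);
    }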
diff --git a/src/shader_recompiler/frontend/ir/condition.cpp b/src/shader_recompiler/frontend/ir/condition.cpp new file mode 100644 index 000000000..fc18ea2a2 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/condition.cpp | |||
| @@ -0,0 +1,29 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string> | ||
| 6 | |||
| 7 | #include <fmt/format.h> | ||
| 8 | |||
| 9 | #include "shader_recompiler/frontend/ir/condition.h" | ||
| 10 | |||
| 11 | namespace Shader::IR { | ||
| 12 | |||
| 13 | std::string NameOf(Condition condition) { | ||
| 14 | std::string ret; | ||
| 15 | if (condition.GetFlowTest() != FlowTest::T) { | ||
| 16 | ret = fmt::to_string(condition.GetFlowTest()); | ||
| 17 | } | ||
| 18 | const auto [pred, negated]{condition.GetPred()}; | ||
| 19 | if (!ret.empty()) { | ||
| 20 | ret += '&'; | ||
| 21 | } | ||
| 22 | if (negated) { | ||
| 23 | ret += '!'; | ||
| 24 | } | ||
| 25 | ret += fmt::to_string(pred); | ||
| 26 | return ret; | ||
| 27 | } | ||
| 28 | |||
| 29 | } // namespace Shader::IR | ||
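Worked examples of the formatting above, assuming Pred values render as "P0".."P6"/"PT" through their own fmt formatter: a bare predicate prints on its own, a non-trivial flow test is joined to it with '&', and negation is prefixed with '!'.

    #include <cassert>
    #include "shader_recompiler/frontend/ir/condition.h"

    // Sketch: expected renderings of NameOf (assumes Pred formats as "P0".."PT").
    void CheckConditionNames() {
        using namespace Shader::IR;
        assert(NameOf(Condition{Pred::P0}) == "P0");
        assert(NameOf(Condition{Pred::P0, true}) == "!P0");
        assert(NameOf(Condition{FlowTest::GE, Pred::P3, true}) == "GE&!P3");
    }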
diff --git a/src/shader_recompiler/frontend/ir/condition.h b/src/shader_recompiler/frontend/ir/condition.h new file mode 100644 index 000000000..aa8597c60 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/condition.h | |||
| @@ -0,0 +1,60 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <compare> | ||
| 8 | #include <string> | ||
| 9 | |||
| 10 | #include <fmt/format.h> | ||
| 11 | |||
| 12 | #include "common/common_types.h" | ||
| 13 | #include "shader_recompiler/frontend/ir/flow_test.h" | ||
| 14 | #include "shader_recompiler/frontend/ir/pred.h" | ||
| 15 | |||
| 16 | namespace Shader::IR { | ||
| 17 | |||
| 18 | class Condition { | ||
| 19 | public: | ||
| 20 | Condition() noexcept = default; | ||
| 21 | |||
| 22 | explicit Condition(FlowTest flow_test_, Pred pred_, bool pred_negated_ = false) noexcept | ||
| 23 | : flow_test{static_cast<u16>(flow_test_)}, pred{static_cast<u8>(pred_)}, | ||
| 24 | pred_negated{pred_negated_ ? u8{1} : u8{0}} {} | ||
| 25 | |||
| 26 | explicit Condition(Pred pred_, bool pred_negated_ = false) noexcept | ||
| 27 | : Condition(FlowTest::T, pred_, pred_negated_) {} | ||
| 28 | |||
| 29 | explicit Condition(bool value) : Condition(Pred::PT, !value) {} | ||
| 30 | |||
| 31 | auto operator<=>(const Condition&) const noexcept = default; | ||
| 32 | |||
| 33 | [[nodiscard]] IR::FlowTest GetFlowTest() const noexcept { | ||
| 34 | return static_cast<IR::FlowTest>(flow_test); | ||
| 35 | } | ||
| 36 | |||
| 37 | [[nodiscard]] std::pair<IR::Pred, bool> GetPred() const noexcept { | ||
| 38 | return {static_cast<IR::Pred>(pred), pred_negated != 0}; | ||
| 39 | } | ||
| 40 | |||
| 41 | private: | ||
| 42 | u16 flow_test; | ||
| 43 | u8 pred; | ||
| 44 | u8 pred_negated; | ||
| 45 | }; | ||
| 46 | |||
| 47 | std::string NameOf(Condition condition); | ||
| 48 | |||
| 49 | } // namespace Shader::IR | ||
| 50 | |||
| 51 | template <> | ||
| 52 | struct fmt::formatter<Shader::IR::Condition> { | ||
| 53 | constexpr auto parse(format_parse_context& ctx) { | ||
| 54 | return ctx.begin(); | ||
| 55 | } | ||
| 56 | template <typename FormatContext> | ||
| 57 | auto format(const Shader::IR::Condition& cond, FormatContext& ctx) { | ||
| 58 | return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(cond)); | ||
| 59 | } | ||
| 60 | }; | ||
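The private layout above packs a condition into four bytes with no padding (u16 + u8 + u8), which is what makes the defaulted operator<=> and by-value passing cheap. A sketch of that invariant:

    #include "shader_recompiler/frontend/ir/condition.h"

    // Sketch: flow_test (u16) + pred (u8) + pred_negated (u8) pack into 4 bytes.
    static_assert(sizeof(Shader::IR::Condition) == 4);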
diff --git a/src/shader_recompiler/frontend/ir/flow_test.cpp b/src/shader_recompiler/frontend/ir/flow_test.cpp new file mode 100644 index 000000000..6ebb4ad89 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/flow_test.cpp | |||
| @@ -0,0 +1,83 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string> | ||
| 6 | |||
| 7 | #include <fmt/format.h> | ||
| 8 | |||
| 9 | #include "shader_recompiler/frontend/ir/flow_test.h" | ||
| 10 | |||
| 11 | namespace Shader::IR { | ||
| 12 | |||
| 13 | std::string NameOf(FlowTest flow_test) { | ||
| 14 | switch (flow_test) { | ||
| 15 | case FlowTest::F: | ||
| 16 | return "F"; | ||
| 17 | case FlowTest::LT: | ||
| 18 | return "LT"; | ||
| 19 | case FlowTest::EQ: | ||
| 20 | return "EQ"; | ||
| 21 | case FlowTest::LE: | ||
| 22 | return "LE"; | ||
| 23 | case FlowTest::GT: | ||
| 24 | return "GT"; | ||
| 25 | case FlowTest::NE: | ||
| 26 | return "NE"; | ||
| 27 | case FlowTest::GE: | ||
| 28 | return "GE"; | ||
| 29 | case FlowTest::NUM: | ||
| 30 | return "NUM"; | ||
| 31 | case FlowTest::NaN: | ||
| 32 | return "NAN"; | ||
| 33 | case FlowTest::LTU: | ||
| 34 | return "LTU"; | ||
| 35 | case FlowTest::EQU: | ||
| 36 | return "EQU"; | ||
| 37 | case FlowTest::LEU: | ||
| 38 | return "LEU"; | ||
| 39 | case FlowTest::GTU: | ||
| 40 | return "GTU"; | ||
| 41 | case FlowTest::NEU: | ||
| 42 | return "NEU"; | ||
| 43 | case FlowTest::GEU: | ||
| 44 | return "GEU"; | ||
| 45 | case FlowTest::T: | ||
| 46 | return "T"; | ||
| 47 | case FlowTest::OFF: | ||
| 48 | return "OFF"; | ||
| 49 | case FlowTest::LO: | ||
| 50 | return "LO"; | ||
| 51 | case FlowTest::SFF: | ||
| 52 | return "SFF"; | ||
| 53 | case FlowTest::LS: | ||
| 54 | return "LS"; | ||
| 55 | case FlowTest::HI: | ||
| 56 | return "HI"; | ||
| 57 | case FlowTest::SFT: | ||
| 58 | return "SFT"; | ||
| 59 | case FlowTest::HS: | ||
| 60 | return "HS"; | ||
| 61 | case FlowTest::OFT: | ||
| 62 | return "OFT"; | ||
| 63 | case FlowTest::CSM_TA: | ||
| 64 | return "CSM_TA"; | ||
| 65 | case FlowTest::CSM_TR: | ||
| 66 | return "CSM_TR"; | ||
| 67 | case FlowTest::CSM_MX: | ||
| 68 | return "CSM_MX"; | ||
| 69 | case FlowTest::FCSM_TA: | ||
| 70 | return "FCSM_TA"; | ||
| 71 | case FlowTest::FCSM_TR: | ||
| 72 | return "FCSM_TR"; | ||
| 73 | case FlowTest::FCSM_MX: | ||
| 74 | return "FCSM_MX"; | ||
| 75 | case FlowTest::RLE: | ||
| 76 | return "RLE"; | ||
| 77 | case FlowTest::RGT: | ||
| 78 | return "RGT"; | ||
| 79 | } | ||
| 80 | return fmt::format("<invalid flow test {}>", static_cast<int>(flow_test)); | ||
| 81 | } | ||
| 82 | |||
| 83 | } // namespace Shader::IR | ||
diff --git a/src/shader_recompiler/frontend/ir/flow_test.h b/src/shader_recompiler/frontend/ir/flow_test.h new file mode 100644 index 000000000..09e113773 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/flow_test.h | |||
| @@ -0,0 +1,62 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <string> | ||
| 8 | #include <fmt/format.h> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | |||
| 12 | namespace Shader::IR { | ||
| 13 | |||
| 14 | enum class FlowTest : u64 { | ||
| 15 | F, | ||
| 16 | LT, | ||
| 17 | EQ, | ||
| 18 | LE, | ||
| 19 | GT, | ||
| 20 | NE, | ||
| 21 | GE, | ||
| 22 | NUM, | ||
| 23 | NaN, | ||
| 24 | LTU, | ||
| 25 | EQU, | ||
| 26 | LEU, | ||
| 27 | GTU, | ||
| 28 | NEU, | ||
| 29 | GEU, | ||
| 30 | T, | ||
| 31 | OFF, | ||
| 32 | LO, | ||
| 33 | SFF, | ||
| 34 | LS, | ||
| 35 | HI, | ||
| 36 | SFT, | ||
| 37 | HS, | ||
| 38 | OFT, | ||
| 39 | CSM_TA, | ||
| 40 | CSM_TR, | ||
| 41 | CSM_MX, | ||
| 42 | FCSM_TA, | ||
| 43 | FCSM_TR, | ||
| 44 | FCSM_MX, | ||
| 45 | RLE, | ||
| 46 | RGT, | ||
| 47 | }; | ||
| 48 | |||
| 49 | [[nodiscard]] std::string NameOf(FlowTest flow_test); | ||
| 50 | |||
| 51 | } // namespace Shader::IR | ||
| 52 | |||
| 53 | template <> | ||
| 54 | struct fmt::formatter<Shader::IR::FlowTest> { | ||
| 55 | constexpr auto parse(format_parse_context& ctx) { | ||
| 56 | return ctx.begin(); | ||
| 57 | } | ||
| 58 | template <typename FormatContext> | ||
| 59 | auto format(const Shader::IR::FlowTest& flow_test, FormatContext& ctx) { | ||
| 60 | return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(flow_test)); | ||
| 61 | } | ||
| 62 | }; | ||
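With the formatter specialization above, a FlowTest interpolates directly into fmt strings by forwarding to NameOf; out-of-range values fall back to the "<invalid flow test N>" string in flow_test.cpp. A one-line sketch:

    #include <fmt/format.h>
    #include "shader_recompiler/frontend/ir/flow_test.h"

    // Sketch: prints "LTU" via the fmt::formatter<FlowTest> specialization.
    void PrintFlowTest() {
        fmt::print("{}\n", Shader::IR::FlowTest::LTU);
    }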
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp new file mode 100644 index 000000000..13159a68d --- /dev/null +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp | |||
| @@ -0,0 +1,2017 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_cast.h" | ||
| 6 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 7 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 8 | |||
| 9 | namespace Shader::IR { | ||
| 10 | namespace { | ||
| 11 | [[noreturn]] void ThrowInvalidType(Type type) { | ||
| 12 | throw InvalidArgument("Invalid type {}", type); | ||
| 13 | } | ||
| 14 | |||
| 15 | Value MakeLodClampPair(IREmitter& ir, const F32& bias_lod, const F32& lod_clamp) { | ||
| 16 | if (!bias_lod.IsEmpty() && !lod_clamp.IsEmpty()) { | ||
| 17 | return ir.CompositeConstruct(bias_lod, lod_clamp); | ||
| 18 | } else if (!bias_lod.IsEmpty()) { | ||
| 19 | return bias_lod; | ||
| 20 | } else if (!lod_clamp.IsEmpty()) { | ||
| 21 | return lod_clamp; | ||
| 22 | } else { | ||
| 23 | return Value{}; | ||
| 24 | } | ||
| 25 | } | ||
| 26 | } // Anonymous namespace | ||
| 27 | |||
| 28 | U1 IREmitter::Imm1(bool value) const { | ||
| 29 | return U1{Value{value}}; | ||
| 30 | } | ||
| 31 | |||
| 32 | U8 IREmitter::Imm8(u8 value) const { | ||
| 33 | return U8{Value{value}}; | ||
| 34 | } | ||
| 35 | |||
| 36 | U16 IREmitter::Imm16(u16 value) const { | ||
| 37 | return U16{Value{value}}; | ||
| 38 | } | ||
| 39 | |||
| 40 | U32 IREmitter::Imm32(u32 value) const { | ||
| 41 | return U32{Value{value}}; | ||
| 42 | } | ||
| 43 | |||
| 44 | U32 IREmitter::Imm32(s32 value) const { | ||
| 45 | return U32{Value{static_cast<u32>(value)}}; | ||
| 46 | } | ||
| 47 | |||
| 48 | F32 IREmitter::Imm32(f32 value) const { | ||
| 49 | return F32{Value{value}}; | ||
| 50 | } | ||
| 51 | |||
| 52 | U64 IREmitter::Imm64(u64 value) const { | ||
| 53 | return U64{Value{value}}; | ||
| 54 | } | ||
| 55 | |||
| 56 | U64 IREmitter::Imm64(s64 value) const { | ||
| 57 | return U64{Value{static_cast<u64>(value)}}; | ||
| 58 | } | ||
| 59 | |||
| 60 | F64 IREmitter::Imm64(f64 value) const { | ||
| 61 | return F64{Value{value}}; | ||
| 62 | } | ||
| 63 | |||
| 64 | U1 IREmitter::ConditionRef(const U1& value) { | ||
| 65 | return Inst<U1>(Opcode::ConditionRef, value); | ||
| 66 | } | ||
| 67 | |||
| 68 | void IREmitter::Reference(const Value& value) { | ||
| 69 | Inst(Opcode::Reference, value); | ||
| 70 | } | ||
| 71 | |||
| 72 | void IREmitter::PhiMove(IR::Inst& phi, const Value& value) { | ||
| 73 | Inst(Opcode::PhiMove, Value{&phi}, value); | ||
| 74 | } | ||
| 75 | |||
| 76 | void IREmitter::Prologue() { | ||
| 77 | Inst(Opcode::Prologue); | ||
| 78 | } | ||
| 79 | |||
| 80 | void IREmitter::Epilogue() { | ||
| 81 | Inst(Opcode::Epilogue); | ||
| 82 | } | ||
| 83 | |||
| 84 | void IREmitter::DemoteToHelperInvocation() { | ||
| 85 | Inst(Opcode::DemoteToHelperInvocation); | ||
| 86 | } | ||
| 87 | |||
| 88 | void IREmitter::EmitVertex(const U32& stream) { | ||
| 89 | Inst(Opcode::EmitVertex, stream); | ||
| 90 | } | ||
| 91 | |||
| 92 | void IREmitter::EndPrimitive(const U32& stream) { | ||
| 93 | Inst(Opcode::EndPrimitive, stream); | ||
| 94 | } | ||
| 95 | |||
| 96 | void IREmitter::Barrier() { | ||
| 97 | Inst(Opcode::Barrier); | ||
| 98 | } | ||
| 99 | |||
| 100 | void IREmitter::WorkgroupMemoryBarrier() { | ||
| 101 | Inst(Opcode::WorkgroupMemoryBarrier); | ||
| 102 | } | ||
| 103 | |||
| 104 | void IREmitter::DeviceMemoryBarrier() { | ||
| 105 | Inst(Opcode::DeviceMemoryBarrier); | ||
| 106 | } | ||
| 107 | |||
| 108 | U32 IREmitter::GetReg(IR::Reg reg) { | ||
| 109 | return Inst<U32>(Opcode::GetRegister, reg); | ||
| 110 | } | ||
| 111 | |||
| 112 | void IREmitter::SetReg(IR::Reg reg, const U32& value) { | ||
| 113 | Inst(Opcode::SetRegister, reg, value); | ||
| 114 | } | ||
| 115 | |||
| 116 | U1 IREmitter::GetPred(IR::Pred pred, bool is_negated) { | ||
| 117 | if (pred == Pred::PT) { | ||
| 118 | return Imm1(!is_negated); | ||
| 119 | } | ||
| 120 | const U1 value{Inst<U1>(Opcode::GetPred, pred)}; | ||
| 121 | if (is_negated) { | ||
| 122 | return Inst<U1>(Opcode::LogicalNot, value); | ||
| 123 | } else { | ||
| 124 | return value; | ||
| 125 | } | ||
| 126 | } | ||
| 127 | |||
| 128 | void IREmitter::SetPred(IR::Pred pred, const U1& value) { | ||
| 129 | if (pred != IR::Pred::PT) { | ||
| 130 | Inst(Opcode::SetPred, pred, value); | ||
| 131 | } | ||
| 132 | } | ||
| 133 | |||
| 134 | U1 IREmitter::GetGotoVariable(u32 id) { | ||
| 135 | return Inst<U1>(Opcode::GetGotoVariable, id); | ||
| 136 | } | ||
| 137 | |||
| 138 | void IREmitter::SetGotoVariable(u32 id, const U1& value) { | ||
| 139 | Inst(Opcode::SetGotoVariable, id, value); | ||
| 140 | } | ||
| 141 | |||
| 142 | U32 IREmitter::GetIndirectBranchVariable() { | ||
| 143 | return Inst<U32>(Opcode::GetIndirectBranchVariable); | ||
| 144 | } | ||
| 145 | |||
| 146 | void IREmitter::SetIndirectBranchVariable(const U32& value) { | ||
| 147 | Inst(Opcode::SetIndirectBranchVariable, value); | ||
| 148 | } | ||
| 149 | |||
| 150 | U32 IREmitter::GetCbuf(const U32& binding, const U32& byte_offset) { | ||
| 151 | return Inst<U32>(Opcode::GetCbufU32, binding, byte_offset); | ||
| 152 | } | ||
| 153 | |||
| 154 | Value IREmitter::GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize, | ||
| 155 | bool is_signed) { | ||
| 156 | switch (bitsize) { | ||
| 157 | case 8: | ||
| 158 | return Inst<U32>(is_signed ? Opcode::GetCbufS8 : Opcode::GetCbufU8, binding, byte_offset); | ||
| 159 | case 16: | ||
| 160 | return Inst<U32>(is_signed ? Opcode::GetCbufS16 : Opcode::GetCbufU16, binding, byte_offset); | ||
| 161 | case 32: | ||
| 162 | return Inst<U32>(Opcode::GetCbufU32, binding, byte_offset); | ||
| 163 | case 64: | ||
| 164 | return Inst(Opcode::GetCbufU32x2, binding, byte_offset); | ||
| 165 | default: | ||
| 166 | throw InvalidArgument("Invalid bit size {}", bitsize); | ||
| 167 | } | ||
| 168 | } | ||
| 169 | |||
| 170 | F32 IREmitter::GetFloatCbuf(const U32& binding, const U32& byte_offset) { | ||
| 171 | return Inst<F32>(Opcode::GetCbufF32, binding, byte_offset); | ||
| 172 | } | ||
| 173 | |||
| 174 | U1 IREmitter::GetZFlag() { | ||
| 175 | return Inst<U1>(Opcode::GetZFlag); | ||
| 176 | } | ||
| 177 | |||
| 178 | U1 IREmitter::GetSFlag() { | ||
| 179 | return Inst<U1>(Opcode::GetSFlag); | ||
| 180 | } | ||
| 181 | |||
| 182 | U1 IREmitter::GetCFlag() { | ||
| 183 | return Inst<U1>(Opcode::GetCFlag); | ||
| 184 | } | ||
| 185 | |||
| 186 | U1 IREmitter::GetOFlag() { | ||
| 187 | return Inst<U1>(Opcode::GetOFlag); | ||
| 188 | } | ||
| 189 | |||
| 190 | void IREmitter::SetZFlag(const U1& value) { | ||
| 191 | Inst(Opcode::SetZFlag, value); | ||
| 192 | } | ||
| 193 | |||
| 194 | void IREmitter::SetSFlag(const U1& value) { | ||
| 195 | Inst(Opcode::SetSFlag, value); | ||
| 196 | } | ||
| 197 | |||
| 198 | void IREmitter::SetCFlag(const U1& value) { | ||
| 199 | Inst(Opcode::SetCFlag, value); | ||
| 200 | } | ||
| 201 | |||
| 202 | void IREmitter::SetOFlag(const U1& value) { | ||
| 203 | Inst(Opcode::SetOFlag, value); | ||
| 204 | } | ||
| 205 | |||
| 206 | static U1 GetFlowTest(IREmitter& ir, FlowTest flow_test) { | ||
| 207 | switch (flow_test) { | ||
| 208 | case FlowTest::F: | ||
| 209 | return ir.Imm1(false); | ||
| 210 | case FlowTest::LT: | ||
| 211 | return ir.LogicalXor(ir.LogicalAnd(ir.GetSFlag(), ir.LogicalNot(ir.GetZFlag())), | ||
| 212 | ir.GetOFlag()); | ||
| 213 | case FlowTest::EQ: | ||
| 214 | return ir.LogicalAnd(ir.LogicalNot(ir.GetSFlag()), ir.GetZFlag()); | ||
| 215 | case FlowTest::LE: | ||
| 216 | return ir.LogicalXor(ir.GetSFlag(), ir.LogicalOr(ir.GetZFlag(), ir.GetOFlag())); | ||
| 217 | case FlowTest::GT: | ||
| 218 | return ir.LogicalAnd(ir.LogicalXor(ir.LogicalNot(ir.GetSFlag()), ir.GetOFlag()), | ||
| 219 | ir.LogicalNot(ir.GetZFlag())); | ||
| 220 | case FlowTest::NE: | ||
| 221 | return ir.LogicalNot(ir.GetZFlag()); | ||
| 222 | case FlowTest::GE: | ||
| 223 | return ir.LogicalNot(ir.LogicalXor(ir.GetSFlag(), ir.GetOFlag())); | ||
| 224 | case FlowTest::NUM: | ||
| 225 | return ir.LogicalOr(ir.LogicalNot(ir.GetSFlag()), ir.LogicalNot(ir.GetZFlag())); | ||
| 226 | case FlowTest::NaN: | ||
| 227 | return ir.LogicalAnd(ir.GetSFlag(), ir.GetZFlag()); | ||
| 228 | case FlowTest::LTU: | ||
| 229 | return ir.LogicalXor(ir.GetSFlag(), ir.GetOFlag()); | ||
| 230 | case FlowTest::EQU: | ||
| 231 | return ir.GetZFlag(); | ||
| 232 | case FlowTest::LEU: | ||
| 233 | return ir.LogicalOr(ir.LogicalXor(ir.GetSFlag(), ir.GetOFlag()), ir.GetZFlag()); | ||
| 234 | case FlowTest::GTU: | ||
| 235 | return ir.LogicalXor(ir.LogicalNot(ir.GetSFlag()), | ||
| 236 | ir.LogicalOr(ir.GetZFlag(), ir.GetOFlag())); | ||
| 237 | case FlowTest::NEU: | ||
| 238 | return ir.LogicalOr(ir.GetSFlag(), ir.LogicalNot(ir.GetZFlag())); | ||
| 239 | case FlowTest::GEU: | ||
| 240 | return ir.LogicalXor(ir.LogicalOr(ir.LogicalNot(ir.GetSFlag()), ir.GetZFlag()), | ||
| 241 | ir.GetOFlag()); | ||
| 242 | case FlowTest::T: | ||
| 243 | return ir.Imm1(true); | ||
| 244 | case FlowTest::OFF: | ||
| 245 | return ir.LogicalNot(ir.GetOFlag()); | ||
| 246 | case FlowTest::LO: | ||
| 247 | return ir.LogicalNot(ir.GetCFlag()); | ||
| 248 | case FlowTest::SFF: | ||
| 249 | return ir.LogicalNot(ir.GetSFlag()); | ||
| 250 | case FlowTest::LS: | ||
| 251 | return ir.LogicalOr(ir.GetZFlag(), ir.LogicalNot(ir.GetCFlag())); | ||
| 252 | case FlowTest::HI: | ||
| 253 | return ir.LogicalAnd(ir.GetCFlag(), ir.LogicalNot(ir.GetZFlag())); | ||
| 254 | case FlowTest::SFT: | ||
| 255 | return ir.GetSFlag(); | ||
| 256 | case FlowTest::HS: | ||
| 257 | return ir.GetCFlag(); | ||
| 258 | case FlowTest::OFT: | ||
| 259 | return ir.GetOFlag(); | ||
| 260 | case FlowTest::RLE: | ||
| 261 | return ir.LogicalOr(ir.GetSFlag(), ir.GetZFlag()); | ||
| 262 | case FlowTest::RGT: | ||
| 263 | return ir.LogicalAnd(ir.LogicalNot(ir.GetSFlag()), ir.LogicalNot(ir.GetZFlag())); | ||
| 264 | case FlowTest::FCSM_TR: | ||
| 265 | LOG_WARNING(Shader, "(STUBBED) FCSM_TR"); | ||
| 266 | return ir.Imm1(false); | ||
| 267 | case FlowTest::CSM_TA: | ||
| 268 | case FlowTest::CSM_TR: | ||
| 269 | case FlowTest::CSM_MX: | ||
| 270 | case FlowTest::FCSM_TA: | ||
| 271 | case FlowTest::FCSM_MX: | ||
| 272 | default: | ||
| 273 | throw NotImplementedException("Flow test {}", flow_test); | ||
| 274 | } | ||
| 275 | } | ||
| 276 | |||
| 277 | U1 IREmitter::Condition(IR::Condition cond) { | ||
| 278 | const FlowTest flow_test{cond.GetFlowTest()}; | ||
| 279 | const auto [pred, is_negated]{cond.GetPred()}; | ||
| 280 | if (flow_test == FlowTest::T) { | ||
| 281 | return GetPred(pred, is_negated); | ||
| 282 | } | ||
| 283 | return LogicalAnd(GetPred(pred, is_negated), GetFlowTest(*this, flow_test)); | ||
| 284 | } | ||
| 285 | |||
| 286 | U1 IREmitter::GetFlowTestResult(FlowTest test) { | ||
| 287 | return GetFlowTest(*this, test); | ||
| 288 | } | ||
| 289 | |||
| 290 | F32 IREmitter::GetAttribute(IR::Attribute attribute) { | ||
| 291 | return GetAttribute(attribute, Imm32(0)); | ||
| 292 | } | ||
| 293 | |||
| 294 | F32 IREmitter::GetAttribute(IR::Attribute attribute, const U32& vertex) { | ||
| 295 | return Inst<F32>(Opcode::GetAttribute, attribute, vertex); | ||
| 296 | } | ||
| 297 | |||
| 298 | void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex) { | ||
| 299 | Inst(Opcode::SetAttribute, attribute, value, vertex); | ||
| 300 | } | ||
| 301 | |||
| 302 | F32 IREmitter::GetAttributeIndexed(const U32& phys_address) { | ||
| 303 | return GetAttributeIndexed(phys_address, Imm32(0)); | ||
| 304 | } | ||
| 305 | |||
| 306 | F32 IREmitter::GetAttributeIndexed(const U32& phys_address, const U32& vertex) { | ||
| 307 | return Inst<F32>(Opcode::GetAttributeIndexed, phys_address, vertex); | ||
| 308 | } | ||
| 309 | |||
| 310 | void IREmitter::SetAttributeIndexed(const U32& phys_address, const F32& value, const U32& vertex) { | ||
| 311 | Inst(Opcode::SetAttributeIndexed, phys_address, value, vertex); | ||
| 312 | } | ||
| 313 | |||
| 314 | F32 IREmitter::GetPatch(Patch patch) { | ||
| 315 | return Inst<F32>(Opcode::GetPatch, patch); | ||
| 316 | } | ||
| 317 | |||
| 318 | void IREmitter::SetPatch(Patch patch, const F32& value) { | ||
| 319 | Inst(Opcode::SetPatch, patch, value); | ||
| 320 | } | ||
| 321 | |||
| 322 | void IREmitter::SetFragColor(u32 index, u32 component, const F32& value) { | ||
| 323 | Inst(Opcode::SetFragColor, Imm32(index), Imm32(component), value); | ||
| 324 | } | ||
| 325 | |||
| 326 | void IREmitter::SetSampleMask(const U32& value) { | ||
| 327 | Inst(Opcode::SetSampleMask, value); | ||
| 328 | } | ||
| 329 | |||
| 330 | void IREmitter::SetFragDepth(const F32& value) { | ||
| 331 | Inst(Opcode::SetFragDepth, value); | ||
| 332 | } | ||
| 333 | |||
| 334 | U32 IREmitter::WorkgroupIdX() { | ||
| 335 | return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 0)}; | ||
| 336 | } | ||
| 337 | |||
| 338 | U32 IREmitter::WorkgroupIdY() { | ||
| 339 | return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 1)}; | ||
| 340 | } | ||
| 341 | |||
| 342 | U32 IREmitter::WorkgroupIdZ() { | ||
| 343 | return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 2)}; | ||
| 344 | } | ||
| 345 | |||
| 346 | Value IREmitter::LocalInvocationId() { | ||
| 347 | return Inst(Opcode::LocalInvocationId); | ||
| 348 | } | ||
| 349 | |||
| 350 | U32 IREmitter::LocalInvocationIdX() { | ||
| 351 | return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 0)}; | ||
| 352 | } | ||
| 353 | |||
| 354 | U32 IREmitter::LocalInvocationIdY() { | ||
| 355 | return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 1)}; | ||
| 356 | } | ||
| 357 | |||
| 358 | U32 IREmitter::LocalInvocationIdZ() { | ||
| 359 | return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 2)}; | ||
| 360 | } | ||
| 361 | |||
| 362 | U32 IREmitter::InvocationId() { | ||
| 363 | return Inst<U32>(Opcode::InvocationId); | ||
| 364 | } | ||
| 365 | |||
| 366 | U32 IREmitter::SampleId() { | ||
| 367 | return Inst<U32>(Opcode::SampleId); | ||
| 368 | } | ||
| 369 | |||
| 370 | U1 IREmitter::IsHelperInvocation() { | ||
| 371 | return Inst<U1>(Opcode::IsHelperInvocation); | ||
| 372 | } | ||
| 373 | |||
| 374 | F32 IREmitter::YDirection() { | ||
| 375 | return Inst<F32>(Opcode::YDirection); | ||
| 376 | } | ||
| 377 | |||
| 378 | U32 IREmitter::LaneId() { | ||
| 379 | return Inst<U32>(Opcode::LaneId); | ||
| 380 | } | ||
| 381 | |||
| 382 | U32 IREmitter::LoadGlobalU8(const U64& address) { | ||
| 383 | return Inst<U32>(Opcode::LoadGlobalU8, address); | ||
| 384 | } | ||
| 385 | |||
| 386 | U32 IREmitter::LoadGlobalS8(const U64& address) { | ||
| 387 | return Inst<U32>(Opcode::LoadGlobalS8, address); | ||
| 388 | } | ||
| 389 | |||
| 390 | U32 IREmitter::LoadGlobalU16(const U64& address) { | ||
| 391 | return Inst<U32>(Opcode::LoadGlobalU16, address); | ||
| 392 | } | ||
| 393 | |||
| 394 | U32 IREmitter::LoadGlobalS16(const U64& address) { | ||
| 395 | return Inst<U32>(Opcode::LoadGlobalS16, address); | ||
| 396 | } | ||
| 397 | |||
| 398 | U32 IREmitter::LoadGlobal32(const U64& address) { | ||
| 399 | return Inst<U32>(Opcode::LoadGlobal32, address); | ||
| 400 | } | ||
| 401 | |||
| 402 | Value IREmitter::LoadGlobal64(const U64& address) { | ||
| 403 | return Inst<Value>(Opcode::LoadGlobal64, address); | ||
| 404 | } | ||
| 405 | |||
| 406 | Value IREmitter::LoadGlobal128(const U64& address) { | ||
| 407 | return Inst<Value>(Opcode::LoadGlobal128, address); | ||
| 408 | } | ||
| 409 | |||
| 410 | void IREmitter::WriteGlobalU8(const U64& address, const U32& value) { | ||
| 411 | Inst(Opcode::WriteGlobalU8, address, value); | ||
| 412 | } | ||
| 413 | |||
| 414 | void IREmitter::WriteGlobalS8(const U64& address, const U32& value) { | ||
| 415 | Inst(Opcode::WriteGlobalS8, address, value); | ||
| 416 | } | ||
| 417 | |||
| 418 | void IREmitter::WriteGlobalU16(const U64& address, const U32& value) { | ||
| 419 | Inst(Opcode::WriteGlobalU16, address, value); | ||
| 420 | } | ||
| 421 | |||
| 422 | void IREmitter::WriteGlobalS16(const U64& address, const U32& value) { | ||
| 423 | Inst(Opcode::WriteGlobalS16, address, value); | ||
| 424 | } | ||
| 425 | |||
| 426 | void IREmitter::WriteGlobal32(const U64& address, const U32& value) { | ||
| 427 | Inst(Opcode::WriteGlobal32, address, value); | ||
| 428 | } | ||
| 429 | |||
| 430 | void IREmitter::WriteGlobal64(const U64& address, const IR::Value& vector) { | ||
| 431 | Inst(Opcode::WriteGlobal64, address, vector); | ||
| 432 | } | ||
| 433 | |||
| 434 | void IREmitter::WriteGlobal128(const U64& address, const IR::Value& vector) { | ||
| 435 | Inst(Opcode::WriteGlobal128, address, vector); | ||
| 436 | } | ||
| 437 | |||
| 438 | U32 IREmitter::LoadLocal(const IR::U32& word_offset) { | ||
| 439 | return Inst<U32>(Opcode::LoadLocal, word_offset); | ||
| 440 | } | ||
| 441 | |||
| 442 | void IREmitter::WriteLocal(const IR::U32& word_offset, const IR::U32& value) { | ||
| 443 | Inst(Opcode::WriteLocal, word_offset, value); | ||
| 444 | } | ||
| 445 | |||
| 446 | Value IREmitter::LoadShared(int bit_size, bool is_signed, const IR::U32& offset) { | ||
| 447 | switch (bit_size) { | ||
| 448 | case 8: | ||
| 449 | return Inst(is_signed ? Opcode::LoadSharedS8 : Opcode::LoadSharedU8, offset); | ||
| 450 | case 16: | ||
| 451 | return Inst(is_signed ? Opcode::LoadSharedS16 : Opcode::LoadSharedU16, offset); | ||
| 452 | case 32: | ||
| 453 | return Inst(Opcode::LoadSharedU32, offset); | ||
| 454 | case 64: | ||
| 455 | return Inst(Opcode::LoadSharedU64, offset); | ||
| 456 | case 128: | ||
| 457 | return Inst(Opcode::LoadSharedU128, offset); | ||
| 458 | } | ||
| 459 | throw InvalidArgument("Invalid bit size {}", bit_size); | ||
| 460 | } | ||
| 461 | |||
| 462 | void IREmitter::WriteShared(int bit_size, const IR::U32& offset, const IR::Value& value) { | ||
| 463 | switch (bit_size) { | ||
| 464 | case 8: | ||
| 465 | Inst(Opcode::WriteSharedU8, offset, value); | ||
| 466 | break; | ||
| 467 | case 16: | ||
| 468 | Inst(Opcode::WriteSharedU16, offset, value); | ||
| 469 | break; | ||
| 470 | case 32: | ||
| 471 | Inst(Opcode::WriteSharedU32, offset, value); | ||
| 472 | break; | ||
| 473 | case 64: | ||
| 474 | Inst(Opcode::WriteSharedU64, offset, value); | ||
| 475 | break; | ||
| 476 | case 128: | ||
| 477 | Inst(Opcode::WriteSharedU128, offset, value); | ||
| 478 | break; | ||
| 479 | default: | ||
| 480 | throw InvalidArgument("Invalid bit size {}", bit_size); | ||
| 481 | } | ||
| 482 | } | ||
| 483 | |||
| 484 | U1 IREmitter::GetZeroFromOp(const Value& op) { | ||
| 485 | return Inst<U1>(Opcode::GetZeroFromOp, op); | ||
| 486 | } | ||
| 487 | |||
| 488 | U1 IREmitter::GetSignFromOp(const Value& op) { | ||
| 489 | return Inst<U1>(Opcode::GetSignFromOp, op); | ||
| 490 | } | ||
| 491 | |||
| 492 | U1 IREmitter::GetCarryFromOp(const Value& op) { | ||
| 493 | return Inst<U1>(Opcode::GetCarryFromOp, op); | ||
| 494 | } | ||
| 495 | |||
| 496 | U1 IREmitter::GetOverflowFromOp(const Value& op) { | ||
| 497 | return Inst<U1>(Opcode::GetOverflowFromOp, op); | ||
| 498 | } | ||
| 499 | |||
| 500 | U1 IREmitter::GetSparseFromOp(const Value& op) { | ||
| 501 | return Inst<U1>(Opcode::GetSparseFromOp, op); | ||
| 502 | } | ||
| 503 | |||
| 504 | U1 IREmitter::GetInBoundsFromOp(const Value& op) { | ||
| 505 | return Inst<U1>(Opcode::GetInBoundsFromOp, op); | ||
| 506 | } | ||
| 507 | |||
| 508 | F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) { | ||
| 509 | if (a.Type() != b.Type()) { | ||
| 510 | throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); | ||
| 511 | } | ||
| 512 | switch (a.Type()) { | ||
| 513 | case Type::F16: | ||
| 514 | return Inst<F16>(Opcode::FPAdd16, Flags{control}, a, b); | ||
| 515 | case Type::F32: | ||
| 516 | return Inst<F32>(Opcode::FPAdd32, Flags{control}, a, b); | ||
| 517 | case Type::F64: | ||
| 518 | return Inst<F64>(Opcode::FPAdd64, Flags{control}, a, b); | ||
| 519 | default: | ||
| 520 | ThrowInvalidType(a.Type()); | ||
| 521 | } | ||
| 522 | } | ||
| 523 | |||
| 524 | Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2) { | ||
| 525 | if (e1.Type() != e2.Type()) { | ||
| 526 | throw InvalidArgument("Mismatching types {} and {}", e1.Type(), e2.Type()); | ||
| 527 | } | ||
| 528 | switch (e1.Type()) { | ||
| 529 | case Type::U32: | ||
| 530 | return Inst(Opcode::CompositeConstructU32x2, e1, e2); | ||
| 531 | case Type::F16: | ||
| 532 | return Inst(Opcode::CompositeConstructF16x2, e1, e2); | ||
| 533 | case Type::F32: | ||
| 534 | return Inst(Opcode::CompositeConstructF32x2, e1, e2); | ||
| 535 | case Type::F64: | ||
| 536 | return Inst(Opcode::CompositeConstructF64x2, e1, e2); | ||
| 537 | default: | ||
| 538 | ThrowInvalidType(e1.Type()); | ||
| 539 | } | ||
| 540 | } | ||
| 541 | |||
| 542 | Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Value& e3) { | ||
| 543 | if (e1.Type() != e2.Type() || e1.Type() != e3.Type()) { | ||
| 544 | throw InvalidArgument("Mismatching types {}, {}, and {}", e1.Type(), e2.Type(), e3.Type()); | ||
| 545 | } | ||
| 546 | switch (e1.Type()) { | ||
| 547 | case Type::U32: | ||
| 548 | return Inst(Opcode::CompositeConstructU32x3, e1, e2, e3); | ||
| 549 | case Type::F16: | ||
| 550 | return Inst(Opcode::CompositeConstructF16x3, e1, e2, e3); | ||
| 551 | case Type::F32: | ||
| 552 | return Inst(Opcode::CompositeConstructF32x3, e1, e2, e3); | ||
| 553 | case Type::F64: | ||
| 554 | return Inst(Opcode::CompositeConstructF64x3, e1, e2, e3); | ||
| 555 | default: | ||
| 556 | ThrowInvalidType(e1.Type()); | ||
| 557 | } | ||
| 558 | } | ||
| 559 | |||
| 560 | Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Value& e3, | ||
| 561 | const Value& e4) { | ||
| 562 | if (e1.Type() != e2.Type() || e1.Type() != e3.Type() || e1.Type() != e4.Type()) { | ||
| 563 | throw InvalidArgument("Mismatching types {}, {}, {}, and {}", e1.Type(), e2.Type(), | ||
| 564 | e3.Type(), e4.Type()); | ||
| 565 | } | ||
| 566 | switch (e1.Type()) { | ||
| 567 | case Type::U32: | ||
| 568 | return Inst(Opcode::CompositeConstructU32x4, e1, e2, e3, e4); | ||
| 569 | case Type::F16: | ||
| 570 | return Inst(Opcode::CompositeConstructF16x4, e1, e2, e3, e4); | ||
| 571 | case Type::F32: | ||
| 572 | return Inst(Opcode::CompositeConstructF32x4, e1, e2, e3, e4); | ||
| 573 | case Type::F64: | ||
| 574 | return Inst(Opcode::CompositeConstructF64x4, e1, e2, e3, e4); | ||
| 575 | default: | ||
| 576 | ThrowInvalidType(e1.Type()); | ||
| 577 | } | ||
| 578 | } | ||
| 579 | |||
| 580 | Value IREmitter::CompositeExtract(const Value& vector, size_t element) { | ||
| 581 | const auto read{[&](Opcode opcode, size_t limit) -> Value { | ||
| 582 | if (element >= limit) { | ||
| 583 | throw InvalidArgument("Out of bounds element {}", element); | ||
| 584 | } | ||
| 585 | return Inst(opcode, vector, Value{static_cast<u32>(element)}); | ||
| 586 | }}; | ||
| 587 | switch (vector.Type()) { | ||
| 588 | case Type::U32x2: | ||
| 589 | return read(Opcode::CompositeExtractU32x2, 2); | ||
| 590 | case Type::U32x3: | ||
| 591 | return read(Opcode::CompositeExtractU32x3, 3); | ||
| 592 | case Type::U32x4: | ||
| 593 | return read(Opcode::CompositeExtractU32x4, 4); | ||
| 594 | case Type::F16x2: | ||
| 595 | return read(Opcode::CompositeExtractF16x2, 2); | ||
| 596 | case Type::F16x3: | ||
| 597 | return read(Opcode::CompositeExtractF16x3, 3); | ||
| 598 | case Type::F16x4: | ||
| 599 | return read(Opcode::CompositeExtractF16x4, 4); | ||
| 600 | case Type::F32x2: | ||
| 601 | return read(Opcode::CompositeExtractF32x2, 2); | ||
| 602 | case Type::F32x3: | ||
| 603 | return read(Opcode::CompositeExtractF32x3, 3); | ||
| 604 | case Type::F32x4: | ||
| 605 | return read(Opcode::CompositeExtractF32x4, 4); | ||
| 606 | case Type::F64x2: | ||
| 607 | return read(Opcode::CompositeExtractF64x2, 2); | ||
| 608 | case Type::F64x3: | ||
| 609 | return read(Opcode::CompositeExtractF64x3, 3); | ||
| 610 | case Type::F64x4: | ||
| 611 | return read(Opcode::CompositeExtractF64x4, 4); | ||
| 612 | default: | ||
| 613 | ThrowInvalidType(vector.Type()); | ||
| 614 | } | ||
| 615 | } | ||
| 616 | |||
| 617 | Value IREmitter::CompositeInsert(const Value& vector, const Value& object, size_t element) { | ||
| 618 | const auto insert{[&](Opcode opcode, size_t limit) { | ||
| 619 | if (element >= limit) { | ||
| 620 | throw InvalidArgument("Out of bounds element {}", element); | ||
| 621 | } | ||
| 622 | return Inst(opcode, vector, object, Value{static_cast<u32>(element)}); | ||
| 623 | }}; | ||
| 624 | switch (vector.Type()) { | ||
| 625 | case Type::U32x2: | ||
| 626 | return insert(Opcode::CompositeInsertU32x2, 2); | ||
| 627 | case Type::U32x3: | ||
| 628 | return insert(Opcode::CompositeInsertU32x3, 3); | ||
| 629 | case Type::U32x4: | ||
| 630 | return insert(Opcode::CompositeInsertU32x4, 4); | ||
| 631 | case Type::F16x2: | ||
| 632 | return insert(Opcode::CompositeInsertF16x2, 2); | ||
| 633 | case Type::F16x3: | ||
| 634 | return insert(Opcode::CompositeInsertF16x3, 3); | ||
| 635 | case Type::F16x4: | ||
| 636 | return insert(Opcode::CompositeInsertF16x4, 4); | ||
| 637 | case Type::F32x2: | ||
| 638 | return insert(Opcode::CompositeInsertF32x2, 2); | ||
| 639 | case Type::F32x3: | ||
| 640 | return insert(Opcode::CompositeInsertF32x3, 3); | ||
| 641 | case Type::F32x4: | ||
| 642 | return insert(Opcode::CompositeInsertF32x4, 4); | ||
| 643 | case Type::F64x2: | ||
| 644 | return insert(Opcode::CompositeInsertF64x2, 2); | ||
| 645 | case Type::F64x3: | ||
| 646 | return insert(Opcode::CompositeInsertF64x3, 3); | ||
| 647 | case Type::F64x4: | ||
| 648 | return insert(Opcode::CompositeInsertF64x4, 4); | ||
| 649 | default: | ||
| 650 | ThrowInvalidType(vector.Type()); | ||
| 651 | } | ||
| 652 | } | ||
| 653 | |||
| 654 | Value IREmitter::Select(const U1& condition, const Value& true_value, const Value& false_value) { | ||
| 655 | if (true_value.Type() != false_value.Type()) { | ||
| 656 | throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type()); | ||
| 657 | } | ||
| 658 | switch (true_value.Type()) { | ||
| 659 | case Type::U1: | ||
| 660 | return Inst(Opcode::SelectU1, condition, true_value, false_value); | ||
| 661 | case Type::U8: | ||
| 662 | return Inst(Opcode::SelectU8, condition, true_value, false_value); | ||
| 663 | case Type::U16: | ||
| 664 | return Inst(Opcode::SelectU16, condition, true_value, false_value); | ||
| 665 | case Type::U32: | ||
| 666 | return Inst(Opcode::SelectU32, condition, true_value, false_value); | ||
| 667 | case Type::U64: | ||
| 668 | return Inst(Opcode::SelectU64, condition, true_value, false_value); | ||
| 669 | case Type::F32: | ||
| 670 | return Inst(Opcode::SelectF32, condition, true_value, false_value); | ||
| 671 | case Type::F64: | ||
| 672 | return Inst(Opcode::SelectF64, condition, true_value, false_value); | ||
| 673 | default: | ||
| 674 | throw InvalidArgument("Invalid type {}", true_value.Type()); | ||
| 675 | } | ||
| 676 | } | ||
| 677 | |||
| 678 | template <> | ||
| 679 | IR::U32 IREmitter::BitCast<IR::U32, IR::F32>(const IR::F32& value) { | ||
| 680 | return Inst<IR::U32>(Opcode::BitCastU32F32, value); | ||
| 681 | } | ||
| 682 | |||
| 683 | template <> | ||
| 684 | IR::F32 IREmitter::BitCast<IR::F32, IR::U32>(const IR::U32& value) { | ||
| 685 | return Inst<IR::F32>(Opcode::BitCastF32U32, value); | ||
| 686 | } | ||
| 687 | |||
| 688 | template <> | ||
| 689 | IR::U16 IREmitter::BitCast<IR::U16, IR::F16>(const IR::F16& value) { | ||
| 690 | return Inst<IR::U16>(Opcode::BitCastU16F16, value); | ||
| 691 | } | ||
| 692 | |||
| 693 | template <> | ||
| 694 | IR::F16 IREmitter::BitCast<IR::F16, IR::U16>(const IR::U16& value) { | ||
| 695 | return Inst<IR::F16>(Opcode::BitCastF16U16, value); | ||
| 696 | } | ||
| 697 | |||
| 698 | template <> | ||
| 699 | IR::U64 IREmitter::BitCast<IR::U64, IR::F64>(const IR::F64& value) { | ||
| 700 | return Inst<IR::U64>(Opcode::BitCastU64F64, value); | ||
| 701 | } | ||
| 702 | |||
| 703 | template <> | ||
| 704 | IR::F64 IREmitter::BitCast<IR::F64, IR::U64>(const IR::U64& value) { | ||
| 705 | return Inst<IR::F64>(Opcode::BitCastF64U64, value); | ||
| 706 | } | ||
| 707 | |||
| 708 | U64 IREmitter::PackUint2x32(const Value& vector) { | ||
| 709 | return Inst<U64>(Opcode::PackUint2x32, vector); | ||
| 710 | } | ||
| 711 | |||
| 712 | Value IREmitter::UnpackUint2x32(const U64& value) { | ||
| 713 | return Inst<Value>(Opcode::UnpackUint2x32, value); | ||
| 714 | } | ||
| 715 | |||
| 716 | U32 IREmitter::PackFloat2x16(const Value& vector) { | ||
| 717 | return Inst<U32>(Opcode::PackFloat2x16, vector); | ||
| 718 | } | ||
| 719 | |||
| 720 | Value IREmitter::UnpackFloat2x16(const U32& value) { | ||
| 721 | return Inst(Opcode::UnpackFloat2x16, value); | ||
| 722 | } | ||
| 723 | |||
| 724 | U32 IREmitter::PackHalf2x16(const Value& vector) { | ||
| 725 | return Inst<U32>(Opcode::PackHalf2x16, vector); | ||
| 726 | } | ||
| 727 | |||
| 728 | Value IREmitter::UnpackHalf2x16(const U32& value) { | ||
| 729 | return Inst(Opcode::UnpackHalf2x16, value); | ||
| 730 | } | ||
| 731 | |||
| 732 | F64 IREmitter::PackDouble2x32(const Value& vector) { | ||
| 733 | return Inst<F64>(Opcode::PackDouble2x32, vector); | ||
| 734 | } | ||
| 735 | |||
| 736 | Value IREmitter::UnpackDouble2x32(const F64& value) { | ||
| 737 | return Inst<Value>(Opcode::UnpackDouble2x32, value); | ||
| 738 | } | ||
| 739 | |||
| 740 | F16F32F64 IREmitter::FPMul(const F16F32F64& a, const F16F32F64& b, FpControl control) { | ||
| 741 | if (a.Type() != b.Type()) { | ||
| 742 | throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); | ||
| 743 | } | ||
| 744 | switch (a.Type()) { | ||
| 745 | case Type::F16: | ||
| 746 | return Inst<F16>(Opcode::FPMul16, Flags{control}, a, b); | ||
| 747 | case Type::F32: | ||
| 748 | return Inst<F32>(Opcode::FPMul32, Flags{control}, a, b); | ||
| 749 | case Type::F64: | ||
| 750 | return Inst<F64>(Opcode::FPMul64, Flags{control}, a, b); | ||
| 751 | default: | ||
| 752 | ThrowInvalidType(a.Type()); | ||
| 753 | } | ||
| 754 | } | ||
| 755 | |||
| 756 | F16F32F64 IREmitter::FPFma(const F16F32F64& a, const F16F32F64& b, const F16F32F64& c, | ||
| 757 | FpControl control) { | ||
| 758 | if (a.Type() != b.Type() || a.Type() != c.Type()) { | ||
| 759 | throw InvalidArgument("Mismatching types {}, {}, and {}", a.Type(), b.Type(), c.Type()); | ||
| 760 | } | ||
| 761 | switch (a.Type()) { | ||
| 762 | case Type::F16: | ||
| 763 | return Inst<F16>(Opcode::FPFma16, Flags{control}, a, b, c); | ||
| 764 | case Type::F32: | ||
| 765 | return Inst<F32>(Opcode::FPFma32, Flags{control}, a, b, c); | ||
| 766 | case Type::F64: | ||
| 767 | return Inst<F64>(Opcode::FPFma64, Flags{control}, a, b, c); | ||
| 768 | default: | ||
| 769 | ThrowInvalidType(a.Type()); | ||
| 770 | } | ||
| 771 | } | ||
| 772 | |||
| 773 | F16F32F64 IREmitter::FPAbs(const F16F32F64& value) { | ||
| 774 | switch (value.Type()) { | ||
| 775 | case Type::F16: | ||
| 776 | return Inst<F16>(Opcode::FPAbs16, value); | ||
| 777 | case Type::F32: | ||
| 778 | return Inst<F32>(Opcode::FPAbs32, value); | ||
| 779 | case Type::F64: | ||
| 780 | return Inst<F64>(Opcode::FPAbs64, value); | ||
| 781 | default: | ||
| 782 | ThrowInvalidType(value.Type()); | ||
| 783 | } | ||
| 784 | } | ||
| 785 | |||
| 786 | F16F32F64 IREmitter::FPNeg(const F16F32F64& value) { | ||
| 787 | switch (value.Type()) { | ||
| 788 | case Type::F16: | ||
| 789 | return Inst<F16>(Opcode::FPNeg16, value); | ||
| 790 | case Type::F32: | ||
| 791 | return Inst<F32>(Opcode::FPNeg32, value); | ||
| 792 | case Type::F64: | ||
| 793 | return Inst<F64>(Opcode::FPNeg64, value); | ||
| 794 | default: | ||
| 795 | ThrowInvalidType(value.Type()); | ||
| 796 | } | ||
| 797 | } | ||
| 798 | |||
| 799 | F16F32F64 IREmitter::FPAbsNeg(const F16F32F64& value, bool abs, bool neg) { | ||
| 800 | F16F32F64 result{value}; | ||
| 801 | if (abs) { | ||
| 802 | result = FPAbs(result); | ||
| 803 | } | ||
| 804 | if (neg) { | ||
| 805 | result = FPNeg(result); | ||
| 806 | } | ||
| 807 | return result; | ||
| 808 | } | ||
| 809 | |||
| 810 | F32 IREmitter::FPCos(const F32& value) { | ||
| 811 | return Inst<F32>(Opcode::FPCos, value); | ||
| 812 | } | ||
| 813 | |||
| 814 | F32 IREmitter::FPSin(const F32& value) { | ||
| 815 | return Inst<F32>(Opcode::FPSin, value); | ||
| 816 | } | ||
| 817 | |||
| 818 | F32 IREmitter::FPExp2(const F32& value) { | ||
| 819 | return Inst<F32>(Opcode::FPExp2, value); | ||
| 820 | } | ||
| 821 | |||
| 822 | F32 IREmitter::FPLog2(const F32& value) { | ||
| 823 | return Inst<F32>(Opcode::FPLog2, value); | ||
| 824 | } | ||
| 825 | |||
| 826 | F32F64 IREmitter::FPRecip(const F32F64& value) { | ||
| 827 | switch (value.Type()) { | ||
| 828 | case Type::F32: | ||
| 829 | return Inst<F32>(Opcode::FPRecip32, value); | ||
| 830 | case Type::F64: | ||
| 831 | return Inst<F64>(Opcode::FPRecip64, value); | ||
| 832 | default: | ||
| 833 | ThrowInvalidType(value.Type()); | ||
| 834 | } | ||
| 835 | } | ||
| 836 | |||
| 837 | F32F64 IREmitter::FPRecipSqrt(const F32F64& value) { | ||
| 838 | switch (value.Type()) { | ||
| 839 | case Type::F32: | ||
| 840 | return Inst<F32>(Opcode::FPRecipSqrt32, value); | ||
| 841 | case Type::F64: | ||
| 842 | return Inst<F64>(Opcode::FPRecipSqrt64, value); | ||
| 843 | default: | ||
| 844 | ThrowInvalidType(value.Type()); | ||
| 845 | } | ||
| 846 | } | ||
| 847 | |||
| 848 | F32 IREmitter::FPSqrt(const F32& value) { | ||
| 849 | return Inst<F32>(Opcode::FPSqrt, value); | ||
| 850 | } | ||
| 851 | |||
| 852 | F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) { | ||
| 853 | switch (value.Type()) { | ||
| 854 | case Type::F16: | ||
| 855 | return Inst<F16>(Opcode::FPSaturate16, value); | ||
| 856 | case Type::F32: | ||
| 857 | return Inst<F32>(Opcode::FPSaturate32, value); | ||
| 858 | case Type::F64: | ||
| 859 | return Inst<F64>(Opcode::FPSaturate64, value); | ||
| 860 | default: | ||
| 861 | ThrowInvalidType(value.Type()); | ||
| 862 | } | ||
| 863 | } | ||
| 864 | |||
| 865 | F16F32F64 IREmitter::FPClamp(const F16F32F64& value, const F16F32F64& min_value, | ||
| 866 | const F16F32F64& max_value) { | ||
| 867 | if (value.Type() != min_value.Type() || value.Type() != max_value.Type()) { | ||
| 868 | throw InvalidArgument("Mismatching types {}, {}, and {}", value.Type(), min_value.Type(), | ||
| 869 | max_value.Type()); | ||
| 870 | } | ||
| 871 | switch (value.Type()) { | ||
| 872 | case Type::F16: | ||
| 873 | return Inst<F16>(Opcode::FPClamp16, value, min_value, max_value); | ||
| 874 | case Type::F32: | ||
| 875 | return Inst<F32>(Opcode::FPClamp32, value, min_value, max_value); | ||
| 876 | case Type::F64: | ||
| 877 | return Inst<F64>(Opcode::FPClamp64, value, min_value, max_value); | ||
| 878 | default: | ||
| 879 | ThrowInvalidType(value.Type()); | ||
| 880 | } | ||
| 881 | } | ||
| 882 | |||
| 883 | F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value, FpControl control) { | ||
| 884 | switch (value.Type()) { | ||
| 885 | case Type::F16: | ||
| 886 | return Inst<F16>(Opcode::FPRoundEven16, Flags{control}, value); | ||
| 887 | case Type::F32: | ||
| 888 | return Inst<F32>(Opcode::FPRoundEven32, Flags{control}, value); | ||
| 889 | case Type::F64: | ||
| 890 | return Inst<F64>(Opcode::FPRoundEven64, Flags{control}, value); | ||
| 891 | default: | ||
| 892 | ThrowInvalidType(value.Type()); | ||
| 893 | } | ||
| 894 | } | ||
| 895 | |||
| 896 | F16F32F64 IREmitter::FPFloor(const F16F32F64& value, FpControl control) { | ||
| 897 | switch (value.Type()) { | ||
| 898 | case Type::F16: | ||
| 899 | return Inst<F16>(Opcode::FPFloor16, Flags{control}, value); | ||
| 900 | case Type::F32: | ||
| 901 | return Inst<F32>(Opcode::FPFloor32, Flags{control}, value); | ||
| 902 | case Type::F64: | ||
| 903 | return Inst<F64>(Opcode::FPFloor64, Flags{control}, value); | ||
| 904 | default: | ||
| 905 | ThrowInvalidType(value.Type()); | ||
| 906 | } | ||
| 907 | } | ||
| 908 | |||
| 909 | F16F32F64 IREmitter::FPCeil(const F16F32F64& value, FpControl control) { | ||
| 910 | switch (value.Type()) { | ||
| 911 | case Type::F16: | ||
| 912 | return Inst<F16>(Opcode::FPCeil16, Flags{control}, value); | ||
| 913 | case Type::F32: | ||
| 914 | return Inst<F32>(Opcode::FPCeil32, Flags{control}, value); | ||
| 915 | case Type::F64: | ||
| 916 | return Inst<F64>(Opcode::FPCeil64, Flags{control}, value); | ||
| 917 | default: | ||
| 918 | ThrowInvalidType(value.Type()); | ||
| 919 | } | ||
| 920 | } | ||
| 921 | |||
| 922 | F16F32F64 IREmitter::FPTrunc(const F16F32F64& value, FpControl control) { | ||
| 923 | switch (value.Type()) { | ||
| 924 | case Type::F16: | ||
| 925 | return Inst<F16>(Opcode::FPTrunc16, Flags{control}, value); | ||
| 926 | case Type::F32: | ||
| 927 | return Inst<F32>(Opcode::FPTrunc32, Flags{control}, value); | ||
| 928 | case Type::F64: | ||
| 929 | return Inst<F64>(Opcode::FPTrunc64, Flags{control}, value); | ||
| 930 | default: | ||
| 931 | ThrowInvalidType(value.Type()); | ||
| 932 | } | ||
| 933 | } | ||
| 934 | |||
| 935 | U1 IREmitter::FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, bool ordered) { | ||
| 936 | if (lhs.Type() != rhs.Type()) { | ||
| 937 | throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); | ||
| 938 | } | ||
| 939 | switch (lhs.Type()) { | ||
| 940 | case Type::F16: | ||
| 941 | return Inst<U1>(ordered ? Opcode::FPOrdEqual16 : Opcode::FPUnordEqual16, Flags{control}, | ||
| 942 | lhs, rhs); | ||
| 943 | case Type::F32: | ||
| 944 | return Inst<U1>(ordered ? Opcode::FPOrdEqual32 : Opcode::FPUnordEqual32, Flags{control}, | ||
| 945 | lhs, rhs); | ||
| 946 | case Type::F64: | ||
| 947 | return Inst<U1>(ordered ? Opcode::FPOrdEqual64 : Opcode::FPUnordEqual64, Flags{control}, | ||
| 948 | lhs, rhs); | ||
| 949 | default: | ||
| 950 | ThrowInvalidType(lhs.Type()); | ||
| 951 | } | ||
| 952 | } | ||
| 953 | |||
| 954 | U1 IREmitter::FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, | ||
| 955 | bool ordered) { | ||
| 956 | if (lhs.Type() != rhs.Type()) { | ||
| 957 | throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); | ||
| 958 | } | ||
| 959 | switch (lhs.Type()) { | ||
| 960 | case Type::F16: | ||
| 961 | return Inst<U1>(ordered ? Opcode::FPOrdNotEqual16 : Opcode::FPUnordNotEqual16, | ||
| 962 | Flags{control}, lhs, rhs); | ||
| 963 | case Type::F32: | ||
| 964 | return Inst<U1>(ordered ? Opcode::FPOrdNotEqual32 : Opcode::FPUnordNotEqual32, | ||
| 965 | Flags{control}, lhs, rhs); | ||
| 966 | case Type::F64: | ||
| 967 | return Inst<U1>(ordered ? Opcode::FPOrdNotEqual64 : Opcode::FPUnordNotEqual64, | ||
| 968 | Flags{control}, lhs, rhs); | ||
| 969 | default: | ||
| 970 | ThrowInvalidType(lhs.Type()); | ||
| 971 | } | ||
| 972 | } | ||
| 973 | |||
| 974 | U1 IREmitter::FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, | ||
| 975 | bool ordered) { | ||
| 976 | if (lhs.Type() != rhs.Type()) { | ||
| 977 | throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); | ||
| 978 | } | ||
| 979 | switch (lhs.Type()) { | ||
| 980 | case Type::F16: | ||
| 981 | return Inst<U1>(ordered ? Opcode::FPOrdLessThan16 : Opcode::FPUnordLessThan16, | ||
| 982 | Flags{control}, lhs, rhs); | ||
| 983 | case Type::F32: | ||
| 984 | return Inst<U1>(ordered ? Opcode::FPOrdLessThan32 : Opcode::FPUnordLessThan32, | ||
| 985 | Flags{control}, lhs, rhs); | ||
| 986 | case Type::F64: | ||
| 987 | return Inst<U1>(ordered ? Opcode::FPOrdLessThan64 : Opcode::FPUnordLessThan64, | ||
| 988 | Flags{control}, lhs, rhs); | ||
| 989 | default: | ||
| 990 | ThrowInvalidType(lhs.Type()); | ||
| 991 | } | ||
| 992 | } | ||
| 993 | |||
| 994 | U1 IREmitter::FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, | ||
| 995 | bool ordered) { | ||
| 996 | if (lhs.Type() != rhs.Type()) { | ||
| 997 | throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); | ||
| 998 | } | ||
| 999 | switch (lhs.Type()) { | ||
| 1000 | case Type::F16: | ||
| 1001 | return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan16 : Opcode::FPUnordGreaterThan16, | ||
| 1002 | Flags{control}, lhs, rhs); | ||
| 1003 | case Type::F32: | ||
| 1004 | return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan32 : Opcode::FPUnordGreaterThan32, | ||
| 1005 | Flags{control}, lhs, rhs); | ||
| 1006 | case Type::F64: | ||
| 1007 | return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan64 : Opcode::FPUnordGreaterThan64, | ||
| 1008 | Flags{control}, lhs, rhs); | ||
| 1009 | default: | ||
| 1010 | ThrowInvalidType(lhs.Type()); | ||
| 1011 | } | ||
| 1012 | } | ||
| 1013 | |||
| 1014 | U1 IREmitter::FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, | ||
| 1015 | bool ordered) { | ||
| 1016 | if (lhs.Type() != rhs.Type()) { | ||
| 1017 | throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); | ||
| 1018 | } | ||
| 1019 | switch (lhs.Type()) { | ||
| 1020 | case Type::F16: | ||
| 1021 | return Inst<U1>(ordered ? Opcode::FPOrdLessThanEqual16 : Opcode::FPUnordLessThanEqual16, | ||
| 1022 | Flags{control}, lhs, rhs); | ||
| 1023 | case Type::F32: | ||
| 1024 | return Inst<U1>(ordered ? Opcode::FPOrdLessThanEqual32 : Opcode::FPUnordLessThanEqual32, | ||
| 1025 | Flags{control}, lhs, rhs); | ||
| 1026 | case Type::F64: | ||
| 1027 | return Inst<U1>(ordered ? Opcode::FPOrdLessThanEqual64 : Opcode::FPUnordLessThanEqual64, | ||
| 1028 | Flags{control}, lhs, rhs); | ||
| 1029 | default: | ||
| 1030 | ThrowInvalidType(lhs.Type()); | ||
| 1031 | } | ||
| 1032 | } | ||
| 1033 | |||
| 1034 | U1 IREmitter::FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, | ||
| 1035 | bool ordered) { | ||
| 1036 | if (lhs.Type() != rhs.Type()) { | ||
| 1037 | throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); | ||
| 1038 | } | ||
| 1039 | switch (lhs.Type()) { | ||
| 1040 | case Type::F16: | ||
| 1041 | return Inst<U1>(ordered ? Opcode::FPOrdGreaterThanEqual16 | ||
| 1042 | : Opcode::FPUnordGreaterThanEqual16, | ||
| 1043 | Flags{control}, lhs, rhs); | ||
| 1044 | case Type::F32: | ||
| 1045 | return Inst<U1>(ordered ? Opcode::FPOrdGreaterThanEqual32 | ||
| 1046 | : Opcode::FPUnordGreaterThanEqual32, | ||
| 1047 | Flags{control}, lhs, rhs); | ||
| 1048 | case Type::F64: | ||
| 1049 | return Inst<U1>(ordered ? Opcode::FPOrdGreaterThanEqual64 | ||
| 1050 | : Opcode::FPUnordGreaterThanEqual64, | ||
| 1051 | Flags{control}, lhs, rhs); | ||
| 1052 | default: | ||
| 1053 | ThrowInvalidType(lhs.Type()); | ||
| 1054 | } | ||
| 1055 | } | ||
| 1056 | |||
| 1057 | U1 IREmitter::FPIsNan(const F16F32F64& value) { | ||
| 1058 | switch (value.Type()) { | ||
| 1059 | case Type::F16: | ||
| 1060 | return Inst<U1>(Opcode::FPIsNan16, value); | ||
| 1061 | case Type::F32: | ||
| 1062 | return Inst<U1>(Opcode::FPIsNan32, value); | ||
| 1063 | case Type::F64: | ||
| 1064 | return Inst<U1>(Opcode::FPIsNan64, value); | ||
| 1065 | default: | ||
| 1066 | ThrowInvalidType(value.Type()); | ||
| 1067 | } | ||
| 1068 | } | ||
| 1069 | |||
| 1070 | U1 IREmitter::FPOrdered(const F16F32F64& lhs, const F16F32F64& rhs) { | ||
| 1071 | if (lhs.Type() != rhs.Type()) { | ||
| 1072 | throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); | ||
| 1073 | } | ||
| 1074 | return LogicalAnd(LogicalNot(FPIsNan(lhs)), LogicalNot(FPIsNan(rhs))); | ||
| 1075 | } | ||
| 1076 | |||
| 1077 | U1 IREmitter::FPUnordered(const F16F32F64& lhs, const F16F32F64& rhs) { | ||
| 1078 | if (lhs.Type() != rhs.Type()) { | ||
| 1079 | throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); | ||
| 1080 | } | ||
| 1081 | return LogicalOr(FPIsNan(lhs), FPIsNan(rhs)); | ||
| 1082 | } | ||
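
FPEqual through FPGreaterThanEqual select an ordered or unordered opcode variant, while FPOrdered and FPUnordered are synthesized from FPIsNan rather than having dedicated opcodes. A host-side sketch of the IEEE-754 semantics the backend is expected to give these opcodes (illustrative, assuming the usual definitions):

    #include <cmath>

    // Ordered comparisons are false if either operand is NaN;
    // unordered comparisons are true if either operand is NaN.
    bool OrdEqual(double lhs, double rhs) {
        return !std::isnan(lhs) && !std::isnan(rhs) && lhs == rhs;
    }
    bool UnordEqual(double lhs, double rhs) {
        return std::isnan(lhs) || std::isnan(rhs) || lhs == rhs;
    }

This is the same decomposition FPOrdered and FPUnordered use above: ordered means neither operand is NaN, unordered means at least one is.
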
| 1083 | |||
| 1084 | F32F64 IREmitter::FPMax(const F32F64& lhs, const F32F64& rhs, FpControl control) { | ||
| 1085 | if (lhs.Type() != rhs.Type()) { | ||
| 1086 | throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); | ||
| 1087 | } | ||
| 1088 | switch (lhs.Type()) { | ||
| 1089 | case Type::F32: | ||
| 1090 | return Inst<F32>(Opcode::FPMax32, Flags{control}, lhs, rhs); | ||
| 1091 | case Type::F64: | ||
| 1092 | return Inst<F64>(Opcode::FPMax64, Flags{control}, lhs, rhs); | ||
| 1093 | default: | ||
| 1094 | ThrowInvalidType(lhs.Type()); | ||
| 1095 | } | ||
| 1096 | } | ||
| 1097 | |||
| 1098 | F32F64 IREmitter::FPMin(const F32F64& lhs, const F32F64& rhs, FpControl control) { | ||
| 1099 | if (lhs.Type() != rhs.Type()) { | ||
| 1100 | throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); | ||
| 1101 | } | ||
| 1102 | switch (lhs.Type()) { | ||
| 1103 | case Type::F32: | ||
| 1104 | return Inst<F32>(Opcode::FPMin32, Flags{control}, lhs, rhs); | ||
| 1105 | case Type::F64: | ||
| 1106 | return Inst<F64>(Opcode::FPMin64, Flags{control}, lhs, rhs); | ||
| 1107 | default: | ||
| 1108 | ThrowInvalidType(lhs.Type()); | ||
| 1109 | } | ||
| 1110 | } | ||
| 1111 | |||
| 1112 | U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) { | ||
| 1113 | if (a.Type() != b.Type()) { | ||
| 1114 | throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); | ||
| 1115 | } | ||
| 1116 | switch (a.Type()) { | ||
| 1117 | case Type::U32: | ||
| 1118 | return Inst<U32>(Opcode::IAdd32, a, b); | ||
| 1119 | case Type::U64: | ||
| 1120 | return Inst<U64>(Opcode::IAdd64, a, b); | ||
| 1121 | default: | ||
| 1122 | ThrowInvalidType(a.Type()); | ||
| 1123 | } | ||
| 1124 | } | ||
| 1125 | |||
| 1126 | U32U64 IREmitter::ISub(const U32U64& a, const U32U64& b) { | ||
| 1127 | if (a.Type() != b.Type()) { | ||
| 1128 | throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); | ||
| 1129 | } | ||
| 1130 | switch (a.Type()) { | ||
| 1131 | case Type::U32: | ||
| 1132 | return Inst<U32>(Opcode::ISub32, a, b); | ||
| 1133 | case Type::U64: | ||
| 1134 | return Inst<U64>(Opcode::ISub64, a, b); | ||
| 1135 | default: | ||
| 1136 | ThrowInvalidType(a.Type()); | ||
| 1137 | } | ||
| 1138 | } | ||
| 1139 | |||
| 1140 | U32 IREmitter::IMul(const U32& a, const U32& b) { | ||
| 1141 | return Inst<U32>(Opcode::IMul32, a, b); | ||
| 1142 | } | ||
| 1143 | |||
| 1144 | U32U64 IREmitter::INeg(const U32U64& value) { | ||
| 1145 | switch (value.Type()) { | ||
| 1146 | case Type::U32: | ||
| 1147 | return Inst<U32>(Opcode::INeg32, value); | ||
| 1148 | case Type::U64: | ||
| 1149 | return Inst<U64>(Opcode::INeg64, value); | ||
| 1150 | default: | ||
| 1151 | ThrowInvalidType(value.Type()); | ||
| 1152 | } | ||
| 1153 | } | ||
| 1154 | |||
| 1155 | U32 IREmitter::IAbs(const U32& value) { | ||
| 1156 | return Inst<U32>(Opcode::IAbs32, value); | ||
| 1157 | } | ||
| 1158 | |||
| 1159 | U32U64 IREmitter::ShiftLeftLogical(const U32U64& base, const U32& shift) { | ||
| 1160 | switch (base.Type()) { | ||
| 1161 | case Type::U32: | ||
| 1162 | return Inst<U32>(Opcode::ShiftLeftLogical32, base, shift); | ||
| 1163 | case Type::U64: | ||
| 1164 | return Inst<U64>(Opcode::ShiftLeftLogical64, base, shift); | ||
| 1165 | default: | ||
| 1166 | ThrowInvalidType(base.Type()); | ||
| 1167 | } | ||
| 1168 | } | ||
| 1169 | |||
| 1170 | U32U64 IREmitter::ShiftRightLogical(const U32U64& base, const U32& shift) { | ||
| 1171 | switch (base.Type()) { | ||
| 1172 | case Type::U32: | ||
| 1173 | return Inst<U32>(Opcode::ShiftRightLogical32, base, shift); | ||
| 1174 | case Type::U64: | ||
| 1175 | return Inst<U64>(Opcode::ShiftRightLogical64, base, shift); | ||
| 1176 | default: | ||
| 1177 | ThrowInvalidType(base.Type()); | ||
| 1178 | } | ||
| 1179 | } | ||
| 1180 | |||
| 1181 | U32U64 IREmitter::ShiftRightArithmetic(const U32U64& base, const U32& shift) { | ||
| 1182 | switch (base.Type()) { | ||
| 1183 | case Type::U32: | ||
| 1184 | return Inst<U32>(Opcode::ShiftRightArithmetic32, base, shift); | ||
| 1185 | case Type::U64: | ||
| 1186 | return Inst<U64>(Opcode::ShiftRightArithmetic64, base, shift); | ||
| 1187 | default: | ||
| 1188 | ThrowInvalidType(base.Type()); | ||
| 1189 | } | ||
| 1190 | } | ||
| 1191 | |||
| 1192 | U32 IREmitter::BitwiseAnd(const U32& a, const U32& b) { | ||
| 1193 | return Inst<U32>(Opcode::BitwiseAnd32, a, b); | ||
| 1194 | } | ||
| 1195 | |||
| 1196 | U32 IREmitter::BitwiseOr(const U32& a, const U32& b) { | ||
| 1197 | return Inst<U32>(Opcode::BitwiseOr32, a, b); | ||
| 1198 | } | ||
| 1199 | |||
| 1200 | U32 IREmitter::BitwiseXor(const U32& a, const U32& b) { | ||
| 1201 | return Inst<U32>(Opcode::BitwiseXor32, a, b); | ||
| 1202 | } | ||
| 1203 | |||
| 1204 | U32 IREmitter::BitFieldInsert(const U32& base, const U32& insert, const U32& offset, | ||
| 1205 | const U32& count) { | ||
| 1206 | return Inst<U32>(Opcode::BitFieldInsert, base, insert, offset, count); | ||
| 1207 | } | ||
| 1208 | |||
| 1209 | U32 IREmitter::BitFieldExtract(const U32& base, const U32& offset, const U32& count, | ||
| 1210 | bool is_signed) { | ||
| 1211 | return Inst<U32>(is_signed ? Opcode::BitFieldSExtract : Opcode::BitFieldUExtract, base, offset, | ||
| 1212 | count); | ||
| 1213 | } | ||
| 1214 | |||
| 1215 | U32 IREmitter::BitReverse(const U32& value) { | ||
| 1216 | return Inst<U32>(Opcode::BitReverse32, value); | ||
| 1217 | } | ||
| 1218 | |||
| 1219 | U32 IREmitter::BitCount(const U32& value) { | ||
| 1220 | return Inst<U32>(Opcode::BitCount32, value); | ||
| 1221 | } | ||
| 1222 | |||
| 1223 | U32 IREmitter::BitwiseNot(const U32& value) { | ||
| 1224 | return Inst<U32>(Opcode::BitwiseNot32, value); | ||
| 1225 | } | ||
| 1226 | |||
| 1227 | U32 IREmitter::FindSMsb(const U32& value) { | ||
| 1228 | return Inst<U32>(Opcode::FindSMsb32, value); | ||
| 1229 | } | ||
| 1230 | |||
| 1231 | U32 IREmitter::FindUMsb(const U32& value) { | ||
| 1232 | return Inst<U32>(Opcode::FindUMsb32, value); | ||
| 1233 | } | ||
| 1234 | |||
| 1235 | U32 IREmitter::SMin(const U32& a, const U32& b) { | ||
| 1236 | return Inst<U32>(Opcode::SMin32, a, b); | ||
| 1237 | } | ||
| 1238 | |||
| 1239 | U32 IREmitter::UMin(const U32& a, const U32& b) { | ||
| 1240 | return Inst<U32>(Opcode::UMin32, a, b); | ||
| 1241 | } | ||
| 1242 | |||
| 1243 | U32 IREmitter::IMin(const U32& a, const U32& b, bool is_signed) { | ||
| 1244 | return is_signed ? SMin(a, b) : UMin(a, b); | ||
| 1245 | } | ||
| 1246 | |||
| 1247 | U32 IREmitter::SMax(const U32& a, const U32& b) { | ||
| 1248 | return Inst<U32>(Opcode::SMax32, a, b); | ||
| 1249 | } | ||
| 1250 | |||
| 1251 | U32 IREmitter::UMax(const U32& a, const U32& b) { | ||
| 1252 | return Inst<U32>(Opcode::UMax32, a, b); | ||
| 1253 | } | ||
| 1254 | |||
| 1255 | U32 IREmitter::IMax(const U32& a, const U32& b, bool is_signed) { | ||
| 1256 | return is_signed ? SMax(a, b) : UMax(a, b); | ||
| 1257 | } | ||
| 1258 | |||
| 1259 | U32 IREmitter::SClamp(const U32& value, const U32& min, const U32& max) { | ||
| 1260 | return Inst<U32>(Opcode::SClamp32, value, min, max); | ||
| 1261 | } | ||
| 1262 | |||
| 1263 | U32 IREmitter::UClamp(const U32& value, const U32& min, const U32& max) { | ||
| 1264 | return Inst<U32>(Opcode::UClamp32, value, min, max); | ||
| 1265 | } | ||
| 1266 | |||
| 1267 | U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) { | ||
| 1268 | return Inst<U1>(is_signed ? Opcode::SLessThan : Opcode::ULessThan, lhs, rhs); | ||
| 1269 | } | ||
| 1270 | |||
| 1271 | U1 IREmitter::IEqual(const U32U64& lhs, const U32U64& rhs) { | ||
| 1272 | if (lhs.Type() != rhs.Type()) { | ||
| 1273 | throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); | ||
| 1274 | } | ||
| 1275 | switch (lhs.Type()) { | ||
| 1276 | case Type::U32: | ||
| 1277 | return Inst<U1>(Opcode::IEqual, lhs, rhs); | ||
| 1278 | case Type::U64: { | ||
| 1279 | // Manually compare the unpacked values | ||
| 1280 | const Value lhs_vector{UnpackUint2x32(lhs)}; | ||
| 1281 | const Value rhs_vector{UnpackUint2x32(rhs)}; | ||
| 1282 | return LogicalAnd(IEqual(IR::U32{CompositeExtract(lhs_vector, 0)}, | ||
| 1283 | IR::U32{CompositeExtract(rhs_vector, 0)}), | ||
| 1284 | IEqual(IR::U32{CompositeExtract(lhs_vector, 1)}, | ||
| 1285 | IR::U32{CompositeExtract(rhs_vector, 1)})); | ||
| 1286 | } | ||
| 1287 | default: | ||
| 1288 | ThrowInvalidType(lhs.Type()); | ||
| 1289 | } | ||
| 1290 | } | ||
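
There is no 64-bit IEqual opcode, so the U64 case above compares the two unpacked 32-bit halves. The equivalence, as a host-side sketch:

    #include <cstdint>

    // Mirrors the UnpackUint2x32 + CompositeExtract sequence above:
    // two 64-bit values are equal iff both 32-bit halves are equal.
    bool Equal64Via32(uint64_t a, uint64_t b) {
        const uint32_t a_lo = static_cast<uint32_t>(a);
        const uint32_t a_hi = static_cast<uint32_t>(a >> 32);
        const uint32_t b_lo = static_cast<uint32_t>(b);
        const uint32_t b_hi = static_cast<uint32_t>(b >> 32);
        return a_lo == b_lo && a_hi == b_hi;
    }
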
| 1291 | |||
| 1292 | U1 IREmitter::ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed) { | ||
| 1293 | return Inst<U1>(is_signed ? Opcode::SLessThanEqual : Opcode::ULessThanEqual, lhs, rhs); | ||
| 1294 | } | ||
| 1295 | |||
| 1296 | U1 IREmitter::IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed) { | ||
| 1297 | return Inst<U1>(is_signed ? Opcode::SGreaterThan : Opcode::UGreaterThan, lhs, rhs); | ||
| 1298 | } | ||
| 1299 | |||
| 1300 | U1 IREmitter::INotEqual(const U32& lhs, const U32& rhs) { | ||
| 1301 | return Inst<U1>(Opcode::INotEqual, lhs, rhs); | ||
| 1302 | } | ||
| 1303 | |||
| 1304 | U1 IREmitter::IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed) { | ||
| 1305 | return Inst<U1>(is_signed ? Opcode::SGreaterThanEqual : Opcode::UGreaterThanEqual, lhs, rhs); | ||
| 1306 | } | ||
| 1307 | |||
| 1308 | U32 IREmitter::SharedAtomicIAdd(const U32& pointer_offset, const U32& value) { | ||
| 1309 | return Inst<U32>(Opcode::SharedAtomicIAdd32, pointer_offset, value); | ||
| 1310 | } | ||
| 1311 | |||
| 1312 | U32 IREmitter::SharedAtomicSMin(const U32& pointer_offset, const U32& value) { | ||
| 1313 | return Inst<U32>(Opcode::SharedAtomicSMin32, pointer_offset, value); | ||
| 1314 | } | ||
| 1315 | |||
| 1316 | U32 IREmitter::SharedAtomicUMin(const U32& pointer_offset, const U32& value) { | ||
| 1317 | return Inst<U32>(Opcode::SharedAtomicUMin32, pointer_offset, value); | ||
| 1318 | } | ||
| 1319 | |||
| 1320 | U32 IREmitter::SharedAtomicIMin(const U32& pointer_offset, const U32& value, bool is_signed) { | ||
| 1321 | return is_signed ? SharedAtomicSMin(pointer_offset, value) | ||
| 1322 | : SharedAtomicUMin(pointer_offset, value); | ||
| 1323 | } | ||
| 1324 | |||
| 1325 | U32 IREmitter::SharedAtomicSMax(const U32& pointer_offset, const U32& value) { | ||
| 1326 | return Inst<U32>(Opcode::SharedAtomicSMax32, pointer_offset, value); | ||
| 1327 | } | ||
| 1328 | |||
| 1329 | U32 IREmitter::SharedAtomicUMax(const U32& pointer_offset, const U32& value) { | ||
| 1330 | return Inst<U32>(Opcode::SharedAtomicUMax32, pointer_offset, value); | ||
| 1331 | } | ||
| 1332 | |||
| 1333 | U32 IREmitter::SharedAtomicIMax(const U32& pointer_offset, const U32& value, bool is_signed) { | ||
| 1334 | return is_signed ? SharedAtomicSMax(pointer_offset, value) | ||
| 1335 | : SharedAtomicUMax(pointer_offset, value); | ||
| 1336 | } | ||
| 1337 | |||
| 1338 | U32 IREmitter::SharedAtomicInc(const U32& pointer_offset, const U32& value) { | ||
| 1339 | return Inst<U32>(Opcode::SharedAtomicInc32, pointer_offset, value); | ||
| 1340 | } | ||
| 1341 | |||
| 1342 | U32 IREmitter::SharedAtomicDec(const U32& pointer_offset, const U32& value) { | ||
| 1343 | return Inst<U32>(Opcode::SharedAtomicDec32, pointer_offset, value); | ||
| 1344 | } | ||
| 1345 | |||
| 1346 | U32 IREmitter::SharedAtomicAnd(const U32& pointer_offset, const U32& value) { | ||
| 1347 | return Inst<U32>(Opcode::SharedAtomicAnd32, pointer_offset, value); | ||
| 1348 | } | ||
| 1349 | |||
| 1350 | U32 IREmitter::SharedAtomicOr(const U32& pointer_offset, const U32& value) { | ||
| 1351 | return Inst<U32>(Opcode::SharedAtomicOr32, pointer_offset, value); | ||
| 1352 | } | ||
| 1353 | |||
| 1354 | U32 IREmitter::SharedAtomicXor(const U32& pointer_offset, const U32& value) { | ||
| 1355 | return Inst<U32>(Opcode::SharedAtomicXor32, pointer_offset, value); | ||
| 1356 | } | ||
| 1357 | |||
| 1358 | U32U64 IREmitter::SharedAtomicExchange(const U32& pointer_offset, const U32U64& value) { | ||
| 1359 | switch (value.Type()) { | ||
| 1360 | case Type::U32: | ||
| 1361 | return Inst<U32>(Opcode::SharedAtomicExchange32, pointer_offset, value); | ||
| 1362 | case Type::U64: | ||
| 1363 | return Inst<U64>(Opcode::SharedAtomicExchange64, pointer_offset, value); | ||
| 1364 | default: | ||
| 1365 | ThrowInvalidType(value.Type()); | ||
| 1366 | } | ||
| 1367 | } | ||
| 1368 | |||
| 1369 | U32U64 IREmitter::GlobalAtomicIAdd(const U64& pointer_offset, const U32U64& value) { | ||
| 1370 | switch (value.Type()) { | ||
| 1371 | case Type::U32: | ||
| 1372 | return Inst<U32>(Opcode::GlobalAtomicIAdd32, pointer_offset, value); | ||
| 1373 | case Type::U64: | ||
| 1374 | return Inst<U64>(Opcode::GlobalAtomicIAdd64, pointer_offset, value); | ||
| 1375 | default: | ||
| 1376 | ThrowInvalidType(value.Type()); | ||
| 1377 | } | ||
| 1378 | } | ||
| 1379 | |||
| 1380 | U32U64 IREmitter::GlobalAtomicSMin(const U64& pointer_offset, const U32U64& value) { | ||
| 1381 | switch (value.Type()) { | ||
| 1382 | case Type::U32: | ||
| 1383 | return Inst<U32>(Opcode::GlobalAtomicSMin32, pointer_offset, value); | ||
| 1384 | case Type::U64: | ||
| 1385 | return Inst<U64>(Opcode::GlobalAtomicSMin64, pointer_offset, value); | ||
| 1386 | default: | ||
| 1387 | ThrowInvalidType(value.Type()); | ||
| 1388 | } | ||
| 1389 | } | ||
| 1390 | |||
| 1391 | U32U64 IREmitter::GlobalAtomicUMin(const U64& pointer_offset, const U32U64& value) { | ||
| 1392 | switch (value.Type()) { | ||
| 1393 | case Type::U32: | ||
| 1394 | return Inst<U32>(Opcode::GlobalAtomicUMin32, pointer_offset, value); | ||
| 1395 | case Type::U64: | ||
| 1396 | return Inst<U64>(Opcode::GlobalAtomicUMin64, pointer_offset, value); | ||
| 1397 | default: | ||
| 1398 | ThrowInvalidType(value.Type()); | ||
| 1399 | } | ||
| 1400 | } | ||
| 1401 | |||
| 1402 | U32U64 IREmitter::GlobalAtomicIMin(const U64& pointer_offset, const U32U64& value, bool is_signed) { | ||
| 1403 | return is_signed ? GlobalAtomicSMin(pointer_offset, value) | ||
| 1404 | : GlobalAtomicUMin(pointer_offset, value); | ||
| 1405 | } | ||
| 1406 | |||
| 1407 | U32U64 IREmitter::GlobalAtomicSMax(const U64& pointer_offset, const U32U64& value) { | ||
| 1408 | switch (value.Type()) { | ||
| 1409 | case Type::U32: | ||
| 1410 | return Inst<U32>(Opcode::GlobalAtomicSMax32, pointer_offset, value); | ||
| 1411 | case Type::U64: | ||
| 1412 | return Inst<U64>(Opcode::GlobalAtomicSMax64, pointer_offset, value); | ||
| 1413 | default: | ||
| 1414 | ThrowInvalidType(value.Type()); | ||
| 1415 | } | ||
| 1416 | } | ||
| 1417 | |||
| 1418 | U32U64 IREmitter::GlobalAtomicUMax(const U64& pointer_offset, const U32U64& value) { | ||
| 1419 | switch (value.Type()) { | ||
| 1420 | case Type::U32: | ||
| 1421 | return Inst<U32>(Opcode::GlobalAtomicUMax32, pointer_offset, value); | ||
| 1422 | case Type::U64: | ||
| 1423 | return Inst<U64>(Opcode::GlobalAtomicUMax64, pointer_offset, value); | ||
| 1424 | default: | ||
| 1425 | ThrowInvalidType(value.Type()); | ||
| 1426 | } | ||
| 1427 | } | ||
| 1428 | |||
| 1429 | U32U64 IREmitter::GlobalAtomicIMax(const U64& pointer_offset, const U32U64& value, bool is_signed) { | ||
| 1430 | return is_signed ? GlobalAtomicSMax(pointer_offset, value) | ||
| 1431 | : GlobalAtomicUMax(pointer_offset, value); | ||
| 1432 | } | ||
| 1433 | |||
| 1434 | U32 IREmitter::GlobalAtomicInc(const U64& pointer_offset, const U32& value) { | ||
| 1435 | return Inst<U32>(Opcode::GlobalAtomicInc32, pointer_offset, value); | ||
| 1436 | } | ||
| 1437 | |||
| 1438 | U32 IREmitter::GlobalAtomicDec(const U64& pointer_offset, const U32& value) { | ||
| 1439 | return Inst<U32>(Opcode::GlobalAtomicDec32, pointer_offset, value); | ||
| 1440 | } | ||
| 1441 | |||
| 1442 | U32U64 IREmitter::GlobalAtomicAnd(const U64& pointer_offset, const U32U64& value) { | ||
| 1443 | switch (value.Type()) { | ||
| 1444 | case Type::U32: | ||
| 1445 | return Inst<U32>(Opcode::GlobalAtomicAnd32, pointer_offset, value); | ||
| 1446 | case Type::U64: | ||
| 1447 | return Inst<U64>(Opcode::GlobalAtomicAnd64, pointer_offset, value); | ||
| 1448 | default: | ||
| 1449 | ThrowInvalidType(value.Type()); | ||
| 1450 | } | ||
| 1451 | } | ||
| 1452 | |||
| 1453 | U32U64 IREmitter::GlobalAtomicOr(const U64& pointer_offset, const U32U64& value) { | ||
| 1454 | switch (value.Type()) { | ||
| 1455 | case Type::U32: | ||
| 1456 | return Inst<U32>(Opcode::GlobalAtomicOr32, pointer_offset, value); | ||
| 1457 | case Type::U64: | ||
| 1458 | return Inst<U64>(Opcode::GlobalAtomicOr64, pointer_offset, value); | ||
| 1459 | default: | ||
| 1460 | ThrowInvalidType(value.Type()); | ||
| 1461 | } | ||
| 1462 | } | ||
| 1463 | |||
| 1464 | U32U64 IREmitter::GlobalAtomicXor(const U64& pointer_offset, const U32U64& value) { | ||
| 1465 | switch (value.Type()) { | ||
| 1466 | case Type::U32: | ||
| 1467 | return Inst<U32>(Opcode::GlobalAtomicXor32, pointer_offset, value); | ||
| 1468 | case Type::U64: | ||
| 1469 | return Inst<U64>(Opcode::GlobalAtomicXor64, pointer_offset, value); | ||
| 1470 | default: | ||
| 1471 | ThrowInvalidType(value.Type()); | ||
| 1472 | } | ||
| 1473 | } | ||
| 1474 | |||
| 1475 | U32U64 IREmitter::GlobalAtomicExchange(const U64& pointer_offset, const U32U64& value) { | ||
| 1476 | switch (value.Type()) { | ||
| 1477 | case Type::U32: | ||
| 1478 | return Inst<U32>(Opcode::GlobalAtomicExchange32, pointer_offset, value); | ||
| 1479 | case Type::U64: | ||
| 1480 | return Inst<U64>(Opcode::GlobalAtomicExchange64, pointer_offset, value); | ||
| 1481 | default: | ||
| 1482 | ThrowInvalidType(value.Type()); | ||
| 1483 | } | ||
| 1484 | } | ||
| 1485 | |||
| 1486 | F32 IREmitter::GlobalAtomicF32Add(const U64& pointer_offset, const Value& value, | ||
| 1487 | const FpControl control) { | ||
| 1488 | return Inst<F32>(Opcode::GlobalAtomicAddF32, Flags{control}, pointer_offset, value); | ||
| 1489 | } | ||
| 1490 | |||
| 1491 | Value IREmitter::GlobalAtomicF16x2Add(const U64& pointer_offset, const Value& value, | ||
| 1492 | const FpControl control) { | ||
| 1493 | return Inst(Opcode::GlobalAtomicAddF16x2, Flags{control}, pointer_offset, value); | ||
| 1494 | } | ||
| 1495 | |||
| 1496 | Value IREmitter::GlobalAtomicF16x2Min(const U64& pointer_offset, const Value& value, | ||
| 1497 | const FpControl control) { | ||
| 1498 | return Inst(Opcode::GlobalAtomicMinF16x2, Flags{control}, pointer_offset, value); | ||
| 1499 | } | ||
| 1500 | |||
| 1501 | Value IREmitter::GlobalAtomicF16x2Max(const U64& pointer_offset, const Value& value, | ||
| 1502 | const FpControl control) { | ||
| 1503 | return Inst(Opcode::GlobalAtomicMaxF16x2, Flags{control}, pointer_offset, value); | ||
| 1504 | } | ||
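
The F16x2 atomics take a packed 32-bit operand holding two f16 lanes rather than a scalar. A hedged usage sketch, assuming an emitter ir, a U64 address, and two F16 values lo and hi from the surrounding frontend code:

    // Pack two half-precision lanes and add them atomically, lane by lane.
    const IR::U32 packed{ir.PackFloat2x16(ir.CompositeConstruct(lo, hi))};
    const IR::Value old_value{ir.GlobalAtomicF16x2Add(address, packed, {})};
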
| 1505 | |||
| 1506 | U1 IREmitter::LogicalOr(const U1& a, const U1& b) { | ||
| 1507 | return Inst<U1>(Opcode::LogicalOr, a, b); | ||
| 1508 | } | ||
| 1509 | |||
| 1510 | U1 IREmitter::LogicalAnd(const U1& a, const U1& b) { | ||
| 1511 | return Inst<U1>(Opcode::LogicalAnd, a, b); | ||
| 1512 | } | ||
| 1513 | |||
| 1514 | U1 IREmitter::LogicalXor(const U1& a, const U1& b) { | ||
| 1515 | return Inst<U1>(Opcode::LogicalXor, a, b); | ||
| 1516 | } | ||
| 1517 | |||
| 1518 | U1 IREmitter::LogicalNot(const U1& value) { | ||
| 1519 | return Inst<U1>(Opcode::LogicalNot, value); | ||
| 1520 | } | ||
| 1521 | |||
| 1522 | U32U64 IREmitter::ConvertFToS(size_t bitsize, const F16F32F64& value) { | ||
| 1523 | switch (bitsize) { | ||
| 1524 | case 16: | ||
| 1525 | switch (value.Type()) { | ||
| 1526 | case Type::F16: | ||
| 1527 | return Inst<U32>(Opcode::ConvertS16F16, value); | ||
| 1528 | case Type::F32: | ||
| 1529 | return Inst<U32>(Opcode::ConvertS16F32, value); | ||
| 1530 | case Type::F64: | ||
| 1531 | return Inst<U32>(Opcode::ConvertS16F64, value); | ||
| 1532 | default: | ||
| 1533 | ThrowInvalidType(value.Type()); | ||
| 1534 | } | ||
| 1535 | case 32: | ||
| 1536 | switch (value.Type()) { | ||
| 1537 | case Type::F16: | ||
| 1538 | return Inst<U32>(Opcode::ConvertS32F16, value); | ||
| 1539 | case Type::F32: | ||
| 1540 | return Inst<U32>(Opcode::ConvertS32F32, value); | ||
| 1541 | case Type::F64: | ||
| 1542 | return Inst<U32>(Opcode::ConvertS32F64, value); | ||
| 1543 | default: | ||
| 1544 | ThrowInvalidType(value.Type()); | ||
| 1545 | } | ||
| 1546 | case 64: | ||
| 1547 | switch (value.Type()) { | ||
| 1548 | case Type::F16: | ||
| 1549 | return Inst<U64>(Opcode::ConvertS64F16, value); | ||
| 1550 | case Type::F32: | ||
| 1551 | return Inst<U64>(Opcode::ConvertS64F32, value); | ||
| 1552 | case Type::F64: | ||
| 1553 | return Inst<U64>(Opcode::ConvertS64F64, value); | ||
| 1554 | default: | ||
| 1555 | ThrowInvalidType(value.Type()); | ||
| 1556 | } | ||
| 1557 | default: | ||
| 1558 | throw InvalidArgument("Invalid destination bitsize {}", bitsize); | ||
| 1559 | } | ||
| 1560 | } | ||
| 1561 | |||
| 1562 | U32U64 IREmitter::ConvertFToU(size_t bitsize, const F16F32F64& value) { | ||
| 1563 | switch (bitsize) { | ||
| 1564 | case 16: | ||
| 1565 | switch (value.Type()) { | ||
| 1566 | case Type::F16: | ||
| 1567 | return Inst<U32>(Opcode::ConvertU16F16, value); | ||
| 1568 | case Type::F32: | ||
| 1569 | return Inst<U32>(Opcode::ConvertU16F32, value); | ||
| 1570 | case Type::F64: | ||
| 1571 | return Inst<U32>(Opcode::ConvertU16F64, value); | ||
| 1572 | default: | ||
| 1573 | ThrowInvalidType(value.Type()); | ||
| 1574 | } | ||
| 1575 | case 32: | ||
| 1576 | switch (value.Type()) { | ||
| 1577 | case Type::F16: | ||
| 1578 | return Inst<U32>(Opcode::ConvertU32F16, value); | ||
| 1579 | case Type::F32: | ||
| 1580 | return Inst<U32>(Opcode::ConvertU32F32, value); | ||
| 1581 | case Type::F64: | ||
| 1582 | return Inst<U32>(Opcode::ConvertU32F64, value); | ||
| 1583 | default: | ||
| 1584 | ThrowInvalidType(value.Type()); | ||
| 1585 | } | ||
| 1586 | case 64: | ||
| 1587 | switch (value.Type()) { | ||
| 1588 | case Type::F16: | ||
| 1589 | return Inst<U64>(Opcode::ConvertU64F16, value); | ||
| 1590 | case Type::F32: | ||
| 1591 | return Inst<U64>(Opcode::ConvertU64F32, value); | ||
| 1592 | case Type::F64: | ||
| 1593 | return Inst<U64>(Opcode::ConvertU64F64, value); | ||
| 1594 | default: | ||
| 1595 | ThrowInvalidType(value.Type()); | ||
| 1596 | } | ||
| 1597 | default: | ||
| 1598 | throw InvalidArgument("Invalid destination bitsize {}", bitsize); | ||
| 1599 | } | ||
| 1600 | } | ||
| 1601 | |||
| 1602 | U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value) { | ||
| 1603 | return is_signed ? ConvertFToS(bitsize, value) : ConvertFToU(bitsize, value); | ||
| 1604 | } | ||
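
Note that 16-bit destinations in ConvertFToS/ConvertFToU are still materialized as 32-bit IR values; only the opcode records the narrower width. A hedged sketch, assuming an emitter ir and an F32 value value:

    // The S16 result lives in a U32; ConvertS16F32 encodes the 16-bit width.
    const IR::U32 narrow{ir.ConvertFToS(16, value)};
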
| 1605 | |||
| 1606 | F16F32F64 IREmitter::ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Value& value, | ||
| 1607 | FpControl control) { | ||
| 1608 | switch (dest_bitsize) { | ||
| 1609 | case 16: | ||
| 1610 | switch (src_bitsize) { | ||
| 1611 | case 8: | ||
| 1612 | return Inst<F16>(Opcode::ConvertF16S8, Flags{control}, value); | ||
| 1613 | case 16: | ||
| 1614 | return Inst<F16>(Opcode::ConvertF16S16, Flags{control}, value); | ||
| 1615 | case 32: | ||
| 1616 | return Inst<F16>(Opcode::ConvertF16S32, Flags{control}, value); | ||
| 1617 | case 64: | ||
| 1618 | return Inst<F16>(Opcode::ConvertF16S64, Flags{control}, value); | ||
| 1619 | } | ||
| 1620 | break; | ||
| 1621 | case 32: | ||
| 1622 | switch (src_bitsize) { | ||
| 1623 | case 8: | ||
| 1624 | return Inst<F32>(Opcode::ConvertF32S8, Flags{control}, value); | ||
| 1625 | case 16: | ||
| 1626 | return Inst<F32>(Opcode::ConvertF32S16, Flags{control}, value); | ||
| 1627 | case 32: | ||
| 1628 | return Inst<F32>(Opcode::ConvertF32S32, Flags{control}, value); | ||
| 1629 | case 64: | ||
| 1630 | return Inst<F32>(Opcode::ConvertF32S64, Flags{control}, value); | ||
| 1631 | } | ||
| 1632 | break; | ||
| 1633 | case 64: | ||
| 1634 | switch (src_bitsize) { | ||
| 1635 | case 8: | ||
| 1636 | return Inst<F64>(Opcode::ConvertF64S8, Flags{control}, value); | ||
| 1637 | case 16: | ||
| 1638 | return Inst<F64>(Opcode::ConvertF64S16, Flags{control}, value); | ||
| 1639 | case 32: | ||
| 1640 | return Inst<F64>(Opcode::ConvertF64S32, Flags{control}, value); | ||
| 1641 | case 64: | ||
| 1642 | return Inst<F64>(Opcode::ConvertF64S64, Flags{control}, value); | ||
| 1643 | } | ||
| 1644 | break; | ||
| 1645 | } | ||
| 1646 | throw InvalidArgument("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize); | ||
| 1647 | } | ||
| 1648 | |||
| 1649 | F16F32F64 IREmitter::ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Value& value, | ||
| 1650 | FpControl control) { | ||
| 1651 | switch (dest_bitsize) { | ||
| 1652 | case 16: | ||
| 1653 | switch (src_bitsize) { | ||
| 1654 | case 8: | ||
| 1655 | return Inst<F16>(Opcode::ConvertF16U8, Flags{control}, value); | ||
| 1656 | case 16: | ||
| 1657 | return Inst<F16>(Opcode::ConvertF16U16, Flags{control}, value); | ||
| 1658 | case 32: | ||
| 1659 | return Inst<F16>(Opcode::ConvertF16U32, Flags{control}, value); | ||
| 1660 | case 64: | ||
| 1661 | return Inst<F16>(Opcode::ConvertF16U64, Flags{control}, value); | ||
| 1662 | } | ||
| 1663 | break; | ||
| 1664 | case 32: | ||
| 1665 | switch (src_bitsize) { | ||
| 1666 | case 8: | ||
| 1667 | return Inst<F32>(Opcode::ConvertF32U8, Flags{control}, value); | ||
| 1668 | case 16: | ||
| 1669 | return Inst<F32>(Opcode::ConvertF32U16, Flags{control}, value); | ||
| 1670 | case 32: | ||
| 1671 | return Inst<F32>(Opcode::ConvertF32U32, Flags{control}, value); | ||
| 1672 | case 64: | ||
| 1673 | return Inst<F32>(Opcode::ConvertF32U64, Flags{control}, value); | ||
| 1674 | } | ||
| 1675 | break; | ||
| 1676 | case 64: | ||
| 1677 | switch (src_bitsize) { | ||
| 1678 | case 8: | ||
| 1679 | return Inst<F64>(Opcode::ConvertF64U8, Flags{control}, value); | ||
| 1680 | case 16: | ||
| 1681 | return Inst<F64>(Opcode::ConvertF64U16, Flags{control}, value); | ||
| 1682 | case 32: | ||
| 1683 | return Inst<F64>(Opcode::ConvertF64U32, Flags{control}, value); | ||
| 1684 | case 64: | ||
| 1685 | return Inst<F64>(Opcode::ConvertF64U64, Flags{control}, value); | ||
| 1686 | } | ||
| 1687 | break; | ||
| 1688 | } | ||
| 1689 | throw InvalidArgument("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize); | ||
| 1690 | } | ||
| 1691 | |||
| 1692 | F16F32F64 IREmitter::ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed, | ||
| 1693 | const Value& value, FpControl control) { | ||
| 1694 | return is_signed ? ConvertSToF(dest_bitsize, src_bitsize, value, control) | ||
| 1695 | : ConvertUToF(dest_bitsize, src_bitsize, value, control); | ||
| 1696 | } | ||
| 1697 | |||
| 1698 | U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) { | ||
| 1699 | switch (result_bitsize) { | ||
| 1700 | case 32: | ||
| 1701 | switch (value.Type()) { | ||
| 1702 | case Type::U32: | ||
| 1703 | // Nothing to do | ||
| 1704 | return value; | ||
| 1705 | case Type::U64: | ||
| 1706 | return Inst<U32>(Opcode::ConvertU32U64, value); | ||
| 1707 | default: | ||
| 1708 | break; | ||
| 1709 | } | ||
| 1710 | break; | ||
| 1711 | case 64: | ||
| 1712 | switch (value.Type()) { | ||
| 1713 | case Type::U32: | ||
| 1714 | return Inst<U64>(Opcode::ConvertU64U32, value); | ||
| 1715 | case Type::U64: | ||
| 1716 | // Nothing to do | ||
| 1717 | return value; | ||
| 1718 | default: | ||
| 1719 | break; | ||
| 1720 | } | ||
| 1721 | } | ||
| 1722 | throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); | ||
| 1723 | } | ||
| 1724 | |||
| 1725 | F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value, FpControl control) { | ||
| 1726 | switch (result_bitsize) { | ||
| 1727 | case 16: | ||
| 1728 | switch (value.Type()) { | ||
| 1729 | case Type::F16: | ||
| 1730 | // Nothing to do | ||
| 1731 | return value; | ||
| 1732 | case Type::F32: | ||
| 1733 | return Inst<F16>(Opcode::ConvertF16F32, Flags{control}, value); | ||
| 1734 | case Type::F64: | ||
| 1735 | throw LogicError("Illegal conversion from F64 to F16"); | ||
| 1736 | default: | ||
| 1737 | break; | ||
| 1738 | } | ||
| 1739 | break; | ||
| 1740 | case 32: | ||
| 1741 | switch (value.Type()) { | ||
| 1742 | case Type::F16: | ||
| 1743 | return Inst<F32>(Opcode::ConvertF32F16, Flags{control}, value); | ||
| 1744 | case Type::F32: | ||
| 1745 | // Nothing to do | ||
| 1746 | return value; | ||
| 1747 | case Type::F64: | ||
| 1748 | return Inst<F32>(Opcode::ConvertF32F64, Flags{control}, value); | ||
| 1749 | default: | ||
| 1750 | break; | ||
| 1751 | } | ||
| 1752 | break; | ||
| 1753 | case 64: | ||
| 1754 | switch (value.Type()) { | ||
| 1755 | case Type::F16: | ||
| 1756 | throw LogicError("Illegal conversion from F16 to F64"); | ||
| 1757 | case Type::F32: | ||
| 1758 | return Inst<F64>(Opcode::ConvertF64F32, Flags{control}, value); | ||
| 1759 | case Type::F64: | ||
| 1760 | // Nothing to do | ||
| 1761 | return value; | ||
| 1762 | default: | ||
| 1763 | break; | ||
| 1764 | } | ||
| 1765 | break; | ||
| 1766 | } | ||
| 1767 | throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); | ||
| 1768 | } | ||
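
FPConvert only converts one step along the F16/F32/F64 chain; direct F16/F64 conversions throw LogicError, so a caller needing the full widening goes through F32 explicitly. A hedged sketch, assuming an emitter ir and an F16 value half:

    // Widen F16 to F64 in two steps; FPConvert rejects the direct route.
    const IR::F32 single{ir.FPConvert(32, half)};
    const IR::F64 wide{ir.FPConvert(64, single)};
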
| 1769 | |||
| 1770 | Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias, | ||
| 1771 | const Value& offset, const F32& lod_clamp, | ||
| 1772 | TextureInstInfo info) { | ||
| 1773 | const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)}; | ||
| 1774 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleImplicitLod | ||
| 1775 | : Opcode::BindlessImageSampleImplicitLod}; | ||
| 1776 | return Inst(op, Flags{info}, handle, coords, bias_lc, offset); | ||
| 1777 | } | ||
| 1778 | |||
| 1779 | Value IREmitter::ImageSampleExplicitLod(const Value& handle, const Value& coords, const F32& lod, | ||
| 1780 | const Value& offset, TextureInstInfo info) { | ||
| 1781 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleExplicitLod | ||
| 1782 | : Opcode::BindlessImageSampleExplicitLod}; | ||
| 1783 | return Inst(op, Flags{info}, handle, coords, lod, offset); | ||
| 1784 | } | ||
| 1785 | |||
| 1786 | F32 IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, const F32& dref, | ||
| 1787 | const F32& bias, const Value& offset, | ||
| 1788 | const F32& lod_clamp, TextureInstInfo info) { | ||
| 1789 | const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)}; | ||
| 1790 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefImplicitLod | ||
| 1791 | : Opcode::BindlessImageSampleDrefImplicitLod}; | ||
| 1792 | return Inst<F32>(op, Flags{info}, handle, coords, dref, bias_lc, offset); | ||
| 1793 | } | ||
| 1794 | |||
| 1795 | F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, const F32& dref, | ||
| 1796 | const F32& lod, const Value& offset, | ||
| 1797 | TextureInstInfo info) { | ||
| 1798 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefExplicitLod | ||
| 1799 | : Opcode::BindlessImageSampleDrefExplicitLod}; | ||
| 1800 | return Inst<F32>(op, Flags{info}, handle, coords, dref, lod, offset); | ||
| 1801 | } | ||
| 1802 | |||
| 1803 | Value IREmitter::ImageGather(const Value& handle, const Value& coords, const Value& offset, | ||
| 1804 | const Value& offset2, TextureInstInfo info) { | ||
| 1805 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGather : Opcode::BindlessImageGather}; | ||
| 1806 | return Inst(op, Flags{info}, handle, coords, offset, offset2); | ||
| 1807 | } | ||
| 1808 | |||
| 1809 | Value IREmitter::ImageGatherDref(const Value& handle, const Value& coords, const Value& offset, | ||
| 1810 | const Value& offset2, const F32& dref, TextureInstInfo info) { | ||
| 1811 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGatherDref | ||
| 1812 | : Opcode::BindlessImageGatherDref}; | ||
| 1813 | return Inst(op, Flags{info}, handle, coords, offset, offset2, dref); | ||
| 1814 | } | ||
| 1815 | |||
| 1816 | Value IREmitter::ImageFetch(const Value& handle, const Value& coords, const Value& offset, | ||
| 1817 | const U32& lod, const U32& multisampling, TextureInstInfo info) { | ||
| 1818 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageFetch : Opcode::BindlessImageFetch}; | ||
| 1819 | return Inst(op, Flags{info}, handle, coords, offset, lod, multisampling); | ||
| 1820 | } | ||
| 1821 | |||
| 1822 | Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod) { | ||
| 1823 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageQueryDimensions | ||
| 1824 | : Opcode::BindlessImageQueryDimensions}; | ||
| 1825 | return Inst(op, handle, lod); | ||
| 1826 | } | ||
| 1827 | |||
| 1828 | Value IREmitter::ImageQueryLod(const Value& handle, const Value& coords, TextureInstInfo info) { | ||
| 1829 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageQueryLod | ||
| 1830 | : Opcode::BindlessImageQueryLod}; | ||
| 1831 | return Inst(op, Flags{info}, handle, coords); | ||
| 1832 | } | ||
| 1833 | |||
| 1834 | Value IREmitter::ImageGradient(const Value& handle, const Value& coords, const Value& derivates, | ||
| 1835 | const Value& offset, const F32& lod_clamp, TextureInstInfo info) { | ||
| 1836 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGradient | ||
| 1837 | : Opcode::BindlessImageGradient}; | ||
| 1838 | return Inst(op, Flags{info}, handle, coords, derivates, offset, lod_clamp); | ||
| 1839 | } | ||
| 1840 | |||
| 1841 | Value IREmitter::ImageRead(const Value& handle, const Value& coords, TextureInstInfo info) { | ||
| 1842 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageRead : Opcode::BindlessImageRead}; | ||
| 1843 | return Inst(op, Flags{info}, handle, coords); | ||
| 1844 | } | ||
| 1845 | |||
| 1846 | void IREmitter::ImageWrite(const Value& handle, const Value& coords, const Value& color, | ||
| 1847 | TextureInstInfo info) { | ||
| 1848 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageWrite : Opcode::BindlessImageWrite}; | ||
| 1849 | Inst(op, Flags{info}, handle, coords, color); | ||
| 1850 | } | ||
| 1851 | |||
| 1852 | Value IREmitter::ImageAtomicIAdd(const Value& handle, const Value& coords, const Value& value, | ||
| 1853 | TextureInstInfo info) { | ||
| 1854 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicIAdd32 | ||
| 1855 | : Opcode::BindlessImageAtomicIAdd32}; | ||
| 1856 | return Inst(op, Flags{info}, handle, coords, value); | ||
| 1857 | } | ||
| 1858 | |||
| 1859 | Value IREmitter::ImageAtomicSMin(const Value& handle, const Value& coords, const Value& value, | ||
| 1860 | TextureInstInfo info) { | ||
| 1861 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicSMin32 | ||
| 1862 | : Opcode::BindlessImageAtomicSMin32}; | ||
| 1863 | return Inst(op, Flags{info}, handle, coords, value); | ||
| 1864 | } | ||
| 1865 | |||
| 1866 | Value IREmitter::ImageAtomicUMin(const Value& handle, const Value& coords, const Value& value, | ||
| 1867 | TextureInstInfo info) { | ||
| 1868 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicUMin32 | ||
| 1869 | : Opcode::BindlessImageAtomicUMin32}; | ||
| 1870 | return Inst(op, Flags{info}, handle, coords, value); | ||
| 1871 | } | ||
| 1872 | |||
| 1873 | Value IREmitter::ImageAtomicIMin(const Value& handle, const Value& coords, const Value& value, | ||
| 1874 | bool is_signed, TextureInstInfo info) { | ||
| 1875 | return is_signed ? ImageAtomicSMin(handle, coords, value, info) | ||
| 1876 | : ImageAtomicUMin(handle, coords, value, info); | ||
| 1877 | } | ||
| 1878 | |||
| 1879 | Value IREmitter::ImageAtomicSMax(const Value& handle, const Value& coords, const Value& value, | ||
| 1880 | TextureInstInfo info) { | ||
| 1881 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicSMax32 | ||
| 1882 | : Opcode::BindlessImageAtomicSMax32}; | ||
| 1883 | return Inst(op, Flags{info}, handle, coords, value); | ||
| 1884 | } | ||
| 1885 | |||
| 1886 | Value IREmitter::ImageAtomicUMax(const Value& handle, const Value& coords, const Value& value, | ||
| 1887 | TextureInstInfo info) { | ||
| 1888 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicUMax32 | ||
| 1889 | : Opcode::BindlessImageAtomicUMax32}; | ||
| 1890 | return Inst(op, Flags{info}, handle, coords, value); | ||
| 1891 | } | ||
| 1892 | |||
| 1893 | Value IREmitter::ImageAtomicIMax(const Value& handle, const Value& coords, const Value& value, | ||
| 1894 | bool is_signed, TextureInstInfo info) { | ||
| 1895 | return is_signed ? ImageAtomicSMax(handle, coords, value, info) | ||
| 1896 | : ImageAtomicUMax(handle, coords, value, info); | ||
| 1897 | } | ||
| 1898 | |||
| 1899 | Value IREmitter::ImageAtomicInc(const Value& handle, const Value& coords, const Value& value, | ||
| 1900 | TextureInstInfo info) { | ||
| 1901 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicInc32 | ||
| 1902 | : Opcode::BindlessImageAtomicInc32}; | ||
| 1903 | return Inst(op, Flags{info}, handle, coords, value); | ||
| 1904 | } | ||
| 1905 | |||
| 1906 | Value IREmitter::ImageAtomicDec(const Value& handle, const Value& coords, const Value& value, | ||
| 1907 | TextureInstInfo info) { | ||
| 1908 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicDec32 | ||
| 1909 | : Opcode::BindlessImageAtomicDec32}; | ||
| 1910 | return Inst(op, Flags{info}, handle, coords, value); | ||
| 1911 | } | ||
| 1912 | |||
| 1913 | Value IREmitter::ImageAtomicAnd(const Value& handle, const Value& coords, const Value& value, | ||
| 1914 | TextureInstInfo info) { | ||
| 1915 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicAnd32 | ||
| 1916 | : Opcode::BindlessImageAtomicAnd32}; | ||
| 1917 | return Inst(op, Flags{info}, handle, coords, value); | ||
| 1918 | } | ||
| 1919 | |||
| 1920 | Value IREmitter::ImageAtomicOr(const Value& handle, const Value& coords, const Value& value, | ||
| 1921 | TextureInstInfo info) { | ||
| 1922 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicOr32 | ||
| 1923 | : Opcode::BindlessImageAtomicOr32}; | ||
| 1924 | return Inst(op, Flags{info}, handle, coords, value); | ||
| 1925 | } | ||
| 1926 | |||
| 1927 | Value IREmitter::ImageAtomicXor(const Value& handle, const Value& coords, const Value& value, | ||
| 1928 | TextureInstInfo info) { | ||
| 1929 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicXor32 | ||
| 1930 | : Opcode::BindlessImageAtomicXor32}; | ||
| 1931 | return Inst(op, Flags{info}, handle, coords, value); | ||
| 1932 | } | ||
| 1933 | |||
| 1934 | Value IREmitter::ImageAtomicExchange(const Value& handle, const Value& coords, const Value& value, | ||
| 1935 | TextureInstInfo info) { | ||
| 1936 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicExchange32 | ||
| 1937 | : Opcode::BindlessImageAtomicExchange32}; | ||
| 1938 | return Inst(op, Flags{info}, handle, coords, value); | ||
| 1939 | } | ||
| 1940 | |||
| 1941 | U1 IREmitter::VoteAll(const U1& value) { | ||
| 1942 | return Inst<U1>(Opcode::VoteAll, value); | ||
| 1943 | } | ||
| 1944 | |||
| 1945 | U1 IREmitter::VoteAny(const U1& value) { | ||
| 1946 | return Inst<U1>(Opcode::VoteAny, value); | ||
| 1947 | } | ||
| 1948 | |||
| 1949 | U1 IREmitter::VoteEqual(const U1& value) { | ||
| 1950 | return Inst<U1>(Opcode::VoteEqual, value); | ||
| 1951 | } | ||
| 1952 | |||
| 1953 | U32 IREmitter::SubgroupBallot(const U1& value) { | ||
| 1954 | return Inst<U32>(Opcode::SubgroupBallot, value); | ||
| 1955 | } | ||
| 1956 | |||
| 1957 | U32 IREmitter::SubgroupEqMask() { | ||
| 1958 | return Inst<U32>(Opcode::SubgroupEqMask); | ||
| 1959 | } | ||
| 1960 | |||
| 1961 | U32 IREmitter::SubgroupLtMask() { | ||
| 1962 | return Inst<U32>(Opcode::SubgroupLtMask); | ||
| 1963 | } | ||
| 1964 | |||
| 1965 | U32 IREmitter::SubgroupLeMask() { | ||
| 1966 | return Inst<U32>(Opcode::SubgroupLeMask); | ||
| 1967 | } | ||
| 1968 | |||
| 1969 | U32 IREmitter::SubgroupGtMask() { | ||
| 1970 | return Inst<U32>(Opcode::SubgroupGtMask); | ||
| 1971 | } | ||
| 1972 | |||
| 1973 | U32 IREmitter::SubgroupGeMask() { | ||
| 1974 | return Inst<U32>(Opcode::SubgroupGeMask); | ||
| 1975 | } | ||
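
The subgroup mask emitters return the conventional lane-relative masks. A hedged host-side sketch of their values for a 32-wide subgroup, with lane being the value of LaneId():

    #include <cstdint>

    // Conventional SPIR-V-style subgroup masks relative to the lane id.
    uint32_t EqMask(uint32_t lane) { return 1u << lane; }
    uint32_t LtMask(uint32_t lane) { return (1u << lane) - 1u; }
    uint32_t LeMask(uint32_t lane) { return LtMask(lane) | EqMask(lane); }
    uint32_t GtMask(uint32_t lane) { return ~LeMask(lane); }
    uint32_t GeMask(uint32_t lane) { return ~LtMask(lane); }
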
| 1976 | |||
| 1977 | U32 IREmitter::ShuffleIndex(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||
| 1978 | const IR::U32& seg_mask) { | ||
| 1979 | return Inst<U32>(Opcode::ShuffleIndex, value, index, clamp, seg_mask); | ||
| 1980 | } | ||
| 1981 | |||
| 1982 | U32 IREmitter::ShuffleUp(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||
| 1983 | const IR::U32& seg_mask) { | ||
| 1984 | return Inst<U32>(Opcode::ShuffleUp, value, index, clamp, seg_mask); | ||
| 1985 | } | ||
| 1986 | |||
| 1987 | U32 IREmitter::ShuffleDown(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||
| 1988 | const IR::U32& seg_mask) { | ||
| 1989 | return Inst<U32>(Opcode::ShuffleDown, value, index, clamp, seg_mask); | ||
| 1990 | } | ||
| 1991 | |||
| 1992 | U32 IREmitter::ShuffleButterfly(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||
| 1993 | const IR::U32& seg_mask) { | ||
| 1994 | return Inst<U32>(Opcode::ShuffleButterfly, value, index, clamp, seg_mask); | ||
| 1995 | } | ||
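
The four shuffle emitters differ only in how the source lane is derived from the current lane and the index operand. A hedged sketch of the lane arithmetic as generally understood (clamp and segment-mask handling omitted):

    #include <cstdint>

    // Source-lane selection for the four shuffle modes.
    uint32_t ShuffleSourceLane(int mode, uint32_t lane, uint32_t index) {
        switch (mode) {
        case 0: return index;         // ShuffleIndex: absolute lane
        case 1: return lane - index;  // ShuffleUp: read from a lower lane
        case 2: return lane + index;  // ShuffleDown: read from a higher lane
        default: return lane ^ index; // ShuffleButterfly: XOR exchange pattern
        }
    }
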
| 1996 | |||
| 1997 | F32 IREmitter::FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle, FpControl control) { | ||
| 1998 | return Inst<F32>(Opcode::FSwizzleAdd, Flags{control}, a, b, swizzle); | ||
| 1999 | } | ||
| 2000 | |||
| 2001 | F32 IREmitter::DPdxFine(const F32& a) { | ||
| 2002 | return Inst<F32>(Opcode::DPdxFine, a); | ||
| 2003 | } | ||
| 2004 | |||
| 2005 | F32 IREmitter::DPdyFine(const F32& a) { | ||
| 2006 | return Inst<F32>(Opcode::DPdyFine, a); | ||
| 2007 | } | ||
| 2008 | |||
| 2009 | F32 IREmitter::DPdxCoarse(const F32& a) { | ||
| 2010 | return Inst<F32>(Opcode::DPdxCoarse, a); | ||
| 2011 | } | ||
| 2012 | |||
| 2013 | F32 IREmitter::DPdyCoarse(const F32& a) { | ||
| 2014 | return Inst<F32>(Opcode::DPdyCoarse, a); | ||
| 2015 | } | ||
| 2016 | |||
| 2017 | } // namespace Shader::IR | ||
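
A hedged usage sketch of the emitter as a whole, using only methods declared in the header below; block is assumed to be an existing IR::Block and R0..R2 general-purpose registers:

    // Emit IR for "r0 = r1 + r2" at the current insertion point.
    IR::IREmitter ir{block};
    const IR::U32 lhs{ir.GetReg(IR::Reg::R1)};
    const IR::U32 rhs{ir.GetReg(IR::Reg::R2)};
    const IR::U32 sum{ir.IAdd(lhs, rhs)};
    ir.SetReg(IR::Reg::R0, sum);
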
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h new file mode 100644 index 000000000..53f7b3b06 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h | |||
| @@ -0,0 +1,413 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <cstring> | ||
| 8 | #include <type_traits> | ||
| 9 | |||
| 10 | #include "shader_recompiler/frontend/ir/attribute.h" | ||
| 11 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 12 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 13 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 14 | |||
| 15 | namespace Shader::IR { | ||
| 16 | |||
| 17 | class IREmitter { | ||
| 18 | public: | ||
| 19 | explicit IREmitter(Block& block_) : block{&block_}, insertion_point{block->end()} {} | ||
| 20 | explicit IREmitter(Block& block_, Block::iterator insertion_point_) | ||
| 21 | : block{&block_}, insertion_point{insertion_point_} {} | ||
| 22 | |||
| 23 | Block* block; | ||
| 24 | |||
| 25 | [[nodiscard]] U1 Imm1(bool value) const; | ||
| 26 | [[nodiscard]] U8 Imm8(u8 value) const; | ||
| 27 | [[nodiscard]] U16 Imm16(u16 value) const; | ||
| 28 | [[nodiscard]] U32 Imm32(u32 value) const; | ||
| 29 | [[nodiscard]] U32 Imm32(s32 value) const; | ||
| 30 | [[nodiscard]] F32 Imm32(f32 value) const; | ||
| 31 | [[nodiscard]] U64 Imm64(u64 value) const; | ||
| 32 | [[nodiscard]] U64 Imm64(s64 value) const; | ||
| 33 | [[nodiscard]] F64 Imm64(f64 value) const; | ||
| 34 | |||
| 35 | U1 ConditionRef(const U1& value); | ||
| 36 | void Reference(const Value& value); | ||
| 37 | |||
| 38 | void PhiMove(IR::Inst& phi, const Value& value); | ||
| 39 | |||
| 40 | void Prologue(); | ||
| 41 | void Epilogue(); | ||
| 42 | void DemoteToHelperInvocation(); | ||
| 43 | void EmitVertex(const U32& stream); | ||
| 44 | void EndPrimitive(const U32& stream); | ||
| 45 | |||
| 46 | [[nodiscard]] U32 GetReg(IR::Reg reg); | ||
| 47 | void SetReg(IR::Reg reg, const U32& value); | ||
| 48 | |||
| 49 | [[nodiscard]] U1 GetPred(IR::Pred pred, bool is_negated = false); | ||
| 50 | void SetPred(IR::Pred pred, const U1& value); | ||
| 51 | |||
| 52 | [[nodiscard]] U1 GetGotoVariable(u32 id); | ||
| 53 | void SetGotoVariable(u32 id, const U1& value); | ||
| 54 | |||
| 55 | [[nodiscard]] U32 GetIndirectBranchVariable(); | ||
| 56 | void SetIndirectBranchVariable(const U32& value); | ||
| 57 | |||
| 58 | [[nodiscard]] U32 GetCbuf(const U32& binding, const U32& byte_offset); | ||
| 59 | [[nodiscard]] Value GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize, | ||
| 60 | bool is_signed); | ||
| 61 | [[nodiscard]] F32 GetFloatCbuf(const U32& binding, const U32& byte_offset); | ||
| 62 | |||
| 63 | [[nodiscard]] U1 GetZFlag(); | ||
| 64 | [[nodiscard]] U1 GetSFlag(); | ||
| 65 | [[nodiscard]] U1 GetCFlag(); | ||
| 66 | [[nodiscard]] U1 GetOFlag(); | ||
| 67 | |||
| 68 | void SetZFlag(const U1& value); | ||
| 69 | void SetSFlag(const U1& value); | ||
| 70 | void SetCFlag(const U1& value); | ||
| 71 | void SetOFlag(const U1& value); | ||
| 72 | |||
| 73 | [[nodiscard]] U1 Condition(IR::Condition cond); | ||
| 74 | [[nodiscard]] U1 GetFlowTestResult(FlowTest test); | ||
| 75 | |||
| 76 | [[nodiscard]] F32 GetAttribute(IR::Attribute attribute); | ||
| 77 | [[nodiscard]] F32 GetAttribute(IR::Attribute attribute, const U32& vertex); | ||
| 78 | void SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex); | ||
| 79 | |||
| 80 | [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address); | ||
| 81 | [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address, const U32& vertex); | ||
| 82 | void SetAttributeIndexed(const U32& phys_address, const F32& value, const U32& vertex); | ||
| 83 | |||
| 84 | [[nodiscard]] F32 GetPatch(Patch patch); | ||
| 85 | void SetPatch(Patch patch, const F32& value); | ||
| 86 | |||
| 87 | void SetFragColor(u32 index, u32 component, const F32& value); | ||
| 88 | void SetSampleMask(const U32& value); | ||
| 89 | void SetFragDepth(const F32& value); | ||
| 90 | |||
| 91 | [[nodiscard]] U32 WorkgroupIdX(); | ||
| 92 | [[nodiscard]] U32 WorkgroupIdY(); | ||
| 93 | [[nodiscard]] U32 WorkgroupIdZ(); | ||
| 94 | |||
| 95 | [[nodiscard]] Value LocalInvocationId(); | ||
| 96 | [[nodiscard]] U32 LocalInvocationIdX(); | ||
| 97 | [[nodiscard]] U32 LocalInvocationIdY(); | ||
| 98 | [[nodiscard]] U32 LocalInvocationIdZ(); | ||
| 99 | |||
| 100 | [[nodiscard]] U32 InvocationId(); | ||
| 101 | [[nodiscard]] U32 SampleId(); | ||
| 102 | [[nodiscard]] U1 IsHelperInvocation(); | ||
| 103 | [[nodiscard]] F32 YDirection(); | ||
| 104 | |||
| 105 | [[nodiscard]] U32 LaneId(); | ||
| 106 | |||
| 107 | [[nodiscard]] U32 LoadGlobalU8(const U64& address); | ||
| 108 | [[nodiscard]] U32 LoadGlobalS8(const U64& address); | ||
| 109 | [[nodiscard]] U32 LoadGlobalU16(const U64& address); | ||
| 110 | [[nodiscard]] U32 LoadGlobalS16(const U64& address); | ||
| 111 | [[nodiscard]] U32 LoadGlobal32(const U64& address); | ||
| 112 | [[nodiscard]] Value LoadGlobal64(const U64& address); | ||
| 113 | [[nodiscard]] Value LoadGlobal128(const U64& address); | ||
| 114 | |||
| 115 | void WriteGlobalU8(const U64& address, const U32& value); | ||
| 116 | void WriteGlobalS8(const U64& address, const U32& value); | ||
| 117 | void WriteGlobalU16(const U64& address, const U32& value); | ||
| 118 | void WriteGlobalS16(const U64& address, const U32& value); | ||
| 119 | void WriteGlobal32(const U64& address, const U32& value); | ||
| 120 | void WriteGlobal64(const U64& address, const IR::Value& vector); | ||
| 121 | void WriteGlobal128(const U64& address, const IR::Value& vector); | ||
| 122 | |||
| 123 | [[nodiscard]] U32 LoadLocal(const U32& word_offset); | ||
| 124 | void WriteLocal(const U32& word_offset, const U32& value); | ||
| 125 | |||
| 126 | [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset); | ||
| 127 | void WriteShared(int bit_size, const U32& offset, const Value& value); | ||
| 128 | |||
| 129 | [[nodiscard]] U1 GetZeroFromOp(const Value& op); | ||
| 130 | [[nodiscard]] U1 GetSignFromOp(const Value& op); | ||
| 131 | [[nodiscard]] U1 GetCarryFromOp(const Value& op); | ||
| 132 | [[nodiscard]] U1 GetOverflowFromOp(const Value& op); | ||
| 133 | [[nodiscard]] U1 GetSparseFromOp(const Value& op); | ||
| 134 | [[nodiscard]] U1 GetInBoundsFromOp(const Value& op); | ||
| 135 | |||
| 136 | [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2); | ||
| 137 | [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3); | ||
| 138 | [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3, | ||
| 139 | const Value& e4); | ||
| 140 | [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element); | ||
| 141 | [[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element); | ||
| 142 | |||
| 143 | [[nodiscard]] Value Select(const U1& condition, const Value& true_value, | ||
| 144 | const Value& false_value); | ||
| 145 | |||
| 146 | void Barrier(); | ||
| 147 | void WorkgroupMemoryBarrier(); | ||
| 148 | void DeviceMemoryBarrier(); | ||
| 149 | |||
| 150 | template <typename Dest, typename Source> | ||
| 151 | [[nodiscard]] Dest BitCast(const Source& value); | ||
| 152 | |||
| 153 | [[nodiscard]] U64 PackUint2x32(const Value& vector); | ||
| 154 | [[nodiscard]] Value UnpackUint2x32(const U64& value); | ||
| 155 | |||
| 156 | [[nodiscard]] U32 PackFloat2x16(const Value& vector); | ||
| 157 | [[nodiscard]] Value UnpackFloat2x16(const U32& value); | ||
| 158 | |||
| 159 | [[nodiscard]] U32 PackHalf2x16(const Value& vector); | ||
| 160 | [[nodiscard]] Value UnpackHalf2x16(const U32& value); | ||
| 161 | |||
| 162 | [[nodiscard]] F64 PackDouble2x32(const Value& vector); | ||
| 163 | [[nodiscard]] Value UnpackDouble2x32(const F64& value); | ||
| 164 | |||
| 165 | [[nodiscard]] F16F32F64 FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control = {}); | ||
| 166 | [[nodiscard]] F16F32F64 FPMul(const F16F32F64& a, const F16F32F64& b, FpControl control = {}); | ||
| 167 | [[nodiscard]] F16F32F64 FPFma(const F16F32F64& a, const F16F32F64& b, const F16F32F64& c, | ||
| 168 | FpControl control = {}); | ||
| 169 | |||
| 170 | [[nodiscard]] F16F32F64 FPAbs(const F16F32F64& value); | ||
| 171 | [[nodiscard]] F16F32F64 FPNeg(const F16F32F64& value); | ||
| 172 | [[nodiscard]] F16F32F64 FPAbsNeg(const F16F32F64& value, bool abs, bool neg); | ||
| 173 | |||
| 174 | [[nodiscard]] F32 FPCos(const F32& value); | ||
| 175 | [[nodiscard]] F32 FPSin(const F32& value); | ||
| 176 | [[nodiscard]] F32 FPExp2(const F32& value); | ||
| 177 | [[nodiscard]] F32 FPLog2(const F32& value); | ||
| 178 | [[nodiscard]] F32F64 FPRecip(const F32F64& value); | ||
| 179 | [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value); | ||
| 180 | [[nodiscard]] F32 FPSqrt(const F32& value); | ||
| 181 | [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value); | ||
| 182 | [[nodiscard]] F16F32F64 FPClamp(const F16F32F64& value, const F16F32F64& min_value, | ||
| 183 | const F16F32F64& max_value); | ||
| 184 | [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {}); | ||
| 185 | [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value, FpControl control = {}); | ||
| 186 | [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {}); | ||
| 187 | [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value, FpControl control = {}); | ||
| 188 | |||
| 189 | [[nodiscard]] U1 FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {}, | ||
| 190 | bool ordered = true); | ||
| 191 | [[nodiscard]] U1 FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {}, | ||
| 192 | bool ordered = true); | ||
| 193 | [[nodiscard]] U1 FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {}, | ||
| 194 | bool ordered = true); | ||
| 195 | [[nodiscard]] U1 FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs, | ||
| 196 | FpControl control = {}, bool ordered = true); | ||
| 197 | [[nodiscard]] U1 FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, | ||
| 198 | FpControl control = {}, bool ordered = true); | ||
| 199 | [[nodiscard]] U1 FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, | ||
| 200 | FpControl control = {}, bool ordered = true); | ||
| 201 | [[nodiscard]] U1 FPIsNan(const F16F32F64& value); | ||
| 202 | [[nodiscard]] U1 FPOrdered(const F16F32F64& lhs, const F16F32F64& rhs); | ||
| 203 | [[nodiscard]] U1 FPUnordered(const F16F32F64& lhs, const F16F32F64& rhs); | ||
| 204 | [[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs, FpControl control = {}); | ||
| 205 | [[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs, FpControl control = {}); | ||
| 206 | |||
| 207 | [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); | ||
| 208 | [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); | ||
| 209 | [[nodiscard]] U32 IMul(const U32& a, const U32& b); | ||
| 210 | [[nodiscard]] U32U64 INeg(const U32U64& value); | ||
| 211 | [[nodiscard]] U32 IAbs(const U32& value); | ||
| 212 | [[nodiscard]] U32U64 ShiftLeftLogical(const U32U64& base, const U32& shift); | ||
| 213 | [[nodiscard]] U32U64 ShiftRightLogical(const U32U64& base, const U32& shift); | ||
| 214 | [[nodiscard]] U32U64 ShiftRightArithmetic(const U32U64& base, const U32& shift); | ||
| 215 | [[nodiscard]] U32 BitwiseAnd(const U32& a, const U32& b); | ||
| 216 | [[nodiscard]] U32 BitwiseOr(const U32& a, const U32& b); | ||
| 217 | [[nodiscard]] U32 BitwiseXor(const U32& a, const U32& b); | ||
| 218 | [[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset, | ||
| 219 | const U32& count); | ||
| 220 | [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count, | ||
| 221 | bool is_signed = false); | ||
| 222 | [[nodiscard]] U32 BitReverse(const U32& value); | ||
| 223 | [[nodiscard]] U32 BitCount(const U32& value); | ||
| 224 | [[nodiscard]] U32 BitwiseNot(const U32& value); | ||
| 225 | |||
| 226 | [[nodiscard]] U32 FindSMsb(const U32& value); | ||
| 227 | [[nodiscard]] U32 FindUMsb(const U32& value); | ||
| 228 | [[nodiscard]] U32 SMin(const U32& a, const U32& b); | ||
| 229 | [[nodiscard]] U32 UMin(const U32& a, const U32& b); | ||
| 230 | [[nodiscard]] U32 IMin(const U32& a, const U32& b, bool is_signed); | ||
| 231 | [[nodiscard]] U32 SMax(const U32& a, const U32& b); | ||
| 232 | [[nodiscard]] U32 UMax(const U32& a, const U32& b); | ||
| 233 | [[nodiscard]] U32 IMax(const U32& a, const U32& b, bool is_signed); | ||
| 234 | [[nodiscard]] U32 SClamp(const U32& value, const U32& min, const U32& max); | ||
| 235 | [[nodiscard]] U32 UClamp(const U32& value, const U32& min, const U32& max); | ||
| 236 | |||
| 237 | [[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed); | ||
| 238 | [[nodiscard]] U1 IEqual(const U32U64& lhs, const U32U64& rhs); | ||
| 239 | [[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed); | ||
| 240 | [[nodiscard]] U1 IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed); | ||
| 241 | [[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs); | ||
| 242 | [[nodiscard]] U1 IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed); | ||
| 243 | |||
| 244 | [[nodiscard]] U32 SharedAtomicIAdd(const U32& pointer_offset, const U32& value); | ||
| 245 | [[nodiscard]] U32 SharedAtomicSMin(const U32& pointer_offset, const U32& value); | ||
| 246 | [[nodiscard]] U32 SharedAtomicUMin(const U32& pointer_offset, const U32& value); | ||
| 247 | [[nodiscard]] U32 SharedAtomicIMin(const U32& pointer_offset, const U32& value, bool is_signed); | ||
| 248 | [[nodiscard]] U32 SharedAtomicSMax(const U32& pointer_offset, const U32& value); | ||
| 249 | [[nodiscard]] U32 SharedAtomicUMax(const U32& pointer_offset, const U32& value); | ||
| 250 | [[nodiscard]] U32 SharedAtomicIMax(const U32& pointer_offset, const U32& value, bool is_signed); | ||
| 251 | [[nodiscard]] U32 SharedAtomicInc(const U32& pointer_offset, const U32& value); | ||
| 252 | [[nodiscard]] U32 SharedAtomicDec(const U32& pointer_offset, const U32& value); | ||
| 253 | [[nodiscard]] U32 SharedAtomicAnd(const U32& pointer_offset, const U32& value); | ||
| 254 | [[nodiscard]] U32 SharedAtomicOr(const U32& pointer_offset, const U32& value); | ||
| 255 | [[nodiscard]] U32 SharedAtomicXor(const U32& pointer_offset, const U32& value); | ||
| 256 | [[nodiscard]] U32U64 SharedAtomicExchange(const U32& pointer_offset, const U32U64& value); | ||
| 257 | |||
| 258 | [[nodiscard]] U32U64 GlobalAtomicIAdd(const U64& pointer_offset, const U32U64& value); | ||
| 259 | [[nodiscard]] U32U64 GlobalAtomicSMin(const U64& pointer_offset, const U32U64& value); | ||
| 260 | [[nodiscard]] U32U64 GlobalAtomicUMin(const U64& pointer_offset, const U32U64& value); | ||
| 261 | [[nodiscard]] U32U64 GlobalAtomicIMin(const U64& pointer_offset, const U32U64& value, | ||
| 262 | bool is_signed); | ||
| 263 | [[nodiscard]] U32U64 GlobalAtomicSMax(const U64& pointer_offset, const U32U64& value); | ||
| 264 | [[nodiscard]] U32U64 GlobalAtomicUMax(const U64& pointer_offset, const U32U64& value); | ||
| 265 | [[nodiscard]] U32U64 GlobalAtomicIMax(const U64& pointer_offset, const U32U64& value, | ||
| 266 | bool is_signed); | ||
| 267 | [[nodiscard]] U32 GlobalAtomicInc(const U64& pointer_offset, const U32& value); | ||
| 268 | [[nodiscard]] U32 GlobalAtomicDec(const U64& pointer_offset, const U32& value); | ||
| 269 | [[nodiscard]] U32U64 GlobalAtomicAnd(const U64& pointer_offset, const U32U64& value); | ||
| 270 | [[nodiscard]] U32U64 GlobalAtomicOr(const U64& pointer_offset, const U32U64& value); | ||
| 271 | [[nodiscard]] U32U64 GlobalAtomicXor(const U64& pointer_offset, const U32U64& value); | ||
| 272 | [[nodiscard]] U32U64 GlobalAtomicExchange(const U64& pointer_offset, const U32U64& value); | ||
| 273 | |||
| 274 | [[nodiscard]] F32 GlobalAtomicF32Add(const U64& pointer_offset, const Value& value, | ||
| 275 | FpControl control = {}); | ||
| 276 | [[nodiscard]] Value GlobalAtomicF16x2Add(const U64& pointer_offset, const Value& value, | ||
| 277 | FpControl control = {}); | ||
| 278 | [[nodiscard]] Value GlobalAtomicF16x2Min(const U64& pointer_offset, const Value& value, | ||
| 279 | FpControl control = {}); | ||
| 280 | [[nodiscard]] Value GlobalAtomicF16x2Max(const U64& pointer_offset, const Value& value, | ||
| 281 | FpControl control = {}); | ||
| 282 | |||
| 283 | [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b); | ||
| 284 | [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b); | ||
| 285 | [[nodiscard]] U1 LogicalXor(const U1& a, const U1& b); | ||
| 286 | [[nodiscard]] U1 LogicalNot(const U1& value); | ||
| 287 | |||
| 288 | [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F16F32F64& value); | ||
| 289 | [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value); | ||
| 290 | [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value); | ||
| 291 | [[nodiscard]] F16F32F64 ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Value& value, | ||
| 292 | FpControl control = {}); | ||
| 293 | [[nodiscard]] F16F32F64 ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Value& value, | ||
| 294 | FpControl control = {}); | ||
| 295 | [[nodiscard]] F16F32F64 ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed, | ||
| 296 | const Value& value, FpControl control = {}); | ||
| 297 | |||
| 298 | [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value); | ||
| 299 | [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value, | ||
| 300 | FpControl control = {}); | ||
| 301 | |||
| 302 | [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords, | ||
| 303 | const F32& bias, const Value& offset, | ||
| 304 | const F32& lod_clamp, TextureInstInfo info); | ||
| 305 | [[nodiscard]] Value ImageSampleExplicitLod(const Value& handle, const Value& coords, | ||
| 306 | const F32& lod, const Value& offset, | ||
| 307 | TextureInstInfo info); | ||
| 308 | [[nodiscard]] F32 ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, | ||
| 309 | const F32& dref, const F32& bias, | ||
| 310 | const Value& offset, const F32& lod_clamp, | ||
| 311 | TextureInstInfo info); | ||
| 312 | [[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, | ||
| 313 | const F32& dref, const F32& lod, | ||
| 314 | const Value& offset, TextureInstInfo info); | ||
| 315 | [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod); | ||
| 316 | |||
| 317 | [[nodiscard]] Value ImageQueryLod(const Value& handle, const Value& coords, | ||
| 318 | TextureInstInfo info); | ||
| 319 | [[nodiscard]] Value ImageGather(const Value& handle, const Value& coords, const Value& offset, | ||
| 320 | const Value& offset2, TextureInstInfo info); | ||
| 321 | [[nodiscard]] Value ImageGatherDref(const Value& handle, const Value& coords, | ||
| 322 | const Value& offset, const Value& offset2, const F32& dref, | ||
| 323 | TextureInstInfo info); | ||
| 324 | [[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset, | ||
| 325 | const U32& lod, const U32& multisampling, TextureInstInfo info); | ||
| 326 | [[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords, | ||
| 327 | const Value& derivates, const Value& offset, | ||
| 328 | const F32& lod_clamp, TextureInstInfo info); | ||
| 329 | [[nodiscard]] Value ImageRead(const Value& handle, const Value& coords, TextureInstInfo info); | ||
| 330 | void ImageWrite(const Value& handle, const Value& coords, const Value& color, | ||
| 331 | TextureInstInfo info); | ||
| 332 | |||
| 333 | [[nodiscard]] Value ImageAtomicIAdd(const Value& handle, const Value& coords, | ||
| 334 | const Value& value, TextureInstInfo info); | ||
| 335 | [[nodiscard]] Value ImageAtomicSMin(const Value& handle, const Value& coords, | ||
| 336 | const Value& value, TextureInstInfo info); | ||
| 337 | [[nodiscard]] Value ImageAtomicUMin(const Value& handle, const Value& coords, | ||
| 338 | const Value& value, TextureInstInfo info); | ||
| 339 | [[nodiscard]] Value ImageAtomicIMin(const Value& handle, const Value& coords, | ||
| 340 | const Value& value, bool is_signed, TextureInstInfo info); | ||
| 341 | [[nodiscard]] Value ImageAtomicSMax(const Value& handle, const Value& coords, | ||
| 342 | const Value& value, TextureInstInfo info); | ||
| 343 | [[nodiscard]] Value ImageAtomicUMax(const Value& handle, const Value& coords, | ||
| 344 | const Value& value, TextureInstInfo info); | ||
| 345 | [[nodiscard]] Value ImageAtomicIMax(const Value& handle, const Value& coords, | ||
| 346 | const Value& value, bool is_signed, TextureInstInfo info); | ||
| 347 | [[nodiscard]] Value ImageAtomicInc(const Value& handle, const Value& coords, const Value& value, | ||
| 348 | TextureInstInfo info); | ||
| 349 | [[nodiscard]] Value ImageAtomicDec(const Value& handle, const Value& coords, const Value& value, | ||
| 350 | TextureInstInfo info); | ||
| 351 | [[nodiscard]] Value ImageAtomicAnd(const Value& handle, const Value& coords, const Value& value, | ||
| 352 | TextureInstInfo info); | ||
| 353 | [[nodiscard]] Value ImageAtomicOr(const Value& handle, const Value& coords, const Value& value, | ||
| 354 | TextureInstInfo info); | ||
| 355 | [[nodiscard]] Value ImageAtomicXor(const Value& handle, const Value& coords, const Value& value, | ||
| 356 | TextureInstInfo info); | ||
| 357 | [[nodiscard]] Value ImageAtomicExchange(const Value& handle, const Value& coords, | ||
| 358 | const Value& value, TextureInstInfo info); | ||
| 359 | [[nodiscard]] U1 VoteAll(const U1& value); | ||
| 360 | [[nodiscard]] U1 VoteAny(const U1& value); | ||
| 361 | [[nodiscard]] U1 VoteEqual(const U1& value); | ||
| 362 | [[nodiscard]] U32 SubgroupBallot(const U1& value); | ||
| 363 | [[nodiscard]] U32 SubgroupEqMask(); | ||
| 364 | [[nodiscard]] U32 SubgroupLtMask(); | ||
| 365 | [[nodiscard]] U32 SubgroupLeMask(); | ||
| 366 | [[nodiscard]] U32 SubgroupGtMask(); | ||
| 367 | [[nodiscard]] U32 SubgroupGeMask(); | ||
| 368 | [[nodiscard]] U32 ShuffleIndex(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||
| 369 | const IR::U32& seg_mask); | ||
| 370 | [[nodiscard]] U32 ShuffleUp(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||
| 371 | const IR::U32& seg_mask); | ||
| 372 | [[nodiscard]] U32 ShuffleDown(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||
| 373 | const IR::U32& seg_mask); | ||
| 374 | [[nodiscard]] U32 ShuffleButterfly(const IR::U32& value, const IR::U32& index, | ||
| 375 | const IR::U32& clamp, const IR::U32& seg_mask); | ||
| 376 | [[nodiscard]] F32 FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle, | ||
| 377 | FpControl control = {}); | ||
| 378 | |||
| 379 | [[nodiscard]] F32 DPdxFine(const F32& a); | ||
| 380 | |||
| 381 | [[nodiscard]] F32 DPdyFine(const F32& a); | ||
| 382 | |||
| 383 | [[nodiscard]] F32 DPdxCoarse(const F32& a); | ||
| 384 | |||
| 385 | [[nodiscard]] F32 DPdyCoarse(const F32& a); | ||
| 386 | |||
| 387 | private: | ||
| 388 | IR::Block::iterator insertion_point; | ||
| 389 | |||
| 390 | template <typename T = Value, typename... Args> | ||
| 391 | T Inst(Opcode op, Args... args) { | ||
| 392 | auto it{block->PrependNewInst(insertion_point, op, {Value{args}...})}; | ||
| 393 | return T{Value{&*it}}; | ||
| 394 | } | ||
| 395 | |||
| 396 | template <typename T> | ||
| 397 | requires(sizeof(T) <= sizeof(u32) && std::is_trivially_copyable_v<T>) struct Flags { | ||
| 398 | Flags() = default; | ||
| 399 | Flags(T proxy_) : proxy{proxy_} {} | ||
| 400 | |||
| 401 | T proxy; | ||
| 402 | }; | ||
| 403 | |||
| 404 | template <typename T = Value, typename FlagType, typename... Args> | ||
| 405 | T Inst(Opcode op, Flags<FlagType> flags, Args... args) { | ||
| 406 | u32 raw_flags{}; | ||
| 407 | std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy)); | ||
| 408 | auto it{block->PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)}; | ||
| 409 | return T{Value{&*it}}; | ||
| 410 | } | ||
| 411 | }; | ||
| 412 | |||
| 413 | } // namespace Shader::IR | ||
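
The private `Inst` overloads above show how per-instruction modifiers travel: any trivially copyable struct no larger than a `u32` (such as `FpControl`) is bit-copied into the instruction's raw 32-bit flags word via `std::memcpy`. A minimal standalone sketch of that packing, assuming a trimmed-down `FpControl` and hypothetical `RawInst`/`PackFlags`/`UnpackFlags` names that are not part of yuzu:

```cpp
// Sketch of the Flags<T> packing mechanism; names here are illustrative only.
#include <cstdint>
#include <cstring>
#include <type_traits>

enum class FpRounding : std::uint8_t { DontCare, RN, RM, RP, RZ };

// Trimmed copy of FpControl from modifiers.h, kept under sizeof(u32).
struct FpControl {
    bool no_contraction{false};
    FpRounding rounding{FpRounding::DontCare};
};
static_assert(sizeof(FpControl) <= sizeof(std::uint32_t));

// Stand-in for an instruction that stores all modifiers in one raw u32.
struct RawInst {
    std::uint32_t flags{};
};

template <typename T>
    requires(sizeof(T) <= sizeof(std::uint32_t) && std::is_trivially_copyable_v<T>)
std::uint32_t PackFlags(const T& value) {
    std::uint32_t raw{};
    std::memcpy(&raw, &value, sizeof(value)); // bit-copy the POD into the word
    return raw;
}

template <typename T>
T UnpackFlags(const RawInst& inst) {
    T value{};
    std::memcpy(&value, &inst.flags, sizeof(value)); // recover it on the way out
    return value;
}

int main() {
    const RawInst inst{.flags = PackFlags(FpControl{.rounding = FpRounding::RZ})};
    const auto control{UnpackFlags<FpControl>(inst)};
    return control.rounding == FpRounding::RZ ? 0 : 1;
}
```

Keeping modifiers in one word means every instruction stays the same size regardless of opcode, at the cost of the `sizeof(T) <= sizeof(u32)` bound that the `requires` clause enforces.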
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp new file mode 100644 index 000000000..3dfa5a880 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp | |||
| @@ -0,0 +1,411 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <memory> | ||
| 7 | |||
| 8 | #include "shader_recompiler/exception.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/type.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 11 | |||
| 12 | namespace Shader::IR { | ||
| 13 | namespace { | ||
| 14 | void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) { | ||
| 15 | if (inst && inst->GetOpcode() != opcode) { | ||
| 16 | throw LogicError("Invalid pseudo-instruction"); | ||
| 17 | } | ||
| 18 | } | ||
| 19 | |||
| 20 | void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) { | ||
| 21 | if (dest_inst) { | ||
| 22 | throw LogicError("Only one of each type of pseudo-op allowed"); | ||
| 23 | } | ||
| 24 | dest_inst = pseudo_inst; | ||
| 25 | } | ||
| 26 | |||
| 27 | void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) { | ||
| 28 | if (inst->GetOpcode() != expected_opcode) { | ||
| 29 | throw LogicError("Undoing use of invalid pseudo-op"); | ||
| 30 | } | ||
| 31 | inst = nullptr; | ||
| 32 | } | ||
| 33 | |||
| 34 | void AllocAssociatedInsts(std::unique_ptr<AssociatedInsts>& associated_insts) { | ||
| 35 | if (!associated_insts) { | ||
| 36 | associated_insts = std::make_unique<AssociatedInsts>(); | ||
| 37 | } | ||
| 38 | } | ||
| 39 | } // Anonymous namespace | ||
| 40 | |||
| 41 | Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} { | ||
| 42 | if (op == Opcode::Phi) { | ||
| 43 | std::construct_at(&phi_args); | ||
| 44 | } else { | ||
| 45 | std::construct_at(&args); | ||
| 46 | } | ||
| 47 | } | ||
| 48 | |||
| 49 | Inst::~Inst() { | ||
| 50 | if (op == Opcode::Phi) { | ||
| 51 | std::destroy_at(&phi_args); | ||
| 52 | } else { | ||
| 53 | std::destroy_at(&args); | ||
| 54 | } | ||
| 55 | } | ||
| 56 | |||
| 57 | bool Inst::MayHaveSideEffects() const noexcept { | ||
| 58 | switch (op) { | ||
| 59 | case Opcode::ConditionRef: | ||
| 60 | case Opcode::Reference: | ||
| 61 | case Opcode::PhiMove: | ||
| 62 | case Opcode::Prologue: | ||
| 63 | case Opcode::Epilogue: | ||
| 64 | case Opcode::Join: | ||
| 65 | case Opcode::DemoteToHelperInvocation: | ||
| 66 | case Opcode::Barrier: | ||
| 67 | case Opcode::WorkgroupMemoryBarrier: | ||
| 68 | case Opcode::DeviceMemoryBarrier: | ||
| 69 | case Opcode::EmitVertex: | ||
| 70 | case Opcode::EndPrimitive: | ||
| 71 | case Opcode::SetAttribute: | ||
| 72 | case Opcode::SetAttributeIndexed: | ||
| 73 | case Opcode::SetPatch: | ||
| 74 | case Opcode::SetFragColor: | ||
| 75 | case Opcode::SetSampleMask: | ||
| 76 | case Opcode::SetFragDepth: | ||
| 77 | case Opcode::WriteGlobalU8: | ||
| 78 | case Opcode::WriteGlobalS8: | ||
| 79 | case Opcode::WriteGlobalU16: | ||
| 80 | case Opcode::WriteGlobalS16: | ||
| 81 | case Opcode::WriteGlobal32: | ||
| 82 | case Opcode::WriteGlobal64: | ||
| 83 | case Opcode::WriteGlobal128: | ||
| 84 | case Opcode::WriteStorageU8: | ||
| 85 | case Opcode::WriteStorageS8: | ||
| 86 | case Opcode::WriteStorageU16: | ||
| 87 | case Opcode::WriteStorageS16: | ||
| 88 | case Opcode::WriteStorage32: | ||
| 89 | case Opcode::WriteStorage64: | ||
| 90 | case Opcode::WriteStorage128: | ||
| 91 | case Opcode::WriteLocal: | ||
| 92 | case Opcode::WriteSharedU8: | ||
| 93 | case Opcode::WriteSharedU16: | ||
| 94 | case Opcode::WriteSharedU32: | ||
| 95 | case Opcode::WriteSharedU64: | ||
| 96 | case Opcode::WriteSharedU128: | ||
| 97 | case Opcode::SharedAtomicIAdd32: | ||
| 98 | case Opcode::SharedAtomicSMin32: | ||
| 99 | case Opcode::SharedAtomicUMin32: | ||
| 100 | case Opcode::SharedAtomicSMax32: | ||
| 101 | case Opcode::SharedAtomicUMax32: | ||
| 102 | case Opcode::SharedAtomicInc32: | ||
| 103 | case Opcode::SharedAtomicDec32: | ||
| 104 | case Opcode::SharedAtomicAnd32: | ||
| 105 | case Opcode::SharedAtomicOr32: | ||
| 106 | case Opcode::SharedAtomicXor32: | ||
| 107 | case Opcode::SharedAtomicExchange32: | ||
| 108 | case Opcode::SharedAtomicExchange64: | ||
| 109 | case Opcode::GlobalAtomicIAdd32: | ||
| 110 | case Opcode::GlobalAtomicSMin32: | ||
| 111 | case Opcode::GlobalAtomicUMin32: | ||
| 112 | case Opcode::GlobalAtomicSMax32: | ||
| 113 | case Opcode::GlobalAtomicUMax32: | ||
| 114 | case Opcode::GlobalAtomicInc32: | ||
| 115 | case Opcode::GlobalAtomicDec32: | ||
| 116 | case Opcode::GlobalAtomicAnd32: | ||
| 117 | case Opcode::GlobalAtomicOr32: | ||
| 118 | case Opcode::GlobalAtomicXor32: | ||
| 119 | case Opcode::GlobalAtomicExchange32: | ||
| 120 | case Opcode::GlobalAtomicIAdd64: | ||
| 121 | case Opcode::GlobalAtomicSMin64: | ||
| 122 | case Opcode::GlobalAtomicUMin64: | ||
| 123 | case Opcode::GlobalAtomicSMax64: | ||
| 124 | case Opcode::GlobalAtomicUMax64: | ||
| 125 | case Opcode::GlobalAtomicAnd64: | ||
| 126 | case Opcode::GlobalAtomicOr64: | ||
| 127 | case Opcode::GlobalAtomicXor64: | ||
| 128 | case Opcode::GlobalAtomicExchange64: | ||
| 129 | case Opcode::GlobalAtomicAddF32: | ||
| 130 | case Opcode::GlobalAtomicAddF16x2: | ||
| 131 | case Opcode::GlobalAtomicAddF32x2: | ||
| 132 | case Opcode::GlobalAtomicMinF16x2: | ||
| 133 | case Opcode::GlobalAtomicMinF32x2: | ||
| 134 | case Opcode::GlobalAtomicMaxF16x2: | ||
| 135 | case Opcode::GlobalAtomicMaxF32x2: | ||
| 136 | case Opcode::StorageAtomicIAdd32: | ||
| 137 | case Opcode::StorageAtomicSMin32: | ||
| 138 | case Opcode::StorageAtomicUMin32: | ||
| 139 | case Opcode::StorageAtomicSMax32: | ||
| 140 | case Opcode::StorageAtomicUMax32: | ||
| 141 | case Opcode::StorageAtomicInc32: | ||
| 142 | case Opcode::StorageAtomicDec32: | ||
| 143 | case Opcode::StorageAtomicAnd32: | ||
| 144 | case Opcode::StorageAtomicOr32: | ||
| 145 | case Opcode::StorageAtomicXor32: | ||
| 146 | case Opcode::StorageAtomicExchange32: | ||
| 147 | case Opcode::StorageAtomicIAdd64: | ||
| 148 | case Opcode::StorageAtomicSMin64: | ||
| 149 | case Opcode::StorageAtomicUMin64: | ||
| 150 | case Opcode::StorageAtomicSMax64: | ||
| 151 | case Opcode::StorageAtomicUMax64: | ||
| 152 | case Opcode::StorageAtomicAnd64: | ||
| 153 | case Opcode::StorageAtomicOr64: | ||
| 154 | case Opcode::StorageAtomicXor64: | ||
| 155 | case Opcode::StorageAtomicExchange64: | ||
| 156 | case Opcode::StorageAtomicAddF32: | ||
| 157 | case Opcode::StorageAtomicAddF16x2: | ||
| 158 | case Opcode::StorageAtomicAddF32x2: | ||
| 159 | case Opcode::StorageAtomicMinF16x2: | ||
| 160 | case Opcode::StorageAtomicMinF32x2: | ||
| 161 | case Opcode::StorageAtomicMaxF16x2: | ||
| 162 | case Opcode::StorageAtomicMaxF32x2: | ||
| 163 | case Opcode::BindlessImageWrite: | ||
| 164 | case Opcode::BoundImageWrite: | ||
| 165 | case Opcode::ImageWrite: | ||
| 166 | case IR::Opcode::BindlessImageAtomicIAdd32: | ||
| 167 | case IR::Opcode::BindlessImageAtomicSMin32: | ||
| 168 | case IR::Opcode::BindlessImageAtomicUMin32: | ||
| 169 | case IR::Opcode::BindlessImageAtomicSMax32: | ||
| 170 | case IR::Opcode::BindlessImageAtomicUMax32: | ||
| 171 | case IR::Opcode::BindlessImageAtomicInc32: | ||
| 172 | case IR::Opcode::BindlessImageAtomicDec32: | ||
| 173 | case IR::Opcode::BindlessImageAtomicAnd32: | ||
| 174 | case IR::Opcode::BindlessImageAtomicOr32: | ||
| 175 | case IR::Opcode::BindlessImageAtomicXor32: | ||
| 176 | case IR::Opcode::BindlessImageAtomicExchange32: | ||
| 177 | case IR::Opcode::BoundImageAtomicIAdd32: | ||
| 178 | case IR::Opcode::BoundImageAtomicSMin32: | ||
| 179 | case IR::Opcode::BoundImageAtomicUMin32: | ||
| 180 | case IR::Opcode::BoundImageAtomicSMax32: | ||
| 181 | case IR::Opcode::BoundImageAtomicUMax32: | ||
| 182 | case IR::Opcode::BoundImageAtomicInc32: | ||
| 183 | case IR::Opcode::BoundImageAtomicDec32: | ||
| 184 | case IR::Opcode::BoundImageAtomicAnd32: | ||
| 185 | case IR::Opcode::BoundImageAtomicOr32: | ||
| 186 | case IR::Opcode::BoundImageAtomicXor32: | ||
| 187 | case IR::Opcode::BoundImageAtomicExchange32: | ||
| 188 | case IR::Opcode::ImageAtomicIAdd32: | ||
| 189 | case IR::Opcode::ImageAtomicSMin32: | ||
| 190 | case IR::Opcode::ImageAtomicUMin32: | ||
| 191 | case IR::Opcode::ImageAtomicSMax32: | ||
| 192 | case IR::Opcode::ImageAtomicUMax32: | ||
| 193 | case IR::Opcode::ImageAtomicInc32: | ||
| 194 | case IR::Opcode::ImageAtomicDec32: | ||
| 195 | case IR::Opcode::ImageAtomicAnd32: | ||
| 196 | case IR::Opcode::ImageAtomicOr32: | ||
| 197 | case IR::Opcode::ImageAtomicXor32: | ||
| 198 | case IR::Opcode::ImageAtomicExchange32: | ||
| 199 | return true; | ||
| 200 | default: | ||
| 201 | return false; | ||
| 202 | } | ||
| 203 | } | ||
| 204 | |||
| 205 | bool Inst::IsPseudoInstruction() const noexcept { | ||
| 206 | switch (op) { | ||
| 207 | case Opcode::GetZeroFromOp: | ||
| 208 | case Opcode::GetSignFromOp: | ||
| 209 | case Opcode::GetCarryFromOp: | ||
| 210 | case Opcode::GetOverflowFromOp: | ||
| 211 | case Opcode::GetSparseFromOp: | ||
| 212 | case Opcode::GetInBoundsFromOp: | ||
| 213 | return true; | ||
| 214 | default: | ||
| 215 | return false; | ||
| 216 | } | ||
| 217 | } | ||
| 218 | |||
| 219 | bool Inst::AreAllArgsImmediates() const { | ||
| 220 | if (op == Opcode::Phi) { | ||
| 221 | throw LogicError("Testing whether all arguments are immediates on a phi instruction"); | ||
| 222 | } | ||
| 223 | return std::all_of(args.begin(), args.begin() + NumArgs(), | ||
| 224 | [](const IR::Value& value) { return value.IsImmediate(); }); | ||
| 225 | } | ||
| 226 | |||
| 227 | Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) { | ||
| 228 | if (!associated_insts) { | ||
| 229 | return nullptr; | ||
| 230 | } | ||
| 231 | switch (opcode) { | ||
| 232 | case Opcode::GetZeroFromOp: | ||
| 233 | CheckPseudoInstruction(associated_insts->zero_inst, Opcode::GetZeroFromOp); | ||
| 234 | return associated_insts->zero_inst; | ||
| 235 | case Opcode::GetSignFromOp: | ||
| 236 | CheckPseudoInstruction(associated_insts->sign_inst, Opcode::GetSignFromOp); | ||
| 237 | return associated_insts->sign_inst; | ||
| 238 | case Opcode::GetCarryFromOp: | ||
| 239 | CheckPseudoInstruction(associated_insts->carry_inst, Opcode::GetCarryFromOp); | ||
| 240 | return associated_insts->carry_inst; | ||
| 241 | case Opcode::GetOverflowFromOp: | ||
| 242 | CheckPseudoInstruction(associated_insts->overflow_inst, Opcode::GetOverflowFromOp); | ||
| 243 | return associated_insts->overflow_inst; | ||
| 244 | case Opcode::GetSparseFromOp: | ||
| 245 | CheckPseudoInstruction(associated_insts->sparse_inst, Opcode::GetSparseFromOp); | ||
| 246 | return associated_insts->sparse_inst; | ||
| 247 | case Opcode::GetInBoundsFromOp: | ||
| 248 | CheckPseudoInstruction(associated_insts->in_bounds_inst, Opcode::GetInBoundsFromOp); | ||
| 249 | return associated_insts->in_bounds_inst; | ||
| 250 | default: | ||
| 251 | throw InvalidArgument("{} is not a pseudo-instruction", opcode); | ||
| 252 | } | ||
| 253 | } | ||
| 254 | |||
| 255 | IR::Type Inst::Type() const { | ||
| 256 | return TypeOf(op); | ||
| 257 | } | ||
| 258 | |||
| 259 | void Inst::SetArg(size_t index, Value value) { | ||
| 260 | if (index >= NumArgs()) { | ||
| 261 | throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op); | ||
| 262 | } | ||
| 263 | const IR::Value arg{Arg(index)}; | ||
| 264 | if (!arg.IsImmediate()) { | ||
| 265 | UndoUse(arg); | ||
| 266 | } | ||
| 267 | if (!value.IsImmediate()) { | ||
| 268 | Use(value); | ||
| 269 | } | ||
| 270 | if (op == Opcode::Phi) { | ||
| 271 | phi_args[index].second = value; | ||
| 272 | } else { | ||
| 273 | args[index] = value; | ||
| 274 | } | ||
| 275 | } | ||
| 276 | |||
| 277 | Block* Inst::PhiBlock(size_t index) const { | ||
| 278 | if (op != Opcode::Phi) { | ||
| 279 | throw LogicError("{} is not a Phi instruction", op); | ||
| 280 | } | ||
| 281 | if (index >= phi_args.size()) { | ||
| 282 | throw InvalidArgument("Out of bounds argument index {} in phi instruction", index); | ||
| 283 | } | ||
| 284 | return phi_args[index].first; | ||
| 285 | } | ||
| 286 | |||
| 287 | void Inst::AddPhiOperand(Block* predecessor, const Value& value) { | ||
| 288 | if (!value.IsImmediate()) { | ||
| 289 | Use(value); | ||
| 290 | } | ||
| 291 | phi_args.emplace_back(predecessor, value); | ||
| 292 | } | ||
| 293 | |||
| 294 | void Inst::Invalidate() { | ||
| 295 | ClearArgs(); | ||
| 296 | ReplaceOpcode(Opcode::Void); | ||
| 297 | } | ||
| 298 | |||
| 299 | void Inst::ClearArgs() { | ||
| 300 | if (op == Opcode::Phi) { | ||
| 301 | for (auto& pair : phi_args) { | ||
| 302 | IR::Value& value{pair.second}; | ||
| 303 | if (!value.IsImmediate()) { | ||
| 304 | UndoUse(value); | ||
| 305 | } | ||
| 306 | } | ||
| 307 | phi_args.clear(); | ||
| 308 | } else { | ||
| 309 | for (auto& value : args) { | ||
| 310 | if (!value.IsImmediate()) { | ||
| 311 | UndoUse(value); | ||
| 312 | } | ||
| 313 | } | ||
| 314 | // Reset arguments to null | ||
| 315 | // std::memset was measured to be faster on MSVC than std::ranges::fill | ||
| 316 | std::memset(reinterpret_cast<char*>(&args), 0, sizeof(args)); | ||
| 317 | } | ||
| 318 | } | ||
| 319 | |||
| 320 | void Inst::ReplaceUsesWith(Value replacement) { | ||
| 321 | Invalidate(); | ||
| 322 | ReplaceOpcode(Opcode::Identity); | ||
| 323 | if (!replacement.IsImmediate()) { | ||
| 324 | Use(replacement); | ||
| 325 | } | ||
| 326 | args[0] = replacement; | ||
| 327 | } | ||
| 328 | |||
| 329 | void Inst::ReplaceOpcode(IR::Opcode opcode) { | ||
| 330 | if (opcode == IR::Opcode::Phi) { | ||
| 331 | throw LogicError("Cannot transition into Phi"); | ||
| 332 | } | ||
| 333 | if (op == Opcode::Phi) { | ||
| 334 | // Transition out of phi arguments into non-phi | ||
| 335 | std::destroy_at(&phi_args); | ||
| 336 | std::construct_at(&args); | ||
| 337 | } | ||
| 338 | op = opcode; | ||
| 339 | } | ||
| 340 | |||
| 341 | void Inst::Use(const Value& value) { | ||
| 342 | Inst* const inst{value.Inst()}; | ||
| 343 | ++inst->use_count; | ||
| 344 | |||
| 345 | std::unique_ptr<AssociatedInsts>& assoc_inst{inst->associated_insts}; | ||
| 346 | switch (op) { | ||
| 347 | case Opcode::GetZeroFromOp: | ||
| 348 | AllocAssociatedInsts(assoc_inst); | ||
| 349 | SetPseudoInstruction(assoc_inst->zero_inst, this); | ||
| 350 | break; | ||
| 351 | case Opcode::GetSignFromOp: | ||
| 352 | AllocAssociatedInsts(assoc_inst); | ||
| 353 | SetPseudoInstruction(assoc_inst->sign_inst, this); | ||
| 354 | break; | ||
| 355 | case Opcode::GetCarryFromOp: | ||
| 356 | AllocAssociatedInsts(assoc_inst); | ||
| 357 | SetPseudoInstruction(assoc_inst->carry_inst, this); | ||
| 358 | break; | ||
| 359 | case Opcode::GetOverflowFromOp: | ||
| 360 | AllocAssociatedInsts(assoc_inst); | ||
| 361 | SetPseudoInstruction(assoc_inst->overflow_inst, this); | ||
| 362 | break; | ||
| 363 | case Opcode::GetSparseFromOp: | ||
| 364 | AllocAssociatedInsts(assoc_inst); | ||
| 365 | SetPseudoInstruction(assoc_inst->sparse_inst, this); | ||
| 366 | break; | ||
| 367 | case Opcode::GetInBoundsFromOp: | ||
| 368 | AllocAssociatedInsts(assoc_inst); | ||
| 369 | SetPseudoInstruction(assoc_inst->in_bounds_inst, this); | ||
| 370 | break; | ||
| 371 | default: | ||
| 372 | break; | ||
| 373 | } | ||
| 374 | } | ||
| 375 | |||
| 376 | void Inst::UndoUse(const Value& value) { | ||
| 377 | Inst* const inst{value.Inst()}; | ||
| 378 | --inst->use_count; | ||
| 379 | |||
| 380 | std::unique_ptr<AssociatedInsts>& assoc_inst{inst->associated_insts}; | ||
| 381 | switch (op) { | ||
| 382 | case Opcode::GetZeroFromOp: | ||
| 383 | AllocAssociatedInsts(assoc_inst); | ||
| 384 | RemovePseudoInstruction(assoc_inst->zero_inst, Opcode::GetZeroFromOp); | ||
| 385 | break; | ||
| 386 | case Opcode::GetSignFromOp: | ||
| 387 | AllocAssociatedInsts(assoc_inst); | ||
| 388 | RemovePseudoInstruction(assoc_inst->sign_inst, Opcode::GetSignFromOp); | ||
| 389 | break; | ||
| 390 | case Opcode::GetCarryFromOp: | ||
| 391 | AllocAssociatedInsts(assoc_inst); | ||
| 392 | RemovePseudoInstruction(assoc_inst->carry_inst, Opcode::GetCarryFromOp); | ||
| 393 | break; | ||
| 394 | case Opcode::GetOverflowFromOp: | ||
| 395 | AllocAssociatedInsts(assoc_inst); | ||
| 396 | RemovePseudoInstruction(assoc_inst->overflow_inst, Opcode::GetOverflowFromOp); | ||
| 397 | break; | ||
| 398 | case Opcode::GetSparseFromOp: | ||
| 399 | AllocAssociatedInsts(assoc_inst); | ||
| 400 | RemovePseudoInstruction(assoc_inst->sparse_inst, Opcode::GetSparseFromOp); | ||
| 401 | break; | ||
| 402 | case Opcode::GetInBoundsFromOp: | ||
| 403 | AllocAssociatedInsts(assoc_inst); | ||
| 404 | RemovePseudoInstruction(assoc_inst->in_bounds_inst, Opcode::GetInBoundsFromOp); | ||
| 405 | break; | ||
| 406 | default: | ||
| 407 | break; | ||
| 408 | } | ||
| 409 | } | ||
| 410 | |||
| 411 | } // namespace Shader::IR | ||
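
`Inst`'s constructor and destructor above manage a union by hand: `std::construct_at`/`std::destroy_at` switch the active member between a fixed argument array (ordinary opcodes) and a growable phi operand list, so only phis pay for dynamic storage. A condensed sketch of the pattern; `TagUnion` and the `int` payloads are invented for illustration:

```cpp
// Manual union lifetime management, as in Inst: the active member is chosen
// at construction and must be destroyed explicitly before the union dies.
#include <array>
#include <memory>
#include <vector>

struct TagUnion {
    bool is_phi;
    union {
        std::array<int, 5> args;   // fixed slots for ordinary opcodes
        std::vector<int> phi_args; // growable operand list for phis
    };

    explicit TagUnion(bool phi) : is_phi{phi} {
        if (is_phi) {
            std::construct_at(&phi_args); // begin lifetime of the vector
        } else {
            std::construct_at(&args);
        }
    }

    ~TagUnion() {
        if (is_phi) {
            std::destroy_at(&phi_args); // must match the active member
        } else {
            std::destroy_at(&args);
        }
    }
};

int main() {
    TagUnion phi{true};
    phi.phi_args.push_back(1); // safe: the vector is the active member
    TagUnion plain{false};
    plain.args[0] = 1;
}
```

`ReplaceOpcode` above follows the same discipline when an instruction transitions out of phi form: destroy `phi_args`, then construct `args` in its place.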
diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h new file mode 100644 index 000000000..77cda1f8a --- /dev/null +++ b/src/shader_recompiler/frontend/ir/modifiers.h | |||
| @@ -0,0 +1,49 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/shader_info.h" | ||
| 10 | |||
| 11 | namespace Shader::IR { | ||
| 12 | |||
| 13 | enum class FmzMode : u8 { | ||
| 14 | DontCare, // Not specified for this instruction | ||
| 15 | FTZ, // Flush denorms to zero, NaN is propagated (D3D11, NVN, GL, VK) | ||
| 16 | FMZ, // Flush denorms to zero, x * 0 == 0 (D3D9) | ||
| 17 | None, // Denorms are not flushed, NaN is propagated (nouveau) | ||
| 18 | }; | ||
| 19 | |||
| 20 | enum class FpRounding : u8 { | ||
| 21 | DontCare, // Not specified for this instruction | ||
| 22 | RN, // Round to nearest even | ||
| 23 | RM, // Round towards negative infinity | ||
| 24 | RP, // Round towards positive infinity | ||
| 25 | RZ, // Round towards zero | ||
| 26 | }; | ||
| 27 | |||
| 28 | struct FpControl { | ||
| 29 | bool no_contraction{false}; | ||
| 30 | FpRounding rounding{FpRounding::DontCare}; | ||
| 31 | FmzMode fmz_mode{FmzMode::DontCare}; | ||
| 32 | }; | ||
| 33 | static_assert(sizeof(FpControl) <= sizeof(u32)); | ||
| 34 | |||
| 35 | union TextureInstInfo { | ||
| 36 | u32 raw; | ||
| 37 | BitField<0, 16, u32> descriptor_index; | ||
| 38 | BitField<16, 3, TextureType> type; | ||
| 39 | BitField<19, 1, u32> is_depth; | ||
| 40 | BitField<20, 1, u32> has_bias; | ||
| 41 | BitField<21, 1, u32> has_lod_clamp; | ||
| 42 | BitField<22, 1, u32> relaxed_precision; | ||
| 43 | BitField<23, 2, u32> gather_component; | ||
| 44 | BitField<25, 2, u32> num_derivates; | ||
| 45 | BitField<27, 3, ImageFormat> image_format; | ||
| 46 | }; | ||
| 47 | static_assert(sizeof(TextureInstInfo) <= sizeof(u32)); | ||
| 48 | |||
| 49 | } // namespace Shader::IR | ||
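
`TextureInstInfo` packs every texture modifier into a single `u32` so it, too, can ride in the instruction's flags word (both `static_assert`s above bound the size by `sizeof(u32)`). The same layout written as explicit shifts and masks, a sketch covering only the first few fields; the real header relies on yuzu's `BitField` template instead:

```cpp
// Hand-rolled equivalent of the first TextureInstInfo bit fields; for
// illustration only -- PackTextureInfo/DescriptorIndexOf are not yuzu APIs.
#include <cstdint>

constexpr std::uint32_t PackTextureInfo(std::uint32_t descriptor_index, // bits 0-15
                                        std::uint32_t type,             // bits 16-18
                                        bool is_depth,                  // bit 19
                                        bool has_bias) {                // bit 20
    return (descriptor_index & 0xffffu) | ((type & 0x7u) << 16) |
           (std::uint32_t{is_depth} << 19) | (std::uint32_t{has_bias} << 20);
}

constexpr std::uint32_t DescriptorIndexOf(std::uint32_t raw) {
    return raw & 0xffffu; // BitField<0, 16, u32> descriptor_index
}

// Compile-time check: a round trip preserves the descriptor index.
static_assert(DescriptorIndexOf(PackTextureInfo(42, 2, true, false)) == 42);

int main() {}
```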
diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp new file mode 100644 index 000000000..24d024ad7 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/opcodes.cpp | |||
| @@ -0,0 +1,15 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/frontend/ir/opcodes.h" | ||
| 8 | |||
| 9 | namespace Shader::IR { | ||
| 10 | |||
| 11 | std::string_view NameOf(Opcode op) { | ||
| 12 | return Detail::META_TABLE[static_cast<size_t>(op)].name; | ||
| 13 | } | ||
| 14 | |||
| 15 | } // namespace Shader::IR | ||
diff --git a/src/shader_recompiler/frontend/ir/opcodes.h b/src/shader_recompiler/frontend/ir/opcodes.h new file mode 100644 index 000000000..9ab108292 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/opcodes.h | |||
| @@ -0,0 +1,110 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <algorithm> | ||
| 8 | #include <array> | ||
| 9 | #include <string_view> | ||
| 10 | |||
| 11 | #include <fmt/format.h> | ||
| 12 | |||
| 13 | #include "shader_recompiler/frontend/ir/type.h" | ||
| 14 | |||
| 15 | namespace Shader::IR { | ||
| 16 | |||
| 17 | enum class Opcode { | ||
| 18 | #define OPCODE(name, ...) name, | ||
| 19 | #include "opcodes.inc" | ||
| 20 | #undef OPCODE | ||
| 21 | }; | ||
| 22 | |||
| 23 | namespace Detail { | ||
| 24 | struct OpcodeMeta { | ||
| 25 | std::string_view name; | ||
| 26 | Type type; | ||
| 27 | std::array<Type, 5> arg_types; | ||
| 28 | }; | ||
| 29 | |||
| 30 | // using enum Type; (not yet usable on all supported compilers; the aliases below emulate it) | ||
| 31 | constexpr Type Void{Type::Void}; | ||
| 32 | constexpr Type Opaque{Type::Opaque}; | ||
| 33 | constexpr Type Reg{Type::Reg}; | ||
| 34 | constexpr Type Pred{Type::Pred}; | ||
| 35 | constexpr Type Attribute{Type::Attribute}; | ||
| 36 | constexpr Type Patch{Type::Patch}; | ||
| 37 | constexpr Type U1{Type::U1}; | ||
| 38 | constexpr Type U8{Type::U8}; | ||
| 39 | constexpr Type U16{Type::U16}; | ||
| 40 | constexpr Type U32{Type::U32}; | ||
| 41 | constexpr Type U64{Type::U64}; | ||
| 42 | constexpr Type F16{Type::F16}; | ||
| 43 | constexpr Type F32{Type::F32}; | ||
| 44 | constexpr Type F64{Type::F64}; | ||
| 45 | constexpr Type U32x2{Type::U32x2}; | ||
| 46 | constexpr Type U32x3{Type::U32x3}; | ||
| 47 | constexpr Type U32x4{Type::U32x4}; | ||
| 48 | constexpr Type F16x2{Type::F16x2}; | ||
| 49 | constexpr Type F16x3{Type::F16x3}; | ||
| 50 | constexpr Type F16x4{Type::F16x4}; | ||
| 51 | constexpr Type F32x2{Type::F32x2}; | ||
| 52 | constexpr Type F32x3{Type::F32x3}; | ||
| 53 | constexpr Type F32x4{Type::F32x4}; | ||
| 54 | constexpr Type F64x2{Type::F64x2}; | ||
| 55 | constexpr Type F64x3{Type::F64x3}; | ||
| 56 | constexpr Type F64x4{Type::F64x4}; | ||
| 57 | |||
| 58 | constexpr OpcodeMeta META_TABLE[]{ | ||
| 59 | #define OPCODE(name_token, type_token, ...) \ | ||
| 60 | { \ | ||
| 61 | .name{#name_token}, \ | ||
| 62 | .type = type_token, \ | ||
| 63 | .arg_types{__VA_ARGS__}, \ | ||
| 64 | }, | ||
| 65 | #include "opcodes.inc" | ||
| 66 | #undef OPCODE | ||
| 67 | }; | ||
| 68 | constexpr size_t CalculateNumArgsOf(Opcode op) { | ||
| 69 | const auto& arg_types{META_TABLE[static_cast<size_t>(op)].arg_types}; | ||
| 70 | return static_cast<size_t>( | ||
| 71 | std::distance(arg_types.begin(), std::ranges::find(arg_types, Type::Void))); | ||
| 72 | } | ||
| 73 | |||
| 74 | constexpr u8 NUM_ARGS[]{ | ||
| 75 | #define OPCODE(name_token, type_token, ...) static_cast<u8>(CalculateNumArgsOf(Opcode::name_token)), | ||
| 76 | #include "opcodes.inc" | ||
| 77 | #undef OPCODE | ||
| 78 | }; | ||
| 79 | } // namespace Detail | ||
| 80 | |||
| 81 | /// Get return type of an opcode | ||
| 82 | [[nodiscard]] inline Type TypeOf(Opcode op) noexcept { | ||
| 83 | return Detail::META_TABLE[static_cast<size_t>(op)].type; | ||
| 84 | } | ||
| 85 | |||
| 86 | /// Get the number of arguments an opcode accepts | ||
| 87 | [[nodiscard]] inline size_t NumArgsOf(Opcode op) noexcept { | ||
| 88 | return static_cast<size_t>(Detail::NUM_ARGS[static_cast<size_t>(op)]); | ||
| 89 | } | ||
| 90 | |||
| 91 | /// Get the required type of an argument of an opcode | ||
| 92 | [[nodiscard]] inline Type ArgTypeOf(Opcode op, size_t arg_index) noexcept { | ||
| 93 | return Detail::META_TABLE[static_cast<size_t>(op)].arg_types[arg_index]; | ||
| 94 | } | ||
| 95 | |||
| 96 | /// Get the name of an opcode | ||
| 97 | [[nodiscard]] std::string_view NameOf(Opcode op); | ||
| 98 | |||
| 99 | } // namespace Shader::IR | ||
| 100 | |||
| 101 | template <> | ||
| 102 | struct fmt::formatter<Shader::IR::Opcode> { | ||
| 103 | constexpr auto parse(format_parse_context& ctx) { | ||
| 104 | return ctx.begin(); | ||
| 105 | } | ||
| 106 | template <typename FormatContext> | ||
| 107 | auto format(const Shader::IR::Opcode& op, FormatContext& ctx) { | ||
| 108 | return format_to(ctx.out(), "{}", Shader::IR::NameOf(op)); | ||
| 109 | } | ||
| 110 | }; | ||
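
`opcodes.h` derives all of its tables from one X-macro list: `opcodes.inc` is included three times with different `OPCODE` definitions, generating the `Opcode` enum, `META_TABLE`, and `NUM_ARGS` in lockstep so they can never drift apart. A self-contained miniature of the pattern, with an invented `MINI_OPCODE_LIST` standing in for the `#include "opcodes.inc"` expansion (and a direct argument count where the real header scans `arg_types` for `Void`):

```cpp
// X-macro sketch: one opcode list expands into an enum, a name table, and an
// argument-count table. MiniOpcode and MINI_OPCODE_LIST are illustrative only.
#include <array>
#include <cstddef>
#include <string_view>

#define MINI_OPCODE_LIST                                                       \
    OPCODE(Void, 0)                                                            \
    OPCODE(IAdd32, 2)                                                          \
    OPCODE(Select, 3)

enum class MiniOpcode {
#define OPCODE(name, num_args) name,
    MINI_OPCODE_LIST
#undef OPCODE
};

constexpr std::array NAMES{
#define OPCODE(name, num_args) std::string_view{#name},
    MINI_OPCODE_LIST
#undef OPCODE
};

constexpr std::array<std::size_t, NAMES.size()> NUM_ARGS{
#define OPCODE(name, num_args) std::size_t{num_args},
    MINI_OPCODE_LIST
#undef OPCODE
};

static_assert(NAMES[static_cast<std::size_t>(MiniOpcode::IAdd32)] == "IAdd32");
static_assert(NUM_ARGS[static_cast<std::size_t>(MiniOpcode::Select)] == 3);

int main() {}
```

The `fmt::formatter` specialization above routes through `NameOf`, which is exactly this `NAMES`-style lookup: formatting an `Opcode` prints its token name.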
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc new file mode 100644 index 000000000..d91098c80 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/opcodes.inc | |||
| @@ -0,0 +1,550 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | // opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, arg5 type, ... | ||
| 6 | OPCODE(Phi, Opaque, ) | ||
| 7 | OPCODE(Identity, Opaque, Opaque, ) | ||
| 8 | OPCODE(Void, Void, ) | ||
| 9 | OPCODE(ConditionRef, U1, U1, ) | ||
| 10 | OPCODE(Reference, Void, Opaque, ) | ||
| 11 | OPCODE(PhiMove, Void, Opaque, Opaque, ) | ||
| 12 | |||
| 13 | // Special operations | ||
| 14 | OPCODE(Prologue, Void, ) | ||
| 15 | OPCODE(Epilogue, Void, ) | ||
| 16 | OPCODE(Join, Void, ) | ||
| 17 | OPCODE(DemoteToHelperInvocation, Void, ) | ||
| 18 | OPCODE(EmitVertex, Void, U32, ) | ||
| 19 | OPCODE(EndPrimitive, Void, U32, ) | ||
| 20 | |||
| 21 | // Barriers | ||
| 22 | OPCODE(Barrier, Void, ) | ||
| 23 | OPCODE(WorkgroupMemoryBarrier, Void, ) | ||
| 24 | OPCODE(DeviceMemoryBarrier, Void, ) | ||
| 25 | |||
| 26 | // Context getters/setters | ||
| 27 | OPCODE(GetRegister, U32, Reg, ) | ||
| 28 | OPCODE(SetRegister, Void, Reg, U32, ) | ||
| 29 | OPCODE(GetPred, U1, Pred, ) | ||
| 30 | OPCODE(SetPred, Void, Pred, U1, ) | ||
| 31 | OPCODE(GetGotoVariable, U1, U32, ) | ||
| 32 | OPCODE(SetGotoVariable, Void, U32, U1, ) | ||
| 33 | OPCODE(GetIndirectBranchVariable, U32, ) | ||
| 34 | OPCODE(SetIndirectBranchVariable, Void, U32, ) | ||
| 35 | OPCODE(GetCbufU8, U32, U32, U32, ) | ||
| 36 | OPCODE(GetCbufS8, U32, U32, U32, ) | ||
| 37 | OPCODE(GetCbufU16, U32, U32, U32, ) | ||
| 38 | OPCODE(GetCbufS16, U32, U32, U32, ) | ||
| 39 | OPCODE(GetCbufU32, U32, U32, U32, ) | ||
| 40 | OPCODE(GetCbufF32, F32, U32, U32, ) | ||
| 41 | OPCODE(GetCbufU32x2, U32x2, U32, U32, ) | ||
| 42 | OPCODE(GetAttribute, F32, Attribute, U32, ) | ||
| 43 | OPCODE(SetAttribute, Void, Attribute, F32, U32, ) | ||
| 44 | OPCODE(GetAttributeIndexed, F32, U32, U32, ) | ||
| 45 | OPCODE(SetAttributeIndexed, Void, U32, F32, U32, ) | ||
| 46 | OPCODE(GetPatch, F32, Patch, ) | ||
| 47 | OPCODE(SetPatch, Void, Patch, F32, ) | ||
| 48 | OPCODE(SetFragColor, Void, U32, U32, F32, ) | ||
| 49 | OPCODE(SetSampleMask, Void, U32, ) | ||
| 50 | OPCODE(SetFragDepth, Void, F32, ) | ||
| 51 | OPCODE(GetZFlag, U1, Void, ) | ||
| 52 | OPCODE(GetSFlag, U1, Void, ) | ||
| 53 | OPCODE(GetCFlag, U1, Void, ) | ||
| 54 | OPCODE(GetOFlag, U1, Void, ) | ||
| 55 | OPCODE(SetZFlag, Void, U1, ) | ||
| 56 | OPCODE(SetSFlag, Void, U1, ) | ||
| 57 | OPCODE(SetCFlag, Void, U1, ) | ||
| 58 | OPCODE(SetOFlag, Void, U1, ) | ||
| 59 | OPCODE(WorkgroupId, U32x3, ) | ||
| 60 | OPCODE(LocalInvocationId, U32x3, ) | ||
| 61 | OPCODE(InvocationId, U32, ) | ||
| 62 | OPCODE(SampleId, U32, ) | ||
| 63 | OPCODE(IsHelperInvocation, U1, ) | ||
| 64 | OPCODE(YDirection, F32, ) | ||
| 65 | |||
| 66 | // Undefined | ||
| 67 | OPCODE(UndefU1, U1, ) | ||
| 68 | OPCODE(UndefU8, U8, ) | ||
| 69 | OPCODE(UndefU16, U16, ) | ||
| 70 | OPCODE(UndefU32, U32, ) | ||
| 71 | OPCODE(UndefU64, U64, ) | ||
| 72 | |||
| 73 | // Memory operations | ||
| 74 | OPCODE(LoadGlobalU8, U32, Opaque, ) | ||
| 75 | OPCODE(LoadGlobalS8, U32, Opaque, ) | ||
| 76 | OPCODE(LoadGlobalU16, U32, Opaque, ) | ||
| 77 | OPCODE(LoadGlobalS16, U32, Opaque, ) | ||
| 78 | OPCODE(LoadGlobal32, U32, Opaque, ) | ||
| 79 | OPCODE(LoadGlobal64, U32x2, Opaque, ) | ||
| 80 | OPCODE(LoadGlobal128, U32x4, Opaque, ) | ||
| 81 | OPCODE(WriteGlobalU8, Void, Opaque, U32, ) | ||
| 82 | OPCODE(WriteGlobalS8, Void, Opaque, U32, ) | ||
| 83 | OPCODE(WriteGlobalU16, Void, Opaque, U32, ) | ||
| 84 | OPCODE(WriteGlobalS16, Void, Opaque, U32, ) | ||
| 85 | OPCODE(WriteGlobal32, Void, Opaque, U32, ) | ||
| 86 | OPCODE(WriteGlobal64, Void, Opaque, U32x2, ) | ||
| 87 | OPCODE(WriteGlobal128, Void, Opaque, U32x4, ) | ||
| 88 | |||
| 89 | // Storage buffer operations | ||
| 90 | OPCODE(LoadStorageU8, U32, U32, U32, ) | ||
| 91 | OPCODE(LoadStorageS8, U32, U32, U32, ) | ||
| 92 | OPCODE(LoadStorageU16, U32, U32, U32, ) | ||
| 93 | OPCODE(LoadStorageS16, U32, U32, U32, ) | ||
| 94 | OPCODE(LoadStorage32, U32, U32, U32, ) | ||
| 95 | OPCODE(LoadStorage64, U32x2, U32, U32, ) | ||
| 96 | OPCODE(LoadStorage128, U32x4, U32, U32, ) | ||
| 97 | OPCODE(WriteStorageU8, Void, U32, U32, U32, ) | ||
| 98 | OPCODE(WriteStorageS8, Void, U32, U32, U32, ) | ||
| 99 | OPCODE(WriteStorageU16, Void, U32, U32, U32, ) | ||
| 100 | OPCODE(WriteStorageS16, Void, U32, U32, U32, ) | ||
| 101 | OPCODE(WriteStorage32, Void, U32, U32, U32, ) | ||
| 102 | OPCODE(WriteStorage64, Void, U32, U32, U32x2, ) | ||
| 103 | OPCODE(WriteStorage128, Void, U32, U32, U32x4, ) | ||
| 104 | |||
| 105 | // Local memory operations | ||
| 106 | OPCODE(LoadLocal, U32, U32, ) | ||
| 107 | OPCODE(WriteLocal, Void, U32, U32, ) | ||
| 108 | |||
| 109 | // Shared memory operations | ||
| 110 | OPCODE(LoadSharedU8, U32, U32, ) | ||
| 111 | OPCODE(LoadSharedS8, U32, U32, ) | ||
| 112 | OPCODE(LoadSharedU16, U32, U32, ) | ||
| 113 | OPCODE(LoadSharedS16, U32, U32, ) | ||
| 114 | OPCODE(LoadSharedU32, U32, U32, ) | ||
| 115 | OPCODE(LoadSharedU64, U32x2, U32, ) | ||
| 116 | OPCODE(LoadSharedU128, U32x4, U32, ) | ||
| 117 | OPCODE(WriteSharedU8, Void, U32, U32, ) | ||
| 118 | OPCODE(WriteSharedU16, Void, U32, U32, ) | ||
| 119 | OPCODE(WriteSharedU32, Void, U32, U32, ) | ||
| 120 | OPCODE(WriteSharedU64, Void, U32, U32x2, ) | ||
| 121 | OPCODE(WriteSharedU128, Void, U32, U32x4, ) | ||
| 122 | |||
| 123 | // Vector utility | ||
| 124 | OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) | ||
| 125 | OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, ) | ||
| 126 | OPCODE(CompositeConstructU32x4, U32x4, U32, U32, U32, U32, ) | ||
| 127 | OPCODE(CompositeExtractU32x2, U32, U32x2, U32, ) | ||
| 128 | OPCODE(CompositeExtractU32x3, U32, U32x3, U32, ) | ||
| 129 | OPCODE(CompositeExtractU32x4, U32, U32x4, U32, ) | ||
| 130 | OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, ) | ||
| 131 | OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, ) | ||
| 132 | OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, ) | ||
| 133 | OPCODE(CompositeConstructF16x2, F16x2, F16, F16, ) | ||
| 134 | OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, ) | ||
| 135 | OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, ) | ||
| 136 | OPCODE(CompositeExtractF16x2, F16, F16x2, U32, ) | ||
| 137 | OPCODE(CompositeExtractF16x3, F16, F16x3, U32, ) | ||
| 138 | OPCODE(CompositeExtractF16x4, F16, F16x4, U32, ) | ||
| 139 | OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, ) | ||
| 140 | OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, ) | ||
| 141 | OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, ) | ||
| 142 | OPCODE(CompositeConstructF32x2, F32x2, F32, F32, ) | ||
| 143 | OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, ) | ||
| 144 | OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, ) | ||
| 145 | OPCODE(CompositeExtractF32x2, F32, F32x2, U32, ) | ||
| 146 | OPCODE(CompositeExtractF32x3, F32, F32x3, U32, ) | ||
| 147 | OPCODE(CompositeExtractF32x4, F32, F32x4, U32, ) | ||
| 148 | OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, ) | ||
| 149 | OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, ) | ||
| 150 | OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, ) | ||
| 151 | OPCODE(CompositeConstructF64x2, F64x2, F64, F64, ) | ||
| 152 | OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, ) | ||
| 153 | OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, ) | ||
| 154 | OPCODE(CompositeExtractF64x2, F64, F64x2, U32, ) | ||
| 155 | OPCODE(CompositeExtractF64x3, F64, F64x3, U32, ) | ||
| 156 | OPCODE(CompositeExtractF64x4, F64, F64x4, U32, ) | ||
| 157 | OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, ) | ||
| 158 | OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, ) | ||
| 159 | OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, ) | ||
| 160 | |||
| 161 | // Select operations | ||
| 162 | OPCODE(SelectU1, U1, U1, U1, U1, ) | ||
| 163 | OPCODE(SelectU8, U8, U1, U8, U8, ) | ||
| 164 | OPCODE(SelectU16, U16, U1, U16, U16, ) | ||
| 165 | OPCODE(SelectU32, U32, U1, U32, U32, ) | ||
| 166 | OPCODE(SelectU64, U64, U1, U64, U64, ) | ||
| 167 | OPCODE(SelectF16, F16, U1, F16, F16, ) | ||
| 168 | OPCODE(SelectF32, F32, U1, F32, F32, ) | ||
| 169 | OPCODE(SelectF64, F64, U1, F64, F64, ) | ||
| 170 | |||
| 171 | // Bitwise conversions | ||
| 172 | OPCODE(BitCastU16F16, U16, F16, ) | ||
| 173 | OPCODE(BitCastU32F32, U32, F32, ) | ||
| 174 | OPCODE(BitCastU64F64, U64, F64, ) | ||
| 175 | OPCODE(BitCastF16U16, F16, U16, ) | ||
| 176 | OPCODE(BitCastF32U32, F32, U32, ) | ||
| 177 | OPCODE(BitCastF64U64, F64, U64, ) | ||
| 178 | OPCODE(PackUint2x32, U64, U32x2, ) | ||
| 179 | OPCODE(UnpackUint2x32, U32x2, U64, ) | ||
| 180 | OPCODE(PackFloat2x16, U32, F16x2, ) | ||
| 181 | OPCODE(UnpackFloat2x16, F16x2, U32, ) | ||
| 182 | OPCODE(PackHalf2x16, U32, F32x2, ) | ||
| 183 | OPCODE(UnpackHalf2x16, F32x2, U32, ) | ||
| 184 | OPCODE(PackDouble2x32, F64, U32x2, ) | ||
| 185 | OPCODE(UnpackDouble2x32, U32x2, F64, ) | ||
| 186 | |||
| 187 | // Pseudo-operations, handled specially at final emit | ||
| 188 | OPCODE(GetZeroFromOp, U1, Opaque, ) | ||
| 189 | OPCODE(GetSignFromOp, U1, Opaque, ) | ||
| 190 | OPCODE(GetCarryFromOp, U1, Opaque, ) | ||
| 191 | OPCODE(GetOverflowFromOp, U1, Opaque, ) | ||
| 192 | OPCODE(GetSparseFromOp, U1, Opaque, ) | ||
| 193 | OPCODE(GetInBoundsFromOp, U1, Opaque, ) | ||
| 194 | |||
| 195 | // Floating-point operations | ||
| 196 | OPCODE(FPAbs16, F16, F16, ) | ||
| 197 | OPCODE(FPAbs32, F32, F32, ) | ||
| 198 | OPCODE(FPAbs64, F64, F64, ) | ||
| 199 | OPCODE(FPAdd16, F16, F16, F16, ) | ||
| 200 | OPCODE(FPAdd32, F32, F32, F32, ) | ||
| 201 | OPCODE(FPAdd64, F64, F64, F64, ) | ||
| 202 | OPCODE(FPFma16, F16, F16, F16, F16, ) | ||
| 203 | OPCODE(FPFma32, F32, F32, F32, F32, ) | ||
| 204 | OPCODE(FPFma64, F64, F64, F64, F64, ) | ||
| 205 | OPCODE(FPMax32, F32, F32, F32, ) | ||
| 206 | OPCODE(FPMax64, F64, F64, F64, ) | ||
| 207 | OPCODE(FPMin32, F32, F32, F32, ) | ||
| 208 | OPCODE(FPMin64, F64, F64, F64, ) | ||
| 209 | OPCODE(FPMul16, F16, F16, F16, ) | ||
| 210 | OPCODE(FPMul32, F32, F32, F32, ) | ||
| 211 | OPCODE(FPMul64, F64, F64, F64, ) | ||
| 212 | OPCODE(FPNeg16, F16, F16, ) | ||
| 213 | OPCODE(FPNeg32, F32, F32, ) | ||
| 214 | OPCODE(FPNeg64, F64, F64, ) | ||
| 215 | OPCODE(FPRecip32, F32, F32, ) | ||
| 216 | OPCODE(FPRecip64, F64, F64, ) | ||
| 217 | OPCODE(FPRecipSqrt32, F32, F32, ) | ||
| 218 | OPCODE(FPRecipSqrt64, F64, F64, ) | ||
| 219 | OPCODE(FPSqrt, F32, F32, ) | ||
| 220 | OPCODE(FPSin, F32, F32, ) | ||
| 221 | OPCODE(FPExp2, F32, F32, ) | ||
| 222 | OPCODE(FPCos, F32, F32, ) | ||
| 223 | OPCODE(FPLog2, F32, F32, ) | ||
| 224 | OPCODE(FPSaturate16, F16, F16, ) | ||
| 225 | OPCODE(FPSaturate32, F32, F32, ) | ||
| 226 | OPCODE(FPSaturate64, F64, F64, ) | ||
| 227 | OPCODE(FPClamp16, F16, F16, F16, F16, ) | ||
| 228 | OPCODE(FPClamp32, F32, F32, F32, F32, ) | ||
| 229 | OPCODE(FPClamp64, F64, F64, F64, F64, ) | ||
| 230 | OPCODE(FPRoundEven16, F16, F16, ) | ||
| 231 | OPCODE(FPRoundEven32, F32, F32, ) | ||
| 232 | OPCODE(FPRoundEven64, F64, F64, ) | ||
| 233 | OPCODE(FPFloor16, F16, F16, ) | ||
| 234 | OPCODE(FPFloor32, F32, F32, ) | ||
| 235 | OPCODE(FPFloor64, F64, F64, ) | ||
| 236 | OPCODE(FPCeil16, F16, F16, ) | ||
| 237 | OPCODE(FPCeil32, F32, F32, ) | ||
| 238 | OPCODE(FPCeil64, F64, F64, ) | ||
| 239 | OPCODE(FPTrunc16, F16, F16, ) | ||
| 240 | OPCODE(FPTrunc32, F32, F32, ) | ||
| 241 | OPCODE(FPTrunc64, F64, F64, ) | ||
| 242 | |||
| 243 | OPCODE(FPOrdEqual16, U1, F16, F16, ) | ||
| 244 | OPCODE(FPOrdEqual32, U1, F32, F32, ) | ||
| 245 | OPCODE(FPOrdEqual64, U1, F64, F64, ) | ||
| 246 | OPCODE(FPUnordEqual16, U1, F16, F16, ) | ||
| 247 | OPCODE(FPUnordEqual32, U1, F32, F32, ) | ||
| 248 | OPCODE(FPUnordEqual64, U1, F64, F64, ) | ||
| 249 | OPCODE(FPOrdNotEqual16, U1, F16, F16, ) | ||
| 250 | OPCODE(FPOrdNotEqual32, U1, F32, F32, ) | ||
| 251 | OPCODE(FPOrdNotEqual64, U1, F64, F64, ) | ||
| 252 | OPCODE(FPUnordNotEqual16, U1, F16, F16, ) | ||
| 253 | OPCODE(FPUnordNotEqual32, U1, F32, F32, ) | ||
| 254 | OPCODE(FPUnordNotEqual64, U1, F64, F64, ) | ||
| 255 | OPCODE(FPOrdLessThan16, U1, F16, F16, ) | ||
| 256 | OPCODE(FPOrdLessThan32, U1, F32, F32, ) | ||
| 257 | OPCODE(FPOrdLessThan64, U1, F64, F64, ) | ||
| 258 | OPCODE(FPUnordLessThan16, U1, F16, F16, ) | ||
| 259 | OPCODE(FPUnordLessThan32, U1, F32, F32, ) | ||
| 260 | OPCODE(FPUnordLessThan64, U1, F64, F64, ) | ||
| 261 | OPCODE(FPOrdGreaterThan16, U1, F16, F16, ) | ||
| 262 | OPCODE(FPOrdGreaterThan32, U1, F32, F32, ) | ||
| 263 | OPCODE(FPOrdGreaterThan64, U1, F64, F64, ) | ||
| 264 | OPCODE(FPUnordGreaterThan16, U1, F16, F16, ) | ||
| 265 | OPCODE(FPUnordGreaterThan32, U1, F32, F32, ) | ||
| 266 | OPCODE(FPUnordGreaterThan64, U1, F64, F64, ) | ||
| 267 | OPCODE(FPOrdLessThanEqual16, U1, F16, F16, ) | ||
| 268 | OPCODE(FPOrdLessThanEqual32, U1, F32, F32, ) | ||
| 269 | OPCODE(FPOrdLessThanEqual64, U1, F64, F64, ) | ||
| 270 | OPCODE(FPUnordLessThanEqual16, U1, F16, F16, ) | ||
| 271 | OPCODE(FPUnordLessThanEqual32, U1, F32, F32, ) | ||
| 272 | OPCODE(FPUnordLessThanEqual64, U1, F64, F64, ) | ||
| 273 | OPCODE(FPOrdGreaterThanEqual16, U1, F16, F16, ) | ||
| 274 | OPCODE(FPOrdGreaterThanEqual32, U1, F32, F32, ) | ||
| 275 | OPCODE(FPOrdGreaterThanEqual64, U1, F64, F64, ) | ||
| 276 | OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, ) | ||
| 277 | OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, ) | ||
| 278 | OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, ) | ||
| 279 | OPCODE(FPIsNan16, U1, F16, ) | ||
| 280 | OPCODE(FPIsNan32, U1, F32, ) | ||
| 281 | OPCODE(FPIsNan64, U1, F64, ) | ||
| 282 | |||
| 283 | // Integer operations | ||
| 284 | OPCODE(IAdd32, U32, U32, U32, ) | ||
| 285 | OPCODE(IAdd64, U64, U64, U64, ) | ||
| 286 | OPCODE(ISub32, U32, U32, U32, ) | ||
| 287 | OPCODE(ISub64, U64, U64, U64, ) | ||
| 288 | OPCODE(IMul32, U32, U32, U32, ) | ||
| 289 | OPCODE(INeg32, U32, U32, ) | ||
| 290 | OPCODE(INeg64, U64, U64, ) | ||
| 291 | OPCODE(IAbs32, U32, U32, ) | ||
| 292 | OPCODE(ShiftLeftLogical32, U32, U32, U32, ) | ||
| 293 | OPCODE(ShiftLeftLogical64, U64, U64, U32, ) | ||
| 294 | OPCODE(ShiftRightLogical32, U32, U32, U32, ) | ||
| 295 | OPCODE(ShiftRightLogical64, U64, U64, U32, ) | ||
| 296 | OPCODE(ShiftRightArithmetic32, U32, U32, U32, ) | ||
| 297 | OPCODE(ShiftRightArithmetic64, U64, U64, U32, ) | ||
| 298 | OPCODE(BitwiseAnd32, U32, U32, U32, ) | ||
| 299 | OPCODE(BitwiseOr32, U32, U32, U32, ) | ||
| 300 | OPCODE(BitwiseXor32, U32, U32, U32, ) | ||
| 301 | OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, ) | ||
| 302 | OPCODE(BitFieldSExtract, U32, U32, U32, U32, ) | ||
| 303 | OPCODE(BitFieldUExtract, U32, U32, U32, U32, ) | ||
| 304 | OPCODE(BitReverse32, U32, U32, ) | ||
| 305 | OPCODE(BitCount32, U32, U32, ) | ||
| 306 | OPCODE(BitwiseNot32, U32, U32, ) | ||
| 307 | |||
| 308 | OPCODE(FindSMsb32, U32, U32, ) | ||
| 309 | OPCODE(FindUMsb32, U32, U32, ) | ||
| 310 | OPCODE(SMin32, U32, U32, U32, ) | ||
| 311 | OPCODE(UMin32, U32, U32, U32, ) | ||
| 312 | OPCODE(SMax32, U32, U32, U32, ) | ||
| 313 | OPCODE(UMax32, U32, U32, U32, ) | ||
| 314 | OPCODE(SClamp32, U32, U32, U32, U32, ) | ||
| 315 | OPCODE(UClamp32, U32, U32, U32, U32, ) | ||
| 316 | OPCODE(SLessThan, U1, U32, U32, ) | ||
| 317 | OPCODE(ULessThan, U1, U32, U32, ) | ||
| 318 | OPCODE(IEqual, U1, U32, U32, ) | ||
| 319 | OPCODE(SLessThanEqual, U1, U32, U32, ) | ||
| 320 | OPCODE(ULessThanEqual, U1, U32, U32, ) | ||
| 321 | OPCODE(SGreaterThan, U1, U32, U32, ) | ||
| 322 | OPCODE(UGreaterThan, U1, U32, U32, ) | ||
| 323 | OPCODE(INotEqual, U1, U32, U32, ) | ||
| 324 | OPCODE(SGreaterThanEqual, U1, U32, U32, ) | ||
| 325 | OPCODE(UGreaterThanEqual, U1, U32, U32, ) | ||
| 326 | |||
| 327 | // Atomic operations | ||
| 328 | OPCODE(SharedAtomicIAdd32, U32, U32, U32, ) | ||
| 329 | OPCODE(SharedAtomicSMin32, U32, U32, U32, ) | ||
| 330 | OPCODE(SharedAtomicUMin32, U32, U32, U32, ) | ||
| 331 | OPCODE(SharedAtomicSMax32, U32, U32, U32, ) | ||
| 332 | OPCODE(SharedAtomicUMax32, U32, U32, U32, ) | ||
| 333 | OPCODE(SharedAtomicInc32, U32, U32, U32, ) | ||
| 334 | OPCODE(SharedAtomicDec32, U32, U32, U32, ) | ||
| 335 | OPCODE(SharedAtomicAnd32, U32, U32, U32, ) | ||
| 336 | OPCODE(SharedAtomicOr32, U32, U32, U32, ) | ||
| 337 | OPCODE(SharedAtomicXor32, U32, U32, U32, ) | ||
| 338 | OPCODE(SharedAtomicExchange32, U32, U32, U32, ) | ||
| 339 | OPCODE(SharedAtomicExchange64, U64, U32, U64, ) | ||
| 340 | |||
| 341 | OPCODE(GlobalAtomicIAdd32, U32, U64, U32, ) | ||
| 342 | OPCODE(GlobalAtomicSMin32, U32, U64, U32, ) | ||
| 343 | OPCODE(GlobalAtomicUMin32, U32, U64, U32, ) | ||
| 344 | OPCODE(GlobalAtomicSMax32, U32, U64, U32, ) | ||
| 345 | OPCODE(GlobalAtomicUMax32, U32, U64, U32, ) | ||
| 346 | OPCODE(GlobalAtomicInc32, U32, U64, U32, ) | ||
| 347 | OPCODE(GlobalAtomicDec32, U32, U64, U32, ) | ||
| 348 | OPCODE(GlobalAtomicAnd32, U32, U64, U32, ) | ||
| 349 | OPCODE(GlobalAtomicOr32, U32, U64, U32, ) | ||
| 350 | OPCODE(GlobalAtomicXor32, U32, U64, U32, ) | ||
| 351 | OPCODE(GlobalAtomicExchange32, U32, U64, U32, ) | ||
| 352 | OPCODE(GlobalAtomicIAdd64, U64, U64, U64, ) | ||
| 353 | OPCODE(GlobalAtomicSMin64, U64, U64, U64, ) | ||
| 354 | OPCODE(GlobalAtomicUMin64, U64, U64, U64, ) | ||
| 355 | OPCODE(GlobalAtomicSMax64, U64, U64, U64, ) | ||
| 356 | OPCODE(GlobalAtomicUMax64, U64, U64, U64, ) | ||
| 357 | OPCODE(GlobalAtomicAnd64, U64, U64, U64, ) | ||
| 358 | OPCODE(GlobalAtomicOr64, U64, U64, U64, ) | ||
| 359 | OPCODE(GlobalAtomicXor64, U64, U64, U64, ) | ||
| 360 | OPCODE(GlobalAtomicExchange64, U64, U64, U64, ) | ||
| 361 | OPCODE(GlobalAtomicAddF32, F32, U64, F32, ) | ||
| 362 | OPCODE(GlobalAtomicAddF16x2, U32, U64, F16x2, ) | ||
| 363 | OPCODE(GlobalAtomicAddF32x2, U32, U64, F32x2, ) | ||
| 364 | OPCODE(GlobalAtomicMinF16x2, U32, U64, F16x2, ) | ||
| 365 | OPCODE(GlobalAtomicMinF32x2, U32, U64, F32x2, ) | ||
| 366 | OPCODE(GlobalAtomicMaxF16x2, U32, U64, F16x2, ) | ||
| 367 | OPCODE(GlobalAtomicMaxF32x2, U32, U64, F32x2, ) | ||
| 368 | |||
| 369 | OPCODE(StorageAtomicIAdd32, U32, U32, U32, U32, ) | ||
| 370 | OPCODE(StorageAtomicSMin32, U32, U32, U32, U32, ) | ||
| 371 | OPCODE(StorageAtomicUMin32, U32, U32, U32, U32, ) | ||
| 372 | OPCODE(StorageAtomicSMax32, U32, U32, U32, U32, ) | ||
| 373 | OPCODE(StorageAtomicUMax32, U32, U32, U32, U32, ) | ||
| 374 | OPCODE(StorageAtomicInc32, U32, U32, U32, U32, ) | ||
| 375 | OPCODE(StorageAtomicDec32, U32, U32, U32, U32, ) | ||
| 376 | OPCODE(StorageAtomicAnd32, U32, U32, U32, U32, ) | ||
| 377 | OPCODE(StorageAtomicOr32, U32, U32, U32, U32, ) | ||
| 378 | OPCODE(StorageAtomicXor32, U32, U32, U32, U32, ) | ||
| 379 | OPCODE(StorageAtomicExchange32, U32, U32, U32, U32, ) | ||
| 380 | OPCODE(StorageAtomicIAdd64, U64, U32, U32, U64, ) | ||
| 381 | OPCODE(StorageAtomicSMin64, U64, U32, U32, U64, ) | ||
| 382 | OPCODE(StorageAtomicUMin64, U64, U32, U32, U64, ) | ||
| 383 | OPCODE(StorageAtomicSMax64, U64, U32, U32, U64, ) | ||
| 384 | OPCODE(StorageAtomicUMax64, U64, U32, U32, U64, ) | ||
| 385 | OPCODE(StorageAtomicAnd64, U64, U32, U32, U64, ) | ||
| 386 | OPCODE(StorageAtomicOr64, U64, U32, U32, U64, ) | ||
| 387 | OPCODE(StorageAtomicXor64, U64, U32, U32, U64, ) | ||
| 388 | OPCODE(StorageAtomicExchange64, U64, U32, U32, U64, ) | ||
| 389 | OPCODE(StorageAtomicAddF32, F32, U32, U32, F32, ) | ||
| 390 | OPCODE(StorageAtomicAddF16x2, U32, U32, U32, F16x2, ) | ||
| 391 | OPCODE(StorageAtomicAddF32x2, U32, U32, U32, F32x2, ) | ||
| 392 | OPCODE(StorageAtomicMinF16x2, U32, U32, U32, F16x2, ) | ||
| 393 | OPCODE(StorageAtomicMinF32x2, U32, U32, U32, F32x2, ) | ||
| 394 | OPCODE(StorageAtomicMaxF16x2, U32, U32, U32, F16x2, ) | ||
| 395 | OPCODE(StorageAtomicMaxF32x2, U32, U32, U32, F32x2, ) | ||
| 396 | |||
| 397 | // Logical operations | ||
| 398 | OPCODE(LogicalOr, U1, U1, U1, ) | ||
| 399 | OPCODE(LogicalAnd, U1, U1, U1, ) | ||
| 400 | OPCODE(LogicalXor, U1, U1, U1, ) | ||
| 401 | OPCODE(LogicalNot, U1, U1, ) | ||
| 402 | |||
| 403 | // Conversion operations | ||
| 404 | OPCODE(ConvertS16F16, U32, F16, ) | ||
| 405 | OPCODE(ConvertS16F32, U32, F32, ) | ||
| 406 | OPCODE(ConvertS16F64, U32, F64, ) | ||
| 407 | OPCODE(ConvertS32F16, U32, F16, ) | ||
| 408 | OPCODE(ConvertS32F32, U32, F32, ) | ||
| 409 | OPCODE(ConvertS32F64, U32, F64, ) | ||
| 410 | OPCODE(ConvertS64F16, U64, F16, ) | ||
| 411 | OPCODE(ConvertS64F32, U64, F32, ) | ||
| 412 | OPCODE(ConvertS64F64, U64, F64, ) | ||
| 413 | OPCODE(ConvertU16F16, U32, F16, ) | ||
| 414 | OPCODE(ConvertU16F32, U32, F32, ) | ||
| 415 | OPCODE(ConvertU16F64, U32, F64, ) | ||
| 416 | OPCODE(ConvertU32F16, U32, F16, ) | ||
| 417 | OPCODE(ConvertU32F32, U32, F32, ) | ||
| 418 | OPCODE(ConvertU32F64, U32, F64, ) | ||
| 419 | OPCODE(ConvertU64F16, U64, F16, ) | ||
| 420 | OPCODE(ConvertU64F32, U64, F32, ) | ||
| 421 | OPCODE(ConvertU64F64, U64, F64, ) | ||
| 422 | OPCODE(ConvertU64U32, U64, U32, ) | ||
| 423 | OPCODE(ConvertU32U64, U32, U64, ) | ||
| 424 | OPCODE(ConvertF16F32, F16, F32, ) | ||
| 425 | OPCODE(ConvertF32F16, F32, F16, ) | ||
| 426 | OPCODE(ConvertF32F64, F32, F64, ) | ||
| 427 | OPCODE(ConvertF64F32, F64, F32, ) | ||
| 428 | OPCODE(ConvertF16S8, F16, U32, ) | ||
| 429 | OPCODE(ConvertF16S16, F16, U32, ) | ||
| 430 | OPCODE(ConvertF16S32, F16, U32, ) | ||
| 431 | OPCODE(ConvertF16S64, F16, U64, ) | ||
| 432 | OPCODE(ConvertF16U8, F16, U32, ) | ||
| 433 | OPCODE(ConvertF16U16, F16, U32, ) | ||
| 434 | OPCODE(ConvertF16U32, F16, U32, ) | ||
| 435 | OPCODE(ConvertF16U64, F16, U64, ) | ||
| 436 | OPCODE(ConvertF32S8, F32, U32, ) | ||
| 437 | OPCODE(ConvertF32S16, F32, U32, ) | ||
| 438 | OPCODE(ConvertF32S32, F32, U32, ) | ||
| 439 | OPCODE(ConvertF32S64, F32, U64, ) | ||
| 440 | OPCODE(ConvertF32U8, F32, U32, ) | ||
| 441 | OPCODE(ConvertF32U16, F32, U32, ) | ||
| 442 | OPCODE(ConvertF32U32, F32, U32, ) | ||
| 443 | OPCODE(ConvertF32U64, F32, U64, ) | ||
| 444 | OPCODE(ConvertF64S8, F64, U32, ) | ||
| 445 | OPCODE(ConvertF64S16, F64, U32, ) | ||
| 446 | OPCODE(ConvertF64S32, F64, U32, ) | ||
| 447 | OPCODE(ConvertF64S64, F64, U64, ) | ||
| 448 | OPCODE(ConvertF64U8, F64, U32, ) | ||
| 449 | OPCODE(ConvertF64U16, F64, U32, ) | ||
| 450 | OPCODE(ConvertF64U32, F64, U32, ) | ||
| 451 | OPCODE(ConvertF64U64, F64, U64, ) | ||
| 452 | |||
| 453 | // Image operations | ||
| 454 | OPCODE(BindlessImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) | ||
| 455 | OPCODE(BindlessImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) | ||
| 456 | OPCODE(BindlessImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) | ||
| 457 | OPCODE(BindlessImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) | ||
| 458 | OPCODE(BindlessImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) | ||
| 459 | OPCODE(BindlessImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) | ||
| 460 | OPCODE(BindlessImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, ) | ||
| 461 | OPCODE(BindlessImageQueryDimensions, U32x4, U32, U32, ) | ||
| 462 | OPCODE(BindlessImageQueryLod, F32x4, U32, Opaque, ) | ||
| 463 | OPCODE(BindlessImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) | ||
| 464 | OPCODE(BindlessImageRead, U32x4, U32, Opaque, ) | ||
| 465 | OPCODE(BindlessImageWrite, Void, U32, Opaque, U32x4, ) | ||
| 466 | |||
| 467 | OPCODE(BoundImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) | ||
| 468 | OPCODE(BoundImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) | ||
| 469 | OPCODE(BoundImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) | ||
| 470 | OPCODE(BoundImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) | ||
| 471 | OPCODE(BoundImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) | ||
| 472 | OPCODE(BoundImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) | ||
| 473 | OPCODE(BoundImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, ) | ||
| 474 | OPCODE(BoundImageQueryDimensions, U32x4, U32, U32, ) | ||
| 475 | OPCODE(BoundImageQueryLod, F32x4, U32, Opaque, ) | ||
| 476 | OPCODE(BoundImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) | ||
| 477 | OPCODE(BoundImageRead, U32x4, U32, Opaque, ) | ||
| 478 | OPCODE(BoundImageWrite, Void, U32, Opaque, U32x4, ) | ||
| 479 | |||
| 480 | OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, ) | ||
| 481 | OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, ) | ||
| 482 | OPCODE(ImageSampleDrefImplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, ) | ||
| 483 | OPCODE(ImageSampleDrefExplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, ) | ||
| 484 | OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, Opaque, ) | ||
| 485 | OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, Opaque, F32, ) | ||
| 486 | OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, ) | ||
| 487 | OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, ) | ||
| 488 | OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, ) | ||
| 489 | OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, ) | ||
| 490 | OPCODE(ImageRead, U32x4, Opaque, Opaque, ) | ||
| 491 | OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, ) | ||
| 492 | |||
| 493 | // Atomic Image operations | ||
| 494 | |||
| 495 | OPCODE(BindlessImageAtomicIAdd32, U32, U32, Opaque, U32, ) | ||
| 496 | OPCODE(BindlessImageAtomicSMin32, U32, U32, Opaque, U32, ) | ||
| 497 | OPCODE(BindlessImageAtomicUMin32, U32, U32, Opaque, U32, ) | ||
| 498 | OPCODE(BindlessImageAtomicSMax32, U32, U32, Opaque, U32, ) | ||
| 499 | OPCODE(BindlessImageAtomicUMax32, U32, U32, Opaque, U32, ) | ||
| 500 | OPCODE(BindlessImageAtomicInc32, U32, U32, Opaque, U32, ) | ||
| 501 | OPCODE(BindlessImageAtomicDec32, U32, U32, Opaque, U32, ) | ||
| 502 | OPCODE(BindlessImageAtomicAnd32, U32, U32, Opaque, U32, ) | ||
| 503 | OPCODE(BindlessImageAtomicOr32, U32, U32, Opaque, U32, ) | ||
| 504 | OPCODE(BindlessImageAtomicXor32, U32, U32, Opaque, U32, ) | ||
| 505 | OPCODE(BindlessImageAtomicExchange32, U32, U32, Opaque, U32, ) | ||
| 506 | |||
| 507 | OPCODE(BoundImageAtomicIAdd32, U32, U32, Opaque, U32, ) | ||
| 508 | OPCODE(BoundImageAtomicSMin32, U32, U32, Opaque, U32, ) | ||
| 509 | OPCODE(BoundImageAtomicUMin32, U32, U32, Opaque, U32, ) | ||
| 510 | OPCODE(BoundImageAtomicSMax32, U32, U32, Opaque, U32, ) | ||
| 511 | OPCODE(BoundImageAtomicUMax32, U32, U32, Opaque, U32, ) | ||
| 512 | OPCODE(BoundImageAtomicInc32, U32, U32, Opaque, U32, ) | ||
| 513 | OPCODE(BoundImageAtomicDec32, U32, U32, Opaque, U32, ) | ||
| 514 | OPCODE(BoundImageAtomicAnd32, U32, U32, Opaque, U32, ) | ||
| 515 | OPCODE(BoundImageAtomicOr32, U32, U32, Opaque, U32, ) | ||
| 516 | OPCODE(BoundImageAtomicXor32, U32, U32, Opaque, U32, ) | ||
| 517 | OPCODE(BoundImageAtomicExchange32, U32, U32, Opaque, U32, ) | ||
| 518 | |||
| 519 | OPCODE(ImageAtomicIAdd32, U32, Opaque, Opaque, U32, ) | ||
| 520 | OPCODE(ImageAtomicSMin32, U32, Opaque, Opaque, U32, ) | ||
| 521 | OPCODE(ImageAtomicUMin32, U32, Opaque, Opaque, U32, ) | ||
| 522 | OPCODE(ImageAtomicSMax32, U32, Opaque, Opaque, U32, ) | ||
| 523 | OPCODE(ImageAtomicUMax32, U32, Opaque, Opaque, U32, ) | ||
| 524 | OPCODE(ImageAtomicInc32, U32, Opaque, Opaque, U32, ) | ||
| 525 | OPCODE(ImageAtomicDec32, U32, Opaque, Opaque, U32, ) | ||
| 526 | OPCODE(ImageAtomicAnd32, U32, Opaque, Opaque, U32, ) | ||
| 527 | OPCODE(ImageAtomicOr32, U32, Opaque, Opaque, U32, ) | ||
| 528 | OPCODE(ImageAtomicXor32, U32, Opaque, Opaque, U32, ) | ||
| 529 | OPCODE(ImageAtomicExchange32, U32, Opaque, Opaque, U32, ) | ||
| 530 | |||
| 531 | // Warp operations | ||
| 532 | OPCODE(LaneId, U32, ) | ||
| 533 | OPCODE(VoteAll, U1, U1, ) | ||
| 534 | OPCODE(VoteAny, U1, U1, ) | ||
| 535 | OPCODE(VoteEqual, U1, U1, ) | ||
| 536 | OPCODE(SubgroupBallot, U32, U1, ) | ||
| 537 | OPCODE(SubgroupEqMask, U32, ) | ||
| 538 | OPCODE(SubgroupLtMask, U32, ) | ||
| 539 | OPCODE(SubgroupLeMask, U32, ) | ||
| 540 | OPCODE(SubgroupGtMask, U32, ) | ||
| 541 | OPCODE(SubgroupGeMask, U32, ) | ||
| 542 | OPCODE(ShuffleIndex, U32, U32, U32, U32, U32, ) | ||
| 543 | OPCODE(ShuffleUp, U32, U32, U32, U32, U32, ) | ||
| 544 | OPCODE(ShuffleDown, U32, U32, U32, U32, U32, ) | ||
| 545 | OPCODE(ShuffleButterfly, U32, U32, U32, U32, U32, ) | ||
| 546 | OPCODE(FSwizzleAdd, F32, F32, F32, U32, ) | ||
| 547 | OPCODE(DPdxFine, F32, F32, ) | ||
| 548 | OPCODE(DPdyFine, F32, F32, ) | ||
| 549 | OPCODE(DPdxCoarse, F32, F32, ) | ||
| 550 | OPCODE(DPdyCoarse, F32, F32, ) | ||
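An aside on how a table in this shape gets consumed: opcode lists like the one above are X-macros. Each including file defines OPCODE to extract whichever columns it needs and then pulls the list in, so the opcode enum, its name table, and its type table can never drift out of sync. Below is a minimal self-contained sketch of the pattern; the macro name and the trimmed opcode subset are illustrative, not the exact yuzu sources.

    #include <cstdio>

    // Illustrative stand-in for the table above: OPCODE(name, result, args...)
    #define OPCODE_LIST(OPCODE)                                                \
        OPCODE(IAdd32, U32, U32, U32)                                          \
        OPCODE(LogicalNot, U1, U1)                                             \
        OPCODE(ConvertF32U32, F32, U32)

    // First expansion: the opcode enumeration.
    enum class Opcode {
    #define OPCODE(name, ...) name,
        OPCODE_LIST(OPCODE)
    #undef OPCODE
    };

    // Second expansion: a parallel name table, guaranteed to stay in sync.
    constexpr const char* NameOf(Opcode op) {
        constexpr const char* names[]{
    #define OPCODE(name, ...) #name,
            OPCODE_LIST(OPCODE)
    #undef OPCODE
        };
        return names[static_cast<int>(op)];
    }

    int main() {
        std::printf("%s\n", NameOf(Opcode::LogicalNot)); // prints "LogicalNot"
    }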
diff --git a/src/shader_recompiler/frontend/ir/patch.cpp b/src/shader_recompiler/frontend/ir/patch.cpp new file mode 100644 index 000000000..4c956a970 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/patch.cpp | |||
| @@ -0,0 +1,28 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/exception.h" | ||
| 6 | #include "shader_recompiler/frontend/ir/patch.h" | ||
| 7 | |||
| 8 | namespace Shader::IR { | ||
| 9 | |||
| 10 | bool IsGeneric(Patch patch) noexcept { | ||
| 11 | return patch >= Patch::Component0 && patch <= Patch::Component119; | ||
| 12 | } | ||
| 13 | |||
| 14 | u32 GenericPatchIndex(Patch patch) { | ||
| 15 | if (!IsGeneric(patch)) { | ||
| 16 | throw InvalidArgument("Patch {} is not generic", patch); | ||
| 17 | } | ||
| 18 | return (static_cast<u32>(patch) - static_cast<u32>(Patch::Component0)) / 4; | ||
| 19 | } | ||
| 20 | |||
| 21 | u32 GenericPatchElement(Patch patch) { | ||
| 22 | if (!IsGeneric(patch)) { | ||
| 23 | throw InvalidArgument("Patch {} is not generic", patch); | ||
| 24 | } | ||
| 25 | return (static_cast<u32>(patch) - static_cast<u32>(Patch::Component0)) % 4; | ||
| 26 | } | ||
| 27 | |||
| 28 | } // namespace Shader::IR | ||
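A quick worked check of the arithmetic above: the enum in patch.h (next file) fixes Component0 at raw value 8 and groups components four to a generic patch attribute, so Component5 lands in attribute 1, element 1 (the .y of the second attribute). The snippet below restates that layout standalone for illustration; it is not the real header.

    #include <cstdint>

    // Mirrors the raw values fixed by patch.h: Component0 == 8, four
    // components per generic patch attribute.
    enum class Patch : std::uint64_t { Component0 = 8, Component5 = 13, Component119 = 127 };

    constexpr std::uint32_t GenericPatchIndex(Patch p) {
        return (static_cast<std::uint32_t>(p) - 8u) / 4u;
    }
    constexpr std::uint32_t GenericPatchElement(Patch p) {
        return (static_cast<std::uint32_t>(p) - 8u) % 4u;
    }

    static_assert(GenericPatchIndex(Patch::Component5) == 1);    // second attribute
    static_assert(GenericPatchElement(Patch::Component5) == 1);  // .y component
    static_assert(GenericPatchIndex(Patch::Component119) == 29); // 30 attributes in total

    int main() {}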
diff --git a/src/shader_recompiler/frontend/ir/patch.h b/src/shader_recompiler/frontend/ir/patch.h new file mode 100644 index 000000000..6d66ff0d6 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/patch.h | |||
| @@ -0,0 +1,149 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | |||
| 9 | namespace Shader::IR { | ||
| 10 | |||
| 11 | enum class Patch : u64 { | ||
| 12 | TessellationLodLeft, | ||
| 13 | TessellationLodTop, | ||
| 14 | TessellationLodRight, | ||
| 15 | TessellationLodBottom, | ||
| 16 | TessellationLodInteriorU, | ||
| 17 | TessellationLodInteriorV, | ||
| 18 | ComponentPadding0, | ||
| 19 | ComponentPadding1, | ||
| 20 | Component0, | ||
| 21 | Component1, | ||
| 22 | Component2, | ||
| 23 | Component3, | ||
| 24 | Component4, | ||
| 25 | Component5, | ||
| 26 | Component6, | ||
| 27 | Component7, | ||
| 28 | Component8, | ||
| 29 | Component9, | ||
| 30 | Component10, | ||
| 31 | Component11, | ||
| 32 | Component12, | ||
| 33 | Component13, | ||
| 34 | Component14, | ||
| 35 | Component15, | ||
| 36 | Component16, | ||
| 37 | Component17, | ||
| 38 | Component18, | ||
| 39 | Component19, | ||
| 40 | Component20, | ||
| 41 | Component21, | ||
| 42 | Component22, | ||
| 43 | Component23, | ||
| 44 | Component24, | ||
| 45 | Component25, | ||
| 46 | Component26, | ||
| 47 | Component27, | ||
| 48 | Component28, | ||
| 49 | Component29, | ||
| 50 | Component30, | ||
| 51 | Component31, | ||
| 52 | Component32, | ||
| 53 | Component33, | ||
| 54 | Component34, | ||
| 55 | Component35, | ||
| 56 | Component36, | ||
| 57 | Component37, | ||
| 58 | Component38, | ||
| 59 | Component39, | ||
| 60 | Component40, | ||
| 61 | Component41, | ||
| 62 | Component42, | ||
| 63 | Component43, | ||
| 64 | Component44, | ||
| 65 | Component45, | ||
| 66 | Component46, | ||
| 67 | Component47, | ||
| 68 | Component48, | ||
| 69 | Component49, | ||
| 70 | Component50, | ||
| 71 | Component51, | ||
| 72 | Component52, | ||
| 73 | Component53, | ||
| 74 | Component54, | ||
| 75 | Component55, | ||
| 76 | Component56, | ||
| 77 | Component57, | ||
| 78 | Component58, | ||
| 79 | Component59, | ||
| 80 | Component60, | ||
| 81 | Component61, | ||
| 82 | Component62, | ||
| 83 | Component63, | ||
| 84 | Component64, | ||
| 85 | Component65, | ||
| 86 | Component66, | ||
| 87 | Component67, | ||
| 88 | Component68, | ||
| 89 | Component69, | ||
| 90 | Component70, | ||
| 91 | Component71, | ||
| 92 | Component72, | ||
| 93 | Component73, | ||
| 94 | Component74, | ||
| 95 | Component75, | ||
| 96 | Component76, | ||
| 97 | Component77, | ||
| 98 | Component78, | ||
| 99 | Component79, | ||
| 100 | Component80, | ||
| 101 | Component81, | ||
| 102 | Component82, | ||
| 103 | Component83, | ||
| 104 | Component84, | ||
| 105 | Component85, | ||
| 106 | Component86, | ||
| 107 | Component87, | ||
| 108 | Component88, | ||
| 109 | Component89, | ||
| 110 | Component90, | ||
| 111 | Component91, | ||
| 112 | Component92, | ||
| 113 | Component93, | ||
| 114 | Component94, | ||
| 115 | Component95, | ||
| 116 | Component96, | ||
| 117 | Component97, | ||
| 118 | Component98, | ||
| 119 | Component99, | ||
| 120 | Component100, | ||
| 121 | Component101, | ||
| 122 | Component102, | ||
| 123 | Component103, | ||
| 124 | Component104, | ||
| 125 | Component105, | ||
| 126 | Component106, | ||
| 127 | Component107, | ||
| 128 | Component108, | ||
| 129 | Component109, | ||
| 130 | Component110, | ||
| 131 | Component111, | ||
| 132 | Component112, | ||
| 133 | Component113, | ||
| 134 | Component114, | ||
| 135 | Component115, | ||
| 136 | Component116, | ||
| 137 | Component117, | ||
| 138 | Component118, | ||
| 139 | Component119, | ||
| 140 | }; | ||
| 141 | static_assert(static_cast<u64>(Patch::Component119) == 127); | ||
| 142 | |||
| 143 | [[nodiscard]] bool IsGeneric(Patch patch) noexcept; | ||
| 144 | |||
| 145 | [[nodiscard]] u32 GenericPatchIndex(Patch patch); | ||
| 146 | |||
| 147 | [[nodiscard]] u32 GenericPatchElement(Patch patch); | ||
| 148 | |||
| 149 | } // namespace Shader::IR | ||
diff --git a/src/shader_recompiler/frontend/ir/post_order.cpp b/src/shader_recompiler/frontend/ir/post_order.cpp new file mode 100644 index 000000000..16bc44101 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/post_order.cpp | |||
| @@ -0,0 +1,46 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | |||
| 7 | #include <boost/container/flat_set.hpp> | ||
| 8 | #include <boost/container/small_vector.hpp> | ||
| 9 | |||
| 10 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 11 | #include "shader_recompiler/frontend/ir/post_order.h" | ||
| 12 | |||
| 13 | namespace Shader::IR { | ||
| 14 | |||
| 15 | BlockList PostOrder(const AbstractSyntaxNode& root) { | ||
| 16 | boost::container::small_vector<Block*, 16> block_stack; | ||
| 17 | boost::container::flat_set<Block*> visited; | ||
| 18 | BlockList post_order_blocks; | ||
| 19 | |||
| 20 | if (root.type != AbstractSyntaxNode::Type::Block) { | ||
| 21 | throw LogicError("First node in abstract syntax list root is not a block"); | ||
| 22 | } | ||
| 23 | Block* const first_block{root.data.block}; | ||
| 24 | visited.insert(first_block); | ||
| 25 | block_stack.push_back(first_block); | ||
| 26 | |||
| 27 | while (!block_stack.empty()) { | ||
| 28 | Block* const block{block_stack.back()}; | ||
| 29 | const auto visit{[&](Block* branch) { | ||
| 30 | if (!visited.insert(branch).second) { | ||
| 31 | return false; | ||
| 32 | } | ||
| 33 | // Calling push_back twice is faster than insert on MSVC | ||
| 34 | block_stack.push_back(block); | ||
| 35 | block_stack.push_back(branch); | ||
| 36 | return true; | ||
| 37 | }}; | ||
| 38 | block_stack.pop_back(); | ||
| 39 | if (std::ranges::none_of(block->ImmSuccessors(), visit)) { | ||
| 40 | post_order_blocks.push_back(block); | ||
| 41 | } | ||
| 42 | } | ||
| 43 | return post_order_blocks; | ||
| 44 | } | ||
| 45 | |||
| 46 | } // namespace Shader::IR | ||
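Because a block is emitted only once every successor has been visited, the entry block always comes out last, and walking the returned list in reverse yields the reverse post-order that forward dataflow passes want. Below is a standalone sketch of the same iterative algorithm on a toy graph; Block here is a stand-in struct, not the IR type.

    #include <algorithm>
    #include <cassert>
    #include <set>
    #include <vector>

    struct Block {
        std::vector<Block*> succ; // immediate successors
    };

    std::vector<Block*> PostOrder(Block* entry) {
        std::vector<Block*> stack{entry};
        std::set<Block*> visited{entry};
        std::vector<Block*> order;
        while (!stack.empty()) {
            Block* const block{stack.back()};
            stack.pop_back();
            const auto visit{[&](Block* branch) {
                if (!visited.insert(branch).second) {
                    return false; // already scheduled or emitted
                }
                stack.push_back(block);  // revisit the parent afterwards
                stack.push_back(branch); // descend into the successor first
                return true;
            }};
            if (std::none_of(block->succ.begin(), block->succ.end(), visit)) {
                order.push_back(block); // all successors done: emit
            }
        }
        return order;
    }

    int main() {
        // Diamond control flow: a -> {b, c} -> d
        Block d, b{{&d}}, c{{&d}}, a{{&b, &c}};
        const auto order{PostOrder(&a)};
        assert(order.front() == &d); // join block first
        assert(order.back() == &a);  // entry block last
    }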
diff --git a/src/shader_recompiler/frontend/ir/post_order.h b/src/shader_recompiler/frontend/ir/post_order.h new file mode 100644 index 000000000..07bfbadc3 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/post_order.h | |||
| @@ -0,0 +1,14 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "shader_recompiler/frontend/ir/abstract_syntax_list.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 9 | |||
| 10 | namespace Shader::IR { | ||
| 11 | |||
| 12 | BlockList PostOrder(const AbstractSyntaxNode& root); | ||
| 13 | |||
| 14 | } // namespace Shader::IR | ||
diff --git a/src/shader_recompiler/frontend/ir/pred.h b/src/shader_recompiler/frontend/ir/pred.h new file mode 100644 index 000000000..4e7f32423 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/pred.h | |||
| @@ -0,0 +1,44 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <fmt/format.h> | ||
| 8 | #include "common/common_types.h" | ||
| 8 | |||
| 9 | namespace Shader::IR { | ||
| 10 | |||
| 11 | enum class Pred : u64 { | ||
| 12 | P0, | ||
| 13 | P1, | ||
| 14 | P2, | ||
| 15 | P3, | ||
| 16 | P4, | ||
| 17 | P5, | ||
| 18 | P6, | ||
| 19 | PT, | ||
| 20 | }; | ||
| 21 | |||
| 22 | constexpr size_t NUM_USER_PREDS = 7; | ||
| 23 | constexpr size_t NUM_PREDS = 8; | ||
| 24 | |||
| 25 | [[nodiscard]] constexpr size_t PredIndex(Pred pred) noexcept { | ||
| 26 | return static_cast<size_t>(pred); | ||
| 27 | } | ||
| 28 | |||
| 29 | } // namespace Shader::IR | ||
| 30 | |||
| 31 | template <> | ||
| 32 | struct fmt::formatter<Shader::IR::Pred> { | ||
| 33 | constexpr auto parse(format_parse_context& ctx) { | ||
| 34 | return ctx.begin(); | ||
| 35 | } | ||
| 36 | template <typename FormatContext> | ||
| 37 | auto format(const Shader::IR::Pred& pred, FormatContext& ctx) { | ||
| 38 | if (pred == Shader::IR::Pred::PT) { | ||
| 39 | return fmt::format_to(ctx.out(), "PT"); | ||
| 40 | } else { | ||
| 41 | return fmt::format_to(ctx.out(), "P{}", static_cast<int>(pred)); | ||
| 42 | } | ||
| 43 | } | ||
| 44 | }; | ||
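With the formatter specialization in place, predicate values can be handed straight to fmt, and the always-true predicate PT keeps its mnemonic spelling instead of being printed as P7. A small usage sketch, assuming the header above is on the include path:

    #include <cassert>
    #include <fmt/format.h>

    #include "shader_recompiler/frontend/ir/pred.h"

    int main() {
        using Shader::IR::Pred;
        assert(fmt::format("{}", Pred::P3) == "P3"); // user predicates P0..P6
        assert(fmt::format("{}", Pred::PT) == "PT"); // always-true predicate
    }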
diff --git a/src/shader_recompiler/frontend/ir/program.cpp b/src/shader_recompiler/frontend/ir/program.cpp new file mode 100644 index 000000000..3fc06f855 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/program.cpp | |||
| @@ -0,0 +1,32 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <map> | ||
| 6 | #include <string> | ||
| 7 | |||
| 8 | #include <fmt/format.h> | ||
| 9 | |||
| 10 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 11 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 12 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 13 | |||
| 14 | namespace Shader::IR { | ||
| 15 | |||
| 16 | std::string DumpProgram(const Program& program) { | ||
| 17 | size_t index{0}; | ||
| 18 | std::map<const IR::Inst*, size_t> inst_to_index; | ||
| 19 | std::map<const IR::Block*, size_t> block_to_index; | ||
| 20 | |||
| 21 | for (const IR::Block* const block : program.blocks) { | ||
| 22 | block_to_index.emplace(block, index); | ||
| 23 | ++index; | ||
| 24 | } | ||
| 25 | std::string ret; | ||
| 26 | for (const auto& block : program.blocks) { | ||
| 27 | ret += IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n'; | ||
| 28 | } | ||
| 29 | return ret; | ||
| 30 | } | ||
| 31 | |||
| 32 | } // namespace Shader::IR | ||
diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h new file mode 100644 index 000000000..ebcaa8bc2 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/program.h | |||
| @@ -0,0 +1,35 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <string> | ||
| 9 | |||
| 10 | #include "shader_recompiler/frontend/ir/abstract_syntax_list.h" | ||
| 11 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 12 | #include "shader_recompiler/program_header.h" | ||
| 13 | #include "shader_recompiler/shader_info.h" | ||
| 14 | #include "shader_recompiler/stage.h" | ||
| 15 | |||
| 16 | namespace Shader::IR { | ||
| 17 | |||
| 18 | struct Program { | ||
| 19 | AbstractSyntaxList syntax_list; | ||
| 20 | BlockList blocks; | ||
| 21 | BlockList post_order_blocks; | ||
| 22 | Info info; | ||
| 23 | Stage stage{}; | ||
| 24 | std::array<u32, 3> workgroup_size{}; | ||
| 25 | OutputTopology output_topology{}; | ||
| 26 | u32 output_vertices{}; | ||
| 27 | u32 invocations{}; | ||
| 28 | u32 local_memory_size{}; | ||
| 29 | u32 shared_memory_size{}; | ||
| 30 | bool is_geometry_passthrough{}; | ||
| 31 | }; | ||
| 32 | |||
| 33 | [[nodiscard]] std::string DumpProgram(const Program& program); | ||
| 34 | |||
| 35 | } // namespace Shader::IR | ||
diff --git a/src/shader_recompiler/frontend/ir/reg.h b/src/shader_recompiler/frontend/ir/reg.h new file mode 100644 index 000000000..a4b635792 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/reg.h | |||
| @@ -0,0 +1,332 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <fmt/format.h> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "shader_recompiler/exception.h" | ||
| 11 | |||
| 12 | namespace Shader::IR { | ||
| 13 | |||
| 14 | enum class Reg : u64 { | ||
| 15 | R0, | ||
| 16 | R1, | ||
| 17 | R2, | ||
| 18 | R3, | ||
| 19 | R4, | ||
| 20 | R5, | ||
| 21 | R6, | ||
| 22 | R7, | ||
| 23 | R8, | ||
| 24 | R9, | ||
| 25 | R10, | ||
| 26 | R11, | ||
| 27 | R12, | ||
| 28 | R13, | ||
| 29 | R14, | ||
| 30 | R15, | ||
| 31 | R16, | ||
| 32 | R17, | ||
| 33 | R18, | ||
| 34 | R19, | ||
| 35 | R20, | ||
| 36 | R21, | ||
| 37 | R22, | ||
| 38 | R23, | ||
| 39 | R24, | ||
| 40 | R25, | ||
| 41 | R26, | ||
| 42 | R27, | ||
| 43 | R28, | ||
| 44 | R29, | ||
| 45 | R30, | ||
| 46 | R31, | ||
| 47 | R32, | ||
| 48 | R33, | ||
| 49 | R34, | ||
| 50 | R35, | ||
| 51 | R36, | ||
| 52 | R37, | ||
| 53 | R38, | ||
| 54 | R39, | ||
| 55 | R40, | ||
| 56 | R41, | ||
| 57 | R42, | ||
| 58 | R43, | ||
| 59 | R44, | ||
| 60 | R45, | ||
| 61 | R46, | ||
| 62 | R47, | ||
| 63 | R48, | ||
| 64 | R49, | ||
| 65 | R50, | ||
| 66 | R51, | ||
| 67 | R52, | ||
| 68 | R53, | ||
| 69 | R54, | ||
| 70 | R55, | ||
| 71 | R56, | ||
| 72 | R57, | ||
| 73 | R58, | ||
| 74 | R59, | ||
| 75 | R60, | ||
| 76 | R61, | ||
| 77 | R62, | ||
| 78 | R63, | ||
| 79 | R64, | ||
| 80 | R65, | ||
| 81 | R66, | ||
| 82 | R67, | ||
| 83 | R68, | ||
| 84 | R69, | ||
| 85 | R70, | ||
| 86 | R71, | ||
| 87 | R72, | ||
| 88 | R73, | ||
| 89 | R74, | ||
| 90 | R75, | ||
| 91 | R76, | ||
| 92 | R77, | ||
| 93 | R78, | ||
| 94 | R79, | ||
| 95 | R80, | ||
| 96 | R81, | ||
| 97 | R82, | ||
| 98 | R83, | ||
| 99 | R84, | ||
| 100 | R85, | ||
| 101 | R86, | ||
| 102 | R87, | ||
| 103 | R88, | ||
| 104 | R89, | ||
| 105 | R90, | ||
| 106 | R91, | ||
| 107 | R92, | ||
| 108 | R93, | ||
| 109 | R94, | ||
| 110 | R95, | ||
| 111 | R96, | ||
| 112 | R97, | ||
| 113 | R98, | ||
| 114 | R99, | ||
| 115 | R100, | ||
| 116 | R101, | ||
| 117 | R102, | ||
| 118 | R103, | ||
| 119 | R104, | ||
| 120 | R105, | ||
| 121 | R106, | ||
| 122 | R107, | ||
| 123 | R108, | ||
| 124 | R109, | ||
| 125 | R110, | ||
| 126 | R111, | ||
| 127 | R112, | ||
| 128 | R113, | ||
| 129 | R114, | ||
| 130 | R115, | ||
| 131 | R116, | ||
| 132 | R117, | ||
| 133 | R118, | ||
| 134 | R119, | ||
| 135 | R120, | ||
| 136 | R121, | ||
| 137 | R122, | ||
| 138 | R123, | ||
| 139 | R124, | ||
| 140 | R125, | ||
| 141 | R126, | ||
| 142 | R127, | ||
| 143 | R128, | ||
| 144 | R129, | ||
| 145 | R130, | ||
| 146 | R131, | ||
| 147 | R132, | ||
| 148 | R133, | ||
| 149 | R134, | ||
| 150 | R135, | ||
| 151 | R136, | ||
| 152 | R137, | ||
| 153 | R138, | ||
| 154 | R139, | ||
| 155 | R140, | ||
| 156 | R141, | ||
| 157 | R142, | ||
| 158 | R143, | ||
| 159 | R144, | ||
| 160 | R145, | ||
| 161 | R146, | ||
| 162 | R147, | ||
| 163 | R148, | ||
| 164 | R149, | ||
| 165 | R150, | ||
| 166 | R151, | ||
| 167 | R152, | ||
| 168 | R153, | ||
| 169 | R154, | ||
| 170 | R155, | ||
| 171 | R156, | ||
| 172 | R157, | ||
| 173 | R158, | ||
| 174 | R159, | ||
| 175 | R160, | ||
| 176 | R161, | ||
| 177 | R162, | ||
| 178 | R163, | ||
| 179 | R164, | ||
| 180 | R165, | ||
| 181 | R166, | ||
| 182 | R167, | ||
| 183 | R168, | ||
| 184 | R169, | ||
| 185 | R170, | ||
| 186 | R171, | ||
| 187 | R172, | ||
| 188 | R173, | ||
| 189 | R174, | ||
| 190 | R175, | ||
| 191 | R176, | ||
| 192 | R177, | ||
| 193 | R178, | ||
| 194 | R179, | ||
| 195 | R180, | ||
| 196 | R181, | ||
| 197 | R182, | ||
| 198 | R183, | ||
| 199 | R184, | ||
| 200 | R185, | ||
| 201 | R186, | ||
| 202 | R187, | ||
| 203 | R188, | ||
| 204 | R189, | ||
| 205 | R190, | ||
| 206 | R191, | ||
| 207 | R192, | ||
| 208 | R193, | ||
| 209 | R194, | ||
| 210 | R195, | ||
| 211 | R196, | ||
| 212 | R197, | ||
| 213 | R198, | ||
| 214 | R199, | ||
| 215 | R200, | ||
| 216 | R201, | ||
| 217 | R202, | ||
| 218 | R203, | ||
| 219 | R204, | ||
| 220 | R205, | ||
| 221 | R206, | ||
| 222 | R207, | ||
| 223 | R208, | ||
| 224 | R209, | ||
| 225 | R210, | ||
| 226 | R211, | ||
| 227 | R212, | ||
| 228 | R213, | ||
| 229 | R214, | ||
| 230 | R215, | ||
| 231 | R216, | ||
| 232 | R217, | ||
| 233 | R218, | ||
| 234 | R219, | ||
| 235 | R220, | ||
| 236 | R221, | ||
| 237 | R222, | ||
| 238 | R223, | ||
| 239 | R224, | ||
| 240 | R225, | ||
| 241 | R226, | ||
| 242 | R227, | ||
| 243 | R228, | ||
| 244 | R229, | ||
| 245 | R230, | ||
| 246 | R231, | ||
| 247 | R232, | ||
| 248 | R233, | ||
| 249 | R234, | ||
| 250 | R235, | ||
| 251 | R236, | ||
| 252 | R237, | ||
| 253 | R238, | ||
| 254 | R239, | ||
| 255 | R240, | ||
| 256 | R241, | ||
| 257 | R242, | ||
| 258 | R243, | ||
| 259 | R244, | ||
| 260 | R245, | ||
| 261 | R246, | ||
| 262 | R247, | ||
| 263 | R248, | ||
| 264 | R249, | ||
| 265 | R250, | ||
| 266 | R251, | ||
| 267 | R252, | ||
| 268 | R253, | ||
| 269 | R254, | ||
| 270 | RZ, | ||
| 271 | }; | ||
| 272 | static_assert(static_cast<int>(Reg::RZ) == 255); | ||
| 273 | |||
| 274 | constexpr size_t NUM_USER_REGS = 255; | ||
| 275 | constexpr size_t NUM_REGS = 256; | ||
| 276 | |||
| 277 | [[nodiscard]] constexpr Reg operator+(Reg reg, int num) { | ||
| 278 | if (reg == Reg::RZ) { | ||
| 279 | // Adding an offset to RZ or subtracting one from it yields RZ | ||
| 280 | return Reg::RZ; | ||
| 281 | } | ||
| 282 | const int result{static_cast<int>(reg) + num}; | ||
| 283 | if (result >= static_cast<int>(Reg::RZ)) { | ||
| 284 | throw LogicError("Overflow on register arithmetic"); | ||
| 285 | } | ||
| 286 | if (result < 0) { | ||
| 287 | throw LogicError("Underflow on register arithmetic"); | ||
| 288 | } | ||
| 289 | return static_cast<Reg>(result); | ||
| 290 | } | ||
| 291 | |||
| 292 | [[nodiscard]] constexpr Reg operator-(Reg reg, int num) { | ||
| 293 | return reg + (-num); | ||
| 294 | } | ||
| 295 | |||
| 296 | constexpr Reg operator++(Reg& reg) { | ||
| 297 | reg = reg + 1; | ||
| 298 | return reg; | ||
| 299 | } | ||
| 300 | |||
| 301 | constexpr Reg operator++(Reg& reg, int) { | ||
| 302 | const Reg copy{reg}; | ||
| 303 | reg = reg + 1; | ||
| 304 | return copy; | ||
| 305 | } | ||
| 306 | |||
| 307 | [[nodiscard]] constexpr size_t RegIndex(Reg reg) noexcept { | ||
| 308 | return static_cast<size_t>(reg); | ||
| 309 | } | ||
| 310 | |||
| 311 | [[nodiscard]] constexpr bool IsAligned(Reg reg, size_t align) { | ||
| 312 | return RegIndex(reg) % align == 0 || reg == Reg::RZ; | ||
| 313 | } | ||
| 314 | |||
| 315 | } // namespace Shader::IR | ||
| 316 | |||
| 317 | template <> | ||
| 318 | struct fmt::formatter<Shader::IR::Reg> { | ||
| 319 | constexpr auto parse(format_parse_context& ctx) { | ||
| 320 | return ctx.begin(); | ||
| 321 | } | ||
| 322 | template <typename FormatContext> | ||
| 323 | auto format(const Shader::IR::Reg& reg, FormatContext& ctx) { | ||
| 324 | if (reg == Shader::IR::Reg::RZ) { | ||
| 325 | return fmt::format_to(ctx.out(), "RZ"); | ||
| 326 | } else if (static_cast<int>(reg) >= 0 && static_cast<int>(reg) < 255) { | ||
| 327 | return fmt::format_to(ctx.out(), "R{}", static_cast<int>(reg)); | ||
| 328 | } else { | ||
| 329 | throw Shader::LogicError("Invalid register with raw value {}", static_cast<int>(reg)); | ||
| 330 | } | ||
| 331 | } | ||
| 332 | }; | ||
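The overloads above give register arithmetic two deliberate quirks: RZ absorbs any offset, and a result that would land on or past RZ throws instead of silently aliasing the zero register. A usage sketch, assuming the header above is available and that LogicError ultimately derives from std::exception:

    #include <cassert>
    #include <exception>

    #include "shader_recompiler/frontend/ir/reg.h"

    int main() {
        using Shader::IR::Reg;
        static_assert(Reg::R4 + 2 == Reg::R6);            // ordinary offset
        static_assert(Reg::RZ + 100 == Reg::RZ);          // RZ absorbs offsets
        static_assert(Shader::IR::IsAligned(Reg::R8, 4)); // e.g. vector operands
        bool threw{false};
        try {
            [[maybe_unused]] const Reg r{Reg::R254 + 1};  // would alias RZ
        } catch (const std::exception&) {
            threw = true;
        }
        assert(threw);
    }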
diff --git a/src/shader_recompiler/frontend/ir/type.cpp b/src/shader_recompiler/frontend/ir/type.cpp new file mode 100644 index 000000000..f28341bfe --- /dev/null +++ b/src/shader_recompiler/frontend/ir/type.cpp | |||
| @@ -0,0 +1,38 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | #include <string> | ||
| 7 | |||
| 8 | #include "shader_recompiler/frontend/ir/type.h" | ||
| 9 | |||
| 10 | namespace Shader::IR { | ||
| 11 | |||
| 12 | std::string NameOf(Type type) { | ||
| 13 | static constexpr std::array names{ | ||
| 14 | "Opaque", "Label", "Reg", "Pred", "Attribute", "U1", "U8", "U16", "U32", | ||
| 15 | "U64", "F16", "F32", "F64", "U32x2", "U32x3", "U32x4", "F16x2", "F16x3", | ||
| 16 | "F16x4", "F32x2", "F32x3", "F32x4", "F64x2", "F64x3", "F64x4", | ||
| 17 | }; | ||
| 18 | const size_t bits{static_cast<size_t>(type)}; | ||
| 19 | if (bits == 0) { | ||
| 20 | return "Void"; | ||
| 21 | } | ||
| 22 | std::string result; | ||
| 23 | for (size_t i = 0; i < names.size(); i++) { | ||
| 24 | if ((bits & (size_t{1} << i)) != 0) { | ||
| 25 | if (!result.empty()) { | ||
| 26 | result += '|'; | ||
| 27 | } | ||
| 28 | result += names[i]; | ||
| 29 | } | ||
| 30 | } | ||
| 31 | return result; | ||
| 32 | } | ||
| 33 | |||
| 34 | bool AreTypesCompatible(Type lhs, Type rhs) noexcept { | ||
| 35 | return lhs == rhs || lhs == Type::Opaque || rhs == Type::Opaque; | ||
| 36 | } | ||
| 37 | |||
| 38 | } // namespace Shader::IR | ||
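Because Type is a bit set, NameOf decomposes a mask into its member names joined by '|' (index i of the table names bit 1 << i, matching the enum in type.h below), which is what keeps the "Incompatible types" style exceptions elsewhere in the IR readable. For example, assuming the headers above:

    #include <cassert>

    #include "shader_recompiler/frontend/ir/type.h"

    int main() {
        using Shader::IR::Type;
        assert(Shader::IR::NameOf(Type::Void) == "Void");
        // Multi-bit masks such as the F32F64 alias print every set member:
        assert(Shader::IR::NameOf(Type::F32 | Type::F64) == "F32|F64");
    }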
diff --git a/src/shader_recompiler/frontend/ir/type.h b/src/shader_recompiler/frontend/ir/type.h new file mode 100644 index 000000000..294b230c4 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/type.h | |||
| @@ -0,0 +1,61 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <string> | ||
| 8 | |||
| 9 | #include <fmt/format.h> | ||
| 10 | |||
| 11 | #include "common/common_funcs.h" | ||
| 12 | #include "shader_recompiler/exception.h" | ||
| 13 | |||
| 14 | namespace Shader::IR { | ||
| 15 | |||
| 16 | enum class Type { | ||
| 17 | Void = 0, | ||
| 18 | Opaque = 1 << 0, | ||
| 19 | Reg = 1 << 1, | ||
| 20 | Pred = 1 << 2, | ||
| 21 | Attribute = 1 << 3, | ||
| 22 | Patch = 1 << 4, | ||
| 23 | U1 = 1 << 5, | ||
| 24 | U8 = 1 << 6, | ||
| 25 | U16 = 1 << 7, | ||
| 26 | U32 = 1 << 8, | ||
| 27 | U64 = 1 << 9, | ||
| 28 | F16 = 1 << 10, | ||
| 29 | F32 = 1 << 11, | ||
| 30 | F64 = 1 << 12, | ||
| 31 | U32x2 = 1 << 13, | ||
| 32 | U32x3 = 1 << 14, | ||
| 33 | U32x4 = 1 << 15, | ||
| 34 | F16x2 = 1 << 16, | ||
| 35 | F16x3 = 1 << 17, | ||
| 36 | F16x4 = 1 << 18, | ||
| 37 | F32x2 = 1 << 19, | ||
| 38 | F32x3 = 1 << 20, | ||
| 39 | F32x4 = 1 << 21, | ||
| 40 | F64x2 = 1 << 22, | ||
| 41 | F64x3 = 1 << 23, | ||
| 42 | F64x4 = 1 << 24, | ||
| 43 | }; | ||
| 44 | DECLARE_ENUM_FLAG_OPERATORS(Type) | ||
| 45 | |||
| 46 | [[nodiscard]] std::string NameOf(Type type); | ||
| 47 | |||
| 48 | [[nodiscard]] bool AreTypesCompatible(Type lhs, Type rhs) noexcept; | ||
| 49 | |||
| 50 | } // namespace Shader::IR | ||
| 51 | |||
| 52 | template <> | ||
| 53 | struct fmt::formatter<Shader::IR::Type> { | ||
| 54 | constexpr auto parse(format_parse_context& ctx) { | ||
| 55 | return ctx.begin(); | ||
| 56 | } | ||
| 57 | template <typename FormatContext> | ||
| 58 | auto format(const Shader::IR::Type& type, FormatContext& ctx) { | ||
| 59 | return fmt::format_to(ctx.out(), "{}", NameOf(type)); | ||
| 60 | } | ||
| 61 | }; | ||
diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp new file mode 100644 index 000000000..d365ea1bc --- /dev/null +++ b/src/shader_recompiler/frontend/ir/value.cpp | |||
| @@ -0,0 +1,99 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/frontend/ir/opcodes.h" | ||
| 6 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 7 | |||
| 8 | namespace Shader::IR { | ||
| 9 | |||
| 10 | Value::Value(IR::Inst* value) noexcept : type{Type::Opaque}, inst{value} {} | ||
| 11 | |||
| 12 | Value::Value(IR::Reg value) noexcept : type{Type::Reg}, reg{value} {} | ||
| 13 | |||
| 14 | Value::Value(IR::Pred value) noexcept : type{Type::Pred}, pred{value} {} | ||
| 15 | |||
| 16 | Value::Value(IR::Attribute value) noexcept : type{Type::Attribute}, attribute{value} {} | ||
| 17 | |||
| 18 | Value::Value(IR::Patch value) noexcept : type{Type::Patch}, patch{value} {} | ||
| 19 | |||
| 20 | Value::Value(bool value) noexcept : type{Type::U1}, imm_u1{value} {} | ||
| 21 | |||
| 22 | Value::Value(u8 value) noexcept : type{Type::U8}, imm_u8{value} {} | ||
| 23 | |||
| 24 | Value::Value(u16 value) noexcept : type{Type::U16}, imm_u16{value} {} | ||
| 25 | |||
| 26 | Value::Value(u32 value) noexcept : type{Type::U32}, imm_u32{value} {} | ||
| 27 | |||
| 28 | Value::Value(f32 value) noexcept : type{Type::F32}, imm_f32{value} {} | ||
| 29 | |||
| 30 | Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {} | ||
| 31 | |||
| 32 | Value::Value(f64 value) noexcept : type{Type::F64}, imm_f64{value} {} | ||
| 33 | |||
| 34 | IR::Type Value::Type() const noexcept { | ||
| 35 | if (IsPhi()) { | ||
| 36 | // The type of a phi node is stored in its flags | ||
| 37 | return inst->Flags<IR::Type>(); | ||
| 38 | } | ||
| 39 | if (IsIdentity()) { | ||
| 40 | return inst->Arg(0).Type(); | ||
| 41 | } | ||
| 42 | if (type == Type::Opaque) { | ||
| 43 | return inst->Type(); | ||
| 44 | } | ||
| 45 | return type; | ||
| 46 | } | ||
| 47 | |||
| 48 | bool Value::operator==(const Value& other) const { | ||
| 49 | if (type != other.type) { | ||
| 50 | return false; | ||
| 51 | } | ||
| 52 | switch (type) { | ||
| 53 | case Type::Void: | ||
| 54 | return true; | ||
| 55 | case Type::Opaque: | ||
| 56 | return inst == other.inst; | ||
| 57 | case Type::Reg: | ||
| 58 | return reg == other.reg; | ||
| 59 | case Type::Pred: | ||
| 60 | return pred == other.pred; | ||
| 61 | case Type::Attribute: | ||
| 62 | return attribute == other.attribute; | ||
| 63 | case Type::Patch: | ||
| 64 | return patch == other.patch; | ||
| 65 | case Type::U1: | ||
| 66 | return imm_u1 == other.imm_u1; | ||
| 67 | case Type::U8: | ||
| 68 | return imm_u8 == other.imm_u8; | ||
| 69 | case Type::U16: | ||
| 70 | case Type::F16: | ||
| 71 | return imm_u16 == other.imm_u16; | ||
| 72 | case Type::U32: | ||
| 73 | case Type::F32: | ||
| 74 | return imm_u32 == other.imm_u32; | ||
| 75 | case Type::U64: | ||
| 76 | case Type::F64: | ||
| 77 | return imm_u64 == other.imm_u64; | ||
| 78 | case Type::U32x2: | ||
| 79 | case Type::U32x3: | ||
| 80 | case Type::U32x4: | ||
| 81 | case Type::F16x2: | ||
| 82 | case Type::F16x3: | ||
| 83 | case Type::F16x4: | ||
| 84 | case Type::F32x2: | ||
| 85 | case Type::F32x3: | ||
| 86 | case Type::F32x4: | ||
| 87 | case Type::F64x2: | ||
| 88 | case Type::F64x3: | ||
| 89 | case Type::F64x4: | ||
| 90 | break; | ||
| 91 | } | ||
| 92 | throw LogicError("Invalid type {}", type); | ||
| 93 | } | ||
| 94 | |||
| 95 | bool Value::operator!=(const Value& other) const { | ||
| 96 | return !operator==(other); | ||
| 97 | } | ||
| 98 | |||
| 99 | } // namespace Shader::IR | ||
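One subtlety worth noting: operator== compares floating-point immediates through the integer members of the union, i.e. by bit pattern rather than by IEEE value, so +0.0f and -0.0f compare unequal while two NaNs with identical payloads compare equal. The snippet below illustrates the same comparison rule standalone; it does not use the real Value class.

    #include <cassert>
    #include <cstdint>
    #include <cstring>
    #include <limits>

    // Bit-pattern comparison, as Value::operator== effectively does for Type::F32.
    bool BitEqual(float a, float b) {
        std::uint32_t ua;
        std::uint32_t ub;
        std::memcpy(&ua, &a, sizeof(ua));
        std::memcpy(&ub, &b, sizeof(ub));
        return ua == ub;
    }

    int main() {
        assert(!BitEqual(0.0f, -0.0f)); // IEEE == would call these equal
        const float nan{std::numeric_limits<float>::quiet_NaN()};
        assert(BitEqual(nan, nan));     // IEEE == would call these unequal
    }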
diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h new file mode 100644 index 000000000..0c6bf684d --- /dev/null +++ b/src/shader_recompiler/frontend/ir/value.h | |||
| @@ -0,0 +1,398 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <cstring> | ||
| 9 | #include <memory> | ||
| 10 | #include <type_traits> | ||
| 11 | #include <utility> | ||
| 12 | #include <vector> | ||
| 13 | |||
| 14 | #include <boost/container/small_vector.hpp> | ||
| 15 | #include <boost/intrusive/list.hpp> | ||
| 16 | |||
| 17 | #include "common/assert.h" | ||
| 18 | #include "common/bit_cast.h" | ||
| 19 | #include "common/common_types.h" | ||
| 20 | #include "shader_recompiler/exception.h" | ||
| 21 | #include "shader_recompiler/frontend/ir/attribute.h" | ||
| 22 | #include "shader_recompiler/frontend/ir/opcodes.h" | ||
| 23 | #include "shader_recompiler/frontend/ir/patch.h" | ||
| 24 | #include "shader_recompiler/frontend/ir/pred.h" | ||
| 25 | #include "shader_recompiler/frontend/ir/reg.h" | ||
| 26 | #include "shader_recompiler/frontend/ir/type.h" | ||
| 28 | |||
| 29 | namespace Shader::IR { | ||
| 30 | |||
| 31 | class Block; | ||
| 32 | class Inst; | ||
| 33 | |||
| 34 | struct AssociatedInsts; | ||
| 35 | |||
| 36 | class Value { | ||
| 37 | public: | ||
| 38 | Value() noexcept = default; | ||
| 39 | explicit Value(IR::Inst* value) noexcept; | ||
| 40 | explicit Value(IR::Reg value) noexcept; | ||
| 41 | explicit Value(IR::Pred value) noexcept; | ||
| 42 | explicit Value(IR::Attribute value) noexcept; | ||
| 43 | explicit Value(IR::Patch value) noexcept; | ||
| 44 | explicit Value(bool value) noexcept; | ||
| 45 | explicit Value(u8 value) noexcept; | ||
| 46 | explicit Value(u16 value) noexcept; | ||
| 47 | explicit Value(u32 value) noexcept; | ||
| 48 | explicit Value(f32 value) noexcept; | ||
| 49 | explicit Value(u64 value) noexcept; | ||
| 50 | explicit Value(f64 value) noexcept; | ||
| 51 | |||
| 52 | [[nodiscard]] bool IsIdentity() const noexcept; | ||
| 53 | [[nodiscard]] bool IsPhi() const noexcept; | ||
| 54 | [[nodiscard]] bool IsEmpty() const noexcept; | ||
| 55 | [[nodiscard]] bool IsImmediate() const noexcept; | ||
| 56 | [[nodiscard]] IR::Type Type() const noexcept; | ||
| 57 | |||
| 58 | [[nodiscard]] IR::Inst* Inst() const; | ||
| 59 | [[nodiscard]] IR::Inst* InstRecursive() const; | ||
| 60 | [[nodiscard]] IR::Value Resolve() const; | ||
| 61 | [[nodiscard]] IR::Reg Reg() const; | ||
| 62 | [[nodiscard]] IR::Pred Pred() const; | ||
| 63 | [[nodiscard]] IR::Attribute Attribute() const; | ||
| 64 | [[nodiscard]] IR::Patch Patch() const; | ||
| 65 | [[nodiscard]] bool U1() const; | ||
| 66 | [[nodiscard]] u8 U8() const; | ||
| 67 | [[nodiscard]] u16 U16() const; | ||
| 68 | [[nodiscard]] u32 U32() const; | ||
| 69 | [[nodiscard]] f32 F32() const; | ||
| 70 | [[nodiscard]] u64 U64() const; | ||
| 71 | [[nodiscard]] f64 F64() const; | ||
| 72 | |||
| 73 | [[nodiscard]] bool operator==(const Value& other) const; | ||
| 74 | [[nodiscard]] bool operator!=(const Value& other) const; | ||
| 75 | |||
| 76 | private: | ||
| 77 | IR::Type type{}; | ||
| 78 | union { | ||
| 79 | IR::Inst* inst{}; | ||
| 80 | IR::Reg reg; | ||
| 81 | IR::Pred pred; | ||
| 82 | IR::Attribute attribute; | ||
| 83 | IR::Patch patch; | ||
| 84 | bool imm_u1; | ||
| 85 | u8 imm_u8; | ||
| 86 | u16 imm_u16; | ||
| 87 | u32 imm_u32; | ||
| 88 | f32 imm_f32; | ||
| 89 | u64 imm_u64; | ||
| 90 | f64 imm_f64; | ||
| 91 | }; | ||
| 92 | }; | ||
| 93 | static_assert(static_cast<u32>(IR::Type::Void) == 0, "memset relies on IR::Type being zero"); | ||
| 94 | static_assert(std::is_trivially_copyable_v<Value>); | ||
| 95 | |||
| 96 | template <IR::Type type_> | ||
| 97 | class TypedValue : public Value { | ||
| 98 | public: | ||
| 99 | TypedValue() = default; | ||
| 100 | |||
| 101 | template <IR::Type other_type> | ||
| 102 | requires((other_type & type_) != IR::Type::Void) explicit(false) | ||
| 103 | TypedValue(const TypedValue<other_type>& value) | ||
| 104 | : Value(value) {} | ||
| 105 | |||
| 106 | explicit TypedValue(const Value& value) : Value(value) { | ||
| 107 | if ((value.Type() & type_) == IR::Type::Void) { | ||
| 108 | throw InvalidArgument("Incompatible types {} and {}", type_, value.Type()); | ||
| 109 | } | ||
| 110 | } | ||
| 111 | |||
| 112 | explicit TypedValue(IR::Inst* inst_) : TypedValue(Value(inst_)) {} | ||
| 113 | }; | ||
| 114 | |||
| 115 | class Inst : public boost::intrusive::list_base_hook<> { | ||
| 116 | public: | ||
| 117 | explicit Inst(IR::Opcode op_, u32 flags_) noexcept; | ||
| 118 | ~Inst(); | ||
| 119 | |||
| 120 | Inst& operator=(const Inst&) = delete; | ||
| 121 | Inst(const Inst&) = delete; | ||
| 122 | |||
| 123 | Inst& operator=(Inst&&) = delete; | ||
| 124 | Inst(Inst&&) = delete; | ||
| 125 | |||
| 126 | /// Get the number of uses this instruction has. | ||
| 127 | [[nodiscard]] int UseCount() const noexcept { | ||
| 128 | return use_count; | ||
| 129 | } | ||
| 130 | |||
| 131 | /// Determines whether this instruction has uses or not. | ||
| 132 | [[nodiscard]] bool HasUses() const noexcept { | ||
| 133 | return use_count > 0; | ||
| 134 | } | ||
| 135 | |||
| 136 | /// Get the opcode this microinstruction represents. | ||
| 137 | [[nodiscard]] IR::Opcode GetOpcode() const noexcept { | ||
| 138 | return op; | ||
| 139 | } | ||
| 140 | |||
| 141 | /// Determines if there is a pseudo-operation associated with this instruction. | ||
| 142 | [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept { | ||
| 143 | return associated_insts != nullptr; | ||
| 144 | } | ||
| 145 | |||
| 146 | /// Determines whether or not this instruction may have side effects. | ||
| 147 | [[nodiscard]] bool MayHaveSideEffects() const noexcept; | ||
| 148 | |||
| 149 | /// Determines whether or not this instruction is a pseudo-instruction. | ||
| 150 | /// Pseudo-instructions depend on their parent instructions for their semantics. | ||
| 151 | [[nodiscard]] bool IsPseudoInstruction() const noexcept; | ||
| 152 | |||
| 153 | /// Determines if all arguments of this instruction are immediates. | ||
| 154 | [[nodiscard]] bool AreAllArgsImmediates() const; | ||
| 155 | |||
| 156 | /// Gets a pseudo-operation associated with this instruction | ||
| 157 | [[nodiscard]] Inst* GetAssociatedPseudoOperation(IR::Opcode opcode); | ||
| 158 | |||
| 159 | /// Get the type this instruction returns. | ||
| 160 | [[nodiscard]] IR::Type Type() const; | ||
| 161 | |||
| 162 | /// Get the number of arguments this instruction has. | ||
| 163 | [[nodiscard]] size_t NumArgs() const { | ||
| 164 | return op == IR::Opcode::Phi ? phi_args.size() : NumArgsOf(op); | ||
| 165 | } | ||
| 166 | |||
| 167 | /// Get the value of a given argument index. | ||
| 168 | [[nodiscard]] Value Arg(size_t index) const noexcept { | ||
| 169 | if (op == IR::Opcode::Phi) { | ||
| 170 | return phi_args[index].second; | ||
| 171 | } else { | ||
| 172 | return args[index]; | ||
| 173 | } | ||
| 174 | } | ||
| 175 | |||
| 176 | /// Set the value of a given argument index. | ||
| 177 | void SetArg(size_t index, Value value); | ||
| 178 | |||
| 179 | /// Get a pointer to the block of a phi argument. | ||
| 180 | [[nodiscard]] Block* PhiBlock(size_t index) const; | ||
| 181 | /// Add phi operand to a phi instruction. | ||
| 182 | void AddPhiOperand(Block* predecessor, const Value& value); | ||
| 183 | |||
| 184 | void Invalidate(); | ||
| 185 | void ClearArgs(); | ||
| 186 | |||
| 187 | void ReplaceUsesWith(Value replacement); | ||
| 188 | |||
| 189 | void ReplaceOpcode(IR::Opcode opcode); | ||
| 190 | |||
| 191 | template <typename FlagsType> | ||
| 192 | requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>) | ||
| 193 | [[nodiscard]] FlagsType Flags() const noexcept { | ||
| 194 | FlagsType ret; | ||
| 195 | std::memcpy(reinterpret_cast<char*>(&ret), &flags, sizeof(ret)); | ||
| 196 | return ret; | ||
| 197 | } | ||
| 198 | |||
| 199 | template <typename FlagsType> | ||
| 200 | requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>) | ||
| 201 | void SetFlags(FlagsType value) noexcept { | ||
| 202 | std::memcpy(&flags, &value, sizeof(value)); | ||
| 203 | } | ||
| 204 | |||
| 205 | /// Intrusively store the host definition of this instruction. | ||
| 206 | template <typename DefinitionType> | ||
| 207 | void SetDefinition(DefinitionType def) { | ||
| 208 | definition = Common::BitCast<u32>(def); | ||
| 209 | } | ||
| 210 | |||
| 211 | /// Return the intrusively stored host definition of this instruction. | ||
| 212 | template <typename DefinitionType> | ||
| 213 | [[nodiscard]] DefinitionType Definition() const noexcept { | ||
| 214 | return Common::BitCast<DefinitionType>(definition); | ||
| 215 | } | ||
| 216 | |||
| 217 | /// Destructively remove one reference count from the instruction | ||
| 218 | /// Useful for register allocation | ||
| 219 | void DestructiveRemoveUsage() { | ||
| 220 | --use_count; | ||
| 221 | } | ||
| 222 | |||
| 223 | /// Destructively add usages to the instruction | ||
| 224 | /// Useful for register allocation | ||
| 225 | void DestructiveAddUsage(int count) { | ||
| 226 | use_count += count; | ||
| 227 | } | ||
| 228 | |||
| 229 | private: | ||
| 230 | struct NonTriviallyDummy { | ||
| 231 | NonTriviallyDummy() noexcept {} | ||
| 232 | }; | ||
| 233 | |||
| 234 | void Use(const Value& value); | ||
| 235 | void UndoUse(const Value& value); | ||
| 236 | |||
| 237 | IR::Opcode op{}; | ||
| 238 | int use_count{}; | ||
| 239 | u32 flags{}; | ||
| 240 | u32 definition{}; | ||
| 241 | union { | ||
| 242 | NonTriviallyDummy dummy{}; | ||
| 243 | boost::container::small_vector<std::pair<Block*, Value>, 2> phi_args; | ||
| 244 | std::array<Value, 5> args; | ||
| 245 | }; | ||
| 246 | std::unique_ptr<AssociatedInsts> associated_insts; | ||
| 247 | }; | ||
| 248 | static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased"); | ||
| 249 | |||
| 250 | struct AssociatedInsts { | ||
| 251 | union { | ||
| 252 | Inst* in_bounds_inst; | ||
| 253 | Inst* sparse_inst; | ||
| 254 | Inst* zero_inst{}; | ||
| 255 | }; | ||
| 256 | Inst* sign_inst{}; | ||
| 257 | Inst* carry_inst{}; | ||
| 258 | Inst* overflow_inst{}; | ||
| 259 | }; | ||
| 260 | |||
| 261 | using U1 = TypedValue<Type::U1>; | ||
| 262 | using U8 = TypedValue<Type::U8>; | ||
| 263 | using U16 = TypedValue<Type::U16>; | ||
| 264 | using U32 = TypedValue<Type::U32>; | ||
| 265 | using U64 = TypedValue<Type::U64>; | ||
| 266 | using F16 = TypedValue<Type::F16>; | ||
| 267 | using F32 = TypedValue<Type::F32>; | ||
| 268 | using F64 = TypedValue<Type::F64>; | ||
| 269 | using U32U64 = TypedValue<Type::U32 | Type::U64>; | ||
| 270 | using F32F64 = TypedValue<Type::F32 | Type::F64>; | ||
| 271 | using U16U32U64 = TypedValue<Type::U16 | Type::U32 | Type::U64>; | ||
| 272 | using F16F32F64 = TypedValue<Type::F16 | Type::F32 | Type::F64>; | ||
| 273 | using UAny = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64>; | ||
| 274 | |||
| 275 | inline bool Value::IsIdentity() const noexcept { | ||
| 276 | return type == Type::Opaque && inst->GetOpcode() == Opcode::Identity; | ||
| 277 | } | ||
| 278 | |||
| 279 | inline bool Value::IsPhi() const noexcept { | ||
| 280 | return type == Type::Opaque && inst->GetOpcode() == Opcode::Phi; | ||
| 281 | } | ||
| 282 | |||
| 283 | inline bool Value::IsEmpty() const noexcept { | ||
| 284 | return type == Type::Void; | ||
| 285 | } | ||
| 286 | |||
| 287 | inline bool Value::IsImmediate() const noexcept { | ||
| 288 | IR::Type current_type{type}; | ||
| 289 | const IR::Inst* current_inst{inst}; | ||
| 290 | while (current_type == Type::Opaque && current_inst->GetOpcode() == Opcode::Identity) { | ||
| 291 | const Value& arg{current_inst->Arg(0)}; | ||
| 292 | current_type = arg.type; | ||
| 293 | current_inst = arg.inst; | ||
| 294 | } | ||
| 295 | return current_type != Type::Opaque; | ||
| 296 | } | ||
| 297 | |||
| 298 | inline IR::Inst* Value::Inst() const { | ||
| 299 | DEBUG_ASSERT(type == Type::Opaque); | ||
| 300 | return inst; | ||
| 301 | } | ||
| 302 | |||
| 303 | inline IR::Inst* Value::InstRecursive() const { | ||
| 304 | DEBUG_ASSERT(type == Type::Opaque); | ||
| 305 | if (IsIdentity()) { | ||
| 306 | return inst->Arg(0).InstRecursive(); | ||
| 307 | } | ||
| 308 | return inst; | ||
| 309 | } | ||
| 310 | |||
| 311 | inline IR::Value Value::Resolve() const { | ||
| 312 | if (IsIdentity()) { | ||
| 313 | return inst->Arg(0).Resolve(); | ||
| 314 | } | ||
| 315 | return *this; | ||
| 316 | } | ||
| 317 | |||
| 318 | inline IR::Reg Value::Reg() const { | ||
| 319 | DEBUG_ASSERT(type == Type::Reg); | ||
| 320 | return reg; | ||
| 321 | } | ||
| 322 | |||
| 323 | inline IR::Pred Value::Pred() const { | ||
| 324 | DEBUG_ASSERT(type == Type::Pred); | ||
| 325 | return pred; | ||
| 326 | } | ||
| 327 | |||
| 328 | inline IR::Attribute Value::Attribute() const { | ||
| 329 | DEBUG_ASSERT(type == Type::Attribute); | ||
| 330 | return attribute; | ||
| 331 | } | ||
| 332 | |||
| 333 | inline IR::Patch Value::Patch() const { | ||
| 334 | DEBUG_ASSERT(type == Type::Patch); | ||
| 335 | return patch; | ||
| 336 | } | ||
| 337 | |||
| 338 | inline bool Value::U1() const { | ||
| 339 | if (IsIdentity()) { | ||
| 340 | return inst->Arg(0).U1(); | ||
| 341 | } | ||
| 342 | DEBUG_ASSERT(type == Type::U1); | ||
| 343 | return imm_u1; | ||
| 344 | } | ||
| 345 | |||
| 346 | inline u8 Value::U8() const { | ||
| 347 | if (IsIdentity()) { | ||
| 348 | return inst->Arg(0).U8(); | ||
| 349 | } | ||
| 350 | DEBUG_ASSERT(type == Type::U8); | ||
| 351 | return imm_u8; | ||
| 352 | } | ||
| 353 | |||
| 354 | inline u16 Value::U16() const { | ||
| 355 | if (IsIdentity()) { | ||
| 356 | return inst->Arg(0).U16(); | ||
| 357 | } | ||
| 358 | DEBUG_ASSERT(type == Type::U16); | ||
| 359 | return imm_u16; | ||
| 360 | } | ||
| 361 | |||
| 362 | inline u32 Value::U32() const { | ||
| 363 | if (IsIdentity()) { | ||
| 364 | return inst->Arg(0).U32(); | ||
| 365 | } | ||
| 366 | DEBUG_ASSERT(type == Type::U32); | ||
| 367 | return imm_u32; | ||
| 368 | } | ||
| 369 | |||
| 370 | inline f32 Value::F32() const { | ||
| 371 | if (IsIdentity()) { | ||
| 372 | return inst->Arg(0).F32(); | ||
| 373 | } | ||
| 374 | DEBUG_ASSERT(type == Type::F32); | ||
| 375 | return imm_f32; | ||
| 376 | } | ||
| 377 | |||
| 378 | inline u64 Value::U64() const { | ||
| 379 | if (IsIdentity()) { | ||
| 380 | return inst->Arg(0).U64(); | ||
| 381 | } | ||
| 382 | DEBUG_ASSERT(type == Type::U64); | ||
| 383 | return imm_u64; | ||
| 384 | } | ||
| 385 | |||
| 386 | inline f64 Value::F64() const { | ||
| 387 | if (IsIdentity()) { | ||
| 388 | return inst->Arg(0).F64(); | ||
| 389 | } | ||
| 390 | DEBUG_ASSERT(type == Type::F64); | ||
| 391 | return imm_f64; | ||
| 392 | } | ||
| 393 | |||
| 394 | [[nodiscard]] inline bool IsPhi(const Inst& inst) { | ||
| 395 | return inst.GetOpcode() == Opcode::Phi; | ||
| 396 | } | ||
| 397 | |||
| 398 | } // namespace Shader::IR | ||
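Inst::Flags and Inst::SetFlags stash a small trivially-copyable struct (rounding mode, saturation bits and the like) in the instruction's single u32 flags word via memcpy, which sidesteps strict-aliasing problems. Below is a standalone sketch of the same round-trip; FpControl is a made-up payload type for illustration, not the real one.

    #include <cassert>
    #include <cstdint>
    #include <cstring>
    #include <type_traits>

    struct FpControl { // hypothetical 2-byte flags payload
        std::uint8_t rounding;
        bool no_contraction;
    };
    static_assert(sizeof(FpControl) <= sizeof(std::uint32_t));
    static_assert(std::is_trivially_copyable_v<FpControl>);

    std::uint32_t flags{}; // stands in for Inst::flags

    template <typename T>
    void SetFlags(T value) noexcept {
        std::memcpy(&flags, &value, sizeof(value));
    }

    template <typename T>
    T Flags() noexcept {
        T ret;
        std::memcpy(&ret, &flags, sizeof(ret));
        return ret;
    }

    int main() {
        SetFlags(FpControl{.rounding = 2, .no_contraction = true});
        const FpControl fp{Flags<FpControl>()};
        assert(fp.rounding == 2 && fp.no_contraction);
    }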
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp new file mode 100644 index 000000000..1a954a509 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp | |||
| @@ -0,0 +1,642 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <optional> | ||
| 8 | #include <string> | ||
| 9 | #include <utility> | ||
| 10 | |||
| 11 | #include <fmt/format.h> | ||
| 12 | |||
| 13 | #include "shader_recompiler/exception.h" | ||
| 14 | #include "shader_recompiler/frontend/maxwell/control_flow.h" | ||
| 15 | #include "shader_recompiler/frontend/maxwell/decode.h" | ||
| 16 | #include "shader_recompiler/frontend/maxwell/indirect_branch_table_track.h" | ||
| 17 | #include "shader_recompiler/frontend/maxwell/location.h" | ||
| 18 | |||
| 19 | namespace Shader::Maxwell::Flow { | ||
| 20 | namespace { | ||
| 21 | struct Compare { | ||
| 22 | bool operator()(const Block& lhs, Location rhs) const noexcept { | ||
| 23 | return lhs.begin < rhs; | ||
| 24 | } | ||
| 25 | |||
| 26 | bool operator()(Location lhs, const Block& rhs) const noexcept { | ||
| 27 | return lhs < rhs.begin; | ||
| 28 | } | ||
| 29 | |||
| 30 | bool operator()(const Block& lhs, const Block& rhs) const noexcept { | ||
| 31 | return lhs.begin < rhs.begin; | ||
| 32 | } | ||
| 33 | }; | ||
| 34 | |||
| 35 | u32 BranchOffset(Location pc, Instruction inst) { | ||
| 36 | return pc.Offset() + static_cast<u32>(inst.branch.Offset()) + 8u; | ||
| 37 | } | ||
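The `+ 8u` deserves a note: judging from this code, Maxwell encodes branch offsets relative to the instruction after the branch, and each instruction is 8 bytes wide. A compile-time check of the arithmetic with made-up numbers:

```cpp
// A branch at pc 0x48 with encoded offset -0x10 targets 0x48 + (-0x10) + 8 = 0x40;
// unsigned wraparound makes the signed addition work out (values illustrative).
static_assert(0x48u + static_cast<unsigned>(-0x10) + 8u == 0x40u);
```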
| 38 | |||
| 39 | void Split(Block* old_block, Block* new_block, Location pc) { | ||
| 40 | if (pc <= old_block->begin || pc >= old_block->end) { | ||
| 41 | throw InvalidArgument("Invalid address to split={}", pc); | ||
| 42 | } | ||
| 43 | *new_block = Block{}; | ||
| 44 | new_block->begin = pc; | ||
| 45 | new_block->end = old_block->end; | ||
| 46 | new_block->end_class = old_block->end_class; | ||
| 47 | new_block->cond = old_block->cond; | ||
| 48 | new_block->stack = old_block->stack; | ||
| 49 | new_block->branch_true = old_block->branch_true; | ||
| 50 | new_block->branch_false = old_block->branch_false; | ||
| 51 | new_block->function_call = old_block->function_call; | ||
| 52 | new_block->return_block = old_block->return_block; | ||
| 53 | new_block->branch_reg = old_block->branch_reg; | ||
| 54 | new_block->branch_offset = old_block->branch_offset; | ||
| 55 | new_block->indirect_branches = std::move(old_block->indirect_branches); | ||
| 56 | |||
| 57 | const Location old_begin{old_block->begin}; | ||
| 58 | Stack old_stack{std::move(old_block->stack)}; | ||
| 59 | *old_block = Block{}; | ||
| 60 | old_block->begin = old_begin; | ||
| 61 | old_block->end = pc; | ||
| 62 | old_block->end_class = EndClass::Branch; | ||
| 63 | old_block->cond = IR::Condition(true); | ||
| 64 | old_block->stack = old_stack; | ||
| 65 | old_block->branch_true = new_block; | ||
| 66 | old_block->branch_false = nullptr; | ||
| 67 | } | ||
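A before/after picture of what `Split` does (addresses illustrative): the original block keeps the front half and unconditionally falls through to the new block, which inherits the old terminator:

```cpp
// Before: old_block = [0x08, 0x80), ending with some terminator T
// After Split(old_block, new_block, 0x40):
//   old_block = [0x08, 0x40)  end_class = Branch, cond = true,
//                             branch_true = new_block  (plain fall-through)
//   new_block = [0x40, 0x80)  ends with T; also inherits the stack, the call
//                             info and the indirect_branches of the original
```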
| 68 | |||
| 69 | Token OpcodeToken(Opcode opcode) { | ||
| 70 | switch (opcode) { | ||
| 71 | case Opcode::PBK: | ||
| 72 | case Opcode::BRK: | ||
| 73 | return Token::PBK; | ||
| 74 | case Opcode::PCNT: | ||
| 75 | case Opcode::CONT: | ||
| 76 | return Token::PCNT; | ||
| 77 | case Opcode::PEXIT: | ||
| 78 | case Opcode::EXIT: | ||
| 79 | return Token::PEXIT; | ||
| 80 | case Opcode::PLONGJMP: | ||
| 81 | case Opcode::LONGJMP: | ||
| 82 | return Token::PLONGJMP; | ||
| 83 | case Opcode::PRET: | ||
| 84 | case Opcode::RET: | ||
| 85 | case Opcode::CAL: | ||
| 86 | return Token::PRET; | ||
| 87 | case Opcode::SSY: | ||
| 88 | case Opcode::SYNC: | ||
| 89 | return Token::SSY; | ||
| 90 | default: | ||
| 91 | throw InvalidArgument("{}", opcode); | ||
| 92 | } | ||
| 93 | } | ||
| 94 | |||
| 95 | bool IsAbsoluteJump(Opcode opcode) { | ||
| 96 | switch (opcode) { | ||
| 97 | case Opcode::JCAL: | ||
| 98 | case Opcode::JMP: | ||
| 99 | case Opcode::JMX: | ||
| 100 | return true; | ||
| 101 | default: | ||
| 102 | return false; | ||
| 103 | } | ||
| 104 | } | ||
| 105 | |||
| 106 | bool HasFlowTest(Opcode opcode) { | ||
| 107 | switch (opcode) { | ||
| 108 | case Opcode::BRA: | ||
| 109 | case Opcode::BRX: | ||
| 110 | case Opcode::EXIT: | ||
| 111 | case Opcode::JMP: | ||
| 112 | case Opcode::JMX: | ||
| 113 | case Opcode::KIL: | ||
| 114 | case Opcode::BRK: | ||
| 115 | case Opcode::CONT: | ||
| 116 | case Opcode::LONGJMP: | ||
| 117 | case Opcode::RET: | ||
| 118 | case Opcode::SYNC: | ||
| 119 | return true; | ||
| 120 | case Opcode::CAL: | ||
| 121 | case Opcode::JCAL: | ||
| 122 | return false; | ||
| 123 | default: | ||
| 124 | throw InvalidArgument("Invalid branch {}", opcode); | ||
| 125 | } | ||
| 126 | } | ||
| 127 | |||
| 128 | std::string NameOf(const Block& block) { | ||
| 129 | if (block.begin.IsVirtual()) { | ||
| 130 | return fmt::format("\"Virtual {}\"", block.begin); | ||
| 131 | } else { | ||
| 132 | return fmt::format("\"{}\"", block.begin); | ||
| 133 | } | ||
| 134 | } | ||
| 135 | } // Anonymous namespace | ||
| 136 | |||
| 137 | void Stack::Push(Token token, Location target) { | ||
| 138 | entries.push_back({ | ||
| 139 | .token = token, | ||
| 140 | .target{target}, | ||
| 141 | }); | ||
| 142 | } | ||
| 143 | |||
| 144 | std::pair<Location, Stack> Stack::Pop(Token token) const { | ||
| 145 | const std::optional<Location> pc{Peek(token)}; | ||
| 146 | if (!pc) { | ||
| 147 | throw LogicError("Token could not be found"); | ||
| 148 | } | ||
| 149 | return {*pc, Remove(token)}; | ||
| 150 | } | ||
| 151 | |||
| 152 | std::optional<Location> Stack::Peek(Token token) const { | ||
| 153 | const auto it{std::find_if(entries.rbegin(), entries.rend(), | ||
| 154 | [token](const auto& entry) { return entry.token == token; })}; | ||
| 155 | if (it == entries.rend()) { | ||
| 156 | return std::nullopt; | ||
| 157 | } | ||
| 158 | return it->target; | ||
| 159 | } | ||
| 160 | |||
| 161 | Stack Stack::Remove(Token token) const { | ||
| 162 | const auto it{std::find_if(entries.rbegin(), entries.rend(), | ||
| 163 | [token](const auto& entry) { return entry.token == token; })}; | ||
| 164 | const auto pos{std::distance(entries.rbegin(), it)}; | ||
| 165 | Stack result; | ||
| 166 | result.entries.insert(result.entries.end(), entries.begin(), entries.end() - pos - 1); | ||
| 167 | return result; | ||
| 168 | } | ||
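`Pop` and `Remove` are deliberately non-mutating: they return fresh `Stack` values so sibling control-flow paths can keep sharing the snapshot they were forked with. A hedged usage sketch (inside `Shader::Maxwell::Flow`; the address is illustrative):

```cpp
// SSY pushes a reconvergence point; a later SYNC pops it. The original stack
// object is left untouched, so other paths holding a copy of it stay valid.
Stack stack;
stack.Push(Token::SSY, Location{0x48});                // SSY 0x48
const auto [target, remaining]{stack.Pop(Token::SSY)}; // SYNC
// target.Offset() == 0x48; `remaining` lacks the entry, `stack` still has it.
```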
| 169 | |||
| 170 | bool Block::Contains(Location pc) const noexcept { | ||
| 171 | return pc >= begin && pc < end; | ||
| 172 | } | ||
| 173 | |||
| 174 | Function::Function(ObjectPool<Block>& block_pool, Location start_address) | ||
| 175 | : entrypoint{start_address} { | ||
| 176 | Label& label{labels.emplace_back()}; | ||
| 177 | label.address = start_address; | ||
| 178 | label.block = block_pool.Create(Block{}); | ||
| 179 | label.block->begin = start_address; | ||
| 180 | label.block->end = start_address; | ||
| 181 | label.block->end_class = EndClass::Branch; | ||
| 182 | label.block->cond = IR::Condition(true); | ||
| 183 | label.block->branch_true = nullptr; | ||
| 184 | label.block->branch_false = nullptr; | ||
| 185 | } | ||
| 186 | |||
| 187 | CFG::CFG(Environment& env_, ObjectPool<Block>& block_pool_, Location start_address, | ||
| 188 | bool exits_to_dispatcher_) | ||
| 189 | : env{env_}, block_pool{block_pool_}, program_start{start_address}, exits_to_dispatcher{ | ||
| 190 | exits_to_dispatcher_} { | ||
| 191 | if (exits_to_dispatcher) { | ||
| 192 | dispatch_block = block_pool.Create(Block{}); | ||
| 193 | dispatch_block->begin = {}; | ||
| 194 | dispatch_block->end = {}; | ||
| 195 | dispatch_block->end_class = EndClass::Exit; | ||
| 196 | dispatch_block->cond = IR::Condition(true); | ||
| 197 | dispatch_block->stack = {}; | ||
| 198 | dispatch_block->branch_true = nullptr; | ||
| 199 | dispatch_block->branch_false = nullptr; | ||
| 200 | } | ||
| 201 | functions.emplace_back(block_pool, start_address); | ||
| 202 | for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) { | ||
| 203 | while (!functions[function_id].labels.empty()) { | ||
| 204 | Function& function{functions[function_id]}; | ||
| 205 | Label label{function.labels.back()}; | ||
| 206 | function.labels.pop_back(); | ||
| 207 | AnalyzeLabel(function_id, label); | ||
| 208 | } | ||
| 209 | } | ||
| 210 | if (exits_to_dispatcher) { | ||
| 211 | const auto last_block{functions[0].blocks.rbegin()}; | ||
| 212 | dispatch_block->begin = last_block->end + 1; | ||
| 213 | dispatch_block->end = last_block->end + 1; | ||
| 214 | functions[0].blocks.insert(*dispatch_block); | ||
| 215 | } | ||
| 216 | } | ||
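Note how the loop body re-reads `functions[function_id]` on every iteration instead of caching a reference outside the `while`: `AnalyzeLabel` reaches `AnalyzeInst`, which appends to `functions` on CAL/JCAL and may reallocate the container. A generic illustration of the hazard being avoided:

```cpp
#include <vector>

int main() {
    std::vector<int> v{1};
    for (std::size_t i = 0; i < v.size(); ++i) {
        // Index on every access: push_back may reallocate, so any reference
        // taken earlier (e.g. int& cached = v[i]) could now dangle.
        if (v[i] < 4) {
            v.push_back(v[i] + 1);
        }
    }
}
```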
| 217 | |||
| 218 | void CFG::AnalyzeLabel(FunctionId function_id, Label& label) { | ||
| 219 | if (InspectVisitedBlocks(function_id, label)) { | ||
| 220 | // Label address has been visited | ||
| 221 | return; | ||
| 222 | } | ||
| 223 | // Try to find the next block | ||
| 224 | Function* const function{&functions[function_id]}; | ||
| 225 | Location pc{label.address}; | ||
| 226 | const auto next_it{function->blocks.upper_bound(pc, Compare{})}; | ||
| 227 | const bool is_last{next_it == function->blocks.end()}; | ||
| 228 | Block* const next{is_last ? nullptr : &*next_it}; | ||
| 229 | // Insert before the next block | ||
| 230 | Block* const block{label.block}; | ||
| 231 | // Analyze instructions until it reaches an already visited block or there's a branch | ||
| 232 | bool is_branch{false}; | ||
| 233 | while (!next || pc < next->begin) { | ||
| 234 | is_branch = AnalyzeInst(block, function_id, pc) == AnalysisState::Branch; | ||
| 235 | if (is_branch) { | ||
| 236 | break; | ||
| 237 | } | ||
| 238 | ++pc; | ||
| 239 | } | ||
| 240 | if (!is_branch) { | ||
| 241 | // If the block finished without a branch, | ||
| 242 | // the next instruction has already been visited; jump to it | ||
| 243 | block->end = pc; | ||
| 244 | block->cond = IR::Condition{true}; | ||
| 245 | block->branch_true = next; | ||
| 246 | block->branch_false = nullptr; | ||
| 247 | } | ||
| 248 | // The function reference may have been invalidated by AnalyzeInst growing the vector; index again | ||
| 249 | // Insert the new block | ||
| 250 | functions[function_id].blocks.insert(*block); | ||
| 251 | } | ||
| 252 | |||
| 253 | bool CFG::InspectVisitedBlocks(FunctionId function_id, const Label& label) { | ||
| 254 | const Location pc{label.address}; | ||
| 255 | Function& function{functions[function_id]}; | ||
| 256 | const auto it{ | ||
| 257 | std::ranges::find_if(function.blocks, [pc](auto& block) { return block.Contains(pc); })}; | ||
| 258 | if (it == function.blocks.end()) { | ||
| 259 | // Address has not been visited | ||
| 260 | return false; | ||
| 261 | } | ||
| 262 | Block* const visited_block{&*it}; | ||
| 263 | if (visited_block->begin == pc) { | ||
| 264 | throw LogicError("Dangling block"); | ||
| 265 | } | ||
| 266 | Block* const new_block{label.block}; | ||
| 267 | Split(visited_block, new_block, pc); | ||
| 268 | function.blocks.insert(it, *new_block); | ||
| 269 | return true; | ||
| 270 | } | ||
| 271 | |||
| 272 | CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Location pc) { | ||
| 273 | const Instruction inst{env.ReadInstruction(pc.Offset())}; | ||
| 274 | const Opcode opcode{Decode(inst.raw)}; | ||
| 275 | switch (opcode) { | ||
| 276 | case Opcode::BRA: | ||
| 277 | case Opcode::JMP: | ||
| 278 | case Opcode::RET: | ||
| 279 | if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) { | ||
| 280 | return AnalysisState::Continue; | ||
| 281 | } | ||
| 282 | switch (opcode) { | ||
| 283 | case Opcode::BRA: | ||
| 284 | case Opcode::JMP: | ||
| 285 | AnalyzeBRA(block, function_id, pc, inst, IsAbsoluteJump(opcode)); | ||
| 286 | break; | ||
| 287 | case Opcode::RET: | ||
| 288 | block->end_class = EndClass::Return; | ||
| 289 | break; | ||
| 290 | default: | ||
| 291 | break; | ||
| 292 | } | ||
| 293 | block->end = pc; | ||
| 294 | return AnalysisState::Branch; | ||
| 295 | case Opcode::BRK: | ||
| 296 | case Opcode::CONT: | ||
| 297 | case Opcode::LONGJMP: | ||
| 298 | case Opcode::SYNC: { | ||
| 299 | if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) { | ||
| 300 | return AnalysisState::Continue; | ||
| 301 | } | ||
| 302 | const auto [stack_pc, new_stack]{block->stack.Pop(OpcodeToken(opcode))}; | ||
| 303 | block->branch_true = AddLabel(block, new_stack, stack_pc, function_id); | ||
| 304 | block->end = pc; | ||
| 305 | return AnalysisState::Branch; | ||
| 306 | } | ||
| 307 | case Opcode::KIL: { | ||
| 308 | const Predicate pred{inst.Pred()}; | ||
| 309 | const auto ir_pred{static_cast<IR::Pred>(pred.index)}; | ||
| 310 | const IR::Condition cond{inst.branch.flow_test, ir_pred, pred.negated}; | ||
| 311 | AnalyzeCondInst(block, function_id, pc, EndClass::Kill, cond); | ||
| 312 | return AnalysisState::Branch; | ||
| 313 | } | ||
| 314 | case Opcode::PBK: | ||
| 315 | case Opcode::PCNT: | ||
| 316 | case Opcode::PEXIT: | ||
| 317 | case Opcode::PLONGJMP: | ||
| 318 | case Opcode::SSY: | ||
| 319 | block->stack.Push(OpcodeToken(opcode), BranchOffset(pc, inst)); | ||
| 320 | return AnalysisState::Continue; | ||
| 321 | case Opcode::BRX: | ||
| 322 | case Opcode::JMX: | ||
| 323 | return AnalyzeBRX(block, pc, inst, IsAbsoluteJump(opcode), function_id); | ||
| 324 | case Opcode::EXIT: | ||
| 325 | return AnalyzeEXIT(block, function_id, pc, inst); | ||
| 326 | case Opcode::PRET: | ||
| 327 | throw NotImplementedException("PRET flow analysis"); | ||
| 328 | case Opcode::CAL: | ||
| 329 | case Opcode::JCAL: { | ||
| 330 | const bool is_absolute{IsAbsoluteJump(opcode)}; | ||
| 331 | const Location cal_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)}; | ||
| 332 | // Technically CAL pushes into PRET, but that's implicit in the function call for us | ||
| 333 | // Insert the function into the list if it doesn't exist | ||
| 334 | const auto it{std::ranges::find(functions, cal_pc, &Function::entrypoint)}; | ||
| 335 | const bool exists{it != functions.end()}; | ||
| 336 | const FunctionId call_id{exists ? static_cast<size_t>(std::distance(functions.begin(), it)) | ||
| 337 | : functions.size()}; | ||
| 338 | if (!exists) { | ||
| 339 | functions.emplace_back(block_pool, cal_pc); | ||
| 340 | } | ||
| 341 | block->end_class = EndClass::Call; | ||
| 342 | block->function_call = call_id; | ||
| 343 | block->return_block = AddLabel(block, block->stack, pc + 1, function_id); | ||
| 344 | block->end = pc; | ||
| 345 | return AnalysisState::Branch; | ||
| 346 | } | ||
| 347 | default: | ||
| 348 | break; | ||
| 349 | } | ||
| 350 | const Predicate pred{inst.Pred()}; | ||
| 351 | if (pred == Predicate{true} || pred == Predicate{false}) { | ||
| 352 | return AnalysisState::Continue; | ||
| 353 | } | ||
| 354 | const IR::Condition cond{static_cast<IR::Pred>(pred.index), pred.negated}; | ||
| 355 | AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond); | ||
| 356 | return AnalysisState::Branch; | ||
| 357 | } | ||
| 358 | |||
| 359 | void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, | ||
| 360 | EndClass insn_end_class, IR::Condition cond) { | ||
| 361 | if (block->begin != pc) { | ||
| 362 | // If the block doesn't start at the conditional instruction, | ||
| 363 | // mark it as a label to visit it later | ||
| 364 | block->end = pc; | ||
| 365 | block->cond = IR::Condition{true}; | ||
| 366 | block->branch_true = AddLabel(block, block->stack, pc, function_id); | ||
| 367 | block->branch_false = nullptr; | ||
| 368 | return; | ||
| 369 | } | ||
| 370 | // Create a virtual block and a conditional block | ||
| 371 | Block* const conditional_block{block_pool.Create()}; | ||
| 372 | Block virtual_block{}; | ||
| 373 | virtual_block.begin = block->begin.Virtual(); | ||
| 374 | virtual_block.end = block->begin.Virtual(); | ||
| 375 | virtual_block.end_class = EndClass::Branch; | ||
| 376 | virtual_block.stack = block->stack; | ||
| 377 | virtual_block.cond = cond; | ||
| 378 | virtual_block.branch_true = conditional_block; | ||
| 379 | virtual_block.branch_false = nullptr; | ||
| 380 | // Save the contents of the visited block in the conditional block | ||
| 381 | *conditional_block = std::move(*block); | ||
| 382 | // Impersonate the visited block with a virtual block | ||
| 383 | *block = std::move(virtual_block); | ||
| 384 | // Set the end properties of the conditional instruction | ||
| 385 | conditional_block->end = pc + 1; | ||
| 386 | conditional_block->end_class = insn_end_class; | ||
| 387 | // Add a label to the instruction after the conditional instruction | ||
| 388 | Block* const endif_block{AddLabel(conditional_block, block->stack, pc + 1, function_id)}; | ||
| 389 | // Branch to the next instruction from the virtual block | ||
| 390 | block->branch_false = endif_block; | ||
| 391 | // And branch to it from the conditional instruction if it is a branch or a kill instruction | ||
| 392 | // Kill instructions are considered a branch because they demote to a helper invocation and | ||
| 393 | // execution may continue. | ||
| 394 | if (insn_end_class == EndClass::Branch || insn_end_class == EndClass::Kill) { | ||
| 395 | conditional_block->cond = IR::Condition{true}; | ||
| 396 | conditional_block->branch_true = endif_block; | ||
| 397 | conditional_block->branch_false = nullptr; | ||
| 398 | } | ||
| 399 | // Finally insert the condition block into the list of blocks | ||
| 400 | functions[function_id].blocks.insert(*conditional_block); | ||
| 401 | } | ||
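The impersonation trick above is easier to see as a picture. The original `Block` object is reused as a zero-length virtual block so that every existing pointer to it stays valid, while its real contents move into a freshly allocated conditional block (addresses illustrative):

```cpp
// Before: block = [0x08, ...), reaching the predicated instruction at 0x48
//
// After AnalyzeCondInst(block, ..., pc = 0x48, cond = P0):
//
//        block (same object, now virtual and zero-length at v0x48)
//          |  P0 true                          | false
//          v                                   v
//   conditional_block [0x48, 0x50)       endif label @ 0x50
//   (holds the old contents; ends             ^
//    with insn_end_class)                     |
//          +--- Branch/Kill also falls through here
```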
| 402 | |||
| 403 | bool CFG::AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst, | ||
| 404 | Opcode opcode) { | ||
| 405 | if (inst.branch.is_cbuf) { | ||
| 406 | throw NotImplementedException("Branch with constant buffer offset"); | ||
| 407 | } | ||
| 408 | const Predicate pred{inst.Pred()}; | ||
| 409 | if (pred == Predicate{false}) { | ||
| 410 | return false; | ||
| 411 | } | ||
| 412 | const bool has_flow_test{HasFlowTest(opcode)}; | ||
| 413 | const IR::FlowTest flow_test{has_flow_test ? inst.branch.flow_test.Value() : IR::FlowTest::T}; | ||
| 414 | if (pred != Predicate{true} || flow_test != IR::FlowTest::T) { | ||
| 415 | block->cond = IR::Condition(flow_test, static_cast<IR::Pred>(pred.index), pred.negated); | ||
| 416 | block->branch_false = AddLabel(block, block->stack, pc + 1, function_id); | ||
| 417 | } else { | ||
| 418 | block->cond = IR::Condition{true}; | ||
| 419 | } | ||
| 420 | return true; | ||
| 421 | } | ||
| 422 | |||
| 423 | void CFG::AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst, | ||
| 424 | bool is_absolute) { | ||
| 425 | const Location bra_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)}; | ||
| 426 | block->branch_true = AddLabel(block, block->stack, bra_pc, function_id); | ||
| 427 | } | ||
| 428 | |||
| 429 | CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute, | ||
| 430 | FunctionId function_id) { | ||
| 431 | const std::optional brx_table{TrackIndirectBranchTable(env, pc, program_start)}; | ||
| 432 | if (!brx_table) { | ||
| 433 | // Tracking failed: the LDC/SHL/IMNMX idiom was not found behind this BRX | ||
| 434 | throw NotImplementedException("Failed to track indirect branch"); | ||
| 435 | } | ||
| 436 | const IR::FlowTest flow_test{inst.branch.flow_test}; | ||
| 437 | const Predicate pred{inst.Pred()}; | ||
| 438 | if (flow_test != IR::FlowTest::T || pred != Predicate{true}) { | ||
| 439 | throw NotImplementedException("Conditional indirect branch"); | ||
| 440 | } | ||
| 441 | std::vector<u32> targets; | ||
| 442 | targets.reserve(brx_table->num_entries); | ||
| 443 | for (u32 i = 0; i < brx_table->num_entries; ++i) { | ||
| 444 | u32 target{env.ReadCbufValue(brx_table->cbuf_index, brx_table->cbuf_offset + i * 4)}; | ||
| 445 | if (!is_absolute) { | ||
| 446 | target += pc.Offset(); | ||
| 447 | } | ||
| 448 | target += static_cast<u32>(brx_table->branch_offset); | ||
| 449 | target += 8; | ||
| 450 | targets.push_back(target); | ||
| 451 | } | ||
| 452 | std::ranges::sort(targets); | ||
| 453 | targets.erase(std::unique(targets.begin(), targets.end()), targets.end()); | ||
| 454 | |||
| 455 | block->indirect_branches.reserve(targets.size()); | ||
| 456 | for (const u32 target : targets) { | ||
| 457 | Block* const branch{AddLabel(block, block->stack, target, function_id)}; | ||
| 458 | block->indirect_branches.push_back({ | ||
| 459 | .block = branch, | ||
| 460 | .address = target, | ||
| 461 | }); | ||
| 462 | } | ||
| 463 | block->cond = IR::Condition{true}; | ||
| 464 | block->end = pc + 1; | ||
| 465 | block->end_class = EndClass::IndirectBranch; | ||
| 466 | block->branch_reg = brx_table->branch_reg; | ||
| 467 | block->branch_offset = brx_table->branch_offset + 8; | ||
| 468 | if (!is_absolute) { | ||
| 469 | block->branch_offset += pc.Offset(); | ||
| 470 | } | ||
| 471 | return AnalysisState::Branch; | ||
| 472 | } | ||
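Putting the target arithmetic together: each table entry read from the constant buffer is biased by the current pc (for a relative BRX), by the offset recovered from the tracked BRX encoding, and by the usual +8 next-instruction bias. A compile-time check with made-up numbers:

```cpp
// Relative BRX at pc 0x48, tracked branch_offset -0x10, table entry 0x30:
//   target = 0x30 + 0x48 + (-0x10) + 8 = 0x70   (values illustrative)
static_assert(0x30u + 0x48u + static_cast<unsigned>(-0x10) + 8u == 0x70u);
```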
| 473 | |||
| 474 | CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, | ||
| 475 | Instruction inst) { | ||
| 476 | const IR::FlowTest flow_test{inst.branch.flow_test}; | ||
| 477 | const Predicate pred{inst.Pred()}; | ||
| 478 | if (pred == Predicate{false} || flow_test == IR::FlowTest::F) { | ||
| 479 | // EXIT will never be taken | ||
| 480 | return AnalysisState::Continue; | ||
| 481 | } | ||
| 482 | if (exits_to_dispatcher && function_id != 0) { | ||
| 483 | throw NotImplementedException("Dispatch EXIT on external function"); | ||
| 484 | } | ||
| 485 | if (pred != Predicate{true} || flow_test != IR::FlowTest::T) { | ||
| 486 | if (block->stack.Peek(Token::PEXIT).has_value()) { | ||
| 487 | throw NotImplementedException("Conditional EXIT with PEXIT token"); | ||
| 488 | } | ||
| 489 | const IR::Condition cond{flow_test, static_cast<IR::Pred>(pred.index), pred.negated}; | ||
| 490 | if (exits_to_dispatcher) { | ||
| 491 | block->end = pc; | ||
| 492 | block->end_class = EndClass::Branch; | ||
| 493 | block->cond = cond; | ||
| 494 | block->branch_true = dispatch_block; | ||
| 495 | block->branch_false = AddLabel(block, block->stack, pc + 1, function_id); | ||
| 496 | return AnalysisState::Branch; | ||
| 497 | } | ||
| 498 | AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond); | ||
| 499 | return AnalysisState::Branch; | ||
| 500 | } | ||
| 501 | if (const std::optional<Location> exit_pc{block->stack.Peek(Token::PEXIT)}) { | ||
| 502 | const Stack popped_stack{block->stack.Remove(Token::PEXIT)}; | ||
| 503 | block->cond = IR::Condition{true}; | ||
| 504 | block->branch_true = AddLabel(block, popped_stack, *exit_pc, function_id); | ||
| 505 | block->branch_false = nullptr; | ||
| 506 | return AnalysisState::Branch; | ||
| 507 | } | ||
| 508 | if (exits_to_dispatcher) { | ||
| 509 | block->cond = IR::Condition{true}; | ||
| 510 | block->end = pc; | ||
| 511 | block->end_class = EndClass::Branch; | ||
| 512 | block->branch_true = dispatch_block; | ||
| 513 | block->branch_false = nullptr; | ||
| 514 | return AnalysisState::Branch; | ||
| 515 | } | ||
| 516 | block->end = pc + 1; | ||
| 517 | block->end_class = EndClass::Exit; | ||
| 518 | return AnalysisState::Branch; | ||
| 519 | } | ||
| 520 | |||
| 521 | Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function_id) { | ||
| 522 | Function& function{functions[function_id]}; | ||
| 523 | if (block->begin == pc) { | ||
| 524 | // Jumps to itself | ||
| 525 | return block; | ||
| 526 | } | ||
| 527 | if (const auto it{function.blocks.find(pc, Compare{})}; it != function.blocks.end()) { | ||
| 528 | // Block already exists and it has been visited | ||
| 529 | if (function.blocks.begin() != it) { | ||
| 530 | // Check if the previous node is the virtual variant of the label | ||
| 531 | // This won't exist if a virtual node is not needed or it hasn't been visited | ||
| 532 | // If it hasn't been visited and a virtual node is needed, this will still behave as | ||
| 533 | // expected because the node is impersonated by its virtual node. | ||
| 534 | const auto prev{std::prev(it)}; | ||
| 535 | if (it->begin.Virtual() == prev->begin) { | ||
| 536 | return &*prev; | ||
| 537 | } | ||
| 538 | } | ||
| 539 | return &*it; | ||
| 540 | } | ||
| 541 | // Make sure we don't insert the same label twice | ||
| 542 | const auto label_it{std::ranges::find(function.labels, pc, &Label::address)}; | ||
| 543 | if (label_it != function.labels.end()) { | ||
| 544 | return label_it->block; | ||
| 545 | } | ||
| 546 | Block* const new_block{block_pool.Create()}; | ||
| 547 | new_block->begin = pc; | ||
| 548 | new_block->end = pc; | ||
| 549 | new_block->end_class = EndClass::Branch; | ||
| 550 | new_block->cond = IR::Condition(true); | ||
| 551 | new_block->stack = stack; | ||
| 552 | new_block->branch_true = nullptr; | ||
| 553 | new_block->branch_false = nullptr; | ||
| 554 | function.labels.push_back(Label{ | ||
| 555 | .address{pc}, | ||
| 556 | .block = new_block, | ||
| 557 | .stack{std::move(stack)}, | ||
| 558 | }); | ||
| 559 | return new_block; | ||
| 560 | } | ||
| 561 | |||
| 562 | std::string CFG::Dot() const { | ||
| 563 | int node_uid{0}; | ||
| 564 | |||
| 565 | std::string dot{"digraph shader {\n"}; | ||
| 566 | for (const Function& function : functions) { | ||
| 567 | dot += fmt::format("\tsubgraph cluster_{} {{\n", function.entrypoint); | ||
| 568 | dot += fmt::format("\t\tnode [style=filled];\n"); | ||
| 569 | for (const Block& block : function.blocks) { | ||
| 570 | const std::string name{NameOf(block)}; | ||
| 571 | const auto add_branch = [&](Block* branch, bool add_label) { | ||
| 572 | dot += fmt::format("\t\t{}->{}", name, NameOf(*branch)); | ||
| 573 | if (add_label && block.cond != IR::Condition{true} && | ||
| 574 | block.cond != IR::Condition{false}) { | ||
| 575 | dot += fmt::format(" [label=\"{}\"]", block.cond); | ||
| 576 | } | ||
| 577 | dot += '\n'; | ||
| 578 | }; | ||
| 579 | dot += fmt::format("\t\t{};\n", name); | ||
| 580 | switch (block.end_class) { | ||
| 581 | case EndClass::Branch: | ||
| 582 | if (block.cond != IR::Condition{false}) { | ||
| 583 | add_branch(block.branch_true, true); | ||
| 584 | } | ||
| 585 | if (block.cond != IR::Condition{true}) { | ||
| 586 | add_branch(block.branch_false, false); | ||
| 587 | } | ||
| 588 | break; | ||
| 589 | case EndClass::IndirectBranch: | ||
| 590 | for (const IndirectBranch& branch : block.indirect_branches) { | ||
| 591 | add_branch(branch.block, false); | ||
| 592 | } | ||
| 593 | break; | ||
| 594 | case EndClass::Call: | ||
| 595 | dot += fmt::format("\t\t{}->N{};\n", name, node_uid); | ||
| 596 | dot += fmt::format("\t\tN{}->{};\n", node_uid, NameOf(*block.return_block)); | ||
| 597 | dot += fmt::format("\t\tN{} [label=\"Call {}\"][shape=square][style=stripped];\n", | ||
| 598 | node_uid, block.function_call); | ||
| 599 | dot += '\n'; | ||
| 600 | ++node_uid; | ||
| 601 | break; | ||
| 602 | case EndClass::Exit: | ||
| 603 | dot += fmt::format("\t\t{}->N{};\n", name, node_uid); | ||
| 604 | dot += fmt::format("\t\tN{} [label=\"Exit\"][shape=square][style=stripped];\n", | ||
| 605 | node_uid); | ||
| 606 | ++node_uid; | ||
| 607 | break; | ||
| 608 | case EndClass::Return: | ||
| 609 | dot += fmt::format("\t\t{}->N{};\n", name, node_uid); | ||
| 610 | dot += fmt::format("\t\tN{} [label=\"Return\"][shape=square][style=stripped];\n", | ||
| 611 | node_uid); | ||
| 612 | ++node_uid; | ||
| 613 | break; | ||
| 614 | case EndClass::Kill: | ||
| 615 | dot += fmt::format("\t\t{}->N{};\n", name, node_uid); | ||
| 616 | dot += fmt::format("\t\tN{} [label=\"Kill\"][shape=square][style=stripped];\n", | ||
| 617 | node_uid); | ||
| 618 | ++node_uid; | ||
| 619 | break; | ||
| 620 | } | ||
| 621 | } | ||
| 622 | if (function.entrypoint == 8) { // main starts at address 0, which Location aligns to 8 | ||
| 623 | dot += fmt::format("\t\tlabel = \"main\";\n"); | ||
| 624 | } else { | ||
| 625 | dot += fmt::format("\t\tlabel = \"Function {}\";\n", function.entrypoint); | ||
| 626 | } | ||
| 627 | dot += "\t}\n"; | ||
| 628 | } | ||
| 629 | if (!functions.empty()) { | ||
| 630 | auto& function{functions.front()}; | ||
| 631 | if (function.blocks.empty()) { | ||
| 632 | dot += "Start;\n"; | ||
| 633 | } else { | ||
| 634 | dot += fmt::format("\tStart -> {};\n", NameOf(*function.blocks.begin())); | ||
| 635 | } | ||
| 636 | dot += fmt::format("\tStart [shape=diamond];\n"); | ||
| 637 | } | ||
| 638 | dot += "}\n"; | ||
| 639 | return dot; | ||
| 640 | } | ||
| 641 | |||
| 642 | } // namespace Shader::Maxwell::Flow | ||
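For orientation, the output of `Dot()` for a tiny single-function shader looks roughly like the following (a hand-written illustration, not captured output):

```cpp
// digraph shader {
//     subgraph cluster_0008 {
//         node [style=filled];
//         "0008";
//         "0008"->"0018" [label="P0"]
//         "0008"->"0010"
//         "0010";
//         "0010"->N0;
//         N0 [label="Exit"][shape=square][style=striped];
//         "0018";
//         "0018"->N1;
//         N1 [label="Exit"][shape=square][style=striped];
//         label = "main";
//     }
//     Start -> "0008";
//     Start [shape=diamond];
// }
```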
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h new file mode 100644 index 000000000..a6bd3e196 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h | |||
| @@ -0,0 +1,169 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <compare> | ||
| 8 | #include <optional> | ||
| 9 | #include <span> | ||
| 10 | #include <string> | ||
| 11 | #include <vector> | ||
| 12 | |||
| 13 | #include <boost/container/small_vector.hpp> | ||
| 14 | #include <boost/intrusive/set.hpp> | ||
| 15 | |||
| 16 | #include "shader_recompiler/environment.h" | ||
| 17 | #include "shader_recompiler/frontend/ir/condition.h" | ||
| 18 | #include "shader_recompiler/frontend/maxwell/instruction.h" | ||
| 19 | #include "shader_recompiler/frontend/maxwell/location.h" | ||
| 20 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 21 | #include "shader_recompiler/object_pool.h" | ||
| 22 | |||
| 23 | namespace Shader::Maxwell::Flow { | ||
| 24 | |||
| 25 | struct Block; | ||
| 26 | |||
| 27 | using FunctionId = size_t; | ||
| 28 | |||
| 29 | enum class EndClass { | ||
| 30 | Branch, | ||
| 31 | IndirectBranch, | ||
| 32 | Call, | ||
| 33 | Exit, | ||
| 34 | Return, | ||
| 35 | Kill, | ||
| 36 | }; | ||
| 37 | |||
| 38 | enum class Token { | ||
| 39 | SSY, | ||
| 40 | PBK, | ||
| 41 | PEXIT, | ||
| 42 | PRET, | ||
| 43 | PCNT, | ||
| 44 | PLONGJMP, | ||
| 45 | }; | ||
| 46 | |||
| 47 | struct StackEntry { | ||
| 48 | auto operator<=>(const StackEntry&) const noexcept = default; | ||
| 49 | |||
| 50 | Token token; | ||
| 51 | Location target; | ||
| 52 | }; | ||
| 53 | |||
| 54 | class Stack { | ||
| 55 | public: | ||
| 56 | void Push(Token token, Location target); | ||
| 57 | [[nodiscard]] std::pair<Location, Stack> Pop(Token token) const; | ||
| 58 | [[nodiscard]] std::optional<Location> Peek(Token token) const; | ||
| 59 | [[nodiscard]] Stack Remove(Token token) const; | ||
| 60 | |||
| 61 | private: | ||
| 62 | boost::container::small_vector<StackEntry, 3> entries; | ||
| 63 | }; | ||
| 64 | |||
| 65 | struct IndirectBranch { | ||
| 66 | Block* block; | ||
| 67 | u32 address; | ||
| 68 | }; | ||
| 69 | |||
| 70 | struct Block : boost::intrusive::set_base_hook< | ||
| 71 | // Normal link is ~2.5% faster compared to safe link | ||
| 72 | boost::intrusive::link_mode<boost::intrusive::normal_link>> { | ||
| 73 | [[nodiscard]] bool Contains(Location pc) const noexcept; | ||
| 74 | |||
| 75 | bool operator<(const Block& rhs) const noexcept { | ||
| 76 | return begin < rhs.begin; | ||
| 77 | } | ||
| 78 | |||
| 79 | Location begin; | ||
| 80 | Location end; | ||
| 81 | EndClass end_class{}; | ||
| 82 | IR::Condition cond{}; | ||
| 83 | Stack stack; | ||
| 84 | Block* branch_true{}; | ||
| 85 | Block* branch_false{}; | ||
| 86 | FunctionId function_call{}; | ||
| 87 | Block* return_block{}; | ||
| 88 | IR::Reg branch_reg{}; | ||
| 89 | s32 branch_offset{}; | ||
| 90 | std::vector<IndirectBranch> indirect_branches; | ||
| 91 | }; | ||
| 92 | |||
| 93 | struct Label { | ||
| 94 | Location address; | ||
| 95 | Block* block; | ||
| 96 | Stack stack; | ||
| 97 | }; | ||
| 98 | |||
| 99 | struct Function { | ||
| 100 | explicit Function(ObjectPool<Block>& block_pool, Location start_address); | ||
| 101 | |||
| 102 | Location entrypoint; | ||
| 103 | boost::container::small_vector<Label, 16> labels; | ||
| 104 | boost::intrusive::set<Block> blocks; | ||
| 105 | }; | ||
| 106 | |||
| 107 | class CFG { | ||
| 108 | enum class AnalysisState { | ||
| 109 | Branch, | ||
| 110 | Continue, | ||
| 111 | }; | ||
| 112 | |||
| 113 | public: | ||
| 114 | explicit CFG(Environment& env, ObjectPool<Block>& block_pool, Location start_address, | ||
| 115 | bool exits_to_dispatcher = false); | ||
| 116 | |||
| 117 | CFG& operator=(const CFG&) = delete; | ||
| 118 | CFG(const CFG&) = delete; | ||
| 119 | |||
| 120 | CFG& operator=(CFG&&) = delete; | ||
| 121 | CFG(CFG&&) = delete; | ||
| 122 | |||
| 123 | [[nodiscard]] std::string Dot() const; | ||
| 124 | |||
| 125 | [[nodiscard]] std::span<const Function> Functions() const noexcept { | ||
| 126 | return std::span(functions.data(), functions.size()); | ||
| 127 | } | ||
| 128 | [[nodiscard]] std::span<Function> Functions() noexcept { | ||
| 129 | return std::span(functions.data(), functions.size()); | ||
| 130 | } | ||
| 131 | |||
| 132 | [[nodiscard]] bool ExitsToDispatcher() const { | ||
| 133 | return exits_to_dispatcher; | ||
| 134 | } | ||
| 135 | |||
| 136 | private: | ||
| 137 | void AnalyzeLabel(FunctionId function_id, Label& label); | ||
| 138 | |||
| 139 | /// Inspect already visited blocks. | ||
| 140 | /// Return true when the block has already been visited | ||
| 141 | bool InspectVisitedBlocks(FunctionId function_id, const Label& label); | ||
| 142 | |||
| 143 | AnalysisState AnalyzeInst(Block* block, FunctionId function_id, Location pc); | ||
| 144 | |||
| 145 | void AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, EndClass insn_end_class, | ||
| 146 | IR::Condition cond); | ||
| 147 | |||
| 148 | /// Return true when the branch instruction is confirmed to be a branch | ||
| 149 | bool AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst, | ||
| 150 | Opcode opcode); | ||
| 151 | |||
| 152 | void AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst, | ||
| 153 | bool is_absolute); | ||
| 154 | AnalysisState AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute, | ||
| 155 | FunctionId function_id); | ||
| 156 | AnalysisState AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, Instruction inst); | ||
| 157 | |||
| 158 | /// Return the branch target block | ||
| 159 | Block* AddLabel(Block* block, Stack stack, Location pc, FunctionId function_id); | ||
| 160 | |||
| 161 | Environment& env; | ||
| 162 | ObjectPool<Block>& block_pool; | ||
| 163 | boost::container::small_vector<Function, 1> functions; | ||
| 164 | Location program_start; | ||
| 165 | bool exits_to_dispatcher{}; | ||
| 166 | Block* dispatch_block{}; | ||
| 167 | }; | ||
| 168 | |||
| 169 | } // namespace Shader::Maxwell::Flow | ||
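A hedged sketch of how a caller might drive this header; `env` is any concrete `Shader::Environment` implementation, and the real call sites live elsewhere in the shader recompiler, so treat the details as assumptions:

```cpp
#include <cstdio>

#include "common/common_types.h"
#include "shader_recompiler/environment.h"
#include "shader_recompiler/frontend/maxwell/control_flow.h"
#include "shader_recompiler/object_pool.h"

void DumpCfg(Shader::Environment& env, u32 start_address) {
    Shader::ObjectPool<Shader::Maxwell::Flow::Block> block_pool;
    Shader::Maxwell::Flow::CFG cfg{env, block_pool, start_address};
    std::puts(cfg.Dot().c_str()); // pipe into `dot -Tsvg` to visualize
}
```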
diff --git a/src/shader_recompiler/frontend/maxwell/decode.cpp b/src/shader_recompiler/frontend/maxwell/decode.cpp new file mode 100644 index 000000000..972f677dc --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/decode.cpp | |||
| @@ -0,0 +1,149 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <bit> | ||
| 8 | #include <memory> | ||
| 9 | #include <string_view> | ||
| 10 | |||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "shader_recompiler/exception.h" | ||
| 13 | #include "shader_recompiler/frontend/maxwell/decode.h" | ||
| 14 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 15 | |||
| 16 | namespace Shader::Maxwell { | ||
| 17 | namespace { | ||
| 18 | struct MaskValue { | ||
| 19 | u64 mask; | ||
| 20 | u64 value; | ||
| 21 | }; | ||
| 22 | |||
| 23 | constexpr MaskValue MaskValueFromEncoding(const char* encoding) { | ||
| 24 | u64 mask{}; | ||
| 25 | u64 value{}; | ||
| 26 | u64 bit{u64(1) << 63}; | ||
| 27 | while (*encoding) { | ||
| 28 | switch (*encoding) { | ||
| 29 | case '0': | ||
| 30 | mask |= bit; | ||
| 31 | break; | ||
| 32 | case '1': | ||
| 33 | mask |= bit; | ||
| 34 | value |= bit; | ||
| 35 | break; | ||
| 36 | case '-': | ||
| 37 | break; | ||
| 38 | case ' ': | ||
| 39 | break; | ||
| 40 | default: | ||
| 41 | throw LogicError("Invalid encoding character '{}'", *encoding); | ||
| 42 | } | ||
| 43 | ++encoding; | ||
| 44 | if (*encoding != ' ') { | ||
| 45 | bit >>= 1; | ||
| 46 | } | ||
| 47 | } | ||
| 48 | return MaskValue{.mask = mask, .value = value}; | ||
| 49 | } | ||
| 50 | |||
| 51 | struct InstEncoding { | ||
| 52 | MaskValue mask_value; | ||
| 53 | Opcode opcode; | ||
| 54 | }; | ||
| 55 | constexpr std::array UNORDERED_ENCODINGS{ | ||
| 56 | #define INST(name, cute, encode) \ | ||
| 57 | InstEncoding{ \ | ||
| 58 | .mask_value{MaskValueFromEncoding(encode)}, \ | ||
| 59 | .opcode = Opcode::name, \ | ||
| 60 | }, | ||
| 61 | #include "maxwell.inc" | ||
| 62 | #undef INST | ||
| 63 | }; | ||
| 64 | |||
| 65 | constexpr auto SortedEncodings() { | ||
| 66 | std::array encodings{UNORDERED_ENCODINGS}; | ||
| 67 | std::ranges::sort(encodings, [](const InstEncoding& lhs, const InstEncoding& rhs) { | ||
| 68 | return std::popcount(lhs.mask_value.mask) > std::popcount(rhs.mask_value.mask); | ||
| 69 | }); | ||
| 70 | return encodings; | ||
| 71 | } | ||
| 72 | constexpr auto ENCODINGS{SortedEncodings()}; | ||
| 73 | |||
| 74 | constexpr int WidestLeftBits() { | ||
| 75 | int bits{64}; | ||
| 76 | for (const InstEncoding& encoding : ENCODINGS) { | ||
| 77 | bits = std::min(bits, std::countr_zero(encoding.mask_value.mask)); | ||
| 78 | } | ||
| 79 | return 64 - bits; | ||
| 80 | } | ||
| 81 | constexpr int WIDEST_LEFT_BITS{WidestLeftBits()}; | ||
| 82 | constexpr int MASK_SHIFT{64 - WIDEST_LEFT_BITS}; | ||
| 83 | |||
| 84 | constexpr size_t ToFastLookupIndex(u64 value) { | ||
| 85 | return static_cast<size_t>(value >> MASK_SHIFT); | ||
| 86 | } | ||
| 87 | |||
| 88 | constexpr size_t FastLookupSize() { | ||
| 89 | size_t max_width{}; | ||
| 90 | for (const InstEncoding& encoding : ENCODINGS) { | ||
| 91 | max_width = std::max(max_width, ToFastLookupIndex(encoding.mask_value.mask)); | ||
| 92 | } | ||
| 93 | return max_width + 1; | ||
| 94 | } | ||
| 95 | constexpr size_t FAST_LOOKUP_SIZE{FastLookupSize()}; | ||
| 96 | |||
| 97 | struct InstInfo { | ||
| 98 | [[nodiscard]] u64 Mask() const noexcept { | ||
| 99 | return static_cast<u64>(high_mask) << MASK_SHIFT; | ||
| 100 | } | ||
| 101 | |||
| 102 | [[nodiscard]] u64 Value() const noexcept { | ||
| 103 | return static_cast<u64>(high_value) << MASK_SHIFT; | ||
| 104 | } | ||
| 105 | |||
| 106 | u16 high_mask; | ||
| 107 | u16 high_value; | ||
| 108 | Opcode opcode; | ||
| 109 | }; | ||
| 110 | |||
| 111 | constexpr auto MakeFastLookupTableIndex(size_t index) { | ||
| 112 | std::array<InstInfo, 2> encodings{}; | ||
| 113 | size_t element{}; | ||
| 114 | for (const auto& encoding : ENCODINGS) { | ||
| 115 | const size_t mask{ToFastLookupIndex(encoding.mask_value.mask)}; | ||
| 116 | const size_t value{ToFastLookupIndex(encoding.mask_value.value)}; | ||
| 117 | if ((index & mask) == value) { | ||
| 118 | encodings.at(element) = InstInfo{ | ||
| 119 | .high_mask = static_cast<u16>(encoding.mask_value.mask >> MASK_SHIFT), | ||
| 120 | .high_value = static_cast<u16>(encoding.mask_value.value >> MASK_SHIFT), | ||
| 121 | .opcode = encoding.opcode, | ||
| 122 | }; | ||
| 123 | ++element; | ||
| 124 | } | ||
| 125 | } | ||
| 126 | return encodings; | ||
| 127 | } | ||
| 128 | |||
| 129 | /*constexpr*/ auto MakeFastLookupTable() { | ||
| 130 | auto encodings{std::make_unique<std::array<std::array<InstInfo, 2>, FAST_LOOKUP_SIZE>>()}; | ||
| 131 | for (size_t index = 0; index < FAST_LOOKUP_SIZE; ++index) { | ||
| 132 | (*encodings)[index] = MakeFastLookupTableIndex(index); | ||
| 133 | } | ||
| 134 | return encodings; | ||
| 135 | } | ||
| 136 | const auto FAST_LOOKUP_TABLE{MakeFastLookupTable()}; | ||
| 137 | } // Anonymous namespace | ||
| 138 | |||
| 139 | Opcode Decode(u64 insn) { | ||
| 140 | const auto& table{(*FAST_LOOKUP_TABLE)[ToFastLookupIndex(insn)]}; | ||
| 141 | const auto it{std::ranges::find_if( | ||
| 142 | table, [insn](const InstInfo& info) { return (insn & info.Mask()) == info.Value(); })}; | ||
| 143 | if (it == table.end()) { | ||
| 144 | throw NotImplementedException("Instruction 0x{:016x} is unknown / unimplemented", insn); | ||
| 145 | } | ||
| 146 | return it->opcode; | ||
| 147 | } | ||
| 148 | |||
| 149 | } // namespace Shader::Maxwell | ||
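Because `MaskValueFromEncoding` is constexpr, its parsing of the `maxwell.inc` pattern strings can be sanity-checked at compile time. Taking BRA's encoding, `"1110 0010 0100 ----"`, twelve fixed bits plus four don't-cares all land in the top sixteen bits of the instruction word; a check one could drop into the anonymous namespace above:

```cpp
constexpr MaskValue bra{MaskValueFromEncoding("1110 0010 0100 ----")};
static_assert(bra.mask == 0xFFF0ULL << 48);  // which bits are fixed
static_assert(bra.value == 0xE240ULL << 48); // required values on those bits
```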
diff --git a/src/shader_recompiler/frontend/maxwell/decode.h b/src/shader_recompiler/frontend/maxwell/decode.h new file mode 100644 index 000000000..b4f080fd7 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/decode.h | |||
| @@ -0,0 +1,14 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | |||
| 12 | [[nodiscard]] Opcode Decode(u64 insn); | ||
| 13 | |||
| 14 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp new file mode 100644 index 000000000..008625cb3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp | |||
| @@ -0,0 +1,108 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <optional> | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "shader_recompiler/exception.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/decode.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/indirect_branch_table_track.h" | ||
| 11 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 12 | #include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h" | ||
| 13 | |||
| 14 | namespace Shader::Maxwell { | ||
| 15 | namespace { | ||
| 16 | union Encoding { | ||
| 17 | u64 raw; | ||
| 18 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 19 | BitField<8, 8, IR::Reg> src_reg; | ||
| 20 | BitField<20, 19, u64> immediate; | ||
| 21 | BitField<56, 1, u64> is_negative; | ||
| 22 | BitField<20, 24, s64> brx_offset; | ||
| 23 | }; | ||
| 24 | |||
| 25 | template <typename Callable> | ||
| 26 | std::optional<u64> Track(Environment& env, Location block_begin, Location& pos, Callable&& func) { | ||
| 27 | while (pos >= block_begin) { | ||
| 28 | const u64 insn{env.ReadInstruction(pos.Offset())}; | ||
| 29 | --pos; | ||
| 30 | if (func(insn, Decode(insn))) { | ||
| 31 | return insn; | ||
| 32 | } | ||
| 33 | } | ||
| 34 | return std::nullopt; | ||
| 35 | } | ||
| 36 | |||
| 37 | std::optional<u64> TrackLDC(Environment& env, Location block_begin, Location& pos, | ||
| 38 | IR::Reg brx_reg) { | ||
| 39 | return Track(env, block_begin, pos, [brx_reg](u64 insn, Opcode opcode) { | ||
| 40 | const LDC::Encoding ldc{insn}; | ||
| 41 | return opcode == Opcode::LDC && ldc.dest_reg == brx_reg && ldc.size == LDC::Size::B32 && | ||
| 42 | ldc.mode == LDC::Mode::Default; | ||
| 43 | }); | ||
| 44 | } | ||
| 45 | |||
| 46 | std::optional<u64> TrackSHL(Environment& env, Location block_begin, Location& pos, | ||
| 47 | IR::Reg ldc_reg) { | ||
| 48 | return Track(env, block_begin, pos, [ldc_reg](u64 insn, Opcode opcode) { | ||
| 49 | const Encoding shl{insn}; | ||
| 50 | return opcode == Opcode::SHL_imm && shl.dest_reg == ldc_reg; | ||
| 51 | }); | ||
| 52 | } | ||
| 53 | |||
| 54 | std::optional<u64> TrackIMNMX(Environment& env, Location block_begin, Location& pos, | ||
| 55 | IR::Reg shl_reg) { | ||
| 56 | return Track(env, block_begin, pos, [shl_reg](u64 insn, Opcode opcode) { | ||
| 57 | const Encoding imnmx{insn}; | ||
| 58 | return opcode == Opcode::IMNMX_imm && imnmx.dest_reg == shl_reg; | ||
| 59 | }); | ||
| 60 | } | ||
| 61 | } // Anonymous namespace | ||
| 62 | |||
| 63 | std::optional<IndirectBranchTableInfo> TrackIndirectBranchTable(Environment& env, Location brx_pos, | ||
| 64 | Location block_begin) { | ||
| 65 | const u64 brx_insn{env.ReadInstruction(brx_pos.Offset())}; | ||
| 66 | const Opcode brx_opcode{Decode(brx_insn)}; | ||
| 67 | if (brx_opcode != Opcode::BRX && brx_opcode != Opcode::JMX) { | ||
| 68 | throw LogicError("Tracked instruction is not BRX or JMX"); | ||
| 69 | } | ||
| 70 | const IR::Reg brx_reg{Encoding{brx_insn}.src_reg}; | ||
| 71 | const s32 brx_offset{static_cast<s32>(Encoding{brx_insn}.brx_offset)}; | ||
| 72 | |||
| 73 | Location pos{brx_pos}; | ||
| 74 | const std::optional<u64> ldc_insn{TrackLDC(env, block_begin, pos, brx_reg)}; | ||
| 75 | if (!ldc_insn) { | ||
| 76 | return std::nullopt; | ||
| 77 | } | ||
| 78 | const LDC::Encoding ldc{*ldc_insn}; | ||
| 79 | const u32 cbuf_index{static_cast<u32>(ldc.index)}; | ||
| 80 | const u32 cbuf_offset{static_cast<u32>(static_cast<s32>(ldc.offset.Value()))}; | ||
| 81 | const IR::Reg ldc_reg{ldc.src_reg}; | ||
| 82 | |||
| 83 | const std::optional<u64> shl_insn{TrackSHL(env, block_begin, pos, ldc_reg)}; | ||
| 84 | if (!shl_insn) { | ||
| 85 | return std::nullopt; | ||
| 86 | } | ||
| 87 | const Encoding shl{*shl_insn}; | ||
| 88 | const IR::Reg shl_reg{shl.src_reg}; | ||
| 89 | |||
| 90 | const std::optional<u64> imnmx_insn{TrackIMNMX(env, block_begin, pos, shl_reg)}; | ||
| 91 | if (!imnmx_insn) { | ||
| 92 | return std::nullopt; | ||
| 93 | } | ||
| 94 | const Encoding imnmx{*imnmx_insn}; | ||
| 95 | if (imnmx.is_negative != 0) { | ||
| 96 | return std::nullopt; | ||
| 97 | } | ||
| 98 | const u32 imnmx_immediate{static_cast<u32>(imnmx.immediate.Value())}; | ||
| 99 | return IndirectBranchTableInfo{ | ||
| 100 | .cbuf_index = cbuf_index, | ||
| 101 | .cbuf_offset = cbuf_offset, | ||
| 102 | .num_entries = imnmx_immediate + 1, | ||
| 103 | .branch_offset = brx_offset, | ||
| 104 | .branch_reg = brx_reg, | ||
| 105 | }; | ||
| 106 | } | ||
| 107 | |||
| 108 | } // namespace Shader::Maxwell | ||
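What the tracker is pattern-matching, read forwards, is the idiom emitted for jump tables (hand-written Maxwell-style assembly; registers and constants are made up):

```cpp
// IMNMX.U32 R2, R0, 0x5, PT        // clamp the index; immediate 0x5 -> num_entries = 6
// SHL       R2, R2, 0x2            // scale to a byte offset (entries are 4 bytes)
// LDC.32    R2, c[0x1][0x40 + R2]  // load the branch target from the table
// BRX       R2 -0x1234             // indirect branch; -0x1234 is branch_offset
//
// TrackLDC, TrackSHL and TrackIMNMX walk backwards matching each step by its
// destination register; any missing link makes the whole track return nullopt.
```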
diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h new file mode 100644 index 000000000..eee5102fa --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h | |||
| @@ -0,0 +1,28 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <optional> | ||
| 8 | |||
| 9 | #include "common/bit_field.h" | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "shader_recompiler/environment.h" | ||
| 12 | #include "shader_recompiler/frontend/ir/reg.h" | ||
| 13 | #include "shader_recompiler/frontend/maxwell/location.h" | ||
| 14 | |||
| 15 | namespace Shader::Maxwell { | ||
| 16 | |||
| 17 | struct IndirectBranchTableInfo { | ||
| 18 | u32 cbuf_index{}; | ||
| 19 | u32 cbuf_offset{}; | ||
| 20 | u32 num_entries{}; | ||
| 21 | s32 branch_offset{}; | ||
| 22 | IR::Reg branch_reg{}; | ||
| 23 | }; | ||
| 24 | |||
| 25 | std::optional<IndirectBranchTableInfo> TrackIndirectBranchTable(Environment& env, Location brx_pos, | ||
| 26 | Location block_begin); | ||
| 27 | |||
| 28 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/instruction.h b/src/shader_recompiler/frontend/maxwell/instruction.h new file mode 100644 index 000000000..743d68d61 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/instruction.h | |||
| @@ -0,0 +1,63 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/flow_test.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/reg.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | |||
| 14 | struct Predicate { | ||
| 15 | Predicate() = default; | ||
| 16 | Predicate(unsigned index_, bool negated_ = false) : index{index_}, negated{negated_} {} | ||
| 17 | Predicate(bool value) : index{7}, negated{!value} {} | ||
| 18 | Predicate(u64 raw) : index{static_cast<unsigned>(raw & 7)}, negated{(raw & 8) != 0} {} | ||
| 19 | |||
| 20 | unsigned index; | ||
| 21 | bool negated; | ||
| 22 | }; | ||
| 23 | |||
| 24 | inline bool operator==(const Predicate& lhs, const Predicate& rhs) noexcept { | ||
| 25 | return lhs.index == rhs.index && lhs.negated == rhs.negated; | ||
| 26 | } | ||
| 27 | |||
| 28 | inline bool operator!=(const Predicate& lhs, const Predicate& rhs) noexcept { | ||
| 29 | return !(lhs == rhs); | ||
| 30 | } | ||
| 31 | |||
| 32 | union Instruction { | ||
| 33 | Instruction(u64 raw_) : raw{raw_} {} | ||
| 34 | |||
| 35 | u64 raw; | ||
| 36 | |||
| 37 | union { | ||
| 38 | BitField<5, 1, u64> is_cbuf; | ||
| 39 | BitField<0, 5, IR::FlowTest> flow_test; | ||
| 40 | |||
| 41 | [[nodiscard]] u32 Absolute() const noexcept { | ||
| 42 | return static_cast<u32>(absolute); | ||
| 43 | } | ||
| 44 | |||
| 45 | [[nodiscard]] s32 Offset() const noexcept { | ||
| 46 | return static_cast<s32>(offset); | ||
| 47 | } | ||
| 48 | |||
| 49 | private: | ||
| 50 | BitField<20, 24, s64> offset; | ||
| 51 | BitField<20, 32, u64> absolute; | ||
| 52 | } branch; | ||
| 53 | |||
| 54 | [[nodiscard]] Predicate Pred() const noexcept { | ||
| 55 | return Predicate{pred}; | ||
| 56 | } | ||
| 57 | |||
| 58 | private: | ||
| 59 | BitField<16, 4, u64> pred; | ||
| 60 | }; | ||
| 61 | static_assert(std::is_trivially_copyable_v<Instruction>); | ||
| 62 | |||
| 63 | } // namespace Shader::Maxwell | ||
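A hedged usage sketch of the predicate accessors: the 4-bit field holds a predicate index in bits 0-2 and a negation flag in bit 3, so `Predicate{true}` is PT (index 7, not negated) and `Predicate{false}` is !PT, a predicate that never holds:

```cpp
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/instruction.h"

// True when the instruction is statically predicated off (@!PT) and can never
// execute; AnalyzeBranch uses exactly this comparison to skip dead branches.
bool IsPredicatedOff(u64 raw) {
    const Shader::Maxwell::Instruction inst{raw};
    return inst.Pred() == Shader::Maxwell::Predicate{false};
}
```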
diff --git a/src/shader_recompiler/frontend/maxwell/location.h b/src/shader_recompiler/frontend/maxwell/location.h new file mode 100644 index 000000000..26d29eae2 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/location.h | |||
| @@ -0,0 +1,112 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <compare> | ||
| 8 | #include <iterator> | ||
| 9 | |||
| 10 | #include <fmt/format.h> | ||
| 11 | |||
| 12 | #include "common/common_types.h" | ||
| 13 | #include "shader_recompiler/exception.h" | ||
| 14 | |||
| 15 | namespace Shader::Maxwell { | ||
| 16 | |||
| 17 | class Location { | ||
| 18 | static constexpr u32 VIRTUAL_BIAS{4}; | ||
| 19 | |||
| 20 | public: | ||
| 21 | constexpr Location() = default; | ||
| 22 | |||
| 23 | constexpr Location(u32 initial_offset) : offset{initial_offset} { | ||
| 24 | if (initial_offset % 8 != 0) { | ||
| 25 | throw InvalidArgument("initial_offset={} is not a multiple of 8", initial_offset); | ||
| 26 | } | ||
| 27 | Align(); | ||
| 28 | } | ||
| 29 | |||
| 30 | constexpr Location Virtual() const noexcept { | ||
| 31 | Location virtual_location; | ||
| 32 | virtual_location.offset = offset - VIRTUAL_BIAS; | ||
| 33 | return virtual_location; | ||
| 34 | } | ||
| 35 | |||
| 36 | [[nodiscard]] constexpr u32 Offset() const noexcept { | ||
| 37 | return offset; | ||
| 38 | } | ||
| 39 | |||
| 40 | [[nodiscard]] constexpr bool IsVirtual() const { | ||
| 41 | return offset % 8 == VIRTUAL_BIAS; | ||
| 42 | } | ||
| 43 | |||
| 44 | constexpr auto operator<=>(const Location&) const noexcept = default; | ||
| 45 | |||
| 46 | constexpr Location operator++() noexcept { | ||
| 47 | Step(); | ||
| 48 | return *this; | ||
| 49 | } | ||
| 50 | |||
| 51 | constexpr Location operator++(int) noexcept { | ||
| 52 | const Location copy{*this}; | ||
| 53 | Step(); | ||
| 54 | return copy; | ||
| 55 | } | ||
| 56 | |||
| 57 | constexpr Location operator--() noexcept { | ||
| 58 | Back(); | ||
| 59 | return *this; | ||
| 60 | } | ||
| 61 | |||
| 62 | constexpr Location operator--(int) noexcept { | ||
| 63 | const Location copy{*this}; | ||
| 64 | Back(); | ||
| 65 | return copy; | ||
| 66 | } | ||
| 67 | |||
| 68 | constexpr Location operator+(int number) const { | ||
| 69 | Location new_pc{*this}; | ||
| 70 | while (number > 0) { | ||
| 71 | --number; | ||
| 72 | ++new_pc; | ||
| 73 | } | ||
| 74 | while (number < 0) { | ||
| 75 | ++number; | ||
| 76 | --new_pc; | ||
| 77 | } | ||
| 78 | return new_pc; | ||
| 79 | } | ||
| 80 | |||
| 81 | constexpr Location operator-(int number) const { | ||
| 82 | return operator+(-number); | ||
| 83 | } | ||
| 84 | |||
| 85 | private: | ||
| 86 | constexpr void Align() { | ||
| 87 | offset += offset % 32 == 0 ? 8 : 0; | ||
| 88 | } | ||
| 89 | |||
| 90 | constexpr void Step() { | ||
| 91 | offset += 8 + (offset % 32 == 24 ? 8 : 0); | ||
| 92 | } | ||
| 93 | |||
| 94 | constexpr void Back() { | ||
| 95 | offset -= 8 + (offset % 32 == 8 ? 8 : 0); | ||
| 96 | } | ||
| 97 | |||
| 98 | u32 offset{0xcccccccc}; | ||
| 99 | }; | ||
| 100 | |||
| 101 | } // namespace Shader::Maxwell | ||
| 102 | |||
| 103 | template <> | ||
| 104 | struct fmt::formatter<Shader::Maxwell::Location> { | ||
| 105 | constexpr auto parse(format_parse_context& ctx) { | ||
| 106 | return ctx.begin(); | ||
| 107 | } | ||
| 108 | template <typename FormatContext> | ||
| 109 | auto format(const Shader::Maxwell::Location& location, FormatContext& ctx) { | ||
| 110 | return fmt::format_to(ctx.out(), "{:04x}", location.Offset()); | ||
| 111 | } | ||
| 112 | }; | ||
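The arithmetic in `Align`, `Step` and `Back` encodes one hardware fact: Maxwell packs three 8-byte instructions per 32-byte bundle, and the first slot of each bundle holds scheduling metadata rather than code. Since everything here is constexpr, the slot skipping can be verified at compile time:

```cpp
using Shader::Maxwell::Location;

static_assert(Location{0}.Offset() == 8);         // Align() hops over the scheduling slot
static_assert((Location{8} + 3).Offset() == 40);  // 8 -> 16 -> 24 -> 40, skipping 32
static_assert((Location{40} - 1).Offset() == 24); // Back() skips 32 in reverse
```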
diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc new file mode 100644 index 000000000..2fee591bb --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc | |||
| @@ -0,0 +1,286 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | INST(AL2P, "AL2P", "1110 1111 1010 0---") | ||
| 6 | INST(ALD, "ALD", "1110 1111 1101 1---") | ||
| 7 | INST(AST, "AST", "1110 1111 1111 0---") | ||
| 8 | INST(ATOM_cas, "ATOM (cas)", "1110 1110 1111 ----") | ||
| 9 | INST(ATOM, "ATOM", "1110 1101 ---- ----") | ||
| 10 | INST(ATOMS_cas, "ATOMS (cas)", "1110 1110 ---- ----") | ||
| 11 | INST(ATOMS, "ATOMS", "1110 1100 ---- ----") | ||
| 12 | INST(B2R, "B2R", "1111 0000 1011 1---") | ||
| 13 | INST(BAR, "BAR", "1111 0000 1010 1---") | ||
| 14 | INST(BFE_reg, "BFE (reg)", "0101 1100 0000 0---") | ||
| 15 | INST(BFE_cbuf, "BFE (cbuf)", "0100 1100 0000 0---") | ||
| 16 | INST(BFE_imm, "BFE (imm)", "0011 100- 0000 0---") | ||
| 17 | INST(BFI_reg, "BFI (reg)", "0101 1011 1111 0---") | ||
| 18 | INST(BFI_rc, "BFI (rc)", "0101 0011 1111 0---") | ||
| 19 | INST(BFI_cr, "BFI (cr)", "0100 1011 1111 0---") | ||
| 20 | INST(BFI_imm, "BFI (imm)", "0011 011- 1111 0---") | ||
| 21 | INST(BPT, "BPT", "1110 0011 1010 ----") | ||
| 22 | INST(BRA, "BRA", "1110 0010 0100 ----") | ||
| 23 | INST(BRK, "BRK", "1110 0011 0100 ----") | ||
| 24 | INST(BRX, "BRX", "1110 0010 0101 ----") | ||
| 25 | INST(CAL, "CAL", "1110 0010 0110 ----") | ||
| 26 | INST(CCTL, "CCTL", "1110 1111 011- ----") | ||
| 27 | INST(CCTLL, "CCTLL", "1110 1111 100- ----") | ||
| 28 | INST(CONT, "CONT", "1110 0011 0101 ----") | ||
| 29 | INST(CS2R, "CS2R", "0101 0000 1100 1---") | ||
| 30 | INST(CSET, "CSET", "0101 0000 1001 1---") | ||
| 31 | INST(CSETP, "CSETP", "0101 0000 1010 0---") | ||
| 32 | INST(DADD_reg, "DADD (reg)", "0101 1100 0111 0---") | ||
| 33 | INST(DADD_cbuf, "DADD (cbuf)", "0100 1100 0111 0---") | ||
| 34 | INST(DADD_imm, "DADD (imm)", "0011 100- 0111 0---") | ||
| 35 | INST(DEPBAR, "DEPBAR", "1111 0000 1111 0---") | ||
| 36 | INST(DFMA_reg, "DFMA (reg)", "0101 1011 0111 ----") | ||
| 37 | INST(DFMA_rc, "DFMA (rc)", "0101 0011 0111 ----") | ||
| 38 | INST(DFMA_cr, "DFMA (cr)", "0100 1011 0111 ----") | ||
| 39 | INST(DFMA_imm, "DFMA (imm)", "0011 011- 0111 ----") | ||
| 40 | INST(DMNMX_reg, "DMNMX (reg)", "0101 1100 0101 0---") | ||
| 41 | INST(DMNMX_cbuf, "DMNMX (cbuf)", "0100 1100 0101 0---") | ||
| 42 | INST(DMNMX_imm, "DMNMX (imm)", "0011 100- 0101 0---") | ||
| 43 | INST(DMUL_reg, "DMUL (reg)", "0101 1100 1000 0---") | ||
| 44 | INST(DMUL_cbuf, "DMUL (cbuf)", "0100 1100 1000 0---") | ||
| 45 | INST(DMUL_imm, "DMUL (imm)", "0011 100- 1000 0---") | ||
| 46 | INST(DSET_reg, "DSET (reg)", "0101 1001 0--- ----") | ||
| 47 | INST(DSET_cbuf, "DSET (cbuf)", "0100 1001 0--- ----") | ||
| 48 | INST(DSET_imm, "DSET (imm)", "0011 001- 0--- ----") | ||
| 49 | INST(DSETP_reg, "DSETP (reg)", "0101 1011 1000 ----") | ||
| 50 | INST(DSETP_cbuf, "DSETP (cbuf)", "0100 1011 1000 ----") | ||
| 51 | INST(DSETP_imm, "DSETP (imm)", "0011 011- 1000 ----") | ||
| 52 | INST(EXIT, "EXIT", "1110 0011 0000 ----") | ||
| 53 | INST(F2F_reg, "F2F (reg)", "0101 1100 1010 1---") | ||
| 54 | INST(F2F_cbuf, "F2F (cbuf)", "0100 1100 1010 1---") | ||
| 55 | INST(F2F_imm, "F2F (imm)", "0011 100- 1010 1---") | ||
| 56 | INST(F2I_reg, "F2I (reg)", "0101 1100 1011 0---") | ||
| 57 | INST(F2I_cbuf, "F2I (cbuf)", "0100 1100 1011 0---") | ||
| 58 | INST(F2I_imm, "F2I (imm)", "0011 100- 1011 0---") | ||
| 59 | INST(FADD_reg, "FADD (reg)", "0101 1100 0101 1---") | ||
| 60 | INST(FADD_cbuf, "FADD (cbuf)", "0100 1100 0101 1---") | ||
| 61 | INST(FADD_imm, "FADD (imm)", "0011 100- 0101 1---") | ||
| 62 | INST(FADD32I, "FADD32I", "0000 10-- ---- ----") | ||
| 63 | INST(FCHK_reg, "FCHK (reg)", "0101 1100 1000 1---") | ||
| 64 | INST(FCHK_cbuf, "FCHK (cbuf)", "0100 1100 1000 1---") | ||
| 65 | INST(FCHK_imm, "FCHK (imm)", "0011 100- 1000 1---") | ||
| 66 | INST(FCMP_reg, "FCMP (reg)", "0101 1011 1010 ----") | ||
| 67 | INST(FCMP_rc, "FCMP (rc)", "0101 0011 1010 ----") | ||
| 68 | INST(FCMP_cr, "FCMP (cr)", "0100 1011 1010 ----") | ||
| 69 | INST(FCMP_imm, "FCMP (imm)", "0011 011- 1010 ----") | ||
| 70 | INST(FFMA_reg, "FFMA (reg)", "0101 1001 1--- ----") | ||
| 71 | INST(FFMA_rc, "FFMA (rc)", "0101 0001 1--- ----") | ||
| 72 | INST(FFMA_cr, "FFMA (cr)", "0100 1001 1--- ----") | ||
| 73 | INST(FFMA_imm, "FFMA (imm)", "0011 001- 1--- ----") | ||
| 74 | INST(FFMA32I, "FFMA32I", "0000 11-- ---- ----") | ||
| 75 | INST(FLO_reg, "FLO (reg)", "0101 1100 0011 0---") | ||
| 76 | INST(FLO_cbuf, "FLO (cbuf)", "0100 1100 0011 0---") | ||
| 77 | INST(FLO_imm, "FLO (imm)", "0011 100- 0011 0---") | ||
| 78 | INST(FMNMX_reg, "FMNMX (reg)", "0101 1100 0110 0---") | ||
| 79 | INST(FMNMX_cbuf, "FMNMX (cbuf)", "0100 1100 0110 0---") | ||
| 80 | INST(FMNMX_imm, "FMNMX (imm)", "0011 100- 0110 0---") | ||
| 81 | INST(FMUL_reg, "FMUL (reg)", "0101 1100 0110 1---") | ||
| 82 | INST(FMUL_cbuf, "FMUL (cbuf)", "0100 1100 0110 1---") | ||
| 83 | INST(FMUL_imm, "FMUL (imm)", "0011 100- 0110 1---") | ||
| 84 | INST(FMUL32I, "FMUL32I", "0001 1110 ---- ----") | ||
| 85 | INST(FSET_reg, "FSET (reg)", "0101 1000 ---- ----") | ||
| 86 | INST(FSET_cbuf, "FSET (cbuf)", "0100 1000 ---- ----") | ||
| 87 | INST(FSET_imm, "FSET (imm)", "0011 000- ---- ----") | ||
| 88 | INST(FSETP_reg, "FSETP (reg)", "0101 1011 1011 ----") | ||
| 89 | INST(FSETP_cbuf, "FSETP (cbuf)", "0100 1011 1011 ----") | ||
| 90 | INST(FSETP_imm, "FSETP (imm)", "0011 011- 1011 ----") | ||
| 91 | INST(FSWZADD, "FSWZADD", "0101 0000 1111 1---") | ||
| 92 | INST(GETCRSPTR, "GETCRSPTR", "1110 0010 1100 ----") | ||
| 93 | INST(GETLMEMBASE, "GETLMEMBASE", "1110 0010 1101 ----") | ||
| 94 | INST(HADD2_reg, "HADD2 (reg)", "0101 1101 0001 0---") | ||
| 95 | INST(HADD2_cbuf, "HADD2 (cbuf)", "0111 101- 1--- ----") | ||
| 96 | INST(HADD2_imm, "HADD2 (imm)", "0111 101- 0--- ----") | ||
| 97 | INST(HADD2_32I, "HADD2_32I", "0010 110- ---- ----") | ||
| 98 | INST(HFMA2_reg, "HFMA2 (reg)", "0101 1101 0000 0---") | ||
| 99 | INST(HFMA2_rc, "HFMA2 (rc)", "0110 0--- 1--- ----") | ||
| 100 | INST(HFMA2_cr, "HFMA2 (cr)", "0111 0--- 1--- ----") | ||
| 101 | INST(HFMA2_imm, "HFMA2 (imm)", "0111 0--- 0--- ----") | ||
| 102 | INST(HFMA2_32I, "HFMA2_32I", "0010 100- ---- ----") | ||
| 103 | INST(HMUL2_reg, "HMUL2 (reg)", "0101 1101 0000 1---") | ||
| 104 | INST(HMUL2_cbuf, "HMUL2 (cbuf)", "0111 100- 1--- ----") | ||
| 105 | INST(HMUL2_imm, "HMUL2 (imm)", "0111 100- 0--- ----") | ||
| 106 | INST(HMUL2_32I, "HMUL2_32I", "0010 101- ---- ----") | ||
| 107 | INST(HSET2_reg, "HSET2 (reg)", "0101 1101 0001 1---") | ||
| 108 | INST(HSET2_cbuf, "HSET2 (cbuf)", "0111 110- 1--- ----") | ||
| 109 | INST(HSET2_imm, "HSET2 (imm)", "0111 110- 0--- ----") | ||
| 110 | INST(HSETP2_reg, "HSETP2 (reg)", "0101 1101 0010 0---") | ||
| 111 | INST(HSETP2_cbuf, "HSETP2 (cbuf)", "0111 111- 1--- ----") | ||
| 112 | INST(HSETP2_imm, "HSETP2 (imm)", "0111 111- 0--- ----") | ||
| 113 | INST(I2F_reg, "I2F (reg)", "0101 1100 1011 1---") | ||
| 114 | INST(I2F_cbuf, "I2F (cbuf)", "0100 1100 1011 1---") | ||
| 115 | INST(I2F_imm, "I2F (imm)", "0011 100- 1011 1---") | ||
| 116 | INST(I2I_reg, "I2I (reg)", "0101 1100 1110 0---") | ||
| 117 | INST(I2I_cbuf, "I2I (cbuf)", "0100 1100 1110 0---") | ||
| 118 | INST(I2I_imm, "I2I (imm)", "0011 100- 1110 0---") | ||
| 119 | INST(IADD_reg, "IADD (reg)", "0101 1100 0001 0---") | ||
| 120 | INST(IADD_cbuf, "IADD (cbuf)", "0100 1100 0001 0---") | ||
| 121 | INST(IADD_imm, "IADD (imm)", "0011 100- 0001 0---") | ||
| 122 | INST(IADD3_reg, "IADD3 (reg)", "0101 1100 1100 ----") | ||
| 123 | INST(IADD3_cbuf, "IADD3 (cbuf)", "0100 1100 1100 ----") | ||
| 124 | INST(IADD3_imm, "IADD3 (imm)", "0011 100- 1100 ----") | ||
| 125 | INST(IADD32I, "IADD32I", "0001 110- ---- ----") | ||
| 126 | INST(ICMP_reg, "ICMP (reg)", "0101 1011 0100 ----") | ||
| 127 | INST(ICMP_rc, "ICMP (rc)", "0101 0011 0100 ----") | ||
| 128 | INST(ICMP_cr, "ICMP (cr)", "0100 1011 0100 ----") | ||
| 129 | INST(ICMP_imm, "ICMP (imm)", "0011 011- 0100 ----") | ||
| 130 | INST(IDE, "IDE", "1110 0011 1001 ----") | ||
| 131 | INST(IDP_reg, "IDP (reg)", "0101 0011 1111 1---") | ||
| 132 | INST(IDP_imm, "IDP (imm)", "0101 0011 1101 1---") | ||
| 133 | INST(IMAD_reg, "IMAD (reg)", "0101 1010 0--- ----") | ||
| 134 | INST(IMAD_rc, "IMAD (rc)", "0101 0010 0--- ----") | ||
| 135 | INST(IMAD_cr, "IMAD (cr)", "0100 1010 0--- ----") | ||
| 136 | INST(IMAD_imm, "IMAD (imm)", "0011 010- 0--- ----") | ||
| 137 | INST(IMAD32I, "IMAD32I", "1000 00-- ---- ----") | ||
| 138 | INST(IMADSP_reg, "IMADSP (reg)", "0101 1010 1--- ----") | ||
| 139 | INST(IMADSP_rc, "IMADSP (rc)", "0101 0010 1--- ----") | ||
| 140 | INST(IMADSP_cr, "IMADSP (cr)", "0100 1010 1--- ----") | ||
| 141 | INST(IMADSP_imm, "IMADSP (imm)", "0011 010- 1--- ----") | ||
| 142 | INST(IMNMX_reg, "IMNMX (reg)", "0101 1100 0010 0---") | ||
| 143 | INST(IMNMX_cbuf, "IMNMX (cbuf)", "0100 1100 0010 0---") | ||
| 144 | INST(IMNMX_imm, "IMNMX (imm)", "0011 100- 0010 0---") | ||
| 145 | INST(IMUL_reg, "IMUL (reg)", "0101 1100 0011 1---") | ||
| 146 | INST(IMUL_cbuf, "IMUL (cbuf)", "0100 1100 0011 1---") | ||
| 147 | INST(IMUL_imm, "IMUL (imm)", "0011 100- 0011 1---") | ||
| 148 | INST(IMUL32I, "IMUL32I", "0001 1111 ---- ----") | ||
| 149 | INST(IPA, "IPA", "1110 0000 ---- ----") | ||
| 150 | INST(ISBERD, "ISBERD", "1110 1111 1101 0---") | ||
| 151 | INST(ISCADD_reg, "ISCADD (reg)", "0101 1100 0001 1---") | ||
| 152 | INST(ISCADD_cbuf, "ISCADD (cbuf)", "0100 1100 0001 1---") | ||
| 153 | INST(ISCADD_imm, "ISCADD (imm)", "0011 100- 0001 1---") | ||
| 154 | INST(ISCADD32I, "ISCADD32I", "0001 01-- ---- ----") | ||
| 155 | INST(ISET_reg, "ISET (reg)", "0101 1011 0101 ----") | ||
| 156 | INST(ISET_cbuf, "ISET (cbuf)", "0100 1011 0101 ----") | ||
| 157 | INST(ISET_imm, "ISET (imm)", "0011 011- 0101 ----") | ||
| 158 | INST(ISETP_reg, "ISETP (reg)", "0101 1011 0110 ----") | ||
| 159 | INST(ISETP_cbuf, "ISETP (cbuf)", "0100 1011 0110 ----") | ||
| 160 | INST(ISETP_imm, "ISETP (imm)", "0011 011- 0110 ----") | ||
| 161 | INST(JCAL, "JCAL", "1110 0010 0010 ----") | ||
| 162 | INST(JMP, "JMP", "1110 0010 0001 ----") | ||
| 163 | INST(JMX, "JMX", "1110 0010 0000 ----") | ||
| 164 | INST(KIL, "KIL", "1110 0011 0011 ----") | ||
| 165 | INST(LD, "LD", "100- ---- ---- ----") | ||
| 166 | INST(LDC, "LDC", "1110 1111 1001 0---") | ||
| 167 | INST(LDG, "LDG", "1110 1110 1101 0---") | ||
| 168 | INST(LDL, "LDL", "1110 1111 0100 0---") | ||
| 169 | INST(LDS, "LDS", "1110 1111 0100 1---") | ||
| 170 | INST(LEA_hi_reg, "LEA (hi reg)", "0101 1011 1101 1---") | ||
| 171 | INST(LEA_hi_cbuf, "LEA (hi cbuf)", "0001 10-- ---- ----") | ||
| 172 | INST(LEA_lo_reg, "LEA (lo reg)", "0101 1011 1101 0---") | ||
| 173 | INST(LEA_lo_cbuf, "LEA (lo cbuf)", "0100 1011 1101 ----") | ||
| 174 | INST(LEA_lo_imm, "LEA (lo imm)", "0011 011- 1101 0---") | ||
| 175 | INST(LEPC, "LEPC", "0101 0000 1101 0---") | ||
| 176 | INST(LONGJMP, "LONGJMP", "1110 0011 0001 ----") | ||
| 177 | INST(LOP_reg, "LOP (reg)", "0101 1100 0100 0---") | ||
| 178 | INST(LOP_cbuf, "LOP (cbuf)", "0100 1100 0100 0---") | ||
| 179 | INST(LOP_imm, "LOP (imm)", "0011 100- 0100 0---") | ||
| 180 | INST(LOP3_reg, "LOP3 (reg)", "0101 1011 1110 0---") | ||
| 181 | INST(LOP3_cbuf, "LOP3 (cbuf)", "0000 001- ---- ----") | ||
| 182 | INST(LOP3_imm, "LOP3 (imm)", "0011 11-- ---- ----") | ||
| 183 | INST(LOP32I, "LOP32I", "0000 01-- ---- ----") | ||
| 184 | INST(MEMBAR, "MEMBAR", "1110 1111 1001 1---") | ||
| 185 | INST(MOV_reg, "MOV (reg)", "0101 1100 1001 1---") | ||
| 186 | INST(MOV_cbuf, "MOV (cbuf)", "0100 1100 1001 1---") | ||
| 187 | INST(MOV_imm, "MOV (imm)", "0011 100- 1001 1---") | ||
| 188 | INST(MOV32I, "MOV32I", "0000 0001 0000 ----") | ||
| 189 | INST(MUFU, "MUFU", "0101 0000 1000 0---") | ||
| 190 | INST(NOP, "NOP", "0101 0000 1011 0---") | ||
| 191 | INST(OUT_reg, "OUT (reg)", "1111 1011 1110 0---") | ||
| 192 | INST(OUT_cbuf, "OUT (cbuf)", "1110 1011 1110 0---") | ||
| 193 | INST(OUT_imm, "OUT (imm)", "1111 011- 1110 0---") | ||
| 194 | INST(P2R_reg, "P2R (reg)", "0101 1100 1110 1---") | ||
| 195 | INST(P2R_cbuf, "P2R (cbuf)", "0100 1100 1110 1---") | ||
| 196 | INST(P2R_imm, "P2R (imm)", "0011 1000 1110 1---") | ||
| 197 | INST(PBK, "PBK", "1110 0010 1010 ----") | ||
| 198 | INST(PCNT, "PCNT", "1110 0010 1011 ----") | ||
| 199 | INST(PEXIT, "PEXIT", "1110 0010 0011 ----") | ||
| 200 | INST(PIXLD, "PIXLD", "1110 1111 1110 1---") | ||
| 201 | INST(PLONGJMP, "PLONGJMP", "1110 0010 1000 ----") | ||
| 202 | INST(POPC_reg, "POPC (reg)", "0101 1100 0000 1---") | ||
| 203 | INST(POPC_cbuf, "POPC (cbuf)", "0100 1100 0000 1---") | ||
| 204 | INST(POPC_imm, "POPC (imm)", "0011 100- 0000 1---") | ||
| 205 | INST(PRET, "PRET", "1110 0010 0111 ----") | ||
| 206 | INST(PRMT_reg, "PRMT (reg)", "0101 1011 1100 ----") | ||
| 207 | INST(PRMT_rc, "PRMT (rc)", "0101 0011 1100 ----") | ||
| 208 | INST(PRMT_cr, "PRMT (cr)", "0100 1011 1100 ----") | ||
| 209 | INST(PRMT_imm, "PRMT (imm)", "0011 011- 1100 ----") | ||
| 210 | INST(PSET, "PSET", "0101 0000 1000 1---") | ||
| 211 | INST(PSETP, "PSETP", "0101 0000 1001 0---") | ||
| 212 | INST(R2B, "R2B", "1111 0000 1100 0---") | ||
| 213 | INST(R2P_reg, "R2P (reg)", "0101 1100 1111 0---") | ||
| 214 | INST(R2P_cbuf, "R2P (cbuf)", "0100 1100 1111 0---") | ||
| 215 | INST(R2P_imm, "R2P (imm)", "0011 100- 1111 0---") | ||
| 216 | INST(RAM, "RAM", "1110 0011 1000 ----") | ||
| 217 | INST(RED, "RED", "1110 1011 1111 1---") | ||
| 218 | INST(RET, "RET", "1110 0011 0010 ----") | ||
| 219 | INST(RRO_reg, "RRO (reg)", "0101 1100 1001 0---") | ||
| 220 | INST(RRO_cbuf, "RRO (cbuf)", "0100 1100 1001 0---") | ||
| 221 | INST(RRO_imm, "RRO (imm)", "0011 100- 1001 0---") | ||
| 222 | INST(RTT, "RTT", "1110 0011 0110 ----") | ||
| 223 | INST(S2R, "S2R", "1111 0000 1100 1---") | ||
| 224 | INST(SAM, "SAM", "1110 0011 0111 ----") | ||
| 225 | INST(SEL_reg, "SEL (reg)", "0101 1100 1010 0---") | ||
| 226 | INST(SEL_cbuf, "SEL (cbuf)", "0100 1100 1010 0---") | ||
| 227 | INST(SEL_imm, "SEL (imm)", "0011 100- 1010 0---") | ||
| 228 | INST(SETCRSPTR, "SETCRSPTR", "1110 0010 1110 ----") | ||
| 229 | INST(SETLMEMBASE, "SETLMEMBASE", "1110 0010 1111 ----") | ||
| 230 | INST(SHF_l_reg, "SHF (l reg)", "0101 1011 1111 1---") | ||
| 231 | INST(SHF_l_imm, "SHF (l imm)", "0011 011- 1111 1---") | ||
| 232 | INST(SHF_r_reg, "SHF (r reg)", "0101 1100 1111 1---") | ||
| 233 | INST(SHF_r_imm, "SHF (r imm)", "0011 100- 1111 1---") | ||
| 234 | INST(SHFL, "SHFL", "1110 1111 0001 0---") | ||
| 235 | INST(SHL_reg, "SHL (reg)", "0101 1100 0100 1---") | ||
| 236 | INST(SHL_cbuf, "SHL (cbuf)", "0100 1100 0100 1---") | ||
| 237 | INST(SHL_imm, "SHL (imm)", "0011 100- 0100 1---") | ||
| 238 | INST(SHR_reg, "SHR (reg)", "0101 1100 0010 1---") | ||
| 239 | INST(SHR_cbuf, "SHR (cbuf)", "0100 1100 0010 1---") | ||
| 240 | INST(SHR_imm, "SHR (imm)", "0011 100- 0010 1---") | ||
| 241 | INST(SSY, "SSY", "1110 0010 1001 ----") | ||
| 242 | INST(ST, "ST", "101- ---- ---- ----") | ||
| 243 | INST(STG, "STG", "1110 1110 1101 1---") | ||
| 244 | INST(STL, "STL", "1110 1111 0101 0---") | ||
| 245 | INST(STP, "STP", "1110 1110 1010 0---") | ||
| 246 | INST(STS, "STS", "1110 1111 0101 1---") | ||
| 247 | INST(SUATOM, "SUATOM", "1110 1010 0--- ----") | ||
| 248 | INST(SUATOM_cas, "SUATOM_cas", "1110 1010 1--- ----") | ||
| 249 | INST(SULD, "SULD", "1110 1011 000- ----") | ||
| 250 | INST(SURED, "SURED", "1110 1011 010- ----") | ||
| 251 | INST(SUST, "SUST", "1110 1011 001- ----") | ||
| 252 | INST(SYNC, "SYNC", "1111 0000 1111 1---") | ||
| 253 | INST(TEX, "TEX", "1100 0--- ---- ----") | ||
| 254 | INST(TEX_b, "TEX (b)", "1101 1110 10-- ----") | ||
| 255 | INST(TEXS, "TEXS", "1101 -00- ---- ----") | ||
| 256 | INST(TLD, "TLD", "1101 1100 ---- ----") | ||
| 257 | INST(TLD_b, "TLD (b)", "1101 1101 ---- ----") | ||
| 258 | INST(TLD4, "TLD4", "1100 10-- ---- ----") | ||
| 259 | INST(TLD4_b, "TLD4 (b)", "1101 1110 11-- ----") | ||
| 260 | INST(TLD4S, "TLD4S", "1101 1111 -0-- ----") | ||
| 261 | INST(TLDS, "TLDS", "1101 -01- ---- ----") | ||
| 262 | INST(TMML, "TMML", "1101 1111 0101 1---") | ||
| 263 | INST(TMML_b, "TMML (b)", "1101 1111 0110 0---") | ||
| 264 | INST(TXA, "TXA", "1101 1111 0100 0---") | ||
| 265 | INST(TXD, "TXD", "1101 1110 00-- ----") | ||
| 266 | INST(TXD_b, "TXD (b)", "1101 1110 01-- ----") | ||
| 267 | INST(TXQ, "TXQ", "1101 1111 0100 1---") | ||
| 268 | INST(TXQ_b, "TXQ (b)", "1101 1111 0101 0---") | ||
| 269 | INST(VABSDIFF, "VABSDIFF", "0101 0100 ---- ----") | ||
| 270 | INST(VABSDIFF4, "VABSDIFF4", "0101 0000 0--- ----") | ||
| 271 | INST(VADD, "VADD", "0010 00-- ---- ----") | ||
| 272 | INST(VMAD, "VMAD", "0101 1111 ---- ----") | ||
| 273 | INST(VMNMX, "VMNMX", "0011 101- ---- ----") | ||
| 274 | INST(VOTE, "VOTE", "0101 0000 1101 1---") | ||
| 275 | INST(VOTE_vtg, "VOTE (vtg)", "0101 0000 1110 0---") | ||
| 276 | INST(VSET, "VSET", "0100 000- ---- ----") | ||
| 277 | INST(VSETP, "VSETP", "0101 0000 1111 0---") | ||
| 278 | INST(VSHL, "VSHL", "0101 0111 ---- ----") | ||
| 279 | INST(VSHR, "VSHR", "0101 0110 ---- ----") | ||
| 280 | INST(XMAD_reg, "XMAD (reg)", "0101 1011 00-- ----") | ||
| 281 | INST(XMAD_rc, "XMAD (rc)", "0101 0001 0--- ----") | ||
| 282 | INST(XMAD_cr, "XMAD (cr)", "0100 111- ---- ----") | ||
| 283 | INST(XMAD_imm, "XMAD (imm)", "0011 011- 00-- ----") | ||
| 284 | |||
| 285 | // Removed because its irregular encoding makes the fast decode tables larger | ||
| 286 | // INST(CCTLT, "CCTLT", "1110 1011 1111 0--0") | ||
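Each encoding string spells out the top 16 bits of the 64-bit instruction word, most significant bit first: `0`/`1` are fixed bits, `-` is a don't-care, and spaces are visual grouping only. A table-driven decoder needs just a mask/value pair per row; the sketch below shows one way to derive them (a minimal, self-contained illustration — the real consumer is decode.cpp, whose helpers are not shown in this hunk):

    #include <cstdint>

    using u64 = std::uint64_t; // the project takes this alias from common/common_types.h

    struct MaskValue {
        u64 mask;  // which bits the encoding fixes
        u64 value; // the required values of those bits
    };

    constexpr MaskValue MaskValueFromEncoding(const char* encoding) {
        u64 mask{};
        u64 value{};
        u64 bit{u64{1} << 63};
        for (const char* it = encoding; *it != '\0'; ++it) {
            switch (*it) {
            case '0':
                mask |= bit;
                break;
            case '1':
                mask |= bit;
                value |= bit;
                break;
            case '-':
                break; // don't-care bit
            case ' ':
                continue; // grouping only, consumes no bit
            }
            bit >>= 1;
        }
        return MaskValue{.mask = mask, .value = value};
    }

    // An instruction word `insn` matches a row when (insn & mask) == value.
    static_assert(MaskValueFromEncoding("1110 1101 ---- ----").value == 0xED00'0000'0000'0000);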
diff --git a/src/shader_recompiler/frontend/maxwell/opcodes.cpp b/src/shader_recompiler/frontend/maxwell/opcodes.cpp new file mode 100644 index 000000000..ccc40c20c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/opcodes.cpp | |||
| @@ -0,0 +1,26 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | |||
| 7 | #include "shader_recompiler/exception.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | constexpr std::array NAME_TABLE{ | ||
| 13 | #define INST(name, cute, encode) cute, | ||
| 14 | #include "maxwell.inc" | ||
| 15 | #undef INST | ||
| 16 | }; | ||
| 17 | } // Anonymous namespace | ||
| 18 | |||
| 19 | const char* NameOf(Opcode opcode) { | ||
| 20 | if (static_cast<size_t>(opcode) >= NAME_TABLE.size()) { | ||
| 21 | throw InvalidArgument("Invalid opcode with raw value {}", static_cast<int>(opcode)); | ||
| 22 | } | ||
| 23 | return NAME_TABLE[static_cast<size_t>(opcode)]; | ||
| 24 | } | ||
| 25 | |||
| 26 | } // namespace Shader::Maxwell | ||
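The same maxwell.inc listing is expanded twice through the INST X-macro — once into the Opcode enum (opcodes.h below) and once into this name table — so the enumeration and the display names cannot drift apart. A minimal, self-contained illustration of the pattern (the EXAMPLE_* names are hypothetical stand-ins for the .inc include):

    #include <array>
    #include <cstddef>

    // Stand-in for "maxwell.inc": each row is one INST(...) invocation.
    #define EXAMPLE_INSTRUCTIONS                            \
        INST(FADD_reg, "FADD (reg)", "0101 1100 0101 1---") \
        INST(FMUL_reg, "FMUL (reg)", "0101 1100 0110 1---")

    enum class ExampleOpcode {
    #define INST(name, cute, encode) name,
        EXAMPLE_INSTRUCTIONS
    #undef INST
    };

    constexpr std::array EXAMPLE_NAMES{
    #define INST(name, cute, encode) cute,
        EXAMPLE_INSTRUCTIONS
    #undef INST
    };

    // Enum value and table index stay in lockstep by construction.
    static_assert(EXAMPLE_NAMES[static_cast<std::size_t>(ExampleOpcode::FMUL_reg)] != nullptr);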
diff --git a/src/shader_recompiler/frontend/maxwell/opcodes.h b/src/shader_recompiler/frontend/maxwell/opcodes.h new file mode 100644 index 000000000..cd574f29d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/opcodes.h | |||
| @@ -0,0 +1,30 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <fmt/format.h> | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | |||
| 11 | enum class Opcode { | ||
| 12 | #define INST(name, cute, encode) name, | ||
| 13 | #include "maxwell.inc" | ||
| 14 | #undef INST | ||
| 15 | }; | ||
| 16 | |||
| 17 | const char* NameOf(Opcode opcode); | ||
| 18 | |||
| 19 | } // namespace Shader::Maxwell | ||
| 20 | |||
| 21 | template <> | ||
| 22 | struct fmt::formatter<Shader::Maxwell::Opcode> { | ||
| 23 | constexpr auto parse(format_parse_context& ctx) { | ||
| 24 | return ctx.begin(); | ||
| 25 | } | ||
| 26 | template <typename FormatContext> | ||
| 27 | auto format(const Shader::Maxwell::Opcode& opcode, FormatContext& ctx) { | ||
| 28 | return format_to(ctx.out(), "{}", NameOf(opcode)); | ||
| 29 | } | ||
| 30 | }; | ||
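The formatter specialization lets call sites hand an Opcode straight to fmt, which is what the exception messages in the frontend rely on. A small usage sketch (the printed text is the "cute" column from maxwell.inc):

    #include <string>

    #include <fmt/format.h>

    #include "shader_recompiler/frontend/maxwell/opcodes.h"

    void Example() {
        using Shader::Maxwell::Opcode;
        fmt::print("decoded {}\n", Opcode::FADD_reg);                    // decoded FADD (reg)
        const std::string msg{fmt::format("unhandled {}", Opcode::TEX)}; // "unhandled TEX"
    }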
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp new file mode 100644 index 000000000..8b3e0a15c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp | |||
| @@ -0,0 +1,883 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <memory> | ||
| 7 | #include <string> | ||
| 8 | #include <unordered_map> | ||
| 9 | #include <utility> | ||
| 10 | #include <vector> | ||
| 11 | #include <version> | ||
| 12 | |||
| 13 | #include <fmt/format.h> | ||
| 14 | |||
| 15 | #include <boost/intrusive/list.hpp> | ||
| 16 | |||
| 17 | #include "shader_recompiler/environment.h" | ||
| 18 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 19 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 20 | #include "shader_recompiler/frontend/maxwell/decode.h" | ||
| 21 | #include "shader_recompiler/frontend/maxwell/structured_control_flow.h" | ||
| 22 | #include "shader_recompiler/frontend/maxwell/translate/translate.h" | ||
| 23 | #include "shader_recompiler/object_pool.h" | ||
| 24 | |||
| 25 | namespace Shader::Maxwell { | ||
| 26 | namespace { | ||
| 27 | struct Statement; | ||
| 28 | |||
| 29 | // Use normal_link because we are not guaranteed to destroy the tree in order | ||
| 30 | using ListBaseHook = | ||
| 31 | boost::intrusive::list_base_hook<boost::intrusive::link_mode<boost::intrusive::normal_link>>; | ||
| 32 | |||
| 33 | using Tree = boost::intrusive::list<Statement, | ||
| 34 | // Allow using Statement without a definition | ||
| 35 | boost::intrusive::base_hook<ListBaseHook>, | ||
| 36 | // Avoid linear complexity on splice, size is never called | ||
| 37 | boost::intrusive::constant_time_size<false>>; | ||
| 38 | using Node = Tree::iterator; | ||
| 39 | |||
| 40 | enum class StatementType { | ||
| 41 | Code, | ||
| 42 | Goto, | ||
| 43 | Label, | ||
| 44 | If, | ||
| 45 | Loop, | ||
| 46 | Break, | ||
| 47 | Return, | ||
| 48 | Kill, | ||
| 49 | Unreachable, | ||
| 50 | Function, | ||
| 51 | Identity, | ||
| 52 | Not, | ||
| 53 | Or, | ||
| 54 | SetVariable, | ||
| 55 | SetIndirectBranchVariable, | ||
| 56 | Variable, | ||
| 57 | IndirectBranchCond, | ||
| 58 | }; | ||
| 59 | |||
| 60 | bool HasChildren(StatementType type) { | ||
| 61 | switch (type) { | ||
| 62 | case StatementType::If: | ||
| 63 | case StatementType::Loop: | ||
| 64 | case StatementType::Function: | ||
| 65 | return true; | ||
| 66 | default: | ||
| 67 | return false; | ||
| 68 | } | ||
| 69 | } | ||
| 70 | |||
| 71 | struct Goto {}; | ||
| 72 | struct Label {}; | ||
| 73 | struct If {}; | ||
| 74 | struct Loop {}; | ||
| 75 | struct Break {}; | ||
| 76 | struct Return {}; | ||
| 77 | struct Kill {}; | ||
| 78 | struct Unreachable {}; | ||
| 79 | struct FunctionTag {}; | ||
| 80 | struct Identity {}; | ||
| 81 | struct Not {}; | ||
| 82 | struct Or {}; | ||
| 83 | struct SetVariable {}; | ||
| 84 | struct SetIndirectBranchVariable {}; | ||
| 85 | struct Variable {}; | ||
| 86 | struct IndirectBranchCond {}; | ||
| 87 | |||
| 88 | #ifdef _MSC_VER | ||
| 89 | #pragma warning(push) | ||
| 90 | #pragma warning(disable : 26495) // Always initialize a member variable, expected in Statement | ||
| 91 | #endif | ||
| 92 | struct Statement : ListBaseHook { | ||
| 93 | Statement(const Flow::Block* block_, Statement* up_) | ||
| 94 | : block{block_}, up{up_}, type{StatementType::Code} {} | ||
| 95 | Statement(Goto, Statement* cond_, Node label_, Statement* up_) | ||
| 96 | : label{label_}, cond{cond_}, up{up_}, type{StatementType::Goto} {} | ||
| 97 | Statement(Label, u32 id_, Statement* up_) : id{id_}, up{up_}, type{StatementType::Label} {} | ||
| 98 | Statement(If, Statement* cond_, Tree&& children_, Statement* up_) | ||
| 99 | : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::If} {} | ||
| 100 | Statement(Loop, Statement* cond_, Tree&& children_, Statement* up_) | ||
| 101 | : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::Loop} {} | ||
| 102 | Statement(Break, Statement* cond_, Statement* up_) | ||
| 103 | : cond{cond_}, up{up_}, type{StatementType::Break} {} | ||
| 104 | Statement(Return, Statement* up_) : up{up_}, type{StatementType::Return} {} | ||
| 105 | Statement(Kill, Statement* up_) : up{up_}, type{StatementType::Kill} {} | ||
| 106 | Statement(Unreachable, Statement* up_) : up{up_}, type{StatementType::Unreachable} {} | ||
| 107 | Statement(FunctionTag) : children{}, type{StatementType::Function} {} | ||
| 108 | Statement(Identity, IR::Condition cond_, Statement* up_) | ||
| 109 | : guest_cond{cond_}, up{up_}, type{StatementType::Identity} {} | ||
| 110 | Statement(Not, Statement* op_, Statement* up_) : op{op_}, up{up_}, type{StatementType::Not} {} | ||
| 111 | Statement(Or, Statement* op_a_, Statement* op_b_, Statement* up_) | ||
| 112 | : op_a{op_a_}, op_b{op_b_}, up{up_}, type{StatementType::Or} {} | ||
| 113 | Statement(SetVariable, u32 id_, Statement* op_, Statement* up_) | ||
| 114 | : op{op_}, id{id_}, up{up_}, type{StatementType::SetVariable} {} | ||
| 115 | Statement(SetIndirectBranchVariable, IR::Reg branch_reg_, s32 branch_offset_, Statement* up_) | ||
| 116 | : branch_offset{branch_offset_}, | ||
| 117 | branch_reg{branch_reg_}, up{up_}, type{StatementType::SetIndirectBranchVariable} {} | ||
| 118 | Statement(Variable, u32 id_, Statement* up_) | ||
| 119 | : id{id_}, up{up_}, type{StatementType::Variable} {} | ||
| 120 | Statement(IndirectBranchCond, u32 location_, Statement* up_) | ||
| 121 | : location{location_}, up{up_}, type{StatementType::IndirectBranchCond} {} | ||
| 122 | |||
| 123 | ~Statement() { | ||
| 124 | if (HasChildren(type)) { | ||
| 125 | std::destroy_at(&children); | ||
| 126 | } | ||
| 127 | } | ||
| 128 | |||
| 129 | union { | ||
| 130 | const Flow::Block* block; | ||
| 131 | Node label; | ||
| 132 | Tree children; | ||
| 133 | IR::Condition guest_cond; | ||
| 134 | Statement* op; | ||
| 135 | Statement* op_a; | ||
| 136 | u32 location; | ||
| 137 | s32 branch_offset; | ||
| 138 | }; | ||
| 139 | union { | ||
| 140 | Statement* cond; | ||
| 141 | Statement* op_b; | ||
| 142 | u32 id; | ||
| 143 | IR::Reg branch_reg; | ||
| 144 | }; | ||
| 145 | Statement* up{}; | ||
| 146 | StatementType type; | ||
| 147 | }; | ||
| 148 | #ifdef _MSC_VER | ||
| 149 | #pragma warning(pop) | ||
| 150 | #endif | ||
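Statement is a hand-rolled tagged union: type selects which union member is live, each constructor initializes only the members its node kind uses (hence the MSVC C26495 suppression above), and the destructor manually destroys children only for node kinds that own a subtree. Reads must therefore be guarded by type first; a hypothetical accessor to illustrate:

    // Sketch: union members are only valid when 'type' matches.
    u32 LabelId(const Statement& stmt) {
        if (stmt.type != StatementType::Label) {
            throw LogicError("Statement is not a label");
        }
        return stmt.id; // 'id' shares storage with 'cond', 'op_b' and 'branch_reg'
    }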
| 151 | |||
| 152 | std::string DumpExpr(const Statement* stmt) { | ||
| 153 | switch (stmt->type) { | ||
| 154 | case StatementType::Identity: | ||
| 155 | return fmt::format("{}", stmt->guest_cond); | ||
| 156 | case StatementType::Not: | ||
| 157 | return fmt::format("!{}", DumpExpr(stmt->op)); | ||
| 158 | case StatementType::Or: | ||
| 159 | return fmt::format("{} || {}", DumpExpr(stmt->op_a), DumpExpr(stmt->op_b)); | ||
| 160 | case StatementType::Variable: | ||
| 161 | return fmt::format("goto_L{}", stmt->id); | ||
| 162 | case StatementType::IndirectBranchCond: | ||
| 163 | return fmt::format("(indirect_branch == {:x})", stmt->location); | ||
| 164 | default: | ||
| 165 | return "<invalid type>"; | ||
| 166 | } | ||
| 167 | } | ||
| 168 | |||
| 169 | [[maybe_unused]] std::string DumpTree(const Tree& tree, u32 indentation = 0) { | ||
| 170 | std::string ret; | ||
| 171 | std::string indent(indentation, ' '); | ||
| 172 | for (auto stmt = tree.begin(); stmt != tree.end(); ++stmt) { | ||
| 173 | switch (stmt->type) { | ||
| 174 | case StatementType::Code: | ||
| 175 | ret += fmt::format("{} Block {:04x} -> {:04x} (0x{:016x});\n", indent, | ||
| 176 | stmt->block->begin.Offset(), stmt->block->end.Offset(), | ||
| 177 | reinterpret_cast<uintptr_t>(stmt->block)); | ||
| 178 | break; | ||
| 179 | case StatementType::Goto: | ||
| 180 | ret += fmt::format("{} if ({}) goto L{};\n", indent, DumpExpr(stmt->cond), | ||
| 181 | stmt->label->id); | ||
| 182 | break; | ||
| 183 | case StatementType::Label: | ||
| 184 | ret += fmt::format("{}L{}:\n", indent, stmt->id); | ||
| 185 | break; | ||
| 186 | case StatementType::If: | ||
| 187 | ret += fmt::format("{} if ({}) {{\n", indent, DumpExpr(stmt->cond)); | ||
| 188 | ret += DumpTree(stmt->children, indentation + 4); | ||
| 189 | ret += fmt::format("{} }}\n", indent); | ||
| 190 | break; | ||
| 191 | case StatementType::Loop: | ||
| 192 | ret += fmt::format("{} do {{\n", indent); | ||
| 193 | ret += DumpTree(stmt->children, indentation + 4); | ||
| 194 | ret += fmt::format("{} }} while ({});\n", indent, DumpExpr(stmt->cond)); | ||
| 195 | break; | ||
| 196 | case StatementType::Break: | ||
| 197 | ret += fmt::format("{} if ({}) break;\n", indent, DumpExpr(stmt->cond)); | ||
| 198 | break; | ||
| 199 | case StatementType::Return: | ||
| 200 | ret += fmt::format("{} return;\n", indent); | ||
| 201 | break; | ||
| 202 | case StatementType::Kill: | ||
| 203 | ret += fmt::format("{} kill;\n", indent); | ||
| 204 | break; | ||
| 205 | case StatementType::Unreachable: | ||
| 206 | ret += fmt::format("{} unreachable;\n", indent); | ||
| 207 | break; | ||
| 208 | case StatementType::SetVariable: | ||
| 209 | ret += fmt::format("{} goto_L{} = {};\n", indent, stmt->id, DumpExpr(stmt->op)); | ||
| 210 | break; | ||
| 211 | case StatementType::SetIndirectBranchVariable: | ||
| 212 | ret += fmt::format("{} indirect_branch = {} + {};\n", indent, stmt->branch_reg, | ||
| 213 | stmt->branch_offset); | ||
| 214 | break; | ||
| 215 | case StatementType::Function: | ||
| 216 | case StatementType::Identity: | ||
| 217 | case StatementType::Not: | ||
| 218 | case StatementType::Or: | ||
| 219 | case StatementType::Variable: | ||
| 220 | case StatementType::IndirectBranchCond: | ||
| 221 | throw LogicError("Statement can't be printed"); | ||
| 222 | } | ||
| 223 | } | ||
| 224 | return ret; | ||
| 225 | } | ||
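DumpTree is a debugging aid that renders the statement tree as pseudo-C. For a tiny two-block shader the output has roughly this shape (the offsets, pointer, and exact rendering of IR::Condition are illustrative, not taken from a real run):

    //  goto_L0 = <always-false>;
    // L0:
    //  Block 0000 -> 0020 (0x000055d2c0001000);
    //  if (<P0>) goto L1;
    //  Block 0020 -> 0040 (0x000055d2c0001100);
    // L1:
    //  return;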
| 226 | |||
| 227 | void SanitizeNoBreaks(const Tree& tree) { | ||
| 228 | if (std::ranges::find(tree, StatementType::Break, &Statement::type) != tree.end()) { | ||
| 229 | throw NotImplementedException("Capturing statement with break nodes"); | ||
| 230 | } | ||
| 231 | } | ||
| 232 | |||
| 233 | size_t Level(Node stmt) { | ||
| 234 | size_t level{0}; | ||
| 235 | Statement* node{stmt->up}; | ||
| 236 | while (node) { | ||
| 237 | ++level; | ||
| 238 | node = node->up; | ||
| 239 | } | ||
| 240 | return level; | ||
| 241 | } | ||
| 242 | |||
| 243 | bool IsDirectlyRelated(Node goto_stmt, Node label_stmt) { | ||
| 244 | const size_t goto_level{Level(goto_stmt)}; | ||
| 245 | const size_t label_level{Level(label_stmt)}; | ||
| 246 | size_t min_level; | ||
| 247 | size_t max_level; | ||
| 248 | Node min; | ||
| 249 | Node max; | ||
| 250 | if (label_level < goto_level) { | ||
| 251 | min_level = label_level; | ||
| 252 | max_level = goto_level; | ||
| 253 | min = label_stmt; | ||
| 254 | max = goto_stmt; | ||
| 255 | } else { // goto_level <= label_level | ||
| 256 | min_level = goto_level; | ||
| 257 | max_level = label_level; | ||
| 258 | min = goto_stmt; | ||
| 259 | max = label_stmt; | ||
| 260 | } | ||
| 261 | while (max_level > min_level) { | ||
| 262 | --max_level; | ||
| 263 | max = max->up; | ||
| 264 | } | ||
| 265 | return min->up == max->up; | ||
| 266 | } | ||
| 267 | |||
| 268 | bool IsIndirectlyRelated(Node goto_stmt, Node label_stmt) { | ||
| 269 | return goto_stmt->up != label_stmt->up && !IsDirectlyRelated(goto_stmt, label_stmt); | ||
| 270 | } | ||
| 271 | |||
| 272 | [[maybe_unused]] bool AreSiblings(Node goto_stmt, Node label_stmt) noexcept { | ||
| 273 | Node it{goto_stmt}; | ||
| 274 | do { | ||
| 275 | if (it == label_stmt) { | ||
| 276 | return true; | ||
| 277 | } | ||
| 278 | --it; | ||
| 279 | } while (it != goto_stmt->up->children.begin()); | ||
| 280 | while (it != goto_stmt->up->children.end()) { | ||
| 281 | if (it == label_stmt) { | ||
| 282 | return true; | ||
| 283 | } | ||
| 284 | ++it; | ||
| 285 | } | ||
| 286 | return false; | ||
| 287 | } | ||
| 288 | |||
| 289 | Node SiblingFromNephew(Node uncle, Node nephew) noexcept { | ||
| 290 | Statement* const parent{uncle->up}; | ||
| 291 | Statement* it{&*nephew}; | ||
| 292 | while (it->up != parent) { | ||
| 293 | it = it->up; | ||
| 294 | } | ||
| 295 | return Tree::s_iterator_to(*it); | ||
| 296 | } | ||
| 297 | |||
| 298 | bool AreOrdered(Node left_sibling, Node right_sibling) noexcept { | ||
| 299 | const Node end{right_sibling->up->children.end()}; | ||
| 300 | for (auto it = right_sibling; it != end; ++it) { | ||
| 301 | if (it == left_sibling) { | ||
| 302 | return false; | ||
| 303 | } | ||
| 304 | } | ||
| 305 | return true; | ||
| 306 | } | ||
| 307 | |||
| 308 | bool NeedsLift(Node goto_stmt, Node label_stmt) noexcept { | ||
| 309 | const Node sibling{SiblingFromNephew(goto_stmt, label_stmt)}; | ||
| 310 | return AreOrdered(sibling, goto_stmt); | ||
| 311 | } | ||
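Together these predicates classify a goto/label pair: SiblingFromNephew walks the label up until it reaches the goto's own nesting level, AreOrdered reports whether one sibling precedes another, and NeedsLift is therefore true exactly when the label lives inside an earlier sibling — a backward jump into a nested construct that must be goto-lifted before inward movement. Schematically (x, c and L are placeholders):

    // {                    // common parent
    //     if (x) {
    //         L: ...       // label nested in an earlier sibling
    //     }
    //     if (c) goto L;   // NeedsLift(goto, label) == true
    // }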
| 312 | |||
| 313 | class GotoPass { | ||
| 314 | public: | ||
| 315 | explicit GotoPass(Flow::CFG& cfg, ObjectPool<Statement>& stmt_pool) : pool{stmt_pool} { | ||
| 316 | std::vector gotos{BuildTree(cfg)}; | ||
| 317 | const auto end{gotos.rend()}; | ||
| 318 | for (auto goto_stmt = gotos.rbegin(); goto_stmt != end; ++goto_stmt) { | ||
| 319 | RemoveGoto(*goto_stmt); | ||
| 320 | } | ||
| 321 | } | ||
| 322 | |||
| 323 | Statement& RootStatement() noexcept { | ||
| 324 | return root_stmt; | ||
| 325 | } | ||
| 326 | |||
| 327 | private: | ||
| 328 | void RemoveGoto(Node goto_stmt) { | ||
| 329 | // Force goto_stmt and label_stmt to be directly related | ||
| 330 | const Node label_stmt{goto_stmt->label}; | ||
| 331 | if (IsIndirectlyRelated(goto_stmt, label_stmt)) { | ||
| 332 | // Move goto_stmt out using outward-movement transformation until it becomes | ||
| 333 | // directly related to label_stmt | ||
| 334 | while (!IsDirectlyRelated(goto_stmt, label_stmt)) { | ||
| 335 | goto_stmt = MoveOutward(goto_stmt); | ||
| 336 | } | ||
| 337 | } | ||
| 338 | // Force goto_stmt and label_stmt to be siblings | ||
| 339 | if (IsDirectlyRelated(goto_stmt, label_stmt)) { | ||
| 340 | const size_t label_level{Level(label_stmt)}; | ||
| 341 | size_t goto_level{Level(goto_stmt)}; | ||
| 342 | if (goto_level > label_level) { | ||
| 343 | // Move goto_stmt out of its level using outward-movement transformations | ||
| 344 | while (goto_level > label_level) { | ||
| 345 | goto_stmt = MoveOutward(goto_stmt); | ||
| 346 | --goto_level; | ||
| 347 | } | ||
| 348 | } else { // Level(goto_stmt) <= Level(label_stmt) | ||
| 349 | if (NeedsLift(goto_stmt, label_stmt)) { | ||
| 350 | // Lift goto_stmt to above stmt containing label_stmt using goto-lifting | ||
| 351 | // transformations | ||
| 352 | goto_stmt = Lift(goto_stmt); | ||
| 353 | } | ||
| 354 | // Move goto_stmt into label_stmt's level using inward-movement transformation | ||
| 355 | while (goto_level < label_level) { | ||
| 356 | goto_stmt = MoveInward(goto_stmt); | ||
| 357 | ++goto_level; | ||
| 358 | } | ||
| 359 | } | ||
| 360 | } | ||
| 361 | // Expensive operation: | ||
| 362 | // if (!AreSiblings(goto_stmt, label_stmt)) { | ||
| 363 | // throw LogicError("Goto is not a sibling with the label"); | ||
| 364 | // } | ||
| 365 | // goto_stmt and label_stmt are guaranteed to be siblings, eliminate | ||
| 366 | if (std::next(goto_stmt) == label_stmt) { | ||
| 367 | // Simply eliminate the goto if the label is next to it | ||
| 368 | goto_stmt->up->children.erase(goto_stmt); | ||
| 369 | } else if (AreOrdered(goto_stmt, label_stmt)) { | ||
| 370 | // Eliminate goto_stmt with a conditional | ||
| 371 | EliminateAsConditional(goto_stmt, label_stmt); | ||
| 372 | } else { | ||
| 373 | // Eliminate goto_stmt with a loop | ||
| 374 | EliminateAsLoop(goto_stmt, label_stmt); | ||
| 375 | } | ||
| 376 | } | ||
| 377 | |||
| 378 | std::vector<Node> BuildTree(Flow::CFG& cfg) { | ||
| 379 | u32 label_id{0}; | ||
| 380 | std::vector<Node> gotos; | ||
| 381 | Flow::Function& first_function{cfg.Functions().front()}; | ||
| 382 | BuildTree(cfg, first_function, label_id, gotos, root_stmt.children.end(), std::nullopt); | ||
| 383 | return gotos; | ||
| 384 | } | ||
| 385 | |||
| 386 | void BuildTree(Flow::CFG& cfg, Flow::Function& function, u32& label_id, | ||
| 387 | std::vector<Node>& gotos, Node function_insert_point, | ||
| 388 | std::optional<Node> return_label) { | ||
| 389 | Statement* const false_stmt{pool.Create(Identity{}, IR::Condition{false}, &root_stmt)}; | ||
| 390 | Tree& root{root_stmt.children}; | ||
| 391 | std::unordered_map<Flow::Block*, Node> local_labels; | ||
| 392 | local_labels.reserve(function.blocks.size()); | ||
| 393 | |||
| 394 | for (Flow::Block& block : function.blocks) { | ||
| 395 | Statement* const label{pool.Create(Label{}, label_id, &root_stmt)}; | ||
| 396 | const Node label_it{root.insert(function_insert_point, *label)}; | ||
| 397 | local_labels.emplace(&block, label_it); | ||
| 398 | ++label_id; | ||
| 399 | } | ||
| 400 | for (Flow::Block& block : function.blocks) { | ||
| 401 | const Node label{local_labels.at(&block)}; | ||
| 402 | // Insertion point | ||
| 403 | const Node ip{std::next(label)}; | ||
| 404 | |||
| 405 | // Reset each goto variable at the start of the function and again right after its label | ||
| 406 | const auto make_reset_variable{[&]() -> Statement& { | ||
| 407 | return *pool.Create(SetVariable{}, label->id, false_stmt, &root_stmt); | ||
| 408 | }}; | ||
| 409 | root.push_front(make_reset_variable()); | ||
| 410 | root.insert(ip, make_reset_variable()); | ||
| 411 | root.insert(ip, *pool.Create(&block, &root_stmt)); | ||
| 412 | |||
| 413 | switch (block.end_class) { | ||
| 414 | case Flow::EndClass::Branch: { | ||
| 415 | Statement* const always_cond{ | ||
| 416 | pool.Create(Identity{}, IR::Condition{true}, &root_stmt)}; | ||
| 417 | if (block.cond == IR::Condition{true}) { | ||
| 418 | const Node true_label{local_labels.at(block.branch_true)}; | ||
| 419 | gotos.push_back( | ||
| 420 | root.insert(ip, *pool.Create(Goto{}, always_cond, true_label, &root_stmt))); | ||
| 421 | } else if (block.cond == IR::Condition{false}) { | ||
| 422 | const Node false_label{local_labels.at(block.branch_false)}; | ||
| 423 | gotos.push_back(root.insert( | ||
| 424 | ip, *pool.Create(Goto{}, always_cond, false_label, &root_stmt))); | ||
| 425 | } else { | ||
| 426 | const Node true_label{local_labels.at(block.branch_true)}; | ||
| 427 | const Node false_label{local_labels.at(block.branch_false)}; | ||
| 428 | Statement* const true_cond{pool.Create(Identity{}, block.cond, &root_stmt)}; | ||
| 429 | gotos.push_back( | ||
| 430 | root.insert(ip, *pool.Create(Goto{}, true_cond, true_label, &root_stmt))); | ||
| 431 | gotos.push_back(root.insert( | ||
| 432 | ip, *pool.Create(Goto{}, always_cond, false_label, &root_stmt))); | ||
| 433 | } | ||
| 434 | break; | ||
| 435 | } | ||
| 436 | case Flow::EndClass::IndirectBranch: | ||
| 437 | root.insert(ip, *pool.Create(SetIndirectBranchVariable{}, block.branch_reg, | ||
| 438 | block.branch_offset, &root_stmt)); | ||
| 439 | for (const Flow::IndirectBranch& indirect : block.indirect_branches) { | ||
| 440 | const Node indirect_label{local_labels.at(indirect.block)}; | ||
| 441 | Statement* cond{ | ||
| 442 | pool.Create(IndirectBranchCond{}, indirect.address, &root_stmt)}; | ||
| 443 | Statement* goto_stmt{pool.Create(Goto{}, cond, indirect_label, &root_stmt)}; | ||
| 444 | gotos.push_back(root.insert(ip, *goto_stmt)); | ||
| 445 | } | ||
| 446 | root.insert(ip, *pool.Create(Unreachable{}, &root_stmt)); | ||
| 447 | break; | ||
| 448 | case Flow::EndClass::Call: { | ||
| 449 | Flow::Function& call{cfg.Functions()[block.function_call]}; | ||
| 450 | const Node call_return_label{local_labels.at(block.return_block)}; | ||
| 451 | BuildTree(cfg, call, label_id, gotos, ip, call_return_label); | ||
| 452 | break; | ||
| 453 | } | ||
| 454 | case Flow::EndClass::Exit: | ||
| 455 | root.insert(ip, *pool.Create(Return{}, &root_stmt)); | ||
| 456 | break; | ||
| 457 | case Flow::EndClass::Return: { | ||
| 458 | Statement* const always_cond{pool.Create(Identity{}, block.cond, &root_stmt)}; | ||
| 459 | auto goto_stmt{pool.Create(Goto{}, always_cond, return_label.value(), &root_stmt)}; | ||
| 460 | gotos.push_back(root.insert(ip, *goto_stmt)); | ||
| 461 | break; | ||
| 462 | } | ||
| 463 | case Flow::EndClass::Kill: | ||
| 464 | root.insert(ip, *pool.Create(Kill{}, &root_stmt)); | ||
| 465 | break; | ||
| 466 | } | ||
| 467 | } | ||
| 468 | } | ||
| 469 | |||
| 470 | void UpdateTreeUp(Statement* tree) { | ||
| 471 | for (Statement& stmt : tree->children) { | ||
| 472 | stmt.up = tree; | ||
| 473 | } | ||
| 474 | } | ||
| 475 | |||
| 476 | void EliminateAsConditional(Node goto_stmt, Node label_stmt) { | ||
| 477 | Tree& body{goto_stmt->up->children}; | ||
| 478 | Tree if_body; | ||
| 479 | if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_stmt); | ||
| 480 | Statement* const cond{pool.Create(Not{}, goto_stmt->cond, &root_stmt)}; | ||
| 481 | Statement* const if_stmt{pool.Create(If{}, cond, std::move(if_body), goto_stmt->up)}; | ||
| 482 | UpdateTreeUp(if_stmt); | ||
| 483 | body.insert(goto_stmt, *if_stmt); | ||
| 484 | body.erase(goto_stmt); | ||
| 485 | } | ||
| 486 | |||
| 487 | void EliminateAsLoop(Node goto_stmt, Node label_stmt) { | ||
| 488 | Tree& body{goto_stmt->up->children}; | ||
| 489 | Tree loop_body; | ||
| 490 | loop_body.splice(loop_body.begin(), body, label_stmt, goto_stmt); | ||
| 491 | Statement* const cond{goto_stmt->cond}; | ||
| 492 | Statement* const loop{pool.Create(Loop{}, cond, std::move(loop_body), goto_stmt->up)}; | ||
| 493 | UpdateTreeUp(loop); | ||
| 494 | body.insert(goto_stmt, *loop); | ||
| 495 | body.erase(goto_stmt); | ||
| 496 | } | ||
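These two routines are the terminal step of the classic structured goto-elimination algorithm (cf. Erosa & Hendren, "Taming Control Flow"): once goto and label are siblings, a forward goto folds into an if over the skipped statements and a backward goto folds into a post-tested loop. Schematically (c, S1, S2 are placeholders; labels are ignored during later translation):

    // EliminateAsConditional (label after the goto):
    //     if (c) goto L;  S1;  S2;  L:    =>    if (!c) { S1; S2; }  L:
    //
    // EliminateAsLoop (label before the goto):
    //     L:  S1;  S2;  if (c) goto L;    =>    do { S1; S2; } while (c);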
| 497 | |||
| 498 | [[nodiscard]] Node MoveOutward(Node goto_stmt) { | ||
| 499 | switch (goto_stmt->up->type) { | ||
| 500 | case StatementType::If: | ||
| 501 | return MoveOutwardIf(goto_stmt); | ||
| 502 | case StatementType::Loop: | ||
| 503 | return MoveOutwardLoop(goto_stmt); | ||
| 504 | default: | ||
| 505 | throw LogicError("Invalid outward movement"); | ||
| 506 | } | ||
| 507 | } | ||
| 508 | |||
| 509 | [[nodiscard]] Node MoveInward(Node goto_stmt) { | ||
| 510 | Statement* const parent{goto_stmt->up}; | ||
| 511 | Tree& body{parent->children}; | ||
| 512 | const Node label{goto_stmt->label}; | ||
| 513 | const Node label_nested_stmt{SiblingFromNephew(goto_stmt, label)}; | ||
| 514 | const u32 label_id{label->id}; | ||
| 515 | |||
| 516 | Statement* const goto_cond{goto_stmt->cond}; | ||
| 517 | Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)}; | ||
| 518 | body.insert(goto_stmt, *set_var); | ||
| 519 | |||
| 520 | Tree if_body; | ||
| 521 | if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_nested_stmt); | ||
| 522 | Statement* const variable{pool.Create(Variable{}, label_id, &root_stmt)}; | ||
| 523 | Statement* const neg_var{pool.Create(Not{}, variable, &root_stmt)}; | ||
| 524 | if (!if_body.empty()) { | ||
| 525 | Statement* const if_stmt{pool.Create(If{}, neg_var, std::move(if_body), parent)}; | ||
| 526 | UpdateTreeUp(if_stmt); | ||
| 527 | body.insert(goto_stmt, *if_stmt); | ||
| 528 | } | ||
| 529 | body.erase(goto_stmt); | ||
| 530 | |||
| 531 | switch (label_nested_stmt->type) { | ||
| 532 | case StatementType::If: | ||
| 533 | // Update nested if condition | ||
| 534 | label_nested_stmt->cond = | ||
| 535 | pool.Create(Or{}, variable, label_nested_stmt->cond, &root_stmt); | ||
| 536 | break; | ||
| 537 | case StatementType::Loop: | ||
| 538 | break; | ||
| 539 | default: | ||
| 540 | throw LogicError("Invalid inward movement"); | ||
| 541 | } | ||
| 542 | Tree& nested_tree{label_nested_stmt->children}; | ||
| 543 | Statement* const new_goto{pool.Create(Goto{}, variable, label, &*label_nested_stmt)}; | ||
| 544 | return nested_tree.insert(nested_tree.begin(), *new_goto); | ||
| 545 | } | ||
| 546 | |||
| 547 | [[nodiscard]] Node Lift(Node goto_stmt) { | ||
| 548 | Statement* const parent{goto_stmt->up}; | ||
| 549 | Tree& body{parent->children}; | ||
| 550 | const Node label{goto_stmt->label}; | ||
| 551 | const u32 label_id{label->id}; | ||
| 552 | const Node label_nested_stmt{SiblingFromNephew(goto_stmt, label)}; | ||
| 553 | |||
| 554 | Tree loop_body; | ||
| 555 | loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt); | ||
| 556 | SanitizeNoBreaks(loop_body); | ||
| 557 | Statement* const variable{pool.Create(Variable{}, label_id, &root_stmt)}; | ||
| 558 | Statement* const loop_stmt{pool.Create(Loop{}, variable, std::move(loop_body), parent)}; | ||
| 559 | UpdateTreeUp(loop_stmt); | ||
| 560 | body.insert(goto_stmt, *loop_stmt); | ||
| 561 | |||
| 562 | Statement* const new_goto{pool.Create(Goto{}, variable, label, loop_stmt)}; | ||
| 563 | loop_stmt->children.push_front(*new_goto); | ||
| 564 | const Node new_goto_node{loop_stmt->children.begin()}; | ||
| 565 | |||
| 566 | Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_stmt->cond, loop_stmt)}; | ||
| 567 | loop_stmt->children.push_back(*set_var); | ||
| 568 | |||
| 569 | body.erase(goto_stmt); | ||
| 570 | return new_goto_node; | ||
| 571 | } | ||
| 572 | |||
| 573 | Node MoveOutwardIf(Node goto_stmt) { | ||
| 574 | const Node parent{Tree::s_iterator_to(*goto_stmt->up)}; | ||
| 575 | Tree& body{parent->children}; | ||
| 576 | const u32 label_id{goto_stmt->label->id}; | ||
| 577 | Statement* const goto_cond{goto_stmt->cond}; | ||
| 578 | Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, &*parent)}; | ||
| 579 | body.insert(goto_stmt, *set_goto_var); | ||
| 580 | |||
| 581 | Tree if_body; | ||
| 582 | if_body.splice(if_body.begin(), body, std::next(goto_stmt), body.end()); | ||
| 583 | if_body.pop_front(); | ||
| 584 | Statement* const cond{pool.Create(Variable{}, label_id, &root_stmt)}; | ||
| 585 | Statement* const neg_cond{pool.Create(Not{}, cond, &root_stmt)}; | ||
| 586 | Statement* const if_stmt{pool.Create(If{}, neg_cond, std::move(if_body), &*parent)}; | ||
| 587 | UpdateTreeUp(if_stmt); | ||
| 588 | body.insert(goto_stmt, *if_stmt); | ||
| 589 | |||
| 590 | body.erase(goto_stmt); | ||
| 591 | |||
| 592 | Statement* const new_cond{pool.Create(Variable{}, label_id, &root_stmt)}; | ||
| 593 | Statement* const new_goto{pool.Create(Goto{}, new_cond, goto_stmt->label, parent->up)}; | ||
| 594 | Tree& parent_tree{parent->up->children}; | ||
| 595 | return parent_tree.insert(std::next(parent), *new_goto); | ||
| 596 | } | ||
| 597 | |||
| 598 | Node MoveOutwardLoop(Node goto_stmt) { | ||
| 599 | Statement* const parent{goto_stmt->up}; | ||
| 600 | Tree& body{parent->children}; | ||
| 601 | const u32 label_id{goto_stmt->label->id}; | ||
| 602 | Statement* const goto_cond{goto_stmt->cond}; | ||
| 603 | Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)}; | ||
| 604 | Statement* const cond{pool.Create(Variable{}, label_id, &root_stmt)}; | ||
| 605 | Statement* const break_stmt{pool.Create(Break{}, cond, parent)}; | ||
| 606 | body.insert(goto_stmt, *set_goto_var); | ||
| 607 | body.insert(goto_stmt, *break_stmt); | ||
| 608 | body.erase(goto_stmt); | ||
| 609 | |||
| 610 | const Node loop{Tree::s_iterator_to(*goto_stmt->up)}; | ||
| 611 | Statement* const new_goto_cond{pool.Create(Variable{}, label_id, &root_stmt)}; | ||
| 612 | Statement* const new_goto{pool.Create(Goto{}, new_goto_cond, goto_stmt->label, loop->up)}; | ||
| 613 | Tree& parent_tree{loop->up->children}; | ||
| 614 | return parent_tree.insert(std::next(loop), *new_goto); | ||
| 615 | } | ||
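Both outward movers materialize the goto condition into its goto_L&lt;n&gt; variable so control can cross the structure boundary, then re-emit the goto just after the enclosing statement. For the loop case (c, w, S1, S2 are placeholders):

    // do { S1; if (c) goto L; S2; } while (w);
    //                =>
    // do { S1; goto_L = c; if (goto_L) break; S2; } while (w);
    // if (goto_L) goto L;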
| 616 | |||
| 617 | ObjectPool<Statement>& pool; | ||
| 618 | Statement root_stmt{FunctionTag{}}; | ||
| 619 | }; | ||
| 620 | |||
| 621 | [[nodiscard]] Statement* TryFindForwardBlock(Statement& stmt) { | ||
| 622 | Tree& tree{stmt.up->children}; | ||
| 623 | const Node end{tree.end()}; | ||
| 624 | Node forward_node{std::next(Tree::s_iterator_to(stmt))}; | ||
| 625 | while (forward_node != end && !HasChildren(forward_node->type)) { | ||
| 626 | if (forward_node->type == StatementType::Code) { | ||
| 627 | return &*forward_node; | ||
| 628 | } | ||
| 629 | ++forward_node; | ||
| 630 | } | ||
| 631 | return nullptr; | ||
| 632 | } | ||
| 633 | |||
| 634 | [[nodiscard]] IR::U1 VisitExpr(IR::IREmitter& ir, const Statement& stmt) { | ||
| 635 | switch (stmt.type) { | ||
| 636 | case StatementType::Identity: | ||
| 637 | return ir.Condition(stmt.guest_cond); | ||
| 638 | case StatementType::Not: | ||
| 639 | return ir.LogicalNot(IR::U1{VisitExpr(ir, *stmt.op)}); | ||
| 640 | case StatementType::Or: | ||
| 641 | return ir.LogicalOr(VisitExpr(ir, *stmt.op_a), VisitExpr(ir, *stmt.op_b)); | ||
| 642 | case StatementType::Variable: | ||
| 643 | return ir.GetGotoVariable(stmt.id); | ||
| 644 | case StatementType::IndirectBranchCond: | ||
| 645 | return ir.IEqual(ir.GetIndirectBranchVariable(), ir.Imm32(stmt.location)); | ||
| 646 | default: | ||
| 647 | throw NotImplementedException("Statement type {}", stmt.type); | ||
| 648 | } | ||
| 649 | } | ||
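VisitExpr lowers the small statement-expression language into IR at each use site. A hypothetical expression Not(Or(Variable{3}, Identity{P0})) would emit roughly:

    // %a = GetGotoVariable(3)
    // %b = Condition(P0)       // guest predicate test
    // %c = LogicalOr(%a, %b)
    // %d = LogicalNot(%c)      // the IR::U1 returned to the caller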
| 650 | |||
| 651 | class TranslatePass { | ||
| 652 | public: | ||
| 653 | TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_, | ||
| 654 | ObjectPool<Statement>& stmt_pool_, Environment& env_, Statement& root_stmt, | ||
| 655 | IR::AbstractSyntaxList& syntax_list_) | ||
| 656 | : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_}, | ||
| 657 | syntax_list{syntax_list_} { | ||
| 658 | Visit(root_stmt, nullptr, nullptr); | ||
| 659 | |||
| 660 | IR::Block& first_block{*syntax_list.front().data.block}; | ||
| 661 | IR::IREmitter ir(first_block, first_block.begin()); | ||
| 662 | ir.Prologue(); | ||
| 663 | } | ||
| 664 | |||
| 665 | private: | ||
| 666 | void Visit(Statement& parent, IR::Block* break_block, IR::Block* fallthrough_block) { | ||
| 667 | IR::Block* current_block{}; | ||
| 668 | const auto ensure_block{[&] { | ||
| 669 | if (current_block) { | ||
| 670 | return; | ||
| 671 | } | ||
| 672 | current_block = block_pool.Create(inst_pool); | ||
| 673 | auto& node{syntax_list.emplace_back()}; | ||
| 674 | node.type = IR::AbstractSyntaxNode::Type::Block; | ||
| 675 | node.data.block = current_block; | ||
| 676 | }}; | ||
| 677 | Tree& tree{parent.children}; | ||
| 678 | for (auto it = tree.begin(); it != tree.end(); ++it) { | ||
| 679 | Statement& stmt{*it}; | ||
| 680 | switch (stmt.type) { | ||
| 681 | case StatementType::Label: | ||
| 682 | // Labels can be ignored | ||
| 683 | break; | ||
| 684 | case StatementType::Code: { | ||
| 685 | ensure_block(); | ||
| 686 | Translate(env, current_block, stmt.block->begin.Offset(), stmt.block->end.Offset()); | ||
| 687 | break; | ||
| 688 | } | ||
| 689 | case StatementType::SetVariable: { | ||
| 690 | ensure_block(); | ||
| 691 | IR::IREmitter ir{*current_block}; | ||
| 692 | ir.SetGotoVariable(stmt.id, VisitExpr(ir, *stmt.op)); | ||
| 693 | break; | ||
| 694 | } | ||
| 695 | case StatementType::SetIndirectBranchVariable: { | ||
| 696 | ensure_block(); | ||
| 697 | IR::IREmitter ir{*current_block}; | ||
| 698 | IR::U32 address{ir.IAdd(ir.GetReg(stmt.branch_reg), ir.Imm32(stmt.branch_offset))}; | ||
| 699 | ir.SetIndirectBranchVariable(address); | ||
| 700 | break; | ||
| 701 | } | ||
| 702 | case StatementType::If: { | ||
| 703 | ensure_block(); | ||
| 704 | IR::Block* const merge_block{MergeBlock(parent, stmt)}; | ||
| 705 | |||
| 706 | // Implement if header block | ||
| 707 | IR::IREmitter ir{*current_block}; | ||
| 708 | const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))}; | ||
| 709 | |||
| 710 | const size_t if_node_index{syntax_list.size()}; | ||
| 711 | syntax_list.emplace_back(); | ||
| 712 | |||
| 713 | // Visit children | ||
| 714 | const size_t then_block_index{syntax_list.size()}; | ||
| 715 | Visit(stmt, break_block, merge_block); | ||
| 716 | |||
| 717 | IR::Block* const then_block{syntax_list.at(then_block_index).data.block}; | ||
| 718 | current_block->AddBranch(then_block); | ||
| 719 | current_block->AddBranch(merge_block); | ||
| 720 | current_block = merge_block; | ||
| 721 | |||
| 722 | auto& if_node{syntax_list[if_node_index]}; | ||
| 723 | if_node.type = IR::AbstractSyntaxNode::Type::If; | ||
| 724 | if_node.data.if_node.cond = cond; | ||
| 725 | if_node.data.if_node.body = then_block; | ||
| 726 | if_node.data.if_node.merge = merge_block; | ||
| 727 | |||
| 728 | auto& endif_node{syntax_list.emplace_back()}; | ||
| 729 | endif_node.type = IR::AbstractSyntaxNode::Type::EndIf; | ||
| 730 | endif_node.data.end_if.merge = merge_block; | ||
| 731 | |||
| 732 | auto& merge{syntax_list.emplace_back()}; | ||
| 733 | merge.type = IR::AbstractSyntaxNode::Type::Block; | ||
| 734 | merge.data.block = merge_block; | ||
| 735 | break; | ||
| 736 | } | ||
| 737 | case StatementType::Loop: { | ||
| 738 | IR::Block* const loop_header_block{block_pool.Create(inst_pool)}; | ||
| 739 | if (current_block) { | ||
| 740 | current_block->AddBranch(loop_header_block); | ||
| 741 | } | ||
| 742 | auto& header_node{syntax_list.emplace_back()}; | ||
| 743 | header_node.type = IR::AbstractSyntaxNode::Type::Block; | ||
| 744 | header_node.data.block = loop_header_block; | ||
| 745 | |||
| 746 | IR::Block* const continue_block{block_pool.Create(inst_pool)}; | ||
| 747 | IR::Block* const merge_block{MergeBlock(parent, stmt)}; | ||
| 748 | |||
| 749 | const size_t loop_node_index{syntax_list.size()}; | ||
| 750 | syntax_list.emplace_back(); | ||
| 751 | |||
| 752 | // Visit children | ||
| 753 | const size_t body_block_index{syntax_list.size()}; | ||
| 754 | Visit(stmt, merge_block, continue_block); | ||
| 755 | |||
| 756 | // The continue block is located at the end of the loop | ||
| 757 | IR::IREmitter ir{*continue_block}; | ||
| 758 | const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))}; | ||
| 759 | |||
| 760 | IR::Block* const body_block{syntax_list.at(body_block_index).data.block}; | ||
| 761 | loop_header_block->AddBranch(body_block); | ||
| 762 | |||
| 763 | continue_block->AddBranch(loop_header_block); | ||
| 764 | continue_block->AddBranch(merge_block); | ||
| 765 | |||
| 766 | current_block = merge_block; | ||
| 767 | |||
| 768 | auto& loop{syntax_list[loop_node_index]}; | ||
| 769 | loop.type = IR::AbstractSyntaxNode::Type::Loop; | ||
| 770 | loop.data.loop.body = body_block; | ||
| 771 | loop.data.loop.continue_block = continue_block; | ||
| 772 | loop.data.loop.merge = merge_block; | ||
| 773 | |||
| 774 | auto& continue_block_node{syntax_list.emplace_back()}; | ||
| 775 | continue_block_node.type = IR::AbstractSyntaxNode::Type::Block; | ||
| 776 | continue_block_node.data.block = continue_block; | ||
| 777 | |||
| 778 | auto& repeat{syntax_list.emplace_back()}; | ||
| 779 | repeat.type = IR::AbstractSyntaxNode::Type::Repeat; | ||
| 780 | repeat.data.repeat.cond = cond; | ||
| 781 | repeat.data.repeat.loop_header = loop_header_block; | ||
| 782 | repeat.data.repeat.merge = merge_block; | ||
| 783 | |||
| 784 | auto& merge{syntax_list.emplace_back()}; | ||
| 785 | merge.type = IR::AbstractSyntaxNode::Type::Block; | ||
| 786 | merge.data.block = merge_block; | ||
| 787 | break; | ||
| 788 | } | ||
| 789 | case StatementType::Break: { | ||
| 790 | ensure_block(); | ||
| 791 | IR::Block* const skip_block{MergeBlock(parent, stmt)}; | ||
| 792 | |||
| 793 | IR::IREmitter ir{*current_block}; | ||
| 794 | const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))}; | ||
| 795 | current_block->AddBranch(break_block); | ||
| 796 | current_block->AddBranch(skip_block); | ||
| 797 | current_block = skip_block; | ||
| 798 | |||
| 799 | auto& break_node{syntax_list.emplace_back()}; | ||
| 800 | break_node.type = IR::AbstractSyntaxNode::Type::Break; | ||
| 801 | break_node.data.break_node.cond = cond; | ||
| 802 | break_node.data.break_node.merge = break_block; | ||
| 803 | break_node.data.break_node.skip = skip_block; | ||
| 804 | |||
| 805 | auto& merge{syntax_list.emplace_back()}; | ||
| 806 | merge.type = IR::AbstractSyntaxNode::Type::Block; | ||
| 807 | merge.data.block = skip_block; | ||
| 808 | break; | ||
| 809 | } | ||
| 810 | case StatementType::Return: { | ||
| 811 | ensure_block(); | ||
| 812 | IR::IREmitter{*current_block}.Epilogue(); | ||
| 813 | current_block = nullptr; | ||
| 814 | syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return; | ||
| 815 | break; | ||
| 816 | } | ||
| 817 | case StatementType::Kill: { | ||
| 818 | ensure_block(); | ||
| 819 | IR::Block* demote_block{MergeBlock(parent, stmt)}; | ||
| 820 | IR::IREmitter{*current_block}.DemoteToHelperInvocation(); | ||
| 821 | current_block->AddBranch(demote_block); | ||
| 822 | current_block = demote_block; | ||
| 823 | |||
| 824 | auto& merge{syntax_list.emplace_back()}; | ||
| 825 | merge.type = IR::AbstractSyntaxNode::Type::Block; | ||
| 826 | merge.data.block = demote_block; | ||
| 827 | break; | ||
| 828 | } | ||
| 829 | case StatementType::Unreachable: { | ||
| 830 | ensure_block(); | ||
| 831 | current_block = nullptr; | ||
| 832 | syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Unreachable; | ||
| 833 | break; | ||
| 834 | } | ||
| 835 | default: | ||
| 836 | throw NotImplementedException("Statement type {}", stmt.type); | ||
| 837 | } | ||
| 838 | } | ||
| 839 | if (current_block) { | ||
| 840 | if (fallthrough_block) { | ||
| 841 | current_block->AddBranch(fallthrough_block); | ||
| 842 | } else { | ||
| 843 | syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Unreachable; | ||
| 844 | } | ||
| 845 | } | ||
| 846 | } | ||
| 847 | |||
| 848 | IR::Block* MergeBlock(Statement& parent, Statement& stmt) { | ||
| 849 | Statement* merge_stmt{TryFindForwardBlock(stmt)}; | ||
| 850 | if (!merge_stmt) { | ||
| 851 | // Create a merge block we can visit later | ||
| 852 | merge_stmt = stmt_pool.Create(&dummy_flow_block, &parent); | ||
| 853 | parent.children.insert(std::next(Tree::s_iterator_to(stmt)), *merge_stmt); | ||
| 854 | } | ||
| 855 | return block_pool.Create(inst_pool); | ||
| 856 | } | ||
| 857 | |||
| 858 | ObjectPool<Statement>& stmt_pool; | ||
| 859 | ObjectPool<IR::Inst>& inst_pool; | ||
| 860 | ObjectPool<IR::Block>& block_pool; | ||
| 861 | Environment& env; | ||
| 862 | IR::AbstractSyntaxList& syntax_list; | ||
| 863 | |||
| 864 | // TODO: Remove this once all compilers support C++20 constexpr std::vector | ||
| 865 | #if __cpp_lib_constexpr_vector >= 201907 | ||
| 866 | static constexpr Flow::Block dummy_flow_block; | ||
| 867 | #else | ||
| 868 | const Flow::Block dummy_flow_block; | ||
| 869 | #endif | ||
| 870 | }; | ||
| 871 | } // Anonymous namespace | ||
| 872 | |||
| 873 | IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, | ||
| 874 | Environment& env, Flow::CFG& cfg) { | ||
| 875 | ObjectPool<Statement> stmt_pool{64}; | ||
| 876 | GotoPass goto_pass{cfg, stmt_pool}; | ||
| 877 | Statement& root{goto_pass.RootStatement()}; | ||
| 878 | IR::AbstractSyntaxList syntax_list; | ||
| 879 | TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list}; | ||
| 880 | return syntax_list; | ||
| 881 | } | ||
| 882 | |||
| 883 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h new file mode 100644 index 000000000..88b083649 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h | |||
| @@ -0,0 +1,20 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "shader_recompiler/environment.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/abstract_syntax_list.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 11 | #include "shader_recompiler/frontend/maxwell/control_flow.h" | ||
| 12 | #include "shader_recompiler/object_pool.h" | ||
| 13 | |||
| 14 | namespace Shader::Maxwell { | ||
| 15 | |||
| 16 | [[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, | ||
| 17 | ObjectPool<IR::Block>& block_pool, Environment& env, | ||
| 18 | Flow::CFG& cfg); | ||
| 19 | |||
| 20 | } // namespace Shader::Maxwell | ||
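A sketch of the expected call pattern; the Flow::CFG construction lives in control_flow.h, and both pools must outlive the returned list since the syntax nodes point into pool-owned blocks and instructions:

    IR::AbstractSyntaxList TranslateSketch(Environment& env, ObjectPool<IR::Inst>& inst_pool,
                                           ObjectPool<IR::Block>& block_pool, Flow::CFG& cfg) {
        // BuildASL runs GotoPass and TranslatePass internally and returns the
        // structured node list, ready for SSA construction.
        return BuildASL(inst_pool, block_pool, env, cfg);
    }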
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp new file mode 100644 index 000000000..d9f999e05 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp | |||
| @@ -0,0 +1,214 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class AtomOp : u64 { | ||
| 12 | ADD, | ||
| 13 | MIN, | ||
| 14 | MAX, | ||
| 15 | INC, | ||
| 16 | DEC, | ||
| 17 | AND, | ||
| 18 | OR, | ||
| 19 | XOR, | ||
| 20 | EXCH, | ||
| 21 | SAFEADD, | ||
| 22 | }; | ||
| 23 | |||
| 24 | enum class AtomSize : u64 { | ||
| 25 | U32, | ||
| 26 | S32, | ||
| 27 | U64, | ||
| 28 | F32, | ||
| 29 | F16x2, | ||
| 30 | S64, | ||
| 31 | }; | ||
| 32 | |||
| 33 | IR::U32U64 ApplyIntegerAtomOp(IR::IREmitter& ir, const IR::U32U64& offset, const IR::U32U64& op_b, | ||
| 34 | AtomOp op, bool is_signed) { | ||
| 35 | switch (op) { | ||
| 36 | case AtomOp::ADD: | ||
| 37 | return ir.GlobalAtomicIAdd(offset, op_b); | ||
| 38 | case AtomOp::MIN: | ||
| 39 | return ir.GlobalAtomicIMin(offset, op_b, is_signed); | ||
| 40 | case AtomOp::MAX: | ||
| 41 | return ir.GlobalAtomicIMax(offset, op_b, is_signed); | ||
| 42 | case AtomOp::INC: | ||
| 43 | return ir.GlobalAtomicInc(offset, op_b); | ||
| 44 | case AtomOp::DEC: | ||
| 45 | return ir.GlobalAtomicDec(offset, op_b); | ||
| 46 | case AtomOp::AND: | ||
| 47 | return ir.GlobalAtomicAnd(offset, op_b); | ||
| 48 | case AtomOp::OR: | ||
| 49 | return ir.GlobalAtomicOr(offset, op_b); | ||
| 50 | case AtomOp::XOR: | ||
| 51 | return ir.GlobalAtomicXor(offset, op_b); | ||
| 52 | case AtomOp::EXCH: | ||
| 53 | return ir.GlobalAtomicExchange(offset, op_b); | ||
| 54 | default: | ||
| 55 | throw NotImplementedException("Integer Atom Operation {}", op); | ||
| 56 | } | ||
| 57 | } | ||
| 58 | |||
| 59 | IR::Value ApplyFpAtomOp(IR::IREmitter& ir, const IR::U64& offset, const IR::Value& op_b, AtomOp op, | ||
| 60 | AtomSize size) { | ||
| 61 | static constexpr IR::FpControl f16_control{ | ||
| 62 | .no_contraction = false, | ||
| 63 | .rounding = IR::FpRounding::RN, | ||
| 64 | .fmz_mode = IR::FmzMode::DontCare, | ||
| 65 | }; | ||
| 66 | static constexpr IR::FpControl f32_control{ | ||
| 67 | .no_contraction = false, | ||
| 68 | .rounding = IR::FpRounding::RN, | ||
| 69 | .fmz_mode = IR::FmzMode::FTZ, | ||
| 70 | }; | ||
| 71 | switch (op) { | ||
| 72 | case AtomOp::ADD: | ||
| 73 | return size == AtomSize::F32 ? ir.GlobalAtomicF32Add(offset, op_b, f32_control) | ||
| 74 | : ir.GlobalAtomicF16x2Add(offset, op_b, f16_control); | ||
| 75 | case AtomOp::MIN: | ||
| 76 | return ir.GlobalAtomicF16x2Min(offset, op_b, f16_control); | ||
| 77 | case AtomOp::MAX: | ||
| 78 | return ir.GlobalAtomicF16x2Max(offset, op_b, f16_control); | ||
| 79 | default: | ||
| 80 | throw NotImplementedException("FP Atom Operation {}", op); | ||
| 81 | } | ||
| 82 | } | ||
| 83 | |||
| 84 | IR::U64 AtomOffset(TranslatorVisitor& v, u64 insn) { | ||
| 85 | union { | ||
| 86 | u64 raw; | ||
| 87 | BitField<8, 8, IR::Reg> addr_reg; | ||
| 88 | BitField<28, 20, s64> addr_offset; | ||
| 89 | BitField<28, 20, u64> rz_addr_offset; | ||
| 90 | BitField<48, 1, u64> e; | ||
| 91 | } const mem{insn}; | ||
| 92 | |||
| 93 | const IR::U64 address{[&]() -> IR::U64 { | ||
| 94 | if (mem.e == 0) { | ||
| 95 | return v.ir.UConvert(64, v.X(mem.addr_reg)); | ||
| 96 | } | ||
| 97 | return v.L(mem.addr_reg); | ||
| 98 | }()}; | ||
| 99 | const u64 addr_offset{[&]() -> u64 { | ||
| 100 | if (mem.addr_reg == IR::Reg::RZ) { | ||
| 101 | // When RZ is used, the address is an absolute address | ||
| 102 | return static_cast<u64>(mem.rz_addr_offset.Value()); | ||
| 103 | } else { | ||
| 104 | return static_cast<u64>(mem.addr_offset.Value()); | ||
| 105 | } | ||
| 106 | }()}; | ||
| 107 | return v.ir.IAdd(address, v.ir.Imm64(addr_offset)); | ||
| 108 | } | ||
| 109 | |||
| 110 | bool AtomOpNotApplicable(AtomSize size, AtomOp op) { | ||
| 111 | // TODO: SAFEADD | ||
| 112 | switch (size) { | ||
| 113 | case AtomSize::S32: | ||
| 114 | case AtomSize::U64: | ||
| 115 | return (op == AtomOp::INC || op == AtomOp::DEC); | ||
| 116 | case AtomSize::S64: | ||
| 117 | return !(op == AtomOp::MIN || op == AtomOp::MAX); | ||
| 118 | case AtomSize::F32: | ||
| 119 | return op != AtomOp::ADD; | ||
| 120 | case AtomSize::F16x2: | ||
| 121 | return !(op == AtomOp::ADD || op == AtomOp::MIN || op == AtomOp::MAX); | ||
| 122 | default: | ||
| 123 | return false; | ||
| 124 | } | ||
| 125 | } | ||
| 126 | |||
| 127 | IR::U32U64 LoadGlobal(IR::IREmitter& ir, const IR::U64& offset, AtomSize size) { | ||
| 128 | switch (size) { | ||
| 129 | case AtomSize::U32: | ||
| 130 | case AtomSize::S32: | ||
| 131 | case AtomSize::F32: | ||
| 132 | case AtomSize::F16x2: | ||
| 133 | return ir.LoadGlobal32(offset); | ||
| 134 | case AtomSize::U64: | ||
| 135 | case AtomSize::S64: | ||
| 136 | return ir.PackUint2x32(ir.LoadGlobal64(offset)); | ||
| 137 | default: | ||
| 138 | throw NotImplementedException("Atom Size {}", size); | ||
| 139 | } | ||
| 140 | } | ||
| 141 | |||
| 142 | void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomSize size) { | ||
| 143 | switch (size) { | ||
| 144 | case AtomSize::U32: | ||
| 145 | case AtomSize::S32: | ||
| 146 | case AtomSize::F16x2: | ||
| 147 | return v.X(dest_reg, IR::U32{result}); | ||
| 148 | case AtomSize::U64: | ||
| 149 | case AtomSize::S64: | ||
| 150 | return v.L(dest_reg, IR::U64{result}); | ||
| 151 | case AtomSize::F32: | ||
| 152 | return v.F(dest_reg, IR::F32{result}); | ||
| 153 | default: | ||
| 154 | break; | ||
| 155 | } | ||
| 156 | } | ||
| 157 | |||
| 158 | IR::Value ApplyAtomOp(TranslatorVisitor& v, IR::Reg operand_reg, const IR::U64& offset, | ||
| 159 | AtomSize size, AtomOp op) { | ||
| 160 | switch (size) { | ||
| 161 | case AtomSize::U32: | ||
| 162 | case AtomSize::S32: | ||
| 163 | return ApplyIntegerAtomOp(v.ir, offset, v.X(operand_reg), op, size == AtomSize::S32); | ||
| 164 | case AtomSize::U64: | ||
| 165 | case AtomSize::S64: | ||
| 166 | return ApplyIntegerAtomOp(v.ir, offset, v.L(operand_reg), op, size == AtomSize::S64); | ||
| 167 | case AtomSize::F32: | ||
| 168 | return ApplyFpAtomOp(v.ir, offset, v.F(operand_reg), op, size); | ||
| 169 | case AtomSize::F16x2: { | ||
| 170 | return ApplyFpAtomOp(v.ir, offset, v.ir.UnpackFloat2x16(v.X(operand_reg)), op, size); | ||
| 171 | } | ||
| 172 | default: | ||
| 173 | throw NotImplementedException("Atom Size {}", size); | ||
| 174 | } | ||
| 175 | } | ||
| 176 | |||
| 177 | void GlobalAtomic(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg, | ||
| 178 | const IR::U64& offset, AtomSize size, AtomOp op, bool write_dest) { | ||
| 179 | IR::Value result; | ||
| 180 | if (AtomOpNotApplicable(size, op)) { | ||
| 181 | result = LoadGlobal(v.ir, offset, size); | ||
| 182 | } else { | ||
| 183 | result = ApplyAtomOp(v, operand_reg, offset, size, op); | ||
| 184 | } | ||
| 185 | if (write_dest) { | ||
| 186 | StoreResult(v, dest_reg, result, size); | ||
| 187 | } | ||
| 188 | } | ||
| 189 | } // Anonymous namespace | ||
| 190 | |||
| 191 | void TranslatorVisitor::ATOM(u64 insn) { | ||
| 192 | union { | ||
| 193 | u64 raw; | ||
| 194 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 195 | BitField<20, 8, IR::Reg> operand_reg; | ||
| 196 | BitField<49, 3, AtomSize> size; | ||
| 197 | BitField<52, 4, AtomOp> op; | ||
| 198 | } const atom{insn}; | ||
| 199 | const IR::U64 offset{AtomOffset(*this, insn)}; | ||
| 200 | GlobalAtomic(*this, atom.dest_reg, atom.operand_reg, offset, atom.size, atom.op, true); | ||
| 201 | } | ||
| 202 | |||
| 203 | void TranslatorVisitor::RED(u64 insn) { | ||
| 204 | union { | ||
| 205 | u64 raw; | ||
| 206 | BitField<0, 8, IR::Reg> operand_reg; | ||
| 207 | BitField<20, 3, AtomSize> size; | ||
| 208 | BitField<23, 3, AtomOp> op; | ||
| 209 | } const red{insn}; | ||
| 210 | const IR::U64 offset{AtomOffset(*this, insn)}; | ||
| 211 | GlobalAtomic(*this, IR::Reg::RZ, red.operand_reg, offset, red.size, red.op, true); | ||
| 212 | } | ||
| 213 | |||
| 214 | } // namespace Shader::Maxwell | ||
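Aside on the encoding idiom above: the anonymous unions overlay BitField members on the raw 64-bit instruction word, so every field read is just a shift and a mask. A self-contained sketch (not yuzu code; the sample word and field values are made up for illustration) that decodes the same ATOM fields with plain shifts:

    #include <cstdint>
    #include <cstdio>

    int main() {
        // Build a fake ATOM word: op = MAX (2) at bit 52, size = S32 (1) at bit 49,
        // operand register R7 at bit 20, destination register R3 at bit 0.
        const uint64_t insn = (uint64_t{2} << 52) | (uint64_t{1} << 49) |
                              (uint64_t{7} << 20) | uint64_t{3};
        const auto field = [insn](unsigned pos, unsigned bits) {
            return (insn >> pos) & ((uint64_t{1} << bits) - 1);
        };
        std::printf("dest=R%llu operand=R%llu size=%llu op=%llu\n",
                    static_cast<unsigned long long>(field(0, 8)),
                    static_cast<unsigned long long>(field(20, 8)),
                    static_cast<unsigned long long>(field(49, 3)),
                    static_cast<unsigned long long>(field(52, 4)));
    }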
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp new file mode 100644 index 000000000..8b974621e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp | |||
| @@ -0,0 +1,110 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class AtomOp : u64 { | ||
| 12 | ADD, | ||
| 13 | MIN, | ||
| 14 | MAX, | ||
| 15 | INC, | ||
| 16 | DEC, | ||
| 17 | AND, | ||
| 18 | OR, | ||
| 19 | XOR, | ||
| 20 | EXCH, | ||
| 21 | }; | ||
| 22 | |||
| 23 | enum class AtomsSize : u64 { | ||
| 24 | U32, | ||
| 25 | S32, | ||
| 26 | U64, | ||
| 27 | }; | ||
| 28 | |||
| 29 | IR::U32U64 ApplyAtomsOp(IR::IREmitter& ir, const IR::U32& offset, const IR::U32U64& op_b, AtomOp op, | ||
| 30 | bool is_signed) { | ||
| 31 | switch (op) { | ||
| 32 | case AtomOp::ADD: | ||
| 33 | return ir.SharedAtomicIAdd(offset, op_b); | ||
| 34 | case AtomOp::MIN: | ||
| 35 | return ir.SharedAtomicIMin(offset, op_b, is_signed); | ||
| 36 | case AtomOp::MAX: | ||
| 37 | return ir.SharedAtomicIMax(offset, op_b, is_signed); | ||
| 38 | case AtomOp::INC: | ||
| 39 | return ir.SharedAtomicInc(offset, op_b); | ||
| 40 | case AtomOp::DEC: | ||
| 41 | return ir.SharedAtomicDec(offset, op_b); | ||
| 42 | case AtomOp::AND: | ||
| 43 | return ir.SharedAtomicAnd(offset, op_b); | ||
| 44 | case AtomOp::OR: | ||
| 45 | return ir.SharedAtomicOr(offset, op_b); | ||
| 46 | case AtomOp::XOR: | ||
| 47 | return ir.SharedAtomicXor(offset, op_b); | ||
| 48 | case AtomOp::EXCH: | ||
| 49 | return ir.SharedAtomicExchange(offset, op_b); | ||
| 50 | default: | ||
| 51 | throw NotImplementedException("Integer Atoms Operation {}", op); | ||
| 52 | } | ||
| 53 | } | ||
| 54 | |||
| 55 | IR::U32 AtomsOffset(TranslatorVisitor& v, u64 insn) { | ||
| 56 | union { | ||
| 57 | u64 raw; | ||
| 58 | BitField<8, 8, IR::Reg> offset_reg; | ||
| 59 | BitField<30, 22, u64> absolute_offset; | ||
| 60 | BitField<30, 22, s64> relative_offset; | ||
| 61 | } const encoding{insn}; | ||
| 62 | |||
| 63 | if (encoding.offset_reg == IR::Reg::RZ) { | ||
| 64 | return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset << 2)); | ||
| 65 | } else { | ||
| 66 | const s32 relative{static_cast<s32>(encoding.relative_offset << 2)}; | ||
| 67 | return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative)); | ||
| 68 | } | ||
| 69 | } | ||
| 70 | |||
| 71 | void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomsSize size) { | ||
| 72 | switch (size) { | ||
| 73 | case AtomsSize::U32: | ||
| 74 | case AtomsSize::S32: | ||
| 75 | return v.X(dest_reg, IR::U32{result}); | ||
| 76 | case AtomsSize::U64: | ||
| 77 | return v.L(dest_reg, IR::U64{result}); | ||
| 78 | default: | ||
| 79 | break; | ||
| 80 | } | ||
| 81 | } | ||
| 82 | } // Anonymous namespace | ||
| 83 | |||
| 84 | void TranslatorVisitor::ATOMS(u64 insn) { | ||
| 85 | union { | ||
| 86 | u64 raw; | ||
| 87 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 88 | BitField<8, 8, IR::Reg> addr_reg; | ||
| 89 | BitField<20, 8, IR::Reg> src_reg_b; | ||
| 90 | BitField<28, 2, AtomsSize> size; | ||
| 91 | BitField<52, 4, AtomOp> op; | ||
| 92 | } const atoms{insn}; | ||
| 93 | |||
| 94 | const bool size_64{atoms.size == AtomsSize::U64}; | ||
| 95 | if (size_64 && atoms.op != AtomOp::EXCH) { | ||
| 96 | throw NotImplementedException("64-bit Atoms Operation {}", atoms.op.Value()); | ||
| 97 | } | ||
| 98 | const bool is_signed{atoms.size == AtomsSize::S32}; | ||
| 99 | const IR::U32 offset{AtomsOffset(*this, insn)}; | ||
| 100 | |||
| 101 | IR::Value result; | ||
| 102 | if (size_64) { | ||
| 103 | result = ApplyAtomsOp(ir, offset, L(atoms.src_reg_b), atoms.op, is_signed); | ||
| 104 | } else { | ||
| 105 | result = ApplyAtomsOp(ir, offset, X(atoms.src_reg_b), atoms.op, is_signed); | ||
| 106 | } | ||
| 107 | StoreResult(*this, atoms.dest_reg, result, atoms.size); | ||
| 108 | } | ||
| 109 | |||
| 110 | } // namespace Shader::Maxwell | ||
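The two overlapping offset fields above are the interesting part: BitField<30, 22, u64> reads the bits as an unsigned value, while BitField<30, 22, s64> sign-extends them. A standalone sketch of the signed read followed by the same << 2 scaling (illustrative only; relies on the usual two's-complement arithmetic-shift idiom):

    #include <cstdint>
    #include <cstdio>

    int64_t RelativeOffset(uint64_t insn) {
        const uint64_t raw = (insn >> 30) & ((uint64_t{1} << 22) - 1);
        // Sign-extend the 22-bit field: shift it to the top, shift back arithmetically.
        const int64_t extended = static_cast<int64_t>(raw << 42) >> 42;
        return extended * 4; // the "<< 2" scaling above
    }

    int main() {
        const uint64_t all_ones = uint64_t{0x3FFFFF} << 30; // field value -1
        std::printf("%lld\n", static_cast<long long>(RelativeOffset(all_ones))); // prints -4
    }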
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp new file mode 100644 index 000000000..fb3f00d3f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp | |||
| @@ -0,0 +1,35 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | |||
| 12 | enum class BitSize : u64 { | ||
| 13 | B32, | ||
| 14 | B64, | ||
| 15 | B96, | ||
| 16 | B128, | ||
| 17 | }; | ||
| 18 | |||
| 19 | void TranslatorVisitor::AL2P(u64 inst) { | ||
| 20 | union { | ||
| 21 | u64 raw; | ||
| 22 | BitField<0, 8, IR::Reg> result_register; | ||
| 23 | BitField<8, 8, IR::Reg> indexing_register; | ||
| 24 | BitField<20, 11, s64> offset; | ||
| 25 | BitField<47, 2, BitSize> bitsize; | ||
| 26 | } al2p{inst}; | ||
| 27 | if (al2p.bitsize != BitSize::B32) { | ||
| 28 | throw NotImplementedException("BitSize {}", al2p.bitsize.Value()); | ||
| 29 | } | ||
| 30 | const IR::U32 converted_offset{ir.Imm32(static_cast<u32>(al2p.offset.Value()))}; | ||
| 31 | const IR::U32 result{ir.IAdd(X(al2p.indexing_register), converted_offset)}; | ||
| 32 | X(al2p.result_register, result); | ||
| 33 | } | ||
| 34 | |||
| 35 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp new file mode 100644 index 000000000..86e433e41 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp | |||
| @@ -0,0 +1,96 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 10 | |||
| 11 | namespace Shader::Maxwell { | ||
| 12 | namespace { | ||
| 13 | // Scope names appear to follow CUDA terminology (CTA = cooperative thread array). | ||
| 14 | enum class LocalScope : u64 { | ||
| 15 | CTA, | ||
| 16 | GL, | ||
| 17 | SYS, | ||
| 18 | VC, | ||
| 19 | }; | ||
| 20 | } // Anonymous namespace | ||
| 21 | |||
| 22 | void TranslatorVisitor::MEMBAR(u64 inst) { | ||
| 23 | union { | ||
| 24 | u64 raw; | ||
| 25 | BitField<8, 2, LocalScope> scope; | ||
| 26 | } const membar{inst}; | ||
| 27 | |||
| 28 | if (membar.scope == LocalScope::CTA) { | ||
| 29 | ir.WorkgroupMemoryBarrier(); | ||
| 30 | } else { | ||
| 31 | ir.DeviceMemoryBarrier(); | ||
| 32 | } | ||
| 33 | } | ||
| 34 | |||
| 35 | void TranslatorVisitor::DEPBAR() { | ||
| 36 | // DEPBAR waits on hardware scoreboard dependencies; data dependencies are explicit in the IR, so it is a no-op | ||
| 37 | } | ||
| 38 | |||
| 39 | void TranslatorVisitor::BAR(u64 insn) { | ||
| 40 | enum class Mode { | ||
| 41 | RedPopc, | ||
| 42 | Scan, | ||
| 43 | RedAnd, | ||
| 44 | RedOr, | ||
| 45 | Sync, | ||
| 46 | Arrive, | ||
| 47 | }; | ||
| 48 | union { | ||
| 49 | u64 raw; | ||
| 50 | BitField<43, 1, u64> is_a_imm; | ||
| 51 | BitField<44, 1, u64> is_b_imm; | ||
| 52 | BitField<8, 8, u64> imm_a; | ||
| 53 | BitField<20, 12, u64> imm_b; | ||
| 54 | BitField<42, 1, u64> neg_pred; | ||
| 55 | BitField<39, 3, IR::Pred> pred; | ||
| 56 | } const bar{insn}; | ||
| 57 | |||
| 58 | const Mode mode{[insn] { | ||
| 59 | switch (insn & 0x0000009B00000000ULL) { | ||
| 60 | case 0x0000000200000000ULL: | ||
| 61 | return Mode::RedPopc; | ||
| 62 | case 0x0000000300000000ULL: | ||
| 63 | return Mode::Scan; | ||
| 64 | case 0x0000000A00000000ULL: | ||
| 65 | return Mode::RedAnd; | ||
| 66 | case 0x0000001200000000ULL: | ||
| 67 | return Mode::RedOr; | ||
| 68 | case 0x0000008000000000ULL: | ||
| 69 | return Mode::Sync; | ||
| 70 | case 0x0000008100000000ULL: | ||
| 71 | return Mode::Arrive; | ||
| 72 | } | ||
| 73 | throw NotImplementedException("Invalid encoding"); | ||
| 74 | }()}; | ||
| 75 | if (mode != Mode::Sync) { | ||
| 76 | throw NotImplementedException("BAR mode {}", mode); | ||
| 77 | } | ||
| 78 | if (bar.is_a_imm == 0) { | ||
| 79 | throw NotImplementedException("Non-immediate input A"); | ||
| 80 | } | ||
| 81 | if (bar.imm_a != 0) { | ||
| 82 | throw NotImplementedException("Non-zero input A"); | ||
| 83 | } | ||
| 84 | if (bar.is_b_imm == 0) { | ||
| 85 | throw NotImplementedException("Non-immediate input B"); | ||
| 86 | } | ||
| 87 | if (bar.imm_b != 0) { | ||
| 88 | throw NotImplementedException("Non-zero input B"); | ||
| 89 | } | ||
| 90 | if (bar.pred != IR::Pred::PT || bar.neg_pred != 0) { | ||
| 91 | throw NotImplementedException("Non-true input predicate"); | ||
| 92 | } | ||
| 93 | ir.Barrier(); | ||
| 94 | } | ||
| 95 | |||
| 96 | } // namespace Shader::Maxwell | ||
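The mode decode above matches insn against fixed opcode patterns under the mask 0x0000009B00000000. A quick standalone sanity check (illustrative, not part of the codebase) that every case value is actually representable under that mask; any value with bits outside the mask would be a dead case that could never match:

    #include <cstdint>
    #include <cstdio>

    int main() {
        constexpr uint64_t mask = 0x0000009B00000000ULL;
        constexpr uint64_t cases[] = {
            0x0000000200000000ULL, 0x0000000300000000ULL, 0x0000000A00000000ULL,
            0x0000001200000000ULL, 0x0000008000000000ULL, 0x0000008100000000ULL,
        };
        for (const uint64_t value : cases) {
            std::printf("%016llx %s\n", static_cast<unsigned long long>(value),
                        (value & ~mask) == 0 ? "ok" : "unreachable case");
        }
    }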
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp new file mode 100644 index 000000000..9d5a87e52 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp | |||
| @@ -0,0 +1,74 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void BFE(TranslatorVisitor& v, u64 insn, const IR::U32& src) { | ||
| 12 | union { | ||
| 13 | u64 insn; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> offset_reg; | ||
| 16 | BitField<40, 1, u64> brev; | ||
| 17 | BitField<47, 1, u64> cc; | ||
| 18 | BitField<48, 1, u64> is_signed; | ||
| 19 | } const bfe{insn}; | ||
| 20 | |||
| 21 | const IR::U32 offset{v.ir.BitFieldExtract(src, v.ir.Imm32(0), v.ir.Imm32(8), false)}; | ||
| 22 | const IR::U32 count{v.ir.BitFieldExtract(src, v.ir.Imm32(8), v.ir.Imm32(8), false)}; | ||
| 23 | |||
| 24 | // Common constants | ||
| 25 | const IR::U32 zero{v.ir.Imm32(0)}; | ||
| 26 | const IR::U32 one{v.ir.Imm32(1)}; | ||
| 27 | const IR::U32 max_size{v.ir.Imm32(32)}; | ||
| 28 | // Edge case conditions | ||
| 29 | const IR::U1 zero_count{v.ir.IEqual(count, zero)}; | ||
| 30 | const IR::U1 exceed_count{v.ir.IGreaterThanEqual(v.ir.IAdd(offset, count), max_size, false)}; | ||
| 31 | const IR::U1 replicate{v.ir.IGreaterThanEqual(offset, max_size, false)}; | ||
| 32 | |||
| 33 | IR::U32 base{v.X(bfe.offset_reg)}; | ||
| 34 | if (bfe.brev != 0) { | ||
| 35 | base = v.ir.BitReverse(base); | ||
| 36 | } | ||
| 37 | IR::U32 result{v.ir.BitFieldExtract(base, offset, count, bfe.is_signed != 0)}; | ||
| 38 | if (bfe.is_signed != 0) { | ||
| 39 | const IR::U1 is_negative{v.ir.ILessThan(base, zero, true)}; | ||
| 40 | const IR::U32 replicated_bit{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)}; | ||
| 41 | const IR::U32 exceed_bit{v.ir.BitFieldExtract(base, v.ir.Imm32(31), one, false)}; | ||
| 42 | // Replicate condition | ||
| 43 | result = IR::U32{v.ir.Select(replicate, replicated_bit, result)}; | ||
| 44 | // Exceeding condition | ||
| 45 | const IR::U32 exceed_result{v.ir.BitFieldInsert(result, exceed_bit, v.ir.Imm32(31), one)}; | ||
| 46 | result = IR::U32{v.ir.Select(exceed_count, exceed_result, result)}; | ||
| 47 | } | ||
| 48 | // Zero count condition | ||
| 49 | result = IR::U32{v.ir.Select(zero_count, zero, result)}; | ||
| 50 | |||
| 51 | v.X(bfe.dest_reg, result); | ||
| 52 | |||
| 53 | if (bfe.cc != 0) { | ||
| 54 | v.SetZFlag(v.ir.IEqual(result, zero)); | ||
| 55 | v.SetSFlag(v.ir.ILessThan(result, zero, true)); | ||
| 56 | v.ResetCFlag(); | ||
| 57 | v.ResetOFlag(); | ||
| 58 | } | ||
| 59 | } | ||
| 60 | } // Anonymous namespace | ||
| 61 | |||
| 62 | void TranslatorVisitor::BFE_reg(u64 insn) { | ||
| 63 | BFE(*this, insn, GetReg20(insn)); | ||
| 64 | } | ||
| 65 | |||
| 66 | void TranslatorVisitor::BFE_cbuf(u64 insn) { | ||
| 67 | BFE(*this, insn, GetCbuf(insn)); | ||
| 68 | } | ||
| 69 | |||
| 70 | void TranslatorVisitor::BFE_imm(u64 insn) { | ||
| 71 | BFE(*this, insn, GetImm20(insn)); | ||
| 72 | } | ||
| 73 | |||
| 74 | } // namespace Shader::Maxwell | ||
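A scalar model of the signed edge cases the IR above special-cases (a sketch of the intended semantics mirrored from the Select/BitFieldInsert logic, not the emitted IR): a zero count yields zero, an offset of 32 or more replicates the source sign, and a signed extract that runs past bit 31 has the source's bit 31 forced into the result's bit 31.

    #include <cstdint>

    uint32_t SignedBfeModel(uint32_t base, uint32_t offset, uint32_t count) {
        if (count == 0) {
            return 0; // zero count condition
        }
        const bool negative = (base >> 31) != 0;
        if (offset >= 32) {
            return negative ? 0xFFFFFFFFu : 0u; // replicate condition
        }
        uint32_t field = (base >> offset) & (count >= 32 ? 0xFFFFFFFFu : (1u << count) - 1);
        if (count < 32 && (field >> (count - 1)) != 0) {
            field |= ~((1u << count) - 1); // sign-extend the extracted field
        }
        if (offset + count >= 32) {
            // exceeding condition: the source's bit 31 replaces the result's bit 31
            field = (field & 0x7FFFFFFFu) | (base & 0x80000000u);
        }
        return field;
    }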
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp new file mode 100644 index 000000000..1e1ec2119 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp | |||
| @@ -0,0 +1,62 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void BFI(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& base) { | ||
| 12 | union { | ||
| 13 | u64 insn; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> insert_reg; | ||
| 16 | BitField<47, 1, u64> cc; | ||
| 17 | } const bfi{insn}; | ||
| 18 | |||
| 19 | const IR::U32 zero{v.ir.Imm32(0)}; | ||
| 20 | const IR::U32 offset{v.ir.BitFieldExtract(src_a, zero, v.ir.Imm32(8), false)}; | ||
| 21 | const IR::U32 unsafe_count{v.ir.BitFieldExtract(src_a, v.ir.Imm32(8), v.ir.Imm32(8), false)}; | ||
| 22 | const IR::U32 max_size{v.ir.Imm32(32)}; | ||
| 23 | |||
| 24 | // Edge case conditions | ||
| 25 | const IR::U1 exceed_offset{v.ir.IGreaterThanEqual(offset, max_size, false)}; | ||
| 26 | const IR::U1 exceed_count{v.ir.IGreaterThan(unsafe_count, max_size, false)}; | ||
| 27 | |||
| 28 | const IR::U32 remaining_size{v.ir.ISub(max_size, offset)}; | ||
| 29 | const IR::U32 safe_count{v.ir.Select(exceed_count, remaining_size, unsafe_count)}; | ||
| 30 | |||
| 31 | const IR::U32 insert{v.X(bfi.insert_reg)}; | ||
| 32 | IR::U32 result{v.ir.BitFieldInsert(base, insert, offset, safe_count)}; | ||
| 33 | |||
| 34 | result = IR::U32{v.ir.Select(exceed_offset, base, result)}; | ||
| 35 | |||
| 36 | v.X(bfi.dest_reg, result); | ||
| 37 | if (bfi.cc != 0) { | ||
| 38 | v.SetZFlag(v.ir.IEqual(result, zero)); | ||
| 39 | v.SetSFlag(v.ir.ILessThan(result, zero, true)); | ||
| 40 | v.ResetCFlag(); | ||
| 41 | v.ResetOFlag(); | ||
| 42 | } | ||
| 43 | } | ||
| 44 | } // Anonymous namespace | ||
| 45 | |||
| 46 | void TranslatorVisitor::BFI_reg(u64 insn) { | ||
| 47 | BFI(*this, insn, GetReg20(insn), GetReg39(insn)); | ||
| 48 | } | ||
| 49 | |||
| 50 | void TranslatorVisitor::BFI_rc(u64 insn) { | ||
| 51 | BFI(*this, insn, GetReg39(insn), GetCbuf(insn)); | ||
| 52 | } | ||
| 53 | |||
| 54 | void TranslatorVisitor::BFI_cr(u64 insn) { | ||
| 55 | BFI(*this, insn, GetCbuf(insn), GetReg39(insn)); | ||
| 56 | } | ||
| 57 | |||
| 58 | void TranslatorVisitor::BFI_imm(u64 insn) { | ||
| 59 | BFI(*this, insn, GetImm20(insn), GetReg39(insn)); | ||
| 60 | } | ||
| 61 | |||
| 62 | } // namespace Shader::Maxwell | ||
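The same idea for BFI, as a scalar sketch (illustrative only, following the clamping above): a count above 32 is clamped to the bits remaining after offset, and an out-of-range offset leaves the base untouched.

    #include <cstdint>

    uint32_t BfiModel(uint32_t base, uint32_t insert, uint32_t offset, uint32_t count) {
        if (offset >= 32) {
            return base; // exceed_offset: result falls back to the base value
        }
        if (count > 32) {
            count = 32 - offset; // safe_count = remaining_size
        }
        if (count == 0) {
            return base;
        }
        const uint32_t field_mask = (count >= 32 ? 0xFFFFFFFFu : (1u << count) - 1) << offset;
        return (base & ~field_mask) | ((insert << offset) & field_mask);
    }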
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp new file mode 100644 index 000000000..371c0e0f7 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp | |||
| @@ -0,0 +1,36 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/exception.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | void Check(u64 insn) { | ||
| 13 | union { | ||
| 14 | u64 raw; | ||
| 15 | BitField<5, 1, u64> cbuf_mode; | ||
| 16 | BitField<6, 1, u64> lmt; | ||
| 17 | } const encoding{insn}; | ||
| 18 | |||
| 19 | if (encoding.cbuf_mode != 0) { | ||
| 20 | throw NotImplementedException("Constant buffer mode"); | ||
| 21 | } | ||
| 22 | if (encoding.lmt != 0) { | ||
| 23 | throw NotImplementedException("LMT"); | ||
| 24 | } | ||
| 25 | } | ||
| 26 | } // Anonymous namespace | ||
| 27 | |||
| 28 | void TranslatorVisitor::BRX(u64 insn) { | ||
| 29 | Check(insn); | ||
| 30 | } | ||
| 31 | |||
| 32 | void TranslatorVisitor::JMX(u64 insn) { | ||
| 33 | Check(insn); | ||
| 34 | } | ||
| 35 | |||
| 36 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h new file mode 100644 index 000000000..fd73f656c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h | |||
| @@ -0,0 +1,57 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "shader_recompiler/exception.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | |||
| 14 | enum class FpRounding : u64 { | ||
| 15 | RN, | ||
| 16 | RM, | ||
| 17 | RP, | ||
| 18 | RZ, | ||
| 19 | }; | ||
| 20 | |||
| 21 | enum class FmzMode : u64 { | ||
| 22 | None, | ||
| 23 | FTZ, | ||
| 24 | FMZ, | ||
| 25 | INVALIDFMZ3, | ||
| 26 | }; | ||
| 27 | |||
| 28 | inline IR::FpRounding CastFpRounding(FpRounding fp_rounding) { | ||
| 29 | switch (fp_rounding) { | ||
| 30 | case FpRounding::RN: | ||
| 31 | return IR::FpRounding::RN; | ||
| 32 | case FpRounding::RM: | ||
| 33 | return IR::FpRounding::RM; | ||
| 34 | case FpRounding::RP: | ||
| 35 | return IR::FpRounding::RP; | ||
| 36 | case FpRounding::RZ: | ||
| 37 | return IR::FpRounding::RZ; | ||
| 38 | } | ||
| 39 | throw NotImplementedException("Invalid floating-point rounding {}", fp_rounding); | ||
| 40 | } | ||
| 41 | |||
| 42 | inline IR::FmzMode CastFmzMode(FmzMode fmz_mode) { | ||
| 43 | switch (fmz_mode) { | ||
| 44 | case FmzMode::None: | ||
| 45 | return IR::FmzMode::None; | ||
| 46 | case FmzMode::FTZ: | ||
| 47 | return IR::FmzMode::FTZ; | ||
| 48 | case FmzMode::FMZ: | ||
| 49 | // FMZ is manually handled in the instruction | ||
| 50 | return IR::FmzMode::FTZ; | ||
| 51 | case FmzMode::INVALIDFMZ3: | ||
| 52 | break; | ||
| 53 | } | ||
| 54 | throw NotImplementedException("Invalid FMZ mode {}", fmz_mode); | ||
| 55 | } | ||
| 56 | |||
| 57 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp new file mode 100644 index 000000000..20458d2ad --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp | |||
| @@ -0,0 +1,153 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 6 | |||
| 7 | namespace Shader::Maxwell { | ||
| 8 | IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, | ||
| 9 | CompareOp compare_op, bool is_signed) { | ||
| 10 | switch (compare_op) { | ||
| 11 | case CompareOp::False: | ||
| 12 | return ir.Imm1(false); | ||
| 13 | case CompareOp::LessThan: | ||
| 14 | return ir.ILessThan(operand_1, operand_2, is_signed); | ||
| 15 | case CompareOp::Equal: | ||
| 16 | return ir.IEqual(operand_1, operand_2); | ||
| 17 | case CompareOp::LessThanEqual: | ||
| 18 | return ir.ILessThanEqual(operand_1, operand_2, is_signed); | ||
| 19 | case CompareOp::GreaterThan: | ||
| 20 | return ir.IGreaterThan(operand_1, operand_2, is_signed); | ||
| 21 | case CompareOp::NotEqual: | ||
| 22 | return ir.INotEqual(operand_1, operand_2); | ||
| 23 | case CompareOp::GreaterThanEqual: | ||
| 24 | return ir.IGreaterThanEqual(operand_1, operand_2, is_signed); | ||
| 25 | case CompareOp::True: | ||
| 26 | return ir.Imm1(true); | ||
| 27 | default: | ||
| 28 | throw NotImplementedException("Invalid compare op {}", compare_op); | ||
| 29 | } | ||
| 30 | } | ||
| 31 | |||
| 32 | IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, | ||
| 33 | CompareOp compare_op, bool is_signed) { | ||
| 34 | const IR::U32 zero{ir.Imm32(0)}; | ||
| 35 | const IR::U32 carry{ir.Select(ir.GetCFlag(), ir.Imm32(1), zero)}; | ||
| 36 | const IR::U1 z_flag{ir.GetZFlag()}; | ||
| 37 | const IR::U32 intermediate{ir.IAdd(ir.IAdd(operand_1, ir.BitwiseNot(operand_2)), carry)}; | ||
| 38 | const IR::U1 flip_logic{is_signed ? ir.Imm1(false) | ||
| 39 | : ir.LogicalXor(ir.ILessThan(operand_1, zero, true), | ||
| 40 | ir.ILessThan(operand_2, zero, true))}; | ||
| 41 | switch (compare_op) { | ||
| 42 | case CompareOp::False: | ||
| 43 | return ir.Imm1(false); | ||
| 44 | case CompareOp::LessThan: | ||
| 45 | return IR::U1{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true), | ||
| 46 | ir.ILessThan(intermediate, zero, true))}; | ||
| 47 | case CompareOp::Equal: | ||
| 48 | return ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag); | ||
| 49 | case CompareOp::LessThanEqual: { | ||
| 50 | const IR::U1 base_cmp{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true), | ||
| 51 | ir.ILessThan(intermediate, zero, true))}; | ||
| 52 | return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag)); | ||
| 53 | } | ||
| 54 | case CompareOp::GreaterThan: { | ||
| 55 | const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThanEqual(intermediate, zero, true), | ||
| 56 | ir.IGreaterThan(intermediate, zero, true))}; | ||
| 57 | const IR::U1 not_z{ir.LogicalNot(z_flag)}; | ||
| 58 | return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), not_z)); | ||
| 59 | } | ||
| 60 | case CompareOp::NotEqual: | ||
| 61 | return ir.LogicalOr(ir.INotEqual(intermediate, zero), | ||
| 62 | ir.LogicalAnd(ir.IEqual(intermediate, zero), ir.LogicalNot(z_flag))); | ||
| 63 | case CompareOp::GreaterThanEqual: { | ||
| 64 | const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThan(intermediate, zero, true), | ||
| 65 | ir.IGreaterThanEqual(intermediate, zero, true))}; | ||
| 66 | return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag)); | ||
| 67 | } | ||
| 68 | case CompareOp::True: | ||
| 69 | return ir.Imm1(true); | ||
| 70 | default: | ||
| 71 | throw NotImplementedException("Invalid compare op {}", compare_op); | ||
| 72 | } | ||
| 73 | } | ||
| 74 | |||
| 75 | IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2, | ||
| 76 | BooleanOp bop) { | ||
| 77 | switch (bop) { | ||
| 78 | case BooleanOp::AND: | ||
| 79 | return ir.LogicalAnd(predicate_1, predicate_2); | ||
| 80 | case BooleanOp::OR: | ||
| 81 | return ir.LogicalOr(predicate_1, predicate_2); | ||
| 82 | case BooleanOp::XOR: | ||
| 83 | return ir.LogicalXor(predicate_1, predicate_2); | ||
| 84 | default: | ||
| 85 | throw NotImplementedException("Invalid bop {}", bop); | ||
| 86 | } | ||
| 87 | } | ||
| 88 | |||
| 89 | IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op) { | ||
| 90 | switch (op) { | ||
| 91 | case PredicateOp::False: | ||
| 92 | return ir.Imm1(false); | ||
| 93 | case PredicateOp::True: | ||
| 94 | return ir.Imm1(true); | ||
| 95 | case PredicateOp::Zero: | ||
| 96 | return ir.IEqual(result, ir.Imm32(0)); | ||
| 97 | case PredicateOp::NonZero: | ||
| 98 | return ir.INotEqual(result, ir.Imm32(0)); | ||
| 99 | default: | ||
| 100 | throw NotImplementedException("Invalid Predicate operation {}", op); | ||
| 101 | } | ||
| 102 | } | ||
| 103 | |||
| 104 | bool IsCompareOpOrdered(FPCompareOp op) { | ||
| 105 | switch (op) { | ||
| 106 | case FPCompareOp::LTU: | ||
| 107 | case FPCompareOp::EQU: | ||
| 108 | case FPCompareOp::LEU: | ||
| 109 | case FPCompareOp::GTU: | ||
| 110 | case FPCompareOp::NEU: | ||
| 111 | case FPCompareOp::GEU: | ||
| 112 | return false; | ||
| 113 | default: | ||
| 114 | return true; | ||
| 115 | } | ||
| 116 | } | ||
| 117 | |||
| 118 | IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1, | ||
| 119 | const IR::F16F32F64& operand_2, FPCompareOp compare_op, | ||
| 120 | IR::FpControl control) { | ||
| 121 | const bool ordered{IsCompareOpOrdered(compare_op)}; | ||
| 122 | switch (compare_op) { | ||
| 123 | case FPCompareOp::F: | ||
| 124 | return ir.Imm1(false); | ||
| 125 | case FPCompareOp::LT: | ||
| 126 | case FPCompareOp::LTU: | ||
| 127 | return ir.FPLessThan(operand_1, operand_2, control, ordered); | ||
| 128 | case FPCompareOp::EQ: | ||
| 129 | case FPCompareOp::EQU: | ||
| 130 | return ir.FPEqual(operand_1, operand_2, control, ordered); | ||
| 131 | case FPCompareOp::LE: | ||
| 132 | case FPCompareOp::LEU: | ||
| 133 | return ir.FPLessThanEqual(operand_1, operand_2, control, ordered); | ||
| 134 | case FPCompareOp::GT: | ||
| 135 | case FPCompareOp::GTU: | ||
| 136 | return ir.FPGreaterThan(operand_1, operand_2, control, ordered); | ||
| 137 | case FPCompareOp::NE: | ||
| 138 | case FPCompareOp::NEU: | ||
| 139 | return ir.FPNotEqual(operand_1, operand_2, control, ordered); | ||
| 140 | case FPCompareOp::GE: | ||
| 141 | case FPCompareOp::GEU: | ||
| 142 | return ir.FPGreaterThanEqual(operand_1, operand_2, control, ordered); | ||
| 143 | case FPCompareOp::NUM: | ||
| 144 | return ir.FPOrdered(operand_1, operand_2); | ||
| 145 | case FPCompareOp::Nan: | ||
| 146 | return ir.FPUnordered(operand_1, operand_2); | ||
| 147 | case FPCompareOp::T: | ||
| 148 | return ir.Imm1(true); | ||
| 149 | default: | ||
| 150 | throw NotImplementedException("Invalid FP compare op {}", compare_op); | ||
| 151 | } | ||
| 152 | } | ||
| 153 | } // namespace Shader::Maxwell | ||
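ExtendedIntegerCompare is the upper half of a multi-word comparison: intermediate = a + ~b + carry equals a - b - 1 + carry, where the carry and the Z flag come from the preceding low-word operation, and the Equal case additionally requires Z so that the low words matched too. A standalone sketch (illustrative; assumes two's-complement conversions and ignores the signed-overflow corner the unsigned flip_logic path exists for) of a signed 64-bit LessThan decomposed this way:

    #include <cstdint>
    #include <cstdio>

    bool ExtendedLessThanSigned(int64_t a, int64_t b) {
        const uint32_t lo_a = static_cast<uint32_t>(a);
        const uint32_t lo_b = static_cast<uint32_t>(b);
        const uint32_t hi_a = static_cast<uint32_t>(a >> 32);
        const uint32_t hi_b = static_cast<uint32_t>(b >> 32);
        // Low word: lo_a - lo_b computed as lo_a + ~lo_b + 1, keeping the carry-out (C flag)
        const uint64_t lo_sub = uint64_t{lo_a} + uint64_t{~lo_b} + 1;
        const uint32_t carry = static_cast<uint32_t>(lo_sub >> 32);
        // High word, mirroring the IR above: intermediate = hi_a + ~hi_b + carry
        const int32_t intermediate = static_cast<int32_t>(hi_a + ~hi_b + carry);
        return intermediate < 0; // signed LessThan case (flip_logic is false)
    }

    int main() {
        std::printf("%d %d %d\n", ExtendedLessThanSigned(-5, 3), ExtendedLessThanSigned(7, 3),
                    ExtendedLessThanSigned(3, 3)); // prints 1 0 0
    }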
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h new file mode 100644 index 000000000..214d0af3c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h | |||
| @@ -0,0 +1,28 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | [[nodiscard]] IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, | ||
| 12 | const IR::U32& operand_2, CompareOp compare_op, bool is_signed); | ||
| 13 | |||
| 14 | [[nodiscard]] IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, | ||
| 15 | const IR::U32& operand_2, CompareOp compare_op, | ||
| 16 | bool is_signed); | ||
| 17 | |||
| 18 | [[nodiscard]] IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, | ||
| 19 | const IR::U1& predicate_2, BooleanOp bop); | ||
| 20 | |||
| 21 | [[nodiscard]] IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op); | ||
| 22 | |||
| 23 | [[nodiscard]] bool IsCompareOpOrdered(FPCompareOp op); | ||
| 24 | |||
| 25 | [[nodiscard]] IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1, | ||
| 26 | const IR::F16F32F64& operand_2, FPCompareOp compare_op, | ||
| 27 | IR::FpControl control = {}); | ||
| 28 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp new file mode 100644 index 000000000..420f2fb94 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp | |||
| @@ -0,0 +1,66 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | |||
| 12 | void TranslatorVisitor::CSET(u64 insn) { | ||
| 13 | union { | ||
| 14 | u64 raw; | ||
| 15 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 16 | BitField<8, 5, IR::FlowTest> cc_test; | ||
| 17 | BitField<39, 3, IR::Pred> bop_pred; | ||
| 18 | BitField<42, 1, u64> neg_bop_pred; | ||
| 19 | BitField<44, 1, u64> bf; | ||
| 20 | BitField<45, 2, BooleanOp> bop; | ||
| 21 | BitField<47, 1, u64> cc; | ||
| 22 | } const cset{insn}; | ||
| 23 | |||
| 24 | const IR::U32 one_mask{ir.Imm32(-1)}; | ||
| 25 | const IR::U32 fp_one{ir.Imm32(0x3f800000)}; | ||
| 26 | const IR::U32 zero{ir.Imm32(0)}; | ||
| 27 | const IR::U32 pass_result{cset.bf == 0 ? one_mask : fp_one}; | ||
| 28 | const IR::U1 cc_test_result{ir.GetFlowTestResult(cset.cc_test)}; | ||
| 29 | const IR::U1 bop_pred{ir.GetPred(cset.bop_pred, cset.neg_bop_pred != 0)}; | ||
| 30 | const IR::U1 pred_result{PredicateCombine(ir, cc_test_result, bop_pred, cset.bop)}; | ||
| 31 | const IR::U32 result{ir.Select(pred_result, pass_result, zero)}; | ||
| 32 | X(cset.dest_reg, result); | ||
| 33 | if (cset.cc != 0) { | ||
| 34 | const IR::U1 is_zero{ir.IEqual(result, zero)}; | ||
| 35 | SetZFlag(is_zero); | ||
| 36 | if (cset.bf != 0) { | ||
| 37 | ResetSFlag(); | ||
| 38 | } else { | ||
| 39 | SetSFlag(ir.LogicalNot(is_zero)); | ||
| 40 | } | ||
| 41 | ResetOFlag(); | ||
| 42 | ResetCFlag(); | ||
| 43 | } | ||
| 44 | } | ||
| 45 | |||
| 46 | void TranslatorVisitor::CSETP(u64 insn) { | ||
| 47 | union { | ||
| 48 | u64 raw; | ||
| 49 | BitField<0, 3, IR::Pred> dest_pred_b; | ||
| 50 | BitField<3, 3, IR::Pred> dest_pred_a; | ||
| 51 | BitField<8, 5, IR::FlowTest> cc_test; | ||
| 52 | BitField<39, 3, IR::Pred> bop_pred; | ||
| 53 | BitField<42, 1, u64> neg_bop_pred; | ||
| 54 | BitField<45, 2, BooleanOp> bop; | ||
| 55 | } const csetp{insn}; | ||
| 56 | |||
| 57 | const BooleanOp bop{csetp.bop}; | ||
| 58 | const IR::U1 bop_pred{ir.GetPred(csetp.bop_pred, csetp.neg_bop_pred != 0)}; | ||
| 59 | const IR::U1 cc_test_result{ir.GetFlowTestResult(csetp.cc_test)}; | ||
| 60 | const IR::U1 result_a{PredicateCombine(ir, cc_test_result, bop_pred, bop)}; | ||
| 61 | const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(cc_test_result), bop_pred, bop)}; | ||
| 62 | ir.SetPred(csetp.dest_pred_a, result_a); | ||
| 63 | ir.SetPred(csetp.dest_pred_b, result_b); | ||
| 64 | } | ||
| 65 | |||
| 66 | } // namespace Shader::Maxwell | ||
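The two pass_result encodings selected above, shown concretely: with BF clear the destination gets an all-ones integer mask, with BF set it gets the IEEE-754 bit pattern of 1.0f (0x3f800000). A tiny standalone illustration:

    #include <cstdio>
    #include <cstring>

    int main() {
        const unsigned fp_one_bits = 0x3f800000u;
        float fp_one;
        std::memcpy(&fp_one, &fp_one_bits, sizeof(fp_one));
        std::printf("mask=0x%08x  fp_one=0x%08x (%.1f)\n", 0xFFFFFFFFu, fp_one_bits, fp_one);
    }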
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp new file mode 100644 index 000000000..5a1b3a8fc --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp | |||
| @@ -0,0 +1,55 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | |||
| 13 | void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { | ||
| 14 | union { | ||
| 15 | u64 raw; | ||
| 16 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 17 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 18 | BitField<39, 2, FpRounding> fp_rounding; | ||
| 19 | BitField<45, 1, u64> neg_b; | ||
| 20 | BitField<46, 1, u64> abs_a; | ||
| 21 | BitField<47, 1, u64> cc; | ||
| 22 | BitField<48, 1, u64> neg_a; | ||
| 23 | BitField<49, 1, u64> abs_b; | ||
| 24 | } const dadd{insn}; | ||
| 25 | if (dadd.cc != 0) { | ||
| 26 | throw NotImplementedException("DADD CC"); | ||
| 27 | } | ||
| 28 | |||
| 29 | const IR::F64 src_a{v.D(dadd.src_a_reg)}; | ||
| 30 | const IR::F64 op_a{v.ir.FPAbsNeg(src_a, dadd.abs_a != 0, dadd.neg_a != 0)}; | ||
| 31 | const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)}; | ||
| 32 | |||
| 33 | const IR::FpControl control{ | ||
| 34 | .no_contraction = true, | ||
| 35 | .rounding = CastFpRounding(dadd.fp_rounding), | ||
| 36 | .fmz_mode = IR::FmzMode::None, | ||
| 37 | }; | ||
| 38 | |||
| 39 | v.D(dadd.dest_reg, v.ir.FPAdd(op_a, op_b, control)); | ||
| 40 | } | ||
| 41 | } // Anonymous namespace | ||
| 42 | |||
| 43 | void TranslatorVisitor::DADD_reg(u64 insn) { | ||
| 44 | DADD(*this, insn, GetDoubleReg20(insn)); | ||
| 45 | } | ||
| 46 | |||
| 47 | void TranslatorVisitor::DADD_cbuf(u64 insn) { | ||
| 48 | DADD(*this, insn, GetDoubleCbuf(insn)); | ||
| 49 | } | ||
| 50 | |||
| 51 | void TranslatorVisitor::DADD_imm(u64 insn) { | ||
| 52 | DADD(*this, insn, GetDoubleImm20(insn)); | ||
| 53 | } | ||
| 54 | |||
| 55 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp new file mode 100644 index 000000000..1173192e4 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp | |||
| @@ -0,0 +1,72 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | void DSET(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { | ||
| 13 | union { | ||
| 14 | u64 insn; | ||
| 15 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 16 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 17 | BitField<39, 3, IR::Pred> pred; | ||
| 18 | BitField<42, 1, u64> neg_pred; | ||
| 19 | BitField<43, 1, u64> negate_a; | ||
| 20 | BitField<44, 1, u64> abs_b; | ||
| 21 | BitField<45, 2, BooleanOp> bop; | ||
| 22 | BitField<47, 1, u64> cc; | ||
| 23 | BitField<48, 4, FPCompareOp> compare_op; | ||
| 24 | BitField<52, 1, u64> bf; | ||
| 25 | BitField<53, 1, u64> negate_b; | ||
| 26 | BitField<54, 1, u64> abs_a; | ||
| 27 | } const dset{insn}; | ||
| 28 | |||
| 29 | const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dset.src_a_reg), dset.abs_a != 0, dset.negate_a != 0)}; | ||
| 30 | const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dset.abs_b != 0, dset.negate_b != 0)}; | ||
| 31 | |||
| 32 | IR::U1 pred{v.ir.GetPred(dset.pred)}; | ||
| 33 | if (dset.neg_pred != 0) { | ||
| 34 | pred = v.ir.LogicalNot(pred); | ||
| 35 | } | ||
| 36 | const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, dset.compare_op)}; | ||
| 37 | const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, dset.bop)}; | ||
| 38 | |||
| 39 | const IR::U32 one_mask{v.ir.Imm32(-1)}; | ||
| 40 | const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; | ||
| 41 | const IR::U32 zero{v.ir.Imm32(0)}; | ||
| 42 | const IR::U32 pass_result{dset.bf == 0 ? one_mask : fp_one}; | ||
| 43 | const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)}; | ||
| 44 | |||
| 45 | v.X(dset.dest_reg, result); | ||
| 46 | if (dset.cc != 0) { | ||
| 47 | const IR::U1 is_zero{v.ir.IEqual(result, zero)}; | ||
| 48 | v.SetZFlag(is_zero); | ||
| 49 | if (dset.bf != 0) { | ||
| 50 | v.ResetSFlag(); | ||
| 51 | } else { | ||
| 52 | v.SetSFlag(v.ir.LogicalNot(is_zero)); | ||
| 53 | } | ||
| 54 | v.ResetCFlag(); | ||
| 55 | v.ResetOFlag(); | ||
| 56 | } | ||
| 57 | } | ||
| 58 | } // Anonymous namespace | ||
| 59 | |||
| 60 | void TranslatorVisitor::DSET_reg(u64 insn) { | ||
| 61 | DSET(*this, insn, GetDoubleReg20(insn)); | ||
| 62 | } | ||
| 63 | |||
| 64 | void TranslatorVisitor::DSET_cbuf(u64 insn) { | ||
| 65 | DSET(*this, insn, GetDoubleCbuf(insn)); | ||
| 66 | } | ||
| 67 | |||
| 68 | void TranslatorVisitor::DSET_imm(u64 insn) { | ||
| 69 | DSET(*this, insn, GetDoubleImm20(insn)); | ||
| 70 | } | ||
| 71 | |||
| 72 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp new file mode 100644 index 000000000..f66097014 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp | |||
| @@ -0,0 +1,58 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | |||
| 13 | void DFMA(TranslatorVisitor& v, u64 insn, const IR::F64& src_b, const IR::F64& src_c) { | ||
| 14 | union { | ||
| 15 | u64 raw; | ||
| 16 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 17 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 18 | BitField<50, 2, FpRounding> fp_rounding; | ||
| 19 | BitField<47, 1, u64> cc; | ||
| 20 | BitField<48, 1, u64> neg_b; | ||
| 21 | BitField<49, 1, u64> neg_c; | ||
| 22 | } const dfma{insn}; | ||
| 23 | |||
| 24 | if (dfma.cc != 0) { | ||
| 25 | throw NotImplementedException("DFMA CC"); | ||
| 26 | } | ||
| 27 | |||
| 28 | const IR::F64 src_a{v.D(dfma.src_a_reg)}; | ||
| 29 | const IR::F64 op_b{v.ir.FPAbsNeg(src_b, false, dfma.neg_b != 0)}; | ||
| 30 | const IR::F64 op_c{v.ir.FPAbsNeg(src_c, false, dfma.neg_c != 0)}; | ||
| 31 | |||
| 32 | const IR::FpControl control{ | ||
| 33 | .no_contraction = true, | ||
| 34 | .rounding = CastFpRounding(dfma.fp_rounding), | ||
| 35 | .fmz_mode = IR::FmzMode::None, | ||
| 36 | }; | ||
| 37 | |||
| 38 | v.D(dfma.dest_reg, v.ir.FPFma(src_a, op_b, op_c, control)); | ||
| 39 | } | ||
| 40 | } // Anonymous namespace | ||
| 41 | |||
| 42 | void TranslatorVisitor::DFMA_reg(u64 insn) { | ||
| 43 | DFMA(*this, insn, GetDoubleReg20(insn), GetDoubleReg39(insn)); | ||
| 44 | } | ||
| 45 | |||
| 46 | void TranslatorVisitor::DFMA_cr(u64 insn) { | ||
| 47 | DFMA(*this, insn, GetDoubleCbuf(insn), GetDoubleReg39(insn)); | ||
| 48 | } | ||
| 49 | |||
| 50 | void TranslatorVisitor::DFMA_rc(u64 insn) { | ||
| 51 | DFMA(*this, insn, GetDoubleReg39(insn), GetDoubleCbuf(insn)); | ||
| 52 | } | ||
| 53 | |||
| 54 | void TranslatorVisitor::DFMA_imm(u64 insn) { | ||
| 55 | DFMA(*this, insn, GetDoubleImm20(insn), GetDoubleReg39(insn)); | ||
| 56 | } | ||
| 57 | |||
| 58 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp new file mode 100644 index 000000000..6b551847c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp | |||
| @@ -0,0 +1,55 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void DMNMX(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { | ||
| 12 | union { | ||
| 13 | u64 insn; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 16 | BitField<39, 3, IR::Pred> pred; | ||
| 17 | BitField<42, 1, u64> neg_pred; | ||
| 18 | BitField<45, 1, u64> negate_b; | ||
| 19 | BitField<46, 1, u64> abs_a; | ||
| 20 | BitField<47, 1, u64> cc; | ||
| 21 | BitField<48, 1, u64> negate_a; | ||
| 22 | BitField<49, 1, u64> abs_b; | ||
| 23 | } const dmnmx{insn}; | ||
| 24 | |||
| 25 | if (dmnmx.cc != 0) { | ||
| 26 | throw NotImplementedException("DMNMX CC"); | ||
| 27 | } | ||
| 28 | |||
| 29 | const IR::U1 pred{v.ir.GetPred(dmnmx.pred)}; | ||
| 30 | const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dmnmx.src_a_reg), dmnmx.abs_a != 0, dmnmx.negate_a != 0)}; | ||
| 31 | const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dmnmx.abs_b != 0, dmnmx.negate_b != 0)}; | ||
| 32 | |||
| 33 | IR::F64 max{v.ir.FPMax(op_a, op_b)}; | ||
| 34 | IR::F64 min{v.ir.FPMin(op_a, op_b)}; | ||
| 35 | |||
| 36 | if (dmnmx.neg_pred != 0) { | ||
| 37 | std::swap(min, max); | ||
| 38 | } | ||
| 39 | v.D(dmnmx.dest_reg, IR::F64{v.ir.Select(pred, min, max)}); | ||
| 40 | } | ||
| 41 | } // Anonymous namespace | ||
| 42 | |||
| 43 | void TranslatorVisitor::DMNMX_reg(u64 insn) { | ||
| 44 | DMNMX(*this, insn, GetDoubleReg20(insn)); | ||
| 45 | } | ||
| 46 | |||
| 47 | void TranslatorVisitor::DMNMX_cbuf(u64 insn) { | ||
| 48 | DMNMX(*this, insn, GetDoubleCbuf(insn)); | ||
| 49 | } | ||
| 50 | |||
| 51 | void TranslatorVisitor::DMNMX_imm(u64 insn) { | ||
| 52 | DMNMX(*this, insn, GetDoubleImm20(insn)); | ||
| 53 | } | ||
| 54 | |||
| 55 | } // namespace Shader::Maxwell | ||
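DMNMX's selection logic, restated as a scalar sketch (illustrative only): a true predicate selects the minimum, a false one the maximum, and neg_pred swaps the two outcomes before the selection.

    #include <algorithm>
    #include <utility>

    double DmnmxModel(double op_a, double op_b, bool pred, bool neg_pred) {
        double max = std::max(op_a, op_b);
        double min = std::min(op_a, op_b);
        if (neg_pred) {
            std::swap(min, max); // mirrors the swap above
        }
        return pred ? min : max;
    }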
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp new file mode 100644 index 000000000..c0159fb65 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp | |||
| @@ -0,0 +1,50 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | |||
| 13 | void DMUL(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { | ||
| 14 | union { | ||
| 15 | u64 raw; | ||
| 16 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 17 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 18 | BitField<39, 2, FpRounding> fp_rounding; | ||
| 19 | BitField<47, 1, u64> cc; | ||
| 20 | BitField<48, 1, u64> neg; | ||
| 21 | } const dmul{insn}; | ||
| 22 | |||
| 23 | if (dmul.cc != 0) { | ||
| 24 | throw NotImplementedException("DMUL CC"); | ||
| 25 | } | ||
| 26 | |||
| 27 | const IR::F64 src_a{v.ir.FPAbsNeg(v.D(dmul.src_a_reg), false, dmul.neg != 0)}; | ||
| 28 | const IR::FpControl control{ | ||
| 29 | .no_contraction = true, | ||
| 30 | .rounding = CastFpRounding(dmul.fp_rounding), | ||
| 31 | .fmz_mode = IR::FmzMode::None, | ||
| 32 | }; | ||
| 33 | |||
| 34 | v.D(dmul.dest_reg, v.ir.FPMul(src_a, src_b, control)); | ||
| 35 | } | ||
| 36 | } // Anonymous namespace | ||
| 37 | |||
| 38 | void TranslatorVisitor::DMUL_reg(u64 insn) { | ||
| 39 | DMUL(*this, insn, GetDoubleReg20(insn)); | ||
| 40 | } | ||
| 41 | |||
| 42 | void TranslatorVisitor::DMUL_cbuf(u64 insn) { | ||
| 43 | DMUL(*this, insn, GetDoubleCbuf(insn)); | ||
| 44 | } | ||
| 45 | |||
| 46 | void TranslatorVisitor::DMUL_imm(u64 insn) { | ||
| 47 | DMUL(*this, insn, GetDoubleImm20(insn)); | ||
| 48 | } | ||
| 49 | |||
| 50 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp new file mode 100644 index 000000000..b8e74ee44 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp | |||
| @@ -0,0 +1,54 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | void DSETP(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { | ||
| 13 | union { | ||
| 14 | u64 insn; | ||
| 15 | BitField<0, 3, IR::Pred> dest_pred_b; | ||
| 16 | BitField<3, 3, IR::Pred> dest_pred_a; | ||
| 17 | BitField<6, 1, u64> negate_b; | ||
| 18 | BitField<7, 1, u64> abs_a; | ||
| 19 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 20 | BitField<39, 3, IR::Pred> bop_pred; | ||
| 21 | BitField<42, 1, u64> neg_bop_pred; | ||
| 22 | BitField<43, 1, u64> negate_a; | ||
| 23 | BitField<44, 1, u64> abs_b; | ||
| 24 | BitField<45, 2, BooleanOp> bop; | ||
| 25 | BitField<48, 4, FPCompareOp> compare_op; | ||
| 26 | } const dsetp{insn}; | ||
| 27 | |||
| 28 | const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dsetp.src_a_reg), dsetp.abs_a != 0, dsetp.negate_a != 0)}; | ||
| 29 | const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dsetp.abs_b != 0, dsetp.negate_b != 0)}; | ||
| 30 | |||
| 31 | const BooleanOp bop{dsetp.bop}; | ||
| 32 | const FPCompareOp compare_op{dsetp.compare_op}; | ||
| 33 | const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op)}; | ||
| 34 | const IR::U1 bop_pred{v.ir.GetPred(dsetp.bop_pred, dsetp.neg_bop_pred != 0)}; | ||
| 35 | const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)}; | ||
| 36 | const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)}; | ||
| 37 | v.ir.SetPred(dsetp.dest_pred_a, result_a); | ||
| 38 | v.ir.SetPred(dsetp.dest_pred_b, result_b); | ||
| 39 | } | ||
| 40 | } // Anonymous namespace | ||
| 41 | |||
| 42 | void TranslatorVisitor::DSETP_reg(u64 insn) { | ||
| 43 | DSETP(*this, insn, GetDoubleReg20(insn)); | ||
| 44 | } | ||
| 45 | |||
| 46 | void TranslatorVisitor::DSETP_cbuf(u64 insn) { | ||
| 47 | DSETP(*this, insn, GetDoubleCbuf(insn)); | ||
| 48 | } | ||
| 49 | |||
| 50 | void TranslatorVisitor::DSETP_imm(u64 insn) { | ||
| 51 | DSETP(*this, insn, GetDoubleImm20(insn)); | ||
| 52 | } | ||
| 53 | |||
| 54 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp new file mode 100644 index 000000000..c2443c886 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp | |||
| @@ -0,0 +1,43 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void ExitFragment(TranslatorVisitor& v) { | ||
| 12 | const ProgramHeader sph{v.env.SPH()}; | ||
| 13 | IR::Reg src_reg{IR::Reg::R0}; | ||
| 14 | for (u32 render_target = 0; render_target < 8; ++render_target) { | ||
| 15 | const std::array<bool, 4> mask{sph.ps.EnabledOutputComponents(render_target)}; | ||
| 16 | for (u32 component = 0; component < 4; ++component) { | ||
| 17 | if (!mask[component]) { | ||
| 18 | continue; | ||
| 19 | } | ||
| 20 | v.ir.SetFragColor(render_target, component, v.F(src_reg)); | ||
| 21 | ++src_reg; | ||
| 22 | } | ||
| 23 | } | ||
| 24 | if (sph.ps.omap.sample_mask != 0) { | ||
| 25 | v.ir.SetSampleMask(v.X(src_reg)); | ||
| 26 | } | ||
| 27 | if (sph.ps.omap.depth != 0) { | ||
| 28 | v.ir.SetFragDepth(v.F(src_reg + 1)); | ||
| 29 | } | ||
| 30 | } | ||
| 31 | } // Anonymous namespace | ||
| 32 | |||
| 33 | void TranslatorVisitor::EXIT() { | ||
| 34 | switch (env.ShaderStage()) { | ||
| 35 | case Stage::Fragment: | ||
| 36 | ExitFragment(*this); | ||
| 37 | break; | ||
| 38 | default: | ||
| 39 | break; | ||
| 40 | } | ||
| 41 | } | ||
| 42 | |||
| 43 | } // namespace Shader::Maxwell | ||
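ExitFragment packs every fragment output into consecutive registers starting at R0: enabled color components first, then the sample mask, then depth one register after that. A host-side sketch of the register walk; the enabled array stands in for sph.ps.EnabledOutputComponents and is an assumption for illustration:

    #include <array>

    struct FragmentOutputs {
        std::array<std::array<int, 4>, 8> color_reg{}; // -1 = component disabled
        int sample_mask_reg = -1;
        int depth_reg = -1;
    };

    // 'enabled' models sph.ps.EnabledOutputComponents for each render target.
    FragmentOutputs MapOutputs(const std::array<std::array<bool, 4>, 8>& enabled) {
        FragmentOutputs out{};
        int reg = 0; // R0
        for (int rt = 0; rt < 8; ++rt) {
            for (int c = 0; c < 4; ++c) {
                out.color_reg[rt][c] = enabled[rt][c] ? reg++ : -1;
            }
        }
        out.sample_mask_reg = reg;  // consumed only when omap.sample_mask is set
        out.depth_reg = reg + 1;    // consumed only when omap.depth is set
        return out;
    }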
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp new file mode 100644 index 000000000..f0cb25d61 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp | |||
| @@ -0,0 +1,47 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void FLO(TranslatorVisitor& v, u64 insn, IR::U32 src) { | ||
| 12 | union { | ||
| 13 | u64 insn; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<40, 1, u64> tilde; | ||
| 16 | BitField<41, 1, u64> shift; | ||
| 17 | BitField<47, 1, u64> cc; | ||
| 18 | BitField<48, 1, u64> is_signed; | ||
| 19 | } const flo{insn}; | ||
| 20 | |||
| 21 | if (flo.cc != 0) { | ||
| 22 | throw NotImplementedException("CC"); | ||
| 23 | } | ||
| 24 | if (flo.tilde != 0) { | ||
| 25 | src = v.ir.BitwiseNot(src); | ||
| 26 | } | ||
| 27 | IR::U32 result{flo.is_signed != 0 ? v.ir.FindSMsb(src) : v.ir.FindUMsb(src)}; | ||
| 28 | if (flo.shift != 0) { | ||
| 29 | const IR::U1 not_found{v.ir.IEqual(result, v.ir.Imm32(-1))}; | ||
| 30 | result = IR::U32{v.ir.Select(not_found, result, v.ir.BitwiseXor(result, v.ir.Imm32(31)))}; | ||
| 31 | } | ||
| 32 | v.X(flo.dest_reg, result); | ||
| 33 | } | ||
| 34 | } // Anonymous namespace | ||
| 35 | |||
| 36 | void TranslatorVisitor::FLO_reg(u64 insn) { | ||
| 37 | FLO(*this, insn, GetReg20(insn)); | ||
| 38 | } | ||
| 39 | |||
| 40 | void TranslatorVisitor::FLO_cbuf(u64 insn) { | ||
| 41 | FLO(*this, insn, GetCbuf(insn)); | ||
| 42 | } | ||
| 43 | |||
| 44 | void TranslatorVisitor::FLO_imm(u64 insn) { | ||
| 45 | FLO(*this, insn, GetImm20(insn)); | ||
| 46 | } | ||
| 47 | } // namespace Shader::Maxwell | ||
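The shift variant of FLO relies on a small identity: FindUMsb/FindSMsb yield the bit index of the most significant set bit (or -1 when the input has none), and XOR-ing an index in [0, 31] with 31 equals 31 - index, turning an MSB position into a count of leading zeros while the Select leaves the -1 "not found" value untouched. A portable reference for the unsigned case, as a sketch:

    #include <cstdint>

    // Reference semantics for FLO.U32 with the shift flag set (sketch).
    int32_t FloU32Sh(uint32_t v) {
        if (v == 0) {
            return -1; // "not found" passes through unchanged
        }
        int32_t msb = 31;
        while ((v & (1u << msb)) == 0) {
            --msb;
        }
        return msb ^ 31; // == 31 - msb for any msb in [0, 31]
    }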
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp new file mode 100644 index 000000000..b8c89810c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp | |||
| @@ -0,0 +1,82 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRounding fp_rounding, | ||
| 13 | const IR::F32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) { | ||
| 14 | union { | ||
| 15 | u64 raw; | ||
| 16 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 17 | BitField<8, 8, IR::Reg> src_a; | ||
| 18 | } const fadd{insn}; | ||
| 19 | |||
| 20 | if (cc) { | ||
| 21 | throw NotImplementedException("FADD CC"); | ||
| 22 | } | ||
| 23 | const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fadd.src_a), abs_a, neg_a)}; | ||
| 24 | const IR::F32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)}; | ||
| 25 | IR::FpControl control{ | ||
| 26 | .no_contraction = true, | ||
| 27 | .rounding = CastFpRounding(fp_rounding), | ||
| 28 | .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None), | ||
| 29 | }; | ||
| 30 | IR::F32 value{v.ir.FPAdd(op_a, op_b, control)}; | ||
| 31 | if (sat) { | ||
| 32 | value = v.ir.FPSaturate(value); | ||
| 33 | } | ||
| 34 | v.F(fadd.dest_reg, value); | ||
| 35 | } | ||
| 36 | |||
| 37 | void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { | ||
| 38 | union { | ||
| 39 | u64 raw; | ||
| 40 | BitField<39, 2, FpRounding> fp_rounding; | ||
| 41 | BitField<44, 1, u64> ftz; | ||
| 42 | BitField<45, 1, u64> neg_b; | ||
| 43 | BitField<46, 1, u64> abs_a; | ||
| 44 | BitField<47, 1, u64> cc; | ||
| 45 | BitField<48, 1, u64> neg_a; | ||
| 46 | BitField<49, 1, u64> abs_b; | ||
| 47 | BitField<50, 1, u64> sat; | ||
| 48 | } const fadd{insn}; | ||
| 49 | |||
| 50 | FADD(v, insn, fadd.sat != 0, fadd.cc != 0, fadd.ftz != 0, fadd.fp_rounding, src_b, | ||
| 51 | fadd.abs_a != 0, fadd.neg_a != 0, fadd.abs_b != 0, fadd.neg_b != 0); | ||
| 52 | } | ||
| 53 | } // Anonymous namespace | ||
| 54 | |||
| 55 | void TranslatorVisitor::FADD_reg(u64 insn) { | ||
| 56 | FADD(*this, insn, GetFloatReg20(insn)); | ||
| 57 | } | ||
| 58 | |||
| 59 | void TranslatorVisitor::FADD_cbuf(u64 insn) { | ||
| 60 | FADD(*this, insn, GetFloatCbuf(insn)); | ||
| 61 | } | ||
| 62 | |||
| 63 | void TranslatorVisitor::FADD_imm(u64 insn) { | ||
| 64 | FADD(*this, insn, GetFloatImm20(insn)); | ||
| 65 | } | ||
| 66 | |||
| 67 | void TranslatorVisitor::FADD32I(u64 insn) { | ||
| 68 | union { | ||
| 69 | u64 raw; | ||
| 70 | BitField<55, 1, u64> ftz; | ||
| 71 | BitField<56, 1, u64> neg_a; | ||
| 72 | BitField<54, 1, u64> abs_a; | ||
| 73 | BitField<52, 1, u64> cc; | ||
| 74 | BitField<53, 1, u64> neg_b; | ||
| 75 | BitField<57, 1, u64> abs_b; | ||
| 76 | } const fadd32i{insn}; | ||
| 77 | |||
| 78 | FADD(*this, insn, false, fadd32i.cc != 0, fadd32i.ftz != 0, FpRounding::RN, GetFloatImm32(insn), | ||
| 79 | fadd32i.abs_a != 0, fadd32i.neg_a != 0, fadd32i.abs_b != 0, fadd32i.neg_b != 0); | ||
| 80 | } | ||
| 81 | |||
| 82 | } // namespace Shader::Maxwell | ||
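FADD shows the decoding idiom used throughout these translators: an anonymous union whose BitField members all alias one 64-bit instruction word, so each member names a bit range rather than owning storage. A stripped-down stand-in for common/bit_field.h, kept deliberately minimal for illustration (the real template also handles signed values and enum fields):

    #include <cstddef>
    #include <cstdint>

    // Minimal BitField stand-in: Position/Bits name a range of the payload.
    template <std::size_t Position, std::size_t Bits>
    struct BitField {
        std::uint64_t raw;
        std::uint64_t Value() const {
            return (raw >> Position) & ((std::uint64_t{1} << Bits) - 1);
        }
    };

    union FaddEncoding {
        std::uint64_t raw;
        BitField<0, 8> dest_reg; // bits [7:0]
        BitField<50, 1> sat;     // bit 50
    };
    // FaddEncoding fadd{insn}; initializes 'raw', and every member reads
    // from the same instruction word, e.g. fadd.sat.Value() != 0.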
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp new file mode 100644 index 000000000..7127ebf54 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp | |||
| @@ -0,0 +1,55 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& operand) { | ||
| 13 | union { | ||
| 14 | u64 insn; | ||
| 15 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 16 | BitField<8, 8, IR::Reg> src_reg; | ||
| 17 | BitField<47, 1, u64> ftz; | ||
| 18 | BitField<48, 4, FPCompareOp> compare_op; | ||
| 19 | } const fcmp{insn}; | ||
| 20 | |||
| 21 | const IR::F32 zero{v.ir.Imm32(0.0f)}; | ||
| 22 | const IR::FpControl control{.fmz_mode = (fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None)}; | ||
| 23 | const IR::U1 cmp_result{FloatingPointCompare(v.ir, operand, zero, fcmp.compare_op, control)}; | ||
| 24 | const IR::U32 src_reg{v.X(fcmp.src_reg)}; | ||
| 25 | const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)}; | ||
| 26 | |||
| 27 | v.X(fcmp.dest_reg, result); | ||
| 28 | } | ||
| 29 | } // Anonymous namespace | ||
| 30 | |||
| 31 | void TranslatorVisitor::FCMP_reg(u64 insn) { | ||
| 32 | FCMP(*this, insn, GetReg20(insn), GetFloatReg39(insn)); | ||
| 33 | } | ||
| 34 | |||
| 35 | void TranslatorVisitor::FCMP_rc(u64 insn) { | ||
| 36 | FCMP(*this, insn, GetReg39(insn), GetFloatCbuf(insn)); | ||
| 37 | } | ||
| 38 | |||
| 39 | void TranslatorVisitor::FCMP_cr(u64 insn) { | ||
| 40 | FCMP(*this, insn, GetCbuf(insn), GetFloatReg39(insn)); | ||
| 41 | } | ||
| 42 | |||
| 43 | void TranslatorVisitor::FCMP_imm(u64 insn) { | ||
| 44 | union { | ||
| 45 | u64 raw; | ||
| 46 | BitField<20, 19, u64> value; | ||
| 47 | BitField<56, 1, u64> is_negative; | ||
| 48 | } const fcmp{insn}; | ||
| 49 | const u32 sign_bit{fcmp.is_negative != 0 ? (1U << 31) : 0}; | ||
| 50 | const u32 value{static_cast<u32>(fcmp.value) << 12}; | ||
| 51 | |||
| 52 | FCMP(*this, insn, ir.Imm32(value | sign_bit), GetFloatReg39(insn)); | ||
| 53 | } | ||
| 54 | |||
| 55 | } // namespace Shader::Maxwell | ||
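FCMP_imm's immediate reconstruction is compact enough to restate: the 19-bit field carries the top exponent/mantissa bits of an IEEE-754 single, so it is shifted into bits [30:12], bit 56 of the instruction supplies the sign, and the low 12 mantissa bits stay zero. As a sketch:

    #include <cstdint>

    // Rebuild the FCMP immediate as raw IEEE-754 bits (sketch).
    uint32_t MakeFcmpImmediate(uint32_t value19, bool is_negative) {
        const uint32_t sign_bit = is_negative ? (1u << 31) : 0u;
        return (value19 << 12) | sign_bit; // low 12 mantissa bits are zero
    }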
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp new file mode 100644 index 000000000..eece4f28f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp | |||
| @@ -0,0 +1,78 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { | ||
| 13 | union { | ||
| 14 | u64 insn; | ||
| 15 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 16 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 17 | BitField<39, 3, IR::Pred> pred; | ||
| 18 | BitField<42, 1, u64> neg_pred; | ||
| 19 | BitField<43, 1, u64> negate_a; | ||
| 20 | BitField<44, 1, u64> abs_b; | ||
| 21 | BitField<45, 2, BooleanOp> bop; | ||
| 22 | BitField<47, 1, u64> cc; | ||
| 23 | BitField<48, 4, FPCompareOp> compare_op; | ||
| 24 | BitField<52, 1, u64> bf; | ||
| 25 | BitField<53, 1, u64> negate_b; | ||
| 26 | BitField<54, 1, u64> abs_a; | ||
| 27 | BitField<55, 1, u64> ftz; | ||
| 28 | } const fset{insn}; | ||
| 29 | |||
| 30 | const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fset.src_a_reg), fset.abs_a != 0, fset.negate_a != 0)}; | ||
| 31 | const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fset.abs_b != 0, fset.negate_b != 0)}; | ||
| 32 | const IR::FpControl control{ | ||
| 33 | .no_contraction = false, | ||
| 34 | .rounding = IR::FpRounding::DontCare, | ||
| 35 | .fmz_mode = (fset.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), | ||
| 36 | }; | ||
| 37 | |||
| 38 | IR::U1 pred{v.ir.GetPred(fset.pred)}; | ||
| 39 | if (fset.neg_pred != 0) { | ||
| 40 | pred = v.ir.LogicalNot(pred); | ||
| 41 | } | ||
| 42 | const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, fset.compare_op, control)}; | ||
| 43 | const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, fset.bop)}; | ||
| 44 | |||
| 45 | const IR::U32 one_mask{v.ir.Imm32(-1)}; | ||
| 46 | const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; | ||
| 47 | const IR::U32 zero{v.ir.Imm32(0)}; | ||
| 48 | const IR::U32 pass_result{fset.bf == 0 ? one_mask : fp_one}; | ||
| 49 | const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)}; | ||
| 50 | |||
| 51 | v.X(fset.dest_reg, result); | ||
| 52 | if (fset.cc != 0) { | ||
| 53 | const IR::U1 is_zero{v.ir.IEqual(result, zero)}; | ||
| 54 | v.SetZFlag(is_zero); | ||
| 55 | if (fset.bf != 0) { | ||
| 56 | v.ResetSFlag(); | ||
| 57 | } else { | ||
| 58 | v.SetSFlag(v.ir.LogicalNot(is_zero)); | ||
| 59 | } | ||
| 60 | v.ResetCFlag(); | ||
| 61 | v.ResetOFlag(); | ||
| 62 | } | ||
| 63 | } | ||
| 64 | } // Anonymous namespace | ||
| 65 | |||
| 66 | void TranslatorVisitor::FSET_reg(u64 insn) { | ||
| 67 | FSET(*this, insn, GetFloatReg20(insn)); | ||
| 68 | } | ||
| 69 | |||
| 70 | void TranslatorVisitor::FSET_cbuf(u64 insn) { | ||
| 71 | FSET(*this, insn, GetFloatCbuf(insn)); | ||
| 72 | } | ||
| 73 | |||
| 74 | void TranslatorVisitor::FSET_imm(u64 insn) { | ||
| 75 | FSET(*this, insn, GetFloatImm20(insn)); | ||
| 76 | } | ||
| 77 | |||
| 78 | } // namespace Shader::Maxwell | ||
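The BF ("boolean float") bit decides what a passing FSET comparison writes: an all-ones integer mask when clear, or the float constant 1.0f (0x3f800000) when set; a failing comparison always writes zero, which is also why the CC path can derive the sign flag from a zero test. Sketched as plain data flow:

    #include <cstdint>

    // Result encoding for FSET (sketch).
    uint32_t FsetResult(bool pass, bool bf) {
        const uint32_t pass_value = bf ? 0x3f800000u /* 1.0f */ : 0xffffffffu;
        return pass ? pass_value : 0u;
    }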
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp new file mode 100644 index 000000000..02ab023c1 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp | |||
| @@ -0,0 +1,214 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 6 | #include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" | ||
| 7 | |||
| 8 | namespace Shader::Maxwell { | ||
| 9 | namespace { | ||
| 10 | enum class FloatFormat : u64 { | ||
| 11 | F16 = 1, | ||
| 12 | F32 = 2, | ||
| 13 | F64 = 3, | ||
| 14 | }; | ||
| 15 | |||
| 16 | enum class RoundingOp : u64 { | ||
| 17 | None = 0, | ||
| 18 | Pass = 3, | ||
| 19 | Round = 8, | ||
| 20 | Floor = 9, | ||
| 21 | Ceil = 10, | ||
| 22 | Trunc = 11, | ||
| 23 | }; | ||
| 24 | |||
| 25 | [[nodiscard]] u32 WidthSize(FloatFormat width) { | ||
| 26 | switch (width) { | ||
| 27 | case FloatFormat::F16: | ||
| 28 | return 16; | ||
| 29 | case FloatFormat::F32: | ||
| 30 | return 32; | ||
| 31 | case FloatFormat::F64: | ||
| 32 | return 64; | ||
| 33 | default: | ||
| 34 | throw NotImplementedException("Invalid width {}", width); | ||
| 35 | } | ||
| 36 | } | ||
| 37 | |||
| 38 | void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) { | ||
| 39 | union { | ||
| 40 | u64 insn; | ||
| 41 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 42 | BitField<44, 1, u64> ftz; | ||
| 43 | BitField<45, 1, u64> neg; | ||
| 44 | BitField<47, 1, u64> cc; | ||
| 45 | BitField<50, 1, u64> sat; | ||
| 46 | BitField<39, 4, u64> rounding_op; | ||
| 47 | BitField<39, 2, FpRounding> rounding; | ||
| 48 | BitField<10, 2, FloatFormat> src_size; | ||
| 49 | BitField<8, 2, FloatFormat> dst_size; | ||
| 50 | |||
| 51 | [[nodiscard]] RoundingOp RoundingOperation() const { | ||
| 52 | constexpr u64 rounding_mask = 0x0B; | ||
| 53 | return static_cast<RoundingOp>(rounding_op.Value() & rounding_mask); | ||
| 54 | } | ||
| 55 | } const f2f{insn}; | ||
| 56 | |||
| 57 | if (f2f.cc != 0) { | ||
| 58 | throw NotImplementedException("F2F CC"); | ||
| 59 | } | ||
| 60 | |||
| 61 | IR::F16F32F64 input{v.ir.FPAbsNeg(src_a, abs, f2f.neg != 0)}; | ||
| 62 | |||
| 63 | const bool any_fp64{f2f.src_size == FloatFormat::F64 || f2f.dst_size == FloatFormat::F64}; | ||
| 64 | IR::FpControl fp_control{ | ||
| 65 | .no_contraction = false, | ||
| 66 | .rounding = IR::FpRounding::DontCare, | ||
| 67 | .fmz_mode = (f2f.ftz != 0 && !any_fp64 ? IR::FmzMode::FTZ : IR::FmzMode::None), | ||
| 68 | }; | ||
| 69 | if (f2f.src_size != f2f.dst_size) { | ||
| 70 | fp_control.rounding = CastFpRounding(f2f.rounding); | ||
| 71 | input = v.ir.FPConvert(WidthSize(f2f.dst_size), input, fp_control); | ||
| 72 | } else { | ||
| 73 | switch (f2f.RoundingOperation()) { | ||
| 74 | case RoundingOp::None: | ||
| 75 | case RoundingOp::Pass: | ||
| 76 | // Make sure NANs are handled properly | ||
| 77 | switch (f2f.src_size) { | ||
| 78 | case FloatFormat::F16: | ||
| 79 | input = v.ir.FPAdd(input, v.ir.FPConvert(16, v.ir.Imm32(0.0f)), fp_control); | ||
| 80 | break; | ||
| 81 | case FloatFormat::F32: | ||
| 82 | input = v.ir.FPAdd(input, v.ir.Imm32(0.0f), fp_control); | ||
| 83 | break; | ||
| 84 | case FloatFormat::F64: | ||
| 85 | input = v.ir.FPAdd(input, v.ir.Imm64(0.0), fp_control); | ||
| 86 | break; | ||
| 87 | } | ||
| 88 | break; | ||
| 89 | case RoundingOp::Round: | ||
| 90 | input = v.ir.FPRoundEven(input, fp_control); | ||
| 91 | break; | ||
| 92 | case RoundingOp::Floor: | ||
| 93 | input = v.ir.FPFloor(input, fp_control); | ||
| 94 | break; | ||
| 95 | case RoundingOp::Ceil: | ||
| 96 | input = v.ir.FPCeil(input, fp_control); | ||
| 97 | break; | ||
| 98 | case RoundingOp::Trunc: | ||
| 99 | input = v.ir.FPTrunc(input, fp_control); | ||
| 100 | break; | ||
| 101 | default: | ||
| 102 | throw NotImplementedException("Unimplemented rounding op {}", f2f.rounding_op.Value()); | ||
| 103 | } | ||
| 104 | } | ||
| 105 | if (f2f.sat != 0 && !any_fp64) { | ||
| 106 | input = v.ir.FPSaturate(input); | ||
| 107 | } | ||
| 108 | |||
| 109 | switch (f2f.dst_size) { | ||
| 110 | case FloatFormat::F16: { | ||
| 111 | const IR::F16 imm{v.ir.FPConvert(16, v.ir.Imm32(0.0f))}; | ||
| 112 | v.X(f2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(input, imm))); | ||
| 113 | break; | ||
| 114 | } | ||
| 115 | case FloatFormat::F32: | ||
| 116 | v.F(f2f.dest_reg, input); | ||
| 117 | break; | ||
| 118 | case FloatFormat::F64: | ||
| 119 | v.D(f2f.dest_reg, input); | ||
| 120 | break; | ||
| 121 | default: | ||
| 122 | throw NotImplementedException("Invalid dest format {}", f2f.dst_size.Value()); | ||
| 123 | } | ||
| 124 | } | ||
| 125 | } // Anonymous namespace | ||
| 126 | |||
| 127 | void TranslatorVisitor::F2F_reg(u64 insn) { | ||
| 128 | union { | ||
| 129 | u64 insn; | ||
| 130 | BitField<49, 1, u64> abs; | ||
| 131 | BitField<10, 2, FloatFormat> src_size; | ||
| 132 | BitField<41, 1, u64> selector; | ||
| 133 | } const f2f{insn}; | ||
| 134 | |||
| 135 | IR::F16F32F64 src_a; | ||
| 136 | switch (f2f.src_size) { | ||
| 137 | case FloatFormat::F16: { | ||
| 138 | auto [lhs_a, rhs_a]{Extract(ir, GetReg20(insn), Swizzle::H1_H0)}; | ||
| 139 | src_a = f2f.selector != 0 ? rhs_a : lhs_a; | ||
| 140 | break; | ||
| 141 | } | ||
| 142 | case FloatFormat::F32: | ||
| 143 | src_a = GetFloatReg20(insn); | ||
| 144 | break; | ||
| 145 | case FloatFormat::F64: | ||
| 146 | src_a = GetDoubleReg20(insn); | ||
| 147 | break; | ||
| 148 | default: | ||
| 149 | throw NotImplementedException("Invalid source format {}", f2f.src_size.Value()); | ||
| 150 | } | ||
| 151 | F2F(*this, insn, src_a, f2f.abs != 0); | ||
| 152 | } | ||
| 153 | |||
| 154 | void TranslatorVisitor::F2F_cbuf(u64 insn) { | ||
| 155 | union { | ||
| 156 | u64 insn; | ||
| 157 | BitField<49, 1, u64> abs; | ||
| 158 | BitField<10, 2, FloatFormat> src_size; | ||
| 159 | BitField<41, 1, u64> selector; | ||
| 160 | } const f2f{insn}; | ||
| 161 | |||
| 162 | IR::F16F32F64 src_a; | ||
| 163 | switch (f2f.src_size) { | ||
| 164 | case FloatFormat::F16: { | ||
| 165 | auto [lhs_a, rhs_a]{Extract(ir, GetCbuf(insn), Swizzle::H1_H0)}; | ||
| 166 | src_a = f2f.selector != 0 ? rhs_a : lhs_a; | ||
| 167 | break; | ||
| 168 | } | ||
| 169 | case FloatFormat::F32: | ||
| 170 | src_a = GetFloatCbuf(insn); | ||
| 171 | break; | ||
| 172 | case FloatFormat::F64: | ||
| 173 | src_a = GetDoubleCbuf(insn); | ||
| 174 | break; | ||
| 175 | default: | ||
| 176 | throw NotImplementedException("Invalid source format {}", f2f.src_size.Value()); | ||
| 177 | } | ||
| 178 | F2F(*this, insn, src_a, f2f.abs != 0); | ||
| 179 | } | ||
| 180 | |||
| 181 | void TranslatorVisitor::F2F_imm(u64 insn) { | ||
| 182 | union { | ||
| 183 | u64 insn; | ||
| 184 | BitField<49, 1, u64> abs; | ||
| 185 | BitField<10, 2, FloatFormat> src_size; | ||
| 186 | BitField<41, 1, u64> selector; | ||
| 187 | BitField<20, 19, u64> imm; | ||
| 188 | BitField<56, 1, u64> imm_neg; | ||
| 189 | } const f2f{insn}; | ||
| 190 | |||
| 191 | IR::F16F32F64 src_a; | ||
| 192 | switch (f2f.src_size) { | ||
| 193 | case FloatFormat::F16: { | ||
| 194 | const u32 imm{static_cast<u32>(f2f.imm & 0x0000ffff)}; | ||
| 195 | const IR::Value vector{ir.UnpackFloat2x16(ir.Imm32(imm | (imm << 16)))}; | ||
| 196 | src_a = IR::F16{ir.CompositeExtract(vector, f2f.selector != 0 ? 0 : 1)}; | ||
| 197 | if (f2f.imm_neg != 0) { | ||
| 198 | throw NotImplementedException("Neg bit on F16"); | ||
| 199 | } | ||
| 200 | break; | ||
| 201 | } | ||
| 202 | case FloatFormat::F32: | ||
| 203 | src_a = GetFloatImm20(insn); | ||
| 204 | break; | ||
| 205 | case FloatFormat::F64: | ||
| 206 | src_a = GetDoubleImm20(insn); | ||
| 207 | break; | ||
| 208 | default: | ||
| 209 | throw NotImplementedException("Invalid source format {}", f2f.src_size.Value()); | ||
| 210 | } | ||
| 211 | F2F(*this, insn, src_a, f2f.abs != 0); | ||
| 212 | } | ||
| 213 | |||
| 214 | } // namespace Shader::Maxwell | ||
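Two details of F2F are easy to miss. The 4-bit rounding field overlaps the 2-bit FpRounding field at bit 39, and masking with 0x0B folds the redundant encodings onto the RoundingOp values; and when source and destination sizes match, the None/Pass cases still add +0.0 so NaN inputs are canonicalized rather than copied bit-for-bit. A sketch of the decode step:

    // Decode bits [42:39] into a RoundingOp (sketch; mirrors
    // F2F's RoundingOperation() helper).
    enum class RoundingOp { None = 0, Pass = 3, Round = 8, Floor = 9,
                            Ceil = 10, Trunc = 11 };

    RoundingOp DecodeRoundingOp(unsigned rounding_op_bits) {
        return static_cast<RoundingOp>(rounding_op_bits & 0x0B);
    }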
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp new file mode 100644 index 000000000..92b1ce015 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp | |||
| @@ -0,0 +1,253 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <limits> | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "shader_recompiler/exception.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | enum class DestFormat : u64 { | ||
| 15 | Invalid, | ||
| 16 | I16, | ||
| 17 | I32, | ||
| 18 | I64, | ||
| 19 | }; | ||
| 20 | enum class SrcFormat : u64 { | ||
| 21 | Invalid, | ||
| 22 | F16, | ||
| 23 | F32, | ||
| 24 | F64, | ||
| 25 | }; | ||
| 26 | enum class Rounding : u64 { | ||
| 27 | Round, | ||
| 28 | Floor, | ||
| 29 | Ceil, | ||
| 30 | Trunc, | ||
| 31 | }; | ||
| 32 | |||
| 33 | union F2I { | ||
| 34 | u64 raw; | ||
| 35 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 36 | BitField<8, 2, DestFormat> dest_format; | ||
| 37 | BitField<10, 2, SrcFormat> src_format; | ||
| 38 | BitField<12, 1, u64> is_signed; | ||
| 39 | BitField<39, 2, Rounding> rounding; | ||
| 40 | BitField<41, 1, u64> half; | ||
| 41 | BitField<44, 1, u64> ftz; | ||
| 42 | BitField<45, 1, u64> abs; | ||
| 43 | BitField<47, 1, u64> cc; | ||
| 44 | BitField<49, 1, u64> neg; | ||
| 45 | }; | ||
| 46 | |||
| 47 | size_t BitSize(DestFormat dest_format) { | ||
| 48 | switch (dest_format) { | ||
| 49 | case DestFormat::I16: | ||
| 50 | return 16; | ||
| 51 | case DestFormat::I32: | ||
| 52 | return 32; | ||
| 53 | case DestFormat::I64: | ||
| 54 | return 64; | ||
| 55 | default: | ||
| 56 | throw NotImplementedException("Invalid destination format {}", dest_format); | ||
| 57 | } | ||
| 58 | } | ||
| 59 | |||
| 60 | std::pair<f64, f64> ClampBounds(DestFormat format, bool is_signed) { | ||
| 61 | if (is_signed) { | ||
| 62 | switch (format) { | ||
| 63 | case DestFormat::I16: | ||
| 64 | return {static_cast<f64>(std::numeric_limits<s16>::max()), | ||
| 65 | static_cast<f64>(std::numeric_limits<s16>::min())}; | ||
| 66 | case DestFormat::I32: | ||
| 67 | return {static_cast<f64>(std::numeric_limits<s32>::max()), | ||
| 68 | static_cast<f64>(std::numeric_limits<s32>::min())}; | ||
| 69 | case DestFormat::I64: | ||
| 70 | return {static_cast<f64>(std::numeric_limits<s64>::max()), | ||
| 71 | static_cast<f64>(std::numeric_limits<s64>::min())}; | ||
| 72 | default: | ||
| 73 | break; | ||
| 74 | } | ||
| 75 | } else { | ||
| 76 | switch (format) { | ||
| 77 | case DestFormat::I16: | ||
| 78 | return {static_cast<f64>(std::numeric_limits<u16>::max()), | ||
| 79 | static_cast<f64>(std::numeric_limits<u16>::min())}; | ||
| 80 | case DestFormat::I32: | ||
| 81 | return {static_cast<f64>(std::numeric_limits<u32>::max()), | ||
| 82 | static_cast<f64>(std::numeric_limits<u32>::min())}; | ||
| 83 | case DestFormat::I64: | ||
| 84 | return {static_cast<f64>(std::numeric_limits<u64>::max()), | ||
| 85 | static_cast<f64>(std::numeric_limits<u64>::min())}; | ||
| 86 | default: | ||
| 87 | break; | ||
| 88 | } | ||
| 89 | } | ||
| 90 | throw NotImplementedException("Invalid destination format {}", format); | ||
| 91 | } | ||
| 92 | |||
| 93 | IR::F64 UnpackCbuf(TranslatorVisitor& v, u64 insn) { | ||
| 94 | union { | ||
| 95 | u64 raw; | ||
| 96 | BitField<20, 14, s64> offset; | ||
| 97 | BitField<34, 5, u64> binding; | ||
| 98 | } const cbuf{insn}; | ||
| 99 | if (cbuf.binding >= 18) { | ||
| 100 | throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding); | ||
| 101 | } | ||
| 102 | if (cbuf.offset >= 0x4'000 || cbuf.offset < 0) { | ||
| 103 | throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset * 4); | ||
| 104 | } | ||
| 105 | if (cbuf.offset % 2 != 0) { | ||
| 106 | throw NotImplementedException("Unaligned F64 constant buffer offset {}", cbuf.offset * 4); | ||
| 107 | } | ||
| 108 | const IR::U32 binding{v.ir.Imm32(static_cast<u32>(cbuf.binding))}; | ||
| 109 | const IR::U32 byte_offset{v.ir.Imm32(static_cast<u32>(cbuf.offset) * 4 + 4)}; | ||
| 110 | const IR::U32 cbuf_data{v.ir.GetCbuf(binding, byte_offset)}; | ||
| 111 | const IR::Value vector{v.ir.CompositeConstruct(v.ir.Imm32(0U), cbuf_data)}; | ||
| 112 | return v.ir.PackDouble2x32(vector); | ||
| 113 | } | ||
| 114 | |||
| 115 | void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { | ||
| 116 | // F2I is used to convert from a floating point value to an integer | ||
| 117 | const F2I f2i{insn}; | ||
| 118 | |||
| 119 | const bool denorm_cares{f2i.src_format != SrcFormat::F16 && f2i.src_format != SrcFormat::F64 && | ||
| 120 | f2i.dest_format != DestFormat::I64}; | ||
| 121 | IR::FmzMode fmz_mode{IR::FmzMode::DontCare}; | ||
| 122 | if (denorm_cares) { | ||
| 123 | fmz_mode = f2i.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None; | ||
| 124 | } | ||
| 125 | const IR::FpControl fp_control{ | ||
| 126 | .no_contraction = true, | ||
| 127 | .rounding = IR::FpRounding::DontCare, | ||
| 128 | .fmz_mode = fmz_mode, | ||
| 129 | }; | ||
| 130 | const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)}; | ||
| 131 | const IR::F16F32F64 rounded_value{[&] { | ||
| 132 | switch (f2i.rounding) { | ||
| 133 | case Rounding::Round: | ||
| 134 | return v.ir.FPRoundEven(op_a, fp_control); | ||
| 135 | case Rounding::Floor: | ||
| 136 | return v.ir.FPFloor(op_a, fp_control); | ||
| 137 | case Rounding::Ceil: | ||
| 138 | return v.ir.FPCeil(op_a, fp_control); | ||
| 139 | case Rounding::Trunc: | ||
| 140 | return v.ir.FPTrunc(op_a, fp_control); | ||
| 141 | default: | ||
| 142 | throw NotImplementedException("Invalid F2I rounding {}", f2i.rounding.Value()); | ||
| 143 | } | ||
| 144 | }()}; | ||
| 145 | const bool is_signed{f2i.is_signed != 0}; | ||
| 146 | const auto [max_bound, min_bound] = ClampBounds(f2i.dest_format, is_signed); | ||
| 147 | |||
| 148 | IR::F16F32F64 intermediate; | ||
| 149 | switch (f2i.src_format) { | ||
| 150 | case SrcFormat::F16: { | ||
| 151 | const IR::F16 max_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(max_bound)))}; | ||
| 152 | const IR::F16 min_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(min_bound)))}; | ||
| 153 | intermediate = v.ir.FPClamp(rounded_value, min_val, max_val); | ||
| 154 | break; | ||
| 155 | } | ||
| 156 | case SrcFormat::F32: { | ||
| 157 | const IR::F32 max_val{v.ir.Imm32(static_cast<f32>(max_bound))}; | ||
| 158 | const IR::F32 min_val{v.ir.Imm32(static_cast<f32>(min_bound))}; | ||
| 159 | intermediate = v.ir.FPClamp(rounded_value, min_val, max_val); | ||
| 160 | break; | ||
| 161 | } | ||
| 162 | case SrcFormat::F64: { | ||
| 163 | const IR::F64 max_val{v.ir.Imm64(max_bound)}; | ||
| 164 | const IR::F64 min_val{v.ir.Imm64(min_bound)}; | ||
| 165 | intermediate = v.ir.FPClamp(rounded_value, min_val, max_val); | ||
| 166 | break; | ||
| 167 | } | ||
| 168 | default: | ||
| 169 | throw NotImplementedException("Invalid source format {}", f2i.src_format.Value()); | ||
| 170 | } | ||
| 171 | |||
| 172 | const size_t bitsize{std::max<size_t>(32, BitSize(f2i.dest_format))}; | ||
| 173 | IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, intermediate)}; | ||
| 174 | |||
| 175 | bool handled_special_case = false; | ||
| 176 | const bool special_nan_cases = | ||
| 177 | (f2i.src_format == SrcFormat::F64) != (f2i.dest_format == DestFormat::I64); | ||
| 178 | if (special_nan_cases) { | ||
| 179 | if (f2i.dest_format == DestFormat::I32) { | ||
| 180 | handled_special_case = true; | ||
| 181 | result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0x8000'0000U), result)}; | ||
| 182 | } else if (f2i.dest_format == DestFormat::I64) { | ||
| 183 | handled_special_case = true; | ||
| 184 | result = IR::U64{ | ||
| 185 | v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000UL), result)}; | ||
| 186 | } | ||
| 187 | } | ||
| 188 | if (!handled_special_case && is_signed) { | ||
| 189 | if (bitsize != 64) { | ||
| 190 | result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)}; | ||
| 191 | } else { | ||
| 192 | result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(u64{0}), result)}; | ||
| 193 | } | ||
| 194 | } | ||
| 195 | |||
| 196 | if (bitsize == 64) { | ||
| 197 | v.L(f2i.dest_reg, result); | ||
| 198 | } else { | ||
| 199 | v.X(f2i.dest_reg, result); | ||
| 200 | } | ||
| 201 | |||
| 202 | if (f2i.cc != 0) { | ||
| 203 | throw NotImplementedException("F2I CC"); | ||
| 204 | } | ||
| 205 | } | ||
| 206 | } // Anonymous namespace | ||
| 207 | |||
| 208 | void TranslatorVisitor::F2I_reg(u64 insn) { | ||
| 209 | union { | ||
| 210 | u64 raw; | ||
| 211 | F2I base; | ||
| 212 | BitField<20, 8, IR::Reg> src_reg; | ||
| 213 | } const f2i{insn}; | ||
| 214 | |||
| 215 | const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 { | ||
| 216 | switch (f2i.base.src_format) { | ||
| 217 | case SrcFormat::F16: | ||
| 218 | return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(X(f2i.src_reg)), f2i.base.half)}; | ||
| 219 | case SrcFormat::F32: | ||
| 220 | return F(f2i.src_reg); | ||
| 221 | case SrcFormat::F64: | ||
| 222 | return ir.PackDouble2x32(ir.CompositeConstruct(X(f2i.src_reg), X(f2i.src_reg + 1))); | ||
| 223 | default: | ||
| 224 | throw NotImplementedException("Invalid F2I source format {}", | ||
| 225 | f2i.base.src_format.Value()); | ||
| 226 | } | ||
| 227 | }()}; | ||
| 228 | TranslateF2I(*this, insn, op_a); | ||
| 229 | } | ||
| 230 | |||
| 231 | void TranslatorVisitor::F2I_cbuf(u64 insn) { | ||
| 232 | const F2I f2i{insn}; | ||
| 233 | const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 { | ||
| 234 | switch (f2i.src_format) { | ||
| 235 | case SrcFormat::F16: | ||
| 236 | return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(GetCbuf(insn)), f2i.half)}; | ||
| 237 | case SrcFormat::F32: | ||
| 238 | return GetFloatCbuf(insn); | ||
| 239 | case SrcFormat::F64: { | ||
| 240 | return UnpackCbuf(*this, insn); | ||
| 241 | } | ||
| 242 | default: | ||
| 243 | throw NotImplementedException("Invalid F2I source format {}", f2i.src_format.Value()); | ||
| 244 | } | ||
| 245 | }()}; | ||
| 246 | TranslateF2I(*this, insn, op_a); | ||
| 247 | } | ||
| 248 | |||
| 249 | void TranslatorVisitor::F2I_imm(u64) { | ||
| 250 | throw NotImplementedException("{}", Opcode::F2I_imm); | ||
| 251 | } | ||
| 252 | |||
| 253 | } // namespace Shader::Maxwell | ||
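A scalar reference helps pin down TranslateF2I's behavior for the common F32-to-S32 truncating conversion: round toward zero, clamp to the destination range, and quiet NaN to zero; the 0x8000'0000 NaN result applies only to the mixed-width F64-to-I32 and F32-to-I64 paths. A sketch under those assumptions:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <limits>

    // Reference for F2I.S32 from an F32 source with truncation (sketch).
    int32_t F2IS32Trunc(float x) {
        if (std::isnan(x)) {
            return 0; // same-width signed conversions quiet NaN to zero
        }
        double r = std::trunc(static_cast<double>(x));
        r = std::clamp(r, static_cast<double>(std::numeric_limits<int32_t>::min()),
                       static_cast<double>(std::numeric_limits<int32_t>::max()));
        return static_cast<int32_t>(r);
    }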
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp new file mode 100644 index 000000000..fa2a7807b --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp | |||
| @@ -0,0 +1,94 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c, bool neg_a, | ||
| 13 | bool neg_b, bool neg_c, bool sat, bool cc, FmzMode fmz_mode, FpRounding fp_rounding) { | ||
| 14 | union { | ||
| 15 | u64 raw; | ||
| 16 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 17 | BitField<8, 8, IR::Reg> src_a; | ||
| 18 | } const ffma{insn}; | ||
| 19 | |||
| 20 | if (cc) { | ||
| 21 | throw NotImplementedException("FFMA CC"); | ||
| 22 | } | ||
| 23 | const IR::F32 op_a{v.ir.FPAbsNeg(v.F(ffma.src_a), false, neg_a)}; | ||
| 24 | const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; | ||
| 25 | const IR::F32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)}; | ||
| 26 | const IR::FpControl fp_control{ | ||
| 27 | .no_contraction = true, | ||
| 28 | .rounding = CastFpRounding(fp_rounding), | ||
| 29 | .fmz_mode = CastFmzMode(fmz_mode), | ||
| 30 | }; | ||
| 31 | IR::F32 value{v.ir.FPFma(op_a, op_b, op_c, fp_control)}; | ||
| 32 | if (fmz_mode == FmzMode::FMZ && !sat) { | ||
| 33 | // Skip the explicit FMZ handling when SAT is enabled; saturation already yields the expected result. | ||
| 34 | // In D3D9 mode, anything * 0 is zero, even NAN and infinity | ||
| 35 | const IR::F32 zero{v.ir.Imm32(0.0f)}; | ||
| 36 | const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)}; | ||
| 37 | const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)}; | ||
| 38 | const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)}; | ||
| 39 | value = IR::F32{v.ir.Select(any_zero, op_c, value)}; | ||
| 40 | } | ||
| 41 | if (sat) { | ||
| 42 | value = v.ir.FPSaturate(value); | ||
| 43 | } | ||
| 44 | v.F(ffma.dest_reg, value); | ||
| 45 | } | ||
| 46 | |||
| 47 | void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c) { | ||
| 48 | union { | ||
| 49 | u64 raw; | ||
| 50 | BitField<47, 1, u64> cc; | ||
| 51 | BitField<48, 1, u64> neg_b; | ||
| 52 | BitField<49, 1, u64> neg_c; | ||
| 53 | BitField<50, 1, u64> sat; | ||
| 54 | BitField<51, 2, FpRounding> fp_rounding; | ||
| 55 | BitField<53, 2, FmzMode> fmz_mode; | ||
| 56 | } const ffma{insn}; | ||
| 57 | |||
| 58 | FFMA(v, insn, src_b, src_c, false, ffma.neg_b != 0, ffma.neg_c != 0, ffma.sat != 0, | ||
| 59 | ffma.cc != 0, ffma.fmz_mode, ffma.fp_rounding); | ||
| 60 | } | ||
| 61 | } // Anonymous namespace | ||
| 62 | |||
| 63 | void TranslatorVisitor::FFMA_reg(u64 insn) { | ||
| 64 | FFMA(*this, insn, GetFloatReg20(insn), GetFloatReg39(insn)); | ||
| 65 | } | ||
| 66 | |||
| 67 | void TranslatorVisitor::FFMA_rc(u64 insn) { | ||
| 68 | FFMA(*this, insn, GetFloatReg39(insn), GetFloatCbuf(insn)); | ||
| 69 | } | ||
| 70 | |||
| 71 | void TranslatorVisitor::FFMA_cr(u64 insn) { | ||
| 72 | FFMA(*this, insn, GetFloatCbuf(insn), GetFloatReg39(insn)); | ||
| 73 | } | ||
| 74 | |||
| 75 | void TranslatorVisitor::FFMA_imm(u64 insn) { | ||
| 76 | FFMA(*this, insn, GetFloatImm20(insn), GetFloatReg39(insn)); | ||
| 77 | } | ||
| 78 | |||
| 79 | void TranslatorVisitor::FFMA32I(u64 insn) { | ||
| 80 | union { | ||
| 81 | u64 raw; | ||
| 82 | BitField<0, 8, IR::Reg> src_c; // FFMA32I shares this field between the destination and the addend register | ||
| 83 | BitField<52, 1, u64> cc; | ||
| 84 | BitField<53, 2, FmzMode> fmz_mode; | ||
| 85 | BitField<55, 1, u64> sat; | ||
| 86 | BitField<56, 1, u64> neg_a; | ||
| 87 | BitField<57, 1, u64> neg_c; | ||
| 88 | } const ffma32i{insn}; | ||
| 89 | |||
| 90 | FFMA(*this, insn, GetFloatImm32(insn), F(ffma32i.src_c), ffma32i.neg_a != 0, false, | ||
| 91 | ffma32i.neg_c != 0, ffma32i.sat != 0, ffma32i.cc != 0, ffma32i.fmz_mode, FpRounding::RN); | ||
| 92 | } | ||
| 93 | |||
| 94 | } // namespace Shader::Maxwell | ||
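The FMZ branch encodes D3D9-era multiply semantics: if either multiplicand is exactly zero the product is zero, regardless of NaN or infinity in the other operand, so the fused multiply-add collapses to its addend. A scalar sketch:

    #include <cmath>

    // D3D9-style FMA (sketch): zero times anything is zero.
    float FfmaFmz(float a, float b, float c) {
        if (a == 0.0f || b == 0.0f) {
            return c; // bypasses NaN/infinity propagation from the product
        }
        return std::fma(a, b, c);
    }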
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp new file mode 100644 index 000000000..c0d6ee5af --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp | |||
| @@ -0,0 +1,62 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void FMNMX(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { | ||
| 12 | union { | ||
| 13 | u64 insn; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 16 | BitField<39, 3, IR::Pred> pred; | ||
| 17 | BitField<42, 1, u64> neg_pred; | ||
| 18 | BitField<44, 1, u64> ftz; | ||
| 19 | BitField<45, 1, u64> negate_b; | ||
| 20 | BitField<46, 1, u64> abs_a; | ||
| 21 | BitField<47, 1, u64> cc; | ||
| 22 | BitField<48, 1, u64> negate_a; | ||
| 23 | BitField<49, 1, u64> abs_b; | ||
| 24 | } const fmnmx{insn}; | ||
| 25 | |||
| 26 | if (fmnmx.cc) { | ||
| 27 | throw NotImplementedException("FMNMX CC"); | ||
| 28 | } | ||
| 29 | |||
| 30 | const IR::U1 pred{v.ir.GetPred(fmnmx.pred)}; | ||
| 31 | const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fmnmx.src_a_reg), fmnmx.abs_a != 0, fmnmx.negate_a != 0)}; | ||
| 32 | const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0)}; | ||
| 33 | |||
| 34 | const IR::FpControl control{ | ||
| 35 | .no_contraction = false, | ||
| 36 | .rounding = IR::FpRounding::DontCare, | ||
| 37 | .fmz_mode = (fmnmx.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), | ||
| 38 | }; | ||
| 39 | IR::F32 max{v.ir.FPMax(op_a, op_b, control)}; | ||
| 40 | IR::F32 min{v.ir.FPMin(op_a, op_b, control)}; | ||
| 41 | |||
| 42 | if (fmnmx.neg_pred != 0) { | ||
| 43 | std::swap(min, max); | ||
| 44 | } | ||
| 45 | |||
| 46 | v.F(fmnmx.dest_reg, IR::F32{v.ir.Select(pred, min, max)}); | ||
| 47 | } | ||
| 48 | } // Anonymous namespace | ||
| 49 | |||
| 50 | void TranslatorVisitor::FMNMX_reg(u64 insn) { | ||
| 51 | FMNMX(*this, insn, GetFloatReg20(insn)); | ||
| 52 | } | ||
| 53 | |||
| 54 | void TranslatorVisitor::FMNMX_cbuf(u64 insn) { | ||
| 55 | FMNMX(*this, insn, GetFloatCbuf(insn)); | ||
| 56 | } | ||
| 57 | |||
| 58 | void TranslatorVisitor::FMNMX_imm(u64 insn) { | ||
| 59 | FMNMX(*this, insn, GetFloatImm20(insn)); | ||
| 60 | } | ||
| 61 | |||
| 62 | } // namespace Shader::Maxwell | ||
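FMNMX computes both the minimum and the maximum and lets the predicate choose between them; a negated predicate is modeled by swapping the pair before the select rather than by negating the predicate itself. The scalar equivalent, as a sketch:

    #include <algorithm>
    #include <utility>

    // Scalar FMNMX (sketch): pred selects min, !pred selects max.
    float Fmnmx(float a, float b, bool pred, bool neg_pred) {
        float mn = std::min(a, b);
        float mx = std::max(a, b);
        if (neg_pred) {
            std::swap(mn, mx);
        }
        return pred ? mn : mx;
    }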
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp new file mode 100644 index 000000000..2f8605619 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp | |||
| @@ -0,0 +1,71 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/exception.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 10 | |||
| 11 | namespace Shader::Maxwell { | ||
| 12 | namespace { | ||
| 13 | enum class Operation : u64 { | ||
| 14 | Cos = 0, | ||
| 15 | Sin = 1, | ||
| 16 | Ex2 = 2, // Base 2 exponent | ||
| 17 | Lg2 = 3, // Base 2 logarithm | ||
| 18 | Rcp = 4, // Reciprocal | ||
| 19 | Rsq = 5, // Reciprocal square root | ||
| 20 | Rcp64H = 6, // 64-bit reciprocal | ||
| 21 | Rsq64H = 7, // 64-bit reciprocal square root | ||
| 22 | Sqrt = 8, | ||
| 23 | }; | ||
| 24 | } // Anonymous namespace | ||
| 25 | |||
| 26 | void TranslatorVisitor::MUFU(u64 insn) { | ||
| 27 | // MUFU implements the hardware's special-function operations; see Operation for the full list. | ||
| 28 | union { | ||
| 29 | u64 raw; | ||
| 30 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 31 | BitField<8, 8, IR::Reg> src_reg; | ||
| 32 | BitField<20, 4, Operation> operation; | ||
| 33 | BitField<46, 1, u64> abs; | ||
| 34 | BitField<48, 1, u64> neg; | ||
| 35 | BitField<50, 1, u64> sat; | ||
| 36 | } const mufu{insn}; | ||
| 37 | |||
| 38 | const IR::F32 op_a{ir.FPAbsNeg(F(mufu.src_reg), mufu.abs != 0, mufu.neg != 0)}; | ||
| 39 | IR::F32 value{[&]() -> IR::F32 { | ||
| 40 | switch (mufu.operation) { | ||
| 41 | case Operation::Cos: | ||
| 42 | return ir.FPCos(op_a); | ||
| 43 | case Operation::Sin: | ||
| 44 | return ir.FPSin(op_a); | ||
| 45 | case Operation::Ex2: | ||
| 46 | return ir.FPExp2(op_a); | ||
| 47 | case Operation::Lg2: | ||
| 48 | return ir.FPLog2(op_a); | ||
| 49 | case Operation::Rcp: | ||
| 50 | return ir.FPRecip(op_a); | ||
| 51 | case Operation::Rsq: | ||
| 52 | return ir.FPRecipSqrt(op_a); | ||
| 53 | case Operation::Rcp64H: | ||
| 54 | throw NotImplementedException("MUFU.RCP64H"); | ||
| 55 | case Operation::Rsq64H: | ||
| 56 | throw NotImplementedException("MUFU.RSQ64H"); | ||
| 57 | case Operation::Sqrt: | ||
| 58 | return ir.FPSqrt(op_a); | ||
| 59 | default: | ||
| 60 | throw NotImplementedException("Invalid MUFU operation {}", mufu.operation.Value()); | ||
| 61 | } | ||
| 62 | }()}; | ||
| 63 | |||
| 64 | if (mufu.sat != 0) { | ||
| 65 | value = ir.FPSaturate(value); | ||
| 66 | } | ||
| 67 | |||
| 68 | F(mufu.dest_reg, value); | ||
| 69 | } | ||
| 70 | |||
| 71 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp new file mode 100644 index 000000000..06226b7ce --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp | |||
| @@ -0,0 +1,127 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | enum class Scale : u64 { | ||
| 15 | None, | ||
| 16 | D2, | ||
| 17 | D4, | ||
| 18 | D8, | ||
| 19 | M8, | ||
| 20 | M4, | ||
| 21 | M2, | ||
| 22 | INVALIDSCALE37, | ||
| 23 | }; | ||
| 24 | |||
| 25 | float ScaleFactor(Scale scale) { | ||
| 26 | switch (scale) { | ||
| 27 | case Scale::None: | ||
| 28 | return 1.0f; | ||
| 29 | case Scale::D2: | ||
| 30 | return 1.0f / 2.0f; | ||
| 31 | case Scale::D4: | ||
| 32 | return 1.0f / 4.0f; | ||
| 33 | case Scale::D8: | ||
| 34 | return 1.0f / 8.0f; | ||
| 35 | case Scale::M8: | ||
| 36 | return 8.0f; | ||
| 37 | case Scale::M4: | ||
| 38 | return 4.0f; | ||
| 39 | case Scale::M2: | ||
| 40 | return 2.0f; | ||
| 41 | case Scale::INVALIDSCALE37: | ||
| 42 | break; | ||
| 43 | } | ||
| 44 | throw NotImplementedException("Invalid FMUL scale {}", scale); | ||
| 45 | } | ||
| 46 | |||
| 47 | void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode, | ||
| 48 | FpRounding fp_rounding, Scale scale, bool sat, bool cc, bool neg_b) { | ||
| 49 | union { | ||
| 50 | u64 raw; | ||
| 51 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 52 | BitField<8, 8, IR::Reg> src_a; | ||
| 53 | } const fmul{insn}; | ||
| 54 | |||
| 55 | if (cc) { | ||
| 56 | throw NotImplementedException("FMUL CC"); | ||
| 57 | } | ||
| 58 | IR::F32 op_a{v.F(fmul.src_a)}; | ||
| 59 | if (scale != Scale::None) { | ||
| 60 | if (fmz_mode != FmzMode::FTZ || fp_rounding != FpRounding::RN) { | ||
| 61 | throw NotImplementedException("FMUL scale with non-FMZ or non-RN modifiers"); | ||
| 62 | } | ||
| 63 | op_a = v.ir.FPMul(op_a, v.ir.Imm32(ScaleFactor(scale))); | ||
| 64 | } | ||
| 65 | const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; | ||
| 66 | const IR::FpControl fp_control{ | ||
| 67 | .no_contraction = true, | ||
| 68 | .rounding = CastFpRounding(fp_rounding), | ||
| 69 | .fmz_mode = CastFmzMode(fmz_mode), | ||
| 70 | }; | ||
| 71 | IR::F32 value{v.ir.FPMul(op_a, op_b, fp_control)}; | ||
| 72 | if (fmz_mode == FmzMode::FMZ && !sat) { | ||
| 73 | // Skip the explicit FMZ handling when SAT is enabled; saturation already yields the expected result. | ||
| 74 | // In D3D9 mode, anything * 0 is zero, even NAN and infinity | ||
| 75 | const IR::F32 zero{v.ir.Imm32(0.0f)}; | ||
| 76 | const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)}; | ||
| 77 | const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)}; | ||
| 78 | const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)}; | ||
| 79 | value = IR::F32{v.ir.Select(any_zero, zero, value)}; | ||
| 80 | } | ||
| 81 | if (sat) { | ||
| 82 | value = v.ir.FPSaturate(value); | ||
| 83 | } | ||
| 84 | v.F(fmul.dest_reg, value); | ||
| 85 | } | ||
| 86 | |||
| 87 | void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { | ||
| 88 | union { | ||
| 89 | u64 raw; | ||
| 90 | BitField<39, 2, FpRounding> fp_rounding; | ||
| 91 | BitField<41, 3, Scale> scale; | ||
| 92 | BitField<44, 2, FmzMode> fmz; | ||
| 93 | BitField<47, 1, u64> cc; | ||
| 94 | BitField<48, 1, u64> neg_b; | ||
| 95 | BitField<50, 1, u64> sat; | ||
| 96 | } const fmul{insn}; | ||
| 97 | |||
| 98 | FMUL(v, insn, src_b, fmul.fmz, fmul.fp_rounding, fmul.scale, fmul.sat != 0, fmul.cc != 0, | ||
| 99 | fmul.neg_b != 0); | ||
| 100 | } | ||
| 101 | } // Anonymous namespace | ||
| 102 | |||
| 103 | void TranslatorVisitor::FMUL_reg(u64 insn) { | ||
| 104 | return FMUL(*this, insn, GetFloatReg20(insn)); | ||
| 105 | } | ||
| 106 | |||
| 107 | void TranslatorVisitor::FMUL_cbuf(u64 insn) { | ||
| 108 | return FMUL(*this, insn, GetFloatCbuf(insn)); | ||
| 109 | } | ||
| 110 | |||
| 111 | void TranslatorVisitor::FMUL_imm(u64 insn) { | ||
| 112 | return FMUL(*this, insn, GetFloatImm20(insn)); | ||
| 113 | } | ||
| 114 | |||
| 115 | void TranslatorVisitor::FMUL32I(u64 insn) { | ||
| 116 | union { | ||
| 117 | u64 raw; | ||
| 118 | BitField<52, 1, u64> cc; | ||
| 119 | BitField<53, 2, FmzMode> fmz; | ||
| 120 | BitField<55, 1, u64> sat; | ||
| 121 | } const fmul32i{insn}; | ||
| 122 | |||
| 123 | FMUL(*this, insn, GetFloatImm32(insn), fmul32i.fmz, FpRounding::RN, Scale::None, | ||
| 124 | fmul32i.sat != 0, fmul32i.cc != 0, false); | ||
| 125 | } | ||
| 126 | |||
| 127 | } // namespace Shader::Maxwell | ||
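FMUL's 3-bit scale field pre-multiplies operand A by a power of two (1, 1/2, 1/4, 1/8, 8, 4, or 2), and the translator accepts it only together with FMZ and round-to-nearest. Because the factors are exact powers of two, the pre-scale is lossless for finite, non-denormal inputs; as a sketch:

    // Pre-scale of operand A before the main multiply (sketch);
    // scale_factor comes from a table like ScaleFactor above.
    float FmulScaled(float a, float b, float scale_factor) {
        return (a * scale_factor) * b;
    }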
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp new file mode 100644 index 000000000..f91b93fad --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp | |||
| @@ -0,0 +1,41 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class Mode : u64 { | ||
| 12 | SINCOS, | ||
| 13 | EX2, | ||
| 14 | }; | ||
| 15 | |||
| 16 | void RRO(TranslatorVisitor& v, u64 insn, const IR::F32& src) { | ||
| 17 | union { | ||
| 18 | u64 raw; | ||
| 19 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 20 | BitField<39, 1, Mode> mode; | ||
| 21 | BitField<45, 1, u64> neg; | ||
| 22 | BitField<49, 1, u64> abs; | ||
| 23 | } const rro{insn}; | ||
| 24 | |||
| 25 | v.F(rro.dest_reg, v.ir.FPAbsNeg(src, rro.abs != 0, rro.neg != 0)); | ||
| 26 | } | ||
| 27 | } // Anonymous namespace | ||
| 28 | |||
| 29 | void TranslatorVisitor::RRO_reg(u64 insn) { | ||
| 30 | RRO(*this, insn, GetFloatReg20(insn)); | ||
| 31 | } | ||
| 32 | |||
| 33 | void TranslatorVisitor::RRO_cbuf(u64 insn) { | ||
| 34 | RRO(*this, insn, GetFloatCbuf(insn)); | ||
| 35 | } | ||
| 36 | |||
| 37 | void TranslatorVisitor::RRO_imm(u64) { | ||
| 38 | throw NotImplementedException("RRO (imm)"); | ||
| 39 | } | ||
| 40 | |||
| 41 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp new file mode 100644 index 000000000..5f93a1513 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp | |||
| @@ -0,0 +1,60 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | void FSETP(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { | ||
| 13 | union { | ||
| 14 | u64 insn; | ||
| 15 | BitField<0, 3, IR::Pred> dest_pred_b; | ||
| 16 | BitField<3, 3, IR::Pred> dest_pred_a; | ||
| 17 | BitField<6, 1, u64> negate_b; | ||
| 18 | BitField<7, 1, u64> abs_a; | ||
| 19 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 20 | BitField<39, 3, IR::Pred> bop_pred; | ||
| 21 | BitField<42, 1, u64> neg_bop_pred; | ||
| 22 | BitField<43, 1, u64> negate_a; | ||
| 23 | BitField<44, 1, u64> abs_b; | ||
| 24 | BitField<45, 2, BooleanOp> bop; | ||
| 25 | BitField<47, 1, u64> ftz; | ||
| 26 | BitField<48, 4, FPCompareOp> compare_op; | ||
| 27 | } const fsetp{insn}; | ||
| 28 | |||
| 29 | const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fsetp.src_a_reg), fsetp.abs_a != 0, fsetp.negate_a != 0)}; | ||
| 30 | const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fsetp.abs_b != 0, fsetp.negate_b != 0)}; | ||
| 31 | const IR::FpControl control{ | ||
| 32 | .no_contraction = false, | ||
| 33 | .rounding = IR::FpRounding::DontCare, | ||
| 34 | .fmz_mode = (fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), | ||
| 35 | }; | ||
| 36 | |||
| 37 | const BooleanOp bop{fsetp.bop}; | ||
| 38 | const FPCompareOp compare_op{fsetp.compare_op}; | ||
| 39 | const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op, control)}; | ||
| 40 | const IR::U1 bop_pred{v.ir.GetPred(fsetp.bop_pred, fsetp.neg_bop_pred != 0)}; | ||
| 41 | const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)}; | ||
| 42 | const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)}; | ||
| 43 | v.ir.SetPred(fsetp.dest_pred_a, result_a); | ||
| 44 | v.ir.SetPred(fsetp.dest_pred_b, result_b); | ||
| 45 | } | ||
| 46 | } // Anonymous namespace | ||
| 47 | |||
| 48 | void TranslatorVisitor::FSETP_reg(u64 insn) { | ||
| 49 | FSETP(*this, insn, GetFloatReg20(insn)); | ||
| 50 | } | ||
| 51 | |||
| 52 | void TranslatorVisitor::FSETP_cbuf(u64 insn) { | ||
| 53 | FSETP(*this, insn, GetFloatCbuf(insn)); | ||
| 54 | } | ||
| 55 | |||
| 56 | void TranslatorVisitor::FSETP_imm(u64 insn) { | ||
| 57 | FSETP(*this, insn, GetFloatImm20(insn)); | ||
| 58 | } | ||
| 59 | |||
| 60 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp new file mode 100644 index 000000000..7550a8d4c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp | |||
| @@ -0,0 +1,44 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | void TranslatorVisitor::FSWZADD(u64 insn) { | ||
| 12 | union { | ||
| 13 | u64 raw; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<28, 8, u64> swizzle; | ||
| 16 | BitField<38, 1, u64> ndv; | ||
| 17 | BitField<39, 2, FpRounding> round; | ||
| 18 | BitField<44, 1, u64> ftz; | ||
| 19 | BitField<47, 1, u64> cc; | ||
| 20 | } const fswzadd{insn}; | ||
| 21 | |||
| 22 | if (fswzadd.ndv != 0) { | ||
| 23 | throw NotImplementedException("FSWZADD NDV"); | ||
| 24 | } | ||
| 25 | |||
| 26 | const IR::F32 src_a{GetFloatReg8(insn)}; | ||
| 27 | const IR::F32 src_b{GetFloatReg20(insn)}; | ||
| 28 | const IR::U32 swizzle{ir.Imm32(static_cast<u32>(fswzadd.swizzle))}; | ||
| 29 | |||
| 30 | const IR::FpControl fp_control{ | ||
| 31 | .no_contraction = false, | ||
| 32 | .rounding = CastFpRounding(fswzadd.round), | ||
| 33 | .fmz_mode = (fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), | ||
| 34 | }; | ||
| 35 | |||
| 36 | const IR::F32 result{ir.FSwizzleAdd(src_a, src_b, swizzle, fp_control)}; | ||
| 37 | F(fswzadd.dest_reg, result); | ||
| 38 | |||
| 39 | if (fswzadd.cc != 0) { | ||
| 40 | throw NotImplementedException("FSWZADD CC"); | ||
| 41 | } | ||
| 42 | } | ||
| 43 | |||
| 44 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp new file mode 100644 index 000000000..f2738a93b --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp | |||
| @@ -0,0 +1,125 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" | ||
| 6 | |||
| 7 | namespace Shader::Maxwell { | ||
| 8 | namespace { | ||
| 9 | void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a, | ||
| 10 | Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) { | ||
| 11 | union { | ||
| 12 | u64 raw; | ||
| 13 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 14 | BitField<8, 8, IR::Reg> src_a; | ||
| 15 | } const hadd2{insn}; | ||
| 16 | |||
| 17 | auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hadd2.src_a), swizzle_a)}; | ||
| 18 | auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; | ||
| 19 | const bool promotion{lhs_a.Type() != lhs_b.Type()}; | ||
| 20 | if (promotion) { | ||
| 21 | if (lhs_a.Type() == IR::Type::F16) { | ||
| 22 | lhs_a = v.ir.FPConvert(32, lhs_a); | ||
| 23 | rhs_a = v.ir.FPConvert(32, rhs_a); | ||
| 24 | } | ||
| 25 | if (lhs_b.Type() == IR::Type::F16) { | ||
| 26 | lhs_b = v.ir.FPConvert(32, lhs_b); | ||
| 27 | rhs_b = v.ir.FPConvert(32, rhs_b); | ||
| 28 | } | ||
| 29 | } | ||
| 30 | lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a); | ||
| 31 | rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a); | ||
| 32 | |||
| 33 | lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); | ||
| 34 | rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); | ||
| 35 | |||
| 36 | const IR::FpControl fp_control{ | ||
| 37 | .no_contraction = true, | ||
| 38 | .rounding = IR::FpRounding::DontCare, | ||
| 39 | .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None), | ||
| 40 | }; | ||
| 41 | IR::F16F32F64 lhs{v.ir.FPAdd(lhs_a, lhs_b, fp_control)}; | ||
| 42 | IR::F16F32F64 rhs{v.ir.FPAdd(rhs_a, rhs_b, fp_control)}; | ||
| 43 | if (sat) { | ||
| 44 | lhs = v.ir.FPSaturate(lhs); | ||
| 45 | rhs = v.ir.FPSaturate(rhs); | ||
| 46 | } | ||
| 47 | if (promotion) { | ||
| 48 | lhs = v.ir.FPConvert(16, lhs); | ||
| 49 | rhs = v.ir.FPConvert(16, rhs); | ||
| 50 | } | ||
| 51 | v.X(hadd2.dest_reg, MergeResult(v.ir, hadd2.dest_reg, lhs, rhs, merge)); | ||
| 52 | } | ||
| 53 | |||
| 54 | void HADD2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_b, bool neg_b, Swizzle swizzle_b, | ||
| 55 | const IR::U32& src_b) { | ||
| 56 | union { | ||
| 57 | u64 raw; | ||
| 58 | BitField<49, 2, Merge> merge; | ||
| 59 | BitField<39, 1, u64> ftz; | ||
| 60 | BitField<43, 1, u64> neg_a; | ||
| 61 | BitField<44, 1, u64> abs_a; | ||
| 62 | BitField<47, 2, Swizzle> swizzle_a; | ||
| 63 | } const hadd2{insn}; | ||
| 64 | |||
| 65 | HADD2(v, insn, hadd2.merge, hadd2.ftz != 0, sat, hadd2.abs_a != 0, hadd2.neg_a != 0, | ||
| 66 | hadd2.swizzle_a, abs_b, neg_b, swizzle_b, src_b); | ||
| 67 | } | ||
| 68 | } // Anonymous namespace | ||
| 69 | |||
| 70 | void TranslatorVisitor::HADD2_reg(u64 insn) { | ||
| 71 | union { | ||
| 72 | u64 raw; | ||
| 73 | BitField<32, 1, u64> sat; | ||
| 74 | BitField<31, 1, u64> neg_b; | ||
| 75 | BitField<30, 1, u64> abs_b; | ||
| 76 | BitField<28, 2, Swizzle> swizzle_b; | ||
| 77 | } const hadd2{insn}; | ||
| 78 | |||
| 79 | HADD2(*this, insn, hadd2.sat != 0, hadd2.abs_b != 0, hadd2.neg_b != 0, hadd2.swizzle_b, | ||
| 80 | GetReg20(insn)); | ||
| 81 | } | ||
| 82 | |||
| 83 | void TranslatorVisitor::HADD2_cbuf(u64 insn) { | ||
| 84 | union { | ||
| 85 | u64 raw; | ||
| 86 | BitField<52, 1, u64> sat; | ||
| 87 | BitField<56, 1, u64> neg_b; | ||
| 88 | BitField<54, 1, u64> abs_b; | ||
| 89 | } const hadd2{insn}; | ||
| 90 | |||
| 91 | HADD2(*this, insn, hadd2.sat != 0, hadd2.abs_b != 0, hadd2.neg_b != 0, Swizzle::F32, | ||
| 92 | GetCbuf(insn)); | ||
| 93 | } | ||
| 94 | |||
| 95 | void TranslatorVisitor::HADD2_imm(u64 insn) { | ||
| 96 | union { | ||
| 97 | u64 raw; | ||
| 98 | BitField<52, 1, u64> sat; | ||
| 99 | BitField<56, 1, u64> neg_high; | ||
| 100 | BitField<30, 9, u64> high; | ||
| 101 | BitField<29, 1, u64> neg_low; | ||
| 102 | BitField<20, 9, u64> low; | ||
| 103 | } const hadd2{insn}; | ||
| 104 | |||
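| | // Each half immediate is a separate sign bit plus 9 bits holding the f16 | ||
| | // exponent and top mantissa bits; the low 6 mantissa bits are left as zero. | ||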
| 105 | const u32 imm{ | ||
| 106 | static_cast<u32>(hadd2.low << 6) | static_cast<u32>((hadd2.neg_low != 0 ? 1 : 0) << 15) | | ||
| 107 | static_cast<u32>(hadd2.high << 22) | static_cast<u32>((hadd2.neg_high != 0 ? 1 : 0) << 31)}; | ||
| 108 | HADD2(*this, insn, hadd2.sat != 0, false, false, Swizzle::H1_H0, ir.Imm32(imm)); | ||
| 109 | } | ||
| 110 | |||
| 111 | void TranslatorVisitor::HADD2_32I(u64 insn) { | ||
| 112 | union { | ||
| 113 | u64 raw; | ||
| 114 | BitField<55, 1, u64> ftz; | ||
| 115 | BitField<52, 1, u64> sat; | ||
| 116 | BitField<56, 1, u64> neg_a; | ||
| 117 | BitField<53, 2, Swizzle> swizzle_a; | ||
| 118 | BitField<20, 32, u64> imm32; | ||
| 119 | } const hadd2{insn}; | ||
| 120 | |||
| 121 | const u32 imm{static_cast<u32>(hadd2.imm32)}; | ||
| 122 | HADD2(*this, insn, Merge::H1_H0, hadd2.ftz != 0, hadd2.sat != 0, false, hadd2.neg_a != 0, | ||
| 123 | hadd2.swizzle_a, false, false, Swizzle::H1_H0, ir.Imm32(imm)); | ||
| 124 | } | ||
| 125 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp new file mode 100644 index 000000000..fd7986701 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp | |||
| @@ -0,0 +1,169 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" | ||
| 6 | |||
| 7 | namespace Shader::Maxwell { | ||
| 8 | namespace { | ||
| 9 | void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool neg_b, bool neg_c, | ||
| 10 | Swizzle swizzle_b, Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, | ||
| 11 | bool sat, HalfPrecision precision) { | ||
| 12 | union { | ||
| 13 | u64 raw; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> src_a; | ||
| 16 | } const hfma2{insn}; | ||
| 17 | |||
| 18 | auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hfma2.src_a), swizzle_a)}; | ||
| 19 | auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; | ||
| 20 | auto [lhs_c, rhs_c]{Extract(v.ir, src_c, swizzle_c)}; | ||
| 21 | const bool promotion{lhs_a.Type() != lhs_b.Type() || lhs_a.Type() != lhs_c.Type()}; | ||
| 22 | if (promotion) { | ||
| 23 | if (lhs_a.Type() == IR::Type::F16) { | ||
| 24 | lhs_a = v.ir.FPConvert(32, lhs_a); | ||
| 25 | rhs_a = v.ir.FPConvert(32, rhs_a); | ||
| 26 | } | ||
| 27 | if (lhs_b.Type() == IR::Type::F16) { | ||
| 28 | lhs_b = v.ir.FPConvert(32, lhs_b); | ||
| 29 | rhs_b = v.ir.FPConvert(32, rhs_b); | ||
| 30 | } | ||
| 31 | if (lhs_c.Type() == IR::Type::F16) { | ||
| 32 | lhs_c = v.ir.FPConvert(32, lhs_c); | ||
| 33 | rhs_c = v.ir.FPConvert(32, rhs_c); | ||
| 34 | } | ||
| 35 | } | ||
| 36 | |||
| 37 | lhs_b = v.ir.FPAbsNeg(lhs_b, false, neg_b); | ||
| 38 | rhs_b = v.ir.FPAbsNeg(rhs_b, false, neg_b); | ||
| 39 | |||
| 40 | lhs_c = v.ir.FPAbsNeg(lhs_c, false, neg_c); | ||
| 41 | rhs_c = v.ir.FPAbsNeg(rhs_c, false, neg_c); | ||
| 42 | |||
| 43 | const IR::FpControl fp_control{ | ||
| 44 | .no_contraction = true, | ||
| 45 | .rounding = IR::FpRounding::DontCare, | ||
| 46 | .fmz_mode = HalfPrecision2FmzMode(precision), | ||
| 47 | }; | ||
| 48 | IR::F16F32F64 lhs{v.ir.FPFma(lhs_a, lhs_b, lhs_c, fp_control)}; | ||
| 49 | IR::F16F32F64 rhs{v.ir.FPFma(rhs_a, rhs_b, rhs_c, fp_control)}; | ||
| 50 | if (precision == HalfPrecision::FMZ && !sat) { | ||
| 51 | // Do not emulate FMZ when SAT is enabled, as saturation already handles it. | ||
| 52 | // In D3D9 mode, anything * 0 is zero, even NaN and infinity. | ||
| 53 | const IR::F32 zero{v.ir.Imm32(0.0f)}; | ||
| 54 | const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)}; | ||
| 55 | const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)}; | ||
| 56 | const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)}; | ||
| 57 | lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, lhs_c, lhs)}; | ||
| 58 | |||
| 59 | const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)}; | ||
| 60 | const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)}; | ||
| 61 | const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)}; | ||
| 62 | rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, rhs_c, rhs)}; | ||
| 63 | } | ||
| 64 | if (sat) { | ||
| 65 | lhs = v.ir.FPSaturate(lhs); | ||
| 66 | rhs = v.ir.FPSaturate(rhs); | ||
| 67 | } | ||
| 68 | if (promotion) { | ||
| 69 | lhs = v.ir.FPConvert(16, lhs); | ||
| 70 | rhs = v.ir.FPConvert(16, rhs); | ||
| 71 | } | ||
| 72 | v.X(hfma2.dest_reg, MergeResult(v.ir, hfma2.dest_reg, lhs, rhs, merge)); | ||
| 73 | } | ||
| 74 | |||
| 75 | void HFMA2(TranslatorVisitor& v, u64 insn, bool neg_b, bool neg_c, Swizzle swizzle_b, | ||
| 76 | Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, bool sat, | ||
| 77 | HalfPrecision precision) { | ||
| 78 | union { | ||
| 79 | u64 raw; | ||
| 80 | BitField<47, 2, Swizzle> swizzle_a; | ||
| 81 | BitField<49, 2, Merge> merge; | ||
| 82 | } const hfma2{insn}; | ||
| 83 | |||
| 84 | HFMA2(v, insn, hfma2.merge, hfma2.swizzle_a, neg_b, neg_c, swizzle_b, swizzle_c, src_b, src_c, | ||
| 85 | sat, precision); | ||
| 86 | } | ||
| 87 | } // Anonymous namespace | ||
| 88 | |||
| 89 | void TranslatorVisitor::HFMA2_reg(u64 insn) { | ||
| 90 | union { | ||
| 91 | u64 raw; | ||
| 92 | BitField<28, 2, Swizzle> swizzle_b; | ||
| 93 | BitField<32, 1, u64> saturate; | ||
| 94 | BitField<31, 1, u64> neg_b; | ||
| 95 | BitField<30, 1, u64> neg_c; | ||
| 96 | BitField<35, 2, Swizzle> swizzle_c; | ||
| 97 | BitField<37, 2, HalfPrecision> precision; | ||
| 98 | } const hfma2{insn}; | ||
| 99 | |||
| 100 | HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, hfma2.swizzle_c, | ||
| 101 | GetReg20(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision); | ||
| 102 | } | ||
| 103 | |||
| 104 | void TranslatorVisitor::HFMA2_rc(u64 insn) { | ||
| 105 | union { | ||
| 106 | u64 raw; | ||
| 107 | BitField<51, 1, u64> neg_c; | ||
| 108 | BitField<52, 1, u64> saturate; | ||
| 109 | BitField<53, 2, Swizzle> swizzle_b; | ||
| 110 | BitField<56, 1, u64> neg_b; | ||
| 111 | BitField<57, 2, HalfPrecision> precision; | ||
| 112 | } const hfma2{insn}; | ||
| 113 | |||
| 114 | HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, Swizzle::F32, | ||
| 115 | GetReg39(insn), GetCbuf(insn), hfma2.saturate != 0, hfma2.precision); | ||
| 116 | } | ||
| 117 | |||
| 118 | void TranslatorVisitor::HFMA2_cr(u64 insn) { | ||
| 119 | union { | ||
| 120 | u64 raw; | ||
| 121 | BitField<51, 1, u64> neg_c; | ||
| 122 | BitField<52, 1, u64> saturate; | ||
| 123 | BitField<53, 2, Swizzle> swizzle_c; | ||
| 124 | BitField<56, 1, u64> neg_b; | ||
| 125 | BitField<57, 2, HalfPrecision> precision; | ||
| 126 | } const hfma2{insn}; | ||
| 127 | |||
| 128 | HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, Swizzle::F32, hfma2.swizzle_c, | ||
| 129 | GetCbuf(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision); | ||
| 130 | } | ||
| 131 | |||
| 132 | void TranslatorVisitor::HFMA2_imm(u64 insn) { | ||
| 133 | union { | ||
| 134 | u64 raw; | ||
| 135 | BitField<51, 1, u64> neg_c; | ||
| 136 | BitField<52, 1, u64> saturate; | ||
| 137 | BitField<53, 2, Swizzle> swizzle_c; | ||
| 138 | |||
| 139 | BitField<56, 1, u64> neg_high; | ||
| 140 | BitField<30, 9, u64> high; | ||
| 141 | BitField<29, 1, u64> neg_low; | ||
| 142 | BitField<20, 9, u64> low; | ||
| 143 | BitField<57, 2, HalfPrecision> precision; | ||
| 144 | } const hfma2{insn}; | ||
| 145 | |||
| 146 | const u32 imm{ | ||
| 147 | static_cast<u32>(hfma2.low << 6) | static_cast<u32>((hfma2.neg_low != 0 ? 1 : 0) << 15) | | ||
| 148 | static_cast<u32>(hfma2.high << 22) | static_cast<u32>((hfma2.neg_high != 0 ? 1 : 0) << 31)}; | ||
| 149 | |||
| 150 | HFMA2(*this, insn, false, hfma2.neg_c != 0, Swizzle::H1_H0, hfma2.swizzle_c, ir.Imm32(imm), | ||
| 151 | GetReg39(insn), hfma2.saturate != 0, hfma2.precision); | ||
| 152 | } | ||
| 153 | |||
| 154 | void TranslatorVisitor::HFMA2_32I(u64 insn) { | ||
| 155 | union { | ||
| 156 | u64 raw; | ||
| 157 | BitField<0, 8, IR::Reg> src_c; | ||
| 158 | BitField<20, 32, u64> imm32; | ||
| 159 | BitField<52, 1, u64> neg_c; | ||
| 160 | BitField<53, 2, Swizzle> swizzle_a; | ||
| 161 | BitField<55, 2, HalfPrecision> precision; | ||
| 162 | } const hfma2{insn}; | ||
| 163 | |||
| 164 | const u32 imm{static_cast<u32>(hfma2.imm32)}; | ||
| 165 | HFMA2(*this, insn, Merge::H1_H0, hfma2.swizzle_a, false, hfma2.neg_c != 0, Swizzle::H1_H0, | ||
| 166 | Swizzle::H1_H0, ir.Imm32(imm), X(hfma2.src_c), false, hfma2.precision); | ||
| 167 | } | ||
| 168 | |||
| 169 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp new file mode 100644 index 000000000..0dbeb7f56 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp | |||
| @@ -0,0 +1,62 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" | ||
| 6 | |||
| 7 | namespace Shader::Maxwell { | ||
| 8 | |||
| 9 | IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision) { | ||
| 10 | switch (precision) { | ||
| 11 | case HalfPrecision::None: | ||
| 12 | return IR::FmzMode::None; | ||
| 13 | case HalfPrecision::FTZ: | ||
| 14 | return IR::FmzMode::FTZ; | ||
| 15 | case HalfPrecision::FMZ: | ||
| 16 | return IR::FmzMode::FMZ; | ||
| 17 | default: | ||
| 18 | return IR::FmzMode::DontCare; | ||
| 19 | } | ||
| 20 | } | ||
| 21 | |||
| 22 | std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) { | ||
| 23 | switch (swizzle) { | ||
| 24 | case Swizzle::H1_H0: { | ||
| 25 | const IR::Value vector{ir.UnpackFloat2x16(value)}; | ||
| 26 | return {IR::F16{ir.CompositeExtract(vector, 0)}, IR::F16{ir.CompositeExtract(vector, 1)}}; | ||
| 27 | } | ||
| 28 | case Swizzle::H0_H0: { | ||
| 29 | const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 0)}; | ||
| 30 | return {scalar, scalar}; | ||
| 31 | } | ||
| 32 | case Swizzle::H1_H1: { | ||
| 33 | const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 1)}; | ||
| 34 | return {scalar, scalar}; | ||
| 35 | } | ||
| 36 | case Swizzle::F32: { | ||
| 37 | const IR::F32 scalar{ir.BitCast<IR::F32>(value)}; | ||
| 38 | return {scalar, scalar}; | ||
| 39 | } | ||
| 40 | } | ||
| 41 | throw InvalidArgument("Invalid swizzle {}", swizzle); | ||
| 42 | } | ||
| 43 | |||
| 44 | IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs, | ||
| 45 | Merge merge) { | ||
| 46 | switch (merge) { | ||
| 47 | case Merge::H1_H0: | ||
| 48 | return ir.PackFloat2x16(ir.CompositeConstruct(lhs, rhs)); | ||
| 49 | case Merge::F32: | ||
| 50 | return ir.BitCast<IR::U32, IR::F32>(ir.FPConvert(32, lhs)); | ||
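| | // MRG_H0/MRG_H1 replace only one half of the destination register, | ||
| | // leaving the other half intact. | ||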
| 51 | case Merge::MRG_H0: | ||
| 52 | case Merge::MRG_H1: { | ||
| 53 | const IR::Value vector{ir.UnpackFloat2x16(ir.GetReg(dest))}; | ||
| 54 | const bool is_h0{merge == Merge::MRG_H0}; | ||
| 55 | const IR::F16 insert{ir.FPConvert(16, is_h0 ? lhs : rhs)}; | ||
| 56 | return ir.PackFloat2x16(ir.CompositeInsert(vector, insert, is_h0 ? 0 : 1)); | ||
| 57 | } | ||
| 58 | } | ||
| 59 | throw InvalidArgument("Invalid merge {}", merge); | ||
| 60 | } | ||
| 61 | |||
| 62 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h new file mode 100644 index 000000000..59da56a7e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h | |||
| @@ -0,0 +1,42 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "shader_recompiler/exception.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 11 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 12 | |||
| 13 | namespace Shader::Maxwell { | ||
| 14 | |||
| 15 | enum class Merge : u64 { | ||
| 16 | H1_H0, | ||
| 17 | F32, | ||
| 18 | MRG_H0, | ||
| 19 | MRG_H1, | ||
| 20 | }; | ||
| 21 | |||
| 22 | enum class Swizzle : u64 { | ||
| 23 | H1_H0, | ||
| 24 | F32, | ||
| 25 | H0_H0, | ||
| 26 | H1_H1, | ||
| 27 | }; | ||
| 28 | |||
| 29 | enum class HalfPrecision : u64 { | ||
| 30 | None = 0, | ||
| 31 | FTZ = 1, | ||
| 32 | FMZ = 2, | ||
| 33 | }; | ||
| 34 | |||
| 35 | IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision); | ||
| 36 | |||
| 37 | std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle); | ||
| 38 | |||
| 39 | IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs, | ||
| 40 | Merge merge); | ||
| 41 | |||
| 42 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp new file mode 100644 index 000000000..3f548ce76 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp | |||
| @@ -0,0 +1,143 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" | ||
| 6 | |||
| 7 | namespace Shader::Maxwell { | ||
| 8 | namespace { | ||
| 9 | void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bool neg_a, | ||
| 10 | Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b, | ||
| 11 | HalfPrecision precision) { | ||
| 12 | union { | ||
| 13 | u64 raw; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> src_a; | ||
| 16 | } const hmul2{insn}; | ||
| 17 | |||
| 18 | auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hmul2.src_a), swizzle_a)}; | ||
| 19 | auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; | ||
| 20 | const bool promotion{lhs_a.Type() != lhs_b.Type()}; | ||
| 21 | if (promotion) { | ||
| 22 | if (lhs_a.Type() == IR::Type::F16) { | ||
| 23 | lhs_a = v.ir.FPConvert(32, lhs_a); | ||
| 24 | rhs_a = v.ir.FPConvert(32, rhs_a); | ||
| 25 | } | ||
| 26 | if (lhs_b.Type() == IR::Type::F16) { | ||
| 27 | lhs_b = v.ir.FPConvert(32, lhs_b); | ||
| 28 | rhs_b = v.ir.FPConvert(32, rhs_b); | ||
| 29 | } | ||
| 30 | } | ||
| 31 | lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a); | ||
| 32 | rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a); | ||
| 33 | |||
| 34 | lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); | ||
| 35 | rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); | ||
| 36 | |||
| 37 | const IR::FpControl fp_control{ | ||
| 38 | .no_contraction = true, | ||
| 39 | .rounding = IR::FpRounding::DontCare, | ||
| 40 | .fmz_mode = HalfPrecision2FmzMode(precision), | ||
| 41 | }; | ||
| 42 | IR::F16F32F64 lhs{v.ir.FPMul(lhs_a, lhs_b, fp_control)}; | ||
| 43 | IR::F16F32F64 rhs{v.ir.FPMul(rhs_a, rhs_b, fp_control)}; | ||
| 44 | if (precision == HalfPrecision::FMZ && !sat) { | ||
| 45 | // Do not emulate FMZ when SAT is enabled, as saturation already handles it. | ||
| 46 | // In D3D9 mode, anything * 0 is zero, even NaN and infinity. | ||
| 47 | const IR::F32 zero{v.ir.Imm32(0.0f)}; | ||
| 48 | const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)}; | ||
| 49 | const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)}; | ||
| 50 | const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)}; | ||
| 51 | lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, zero, lhs)}; | ||
| 52 | |||
| 53 | const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)}; | ||
| 54 | const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)}; | ||
| 55 | const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)}; | ||
| 56 | rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, zero, rhs)}; | ||
| 57 | } | ||
| 58 | if (sat) { | ||
| 59 | lhs = v.ir.FPSaturate(lhs); | ||
| 60 | rhs = v.ir.FPSaturate(rhs); | ||
| 61 | } | ||
| 62 | if (promotion) { | ||
| 63 | lhs = v.ir.FPConvert(16, lhs); | ||
| 64 | rhs = v.ir.FPConvert(16, rhs); | ||
| 65 | } | ||
| 66 | v.X(hmul2.dest_reg, MergeResult(v.ir, hmul2.dest_reg, lhs, rhs, merge)); | ||
| 67 | } | ||
| 68 | |||
| 69 | void HMUL2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_a, bool neg_a, bool abs_b, bool neg_b, | ||
| 70 | Swizzle swizzle_b, const IR::U32& src_b) { | ||
| 71 | union { | ||
| 72 | u64 raw; | ||
| 73 | BitField<49, 2, Merge> merge; | ||
| 74 | BitField<47, 2, Swizzle> swizzle_a; | ||
| 75 | BitField<39, 2, HalfPrecision> precision; | ||
| 76 | } const hmul2{insn}; | ||
| 77 | |||
| 78 | HMUL2(v, insn, hmul2.merge, sat, abs_a, neg_a, hmul2.swizzle_a, abs_b, neg_b, swizzle_b, src_b, | ||
| 79 | hmul2.precision); | ||
| 80 | } | ||
| 81 | } // Anonymous namespace | ||
| 82 | |||
| 83 | void TranslatorVisitor::HMUL2_reg(u64 insn) { | ||
| 84 | union { | ||
| 85 | u64 raw; | ||
| 86 | BitField<32, 1, u64> sat; | ||
| 87 | BitField<31, 1, u64> neg_b; | ||
| 88 | BitField<30, 1, u64> abs_b; | ||
| 89 | BitField<44, 1, u64> abs_a; | ||
| 90 | BitField<28, 2, Swizzle> swizzle_b; | ||
| 91 | } const hmul2{insn}; | ||
| 92 | |||
| 93 | HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, false, hmul2.abs_b != 0, hmul2.neg_b != 0, | ||
| 94 | hmul2.swizzle_b, GetReg20(insn)); | ||
| 95 | } | ||
| 96 | |||
| 97 | void TranslatorVisitor::HMUL2_cbuf(u64 insn) { | ||
| 98 | union { | ||
| 99 | u64 raw; | ||
| 100 | BitField<52, 1, u64> sat; | ||
| 101 | BitField<54, 1, u64> abs_b; | ||
| 102 | BitField<43, 1, u64> neg_a; | ||
| 103 | BitField<44, 1, u64> abs_a; | ||
| 104 | } const hmul2{insn}; | ||
| 105 | |||
| 106 | HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, hmul2.abs_b != 0, false, | ||
| 107 | Swizzle::F32, GetCbuf(insn)); | ||
| 108 | } | ||
| 109 | |||
| 110 | void TranslatorVisitor::HMUL2_imm(u64 insn) { | ||
| 111 | union { | ||
| 112 | u64 raw; | ||
| 113 | BitField<52, 1, u64> sat; | ||
| 114 | BitField<56, 1, u64> neg_high; | ||
| 115 | BitField<30, 9, u64> high; | ||
| 116 | BitField<29, 1, u64> neg_low; | ||
| 117 | BitField<20, 9, u64> low; | ||
| 118 | BitField<43, 1, u64> neg_a; | ||
| 119 | BitField<44, 1, u64> abs_a; | ||
| 120 | } const hmul2{insn}; | ||
| 121 | |||
| 122 | const u32 imm{ | ||
| 123 | static_cast<u32>(hmul2.low << 6) | static_cast<u32>((hmul2.neg_low != 0 ? 1 : 0) << 15) | | ||
| 124 | static_cast<u32>(hmul2.high << 22) | static_cast<u32>((hmul2.neg_high != 0 ? 1 : 0) << 31)}; | ||
| 125 | HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, false, false, | ||
| 126 | Swizzle::H1_H0, ir.Imm32(imm)); | ||
| 127 | } | ||
| 128 | |||
| 129 | void TranslatorVisitor::HMUL2_32I(u64 insn) { | ||
| 130 | union { | ||
| 131 | u64 raw; | ||
| 132 | BitField<55, 2, HalfPrecision> precision; | ||
| 133 | BitField<52, 1, u64> sat; | ||
| 134 | BitField<53, 2, Swizzle> swizzle_a; | ||
| 135 | BitField<20, 32, u64> imm32; | ||
| 136 | } const hmul2{insn}; | ||
| 137 | |||
| 138 | const u32 imm{static_cast<u32>(hmul2.imm32)}; | ||
| 139 | HMUL2(*this, insn, Merge::H1_H0, hmul2.sat != 0, false, false, hmul2.swizzle_a, false, false, | ||
| 140 | Swizzle::H1_H0, ir.Imm32(imm), hmul2.precision); | ||
| 141 | } | ||
| 142 | |||
| 143 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp new file mode 100644 index 000000000..cca5b831f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp | |||
| @@ -0,0 +1,117 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" | ||
| 6 | |||
| 7 | namespace Shader::Maxwell { | ||
| 8 | namespace { | ||
| 9 | void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool ftz, bool neg_b, | ||
| 10 | bool abs_b, FPCompareOp compare_op, Swizzle swizzle_b) { | ||
| 11 | union { | ||
| 12 | u64 insn; | ||
| 13 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 14 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 15 | BitField<39, 3, IR::Pred> pred; | ||
| 16 | BitField<42, 1, u64> neg_pred; | ||
| 17 | BitField<43, 1, u64> neg_a; | ||
| 18 | BitField<45, 2, BooleanOp> bop; | ||
| 19 | BitField<44, 1, u64> abs_a; | ||
| 20 | BitField<47, 2, Swizzle> swizzle_a; | ||
| 21 | } const hset2{insn}; | ||
| 22 | |||
| 23 | auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hset2.src_a_reg), hset2.swizzle_a)}; | ||
| 24 | auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; | ||
| 25 | |||
| 26 | if (lhs_a.Type() != lhs_b.Type()) { | ||
| 27 | if (lhs_a.Type() == IR::Type::F16) { | ||
| 28 | lhs_a = v.ir.FPConvert(32, lhs_a); | ||
| 29 | rhs_a = v.ir.FPConvert(32, rhs_a); | ||
| 30 | } | ||
| 31 | if (lhs_b.Type() == IR::Type::F16) { | ||
| 32 | lhs_b = v.ir.FPConvert(32, lhs_b); | ||
| 33 | rhs_b = v.ir.FPConvert(32, rhs_b); | ||
| 34 | } | ||
| 35 | } | ||
| 36 | |||
| 37 | lhs_a = v.ir.FPAbsNeg(lhs_a, hset2.abs_a != 0, hset2.neg_a != 0); | ||
| 38 | rhs_a = v.ir.FPAbsNeg(rhs_a, hset2.abs_a != 0, hset2.neg_a != 0); | ||
| 39 | |||
| 40 | lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); | ||
| 41 | rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); | ||
| 42 | |||
| 43 | const IR::FpControl control{ | ||
| 44 | .no_contraction = false, | ||
| 45 | .rounding = IR::FpRounding::DontCare, | ||
| 46 | .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None), | ||
| 47 | }; | ||
| 48 | |||
| 49 | IR::U1 pred{v.ir.GetPred(hset2.pred)}; | ||
| 50 | if (hset2.neg_pred != 0) { | ||
| 51 | pred = v.ir.LogicalNot(pred); | ||
| 52 | } | ||
| 53 | const IR::U1 cmp_result_lhs{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)}; | ||
| 54 | const IR::U1 cmp_result_rhs{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)}; | ||
| 55 | const IR::U1 bop_result_lhs{PredicateCombine(v.ir, cmp_result_lhs, pred, hset2.bop)}; | ||
| 56 | const IR::U1 bop_result_rhs{PredicateCombine(v.ir, cmp_result_rhs, pred, hset2.bop)}; | ||
| 57 | |||
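| | // With BF set, a passing comparison yields 1.0 (0x3c00 in f16) per half; | ||
| | // otherwise it yields an all-ones 16-bit mask. | ||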
| 58 | const u32 true_value{bf ? 0x3c00u : 0xffffu}; | ||
| 59 | const IR::U32 true_val_lhs{v.ir.Imm32(true_value)}; | ||
| 60 | const IR::U32 true_val_rhs{v.ir.Imm32(true_value << 16)}; | ||
| 61 | const IR::U32 fail_result{v.ir.Imm32(0)}; | ||
| 62 | const IR::U32 result_lhs{v.ir.Select(bop_result_lhs, true_val_lhs, fail_result)}; | ||
| 63 | const IR::U32 result_rhs{v.ir.Select(bop_result_rhs, true_val_rhs, fail_result)}; | ||
| 64 | |||
| 65 | v.X(hset2.dest_reg, IR::U32{v.ir.BitwiseOr(result_lhs, result_rhs)}); | ||
| 66 | } | ||
| 67 | } // Anonymous namespace | ||
| 68 | |||
| 69 | void TranslatorVisitor::HSET2_reg(u64 insn) { | ||
| 70 | union { | ||
| 71 | u64 insn; | ||
| 72 | BitField<30, 1, u64> abs_b; | ||
| 73 | BitField<49, 1, u64> bf; | ||
| 74 | BitField<31, 1, u64> neg_b; | ||
| 75 | BitField<50, 1, u64> ftz; | ||
| 76 | BitField<35, 4, FPCompareOp> compare_op; | ||
| 77 | BitField<28, 2, Swizzle> swizzle_b; | ||
| 78 | } const hset2{insn}; | ||
| 79 | |||
| 80 | HSET2(*this, insn, GetReg20(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0, | ||
| 81 | hset2.abs_b != 0, hset2.compare_op, hset2.swizzle_b); | ||
| 82 | } | ||
| 83 | |||
| 84 | void TranslatorVisitor::HSET2_cbuf(u64 insn) { | ||
| 85 | union { | ||
| 86 | u64 insn; | ||
| 87 | BitField<53, 1, u64> bf; | ||
| 88 | BitField<56, 1, u64> neg_b; | ||
| 89 | BitField<54, 1, u64> ftz; | ||
| 90 | BitField<49, 4, FPCompareOp> compare_op; | ||
| 91 | } const hset2{insn}; | ||
| 92 | |||
| 93 | HSET2(*this, insn, GetCbuf(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0, false, | ||
| 94 | hset2.compare_op, Swizzle::F32); | ||
| 95 | } | ||
| 96 | |||
| 97 | void TranslatorVisitor::HSET2_imm(u64 insn) { | ||
| 98 | union { | ||
| 99 | u64 insn; | ||
| 100 | BitField<53, 1, u64> bf; | ||
| 101 | BitField<54, 1, u64> ftz; | ||
| 102 | BitField<49, 4, FPCompareOp> compare_op; | ||
| 103 | BitField<56, 1, u64> neg_high; | ||
| 104 | BitField<30, 9, u64> high; | ||
| 105 | BitField<29, 1, u64> neg_low; | ||
| 106 | BitField<20, 9, u64> low; | ||
| 107 | } const hset2{insn}; | ||
| 108 | |||
| 109 | const u32 imm{ | ||
| 110 | static_cast<u32>(hset2.low << 6) | static_cast<u32>((hset2.neg_low != 0 ? 1 : 0) << 15) | | ||
| 111 | static_cast<u32>(hset2.high << 22) | static_cast<u32>((hset2.neg_high != 0 ? 1 : 0) << 31)}; | ||
| 112 | |||
| 113 | HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, hset2.compare_op, | ||
| 114 | Swizzle::H1_H0); | ||
| 115 | } | ||
| 116 | |||
| 117 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp new file mode 100644 index 000000000..b3931dae3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp | |||
| @@ -0,0 +1,118 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" | ||
| 6 | |||
| 7 | namespace Shader::Maxwell { | ||
| 8 | namespace { | ||
| 9 | void HSETP2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool neg_b, bool abs_b, | ||
| 10 | Swizzle swizzle_b, FPCompareOp compare_op, bool h_and) { | ||
| 11 | union { | ||
| 12 | u64 insn; | ||
| 13 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 14 | BitField<3, 3, IR::Pred> dest_pred_a; | ||
| 15 | BitField<0, 3, IR::Pred> dest_pred_b; | ||
| 16 | BitField<39, 3, IR::Pred> pred; | ||
| 17 | BitField<42, 1, u64> neg_pred; | ||
| 18 | BitField<43, 1, u64> neg_a; | ||
| 19 | BitField<45, 2, BooleanOp> bop; | ||
| 20 | BitField<44, 1, u64> abs_a; | ||
| 21 | BitField<6, 1, u64> ftz; | ||
| 22 | BitField<47, 2, Swizzle> swizzle_a; | ||
| 23 | } const hsetp2{insn}; | ||
| 24 | |||
| 25 | auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hsetp2.src_a_reg), hsetp2.swizzle_a)}; | ||
| 26 | auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; | ||
| 27 | |||
| 28 | if (lhs_a.Type() != lhs_b.Type()) { | ||
| 29 | if (lhs_a.Type() == IR::Type::F16) { | ||
| 30 | lhs_a = v.ir.FPConvert(32, lhs_a); | ||
| 31 | rhs_a = v.ir.FPConvert(32, rhs_a); | ||
| 32 | } | ||
| 33 | if (lhs_b.Type() == IR::Type::F16) { | ||
| 34 | lhs_b = v.ir.FPConvert(32, lhs_b); | ||
| 35 | rhs_b = v.ir.FPConvert(32, rhs_b); | ||
| 36 | } | ||
| 37 | } | ||
| 38 | |||
| 39 | lhs_a = v.ir.FPAbsNeg(lhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0); | ||
| 40 | rhs_a = v.ir.FPAbsNeg(rhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0); | ||
| 41 | |||
| 42 | lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); | ||
| 43 | rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); | ||
| 44 | |||
| 45 | const IR::FpControl control{ | ||
| 46 | .no_contraction = false, | ||
| 47 | .rounding = IR::FpRounding::DontCare, | ||
| 48 | .fmz_mode = (hsetp2.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), | ||
| 49 | }; | ||
| 50 | |||
| 51 | IR::U1 pred{v.ir.GetPred(hsetp2.pred)}; | ||
| 52 | if (hsetp2.neg_pred != 0) { | ||
| 53 | pred = v.ir.LogicalNot(pred); | ||
| 54 | } | ||
| 55 | const IR::U1 cmp_result_lhs{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)}; | ||
| 56 | const IR::U1 cmp_result_rhs{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)}; | ||
| 57 | const IR::U1 bop_result_lhs{PredicateCombine(v.ir, cmp_result_lhs, pred, hsetp2.bop)}; | ||
| 58 | const IR::U1 bop_result_rhs{PredicateCombine(v.ir, cmp_result_rhs, pred, hsetp2.bop)}; | ||
| 59 | |||
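| | // H_AND folds both half results into one predicate pair: the conjunction | ||
| | // and its complement. Otherwise each half sets its own predicate. | ||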
| 60 | if (h_and) { | ||
| 61 | auto result = v.ir.LogicalAnd(bop_result_lhs, bop_result_rhs); | ||
| 62 | v.ir.SetPred(hsetp2.dest_pred_a, result); | ||
| 63 | v.ir.SetPred(hsetp2.dest_pred_b, v.ir.LogicalNot(result)); | ||
| 64 | } else { | ||
| 65 | v.ir.SetPred(hsetp2.dest_pred_a, bop_result_lhs); | ||
| 66 | v.ir.SetPred(hsetp2.dest_pred_b, bop_result_rhs); | ||
| 67 | } | ||
| 68 | } | ||
| 69 | } // Anonymous namespace | ||
| 70 | |||
| 71 | void TranslatorVisitor::HSETP2_reg(u64 insn) { | ||
| 72 | union { | ||
| 73 | u64 insn; | ||
| 74 | BitField<30, 1, u64> abs_b; | ||
| 75 | BitField<49, 1, u64> h_and; | ||
| 76 | BitField<31, 1, u64> neg_b; | ||
| 77 | BitField<35, 4, FPCompareOp> compare_op; | ||
| 78 | BitField<28, 2, Swizzle> swizzle_b; | ||
| 79 | } const hsetp2{insn}; | ||
| 80 | HSETP2(*this, insn, GetReg20(insn), hsetp2.neg_b != 0, hsetp2.abs_b != 0, hsetp2.swizzle_b, | ||
| 81 | hsetp2.compare_op, hsetp2.h_and != 0); | ||
| 82 | } | ||
| 83 | |||
| 84 | void TranslatorVisitor::HSETP2_cbuf(u64 insn) { | ||
| 85 | union { | ||
| 86 | u64 insn; | ||
| 87 | BitField<53, 1, u64> h_and; | ||
| 88 | BitField<54, 1, u64> abs_b; | ||
| 89 | BitField<56, 1, u64> neg_b; | ||
| 90 | BitField<49, 4, FPCompareOp> compare_op; | ||
| 91 | } const hsetp2{insn}; | ||
| 92 | |||
| 93 | HSETP2(*this, insn, GetCbuf(insn), hsetp2.neg_b != 0, hsetp2.abs_b != 0, Swizzle::F32, | ||
| 94 | hsetp2.compare_op, hsetp2.h_and != 0); | ||
| 95 | } | ||
| 96 | |||
| 97 | void TranslatorVisitor::HSETP2_imm(u64 insn) { | ||
| 98 | union { | ||
| 99 | u64 insn; | ||
| 100 | BitField<53, 1, u64> h_and; | ||
| 101 | BitField<54, 1, u64> ftz; | ||
| 102 | BitField<49, 4, FPCompareOp> compare_op; | ||
| 103 | BitField<56, 1, u64> neg_high; | ||
| 104 | BitField<30, 9, u64> high; | ||
| 105 | BitField<29, 1, u64> neg_low; | ||
| 106 | BitField<20, 9, u64> low; | ||
| 107 | } const hsetp2{insn}; | ||
| 108 | |||
| 109 | const u32 imm{static_cast<u32>(hsetp2.low << 6) | | ||
| 110 | static_cast<u32>((hsetp2.neg_low != 0 ? 1 : 0) << 15) | | ||
| 111 | static_cast<u32>(hsetp2.high << 22) | | ||
| 112 | static_cast<u32>((hsetp2.neg_high != 0 ? 1 : 0) << 31)}; | ||
| 113 | |||
| 114 | HSETP2(*this, insn, ir.Imm32(imm), false, false, Swizzle::H1_H0, hsetp2.compare_op, | ||
| 115 | hsetp2.h_and != 0); | ||
| 116 | } | ||
| 117 | |||
| 118 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp new file mode 100644 index 000000000..b446aae0e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp | |||
| @@ -0,0 +1,272 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | [[nodiscard]] IR::U32 CbufLowerBits(IR::IREmitter& ir, bool unaligned, const IR::U32& binding, | ||
| 12 | u32 offset) { | ||
| 13 | if (unaligned) { | ||
| 14 | return ir.Imm32(0); | ||
| 15 | } | ||
| 16 | return ir.GetCbuf(binding, IR::U32{IR::Value{offset}}); | ||
| 17 | } | ||
| 18 | } // Anonymous namespace | ||
| 19 | |||
| 20 | IR::U32 TranslatorVisitor::X(IR::Reg reg) { | ||
| 21 | return ir.GetReg(reg); | ||
| 22 | } | ||
| 23 | |||
| 24 | IR::U64 TranslatorVisitor::L(IR::Reg reg) { | ||
| 25 | if (!IR::IsAligned(reg, 2)) { | ||
| 26 | throw NotImplementedException("Unaligned source register {}", reg); | ||
| 27 | } | ||
| 28 | return IR::U64{ir.PackUint2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))}; | ||
| 29 | } | ||
| 30 | |||
| 31 | IR::F32 TranslatorVisitor::F(IR::Reg reg) { | ||
| 32 | return ir.BitCast<IR::F32>(X(reg)); | ||
| 33 | } | ||
| 34 | |||
| 35 | IR::F64 TranslatorVisitor::D(IR::Reg reg) { | ||
| 36 | if (!IR::IsAligned(reg, 2)) { | ||
| 37 | throw NotImplementedException("Unaligned source register {}", reg); | ||
| 38 | } | ||
| 39 | return IR::F64{ir.PackDouble2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))}; | ||
| 40 | } | ||
| 41 | |||
| 42 | void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) { | ||
| 43 | ir.SetReg(dest_reg, value); | ||
| 44 | } | ||
| 45 | |||
| 46 | void TranslatorVisitor::L(IR::Reg dest_reg, const IR::U64& value) { | ||
| 47 | if (!IR::IsAligned(dest_reg, 2)) { | ||
| 48 | throw NotImplementedException("Unaligned destination register {}", dest_reg); | ||
| 49 | } | ||
| 50 | const IR::Value result{ir.UnpackUint2x32(value)}; | ||
| 51 | for (int i = 0; i < 2; i++) { | ||
| 52 | X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))}); | ||
| 53 | } | ||
| 54 | } | ||
| 55 | |||
| 56 | void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) { | ||
| 57 | X(dest_reg, ir.BitCast<IR::U32>(value)); | ||
| 58 | } | ||
| 59 | |||
| 60 | void TranslatorVisitor::D(IR::Reg dest_reg, const IR::F64& value) { | ||
| 61 | if (!IR::IsAligned(dest_reg, 2)) { | ||
| 62 | throw NotImplementedException("Unaligned destination register {}", dest_reg); | ||
| 63 | } | ||
| 64 | const IR::Value result{ir.UnpackDouble2x32(value)}; | ||
| 65 | for (int i = 0; i < 2; i++) { | ||
| 66 | X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))}); | ||
| 67 | } | ||
| 68 | } | ||
| 69 | |||
| 70 | IR::U32 TranslatorVisitor::GetReg8(u64 insn) { | ||
| 71 | union { | ||
| 72 | u64 raw; | ||
| 73 | BitField<8, 8, IR::Reg> index; | ||
| 74 | } const reg{insn}; | ||
| 75 | return X(reg.index); | ||
| 76 | } | ||
| 77 | |||
| 78 | IR::U32 TranslatorVisitor::GetReg20(u64 insn) { | ||
| 79 | union { | ||
| 80 | u64 raw; | ||
| 81 | BitField<20, 8, IR::Reg> index; | ||
| 82 | } const reg{insn}; | ||
| 83 | return X(reg.index); | ||
| 84 | } | ||
| 85 | |||
| 86 | IR::U32 TranslatorVisitor::GetReg39(u64 insn) { | ||
| 87 | union { | ||
| 88 | u64 raw; | ||
| 89 | BitField<39, 8, IR::Reg> index; | ||
| 90 | } const reg{insn}; | ||
| 91 | return X(reg.index); | ||
| 92 | } | ||
| 93 | |||
| 94 | IR::F32 TranslatorVisitor::GetFloatReg8(u64 insn) { | ||
| 95 | return ir.BitCast<IR::F32>(GetReg8(insn)); | ||
| 96 | } | ||
| 97 | |||
| 98 | IR::F32 TranslatorVisitor::GetFloatReg20(u64 insn) { | ||
| 99 | return ir.BitCast<IR::F32>(GetReg20(insn)); | ||
| 100 | } | ||
| 101 | |||
| 102 | IR::F32 TranslatorVisitor::GetFloatReg39(u64 insn) { | ||
| 103 | return ir.BitCast<IR::F32>(GetReg39(insn)); | ||
| 104 | } | ||
| 105 | |||
| 106 | IR::F64 TranslatorVisitor::GetDoubleReg20(u64 insn) { | ||
| 107 | union { | ||
| 108 | u64 raw; | ||
| 109 | BitField<20, 8, IR::Reg> index; | ||
| 110 | } const reg{insn}; | ||
| 111 | return D(reg.index); | ||
| 112 | } | ||
| 113 | |||
| 114 | IR::F64 TranslatorVisitor::GetDoubleReg39(u64 insn) { | ||
| 115 | union { | ||
| 116 | u64 raw; | ||
| 117 | BitField<39, 8, IR::Reg> index; | ||
| 118 | } const reg{insn}; | ||
| 119 | return D(reg.index); | ||
| 120 | } | ||
| 121 | |||
| 122 | static std::pair<IR::U32, IR::U32> CbufAddr(u64 insn) { | ||
| 123 | union { | ||
| 124 | u64 raw; | ||
| 125 | BitField<20, 14, u64> offset; | ||
| 126 | BitField<34, 5, u64> binding; | ||
| 127 | } const cbuf{insn}; | ||
| 128 | |||
| 129 | if (cbuf.binding >= 18) { | ||
| 130 | throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding); | ||
| 131 | } | ||
| 132 | if (cbuf.offset >= 0x10'000) { | ||
| 133 | throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset); | ||
| 134 | } | ||
| 135 | const IR::Value binding{static_cast<u32>(cbuf.binding)}; | ||
| 136 | const IR::Value byte_offset{static_cast<u32>(cbuf.offset) * 4}; | ||
| 137 | return {IR::U32{binding}, IR::U32{byte_offset}}; | ||
| 138 | } | ||
| 139 | |||
| 140 | IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { | ||
| 141 | const auto [binding, byte_offset]{CbufAddr(insn)}; | ||
| 142 | return ir.GetCbuf(binding, byte_offset); | ||
| 143 | } | ||
| 144 | |||
| 145 | IR::F32 TranslatorVisitor::GetFloatCbuf(u64 insn) { | ||
| 146 | const auto [binding, byte_offset]{CbufAddr(insn)}; | ||
| 147 | return ir.GetFloatCbuf(binding, byte_offset); | ||
| 148 | } | ||
| 149 | |||
| 150 | IR::F64 TranslatorVisitor::GetDoubleCbuf(u64 insn) { | ||
| 151 | union { | ||
| 152 | u64 raw; | ||
| 153 | BitField<20, 1, u64> unaligned; | ||
| 154 | } const cbuf{insn}; | ||
| 155 | |||
| 156 | const auto [binding, offset_value]{CbufAddr(insn)}; | ||
| 157 | const bool unaligned{cbuf.unaligned != 0}; | ||
| 158 | const u32 offset{offset_value.U32()}; | ||
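| | // Bit 20 doubles as the low bit of the offset: an odd word offset marks an | ||
| | // unaligned double, so only the upper word is fetched and the low word is zeroed. | ||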
| 159 | const IR::Value addr{unaligned ? offset | 4u : (offset & ~7u) | 4u}; | ||
| 160 | |||
| 161 | const IR::U32 value{ir.GetCbuf(binding, IR::U32{addr})}; | ||
| 162 | const IR::U32 lower_bits{CbufLowerBits(ir, unaligned, binding, offset)}; | ||
| 163 | return ir.PackDouble2x32(ir.CompositeConstruct(lower_bits, value)); | ||
| 164 | } | ||
| 165 | |||
| 166 | IR::U64 TranslatorVisitor::GetPackedCbuf(u64 insn) { | ||
| 167 | union { | ||
| 168 | u64 raw; | ||
| 169 | BitField<20, 1, u64> unaligned; | ||
| 170 | } const cbuf{insn}; | ||
| 171 | |||
| 172 | if (cbuf.unaligned != 0) { | ||
| 173 | throw NotImplementedException("Unaligned packed constant buffer read"); | ||
| 174 | } | ||
| 175 | const auto [binding, lower_offset]{CbufAddr(insn)}; | ||
| 176 | const IR::U32 upper_offset{ir.Imm32(lower_offset.U32() + 4)}; | ||
| 177 | const IR::U32 lower_value{ir.GetCbuf(binding, lower_offset)}; | ||
| 178 | const IR::U32 upper_value{ir.GetCbuf(binding, upper_offset)}; | ||
| 179 | return ir.PackUint2x32(ir.CompositeConstruct(lower_value, upper_value)); | ||
| 180 | } | ||
| 181 | |||
| 182 | IR::U32 TranslatorVisitor::GetImm20(u64 insn) { | ||
| 183 | union { | ||
| 184 | u64 raw; | ||
| 185 | BitField<20, 19, u64> value; | ||
| 186 | BitField<56, 1, u64> is_negative; | ||
| 187 | } const imm{insn}; | ||
| 188 | |||
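| | // The immediate is 20-bit two's complement: bit 56 is the sign, so when it is | ||
| | // set the 19-bit magnitude has 2^19 subtracted from it. | ||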
| 189 | if (imm.is_negative != 0) { | ||
| 190 | const s64 raw{static_cast<s64>(imm.value)}; | ||
| 191 | return ir.Imm32(static_cast<s32>(-(1LL << 19) + raw)); | ||
| 192 | } else { | ||
| 193 | return ir.Imm32(static_cast<u32>(imm.value)); | ||
| 194 | } | ||
| 195 | } | ||
| 196 | |||
| 197 | IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) { | ||
| 198 | union { | ||
| 199 | u64 raw; | ||
| 200 | BitField<20, 19, u64> value; | ||
| 201 | BitField<56, 1, u64> is_negative; | ||
| 202 | } const imm{insn}; | ||
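| | // The 19 immediate bits become the top bits of an f32 below the sign; | ||
| | // the low 12 mantissa bits are zero. | ||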
| 203 | const u32 sign_bit{static_cast<u32>(imm.is_negative != 0 ? (1ULL << 31) : 0)}; | ||
| 204 | const u32 value{static_cast<u32>(imm.value) << 12}; | ||
| 205 | return ir.Imm32(Common::BitCast<f32>(value | sign_bit)); | ||
| 206 | } | ||
| 207 | |||
| 208 | IR::F64 TranslatorVisitor::GetDoubleImm20(u64 insn) { | ||
| 209 | union { | ||
| 210 | u64 raw; | ||
| 211 | BitField<20, 19, u64> value; | ||
| 212 | BitField<56, 1, u64> is_negative; | ||
| 213 | } const imm{insn}; | ||
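| | // Same scheme as GetFloatImm20: the 19 bits sit just below the f64 sign bit. | ||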
| 214 | const u64 sign_bit{imm.is_negative != 0 ? (1ULL << 63) : 0}; | ||
| 215 | const u64 value{imm.value << 44}; | ||
| 216 | return ir.Imm64(Common::BitCast<f64>(value | sign_bit)); | ||
| 217 | } | ||
| 218 | |||
| 219 | IR::U64 TranslatorVisitor::GetPackedImm20(u64 insn) { | ||
| 220 | const s64 value{GetImm20(insn).U32()}; | ||
| 221 | return ir.Imm64(static_cast<u64>(static_cast<s64>(value) << 32)); | ||
| 222 | } | ||
| 223 | |||
| 224 | IR::U32 TranslatorVisitor::GetImm32(u64 insn) { | ||
| 225 | union { | ||
| 226 | u64 raw; | ||
| 227 | BitField<20, 32, u64> value; | ||
| 228 | } const imm{insn}; | ||
| 229 | return ir.Imm32(static_cast<u32>(imm.value)); | ||
| 230 | } | ||
| 231 | |||
| 232 | IR::F32 TranslatorVisitor::GetFloatImm32(u64 insn) { | ||
| 233 | union { | ||
| 234 | u64 raw; | ||
| 235 | BitField<20, 32, u64> value; | ||
| 236 | } const imm{insn}; | ||
| 237 | return ir.Imm32(Common::BitCast<f32>(static_cast<u32>(imm.value))); | ||
| 238 | } | ||
| 239 | |||
| 240 | void TranslatorVisitor::SetZFlag(const IR::U1& value) { | ||
| 241 | ir.SetZFlag(value); | ||
| 242 | } | ||
| 243 | |||
| 244 | void TranslatorVisitor::SetSFlag(const IR::U1& value) { | ||
| 245 | ir.SetSFlag(value); | ||
| 246 | } | ||
| 247 | |||
| 248 | void TranslatorVisitor::SetCFlag(const IR::U1& value) { | ||
| 249 | ir.SetCFlag(value); | ||
| 250 | } | ||
| 251 | |||
| 252 | void TranslatorVisitor::SetOFlag(const IR::U1& value) { | ||
| 253 | ir.SetOFlag(value); | ||
| 254 | } | ||
| 255 | |||
| 256 | void TranslatorVisitor::ResetZero() { | ||
| 257 | SetZFlag(ir.Imm1(false)); | ||
| 258 | } | ||
| 259 | |||
| 260 | void TranslatorVisitor::ResetSFlag() { | ||
| 261 | SetSFlag(ir.Imm1(false)); | ||
| 262 | } | ||
| 263 | |||
| 264 | void TranslatorVisitor::ResetCFlag() { | ||
| 265 | SetCFlag(ir.Imm1(false)); | ||
| 266 | } | ||
| 267 | |||
| 268 | void TranslatorVisitor::ResetOFlag() { | ||
| 269 | SetOFlag(ir.Imm1(false)); | ||
| 270 | } | ||
| 271 | |||
| 272 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h new file mode 100644 index 000000000..335e4f24f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h | |||
| @@ -0,0 +1,387 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "shader_recompiler/environment.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/instruction.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | |||
| 14 | enum class CompareOp : u64 { | ||
| 15 | False, | ||
| 16 | LessThan, | ||
| 17 | Equal, | ||
| 18 | LessThanEqual, | ||
| 19 | GreaterThan, | ||
| 20 | NotEqual, | ||
| 21 | GreaterThanEqual, | ||
| 22 | True, | ||
| 23 | }; | ||
| 24 | |||
| 25 | enum class BooleanOp : u64 { | ||
| 26 | AND, | ||
| 27 | OR, | ||
| 28 | XOR, | ||
| 29 | }; | ||
| 30 | |||
| 31 | enum class PredicateOp : u64 { | ||
| 32 | False, | ||
| 33 | True, | ||
| 34 | Zero, | ||
| 35 | NonZero, | ||
| 36 | }; | ||
| 37 | |||
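| | // Ordered comparisons come first; NUM is true when neither operand is NaN and | ||
| | // Nan when either is. The U-suffixed entries are the unordered variants. | ||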
| 38 | enum class FPCompareOp : u64 { | ||
| 39 | F, | ||
| 40 | LT, | ||
| 41 | EQ, | ||
| 42 | LE, | ||
| 43 | GT, | ||
| 44 | NE, | ||
| 45 | GE, | ||
| 46 | NUM, | ||
| 47 | Nan, | ||
| 48 | LTU, | ||
| 49 | EQU, | ||
| 50 | LEU, | ||
| 51 | GTU, | ||
| 52 | NEU, | ||
| 53 | GEU, | ||
| 54 | T, | ||
| 55 | }; | ||
| 56 | |||
| 57 | class TranslatorVisitor { | ||
| 58 | public: | ||
| 59 | explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {} | ||
| 60 | |||
| 61 | Environment& env; | ||
| 62 | IR::IREmitter ir; | ||
| 63 | |||
| 64 | void AL2P(u64 insn); | ||
| 65 | void ALD(u64 insn); | ||
| 66 | void AST(u64 insn); | ||
| 67 | void ATOM_cas(u64 insn); | ||
| 68 | void ATOM(u64 insn); | ||
| 69 | void ATOMS_cas(u64 insn); | ||
| 70 | void ATOMS(u64 insn); | ||
| 71 | void B2R(u64 insn); | ||
| 72 | void BAR(u64 insn); | ||
| 73 | void BFE_reg(u64 insn); | ||
| 74 | void BFE_cbuf(u64 insn); | ||
| 75 | void BFE_imm(u64 insn); | ||
| 76 | void BFI_reg(u64 insn); | ||
| 77 | void BFI_rc(u64 insn); | ||
| 78 | void BFI_cr(u64 insn); | ||
| 79 | void BFI_imm(u64 insn); | ||
| 80 | void BPT(u64 insn); | ||
| 81 | void BRA(u64 insn); | ||
| 82 | void BRK(u64 insn); | ||
| 83 | void BRX(u64 insn); | ||
| 84 | void CAL(); | ||
| 85 | void CCTL(u64 insn); | ||
| 86 | void CCTLL(u64 insn); | ||
| 87 | void CONT(u64 insn); | ||
| 88 | void CS2R(u64 insn); | ||
| 89 | void CSET(u64 insn); | ||
| 90 | void CSETP(u64 insn); | ||
| 91 | void DADD_reg(u64 insn); | ||
| 92 | void DADD_cbuf(u64 insn); | ||
| 93 | void DADD_imm(u64 insn); | ||
| 94 | void DEPBAR(); | ||
| 95 | void DFMA_reg(u64 insn); | ||
| 96 | void DFMA_rc(u64 insn); | ||
| 97 | void DFMA_cr(u64 insn); | ||
| 98 | void DFMA_imm(u64 insn); | ||
| 99 | void DMNMX_reg(u64 insn); | ||
| 100 | void DMNMX_cbuf(u64 insn); | ||
| 101 | void DMNMX_imm(u64 insn); | ||
| 102 | void DMUL_reg(u64 insn); | ||
| 103 | void DMUL_cbuf(u64 insn); | ||
| 104 | void DMUL_imm(u64 insn); | ||
| 105 | void DSET_reg(u64 insn); | ||
| 106 | void DSET_cbuf(u64 insn); | ||
| 107 | void DSET_imm(u64 insn); | ||
| 108 | void DSETP_reg(u64 insn); | ||
| 109 | void DSETP_cbuf(u64 insn); | ||
| 110 | void DSETP_imm(u64 insn); | ||
| 111 | void EXIT(); | ||
| 112 | void F2F_reg(u64 insn); | ||
| 113 | void F2F_cbuf(u64 insn); | ||
| 114 | void F2F_imm(u64 insn); | ||
| 115 | void F2I_reg(u64 insn); | ||
| 116 | void F2I_cbuf(u64 insn); | ||
| 117 | void F2I_imm(u64 insn); | ||
| 118 | void FADD_reg(u64 insn); | ||
| 119 | void FADD_cbuf(u64 insn); | ||
| 120 | void FADD_imm(u64 insn); | ||
| 121 | void FADD32I(u64 insn); | ||
| 122 | void FCHK_reg(u64 insn); | ||
| 123 | void FCHK_cbuf(u64 insn); | ||
| 124 | void FCHK_imm(u64 insn); | ||
| 125 | void FCMP_reg(u64 insn); | ||
| 126 | void FCMP_rc(u64 insn); | ||
| 127 | void FCMP_cr(u64 insn); | ||
| 128 | void FCMP_imm(u64 insn); | ||
| 129 | void FFMA_reg(u64 insn); | ||
| 130 | void FFMA_rc(u64 insn); | ||
| 131 | void FFMA_cr(u64 insn); | ||
| 132 | void FFMA_imm(u64 insn); | ||
| 133 | void FFMA32I(u64 insn); | ||
| 134 | void FLO_reg(u64 insn); | ||
| 135 | void FLO_cbuf(u64 insn); | ||
| 136 | void FLO_imm(u64 insn); | ||
| 137 | void FMNMX_reg(u64 insn); | ||
| 138 | void FMNMX_cbuf(u64 insn); | ||
| 139 | void FMNMX_imm(u64 insn); | ||
| 140 | void FMUL_reg(u64 insn); | ||
| 141 | void FMUL_cbuf(u64 insn); | ||
| 142 | void FMUL_imm(u64 insn); | ||
| 143 | void FMUL32I(u64 insn); | ||
| 144 | void FSET_reg(u64 insn); | ||
| 145 | void FSET_cbuf(u64 insn); | ||
| 146 | void FSET_imm(u64 insn); | ||
| 147 | void FSETP_reg(u64 insn); | ||
| 148 | void FSETP_cbuf(u64 insn); | ||
| 149 | void FSETP_imm(u64 insn); | ||
| 150 | void FSWZADD(u64 insn); | ||
| 151 | void GETCRSPTR(u64 insn); | ||
| 152 | void GETLMEMBASE(u64 insn); | ||
| 153 | void HADD2_reg(u64 insn); | ||
| 154 | void HADD2_cbuf(u64 insn); | ||
| 155 | void HADD2_imm(u64 insn); | ||
| 156 | void HADD2_32I(u64 insn); | ||
| 157 | void HFMA2_reg(u64 insn); | ||
| 158 | void HFMA2_rc(u64 insn); | ||
| 159 | void HFMA2_cr(u64 insn); | ||
| 160 | void HFMA2_imm(u64 insn); | ||
| 161 | void HFMA2_32I(u64 insn); | ||
| 162 | void HMUL2_reg(u64 insn); | ||
| 163 | void HMUL2_cbuf(u64 insn); | ||
| 164 | void HMUL2_imm(u64 insn); | ||
| 165 | void HMUL2_32I(u64 insn); | ||
| 166 | void HSET2_reg(u64 insn); | ||
| 167 | void HSET2_cbuf(u64 insn); | ||
| 168 | void HSET2_imm(u64 insn); | ||
| 169 | void HSETP2_reg(u64 insn); | ||
| 170 | void HSETP2_cbuf(u64 insn); | ||
| 171 | void HSETP2_imm(u64 insn); | ||
| 172 | void I2F_reg(u64 insn); | ||
| 173 | void I2F_cbuf(u64 insn); | ||
| 174 | void I2F_imm(u64 insn); | ||
| 175 | void I2I_reg(u64 insn); | ||
| 176 | void I2I_cbuf(u64 insn); | ||
| 177 | void I2I_imm(u64 insn); | ||
| 178 | void IADD_reg(u64 insn); | ||
| 179 | void IADD_cbuf(u64 insn); | ||
| 180 | void IADD_imm(u64 insn); | ||
| 181 | void IADD3_reg(u64 insn); | ||
| 182 | void IADD3_cbuf(u64 insn); | ||
| 183 | void IADD3_imm(u64 insn); | ||
| 184 | void IADD32I(u64 insn); | ||
| 185 | void ICMP_reg(u64 insn); | ||
| 186 | void ICMP_rc(u64 insn); | ||
| 187 | void ICMP_cr(u64 insn); | ||
| 188 | void ICMP_imm(u64 insn); | ||
| 189 | void IDE(u64 insn); | ||
| 190 | void IDP_reg(u64 insn); | ||
| 191 | void IDP_imm(u64 insn); | ||
| 192 | void IMAD_reg(u64 insn); | ||
| 193 | void IMAD_rc(u64 insn); | ||
| 194 | void IMAD_cr(u64 insn); | ||
| 195 | void IMAD_imm(u64 insn); | ||
| 196 | void IMAD32I(u64 insn); | ||
| 197 | void IMADSP_reg(u64 insn); | ||
| 198 | void IMADSP_rc(u64 insn); | ||
| 199 | void IMADSP_cr(u64 insn); | ||
| 200 | void IMADSP_imm(u64 insn); | ||
| 201 | void IMNMX_reg(u64 insn); | ||
| 202 | void IMNMX_cbuf(u64 insn); | ||
| 203 | void IMNMX_imm(u64 insn); | ||
| 204 | void IMUL_reg(u64 insn); | ||
| 205 | void IMUL_cbuf(u64 insn); | ||
| 206 | void IMUL_imm(u64 insn); | ||
| 207 | void IMUL32I(u64 insn); | ||
| 208 | void IPA(u64 insn); | ||
| 209 | void ISBERD(u64 insn); | ||
| 210 | void ISCADD_reg(u64 insn); | ||
| 211 | void ISCADD_cbuf(u64 insn); | ||
| 212 | void ISCADD_imm(u64 insn); | ||
| 213 | void ISCADD32I(u64 insn); | ||
| 214 | void ISET_reg(u64 insn); | ||
| 215 | void ISET_cbuf(u64 insn); | ||
| 216 | void ISET_imm(u64 insn); | ||
| 217 | void ISETP_reg(u64 insn); | ||
| 218 | void ISETP_cbuf(u64 insn); | ||
| 219 | void ISETP_imm(u64 insn); | ||
| 220 | void JCAL(u64 insn); | ||
| 221 | void JMP(u64 insn); | ||
| 222 | void JMX(u64 insn); | ||
| 223 | void KIL(); | ||
| 224 | void LD(u64 insn); | ||
| 225 | void LDC(u64 insn); | ||
| 226 | void LDG(u64 insn); | ||
| 227 | void LDL(u64 insn); | ||
| 228 | void LDS(u64 insn); | ||
| 229 | void LEA_hi_reg(u64 insn); | ||
| 230 | void LEA_hi_cbuf(u64 insn); | ||
| 231 | void LEA_lo_reg(u64 insn); | ||
| 232 | void LEA_lo_cbuf(u64 insn); | ||
| 233 | void LEA_lo_imm(u64 insn); | ||
| 234 | void LEPC(u64 insn); | ||
| 235 | void LONGJMP(u64 insn); | ||
| 236 | void LOP_reg(u64 insn); | ||
| 237 | void LOP_cbuf(u64 insn); | ||
| 238 | void LOP_imm(u64 insn); | ||
| 239 | void LOP3_reg(u64 insn); | ||
| 240 | void LOP3_cbuf(u64 insn); | ||
| 241 | void LOP3_imm(u64 insn); | ||
| 242 | void LOP32I(u64 insn); | ||
| 243 | void MEMBAR(u64 insn); | ||
| 244 | void MOV_reg(u64 insn); | ||
| 245 | void MOV_cbuf(u64 insn); | ||
| 246 | void MOV_imm(u64 insn); | ||
| 247 | void MOV32I(u64 insn); | ||
| 248 | void MUFU(u64 insn); | ||
| 249 | void NOP(u64 insn); | ||
| 250 | void OUT_reg(u64 insn); | ||
| 251 | void OUT_cbuf(u64 insn); | ||
| 252 | void OUT_imm(u64 insn); | ||
| 253 | void P2R_reg(u64 insn); | ||
| 254 | void P2R_cbuf(u64 insn); | ||
| 255 | void P2R_imm(u64 insn); | ||
| 256 | void PBK(); | ||
| 257 | void PCNT(); | ||
| 258 | void PEXIT(u64 insn); | ||
| 259 | void PIXLD(u64 insn); | ||
| 260 | void PLONGJMP(u64 insn); | ||
| 261 | void POPC_reg(u64 insn); | ||
| 262 | void POPC_cbuf(u64 insn); | ||
| 263 | void POPC_imm(u64 insn); | ||
| 264 | void PRET(u64 insn); | ||
| 265 | void PRMT_reg(u64 insn); | ||
| 266 | void PRMT_rc(u64 insn); | ||
| 267 | void PRMT_cr(u64 insn); | ||
| 268 | void PRMT_imm(u64 insn); | ||
| 269 | void PSET(u64 insn); | ||
| 270 | void PSETP(u64 insn); | ||
| 271 | void R2B(u64 insn); | ||
| 272 | void R2P_reg(u64 insn); | ||
| 273 | void R2P_cbuf(u64 insn); | ||
| 274 | void R2P_imm(u64 insn); | ||
| 275 | void RAM(u64 insn); | ||
| 276 | void RED(u64 insn); | ||
| 277 | void RET(u64 insn); | ||
| 278 | void RRO_reg(u64 insn); | ||
| 279 | void RRO_cbuf(u64 insn); | ||
| 280 | void RRO_imm(u64 insn); | ||
| 281 | void RTT(u64 insn); | ||
| 282 | void S2R(u64 insn); | ||
| 283 | void SAM(u64 insn); | ||
| 284 | void SEL_reg(u64 insn); | ||
| 285 | void SEL_cbuf(u64 insn); | ||
| 286 | void SEL_imm(u64 insn); | ||
| 287 | void SETCRSPTR(u64 insn); | ||
| 288 | void SETLMEMBASE(u64 insn); | ||
| 289 | void SHF_l_reg(u64 insn); | ||
| 290 | void SHF_l_imm(u64 insn); | ||
| 291 | void SHF_r_reg(u64 insn); | ||
| 292 | void SHF_r_imm(u64 insn); | ||
| 293 | void SHFL(u64 insn); | ||
| 294 | void SHL_reg(u64 insn); | ||
| 295 | void SHL_cbuf(u64 insn); | ||
| 296 | void SHL_imm(u64 insn); | ||
| 297 | void SHR_reg(u64 insn); | ||
| 298 | void SHR_cbuf(u64 insn); | ||
| 299 | void SHR_imm(u64 insn); | ||
| 300 | void SSY(); | ||
| 301 | void ST(u64 insn); | ||
| 302 | void STG(u64 insn); | ||
| 303 | void STL(u64 insn); | ||
| 304 | void STP(u64 insn); | ||
| 305 | void STS(u64 insn); | ||
| 306 | void SUATOM(u64 insn); | ||
| 307 | void SUATOM_cas(u64 insn); | ||
| 308 | void SULD(u64 insn); | ||
| 309 | void SURED(u64 insn); | ||
| 310 | void SUST(u64 insn); | ||
| 311 | void SYNC(u64 insn); | ||
| 312 | void TEX(u64 insn); | ||
| 313 | void TEX_b(u64 insn); | ||
| 314 | void TEXS(u64 insn); | ||
| 315 | void TLD(u64 insn); | ||
| 316 | void TLD_b(u64 insn); | ||
| 317 | void TLD4(u64 insn); | ||
| 318 | void TLD4_b(u64 insn); | ||
| 319 | void TLD4S(u64 insn); | ||
| 320 | void TLDS(u64 insn); | ||
| 321 | void TMML(u64 insn); | ||
| 322 | void TMML_b(u64 insn); | ||
| 323 | void TXA(u64 insn); | ||
| 324 | void TXD(u64 insn); | ||
| 325 | void TXD_b(u64 insn); | ||
| 326 | void TXQ(u64 insn); | ||
| 327 | void TXQ_b(u64 insn); | ||
| 328 | void VABSDIFF(u64 insn); | ||
| 329 | void VABSDIFF4(u64 insn); | ||
| 330 | void VADD(u64 insn); | ||
| 331 | void VMAD(u64 insn); | ||
| 332 | void VMNMX(u64 insn); | ||
| 333 | void VOTE(u64 insn); | ||
| 334 | void VOTE_vtg(u64 insn); | ||
| 335 | void VSET(u64 insn); | ||
| 336 | void VSETP(u64 insn); | ||
| 337 | void VSHL(u64 insn); | ||
| 338 | void VSHR(u64 insn); | ||
| 339 | void XMAD_reg(u64 insn); | ||
| 340 | void XMAD_rc(u64 insn); | ||
| 341 | void XMAD_cr(u64 insn); | ||
| 342 | void XMAD_imm(u64 insn); | ||
| 343 | |||
| 344 | [[nodiscard]] IR::U32 X(IR::Reg reg); | ||
| 345 | [[nodiscard]] IR::U64 L(IR::Reg reg); | ||
| 346 | [[nodiscard]] IR::F32 F(IR::Reg reg); | ||
| 347 | [[nodiscard]] IR::F64 D(IR::Reg reg); | ||
| 348 | |||
| 349 | void X(IR::Reg dest_reg, const IR::U32& value); | ||
| 350 | void L(IR::Reg dest_reg, const IR::U64& value); | ||
| 351 | void F(IR::Reg dest_reg, const IR::F32& value); | ||
| 352 | void D(IR::Reg dest_reg, const IR::F64& value); | ||
| 353 | |||
| 354 | [[nodiscard]] IR::U32 GetReg8(u64 insn); | ||
| 355 | [[nodiscard]] IR::U32 GetReg20(u64 insn); | ||
| 356 | [[nodiscard]] IR::U32 GetReg39(u64 insn); | ||
| 357 | [[nodiscard]] IR::F32 GetFloatReg8(u64 insn); | ||
| 358 | [[nodiscard]] IR::F32 GetFloatReg20(u64 insn); | ||
| 359 | [[nodiscard]] IR::F32 GetFloatReg39(u64 insn); | ||
| 360 | [[nodiscard]] IR::F64 GetDoubleReg20(u64 insn); | ||
| 361 | [[nodiscard]] IR::F64 GetDoubleReg39(u64 insn); | ||
| 362 | |||
| 363 | [[nodiscard]] IR::U32 GetCbuf(u64 insn); | ||
| 364 | [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn); | ||
| 365 | [[nodiscard]] IR::F64 GetDoubleCbuf(u64 insn); | ||
| 366 | [[nodiscard]] IR::U64 GetPackedCbuf(u64 insn); | ||
| 367 | |||
| 368 | [[nodiscard]] IR::U32 GetImm20(u64 insn); | ||
| 369 | [[nodiscard]] IR::F32 GetFloatImm20(u64 insn); | ||
| 370 | [[nodiscard]] IR::F64 GetDoubleImm20(u64 insn); | ||
| 371 | [[nodiscard]] IR::U64 GetPackedImm20(u64 insn); | ||
| 372 | |||
| 373 | [[nodiscard]] IR::U32 GetImm32(u64 insn); | ||
| 374 | [[nodiscard]] IR::F32 GetFloatImm32(u64 insn); | ||
| 375 | |||
| 376 | void SetZFlag(const IR::U1& value); | ||
| 377 | void SetSFlag(const IR::U1& value); | ||
| 378 | void SetCFlag(const IR::U1& value); | ||
| 379 | void SetOFlag(const IR::U1& value); | ||
| 380 | |||
| 381 | void ResetZero(); | ||
| 382 | void ResetSFlag(); | ||
| 383 | void ResetCFlag(); | ||
| 384 | void ResetOFlag(); | ||
| 385 | }; | ||
| 386 | |||
| 387 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp new file mode 100644 index 000000000..8ffd84867 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp | |||
| @@ -0,0 +1,105 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void IADD(TranslatorVisitor& v, u64 insn, const IR::U32 op_b, bool neg_a, bool po, bool sat, bool x, | ||
| 12 | bool cc) { | ||
| 13 | union { | ||
| 14 | u64 raw; | ||
| 15 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 16 | BitField<8, 8, IR::Reg> src_a; | ||
| 17 | } const iadd{insn}; | ||
| 18 | |||
| 19 | if (sat) { | ||
| 20 | throw NotImplementedException("IADD SAT"); | ||
| 21 | } | ||
| 22 | if (x && po) { | ||
| 23 | throw NotImplementedException("IADD X+PO"); | ||
| 24 | } | ||
| 25 | // Operand A is always read from here, negated if needed | ||
| 26 | IR::U32 op_a{v.X(iadd.src_a)}; | ||
| 27 | if (neg_a) { | ||
| 28 | op_a = v.ir.INeg(op_a); | ||
| 29 | } | ||
| 30 | // Add both operands | ||
| 31 | IR::U32 result{v.ir.IAdd(op_a, op_b)}; | ||
| 32 | if (x) { | ||
| 33 | const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))}; | ||
| 34 | result = v.ir.IAdd(result, carry); | ||
| 35 | } | ||
| 36 | if (po) { | ||
| 37 | // .PO adds one to the result | ||
| 38 | result = v.ir.IAdd(result, v.ir.Imm32(1)); | ||
| 39 | } | ||
| 40 | if (cc) { | ||
| 41 | // Store flags | ||
| 42 | // TODO: Does this grab the result pre-PO or after? | ||
| 43 | if (po) { | ||
| 44 | throw NotImplementedException("IADD CC+PO"); | ||
| 45 | } | ||
| 46 | // TODO: How does CC behave when X is set? | ||
| 47 | if (x) { | ||
| 48 | throw NotImplementedException("IADD X+CC"); | ||
| 49 | } | ||
| 50 | v.SetZFlag(v.ir.GetZeroFromOp(result)); | ||
| 51 | v.SetSFlag(v.ir.GetSignFromOp(result)); | ||
| 52 | v.SetCFlag(v.ir.GetCarryFromOp(result)); | ||
| 53 | v.SetOFlag(v.ir.GetOverflowFromOp(result)); | ||
| 54 | } | ||
| 55 | // Store result | ||
| 56 | v.X(iadd.dest_reg, result); | ||
| 57 | } | ||
| 58 | |||
| 59 | void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { | ||
| 60 | union { | ||
| 61 | u64 insn; | ||
| 62 | BitField<43, 1, u64> x; | ||
| 63 | BitField<47, 1, u64> cc; | ||
| 64 | BitField<48, 2, u64> three_for_po; | ||
| 65 | BitField<48, 1, u64> neg_b; | ||
| 66 | BitField<49, 1, u64> neg_a; | ||
| 67 | BitField<50, 1, u64> sat; | ||
| 68 | } const iadd{insn}; | ||
| 69 | |||
| 70 | const bool po{iadd.three_for_po == 3}; | ||
| 71 | if (!po && iadd.neg_b != 0) { | ||
| 72 | op_b = v.ir.INeg(op_b); | ||
| 73 | } | ||
| 74 | IADD(v, insn, op_b, iadd.neg_a != 0, po, iadd.sat != 0, iadd.x != 0, iadd.cc != 0); | ||
| 75 | } | ||
| 76 | } // Anonymous namespace | ||
| 77 | |||
| 78 | void TranslatorVisitor::IADD_reg(u64 insn) { | ||
| 79 | IADD(*this, insn, GetReg20(insn)); | ||
| 80 | } | ||
| 81 | |||
| 82 | void TranslatorVisitor::IADD_cbuf(u64 insn) { | ||
| 83 | IADD(*this, insn, GetCbuf(insn)); | ||
| 84 | } | ||
| 85 | |||
| 86 | void TranslatorVisitor::IADD_imm(u64 insn) { | ||
| 87 | IADD(*this, insn, GetImm20(insn)); | ||
| 88 | } | ||
| 89 | |||
| 90 | void TranslatorVisitor::IADD32I(u64 insn) { | ||
| 91 | union { | ||
| 92 | u64 raw; | ||
| 93 | BitField<52, 1, u64> cc; | ||
| 94 | BitField<53, 1, u64> x; | ||
| 95 | BitField<54, 1, u64> sat; | ||
| 96 | BitField<55, 2, u64> three_for_po; | ||
| 97 | BitField<56, 1, u64> neg_a; | ||
| 98 | } const iadd32i{insn}; | ||
| 99 | |||
| 100 | const bool po{iadd32i.three_for_po == 3}; | ||
| 101 | const bool neg_a{!po && iadd32i.neg_a != 0}; | ||
| 102 | IADD(*this, insn, GetImm32(insn), neg_a, po, iadd32i.sat != 0, iadd32i.x != 0, iadd32i.cc != 0); | ||
| 103 | } | ||
| 104 | |||
| 105 | } // namespace Shader::Maxwell | ||
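The .X and .PO handling above reduces to plain integer arithmetic on the host. Below is a minimal illustrative model in standalone C++ (a sketch only; iadd_model and its parameters are made-up names, not yuzu's IR API):

    #include <cstdint>

    // IADD host model: optional negation of A, optional carry extend (.X),
    // optional plus-one (.PO).
    uint32_t iadd_model(uint32_t a, uint32_t b, bool neg_a, bool x, bool carry_in, bool po) {
        if (neg_a) {
            a = ~a + 1u; // two's complement negation, as ir.INeg does
        }
        uint32_t result = a + b;
        if (x) {
            result += carry_in ? 1u : 0u; // extend with the previous carry flag
        }
        if (po) {
            result += 1u; // .PO adds one
        }
        return result;
    }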
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp new file mode 100644 index 000000000..040cfc10f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp | |||
| @@ -0,0 +1,122 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class Shift : u64 { | ||
| 12 | None, | ||
| 13 | Right, | ||
| 14 | Left, | ||
| 15 | }; | ||
| 16 | enum class Half : u64 { | ||
| 17 | All, | ||
| 18 | Lower, | ||
| 19 | Upper, | ||
| 20 | }; | ||
| 21 | |||
| 22 | [[nodiscard]] IR::U32 IntegerHalf(IR::IREmitter& ir, const IR::U32& value, Half half) { | ||
| 23 | constexpr bool is_signed{false}; | ||
| 24 | switch (half) { | ||
| 25 | case Half::All: | ||
| 26 | return value; | ||
| 27 | case Half::Lower: | ||
| 28 | return ir.BitFieldExtract(value, ir.Imm32(0), ir.Imm32(16), is_signed); | ||
| 29 | case Half::Upper: | ||
| 30 | return ir.BitFieldExtract(value, ir.Imm32(16), ir.Imm32(16), is_signed); | ||
| 31 | } | ||
| 32 | throw NotImplementedException("Invalid half"); | ||
| 33 | } | ||
| 34 | |||
| 35 | [[nodiscard]] IR::U32 IntegerShift(IR::IREmitter& ir, const IR::U32& value, Shift shift) { | ||
| 36 | switch (shift) { | ||
| 37 | case Shift::None: | ||
| 38 | return value; | ||
| 39 | case Shift::Right: { | ||
| 40 | // 33-bit RS IADD3 edge case: the carry out of the low addition must land in bit 16 of the shifted result | ||
| 41 | const IR::U1 edge_case{ir.GetCarryFromOp(value)}; | ||
| 42 | const IR::U32 shifted{ir.ShiftRightLogical(value, ir.Imm32(16))}; | ||
| 43 | return IR::U32{ir.Select(edge_case, ir.IAdd(shifted, ir.Imm32(0x10000)), shifted)}; | ||
| 44 | } | ||
| 45 | case Shift::Left: | ||
| 46 | return ir.ShiftLeftLogical(value, ir.Imm32(16)); | ||
| 47 | } | ||
| 48 | throw NotImplementedException("Invalid shift"); | ||
| 49 | } | ||
| 50 | |||
| 51 | void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 op_c, | ||
| 52 | Shift shift = Shift::None) { | ||
| 53 | union { | ||
| 54 | u64 insn; | ||
| 55 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 56 | BitField<47, 1, u64> cc; | ||
| 57 | BitField<48, 1, u64> x; | ||
| 58 | BitField<49, 1, u64> neg_c; | ||
| 59 | BitField<50, 1, u64> neg_b; | ||
| 60 | BitField<51, 1, u64> neg_a; | ||
| 61 | } iadd3{insn}; | ||
| 62 | |||
| 63 | if (iadd3.neg_a != 0) { | ||
| 64 | op_a = v.ir.INeg(op_a); | ||
| 65 | } | ||
| 66 | if (iadd3.neg_b != 0) { | ||
| 67 | op_b = v.ir.INeg(op_b); | ||
| 68 | } | ||
| 69 | if (iadd3.neg_c != 0) { | ||
| 70 | op_c = v.ir.INeg(op_c); | ||
| 71 | } | ||
| 72 | IR::U32 lhs_1{v.ir.IAdd(op_a, op_b)}; | ||
| 73 | if (iadd3.x != 0) { | ||
| 74 | // TODO: How does RS behave when X is set? | ||
| 75 | if (shift == Shift::Right) { | ||
| 76 | throw NotImplementedException("IADD3 X+RS"); | ||
| 77 | } | ||
| 78 | const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))}; | ||
| 79 | lhs_1 = v.ir.IAdd(lhs_1, carry); | ||
| 80 | } | ||
| 81 | const IR::U32 lhs_2{IntegerShift(v.ir, lhs_1, shift)}; | ||
| 82 | const IR::U32 result{v.ir.IAdd(lhs_2, op_c)}; | ||
| 83 | |||
| 84 | v.X(iadd3.dest_reg, result); | ||
| 85 | if (iadd3.cc != 0) { | ||
| 86 | // TODO: How does CC behave when X is set? | ||
| 87 | if (iadd3.x != 0) { | ||
| 88 | throw NotImplementedException("IADD3 X+CC"); | ||
| 89 | } | ||
| 90 | v.SetZFlag(v.ir.GetZeroFromOp(result)); | ||
| 91 | v.SetSFlag(v.ir.GetSignFromOp(result)); | ||
| 92 | v.SetCFlag(v.ir.GetCarryFromOp(result)); | ||
| 93 | const IR::U1 of_1{v.ir.ILessThan(lhs_1, op_a, false)}; | ||
| 94 | v.SetOFlag(v.ir.LogicalOr(v.ir.GetOverflowFromOp(result), of_1)); | ||
| 95 | } | ||
| 96 | } | ||
| 97 | } // Anonymous namespace | ||
| 98 | |||
| 99 | void TranslatorVisitor::IADD3_reg(u64 insn) { | ||
| 100 | union { | ||
| 101 | u64 insn; | ||
| 102 | BitField<37, 2, Shift> shift; | ||
| 103 | BitField<35, 2, Half> half_a; | ||
| 104 | BitField<33, 2, Half> half_b; | ||
| 105 | BitField<31, 2, Half> half_c; | ||
| 106 | } const iadd3{insn}; | ||
| 107 | |||
| 108 | const auto op_a{IntegerHalf(ir, GetReg8(insn), iadd3.half_a)}; | ||
| 109 | const auto op_b{IntegerHalf(ir, GetReg20(insn), iadd3.half_b)}; | ||
| 110 | const auto op_c{IntegerHalf(ir, GetReg39(insn), iadd3.half_c)}; | ||
| 111 | IADD3(*this, insn, op_a, op_b, op_c, iadd3.shift); | ||
| 112 | } | ||
| 113 | |||
| 114 | void TranslatorVisitor::IADD3_cbuf(u64 insn) { | ||
| 115 | IADD3(*this, insn, GetReg8(insn), GetCbuf(insn), GetReg39(insn)); | ||
| 116 | } | ||
| 117 | |||
| 118 | void TranslatorVisitor::IADD3_imm(u64 insn) { | ||
| 119 | IADD3(*this, insn, GetReg8(insn), GetImm20(insn), GetReg39(insn)); | ||
| 120 | } | ||
| 121 | |||
| 122 | } // namespace Shader::Maxwell | ||
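The .RS edge case above exists because the low addition can carry into a 33rd bit, which a plain 32-bit right shift would lose. A standalone sketch (made-up names, assuming the semantics implied by the IR code) that keeps the full sum in 64 bits makes the intent visible:

    #include <cstdint>

    enum class Shift { None, Right, Left };

    // Illustrative IADD3 model: the partial sum is kept in 64 bits so that
    // a carry out of bit 31 survives the .RS shift as bit 16 of the result.
    uint32_t iadd3_model(uint32_t a, uint32_t b, uint32_t c, Shift shift) {
        const uint64_t partial = uint64_t(a) + uint64_t(b); // 33-bit sum
        uint32_t shifted = 0;
        switch (shift) {
        case Shift::Right: shifted = uint32_t(partial >> 16); break; // carry lands in bit 16
        case Shift::Left:  shifted = uint32_t(partial) << 16; break;
        case Shift::None:  shifted = uint32_t(partial); break;
        }
        return shifted + c;
    }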
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp new file mode 100644 index 000000000..ba6e01926 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp | |||
| @@ -0,0 +1,48 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | void ICMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& operand) { | ||
| 13 | union { | ||
| 14 | u64 insn; | ||
| 15 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 16 | BitField<8, 8, IR::Reg> src_reg; | ||
| 17 | BitField<48, 1, u64> is_signed; | ||
| 18 | BitField<49, 3, CompareOp> compare_op; | ||
| 19 | } const icmp{insn}; | ||
| 20 | |||
| 21 | const IR::U32 zero{v.ir.Imm32(0)}; | ||
| 22 | const bool is_signed{icmp.is_signed != 0}; | ||
| 23 | const IR::U1 cmp_result{IntegerCompare(v.ir, operand, zero, icmp.compare_op, is_signed)}; | ||
| 24 | |||
| 25 | const IR::U32 src_reg{v.X(icmp.src_reg)}; | ||
| 26 | const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)}; | ||
| 27 | |||
| 28 | v.X(icmp.dest_reg, result); | ||
| 29 | } | ||
| 30 | } // Anonymous namespace | ||
| 31 | |||
| 32 | void TranslatorVisitor::ICMP_reg(u64 insn) { | ||
| 33 | ICMP(*this, insn, GetReg20(insn), GetReg39(insn)); | ||
| 34 | } | ||
| 35 | |||
| 36 | void TranslatorVisitor::ICMP_rc(u64 insn) { | ||
| 37 | ICMP(*this, insn, GetReg39(insn), GetCbuf(insn)); | ||
| 38 | } | ||
| 39 | |||
| 40 | void TranslatorVisitor::ICMP_cr(u64 insn) { | ||
| 41 | ICMP(*this, insn, GetCbuf(insn), GetReg39(insn)); | ||
| 42 | } | ||
| 43 | |||
| 44 | void TranslatorVisitor::ICMP_imm(u64 insn) { | ||
| 45 | ICMP(*this, insn, GetImm20(insn), GetReg39(insn)); | ||
| 46 | } | ||
| 47 | |||
| 48 | } // namespace Shader::Maxwell | ||
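ICMP is effectively a conditional select: a third operand is compared against zero and the result picks one of two sources, mirroring the ir.Select() emitted above. A minimal standalone model (illustrative names; only a subset of the compare ops is shown):

    #include <cstdint>

    enum class CompareOp { LessThan, Equal, GreaterThan }; // illustrative subset

    // ICMP: compare a third operand against zero and select between two sources.
    uint32_t icmp_model(uint32_t src_a, uint32_t src_reg, int32_t operand, CompareOp op) {
        bool cmp = false;
        switch (op) {
        case CompareOp::LessThan:    cmp = operand < 0;  break;
        case CompareOp::Equal:       cmp = operand == 0; break;
        case CompareOp::GreaterThan: cmp = operand > 0;  break;
        }
        return cmp ? src_reg : src_a;
    }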
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp new file mode 100644 index 000000000..8ce1aee04 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp | |||
| @@ -0,0 +1,80 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | IR::U1 IsetCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, | ||
| 13 | CompareOp compare_op, bool is_signed, bool x) { | ||
| 14 | return x ? ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed) | ||
| 15 | : IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed); | ||
| 16 | } | ||
| 17 | |||
| 18 | void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) { | ||
| 19 | union { | ||
| 20 | u64 insn; | ||
| 21 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 22 | BitField<8, 8, IR::Reg> src_reg; | ||
| 23 | BitField<39, 3, IR::Pred> pred; | ||
| 24 | BitField<42, 1, u64> neg_pred; | ||
| 25 | BitField<43, 1, u64> x; | ||
| 26 | BitField<44, 1, u64> bf; | ||
| 27 | BitField<45, 2, BooleanOp> bop; | ||
| 28 | BitField<47, 1, u64> cc; | ||
| 29 | BitField<48, 1, u64> is_signed; | ||
| 30 | BitField<49, 3, CompareOp> compare_op; | ||
| 31 | } const iset{insn}; | ||
| 32 | |||
| 33 | const IR::U32 src_a{v.X(iset.src_reg)}; | ||
| 34 | const bool is_signed{iset.is_signed != 0}; | ||
| 35 | const IR::U32 zero{v.ir.Imm32(0)}; | ||
| 36 | const bool x{iset.x != 0}; | ||
| 37 | const IR::U1 cmp_result{IsetCompare(v.ir, src_a, src_b, iset.compare_op, is_signed, x)}; | ||
| 38 | |||
| 39 | IR::U1 pred{v.ir.GetPred(iset.pred)}; | ||
| 40 | if (iset.neg_pred != 0) { | ||
| 41 | pred = v.ir.LogicalNot(pred); | ||
| 42 | } | ||
| 43 | const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, iset.bop)}; | ||
| 44 | |||
| 45 | const IR::U32 one_mask{v.ir.Imm32(-1)}; | ||
| 46 | const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; | ||
| 47 | const IR::U32 pass_result{iset.bf == 0 ? one_mask : fp_one}; | ||
| 48 | const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)}; | ||
| 49 | |||
| 50 | v.X(iset.dest_reg, result); | ||
| 51 | if (iset.cc != 0) { | ||
| 52 | if (x) { | ||
| 53 | throw NotImplementedException("ISET.CC + X"); | ||
| 54 | } | ||
| 55 | const IR::U1 is_zero{v.ir.IEqual(result, zero)}; | ||
| 56 | v.SetZFlag(is_zero); | ||
| 57 | if (iset.bf != 0) { | ||
| 58 | v.ResetSFlag(); | ||
| 59 | } else { | ||
| 60 | v.SetSFlag(v.ir.LogicalNot(is_zero)); | ||
| 61 | } | ||
| 62 | v.ResetCFlag(); | ||
| 63 | v.ResetOFlag(); | ||
| 64 | } | ||
| 65 | } | ||
| 66 | } // Anonymous namespace | ||
| 67 | |||
| 68 | void TranslatorVisitor::ISET_reg(u64 insn) { | ||
| 69 | ISET(*this, insn, GetReg20(insn)); | ||
| 70 | } | ||
| 71 | |||
| 72 | void TranslatorVisitor::ISET_cbuf(u64 insn) { | ||
| 73 | ISET(*this, insn, GetCbuf(insn)); | ||
| 74 | } | ||
| 75 | |||
| 76 | void TranslatorVisitor::ISET_imm(u64 insn) { | ||
| 77 | ISET(*this, insn, GetImm20(insn)); | ||
| 78 | } | ||
| 79 | |||
| 80 | } // namespace Shader::Maxwell | ||
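The result encoding above is worth spelling out: ISET writes either an all-ones integer mask or the bit pattern of 1.0f, depending on .BF. A tiny sketch (made-up names, showing only the BooleanOp::And combine for brevity):

    #include <cstdint>

    // ISET result encoding: all-ones mask by default, 1.0f's bit pattern with .BF.
    uint32_t iset_model(bool cmp_result, bool pred, bool bf) {
        const bool pass = cmp_result && pred; // BooleanOp::And case
        const uint32_t pass_value = bf ? 0x3f800000u : 0xffffffffu;
        return pass ? pass_value : 0u;
    }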
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp new file mode 100644 index 000000000..0b8119ddd --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp | |||
| @@ -0,0 +1,182 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | enum class FloatFormat : u64 { | ||
| 13 | F16 = 1, | ||
| 14 | F32 = 2, | ||
| 15 | F64 = 3, | ||
| 16 | }; | ||
| 17 | |||
| 18 | enum class IntFormat : u64 { | ||
| 19 | U8 = 0, | ||
| 20 | U16 = 1, | ||
| 21 | U32 = 2, | ||
| 22 | U64 = 3, | ||
| 23 | }; | ||
| 24 | |||
| 25 | union Encoding { | ||
| 26 | u64 raw; | ||
| 27 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 28 | BitField<8, 2, FloatFormat> float_format; | ||
| 29 | BitField<10, 2, IntFormat> int_format; | ||
| 30 | BitField<13, 1, u64> is_signed; | ||
| 31 | BitField<39, 2, FpRounding> fp_rounding; | ||
| 32 | BitField<41, 2, u64> selector; | ||
| 33 | BitField<47, 1, u64> cc; | ||
| 34 | BitField<45, 1, u64> neg; | ||
| 35 | BitField<49, 1, u64> abs; | ||
| 36 | }; | ||
| 37 | |||
| 38 | bool Is64(u64 insn) { | ||
| 39 | return Encoding{insn}.int_format == IntFormat::U64; | ||
| 40 | } | ||
| 41 | |||
| 42 | int BitSize(FloatFormat format) { | ||
| 43 | switch (format) { | ||
| 44 | case FloatFormat::F16: | ||
| 45 | return 16; | ||
| 46 | case FloatFormat::F32: | ||
| 47 | return 32; | ||
| 48 | case FloatFormat::F64: | ||
| 49 | return 64; | ||
| 50 | } | ||
| 51 | throw NotImplementedException("Invalid float format {}", format); | ||
| 52 | } | ||
| 53 | |||
| 54 | IR::U32 SmallAbs(TranslatorVisitor& v, const IR::U32& value, int bitsize) { | ||
| 55 | const IR::U32 least_value{v.ir.Imm32(-(1 << (bitsize - 1)))}; | ||
| 56 | const IR::U32 mask{v.ir.ShiftRightArithmetic(value, v.ir.Imm32(bitsize - 1))}; | ||
| 57 | const IR::U32 absolute{v.ir.BitwiseXor(v.ir.IAdd(value, mask), mask)}; | ||
| 58 | const IR::U1 is_least{v.ir.IEqual(value, least_value)}; | ||
| 59 | return IR::U32{v.ir.Select(is_least, value, absolute)}; | ||
| 60 | } | ||
| 61 | |||
| 62 | void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) { | ||
| 63 | const Encoding i2f{insn}; | ||
| 64 | if (i2f.cc != 0) { | ||
| 65 | throw NotImplementedException("I2F CC"); | ||
| 66 | } | ||
| 67 | const bool is_signed{i2f.is_signed != 0}; | ||
| 68 | int src_bitsize{}; | ||
| 69 | switch (i2f.int_format) { | ||
| 70 | case IntFormat::U8: | ||
| 71 | src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8), | ||
| 72 | v.ir.Imm32(8), is_signed); | ||
| 73 | if (i2f.abs != 0) { | ||
| 74 | src = SmallAbs(v, src, 8); | ||
| 75 | } | ||
| 76 | src_bitsize = 8; | ||
| 77 | break; | ||
| 78 | case IntFormat::U16: | ||
| 79 | if (i2f.selector == 1 || i2f.selector == 3) { | ||
| 80 | throw NotImplementedException("Invalid U16 selector {}", i2f.selector.Value()); | ||
| 81 | } | ||
| 82 | src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8), | ||
| 83 | v.ir.Imm32(16), is_signed); | ||
| 84 | if (i2f.abs != 0) { | ||
| 85 | src = SmallAbs(v, src, 16); | ||
| 86 | } | ||
| 87 | src_bitsize = 16; | ||
| 88 | break; | ||
| 89 | case IntFormat::U32: | ||
| 90 | case IntFormat::U64: | ||
| 91 | if (i2f.selector != 0) { | ||
| 92 | throw NotImplementedException("Unexpected selector {}", i2f.selector.Value()); | ||
| 93 | } | ||
| 94 | if (i2f.abs != 0 && is_signed) { | ||
| 95 | src = v.ir.IAbs(src); | ||
| 96 | } | ||
| 97 | src_bitsize = i2f.int_format == IntFormat::U64 ? 64 : 32; | ||
| 98 | break; | ||
| 99 | } | ||
| 100 | const int conversion_src_bitsize{i2f.int_format == IntFormat::U64 ? 64 : 32}; | ||
| 101 | const int dst_bitsize{BitSize(i2f.float_format)}; | ||
| 102 | const IR::FpControl fp_control{ | ||
| 103 | .no_contraction = false, | ||
| 104 | .rounding = CastFpRounding(i2f.fp_rounding), | ||
| 105 | .fmz_mode = IR::FmzMode::DontCare, | ||
| 106 | }; | ||
| 107 | auto value{v.ir.ConvertIToF(static_cast<size_t>(dst_bitsize), | ||
| 108 | static_cast<size_t>(conversion_src_bitsize), is_signed, src, | ||
| 109 | fp_control)}; | ||
| 110 | if (i2f.neg != 0) { | ||
| 111 | if (i2f.abs != 0 || !is_signed) { | ||
| 112 | // We know the value is positive | ||
| 113 | value = v.ir.FPNeg(value); | ||
| 114 | } else { | ||
| 115 | // Only negate if the input isn't the lowest value | ||
| 116 | IR::U1 is_least; | ||
| 117 | if (src_bitsize == 64) { | ||
| 118 | is_least = v.ir.IEqual(src, v.ir.Imm64(std::numeric_limits<s64>::min())); | ||
| 119 | } else if (src_bitsize == 32) { | ||
| 120 | is_least = v.ir.IEqual(src, v.ir.Imm32(std::numeric_limits<s32>::min())); | ||
| 121 | } else { | ||
| 122 | const IR::U32 least_value{v.ir.Imm32(-(1 << (src_bitsize - 1)))}; | ||
| 123 | is_least = v.ir.IEqual(src, least_value); | ||
| 124 | } | ||
| 125 | value = IR::F16F32F64{v.ir.Select(is_least, value, v.ir.FPNeg(value))}; | ||
| 126 | } | ||
| 127 | } | ||
| 128 | switch (i2f.float_format) { | ||
| 129 | case FloatFormat::F16: { | ||
| 130 | const IR::F16 zero{v.ir.FPConvert(16, v.ir.Imm32(0.0f))}; | ||
| 131 | v.X(i2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(value, zero))); | ||
| 132 | break; | ||
| 133 | } | ||
| 134 | case FloatFormat::F32: | ||
| 135 | v.F(i2f.dest_reg, value); | ||
| 136 | break; | ||
| 137 | case FloatFormat::F64: { | ||
| 138 | if (!IR::IsAligned(i2f.dest_reg, 2)) { | ||
| 139 | throw NotImplementedException("Unaligned destination {}", i2f.dest_reg.Value()); | ||
| 140 | } | ||
| 141 | const IR::Value vector{v.ir.UnpackDouble2x32(value)}; | ||
| 142 | for (int i = 0; i < 2; ++i) { | ||
| 143 | v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, static_cast<size_t>(i))}); | ||
| 144 | } | ||
| 145 | break; | ||
| 146 | } | ||
| 147 | default: | ||
| 148 | throw NotImplementedException("Invalid float format {}", i2f.float_format.Value()); | ||
| 149 | } | ||
| 150 | } | ||
| 151 | } // Anonymous namespace | ||
| 152 | |||
| 153 | void TranslatorVisitor::I2F_reg(u64 insn) { | ||
| 154 | if (Is64(insn)) { | ||
| 155 | union { | ||
| 156 | u64 raw; | ||
| 157 | BitField<20, 8, IR::Reg> reg; | ||
| 158 | } const value{insn}; | ||
| 159 | const IR::Value regs{ir.CompositeConstruct(ir.GetReg(value.reg), ir.GetReg(value.reg + 1))}; | ||
| 160 | I2F(*this, insn, ir.PackUint2x32(regs)); | ||
| 161 | } else { | ||
| 162 | I2F(*this, insn, GetReg20(insn)); | ||
| 163 | } | ||
| 164 | } | ||
| 165 | |||
| 166 | void TranslatorVisitor::I2F_cbuf(u64 insn) { | ||
| 167 | if (Is64(insn)) { | ||
| 168 | I2F(*this, insn, GetPackedCbuf(insn)); | ||
| 169 | } else { | ||
| 170 | I2F(*this, insn, GetCbuf(insn)); | ||
| 171 | } | ||
| 172 | } | ||
| 173 | |||
| 174 | void TranslatorVisitor::I2F_imm(u64 insn) { | ||
| 175 | if (Is64(insn)) { | ||
| 176 | I2F(*this, insn, GetPackedImm20(insn)); | ||
| 177 | } else { | ||
| 178 | I2F(*this, insn, GetImm20(insn)); | ||
| 179 | } | ||
| 180 | } | ||
| 181 | |||
| 182 | } // namespace Shader::Maxwell | ||
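The SmallAbs helper above computes a branchless absolute value for sub-32-bit integers, with an escape for the most negative value, whose magnitude is unrepresentable. A host-side rendering of the same trick (a sketch only; it assumes arithmetic right shift of negative values, as on mainstream compilers):

    #include <cstdint>

    // Branchless |x| for an 8- or 16-bit value sign-extended into an int32:
    // mask is 0 for non-negative x and -1 for negative x, so (x + mask) ^ mask
    // yields -x for negatives. The most negative value is passed through.
    int32_t small_abs(int32_t value, int bitsize) {
        const int32_t least = -(1 << (bitsize - 1));
        if (value == least) {
            return value; // |least| does not fit in bitsize bits
        }
        const int32_t mask = value >> (bitsize - 1); // arithmetic shift assumed
        return (value + mask) ^ mask;
    }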
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp new file mode 100644 index 000000000..5feefc0ce --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp | |||
| @@ -0,0 +1,82 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class MaxShift : u64 { | ||
| 12 | U32, | ||
| 13 | Undefined, | ||
| 14 | U64, | ||
| 15 | S64, | ||
| 16 | }; | ||
| 17 | |||
| 18 | IR::U64 PackedShift(IR::IREmitter& ir, const IR::U64& packed_int, const IR::U32& safe_shift, | ||
| 19 | bool right_shift, bool is_signed) { | ||
| 20 | if (!right_shift) { | ||
| 21 | return ir.ShiftLeftLogical(packed_int, safe_shift); | ||
| 22 | } | ||
| 23 | if (is_signed) { | ||
| 24 | return ir.ShiftRightArithmetic(packed_int, safe_shift); | ||
| 25 | } | ||
| 26 | return ir.ShiftRightLogical(packed_int, safe_shift); | ||
| 27 | } | ||
| 28 | |||
| 29 | void SHF(TranslatorVisitor& v, u64 insn, const IR::U32& shift, const IR::U32& high_bits, | ||
| 30 | bool right_shift) { | ||
| 31 | union { | ||
| 32 | u64 insn; | ||
| 33 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 34 | BitField<0, 8, IR::Reg> lo_bits_reg; | ||
| 35 | BitField<37, 2, MaxShift> max_shift; | ||
| 36 | BitField<47, 1, u64> cc; | ||
| 37 | BitField<48, 2, u64> x_mode; | ||
| 38 | BitField<50, 1, u64> wrap; | ||
| 39 | } const shf{insn}; | ||
| 40 | |||
| 41 | if (shf.cc != 0) { | ||
| 42 | throw NotImplementedException("SHF CC"); | ||
| 43 | } | ||
| 44 | if (shf.x_mode != 0) { | ||
| 45 | throw NotImplementedException("SHF X Mode"); | ||
| 46 | } | ||
| 47 | if (shf.max_shift == MaxShift::Undefined) { | ||
| 48 | throw NotImplementedException("SHF Use of undefined MaxShift value"); | ||
| 49 | } | ||
| 50 | const IR::U32 low_bits{v.X(shf.lo_bits_reg)}; | ||
| 51 | const IR::U64 packed_int{v.ir.PackUint2x32(v.ir.CompositeConstruct(low_bits, high_bits))}; | ||
| 52 | const IR::U32 max_shift{shf.max_shift == MaxShift::U32 ? v.ir.Imm32(32) : v.ir.Imm32(63)}; | ||
| 53 | const IR::U32 safe_shift{shf.wrap != 0 | ||
| 54 | ? v.ir.BitwiseAnd(shift, v.ir.ISub(max_shift, v.ir.Imm32(1))) | ||
| 55 | : v.ir.UMin(shift, max_shift)}; | ||
| 56 | |||
| 57 | const bool is_signed{shf.max_shift == MaxShift::S64}; | ||
| 58 | const IR::U64 shifted_value{PackedShift(v.ir, packed_int, safe_shift, right_shift, is_signed)}; | ||
| 59 | const IR::Value unpacked_value{v.ir.UnpackUint2x32(shifted_value)}; | ||
| 60 | |||
| 61 | const IR::U32 result{v.ir.CompositeExtract(unpacked_value, right_shift ? 0 : 1)}; | ||
| 62 | v.X(shf.dest_reg, result); | ||
| 63 | } | ||
| 64 | } // Anonymous namespace | ||
| 65 | |||
| 66 | void TranslatorVisitor::SHF_l_reg(u64 insn) { | ||
| 67 | SHF(*this, insn, GetReg20(insn), GetReg39(insn), false); | ||
| 68 | } | ||
| 69 | |||
| 70 | void TranslatorVisitor::SHF_l_imm(u64 insn) { | ||
| 71 | SHF(*this, insn, GetImm20(insn), GetReg39(insn), false); | ||
| 72 | } | ||
| 73 | |||
| 74 | void TranslatorVisitor::SHF_r_reg(u64 insn) { | ||
| 75 | SHF(*this, insn, GetReg20(insn), GetReg39(insn), true); | ||
| 76 | } | ||
| 77 | |||
| 78 | void TranslatorVisitor::SHF_r_imm(u64 insn) { | ||
| 79 | SHF(*this, insn, GetImm20(insn), GetReg39(insn), true); | ||
| 80 | } | ||
| 81 | |||
| 82 | } // namespace Shader::Maxwell | ||
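The funnel shift concatenates the two 32-bit sources into a 64-bit value, shifts, and keeps the half matching the shift direction, which is why the code extracts element 0 for right shifts and element 1 for left shifts. A simplified standalone model (illustrative; it glosses over the .W wrap and MaxShift clamp handling above):

    #include <cstdint>

    // Funnel shift: concatenate {high, low}, shift 64 bits, keep the half
    // matching the direction (low word for right, high word for left).
    uint32_t shf_model(uint32_t low, uint32_t high, uint32_t shift, bool right) {
        const uint64_t packed = (uint64_t(high) << 32) | low;
        const uint32_t s = shift & 63u; // simplification of the wrap/clamp above
        if (right) {
            return uint32_t(packed >> s);
        }
        return uint32_t((packed << s) >> 32);
    }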
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp new file mode 100644 index 000000000..1badbacc4 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp | |||
| @@ -0,0 +1,64 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void IMNMX(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { | ||
| 12 | union { | ||
| 13 | u64 insn; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> src_reg; | ||
| 16 | BitField<39, 3, IR::Pred> pred; | ||
| 17 | BitField<42, 1, u64> neg_pred; | ||
| 18 | BitField<43, 2, u64> mode; | ||
| 19 | BitField<47, 1, u64> cc; | ||
| 20 | BitField<48, 1, u64> is_signed; | ||
| 21 | } const imnmx{insn}; | ||
| 22 | |||
| 23 | if (imnmx.cc != 0) { | ||
| 24 | throw NotImplementedException("IMNMX CC"); | ||
| 25 | } | ||
| 26 | |||
| 27 | if (imnmx.mode != 0) { | ||
| 28 | throw NotImplementedException("IMNMX.MODE"); | ||
| 29 | } | ||
| 30 | |||
| 31 | const IR::U1 pred{v.ir.GetPred(imnmx.pred)}; | ||
| 32 | const IR::U32 op_a{v.X(imnmx.src_reg)}; | ||
| 33 | IR::U32 min; | ||
| 34 | IR::U32 max; | ||
| 35 | |||
| 36 | if (imnmx.is_signed != 0) { | ||
| 37 | min = IR::U32{v.ir.SMin(op_a, op_b)}; | ||
| 38 | max = IR::U32{v.ir.SMax(op_a, op_b)}; | ||
| 39 | } else { | ||
| 40 | min = IR::U32{v.ir.UMin(op_a, op_b)}; | ||
| 41 | max = IR::U32{v.ir.UMax(op_a, op_b)}; | ||
| 42 | } | ||
| 43 | if (imnmx.neg_pred != 0) { | ||
| 44 | std::swap(min, max); | ||
| 45 | } | ||
| 46 | |||
| 47 | const IR::U32 result{v.ir.Select(pred, min, max)}; | ||
| 48 | v.X(imnmx.dest_reg, result); | ||
| 49 | } | ||
| 50 | } // Anonymous namespace | ||
| 51 | |||
| 52 | void TranslatorVisitor::IMNMX_reg(u64 insn) { | ||
| 53 | IMNMX(*this, insn, GetReg20(insn)); | ||
| 54 | } | ||
| 55 | |||
| 56 | void TranslatorVisitor::IMNMX_cbuf(u64 insn) { | ||
| 57 | IMNMX(*this, insn, GetCbuf(insn)); | ||
| 58 | } | ||
| 59 | |||
| 60 | void TranslatorVisitor::IMNMX_imm(u64 insn) { | ||
| 61 | IMNMX(*this, insn, GetImm20(insn)); | ||
| 62 | } | ||
| 63 | |||
| 64 | } // namespace Shader::Maxwell | ||
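IMNMX computes both candidates and lets the predicate pick one; negating the predicate is equivalent to swapping min and max. A minimal model with illustrative names (neg_pred omitted):

    #include <algorithm>
    #include <cstdint>

    // IMNMX: compute min and max, let the predicate choose (true selects min).
    uint32_t imnmx_model(uint32_t a, uint32_t b, bool pred, bool is_signed) {
        uint32_t mn = std::min(a, b);
        uint32_t mx = std::max(a, b);
        if (is_signed) {
            mn = uint32_t(std::min(int32_t(a), int32_t(b)));
            mx = uint32_t(std::max(int32_t(a), int32_t(b)));
        }
        return pred ? mn : mx;
    }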
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp new file mode 100644 index 000000000..5ece7678d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp | |||
| @@ -0,0 +1,36 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void POPC(TranslatorVisitor& v, u64 insn, const IR::U32& src) { | ||
| 12 | union { | ||
| 13 | u64 raw; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<40, 1, u64> tilde; | ||
| 16 | } const popc{insn}; | ||
| 17 | |||
| 18 | const IR::U32 operand{popc.tilde == 0 ? src : v.ir.BitwiseNot(src)}; | ||
| 19 | const IR::U32 result{v.ir.BitCount(operand)}; | ||
| 20 | v.X(popc.dest_reg, result); | ||
| 21 | } | ||
| 22 | } // Anonymous namespace | ||
| 23 | |||
| 24 | void TranslatorVisitor::POPC_reg(u64 insn) { | ||
| 25 | POPC(*this, insn, GetReg20(insn)); | ||
| 26 | } | ||
| 27 | |||
| 28 | void TranslatorVisitor::POPC_cbuf(u64 insn) { | ||
| 29 | POPC(*this, insn, GetCbuf(insn)); | ||
| 30 | } | ||
| 31 | |||
| 32 | void TranslatorVisitor::POPC_imm(u64 insn) { | ||
| 33 | POPC(*this, insn, GetImm20(insn)); | ||
| 34 | } | ||
| 35 | |||
| 36 | } // namespace Shader::Maxwell | ||
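POPC is a population count with an optional complement of the source, which turns it into a count of zero bits. A one-function host equivalent (illustrative name):

    #include <bitset>
    #include <cstdint>

    // POPC host model: optional complement (~), then count set bits.
    uint32_t popc_model(uint32_t src, bool tilde) {
        const uint32_t operand = tilde ? ~src : src;
        return uint32_t(std::bitset<32>(operand).count());
    }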
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp new file mode 100644 index 000000000..044671943 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp | |||
| @@ -0,0 +1,86 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool cc, bool neg_a, bool neg_b, | ||
| 12 | u64 scale_imm) { | ||
| 13 | union { | ||
| 14 | u64 raw; | ||
| 15 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 16 | BitField<8, 8, IR::Reg> op_a; | ||
| 17 | } const iscadd{insn}; | ||
| 18 | |||
| 19 | const bool po{neg_a && neg_b}; | ||
| 20 | IR::U32 op_a{v.X(iscadd.op_a)}; | ||
| 21 | if (po) { | ||
| 22 | // When PO is present, add one | ||
| 23 | op_b = v.ir.IAdd(op_b, v.ir.Imm32(1)); | ||
| 24 | } else { | ||
| 25 | // When PO is not present, the bits are interpreted as negation | ||
| 26 | if (neg_a) { | ||
| 27 | op_a = v.ir.INeg(op_a); | ||
| 28 | } | ||
| 29 | if (neg_b) { | ||
| 30 | op_b = v.ir.INeg(op_b); | ||
| 31 | } | ||
| 32 | } | ||
| 33 | // With the operands already processed, scale A | ||
| 34 | const IR::U32 scale{v.ir.Imm32(static_cast<u32>(scale_imm))}; | ||
| 35 | const IR::U32 scaled_a{v.ir.ShiftLeftLogical(op_a, scale)}; | ||
| 36 | |||
| 37 | const IR::U32 result{v.ir.IAdd(scaled_a, op_b)}; | ||
| 38 | v.X(iscadd.dest_reg, result); | ||
| 39 | |||
| 40 | if (cc) { | ||
| 41 | v.SetZFlag(v.ir.GetZeroFromOp(result)); | ||
| 42 | v.SetSFlag(v.ir.GetSignFromOp(result)); | ||
| 43 | const IR::U1 carry{v.ir.GetCarryFromOp(result)}; | ||
| 44 | const IR::U1 overflow{v.ir.GetOverflowFromOp(result)}; | ||
| 45 | v.SetCFlag(po ? v.ir.LogicalOr(carry, v.ir.GetCarryFromOp(op_b)) : carry); | ||
| 46 | v.SetOFlag(po ? v.ir.LogicalOr(overflow, v.ir.GetOverflowFromOp(op_b)) : overflow); | ||
| 47 | } | ||
| 48 | } | ||
| 49 | |||
| 50 | void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { | ||
| 51 | union { | ||
| 52 | u64 raw; | ||
| 53 | BitField<47, 1, u64> cc; | ||
| 54 | BitField<48, 1, u64> neg_b; | ||
| 55 | BitField<49, 1, u64> neg_a; | ||
| 56 | BitField<39, 5, u64> scale; | ||
| 57 | } const iscadd{insn}; | ||
| 58 | |||
| 59 | ISCADD(v, insn, op_b, iscadd.cc != 0, iscadd.neg_a != 0, iscadd.neg_b != 0, iscadd.scale); | ||
| 60 | } | ||
| 61 | |||
| 62 | } // Anonymous namespace | ||
| 63 | |||
| 64 | void TranslatorVisitor::ISCADD_reg(u64 insn) { | ||
| 65 | ISCADD(*this, insn, GetReg20(insn)); | ||
| 66 | } | ||
| 67 | |||
| 68 | void TranslatorVisitor::ISCADD_cbuf(u64 insn) { | ||
| 69 | ISCADD(*this, insn, GetCbuf(insn)); | ||
| 70 | } | ||
| 71 | |||
| 72 | void TranslatorVisitor::ISCADD_imm(u64 insn) { | ||
| 73 | ISCADD(*this, insn, GetImm20(insn)); | ||
| 74 | } | ||
| 75 | |||
| 76 | void TranslatorVisitor::ISCADD32I(u64 insn) { | ||
| 77 | union { | ||
| 78 | u64 raw; | ||
| 79 | BitField<52, 1, u64> cc; | ||
| 80 | BitField<53, 5, u64> scale; | ||
| 81 | } const iscadd{insn}; | ||
| 82 | |||
| 83 | return ISCADD(*this, insn, GetImm32(insn), iscadd.cc != 0, false, false, iscadd.scale); | ||
| 84 | } | ||
| 85 | |||
| 86 | } // namespace Shader::Maxwell | ||
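ISCADD scales operand A by a power of two and adds operand B; when both negation bits are set they are reinterpreted as .PO, adding one instead of negating. A standalone sketch with made-up names (flag handling omitted):

    #include <cstdint>

    // ISCADD: (a << scale) + b; both negation bits together mean .PO (plus one).
    uint32_t iscadd_model(uint32_t a, uint32_t b, unsigned scale, bool neg_a, bool neg_b) {
        if (neg_a && neg_b) {
            b += 1u; // .PO: no negation, add one instead
        } else {
            if (neg_a) a = ~a + 1u;
            if (neg_b) b = ~b + 1u;
        }
        return (a << (scale & 31u)) + b;
    }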
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp new file mode 100644 index 000000000..bee10e5b9 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp | |||
| @@ -0,0 +1,58 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | IR::U1 IsetpCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, | ||
| 13 | CompareOp compare_op, bool is_signed, bool x) { | ||
| 14 | return x ? ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed) | ||
| 15 | : IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed); | ||
| 16 | } | ||
| 17 | |||
| 18 | void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { | ||
| 19 | union { | ||
| 20 | u64 raw; | ||
| 21 | BitField<0, 3, IR::Pred> dest_pred_b; | ||
| 22 | BitField<3, 3, IR::Pred> dest_pred_a; | ||
| 23 | BitField<8, 8, IR::Reg> src_reg_a; | ||
| 24 | BitField<39, 3, IR::Pred> bop_pred; | ||
| 25 | BitField<42, 1, u64> neg_bop_pred; | ||
| 26 | BitField<43, 1, u64> x; | ||
| 27 | BitField<45, 2, BooleanOp> bop; | ||
| 28 | BitField<48, 1, u64> is_signed; | ||
| 29 | BitField<49, 3, CompareOp> compare_op; | ||
| 30 | } const isetp{insn}; | ||
| 31 | |||
| 32 | const bool is_signed{isetp.is_signed != 0}; | ||
| 33 | const bool x{isetp.x != 0}; | ||
| 34 | const BooleanOp bop{isetp.bop}; | ||
| 35 | const CompareOp compare_op{isetp.compare_op}; | ||
| 36 | const IR::U32 op_a{v.X(isetp.src_reg_a)}; | ||
| 37 | const IR::U1 comparison{IsetpCompare(v.ir, op_a, op_b, compare_op, is_signed, x)}; | ||
| 38 | const IR::U1 bop_pred{v.ir.GetPred(isetp.bop_pred, isetp.neg_bop_pred != 0)}; | ||
| 39 | const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)}; | ||
| 40 | const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)}; | ||
| 41 | v.ir.SetPred(isetp.dest_pred_a, result_a); | ||
| 42 | v.ir.SetPred(isetp.dest_pred_b, result_b); | ||
| 43 | } | ||
| 44 | } // Anonymous namespace | ||
| 45 | |||
| 46 | void TranslatorVisitor::ISETP_reg(u64 insn) { | ||
| 47 | ISETP(*this, insn, GetReg20(insn)); | ||
| 48 | } | ||
| 49 | |||
| 50 | void TranslatorVisitor::ISETP_cbuf(u64 insn) { | ||
| 51 | ISETP(*this, insn, GetCbuf(insn)); | ||
| 52 | } | ||
| 53 | |||
| 54 | void TranslatorVisitor::ISETP_imm(u64 insn) { | ||
| 55 | ISETP(*this, insn, GetImm20(insn)); | ||
| 56 | } | ||
| 57 | |||
| 58 | } // namespace Shader::Maxwell | ||
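ISETP produces two predicates from one comparison: the second destination receives the negated comparison combined with the input predicate in the same way. A minimal model showing the BooleanOp::And case (illustrative, not yuzu's API):

    // ISETP writes two predicates from one comparison (BooleanOp::And shown).
    struct IsetpResult {
        bool dest_a;
        bool dest_b;
    };

    IsetpResult isetp_model(bool comparison, bool bop_pred) {
        return {comparison && bop_pred, !comparison && bop_pred};
    }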
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp new file mode 100644 index 000000000..20af68852 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp | |||
| @@ -0,0 +1,71 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) { | ||
| 12 | union { | ||
| 13 | u64 insn; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> src_reg_a; | ||
| 16 | BitField<39, 1, u64> w; | ||
| 17 | BitField<43, 1, u64> x; | ||
| 18 | BitField<47, 1, u64> cc; | ||
| 19 | } const shl{insn}; | ||
| 20 | |||
| 21 | if (shl.x != 0) { | ||
| 22 | throw NotImplementedException("SHL.X"); | ||
| 23 | } | ||
| 24 | if (shl.cc != 0) { | ||
| 25 | throw NotImplementedException("SHL.CC"); | ||
| 26 | } | ||
| 27 | const IR::U32 base{v.X(shl.src_reg_a)}; | ||
| 28 | IR::U32 result; | ||
| 29 | if (shl.w != 0) { | ||
| 30 | // When .W is set, the shift value is wrapped (only the low five bits are used). | ||
| 31 | // To emulate this we just have to mask it ourselves. | ||
| 32 | const IR::U32 shift{v.ir.BitwiseAnd(unsafe_shift, v.ir.Imm32(31))}; | ||
| 33 | result = v.ir.ShiftLeftLogical(base, shift); | ||
| 34 | } else { | ||
| 35 | // When .W is not set, the shift value is clamped between 0 and 32. | ||
| 36 | // To emulate this we have to keep in mind the special case of a shift by 32, which evaluates to 0. | ||
| 37 | // We can safely evaluate an out of bounds shift according to the SPIR-V specification: | ||
| 38 | // | ||
| 39 | // https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#OpShiftLeftLogical | ||
| 40 | // "Shift is treated as unsigned. The resulting value is undefined if Shift is greater than | ||
| 41 | // or equal to the bit width of the components of Base." | ||
| 42 | // | ||
| 43 | // And per the GLASM specification it is also safe to evaluate out of bounds: | ||
| 44 | // | ||
| 45 | // https://www.khronos.org/registry/OpenGL/extensions/NV/NV_gpu_program4.txt | ||
| 46 | // "The results of a shift operation ("<<") are undefined if the value of the second operand | ||
| 47 | // is negative, or greater than or equal to the number of bits in the first operand." | ||
| 48 | // | ||
| 49 | // Emphasis on undefined results in contrast to undefined behavior. | ||
| 50 | // | ||
| 51 | const IR::U1 is_safe{v.ir.ILessThan(unsafe_shift, v.ir.Imm32(32), false)}; | ||
| 52 | const IR::U32 unsafe_result{v.ir.ShiftLeftLogical(base, unsafe_shift)}; | ||
| 53 | result = IR::U32{v.ir.Select(is_safe, unsafe_result, v.ir.Imm32(0))}; | ||
| 54 | } | ||
| 55 | v.X(shl.dest_reg, result); | ||
| 56 | } | ||
| 57 | } // Anonymous namespace | ||
| 58 | |||
| 59 | void TranslatorVisitor::SHL_reg(u64 insn) { | ||
| 60 | SHL(*this, insn, GetReg20(insn)); | ||
| 61 | } | ||
| 62 | |||
| 63 | void TranslatorVisitor::SHL_cbuf(u64 insn) { | ||
| 64 | SHL(*this, insn, GetCbuf(insn)); | ||
| 65 | } | ||
| 66 | |||
| 67 | void TranslatorVisitor::SHL_imm(u64 insn) { | ||
| 68 | SHL(*this, insn, GetImm20(insn)); | ||
| 69 | } | ||
| 70 | |||
| 71 | } // namespace Shader::Maxwell | ||
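The clamped path above exists because a shift of 32 must evaluate to 0 on the GPU while being undefined in both SPIR-V and C++. A host model of both SHL modes (illustrative name):

    #include <cstdint>

    // SHL host model: .W wraps the shift amount; otherwise shifts >= 32 yield 0.
    uint32_t shl_model(uint32_t base, uint32_t shift, bool wrap) {
        if (wrap) {
            return base << (shift & 31u);
        }
        return shift < 32u ? (base << shift) : 0u;
    }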
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp new file mode 100644 index 000000000..be00bb605 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp | |||
| @@ -0,0 +1,66 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void SHR(TranslatorVisitor& v, u64 insn, const IR::U32& shift) { | ||
| 12 | union { | ||
| 13 | u64 insn; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> src_reg_a; | ||
| 16 | BitField<39, 1, u64> is_wrapped; | ||
| 17 | BitField<40, 1, u64> brev; | ||
| 18 | BitField<43, 1, u64> xmode; | ||
| 19 | BitField<47, 1, u64> cc; | ||
| 20 | BitField<48, 1, u64> is_signed; | ||
| 21 | } const shr{insn}; | ||
| 22 | |||
| 23 | if (shr.xmode != 0) { | ||
| 24 | throw NotImplementedException("SHR.XMODE"); | ||
| 25 | } | ||
| 26 | if (shr.cc != 0) { | ||
| 27 | throw NotImplementedException("SHR.CC"); | ||
| 28 | } | ||
| 29 | |||
| 30 | IR::U32 base{v.X(shr.src_reg_a)}; | ||
| 31 | if (shr.brev == 1) { | ||
| 32 | base = v.ir.BitReverse(base); | ||
| 33 | } | ||
| 34 | IR::U32 result; | ||
| 35 | const IR::U32 safe_shift{shr.is_wrapped == 0 ? shift : v.ir.BitwiseAnd(shift, v.ir.Imm32(31))}; | ||
| 36 | if (shr.is_signed == 1) { | ||
| 37 | result = IR::U32{v.ir.ShiftRightArithmetic(base, safe_shift)}; | ||
| 38 | } else { | ||
| 39 | result = IR::U32{v.ir.ShiftRightLogical(base, safe_shift)}; | ||
| 40 | } | ||
| 41 | |||
| 42 | if (shr.is_wrapped == 0) { | ||
| 43 | const IR::U32 zero{v.ir.Imm32(0)}; | ||
| 44 | const IR::U32 safe_bits{v.ir.Imm32(32)}; | ||
| 45 | |||
| 46 | const IR::U1 is_negative{v.ir.ILessThan(result, zero, true)}; | ||
| 47 | const IR::U1 is_safe{v.ir.ILessThan(shift, safe_bits, false)}; | ||
| 48 | const IR::U32 clamped_value{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)}; | ||
| 49 | result = IR::U32{v.ir.Select(is_safe, result, clamped_value)}; | ||
| 50 | } | ||
| 51 | v.X(shr.dest_reg, result); | ||
| 52 | } | ||
| 53 | } // Anonymous namespace | ||
| 54 | |||
| 55 | void TranslatorVisitor::SHR_reg(u64 insn) { | ||
| 56 | SHR(*this, insn, GetReg20(insn)); | ||
| 57 | } | ||
| 58 | |||
| 59 | void TranslatorVisitor::SHR_cbuf(u64 insn) { | ||
| 60 | SHR(*this, insn, GetCbuf(insn)); | ||
| 61 | } | ||
| 62 | |||
| 63 | void TranslatorVisitor::SHR_imm(u64 insn) { | ||
| 64 | SHR(*this, insn, GetImm20(insn)); | ||
| 65 | } | ||
| 66 | } // namespace Shader::Maxwell | ||
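SHR's non-wrapped mode saturates out-of-range shifts: to 0 for unsigned or non-negative input, and to all ones for negative signed input. A sketch under the same assumptions (made-up name; arithmetic right shift of signed values assumed):

    #include <cstdint>

    // SHR host model: wrapped shifts mask to 5 bits; otherwise out-of-range
    // shifts saturate to 0, or to all ones for negative signed input.
    uint32_t shr_model(uint32_t base, uint32_t shift, bool is_signed, bool wrap) {
        if (wrap) {
            shift &= 31u;
        } else if (shift >= 32u) {
            return (is_signed && int32_t(base) < 0) ? 0xffffffffu : 0u;
        }
        return is_signed ? uint32_t(int32_t(base) >> shift) : (base >> shift);
    }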
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp new file mode 100644 index 000000000..2932cdc42 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp | |||
| @@ -0,0 +1,135 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class SelectMode : u64 { | ||
| 12 | Default, | ||
| 13 | CLO, | ||
| 14 | CHI, | ||
| 15 | CSFU, | ||
| 16 | CBCC, | ||
| 17 | }; | ||
| 18 | |||
| 19 | enum class Half : u64 { | ||
| 20 | H0, // Least-significant bits (15:0) | ||
| 21 | H1, // Most-significant bits (31:16) | ||
| 22 | }; | ||
| 23 | |||
| 24 | IR::U32 ExtractHalf(TranslatorVisitor& v, const IR::U32& src, Half half, bool is_signed) { | ||
| 25 | const IR::U32 offset{v.ir.Imm32(half == Half::H1 ? 16 : 0)}; | ||
| 26 | return v.ir.BitFieldExtract(src, offset, v.ir.Imm32(16), is_signed); | ||
| 27 | } | ||
| 28 | |||
| 29 | void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c, | ||
| 30 | SelectMode select_mode, Half half_b, bool psl, bool mrg, bool x) { | ||
| 31 | union { | ||
| 32 | u64 raw; | ||
| 33 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 34 | BitField<8, 8, IR::Reg> src_reg_a; | ||
| 35 | BitField<47, 1, u64> cc; | ||
| 36 | BitField<48, 1, u64> is_a_signed; | ||
| 37 | BitField<49, 1, u64> is_b_signed; | ||
| 38 | BitField<53, 1, Half> half_a; | ||
| 39 | } const xmad{insn}; | ||
| 40 | |||
| 41 | if (x) { | ||
| 42 | throw NotImplementedException("XMAD X"); | ||
| 43 | } | ||
| 44 | const IR::U32 op_a{ExtractHalf(v, v.X(xmad.src_reg_a), xmad.half_a, xmad.is_a_signed != 0)}; | ||
| 45 | const IR::U32 op_b{ExtractHalf(v, src_b, half_b, xmad.is_b_signed != 0)}; | ||
| 46 | |||
| 47 | IR::U32 product{v.ir.IMul(op_a, op_b)}; | ||
| 48 | if (psl) { | ||
| 49 | // .PSL shifts the product left by 16 bits | ||
| 50 | product = v.ir.ShiftLeftLogical(product, v.ir.Imm32(16)); | ||
| 51 | } | ||
| 52 | const IR::U32 op_c{[&]() -> IR::U32 { | ||
| 53 | switch (select_mode) { | ||
| 54 | case SelectMode::Default: | ||
| 55 | return src_c; | ||
| 56 | case SelectMode::CLO: | ||
| 57 | return ExtractHalf(v, src_c, Half::H0, false); | ||
| 58 | case SelectMode::CHI: | ||
| 59 | return ExtractHalf(v, src_c, Half::H1, false); | ||
| 60 | case SelectMode::CBCC: | ||
| 61 | return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_c); | ||
| 62 | case SelectMode::CSFU: | ||
| 63 | throw NotImplementedException("XMAD CSFU"); | ||
| 64 | } | ||
| 65 | throw NotImplementedException("Invalid XMAD select mode {}", select_mode); | ||
| 66 | }()}; | ||
| 67 | IR::U32 result{v.ir.IAdd(product, op_c)}; | ||
| 68 | if (mrg) { | ||
| 69 | // .MRG inserts src_b [15:0] into result's [31:16]. | ||
| 70 | const IR::U32 lsb_b{ExtractHalf(v, src_b, Half::H0, false)}; | ||
| 71 | result = v.ir.BitFieldInsert(result, lsb_b, v.ir.Imm32(16), v.ir.Imm32(16)); | ||
| 72 | } | ||
| 73 | if (xmad.cc) { | ||
| 74 | throw NotImplementedException("XMAD CC"); | ||
| 75 | } | ||
| 76 | // Store result | ||
| 77 | v.X(xmad.dest_reg, result); | ||
| 78 | } | ||
| 79 | } // Anonymous namespace | ||
| 80 | |||
| 81 | void TranslatorVisitor::XMAD_reg(u64 insn) { | ||
| 82 | union { | ||
| 83 | u64 raw; | ||
| 84 | BitField<35, 1, Half> half_b; | ||
| 85 | BitField<36, 1, u64> psl; | ||
| 86 | BitField<37, 1, u64> mrg; | ||
| 87 | BitField<38, 1, u64> x; | ||
| 88 | BitField<50, 3, SelectMode> select_mode; | ||
| 89 | } const xmad{insn}; | ||
| 90 | |||
| 91 | XMAD(*this, insn, GetReg20(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0, | ||
| 92 | xmad.mrg != 0, xmad.x != 0); | ||
| 93 | } | ||
| 94 | |||
| 95 | void TranslatorVisitor::XMAD_rc(u64 insn) { | ||
| 96 | union { | ||
| 97 | u64 raw; | ||
| 98 | BitField<50, 2, SelectMode> select_mode; | ||
| 99 | BitField<52, 1, Half> half_b; | ||
| 100 | BitField<54, 1, u64> x; | ||
| 101 | } const xmad{insn}; | ||
| 102 | |||
| 103 | XMAD(*this, insn, GetReg39(insn), GetCbuf(insn), xmad.select_mode, xmad.half_b, false, false, | ||
| 104 | xmad.x != 0); | ||
| 105 | } | ||
| 106 | |||
| 107 | void TranslatorVisitor::XMAD_cr(u64 insn) { | ||
| 108 | union { | ||
| 109 | u64 raw; | ||
| 110 | BitField<50, 2, SelectMode> select_mode; | ||
| 111 | BitField<52, 1, Half> half_b; | ||
| 112 | BitField<54, 1, u64> x; | ||
| 113 | BitField<55, 1, u64> psl; | ||
| 114 | BitField<56, 1, u64> mrg; | ||
| 115 | } const xmad{insn}; | ||
| 116 | |||
| 117 | XMAD(*this, insn, GetCbuf(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0, | ||
| 118 | xmad.mrg != 0, xmad.x != 0); | ||
| 119 | } | ||
| 120 | |||
| 121 | void TranslatorVisitor::XMAD_imm(u64 insn) { | ||
| 122 | union { | ||
| 123 | u64 raw; | ||
| 124 | BitField<20, 16, u64> src_b; | ||
| 125 | BitField<36, 1, u64> psl; | ||
| 126 | BitField<37, 1, u64> mrg; | ||
| 127 | BitField<38, 1, u64> x; | ||
| 128 | BitField<50, 3, SelectMode> select_mode; | ||
| 129 | } const xmad{insn}; | ||
| 130 | |||
| 131 | XMAD(*this, insn, ir.Imm32(static_cast<u32>(xmad.src_b)), GetReg39(insn), xmad.select_mode, | ||
| 132 | Half::H0, xmad.psl != 0, xmad.mrg != 0, xmad.x != 0); | ||
| 133 | } | ||
| 134 | |||
| 135 | } // namespace Shader::Maxwell | ||
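XMAD multiplies two 16-bit halves into a 32-bit product, optionally shifts it (.PSL), adds a third operand, and can merge src_b's low half into the top of the result (.MRG). A simplified standalone model with made-up names (Default select mode only):

    #include <cstdint>

    // Simplified XMAD: 16x16 -> 32 multiply, optional .PSL shift, add C,
    // optional .MRG merge of src_b's low half into bits [31:16].
    uint32_t xmad_model(uint32_t half_a, uint32_t half_b, uint32_t c,
                        uint32_t src_b, bool psl, bool mrg) {
        uint32_t product = (half_a & 0xffffu) * (half_b & 0xffffu);
        if (psl) {
            product <<= 16; // .PSL shifts the product left by 16 bits
        }
        uint32_t result = product + c;
        if (mrg) {
            result = (result & 0xffffu) | ((src_b & 0xffffu) << 16); // .MRG
        }
        return result;
    }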
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp new file mode 100644 index 000000000..53e8d8923 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp | |||
| @@ -0,0 +1,126 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class IntegerWidth : u64 { | ||
| 12 | Byte, | ||
| 13 | Short, | ||
| 14 | Word, | ||
| 15 | }; | ||
| 16 | |||
| 17 | [[nodiscard]] IR::U32 WidthSize(IR::IREmitter& ir, IntegerWidth width) { | ||
| 18 | switch (width) { | ||
| 19 | case IntegerWidth::Byte: | ||
| 20 | return ir.Imm32(8); | ||
| 21 | case IntegerWidth::Short: | ||
| 22 | return ir.Imm32(16); | ||
| 23 | case IntegerWidth::Word: | ||
| 24 | return ir.Imm32(32); | ||
| 25 | default: | ||
| 26 | throw NotImplementedException("Invalid width {}", width); | ||
| 27 | } | ||
| 28 | } | ||
| 29 | |||
| 30 | [[nodiscard]] IR::U32 ConvertInteger(IR::IREmitter& ir, const IR::U32& src, | ||
| 31 | IntegerWidth dst_width) { | ||
| 32 | const IR::U32 zero{ir.Imm32(0)}; | ||
| 33 | const IR::U32 count{WidthSize(ir, dst_width)}; | ||
| 34 | return ir.BitFieldExtract(src, zero, count, false); | ||
| 35 | } | ||
| 36 | |||
| 37 | [[nodiscard]] IR::U32 SaturateInteger(IR::IREmitter& ir, const IR::U32& src, IntegerWidth dst_width, | ||
| 38 | bool dst_signed, bool src_signed) { | ||
| 39 | IR::U32 min{}; | ||
| 40 | IR::U32 max{}; | ||
| 41 | const IR::U32 zero{ir.Imm32(0)}; | ||
| 42 | switch (dst_width) { | ||
| 43 | case IntegerWidth::Byte: | ||
| 44 | min = dst_signed && src_signed ? ir.Imm32(0xffffff80) : zero; | ||
| 45 | max = dst_signed ? ir.Imm32(0x7f) : ir.Imm32(0xff); | ||
| 46 | break; | ||
| 47 | case IntegerWidth::Short: | ||
| 48 | min = dst_signed && src_signed ? ir.Imm32(0xffff8000) : zero; | ||
| 49 | max = dst_signed ? ir.Imm32(0x7fff) : ir.Imm32(0xffff); | ||
| 50 | break; | ||
| 51 | case IntegerWidth::Word: | ||
| 52 | min = dst_signed && src_signed ? ir.Imm32(0x80000000) : zero; | ||
| 53 | max = dst_signed ? ir.Imm32(0x7fffffff) : ir.Imm32(0xffffffff); | ||
| 54 | break; | ||
| 55 | default: | ||
| 56 | throw NotImplementedException("Invalid width {}", dst_width); | ||
| 57 | } | ||
| 58 | const IR::U32 value{!dst_signed && src_signed ? ir.SMax(zero, src) : src}; | ||
| 59 | return dst_signed && src_signed ? ir.SClamp(value, min, max) : ir.UClamp(value, min, max); | ||
| 60 | } | ||
| 61 | |||
| 62 | void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { | ||
| 63 | union { | ||
| 64 | u64 insn; | ||
| 65 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 66 | BitField<8, 2, IntegerWidth> dst_fmt; | ||
| 67 | BitField<12, 1, u64> dst_fmt_sign; | ||
| 68 | BitField<10, 2, IntegerWidth> src_fmt; | ||
| 69 | BitField<13, 1, u64> src_fmt_sign; | ||
| 70 | BitField<41, 3, u64> selector; | ||
| 71 | BitField<45, 1, u64> neg; | ||
| 72 | BitField<47, 1, u64> cc; | ||
| 73 | BitField<49, 1, u64> abs; | ||
| 74 | BitField<50, 1, u64> sat; | ||
| 75 | } const i2i{insn}; | ||
| 76 | |||
| 77 | if (i2i.src_fmt == IntegerWidth::Short && (i2i.selector == 1 || i2i.selector == 3)) { | ||
| 78 | throw NotImplementedException("16-bit source format incompatible with selector {}", | ||
| 79 | i2i.selector); | ||
| 80 | } | ||
| 81 | if (i2i.src_fmt == IntegerWidth::Word && i2i.selector != 0) { | ||
| 82 | throw NotImplementedException("32-bit source format incompatible with selector {}", | ||
| 83 | i2i.selector); | ||
| 84 | } | ||
| 85 | |||
| 86 | const s32 selector{static_cast<s32>(i2i.selector)}; | ||
| 87 | const IR::U32 offset{v.ir.Imm32(selector * 8)}; | ||
| 88 | const IR::U32 count{WidthSize(v.ir, i2i.src_fmt)}; | ||
| 89 | const bool src_signed{i2i.src_fmt_sign != 0}; | ||
| 90 | const bool dst_signed{i2i.dst_fmt_sign != 0}; | ||
| 91 | const bool sat{i2i.sat != 0}; | ||
| 92 | |||
| 93 | IR::U32 src_values{v.ir.BitFieldExtract(src_a, offset, count, src_signed)}; | ||
| 94 | if (i2i.abs != 0) { | ||
| 95 | src_values = v.ir.IAbs(src_values); | ||
| 96 | } | ||
| 97 | if (i2i.neg != 0) { | ||
| 98 | src_values = v.ir.INeg(src_values); | ||
| 99 | } | ||
| 100 | const IR::U32 result{ | ||
| 101 | sat ? SaturateInteger(v.ir, src_values, i2i.dst_fmt, dst_signed, src_signed) | ||
| 102 | : ConvertInteger(v.ir, src_values, i2i.dst_fmt)}; | ||
| 103 | |||
| 104 | v.X(i2i.dest_reg, result); | ||
| 105 | if (i2i.cc != 0) { | ||
| 106 | v.SetZFlag(v.ir.GetZeroFromOp(result)); | ||
| 107 | v.SetSFlag(v.ir.GetSignFromOp(result)); | ||
| 108 | v.ResetCFlag(); | ||
| 109 | v.ResetOFlag(); | ||
| 110 | } | ||
| 111 | } | ||
| 112 | } // Anonymous namespace | ||
| 113 | |||
| 114 | void TranslatorVisitor::I2I_reg(u64 insn) { | ||
| 115 | I2I(*this, insn, GetReg20(insn)); | ||
| 116 | } | ||
| 117 | |||
| 118 | void TranslatorVisitor::I2I_cbuf(u64 insn) { | ||
| 119 | I2I(*this, insn, GetCbuf(insn)); | ||
| 120 | } | ||
| 121 | |||
| 122 | void TranslatorVisitor::I2I_imm(u64 insn) { | ||
| 123 | I2I(*this, insn, GetImm20(insn)); | ||
| 124 | } | ||
| 125 | |||
| 126 | } // namespace Shader::Maxwell | ||
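The saturation helper above picks its clamp bounds from the destination width and the two sign flags. A minimal host-side sketch (not part of the patch) of the same arithmetic for the byte cases, with standard C++ standing in for the IR calls:

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    // dst signed, src signed: clamp to [-128, 127], the 0xffffff80/0x7f pair above.
    int32_t SaturateToSignedByte(int32_t value) {
        return std::clamp(value, int32_t{-128}, int32_t{127});
    }

    // dst unsigned, src signed: SMax against zero first, then clamp to 0xff.
    uint32_t SaturateToUnsignedByte(int32_t value) {
        const int32_t non_negative = std::max(value, int32_t{0});
        return static_cast<uint32_t>(std::min(non_negative, int32_t{0xff}));
    }

    int main() {
        std::printf("%d\n", SaturateToSignedByte(300));  // 127
        std::printf("%u\n", SaturateToUnsignedByte(-5)); // 0
    }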
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp new file mode 100644 index 000000000..9b85f8059 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp | |||
| @@ -0,0 +1,53 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class Mode : u64 { | ||
| 12 | Default, | ||
| 13 | Patch, | ||
| 14 | Prim, | ||
| 15 | Attr, | ||
| 16 | }; | ||
| 17 | |||
| 18 | enum class Shift : u64 { | ||
| 19 | Default, | ||
| 20 | U16, | ||
| 21 | B32, | ||
| 22 | }; | ||
| 23 | |||
| 24 | } // Anonymous namespace | ||
| 25 | |||
| 26 | void TranslatorVisitor::ISBERD(u64 insn) { | ||
| 27 | union { | ||
| 28 | u64 raw; | ||
| 29 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 30 | BitField<8, 8, IR::Reg> src_reg; | ||
| 31 | BitField<31, 1, u64> skew; | ||
| 32 | BitField<32, 1, u64> o; | ||
| 33 | BitField<33, 2, Mode> mode; | ||
| 34 | BitField<47, 2, Shift> shift; | ||
| 35 | } const isberd{insn}; | ||
| 36 | |||
| 37 | if (isberd.skew != 0) { | ||
| 38 | throw NotImplementedException("SKEW"); | ||
| 39 | } | ||
| 40 | if (isberd.o != 0) { | ||
| 41 | throw NotImplementedException("O"); | ||
| 42 | } | ||
| 43 | if (isberd.mode != Mode::Default) { | ||
| 44 | throw NotImplementedException("Mode {}", isberd.mode.Value()); | ||
| 45 | } | ||
| 46 | if (isberd.shift != Shift::Default) { | ||
| 47 | throw NotImplementedException("Shift {}", isberd.shift.Value()); | ||
| 48 | } | ||
| 49 | LOG_WARNING(Shader, "(STUBBED) called"); | ||
| 50 | X(isberd.dest_reg, X(isberd.src_reg)); | ||
| 51 | } | ||
| 52 | |||
| 53 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp new file mode 100644 index 000000000..2300088e3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp | |||
| @@ -0,0 +1,62 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | using namespace LDC; | ||
| 12 | namespace { | ||
| 13 | std::pair<IR::U32, IR::U32> Slot(IR::IREmitter& ir, Mode mode, const IR::U32& imm_index, | ||
| 14 | const IR::U32& reg, const IR::U32& imm) { | ||
| 15 | switch (mode) { | ||
| 16 | case Mode::Default: | ||
| 17 | return {imm_index, ir.IAdd(reg, imm)}; | ||
| 18 | default: | ||
| 19 | break; | ||
| 20 | } | ||
| 21 | throw NotImplementedException("Mode {}", mode); | ||
| 22 | } | ||
| 23 | } // Anonymous namespace | ||
| 24 | |||
| 25 | void TranslatorVisitor::LDC(u64 insn) { | ||
| 26 | const Encoding ldc{insn}; | ||
| 27 | const IR::U32 imm_index{ir.Imm32(static_cast<u32>(ldc.index))}; | ||
| 28 | const IR::U32 reg{X(ldc.src_reg)}; | ||
| 29 | const IR::U32 imm{ir.Imm32(static_cast<s32>(ldc.offset))}; | ||
| 30 | const auto [index, offset]{Slot(ir, ldc.mode, imm_index, reg, imm)}; | ||
| 31 | switch (ldc.size) { | ||
| 32 | case Size::U8: | ||
| 33 | X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 8, false)}); | ||
| 34 | break; | ||
| 35 | case Size::S8: | ||
| 36 | X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 8, true)}); | ||
| 37 | break; | ||
| 38 | case Size::U16: | ||
| 39 | X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 16, false)}); | ||
| 40 | break; | ||
| 41 | case Size::S16: | ||
| 42 | X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 16, true)}); | ||
| 43 | break; | ||
| 44 | case Size::B32: | ||
| 45 | X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 32, false)}); | ||
| 46 | break; | ||
| 47 | case Size::B64: { | ||
| 48 | if (!IR::IsAligned(ldc.dest_reg, 2)) { | ||
| 49 | throw NotImplementedException("Unaligned destination register"); | ||
| 50 | } | ||
| 51 | const IR::Value vector{ir.GetCbuf(index, offset, 64, false)}; | ||
| 52 | for (int i = 0; i < 2; ++i) { | ||
| 53 | X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))}); | ||
| 54 | } | ||
| 55 | break; | ||
| 56 | } | ||
| 57 | default: | ||
| 58 | throw NotImplementedException("Invalid size {}", ldc.size.Value()); | ||
| 59 | } | ||
| 60 | } | ||
| 61 | |||
| 62 | } // namespace Shader::Maxwell | ||
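One pattern that recurs here (and in LDG/LDS below): a 64-bit result is written back as a register pair Rn:Rn+1, so the base register index must be even, which is what the IR::IsAligned(ldc.dest_reg, 2) guard checks before the two CompositeExtract writebacks. A trivial standalone illustration (hypothetical helper, not part of the patch):

    #include <cstdio>

    // Register pairs must start on an even index; 128-bit quads on a multiple of four.
    constexpr bool IsRegAligned(int reg_index, int alignment) {
        return reg_index % alignment == 0;
    }

    int main() {
        std::printf("%d %d\n", IsRegAligned(4, 2), IsRegAligned(5, 2)); // 1 0
    }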
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h new file mode 100644 index 000000000..3074ea0e3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h | |||
| @@ -0,0 +1,39 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/reg.h" | ||
| 10 | |||
| 11 | namespace Shader::Maxwell::LDC { | ||
| 12 | |||
| 13 | enum class Mode : u64 { | ||
| 14 | Default, | ||
| 15 | IL, | ||
| 16 | IS, | ||
| 17 | ISL, | ||
| 18 | }; | ||
| 19 | |||
| 20 | enum class Size : u64 { | ||
| 21 | U8, | ||
| 22 | S8, | ||
| 23 | U16, | ||
| 24 | S16, | ||
| 25 | B32, | ||
| 26 | B64, | ||
| 27 | }; | ||
| 28 | |||
| 29 | union Encoding { | ||
| 30 | u64 raw; | ||
| 31 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 32 | BitField<8, 8, IR::Reg> src_reg; | ||
| 33 | BitField<20, 16, s64> offset; | ||
| 34 | BitField<36, 5, u64> index; | ||
| 35 | BitField<44, 2, Mode> mode; | ||
| 36 | BitField<48, 3, Size> size; | ||
| 37 | }; | ||
| 38 | |||
| 39 | } // namespace Shader::Maxwell::LDC | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp new file mode 100644 index 000000000..4a0f04e47 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp | |||
| @@ -0,0 +1,108 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void LEA_hi(TranslatorVisitor& v, u64 insn, const IR::U32& base, IR::U32 offset_hi, u64 scale, | ||
| 12 | bool neg, bool x) { | ||
| 13 | union { | ||
| 14 | u64 insn; | ||
| 15 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 16 | BitField<8, 8, IR::Reg> offset_lo_reg; | ||
| 17 | BitField<47, 1, u64> cc; | ||
| 18 | BitField<48, 3, IR::Pred> pred; | ||
| 19 | } const lea{insn}; | ||
| 20 | |||
| 21 | if (x) { | ||
| 22 | throw NotImplementedException("LEA.HI X"); | ||
| 23 | } | ||
| 24 | if (lea.pred != IR::Pred::PT) { | ||
| 25 | throw NotImplementedException("LEA.HI Pred"); | ||
| 26 | } | ||
| 27 | if (lea.cc != 0) { | ||
| 28 | throw NotImplementedException("LEA.HI CC"); | ||
| 29 | } | ||
| 30 | |||
| 31 | const IR::U32 offset_lo{v.X(lea.offset_lo_reg)}; | ||
| 32 | const IR::U64 packed_offset{v.ir.PackUint2x32(v.ir.CompositeConstruct(offset_lo, offset_hi))}; | ||
| 33 | const IR::U64 offset{neg ? IR::U64{v.ir.INeg(packed_offset)} : packed_offset}; | ||
| 34 | |||
| 35 | const s32 hi_scale{32 - static_cast<s32>(scale)}; | ||
| 36 | const IR::U64 scaled_offset{v.ir.ShiftRightLogical(offset, v.ir.Imm32(hi_scale))}; | ||
| 37 | const IR::U32 scaled_offset_w0{v.ir.CompositeExtract(v.ir.UnpackUint2x32(scaled_offset), 0)}; | ||
| 38 | |||
| 39 | IR::U32 result{v.ir.IAdd(base, scaled_offset_w0)}; | ||
| 40 | v.X(lea.dest_reg, result); | ||
| 41 | } | ||
| 42 | |||
| 43 | void LEA_lo(TranslatorVisitor& v, u64 insn, const IR::U32& base) { | ||
| 44 | union { | ||
| 45 | u64 insn; | ||
| 46 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 47 | BitField<8, 8, IR::Reg> offset_lo_reg; | ||
| 48 | BitField<39, 5, u64> scale; | ||
| 49 | BitField<45, 1, u64> neg; | ||
| 50 | BitField<46, 1, u64> x; | ||
| 51 | BitField<47, 1, u64> cc; | ||
| 52 | BitField<48, 3, IR::Pred> pred; | ||
| 53 | } const lea{insn}; | ||
| 54 | if (lea.x != 0) { | ||
| 55 | throw NotImplementedException("LEA.LO X"); | ||
| 56 | } | ||
| 57 | if (lea.pred != IR::Pred::PT) { | ||
| 58 | throw NotImplementedException("LEA.LO Pred"); | ||
| 59 | } | ||
| 60 | if (lea.cc != 0) { | ||
| 61 | throw NotImplementedException("LEA.LO CC"); | ||
| 62 | } | ||
| 63 | |||
| 64 | const IR::U32 offset_lo{v.X(lea.offset_lo_reg)}; | ||
| 65 | const s32 scale{static_cast<s32>(lea.scale)}; | ||
| 66 | const IR::U32 offset{lea.neg != 0 ? IR::U32{v.ir.INeg(offset_lo)} : offset_lo}; | ||
| 67 | const IR::U32 scaled_offset{v.ir.ShiftLeftLogical(offset, v.ir.Imm32(scale))}; | ||
| 68 | |||
| 69 | IR::U32 result{v.ir.IAdd(base, scaled_offset)}; | ||
| 70 | v.X(lea.dest_reg, result); | ||
| 71 | } | ||
| 72 | } // Anonymous namespace | ||
| 73 | |||
| 74 | void TranslatorVisitor::LEA_hi_reg(u64 insn) { | ||
| 75 | union { | ||
| 76 | u64 insn; | ||
| 77 | BitField<28, 5, u64> scale; | ||
| 78 | BitField<37, 1, u64> neg; | ||
| 79 | BitField<38, 1, u64> x; | ||
| 80 | } const lea{insn}; | ||
| 81 | |||
| 82 | LEA_hi(*this, insn, GetReg20(insn), GetReg39(insn), lea.scale, lea.neg != 0, lea.x != 0); | ||
| 83 | } | ||
| 84 | |||
| 85 | void TranslatorVisitor::LEA_hi_cbuf(u64 insn) { | ||
| 86 | union { | ||
| 87 | u64 insn; | ||
| 88 | BitField<51, 5, u64> scale; | ||
| 89 | BitField<56, 1, u64> neg; | ||
| 90 | BitField<57, 1, u64> x; | ||
| 91 | } const lea{insn}; | ||
| 92 | |||
| 93 | LEA_hi(*this, insn, GetCbuf(insn), GetReg39(insn), lea.scale, lea.neg != 0, lea.x != 0); | ||
| 94 | } | ||
| 95 | |||
| 96 | void TranslatorVisitor::LEA_lo_reg(u64 insn) { | ||
| 97 | LEA_lo(*this, insn, GetReg20(insn)); | ||
| 98 | } | ||
| 99 | |||
| 100 | void TranslatorVisitor::LEA_lo_cbuf(u64 insn) { | ||
| 101 | LEA_lo(*this, insn, GetCbuf(insn)); | ||
| 102 | } | ||
| 103 | |||
| 104 | void TranslatorVisitor::LEA_lo_imm(u64 insn) { | ||
| 105 | LEA_lo(*this, insn, GetImm20(insn)); | ||
| 106 | } | ||
| 107 | |||
| 108 | } // namespace Shader::Maxwell | ||
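The two helpers compute the two halves of a 64-bit scaled address add. A host-side sketch (not part of the patch; negation and the CC/predicate paths omitted) of the arithmetic they emit: LEA.LO produces base + (offset_lo << scale), while LEA.HI packs offset_hi:offset_lo into 64 bits, shifts right by 32 - scale, and adds the low word of the result to base:

    #include <cstdint>
    #include <cstdio>

    uint32_t LeaLo(uint32_t base, uint32_t offset_lo, unsigned scale) {
        return base + (offset_lo << scale);
    }

    uint32_t LeaHi(uint32_t base, uint32_t offset_lo, uint32_t offset_hi, unsigned scale) {
        const uint64_t packed = (uint64_t{offset_hi} << 32) | offset_lo;
        // The bits of the scaled 64-bit offset that land in the upper word's slot.
        return base + static_cast<uint32_t>(packed >> (32 - scale));
    }

    int main() {
        std::printf("0x%08x\n", LeaLo(0x1000, 0x10, 4));      // 0x00001100
        std::printf("0x%08x\n", LeaHi(0, 0x80000000u, 1, 1)); // 0x00000003
    }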
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp new file mode 100644 index 000000000..924fb7a40 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp | |||
| @@ -0,0 +1,196 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/exception.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | enum class Size : u64 { | ||
| 15 | B32, | ||
| 16 | B64, | ||
| 17 | B96, | ||
| 18 | B128, | ||
| 19 | }; | ||
| 20 | |||
| 21 | enum class InterpolationMode : u64 { | ||
| 22 | Pass, | ||
| 23 | Multiply, | ||
| 24 | Constant, | ||
| 25 | Sc, | ||
| 26 | }; | ||
| 27 | |||
| 28 | enum class SampleMode : u64 { | ||
| 29 | Default, | ||
| 30 | Centroid, | ||
| 31 | Offset, | ||
| 32 | }; | ||
| 33 | |||
| 34 | u32 NumElements(Size size) { | ||
| 35 | switch (size) { | ||
| 36 | case Size::B32: | ||
| 37 | return 1; | ||
| 38 | case Size::B64: | ||
| 39 | return 2; | ||
| 40 | case Size::B96: | ||
| 41 | return 3; | ||
| 42 | case Size::B128: | ||
| 43 | return 4; | ||
| 44 | } | ||
| 45 | throw InvalidArgument("Invalid size {}", size); | ||
| 46 | } | ||
| 47 | |||
| 48 | template <typename F> | ||
| 49 | void HandleIndexed(TranslatorVisitor& v, IR::Reg index_reg, u32 num_elements, F&& f) { | ||
| 50 | const IR::U32 index_value{v.X(index_reg)}; | ||
| 51 | for (u32 element = 0; element < num_elements; ++element) { | ||
| 52 | const IR::U32 final_offset{ | ||
| 53 | element == 0 ? index_value : IR::U32{v.ir.IAdd(index_value, v.ir.Imm32(element * 4U))}}; | ||
| 54 | f(element, final_offset); | ||
| 55 | } | ||
| 56 | } | ||
| 57 | |||
| 58 | } // Anonymous namespace | ||
| 59 | |||
| 60 | void TranslatorVisitor::ALD(u64 insn) { | ||
| 61 | union { | ||
| 62 | u64 raw; | ||
| 63 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 64 | BitField<8, 8, IR::Reg> index_reg; | ||
| 65 | BitField<20, 10, u64> absolute_offset; | ||
| 66 | BitField<20, 11, s64> relative_offset; | ||
| 67 | BitField<39, 8, IR::Reg> vertex_reg; | ||
| 68 | BitField<32, 1, u64> o; | ||
| 69 | BitField<31, 1, u64> patch; | ||
| 70 | BitField<47, 2, Size> size; | ||
| 71 | } const ald{insn}; | ||
| 72 | |||
| 73 | const u64 offset{ald.absolute_offset.Value()}; | ||
| 74 | if (offset % 4 != 0) { | ||
| 75 | throw NotImplementedException("Unaligned absolute offset {}", offset); | ||
| 76 | } | ||
| 77 | const IR::U32 vertex{X(ald.vertex_reg)}; | ||
| 78 | const u32 num_elements{NumElements(ald.size)}; | ||
| 79 | if (ald.index_reg == IR::Reg::RZ) { | ||
| 80 | for (u32 element = 0; element < num_elements; ++element) { | ||
| 81 | if (ald.patch != 0) { | ||
| 82 | const IR::Patch patch{offset / 4 + element}; | ||
| 83 | F(ald.dest_reg + static_cast<int>(element), ir.GetPatch(patch)); | ||
| 84 | } else { | ||
| 85 | const IR::Attribute attr{offset / 4 + element}; | ||
| 86 | F(ald.dest_reg + static_cast<int>(element), ir.GetAttribute(attr, vertex)); | ||
| 87 | } | ||
| 88 | } | ||
| 89 | return; | ||
| 90 | } | ||
| 91 | if (ald.patch != 0) { | ||
| 92 | throw NotImplementedException("Indirect patch read"); | ||
| 93 | } | ||
| 94 | HandleIndexed(*this, ald.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { | ||
| 95 | F(ald.dest_reg + static_cast<int>(element), ir.GetAttributeIndexed(final_offset, vertex)); | ||
| 96 | }); | ||
| 97 | } | ||
| 98 | |||
| 99 | void TranslatorVisitor::AST(u64 insn) { | ||
| 100 | union { | ||
| 101 | u64 raw; | ||
| 102 | BitField<0, 8, IR::Reg> src_reg; | ||
| 103 | BitField<8, 8, IR::Reg> index_reg; | ||
| 104 | BitField<20, 10, u64> absolute_offset; | ||
| 105 | BitField<20, 11, s64> relative_offset; | ||
| 106 | BitField<31, 1, u64> patch; | ||
| 107 | BitField<39, 8, IR::Reg> vertex_reg; | ||
| 108 | BitField<47, 2, Size> size; | ||
| 109 | } const ast{insn}; | ||
| 110 | |||
| 114 | const u64 offset{ast.absolute_offset.Value()}; | ||
| 115 | if (offset % 4 != 0) { | ||
| 116 | throw NotImplementedException("Unaligned absolute offset {}", offset); | ||
| 117 | } | ||
| 118 | const IR::U32 vertex{X(ast.vertex_reg)}; | ||
| 119 | const u32 num_elements{NumElements(ast.size)}; | ||
| 120 | if (ast.index_reg == IR::Reg::RZ) { | ||
| 121 | for (u32 element = 0; element < num_elements; ++element) { | ||
| 122 | if (ast.patch != 0) { | ||
| 123 | const IR::Patch patch{offset / 4 + element}; | ||
| 124 | ir.SetPatch(patch, F(ast.src_reg + static_cast<int>(element))); | ||
| 125 | } else { | ||
| 126 | const IR::Attribute attr{offset / 4 + element}; | ||
| 127 | ir.SetAttribute(attr, F(ast.src_reg + static_cast<int>(element)), vertex); | ||
| 128 | } | ||
| 129 | } | ||
| 130 | return; | ||
| 131 | } | ||
| 132 | if (ast.patch != 0) { | ||
| 133 | throw NotImplementedException("Indexed tessellation patch store"); | ||
| 134 | } | ||
| 135 | HandleIndexed(*this, ast.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { | ||
| 136 | ir.SetAttributeIndexed(final_offset, F(ast.src_reg + static_cast<int>(element)), vertex); | ||
| 137 | }); | ||
| 138 | } | ||
| 139 | |||
| 140 | void TranslatorVisitor::IPA(u64 insn) { | ||
| 141 | // IPA is the instruction used to read varyings from a fragment shader. | ||
| 142 | // gl_FragCoord is mapped to the gl_Position attribute. | ||
| 143 | // It yields unknown results when used outside of the fragment shader stage. | ||
| 144 | union { | ||
| 145 | u64 raw; | ||
| 146 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 147 | BitField<8, 8, IR::Reg> index_reg; | ||
| 148 | BitField<20, 8, IR::Reg> multiplier; | ||
| 149 | BitField<30, 8, IR::Attribute> attribute; | ||
| 150 | BitField<38, 1, u64> idx; | ||
| 151 | BitField<51, 1, u64> sat; | ||
| 152 | BitField<52, 2, SampleMode> sample_mode; | ||
| 153 | BitField<54, 2, InterpolationMode> interpolation_mode; | ||
| 154 | } const ipa{insn}; | ||
| 155 | |||
| 156 | // Indexed IPAs are used for indexed varyings. | ||
| 157 | // For example: | ||
| 158 | // | ||
| 159 | // in vec4 colors[4]; | ||
| 160 | // uniform int idx; | ||
| 161 | // void main() { | ||
| 162 | // gl_FragColor = colors[idx]; | ||
| 163 | // } | ||
| 164 | const bool is_indexed{ipa.idx != 0 && ipa.index_reg != IR::Reg::RZ}; | ||
| 165 | const IR::Attribute attribute{ipa.attribute}; | ||
| 166 | IR::F32 value{is_indexed ? ir.GetAttributeIndexed(X(ipa.index_reg)) | ||
| 167 | : ir.GetAttribute(attribute)}; | ||
| 168 | if (IR::IsGeneric(attribute)) { | ||
| 169 | const ProgramHeader& sph{env.SPH()}; | ||
| 170 | const u32 attr_index{IR::GenericAttributeIndex(attribute)}; | ||
| 171 | const u32 element{static_cast<u32>(attribute) % 4}; | ||
| 172 | const std::array input_map{sph.ps.GenericInputMap(attr_index)}; | ||
| 173 | const bool is_perspective{input_map[element] == Shader::PixelImap::Perspective}; | ||
| 174 | if (is_perspective) { | ||
| 175 | const IR::F32 position_w{ir.GetAttribute(IR::Attribute::PositionW)}; | ||
| 176 | value = ir.FPMul(value, position_w); | ||
| 177 | } | ||
| 178 | } | ||
| 179 | if (ipa.interpolation_mode == InterpolationMode::Multiply) { | ||
| 180 | value = ir.FPMul(value, F(ipa.multiplier)); | ||
| 181 | } | ||
| 182 | |||
| 183 | // Saturated IPAs are generally generated out of clamped varyings. | ||
| 184 | // For example: clamp(some_varying, 0.0, 1.0) | ||
| 185 | const bool is_saturated{ipa.sat != 0}; | ||
| 186 | if (is_saturated) { | ||
| 187 | if (attribute == IR::Attribute::FrontFace) { | ||
| 188 | throw NotImplementedException("IPA.SAT on FrontFace"); | ||
| 189 | } | ||
| 190 | value = ir.FPSaturate(value); | ||
| 191 | } | ||
| 192 | |||
| 193 | F(ipa.dest_reg, value); | ||
| 194 | } | ||
| 195 | |||
| 196 | } // namespace Shader::Maxwell | ||
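The addressing in ALD/AST above is worth spelling out: in immediate mode the 10-bit byte offset selects IR::Attribute slots directly (offset / 4, one slot per 32-bit component), while indexed mode keeps runtime byte offsets and HandleIndexed adds element * 4 per component. A standalone sketch (not part of the patch):

    #include <cstdint>
    #include <cstdio>

    uint32_t ImmediateAttributeSlot(uint32_t byte_offset, uint32_t element) {
        return byte_offset / 4 + element; // compile-time slot index
    }

    uint32_t IndexedByteOffset(uint32_t index_reg, uint32_t element) {
        return index_reg + element * 4; // still a byte offset at runtime
    }

    int main() {
        // A B128 access at byte offset 0x80 touches slots 32..35.
        for (uint32_t e = 0; e < 4; ++e) {
            std::printf("slot %u, indexed offset 0x%x\n",
                        ImmediateAttributeSlot(0x80, e), IndexedByteOffset(0x80, e));
        }
    }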
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp new file mode 100644 index 000000000..d2a1dbf61 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp | |||
| @@ -0,0 +1,218 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class Size : u64 { | ||
| 12 | U8, | ||
| 13 | S8, | ||
| 14 | U16, | ||
| 15 | S16, | ||
| 16 | B32, | ||
| 17 | B64, | ||
| 18 | B128, | ||
| 19 | }; | ||
| 20 | |||
| 21 | IR::U32 Offset(TranslatorVisitor& v, u64 insn) { | ||
| 22 | union { | ||
| 23 | u64 raw; | ||
| 24 | BitField<8, 8, IR::Reg> offset_reg; | ||
| 25 | BitField<20, 24, u64> absolute_offset; | ||
| 26 | BitField<20, 24, s64> relative_offset; | ||
| 27 | } const encoding{insn}; | ||
| 28 | |||
| 29 | if (encoding.offset_reg == IR::Reg::RZ) { | ||
| 30 | return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset)); | ||
| 31 | } else { | ||
| 32 | const s32 relative{static_cast<s32>(encoding.relative_offset.Value())}; | ||
| 33 | return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative)); | ||
| 34 | } | ||
| 35 | } | ||
| 36 | |||
| 37 | std::pair<IR::U32, IR::U32> WordOffset(TranslatorVisitor& v, u64 insn) { | ||
| 38 | const IR::U32 offset{Offset(v, insn)}; | ||
| 39 | if (offset.IsImmediate()) { | ||
| 40 | return {v.ir.Imm32(offset.U32() / 4), offset}; | ||
| 41 | } else { | ||
| 42 | return {v.ir.ShiftRightArithmetic(offset, v.ir.Imm32(2)), offset}; | ||
| 43 | } | ||
| 44 | } | ||
| 45 | |||
| 46 | std::pair<int, bool> GetSize(u64 insn) { | ||
| 47 | union { | ||
| 48 | u64 raw; | ||
| 49 | BitField<48, 3, Size> size; | ||
| 50 | } const encoding{insn}; | ||
| 51 | |||
| 52 | switch (encoding.size) { | ||
| 53 | case Size::U8: | ||
| 54 | return {8, false}; | ||
| 55 | case Size::S8: | ||
| 56 | return {8, true}; | ||
| 57 | case Size::U16: | ||
| 58 | return {16, false}; | ||
| 59 | case Size::S16: | ||
| 60 | return {16, true}; | ||
| 61 | case Size::B32: | ||
| 62 | return {32, false}; | ||
| 63 | case Size::B64: | ||
| 64 | return {64, false}; | ||
| 65 | case Size::B128: | ||
| 66 | return {128, false}; | ||
| 67 | default: | ||
| 68 | throw NotImplementedException("Invalid size {}", encoding.size.Value()); | ||
| 69 | } | ||
| 70 | } | ||
| 71 | |||
| 72 | IR::Reg Reg(u64 insn) { | ||
| 73 | union { | ||
| 74 | u64 raw; | ||
| 75 | BitField<0, 8, IR::Reg> reg; | ||
| 76 | } const encoding{insn}; | ||
| 77 | |||
| 78 | return encoding.reg; | ||
| 79 | } | ||
| 80 | |||
| 81 | IR::U32 ByteOffset(IR::IREmitter& ir, const IR::U32& offset) { | ||
| 82 | return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(24)); | ||
| 83 | } | ||
| 84 | |||
| 85 | IR::U32 ShortOffset(IR::IREmitter& ir, const IR::U32& offset) { | ||
| 86 | return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(16)); | ||
| 87 | } | ||
| 88 | |||
| 89 | IR::U32 LoadLocal(TranslatorVisitor& v, const IR::U32& word_offset, const IR::U32& offset) { | ||
| 90 | const IR::U32 local_memory_size{v.ir.Imm32(v.env.LocalMemorySize())}; | ||
| 91 | const IR::U1 in_bounds{v.ir.ILessThan(offset, local_memory_size, false)}; | ||
| 92 | return IR::U32{v.ir.Select(in_bounds, v.ir.LoadLocal(word_offset), v.ir.Imm32(0))}; | ||
| 93 | } | ||
| 94 | } // Anonymous namespace | ||
| 95 | |||
| 96 | void TranslatorVisitor::LDL(u64 insn) { | ||
| 97 | const auto [word_offset, offset]{WordOffset(*this, insn)}; | ||
| 98 | const IR::U32 word{LoadLocal(*this, word_offset, offset)}; | ||
| 99 | const IR::Reg dest{Reg(insn)}; | ||
| 100 | const auto [bit_size, is_signed]{GetSize(insn)}; | ||
| 101 | switch (bit_size) { | ||
| 102 | case 8: { | ||
| 103 | const IR::U32 bit{ByteOffset(ir, offset)}; | ||
| 104 | X(dest, ir.BitFieldExtract(word, bit, ir.Imm32(8), is_signed)); | ||
| 105 | break; | ||
| 106 | } | ||
| 107 | case 16: { | ||
| 108 | const IR::U32 bit{ShortOffset(ir, offset)}; | ||
| 109 | X(dest, ir.BitFieldExtract(word, bit, ir.Imm32(16), is_signed)); | ||
| 110 | break; | ||
| 111 | } | ||
| 112 | case 32: | ||
| 113 | case 64: | ||
| 114 | case 128: | ||
| 115 | if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) { | ||
| 116 | throw NotImplementedException("Unaligned destination register {}", dest); | ||
| 117 | } | ||
| 118 | X(dest, word); | ||
| 119 | for (int i = 1; i < bit_size / 32; ++i) { | ||
| 120 | const IR::U32 sub_word_offset{ir.IAdd(word_offset, ir.Imm32(i))}; | ||
| 121 | const IR::U32 sub_offset{ir.IAdd(offset, ir.Imm32(i * 4))}; | ||
| 122 | X(dest + i, LoadLocal(*this, sub_word_offset, sub_offset)); | ||
| 123 | } | ||
| 124 | break; | ||
| 125 | } | ||
| 126 | } | ||
| 127 | |||
| 128 | void TranslatorVisitor::LDS(u64 insn) { | ||
| 129 | const IR::U32 offset{Offset(*this, insn)}; | ||
| 130 | const IR::Reg dest{Reg(insn)}; | ||
| 131 | const auto [bit_size, is_signed]{GetSize(insn)}; | ||
| 132 | const IR::Value value{ir.LoadShared(bit_size, is_signed, offset)}; | ||
| 133 | switch (bit_size) { | ||
| 134 | case 8: | ||
| 135 | case 16: | ||
| 136 | case 32: | ||
| 137 | X(dest, IR::U32{value}); | ||
| 138 | break; | ||
| 139 | case 64: | ||
| 140 | case 128: | ||
| 141 | if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) { | ||
| 142 | throw NotImplementedException("Unaligned destination register {}", dest); | ||
| 143 | } | ||
| 144 | for (int element = 0; element < bit_size / 32; ++element) { | ||
| 145 | X(dest + element, IR::U32{ir.CompositeExtract(value, static_cast<size_t>(element))}); | ||
| 146 | } | ||
| 147 | break; | ||
| 148 | } | ||
| 149 | } | ||
| 150 | |||
| 151 | void TranslatorVisitor::STL(u64 insn) { | ||
| 152 | const auto [word_offset, offset]{WordOffset(*this, insn)}; | ||
| 153 | if (offset.IsImmediate()) { | ||
| 154 | // TODO: Support storing out of bounds at runtime | ||
| 155 | if (offset.U32() >= env.LocalMemorySize()) { | ||
| 156 | LOG_WARNING(Shader, "Storing local memory at 0x{:x} with a size of 0x{:x}, dropping", | ||
| 157 | offset.U32(), env.LocalMemorySize()); | ||
| 158 | return; | ||
| 159 | } | ||
| 160 | } | ||
| 161 | const IR::Reg reg{Reg(insn)}; | ||
| 162 | const IR::U32 src{X(reg)}; | ||
| 163 | const int bit_size{GetSize(insn).first}; | ||
| 164 | switch (bit_size) { | ||
| 165 | case 8: { | ||
| 166 | const IR::U32 bit{ByteOffset(ir, offset)}; | ||
| 167 | const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(8))}; | ||
| 168 | ir.WriteLocal(word_offset, value); | ||
| 169 | break; | ||
| 170 | } | ||
| 171 | case 16: { | ||
| 172 | const IR::U32 bit{ShortOffset(ir, offset)}; | ||
| 173 | const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(16))}; | ||
| 174 | ir.WriteLocal(word_offset, value); | ||
| 175 | break; | ||
| 176 | } | ||
| 177 | case 32: | ||
| 178 | case 64: | ||
| 179 | case 128: | ||
| 180 | if (!IR::IsAligned(reg, static_cast<size_t>(bit_size / 32))) { | ||
| 181 | throw NotImplementedException("Unaligned source register"); | ||
| 182 | } | ||
| 183 | ir.WriteLocal(word_offset, src); | ||
| 184 | for (int i = 1; i < bit_size / 32; ++i) { | ||
| 185 | ir.WriteLocal(ir.IAdd(word_offset, ir.Imm32(i)), X(reg + i)); | ||
| 186 | } | ||
| 187 | break; | ||
| 188 | } | ||
| 189 | } | ||
| 190 | |||
| 191 | void TranslatorVisitor::STS(u64 insn) { | ||
| 192 | const IR::U32 offset{Offset(*this, insn)}; | ||
| 193 | const IR::Reg reg{Reg(insn)}; | ||
| 194 | const int bit_size{GetSize(insn).first}; | ||
| 195 | switch (bit_size) { | ||
| 196 | case 8: | ||
| 197 | case 16: | ||
| 198 | case 32: | ||
| 199 | ir.WriteShared(bit_size, offset, X(reg)); | ||
| 200 | break; | ||
| 201 | case 64: | ||
| 202 | if (!IR::IsAligned(reg, 2)) { | ||
| 203 | throw NotImplementedException("Unaligned source register {}", reg); | ||
| 204 | } | ||
| 205 | ir.WriteShared(64, offset, ir.CompositeConstruct(X(reg), X(reg + 1))); | ||
| 206 | break; | ||
| 207 | case 128: { | ||
| 208 | if (!IR::IsAligned(reg, 2)) { | ||
| 209 | throw NotImplementedException("Unaligned source register {}", reg); | ||
| 210 | } | ||
| 211 | const IR::Value vector{ir.CompositeConstruct(X(reg), X(reg + 1), X(reg + 2), X(reg + 3))}; | ||
| 212 | ir.WriteShared(128, offset, vector); | ||
| 213 | break; | ||
| 214 | } | ||
| 215 | } | ||
| 216 | } | ||
| 217 | |||
| 218 | } // namespace Shader::Maxwell | ||
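The ByteOffset/ShortOffset helpers above hide a small bit trick: local memory is addressed in 32-bit words (offset >> 2 in WordOffset), and sub-word accesses then extract or insert a field at a bit position derived from the low offset bits. A standalone sketch (not part of the patch):

    #include <cstdint>
    #include <cstdio>

    uint32_t WordIndex(uint32_t byte_offset) {
        return byte_offset >> 2; // which 32-bit word holds the data
    }

    uint32_t ByteBitOffset(uint32_t byte_offset) {
        return (byte_offset << 3) & 24; // (offset % 4) * 8: 0, 8, 16 or 24
    }

    uint32_t ShortBitOffset(uint32_t byte_offset) {
        return (byte_offset << 3) & 16; // halfword within the word: 0 or 16
    }

    int main() {
        // The byte at offset 7 lives in word 1, bits 24..31.
        std::printf("%u %u\n", WordIndex(7), ByteBitOffset(7)); // 1 24
    }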
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp new file mode 100644 index 000000000..36c5cff2f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp | |||
| @@ -0,0 +1,184 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/exception.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 10 | |||
| 11 | namespace Shader::Maxwell { | ||
| 12 | namespace { | ||
| 13 | enum class LoadSize : u64 { | ||
| 14 | U8, // Zero-extend | ||
| 15 | S8, // Sign-extend | ||
| 16 | U16, // Zero-extend | ||
| 17 | S16, // Sign-extend | ||
| 18 | B32, | ||
| 19 | B64, | ||
| 20 | B128, | ||
| 21 | U128, // ??? | ||
| 22 | }; | ||
| 23 | |||
| 24 | enum class StoreSize : u64 { | ||
| 25 | U8, // Zero-extend | ||
| 26 | S8, // Sign-extend | ||
| 27 | U16, // Zero-extend | ||
| 28 | S16, // Sign-extend | ||
| 29 | B32, | ||
| 30 | B64, | ||
| 31 | B128, | ||
| 32 | }; | ||
| 33 | |||
| 34 | // See Table 27 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html | ||
| 35 | enum class LoadCache : u64 { | ||
| 36 | CA, // Cache at all levels, likely to be accessed again | ||
| 37 | CG, // Cache at global level (cache in L2 and below, not L1) | ||
| 38 | CI, // ??? | ||
| 39 | CV, // Don't cache and fetch again (consider cached system memory lines stale, fetch again) | ||
| 40 | }; | ||
| 41 | |||
| 42 | // See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html | ||
| 43 | enum class StoreCache : u64 { | ||
| 44 | WB, // Cache write-back all coherent levels | ||
| 45 | CG, // Cache at global level | ||
| 46 | CS, // Cache streaming, likely to be accessed once | ||
| 47 | WT, // Cache write-through (to system memory) | ||
| 48 | }; | ||
| 49 | |||
| 50 | IR::U64 Address(TranslatorVisitor& v, u64 insn) { | ||
| 51 | union { | ||
| 52 | u64 raw; | ||
| 53 | BitField<8, 8, IR::Reg> addr_reg; | ||
| 54 | BitField<20, 24, s64> addr_offset; | ||
| 55 | BitField<20, 24, u64> rz_addr_offset; | ||
| 56 | BitField<45, 1, u64> e; | ||
| 57 | } const mem{insn}; | ||
| 58 | |||
| 59 | const IR::U64 address{[&]() -> IR::U64 { | ||
| 60 | if (mem.e == 0) { | ||
| 61 | // LDG/STG without .E uses a 32-bit pointer, zero-extend it | ||
| 62 | return v.ir.UConvert(64, v.X(mem.addr_reg)); | ||
| 63 | } | ||
| 64 | if (!IR::IsAligned(mem.addr_reg, 2)) { | ||
| 65 | throw NotImplementedException("Unaligned address register"); | ||
| 66 | } | ||
| 67 | // Pack two registers to build the 64-bit address | ||
| 68 | return v.ir.PackUint2x32(v.ir.CompositeConstruct(v.X(mem.addr_reg), v.X(mem.addr_reg + 1))); | ||
| 69 | }()}; | ||
| 70 | const u64 addr_offset{[&]() -> u64 { | ||
| 71 | if (mem.addr_reg == IR::Reg::RZ) { | ||
| 72 | // When RZ is used, the address is an absolute address | ||
| 73 | return static_cast<u64>(mem.rz_addr_offset.Value()); | ||
| 74 | } else { | ||
| 75 | return static_cast<u64>(mem.addr_offset.Value()); | ||
| 76 | } | ||
| 77 | }()}; | ||
| 78 | // Apply the offset | ||
| 79 | return v.ir.IAdd(address, v.ir.Imm64(addr_offset)); | ||
| 80 | } | ||
| 81 | } // Anonymous namespace | ||
| 82 | |||
| 83 | void TranslatorVisitor::LDG(u64 insn) { | ||
| 84 | // LDG loads global memory into registers | ||
| 85 | union { | ||
| 86 | u64 raw; | ||
| 87 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 88 | BitField<46, 2, LoadCache> cache; | ||
| 89 | BitField<48, 3, LoadSize> size; | ||
| 90 | } const ldg{insn}; | ||
| 91 | |||
| 92 | // Pointer to load data from | ||
| 93 | const IR::U64 address{Address(*this, insn)}; | ||
| 94 | const IR::Reg dest_reg{ldg.dest_reg}; | ||
| 95 | switch (ldg.size) { | ||
| 96 | case LoadSize::U8: | ||
| 97 | X(dest_reg, ir.LoadGlobalU8(address)); | ||
| 98 | break; | ||
| 99 | case LoadSize::S8: | ||
| 100 | X(dest_reg, ir.LoadGlobalS8(address)); | ||
| 101 | break; | ||
| 102 | case LoadSize::U16: | ||
| 103 | X(dest_reg, ir.LoadGlobalU16(address)); | ||
| 104 | break; | ||
| 105 | case LoadSize::S16: | ||
| 106 | X(dest_reg, ir.LoadGlobalS16(address)); | ||
| 107 | break; | ||
| 108 | case LoadSize::B32: | ||
| 109 | X(dest_reg, ir.LoadGlobal32(address)); | ||
| 110 | break; | ||
| 111 | case LoadSize::B64: { | ||
| 112 | if (!IR::IsAligned(dest_reg, 2)) { | ||
| 113 | throw NotImplementedException("Unaligned data registers"); | ||
| 114 | } | ||
| 115 | const IR::Value vector{ir.LoadGlobal64(address)}; | ||
| 116 | for (int i = 0; i < 2; ++i) { | ||
| 117 | X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))}); | ||
| 118 | } | ||
| 119 | break; | ||
| 120 | } | ||
| 121 | case LoadSize::B128: | ||
| 122 | case LoadSize::U128: { | ||
| 123 | if (!IR::IsAligned(dest_reg, 4)) { | ||
| 124 | throw NotImplementedException("Unaligned data registers"); | ||
| 125 | } | ||
| 126 | const IR::Value vector{ir.LoadGlobal128(address)}; | ||
| 127 | for (int i = 0; i < 4; ++i) { | ||
| 128 | X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))}); | ||
| 129 | } | ||
| 130 | break; | ||
| 131 | } | ||
| 132 | default: | ||
| 133 | throw NotImplementedException("Invalid LDG size {}", ldg.size.Value()); | ||
| 134 | } | ||
| 135 | } | ||
| 136 | |||
| 137 | void TranslatorVisitor::STG(u64 insn) { | ||
| 138 | // STG stores registers into global memory. | ||
| 139 | union { | ||
| 140 | u64 raw; | ||
| 141 | BitField<0, 8, IR::Reg> data_reg; | ||
| 142 | BitField<46, 2, StoreCache> cache; | ||
| 143 | BitField<48, 3, StoreSize> size; | ||
| 144 | } const stg{insn}; | ||
| 145 | |||
| 146 | // Pointer to store data into | ||
| 147 | const IR::U64 address{Address(*this, insn)}; | ||
| 148 | const IR::Reg data_reg{stg.data_reg}; | ||
| 149 | switch (stg.size) { | ||
| 150 | case StoreSize::U8: | ||
| 151 | ir.WriteGlobalU8(address, X(data_reg)); | ||
| 152 | break; | ||
| 153 | case StoreSize::S8: | ||
| 154 | ir.WriteGlobalS8(address, X(data_reg)); | ||
| 155 | break; | ||
| 156 | case StoreSize::U16: | ||
| 157 | ir.WriteGlobalU16(address, X(data_reg)); | ||
| 158 | break; | ||
| 159 | case StoreSize::S16: | ||
| 160 | ir.WriteGlobalS16(address, X(data_reg)); | ||
| 161 | break; | ||
| 162 | case StoreSize::B32: | ||
| 163 | ir.WriteGlobal32(address, X(data_reg)); | ||
| 164 | break; | ||
| 165 | case StoreSize::B64: { | ||
| 166 | if (!IR::IsAligned(data_reg, 2)) { | ||
| 167 | throw NotImplementedException("Unaligned data registers"); | ||
| 168 | } | ||
| 169 | const IR::Value vector{ir.CompositeConstruct(X(data_reg), X(data_reg + 1))}; | ||
| 170 | ir.WriteGlobal64(address, vector); | ||
| 171 | break; | ||
| 172 | } | ||
| 173 | case StoreSize::B128: { | ||
| 174 | if (!IR::IsAligned(data_reg, 4)) { | ||
| 175 | throw NotImplementedException("Unaligned data registers"); | ||
| 176 | } | ||
| 177 | const IR::Value vector{ | ||
| 178 | ir.CompositeConstruct(X(data_reg), X(data_reg + 1), X(data_reg + 2), X(data_reg + 3))}; | ||
| 179 | ir.WriteGlobal128(address, vector); | ||
| 180 | break; | ||
| 181 | } | ||
| 182 | default: | ||
| 183 | throw NotImplementedException("Invalid STG size {}", stg.size.Value()); | ||
| 184 | } | ||
| 182 | } | ||
| 183 | |||
| 184 | } // namespace Shader::Maxwell | ||
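The Address() helper above is the core of LDG/STG. A host-side sketch (not part of the patch; the RZ absolute-address case is omitted) of what it computes: without the .E bit the pointer is the zero-extended 32-bit register, with .E two consecutive registers form a 64-bit pointer, and a sign-extended 24-bit immediate is added either way:

    #include <cstdint>
    #include <cstdio>

    uint64_t GlobalAddress(bool extended, uint32_t reg_lo, uint32_t reg_hi,
                           int32_t offset24 /* sign-extended 24-bit immediate */) {
        const uint64_t base =
            extended ? (uint64_t{reg_hi} << 32) | reg_lo : uint64_t{reg_lo};
        return base + static_cast<uint64_t>(int64_t{offset24});
    }

    int main() {
        const uint64_t addr = GlobalAddress(true, 0xff000000u, 0x1u, -0x10);
        std::printf("0x%llx\n", static_cast<unsigned long long>(addr)); // 0x1fefffff0
    }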
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp new file mode 100644 index 000000000..92cd27ed4 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp | |||
| @@ -0,0 +1,116 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | enum class LogicalOp : u64 { | ||
| 13 | AND, | ||
| 14 | OR, | ||
| 15 | XOR, | ||
| 16 | PASS_B, | ||
| 17 | }; | ||
| 18 | |||
| 19 | [[nodiscard]] IR::U32 LogicalOperation(IR::IREmitter& ir, const IR::U32& operand_1, | ||
| 20 | const IR::U32& operand_2, LogicalOp op) { | ||
| 21 | switch (op) { | ||
| 22 | case LogicalOp::AND: | ||
| 23 | return ir.BitwiseAnd(operand_1, operand_2); | ||
| 24 | case LogicalOp::OR: | ||
| 25 | return ir.BitwiseOr(operand_1, operand_2); | ||
| 26 | case LogicalOp::XOR: | ||
| 27 | return ir.BitwiseXor(operand_1, operand_2); | ||
| 28 | case LogicalOp::PASS_B: | ||
| 29 | return operand_2; | ||
| 30 | default: | ||
| 31 | throw NotImplementedException("Invalid Logical operation {}", op); | ||
| 32 | } | ||
| 33 | } | ||
| 34 | |||
| 35 | void LOP(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool x, bool cc, bool inv_a, bool inv_b, | ||
| 36 | LogicalOp bit_op, std::optional<PredicateOp> pred_op = std::nullopt, | ||
| 37 | IR::Pred dest_pred = IR::Pred::PT) { | ||
| 38 | union { | ||
| 39 | u64 insn; | ||
| 40 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 41 | BitField<8, 8, IR::Reg> src_reg; | ||
| 42 | } const lop{insn}; | ||
| 43 | |||
| 44 | if (x) { | ||
| 45 | throw NotImplementedException("X"); | ||
| 46 | } | ||
| 47 | IR::U32 op_a{v.X(lop.src_reg)}; | ||
| 48 | if (inv_a) { | ||
| 49 | op_a = v.ir.BitwiseNot(op_a); | ||
| 50 | } | ||
| 51 | if (inv_b) { | ||
| 52 | op_b = v.ir.BitwiseNot(op_b); | ||
| 53 | } | ||
| 54 | |||
| 55 | const IR::U32 result{LogicalOperation(v.ir, op_a, op_b, bit_op)}; | ||
| 56 | if (pred_op) { | ||
| 57 | const IR::U1 pred_result{PredicateOperation(v.ir, result, *pred_op)}; | ||
| 58 | v.ir.SetPred(dest_pred, pred_result); | ||
| 59 | } | ||
| 60 | if (cc) { | ||
| 61 | if (bit_op == LogicalOp::PASS_B) { | ||
| 62 | v.SetZFlag(v.ir.IEqual(result, v.ir.Imm32(0))); | ||
| 63 | v.SetSFlag(v.ir.ILessThan(result, v.ir.Imm32(0), true)); | ||
| 64 | } else { | ||
| 65 | v.SetZFlag(v.ir.GetZeroFromOp(result)); | ||
| 66 | v.SetSFlag(v.ir.GetSignFromOp(result)); | ||
| 67 | } | ||
| 68 | v.ResetCFlag(); | ||
| 69 | v.ResetOFlag(); | ||
| 70 | } | ||
| 71 | v.X(lop.dest_reg, result); | ||
| 72 | } | ||
| 73 | |||
| 74 | void LOP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { | ||
| 75 | union { | ||
| 76 | u64 insn; | ||
| 77 | BitField<39, 1, u64> inv_a; | ||
| 78 | BitField<40, 1, u64> inv_b; | ||
| 79 | BitField<41, 2, LogicalOp> bit_op; | ||
| 80 | BitField<43, 1, u64> x; | ||
| 81 | BitField<44, 2, PredicateOp> pred_op; | ||
| 82 | BitField<47, 1, u64> cc; | ||
| 83 | BitField<48, 3, IR::Pred> dest_pred; | ||
| 84 | } const lop{insn}; | ||
| 85 | |||
| 86 | LOP(v, insn, op_b, lop.x != 0, lop.cc != 0, lop.inv_a != 0, lop.inv_b != 0, lop.bit_op, | ||
| 87 | lop.pred_op, lop.dest_pred); | ||
| 88 | } | ||
| 89 | } // Anonymous namespace | ||
| 90 | |||
| 91 | void TranslatorVisitor::LOP_reg(u64 insn) { | ||
| 92 | LOP(*this, insn, GetReg20(insn)); | ||
| 93 | } | ||
| 94 | |||
| 95 | void TranslatorVisitor::LOP_cbuf(u64 insn) { | ||
| 96 | LOP(*this, insn, GetCbuf(insn)); | ||
| 97 | } | ||
| 98 | |||
| 99 | void TranslatorVisitor::LOP_imm(u64 insn) { | ||
| 100 | LOP(*this, insn, GetImm20(insn)); | ||
| 101 | } | ||
| 102 | |||
| 103 | void TranslatorVisitor::LOP32I(u64 insn) { | ||
| 104 | union { | ||
| 105 | u64 raw; | ||
| 106 | BitField<53, 2, LogicalOp> bit_op; | ||
| 107 | BitField<57, 1, u64> x; | ||
| 108 | BitField<52, 1, u64> cc; | ||
| 109 | BitField<55, 1, u64> inv_a; | ||
| 110 | BitField<56, 1, u64> inv_b; | ||
| 111 | } const lop32i{insn}; | ||
| 112 | |||
| 113 | LOP(*this, insn, GetImm32(insn), lop32i.x != 0, lop32i.cc != 0, lop32i.inv_a != 0, | ||
| 114 | lop32i.inv_b != 0, lop32i.bit_op); | ||
| 115 | } | ||
| 116 | } // namespace Shader::Maxwell | ||
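For reference, the four LOP modes above reduce to plain bitwise C++ operators; PASS_B simply forwards operand B (with the optional inversions already applied). A standalone sketch (not part of the patch):

    #include <cstdint>
    #include <cstdio>

    enum class LogicalOp : uint32_t { AND, OR, XOR, PASS_B };

    uint32_t Logical(uint32_t a, uint32_t b, LogicalOp op) {
        switch (op) {
        case LogicalOp::AND:
            return a & b;
        case LogicalOp::OR:
            return a | b;
        case LogicalOp::XOR:
            return a ^ b;
        case LogicalOp::PASS_B:
            return b; // operand A is ignored
        }
        return 0; // unreachable for valid encodings
    }

    int main() {
        std::printf("0x%x\n", Logical(0xff00, 0x0ff0, LogicalOp::XOR)); // 0xf0f0
    }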
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp new file mode 100644 index 000000000..e0fe47912 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp | |||
| @@ -0,0 +1,122 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | // https://forums.developer.nvidia.com/t/reverse-lut-for-lop3-lut/110651 | ||
| 13 | // Emulate GPU's LOP3.LUT (three-input logic op with 8-bit truth table) | ||
| 14 | IR::U32 ApplyLUT(IR::IREmitter& ir, const IR::U32& a, const IR::U32& b, const IR::U32& c, | ||
| 15 | u64 ttbl) { | ||
| 16 | IR::U32 r{ir.Imm32(0)}; | ||
| 17 | const IR::U32 not_a{ir.BitwiseNot(a)}; | ||
| 18 | const IR::U32 not_b{ir.BitwiseNot(b)}; | ||
| 19 | const IR::U32 not_c{ir.BitwiseNot(c)}; | ||
| 20 | if (ttbl & 0x01) { | ||
| 21 | // r |= ~a & ~b & ~c; | ||
| 22 | const auto lhs{ir.BitwiseAnd(not_a, not_b)}; | ||
| 23 | const auto rhs{ir.BitwiseAnd(lhs, not_c)}; | ||
| 24 | r = ir.BitwiseOr(r, rhs); | ||
| 25 | } | ||
| 26 | if (ttbl & 0x02) { | ||
| 27 | // r |= ~a & ~b & c; | ||
| 28 | const auto lhs{ir.BitwiseAnd(not_a, not_b)}; | ||
| 29 | const auto rhs{ir.BitwiseAnd(lhs, c)}; | ||
| 30 | r = ir.BitwiseOr(r, rhs); | ||
| 31 | } | ||
| 32 | if (ttbl & 0x04) { | ||
| 33 | // r |= ~a & b & ~c; | ||
| 34 | const auto lhs{ir.BitwiseAnd(not_a, b)}; | ||
| 35 | const auto rhs{ir.BitwiseAnd(lhs, not_c)}; | ||
| 36 | r = ir.BitwiseOr(r, rhs); | ||
| 37 | } | ||
| 38 | if (ttbl & 0x08) { | ||
| 39 | // r |= ~a & b & c; | ||
| 40 | const auto lhs{ir.BitwiseAnd(not_a, b)}; | ||
| 41 | const auto rhs{ir.BitwiseAnd(lhs, c)}; | ||
| 42 | r = ir.BitwiseOr(r, rhs); | ||
| 43 | } | ||
| 44 | if (ttbl & 0x10) { | ||
| 45 | // r |= a & ~b & ~c; | ||
| 46 | const auto lhs{ir.BitwiseAnd(a, not_b)}; | ||
| 47 | const auto rhs{ir.BitwiseAnd(lhs, not_c)}; | ||
| 48 | r = ir.BitwiseOr(r, rhs); | ||
| 49 | } | ||
| 50 | if (ttbl & 0x20) { | ||
| 51 | // r |= a & ~b & c; | ||
| 52 | const auto lhs{ir.BitwiseAnd(a, not_b)}; | ||
| 53 | const auto rhs{ir.BitwiseAnd(lhs, c)}; | ||
| 54 | r = ir.BitwiseOr(r, rhs); | ||
| 55 | } | ||
| 56 | if (ttbl & 0x40) { | ||
| 57 | // r |= a & b & ~c; | ||
| 58 | const auto lhs{ir.BitwiseAnd(a, b)}; | ||
| 59 | const auto rhs{ir.BitwiseAnd(lhs, not_c)}; | ||
| 60 | r = ir.BitwiseOr(r, rhs); | ||
| 61 | } | ||
| 62 | if (ttbl & 0x80) { | ||
| 63 | // r |= a & b & c; | ||
| 64 | const auto lhs{ir.BitwiseAnd(a, b)}; | ||
| 65 | const auto rhs{ir.BitwiseAnd(lhs, c)}; | ||
| 66 | r = ir.BitwiseOr(r, rhs); | ||
| 67 | } | ||
| 68 | return r; | ||
| 69 | } | ||
| 70 | |||
| 71 | IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& op_c, u64 lut) { | ||
| 72 | union { | ||
| 73 | u64 insn; | ||
| 74 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 75 | BitField<8, 8, IR::Reg> src_reg; | ||
| 76 | BitField<47, 1, u64> cc; | ||
| 77 | } const lop3{insn}; | ||
| 78 | |||
| 79 | if (lop3.cc != 0) { | ||
| 80 | throw NotImplementedException("LOP3 CC"); | ||
| 81 | } | ||
| 82 | |||
| 83 | const IR::U32 op_a{v.X(lop3.src_reg)}; | ||
| 84 | const IR::U32 result{ApplyLUT(v.ir, op_a, op_b, op_c, lut)}; | ||
| 85 | v.X(lop3.dest_reg, result); | ||
| 86 | return result; | ||
| 87 | } | ||
| 88 | |||
| 89 | u64 GetLut48(u64 insn) { | ||
| 90 | union { | ||
| 91 | u64 raw; | ||
| 92 | BitField<48, 8, u64> lut; | ||
| 93 | } const lut{insn}; | ||
| 94 | return lut.lut; | ||
| 95 | } | ||
| 96 | } // Anonymous namespace | ||
| 97 | |||
| 98 | void TranslatorVisitor::LOP3_reg(u64 insn) { | ||
| 99 | union { | ||
| 100 | u64 insn; | ||
| 101 | BitField<28, 8, u64> lut; | ||
| 102 | BitField<38, 1, u64> x; | ||
| 103 | BitField<36, 2, PredicateOp> pred_op; | ||
| 104 | BitField<48, 3, IR::Pred> pred; | ||
| 105 | } const lop3{insn}; | ||
| 106 | |||
| 107 | if (lop3.x != 0) { | ||
| 108 | throw NotImplementedException("LOP3 X"); | ||
| 109 | } | ||
| 110 | const IR::U32 result{LOP3(*this, insn, GetReg20(insn), GetReg39(insn), lop3.lut)}; | ||
| 111 | const IR::U1 pred_result{PredicateOperation(ir, result, lop3.pred_op)}; | ||
| 112 | ir.SetPred(lop3.pred, pred_result); | ||
| 113 | } | ||
| 114 | |||
| 115 | void TranslatorVisitor::LOP3_cbuf(u64 insn) { | ||
| 116 | LOP3(*this, insn, GetCbuf(insn), GetReg39(insn), GetLut48(insn)); | ||
| 117 | } | ||
| 118 | |||
| 119 | void TranslatorVisitor::LOP3_imm(u64 insn) { | ||
| 120 | LOP3(*this, insn, GetImm20(insn), GetReg39(insn), GetLut48(insn)); | ||
| 121 | } | ||
| 122 | } // namespace Shader::Maxwell | ||
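ApplyLUT expands the truth table into AND/OR terms because the IR has no per-bit indexing. The scalar semantics it must match, per the linked forum post: for each bit position, the three input bits form an index into the 8-bit table, with a contributing the high index bit. A reference sketch (not part of the patch):

    #include <cstdint>
    #include <cstdio>

    uint32_t Lop3(uint32_t a, uint32_t b, uint32_t c, uint32_t ttbl) {
        uint32_t r = 0;
        for (int bit = 0; bit < 32; ++bit) {
            const uint32_t idx = (((a >> bit) & 1) << 2) | (((b >> bit) & 1) << 1) |
                                 ((c >> bit) & 1);
            r |= ((ttbl >> idx) & 1u) << bit;
        }
        return r;
    }

    int main() {
        // 0xEA encodes (a & b) | c.
        const uint32_t r = Lop3(0xff00ff00u, 0xffff0000u, 0x000000ffu, 0xea);
        std::printf("0x%08x\n", r); // 0xff0000ff
    }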
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp new file mode 100644 index 000000000..4324fd443 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp | |||
| @@ -0,0 +1,66 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class Mode : u64 { | ||
| 12 | PR, | ||
| 13 | CC, | ||
| 14 | }; | ||
| 15 | } // Anonymous namespace | ||
| 16 | |||
| 17 | void TranslatorVisitor::P2R_reg(u64) { | ||
| 18 | throw NotImplementedException("P2R (reg)"); | ||
| 19 | } | ||
| 20 | |||
| 21 | void TranslatorVisitor::P2R_cbuf(u64) { | ||
| 22 | throw NotImplementedException("P2R (cbuf)"); | ||
| 23 | } | ||
| 24 | |||
| 25 | void TranslatorVisitor::P2R_imm(u64 insn) { | ||
| 26 | union { | ||
| 27 | u64 raw; | ||
| 28 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 29 | BitField<8, 8, IR::Reg> src; | ||
| 30 | BitField<40, 1, Mode> mode; | ||
| 31 | BitField<41, 2, u64> byte_selector; | ||
| 32 | } const p2r{insn}; | ||
| 33 | |||
| 34 | const u32 mask{GetImm20(insn).U32()}; | ||
| 35 | const bool pr_mode{p2r.mode == Mode::PR}; | ||
| 36 | const u32 num_items{pr_mode ? 7U : 4U}; | ||
| 37 | const u32 offset{static_cast<u32>(p2r.byte_selector) * 8}; | ||
| 38 | IR::U32 insert{ir.Imm32(0)}; | ||
| 39 | for (u32 index = 0; index < num_items; ++index) { | ||
| 40 | if (((mask >> index) & 1) == 0) { | ||
| 41 | continue; | ||
| 42 | } | ||
| 43 | const IR::U1 cond{[this, index, pr_mode] { | ||
| 44 | if (pr_mode) { | ||
| 45 | return ir.GetPred(IR::Pred{index}); | ||
| 46 | } | ||
| 47 | switch (index) { | ||
| 48 | case 0: | ||
| 49 | return ir.GetZFlag(); | ||
| 50 | case 1: | ||
| 51 | return ir.GetSFlag(); | ||
| 52 | case 2: | ||
| 53 | return ir.GetCFlag(); | ||
| 54 | case 3: | ||
| 55 | return ir.GetOFlag(); | ||
| 56 | } | ||
| 57 | throw LogicError("Unreachable P2R index"); | ||
| 58 | }()}; | ||
| 59 | const IR::U32 bit{ir.Select(cond, ir.Imm32(1U << (index + offset)), ir.Imm32(0))}; | ||
| 60 | insert = ir.BitwiseOr(insert, bit); | ||
| 61 | } | ||
| 62 | const IR::U32 masked_out{ir.BitwiseAnd(X(p2r.src), ir.Imm32(~(mask << offset)))}; | ||
| 63 | X(p2r.dest_reg, ir.BitwiseOr(masked_out, insert)); | ||
| 64 | } | ||
| 65 | |||
| 66 | } // namespace Shader::Maxwell | ||
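In short, P2R gathers predicate (or CC flag) bits into one byte of the destination: bits selected by the immediate mask are packed at byte_selector * 8, and unmasked bits of the source register pass through. A host-side sketch of the PR-mode path (not part of the patch):

    #include <cstdint>
    #include <cstdio>

    uint32_t P2rImm(uint32_t src, uint32_t preds /* P0..P6 in bits 0..6 */,
                    uint32_t mask, uint32_t byte_selector) {
        const uint32_t offset = byte_selector * 8;
        uint32_t insert = 0;
        for (uint32_t index = 0; index < 7; ++index) {
            if (((mask >> index) & 1) != 0 && ((preds >> index) & 1) != 0) {
                insert |= 1u << (index + offset);
            }
        }
        return (src & ~(mask << offset)) | insert;
    }

    int main() {
        // Pack P0..P2 into byte 1, preserving everything outside the mask.
        std::printf("0x%08x\n", P2rImm(0xffffffffu, 0x05u, 0x07u, 1)); // 0xfffffdff
    }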
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp new file mode 100644 index 000000000..6bb08db8a --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp | |||
| @@ -0,0 +1,44 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/exception.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 10 | |||
| 11 | namespace Shader::Maxwell { | ||
| 12 | namespace { | ||
| 13 | void MOV(TranslatorVisitor& v, u64 insn, const IR::U32& src, bool is_mov32i = false) { | ||
| 14 | union { | ||
| 15 | u64 raw; | ||
| 16 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 17 | BitField<39, 4, u64> mask; | ||
| 18 | BitField<12, 4, u64> mov32i_mask; | ||
| 19 | } const mov{insn}; | ||
| 20 | |||
| 21 | if ((is_mov32i ? mov.mov32i_mask : mov.mask) != 0xf) { | ||
| 22 | throw NotImplementedException("Non-full move mask"); | ||
| 23 | } | ||
| 24 | v.X(mov.dest_reg, src); | ||
| 25 | } | ||
| 26 | } // Anonymous namespace | ||
| 27 | |||
| 28 | void TranslatorVisitor::MOV_reg(u64 insn) { | ||
| 29 | MOV(*this, insn, GetReg20(insn)); | ||
| 30 | } | ||
| 31 | |||
| 32 | void TranslatorVisitor::MOV_cbuf(u64 insn) { | ||
| 33 | MOV(*this, insn, GetCbuf(insn)); | ||
| 34 | } | ||
| 35 | |||
| 36 | void TranslatorVisitor::MOV_imm(u64 insn) { | ||
| 37 | MOV(*this, insn, GetImm20(insn)); | ||
| 38 | } | ||
| 39 | |||
| 40 | void TranslatorVisitor::MOV32I(u64 insn) { | ||
| 41 | MOV(*this, insn, GetImm32(insn), true); | ||
| 42 | } | ||
| 43 | |||
| 44 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp new file mode 100644 index 000000000..eda5f177b --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp | |||
| @@ -0,0 +1,71 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class Mode : u64 { | ||
| 12 | PR, | ||
| 13 | CC, | ||
| 14 | }; | ||
| 15 | |||
| 16 | void SetFlag(IR::IREmitter& ir, const IR::U1& inv_mask_bit, const IR::U1& src_bit, u32 index) { | ||
| 17 | switch (index) { | ||
| 18 | case 0: | ||
| 19 | return ir.SetZFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetZFlag(), src_bit)}); | ||
| 20 | case 1: | ||
| 21 | return ir.SetSFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetSFlag(), src_bit)}); | ||
| 22 | case 2: | ||
| 23 | return ir.SetCFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetCFlag(), src_bit)}); | ||
| 24 | case 3: | ||
| 25 | return ir.SetOFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetOFlag(), src_bit)}); | ||
| 26 | default: | ||
| 27 | throw LogicError("Unreachable R2P index"); | ||
| 28 | } | ||
| 29 | } | ||
| 30 | |||
| 31 | void R2P(TranslatorVisitor& v, u64 insn, const IR::U32& mask) { | ||
| 32 | union { | ||
| 33 | u64 raw; | ||
| 34 | BitField<8, 8, IR::Reg> src_reg; | ||
| 35 | BitField<40, 1, Mode> mode; | ||
| 36 | BitField<41, 2, u64> byte_selector; | ||
| 37 | } const r2p{insn}; | ||
| 38 | const IR::U32 src{v.X(r2p.src_reg)}; | ||
| 39 | const IR::U32 count{v.ir.Imm32(1)}; | ||
| 40 | const bool pr_mode{r2p.mode == Mode::PR}; | ||
| 41 | const u32 num_items{pr_mode ? 7U : 4U}; | ||
| 42 | const u32 offset_base{static_cast<u32>(r2p.byte_selector) * 8}; | ||
| 43 | for (u32 index = 0; index < num_items; ++index) { | ||
| 44 | const IR::U32 offset{v.ir.Imm32(offset_base + index)}; | ||
| 45 | const IR::U1 src_zero{v.ir.GetZeroFromOp(v.ir.BitFieldExtract(src, offset, count, false))}; | ||
| 46 | const IR::U1 src_bit{v.ir.LogicalNot(src_zero)}; | ||
| 47 | const IR::U32 mask_bfe{v.ir.BitFieldExtract(mask, v.ir.Imm32(index), count, false)}; | ||
| 48 | const IR::U1 inv_mask_bit{v.ir.GetZeroFromOp(mask_bfe)}; | ||
| 49 | if (pr_mode) { | ||
| 50 | const IR::Pred pred{index}; | ||
| 51 | v.ir.SetPred(pred, IR::U1{v.ir.Select(inv_mask_bit, v.ir.GetPred(pred), src_bit)}); | ||
| 52 | } else { | ||
| 53 | SetFlag(v.ir, inv_mask_bit, src_bit, index); | ||
| 54 | } | ||
| 55 | } | ||
| 56 | } | ||
| 57 | } // Anonymous namespace | ||
| 58 | |||
| 59 | void TranslatorVisitor::R2P_reg(u64 insn) { | ||
| 60 | R2P(*this, insn, GetReg20(insn)); | ||
| 61 | } | ||
| 62 | |||
| 63 | void TranslatorVisitor::R2P_cbuf(u64 insn) { | ||
| 64 | R2P(*this, insn, GetCbuf(insn)); | ||
| 65 | } | ||
| 66 | |||
| 67 | void TranslatorVisitor::R2P_imm(u64 insn) { | ||
| 68 | R2P(*this, insn, GetImm20(insn)); | ||
| 69 | } | ||
| 70 | |||
| 71 | } // namespace Shader::Maxwell | ||
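R2P is the inverse of P2R above: bits extracted from the selected source byte update only the predicates (or flags) enabled by the mask, which the IR expresses as a Select against each predicate's previous value. A host-side sketch of the PR-mode path (not part of the patch; the mask here is a compile-time value, whereas the instruction takes it from a runtime operand):

    #include <cstdint>
    #include <cstdio>

    uint32_t R2p(uint32_t preds, uint32_t src, uint32_t mask, uint32_t byte_selector) {
        const uint32_t offset = byte_selector * 8;
        for (uint32_t index = 0; index < 7; ++index) {
            if (((mask >> index) & 1) != 0) {
                const uint32_t bit = (src >> (offset + index)) & 1;
                preds = (preds & ~(1u << index)) | (bit << index);
            }
        }
        return preds;
    }

    int main() {
        // Load P0..P2 from byte 0 of src; P3..P6 keep their old values.
        std::printf("0x%02x\n", R2p(0x7f, 0x05, 0x07, 0)); // 0x7d
    }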
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp new file mode 100644 index 000000000..20cb2674e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp | |||
| @@ -0,0 +1,181 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class SpecialRegister : u64 { | ||
| 12 | SR_LANEID = 0, | ||
| 13 | SR_CLOCK = 1, | ||
| 14 | SR_VIRTCFG = 2, | ||
| 15 | SR_VIRTID = 3, | ||
| 16 | SR_PM0 = 4, | ||
| 17 | SR_PM1 = 5, | ||
| 18 | SR_PM2 = 6, | ||
| 19 | SR_PM3 = 7, | ||
| 20 | SR_PM4 = 8, | ||
| 21 | SR_PM5 = 9, | ||
| 22 | SR_PM6 = 10, | ||
| 23 | SR_PM7 = 11, | ||
| 24 | SR12 = 12, | ||
| 25 | SR13 = 13, | ||
| 26 | SR14 = 14, | ||
| 27 | SR_ORDERING_TICKET = 15, | ||
| 28 | SR_PRIM_TYPE = 16, | ||
| 29 | SR_INVOCATION_ID = 17, | ||
| 30 | SR_Y_DIRECTION = 18, | ||
| 31 | SR_THREAD_KILL = 19, | ||
| 32 | SM_SHADER_TYPE = 20, | ||
| 33 | SR_DIRECTCBEWRITEADDRESSLOW = 21, | ||
| 34 | SR_DIRECTCBEWRITEADDRESSHIGH = 22, | ||
| 35 | SR_DIRECTCBEWRITEENABLE = 23, | ||
| 36 | SR_MACHINE_ID_0 = 24, | ||
| 37 | SR_MACHINE_ID_1 = 25, | ||
| 38 | SR_MACHINE_ID_2 = 26, | ||
| 39 | SR_MACHINE_ID_3 = 27, | ||
| 40 | SR_AFFINITY = 28, | ||
| 41 | SR_INVOCATION_INFO = 29, | ||
| 42 | SR_WSCALEFACTOR_XY = 30, | ||
| 43 | SR_WSCALEFACTOR_Z = 31, | ||
| 44 | SR_TID = 32, | ||
| 45 | SR_TID_X = 33, | ||
| 46 | SR_TID_Y = 34, | ||
| 47 | SR_TID_Z = 35, | ||
| 48 | SR_CTA_PARAM = 36, | ||
| 49 | SR_CTAID_X = 37, | ||
| 50 | SR_CTAID_Y = 38, | ||
| 51 | SR_CTAID_Z = 39, | ||
| 52 | SR_NTID = 40, | ||
| 53 | SR_CirQueueIncrMinusOne = 41, | ||
| 54 | SR_NLATC = 42, | ||
| 55 | SR43 = 43, | ||
| 56 | SR_SM_SPA_VERSION = 44, | ||
| 57 | SR_MULTIPASSSHADERINFO = 45, | ||
| 58 | SR_LWINHI = 46, | ||
| 59 | SR_SWINHI = 47, | ||
| 60 | SR_SWINLO = 48, | ||
| 61 | SR_SWINSZ = 49, | ||
| 62 | SR_SMEMSZ = 50, | ||
| 63 | SR_SMEMBANKS = 51, | ||
| 64 | SR_LWINLO = 52, | ||
| 65 | SR_LWINSZ = 53, | ||
| 66 | SR_LMEMLOSZ = 54, | ||
| 67 | SR_LMEMHIOFF = 55, | ||
| 68 | SR_EQMASK = 56, | ||
| 69 | SR_LTMASK = 57, | ||
| 70 | SR_LEMASK = 58, | ||
| 71 | SR_GTMASK = 59, | ||
| 72 | SR_GEMASK = 60, | ||
| 73 | SR_REGALLOC = 61, | ||
| 74 | SR_BARRIERALLOC = 62, | ||
| 75 | SR63 = 63, | ||
| 76 | SR_GLOBALERRORSTATUS = 64, | ||
| 77 | SR65 = 65, | ||
| 78 | SR_WARPERRORSTATUS = 66, | ||
| 79 | SR_WARPERRORSTATUSCLEAR = 67, | ||
| 80 | SR68 = 68, | ||
| 81 | SR69 = 69, | ||
| 82 | SR70 = 70, | ||
| 83 | SR71 = 71, | ||
| 84 | SR_PM_HI0 = 72, | ||
| 85 | SR_PM_HI1 = 73, | ||
| 86 | SR_PM_HI2 = 74, | ||
| 87 | SR_PM_HI3 = 75, | ||
| 88 | SR_PM_HI4 = 76, | ||
| 89 | SR_PM_HI5 = 77, | ||
| 90 | SR_PM_HI6 = 78, | ||
| 91 | SR_PM_HI7 = 79, | ||
| 92 | SR_CLOCKLO = 80, | ||
| 93 | SR_CLOCKHI = 81, | ||
| 94 | SR_GLOBALTIMERLO = 82, | ||
| 95 | SR_GLOBALTIMERHI = 83, | ||
| 96 | SR84 = 84, | ||
| 97 | SR85 = 85, | ||
| 98 | SR86 = 86, | ||
| 99 | SR87 = 87, | ||
| 100 | SR88 = 88, | ||
| 101 | SR89 = 89, | ||
| 102 | SR90 = 90, | ||
| 103 | SR91 = 91, | ||
| 104 | SR92 = 92, | ||
| 105 | SR93 = 93, | ||
| 106 | SR94 = 94, | ||
| 107 | SR95 = 95, | ||
| 108 | SR_HWTASKID = 96, | ||
| 109 | SR_CIRCULARQUEUEENTRYINDEX = 97, | ||
| 110 | SR_CIRCULARQUEUEENTRYADDRESSLOW = 98, | ||
| 111 | SR_CIRCULARQUEUEENTRYADDRESSHIGH = 99, | ||
| 112 | }; | ||
| 113 | |||
| 114 | [[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) { | ||
| 115 | switch (special_register) { | ||
| 116 | case SpecialRegister::SR_INVOCATION_ID: | ||
| 117 | return ir.InvocationId(); | ||
| 118 | case SpecialRegister::SR_THREAD_KILL: | ||
| 119 | return IR::U32{ir.Select(ir.IsHelperInvocation(), ir.Imm32(-1), ir.Imm32(0))}; | ||
| 120 | case SpecialRegister::SR_INVOCATION_INFO: | ||
| 121 | LOG_WARNING(Shader, "(STUBBED) SR_INVOCATION_INFO"); | ||
| 122 | return ir.Imm32(0x00ff'0000); | ||
| 123 | case SpecialRegister::SR_TID: { | ||
| 124 | const IR::Value tid{ir.LocalInvocationId()}; | ||
| 125 | return ir.BitFieldInsert(ir.BitFieldInsert(IR::U32{ir.CompositeExtract(tid, 0)}, | ||
| 126 | IR::U32{ir.CompositeExtract(tid, 1)}, | ||
| 127 | ir.Imm32(16), ir.Imm32(8)), | ||
| 128 | IR::U32{ir.CompositeExtract(tid, 2)}, ir.Imm32(26), ir.Imm32(6)); | ||
| 129 | } | ||
| 130 | case SpecialRegister::SR_TID_X: | ||
| 131 | return ir.LocalInvocationIdX(); | ||
| 132 | case SpecialRegister::SR_TID_Y: | ||
| 133 | return ir.LocalInvocationIdY(); | ||
| 134 | case SpecialRegister::SR_TID_Z: | ||
| 135 | return ir.LocalInvocationIdZ(); | ||
| 136 | case SpecialRegister::SR_CTAID_X: | ||
| 137 | return ir.WorkgroupIdX(); | ||
| 138 | case SpecialRegister::SR_CTAID_Y: | ||
| 139 | return ir.WorkgroupIdY(); | ||
| 140 | case SpecialRegister::SR_CTAID_Z: | ||
| 141 | return ir.WorkgroupIdZ(); | ||
| 142 | case SpecialRegister::SR_WSCALEFACTOR_XY: | ||
| 143 | LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_XY"); | ||
| 144 | return ir.Imm32(Common::BitCast<u32>(1.0f)); | ||
| 145 | case SpecialRegister::SR_WSCALEFACTOR_Z: | ||
| 146 | LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_Z"); | ||
| 147 | return ir.Imm32(Common::BitCast<u32>(1.0f)); | ||
| 148 | case SpecialRegister::SR_LANEID: | ||
| 149 | return ir.LaneId(); | ||
| 150 | case SpecialRegister::SR_EQMASK: | ||
| 151 | return ir.SubgroupEqMask(); | ||
| 152 | case SpecialRegister::SR_LTMASK: | ||
| 153 | return ir.SubgroupLtMask(); | ||
| 154 | case SpecialRegister::SR_LEMASK: | ||
| 155 | return ir.SubgroupLeMask(); | ||
| 156 | case SpecialRegister::SR_GTMASK: | ||
| 157 | return ir.SubgroupGtMask(); | ||
| 158 | case SpecialRegister::SR_GEMASK: | ||
| 159 | return ir.SubgroupGeMask(); | ||
| 160 | case SpecialRegister::SR_Y_DIRECTION: | ||
| 161 | return ir.BitCast<IR::U32>(ir.YDirection()); | ||
| 162 | case SpecialRegister::SR_AFFINITY: | ||
| 163 | LOG_WARNING(Shader, "(STUBBED) SR_AFFINITY"); | ||
| 164 | return ir.Imm32(0); // This is the default value hardware returns. | ||
| 165 | default: | ||
| 166 | throw NotImplementedException("S2R special register {}", special_register); | ||
| 167 | } | ||
| 168 | } | ||
| 169 | } // Anonymous namespace | ||
| 170 | |||
| 171 | void TranslatorVisitor::S2R(u64 insn) { | ||
| 172 | union { | ||
| 173 | u64 raw; | ||
| 174 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 175 | BitField<20, 8, SpecialRegister> src_reg; | ||
| 176 | } const s2r{insn}; | ||
| 177 | |||
| 178 | X(s2r.dest_reg, Read(ir, s2r.src_reg)); | ||
| 179 | } | ||
| 180 | |||
| 181 | } // namespace Shader::Maxwell | ||
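The SR_TID case above packs all three local thread ID components into one 32-bit register via two nested BitFieldInsert calls: x in the low bits, y in bits [16,24), z in bits [26,32). A minimal host-side sketch of the same packing (hypothetical helper, assuming each component fits its field):

    #include <cstdint>
    #include <cstdio>

    // Mirrors the SR_TID bit layout produced by Read() above.
    std::uint32_t PackTid(std::uint32_t x, std::uint32_t y, std::uint32_t z) {
        std::uint32_t packed = x & 0xffffu; // x occupies the low 16 bits
        packed |= (y & 0xffu) << 16;        // y: 8 bits inserted at offset 16
        packed |= (z & 0x3fu) << 26;        // z: 6 bits inserted at offset 26
        return packed;
    }

    int main() {
        std::printf("%08x\n", static_cast<unsigned>(PackTid(5, 2, 1))); // prints 04020005
        return 0;
    }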
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp new file mode 100644 index 000000000..7e26ab359 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp | |||
| @@ -0,0 +1,283 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | |||
| 12 | [[noreturn]] static void ThrowNotImplemented(Opcode opcode) { | ||
| 13 | throw NotImplementedException("Instruction {} is not implemented", opcode); | ||
| 14 | } | ||
| 15 | |||
| 16 | void TranslatorVisitor::ATOM_cas(u64) { | ||
| 17 | ThrowNotImplemented(Opcode::ATOM_cas); | ||
| 18 | } | ||
| 19 | |||
| 20 | void TranslatorVisitor::ATOMS_cas(u64) { | ||
| 21 | ThrowNotImplemented(Opcode::ATOMS_cas); | ||
| 22 | } | ||
| 23 | |||
| 24 | void TranslatorVisitor::B2R(u64) { | ||
| 25 | ThrowNotImplemented(Opcode::B2R); | ||
| 26 | } | ||
| 27 | |||
| 28 | void TranslatorVisitor::BPT(u64) { | ||
| 29 | ThrowNotImplemented(Opcode::BPT); | ||
| 30 | } | ||
| 31 | |||
| 32 | void TranslatorVisitor::BRA(u64) { | ||
| 33 | ThrowNotImplemented(Opcode::BRA); | ||
| 34 | } | ||
| 35 | |||
| 36 | void TranslatorVisitor::BRK(u64) { | ||
| 37 | ThrowNotImplemented(Opcode::BRK); | ||
| 38 | } | ||
| 39 | |||
| 40 | void TranslatorVisitor::CAL() { | ||
| 41 | // CAL is a no-op | ||
| 42 | } | ||
| 43 | |||
| 44 | void TranslatorVisitor::CCTL(u64) { | ||
| 45 | ThrowNotImplemented(Opcode::CCTL); | ||
| 46 | } | ||
| 47 | |||
| 48 | void TranslatorVisitor::CCTLL(u64) { | ||
| 49 | ThrowNotImplemented(Opcode::CCTLL); | ||
| 50 | } | ||
| 51 | |||
| 52 | void TranslatorVisitor::CONT(u64) { | ||
| 53 | ThrowNotImplemented(Opcode::CONT); | ||
| 54 | } | ||
| 55 | |||
| 56 | void TranslatorVisitor::CS2R(u64) { | ||
| 57 | ThrowNotImplemented(Opcode::CS2R); | ||
| 58 | } | ||
| 59 | |||
| 60 | void TranslatorVisitor::FCHK_reg(u64) { | ||
| 61 | ThrowNotImplemented(Opcode::FCHK_reg); | ||
| 62 | } | ||
| 63 | |||
| 64 | void TranslatorVisitor::FCHK_cbuf(u64) { | ||
| 65 | ThrowNotImplemented(Opcode::FCHK_cbuf); | ||
| 66 | } | ||
| 67 | |||
| 68 | void TranslatorVisitor::FCHK_imm(u64) { | ||
| 69 | ThrowNotImplemented(Opcode::FCHK_imm); | ||
| 70 | } | ||
| 71 | |||
| 72 | void TranslatorVisitor::GETCRSPTR(u64) { | ||
| 73 | ThrowNotImplemented(Opcode::GETCRSPTR); | ||
| 74 | } | ||
| 75 | |||
| 76 | void TranslatorVisitor::GETLMEMBASE(u64) { | ||
| 77 | ThrowNotImplemented(Opcode::GETLMEMBASE); | ||
| 78 | } | ||
| 79 | |||
| 80 | void TranslatorVisitor::IDE(u64) { | ||
| 81 | ThrowNotImplemented(Opcode::IDE); | ||
| 82 | } | ||
| 83 | |||
| 84 | void TranslatorVisitor::IDP_reg(u64) { | ||
| 85 | ThrowNotImplemented(Opcode::IDP_reg); | ||
| 86 | } | ||
| 87 | |||
| 88 | void TranslatorVisitor::IDP_imm(u64) { | ||
| 89 | ThrowNotImplemented(Opcode::IDP_imm); | ||
| 90 | } | ||
| 91 | |||
| 92 | void TranslatorVisitor::IMAD_reg(u64) { | ||
| 93 | ThrowNotImplemented(Opcode::IMAD_reg); | ||
| 94 | } | ||
| 95 | |||
| 96 | void TranslatorVisitor::IMAD_rc(u64) { | ||
| 97 | ThrowNotImplemented(Opcode::IMAD_rc); | ||
| 98 | } | ||
| 99 | |||
| 100 | void TranslatorVisitor::IMAD_cr(u64) { | ||
| 101 | ThrowNotImplemented(Opcode::IMAD_cr); | ||
| 102 | } | ||
| 103 | |||
| 104 | void TranslatorVisitor::IMAD_imm(u64) { | ||
| 105 | ThrowNotImplemented(Opcode::IMAD_imm); | ||
| 106 | } | ||
| 107 | |||
| 108 | void TranslatorVisitor::IMAD32I(u64) { | ||
| 109 | ThrowNotImplemented(Opcode::IMAD32I); | ||
| 110 | } | ||
| 111 | |||
| 112 | void TranslatorVisitor::IMADSP_reg(u64) { | ||
| 113 | ThrowNotImplemented(Opcode::IMADSP_reg); | ||
| 114 | } | ||
| 115 | |||
| 116 | void TranslatorVisitor::IMADSP_rc(u64) { | ||
| 117 | ThrowNotImplemented(Opcode::IMADSP_rc); | ||
| 118 | } | ||
| 119 | |||
| 120 | void TranslatorVisitor::IMADSP_cr(u64) { | ||
| 121 | ThrowNotImplemented(Opcode::IMADSP_cr); | ||
| 122 | } | ||
| 123 | |||
| 124 | void TranslatorVisitor::IMADSP_imm(u64) { | ||
| 125 | ThrowNotImplemented(Opcode::IMADSP_imm); | ||
| 126 | } | ||
| 127 | |||
| 128 | void TranslatorVisitor::IMUL_reg(u64) { | ||
| 129 | ThrowNotImplemented(Opcode::IMUL_reg); | ||
| 130 | } | ||
| 131 | |||
| 132 | void TranslatorVisitor::IMUL_cbuf(u64) { | ||
| 133 | ThrowNotImplemented(Opcode::IMUL_cbuf); | ||
| 134 | } | ||
| 135 | |||
| 136 | void TranslatorVisitor::IMUL_imm(u64) { | ||
| 137 | ThrowNotImplemented(Opcode::IMUL_imm); | ||
| 138 | } | ||
| 139 | |||
| 140 | void TranslatorVisitor::IMUL32I(u64) { | ||
| 141 | ThrowNotImplemented(Opcode::IMUL32I); | ||
| 142 | } | ||
| 143 | |||
| 144 | void TranslatorVisitor::JCAL(u64) { | ||
| 145 | ThrowNotImplemented(Opcode::JCAL); | ||
| 146 | } | ||
| 147 | |||
| 148 | void TranslatorVisitor::JMP(u64) { | ||
| 149 | ThrowNotImplemented(Opcode::JMP); | ||
| 150 | } | ||
| 151 | |||
| 152 | void TranslatorVisitor::KIL() { | ||
| 153 | // KIL is a no-op | ||
| 154 | } | ||
| 155 | |||
| 156 | void TranslatorVisitor::LD(u64) { | ||
| 157 | ThrowNotImplemented(Opcode::LD); | ||
| 158 | } | ||
| 159 | |||
| 160 | void TranslatorVisitor::LEPC(u64) { | ||
| 161 | ThrowNotImplemented(Opcode::LEPC); | ||
| 162 | } | ||
| 163 | |||
| 164 | void TranslatorVisitor::LONGJMP(u64) { | ||
| 165 | ThrowNotImplemented(Opcode::LONGJMP); | ||
| 166 | } | ||
| 167 | |||
| 168 | void TranslatorVisitor::NOP(u64) { | ||
| 169 | // NOP is a no-op | ||
| 170 | } | ||
| 171 | |||
| 172 | void TranslatorVisitor::PBK() { | ||
| 173 | // PBK is a no-op | ||
| 174 | } | ||
| 175 | |||
| 176 | void TranslatorVisitor::PCNT() { | ||
| 177 | // PCNT is a no-op | ||
| 178 | } | ||
| 179 | |||
| 180 | void TranslatorVisitor::PEXIT(u64) { | ||
| 181 | ThrowNotImplemented(Opcode::PEXIT); | ||
| 182 | } | ||
| 183 | |||
| 184 | void TranslatorVisitor::PLONGJMP(u64) { | ||
| 185 | ThrowNotImplemented(Opcode::PLONGJMP); | ||
| 186 | } | ||
| 187 | |||
| 188 | void TranslatorVisitor::PRET(u64) { | ||
| 189 | ThrowNotImplemented(Opcode::PRET); | ||
| 190 | } | ||
| 191 | |||
| 192 | void TranslatorVisitor::PRMT_reg(u64) { | ||
| 193 | ThrowNotImplemented(Opcode::PRMT_reg); | ||
| 194 | } | ||
| 195 | |||
| 196 | void TranslatorVisitor::PRMT_rc(u64) { | ||
| 197 | ThrowNotImplemented(Opcode::PRMT_rc); | ||
| 198 | } | ||
| 199 | |||
| 200 | void TranslatorVisitor::PRMT_cr(u64) { | ||
| 201 | ThrowNotImplemented(Opcode::PRMT_cr); | ||
| 202 | } | ||
| 203 | |||
| 204 | void TranslatorVisitor::PRMT_imm(u64) { | ||
| 205 | ThrowNotImplemented(Opcode::PRMT_imm); | ||
| 206 | } | ||
| 207 | |||
| 208 | void TranslatorVisitor::R2B(u64) { | ||
| 209 | ThrowNotImplemented(Opcode::R2B); | ||
| 210 | } | ||
| 211 | |||
| 212 | void TranslatorVisitor::RAM(u64) { | ||
| 213 | ThrowNotImplemented(Opcode::RAM); | ||
| 214 | } | ||
| 215 | |||
| 216 | void TranslatorVisitor::RET(u64) { | ||
| 217 | ThrowNotImplemented(Opcode::RET); | ||
| 218 | } | ||
| 219 | |||
| 220 | void TranslatorVisitor::RTT(u64) { | ||
| 221 | ThrowNotImplemented(Opcode::RTT); | ||
| 222 | } | ||
| 223 | |||
| 224 | void TranslatorVisitor::SAM(u64) { | ||
| 225 | ThrowNotImplemented(Opcode::SAM); | ||
| 226 | } | ||
| 227 | |||
| 228 | void TranslatorVisitor::SETCRSPTR(u64) { | ||
| 229 | ThrowNotImplemented(Opcode::SETCRSPTR); | ||
| 230 | } | ||
| 231 | |||
| 232 | void TranslatorVisitor::SETLMEMBASE(u64) { | ||
| 233 | ThrowNotImplemented(Opcode::SETLMEMBASE); | ||
| 234 | } | ||
| 235 | |||
| 236 | void TranslatorVisitor::SSY() { | ||
| 237 | // SSY is a no-op | ||
| 238 | } | ||
| 239 | |||
| 240 | void TranslatorVisitor::ST(u64) { | ||
| 241 | ThrowNotImplemented(Opcode::ST); | ||
| 242 | } | ||
| 243 | |||
| 244 | void TranslatorVisitor::STP(u64) { | ||
| 245 | ThrowNotImplemented(Opcode::STP); | ||
| 246 | } | ||
| 247 | |||
| 248 | void TranslatorVisitor::SUATOM_cas(u64) { | ||
| 249 | ThrowNotImplemented(Opcode::SUATOM_cas); | ||
| 250 | } | ||
| 251 | |||
| 252 | void TranslatorVisitor::SYNC(u64) { | ||
| 253 | ThrowNotImplemented(Opcode::SYNC); | ||
| 254 | } | ||
| 255 | |||
| 256 | void TranslatorVisitor::TXA(u64) { | ||
| 257 | ThrowNotImplemented(Opcode::TXA); | ||
| 258 | } | ||
| 259 | |||
| 260 | void TranslatorVisitor::VABSDIFF(u64) { | ||
| 261 | ThrowNotImplemented(Opcode::VABSDIFF); | ||
| 262 | } | ||
| 263 | |||
| 264 | void TranslatorVisitor::VABSDIFF4(u64) { | ||
| 265 | ThrowNotImplemented(Opcode::VABSDIFF4); | ||
| 266 | } | ||
| 267 | |||
| 268 | void TranslatorVisitor::VADD(u64) { | ||
| 269 | ThrowNotImplemented(Opcode::VADD); | ||
| 270 | } | ||
| 271 | |||
| 272 | void TranslatorVisitor::VSET(u64) { | ||
| 273 | ThrowNotImplemented(Opcode::VSET); | ||
| 274 | } | ||
|  | |||
| 275 | void TranslatorVisitor::VSHL(u64 insn) { | ||
| 276 | ThrowNotImplemented(Opcode::VSHL); | ||
| 277 | } | ||
| 278 | |||
| 279 | void TranslatorVisitor::VSHR(u64) { | ||
| 280 | ThrowNotImplemented(Opcode::VSHR); | ||
| 281 | } | ||
| 282 | |||
| 283 | } // namespace Shader::Maxwell | ||
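ThrowNotImplemented centralizes the stub behavior: every unimplemented opcode raises a formatted NotImplementedException so the recompiler can report the exact instruction it choked on. A behavioral sketch of such an exception type (hypothetical class, assuming fmt 8+; yuzu's real one is declared in shader_recompiler/exception.h and may differ):

    #include <stdexcept>
    #include <utility>

    #include <fmt/format.h>

    // Formats its message eagerly, like the NotImplementedException used above.
    class NotImplementedSketch : public std::runtime_error {
    public:
        template <typename... Args>
        explicit NotImplementedSketch(fmt::format_string<Args...> message, Args&&... args)
            : std::runtime_error{fmt::format(message, std::forward<Args>(args)...)} {}
    };

    // Usage mirroring the stubs above:
    //   throw NotImplementedSketch("Instruction {} is not implemented", "ATOM_cas");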
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp new file mode 100644 index 000000000..01cfad88d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp | |||
| @@ -0,0 +1,45 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void OUT(TranslatorVisitor& v, u64 insn, IR::U32 stream_index) { | ||
| 12 | union { | ||
| 13 | u64 raw; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> output_reg; // Not needed on host | ||
| 16 | BitField<39, 1, u64> emit; | ||
| 17 | BitField<40, 1, u64> cut; | ||
| 18 | } const out{insn}; | ||
| 19 | |||
| 20 | stream_index = v.ir.BitwiseAnd(stream_index, v.ir.Imm32(0b11)); | ||
| 21 | |||
| 22 | if (out.emit != 0) { | ||
| 23 | v.ir.EmitVertex(stream_index); | ||
| 24 | } | ||
| 25 | if (out.cut != 0) { | ||
| 26 | v.ir.EndPrimitive(stream_index); | ||
| 27 | } | ||
| 28 | // Host doesn't need the output register, but we can write to it to avoid undefined reads | ||
| 29 | v.X(out.dest_reg, v.ir.Imm32(0)); | ||
| 30 | } | ||
| 31 | } // Anonymous namespace | ||
| 32 | |||
| 33 | void TranslatorVisitor::OUT_reg(u64 insn) { | ||
| 34 | OUT(*this, insn, GetReg20(insn)); | ||
| 35 | } | ||
| 36 | |||
| 37 | void TranslatorVisitor::OUT_cbuf(u64 insn) { | ||
| 38 | OUT(*this, insn, GetCbuf(insn)); | ||
| 39 | } | ||
| 40 | |||
| 41 | void TranslatorVisitor::OUT_imm(u64 insn) { | ||
| 42 | OUT(*this, insn, GetImm20(insn)); | ||
| 43 | } | ||
| 44 | |||
| 45 | } // namespace Shader::Maxwell | ||
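OUT drives geometry-shader output: bit 39 requests a vertex emit and bit 40 ends the current primitive, both on a stream selected by the low two bits of the operand (the 0b11 mask implies four streams). A standalone decode sketch (hypothetical helper; bit positions taken from the union above):

    #include <cstdint>
    #include <cstdio>

    void DecodeOut(std::uint64_t insn, std::uint32_t stream_index) {
        const bool emit = ((insn >> 39) & 1) != 0; // OUT.EMIT
        const bool cut = ((insn >> 40) & 1) != 0;  // OUT.CUT
        stream_index &= 0b11;                      // same masking as OUT() above
        if (emit) {
            std::printf("emit vertex on stream %u\n", stream_index);
        }
        if (cut) {
            std::printf("end primitive on stream %u\n", stream_index);
        }
    }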
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp new file mode 100644 index 000000000..b4767afb5 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp | |||
| @@ -0,0 +1,46 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class Mode : u64 { | ||
| 12 | Default, | ||
| 13 | CovMask, | ||
| 14 | Covered, | ||
| 15 | Offset, | ||
| 16 | CentroidOffset, | ||
| 17 | MyIndex, | ||
| 18 | }; | ||
| 19 | } // Anonymous namespace | ||
| 20 | |||
| 21 | void TranslatorVisitor::PIXLD(u64 insn) { | ||
| 22 | union { | ||
| 23 | u64 raw; | ||
| 24 | BitField<31, 3, Mode> mode; | ||
| 25 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 26 | BitField<8, 8, IR::Reg> addr_reg; | ||
| 27 | BitField<20, 8, s64> addr_offset; | ||
| 28 | BitField<45, 3, IR::Pred> dest_pred; | ||
| 29 | } const pixld{insn}; | ||
| 30 | |||
| 31 | if (pixld.dest_pred != IR::Pred::PT) { | ||
| 32 | throw NotImplementedException("Destination predicate"); | ||
| 33 | } | ||
| 34 | if (pixld.addr_reg != IR::Reg::RZ || pixld.addr_offset != 0) { | ||
| 35 | throw NotImplementedException("Non-zero source register"); | ||
| 36 | } | ||
| 37 | switch (pixld.mode) { | ||
| 38 | case Mode::MyIndex: | ||
| 39 | X(pixld.dest_reg, ir.SampleId()); | ||
| 40 | break; | ||
| 41 | default: | ||
| 42 | throw NotImplementedException("Mode {}", pixld.mode.Value()); | ||
| 43 | } | ||
| 44 | } | ||
| 45 | |||
| 46 | } // namespace Shader::Maxwell | ||
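PIXLD is only accepted in its narrowest form: destination predicate PT, address RZ+0, and mode MyIndex, which reads the current sample index. A hypothetical standalone guard mirroring those checks (field positions from the union above; PT encoded as predicate 7 and RZ as register 255 are assumptions consistent with the rest of the frontend):

    #include <cstdint>

    bool IsSupportedPixld(std::uint64_t insn) {
        const auto mode = (insn >> 31) & 7;          // Mode field; 5 = MyIndex
        const auto dest_pred = (insn >> 45) & 7;     // 7 = PT (always-true predicate)
        const auto addr_reg = (insn >> 8) & 0xff;    // 255 = RZ (zero register)
        const auto addr_offset = (insn >> 20) & 0xff;
        return mode == 5 && dest_pred == 7 && addr_reg == 255 && addr_offset == 0;
    }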
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp new file mode 100644 index 000000000..75d1fa8c1 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp | |||
| @@ -0,0 +1,38 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | void TranslatorVisitor::PSETP(u64 insn) { | ||
| 12 | union { | ||
| 13 | u64 raw; | ||
| 14 | BitField<0, 3, IR::Pred> dest_pred_b; | ||
| 15 | BitField<3, 3, IR::Pred> dest_pred_a; | ||
| 16 | BitField<12, 3, IR::Pred> pred_a; | ||
| 17 | BitField<15, 1, u64> neg_pred_a; | ||
| 18 | BitField<24, 2, BooleanOp> bop_1; | ||
| 19 | BitField<29, 3, IR::Pred> pred_b; | ||
| 20 | BitField<32, 1, u64> neg_pred_b; | ||
| 21 | BitField<39, 3, IR::Pred> pred_c; | ||
| 22 | BitField<42, 1, u64> neg_pred_c; | ||
| 23 | BitField<45, 2, BooleanOp> bop_2; | ||
| 24 | } const pset{insn}; | ||
| 25 | |||
| 26 | const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)}; | ||
| 27 | const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)}; | ||
| 28 | const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)}; | ||
| 29 | |||
| 30 | const IR::U1 lhs_a{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)}; | ||
| 31 | const IR::U1 lhs_b{PredicateCombine(ir, ir.LogicalNot(pred_a), pred_b, pset.bop_1)}; | ||
| 32 | const IR::U1 result_a{PredicateCombine(ir, lhs_a, pred_c, pset.bop_2)}; | ||
| 33 | const IR::U1 result_b{PredicateCombine(ir, lhs_b, pred_c, pset.bop_2)}; | ||
| 34 | |||
| 35 | ir.SetPred(pset.dest_pred_a, result_a); | ||
| 36 | ir.SetPred(pset.dest_pred_b, result_b); | ||
| 37 | } | ||
| 38 | } // namespace Shader::Maxwell | ||
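PSETP evaluates the same two-level boolean combine twice, once with pred_a and once with its negation, writing two destination predicates in a single instruction. A scalar analogue (sketch; the AND/OR/XOR set for BooleanOp is assumed from its use with common_funcs.h):

    enum class BooleanOp { AND, OR, XOR };

    bool Combine(bool a, bool b, BooleanOp op) {
        switch (op) {
        case BooleanOp::AND:
            return a && b;
        case BooleanOp::OR:
            return a || b;
        case BooleanOp::XOR:
            return a != b;
        }
        return false; // unreachable for valid encodings
    }

    // PSETP data flow: dest_a from pred_a, dest_b from its negation.
    void Psetp(bool pred_a, bool pred_b, bool pred_c, BooleanOp bop_1, BooleanOp bop_2,
               bool& dest_a, bool& dest_b) {
        dest_a = Combine(Combine(pred_a, pred_b, bop_1), pred_c, bop_2);
        dest_b = Combine(Combine(!pred_a, pred_b, bop_1), pred_c, bop_2);
    }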
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp new file mode 100644 index 000000000..b02789874 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp | |||
| @@ -0,0 +1,53 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | void TranslatorVisitor::PSET(u64 insn) { | ||
| 12 | union { | ||
| 13 | u64 raw; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<12, 3, IR::Pred> pred_a; | ||
| 16 | BitField<15, 1, u64> neg_pred_a; | ||
| 17 | BitField<24, 2, BooleanOp> bop_1; | ||
| 18 | BitField<29, 3, IR::Pred> pred_b; | ||
| 19 | BitField<32, 1, u64> neg_pred_b; | ||
| 20 | BitField<39, 3, IR::Pred> pred_c; | ||
| 21 | BitField<42, 1, u64> neg_pred_c; | ||
| 22 | BitField<44, 1, u64> bf; | ||
| 23 | BitField<45, 2, BooleanOp> bop_2; | ||
| 24 | BitField<47, 1, u64> cc; | ||
| 25 | } const pset{insn}; | ||
| 26 | |||
| 27 | const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)}; | ||
| 28 | const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)}; | ||
| 29 | const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)}; | ||
| 30 | |||
| 31 | const IR::U1 res_1{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)}; | ||
| 32 | const IR::U1 res_2{PredicateCombine(ir, res_1, pred_c, pset.bop_2)}; | ||
| 33 | |||
| 34 | const IR::U32 true_result{pset.bf != 0 ? ir.Imm32(0x3f800000) : ir.Imm32(-1)}; | ||
| 35 | const IR::U32 zero{ir.Imm32(0)}; | ||
| 36 | |||
| 37 | const IR::U32 result{ir.Select(res_2, true_result, zero)}; | ||
| 38 | |||
| 39 | X(pset.dest_reg, result); | ||
| 40 | if (pset.cc != 0) { | ||
| 41 | const IR::U1 is_zero{ir.IEqual(result, zero)}; | ||
| 42 | SetZFlag(is_zero); | ||
| 43 | if (pset.bf != 0) { | ||
| 44 | ResetSFlag(); | ||
| 45 | } else { | ||
| 46 | SetSFlag(ir.LogicalNot(is_zero)); | ||
| 47 | } | ||
| 48 | ResetOFlag(); | ||
| 49 | ResetCFlag(); | ||
| 50 | } | ||
| 51 | } | ||
| 52 | |||
| 53 | } // namespace Shader::Maxwell | ||
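The BF bit selects what "true" means in the destination register: the IEEE-754 bit pattern of 1.0f when set, all ones otherwise, so the same instruction serves both boolean and float consumers. A quick check of that bit pattern (self-contained, C++20):

    #include <bit>
    #include <cstdint>

    int main() {
        // 0x3f800000 is exactly the value PSET writes for "true" when BF is set.
        static_assert(std::bit_cast<std::uint32_t>(1.0f) == 0x3f800000u);
        return 0;
    }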
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp new file mode 100644 index 000000000..93baa75a9 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp | |||
| @@ -0,0 +1,44 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
|  | #include <utility> | ||
|  | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | |||
| 12 | void SEL(TranslatorVisitor& v, u64 insn, const IR::U32& src) { | ||
| 13 | union { | ||
| 14 | u64 raw; | ||
| 15 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 16 | BitField<8, 8, IR::Reg> src_reg; | ||
| 17 | BitField<39, 3, IR::Pred> pred; | ||
| 18 | BitField<42, 1, u64> neg_pred; | ||
| 19 | } const sel{insn}; | ||
| 20 | |||
| 21 | const IR::U1 pred{v.ir.GetPred(sel.pred)}; | ||
| 22 | IR::U32 op_a{v.X(sel.src_reg)}; | ||
| 23 | IR::U32 op_b{src}; | ||
| 24 | if (sel.neg_pred != 0) { | ||
| 25 | std::swap(op_a, op_b); | ||
| 26 | } | ||
| 27 | const IR::U32 result{v.ir.Select(pred, op_a, op_b)}; | ||
| 28 | |||
| 29 | v.X(sel.dest_reg, result); | ||
| 30 | } | ||
| 31 | } // Anonymous namespace | ||
| 32 | |||
| 33 | void TranslatorVisitor::SEL_reg(u64 insn) { | ||
| 34 | SEL(*this, insn, GetReg20(insn)); | ||
| 35 | } | ||
| 36 | |||
| 37 | void TranslatorVisitor::SEL_cbuf(u64 insn) { | ||
| 38 | SEL(*this, insn, GetCbuf(insn)); | ||
| 39 | } | ||
| 40 | |||
| 41 | void TranslatorVisitor::SEL_imm(u64 insn) { | ||
| 42 | SEL(*this, insn, GetImm20(insn)); | ||
| 43 | } | ||
| 44 | } // namespace Shader::Maxwell | ||
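SEL only ever emits one Select IR op: a negated predicate is realized by swapping the operands rather than negating the condition. A scalar restatement of that trick (hypothetical helper):

    #include <cassert>
    #include <cstdint>
    #include <utility>

    std::uint32_t Sel(bool pred, bool neg_pred, std::uint32_t a, std::uint32_t b) {
        if (neg_pred) {
            std::swap(a, b); // selecting on !pred == selecting on pred with operands swapped
        }
        return pred ? a : b;
    }

    int main() {
        assert(Sel(true, false, 1, 2) == 1);
        assert(Sel(true, true, 1, 2) == 2); // negation realized by the swap
        return 0;
    }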
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp new file mode 100644 index 000000000..63b588ad4 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp | |||
| @@ -0,0 +1,205 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | #include <bit> | ||
| 7 | |||
| 8 | #include "common/bit_field.h" | ||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 11 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 12 | |||
| 13 | namespace Shader::Maxwell { | ||
| 14 | namespace { | ||
| 15 | enum class Type : u64 { | ||
| 16 | _1D, | ||
| 17 | BUFFER_1D, | ||
| 18 | ARRAY_1D, | ||
| 19 | _2D, | ||
| 20 | ARRAY_2D, | ||
| 21 | _3D, | ||
| 22 | }; | ||
| 23 | |||
| 24 | enum class Size : u64 { | ||
| 25 | U32, | ||
| 26 | S32, | ||
| 27 | U64, | ||
| 28 | S64, | ||
| 29 | F32FTZRN, | ||
| 30 | F16x2FTZRN, | ||
| 31 | SD32, | ||
| 32 | SD64, | ||
| 33 | }; | ||
| 34 | |||
| 35 | enum class AtomicOp : u64 { | ||
| 36 | ADD, | ||
| 37 | MIN, | ||
| 38 | MAX, | ||
| 39 | INC, | ||
| 40 | DEC, | ||
| 41 | AND, | ||
| 42 | OR, | ||
| 43 | XOR, | ||
| 44 | EXCH, | ||
| 45 | }; | ||
| 46 | |||
| 47 | enum class Clamp : u64 { | ||
| 48 | IGN, | ||
| 49 | Default, | ||
| 50 | TRAP, | ||
| 51 | }; | ||
| 52 | |||
| 53 | TextureType GetType(Type type) { | ||
| 54 | switch (type) { | ||
| 55 | case Type::_1D: | ||
| 56 | return TextureType::Color1D; | ||
| 57 | case Type::BUFFER_1D: | ||
| 58 | return TextureType::Buffer; | ||
| 59 | case Type::ARRAY_1D: | ||
| 60 | return TextureType::ColorArray1D; | ||
| 61 | case Type::_2D: | ||
| 62 | return TextureType::Color2D; | ||
| 63 | case Type::ARRAY_2D: | ||
| 64 | return TextureType::ColorArray2D; | ||
| 65 | case Type::_3D: | ||
| 66 | return TextureType::Color3D; | ||
| 67 | } | ||
| 68 | throw NotImplementedException("Invalid type {}", type); | ||
| 69 | } | ||
| 70 | |||
| 71 | IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) { | ||
| 72 | switch (type) { | ||
| 73 | case Type::_1D: | ||
| 74 | case Type::BUFFER_1D: | ||
| 75 | return v.X(reg); | ||
| 76 | case Type::_2D: | ||
| 77 | return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1)); | ||
| 78 | case Type::_3D: | ||
| 79 | return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); | ||
| 80 | default: | ||
| 81 | break; | ||
| 82 | } | ||
| 83 | throw NotImplementedException("Invalid type {}", type); | ||
| 84 | } | ||
| 85 | |||
| 86 | IR::Value ApplyAtomicOp(IR::IREmitter& ir, const IR::U32& handle, const IR::Value& coords, | ||
| 87 | const IR::Value& op_b, IR::TextureInstInfo info, AtomicOp op, | ||
| 88 | bool is_signed) { | ||
| 89 | switch (op) { | ||
| 90 | case AtomicOp::ADD: | ||
| 91 | return ir.ImageAtomicIAdd(handle, coords, op_b, info); | ||
| 92 | case AtomicOp::MIN: | ||
| 93 | return ir.ImageAtomicIMin(handle, coords, op_b, is_signed, info); | ||
| 94 | case AtomicOp::MAX: | ||
| 95 | return ir.ImageAtomicIMax(handle, coords, op_b, is_signed, info); | ||
| 96 | case AtomicOp::INC: | ||
| 97 | return ir.ImageAtomicInc(handle, coords, op_b, info); | ||
| 98 | case AtomicOp::DEC: | ||
| 99 | return ir.ImageAtomicDec(handle, coords, op_b, info); | ||
| 100 | case AtomicOp::AND: | ||
| 101 | return ir.ImageAtomicAnd(handle, coords, op_b, info); | ||
| 102 | case AtomicOp::OR: | ||
| 103 | return ir.ImageAtomicOr(handle, coords, op_b, info); | ||
| 104 | case AtomicOp::XOR: | ||
| 105 | return ir.ImageAtomicXor(handle, coords, op_b, info); | ||
| 106 | case AtomicOp::EXCH: | ||
| 107 | return ir.ImageAtomicExchange(handle, coords, op_b, info); | ||
| 108 | default: | ||
| 109 | throw NotImplementedException("Atomic Operation {}", op); | ||
| 110 | } | ||
| 111 | } | ||
| 112 | |||
| 113 | ImageFormat Format(Size size) { | ||
| 114 | switch (size) { | ||
| 115 | case Size::U32: | ||
| 116 | case Size::S32: | ||
| 117 | case Size::SD32: | ||
| 118 | return ImageFormat::R32_UINT; | ||
| 119 | default: | ||
| 120 | break; | ||
| 121 | } | ||
| 122 | throw NotImplementedException("Invalid size {}", size); | ||
| 123 | } | ||
| 124 | |||
| 125 | bool IsSizeInt32(Size size) { | ||
| 126 | switch (size) { | ||
| 127 | case Size::U32: | ||
| 128 | case Size::S32: | ||
| 129 | case Size::SD32: | ||
| 130 | return true; | ||
| 131 | default: | ||
| 132 | return false; | ||
| 133 | } | ||
| 134 | } | ||
| 135 | |||
| 136 | void ImageAtomOp(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg, IR::Reg coord_reg, | ||
| 137 | IR::Reg bindless_reg, AtomicOp op, Clamp clamp, Size size, Type type, | ||
| 138 | u64 bound_offset, bool is_bindless, bool write_result) { | ||
| 139 | if (clamp != Clamp::IGN) { | ||
| 140 | throw NotImplementedException("Clamp {}", clamp); | ||
| 141 | } | ||
| 142 | if (!IsSizeInt32(size)) { | ||
| 143 | throw NotImplementedException("Size {}", size); | ||
| 144 | } | ||
| 145 | const bool is_signed{size == Size::S32}; | ||
| 146 | const ImageFormat format{Format(size)}; | ||
| 147 | const TextureType tex_type{GetType(type)}; | ||
| 148 | const IR::Value coords{MakeCoords(v, coord_reg, type)}; | ||
| 149 | |||
| 150 | const IR::U32 handle{is_bindless ? v.X(bindless_reg) | ||
| 151 | : v.ir.Imm32(static_cast<u32>(bound_offset * 4))}; | ||
| 152 | IR::TextureInstInfo info{}; | ||
| 153 | info.type.Assign(tex_type); | ||
| 154 | info.image_format.Assign(format); | ||
| 155 | |||
| 156 | // TODO: float/64-bit operand | ||
| 157 | const IR::Value op_b{v.X(operand_reg)}; | ||
| 158 | const IR::Value color{ApplyAtomicOp(v.ir, handle, coords, op_b, info, op, is_signed)}; | ||
| 159 | |||
| 160 | if (write_result) { | ||
| 161 | v.X(dest_reg, IR::U32{color}); | ||
| 162 | } | ||
| 163 | } | ||
| 164 | } // Anonymous namespace | ||
| 165 | |||
| 166 | void TranslatorVisitor::SUATOM(u64 insn) { | ||
| 167 | union { | ||
| 168 | u64 raw; | ||
| 169 | BitField<54, 1, u64> is_bindless; | ||
| 170 | BitField<29, 4, AtomicOp> op; | ||
| 171 | BitField<33, 3, Type> type; | ||
| 172 | BitField<51, 3, Size> size; | ||
| 173 | BitField<49, 2, Clamp> clamp; | ||
| 174 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 175 | BitField<8, 8, IR::Reg> coord_reg; | ||
| 176 | BitField<20, 8, IR::Reg> operand_reg; | ||
| 177 | BitField<36, 13, u64> bound_offset; // !is_bindless | ||
| 178 | BitField<39, 8, IR::Reg> bindless_reg; // is_bindless | ||
| 179 | } const suatom{insn}; | ||
| 180 | |||
| 181 | ImageAtomOp(*this, suatom.dest_reg, suatom.operand_reg, suatom.coord_reg, suatom.bindless_reg, | ||
| 182 | suatom.op, suatom.clamp, suatom.size, suatom.type, suatom.bound_offset, | ||
| 183 | suatom.is_bindless != 0, true); | ||
| 184 | } | ||
| 185 | |||
| 186 | void TranslatorVisitor::SURED(u64 insn) { | ||
| 187 | // TODO: confirm offsets | ||
| 188 | union { | ||
| 189 | u64 raw; | ||
| 190 | BitField<51, 1, u64> is_bound; | ||
| 191 | BitField<21, 3, AtomicOp> op; | ||
| 192 | BitField<33, 3, Type> type; | ||
| 193 | BitField<20, 3, Size> size; | ||
| 194 | BitField<49, 2, Clamp> clamp; | ||
| 195 | BitField<0, 8, IR::Reg> operand_reg; | ||
| 196 | BitField<8, 8, IR::Reg> coord_reg; | ||
| 197 | BitField<36, 13, u64> bound_offset; // is_bound | ||
| 198 | BitField<39, 8, IR::Reg> bindless_reg; // !is_bound | ||
| 199 | } const sured{insn}; | ||
| 200 | ImageAtomOp(*this, IR::Reg::RZ, sured.operand_reg, sured.coord_reg, sured.bindless_reg, | ||
| 201 | sured.op, sured.clamp, sured.size, sured.type, sured.bound_offset, | ||
| 202 | sured.is_bound == 0, false); | ||
| 203 | } | ||
| 204 | |||
| 205 | } // namespace Shader::Maxwell | ||
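SUATOM returns the pre-operation value while SURED performs the same operation as a pure reduction, hence the shared ImageAtomOp helper with write_result toggled. A scalar model of the u32 operations dispatched by ApplyAtomicOp (sketch; the INC/DEC wrap-around follows PTX red.inc/red.dec semantics, which is an assumption here, and signed MIN/MAX for S32 is omitted):

    #include <algorithm>
    #include <cstdint>

    enum class AtomicOp { ADD, MIN, MAX, INC, DEC, AND, OR, XOR, EXCH };

    // Returns the new value stored in memory; the old value is what SUATOM returns.
    std::uint32_t Apply(std::uint32_t old, std::uint32_t b, AtomicOp op) {
        switch (op) {
        case AtomicOp::ADD:
            return old + b;
        case AtomicOp::MIN:
            return std::min(old, b);
        case AtomicOp::MAX:
            return std::max(old, b);
        case AtomicOp::INC:
            return old >= b ? 0 : old + 1; // wraps to 0 once the bound is reached
        case AtomicOp::DEC:
            return old == 0 || old > b ? b : old - 1; // wraps to the bound at 0
        case AtomicOp::AND:
            return old & b;
        case AtomicOp::OR:
            return old | b;
        case AtomicOp::XOR:
            return old ^ b;
        case AtomicOp::EXCH:
            return b; // exchange: the operand replaces the old value
        }
        return old;
    }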
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp new file mode 100644 index 000000000..681220a8d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp | |||
| @@ -0,0 +1,281 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | #include <bit> | ||
| 7 | |||
| 8 | #include "common/bit_field.h" | ||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 11 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 12 | |||
| 13 | namespace Shader::Maxwell { | ||
| 14 | namespace { | ||
| 15 | enum class Type : u64 { | ||
| 16 | _1D, | ||
| 17 | BUFFER_1D, | ||
| 18 | ARRAY_1D, | ||
| 19 | _2D, | ||
| 20 | ARRAY_2D, | ||
| 21 | _3D, | ||
| 22 | }; | ||
| 23 | |||
| 24 | constexpr unsigned R = 1 << 0; | ||
| 25 | constexpr unsigned G = 1 << 1; | ||
| 26 | constexpr unsigned B = 1 << 2; | ||
| 27 | constexpr unsigned A = 1 << 3; | ||
| 28 | |||
| 29 | constexpr std::array MASK{ | ||
| 30 | 0U, // | ||
| 31 | R, // | ||
| 32 | G, // | ||
| 33 | R | G, // | ||
| 34 | B, // | ||
| 35 | R | B, // | ||
| 36 | G | B, // | ||
| 37 | R | G | B, // | ||
| 38 | A, // | ||
| 39 | R | A, // | ||
| 40 | G | A, // | ||
| 41 | R | G | A, // | ||
| 42 | B | A, // | ||
| 43 | R | B | A, // | ||
| 44 | G | B | A, // | ||
| 45 | R | G | B | A, // | ||
| 46 | }; | ||
| 47 | |||
| 48 | enum class Size : u64 { | ||
| 49 | U8, | ||
| 50 | S8, | ||
| 51 | U16, | ||
| 52 | S16, | ||
| 53 | B32, | ||
| 54 | B64, | ||
| 55 | B128, | ||
| 56 | }; | ||
| 57 | |||
| 58 | enum class Clamp : u64 { | ||
| 59 | IGN, | ||
| 60 | Default, | ||
| 61 | TRAP, | ||
| 62 | }; | ||
| 63 | |||
| 64 | // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#cache-operators | ||
| 65 | enum class LoadCache : u64 { | ||
| 66 | CA, // Cache at all levels, likely to be accessed again | ||
| 67 | CG, // Cache at global level (L2 and below, not L1) | ||
| 68 | CI, // ??? | ||
| 69 | CV, // Don't cache and fetch again (volatile) | ||
| 70 | }; | ||
| 71 | |||
| 72 | enum class StoreCache : u64 { | ||
| 73 | WB, // Cache write-back all coherent levels | ||
| 74 | CG, // Cache at global level (L2 and below, not L1) | ||
| 75 | CS, // Cache streaming, likely to be accessed once | ||
| 76 | WT, // Cache write-through (to system memory, volatile?) | ||
| 77 | }; | ||
| 78 | |||
| 79 | ImageFormat Format(Size size) { | ||
| 80 | switch (size) { | ||
| 81 | case Size::U8: | ||
| 82 | return ImageFormat::R8_UINT; | ||
| 83 | case Size::S8: | ||
| 84 | return ImageFormat::R8_SINT; | ||
| 85 | case Size::U16: | ||
| 86 | return ImageFormat::R16_UINT; | ||
| 87 | case Size::S16: | ||
| 88 | return ImageFormat::R16_SINT; | ||
| 89 | case Size::B32: | ||
| 90 | return ImageFormat::R32_UINT; | ||
| 91 | case Size::B64: | ||
| 92 | return ImageFormat::R32G32_UINT; | ||
| 93 | case Size::B128: | ||
| 94 | return ImageFormat::R32G32B32A32_UINT; | ||
| 95 | } | ||
| 96 | throw NotImplementedException("Invalid size {}", size); | ||
| 97 | } | ||
| 98 | |||
| 99 | int SizeInRegs(Size size) { | ||
| 100 | switch (size) { | ||
| 101 | case Size::U8: | ||
| 102 | case Size::S8: | ||
| 103 | case Size::U16: | ||
| 104 | case Size::S16: | ||
| 105 | case Size::B32: | ||
| 106 | return 1; | ||
| 107 | case Size::B64: | ||
| 108 | return 2; | ||
| 109 | case Size::B128: | ||
| 110 | return 4; | ||
| 111 | } | ||
| 112 | throw NotImplementedException("Invalid size {}", size); | ||
| 113 | } | ||
| 114 | |||
| 115 | TextureType GetType(Type type) { | ||
| 116 | switch (type) { | ||
| 117 | case Type::_1D: | ||
| 118 | return TextureType::Color1D; | ||
| 119 | case Type::BUFFER_1D: | ||
| 120 | return TextureType::Buffer; | ||
| 121 | case Type::ARRAY_1D: | ||
| 122 | return TextureType::ColorArray1D; | ||
| 123 | case Type::_2D: | ||
| 124 | return TextureType::Color2D; | ||
| 125 | case Type::ARRAY_2D: | ||
| 126 | return TextureType::ColorArray2D; | ||
| 127 | case Type::_3D: | ||
| 128 | return TextureType::Color3D; | ||
| 129 | } | ||
| 130 | throw NotImplementedException("Invalid type {}", type); | ||
| 131 | } | ||
| 132 | |||
| 133 | IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) { | ||
| 134 | const auto array{[&](int index) { | ||
| 135 | return v.ir.BitFieldExtract(v.X(reg + index), v.ir.Imm32(0), v.ir.Imm32(16)); | ||
| 136 | }}; | ||
| 137 | switch (type) { | ||
| 138 | case Type::_1D: | ||
| 139 | case Type::BUFFER_1D: | ||
| 140 | return v.X(reg); | ||
| 141 | case Type::ARRAY_1D: | ||
| 142 | return v.ir.CompositeConstruct(v.X(reg), array(1)); | ||
| 143 | case Type::_2D: | ||
| 144 | return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1)); | ||
| 145 | case Type::ARRAY_2D: | ||
| 146 | return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), array(2)); | ||
| 147 | case Type::_3D: | ||
| 148 | return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); | ||
| 149 | } | ||
| 150 | throw NotImplementedException("Invalid type {}", type); | ||
| 151 | } | ||
| 152 | |||
| 153 | unsigned SwizzleMask(u64 swizzle) { | ||
| 154 | if (swizzle == 0 || swizzle >= MASK.size()) { | ||
| 155 | throw NotImplementedException("Invalid swizzle {}", swizzle); | ||
| 156 | } | ||
| 157 | return MASK[swizzle]; | ||
| 158 | } | ||
| 159 | |||
| 160 | IR::Value MakeColor(IR::IREmitter& ir, IR::Reg reg, int num_regs) { | ||
| 161 | std::array<IR::U32, 4> colors; | ||
| 162 | for (int i = 0; i < num_regs; ++i) { | ||
| 163 | colors[static_cast<size_t>(i)] = ir.GetReg(reg + i); | ||
| 164 | } | ||
| 165 | for (int i = num_regs; i < 4; ++i) { | ||
| 166 | colors[static_cast<size_t>(i)] = ir.Imm32(0); | ||
| 167 | } | ||
| 168 | return ir.CompositeConstruct(colors[0], colors[1], colors[2], colors[3]); | ||
| 169 | } | ||
| 170 | } // Anonymous namespace | ||
| 171 | |||
| 172 | void TranslatorVisitor::SULD(u64 insn) { | ||
| 173 | union { | ||
| 174 | u64 raw; | ||
| 175 | BitField<51, 1, u64> is_bound; | ||
| 176 | BitField<52, 1, u64> d; | ||
| 177 | BitField<23, 1, u64> ba; | ||
| 178 | BitField<33, 3, Type> type; | ||
| 179 | BitField<24, 2, LoadCache> cache; | ||
| 180 | BitField<20, 3, Size> size; // .D | ||
| 181 | BitField<20, 4, u64> swizzle; // .P | ||
| 182 | BitField<49, 2, Clamp> clamp; | ||
| 183 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 184 | BitField<8, 8, IR::Reg> coord_reg; | ||
| 185 | BitField<36, 13, u64> bound_offset; // is_bound | ||
| 186 | BitField<39, 8, IR::Reg> bindless_reg; // !is_bound | ||
| 187 | } const suld{insn}; | ||
| 188 | |||
| 189 | if (suld.clamp != Clamp::IGN) { | ||
| 190 | throw NotImplementedException("Clamp {}", suld.clamp.Value()); | ||
| 191 | } | ||
| 192 | if (suld.cache != LoadCache::CA && suld.cache != LoadCache::CG) { | ||
| 193 | throw NotImplementedException("Cache {}", suld.cache.Value()); | ||
| 194 | } | ||
| 195 | const bool is_typed{suld.d != 0}; | ||
| 196 | if (is_typed && suld.ba != 0) { | ||
| 197 | throw NotImplementedException("BA"); | ||
| 198 | } | ||
| 199 | |||
| 200 | const ImageFormat format{is_typed ? Format(suld.size) : ImageFormat::Typeless}; | ||
| 201 | const TextureType type{GetType(suld.type)}; | ||
| 202 | const IR::Value coords{MakeCoords(*this, suld.coord_reg, suld.type)}; | ||
| 203 | const IR::U32 handle{suld.is_bound != 0 ? ir.Imm32(static_cast<u32>(suld.bound_offset * 4)) | ||
| 204 | : X(suld.bindless_reg)}; | ||
| 205 | IR::TextureInstInfo info{}; | ||
| 206 | info.type.Assign(type); | ||
| 207 | info.image_format.Assign(format); | ||
| 208 | |||
| 209 | const IR::Value result{ir.ImageRead(handle, coords, info)}; | ||
| 210 | IR::Reg dest_reg{suld.dest_reg}; | ||
| 211 | if (is_typed) { | ||
| 212 | const int num_regs{SizeInRegs(suld.size)}; | ||
| 213 | for (int i = 0; i < num_regs; ++i) { | ||
| 214 | X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))}); | ||
| 215 | } | ||
| 216 | } else { | ||
| 217 | const unsigned mask{SwizzleMask(suld.swizzle)}; | ||
| 218 | const int bits{std::popcount(mask)}; | ||
| 219 | if (!IR::IsAligned(dest_reg, bits == 3 ? 4 : static_cast<size_t>(bits))) { | ||
| 220 | throw NotImplementedException("Unaligned destination register"); | ||
| 221 | } | ||
| 222 | for (unsigned component = 0; component < 4; ++component) { | ||
| 223 | if (((mask >> component) & 1) == 0) { | ||
| 224 | continue; | ||
| 225 | } | ||
| 226 | X(dest_reg, IR::U32{ir.CompositeExtract(result, component)}); | ||
| 227 | ++dest_reg; | ||
| 228 | } | ||
| 229 | } | ||
| 230 | } | ||
| 231 | |||
| 232 | void TranslatorVisitor::SUST(u64 insn) { | ||
| 233 | union { | ||
| 234 | u64 raw; | ||
| 235 | BitField<51, 1, u64> is_bound; | ||
| 236 | BitField<52, 1, u64> d; | ||
| 237 | BitField<23, 1, u64> ba; | ||
| 238 | BitField<33, 3, Type> type; | ||
| 239 | BitField<24, 2, StoreCache> cache; | ||
| 240 | BitField<20, 3, Size> size; // .D | ||
| 241 | BitField<20, 4, u64> swizzle; // .P | ||
| 242 | BitField<49, 2, Clamp> clamp; | ||
| 243 | BitField<0, 8, IR::Reg> data_reg; | ||
| 244 | BitField<8, 8, IR::Reg> coord_reg; | ||
| 245 | BitField<36, 13, u64> bound_offset; // is_bound | ||
| 246 | BitField<39, 8, IR::Reg> bindless_reg; // !is_bound | ||
| 247 | } const sust{insn}; | ||
| 248 | |||
| 249 | if (sust.clamp != Clamp::IGN) { | ||
| 250 | throw NotImplementedException("Clamp {}", sust.clamp.Value()); | ||
| 251 | } | ||
| 252 | if (sust.cache != StoreCache::WB && sust.cache != StoreCache::CG) { | ||
| 253 | throw NotImplementedException("Cache {}", sust.cache.Value()); | ||
| 254 | } | ||
| 255 | const bool is_typed{sust.d != 0}; | ||
| 256 | if (is_typed && sust.ba != 0) { | ||
| 257 | throw NotImplementedException("BA"); | ||
| 258 | } | ||
| 259 | const ImageFormat format{is_typed ? Format(sust.size) : ImageFormat::Typeless}; | ||
| 260 | const TextureType type{GetType(sust.type)}; | ||
| 261 | const IR::Value coords{MakeCoords(*this, sust.coord_reg, sust.type)}; | ||
| 262 | const IR::U32 handle{sust.is_bound != 0 ? ir.Imm32(static_cast<u32>(sust.bound_offset * 4)) | ||
| 263 | : X(sust.bindless_reg)}; | ||
| 264 | IR::TextureInstInfo info{}; | ||
| 265 | info.type.Assign(type); | ||
| 266 | info.image_format.Assign(format); | ||
| 267 | |||
| 268 | IR::Value color; | ||
| 269 | if (is_typed) { | ||
| 270 | color = MakeColor(ir, sust.data_reg, SizeInRegs(sust.size)); | ||
| 271 | } else { | ||
| 272 | const unsigned mask{SwizzleMask(sust.swizzle)}; | ||
| 273 | if (mask != 0xf) { | ||
| 274 | throw NotImplementedException("Non-full mask"); | ||
| 275 | } | ||
| 276 | color = MakeColor(ir, sust.data_reg, 4); | ||
| 277 | } | ||
| 278 | ir.ImageWrite(handle, coords, color, info); | ||
| 279 | } | ||
| 280 | |||
| 281 | } // namespace Shader::Maxwell | ||
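Untyped SULD.P loads use the 4-bit swizzle as a component mask: each set bit pulls one component into the next destination register, which is why the alignment check above counts bits with std::popcount. A small walk of a sample mask (self-contained, C++20):

    #include <bit>
    #include <cstdio>

    int main() {
        const unsigned mask = 0b0101; // R | B from the MASK table above
        int reg = 0;
        for (unsigned component = 0; component < 4; ++component) {
            if (((mask >> component) & 1) != 0) {
                std::printf("component %u -> destination register +%d\n", component, reg++);
            }
        }
        std::printf("registers used: %d\n", std::popcount(mask)); // 2
        return 0;
    }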
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp new file mode 100644 index 000000000..0046b5edd --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp | |||
| @@ -0,0 +1,236 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <optional> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | enum class Blod : u64 { | ||
| 15 | None, | ||
| 16 | LZ, | ||
| 17 | LB, | ||
| 18 | LL, | ||
| 19 | INVALIDBLOD4, | ||
| 20 | INVALIDBLOD5, | ||
| 21 | LBA, | ||
| 22 | LLA, | ||
| 23 | }; | ||
| 24 | |||
| 25 | enum class TextureType : u64 { | ||
| 26 | _1D, | ||
| 27 | ARRAY_1D, | ||
| 28 | _2D, | ||
| 29 | ARRAY_2D, | ||
| 30 | _3D, | ||
| 31 | ARRAY_3D, | ||
| 32 | CUBE, | ||
| 33 | ARRAY_CUBE, | ||
| 34 | }; | ||
| 35 | |||
| 36 | Shader::TextureType GetType(TextureType type) { | ||
| 37 | switch (type) { | ||
| 38 | case TextureType::_1D: | ||
| 39 | return Shader::TextureType::Color1D; | ||
| 40 | case TextureType::ARRAY_1D: | ||
| 41 | return Shader::TextureType::ColorArray1D; | ||
| 42 | case TextureType::_2D: | ||
| 43 | return Shader::TextureType::Color2D; | ||
| 44 | case TextureType::ARRAY_2D: | ||
| 45 | return Shader::TextureType::ColorArray2D; | ||
| 46 | case TextureType::_3D: | ||
| 47 | return Shader::TextureType::Color3D; | ||
| 48 | case TextureType::ARRAY_3D: | ||
| 49 | throw NotImplementedException("3D array texture type"); | ||
| 50 | case TextureType::CUBE: | ||
| 51 | return Shader::TextureType::ColorCube; | ||
| 52 | case TextureType::ARRAY_CUBE: | ||
| 53 | return Shader::TextureType::ColorArrayCube; | ||
| 54 | } | ||
| 55 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 56 | } | ||
| 57 | |||
| 58 | IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { | ||
| 59 | const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }}; | ||
| 60 | switch (type) { | ||
| 61 | case TextureType::_1D: | ||
| 62 | return v.F(reg); | ||
| 63 | case TextureType::ARRAY_1D: | ||
| 64 | return v.ir.CompositeConstruct(v.F(reg + 1), read_array()); | ||
| 65 | case TextureType::_2D: | ||
| 66 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); | ||
| 67 | case TextureType::ARRAY_2D: | ||
| 68 | return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array()); | ||
| 69 | case TextureType::_3D: | ||
| 70 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); | ||
| 71 | case TextureType::ARRAY_3D: | ||
| 72 | throw NotImplementedException("3D array texture type"); | ||
| 73 | case TextureType::CUBE: | ||
| 74 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); | ||
| 75 | case TextureType::ARRAY_CUBE: | ||
| 76 | return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array()); | ||
| 77 | } | ||
| 78 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 79 | } | ||
| 80 | |||
| 81 | IR::F32 MakeLod(TranslatorVisitor& v, IR::Reg& reg, Blod blod) { | ||
| 82 | switch (blod) { | ||
| 83 | case Blod::None: | ||
| 84 | return v.ir.Imm32(0.0f); | ||
| 85 | case Blod::LZ: | ||
| 86 | return v.ir.Imm32(0.0f); | ||
| 87 | case Blod::LB: | ||
| 88 | case Blod::LL: | ||
| 89 | case Blod::LBA: | ||
| 90 | case Blod::LLA: | ||
| 91 | return v.F(reg++); | ||
| 92 | case Blod::INVALIDBLOD4: | ||
| 93 | case Blod::INVALIDBLOD5: | ||
| 94 | break; | ||
| 95 | } | ||
| 96 | throw NotImplementedException("Invalid blod {}", blod); | ||
| 97 | } | ||
| 98 | |||
| 99 | IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { | ||
| 100 | const IR::U32 value{v.X(reg++)}; | ||
| 101 | switch (type) { | ||
| 102 | case TextureType::_1D: | ||
| 103 | case TextureType::ARRAY_1D: | ||
| 104 | return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true); | ||
| 105 | case TextureType::_2D: | ||
| 106 | case TextureType::ARRAY_2D: | ||
| 107 | return v.ir.CompositeConstruct( | ||
| 108 | v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), | ||
| 109 | v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true)); | ||
| 110 | case TextureType::_3D: | ||
| 111 | case TextureType::ARRAY_3D: | ||
| 112 | return v.ir.CompositeConstruct( | ||
| 113 | v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), | ||
| 114 | v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true), | ||
| 115 | v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true)); | ||
| 116 | case TextureType::CUBE: | ||
| 117 | case TextureType::ARRAY_CUBE: | ||
| 118 | throw NotImplementedException("Illegal offset on CUBE sample"); | ||
| 119 | } | ||
| 120 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 121 | } | ||
| 122 | |||
| 123 | bool HasExplicitLod(Blod blod) { | ||
| 124 | switch (blod) { | ||
| 125 | case Blod::LL: | ||
| 126 | case Blod::LLA: | ||
| 127 | case Blod::LZ: | ||
| 128 | return true; | ||
| 129 | default: | ||
| 130 | return false; | ||
| 131 | } | ||
| 132 | } | ||
| 133 | |||
| 134 | void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, | ||
| 135 | std::optional<u32> cbuf_offset) { | ||
| 136 | union { | ||
| 137 | u64 raw; | ||
| 138 | BitField<35, 1, u64> ndv; | ||
| 139 | BitField<49, 1, u64> nodep; | ||
| 140 | BitField<50, 1, u64> dc; | ||
| 141 | BitField<51, 3, IR::Pred> sparse_pred; | ||
| 142 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 143 | BitField<8, 8, IR::Reg> coord_reg; | ||
| 144 | BitField<20, 8, IR::Reg> meta_reg; | ||
| 145 | BitField<28, 3, TextureType> type; | ||
| 146 | BitField<31, 4, u64> mask; | ||
| 147 | } const tex{insn}; | ||
| 148 | |||
| 149 | if (lc) { | ||
| 150 | throw NotImplementedException("LC"); | ||
| 151 | } | ||
| 152 | const IR::Value coords{MakeCoords(v, tex.coord_reg, tex.type)}; | ||
| 153 | |||
| 154 | IR::Reg meta_reg{tex.meta_reg}; | ||
| 155 | IR::Value handle; | ||
| 156 | IR::Value offset; | ||
| 157 | IR::F32 dref; | ||
| 158 | IR::F32 lod_clamp; | ||
| 159 | if (cbuf_offset) { | ||
| 160 | handle = v.ir.Imm32(*cbuf_offset); | ||
| 161 | } else { | ||
| 162 | handle = v.X(meta_reg++); | ||
| 163 | } | ||
| 164 | const IR::F32 lod{MakeLod(v, meta_reg, blod)}; | ||
| 165 | if (aoffi) { | ||
| 166 | offset = MakeOffset(v, meta_reg, tex.type); | ||
| 167 | } | ||
| 168 | if (tex.dc != 0) { | ||
| 169 | dref = v.F(meta_reg++); | ||
| 170 | } | ||
| 171 | IR::TextureInstInfo info{}; | ||
| 172 | info.type.Assign(GetType(tex.type)); | ||
| 173 | info.is_depth.Assign(tex.dc != 0 ? 1 : 0); | ||
| 174 | info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0); | ||
| 175 | info.has_lod_clamp.Assign(lc ? 1 : 0); | ||
| 176 | |||
| 177 | const IR::Value sample{[&]() -> IR::Value { | ||
| 178 | if (tex.dc == 0) { | ||
| 179 | if (HasExplicitLod(blod)) { | ||
| 180 | return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, info); | ||
| 181 | } else { | ||
| 182 | return v.ir.ImageSampleImplicitLod(handle, coords, lod, offset, lod_clamp, info); | ||
| 183 | } | ||
| 184 | } | ||
| 185 | if (HasExplicitLod(blod)) { | ||
| 186 | return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, info); | ||
| 187 | } else { | ||
| 188 | return v.ir.ImageSampleDrefImplicitLod(handle, coords, dref, lod, offset, lod_clamp, | ||
| 189 | info); | ||
| 190 | } | ||
| 191 | }()}; | ||
| 192 | |||
| 193 | IR::Reg dest_reg{tex.dest_reg}; | ||
| 194 | for (int element = 0; element < 4; ++element) { | ||
| 195 | if (((tex.mask >> element) & 1) == 0) { | ||
| 196 | continue; | ||
| 197 | } | ||
| 198 | IR::F32 value; | ||
| 199 | if (tex.dc != 0) { | ||
| 200 | value = element < 3 ? IR::F32{sample} : v.ir.Imm32(1.0f); | ||
| 201 | } else { | ||
| 202 | value = IR::F32{v.ir.CompositeExtract(sample, static_cast<size_t>(element))}; | ||
| 203 | } | ||
| 204 | v.F(dest_reg, value); | ||
| 205 | ++dest_reg; | ||
| 206 | } | ||
| 207 | if (tex.sparse_pred != IR::Pred::PT) { | ||
| 208 | v.ir.SetPred(tex.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); | ||
| 209 | } | ||
| 210 | } | ||
| 211 | } // Anonymous namespace | ||
| 212 | |||
| 213 | void TranslatorVisitor::TEX(u64 insn) { | ||
| 214 | union { | ||
| 215 | u64 raw; | ||
| 216 | BitField<54, 1, u64> aoffi; | ||
| 217 | BitField<55, 3, Blod> blod; | ||
| 218 | BitField<58, 1, u64> lc; | ||
| 219 | BitField<36, 13, u64> cbuf_offset; | ||
| 220 | } const tex{insn}; | ||
| 221 | |||
| 222 | Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast<u32>(tex.cbuf_offset * 4)); | ||
| 223 | } | ||
| 224 | |||
| 225 | void TranslatorVisitor::TEX_b(u64 insn) { | ||
| 226 | union { | ||
| 227 | u64 raw; | ||
| 228 | BitField<36, 1, u64> aoffi; | ||
| 229 | BitField<37, 3, Blod> blod; | ||
| 230 | BitField<40, 1, u64> lc; | ||
| 231 | } const tex{insn}; | ||
| 232 | |||
| 233 | Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, std::nullopt); | ||
| 234 | } | ||
| 235 | |||
| 236 | } // namespace Shader::Maxwell | ||
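Impl() consumes the TEX "meta" register block in a fixed order: the bindless handle first (TEX.B only), then the LOD or bias operand, then the packed AOFFI offsets, then the depth-compare reference. A hypothetical trace of that order (names are illustrative, not from the source):

    #include <cstdio>

    void TraceMetaRegs(bool bindless, bool has_lod_operand, bool aoffi, bool dc) {
        int slot = 0;
        if (bindless) {
            std::printf("meta+%d: texture handle\n", slot++);
        }
        if (has_lod_operand) { // Blod::LB/LL/LBA/LLA consume a register; None/LZ do not
            std::printf("meta+%d: lod or bias\n", slot++);
        }
        if (aoffi) {
            std::printf("meta+%d: packed 4-bit signed offsets\n", slot++);
        }
        if (dc) {
            std::printf("meta+%d: depth-compare reference\n", slot++);
        }
    }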
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp new file mode 100644 index 000000000..154e7f1a1 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp | |||
| @@ -0,0 +1,266 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <utility> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | enum class Precision : u64 { | ||
| 15 | F16, | ||
| 16 | F32, | ||
| 17 | }; | ||
| 18 | |||
| 19 | union Encoding { | ||
| 20 | u64 raw; | ||
| 21 | BitField<59, 1, Precision> precision; | ||
| 22 | BitField<53, 4, u64> encoding; | ||
| 23 | BitField<49, 1, u64> nodep; | ||
| 24 | BitField<28, 8, IR::Reg> dest_reg_b; | ||
| 25 | BitField<0, 8, IR::Reg> dest_reg_a; | ||
| 26 | BitField<8, 8, IR::Reg> src_reg_a; | ||
| 27 | BitField<20, 8, IR::Reg> src_reg_b; | ||
| 28 | BitField<36, 13, u64> cbuf_offset; | ||
| 29 | BitField<50, 3, u64> swizzle; | ||
| 30 | }; | ||
| 31 | |||
| 32 | constexpr unsigned R = 1; | ||
| 33 | constexpr unsigned G = 2; | ||
| 34 | constexpr unsigned B = 4; | ||
| 35 | constexpr unsigned A = 8; | ||
| 36 | |||
| 37 | constexpr std::array RG_LUT{ | ||
| 38 | R, // | ||
| 39 | G, // | ||
| 40 | B, // | ||
| 41 | A, // | ||
| 42 | R | G, // | ||
| 43 | R | A, // | ||
| 44 | G | A, // | ||
| 45 | B | A, // | ||
| 46 | }; | ||
| 47 | |||
| 48 | constexpr std::array RGBA_LUT{ | ||
| 49 | R | G | B, // | ||
| 50 | R | G | A, // | ||
| 51 | R | B | A, // | ||
| 52 | G | B | A, // | ||
| 53 | R | G | B | A, // | ||
| 54 | }; | ||
| 55 | |||
| 56 | void CheckAlignment(IR::Reg reg, size_t alignment) { | ||
| 57 | if (!IR::IsAligned(reg, alignment)) { | ||
| 58 | throw NotImplementedException("Unaligned source register {}", reg); | ||
| 59 | } | ||
| 60 | } | ||
| 61 | |||
| 62 | template <typename... Args> | ||
| 63 | IR::Value Composite(TranslatorVisitor& v, Args... regs) { | ||
| 64 | return v.ir.CompositeConstruct(v.F(regs)...); | ||
| 65 | } | ||
| 66 | |||
| 67 | IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) { | ||
| 68 | return v.ir.ConvertUToF(32, 16, v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(16))); | ||
| 69 | } | ||
| 70 | |||
| 71 | IR::Value Sample(TranslatorVisitor& v, u64 insn) { | ||
| 72 | const Encoding texs{insn}; | ||
| 73 | const IR::U32 handle{v.ir.Imm32(static_cast<u32>(texs.cbuf_offset * 4))}; | ||
| 74 | const IR::F32 zero{v.ir.Imm32(0.0f)}; | ||
| 75 | const IR::Reg reg_a{texs.src_reg_a}; | ||
| 76 | const IR::Reg reg_b{texs.src_reg_b}; | ||
| 77 | IR::TextureInstInfo info{}; | ||
| 78 | if (texs.precision == Precision::F16) { | ||
| 79 | info.relaxed_precision.Assign(1); | ||
| 80 | } | ||
| 81 | switch (texs.encoding) { | ||
| 82 | case 0: // 1D.LZ | ||
| 83 | info.type.Assign(TextureType::Color1D); | ||
| 84 | return v.ir.ImageSampleExplicitLod(handle, v.F(reg_a), zero, {}, info); | ||
| 85 | case 1: // 2D | ||
| 86 | info.type.Assign(TextureType::Color2D); | ||
| 87 | return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_b), {}, {}, {}, info); | ||
| 88 | case 2: // 2D.LZ | ||
| 89 | info.type.Assign(TextureType::Color2D); | ||
| 90 | return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_b), zero, {}, info); | ||
| 91 | case 3: // 2D.LL | ||
| 92 | CheckAlignment(reg_a, 2); | ||
| 93 | info.type.Assign(TextureType::Color2D); | ||
| 94 | return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), {}, | ||
| 95 | info); | ||
| 96 | case 4: // 2D.DC | ||
| 97 | CheckAlignment(reg_a, 2); | ||
| 98 | info.type.Assign(TextureType::Color2D); | ||
| 99 | info.is_depth.Assign(1); | ||
| 100 | return v.ir.ImageSampleDrefImplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), | ||
| 101 | {}, {}, {}, info); | ||
| 102 | case 5: // 2D.LL.DC | ||
| 103 | CheckAlignment(reg_a, 2); | ||
| 104 | CheckAlignment(reg_b, 2); | ||
| 105 | info.type.Assign(TextureType::Color2D); | ||
| 106 | info.is_depth.Assign(1); | ||
| 107 | return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), | ||
| 108 | v.F(reg_b + 1), v.F(reg_b), {}, info); | ||
| 109 | case 6: // 2D.LZ.DC | ||
| 110 | CheckAlignment(reg_a, 2); | ||
| 111 | info.type.Assign(TextureType::Color2D); | ||
| 112 | info.is_depth.Assign(1); | ||
| 113 | return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), | ||
| 114 | zero, {}, info); | ||
| 115 | case 7: // ARRAY_2D | ||
| 116 | CheckAlignment(reg_a, 2); | ||
| 117 | info.type.Assign(TextureType::ColorArray2D); | ||
| 118 | return v.ir.ImageSampleImplicitLod( | ||
| 119 | handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))), | ||
| 120 | {}, {}, {}, info); | ||
| 121 | case 8: // ARRAY_2D.LZ | ||
| 122 | CheckAlignment(reg_a, 2); | ||
| 123 | info.type.Assign(TextureType::ColorArray2D); | ||
| 124 | return v.ir.ImageSampleExplicitLod( | ||
| 125 | handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))), | ||
| 126 | zero, {}, info); | ||
| 127 | case 9: // ARRAY_2D.LZ.DC | ||
| 128 | CheckAlignment(reg_a, 2); | ||
| 129 | CheckAlignment(reg_b, 2); | ||
| 130 | info.type.Assign(TextureType::ColorArray2D); | ||
| 131 | info.is_depth.Assign(1); | ||
| 132 | return v.ir.ImageSampleDrefExplicitLod( | ||
| 133 | handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))), | ||
| 134 | v.F(reg_b + 1), zero, {}, info); | ||
| 135 | case 10: // 3D | ||
| 136 | CheckAlignment(reg_a, 2); | ||
| 137 | info.type.Assign(TextureType::Color3D); | ||
| 138 | return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {}, | ||
| 139 | {}, info); | ||
| 140 | case 11: // 3D.LZ | ||
| 141 | CheckAlignment(reg_a, 2); | ||
| 142 | info.type.Assign(TextureType::Color3D); | ||
| 143 | return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), zero, {}, | ||
| 144 | info); | ||
| 145 | case 12: // CUBE | ||
| 146 | CheckAlignment(reg_a, 2); | ||
| 147 | info.type.Assign(TextureType::ColorCube); | ||
| 148 | return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {}, | ||
| 149 | {}, info); | ||
| 150 | case 13: // CUBE.LL | ||
| 151 | CheckAlignment(reg_a, 2); | ||
| 152 | CheckAlignment(reg_b, 2); | ||
| 153 | info.type.Assign(TextureType::ColorCube); | ||
| 154 | return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), | ||
| 155 | v.F(reg_b + 1), {}, info); | ||
| 156 | default: | ||
| 157 | throw NotImplementedException("Illegal encoding {}", texs.encoding.Value()); | ||
| 158 | } | ||
| 159 | } | ||
| 160 | |||
| 161 | unsigned Swizzle(u64 insn) { | ||
| 162 | const Encoding texs{insn}; | ||
| 163 | const size_t encoding{texs.swizzle}; | ||
| 164 | if (texs.dest_reg_b == IR::Reg::RZ) { | ||
| 165 | if (encoding >= RG_LUT.size()) { | ||
| 166 | throw NotImplementedException("Illegal RG encoding {}", encoding); | ||
| 167 | } | ||
| 168 | return RG_LUT[encoding]; | ||
| 169 | } else { | ||
| 170 | if (encoding >= RGBA_LUT.size()) { | ||
| 171 | throw NotImplementedException("Illegal RGBA encoding {}", encoding); | ||
| 172 | } | ||
| 173 | return RGBA_LUT[encoding]; | ||
| 174 | } | ||
| 175 | } | ||
| 176 | |||
| 177 | IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) { | ||
| 178 | const bool is_shadow{sample.Type() == IR::Type::F32}; | ||
| 179 | if (is_shadow) { | ||
| 180 | const bool is_alpha{component == 3}; | ||
| 181 | return is_alpha ? v.ir.Imm32(1.0f) : IR::F32{sample}; | ||
| 182 | } else { | ||
| 183 | return IR::F32{v.ir.CompositeExtract(sample, component)}; | ||
| 184 | } | ||
| 185 | } | ||
| 186 | |||
| 187 | IR::Reg RegStoreComponent32(u64 insn, unsigned index) { | ||
| 188 | const Encoding texs{insn}; | ||
| 189 | switch (index) { | ||
| 190 | case 0: | ||
| 191 | return texs.dest_reg_a; | ||
| 192 | case 1: | ||
| 193 | CheckAlignment(texs.dest_reg_a, 2); | ||
| 194 | return texs.dest_reg_a + 1; | ||
| 195 | case 2: | ||
| 196 | return texs.dest_reg_b; | ||
| 197 | case 3: | ||
| 198 | CheckAlignment(texs.dest_reg_b, 2); | ||
| 199 | return texs.dest_reg_b + 1; | ||
| 200 | } | ||
| 201 | throw LogicError("Invalid store index {}", index); | ||
| 202 | } | ||
| 203 | |||
| 204 | void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { | ||
| 205 | const unsigned swizzle{Swizzle(insn)}; | ||
| 206 | unsigned store_index{0}; | ||
| 207 | for (unsigned component = 0; component < 4; ++component) { | ||
| 208 | if (((swizzle >> component) & 1) == 0) { | ||
| 209 | continue; | ||
| 210 | } | ||
| 211 | const IR::Reg dest{RegStoreComponent32(insn, store_index)}; | ||
| 212 | v.F(dest, Extract(v, sample, component)); | ||
| 213 | ++store_index; | ||
| 214 | } | ||
| 215 | } | ||
| 216 | |||
| 217 | IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) { | ||
| 218 | return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs)); | ||
| 219 | } | ||
| 220 | |||
| 221 | void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { | ||
| 222 | const unsigned swizzle{Swizzle(insn)}; | ||
| 223 | unsigned store_index{0}; | ||
| 224 | std::array<IR::F32, 4> swizzled; | ||
| 225 | for (unsigned component = 0; component < 4; ++component) { | ||
| 226 | if (((swizzle >> component) & 1) == 0) { | ||
| 227 | continue; | ||
| 228 | } | ||
| 229 | swizzled[store_index] = Extract(v, sample, component); | ||
| 230 | ++store_index; | ||
| 231 | } | ||
| 232 | const IR::F32 zero{v.ir.Imm32(0.0f)}; | ||
| 233 | const Encoding texs{insn}; | ||
| 234 | switch (store_index) { | ||
| 235 | case 1: | ||
| 236 | v.X(texs.dest_reg_a, Pack(v, swizzled[0], zero)); | ||
| 237 | break; | ||
| 238 | case 2: | ||
| 239 | case 3: | ||
| 240 | case 4: | ||
| 241 | v.X(texs.dest_reg_a, Pack(v, swizzled[0], swizzled[1])); | ||
| 242 | switch (store_index) { | ||
| 243 | case 2: | ||
| 244 | break; | ||
| 245 | case 3: | ||
| 246 | v.X(texs.dest_reg_b, Pack(v, swizzled[2], zero)); | ||
| 247 | break; | ||
| 248 | case 4: | ||
| 249 | v.X(texs.dest_reg_b, Pack(v, swizzled[2], swizzled[3])); | ||
| 250 | break; | ||
| 251 | } | ||
| 252 | break; | ||
| 253 | } | ||
| 254 | } | ||
| 255 | } // Anonymous namespace | ||
| 256 | |||
| 257 | void TranslatorVisitor::TEXS(u64 insn) { | ||
| 258 | const IR::Value sample{Sample(*this, insn)}; | ||
| 259 | if (Encoding{insn}.precision == Precision::F32) { | ||
| 260 | Store32(*this, insn, sample); | ||
| 261 | } else { | ||
| 262 | Store16(*this, insn, sample); | ||
| 263 | } | ||
| 264 | } | ||
| 265 | |||
| 266 | } // namespace Shader::Maxwell | ||
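The Swizzle and Store32 helpers above cooperate: the LUTs yield a 4-bit component mask (bit 0 = R through bit 3 = A), and Store32 walks that mask, routing each selected component to the next free destination slot (Rd.a, Rd.a + 1, Rd.b, Rd.b + 1 in order). A minimal standalone sketch of that routing, with illustrative values and no IR emitter involved:

    #include <cstdio>

    // Hypothetical sketch: route swizzle-selected components to TEXS
    // destination slots, mirroring Swizzle() and Store32() above.
    constexpr unsigned R = 1, G = 2, B = 4, A = 8;

    int main() {
        const unsigned swizzle = R | G | A; // e.g. an RG_LUT entry
        const char* dest_slots[4] = {"dest_reg_a", "dest_reg_a + 1",
                                     "dest_reg_b", "dest_reg_b + 1"};
        unsigned store_index = 0;
        for (unsigned component = 0; component < 4; ++component) {
            if (((swizzle >> component) & 1) == 0) {
                continue; // component not selected by the swizzle mask
            }
            std::printf("component %u -> %s\n", component, dest_slots[store_index]);
            ++store_index;
        }
        return 0;
    }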
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp new file mode 100644 index 000000000..218cbc1a8 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp | |||
| @@ -0,0 +1,208 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <optional> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | |||
| 15 | enum class TextureType : u64 { | ||
| 16 | _1D, | ||
| 17 | ARRAY_1D, | ||
| 18 | _2D, | ||
| 19 | ARRAY_2D, | ||
| 20 | _3D, | ||
| 21 | ARRAY_3D, | ||
| 22 | CUBE, | ||
| 23 | ARRAY_CUBE, | ||
| 24 | }; | ||
| 25 | |||
| 26 | enum class OffsetType : u64 { | ||
| 27 | None = 0, | ||
| 28 | AOFFI, | ||
| 29 | PTP, | ||
| 30 | Invalid, | ||
| 31 | }; | ||
| 32 | |||
| 33 | enum class ComponentType : u64 { | ||
| 34 | R = 0, | ||
| 35 | G = 1, | ||
| 36 | B = 2, | ||
| 37 | A = 3, | ||
| 38 | }; | ||
| 39 | |||
| 40 | Shader::TextureType GetType(TextureType type) { | ||
| 41 | switch (type) { | ||
| 42 | case TextureType::_1D: | ||
| 43 | return Shader::TextureType::Color1D; | ||
| 44 | case TextureType::ARRAY_1D: | ||
| 45 | return Shader::TextureType::ColorArray1D; | ||
| 46 | case TextureType::_2D: | ||
| 47 | return Shader::TextureType::Color2D; | ||
| 48 | case TextureType::ARRAY_2D: | ||
| 49 | return Shader::TextureType::ColorArray2D; | ||
| 50 | case TextureType::_3D: | ||
| 51 | return Shader::TextureType::Color3D; | ||
| 52 | case TextureType::ARRAY_3D: | ||
| 53 | throw NotImplementedException("3D array texture type"); | ||
| 54 | case TextureType::CUBE: | ||
| 55 | return Shader::TextureType::ColorCube; | ||
| 56 | case TextureType::ARRAY_CUBE: | ||
| 57 | return Shader::TextureType::ColorArrayCube; | ||
| 58 | } | ||
| 59 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 60 | } | ||
| 61 | |||
| 62 | IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { | ||
| 63 | const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }}; | ||
| 64 | switch (type) { | ||
| 65 | case TextureType::_1D: | ||
| 66 | return v.F(reg); | ||
| 67 | case TextureType::ARRAY_1D: | ||
| 68 | return v.ir.CompositeConstruct(v.F(reg + 1), read_array()); | ||
| 69 | case TextureType::_2D: | ||
| 70 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); | ||
| 71 | case TextureType::ARRAY_2D: | ||
| 72 | return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array()); | ||
| 73 | case TextureType::_3D: | ||
| 74 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); | ||
| 75 | case TextureType::ARRAY_3D: | ||
| 76 | throw NotImplementedException("3D array texture type"); | ||
| 77 | case TextureType::CUBE: | ||
| 78 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); | ||
| 79 | case TextureType::ARRAY_CUBE: | ||
| 80 | return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array()); | ||
| 81 | } | ||
| 82 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 83 | } | ||
| 84 | |||
| 85 | IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { | ||
| 86 | const IR::U32 value{v.X(reg++)}; | ||
| 87 | switch (type) { | ||
| 88 | case TextureType::_1D: | ||
| 89 | case TextureType::ARRAY_1D: | ||
| 90 | return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true); | ||
| 91 | case TextureType::_2D: | ||
| 92 | case TextureType::ARRAY_2D: | ||
| 93 | return v.ir.CompositeConstruct( | ||
| 94 | v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), | ||
| 95 | v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true)); | ||
| 96 | case TextureType::_3D: | ||
| 97 | case TextureType::ARRAY_3D: | ||
| 98 | return v.ir.CompositeConstruct( | ||
| 99 | v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), | ||
| 100 | v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true), | ||
| 101 | v.ir.BitFieldExtract(value, v.ir.Imm32(16), v.ir.Imm32(6), true)); | ||
| 102 | case TextureType::CUBE: | ||
| 103 | case TextureType::ARRAY_CUBE: | ||
| 104 | throw NotImplementedException("Illegal offset on CUBE sample"); | ||
| 105 | } | ||
| 106 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 107 | } | ||
| 108 | |||
| 109 | std::pair<IR::Value, IR::Value> MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) { | ||
| 110 | const IR::U32 value1{v.X(reg++)}; | ||
| 111 | const IR::U32 value2{v.X(reg++)}; | ||
| 112 | const IR::U32 bitsize{v.ir.Imm32(6)}; | ||
| 113 | const auto make_vector{[&v, &bitsize](const IR::U32& value) { | ||
| 114 | return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), bitsize, true), | ||
| 115 | v.ir.BitFieldExtract(value, v.ir.Imm32(8), bitsize, true), | ||
| 116 | v.ir.BitFieldExtract(value, v.ir.Imm32(16), bitsize, true), | ||
| 117 | v.ir.BitFieldExtract(value, v.ir.Imm32(24), bitsize, true)); | ||
| 118 | }}; | ||
| 119 | return {make_vector(value1), make_vector(value2)}; | ||
| 120 | } | ||
| 121 | |||
| 122 | void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetType offset_type, | ||
| 123 | bool is_bindless) { | ||
| 124 | union { | ||
| 125 | u64 raw; | ||
| 126 | BitField<35, 1, u64> ndv; | ||
| 127 | BitField<49, 1, u64> nodep; | ||
| 128 | BitField<50, 1, u64> dc; | ||
| 129 | BitField<51, 3, IR::Pred> sparse_pred; | ||
| 130 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 131 | BitField<8, 8, IR::Reg> coord_reg; | ||
| 132 | BitField<20, 8, IR::Reg> meta_reg; | ||
| 133 | BitField<28, 3, TextureType> type; | ||
| 134 | BitField<31, 4, u64> mask; | ||
| 135 | BitField<36, 13, u64> cbuf_offset; | ||
| 136 | } const tld4{insn}; | ||
| 137 | |||
| 138 | const IR::Value coords{MakeCoords(v, tld4.coord_reg, tld4.type)}; | ||
| 139 | |||
| 140 | IR::Reg meta_reg{tld4.meta_reg}; | ||
| 141 | IR::Value handle; | ||
| 142 | IR::Value offset; | ||
| 143 | IR::Value offset2; | ||
| 144 | IR::F32 dref; | ||
| 145 | if (!is_bindless) { | ||
| 146 | handle = v.ir.Imm32(static_cast<u32>(tld4.cbuf_offset.Value() * 4)); | ||
| 147 | } else { | ||
| 148 | handle = v.X(meta_reg++); | ||
| 149 | } | ||
| 150 | switch (offset_type) { | ||
| 151 | case OffsetType::None: | ||
| 152 | break; | ||
| 153 | case OffsetType::AOFFI: | ||
| 154 | offset = MakeOffset(v, meta_reg, tld4.type); | ||
| 155 | break; | ||
| 156 | case OffsetType::PTP: | ||
| 157 | std::tie(offset, offset2) = MakeOffsetPTP(v, meta_reg); | ||
| 158 | break; | ||
| 159 | default: | ||
| 160 | throw NotImplementedException("Invalid offset type {}", offset_type); | ||
| 161 | } | ||
| 162 | if (tld4.dc != 0) { | ||
| 163 | dref = v.F(meta_reg++); | ||
| 164 | } | ||
| 165 | IR::TextureInstInfo info{}; | ||
| 166 | info.type.Assign(GetType(tld4.type)); | ||
| 167 | info.is_depth.Assign(tld4.dc != 0 ? 1 : 0); | ||
| 168 | info.gather_component.Assign(static_cast<u32>(component_type)); | ||
| 169 | const IR::Value sample{[&] { | ||
| 170 | if (tld4.dc == 0) { | ||
| 171 | return v.ir.ImageGather(handle, coords, offset, offset2, info); | ||
| 172 | } | ||
| 173 | return v.ir.ImageGatherDref(handle, coords, offset, offset2, dref, info); | ||
| 174 | }()}; | ||
| 175 | |||
| 176 | IR::Reg dest_reg{tld4.dest_reg}; | ||
| 177 | for (size_t element = 0; element < 4; ++element) { | ||
| 178 | if (((tld4.mask >> element) & 1) == 0) { | ||
| 179 | continue; | ||
| 180 | } | ||
| 181 | v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)}); | ||
| 182 | ++dest_reg; | ||
| 183 | } | ||
| 184 | if (tld4.sparse_pred != IR::Pred::PT) { | ||
| 185 | v.ir.SetPred(tld4.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); | ||
| 186 | } | ||
| 187 | } | ||
| 188 | } // Anonymous namespace | ||
| 189 | |||
| 190 | void TranslatorVisitor::TLD4(u64 insn) { | ||
| 191 | union { | ||
| 192 | u64 raw; | ||
| 193 | BitField<56, 2, ComponentType> component; | ||
| 194 | BitField<54, 2, OffsetType> offset; | ||
| 195 | } const tld4{insn}; | ||
| 196 | Impl(*this, insn, tld4.component, tld4.offset, false); | ||
| 197 | } | ||
| 198 | |||
| 199 | void TranslatorVisitor::TLD4_b(u64 insn) { | ||
| 200 | union { | ||
| 201 | u64 raw; | ||
| 202 | BitField<38, 2, ComponentType> component; | ||
| 203 | BitField<36, 2, OffsetType> offset; | ||
| 204 | } const tld4{insn}; | ||
| 205 | Impl(*this, insn, tld4.component, tld4.offset, true); | ||
| 206 | } | ||
| 207 | |||
| 208 | } // namespace Shader::Maxwell | ||
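MakeOffset in texture_gather.cpp unpacks AOFFI texel offsets as signed 6-bit fields at bits 0, 8 and 16 of one meta register. A self-contained sketch of that sign-extended extraction, with hypothetical helper names:

    #include <cstdint>
    #include <cstdio>

    // Hypothetical sketch of the sign-extended 6-bit extraction used for
    // AOFFI offsets (fields at bits 0, 8 and 16 of the meta register).
    int32_t SignExtend6(uint32_t value, unsigned lsb) {
        const uint32_t field = (value >> lsb) & 0x3Fu;   // isolate 6 bits
        return static_cast<int32_t>(field << 26) >> 26;  // arithmetic sign extend
    }

    int main() {
        const uint32_t packed = (0x3Eu << 8) | 0x03u; // y = -2, x = +3
        std::printf("x=%d y=%d\n", SignExtend6(packed, 0), SignExtend6(packed, 8));
        return 0;
    }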
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp new file mode 100644 index 000000000..34efa2d50 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp | |||
| @@ -0,0 +1,134 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <utility> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | enum class Precision : u64 { | ||
| 15 | F32, | ||
| 16 | F16, | ||
| 17 | }; | ||
| 18 | |||
| 19 | enum class ComponentType : u64 { | ||
| 20 | R = 0, | ||
| 21 | G = 1, | ||
| 22 | B = 2, | ||
| 23 | A = 3, | ||
| 24 | }; | ||
| 25 | |||
| 26 | union Encoding { | ||
| 27 | u64 raw; | ||
| 28 | BitField<55, 1, Precision> precision; | ||
| 29 | BitField<52, 2, ComponentType> component_type; | ||
| 30 | BitField<51, 1, u64> aoffi; | ||
| 31 | BitField<50, 1, u64> dc; | ||
| 32 | BitField<49, 1, u64> nodep; | ||
| 33 | BitField<28, 8, IR::Reg> dest_reg_b; | ||
| 34 | BitField<0, 8, IR::Reg> dest_reg_a; | ||
| 35 | BitField<8, 8, IR::Reg> src_reg_a; | ||
| 36 | BitField<20, 8, IR::Reg> src_reg_b; | ||
| 37 | BitField<36, 13, u64> cbuf_offset; | ||
| 38 | }; | ||
| 39 | |||
| 40 | void CheckAlignment(IR::Reg reg, size_t alignment) { | ||
| 41 | if (!IR::IsAligned(reg, alignment)) { | ||
| 42 | throw NotImplementedException("Unaligned source register {}", reg); | ||
| 43 | } | ||
| 44 | } | ||
| 45 | |||
| 46 | IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) { | ||
| 47 | const IR::U32 value{v.X(reg)}; | ||
| 48 | return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), | ||
| 49 | v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true)); | ||
| 50 | } | ||
| 51 | |||
| 52 | IR::Value Sample(TranslatorVisitor& v, u64 insn) { | ||
| 53 | const Encoding tld4s{insn}; | ||
| 54 | const IR::U32 handle{v.ir.Imm32(static_cast<u32>(tld4s.cbuf_offset * 4))}; | ||
| 55 | const IR::Reg reg_a{tld4s.src_reg_a}; | ||
| 56 | const IR::Reg reg_b{tld4s.src_reg_b}; | ||
| 57 | IR::TextureInstInfo info{}; | ||
| 58 | if (tld4s.precision == Precision::F16) { | ||
| 59 | info.relaxed_precision.Assign(1); | ||
| 60 | } | ||
| 61 | info.gather_component.Assign(static_cast<u32>(tld4s.component_type.Value())); | ||
| 62 | info.type.Assign(Shader::TextureType::Color2D); | ||
| 63 | info.is_depth.Assign(tld4s.dc != 0 ? 1 : 0); | ||
| 64 | IR::Value coords; | ||
| 65 | if (tld4s.aoffi != 0) { | ||
| 66 | CheckAlignment(reg_a, 2); | ||
| 67 | coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1)); | ||
| 68 | IR::Value offset = MakeOffset(v, reg_b); | ||
| 69 | if (tld4s.dc != 0) { | ||
| 70 | CheckAlignment(reg_b, 2); | ||
| 71 | IR::F32 dref = v.F(reg_b + 1); | ||
| 72 | return v.ir.ImageGatherDref(handle, coords, offset, {}, dref, info); | ||
| 73 | } | ||
| 74 | return v.ir.ImageGather(handle, coords, offset, {}, info); | ||
| 75 | } | ||
| 76 | if (tld4s.dc != 0) { | ||
| 77 | CheckAlignment(reg_a, 2); | ||
| 78 | coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1)); | ||
| 79 | IR::F32 dref = v.F(reg_b); | ||
| 80 | return v.ir.ImageGatherDref(handle, coords, {}, {}, dref, info); | ||
| 81 | } | ||
| 82 | coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_b)); | ||
| 83 | return v.ir.ImageGather(handle, coords, {}, {}, info); | ||
| 84 | } | ||
| 85 | |||
| 86 | IR::Reg RegStoreComponent32(u64 insn, size_t index) { | ||
| 87 | const Encoding tlds4{insn}; | ||
| 88 | switch (index) { | ||
| 89 | case 0: | ||
| 90 | return tlds4.dest_reg_a; | ||
| 91 | case 1: | ||
| 92 | CheckAlignment(tlds4.dest_reg_a, 2); | ||
| 93 | return tlds4.dest_reg_a + 1; | ||
| 94 | case 2: | ||
| 95 | return tlds4.dest_reg_b; | ||
| 96 | case 3: | ||
| 97 | CheckAlignment(tlds4.dest_reg_b, 2); | ||
| 98 | return tlds4.dest_reg_b + 1; | ||
| 99 | } | ||
| 100 | throw LogicError("Invalid store index {}", index); | ||
| 101 | } | ||
| 102 | |||
| 103 | void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { | ||
| 104 | for (size_t component = 0; component < 4; ++component) { | ||
| 105 | const IR::Reg dest{RegStoreComponent32(insn, component)}; | ||
| 106 | v.F(dest, IR::F32{v.ir.CompositeExtract(sample, component)}); | ||
| 107 | } | ||
| 108 | } | ||
| 109 | |||
| 110 | IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) { | ||
| 111 | return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs)); | ||
| 112 | } | ||
| 113 | |||
| 114 | void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { | ||
| 115 | std::array<IR::F32, 4> swizzled; | ||
| 116 | for (size_t component = 0; component < 4; ++component) { | ||
| 117 | swizzled[component] = IR::F32{v.ir.CompositeExtract(sample, component)}; | ||
| 118 | } | ||
| 119 | const Encoding tld4s{insn}; | ||
| 120 | v.X(tld4s.dest_reg_a, Pack(v, swizzled[0], swizzled[1])); | ||
| 121 | v.X(tld4s.dest_reg_b, Pack(v, swizzled[2], swizzled[3])); | ||
| 122 | } | ||
| 123 | } // Anonymous namespace | ||
| 124 | |||
| 125 | void TranslatorVisitor::TLD4S(u64 insn) { | ||
| 126 | const IR::Value sample{Sample(*this, insn)}; | ||
| 127 | if (Encoding{insn}.precision == Precision::F32) { | ||
| 128 | Store32(*this, insn, sample); | ||
| 129 | } else { | ||
| 130 | Store16(*this, insn, sample); | ||
| 131 | } | ||
| 132 | } | ||
| 133 | |||
| 134 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp new file mode 100644 index 000000000..c3fe3ffda --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp | |||
| @@ -0,0 +1,182 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <optional> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | |||
| 15 | enum class TextureType : u64 { | ||
| 16 | _1D, | ||
| 17 | ARRAY_1D, | ||
| 18 | _2D, | ||
| 19 | ARRAY_2D, | ||
| 20 | _3D, | ||
| 21 | ARRAY_3D, | ||
| 22 | CUBE, | ||
| 23 | ARRAY_CUBE, | ||
| 24 | }; | ||
| 25 | |||
| 26 | Shader::TextureType GetType(TextureType type) { | ||
| 27 | switch (type) { | ||
| 28 | case TextureType::_1D: | ||
| 29 | return Shader::TextureType::Color1D; | ||
| 30 | case TextureType::ARRAY_1D: | ||
| 31 | return Shader::TextureType::ColorArray1D; | ||
| 32 | case TextureType::_2D: | ||
| 33 | return Shader::TextureType::Color2D; | ||
| 34 | case TextureType::ARRAY_2D: | ||
| 35 | return Shader::TextureType::ColorArray2D; | ||
| 36 | case TextureType::_3D: | ||
| 37 | return Shader::TextureType::Color3D; | ||
| 38 | case TextureType::ARRAY_3D: | ||
| 39 | throw NotImplementedException("3D array texture type"); | ||
| 40 | case TextureType::CUBE: | ||
| 41 | return Shader::TextureType::ColorCube; | ||
| 42 | case TextureType::ARRAY_CUBE: | ||
| 43 | return Shader::TextureType::ColorArrayCube; | ||
| 44 | } | ||
| 45 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 46 | } | ||
| 47 | |||
| 48 | IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg, bool has_lod_clamp) { | ||
| 49 | const IR::U32 value{v.X(reg)}; | ||
| 50 | const u32 base{has_lod_clamp ? 12U : 16U}; | ||
| 51 | return v.ir.CompositeConstruct( | ||
| 52 | v.ir.BitFieldExtract(value, v.ir.Imm32(base), v.ir.Imm32(4), true), | ||
| 53 | v.ir.BitFieldExtract(value, v.ir.Imm32(base + 4), v.ir.Imm32(4), true)); | ||
| 54 | } | ||
| 55 | |||
| 56 | void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { | ||
| 57 | union { | ||
| 58 | u64 raw; | ||
| 59 | BitField<49, 1, u64> nodep; | ||
| 60 | BitField<35, 1, u64> aoffi; | ||
| 61 | BitField<50, 1, u64> lc; | ||
| 62 | BitField<51, 3, IR::Pred> sparse_pred; | ||
| 63 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 64 | BitField<8, 8, IR::Reg> coord_reg; | ||
| 65 | BitField<20, 8, IR::Reg> derivate_reg; | ||
| 66 | BitField<28, 3, TextureType> type; | ||
| 67 | BitField<31, 4, u64> mask; | ||
| 68 | BitField<36, 13, u64> cbuf_offset; | ||
| 69 | } const txd{insn}; | ||
| 70 | |||
| 71 | const bool has_lod_clamp = txd.lc != 0; | ||
| 72 | if (has_lod_clamp) { | ||
| 73 | throw NotImplementedException("TXD.LC - CLAMP is not implemented"); | ||
| 74 | } | ||
| 75 | |||
| 76 | IR::Value coords; | ||
| 77 | u32 num_derivates{}; | ||
| 78 | IR::Reg base_reg{txd.coord_reg}; | ||
| 79 | IR::Reg last_reg; | ||
| 80 | IR::Value handle; | ||
| 81 | if (is_bindless) { | ||
| 82 | handle = v.X(base_reg++); | ||
| 83 | } else { | ||
| 84 | handle = v.ir.Imm32(static_cast<u32>(txd.cbuf_offset.Value() * 4)); | ||
| 85 | } | ||
| 86 | |||
| 87 | const auto read_array{[&]() -> IR::F32 { | ||
| 88 | const IR::U32 base{v.ir.Imm32(0)}; | ||
| 89 | const IR::U32 count{v.ir.Imm32(has_lod_clamp ? 12 : 16)}; | ||
| 90 | const IR::U32 array_index{v.ir.BitFieldExtract(v.X(last_reg), base, count)}; | ||
| 91 | return v.ir.ConvertUToF(32, 16, array_index); | ||
| 92 | }}; | ||
| 93 | switch (txd.type) { | ||
| 94 | case TextureType::_1D: { | ||
| 95 | coords = v.F(base_reg); | ||
| 96 | num_derivates = 1; | ||
| 97 | last_reg = base_reg + 1; | ||
| 98 | break; | ||
| 99 | } | ||
| 100 | case TextureType::ARRAY_1D: { | ||
| 101 | last_reg = base_reg + 1; | ||
| 102 | coords = v.ir.CompositeConstruct(v.F(base_reg), read_array()); | ||
| 103 | num_derivates = 1; | ||
| 104 | break; | ||
| 105 | } | ||
| 106 | case TextureType::_2D: { | ||
| 107 | last_reg = base_reg + 2; | ||
| 108 | coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1)); | ||
| 109 | num_derivates = 2; | ||
| 110 | break; | ||
| 111 | } | ||
| 112 | case TextureType::ARRAY_2D: { | ||
| 113 | last_reg = base_reg + 2; | ||
| 114 | coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1), read_array()); | ||
| 115 | num_derivates = 2; | ||
| 116 | break; | ||
| 117 | } | ||
| 118 | default: | ||
| 119 | throw NotImplementedException("Invalid texture type"); | ||
| 120 | } | ||
| 121 | |||
| 122 | const IR::Reg derivate_reg{txd.derivate_reg}; | ||
| 123 | IR::Value derivates; | ||
| 124 | switch (num_derivates) { | ||
| 125 | case 1: { | ||
| 126 | derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1)); | ||
| 127 | break; | ||
| 128 | } | ||
| 129 | case 2: { | ||
| 130 | derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1), | ||
| 131 | v.F(derivate_reg + 2), v.F(derivate_reg + 3)); | ||
| 132 | break; | ||
| 133 | } | ||
| 134 | default: | ||
| 135 | throw NotImplementedException("Invalid texture type"); | ||
| 136 | } | ||
| 137 | |||
| 138 | IR::Value offset; | ||
| 139 | if (txd.aoffi != 0) { | ||
| 140 | offset = MakeOffset(v, last_reg, has_lod_clamp); | ||
| 141 | } | ||
| 142 | |||
| 143 | IR::F32 lod_clamp; | ||
| 144 | if (has_lod_clamp) { | ||
| 145 | // The LOD clamp is a 4.8 fixed-point value that must be converted to float. | ||
| 146 | // To convert a fixed-point value: float(value) / float(1 << fraction_bits); | ||
| 147 | // in this case fraction_bits is 8. | ||
| 148 | const IR::F32 conv4_8fixp_f{v.ir.Imm32(static_cast<f32>(1U << 8))}; | ||
| 149 | const IR::F32 fixp_lc{v.ir.ConvertUToF( | ||
| 150 | 32, 16, v.ir.BitFieldExtract(v.X(last_reg), v.ir.Imm32(20), v.ir.Imm32(12)))}; | ||
| 151 | lod_clamp = v.ir.FPMul(fixp_lc, conv4_8fixp_f); | ||
| 152 | } | ||
| 153 | |||
| 154 | IR::TextureInstInfo info{}; | ||
| 155 | info.type.Assign(GetType(txd.type)); | ||
| 156 | info.num_derivates.Assign(num_derivates); | ||
| 157 | info.has_lod_clamp.Assign(has_lod_clamp ? 1 : 0); | ||
| 158 | const IR::Value sample{v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info)}; | ||
| 159 | |||
| 160 | IR::Reg dest_reg{txd.dest_reg}; | ||
| 161 | for (size_t element = 0; element < 4; ++element) { | ||
| 162 | if (((txd.mask >> element) & 1) == 0) { | ||
| 163 | continue; | ||
| 164 | } | ||
| 165 | v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)}); | ||
| 166 | ++dest_reg; | ||
| 167 | } | ||
| 168 | if (txd.sparse_pred != IR::Pred::PT) { | ||
| 169 | v.ir.SetPred(txd.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); | ||
| 170 | } | ||
| 171 | } | ||
| 172 | } // Anonymous namespace | ||
| 173 | |||
| 174 | void TranslatorVisitor::TXD(u64 insn) { | ||
| 175 | Impl(*this, insn, false); | ||
| 176 | } | ||
| 177 | |||
| 178 | void TranslatorVisitor::TXD_b(u64 insn) { | ||
| 179 | Impl(*this, insn, true); | ||
| 180 | } | ||
| 181 | |||
| 182 | } // namespace Shader::Maxwell | ||
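The LOD-clamp comment in texture_gradient.cpp states the conversion formula: a 4.8 fixed-point value becomes a float by dividing the raw field by 1 << 8. A standalone sketch following that formula (hypothetical helper name, not the exact IR sequence):

    #include <cstdint>
    #include <cstdio>

    // Hypothetical helper following the comment's formula: a 4.8 fixed-point
    // LOD clamp becomes a float by dividing the raw field by 1 << 8.
    float Fixed4_8ToFloat(uint32_t raw12) {
        return static_cast<float>(raw12 & 0xFFFu) / 256.0f;
    }

    int main() {
        // 0x180 is 1.5 in 4.8 fixed point: integer part 1, fraction 0x80 / 256.
        std::printf("%f\n", Fixed4_8ToFloat(0x180u));
        return 0;
    }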
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp new file mode 100644 index 000000000..983058303 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp | |||
| @@ -0,0 +1,165 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <optional> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | |||
| 15 | enum class TextureType : u64 { | ||
| 16 | _1D, | ||
| 17 | ARRAY_1D, | ||
| 18 | _2D, | ||
| 19 | ARRAY_2D, | ||
| 20 | _3D, | ||
| 21 | ARRAY_3D, | ||
| 22 | CUBE, | ||
| 23 | ARRAY_CUBE, | ||
| 24 | }; | ||
| 25 | |||
| 26 | Shader::TextureType GetType(TextureType type) { | ||
| 27 | switch (type) { | ||
| 28 | case TextureType::_1D: | ||
| 29 | return Shader::TextureType::Color1D; | ||
| 30 | case TextureType::ARRAY_1D: | ||
| 31 | return Shader::TextureType::ColorArray1D; | ||
| 32 | case TextureType::_2D: | ||
| 33 | return Shader::TextureType::Color2D; | ||
| 34 | case TextureType::ARRAY_2D: | ||
| 35 | return Shader::TextureType::ColorArray2D; | ||
| 36 | case TextureType::_3D: | ||
| 37 | return Shader::TextureType::Color3D; | ||
| 38 | case TextureType::ARRAY_3D: | ||
| 39 | throw NotImplementedException("3D array texture type"); | ||
| 40 | case TextureType::CUBE: | ||
| 41 | return Shader::TextureType::ColorCube; | ||
| 42 | case TextureType::ARRAY_CUBE: | ||
| 43 | return Shader::TextureType::ColorArrayCube; | ||
| 44 | } | ||
| 45 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 46 | } | ||
| 47 | |||
| 48 | IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { | ||
| 49 | const auto read_array{ | ||
| 50 | [&]() -> IR::U32 { return v.ir.BitFieldExtract(v.X(reg), v.ir.Imm32(0), v.ir.Imm32(16)); }}; | ||
| 51 | switch (type) { | ||
| 52 | case TextureType::_1D: | ||
| 53 | return v.X(reg); | ||
| 54 | case TextureType::ARRAY_1D: | ||
| 55 | return v.ir.CompositeConstruct(v.X(reg + 1), read_array()); | ||
| 56 | case TextureType::_2D: | ||
| 57 | return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1)); | ||
| 58 | case TextureType::ARRAY_2D: | ||
| 59 | return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), read_array()); | ||
| 60 | case TextureType::_3D: | ||
| 61 | return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); | ||
| 62 | case TextureType::ARRAY_3D: | ||
| 63 | throw NotImplementedException("3D array texture type"); | ||
| 64 | case TextureType::CUBE: | ||
| 65 | return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); | ||
| 66 | case TextureType::ARRAY_CUBE: | ||
| 67 | return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), v.X(reg + 3), read_array()); | ||
| 68 | } | ||
| 69 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 70 | } | ||
| 71 | |||
| 72 | IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { | ||
| 73 | const IR::U32 value{v.X(reg++)}; | ||
| 74 | switch (type) { | ||
| 75 | case TextureType::_1D: | ||
| 76 | case TextureType::ARRAY_1D: | ||
| 77 | return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true); | ||
| 78 | case TextureType::_2D: | ||
| 79 | case TextureType::ARRAY_2D: | ||
| 80 | return v.ir.CompositeConstruct( | ||
| 81 | v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), | ||
| 82 | v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true)); | ||
| 83 | case TextureType::_3D: | ||
| 84 | case TextureType::ARRAY_3D: | ||
| 85 | return v.ir.CompositeConstruct( | ||
| 86 | v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), | ||
| 87 | v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true), | ||
| 88 | v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true)); | ||
| 89 | case TextureType::CUBE: | ||
| 90 | case TextureType::ARRAY_CUBE: | ||
| 91 | throw NotImplementedException("Illegal offset on CUBE sample"); | ||
| 92 | } | ||
| 93 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 94 | } | ||
| 95 | |||
| 96 | void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { | ||
| 97 | union { | ||
| 98 | u64 raw; | ||
| 99 | BitField<49, 1, u64> nodep; | ||
| 100 | BitField<55, 1, u64> lod; | ||
| 101 | BitField<50, 1, u64> multisample; | ||
| 102 | BitField<35, 1, u64> aoffi; | ||
| 103 | BitField<54, 1, u64> clamp; | ||
| 104 | BitField<51, 3, IR::Pred> sparse_pred; | ||
| 105 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 106 | BitField<8, 8, IR::Reg> coord_reg; | ||
| 107 | BitField<20, 8, IR::Reg> meta_reg; | ||
| 108 | BitField<28, 3, TextureType> type; | ||
| 109 | BitField<31, 4, u64> mask; | ||
| 110 | BitField<36, 13, u64> cbuf_offset; | ||
| 111 | } const tld{insn}; | ||
| 112 | |||
| 113 | const IR::Value coords{MakeCoords(v, tld.coord_reg, tld.type)}; | ||
| 114 | |||
| 115 | IR::Reg meta_reg{tld.meta_reg}; | ||
| 116 | IR::Value handle; | ||
| 117 | IR::Value offset; | ||
| 118 | IR::U32 lod; | ||
| 119 | IR::U32 multisample; | ||
| 120 | if (is_bindless) { | ||
| 121 | handle = v.X(meta_reg++); | ||
| 122 | } else { | ||
| 123 | handle = v.ir.Imm32(static_cast<u32>(tld.cbuf_offset.Value() * 4)); | ||
| 124 | } | ||
| 125 | if (tld.lod != 0) { | ||
| 126 | lod = v.X(meta_reg++); | ||
| 127 | } else { | ||
| 128 | lod = v.ir.Imm32(0U); | ||
| 129 | } | ||
| 130 | if (tld.aoffi != 0) { | ||
| 131 | offset = MakeOffset(v, meta_reg, tld.type); | ||
| 132 | } | ||
| 133 | if (tld.multisample != 0) { | ||
| 134 | multisample = v.X(meta_reg++); | ||
| 135 | } | ||
| 136 | if (tld.clamp != 0) { | ||
| 137 | throw NotImplementedException("TLD.CL - CLAMP is not implemented"); | ||
| 138 | } | ||
| 139 | IR::TextureInstInfo info{}; | ||
| 140 | info.type.Assign(GetType(tld.type)); | ||
| 141 | const IR::Value sample{v.ir.ImageFetch(handle, coords, offset, lod, multisample, info)}; | ||
| 142 | |||
| 143 | IR::Reg dest_reg{tld.dest_reg}; | ||
| 144 | for (size_t element = 0; element < 4; ++element) { | ||
| 145 | if (((tld.mask >> element) & 1) == 0) { | ||
| 146 | continue; | ||
| 147 | } | ||
| 148 | v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)}); | ||
| 149 | ++dest_reg; | ||
| 150 | } | ||
| 151 | if (tld.sparse_pred != IR::Pred::PT) { | ||
| 152 | v.ir.SetPred(tld.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); | ||
| 153 | } | ||
| 154 | } | ||
| 155 | } // Anonymous namespace | ||
| 156 | |||
| 157 | void TranslatorVisitor::TLD(u64 insn) { | ||
| 158 | Impl(*this, insn, false); | ||
| 159 | } | ||
| 160 | |||
| 161 | void TranslatorVisitor::TLD_b(u64 insn) { | ||
| 162 | Impl(*this, insn, true); | ||
| 163 | } | ||
| 164 | |||
| 165 | } // namespace Shader::Maxwell | ||
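TLD's Impl consumes optional operands from consecutive meta registers in a fixed order: the bindless handle (if any), then the LOD, then the AOFFI offsets, then the multisample index. A toy illustration of that consumption order (register numbers are illustrative only):

    #include <cstdio>

    // Toy sketch of TLD's meta-register consumption order; the register
    // numbers are illustrative, not taken from a real encoding.
    int main() {
        const bool is_bindless = true, has_lod = true, has_aoffi = false,
                   has_multisample = true;
        int meta_reg = 20; // pretend the meta_reg field named R20
        if (is_bindless) {
            std::printf("handle      <- R%d\n", meta_reg++);
        }
        if (has_lod) {
            std::printf("lod         <- R%d\n", meta_reg++);
        }
        if (has_aoffi) {
            std::printf("offset      <- R%d\n", meta_reg++);
        }
        if (has_multisample) {
            std::printf("multisample <- R%d\n", meta_reg++);
        }
        return 0;
    }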
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp new file mode 100644 index 000000000..5dd7e31b2 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp | |||
| @@ -0,0 +1,242 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | enum class Precision : u64 { | ||
| 15 | F16, | ||
| 16 | F32, | ||
| 17 | }; | ||
| 18 | |||
| 19 | constexpr unsigned R = 1; | ||
| 20 | constexpr unsigned G = 2; | ||
| 21 | constexpr unsigned B = 4; | ||
| 22 | constexpr unsigned A = 8; | ||
| 23 | |||
| 24 | constexpr std::array RG_LUT{ | ||
| 25 | R, // | ||
| 26 | G, // | ||
| 27 | B, // | ||
| 28 | A, // | ||
| 29 | R | G, // | ||
| 30 | R | A, // | ||
| 31 | G | A, // | ||
| 32 | B | A, // | ||
| 33 | }; | ||
| 34 | |||
| 35 | constexpr std::array RGBA_LUT{ | ||
| 36 | R | G | B, // | ||
| 37 | R | G | A, // | ||
| 38 | R | B | A, // | ||
| 39 | G | B | A, // | ||
| 40 | R | G | B | A, // | ||
| 41 | }; | ||
| 42 | |||
| 43 | union Encoding { | ||
| 44 | u64 raw; | ||
| 45 | BitField<59, 1, Precision> precision; | ||
| 46 | BitField<54, 1, u64> aoffi; | ||
| 47 | BitField<53, 1, u64> lod; | ||
| 48 | BitField<55, 1, u64> ms; | ||
| 49 | BitField<49, 1, u64> nodep; | ||
| 50 | BitField<28, 8, IR::Reg> dest_reg_b; | ||
| 51 | BitField<0, 8, IR::Reg> dest_reg_a; | ||
| 52 | BitField<8, 8, IR::Reg> src_reg_a; | ||
| 53 | BitField<20, 8, IR::Reg> src_reg_b; | ||
| 54 | BitField<36, 13, u64> cbuf_offset; | ||
| 55 | BitField<50, 3, u64> swizzle; | ||
| 56 | BitField<53, 4, u64> encoding; | ||
| 57 | }; | ||
| 58 | |||
| 59 | void CheckAlignment(IR::Reg reg, size_t alignment) { | ||
| 60 | if (!IR::IsAligned(reg, alignment)) { | ||
| 61 | throw NotImplementedException("Unaligned source register {}", reg); | ||
| 62 | } | ||
| 63 | } | ||
| 64 | |||
| 65 | IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) { | ||
| 66 | const IR::U32 value{v.X(reg)}; | ||
| 67 | return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), | ||
| 68 | v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true)); | ||
| 69 | } | ||
| 70 | |||
| 71 | IR::Value Sample(TranslatorVisitor& v, u64 insn) { | ||
| 72 | const Encoding tlds{insn}; | ||
| 73 | const IR::U32 handle{v.ir.Imm32(static_cast<u32>(tlds.cbuf_offset * 4))}; | ||
| 74 | const IR::Reg reg_a{tlds.src_reg_a}; | ||
| 75 | const IR::Reg reg_b{tlds.src_reg_b}; | ||
| 76 | IR::Value coords; | ||
| 77 | IR::U32 lod{v.ir.Imm32(0U)}; | ||
| 78 | IR::Value offsets; | ||
| 79 | IR::U32 multisample; | ||
| 80 | Shader::TextureType texture_type{}; | ||
| 81 | switch (tlds.encoding) { | ||
| 82 | case 0: | ||
| 83 | texture_type = Shader::TextureType::Color1D; | ||
| 84 | coords = v.X(reg_a); | ||
| 85 | break; | ||
| 86 | case 1: | ||
| 87 | texture_type = Shader::TextureType::Color1D; | ||
| 88 | coords = v.X(reg_a); | ||
| 89 | lod = v.X(reg_b); | ||
| 90 | break; | ||
| 91 | case 2: | ||
| 92 | texture_type = Shader::TextureType::Color2D; | ||
| 93 | coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_b)); | ||
| 94 | break; | ||
| 95 | case 4: | ||
| 96 | CheckAlignment(reg_a, 2); | ||
| 97 | texture_type = Shader::TextureType::Color2D; | ||
| 98 | coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); | ||
| 99 | offsets = MakeOffset(v, reg_b); | ||
| 100 | break; | ||
| 101 | case 5: | ||
| 102 | CheckAlignment(reg_a, 2); | ||
| 103 | texture_type = Shader::TextureType::Color2D; | ||
| 104 | coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); | ||
| 105 | lod = v.X(reg_b); | ||
| 106 | break; | ||
| 107 | case 6: | ||
| 108 | CheckAlignment(reg_a, 2); | ||
| 109 | texture_type = Shader::TextureType::Color2D; | ||
| 110 | coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); | ||
| 111 | multisample = v.X(reg_b); | ||
| 112 | break; | ||
| 113 | case 7: | ||
| 114 | CheckAlignment(reg_a, 2); | ||
| 115 | texture_type = Shader::TextureType::Color3D; | ||
| 116 | coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1), v.X(reg_b)); | ||
| 117 | break; | ||
| 118 | case 8: { | ||
| 119 | CheckAlignment(reg_b, 2); | ||
| 120 | const IR::U32 array{v.ir.BitFieldExtract(v.X(reg_a), v.ir.Imm32(0), v.ir.Imm32(16))}; | ||
| 121 | texture_type = Shader::TextureType::ColorArray2D; | ||
| 122 | coords = v.ir.CompositeConstruct(v.X(reg_b), v.X(reg_b + 1), array); | ||
| 123 | break; | ||
| 124 | } | ||
| 125 | case 12: | ||
| 126 | CheckAlignment(reg_a, 2); | ||
| 127 | CheckAlignment(reg_b, 2); | ||
| 128 | texture_type = Shader::TextureType::Color2D; | ||
| 129 | coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); | ||
| 130 | lod = v.X(reg_b); | ||
| 131 | offsets = MakeOffset(v, reg_b + 1); | ||
| 132 | break; | ||
| 133 | default: | ||
| 134 | throw NotImplementedException("Illegal encoding {}", tlds.encoding.Value()); | ||
| 135 | } | ||
| 136 | IR::TextureInstInfo info{}; | ||
| 137 | if (tlds.precision == Precision::F16) { | ||
| 138 | info.relaxed_precision.Assign(1); | ||
| 139 | } | ||
| 140 | info.type.Assign(texture_type); | ||
| 141 | return v.ir.ImageFetch(handle, coords, offsets, lod, multisample, info); | ||
| 142 | } | ||
| 143 | |||
| 144 | unsigned Swizzle(u64 insn) { | ||
| 145 | const Encoding tlds{insn}; | ||
| 146 | const size_t encoding{tlds.swizzle}; | ||
| 147 | if (tlds.dest_reg_b == IR::Reg::RZ) { | ||
| 148 | if (encoding >= RG_LUT.size()) { | ||
| 149 | throw NotImplementedException("Illegal RG encoding {}", encoding); | ||
| 150 | } | ||
| 151 | return RG_LUT[encoding]; | ||
| 152 | } else { | ||
| 153 | if (encoding >= RGBA_LUT.size()) { | ||
| 154 | throw NotImplementedException("Illegal RGBA encoding {}", encoding); | ||
| 155 | } | ||
| 156 | return RGBA_LUT[encoding]; | ||
| 157 | } | ||
| 158 | } | ||
| 159 | |||
| 160 | IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) { | ||
| 161 | return IR::F32{v.ir.CompositeExtract(sample, component)}; | ||
| 162 | } | ||
| 163 | |||
| 164 | IR::Reg RegStoreComponent32(u64 insn, unsigned index) { | ||
| 165 | const Encoding tlds{insn}; | ||
| 166 | switch (index) { | ||
| 167 | case 0: | ||
| 168 | return tlds.dest_reg_a; | ||
| 169 | case 1: | ||
| 170 | CheckAlignment(tlds.dest_reg_a, 2); | ||
| 171 | return tlds.dest_reg_a + 1; | ||
| 172 | case 2: | ||
| 173 | return tlds.dest_reg_b; | ||
| 174 | case 3: | ||
| 175 | CheckAlignment(tlds.dest_reg_b, 2); | ||
| 176 | return tlds.dest_reg_b + 1; | ||
| 177 | } | ||
| 178 | throw LogicError("Invalid store index {}", index); | ||
| 179 | } | ||
| 180 | |||
| 181 | void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { | ||
| 182 | const unsigned swizzle{Swizzle(insn)}; | ||
| 183 | unsigned store_index{0}; | ||
| 184 | for (unsigned component = 0; component < 4; ++component) { | ||
| 185 | if (((swizzle >> component) & 1) == 0) { | ||
| 186 | continue; | ||
| 187 | } | ||
| 188 | const IR::Reg dest{RegStoreComponent32(insn, store_index)}; | ||
| 189 | v.F(dest, Extract(v, sample, component)); | ||
| 190 | ++store_index; | ||
| 191 | } | ||
| 192 | } | ||
| 193 | |||
| 194 | IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) { | ||
| 195 | return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs)); | ||
| 196 | } | ||
| 197 | |||
| 198 | void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { | ||
| 199 | const unsigned swizzle{Swizzle(insn)}; | ||
| 200 | unsigned store_index{0}; | ||
| 201 | std::array<IR::F32, 4> swizzled; | ||
| 202 | for (unsigned component = 0; component < 4; ++component) { | ||
| 203 | if (((swizzle >> component) & 1) == 0) { | ||
| 204 | continue; | ||
| 205 | } | ||
| 206 | swizzled[store_index] = Extract(v, sample, component); | ||
| 207 | ++store_index; | ||
| 208 | } | ||
| 209 | const IR::F32 zero{v.ir.Imm32(0.0f)}; | ||
| 210 | const Encoding tlds{insn}; | ||
| 211 | switch (store_index) { | ||
| 212 | case 1: | ||
| 213 | v.X(tlds.dest_reg_a, Pack(v, swizzled[0], zero)); | ||
| 214 | break; | ||
| 215 | case 2: | ||
| 216 | case 3: | ||
| 217 | case 4: | ||
| 218 | v.X(tlds.dest_reg_a, Pack(v, swizzled[0], swizzled[1])); | ||
| 219 | switch (store_index) { | ||
| 220 | case 2: | ||
| 221 | break; | ||
| 222 | case 3: | ||
| 223 | v.X(tlds.dest_reg_b, Pack(v, swizzled[2], zero)); | ||
| 224 | break; | ||
| 225 | case 4: | ||
| 226 | v.X(tlds.dest_reg_b, Pack(v, swizzled[2], swizzled[3])); | ||
| 227 | break; | ||
| 228 | } | ||
| 229 | break; | ||
| 230 | } | ||
| 231 | } | ||
| 232 | } // Anonymous namespace | ||
| 233 | |||
| 234 | void TranslatorVisitor::TLDS(u64 insn) { | ||
| 235 | const IR::Value sample{Sample(*this, insn)}; | ||
| 236 | if (Encoding{insn}.precision == Precision::F32) { | ||
| 237 | Store32(*this, insn, sample); | ||
| 238 | } else { | ||
| 239 | Store16(*this, insn, sample); | ||
| 240 | } | ||
| 241 | } | ||
| 242 | } // namespace Shader::Maxwell | ||
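Store16 in texture_load_swizzled.cpp packs pairs of the selected components with PackHalf2x16: the first pair lands in dest_reg_a and the second pair (zero-padded when fewer than four components are selected) in dest_reg_b, with the first value of each pair in the low 16 bits. A sketch of that routing using precomputed half-precision bit patterns (hypothetical helper):

    #include <cstdint>
    #include <cstdio>

    // Hypothetical sketch of the Store16 register routing above: the packed
    // pair of the first two selected components goes to dest_reg_a, the pair
    // of the last two (or zero padding) to dest_reg_b.
    uint32_t PackHalves(uint16_t low, uint16_t high) {
        return static_cast<uint32_t>(low) | (static_cast<uint32_t>(high) << 16);
    }

    int main() {
        // Pretend these are half-precision bit patterns of the swizzled values.
        const uint16_t h[4] = {0x3C00, 0x4000, 0x4200, 0x4400}; // 1.0, 2.0, 3.0, 4.0
        std::printf("dest_reg_a = 0x%08X\n", PackHalves(h[0], h[1]));
        std::printf("dest_reg_b = 0x%08X\n", PackHalves(h[2], h[3]));
        return 0;
    }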
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp new file mode 100644 index 000000000..aea3c0e62 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp | |||
| @@ -0,0 +1,131 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <optional> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | |||
| 15 | enum class TextureType : u64 { | ||
| 16 | _1D, | ||
| 17 | ARRAY_1D, | ||
| 18 | _2D, | ||
| 19 | ARRAY_2D, | ||
| 20 | _3D, | ||
| 21 | ARRAY_3D, | ||
| 22 | CUBE, | ||
| 23 | ARRAY_CUBE, | ||
| 24 | }; | ||
| 25 | |||
| 26 | Shader::TextureType GetType(TextureType type) { | ||
| 27 | switch (type) { | ||
| 28 | case TextureType::_1D: | ||
| 29 | return Shader::TextureType::Color1D; | ||
| 30 | case TextureType::ARRAY_1D: | ||
| 31 | return Shader::TextureType::ColorArray1D; | ||
| 32 | case TextureType::_2D: | ||
| 33 | return Shader::TextureType::Color2D; | ||
| 34 | case TextureType::ARRAY_2D: | ||
| 35 | return Shader::TextureType::ColorArray2D; | ||
| 36 | case TextureType::_3D: | ||
| 37 | return Shader::TextureType::Color3D; | ||
| 38 | case TextureType::ARRAY_3D: | ||
| 39 | throw NotImplementedException("3D array texture type"); | ||
| 40 | case TextureType::CUBE: | ||
| 41 | return Shader::TextureType::ColorCube; | ||
| 42 | case TextureType::ARRAY_CUBE: | ||
| 43 | return Shader::TextureType::ColorArrayCube; | ||
| 44 | } | ||
| 45 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 46 | } | ||
| 47 | |||
| 48 | IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { | ||
| 49 | // The ISA reads an array component here, but it is not needed in high-level shading languages, | ||
| 50 | // so we drop this information. | ||
| 51 | switch (type) { | ||
| 52 | case TextureType::_1D: | ||
| 53 | return v.F(reg); | ||
| 54 | case TextureType::ARRAY_1D: | ||
| 55 | return v.F(reg + 1); | ||
| 56 | case TextureType::_2D: | ||
| 57 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); | ||
| 58 | case TextureType::ARRAY_2D: | ||
| 59 | return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2)); | ||
| 60 | case TextureType::_3D: | ||
| 61 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); | ||
| 62 | case TextureType::ARRAY_3D: | ||
| 63 | throw NotImplementedException("3D array texture type"); | ||
| 64 | case TextureType::CUBE: | ||
| 65 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); | ||
| 66 | case TextureType::ARRAY_CUBE: | ||
| 67 | return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3)); | ||
| 68 | } | ||
| 69 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 70 | } | ||
| 71 | |||
| 72 | void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { | ||
| 73 | union { | ||
| 74 | u64 raw; | ||
| 75 | BitField<49, 1, u64> nodep; | ||
| 76 | BitField<35, 1, u64> ndv; | ||
| 77 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 78 | BitField<8, 8, IR::Reg> coord_reg; | ||
| 79 | BitField<20, 8, IR::Reg> meta_reg; | ||
| 80 | BitField<28, 3, TextureType> type; | ||
| 81 | BitField<31, 4, u64> mask; | ||
| 82 | BitField<36, 13, u64> cbuf_offset; | ||
| 83 | } const tmml{insn}; | ||
| 84 | |||
| 85 | if ((tmml.mask & 0b1100) != 0) { | ||
| 86 | throw NotImplementedException("TMML BA results are not implemented"); | ||
| 87 | } | ||
| 88 | const IR::Value coords{MakeCoords(v, tmml.coord_reg, tmml.type)}; | ||
| 89 | |||
| 90 | IR::U32 handle; | ||
| 91 | IR::Reg meta_reg{tmml.meta_reg}; | ||
| 92 | if (is_bindless) { | ||
| 93 | handle = v.X(meta_reg++); | ||
| 94 | } else { | ||
| 95 | handle = v.ir.Imm32(static_cast<u32>(tmml.cbuf_offset.Value() * 4)); | ||
| 96 | } | ||
| 97 | IR::TextureInstInfo info{}; | ||
| 98 | info.type.Assign(GetType(tmml.type)); | ||
| 99 | const IR::Value sample{v.ir.ImageQueryLod(handle, coords, info)}; | ||
| 100 | |||
| 101 | IR::Reg dest_reg{tmml.dest_reg}; | ||
| 102 | for (size_t element = 0; element < 4; ++element) { | ||
| 103 | if (((tmml.mask >> element) & 1) == 0) { | ||
| 104 | continue; | ||
| 105 | } | ||
| 106 | IR::F32 value{v.ir.CompositeExtract(sample, element)}; | ||
| 107 | if (element < 2) { | ||
| 108 | IR::U32 casted_value; | ||
| 109 | if (element == 0) { | ||
| 110 | casted_value = v.ir.ConvertFToU(32, value); | ||
| 111 | } else { | ||
| 112 | casted_value = v.ir.ConvertFToS(16, value); | ||
| 113 | } | ||
| 114 | v.X(dest_reg, v.ir.ShiftLeftLogical(casted_value, v.ir.Imm32(8))); | ||
| 115 | } else { | ||
| 116 | v.F(dest_reg, value); | ||
| 117 | } | ||
| 118 | ++dest_reg; | ||
| 119 | } | ||
| 120 | } | ||
| 121 | } // Anonymous namespace | ||
| 122 | |||
| 123 | void TranslatorVisitor::TMML(u64 insn) { | ||
| 124 | Impl(*this, insn, false); | ||
| 125 | } | ||
| 126 | |||
| 127 | void TranslatorVisitor::TMML_b(u64 insn) { | ||
| 128 | Impl(*this, insn, true); | ||
| 129 | } | ||
| 130 | |||
| 131 | } // namespace Shader::Maxwell | ||
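TMML's first two result elements are converted to integers and shifted left by 8 before being stored, i.e. written as x.8 fixed point with a zero fraction in this translation. A minimal sketch of that encoding (hypothetical helper name):

    #include <cstdint>
    #include <cstdio>

    // Hypothetical sketch of the x.8 encoding used for TMML's first two
    // elements above: convert the float to an integer, then shift left by 8.
    uint32_t EncodeLodFixedPoint(float lod) {
        return static_cast<uint32_t>(lod) << 8;
    }

    int main() {
        std::printf("0x%X\n", EncodeLodFixedPoint(3.0f)); // prints 0x300
        return 0;
    }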
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp new file mode 100644 index 000000000..0459e5473 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp | |||
| @@ -0,0 +1,76 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <optional> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | enum class Mode : u64 { | ||
| 15 | Dimension = 1, | ||
| 16 | TextureType = 2, | ||
| 17 | SamplePos = 5, | ||
| 18 | }; | ||
| 19 | |||
| 20 | IR::Value Query(TranslatorVisitor& v, const IR::U32& handle, Mode mode, IR::Reg src_reg) { | ||
| 21 | switch (mode) { | ||
| 22 | case Mode::Dimension: { | ||
| 23 | const IR::U32 lod{v.X(src_reg)}; | ||
| 24 | return v.ir.ImageQueryDimension(handle, lod); | ||
| 25 | } | ||
| 26 | case Mode::TextureType: | ||
| 27 | case Mode::SamplePos: | ||
| 28 | default: | ||
| 29 | throw NotImplementedException("Mode {}", mode); | ||
| 30 | } | ||
| 31 | } | ||
| 32 | |||
| 33 | void Impl(TranslatorVisitor& v, u64 insn, std::optional<u32> cbuf_offset) { | ||
| 34 | union { | ||
| 35 | u64 raw; | ||
| 36 | BitField<49, 1, u64> nodep; | ||
| 37 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 38 | BitField<8, 8, IR::Reg> src_reg; | ||
| 39 | BitField<22, 3, Mode> mode; | ||
| 40 | BitField<31, 4, u64> mask; | ||
| 41 | } const txq{insn}; | ||
| 42 | |||
| 43 | IR::Reg src_reg{txq.src_reg}; | ||
| 44 | IR::U32 handle; | ||
| 45 | if (cbuf_offset) { | ||
| 46 | handle = v.ir.Imm32(*cbuf_offset); | ||
| 47 | } else { | ||
| 48 | handle = v.X(src_reg); | ||
| 49 | ++src_reg; | ||
| 50 | } | ||
| 51 | const IR::Value query{Query(v, handle, txq.mode, src_reg)}; | ||
| 52 | IR::Reg dest_reg{txq.dest_reg}; | ||
| 53 | for (int element = 0; element < 4; ++element) { | ||
| 54 | if (((txq.mask >> element) & 1) == 0) { | ||
| 55 | continue; | ||
| 56 | } | ||
| 57 | v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, static_cast<size_t>(element))}); | ||
| 58 | ++dest_reg; | ||
| 59 | } | ||
| 60 | } | ||
| 61 | } // Anonymous namespace | ||
| 62 | |||
| 63 | void TranslatorVisitor::TXQ(u64 insn) { | ||
| 64 | union { | ||
| 65 | u64 raw; | ||
| 66 | BitField<36, 13, u64> cbuf_offset; | ||
| 67 | } const txq{insn}; | ||
| 68 | |||
| 69 | Impl(*this, insn, static_cast<u32>(txq.cbuf_offset * 4)); | ||
| 70 | } | ||
| 71 | |||
| 72 | void TranslatorVisitor::TXQ_b(u64 insn) { | ||
| 73 | Impl(*this, insn, std::nullopt); | ||
| 74 | } | ||
| 75 | |||
| 76 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp new file mode 100644 index 000000000..e1f4174cf --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp | |||
| @@ -0,0 +1,30 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/exception.h" | ||
| 6 | #include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h" | ||
| 7 | |||
| 8 | namespace Shader::Maxwell { | ||
| 9 | |||
| 10 | IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value, VideoWidth width, | ||
| 11 | u32 selector, bool is_signed) { | ||
| 12 | switch (width) { | ||
| 13 | case VideoWidth::Byte: | ||
| 14 | case VideoWidth::Unknown: | ||
| 15 | return ir.BitFieldExtract(value, ir.Imm32(selector * 8), ir.Imm32(8), is_signed); | ||
| 16 | case VideoWidth::Short: | ||
| 17 | return ir.BitFieldExtract(value, ir.Imm32(selector * 16), ir.Imm32(16), is_signed); | ||
| 18 | case VideoWidth::Word: | ||
| 19 | return value; | ||
| 20 | default: | ||
| 21 | throw NotImplementedException("Unknown VideoWidth {}", width); | ||
| 22 | } | ||
| 23 | } | ||
| 24 | |||
| 25 | VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate) { | ||
| 26 | // Immediates must be in 16-bit format. | ||
| 27 | return is_immediate ? VideoWidth::Short : width; | ||
| 28 | } | ||
| 29 | |||
| 30 | } // namespace Shader::Maxwell | ||
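ExtractVideoOperandValue selects a sub-word of a 32-bit operand by width and selector: one of four bytes, one of two 16-bit halves, or the whole word. A standalone unsigned version with hypothetical names:

    #include <cstdint>
    #include <cstdio>

    // Standalone unsigned version of the selection done by
    // ExtractVideoOperandValue; names here are hypothetical.
    uint32_t ExtractByte(uint32_t value, uint32_t selector) {
        return (value >> (selector * 8)) & 0xFFu; // one of four bytes
    }

    uint32_t ExtractShort(uint32_t value, uint32_t selector) {
        return (value >> (selector * 16)) & 0xFFFFu; // one of two halves
    }

    int main() {
        const uint32_t value = 0xAABBCCDDu;
        std::printf("byte 1 = 0x%X, short 1 = 0x%X\n",
                    ExtractByte(value, 1), ExtractShort(value, 1));
        return 0;
    }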
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h new file mode 100644 index 000000000..40c0b907c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h | |||
| @@ -0,0 +1,23 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | enum class VideoWidth : u64 { | ||
| 12 | Byte, | ||
| 13 | Unknown, | ||
| 14 | Short, | ||
| 15 | Word, | ||
| 16 | }; | ||
| 17 | |||
| 18 | [[nodiscard]] IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value, | ||
| 19 | VideoWidth width, u32 selector, bool is_signed); | ||
| 20 | |||
| 21 | [[nodiscard]] VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate); | ||
| 22 | |||
| 23 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp new file mode 100644 index 000000000..78869601f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp | |||
| @@ -0,0 +1,92 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | enum class VideoMinMaxOps : u64 { | ||
| 13 | MRG_16H, | ||
| 14 | MRG_16L, | ||
| 15 | MRG_8B0, | ||
| 16 | MRG_8B2, | ||
| 17 | ACC, | ||
| 18 | MIN, | ||
| 19 | MAX, | ||
| 20 | }; | ||
| 21 | |||
| 22 | [[nodiscard]] IR::U32 ApplyVideoMinMaxOp(IR::IREmitter& ir, const IR::U32& lhs, const IR::U32& rhs, | ||
| 23 | VideoMinMaxOps op, bool is_signed) { | ||
| 24 | switch (op) { | ||
| 25 | case VideoMinMaxOps::MIN: | ||
| 26 | return ir.IMin(lhs, rhs, is_signed); | ||
| 27 | case VideoMinMaxOps::MAX: | ||
| 28 | return ir.IMax(lhs, rhs, is_signed); | ||
| 29 | default: | ||
| 30 | throw NotImplementedException("VMNMX op {}", op); | ||
| 31 | } | ||
| 32 | } | ||
| 33 | } // Anonymous namespace | ||
| 34 | |||
| 35 | void TranslatorVisitor::VMNMX(u64 insn) { | ||
| 36 | union { | ||
| 37 | u64 raw; | ||
| 38 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 39 | BitField<20, 16, u64> src_b_imm; | ||
| 40 | BitField<28, 2, u64> src_b_selector; | ||
| 41 | BitField<29, 2, VideoWidth> src_b_width; | ||
| 42 | BitField<36, 2, u64> src_a_selector; | ||
| 43 | BitField<37, 2, VideoWidth> src_a_width; | ||
| 44 | BitField<47, 1, u64> cc; | ||
| 45 | BitField<48, 1, u64> src_a_sign; | ||
| 46 | BitField<49, 1, u64> src_b_sign; | ||
| 47 | BitField<50, 1, u64> is_src_b_reg; | ||
| 48 | BitField<51, 3, VideoMinMaxOps> op; | ||
| 49 | BitField<54, 1, u64> dest_sign; | ||
| 50 | BitField<55, 1, u64> sat; | ||
| 51 | BitField<56, 1, u64> mx; | ||
| 52 | } const vmnmx{insn}; | ||
| 53 | |||
| 54 | if (vmnmx.cc != 0) { | ||
| 55 | throw NotImplementedException("VMNMX CC"); | ||
| 56 | } | ||
| 57 | if (vmnmx.sat != 0) { | ||
| 58 | throw NotImplementedException("VMNMX SAT"); | ||
| 59 | } | ||
| 60 | // Selectors were shown to default to 2 in unit tests | ||
| 61 | if (vmnmx.src_a_selector != 2) { | ||
| 62 | throw NotImplementedException("VMNMX Selector {}", vmnmx.src_a_selector.Value()); | ||
| 63 | } | ||
| 64 | if (vmnmx.src_b_selector != 2) { | ||
| 65 | throw NotImplementedException("VMNMX Selector {}", vmnmx.src_b_selector.Value()); | ||
| 66 | } | ||
| 67 | if (vmnmx.src_a_width != VideoWidth::Word) { | ||
| 68 | throw NotImplementedException("VMNMX Source Width {}", vmnmx.src_a_width.Value()); | ||
| 69 | } | ||
| 70 | |||
| 71 | const bool is_b_imm{vmnmx.is_src_b_reg == 0}; | ||
| 72 | const IR::U32 src_a{GetReg8(insn)}; | ||
| 73 | const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vmnmx.src_b_imm)) : GetReg20(insn)}; | ||
| 74 | const IR::U32 src_c{GetReg39(insn)}; | ||
| 75 | |||
| 76 | const VideoWidth a_width{vmnmx.src_a_width}; | ||
| 77 | const VideoWidth b_width{GetVideoSourceWidth(vmnmx.src_b_width, is_b_imm)}; | ||
| 78 | |||
| 79 | const bool src_a_signed{vmnmx.src_a_sign != 0}; | ||
| 80 | const bool src_b_signed{vmnmx.src_b_sign != 0}; | ||
| 81 | const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, 0, src_a_signed)}; | ||
| 82 | const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, 0, src_b_signed)}; | ||
| 83 | |||
| 84 | // The first operation's signedness depends only on operand B's sign | ||
| 85 | const bool op_1_signed{src_b_signed}; | ||
| 86 | |||
| 87 | const IR::U32 lhs{vmnmx.mx != 0 ? ir.IMax(op_a, op_b, op_1_signed) | ||
| 88 | : ir.IMin(op_a, op_b, op_1_signed)}; | ||
| 89 | X(vmnmx.dest_reg, ApplyVideoMinMaxOp(ir, lhs, src_c, vmnmx.op, vmnmx.dest_sign != 0)); | ||
| 90 | } | ||
| 91 | |||
| 92 | } // namespace Shader::Maxwell | ||
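VMNMX is thus lowered as a two-stage reduction: the MX bit picks min or max of A and B (signed according to operand B alone), and the 3-bit op field applies a second min or max against C with the destination sign. A scalar model of the accepted path, assuming signed Word operands for brevity (names illustrative):

```cpp
#include <algorithm>
#include <cstdint>

// Two-stage VMNMX reduction on already-extracted signed operands.
// stage2_is_max corresponds to VideoMinMaxOps::MAX; MIN otherwise.
inline int32_t Vmnmx(int32_t op_a, int32_t op_b, int32_t op_c, bool mx, bool stage2_is_max) {
    const int32_t lhs = mx ? std::max(op_a, op_b) : std::min(op_a, op_b);
    return stage2_is_max ? std::max(lhs, op_c) : std::min(lhs, op_c);
}
```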
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp new file mode 100644 index 000000000..cc2e6d6e6 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp | |||
| @@ -0,0 +1,64 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | void TranslatorVisitor::VMAD(u64 insn) { | ||
| 12 | union { | ||
| 13 | u64 raw; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<20, 16, u64> src_b_imm; | ||
| 16 | BitField<28, 2, u64> src_b_selector; | ||
| 17 | BitField<29, 2, VideoWidth> src_b_width; | ||
| 18 | BitField<36, 2, u64> src_a_selector; | ||
| 19 | BitField<37, 2, VideoWidth> src_a_width; | ||
| 20 | BitField<47, 1, u64> cc; | ||
| 21 | BitField<48, 1, u64> src_a_sign; | ||
| 22 | BitField<49, 1, u64> src_b_sign; | ||
| 23 | BitField<50, 1, u64> is_src_b_reg; | ||
| 24 | BitField<51, 2, u64> scale; | ||
| 25 | BitField<53, 1, u64> src_c_neg; | ||
| 26 | BitField<54, 1, u64> src_a_neg; | ||
| 27 | BitField<55, 1, u64> sat; | ||
| 28 | } const vmad{insn}; | ||
| 29 | |||
| 30 | if (vmad.cc != 0) { | ||
| 31 | throw NotImplementedException("VMAD CC"); | ||
| 32 | } | ||
| 33 | if (vmad.sat != 0) { | ||
| 34 | throw NotImplementedException("VMAD SAT"); | ||
| 35 | } | ||
| 36 | if (vmad.scale != 0) { | ||
| 37 | throw NotImplementedException("VMAD SCALE"); | ||
| 38 | } | ||
| 39 | if (vmad.src_a_neg != 0 && vmad.src_c_neg != 0) { | ||
| 40 | throw NotImplementedException("VMAD PO"); | ||
| 41 | } | ||
| 42 | if (vmad.src_a_neg != 0 || vmad.src_c_neg != 0) { | ||
| 43 | throw NotImplementedException("VMAD NEG"); | ||
| 44 | } | ||
| 45 | const bool is_b_imm{vmad.is_src_b_reg == 0}; | ||
| 46 | const IR::U32 src_a{GetReg8(insn)}; | ||
| 47 | const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vmad.src_b_imm)) : GetReg20(insn)}; | ||
| 48 | const IR::U32 src_c{GetReg39(insn)}; | ||
| 49 | |||
| 50 | const u32 a_selector{static_cast<u32>(vmad.src_a_selector)}; | ||
| 51 | // Immediate values can't have a selector | ||
| 52 | const u32 b_selector{is_b_imm ? 0U : static_cast<u32>(vmad.src_b_selector)}; | ||
| 53 | const VideoWidth a_width{vmad.src_a_width}; | ||
| 54 | const VideoWidth b_width{GetVideoSourceWidth(vmad.src_b_width, is_b_imm)}; | ||
| 55 | |||
| 56 | const bool src_a_signed{vmad.src_a_sign != 0}; | ||
| 57 | const bool src_b_signed{vmad.src_b_sign != 0}; | ||
| 58 | const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)}; | ||
| 59 | const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)}; | ||
| 60 | |||
| 61 | X(vmad.dest_reg, ir.IAdd(ir.IMul(op_a, op_b), src_c)); | ||
| 62 | } | ||
| 63 | |||
| 64 | } // namespace Shader::Maxwell | ||
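With scale, saturation, and negation all rejected above, the accepted VMAD path is a plain integer multiply-add of the lane-extracted operands. In scalar form (a sketch, names illustrative):

```cpp
#include <cstdint>

// dest = a * b + c on 32-bit lanes; wraps modulo 2^32 like IMul/IAdd on U32.
inline uint32_t Vmad(uint32_t op_a, uint32_t op_b, uint32_t op_c) {
    return op_a * op_b + op_c;
}
```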
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp new file mode 100644 index 000000000..1b66abc33 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp | |||
| @@ -0,0 +1,92 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h" | ||
| 10 | |||
| 11 | namespace Shader::Maxwell { | ||
| 12 | namespace { | ||
| 13 | enum class VsetpCompareOp : u64 { | ||
| 14 | False = 0, | ||
| 15 | LessThan, | ||
| 16 | Equal, | ||
| 17 | LessThanEqual, | ||
| 18 | GreaterThan = 16, | ||
| 19 | NotEqual, | ||
| 20 | GreaterThanEqual, | ||
| 21 | True, | ||
| 22 | }; | ||
| 23 | |||
| 24 | CompareOp VsetpToShaderCompareOp(VsetpCompareOp op) { | ||
| 25 | switch (op) { | ||
| 26 | case VsetpCompareOp::False: | ||
| 27 | return CompareOp::False; | ||
| 28 | case VsetpCompareOp::LessThan: | ||
| 29 | return CompareOp::LessThan; | ||
| 30 | case VsetpCompareOp::Equal: | ||
| 31 | return CompareOp::Equal; | ||
| 32 | case VsetpCompareOp::LessThanEqual: | ||
| 33 | return CompareOp::LessThanEqual; | ||
| 34 | case VsetpCompareOp::GreaterThan: | ||
| 35 | return CompareOp::GreaterThan; | ||
| 36 | case VsetpCompareOp::NotEqual: | ||
| 37 | return CompareOp::NotEqual; | ||
| 38 | case VsetpCompareOp::GreaterThanEqual: | ||
| 39 | return CompareOp::GreaterThanEqual; | ||
| 40 | case VsetpCompareOp::True: | ||
| 41 | return CompareOp::True; | ||
| 42 | default: | ||
| 43 | throw NotImplementedException("Invalid compare op {}", op); | ||
| 44 | } | ||
| 45 | } | ||
| 46 | } // Anonymous namespace | ||
| 47 | |||
| 48 | void TranslatorVisitor::VSETP(u64 insn) { | ||
| 49 | union { | ||
| 50 | u64 raw; | ||
| 51 | BitField<0, 3, IR::Pred> dest_pred_b; | ||
| 52 | BitField<3, 3, IR::Pred> dest_pred_a; | ||
| 53 | BitField<20, 16, u64> src_b_imm; | ||
| 54 | BitField<28, 2, u64> src_b_selector; | ||
| 55 | BitField<29, 2, VideoWidth> src_b_width; | ||
| 56 | BitField<36, 2, u64> src_a_selector; | ||
| 57 | BitField<37, 2, VideoWidth> src_a_width; | ||
| 58 | BitField<39, 3, IR::Pred> bop_pred; | ||
| 59 | BitField<42, 1, u64> neg_bop_pred; | ||
| 60 | BitField<43, 5, VsetpCompareOp> compare_op; | ||
| 61 | BitField<45, 2, BooleanOp> bop; | ||
| 62 | BitField<48, 1, u64> src_a_sign; | ||
| 63 | BitField<49, 1, u64> src_b_sign; | ||
| 64 | BitField<50, 1, u64> is_src_b_reg; | ||
| 65 | } const vsetp{insn}; | ||
| 66 | |||
| 67 | const bool is_b_imm{vsetp.is_src_b_reg == 0}; | ||
| 68 | const IR::U32 src_a{GetReg8(insn)}; | ||
| 69 | const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vsetp.src_b_imm)) : GetReg20(insn)}; | ||
| 70 | |||
| 71 | const u32 a_selector{static_cast<u32>(vsetp.src_a_selector)}; | ||
| 72 | const u32 b_selector{static_cast<u32>(vsetp.src_b_selector)}; | ||
| 73 | const VideoWidth a_width{vsetp.src_a_width}; | ||
| 74 | const VideoWidth b_width{GetVideoSourceWidth(vsetp.src_b_width, is_b_imm)}; | ||
| 75 | |||
| 76 | const bool src_a_signed{vsetp.src_a_sign != 0}; | ||
| 77 | const bool src_b_signed{vsetp.src_b_sign != 0}; | ||
| 78 | const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)}; | ||
| 79 | const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)}; | ||
| 80 | |||
| 81 | // The comparison's signedness depends only on operand B's sign | ||
| 82 | const bool compare_signed{src_b_signed}; | ||
| 83 | const CompareOp compare_op{VsetpToShaderCompareOp(vsetp.compare_op)}; | ||
| 84 | const IR::U1 comparison{IntegerCompare(ir, op_a, op_b, compare_op, compare_signed)}; | ||
| 85 | const IR::U1 bop_pred{ir.GetPred(vsetp.bop_pred, vsetp.neg_bop_pred != 0)}; | ||
| 86 | const IR::U1 result_a{PredicateCombine(ir, comparison, bop_pred, vsetp.bop)}; | ||
| 87 | const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(comparison), bop_pred, vsetp.bop)}; | ||
| 88 | ir.SetPred(vsetp.dest_pred_a, result_a); | ||
| 89 | ir.SetPred(vsetp.dest_pred_b, result_b); | ||
| 90 | } | ||
| 91 | |||
| 92 | } // namespace Shader::Maxwell | ||
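Both destination predicates come from the same comparison: `dest_pred_a` combines the comparison with the boolean-op predicate, while `dest_pred_b` combines its negation. Sketched for the `BooleanOp::AND` case (an assumption for illustration; `PredicateCombine` also handles the other boolean ops):

```cpp
// VSETP result predicates for the AND boolean op, as plain booleans.
inline void VsetpResults(bool comparison, bool bop_pred, bool& result_a, bool& result_b) {
    result_a = comparison && bop_pred;  // PredicateCombine(comparison, bop_pred, AND)
    result_b = !comparison && bop_pred; // PredicateCombine(!comparison, bop_pred, AND)
}
```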
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp new file mode 100644 index 000000000..7ce370f09 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp | |||
| @@ -0,0 +1,54 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class VoteOp : u64 { | ||
| 12 | ALL, | ||
| 13 | ANY, | ||
| 14 | EQ, | ||
| 15 | }; | ||
| 16 | |||
| 17 | [[nodiscard]] IR::U1 VoteOperation(IR::IREmitter& ir, const IR::U1& pred, VoteOp vote_op) { | ||
| 18 | switch (vote_op) { | ||
| 19 | case VoteOp::ALL: | ||
| 20 | return ir.VoteAll(pred); | ||
| 21 | case VoteOp::ANY: | ||
| 22 | return ir.VoteAny(pred); | ||
| 23 | case VoteOp::EQ: | ||
| 24 | return ir.VoteEqual(pred); | ||
| 25 | default: | ||
| 26 | throw NotImplementedException("Invalid VOTE op {}", vote_op); | ||
| 27 | } | ||
| 28 | } | ||
| 29 | |||
| 30 | void Vote(TranslatorVisitor& v, u64 insn) { | ||
| 31 | union { | ||
| 32 | u64 insn; | ||
| 33 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 34 | BitField<39, 3, IR::Pred> pred_a; | ||
| 35 | BitField<42, 1, u64> neg_pred_a; | ||
| 36 | BitField<45, 3, IR::Pred> pred_b; | ||
| 37 | BitField<48, 2, VoteOp> vote_op; | ||
| 38 | } const vote{insn}; | ||
| 39 | |||
| 40 | const IR::U1 vote_pred{v.ir.GetPred(vote.pred_a, vote.neg_pred_a != 0)}; | ||
| 41 | v.ir.SetPred(vote.pred_b, VoteOperation(v.ir, vote_pred, vote.vote_op)); | ||
| 42 | v.X(vote.dest_reg, v.ir.SubgroupBallot(vote_pred)); | ||
| 43 | } | ||
| 44 | } // Anonymous namespace | ||
| 45 | |||
| 46 | void TranslatorVisitor::VOTE(u64 insn) { | ||
| 47 | Vote(*this, insn); | ||
| 48 | } | ||
| 49 | |||
| 50 | void TranslatorVisitor::VOTE_vtg(u64) { | ||
| 51 | LOG_WARNING(Shader, "(STUBBED) called"); | ||
| 52 | } | ||
| 53 | |||
| 54 | } // namespace Shader::Maxwell | ||
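The three vote modes can be modelled on the host as reductions over the warp's predicate values; the destination register additionally receives the ballot bitmask of `vote_pred` across the subgroup. A sketch assuming a fully active 32-lane warp:

```cpp
#include <array>
#include <cstdint>

inline bool VoteAll(const std::array<bool, 32>& p) {
    for (bool v : p) { if (!v) return false; }
    return true;
}
inline bool VoteAny(const std::array<bool, 32>& p) {
    for (bool v : p) { if (v) return true; }
    return false;
}
inline bool VoteEqual(const std::array<bool, 32>& p) {
    return VoteAll(p) || !VoteAny(p); // true when every lane agrees
}
inline uint32_t Ballot(const std::array<bool, 32>& p) {
    uint32_t mask = 0;
    for (uint32_t lane = 0; lane < 32; ++lane) {
        mask |= static_cast<uint32_t>(p[lane]) << lane;
    }
    return mask;
}
```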
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp new file mode 100644 index 000000000..550fed55c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp | |||
| @@ -0,0 +1,69 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <optional> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 10 | |||
| 11 | namespace Shader::Maxwell { | ||
| 12 | namespace { | ||
| 13 | enum class ShuffleMode : u64 { | ||
| 14 | IDX, | ||
| 15 | UP, | ||
| 16 | DOWN, | ||
| 17 | BFLY, | ||
| 18 | }; | ||
| 19 | |||
| 20 | [[nodiscard]] IR::U32 ShuffleOperation(IR::IREmitter& ir, const IR::U32& value, | ||
| 21 | const IR::U32& index, const IR::U32& mask, | ||
| 22 | ShuffleMode shfl_op) { | ||
| 23 | const IR::U32 clamp{ir.BitFieldExtract(mask, ir.Imm32(0), ir.Imm32(5))}; | ||
| 24 | const IR::U32 seg_mask{ir.BitFieldExtract(mask, ir.Imm32(8), ir.Imm32(5))}; | ||
| 25 | switch (shfl_op) { | ||
| 26 | case ShuffleMode::IDX: | ||
| 27 | return ir.ShuffleIndex(value, index, clamp, seg_mask); | ||
| 28 | case ShuffleMode::UP: | ||
| 29 | return ir.ShuffleUp(value, index, clamp, seg_mask); | ||
| 30 | case ShuffleMode::DOWN: | ||
| 31 | return ir.ShuffleDown(value, index, clamp, seg_mask); | ||
| 32 | case ShuffleMode::BFLY: | ||
| 33 | return ir.ShuffleButterfly(value, index, clamp, seg_mask); | ||
| 34 | default: | ||
| 35 | throw NotImplementedException("Invalid SHFL op {}", shfl_op); | ||
| 36 | } | ||
| 37 | } | ||
| 38 | |||
| 39 | void Shuffle(TranslatorVisitor& v, u64 insn, const IR::U32& index, const IR::U32& mask) { | ||
| 40 | union { | ||
| 41 | u64 insn; | ||
| 42 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 43 | BitField<8, 8, IR::Reg> src_reg; | ||
| 44 | BitField<30, 2, ShuffleMode> mode; | ||
| 45 | BitField<48, 3, IR::Pred> pred; | ||
| 46 | } const shfl{insn}; | ||
| 47 | |||
| 48 | const IR::U32 result{ShuffleOperation(v.ir, v.X(shfl.src_reg), index, mask, shfl.mode)}; | ||
| 49 | v.ir.SetPred(shfl.pred, v.ir.GetInBoundsFromOp(result)); | ||
| 50 | v.X(shfl.dest_reg, result); | ||
| 51 | } | ||
| 52 | } // Anonymous namespace | ||
| 53 | |||
| 54 | void TranslatorVisitor::SHFL(u64 insn) { | ||
| 55 | union { | ||
| 56 | u64 insn; | ||
| 57 | BitField<20, 5, u64> src_a_imm; | ||
| 58 | BitField<28, 1, u64> src_a_flag; | ||
| 59 | BitField<29, 1, u64> src_b_flag; | ||
| 60 | BitField<34, 13, u64> src_b_imm; | ||
| 61 | } const flags{insn}; | ||
| 62 | const IR::U32 src_a{flags.src_a_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_a_imm)) | ||
| 63 | : GetReg20(insn)}; | ||
| 64 | const IR::U32 src_b{flags.src_b_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_b_imm)) | ||
| 65 | : GetReg39(insn)}; | ||
| 66 | Shuffle(*this, insn, src_a, src_b); | ||
| 67 | } | ||
| 68 | |||
| 69 | } // namespace Shader::Maxwell | ||
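The SHFL mask operand packs two 5-bit fields, matching the two `BitFieldExtract` calls in `ShuffleOperation`: a lane clamp in bits [0,5) and a segmentation mask in bits [8,13) that partitions the warp into segments. Unpacked on the host (a sketch; `ShuffleMask` and `UnpackShuffleMask` are illustrative names):

```cpp
#include <cstdint>

struct ShuffleMask {
    uint32_t clamp;    // 5-bit lane bound, bits [0,5) of the mask operand
    uint32_t seg_mask; // 5-bit segmentation mask, bits [8,13)
};

inline ShuffleMask UnpackShuffleMask(uint32_t mask) {
    return ShuffleMask{
        .clamp = mask & 0x1Fu,
        .seg_mask = (mask >> 8) & 0x1Fu,
    };
}
```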
diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp new file mode 100644 index 000000000..8e3c4c5d5 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp | |||
| @@ -0,0 +1,52 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/environment.h" | ||
| 6 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/decode.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/location.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/translate.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | |||
| 14 | template <auto method> | ||
| 15 | static void Invoke(TranslatorVisitor& visitor, Location pc, u64 insn) { | ||
| 16 | using MethodType = decltype(method); | ||
| 17 | if constexpr (std::is_invocable_r_v<void, MethodType, TranslatorVisitor&, Location, u64>) { | ||
| 18 | (visitor.*method)(pc, insn); | ||
| 19 | } else if constexpr (std::is_invocable_r_v<void, MethodType, TranslatorVisitor&, u64>) { | ||
| 20 | (visitor.*method)(insn); | ||
| 21 | } else { | ||
| 22 | (visitor.*method)(); | ||
| 23 | } | ||
| 24 | } | ||
| 25 | |||
| 26 | void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 location_end) { | ||
| 27 | if (location_begin == location_end) { | ||
| 28 | return; | ||
| 29 | } | ||
| 30 | TranslatorVisitor visitor{env, *block}; | ||
| 31 | for (Location pc = location_begin; pc != location_end; ++pc) { | ||
| 32 | const u64 insn{env.ReadInstruction(pc.Offset())}; | ||
| 33 | try { | ||
| 34 | const Opcode opcode{Decode(insn)}; | ||
| 35 | switch (opcode) { | ||
| 36 | #define INST(name, cute, mask) \ | ||
| 37 | case Opcode::name: \ | ||
| 38 | Invoke<&TranslatorVisitor::name>(visitor, pc, insn); \ | ||
| 39 | break; | ||
| 40 | #include "shader_recompiler/frontend/maxwell/maxwell.inc" | ||
| 41 | #undef INST | ||
| 42 | default: | ||
| 43 | throw LogicError("Invalid opcode {}", opcode); | ||
| 44 | } | ||
| 45 | } catch (Exception& exception) { | ||
| 46 | exception.Prepend(fmt::format("Translate {}: ", Decode(insn))); | ||
| 47 | throw; | ||
| 48 | } | ||
| 49 | } | ||
| 50 | } | ||
| 51 | |||
| 52 | } // namespace Shader::Maxwell | ||
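The `Invoke` helper lets every opcode share one dispatch table even though translator methods have three different signatures; `if constexpr` resolves the correct call form at compile time. The pattern in isolation (a standalone sketch with two signatures, not project code):

```cpp
#include <type_traits>

struct Visitor {
    void NoArgs() {}
    void InsnOnly(unsigned long long /*insn*/) {}
};

template <auto method, typename V>
void Invoke(V& visitor, [[maybe_unused]] unsigned long long insn) {
    if constexpr (std::is_invocable_r_v<void, decltype(method), V&, unsigned long long>) {
        (visitor.*method)(insn); // methods taking the raw instruction
    } else {
        (visitor.*method)();     // methods taking no operands
    }
}

int main() {
    Visitor v;
    Invoke<&Visitor::InsnOnly>(v, 0);
    Invoke<&Visitor::NoArgs>(v, 0);
}
```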
diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.h b/src/shader_recompiler/frontend/maxwell/translate/translate.h new file mode 100644 index 000000000..a3edd2e46 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.h | |||
| @@ -0,0 +1,14 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "shader_recompiler/environment.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | |||
| 12 | void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 location_end); | ||
| 13 | |||
| 14 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp new file mode 100644 index 000000000..c067d459c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp | |||
| @@ -0,0 +1,223 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <memory> | ||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include "common/settings.h" | ||
| 10 | #include "shader_recompiler/exception.h" | ||
| 11 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 12 | #include "shader_recompiler/frontend/ir/post_order.h" | ||
| 13 | #include "shader_recompiler/frontend/maxwell/structured_control_flow.h" | ||
| 14 | #include "shader_recompiler/frontend/maxwell/translate/translate.h" | ||
| 15 | #include "shader_recompiler/frontend/maxwell/translate_program.h" | ||
| 16 | #include "shader_recompiler/host_translate_info.h" | ||
| 17 | #include "shader_recompiler/ir_opt/passes.h" | ||
| 18 | |||
| 19 | namespace Shader::Maxwell { | ||
| 20 | namespace { | ||
| 21 | IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { | ||
| 22 | size_t num_syntax_blocks{}; | ||
| 23 | for (const auto& node : syntax_list) { | ||
| 24 | if (node.type == IR::AbstractSyntaxNode::Type::Block) { | ||
| 25 | ++num_syntax_blocks; | ||
| 26 | } | ||
| 27 | } | ||
| 28 | IR::BlockList blocks; | ||
| 29 | blocks.reserve(num_syntax_blocks); | ||
| 30 | for (const auto& node : syntax_list) { | ||
| 31 | if (node.type == IR::AbstractSyntaxNode::Type::Block) { | ||
| 32 | blocks.push_back(node.data.block); | ||
| 33 | } | ||
| 34 | } | ||
| 35 | return blocks; | ||
| 36 | } | ||
| 37 | |||
| 38 | void RemoveUnreachableBlocks(IR::Program& program) { | ||
| 39 | // Some blocks might be unreachable if a function call exists unconditionally | ||
| 40 | // If this happens, the number of blocks and post-order blocks will mismatch | ||
| 41 | if (program.blocks.size() == program.post_order_blocks.size()) { | ||
| 42 | return; | ||
| 43 | } | ||
| 44 | const auto begin{program.blocks.begin() + 1}; | ||
| 45 | const auto end{program.blocks.end()}; | ||
| 46 | const auto pred{[](IR::Block* block) { return block->ImmPredecessors().empty(); }}; | ||
| 47 | program.blocks.erase(std::remove_if(begin, end, pred), end); | ||
| 48 | } | ||
| 49 | |||
| 50 | void CollectInterpolationInfo(Environment& env, IR::Program& program) { | ||
| 51 | if (program.stage != Stage::Fragment) { | ||
| 52 | return; | ||
| 53 | } | ||
| 54 | const ProgramHeader& sph{env.SPH()}; | ||
| 55 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { | ||
| 56 | std::optional<PixelImap> imap; | ||
| 57 | for (const PixelImap value : sph.ps.GenericInputMap(static_cast<u32>(index))) { | ||
| 58 | if (value == PixelImap::Unused) { | ||
| 59 | continue; | ||
| 60 | } | ||
| 61 | if (imap && imap != value) { | ||
| 62 | throw NotImplementedException("Per component interpolation"); | ||
| 63 | } | ||
| 64 | imap = value; | ||
| 65 | } | ||
| 66 | if (!imap) { | ||
| 67 | continue; | ||
| 68 | } | ||
| 69 | program.info.interpolation[index] = [&] { | ||
| 70 | switch (*imap) { | ||
| 71 | case PixelImap::Unused: | ||
| 72 | case PixelImap::Perspective: | ||
| 73 | return Interpolation::Smooth; | ||
| 74 | case PixelImap::Constant: | ||
| 75 | return Interpolation::Flat; | ||
| 76 | case PixelImap::ScreenLinear: | ||
| 77 | return Interpolation::NoPerspective; | ||
| 78 | } | ||
| 79 | throw NotImplementedException("Unknown interpolation {}", *imap); | ||
| 80 | }(); | ||
| 81 | } | ||
| 82 | } | ||
| 83 | |||
| 84 | void AddNVNStorageBuffers(IR::Program& program) { | ||
| 85 | if (!program.info.uses_global_memory) { | ||
| 86 | return; | ||
| 87 | } | ||
| 88 | const u32 driver_cbuf{0}; | ||
| 89 | const u32 descriptor_size{0x10}; | ||
| 90 | const u32 num_buffers{16}; | ||
| 91 | const u32 base{[&] { | ||
| 92 | switch (program.stage) { | ||
| 93 | case Stage::VertexA: | ||
| 94 | case Stage::VertexB: | ||
| 95 | return 0x110u; | ||
| 96 | case Stage::TessellationControl: | ||
| 97 | return 0x210u; | ||
| 98 | case Stage::TessellationEval: | ||
| 99 | return 0x310u; | ||
| 100 | case Stage::Geometry: | ||
| 101 | return 0x410u; | ||
| 102 | case Stage::Fragment: | ||
| 103 | return 0x510u; | ||
| 104 | case Stage::Compute: | ||
| 105 | return 0x310u; | ||
| 106 | } | ||
| 107 | throw InvalidArgument("Invalid stage {}", program.stage); | ||
| 108 | }()}; | ||
| 109 | auto& descs{program.info.storage_buffers_descriptors}; | ||
| 110 | for (u32 index = 0; index < num_buffers; ++index) { | ||
| 111 | if (!program.info.nvn_buffer_used[index]) { | ||
| 112 | continue; | ||
| 113 | } | ||
| 114 | const u32 offset{base + index * descriptor_size}; | ||
| 115 | const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)}; | ||
| 116 | if (it != descs.end()) { | ||
| 117 | it->is_written |= program.info.stores_global_memory; | ||
| 118 | continue; | ||
| 119 | } | ||
| 120 | descs.push_back({ | ||
| 121 | .cbuf_index = driver_cbuf, | ||
| 122 | .cbuf_offset = offset, | ||
| 123 | .count = 1, | ||
| 124 | .is_written = program.info.stores_global_memory, | ||
| 125 | }); | ||
| 126 | } | ||
| 127 | } | ||
| 128 | } // Anonymous namespace | ||
| 129 | |||
| 130 | IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, | ||
| 131 | Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) { | ||
| 132 | IR::Program program; | ||
| 133 | program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); | ||
| 134 | program.blocks = GenerateBlocks(program.syntax_list); | ||
| 135 | program.post_order_blocks = PostOrder(program.syntax_list.front()); | ||
| 136 | program.stage = env.ShaderStage(); | ||
| 137 | program.local_memory_size = env.LocalMemorySize(); | ||
| 138 | switch (program.stage) { | ||
| 139 | case Stage::TessellationControl: { | ||
| 140 | const ProgramHeader& sph{env.SPH()}; | ||
| 141 | program.invocations = sph.common2.threads_per_input_primitive; | ||
| 142 | break; | ||
| 143 | } | ||
| 144 | case Stage::Geometry: { | ||
| 145 | const ProgramHeader& sph{env.SPH()}; | ||
| 146 | program.output_topology = sph.common3.output_topology; | ||
| 147 | program.output_vertices = sph.common4.max_output_vertices; | ||
| 148 | program.invocations = sph.common2.threads_per_input_primitive; | ||
| 149 | program.is_geometry_passthrough = sph.common0.geometry_passthrough != 0; | ||
| 150 | if (program.is_geometry_passthrough) { | ||
| 151 | const auto& mask{env.GpPassthroughMask()}; | ||
| 152 | for (size_t i = 0; i < program.info.passthrough.mask.size(); ++i) { | ||
| 153 | program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0; | ||
| 154 | } | ||
| 155 | } | ||
| 156 | break; | ||
| 157 | } | ||
| 158 | case Stage::Compute: | ||
| 159 | program.workgroup_size = env.WorkgroupSize(); | ||
| 160 | program.shared_memory_size = env.SharedMemorySize(); | ||
| 161 | break; | ||
| 162 | default: | ||
| 163 | break; | ||
| 164 | } | ||
| 165 | RemoveUnreachableBlocks(program); | ||
| 166 | |||
| 167 | // Replace instructions before the SSA rewrite | ||
| 168 | if (!host_info.support_float16) { | ||
| 169 | Optimization::LowerFp16ToFp32(program); | ||
| 170 | } | ||
| 171 | if (!host_info.support_int64) { | ||
| 172 | Optimization::LowerInt64ToInt32(program); | ||
| 173 | } | ||
| 174 | Optimization::SsaRewritePass(program); | ||
| 175 | |||
| 176 | Optimization::GlobalMemoryToStorageBufferPass(program); | ||
| 177 | Optimization::TexturePass(env, program); | ||
| 178 | |||
| 179 | Optimization::ConstantPropagationPass(program); | ||
| 180 | Optimization::DeadCodeEliminationPass(program); | ||
| 181 | if (Settings::values.renderer_debug) { | ||
| 182 | Optimization::VerificationPass(program); | ||
| 183 | } | ||
| 184 | Optimization::CollectShaderInfoPass(env, program); | ||
| 185 | CollectInterpolationInfo(env, program); | ||
| 186 | AddNVNStorageBuffers(program); | ||
| 187 | return program; | ||
| 188 | } | ||
| 189 | |||
| 190 | IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, | ||
| 191 | Environment& env_vertex_b) { | ||
| 192 | IR::Program result{}; | ||
| 193 | Optimization::VertexATransformPass(vertex_a); | ||
| 194 | Optimization::VertexBTransformPass(vertex_b); | ||
| 195 | for (const auto& term : vertex_a.syntax_list) { | ||
| 196 | if (term.type != IR::AbstractSyntaxNode::Type::Return) { | ||
| 197 | result.syntax_list.push_back(term); | ||
| 198 | } | ||
| 199 | } | ||
| 200 | result.syntax_list.insert(result.syntax_list.end(), vertex_b.syntax_list.begin(), | ||
| 201 | vertex_b.syntax_list.end()); | ||
| 202 | result.blocks = GenerateBlocks(result.syntax_list); | ||
| 203 | result.post_order_blocks = vertex_b.post_order_blocks; | ||
| 204 | for (const auto& block : vertex_a.post_order_blocks) { | ||
| 205 | result.post_order_blocks.push_back(block); | ||
| 206 | } | ||
| 207 | result.stage = Stage::VertexB; | ||
| 208 | result.info = vertex_a.info; | ||
| 209 | result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); | ||
| 210 | result.info.loads.mask |= vertex_b.info.loads.mask; | ||
| 211 | result.info.stores.mask |= vertex_b.info.stores.mask; | ||
| 212 | |||
| 213 | Optimization::JoinTextureInfo(result.info, vertex_b.info); | ||
| 214 | Optimization::JoinStorageInfo(result.info, vertex_b.info); | ||
| 215 | Optimization::DeadCodeEliminationPass(result); | ||
| 216 | if (Settings::values.renderer_debug) { | ||
| 217 | Optimization::VerificationPass(result); | ||
| 218 | } | ||
| 219 | Optimization::CollectShaderInfoPass(env_vertex_b, result); | ||
| 220 | return result; | ||
| 221 | } | ||
| 222 | |||
| 223 | } // namespace Shader::Maxwell | ||
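`AddNVNStorageBuffers` assumes the NVN driver layout: constant buffer 0 holds 16 storage-buffer descriptors of 0x10 bytes each, starting at a per-stage base (0x110 for vertex, 0x510 for fragment, and so on). The offset arithmetic, with a worked value (a sketch; `NvnDescriptorOffset` is an illustrative name):

```cpp
#include <cstdint>

constexpr uint32_t descriptor_size = 0x10;

constexpr uint32_t NvnDescriptorOffset(uint32_t stage_base, uint32_t index) {
    return stage_base + index * descriptor_size;
}

// Fragment stage (base 0x510), descriptor index 3:
static_assert(NvnDescriptorOffset(0x510, 3) == 0x540);
```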
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.h b/src/shader_recompiler/frontend/maxwell/translate_program.h new file mode 100644 index 000000000..a84814811 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate_program.h | |||
| @@ -0,0 +1,23 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "shader_recompiler/environment.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/control_flow.h" | ||
| 11 | #include "shader_recompiler/host_translate_info.h" | ||
| 12 | #include "shader_recompiler/object_pool.h" | ||
| 13 | |||
| 14 | namespace Shader::Maxwell { | ||
| 15 | |||
| 16 | [[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, | ||
| 17 | ObjectPool<IR::Block>& block_pool, Environment& env, | ||
| 18 | Flow::CFG& cfg, const HostTranslateInfo& host_info); | ||
| 19 | |||
| 20 | [[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, | ||
| 21 | Environment& env_vertex_b); | ||
| 22 | |||
| 23 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h new file mode 100644 index 000000000..94a584219 --- /dev/null +++ b/src/shader_recompiler/host_translate_info.h | |||
| @@ -0,0 +1,18 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | namespace Shader { | ||
| 8 | |||
| 9 | // Try to keep entries here to a minimum; | ||
| 10 | // they can accidentally change the cached information in a shader | ||
| 11 | |||
| 12 | /// Misc information about the host | ||
| 13 | struct HostTranslateInfo { | ||
| 14 | bool support_float16{}; ///< True when the device supports 16-bit floats | ||
| 15 | bool support_int64{}; ///< True when the device supports 64-bit integers | ||
| 16 | }; | ||
| 17 | |||
| 18 | } // namespace Shader | ||
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp new file mode 100644 index 000000000..5ead930f1 --- /dev/null +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | |||
| @@ -0,0 +1,928 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/alignment.h" | ||
| 6 | #include "shader_recompiler/environment.h" | ||
| 7 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 10 | #include "shader_recompiler/ir_opt/passes.h" | ||
| 11 | #include "shader_recompiler/shader_info.h" | ||
| 12 | |||
| 13 | namespace Shader::Optimization { | ||
| 14 | namespace { | ||
| 15 | void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { | ||
| 16 | if (count != 1) { | ||
| 17 | throw NotImplementedException("Constant buffer descriptor indexing"); | ||
| 18 | } | ||
| 19 | if ((info.constant_buffer_mask & (1U << index)) != 0) { | ||
| 20 | return; | ||
| 21 | } | ||
| 22 | info.constant_buffer_mask |= 1U << index; | ||
| 23 | |||
| 24 | auto& cbufs{info.constant_buffer_descriptors}; | ||
| 25 | cbufs.insert(std::ranges::lower_bound(cbufs, index, {}, &ConstantBufferDescriptor::index), | ||
| 26 | ConstantBufferDescriptor{ | ||
| 27 | .index = index, | ||
| 28 | .count = 1, | ||
| 29 | }); | ||
| 30 | } | ||
| 31 | |||
| 32 | void GetPatch(Info& info, IR::Patch patch) { | ||
| 33 | if (!IR::IsGeneric(patch)) { | ||
| 34 | throw NotImplementedException("Reading non-generic patch {}", patch); | ||
| 35 | } | ||
| 36 | info.uses_patches.at(IR::GenericPatchIndex(patch)) = true; | ||
| 37 | } | ||
| 38 | |||
| 39 | void SetPatch(Info& info, IR::Patch patch) { | ||
| 40 | if (IR::IsGeneric(patch)) { | ||
| 41 | info.uses_patches.at(IR::GenericPatchIndex(patch)) = true; | ||
| 42 | return; | ||
| 43 | } | ||
| 44 | switch (patch) { | ||
| 45 | case IR::Patch::TessellationLodLeft: | ||
| 46 | case IR::Patch::TessellationLodTop: | ||
| 47 | case IR::Patch::TessellationLodRight: | ||
| 48 | case IR::Patch::TessellationLodBottom: | ||
| 49 | info.stores_tess_level_outer = true; | ||
| 50 | break; | ||
| 51 | case IR::Patch::TessellationLodInteriorU: | ||
| 52 | case IR::Patch::TessellationLodInteriorV: | ||
| 53 | info.stores_tess_level_inner = true; | ||
| 54 | break; | ||
| 55 | default: | ||
| 56 | throw NotImplementedException("Set patch {}", patch); | ||
| 57 | } | ||
| 58 | } | ||
| 59 | |||
| 60 | void CheckCBufNVN(Info& info, IR::Inst& inst) { | ||
| 61 | const IR::Value cbuf_index{inst.Arg(0)}; | ||
| 62 | if (!cbuf_index.IsImmediate()) { | ||
| 63 | info.nvn_buffer_used.set(); | ||
| 64 | return; | ||
| 65 | } | ||
| 66 | const u32 index{cbuf_index.U32()}; | ||
| 67 | if (index != 0) { | ||
| 68 | return; | ||
| 69 | } | ||
| 70 | const IR::Value cbuf_offset{inst.Arg(1)}; | ||
| 71 | if (!cbuf_offset.IsImmediate()) { | ||
| 72 | info.nvn_buffer_used.set(); | ||
| 73 | return; | ||
| 74 | } | ||
| 75 | const u32 offset{cbuf_offset.U32()}; | ||
| 76 | const u32 descriptor_size{0x10}; | ||
| 77 | const u32 upper_limit{info.nvn_buffer_base + descriptor_size * 16}; | ||
| 78 | if (offset >= info.nvn_buffer_base && offset < upper_limit) { | ||
| 79 | const std::size_t nvn_index{(offset - info.nvn_buffer_base) / descriptor_size}; | ||
| 80 | info.nvn_buffer_used.set(nvn_index, true); | ||
| 81 | } | ||
| 82 | } | ||
| 83 | |||
| 84 | void VisitUsages(Info& info, IR::Inst& inst) { | ||
| 85 | switch (inst.GetOpcode()) { | ||
| 86 | case IR::Opcode::CompositeConstructF16x2: | ||
| 87 | case IR::Opcode::CompositeConstructF16x3: | ||
| 88 | case IR::Opcode::CompositeConstructF16x4: | ||
| 89 | case IR::Opcode::CompositeExtractF16x2: | ||
| 90 | case IR::Opcode::CompositeExtractF16x3: | ||
| 91 | case IR::Opcode::CompositeExtractF16x4: | ||
| 92 | case IR::Opcode::CompositeInsertF16x2: | ||
| 93 | case IR::Opcode::CompositeInsertF16x3: | ||
| 94 | case IR::Opcode::CompositeInsertF16x4: | ||
| 95 | case IR::Opcode::SelectF16: | ||
| 96 | case IR::Opcode::BitCastU16F16: | ||
| 97 | case IR::Opcode::BitCastF16U16: | ||
| 98 | case IR::Opcode::PackFloat2x16: | ||
| 99 | case IR::Opcode::UnpackFloat2x16: | ||
| 100 | case IR::Opcode::ConvertS16F16: | ||
| 101 | case IR::Opcode::ConvertS32F16: | ||
| 102 | case IR::Opcode::ConvertS64F16: | ||
| 103 | case IR::Opcode::ConvertU16F16: | ||
| 104 | case IR::Opcode::ConvertU32F16: | ||
| 105 | case IR::Opcode::ConvertU64F16: | ||
| 106 | case IR::Opcode::ConvertF16S8: | ||
| 107 | case IR::Opcode::ConvertF16S16: | ||
| 108 | case IR::Opcode::ConvertF16S32: | ||
| 109 | case IR::Opcode::ConvertF16S64: | ||
| 110 | case IR::Opcode::ConvertF16U8: | ||
| 111 | case IR::Opcode::ConvertF16U16: | ||
| 112 | case IR::Opcode::ConvertF16U32: | ||
| 113 | case IR::Opcode::ConvertF16U64: | ||
| 114 | case IR::Opcode::FPAbs16: | ||
| 115 | case IR::Opcode::FPAdd16: | ||
| 116 | case IR::Opcode::FPCeil16: | ||
| 117 | case IR::Opcode::FPFloor16: | ||
| 118 | case IR::Opcode::FPFma16: | ||
| 119 | case IR::Opcode::FPMul16: | ||
| 120 | case IR::Opcode::FPNeg16: | ||
| 121 | case IR::Opcode::FPRoundEven16: | ||
| 122 | case IR::Opcode::FPSaturate16: | ||
| 123 | case IR::Opcode::FPClamp16: | ||
| 124 | case IR::Opcode::FPTrunc16: | ||
| 125 | case IR::Opcode::FPOrdEqual16: | ||
| 126 | case IR::Opcode::FPUnordEqual16: | ||
| 127 | case IR::Opcode::FPOrdNotEqual16: | ||
| 128 | case IR::Opcode::FPUnordNotEqual16: | ||
| 129 | case IR::Opcode::FPOrdLessThan16: | ||
| 130 | case IR::Opcode::FPUnordLessThan16: | ||
| 131 | case IR::Opcode::FPOrdGreaterThan16: | ||
| 132 | case IR::Opcode::FPUnordGreaterThan16: | ||
| 133 | case IR::Opcode::FPOrdLessThanEqual16: | ||
| 134 | case IR::Opcode::FPUnordLessThanEqual16: | ||
| 135 | case IR::Opcode::FPOrdGreaterThanEqual16: | ||
| 136 | case IR::Opcode::FPUnordGreaterThanEqual16: | ||
| 137 | case IR::Opcode::FPIsNan16: | ||
| 138 | case IR::Opcode::GlobalAtomicAddF16x2: | ||
| 139 | case IR::Opcode::GlobalAtomicMinF16x2: | ||
| 140 | case IR::Opcode::GlobalAtomicMaxF16x2: | ||
| 141 | case IR::Opcode::StorageAtomicAddF16x2: | ||
| 142 | case IR::Opcode::StorageAtomicMinF16x2: | ||
| 143 | case IR::Opcode::StorageAtomicMaxF16x2: | ||
| 144 | info.uses_fp16 = true; | ||
| 145 | break; | ||
| 146 | case IR::Opcode::CompositeConstructF64x2: | ||
| 147 | case IR::Opcode::CompositeConstructF64x3: | ||
| 148 | case IR::Opcode::CompositeConstructF64x4: | ||
| 149 | case IR::Opcode::CompositeExtractF64x2: | ||
| 150 | case IR::Opcode::CompositeExtractF64x3: | ||
| 151 | case IR::Opcode::CompositeExtractF64x4: | ||
| 152 | case IR::Opcode::CompositeInsertF64x2: | ||
| 153 | case IR::Opcode::CompositeInsertF64x3: | ||
| 154 | case IR::Opcode::CompositeInsertF64x4: | ||
| 155 | case IR::Opcode::SelectF64: | ||
| 156 | case IR::Opcode::BitCastU64F64: | ||
| 157 | case IR::Opcode::BitCastF64U64: | ||
| 158 | case IR::Opcode::PackDouble2x32: | ||
| 159 | case IR::Opcode::UnpackDouble2x32: | ||
| 160 | case IR::Opcode::FPAbs64: | ||
| 161 | case IR::Opcode::FPAdd64: | ||
| 162 | case IR::Opcode::FPCeil64: | ||
| 163 | case IR::Opcode::FPFloor64: | ||
| 164 | case IR::Opcode::FPFma64: | ||
| 165 | case IR::Opcode::FPMax64: | ||
| 166 | case IR::Opcode::FPMin64: | ||
| 167 | case IR::Opcode::FPMul64: | ||
| 168 | case IR::Opcode::FPNeg64: | ||
| 169 | case IR::Opcode::FPRecip64: | ||
| 170 | case IR::Opcode::FPRecipSqrt64: | ||
| 171 | case IR::Opcode::FPRoundEven64: | ||
| 172 | case IR::Opcode::FPSaturate64: | ||
| 173 | case IR::Opcode::FPClamp64: | ||
| 174 | case IR::Opcode::FPTrunc64: | ||
| 175 | case IR::Opcode::FPOrdEqual64: | ||
| 176 | case IR::Opcode::FPUnordEqual64: | ||
| 177 | case IR::Opcode::FPOrdNotEqual64: | ||
| 178 | case IR::Opcode::FPUnordNotEqual64: | ||
| 179 | case IR::Opcode::FPOrdLessThan64: | ||
| 180 | case IR::Opcode::FPUnordLessThan64: | ||
| 181 | case IR::Opcode::FPOrdGreaterThan64: | ||
| 182 | case IR::Opcode::FPUnordGreaterThan64: | ||
| 183 | case IR::Opcode::FPOrdLessThanEqual64: | ||
| 184 | case IR::Opcode::FPUnordLessThanEqual64: | ||
| 185 | case IR::Opcode::FPOrdGreaterThanEqual64: | ||
| 186 | case IR::Opcode::FPUnordGreaterThanEqual64: | ||
| 187 | case IR::Opcode::FPIsNan64: | ||
| 188 | case IR::Opcode::ConvertS16F64: | ||
| 189 | case IR::Opcode::ConvertS32F64: | ||
| 190 | case IR::Opcode::ConvertS64F64: | ||
| 191 | case IR::Opcode::ConvertU16F64: | ||
| 192 | case IR::Opcode::ConvertU32F64: | ||
| 193 | case IR::Opcode::ConvertU64F64: | ||
| 194 | case IR::Opcode::ConvertF32F64: | ||
| 195 | case IR::Opcode::ConvertF64F32: | ||
| 196 | case IR::Opcode::ConvertF64S8: | ||
| 197 | case IR::Opcode::ConvertF64S16: | ||
| 198 | case IR::Opcode::ConvertF64S32: | ||
| 199 | case IR::Opcode::ConvertF64S64: | ||
| 200 | case IR::Opcode::ConvertF64U8: | ||
| 201 | case IR::Opcode::ConvertF64U16: | ||
| 202 | case IR::Opcode::ConvertF64U32: | ||
| 203 | case IR::Opcode::ConvertF64U64: | ||
| 204 | info.uses_fp64 = true; | ||
| 205 | break; | ||
| 206 | default: | ||
| 207 | break; | ||
| 208 | } | ||
| 209 | switch (inst.GetOpcode()) { | ||
| 210 | case IR::Opcode::GetCbufU8: | ||
| 211 | case IR::Opcode::GetCbufS8: | ||
| 212 | case IR::Opcode::UndefU8: | ||
| 213 | case IR::Opcode::LoadGlobalU8: | ||
| 214 | case IR::Opcode::LoadGlobalS8: | ||
| 215 | case IR::Opcode::WriteGlobalU8: | ||
| 216 | case IR::Opcode::WriteGlobalS8: | ||
| 217 | case IR::Opcode::LoadStorageU8: | ||
| 218 | case IR::Opcode::LoadStorageS8: | ||
| 219 | case IR::Opcode::WriteStorageU8: | ||
| 220 | case IR::Opcode::WriteStorageS8: | ||
| 221 | case IR::Opcode::LoadSharedU8: | ||
| 222 | case IR::Opcode::LoadSharedS8: | ||
| 223 | case IR::Opcode::WriteSharedU8: | ||
| 224 | case IR::Opcode::SelectU8: | ||
| 225 | case IR::Opcode::ConvertF16S8: | ||
| 226 | case IR::Opcode::ConvertF16U8: | ||
| 227 | case IR::Opcode::ConvertF32S8: | ||
| 228 | case IR::Opcode::ConvertF32U8: | ||
| 229 | case IR::Opcode::ConvertF64S8: | ||
| 230 | case IR::Opcode::ConvertF64U8: | ||
| 231 | info.uses_int8 = true; | ||
| 232 | break; | ||
| 233 | default: | ||
| 234 | break; | ||
| 235 | } | ||
| 236 | switch (inst.GetOpcode()) { | ||
| 237 | case IR::Opcode::GetCbufU16: | ||
| 238 | case IR::Opcode::GetCbufS16: | ||
| 239 | case IR::Opcode::UndefU16: | ||
| 240 | case IR::Opcode::LoadGlobalU16: | ||
| 241 | case IR::Opcode::LoadGlobalS16: | ||
| 242 | case IR::Opcode::WriteGlobalU16: | ||
| 243 | case IR::Opcode::WriteGlobalS16: | ||
| 244 | case IR::Opcode::LoadStorageU16: | ||
| 245 | case IR::Opcode::LoadStorageS16: | ||
| 246 | case IR::Opcode::WriteStorageU16: | ||
| 247 | case IR::Opcode::WriteStorageS16: | ||
| 248 | case IR::Opcode::LoadSharedU16: | ||
| 249 | case IR::Opcode::LoadSharedS16: | ||
| 250 | case IR::Opcode::WriteSharedU16: | ||
| 251 | case IR::Opcode::SelectU16: | ||
| 252 | case IR::Opcode::BitCastU16F16: | ||
| 253 | case IR::Opcode::BitCastF16U16: | ||
| 254 | case IR::Opcode::ConvertS16F16: | ||
| 255 | case IR::Opcode::ConvertS16F32: | ||
| 256 | case IR::Opcode::ConvertS16F64: | ||
| 257 | case IR::Opcode::ConvertU16F16: | ||
| 258 | case IR::Opcode::ConvertU16F32: | ||
| 259 | case IR::Opcode::ConvertU16F64: | ||
| 260 | case IR::Opcode::ConvertF16S16: | ||
| 261 | case IR::Opcode::ConvertF16U16: | ||
| 262 | case IR::Opcode::ConvertF32S16: | ||
| 263 | case IR::Opcode::ConvertF32U16: | ||
| 264 | case IR::Opcode::ConvertF64S16: | ||
| 265 | case IR::Opcode::ConvertF64U16: | ||
| 266 | info.uses_int16 = true; | ||
| 267 | break; | ||
| 268 | default: | ||
| 269 | break; | ||
| 270 | } | ||
| 271 | switch (inst.GetOpcode()) { | ||
| 272 | case IR::Opcode::UndefU64: | ||
| 273 | case IR::Opcode::LoadGlobalU8: | ||
| 274 | case IR::Opcode::LoadGlobalS8: | ||
| 275 | case IR::Opcode::LoadGlobalU16: | ||
| 276 | case IR::Opcode::LoadGlobalS16: | ||
| 277 | case IR::Opcode::LoadGlobal32: | ||
| 278 | case IR::Opcode::LoadGlobal64: | ||
| 279 | case IR::Opcode::LoadGlobal128: | ||
| 280 | case IR::Opcode::WriteGlobalU8: | ||
| 281 | case IR::Opcode::WriteGlobalS8: | ||
| 282 | case IR::Opcode::WriteGlobalU16: | ||
| 283 | case IR::Opcode::WriteGlobalS16: | ||
| 284 | case IR::Opcode::WriteGlobal32: | ||
| 285 | case IR::Opcode::WriteGlobal64: | ||
| 286 | case IR::Opcode::WriteGlobal128: | ||
| 287 | case IR::Opcode::SelectU64: | ||
| 288 | case IR::Opcode::BitCastU64F64: | ||
| 289 | case IR::Opcode::BitCastF64U64: | ||
| 290 | case IR::Opcode::PackUint2x32: | ||
| 291 | case IR::Opcode::UnpackUint2x32: | ||
| 292 | case IR::Opcode::IAdd64: | ||
| 293 | case IR::Opcode::ISub64: | ||
| 294 | case IR::Opcode::INeg64: | ||
| 295 | case IR::Opcode::ShiftLeftLogical64: | ||
| 296 | case IR::Opcode::ShiftRightLogical64: | ||
| 297 | case IR::Opcode::ShiftRightArithmetic64: | ||
| 298 | case IR::Opcode::ConvertS64F16: | ||
| 299 | case IR::Opcode::ConvertS64F32: | ||
| 300 | case IR::Opcode::ConvertS64F64: | ||
| 301 | case IR::Opcode::ConvertU64F16: | ||
| 302 | case IR::Opcode::ConvertU64F32: | ||
| 303 | case IR::Opcode::ConvertU64F64: | ||
| 304 | case IR::Opcode::ConvertU64U32: | ||
| 305 | case IR::Opcode::ConvertU32U64: | ||
| 306 | case IR::Opcode::ConvertF16U64: | ||
| 307 | case IR::Opcode::ConvertF32U64: | ||
| 308 | case IR::Opcode::ConvertF64U64: | ||
| 309 | case IR::Opcode::SharedAtomicExchange64: | ||
| 310 | case IR::Opcode::GlobalAtomicIAdd64: | ||
| 311 | case IR::Opcode::GlobalAtomicSMin64: | ||
| 312 | case IR::Opcode::GlobalAtomicUMin64: | ||
| 313 | case IR::Opcode::GlobalAtomicSMax64: | ||
| 314 | case IR::Opcode::GlobalAtomicUMax64: | ||
| 315 | case IR::Opcode::GlobalAtomicAnd64: | ||
| 316 | case IR::Opcode::GlobalAtomicOr64: | ||
| 317 | case IR::Opcode::GlobalAtomicXor64: | ||
| 318 | case IR::Opcode::GlobalAtomicExchange64: | ||
| 319 | case IR::Opcode::StorageAtomicIAdd64: | ||
| 320 | case IR::Opcode::StorageAtomicSMin64: | ||
| 321 | case IR::Opcode::StorageAtomicUMin64: | ||
| 322 | case IR::Opcode::StorageAtomicSMax64: | ||
| 323 | case IR::Opcode::StorageAtomicUMax64: | ||
| 324 | case IR::Opcode::StorageAtomicAnd64: | ||
| 325 | case IR::Opcode::StorageAtomicOr64: | ||
| 326 | case IR::Opcode::StorageAtomicXor64: | ||
| 327 | case IR::Opcode::StorageAtomicExchange64: | ||
| 328 | info.uses_int64 = true; | ||
| 329 | break; | ||
| 330 | default: | ||
| 331 | break; | ||
| 332 | } | ||
| 333 | switch (inst.GetOpcode()) { | ||
| 334 | case IR::Opcode::WriteGlobalU8: | ||
| 335 | case IR::Opcode::WriteGlobalS8: | ||
| 336 | case IR::Opcode::WriteGlobalU16: | ||
| 337 | case IR::Opcode::WriteGlobalS16: | ||
| 338 | case IR::Opcode::WriteGlobal32: | ||
| 339 | case IR::Opcode::WriteGlobal64: | ||
| 340 | case IR::Opcode::WriteGlobal128: | ||
| 341 | case IR::Opcode::GlobalAtomicIAdd32: | ||
| 342 | case IR::Opcode::GlobalAtomicSMin32: | ||
| 343 | case IR::Opcode::GlobalAtomicUMin32: | ||
| 344 | case IR::Opcode::GlobalAtomicSMax32: | ||
| 345 | case IR::Opcode::GlobalAtomicUMax32: | ||
| 346 | case IR::Opcode::GlobalAtomicInc32: | ||
| 347 | case IR::Opcode::GlobalAtomicDec32: | ||
| 348 | case IR::Opcode::GlobalAtomicAnd32: | ||
| 349 | case IR::Opcode::GlobalAtomicOr32: | ||
| 350 | case IR::Opcode::GlobalAtomicXor32: | ||
| 351 | case IR::Opcode::GlobalAtomicExchange32: | ||
| 352 | case IR::Opcode::GlobalAtomicIAdd64: | ||
| 353 | case IR::Opcode::GlobalAtomicSMin64: | ||
| 354 | case IR::Opcode::GlobalAtomicUMin64: | ||
| 355 | case IR::Opcode::GlobalAtomicSMax64: | ||
| 356 | case IR::Opcode::GlobalAtomicUMax64: | ||
| 357 | case IR::Opcode::GlobalAtomicAnd64: | ||
| 358 | case IR::Opcode::GlobalAtomicOr64: | ||
| 359 | case IR::Opcode::GlobalAtomicXor64: | ||
| 360 | case IR::Opcode::GlobalAtomicExchange64: | ||
| 361 | case IR::Opcode::GlobalAtomicAddF32: | ||
| 362 | case IR::Opcode::GlobalAtomicAddF16x2: | ||
| 363 | case IR::Opcode::GlobalAtomicAddF32x2: | ||
| 364 | case IR::Opcode::GlobalAtomicMinF16x2: | ||
| 365 | case IR::Opcode::GlobalAtomicMinF32x2: | ||
| 366 | case IR::Opcode::GlobalAtomicMaxF16x2: | ||
| 367 | case IR::Opcode::GlobalAtomicMaxF32x2: | ||
| 368 | info.stores_global_memory = true; | ||
| 369 | [[fallthrough]]; | ||
| 370 | case IR::Opcode::LoadGlobalU8: | ||
| 371 | case IR::Opcode::LoadGlobalS8: | ||
| 372 | case IR::Opcode::LoadGlobalU16: | ||
| 373 | case IR::Opcode::LoadGlobalS16: | ||
| 374 | case IR::Opcode::LoadGlobal32: | ||
| 375 | case IR::Opcode::LoadGlobal64: | ||
| 376 | case IR::Opcode::LoadGlobal128: | ||
| 377 | info.uses_int64 = true; | ||
| 378 | info.uses_global_memory = true; | ||
| 379 | info.used_constant_buffer_types |= IR::Type::U32 | IR::Type::U32x2; | ||
| 380 | info.used_storage_buffer_types |= IR::Type::U32 | IR::Type::U32x2 | IR::Type::U32x4; | ||
| 381 | break; | ||
| 382 | default: | ||
| 383 | break; | ||
| 384 | } | ||
| 385 | switch (inst.GetOpcode()) { | ||
| 386 | case IR::Opcode::DemoteToHelperInvocation: | ||
| 387 | info.uses_demote_to_helper_invocation = true; | ||
| 388 | break; | ||
| 389 | case IR::Opcode::GetAttribute: | ||
| 390 | info.loads.mask[static_cast<size_t>(inst.Arg(0).Attribute())] = true; | ||
| 391 | break; | ||
| 392 | case IR::Opcode::SetAttribute: | ||
| 393 | info.stores.mask[static_cast<size_t>(inst.Arg(0).Attribute())] = true; | ||
| 394 | break; | ||
| 395 | case IR::Opcode::GetPatch: | ||
| 396 | GetPatch(info, inst.Arg(0).Patch()); | ||
| 397 | break; | ||
| 398 | case IR::Opcode::SetPatch: | ||
| 399 | SetPatch(info, inst.Arg(0).Patch()); | ||
| 400 | break; | ||
| 401 | case IR::Opcode::GetAttributeIndexed: | ||
| 402 | info.loads_indexed_attributes = true; | ||
| 403 | break; | ||
| 404 | case IR::Opcode::SetAttributeIndexed: | ||
| 405 | info.stores_indexed_attributes = true; | ||
| 406 | break; | ||
| 407 | case IR::Opcode::SetFragColor: | ||
| 408 | info.stores_frag_color[inst.Arg(0).U32()] = true; | ||
| 409 | break; | ||
| 410 | case IR::Opcode::SetSampleMask: | ||
| 411 | info.stores_sample_mask = true; | ||
| 412 | break; | ||
| 413 | case IR::Opcode::SetFragDepth: | ||
| 414 | info.stores_frag_depth = true; | ||
| 415 | break; | ||
| 416 | case IR::Opcode::WorkgroupId: | ||
| 417 | info.uses_workgroup_id = true; | ||
| 418 | break; | ||
| 419 | case IR::Opcode::LocalInvocationId: | ||
| 420 | info.uses_local_invocation_id = true; | ||
| 421 | break; | ||
| 422 | case IR::Opcode::InvocationId: | ||
| 423 | info.uses_invocation_id = true; | ||
| 424 | break; | ||
| 425 | case IR::Opcode::SampleId: | ||
| 426 | info.uses_sample_id = true; | ||
| 427 | break; | ||
| 428 | case IR::Opcode::IsHelperInvocation: | ||
| 429 | info.uses_is_helper_invocation = true; | ||
| 430 | break; | ||
| 431 | case IR::Opcode::LaneId: | ||
| 432 | info.uses_subgroup_invocation_id = true; | ||
| 433 | break; | ||
| 434 | case IR::Opcode::ShuffleIndex: | ||
| 435 | case IR::Opcode::ShuffleUp: | ||
| 436 | case IR::Opcode::ShuffleDown: | ||
| 437 | case IR::Opcode::ShuffleButterfly: | ||
| 438 | info.uses_subgroup_shuffles = true; | ||
| 439 | break; | ||
| 440 | case IR::Opcode::GetCbufU8: | ||
| 441 | case IR::Opcode::GetCbufS8: | ||
| 442 | case IR::Opcode::GetCbufU16: | ||
| 443 | case IR::Opcode::GetCbufS16: | ||
| 444 | case IR::Opcode::GetCbufU32: | ||
| 445 | case IR::Opcode::GetCbufF32: | ||
| 446 | case IR::Opcode::GetCbufU32x2: { | ||
| 447 | const IR::Value index{inst.Arg(0)}; | ||
| 448 | const IR::Value offset{inst.Arg(1)}; | ||
| 449 | if (!index.IsImmediate()) { | ||
| 450 | throw NotImplementedException("Constant buffer with non-immediate index"); | ||
| 451 | } | ||
| 452 | AddConstantBufferDescriptor(info, index.U32(), 1); | ||
| 453 | u32 element_size{}; | ||
| 454 | switch (inst.GetOpcode()) { | ||
| 455 | case IR::Opcode::GetCbufU8: | ||
| 456 | case IR::Opcode::GetCbufS8: | ||
| 457 | info.used_constant_buffer_types |= IR::Type::U8; | ||
| 458 | element_size = 1; | ||
| 459 | break; | ||
| 460 | case IR::Opcode::GetCbufU16: | ||
| 461 | case IR::Opcode::GetCbufS16: | ||
| 462 | info.used_constant_buffer_types |= IR::Type::U16; | ||
| 463 | element_size = 2; | ||
| 464 | break; | ||
| 465 | case IR::Opcode::GetCbufU32: | ||
| 466 | info.used_constant_buffer_types |= IR::Type::U32; | ||
| 467 | element_size = 4; | ||
| 468 | break; | ||
| 469 | case IR::Opcode::GetCbufF32: | ||
| 470 | info.used_constant_buffer_types |= IR::Type::F32; | ||
| 471 | element_size = 4; | ||
| 472 | break; | ||
| 473 | case IR::Opcode::GetCbufU32x2: | ||
| 474 | info.used_constant_buffer_types |= IR::Type::U32x2; | ||
| 475 | element_size = 8; | ||
| 476 | break; | ||
| 477 | default: | ||
| 478 | break; | ||
| 479 | } | ||
| 480 | u32& size{info.constant_buffer_used_sizes[index.U32()]}; | ||
| 481 | if (offset.IsImmediate()) { | ||
| 482 | size = Common::AlignUp(std::max(size, offset.U32() + element_size), 16u); | ||
| 483 | } else { | ||
| 484 | size = 0x10'000; | ||
| 485 | } | ||
| 486 | break; | ||
| 487 | } | ||
| 488 | case IR::Opcode::BindlessImageSampleImplicitLod: | ||
| 489 | case IR::Opcode::BindlessImageSampleExplicitLod: | ||
| 490 | case IR::Opcode::BindlessImageSampleDrefImplicitLod: | ||
| 491 | case IR::Opcode::BindlessImageSampleDrefExplicitLod: | ||
| 492 | case IR::Opcode::BindlessImageGather: | ||
| 493 | case IR::Opcode::BindlessImageGatherDref: | ||
| 494 | case IR::Opcode::BindlessImageFetch: | ||
| 495 | case IR::Opcode::BindlessImageQueryDimensions: | ||
| 496 | case IR::Opcode::BindlessImageQueryLod: | ||
| 497 | case IR::Opcode::BindlessImageGradient: | ||
| 498 | case IR::Opcode::BoundImageSampleImplicitLod: | ||
| 499 | case IR::Opcode::BoundImageSampleExplicitLod: | ||
| 500 | case IR::Opcode::BoundImageSampleDrefImplicitLod: | ||
| 501 | case IR::Opcode::BoundImageSampleDrefExplicitLod: | ||
| 502 | case IR::Opcode::BoundImageGather: | ||
| 503 | case IR::Opcode::BoundImageGatherDref: | ||
| 504 | case IR::Opcode::BoundImageFetch: | ||
| 505 | case IR::Opcode::BoundImageQueryDimensions: | ||
| 506 | case IR::Opcode::BoundImageQueryLod: | ||
| 507 | case IR::Opcode::BoundImageGradient: | ||
| 508 | case IR::Opcode::ImageGather: | ||
| 509 | case IR::Opcode::ImageGatherDref: | ||
| 510 | case IR::Opcode::ImageFetch: | ||
| 511 | case IR::Opcode::ImageQueryDimensions: | ||
| 512 | case IR::Opcode::ImageGradient: { | ||
| 513 | const TextureType type{inst.Flags<IR::TextureInstInfo>().type}; | ||
| 514 | info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D; | ||
| 515 | info.uses_sparse_residency |= | ||
| 516 | inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; | ||
| 517 | break; | ||
| 518 | } | ||
| 519 | case IR::Opcode::ImageSampleImplicitLod: | ||
| 520 | case IR::Opcode::ImageSampleExplicitLod: | ||
| 521 | case IR::Opcode::ImageSampleDrefImplicitLod: | ||
| 522 | case IR::Opcode::ImageSampleDrefExplicitLod: | ||
| 523 | case IR::Opcode::ImageQueryLod: { | ||
| 524 | const auto flags{inst.Flags<IR::TextureInstInfo>()}; | ||
| 525 | const TextureType type{flags.type}; | ||
| 526 | info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D; | ||
| 527 | info.uses_shadow_lod |= flags.is_depth != 0; | ||
| 528 | info.uses_sparse_residency |= | ||
| 529 | inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; | ||
| 530 | break; | ||
| 531 | } | ||
| 532 | case IR::Opcode::ImageRead: { | ||
| 533 | const auto flags{inst.Flags<IR::TextureInstInfo>()}; | ||
| 534 | info.uses_typeless_image_reads |= flags.image_format == ImageFormat::Typeless; | ||
| 535 | info.uses_sparse_residency |= | ||
| 536 | inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; | ||
| 537 | break; | ||
| 538 | } | ||
| 539 | case IR::Opcode::ImageWrite: { | ||
| 540 | const auto flags{inst.Flags<IR::TextureInstInfo>()}; | ||
| 541 | info.uses_typeless_image_writes |= flags.image_format == ImageFormat::Typeless; | ||
| 542 | info.uses_image_buffers |= flags.type == TextureType::Buffer; | ||
| 543 | break; | ||
| 544 | } | ||
| 545 | case IR::Opcode::SubgroupEqMask: | ||
| 546 | case IR::Opcode::SubgroupLtMask: | ||
| 547 | case IR::Opcode::SubgroupLeMask: | ||
| 548 | case IR::Opcode::SubgroupGtMask: | ||
| 549 | case IR::Opcode::SubgroupGeMask: | ||
| 550 | info.uses_subgroup_mask = true; | ||
| 551 | break; | ||
| 552 | case IR::Opcode::VoteAll: | ||
| 553 | case IR::Opcode::VoteAny: | ||
| 554 | case IR::Opcode::VoteEqual: | ||
| 555 | case IR::Opcode::SubgroupBallot: | ||
| 556 | info.uses_subgroup_vote = true; | ||
| 557 | break; | ||
| 558 | case IR::Opcode::FSwizzleAdd: | ||
| 559 | info.uses_fswzadd = true; | ||
| 560 | break; | ||
| 561 | case IR::Opcode::DPdxFine: | ||
| 562 | case IR::Opcode::DPdyFine: | ||
| 563 | case IR::Opcode::DPdxCoarse: | ||
| 564 | case IR::Opcode::DPdyCoarse: | ||
| 565 | info.uses_derivatives = true; | ||
| 566 | break; | ||
| 567 | case IR::Opcode::LoadStorageU8: | ||
| 568 | case IR::Opcode::LoadStorageS8: | ||
| 569 | case IR::Opcode::WriteStorageU8: | ||
| 570 | case IR::Opcode::WriteStorageS8: | ||
| 571 | info.used_storage_buffer_types |= IR::Type::U8; | ||
| 572 | break; | ||
| 573 | case IR::Opcode::LoadStorageU16: | ||
| 574 | case IR::Opcode::LoadStorageS16: | ||
| 575 | case IR::Opcode::WriteStorageU16: | ||
| 576 | case IR::Opcode::WriteStorageS16: | ||
| 577 | info.used_storage_buffer_types |= IR::Type::U16; | ||
| 578 | break; | ||
| 579 | case IR::Opcode::LoadStorage32: | ||
| 580 | case IR::Opcode::WriteStorage32: | ||
| 581 | case IR::Opcode::StorageAtomicIAdd32: | ||
| 582 | case IR::Opcode::StorageAtomicUMin32: | ||
| 583 | case IR::Opcode::StorageAtomicUMax32: | ||
| 584 | case IR::Opcode::StorageAtomicAnd32: | ||
| 585 | case IR::Opcode::StorageAtomicOr32: | ||
| 586 | case IR::Opcode::StorageAtomicXor32: | ||
| 587 | case IR::Opcode::StorageAtomicExchange32: | ||
| 588 | info.used_storage_buffer_types |= IR::Type::U32; | ||
| 589 | break; | ||
| 590 | case IR::Opcode::LoadStorage64: | ||
| 591 | case IR::Opcode::WriteStorage64: | ||
| 592 | info.used_storage_buffer_types |= IR::Type::U32x2; | ||
| 593 | break; | ||
| 594 | case IR::Opcode::LoadStorage128: | ||
| 595 | case IR::Opcode::WriteStorage128: | ||
| 596 | info.used_storage_buffer_types |= IR::Type::U32x4; | ||
| 597 | break; | ||
| 598 | case IR::Opcode::SharedAtomicSMin32: | ||
| 599 | info.uses_atomic_s32_min = true; | ||
| 600 | break; | ||
| 601 | case IR::Opcode::SharedAtomicSMax32: | ||
| 602 | info.uses_atomic_s32_max = true; | ||
| 603 | break; | ||
| 604 | case IR::Opcode::SharedAtomicInc32: | ||
| 605 | info.uses_shared_increment = true; | ||
| 606 | break; | ||
| 607 | case IR::Opcode::SharedAtomicDec32: | ||
| 608 | info.uses_shared_decrement = true; | ||
| 609 | break; | ||
| 610 | case IR::Opcode::SharedAtomicExchange64: | ||
| 611 | info.uses_int64_bit_atomics = true; | ||
| 612 | break; | ||
| 613 | case IR::Opcode::GlobalAtomicInc32: | ||
| 614 | case IR::Opcode::StorageAtomicInc32: | ||
| 615 | info.used_storage_buffer_types |= IR::Type::U32; | ||
| 616 | info.uses_global_increment = true; | ||
| 617 | break; | ||
| 618 | case IR::Opcode::GlobalAtomicDec32: | ||
| 619 | case IR::Opcode::StorageAtomicDec32: | ||
| 620 | info.used_storage_buffer_types |= IR::Type::U32; | ||
| 621 | info.uses_global_decrement = true; | ||
| 622 | break; | ||
| 623 | case IR::Opcode::GlobalAtomicAddF32: | ||
| 624 | case IR::Opcode::StorageAtomicAddF32: | ||
| 625 | info.used_storage_buffer_types |= IR::Type::U32; | ||
| 626 | info.uses_atomic_f32_add = true; | ||
| 627 | break; | ||
| 628 | case IR::Opcode::GlobalAtomicAddF16x2: | ||
| 629 | case IR::Opcode::StorageAtomicAddF16x2: | ||
| 630 | info.used_storage_buffer_types |= IR::Type::U32; | ||
| 631 | info.uses_atomic_f16x2_add = true; | ||
| 632 | break; | ||
| 633 | case IR::Opcode::GlobalAtomicAddF32x2: | ||
| 634 | case IR::Opcode::StorageAtomicAddF32x2: | ||
| 635 | info.used_storage_buffer_types |= IR::Type::U32; | ||
| 636 | info.uses_atomic_f32x2_add = true; | ||
| 637 | break; | ||
| 638 | case IR::Opcode::GlobalAtomicMinF16x2: | ||
| 639 | case IR::Opcode::StorageAtomicMinF16x2: | ||
| 640 | info.used_storage_buffer_types |= IR::Type::U32; | ||
| 641 | info.uses_atomic_f16x2_min = true; | ||
| 642 | break; | ||
| 643 | case IR::Opcode::GlobalAtomicMinF32x2: | ||
| 644 | case IR::Opcode::StorageAtomicMinF32x2: | ||
| 645 | info.used_storage_buffer_types |= IR::Type::U32; | ||
| 646 | info.uses_atomic_f32x2_min = true; | ||
| 647 | break; | ||
| 648 | case IR::Opcode::GlobalAtomicMaxF16x2: | ||
| 649 | case IR::Opcode::StorageAtomicMaxF16x2: | ||
| 650 | info.used_storage_buffer_types |= IR::Type::U32; | ||
| 651 | info.uses_atomic_f16x2_max = true; | ||
| 652 | break; | ||
| 653 | case IR::Opcode::GlobalAtomicMaxF32x2: | ||
| 654 | case IR::Opcode::StorageAtomicMaxF32x2: | ||
| 655 | info.used_storage_buffer_types |= IR::Type::U32; | ||
| 656 | info.uses_atomic_f32x2_max = true; | ||
| 657 | break; | ||
| 658 | case IR::Opcode::StorageAtomicSMin32: | ||
| 659 | info.used_storage_buffer_types |= IR::Type::U32; | ||
| 660 | info.uses_atomic_s32_min = true; | ||
| 661 | break; | ||
| 662 | case IR::Opcode::StorageAtomicSMax32: | ||
| 663 | info.used_storage_buffer_types |= IR::Type::U32; | ||
| 664 | info.uses_atomic_s32_max = true; | ||
| 665 | break; | ||
| 666 | case IR::Opcode::GlobalAtomicIAdd64: | ||
| 667 | case IR::Opcode::GlobalAtomicSMin64: | ||
| 668 | case IR::Opcode::GlobalAtomicUMin64: | ||
| 669 | case IR::Opcode::GlobalAtomicSMax64: | ||
| 670 | case IR::Opcode::GlobalAtomicUMax64: | ||
| 671 | case IR::Opcode::GlobalAtomicAnd64: | ||
| 672 | case IR::Opcode::GlobalAtomicOr64: | ||
| 673 | case IR::Opcode::GlobalAtomicXor64: | ||
| 674 | case IR::Opcode::GlobalAtomicExchange64: | ||
| 675 | case IR::Opcode::StorageAtomicIAdd64: | ||
| 676 | case IR::Opcode::StorageAtomicSMin64: | ||
| 677 | case IR::Opcode::StorageAtomicUMin64: | ||
| 678 | case IR::Opcode::StorageAtomicSMax64: | ||
| 679 | case IR::Opcode::StorageAtomicUMax64: | ||
| 680 | case IR::Opcode::StorageAtomicAnd64: | ||
| 681 | case IR::Opcode::StorageAtomicOr64: | ||
| 682 | case IR::Opcode::StorageAtomicXor64: | ||
| 683 | info.used_storage_buffer_types |= IR::Type::U64; | ||
| 684 | info.uses_int64_bit_atomics = true; | ||
| 685 | break; | ||
| 686 | case IR::Opcode::BindlessImageAtomicIAdd32: | ||
| 687 | case IR::Opcode::BindlessImageAtomicSMin32: | ||
| 688 | case IR::Opcode::BindlessImageAtomicUMin32: | ||
| 689 | case IR::Opcode::BindlessImageAtomicSMax32: | ||
| 690 | case IR::Opcode::BindlessImageAtomicUMax32: | ||
| 691 | case IR::Opcode::BindlessImageAtomicInc32: | ||
| 692 | case IR::Opcode::BindlessImageAtomicDec32: | ||
| 693 | case IR::Opcode::BindlessImageAtomicAnd32: | ||
| 694 | case IR::Opcode::BindlessImageAtomicOr32: | ||
| 695 | case IR::Opcode::BindlessImageAtomicXor32: | ||
| 696 | case IR::Opcode::BindlessImageAtomicExchange32: | ||
| 697 | case IR::Opcode::BoundImageAtomicIAdd32: | ||
| 698 | case IR::Opcode::BoundImageAtomicSMin32: | ||
| 699 | case IR::Opcode::BoundImageAtomicUMin32: | ||
| 700 | case IR::Opcode::BoundImageAtomicSMax32: | ||
| 701 | case IR::Opcode::BoundImageAtomicUMax32: | ||
| 702 | case IR::Opcode::BoundImageAtomicInc32: | ||
| 703 | case IR::Opcode::BoundImageAtomicDec32: | ||
| 704 | case IR::Opcode::BoundImageAtomicAnd32: | ||
| 705 | case IR::Opcode::BoundImageAtomicOr32: | ||
| 706 | case IR::Opcode::BoundImageAtomicXor32: | ||
| 707 | case IR::Opcode::BoundImageAtomicExchange32: | ||
| 708 | case IR::Opcode::ImageAtomicIAdd32: | ||
| 709 | case IR::Opcode::ImageAtomicSMin32: | ||
| 710 | case IR::Opcode::ImageAtomicUMin32: | ||
| 711 | case IR::Opcode::ImageAtomicSMax32: | ||
| 712 | case IR::Opcode::ImageAtomicUMax32: | ||
| 713 | case IR::Opcode::ImageAtomicInc32: | ||
| 714 | case IR::Opcode::ImageAtomicDec32: | ||
| 715 | case IR::Opcode::ImageAtomicAnd32: | ||
| 716 | case IR::Opcode::ImageAtomicOr32: | ||
| 717 | case IR::Opcode::ImageAtomicXor32: | ||
| 718 | case IR::Opcode::ImageAtomicExchange32: | ||
| 719 | info.uses_atomic_image_u32 = true; | ||
| 720 | break; | ||
| 721 | default: | ||
| 722 | break; | ||
| 723 | } | ||
| 724 | } | ||
| 725 | |||
| 726 | void VisitFpModifiers(Info& info, IR::Inst& inst) { | ||
| 727 | switch (inst.GetOpcode()) { | ||
| 728 | case IR::Opcode::FPAdd16: | ||
| 729 | case IR::Opcode::FPFma16: | ||
| 730 | case IR::Opcode::FPMul16: | ||
| 731 | case IR::Opcode::FPRoundEven16: | ||
| 732 | case IR::Opcode::FPFloor16: | ||
| 733 | case IR::Opcode::FPCeil16: | ||
| 734 | case IR::Opcode::FPTrunc16: { | ||
| 735 | const auto control{inst.Flags<IR::FpControl>()}; | ||
| 736 | switch (control.fmz_mode) { | ||
| 737 | case IR::FmzMode::DontCare: | ||
| 738 | break; | ||
| 739 | case IR::FmzMode::FTZ: | ||
| 740 | case IR::FmzMode::FMZ: | ||
| 741 | info.uses_fp16_denorms_flush = true; | ||
| 742 | break; | ||
| 743 | case IR::FmzMode::None: | ||
| 744 | info.uses_fp16_denorms_preserve = true; | ||
| 745 | break; | ||
| 746 | } | ||
| 747 | break; | ||
| 748 | } | ||
| 749 | case IR::Opcode::FPAdd32: | ||
| 750 | case IR::Opcode::FPFma32: | ||
| 751 | case IR::Opcode::FPMul32: | ||
| 752 | case IR::Opcode::FPRoundEven32: | ||
| 753 | case IR::Opcode::FPFloor32: | ||
| 754 | case IR::Opcode::FPCeil32: | ||
| 755 | case IR::Opcode::FPTrunc32: | ||
| 756 | case IR::Opcode::FPOrdEqual32: | ||
| 757 | case IR::Opcode::FPUnordEqual32: | ||
| 758 | case IR::Opcode::FPOrdNotEqual32: | ||
| 759 | case IR::Opcode::FPUnordNotEqual32: | ||
| 760 | case IR::Opcode::FPOrdLessThan32: | ||
| 761 | case IR::Opcode::FPUnordLessThan32: | ||
| 762 | case IR::Opcode::FPOrdGreaterThan32: | ||
| 763 | case IR::Opcode::FPUnordGreaterThan32: | ||
| 764 | case IR::Opcode::FPOrdLessThanEqual32: | ||
| 765 | case IR::Opcode::FPUnordLessThanEqual32: | ||
| 766 | case IR::Opcode::FPOrdGreaterThanEqual32: | ||
| 767 | case IR::Opcode::FPUnordGreaterThanEqual32: | ||
| 768 | case IR::Opcode::ConvertF16F32: | ||
| 769 | case IR::Opcode::ConvertF64F32: { | ||
| 770 | const auto control{inst.Flags<IR::FpControl>()}; | ||
| 771 | switch (control.fmz_mode) { | ||
| 772 | case IR::FmzMode::DontCare: | ||
| 773 | break; | ||
| 774 | case IR::FmzMode::FTZ: | ||
| 775 | case IR::FmzMode::FMZ: | ||
| 776 | info.uses_fp32_denorms_flush = true; | ||
| 777 | break; | ||
| 778 | case IR::FmzMode::None: | ||
| 779 | info.uses_fp32_denorms_preserve = true; | ||
| 780 | break; | ||
| 781 | } | ||
| 782 | break; | ||
| 783 | } | ||
| 784 | default: | ||
| 785 | break; | ||
| 786 | } | ||
| 787 | } | ||
| 788 | |||
| 789 | void VisitCbufs(Info& info, IR::Inst& inst) { | ||
| 790 | switch (inst.GetOpcode()) { | ||
| 791 | case IR::Opcode::GetCbufU8: | ||
| 792 | case IR::Opcode::GetCbufS8: | ||
| 793 | case IR::Opcode::GetCbufU16: | ||
| 794 | case IR::Opcode::GetCbufS16: | ||
| 795 | case IR::Opcode::GetCbufU32: | ||
| 796 | case IR::Opcode::GetCbufF32: | ||
| 797 | case IR::Opcode::GetCbufU32x2: { | ||
| 798 | CheckCBufNVN(info, inst); | ||
| 799 | break; | ||
| 800 | } | ||
| 801 | default: | ||
| 802 | break; | ||
| 803 | } | ||
| 804 | } | ||
| 805 | |||
| 806 | void Visit(Info& info, IR::Inst& inst) { | ||
| 807 | VisitUsages(info, inst); | ||
| 808 | VisitFpModifiers(info, inst); | ||
| 809 | VisitCbufs(info, inst); | ||
| 810 | } | ||
| 811 | |||
| 812 | void GatherInfoFromHeader(Environment& env, Info& info) { | ||
| 813 | Stage stage{env.ShaderStage()}; | ||
| 814 | if (stage == Stage::Compute) { | ||
| 815 | return; | ||
| 816 | } | ||
| 817 | const auto& header{env.SPH()}; | ||
| 818 | if (stage == Stage::Fragment) { | ||
| 819 | if (!info.loads_indexed_attributes) { | ||
| 820 | return; | ||
| 821 | } | ||
| 822 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { | ||
| 823 | const size_t offset{static_cast<size_t>(IR::Attribute::Generic0X) + index * 4}; | ||
| 824 | const auto vector{header.ps.imap_generic_vector[index]}; | ||
| 825 | info.loads.mask[offset + 0] = vector.x != PixelImap::Unused; | ||
| 826 | info.loads.mask[offset + 1] = vector.y != PixelImap::Unused; | ||
| 827 | info.loads.mask[offset + 2] = vector.z != PixelImap::Unused; | ||
| 828 | info.loads.mask[offset + 3] = vector.w != PixelImap::Unused; | ||
| 829 | } | ||
| 830 | return; | ||
| 831 | } | ||
| 832 | if (info.loads_indexed_attributes) { | ||
| 833 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { | ||
| 834 | const IR::Attribute attribute{IR::Attribute::Generic0X + index * 4}; | ||
| 835 | const auto mask = header.vtg.InputGeneric(index); | ||
| 836 | for (size_t i = 0; i < 4; ++i) { | ||
| 837 | info.loads.Set(attribute + i, mask[i]); | ||
| 838 | } | ||
| 839 | } | ||
| 840 | for (size_t index = 0; index < 8; ++index) { | ||
| 841 | const u16 mask{header.vtg.clip_distances}; | ||
| 842 | info.loads.Set(IR::Attribute::ClipDistance0 + index, ((mask >> index) & 1) != 0); | ||
| 843 | } | ||
| 844 | info.loads.Set(IR::Attribute::PrimitiveId, header.vtg.imap_systemb.primitive_array_id != 0); | ||
| 845 | info.loads.Set(IR::Attribute::Layer, header.vtg.imap_systemb.rt_array_index != 0); | ||
| 846 | info.loads.Set(IR::Attribute::ViewportIndex, header.vtg.imap_systemb.viewport_index != 0); | ||
| 847 | info.loads.Set(IR::Attribute::PointSize, header.vtg.imap_systemb.point_size != 0); | ||
| 848 | info.loads.Set(IR::Attribute::PositionX, header.vtg.imap_systemb.position_x != 0); | ||
| 849 | info.loads.Set(IR::Attribute::PositionY, header.vtg.imap_systemb.position_y != 0); | ||
| 850 | info.loads.Set(IR::Attribute::PositionZ, header.vtg.imap_systemb.position_z != 0); | ||
| 851 | info.loads.Set(IR::Attribute::PositionW, header.vtg.imap_systemb.position_w != 0); | ||
| 852 | info.loads.Set(IR::Attribute::PointSpriteS, header.vtg.point_sprite_s != 0); | ||
| 853 | info.loads.Set(IR::Attribute::PointSpriteT, header.vtg.point_sprite_t != 0); | ||
| 854 | info.loads.Set(IR::Attribute::FogCoordinate, header.vtg.fog_coordinate != 0); | ||
| 855 | info.loads.Set(IR::Attribute::TessellationEvaluationPointU, | ||
| 856 | header.vtg.tessellation_eval_point_u != 0); | ||
| 857 | info.loads.Set(IR::Attribute::TessellationEvaluationPointV, | ||
| 858 | header.vtg.tessellation_eval_point_v != 0); | ||
| 859 | info.loads.Set(IR::Attribute::InstanceId, header.vtg.instance_id != 0); | ||
| 860 | info.loads.Set(IR::Attribute::VertexId, header.vtg.vertex_id != 0); | ||
| 861 | // TODO: Legacy varyings | ||
| 862 | } | ||
| 863 | if (info.stores_indexed_attributes) { | ||
| 864 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { | ||
| 865 | const IR::Attribute attribute{IR::Attribute::Generic0X + index * 4}; | ||
| 866 | const auto mask{header.vtg.OutputGeneric(index)}; | ||
| 867 | for (size_t i = 0; i < 4; ++i) { | ||
| 868 | info.stores.Set(attribute + i, mask[i]); | ||
| 869 | } | ||
| 870 | } | ||
| 871 | for (size_t index = 0; index < 8; ++index) { | ||
| 872 | const u16 mask{header.vtg.omap_systemc.clip_distances}; | ||
| 873 | info.stores.Set(IR::Attribute::ClipDistance0 + index, ((mask >> index) & 1) != 0); | ||
| 874 | } | ||
| 875 | info.stores.Set(IR::Attribute::PrimitiveId, | ||
| 876 | header.vtg.omap_systemb.primitive_array_id != 0); | ||
| 877 | info.stores.Set(IR::Attribute::Layer, header.vtg.omap_systemb.rt_array_index != 0); | ||
| 878 | info.stores.Set(IR::Attribute::ViewportIndex, header.vtg.omap_systemb.viewport_index != 0); | ||
| 879 | info.stores.Set(IR::Attribute::PointSize, header.vtg.omap_systemb.point_size != 0); | ||
| 880 | info.stores.Set(IR::Attribute::PositionX, header.vtg.omap_systemb.position_x != 0); | ||
| 881 | info.stores.Set(IR::Attribute::PositionY, header.vtg.omap_systemb.position_y != 0); | ||
| 882 | info.stores.Set(IR::Attribute::PositionZ, header.vtg.omap_systemb.position_z != 0); | ||
| 883 | info.stores.Set(IR::Attribute::PositionW, header.vtg.omap_systemb.position_w != 0); | ||
| 884 | info.stores.Set(IR::Attribute::PointSpriteS, header.vtg.omap_systemc.point_sprite_s != 0); | ||
| 885 | info.stores.Set(IR::Attribute::PointSpriteT, header.vtg.omap_systemc.point_sprite_t != 0); | ||
| 886 | info.stores.Set(IR::Attribute::FogCoordinate, header.vtg.omap_systemc.fog_coordinate != 0); | ||
| 887 | info.stores.Set(IR::Attribute::TessellationEvaluationPointU, | ||
| 888 | header.vtg.omap_systemc.tessellation_eval_point_u != 0); | ||
| 889 | info.stores.Set(IR::Attribute::TessellationEvaluationPointV, | ||
| 890 | header.vtg.omap_systemc.tessellation_eval_point_v != 0); | ||
| 891 | info.stores.Set(IR::Attribute::InstanceId, header.vtg.omap_systemc.instance_id != 0); | ||
| 892 | info.stores.Set(IR::Attribute::VertexId, header.vtg.omap_systemc.vertex_id != 0); | ||
| 893 | // TODO: Legacy varyings | ||
| 894 | } | ||
| 895 | } | ||
| 896 | } // Anonymous namespace | ||
| 897 | |||
| 898 | void CollectShaderInfoPass(Environment& env, IR::Program& program) { | ||
| 899 | Info& info{program.info}; | ||
| 900 | const u32 base{[&] { | ||
| 901 | switch (program.stage) { | ||
| 902 | case Stage::VertexA: | ||
| 903 | case Stage::VertexB: | ||
| 904 | return 0x110u; | ||
| 905 | case Stage::TessellationControl: | ||
| 906 | return 0x210u; | ||
| 907 | case Stage::TessellationEval: | ||
| 908 | return 0x310u; | ||
| 909 | case Stage::Geometry: | ||
| 910 | return 0x410u; | ||
| 911 | case Stage::Fragment: | ||
| 912 | return 0x510u; | ||
| 913 | case Stage::Compute: | ||
| 914 | return 0x310u; | ||
| 915 | } | ||
| 916 | throw InvalidArgument("Invalid stage {}", program.stage); | ||
| 917 | }()}; | ||
| 918 | info.nvn_buffer_base = base; | ||
| 919 | |||
| 920 | for (IR::Block* const block : program.post_order_blocks) { | ||
| 921 | for (IR::Inst& inst : block->Instructions()) { | ||
| 922 | Visit(info, inst); | ||
| 923 | } | ||
| 924 | } | ||
| 925 | GatherInfoFromHeader(env, info); | ||
| 926 | } | ||
| 927 | |||
| 928 | } // namespace Shader::Optimization | ||
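The GetCbuf* handling above grows each constant buffer's tracked size to cover every immediately-addressed access, rounded up to a 16-byte boundary, and pessimistically records the full 64 KiB range when the offset is dynamic. A minimal sketch of that update rule, with tracked_size standing in for an entry of info.constant_buffer_used_sizes:

    u32 tracked_size{0};
    const auto on_cbuf_access{[&](bool offset_is_imm, u32 imm_offset, u32 element_size) {
        if (offset_is_imm) {
            // Cover the access, rounded up to a 16-byte boundary
            tracked_size = Common::AlignUp(std::max(tracked_size, imm_offset + element_size), 16u);
        } else {
            // Dynamic offset: assume the whole 64 KiB constant buffer is read
            tracked_size = 0x10'000;
        }
    }};
    on_cbuf_access(true, 468u, 4u); // e.g. GetCbufU32 at offset 468 -> tracked_size == 480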
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp new file mode 100644 index 000000000..8dd6d6c2c --- /dev/null +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp | |||
| @@ -0,0 +1,610 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <tuple> | ||
| 7 | #include <type_traits> | ||
| 8 | |||
| 9 | #include "common/bit_cast.h" | ||
| 10 | #include "common/bit_util.h" | ||
| 11 | #include "shader_recompiler/exception.h" | ||
| 12 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 13 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 14 | #include "shader_recompiler/ir_opt/passes.h" | ||
| 15 | |||
| 16 | namespace Shader::Optimization { | ||
| 17 | namespace { | ||
| 18 | // Metaprogramming helpers to extract argument type information from a lambda | ||
| 19 | template <typename Func> | ||
| 20 | struct LambdaTraits : LambdaTraits<decltype(&std::remove_reference_t<Func>::operator())> {}; | ||
| 21 | |||
| 22 | template <typename ReturnType, typename LambdaType, typename... Args> | ||
| 23 | struct LambdaTraits<ReturnType (LambdaType::*)(Args...) const> { | ||
| 24 | template <size_t I> | ||
| 25 | using ArgType = std::tuple_element_t<I, std::tuple<Args...>>; | ||
| 26 | |||
| 27 | static constexpr size_t NUM_ARGS{sizeof...(Args)}; | ||
| 28 | }; | ||
| 29 | |||
| 30 | template <typename T> | ||
| 31 | [[nodiscard]] T Arg(const IR::Value& value) { | ||
| 32 | if constexpr (std::is_same_v<T, bool>) { | ||
| 33 | return value.U1(); | ||
| 34 | } else if constexpr (std::is_same_v<T, u32>) { | ||
| 35 | return value.U32(); | ||
| 36 | } else if constexpr (std::is_same_v<T, s32>) { | ||
| 37 | return static_cast<s32>(value.U32()); | ||
| 38 | } else if constexpr (std::is_same_v<T, f32>) { | ||
| 39 | return value.F32(); | ||
| 40 | } else if constexpr (std::is_same_v<T, u64>) { | ||
| 41 | return value.U64(); | ||
| 42 | } | ||
| 43 | } | ||
| 44 | |||
| 45 | template <typename T, typename ImmFn> | ||
| 46 | bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) { | ||
| 47 | const IR::Value lhs{inst.Arg(0)}; | ||
| 48 | const IR::Value rhs{inst.Arg(1)}; | ||
| 49 | |||
| 50 | const bool is_lhs_immediate{lhs.IsImmediate()}; | ||
| 51 | const bool is_rhs_immediate{rhs.IsImmediate()}; | ||
| 52 | |||
| 53 | if (is_lhs_immediate && is_rhs_immediate) { | ||
| 54 | const auto result{imm_fn(Arg<T>(lhs), Arg<T>(rhs))}; | ||
| 55 | inst.ReplaceUsesWith(IR::Value{result}); | ||
| 56 | return false; | ||
| 57 | } | ||
| 58 | if (is_lhs_immediate && !is_rhs_immediate) { | ||
| 59 | IR::Inst* const rhs_inst{rhs.InstRecursive()}; | ||
| 60 | if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->Arg(1).IsImmediate()) { | ||
| 61 | const auto combined{imm_fn(Arg<T>(lhs), Arg<T>(rhs_inst->Arg(1)))}; | ||
| 62 | inst.SetArg(0, rhs_inst->Arg(0)); | ||
| 63 | inst.SetArg(1, IR::Value{combined}); | ||
| 64 | } else { | ||
| 65 | // Normalize | ||
| 66 | inst.SetArg(0, rhs); | ||
| 67 | inst.SetArg(1, lhs); | ||
| 68 | } | ||
| 69 | } | ||
| 70 | if (!is_lhs_immediate && is_rhs_immediate) { | ||
| 71 | const IR::Inst* const lhs_inst{lhs.InstRecursive()}; | ||
| 72 | if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->Arg(1).IsImmediate()) { | ||
| 73 | const auto combined{imm_fn(Arg<T>(rhs), Arg<T>(lhs_inst->Arg(1)))}; | ||
| 74 | inst.SetArg(0, lhs_inst->Arg(0)); | ||
| 75 | inst.SetArg(1, IR::Value{combined}); | ||
| 76 | } | ||
| 77 | } | ||
| 78 | return true; | ||
| 79 | } | ||
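In IR terms, FoldCommutative evaluates the operation outright when both arguments are immediate, moves a lone immediate to the right-hand side, and reassociates chains so two immediates can be combined. Illustrated for IAdd32, assuming %x is not an immediate:

    // Both immediate: evaluated and the instruction is replaced
    //   IAdd32 #c1, #c2        -> #(c1 + c2)
    // Normalization: a lone immediate moves to the right-hand side
    //   IAdd32 #c, %x          -> IAdd32 %x, #c
    // Reassociation: immediates from a chain are combined
    //   %t = IAdd32 %x, #c1
    //   IAdd32 #c2, %t         -> IAdd32 %x, #(c2 + c1)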
| 80 | |||
| 81 | template <typename Func> | ||
| 82 | bool FoldWhenAllImmediates(IR::Inst& inst, Func&& func) { | ||
| 83 | if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) { | ||
| 84 | return false; | ||
| 85 | } | ||
| 86 | using Indices = std::make_index_sequence<LambdaTraits<decltype(func)>::NUM_ARGS>; | ||
| 87 | inst.ReplaceUsesWith(EvalImmediates(inst, func, Indices{})); | ||
| 88 | return true; | ||
| 89 | } | ||
| 90 | |||
| 91 | void FoldGetRegister(IR::Inst& inst) { | ||
| 92 | if (inst.Arg(0).Reg() == IR::Reg::RZ) { | ||
| 93 | inst.ReplaceUsesWith(IR::Value{u32{0}}); | ||
| 94 | } | ||
| 95 | } | ||
| 96 | |||
| 97 | void FoldGetPred(IR::Inst& inst) { | ||
| 98 | if (inst.Arg(0).Pred() == IR::Pred::PT) { | ||
| 99 | inst.ReplaceUsesWith(IR::Value{true}); | ||
| 100 | } | ||
| 101 | } | ||
| 102 | |||
| 103 | /// Replaces the instruction pattern generated by two XMAD multiplications with a single IMul32 | ||
| 104 | bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) { | ||
| 105 | /* | ||
| 106 | * We are looking for this pattern: | ||
| 107 | * %rhs_bfe = BitFieldUExtract %factor_a, #0, #16 | ||
| 108 | * %rhs_mul = IMul32 %rhs_bfe, %factor_b | ||
| 109 | * %lhs_bfe = BitFieldUExtract %factor_a, #16, #16 | ||
| 110 | * %lhs_mul = IMul32 %lhs_bfe, %factor_b | ||
| 111 | * %lhs_shl = ShiftLeftLogical32 %lhs_mul, #16 | ||
| 112 | * %result = IAdd32 %lhs_shl, %rhs_mul | ||
| 113 | * | ||
| 114 | * And replacing it with | ||
| 115 | * %result = IMul32 %factor_a, %factor_b | ||
| 116 | * | ||
| 117 | * LLVM and MSVC also perform this transformation, so it is considered safe. | ||
| 118 | */ | ||
| 119 | const IR::Value lhs_arg{inst.Arg(0)}; | ||
| 120 | const IR::Value rhs_arg{inst.Arg(1)}; | ||
| 121 | if (lhs_arg.IsImmediate() || rhs_arg.IsImmediate()) { | ||
| 122 | return false; | ||
| 123 | } | ||
| 124 | IR::Inst* const lhs_shl{lhs_arg.InstRecursive()}; | ||
| 125 | if (lhs_shl->GetOpcode() != IR::Opcode::ShiftLeftLogical32 || | ||
| 126 | lhs_shl->Arg(1) != IR::Value{16U}) { | ||
| 127 | return false; | ||
| 128 | } | ||
| 129 | if (lhs_shl->Arg(0).IsImmediate()) { | ||
| 130 | return false; | ||
| 131 | } | ||
| 132 | IR::Inst* const lhs_mul{lhs_shl->Arg(0).InstRecursive()}; | ||
| 133 | IR::Inst* const rhs_mul{rhs_arg.InstRecursive()}; | ||
| 134 | if (lhs_mul->GetOpcode() != IR::Opcode::IMul32 || rhs_mul->GetOpcode() != IR::Opcode::IMul32) { | ||
| 135 | return false; | ||
| 136 | } | ||
| 137 | if (lhs_mul->Arg(1).Resolve() != rhs_mul->Arg(1).Resolve()) { | ||
| 138 | return false; | ||
| 139 | } | ||
| 140 | const IR::U32 factor_b{lhs_mul->Arg(1)}; | ||
| 141 | if (lhs_mul->Arg(0).IsImmediate() || rhs_mul->Arg(0).IsImmediate()) { | ||
| 142 | return false; | ||
| 143 | } | ||
| 144 | IR::Inst* const lhs_bfe{lhs_mul->Arg(0).InstRecursive()}; | ||
| 145 | IR::Inst* const rhs_bfe{rhs_mul->Arg(0).InstRecursive()}; | ||
| 146 | if (lhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) { | ||
| 147 | return false; | ||
| 148 | } | ||
| 149 | if (rhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) { | ||
| 150 | return false; | ||
| 151 | } | ||
| 152 | if (lhs_bfe->Arg(1) != IR::Value{16U} || lhs_bfe->Arg(2) != IR::Value{16U}) { | ||
| 153 | return false; | ||
| 154 | } | ||
| 155 | if (rhs_bfe->Arg(1) != IR::Value{0U} || rhs_bfe->Arg(2) != IR::Value{16U}) { | ||
| 156 | return false; | ||
| 157 | } | ||
| 158 | if (lhs_bfe->Arg(0).Resolve() != rhs_bfe->Arg(0).Resolve()) { | ||
| 159 | return false; | ||
| 160 | } | ||
| 161 | const IR::U32 factor_a{lhs_bfe->Arg(0)}; | ||
| 162 | IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; | ||
| 163 | inst.ReplaceUsesWith(ir.IMul(factor_a, factor_b)); | ||
| 164 | return true; | ||
| 165 | } | ||
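The replacement is sound because an unsigned 32-bit product distributes over the 16-bit halves of one factor under wrap-around arithmetic. A compile-time spot check of the identity the fold relies on:

    // a * b == ((a >> 16) * b << 16) + ((a & 0xFFFF) * b)  (mod 2^32)
    static_assert([] {
        constexpr u32 a{0xDEADBEEF};
        constexpr u32 b{0x12345678};
        return a * b == (((a >> 16) * b) << 16) + ((a & 0xFFFF) * b);
    }());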
| 166 | |||
| 167 | template <typename T> | ||
| 168 | void FoldAdd(IR::Block& block, IR::Inst& inst) { | ||
| 169 | if (inst.HasAssociatedPseudoOperation()) { | ||
| 170 | return; | ||
| 171 | } | ||
| 172 | if (!FoldCommutative<T>(inst, [](T a, T b) { return a + b; })) { | ||
| 173 | return; | ||
| 174 | } | ||
| 175 | const IR::Value rhs{inst.Arg(1)}; | ||
| 176 | if (rhs.IsImmediate() && Arg<T>(rhs) == 0) { | ||
| 177 | inst.ReplaceUsesWith(inst.Arg(0)); | ||
| 178 | return; | ||
| 179 | } | ||
| 180 | if constexpr (std::is_same_v<T, u32>) { | ||
| 181 | if (FoldXmadMultiply(block, inst)) { | ||
| 182 | return; | ||
| 183 | } | ||
| 184 | } | ||
| 185 | } | ||
| 186 | |||
| 187 | void FoldISub32(IR::Inst& inst) { | ||
| 188 | if (FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a - b; })) { | ||
| 189 | return; | ||
| 190 | } | ||
| 191 | if (inst.Arg(0).IsImmediate() || inst.Arg(1).IsImmediate()) { | ||
| 192 | return; | ||
| 193 | } | ||
| 194 | // ISub32 is generally used to subtract two constant buffer reads; compare them and | ||
| 195 | // replace the result with zero when they are equal. | ||
| 196 | const auto equal_cbuf{[](IR::Inst* a, IR::Inst* b) { | ||
| 197 | return a->GetOpcode() == IR::Opcode::GetCbufU32 && | ||
| 198 | b->GetOpcode() == IR::Opcode::GetCbufU32 && a->Arg(0) == b->Arg(0) && | ||
| 199 | a->Arg(1) == b->Arg(1); | ||
| 200 | }}; | ||
| 201 | IR::Inst* op_a{inst.Arg(0).InstRecursive()}; | ||
| 202 | IR::Inst* op_b{inst.Arg(1).InstRecursive()}; | ||
| 203 | if (equal_cbuf(op_a, op_b)) { | ||
| 204 | inst.ReplaceUsesWith(IR::Value{u32{0}}); | ||
| 205 | return; | ||
| 206 | } | ||
| 207 | // It's also possible that a value is added to a cbuf read and then subtracted from it | ||
| 208 | if (op_b->GetOpcode() == IR::Opcode::IAdd32) { | ||
| 209 | // Canonicalize local variables to simplify the following logic | ||
| 210 | std::swap(op_a, op_b); | ||
| 211 | } | ||
| 212 | if (op_b->GetOpcode() != IR::Opcode::GetCbufU32) { | ||
| 213 | return; | ||
| 214 | } | ||
| 215 | IR::Inst* const inst_cbuf{op_b}; | ||
| 216 | if (op_a->GetOpcode() != IR::Opcode::IAdd32) { | ||
| 217 | return; | ||
| 218 | } | ||
| 219 | IR::Value add_op_a{op_a->Arg(0)}; | ||
| 220 | IR::Value add_op_b{op_a->Arg(1)}; | ||
| 221 | if (add_op_b.IsImmediate()) { | ||
| 222 | // Canonicalize | ||
| 223 | std::swap(add_op_a, add_op_b); | ||
| 224 | } | ||
| 225 | if (add_op_b.IsImmediate()) { | ||
| 226 | return; | ||
| 227 | } | ||
| 228 | IR::Inst* const add_cbuf{add_op_b.InstRecursive()}; | ||
| 229 | if (equal_cbuf(add_cbuf, inst_cbuf)) { | ||
| 230 | inst.ReplaceUsesWith(add_op_a); | ||
| 231 | } | ||
| 232 | } | ||
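The tail of this function recognizes a value that was biased by a constant buffer read and then un-biased again. In IR terms, where both GetCbufU32 reads must match in buffer index and offset:

    //   ISub32 %cbuf, %cbuf    -> #0
    //   %sum = IAdd32 %x, %cbuf
    //   ISub32 %sum, %cbuf     -> %x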
| 233 | |||
| 234 | void FoldSelect(IR::Inst& inst) { | ||
| 235 | const IR::Value cond{inst.Arg(0)}; | ||
| 236 | if (cond.IsImmediate()) { | ||
| 237 | inst.ReplaceUsesWith(cond.U1() ? inst.Arg(1) : inst.Arg(2)); | ||
| 238 | } | ||
| 239 | } | ||
| 240 | |||
| 241 | void FoldFPMul32(IR::Inst& inst) { | ||
| 242 | const auto control{inst.Flags<IR::FpControl>()}; | ||
| 243 | if (control.no_contraction) { | ||
| 244 | return; | ||
| 245 | } | ||
| 246 | // Fold the perspective interpolation pattern (x * attr) * (1 / attr) -> x | ||
| 247 | const IR::Value lhs_value{inst.Arg(0)}; | ||
| 248 | const IR::Value rhs_value{inst.Arg(1)}; | ||
| 249 | if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) { | ||
| 250 | return; | ||
| 251 | } | ||
| 252 | IR::Inst* const lhs_op{lhs_value.InstRecursive()}; | ||
| 253 | IR::Inst* const rhs_op{rhs_value.InstRecursive()}; | ||
| 254 | if (lhs_op->GetOpcode() != IR::Opcode::FPMul32 || | ||
| 255 | rhs_op->GetOpcode() != IR::Opcode::FPRecip32) { | ||
| 256 | return; | ||
| 257 | } | ||
| 258 | const IR::Value recip_source{rhs_op->Arg(0)}; | ||
| 259 | const IR::Value lhs_mul_source{lhs_op->Arg(1).Resolve()}; | ||
| 260 | if (recip_source.IsImmediate() || lhs_mul_source.IsImmediate()) { | ||
| 261 | return; | ||
| 262 | } | ||
| 263 | IR::Inst* const attr_a{recip_source.InstRecursive()}; | ||
| 264 | IR::Inst* const attr_b{lhs_mul_source.InstRecursive()}; | ||
| 265 | if (attr_a->GetOpcode() != IR::Opcode::GetAttribute || | ||
| 266 | attr_b->GetOpcode() != IR::Opcode::GetAttribute) { | ||
| 267 | return; | ||
| 268 | } | ||
| 269 | if (attr_a->Arg(0).Attribute() == attr_b->Arg(0).Attribute()) { | ||
| 270 | inst.ReplaceUsesWith(lhs_op->Arg(0)); | ||
| 271 | } | ||
| 272 | } | ||
| 273 | |||
| 274 | void FoldLogicalAnd(IR::Inst& inst) { | ||
| 275 | if (!FoldCommutative<bool>(inst, [](bool a, bool b) { return a && b; })) { | ||
| 276 | return; | ||
| 277 | } | ||
| 278 | const IR::Value rhs{inst.Arg(1)}; | ||
| 279 | if (rhs.IsImmediate()) { | ||
| 280 | if (rhs.U1()) { | ||
| 281 | inst.ReplaceUsesWith(inst.Arg(0)); | ||
| 282 | } else { | ||
| 283 | inst.ReplaceUsesWith(IR::Value{false}); | ||
| 284 | } | ||
| 285 | } | ||
| 286 | } | ||
| 287 | |||
| 288 | void FoldLogicalOr(IR::Inst& inst) { | ||
| 289 | if (!FoldCommutative<bool>(inst, [](bool a, bool b) { return a || b; })) { | ||
| 290 | return; | ||
| 291 | } | ||
| 292 | const IR::Value rhs{inst.Arg(1)}; | ||
| 293 | if (rhs.IsImmediate()) { | ||
| 294 | if (rhs.U1()) { | ||
| 295 | inst.ReplaceUsesWith(IR::Value{true}); | ||
| 296 | } else { | ||
| 297 | inst.ReplaceUsesWith(inst.Arg(0)); | ||
| 298 | } | ||
| 299 | } | ||
| 300 | } | ||
| 301 | |||
| 302 | void FoldLogicalNot(IR::Inst& inst) { | ||
| 303 | const IR::U1 value{inst.Arg(0)}; | ||
| 304 | if (value.IsImmediate()) { | ||
| 305 | inst.ReplaceUsesWith(IR::Value{!value.U1()}); | ||
| 306 | return; | ||
| 307 | } | ||
| 308 | IR::Inst* const arg{value.InstRecursive()}; | ||
| 309 | if (arg->GetOpcode() == IR::Opcode::LogicalNot) { | ||
| 310 | inst.ReplaceUsesWith(arg->Arg(0)); | ||
| 311 | } | ||
| 312 | } | ||
| 313 | |||
| 314 | template <IR::Opcode op, typename Dest, typename Source> | ||
| 315 | void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) { | ||
| 316 | const IR::Value value{inst.Arg(0)}; | ||
| 317 | if (value.IsImmediate()) { | ||
| 318 | inst.ReplaceUsesWith(IR::Value{Common::BitCast<Dest>(Arg<Source>(value))}); | ||
| 319 | return; | ||
| 320 | } | ||
| 321 | IR::Inst* const arg_inst{value.InstRecursive()}; | ||
| 322 | if (arg_inst->GetOpcode() == reverse) { | ||
| 323 | inst.ReplaceUsesWith(arg_inst->Arg(0)); | ||
| 324 | return; | ||
| 325 | } | ||
| 326 | if constexpr (op == IR::Opcode::BitCastF32U32) { | ||
| 327 | if (arg_inst->GetOpcode() == IR::Opcode::GetCbufU32) { | ||
| 328 | // Replace the bitcast with a typed constant buffer read | ||
| 329 | inst.ReplaceOpcode(IR::Opcode::GetCbufF32); | ||
| 330 | inst.SetArg(0, arg_inst->Arg(0)); | ||
| 331 | inst.SetArg(1, arg_inst->Arg(1)); | ||
| 332 | return; | ||
| 333 | } | ||
| 334 | } | ||
| 335 | } | ||
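Two folds happen here: a bitcast of the reverse bitcast cancels out, and a float bitcast of an integer constant buffer read is rewritten into the typed read. In IR terms:

    //   BitCastU32F32 (BitCastF32U32 %x)    -> %x
    //   BitCastF32U32 (GetCbufU32 %c, %off) -> GetCbufF32 %c, %off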
| 336 | |||
| 337 | void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) { | ||
| 338 | const IR::Value value{inst.Arg(0)}; | ||
| 339 | if (value.IsImmediate()) { | ||
| 340 | return; | ||
| 341 | } | ||
| 342 | IR::Inst* const arg_inst{value.InstRecursive()}; | ||
| 343 | if (arg_inst->GetOpcode() == reverse) { | ||
| 344 | inst.ReplaceUsesWith(arg_inst->Arg(0)); | ||
| 345 | return; | ||
| 346 | } | ||
| 347 | } | ||
| 348 | |||
| 349 | template <typename Func, size_t... I> | ||
| 350 | IR::Value EvalImmediates(const IR::Inst& inst, Func&& func, std::index_sequence<I...>) { | ||
| 351 | using Traits = LambdaTraits<decltype(func)>; | ||
| 352 | return IR::Value{func(Arg<typename Traits::template ArgType<I>>(inst.Arg(I))...)}; | ||
| 353 | } | ||
| 354 | |||
| 355 | std::optional<IR::Value> FoldCompositeExtractImpl(IR::Value inst_value, IR::Opcode insert, | ||
| 356 | IR::Opcode construct, u32 first_index) { | ||
| 357 | IR::Inst* const inst{inst_value.InstRecursive()}; | ||
| 358 | if (inst->GetOpcode() == construct) { | ||
| 359 | return inst->Arg(first_index); | ||
| 360 | } | ||
| 361 | if (inst->GetOpcode() != insert) { | ||
| 362 | return std::nullopt; | ||
| 363 | } | ||
| 364 | IR::Value value_index{inst->Arg(2)}; | ||
| 365 | if (!value_index.IsImmediate()) { | ||
| 366 | return std::nullopt; | ||
| 367 | } | ||
| 368 | const u32 second_index{value_index.U32()}; | ||
| 369 | if (first_index != second_index) { | ||
| 370 | IR::Value value_composite{inst->Arg(0)}; | ||
| 371 | if (value_composite.IsImmediate()) { | ||
| 372 | return std::nullopt; | ||
| 373 | } | ||
| 374 | return FoldCompositeExtractImpl(value_composite, insert, construct, first_index); | ||
| 375 | } | ||
| 376 | return inst->Arg(1); | ||
| 377 | } | ||
| 378 | |||
| 379 | void FoldCompositeExtract(IR::Inst& inst, IR::Opcode construct, IR::Opcode insert) { | ||
| 380 | const IR::Value value_1{inst.Arg(0)}; | ||
| 381 | const IR::Value value_2{inst.Arg(1)}; | ||
| 382 | if (value_1.IsImmediate()) { | ||
| 383 | return; | ||
| 384 | } | ||
| 385 | if (!value_2.IsImmediate()) { | ||
| 386 | return; | ||
| 387 | } | ||
| 388 | const u32 first_index{value_2.U32()}; | ||
| 389 | const std::optional result{FoldCompositeExtractImpl(value_1, insert, construct, first_index)}; | ||
| 390 | if (!result) { | ||
| 391 | return; | ||
| 392 | } | ||
| 393 | inst.ReplaceUsesWith(*result); | ||
| 394 | } | ||
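An extract is resolved either directly against a construct, or by walking back through a chain of inserts until one matches the requested index (or the walk reaches the constructed value). For the U32x2 flavor:

    //   %v = CompositeConstructU32x2 %a, %b
    //   CompositeExtractU32x2 %v, #1       -> %b
    //   %w = CompositeInsertU32x2 %v, %c, #0
    //   CompositeExtractU32x2 %w, #0       -> %c
    //   CompositeExtractU32x2 %w, #1       -> walks past %w into %v, yields %b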
| 395 | |||
| 396 | IR::Value GetThroughCast(IR::Value value, IR::Opcode expected_cast) { | ||
| 397 | if (value.IsImmediate()) { | ||
| 398 | return value; | ||
| 399 | } | ||
| 400 | IR::Inst* const inst{value.InstRecursive()}; | ||
| 401 | if (inst->GetOpcode() == expected_cast) { | ||
| 402 | return inst->Arg(0).Resolve(); | ||
| 403 | } | ||
| 404 | return value; | ||
| 405 | } | ||
| 406 | |||
| 407 | void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) { | ||
| 408 | const IR::Value swizzle{inst.Arg(2)}; | ||
| 409 | if (!swizzle.IsImmediate()) { | ||
| 410 | return; | ||
| 411 | } | ||
| 412 | const IR::Value value_1{GetThroughCast(inst.Arg(0).Resolve(), IR::Opcode::BitCastF32U32)}; | ||
| 413 | const IR::Value value_2{GetThroughCast(inst.Arg(1).Resolve(), IR::Opcode::BitCastF32U32)}; | ||
| 414 | if (value_1.IsImmediate()) { | ||
| 415 | return; | ||
| 416 | } | ||
| 417 | const u32 swizzle_value{swizzle.U32()}; | ||
| 418 | if (swizzle_value != 0x99 && swizzle_value != 0xA5) { | ||
| 419 | return; | ||
| 420 | } | ||
| 421 | IR::Inst* const inst2{value_1.InstRecursive()}; | ||
| 422 | if (inst2->GetOpcode() != IR::Opcode::ShuffleButterfly) { | ||
| 423 | return; | ||
| 424 | } | ||
| 425 | const IR::Value value_3{GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32)}; | ||
| 426 | if (value_2 != value_3) { | ||
| 427 | return; | ||
| 428 | } | ||
| 429 | const IR::Value index{inst2->Arg(1)}; | ||
| 430 | const IR::Value clamp{inst2->Arg(2)}; | ||
| 431 | const IR::Value segmentation_mask{inst2->Arg(3)}; | ||
| 432 | if (!index.IsImmediate() || !clamp.IsImmediate() || !segmentation_mask.IsImmediate()) { | ||
| 433 | return; | ||
| 434 | } | ||
| 435 | if (clamp.U32() != 3 || segmentation_mask.U32() != 28) { | ||
| 436 | return; | ||
| 437 | } | ||
| 438 | if (swizzle_value == 0x99) { | ||
| 439 | // DPdxFine | ||
| 440 | if (index.U32() == 1) { | ||
| 441 | IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; | ||
| 442 | inst.ReplaceUsesWith(ir.DPdxFine(IR::F32{inst.Arg(1)})); | ||
| 443 | } | ||
| 444 | } else if (swizzle_value == 0xA5) { | ||
| 445 | // DPdyFine | ||
| 446 | if (index.U32() == 2) { | ||
| 447 | IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; | ||
| 448 | inst.ReplaceUsesWith(ir.DPdyFine(IR::F32{inst.Arg(1)})); | ||
| 449 | } | ||
| 450 | } | ||
| 451 | } | ||
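This undoes the butterfly-shuffle expansion of fine derivatives: when the swizzle constant, shuffle index, clamp, and segmentation mask all match the canonical expansion (modulo the bitcasts that GetThroughCast strips), the expression collapses back into a derivative of the second operand:

    //   %n = ShuffleButterfly %x, #1, #3, #28
    //   FSwizzleAdd %n, %x, #0x99          -> DPdxFine %x
    //   %n = ShuffleButterfly %x, #2, #3, #28
    //   FSwizzleAdd %n, %x, #0xA5          -> DPdyFine %x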
| 452 | |||
| 453 | void ConstantPropagation(IR::Block& block, IR::Inst& inst) { | ||
| 454 | switch (inst.GetOpcode()) { | ||
| 455 | case IR::Opcode::GetRegister: | ||
| 456 | return FoldGetRegister(inst); | ||
| 457 | case IR::Opcode::GetPred: | ||
| 458 | return FoldGetPred(inst); | ||
| 459 | case IR::Opcode::IAdd32: | ||
| 460 | return FoldAdd<u32>(block, inst); | ||
| 461 | case IR::Opcode::ISub32: | ||
| 462 | return FoldISub32(inst); | ||
| 463 | case IR::Opcode::IMul32: | ||
| 464 | FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a * b; }); | ||
| 465 | return; | ||
| 466 | case IR::Opcode::ShiftRightArithmetic32: | ||
| 467 | FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return static_cast<u32>(a >> b); }); | ||
| 468 | return; | ||
| 469 | case IR::Opcode::BitCastF32U32: | ||
| 470 | return FoldBitCast<IR::Opcode::BitCastF32U32, f32, u32>(inst, IR::Opcode::BitCastU32F32); | ||
| 471 | case IR::Opcode::BitCastU32F32: | ||
| 472 | return FoldBitCast<IR::Opcode::BitCastU32F32, u32, f32>(inst, IR::Opcode::BitCastF32U32); | ||
| 473 | case IR::Opcode::IAdd64: | ||
| 474 | return FoldAdd<u64>(block, inst); | ||
| 475 | case IR::Opcode::PackHalf2x16: | ||
| 476 | return FoldInverseFunc(inst, IR::Opcode::UnpackHalf2x16); | ||
| 477 | case IR::Opcode::UnpackHalf2x16: | ||
| 478 | return FoldInverseFunc(inst, IR::Opcode::PackHalf2x16); | ||
| 479 | case IR::Opcode::SelectU1: | ||
| 480 | case IR::Opcode::SelectU8: | ||
| 481 | case IR::Opcode::SelectU16: | ||
| 482 | case IR::Opcode::SelectU32: | ||
| 483 | case IR::Opcode::SelectU64: | ||
| 484 | case IR::Opcode::SelectF16: | ||
| 485 | case IR::Opcode::SelectF32: | ||
| 486 | case IR::Opcode::SelectF64: | ||
| 487 | return FoldSelect(inst); | ||
| 488 | case IR::Opcode::FPMul32: | ||
| 489 | return FoldFPMul32(inst); | ||
| 490 | case IR::Opcode::LogicalAnd: | ||
| 491 | return FoldLogicalAnd(inst); | ||
| 492 | case IR::Opcode::LogicalOr: | ||
| 493 | return FoldLogicalOr(inst); | ||
| 494 | case IR::Opcode::LogicalNot: | ||
| 495 | return FoldLogicalNot(inst); | ||
| 496 | case IR::Opcode::SLessThan: | ||
| 497 | FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a < b; }); | ||
| 498 | return; | ||
| 499 | case IR::Opcode::ULessThan: | ||
| 500 | FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; }); | ||
| 501 | return; | ||
| 502 | case IR::Opcode::SLessThanEqual: | ||
| 503 | FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a <= b; }); | ||
| 504 | return; | ||
| 505 | case IR::Opcode::ULessThanEqual: | ||
| 506 | FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a <= b; }); | ||
| 507 | return; | ||
| 508 | case IR::Opcode::SGreaterThan: | ||
| 509 | FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a > b; }); | ||
| 510 | return; | ||
| 511 | case IR::Opcode::UGreaterThan: | ||
| 512 | FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a > b; }); | ||
| 513 | return; | ||
| 514 | case IR::Opcode::SGreaterThanEqual: | ||
| 515 | FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a >= b; }); | ||
| 516 | return; | ||
| 517 | case IR::Opcode::UGreaterThanEqual: | ||
| 518 | FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a >= b; }); | ||
| 519 | return; | ||
| 520 | case IR::Opcode::IEqual: | ||
| 521 | FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a == b; }); | ||
| 522 | return; | ||
| 523 | case IR::Opcode::INotEqual: | ||
| 524 | FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a != b; }); | ||
| 525 | return; | ||
| 526 | case IR::Opcode::BitwiseAnd32: | ||
| 527 | FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a & b; }); | ||
| 528 | return; | ||
| 529 | case IR::Opcode::BitwiseOr32: | ||
| 530 | FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a | b; }); | ||
| 531 | return; | ||
| 532 | case IR::Opcode::BitwiseXor32: | ||
| 533 | FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a ^ b; }); | ||
| 534 | return; | ||
| 535 | case IR::Opcode::BitFieldUExtract: | ||
| 536 | FoldWhenAllImmediates(inst, [](u32 base, u32 shift, u32 count) { | ||
| 537 | if (static_cast<size_t>(shift) + static_cast<size_t>(count) > 32) { | ||
| 538 | throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldUExtract, | ||
| 539 | base, shift, count); | ||
| 540 | } | ||
| 541 | return (base >> shift) & ((1U << count) - 1); | ||
| 542 | }); | ||
| 543 | return; | ||
| 544 | case IR::Opcode::BitFieldSExtract: | ||
| 545 | FoldWhenAllImmediates(inst, [](s32 base, u32 shift, u32 count) { | ||
| 546 | const size_t back_shift{static_cast<size_t>(shift) + static_cast<size_t>(count)}; | ||
| 547 | const size_t left_shift{32 - back_shift}; | ||
| 548 | const size_t right_shift{static_cast<size_t>(32 - count)}; | ||
| 549 | if (back_shift > 32 || left_shift >= 32 || right_shift >= 32) { | ||
| 550 | throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldSExtract, | ||
| 551 | base, shift, count); | ||
| 552 | } | ||
| 553 | return static_cast<u32>((base << left_shift) >> right_shift); | ||
| 554 | }); | ||
| 555 | return; | ||
| 556 | case IR::Opcode::BitFieldInsert: | ||
| 557 | FoldWhenAllImmediates(inst, [](u32 base, u32 insert, u32 offset, u32 bits) { | ||
| 558 | if (bits >= 32 || offset >= 32) { | ||
| 559 | throw LogicError("Undefined result in {}({}, {}, {}, {})", | ||
| 560 | IR::Opcode::BitFieldInsert, base, insert, offset, bits); | ||
| 561 | } | ||
| 562 | return (base & ~(~(~0u << bits) << offset)) | (insert << offset); | ||
| 563 | }); | ||
| 564 | return; | ||
| 565 | case IR::Opcode::CompositeExtractU32x2: | ||
| 566 | return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x2, | ||
| 567 | IR::Opcode::CompositeInsertU32x2); | ||
| 568 | case IR::Opcode::CompositeExtractU32x3: | ||
| 569 | return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x3, | ||
| 570 | IR::Opcode::CompositeInsertU32x3); | ||
| 571 | case IR::Opcode::CompositeExtractU32x4: | ||
| 572 | return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x4, | ||
| 573 | IR::Opcode::CompositeInsertU32x4); | ||
| 574 | case IR::Opcode::CompositeExtractF32x2: | ||
| 575 | return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x2, | ||
| 576 | IR::Opcode::CompositeInsertF32x2); | ||
| 577 | case IR::Opcode::CompositeExtractF32x3: | ||
| 578 | return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x3, | ||
| 579 | IR::Opcode::CompositeInsertF32x3); | ||
| 580 | case IR::Opcode::CompositeExtractF32x4: | ||
| 581 | return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x4, | ||
| 582 | IR::Opcode::CompositeInsertF32x4); | ||
| 583 | case IR::Opcode::CompositeExtractF16x2: | ||
| 584 | return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x2, | ||
| 585 | IR::Opcode::CompositeInsertF16x2); | ||
| 586 | case IR::Opcode::CompositeExtractF16x3: | ||
| 587 | return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x3, | ||
| 588 | IR::Opcode::CompositeInsertF16x3); | ||
| 589 | case IR::Opcode::CompositeExtractF16x4: | ||
| 590 | return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x4, | ||
| 591 | IR::Opcode::CompositeInsertF16x4); | ||
| 592 | case IR::Opcode::FSwizzleAdd: | ||
| 593 | return FoldFSwizzleAdd(block, inst); | ||
| 594 | default: | ||
| 595 | break; | ||
| 596 | } | ||
| 597 | } | ||
| 598 | } // Anonymous namespace | ||
| 599 | |||
| 600 | void ConstantPropagationPass(IR::Program& program) { | ||
| 601 | const auto end{program.post_order_blocks.rend()}; | ||
| 602 | for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) { | ||
| 603 | IR::Block* const block{*it}; | ||
| 604 | for (IR::Inst& inst : block->Instructions()) { | ||
| 605 | ConstantPropagation(*block, inst); | ||
| 606 | } | ||
| 607 | } | ||
| 608 | } | ||
| 609 | |||
| 610 | } // namespace Shader::Optimization | ||
diff --git a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp new file mode 100644 index 000000000..400836301 --- /dev/null +++ b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp | |||
| @@ -0,0 +1,26 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 6 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 7 | #include "shader_recompiler/ir_opt/passes.h" | ||
| 8 | |||
| 9 | namespace Shader::Optimization { | ||
| 10 | |||
| 11 | void DeadCodeEliminationPass(IR::Program& program) { | ||
| 12 | // Iterate over the instructions in reverse order: removing an instruction reduces the | ||
| 13 | // use counts of the instructions that define its arguments, possibly making them dead too. | ||
| 14 | for (IR::Block* const block : program.post_order_blocks) { | ||
| 15 | auto it{block->end()}; | ||
| 16 | while (it != block->begin()) { | ||
| 17 | --it; | ||
| 18 | if (!it->HasUses() && !it->MayHaveSideEffects()) { | ||
| 19 | it->Invalidate(); | ||
| 20 | it = block->Instructions().erase(it); | ||
| 21 | } | ||
| 22 | } | ||
| 23 | } | ||
| 24 | } | ||
| 25 | |||
| 26 | } // namespace Shader::Optimization | ||
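A two-instruction example of why a single backwards sweep suffices, assuming neither result is used elsewhere:

    //   %a = IAdd32 %x, %y    // one use: %b
    //   %b = IMul32 %a, %z    // no uses, no side effects
    // Visiting %b first erases it, which drops %a's use count to zero,
    // so %a is erased later in the same pass.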
diff --git a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp new file mode 100644 index 000000000..055ba9c54 --- /dev/null +++ b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp | |||
| @@ -0,0 +1,30 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 6 | #include "shader_recompiler/ir_opt/passes.h" | ||
| 7 | |||
| 8 | namespace Shader::Optimization { | ||
| 9 | |||
| 10 | void VertexATransformPass(IR::Program& program) { | ||
| 11 | for (IR::Block* const block : program.blocks) { | ||
| 12 | for (IR::Inst& inst : block->Instructions()) { | ||
| 13 | if (inst.GetOpcode() == IR::Opcode::Epilogue) { | ||
| 14 | return inst.Invalidate(); | ||
| 15 | } | ||
| 16 | } | ||
| 17 | } | ||
| 18 | } | ||
| 19 | |||
| 20 | void VertexBTransformPass(IR::Program& program) { | ||
| 21 | for (IR::Block* const block : program.blocks) { | ||
| 22 | for (IR::Inst& inst : block->Instructions()) { | ||
| 23 | if (inst.GetOpcode() == IR::Opcode::Prologue) { | ||
| 24 | return inst.Invalidate(); | ||
| 25 | } | ||
| 26 | } | ||
| 27 | } | ||
| 28 | } | ||
| 29 | |||
| 30 | } // namespace Shader::Optimization | ||
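These passes prepare a VertexA/VertexB pair for concatenation: the first program loses its Epilogue and the second its Prologue, so the stitched instruction stream keeps exactly one of each. A sketch of the intended call sequence; the merge step named below is an assumption for illustration and is not part of this diff:

    VertexATransformPass(program_va); // drop VertexA's Epilogue
    VertexBTransformPass(program_vb); // drop VertexB's Prologue
    // ...then concatenate program_va into program_vb
    // IR::Program merged{MergeDualVertexPrograms(program_va, program_vb, env_vb)}; // hypothetical helper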
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp new file mode 100644 index 000000000..4197b0095 --- /dev/null +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp | |||
| @@ -0,0 +1,526 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <compare> | ||
| 7 | #include <optional> | ||
| 8 | #include <queue> | ||
| 9 | |||
| 10 | #include <boost/container/flat_set.hpp> | ||
| 11 | #include <boost/container/small_vector.hpp> | ||
| 12 | |||
| 13 | #include "common/alignment.h" | ||
| 14 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 15 | #include "shader_recompiler/frontend/ir/breadth_first_search.h" | ||
| 16 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 17 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 18 | #include "shader_recompiler/ir_opt/passes.h" | ||
| 19 | |||
| 20 | namespace Shader::Optimization { | ||
| 21 | namespace { | ||
| 22 | /// Address within the constant buffers where a storage buffer descriptor is located | ||
| 23 | struct StorageBufferAddr { | ||
| 24 | auto operator<=>(const StorageBufferAddr&) const noexcept = default; | ||
| 25 | |||
| 26 | u32 index; | ||
| 27 | u32 offset; | ||
| 28 | }; | ||
| 29 | |||
| 30 | /// Global memory instruction, the block that contains it, and the storage buffer it uses | ||
| 31 | struct StorageInst { | ||
| 32 | StorageBufferAddr storage_buffer; | ||
| 33 | IR::Inst* inst; | ||
| 34 | IR::Block* block; | ||
| 35 | }; | ||
| 36 | |||
| 37 | /// Bias towards a certain range of constant buffers when looking for storage buffers | ||
| 38 | struct Bias { | ||
| 39 | u32 index; | ||
| 40 | u32 offset_begin; | ||
| 41 | u32 offset_end; | ||
| 42 | }; | ||
| 43 | |||
| 44 | using boost::container::flat_set; | ||
| 45 | using boost::container::small_vector; | ||
| 46 | using StorageBufferSet = | ||
| 47 | flat_set<StorageBufferAddr, std::less<StorageBufferAddr>, small_vector<StorageBufferAddr, 16>>; | ||
| 48 | using StorageInstVector = small_vector<StorageInst, 24>; | ||
| 49 | using StorageWritesSet = | ||
| 50 | flat_set<StorageBufferAddr, std::less<StorageBufferAddr>, small_vector<StorageBufferAddr, 16>>; | ||
| 51 | |||
| 52 | struct StorageInfo { | ||
| 53 | StorageBufferSet set; | ||
| 54 | StorageInstVector to_replace; | ||
| 55 | StorageWritesSet writes; | ||
| 56 | }; | ||
| 57 | |||
| 58 | /// Returns true when the instruction is a global memory instruction | ||
| 59 | bool IsGlobalMemory(const IR::Inst& inst) { | ||
| 60 | switch (inst.GetOpcode()) { | ||
| 61 | case IR::Opcode::LoadGlobalS8: | ||
| 62 | case IR::Opcode::LoadGlobalU8: | ||
| 63 | case IR::Opcode::LoadGlobalS16: | ||
| 64 | case IR::Opcode::LoadGlobalU16: | ||
| 65 | case IR::Opcode::LoadGlobal32: | ||
| 66 | case IR::Opcode::LoadGlobal64: | ||
| 67 | case IR::Opcode::LoadGlobal128: | ||
| 68 | case IR::Opcode::WriteGlobalS8: | ||
| 69 | case IR::Opcode::WriteGlobalU8: | ||
| 70 | case IR::Opcode::WriteGlobalS16: | ||
| 71 | case IR::Opcode::WriteGlobalU16: | ||
| 72 | case IR::Opcode::WriteGlobal32: | ||
| 73 | case IR::Opcode::WriteGlobal64: | ||
| 74 | case IR::Opcode::WriteGlobal128: | ||
| 75 | case IR::Opcode::GlobalAtomicIAdd32: | ||
| 76 | case IR::Opcode::GlobalAtomicSMin32: | ||
| 77 | case IR::Opcode::GlobalAtomicUMin32: | ||
| 78 | case IR::Opcode::GlobalAtomicSMax32: | ||
| 79 | case IR::Opcode::GlobalAtomicUMax32: | ||
| 80 | case IR::Opcode::GlobalAtomicInc32: | ||
| 81 | case IR::Opcode::GlobalAtomicDec32: | ||
| 82 | case IR::Opcode::GlobalAtomicAnd32: | ||
| 83 | case IR::Opcode::GlobalAtomicOr32: | ||
| 84 | case IR::Opcode::GlobalAtomicXor32: | ||
| 85 | case IR::Opcode::GlobalAtomicExchange32: | ||
| 86 | case IR::Opcode::GlobalAtomicIAdd64: | ||
| 87 | case IR::Opcode::GlobalAtomicSMin64: | ||
| 88 | case IR::Opcode::GlobalAtomicUMin64: | ||
| 89 | case IR::Opcode::GlobalAtomicSMax64: | ||
| 90 | case IR::Opcode::GlobalAtomicUMax64: | ||
| 91 | case IR::Opcode::GlobalAtomicAnd64: | ||
| 92 | case IR::Opcode::GlobalAtomicOr64: | ||
| 93 | case IR::Opcode::GlobalAtomicXor64: | ||
| 94 | case IR::Opcode::GlobalAtomicExchange64: | ||
| 95 | case IR::Opcode::GlobalAtomicAddF32: | ||
| 96 | case IR::Opcode::GlobalAtomicAddF16x2: | ||
| 97 | case IR::Opcode::GlobalAtomicAddF32x2: | ||
| 98 | case IR::Opcode::GlobalAtomicMinF16x2: | ||
| 99 | case IR::Opcode::GlobalAtomicMinF32x2: | ||
| 100 | case IR::Opcode::GlobalAtomicMaxF16x2: | ||
| 101 | case IR::Opcode::GlobalAtomicMaxF32x2: | ||
| 102 | return true; | ||
| 103 | default: | ||
| 104 | return false; | ||
| 105 | } | ||
| 106 | } | ||
| 107 | |||
| 108 | /// Returns true when the instruction is a global memory write instruction | ||
| 109 | bool IsGlobalMemoryWrite(const IR::Inst& inst) { | ||
| 110 | switch (inst.GetOpcode()) { | ||
| 111 | case IR::Opcode::WriteGlobalS8: | ||
| 112 | case IR::Opcode::WriteGlobalU8: | ||
| 113 | case IR::Opcode::WriteGlobalS16: | ||
| 114 | case IR::Opcode::WriteGlobalU16: | ||
| 115 | case IR::Opcode::WriteGlobal32: | ||
| 116 | case IR::Opcode::WriteGlobal64: | ||
| 117 | case IR::Opcode::WriteGlobal128: | ||
| 118 | case IR::Opcode::GlobalAtomicIAdd32: | ||
| 119 | case IR::Opcode::GlobalAtomicSMin32: | ||
| 120 | case IR::Opcode::GlobalAtomicUMin32: | ||
| 121 | case IR::Opcode::GlobalAtomicSMax32: | ||
| 122 | case IR::Opcode::GlobalAtomicUMax32: | ||
| 123 | case IR::Opcode::GlobalAtomicInc32: | ||
| 124 | case IR::Opcode::GlobalAtomicDec32: | ||
| 125 | case IR::Opcode::GlobalAtomicAnd32: | ||
| 126 | case IR::Opcode::GlobalAtomicOr32: | ||
| 127 | case IR::Opcode::GlobalAtomicXor32: | ||
| 128 | case IR::Opcode::GlobalAtomicExchange32: | ||
| 129 | case IR::Opcode::GlobalAtomicIAdd64: | ||
| 130 | case IR::Opcode::GlobalAtomicSMin64: | ||
| 131 | case IR::Opcode::GlobalAtomicUMin64: | ||
| 132 | case IR::Opcode::GlobalAtomicSMax64: | ||
| 133 | case IR::Opcode::GlobalAtomicUMax64: | ||
| 134 | case IR::Opcode::GlobalAtomicAnd64: | ||
| 135 | case IR::Opcode::GlobalAtomicOr64: | ||
| 136 | case IR::Opcode::GlobalAtomicXor64: | ||
| 137 | case IR::Opcode::GlobalAtomicExchange64: | ||
| 138 | case IR::Opcode::GlobalAtomicAddF32: | ||
| 139 | case IR::Opcode::GlobalAtomicAddF16x2: | ||
| 140 | case IR::Opcode::GlobalAtomicAddF32x2: | ||
| 141 | case IR::Opcode::GlobalAtomicMinF16x2: | ||
| 142 | case IR::Opcode::GlobalAtomicMinF32x2: | ||
| 143 | case IR::Opcode::GlobalAtomicMaxF16x2: | ||
| 144 | case IR::Opcode::GlobalAtomicMaxF32x2: | ||
| 145 | return true; | ||
| 146 | default: | ||
| 147 | return false; | ||
| 148 | } | ||
| 149 | } | ||
| 150 | |||
| 151 | /// Converts a global memory opcode to its storage buffer equivalent | ||
| 152 | IR::Opcode GlobalToStorage(IR::Opcode opcode) { | ||
| 153 | switch (opcode) { | ||
| 154 | case IR::Opcode::LoadGlobalS8: | ||
| 155 | return IR::Opcode::LoadStorageS8; | ||
| 156 | case IR::Opcode::LoadGlobalU8: | ||
| 157 | return IR::Opcode::LoadStorageU8; | ||
| 158 | case IR::Opcode::LoadGlobalS16: | ||
| 159 | return IR::Opcode::LoadStorageS16; | ||
| 160 | case IR::Opcode::LoadGlobalU16: | ||
| 161 | return IR::Opcode::LoadStorageU16; | ||
| 162 | case IR::Opcode::LoadGlobal32: | ||
| 163 | return IR::Opcode::LoadStorage32; | ||
| 164 | case IR::Opcode::LoadGlobal64: | ||
| 165 | return IR::Opcode::LoadStorage64; | ||
| 166 | case IR::Opcode::LoadGlobal128: | ||
| 167 | return IR::Opcode::LoadStorage128; | ||
| 168 | case IR::Opcode::WriteGlobalS8: | ||
| 169 | return IR::Opcode::WriteStorageS8; | ||
| 170 | case IR::Opcode::WriteGlobalU8: | ||
| 171 | return IR::Opcode::WriteStorageU8; | ||
| 172 | case IR::Opcode::WriteGlobalS16: | ||
| 173 | return IR::Opcode::WriteStorageS16; | ||
| 174 | case IR::Opcode::WriteGlobalU16: | ||
| 175 | return IR::Opcode::WriteStorageU16; | ||
| 176 | case IR::Opcode::WriteGlobal32: | ||
| 177 | return IR::Opcode::WriteStorage32; | ||
| 178 | case IR::Opcode::WriteGlobal64: | ||
| 179 | return IR::Opcode::WriteStorage64; | ||
| 180 | case IR::Opcode::WriteGlobal128: | ||
| 181 | return IR::Opcode::WriteStorage128; | ||
| 182 | case IR::Opcode::GlobalAtomicIAdd32: | ||
| 183 | return IR::Opcode::StorageAtomicIAdd32; | ||
| 184 | case IR::Opcode::GlobalAtomicSMin32: | ||
| 185 | return IR::Opcode::StorageAtomicSMin32; | ||
| 186 | case IR::Opcode::GlobalAtomicUMin32: | ||
| 187 | return IR::Opcode::StorageAtomicUMin32; | ||
| 188 | case IR::Opcode::GlobalAtomicSMax32: | ||
| 189 | return IR::Opcode::StorageAtomicSMax32; | ||
| 190 | case IR::Opcode::GlobalAtomicUMax32: | ||
| 191 | return IR::Opcode::StorageAtomicUMax32; | ||
| 192 | case IR::Opcode::GlobalAtomicInc32: | ||
| 193 | return IR::Opcode::StorageAtomicInc32; | ||
| 194 | case IR::Opcode::GlobalAtomicDec32: | ||
| 195 | return IR::Opcode::StorageAtomicDec32; | ||
| 196 | case IR::Opcode::GlobalAtomicAnd32: | ||
| 197 | return IR::Opcode::StorageAtomicAnd32; | ||
| 198 | case IR::Opcode::GlobalAtomicOr32: | ||
| 199 | return IR::Opcode::StorageAtomicOr32; | ||
| 200 | case IR::Opcode::GlobalAtomicXor32: | ||
| 201 | return IR::Opcode::StorageAtomicXor32; | ||
| 202 | case IR::Opcode::GlobalAtomicIAdd64: | ||
| 203 | return IR::Opcode::StorageAtomicIAdd64; | ||
| 204 | case IR::Opcode::GlobalAtomicSMin64: | ||
| 205 | return IR::Opcode::StorageAtomicSMin64; | ||
| 206 | case IR::Opcode::GlobalAtomicUMin64: | ||
| 207 | return IR::Opcode::StorageAtomicUMin64; | ||
| 208 | case IR::Opcode::GlobalAtomicSMax64: | ||
| 209 | return IR::Opcode::StorageAtomicSMax64; | ||
| 210 | case IR::Opcode::GlobalAtomicUMax64: | ||
| 211 | return IR::Opcode::StorageAtomicUMax64; | ||
| 212 | case IR::Opcode::GlobalAtomicAnd64: | ||
| 213 | return IR::Opcode::StorageAtomicAnd64; | ||
| 214 | case IR::Opcode::GlobalAtomicOr64: | ||
| 215 | return IR::Opcode::StorageAtomicOr64; | ||
| 216 | case IR::Opcode::GlobalAtomicXor64: | ||
| 217 | return IR::Opcode::StorageAtomicXor64; | ||
| 218 | case IR::Opcode::GlobalAtomicExchange32: | ||
| 219 | return IR::Opcode::StorageAtomicExchange32; | ||
| 220 | case IR::Opcode::GlobalAtomicExchange64: | ||
| 221 | return IR::Opcode::StorageAtomicExchange64; | ||
| 222 | case IR::Opcode::GlobalAtomicAddF32: | ||
| 223 | return IR::Opcode::StorageAtomicAddF32; | ||
| 224 | case IR::Opcode::GlobalAtomicAddF16x2: | ||
| 225 | return IR::Opcode::StorageAtomicAddF16x2; | ||
| 226 | case IR::Opcode::GlobalAtomicMinF16x2: | ||
| 227 | return IR::Opcode::StorageAtomicMinF16x2; | ||
| 228 | case IR::Opcode::GlobalAtomicMaxF16x2: | ||
| 229 | return IR::Opcode::StorageAtomicMaxF16x2; | ||
| 230 | case IR::Opcode::GlobalAtomicAddF32x2: | ||
| 231 | return IR::Opcode::StorageAtomicAddF32x2; | ||
| 232 | case IR::Opcode::GlobalAtomicMinF32x2: | ||
| 233 | return IR::Opcode::StorageAtomicMinF32x2; | ||
| 234 | case IR::Opcode::GlobalAtomicMaxF32x2: | ||
| 235 | return IR::Opcode::StorageAtomicMaxF32x2; | ||
| 236 | default: | ||
| 237 | throw InvalidArgument("Invalid global memory opcode {}", opcode); | ||
| 238 | } | ||
| 239 | } | ||
| 240 | |||
| 241 | /// Returns true when a storage buffer address satisfies a bias | ||
| 242 | bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexcept { | ||
| 243 | return storage_buffer.index == bias.index && storage_buffer.offset >= bias.offset_begin && | ||
| 244 | storage_buffer.offset < bias.offset_end; | ||
| 245 | } | ||
| 246 | |||
| 247 | struct LowAddrInfo { | ||
| 248 | IR::U32 value; | ||
| 249 | s32 imm_offset; | ||
| 250 | }; | ||
| 251 | |||
| 252 | /// Tries to track the low 32 bits of a global memory instruction's address | ||
| 253 | std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) { | ||
| 254 | // The first argument is the low-level GPU pointer used by the global memory instruction | ||
| 255 | const IR::Value addr{inst->Arg(0)}; | ||
| 256 | if (addr.IsImmediate()) { | ||
| 257 | // Not much we can do if it's an immediate | ||
| 258 | return std::nullopt; | ||
| 259 | } | ||
| 260 | // This address is expected to be either a PackUint2x32, an IAdd64, or a CompositeConstructU32x2 | ||
| 261 | IR::Inst* addr_inst{addr.InstRecursive()}; | ||
| 262 | s32 imm_offset{0}; | ||
| 263 | if (addr_inst->GetOpcode() == IR::Opcode::IAdd64) { | ||
| 264 | // If it's an IAdd64, get the immediate offset it applies and grab the address | ||
| 265 | // instruction. This expects the instruction to be canonicalized, with the address as the | ||
| 266 | // first argument and the immediate offset as the second. | ||
| 267 | const IR::U64 imm_offset_value{addr_inst->Arg(1)}; | ||
| 268 | if (!imm_offset_value.IsImmediate()) { | ||
| 269 | return std::nullopt; | ||
| 270 | } | ||
| 271 | imm_offset = static_cast<s32>(static_cast<s64>(imm_offset_value.U64())); | ||
| 272 | const IR::U64 iadd_addr{addr_inst->Arg(0)}; | ||
| 273 | if (iadd_addr.IsImmediate()) { | ||
| 274 | return std::nullopt; | ||
| 275 | } | ||
| 276 | addr_inst = iadd_addr.InstRecursive(); | ||
| 277 | } | ||
| 278 | // With IAdd64 handled, now PackUint2x32 is expected | ||
| 279 | if (addr_inst->GetOpcode() == IR::Opcode::PackUint2x32) { | ||
| 280 | // PackUint2x32 is expected to be generated from a vector | ||
| 281 | const IR::Value vector{addr_inst->Arg(0)}; | ||
| 282 | if (vector.IsImmediate()) { | ||
| 283 | return std::nullopt; | ||
| 284 | } | ||
| 285 | addr_inst = vector.InstRecursive(); | ||
| 286 | } | ||
| 287 | // The vector is expected to be a CompositeConstructU32x2 | ||
| 288 | if (addr_inst->GetOpcode() != IR::Opcode::CompositeConstructU32x2) { | ||
| 289 | return std::nullopt; | ||
| 290 | } | ||
| 291 | // Grab the first argument from the CompositeConstructU32x2; this is the low address. | ||
| 292 | return LowAddrInfo{ | ||
| 293 | .value{IR::U32{addr_inst->Arg(0)}}, | ||
| 294 | .imm_offset = imm_offset, | ||
| 295 | }; | ||
| 296 | } | ||
| 297 | |||
| 298 | /// Tries to track the storage buffer address used by a global memory instruction | ||
| 299 | std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) { | ||
| 300 | const auto pred{[bias](const IR::Inst* inst) -> std::optional<StorageBufferAddr> { | ||
| 301 | if (inst->GetOpcode() != IR::Opcode::GetCbufU32) { | ||
| 302 | return std::nullopt; | ||
| 303 | } | ||
| 304 | const IR::Value index{inst->Arg(0)}; | ||
| 305 | const IR::Value offset{inst->Arg(1)}; | ||
| 306 | if (!index.IsImmediate()) { | ||
| 307 | // Definitely not a storage buffer if it's read from a | ||
| 308 | // non-immediate index | ||
| 309 | return std::nullopt; | ||
| 310 | } | ||
| 311 | if (!offset.IsImmediate()) { | ||
| 312 | // TODO: Support SSBO arrays | ||
| 313 | return std::nullopt; | ||
| 314 | } | ||
| 315 | const StorageBufferAddr storage_buffer{ | ||
| 316 | .index = index.U32(), | ||
| 317 | .offset = offset.U32(), | ||
| 318 | }; | ||
| 319 | if (!Common::IsAligned(storage_buffer.offset, 16)) { | ||
| 320 | // The SSBO pointer has to be aligned | ||
| 321 | return std::nullopt; | ||
| 322 | } | ||
| 323 | if (bias && !MeetsBias(storage_buffer, *bias)) { | ||
| 324 | // We have to reject some addresses in case we would | ||
| 325 | // wrongly point to them | ||
| 326 | return std::nullopt; | ||
| 327 | } | ||
| 328 | return storage_buffer; | ||
| 329 | }}; | ||
| 330 | return BreadthFirstSearch(value, pred); | ||
| 331 | } | ||
| 332 | |||
| 333 | /// Collects the storage buffer used by a global memory instruction and the instruction itself | ||
| 334 | void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) { | ||
| 335 | // NVN puts storage buffers in a specific range; we have to bias towards these addresses to | ||
| 336 | // avoid false positives | ||
| 337 | static constexpr Bias nvn_bias{ | ||
| 338 | .index = 0, | ||
| 339 | .offset_begin = 0x110, | ||
| 340 | .offset_end = 0x610, | ||
| 341 | }; | ||
| 342 | // Track the low address of the instruction | ||
| 343 | const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(&inst)}; | ||
| 344 | if (!low_addr_info) { | ||
| 345 | // Failed to track the low address, use NVN fallbacks | ||
| 346 | return; | ||
| 347 | } | ||
| 348 | // First try to find storage buffers within the NVN address range | ||
| 349 | const IR::U32 low_addr{low_addr_info->value}; | ||
| 350 | std::optional<StorageBufferAddr> storage_buffer{Track(low_addr, &nvn_bias)}; | ||
| 351 | if (!storage_buffer) { | ||
| 352 | // If it fails, track without a bias | ||
| 353 | storage_buffer = Track(low_addr, nullptr); | ||
| 354 | if (!storage_buffer) { | ||
| 355 | // If that also fails, use NVN fallbacks | ||
| 356 | return; | ||
| 357 | } | ||
| 358 | } | ||
| 359 | // Collect the storage buffer and the instruction | ||
| 360 | if (IsGlobalMemoryWrite(inst)) { | ||
| 361 | info.writes.insert(*storage_buffer); | ||
| 362 | } | ||
| 363 | info.set.insert(*storage_buffer); | ||
| 364 | info.to_replace.push_back(StorageInst{ | ||
| 365 | .storage_buffer{*storage_buffer}, | ||
| 366 | .inst = &inst, | ||
| 367 | .block = &block, | ||
| 368 | }); | ||
| 369 | } | ||
| 370 | |||
| 371 | /// Returns the offset in bytes for an equivalent storage buffer instruction | ||
| 372 | IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) { | ||
| 373 | IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; | ||
| 374 | IR::U32 offset; | ||
| 375 | if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) { | ||
| 376 | offset = low_addr->value; | ||
| 377 | if (low_addr->imm_offset != 0) { | ||
| 378 | offset = ir.IAdd(offset, ir.Imm32(low_addr->imm_offset)); | ||
| 379 | } | ||
| 380 | } else { | ||
| 381 | offset = ir.UConvert(32, IR::U64{inst.Arg(0)}); | ||
| 382 | } | ||
| 383 | // Subtract the storage buffer base address (its least significant 32 bits, read from the | ||
| 384 | // constant buffer) from the guest address. The result is the storage buffer offset in bytes. | ||
| 385 | const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))}; | ||
| 386 | return ir.ISub(offset, low_cbuf); | ||
| 387 | } | ||
| 388 | |||
| 389 | /// Replace a global memory load instruction with its storage buffer equivalent | ||
| 390 | void ReplaceLoad(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, | ||
| 391 | const IR::U32& offset) { | ||
| 392 | const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())}; | ||
| 393 | const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; | ||
| 394 | const IR::Value value{&*block.PrependNewInst(it, new_opcode, {storage_index, offset})}; | ||
| 395 | inst.ReplaceUsesWith(value); | ||
| 396 | } | ||
| 397 | |||
| 398 | /// Replace a global memory write instruction with its storage buffer equivalent | ||
| 399 | void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, | ||
| 400 | const IR::U32& offset) { | ||
| 401 | const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())}; | ||
| 402 | const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; | ||
| 403 | block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)}); | ||
| 404 | inst.Invalidate(); | ||
| 405 | } | ||
| 406 | |||
| 407 | /// Replace an atomic operation on global memory with its storage buffer equivalent | ||
| 408 | void ReplaceAtomic(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, | ||
| 409 | const IR::U32& offset) { | ||
| 410 | const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())}; | ||
| 411 | const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; | ||
| 412 | const IR::Value value{ | ||
| 413 | &*block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)})}; | ||
| 414 | inst.ReplaceUsesWith(value); | ||
| 415 | } | ||
| 416 | |||
| 417 | /// Replace a global memory instruction with its storage buffer equivalent | ||
| 418 | void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, | ||
| 419 | const IR::U32& offset) { | ||
| 420 | switch (inst.GetOpcode()) { | ||
| 421 | case IR::Opcode::LoadGlobalS8: | ||
| 422 | case IR::Opcode::LoadGlobalU8: | ||
| 423 | case IR::Opcode::LoadGlobalS16: | ||
| 424 | case IR::Opcode::LoadGlobalU16: | ||
| 425 | case IR::Opcode::LoadGlobal32: | ||
| 426 | case IR::Opcode::LoadGlobal64: | ||
| 427 | case IR::Opcode::LoadGlobal128: | ||
| 428 | return ReplaceLoad(block, inst, storage_index, offset); | ||
| 429 | case IR::Opcode::WriteGlobalS8: | ||
| 430 | case IR::Opcode::WriteGlobalU8: | ||
| 431 | case IR::Opcode::WriteGlobalS16: | ||
| 432 | case IR::Opcode::WriteGlobalU16: | ||
| 433 | case IR::Opcode::WriteGlobal32: | ||
| 434 | case IR::Opcode::WriteGlobal64: | ||
| 435 | case IR::Opcode::WriteGlobal128: | ||
| 436 | return ReplaceWrite(block, inst, storage_index, offset); | ||
| 437 | case IR::Opcode::GlobalAtomicIAdd32: | ||
| 438 | case IR::Opcode::GlobalAtomicSMin32: | ||
| 439 | case IR::Opcode::GlobalAtomicUMin32: | ||
| 440 | case IR::Opcode::GlobalAtomicSMax32: | ||
| 441 | case IR::Opcode::GlobalAtomicUMax32: | ||
| 442 | case IR::Opcode::GlobalAtomicInc32: | ||
| 443 | case IR::Opcode::GlobalAtomicDec32: | ||
| 444 | case IR::Opcode::GlobalAtomicAnd32: | ||
| 445 | case IR::Opcode::GlobalAtomicOr32: | ||
| 446 | case IR::Opcode::GlobalAtomicXor32: | ||
| 447 | case IR::Opcode::GlobalAtomicExchange32: | ||
| 448 | case IR::Opcode::GlobalAtomicIAdd64: | ||
| 449 | case IR::Opcode::GlobalAtomicSMin64: | ||
| 450 | case IR::Opcode::GlobalAtomicUMin64: | ||
| 451 | case IR::Opcode::GlobalAtomicSMax64: | ||
| 452 | case IR::Opcode::GlobalAtomicUMax64: | ||
| 453 | case IR::Opcode::GlobalAtomicAnd64: | ||
| 454 | case IR::Opcode::GlobalAtomicOr64: | ||
| 455 | case IR::Opcode::GlobalAtomicXor64: | ||
| 456 | case IR::Opcode::GlobalAtomicExchange64: | ||
| 457 | case IR::Opcode::GlobalAtomicAddF32: | ||
| 458 | case IR::Opcode::GlobalAtomicAddF16x2: | ||
| 459 | case IR::Opcode::GlobalAtomicAddF32x2: | ||
| 460 | case IR::Opcode::GlobalAtomicMinF16x2: | ||
| 461 | case IR::Opcode::GlobalAtomicMinF32x2: | ||
| 462 | case IR::Opcode::GlobalAtomicMaxF16x2: | ||
| 463 | case IR::Opcode::GlobalAtomicMaxF32x2: | ||
| 464 | return ReplaceAtomic(block, inst, storage_index, offset); | ||
| 465 | default: | ||
| 466 | throw InvalidArgument("Invalid global memory opcode {}", inst.GetOpcode()); | ||
| 467 | } | ||
| 468 | } | ||
| 469 | } // Anonymous namespace | ||
| 470 | |||
| 471 | void GlobalMemoryToStorageBufferPass(IR::Program& program) { | ||
| 472 | StorageInfo info; | ||
| 473 | for (IR::Block* const block : program.post_order_blocks) { | ||
| 474 | for (IR::Inst& inst : block->Instructions()) { | ||
| 475 | if (!IsGlobalMemory(inst)) { | ||
| 476 | continue; | ||
| 477 | } | ||
| 478 | CollectStorageBuffers(*block, inst, info); | ||
| 479 | } | ||
| 480 | } | ||
| 481 | for (const StorageBufferAddr& storage_buffer : info.set) { | ||
| 482 | program.info.storage_buffers_descriptors.push_back({ | ||
| 483 | .cbuf_index = storage_buffer.index, | ||
| 484 | .cbuf_offset = storage_buffer.offset, | ||
| 485 | .count = 1, | ||
| 486 | .is_written = info.writes.contains(storage_buffer), | ||
| 487 | }); | ||
| 488 | } | ||
| 489 | for (const StorageInst& storage_inst : info.to_replace) { | ||
| 490 | const StorageBufferAddr storage_buffer{storage_inst.storage_buffer}; | ||
| 491 | const auto it{info.set.find(storage_inst.storage_buffer)}; | ||
| 492 | const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}}; | ||
| 493 | IR::Block* const block{storage_inst.block}; | ||
| 494 | IR::Inst* const inst{storage_inst.inst}; | ||
| 495 | const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)}; | ||
| 496 | Replace(*block, *inst, index, offset); | ||
| 497 | } | ||
| 498 | } | ||
| 499 | |||
| 500 | template <typename Descriptors, typename Descriptor, typename Func> | ||
| 501 | static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) { | ||
| 502 | // TODO: Handle arrays | ||
| 503 | const auto it{std::ranges::find_if(descriptors, pred)}; | ||
| 504 | if (it != descriptors.end()) { | ||
| 505 | return static_cast<u32>(std::distance(descriptors.begin(), it)); | ||
| 506 | } | ||
| 507 | descriptors.push_back(desc); | ||
| 508 | return static_cast<u32>(descriptors.size()) - 1; | ||
| 509 | } | ||
| 510 | |||
| 511 | void JoinStorageInfo(Info& base, Info& source) { | ||
| 512 | auto& descriptors = base.storage_buffers_descriptors; | ||
| 513 | for (auto& desc : source.storage_buffers_descriptors) { | ||
| 514 | auto it{std::ranges::find_if(descriptors, [&desc](const auto& existing) { | ||
| 515 | return desc.cbuf_index == existing.cbuf_index && | ||
| 516 | desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count; | ||
| 517 | })}; | ||
| 518 | if (it != descriptors.end()) { | ||
| 519 | it->is_written |= desc.is_written; | ||
| 520 | continue; | ||
| 521 | } | ||
| 522 | descriptors.push_back(desc); | ||
| 523 | } | ||
| 524 | } | ||
| 525 | |||
| 526 | } // namespace Shader::Optimization | ||
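To make the address arithmetic in StorageOffset concrete, here is a minimal standalone sketch of the same computation with plain host integers rather than IR values. The `CbufSlot` and `StorageByteOffset` names are hypothetical, introduced only for illustration:

```cpp
// Sketch of the StorageOffset math: the guest address of a global memory
// access is the SSBO base pointer (read from a constant buffer) plus a byte
// offset, so subtracting the low 32 bits of the base recovers that offset.
#include <cassert>
#include <cstdint>

// Hypothetical constant-buffer slot holding a 64-bit SSBO descriptor.
struct CbufSlot {
    uint32_t base_lo; // least significant 32 bits of the SSBO address
    uint32_t base_hi; // most significant 32 bits
};

uint32_t StorageByteOffset(uint64_t guest_addr, const CbufSlot& slot) {
    // Only the low halves participate in the subtraction.
    return static_cast<uint32_t>(guest_addr) - slot.base_lo;
}

int main() {
    const CbufSlot slot{.base_lo = 0x1000'0000u, .base_hi = 0x2u};
    const uint64_t guest_addr = 0x2'1000'0040ull; // base + 0x40
    assert(StorageByteOffset(guest_addr, slot) == 0x40u);
}
```

Only the low 32 bits are compared because, by this point, Track has already matched the access to this buffer, so the byte offset is assumed to fit in 32 bits.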
diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp new file mode 100644 index 000000000..e9b55f835 --- /dev/null +++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp | |||
| @@ -0,0 +1,38 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <vector> | ||
| 6 | |||
| 7 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 9 | #include "shader_recompiler/ir_opt/passes.h" | ||
| 10 | |||
| 11 | namespace Shader::Optimization { | ||
| 12 | |||
| 13 | void IdentityRemovalPass(IR::Program& program) { | ||
| 14 | std::vector<IR::Inst*> to_invalidate; | ||
| 15 | for (IR::Block* const block : program.blocks) { | ||
| 16 | for (auto inst = block->begin(); inst != block->end();) { | ||
| 17 | const size_t num_args{inst->NumArgs()}; | ||
| 18 | for (size_t i = 0; i < num_args; ++i) { | ||
| 19 | IR::Value arg; | ||
| 20 | while ((arg = inst->Arg(i)).IsIdentity()) { | ||
| 21 | inst->SetArg(i, arg.Inst()->Arg(0)); | ||
| 22 | } | ||
| 23 | } | ||
| 24 | if (inst->GetOpcode() == IR::Opcode::Identity || | ||
| 25 | inst->GetOpcode() == IR::Opcode::Void) { | ||
| 26 | to_invalidate.push_back(&*inst); | ||
| 27 | inst = block->Instructions().erase(inst); | ||
| 28 | } else { | ||
| 29 | ++inst; | ||
| 30 | } | ||
| 31 | } | ||
| 32 | } | ||
| 33 | for (IR::Inst* const inst : to_invalidate) { | ||
| 34 | inst->Invalidate(); | ||
| 35 | } | ||
| 36 | } | ||
| 37 | |||
| 38 | } // namespace Shader::Optimization | ||
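The core of this pass is the inner `while` loop that collapses chains of `Identity` instructions before the identities themselves are erased. A self-contained sketch with hypothetical `Node`/`Resolve` types (not the yuzu IR) shows the idea:

```cpp
// Sketch of the argument rewrite in IdentityRemovalPass: follow chains of
// Identity nodes until a real producer is found, then point the user at it.
#include <cassert>

struct Node {
    bool is_identity{};
    Node* forwarded{}; // for identity nodes, the value they forward
};

Node* Resolve(Node* arg) {
    // Equivalent to the inner while loop: keep hopping over identities.
    while (arg->is_identity) {
        arg = arg->forwarded;
    }
    return arg;
}

int main() {
    Node real{};
    Node id1{true, &real};
    Node id2{true, &id1};
    assert(Resolve(&id2) == &real); // id2 -> id1 -> real
}
```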
diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp new file mode 100644 index 000000000..773e1f961 --- /dev/null +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp | |||
| @@ -0,0 +1,143 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | |||
| 7 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 9 | #include "shader_recompiler/ir_opt/passes.h" | ||
| 10 | |||
| 11 | namespace Shader::Optimization { | ||
| 12 | namespace { | ||
| 13 | IR::Opcode Replace(IR::Opcode op) { | ||
| 14 | switch (op) { | ||
| 15 | case IR::Opcode::FPAbs16: | ||
| 16 | return IR::Opcode::FPAbs32; | ||
| 17 | case IR::Opcode::FPAdd16: | ||
| 18 | return IR::Opcode::FPAdd32; | ||
| 19 | case IR::Opcode::FPCeil16: | ||
| 20 | return IR::Opcode::FPCeil32; | ||
| 21 | case IR::Opcode::FPFloor16: | ||
| 22 | return IR::Opcode::FPFloor32; | ||
| 23 | case IR::Opcode::FPFma16: | ||
| 24 | return IR::Opcode::FPFma32; | ||
| 25 | case IR::Opcode::FPMul16: | ||
| 26 | return IR::Opcode::FPMul32; | ||
| 27 | case IR::Opcode::FPNeg16: | ||
| 28 | return IR::Opcode::FPNeg32; | ||
| 29 | case IR::Opcode::FPRoundEven16: | ||
| 30 | return IR::Opcode::FPRoundEven32; | ||
| 31 | case IR::Opcode::FPSaturate16: | ||
| 32 | return IR::Opcode::FPSaturate32; | ||
| 33 | case IR::Opcode::FPClamp16: | ||
| 34 | return IR::Opcode::FPClamp32; | ||
| 35 | case IR::Opcode::FPTrunc16: | ||
| 36 | return IR::Opcode::FPTrunc32; | ||
| 37 | case IR::Opcode::CompositeConstructF16x2: | ||
| 38 | return IR::Opcode::CompositeConstructF32x2; | ||
| 39 | case IR::Opcode::CompositeConstructF16x3: | ||
| 40 | return IR::Opcode::CompositeConstructF32x3; | ||
| 41 | case IR::Opcode::CompositeConstructF16x4: | ||
| 42 | return IR::Opcode::CompositeConstructF32x4; | ||
| 43 | case IR::Opcode::CompositeExtractF16x2: | ||
| 44 | return IR::Opcode::CompositeExtractF32x2; | ||
| 45 | case IR::Opcode::CompositeExtractF16x3: | ||
| 46 | return IR::Opcode::CompositeExtractF32x3; | ||
| 47 | case IR::Opcode::CompositeExtractF16x4: | ||
| 48 | return IR::Opcode::CompositeExtractF32x4; | ||
| 49 | case IR::Opcode::CompositeInsertF16x2: | ||
| 50 | return IR::Opcode::CompositeInsertF32x2; | ||
| 51 | case IR::Opcode::CompositeInsertF16x3: | ||
| 52 | return IR::Opcode::CompositeInsertF32x3; | ||
| 53 | case IR::Opcode::CompositeInsertF16x4: | ||
| 54 | return IR::Opcode::CompositeInsertF32x4; | ||
| 55 | case IR::Opcode::FPOrdEqual16: | ||
| 56 | return IR::Opcode::FPOrdEqual32; | ||
| 57 | case IR::Opcode::FPUnordEqual16: | ||
| 58 | return IR::Opcode::FPUnordEqual32; | ||
| 59 | case IR::Opcode::FPOrdNotEqual16: | ||
| 60 | return IR::Opcode::FPOrdNotEqual32; | ||
| 61 | case IR::Opcode::FPUnordNotEqual16: | ||
| 62 | return IR::Opcode::FPUnordNotEqual32; | ||
| 63 | case IR::Opcode::FPOrdLessThan16: | ||
| 64 | return IR::Opcode::FPOrdLessThan32; | ||
| 65 | case IR::Opcode::FPUnordLessThan16: | ||
| 66 | return IR::Opcode::FPUnordLessThan32; | ||
| 67 | case IR::Opcode::FPOrdGreaterThan16: | ||
| 68 | return IR::Opcode::FPOrdGreaterThan32; | ||
| 69 | case IR::Opcode::FPUnordGreaterThan16: | ||
| 70 | return IR::Opcode::FPUnordGreaterThan32; | ||
| 71 | case IR::Opcode::FPOrdLessThanEqual16: | ||
| 72 | return IR::Opcode::FPOrdLessThanEqual32; | ||
| 73 | case IR::Opcode::FPUnordLessThanEqual16: | ||
| 74 | return IR::Opcode::FPUnordLessThanEqual32; | ||
| 75 | case IR::Opcode::FPOrdGreaterThanEqual16: | ||
| 76 | return IR::Opcode::FPOrdGreaterThanEqual32; | ||
| 77 | case IR::Opcode::FPUnordGreaterThanEqual16: | ||
| 78 | return IR::Opcode::FPUnordGreaterThanEqual32; | ||
| 79 | case IR::Opcode::FPIsNan16: | ||
| 80 | return IR::Opcode::FPIsNan32; | ||
| 81 | case IR::Opcode::ConvertS16F16: | ||
| 82 | return IR::Opcode::ConvertS16F32; | ||
| 83 | case IR::Opcode::ConvertS32F16: | ||
| 84 | return IR::Opcode::ConvertS32F32; | ||
| 85 | case IR::Opcode::ConvertS64F16: | ||
| 86 | return IR::Opcode::ConvertS64F32; | ||
| 87 | case IR::Opcode::ConvertU16F16: | ||
| 88 | return IR::Opcode::ConvertU16F32; | ||
| 89 | case IR::Opcode::ConvertU32F16: | ||
| 90 | return IR::Opcode::ConvertU32F32; | ||
| 91 | case IR::Opcode::ConvertU64F16: | ||
| 92 | return IR::Opcode::ConvertU64F32; | ||
| 93 | case IR::Opcode::PackFloat2x16: | ||
| 94 | return IR::Opcode::PackHalf2x16; | ||
| 95 | case IR::Opcode::UnpackFloat2x16: | ||
| 96 | return IR::Opcode::UnpackHalf2x16; | ||
| 97 | case IR::Opcode::ConvertF32F16: | ||
| 98 | return IR::Opcode::Identity; | ||
| 99 | case IR::Opcode::ConvertF16F32: | ||
| 100 | return IR::Opcode::Identity; | ||
| 101 | case IR::Opcode::ConvertF16S8: | ||
| 102 | return IR::Opcode::ConvertF32S8; | ||
| 103 | case IR::Opcode::ConvertF16S16: | ||
| 104 | return IR::Opcode::ConvertF32S16; | ||
| 105 | case IR::Opcode::ConvertF16S32: | ||
| 106 | return IR::Opcode::ConvertF32S32; | ||
| 107 | case IR::Opcode::ConvertF16S64: | ||
| 108 | return IR::Opcode::ConvertF32S64; | ||
| 109 | case IR::Opcode::ConvertF16U8: | ||
| 110 | return IR::Opcode::ConvertF32U8; | ||
| 111 | case IR::Opcode::ConvertF16U16: | ||
| 112 | return IR::Opcode::ConvertF32U16; | ||
| 113 | case IR::Opcode::ConvertF16U32: | ||
| 114 | return IR::Opcode::ConvertF32U32; | ||
| 115 | case IR::Opcode::ConvertF16U64: | ||
| 116 | return IR::Opcode::ConvertF32U64; | ||
| 117 | case IR::Opcode::GlobalAtomicAddF16x2: | ||
| 118 | return IR::Opcode::GlobalAtomicAddF32x2; | ||
| 119 | case IR::Opcode::StorageAtomicAddF16x2: | ||
| 120 | return IR::Opcode::StorageAtomicAddF32x2; | ||
| 121 | case IR::Opcode::GlobalAtomicMinF16x2: | ||
| 122 | return IR::Opcode::GlobalAtomicMinF32x2; | ||
| 123 | case IR::Opcode::StorageAtomicMinF16x2: | ||
| 124 | return IR::Opcode::StorageAtomicMinF32x2; | ||
| 125 | case IR::Opcode::GlobalAtomicMaxF16x2: | ||
| 126 | return IR::Opcode::GlobalAtomicMaxF32x2; | ||
| 127 | case IR::Opcode::StorageAtomicMaxF16x2: | ||
| 128 | return IR::Opcode::StorageAtomicMaxF32x2; | ||
| 129 | default: | ||
| 130 | return op; | ||
| 131 | } | ||
| 132 | } | ||
| 133 | } // Anonymous namespace | ||
| 134 | |||
| 135 | void LowerFp16ToFp32(IR::Program& program) { | ||
| 136 | for (IR::Block* const block : program.blocks) { | ||
| 137 | for (IR::Inst& inst : block->Instructions()) { | ||
| 138 | inst.ReplaceOpcode(Replace(inst.GetOpcode())); | ||
| 139 | } | ||
| 140 | } | ||
| 141 | } | ||
| 142 | |||
| 143 | } // namespace Shader::Optimization | ||
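The pass is a pure opcode substitution: every fp16 operation maps to its fp32 counterpart, and the F16/F32 conversions collapse to `Identity` because every value is already 32-bit after the lowering. A tiny sketch with a hypothetical `Op` enum illustrates the shape of the rewrite:

```cpp
// Sketch of the LowerFp16ToFp32 strategy: a pure opcode-to-opcode mapping.
#include <cassert>

enum class Op { FPAdd16, FPAdd32, ConvertF32F16, Identity };

Op Lower(Op op) {
    switch (op) {
    case Op::FPAdd16:
        return Op::FPAdd32;  // arithmetic widens to fp32
    case Op::ConvertF32F16:
        return Op::Identity; // the conversion becomes a no-op
    default:
        return op;           // everything else is untouched
    }
}

int main() {
    assert(Lower(Op::FPAdd16) == Op::FPAdd32);
    assert(Lower(Op::ConvertF32F16) == Op::Identity);
    assert(Lower(Op::Identity) == Op::Identity);
}
```

Any `Identity` opcodes this lowering leaves behind can then be stripped by the IdentityRemovalPass shown earlier.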
diff --git a/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp new file mode 100644 index 000000000..e80d3d1d9 --- /dev/null +++ b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp | |||
| @@ -0,0 +1,218 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <utility> | ||
| 6 | |||
| 7 | #include "shader_recompiler/exception.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 11 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 12 | #include "shader_recompiler/ir_opt/passes.h" | ||
| 13 | |||
| 14 | namespace Shader::Optimization { | ||
| 15 | namespace { | ||
| 16 | std::pair<IR::U32, IR::U32> Unpack(IR::IREmitter& ir, const IR::Value& packed) { | ||
| 17 | if (packed.IsImmediate()) { | ||
| 18 | const u64 value{packed.U64()}; | ||
| 19 | return { | ||
| 20 | ir.Imm32(static_cast<u32>(value)), | ||
| 21 | ir.Imm32(static_cast<u32>(value >> 32)), | ||
| 22 | }; | ||
| 23 | } else { | ||
| 24 | return std::pair<IR::U32, IR::U32>{ | ||
| 25 | ir.CompositeExtract(packed, 0u), | ||
| 26 | ir.CompositeExtract(packed, 1u), | ||
| 27 | }; | ||
| 28 | } | ||
| 29 | } | ||
| 30 | |||
| 31 | void IAdd64To32(IR::Block& block, IR::Inst& inst) { | ||
| 32 | if (inst.HasAssociatedPseudoOperation()) { | ||
| 33 | throw NotImplementedException("IAdd64 emulation with pseudo instructions"); | ||
| 34 | } | ||
| 35 | IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); | ||
| 36 | const auto [a_lo, a_hi]{Unpack(ir, inst.Arg(0))}; | ||
| 37 | const auto [b_lo, b_hi]{Unpack(ir, inst.Arg(1))}; | ||
| 38 | |||
| 39 | const IR::U32 ret_lo{ir.IAdd(a_lo, b_lo)}; | ||
| 40 | const IR::U32 carry{ir.Select(ir.GetCarryFromOp(ret_lo), ir.Imm32(1u), ir.Imm32(0u))}; | ||
| 41 | |||
| 42 | const IR::U32 ret_hi{ir.IAdd(ir.IAdd(a_hi, b_hi), carry)}; | ||
| 43 | inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi)); | ||
| 44 | } | ||
| 45 | |||
| 46 | void ISub64To32(IR::Block& block, IR::Inst& inst) { | ||
| 47 | if (inst.HasAssociatedPseudoOperation()) { | ||
| 48 | throw NotImplementedException("ISub64 emulation with pseudo instructions"); | ||
| 49 | } | ||
| 50 | IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); | ||
| 51 | const auto [a_lo, a_hi]{Unpack(ir, inst.Arg(0))}; | ||
| 52 | const auto [b_lo, b_hi]{Unpack(ir, inst.Arg(1))}; | ||
| 53 | |||
| 54 | const IR::U32 ret_lo{ir.ISub(a_lo, b_lo)}; | ||
| 55 | const IR::U1 underflow{ir.IGreaterThan(ret_lo, a_lo, false)}; | ||
| 56 | const IR::U32 underflow_bit{ir.Select(underflow, ir.Imm32(1u), ir.Imm32(0u))}; | ||
| 57 | |||
| 58 | const IR::U32 ret_hi{ir.ISub(ir.ISub(a_hi, b_hi), underflow_bit)}; | ||
| 59 | inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi)); | ||
| 60 | } | ||
| 61 | |||
| 62 | void INeg64To32(IR::Block& block, IR::Inst& inst) { | ||
| 63 | if (inst.HasAssociatedPseudoOperation()) { | ||
| 64 | throw NotImplementedException("INeg64 emulation with pseudo instructions"); | ||
| 65 | } | ||
| 66 | IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); | ||
| 67 | auto [lo, hi]{Unpack(ir, inst.Arg(0))}; | ||
| 68 | lo = ir.BitwiseNot(lo); | ||
| 69 | hi = ir.BitwiseNot(hi); | ||
| 70 | |||
| 71 | lo = ir.IAdd(lo, ir.Imm32(1)); | ||
| 72 | |||
| 73 | const IR::U32 carry{ir.Select(ir.GetCarryFromOp(lo), ir.Imm32(1u), ir.Imm32(0u))}; | ||
| 74 | hi = ir.IAdd(hi, carry); | ||
| 75 | |||
| 76 | inst.ReplaceUsesWith(ir.CompositeConstruct(lo, hi)); | ||
| 77 | } | ||
| 78 | |||
| 79 | void ShiftLeftLogical64To32(IR::Block& block, IR::Inst& inst) { | ||
| 80 | if (inst.HasAssociatedPseudoOperation()) { | ||
| 81 | throw NotImplementedException("ShiftLeftLogical64 emulation with pseudo instructions"); | ||
| 82 | } | ||
| 83 | IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); | ||
| 84 | const auto [lo, hi]{Unpack(ir, inst.Arg(0))}; | ||
| 85 | const IR::U32 shift{inst.Arg(1)}; | ||
| 86 | |||
| 87 | const IR::U32 shifted_lo{ir.ShiftLeftLogical(lo, shift)}; | ||
| 88 | const IR::U32 shifted_hi{ir.ShiftLeftLogical(hi, shift)}; | ||
| 89 | |||
| 90 | const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))}; | ||
| 91 | const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)}; | ||
| 92 | const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))}; | ||
| 93 | |||
| 94 | const IR::U32 long_ret_lo{ir.Imm32(0)}; | ||
| 95 | const IR::U32 long_ret_hi{ir.ShiftLeftLogical(lo, inv_shift)}; | ||
| 96 | |||
| 97 | const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)}; | ||
| 98 | const IR::U32 lo_extract{ir.BitFieldExtract(lo, shift_complement, shift, false)}; | ||
| 99 | const IR::U32 short_ret_lo{shifted_lo}; | ||
| 100 | const IR::U32 short_ret_hi{ir.BitwiseOr(shifted_hi, lo_extract)}; | ||
| 101 | |||
| 102 | const IR::U32 zero_ret_lo{lo}; | ||
| 103 | const IR::U32 zero_ret_hi{hi}; | ||
| 104 | |||
| 105 | const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)}; | ||
| 106 | const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)}; | ||
| 107 | |||
| 108 | const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)}; | ||
| 109 | const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)}; | ||
| 110 | inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi)); | ||
| 111 | } | ||
| 112 | |||
| 113 | void ShiftRightLogical64To32(IR::Block& block, IR::Inst& inst) { | ||
| 114 | if (inst.HasAssociatedPseudoOperation()) { | ||
| 115 | throw NotImplementedException("ShiftRightLogical64 emulation with pseudo instructions"); | ||
| 116 | } | ||
| 117 | IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); | ||
| 118 | const auto [lo, hi]{Unpack(ir, inst.Arg(0))}; | ||
| 119 | const IR::U32 shift{inst.Arg(1)}; | ||
| 120 | |||
| 121 | const IR::U32 shifted_lo{ir.ShiftRightLogical(lo, shift)}; | ||
| 122 | const IR::U32 shifted_hi{ir.ShiftRightLogical(hi, shift)}; | ||
| 123 | |||
| 124 | const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))}; | ||
| 125 | const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)}; | ||
| 126 | const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))}; | ||
| 127 | |||
| 128 | const IR::U32 long_ret_hi{ir.Imm32(0)}; | ||
| 129 | const IR::U32 long_ret_lo{ir.ShiftRightLogical(hi, inv_shift)}; | ||
| 130 | |||
| 131 | const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)}; | ||
| 132 | const IR::U32 short_hi_extract{ir.BitFieldExtract(hi, ir.Imm32(0), shift)}; | ||
| 133 | const IR::U32 short_ret_hi{shifted_hi}; | ||
| 134 | const IR::U32 short_ret_lo{ | ||
| 135 | ir.BitFieldInsert(shifted_lo, short_hi_extract, shift_complement, shift)}; | ||
| 136 | |||
| 137 | const IR::U32 zero_ret_lo{lo}; | ||
| 138 | const IR::U32 zero_ret_hi{hi}; | ||
| 139 | |||
| 140 | const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)}; | ||
| 141 | const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)}; | ||
| 142 | |||
| 143 | const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)}; | ||
| 144 | const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)}; | ||
| 145 | inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi)); | ||
| 146 | } | ||
| 147 | |||
| 148 | void ShiftRightArithmetic64To32(IR::Block& block, IR::Inst& inst) { | ||
| 149 | if (inst.HasAssociatedPseudoOperation()) { | ||
| 150 | throw NotImplementedException("ShiftRightArithmetic64 emulation with pseudo instructions"); | ||
| 151 | } | ||
| 152 | IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); | ||
| 153 | const auto [lo, hi]{Unpack(ir, inst.Arg(0))}; | ||
| 154 | const IR::U32 shift{inst.Arg(1)}; | ||
| 155 | |||
| 156 | const IR::U32 shifted_lo{ir.ShiftRightLogical(lo, shift)}; | ||
| 157 | const IR::U32 shifted_hi{ir.ShiftRightArithmetic(hi, shift)}; | ||
| 158 | |||
| 159 | const IR::U32 sign_extension{ir.ShiftRightArithmetic(hi, ir.Imm32(31))}; | ||
| 160 | |||
| 161 | const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))}; | ||
| 162 | const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)}; | ||
| 163 | const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))}; | ||
| 164 | |||
| 165 | const IR::U32 long_ret_hi{sign_extension}; | ||
| 166 | const IR::U32 long_ret_lo{ir.ShiftRightArithmetic(hi, inv_shift)}; | ||
| 167 | |||
| 168 | const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)}; | ||
| 169 | const IR::U32 short_hi_extract{ir.BitFieldExtract(hi, ir.Imm32(0), shift)}; | ||
| 170 | const IR::U32 short_ret_hi{shifted_hi}; | ||
| 171 | const IR::U32 short_ret_lo{ | ||
| 172 | ir.BitFieldInsert(shifted_lo, short_hi_extract, shift_complement, shift)}; | ||
| 173 | |||
| 174 | const IR::U32 zero_ret_lo{lo}; | ||
| 175 | const IR::U32 zero_ret_hi{hi}; | ||
| 176 | |||
| 177 | const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)}; | ||
| 178 | const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)}; | ||
| 179 | |||
| 180 | const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)}; | ||
| 181 | const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)}; | ||
| 182 | inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi)); | ||
| 183 | } | ||
| 184 | |||
| 185 | void Lower(IR::Block& block, IR::Inst& inst) { | ||
| 186 | switch (inst.GetOpcode()) { | ||
| 187 | case IR::Opcode::PackUint2x32: | ||
| 188 | case IR::Opcode::UnpackUint2x32: | ||
| 189 | return inst.ReplaceOpcode(IR::Opcode::Identity); | ||
| 190 | case IR::Opcode::IAdd64: | ||
| 191 | return IAdd64To32(block, inst); | ||
| 192 | case IR::Opcode::ISub64: | ||
| 193 | return ISub64To32(block, inst); | ||
| 194 | case IR::Opcode::INeg64: | ||
| 195 | return INeg64To32(block, inst); | ||
| 196 | case IR::Opcode::ShiftLeftLogical64: | ||
| 197 | return ShiftLeftLogical64To32(block, inst); | ||
| 198 | case IR::Opcode::ShiftRightLogical64: | ||
| 199 | return ShiftRightLogical64To32(block, inst); | ||
| 200 | case IR::Opcode::ShiftRightArithmetic64: | ||
| 201 | return ShiftRightArithmetic64To32(block, inst); | ||
| 202 | default: | ||
| 203 | break; | ||
| 204 | } | ||
| 205 | } | ||
| 206 | } // Anonymous namespace | ||
| 207 | |||
| 208 | void LowerInt64ToInt32(IR::Program& program) { | ||
| 209 | const auto end{program.post_order_blocks.rend()}; | ||
| 210 | for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) { | ||
| 211 | IR::Block* const block{*it}; | ||
| 212 | for (IR::Inst& inst : block->Instructions()) { | ||
| 213 | Lower(*block, inst); | ||
| 214 | } | ||
| 215 | } | ||
| 216 | } | ||
| 217 | |||
| 218 | } // namespace Shader::Optimization | ||
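The 64-bit adder above follows standard carry propagation: add the low words, detect the carry as an unsigned overflow, and feed it into the high-word add. A standalone sketch with plain `uint32_t` pairs (the `U64Pair`/`Add64` names are hypothetical, mirroring the `IAdd`/`GetCarryFromOp` sequence with host integers) demonstrates the arithmetic:

```cpp
// Sketch of IAdd64To32: a 64-bit addition emulated with two 32-bit
// additions, propagating the carry of the low half into the high half.
#include <cassert>
#include <cstdint>

struct U64Pair {
    uint32_t lo;
    uint32_t hi;
};

U64Pair Add64(U64Pair a, U64Pair b) {
    const uint32_t lo = a.lo + b.lo;             // wrapping 32-bit add
    const uint32_t carry = lo < a.lo ? 1u : 0u;  // carry out of the low half
    const uint32_t hi = a.hi + b.hi + carry;
    return {lo, hi};
}

int main() {
    // 0xFFFF'FFFF + 1 = 0x1'0000'0000: the carry must reach the high word.
    const U64Pair r = Add64({0xFFFF'FFFFu, 0u}, {1u, 0u});
    assert(r.lo == 0u && r.hi == 1u);
}
```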
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h new file mode 100644 index 000000000..2f89b1ea0 --- /dev/null +++ b/src/shader_recompiler/ir_opt/passes.h | |||
| @@ -0,0 +1,32 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <span> | ||
| 8 | |||
| 9 | #include "shader_recompiler/environment.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 11 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 12 | |||
| 13 | namespace Shader::Optimization { | ||
| 14 | |||
| 15 | void CollectShaderInfoPass(Environment& env, IR::Program& program); | ||
| 16 | void ConstantPropagationPass(IR::Program& program); | ||
| 17 | void DeadCodeEliminationPass(IR::Program& program); | ||
| 18 | void GlobalMemoryToStorageBufferPass(IR::Program& program); | ||
| 19 | void IdentityRemovalPass(IR::Program& program); | ||
| 20 | void LowerFp16ToFp32(IR::Program& program); | ||
| 21 | void LowerInt64ToInt32(IR::Program& program); | ||
| 22 | void SsaRewritePass(IR::Program& program); | ||
| 23 | void TexturePass(Environment& env, IR::Program& program); | ||
| 24 | void VerificationPass(const IR::Program& program); | ||
| 25 | |||
| 26 | // Dual Vertex | ||
| 27 | void VertexATransformPass(IR::Program& program); | ||
| 28 | void VertexBTransformPass(IR::Program& program); | ||
| 29 | void JoinTextureInfo(Info& base, Info& source); | ||
| 30 | void JoinStorageInfo(Info& base, Info& source); | ||
| 31 | |||
| 32 | } // namespace Shader::Optimization | ||
diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp new file mode 100644 index 000000000..53145fb5e --- /dev/null +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | |||
| @@ -0,0 +1,383 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | // This file implements the SSA rewriting algorithm proposed in | ||
| 6 | // | ||
| 7 | // Simple and Efficient Construction of Static Single Assignment Form. | ||
| 8 | // Braun M., Buchwald S., Hack S., Leißa R., Mallon C., Zwinkau A. (2013) | ||
| 9 | // In: Jhala R., De Bosschere K. (eds) | ||
| 10 | // Compiler Construction. CC 2013. | ||
| 11 | // Lecture Notes in Computer Science, vol 7791. | ||
| 12 | // Springer, Berlin, Heidelberg | ||
| 13 | // | ||
| 14 | // https://link.springer.com/chapter/10.1007/978-3-642-37051-9_6 | ||
| 15 | // | ||
| 16 | |||
| 17 | #include <span> | ||
| 18 | #include <variant> | ||
| 19 | #include <vector> | ||
| 20 | |||
| 21 | #include <boost/container/flat_map.hpp> | ||
| 22 | #include <boost/container/flat_set.hpp> | ||
| 23 | |||
| 24 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 25 | #include "shader_recompiler/frontend/ir/opcodes.h" | ||
| 26 | #include "shader_recompiler/frontend/ir/pred.h" | ||
| 27 | #include "shader_recompiler/frontend/ir/reg.h" | ||
| 28 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 29 | #include "shader_recompiler/ir_opt/passes.h" | ||
| 30 | |||
| 31 | namespace Shader::Optimization { | ||
| 32 | namespace { | ||
| 33 | struct FlagTag { | ||
| 34 | auto operator<=>(const FlagTag&) const noexcept = default; | ||
| 35 | }; | ||
| 36 | struct ZeroFlagTag : FlagTag {}; | ||
| 37 | struct SignFlagTag : FlagTag {}; | ||
| 38 | struct CarryFlagTag : FlagTag {}; | ||
| 39 | struct OverflowFlagTag : FlagTag {}; | ||
| 40 | |||
| 41 | struct GotoVariable : FlagTag { | ||
| 42 | GotoVariable() = default; | ||
| 43 | explicit GotoVariable(u32 index_) : index{index_} {} | ||
| 44 | |||
| 45 | auto operator<=>(const GotoVariable&) const noexcept = default; | ||
| 46 | |||
| 47 | u32 index; | ||
| 48 | }; | ||
| 49 | |||
| 50 | struct IndirectBranchVariable { | ||
| 51 | auto operator<=>(const IndirectBranchVariable&) const noexcept = default; | ||
| 52 | }; | ||
| 53 | |||
| 54 | using Variant = std::variant<IR::Reg, IR::Pred, ZeroFlagTag, SignFlagTag, CarryFlagTag, | ||
| 55 | OverflowFlagTag, GotoVariable, IndirectBranchVariable>; | ||
| 56 | using ValueMap = boost::container::flat_map<IR::Block*, IR::Value>; | ||
| 57 | |||
| 58 | struct DefTable { | ||
| 59 | const IR::Value& Def(IR::Block* block, IR::Reg variable) { | ||
| 60 | return block->SsaRegValue(variable); | ||
| 61 | } | ||
| 62 | void SetDef(IR::Block* block, IR::Reg variable, const IR::Value& value) { | ||
| 63 | block->SetSsaRegValue(variable, value); | ||
| 64 | } | ||
| 65 | |||
| 66 | const IR::Value& Def(IR::Block* block, IR::Pred variable) { | ||
| 67 | return preds[IR::PredIndex(variable)][block]; | ||
| 68 | } | ||
| 69 | void SetDef(IR::Block* block, IR::Pred variable, const IR::Value& value) { | ||
| 70 | preds[IR::PredIndex(variable)].insert_or_assign(block, value); | ||
| 71 | } | ||
| 72 | |||
| 73 | const IR::Value& Def(IR::Block* block, GotoVariable variable) { | ||
| 74 | return goto_vars[variable.index][block]; | ||
| 75 | } | ||
| 76 | void SetDef(IR::Block* block, GotoVariable variable, const IR::Value& value) { | ||
| 77 | goto_vars[variable.index].insert_or_assign(block, value); | ||
| 78 | } | ||
| 79 | |||
| 80 | const IR::Value& Def(IR::Block* block, IndirectBranchVariable) { | ||
| 81 | return indirect_branch_var[block]; | ||
| 82 | } | ||
| 83 | void SetDef(IR::Block* block, IndirectBranchVariable, const IR::Value& value) { | ||
| 84 | indirect_branch_var.insert_or_assign(block, value); | ||
| 85 | } | ||
| 86 | |||
| 87 | const IR::Value& Def(IR::Block* block, ZeroFlagTag) { | ||
| 88 | return zero_flag[block]; | ||
| 89 | } | ||
| 90 | void SetDef(IR::Block* block, ZeroFlagTag, const IR::Value& value) { | ||
| 91 | zero_flag.insert_or_assign(block, value); | ||
| 92 | } | ||
| 93 | |||
| 94 | const IR::Value& Def(IR::Block* block, SignFlagTag) { | ||
| 95 | return sign_flag[block]; | ||
| 96 | } | ||
| 97 | void SetDef(IR::Block* block, SignFlagTag, const IR::Value& value) { | ||
| 98 | sign_flag.insert_or_assign(block, value); | ||
| 99 | } | ||
| 100 | |||
| 101 | const IR::Value& Def(IR::Block* block, CarryFlagTag) { | ||
| 102 | return carry_flag[block]; | ||
| 103 | } | ||
| 104 | void SetDef(IR::Block* block, CarryFlagTag, const IR::Value& value) { | ||
| 105 | carry_flag.insert_or_assign(block, value); | ||
| 106 | } | ||
| 107 | |||
| 108 | const IR::Value& Def(IR::Block* block, OverflowFlagTag) { | ||
| 109 | return overflow_flag[block]; | ||
| 110 | } | ||
| 111 | void SetDef(IR::Block* block, OverflowFlagTag, const IR::Value& value) { | ||
| 112 | overflow_flag.insert_or_assign(block, value); | ||
| 113 | } | ||
| 114 | |||
| 115 | std::array<ValueMap, IR::NUM_USER_PREDS> preds; | ||
| 116 | boost::container::flat_map<u32, ValueMap> goto_vars; | ||
| 117 | ValueMap indirect_branch_var; | ||
| 118 | ValueMap zero_flag; | ||
| 119 | ValueMap sign_flag; | ||
| 120 | ValueMap carry_flag; | ||
| 121 | ValueMap overflow_flag; | ||
| 122 | }; | ||
| 123 | |||
| 124 | IR::Opcode UndefOpcode(IR::Reg) noexcept { | ||
| 125 | return IR::Opcode::UndefU32; | ||
| 126 | } | ||
| 127 | |||
| 128 | IR::Opcode UndefOpcode(IR::Pred) noexcept { | ||
| 129 | return IR::Opcode::UndefU1; | ||
| 130 | } | ||
| 131 | |||
| 132 | IR::Opcode UndefOpcode(const FlagTag&) noexcept { | ||
| 133 | return IR::Opcode::UndefU1; | ||
| 134 | } | ||
| 135 | |||
| 136 | IR::Opcode UndefOpcode(IndirectBranchVariable) noexcept { | ||
| 137 | return IR::Opcode::UndefU32; | ||
| 138 | } | ||
| 139 | |||
| 140 | enum class Status { | ||
| 141 | Start, | ||
| 142 | SetValue, | ||
| 143 | PreparePhiArgument, | ||
| 144 | PushPhiArgument, | ||
| 145 | }; | ||
| 146 | |||
| 147 | template <typename Type> | ||
| 148 | struct ReadState { | ||
| 149 | ReadState(IR::Block* block_) : block{block_} {} | ||
| 150 | ReadState() = default; | ||
| 151 | |||
| 152 | IR::Block* block{}; | ||
| 153 | IR::Value result{}; | ||
| 154 | IR::Inst* phi{}; | ||
| 155 | IR::Block* const* pred_it{}; | ||
| 156 | IR::Block* const* pred_end{}; | ||
| 157 | Status pc{Status::Start}; | ||
| 158 | }; | ||
| 159 | |||
| 160 | class Pass { | ||
| 161 | public: | ||
| 162 | template <typename Type> | ||
| 163 | void WriteVariable(Type variable, IR::Block* block, const IR::Value& value) { | ||
| 164 | current_def.SetDef(block, variable, value); | ||
| 165 | } | ||
| 166 | |||
| 167 | template <typename Type> | ||
| 168 | IR::Value ReadVariable(Type variable, IR::Block* root_block) { | ||
| 169 | boost::container::small_vector<ReadState<Type>, 64> stack{ | ||
| 170 | ReadState<Type>(nullptr), | ||
| 171 | ReadState<Type>(root_block), | ||
| 172 | }; | ||
| 173 | const auto prepare_phi_operand{[&] { | ||
| 174 | if (stack.back().pred_it == stack.back().pred_end) { | ||
| 175 | IR::Inst* const phi{stack.back().phi}; | ||
| 176 | IR::Block* const block{stack.back().block}; | ||
| 177 | const IR::Value result{TryRemoveTrivialPhi(*phi, block, UndefOpcode(variable))}; | ||
| 178 | stack.pop_back(); | ||
| 179 | stack.back().result = result; | ||
| 180 | WriteVariable(variable, block, result); | ||
| 181 | } else { | ||
| 182 | IR::Block* const imm_pred{*stack.back().pred_it}; | ||
| 183 | stack.back().pc = Status::PushPhiArgument; | ||
| 184 | stack.emplace_back(imm_pred); | ||
| 185 | } | ||
| 186 | }}; | ||
| 187 | do { | ||
| 188 | IR::Block* const block{stack.back().block}; | ||
| 189 | switch (stack.back().pc) { | ||
| 190 | case Status::Start: { | ||
| 191 | if (const IR::Value& def = current_def.Def(block, variable); !def.IsEmpty()) { | ||
| 192 | stack.back().result = def; | ||
| 193 | } else if (!block->IsSsaSealed()) { | ||
| 194 | // Incomplete CFG | ||
| 195 | IR::Inst* phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; | ||
| 196 | phi->SetFlags(IR::TypeOf(UndefOpcode(variable))); | ||
| 197 | |||
| 198 | incomplete_phis[block].insert_or_assign(variable, phi); | ||
| 199 | stack.back().result = IR::Value{&*phi}; | ||
| 200 | } else if (const std::span imm_preds = block->ImmPredecessors(); | ||
| 201 | imm_preds.size() == 1) { | ||
| 202 | // Optimize the common case of one predecessor: no phi needed | ||
| 203 | stack.back().pc = Status::SetValue; | ||
| 204 | stack.emplace_back(imm_preds.front()); | ||
| 205 | break; | ||
| 206 | } else { | ||
| 207 | // Break potential cycles with operandless phi | ||
| 208 | IR::Inst* const phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; | ||
| 209 | phi->SetFlags(IR::TypeOf(UndefOpcode(variable))); | ||
| 210 | |||
| 211 | WriteVariable(variable, block, IR::Value{phi}); | ||
| 212 | |||
| 213 | stack.back().phi = phi; | ||
| 214 | stack.back().pred_it = imm_preds.data(); | ||
| 215 | stack.back().pred_end = imm_preds.data() + imm_preds.size(); | ||
| 216 | prepare_phi_operand(); | ||
| 217 | break; | ||
| 218 | } | ||
| 219 | } | ||
| 220 | [[fallthrough]]; | ||
| 221 | case Status::SetValue: { | ||
| 222 | const IR::Value result{stack.back().result}; | ||
| 223 | WriteVariable(variable, block, result); | ||
| 224 | stack.pop_back(); | ||
| 225 | stack.back().result = result; | ||
| 226 | break; | ||
| 227 | } | ||
| 228 | case Status::PushPhiArgument: { | ||
| 229 | IR::Inst* const phi{stack.back().phi}; | ||
| 230 | phi->AddPhiOperand(*stack.back().pred_it, stack.back().result); | ||
| 231 | ++stack.back().pred_it; | ||
| 232 | } | ||
| 233 | [[fallthrough]]; | ||
| 234 | case Status::PreparePhiArgument: | ||
| 235 | prepare_phi_operand(); | ||
| 236 | break; | ||
| 237 | } | ||
| 238 | } while (stack.size() > 1); | ||
| 239 | return stack.back().result; | ||
| 240 | } | ||
| 241 | |||
| 242 | void SealBlock(IR::Block* block) { | ||
| 243 | const auto it{incomplete_phis.find(block)}; | ||
| 244 | if (it != incomplete_phis.end()) { | ||
| 245 | for (auto& pair : it->second) { | ||
| 246 | auto& variant{pair.first}; | ||
| 247 | auto& phi{pair.second}; | ||
| 248 | std::visit([&](auto& variable) { AddPhiOperands(variable, *phi, block); }, variant); | ||
| 249 | } | ||
| 250 | } | ||
| 251 | block->SsaSeal(); | ||
| 252 | } | ||
| 253 | |||
| 254 | private: | ||
| 255 | template <typename Type> | ||
| 256 | IR::Value AddPhiOperands(Type variable, IR::Inst& phi, IR::Block* block) { | ||
| 257 | for (IR::Block* const imm_pred : block->ImmPredecessors()) { | ||
| 258 | phi.AddPhiOperand(imm_pred, ReadVariable(variable, imm_pred)); | ||
| 259 | } | ||
| 260 | return TryRemoveTrivialPhi(phi, block, UndefOpcode(variable)); | ||
| 261 | } | ||
| 262 | |||
| 263 | IR::Value TryRemoveTrivialPhi(IR::Inst& phi, IR::Block* block, IR::Opcode undef_opcode) { | ||
| 264 | IR::Value same; | ||
| 265 | const size_t num_args{phi.NumArgs()}; | ||
| 266 | for (size_t arg_index = 0; arg_index < num_args; ++arg_index) { | ||
| 267 | const IR::Value& op{phi.Arg(arg_index)}; | ||
| 268 | if (op.Resolve() == same.Resolve() || op == IR::Value{&phi}) { | ||
| 269 | // Unique value or self-reference | ||
| 270 | continue; | ||
| 271 | } | ||
| 272 | if (!same.IsEmpty()) { | ||
| 273 | // The phi merges at least two values: not trivial | ||
| 274 | return IR::Value{&phi}; | ||
| 275 | } | ||
| 276 | same = op; | ||
| 277 | } | ||
| 278 | // Remove the phi node from the block; it will be reinserted | ||
| 279 | IR::Block::InstructionList& list{block->Instructions()}; | ||
| 280 | list.erase(IR::Block::InstructionList::s_iterator_to(phi)); | ||
| 281 | |||
| 282 | // Find the first non-phi instruction and use it as an insertion point | ||
| 283 | IR::Block::iterator reinsert_point{std::ranges::find_if_not(list, IR::IsPhi)}; | ||
| 284 | if (same.IsEmpty()) { | ||
| 285 | // The phi is unreachable or in the start block | ||
| 286 | // Insert an undefined instruction and make it the phi node replacement | ||
| 287 | // The phi node reinsertion point is set to just after this instruction | ||
| 288 | reinsert_point = block->PrependNewInst(reinsert_point, undef_opcode); | ||
| 289 | same = IR::Value{&*reinsert_point}; | ||
| 290 | ++reinsert_point; | ||
| 291 | } | ||
| 292 | // Reinsert the phi node and reroute all its uses to the "same" value | ||
| 293 | list.insert(reinsert_point, phi); | ||
| 294 | phi.ReplaceUsesWith(same); | ||
| 295 | // TODO: Try to recursively remove all phi users, which might have become trivial | ||
| 296 | return same; | ||
| 297 | } | ||
| 298 | |||
| 299 | boost::container::flat_map<IR::Block*, boost::container::flat_map<Variant, IR::Inst*>> | ||
| 300 | incomplete_phis; | ||
| 301 | DefTable current_def; | ||
| 302 | }; | ||
| 303 | |||
| 304 | void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { | ||
| 305 | switch (inst.GetOpcode()) { | ||
| 306 | case IR::Opcode::SetRegister: | ||
| 307 | if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { | ||
| 308 | pass.WriteVariable(reg, block, inst.Arg(1)); | ||
| 309 | } | ||
| 310 | break; | ||
| 311 | case IR::Opcode::SetPred: | ||
| 312 | if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) { | ||
| 313 | pass.WriteVariable(pred, block, inst.Arg(1)); | ||
| 314 | } | ||
| 315 | break; | ||
| 316 | case IR::Opcode::SetGotoVariable: | ||
| 317 | pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1)); | ||
| 318 | break; | ||
| 319 | case IR::Opcode::SetIndirectBranchVariable: | ||
| 320 | pass.WriteVariable(IndirectBranchVariable{}, block, inst.Arg(0)); | ||
| 321 | break; | ||
| 322 | case IR::Opcode::SetZFlag: | ||
| 323 | pass.WriteVariable(ZeroFlagTag{}, block, inst.Arg(0)); | ||
| 324 | break; | ||
| 325 | case IR::Opcode::SetSFlag: | ||
| 326 | pass.WriteVariable(SignFlagTag{}, block, inst.Arg(0)); | ||
| 327 | break; | ||
| 328 | case IR::Opcode::SetCFlag: | ||
| 329 | pass.WriteVariable(CarryFlagTag{}, block, inst.Arg(0)); | ||
| 330 | break; | ||
| 331 | case IR::Opcode::SetOFlag: | ||
| 332 | pass.WriteVariable(OverflowFlagTag{}, block, inst.Arg(0)); | ||
| 333 | break; | ||
| 334 | case IR::Opcode::GetRegister: | ||
| 335 | if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { | ||
| 336 | inst.ReplaceUsesWith(pass.ReadVariable(reg, block)); | ||
| 337 | } | ||
| 338 | break; | ||
| 339 | case IR::Opcode::GetPred: | ||
| 340 | if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) { | ||
| 341 | inst.ReplaceUsesWith(pass.ReadVariable(pred, block)); | ||
| 342 | } | ||
| 343 | break; | ||
| 344 | case IR::Opcode::GetGotoVariable: | ||
| 345 | inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block)); | ||
| 346 | break; | ||
| 347 | case IR::Opcode::GetIndirectBranchVariable: | ||
| 348 | inst.ReplaceUsesWith(pass.ReadVariable(IndirectBranchVariable{}, block)); | ||
| 349 | break; | ||
| 350 | case IR::Opcode::GetZFlag: | ||
| 351 | inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block)); | ||
| 352 | break; | ||
| 353 | case IR::Opcode::GetSFlag: | ||
| 354 | inst.ReplaceUsesWith(pass.ReadVariable(SignFlagTag{}, block)); | ||
| 355 | break; | ||
| 356 | case IR::Opcode::GetCFlag: | ||
| 357 | inst.ReplaceUsesWith(pass.ReadVariable(CarryFlagTag{}, block)); | ||
| 358 | break; | ||
| 359 | case IR::Opcode::GetOFlag: | ||
| 360 | inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block)); | ||
| 361 | break; | ||
| 362 | default: | ||
| 363 | break; | ||
| 364 | } | ||
| 365 | } | ||
| 366 | |||
| 367 | void VisitBlock(Pass& pass, IR::Block* block) { | ||
| 368 | for (IR::Inst& inst : block->Instructions()) { | ||
| 369 | VisitInst(pass, block, inst); | ||
| 370 | } | ||
| 371 | pass.SealBlock(block); | ||
| 372 | } | ||
| 373 | } // Anonymous namespace | ||
| 374 | |||
| 375 | void SsaRewritePass(IR::Program& program) { | ||
| 376 | Pass pass; | ||
| 377 | const auto end{program.post_order_blocks.rend()}; | ||
| 378 | for (auto block = program.post_order_blocks.rbegin(); block != end; ++block) { | ||
| 379 | VisitBlock(pass, *block); | ||
| 380 | } | ||
| 381 | } | ||
| 382 | |||
| 383 | } // namespace Shader::Optimization | ||
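TryRemoveTrivialPhi implements the triviality test from the Braun et al. paper cited in the file header: a phi is trivial when, ignoring self-references, all of its operands resolve to a single value. A compact sketch with a hypothetical `Value` type (using an integer id as a stand-in for IR value identity) captures the test:

```cpp
// Sketch of the trivial-phi test: scan the operands, skipping the phi's own
// self-references, and succeed only if at most one distinct value remains.
#include <cassert>
#include <vector>

struct Value {
    int id; // stand-in for an IR value identity
    bool operator==(const Value&) const = default;
};

// Returns the unique non-self operand if the phi is trivial, else nullptr.
const Value* TrivialPhiValue(const Value& phi, const std::vector<Value>& ops) {
    const Value* same = nullptr;
    for (const Value& op : ops) {
        if (op == phi || (same && op == *same)) {
            continue; // self-reference or already-seen value
        }
        if (same) {
            return nullptr; // merges two distinct values: not trivial
        }
        same = &op;
    }
    return same; // may be null for an unreachable or start-block phi
}

int main() {
    const Value phi{0};
    const std::vector<Value> trivial{{1}, {0}, {1}}; // v1, self, v1
    const std::vector<Value> real{{1}, {2}};         // two distinct values
    assert(TrivialPhiValue(phi, trivial)->id == 1);
    assert(TrivialPhiValue(phi, real) == nullptr);
}
```

When the test succeeds, the pass reroutes every use of the phi to the surviving value, falling back to a freshly inserted undefined instruction when no operand survives at all.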
diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp new file mode 100644 index 000000000..44ad10d43 --- /dev/null +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp | |||
| @@ -0,0 +1,523 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <bit> | ||
| 7 | #include <optional> | ||
| 8 | |||
| 9 | #include <boost/container/small_vector.hpp> | ||
| 10 | |||
| 11 | #include "shader_recompiler/environment.h" | ||
| 12 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 13 | #include "shader_recompiler/frontend/ir/breadth_first_search.h" | ||
| 14 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 15 | #include "shader_recompiler/ir_opt/passes.h" | ||
| 16 | #include "shader_recompiler/shader_info.h" | ||
| 17 | |||
| 18 | namespace Shader::Optimization { | ||
| 19 | namespace { | ||
| 20 | struct ConstBufferAddr { | ||
| 21 | u32 index; | ||
| 22 | u32 offset; | ||
| 23 | u32 secondary_index; | ||
| 24 | u32 secondary_offset; | ||
| 25 | IR::U32 dynamic_offset; | ||
| 26 | u32 count; | ||
| 27 | bool has_secondary; | ||
| 28 | }; | ||
| 29 | |||
| 30 | struct TextureInst { | ||
| 31 | ConstBufferAddr cbuf; | ||
| 32 | IR::Inst* inst; | ||
| 33 | IR::Block* block; | ||
| 34 | }; | ||
| 35 | |||
| 36 | using TextureInstVector = boost::container::small_vector<TextureInst, 24>; | ||
| 37 | |||
| 38 | constexpr u32 DESCRIPTOR_SIZE = 8; | ||
| 39 | constexpr u32 DESCRIPTOR_SIZE_SHIFT = static_cast<u32>(std::countr_zero(DESCRIPTOR_SIZE)); | ||
| 40 | |||
| 41 | IR::Opcode IndexedInstruction(const IR::Inst& inst) { | ||
| 42 | switch (inst.GetOpcode()) { | ||
| 43 | case IR::Opcode::BindlessImageSampleImplicitLod: | ||
| 44 | case IR::Opcode::BoundImageSampleImplicitLod: | ||
| 45 | return IR::Opcode::ImageSampleImplicitLod; | ||
| 46 | case IR::Opcode::BoundImageSampleExplicitLod: | ||
| 47 | case IR::Opcode::BindlessImageSampleExplicitLod: | ||
| 48 | return IR::Opcode::ImageSampleExplicitLod; | ||
| 49 | case IR::Opcode::BoundImageSampleDrefImplicitLod: | ||
| 50 | case IR::Opcode::BindlessImageSampleDrefImplicitLod: | ||
| 51 | return IR::Opcode::ImageSampleDrefImplicitLod; | ||
| 52 | case IR::Opcode::BoundImageSampleDrefExplicitLod: | ||
| 53 | case IR::Opcode::BindlessImageSampleDrefExplicitLod: | ||
| 54 | return IR::Opcode::ImageSampleDrefExplicitLod; | ||
| 55 | case IR::Opcode::BindlessImageGather: | ||
| 56 | case IR::Opcode::BoundImageGather: | ||
| 57 | return IR::Opcode::ImageGather; | ||
| 58 | case IR::Opcode::BindlessImageGatherDref: | ||
| 59 | case IR::Opcode::BoundImageGatherDref: | ||
| 60 | return IR::Opcode::ImageGatherDref; | ||
| 61 | case IR::Opcode::BindlessImageFetch: | ||
| 62 | case IR::Opcode::BoundImageFetch: | ||
| 63 | return IR::Opcode::ImageFetch; | ||
| 64 | case IR::Opcode::BoundImageQueryDimensions: | ||
| 65 | case IR::Opcode::BindlessImageQueryDimensions: | ||
| 66 | return IR::Opcode::ImageQueryDimensions; | ||
| 67 | case IR::Opcode::BoundImageQueryLod: | ||
| 68 | case IR::Opcode::BindlessImageQueryLod: | ||
| 69 | return IR::Opcode::ImageQueryLod; | ||
| 70 | case IR::Opcode::BoundImageGradient: | ||
| 71 | case IR::Opcode::BindlessImageGradient: | ||
| 72 | return IR::Opcode::ImageGradient; | ||
| 73 | case IR::Opcode::BoundImageRead: | ||
| 74 | case IR::Opcode::BindlessImageRead: | ||
| 75 | return IR::Opcode::ImageRead; | ||
| 76 | case IR::Opcode::BoundImageWrite: | ||
| 77 | case IR::Opcode::BindlessImageWrite: | ||
| 78 | return IR::Opcode::ImageWrite; | ||
| 79 | case IR::Opcode::BoundImageAtomicIAdd32: | ||
| 80 | case IR::Opcode::BindlessImageAtomicIAdd32: | ||
| 81 | return IR::Opcode::ImageAtomicIAdd32; | ||
| 82 | case IR::Opcode::BoundImageAtomicSMin32: | ||
| 83 | case IR::Opcode::BindlessImageAtomicSMin32: | ||
| 84 | return IR::Opcode::ImageAtomicSMin32; | ||
| 85 | case IR::Opcode::BoundImageAtomicUMin32: | ||
| 86 | case IR::Opcode::BindlessImageAtomicUMin32: | ||
| 87 | return IR::Opcode::ImageAtomicUMin32; | ||
| 88 | case IR::Opcode::BoundImageAtomicSMax32: | ||
| 89 | case IR::Opcode::BindlessImageAtomicSMax32: | ||
| 90 | return IR::Opcode::ImageAtomicSMax32; | ||
| 91 | case IR::Opcode::BoundImageAtomicUMax32: | ||
| 92 | case IR::Opcode::BindlessImageAtomicUMax32: | ||
| 93 | return IR::Opcode::ImageAtomicUMax32; | ||
| 94 | case IR::Opcode::BoundImageAtomicInc32: | ||
| 95 | case IR::Opcode::BindlessImageAtomicInc32: | ||
| 96 | return IR::Opcode::ImageAtomicInc32; | ||
| 97 | case IR::Opcode::BoundImageAtomicDec32: | ||
| 98 | case IR::Opcode::BindlessImageAtomicDec32: | ||
| 99 | return IR::Opcode::ImageAtomicDec32; | ||
| 100 | case IR::Opcode::BoundImageAtomicAnd32: | ||
| 101 | case IR::Opcode::BindlessImageAtomicAnd32: | ||
| 102 | return IR::Opcode::ImageAtomicAnd32; | ||
| 103 | case IR::Opcode::BoundImageAtomicOr32: | ||
| 104 | case IR::Opcode::BindlessImageAtomicOr32: | ||
| 105 | return IR::Opcode::ImageAtomicOr32; | ||
| 106 | case IR::Opcode::BoundImageAtomicXor32: | ||
| 107 | case IR::Opcode::BindlessImageAtomicXor32: | ||
| 108 | return IR::Opcode::ImageAtomicXor32; | ||
| 109 | case IR::Opcode::BoundImageAtomicExchange32: | ||
| 110 | case IR::Opcode::BindlessImageAtomicExchange32: | ||
| 111 | return IR::Opcode::ImageAtomicExchange32; | ||
| 112 | default: | ||
| 113 | return IR::Opcode::Void; | ||
| 114 | } | ||
| 115 | } | ||
| 116 | |||
| 117 | bool IsBindless(const IR::Inst& inst) { | ||
| 118 | switch (inst.GetOpcode()) { | ||
| 119 | case IR::Opcode::BindlessImageSampleImplicitLod: | ||
| 120 | case IR::Opcode::BindlessImageSampleExplicitLod: | ||
| 121 | case IR::Opcode::BindlessImageSampleDrefImplicitLod: | ||
| 122 | case IR::Opcode::BindlessImageSampleDrefExplicitLod: | ||
| 123 | case IR::Opcode::BindlessImageGather: | ||
| 124 | case IR::Opcode::BindlessImageGatherDref: | ||
| 125 | case IR::Opcode::BindlessImageFetch: | ||
| 126 | case IR::Opcode::BindlessImageQueryDimensions: | ||
| 127 | case IR::Opcode::BindlessImageQueryLod: | ||
| 128 | case IR::Opcode::BindlessImageGradient: | ||
| 129 | case IR::Opcode::BindlessImageRead: | ||
| 130 | case IR::Opcode::BindlessImageWrite: | ||
| 131 | case IR::Opcode::BindlessImageAtomicIAdd32: | ||
| 132 | case IR::Opcode::BindlessImageAtomicSMin32: | ||
| 133 | case IR::Opcode::BindlessImageAtomicUMin32: | ||
| 134 | case IR::Opcode::BindlessImageAtomicSMax32: | ||
| 135 | case IR::Opcode::BindlessImageAtomicUMax32: | ||
| 136 | case IR::Opcode::BindlessImageAtomicInc32: | ||
| 137 | case IR::Opcode::BindlessImageAtomicDec32: | ||
| 138 | case IR::Opcode::BindlessImageAtomicAnd32: | ||
| 139 | case IR::Opcode::BindlessImageAtomicOr32: | ||
| 140 | case IR::Opcode::BindlessImageAtomicXor32: | ||
| 141 | case IR::Opcode::BindlessImageAtomicExchange32: | ||
| 142 | return true; | ||
| 143 | case IR::Opcode::BoundImageSampleImplicitLod: | ||
| 144 | case IR::Opcode::BoundImageSampleExplicitLod: | ||
| 145 | case IR::Opcode::BoundImageSampleDrefImplicitLod: | ||
| 146 | case IR::Opcode::BoundImageSampleDrefExplicitLod: | ||
| 147 | case IR::Opcode::BoundImageGather: | ||
| 148 | case IR::Opcode::BoundImageGatherDref: | ||
| 149 | case IR::Opcode::BoundImageFetch: | ||
| 150 | case IR::Opcode::BoundImageQueryDimensions: | ||
| 151 | case IR::Opcode::BoundImageQueryLod: | ||
| 152 | case IR::Opcode::BoundImageGradient: | ||
| 153 | case IR::Opcode::BoundImageRead: | ||
| 154 | case IR::Opcode::BoundImageWrite: | ||
| 155 | case IR::Opcode::BoundImageAtomicIAdd32: | ||
| 156 | case IR::Opcode::BoundImageAtomicSMin32: | ||
| 157 | case IR::Opcode::BoundImageAtomicUMin32: | ||
| 158 | case IR::Opcode::BoundImageAtomicSMax32: | ||
| 159 | case IR::Opcode::BoundImageAtomicUMax32: | ||
| 160 | case IR::Opcode::BoundImageAtomicInc32: | ||
| 161 | case IR::Opcode::BoundImageAtomicDec32: | ||
| 162 | case IR::Opcode::BoundImageAtomicAnd32: | ||
| 163 | case IR::Opcode::BoundImageAtomicOr32: | ||
| 164 | case IR::Opcode::BoundImageAtomicXor32: | ||
| 165 | case IR::Opcode::BoundImageAtomicExchange32: | ||
| 166 | return false; | ||
| 167 | default: | ||
| 168 | throw InvalidArgument("Invalid opcode {}", inst.GetOpcode()); | ||
| 169 | } | ||
| 170 | } | ||
| 171 | |||
| 172 | bool IsTextureInstruction(const IR::Inst& inst) { | ||
| 173 | return IndexedInstruction(inst) != IR::Opcode::Void; | ||
| 174 | } | ||
| 175 | |||
| 176 | std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst); | ||
| 177 | |||
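| | // Breadth-first search from a handle value back to the const buffer read that produced it. | ||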
| 178 | std::optional<ConstBufferAddr> Track(const IR::Value& value) { | ||
| 179 | return IR::BreadthFirstSearch(value, TryGetConstBuffer); | ||
| 180 | } | ||
| 181 | |||
| 182 | std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst) { | ||
| 183 | switch (inst->GetOpcode()) { | ||
| 184 | default: | ||
| 185 | return std::nullopt; | ||
| 186 | case IR::Opcode::BitwiseOr32: { | ||
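| | // Separate texture and sampler handles may be combined with a bitwise OR; track both | ||
| | // operands and record the second const buffer read as the secondary address. | ||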
| 187 | std::optional lhs{Track(inst->Arg(0))}; | ||
| 188 | std::optional rhs{Track(inst->Arg(1))}; | ||
| 189 | if (!lhs || !rhs) { | ||
| 190 | return std::nullopt; | ||
| 191 | } | ||
| 192 | if (lhs->has_secondary || rhs->has_secondary) { | ||
| 193 | return std::nullopt; | ||
| 194 | } | ||
| 195 | if (lhs->count > 1 || rhs->count > 1) { | ||
| 196 | return std::nullopt; | ||
| 197 | } | ||
| 198 | if (lhs->index > rhs->index || lhs->offset > rhs->offset) { | ||
| 199 | std::swap(lhs, rhs); | ||
| 200 | } | ||
| 201 | return ConstBufferAddr{ | ||
| 202 | .index = lhs->index, | ||
| 203 | .offset = lhs->offset, | ||
| 204 | .secondary_index = rhs->index, | ||
| 205 | .secondary_offset = rhs->offset, | ||
| 206 | .dynamic_offset = {}, | ||
| 207 | .count = 1, | ||
| 208 | .has_secondary = true, | ||
| 209 | }; | ||
| 210 | } | ||
| 211 | case IR::Opcode::GetCbufU32x2: | ||
| 212 | case IR::Opcode::GetCbufU32: | ||
| 213 | break; | ||
| 214 | } | ||
| 215 | const IR::Value index{inst->Arg(0)}; | ||
| 216 | const IR::Value offset{inst->Arg(1)}; | ||
| 217 | if (!index.IsImmediate()) { | ||
| 218 | // Reading a bindless texture from variable indices is valid | ||
| 219 | // but not supported here at the moment | ||
| 220 | return std::nullopt; | ||
| 221 | } | ||
| 222 | if (offset.IsImmediate()) { | ||
| 223 | return ConstBufferAddr{ | ||
| 224 | .index = index.U32(), | ||
| 225 | .offset = offset.U32(), | ||
| 226 | .secondary_index = 0, | ||
| 227 | .secondary_offset = 0, | ||
| 228 | .dynamic_offset = {}, | ||
| 229 | .count = 1, | ||
| 230 | .has_secondary = false, | ||
| 231 | }; | ||
| 232 | } | ||
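| | // A non-immediate offset is matched as base + dynamic_offset (IAdd32 with exactly one | ||
| | // immediate operand), i.e. an indexed descriptor array; the array is given a fixed count of 8. | ||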
| 233 | IR::Inst* const offset_inst{offset.InstRecursive()}; | ||
| 234 | if (offset_inst->GetOpcode() != IR::Opcode::IAdd32) { | ||
| 235 | return std::nullopt; | ||
| 236 | } | ||
| 237 | u32 base_offset{}; | ||
| 238 | IR::U32 dynamic_offset; | ||
| 239 | if (offset_inst->Arg(0).IsImmediate()) { | ||
| 240 | base_offset = offset_inst->Arg(0).U32(); | ||
| 241 | dynamic_offset = IR::U32{offset_inst->Arg(1)}; | ||
| 242 | } else if (offset_inst->Arg(1).IsImmediate()) { | ||
| 243 | base_offset = offset_inst->Arg(1).U32(); | ||
| 244 | dynamic_offset = IR::U32{offset_inst->Arg(0)}; | ||
| 245 | } else { | ||
| 246 | return std::nullopt; | ||
| 247 | } | ||
| 248 | return ConstBufferAddr{ | ||
| 249 | .index = index.U32(), | ||
| 250 | .offset = base_offset, | ||
| 251 | .secondary_index = 0, | ||
| 252 | .secondary_offset = 0, | ||
| 253 | .dynamic_offset = dynamic_offset, | ||
| 254 | .count = 8, | ||
| 255 | .has_secondary = false, | ||
| 256 | }; | ||
| 257 | } | ||
| 258 | |||
| 259 | TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { | ||
| 260 | ConstBufferAddr addr; | ||
| 261 | if (IsBindless(inst)) { | ||
| 262 | const std::optional<ConstBufferAddr> track_addr{Track(inst.Arg(0))}; | ||
| 263 | if (!track_addr) { | ||
| 264 | throw NotImplementedException("Failed to track bindless texture constant buffer"); | ||
| 265 | } | ||
| 266 | addr = *track_addr; | ||
| 267 | } else { | ||
| 268 | addr = ConstBufferAddr{ | ||
| 269 | .index = env.TextureBoundBuffer(), | ||
| 270 | .offset = inst.Arg(0).U32(), | ||
| 271 | .secondary_index = 0, | ||
| 272 | .secondary_offset = 0, | ||
| 273 | .dynamic_offset = {}, | ||
| 274 | .count = 1, | ||
| 275 | .has_secondary = false, | ||
| 276 | }; | ||
| 277 | } | ||
| 278 | return TextureInst{ | ||
| 279 | .cbuf = addr, | ||
| 280 | .inst = &inst, | ||
| 281 | .block = block, | ||
| 282 | }; | ||
| 283 | } | ||
| 284 | |||
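| | // Rebuild the handle the way the shader did: OR the primary and optional secondary cbuf | ||
| | // words, then query the environment for the texture type of that handle. | ||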
| 285 | TextureType ReadTextureType(Environment& env, const ConstBufferAddr& cbuf) { | ||
| 286 | const u32 secondary_index{cbuf.has_secondary ? cbuf.secondary_index : cbuf.index}; | ||
| 287 | const u32 secondary_offset{cbuf.has_secondary ? cbuf.secondary_offset : cbuf.offset}; | ||
| 288 | const u32 lhs_raw{env.ReadCbufValue(cbuf.index, cbuf.offset)}; | ||
| 289 | const u32 rhs_raw{env.ReadCbufValue(secondary_index, secondary_offset)}; | ||
| 290 | return env.ReadTextureType(lhs_raw | rhs_raw); | ||
| 291 | } | ||
| 292 | |||
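| | // Accumulates descriptors into the shader info, reusing the index of an existing entry | ||
| | // when an equivalent descriptor has already been added. | ||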
| 293 | class Descriptors { | ||
| 294 | public: | ||
| 295 | explicit Descriptors(TextureBufferDescriptors& texture_buffer_descriptors_, | ||
| 296 | ImageBufferDescriptors& image_buffer_descriptors_, | ||
| 297 | TextureDescriptors& texture_descriptors_, | ||
| 298 | ImageDescriptors& image_descriptors_) | ||
| 299 | : texture_buffer_descriptors{texture_buffer_descriptors_}, | ||
| 300 | image_buffer_descriptors{image_buffer_descriptors_}, | ||
| 301 | texture_descriptors{texture_descriptors_}, image_descriptors{image_descriptors_} {} | ||
| 302 | |||
| 303 | u32 Add(const TextureBufferDescriptor& desc) { | ||
| 304 | return Add(texture_buffer_descriptors, desc, [&desc](const auto& existing) { | ||
| 305 | return desc.cbuf_index == existing.cbuf_index && | ||
| 306 | desc.cbuf_offset == existing.cbuf_offset && | ||
| 307 | desc.secondary_cbuf_index == existing.secondary_cbuf_index && | ||
| 308 | desc.secondary_cbuf_offset == existing.secondary_cbuf_offset && | ||
| 309 | desc.count == existing.count && desc.size_shift == existing.size_shift && | ||
| 310 | desc.has_secondary == existing.has_secondary; | ||
| 311 | }); | ||
| 312 | } | ||
| 313 | |||
| 314 | u32 Add(const ImageBufferDescriptor& desc) { | ||
| 315 | const u32 index{Add(image_buffer_descriptors, desc, [&desc](const auto& existing) { | ||
| 316 | return desc.format == existing.format && desc.cbuf_index == existing.cbuf_index && | ||
| 317 | desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count && | ||
| 318 | desc.size_shift == existing.size_shift; | ||
| 319 | })}; | ||
| 320 | image_buffer_descriptors[index].is_written |= desc.is_written; | ||
| 321 | image_buffer_descriptors[index].is_read |= desc.is_read; | ||
| 322 | return index; | ||
| 323 | } | ||
| 324 | |||
| 325 | u32 Add(const TextureDescriptor& desc) { | ||
| 326 | return Add(texture_descriptors, desc, [&desc](const auto& existing) { | ||
| 327 | return desc.type == existing.type && desc.is_depth == existing.is_depth && | ||
| 328 | desc.has_secondary == existing.has_secondary && | ||
| 329 | desc.cbuf_index == existing.cbuf_index && | ||
| 330 | desc.cbuf_offset == existing.cbuf_offset && | ||
| 331 | desc.secondary_cbuf_index == existing.secondary_cbuf_index && | ||
| 332 | desc.secondary_cbuf_offset == existing.secondary_cbuf_offset && | ||
| 333 | desc.count == existing.count && desc.size_shift == existing.size_shift; | ||
| 334 | }); | ||
| 335 | } | ||
| 336 | |||
| 337 | u32 Add(const ImageDescriptor& desc) { | ||
| 338 | const u32 index{Add(image_descriptors, desc, [&desc](const auto& existing) { | ||
| 339 | return desc.type == existing.type && desc.format == existing.format && | ||
| 340 | desc.cbuf_index == existing.cbuf_index && | ||
| 341 | desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count && | ||
| 342 | desc.size_shift == existing.size_shift; | ||
| 343 | })}; | ||
| 344 | image_descriptors[index].is_written |= desc.is_written; | ||
| 345 | image_descriptors[index].is_read |= desc.is_read; | ||
| 346 | return index; | ||
| 347 | } | ||
| 348 | |||
| 349 | private: | ||
| 350 | template <typename Descriptors, typename Descriptor, typename Func> | ||
| 351 | static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) { | ||
| 352 | // TODO: Handle arrays | ||
| 353 | const auto it{std::ranges::find_if(descriptors, pred)}; | ||
| 354 | if (it != descriptors.end()) { | ||
| 355 | return static_cast<u32>(std::distance(descriptors.begin(), it)); | ||
| 356 | } | ||
| 357 | descriptors.push_back(desc); | ||
| 358 | return static_cast<u32>(descriptors.size()) - 1; | ||
| 359 | } | ||
| 360 | |||
| 361 | TextureBufferDescriptors& texture_buffer_descriptors; | ||
| 362 | ImageBufferDescriptors& image_buffer_descriptors; | ||
| 363 | TextureDescriptors& texture_descriptors; | ||
| 364 | ImageDescriptors& image_descriptors; | ||
| 365 | }; | ||
| 366 | } // Anonymous namespace | ||
| 367 | |||
| 368 | void TexturePass(Environment& env, IR::Program& program) { | ||
| 369 | TextureInstVector to_replace; | ||
| 370 | for (IR::Block* const block : program.post_order_blocks) { | ||
| 371 | for (IR::Inst& inst : block->Instructions()) { | ||
| 372 | if (!IsTextureInstruction(inst)) { | ||
| 373 | continue; | ||
| 374 | } | ||
| 375 | to_replace.push_back(MakeInst(env, block, inst)); | ||
| 376 | } | ||
| 377 | } | ||
| 378 | // Sort instructions to visit textures by constant buffer index, then by offset | ||
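| | // (a plain sort by offset followed by a stable sort by index yields (index, offset) order) | ||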
| 379 | std::ranges::sort(to_replace, [](const auto& lhs, const auto& rhs) { | ||
| 380 | return lhs.cbuf.offset < rhs.cbuf.offset; | ||
| 381 | }); | ||
| 382 | std::stable_sort(to_replace.begin(), to_replace.end(), [](const auto& lhs, const auto& rhs) { | ||
| 383 | return lhs.cbuf.index < rhs.cbuf.index; | ||
| 384 | }); | ||
| 385 | Descriptors descriptors{ | ||
| 386 | program.info.texture_buffer_descriptors, | ||
| 387 | program.info.image_buffer_descriptors, | ||
| 388 | program.info.texture_descriptors, | ||
| 389 | program.info.image_descriptors, | ||
| 390 | }; | ||
| 391 | for (TextureInst& texture_inst : to_replace) { | ||
| 392 | // TODO: Handle arrays | ||
| 393 | IR::Inst* const inst{texture_inst.inst}; | ||
| 394 | inst->ReplaceOpcode(IndexedInstruction(*inst)); | ||
| 395 | |||
| 396 | const auto& cbuf{texture_inst.cbuf}; | ||
| 397 | auto flags{inst->Flags<IR::TextureInstInfo>()}; | ||
| 398 | switch (inst->GetOpcode()) { | ||
| 399 | case IR::Opcode::ImageQueryDimensions: | ||
| 400 | flags.type.Assign(ReadTextureType(env, cbuf)); | ||
| 401 | inst->SetFlags(flags); | ||
| 402 | break; | ||
| 403 | case IR::Opcode::ImageFetch: | ||
| 404 | if (flags.type != TextureType::Color1D) { | ||
| 405 | break; | ||
| 406 | } | ||
| 407 | if (ReadTextureType(env, cbuf) == TextureType::Buffer) { | ||
| 408 | // Replace the type with the bound texture type only when it is a texture buffer. | ||
| 409 | // If the instruction is 1D and the bound type is 2D, leave the code unchanged and | ||
| 410 | // let rasterizer robustness handle it. | ||
| 411 | // This happens in Fire Emblem: Three Houses. | ||
| 412 | flags.type.Assign(TextureType::Buffer); | ||
| 413 | } | ||
| 414 | break; | ||
| 415 | default: | ||
| 416 | break; | ||
| 417 | } | ||
| 418 | u32 index; | ||
| 419 | switch (inst->GetOpcode()) { | ||
| 420 | case IR::Opcode::ImageRead: | ||
| 421 | case IR::Opcode::ImageAtomicIAdd32: | ||
| 422 | case IR::Opcode::ImageAtomicSMin32: | ||
| 423 | case IR::Opcode::ImageAtomicUMin32: | ||
| 424 | case IR::Opcode::ImageAtomicSMax32: | ||
| 425 | case IR::Opcode::ImageAtomicUMax32: | ||
| 426 | case IR::Opcode::ImageAtomicInc32: | ||
| 427 | case IR::Opcode::ImageAtomicDec32: | ||
| 428 | case IR::Opcode::ImageAtomicAnd32: | ||
| 429 | case IR::Opcode::ImageAtomicOr32: | ||
| 430 | case IR::Opcode::ImageAtomicXor32: | ||
| 431 | case IR::Opcode::ImageAtomicExchange32: | ||
| 432 | case IR::Opcode::ImageWrite: { | ||
| 433 | if (cbuf.has_secondary) { | ||
| 434 | throw NotImplementedException("Unexpected separate sampler"); | ||
| 435 | } | ||
| 436 | const bool is_written{inst->GetOpcode() != IR::Opcode::ImageRead}; | ||
| 437 | const bool is_read{inst->GetOpcode() != IR::Opcode::ImageWrite}; | ||
| 438 | if (flags.type == TextureType::Buffer) { | ||
| 439 | index = descriptors.Add(ImageBufferDescriptor{ | ||
| 440 | .format = flags.image_format, | ||
| 441 | .is_written = is_written, | ||
| 442 | .is_read = is_read, | ||
| 443 | .cbuf_index = cbuf.index, | ||
| 444 | .cbuf_offset = cbuf.offset, | ||
| 445 | .count = cbuf.count, | ||
| 446 | .size_shift = DESCRIPTOR_SIZE_SHIFT, | ||
| 447 | }); | ||
| 448 | } else { | ||
| 449 | index = descriptors.Add(ImageDescriptor{ | ||
| 450 | .type = flags.type, | ||
| 451 | .format = flags.image_format, | ||
| 452 | .is_written = is_written, | ||
| 453 | .is_read = is_read, | ||
| 454 | .cbuf_index = cbuf.index, | ||
| 455 | .cbuf_offset = cbuf.offset, | ||
| 456 | .count = cbuf.count, | ||
| 457 | .size_shift = DESCRIPTOR_SIZE_SHIFT, | ||
| 458 | }); | ||
| 459 | } | ||
| 460 | break; | ||
| 461 | } | ||
| 462 | default: | ||
| 463 | if (flags.type == TextureType::Buffer) { | ||
| 464 | index = descriptors.Add(TextureBufferDescriptor{ | ||
| 465 | .has_secondary = cbuf.has_secondary, | ||
| 466 | .cbuf_index = cbuf.index, | ||
| 467 | .cbuf_offset = cbuf.offset, | ||
| 468 | .secondary_cbuf_index = cbuf.secondary_index, | ||
| 469 | .secondary_cbuf_offset = cbuf.secondary_offset, | ||
| 470 | .count = cbuf.count, | ||
| 471 | .size_shift = DESCRIPTOR_SIZE_SHIFT, | ||
| 472 | }); | ||
| 473 | } else { | ||
| 474 | index = descriptors.Add(TextureDescriptor{ | ||
| 475 | .type = flags.type, | ||
| 476 | .is_depth = flags.is_depth != 0, | ||
| 477 | .has_secondary = cbuf.has_secondary, | ||
| 478 | .cbuf_index = cbuf.index, | ||
| 479 | .cbuf_offset = cbuf.offset, | ||
| 480 | .secondary_cbuf_index = cbuf.secondary_index, | ||
| 481 | .secondary_cbuf_offset = cbuf.secondary_offset, | ||
| 482 | .count = cbuf.count, | ||
| 483 | .size_shift = DESCRIPTOR_SIZE_SHIFT, | ||
| 484 | }); | ||
| 485 | } | ||
| 486 | break; | ||
| 487 | } | ||
| 488 | flags.descriptor_index.Assign(index); | ||
| 489 | inst->SetFlags(flags); | ||
| 490 | |||
| 491 | if (cbuf.count > 1) { | ||
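| | // Indexed access: pass the descriptor index (the dynamic byte offset scaled down by the | ||
| | // descriptor size) as the instruction's first argument. | ||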
| 492 | const auto insert_point{IR::Block::InstructionList::s_iterator_to(*inst)}; | ||
| 493 | IR::IREmitter ir{*texture_inst.block, insert_point}; | ||
| 494 | const IR::U32 shift{ir.Imm32(DESCRIPTOR_SIZE_SHIFT)}; | ||
| 495 | inst->SetArg(0, ir.ShiftRightArithmetic(cbuf.dynamic_offset, shift)); | ||
| 496 | } else { | ||
| 497 | inst->SetArg(0, IR::Value{}); | ||
| 498 | } | ||
| 499 | } | ||
| 500 | } | ||
| 501 | |||
| 502 | void JoinTextureInfo(Info& base, Info& source) { | ||
| 503 | Descriptors descriptors{ | ||
| 504 | base.texture_buffer_descriptors, | ||
| 505 | base.image_buffer_descriptors, | ||
| 506 | base.texture_descriptors, | ||
| 507 | base.image_descriptors, | ||
| 508 | }; | ||
| 509 | for (auto& desc : source.texture_buffer_descriptors) { | ||
| 510 | descriptors.Add(desc); | ||
| 511 | } | ||
| 512 | for (auto& desc : source.image_buffer_descriptors) { | ||
| 513 | descriptors.Add(desc); | ||
| 514 | } | ||
| 515 | for (auto& desc : source.texture_descriptors) { | ||
| 516 | descriptors.Add(desc); | ||
| 517 | } | ||
| 518 | for (auto& desc : source.image_descriptors) { | ||
| 519 | descriptors.Add(desc); | ||
| 520 | } | ||
| 521 | } | ||
| 522 | |||
| 523 | } // namespace Shader::Optimization | ||
diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp new file mode 100644 index 000000000..975d5aadf --- /dev/null +++ b/src/shader_recompiler/ir_opt/verification_pass.cpp | |||
| @@ -0,0 +1,98 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <map> | ||
| 6 | #include <set> | ||
| 7 | |||
| 8 | #include "shader_recompiler/exception.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 11 | #include "shader_recompiler/ir_opt/passes.h" | ||
| 12 | |||
| 13 | namespace Shader::Optimization { | ||
| 14 | |||
| 15 | static void ValidateTypes(const IR::Program& program) { | ||
| 16 | for (const auto& block : program.blocks) { | ||
| 17 | for (const IR::Inst& inst : *block) { | ||
| 18 | if (inst.GetOpcode() == IR::Opcode::Phi) { | ||
| 19 | // Skip validation on phi nodes | ||
| 20 | continue; | ||
| 21 | } | ||
| 22 | const size_t num_args{inst.NumArgs()}; | ||
| 23 | for (size_t i = 0; i < num_args; ++i) { | ||
| 24 | const IR::Type t1{inst.Arg(i).Type()}; | ||
| 25 | const IR::Type t2{IR::ArgTypeOf(inst.GetOpcode(), i)}; | ||
| 26 | if (!IR::AreTypesCompatible(t1, t2)) { | ||
| 27 | throw LogicError("Invalid types in block:\n{}", IR::DumpBlock(*block)); | ||
| 28 | } | ||
| 29 | } | ||
| 30 | } | ||
| 31 | } | ||
| 32 | } | ||
| 33 | |||
| 34 | static void ValidateUses(const IR::Program& program) { | ||
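| | // Recount each instruction's uses from scratch and compare against the cached UseCount(). | ||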
| 35 | std::map<IR::Inst*, int> actual_uses; | ||
| 36 | for (const auto& block : program.blocks) { | ||
| 37 | for (const IR::Inst& inst : *block) { | ||
| 38 | const size_t num_args{inst.NumArgs()}; | ||
| 39 | for (size_t i = 0; i < num_args; ++i) { | ||
| 40 | const IR::Value arg{inst.Arg(i)}; | ||
| 41 | if (!arg.IsImmediate()) { | ||
| 42 | ++actual_uses[arg.Inst()]; | ||
| 43 | } | ||
| 44 | } | ||
| 45 | } | ||
| 46 | } | ||
| 47 | for (const auto [inst, uses] : actual_uses) { | ||
| 48 | if (inst->UseCount() != uses) { | ||
| 49 | throw LogicError("Invalid uses in block: {}", IR::DumpProgram(program)); | ||
| 50 | } | ||
| 51 | } | ||
| 52 | } | ||
| 53 | |||
| 54 | static void ValidateForwardDeclarations(const IR::Program& program) { | ||
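| | // In program order, every non-phi argument must name an already-defined instruction. | ||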
| 55 | std::set<const IR::Inst*> definitions; | ||
| 56 | for (const IR::Block* const block : program.blocks) { | ||
| 57 | for (const IR::Inst& inst : *block) { | ||
| 58 | definitions.emplace(&inst); | ||
| 59 | if (inst.GetOpcode() == IR::Opcode::Phi) { | ||
| 60 | // Phi nodes can have forward declarations | ||
| 61 | continue; | ||
| 62 | } | ||
| 63 | const size_t num_args{inst.NumArgs()}; | ||
| 64 | for (size_t arg = 0; arg < num_args; ++arg) { | ||
| 65 | if (inst.Arg(arg).IsImmediate()) { | ||
| 66 | continue; | ||
| 67 | } | ||
| 68 | if (!definitions.contains(inst.Arg(arg).Inst())) { | ||
| 69 | throw LogicError("Forward declaration in block: {}", IR::DumpBlock(*block)); | ||
| 70 | } | ||
| 71 | } | ||
| 72 | } | ||
| 73 | } | ||
| 74 | } | ||
| 75 | |||
| 76 | static void ValidatePhiNodes(const IR::Program& program) { | ||
| 77 | for (const IR::Block* const block : program.blocks) { | ||
| 78 | bool no_more_phis{false}; | ||
| 79 | for (const IR::Inst& inst : *block) { | ||
| 80 | if (inst.GetOpcode() == IR::Opcode::Phi) { | ||
| 81 | if (no_more_phis) { | ||
| 82 | throw LogicError("Interleaved phi nodes: {}", IR::DumpBlock(*block)); | ||
| 83 | } | ||
| 84 | } else { | ||
| 85 | no_more_phis = true; | ||
| 86 | } | ||
| 87 | } | ||
| 88 | } | ||
| 89 | } | ||
| 90 | |||
| 91 | void VerificationPass(const IR::Program& program) { | ||
| 92 | ValidateTypes(program); | ||
| 93 | ValidateUses(program); | ||
| 94 | ValidateForwardDeclarations(program); | ||
| 95 | ValidatePhiNodes(program); | ||
| 96 | } | ||
| 97 | |||
| 98 | } // namespace Shader::Optimization | ||
diff --git a/src/shader_recompiler/object_pool.h b/src/shader_recompiler/object_pool.h new file mode 100644 index 000000000..f8b255b66 --- /dev/null +++ b/src/shader_recompiler/object_pool.h | |||
| @@ -0,0 +1,104 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <memory> | ||
| 8 | #include <type_traits> | ||
| 9 | #include <utility> | ||
| 10 | |||
| 11 | namespace Shader { | ||
| 12 | |||
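| | // Chunked arena allocator: objects are constructed in place inside fixed-size chunks and | ||
| | // destroyed in bulk by ReleaseContents(), which also coalesces chunks once the root fills up. | ||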
| 13 | template <typename T> | ||
| 14 | requires std::is_destructible_v<T> class ObjectPool { | ||
| 15 | public: | ||
| 16 | explicit ObjectPool(size_t chunk_size = 8192) : new_chunk_size{chunk_size} { | ||
| 17 | node = &chunks.emplace_back(new_chunk_size); | ||
| 18 | } | ||
| 19 | |||
| 20 | template <typename... Args> | ||
| 21 | requires std::is_constructible_v<T, Args...> [[nodiscard]] T* Create(Args&&... args) { | ||
| 22 | return std::construct_at(Memory(), std::forward<Args>(args)...); | ||
| 23 | } | ||
| 24 | |||
| 25 | void ReleaseContents() { | ||
| 26 | if (chunks.empty()) { | ||
| 27 | return; | ||
| 28 | } | ||
| 29 | Chunk& root{chunks.front()}; | ||
| 30 | if (root.used_objects == root.num_objects) { | ||
| 31 | // Root chunk has been filled, squash allocations into it | ||
| 32 | const size_t total_objects{root.num_objects + new_chunk_size * (chunks.size() - 1)}; | ||
| 33 | chunks.clear(); | ||
| 34 | chunks.emplace_back(total_objects); | ||
| 35 | } else { | ||
| 36 | root.Release(); | ||
| 37 | chunks.resize(1); | ||
| 38 | } | ||
| 39 | chunks.shrink_to_fit(); | ||
| 40 | node = &chunks.front(); | ||
| 41 | } | ||
| 42 | |||
| 43 | private: | ||
| 44 | struct NonTrivialDummy { | ||
| 45 | NonTrivialDummy() noexcept {} | ||
| 46 | }; | ||
| 47 | |||
| 48 | union Storage { | ||
| 49 | Storage() noexcept {} | ||
| 50 | ~Storage() noexcept {} | ||
| 51 | |||
| 52 | NonTrivialDummy dummy{}; | ||
| 53 | T object; | ||
| 54 | }; | ||
| 55 | |||
| 56 | struct Chunk { | ||
| 57 | explicit Chunk() = default; | ||
| 58 | explicit Chunk(size_t size) | ||
| 59 | : num_objects{size}, storage{std::make_unique<Storage[]>(size)} {} | ||
| 60 | |||
| 61 | Chunk& operator=(Chunk&& rhs) noexcept { | ||
| 62 | Release(); | ||
| 63 | used_objects = std::exchange(rhs.used_objects, 0); | ||
| 64 | num_objects = std::exchange(rhs.num_objects, 0); | ||
| 65 | storage = std::move(rhs.storage); | ||
| | return *this; | ||
| 66 | } | ||
| 67 | |||
| 68 | Chunk(Chunk&& rhs) noexcept | ||
| 69 | : used_objects{std::exchange(rhs.used_objects, 0)}, | ||
| 70 | num_objects{std::exchange(rhs.num_objects, 0)}, storage{std::move(rhs.storage)} {} | ||
| 71 | |||
| 72 | ~Chunk() { | ||
| 73 | Release(); | ||
| 74 | } | ||
| 75 | |||
| 76 | void Release() { | ||
| 77 | std::destroy_n(storage.get(), used_objects); | ||
| 78 | used_objects = 0; | ||
| 79 | } | ||
| 80 | |||
| 81 | size_t used_objects{}; | ||
| 82 | size_t num_objects{}; | ||
| 83 | std::unique_ptr<Storage[]> storage; | ||
| 84 | }; | ||
| 85 | |||
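| | // Grabs storage for one object from the current chunk; FreeChunk() appends a new chunk | ||
| | // when the current one is exhausted. | ||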
| 86 | [[nodiscard]] T* Memory() { | ||
| 87 | Chunk* const chunk{FreeChunk()}; | ||
| 88 | return &chunk->storage[chunk->used_objects++].object; | ||
| 89 | } | ||
| 90 | |||
| 91 | [[nodiscard]] Chunk* FreeChunk() { | ||
| 92 | if (node->used_objects != node->num_objects) { | ||
| 93 | return node; | ||
| 94 | } | ||
| 95 | node = &chunks.emplace_back(new_chunk_size); | ||
| 96 | return node; | ||
| 97 | } | ||
| 98 | |||
| 99 | Chunk* node{}; | ||
| 100 | std::vector<Chunk> chunks; | ||
| 101 | size_t new_chunk_size{}; | ||
| 102 | }; | ||
| 103 | |||
| 104 | } // namespace Shader | ||
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h new file mode 100644 index 000000000..f0c3b3b17 --- /dev/null +++ b/src/shader_recompiler/profile.h | |||
| @@ -0,0 +1,74 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | |||
| 9 | namespace Shader { | ||
| 10 | |||
| 11 | struct Profile { | ||
| 12 | u32 supported_spirv{0x00010000}; | ||
| 13 | |||
| 14 | bool unified_descriptor_binding{}; | ||
| 15 | bool support_descriptor_aliasing{}; | ||
| 16 | bool support_int8{}; | ||
| 17 | bool support_int16{}; | ||
| 18 | bool support_int64{}; | ||
| 19 | bool support_vertex_instance_id{}; | ||
| 20 | bool support_float_controls{}; | ||
| 21 | bool support_separate_denorm_behavior{}; | ||
| 22 | bool support_separate_rounding_mode{}; | ||
| 23 | bool support_fp16_denorm_preserve{}; | ||
| 24 | bool support_fp32_denorm_preserve{}; | ||
| 25 | bool support_fp16_denorm_flush{}; | ||
| 26 | bool support_fp32_denorm_flush{}; | ||
| 27 | bool support_fp16_signed_zero_nan_preserve{}; | ||
| 28 | bool support_fp32_signed_zero_nan_preserve{}; | ||
| 29 | bool support_fp64_signed_zero_nan_preserve{}; | ||
| 30 | bool support_explicit_workgroup_layout{}; | ||
| 31 | bool support_vote{}; | ||
| 32 | bool support_viewport_index_layer_non_geometry{}; | ||
| 33 | bool support_viewport_mask{}; | ||
| 34 | bool support_typeless_image_loads{}; | ||
| 35 | bool support_demote_to_helper_invocation{}; | ||
| 36 | bool support_int64_atomics{}; | ||
| 37 | bool support_derivative_control{}; | ||
| 38 | bool support_geometry_shader_passthrough{}; | ||
| 39 | bool support_gl_nv_gpu_shader_5{}; | ||
| 40 | bool support_gl_amd_gpu_shader_half_float{}; | ||
| 41 | bool support_gl_texture_shadow_lod{}; | ||
| 42 | bool support_gl_warp_intrinsics{}; | ||
| 43 | bool support_gl_variable_aoffi{}; | ||
| 44 | bool support_gl_sparse_textures{}; | ||
| 45 | bool support_gl_derivative_control{}; | ||
| 46 | |||
| 47 | bool warp_size_potentially_larger_than_guest{}; | ||
| 48 | |||
| 49 | bool lower_left_origin_mode{}; | ||
| 50 | /// Fragment outputs have to be declared even if they are not written, to avoid undefined values. | ||
| 51 | /// See Ori and the Blind Forest's main menu for reference. | ||
| 52 | bool need_declared_frag_colors{}; | ||
| 53 | /// Prevents fast math optimizations that may cause inaccuracies | ||
| 54 | bool need_fastmath_off{}; | ||
| 55 | |||
| 56 | /// OpFClamp is broken and OpFMax + OpFMin should be used instead | ||
| 57 | bool has_broken_spirv_clamp{}; | ||
| 58 | /// Offset image operands with an unsigned type do not work | ||
| 59 | bool has_broken_unsigned_image_offsets{}; | ||
| 60 | /// Signed instructions with unsigned data types are misinterpreted | ||
| 61 | bool has_broken_signed_operations{}; | ||
| 62 | /// Float controls break when fp16 is enabled | ||
| 63 | bool has_broken_fp16_float_controls{}; | ||
| 64 | /// Dynamic vec4 indexing is broken on some OpenGL drivers | ||
| 65 | bool has_gl_component_indexing_bug{}; | ||
| 66 | /// The precise type qualifier is broken in the fragment stage of some drivers | ||
| 67 | bool has_gl_precise_bug{}; | ||
| 68 | /// Ignores the SPIR-V ordered vs. unordered distinction and uses GLSL comparison semantics | ||
| 69 | bool ignore_nan_fp_comparisons{}; | ||
| 70 | |||
| 71 | u32 gl_max_compute_smem_size{}; | ||
| 72 | }; | ||
| 73 | |||
| 74 | } // namespace Shader | ||
diff --git a/src/shader_recompiler/program_header.h b/src/shader_recompiler/program_header.h new file mode 100644 index 000000000..bd6c2bfb5 --- /dev/null +++ b/src/shader_recompiler/program_header.h | |||
| @@ -0,0 +1,219 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <optional> | ||
| 9 | |||
| 10 | #include "common/bit_field.h" | ||
| 11 | #include "common/common_funcs.h" | ||
| 12 | #include "common/common_types.h" | ||
| 13 | |||
| 14 | namespace Shader { | ||
| 15 | |||
| 16 | enum class OutputTopology : u32 { | ||
| 17 | PointList = 1, | ||
| 18 | LineStrip = 6, | ||
| 19 | TriangleStrip = 7, | ||
| 20 | }; | ||
| 21 | |||
| 22 | enum class PixelImap : u8 { | ||
| 23 | Unused = 0, | ||
| 24 | Constant = 1, | ||
| 25 | Perspective = 2, | ||
| 26 | ScreenLinear = 3, | ||
| 27 | }; | ||
| 28 | |||
| 29 | // Documentation in: | ||
| 30 | // http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html | ||
| 31 | struct ProgramHeader { | ||
| 32 | union { | ||
| 33 | BitField<0, 5, u32> sph_type; | ||
| 34 | BitField<5, 5, u32> version; | ||
| 35 | BitField<10, 4, u32> shader_type; | ||
| 36 | BitField<14, 1, u32> mrt_enable; | ||
| 37 | BitField<15, 1, u32> kills_pixels; | ||
| 38 | BitField<16, 1, u32> does_global_store; | ||
| 39 | BitField<17, 4, u32> sass_version; | ||
| 40 | BitField<21, 2, u32> reserved1; | ||
| 41 | BitField<24, 1, u32> geometry_passthrough; | ||
| 42 | BitField<25, 1, u32> reserved2; | ||
| 43 | BitField<26, 1, u32> does_load_or_store; | ||
| 44 | BitField<27, 1, u32> does_fp64; | ||
| 45 | BitField<28, 4, u32> stream_out_mask; | ||
| 46 | } common0; | ||
| 47 | |||
| 48 | union { | ||
| 49 | BitField<0, 24, u32> shader_local_memory_low_size; | ||
| 50 | BitField<24, 8, u32> per_patch_attribute_count; | ||
| 51 | } common1; | ||
| 52 | |||
| 53 | union { | ||
| 54 | BitField<0, 24, u32> shader_local_memory_high_size; | ||
| 55 | BitField<24, 8, u32> threads_per_input_primitive; | ||
| 56 | } common2; | ||
| 57 | |||
| 58 | union { | ||
| 59 | BitField<0, 24, u32> shader_local_memory_crs_size; | ||
| 60 | BitField<24, 4, OutputTopology> output_topology; | ||
| 61 | BitField<28, 4, u32> reserved; | ||
| 62 | } common3; | ||
| 63 | |||
| 64 | union { | ||
| 65 | BitField<0, 12, u32> max_output_vertices; | ||
| 66 | BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders. | ||
| 67 | BitField<20, 4, u32> reserved; | ||
| 68 | BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders. | ||
| 69 | } common4; | ||
| 70 | |||
| 71 | union { | ||
| 72 | struct { | ||
| 73 | INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA | ||
| 74 | |||
| 75 | union { | ||
| 76 | BitField<0, 1, u8> primitive_array_id; | ||
| 77 | BitField<1, 1, u8> rt_array_index; | ||
| 78 | BitField<2, 1, u8> viewport_index; | ||
| 79 | BitField<3, 1, u8> point_size; | ||
| 80 | BitField<4, 1, u8> position_x; | ||
| 81 | BitField<5, 1, u8> position_y; | ||
| 82 | BitField<6, 1, u8> position_z; | ||
| 83 | BitField<7, 1, u8> position_w; | ||
| 84 | u8 raw; | ||
| 85 | } imap_systemb; | ||
| 86 | |||
| 87 | std::array<u8, 16> imap_generic_vector; | ||
| 88 | |||
| 89 | INSERT_PADDING_BYTES_NOINIT(2); // ImapColor | ||
| 90 | union { | ||
| 91 | BitField<0, 8, u16> clip_distances; | ||
| 92 | BitField<8, 1, u16> point_sprite_s; | ||
| 93 | BitField<9, 1, u16> point_sprite_t; | ||
| 94 | BitField<10, 1, u16> fog_coordinate; | ||
| 95 | BitField<12, 1, u16> tessellation_eval_point_u; | ||
| 96 | BitField<13, 1, u16> tessellation_eval_point_v; | ||
| 97 | BitField<14, 1, u16> instance_id; | ||
| 98 | BitField<15, 1, u16> vertex_id; | ||
| 99 | }; | ||
| 100 | INSERT_PADDING_BYTES_NOINIT(5); // ImapFixedFncTexture[10] | ||
| 101 | INSERT_PADDING_BYTES_NOINIT(1); // ImapReserved | ||
| 102 | INSERT_PADDING_BYTES_NOINIT(3); // OmapSystemValuesA | ||
| 103 | |||
| 104 | union { | ||
| 105 | BitField<0, 1, u8> primitive_array_id; | ||
| 106 | BitField<1, 1, u8> rt_array_index; | ||
| 107 | BitField<2, 1, u8> viewport_index; | ||
| 108 | BitField<3, 1, u8> point_size; | ||
| 109 | BitField<4, 1, u8> position_x; | ||
| 110 | BitField<5, 1, u8> position_y; | ||
| 111 | BitField<6, 1, u8> position_z; | ||
| 112 | BitField<7, 1, u8> position_w; | ||
| 113 | u8 raw; | ||
| 114 | } omap_systemb; | ||
| 115 | |||
| 116 | std::array<u8, 16> omap_generic_vector; | ||
| 117 | |||
| 118 | INSERT_PADDING_BYTES_NOINIT(2); // OmapColor | ||
| 119 | |||
| 120 | union { | ||
| 121 | BitField<0, 8, u16> clip_distances; | ||
| 122 | BitField<8, 1, u16> point_sprite_s; | ||
| 123 | BitField<9, 1, u16> point_sprite_t; | ||
| 124 | BitField<10, 1, u16> fog_coordinate; | ||
| 125 | BitField<12, 1, u16> tessellation_eval_point_u; | ||
| 126 | BitField<13, 1, u16> tessellation_eval_point_v; | ||
| 127 | BitField<14, 1, u16> instance_id; | ||
| 128 | BitField<15, 1, u16> vertex_id; | ||
| 129 | } omap_systemc; | ||
| 130 | |||
| 131 | INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10] | ||
| 132 | INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved | ||
| 133 | |||
| 134 | [[nodiscard]] std::array<bool, 4> InputGeneric(size_t index) const noexcept { | ||
| 135 | const int data{imap_generic_vector[index >> 1] >> ((index % 2) * 4)}; | ||
| 136 | return { | ||
| 137 | (data & 1) != 0, | ||
| 138 | (data & 2) != 0, | ||
| 139 | (data & 4) != 0, | ||
| 140 | (data & 8) != 0, | ||
| 141 | }; | ||
| 142 | } | ||
| 143 | |||
| 144 | [[nodiscard]] std::array<bool, 4> OutputGeneric(size_t index) const noexcept { | ||
| 145 | const int data{omap_generic_vector[index >> 1] >> ((index % 2) * 4)}; | ||
| 146 | return { | ||
| 147 | (data & 1) != 0, | ||
| 148 | (data & 2) != 0, | ||
| 149 | (data & 4) != 0, | ||
| 150 | (data & 8) != 0, | ||
| 151 | }; | ||
| 152 | } | ||
| 153 | } vtg; | ||
| 154 | |||
| 155 | struct { | ||
| 156 | INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA | ||
| 157 | |||
| 158 | union { | ||
| 159 | BitField<0, 1, u8> primitive_array_id; | ||
| 160 | BitField<1, 1, u8> rt_array_index; | ||
| 161 | BitField<2, 1, u8> viewport_index; | ||
| 162 | BitField<3, 1, u8> point_size; | ||
| 163 | BitField<4, 1, u8> position_x; | ||
| 164 | BitField<5, 1, u8> position_y; | ||
| 165 | BitField<6, 1, u8> position_z; | ||
| 166 | BitField<7, 1, u8> position_w; | ||
| 167 | BitField<0, 4, u8> first; | ||
| 168 | BitField<4, 4, u8> position; | ||
| 169 | u8 raw; | ||
| 170 | } imap_systemb; | ||
| 171 | |||
| 172 | union { | ||
| 173 | BitField<0, 2, PixelImap> x; | ||
| 174 | BitField<2, 2, PixelImap> y; | ||
| 175 | BitField<4, 2, PixelImap> z; | ||
| 176 | BitField<6, 2, PixelImap> w; | ||
| 177 | u8 raw; | ||
| 178 | } imap_generic_vector[32]; | ||
| 179 | |||
| 180 | INSERT_PADDING_BYTES_NOINIT(2); // ImapColor | ||
| 181 | INSERT_PADDING_BYTES_NOINIT(2); // ImapSystemValuesC | ||
| 182 | INSERT_PADDING_BYTES_NOINIT(10); // ImapFixedFncTexture[10] | ||
| 183 | INSERT_PADDING_BYTES_NOINIT(2); // ImapReserved | ||
| 184 | |||
| 185 | struct { | ||
| 186 | u32 target; | ||
| 187 | union { | ||
| 188 | BitField<0, 1, u32> sample_mask; | ||
| 189 | BitField<1, 1, u32> depth; | ||
| 190 | BitField<2, 30, u32> reserved; | ||
| 191 | }; | ||
| 192 | } omap; | ||
| 193 | |||
| 194 | [[nodiscard]] std::array<bool, 4> EnabledOutputComponents(u32 rt) const noexcept { | ||
| 195 | const u32 bits{omap.target >> (rt * 4)}; | ||
| 196 | return {(bits & 1) != 0, (bits & 2) != 0, (bits & 4) != 0, (bits & 8) != 0}; | ||
| 197 | } | ||
| 198 | |||
| 199 | [[nodiscard]] std::array<PixelImap, 4> GenericInputMap(u32 attribute) const { | ||
| 200 | const auto& vector{imap_generic_vector[attribute]}; | ||
| 201 | return {vector.x, vector.y, vector.z, vector.w}; | ||
| 202 | } | ||
| 203 | |||
| 204 | [[nodiscard]] bool IsGenericVectorActive(size_t index) const { | ||
| 205 | return imap_generic_vector[index].raw != 0; | ||
| 206 | } | ||
| 207 | } ps; | ||
| 208 | |||
| 209 | std::array<u32, 0xf> raw; | ||
| 210 | }; | ||
| 211 | |||
| 212 | [[nodiscard]] u64 LocalMemorySize() const noexcept { | ||
| 213 | return static_cast<u64>(common1.shader_local_memory_low_size) | | ||
| 214 | (static_cast<u64>(common2.shader_local_memory_high_size) << 24); | ||
| 215 | } | ||
| 216 | }; | ||
| 217 | static_assert(sizeof(ProgramHeader) == 0x50, "Incorrect structure size"); | ||
| 218 | |||
| 219 | } // namespace Shader | ||
diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h new file mode 100644 index 000000000..f3f83a258 --- /dev/null +++ b/src/shader_recompiler/runtime_info.h | |||
| @@ -0,0 +1,88 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <bitset> | ||
| 9 | #include <optional> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include "common/common_types.h" | ||
| 13 | #include "shader_recompiler/varying_state.h" | ||
| 14 | |||
| 15 | namespace Shader { | ||
| 16 | |||
| 17 | enum class AttributeType : u8 { | ||
| 18 | Float, | ||
| 19 | SignedInt, | ||
| 20 | UnsignedInt, | ||
| 21 | Disabled, | ||
| 22 | }; | ||
| 23 | |||
| 24 | enum class InputTopology { | ||
| 25 | Points, | ||
| 26 | Lines, | ||
| 27 | LinesAdjacency, | ||
| 28 | Triangles, | ||
| 29 | TrianglesAdjacency, | ||
| 30 | }; | ||
| 31 | |||
| 32 | enum class CompareFunction { | ||
| 33 | Never, | ||
| 34 | Less, | ||
| 35 | Equal, | ||
| 36 | LessThanEqual, | ||
| 37 | Greater, | ||
| 38 | NotEqual, | ||
| 39 | GreaterThanEqual, | ||
| 40 | Always, | ||
| 41 | }; | ||
| 42 | |||
| 43 | enum class TessPrimitive { | ||
| 44 | Isolines, | ||
| 45 | Triangles, | ||
| 46 | Quads, | ||
| 47 | }; | ||
| 48 | |||
| 49 | enum class TessSpacing { | ||
| 50 | Equal, | ||
| 51 | FractionalOdd, | ||
| 52 | FractionalEven, | ||
| 53 | }; | ||
| 54 | |||
| 55 | struct TransformFeedbackVarying { | ||
| 56 | u32 buffer{}; | ||
| 57 | u32 stride{}; | ||
| 58 | u32 offset{}; | ||
| 59 | u32 components{}; | ||
| 60 | }; | ||
| 61 | |||
| 62 | struct RuntimeInfo { | ||
| 63 | std::array<AttributeType, 32> generic_input_types{}; | ||
| 64 | VaryingState previous_stage_stores; | ||
| 65 | |||
| 66 | bool convert_depth_mode{}; | ||
| 67 | bool force_early_z{}; | ||
| 68 | |||
| 69 | TessPrimitive tess_primitive{}; | ||
| 70 | TessSpacing tess_spacing{}; | ||
| 71 | bool tess_clockwise{}; | ||
| 72 | |||
| 73 | InputTopology input_topology{}; | ||
| 74 | |||
| 75 | std::optional<float> fixed_state_point_size; | ||
| 76 | std::optional<CompareFunction> alpha_test_func; | ||
| 77 | float alpha_test_reference{}; | ||
| 78 | |||
| 79 | /// Static Y negate value | ||
| 80 | bool y_negate{}; | ||
| 81 | /// Use storage buffers instead of global pointers on GLASM | ||
| 82 | bool glasm_use_storage_buffers{}; | ||
| 83 | |||
| 84 | /// Transform feedback state for each varying | ||
| 85 | std::vector<TransformFeedbackVarying> xfb_varyings; | ||
| 86 | }; | ||
| 87 | |||
| 88 | } // namespace Shader | ||
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h new file mode 100644 index 000000000..4ef4dbd40 --- /dev/null +++ b/src/shader_recompiler/shader_info.h | |||
| @@ -0,0 +1,193 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <bitset> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "shader_recompiler/frontend/ir/type.h" | ||
| 12 | #include "shader_recompiler/varying_state.h" | ||
| 13 | |||
| 14 | #include <boost/container/small_vector.hpp> | ||
| 15 | #include <boost/container/static_vector.hpp> | ||
| 16 | |||
| 17 | namespace Shader { | ||
| 18 | |||
| 19 | enum class TextureType : u32 { | ||
| 20 | Color1D, | ||
| 21 | ColorArray1D, | ||
| 22 | Color2D, | ||
| 23 | ColorArray2D, | ||
| 24 | Color3D, | ||
| 25 | ColorCube, | ||
| 26 | ColorArrayCube, | ||
| 27 | Buffer, | ||
| 28 | }; | ||
| 29 | constexpr u32 NUM_TEXTURE_TYPES = 8; | ||
| 30 | |||
| 31 | enum class ImageFormat : u32 { | ||
| 32 | Typeless, | ||
| 33 | R8_UINT, | ||
| 34 | R8_SINT, | ||
| 35 | R16_UINT, | ||
| 36 | R16_SINT, | ||
| 37 | R32_UINT, | ||
| 38 | R32G32_UINT, | ||
| 39 | R32G32B32A32_UINT, | ||
| 40 | }; | ||
| 41 | |||
| 42 | enum class Interpolation { | ||
| 43 | Smooth, | ||
| 44 | Flat, | ||
| 45 | NoPerspective, | ||
| 46 | }; | ||
| 47 | |||
| 48 | struct ConstantBufferDescriptor { | ||
| 49 | u32 index; | ||
| 50 | u32 count; | ||
| 51 | }; | ||
| 52 | |||
| 53 | struct StorageBufferDescriptor { | ||
| 54 | u32 cbuf_index; | ||
| 55 | u32 cbuf_offset; | ||
| 56 | u32 count; | ||
| 57 | bool is_written; | ||
| 58 | }; | ||
| 59 | |||
| 60 | struct TextureBufferDescriptor { | ||
| 61 | bool has_secondary; | ||
| 62 | u32 cbuf_index; | ||
| 63 | u32 cbuf_offset; | ||
| 64 | u32 secondary_cbuf_index; | ||
| 65 | u32 secondary_cbuf_offset; | ||
| 66 | u32 count; | ||
| 67 | u32 size_shift; | ||
| 68 | }; | ||
| 69 | using TextureBufferDescriptors = boost::container::small_vector<TextureBufferDescriptor, 6>; | ||
| 70 | |||
| 71 | struct ImageBufferDescriptor { | ||
| 72 | ImageFormat format; | ||
| 73 | bool is_written; | ||
| 74 | bool is_read; | ||
| 75 | u32 cbuf_index; | ||
| 76 | u32 cbuf_offset; | ||
| 77 | u32 count; | ||
| 78 | u32 size_shift; | ||
| 79 | }; | ||
| 80 | using ImageBufferDescriptors = boost::container::small_vector<ImageBufferDescriptor, 2>; | ||
| 81 | |||
| 82 | struct TextureDescriptor { | ||
| 83 | TextureType type; | ||
| 84 | bool is_depth; | ||
| 85 | bool has_secondary; | ||
| 86 | u32 cbuf_index; | ||
| 87 | u32 cbuf_offset; | ||
| 88 | u32 secondary_cbuf_index; | ||
| 89 | u32 secondary_cbuf_offset; | ||
| 90 | u32 count; | ||
| 91 | u32 size_shift; | ||
| 92 | }; | ||
| 93 | using TextureDescriptors = boost::container::small_vector<TextureDescriptor, 12>; | ||
| 94 | |||
| 95 | struct ImageDescriptor { | ||
| 96 | TextureType type; | ||
| 97 | ImageFormat format; | ||
| 98 | bool is_written; | ||
| 99 | bool is_read; | ||
| 100 | u32 cbuf_index; | ||
| 101 | u32 cbuf_offset; | ||
| 102 | u32 count; | ||
| 103 | u32 size_shift; | ||
| 104 | }; | ||
| 105 | using ImageDescriptors = boost::container::small_vector<ImageDescriptor, 4>; | ||
| 106 | |||
| 107 | struct Info { | ||
| 108 | static constexpr size_t MAX_CBUFS{18}; | ||
| 109 | static constexpr size_t MAX_SSBOS{32}; | ||
| 110 | |||
| 111 | bool uses_workgroup_id{}; | ||
| 112 | bool uses_local_invocation_id{}; | ||
| 113 | bool uses_invocation_id{}; | ||
| 114 | bool uses_sample_id{}; | ||
| 115 | bool uses_is_helper_invocation{}; | ||
| 116 | bool uses_subgroup_invocation_id{}; | ||
| 117 | bool uses_subgroup_shuffles{}; | ||
| 118 | std::array<bool, 30> uses_patches{}; | ||
| 119 | |||
| 120 | std::array<Interpolation, 32> interpolation{}; | ||
| 121 | VaryingState loads; | ||
| 122 | VaryingState stores; | ||
| 123 | VaryingState passthrough; | ||
| 124 | |||
| 125 | bool loads_indexed_attributes{}; | ||
| 126 | |||
| 127 | std::array<bool, 8> stores_frag_color{}; | ||
| 128 | bool stores_sample_mask{}; | ||
| 129 | bool stores_frag_depth{}; | ||
| 130 | |||
| 131 | bool stores_tess_level_outer{}; | ||
| 132 | bool stores_tess_level_inner{}; | ||
| 133 | |||
| 134 | bool stores_indexed_attributes{}; | ||
| 135 | |||
| 136 | bool stores_global_memory{}; | ||
| 137 | |||
| 138 | bool uses_fp16{}; | ||
| 139 | bool uses_fp64{}; | ||
| 140 | bool uses_fp16_denorms_flush{}; | ||
| 141 | bool uses_fp16_denorms_preserve{}; | ||
| 142 | bool uses_fp32_denorms_flush{}; | ||
| 143 | bool uses_fp32_denorms_preserve{}; | ||
| 144 | bool uses_int8{}; | ||
| 145 | bool uses_int16{}; | ||
| 146 | bool uses_int64{}; | ||
| 147 | bool uses_image_1d{}; | ||
| 148 | bool uses_sampled_1d{}; | ||
| 149 | bool uses_sparse_residency{}; | ||
| 150 | bool uses_demote_to_helper_invocation{}; | ||
| 151 | bool uses_subgroup_vote{}; | ||
| 152 | bool uses_subgroup_mask{}; | ||
| 153 | bool uses_fswzadd{}; | ||
| 154 | bool uses_derivatives{}; | ||
| 155 | bool uses_typeless_image_reads{}; | ||
| 156 | bool uses_typeless_image_writes{}; | ||
| 157 | bool uses_image_buffers{}; | ||
| 158 | bool uses_shared_increment{}; | ||
| 159 | bool uses_shared_decrement{}; | ||
| 160 | bool uses_global_increment{}; | ||
| 161 | bool uses_global_decrement{}; | ||
| 162 | bool uses_atomic_f32_add{}; | ||
| 163 | bool uses_atomic_f16x2_add{}; | ||
| 164 | bool uses_atomic_f16x2_min{}; | ||
| 165 | bool uses_atomic_f16x2_max{}; | ||
| 166 | bool uses_atomic_f32x2_add{}; | ||
| 167 | bool uses_atomic_f32x2_min{}; | ||
| 168 | bool uses_atomic_f32x2_max{}; | ||
| 169 | bool uses_atomic_s32_min{}; | ||
| 170 | bool uses_atomic_s32_max{}; | ||
| 171 | bool uses_int64_bit_atomics{}; | ||
| 172 | bool uses_global_memory{}; | ||
| 173 | bool uses_atomic_image_u32{}; | ||
| 174 | bool uses_shadow_lod{}; | ||
| 175 | |||
| 176 | IR::Type used_constant_buffer_types{}; | ||
| 177 | IR::Type used_storage_buffer_types{}; | ||
| 178 | |||
| 179 | u32 constant_buffer_mask{}; | ||
| 180 | std::array<u32, MAX_CBUFS> constant_buffer_used_sizes{}; | ||
| 181 | u32 nvn_buffer_base{}; | ||
| 182 | std::bitset<16> nvn_buffer_used{}; | ||
| 183 | |||
| 184 | boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS> | ||
| 185 | constant_buffer_descriptors; | ||
| 186 | boost::container::static_vector<StorageBufferDescriptor, MAX_SSBOS> storage_buffers_descriptors; | ||
| 187 | TextureBufferDescriptors texture_buffer_descriptors; | ||
| 188 | ImageBufferDescriptors image_buffer_descriptors; | ||
| 189 | TextureDescriptors texture_descriptors; | ||
| 190 | ImageDescriptors image_descriptors; | ||
| 191 | }; | ||
| 192 | |||
| 193 | } // namespace Shader | ||
diff --git a/src/shader_recompiler/stage.h b/src/shader_recompiler/stage.h new file mode 100644 index 000000000..5c1c8d8fc --- /dev/null +++ b/src/shader_recompiler/stage.h | |||
| @@ -0,0 +1,28 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | |||
| 9 | namespace Shader { | ||
| 10 | |||
| 11 | enum class Stage : u32 { | ||
| 12 | VertexB, | ||
| 13 | TessellationControl, | ||
| 14 | TessellationEval, | ||
| 15 | Geometry, | ||
| 16 | Fragment, | ||
| 17 | |||
| 18 | Compute, | ||
| 19 | |||
| 20 | VertexA, | ||
| 21 | }; | ||
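| | // Stage count excluding VertexA. | ||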
| 22 | constexpr u32 MaxStageTypes = 6; | ||
| 23 | |||
| 24 | [[nodiscard]] constexpr Stage StageFromIndex(size_t index) noexcept { | ||
| 25 | return static_cast<Stage>(static_cast<size_t>(Stage::VertexB) + index); | ||
| 26 | } | ||
| 27 | |||
| 28 | } // namespace Shader | ||
diff --git a/src/shader_recompiler/varying_state.h b/src/shader_recompiler/varying_state.h new file mode 100644 index 000000000..9d7b24a76 --- /dev/null +++ b/src/shader_recompiler/varying_state.h | |||
| @@ -0,0 +1,69 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <bitset> | ||
| 8 | #include <cstddef> | ||
| 9 | |||
| 10 | #include "shader_recompiler/frontend/ir/attribute.h" | ||
| 11 | |||
| 12 | namespace Shader { | ||
| 13 | |||
| 14 | struct VaryingState { | ||
| 15 | std::bitset<256> mask{}; | ||
| 16 | |||
| 17 | void Set(IR::Attribute attribute, bool state = true) { | ||
| 18 | mask[static_cast<size_t>(attribute)] = state; | ||
| 19 | } | ||
| 20 | |||
| 21 | [[nodiscard]] bool operator[](IR::Attribute attribute) const noexcept { | ||
| 22 | return mask[static_cast<size_t>(attribute)]; | ||
| 23 | } | ||
| 24 | |||
| 25 | [[nodiscard]] bool AnyComponent(IR::Attribute base) const noexcept { | ||
| 26 | return mask[static_cast<size_t>(base) + 0] || mask[static_cast<size_t>(base) + 1] || | ||
| 27 | mask[static_cast<size_t>(base) + 2] || mask[static_cast<size_t>(base) + 3]; | ||
| 28 | } | ||
| 29 | |||
| 30 | [[nodiscard]] bool AllComponents(IR::Attribute base) const noexcept { | ||
| 31 | return mask[static_cast<size_t>(base) + 0] && mask[static_cast<size_t>(base) + 1] && | ||
| 32 | mask[static_cast<size_t>(base) + 2] && mask[static_cast<size_t>(base) + 3]; | ||
| 33 | } | ||
| 34 | |||
| 35 | [[nodiscard]] bool IsUniform(IR::Attribute base) const noexcept { | ||
| 36 | return AnyComponent(base) == AllComponents(base); | ||
| 37 | } | ||
| 38 | |||
| 39 | [[nodiscard]] bool Generic(size_t index, size_t component) const noexcept { | ||
| 40 | return mask[static_cast<size_t>(IR::Attribute::Generic0X) + index * 4 + component]; | ||
| 41 | } | ||
| 42 | |||
| 43 | [[nodiscard]] bool Generic(size_t index) const noexcept { | ||
| 44 | return Generic(index, 0) || Generic(index, 1) || Generic(index, 2) || Generic(index, 3); | ||
| 45 | } | ||
| 46 | |||
| 47 | [[nodiscard]] bool ClipDistances() const noexcept { | ||
| 48 | return AnyComponent(IR::Attribute::ClipDistance0) || | ||
| 49 | AnyComponent(IR::Attribute::ClipDistance4); | ||
| 50 | } | ||
| 51 | |||
| 52 | [[nodiscard]] bool Legacy() const noexcept { | ||
| 53 | return AnyComponent(IR::Attribute::ColorFrontDiffuseR) || | ||
| 54 | AnyComponent(IR::Attribute::ColorFrontSpecularR) || | ||
| 55 | AnyComponent(IR::Attribute::ColorBackDiffuseR) || | ||
| 56 | AnyComponent(IR::Attribute::ColorBackSpecularR) || FixedFunctionTexture(); | ||
| 57 | } | ||
| 58 | |||
| 59 | [[nodiscard]] bool FixedFunctionTexture() const noexcept { | ||
| 60 | for (size_t index = 0; index < 10; ++index) { | ||
| 61 | if (AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) { | ||
| 62 | return true; | ||
| 63 | } | ||
| 64 | } | ||
| 65 | return false; | ||
| 66 | } | ||
| 67 | }; | ||
| 68 | |||
| 69 | } // namespace Shader | ||
diff --git a/src/tests/common/unique_function.cpp b/src/tests/common/unique_function.cpp index ac9912738..aa6e86593 100644 --- a/src/tests/common/unique_function.cpp +++ b/src/tests/common/unique_function.cpp | |||
| @@ -17,10 +17,12 @@ struct Noisy { | |||
| 17 | Noisy& operator=(Noisy&& rhs) noexcept { | 17 | Noisy& operator=(Noisy&& rhs) noexcept { |
| 18 | state = "Move assigned"; | 18 | state = "Move assigned"; |
| 19 | rhs.state = "Moved away"; | 19 | rhs.state = "Moved away"; |
| 20 | return *this; | ||
| 20 | } | 21 | } |
| 21 | Noisy(const Noisy&) : state{"Copied constructed"} {} | 22 | Noisy(const Noisy&) : state{"Copied constructed"} {} |
| 22 | Noisy& operator=(const Noisy&) { | 23 | Noisy& operator=(const Noisy&) { |
| 23 | state = "Copied assigned"; | 24 | state = "Copied assigned"; |
| 25 | return *this; | ||
| 24 | } | 26 | } |
| 25 | 27 | ||
| 26 | std::string state; | 28 | std::string state; |
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index e4de55f4d..007ecc13e 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -29,7 +29,6 @@ add_library(video_core STATIC | |||
| 29 | dirty_flags.h | 29 | dirty_flags.h |
| 30 | dma_pusher.cpp | 30 | dma_pusher.cpp |
| 31 | dma_pusher.h | 31 | dma_pusher.h |
| 32 | engines/const_buffer_engine_interface.h | ||
| 33 | engines/const_buffer_info.h | 32 | engines/const_buffer_info.h |
| 34 | engines/engine_interface.h | 33 | engines/engine_interface.h |
| 35 | engines/engine_upload.cpp | 34 | engines/engine_upload.cpp |
| @@ -44,9 +43,6 @@ add_library(video_core STATIC | |||
| 44 | engines/maxwell_3d.h | 43 | engines/maxwell_3d.h |
| 45 | engines/maxwell_dma.cpp | 44 | engines/maxwell_dma.cpp |
| 46 | engines/maxwell_dma.h | 45 | engines/maxwell_dma.h |
| 47 | engines/shader_bytecode.h | ||
| 48 | engines/shader_header.h | ||
| 49 | engines/shader_type.h | ||
| 50 | framebuffer_config.h | 46 | framebuffer_config.h |
| 51 | macro/macro.cpp | 47 | macro/macro.cpp |
| 52 | macro/macro.h | 48 | macro/macro.h |
| @@ -61,8 +57,6 @@ add_library(video_core STATIC | |||
| 61 | gpu.h | 57 | gpu.h |
| 62 | gpu_thread.cpp | 58 | gpu_thread.cpp |
| 63 | gpu_thread.h | 59 | gpu_thread.h |
| 64 | guest_driver.cpp | ||
| 65 | guest_driver.h | ||
| 66 | memory_manager.cpp | 60 | memory_manager.cpp |
| 67 | memory_manager.h | 61 | memory_manager.h |
| 68 | query_cache.h | 62 | query_cache.h |
| @@ -71,26 +65,25 @@ add_library(video_core STATIC | |||
| 71 | rasterizer_interface.h | 65 | rasterizer_interface.h |
| 72 | renderer_base.cpp | 66 | renderer_base.cpp |
| 73 | renderer_base.h | 67 | renderer_base.h |
| 74 | renderer_opengl/gl_arb_decompiler.cpp | ||
| 75 | renderer_opengl/gl_arb_decompiler.h | ||
| 76 | renderer_opengl/gl_buffer_cache.cpp | 68 | renderer_opengl/gl_buffer_cache.cpp |
| 77 | renderer_opengl/gl_buffer_cache.h | 69 | renderer_opengl/gl_buffer_cache.h |
| 70 | renderer_opengl/gl_compute_pipeline.cpp | ||
| 71 | renderer_opengl/gl_compute_pipeline.h | ||
| 78 | renderer_opengl/gl_device.cpp | 72 | renderer_opengl/gl_device.cpp |
| 79 | renderer_opengl/gl_device.h | 73 | renderer_opengl/gl_device.h |
| 80 | renderer_opengl/gl_fence_manager.cpp | 74 | renderer_opengl/gl_fence_manager.cpp |
| 81 | renderer_opengl/gl_fence_manager.h | 75 | renderer_opengl/gl_fence_manager.h |
| 76 | renderer_opengl/gl_graphics_pipeline.cpp | ||
| 77 | renderer_opengl/gl_graphics_pipeline.h | ||
| 82 | renderer_opengl/gl_rasterizer.cpp | 78 | renderer_opengl/gl_rasterizer.cpp |
| 83 | renderer_opengl/gl_rasterizer.h | 79 | renderer_opengl/gl_rasterizer.h |
| 84 | renderer_opengl/gl_resource_manager.cpp | 80 | renderer_opengl/gl_resource_manager.cpp |
| 85 | renderer_opengl/gl_resource_manager.h | 81 | renderer_opengl/gl_resource_manager.h |
| 86 | renderer_opengl/gl_shader_cache.cpp | 82 | renderer_opengl/gl_shader_cache.cpp |
| 87 | renderer_opengl/gl_shader_cache.h | 83 | renderer_opengl/gl_shader_cache.h |
| 88 | renderer_opengl/gl_shader_decompiler.cpp | ||
| 89 | renderer_opengl/gl_shader_decompiler.h | ||
| 90 | renderer_opengl/gl_shader_disk_cache.cpp | ||
| 91 | renderer_opengl/gl_shader_disk_cache.h | ||
| 92 | renderer_opengl/gl_shader_manager.cpp | 84 | renderer_opengl/gl_shader_manager.cpp |
| 93 | renderer_opengl/gl_shader_manager.h | 85 | renderer_opengl/gl_shader_manager.h |
| 86 | renderer_opengl/gl_shader_context.h | ||
| 94 | renderer_opengl/gl_shader_util.cpp | 87 | renderer_opengl/gl_shader_util.cpp |
| 95 | renderer_opengl/gl_shader_util.h | 88 | renderer_opengl/gl_shader_util.h |
| 96 | renderer_opengl/gl_state_tracker.cpp | 89 | renderer_opengl/gl_state_tracker.cpp |
| @@ -112,6 +105,7 @@ add_library(video_core STATIC | |||
| 112 | renderer_vulkan/fixed_pipeline_state.h | 105 | renderer_vulkan/fixed_pipeline_state.h |
| 113 | renderer_vulkan/maxwell_to_vk.cpp | 106 | renderer_vulkan/maxwell_to_vk.cpp |
| 114 | renderer_vulkan/maxwell_to_vk.h | 107 | renderer_vulkan/maxwell_to_vk.h |
| 108 | renderer_vulkan/pipeline_helper.h | ||
| 115 | renderer_vulkan/renderer_vulkan.h | 109 | renderer_vulkan/renderer_vulkan.h |
| 116 | renderer_vulkan/renderer_vulkan.cpp | 110 | renderer_vulkan/renderer_vulkan.cpp |
| 117 | renderer_vulkan/vk_blit_screen.cpp | 111 | renderer_vulkan/vk_blit_screen.cpp |
| @@ -138,12 +132,12 @@ add_library(video_core STATIC | |||
| 138 | renderer_vulkan/vk_query_cache.h | 132 | renderer_vulkan/vk_query_cache.h |
| 139 | renderer_vulkan/vk_rasterizer.cpp | 133 | renderer_vulkan/vk_rasterizer.cpp |
| 140 | renderer_vulkan/vk_rasterizer.h | 134 | renderer_vulkan/vk_rasterizer.h |
| 135 | renderer_vulkan/vk_render_pass_cache.cpp | ||
| 136 | renderer_vulkan/vk_render_pass_cache.h | ||
| 141 | renderer_vulkan/vk_resource_pool.cpp | 137 | renderer_vulkan/vk_resource_pool.cpp |
| 142 | renderer_vulkan/vk_resource_pool.h | 138 | renderer_vulkan/vk_resource_pool.h |
| 143 | renderer_vulkan/vk_scheduler.cpp | 139 | renderer_vulkan/vk_scheduler.cpp |
| 144 | renderer_vulkan/vk_scheduler.h | 140 | renderer_vulkan/vk_scheduler.h |
| 145 | renderer_vulkan/vk_shader_decompiler.cpp | ||
| 146 | renderer_vulkan/vk_shader_decompiler.h | ||
| 147 | renderer_vulkan/vk_shader_util.cpp | 141 | renderer_vulkan/vk_shader_util.cpp |
| 148 | renderer_vulkan/vk_shader_util.h | 142 | renderer_vulkan/vk_shader_util.h |
| 149 | renderer_vulkan/vk_staging_buffer_pool.cpp | 143 | renderer_vulkan/vk_staging_buffer_pool.cpp |
| @@ -156,60 +150,12 @@ add_library(video_core STATIC | |||
| 156 | renderer_vulkan/vk_texture_cache.h | 150 | renderer_vulkan/vk_texture_cache.h |
| 157 | renderer_vulkan/vk_update_descriptor.cpp | 151 | renderer_vulkan/vk_update_descriptor.cpp |
| 158 | renderer_vulkan/vk_update_descriptor.h | 152 | renderer_vulkan/vk_update_descriptor.h |
| 153 | shader_cache.cpp | ||
| 159 | shader_cache.h | 154 | shader_cache.h |
| 155 | shader_environment.cpp | ||
| 156 | shader_environment.h | ||
| 160 | shader_notify.cpp | 157 | shader_notify.cpp |
| 161 | shader_notify.h | 158 | shader_notify.h |
| 162 | shader/decode/arithmetic.cpp | ||
| 163 | shader/decode/arithmetic_immediate.cpp | ||
| 164 | shader/decode/bfe.cpp | ||
| 165 | shader/decode/bfi.cpp | ||
| 166 | shader/decode/shift.cpp | ||
| 167 | shader/decode/arithmetic_integer.cpp | ||
| 168 | shader/decode/arithmetic_integer_immediate.cpp | ||
| 169 | shader/decode/arithmetic_half.cpp | ||
| 170 | shader/decode/arithmetic_half_immediate.cpp | ||
| 171 | shader/decode/ffma.cpp | ||
| 172 | shader/decode/hfma2.cpp | ||
| 173 | shader/decode/conversion.cpp | ||
| 174 | shader/decode/memory.cpp | ||
| 175 | shader/decode/texture.cpp | ||
| 176 | shader/decode/image.cpp | ||
| 177 | shader/decode/float_set_predicate.cpp | ||
| 178 | shader/decode/integer_set_predicate.cpp | ||
| 179 | shader/decode/half_set_predicate.cpp | ||
| 180 | shader/decode/predicate_set_register.cpp | ||
| 181 | shader/decode/predicate_set_predicate.cpp | ||
| 182 | shader/decode/register_set_predicate.cpp | ||
| 183 | shader/decode/float_set.cpp | ||
| 184 | shader/decode/integer_set.cpp | ||
| 185 | shader/decode/half_set.cpp | ||
| 186 | shader/decode/video.cpp | ||
| 187 | shader/decode/warp.cpp | ||
| 188 | shader/decode/xmad.cpp | ||
| 189 | shader/decode/other.cpp | ||
| 190 | shader/ast.cpp | ||
| 191 | shader/ast.h | ||
| 192 | shader/async_shaders.cpp | ||
| 193 | shader/async_shaders.h | ||
| 194 | shader/compiler_settings.cpp | ||
| 195 | shader/compiler_settings.h | ||
| 196 | shader/control_flow.cpp | ||
| 197 | shader/control_flow.h | ||
| 198 | shader/decode.cpp | ||
| 199 | shader/expr.cpp | ||
| 200 | shader/expr.h | ||
| 201 | shader/memory_util.cpp | ||
| 202 | shader/memory_util.h | ||
| 203 | shader/node_helper.cpp | ||
| 204 | shader/node_helper.h | ||
| 205 | shader/node.h | ||
| 206 | shader/registry.cpp | ||
| 207 | shader/registry.h | ||
| 208 | shader/shader_ir.cpp | ||
| 209 | shader/shader_ir.h | ||
| 210 | shader/track.cpp | ||
| 211 | shader/transform_feedback.cpp | ||
| 212 | shader/transform_feedback.h | ||
| 213 | surface.cpp | 159 | surface.cpp |
| 214 | surface.h | 160 | surface.h |
| 215 | texture_cache/accelerated_swizzle.cpp | 161 | texture_cache/accelerated_swizzle.cpp |
| @@ -242,6 +188,8 @@ add_library(video_core STATIC | |||
| 242 | textures/decoders.h | 188 | textures/decoders.h |
| 243 | textures/texture.cpp | 189 | textures/texture.cpp |
| 244 | textures/texture.h | 190 | textures/texture.h |
| 191 | transform_feedback.cpp | ||
| 192 | transform_feedback.h | ||
| 245 | video_core.cpp | 193 | video_core.cpp |
| 246 | video_core.h | 194 | video_core.h |
| 247 | vulkan_common/vulkan_debug_callback.cpp | 195 | vulkan_common/vulkan_debug_callback.cpp |
| @@ -265,7 +213,7 @@ add_library(video_core STATIC | |||
| 265 | create_target_directory_groups(video_core) | 213 | create_target_directory_groups(video_core) |
| 266 | 214 | ||
| 267 | target_link_libraries(video_core PUBLIC common core) | 215 | target_link_libraries(video_core PUBLIC common core) |
| 268 | target_link_libraries(video_core PRIVATE glad xbyak) | 216 | target_link_libraries(video_core PUBLIC glad shader_recompiler xbyak) |
| 269 | 217 | ||
| 270 | if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32) | 218 | if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32) |
| 271 | add_dependencies(video_core ffmpeg-build) | 219 | add_dependencies(video_core ffmpeg-build) |
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 7373cb62d..24c858104 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -31,6 +31,7 @@ | |||
| 31 | #include "video_core/engines/maxwell_3d.h" | 31 | #include "video_core/engines/maxwell_3d.h" |
| 32 | #include "video_core/memory_manager.h" | 32 | #include "video_core/memory_manager.h" |
| 33 | #include "video_core/rasterizer_interface.h" | 33 | #include "video_core/rasterizer_interface.h" |
| 34 | #include "video_core/surface.h" | ||
| 34 | #include "video_core/texture_cache/slot_vector.h" | 35 | #include "video_core/texture_cache/slot_vector.h" |
| 35 | #include "video_core/texture_cache/types.h" | 36 | #include "video_core/texture_cache/types.h" |
| 36 | 37 | ||
| @@ -42,14 +43,19 @@ MICROPROFILE_DECLARE(GPU_DownloadMemory); | |||
| 42 | 43 | ||
| 43 | using BufferId = SlotId; | 44 | using BufferId = SlotId; |
| 44 | 45 | ||
| 46 | using VideoCore::Surface::PixelFormat; | ||
| 47 | using namespace Common::Literals; | ||
| 48 | |||
| 45 | constexpr u32 NUM_VERTEX_BUFFERS = 32; | 49 | constexpr u32 NUM_VERTEX_BUFFERS = 32; |
| 46 | constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4; | 50 | constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4; |
| 47 | constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18; | 51 | constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18; |
| 48 | constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8; | 52 | constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8; |
| 49 | constexpr u32 NUM_STORAGE_BUFFERS = 16; | 53 | constexpr u32 NUM_STORAGE_BUFFERS = 16; |
| 54 | constexpr u32 NUM_TEXTURE_BUFFERS = 16; | ||
| 50 | constexpr u32 NUM_STAGES = 5; | 55 | constexpr u32 NUM_STAGES = 5; |
| 51 | 56 | ||
| 52 | using namespace Common::Literals; | 57 | using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>; |
| 58 | using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>; | ||
| 53 | 59 | ||
| 54 | template <typename P> | 60 | template <typename P> |
| 55 | class BufferCache { | 61 | class BufferCache { |
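
The two new aliases give the cache a per-slot record of the uniform buffer sizes a pipeline actually declares; later hunks clamp each binding with `std::min` against these tables. A small sketch of how such a table could be populated — the values and the `MakeSizes` helper are illustrative assumptions, not code from this commit:

    #include <array>
    #include <cstdint>

    using u32 = std::uint32_t;
    constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18;
    constexpr u32 NUM_STAGES = 5;
    using UniformBufferSizes =
        std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>;

    // Hypothetical: each stage records the byte size its shader declares for
    // every constant buffer slot; slots the shader never reads stay zero, so
    // std::min(binding.size, sizes[stage][index]) clamps oversized guest
    // bindings without affecting unused slots.
    UniformBufferSizes MakeSizes() {
        UniformBufferSizes sizes{};
        sizes[0][0] = 0x200; // e.g. vertex stage, c0: 512 bytes declared
        sizes[4][1] = 0x40;  // e.g. fragment stage, c1: 64 bytes declared
        return sizes;
    }
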
| @@ -67,6 +73,7 @@ class BufferCache { | |||
| 67 | static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX; | 73 | static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX; |
| 68 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX; | 74 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX; |
| 69 | static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS; | 75 | static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS; |
| 76 | static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS; | ||
| 70 | 77 | ||
| 71 | static constexpr BufferId NULL_BUFFER_ID{0}; | 78 | static constexpr BufferId NULL_BUFFER_ID{0}; |
| 72 | 79 | ||
| @@ -96,6 +103,10 @@ class BufferCache { | |||
| 96 | BufferId buffer_id; | 103 | BufferId buffer_id; |
| 97 | }; | 104 | }; |
| 98 | 105 | ||
| 106 | struct TextureBufferBinding : Binding { | ||
| 107 | PixelFormat format; | ||
| 108 | }; | ||
| 109 | |||
| 99 | static constexpr Binding NULL_BINDING{ | 110 | static constexpr Binding NULL_BINDING{ |
| 100 | .cpu_addr = 0, | 111 | .cpu_addr = 0, |
| 101 | .size = 0, | 112 | .size = 0, |
| @@ -133,20 +144,31 @@ public: | |||
| 133 | 144 | ||
| 134 | void BindHostComputeBuffers(); | 145 | void BindHostComputeBuffers(); |
| 135 | 146 | ||
| 136 | void SetEnabledUniformBuffers(size_t stage, u32 enabled); | 147 | void SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask, |
| 148 | const UniformBufferSizes* sizes); | ||
| 137 | 149 | ||
| 138 | void SetEnabledComputeUniformBuffers(u32 enabled); | 150 | void SetComputeUniformBufferState(u32 mask, const ComputeUniformBufferSizes* sizes); |
| 139 | 151 | ||
| 140 | void UnbindGraphicsStorageBuffers(size_t stage); | 152 | void UnbindGraphicsStorageBuffers(size_t stage); |
| 141 | 153 | ||
| 142 | void BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, | 154 | void BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, |
| 143 | bool is_written); | 155 | bool is_written); |
| 144 | 156 | ||
| 157 | void UnbindGraphicsTextureBuffers(size_t stage); | ||
| 158 | |||
| 159 | void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size, | ||
| 160 | PixelFormat format, bool is_written, bool is_image); | ||
| 161 | |||
| 145 | void UnbindComputeStorageBuffers(); | 162 | void UnbindComputeStorageBuffers(); |
| 146 | 163 | ||
| 147 | void BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, | 164 | void BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, |
| 148 | bool is_written); | 165 | bool is_written); |
| 149 | 166 | ||
| 167 | void UnbindComputeTextureBuffers(); | ||
| 168 | |||
| 169 | void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, | ||
| 170 | bool is_written, bool is_image); | ||
| 171 | |||
| 150 | void FlushCachedWrites(); | 172 | void FlushCachedWrites(); |
| 151 | 173 | ||
| 152 | /// Return true when there are uncommitted buffers to be downloaded | 174 | /// Return true when there are uncommitted buffers to be downloaded |
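
The texture buffer entry points added here follow the same unbind-then-bind protocol as the existing storage buffer API. A hedged usage sketch follows; the `Desc` record and the call site are assumptions about how a rasterizer frontend would drive this (with the video_core aliases `GPUVAddr`, `u32`, and `PixelFormat` in scope), not code from the commit:

    #include <cstddef>
    #include <span>

    // Sketch only: re-register every enabled texture buffer slot for one
    // graphics stage after clearing that stage's previous state.
    struct Desc {
        GPUVAddr gpu_addr;
        u32 size;
        PixelFormat format;
        bool is_written;
        bool is_image;
    };

    template <class P>
    void RebindStageTextureBuffers(BufferCache<P>& cache, size_t stage,
                                   std::span<const Desc> descs) {
        cache.UnbindGraphicsTextureBuffers(stage);
        for (size_t index = 0; index < descs.size(); ++index) {
            const Desc& d = descs[index];
            cache.BindGraphicsTextureBuffer(stage, index, d.gpu_addr, d.size,
                                            d.format, d.is_written, d.is_image);
        }
    }
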
| @@ -178,6 +200,7 @@ public: | |||
| 178 | [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); | 200 | [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); |
| 179 | 201 | ||
| 180 | std::mutex mutex; | 202 | std::mutex mutex; |
| 203 | Runtime& runtime; | ||
| 181 | 204 | ||
| 182 | private: | 205 | private: |
| 183 | template <typename Func> | 206 | template <typename Func> |
| @@ -254,12 +277,16 @@ private: | |||
| 254 | 277 | ||
| 255 | void BindHostGraphicsStorageBuffers(size_t stage); | 278 | void BindHostGraphicsStorageBuffers(size_t stage); |
| 256 | 279 | ||
| 280 | void BindHostGraphicsTextureBuffers(size_t stage); | ||
| 281 | |||
| 257 | void BindHostTransformFeedbackBuffers(); | 282 | void BindHostTransformFeedbackBuffers(); |
| 258 | 283 | ||
| 259 | void BindHostComputeUniformBuffers(); | 284 | void BindHostComputeUniformBuffers(); |
| 260 | 285 | ||
| 261 | void BindHostComputeStorageBuffers(); | 286 | void BindHostComputeStorageBuffers(); |
| 262 | 287 | ||
| 288 | void BindHostComputeTextureBuffers(); | ||
| 289 | |||
| 263 | void DoUpdateGraphicsBuffers(bool is_indexed); | 290 | void DoUpdateGraphicsBuffers(bool is_indexed); |
| 264 | 291 | ||
| 265 | void DoUpdateComputeBuffers(); | 292 | void DoUpdateComputeBuffers(); |
| @@ -274,6 +301,8 @@ private: | |||
| 274 | 301 | ||
| 275 | void UpdateStorageBuffers(size_t stage); | 302 | void UpdateStorageBuffers(size_t stage); |
| 276 | 303 | ||
| 304 | void UpdateTextureBuffers(size_t stage); | ||
| 305 | |||
| 277 | void UpdateTransformFeedbackBuffers(); | 306 | void UpdateTransformFeedbackBuffers(); |
| 278 | 307 | ||
| 279 | void UpdateTransformFeedbackBuffer(u32 index); | 308 | void UpdateTransformFeedbackBuffer(u32 index); |
| @@ -282,6 +311,8 @@ private: | |||
| 282 | 311 | ||
| 283 | void UpdateComputeStorageBuffers(); | 312 | void UpdateComputeStorageBuffers(); |
| 284 | 313 | ||
| 314 | void UpdateComputeTextureBuffers(); | ||
| 315 | |||
| 285 | void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size); | 316 | void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size); |
| 286 | 317 | ||
| 287 | [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size); | 318 | [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size); |
| @@ -323,6 +354,9 @@ private: | |||
| 323 | 354 | ||
| 324 | [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr) const; | 355 | [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr) const; |
| 325 | 356 | ||
| 357 | [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size, | ||
| 358 | PixelFormat format); | ||
| 359 | |||
| 326 | [[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size); | 360 | [[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size); |
| 327 | 361 | ||
| 328 | [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity); | 362 | [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity); |
| @@ -336,7 +370,6 @@ private: | |||
| 336 | Tegra::Engines::KeplerCompute& kepler_compute; | 370 | Tegra::Engines::KeplerCompute& kepler_compute; |
| 337 | Tegra::MemoryManager& gpu_memory; | 371 | Tegra::MemoryManager& gpu_memory; |
| 338 | Core::Memory::Memory& cpu_memory; | 372 | Core::Memory::Memory& cpu_memory; |
| 339 | Runtime& runtime; | ||
| 340 | 373 | ||
| 341 | SlotVector<Buffer> slot_buffers; | 374 | SlotVector<Buffer> slot_buffers; |
| 342 | DelayedDestructionRing<Buffer, 8> delayed_destruction_ring; | 375 | DelayedDestructionRing<Buffer, 8> delayed_destruction_ring; |
| @@ -347,20 +380,30 @@ private: | |||
| 347 | std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers; | 380 | std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers; |
| 348 | std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers; | 381 | std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers; |
| 349 | std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers; | 382 | std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers; |
| 383 | std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers; | ||
| 350 | std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers; | 384 | std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers; |
| 351 | 385 | ||
| 352 | std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers; | 386 | std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers; |
| 353 | std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers; | 387 | std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers; |
| 388 | std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers; | ||
| 354 | 389 | ||
| 355 | std::array<u32, NUM_STAGES> enabled_uniform_buffers{}; | 390 | std::array<u32, NUM_STAGES> enabled_uniform_buffer_masks{}; |
| 356 | u32 enabled_compute_uniform_buffers = 0; | 391 | u32 enabled_compute_uniform_buffer_mask = 0; |
| 392 | |||
| 393 | const UniformBufferSizes* uniform_buffer_sizes{}; | ||
| 394 | const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{}; | ||
| 357 | 395 | ||
| 358 | std::array<u32, NUM_STAGES> enabled_storage_buffers{}; | 396 | std::array<u32, NUM_STAGES> enabled_storage_buffers{}; |
| 359 | std::array<u32, NUM_STAGES> written_storage_buffers{}; | 397 | std::array<u32, NUM_STAGES> written_storage_buffers{}; |
| 360 | u32 enabled_compute_storage_buffers = 0; | 398 | u32 enabled_compute_storage_buffers = 0; |
| 361 | u32 written_compute_storage_buffers = 0; | 399 | u32 written_compute_storage_buffers = 0; |
| 362 | 400 | ||
| 363 | std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{}; | 401 | std::array<u32, NUM_STAGES> enabled_texture_buffers{}; |
| 402 | std::array<u32, NUM_STAGES> written_texture_buffers{}; | ||
| 403 | std::array<u32, NUM_STAGES> image_texture_buffers{}; | ||
| 404 | u32 enabled_compute_texture_buffers = 0; | ||
| 405 | u32 written_compute_texture_buffers = 0; | ||
| 406 | u32 image_compute_texture_buffers = 0; | ||
| 364 | 407 | ||
| 365 | std::array<u32, 16> uniform_cache_hits{}; | 408 | std::array<u32, 16> uniform_cache_hits{}; |
| 366 | std::array<u32, 16> uniform_cache_shots{}; | 409 | std::array<u32, 16> uniform_cache_shots{}; |
| @@ -371,6 +414,10 @@ private: | |||
| 371 | 414 | ||
| 372 | std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty> | 415 | std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty> |
| 373 | dirty_uniform_buffers{}; | 416 | dirty_uniform_buffers{}; |
| 417 | std::conditional_t<IS_OPENGL, std::array<u32, NUM_STAGES>, Empty> fast_bound_uniform_buffers{}; | ||
| 418 | std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, | ||
| 419 | std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>, Empty> | ||
| 420 | uniform_buffer_binding_sizes{}; | ||
| 374 | 421 | ||
| 375 | std::vector<BufferId> cached_write_buffer_ids; | 422 | std::vector<BufferId> cached_write_buffer_ids; |
| 376 | 423 | ||
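
`Empty` plus `std::conditional_t`, as used in the members above, lets backend-specific bookkeeping vanish on backends that do not need it: when the predicate is false, the member collapses to an empty struct. A self-contained sketch of the pattern (names are illustrative):

    #include <array>
    #include <cstdint>
    #include <type_traits>

    struct Empty {}; // zero-size stand-in when a member is compiled out

    template <bool IS_OPENGL>
    struct BackendState {
        // A real per-stage array only on the OpenGL instantiation; on other
        // backends this member is an Empty and costs (almost) nothing.
        std::conditional_t<IS_OPENGL, std::array<std::uint32_t, 5>, Empty>
            fast_bound_uniform_buffers{};
    };

    static_assert(sizeof(BackendState<false>) <= sizeof(BackendState<true>));
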
| @@ -394,8 +441,8 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, | |||
| 394 | Tegra::Engines::KeplerCompute& kepler_compute_, | 441 | Tegra::Engines::KeplerCompute& kepler_compute_, |
| 395 | Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, | 442 | Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, |
| 396 | Runtime& runtime_) | 443 | Runtime& runtime_) |
| 397 | : rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, | 444 | : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, |
| 398 | gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} { | 445 | kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} { |
| 399 | // Ensure the first slot is used for the null buffer | 446 | // Ensure the first slot is used for the null buffer |
| 400 | void(slot_buffers.insert(runtime, NullBufferParams{})); | 447 | void(slot_buffers.insert(runtime, NullBufferParams{})); |
| 401 | deletion_iterator = slot_buffers.end(); | 448 | deletion_iterator = slot_buffers.end(); |
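
Reordering the initializer list to put `runtime{runtime_}` first matches the member's new declaration position: C++ always initializes non-static members in declaration order, so writing the list in any other order draws a -Wreorder warning and can hide real bugs. A two-line illustration of why the warning matters (the constructor below is a deliberate bug):

    struct S {
        int a;
        int b;
        // 'a' is still initialized first, because initialization follows the
        // declaration order above, not the order written here; a{b} therefore
        // reads 'b' before 'b' has been initialized.
        explicit S(int x) : b{x}, a{b} {} // -Wreorder, and a{b} is a real bug
    };
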
| @@ -553,13 +600,9 @@ bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) { | |||
| 553 | ClearDownload(subtract_interval); | 600 | ClearDownload(subtract_interval); |
| 554 | common_ranges.subtract(subtract_interval); | 601 | common_ranges.subtract(subtract_interval); |
| 555 | 602 | ||
| 556 | BufferId buffer; | 603 | const BufferId buffer = FindBuffer(*cpu_dst_address, static_cast<u32>(size)); |
| 557 | do { | ||
| 558 | has_deleted_buffers = false; | ||
| 559 | buffer = FindBuffer(*cpu_dst_address, static_cast<u32>(size)); | ||
| 560 | } while (has_deleted_buffers); | ||
| 561 | auto& dest_buffer = slot_buffers[buffer]; | 604 | auto& dest_buffer = slot_buffers[buffer]; |
| 562 | const u32 offset = static_cast<u32>(*cpu_dst_address - dest_buffer.CpuAddr()); | 605 | const u32 offset = dest_buffer.Offset(*cpu_dst_address); |
| 563 | runtime.ClearBuffer(dest_buffer, offset, size, value); | 606 | runtime.ClearBuffer(dest_buffer, offset, size, value); |
| 564 | return true; | 607 | return true; |
| 565 | } | 608 | } |
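
Besides dropping the `has_deleted_buffers` retry loop around `FindBuffer`, this hunk replaces hand-written pointer arithmetic with `dest_buffer.Offset(*cpu_dst_address)`. A plausible shape for such a helper, assuming a buffer stores its base CPU address (reconstructed, not quoted from the commit):

    // Distance from the buffer's base CPU address to an address inside it.
    u32 Offset(VAddr other_cpu_addr) const noexcept {
        return static_cast<u32>(other_cpu_addr - cpu_addr);
    }
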
| @@ -619,6 +662,7 @@ void BufferCache<P>::BindHostStageBuffers(size_t stage) { | |||
| 619 | MICROPROFILE_SCOPE(GPU_BindUploadBuffers); | 662 | MICROPROFILE_SCOPE(GPU_BindUploadBuffers); |
| 620 | BindHostGraphicsUniformBuffers(stage); | 663 | BindHostGraphicsUniformBuffers(stage); |
| 621 | BindHostGraphicsStorageBuffers(stage); | 664 | BindHostGraphicsStorageBuffers(stage); |
| 665 | BindHostGraphicsTextureBuffers(stage); | ||
| 622 | } | 666 | } |
| 623 | 667 | ||
| 624 | template <class P> | 668 | template <class P> |
| @@ -626,21 +670,30 @@ void BufferCache<P>::BindHostComputeBuffers() { | |||
| 626 | MICROPROFILE_SCOPE(GPU_BindUploadBuffers); | 670 | MICROPROFILE_SCOPE(GPU_BindUploadBuffers); |
| 627 | BindHostComputeUniformBuffers(); | 671 | BindHostComputeUniformBuffers(); |
| 628 | BindHostComputeStorageBuffers(); | 672 | BindHostComputeStorageBuffers(); |
| 673 | BindHostComputeTextureBuffers(); | ||
| 629 | } | 674 | } |
| 630 | 675 | ||
| 631 | template <class P> | 676 | template <class P> |
| 632 | void BufferCache<P>::SetEnabledUniformBuffers(size_t stage, u32 enabled) { | 677 | void BufferCache<P>::SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask, |
| 678 | const UniformBufferSizes* sizes) { | ||
| 633 | if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { | 679 | if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { |
| 634 | if (enabled_uniform_buffers[stage] != enabled) { | 680 | if (enabled_uniform_buffer_masks != mask) { |
| 635 | dirty_uniform_buffers[stage] = ~u32{0}; | 681 | if constexpr (IS_OPENGL) { |
| 682 | fast_bound_uniform_buffers.fill(0); | ||
| 683 | } | ||
| 684 | dirty_uniform_buffers.fill(~u32{0}); | ||
| 685 | uniform_buffer_binding_sizes.fill({}); | ||
| 636 | } | 686 | } |
| 637 | } | 687 | } |
| 638 | enabled_uniform_buffers[stage] = enabled; | 688 | enabled_uniform_buffer_masks = mask; |
| 689 | uniform_buffer_sizes = sizes; | ||
| 639 | } | 690 | } |
| 640 | 691 | ||
| 641 | template <class P> | 692 | template <class P> |
| 642 | void BufferCache<P>::SetEnabledComputeUniformBuffers(u32 enabled) { | 693 | void BufferCache<P>::SetComputeUniformBufferState(u32 mask, |
| 643 | enabled_compute_uniform_buffers = enabled; | 694 | const ComputeUniformBufferSizes* sizes) { |
| 695 | enabled_compute_uniform_buffer_mask = mask; | ||
| 696 | compute_uniform_buffer_sizes = sizes; | ||
| 644 | } | 697 | } |
| 645 | 698 | ||
| 646 | template <class P> | 699 | template <class P> |
| @@ -661,9 +714,29 @@ void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, | |||
| 661 | } | 714 | } |
| 662 | 715 | ||
| 663 | template <class P> | 716 | template <class P> |
| 717 | void BufferCache<P>::UnbindGraphicsTextureBuffers(size_t stage) { | ||
| 718 | enabled_texture_buffers[stage] = 0; | ||
| 719 | written_texture_buffers[stage] = 0; | ||
| 720 | image_texture_buffers[stage] = 0; | ||
| 721 | } | ||
| 722 | |||
| 723 | template <class P> | ||
| 724 | void BufferCache<P>::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, | ||
| 725 | u32 size, PixelFormat format, bool is_written, | ||
| 726 | bool is_image) { | ||
| 727 | enabled_texture_buffers[stage] |= 1U << tbo_index; | ||
| 728 | written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index; | ||
| 729 | if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { | ||
| 730 | image_texture_buffers[stage] |= (is_image ? 1U : 0U) << tbo_index; | ||
| 731 | } | ||
| 732 | texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); | ||
| 733 | } | ||
| 734 | |||
| 735 | template <class P> | ||
| 664 | void BufferCache<P>::UnbindComputeStorageBuffers() { | 736 | void BufferCache<P>::UnbindComputeStorageBuffers() { |
| 665 | enabled_compute_storage_buffers = 0; | 737 | enabled_compute_storage_buffers = 0; |
| 666 | written_compute_storage_buffers = 0; | 738 | written_compute_storage_buffers = 0; |
| 739 | image_compute_texture_buffers = 0; | ||
| 667 | } | 740 | } |
| 668 | 741 | ||
| 669 | template <class P> | 742 | template <class P> |
| @@ -681,6 +754,24 @@ void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, | |||
| 681 | } | 754 | } |
| 682 | 755 | ||
| 683 | template <class P> | 756 | template <class P> |
| 757 | void BufferCache<P>::UnbindComputeTextureBuffers() { | ||
| 758 | enabled_compute_texture_buffers = 0; | ||
| 759 | written_compute_texture_buffers = 0; | ||
| 760 | image_compute_texture_buffers = 0; | ||
| 761 | } | ||
| 762 | |||
| 763 | template <class P> | ||
| 764 | void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, | ||
| 765 | PixelFormat format, bool is_written, bool is_image) { | ||
| 766 | enabled_compute_texture_buffers |= 1U << tbo_index; | ||
| 767 | written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index; | ||
| 768 | if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { | ||
| 769 | image_compute_texture_buffers |= (is_image ? 1U : 0U) << tbo_index; | ||
| 770 | } | ||
| 771 | compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); | ||
| 772 | } | ||
| 773 | |||
| 774 | template <class P> | ||
| 684 | void BufferCache<P>::FlushCachedWrites() { | 775 | void BufferCache<P>::FlushCachedWrites() { |
| 685 | for (const BufferId buffer_id : cached_write_buffer_ids) { | 776 | for (const BufferId buffer_id : cached_write_buffer_ids) { |
| 686 | slot_buffers[buffer_id].FlushCachedWrites(); | 777 | slot_buffers[buffer_id].FlushCachedWrites(); |
| @@ -905,7 +996,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) { | |||
| 905 | dirty = std::exchange(dirty_uniform_buffers[stage], 0); | 996 | dirty = std::exchange(dirty_uniform_buffers[stage], 0); |
| 906 | } | 997 | } |
| 907 | u32 binding_index = 0; | 998 | u32 binding_index = 0; |
| 908 | ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) { | 999 | ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) { |
| 909 | const bool needs_bind = ((dirty >> index) & 1) != 0; | 1000 | const bool needs_bind = ((dirty >> index) & 1) != 0; |
| 910 | BindHostGraphicsUniformBuffer(stage, index, binding_index, needs_bind); | 1001 | BindHostGraphicsUniformBuffer(stage, index, binding_index, needs_bind); |
| 911 | if constexpr (NEEDS_BIND_UNIFORM_INDEX) { | 1002 | if constexpr (NEEDS_BIND_UNIFORM_INDEX) { |
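
`ForEachEnabledBit`, used by this hunk and most of the ones that follow, walks the set bits of a binding mask. Its definition is not part of this diff; a minimal C++20 sketch of a helper with that behavior:

    #include <bit>
    #include <cstdint>

    // Calls func(index) once per set bit in mask, lowest index first.
    template <typename Func>
    void ForEachEnabledBit(std::uint32_t mask, Func&& func) {
        while (mask != 0) {
            const std::uint32_t index =
                static_cast<std::uint32_t>(std::countr_zero(mask));
            func(index);
            mask &= mask - 1; // clear the lowest set bit
        }
    }
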
| @@ -919,7 +1010,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 | |||
| 919 | bool needs_bind) { | 1010 | bool needs_bind) { |
| 920 | const Binding& binding = uniform_buffers[stage][index]; | 1011 | const Binding& binding = uniform_buffers[stage][index]; |
| 921 | const VAddr cpu_addr = binding.cpu_addr; | 1012 | const VAddr cpu_addr = binding.cpu_addr; |
| 922 | const u32 size = binding.size; | 1013 | const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]); |
| 923 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 1014 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 924 | TouchBuffer(buffer); | 1015 | TouchBuffer(buffer); |
| 925 | const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && | 1016 | const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && |
| @@ -929,8 +1020,13 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 | |||
| 929 | if constexpr (IS_OPENGL) { | 1020 | if constexpr (IS_OPENGL) { |
| 930 | if (runtime.HasFastBufferSubData()) { | 1021 | if (runtime.HasFastBufferSubData()) { |
| 931 | // Fast path for Nvidia | 1022 | // Fast path for Nvidia |
| 932 | if (!HasFastUniformBufferBound(stage, binding_index)) { | 1023 | const bool should_fast_bind = |
| 1024 | !HasFastUniformBufferBound(stage, binding_index) || | ||
| 1025 | uniform_buffer_binding_sizes[stage][binding_index] != size; | ||
| 1026 | if (should_fast_bind) { | ||
| 933 | // We only have to bind when the currently bound buffer is not the fast version | 1027 | // We only have to bind when the currently bound buffer is not the fast version |
| 1028 | fast_bound_uniform_buffers[stage] |= 1U << binding_index; | ||
| 1029 | uniform_buffer_binding_sizes[stage][binding_index] = size; | ||
| 934 | runtime.BindFastUniformBuffer(stage, binding_index, size); | 1030 | runtime.BindFastUniformBuffer(stage, binding_index, size); |
| 935 | } | 1031 | } |
| 936 | const auto span = ImmediateBufferWithData(cpu_addr, size); | 1032 | const auto span = ImmediateBufferWithData(cpu_addr, size); |
| @@ -938,8 +1034,10 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 | |||
| 938 | return; | 1034 | return; |
| 939 | } | 1035 | } |
| 940 | } | 1036 | } |
| 941 | fast_bound_uniform_buffers[stage] |= 1U << binding_index; | 1037 | if constexpr (IS_OPENGL) { |
| 942 | 1038 | fast_bound_uniform_buffers[stage] |= 1U << binding_index; | |
| 1039 | uniform_buffer_binding_sizes[stage][binding_index] = size; | ||
| 1040 | } | ||
| 943 | // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan | 1041 | // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan |
| 944 | const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size); | 1042 | const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size); |
| 945 | cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size); | 1043 | cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size); |
| @@ -952,14 +1050,27 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 | |||
| 952 | } | 1050 | } |
| 953 | ++uniform_cache_shots[0]; | 1051 | ++uniform_cache_shots[0]; |
| 954 | 1052 | ||
| 955 | if (!needs_bind && !HasFastUniformBufferBound(stage, binding_index)) { | 1053 | // Skip binding if it's not needed and if the bound buffer is not the fast version |
| 956 | // Skip binding if it's not needed and if the bound buffer is not the fast version | 1054 | // This exists to avoid instances where the fast buffer is bound and a GPU write happens |
| 957 | // This exists to avoid instances where the fast buffer is bound and a GPU write happens | 1055 | needs_bind |= HasFastUniformBufferBound(stage, binding_index); |
| 1056 | if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { | ||
| 1057 | needs_bind |= uniform_buffer_binding_sizes[stage][binding_index] != size; | ||
| 1058 | } | ||
| 1059 | if (!needs_bind) { | ||
| 958 | return; | 1060 | return; |
| 959 | } | 1061 | } |
| 960 | fast_bound_uniform_buffers[stage] &= ~(1U << binding_index); | ||
| 961 | |||
| 962 | const u32 offset = buffer.Offset(cpu_addr); | 1062 | const u32 offset = buffer.Offset(cpu_addr); |
| 1063 | if constexpr (IS_OPENGL) { | ||
| 1064 | // Fast buffer will be unbound | ||
| 1065 | fast_bound_uniform_buffers[stage] &= ~(1U << binding_index); | ||
| 1066 | |||
| 1067 | // Mark the index as dirty if offset doesn't match | ||
| 1068 | const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset(); | ||
| 1069 | dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index; | ||
| 1070 | } | ||
| 1071 | if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { | ||
| 1072 | uniform_buffer_binding_sizes[stage][binding_index] = size; | ||
| 1073 | } | ||
| 963 | if constexpr (NEEDS_BIND_UNIFORM_INDEX) { | 1074 | if constexpr (NEEDS_BIND_UNIFORM_INDEX) { |
| 964 | runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size); | 1075 | runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size); |
| 965 | } else { | 1076 | } else { |
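
Condensed, the rebind predicate this hunk installs fires in three cases: the caller already decided a bind is needed, the slot currently holds the fast immediate buffer, or (on backends with persistent bindings) the recorded binding size changed. A standalone restatement — a paraphrase, not the commit's code:

    #include <cstdint>

    bool ShouldRebind(bool needs_bind, bool fast_buffer_bound,
                      bool has_persistent_bindings, std::uint32_t bound_size,
                      std::uint32_t new_size) {
        // A fast-path buffer must always be replaced by the real binding.
        if (fast_buffer_bound) {
            return true;
        }
        // Persistent bindings also go stale when only the size changed.
        if (has_persistent_bindings && bound_size != new_size) {
            return true;
        }
        return needs_bind;
    }
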
| @@ -989,6 +1100,28 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) { | |||
| 989 | } | 1100 | } |
| 990 | 1101 | ||
| 991 | template <class P> | 1102 | template <class P> |
| 1103 | void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) { | ||
| 1104 | ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { | ||
| 1105 | const TextureBufferBinding& binding = texture_buffers[stage][index]; | ||
| 1106 | Buffer& buffer = slot_buffers[binding.buffer_id]; | ||
| 1107 | const u32 size = binding.size; | ||
| 1108 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | ||
| 1109 | |||
| 1110 | const u32 offset = buffer.Offset(binding.cpu_addr); | ||
| 1111 | const PixelFormat format = binding.format; | ||
| 1112 | if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { | ||
| 1113 | if (((image_texture_buffers[stage] >> index) & 1) != 0) { | ||
| 1114 | runtime.BindImageBuffer(buffer, offset, size, format); | ||
| 1115 | } else { | ||
| 1116 | runtime.BindTextureBuffer(buffer, offset, size, format); | ||
| 1117 | } | ||
| 1118 | } else { | ||
| 1119 | runtime.BindTextureBuffer(buffer, offset, size, format); | ||
| 1120 | } | ||
| 1121 | }); | ||
| 1122 | } | ||
| 1123 | |||
| 1124 | template <class P> | ||
| 992 | void BufferCache<P>::BindHostTransformFeedbackBuffers() { | 1125 | void BufferCache<P>::BindHostTransformFeedbackBuffers() { |
| 993 | if (maxwell3d.regs.tfb_enabled == 0) { | 1126 | if (maxwell3d.regs.tfb_enabled == 0) { |
| 994 | return; | 1127 | return; |
| @@ -1010,13 +1143,14 @@ void BufferCache<P>::BindHostComputeUniformBuffers() { | |||
| 1010 | if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { | 1143 | if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { |
| 1011 | // Mark all uniform buffers as dirty | 1144 | // Mark all uniform buffers as dirty |
| 1012 | dirty_uniform_buffers.fill(~u32{0}); | 1145 | dirty_uniform_buffers.fill(~u32{0}); |
| 1146 | fast_bound_uniform_buffers.fill(0); | ||
| 1013 | } | 1147 | } |
| 1014 | u32 binding_index = 0; | 1148 | u32 binding_index = 0; |
| 1015 | ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) { | 1149 | ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { |
| 1016 | const Binding& binding = compute_uniform_buffers[index]; | 1150 | const Binding& binding = compute_uniform_buffers[index]; |
| 1017 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 1151 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 1018 | TouchBuffer(buffer); | 1152 | TouchBuffer(buffer); |
| 1019 | const u32 size = binding.size; | 1153 | const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]); |
| 1020 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | 1154 | SynchronizeBuffer(buffer, binding.cpu_addr, size); |
| 1021 | 1155 | ||
| 1022 | const u32 offset = buffer.Offset(binding.cpu_addr); | 1156 | const u32 offset = buffer.Offset(binding.cpu_addr); |
| @@ -1051,6 +1185,28 @@ void BufferCache<P>::BindHostComputeStorageBuffers() { | |||
| 1051 | } | 1185 | } |
| 1052 | 1186 | ||
| 1053 | template <class P> | 1187 | template <class P> |
| 1188 | void BufferCache<P>::BindHostComputeTextureBuffers() { | ||
| 1189 | ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { | ||
| 1190 | const TextureBufferBinding& binding = compute_texture_buffers[index]; | ||
| 1191 | Buffer& buffer = slot_buffers[binding.buffer_id]; | ||
| 1192 | const u32 size = binding.size; | ||
| 1193 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | ||
| 1194 | |||
| 1195 | const u32 offset = buffer.Offset(binding.cpu_addr); | ||
| 1196 | const PixelFormat format = binding.format; | ||
| 1197 | if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { | ||
| 1198 | if (((image_compute_texture_buffers >> index) & 1) != 0) { | ||
| 1199 | runtime.BindImageBuffer(buffer, offset, size, format); | ||
| 1200 | } else { | ||
| 1201 | runtime.BindTextureBuffer(buffer, offset, size, format); | ||
| 1202 | } | ||
| 1203 | } else { | ||
| 1204 | runtime.BindTextureBuffer(buffer, offset, size, format); | ||
| 1205 | } | ||
| 1206 | }); | ||
| 1207 | } | ||
| 1208 | |||
| 1209 | template <class P> | ||
| 1054 | void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) { | 1210 | void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) { |
| 1055 | if (is_indexed) { | 1211 | if (is_indexed) { |
| 1056 | UpdateIndexBuffer(); | 1212 | UpdateIndexBuffer(); |
| @@ -1060,6 +1216,7 @@ void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) { | |||
| 1060 | for (size_t stage = 0; stage < NUM_STAGES; ++stage) { | 1216 | for (size_t stage = 0; stage < NUM_STAGES; ++stage) { |
| 1061 | UpdateUniformBuffers(stage); | 1217 | UpdateUniformBuffers(stage); |
| 1062 | UpdateStorageBuffers(stage); | 1218 | UpdateStorageBuffers(stage); |
| 1219 | UpdateTextureBuffers(stage); | ||
| 1063 | } | 1220 | } |
| 1064 | } | 1221 | } |
| 1065 | 1222 | ||
| @@ -1067,6 +1224,7 @@ template <class P> | |||
| 1067 | void BufferCache<P>::DoUpdateComputeBuffers() { | 1224 | void BufferCache<P>::DoUpdateComputeBuffers() { |
| 1068 | UpdateComputeUniformBuffers(); | 1225 | UpdateComputeUniformBuffers(); |
| 1069 | UpdateComputeStorageBuffers(); | 1226 | UpdateComputeStorageBuffers(); |
| 1227 | UpdateComputeTextureBuffers(); | ||
| 1070 | } | 1228 | } |
| 1071 | 1229 | ||
| 1072 | template <class P> | 1230 | template <class P> |
| @@ -1136,7 +1294,7 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) { | |||
| 1136 | 1294 | ||
| 1137 | template <class P> | 1295 | template <class P> |
| 1138 | void BufferCache<P>::UpdateUniformBuffers(size_t stage) { | 1296 | void BufferCache<P>::UpdateUniformBuffers(size_t stage) { |
| 1139 | ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) { | 1297 | ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) { |
| 1140 | Binding& binding = uniform_buffers[stage][index]; | 1298 | Binding& binding = uniform_buffers[stage][index]; |
| 1141 | if (binding.buffer_id) { | 1299 | if (binding.buffer_id) { |
| 1142 | // Already updated | 1300 | // Already updated |
| @@ -1167,6 +1325,18 @@ void BufferCache<P>::UpdateStorageBuffers(size_t stage) { | |||
| 1167 | } | 1325 | } |
| 1168 | 1326 | ||
| 1169 | template <class P> | 1327 | template <class P> |
| 1328 | void BufferCache<P>::UpdateTextureBuffers(size_t stage) { | ||
| 1329 | ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { | ||
| 1330 | Binding& binding = texture_buffers[stage][index]; | ||
| 1331 | binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); | ||
| 1332 | // Mark buffer as written if needed | ||
| 1333 | if (((written_texture_buffers[stage] >> index) & 1) != 0) { | ||
| 1334 | MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); | ||
| 1335 | } | ||
| 1336 | }); | ||
| 1337 | } | ||
| 1338 | |||
| 1339 | template <class P> | ||
| 1170 | void BufferCache<P>::UpdateTransformFeedbackBuffers() { | 1340 | void BufferCache<P>::UpdateTransformFeedbackBuffers() { |
| 1171 | if (maxwell3d.regs.tfb_enabled == 0) { | 1341 | if (maxwell3d.regs.tfb_enabled == 0) { |
| 1172 | return; | 1342 | return; |
| @@ -1197,7 +1367,7 @@ void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) { | |||
| 1197 | 1367 | ||
| 1198 | template <class P> | 1368 | template <class P> |
| 1199 | void BufferCache<P>::UpdateComputeUniformBuffers() { | 1369 | void BufferCache<P>::UpdateComputeUniformBuffers() { |
| 1200 | ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) { | 1370 | ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { |
| 1201 | Binding& binding = compute_uniform_buffers[index]; | 1371 | Binding& binding = compute_uniform_buffers[index]; |
| 1202 | binding = NULL_BINDING; | 1372 | binding = NULL_BINDING; |
| 1203 | const auto& launch_desc = kepler_compute.launch_description; | 1373 | const auto& launch_desc = kepler_compute.launch_description; |
| @@ -1218,11 +1388,22 @@ void BufferCache<P>::UpdateComputeStorageBuffers() { | |||
| 1218 | ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { | 1388 | ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { |
| 1219 | // Resolve buffer | 1389 | // Resolve buffer |
| 1220 | Binding& binding = compute_storage_buffers[index]; | 1390 | Binding& binding = compute_storage_buffers[index]; |
| 1221 | const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size); | 1391 | binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); |
| 1222 | binding.buffer_id = buffer_id; | ||
| 1223 | // Mark as written if needed | 1392 | // Mark as written if needed |
| 1224 | if (((written_compute_storage_buffers >> index) & 1) != 0) { | 1393 | if (((written_compute_storage_buffers >> index) & 1) != 0) { |
| 1225 | MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size); | 1394 | MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); |
| 1395 | } | ||
| 1396 | }); | ||
| 1397 | } | ||
| 1398 | |||
| 1399 | template <class P> | ||
| 1400 | void BufferCache<P>::UpdateComputeTextureBuffers() { | ||
| 1401 | ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { | ||
| 1402 | Binding& binding = compute_texture_buffers[index]; | ||
| 1403 | binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); | ||
| 1404 | // Mark as written if needed | ||
| 1405 | if (((written_compute_texture_buffers >> index) & 1) != 0) { | ||
| 1406 | MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); | ||
| 1226 | } | 1407 | } |
| 1227 | }); | 1408 | }); |
| 1228 | } | 1409 | } |
| @@ -1555,6 +1736,7 @@ template <class P> | |||
| 1555 | void BufferCache<P>::NotifyBufferDeletion() { | 1736 | void BufferCache<P>::NotifyBufferDeletion() { |
| 1556 | if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { | 1737 | if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { |
| 1557 | dirty_uniform_buffers.fill(~u32{0}); | 1738 | dirty_uniform_buffers.fill(~u32{0}); |
| 1739 | uniform_buffer_binding_sizes.fill({}); | ||
| 1558 | } | 1740 | } |
| 1559 | auto& flags = maxwell3d.dirty.flags; | 1741 | auto& flags = maxwell3d.dirty.flags; |
| 1560 | flags[Dirty::IndexBuffer] = true; | 1742 | flags[Dirty::IndexBuffer] = true; |
| @@ -1582,6 +1764,25 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s | |||
| 1582 | } | 1764 | } |
| 1583 | 1765 | ||
| 1584 | template <class P> | 1766 | template <class P> |
| 1767 | typename BufferCache<P>::TextureBufferBinding BufferCache<P>::GetTextureBufferBinding( | ||
| 1768 | GPUVAddr gpu_addr, u32 size, PixelFormat format) { | ||
| 1769 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||
| 1770 | TextureBufferBinding binding; | ||
| 1771 | if (!cpu_addr || size == 0) { | ||
| 1772 | binding.cpu_addr = 0; | ||
| 1773 | binding.size = 0; | ||
| 1774 | binding.buffer_id = NULL_BUFFER_ID; | ||
| 1775 | binding.format = PixelFormat::Invalid; | ||
| 1776 | } else { | ||
| 1777 | binding.cpu_addr = *cpu_addr; | ||
| 1778 | binding.size = size; | ||
| 1779 | binding.buffer_id = BufferId{}; | ||
| 1780 | binding.format = format; | ||
| 1781 | } | ||
| 1782 | return binding; | ||
| 1783 | } | ||
| 1784 | |||
| 1785 | template <class P> | ||
| 1585 | std::span<const u8> BufferCache<P>::ImmediateBufferWithData(VAddr cpu_addr, size_t size) { | 1786 | std::span<const u8> BufferCache<P>::ImmediateBufferWithData(VAddr cpu_addr, size_t size) { |
| 1586 | u8* const base_pointer = cpu_memory.GetPointer(cpu_addr); | 1787 | u8* const base_pointer = cpu_memory.GetPointer(cpu_addr); |
| 1587 | if (IsRangeGranular(cpu_addr, size) || | 1788 | if (IsRangeGranular(cpu_addr, size) || |
diff --git a/src/video_core/dirty_flags.cpp b/src/video_core/dirty_flags.cpp index 7149af290..b1be065c3 100644 --- a/src/video_core/dirty_flags.cpp +++ b/src/video_core/dirty_flags.cpp | |||
| @@ -58,6 +58,11 @@ void SetupDirtyRenderTargets(Maxwell3D::DirtyState::Tables& tables) { | |||
| 58 | FillBlock(table, OFF(zeta), NUM(zeta), flag); | 58 | FillBlock(table, OFF(zeta), NUM(zeta), flag); |
| 59 | } | 59 | } |
| 60 | } | 60 | } |
| 61 | |||
| 62 | void SetupDirtyShaders(Maxwell3D::DirtyState::Tables& tables) { | ||
| 63 | FillBlock(tables[0], OFF(shader_config[0]), | ||
| 64 | NUM(shader_config[0]) * Maxwell3D::Regs::MaxShaderProgram, Shaders); | ||
| 65 | } | ||
| 61 | } // Anonymous namespace | 66 | } // Anonymous namespace |
| 62 | 67 | ||
| 63 | void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) { | 68 | void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) { |
| @@ -65,6 +70,7 @@ void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) { | |||
| 65 | SetupIndexBuffer(tables); | 70 | SetupIndexBuffer(tables); |
| 66 | SetupDirtyDescriptors(tables); | 71 | SetupDirtyDescriptors(tables); |
| 67 | SetupDirtyRenderTargets(tables); | 72 | SetupDirtyRenderTargets(tables); |
| 73 | SetupDirtyShaders(tables); | ||
| 68 | } | 74 | } |
| 69 | 75 | ||
| 70 | } // namespace VideoCommon::Dirty | 76 | } // namespace VideoCommon::Dirty |
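
`SetupDirtyShaders` covers every `shader_config` register with the new `Shaders` flag. `FillBlock`'s definition lies outside this diff; its assumed behavior is to map a contiguous run of register indices to one dirty flag:

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>

    // Assumed semantics: writes to any register index in [begin, begin + num)
    // raise 'flag' in the dirty state, so touching any field of any
    // shader_config[] entry marks the Shaders flag dirty.
    template <typename Table>
    void FillBlock(Table& table, std::size_t begin, std::size_t num,
                   std::uint8_t flag) {
        std::fill_n(table.begin() + begin, num, flag);
    }
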
diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h index 702688ace..504465d3f 100644 --- a/src/video_core/dirty_flags.h +++ b/src/video_core/dirty_flags.h | |||
| @@ -36,6 +36,8 @@ enum : u8 { | |||
| 36 | 36 | ||
| 37 | IndexBuffer, | 37 | IndexBuffer, |
| 38 | 38 | ||
| 39 | Shaders, | ||
| 40 | |||
| 39 | LastCommonEntry, | 41 | LastCommonEntry, |
| 40 | }; | 42 | }; |
| 41 | 43 | ||
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h deleted file mode 100644 index f46e81bb7..000000000 --- a/src/video_core/engines/const_buffer_engine_interface.h +++ /dev/null | |||
| @@ -1,103 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <type_traits> | ||
| 8 | #include "common/bit_field.h" | ||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/engines/shader_bytecode.h" | ||
| 11 | #include "video_core/engines/shader_type.h" | ||
| 12 | #include "video_core/guest_driver.h" | ||
| 13 | #include "video_core/textures/texture.h" | ||
| 14 | |||
| 15 | namespace Tegra::Engines { | ||
| 16 | |||
| 17 | struct SamplerDescriptor { | ||
| 18 | union { | ||
| 19 | u32 raw = 0; | ||
| 20 | BitField<0, 2, Tegra::Shader::TextureType> texture_type; | ||
| 21 | BitField<2, 3, Tegra::Texture::ComponentType> r_type; | ||
| 22 | BitField<5, 1, u32> is_array; | ||
| 23 | BitField<6, 1, u32> is_buffer; | ||
| 24 | BitField<7, 1, u32> is_shadow; | ||
| 25 | BitField<8, 3, Tegra::Texture::ComponentType> g_type; | ||
| 26 | BitField<11, 3, Tegra::Texture::ComponentType> b_type; | ||
| 27 | BitField<14, 3, Tegra::Texture::ComponentType> a_type; | ||
| 28 | BitField<17, 7, Tegra::Texture::TextureFormat> format; | ||
| 29 | }; | ||
| 30 | |||
| 31 | bool operator==(const SamplerDescriptor& rhs) const noexcept { | ||
| 32 | return raw == rhs.raw; | ||
| 33 | } | ||
| 34 | |||
| 35 | bool operator!=(const SamplerDescriptor& rhs) const noexcept { | ||
| 36 | return !operator==(rhs); | ||
| 37 | } | ||
| 38 | |||
| 39 | static SamplerDescriptor FromTIC(const Tegra::Texture::TICEntry& tic) { | ||
| 40 | using Tegra::Shader::TextureType; | ||
| 41 | SamplerDescriptor result; | ||
| 42 | |||
| 43 | result.format.Assign(tic.format.Value()); | ||
| 44 | result.r_type.Assign(tic.r_type.Value()); | ||
| 45 | result.g_type.Assign(tic.g_type.Value()); | ||
| 46 | result.b_type.Assign(tic.b_type.Value()); | ||
| 47 | result.a_type.Assign(tic.a_type.Value()); | ||
| 48 | |||
| 49 | switch (tic.texture_type.Value()) { | ||
| 50 | case Tegra::Texture::TextureType::Texture1D: | ||
| 51 | result.texture_type.Assign(TextureType::Texture1D); | ||
| 52 | return result; | ||
| 53 | case Tegra::Texture::TextureType::Texture2D: | ||
| 54 | result.texture_type.Assign(TextureType::Texture2D); | ||
| 55 | return result; | ||
| 56 | case Tegra::Texture::TextureType::Texture3D: | ||
| 57 | result.texture_type.Assign(TextureType::Texture3D); | ||
| 58 | return result; | ||
| 59 | case Tegra::Texture::TextureType::TextureCubemap: | ||
| 60 | result.texture_type.Assign(TextureType::TextureCube); | ||
| 61 | return result; | ||
| 62 | case Tegra::Texture::TextureType::Texture1DArray: | ||
| 63 | result.texture_type.Assign(TextureType::Texture1D); | ||
| 64 | result.is_array.Assign(1); | ||
| 65 | return result; | ||
| 66 | case Tegra::Texture::TextureType::Texture2DArray: | ||
| 67 | result.texture_type.Assign(TextureType::Texture2D); | ||
| 68 | result.is_array.Assign(1); | ||
| 69 | return result; | ||
| 70 | case Tegra::Texture::TextureType::Texture1DBuffer: | ||
| 71 | result.texture_type.Assign(TextureType::Texture1D); | ||
| 72 | result.is_buffer.Assign(1); | ||
| 73 | return result; | ||
| 74 | case Tegra::Texture::TextureType::Texture2DNoMipmap: | ||
| 75 | result.texture_type.Assign(TextureType::Texture2D); | ||
| 76 | return result; | ||
| 77 | case Tegra::Texture::TextureType::TextureCubeArray: | ||
| 78 | result.texture_type.Assign(TextureType::TextureCube); | ||
| 79 | result.is_array.Assign(1); | ||
| 80 | return result; | ||
| 81 | default: | ||
| 82 | result.texture_type.Assign(TextureType::Texture2D); | ||
| 83 | return result; | ||
| 84 | } | ||
| 85 | } | ||
| 86 | }; | ||
| 87 | static_assert(std::is_trivially_copyable_v<SamplerDescriptor>); | ||
| 88 | |||
| 89 | class ConstBufferEngineInterface { | ||
| 90 | public: | ||
| 91 | virtual ~ConstBufferEngineInterface() = default; | ||
| 92 | virtual u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const = 0; | ||
| 93 | virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0; | ||
| 94 | virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, | ||
| 95 | u64 offset) const = 0; | ||
| 96 | virtual SamplerDescriptor AccessSampler(u32 handle) const = 0; | ||
| 97 | virtual u32 GetBoundBuffer() const = 0; | ||
| 98 | |||
| 99 | virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0; | ||
| 100 | virtual const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const = 0; | ||
| 101 | }; | ||
| 102 | |||
| 103 | } // namespace Tegra::Engines | ||
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index a9b75091e..492b4c5a3 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp | |||
| @@ -8,7 +8,6 @@ | |||
| 8 | #include "core/core.h" | 8 | #include "core/core.h" |
| 9 | #include "video_core/engines/kepler_compute.h" | 9 | #include "video_core/engines/kepler_compute.h" |
| 10 | #include "video_core/engines/maxwell_3d.h" | 10 | #include "video_core/engines/maxwell_3d.h" |
| 11 | #include "video_core/engines/shader_type.h" | ||
| 12 | #include "video_core/memory_manager.h" | 11 | #include "video_core/memory_manager.h" |
| 13 | #include "video_core/rasterizer_interface.h" | 12 | #include "video_core/rasterizer_interface.h" |
| 14 | #include "video_core/renderer_base.h" | 13 | #include "video_core/renderer_base.h" |
| @@ -57,53 +56,11 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun | |||
| 57 | } | 56 | } |
| 58 | } | 57 | } |
| 59 | 58 | ||
| 60 | u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { | ||
| 61 | ASSERT(stage == ShaderType::Compute); | ||
| 62 | const auto& buffer = launch_description.const_buffer_config[const_buffer]; | ||
| 63 | u32 result; | ||
| 64 | std::memcpy(&result, memory_manager.GetPointer(buffer.Address() + offset), sizeof(u32)); | ||
| 65 | return result; | ||
| 66 | } | ||
| 67 | |||
| 68 | SamplerDescriptor KeplerCompute::AccessBoundSampler(ShaderType stage, u64 offset) const { | ||
| 69 | return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle)); | ||
| 70 | } | ||
| 71 | |||
| 72 | SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 const_buffer, | ||
| 73 | u64 offset) const { | ||
| 74 | ASSERT(stage == ShaderType::Compute); | ||
| 75 | const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer]; | ||
| 76 | const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset; | ||
| 77 | return AccessSampler(memory_manager.Read<u32>(tex_info_address)); | ||
| 78 | } | ||
| 79 | |||
| 80 | SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const { | ||
| 81 | const Texture::TextureHandle tex_handle{handle}; | ||
| 82 | const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id); | ||
| 83 | const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id); | ||
| 84 | |||
| 85 | SamplerDescriptor result = SamplerDescriptor::FromTIC(tic); | ||
| 86 | result.is_shadow.Assign(tsc.depth_compare_enabled.Value()); | ||
| 87 | return result; | ||
| 88 | } | ||
| 89 | |||
| 90 | VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() { | ||
| 91 | return rasterizer->AccessGuestDriverProfile(); | ||
| 92 | } | ||
| 93 | |||
| 94 | const VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() const { | ||
| 95 | return rasterizer->AccessGuestDriverProfile(); | ||
| 96 | } | ||
| 97 | |||
| 98 | void KeplerCompute::ProcessLaunch() { | 59 | void KeplerCompute::ProcessLaunch() { |
| 99 | const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); | 60 | const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); |
| 100 | memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, | 61 | memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, |
| 101 | LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32)); | 62 | LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32)); |
| 102 | 63 | rasterizer->DispatchCompute(); | |
| 103 | const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start; | ||
| 104 | LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr); | ||
| 105 | |||
| 106 | rasterizer->DispatchCompute(code_addr); | ||
| 107 | } | 64 | } |
| 108 | 65 | ||
| 109 | Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const { | 66 | Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const { |
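
`ProcessLaunch` no longer computes `code_addr` or threads it through `DispatchCompute`; the remaining call site implies the rasterizer interface dropped the parameter, with backends now locating the compute program from engine state themselves. A reconstructed sketch of the implied signature change (inferred from the removed lines, not quoted from the diff):

    // Old (per the removed lines): DispatchCompute(code_addr) received
    // regs.code_loc.Address() + launch_description.program_start.
    // New (per the remaining call): backends read that state directly.
    class RasterizerInterface {
    public:
        virtual ~RasterizerInterface() = default;
        virtual void DispatchCompute() = 0; // no GPUVAddr parameter anymore
    };
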
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 7c40cba38..f8b8d06ac 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h | |||
| @@ -10,10 +10,8 @@ | |||
| 10 | #include "common/bit_field.h" | 10 | #include "common/bit_field.h" |
| 11 | #include "common/common_funcs.h" | 11 | #include "common/common_funcs.h" |
| 12 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "video_core/engines/const_buffer_engine_interface.h" | ||
| 14 | #include "video_core/engines/engine_interface.h" | 13 | #include "video_core/engines/engine_interface.h" |
| 15 | #include "video_core/engines/engine_upload.h" | 14 | #include "video_core/engines/engine_upload.h" |
| 16 | #include "video_core/engines/shader_type.h" | ||
| 17 | #include "video_core/gpu.h" | 15 | #include "video_core/gpu.h" |
| 18 | #include "video_core/textures/texture.h" | 16 | #include "video_core/textures/texture.h" |
| 19 | 17 | ||
| @@ -40,7 +38,7 @@ namespace Tegra::Engines { | |||
| 40 | #define KEPLER_COMPUTE_REG_INDEX(field_name) \ | 38 | #define KEPLER_COMPUTE_REG_INDEX(field_name) \ |
| 41 | (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) | 39 | (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) |
| 42 | 40 | ||
| 43 | class KeplerCompute final : public ConstBufferEngineInterface, public EngineInterface { | 41 | class KeplerCompute final : public EngineInterface { |
| 44 | public: | 42 | public: |
| 45 | explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager); | 43 | explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager); |
| 46 | ~KeplerCompute(); | 44 | ~KeplerCompute(); |
| @@ -209,23 +207,6 @@ public: | |||
| 209 | void CallMultiMethod(u32 method, const u32* base_start, u32 amount, | 207 | void CallMultiMethod(u32 method, const u32* base_start, u32 amount, |
| 210 | u32 methods_pending) override; | 208 | u32 methods_pending) override; |
| 211 | 209 | ||
| 212 | u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; | ||
| 213 | |||
| 214 | SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; | ||
| 215 | |||
| 216 | SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, | ||
| 217 | u64 offset) const override; | ||
| 218 | |||
| 219 | SamplerDescriptor AccessSampler(u32 handle) const override; | ||
| 220 | |||
| 221 | u32 GetBoundBuffer() const override { | ||
| 222 | return regs.tex_cb_index; | ||
| 223 | } | ||
| 224 | |||
| 225 | VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override; | ||
| 226 | |||
| 227 | const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override; | ||
| 228 | |||
| 229 | private: | 210 | private: |
| 230 | void ProcessLaunch(); | 211 | void ProcessLaunch(); |
| 231 | 212 | ||
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index aab6b8f7a..b18b8a02a 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -8,7 +8,6 @@ | |||
| 8 | #include "core/core.h" | 8 | #include "core/core.h" |
| 9 | #include "core/core_timing.h" | 9 | #include "core/core_timing.h" |
| 10 | #include "video_core/engines/maxwell_3d.h" | 10 | #include "video_core/engines/maxwell_3d.h" |
| 11 | #include "video_core/engines/shader_type.h" | ||
| 12 | #include "video_core/gpu.h" | 11 | #include "video_core/gpu.h" |
| 13 | #include "video_core/memory_manager.h" | 12 | #include "video_core/memory_manager.h" |
| 14 | #include "video_core/rasterizer_interface.h" | 13 | #include "video_core/rasterizer_interface.h" |
| @@ -670,42 +669,4 @@ void Maxwell3D::ProcessClearBuffers() { | |||
| 670 | rasterizer->Clear(); | 669 | rasterizer->Clear(); |
| 671 | } | 670 | } |
| 672 | 671 | ||
| 673 | u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { | ||
| 674 | ASSERT(stage != ShaderType::Compute); | ||
| 675 | const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)]; | ||
| 676 | const auto& buffer = shader_stage.const_buffers[const_buffer]; | ||
| 677 | return memory_manager.Read<u32>(buffer.address + offset); | ||
| 678 | } | ||
| 679 | |||
| 680 | SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const { | ||
| 681 | return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle)); | ||
| 682 | } | ||
| 683 | |||
| 684 | SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_buffer, | ||
| 685 | u64 offset) const { | ||
| 686 | ASSERT(stage != ShaderType::Compute); | ||
| 687 | const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)]; | ||
| 688 | const auto& tex_info_buffer = shader.const_buffers[const_buffer]; | ||
| 689 | const GPUVAddr tex_info_address = tex_info_buffer.address + offset; | ||
| 690 | return AccessSampler(memory_manager.Read<u32>(tex_info_address)); | ||
| 691 | } | ||
| 692 | |||
| 693 | SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const { | ||
| 694 | const Texture::TextureHandle tex_handle{handle}; | ||
| 695 | const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id); | ||
| 696 | const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id); | ||
| 697 | |||
| 698 | SamplerDescriptor result = SamplerDescriptor::FromTIC(tic); | ||
| 699 | result.is_shadow.Assign(tsc.depth_compare_enabled.Value()); | ||
| 700 | return result; | ||
| 701 | } | ||
| 702 | |||
| 703 | VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() { | ||
| 704 | return rasterizer->AccessGuestDriverProfile(); | ||
| 705 | } | ||
| 706 | |||
| 707 | const VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() const { | ||
| 708 | return rasterizer->AccessGuestDriverProfile(); | ||
| 709 | } | ||
| 710 | |||
| 711 | } // namespace Tegra::Engines | 672 | } // namespace Tegra::Engines |
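
The deleted helpers in both engines resolved bindless samplers the same way: read a `u32` handle from const-buffer memory, split it into TIC and TSC indices, and build a `SamplerDescriptor` from the two table entries. The split comes from `Texture::TextureHandle`; a sketch of that decode, with the field layout as this codebase defines it (reproduced here for context, not a new API):

```cpp
// Shape of the 32-bit bindless handle the removed AccessBindlessSampler
// read from guest memory (layout as in Texture::TextureHandle).
union TextureHandle {
    u32 raw;
    BitField<0, 20, u32> tic_id;  // index into the image (TIC) table
    BitField<20, 12, u32> tsc_id; // index into the sampler (TSC) table
};
```
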
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 335383955..1aa43523a 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -17,11 +17,9 @@ | |||
| 17 | #include "common/common_funcs.h" | 17 | #include "common/common_funcs.h" |
| 18 | #include "common/common_types.h" | 18 | #include "common/common_types.h" |
| 19 | #include "common/math_util.h" | 19 | #include "common/math_util.h" |
| 20 | #include "video_core/engines/const_buffer_engine_interface.h" | ||
| 21 | #include "video_core/engines/const_buffer_info.h" | 20 | #include "video_core/engines/const_buffer_info.h" |
| 22 | #include "video_core/engines/engine_interface.h" | 21 | #include "video_core/engines/engine_interface.h" |
| 23 | #include "video_core/engines/engine_upload.h" | 22 | #include "video_core/engines/engine_upload.h" |
| 24 | #include "video_core/engines/shader_type.h" | ||
| 25 | #include "video_core/gpu.h" | 23 | #include "video_core/gpu.h" |
| 26 | #include "video_core/macro/macro.h" | 24 | #include "video_core/macro/macro.h" |
| 27 | #include "video_core/textures/texture.h" | 25 | #include "video_core/textures/texture.h" |
| @@ -49,7 +47,7 @@ namespace Tegra::Engines { | |||
| 49 | #define MAXWELL3D_REG_INDEX(field_name) \ | 47 | #define MAXWELL3D_REG_INDEX(field_name) \ |
| 50 | (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) | 48 | (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) |
| 51 | 49 | ||
| 52 | class Maxwell3D final : public ConstBufferEngineInterface, public EngineInterface { | 50 | class Maxwell3D final : public EngineInterface { |
| 53 | public: | 51 | public: |
| 54 | explicit Maxwell3D(Core::System& system, MemoryManager& memory_manager); | 52 | explicit Maxwell3D(Core::System& system, MemoryManager& memory_manager); |
| 55 | ~Maxwell3D(); | 53 | ~Maxwell3D(); |
| @@ -307,10 +305,6 @@ public: | |||
| 307 | return (type == Type::SignedNorm) || (type == Type::UnsignedNorm); | 305 | return (type == Type::SignedNorm) || (type == Type::UnsignedNorm); |
| 308 | } | 306 | } |
| 309 | 307 | ||
| 310 | bool IsConstant() const { | ||
| 311 | return constant; | ||
| 312 | } | ||
| 313 | |||
| 314 | bool IsValid() const { | 308 | bool IsValid() const { |
| 315 | return size != Size::Invalid; | 309 | return size != Size::Invalid; |
| 316 | } | 310 | } |
| @@ -912,7 +906,11 @@ public: | |||
| 912 | 906 | ||
| 913 | u32 fill_rectangle; | 907 | u32 fill_rectangle; |
| 914 | 908 | ||
| 915 | INSERT_PADDING_WORDS_NOINIT(0x8); | 909 | INSERT_PADDING_WORDS_NOINIT(0x2); |
| 910 | |||
| 911 | u32 conservative_raster_enable; | ||
| 912 | |||
| 913 | INSERT_PADDING_WORDS_NOINIT(0x5); | ||
| 916 | 914 | ||
| 917 | std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format; | 915 | std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format; |
| 918 | 916 | ||
| @@ -959,7 +957,11 @@ public: | |||
| 959 | 957 | ||
| 960 | SamplerIndex sampler_index; | 958 | SamplerIndex sampler_index; |
| 961 | 959 | ||
| 962 | INSERT_PADDING_WORDS_NOINIT(0x25); | 960 | INSERT_PADDING_WORDS_NOINIT(0x2); |
| 961 | |||
| 962 | std::array<u32, 8> gp_passthrough_mask; | ||
| 963 | |||
| 964 | INSERT_PADDING_WORDS_NOINIT(0x1B); | ||
| 963 | 965 | ||
| 964 | u32 depth_test_enable; | 966 | u32 depth_test_enable; |
| 965 | 967 | ||
| @@ -1152,7 +1154,11 @@ public: | |||
| 1152 | u32 index; | 1154 | u32 index; |
| 1153 | } primitive_restart; | 1155 | } primitive_restart; |
| 1154 | 1156 | ||
| 1155 | INSERT_PADDING_WORDS_NOINIT(0x5F); | 1157 | INSERT_PADDING_WORDS_NOINIT(0xE); |
| 1158 | |||
| 1159 | u32 provoking_vertex_last; | ||
| 1160 | |||
| 1161 | INSERT_PADDING_WORDS_NOINIT(0x50); | ||
| 1156 | 1162 | ||
| 1157 | struct { | 1163 | struct { |
| 1158 | u32 start_addr_high; | 1164 | u32 start_addr_high; |
| @@ -1424,23 +1430,6 @@ public: | |||
| 1424 | 1430 | ||
| 1425 | void FlushMMEInlineDraw(); | 1431 | void FlushMMEInlineDraw(); |
| 1426 | 1432 | ||
| 1427 | u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; | ||
| 1428 | |||
| 1429 | SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; | ||
| 1430 | |||
| 1431 | SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, | ||
| 1432 | u64 offset) const override; | ||
| 1433 | |||
| 1434 | SamplerDescriptor AccessSampler(u32 handle) const override; | ||
| 1435 | |||
| 1436 | u32 GetBoundBuffer() const override { | ||
| 1437 | return regs.tex_cb_index; | ||
| 1438 | } | ||
| 1439 | |||
| 1440 | VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override; | ||
| 1441 | |||
| 1442 | const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override; | ||
| 1443 | |||
| 1444 | bool ShouldExecute() const { | 1433 | bool ShouldExecute() const { |
| 1445 | return execute_on; | 1434 | return execute_on; |
| 1446 | } | 1435 | } |
| @@ -1630,6 +1619,7 @@ ASSERT_REG_POSITION(zeta, 0x3F8); | |||
| 1630 | ASSERT_REG_POSITION(render_area, 0x3FD); | 1619 | ASSERT_REG_POSITION(render_area, 0x3FD); |
| 1631 | ASSERT_REG_POSITION(clear_flags, 0x43E); | 1620 | ASSERT_REG_POSITION(clear_flags, 0x43E); |
| 1632 | ASSERT_REG_POSITION(fill_rectangle, 0x44F); | 1621 | ASSERT_REG_POSITION(fill_rectangle, 0x44F); |
| 1622 | ASSERT_REG_POSITION(conservative_raster_enable, 0x452); | ||
| 1633 | ASSERT_REG_POSITION(vertex_attrib_format, 0x458); | 1623 | ASSERT_REG_POSITION(vertex_attrib_format, 0x458); |
| 1634 | ASSERT_REG_POSITION(multisample_sample_locations, 0x478); | 1624 | ASSERT_REG_POSITION(multisample_sample_locations, 0x478); |
| 1635 | ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E); | 1625 | ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E); |
| @@ -1638,6 +1628,7 @@ ASSERT_REG_POSITION(zeta_width, 0x48a); | |||
| 1638 | ASSERT_REG_POSITION(zeta_height, 0x48b); | 1628 | ASSERT_REG_POSITION(zeta_height, 0x48b); |
| 1639 | ASSERT_REG_POSITION(zeta_depth, 0x48c); | 1629 | ASSERT_REG_POSITION(zeta_depth, 0x48c); |
| 1640 | ASSERT_REG_POSITION(sampler_index, 0x48D); | 1630 | ASSERT_REG_POSITION(sampler_index, 0x48D); |
| 1631 | ASSERT_REG_POSITION(gp_passthrough_mask, 0x490); | ||
| 1641 | ASSERT_REG_POSITION(depth_test_enable, 0x4B3); | 1632 | ASSERT_REG_POSITION(depth_test_enable, 0x4B3); |
| 1642 | ASSERT_REG_POSITION(independent_blend_enable, 0x4B9); | 1633 | ASSERT_REG_POSITION(independent_blend_enable, 0x4B9); |
| 1643 | ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); | 1634 | ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); |
| @@ -1690,6 +1681,7 @@ ASSERT_REG_POSITION(point_coord_replace, 0x581); | |||
| 1690 | ASSERT_REG_POSITION(code_address, 0x582); | 1681 | ASSERT_REG_POSITION(code_address, 0x582); |
| 1691 | ASSERT_REG_POSITION(draw, 0x585); | 1682 | ASSERT_REG_POSITION(draw, 0x585); |
| 1692 | ASSERT_REG_POSITION(primitive_restart, 0x591); | 1683 | ASSERT_REG_POSITION(primitive_restart, 0x591); |
| 1684 | ASSERT_REG_POSITION(provoking_vertex_last, 0x5A1); | ||
| 1693 | ASSERT_REG_POSITION(index_array, 0x5F2); | 1685 | ASSERT_REG_POSITION(index_array, 0x5F2); |
| 1694 | ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F); | 1686 | ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F); |
| 1695 | ASSERT_REG_POSITION(instanced_arrays, 0x620); | 1687 | ASSERT_REG_POSITION(instanced_arrays, 0x620); |
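
Each new register above is carved out of existing padding, so every split has to keep the registers that follow at their old word offsets; the new `ASSERT_REG_POSITION` lines pin that down. The same arithmetic, written out once as plain `static_assert`s (all offsets in 32-bit words):

```cpp
// fill_rectangle (0x44F, 1 word) + 0x2 padding -> conservative_raster_enable
static_assert(0x44F + 1 + 0x2 == 0x452);
// conservative_raster_enable (1 word) + 0x5 padding -> vertex_attrib_format, unchanged
static_assert(0x452 + 1 + 0x5 == 0x458);
// sampler_index (0x48D, 1 word) + 0x2 padding -> gp_passthrough_mask
static_assert(0x48D + 1 + 0x2 == 0x490);
// gp_passthrough_mask (8 words) + 0x1B padding -> depth_test_enable, unchanged
static_assert(0x490 + 8 + 0x1B == 0x4B3);
// primitive_restart (0x591, 2 words) + 0xE padding -> provoking_vertex_last
static_assert(0x591 + 2 + 0xE == 0x5A1);
// provoking_vertex_last (1 word) + 0x50 padding -> index_array, unchanged
static_assert(0x5A1 + 1 + 0x50 == 0x5F2);
```
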
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index c51776466..c7ec1eac9 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -127,7 +127,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() { | |||
| 127 | 127 | ||
| 128 | // Optimized path for micro copies. | 128 | // Optimized path for micro copies. |
| 129 | const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; | 129 | const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; |
| 130 | if (dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X) { | 130 | if (dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X && |
| 131 | regs.src_params.height > GOB_SIZE_Y) { | ||
| 131 | FastCopyBlockLinearToPitch(); | 132 | FastCopyBlockLinearToPitch(); |
| 132 | return; | 133 | return; |
| 133 | } | 134 | } |
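
The extra condition narrows the micro-copy fast path: besides the destination fitting within one GOB (`dst_size < GOB_SIZE`, with the pitch at most one GOB row), the source surface must now be taller than a single GOB, so copies from sources of `GOB_SIZE_Y` lines or fewer take the generic path. For reference, the GOB constants this check leans on, with values as the texture decoder defines them (reproduced for context, not new definitions):

```cpp
// A GOB (group of bytes) is the basic block-linear tile: 64 bytes wide,
// 8 rows tall, 512 bytes total.
constexpr u32 GOB_SIZE_X = 64;                    // bytes per GOB row
constexpr u32 GOB_SIZE_Y = 8;                     // rows per GOB
constexpr u32 GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y; // 512 bytes
```
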
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h deleted file mode 100644 index 8b45f1b62..000000000 --- a/src/video_core/engines/shader_bytecode.h +++ /dev/null | |||
| @@ -1,2298 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <bitset> | ||
| 9 | #include <optional> | ||
| 10 | #include <tuple> | ||
| 11 | #include <vector> | ||
| 12 | |||
| 13 | #include "common/assert.h" | ||
| 14 | #include "common/bit_field.h" | ||
| 15 | #include "common/common_types.h" | ||
| 16 | |||
| 17 | namespace Tegra::Shader { | ||
| 18 | |||
| 19 | struct Register { | ||
| 20 | /// Number of registers | ||
| 21 | static constexpr std::size_t NumRegisters = 256; | ||
| 22 | |||
| 23 | /// Register 255 is special cased to always be 0 | ||
| 24 | static constexpr std::size_t ZeroIndex = 255; | ||
| 25 | |||
| 26 | enum class Size : u64 { | ||
| 27 | Byte = 0, | ||
| 28 | Short = 1, | ||
| 29 | Word = 2, | ||
| 30 | Long = 3, | ||
| 31 | }; | ||
| 32 | |||
| 33 | constexpr Register() = default; | ||
| 34 | |||
| 35 | constexpr Register(u64 value_) : value(value_) {} | ||
| 36 | |||
| 37 | [[nodiscard]] constexpr operator u64() const { | ||
| 38 | return value; | ||
| 39 | } | ||
| 40 | |||
| 41 | template <typename T> | ||
| 42 | [[nodiscard]] constexpr u64 operator-(const T& oth) const { | ||
| 43 | return value - oth; | ||
| 44 | } | ||
| 45 | |||
| 46 | template <typename T> | ||
| 47 | [[nodiscard]] constexpr u64 operator&(const T& oth) const { | ||
| 48 | return value & oth; | ||
| 49 | } | ||
| 50 | |||
| 51 | [[nodiscard]] constexpr u64 operator&(const Register& oth) const { | ||
| 52 | return value & oth.value; | ||
| 53 | } | ||
| 54 | |||
| 55 | [[nodiscard]] constexpr u64 operator~() const { | ||
| 56 | return ~value; | ||
| 57 | } | ||
| 58 | |||
| 59 | [[nodiscard]] u64 GetSwizzledIndex(u64 elem) const { | ||
| 60 | elem = (value + elem) & 3; | ||
| 61 | return (value & ~3) + elem; | ||
| 62 | } | ||
| 63 | |||
| 64 | private: | ||
| 65 | u64 value{}; | ||
| 66 | }; | ||
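
One detail of `Register` worth spelling out, since the deleted header only states it in a comment: index 255 (`ZeroIndex`) is the hardware's constant-zero register, so a consumer special-cases it rather than reading backing storage. A hypothetical sketch of that convention (the helper name and register-file type are illustrative, not from this header):

```cpp
// Illustrative only: register 255 (RZ) always reads as zero, so the
// register file never needs a real slot for it.
u64 ReadGPR(const std::array<u64, Register::NumRegisters>& gprs, Register reg) {
    if (reg == Register::ZeroIndex) {
        return 0;
    }
    return gprs[reg]; // Register converts to u64 via its operator u64()
}
```
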
| 67 | |||
| 68 | enum class AttributeSize : u64 { | ||
| 69 | Word = 0, | ||
| 70 | DoubleWord = 1, | ||
| 71 | TripleWord = 2, | ||
| 72 | QuadWord = 3, | ||
| 73 | }; | ||
| 74 | |||
| 75 | union Attribute { | ||
| 76 | Attribute() = default; | ||
| 77 | |||
| 78 | constexpr explicit Attribute(u64 value_) : value(value_) {} | ||
| 79 | |||
| 80 | enum class Index : u64 { | ||
| 81 | LayerViewportPointSize = 6, | ||
| 82 | Position = 7, | ||
| 83 | Attribute_0 = 8, | ||
| 84 | Attribute_31 = 39, | ||
| 85 | FrontColor = 40, | ||
| 86 | FrontSecondaryColor = 41, | ||
| 87 | BackColor = 42, | ||
| 88 | BackSecondaryColor = 43, | ||
| 89 | ClipDistances0123 = 44, | ||
| 90 | ClipDistances4567 = 45, | ||
| 91 | PointCoord = 46, | ||
| 92 | // This attribute contains a tuple of (~, ~, InstanceId, VertexId) when inside a vertex | ||
| 93 | // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval | ||
| 94 | // shader. | ||
| 95 | TessCoordInstanceIDVertexID = 47, | ||
| 96 | TexCoord_0 = 48, | ||
| 97 | TexCoord_7 = 55, | ||
| 98 | // This attribute contains a tuple of (Unk, Unk, Unk, gl_FrontFacing) when inside a fragment | ||
| 99 | // shader. It is unknown what the other values contain. | ||
| 100 | FrontFacing = 63, | ||
| 101 | }; | ||
| 102 | |||
| 103 | union { | ||
| 104 | BitField<20, 10, u64> immediate; | ||
| 105 | BitField<22, 2, u64> element; | ||
| 106 | BitField<24, 6, Index> index; | ||
| 107 | BitField<31, 1, u64> patch; | ||
| 108 | BitField<47, 3, AttributeSize> size; | ||
| 109 | |||
| 110 | [[nodiscard]] bool IsPhysical() const { | ||
| 111 | return patch == 0 && element == 0 && static_cast<u64>(index.Value()) == 0; | ||
| 112 | } | ||
| 113 | } fmt20; | ||
| 114 | |||
| 115 | union { | ||
| 116 | BitField<30, 2, u64> element; | ||
| 117 | BitField<32, 6, Index> index; | ||
| 118 | } fmt28; | ||
| 119 | |||
| 120 | BitField<39, 8, u64> reg; | ||
| 121 | u64 value{}; | ||
| 122 | }; | ||
| 123 | |||
| 124 | union Sampler { | ||
| 125 | Sampler() = default; | ||
| 126 | |||
| 127 | constexpr explicit Sampler(u64 value_) : value(value_) {} | ||
| 128 | |||
| 129 | enum class Index : u64 { | ||
| 130 | Sampler_0 = 8, | ||
| 131 | }; | ||
| 132 | |||
| 133 | BitField<36, 13, Index> index; | ||
| 134 | u64 value{}; | ||
| 135 | }; | ||
| 136 | |||
| 137 | union Image { | ||
| 138 | Image() = default; | ||
| 139 | |||
| 140 | constexpr explicit Image(u64 value_) : value{value_} {} | ||
| 141 | |||
| 142 | BitField<36, 13, u64> index; | ||
| 143 | u64 value; | ||
| 144 | }; | ||
| 145 | |||
| 146 | } // namespace Tegra::Shader | ||
| 147 | |||
| 148 | namespace std { | ||
| 149 | |||
| 150 | // TODO(bunnei): The below is forbidden by the C++ standard, but works fine. See #330. | ||
| 151 | template <> | ||
| 152 | struct make_unsigned<Tegra::Shader::Attribute> { | ||
| 153 | using type = Tegra::Shader::Attribute; | ||
| 154 | }; | ||
| 155 | |||
| 156 | template <> | ||
| 157 | struct make_unsigned<Tegra::Shader::Register> { | ||
| 158 | using type = Tegra::Shader::Register; | ||
| 159 | }; | ||
| 160 | |||
| 161 | } // namespace std | ||
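
These specializations exist so that fields typed as `Register` or `Attribute` can pass through `std::make_unsigned`, which `BitField` presumably relies on for its value handling; specializing a standard trait for a non-integral type is what the TODO flags as formally forbidden. What the specializations actually do is easy to check at compile time:

```cpp
#include <type_traits>
// With the specializations above in effect, the trait is an identity mapping
// for these two types; without them, instantiating make_unsigned on a class
// type would be ill-formed, as it is only defined for integral and enum types.
static_assert(std::is_same_v<std::make_unsigned_t<Tegra::Shader::Register>,
                             Tegra::Shader::Register>);
static_assert(std::is_same_v<std::make_unsigned_t<Tegra::Shader::Attribute>,
                             Tegra::Shader::Attribute>);
```
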
| 162 | |||
| 163 | namespace Tegra::Shader { | ||
| 164 | |||
| 165 | enum class Pred : u64 { | ||
| 166 | UnusedIndex = 0x7, | ||
| 167 | NeverExecute = 0xF, | ||
| 168 | }; | ||
| 169 | |||
| 170 | enum class PredCondition : u64 { | ||
| 171 | F = 0, // Always false | ||
| 172 | LT = 1, // Ordered less than | ||
| 173 | EQ = 2, // Ordered equal | ||
| 174 | LE = 3, // Ordered less than or equal | ||
| 175 | GT = 4, // Ordered greater than | ||
| 176 | NE = 5, // Ordered not equal | ||
| 177 | GE = 6, // Ordered greater than or equal | ||
| 178 | NUM = 7, // Ordered | ||
| 179 | NAN_ = 8, // Unordered | ||
| 180 | LTU = 9, // Unordered less than | ||
| 181 | EQU = 10, // Unordered equal | ||
| 182 | LEU = 11, // Unordered less than or equal | ||
| 183 | GTU = 12, // Unordered greater than | ||
| 184 | NEU = 13, // Unordered not equal | ||
| 185 | GEU = 14, // Unordered greater than or equal | ||
| 186 | T = 15, // Always true | ||
| 187 | }; | ||
| 188 | |||
| 189 | enum class PredOperation : u64 { | ||
| 190 | And = 0, | ||
| 191 | Or = 1, | ||
| 192 | Xor = 2, | ||
| 193 | }; | ||
| 194 | |||
| 195 | enum class LogicOperation : u64 { | ||
| 196 | And = 0, | ||
| 197 | Or = 1, | ||
| 198 | Xor = 2, | ||
| 199 | PassB = 3, | ||
| 200 | }; | ||
| 201 | |||
| 202 | enum class SubOp : u64 { | ||
| 203 | Cos = 0x0, | ||
| 204 | Sin = 0x1, | ||
| 205 | Ex2 = 0x2, | ||
| 206 | Lg2 = 0x3, | ||
| 207 | Rcp = 0x4, | ||
| 208 | Rsq = 0x5, | ||
| 209 | Sqrt = 0x8, | ||
| 210 | }; | ||
| 211 | |||
| 212 | enum class F2iRoundingOp : u64 { | ||
| 213 | RoundEven = 0, | ||
| 214 | Floor = 1, | ||
| 215 | Ceil = 2, | ||
| 216 | Trunc = 3, | ||
| 217 | }; | ||
| 218 | |||
| 219 | enum class F2fRoundingOp : u64 { | ||
| 220 | None = 0, | ||
| 221 | Pass = 3, | ||
| 222 | Round = 8, | ||
| 223 | Floor = 9, | ||
| 224 | Ceil = 10, | ||
| 225 | Trunc = 11, | ||
| 226 | }; | ||
| 227 | |||
| 228 | enum class AtomicOp : u64 { | ||
| 229 | Add = 0, | ||
| 230 | Min = 1, | ||
| 231 | Max = 2, | ||
| 232 | Inc = 3, | ||
| 233 | Dec = 4, | ||
| 234 | And = 5, | ||
| 235 | Or = 6, | ||
| 236 | Xor = 7, | ||
| 237 | Exch = 8, | ||
| 238 | SafeAdd = 10, | ||
| 239 | }; | ||
| 240 | |||
| 241 | enum class GlobalAtomicType : u64 { | ||
| 242 | U32 = 0, | ||
| 243 | S32 = 1, | ||
| 244 | U64 = 2, | ||
| 245 | F32_FTZ_RN = 3, | ||
| 246 | F16x2_FTZ_RN = 4, | ||
| 247 | S64 = 5, | ||
| 248 | }; | ||
| 249 | |||
| 250 | enum class UniformType : u64 { | ||
| 251 | UnsignedByte = 0, | ||
| 252 | SignedByte = 1, | ||
| 253 | UnsignedShort = 2, | ||
| 254 | SignedShort = 3, | ||
| 255 | Single = 4, | ||
| 256 | Double = 5, | ||
| 257 | Quad = 6, | ||
| 258 | UnsignedQuad = 7, | ||
| 259 | }; | ||
| 260 | |||
| 261 | enum class StoreType : u64 { | ||
| 262 | Unsigned8 = 0, | ||
| 263 | Signed8 = 1, | ||
| 264 | Unsigned16 = 2, | ||
| 265 | Signed16 = 3, | ||
| 266 | Bits32 = 4, | ||
| 267 | Bits64 = 5, | ||
| 268 | Bits128 = 6, | ||
| 269 | }; | ||
| 270 | |||
| 271 | enum class AtomicType : u64 { | ||
| 272 | U32 = 0, | ||
| 273 | S32 = 1, | ||
| 274 | U64 = 2, | ||
| 275 | S64 = 3, | ||
| 276 | }; | ||
| 277 | |||
| 278 | enum class IMinMaxExchange : u64 { | ||
| 279 | None = 0, | ||
| 280 | XLo = 1, | ||
| 281 | XMed = 2, | ||
| 282 | XHi = 3, | ||
| 283 | }; | ||
| 284 | |||
| 285 | enum class VideoType : u64 { | ||
| 286 | Size16_Low = 0, | ||
| 287 | Size16_High = 1, | ||
| 288 | Size32 = 2, | ||
| 289 | Invalid = 3, | ||
| 290 | }; | ||
| 291 | |||
| 292 | enum class VmadShr : u64 { | ||
| 293 | Shr7 = 1, | ||
| 294 | Shr15 = 2, | ||
| 295 | }; | ||
| 296 | |||
| 297 | enum class VmnmxType : u64 { | ||
| 298 | Bits8, | ||
| 299 | Bits16, | ||
| 300 | Bits32, | ||
| 301 | }; | ||
| 302 | |||
| 303 | enum class VmnmxOperation : u64 { | ||
| 304 | Mrg_16H = 0, | ||
| 305 | Mrg_16L = 1, | ||
| 306 | Mrg_8B0 = 2, | ||
| 307 | Mrg_8B2 = 3, | ||
| 308 | Acc = 4, | ||
| 309 | Min = 5, | ||
| 310 | Max = 6, | ||
| 311 | Nop = 7, | ||
| 312 | }; | ||
| 313 | |||
| 314 | enum class XmadMode : u64 { | ||
| 315 | None = 0, | ||
| 316 | CLo = 1, | ||
| 317 | CHi = 2, | ||
| 318 | CSfu = 3, | ||
| 319 | CBcc = 4, | ||
| 320 | }; | ||
| 321 | |||
| 322 | enum class IAdd3Mode : u64 { | ||
| 323 | None = 0, | ||
| 324 | RightShift = 1, | ||
| 325 | LeftShift = 2, | ||
| 326 | }; | ||
| 327 | |||
| 328 | enum class IAdd3Height : u64 { | ||
| 329 | None = 0, | ||
| 330 | LowerHalfWord = 1, | ||
| 331 | UpperHalfWord = 2, | ||
| 332 | }; | ||
| 333 | |||
| 334 | enum class FlowCondition : u64 { | ||
| 335 | Always = 0xF, | ||
| 336 | Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for? | ||
| 337 | }; | ||
| 338 | |||
| 339 | enum class ConditionCode : u64 { | ||
| 340 | F = 0, | ||
| 341 | LT = 1, | ||
| 342 | EQ = 2, | ||
| 343 | LE = 3, | ||
| 344 | GT = 4, | ||
| 345 | NE = 5, | ||
| 346 | GE = 6, | ||
| 347 | Num = 7, | ||
| 348 | Nan = 8, | ||
| 349 | LTU = 9, | ||
| 350 | EQU = 10, | ||
| 351 | LEU = 11, | ||
| 352 | GTU = 12, | ||
| 353 | NEU = 13, | ||
| 354 | GEU = 14, | ||
| 355 | T = 15, | ||
| 356 | OFF = 16, | ||
| 357 | LO = 17, | ||
| 358 | SFF = 18, | ||
| 359 | LS = 19, | ||
| 360 | HI = 20, | ||
| 361 | SFT = 21, | ||
| 362 | HS = 22, | ||
| 363 | OFT = 23, | ||
| 364 | CSM_TA = 24, | ||
| 365 | CSM_TR = 25, | ||
| 366 | CSM_MX = 26, | ||
| 367 | FCSM_TA = 27, | ||
| 368 | FCSM_TR = 28, | ||
| 369 | FCSM_MX = 29, | ||
| 370 | RLE = 30, | ||
| 371 | RGT = 31, | ||
| 372 | }; | ||
| 373 | |||
| 374 | enum class PredicateResultMode : u64 { | ||
| 375 | None = 0x0, | ||
| 376 | NotZero = 0x3, | ||
| 377 | }; | ||
| 378 | |||
| 379 | enum class TextureType : u64 { | ||
| 380 | Texture1D = 0, | ||
| 381 | Texture2D = 1, | ||
| 382 | Texture3D = 2, | ||
| 383 | TextureCube = 3, | ||
| 384 | }; | ||
| 385 | |||
| 386 | enum class TextureQueryType : u64 { | ||
| 387 | Dimension = 1, | ||
| 388 | TextureType = 2, | ||
| 389 | SamplePosition = 5, | ||
| 390 | Filter = 16, | ||
| 391 | LevelOfDetail = 18, | ||
| 392 | Wrap = 20, | ||
| 393 | BorderColor = 22, | ||
| 394 | }; | ||
| 395 | |||
| 396 | enum class TextureProcessMode : u64 { | ||
| 397 | None = 0, | ||
| 398 | LZ = 1, // Load LOD of zero. | ||
| 399 | LB = 2, // Load Bias. | ||
| 400 | LL = 3, // Load LOD. | ||
| 401 | LBA = 6, // Load Bias. The A is unknown; it does not appear to differ from LB. | ||
| 402 | LLA = 7 // Load LOD. The A is unknown; it does not appear to differ from LL. | ||
| 403 | }; | ||
| 404 | |||
| 405 | enum class TextureMiscMode : u64 { | ||
| 406 | DC, | ||
| 407 | AOFFI, // Uses Offset | ||
| 408 | NDV, | ||
| 409 | NODEP, | ||
| 410 | MZ, | ||
| 411 | PTP, | ||
| 412 | }; | ||
| 413 | |||
| 414 | enum class SurfaceDataMode : u64 { | ||
| 415 | P = 0, | ||
| 416 | D_BA = 1, | ||
| 417 | }; | ||
| 418 | |||
| 419 | enum class OutOfBoundsStore : u64 { | ||
| 420 | Ignore = 0, | ||
| 421 | Clamp = 1, | ||
| 422 | Trap = 2, | ||
| 423 | }; | ||
| 424 | |||
| 425 | enum class ImageType : u64 { | ||
| 426 | Texture1D = 0, | ||
| 427 | TextureBuffer = 1, | ||
| 428 | Texture1DArray = 2, | ||
| 429 | Texture2D = 3, | ||
| 430 | Texture2DArray = 4, | ||
| 431 | Texture3D = 5, | ||
| 432 | }; | ||
| 433 | |||
| 434 | enum class IsberdMode : u64 { | ||
| 435 | None = 0, | ||
| 436 | Patch = 1, | ||
| 437 | Prim = 2, | ||
| 438 | Attr = 3, | ||
| 439 | }; | ||
| 440 | |||
| 441 | enum class IsberdShift : u64 { None = 0, U16 = 1, B32 = 2 }; | ||
| 442 | |||
| 443 | enum class MembarType : u64 { | ||
| 444 | CTA = 0, | ||
| 445 | GL = 1, | ||
| 446 | SYS = 2, | ||
| 447 | VC = 3, | ||
| 448 | }; | ||
| 449 | |||
| 450 | enum class MembarUnknown : u64 { Default = 0, IVALLD = 1, IVALLT = 2, IVALLTD = 3 }; | ||
| 451 | |||
| 452 | enum class HalfType : u64 { | ||
| 453 | H0_H1 = 0, | ||
| 454 | F32 = 1, | ||
| 455 | H0_H0 = 2, | ||
| 456 | H1_H1 = 3, | ||
| 457 | }; | ||
| 458 | |||
| 459 | enum class HalfMerge : u64 { | ||
| 460 | H0_H1 = 0, | ||
| 461 | F32 = 1, | ||
| 462 | Mrg_H0 = 2, | ||
| 463 | Mrg_H1 = 3, | ||
| 464 | }; | ||
| 465 | |||
| 466 | enum class HalfPrecision : u64 { | ||
| 467 | None = 0, | ||
| 468 | FTZ = 1, | ||
| 469 | FMZ = 2, | ||
| 470 | }; | ||
| 471 | |||
| 472 | enum class R2pMode : u64 { | ||
| 473 | Pr = 0, | ||
| 474 | Cc = 1, | ||
| 475 | }; | ||
| 476 | |||
| 477 | enum class IpaInterpMode : u64 { | ||
| 478 | Pass = 0, | ||
| 479 | Multiply = 1, | ||
| 480 | Constant = 2, | ||
| 481 | Sc = 3, | ||
| 482 | }; | ||
| 483 | |||
| 484 | enum class IpaSampleMode : u64 { | ||
| 485 | Default = 0, | ||
| 486 | Centroid = 1, | ||
| 487 | Offset = 2, | ||
| 488 | }; | ||
| 489 | |||
| 490 | enum class LmemLoadCacheManagement : u64 { | ||
| 491 | Default = 0, | ||
| 492 | LU = 1, | ||
| 493 | CI = 2, | ||
| 494 | CV = 3, | ||
| 495 | }; | ||
| 496 | |||
| 497 | enum class StoreCacheManagement : u64 { | ||
| 498 | Default = 0, | ||
| 499 | CG = 1, | ||
| 500 | CS = 2, | ||
| 501 | WT = 3, | ||
| 502 | }; | ||
| 503 | |||
| 504 | struct IpaMode { | ||
| 505 | IpaInterpMode interpolation_mode; | ||
| 506 | IpaSampleMode sampling_mode; | ||
| 507 | |||
| 508 | [[nodiscard]] bool operator==(const IpaMode& a) const { | ||
| 509 | return std::tie(interpolation_mode, sampling_mode) == | ||
| 510 | std::tie(a.interpolation_mode, a.sampling_mode); | ||
| 511 | } | ||
| 512 | [[nodiscard]] bool operator!=(const IpaMode& a) const { | ||
| 513 | return !operator==(a); | ||
| 514 | } | ||
| 515 | [[nodiscard]] bool operator<(const IpaMode& a) const { | ||
| 516 | return std::tie(interpolation_mode, sampling_mode) < | ||
| 517 | std::tie(a.interpolation_mode, a.sampling_mode); | ||
| 518 | } | ||
| 519 | }; | ||
| 520 | |||
| 521 | enum class SystemVariable : u64 { | ||
| 522 | LaneId = 0x00, | ||
| 523 | VirtCfg = 0x02, | ||
| 524 | VirtId = 0x03, | ||
| 525 | Pm0 = 0x04, | ||
| 526 | Pm1 = 0x05, | ||
| 527 | Pm2 = 0x06, | ||
| 528 | Pm3 = 0x07, | ||
| 529 | Pm4 = 0x08, | ||
| 530 | Pm5 = 0x09, | ||
| 531 | Pm6 = 0x0a, | ||
| 532 | Pm7 = 0x0b, | ||
| 533 | OrderingTicket = 0x0f, | ||
| 534 | PrimType = 0x10, | ||
| 535 | InvocationId = 0x11, | ||
| 536 | Ydirection = 0x12, | ||
| 537 | ThreadKill = 0x13, | ||
| 538 | ShaderType = 0x14, | ||
| 539 | DirectBeWriteAddressLow = 0x15, | ||
| 540 | DirectBeWriteAddressHigh = 0x16, | ||
| 541 | DirectBeWriteEnabled = 0x17, | ||
| 542 | MachineId0 = 0x18, | ||
| 543 | MachineId1 = 0x19, | ||
| 544 | MachineId2 = 0x1a, | ||
| 545 | MachineId3 = 0x1b, | ||
| 546 | Affinity = 0x1c, | ||
| 547 | InvocationInfo = 0x1d, | ||
| 548 | WscaleFactorXY = 0x1e, | ||
| 549 | WscaleFactorZ = 0x1f, | ||
| 550 | Tid = 0x20, | ||
| 551 | TidX = 0x21, | ||
| 552 | TidY = 0x22, | ||
| 553 | TidZ = 0x23, | ||
| 554 | CtaParam = 0x24, | ||
| 555 | CtaIdX = 0x25, | ||
| 556 | CtaIdY = 0x26, | ||
| 557 | CtaIdZ = 0x27, | ||
| 558 | NtId = 0x28, | ||
| 559 | CirQueueIncrMinusOne = 0x29, | ||
| 560 | Nlatc = 0x2a, | ||
| 561 | SmSpaVersion = 0x2c, | ||
| 562 | MultiPassShaderInfo = 0x2d, | ||
| 563 | LwinHi = 0x2e, | ||
| 564 | SwinHi = 0x2f, | ||
| 565 | SwinLo = 0x30, | ||
| 566 | SwinSz = 0x31, | ||
| 567 | SmemSz = 0x32, | ||
| 568 | SmemBanks = 0x33, | ||
| 569 | LwinLo = 0x34, | ||
| 570 | LwinSz = 0x35, | ||
| 571 | LmemLosz = 0x36, | ||
| 572 | LmemHioff = 0x37, | ||
| 573 | EqMask = 0x38, | ||
| 574 | LtMask = 0x39, | ||
| 575 | LeMask = 0x3a, | ||
| 576 | GtMask = 0x3b, | ||
| 577 | GeMask = 0x3c, | ||
| 578 | RegAlloc = 0x3d, | ||
| 579 | CtxAddr = 0x3e, // .fmask = F_SM50 | ||
| 580 | BarrierAlloc = 0x3e, // .fmask = F_SM60 | ||
| 581 | GlobalErrorStatus = 0x40, | ||
| 582 | WarpErrorStatus = 0x42, | ||
| 583 | WarpErrorStatusClear = 0x43, | ||
| 584 | PmHi0 = 0x48, | ||
| 585 | PmHi1 = 0x49, | ||
| 586 | PmHi2 = 0x4a, | ||
| 587 | PmHi3 = 0x4b, | ||
| 588 | PmHi4 = 0x4c, | ||
| 589 | PmHi5 = 0x4d, | ||
| 590 | PmHi6 = 0x4e, | ||
| 591 | PmHi7 = 0x4f, | ||
| 592 | ClockLo = 0x50, | ||
| 593 | ClockHi = 0x51, | ||
| 594 | GlobalTimerLo = 0x52, | ||
| 595 | GlobalTimerHi = 0x53, | ||
| 596 | HwTaskId = 0x60, | ||
| 597 | CircularQueueEntryIndex = 0x61, | ||
| 598 | CircularQueueEntryAddressLow = 0x62, | ||
| 599 | CircularQueueEntryAddressHigh = 0x63, | ||
| 600 | }; | ||
| 601 | |||
| 602 | enum class PhysicalAttributeDirection : u64 { | ||
| 603 | Input = 0, | ||
| 604 | Output = 1, | ||
| 605 | }; | ||
| 606 | |||
| 607 | enum class VoteOperation : u64 { | ||
| 608 | All = 0, // allThreadsNV | ||
| 609 | Any = 1, // anyThreadNV | ||
| 610 | Eq = 2, // allThreadsEqualNV | ||
| 611 | }; | ||
| 612 | |||
| 613 | enum class ImageAtomicOperationType : u64 { | ||
| 614 | U32 = 0, | ||
| 615 | S32 = 1, | ||
| 616 | U64 = 2, | ||
| 617 | F32 = 3, | ||
| 618 | S64 = 5, | ||
| 619 | SD32 = 6, | ||
| 620 | SD64 = 7, | ||
| 621 | }; | ||
| 622 | |||
| 623 | enum class ImageAtomicOperation : u64 { | ||
| 624 | Add = 0, | ||
| 625 | Min = 1, | ||
| 626 | Max = 2, | ||
| 627 | Inc = 3, | ||
| 628 | Dec = 4, | ||
| 629 | And = 5, | ||
| 630 | Or = 6, | ||
| 631 | Xor = 7, | ||
| 632 | Exch = 8, | ||
| 633 | }; | ||
| 634 | |||
| 635 | enum class ShuffleOperation : u64 { | ||
| 636 | Idx = 0, // shuffleNV | ||
| 637 | Up = 1, // shuffleUpNV | ||
| 638 | Down = 2, // shuffleDownNV | ||
| 639 | Bfly = 3, // shuffleXorNV | ||
| 640 | }; | ||
| 641 | |||
| 642 | enum class ShfType : u64 { | ||
| 643 | Bits32 = 0, | ||
| 644 | U64 = 2, | ||
| 645 | S64 = 3, | ||
| 646 | }; | ||
| 647 | |||
| 648 | enum class ShfXmode : u64 { | ||
| 649 | None = 0, | ||
| 650 | HI = 1, | ||
| 651 | X = 2, | ||
| 652 | XHI = 3, | ||
| 653 | }; | ||
| 654 | |||
| 655 | union Instruction { | ||
| 656 | constexpr Instruction& operator=(const Instruction& instr) { | ||
| 657 | value = instr.value; | ||
| 658 | return *this; | ||
| 659 | } | ||
| 660 | |||
| 661 | constexpr Instruction(u64 value_) : value{value_} {} | ||
| 662 | constexpr Instruction(const Instruction& instr) : value(instr.value) {} | ||
| 663 | |||
| 664 | [[nodiscard]] constexpr bool Bit(u64 offset) const { | ||
| 665 | return ((value >> offset) & 1) != 0; | ||
| 666 | } | ||
| 667 | |||
| 668 | BitField<0, 8, Register> gpr0; | ||
| 669 | BitField<8, 8, Register> gpr8; | ||
| 670 | union { | ||
| 671 | BitField<16, 4, Pred> full_pred; | ||
| 672 | BitField<16, 3, u64> pred_index; | ||
| 673 | } pred; | ||
| 674 | BitField<19, 1, u64> negate_pred; | ||
| 675 | BitField<20, 8, Register> gpr20; | ||
| 676 | BitField<20, 4, SubOp> sub_op; | ||
| 677 | BitField<28, 8, Register> gpr28; | ||
| 678 | BitField<39, 8, Register> gpr39; | ||
| 679 | BitField<48, 16, u64> opcode; | ||
| 680 | |||
| 681 | union { | ||
| 682 | BitField<8, 5, ConditionCode> cc; | ||
| 683 | BitField<13, 1, u64> trigger; | ||
| 684 | } nop; | ||
| 685 | |||
| 686 | union { | ||
| 687 | BitField<48, 2, VoteOperation> operation; | ||
| 688 | BitField<45, 3, u64> dest_pred; | ||
| 689 | BitField<39, 3, u64> value; | ||
| 690 | BitField<42, 1, u64> negate_value; | ||
| 691 | } vote; | ||
| 692 | |||
| 693 | union { | ||
| 694 | BitField<30, 2, ShuffleOperation> operation; | ||
| 695 | BitField<48, 3, u64> pred48; | ||
| 696 | BitField<28, 1, u64> is_index_imm; | ||
| 697 | BitField<29, 1, u64> is_mask_imm; | ||
| 698 | BitField<20, 5, u64> index_imm; | ||
| 699 | BitField<34, 13, u64> mask_imm; | ||
| 700 | } shfl; | ||
| 701 | |||
| 702 | union { | ||
| 703 | BitField<44, 1, u64> ftz; | ||
| 704 | BitField<39, 2, u64> tab5cb8_2; | ||
| 705 | BitField<38, 1, u64> ndv; | ||
| 706 | BitField<47, 1, u64> cc; | ||
| 707 | BitField<28, 8, u64> swizzle; | ||
| 708 | } fswzadd; | ||
| 709 | |||
| 710 | union { | ||
| 711 | BitField<8, 8, Register> gpr; | ||
| 712 | BitField<20, 24, s64> offset; | ||
| 713 | } gmem; | ||
| 714 | |||
| 715 | union { | ||
| 716 | BitField<20, 16, u64> imm20_16; | ||
| 717 | BitField<20, 19, u64> imm20_19; | ||
| 718 | BitField<20, 32, s64> imm20_32; | ||
| 719 | BitField<45, 1, u64> negate_b; | ||
| 720 | BitField<46, 1, u64> abs_a; | ||
| 721 | BitField<48, 1, u64> negate_a; | ||
| 722 | BitField<49, 1, u64> abs_b; | ||
| 723 | BitField<50, 1, u64> saturate_d; | ||
| 724 | BitField<56, 1, u64> negate_imm; | ||
| 725 | |||
| 726 | union { | ||
| 727 | BitField<39, 3, u64> pred; | ||
| 728 | BitField<42, 1, u64> negate_pred; | ||
| 729 | } fmnmx; | ||
| 730 | |||
| 731 | union { | ||
| 732 | BitField<39, 1, u64> invert_a; | ||
| 733 | BitField<40, 1, u64> invert_b; | ||
| 734 | BitField<41, 2, LogicOperation> operation; | ||
| 735 | BitField<44, 2, PredicateResultMode> pred_result_mode; | ||
| 736 | BitField<48, 3, Pred> pred48; | ||
| 737 | } lop; | ||
| 738 | |||
| 739 | union { | ||
| 740 | BitField<53, 2, LogicOperation> operation; | ||
| 741 | BitField<55, 1, u64> invert_a; | ||
| 742 | BitField<56, 1, u64> invert_b; | ||
| 743 | } lop32i; | ||
| 744 | |||
| 745 | union { | ||
| 746 | BitField<28, 8, u64> imm_lut28; | ||
| 747 | BitField<48, 8, u64> imm_lut48; | ||
| 748 | |||
| 749 | [[nodiscard]] u32 GetImmLut28() const { | ||
| 750 | return static_cast<u32>(imm_lut28); | ||
| 751 | } | ||
| 752 | |||
| 753 | [[nodiscard]] u32 GetImmLut48() const { | ||
| 754 | return static_cast<u32>(imm_lut48); | ||
| 755 | } | ||
| 756 | } lop3; | ||
| 757 | |||
| 758 | [[nodiscard]] u16 GetImm20_16() const { | ||
| 759 | return static_cast<u16>(imm20_16); | ||
| 760 | } | ||
| 761 | |||
| 762 | [[nodiscard]] u32 GetImm20_19() const { | ||
| 763 | u32 imm{static_cast<u32>(imm20_19)}; | ||
| 764 | imm <<= 12; | ||
| 765 | imm |= negate_imm ? 0x80000000 : 0; | ||
| 766 | return imm; | ||
| 767 | } | ||
| 768 | |||
| 769 | [[nodiscard]] u32 GetImm20_32() const { | ||
| 770 | return static_cast<u32>(imm20_32); | ||
| 771 | } | ||
| 772 | |||
| 773 | [[nodiscard]] s32 GetSignedImm20_20() const { | ||
| 774 | const auto immediate = static_cast<u32>(imm20_19 | (negate_imm << 19)); | ||
| 775 | // Sign extend the 20-bit value. | ||
| 776 | const auto mask = 1U << (20 - 1); | ||
| 777 | return static_cast<s32>((immediate ^ mask) - mask); | ||
| 778 | } | ||
| 779 | } alu; | ||
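
`GetSignedImm20_20` above uses the classic `(x ^ mask) - mask` branch-free sign extension: XORing flips the sign bit of the 20-bit field, and subtracting that bit's weight restores the two's-complement value. Worked through in isolation (a standalone sketch, not code from this header):

```cpp
#include <cstdint>

constexpr std::int32_t SignExtend20(std::uint32_t immediate) {
    constexpr std::uint32_t mask = 1U << 19; // sign bit of a 20-bit field
    // Unsigned arithmetic wraps, so the subtraction is well-defined; the
    // final narrowing to int32_t yields the expected two's-complement value.
    return static_cast<std::int32_t>((immediate ^ mask) - mask);
}

static_assert(SignExtend20(0x00005) == 5);        // sign bit clear: unchanged
static_assert(SignExtend20(0xFFFFF) == -1);       // all ones: -1
static_assert(SignExtend20(0x80001) == -0x7FFFF); // sign bit set: negative
```
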
| 780 | |||
| 781 | union { | ||
| 782 | BitField<38, 1, u64> idx; | ||
| 783 | BitField<51, 1, u64> saturate; | ||
| 784 | BitField<52, 2, IpaSampleMode> sample_mode; | ||
| 785 | BitField<54, 2, IpaInterpMode> interp_mode; | ||
| 786 | } ipa; | ||
| 787 | |||
| 788 | union { | ||
| 789 | BitField<39, 2, u64> tab5cb8_2; | ||
| 790 | BitField<41, 3, u64> postfactor; | ||
| 791 | BitField<44, 2, u64> tab5c68_0; | ||
| 792 | BitField<48, 1, u64> negate_b; | ||
| 793 | } fmul; | ||
| 794 | |||
| 795 | union { | ||
| 796 | BitField<55, 1, u64> saturate; | ||
| 797 | } fmul32; | ||
| 798 | |||
| 799 | union { | ||
| 800 | BitField<52, 1, u64> generates_cc; | ||
| 801 | } op_32; | ||
| 802 | |||
| 803 | union { | ||
| 804 | BitField<48, 1, u64> is_signed; | ||
| 805 | } shift; | ||
| 806 | |||
| 807 | union { | ||
| 808 | BitField<39, 1, u64> wrap; | ||
| 809 | } shr; | ||
| 810 | |||
| 811 | union { | ||
| 812 | BitField<37, 2, ShfType> type; | ||
| 813 | BitField<48, 2, ShfXmode> xmode; | ||
| 814 | BitField<50, 1, u64> wrap; | ||
| 815 | BitField<20, 6, u64> immediate; | ||
| 816 | } shf; | ||
| 817 | |||
| 818 | union { | ||
| 819 | BitField<39, 5, u64> shift_amount; | ||
| 820 | BitField<48, 1, u64> negate_b; | ||
| 821 | BitField<49, 1, u64> negate_a; | ||
| 822 | } alu_integer; | ||
| 823 | |||
| 824 | union { | ||
| 825 | BitField<43, 1, u64> x; | ||
| 826 | } iadd; | ||
| 827 | |||
| 828 | union { | ||
| 829 | BitField<39, 1, u64> ftz; | ||
| 830 | BitField<32, 1, u64> saturate; | ||
| 831 | BitField<49, 2, HalfMerge> merge; | ||
| 832 | |||
| 833 | BitField<44, 1, u64> abs_a; | ||
| 834 | BitField<47, 2, HalfType> type_a; | ||
| 835 | |||
| 836 | BitField<30, 1, u64> abs_b; | ||
| 837 | BitField<28, 2, HalfType> type_b; | ||
| 838 | |||
| 839 | BitField<35, 2, HalfType> type_c; | ||
| 840 | } alu_half; | ||
| 841 | |||
| 842 | union { | ||
| 843 | BitField<39, 2, HalfPrecision> precision; | ||
| 844 | BitField<39, 1, u64> ftz; | ||
| 845 | BitField<52, 1, u64> saturate; | ||
| 846 | BitField<49, 2, HalfMerge> merge; | ||
| 847 | |||
| 848 | BitField<43, 1, u64> negate_a; | ||
| 849 | BitField<44, 1, u64> abs_a; | ||
| 850 | BitField<47, 2, HalfType> type_a; | ||
| 851 | } alu_half_imm; | ||
| 852 | |||
| 853 | union { | ||
| 854 | BitField<29, 1, u64> first_negate; | ||
| 855 | BitField<20, 9, u64> first; | ||
| 856 | |||
| 857 | BitField<56, 1, u64> second_negate; | ||
| 858 | BitField<30, 9, u64> second; | ||
| 859 | |||
| 860 | [[nodiscard]] u32 PackImmediates() const { | ||
| 861 | // Immediates are half floats shifted. | ||
| 862 | constexpr u32 imm_shift = 6; | ||
| 863 | return static_cast<u32>((first << imm_shift) | (second << (16 + imm_shift))); | ||
| 864 | } | ||
| 865 | } half_imm; | ||
| 866 | |||
| 867 | union { | ||
| 868 | union { | ||
| 869 | BitField<37, 2, HalfPrecision> precision; | ||
| 870 | BitField<32, 1, u64> saturate; | ||
| 871 | |||
| 872 | BitField<31, 1, u64> negate_b; | ||
| 873 | BitField<30, 1, u64> negate_c; | ||
| 874 | BitField<35, 2, HalfType> type_c; | ||
| 875 | } rr; | ||
| 876 | |||
| 877 | BitField<57, 2, HalfPrecision> precision; | ||
| 878 | BitField<52, 1, u64> saturate; | ||
| 879 | |||
| 880 | BitField<49, 2, HalfMerge> merge; | ||
| 881 | |||
| 882 | BitField<47, 2, HalfType> type_a; | ||
| 883 | |||
| 884 | BitField<56, 1, u64> negate_b; | ||
| 885 | BitField<28, 2, HalfType> type_b; | ||
| 886 | |||
| 887 | BitField<51, 1, u64> negate_c; | ||
| 888 | BitField<53, 2, HalfType> type_reg39; | ||
| 889 | } hfma2; | ||
| 890 | |||
| 891 | union { | ||
| 892 | BitField<40, 1, u64> invert; | ||
| 893 | } popc; | ||
| 894 | |||
| 895 | union { | ||
| 896 | BitField<41, 1, u64> sh; | ||
| 897 | BitField<40, 1, u64> invert; | ||
| 898 | BitField<48, 1, u64> is_signed; | ||
| 899 | } flo; | ||
| 900 | |||
| 901 | union { | ||
| 902 | BitField<39, 3, u64> pred; | ||
| 903 | BitField<42, 1, u64> neg_pred; | ||
| 904 | } sel; | ||
| 905 | |||
| 906 | union { | ||
| 907 | BitField<39, 3, u64> pred; | ||
| 908 | BitField<42, 1, u64> negate_pred; | ||
| 909 | BitField<43, 2, IMinMaxExchange> exchange; | ||
| 910 | BitField<48, 1, u64> is_signed; | ||
| 911 | } imnmx; | ||
| 912 | |||
| 913 | union { | ||
| 914 | BitField<31, 2, IAdd3Height> height_c; | ||
| 915 | BitField<33, 2, IAdd3Height> height_b; | ||
| 916 | BitField<35, 2, IAdd3Height> height_a; | ||
| 917 | BitField<37, 2, IAdd3Mode> mode; | ||
| 918 | BitField<49, 1, u64> neg_c; | ||
| 919 | BitField<50, 1, u64> neg_b; | ||
| 920 | BitField<51, 1, u64> neg_a; | ||
| 921 | } iadd3; | ||
| 922 | |||
| 923 | union { | ||
| 924 | BitField<54, 1, u64> saturate; | ||
| 925 | BitField<56, 1, u64> negate_a; | ||
| 926 | } iadd32i; | ||
| 927 | |||
| 928 | union { | ||
| 929 | BitField<53, 1, u64> negate_b; | ||
| 930 | BitField<54, 1, u64> abs_a; | ||
| 931 | BitField<56, 1, u64> negate_a; | ||
| 932 | BitField<57, 1, u64> abs_b; | ||
| 933 | } fadd32i; | ||
| 934 | |||
| 935 | union { | ||
| 936 | BitField<40, 1, u64> brev; | ||
| 937 | BitField<47, 1, u64> rd_cc; | ||
| 938 | BitField<48, 1, u64> is_signed; | ||
| 939 | } bfe; | ||
| 940 | |||
| 941 | union { | ||
| 942 | BitField<48, 3, u64> pred48; | ||
| 943 | |||
| 944 | union { | ||
| 945 | BitField<20, 20, u64> entry_a; | ||
| 946 | BitField<39, 5, u64> entry_b; | ||
| 947 | BitField<45, 1, u64> neg; | ||
| 948 | BitField<46, 1, u64> uses_cc; | ||
| 949 | } imm; | ||
| 950 | |||
| 951 | union { | ||
| 952 | BitField<20, 14, u64> cb_index; | ||
| 953 | BitField<34, 5, u64> cb_offset; | ||
| 954 | BitField<56, 1, u64> neg; | ||
| 955 | BitField<57, 1, u64> uses_cc; | ||
| 956 | } hi; | ||
| 957 | |||
| 958 | union { | ||
| 959 | BitField<20, 14, u64> cb_index; | ||
| 960 | BitField<34, 5, u64> cb_offset; | ||
| 961 | BitField<39, 5, u64> entry_a; | ||
| 962 | BitField<45, 1, u64> neg; | ||
| 963 | BitField<46, 1, u64> uses_cc; | ||
| 964 | } rz; | ||
| 965 | |||
| 966 | union { | ||
| 967 | BitField<39, 5, u64> entry_a; | ||
| 968 | BitField<45, 1, u64> neg; | ||
| 969 | BitField<46, 1, u64> uses_cc; | ||
| 970 | } r1; | ||
| 971 | |||
| 972 | union { | ||
| 973 | BitField<28, 8, u64> entry_a; | ||
| 974 | BitField<37, 1, u64> neg; | ||
| 975 | BitField<38, 1, u64> uses_cc; | ||
| 976 | } r2; | ||
| 977 | |||
| 978 | } lea; | ||
| 979 | |||
| 980 | union { | ||
| 981 | BitField<0, 5, FlowCondition> cond; | ||
| 982 | } flow; | ||
| 983 | |||
| 984 | union { | ||
| 985 | BitField<47, 1, u64> cc; | ||
| 986 | BitField<48, 1, u64> negate_b; | ||
| 987 | BitField<49, 1, u64> negate_c; | ||
| 988 | BitField<51, 2, u64> tab5980_1; | ||
| 989 | BitField<53, 2, u64> tab5980_0; | ||
| 990 | } ffma; | ||
| 991 | |||
| 992 | union { | ||
| 993 | BitField<48, 3, UniformType> type; | ||
| 994 | BitField<44, 2, u64> unknown; | ||
| 995 | } ld_c; | ||
| 996 | |||
| 997 | union { | ||
| 998 | BitField<48, 3, StoreType> type; | ||
| 999 | } ldst_sl; | ||
| 1000 | |||
| 1001 | union { | ||
| 1002 | BitField<44, 2, u64> unknown; | ||
| 1003 | } ld_l; | ||
| 1004 | |||
| 1005 | union { | ||
| 1006 | BitField<44, 2, StoreCacheManagement> cache_management; | ||
| 1007 | } st_l; | ||
| 1008 | |||
| 1009 | union { | ||
| 1010 | BitField<48, 3, UniformType> type; | ||
| 1011 | BitField<46, 2, u64> cache_mode; | ||
| 1012 | } ldg; | ||
| 1013 | |||
| 1014 | union { | ||
| 1015 | BitField<48, 3, UniformType> type; | ||
| 1016 | BitField<46, 2, u64> cache_mode; | ||
| 1017 | } stg; | ||
| 1018 | |||
| 1019 | union { | ||
| 1020 | BitField<23, 3, AtomicOp> operation; | ||
| 1021 | BitField<48, 1, u64> extended; | ||
| 1022 | BitField<20, 3, GlobalAtomicType> type; | ||
| 1023 | } red; | ||
| 1024 | |||
| 1025 | union { | ||
| 1026 | BitField<52, 4, AtomicOp> operation; | ||
| 1027 | BitField<49, 3, GlobalAtomicType> type; | ||
| 1028 | BitField<28, 20, s64> offset; | ||
| 1029 | } atom; | ||
| 1030 | |||
| 1031 | union { | ||
| 1032 | BitField<52, 4, AtomicOp> operation; | ||
| 1033 | BitField<28, 2, AtomicType> type; | ||
| 1034 | BitField<30, 22, s64> offset; | ||
| 1035 | |||
| 1036 | [[nodiscard]] s32 GetImmediateOffset() const { | ||
| 1037 | return static_cast<s32>(offset << 2); | ||
| 1038 | } | ||
| 1039 | } atoms; | ||
| 1040 | |||
| 1041 | union { | ||
| 1042 | BitField<32, 1, PhysicalAttributeDirection> direction; | ||
| 1043 | BitField<47, 3, AttributeSize> size; | ||
| 1044 | BitField<20, 11, u64> address; | ||
| 1045 | } al2p; | ||
| 1046 | |||
| 1047 | union { | ||
| 1048 | BitField<53, 3, UniformType> type; | ||
| 1049 | BitField<52, 1, u64> extended; | ||
| 1050 | } generic; | ||
| 1051 | |||
| 1052 | union { | ||
| 1053 | BitField<0, 3, u64> pred0; | ||
| 1054 | BitField<3, 3, u64> pred3; | ||
| 1055 | BitField<6, 1, u64> neg_b; | ||
| 1056 | BitField<7, 1, u64> abs_a; | ||
| 1057 | BitField<39, 3, u64> pred39; | ||
| 1058 | BitField<42, 1, u64> neg_pred; | ||
| 1059 | BitField<43, 1, u64> neg_a; | ||
| 1060 | BitField<44, 1, u64> abs_b; | ||
| 1061 | BitField<45, 2, PredOperation> op; | ||
| 1062 | BitField<47, 1, u64> ftz; | ||
| 1063 | BitField<48, 4, PredCondition> cond; | ||
| 1064 | } fsetp; | ||
| 1065 | |||
| 1066 | union { | ||
| 1067 | BitField<0, 3, u64> pred0; | ||
| 1068 | BitField<3, 3, u64> pred3; | ||
| 1069 | BitField<39, 3, u64> pred39; | ||
| 1070 | BitField<42, 1, u64> neg_pred; | ||
| 1071 | BitField<45, 2, PredOperation> op; | ||
| 1072 | BitField<48, 1, u64> is_signed; | ||
| 1073 | BitField<49, 3, PredCondition> cond; | ||
| 1074 | } isetp; | ||
| 1075 | |||
| 1076 | union { | ||
| 1077 | BitField<48, 1, u64> is_signed; | ||
| 1078 | BitField<49, 3, PredCondition> cond; | ||
| 1079 | } icmp; | ||
| 1080 | |||
| 1081 | union { | ||
| 1082 | BitField<0, 3, u64> pred0; | ||
| 1083 | BitField<3, 3, u64> pred3; | ||
| 1084 | BitField<12, 3, u64> pred12; | ||
| 1085 | BitField<15, 1, u64> neg_pred12; | ||
| 1086 | BitField<24, 2, PredOperation> cond; | ||
| 1087 | BitField<29, 3, u64> pred29; | ||
| 1088 | BitField<32, 1, u64> neg_pred29; | ||
| 1089 | BitField<39, 3, u64> pred39; | ||
| 1090 | BitField<42, 1, u64> neg_pred39; | ||
| 1091 | BitField<45, 2, PredOperation> op; | ||
| 1092 | } psetp; | ||
| 1093 | |||
| 1094 | union { | ||
| 1095 | BitField<43, 4, PredCondition> cond; | ||
| 1096 | BitField<45, 2, PredOperation> op; | ||
| 1097 | BitField<3, 3, u64> pred3; | ||
| 1098 | BitField<0, 3, u64> pred0; | ||
| 1099 | BitField<39, 3, u64> pred39; | ||
| 1100 | } vsetp; | ||
| 1101 | |||
| 1102 | union { | ||
| 1103 | BitField<12, 3, u64> pred12; | ||
| 1104 | BitField<15, 1, u64> neg_pred12; | ||
| 1105 | BitField<24, 2, PredOperation> cond; | ||
| 1106 | BitField<29, 3, u64> pred29; | ||
| 1107 | BitField<32, 1, u64> neg_pred29; | ||
| 1108 | BitField<39, 3, u64> pred39; | ||
| 1109 | BitField<42, 1, u64> neg_pred39; | ||
| 1110 | BitField<44, 1, u64> bf; | ||
| 1111 | BitField<45, 2, PredOperation> op; | ||
| 1112 | } pset; | ||
| 1113 | |||
| 1114 | union { | ||
| 1115 | BitField<0, 3, u64> pred0; | ||
| 1116 | BitField<3, 3, u64> pred3; | ||
| 1117 | BitField<8, 5, ConditionCode> cc; // flag in cc | ||
| 1118 | BitField<39, 3, u64> pred39; | ||
| 1119 | BitField<42, 1, u64> neg_pred39; | ||
| 1120 | BitField<45, 4, PredOperation> op; // op with pred39 | ||
| 1121 | } csetp; | ||
| 1122 | |||
| 1123 | union { | ||
| 1124 | BitField<6, 1, u64> ftz; | ||
| 1125 | BitField<45, 2, PredOperation> op; | ||
| 1126 | BitField<3, 3, u64> pred3; | ||
| 1127 | BitField<0, 3, u64> pred0; | ||
| 1128 | BitField<43, 1, u64> negate_a; | ||
| 1129 | BitField<44, 1, u64> abs_a; | ||
| 1130 | BitField<47, 2, HalfType> type_a; | ||
| 1131 | union { | ||
| 1132 | BitField<35, 4, PredCondition> cond; | ||
| 1133 | BitField<49, 1, u64> h_and; | ||
| 1134 | BitField<31, 1, u64> negate_b; | ||
| 1135 | BitField<30, 1, u64> abs_b; | ||
| 1136 | BitField<28, 2, HalfType> type_b; | ||
| 1137 | } reg; | ||
| 1138 | union { | ||
| 1139 | BitField<56, 1, u64> negate_b; | ||
| 1140 | BitField<54, 1, u64> abs_b; | ||
| 1141 | } cbuf; | ||
| 1142 | union { | ||
| 1143 | BitField<49, 4, PredCondition> cond; | ||
| 1144 | BitField<53, 1, u64> h_and; | ||
| 1145 | } cbuf_and_imm; | ||
| 1146 | BitField<42, 1, u64> neg_pred; | ||
| 1147 | BitField<39, 3, u64> pred39; | ||
| 1148 | } hsetp2; | ||
| 1149 | |||
| 1150 | union { | ||
| 1151 | BitField<40, 1, R2pMode> mode; | ||
| 1152 | BitField<41, 2, u64> byte; | ||
| 1153 | BitField<20, 7, u64> immediate_mask; | ||
| 1154 | } p2r_r2p; | ||
| 1155 | |||
| 1156 | union { | ||
| 1157 | BitField<39, 3, u64> pred39; | ||
| 1158 | BitField<42, 1, u64> neg_pred; | ||
| 1159 | BitField<43, 1, u64> neg_a; | ||
| 1160 | BitField<44, 1, u64> abs_b; | ||
| 1161 | BitField<45, 2, PredOperation> op; | ||
| 1162 | BitField<48, 4, PredCondition> cond; | ||
| 1163 | BitField<52, 1, u64> bf; | ||
| 1164 | BitField<53, 1, u64> neg_b; | ||
| 1165 | BitField<54, 1, u64> abs_a; | ||
| 1166 | BitField<55, 1, u64> ftz; | ||
| 1167 | } fset; | ||
| 1168 | |||
| 1169 | union { | ||
| 1170 | BitField<47, 1, u64> ftz; | ||
| 1171 | BitField<48, 4, PredCondition> cond; | ||
| 1172 | } fcmp; | ||
| 1173 | |||
| 1174 | union { | ||
| 1175 | BitField<49, 1, u64> bf; | ||
| 1176 | BitField<35, 3, PredCondition> cond; | ||
| 1177 | BitField<50, 1, u64> ftz; | ||
| 1178 | BitField<45, 2, PredOperation> op; | ||
| 1179 | BitField<43, 1, u64> negate_a; | ||
| 1180 | BitField<44, 1, u64> abs_a; | ||
| 1181 | BitField<47, 2, HalfType> type_a; | ||
| 1182 | BitField<31, 1, u64> negate_b; | ||
| 1183 | BitField<30, 1, u64> abs_b; | ||
| 1184 | BitField<28, 2, HalfType> type_b; | ||
| 1185 | BitField<42, 1, u64> neg_pred; | ||
| 1186 | BitField<39, 3, u64> pred39; | ||
| 1187 | } hset2; | ||
| 1188 | |||
| 1189 | union { | ||
| 1190 | BitField<39, 3, u64> pred39; | ||
| 1191 | BitField<42, 1, u64> neg_pred; | ||
| 1192 | BitField<44, 1, u64> bf; | ||
| 1193 | BitField<45, 2, PredOperation> op; | ||
| 1194 | BitField<48, 1, u64> is_signed; | ||
| 1195 | BitField<49, 3, PredCondition> cond; | ||
| 1196 | } iset; | ||
| 1197 | |||
| 1198 | union { | ||
| 1199 | BitField<45, 1, u64> negate_a; | ||
| 1200 | BitField<49, 1, u64> abs_a; | ||
| 1201 | BitField<10, 2, Register::Size> src_size; | ||
| 1202 | BitField<13, 1, u64> is_input_signed; | ||
| 1203 | BitField<8, 2, Register::Size> dst_size; | ||
| 1204 | BitField<12, 1, u64> is_output_signed; | ||
| 1205 | |||
| 1206 | union { | ||
| 1207 | BitField<39, 2, u64> tab5cb8_2; | ||
| 1208 | } i2f; | ||
| 1209 | |||
| 1210 | union { | ||
| 1211 | BitField<39, 2, F2iRoundingOp> rounding; | ||
| 1212 | } f2i; | ||
| 1213 | |||
| 1214 | union { | ||
| 1215 | BitField<39, 4, u64> rounding; | ||
| 1216 | // H0, H1 extract for F16 missing | ||
| 1217 | BitField<41, 1, u64> selector; // Guessed, as some games set it; TODO: reverse this value | ||
| 1218 | [[nodiscard]] F2fRoundingOp GetRoundingMode() const { | ||
| 1219 | constexpr u64 rounding_mask = 0x0B; | ||
| 1220 | return static_cast<F2fRoundingOp>(rounding.Value() & rounding_mask); | ||
| 1221 | } | ||
| 1222 | } f2f; | ||
| 1223 | |||
| 1224 | union { | ||
| 1225 | BitField<41, 2, u64> selector; | ||
| 1226 | } int_src; | ||
| 1227 | |||
| 1228 | union { | ||
| 1229 | BitField<41, 1, u64> selector; | ||
| 1230 | } float_src; | ||
| 1231 | } conversion; | ||
| 1232 | |||
| 1233 | union { | ||
| 1234 | BitField<28, 1, u64> array; | ||
| 1235 | BitField<29, 2, TextureType> texture_type; | ||
| 1236 | BitField<31, 4, u64> component_mask; | ||
| 1237 | BitField<49, 1, u64> nodep_flag; | ||
| 1238 | BitField<50, 1, u64> dc_flag; | ||
| 1239 | BitField<54, 1, u64> aoffi_flag; | ||
| 1240 | BitField<55, 3, TextureProcessMode> process_mode; | ||
| 1241 | |||
| 1242 | [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { | ||
| 1243 | return ((1ULL << component) & component_mask) != 0; | ||
| 1244 | } | ||
| 1245 | |||
| 1246 | [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { | ||
| 1247 | return process_mode; | ||
| 1248 | } | ||
| 1249 | |||
| 1250 | [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { | ||
| 1251 | switch (mode) { | ||
| 1252 | case TextureMiscMode::DC: | ||
| 1253 | return dc_flag != 0; | ||
| 1254 | case TextureMiscMode::NODEP: | ||
| 1255 | return nodep_flag != 0; | ||
| 1256 | case TextureMiscMode::AOFFI: | ||
| 1257 | return aoffi_flag != 0; | ||
| 1258 | default: | ||
| 1259 | break; | ||
| 1260 | } | ||
| 1261 | return false; | ||
| 1262 | } | ||
| 1263 | } tex; | ||
| 1264 | |||
| 1265 | union { | ||
| 1266 | BitField<28, 1, u64> array; | ||
| 1267 | BitField<29, 2, TextureType> texture_type; | ||
| 1268 | BitField<31, 4, u64> component_mask; | ||
| 1269 | BitField<49, 1, u64> nodep_flag; | ||
| 1270 | BitField<50, 1, u64> dc_flag; | ||
| 1271 | BitField<36, 1, u64> aoffi_flag; | ||
| 1272 | BitField<37, 3, TextureProcessMode> process_mode; | ||
| 1273 | |||
| 1274 | [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { | ||
| 1275 | return ((1ULL << component) & component_mask) != 0; | ||
| 1276 | } | ||
| 1277 | |||
| 1278 | [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { | ||
| 1279 | return process_mode; | ||
| 1280 | } | ||
| 1281 | |||
| 1282 | [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { | ||
| 1283 | switch (mode) { | ||
| 1284 | case TextureMiscMode::DC: | ||
| 1285 | return dc_flag != 0; | ||
| 1286 | case TextureMiscMode::NODEP: | ||
| 1287 | return nodep_flag != 0; | ||
| 1288 | case TextureMiscMode::AOFFI: | ||
| 1289 | return aoffi_flag != 0; | ||
| 1290 | default: | ||
| 1291 | break; | ||
| 1292 | } | ||
| 1293 | return false; | ||
| 1294 | } | ||
| 1295 | } tex_b; | ||
| 1296 | |||
| 1297 | union { | ||
| 1298 | BitField<22, 6, TextureQueryType> query_type; | ||
| 1299 | BitField<31, 4, u64> component_mask; | ||
| 1300 | BitField<49, 1, u64> nodep_flag; | ||
| 1301 | |||
| 1302 | [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { | ||
| 1303 | switch (mode) { | ||
| 1304 | case TextureMiscMode::NODEP: | ||
| 1305 | return nodep_flag != 0; | ||
| 1306 | default: | ||
| 1307 | break; | ||
| 1308 | } | ||
| 1309 | return false; | ||
| 1310 | } | ||
| 1311 | |||
| 1312 | [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { | ||
| 1313 | return ((1ULL << component) & component_mask) != 0; | ||
| 1314 | } | ||
| 1315 | } txq; | ||
| 1316 | |||
| 1317 | union { | ||
| 1318 | BitField<28, 1, u64> array; | ||
| 1319 | BitField<29, 2, TextureType> texture_type; | ||
| 1320 | BitField<31, 4, u64> component_mask; | ||
| 1321 | BitField<35, 1, u64> ndv_flag; | ||
| 1322 | BitField<49, 1, u64> nodep_flag; | ||
| 1323 | |||
| 1324 | [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { | ||
| 1325 | return ((1ULL << component) & component_mask) != 0; | ||
| 1326 | } | ||
| 1327 | |||
| 1328 | [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { | ||
| 1329 | switch (mode) { | ||
| 1330 | case TextureMiscMode::NDV: | ||
| 1331 | return (ndv_flag != 0); | ||
| 1332 | case TextureMiscMode::NODEP: | ||
| 1333 | return (nodep_flag != 0); | ||
| 1334 | default: | ||
| 1335 | break; | ||
| 1336 | } | ||
| 1337 | return false; | ||
| 1338 | } | ||
| 1339 | } tmml; | ||
| 1340 | |||
| 1341 | union { | ||
| 1342 | BitField<28, 1, u64> array; | ||
| 1343 | BitField<29, 2, TextureType> texture_type; | ||
| 1344 | BitField<35, 1, u64> ndv_flag; | ||
| 1345 | BitField<49, 1, u64> nodep_flag; | ||
| 1346 | BitField<50, 1, u64> dc_flag; | ||
| 1347 | BitField<54, 2, u64> offset_mode; | ||
| 1348 | BitField<56, 2, u64> component; | ||
| 1349 | |||
| 1350 | [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { | ||
| 1351 | switch (mode) { | ||
| 1352 | case TextureMiscMode::NDV: | ||
| 1353 | return ndv_flag != 0; | ||
| 1354 | case TextureMiscMode::NODEP: | ||
| 1355 | return nodep_flag != 0; | ||
| 1356 | case TextureMiscMode::DC: | ||
| 1357 | return dc_flag != 0; | ||
| 1358 | case TextureMiscMode::AOFFI: | ||
| 1359 | return offset_mode == 1; | ||
| 1360 | case TextureMiscMode::PTP: | ||
| 1361 | return offset_mode == 2; | ||
| 1362 | default: | ||
| 1363 | break; | ||
| 1364 | } | ||
| 1365 | return false; | ||
| 1366 | } | ||
| 1367 | } tld4; | ||
| 1368 | |||
| 1369 | union { | ||
| 1370 | BitField<35, 1, u64> ndv_flag; | ||
| 1371 | BitField<49, 1, u64> nodep_flag; | ||
| 1372 | BitField<50, 1, u64> dc_flag; | ||
| 1373 | BitField<33, 2, u64> offset_mode; | ||
| 1374 | BitField<37, 2, u64> component; | ||
| 1375 | |||
| 1376 | [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { | ||
| 1377 | switch (mode) { | ||
| 1378 | case TextureMiscMode::NDV: | ||
| 1379 | return ndv_flag != 0; | ||
| 1380 | case TextureMiscMode::NODEP: | ||
| 1381 | return nodep_flag != 0; | ||
| 1382 | case TextureMiscMode::DC: | ||
| 1383 | return dc_flag != 0; | ||
| 1384 | case TextureMiscMode::AOFFI: | ||
| 1385 | return offset_mode == 1; | ||
| 1386 | case TextureMiscMode::PTP: | ||
| 1387 | return offset_mode == 2; | ||
| 1388 | default: | ||
| 1389 | break; | ||
| 1390 | } | ||
| 1391 | return false; | ||
| 1392 | } | ||
| 1393 | } tld4_b; | ||
| 1394 | |||
| 1395 | union { | ||
| 1396 | BitField<49, 1, u64> nodep_flag; | ||
| 1397 | BitField<50, 1, u64> dc_flag; | ||
| 1398 | BitField<51, 1, u64> aoffi_flag; | ||
| 1399 | BitField<52, 2, u64> component; | ||
| 1400 | BitField<55, 1, u64> fp16_flag; | ||
| 1401 | |||
| 1402 | [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { | ||
| 1403 | switch (mode) { | ||
| 1404 | case TextureMiscMode::DC: | ||
| 1405 | return dc_flag != 0; | ||
| 1406 | case TextureMiscMode::NODEP: | ||
| 1407 | return nodep_flag != 0; | ||
| 1408 | case TextureMiscMode::AOFFI: | ||
| 1409 | return aoffi_flag != 0; | ||
| 1410 | default: | ||
| 1411 | break; | ||
| 1412 | } | ||
| 1413 | return false; | ||
| 1414 | } | ||
| 1415 | } tld4s; | ||
| 1416 | |||
| 1417 | union { | ||
| 1418 | BitField<0, 8, Register> gpr0; | ||
| 1419 | BitField<28, 8, Register> gpr28; | ||
| 1420 | BitField<49, 1, u64> nodep_flag; | ||
| 1421 | BitField<50, 3, u64> component_mask_selector; | ||
| 1422 | BitField<53, 4, u64> texture_info; | ||
| 1423 | BitField<59, 1, u64> fp32_flag; | ||
| 1424 | |||
| 1425 | [[nodiscard]] TextureType GetTextureType() const { | ||
| 1426 | // The TEXS instruction has a weird encoding for the texture type. | ||
| 1427 | if (texture_info == 0) { | ||
| 1428 | return TextureType::Texture1D; | ||
| 1429 | } | ||
| 1430 | if (texture_info >= 1 && texture_info <= 9) { | ||
| 1431 | return TextureType::Texture2D; | ||
| 1432 | } | ||
| 1433 | if (texture_info >= 10 && texture_info <= 11) { | ||
| 1434 | return TextureType::Texture3D; | ||
| 1435 | } | ||
| 1436 | if (texture_info >= 12 && texture_info <= 13) { | ||
| 1437 | return TextureType::TextureCube; | ||
| 1438 | } | ||
| 1439 | |||
| 1440 | LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value()); | ||
| 1441 | UNREACHABLE(); | ||
| 1442 | return TextureType::Texture1D; | ||
| 1443 | } | ||
| 1444 | |||
| 1445 | [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { | ||
| 1446 | switch (texture_info) { | ||
| 1447 | case 0: | ||
| 1448 | case 2: | ||
| 1449 | case 6: | ||
| 1450 | case 8: | ||
| 1451 | case 9: | ||
| 1452 | case 11: | ||
| 1453 | return TextureProcessMode::LZ; | ||
| 1454 | case 3: | ||
| 1455 | case 5: | ||
| 1456 | case 13: | ||
| 1457 | return TextureProcessMode::LL; | ||
| 1458 | default: | ||
| 1459 | break; | ||
| 1460 | } | ||
| 1461 | return TextureProcessMode::None; | ||
| 1462 | } | ||
| 1463 | |||
| 1464 | [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { | ||
| 1465 | switch (mode) { | ||
| 1466 | case TextureMiscMode::DC: | ||
| 1467 | return (texture_info >= 4 && texture_info <= 6) || texture_info == 9; | ||
| 1468 | case TextureMiscMode::NODEP: | ||
| 1469 | return nodep_flag != 0; | ||
| 1470 | default: | ||
| 1471 | break; | ||
| 1472 | } | ||
| 1473 | return false; | ||
| 1474 | } | ||
| 1475 | |||
| 1476 | [[nodiscard]] bool IsArrayTexture() const { | ||
| 1477 | // TEXS only supports Texture2D arrays. | ||
| 1478 | return texture_info >= 7 && texture_info <= 9; | ||
| 1479 | } | ||
| 1480 | |||
| 1481 | [[nodiscard]] bool HasTwoDestinations() const { | ||
| 1482 | return gpr28.Value() != Register::ZeroIndex; | ||
| 1483 | } | ||
| 1484 | |||
| 1485 | [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { | ||
| 1486 | static constexpr std::array<std::array<u32, 8>, 4> mask_lut{{ | ||
| 1487 | {}, | ||
| 1488 | {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc}, | ||
| 1489 | {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc}, | ||
| 1490 | {0x7, 0xb, 0xd, 0xe, 0xf}, | ||
| 1491 | }}; | ||
| 1492 | |||
| 1493 | std::size_t index{gpr0.Value() != Register::ZeroIndex ? 1U : 0U}; | ||
| 1494 | index |= gpr28.Value() != Register::ZeroIndex ? 2 : 0; | ||
| 1495 | |||
| 1496 | u32 mask = mask_lut[index][component_mask_selector]; | ||
| 1497 | // A mask of 0 means this instruction uses an unimplemented mask. | ||
| 1498 | ASSERT(mask != 0); | ||
| 1499 | return ((1ull << component) & mask) != 0; | ||
| 1500 | } | ||
| 1501 | } texs; | ||
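The TEXS `texture_info` ranges used by the accessors above can be restated as a standalone table. A minimal sketch, using local stand-in types rather than yuzu's own headers, that mirrors `GetTextureType()` and `IsArrayTexture()`:

```cpp
#include <cassert>
#include <cstdint>

enum class TextureType { Texture1D, Texture2D, Texture3D, TextureCube };

// Mirrors texs.GetTextureType() above. Only values 0..13 are valid
// encodings; the real decoder treats anything else as a fatal error
// instead of falling through to TextureCube.
TextureType TexsTextureType(uint64_t info) {
    if (info == 0) return TextureType::Texture1D;
    if (info <= 9) return TextureType::Texture2D;  // 7..9 are the array forms
    if (info <= 11) return TextureType::Texture3D;
    return TextureType::TextureCube;               // 12..13
}

bool TexsIsArray(uint64_t info) {
    return info >= 7 && info <= 9; // same range as texs.IsArrayTexture()
}

int main() {
    assert(TexsTextureType(8) == TextureType::Texture2D && TexsIsArray(8));
    assert(TexsTextureType(13) == TextureType::TextureCube);
}
```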
| 1502 | |||
| 1503 | union { | ||
| 1504 | BitField<28, 1, u64> is_array; | ||
| 1505 | BitField<29, 2, TextureType> texture_type; | ||
| 1506 | BitField<35, 1, u64> aoffi; | ||
| 1507 | BitField<49, 1, u64> nodep_flag; | ||
| 1508 | BitField<50, 1, u64> ms; // Multisample? | ||
| 1509 | BitField<54, 1, u64> cl; | ||
| 1510 | BitField<55, 1, u64> process_mode; | ||
| 1511 | |||
| 1512 | [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { | ||
| 1513 | return process_mode == 0 ? TextureProcessMode::LZ : TextureProcessMode::LL; | ||
| 1514 | } | ||
| 1515 | } tld; | ||
| 1516 | |||
| 1517 | union { | ||
| 1518 | BitField<49, 1, u64> nodep_flag; | ||
| 1519 | BitField<53, 4, u64> texture_info; | ||
| 1520 | BitField<59, 1, u64> fp32_flag; | ||
| 1521 | |||
| 1522 | [[nodiscard]] TextureType GetTextureType() const { | ||
| 1523 | // The TLDS instruction has a weird encoding for the texture type. | ||
| 1524 | if (texture_info <= 1) { | ||
| 1525 | return TextureType::Texture1D; | ||
| 1526 | } | ||
| 1527 | if (texture_info == 2 || texture_info == 8 || texture_info == 12 || | ||
| 1528 | (texture_info >= 4 && texture_info <= 6)) { | ||
| 1529 | return TextureType::Texture2D; | ||
| 1530 | } | ||
| 1531 | if (texture_info == 7) { | ||
| 1532 | return TextureType::Texture3D; | ||
| 1533 | } | ||
| 1534 | |||
| 1535 | LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value()); | ||
| 1536 | UNREACHABLE(); | ||
| 1537 | return TextureType::Texture1D; | ||
| 1538 | } | ||
| 1539 | |||
| 1540 | [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { | ||
| 1541 | if (texture_info == 1 || texture_info == 5 || texture_info == 12) { | ||
| 1542 | return TextureProcessMode::LL; | ||
| 1543 | } | ||
| 1544 | return TextureProcessMode::LZ; | ||
| 1545 | } | ||
| 1546 | |||
| 1547 | [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { | ||
| 1548 | switch (mode) { | ||
| 1549 | case TextureMiscMode::AOFFI: | ||
| 1550 | return texture_info == 12 || texture_info == 4; | ||
| 1551 | case TextureMiscMode::MZ: | ||
| 1552 | return texture_info == 5; | ||
| 1553 | case TextureMiscMode::NODEP: | ||
| 1554 | return nodep_flag != 0; | ||
| 1555 | default: | ||
| 1556 | break; | ||
| 1557 | } | ||
| 1558 | return false; | ||
| 1559 | } | ||
| 1560 | |||
| 1561 | [[nodiscard]] bool IsArrayTexture() const { | ||
| 1562 | // TLDS only supports Texture2D arrays. | ||
| 1563 | return texture_info == 8; | ||
| 1564 | } | ||
| 1565 | } tlds; | ||
| 1566 | |||
| 1567 | union { | ||
| 1568 | BitField<28, 1, u64> is_array; | ||
| 1569 | BitField<29, 2, TextureType> texture_type; | ||
| 1570 | BitField<35, 1, u64> aoffi_flag; | ||
| 1571 | BitField<49, 1, u64> nodep_flag; | ||
| 1572 | |||
| 1573 | [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { | ||
| 1574 | switch (mode) { | ||
| 1575 | case TextureMiscMode::AOFFI: | ||
| 1576 | return aoffi_flag != 0; | ||
| 1577 | case TextureMiscMode::NODEP: | ||
| 1578 | return nodep_flag != 0; | ||
| 1579 | default: | ||
| 1580 | break; | ||
| 1581 | } | ||
| 1582 | return false; | ||
| 1583 | } | ||
| 1584 | |||
| 1585 | } txd; | ||
| 1586 | |||
| 1587 | union { | ||
| 1588 | BitField<24, 2, StoreCacheManagement> cache_management; | ||
| 1589 | BitField<33, 3, ImageType> image_type; | ||
| 1590 | BitField<49, 2, OutOfBoundsStore> out_of_bounds_store; | ||
| 1591 | BitField<51, 1, u64> is_immediate; | ||
| 1592 | BitField<52, 1, SurfaceDataMode> mode; | ||
| 1593 | |||
| 1594 | BitField<20, 3, StoreType> store_data_layout; | ||
| 1595 | BitField<20, 4, u64> component_mask_selector; | ||
| 1596 | |||
| 1597 | [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { | ||
| 1598 | ASSERT(mode == SurfaceDataMode::P); | ||
| 1599 | constexpr u8 R = 0b0001; | ||
| 1600 | constexpr u8 G = 0b0010; | ||
| 1601 | constexpr u8 B = 0b0100; | ||
| 1602 | constexpr u8 A = 0b1000; | ||
| 1603 | constexpr std::array<u8, 16> mask = { | ||
| 1604 | 0, (R), (G), (R | G), (B), (R | B), | ||
| 1605 | (G | B), (R | G | B), (A), (R | A), (G | A), (R | G | A), | ||
| 1606 | (B | A), (R | B | A), (G | B | A), (R | G | B | A)}; | ||
| 1607 | return std::bitset<4>{mask.at(component_mask_selector)}.test(component); | ||
| 1608 | } | ||
| 1609 | |||
| 1610 | [[nodiscard]] StoreType GetStoreDataLayout() const { | ||
| 1611 | ASSERT(mode == SurfaceDataMode::D_BA); | ||
| 1612 | return store_data_layout; | ||
| 1613 | } | ||
| 1614 | } suldst; | ||
| 1615 | |||
| 1616 | union { | ||
| 1617 | BitField<28, 1, u64> is_ba; | ||
| 1618 | BitField<51, 3, ImageAtomicOperationType> operation_type; | ||
| 1619 | BitField<33, 3, ImageType> image_type; | ||
| 1620 | BitField<29, 4, ImageAtomicOperation> operation; | ||
| 1621 | BitField<49, 2, OutOfBoundsStore> out_of_bounds_store; | ||
| 1622 | } suatom_d; | ||
| 1623 | |||
| 1624 | union { | ||
| 1625 | BitField<20, 24, u64> target; | ||
| 1626 | BitField<5, 1, u64> constant_buffer; | ||
| 1627 | |||
| 1628 | [[nodiscard]] s32 GetBranchTarget() const { | ||
| 1629 | // Sign extend the branch target offset | ||
| 1630 | const auto mask = 1U << (24 - 1); | ||
| 1631 | const auto target_value = static_cast<u32>(target); | ||
| 1632 | constexpr auto instruction_size = static_cast<s32>(sizeof(Instruction)); | ||
| 1633 | |||
| 1634 | // The branch offset is relative to the next instruction and is stored in bytes, so | ||
| 1635 | // divide it by the size of an instruction and add 1 to it. | ||
| 1636 | return static_cast<s32>((target_value ^ mask) - mask) / instruction_size + 1; | ||
| 1637 | } | ||
| 1638 | } bra; | ||
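The `(target_value ^ mask) - mask` expression in `GetBranchTarget()` (and in `GetBranchExtend()` below, which duplicates it) is the standard XOR trick for sign-extending an n-bit field. A self-contained sketch of the 24-bit case:

```cpp
#include <cassert>
#include <cstdint>

// Sign-extend a 24-bit value: XORing with 1 << 23 toggles the sign bit,
// and subtracting 1 << 23 then leaves non-negative values unchanged while
// pushing values that had bit 23 set below zero.
int32_t SignExtend24(uint32_t x) {
    const uint32_t mask = 1U << 23;
    return static_cast<int32_t>((x ^ mask) - mask);
}

int main() {
    assert(SignExtend24(0x000010) == 16); // positive values pass through
    assert(SignExtend24(0xFFFFF8) == -8); // bit 23 set -> negative
    // A byte offset of -8 is -1 instructions (instructions are 8 bytes),
    // and the +1 in GetBranchTarget() rebases it from "next instruction".
}
```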
| 1639 | |||
| 1640 | union { | ||
| 1641 | BitField<20, 24, u64> target; | ||
| 1642 | BitField<5, 1, u64> constant_buffer; | ||
| 1643 | |||
| 1644 | [[nodiscard]] s32 GetBranchExtend() const { | ||
| 1645 | // Sign extend the branch target offset | ||
| 1646 | const auto mask = 1U << (24 - 1); | ||
| 1647 | const auto target_value = static_cast<u32>(target); | ||
| 1648 | constexpr auto instruction_size = static_cast<s32>(sizeof(Instruction)); | ||
| 1649 | |||
| 1650 | // The branch offset is relative to the next instruction and is stored in bytes, so | ||
| 1651 | // divide it by the size of an instruction and add 1 to it. | ||
| 1652 | return static_cast<s32>((target_value ^ mask) - mask) / instruction_size + 1; | ||
| 1653 | } | ||
| 1654 | } brx; | ||
| 1655 | |||
| 1656 | union { | ||
| 1657 | BitField<39, 1, u64> emit; // EmitVertex | ||
| 1658 | BitField<40, 1, u64> cut; // EndPrimitive | ||
| 1659 | } out; | ||
| 1660 | |||
| 1661 | union { | ||
| 1662 | BitField<31, 1, u64> skew; | ||
| 1663 | BitField<32, 1, u64> o; | ||
| 1664 | BitField<33, 2, IsberdMode> mode; | ||
| 1665 | BitField<47, 2, IsberdShift> shift; | ||
| 1666 | } isberd; | ||
| 1667 | |||
| 1668 | union { | ||
| 1669 | BitField<8, 2, MembarType> type; | ||
| 1670 | BitField<0, 2, MembarUnknown> unknown; | ||
| 1671 | } membar; | ||
| 1672 | |||
| 1673 | union { | ||
| 1674 | BitField<48, 1, u64> signed_a; | ||
| 1675 | BitField<38, 1, u64> is_byte_chunk_a; | ||
| 1676 | BitField<36, 2, VideoType> type_a; | ||
| 1677 | BitField<36, 2, u64> byte_height_a; | ||
| 1678 | |||
| 1679 | BitField<49, 1, u64> signed_b; | ||
| 1680 | BitField<50, 1, u64> use_register_b; | ||
| 1681 | BitField<30, 1, u64> is_byte_chunk_b; | ||
| 1682 | BitField<28, 2, VideoType> type_b; | ||
| 1683 | BitField<28, 2, u64> byte_height_b; | ||
| 1684 | } video; | ||
| 1685 | |||
| 1686 | union { | ||
| 1687 | BitField<51, 2, VmadShr> shr; | ||
| 1688 | BitField<55, 1, u64> saturate; // Saturates the result (a * b + c) | ||
| 1689 | BitField<47, 1, u64> cc; | ||
| 1690 | } vmad; | ||
| 1691 | |||
| 1692 | union { | ||
| 1693 | BitField<54, 1, u64> is_dest_signed; | ||
| 1694 | BitField<48, 1, u64> is_src_a_signed; | ||
| 1695 | BitField<49, 1, u64> is_src_b_signed; | ||
| 1696 | BitField<37, 2, u64> src_format_a; | ||
| 1697 | BitField<29, 2, u64> src_format_b; | ||
| 1698 | BitField<56, 1, u64> mx; | ||
| 1699 | BitField<55, 1, u64> sat; | ||
| 1700 | BitField<36, 2, u64> selector_a; | ||
| 1701 | BitField<28, 2, u64> selector_b; | ||
| 1702 | BitField<50, 1, u64> is_op_b_register; | ||
| 1703 | BitField<51, 3, VmnmxOperation> operation; | ||
| 1704 | |||
| 1705 | [[nodiscard]] VmnmxType SourceFormatA() const { | ||
| 1706 | switch (src_format_a) { | ||
| 1707 | case 0b11: | ||
| 1708 | return VmnmxType::Bits32; | ||
| 1709 | case 0b10: | ||
| 1710 | return VmnmxType::Bits16; | ||
| 1711 | default: | ||
| 1712 | return VmnmxType::Bits8; | ||
| 1713 | } | ||
| 1714 | } | ||
| 1715 | |||
| 1716 | [[nodiscard]] VmnmxType SourceFormatB() const { | ||
| 1717 | switch (src_format_b) { | ||
| 1718 | case 0b11: | ||
| 1719 | return VmnmxType::Bits32; | ||
| 1720 | case 0b10: | ||
| 1721 | return VmnmxType::Bits16; | ||
| 1722 | default: | ||
| 1723 | return VmnmxType::Bits8; | ||
| 1724 | } | ||
| 1725 | } | ||
| 1726 | } vmnmx; | ||
| 1727 | |||
| 1728 | union { | ||
| 1729 | BitField<20, 16, u64> imm20_16; | ||
| 1730 | BitField<35, 1, u64> high_b_rr; // used on RR | ||
| 1731 | BitField<36, 1, u64> product_shift_left; | ||
| 1732 | BitField<37, 1, u64> merge_37; | ||
| 1733 | BitField<48, 1, u64> sign_a; | ||
| 1734 | BitField<49, 1, u64> sign_b; | ||
| 1735 | BitField<50, 2, XmadMode> mode_cbf; // used by CR, RC | ||
| 1736 | BitField<50, 3, XmadMode> mode; | ||
| 1737 | BitField<52, 1, u64> high_b; | ||
| 1738 | BitField<53, 1, u64> high_a; | ||
| 1739 | BitField<55, 1, u64> product_shift_left_second; // used on CR | ||
| 1740 | BitField<56, 1, u64> merge_56; | ||
| 1741 | } xmad; | ||
| 1742 | |||
| 1743 | union { | ||
| 1744 | BitField<20, 14, u64> shifted_offset; | ||
| 1745 | BitField<34, 5, u64> index; | ||
| 1746 | |||
| 1747 | [[nodiscard]] u64 GetOffset() const { | ||
| 1748 | return shifted_offset * 4; | ||
| 1749 | } | ||
| 1750 | } cbuf34; | ||
| 1751 | |||
| 1752 | union { | ||
| 1753 | BitField<20, 16, s64> offset; | ||
| 1754 | BitField<36, 5, u64> index; | ||
| 1755 | |||
| 1756 | [[nodiscard]] s64 GetOffset() const { | ||
| 1757 | return offset; | ||
| 1758 | } | ||
| 1759 | } cbuf36; | ||
| 1760 | |||
| 1761 | // Unsure about the size of this one. | ||
| 1762 | // It's always used with a gpr0, so any size should be fine. | ||
| 1763 | BitField<20, 8, SystemVariable> sys20; | ||
| 1764 | |||
| 1765 | BitField<47, 1, u64> generates_cc; | ||
| 1766 | BitField<61, 1, u64> is_b_imm; | ||
| 1767 | BitField<60, 1, u64> is_b_gpr; | ||
| 1768 | BitField<59, 1, u64> is_c_gpr; | ||
| 1769 | BitField<20, 24, s64> smem_imm; | ||
| 1770 | BitField<0, 5, ConditionCode> flow_condition_code; | ||
| 1771 | |||
| 1772 | Attribute attribute; | ||
| 1773 | Sampler sampler; | ||
| 1774 | Image image; | ||
| 1775 | |||
| 1776 | u64 value; | ||
| 1777 | }; | ||
| 1778 | static_assert(sizeof(Instruction) == 0x8, "Incorrect structure size"); | ||
| 1779 | static_assert(std::is_standard_layout_v<Instruction>, "Instruction is not standard layout"); | ||
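All of the unions above rely on `BitField` overlaying its storage on the same 64-bit word. A simplified stand-in, not yuzu's actual `common/bit_field.h` (which also handles signed and enum fields), showing the extraction mechanics:

```cpp
#include <cstddef>
#include <cstdint>
#include <cstdio>

// Simplified BitField: extracts `size` bits starting at `position` from the
// shared storage. Reading a union member other than the last one written is
// type punning, which yuzu (like most emulators) relies on compilers
// supporting.
template <std::size_t position, std::size_t size, typename T>
struct BitField {
    T raw;
    constexpr T Value() const {
        return (raw >> position) & ((T{1} << size) - T{1});
    }
};

union ExampleInstruction {
    uint64_t value;
    BitField<49, 1, uint64_t> nodep_flag;     // same offsets as the fields above
    BitField<31, 4, uint64_t> component_mask;
};
static_assert(sizeof(ExampleInstruction) == 8);

int main() {
    ExampleInstruction insn{};
    insn.value = (uint64_t{1} << 49) | (uint64_t{0b1010} << 31);
    std::printf("nodep=%llu mask=0x%llx\n",
                static_cast<unsigned long long>(insn.nodep_flag.Value()),
                static_cast<unsigned long long>(insn.component_mask.Value()));
    // Prints: nodep=1 mask=0xa
}
```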
| 1780 | |||
| 1781 | class OpCode { | ||
| 1782 | public: | ||
| 1783 | enum class Id { | ||
| 1784 | KIL, | ||
| 1785 | SSY, | ||
| 1786 | SYNC, | ||
| 1787 | BRK, | ||
| 1788 | DEPBAR, | ||
| 1789 | VOTE, | ||
| 1790 | VOTE_VTG, | ||
| 1791 | SHFL, | ||
| 1792 | FSWZADD, | ||
| 1793 | BFE_C, | ||
| 1794 | BFE_R, | ||
| 1795 | BFE_IMM, | ||
| 1796 | BFI_RC, | ||
| 1797 | BFI_IMM_R, | ||
| 1798 | BRA, | ||
| 1799 | BRX, | ||
| 1800 | PBK, | ||
| 1801 | LD_A, | ||
| 1802 | LD_L, | ||
| 1803 | LD_S, | ||
| 1804 | LD_C, | ||
| 1805 | LD, // Load from generic memory | ||
| 1806 | LDG, // Load from global memory | ||
| 1807 | ST_A, | ||
| 1808 | ST_L, | ||
| 1809 | ST_S, | ||
| 1810 | ST, // Store in generic memory | ||
| 1811 | STG, // Store in global memory | ||
| 1812 | RED, // Reduction operation | ||
| 1813 | ATOM, // Atomic operation on global memory | ||
| 1814 | ATOMS, // Atomic operation on shared memory | ||
| 1815 | AL2P, // Transforms attribute memory into physical memory | ||
| 1816 | TEX, | ||
| 1817 | TEX_B, // Texture Load Bindless | ||
| 1818 | TXQ, // Texture Query | ||
| 1819 | TXQ_B, // Texture Query Bindless | ||
| 1820 | TEXS, // Texture Fetch with scalar/non-vec4 source/destinations | ||
| 1821 | TLD, // Texture Load | ||
| 1822 | TLDS, // Texture Load with scalar/non-vec4 source/destinations | ||
| 1823 | TLD4, // Texture Gather 4 | ||
| 1824 | TLD4_B, // Texture Gather 4 Bindless | ||
| 1825 | TLD4S, // Texture Load 4 with scalar/non-vec4 source/destinations | ||
| 1826 | TMML_B, // Texture Mip Map Level | ||
| 1827 | TMML, // Texture Mip Map Level | ||
| 1828 | TXD, // Texture Gradient/Load with Derivatives | ||
| 1829 | TXD_B, // Texture Gradient/Load with Derivatives Bindless | ||
| 1830 | SUST, // Surface Store | ||
| 1831 | SULD, // Surface Load | ||
| 1832 | SUATOM, // Surface Atomic Operation | ||
| 1833 | EXIT, | ||
| 1834 | NOP, | ||
| 1835 | IPA, | ||
| 1836 | OUT_R, // Emit vertex/primitive | ||
| 1837 | ISBERD, | ||
| 1838 | BAR, | ||
| 1839 | MEMBAR, | ||
| 1840 | VMAD, | ||
| 1841 | VSETP, | ||
| 1842 | VMNMX, | ||
| 1843 | FFMA_IMM, // Fused Multiply and Add | ||
| 1844 | FFMA_CR, | ||
| 1845 | FFMA_RC, | ||
| 1846 | FFMA_RR, | ||
| 1847 | FADD_C, | ||
| 1848 | FADD_R, | ||
| 1849 | FADD_IMM, | ||
| 1850 | FADD32I, | ||
| 1851 | FMUL_C, | ||
| 1852 | FMUL_R, | ||
| 1853 | FMUL_IMM, | ||
| 1854 | FMUL32_IMM, | ||
| 1855 | IADD_C, | ||
| 1856 | IADD_R, | ||
| 1857 | IADD_IMM, | ||
| 1858 | IADD3_C, // Add 3 Integers | ||
| 1859 | IADD3_R, | ||
| 1860 | IADD3_IMM, | ||
| 1861 | IADD32I, | ||
| 1862 | ISCADD_C, // Scale and Add | ||
| 1863 | ISCADD_R, | ||
| 1864 | ISCADD_IMM, | ||
| 1865 | FLO_R, | ||
| 1866 | FLO_C, | ||
| 1867 | FLO_IMM, | ||
| 1868 | LEA_R1, | ||
| 1869 | LEA_R2, | ||
| 1870 | LEA_RZ, | ||
| 1871 | LEA_IMM, | ||
| 1872 | LEA_HI, | ||
| 1873 | HADD2_C, | ||
| 1874 | HADD2_R, | ||
| 1875 | HADD2_IMM, | ||
| 1876 | HMUL2_C, | ||
| 1877 | HMUL2_R, | ||
| 1878 | HMUL2_IMM, | ||
| 1879 | HFMA2_CR, | ||
| 1880 | HFMA2_RC, | ||
| 1881 | HFMA2_RR, | ||
| 1882 | HFMA2_IMM_R, | ||
| 1883 | HSETP2_C, | ||
| 1884 | HSETP2_R, | ||
| 1885 | HSETP2_IMM, | ||
| 1886 | HSET2_C, | ||
| 1887 | HSET2_R, | ||
| 1888 | HSET2_IMM, | ||
| 1889 | POPC_C, | ||
| 1890 | POPC_R, | ||
| 1891 | POPC_IMM, | ||
| 1892 | SEL_C, | ||
| 1893 | SEL_R, | ||
| 1894 | SEL_IMM, | ||
| 1895 | ICMP_RC, | ||
| 1896 | ICMP_R, | ||
| 1897 | ICMP_CR, | ||
| 1898 | ICMP_IMM, | ||
| 1899 | FCMP_RR, | ||
| 1900 | FCMP_RC, | ||
| 1901 | FCMP_IMMR, | ||
| 1902 | MUFU, // Multi-Function Operator | ||
| 1903 | RRO_C, // Range Reduction Operator | ||
| 1904 | RRO_R, | ||
| 1905 | RRO_IMM, | ||
| 1906 | F2F_C, | ||
| 1907 | F2F_R, | ||
| 1908 | F2F_IMM, | ||
| 1909 | F2I_C, | ||
| 1910 | F2I_R, | ||
| 1911 | F2I_IMM, | ||
| 1912 | I2F_C, | ||
| 1913 | I2F_R, | ||
| 1914 | I2F_IMM, | ||
| 1915 | I2I_C, | ||
| 1916 | I2I_R, | ||
| 1917 | I2I_IMM, | ||
| 1918 | LOP_C, | ||
| 1919 | LOP_R, | ||
| 1920 | LOP_IMM, | ||
| 1921 | LOP32I, | ||
| 1922 | LOP3_C, | ||
| 1923 | LOP3_R, | ||
| 1924 | LOP3_IMM, | ||
| 1925 | MOV_C, | ||
| 1926 | MOV_R, | ||
| 1927 | MOV_IMM, | ||
| 1928 | S2R, | ||
| 1929 | MOV32_IMM, | ||
| 1930 | SHL_C, | ||
| 1931 | SHL_R, | ||
| 1932 | SHL_IMM, | ||
| 1933 | SHR_C, | ||
| 1934 | SHR_R, | ||
| 1935 | SHR_IMM, | ||
| 1936 | SHF_RIGHT_R, | ||
| 1937 | SHF_RIGHT_IMM, | ||
| 1938 | SHF_LEFT_R, | ||
| 1939 | SHF_LEFT_IMM, | ||
| 1940 | FMNMX_C, | ||
| 1941 | FMNMX_R, | ||
| 1942 | FMNMX_IMM, | ||
| 1943 | IMNMX_C, | ||
| 1944 | IMNMX_R, | ||
| 1945 | IMNMX_IMM, | ||
| 1946 | FSETP_C, // Set Predicate | ||
| 1947 | FSETP_R, | ||
| 1948 | FSETP_IMM, | ||
| 1949 | FSET_C, | ||
| 1950 | FSET_R, | ||
| 1951 | FSET_IMM, | ||
| 1952 | ISETP_C, | ||
| 1953 | ISETP_IMM, | ||
| 1954 | ISETP_R, | ||
| 1955 | ISET_R, | ||
| 1956 | ISET_C, | ||
| 1957 | ISET_IMM, | ||
| 1958 | PSETP, | ||
| 1959 | PSET, | ||
| 1960 | CSETP, | ||
| 1961 | R2P_IMM, | ||
| 1962 | P2R_IMM, | ||
| 1963 | XMAD_IMM, | ||
| 1964 | XMAD_CR, | ||
| 1965 | XMAD_RC, | ||
| 1966 | XMAD_RR, | ||
| 1967 | }; | ||
| 1968 | |||
| 1969 | enum class Type { | ||
| 1970 | Trivial, | ||
| 1971 | Arithmetic, | ||
| 1972 | ArithmeticImmediate, | ||
| 1973 | ArithmeticInteger, | ||
| 1974 | ArithmeticIntegerImmediate, | ||
| 1975 | ArithmeticHalf, | ||
| 1976 | ArithmeticHalfImmediate, | ||
| 1977 | Bfe, | ||
| 1978 | Bfi, | ||
| 1979 | Shift, | ||
| 1980 | Ffma, | ||
| 1981 | Hfma2, | ||
| 1982 | Flow, | ||
| 1983 | Synch, | ||
| 1984 | Warp, | ||
| 1985 | Memory, | ||
| 1986 | Texture, | ||
| 1987 | Image, | ||
| 1988 | FloatSet, | ||
| 1989 | FloatSetPredicate, | ||
| 1990 | IntegerSet, | ||
| 1991 | IntegerSetPredicate, | ||
| 1992 | HalfSet, | ||
| 1993 | HalfSetPredicate, | ||
| 1994 | PredicateSetPredicate, | ||
| 1995 | PredicateSetRegister, | ||
| 1996 | RegisterSetPredicate, | ||
| 1997 | Conversion, | ||
| 1998 | Video, | ||
| 1999 | Xmad, | ||
| 2000 | Unknown, | ||
| 2001 | }; | ||
| 2002 | |||
| 2003 | /// Returns whether an opcode has an execution predicate field (i.e., whether it can be | ||
| 2004 | /// conditionally executed). | ||
| 2005 | [[nodiscard]] static bool IsPredicatedInstruction(Id opcode) { | ||
| 2006 | // TODO(Subv): Add the rest of unpredicated instructions. | ||
| 2007 | return opcode != Id::SSY && opcode != Id::PBK; | ||
| 2008 | } | ||
| 2009 | |||
| 2010 | class Matcher { | ||
| 2011 | public: | ||
| 2012 | constexpr Matcher(const char* const name_, u16 mask_, u16 expected_, Id id_, Type type_) | ||
| 2013 | : name{name_}, mask{mask_}, expected{expected_}, id{id_}, type{type_} {} | ||
| 2014 | |||
| 2015 | [[nodiscard]] constexpr const char* GetName() const { | ||
| 2016 | return name; | ||
| 2017 | } | ||
| 2018 | |||
| 2019 | [[nodiscard]] constexpr u16 GetMask() const { | ||
| 2020 | return mask; | ||
| 2021 | } | ||
| 2022 | |||
| 2023 | [[nodiscard]] constexpr Id GetId() const { | ||
| 2024 | return id; | ||
| 2025 | } | ||
| 2026 | |||
| 2027 | [[nodiscard]] constexpr Type GetType() const { | ||
| 2028 | return type; | ||
| 2029 | } | ||
| 2030 | |||
| 2031 | /** | ||
| 2032 | * Tests to see if the given instruction is the instruction this matcher represents. | ||
| 2033 | * @param instruction The instruction to test | ||
| 2034 | * @returns true if the given instruction matches. | ||
| 2035 | */ | ||
| 2036 | [[nodiscard]] constexpr bool Matches(u16 instruction) const { | ||
| 2037 | return (instruction & mask) == expected; | ||
| 2038 | } | ||
| 2039 | |||
| 2040 | private: | ||
| 2041 | const char* name; | ||
| 2042 | u16 mask; | ||
| 2043 | u16 expected; | ||
| 2044 | Id id; | ||
| 2045 | Type type; | ||
| 2046 | }; | ||
| 2047 | |||
| 2048 | using DecodeResult = std::optional<std::reference_wrapper<const Matcher>>; | ||
| 2049 | [[nodiscard]] static DecodeResult Decode(Instruction instr) { | ||
| 2050 | static const auto table{GetDecodeTable()}; | ||
| 2051 | |||
| 2052 | const auto matches_instruction = [instr](const auto& matcher) { | ||
| 2053 | return matcher.Matches(static_cast<u16>(instr.opcode)); | ||
| 2054 | }; | ||
| 2055 | |||
| 2056 | auto iter = std::find_if(table.begin(), table.end(), matches_instruction); | ||
| 2057 | return iter != table.end() ? std::optional<std::reference_wrapper<const Matcher>>(*iter) | ||
| 2058 | : std::nullopt; | ||
| 2059 | } | ||
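A usage sketch for `Decode()`, assuming this header is available; in practice `instr` comes from the shader program's instruction stream rather than being constructed by hand:

```cpp
#include <cstdio>
#include "video_core/engines/shader_bytecode.h"

// Classify one raw instruction word. Decode() returns the most specific
// matcher whose fixed opcode bits agree with the word, or std::nullopt.
void PrintOpcode(Tegra::Shader::Instruction instr) {
    if (const auto result = Tegra::Shader::OpCode::Decode(instr)) {
        std::printf("matched %s\n", result->get().GetName());
    } else {
        std::printf("unknown opcode 0x%016llx\n",
                    static_cast<unsigned long long>(instr.value));
    }
}
```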
| 2060 | |||
| 2061 | private: | ||
| 2062 | struct Detail { | ||
| 2063 | private: | ||
| 2064 | static constexpr std::size_t opcode_bitsize = 16; | ||
| 2065 | |||
| 2066 | /** | ||
| 2067 | * Generates the mask and the expected value after masking from a given bitstring. | ||
| 2068 | * A '0' in a bitstring indicates that a zero must be present at that bit position. | ||
| 2069 | * A '1' in a bitstring indicates that a one must be present at that bit position. | ||
| 2070 | */ | ||
| 2071 | [[nodiscard]] static constexpr auto GetMaskAndExpect(const char* const bitstring) { | ||
| 2072 | u16 mask = 0, expect = 0; | ||
| 2073 | for (std::size_t i = 0; i < opcode_bitsize; i++) { | ||
| 2074 | const std::size_t bit_position = opcode_bitsize - i - 1; | ||
| 2075 | switch (bitstring[i]) { | ||
| 2076 | case '0': | ||
| 2077 | mask |= static_cast<u16>(1U << bit_position); | ||
| 2078 | break; | ||
| 2079 | case '1': | ||
| 2080 | expect |= static_cast<u16>(1U << bit_position); | ||
| 2081 | mask |= static_cast<u16>(1U << bit_position); | ||
| 2082 | break; | ||
| 2083 | default: | ||
| 2084 | // Ignore | ||
| 2085 | break; | ||
| 2086 | } | ||
| 2087 | } | ||
| 2088 | return std::make_pair(mask, expect); | ||
| 2089 | } | ||
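Applying `GetMaskAndExpect()` by hand to one pattern from the table below makes the matching rule concrete. For TEX's bitstring `"110000----111---"` (MSB first): a fixed `'0'` or `'1'` sets the mask bit, `'1'` additionally sets the expected bit, and `'-'` is a don't-care that leaves both clear:

```cpp
#include <cstdint>

// "110000----111---" fixes nine bits:
//   mask   = 0b1111'1100'0011'1000 = 0xFC38
//   expect = 0b1100'0000'0011'1000 = 0xC038
constexpr uint16_t tex_mask = 0xFC38;
constexpr uint16_t tex_expect = 0xC038;

// Matcher::Matches() then reduces to a single masked comparison:
constexpr bool MatchesTex(uint16_t op) {
    return (op & tex_mask) == tex_expect;
}
static_assert(MatchesTex(0b1100'0010'1011'1101));  // don't-care bits may vary
static_assert(!MatchesTex(0b1110'0000'0011'1000)); // fixed bit 13 differs
```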
| 2090 | |||
| 2091 | public: | ||
| 2092 | /// Creates a matcher that can match and parse instructions based on bitstring. | ||
| 2093 | [[nodiscard]] static constexpr auto GetMatcher(const char* const bitstring, Id op, | ||
| 2094 | Type type, const char* const name) { | ||
| 2095 | const auto [mask, expected] = GetMaskAndExpect(bitstring); | ||
| 2096 | return Matcher(name, mask, expected, op, type); | ||
| 2097 | } | ||
| 2098 | }; | ||
| 2099 | |||
| 2100 | [[nodiscard]] static std::vector<Matcher> GetDecodeTable() { | ||
| 2101 | std::vector<Matcher> table = { | ||
| 2102 | #define INST(bitstring, op, type, name) Detail::GetMatcher(bitstring, op, type, name) | ||
| 2103 | INST("111000110011----", Id::KIL, Type::Flow, "KIL"), | ||
| 2104 | INST("111000101001----", Id::SSY, Type::Flow, "SSY"), | ||
| 2105 | INST("111000101010----", Id::PBK, Type::Flow, "PBK"), | ||
| 2106 | INST("111000100100----", Id::BRA, Type::Flow, "BRA"), | ||
| 2107 | INST("111000100101----", Id::BRX, Type::Flow, "BRX"), | ||
| 2108 | INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"), | ||
| 2109 | INST("111000110100----", Id::BRK, Type::Flow, "BRK"), | ||
| 2110 | INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), | ||
| 2111 | INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), | ||
| 2112 | INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"), | ||
| 2113 | INST("0101000011100---", Id::VOTE_VTG, Type::Warp, "VOTE_VTG"), | ||
| 2114 | INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"), | ||
| 2115 | INST("0101000011111---", Id::FSWZADD, Type::Warp, "FSWZADD"), | ||
| 2116 | INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), | ||
| 2117 | INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), | ||
| 2118 | INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), | ||
| 2119 | INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"), | ||
| 2120 | INST("100-------------", Id::LD, Type::Memory, "LD"), | ||
| 2121 | INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), | ||
| 2122 | INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"), | ||
| 2123 | INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"), | ||
| 2124 | INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), | ||
| 2125 | INST("101-------------", Id::ST, Type::Memory, "ST"), | ||
| 2126 | INST("1110111011011---", Id::STG, Type::Memory, "STG"), | ||
| 2127 | INST("1110101111111---", Id::RED, Type::Memory, "RED"), | ||
| 2128 | INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"), | ||
| 2129 | INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"), | ||
| 2130 | INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), | ||
| 2131 | INST("110000----111---", Id::TEX, Type::Texture, "TEX"), | ||
| 2132 | INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"), | ||
| 2133 | INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"), | ||
| 2134 | INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"), | ||
| 2135 | INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"), | ||
| 2136 | INST("11011100--11----", Id::TLD, Type::Texture, "TLD"), | ||
| 2137 | INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"), | ||
| 2138 | INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), | ||
| 2139 | INST("1101111011111---", Id::TLD4_B, Type::Texture, "TLD4_B"), | ||
| 2140 | INST("11011111-0------", Id::TLD4S, Type::Texture, "TLD4S"), | ||
| 2141 | INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), | ||
| 2142 | INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), | ||
| 2143 | INST("11011110011110--", Id::TXD_B, Type::Texture, "TXD_B"), | ||
| 2144 | INST("11011110001110--", Id::TXD, Type::Texture, "TXD"), | ||
| 2145 | INST("11101011001-----", Id::SUST, Type::Image, "SUST"), | ||
| 2146 | INST("11101011000-----", Id::SULD, Type::Image, "SULD"), | ||
| 2147 | INST("1110101000------", Id::SUATOM, Type::Image, "SUATOM_D"), | ||
| 2148 | INST("0101000010110---", Id::NOP, Type::Trivial, "NOP"), | ||
| 2149 | INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), | ||
| 2150 | INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), | ||
| 2151 | INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), | ||
| 2152 | INST("1111000010101---", Id::BAR, Type::Trivial, "BAR"), | ||
| 2153 | INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"), | ||
| 2154 | INST("01011111--------", Id::VMAD, Type::Video, "VMAD"), | ||
| 2155 | INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"), | ||
| 2156 | INST("0011101---------", Id::VMNMX, Type::Video, "VMNMX"), | ||
| 2157 | INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"), | ||
| 2158 | INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"), | ||
| 2159 | INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"), | ||
| 2160 | INST("010110011-------", Id::FFMA_RR, Type::Ffma, "FFMA_RR"), | ||
| 2161 | INST("0100110001011---", Id::FADD_C, Type::Arithmetic, "FADD_C"), | ||
| 2162 | INST("0101110001011---", Id::FADD_R, Type::Arithmetic, "FADD_R"), | ||
| 2163 | INST("0011100-01011---", Id::FADD_IMM, Type::Arithmetic, "FADD_IMM"), | ||
| 2164 | INST("000010----------", Id::FADD32I, Type::ArithmeticImmediate, "FADD32I"), | ||
| 2165 | INST("0100110001101---", Id::FMUL_C, Type::Arithmetic, "FMUL_C"), | ||
| 2166 | INST("0101110001101---", Id::FMUL_R, Type::Arithmetic, "FMUL_R"), | ||
| 2167 | INST("0011100-01101---", Id::FMUL_IMM, Type::Arithmetic, "FMUL_IMM"), | ||
| 2168 | INST("00011110--------", Id::FMUL32_IMM, Type::ArithmeticImmediate, "FMUL32_IMM"), | ||
| 2169 | INST("0100110000010---", Id::IADD_C, Type::ArithmeticInteger, "IADD_C"), | ||
| 2170 | INST("0101110000010---", Id::IADD_R, Type::ArithmeticInteger, "IADD_R"), | ||
| 2171 | INST("0011100-00010---", Id::IADD_IMM, Type::ArithmeticInteger, "IADD_IMM"), | ||
| 2172 | INST("010011001100----", Id::IADD3_C, Type::ArithmeticInteger, "IADD3_C"), | ||
| 2173 | INST("010111001100----", Id::IADD3_R, Type::ArithmeticInteger, "IADD3_R"), | ||
| 2174 | INST("0011100-1100----", Id::IADD3_IMM, Type::ArithmeticInteger, "IADD3_IMM"), | ||
| 2175 | INST("0001110---------", Id::IADD32I, Type::ArithmeticIntegerImmediate, "IADD32I"), | ||
| 2176 | INST("0100110000011---", Id::ISCADD_C, Type::ArithmeticInteger, "ISCADD_C"), | ||
| 2177 | INST("0101110000011---", Id::ISCADD_R, Type::ArithmeticInteger, "ISCADD_R"), | ||
| 2178 | INST("0011100-00011---", Id::ISCADD_IMM, Type::ArithmeticInteger, "ISCADD_IMM"), | ||
| 2179 | INST("0100110000001---", Id::POPC_C, Type::ArithmeticInteger, "POPC_C"), | ||
| 2180 | INST("0101110000001---", Id::POPC_R, Type::ArithmeticInteger, "POPC_R"), | ||
| 2181 | INST("0011100-00001---", Id::POPC_IMM, Type::ArithmeticInteger, "POPC_IMM"), | ||
| 2182 | INST("0100110010100---", Id::SEL_C, Type::ArithmeticInteger, "SEL_C"), | ||
| 2183 | INST("0101110010100---", Id::SEL_R, Type::ArithmeticInteger, "SEL_R"), | ||
| 2184 | INST("0011100-10100---", Id::SEL_IMM, Type::ArithmeticInteger, "SEL_IMM"), | ||
| 2185 | INST("010100110100----", Id::ICMP_RC, Type::ArithmeticInteger, "ICMP_RC"), | ||
| 2186 | INST("010110110100----", Id::ICMP_R, Type::ArithmeticInteger, "ICMP_R"), | ||
| 2187 | INST("010010110100----", Id::ICMP_CR, Type::ArithmeticInteger, "ICMP_CR"), | ||
| 2188 | INST("0011011-0100----", Id::ICMP_IMM, Type::ArithmeticInteger, "ICMP_IMM"), | ||
| 2189 | INST("0101110000110---", Id::FLO_R, Type::ArithmeticInteger, "FLO_R"), | ||
| 2190 | INST("0100110000110---", Id::FLO_C, Type::ArithmeticInteger, "FLO_C"), | ||
| 2191 | INST("0011100-00110---", Id::FLO_IMM, Type::ArithmeticInteger, "FLO_IMM"), | ||
| 2192 | INST("0101101111011---", Id::LEA_R2, Type::ArithmeticInteger, "LEA_R2"), | ||
| 2193 | INST("0101101111010---", Id::LEA_R1, Type::ArithmeticInteger, "LEA_R1"), | ||
| 2194 | INST("001101101101----", Id::LEA_IMM, Type::ArithmeticInteger, "LEA_IMM"), | ||
| 2195 | INST("010010111101----", Id::LEA_RZ, Type::ArithmeticInteger, "LEA_RZ"), | ||
| 2196 | INST("00011000--------", Id::LEA_HI, Type::ArithmeticInteger, "LEA_HI"), | ||
| 2197 | INST("0111101-1-------", Id::HADD2_C, Type::ArithmeticHalf, "HADD2_C"), | ||
| 2198 | INST("0101110100010---", Id::HADD2_R, Type::ArithmeticHalf, "HADD2_R"), | ||
| 2199 | INST("0111101-0-------", Id::HADD2_IMM, Type::ArithmeticHalfImmediate, "HADD2_IMM"), | ||
| 2200 | INST("0111100-1-------", Id::HMUL2_C, Type::ArithmeticHalf, "HMUL2_C"), | ||
| 2201 | INST("0101110100001---", Id::HMUL2_R, Type::ArithmeticHalf, "HMUL2_R"), | ||
| 2202 | INST("0111100-0-------", Id::HMUL2_IMM, Type::ArithmeticHalfImmediate, "HMUL2_IMM"), | ||
| 2203 | INST("01110---1-------", Id::HFMA2_CR, Type::Hfma2, "HFMA2_CR"), | ||
| 2204 | INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"), | ||
| 2205 | INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"), | ||
| 2206 | INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"), | ||
| 2207 | INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"), | ||
| 2208 | INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"), | ||
| 2209 | INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"), | ||
| 2210 | INST("0111110-1-------", Id::HSET2_C, Type::HalfSet, "HSET2_C"), | ||
| 2211 | INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), | ||
| 2212 | INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"), | ||
| 2213 | INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"), | ||
| 2214 | INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"), | ||
| 2215 | INST("0011011-1010----", Id::FCMP_IMMR, Type::Arithmetic, "FCMP_IMMR"), | ||
| 2216 | INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), | ||
| 2217 | INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), | ||
| 2218 | INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"), | ||
| 2219 | INST("0011100-10010---", Id::RRO_IMM, Type::Arithmetic, "RRO_IMM"), | ||
| 2220 | INST("0100110010101---", Id::F2F_C, Type::Conversion, "F2F_C"), | ||
| 2221 | INST("0101110010101---", Id::F2F_R, Type::Conversion, "F2F_R"), | ||
| 2222 | INST("0011100-10101---", Id::F2F_IMM, Type::Conversion, "F2F_IMM"), | ||
| 2223 | INST("0100110010110---", Id::F2I_C, Type::Conversion, "F2I_C"), | ||
| 2224 | INST("0101110010110---", Id::F2I_R, Type::Conversion, "F2I_R"), | ||
| 2225 | INST("0011100-10110---", Id::F2I_IMM, Type::Conversion, "F2I_IMM"), | ||
| 2226 | INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"), | ||
| 2227 | INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"), | ||
| 2228 | INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"), | ||
| 2229 | INST("1111000011001---", Id::S2R, Type::Trivial, "S2R"), | ||
| 2230 | INST("000000010000----", Id::MOV32_IMM, Type::ArithmeticImmediate, "MOV32_IMM"), | ||
| 2231 | INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"), | ||
| 2232 | INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"), | ||
| 2233 | INST("0011100-01100---", Id::FMNMX_IMM, Type::Arithmetic, "FMNMX_IMM"), | ||
| 2234 | INST("0100110000100---", Id::IMNMX_C, Type::ArithmeticInteger, "IMNMX_C"), | ||
| 2235 | INST("0101110000100---", Id::IMNMX_R, Type::ArithmeticInteger, "IMNMX_R"), | ||
| 2236 | INST("0011100-00100---", Id::IMNMX_IMM, Type::ArithmeticInteger, "IMNMX_IMM"), | ||
| 2237 | INST("0100110000000---", Id::BFE_C, Type::Bfe, "BFE_C"), | ||
| 2238 | INST("0101110000000---", Id::BFE_R, Type::Bfe, "BFE_R"), | ||
| 2239 | INST("0011100-00000---", Id::BFE_IMM, Type::Bfe, "BFE_IMM"), | ||
| 2240 | INST("0101001111110---", Id::BFI_RC, Type::Bfi, "BFI_RC"), | ||
| 2241 | INST("0011011-11110---", Id::BFI_IMM_R, Type::Bfi, "BFI_IMM_R"), | ||
| 2242 | INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"), | ||
| 2243 | INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"), | ||
| 2244 | INST("0011100-01000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"), | ||
| 2245 | INST("000001----------", Id::LOP32I, Type::ArithmeticIntegerImmediate, "LOP32I"), | ||
| 2246 | INST("0000001---------", Id::LOP3_C, Type::ArithmeticInteger, "LOP3_C"), | ||
| 2247 | INST("0101101111100---", Id::LOP3_R, Type::ArithmeticInteger, "LOP3_R"), | ||
| 2248 | INST("0011110---------", Id::LOP3_IMM, Type::ArithmeticInteger, "LOP3_IMM"), | ||
| 2249 | INST("0100110001001---", Id::SHL_C, Type::Shift, "SHL_C"), | ||
| 2250 | INST("0101110001001---", Id::SHL_R, Type::Shift, "SHL_R"), | ||
| 2251 | INST("0011100-01001---", Id::SHL_IMM, Type::Shift, "SHL_IMM"), | ||
| 2252 | INST("0100110000101---", Id::SHR_C, Type::Shift, "SHR_C"), | ||
| 2253 | INST("0101110000101---", Id::SHR_R, Type::Shift, "SHR_R"), | ||
| 2254 | INST("0011100-00101---", Id::SHR_IMM, Type::Shift, "SHR_IMM"), | ||
| 2255 | INST("0101110011111---", Id::SHF_RIGHT_R, Type::Shift, "SHF_RIGHT_R"), | ||
| 2256 | INST("0011100-11111---", Id::SHF_RIGHT_IMM, Type::Shift, "SHF_RIGHT_IMM"), | ||
| 2257 | INST("0101101111111---", Id::SHF_LEFT_R, Type::Shift, "SHF_LEFT_R"), | ||
| 2258 | INST("0011011-11111---", Id::SHF_LEFT_IMM, Type::Shift, "SHF_LEFT_IMM"), | ||
| 2259 | INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"), | ||
| 2260 | INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"), | ||
| 2261 | INST("0011100-11100---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"), | ||
| 2262 | INST("0100110010111---", Id::I2F_C, Type::Conversion, "I2F_C"), | ||
| 2263 | INST("0101110010111---", Id::I2F_R, Type::Conversion, "I2F_R"), | ||
| 2264 | INST("0011100-10111---", Id::I2F_IMM, Type::Conversion, "I2F_IMM"), | ||
| 2265 | INST("01011000--------", Id::FSET_R, Type::FloatSet, "FSET_R"), | ||
| 2266 | INST("0100100---------", Id::FSET_C, Type::FloatSet, "FSET_C"), | ||
| 2267 | INST("0011000---------", Id::FSET_IMM, Type::FloatSet, "FSET_IMM"), | ||
| 2268 | INST("010010111011----", Id::FSETP_C, Type::FloatSetPredicate, "FSETP_C"), | ||
| 2269 | INST("010110111011----", Id::FSETP_R, Type::FloatSetPredicate, "FSETP_R"), | ||
| 2270 | INST("0011011-1011----", Id::FSETP_IMM, Type::FloatSetPredicate, "FSETP_IMM"), | ||
| 2271 | INST("010010110110----", Id::ISETP_C, Type::IntegerSetPredicate, "ISETP_C"), | ||
| 2272 | INST("010110110110----", Id::ISETP_R, Type::IntegerSetPredicate, "ISETP_R"), | ||
| 2273 | INST("0011011-0110----", Id::ISETP_IMM, Type::IntegerSetPredicate, "ISETP_IMM"), | ||
| 2274 | INST("010110110101----", Id::ISET_R, Type::IntegerSet, "ISET_R"), | ||
| 2275 | INST("010010110101----", Id::ISET_C, Type::IntegerSet, "ISET_C"), | ||
| 2276 | INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"), | ||
| 2277 | INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"), | ||
| 2278 | INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"), | ||
| 2279 | INST("010100001010----", Id::CSETP, Type::PredicateSetPredicate, "CSETP"), | ||
| 2280 | INST("0011100-11110---", Id::R2P_IMM, Type::RegisterSetPredicate, "R2P_IMM"), | ||
| 2281 | INST("0011100-11101---", Id::P2R_IMM, Type::RegisterSetPredicate, "P2R_IMM"), | ||
| 2282 | INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"), | ||
| 2283 | INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"), | ||
| 2284 | INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"), | ||
| 2285 | INST("0101101100------", Id::XMAD_RR, Type::Xmad, "XMAD_RR"), | ||
| 2286 | }; | ||
| 2287 | #undef INST | ||
| 2288 | std::stable_sort(table.begin(), table.end(), [](const auto& a, const auto& b) { | ||
| 2289 | // If a matcher has more bits in its mask it is more specific, so it | ||
| 2290 | // should come first. | ||
| 2291 | return std::bitset<16>(a.GetMask()).count() > std::bitset<16>(b.GetMask()).count(); | ||
| 2292 | }); | ||
| 2293 | |||
| 2294 | return table; | ||
| 2295 | } | ||
| 2296 | }; | ||
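The `stable_sort` at the end of `GetDecodeTable()` orders matchers by how many bits their mask fixes, so a more constrained pattern is always tried before a looser one; wherever one pattern subsumes another, this makes decoding independent of the table's textual order. Comparing the generic LD pattern with LD_A:

```cpp
#include <bitset>
#include <cassert>

int main() {
    // "100-------------" (LD) fixes only 3 bits, while
    // "1110111111011---" (LD_A) fixes 13, so LD_A sorts ahead of LD.
    const std::bitset<16> ld_mask{0b1110'0000'0000'0000};
    const std::bitset<16> ld_a_mask{0b1111'1111'1111'1000};
    assert(ld_a_mask.count() > ld_mask.count());
}
```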
| 2297 | |||
| 2298 | } // namespace Tegra::Shader | ||
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h deleted file mode 100644 index e0d7b89c5..000000000 --- a/src/video_core/engines/shader_header.h +++ /dev/null | |||
| @@ -1,158 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <optional> | ||
| 9 | |||
| 10 | #include "common/bit_field.h" | ||
| 11 | #include "common/common_funcs.h" | ||
| 12 | #include "common/common_types.h" | ||
| 13 | |||
| 14 | namespace Tegra::Shader { | ||
| 15 | |||
| 16 | enum class OutputTopology : u32 { | ||
| 17 | PointList = 1, | ||
| 18 | LineStrip = 6, | ||
| 19 | TriangleStrip = 7, | ||
| 20 | }; | ||
| 21 | |||
| 22 | enum class PixelImap : u8 { | ||
| 23 | Unused = 0, | ||
| 24 | Constant = 1, | ||
| 25 | Perspective = 2, | ||
| 26 | ScreenLinear = 3, | ||
| 27 | }; | ||
| 28 | |||
| 29 | // Documentation in: | ||
| 30 | // http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html | ||
| 31 | struct Header { | ||
| 32 | union { | ||
| 33 | BitField<0, 5, u32> sph_type; | ||
| 34 | BitField<5, 5, u32> version; | ||
| 35 | BitField<10, 4, u32> shader_type; | ||
| 36 | BitField<14, 1, u32> mrt_enable; | ||
| 37 | BitField<15, 1, u32> kills_pixels; | ||
| 38 | BitField<16, 1, u32> does_global_store; | ||
| 39 | BitField<17, 4, u32> sass_version; | ||
| 40 | BitField<21, 5, u32> reserved; | ||
| 41 | BitField<26, 1, u32> does_load_or_store; | ||
| 42 | BitField<27, 1, u32> does_fp64; | ||
| 43 | BitField<28, 4, u32> stream_out_mask; | ||
| 44 | } common0; | ||
| 45 | |||
| 46 | union { | ||
| 47 | BitField<0, 24, u32> shader_local_memory_low_size; | ||
| 48 | BitField<24, 8, u32> per_patch_attribute_count; | ||
| 49 | } common1; | ||
| 50 | |||
| 51 | union { | ||
| 52 | BitField<0, 24, u32> shader_local_memory_high_size; | ||
| 53 | BitField<24, 8, u32> threads_per_input_primitive; | ||
| 54 | } common2; | ||
| 55 | |||
| 56 | union { | ||
| 57 | BitField<0, 24, u32> shader_local_memory_crs_size; | ||
| 58 | BitField<24, 4, OutputTopology> output_topology; | ||
| 59 | BitField<28, 4, u32> reserved; | ||
| 60 | } common3; | ||
| 61 | |||
| 62 | union { | ||
| 63 | BitField<0, 12, u32> max_output_vertices; | ||
| 64 | BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders. | ||
| 65 | BitField<20, 4, u32> reserved; | ||
| 66 | BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders. | ||
| 67 | } common4; | ||
| 68 | |||
| 69 | union { | ||
| 70 | struct { | ||
| 71 | INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA | ||
| 72 | INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB | ||
| 73 | INSERT_PADDING_BYTES_NOINIT(16); // ImapGenericVector[32] | ||
| 74 | INSERT_PADDING_BYTES_NOINIT(2); // ImapColor | ||
| 75 | union { | ||
| 76 | BitField<0, 8, u16> clip_distances; | ||
| 77 | BitField<8, 1, u16> point_sprite_s; | ||
| 78 | BitField<9, 1, u16> point_sprite_t; | ||
| 79 | BitField<10, 1, u16> fog_coordinate; | ||
| 80 | BitField<12, 1, u16> tessellation_eval_point_u; | ||
| 81 | BitField<13, 1, u16> tessellation_eval_point_v; | ||
| 82 | BitField<14, 1, u16> instance_id; | ||
| 83 | BitField<15, 1, u16> vertex_id; | ||
| 84 | }; | ||
| 85 | INSERT_PADDING_BYTES_NOINIT(5); // ImapFixedFncTexture[10] | ||
| 86 | INSERT_PADDING_BYTES_NOINIT(1); // ImapReserved | ||
| 87 | INSERT_PADDING_BYTES_NOINIT(3); // OmapSystemValuesA | ||
| 88 | INSERT_PADDING_BYTES_NOINIT(1); // OmapSystemValuesB | ||
| 89 | INSERT_PADDING_BYTES_NOINIT(16); // OmapGenericVector[32] | ||
| 90 | INSERT_PADDING_BYTES_NOINIT(2); // OmapColor | ||
| 91 | INSERT_PADDING_BYTES_NOINIT(2); // OmapSystemValuesC | ||
| 92 | INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10] | ||
| 93 | INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved | ||
| 94 | } vtg; | ||
| 95 | |||
| 96 | struct { | ||
| 97 | INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA | ||
| 98 | INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB | ||
| 99 | |||
| 100 | union { | ||
| 101 | BitField<0, 2, PixelImap> x; | ||
| 102 | BitField<2, 2, PixelImap> y; | ||
| 103 | BitField<4, 2, PixelImap> z; | ||
| 104 | BitField<6, 2, PixelImap> w; | ||
| 105 | u8 raw; | ||
| 106 | } imap_generic_vector[32]; | ||
| 107 | |||
| 108 | INSERT_PADDING_BYTES_NOINIT(2); // ImapColor | ||
| 109 | INSERT_PADDING_BYTES_NOINIT(2); // ImapSystemValuesC | ||
| 110 | INSERT_PADDING_BYTES_NOINIT(10); // ImapFixedFncTexture[10] | ||
| 111 | INSERT_PADDING_BYTES_NOINIT(2); // ImapReserved | ||
| 112 | |||
| 113 | struct { | ||
| 114 | u32 target; | ||
| 115 | union { | ||
| 116 | BitField<0, 1, u32> sample_mask; | ||
| 117 | BitField<1, 1, u32> depth; | ||
| 118 | BitField<2, 30, u32> reserved; | ||
| 119 | }; | ||
| 120 | } omap; | ||
| 121 | |||
| 122 | bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const { | ||
| 123 | const u32 bit = render_target * 4 + component; | ||
| 124 | return omap.target & (1 << bit); | ||
| 125 | } | ||
| 126 | |||
| 127 | PixelImap GetPixelImap(u32 attribute) const { | ||
| 128 | const auto get_index = [this, attribute](u32 index) { | ||
| 129 | return static_cast<PixelImap>( | ||
| 130 | (imap_generic_vector[attribute].raw >> (index * 2)) & 3); | ||
| 131 | }; | ||
| 132 | |||
| 133 | std::optional<PixelImap> result; | ||
| 134 | for (u32 component = 0; component < 4; ++component) { | ||
| 135 | const PixelImap index = get_index(component); | ||
| 136 | if (index == PixelImap::Unused) { | ||
| 137 | continue; | ||
| 138 | } | ||
| 139 | if (result && result != index) { | ||
| 140 | LOG_CRITICAL(HW_GPU, "Generic attribute conflict in interpolation mode"); | ||
| 141 | } | ||
| 142 | result = index; | ||
| 143 | } | ||
| 144 | return result.value_or(PixelImap::Unused); | ||
| 145 | } | ||
| 146 | } ps; | ||
| 147 | |||
| 148 | std::array<u32, 0xF> raw; | ||
| 149 | }; | ||
| 150 | |||
| 151 | u64 GetLocalMemorySize() const { | ||
| 152 | return (common1.shader_local_memory_low_size | | ||
| 153 | (common2.shader_local_memory_high_size << 24)); | ||
| 154 | } | ||
| 155 | }; | ||
| 156 | static_assert(sizeof(Header) == 0x50, "Incorrect structure size"); | ||
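`GetLocalMemorySize()` concatenates the 24-bit low field with the high field shifted above bit 24. A one-line check with made-up field values:

```cpp
#include <cassert>
#include <cstdint>

int main() {
    const uint64_t low = 0x123456;  // common1.shader_local_memory_low_size
    const uint64_t high = 0x7;      // common2.shader_local_memory_high_size
    assert((low | (high << 24)) == 0x7123456); // total local memory size
}
```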
| 157 | |||
| 158 | } // namespace Tegra::Shader | ||
diff --git a/src/video_core/engines/shader_type.h b/src/video_core/engines/shader_type.h deleted file mode 100644 index 49ce5cde5..000000000 --- a/src/video_core/engines/shader_type.h +++ /dev/null | |||
| @@ -1,21 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | |||
| 9 | namespace Tegra::Engines { | ||
| 10 | |||
| 11 | enum class ShaderType : u32 { | ||
| 12 | Vertex = 0, | ||
| 13 | TesselationControl = 1, | ||
| 14 | TesselationEval = 2, | ||
| 15 | Geometry = 3, | ||
| 16 | Fragment = 4, | ||
| 17 | Compute = 5, | ||
| 18 | }; | ||
| 19 | static constexpr std::size_t MaxShaderTypes = 6; | ||
| 20 | |||
| 21 | } // namespace Tegra::Engines | ||
diff --git a/src/video_core/guest_driver.cpp b/src/video_core/guest_driver.cpp deleted file mode 100644 index f058f2744..000000000 --- a/src/video_core/guest_driver.cpp +++ /dev/null | |||
| @@ -1,37 +0,0 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <limits> | ||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/guest_driver.h" | ||
| 11 | |||
| 12 | namespace VideoCore { | ||
| 13 | |||
| 14 | void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32> bound_offsets) { | ||
| 15 | if (texture_handler_size) { | ||
| 16 | return; | ||
| 17 | } | ||
| 18 | const std::size_t size = bound_offsets.size(); | ||
| 19 | if (size < 2) { | ||
| 20 | return; | ||
| 21 | } | ||
| 22 | std::sort(bound_offsets.begin(), bound_offsets.end(), std::less{}); | ||
| 23 | u32 min_val = std::numeric_limits<u32>::max(); | ||
| 24 | for (std::size_t i = 1; i < size; ++i) { | ||
| 25 | if (bound_offsets[i] == bound_offsets[i - 1]) { | ||
| 26 | continue; | ||
| 27 | } | ||
| 28 | const u32 new_min = bound_offsets[i] - bound_offsets[i - 1]; | ||
| 29 | min_val = std::min(min_val, new_min); | ||
| 30 | } | ||
| 31 | if (min_val > 2) { | ||
| 32 | return; | ||
| 33 | } | ||
| 34 | texture_handler_size = min_texture_handler_size * min_val; | ||
| 35 | } | ||
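A standalone restatement of the deduction above, with made-up offsets: the smallest gap between distinct sorted offsets becomes the handler stride in units of `min_texture_handler_size` (4 bytes), and gaps larger than 2 units are treated as inconclusive:

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <limits>
#include <optional>
#include <vector>

// Mirrors DeduceTextureHandlerSize(), returning the deduced size in bytes.
std::optional<uint32_t> Deduce(std::vector<uint32_t> offsets) {
    if (offsets.size() < 2) return std::nullopt;
    std::sort(offsets.begin(), offsets.end());
    uint32_t min_gap = std::numeric_limits<uint32_t>::max();
    for (std::size_t i = 1; i < offsets.size(); ++i) {
        if (offsets[i] != offsets[i - 1]) {
            min_gap = std::min(min_gap, offsets[i] - offsets[i - 1]);
        }
    }
    if (min_gap > 2) return std::nullopt; // gap too large to be conclusive
    return 4 * min_gap;                   // min_texture_handler_size * min_val
}

int main() {
    assert(Deduce({0, 2, 4}) == 8); // 8-byte handlers (the default)
    assert(Deduce({0, 1, 3}) == 4); // tightly packed 4-byte handlers
    assert(!Deduce({0, 8}));        // inconclusive: size stays undeduced
}
```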
| 36 | |||
| 37 | } // namespace VideoCore | ||
diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h deleted file mode 100644 index 21e569ba1..000000000 --- a/src/video_core/guest_driver.h +++ /dev/null | |||
| @@ -1,46 +0,0 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <optional> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | |||
| 12 | namespace VideoCore { | ||
| 13 | |||
| 14 | /** | ||
| 15 | * The GuestDriverProfile class is used to learn about the GPU driver's behavior and collect | ||
| 16 | * information needed by HLE methods that cannot be avoided, such as shader tracking, since | ||
| 17 | * deducing them statically is undecidable. | ||
| 18 | */ | ||
| 19 | class GuestDriverProfile { | ||
| 20 | public: | ||
| 21 | explicit GuestDriverProfile() = default; | ||
| 22 | explicit GuestDriverProfile(std::optional<u32> texture_handler_size_) | ||
| 23 | : texture_handler_size{texture_handler_size_} {} | ||
| 24 | |||
| 25 | void DeduceTextureHandlerSize(std::vector<u32> bound_offsets); | ||
| 26 | |||
| 27 | u32 GetTextureHandlerSize() const { | ||
| 28 | return texture_handler_size.value_or(default_texture_handler_size); | ||
| 29 | } | ||
| 30 | |||
| 31 | bool IsTextureHandlerSizeKnown() const { | ||
| 32 | return texture_handler_size.has_value(); | ||
| 33 | } | ||
| 34 | |||
| 35 | private: | ||
| 36 | // Minimum size of texture handler any driver can use. | ||
| 37 | static constexpr u32 min_texture_handler_size = 4; | ||
| 38 | |||
| 39 | // This matches the Vulkan and OpenGL standards, but Nvidia GPUs can easily use 4 bytes instead. | ||
| 40 | // Thus, certain drivers may shrink the size. | ||
| 41 | static constexpr u32 default_texture_handler_size = 8; | ||
| 42 | |||
| 43 | std::optional<u32> texture_handler_size = default_texture_handler_size; | ||
| 44 | }; | ||
| 45 | |||
| 46 | } // namespace VideoCore | ||
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index d2b9d5f2b..882eff880 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp | |||
| @@ -69,7 +69,6 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { | |||
| 69 | } else { | 69 | } else { |
| 70 | UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr); | 70 | UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr); |
| 71 | } | 71 | } |
| 72 | |||
| 73 | const auto submapped_ranges = GetSubmappedRange(gpu_addr, size); | 72 | const auto submapped_ranges = GetSubmappedRange(gpu_addr, size); |
| 74 | 73 | ||
| 75 | for (const auto& map : submapped_ranges) { | 74 | for (const auto& map : submapped_ranges) { |
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 58014c1c3..b094fc064 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -11,7 +11,6 @@ | |||
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "video_core/engines/fermi_2d.h" | 12 | #include "video_core/engines/fermi_2d.h" |
| 13 | #include "video_core/gpu.h" | 13 | #include "video_core/gpu.h" |
| 14 | #include "video_core/guest_driver.h" | ||
| 15 | 14 | ||
| 16 | namespace Tegra { | 15 | namespace Tegra { |
| 17 | class MemoryManager; | 16 | class MemoryManager; |
| @@ -45,7 +44,7 @@ public: | |||
| 45 | virtual void Clear() = 0; | 44 | virtual void Clear() = 0; |
| 46 | 45 | ||
| 47 | /// Dispatches a compute shader invocation | 46 | /// Dispatches a compute shader invocation |
| 48 | virtual void DispatchCompute(GPUVAddr code_addr) = 0; | 47 | virtual void DispatchCompute() = 0; |
| 49 | 48 | ||
| 50 | /// Resets the counter of a query | 49 | /// Resets the counter of a query |
| 51 | virtual void ResetCounter(QueryType type) = 0; | 50 | virtual void ResetCounter(QueryType type) = 0; |
| @@ -136,18 +135,5 @@ public: | |||
| 136 | /// Initialize disk cached resources for the game being emulated | 135 | /// Initialize disk cached resources for the game being emulated |
| 137 | virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading, | 136 | virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading, |
| 138 | const DiskResourceLoadCallback& callback) {} | 137 | const DiskResourceLoadCallback& callback) {} |
| 139 | |||
| 140 | /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver. | ||
| 141 | [[nodiscard]] GuestDriverProfile& AccessGuestDriverProfile() { | ||
| 142 | return guest_driver_profile; | ||
| 143 | } | ||
| 144 | |||
| 145 | /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver. | ||
| 146 | [[nodiscard]] const GuestDriverProfile& AccessGuestDriverProfile() const { | ||
| 147 | return guest_driver_profile; | ||
| 148 | } | ||
| 149 | |||
| 150 | private: | ||
| 151 | GuestDriverProfile guest_driver_profile{}; | ||
| 152 | }; | 138 | }; |
| 153 | } // namespace VideoCore | 139 | } // namespace VideoCore |
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp deleted file mode 100644 index e8d8d2aa5..000000000 --- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp +++ /dev/null | |||
| @@ -1,2124 +0,0 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <string> | ||
| 9 | #include <string_view> | ||
| 10 | #include <utility> | ||
| 11 | #include <variant> | ||
| 12 | |||
| 13 | #include <fmt/format.h> | ||
| 14 | |||
| 15 | #include "common/alignment.h" | ||
| 16 | #include "common/assert.h" | ||
| 17 | #include "common/common_types.h" | ||
| 18 | #include "video_core/renderer_opengl/gl_arb_decompiler.h" | ||
| 19 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 20 | #include "video_core/shader/registry.h" | ||
| 21 | #include "video_core/shader/shader_ir.h" | ||
| 22 | |||
| 23 | // Predicates in the decompiled code follow the convention that -1 means true and 0 means false. | ||
| 24 | // GLASM lacks booleans, so they have to be implemented as integers. | ||
| 25 | // Using -1 for true is useful because both CMP.S and NOT.U can negate it, and because CMP.S can | ||
| 26 | // select between two values: -1 evaluates as true and 0 as false. | ||
| 27 | |||
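For illustration (not part of the deleted file): under this -1/0 convention a boolean select needs no branching. A hypothetical lowering, written against the AddLine/Visit helpers defined later in this file with placeholder register names, could be:

    // CMP.S writes the second operand when the first is negative and the third
    // otherwise, so -1/0 predicates map onto a single select instruction.
    // 'result', 'cond', 'op_true' and 'op_false' are placeholder names.
    AddLine("CMP.S {}, {}, {}, {};", result, cond, op_true, op_false);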
| 28 | namespace OpenGL { | ||
| 29 | |||
| 30 | namespace { | ||
| 31 | |||
| 32 | using Tegra::Engines::ShaderType; | ||
| 33 | using Tegra::Shader::Attribute; | ||
| 34 | using Tegra::Shader::PixelImap; | ||
| 35 | using Tegra::Shader::Register; | ||
| 36 | using namespace VideoCommon::Shader; | ||
| 37 | using Operation = const OperationNode&; | ||
| 38 | |||
| 39 | constexpr std::array INTERNAL_FLAG_NAMES = {"ZERO", "SIGN", "CARRY", "OVERFLOW"}; | ||
| 40 | |||
| 41 | char Swizzle(std::size_t component) { | ||
| 42 | static constexpr std::string_view SWIZZLE{"xyzw"}; | ||
| 43 | return SWIZZLE.at(component); | ||
| 44 | } | ||
| 45 | |||
| 46 | constexpr bool IsGenericAttribute(Attribute::Index index) { | ||
| 47 | return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31; | ||
| 48 | } | ||
| 49 | |||
| 50 | u32 GetGenericAttributeIndex(Attribute::Index index) { | ||
| 51 | ASSERT(IsGenericAttribute(index)); | ||
| 52 | return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); | ||
| 53 | } | ||
| 54 | |||
| 55 | std::string_view Modifiers(Operation operation) { | ||
| 56 | const auto meta = std::get_if<MetaArithmetic>(&operation.GetMeta()); | ||
| 57 | if (meta && meta->precise) { | ||
| 58 | return ".PREC"; | ||
| 59 | } | ||
| 60 | return ""; | ||
| 61 | } | ||
| 62 | |||
| 63 | std::string_view GetInputFlags(PixelImap attribute) { | ||
| 64 | switch (attribute) { | ||
| 65 | case PixelImap::Perspective: | ||
| 66 | return ""; | ||
| 67 | case PixelImap::Constant: | ||
| 68 | return "FLAT "; | ||
| 69 | case PixelImap::ScreenLinear: | ||
| 70 | return "NOPERSPECTIVE "; | ||
| 71 | case PixelImap::Unused: | ||
| 72 | break; | ||
| 73 | } | ||
| 74 | UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute); | ||
| 75 | return {}; | ||
| 76 | } | ||
| 77 | |||
| 78 | std::string_view ImageType(Tegra::Shader::ImageType image_type) { | ||
| 79 | switch (image_type) { | ||
| 80 | case Tegra::Shader::ImageType::Texture1D: | ||
| 81 | return "1D"; | ||
| 82 | case Tegra::Shader::ImageType::TextureBuffer: | ||
| 83 | return "BUFFER"; | ||
| 84 | case Tegra::Shader::ImageType::Texture1DArray: | ||
| 85 | return "ARRAY1D"; | ||
| 86 | case Tegra::Shader::ImageType::Texture2D: | ||
| 87 | return "2D"; | ||
| 88 | case Tegra::Shader::ImageType::Texture2DArray: | ||
| 89 | return "ARRAY2D"; | ||
| 90 | case Tegra::Shader::ImageType::Texture3D: | ||
| 91 | return "3D"; | ||
| 92 | } | ||
| 93 | UNREACHABLE(); | ||
| 94 | return {}; | ||
| 95 | } | ||
| 96 | |||
| 97 | std::string_view StackName(MetaStackClass stack) { | ||
| 98 | switch (stack) { | ||
| 99 | case MetaStackClass::Ssy: | ||
| 100 | return "SSY"; | ||
| 101 | case MetaStackClass::Pbk: | ||
| 102 | return "PBK"; | ||
| 103 | } | ||
| 104 | UNREACHABLE(); | ||
| 105 | return ""; | ||
| 106 | } | ||
| 107 | |||
| 108 | std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) { | ||
| 109 | switch (topology) { | ||
| 110 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points: | ||
| 111 | return "POINTS"; | ||
| 112 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines: | ||
| 113 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip: | ||
| 114 | return "LINES"; | ||
| 115 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency: | ||
| 116 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency: | ||
| 117 | return "LINES_ADJACENCY"; | ||
| 118 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles: | ||
| 119 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip: | ||
| 120 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan: | ||
| 121 | return "TRIANGLES"; | ||
| 122 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency: | ||
| 123 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: | ||
| 124 | return "TRIANGLES_ADJACENCY"; | ||
| 125 | default: | ||
| 126 | UNIMPLEMENTED_MSG("topology={}", topology); | ||
| 127 | return "POINTS"; | ||
| 128 | } | ||
| 129 | } | ||
| 130 | |||
| 131 | std::string_view TopologyName(Tegra::Shader::OutputTopology topology) { | ||
| 132 | switch (topology) { | ||
| 133 | case Tegra::Shader::OutputTopology::PointList: | ||
| 134 | return "POINTS"; | ||
| 135 | case Tegra::Shader::OutputTopology::LineStrip: | ||
| 136 | return "LINE_STRIP"; | ||
| 137 | case Tegra::Shader::OutputTopology::TriangleStrip: | ||
| 138 | return "TRIANGLE_STRIP"; | ||
| 139 | default: | ||
| 140 | UNIMPLEMENTED_MSG("Unknown output topology: {}", topology); | ||
| 141 | return "POINTS"; | ||
| 142 | } | ||
| 143 | } | ||
| 144 | |||
| 145 | std::string_view StageInputName(ShaderType stage) { | ||
| 146 | switch (stage) { | ||
| 147 | case ShaderType::Vertex: | ||
| 148 | case ShaderType::Geometry: | ||
| 149 | return "vertex"; | ||
| 150 | case ShaderType::Fragment: | ||
| 151 | return "fragment"; | ||
| 152 | case ShaderType::Compute: | ||
| 153 | return "invocation"; | ||
| 154 | default: | ||
| 155 | UNREACHABLE(); | ||
| 156 | return ""; | ||
| 157 | } | ||
| 158 | } | ||
| 159 | |||
| 160 | std::string TextureType(const MetaTexture& meta) { | ||
| 161 | if (meta.sampler.is_buffer) { | ||
| 162 | return "BUFFER"; | ||
| 163 | } | ||
| 164 | std::string type; | ||
| 165 | if (meta.sampler.is_shadow) { | ||
| 166 | type += "SHADOW"; | ||
| 167 | } | ||
| 168 | if (meta.sampler.is_array) { | ||
| 169 | type += "ARRAY"; | ||
| 170 | } | ||
| 171 | type += [&meta] { | ||
| 172 | switch (meta.sampler.type) { | ||
| 173 | case Tegra::Shader::TextureType::Texture1D: | ||
| 174 | return "1D"; | ||
| 175 | case Tegra::Shader::TextureType::Texture2D: | ||
| 176 | return "2D"; | ||
| 177 | case Tegra::Shader::TextureType::Texture3D: | ||
| 178 | return "3D"; | ||
| 179 | case Tegra::Shader::TextureType::TextureCube: | ||
| 180 | return "CUBE"; | ||
| 181 | } | ||
| 182 | UNREACHABLE(); | ||
| 183 | return "2D"; | ||
| 184 | }(); | ||
| 185 | return type; | ||
| 186 | } | ||
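As a worked example of the concatenation above (a self-contained sketch, not code from the file): SHADOW is appended first, then ARRAY, then the base dimensionality, so a shadow 2D-array sampler produces the GLASM target SHADOWARRAY2D, while a buffer sampler short-circuits to BUFFER.

    // Standalone restatement of the ordering; the flags mirror the sampler bits above.
    std::string MakeTarget(bool is_shadow, bool is_array, std::string_view base) {
        std::string type;
        if (is_shadow) {
            type += "SHADOW";
        }
        if (is_array) {
            type += "ARRAY";
        }
        type += base;
        return type; // MakeTarget(true, true, "2D") == "SHADOWARRAY2D"
    }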
| 187 | |||
| 188 | class ARBDecompiler final { | ||
| 189 | public: | ||
| 190 | explicit ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, | ||
| 191 | ShaderType stage_, std::string_view identifier); | ||
| 192 | |||
| 193 | std::string Code() const { | ||
| 194 | return shader_source; | ||
| 195 | } | ||
| 196 | |||
| 197 | private: | ||
| 198 | void DefineGlobalMemory(); | ||
| 199 | |||
| 200 | void DeclareHeader(); | ||
| 201 | void DeclareVertex(); | ||
| 202 | void DeclareGeometry(); | ||
| 203 | void DeclareFragment(); | ||
| 204 | void DeclareCompute(); | ||
| 205 | void DeclareInputAttributes(); | ||
| 206 | void DeclareOutputAttributes(); | ||
| 207 | void DeclareLocalMemory(); | ||
| 208 | void DeclareGlobalMemory(); | ||
| 209 | void DeclareConstantBuffers(); | ||
| 210 | void DeclareRegisters(); | ||
| 211 | void DeclareTemporaries(); | ||
| 212 | void DeclarePredicates(); | ||
| 213 | void DeclareInternalFlags(); | ||
| 214 | |||
| 215 | void InitializeVariables(); | ||
| 216 | |||
| 217 | void DecompileAST(); | ||
| 218 | void DecompileBranchMode(); | ||
| 219 | |||
| 220 | void VisitAST(const ASTNode& node); | ||
| 221 | std::string VisitExpression(const Expr& node); | ||
| 222 | |||
| 223 | void VisitBlock(const NodeBlock& bb); | ||
| 224 | |||
| 225 | std::string Visit(const Node& node); | ||
| 226 | |||
| 227 | std::tuple<std::string, std::string, std::size_t> BuildCoords(Operation); | ||
| 228 | std::string BuildAoffi(Operation); | ||
| 229 | std::string GlobalMemoryPointer(const GmemNode& gmem); | ||
| 230 | void Exit(); | ||
| 231 | |||
| 232 | std::string Assign(Operation); | ||
| 233 | std::string Select(Operation); | ||
| 234 | std::string FClamp(Operation); | ||
| 235 | std::string FCastHalf0(Operation); | ||
| 236 | std::string FCastHalf1(Operation); | ||
| 237 | std::string FSqrt(Operation); | ||
| 238 | std::string FSwizzleAdd(Operation); | ||
| 239 | std::string HAdd2(Operation); | ||
| 240 | std::string HMul2(Operation); | ||
| 241 | std::string HFma2(Operation); | ||
| 242 | std::string HAbsolute(Operation); | ||
| 243 | std::string HNegate(Operation); | ||
| 244 | std::string HClamp(Operation); | ||
| 245 | std::string HCastFloat(Operation); | ||
| 246 | std::string HUnpack(Operation); | ||
| 247 | std::string HMergeF32(Operation); | ||
| 248 | std::string HMergeH0(Operation); | ||
| 249 | std::string HMergeH1(Operation); | ||
| 250 | std::string HPack2(Operation); | ||
| 251 | std::string LogicalAssign(Operation); | ||
| 252 | std::string LogicalPick2(Operation); | ||
| 253 | std::string LogicalAnd2(Operation); | ||
| 254 | std::string FloatOrdered(Operation); | ||
| 255 | std::string FloatUnordered(Operation); | ||
| 256 | std::string LogicalAddCarry(Operation); | ||
| 257 | std::string Texture(Operation); | ||
| 258 | std::string TextureGather(Operation); | ||
| 259 | std::string TextureQueryDimensions(Operation); | ||
| 260 | std::string TextureQueryLod(Operation); | ||
| 261 | std::string TexelFetch(Operation); | ||
| 262 | std::string TextureGradient(Operation); | ||
| 263 | std::string ImageLoad(Operation); | ||
| 264 | std::string ImageStore(Operation); | ||
| 265 | std::string Branch(Operation); | ||
| 266 | std::string BranchIndirect(Operation); | ||
| 267 | std::string PushFlowStack(Operation); | ||
| 268 | std::string PopFlowStack(Operation); | ||
| 269 | std::string Exit(Operation); | ||
| 270 | std::string Discard(Operation); | ||
| 271 | std::string EmitVertex(Operation); | ||
| 272 | std::string EndPrimitive(Operation); | ||
| 273 | std::string InvocationId(Operation); | ||
| 274 | std::string YNegate(Operation); | ||
| 275 | std::string ThreadId(Operation); | ||
| 276 | std::string ShuffleIndexed(Operation); | ||
| 277 | std::string Barrier(Operation); | ||
| 278 | std::string MemoryBarrierGroup(Operation); | ||
| 279 | std::string MemoryBarrierGlobal(Operation); | ||
| 280 | |||
| 281 | template <const std::string_view& op> | ||
| 282 | std::string Unary(Operation operation) { | ||
| 283 | std::string temporary = AllocTemporary(); | ||
| 284 | AddLine("{}{} {}, {};", op, Modifiers(operation), temporary, Visit(operation[0])); | ||
| 285 | return temporary; | ||
| 286 | } | ||
| 287 | |||
| 288 | template <const std::string_view& op> | ||
| 289 | std::string Binary(Operation operation) { | ||
| 290 | std::string temporary = AllocTemporary(); | ||
| 291 | AddLine("{}{} {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]), | ||
| 292 | Visit(operation[1])); | ||
| 293 | return temporary; | ||
| 294 | } | ||
| 295 | |||
| 296 | template <const std::string_view& op> | ||
| 297 | std::string Trinary(Operation operation) { | ||
| 298 | std::string temporary = AllocTemporary(); | ||
| 299 | AddLine("{}{} {}, {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]), | ||
| 300 | Visit(operation[1]), Visit(operation[2])); | ||
| 301 | return temporary; | ||
| 302 | } | ||
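To make the templates above concrete (an illustrative expansion with placeholder register names): Binary&lt;ADD_F32&gt; allocates one scalar temporary, visits both operands, and emits a single instruction, so adding two operands that visit to R1.x and R2.x yields roughly:

    ADD.F32 T0.x, R1.x, R2.x;

Unary and Trinary differ only in operand count, and Modifiers() appends .PREC when the operation is marked precise.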
| 303 | |||
| 304 | template <const std::string_view& op, bool unordered> | ||
| 305 | std::string FloatComparison(Operation operation) { | ||
| 306 | std::string temporary = AllocTemporary(); | ||
| 307 | AddLine("TRUNC.U.CC RC.x, {};", Binary<op>(operation)); | ||
| 308 | AddLine("MOV.S {}, 0;", temporary); | ||
| 309 | AddLine("MOV.S {} (NE.x), -1;", temporary); | ||
| 310 | |||
| 311 | const std::string op_a = Visit(operation[0]); | ||
| 312 | const std::string op_b = Visit(operation[1]); | ||
| 313 | if constexpr (unordered) { | ||
| 314 | AddLine("SNE.F RC.x, {}, {};", op_a, op_a); | ||
| 315 | AddLine("TRUNC.U.CC RC.x, RC.x;"); | ||
| 316 | AddLine("MOV.S {} (NE.x), -1;", temporary); | ||
| 317 | AddLine("SNE.F RC.x, {}, {};", op_b, op_b); | ||
| 318 | AddLine("TRUNC.U.CC RC.x, RC.x;"); | ||
| 319 | AddLine("MOV.S {} (NE.x), -1;", temporary); | ||
| 320 | } else if (op == SNE_F) { | ||
| 321 | AddLine("SNE.F RC.x, {}, {};", op_a, op_a); | ||
| 322 | AddLine("TRUNC.U.CC RC.x, RC.x;"); | ||
| 323 | AddLine("MOV.S {} (NE.x), 0;", temporary); | ||
| 324 | AddLine("SNE.F RC.x, {}, {};", op_b, op_b); | ||
| 325 | AddLine("TRUNC.U.CC RC.x, RC.x;"); | ||
| 326 | AddLine("MOV.S {} (NE.x), 0;", temporary); | ||
| 327 | } | ||
| 328 | return temporary; | ||
| 329 | } | ||
| 330 | |||
| 331 | template <const std::string_view& op, bool is_nan> | ||
| 332 | std::string HalfComparison(Operation operation) { | ||
| 333 | std::string tmp1 = AllocVectorTemporary(); | ||
| 334 | const std::string tmp2 = AllocVectorTemporary(); | ||
| 335 | const std::string op_a = Visit(operation[0]); | ||
| 336 | const std::string op_b = Visit(operation[1]); | ||
| 337 | AddLine("UP2H.F {}, {};", tmp1, op_a); | ||
| 338 | AddLine("UP2H.F {}, {};", tmp2, op_b); | ||
| 339 | AddLine("{} {}, {}, {};", op, tmp1, tmp1, tmp2); | ||
| 340 | AddLine("TRUNC.U.CC RC.xy, {};", tmp1); | ||
| 341 | AddLine("MOV.S {}.xy, {{0, 0, 0, 0}};", tmp1); | ||
| 342 | AddLine("MOV.S {}.x (NE.x), -1;", tmp1); | ||
| 343 | AddLine("MOV.S {}.y (NE.y), -1;", tmp1); | ||
| 344 | if constexpr (is_nan) { | ||
| 345 | AddLine("MOVC.F RC.x, {};", op_a); | ||
| 346 | AddLine("MOV.S {}.x (NAN.x), -1;", tmp1); | ||
| 347 | AddLine("MOVC.F RC.x, {};", op_b); | ||
| 348 | AddLine("MOV.S {}.y (NAN.x), -1;", tmp1); | ||
| 349 | } | ||
| 350 | return tmp1; | ||
| 351 | } | ||
| 352 | |||
| 353 | template <const std::string_view& op, const std::string_view& type> | ||
| 354 | std::string AtomicImage(Operation operation) { | ||
| 355 | const auto& meta = std::get<MetaImage>(operation.GetMeta()); | ||
| 356 | const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; | ||
| 357 | const std::size_t num_coords = operation.GetOperandsCount(); | ||
| 358 | const std::size_t num_values = meta.values.size(); | ||
| 359 | |||
| 360 | const std::string coord = AllocVectorTemporary(); | ||
| 361 | const std::string value = AllocVectorTemporary(); | ||
| 362 | for (std::size_t i = 0; i < num_coords; ++i) { | ||
| 363 | AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i])); | ||
| 364 | } | ||
| 365 | for (std::size_t i = 0; i < num_values; ++i) { | ||
| 366 | AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i])); | ||
| 367 | } | ||
| 368 | |||
| 369 | AddLine("ATOMIM.{}.{} {}.x, {}, {}, image[{}], {};", op, type, coord, value, coord, | ||
| 370 | image_id, ImageType(meta.image.type)); | ||
| 371 | return fmt::format("{}.x", coord); | ||
| 372 | } | ||
| 373 | |||
| 374 | template <const std::string_view& op, const std::string_view& type> | ||
| 375 | std::string Atomic(Operation operation) { | ||
| 376 | std::string temporary = AllocTemporary(); | ||
| 377 | std::string address; | ||
| 378 | std::string_view opname; | ||
| 379 | bool robust = false; | ||
| 380 | if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { | ||
| 381 | address = GlobalMemoryPointer(*gmem); | ||
| 382 | opname = "ATOM"; | ||
| 383 | robust = true; | ||
| 384 | } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { | ||
| 385 | address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress())); | ||
| 386 | opname = "ATOMS"; | ||
| 387 | } else { | ||
| 388 | UNREACHABLE(); | ||
| 389 | return "{0, 0, 0, 0}"; | ||
| 390 | } | ||
| 391 | if (robust) { | ||
| 392 | AddLine("IF NE.x;"); | ||
| 393 | } | ||
| 394 | AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address); | ||
| 395 | if (robust) { | ||
| 396 | AddLine("ELSE;"); | ||
| 397 | AddLine("MOV.S {}, 0;", temporary); | ||
| 398 | AddLine("ENDIF;"); | ||
| 399 | } | ||
| 400 | return temporary; | ||
| 401 | } | ||
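Illustrative output for a global Atomic&lt;ADD, U32&gt; (placeholders in angle brackets; GlobalMemoryPointer is assumed to have set the condition code so NE.x means the address is in bounds): the robust path wraps the atomic in IF/ELSE so an out-of-bounds access returns 0 instead of faulting:

    IF NE.x;
    ATOM.ADD.U32 T0.x, <value>, <pointer>;
    ELSE;
    MOV.S T0.x, 0;
    ENDIF;

Shared-memory atomics take the unguarded ATOMS path instead.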
| 402 | |||
| 403 | template <char type> | ||
| 404 | std::string Negate(Operation operation) { | ||
| 405 | std::string temporary = AllocTemporary(); | ||
| 406 | if constexpr (type == 'F') { | ||
| 407 | AddLine("MOV.F32 {}, -{};", temporary, Visit(operation[0])); | ||
| 408 | } else { | ||
| 409 | AddLine("MOV.{} {}, -{};", type, temporary, Visit(operation[0])); | ||
| 410 | } | ||
| 411 | return temporary; | ||
| 412 | } | ||
| 413 | |||
| 414 | template <char type> | ||
| 415 | std::string Absolute(Operation operation) { | ||
| 416 | std::string temporary = AllocTemporary(); | ||
| 417 | AddLine("MOV.{} {}, |{}|;", type, temporary, Visit(operation[0])); | ||
| 418 | return temporary; | ||
| 419 | } | ||
| 420 | |||
| 421 | template <char type> | ||
| 422 | std::string BitfieldInsert(Operation operation) { | ||
| 423 | const std::string temporary = AllocVectorTemporary(); | ||
| 424 | AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[3])); | ||
| 425 | AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[2])); | ||
| 426 | AddLine("BFI.{} {}.x, {}, {}, {};", type, temporary, temporary, Visit(operation[1]), | ||
| 427 | Visit(operation[0])); | ||
| 428 | return fmt::format("{}.x", temporary); | ||
| 429 | } | ||
| 430 | |||
| 431 | template <char type> | ||
| 432 | std::string BitfieldExtract(Operation operation) { | ||
| 433 | const std::string temporary = AllocVectorTemporary(); | ||
| 434 | AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[2])); | ||
| 435 | AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[1])); | ||
| 436 | AddLine("BFE.{} {}.x, {}, {};", type, temporary, temporary, Visit(operation[0])); | ||
| 437 | return fmt::format("{}.x", temporary); | ||
| 438 | } | ||
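A sketch of the expansion above for BitfieldExtract&lt;'U'&gt; (operand placeholders, not code from the file): two of the three IR operands are staged into the .x and .y components of one vector temporary so BFE can consume them as a pair, and the third is passed directly:

    MOV.U T0.x, <operand 2>;
    MOV.U T0.y, <operand 1>;
    BFE.U T0.x, T0, <operand 0>;

BitfieldInsert&lt;'U'&gt; follows the same staging pattern with BFI and one more direct operand.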
| 439 | |||
| 440 | template <char swizzle> | ||
| 441 | std::string LocalInvocationId(Operation) { | ||
| 442 | return fmt::format("invocation.localid.{}", swizzle); | ||
| 443 | } | ||
| 444 | |||
| 445 | template <char swizzle> | ||
| 446 | std::string WorkGroupId(Operation) { | ||
| 447 | return fmt::format("invocation.groupid.{}", swizzle); | ||
| 448 | } | ||
| 449 | |||
| 450 | template <char c1, char c2> | ||
| 451 | std::string ThreadMask(Operation) { | ||
| 452 | return fmt::format("{}.thread{}{}mask", StageInputName(stage), c1, c2); | ||
| 453 | } | ||
| 454 | |||
| 455 | template <typename... Args> | ||
| 456 | void AddExpression(std::string_view text, Args&&... args) { | ||
| 457 | shader_source += fmt::format(fmt::runtime(text), std::forward<Args>(args)...); | ||
| 458 | } | ||
| 459 | |||
| 460 | template <typename... Args> | ||
| 461 | void AddLine(std::string_view text, Args&&... args) { | ||
| 462 | AddExpression(text, std::forward<Args>(args)...); | ||
| 463 | shader_source += '\n'; | ||
| 464 | } | ||
| 465 | |||
| 466 | std::string AllocLongVectorTemporary() { | ||
| 467 | max_long_temporaries = std::max(max_long_temporaries, num_long_temporaries + 1); | ||
| 468 | return fmt::format("L{}", num_long_temporaries++); | ||
| 469 | } | ||
| 470 | |||
| 471 | std::string AllocLongTemporary() { | ||
| 472 | return fmt::format("{}.x", AllocLongVectorTemporary()); | ||
| 473 | } | ||
| 474 | |||
| 475 | std::string AllocVectorTemporary() { | ||
| 476 | max_temporaries = std::max(max_temporaries, num_temporaries + 1); | ||
| 477 | return fmt::format("T{}", num_temporaries++); | ||
| 478 | } | ||
| 479 | |||
| 480 | std::string AllocTemporary() { | ||
| 481 | return fmt::format("{}.x", AllocVectorTemporary()); | ||
| 482 | } | ||
| 483 | |||
| 484 | void ResetTemporaries() noexcept { | ||
| 485 | num_temporaries = 0; | ||
| 486 | num_long_temporaries = 0; | ||
| 487 | } | ||
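A note on the scheme above (hypothetical trace): temporaries are handed out while visiting a single statement and recycled afterwards, while the max_* counters keep the high-water mark that DeclareTemporaries() later turns into declarations:

    std::string a = AllocTemporary(); // "T0.x"; num_temporaries = 1, max = 1
    std::string b = AllocTemporary(); // "T1.x"; num_temporaries = 2, max = 2
    ResetTemporaries();               // counters back to 0, maxima retained
    // DeclareTemporaries() will later emit: TEMP T0; TEMP T1;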
| 488 | |||
| 489 | const Device& device; | ||
| 490 | const ShaderIR& ir; | ||
| 491 | const Registry& registry; | ||
| 492 | const ShaderType stage; | ||
| 493 | |||
| 494 | std::size_t num_temporaries = 0; | ||
| 495 | std::size_t max_temporaries = 0; | ||
| 496 | |||
| 497 | std::size_t num_long_temporaries = 0; | ||
| 498 | std::size_t max_long_temporaries = 0; | ||
| 499 | |||
| 500 | std::map<GlobalMemoryBase, u32> global_memory_names; | ||
| 501 | |||
| 502 | std::string shader_source; | ||
| 503 | |||
| 504 | static constexpr std::string_view ADD_F32 = "ADD.F32"; | ||
| 505 | static constexpr std::string_view ADD_S = "ADD.S"; | ||
| 506 | static constexpr std::string_view ADD_U = "ADD.U"; | ||
| 507 | static constexpr std::string_view MUL_F32 = "MUL.F32"; | ||
| 508 | static constexpr std::string_view MUL_S = "MUL.S"; | ||
| 509 | static constexpr std::string_view MUL_U = "MUL.U"; | ||
| 510 | static constexpr std::string_view DIV_F32 = "DIV.F32"; | ||
| 511 | static constexpr std::string_view DIV_S = "DIV.S"; | ||
| 512 | static constexpr std::string_view DIV_U = "DIV.U"; | ||
| 513 | static constexpr std::string_view MAD_F32 = "MAD.F32"; | ||
| 514 | static constexpr std::string_view RSQ_F32 = "RSQ.F32"; | ||
| 515 | static constexpr std::string_view COS_F32 = "COS.F32"; | ||
| 516 | static constexpr std::string_view SIN_F32 = "SIN.F32"; | ||
| 517 | static constexpr std::string_view EX2_F32 = "EX2.F32"; | ||
| 518 | static constexpr std::string_view LG2_F32 = "LG2.F32"; | ||
| 519 | static constexpr std::string_view SLT_F = "SLT.F32"; | ||
| 520 | static constexpr std::string_view SLT_S = "SLT.S"; | ||
| 521 | static constexpr std::string_view SLT_U = "SLT.U"; | ||
| 522 | static constexpr std::string_view SEQ_F = "SEQ.F32"; | ||
| 523 | static constexpr std::string_view SEQ_S = "SEQ.S"; | ||
| 524 | static constexpr std::string_view SEQ_U = "SEQ.U"; | ||
| 525 | static constexpr std::string_view SLE_F = "SLE.F32"; | ||
| 526 | static constexpr std::string_view SLE_S = "SLE.S"; | ||
| 527 | static constexpr std::string_view SLE_U = "SLE.U"; | ||
| 528 | static constexpr std::string_view SGT_F = "SGT.F32"; | ||
| 529 | static constexpr std::string_view SGT_S = "SGT.S"; | ||
| 530 | static constexpr std::string_view SGT_U = "SGT.U"; | ||
| 531 | static constexpr std::string_view SNE_F = "SNE.F32"; | ||
| 532 | static constexpr std::string_view SNE_S = "SNE.S"; | ||
| 533 | static constexpr std::string_view SNE_U = "SNE.U"; | ||
| 534 | static constexpr std::string_view SGE_F = "SGE.F32"; | ||
| 535 | static constexpr std::string_view SGE_S = "SGE.S"; | ||
| 536 | static constexpr std::string_view SGE_U = "SGE.U"; | ||
| 537 | static constexpr std::string_view AND_S = "AND.S"; | ||
| 538 | static constexpr std::string_view AND_U = "AND.U"; | ||
| 539 | static constexpr std::string_view TRUNC_F = "TRUNC.F"; | ||
| 540 | static constexpr std::string_view TRUNC_S = "TRUNC.S"; | ||
| 541 | static constexpr std::string_view TRUNC_U = "TRUNC.U"; | ||
| 542 | static constexpr std::string_view SHL_S = "SHL.S"; | ||
| 543 | static constexpr std::string_view SHL_U = "SHL.U"; | ||
| 544 | static constexpr std::string_view SHR_S = "SHR.S"; | ||
| 545 | static constexpr std::string_view SHR_U = "SHR.U"; | ||
| 546 | static constexpr std::string_view OR_S = "OR.S"; | ||
| 547 | static constexpr std::string_view OR_U = "OR.U"; | ||
| 548 | static constexpr std::string_view XOR_S = "XOR.S"; | ||
| 549 | static constexpr std::string_view XOR_U = "XOR.U"; | ||
| 550 | static constexpr std::string_view NOT_S = "NOT.S"; | ||
| 551 | static constexpr std::string_view NOT_U = "NOT.U"; | ||
| 552 | static constexpr std::string_view BTC_S = "BTC.S"; | ||
| 553 | static constexpr std::string_view BTC_U = "BTC.U"; | ||
| 554 | static constexpr std::string_view BTFM_S = "BTFM.S"; | ||
| 555 | static constexpr std::string_view BTFM_U = "BTFM.U"; | ||
| 556 | static constexpr std::string_view ROUND_F = "ROUND.F"; | ||
| 557 | static constexpr std::string_view CEIL_F = "CEIL.F"; | ||
| 558 | static constexpr std::string_view FLR_F = "FLR.F"; | ||
| 559 | static constexpr std::string_view I2F_S = "I2F.S"; | ||
| 560 | static constexpr std::string_view I2F_U = "I2F.U"; | ||
| 561 | static constexpr std::string_view MIN_F = "MIN.F"; | ||
| 562 | static constexpr std::string_view MIN_S = "MIN.S"; | ||
| 563 | static constexpr std::string_view MIN_U = "MIN.U"; | ||
| 564 | static constexpr std::string_view MAX_F = "MAX.F"; | ||
| 565 | static constexpr std::string_view MAX_S = "MAX.S"; | ||
| 566 | static constexpr std::string_view MAX_U = "MAX.U"; | ||
| 567 | static constexpr std::string_view MOV_U = "MOV.U"; | ||
| 568 | static constexpr std::string_view TGBALLOT_U = "TGBALLOT.U"; | ||
| 569 | static constexpr std::string_view TGALL_U = "TGALL.U"; | ||
| 570 | static constexpr std::string_view TGANY_U = "TGANY.U"; | ||
| 571 | static constexpr std::string_view TGEQ_U = "TGEQ.U"; | ||
| 572 | static constexpr std::string_view EXCH = "EXCH"; | ||
| 573 | static constexpr std::string_view ADD = "ADD"; | ||
| 574 | static constexpr std::string_view MIN = "MIN"; | ||
| 575 | static constexpr std::string_view MAX = "MAX"; | ||
| 576 | static constexpr std::string_view AND = "AND"; | ||
| 577 | static constexpr std::string_view OR = "OR"; | ||
| 578 | static constexpr std::string_view XOR = "XOR"; | ||
| 579 | static constexpr std::string_view U32 = "U32"; | ||
| 580 | static constexpr std::string_view S32 = "S32"; | ||
| 581 | |||
| 582 | static constexpr std::size_t NUM_ENTRIES = static_cast<std::size_t>(OperationCode::Amount); | ||
| 583 | using DecompilerType = std::string (ARBDecompiler::*)(Operation); | ||
| 584 | static constexpr std::array<DecompilerType, NUM_ENTRIES> OPERATION_DECOMPILERS = { | ||
| 585 | &ARBDecompiler::Assign, | ||
| 586 | |||
| 587 | &ARBDecompiler::Select, | ||
| 588 | |||
| 589 | &ARBDecompiler::Binary<ADD_F32>, | ||
| 590 | &ARBDecompiler::Binary<MUL_F32>, | ||
| 591 | &ARBDecompiler::Binary<DIV_F32>, | ||
| 592 | &ARBDecompiler::Trinary<MAD_F32>, | ||
| 593 | &ARBDecompiler::Negate<'F'>, | ||
| 594 | &ARBDecompiler::Absolute<'F'>, | ||
| 595 | &ARBDecompiler::FClamp, | ||
| 596 | &ARBDecompiler::FCastHalf0, | ||
| 597 | &ARBDecompiler::FCastHalf1, | ||
| 598 | &ARBDecompiler::Binary<MIN_F>, | ||
| 599 | &ARBDecompiler::Binary<MAX_F>, | ||
| 600 | &ARBDecompiler::Unary<COS_F32>, | ||
| 601 | &ARBDecompiler::Unary<SIN_F32>, | ||
| 602 | &ARBDecompiler::Unary<EX2_F32>, | ||
| 603 | &ARBDecompiler::Unary<LG2_F32>, | ||
| 604 | &ARBDecompiler::Unary<RSQ_F32>, | ||
| 605 | &ARBDecompiler::FSqrt, | ||
| 606 | &ARBDecompiler::Unary<ROUND_F>, | ||
| 607 | &ARBDecompiler::Unary<FLR_F>, | ||
| 608 | &ARBDecompiler::Unary<CEIL_F>, | ||
| 609 | &ARBDecompiler::Unary<TRUNC_F>, | ||
| 610 | &ARBDecompiler::Unary<I2F_S>, | ||
| 611 | &ARBDecompiler::Unary<I2F_U>, | ||
| 612 | &ARBDecompiler::FSwizzleAdd, | ||
| 613 | |||
| 614 | &ARBDecompiler::Binary<ADD_S>, | ||
| 615 | &ARBDecompiler::Binary<MUL_S>, | ||
| 616 | &ARBDecompiler::Binary<DIV_S>, | ||
| 617 | &ARBDecompiler::Negate<'S'>, | ||
| 618 | &ARBDecompiler::Absolute<'S'>, | ||
| 619 | &ARBDecompiler::Binary<MIN_S>, | ||
| 620 | &ARBDecompiler::Binary<MAX_S>, | ||
| 621 | |||
| 622 | &ARBDecompiler::Unary<TRUNC_S>, | ||
| 623 | &ARBDecompiler::Unary<MOV_U>, | ||
| 624 | &ARBDecompiler::Binary<SHL_S>, | ||
| 625 | &ARBDecompiler::Binary<SHR_U>, | ||
| 626 | &ARBDecompiler::Binary<SHR_S>, | ||
| 627 | &ARBDecompiler::Binary<AND_S>, | ||
| 628 | &ARBDecompiler::Binary<OR_S>, | ||
| 629 | &ARBDecompiler::Binary<XOR_S>, | ||
| 630 | &ARBDecompiler::Unary<NOT_S>, | ||
| 631 | &ARBDecompiler::BitfieldInsert<'S'>, | ||
| 632 | &ARBDecompiler::BitfieldExtract<'S'>, | ||
| 633 | &ARBDecompiler::Unary<BTC_S>, | ||
| 634 | &ARBDecompiler::Unary<BTFM_S>, | ||
| 635 | |||
| 636 | &ARBDecompiler::Binary<ADD_U>, | ||
| 637 | &ARBDecompiler::Binary<MUL_U>, | ||
| 638 | &ARBDecompiler::Binary<DIV_U>, | ||
| 639 | &ARBDecompiler::Binary<MIN_U>, | ||
| 640 | &ARBDecompiler::Binary<MAX_U>, | ||
| 641 | &ARBDecompiler::Unary<TRUNC_U>, | ||
| 642 | &ARBDecompiler::Unary<MOV_U>, | ||
| 643 | &ARBDecompiler::Binary<SHL_U>, | ||
| 644 | &ARBDecompiler::Binary<SHR_U>, | ||
| 645 | &ARBDecompiler::Binary<SHR_U>, | ||
| 646 | &ARBDecompiler::Binary<AND_U>, | ||
| 647 | &ARBDecompiler::Binary<OR_U>, | ||
| 648 | &ARBDecompiler::Binary<XOR_U>, | ||
| 649 | &ARBDecompiler::Unary<NOT_U>, | ||
| 650 | &ARBDecompiler::BitfieldInsert<'U'>, | ||
| 651 | &ARBDecompiler::BitfieldExtract<'U'>, | ||
| 652 | &ARBDecompiler::Unary<BTC_U>, | ||
| 653 | &ARBDecompiler::Unary<BTFM_U>, | ||
| 654 | |||
| 655 | &ARBDecompiler::HAdd2, | ||
| 656 | &ARBDecompiler::HMul2, | ||
| 657 | &ARBDecompiler::HFma2, | ||
| 658 | &ARBDecompiler::HAbsolute, | ||
| 659 | &ARBDecompiler::HNegate, | ||
| 660 | &ARBDecompiler::HClamp, | ||
| 661 | &ARBDecompiler::HCastFloat, | ||
| 662 | &ARBDecompiler::HUnpack, | ||
| 663 | &ARBDecompiler::HMergeF32, | ||
| 664 | &ARBDecompiler::HMergeH0, | ||
| 665 | &ARBDecompiler::HMergeH1, | ||
| 666 | &ARBDecompiler::HPack2, | ||
| 667 | |||
| 668 | &ARBDecompiler::LogicalAssign, | ||
| 669 | &ARBDecompiler::Binary<AND_U>, | ||
| 670 | &ARBDecompiler::Binary<OR_U>, | ||
| 671 | &ARBDecompiler::Binary<XOR_U>, | ||
| 672 | &ARBDecompiler::Unary<NOT_U>, | ||
| 673 | &ARBDecompiler::LogicalPick2, | ||
| 674 | &ARBDecompiler::LogicalAnd2, | ||
| 675 | |||
| 676 | &ARBDecompiler::FloatComparison<SLT_F, false>, | ||
| 677 | &ARBDecompiler::FloatComparison<SEQ_F, false>, | ||
| 678 | &ARBDecompiler::FloatComparison<SLE_F, false>, | ||
| 679 | &ARBDecompiler::FloatComparison<SGT_F, false>, | ||
| 680 | &ARBDecompiler::FloatComparison<SNE_F, false>, | ||
| 681 | &ARBDecompiler::FloatComparison<SGE_F, false>, | ||
| 682 | &ARBDecompiler::FloatOrdered, | ||
| 683 | &ARBDecompiler::FloatUnordered, | ||
| 684 | &ARBDecompiler::FloatComparison<SLT_F, true>, | ||
| 685 | &ARBDecompiler::FloatComparison<SEQ_F, true>, | ||
| 686 | &ARBDecompiler::FloatComparison<SLE_F, true>, | ||
| 687 | &ARBDecompiler::FloatComparison<SGT_F, true>, | ||
| 688 | &ARBDecompiler::FloatComparison<SNE_F, true>, | ||
| 689 | &ARBDecompiler::FloatComparison<SGE_F, true>, | ||
| 690 | |||
| 691 | &ARBDecompiler::Binary<SLT_S>, | ||
| 692 | &ARBDecompiler::Binary<SEQ_S>, | ||
| 693 | &ARBDecompiler::Binary<SLE_S>, | ||
| 694 | &ARBDecompiler::Binary<SGT_S>, | ||
| 695 | &ARBDecompiler::Binary<SNE_S>, | ||
| 696 | &ARBDecompiler::Binary<SGE_S>, | ||
| 697 | |||
| 698 | &ARBDecompiler::Binary<SLT_U>, | ||
| 699 | &ARBDecompiler::Binary<SEQ_U>, | ||
| 700 | &ARBDecompiler::Binary<SLE_U>, | ||
| 701 | &ARBDecompiler::Binary<SGT_U>, | ||
| 702 | &ARBDecompiler::Binary<SNE_U>, | ||
| 703 | &ARBDecompiler::Binary<SGE_U>, | ||
| 704 | |||
| 705 | &ARBDecompiler::LogicalAddCarry, | ||
| 706 | |||
| 707 | &ARBDecompiler::HalfComparison<SLT_F, false>, | ||
| 708 | &ARBDecompiler::HalfComparison<SEQ_F, false>, | ||
| 709 | &ARBDecompiler::HalfComparison<SLE_F, false>, | ||
| 710 | &ARBDecompiler::HalfComparison<SGT_F, false>, | ||
| 711 | &ARBDecompiler::HalfComparison<SNE_F, false>, | ||
| 712 | &ARBDecompiler::HalfComparison<SGE_F, false>, | ||
| 713 | &ARBDecompiler::HalfComparison<SLT_F, true>, | ||
| 714 | &ARBDecompiler::HalfComparison<SEQ_F, true>, | ||
| 715 | &ARBDecompiler::HalfComparison<SLE_F, true>, | ||
| 716 | &ARBDecompiler::HalfComparison<SGT_F, true>, | ||
| 717 | &ARBDecompiler::HalfComparison<SNE_F, true>, | ||
| 718 | &ARBDecompiler::HalfComparison<SGE_F, true>, | ||
| 719 | |||
| 720 | &ARBDecompiler::Texture, | ||
| 721 | &ARBDecompiler::Texture, | ||
| 722 | &ARBDecompiler::TextureGather, | ||
| 723 | &ARBDecompiler::TextureQueryDimensions, | ||
| 724 | &ARBDecompiler::TextureQueryLod, | ||
| 725 | &ARBDecompiler::TexelFetch, | ||
| 726 | &ARBDecompiler::TextureGradient, | ||
| 727 | |||
| 728 | &ARBDecompiler::ImageLoad, | ||
| 729 | &ARBDecompiler::ImageStore, | ||
| 730 | |||
| 731 | &ARBDecompiler::AtomicImage<ADD, U32>, | ||
| 732 | &ARBDecompiler::AtomicImage<AND, U32>, | ||
| 733 | &ARBDecompiler::AtomicImage<OR, U32>, | ||
| 734 | &ARBDecompiler::AtomicImage<XOR, U32>, | ||
| 735 | &ARBDecompiler::AtomicImage<EXCH, U32>, | ||
| 736 | |||
| 737 | &ARBDecompiler::Atomic<EXCH, U32>, | ||
| 738 | &ARBDecompiler::Atomic<ADD, U32>, | ||
| 739 | &ARBDecompiler::Atomic<MIN, U32>, | ||
| 740 | &ARBDecompiler::Atomic<MAX, U32>, | ||
| 741 | &ARBDecompiler::Atomic<AND, U32>, | ||
| 742 | &ARBDecompiler::Atomic<OR, U32>, | ||
| 743 | &ARBDecompiler::Atomic<XOR, U32>, | ||
| 744 | |||
| 745 | &ARBDecompiler::Atomic<EXCH, S32>, | ||
| 746 | &ARBDecompiler::Atomic<ADD, S32>, | ||
| 747 | &ARBDecompiler::Atomic<MIN, S32>, | ||
| 748 | &ARBDecompiler::Atomic<MAX, S32>, | ||
| 749 | &ARBDecompiler::Atomic<AND, S32>, | ||
| 750 | &ARBDecompiler::Atomic<OR, S32>, | ||
| 751 | &ARBDecompiler::Atomic<XOR, S32>, | ||
| 752 | |||
| 753 | &ARBDecompiler::Atomic<ADD, U32>, | ||
| 754 | &ARBDecompiler::Atomic<MIN, U32>, | ||
| 755 | &ARBDecompiler::Atomic<MAX, U32>, | ||
| 756 | &ARBDecompiler::Atomic<AND, U32>, | ||
| 757 | &ARBDecompiler::Atomic<OR, U32>, | ||
| 758 | &ARBDecompiler::Atomic<XOR, U32>, | ||
| 759 | |||
| 760 | &ARBDecompiler::Atomic<ADD, S32>, | ||
| 761 | &ARBDecompiler::Atomic<MIN, S32>, | ||
| 762 | &ARBDecompiler::Atomic<MAX, S32>, | ||
| 763 | &ARBDecompiler::Atomic<AND, S32>, | ||
| 764 | &ARBDecompiler::Atomic<OR, S32>, | ||
| 765 | &ARBDecompiler::Atomic<XOR, S32>, | ||
| 766 | |||
| 767 | &ARBDecompiler::Branch, | ||
| 768 | &ARBDecompiler::BranchIndirect, | ||
| 769 | &ARBDecompiler::PushFlowStack, | ||
| 770 | &ARBDecompiler::PopFlowStack, | ||
| 771 | &ARBDecompiler::Exit, | ||
| 772 | &ARBDecompiler::Discard, | ||
| 773 | |||
| 774 | &ARBDecompiler::EmitVertex, | ||
| 775 | &ARBDecompiler::EndPrimitive, | ||
| 776 | |||
| 777 | &ARBDecompiler::InvocationId, | ||
| 778 | &ARBDecompiler::YNegate, | ||
| 779 | &ARBDecompiler::LocalInvocationId<'x'>, | ||
| 780 | &ARBDecompiler::LocalInvocationId<'y'>, | ||
| 781 | &ARBDecompiler::LocalInvocationId<'z'>, | ||
| 782 | &ARBDecompiler::WorkGroupId<'x'>, | ||
| 783 | &ARBDecompiler::WorkGroupId<'y'>, | ||
| 784 | &ARBDecompiler::WorkGroupId<'z'>, | ||
| 785 | |||
| 786 | &ARBDecompiler::Unary<TGBALLOT_U>, | ||
| 787 | &ARBDecompiler::Unary<TGALL_U>, | ||
| 788 | &ARBDecompiler::Unary<TGANY_U>, | ||
| 789 | &ARBDecompiler::Unary<TGEQ_U>, | ||
| 790 | |||
| 791 | &ARBDecompiler::ThreadId, | ||
| 792 | &ARBDecompiler::ThreadMask<'e', 'q'>, | ||
| 793 | &ARBDecompiler::ThreadMask<'g', 'e'>, | ||
| 794 | &ARBDecompiler::ThreadMask<'g', 't'>, | ||
| 795 | &ARBDecompiler::ThreadMask<'l', 'e'>, | ||
| 796 | &ARBDecompiler::ThreadMask<'l', 't'>, | ||
| 797 | &ARBDecompiler::ShuffleIndexed, | ||
| 798 | |||
| 799 | &ARBDecompiler::Barrier, | ||
| 800 | &ARBDecompiler::MemoryBarrierGroup, | ||
| 801 | &ARBDecompiler::MemoryBarrierGlobal, | ||
| 802 | }; | ||
| 803 | }; | ||
| 804 | |||
| 805 | ARBDecompiler::ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, | ||
| 806 | ShaderType stage_, std::string_view identifier) | ||
| 807 | : device{device_}, ir{ir_}, registry{registry_}, stage{stage_} { | ||
| 808 | DefineGlobalMemory(); | ||
| 809 | |||
| 810 | AddLine("TEMP RC;"); | ||
| 811 | AddLine("TEMP FSWZA[4];"); | ||
| 812 | AddLine("TEMP FSWZB[4];"); | ||
| 813 | if (ir.IsDecompiled()) { | ||
| 814 | DecompileAST(); | ||
| 815 | } else { | ||
| 816 | DecompileBranchMode(); | ||
| 817 | } | ||
| 818 | AddLine("END"); | ||
| 819 | |||
| 820 | const std::string code = std::move(shader_source); | ||
| 821 | DeclareHeader(); | ||
| 822 | DeclareVertex(); | ||
| 823 | DeclareGeometry(); | ||
| 824 | DeclareFragment(); | ||
| 825 | DeclareCompute(); | ||
| 826 | DeclareInputAttributes(); | ||
| 827 | DeclareOutputAttributes(); | ||
| 828 | DeclareLocalMemory(); | ||
| 829 | DeclareGlobalMemory(); | ||
| 830 | DeclareConstantBuffers(); | ||
| 831 | DeclareRegisters(); | ||
| 832 | DeclareTemporaries(); | ||
| 833 | DeclarePredicates(); | ||
| 834 | DeclareInternalFlags(); | ||
| 835 | |||
| 836 | shader_source += code; | ||
| 837 | } | ||
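The ordering in the constructor above is deliberate: the body is generated first so that counters such as max_temporaries and the set of referenced resources are known, then the declarations are emitted and the saved body is appended after them. The same two-pass pattern in miniature (a restatement of the code above, not an addition to it):

    const std::string code = std::move(shader_source); // pass 1: body, counters filled
    DeclareTemporaries();                              // pass 2: header derived from counters
    shader_source += code;                             // final order: header, then body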
| 838 | |||
| 839 | std::string_view HeaderStageName(ShaderType stage) { | ||
| 840 | switch (stage) { | ||
| 841 | case ShaderType::Vertex: | ||
| 842 | return "vp"; | ||
| 843 | case ShaderType::Geometry: | ||
| 844 | return "gp"; | ||
| 845 | case ShaderType::Fragment: | ||
| 846 | return "fp"; | ||
| 847 | case ShaderType::Compute: | ||
| 848 | return "cp"; | ||
| 849 | default: | ||
| 850 | UNREACHABLE(); | ||
| 851 | return ""; | ||
| 852 | } | ||
| 853 | } | ||
| 854 | |||
| 855 | void ARBDecompiler::DefineGlobalMemory() { | ||
| 856 | u32 binding = 0; | ||
| 857 | for (const auto& pair : ir.GetGlobalMemory()) { | ||
| 858 | const GlobalMemoryBase base = pair.first; | ||
| 859 | global_memory_names.emplace(base, binding); | ||
| 860 | ++binding; | ||
| 861 | } | ||
| 862 | } | ||
| 863 | |||
| 864 | void ARBDecompiler::DeclareHeader() { | ||
| 865 | AddLine("!!NV{}5.0", HeaderStageName(stage)); | ||
| 866 | // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D | ||
| 867 | AddLine("OPTION NV_internal;"); | ||
| 868 | AddLine("OPTION NV_gpu_program_fp64;"); | ||
| 869 | AddLine("OPTION NV_shader_thread_group;"); | ||
| 870 | if (ir.UsesWarps() && device.HasWarpIntrinsics()) { | ||
| 871 | AddLine("OPTION NV_shader_thread_shuffle;"); | ||
| 872 | } | ||
| 873 | if (stage == ShaderType::Vertex) { | ||
| 874 | if (device.HasNvViewportArray2()) { | ||
| 875 | AddLine("OPTION NV_viewport_array2;"); | ||
| 876 | } | ||
| 877 | } | ||
| 878 | if (stage == ShaderType::Fragment) { | ||
| 879 | AddLine("OPTION ARB_draw_buffers;"); | ||
| 880 | } | ||
| 881 | if (device.HasImageLoadFormatted()) { | ||
| 882 | AddLine("OPTION EXT_shader_image_load_formatted;"); | ||
| 883 | } | ||
| 884 | } | ||
| 885 | |||
| 886 | void ARBDecompiler::DeclareVertex() { | ||
| 887 | if (stage != ShaderType::Vertex) { | ||
| 888 | return; | ||
| 889 | } | ||
| 890 | AddLine("OUTPUT result_clip[] = {{ result.clip[0..7] }};"); | ||
| 891 | } | ||
| 892 | |||
| 893 | void ARBDecompiler::DeclareGeometry() { | ||
| 894 | if (stage != ShaderType::Geometry) { | ||
| 895 | return; | ||
| 896 | } | ||
| 897 | const auto& info = registry.GetGraphicsInfo(); | ||
| 898 | const auto& header = ir.GetHeader(); | ||
| 899 | AddLine("PRIMITIVE_IN {};", PrimitiveDescription(info.primitive_topology)); | ||
| 900 | AddLine("PRIMITIVE_OUT {};", TopologyName(header.common3.output_topology)); | ||
| 901 | AddLine("VERTICES_OUT {};", header.common4.max_output_vertices.Value()); | ||
| 902 | AddLine("ATTRIB vertex_position = vertex.position;"); | ||
| 903 | } | ||
| 904 | |||
| 905 | void ARBDecompiler::DeclareFragment() { | ||
| 906 | if (stage != ShaderType::Fragment) { | ||
| 907 | return; | ||
| 908 | } | ||
| 909 | AddLine("OUTPUT result_color7 = result.color[7];"); | ||
| 910 | AddLine("OUTPUT result_color6 = result.color[6];"); | ||
| 911 | AddLine("OUTPUT result_color5 = result.color[5];"); | ||
| 912 | AddLine("OUTPUT result_color4 = result.color[4];"); | ||
| 913 | AddLine("OUTPUT result_color3 = result.color[3];"); | ||
| 914 | AddLine("OUTPUT result_color2 = result.color[2];"); | ||
| 915 | AddLine("OUTPUT result_color1 = result.color[1];"); | ||
| 916 | AddLine("OUTPUT result_color0 = result.color;"); | ||
| 917 | } | ||
| 918 | |||
| 919 | void ARBDecompiler::DeclareCompute() { | ||
| 920 | if (stage != ShaderType::Compute) { | ||
| 921 | return; | ||
| 922 | } | ||
| 923 | const ComputeInfo& info = registry.GetComputeInfo(); | ||
| 924 | AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1], | ||
| 925 | info.workgroup_size[2]); | ||
| 926 | if (info.shared_memory_size_in_words == 0) { | ||
| 927 | return; | ||
| 928 | } | ||
| 929 | const u32 limit = device.GetMaxComputeSharedMemorySize(); | ||
| 930 | u32 size_in_bytes = info.shared_memory_size_in_words * 4; | ||
| 931 | if (size_in_bytes > limit) { | ||
| 932 | LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}", | ||
| 933 | size_in_bytes, limit); | ||
| 934 | size_in_bytes = limit; | ||
| 935 | } | ||
| 936 | |||
| 937 | AddLine("SHARED_MEMORY {};", size_in_bytes); | ||
| 938 | AddLine("SHARED shared_mem[] = {{program.sharedmem}};"); | ||
| 939 | } | ||
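Worked numbers for the clamp above (illustrative values only): a shader declaring 24576 words of shared memory requests 24576 * 4 = 98304 bytes; on a host whose limit is 49152 bytes the size is clamped and SHARED_MEMORY 49152; is emitted:

    constexpr u32 words = 24576;                 // hypothetical shader declaration
    constexpr u32 limit = 49152;                 // hypothetical host limit
    constexpr u32 bytes = words * 4;             // 98304
    constexpr u32 clamped = bytes > limit ? limit : bytes; // 49152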
| 940 | |||
| 941 | void ARBDecompiler::DeclareInputAttributes() { | ||
| 942 | if (stage == ShaderType::Compute) { | ||
| 943 | return; | ||
| 944 | } | ||
| 945 | const std::string_view stage_name = StageInputName(stage); | ||
| 946 | for (const auto attribute : ir.GetInputAttributes()) { | ||
| 947 | if (!IsGenericAttribute(attribute)) { | ||
| 948 | continue; | ||
| 949 | } | ||
| 950 | const u32 index = GetGenericAttributeIndex(attribute); | ||
| 951 | |||
| 952 | std::string_view suffix; | ||
| 953 | if (stage == ShaderType::Fragment) { | ||
| 954 | const auto input_mode{ir.GetHeader().ps.GetPixelImap(index)}; | ||
| 955 | if (input_mode == PixelImap::Unused) { | ||
| 956 | continue; | ||
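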
| 957 | } | ||
| 958 | suffix = GetInputFlags(input_mode); | ||
| 959 | } | ||
| 960 | AddLine("{}ATTRIB in_attr{}[] = {{ {}.attrib[{}..{}] }};", suffix, index, stage_name, index, | ||
| 961 | index); | ||
| 962 | } | ||
| 963 | } | ||
| 964 | |||
| 965 | void ARBDecompiler::DeclareOutputAttributes() { | ||
| 966 | if (stage == ShaderType::Compute) { | ||
| 967 | return; | ||
| 968 | } | ||
| 969 | for (const auto attribute : ir.GetOutputAttributes()) { | ||
| 970 | if (!IsGenericAttribute(attribute)) { | ||
| 971 | continue; | ||
| 972 | } | ||
| 973 | const u32 index = GetGenericAttributeIndex(attribute); | ||
| 974 | AddLine("OUTPUT out_attr{}[] = {{ result.attrib[{}..{}] }};", index, index, index); | ||
| 975 | } | ||
| 976 | } | ||
| 977 | |||
| 978 | void ARBDecompiler::DeclareLocalMemory() { | ||
| 979 | u64 size = 0; | ||
| 980 | if (stage == ShaderType::Compute) { | ||
| 981 | size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL; | ||
| 982 | } else { | ||
| 983 | size = ir.GetHeader().GetLocalMemorySize(); | ||
| 984 | } | ||
| 985 | if (size == 0) { | ||
| 986 | return; | ||
| 987 | } | ||
| 988 | const u64 element_count = Common::AlignUp(size, 4) / 4; | ||
| 989 | AddLine("TEMP lmem[{}];", element_count); | ||
| 990 | } | ||
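Worked example for the sizing above (illustrative): each TEMP holds one 32-bit word, so a 10-byte local memory region rounds up to 12 bytes and declares TEMP lmem[3];:

    const u64 element_count = Common::AlignUp(u64{10}, 4) / 4; // (10 -> 12) / 4 == 3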
| 991 | |||
| 992 | void ARBDecompiler::DeclareGlobalMemory() { | ||
| 993 | const size_t num_entries = ir.GetGlobalMemory().size(); | ||
| 994 | if (num_entries > 0) { | ||
| 995 | AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_entries, num_entries - 1); | ||
| 996 | } | ||
| 997 | } | ||
| 998 | |||
| 999 | void ARBDecompiler::DeclareConstantBuffers() { | ||
| 1000 | u32 binding = 0; | ||
| 1001 | for (const auto& cbuf : ir.GetConstantBuffers()) { | ||
| 1002 | AddLine("CBUFFER cbuf{}[] = {{ program.buffer[{}] }};", cbuf.first, binding); | ||
| 1003 | ++binding; | ||
| 1004 | } | ||
| 1005 | } | ||
| 1006 | |||
| 1007 | void ARBDecompiler::DeclareRegisters() { | ||
| 1008 | for (const u32 gpr : ir.GetRegisters()) { | ||
| 1009 | AddLine("TEMP R{};", gpr); | ||
| 1010 | } | ||
| 1011 | } | ||
| 1012 | |||
| 1013 | void ARBDecompiler::DeclareTemporaries() { | ||
| 1014 | for (std::size_t i = 0; i < max_temporaries; ++i) { | ||
| 1015 | AddLine("TEMP T{};", i); | ||
| 1016 | } | ||
| 1017 | for (std::size_t i = 0; i < max_long_temporaries; ++i) { | ||
| 1018 | AddLine("LONG TEMP L{};", i); | ||
| 1019 | } | ||
| 1020 | } | ||
| 1021 | |||
| 1022 | void ARBDecompiler::DeclarePredicates() { | ||
| 1023 | for (const Tegra::Shader::Pred pred : ir.GetPredicates()) { | ||
| 1024 | AddLine("TEMP P{};", static_cast<u64>(pred)); | ||
| 1025 | } | ||
| 1026 | } | ||
| 1027 | |||
| 1028 | void ARBDecompiler::DeclareInternalFlags() { | ||
| 1029 | for (const char* name : INTERNAL_FLAG_NAMES) { | ||
| 1030 | AddLine("TEMP {};", name); | ||
| 1031 | } | ||
| 1032 | } | ||
| 1033 | |||
| 1034 | void ARBDecompiler::InitializeVariables() { | ||
| 1035 | AddLine("MOV.F32 FSWZA[0], -1;"); | ||
| 1036 | AddLine("MOV.F32 FSWZA[1], 1;"); | ||
| 1037 | AddLine("MOV.F32 FSWZA[2], -1;"); | ||
| 1038 | AddLine("MOV.F32 FSWZA[3], 0;"); | ||
| 1039 | AddLine("MOV.F32 FSWZB[0], -1;"); | ||
| 1040 | AddLine("MOV.F32 FSWZB[1], -1;"); | ||
| 1041 | AddLine("MOV.F32 FSWZB[2], 1;"); | ||
| 1042 | AddLine("MOV.F32 FSWZB[3], -1;"); | ||
| 1043 | |||
| 1044 | if (stage == ShaderType::Vertex || stage == ShaderType::Geometry) { | ||
| 1045 | AddLine("MOV.F result.position, {{0, 0, 0, 1}};"); | ||
| 1046 | } | ||
| 1047 | for (const auto attribute : ir.GetOutputAttributes()) { | ||
| 1048 | if (!IsGenericAttribute(attribute)) { | ||
| 1049 | continue; | ||
| 1050 | } | ||
| 1051 | const u32 index = GetGenericAttributeIndex(attribute); | ||
| 1052 | AddLine("MOV.F result.attrib[{}], {{0, 0, 0, 1}};", index); | ||
| 1053 | } | ||
| 1054 | for (const u32 gpr : ir.GetRegisters()) { | ||
| 1055 | AddLine("MOV.F R{}, {{0, 0, 0, 0}};", gpr); | ||
| 1056 | } | ||
| 1057 | for (const Tegra::Shader::Pred pred : ir.GetPredicates()) { | ||
| 1058 | AddLine("MOV.U P{}, {{0, 0, 0, 0}};", static_cast<u64>(pred)); | ||
| 1059 | } | ||
| 1060 | } | ||
| 1061 | |||
| 1062 | void ARBDecompiler::DecompileAST() { | ||
| 1063 | const u32 num_flow_variables = ir.GetASTNumVariables(); | ||
| 1064 | for (u32 i = 0; i < num_flow_variables; ++i) { | ||
| 1065 | AddLine("TEMP F{};", i); | ||
| 1066 | } | ||
| 1067 | for (u32 i = 0; i < num_flow_variables; ++i) { | ||
| 1068 | AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i); | ||
| 1069 | } | ||
| 1070 | |||
| 1071 | InitializeVariables(); | ||
| 1072 | |||
| 1073 | VisitAST(ir.GetASTProgram()); | ||
| 1074 | } | ||
| 1075 | |||
| 1076 | void ARBDecompiler::DecompileBranchMode() { | ||
| 1077 | static constexpr u32 FLOW_STACK_SIZE = 20; | ||
| 1078 | if (!ir.IsFlowStackDisabled()) { | ||
| 1079 | AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE); | ||
| 1080 | AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE); | ||
| 1081 | AddLine("TEMP SSY_TOP;"); | ||
| 1082 | AddLine("TEMP PBK_TOP;"); | ||
| 1083 | } | ||
| 1084 | |||
| 1085 | AddLine("TEMP PC;"); | ||
| 1086 | |||
| 1087 | if (!ir.IsFlowStackDisabled()) { | ||
| 1088 | AddLine("MOV.U SSY_TOP.x, 0;"); | ||
| 1089 | AddLine("MOV.U PBK_TOP.x, 0;"); | ||
| 1090 | } | ||
| 1091 | |||
| 1092 | InitializeVariables(); | ||
| 1093 | |||
| 1094 | const auto basic_block_end = ir.GetBasicBlocks().end(); | ||
| 1095 | auto basic_block_it = ir.GetBasicBlocks().begin(); | ||
| 1096 | const u32 first_address = basic_block_it->first; | ||
| 1097 | AddLine("MOV.U PC.x, {};", first_address); | ||
| 1098 | |||
| 1099 | AddLine("REP;"); | ||
| 1100 | |||
| 1101 | std::size_t num_blocks = 0; | ||
| 1102 | while (basic_block_it != basic_block_end) { | ||
| 1103 | const auto& [address, bb] = *basic_block_it; | ||
| 1104 | ++num_blocks; | ||
| 1105 | |||
| 1106 | AddLine("SEQ.S.CC RC.x, PC.x, {};", address); | ||
| 1107 | AddLine("IF NE.x;"); | ||
| 1108 | |||
| 1109 | VisitBlock(bb); | ||
| 1110 | |||
| 1111 | ++basic_block_it; | ||
| 1112 | |||
| 1113 | if (basic_block_it != basic_block_end) { | ||
| 1114 | const auto op = std::get_if<OperationNode>(&*bb[bb.size() - 1]); | ||
| 1115 | if (!op || op->GetCode() != OperationCode::Branch) { | ||
| 1116 | const u32 next_address = basic_block_it->first; | ||
| 1117 | AddLine("MOV.U PC.x, {};", next_address); | ||
| 1118 | AddLine("CONT;"); | ||
| 1119 | } | ||
| 1120 | } | ||
| 1121 | |||
| 1122 | AddLine("ELSE;"); | ||
| 1123 | } | ||
| 1124 | AddLine("RET;"); | ||
| 1125 | while (num_blocks--) { | ||
| 1126 | AddLine("ENDIF;"); | ||
| 1127 | } | ||
| 1128 | |||
| 1129 | AddLine("ENDREP;"); | ||
| 1130 | } | ||
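Putting the pieces above together, the emitted skeleton for two basic blocks at addresses 0 and 32 would look roughly like this (indentation added for readability; the PC update and CONT appear only when a block does not already end in a branch):

    MOV.U PC.x, 0;
    REP;
      SEQ.S.CC RC.x, PC.x, 0;
      IF NE.x;
        <block at address 0>
        MOV.U PC.x, 32;
        CONT;
      ELSE;
        SEQ.S.CC RC.x, PC.x, 32;
        IF NE.x;
          <block at address 32>
        ELSE;
          RET;
        ENDIF;
      ENDIF;
    ENDREP;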
| 1131 | |||
| 1132 | void ARBDecompiler::VisitAST(const ASTNode& node) { | ||
| 1133 | if (const auto ast = std::get_if<ASTProgram>(&*node->GetInnerData())) { | ||
| 1134 | for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { | ||
| 1135 | VisitAST(current); | ||
| 1136 | } | ||
| 1137 | } else if (const auto if_then = std::get_if<ASTIfThen>(&*node->GetInnerData())) { | ||
| 1138 | const std::string condition = VisitExpression(if_then->condition); | ||
| 1139 | ResetTemporaries(); | ||
| 1140 | |||
| 1141 | AddLine("MOVC.U RC.x, {};", condition); | ||
| 1142 | AddLine("IF NE.x;"); | ||
| 1143 | for (ASTNode current = if_then->nodes.GetFirst(); current; current = current->GetNext()) { | ||
| 1144 | VisitAST(current); | ||
| 1145 | } | ||
| 1146 | AddLine("ENDIF;"); | ||
| 1147 | } else if (const auto if_else = std::get_if<ASTIfElse>(&*node->GetInnerData())) { | ||
| 1148 | AddLine("ELSE;"); | ||
| 1149 | for (ASTNode current = if_else->nodes.GetFirst(); current; current = current->GetNext()) { | ||
| 1150 | VisitAST(current); | ||
| 1151 | } | ||
| 1152 | } else if (const auto decoded = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) { | ||
| 1153 | VisitBlock(decoded->nodes); | ||
| 1154 | } else if (const auto var_set = std::get_if<ASTVarSet>(&*node->GetInnerData())) { | ||
| 1155 | AddLine("MOV.U F{}, {};", var_set->index, VisitExpression(var_set->condition)); | ||
| 1156 | ResetTemporaries(); | ||
| 1157 | } else if (const auto do_while = std::get_if<ASTDoWhile>(&*node->GetInnerData())) { | ||
| 1158 | const std::string condition = VisitExpression(do_while->condition); | ||
| 1159 | ResetTemporaries(); | ||
| 1160 | AddLine("REP;"); | ||
| 1161 | for (ASTNode current = do_while->nodes.GetFirst(); current; current = current->GetNext()) { | ||
| 1162 | VisitAST(current); | ||
| 1163 | } | ||
| 1164 | AddLine("MOVC.U RC.x, {};", condition); | ||
| 1165 | AddLine("BRK (NE.x);"); | ||
| 1166 | AddLine("ENDREP;"); | ||
| 1167 | } else if (const auto ast_return = std::get_if<ASTReturn>(&*node->GetInnerData())) { | ||
| 1168 | const bool is_true = ExprIsTrue(ast_return->condition); | ||
| 1169 | if (!is_true) { | ||
| 1170 | AddLine("MOVC.U RC.x, {};", VisitExpression(ast_return->condition)); | ||
| 1171 | AddLine("IF NE.x;"); | ||
| 1172 | ResetTemporaries(); | ||
| 1173 | } | ||
| 1174 | if (ast_return->kills) { | ||
| 1175 | AddLine("KIL TR;"); | ||
| 1176 | } else { | ||
| 1177 | Exit(); | ||
| 1178 | } | ||
| 1179 | if (!is_true) { | ||
| 1180 | AddLine("ENDIF;"); | ||
| 1181 | } | ||
| 1182 | } else if (const auto ast_break = std::get_if<ASTBreak>(&*node->GetInnerData())) { | ||
| 1183 | if (ExprIsTrue(ast_break->condition)) { | ||
| 1184 | AddLine("BRK;"); | ||
| 1185 | } else { | ||
| 1186 | AddLine("MOVC.U RC.x, {};", VisitExpression(ast_break->condition)); | ||
| 1187 | AddLine("BRK (NE.x);"); | ||
| 1188 | ResetTemporaries(); | ||
| 1189 | } | ||
| 1190 | } else if (std::holds_alternative<ASTLabel>(*node->GetInnerData())) { | ||
| 1191 | // Nothing to do | ||
| 1192 | } else { | ||
| 1193 | UNREACHABLE(); | ||
| 1194 | } | ||
| 1195 | } | ||
| 1196 | |||
| 1197 | std::string ARBDecompiler::VisitExpression(const Expr& node) { | ||
| 1198 | if (const auto expr = std::get_if<ExprAnd>(&*node)) { | ||
| 1199 | std::string result = AllocTemporary(); | ||
| 1200 | AddLine("AND.U {}, {}, {};", result, VisitExpression(expr->operand1), | ||
| 1201 | VisitExpression(expr->operand2)); | ||
| 1202 | return result; | ||
| 1203 | } | ||
| 1204 | if (const auto expr = std::get_if<ExprOr>(&*node)) { | ||
| 1205 | std::string result = AllocTemporary(); | ||
| 1206 | AddLine("OR.U {}, {}, {};", result, VisitExpression(expr->operand1), | ||
| 1207 | VisitExpression(expr->operand2)); | ||
| 1208 | return result; | ||
| 1209 | } | ||
| 1210 | if (const auto expr = std::get_if<ExprNot>(&*node)) { | ||
| 1211 | std::string result = AllocTemporary(); | ||
| 1212 | AddLine("CMP.S {}, {}, 0, -1;", result, VisitExpression(expr->operand1)); | ||
| 1213 | return result; | ||
| 1214 | } | ||
| 1215 | if (const auto expr = std::get_if<ExprPredicate>(&*node)) { | ||
| 1216 | return fmt::format("P{}.x", static_cast<u64>(expr->predicate)); | ||
| 1217 | } | ||
| 1218 | if (const auto expr = std::get_if<ExprCondCode>(&*node)) { | ||
| 1219 | return Visit(ir.GetConditionCode(expr->cc)); | ||
| 1220 | } | ||
| 1221 | if (const auto expr = std::get_if<ExprVar>(&*node)) { | ||
| 1222 | return fmt::format("F{}.x", expr->var_index); | ||
| 1223 | } | ||
| 1224 | if (const auto expr = std::get_if<ExprBoolean>(&*node)) { | ||
| 1225 | return expr->value ? "0xffffffff" : "0"; | ||
| 1226 | } | ||
| 1227 | if (const auto expr = std::get_if<ExprGprEqual>(&*node)) { | ||
| 1228 | std::string result = AllocTemporary(); | ||
| 1229 | AddLine("SEQ.U {}, R{}.x, {};", result, expr->gpr, expr->value); | ||
| 1230 | return result; | ||
| 1231 | } | ||
| 1232 | UNREACHABLE(); | ||
| 1233 | return "0"; | ||
| 1234 | } | ||
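For example (a hypothetical lowering under the helpers above, with illustrative temporary numbering): visiting the expression !(P0 && F1) allocates the NOT's result first, recurses into the AND, and emits:

    AND.U T1.x, P0.x, F1.x;
    CMP.S T0.x, T1.x, 0, -1;

where CMP.S flips -1 to 0 and 0 to -1, matching the predicate convention noted at the top of the file.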
| 1235 | |||
| 1236 | void ARBDecompiler::VisitBlock(const NodeBlock& bb) { | ||
| 1237 | for (const auto& node : bb) { | ||
| 1238 | Visit(node); | ||
| 1239 | } | ||
| 1240 | } | ||
| 1241 | |||
| 1242 | std::string ARBDecompiler::Visit(const Node& node) { | ||
| 1243 | if (const auto operation = std::get_if<OperationNode>(&*node)) { | ||
| 1244 | if (const auto amend_index = operation->GetAmendIndex()) { | ||
| 1245 | Visit(ir.GetAmendNode(*amend_index)); | ||
| 1246 | } | ||
| 1247 | const std::size_t index = static_cast<std::size_t>(operation->GetCode()); | ||
| 1248 | if (index >= OPERATION_DECOMPILERS.size()) { | ||
| 1249 | UNREACHABLE_MSG("Out of bounds operation: {}", index); | ||
| 1250 | return {}; | ||
| 1251 | } | ||
| 1252 | const auto decompiler = OPERATION_DECOMPILERS[index]; | ||
| 1253 | if (decompiler == nullptr) { | ||
| 1254 | UNREACHABLE_MSG("Undefined operation: {}", index); | ||
| 1255 | return {}; | ||
| 1256 | } | ||
| 1257 | return (this->*decompiler)(*operation); | ||
| 1258 | } | ||
| 1259 | |||
| 1260 | if (const auto gpr = std::get_if<GprNode>(&*node)) { | ||
| 1261 | const u32 index = gpr->GetIndex(); | ||
| 1262 | if (index == Register::ZeroIndex) { | ||
| 1263 | return "{0, 0, 0, 0}.x"; | ||
| 1264 | } | ||
| 1265 | return fmt::format("R{}.x", index); | ||
| 1266 | } | ||
| 1267 | |||
| 1268 | if (const auto cv = std::get_if<CustomVarNode>(&*node)) { | ||
| 1269 | return fmt::format("CV{}.x", cv->GetIndex()); | ||
| 1270 | } | ||
| 1271 | |||
| 1272 | if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { | ||
| 1273 | std::string temporary = AllocTemporary(); | ||
| 1274 | AddLine("MOV.U {}, {};", temporary, immediate->GetValue()); | ||
| 1275 | return temporary; | ||
| 1276 | } | ||
| 1277 | |||
| 1278 | if (const auto predicate = std::get_if<PredicateNode>(&*node)) { | ||
| 1279 | std::string temporary = AllocTemporary(); | ||
| 1280 | switch (const auto index = predicate->GetIndex(); index) { | ||
| 1281 | case Tegra::Shader::Pred::UnusedIndex: | ||
| 1282 | AddLine("MOV.S {}, -1;", temporary); | ||
| 1283 | break; | ||
| 1284 | case Tegra::Shader::Pred::NeverExecute: | ||
| 1285 | AddLine("MOV.S {}, 0;", temporary); | ||
| 1286 | break; | ||
| 1287 | default: | ||
| 1288 | AddLine("MOV.S {}, P{}.x;", temporary, static_cast<u64>(index)); | ||
| 1289 | break; | ||
| 1290 | } | ||
| 1291 | if (predicate->IsNegated()) { | ||
| 1292 | AddLine("CMP.S {}, {}, 0, -1;", temporary, temporary); | ||
| 1293 | } | ||
| 1294 | return temporary; | ||
| 1295 | } | ||
| 1296 | |||
| 1297 | if (const auto abuf = std::get_if<AbufNode>(&*node)) { | ||
| 1298 | if (abuf->IsPhysicalBuffer()) { | ||
| 1299 | UNIMPLEMENTED_MSG("Physical buffers are not implemented"); | ||
| 1300 | return "{0, 0, 0, 0}.x"; | ||
| 1301 | } | ||
| 1302 | |||
| 1303 | const Attribute::Index index = abuf->GetIndex(); | ||
| 1304 | const u32 element = abuf->GetElement(); | ||
| 1305 | const char swizzle = Swizzle(element); | ||
| 1306 | switch (index) { | ||
| 1307 | case Attribute::Index::Position: { | ||
| 1308 | if (stage == ShaderType::Geometry) { | ||
| 1309 | return fmt::format("{}_position[{}].{}", StageInputName(stage), | ||
| 1310 | Visit(abuf->GetBuffer()), swizzle); | ||
| 1311 | } else { | ||
| 1312 | return fmt::format("{}.position.{}", StageInputName(stage), swizzle); | ||
| 1313 | } | ||
| 1314 | } | ||
| 1315 | case Attribute::Index::TessCoordInstanceIDVertexID: | ||
| 1316 | ASSERT(stage == ShaderType::Vertex); | ||
| 1317 | switch (element) { | ||
| 1318 | case 2: | ||
| 1319 | return "vertex.instance"; | ||
| 1320 | case 3: | ||
| 1321 | return "vertex.id"; | ||
| 1322 | } | ||
| 1323 | UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); | ||
| 1324 | break; | ||
| 1325 | case Attribute::Index::PointCoord: | ||
| 1326 | switch (element) { | ||
| 1327 | case 0: | ||
| 1328 | return "fragment.pointcoord.x"; | ||
| 1329 | case 1: | ||
| 1330 | return "fragment.pointcoord.y"; | ||
| 1331 | } | ||
| 1332 | UNIMPLEMENTED(); | ||
| 1333 | break; | ||
| 1334 | case Attribute::Index::FrontFacing: { | ||
| 1335 | ASSERT(stage == ShaderType::Fragment); | ||
| 1336 | ASSERT(element == 3); | ||
| 1337 | const std::string temporary = AllocVectorTemporary(); | ||
| 1338 | AddLine("SGT.S RC.x, fragment.facing, {{0, 0, 0, 0}};"); | ||
| 1339 | AddLine("MOV.U.CC RC.x, -RC;"); | ||
| 1340 | AddLine("MOV.S {}.x, 0;", temporary); | ||
| 1341 | AddLine("MOV.S {}.x (NE.x), -1;", temporary); | ||
| 1342 | return fmt::format("{}.x", temporary); | ||
| 1343 | } | ||
| 1344 | default: | ||
| 1345 | if (IsGenericAttribute(index)) { | ||
| 1346 | if (stage == ShaderType::Geometry) { | ||
| 1347 | return fmt::format("in_attr{}[{}][0].{}", GetGenericAttributeIndex(index), | ||
| 1348 | Visit(abuf->GetBuffer()), swizzle); | ||
| 1349 | } else { | ||
| 1350 | return fmt::format("{}.attrib[{}].{}", StageInputName(stage), | ||
| 1351 | GetGenericAttributeIndex(index), swizzle); | ||
| 1352 | } | ||
| 1353 | } | ||
| 1354 | UNIMPLEMENTED_MSG("Unimplemented input attribute={}", index); | ||
| 1355 | break; | ||
| 1356 | } | ||
| 1357 | return "{0, 0, 0, 0}.x"; | ||
| 1358 | } | ||
| 1359 | |||
| 1360 | if (const auto cbuf = std::get_if<CbufNode>(&*node)) { | ||
| 1361 | std::string offset_string; | ||
| 1362 | const auto& offset = cbuf->GetOffset(); | ||
| 1363 | if (const auto imm = std::get_if<ImmediateNode>(&*offset)) { | ||
| 1364 | offset_string = std::to_string(imm->GetValue()); | ||
| 1365 | } else { | ||
| 1366 | offset_string = Visit(offset); | ||
| 1367 | } | ||
| 1368 | std::string temporary = AllocTemporary(); | ||
| 1369 | AddLine("LDC.F32 {}, cbuf{}[{}];", temporary, cbuf->GetIndex(), offset_string); | ||
| 1370 | return temporary; | ||
| 1371 | } | ||
| 1372 | |||
| 1373 | if (const auto gmem = std::get_if<GmemNode>(&*node)) { | ||
| 1374 | std::string temporary = AllocTemporary(); | ||
| 1375 | AddLine("MOV {}, 0;", temporary); | ||
| 1376 | AddLine("LOAD.U32 {} (NE.x), {};", temporary, GlobalMemoryPointer(*gmem)); | ||
| 1377 | return temporary; | ||
| 1378 | } | ||
| 1379 | |||
| 1380 | if (const auto lmem = std::get_if<LmemNode>(&*node)) { | ||
| 1381 | std::string temporary = Visit(lmem->GetAddress()); | ||
| 1382 | AddLine("SHR.U {}, {}, 2;", temporary, temporary); | ||
| 1383 | AddLine("MOV.U {}, lmem[{}].x;", temporary, temporary); | ||
| 1384 | return temporary; | ||
| 1385 | } | ||
| 1386 | |||
| 1387 | if (const auto smem = std::get_if<SmemNode>(&*node)) { | ||
| 1388 | std::string temporary = Visit(smem->GetAddress()); | ||
| 1389 | AddLine("LDS.U32 {}, shared_mem[{}];", temporary, temporary); | ||
| 1390 | return temporary; | ||
| 1391 | } | ||
| 1392 | |||
| 1393 | if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { | ||
| 1394 | const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag()); | ||
| 1395 | return fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]); | ||
| 1396 | } | ||
| 1397 | |||
| 1398 | if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { | ||
| 1399 | if (const auto amend_index = conditional->GetAmendIndex()) { | ||
| 1400 | Visit(ir.GetAmendNode(*amend_index)); | ||
| 1401 | } | ||
| 1402 | AddLine("MOVC.U RC.x, {};", Visit(conditional->GetCondition())); | ||
| 1403 | AddLine("IF NE.x;"); | ||
| 1404 | VisitBlock(conditional->GetCode()); | ||
| 1405 | AddLine("ENDIF;"); | ||
| 1406 | return {}; | ||
| 1407 | } | ||
| 1408 | |||
| 1409 | if ([[maybe_unused]] const auto cmt = std::get_if<CommentNode>(&*node)) { | ||
| 1410 | // Uncommenting this would generate invalid code: GLASM comments start with '#', not '//'. | ||
| 1411 | // AddLine("// {}", cmt->GetText()); | ||
| 1412 | return {}; | ||
| 1413 | } | ||
| 1414 | |||
| 1415 | UNIMPLEMENTED(); | ||
| 1416 | return {}; | ||
| 1417 | } | ||
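The tail of Visit() above is a type switch over the IR node: each std::get_if probe peels off one alternative (register, immediate, predicate, attribute buffer, constant buffer, memory, flags, conditional, comment). A minimal sketch of the same dispatch pattern, using a hypothetical two-alternative std::variant in place of yuzu's Node pointer:

    #include <string>
    #include <variant>

    // Hypothetical mini-IR; the real Node is a shared pointer to a variant
    // with many more alternatives than the two modelled here.
    struct GprNode { unsigned index; };
    struct ImmediateNode { unsigned value; };
    using Node = std::variant<GprNode, ImmediateNode>;

    std::string VisitNode(const Node& node) {
        if (const auto* gpr = std::get_if<GprNode>(&node)) {
            return "R" + std::to_string(gpr->index) + ".x";
        }
        if (const auto* imm = std::get_if<ImmediateNode>(&node)) {
            return std::to_string(imm->value);
        }
        return {}; // unreachable with only two alternatives
    }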
| 1418 | |||
| 1419 | std::tuple<std::string, std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operation) { | ||
| 1420 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1421 | UNIMPLEMENTED_IF(meta.sampler.is_indexed); | ||
| 1422 | |||
| 1423 | const bool is_extended = meta.sampler.is_shadow && meta.sampler.is_array && | ||
| 1424 | meta.sampler.type == Tegra::Shader::TextureType::TextureCube; | ||
| 1425 | const std::size_t count = operation.GetOperandsCount(); | ||
| 1426 | std::string temporary = AllocVectorTemporary(); | ||
| 1427 | std::size_t i = 0; | ||
| 1428 | for (; i < count; ++i) { | ||
| 1429 | AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); | ||
| 1430 | } | ||
| 1431 | if (meta.sampler.is_array) { | ||
| 1432 | AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i), Visit(meta.array)); | ||
| 1433 | ++i; | ||
| 1434 | } | ||
| 1435 | if (meta.sampler.is_shadow) { | ||
| 1436 | std::string compare = Visit(meta.depth_compare); | ||
| 1437 | if (is_extended) { | ||
| 1438 | ASSERT(i == 4); | ||
| 1439 | std::string extra_coord = AllocVectorTemporary(); | ||
| 1440 | AddLine("MOV.F {}.x, {};", extra_coord, compare); | ||
| 1441 | return {fmt::format("{}, {}", temporary, extra_coord), extra_coord, 0}; | ||
| 1442 | } | ||
| 1443 | AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), compare); | ||
| 1444 | ++i; | ||
| 1445 | } | ||
| 1446 | return {temporary, temporary, i}; | ||
| 1447 | } | ||
| 1448 | |||
| 1449 | std::string ARBDecompiler::BuildAoffi(Operation operation) { | ||
| 1450 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1451 | if (meta.aoffi.empty()) { | ||
| 1452 | return {}; | ||
| 1453 | } | ||
| 1454 | const std::string temporary = AllocVectorTemporary(); | ||
| 1455 | std::size_t i = 0; | ||
| 1456 | for (auto& node : meta.aoffi) { | ||
| 1457 | AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i++), Visit(node)); | ||
| 1458 | } | ||
| 1459 | return fmt::format(", offset({})", temporary); | ||
| 1460 | } | ||
| 1461 | |||
| 1462 | std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) { | ||
| 1463 | // Read a bindless SSBO, return its address and set CC accordingly | ||
| 1464 | // address = c[binding].xy | ||
| 1465 | // length = c[binding].z | ||
| 1466 | const u32 binding = global_memory_names.at(gmem.GetDescriptor()); | ||
| 1467 | |||
| 1468 | const std::string pointer = AllocLongVectorTemporary(); | ||
| 1469 | std::string temporary = AllocTemporary(); | ||
| 1470 | |||
| 1471 | AddLine("PK64.U {}, c[{}];", pointer, binding); | ||
| 1472 | AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()), | ||
| 1473 | Visit(gmem.GetBaseAddress())); | ||
| 1474 | AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary); | ||
| 1475 | AddLine("ADD.U64 {}.x, {}.x, {}.z;", pointer, pointer, pointer); | ||
| 1476 | // Compare offset to length and set CC | ||
| 1477 | AddLine("SLT.U.CC RC.x, {}, c[{}].z;", temporary, binding); | ||
| 1478 | return fmt::format("{}.x", pointer); | ||
| 1479 | } | ||
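GlobalMemoryPointer() documents its constant-buffer layout inline: the 64-bit base address sits in c[binding].xy and the byte length in c[binding].z. A hedged sketch of the CPU-side packing that layout implies (low-word-first lane order is an assumption, mirroring the PK64.U consumption above):

    #include <array>
    #include <cstdint>

    // Hypothetical host-side encoding of one bindless SSBO descriptor.
    std::array<uint32_t, 4> PackSsboDescriptor(uint64_t address, uint32_t length) {
        return {
            static_cast<uint32_t>(address),       // .x: low 32 bits of the pointer
            static_cast<uint32_t>(address >> 32), // .y: high 32 bits
            length,                               // .z: length for the SLT.U bounds check
            0,                                    // .w: unused
        };
    }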
| 1480 | |||
| 1481 | void ARBDecompiler::Exit() { | ||
| 1482 | if (stage != ShaderType::Fragment) { | ||
| 1483 | AddLine("RET;"); | ||
| 1484 | return; | ||
| 1485 | } | ||
| 1486 | |||
| 1487 | const auto safe_get_register = [this](u32 reg) -> std::string { | ||
| 1488 | if (ir.GetRegisters().contains(reg)) { | ||
| 1489 | return fmt::format("R{}.x", reg); | ||
| 1490 | } | ||
| 1491 | return "{0, 0, 0, 0}.x"; | ||
| 1492 | }; | ||
| 1493 | |||
| 1494 | const auto& header = ir.GetHeader(); | ||
| 1495 | u32 current_reg = 0; | ||
| 1496 | for (u32 rt = 0; rt < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++rt) { | ||
| 1497 | for (u32 component = 0; component < 4; ++component) { | ||
| 1498 | if (!header.ps.IsColorComponentOutputEnabled(rt, component)) { | ||
| 1499 | continue; | ||
| 1500 | } | ||
| 1501 | AddLine("MOV.F result_color{}.{}, {};", rt, Swizzle(component), | ||
| 1502 | safe_get_register(current_reg)); | ||
| 1503 | ++current_reg; | ||
| 1504 | } | ||
| 1505 | } | ||
| 1506 | if (header.ps.omap.depth) { | ||
| 1507 | AddLine("MOV.F result.depth.z, {};", safe_get_register(current_reg + 1)); | ||
| 1508 | } | ||
| 1509 | |||
| 1510 | AddLine("RET;"); | ||
| 1511 | } | ||
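Exit() relies on a tight register packing convention: one register per enabled color component, walked in (render target, component) order, with the fragment depth read one slot past the packed color block (the current_reg + 1 above). A small model of that layout, assuming the 8-render-target limit used by the loop:

    #include <cstdint>

    // Counts how many registers the color outputs consume; when depth is
    // written, the code above sources it just past this block.
    uint32_t CountColorRegisters(const bool enabled[8][4]) {
        uint32_t reg = 0;
        for (int rt = 0; rt < 8; ++rt) {
            for (int component = 0; component < 4; ++component) {
                if (enabled[rt][component]) {
                    ++reg;
                }
            }
        }
        return reg;
    }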
| 1512 | |||
| 1513 | std::string ARBDecompiler::Assign(Operation operation) { | ||
| 1514 | const Node& dest = operation[0]; | ||
| 1515 | const Node& src = operation[1]; | ||
| 1516 | |||
| 1517 | std::string dest_name; | ||
| 1518 | if (const auto gpr = std::get_if<GprNode>(&*dest)) { | ||
| 1519 | if (gpr->GetIndex() == Register::ZeroIndex) { | ||
| 1520 | // Writing to Register::ZeroIndex is a no-op | ||
| 1521 | return {}; | ||
| 1522 | } | ||
| 1523 | dest_name = fmt::format("R{}.x", gpr->GetIndex()); | ||
| 1524 | } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { | ||
| 1525 | const u32 element = abuf->GetElement(); | ||
| 1526 | const char swizzle = Swizzle(element); | ||
| 1527 | switch (const Attribute::Index index = abuf->GetIndex()) { | ||
| 1528 | case Attribute::Index::Position: | ||
| 1529 | dest_name = fmt::format("result.position.{}", swizzle); | ||
| 1530 | break; | ||
| 1531 | case Attribute::Index::LayerViewportPointSize: | ||
| 1532 | switch (element) { | ||
| 1533 | case 0: | ||
| 1534 | UNIMPLEMENTED(); | ||
| 1535 | return {}; | ||
| 1536 | case 1: | ||
| 1537 | case 2: | ||
| 1538 | if (!device.HasNvViewportArray2()) { | ||
| 1539 | LOG_ERROR( | ||
| 1540 | Render_OpenGL, | ||
| 1541 | "NV_viewport_array2 is missing. Maxwell gen 2 or better is required."); | ||
| 1542 | return {}; | ||
| 1543 | } | ||
| 1544 | dest_name = element == 1 ? "result.layer.x" : "result.viewport.x"; | ||
| 1545 | break; | ||
| 1546 | case 3: | ||
| 1547 | dest_name = "result.pointsize.x"; | ||
| 1548 | break; | ||
| 1549 | } | ||
| 1550 | break; | ||
| 1551 | case Attribute::Index::ClipDistances0123: | ||
| 1552 | dest_name = fmt::format("result.clip[{}].x", element); | ||
| 1553 | break; | ||
| 1554 | case Attribute::Index::ClipDistances4567: | ||
| 1555 | dest_name = fmt::format("result.clip[{}].x", element + 4); | ||
| 1556 | break; | ||
| 1557 | default: | ||
| 1558 | if (!IsGenericAttribute(index)) { | ||
| 1559 | UNREACHABLE(); | ||
| 1560 | return {}; | ||
| 1561 | } | ||
| 1562 | dest_name = | ||
| 1563 | fmt::format("result.attrib[{}].{}", GetGenericAttributeIndex(index), swizzle); | ||
| 1564 | break; | ||
| 1565 | } | ||
| 1566 | } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { | ||
| 1567 | const std::string address = Visit(lmem->GetAddress()); | ||
| 1568 | AddLine("SHR.U {}, {}, 2;", address, address); | ||
| 1569 | dest_name = fmt::format("lmem[{}].x", address); | ||
| 1570 | } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { | ||
| 1571 | AddLine("STS.U32 {}, shared_mem[{}];", Visit(src), Visit(smem->GetAddress())); | ||
| 1572 | ResetTemporaries(); | ||
| 1573 | return {}; | ||
| 1574 | } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { | ||
| 1575 | AddLine("IF NE.x;"); | ||
| 1576 | AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem)); | ||
| 1577 | AddLine("ENDIF;"); | ||
| 1578 | ResetTemporaries(); | ||
| 1579 | return {}; | ||
| 1580 | } else { | ||
| 1581 | UNREACHABLE(); | ||
| 1582 | ResetTemporaries(); | ||
| 1583 | return {}; | ||
| 1584 | } | ||
| 1585 | |||
| 1586 | AddLine("MOV.U {}, {};", dest_name, Visit(src)); | ||
| 1587 | ResetTemporaries(); | ||
| 1588 | return {}; | ||
| 1589 | } | ||
| 1590 | |||
| 1591 | std::string ARBDecompiler::Select(Operation operation) { | ||
| 1592 | std::string temporary = AllocTemporary(); | ||
| 1593 | AddLine("CMP.S {}, {}, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]), | ||
| 1594 | Visit(operation[2])); | ||
| 1595 | return temporary; | ||
| 1596 | } | ||
| 1597 | |||
| 1598 | std::string ARBDecompiler::FClamp(Operation operation) { | ||
| 1599 | // 1.0f in hex; replace with std::bit_cast once C++20 is available | ||
| 1600 | static constexpr u32 POSITIVE_ONE = 0x3f800000; | ||
| 1601 | |||
| 1602 | std::string temporary = AllocTemporary(); | ||
| 1603 | const Node& value = operation[0]; | ||
| 1604 | const Node& low = operation[1]; | ||
| 1605 | const Node& high = operation[2]; | ||
| 1606 | const auto* const imm_low = std::get_if<ImmediateNode>(&*low); | ||
| 1607 | const auto* const imm_high = std::get_if<ImmediateNode>(&*high); | ||
| 1608 | if (imm_low && imm_high && imm_low->GetValue() == 0 && imm_high->GetValue() == POSITIVE_ONE) { | ||
| 1609 | AddLine("MOV.F32.SAT {}, {};", temporary, Visit(value)); | ||
| 1610 | } else { | ||
| 1611 | AddLine("MIN.F {}, {}, {};", temporary, Visit(value), Visit(high)); | ||
| 1612 | AddLine("MAX.F {}, {}, {};", temporary, temporary, Visit(low)); | ||
| 1613 | } | ||
| 1614 | return temporary; | ||
| 1615 | } | ||
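The fast path in FClamp() keys on 0x3f800000, which is the IEEE-754 bit pattern of 1.0f; a clamp to [0.0, 1.0] then collapses into a single saturated move. The std::bit_cast suggestion in the comment would make that relationship checkable at compile time, roughly:

    #include <bit>
    #include <cstdint>

    // C++20 sketch of the constant the comment refers to.
    static_assert(std::bit_cast<std::uint32_t>(1.0f) == 0x3f800000);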
| 1616 | |||
| 1617 | std::string ARBDecompiler::FCastHalf0(Operation operation) { | ||
| 1618 | const std::string temporary = AllocVectorTemporary(); | ||
| 1619 | AddLine("UP2H.F {}.x, {};", temporary, Visit(operation[0])); | ||
| 1620 | return fmt::format("{}.x", temporary); | ||
| 1621 | } | ||
| 1622 | |||
| 1623 | std::string ARBDecompiler::FCastHalf1(Operation operation) { | ||
| 1624 | const std::string temporary = AllocVectorTemporary(); | ||
| 1625 | AddLine("UP2H.F {}.y, {};", temporary, Visit(operation[0])); | ||
| 1626 | AddLine("MOV {}.x, {}.y;", temporary, temporary); | ||
| 1627 | return fmt::format("{}.x", temporary); | ||
| 1628 | } | ||
| 1629 | |||
| 1630 | std::string ARBDecompiler::FSqrt(Operation operation) { | ||
| 1631 | std::string temporary = AllocTemporary(); | ||
| 1632 | AddLine("RSQ.F32 {}, {};", temporary, Visit(operation[0])); | ||
| 1633 | AddLine("RCP.F32 {}, {};", temporary, temporary); | ||
| 1634 | return temporary; | ||
| 1635 | } | ||
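FSqrt() leans on a reciprocal identity rather than a dedicated square-root instruction: 1 / (1 / sqrt(x)) == sqrt(x) for positive x, up to one rounding step per instruction. The same computation in plain C++:

    #include <cmath>

    float SqrtViaRsqRcp(float x) {
        const float rsq = 1.0f / std::sqrt(x); // RSQ.F32
        return 1.0f / rsq;                     // RCP.F32
    }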
| 1636 | |||
| 1637 | std::string ARBDecompiler::FSwizzleAdd(Operation operation) { | ||
| 1638 | const std::string temporary = AllocVectorTemporary(); | ||
| 1639 | if (!device.HasWarpIntrinsics()) { | ||
| 1640 | LOG_ERROR(Render_OpenGL, | ||
| 1641 | "NV_shader_thread_shuffle is missing. Kepler or better is required."); | ||
| 1642 | AddLine("ADD.F {}.x, {}, {};", temporary, Visit(operation[0]), Visit(operation[1])); | ||
| 1643 | return fmt::format("{}.x", temporary); | ||
| 1644 | } | ||
| 1645 | |||
| 1646 | AddLine("AND.U {}.z, {}.threadid, 3;", temporary, StageInputName(stage)); | ||
| 1647 | AddLine("SHL.U {}.z, {}.z, 1;", temporary, temporary); | ||
| 1648 | AddLine("SHR.U {}.z, {}, {}.z;", temporary, Visit(operation[2]), temporary); | ||
| 1649 | AddLine("AND.U {}.z, {}.z, 3;", temporary, temporary); | ||
| 1650 | AddLine("MUL.F32 {}.x, {}, FSWZA[{}.z];", temporary, Visit(operation[0]), temporary); | ||
| 1651 | AddLine("MUL.F32 {}.y, {}, FSWZB[{}.z];", temporary, Visit(operation[1]), temporary); | ||
| 1652 | AddLine("ADD.F32 {}.x, {}.x, {}.y;", temporary, temporary, temporary); | ||
| 1653 | return fmt::format("{}.x", temporary); | ||
| 1654 | } | ||
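The intrinsic path of FSwizzleAdd() extracts a 2-bit selector per thread from the mask operand and uses it to index the FSWZA/FSWZB multiplier tables declared elsewhere in this file. A CPU-side model of the selection, with the table contents passed in as givens:

    #include <cstdint>

    // Mirrors the AND/SHL/SHR/AND/MUL/MUL/ADD sequence above.
    float SwizzleAdd(float a, float b, uint32_t mask, uint32_t thread_id,
                     const float fswza[4], const float fswzb[4]) {
        const uint32_t shift = (thread_id & 3) * 2; // two selector bits per lane
        const uint32_t index = (mask >> shift) & 3;
        return a * fswza[index] + b * fswzb[index];
    }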
| 1655 | |||
| 1656 | std::string ARBDecompiler::HAdd2(Operation operation) { | ||
| 1657 | const std::string tmp1 = AllocVectorTemporary(); | ||
| 1658 | const std::string tmp2 = AllocVectorTemporary(); | ||
| 1659 | AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); | ||
| 1660 | AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); | ||
| 1661 | AddLine("ADD.F16 {}, {}, {};", tmp1, tmp1, tmp2); | ||
| 1662 | AddLine("PK2H.F {}.x, {};", tmp1, tmp1); | ||
| 1663 | return fmt::format("{}.x", tmp1); | ||
| 1664 | } | ||
| 1665 | |||
| 1666 | std::string ARBDecompiler::HMul2(Operation operation) { | ||
| 1667 | const std::string tmp1 = AllocVectorTemporary(); | ||
| 1668 | const std::string tmp2 = AllocVectorTemporary(); | ||
| 1669 | AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); | ||
| 1670 | AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); | ||
| 1671 | AddLine("MUL.F16 {}, {}, {};", tmp1, tmp1, tmp2); | ||
| 1672 | AddLine("PK2H.F {}.x, {};", tmp1, tmp1); | ||
| 1673 | return fmt::format("{}.x", tmp1); | ||
| 1674 | } | ||
| 1675 | |||
| 1676 | std::string ARBDecompiler::HFma2(Operation operation) { | ||
| 1677 | const std::string tmp1 = AllocVectorTemporary(); | ||
| 1678 | const std::string tmp2 = AllocVectorTemporary(); | ||
| 1679 | const std::string tmp3 = AllocVectorTemporary(); | ||
| 1680 | AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); | ||
| 1681 | AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); | ||
| 1682 | AddLine("UP2H.F {}.xy, {};", tmp3, Visit(operation[2])); | ||
| 1683 | AddLine("MAD.F16 {}, {}, {}, {};", tmp1, tmp1, tmp2, tmp3); | ||
| 1684 | AddLine("PK2H.F {}.x, {};", tmp1, tmp1); | ||
| 1685 | return fmt::format("{}.x", tmp1); | ||
| 1686 | } | ||
| 1687 | |||
| 1688 | std::string ARBDecompiler::HAbsolute(Operation operation) { | ||
| 1689 | const std::string temporary = AllocVectorTemporary(); | ||
| 1690 | AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); | ||
| 1691 | AddLine("PK2H.F {}.x, |{}|;", temporary, temporary); | ||
| 1692 | return fmt::format("{}.x", temporary); | ||
| 1693 | } | ||
| 1694 | |||
| 1695 | std::string ARBDecompiler::HNegate(Operation operation) { | ||
| 1696 | const std::string temporary = AllocVectorTemporary(); | ||
| 1697 | AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); | ||
| 1698 | AddLine("MOVC.S RC.x, {};", Visit(operation[1])); | ||
| 1699 | AddLine("MOV.F {}.x (NE.x), -{}.x;", temporary, temporary); | ||
| 1700 | AddLine("MOVC.S RC.x, {};", Visit(operation[2])); | ||
| 1701 | AddLine("MOV.F {}.y (NE.x), -{}.y;", temporary, temporary); | ||
| 1702 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1703 | return fmt::format("{}.x", temporary); | ||
| 1704 | } | ||
| 1705 | |||
| 1706 | std::string ARBDecompiler::HClamp(Operation operation) { | ||
| 1707 | const std::string tmp1 = AllocVectorTemporary(); | ||
| 1708 | const std::string tmp2 = AllocVectorTemporary(); | ||
| 1709 | AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); | ||
| 1710 | AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[1])); | ||
| 1711 | AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2); | ||
| 1712 | AddLine("MAX.F {}, {}, {};", tmp1, tmp1, tmp2); | ||
| 1713 | AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[2])); | ||
| 1714 | AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2); | ||
| 1715 | AddLine("MIN.F {}, {}, {};", tmp1, tmp1, tmp2); | ||
| 1716 | AddLine("PK2H.F {}.x, {};", tmp1, tmp1); | ||
| 1717 | return fmt::format("{}.x", tmp1); | ||
| 1718 | } | ||
| 1719 | |||
| 1720 | std::string ARBDecompiler::HCastFloat(Operation operation) { | ||
| 1721 | const std::string temporary = AllocVectorTemporary(); | ||
| 1722 | AddLine("MOV.F {}.y, {{0, 0, 0, 0}};", temporary); | ||
| 1723 | AddLine("MOV.F {}.x, {};", temporary, Visit(operation[0])); | ||
| 1724 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1725 | return fmt::format("{}.x", temporary); | ||
| 1726 | } | ||
| 1727 | |||
| 1728 | std::string ARBDecompiler::HUnpack(Operation operation) { | ||
| 1729 | std::string operand = Visit(operation[0]); | ||
| 1730 | switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) { | ||
| 1731 | case Tegra::Shader::HalfType::H0_H1: | ||
| 1732 | return operand; | ||
| 1733 | case Tegra::Shader::HalfType::F32: { | ||
| 1734 | const std::string temporary = AllocVectorTemporary(); | ||
| 1735 | AddLine("MOV.U {}.x, {};", temporary, operand); | ||
| 1736 | AddLine("MOV.U {}.y, {}.x;", temporary, temporary); | ||
| 1737 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1738 | return fmt::format("{}.x", temporary); | ||
| 1739 | } | ||
| 1740 | case Tegra::Shader::HalfType::H0_H0: { | ||
| 1741 | const std::string temporary = AllocVectorTemporary(); | ||
| 1742 | AddLine("UP2H.F {}.xy, {};", temporary, operand); | ||
| 1743 | AddLine("MOV.U {}.y, {}.x;", temporary, temporary); | ||
| 1744 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1745 | return fmt::format("{}.x", temporary); | ||
| 1746 | } | ||
| 1747 | case Tegra::Shader::HalfType::H1_H1: { | ||
| 1748 | const std::string temporary = AllocVectorTemporary(); | ||
| 1749 | AddLine("UP2H.F {}.xy, {};", temporary, operand); | ||
| 1750 | AddLine("MOV.U {}.x, {}.y;", temporary, temporary); | ||
| 1751 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1752 | return fmt::format("{}.x", temporary); | ||
| 1753 | } | ||
| 1754 | } | ||
| 1755 | UNREACHABLE(); | ||
| 1756 | return "{0, 0, 0, 0}.x"; | ||
| 1757 | } | ||
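All of the H* helpers move packed half2 values through a single 32-bit scalar: unpack with UP2H, operate, repack with PK2H. HUnpack() additionally applies a lane swizzle before repacking. A sketch of that selection on the raw 16-bit halves (low-half-in-x is an assumption about UP2H's lane order; the F32 case is omitted since it broadcasts a full 32-bit float rather than a half):

    #include <cstdint>
    #include <utility>

    enum class HalfType { H0_H1, H0_H0, H1_H1 };

    std::pair<uint16_t, uint16_t> SwizzleHalf2(uint32_t packed, HalfType type) {
        const auto h0 = static_cast<uint16_t>(packed & 0xffff);
        const auto h1 = static_cast<uint16_t>(packed >> 16);
        switch (type) {
        case HalfType::H0_H0:
            return {h0, h0}; // broadcast the low half
        case HalfType::H1_H1:
            return {h1, h1}; // broadcast the high half
        case HalfType::H0_H1:
        default:
            return {h0, h1}; // pass through
        }
    }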
| 1758 | |||
| 1759 | std::string ARBDecompiler::HMergeF32(Operation operation) { | ||
| 1760 | const std::string temporary = AllocVectorTemporary(); | ||
| 1761 | AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); | ||
| 1762 | return fmt::format("{}.x", temporary); | ||
| 1763 | } | ||
| 1764 | |||
| 1765 | std::string ARBDecompiler::HMergeH0(Operation operation) { | ||
| 1766 | const std::string temporary = AllocVectorTemporary(); | ||
| 1767 | AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); | ||
| 1768 | AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1])); | ||
| 1769 | AddLine("MOV.U {}.x, {}.z;", temporary, temporary); | ||
| 1770 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1771 | return fmt::format("{}.x", temporary); | ||
| 1772 | } | ||
| 1773 | |||
| 1774 | std::string ARBDecompiler::HMergeH1(Operation operation) { | ||
| 1775 | const std::string temporary = AllocVectorTemporary(); | ||
| 1776 | AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); | ||
| 1777 | AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1])); | ||
| 1778 | AddLine("MOV.U {}.y, {}.w;", temporary, temporary); | ||
| 1779 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1780 | return fmt::format("{}.x", temporary); | ||
| 1781 | } | ||
| 1782 | |||
| 1783 | std::string ARBDecompiler::HPack2(Operation operation) { | ||
| 1784 | const std::string temporary = AllocVectorTemporary(); | ||
| 1785 | AddLine("MOV.U {}.x, {};", temporary, Visit(operation[0])); | ||
| 1786 | AddLine("MOV.U {}.y, {};", temporary, Visit(operation[1])); | ||
| 1787 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1788 | return fmt::format("{}.x", temporary); | ||
| 1789 | } | ||
| 1790 | |||
| 1791 | std::string ARBDecompiler::LogicalAssign(Operation operation) { | ||
| 1792 | const Node& dest = operation[0]; | ||
| 1793 | const Node& src = operation[1]; | ||
| 1794 | |||
| 1795 | std::string target; | ||
| 1796 | |||
| 1797 | if (const auto pred = std::get_if<PredicateNode>(&*dest)) { | ||
| 1798 | ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); | ||
| 1799 | |||
| 1800 | const Tegra::Shader::Pred index = pred->GetIndex(); | ||
| 1801 | switch (index) { | ||
| 1802 | case Tegra::Shader::Pred::NeverExecute: | ||
| 1803 | case Tegra::Shader::Pred::UnusedIndex: | ||
| 1804 | // Writing to these predicates is a no-op | ||
| 1805 | return {}; | ||
| 1806 | } | ||
| 1807 | target = fmt::format("P{}.x", static_cast<u64>(index)); | ||
| 1808 | } else if (const auto internal_flag = std::get_if<InternalFlagNode>(&*dest)) { | ||
| 1809 | const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag()); | ||
| 1810 | target = fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]); | ||
| 1811 | } else { | ||
| 1812 | UNREACHABLE(); | ||
| 1813 | ResetTemporaries(); | ||
| 1814 | return {}; | ||
| 1815 | } | ||
| 1816 | |||
| 1817 | AddLine("MOV.U {}, {};", target, Visit(src)); | ||
| 1818 | ResetTemporaries(); | ||
| 1819 | return {}; | ||
| 1820 | } | ||
| 1821 | |||
| 1822 | std::string ARBDecompiler::LogicalPick2(Operation operation) { | ||
| 1823 | std::string temporary = AllocTemporary(); | ||
| 1824 | const u32 index = std::get<ImmediateNode>(*operation[1]).GetValue(); | ||
| 1825 | AddLine("MOV.U {}, {}.{};", temporary, Visit(operation[0]), Swizzle(index)); | ||
| 1826 | return temporary; | ||
| 1827 | } | ||
| 1828 | |||
| 1829 | std::string ARBDecompiler::LogicalAnd2(Operation operation) { | ||
| 1830 | std::string temporary = AllocTemporary(); | ||
| 1831 | const std::string op = Visit(operation[0]); | ||
| 1832 | AddLine("AND.U {}, {}.x, {}.y;", temporary, op, op); | ||
| 1833 | return temporary; | ||
| 1834 | } | ||
| 1835 | |||
| 1836 | std::string ARBDecompiler::FloatOrdered(Operation operation) { | ||
| 1837 | std::string temporary = AllocTemporary(); | ||
| 1838 | AddLine("MOVC.F32 RC.x, {};", Visit(operation[0])); | ||
| 1839 | AddLine("MOVC.F32 RC.y, {};", Visit(operation[1])); | ||
| 1840 | AddLine("MOV.S {}, -1;", temporary); | ||
| 1841 | AddLine("MOV.S {} (NAN.x), 0;", temporary); | ||
| 1842 | AddLine("MOV.S {} (NAN.y), 0;", temporary); | ||
| 1843 | return temporary; | ||
| 1844 | } | ||
| 1845 | |||
| 1846 | std::string ARBDecompiler::FloatUnordered(Operation operation) { | ||
| 1847 | std::string temporary = AllocTemporary(); | ||
| 1848 | AddLine("MOVC.F32 RC.x, {};", Visit(operation[0])); | ||
| 1849 | AddLine("MOVC.F32 RC.y, {};", Visit(operation[1])); | ||
| 1850 | AddLine("MOV.S {}, 0;", temporary); | ||
| 1851 | AddLine("MOV.S {} (NAN.x), -1;", temporary); | ||
| 1852 | AddLine("MOV.S {} (NAN.y), -1;", temporary); | ||
| 1853 | return temporary; | ||
| 1854 | } | ||
| 1855 | |||
| 1856 | std::string ARBDecompiler::LogicalAddCarry(Operation operation) { | ||
| 1857 | std::string temporary = AllocTemporary(); | ||
| 1858 | AddLine("ADDC.U RC, {}, {};", Visit(operation[0]), Visit(operation[1])); | ||
| 1859 | AddLine("MOV.S {}, 0;", temporary); | ||
| 1860 | AddLine("IF CF.x;"); | ||
| 1861 | AddLine("MOV.S {}, -1;", temporary); | ||
| 1862 | AddLine("ENDIF;"); | ||
| 1863 | return temporary; | ||
| 1864 | } | ||
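LogicalAddCarry() surfaces the hardware carry flag set by ADDC.U as a boolean register. The equivalent check without a carry flag is the classic unsigned-wraparound test:

    #include <cstdint>

    // A 32-bit unsigned add carries exactly when the result wraps
    // below either operand.
    bool AddCarries(uint32_t a, uint32_t b) {
        return static_cast<uint32_t>(a + b) < a;
    }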
| 1865 | |||
| 1866 | std::string ARBDecompiler::Texture(Operation operation) { | ||
| 1867 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1868 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1869 | const auto [coords, temporary, swizzle] = BuildCoords(operation); | ||
| 1870 | |||
| 1871 | std::string_view opcode = "TEX"; | ||
| 1872 | std::string extra; | ||
| 1873 | if (meta.bias) { | ||
| 1874 | ASSERT(!meta.lod); | ||
| 1875 | opcode = "TXB"; | ||
| 1876 | |||
| 1877 | if (swizzle < 4) { | ||
| 1878 | AddLine("MOV.F {}.w, {};", temporary, Visit(meta.bias)); | ||
| 1879 | } else { | ||
| 1880 | const std::string bias = AllocTemporary(); | ||
| 1881 | AddLine("MOV.F {}, {};", bias, Visit(meta.bias)); | ||
| 1882 | extra = fmt::format(" {},", bias); | ||
| 1883 | } | ||
| 1884 | } | ||
| 1885 | if (meta.lod) { | ||
| 1886 | ASSERT(!meta.bias); | ||
| 1887 | opcode = "TXL"; | ||
| 1888 | |||
| 1889 | if (swizzle < 4) { | ||
| 1890 | AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod)); | ||
| 1891 | } else { | ||
| 1892 | const std::string lod = AllocTemporary(); | ||
| 1893 | AddLine("MOV.F {}, {};", lod, Visit(meta.lod)); | ||
| 1894 | extra = fmt::format(" {},", lod); | ||
| 1895 | } | ||
| 1896 | } | ||
| 1897 | |||
| 1898 | AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, coords, extra, sampler_id, | ||
| 1899 | TextureType(meta), BuildAoffi(operation)); | ||
| 1900 | AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1901 | return fmt::format("{}.x", temporary); | ||
| 1902 | } | ||
| 1903 | |||
| 1904 | std::string ARBDecompiler::TextureGather(Operation operation) { | ||
| 1905 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1906 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1907 | const auto [coords, temporary, swizzle] = BuildCoords(operation); | ||
| 1908 | |||
| 1909 | std::string comp; | ||
| 1910 | if (!meta.sampler.is_shadow) { | ||
| 1911 | const auto& immediate = std::get<ImmediateNode>(*meta.component); | ||
| 1912 | comp = fmt::format(".{}", Swizzle(immediate.GetValue())); | ||
| 1913 | } | ||
| 1914 | |||
| 1915 | AddLine("TXG.F {}, {}, texture[{}]{}, {}{};", temporary, temporary, sampler_id, comp, | ||
| 1916 | TextureType(meta), BuildAoffi(operation)); | ||
| 1917 | AddLine("MOV.U {}.x, {}.{};", temporary, coords, Swizzle(meta.element)); | ||
| 1918 | return fmt::format("{}.x", temporary); | ||
| 1919 | } | ||
| 1920 | |||
| 1921 | std::string ARBDecompiler::TextureQueryDimensions(Operation operation) { | ||
| 1922 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1923 | const std::string temporary = AllocVectorTemporary(); | ||
| 1924 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1925 | |||
| 1926 | ASSERT(!meta.sampler.is_array); | ||
| 1927 | |||
| 1928 | const std::string lod = operation.GetOperandsCount() > 0 ? Visit(operation[0]) : "0"; | ||
| 1929 | AddLine("TXQ {}, {}, texture[{}], {};", temporary, lod, sampler_id, TextureType(meta)); | ||
| 1930 | AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1931 | return fmt::format("{}.x", temporary); | ||
| 1932 | } | ||
| 1933 | |||
| 1934 | std::string ARBDecompiler::TextureQueryLod(Operation operation) { | ||
| 1935 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1936 | const std::string temporary = AllocVectorTemporary(); | ||
| 1937 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1938 | |||
| 1939 | ASSERT(!meta.sampler.is_array); | ||
| 1940 | |||
| 1941 | const std::size_t count = operation.GetOperandsCount(); | ||
| 1942 | for (std::size_t i = 0; i < count; ++i) { | ||
| 1943 | AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); | ||
| 1944 | } | ||
| 1945 | AddLine("LOD.F {}, {}, texture[{}], {};", temporary, temporary, sampler_id, TextureType(meta)); | ||
| 1946 | AddLine("MUL.F32 {}, {}, {{256, 256, 0, 0}};", temporary, temporary); | ||
| 1947 | AddLine("TRUNC.S {}, {};", temporary, temporary); | ||
| 1948 | AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1949 | return fmt::format("{}.x", temporary); | ||
| 1950 | } | ||
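TextureQueryLod() converts the float LOD returned by the LOD.F instruction into fixed point: multiplying by 256 and truncating yields an 8.8 fixed-point value, apparently the format the guest's query expects. Condensed:

    #include <cstdint>

    // Matches the MUL.F32 by {256, 256, 0, 0} followed by TRUNC.S above.
    int32_t LodToFixed8_8(float lod) {
        return static_cast<int32_t>(lod * 256.0f); // truncates toward zero
    }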
| 1951 | |||
| 1952 | std::string ARBDecompiler::TexelFetch(Operation operation) { | ||
| 1953 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1954 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1955 | const auto [coords, temporary, swizzle] = BuildCoords(operation); | ||
| 1956 | |||
| 1957 | if (!meta.sampler.is_buffer) { | ||
| 1958 | ASSERT(swizzle < 4); | ||
| 1959 | AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod)); | ||
| 1960 | } | ||
| 1961 | AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, coords, sampler_id, TextureType(meta), | ||
| 1962 | BuildAoffi(operation)); | ||
| 1963 | AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1964 | return fmt::format("{}.x", temporary); | ||
| 1965 | } | ||
| 1966 | |||
| 1967 | std::string ARBDecompiler::TextureGradient(Operation operation) { | ||
| 1968 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1969 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1970 | const std::string ddx = AllocVectorTemporary(); | ||
| 1971 | const std::string ddy = AllocVectorTemporary(); | ||
| 1972 | const std::string coord = std::get<1>(BuildCoords(operation)); | ||
| 1973 | |||
| 1974 | const std::size_t num_components = meta.derivates.size() / 2; | ||
| 1975 | for (std::size_t index = 0; index < num_components; ++index) { | ||
| 1976 | const char swizzle = Swizzle(index); | ||
| 1977 | AddLine("MOV.F {}.{}, {};", ddx, swizzle, Visit(meta.derivates[index * 2])); | ||
| 1978 | AddLine("MOV.F {}.{}, {};", ddy, swizzle, Visit(meta.derivates[index * 2 + 1])); | ||
| 1979 | } | ||
| 1980 | |||
| 1981 | const std::string_view result = coord; | ||
| 1982 | AddLine("TXD.F {}, {}, {}, {}, texture[{}], {}{};", result, coord, ddx, ddy, sampler_id, | ||
| 1983 | TextureType(meta), BuildAoffi(operation)); | ||
| 1984 | AddLine("MOV.F {}.x, {}.{};", result, result, Swizzle(meta.element)); | ||
| 1985 | return fmt::format("{}.x", result); | ||
| 1986 | } | ||
| 1987 | |||
| 1988 | std::string ARBDecompiler::ImageLoad(Operation operation) { | ||
| 1989 | const auto& meta = std::get<MetaImage>(operation.GetMeta()); | ||
| 1990 | const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; | ||
| 1991 | const std::size_t count = operation.GetOperandsCount(); | ||
| 1992 | const std::string_view type = ImageType(meta.image.type); | ||
| 1993 | |||
| 1994 | const std::string temporary = AllocVectorTemporary(); | ||
| 1995 | for (std::size_t i = 0; i < count; ++i) { | ||
| 1996 | AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); | ||
| 1997 | } | ||
| 1998 | AddLine("LOADIM.F {}, {}, image[{}], {};", temporary, temporary, image_id, type); | ||
| 1999 | AddLine("MOV.F {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 2000 | return fmt::format("{}.x", temporary); | ||
| 2001 | } | ||
| 2002 | |||
| 2003 | std::string ARBDecompiler::ImageStore(Operation operation) { | ||
| 2004 | const auto& meta = std::get<MetaImage>(operation.GetMeta()); | ||
| 2005 | const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; | ||
| 2006 | const std::size_t num_coords = operation.GetOperandsCount(); | ||
| 2007 | const std::size_t num_values = meta.values.size(); | ||
| 2008 | const std::string_view type = ImageType(meta.image.type); | ||
| 2009 | |||
| 2010 | const std::string coord = AllocVectorTemporary(); | ||
| 2011 | const std::string value = AllocVectorTemporary(); | ||
| 2012 | for (std::size_t i = 0; i < num_coords; ++i) { | ||
| 2013 | AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i])); | ||
| 2014 | } | ||
| 2015 | for (std::size_t i = 0; i < num_values; ++i) { | ||
| 2016 | AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i])); | ||
| 2017 | } | ||
| 2018 | AddLine("STOREIM.F image[{}], {}, {}, {};", image_id, value, coord, type); | ||
| 2019 | return {}; | ||
| 2020 | } | ||
| 2021 | |||
| 2022 | std::string ARBDecompiler::Branch(Operation operation) { | ||
| 2023 | const auto target = std::get<ImmediateNode>(*operation[0]); | ||
| 2024 | AddLine("MOV.U PC.x, {};", target.GetValue()); | ||
| 2025 | AddLine("CONT;"); | ||
| 2026 | return {}; | ||
| 2027 | } | ||
| 2028 | |||
| 2029 | std::string ARBDecompiler::BranchIndirect(Operation operation) { | ||
| 2030 | AddLine("MOV.U PC.x, {};", Visit(operation[0])); | ||
| 2031 | AddLine("CONT;"); | ||
| 2032 | return {}; | ||
| 2033 | } | ||
| 2034 | |||
| 2035 | std::string ARBDecompiler::PushFlowStack(Operation operation) { | ||
| 2036 | const auto stack = std::get<MetaStackClass>(operation.GetMeta()); | ||
| 2037 | const u32 target = std::get<ImmediateNode>(*operation[0]).GetValue(); | ||
| 2038 | const std::string_view stack_name = StackName(stack); | ||
| 2039 | AddLine("MOV.U {}[{}_TOP.x].x, {};", stack_name, stack_name, target); | ||
| 2040 | AddLine("ADD.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name); | ||
| 2041 | return {}; | ||
| 2042 | } | ||
| 2043 | |||
| 2044 | std::string ARBDecompiler::PopFlowStack(Operation operation) { | ||
| 2045 | const auto stack = std::get<MetaStackClass>(operation.GetMeta()); | ||
| 2046 | const std::string_view stack_name = StackName(stack); | ||
| 2047 | AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name); | ||
| 2048 | AddLine("MOV.U PC.x, {}[{}_TOP.x].x;", stack_name, stack_name); | ||
| 2049 | AddLine("CONT;"); | ||
| 2050 | return {}; | ||
| 2051 | } | ||
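PushFlowStack()/PopFlowStack() emulate the hardware's structured-control-flow stacks (SSY/PBK return targets) with one GLASM array per stack class, a _TOP cursor, and a software PC that the surrounding dispatch loop branches on after each CONT. The same mechanism as a plain data structure (the capacity is an assumption):

    #include <array>
    #include <cstdint>

    struct FlowStack {
        std::array<uint32_t, 20> storage{};
        int32_t top = 0;

        void Push(uint32_t target) { storage[top++] = target; } // MOV then ADD
        uint32_t Pop() { return storage[--top]; }                // SUB then MOV
    };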
| 2052 | |||
| 2053 | std::string ARBDecompiler::Exit(Operation) { | ||
| 2054 | Exit(); | ||
| 2055 | return {}; | ||
| 2056 | } | ||
| 2057 | |||
| 2058 | std::string ARBDecompiler::Discard(Operation) { | ||
| 2059 | AddLine("KIL TR;"); | ||
| 2060 | return {}; | ||
| 2061 | } | ||
| 2062 | |||
| 2063 | std::string ARBDecompiler::EmitVertex(Operation) { | ||
| 2064 | AddLine("EMIT;"); | ||
| 2065 | return {}; | ||
| 2066 | } | ||
| 2067 | |||
| 2068 | std::string ARBDecompiler::EndPrimitive(Operation) { | ||
| 2069 | AddLine("ENDPRIM;"); | ||
| 2070 | return {}; | ||
| 2071 | } | ||
| 2072 | |||
| 2073 | std::string ARBDecompiler::InvocationId(Operation) { | ||
| 2074 | return "primitive.invocation"; | ||
| 2075 | } | ||
| 2076 | |||
| 2077 | std::string ARBDecompiler::YNegate(Operation) { | ||
| 2078 | LOG_WARNING(Render_OpenGL, "(STUBBED)"); | ||
| 2079 | std::string temporary = AllocTemporary(); | ||
| 2080 | AddLine("MOV.F {}, 1;", temporary); | ||
| 2081 | return temporary; | ||
| 2082 | } | ||
| 2083 | |||
| 2084 | std::string ARBDecompiler::ThreadId(Operation) { | ||
| 2085 | return fmt::format("{}.threadid", StageInputName(stage)); | ||
| 2086 | } | ||
| 2087 | |||
| 2088 | std::string ARBDecompiler::ShuffleIndexed(Operation operation) { | ||
| 2089 | if (!device.HasWarpIntrinsics()) { | ||
| 2090 | LOG_ERROR(Render_OpenGL, | ||
| 2091 | "NV_shader_thread_shuffle is missing. Kepler or better is required."); | ||
| 2092 | return Visit(operation[0]); | ||
| 2093 | } | ||
| 2094 | const std::string temporary = AllocVectorTemporary(); | ||
| 2095 | AddLine("SHFIDX.U {}, {}, {}, {{31, 0, 0, 0}};", temporary, Visit(operation[0]), | ||
| 2096 | Visit(operation[1])); | ||
| 2097 | AddLine("MOV.U {}.x, {}.y;", temporary, temporary); | ||
| 2098 | return fmt::format("{}.x", temporary); | ||
| 2099 | } | ||
| 2100 | |||
| 2101 | std::string ARBDecompiler::Barrier(Operation) { | ||
| 2102 | AddLine("BAR;"); | ||
| 2103 | return {}; | ||
| 2104 | } | ||
| 2105 | |||
| 2106 | std::string ARBDecompiler::MemoryBarrierGroup(Operation) { | ||
| 2107 | AddLine("MEMBAR.CTA;"); | ||
| 2108 | return {}; | ||
| 2109 | } | ||
| 2110 | |||
| 2111 | std::string ARBDecompiler::MemoryBarrierGlobal(Operation) { | ||
| 2112 | AddLine("MEMBAR;"); | ||
| 2113 | return {}; | ||
| 2114 | } | ||
| 2115 | |||
| 2116 | } // Anonymous namespace | ||
| 2117 | |||
| 2118 | std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | ||
| 2119 | const VideoCommon::Shader::Registry& registry, | ||
| 2120 | Tegra::Engines::ShaderType stage, std::string_view identifier) { | ||
| 2121 | return ARBDecompiler(device, ir, registry, stage, identifier).Code(); | ||
| 2122 | } | ||
| 2123 | |||
| 2124 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.h b/src/video_core/renderer_opengl/gl_arb_decompiler.h deleted file mode 100644 index 6afc87220..000000000 --- a/src/video_core/renderer_opengl/gl_arb_decompiler.h +++ /dev/null | |||
| @@ -1,29 +0,0 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <string> | ||
| 8 | #include <string_view> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | |||
| 12 | namespace Tegra::Engines { | ||
| 13 | enum class ShaderType : u32; | ||
| 14 | } | ||
| 15 | |||
| 16 | namespace VideoCommon::Shader { | ||
| 17 | class ShaderIR; | ||
| 18 | class Registry; | ||
| 19 | } // namespace VideoCommon::Shader | ||
| 20 | |||
| 21 | namespace OpenGL { | ||
| 22 | |||
| 23 | class Device; | ||
| 24 | |||
| 25 | std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | ||
| 26 | const VideoCommon::Shader::Registry& registry, | ||
| 27 | Tegra::Engines::ShaderType stage, std::string_view identifier); | ||
| 28 | |||
| 29 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index c4189fb60..07a995f7d 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -2,14 +2,18 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <algorithm> | ||
| 5 | #include <span> | 6 | #include <span> |
| 6 | 7 | ||
| 7 | #include "video_core/buffer_cache/buffer_cache.h" | 8 | #include "video_core/buffer_cache/buffer_cache.h" |
| 8 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | 9 | #include "video_core/renderer_opengl/gl_buffer_cache.h" |
| 9 | #include "video_core/renderer_opengl/gl_device.h" | 10 | #include "video_core/renderer_opengl/gl_device.h" |
| 11 | #include "video_core/renderer_opengl/maxwell_to_gl.h" | ||
| 10 | 12 | ||
| 11 | namespace OpenGL { | 13 | namespace OpenGL { |
| 12 | namespace { | 14 | namespace { |
| 15 | using VideoCore::Surface::PixelFormat; | ||
| 16 | |||
| 13 | struct BindlessSSBO { | 17 | struct BindlessSSBO { |
| 14 | GLuint64EXT address; | 18 | GLuint64EXT address; |
| 15 | GLsizei length; | 19 | GLsizei length; |
| @@ -21,6 +25,25 @@ constexpr std::array PROGRAM_LUT{ | |||
| 21 | GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, | 25 | GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, |
| 22 | GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, | 26 | GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, |
| 23 | }; | 27 | }; |
| 28 | |||
| 29 | [[nodiscard]] GLenum GetTextureBufferFormat(GLenum gl_format) { | ||
| 30 | switch (gl_format) { | ||
| 31 | case GL_RGBA8_SNORM: | ||
| 32 | return GL_RGBA8; | ||
| 33 | case GL_R8_SNORM: | ||
| 34 | return GL_R8; | ||
| 35 | case GL_RGBA16_SNORM: | ||
| 36 | return GL_RGBA16; | ||
| 37 | case GL_R16_SNORM: | ||
| 38 | return GL_R16; | ||
| 39 | case GL_RG16_SNORM: | ||
| 40 | return GL_RG16; | ||
| 41 | case GL_RG8_SNORM: | ||
| 42 | return GL_RG8; | ||
| 43 | default: | ||
| 44 | return gl_format; | ||
| 45 | } | ||
| 46 | } | ||
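GetTextureBufferFormat() substitutes UNORM internal formats because core GL's texture-buffer format list omits SNORM; the LOG_WARNING in Buffer::View further down matters because the raw payload survives the substitution but decodes to a different range. Contrast, using GL's 8-bit normalization rules:

    #include <algorithm>
    #include <cstdint>

    float DecodeUnorm8(uint8_t v) {
        return v / 255.0f; // maps to [0, 1]
    }
    float DecodeSnorm8(int8_t v) {
        return std::max(v / 127.0f, -1.0f); // maps to [-1, 1]
    }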
| 24 | } // Anonymous namespace | 47 | } // Anonymous namespace |
| 25 | 48 | ||
| 26 | Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) | 49 | Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) |
| @@ -62,6 +85,30 @@ void Buffer::MakeResident(GLenum access) noexcept { | |||
| 62 | glMakeNamedBufferResidentNV(buffer.handle, access); | 85 | glMakeNamedBufferResidentNV(buffer.handle, access); |
| 63 | } | 86 | } |
| 64 | 87 | ||
| 88 | GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) { | ||
| 89 | const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) { | ||
| 90 | return offset == view.offset && size == view.size && format == view.format; | ||
| 91 | })}; | ||
| 92 | if (it != views.end()) { | ||
| 93 | return it->texture.handle; | ||
| 94 | } | ||
| 95 | OGLTexture texture; | ||
| 96 | texture.Create(GL_TEXTURE_BUFFER); | ||
| 97 | const GLenum gl_format{MaxwellToGL::GetFormatTuple(format).internal_format}; | ||
| 98 | const GLenum texture_format{GetTextureBufferFormat(gl_format)}; | ||
| 99 | if (texture_format != gl_format) { | ||
| 100 | LOG_WARNING(Render_OpenGL, "Emulating SNORM texture buffer with UNORM."); | ||
| 101 | } | ||
| 102 | glTextureBufferRange(texture.handle, texture_format, buffer.handle, offset, size); | ||
| 103 | views.push_back({ | ||
| 104 | .offset = offset, | ||
| 105 | .size = size, | ||
| 106 | .format = format, | ||
| 107 | .texture = std::move(texture), | ||
| 108 | }); | ||
| 109 | return views.back().texture.handle; | ||
| 110 | } | ||
| 111 | |||
| 65 | BufferCacheRuntime::BufferCacheRuntime(const Device& device_) | 112 | BufferCacheRuntime::BufferCacheRuntime(const Device& device_) |
| 66 | : device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()}, | 113 | : device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()}, |
| 67 | use_assembly_shaders{device.UseAssemblyShaders()}, | 114 | use_assembly_shaders{device.UseAssemblyShaders()}, |
| @@ -100,7 +147,7 @@ void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, | |||
| 100 | 147 | ||
| 101 | void BufferCacheRuntime::ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value) { | 148 | void BufferCacheRuntime::ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value) { |
| 102 | glClearNamedBufferSubData(dest_buffer.Handle(), GL_R32UI, static_cast<GLintptr>(offset), | 149 | glClearNamedBufferSubData(dest_buffer.Handle(), GL_R32UI, static_cast<GLintptr>(offset), |
| 103 | static_cast<GLsizeiptr>(size / sizeof(u32)), GL_RGBA, GL_UNSIGNED_INT, | 150 | static_cast<GLsizeiptr>(size / sizeof(u32)), GL_RED, GL_UNSIGNED_INT, |
| 104 | &value); | 151 | &value); |
| 105 | } | 152 | } |
| 106 | 153 | ||
| @@ -144,7 +191,7 @@ void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buff | |||
| 144 | glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, | 191 | glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, |
| 145 | static_cast<GLsizeiptr>(size)); | 192 | static_cast<GLsizeiptr>(size)); |
| 146 | } else { | 193 | } else { |
| 147 | const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; | 194 | const GLuint base_binding = graphics_base_uniform_bindings[stage]; |
| 148 | const GLuint binding = base_binding + binding_index; | 195 | const GLuint binding = base_binding + binding_index; |
| 149 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(), | 196 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(), |
| 150 | static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); | 197 | static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); |
| @@ -171,7 +218,12 @@ void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buf | |||
| 171 | 218 | ||
| 172 | void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer, | 219 | void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer, |
| 173 | u32 offset, u32 size, bool is_written) { | 220 | u32 offset, u32 size, bool is_written) { |
| 174 | if (use_assembly_shaders) { | 221 | if (use_storage_buffers) { |
| 222 | const GLuint base_binding = graphics_base_storage_bindings[stage]; | ||
| 223 | const GLuint binding = base_binding + binding_index; | ||
| 224 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(), | ||
| 225 | static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); | ||
| 226 | } else { | ||
| 175 | const BindlessSSBO ssbo{ | 227 | const BindlessSSBO ssbo{ |
| 176 | .address = buffer.HostGpuAddr() + offset, | 228 | .address = buffer.HostGpuAddr() + offset, |
| 177 | .length = static_cast<GLsizei>(size), | 229 | .length = static_cast<GLsizei>(size), |
| @@ -180,17 +232,19 @@ void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buff | |||
| 180 | buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY); | 232 | buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY); |
| 181 | glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1, | 233 | glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1, |
| 182 | reinterpret_cast<const GLuint*>(&ssbo)); | 234 | reinterpret_cast<const GLuint*>(&ssbo)); |
| 183 | } else { | ||
| 184 | const GLuint base_binding = device.GetBaseBindings(stage).shader_storage_buffer; | ||
| 185 | const GLuint binding = base_binding + binding_index; | ||
| 186 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(), | ||
| 187 | static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); | ||
| 188 | } | 235 | } |
| 189 | } | 236 | } |
| 190 | 237 | ||
| 191 | void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset, | 238 | void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset, |
| 192 | u32 size, bool is_written) { | 239 | u32 size, bool is_written) { |
| 193 | if (use_assembly_shaders) { | 240 | if (use_storage_buffers) { |
| 241 | if (size != 0) { | ||
| 242 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(), | ||
| 243 | static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); | ||
| 244 | } else { | ||
| 245 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0); | ||
| 246 | } | ||
| 247 | } else { | ||
| 194 | const BindlessSSBO ssbo{ | 248 | const BindlessSSBO ssbo{ |
| 195 | .address = buffer.HostGpuAddr() + offset, | 249 | .address = buffer.HostGpuAddr() + offset, |
| 196 | .length = static_cast<GLsizei>(size), | 250 | .length = static_cast<GLsizei>(size), |
| @@ -199,11 +253,6 @@ void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buf | |||
| 199 | buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY); | 253 | buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY); |
| 200 | glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1, | 254 | glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1, |
| 201 | reinterpret_cast<const GLuint*>(&ssbo)); | 255 | reinterpret_cast<const GLuint*>(&ssbo)); |
| 202 | } else if (size == 0) { | ||
| 203 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0); | ||
| 204 | } else { | ||
| 205 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(), | ||
| 206 | static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); | ||
| 207 | } | 256 | } |
| 208 | } | 257 | } |
| 209 | 258 | ||
| @@ -213,4 +262,13 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer, | |||
| 213 | static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); | 262 | static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); |
| 214 | } | 263 | } |
| 215 | 264 | ||
| 265 | void BufferCacheRuntime::BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, | ||
| 266 | PixelFormat format) { | ||
| 267 | *texture_handles++ = buffer.View(offset, size, format); | ||
| 268 | } | ||
| 269 | |||
| 270 | void BufferCacheRuntime::BindImageBuffer(Buffer& buffer, u32 offset, u32 size, PixelFormat format) { | ||
| 271 | *image_handles++ = buffer.View(offset, size, format); | ||
| 272 | } | ||
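BindTextureBuffer()/BindImageBuffer() do not bind anything directly: they write view handles through bump cursors that SetImagePointers() aimed at caller-owned arrays, leaving the batched glBindTextures-style upload to the pipeline. The pattern in isolation (names hypothetical):

    #include <array>
    #include <cstdint>

    using GLuint = std::uint32_t; // stand-in for the GL typedef

    struct HandleSink {
        std::array<GLuint, 64> handles{};
        GLuint* cursor = handles.data();

        // Each bind call appends one handle; the caller guarantees capacity
        // and later consumes handles[0..cursor) in a single batched bind.
        void Push(GLuint handle) { *cursor++ = handle; }
    };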
| 273 | |||
| 216 | } // namespace OpenGL | 274 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index fe91aa452..060d36427 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -32,6 +32,8 @@ public: | |||
| 32 | 32 | ||
| 33 | void MakeResident(GLenum access) noexcept; | 33 | void MakeResident(GLenum access) noexcept; |
| 34 | 34 | ||
| 35 | [[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format); | ||
| 36 | |||
| 35 | [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept { | 37 | [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept { |
| 36 | return address; | 38 | return address; |
| 37 | } | 39 | } |
| @@ -41,9 +43,17 @@ public: | |||
| 41 | } | 43 | } |
| 42 | 44 | ||
| 43 | private: | 45 | private: |
| 46 | struct BufferView { | ||
| 47 | u32 offset; | ||
| 48 | u32 size; | ||
| 49 | VideoCore::Surface::PixelFormat format; | ||
| 50 | OGLTexture texture; | ||
| 51 | }; | ||
| 52 | |||
| 44 | GLuint64EXT address = 0; | 53 | GLuint64EXT address = 0; |
| 45 | OGLBuffer buffer; | 54 | OGLBuffer buffer; |
| 46 | GLenum current_residency_access = GL_NONE; | 55 | GLenum current_residency_access = GL_NONE; |
| 56 | std::vector<BufferView> views; | ||
| 47 | }; | 57 | }; |
| 48 | 58 | ||
| 49 | class BufferCacheRuntime { | 59 | class BufferCacheRuntime { |
| @@ -75,17 +85,21 @@ public: | |||
| 75 | 85 | ||
| 76 | void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size); | 86 | void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size); |
| 77 | 87 | ||
| 88 | void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, | ||
| 89 | VideoCore::Surface::PixelFormat format); | ||
| 90 | |||
| 91 | void BindImageBuffer(Buffer& buffer, u32 offset, u32 size, | ||
| 92 | VideoCore::Surface::PixelFormat format); | ||
| 93 | |||
| 78 | void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) { | 94 | void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) { |
| 95 | const GLuint handle = fast_uniforms[stage][binding_index].handle; | ||
| 96 | const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size); | ||
| 79 | if (use_assembly_shaders) { | 97 | if (use_assembly_shaders) { |
| 80 | const GLuint handle = fast_uniforms[stage][binding_index].handle; | ||
| 81 | const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size); | ||
| 82 | glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size); | 98 | glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size); |
| 83 | } else { | 99 | } else { |
| 84 | const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; | 100 | const GLuint base_binding = graphics_base_uniform_bindings[stage]; |
| 85 | const GLuint binding = base_binding + binding_index; | 101 | const GLuint binding = base_binding + binding_index; |
| 86 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, | 102 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, handle, 0, gl_size); |
| 87 | fast_uniforms[stage][binding_index].handle, 0, | ||
| 88 | static_cast<GLsizeiptr>(size)); | ||
| 89 | } | 103 | } |
| 90 | } | 104 | } |
| 91 | 105 | ||
| @@ -103,7 +117,7 @@ public: | |||
| 103 | 117 | ||
| 104 | std::span<u8> BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept { | 118 | std::span<u8> BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept { |
| 105 | const auto [mapped_span, offset] = stream_buffer->Request(static_cast<size_t>(size)); | 119 | const auto [mapped_span, offset] = stream_buffer->Request(static_cast<size_t>(size)); |
| 106 | const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; | 120 | const GLuint base_binding = graphics_base_uniform_bindings[stage]; |
| 107 | const GLuint binding = base_binding + binding_index; | 121 | const GLuint binding = base_binding + binding_index; |
| 108 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(), | 122 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(), |
| 109 | static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); | 123 | static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); |
| @@ -118,6 +132,27 @@ public: | |||
| 118 | return has_fast_buffer_sub_data; | 132 | return has_fast_buffer_sub_data; |
| 119 | } | 133 | } |
| 120 | 134 | ||
| 135 | [[nodiscard]] bool SupportsNonZeroUniformOffset() const noexcept { | ||
| 136 | return !use_assembly_shaders; | ||
| 137 | } | ||
| 138 | |||
| 139 | void SetBaseUniformBindings(const std::array<GLuint, 5>& bindings) { | ||
| 140 | graphics_base_uniform_bindings = bindings; | ||
| 141 | } | ||
| 142 | |||
| 143 | void SetBaseStorageBindings(const std::array<GLuint, 5>& bindings) { | ||
| 144 | graphics_base_storage_bindings = bindings; | ||
| 145 | } | ||
| 146 | |||
| 147 | void SetImagePointers(GLuint* texture_handles_, GLuint* image_handles_) { | ||
| 148 | texture_handles = texture_handles_; | ||
| 149 | image_handles = image_handles_; | ||
| 150 | } | ||
| 151 | |||
| 152 | void SetEnableStorageBuffers(bool use_storage_buffers_) { | ||
| 153 | use_storage_buffers = use_storage_buffers_; | ||
| 154 | } | ||
| 155 | |||
| 121 | private: | 156 | private: |
| 122 | static constexpr std::array PABO_LUT{ | 157 | static constexpr std::array PABO_LUT{ |
| 123 | GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, | 158 | GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, |
| @@ -131,8 +166,15 @@ private: | |||
| 131 | bool use_assembly_shaders = false; | 166 | bool use_assembly_shaders = false; |
| 132 | bool has_unified_vertex_buffers = false; | 167 | bool has_unified_vertex_buffers = false; |
| 133 | 168 | ||
| 169 | bool use_storage_buffers = false; | ||
| 170 | |||
| 134 | u32 max_attributes = 0; | 171 | u32 max_attributes = 0; |
| 135 | 172 | ||
| 173 | std::array<GLuint, 5> graphics_base_uniform_bindings{}; | ||
| 174 | std::array<GLuint, 5> graphics_base_storage_bindings{}; | ||
| 175 | GLuint* texture_handles = nullptr; | ||
| 176 | GLuint* image_handles = nullptr; | ||
| 177 | |||
| 136 | std::optional<StreamBuffer> stream_buffer; | 178 | std::optional<StreamBuffer> stream_buffer; |
| 137 | 179 | ||
| 138 | std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>, | 180 | std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>, |
| @@ -156,6 +198,7 @@ struct BufferCacheParams { | |||
| 156 | static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true; | 198 | static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true; |
| 157 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; | 199 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; |
| 158 | static constexpr bool USE_MEMORY_MAPS = false; | 200 | static constexpr bool USE_MEMORY_MAPS = false; |
| 201 | static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true; | ||
| 159 | }; | 202 | }; |
| 160 | 203 | ||
| 161 | using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; | 204 | using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; |
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp new file mode 100644 index 000000000..aa1cc592f --- /dev/null +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp | |||
| @@ -0,0 +1,209 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <cstring> | ||
| 6 | |||
| 7 | #include "common/cityhash.h" | ||
| 8 | #include "common/settings.h" // for enum class Settings::ShaderBackend | ||
| 9 | #include "video_core/renderer_opengl/gl_compute_pipeline.h" | ||
| 10 | #include "video_core/renderer_opengl/gl_shader_manager.h" | ||
| 11 | #include "video_core/renderer_opengl/gl_shader_util.h" | ||
| 12 | |||
| 13 | namespace OpenGL { | ||
| 14 | |||
| 15 | using Shader::ImageBufferDescriptor; | ||
| 16 | using Tegra::Texture::TexturePair; | ||
| 17 | using VideoCommon::ImageId; | ||
| 18 | |||
| 19 | constexpr u32 MAX_TEXTURES = 64; | ||
| 20 | constexpr u32 MAX_IMAGES = 16; | ||
| 21 | |||
| 22 | template <typename Range> | ||
| 23 | u32 AccumulateCount(const Range& range) { | ||
| 24 | u32 num{}; | ||
| 25 | for (const auto& desc : range) { | ||
| 26 | num += desc.count; | ||
| 27 | } | ||
| 28 | return num; | ||
| 29 | } | ||
| 30 | |||
| 31 | size_t ComputePipelineKey::Hash() const noexcept { | ||
| 32 | return static_cast<size_t>( | ||
| 33 | Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this)); | ||
| 34 | } | ||
| 35 | |||
| 36 | bool ComputePipelineKey::operator==(const ComputePipelineKey& rhs) const noexcept { | ||
| 37 | return std::memcmp(this, &rhs, sizeof *this) == 0; | ||
| 38 | } | ||
| 39 | |||
| 40 | ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cache_, | ||
| 41 | BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, | ||
| 42 | Tegra::Engines::KeplerCompute& kepler_compute_, | ||
| 43 | ProgramManager& program_manager_, const Shader::Info& info_, | ||
| 44 | std::string code, std::vector<u32> code_v) | ||
| 45 | : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, | ||
| 46 | kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_} { | ||
| 47 | switch (device.GetShaderBackend()) { | ||
| 48 | case Settings::ShaderBackend::GLSL: | ||
| 49 | source_program = CreateProgram(code, GL_COMPUTE_SHADER); | ||
| 50 | break; | ||
| 51 | case Settings::ShaderBackend::GLASM: | ||
| 52 | assembly_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV); | ||
| 53 | break; | ||
| 54 | case Settings::ShaderBackend::SPIRV: | ||
| 55 | source_program = CreateProgram(code_v, GL_COMPUTE_SHADER); | ||
| 56 | break; | ||
| 57 | } | ||
| 58 | std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), | ||
| 59 | uniform_buffer_sizes.begin()); | ||
| 60 | |||
| 61 | num_texture_buffers = AccumulateCount(info.texture_buffer_descriptors); | ||
| 62 | num_image_buffers = AccumulateCount(info.image_buffer_descriptors); | ||
| 63 | |||
| 64 | const u32 num_textures{num_texture_buffers + AccumulateCount(info.texture_descriptors)}; | ||
| 65 | ASSERT(num_textures <= MAX_TEXTURES); | ||
| 66 | |||
| 67 | const u32 num_images{num_image_buffers + AccumulateCount(info.image_descriptors)}; | ||
| 68 | ASSERT(num_images <= MAX_IMAGES); | ||
| 69 | |||
| 70 | const bool is_glasm{assembly_program.handle != 0}; | ||
| 71 | const u32 num_storage_buffers{AccumulateCount(info.storage_buffers_descriptors)}; | ||
| 72 | use_storage_buffers = | ||
| 73 | !is_glasm || num_storage_buffers < device.GetMaxGLASMStorageBufferBlocks(); | ||
| 74 | writes_global_memory = !use_storage_buffers && | ||
| 75 | std::ranges::any_of(info.storage_buffers_descriptors, | ||
| 76 | [](const auto& desc) { return desc.is_written; }); | ||
| 77 | } | ||
| 78 | |||
| 79 | void ComputePipeline::Configure() { | ||
| 80 | buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes); | ||
| 81 | buffer_cache.UnbindComputeStorageBuffers(); | ||
| 82 | size_t ssbo_index{}; | ||
| 83 | for (const auto& desc : info.storage_buffers_descriptors) { | ||
| 84 | ASSERT(desc.count == 1); | ||
| 85 | buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, | ||
| 86 | desc.is_written); | ||
| 87 | ++ssbo_index; | ||
| 88 | } | ||
| 89 | texture_cache.SynchronizeComputeDescriptors(); | ||
| 90 | |||
| 91 | std::array<ImageViewId, MAX_TEXTURES + MAX_IMAGES> image_view_ids; | ||
| 92 | boost::container::static_vector<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices; | ||
| 93 | std::array<GLuint, MAX_TEXTURES> samplers; | ||
| 94 | std::array<GLuint, MAX_TEXTURES> textures; | ||
| 95 | std::array<GLuint, MAX_IMAGES> images; | ||
| 96 | GLsizei sampler_binding{}; | ||
| 97 | GLsizei texture_binding{}; | ||
| 98 | GLsizei image_binding{}; | ||
| 99 | |||
| 100 | const auto& qmd{kepler_compute.launch_description}; | ||
| 101 | const auto& cbufs{qmd.const_buffer_config}; | ||
| 102 | const bool via_header_index{qmd.linked_tsc != 0}; | ||
| 103 | const auto read_handle{[&](const auto& desc, u32 index) { | ||
| 104 | ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0); | ||
| 105 | const u32 index_offset{index << desc.size_shift}; | ||
| 106 | const u32 offset{desc.cbuf_offset + index_offset}; | ||
| 107 | const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset}; | ||
| 108 | if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> || | ||
| 109 | std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) { | ||
| 110 | if (desc.has_secondary) { | ||
| 111 | ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0); | ||
| 112 | const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset}; | ||
| 113 | const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() + | ||
| 114 | secondary_offset}; | ||
| 115 | const u32 lhs_raw{gpu_memory.Read<u32>(addr)}; | ||
| 116 | const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)}; | ||
| 117 | return TexturePair(lhs_raw | rhs_raw, via_header_index); | ||
| 118 | } | ||
| 119 | } | ||
| 120 | return TexturePair(gpu_memory.Read<u32>(addr), via_header_index); | ||
| 121 | }}; | ||
| 122 | const auto add_image{[&](const auto& desc) { | ||
| 123 | for (u32 index = 0; index < desc.count; ++index) { | ||
| 124 | const auto handle{read_handle(desc, index)}; | ||
| 125 | image_view_indices.push_back(handle.first); | ||
| 126 | } | ||
| 127 | }}; | ||
| 128 | for (const auto& desc : info.texture_buffer_descriptors) { | ||
| 129 | for (u32 index = 0; index < desc.count; ++index) { | ||
| 130 | const auto handle{read_handle(desc, index)}; | ||
| 131 | image_view_indices.push_back(handle.first); | ||
| 132 | samplers[sampler_binding++] = 0; | ||
| 133 | } | ||
| 134 | } | ||
| 135 | std::ranges::for_each(info.image_buffer_descriptors, add_image); | ||
| 136 | for (const auto& desc : info.texture_descriptors) { | ||
| 137 | for (u32 index = 0; index < desc.count; ++index) { | ||
| 138 | const auto handle{read_handle(desc, index)}; | ||
| 139 | image_view_indices.push_back(handle.first); | ||
| 140 | |||
| 141 | Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); | ||
| 142 | samplers[sampler_binding++] = sampler->Handle(); | ||
| 143 | } | ||
| 144 | } | ||
| 145 | std::ranges::for_each(info.image_descriptors, add_image); | ||
| 146 | |||
| 147 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); | ||
| 148 | texture_cache.FillComputeImageViews(indices_span, image_view_ids); | ||
| 149 | |||
| 150 | if (assembly_program.handle != 0) { | ||
| 151 | program_manager.BindComputeAssemblyProgram(assembly_program.handle); | ||
| 152 | } else { | ||
| 153 | program_manager.BindComputeProgram(source_program.handle); | ||
| 154 | } | ||
| 155 | buffer_cache.UnbindComputeTextureBuffers(); | ||
| 156 | size_t texbuf_index{}; | ||
| 157 | const auto add_buffer{[&](const auto& desc) { | ||
| 158 | constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>; | ||
| 159 | for (u32 i = 0; i < desc.count; ++i) { | ||
| 160 | bool is_written{false}; | ||
| 161 | if constexpr (is_image) { | ||
| 162 | is_written = desc.is_written; | ||
| 163 | } | ||
| 164 | ImageView& image_view{texture_cache.GetImageView(image_view_ids[texbuf_index])}; | ||
| 165 | buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(), | ||
| 166 | image_view.BufferSize(), image_view.format, | ||
| 167 | is_written, is_image); | ||
| 168 | ++texbuf_index; | ||
| 169 | } | ||
| 170 | }}; | ||
| 171 | std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); | ||
| 172 | std::ranges::for_each(info.image_buffer_descriptors, add_buffer); | ||
| 173 | |||
| 174 | buffer_cache.UpdateComputeBuffers(); | ||
| 175 | |||
| 176 | buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers); | ||
| 177 | buffer_cache.runtime.SetImagePointers(textures.data(), images.data()); | ||
| 178 | buffer_cache.BindHostComputeBuffers(); | ||
| 179 | |||
| 180 | const ImageId* views_it{image_view_ids.data() + num_texture_buffers + num_image_buffers}; | ||
| 181 | texture_binding += num_texture_buffers; | ||
| 182 | image_binding += num_image_buffers; | ||
| 183 | |||
| 184 | for (const auto& desc : info.texture_descriptors) { | ||
| 185 | for (u32 index = 0; index < desc.count; ++index) { | ||
| 186 | ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; | ||
| 187 | textures[texture_binding++] = image_view.Handle(desc.type); | ||
| 188 | } | ||
| 189 | } | ||
| 190 | for (const auto& desc : info.image_descriptors) { | ||
| 191 | for (u32 index = 0; index < desc.count; ++index) { | ||
| 192 | ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; | ||
| 193 | if (desc.is_written) { | ||
| 194 | texture_cache.MarkModification(image_view.image_id); | ||
| 195 | } | ||
| 196 | images[image_binding++] = image_view.StorageView(desc.type, desc.format); | ||
| 197 | } | ||
| 198 | } | ||
| 199 | if (texture_binding != 0) { | ||
| 200 | ASSERT(texture_binding == sampler_binding); | ||
| 201 | glBindTextures(0, texture_binding, textures.data()); | ||
| 202 | glBindSamplers(0, sampler_binding, samplers.data()); | ||
| 203 | } | ||
| 204 | if (image_binding != 0) { | ||
| 205 | glBindImageTextures(0, image_binding, images.data()); | ||
| 206 | } | ||
| 207 | } | ||
| 208 | |||
| 209 | } // namespace OpenGL | ||
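`ComputePipelineKey::Hash` and `operator==` above treat the key as raw bytes, which is only sound for padding-free, trivially copyable types; the header that follows `static_assert`s exactly that. A dependency-free sketch of the same pattern, where `KeySketch` is hypothetical and `std::hash<std::string_view>` stands in for `Common::CityHash64`:

```cpp
// Self-contained illustration of the byte-wise key pattern used by
// ComputePipelineKey. All names here are illustrative.
#include <array>
#include <cstdint>
#include <cstring>
#include <functional>
#include <string_view>
#include <type_traits>

struct KeySketch {
    std::uint64_t unique_hash;
    std::uint32_t shared_memory_size;
    std::array<std::uint32_t, 3> workgroup_size;

    std::size_t Hash() const noexcept {
        // Hash the object representation directly; sound only because the
        // static_assert below rules out padding bytes.
        return std::hash<std::string_view>{}(
            std::string_view{reinterpret_cast<const char*>(this), sizeof *this});
    }

    bool operator==(const KeySketch& rhs) const noexcept {
        return std::memcmp(this, &rhs, sizeof *this) == 0;
    }
};
// 8 + 4 + 12 bytes under 8-byte alignment: sizeof == 24 with no padding, so
// memcmp equality and byte hashing agree with member-wise comparison.
static_assert(std::has_unique_object_representations_v<KeySketch>);
static_assert(std::is_trivially_copyable_v<KeySketch>);
```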
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h new file mode 100644 index 000000000..50c676365 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h | |||
| @@ -0,0 +1,93 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <type_traits> | ||
| 9 | #include <utility> | ||
| 10 | |||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "shader_recompiler/shader_info.h" | ||
| 13 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | ||
| 14 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 15 | #include "video_core/renderer_opengl/gl_texture_cache.h" | ||
| 16 | |||
| 17 | namespace Tegra { | ||
| 18 | class MemoryManager; | ||
| 19 | } | ||
| 20 | |||
| 21 | namespace Tegra::Engines { | ||
| 22 | class KeplerCompute; | ||
| 23 | } | ||
| 24 | |||
| 25 | namespace Shader { | ||
| 26 | struct Info; | ||
| 27 | } | ||
| 28 | |||
| 29 | namespace OpenGL { | ||
| 30 | |||
| 31 | class Device; | ||
| 32 | class ProgramManager; | ||
| 33 | |||
| 34 | struct ComputePipelineKey { | ||
| 35 | u64 unique_hash; | ||
| 36 | u32 shared_memory_size; | ||
| 37 | std::array<u32, 3> workgroup_size; | ||
| 38 | |||
| 39 | size_t Hash() const noexcept; | ||
| 40 | |||
| 41 | bool operator==(const ComputePipelineKey&) const noexcept; | ||
| 42 | |||
| 43 | bool operator!=(const ComputePipelineKey& rhs) const noexcept { | ||
| 44 | return !operator==(rhs); | ||
| 45 | } | ||
| 46 | }; | ||
| 47 | static_assert(std::has_unique_object_representations_v<ComputePipelineKey>); | ||
| 48 | static_assert(std::is_trivially_copyable_v<ComputePipelineKey>); | ||
| 49 | static_assert(std::is_trivially_constructible_v<ComputePipelineKey>); | ||
| 50 | |||
| 51 | class ComputePipeline { | ||
| 52 | public: | ||
| 53 | explicit ComputePipeline(const Device& device, TextureCache& texture_cache_, | ||
| 54 | BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, | ||
| 55 | Tegra::Engines::KeplerCompute& kepler_compute_, | ||
| 56 | ProgramManager& program_manager_, const Shader::Info& info_, | ||
| 57 | std::string code, std::vector<u32> code_v); | ||
| 58 | |||
| 59 | void Configure(); | ||
| 60 | |||
| 61 | [[nodiscard]] bool WritesGlobalMemory() const noexcept { | ||
| 62 | return writes_global_memory; | ||
| 63 | } | ||
| 64 | |||
| 65 | private: | ||
| 66 | TextureCache& texture_cache; | ||
| 67 | BufferCache& buffer_cache; | ||
| 68 | Tegra::MemoryManager& gpu_memory; | ||
| 69 | Tegra::Engines::KeplerCompute& kepler_compute; | ||
| 70 | ProgramManager& program_manager; | ||
| 71 | |||
| 72 | Shader::Info info; | ||
| 73 | OGLProgram source_program; | ||
| 74 | OGLAssemblyProgram assembly_program; | ||
| 75 | VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{}; | ||
| 76 | |||
| 77 | u32 num_texture_buffers{}; | ||
| 78 | u32 num_image_buffers{}; | ||
| 79 | |||
| 80 | bool use_storage_buffers{}; | ||
| 81 | bool writes_global_memory{}; | ||
| 82 | }; | ||
| 83 | |||
| 84 | } // namespace OpenGL | ||
| 85 | |||
| 86 | namespace std { | ||
| 87 | template <> | ||
| 88 | struct hash<OpenGL::ComputePipelineKey> { | ||
| 89 | size_t operator()(const OpenGL::ComputePipelineKey& k) const noexcept { | ||
| 90 | return k.Hash(); | ||
| 91 | } | ||
| 92 | }; | ||
| 93 | } // namespace std | ||
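The `std::hash` specialization that closes the header above is what lets callers key hash containers directly on the struct. A hypothetical consumer (the alias name is illustrative, not part of the patch):

```cpp
// ComputePipelineKey::operator== plus the std::hash specialization satisfy
// the std::unordered_map requirements, so a pipeline cache can be keyed on
// the struct itself.
#include <memory>
#include <unordered_map>

using ComputePipelineCache =
    std::unordered_map<OpenGL::ComputePipelineKey,
                       std::unique_ptr<OpenGL::ComputePipeline>>;
```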
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 3b00614e7..9692b8e94 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -17,39 +17,17 @@ | |||
| 17 | #include "common/logging/log.h" | 17 | #include "common/logging/log.h" |
| 18 | #include "common/scope_exit.h" | 18 | #include "common/scope_exit.h" |
| 19 | #include "common/settings.h" | 19 | #include "common/settings.h" |
| 20 | #include "shader_recompiler/stage.h" | ||
| 20 | #include "video_core/renderer_opengl/gl_device.h" | 21 | #include "video_core/renderer_opengl/gl_device.h" |
| 21 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 22 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 22 | 23 | ||
| 23 | namespace OpenGL { | 24 | namespace OpenGL { |
| 24 | namespace { | 25 | namespace { |
| 25 | // One uniform block is reserved for emulation purposes | ||
| 26 | constexpr u32 ReservedUniformBlocks = 1; | ||
| 27 | |||
| 28 | constexpr u32 NumStages = 5; | ||
| 29 | |||
| 30 | constexpr std::array LIMIT_UBOS = { | 26 | constexpr std::array LIMIT_UBOS = { |
| 31 | GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, | 27 | GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, |
| 32 | GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS, | 28 | GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS, |
| 33 | GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS, | 29 | GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS, |
| 34 | }; | 30 | }; |
| 35 | constexpr std::array LIMIT_SSBOS = { | ||
| 36 | GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, | ||
| 37 | GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS, | ||
| 38 | GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS, | ||
| 39 | }; | ||
| 40 | constexpr std::array LIMIT_SAMPLERS = { | ||
| 41 | GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, | ||
| 42 | GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, | ||
| 43 | GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, | ||
| 44 | GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, | ||
| 45 | GL_MAX_TEXTURE_IMAGE_UNITS, | ||
| 46 | GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS, | ||
| 47 | }; | ||
| 48 | constexpr std::array LIMIT_IMAGES = { | ||
| 49 | GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS, | ||
| 50 | GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS, | ||
| 51 | GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS, | ||
| 52 | }; | ||
| 53 | 31 | ||
| 54 | template <typename T> | 32 | template <typename T> |
| 55 | T GetInteger(GLenum pname) { | 33 | T GetInteger(GLenum pname) { |
| @@ -82,81 +60,18 @@ bool HasExtension(std::span<const std::string_view> extensions, std::string_view | |||
| 82 | return std::ranges::find(extensions, extension) != extensions.end(); | 60 | return std::ranges::find(extensions, extension) != extensions.end(); |
| 83 | } | 61 | } |
| 84 | 62 | ||
| 85 | u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) { | 63 | std::array<u32, Shader::MaxStageTypes> BuildMaxUniformBuffers() noexcept { |
| 86 | ASSERT(num >= amount); | 64 | std::array<u32, Shader::MaxStageTypes> max; |
| 87 | if (limit) { | 65 | std::ranges::transform(LIMIT_UBOS, max.begin(), &GetInteger<u32>); |
| 88 | amount = std::min(amount, GetInteger<u32>(*limit)); | ||
| 89 | } | ||
| 90 | num -= amount; | ||
| 91 | return std::exchange(base, base + amount); | ||
| 92 | } | ||
| 93 | |||
| 94 | std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept { | ||
| 95 | std::array<u32, Tegra::Engines::MaxShaderTypes> max; | ||
| 96 | std::ranges::transform(LIMIT_UBOS, max.begin(), | ||
| 97 | [](GLenum pname) { return GetInteger<u32>(pname); }); | ||
| 98 | return max; | 66 | return max; |
| 99 | } | 67 | } |
| 100 | 68 | ||
| 101 | std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept { | ||
| 102 | std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings; | ||
| 103 | |||
| 104 | static constexpr std::array<std::size_t, 5> stage_swizzle{0, 1, 2, 3, 4}; | ||
| 105 | const u32 total_ubos = GetInteger<u32>(GL_MAX_UNIFORM_BUFFER_BINDINGS); | ||
| 106 | const u32 total_ssbos = GetInteger<u32>(GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS); | ||
| 107 | const u32 total_samplers = GetInteger<u32>(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS); | ||
| 108 | |||
| 109 | u32 num_ubos = total_ubos - ReservedUniformBlocks; | ||
| 110 | u32 num_ssbos = total_ssbos; | ||
| 111 | u32 num_samplers = total_samplers; | ||
| 112 | |||
| 113 | u32 base_ubo = ReservedUniformBlocks; | ||
| 114 | u32 base_ssbo = 0; | ||
| 115 | u32 base_samplers = 0; | ||
| 116 | |||
| 117 | for (std::size_t i = 0; i < NumStages; ++i) { | ||
| 118 | const std::size_t stage = stage_swizzle[i]; | ||
| 119 | bindings[stage] = { | ||
| 120 | Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]), | ||
| 121 | Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]), | ||
| 122 | Extract(base_samplers, num_samplers, total_samplers / NumStages, | ||
| 123 | LIMIT_SAMPLERS[stage])}; | ||
| 124 | } | ||
| 125 | |||
| 126 | u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS); | ||
| 127 | u32 base_images = 0; | ||
| 128 | |||
| 129 | // GL_MAX_IMAGE_UNITS is guaranteed by the spec to have a minimum value of 8. | ||
| 130 | // Due to the limitation of GL_MAX_IMAGE_UNITS, reserve at least 4 image bindings on the | ||
| 131 | // fragment stage, and at least 1 for the rest of the stages. | ||
| 132 | // So far games are observed to use 1 image binding on vertex and 4 on fragment stages. | ||
| 133 | |||
| 134 | // Reserve at least 4 image bindings on the fragment stage. | ||
| 135 | bindings[4].image = | ||
| 136 | Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]); | ||
| 137 | |||
| 138 | // This is guaranteed to be at least 1. | ||
| 139 | const u32 total_extracted_images = num_images / (NumStages - 1); | ||
| 140 | |||
| 141 | // Reserve the other image bindings. | ||
| 142 | for (std::size_t i = 0; i < NumStages; ++i) { | ||
| 143 | const std::size_t stage = stage_swizzle[i]; | ||
| 144 | if (stage == 4) { | ||
| 145 | continue; | ||
| 146 | } | ||
| 147 | bindings[stage].image = | ||
| 148 | Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]); | ||
| 149 | } | ||
| 150 | |||
| 151 | // Compute doesn't care about any of this. | ||
| 152 | bindings[5] = {0, 0, 0, 0}; | ||
| 153 | |||
| 154 | return bindings; | ||
| 155 | } | ||
| 156 | |||
| 157 | bool IsASTCSupported() { | 69 | bool IsASTCSupported() { |
| 158 | static constexpr std::array targets = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY}; | 70 | static constexpr std::array targets{ |
| 159 | static constexpr std::array formats = { | 71 | GL_TEXTURE_2D, |
| 72 | GL_TEXTURE_2D_ARRAY, | ||
| 73 | }; | ||
| 74 | static constexpr std::array formats{ | ||
| 160 | GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR, | 75 | GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR, |
| 161 | GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR, | 76 | GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR, |
| 162 | GL_COMPRESSED_RGBA_ASTC_6x6_KHR, GL_COMPRESSED_RGBA_ASTC_8x5_KHR, | 77 | GL_COMPRESSED_RGBA_ASTC_6x6_KHR, GL_COMPRESSED_RGBA_ASTC_8x5_KHR, |
| @@ -172,11 +87,10 @@ bool IsASTCSupported() { | |||
| 172 | GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR, | 87 | GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR, |
| 173 | GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR, | 88 | GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR, |
| 174 | }; | 89 | }; |
| 175 | static constexpr std::array required_support = { | 90 | static constexpr std::array required_support{ |
| 176 | GL_VERTEX_TEXTURE, GL_TESS_CONTROL_TEXTURE, GL_TESS_EVALUATION_TEXTURE, | 91 | GL_VERTEX_TEXTURE, GL_TESS_CONTROL_TEXTURE, GL_TESS_EVALUATION_TEXTURE, |
| 177 | GL_GEOMETRY_TEXTURE, GL_FRAGMENT_TEXTURE, GL_COMPUTE_TEXTURE, | 92 | GL_GEOMETRY_TEXTURE, GL_FRAGMENT_TEXTURE, GL_COMPUTE_TEXTURE, |
| 178 | }; | 93 | }; |
| 179 | |||
| 180 | for (const GLenum target : targets) { | 94 | for (const GLenum target : targets) { |
| 181 | for (const GLenum format : formats) { | 95 | for (const GLenum format : formats) { |
| 182 | for (const GLenum support : required_support) { | 96 | for (const GLenum support : required_support) { |
| @@ -223,14 +137,13 @@ Device::Device() { | |||
| 223 | "Beta driver 443.24 is known to have issues. There might be performance issues."); | 137 | "Beta driver 443.24 is known to have issues. There might be performance issues."); |
| 224 | disable_fast_buffer_sub_data = true; | 138 | disable_fast_buffer_sub_data = true; |
| 225 | } | 139 | } |
| 226 | |||
| 227 | max_uniform_buffers = BuildMaxUniformBuffers(); | 140 | max_uniform_buffers = BuildMaxUniformBuffers(); |
| 228 | base_bindings = BuildBaseBindings(); | ||
| 229 | uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); | 141 | uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); |
| 230 | shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); | 142 | shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); |
| 231 | max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); | 143 | max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); |
| 232 | max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); | 144 | max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); |
| 233 | max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE); | 145 | max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE); |
| 146 | max_glasm_storage_buffer_blocks = GetInteger<u32>(GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS); | ||
| 234 | has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && | 147 | has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && |
| 235 | GLAD_GL_NV_shader_thread_shuffle; | 148 | GLAD_GL_NV_shader_thread_shuffle; |
| 236 | has_shader_ballot = GLAD_GL_ARB_shader_ballot; | 149 | has_shader_ballot = GLAD_GL_ARB_shader_ballot; |
| @@ -243,18 +156,30 @@ Device::Device() { | |||
| 243 | has_precise_bug = TestPreciseBug(); | 156 | has_precise_bug = TestPreciseBug(); |
| 244 | has_broken_texture_view_formats = is_amd || (!is_linux && is_intel); | 157 | has_broken_texture_view_formats = is_amd || (!is_linux && is_intel); |
| 245 | has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; | 158 | has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; |
| 159 | has_derivative_control = GLAD_GL_ARB_derivative_control; | ||
| 246 | has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; | 160 | has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; |
| 247 | has_debugging_tool_attached = IsDebugToolAttached(extensions); | 161 | has_debugging_tool_attached = IsDebugToolAttached(extensions); |
| 248 | has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); | 162 | has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); |
| 163 | has_geometry_shader_passthrough = GLAD_GL_NV_geometry_shader_passthrough; | ||
| 164 | has_nv_gpu_shader_5 = GLAD_GL_NV_gpu_shader5; | ||
| 165 | has_shader_int64 = HasExtension(extensions, "GL_ARB_gpu_shader_int64"); | ||
| 166 | has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float; | ||
| 167 | has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2; | ||
| 168 | warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel; | ||
| 169 | need_fastmath_off = is_nvidia; | ||
| 249 | 170 | ||
| 250 | // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive | 171 | // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive |
| 251 | // uniform buffers as "push constants" | 172 | // uniform buffers as "push constants" |
| 252 | has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; | 173 | has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; |
| 253 | 174 | ||
| 254 | use_assembly_shaders = Settings::values.use_assembly_shaders.GetValue() && | 175 | shader_backend = Settings::values.shader_backend.GetValue(); |
| 176 | use_assembly_shaders = shader_backend == Settings::ShaderBackend::GLASM && | ||
| 255 | GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && | 177 | GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && |
| 256 | GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2; | 178 | GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2; |
| 257 | 179 | if (shader_backend == Settings::ShaderBackend::GLASM && !use_assembly_shaders) { | |
| 180 | LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported"); | ||
| 181 | shader_backend = Settings::ShaderBackend::GLSL; | ||
| 182 | } | ||
| 258 | // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation. | 183 | // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation. |
| 259 | use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() && | 184 | use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() && |
| 260 | !(is_amd || (is_intel && !is_linux)); | 185 | !(is_amd || (is_intel && !is_linux)); |
| @@ -265,11 +190,6 @@ Device::Device() { | |||
| 265 | LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug); | 190 | LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug); |
| 266 | LOG_INFO(Render_OpenGL, "Renderer_BrokenTextureViewFormats: {}", | 191 | LOG_INFO(Render_OpenGL, "Renderer_BrokenTextureViewFormats: {}", |
| 267 | has_broken_texture_view_formats); | 192 | has_broken_texture_view_formats); |
| 268 | |||
| 269 | if (Settings::values.use_assembly_shaders.GetValue() && !use_assembly_shaders) { | ||
| 270 | LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported"); | ||
| 271 | } | ||
| 272 | |||
| 273 | if (Settings::values.use_asynchronous_shaders.GetValue() && !use_asynchronous_shaders) { | 193 | if (Settings::values.use_asynchronous_shaders.GetValue() && !use_asynchronous_shaders) { |
| 274 | LOG_WARNING(Render_OpenGL, "Asynchronous shader compilation enabled but not supported"); | 194 | LOG_WARNING(Render_OpenGL, "Asynchronous shader compilation enabled but not supported"); |
| 275 | } | 195 | } |
| @@ -325,22 +245,6 @@ std::string Device::GetVendorName() const { | |||
| 325 | return vendor_name; | 245 | return vendor_name; |
| 326 | } | 246 | } |
| 327 | 247 | ||
| 328 | Device::Device(std::nullptr_t) { | ||
| 329 | max_uniform_buffers.fill(std::numeric_limits<u32>::max()); | ||
| 330 | uniform_buffer_alignment = 4; | ||
| 331 | shader_storage_alignment = 4; | ||
| 332 | max_vertex_attributes = 16; | ||
| 333 | max_varyings = 15; | ||
| 334 | max_compute_shared_memory_size = 0x10000; | ||
| 335 | has_warp_intrinsics = true; | ||
| 336 | has_shader_ballot = true; | ||
| 337 | has_vertex_viewport_layer = true; | ||
| 338 | has_image_load_formatted = true; | ||
| 339 | has_texture_shadow_lod = true; | ||
| 340 | has_variable_aoffi = true; | ||
| 341 | has_depth_buffer_float = true; | ||
| 342 | } | ||
| 343 | |||
| 344 | bool Device::TestVariableAoffi() { | 248 | bool Device::TestVariableAoffi() { |
| 345 | return TestProgram(R"(#version 430 core | 249 | return TestProgram(R"(#version 430 core |
| 346 | // This is a unit test, please ignore me on apitrace bug reports. | 250 | // This is a unit test, please ignore me on apitrace bug reports. |
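The rewritten `BuildMaxUniformBuffers()` above collapses the old `Extract()`/`BuildBaseBindings()` machinery into a single `std::ranges::transform` over the limit tokens. A standalone sketch of that query pattern; `GetLimit`/`QueryLimits` are illustrative names, and a current GL context is assumed:

```cpp
// Map an array of GL pname tokens to their integer limits in one pass,
// mirroring BuildMaxUniformBuffers() in the hunk above.
#include <algorithm>
#include <array>
#include <cstdint>
#include <glad/glad.h>

std::uint32_t GetLimit(GLenum pname) {
    GLint value = 0;
    glGetIntegerv(pname, &value);
    return static_cast<std::uint32_t>(value);
}

std::array<std::uint32_t, 2> QueryLimits() {
    static constexpr std::array LIMITS{GL_MAX_VERTEX_UNIFORM_BLOCKS,
                                       GL_MAX_FRAGMENT_UNIFORM_BLOCKS};
    std::array<std::uint32_t, 2> values{};
    std::ranges::transform(LIMITS, values.begin(), GetLimit);
    return values;
}
```

The per-stage binding carving this deletes is not gone, only relocated: `GraphicsPipeline` below accumulates `base_uniform_bindings`/`base_storage_bindings` per pipeline from each stage's descriptor counts instead of partitioning the global limits up front.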
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 2c2b13767..ee992aed4 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -6,34 +6,22 @@ | |||
| 6 | 6 | ||
| 7 | #include <cstddef> | 7 | #include <cstddef> |
| 8 | #include "common/common_types.h" | 8 | #include "common/common_types.h" |
| 9 | #include "video_core/engines/shader_type.h" | 9 | #include "shader_recompiler/stage.h" |
| 10 | |||
| 11 | namespace Settings { | ||
| 12 | enum class ShaderBackend : u32; | ||
| 13 | }; | ||
| 10 | 14 | ||
| 11 | namespace OpenGL { | 15 | namespace OpenGL { |
| 12 | 16 | ||
| 13 | class Device { | 17 | class Device { |
| 14 | public: | 18 | public: |
| 15 | struct BaseBindings { | ||
| 16 | u32 uniform_buffer{}; | ||
| 17 | u32 shader_storage_buffer{}; | ||
| 18 | u32 sampler{}; | ||
| 19 | u32 image{}; | ||
| 20 | }; | ||
| 21 | |||
| 22 | explicit Device(); | 19 | explicit Device(); |
| 23 | explicit Device(std::nullptr_t); | ||
| 24 | 20 | ||
| 25 | [[nodiscard]] std::string GetVendorName() const; | 21 | [[nodiscard]] std::string GetVendorName() const; |
| 26 | 22 | ||
| 27 | u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept { | 23 | u32 GetMaxUniformBuffers(Shader::Stage stage) const noexcept { |
| 28 | return max_uniform_buffers[static_cast<std::size_t>(shader_type)]; | 24 | return max_uniform_buffers[static_cast<size_t>(stage)]; |
| 29 | } | ||
| 30 | |||
| 31 | const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept { | ||
| 32 | return base_bindings[stage_index]; | ||
| 33 | } | ||
| 34 | |||
| 35 | const BaseBindings& GetBaseBindings(Tegra::Engines::ShaderType shader_type) const noexcept { | ||
| 36 | return GetBaseBindings(static_cast<std::size_t>(shader_type)); | ||
| 37 | } | 25 | } |
| 38 | 26 | ||
| 39 | size_t GetUniformBufferAlignment() const { | 27 | size_t GetUniformBufferAlignment() const { |
| @@ -56,6 +44,10 @@ public: | |||
| 56 | return max_compute_shared_memory_size; | 44 | return max_compute_shared_memory_size; |
| 57 | } | 45 | } |
| 58 | 46 | ||
| 47 | u32 GetMaxGLASMStorageBufferBlocks() const { | ||
| 48 | return max_glasm_storage_buffer_blocks; | ||
| 49 | } | ||
| 50 | |||
| 59 | bool HasWarpIntrinsics() const { | 51 | bool HasWarpIntrinsics() const { |
| 60 | return has_warp_intrinsics; | 52 | return has_warp_intrinsics; |
| 61 | } | 53 | } |
| @@ -108,6 +100,10 @@ public: | |||
| 108 | return has_nv_viewport_array2; | 100 | return has_nv_viewport_array2; |
| 109 | } | 101 | } |
| 110 | 102 | ||
| 103 | bool HasDerivativeControl() const { | ||
| 104 | return has_derivative_control; | ||
| 105 | } | ||
| 106 | |||
| 111 | bool HasDebuggingToolAttached() const { | 107 | bool HasDebuggingToolAttached() const { |
| 112 | return has_debugging_tool_attached; | 108 | return has_debugging_tool_attached; |
| 113 | } | 109 | } |
| @@ -128,18 +124,52 @@ public: | |||
| 128 | return has_depth_buffer_float; | 124 | return has_depth_buffer_float; |
| 129 | } | 125 | } |
| 130 | 126 | ||
| 127 | bool HasGeometryShaderPassthrough() const { | ||
| 128 | return has_geometry_shader_passthrough; | ||
| 129 | } | ||
| 130 | |||
| 131 | bool HasNvGpuShader5() const { | ||
| 132 | return has_nv_gpu_shader_5; | ||
| 133 | } | ||
| 134 | |||
| 135 | bool HasShaderInt64() const { | ||
| 136 | return has_shader_int64; | ||
| 137 | } | ||
| 138 | |||
| 139 | bool HasAmdShaderHalfFloat() const { | ||
| 140 | return has_amd_shader_half_float; | ||
| 141 | } | ||
| 142 | |||
| 143 | bool HasSparseTexture2() const { | ||
| 144 | return has_sparse_texture_2; | ||
| 145 | } | ||
| 146 | |||
| 147 | bool IsWarpSizePotentiallyLargerThanGuest() const { | ||
| 148 | return warp_size_potentially_larger_than_guest; | ||
| 149 | } | ||
| 150 | |||
| 151 | bool NeedsFastmathOff() const { | ||
| 152 | return need_fastmath_off; | ||
| 153 | } | ||
| 154 | |||
| 155 | Settings::ShaderBackend GetShaderBackend() const { | ||
| 156 | return shader_backend; | ||
| 157 | } | ||
| 158 | |||
| 131 | private: | 159 | private: |
| 132 | static bool TestVariableAoffi(); | 160 | static bool TestVariableAoffi(); |
| 133 | static bool TestPreciseBug(); | 161 | static bool TestPreciseBug(); |
| 134 | 162 | ||
| 135 | std::string vendor_name; | 163 | std::array<u32, Shader::MaxStageTypes> max_uniform_buffers{}; |
| 136 | std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{}; | ||
| 137 | std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{}; | ||
| 138 | size_t uniform_buffer_alignment{}; | 164 | size_t uniform_buffer_alignment{}; |
| 139 | size_t shader_storage_alignment{}; | 165 | size_t shader_storage_alignment{}; |
| 140 | u32 max_vertex_attributes{}; | 166 | u32 max_vertex_attributes{}; |
| 141 | u32 max_varyings{}; | 167 | u32 max_varyings{}; |
| 142 | u32 max_compute_shared_memory_size{}; | 168 | u32 max_compute_shared_memory_size{}; |
| 169 | u32 max_glasm_storage_buffer_blocks{}; | ||
| 170 | |||
| 171 | Settings::ShaderBackend shader_backend{}; | ||
| 172 | |||
| 143 | bool has_warp_intrinsics{}; | 173 | bool has_warp_intrinsics{}; |
| 144 | bool has_shader_ballot{}; | 174 | bool has_shader_ballot{}; |
| 145 | bool has_vertex_viewport_layer{}; | 175 | bool has_vertex_viewport_layer{}; |
| @@ -153,11 +183,21 @@ private: | |||
| 153 | bool has_broken_texture_view_formats{}; | 183 | bool has_broken_texture_view_formats{}; |
| 154 | bool has_fast_buffer_sub_data{}; | 184 | bool has_fast_buffer_sub_data{}; |
| 155 | bool has_nv_viewport_array2{}; | 185 | bool has_nv_viewport_array2{}; |
| 186 | bool has_derivative_control{}; | ||
| 156 | bool has_debugging_tool_attached{}; | 187 | bool has_debugging_tool_attached{}; |
| 157 | bool use_assembly_shaders{}; | 188 | bool use_assembly_shaders{}; |
| 158 | bool use_asynchronous_shaders{}; | 189 | bool use_asynchronous_shaders{}; |
| 159 | bool use_driver_cache{}; | 190 | bool use_driver_cache{}; |
| 160 | bool has_depth_buffer_float{}; | 191 | bool has_depth_buffer_float{}; |
| 192 | bool has_geometry_shader_passthrough{}; | ||
| 193 | bool has_nv_gpu_shader_5{}; | ||
| 194 | bool has_shader_int64{}; | ||
| 195 | bool has_amd_shader_half_float{}; | ||
| 196 | bool has_sparse_texture_2{}; | ||
| 197 | bool warp_size_potentially_larger_than_guest{}; | ||
| 198 | bool need_fastmath_off{}; | ||
| 199 | |||
| 200 | std::string vendor_name; | ||
| 161 | }; | 201 | }; |
| 162 | 202 | ||
| 163 | } // namespace OpenGL | 203 | } // namespace OpenGL |
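`GetMaxGLASMStorageBufferBlocks()`, added above, feeds the storage-buffer decision both new pipelines make. A standalone sketch of that decision; the free function and parameter names are illustrative:

```cpp
// GLSL/SPIR-V backends always bind real SSBOs; GLASM does so only when the
// driver exposes enough storage buffer blocks, otherwise the shader falls
// back to global-memory emulation.
#include <cstdint>

bool UseStorageBuffers(bool is_glasm, std::uint32_t num_storage_buffers,
                       std::uint32_t max_glasm_storage_blocks) {
    return !is_glasm || num_storage_buffers < max_glasm_storage_blocks;
}
```

The sketch follows the compute path; note that the compute pipeline above compares with `<` while the graphics pipeline below uses `<=`.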
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp new file mode 100644 index 000000000..fac0034fb --- /dev/null +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | |||
| @@ -0,0 +1,572 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <string> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "common/settings.h" // for enum class Settings::ShaderBackend | ||
| 11 | #include "common/thread_worker.h" | ||
| 12 | #include "shader_recompiler/shader_info.h" | ||
| 13 | #include "video_core/renderer_opengl/gl_graphics_pipeline.h" | ||
| 14 | #include "video_core/renderer_opengl/gl_shader_manager.h" | ||
| 15 | #include "video_core/renderer_opengl/gl_shader_util.h" | ||
| 16 | #include "video_core/renderer_opengl/gl_state_tracker.h" | ||
| 17 | #include "video_core/shader_notify.h" | ||
| 18 | #include "video_core/texture_cache/texture_cache.h" | ||
| 19 | |||
| 20 | #if defined(_MSC_VER) && defined(NDEBUG) | ||
| 21 | #define LAMBDA_FORCEINLINE [[msvc::forceinline]] | ||
| 22 | #else | ||
| 23 | #define LAMBDA_FORCEINLINE | ||
| 24 | #endif | ||
| 25 | |||
| 26 | namespace OpenGL { | ||
| 27 | namespace { | ||
| 28 | using Shader::ImageBufferDescriptor; | ||
| 29 | using Shader::ImageDescriptor; | ||
| 30 | using Shader::TextureBufferDescriptor; | ||
| 31 | using Shader::TextureDescriptor; | ||
| 32 | using Tegra::Texture::TexturePair; | ||
| 33 | using VideoCommon::ImageId; | ||
| 34 | |||
| 35 | constexpr u32 MAX_TEXTURES = 64; | ||
| 36 | constexpr u32 MAX_IMAGES = 8; | ||
| 37 | |||
| 38 | template <typename Range> | ||
| 39 | u32 AccumulateCount(const Range& range) { | ||
| 40 | u32 num{}; | ||
| 41 | for (const auto& desc : range) { | ||
| 42 | num += desc.count; | ||
| 43 | } | ||
| 44 | return num; | ||
| 45 | } | ||
| 46 | |||
| 47 | GLenum Stage(size_t stage_index) { | ||
| 48 | switch (stage_index) { | ||
| 49 | case 0: | ||
| 50 | return GL_VERTEX_SHADER; | ||
| 51 | case 1: | ||
| 52 | return GL_TESS_CONTROL_SHADER; | ||
| 53 | case 2: | ||
| 54 | return GL_TESS_EVALUATION_SHADER; | ||
| 55 | case 3: | ||
| 56 | return GL_GEOMETRY_SHADER; | ||
| 57 | case 4: | ||
| 58 | return GL_FRAGMENT_SHADER; | ||
| 59 | } | ||
| 60 | UNREACHABLE_MSG("{}", stage_index); | ||
| 61 | return GL_NONE; | ||
| 62 | } | ||
| 63 | |||
| 64 | GLenum AssemblyStage(size_t stage_index) { | ||
| 65 | switch (stage_index) { | ||
| 66 | case 0: | ||
| 67 | return GL_VERTEX_PROGRAM_NV; | ||
| 68 | case 1: | ||
| 69 | return GL_TESS_CONTROL_PROGRAM_NV; | ||
| 70 | case 2: | ||
| 71 | return GL_TESS_EVALUATION_PROGRAM_NV; | ||
| 72 | case 3: | ||
| 73 | return GL_GEOMETRY_PROGRAM_NV; | ||
| 74 | case 4: | ||
| 75 | return GL_FRAGMENT_PROGRAM_NV; | ||
| 76 | } | ||
| 77 | UNREACHABLE_MSG("{}", stage_index); | ||
| 78 | return GL_NONE; | ||
| 79 | } | ||
| 80 | |||
| 81 | /// Translates hardware transform feedback indices | ||
| 82 | /// @param location Hardware location | ||
| 83 | /// @return Pair of ARB_transform_feedback3 token stream first and third arguments | ||
| 84 | /// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt | ||
| 85 | std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) { | ||
| 86 | const u8 index = location / 4; | ||
| 87 | if (index >= 8 && index <= 39) { | ||
| 88 | return {GL_GENERIC_ATTRIB_NV, index - 8}; | ||
| 89 | } | ||
| 90 | if (index >= 48 && index <= 55) { | ||
| 91 | return {GL_TEXTURE_COORD_NV, index - 48}; | ||
| 92 | } | ||
| 93 | switch (index) { | ||
| 94 | case 7: | ||
| 95 | return {GL_POSITION, 0}; | ||
| 96 | case 40: | ||
| 97 | return {GL_PRIMARY_COLOR_NV, 0}; | ||
| 98 | case 41: | ||
| 99 | return {GL_SECONDARY_COLOR_NV, 0}; | ||
| 100 | case 42: | ||
| 101 | return {GL_BACK_PRIMARY_COLOR_NV, 0}; | ||
| 102 | case 43: | ||
| 103 | return {GL_BACK_SECONDARY_COLOR_NV, 0}; | ||
| 104 | } | ||
| 105 | UNIMPLEMENTED_MSG("index={}", index); | ||
| 106 | return {GL_POSITION, 0}; | ||
| 107 | } | ||
| 108 | |||
| 109 | template <typename Spec> | ||
| 110 | bool Passes(const std::array<Shader::Info, 5>& stage_infos, u32 enabled_mask) { | ||
| 111 | for (size_t stage = 0; stage < stage_infos.size(); ++stage) { | ||
| 112 | if (!Spec::enabled_stages[stage] && ((enabled_mask >> stage) & 1) != 0) { | ||
| 113 | return false; | ||
| 114 | } | ||
| 115 | const auto& info{stage_infos[stage]}; | ||
| 116 | if constexpr (!Spec::has_storage_buffers) { | ||
| 117 | if (!info.storage_buffers_descriptors.empty()) { | ||
| 118 | return false; | ||
| 119 | } | ||
| 120 | } | ||
| 121 | if constexpr (!Spec::has_texture_buffers) { | ||
| 122 | if (!info.texture_buffer_descriptors.empty()) { | ||
| 123 | return false; | ||
| 124 | } | ||
| 125 | } | ||
| 126 | if constexpr (!Spec::has_image_buffers) { | ||
| 127 | if (!info.image_buffer_descriptors.empty()) { | ||
| 128 | return false; | ||
| 129 | } | ||
| 130 | } | ||
| 131 | if constexpr (!Spec::has_images) { | ||
| 132 | if (!info.image_descriptors.empty()) { | ||
| 133 | return false; | ||
| 134 | } | ||
| 135 | } | ||
| 136 | } | ||
| 137 | return true; | ||
| 138 | } | ||
| 139 | |||
| 140 | using ConfigureFuncPtr = void (*)(GraphicsPipeline*, bool); | ||
| 141 | |||
| 142 | template <typename Spec, typename... Specs> | ||
| 143 | ConfigureFuncPtr FindSpec(const std::array<Shader::Info, 5>& stage_infos, u32 enabled_mask) { | ||
| 144 | if constexpr (sizeof...(Specs) > 0) { | ||
| 145 | if (!Passes<Spec>(stage_infos, enabled_mask)) { | ||
| 146 | return FindSpec<Specs...>(stage_infos, enabled_mask); | ||
| 147 | } | ||
| 148 | } | ||
| 149 | return GraphicsPipeline::MakeConfigureSpecFunc<Spec>(); | ||
| 150 | } | ||
| 151 | |||
| 152 | struct SimpleVertexFragmentSpec { | ||
| 153 | static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, true}; | ||
| 154 | static constexpr bool has_storage_buffers = false; | ||
| 155 | static constexpr bool has_texture_buffers = false; | ||
| 156 | static constexpr bool has_image_buffers = false; | ||
| 157 | static constexpr bool has_images = false; | ||
| 158 | }; | ||
| 159 | |||
| 160 | struct SimpleVertexSpec { | ||
| 161 | static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, false}; | ||
| 162 | static constexpr bool has_storage_buffers = false; | ||
| 163 | static constexpr bool has_texture_buffers = false; | ||
| 164 | static constexpr bool has_image_buffers = false; | ||
| 165 | static constexpr bool has_images = false; | ||
| 166 | }; | ||
| 167 | |||
| 168 | struct DefaultSpec { | ||
| 169 | static constexpr std::array<bool, 5> enabled_stages{true, true, true, true, true}; | ||
| 170 | static constexpr bool has_storage_buffers = true; | ||
| 171 | static constexpr bool has_texture_buffers = true; | ||
| 172 | static constexpr bool has_image_buffers = true; | ||
| 173 | static constexpr bool has_images = true; | ||
| 174 | }; | ||
| 175 | |||
| 176 | ConfigureFuncPtr ConfigureFunc(const std::array<Shader::Info, 5>& infos, u32 enabled_mask) { | ||
| 177 | return FindSpec<SimpleVertexSpec, SimpleVertexFragmentSpec, DefaultSpec>(infos, enabled_mask); | ||
| 178 | } | ||
| 179 | } // Anonymous namespace | ||
| 180 | |||
| 181 | GraphicsPipeline::GraphicsPipeline( | ||
| 182 | const Device& device, TextureCache& texture_cache_, BufferCache& buffer_cache_, | ||
| 183 | Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, | ||
| 184 | ProgramManager& program_manager_, StateTracker& state_tracker_, ShaderWorker* thread_worker, | ||
| 185 | VideoCore::ShaderNotify* shader_notify, std::array<std::string, 5> sources, | ||
| 186 | std::array<std::vector<u32>, 5> sources_spirv, const std::array<const Shader::Info*, 5>& infos, | ||
| 187 | const GraphicsPipelineKey& key_) | ||
| 188 | : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, | ||
| 189 | gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, | ||
| 190 | state_tracker{state_tracker_}, key{key_} { | ||
| 191 | if (shader_notify) { | ||
| 192 | shader_notify->MarkShaderBuilding(); | ||
| 193 | } | ||
| 194 | u32 num_textures{}; | ||
| 195 | u32 num_images{}; | ||
| 196 | u32 num_storage_buffers{}; | ||
| 197 | for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { | ||
| 198 | auto& info{stage_infos[stage]}; | ||
| 199 | if (infos[stage]) { | ||
| 200 | info = *infos[stage]; | ||
| 201 | enabled_stages_mask |= 1u << stage; | ||
| 202 | } | ||
| 203 | if (stage < 4) { | ||
| 204 | base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; | ||
| 205 | base_storage_bindings[stage + 1] = base_storage_bindings[stage]; | ||
| 206 | |||
| 207 | base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors); | ||
| 208 | base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors); | ||
| 209 | } | ||
| 210 | enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask; | ||
| 211 | std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); | ||
| 212 | |||
| 213 | const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; | ||
| 214 | num_texture_buffers[stage] += num_tex_buffer_bindings; | ||
| 215 | num_textures += num_tex_buffer_bindings; | ||
| 216 | |||
| 217 | const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)}; | ||
| 218 | num_image_buffers[stage] += num_img_buffers_bindings; | ||
| 219 | num_images += num_img_buffers_bindings; | ||
| 220 | |||
| 221 | num_textures += AccumulateCount(info.texture_descriptors); | ||
| 222 | num_images += AccumulateCount(info.image_descriptors); | ||
| 223 | num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors); | ||
| 224 | |||
| 225 | writes_global_memory |= std::ranges::any_of( | ||
| 226 | info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); | ||
| 227 | } | ||
| 228 | ASSERT(num_textures <= MAX_TEXTURES); | ||
| 229 | ASSERT(num_images <= MAX_IMAGES); | ||
| 230 | |||
| 231 | const bool assembly_shaders{assembly_programs[0].handle != 0}; | ||
| 232 | use_storage_buffers = | ||
| 233 | !assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); | ||
| 234 | writes_global_memory &= !use_storage_buffers; | ||
| 235 | configure_func = ConfigureFunc(stage_infos, enabled_stages_mask); | ||
| 236 | |||
| 237 | if (key.xfb_enabled && device.UseAssemblyShaders()) { | ||
| 238 | GenerateTransformFeedbackState(); | ||
| 239 | } | ||
| 240 | const bool in_parallel = thread_worker != nullptr; | ||
| 241 | const auto backend = device.GetShaderBackend(); | ||
| 242 | auto func{[this, sources = std::move(sources), sources_spirv = std::move(sources_spirv), | ||
| 243 | shader_notify, backend, in_parallel](ShaderContext::Context*) mutable { | ||
| 244 | for (size_t stage = 0; stage < 5; ++stage) { | ||
| 245 | switch (backend) { | ||
| 246 | case Settings::ShaderBackend::GLSL: | ||
| 247 | if (!sources[stage].empty()) { | ||
| 248 | source_programs[stage] = CreateProgram(sources[stage], Stage(stage)); | ||
| 249 | } | ||
| 250 | break; | ||
| 251 | case Settings::ShaderBackend::GLASM: | ||
| 252 | if (!sources[stage].empty()) { | ||
| 253 | assembly_programs[stage] = CompileProgram(sources[stage], AssemblyStage(stage)); | ||
| 254 | if (in_parallel) { | ||
| 255 | // Make sure program is built before continuing when building in parallel | ||
| 256 | glGetString(GL_PROGRAM_ERROR_STRING_NV); | ||
| 257 | } | ||
| 258 | } | ||
| 259 | break; | ||
| 260 | case Settings::ShaderBackend::SPIRV: | ||
| 261 | if (!sources_spirv[stage].empty()) { | ||
| 262 | source_programs[stage] = CreateProgram(sources_spirv[stage], Stage(stage)); | ||
| 263 | } | ||
| 264 | break; | ||
| 265 | } | ||
| 266 | } | ||
| 267 | if (in_parallel && backend != Settings::ShaderBackend::GLASM) { | ||
| 268 | // Make sure programs have been built if we are building shaders in parallel | ||
| 269 | for (OGLProgram& program : source_programs) { | ||
| 270 | if (program.handle != 0) { | ||
| 271 | GLint status{}; | ||
| 272 | glGetProgramiv(program.handle, GL_LINK_STATUS, &status); | ||
| 273 | } | ||
| 274 | } | ||
| 275 | } | ||
| 276 | if (shader_notify) { | ||
| 277 | shader_notify->MarkShaderComplete(); | ||
| 278 | } | ||
| 279 | is_built = true; | ||
| 280 | built_condvar.notify_one(); | ||
| 281 | }}; | ||
| 282 | if (thread_worker) { | ||
| 283 | thread_worker->QueueWork(std::move(func)); | ||
| 284 | } else { | ||
| 285 | func(nullptr); | ||
| 286 | } | ||
| 287 | } | ||
| 288 | |||
| 289 | template <typename Spec> | ||
| 290 | void GraphicsPipeline::ConfigureImpl(bool is_indexed) { | ||
| 291 | std::array<ImageId, MAX_TEXTURES + MAX_IMAGES> image_view_ids; | ||
| 292 | std::array<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices; | ||
| 293 | std::array<GLuint, MAX_TEXTURES> samplers; | ||
| 294 | size_t image_view_index{}; | ||
| 295 | GLsizei sampler_binding{}; | ||
| 296 | |||
| 297 | texture_cache.SynchronizeGraphicsDescriptors(); | ||
| 298 | |||
| 299 | buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes); | ||
| 300 | buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings); | ||
| 301 | buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings); | ||
| 302 | buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers); | ||
| 303 | |||
| 304 | const auto& regs{maxwell3d.regs}; | ||
| 305 | const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; | ||
| 306 | const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE { | ||
| 307 | const Shader::Info& info{stage_infos[stage]}; | ||
| 308 | buffer_cache.UnbindGraphicsStorageBuffers(stage); | ||
| 309 | if constexpr (Spec::has_storage_buffers) { | ||
| 310 | size_t ssbo_index{}; | ||
| 311 | for (const auto& desc : info.storage_buffers_descriptors) { | ||
| 312 | ASSERT(desc.count == 1); | ||
| 313 | buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index, | ||
| 314 | desc.cbuf_offset, desc.is_written); | ||
| 315 | ++ssbo_index; | ||
| 316 | } | ||
| 317 | } | ||
| 318 | const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; | ||
| 319 | const auto read_handle{[&](const auto& desc, u32 index) { | ||
| 320 | ASSERT(cbufs[desc.cbuf_index].enabled); | ||
| 321 | const u32 index_offset{index << desc.size_shift}; | ||
| 322 | const u32 offset{desc.cbuf_offset + index_offset}; | ||
| 323 | const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset}; | ||
| 324 | if constexpr (std::is_same_v<decltype(desc), const TextureDescriptor&> || | ||
| 325 | std::is_same_v<decltype(desc), const TextureBufferDescriptor&>) { | ||
| 326 | if (desc.has_secondary) { | ||
| 327 | ASSERT(cbufs[desc.secondary_cbuf_index].enabled); | ||
| 328 | const u32 second_offset{desc.secondary_cbuf_offset + index_offset}; | ||
| 329 | const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address + | ||
| 330 | second_offset}; | ||
| 331 | const u32 lhs_raw{gpu_memory.Read<u32>(addr)}; | ||
| 332 | const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)}; | ||
| 333 | const u32 raw{lhs_raw | rhs_raw}; | ||
| 334 | return TexturePair(raw, via_header_index); | ||
| 335 | } | ||
| 336 | } | ||
| 337 | return TexturePair(gpu_memory.Read<u32>(addr), via_header_index); | ||
| 338 | }}; | ||
| 339 | const auto add_image{[&](const auto& desc) { | ||
| 340 | for (u32 index = 0; index < desc.count; ++index) { | ||
| 341 | const auto handle{read_handle(desc, index)}; | ||
| 342 | image_view_indices[image_view_index++] = handle.first; | ||
| 343 | } | ||
| 344 | }}; | ||
| 345 | if constexpr (Spec::has_texture_buffers) { | ||
| 346 | for (const auto& desc : info.texture_buffer_descriptors) { | ||
| 347 | for (u32 index = 0; index < desc.count; ++index) { | ||
| 348 | const auto handle{read_handle(desc, index)}; | ||
| 349 | image_view_indices[image_view_index++] = handle.first; | ||
| 350 | samplers[sampler_binding++] = 0; | ||
| 351 | } | ||
| 352 | } | ||
| 353 | } | ||
| 354 | if constexpr (Spec::has_image_buffers) { | ||
| 355 | for (const auto& desc : info.image_buffer_descriptors) { | ||
| 356 | add_image(desc); | ||
| 357 | } | ||
| 358 | } | ||
| 359 | for (const auto& desc : info.texture_descriptors) { | ||
| 360 | for (u32 index = 0; index < desc.count; ++index) { | ||
| 361 | const auto handle{read_handle(desc, index)}; | ||
| 362 | image_view_indices[image_view_index++] = handle.first; | ||
| 363 | |||
| 364 | Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; | ||
| 365 | samplers[sampler_binding++] = sampler->Handle(); | ||
| 366 | } | ||
| 367 | } | ||
| 368 | if constexpr (Spec::has_images) { | ||
| 369 | for (const auto& desc : info.image_descriptors) { | ||
| 370 | add_image(desc); | ||
| 371 | } | ||
| 372 | } | ||
| 373 | }}; | ||
| 374 | if constexpr (Spec::enabled_stages[0]) { | ||
| 375 | config_stage(0); | ||
| 376 | } | ||
| 377 | if constexpr (Spec::enabled_stages[1]) { | ||
| 378 | config_stage(1); | ||
| 379 | } | ||
| 380 | if constexpr (Spec::enabled_stages[2]) { | ||
| 381 | config_stage(2); | ||
| 382 | } | ||
| 383 | if constexpr (Spec::enabled_stages[3]) { | ||
| 384 | config_stage(3); | ||
| 385 | } | ||
| 386 | if constexpr (Spec::enabled_stages[4]) { | ||
| 387 | config_stage(4); | ||
| 388 | } | ||
| 389 | const std::span indices_span(image_view_indices.data(), image_view_index); | ||
| 390 | texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); | ||
| 391 | |||
| 392 | texture_cache.UpdateRenderTargets(false); | ||
| 393 | state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); | ||
| 394 | |||
| 395 | ImageId* texture_buffer_index{image_view_ids.data()}; | ||
| 396 | const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { | ||
| 397 | size_t index{}; | ||
| 398 | const auto add_buffer{[&](const auto& desc) { | ||
| 399 | constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>; | ||
| 400 | for (u32 i = 0; i < desc.count; ++i) { | ||
| 401 | bool is_written{false}; | ||
| 402 | if constexpr (is_image) { | ||
| 403 | is_written = desc.is_written; | ||
| 404 | } | ||
| 405 | ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; | ||
| 406 | buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), | ||
| 407 | image_view.BufferSize(), image_view.format, | ||
| 408 | is_written, is_image); | ||
| 409 | ++index; | ||
| 410 | ++texture_buffer_index; | ||
| 411 | } | ||
| 412 | }}; | ||
| 413 | const Shader::Info& info{stage_infos[stage]}; | ||
| 414 | buffer_cache.UnbindGraphicsTextureBuffers(stage); | ||
| 415 | |||
| 416 | if constexpr (Spec::has_texture_buffers) { | ||
| 417 | for (const auto& desc : info.texture_buffer_descriptors) { | ||
| 418 | add_buffer(desc); | ||
| 419 | } | ||
| 420 | } | ||
| 421 | if constexpr (Spec::has_image_buffers) { | ||
| 422 | for (const auto& desc : info.image_buffer_descriptors) { | ||
| 423 | add_buffer(desc); | ||
| 424 | } | ||
| 425 | } | ||
| 426 | for (const auto& desc : info.texture_descriptors) { | ||
| 427 | texture_buffer_index += desc.count; | ||
| 428 | } | ||
| 429 | if constexpr (Spec::has_images) { | ||
| 430 | for (const auto& desc : info.image_descriptors) { | ||
| 431 | texture_buffer_index += desc.count; | ||
| 432 | } | ||
| 433 | } | ||
| 434 | }}; | ||
| 435 | if constexpr (Spec::enabled_stages[0]) { | ||
| 436 | bind_stage_info(0); | ||
| 437 | } | ||
| 438 | if constexpr (Spec::enabled_stages[1]) { | ||
| 439 | bind_stage_info(1); | ||
| 440 | } | ||
| 441 | if constexpr (Spec::enabled_stages[2]) { | ||
| 442 | bind_stage_info(2); | ||
| 443 | } | ||
| 444 | if constexpr (Spec::enabled_stages[3]) { | ||
| 445 | bind_stage_info(3); | ||
| 446 | } | ||
| 447 | if constexpr (Spec::enabled_stages[4]) { | ||
| 448 | bind_stage_info(4); | ||
| 449 | } | ||
| 450 | buffer_cache.UpdateGraphicsBuffers(is_indexed); | ||
| 451 | buffer_cache.BindHostGeometryBuffers(is_indexed); | ||
| 452 | |||
| 453 | if (!is_built.load(std::memory_order::relaxed)) { | ||
| 454 | WaitForBuild(); | ||
| 455 | } | ||
| 456 | if (assembly_programs[0].handle != 0) { | ||
| 457 | program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask); | ||
| 458 | } else { | ||
| 459 | program_manager.BindSourcePrograms(source_programs); | ||
| 460 | } | ||
| 461 | const ImageId* views_it{image_view_ids.data()}; | ||
| 462 | GLsizei texture_binding = 0; | ||
| 463 | GLsizei image_binding = 0; | ||
| 464 | std::array<GLuint, MAX_TEXTURES> textures; | ||
| 465 | std::array<GLuint, MAX_IMAGES> images; | ||
| 466 | const auto prepare_stage{[&](size_t stage) { | ||
| 467 | buffer_cache.runtime.SetImagePointers(&textures[texture_binding], &images[image_binding]); | ||
| 468 | buffer_cache.BindHostStageBuffers(stage); | ||
| 469 | |||
| 470 | texture_binding += num_texture_buffers[stage]; | ||
| 471 | image_binding += num_image_buffers[stage]; | ||
| 472 | |||
| 473 | views_it += num_texture_buffers[stage]; | ||
| 474 | views_it += num_image_buffers[stage]; | ||
| 475 | |||
| 476 | const auto& info{stage_infos[stage]}; | ||
| 477 | for (const auto& desc : info.texture_descriptors) { | ||
| 478 | for (u32 index = 0; index < desc.count; ++index) { | ||
| 479 | ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; | ||
| 480 | textures[texture_binding++] = image_view.Handle(desc.type); | ||
| 481 | } | ||
| 482 | } | ||
| 483 | for (const auto& desc : info.image_descriptors) { | ||
| 484 | for (u32 index = 0; index < desc.count; ++index) { | ||
| 485 | ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; | ||
| 486 | if (desc.is_written) { | ||
| 487 | texture_cache.MarkModification(image_view.image_id); | ||
| 488 | } | ||
| 489 | images[image_binding++] = image_view.StorageView(desc.type, desc.format); | ||
| 490 | } | ||
| 491 | } | ||
| 492 | }}; | ||
| 493 | if constexpr (Spec::enabled_stages[0]) { | ||
| 494 | prepare_stage(0); | ||
| 495 | } | ||
| 496 | if constexpr (Spec::enabled_stages[1]) { | ||
| 497 | prepare_stage(1); | ||
| 498 | } | ||
| 499 | if constexpr (Spec::enabled_stages[2]) { | ||
| 500 | prepare_stage(2); | ||
| 501 | } | ||
| 502 | if constexpr (Spec::enabled_stages[3]) { | ||
| 503 | prepare_stage(3); | ||
| 504 | } | ||
| 505 | if constexpr (Spec::enabled_stages[4]) { | ||
| 506 | prepare_stage(4); | ||
| 507 | } | ||
| 508 | if (texture_binding != 0) { | ||
| 509 | ASSERT(texture_binding == sampler_binding); | ||
| 510 | glBindTextures(0, texture_binding, textures.data()); | ||
| 511 | glBindSamplers(0, sampler_binding, samplers.data()); | ||
| 512 | } | ||
| 513 | if (image_binding != 0) { | ||
| 514 | glBindImageTextures(0, image_binding, images.data()); | ||
| 515 | } | ||
| 516 | } | ||
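The tail of ConfigureImpl batches every texture, sampler, and image binding for the draw into a single ARB_multi_bind call each (glBindTextures, glBindSamplers, glBindImageTextures) rather than binding unit by unit. A minimal self-contained sketch of that batching pattern, with hypothetical handle arrays standing in for the per-stage arrays built above (requires OpenGL 4.4 or GL_ARB_multi_bind):

    #include <array>
    #include <glad/glad.h>

    // Bind N textures and samplers to contiguous units starting at 0, one call
    // per resource kind instead of one glActiveTexture+glBindTexture per unit.
    void BindBatch(const std::array<GLuint, 4>& textures, const std::array<GLuint, 4>& samplers) {
        glBindTextures(0, static_cast<GLsizei>(textures.size()), textures.data());
        glBindSamplers(0, static_cast<GLsizei>(samplers.size()), samplers.data());
    }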
| 517 | |||
| 518 | void GraphicsPipeline::ConfigureTransformFeedbackImpl() const { | ||
| 519 | glTransformFeedbackStreamAttribsNV(num_xfb_attribs, xfb_attribs.data(), num_xfb_strides, | ||
| 520 | xfb_streams.data(), GL_INTERLEAVED_ATTRIBS); | ||
| 521 | } | ||
| 522 | |||
| 523 | void GraphicsPipeline::GenerateTransformFeedbackState() { | ||
| 524 | // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal | ||
| 525 | // when this is required. | ||
| 526 | GLint* cursor{xfb_attribs.data()}; | ||
| 527 | GLint* current_stream{xfb_streams.data()}; | ||
| 528 | |||
| 529 | for (size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) { | ||
| 530 | const auto& layout = key.xfb_state.layouts[feedback]; | ||
| 531 | UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding"); | ||
| 532 | if (layout.varying_count == 0) { | ||
| 533 | continue; | ||
| 534 | } | ||
| 535 | *current_stream = static_cast<GLint>(feedback); | ||
| 536 | if (current_stream != xfb_streams.data()) { | ||
| 537 | // When stepping one stream, push the expected token | ||
| 538 | cursor[0] = GL_NEXT_BUFFER_NV; | ||
| 539 | cursor[1] = 0; | ||
| 540 | cursor[2] = 0; | ||
| 541 | cursor += XFB_ENTRY_STRIDE; | ||
| 542 | } | ||
| 543 | ++current_stream; | ||
| 544 | |||
| 545 | const auto& locations = key.xfb_state.varyings[feedback]; | ||
| 546 | std::optional<u8> current_index; | ||
| 547 | for (u32 offset = 0; offset < layout.varying_count; ++offset) { | ||
| 548 | const u8 location = locations[offset]; | ||
| 549 | const u8 index = location / 4; | ||
| 550 | |||
| 551 | if (current_index == index) { | ||
| 552 | // Increase number of components of the previous attachment | ||
| 553 | ++cursor[-2]; | ||
| 554 | continue; | ||
| 555 | } | ||
| 556 | current_index = index; | ||
| 557 | |||
| 558 | std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location); | ||
| 559 | cursor[1] = 1; | ||
| 560 | cursor += XFB_ENTRY_STRIDE; | ||
| 561 | } | ||
| 562 | } | ||
| 563 | num_xfb_attribs = static_cast<GLsizei>((cursor - xfb_attribs.data()) / XFB_ENTRY_STRIDE); | ||
| 564 | num_xfb_strides = static_cast<GLsizei>(current_stream - xfb_streams.data()); | ||
| 565 | } | ||
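The token stream built above consists of (attribute, component count, index) triplets, with a GL_NEXT_BUFFER_NV triplet inserted each time the loop steps to a new buffer, and consecutive locations sharing an attribute index folded into the previous entry via ++cursor[-2]. A worked illustration with assumed capture state (not taken from a real title):

    // Assume buffer 0 captures a full vec4 position (four locations at index 7)
    // and buffer 1 captures two components of generic attribute 3 (index 11):
    //   xfb_attribs = { GL_POSITION,          4, 0,   // four merged locations
    //                   GL_NEXT_BUFFER_NV,    0, 0,   // separator between buffers
    //                   GL_GENERIC_ATTRIB_NV, 2, 3 }  // two components, attrib 3
    //   xfb_streams = { 0, 1 }
    //   num_xfb_attribs = 3 triplets, num_xfb_strides = 2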
| 566 | |||
| 567 | void GraphicsPipeline::WaitForBuild() { | ||
| 568 | std::unique_lock lock{built_mutex}; | ||
| 569 | built_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); | ||
| 570 | } | ||
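WaitForBuild parks the caller on built_condvar until a worker thread flips is_built; for the wake-up to be reliable, the completion side (outside this hunk) must take built_mutex before notifying, otherwise a waiter could test the predicate, see false, and sleep through the notification. The hand-off in miniature, reusing the member names from the header below:

    #include <atomic>
    #include <condition_variable>
    #include <mutex>

    struct BuildState {
        std::mutex built_mutex;
        std::condition_variable built_condvar;
        std::atomic_bool is_built{false};

        void MarkBuilt() {  // builder thread
            // Holding the lock orders this store against the waiter's predicate check.
            std::scoped_lock lock{built_mutex};
            is_built.store(true, std::memory_order::relaxed);
            built_condvar.notify_all();
        }
        void Wait() {  // render thread, mirrors GraphicsPipeline::WaitForBuild
            std::unique_lock lock{built_mutex};
            built_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
        }
    };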
| 571 | |||
| 572 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
new file mode 100644
index 000000000..4e28d9a42
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
| @@ -0,0 +1,169 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <cstring> | ||
| 9 | #include <type_traits> | ||
| 10 | #include <utility> | ||
| 11 | |||
| 12 | #include "common/bit_field.h" | ||
| 13 | #include "common/cityhash.h" | ||
| 14 | #include "common/common_types.h" | ||
| 15 | #include "shader_recompiler/shader_info.h" | ||
| 16 | #include "video_core/engines/maxwell_3d.h" | ||
| 17 | #include "video_core/memory_manager.h" | ||
| 18 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | ||
| 19 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 20 | #include "video_core/renderer_opengl/gl_texture_cache.h" | ||
| 21 | #include "video_core/transform_feedback.h" | ||
| 22 | |||
| 23 | namespace OpenGL { | ||
| 24 | |||
| 25 | namespace ShaderContext { | ||
| 26 | struct Context; | ||
| 27 | } | ||
| 28 | |||
| 29 | class Device; | ||
| 30 | class ProgramManager; | ||
| 31 | |||
| 32 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 33 | using ShaderWorker = Common::StatefulThreadWorker<ShaderContext::Context>; | ||
| 34 | |||
| 35 | struct GraphicsPipelineKey { | ||
| 36 | std::array<u64, 6> unique_hashes; | ||
| 37 | union { | ||
| 38 | u32 raw; | ||
| 39 | BitField<0, 1, u32> xfb_enabled; | ||
| 40 | BitField<1, 1, u32> early_z; | ||
| 41 | BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology; | ||
| 42 | BitField<6, 2, Maxwell::TessellationPrimitive> tessellation_primitive; | ||
| 43 | BitField<8, 2, Maxwell::TessellationSpacing> tessellation_spacing; | ||
| 44 | BitField<10, 1, u32> tessellation_clockwise; | ||
| 45 | }; | ||
| 46 | std::array<u32, 3> padding; | ||
| 47 | VideoCommon::TransformFeedbackState xfb_state; | ||
| 48 | |||
| 49 | size_t Hash() const noexcept { | ||
| 50 | return static_cast<size_t>(Common::CityHash64(reinterpret_cast<const char*>(this), Size())); | ||
| 51 | } | ||
| 52 | |||
| 53 | bool operator==(const GraphicsPipelineKey& rhs) const noexcept { | ||
| 54 | return std::memcmp(this, &rhs, Size()) == 0; | ||
| 55 | } | ||
| 56 | |||
| 57 | bool operator!=(const GraphicsPipelineKey& rhs) const noexcept { | ||
| 58 | return !operator==(rhs); | ||
| 59 | } | ||
| 60 | |||
| 61 | [[nodiscard]] size_t Size() const noexcept { | ||
| 62 | if (xfb_enabled != 0) { | ||
| 63 | return sizeof(GraphicsPipelineKey); | ||
| 64 | } else { | ||
| 65 | return offsetof(GraphicsPipelineKey, padding); | ||
| 66 | } | ||
| 67 | } | ||
| 68 | }; | ||
| 69 | static_assert(std::has_unique_object_representations_v<GraphicsPipelineKey>); | ||
| 70 | static_assert(std::is_trivially_copyable_v<GraphicsPipelineKey>); | ||
| 71 | static_assert(std::is_trivially_constructible_v<GraphicsPipelineKey>); | ||
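GraphicsPipelineKey is hashed and compared as raw bytes, which is only sound because of the three static_asserts: unique object representations guarantee there are no padding bits inside the hashed span, and trivial copyability keeps memcmp and CityHash64 well defined. Size() additionally shrinks both operations to the prefix before `padding` whenever transform feedback is disabled, so the large xfb_state tail cannot perturb the hash. The same pattern in miniature, on an illustrative type that is not from the codebase:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <type_traits>

    struct TinyKey {
        std::uint64_t hash_a;
        std::uint32_t flags;  // bit 0: extended state present
        std::uint32_t extra;  // only meaningful when bit 0 of flags is set

        std::size_t Size() const noexcept {
            // Hash/compare only the active prefix when the tail is unused.
            return (flags & 1) != 0 ? sizeof(TinyKey) : offsetof(TinyKey, extra);
        }
        bool operator==(const TinyKey& rhs) const noexcept {
            return std::memcmp(this, &rhs, Size()) == 0;
        }
    };
    // No padding bytes, so byte-wise hashing/comparison observes only real state:
    static_assert(std::has_unique_object_representations_v<TinyKey>);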
| 72 | |||
| 73 | class GraphicsPipeline { | ||
| 74 | public: | ||
| 75 | explicit GraphicsPipeline(const Device& device, TextureCache& texture_cache_, | ||
| 76 | BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, | ||
| 77 | Tegra::Engines::Maxwell3D& maxwell3d_, | ||
| 78 | ProgramManager& program_manager_, StateTracker& state_tracker_, | ||
| 79 | ShaderWorker* thread_worker, VideoCore::ShaderNotify* shader_notify, | ||
| 80 | std::array<std::string, 5> sources, | ||
| 81 | std::array<std::vector<u32>, 5> sources_spirv, | ||
| 82 | const std::array<const Shader::Info*, 5>& infos, | ||
| 83 | const GraphicsPipelineKey& key_); | ||
| 84 | |||
| 85 | void Configure(bool is_indexed) { | ||
| 86 | configure_func(this, is_indexed); | ||
| 87 | } | ||
| 88 | |||
| 89 | void ConfigureTransformFeedback() const { | ||
| 90 | if (num_xfb_attribs != 0) { | ||
| 91 | ConfigureTransformFeedbackImpl(); | ||
| 92 | } | ||
| 93 | } | ||
| 94 | |||
| 95 | [[nodiscard]] const GraphicsPipelineKey& Key() const noexcept { | ||
| 96 | return key; | ||
| 97 | } | ||
| 98 | |||
| 99 | [[nodiscard]] bool WritesGlobalMemory() const noexcept { | ||
| 100 | return writes_global_memory; | ||
| 101 | } | ||
| 102 | |||
| 103 | [[nodiscard]] bool IsBuilt() const noexcept { | ||
| 104 | return is_built.load(std::memory_order::relaxed); | ||
| 105 | } | ||
| 106 | |||
| 107 | template <typename Spec> | ||
| 108 | static auto MakeConfigureSpecFunc() { | ||
| 109 | return [](GraphicsPipeline* pipeline, bool is_indexed) { | ||
| 110 | pipeline->ConfigureImpl<Spec>(is_indexed); | ||
| 111 | }; | ||
| 112 | } | ||
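MakeConfigureSpecFunc erases the Spec template into the plain configure_func pointer stored below, so each specialization of ConfigureImpl is compiled once with every `if constexpr` branch resolved statically. The Spec types themselves are defined elsewhere in the .cpp; judging from the members ConfigureImpl reads, a specialization plausibly looks like this (names inferred from usage, values hypothetical):

    // Hypothetical Spec: vertex + fragment pipeline, no image or buffer views.
    struct VertexFragmentSpec {
        static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, true};
        static constexpr bool has_texture_buffers = false;
        static constexpr bool has_image_buffers = false;
        static constexpr bool has_images = false;
    };
    // Chosen once at pipeline creation time:
    //   configure_func = MakeConfigureSpecFunc<VertexFragmentSpec>();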
| 113 | |||
| 114 | private: | ||
| 115 | template <typename Spec> | ||
| 116 | void ConfigureImpl(bool is_indexed); | ||
| 117 | |||
| 118 | void ConfigureTransformFeedbackImpl() const; | ||
| 119 | |||
| 120 | void GenerateTransformFeedbackState(); | ||
| 121 | |||
| 122 | void WaitForBuild(); | ||
| 123 | |||
| 124 | TextureCache& texture_cache; | ||
| 125 | BufferCache& buffer_cache; | ||
| 126 | Tegra::MemoryManager& gpu_memory; | ||
| 127 | Tegra::Engines::Maxwell3D& maxwell3d; | ||
| 128 | ProgramManager& program_manager; | ||
| 129 | StateTracker& state_tracker; | ||
| 130 | const GraphicsPipelineKey key; | ||
| 131 | |||
| 132 | void (*configure_func)(GraphicsPipeline*, bool){}; | ||
| 133 | |||
| 134 | std::array<OGLProgram, 5> source_programs; | ||
| 135 | std::array<OGLAssemblyProgram, 5> assembly_programs; | ||
| 136 | u32 enabled_stages_mask{}; | ||
| 137 | |||
| 138 | std::array<Shader::Info, 5> stage_infos{}; | ||
| 139 | std::array<u32, 5> enabled_uniform_buffer_masks{}; | ||
| 140 | VideoCommon::UniformBufferSizes uniform_buffer_sizes{}; | ||
| 141 | std::array<u32, 5> base_uniform_bindings{}; | ||
| 142 | std::array<u32, 5> base_storage_bindings{}; | ||
| 143 | std::array<u32, 5> num_texture_buffers{}; | ||
| 144 | std::array<u32, 5> num_image_buffers{}; | ||
| 145 | |||
| 146 | bool use_storage_buffers{}; | ||
| 147 | bool writes_global_memory{}; | ||
| 148 | |||
| 149 | static constexpr std::size_t XFB_ENTRY_STRIDE = 3; | ||
| 150 | GLsizei num_xfb_attribs{}; | ||
| 151 | GLsizei num_xfb_strides{}; | ||
| 152 | std::array<GLint, 128 * XFB_ENTRY_STRIDE * Maxwell::NumTransformFeedbackBuffers> xfb_attribs{}; | ||
| 153 | std::array<GLint, Maxwell::NumTransformFeedbackBuffers> xfb_streams{}; | ||
| 154 | |||
| 155 | std::mutex built_mutex; | ||
| 156 | std::condition_variable built_condvar; | ||
| 157 | std::atomic_bool is_built{false}; | ||
| 158 | }; | ||
| 159 | |||
| 160 | } // namespace OpenGL | ||
| 161 | |||
| 162 | namespace std { | ||
| 163 | template <> | ||
| 164 | struct hash<OpenGL::GraphicsPipelineKey> { | ||
| 165 | size_t operator()(const OpenGL::GraphicsPipelineKey& k) const noexcept { | ||
| 166 | return k.Hash(); | ||
| 167 | } | ||
| 168 | }; | ||
| 169 | } // namespace std | ||
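The std::hash specialization lets the shader cache key pipelines directly in an unordered container without supplying a hasher; roughly like this (container name assumed for illustration):

    #include <memory>
    #include <unordered_map>
    // #include "video_core/renderer_opengl/gl_graphics_pipeline.h"

    // Lookups route through GraphicsPipelineKey::Hash() and the memcmp operator==.
    std::unordered_map<OpenGL::GraphicsPipelineKey, std::unique_ptr<OpenGL::GraphicsPipeline>>
        graphics_cache;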
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index ceb3abcb2..41d2b73f4 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
| @@ -23,7 +23,6 @@ | |||
| 23 | #include "core/memory.h" | 23 | #include "core/memory.h" |
| 24 | #include "video_core/engines/kepler_compute.h" | 24 | #include "video_core/engines/kepler_compute.h" |
| 25 | #include "video_core/engines/maxwell_3d.h" | 25 | #include "video_core/engines/maxwell_3d.h" |
| 26 | #include "video_core/engines/shader_type.h" | ||
| 27 | #include "video_core/memory_manager.h" | 26 | #include "video_core/memory_manager.h" |
| 28 | #include "video_core/renderer_opengl/gl_device.h" | 27 | #include "video_core/renderer_opengl/gl_device.h" |
| 29 | #include "video_core/renderer_opengl/gl_query_cache.h" | 28 | #include "video_core/renderer_opengl/gl_query_cache.h" |
| @@ -40,7 +39,6 @@ namespace OpenGL { | |||
| 40 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 39 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 41 | using GLvec4 = std::array<GLfloat, 4>; | 40 | using GLvec4 = std::array<GLfloat, 4>; |
| 42 | 41 | ||
| 43 | using Tegra::Engines::ShaderType; | ||
| 44 | using VideoCore::Surface::PixelFormat; | 42 | using VideoCore::Surface::PixelFormat; |
| 45 | using VideoCore::Surface::SurfaceTarget; | 43 | using VideoCore::Surface::SurfaceTarget; |
| 46 | using VideoCore::Surface::SurfaceType; | 44 | using VideoCore::Surface::SurfaceType; |
| @@ -51,112 +49,11 @@ MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192)); | |||
| 51 | MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Management", MP_RGB(100, 255, 100)); | 49 | MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Management", MP_RGB(100, 255, 100)); |
| 52 | 50 | ||
| 53 | namespace { | 51 | namespace { |
| 54 | |||
| 55 | constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; | 52 | constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; |
| 56 | 53 | ||
| 57 | struct TextureHandle { | ||
| 58 | constexpr TextureHandle(u32 data, bool via_header_index) { | ||
| 59 | const Tegra::Texture::TextureHandle handle{data}; | ||
| 60 | image = handle.tic_id; | ||
| 61 | sampler = via_header_index ? image : handle.tsc_id.Value(); | ||
| 62 | } | ||
| 63 | |||
| 64 | u32 image; | ||
| 65 | u32 sampler; | ||
| 66 | }; | ||
| 67 | |||
| 68 | template <typename Engine, typename Entry> | ||
| 69 | TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry, | ||
| 70 | ShaderType shader_type, size_t index = 0) { | ||
| 71 | if constexpr (std::is_same_v<Entry, SamplerEntry>) { | ||
| 72 | if (entry.is_separated) { | ||
| 73 | const u32 buffer_1 = entry.buffer; | ||
| 74 | const u32 buffer_2 = entry.secondary_buffer; | ||
| 75 | const u32 offset_1 = entry.offset; | ||
| 76 | const u32 offset_2 = entry.secondary_offset; | ||
| 77 | const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); | ||
| 78 | const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); | ||
| 79 | return TextureHandle(handle_1 | handle_2, via_header_index); | ||
| 80 | } | ||
| 81 | } | ||
| 82 | if (entry.is_bindless) { | ||
| 83 | const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); | ||
| 84 | return TextureHandle(raw, via_header_index); | ||
| 85 | } | ||
| 86 | const u32 buffer = engine.GetBoundBuffer(); | ||
| 87 | const u64 offset = (entry.offset + index) * sizeof(u32); | ||
| 88 | return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); | ||
| 89 | } | ||
| 90 | |||
| 91 | /// Translates hardware transform feedback indices | ||
| 92 | /// @param location Hardware location | ||
| 93 | /// @return Pair of ARB_transform_feedback3 token stream first and third arguments | ||
| 94 | /// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt | ||
| 95 | std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) { | ||
| 96 | const u8 index = location / 4; | ||
| 97 | if (index >= 8 && index <= 39) { | ||
| 98 | return {GL_GENERIC_ATTRIB_NV, index - 8}; | ||
| 99 | } | ||
| 100 | if (index >= 48 && index <= 55) { | ||
| 101 | return {GL_TEXTURE_COORD_NV, index - 48}; | ||
| 102 | } | ||
| 103 | switch (index) { | ||
| 104 | case 7: | ||
| 105 | return {GL_POSITION, 0}; | ||
| 106 | case 40: | ||
| 107 | return {GL_PRIMARY_COLOR_NV, 0}; | ||
| 108 | case 41: | ||
| 109 | return {GL_SECONDARY_COLOR_NV, 0}; | ||
| 110 | case 42: | ||
| 111 | return {GL_BACK_PRIMARY_COLOR_NV, 0}; | ||
| 112 | case 43: | ||
| 113 | return {GL_BACK_SECONDARY_COLOR_NV, 0}; | ||
| 114 | } | ||
| 115 | UNIMPLEMENTED_MSG("index={}", index); | ||
| 116 | return {GL_POSITION, 0}; | ||
| 117 | } | ||
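The mapping divides the hardware varying location by four to get an attribute index, then translates index ranges into NV transform-feedback tokens. Worked from the ranges above:

    // location -> index = location / 4 -> {token, third argument}
    //   location 28  -> index 7  -> {GL_POSITION, 0}
    //   location 36  -> index 9  -> {GL_GENERIC_ATTRIB_NV, 9 - 8}   == attrib 1
    //   location 192 -> index 48 -> {GL_TEXTURE_COORD_NV, 48 - 48}  == coord 0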
| 118 | |||
| 119 | void oglEnable(GLenum cap, bool state) { | 54 | void oglEnable(GLenum cap, bool state) { |
| 120 | (state ? glEnable : glDisable)(cap); | 55 | (state ? glEnable : glDisable)(cap); |
| 121 | } | 56 | } |
| 122 | |||
| 123 | ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { | ||
| 124 | if (entry.is_buffer) { | ||
| 125 | return ImageViewType::Buffer; | ||
| 126 | } | ||
| 127 | switch (entry.type) { | ||
| 128 | case Tegra::Shader::TextureType::Texture1D: | ||
| 129 | return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D; | ||
| 130 | case Tegra::Shader::TextureType::Texture2D: | ||
| 131 | return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D; | ||
| 132 | case Tegra::Shader::TextureType::Texture3D: | ||
| 133 | return ImageViewType::e3D; | ||
| 134 | case Tegra::Shader::TextureType::TextureCube: | ||
| 135 | return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube; | ||
| 136 | } | ||
| 137 | UNREACHABLE(); | ||
| 138 | return ImageViewType::e2D; | ||
| 139 | } | ||
| 140 | |||
| 141 | ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) { | ||
| 142 | switch (entry.type) { | ||
| 143 | case Tegra::Shader::ImageType::Texture1D: | ||
| 144 | return ImageViewType::e1D; | ||
| 145 | case Tegra::Shader::ImageType::Texture1DArray: | ||
| 146 | return ImageViewType::e1DArray; | ||
| 147 | case Tegra::Shader::ImageType::Texture2D: | ||
| 148 | return ImageViewType::e2D; | ||
| 149 | case Tegra::Shader::ImageType::Texture2DArray: | ||
| 150 | return ImageViewType::e2DArray; | ||
| 151 | case Tegra::Shader::ImageType::Texture3D: | ||
| 152 | return ImageViewType::e3D; | ||
| 153 | case Tegra::Shader::ImageType::TextureBuffer: | ||
| 154 | return ImageViewType::Buffer; | ||
| 155 | } | ||
| 156 | UNREACHABLE(); | ||
| 157 | return ImageViewType::e2D; | ||
| 158 | } | ||
| 159 | |||
| 160 | } // Anonymous namespace | 57 | } // Anonymous namespace |
| 161 | 58 | ||
| 162 | RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | 59 | RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, |
| @@ -170,14 +67,10 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra | |||
| 170 | texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), | 67 | texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), |
| 171 | buffer_cache_runtime(device), | 68 | buffer_cache_runtime(device), |
| 172 | buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), | 69 | buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), |
| 173 | shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), | 70 | shader_cache(*this, emu_window_, maxwell3d, kepler_compute, gpu_memory, device, texture_cache, |
| 71 | buffer_cache, program_manager, state_tracker, gpu.ShaderNotify()), | ||
| 174 | query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache), | 72 | query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache), |
| 175 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), | 73 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {} |
| 176 | async_shaders(emu_window_) { | ||
| 177 | if (device.UseAsynchronousShaders()) { | ||
| 178 | async_shaders.AllocateWorkers(); | ||
| 179 | } | ||
| 180 | } | ||
| 181 | 74 | ||
| 182 | RasterizerOpenGL::~RasterizerOpenGL() = default; | 75 | RasterizerOpenGL::~RasterizerOpenGL() = default; |
| 183 | 76 | ||
| @@ -204,7 +97,7 @@ void RasterizerOpenGL::SyncVertexFormats() { | |||
| 204 | const auto gl_index = static_cast<GLuint>(index); | 97 | const auto gl_index = static_cast<GLuint>(index); |
| 205 | 98 | ||
| 206 | // Disable constant attributes. | 99 | // Disable constant attributes. |
| 207 | if (attrib.IsConstant()) { | 100 | if (attrib.constant) { |
| 208 | glDisableVertexAttribArray(gl_index); | 101 | glDisableVertexAttribArray(gl_index); |
| 209 | continue; | 102 | continue; |
| 210 | } | 103 | } |
| @@ -244,116 +137,9 @@ void RasterizerOpenGL::SyncVertexInstances() { | |||
| 244 | } | 137 | } |
| 245 | } | 138 | } |
| 246 | 139 | ||
| 247 | void RasterizerOpenGL::SetupShaders(bool is_indexed) { | ||
| 248 | u32 clip_distances = 0; | ||
| 249 | |||
| 250 | std::array<Shader*, Maxwell::MaxShaderStage> shaders{}; | ||
| 251 | image_view_indices.clear(); | ||
| 252 | sampler_handles.clear(); | ||
| 253 | |||
| 254 | texture_cache.SynchronizeGraphicsDescriptors(); | ||
| 255 | |||
| 256 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | ||
| 257 | const auto& shader_config = maxwell3d.regs.shader_config[index]; | ||
| 258 | const auto program{static_cast<Maxwell::ShaderProgram>(index)}; | ||
| 259 | |||
| 260 | // Skip stages that are not enabled | ||
| 261 | if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { | ||
| 262 | switch (program) { | ||
| 263 | case Maxwell::ShaderProgram::Geometry: | ||
| 264 | program_manager.UseGeometryShader(0); | ||
| 265 | break; | ||
| 266 | case Maxwell::ShaderProgram::Fragment: | ||
| 267 | program_manager.UseFragmentShader(0); | ||
| 268 | break; | ||
| 269 | default: | ||
| 270 | break; | ||
| 271 | } | ||
| 272 | continue; | ||
| 273 | } | ||
| 274 | // Currently these stages are not supported in the OpenGL backend. | ||
| 275 | // TODO(Blinkhawk): Port tessellation shaders from Vulkan to OpenGL | ||
| 276 | if (program == Maxwell::ShaderProgram::TesselationControl || | ||
| 277 | program == Maxwell::ShaderProgram::TesselationEval) { | ||
| 278 | continue; | ||
| 279 | } | ||
| 280 | |||
| 281 | Shader* const shader = shader_cache.GetStageProgram(program, async_shaders); | ||
| 282 | const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0; | ||
| 283 | switch (program) { | ||
| 284 | case Maxwell::ShaderProgram::VertexA: | ||
| 285 | case Maxwell::ShaderProgram::VertexB: | ||
| 286 | program_manager.UseVertexShader(program_handle); | ||
| 287 | break; | ||
| 288 | case Maxwell::ShaderProgram::Geometry: | ||
| 289 | program_manager.UseGeometryShader(program_handle); | ||
| 290 | break; | ||
| 291 | case Maxwell::ShaderProgram::Fragment: | ||
| 292 | program_manager.UseFragmentShader(program_handle); | ||
| 293 | break; | ||
| 294 | default: | ||
| 295 | UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, | ||
| 296 | shader_config.enable.Value(), shader_config.offset); | ||
| 297 | break; | ||
| 298 | } | ||
| 299 | |||
| 300 | // Stage indices are 0 - 5 | ||
| 301 | const size_t stage = index == 0 ? 0 : index - 1; | ||
| 302 | shaders[stage] = shader; | ||
| 303 | |||
| 304 | SetupDrawTextures(shader, stage); | ||
| 305 | SetupDrawImages(shader, stage); | ||
| 306 | |||
| 307 | buffer_cache.SetEnabledUniformBuffers(stage, shader->GetEntries().enabled_uniform_buffers); | ||
| 308 | |||
| 309 | buffer_cache.UnbindGraphicsStorageBuffers(stage); | ||
| 310 | u32 ssbo_index = 0; | ||
| 311 | for (const auto& buffer : shader->GetEntries().global_memory_entries) { | ||
| 312 | buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index, | ||
| 313 | buffer.cbuf_offset, buffer.is_written); | ||
| 314 | ++ssbo_index; | ||
| 315 | } | ||
| 316 | |||
| 317 | // Workaround for Intel drivers. | ||
| 318 | // When a clip distance is enabled but not set in the shader it crops parts of the screen | ||
| 319 | // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the | ||
| 320 | // clip distances only when it's written by a shader stage. | ||
| 321 | clip_distances |= shader->GetEntries().clip_distances; | ||
| 322 | |||
| 323 | // When VertexA is enabled, we have dual vertex shaders | ||
| 324 | if (program == Maxwell::ShaderProgram::VertexA) { | ||
| 325 | // VertexB was combined with VertexA, so we skip the VertexB iteration | ||
| 326 | ++index; | ||
| 327 | } | ||
| 328 | } | ||
| 329 | SyncClipEnabled(clip_distances); | ||
| 330 | maxwell3d.dirty.flags[Dirty::Shaders] = false; | ||
| 331 | |||
| 332 | buffer_cache.UpdateGraphicsBuffers(is_indexed); | ||
| 333 | |||
| 334 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); | ||
| 335 | texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); | ||
| 336 | |||
| 337 | buffer_cache.BindHostGeometryBuffers(is_indexed); | ||
| 338 | |||
| 339 | size_t image_view_index = 0; | ||
| 340 | size_t texture_index = 0; | ||
| 341 | size_t image_index = 0; | ||
| 342 | for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { | ||
| 343 | const Shader* const shader = shaders[stage]; | ||
| 344 | if (!shader) { | ||
| 345 | continue; | ||
| 346 | } | ||
| 347 | buffer_cache.BindHostStageBuffers(stage); | ||
| 348 | const auto& base = device.GetBaseBindings(stage); | ||
| 349 | BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index, | ||
| 350 | texture_index, image_index); | ||
| 351 | } | ||
| 352 | } | ||
| 353 | |||
| 354 | void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_loading, | 140 | void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_loading, |
| 355 | const VideoCore::DiskResourceLoadCallback& callback) { | 141 | const VideoCore::DiskResourceLoadCallback& callback) { |
| 356 | shader_cache.LoadDiskCache(title_id, stop_loading, callback); | 142 | shader_cache.LoadDiskResources(title_id, stop_loading, callback); |
| 357 | } | 143 | } |
| 358 | 144 | ||
| 359 | void RasterizerOpenGL::Clear() { | 145 | void RasterizerOpenGL::Clear() { |
| @@ -432,16 +218,15 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 432 | 218 | ||
| 433 | SyncState(); | 219 | SyncState(); |
| 434 | 220 | ||
| 435 | // Setup shaders and their used resources. | 221 | GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()}; |
| 222 | if (!pipeline) { | ||
| 223 | return; | ||
| 224 | } | ||
| 436 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | 225 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; |
| 437 | SetupShaders(is_indexed); | 226 | pipeline->Configure(is_indexed); |
| 438 | |||
| 439 | texture_cache.UpdateRenderTargets(false); | ||
| 440 | state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); | ||
| 441 | program_manager.BindGraphicsPipeline(); | ||
| 442 | 227 | ||
| 443 | const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); | 228 | const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); |
| 444 | BeginTransformFeedback(primitive_mode); | 229 | BeginTransformFeedback(pipeline, primitive_mode); |
| 445 | 230 | ||
| 446 | const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance); | 231 | const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance); |
| 447 | const GLsizei num_instances = | 232 | const GLsizei num_instances = |
| @@ -480,35 +265,24 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 480 | num_instances, base_instance); | 265 | num_instances, base_instance); |
| 481 | } | 266 | } |
| 482 | } | 267 | } |
| 483 | |||
| 484 | EndTransformFeedback(); | 268 | EndTransformFeedback(); |
| 485 | 269 | ||
| 486 | ++num_queued_commands; | 270 | ++num_queued_commands; |
| 271 | has_written_global_memory |= pipeline->WritesGlobalMemory(); | ||
| 487 | 272 | ||
| 488 | gpu.TickWork(); | 273 | gpu.TickWork(); |
| 489 | } | 274 | } |
| 490 | 275 | ||
| 491 | void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | 276 | void RasterizerOpenGL::DispatchCompute() { |
| 492 | Shader* const kernel = shader_cache.GetComputeKernel(code_addr); | 277 | ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()}; |
| 493 | 278 | if (!pipeline) { | |
| 494 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | 279 | return; |
| 495 | BindComputeTextures(kernel); | 280 | } |
| 496 | 281 | pipeline->Configure(); | |
| 497 | const auto& entries = kernel->GetEntries(); | 282 | const auto& qmd{kepler_compute.launch_description}; |
| 498 | buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers); | 283 | glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z); |
| 499 | buffer_cache.UnbindComputeStorageBuffers(); | ||
| 500 | u32 ssbo_index = 0; | ||
| 501 | for (const auto& buffer : entries.global_memory_entries) { | ||
| 502 | buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset, | ||
| 503 | buffer.is_written); | ||
| 504 | ++ssbo_index; | ||
| 505 | } | ||
| 506 | buffer_cache.UpdateComputeBuffers(); | ||
| 507 | buffer_cache.BindHostComputeBuffers(); | ||
| 508 | |||
| 509 | const auto& launch_desc = kepler_compute.launch_description; | ||
| 510 | glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); | ||
| 511 | ++num_queued_commands; | 284 | ++num_queued_commands; |
| 285 | has_written_global_memory |= pipeline->WritesGlobalMemory(); | ||
| 512 | } | 286 | } |
| 513 | 287 | ||
| 514 | void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { | 288 | void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { |
| @@ -661,7 +435,7 @@ void RasterizerOpenGL::WaitForIdle() { | |||
| 661 | } | 435 | } |
| 662 | 436 | ||
| 663 | void RasterizerOpenGL::FragmentBarrier() { | 437 | void RasterizerOpenGL::FragmentBarrier() { |
| 664 | glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT); | 438 | glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT); |
| 665 | } | 439 | } |
| 666 | 440 | ||
| 667 | void RasterizerOpenGL::TiledCacheBarrier() { | 441 | void RasterizerOpenGL::TiledCacheBarrier() { |
| @@ -674,6 +448,13 @@ void RasterizerOpenGL::FlushCommands() { | |||
| 674 | return; | 448 | return; |
| 675 | } | 449 | } |
| 676 | num_queued_commands = 0; | 450 | num_queued_commands = 0; |
| 451 | |||
| 452 | // Make sure memory stored from the previous GL command stream is visible | ||
| 453 | // This is only needed on assembly shaders where we write to GPU memory with raw pointers | ||
| 454 | if (has_written_global_memory) { | ||
| 455 | has_written_global_memory = false; | ||
| 456 | glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); | ||
| 457 | } | ||
| 677 | glFlush(); | 458 | glFlush(); |
| 678 | } | 459 | } |
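The new has_written_global_memory flag converts a per-draw/per-dispatch property (pipeline->WritesGlobalMemory()) into at most one glMemoryBarrier per flush instead of one per command. Its bookkeeping reduced to a skeleton, with hypothetical names:

    #include <cstddef>
    #include <glad/glad.h>

    struct CommandTracker {
        std::size_t queued = 0;
        bool wrote_global_memory = false;

        void OnCommand(bool writes_global) {  // called per draw/dispatch
            ++queued;
            wrote_global_memory |= writes_global;
        }
        void Flush() {
            if (queued == 0) {
                return;
            }
            queued = 0;
            if (wrote_global_memory) {
                wrote_global_memory = false;
                // Make raw-pointer stores from assembly shaders visible to later reads.
                glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
            }
            glFlush();
        }
    };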
| 679 | 460 | ||
| @@ -721,111 +502,11 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 721 | // ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different"); | 502 | // ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different"); |
| 722 | // ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different"); | 503 | // ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different"); |
| 723 | 504 | ||
| 724 | screen_info.display_texture = image_view->Handle(ImageViewType::e2D); | 505 | screen_info.display_texture = image_view->Handle(Shader::TextureType::Color2D); |
| 725 | screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); | 506 | screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); |
| 726 | return true; | 507 | return true; |
| 727 | } | 508 | } |
| 728 | 509 | ||
| 729 | void RasterizerOpenGL::BindComputeTextures(Shader* kernel) { | ||
| 730 | image_view_indices.clear(); | ||
| 731 | sampler_handles.clear(); | ||
| 732 | |||
| 733 | texture_cache.SynchronizeComputeDescriptors(); | ||
| 734 | |||
| 735 | SetupComputeTextures(kernel); | ||
| 736 | SetupComputeImages(kernel); | ||
| 737 | |||
| 738 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); | ||
| 739 | texture_cache.FillComputeImageViews(indices_span, image_view_ids); | ||
| 740 | |||
| 741 | program_manager.BindCompute(kernel->GetHandle()); | ||
| 742 | size_t image_view_index = 0; | ||
| 743 | size_t texture_index = 0; | ||
| 744 | size_t image_index = 0; | ||
| 745 | BindTextures(kernel->GetEntries(), 0, 0, image_view_index, texture_index, image_index); | ||
| 746 | } | ||
| 747 | |||
| 748 | void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_texture, | ||
| 749 | GLuint base_image, size_t& image_view_index, | ||
| 750 | size_t& texture_index, size_t& image_index) { | ||
| 751 | const GLuint* const samplers = sampler_handles.data() + texture_index; | ||
| 752 | const GLuint* const textures = texture_handles.data() + texture_index; | ||
| 753 | const GLuint* const images = image_handles.data() + image_index; | ||
| 754 | |||
| 755 | const size_t num_samplers = entries.samplers.size(); | ||
| 756 | for (const auto& sampler : entries.samplers) { | ||
| 757 | for (size_t i = 0; i < sampler.size; ++i) { | ||
| 758 | const ImageViewId image_view_id = image_view_ids[image_view_index++]; | ||
| 759 | const ImageView& image_view = texture_cache.GetImageView(image_view_id); | ||
| 760 | const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(sampler)); | ||
| 761 | texture_handles[texture_index++] = handle; | ||
| 762 | } | ||
| 763 | } | ||
| 764 | const size_t num_images = entries.images.size(); | ||
| 765 | for (size_t unit = 0; unit < num_images; ++unit) { | ||
| 766 | // TODO: Mark as modified | ||
| 767 | const ImageViewId image_view_id = image_view_ids[image_view_index++]; | ||
| 768 | const ImageView& image_view = texture_cache.GetImageView(image_view_id); | ||
| 769 | const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(entries.images[unit])); | ||
| 770 | image_handles[image_index] = handle; | ||
| 771 | ++image_index; | ||
| 772 | } | ||
| 773 | if (num_samplers > 0) { | ||
| 774 | glBindSamplers(base_texture, static_cast<GLsizei>(num_samplers), samplers); | ||
| 775 | glBindTextures(base_texture, static_cast<GLsizei>(num_samplers), textures); | ||
| 776 | } | ||
| 777 | if (num_images > 0) { | ||
| 778 | glBindImageTextures(base_image, static_cast<GLsizei>(num_images), images); | ||
| 779 | } | ||
| 780 | } | ||
| 781 | |||
| 782 | void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) { | ||
| 783 | const bool via_header_index = | ||
| 784 | maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | ||
| 785 | for (const auto& entry : shader->GetEntries().samplers) { | ||
| 786 | const auto shader_type = static_cast<ShaderType>(stage_index); | ||
| 787 | for (size_t index = 0; index < entry.size; ++index) { | ||
| 788 | const auto handle = | ||
| 789 | GetTextureInfo(maxwell3d, via_header_index, entry, shader_type, index); | ||
| 790 | const Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler); | ||
| 791 | sampler_handles.push_back(sampler->Handle()); | ||
| 792 | image_view_indices.push_back(handle.image); | ||
| 793 | } | ||
| 794 | } | ||
| 795 | } | ||
| 796 | |||
| 797 | void RasterizerOpenGL::SetupComputeTextures(const Shader* kernel) { | ||
| 798 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | ||
| 799 | for (const auto& entry : kernel->GetEntries().samplers) { | ||
| 800 | for (size_t i = 0; i < entry.size; ++i) { | ||
| 801 | const auto handle = | ||
| 802 | GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute, i); | ||
| 803 | const Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); | ||
| 804 | sampler_handles.push_back(sampler->Handle()); | ||
| 805 | image_view_indices.push_back(handle.image); | ||
| 806 | } | ||
| 807 | } | ||
| 808 | } | ||
| 809 | |||
| 810 | void RasterizerOpenGL::SetupDrawImages(const Shader* shader, size_t stage_index) { | ||
| 811 | const bool via_header_index = | ||
| 812 | maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | ||
| 813 | for (const auto& entry : shader->GetEntries().images) { | ||
| 814 | const auto shader_type = static_cast<ShaderType>(stage_index); | ||
| 815 | const auto handle = GetTextureInfo(maxwell3d, via_header_index, entry, shader_type); | ||
| 816 | image_view_indices.push_back(handle.image); | ||
| 817 | } | ||
| 818 | } | ||
| 819 | |||
| 820 | void RasterizerOpenGL::SetupComputeImages(const Shader* shader) { | ||
| 821 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | ||
| 822 | for (const auto& entry : shader->GetEntries().images) { | ||
| 823 | const auto handle = | ||
| 824 | GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute); | ||
| 825 | image_view_indices.push_back(handle.image); | ||
| 826 | } | ||
| 827 | } | ||
| 828 | |||
| 829 | void RasterizerOpenGL::SyncState() { | 510 | void RasterizerOpenGL::SyncState() { |
| 830 | SyncViewport(); | 511 | SyncViewport(); |
| 831 | SyncRasterizeEnable(); | 512 | SyncRasterizeEnable(); |
| @@ -941,7 +622,7 @@ void RasterizerOpenGL::SyncDepthClamp() { | |||
| 941 | 622 | ||
| 942 | void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) { | 623 | void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) { |
| 943 | auto& flags = maxwell3d.dirty.flags; | 624 | auto& flags = maxwell3d.dirty.flags; |
| 944 | if (!flags[Dirty::ClipDistances] && !flags[Dirty::Shaders]) { | 625 | if (!flags[Dirty::ClipDistances] && !flags[VideoCommon::Dirty::Shaders]) { |
| 945 | return; | 626 | return; |
| 946 | } | 627 | } |
| 947 | flags[Dirty::ClipDistances] = false; | 628 | flags[Dirty::ClipDistances] = false; |
| @@ -1318,68 +999,13 @@ void RasterizerOpenGL::SyncFramebufferSRGB() { | |||
| 1318 | oglEnable(GL_FRAMEBUFFER_SRGB, maxwell3d.regs.framebuffer_srgb); | 999 | oglEnable(GL_FRAMEBUFFER_SRGB, maxwell3d.regs.framebuffer_srgb); |
| 1319 | } | 1000 | } |
| 1320 | 1001 | ||
| 1321 | void RasterizerOpenGL::SyncTransformFeedback() { | 1002 | void RasterizerOpenGL::BeginTransformFeedback(GraphicsPipeline* program, GLenum primitive_mode) { |
| 1322 | // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal | ||
| 1323 | // when this is required. | ||
| 1324 | const auto& regs = maxwell3d.regs; | ||
| 1325 | |||
| 1326 | static constexpr std::size_t STRIDE = 3; | ||
| 1327 | std::array<GLint, 128 * STRIDE * Maxwell::NumTransformFeedbackBuffers> attribs; | ||
| 1328 | std::array<GLint, Maxwell::NumTransformFeedbackBuffers> streams; | ||
| 1329 | |||
| 1330 | GLint* cursor = attribs.data(); | ||
| 1331 | GLint* current_stream = streams.data(); | ||
| 1332 | |||
| 1333 | for (std::size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) { | ||
| 1334 | const auto& layout = regs.tfb_layouts[feedback]; | ||
| 1335 | UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding"); | ||
| 1336 | if (layout.varying_count == 0) { | ||
| 1337 | continue; | ||
| 1338 | } | ||
| 1339 | |||
| 1340 | *current_stream = static_cast<GLint>(feedback); | ||
| 1341 | if (current_stream != streams.data()) { | ||
| 1342 | // When stepping one stream, push the expected token | ||
| 1343 | cursor[0] = GL_NEXT_BUFFER_NV; | ||
| 1344 | cursor[1] = 0; | ||
| 1345 | cursor[2] = 0; | ||
| 1346 | cursor += STRIDE; | ||
| 1347 | } | ||
| 1348 | ++current_stream; | ||
| 1349 | |||
| 1350 | const auto& locations = regs.tfb_varying_locs[feedback]; | ||
| 1351 | std::optional<u8> current_index; | ||
| 1352 | for (u32 offset = 0; offset < layout.varying_count; ++offset) { | ||
| 1353 | const u8 location = locations[offset]; | ||
| 1354 | const u8 index = location / 4; | ||
| 1355 | |||
| 1356 | if (current_index == index) { | ||
| 1357 | // Increase number of components of the previous attachment | ||
| 1358 | ++cursor[-2]; | ||
| 1359 | continue; | ||
| 1360 | } | ||
| 1361 | current_index = index; | ||
| 1362 | |||
| 1363 | std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location); | ||
| 1364 | cursor[1] = 1; | ||
| 1365 | cursor += STRIDE; | ||
| 1366 | } | ||
| 1367 | } | ||
| 1368 | |||
| 1369 | const GLsizei num_attribs = static_cast<GLsizei>((cursor - attribs.data()) / STRIDE); | ||
| 1370 | const GLsizei num_strides = static_cast<GLsizei>(current_stream - streams.data()); | ||
| 1371 | glTransformFeedbackStreamAttribsNV(num_attribs, attribs.data(), num_strides, streams.data(), | ||
| 1372 | GL_INTERLEAVED_ATTRIBS); | ||
| 1373 | } | ||
| 1374 | |||
| 1375 | void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { | ||
| 1376 | const auto& regs = maxwell3d.regs; | 1003 | const auto& regs = maxwell3d.regs; |
| 1377 | if (regs.tfb_enabled == 0) { | 1004 | if (regs.tfb_enabled == 0) { |
| 1378 | return; | 1005 | return; |
| 1379 | } | 1006 | } |
| 1380 | if (device.UseAssemblyShaders()) { | 1007 | program->ConfigureTransformFeedback(); |
| 1381 | SyncTransformFeedback(); | 1008 | |
| 1382 | } | ||
| 1383 | UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || | 1009 | UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || |
| 1384 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || | 1010 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || |
| 1385 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); | 1011 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); |
| @@ -1393,11 +1019,9 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { | |||
| 1393 | } | 1019 | } |
| 1394 | 1020 | ||
| 1395 | void RasterizerOpenGL::EndTransformFeedback() { | 1021 | void RasterizerOpenGL::EndTransformFeedback() { |
| 1396 | const auto& regs = maxwell3d.regs; | 1022 | if (maxwell3d.regs.tfb_enabled != 0) { |
| 1397 | if (regs.tfb_enabled == 0) { | 1023 | glEndTransformFeedback(); |
| 1398 | return; | ||
| 1399 | } | 1024 | } |
| 1400 | glEndTransformFeedback(); | ||
| 1401 | } | 1025 | } |
| 1402 | 1026 | ||
| 1403 | AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_) : buffer_cache{buffer_cache_} {} | 1027 | AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_) : buffer_cache{buffer_cache_} {} |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index d30ad698f..d0397b745 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
| @@ -28,11 +28,9 @@ | |||
| 28 | #include "video_core/renderer_opengl/gl_query_cache.h" | 28 | #include "video_core/renderer_opengl/gl_query_cache.h" |
| 29 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 29 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 30 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 30 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| 31 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||
| 32 | #include "video_core/renderer_opengl/gl_shader_manager.h" | 31 | #include "video_core/renderer_opengl/gl_shader_manager.h" |
| 33 | #include "video_core/renderer_opengl/gl_state_tracker.h" | 32 | #include "video_core/renderer_opengl/gl_state_tracker.h" |
| 34 | #include "video_core/renderer_opengl/gl_texture_cache.h" | 33 | #include "video_core/renderer_opengl/gl_texture_cache.h" |
| 35 | #include "video_core/shader/async_shaders.h" | ||
| 36 | #include "video_core/textures/texture.h" | 34 | #include "video_core/textures/texture.h" |
| 37 | 35 | ||
| 38 | namespace Core::Memory { | 36 | namespace Core::Memory { |
| @@ -81,7 +79,7 @@ public: | |||
| 81 | 79 | ||
| 82 | void Draw(bool is_indexed, bool is_instanced) override; | 80 | void Draw(bool is_indexed, bool is_instanced) override; |
| 83 | void Clear() override; | 81 | void Clear() override; |
| 84 | void DispatchCompute(GPUVAddr code_addr) override; | 82 | void DispatchCompute() override; |
| 85 | void ResetCounter(VideoCore::QueryType type) override; | 83 | void ResetCounter(VideoCore::QueryType type) override; |
| 86 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; | 84 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; |
| 87 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | 85 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; |
| @@ -118,36 +116,11 @@ public: | |||
| 118 | return num_queued_commands > 0; | 116 | return num_queued_commands > 0; |
| 119 | } | 117 | } |
| 120 | 118 | ||
| 121 | VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { | ||
| 122 | return async_shaders; | ||
| 123 | } | ||
| 124 | |||
| 125 | const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const { | ||
| 126 | return async_shaders; | ||
| 127 | } | ||
| 128 | |||
| 129 | private: | 119 | private: |
| 130 | static constexpr size_t MAX_TEXTURES = 192; | 120 | static constexpr size_t MAX_TEXTURES = 192; |
| 131 | static constexpr size_t MAX_IMAGES = 48; | 121 | static constexpr size_t MAX_IMAGES = 48; |
| 132 | static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES; | 122 | static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES; |
| 133 | 123 | ||
| 134 | void BindComputeTextures(Shader* kernel); | ||
| 135 | |||
| 136 | void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image, | ||
| 137 | size_t& image_view_index, size_t& texture_index, size_t& image_index); | ||
| 138 | |||
| 139 | /// Configures the current textures to use for the draw command. | ||
| 140 | void SetupDrawTextures(const Shader* shader, size_t stage_index); | ||
| 141 | |||
| 142 | /// Configures the textures used in a compute shader. | ||
| 143 | void SetupComputeTextures(const Shader* kernel); | ||
| 144 | |||
| 145 | /// Configures images in a graphics shader. | ||
| 146 | void SetupDrawImages(const Shader* shader, size_t stage_index); | ||
| 147 | |||
| 148 | /// Configures images in a compute shader. | ||
| 149 | void SetupComputeImages(const Shader* shader); | ||
| 150 | |||
| 151 | /// Syncs state to match guest's | 124 | /// Syncs state to match guest's |
| 152 | void SyncState(); | 125 | void SyncState(); |
| 153 | 126 | ||
| @@ -220,18 +193,12 @@ private: | |||
| 220 | /// Syncs vertex instances to match the guest state | 193 | /// Syncs vertex instances to match the guest state |
| 221 | void SyncVertexInstances(); | 194 | void SyncVertexInstances(); |
| 222 | 195 | ||
| 223 | /// Syncs transform feedback state to match guest state | ||
| 224 | /// @note Only valid on assembly shaders | ||
| 225 | void SyncTransformFeedback(); | ||
| 226 | |||
| 227 | /// Begin a transform feedback | 196 | /// Begin a transform feedback |
| 228 | void BeginTransformFeedback(GLenum primitive_mode); | 197 | void BeginTransformFeedback(GraphicsPipeline* pipeline, GLenum primitive_mode); |
| 229 | 198 | ||
| 230 | /// End a transform feedback | 199 | /// End a transform feedback |
| 231 | void EndTransformFeedback(); | 200 | void EndTransformFeedback(); |
| 232 | 201 | ||
| 233 | void SetupShaders(bool is_indexed); | ||
| 234 | |||
| 235 | Tegra::GPU& gpu; | 202 | Tegra::GPU& gpu; |
| 236 | Tegra::Engines::Maxwell3D& maxwell3d; | 203 | Tegra::Engines::Maxwell3D& maxwell3d; |
| 237 | Tegra::Engines::KeplerCompute& kepler_compute; | 204 | Tegra::Engines::KeplerCompute& kepler_compute; |
| @@ -246,13 +213,11 @@ private: | |||
| 246 | TextureCache texture_cache; | 213 | TextureCache texture_cache; |
| 247 | BufferCacheRuntime buffer_cache_runtime; | 214 | BufferCacheRuntime buffer_cache_runtime; |
| 248 | BufferCache buffer_cache; | 215 | BufferCache buffer_cache; |
| 249 | ShaderCacheOpenGL shader_cache; | 216 | ShaderCache shader_cache; |
| 250 | QueryCache query_cache; | 217 | QueryCache query_cache; |
| 251 | AccelerateDMA accelerate_dma; | 218 | AccelerateDMA accelerate_dma; |
| 252 | FenceManagerOpenGL fence_manager; | 219 | FenceManagerOpenGL fence_manager; |
| 253 | 220 | ||
| 254 | VideoCommon::Shader::AsyncShaders async_shaders; | ||
| 255 | |||
| 256 | boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; | 221 | boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; |
| 257 | std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; | 222 | std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; |
| 258 | boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles; | 223 | boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles; |
| @@ -260,7 +225,8 @@ private: | |||
| 260 | std::array<GLuint, MAX_IMAGES> image_handles{}; | 225 | std::array<GLuint, MAX_IMAGES> image_handles{}; |
| 261 | 226 | ||
| 262 | /// Number of commands queued to the OpenGL driver. Reset on flush. | 227 | /// Number of commands queued to the OpenGL driver. Reset on flush. |
| 263 | std::size_t num_queued_commands = 0; | 228 | size_t num_queued_commands = 0; |
| 229 | bool has_written_global_memory = false; | ||
| 264 | 230 | ||
| 265 | u32 last_clip_distance_mask = 0; | 231 | u32 last_clip_distance_mask = 0; |
| 266 | }; | 232 | }; |
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index 3428e5e21..8695c29e3 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
| @@ -83,18 +83,6 @@ void OGLSampler::Release() { | |||
| 83 | handle = 0; | 83 | handle = 0; |
| 84 | } | 84 | } |
| 85 | 85 | ||
| 86 | void OGLShader::Create(std::string_view source, GLenum type) { | ||
| 87 | if (handle != 0) { | ||
| 88 | return; | ||
| 89 | } | ||
| 90 | if (source.empty()) { | ||
| 91 | return; | ||
| 92 | } | ||
| 93 | |||
| 94 | MICROPROFILE_SCOPE(OpenGL_ResourceCreation); | ||
| 95 | handle = GLShader::LoadShader(source, type); | ||
| 96 | } | ||
| 97 | |||
| 98 | void OGLShader::Release() { | 86 | void OGLShader::Release() { |
| 99 | if (handle == 0) | 87 | if (handle == 0) |
| 100 | return; | 88 | return; |
| @@ -104,21 +92,6 @@ void OGLShader::Release() { | |||
| 104 | handle = 0; | 92 | handle = 0; |
| 105 | } | 93 | } |
| 106 | 94 | ||
| 107 | void OGLProgram::CreateFromSource(const char* vert_shader, const char* geo_shader, | ||
| 108 | const char* frag_shader, bool separable_program, | ||
| 109 | bool hint_retrievable) { | ||
| 110 | OGLShader vert, geo, frag; | ||
| 111 | if (vert_shader) | ||
| 112 | vert.Create(vert_shader, GL_VERTEX_SHADER); | ||
| 113 | if (geo_shader) | ||
| 114 | geo.Create(geo_shader, GL_GEOMETRY_SHADER); | ||
| 115 | if (frag_shader) | ||
| 116 | frag.Create(frag_shader, GL_FRAGMENT_SHADER); | ||
| 117 | |||
| 118 | MICROPROFILE_SCOPE(OpenGL_ResourceCreation); | ||
| 119 | Create(separable_program, hint_retrievable, vert.handle, geo.handle, frag.handle); | ||
| 120 | } | ||
| 121 | |||
| 122 | void OGLProgram::Release() { | 95 | void OGLProgram::Release() { |
| 123 | if (handle == 0) | 96 | if (handle == 0) |
| 124 | return; | 97 | return; |
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index 552d79db4..b2d5bfd3b 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
| @@ -8,7 +8,6 @@ | |||
| 8 | #include <utility> | 8 | #include <utility> |
| 9 | #include <glad/glad.h> | 9 | #include <glad/glad.h> |
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "video_core/renderer_opengl/gl_shader_util.h" | ||
| 12 | 11 | ||
| 13 | namespace OpenGL { | 12 | namespace OpenGL { |
| 14 | 13 | ||
| @@ -128,8 +127,6 @@ public: | |||
| 128 | return *this; | 127 | return *this; |
| 129 | } | 128 | } |
| 130 | 129 | ||
| 131 | void Create(std::string_view source, GLenum type); | ||
| 132 | |||
| 133 | void Release(); | 130 | void Release(); |
| 134 | 131 | ||
| 135 | GLuint handle = 0; | 132 | GLuint handle = 0; |
| @@ -151,17 +148,6 @@ public: | |||
| 151 | return *this; | 148 | return *this; |
| 152 | } | 149 | } |
| 153 | 150 | ||
| 154 | template <typename... T> | ||
| 155 | void Create(bool separable_program, bool hint_retrievable, T... shaders) { | ||
| 156 | if (handle != 0) | ||
| 157 | return; | ||
| 158 | handle = GLShader::LoadProgram(separable_program, hint_retrievable, shaders...); | ||
| 159 | } | ||
| 160 | |||
| 161 | /// Creates a new internal OpenGL resource and stores the handle | ||
| 162 | void CreateFromSource(const char* vert_shader, const char* geo_shader, const char* frag_shader, | ||
| 163 | bool separable_program = false, bool hint_retrievable = false); | ||
| 164 | |||
| 165 | /// Deletes the internal OpenGL resource | 151 | /// Deletes the internal OpenGL resource |
| 166 | void Release(); | 152 | void Release(); |
| 167 | 153 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 5a01c59ec..8d6cc074c 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
| @@ -3,606 +3,544 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <atomic> | 5 | #include <atomic> |
| 6 | #include <fstream> | ||
| 6 | #include <functional> | 7 | #include <functional> |
| 7 | #include <mutex> | 8 | #include <mutex> |
| 8 | #include <optional> | ||
| 9 | #include <string> | 9 | #include <string> |
| 10 | #include <thread> | 10 | #include <thread> |
| 11 | #include <unordered_set> | ||
| 12 | 11 | ||
| 13 | #include "common/alignment.h" | 12 | #include "common/alignment.h" |
| 14 | #include "common/assert.h" | 13 | #include "common/assert.h" |
| 14 | #include "common/fs/fs.h" | ||
| 15 | #include "common/fs/path_util.h" | ||
| 15 | #include "common/logging/log.h" | 16 | #include "common/logging/log.h" |
| 16 | #include "common/scope_exit.h" | 17 | #include "common/scope_exit.h" |
| 18 | #include "common/settings.h" | ||
| 19 | #include "common/thread_worker.h" | ||
| 17 | #include "core/core.h" | 20 | #include "core/core.h" |
| 18 | #include "core/frontend/emu_window.h" | 21 | #include "shader_recompiler/backend/glasm/emit_glasm.h" |
| 22 | #include "shader_recompiler/backend/glsl/emit_glsl.h" | ||
| 23 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 24 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 25 | #include "shader_recompiler/frontend/maxwell/control_flow.h" | ||
| 26 | #include "shader_recompiler/frontend/maxwell/translate_program.h" | ||
| 27 | #include "shader_recompiler/profile.h" | ||
| 19 | #include "video_core/engines/kepler_compute.h" | 28 | #include "video_core/engines/kepler_compute.h" |
| 20 | #include "video_core/engines/maxwell_3d.h" | 29 | #include "video_core/engines/maxwell_3d.h" |
| 21 | #include "video_core/engines/shader_type.h" | ||
| 22 | #include "video_core/memory_manager.h" | 30 | #include "video_core/memory_manager.h" |
| 23 | #include "video_core/renderer_opengl/gl_arb_decompiler.h" | ||
| 24 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 31 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 25 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 32 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 26 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 33 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| 27 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 34 | #include "video_core/renderer_opengl/gl_shader_util.h" |
| 28 | #include "video_core/renderer_opengl/gl_shader_disk_cache.h" | ||
| 29 | #include "video_core/renderer_opengl/gl_state_tracker.h" | 35 | #include "video_core/renderer_opengl/gl_state_tracker.h" |
| 30 | #include "video_core/shader/memory_util.h" | ||
| 31 | #include "video_core/shader/registry.h" | ||
| 32 | #include "video_core/shader/shader_ir.h" | ||
| 33 | #include "video_core/shader_cache.h" | 36 | #include "video_core/shader_cache.h" |
| 37 | #include "video_core/shader_environment.h" | ||
| 34 | #include "video_core/shader_notify.h" | 38 | #include "video_core/shader_notify.h" |
| 35 | 39 | ||
| 36 | namespace OpenGL { | 40 | namespace OpenGL { |
| 37 | |||
| 38 | using Tegra::Engines::ShaderType; | ||
| 39 | using VideoCommon::Shader::GetShaderAddress; | ||
| 40 | using VideoCommon::Shader::GetShaderCode; | ||
| 41 | using VideoCommon::Shader::GetUniqueIdentifier; | ||
| 42 | using VideoCommon::Shader::KERNEL_MAIN_OFFSET; | ||
| 43 | using VideoCommon::Shader::ProgramCode; | ||
| 44 | using VideoCommon::Shader::Registry; | ||
| 45 | using VideoCommon::Shader::ShaderIR; | ||
| 46 | using VideoCommon::Shader::STAGE_MAIN_OFFSET; | ||
| 47 | |||
| 48 | namespace { | 41 | namespace { |
| 49 | 42 | using Shader::Backend::GLASM::EmitGLASM; | |
| 50 | constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{}; | 43 | using Shader::Backend::GLSL::EmitGLSL; |
| 51 | 44 | using Shader::Backend::SPIRV::EmitSPIRV; | |
| 52 | /// Gets the shader type from a Maxwell program type | 45 | using Shader::Maxwell::MergeDualVertexPrograms; |
| 53 | constexpr GLenum GetGLShaderType(ShaderType shader_type) { | 46 | using Shader::Maxwell::TranslateProgram; |
| 54 | switch (shader_type) { | 47 | using VideoCommon::ComputeEnvironment; |
| 55 | case ShaderType::Vertex: | 48 | using VideoCommon::FileEnvironment; |
| 56 | return GL_VERTEX_SHADER; | 49 | using VideoCommon::GenericEnvironment; |
| 57 | case ShaderType::Geometry: | 50 | using VideoCommon::GraphicsEnvironment; |
| 58 | return GL_GEOMETRY_SHADER; | 51 | using VideoCommon::LoadPipelines; |
| 59 | case ShaderType::Fragment: | 52 | using VideoCommon::SerializePipeline; |
| 60 | return GL_FRAGMENT_SHADER; | 53 | using Context = ShaderContext::Context; |
| 61 | case ShaderType::Compute: | 54 | |
| 62 | return GL_COMPUTE_SHADER; | 55 | constexpr u32 CACHE_VERSION = 5; |
| 63 | default: | 56 | |
| 64 | return GL_NONE; | 57 | template <typename Container> |
| 65 | } | 58 | auto MakeSpan(Container& container) { |
| 59 | return std::span(container.data(), container.size()); | ||
| 66 | } | 60 | } |
| 67 | 61 | ||
| 68 | constexpr const char* GetShaderTypeName(ShaderType shader_type) { | 62 | Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, |
| 69 | switch (shader_type) { | 63 | const Shader::IR::Program& program, |
| 70 | case ShaderType::Vertex: | 64 | const Shader::IR::Program* previous_program, |
| 71 | return "VS"; | 65 | bool glasm_use_storage_buffers, bool use_assembly_shaders) { |
| 72 | case ShaderType::TesselationControl: | 66 | Shader::RuntimeInfo info; |
| 73 | return "HS"; | 67 | if (previous_program) { |
| 74 | case ShaderType::TesselationEval: | 68 | info.previous_stage_stores = previous_program->info.stores; |
| 75 | return "DS"; | 69 | } else { |
| 76 | case ShaderType::Geometry: | 70 | // Mark all stores as available for vertex shaders |
| 77 | return "GS"; | 71 | info.previous_stage_stores.mask.set(); |
| 78 | case ShaderType::Fragment: | 72 | } |
| 79 | return "FS"; | 73 | switch (program.stage) { |
| 80 | case ShaderType::Compute: | 74 | case Shader::Stage::VertexB: |
| 81 | return "CS"; | 75 | case Shader::Stage::Geometry: |
| 82 | } | 76 | if (!use_assembly_shaders && key.xfb_enabled != 0) { |
| 83 | return "UNK"; | 77 | info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state); |
| 78 | } | ||
| 79 | break; | ||
| 80 | case Shader::Stage::TessellationEval: | ||
| 81 | info.tess_clockwise = key.tessellation_clockwise != 0; | ||
| 82 | info.tess_primitive = [&key] { | ||
| 83 | switch (key.tessellation_primitive) { | ||
| 84 | case Maxwell::TessellationPrimitive::Isolines: | ||
| 85 | return Shader::TessPrimitive::Isolines; | ||
| 86 | case Maxwell::TessellationPrimitive::Triangles: | ||
| 87 | return Shader::TessPrimitive::Triangles; | ||
| 88 | case Maxwell::TessellationPrimitive::Quads: | ||
| 89 | return Shader::TessPrimitive::Quads; | ||
| 90 | } | ||
| 91 | UNREACHABLE(); | ||
| 92 | return Shader::TessPrimitive::Triangles; | ||
| 93 | }(); | ||
| 94 | info.tess_spacing = [&] { | ||
| 95 | switch (key.tessellation_spacing) { | ||
| 96 | case Maxwell::TessellationSpacing::Equal: | ||
| 97 | return Shader::TessSpacing::Equal; | ||
| 98 | case Maxwell::TessellationSpacing::FractionalOdd: | ||
| 99 | return Shader::TessSpacing::FractionalOdd; | ||
| 100 | case Maxwell::TessellationSpacing::FractionalEven: | ||
| 101 | return Shader::TessSpacing::FractionalEven; | ||
| 102 | } | ||
| 103 | UNREACHABLE(); | ||
| 104 | return Shader::TessSpacing::Equal; | ||
| 105 | }(); | ||
| 106 | break; | ||
| 107 | case Shader::Stage::Fragment: | ||
| 108 | info.force_early_z = key.early_z != 0; | ||
| 109 | break; | ||
| 110 | default: | ||
| 111 | break; | ||
| 112 | } | ||
| 113 | switch (key.gs_input_topology) { | ||
| 114 | case Maxwell::PrimitiveTopology::Points: | ||
| 115 | info.input_topology = Shader::InputTopology::Points; | ||
| 116 | break; | ||
| 117 | case Maxwell::PrimitiveTopology::Lines: | ||
| 118 | case Maxwell::PrimitiveTopology::LineLoop: | ||
| 119 | case Maxwell::PrimitiveTopology::LineStrip: | ||
| 120 | info.input_topology = Shader::InputTopology::Lines; | ||
| 121 | break; | ||
| 122 | case Maxwell::PrimitiveTopology::Triangles: | ||
| 123 | case Maxwell::PrimitiveTopology::TriangleStrip: | ||
| 124 | case Maxwell::PrimitiveTopology::TriangleFan: | ||
| 125 | case Maxwell::PrimitiveTopology::Quads: | ||
| 126 | case Maxwell::PrimitiveTopology::QuadStrip: | ||
| 127 | case Maxwell::PrimitiveTopology::Polygon: | ||
| 128 | case Maxwell::PrimitiveTopology::Patches: | ||
| 129 | info.input_topology = Shader::InputTopology::Triangles; | ||
| 130 | break; | ||
| 131 | case Maxwell::PrimitiveTopology::LinesAdjacency: | ||
| 132 | case Maxwell::PrimitiveTopology::LineStripAdjacency: | ||
| 133 | info.input_topology = Shader::InputTopology::LinesAdjacency; | ||
| 134 | break; | ||
| 135 | case Maxwell::PrimitiveTopology::TrianglesAdjacency: | ||
| 136 | case Maxwell::PrimitiveTopology::TriangleStripAdjacency: | ||
| 137 | info.input_topology = Shader::InputTopology::TrianglesAdjacency; | ||
| 138 | break; | ||
| 139 | } | ||
| 140 | info.glasm_use_storage_buffers = glasm_use_storage_buffers; | ||
| 141 | return info; | ||
| 84 | } | 142 | } |
| 85 | 143 | ||
| 86 | constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) { | 144 | void SetXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs) { |
| 87 | switch (program_type) { | 145 | std::ranges::transform(regs.tfb_layouts, state.layouts.begin(), [](const auto& layout) { |
| 88 | case Maxwell::ShaderProgram::VertexA: | 146 | return VideoCommon::TransformFeedbackState::Layout{ |
| 89 | case Maxwell::ShaderProgram::VertexB: | 147 | .stream = layout.stream, |
| 90 | return ShaderType::Vertex; | 148 | .varying_count = layout.varying_count, |
| 91 | case Maxwell::ShaderProgram::TesselationControl: | 149 | .stride = layout.stride, |
| 92 | return ShaderType::TesselationControl; | 150 | }; |
| 93 | case Maxwell::ShaderProgram::TesselationEval: | 151 | }); |
| 94 | return ShaderType::TesselationEval; | 152 | state.varyings = regs.tfb_varying_locs; |
| 95 | case Maxwell::ShaderProgram::Geometry: | ||
| 96 | return ShaderType::Geometry; | ||
| 97 | case Maxwell::ShaderProgram::Fragment: | ||
| 98 | return ShaderType::Fragment; | ||
| 99 | } | ||
| 100 | return {}; | ||
| 101 | } | 153 | } |
| 154 | } // Anonymous namespace | ||
| 102 | 155 | ||
| 103 | constexpr GLenum AssemblyEnum(ShaderType shader_type) { | 156 | ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, |
| 104 | switch (shader_type) { | 157 | Tegra::Engines::Maxwell3D& maxwell3d_, |
| 105 | case ShaderType::Vertex: | 158 | Tegra::Engines::KeplerCompute& kepler_compute_, |
| 106 | return GL_VERTEX_PROGRAM_NV; | 159 | Tegra::MemoryManager& gpu_memory_, const Device& device_, |
| 107 | case ShaderType::TesselationControl: | 160 | TextureCache& texture_cache_, BufferCache& buffer_cache_, |
| 108 | return GL_TESS_CONTROL_PROGRAM_NV; | 161 | ProgramManager& program_manager_, StateTracker& state_tracker_, |
| 109 | case ShaderType::TesselationEval: | 162 | VideoCore::ShaderNotify& shader_notify_) |
| 110 | return GL_TESS_EVALUATION_PROGRAM_NV; | 163 | : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, |
| 111 | case ShaderType::Geometry: | 164 | emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_}, |
| 112 | return GL_GEOMETRY_PROGRAM_NV; | 165 | buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{state_tracker_}, |
| 113 | case ShaderType::Fragment: | 166 | shader_notify{shader_notify_}, use_asynchronous_shaders{device.UseAsynchronousShaders()}, |
| 114 | return GL_FRAGMENT_PROGRAM_NV; | 167 | profile{ |
| 115 | case ShaderType::Compute: | 168 | .supported_spirv = 0x00010000, |
| 116 | return GL_COMPUTE_PROGRAM_NV; | 169 | |
| 170 | .unified_descriptor_binding = false, | ||
| 171 | .support_descriptor_aliasing = false, | ||
| 172 | .support_int8 = false, | ||
| 173 | .support_int16 = false, | ||
| 174 | .support_int64 = device.HasShaderInt64(), | ||
| 175 | .support_vertex_instance_id = true, | ||
| 176 | .support_float_controls = false, | ||
| 177 | .support_separate_denorm_behavior = false, | ||
| 178 | .support_separate_rounding_mode = false, | ||
| 179 | .support_fp16_denorm_preserve = false, | ||
| 180 | .support_fp32_denorm_preserve = false, | ||
| 181 | .support_fp16_denorm_flush = false, | ||
| 182 | .support_fp32_denorm_flush = false, | ||
| 183 | .support_fp16_signed_zero_nan_preserve = false, | ||
| 184 | .support_fp32_signed_zero_nan_preserve = false, | ||
| 185 | .support_fp64_signed_zero_nan_preserve = false, | ||
| 186 | .support_explicit_workgroup_layout = false, | ||
| 187 | .support_vote = true, | ||
| 188 | .support_viewport_index_layer_non_geometry = | ||
| 189 | device.HasNvViewportArray2() || device.HasVertexViewportLayer(), | ||
| 190 | .support_viewport_mask = device.HasNvViewportArray2(), | ||
| 191 | .support_typeless_image_loads = device.HasImageLoadFormatted(), | ||
| 192 | .support_demote_to_helper_invocation = false, | ||
| 193 | .support_int64_atomics = false, | ||
| 194 | .support_derivative_control = device.HasDerivativeControl(), | ||
| 195 | .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(), | ||
| 196 | .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), | ||
| 197 | .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), | ||
| 198 | .support_gl_texture_shadow_lod = device.HasTextureShadowLod(), | ||
| 199 | .support_gl_warp_intrinsics = false, | ||
| 200 | .support_gl_variable_aoffi = device.HasVariableAoffi(), | ||
| 201 | .support_gl_sparse_textures = device.HasSparseTexture2(), | ||
| 202 | .support_gl_derivative_control = device.HasDerivativeControl(), | ||
| 203 | |||
| 204 | .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(), | ||
| 205 | |||
| 206 | .lower_left_origin_mode = true, | ||
| 207 | .need_declared_frag_colors = true, | ||
| 208 | .need_fastmath_off = device.NeedsFastmathOff(), | ||
| 209 | |||
| 210 | .has_broken_spirv_clamp = true, | ||
| 211 | .has_broken_unsigned_image_offsets = true, | ||
| 212 | .has_broken_signed_operations = true, | ||
| 213 | .has_broken_fp16_float_controls = false, | ||
| 214 | .has_gl_component_indexing_bug = device.HasComponentIndexingBug(), | ||
| 215 | .has_gl_precise_bug = device.HasPreciseBug(), | ||
| 216 | .ignore_nan_fp_comparisons = true, | ||
| 217 | .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(), | ||
| 218 | }, | ||
| 219 | host_info{ | ||
| 220 | .support_float16 = false, | ||
| 221 | .support_int64 = device.HasShaderInt64(), | ||
| 222 | } { | ||
| 223 | if (use_asynchronous_shaders) { | ||
| 224 | workers = CreateWorkers(); | ||
| 117 | } | 225 | } |
| 118 | return {}; | ||
| 119 | } | 226 | } |
| 120 | 227 | ||
| 121 | std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) { | 228 | ShaderCache::~ShaderCache() = default; |
| 122 | return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier); | ||
| 123 | } | ||
| 124 | 229 | ||
| 125 | std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) { | 230 | void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, |
| 126 | const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size}; | 231 | const VideoCore::DiskResourceLoadCallback& callback) { |
| 127 | const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer, | 232 | if (title_id == 0) { |
| 128 | entry.graphics_info, entry.compute_info}; | 233 | return; |
| 129 | auto registry = std::make_shared<Registry>(entry.type, info); | ||
| 130 | for (const auto& [address, value] : entry.keys) { | ||
| 131 | const auto [buffer, offset] = address; | ||
| 132 | registry->InsertKey(buffer, offset, value); | ||
| 133 | } | ||
| 134 | for (const auto& [offset, sampler] : entry.bound_samplers) { | ||
| 135 | registry->InsertBoundSampler(offset, sampler); | ||
| 136 | } | 234 | } |
| 137 | for (const auto& [key, sampler] : entry.bindless_samplers) { | 235 | const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)}; |
| 138 | const auto [buffer, offset] = key; | 236 | const auto base_dir{shader_dir / fmt::format("{:016x}", title_id)}; |
| 139 | registry->InsertBindlessSampler(buffer, offset, sampler); | 237 | if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir)) { |
| 238 | LOG_ERROR(Common_Filesystem, "Failed to create shader cache directories"); | ||
| 239 | return; | ||
| 140 | } | 240 | } |
| 141 | return registry; | 241 | shader_cache_filename = base_dir / "opengl.bin"; |
| 142 | } | 242 | |
| 143 | 243 | if (!workers) { | |
| 144 | std::unordered_set<GLenum> GetSupportedFormats() { | 244 | workers = CreateWorkers(); |
| 145 | GLint num_formats; | 245 | } |
| 146 | glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); | 246 | struct { |
| 247 | std::mutex mutex; | ||
| 248 | size_t total{}; | ||
| 249 | size_t built{}; | ||
| 250 | bool has_loaded{}; | ||
| 251 | } state; | ||
| 252 | |||
| 253 | const auto load_compute{[&](std::ifstream& file, FileEnvironment env) { | ||
| 254 | ComputePipelineKey key; | ||
| 255 | file.read(reinterpret_cast<char*>(&key), sizeof(key)); | ||
| 256 | workers->QueueWork( | ||
| 257 | [this, key, env = std::move(env), &state, &callback](Context* ctx) mutable { | ||
| 258 | ctx->pools.ReleaseContents(); | ||
| 259 | auto pipeline{CreateComputePipeline(ctx->pools, key, env)}; | ||
| 260 | std::lock_guard lock{state.mutex}; | ||
| 261 | if (pipeline) { | ||
| 262 | compute_cache.emplace(key, std::move(pipeline)); | ||
| 263 | } | ||
| 264 | ++state.built; | ||
| 265 | if (state.has_loaded) { | ||
| 266 | callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); | ||
| 267 | } | ||
| 268 | }); | ||
| 269 | ++state.total; | ||
| 270 | }}; | ||
| 271 | const auto load_graphics{[&](std::ifstream& file, std::vector<FileEnvironment> envs) { | ||
| 272 | GraphicsPipelineKey key; | ||
| 273 | file.read(reinterpret_cast<char*>(&key), sizeof(key)); | ||
| 274 | workers->QueueWork( | ||
| 275 | [this, key, envs = std::move(envs), &state, &callback](Context* ctx) mutable { | ||
| 276 | boost::container::static_vector<Shader::Environment*, 5> env_ptrs; | ||
| 277 | for (auto& env : envs) { | ||
| 278 | env_ptrs.push_back(&env); | ||
| 279 | } | ||
| 280 | ctx->pools.ReleaseContents(); | ||
| 281 | auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs), false)}; | ||
| 282 | std::lock_guard lock{state.mutex}; | ||
| 283 | if (pipeline) { | ||
| 284 | graphics_cache.emplace(key, std::move(pipeline)); | ||
| 285 | } | ||
| 286 | ++state.built; | ||
| 287 | if (state.has_loaded) { | ||
| 288 | callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); | ||
| 289 | } | ||
| 290 | }); | ||
| 291 | ++state.total; | ||
| 292 | }}; | ||
| 293 | LoadPipelines(stop_loading, shader_cache_filename, CACHE_VERSION, load_compute, load_graphics); | ||
| 147 | 294 | ||
| 148 | std::vector<GLint> formats(num_formats); | 295 | std::unique_lock lock{state.mutex}; |
| 149 | glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); | 296 | callback(VideoCore::LoadCallbackStage::Build, 0, state.total); |
| 297 | state.has_loaded = true; | ||
| 298 | lock.unlock(); | ||
| 150 | 299 | ||
| 151 | std::unordered_set<GLenum> supported_formats; | 300 | workers->WaitForRequests(); |
| 152 | for (const GLint format : formats) { | 301 | if (!use_asynchronous_shaders) { |
| 153 | supported_formats.insert(static_cast<GLenum>(format)); | 302 | workers.reset(); |
| 154 | } | 303 | } |
| 155 | return supported_formats; | ||
| 156 | } | 304 | } |
| 157 | 305 | ||
| 158 | } // Anonymous namespace | 306 | GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() { |
| 159 | 307 | if (!RefreshStages(graphics_key.unique_hashes)) { | |
| 160 | ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier, | 308 | current_pipeline = nullptr; |
| 161 | const ShaderIR& ir, const Registry& registry, bool hint_retrievable) { | 309 | return nullptr; |
| 162 | if (device.UseDriverCache()) { | 310 | } |
| 163 | // Ignore hint retrievable if we are using the driver cache | 311 | const auto& regs{maxwell3d.regs}; |
| 164 | hint_retrievable = false; | 312 | graphics_key.raw = 0; |
| 165 | } | 313 | graphics_key.early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); |
| 166 | const std::string shader_id = MakeShaderID(unique_identifier, shader_type); | 314 | graphics_key.gs_input_topology.Assign(graphics_key.unique_hashes[4] != 0 |
| 167 | LOG_INFO(Render_OpenGL, "{}", shader_id); | 315 | ? regs.draw.topology.Value() |
| 168 | 316 | : Maxwell::PrimitiveTopology{}); | |
| 169 | auto program = std::make_shared<ProgramHandle>(); | 317 | graphics_key.tessellation_primitive.Assign(regs.tess_mode.prim.Value()); |
| 170 | 318 | graphics_key.tessellation_spacing.Assign(regs.tess_mode.spacing.Value()); | |
| 171 | if (device.UseAssemblyShaders()) { | 319 | graphics_key.tessellation_clockwise.Assign(regs.tess_mode.cw.Value()); |
| 172 | const std::string arb = | 320 | graphics_key.xfb_enabled.Assign(regs.tfb_enabled != 0 ? 1 : 0); |
| 173 | DecompileAssemblyShader(device, ir, registry, shader_type, shader_id); | 321 | if (graphics_key.xfb_enabled) { |
| 174 | 322 | SetXfbState(graphics_key.xfb_state, regs); | |
| 175 | GLuint& arb_prog = program->assembly_program.handle; | 323 | } |
| 176 | 324 | if (current_pipeline && graphics_key == current_pipeline->Key()) { | |
| 177 | // The commented-out functions raise OpenGL errors but are compatible with apitrace. | 325 | return BuiltPipeline(current_pipeline); |
| 178 | // Enable them only to capture and replay traces with apitrace. | 326 | } |
| 179 | #if 0 | 327 | return CurrentGraphicsPipelineSlowPath(); |
| 180 | glGenProgramsNV(1, &arb_prog); | ||
| 181 | glLoadProgramNV(AssemblyEnum(shader_type), arb_prog, static_cast<GLsizei>(arb.size()), | ||
| 182 | reinterpret_cast<const GLubyte*>(arb.data())); | ||
| 183 | #else | ||
| 184 | glGenProgramsARB(1, &arb_prog); | ||
| 185 | glNamedProgramStringEXT(arb_prog, AssemblyEnum(shader_type), GL_PROGRAM_FORMAT_ASCII_ARB, | ||
| 186 | static_cast<GLsizei>(arb.size()), arb.data()); | ||
| 187 | #endif | ||
| 188 | const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV)); | ||
| 189 | if (err && *err) { | ||
| 190 | LOG_CRITICAL(Render_OpenGL, "{}", err); | ||
| 191 | LOG_INFO(Render_OpenGL, "\n{}", arb); | ||
| 192 | } | ||
| 193 | } else { | ||
| 194 | const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id); | ||
| 195 | OGLShader shader; | ||
| 196 | shader.Create(glsl.c_str(), GetGLShaderType(shader_type)); | ||
| 197 | |||
| 198 | program->source_program.Create(true, hint_retrievable, shader.handle); | ||
| 199 | } | ||
| 200 | |||
| 201 | return program; | ||
| 202 | } | 328 | } |
| 203 | 329 | ||
| 204 | Shader::Shader(std::shared_ptr<Registry> registry_, ShaderEntries entries_, | 330 | GraphicsPipeline* ShaderCache::CurrentGraphicsPipelineSlowPath() { |
| 205 | ProgramSharedPtr program_, bool is_built_) | 331 | const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; |
| 206 | : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)}, | 332 | auto& pipeline{pair->second}; |
| 207 | is_built{is_built_} { | 333 | if (is_new) { |
| 208 | handle = program->assembly_program.handle; | 334 | pipeline = CreateGraphicsPipeline(); |
| 209 | if (handle == 0) { | ||
| 210 | handle = program->source_program.handle; | ||
| 211 | } | 335 | } |
| 212 | if (is_built) { | 336 | if (!pipeline) { |
| 213 | ASSERT(handle != 0); | 337 | return nullptr; |
| 214 | } | 338 | } |
| 339 | current_pipeline = pipeline.get(); | ||
| 340 | return BuiltPipeline(current_pipeline); | ||
| 215 | } | 341 | } |
| 216 | 342 | ||
| 217 | Shader::~Shader() = default; | 343 | GraphicsPipeline* ShaderCache::BuiltPipeline(GraphicsPipeline* pipeline) const noexcept { |
| 218 | 344 | if (pipeline->IsBuilt()) { | |
| 219 | GLuint Shader::GetHandle() const { | 345 | return pipeline; |
| 220 | DEBUG_ASSERT(registry->IsConsistent()); | ||
| 221 | return handle; | ||
| 222 | } | ||
| 223 | |||
| 224 | bool Shader::IsBuilt() const { | ||
| 225 | return is_built; | ||
| 226 | } | ||
| 227 | |||
| 228 | void Shader::AsyncOpenGLBuilt(OGLProgram new_program) { | ||
| 229 | program->source_program = std::move(new_program); | ||
| 230 | handle = program->source_program.handle; | ||
| 231 | is_built = true; | ||
| 232 | } | ||
| 233 | |||
| 234 | void Shader::AsyncGLASMBuilt(OGLAssemblyProgram new_program) { | ||
| 235 | program->assembly_program = std::move(new_program); | ||
| 236 | handle = program->assembly_program.handle; | ||
| 237 | is_built = true; | ||
| 238 | } | ||
| 239 | |||
| 240 | std::unique_ptr<Shader> Shader::CreateStageFromMemory( | ||
| 241 | const ShaderParameters& params, Maxwell::ShaderProgram program_type, ProgramCode code, | ||
| 242 | ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) { | ||
| 243 | const auto shader_type = GetShaderType(program_type); | ||
| 244 | |||
| 245 | auto& gpu = params.gpu; | ||
| 246 | gpu.ShaderNotify().MarkSharderBuilding(); | ||
| 247 | |||
| 248 | auto registry = std::make_shared<Registry>(shader_type, gpu.Maxwell3D()); | ||
| 249 | if (!async_shaders.IsShaderAsync(gpu) || !params.device.UseAsynchronousShaders()) { | ||
| 250 | const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); | ||
| 251 | // TODO(Rodrigo): Handle VertexA shaders | ||
| 252 | // std::optional<ShaderIR> ir_b; | ||
| 253 | // if (!code_b.empty()) { | ||
| 254 | // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); | ||
| 255 | // } | ||
| 256 | auto program = | ||
| 257 | BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry); | ||
| 258 | ShaderDiskCacheEntry entry; | ||
| 259 | entry.type = shader_type; | ||
| 260 | entry.code = std::move(code); | ||
| 261 | entry.code_b = std::move(code_b); | ||
| 262 | entry.unique_identifier = params.unique_identifier; | ||
| 263 | entry.bound_buffer = registry->GetBoundBuffer(); | ||
| 264 | entry.graphics_info = registry->GetGraphicsInfo(); | ||
| 265 | entry.keys = registry->GetKeys(); | ||
| 266 | entry.bound_samplers = registry->GetBoundSamplers(); | ||
| 267 | entry.bindless_samplers = registry->GetBindlessSamplers(); | ||
| 268 | params.disk_cache.SaveEntry(std::move(entry)); | ||
| 269 | |||
| 270 | gpu.ShaderNotify().MarkShaderComplete(); | ||
| 271 | |||
| 272 | return std::unique_ptr<Shader>(new Shader(std::move(registry), | ||
| 273 | MakeEntries(params.device, ir, shader_type), | ||
| 274 | std::move(program), true)); | ||
| 275 | } else { | ||
| 276 | // Required for entries | ||
| 277 | const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); | ||
| 278 | auto entries = MakeEntries(params.device, ir, shader_type); | ||
| 279 | |||
| 280 | async_shaders.QueueOpenGLShader(params.device, shader_type, params.unique_identifier, | ||
| 281 | std::move(code), std::move(code_b), STAGE_MAIN_OFFSET, | ||
| 282 | COMPILER_SETTINGS, *registry, cpu_addr); | ||
| 283 | |||
| 284 | auto program = std::make_shared<ProgramHandle>(); | ||
| 285 | return std::unique_ptr<Shader>( | ||
| 286 | new Shader(std::move(registry), std::move(entries), std::move(program), false)); | ||
| 287 | } | 346 | } |
| 288 | } | 347 | if (!use_asynchronous_shaders) { |
| 289 | 348 | return pipeline; | |
| 290 | std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params, | ||
| 291 | ProgramCode code) { | ||
| 292 | auto& gpu = params.gpu; | ||
| 293 | gpu.ShaderNotify().MarkSharderBuilding(); | ||
| 294 | |||
| 295 | auto registry = std::make_shared<Registry>(ShaderType::Compute, params.engine); | ||
| 296 | const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry); | ||
| 297 | const u64 uid = params.unique_identifier; | ||
| 298 | auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry); | ||
| 299 | |||
| 300 | ShaderDiskCacheEntry entry; | ||
| 301 | entry.type = ShaderType::Compute; | ||
| 302 | entry.code = std::move(code); | ||
| 303 | entry.unique_identifier = uid; | ||
| 304 | entry.bound_buffer = registry->GetBoundBuffer(); | ||
| 305 | entry.compute_info = registry->GetComputeInfo(); | ||
| 306 | entry.keys = registry->GetKeys(); | ||
| 307 | entry.bound_samplers = registry->GetBoundSamplers(); | ||
| 308 | entry.bindless_samplers = registry->GetBindlessSamplers(); | ||
| 309 | params.disk_cache.SaveEntry(std::move(entry)); | ||
| 310 | |||
| 311 | gpu.ShaderNotify().MarkShaderComplete(); | ||
| 312 | |||
| 313 | return std::unique_ptr<Shader>(new Shader(std::move(registry), | ||
| 314 | MakeEntries(params.device, ir, ShaderType::Compute), | ||
| 315 | std::move(program))); | ||
| 316 | } | ||
| 317 | |||
| 318 | std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params, | ||
| 319 | const PrecompiledShader& precompiled_shader) { | ||
| 320 | return std::unique_ptr<Shader>(new Shader( | ||
| 321 | precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program)); | ||
| 322 | } | ||
| 323 | |||
| 324 | ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, | ||
| 325 | Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | ||
| 326 | Tegra::Engines::Maxwell3D& maxwell3d_, | ||
| 327 | Tegra::Engines::KeplerCompute& kepler_compute_, | ||
| 328 | Tegra::MemoryManager& gpu_memory_, const Device& device_) | ||
| 329 | : ShaderCache{rasterizer_}, emu_window{emu_window_}, gpu{gpu_}, gpu_memory{gpu_memory_}, | ||
| 330 | maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, device{device_} {} | ||
| 331 | |||
| 332 | ShaderCacheOpenGL::~ShaderCacheOpenGL() = default; | ||
| 333 | |||
| 334 | void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, std::stop_token stop_loading, | ||
| 335 | const VideoCore::DiskResourceLoadCallback& callback) { | ||
| 336 | disk_cache.BindTitleID(title_id); | ||
| 337 | const std::optional transferable = disk_cache.LoadTransferable(); | ||
| 338 | |||
| 339 | LOG_INFO(Render_OpenGL, "Total Shader Count: {}", | ||
| 340 | transferable.has_value() ? transferable->size() : 0); | ||
| 341 | |||
| 342 | if (!transferable) { | ||
| 343 | return; | ||
| 344 | } | 349 | } |
| 345 | 350 | // If depth is in use, assume the game is drawing a normal scene rather than |
| 346 | std::vector<ShaderDiskCachePrecompiled> gl_cache; | 351 | // a one-time pass, so the pipeline can safely be built asynchronously. |
| 347 | if (!device.UseAssemblyShaders() && !device.UseDriverCache()) { | 352 | if (maxwell3d.regs.zeta_enable) { |
| 348 | // Only load precompiled cache when we are not using assembly shaders | 353 | return nullptr; |
| 349 | gl_cache = disk_cache.LoadPrecompiled(); | ||
| 350 | } | 354 | } |
| 351 | const auto supported_formats = GetSupportedFormats(); | 355 | // If a game is using a small index count, assume it is drawing full-screen quads. |
| 352 | 356 | // These shaders are usually used only once to build textures, so they |
| 353 | // Track if precompiled cache was altered during loading to know if we have to | 357 | // cannot be deferred to an asynchronous build. |
| 354 | // serialize the virtual precompiled cache file back to the hard drive | 358 | if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) { |
| 355 | bool precompiled_cache_altered = false; | 359 | return pipeline; |
| 356 | |||
| 357 | // Inform the frontend about shader build initialization | ||
| 358 | if (callback) { | ||
| 359 | callback(VideoCore::LoadCallbackStage::Build, 0, transferable->size()); | ||
| 360 | } | 360 | } |
| 361 | return nullptr; | ||
| 362 | } | ||
| 361 | 363 | ||
| 362 | std::mutex mutex; | 364 | ComputePipeline* ShaderCache::CurrentComputePipeline() { |
| 363 | std::size_t built_shaders = 0; // It doesn't have to be atomic since it's used behind a mutex | 365 | const VideoCommon::ShaderInfo* const shader{ComputeShader()}; |
| 364 | std::atomic_bool gl_cache_failed = false; | 366 | if (!shader) { |
| 365 | 367 | return nullptr; | |
| 366 | const auto find_precompiled = [&gl_cache](u64 id) { | ||
| 367 | return std::ranges::find(gl_cache, id, &ShaderDiskCachePrecompiled::unique_identifier); | ||
| 368 | }; | ||
| 369 | |||
| 370 | const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin, | ||
| 371 | std::size_t end) { | ||
| 372 | const auto scope = context->Acquire(); | ||
| 373 | |||
| 374 | for (std::size_t i = begin; i < end; ++i) { | ||
| 375 | if (stop_loading.stop_requested()) { | ||
| 376 | return; | ||
| 377 | } | ||
| 378 | const auto& entry = (*transferable)[i]; | ||
| 379 | const u64 uid = entry.unique_identifier; | ||
| 380 | const auto it = find_precompiled(uid); | ||
| 381 | const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr; | ||
| 382 | |||
| 383 | const bool is_compute = entry.type == ShaderType::Compute; | ||
| 384 | const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; | ||
| 385 | auto registry = MakeRegistry(entry); | ||
| 386 | const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry); | ||
| 387 | |||
| 388 | ProgramSharedPtr program; | ||
| 389 | if (precompiled_entry) { | ||
| 390 | // If the shader is precompiled, attempt to load it with glProgramBinary | ||
| 391 | program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats); | ||
| 392 | if (!program) { | ||
| 393 | gl_cache_failed = true; | ||
| 394 | } | ||
| 395 | } | ||
| 396 | if (!program) { | ||
| 397 | // Otherwise compile it from GLSL | ||
| 398 | program = BuildShader(device, entry.type, uid, ir, *registry, true); | ||
| 399 | } | ||
| 400 | |||
| 401 | PrecompiledShader shader; | ||
| 402 | shader.program = std::move(program); | ||
| 403 | shader.registry = std::move(registry); | ||
| 404 | shader.entries = MakeEntries(device, ir, entry.type); | ||
| 405 | |||
| 406 | std::scoped_lock lock{mutex}; | ||
| 407 | if (callback) { | ||
| 408 | callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, | ||
| 409 | transferable->size()); | ||
| 410 | } | ||
| 411 | runtime_cache.emplace(entry.unique_identifier, std::move(shader)); | ||
| 412 | } | ||
| 413 | }; | ||
| 414 | |||
| 415 | const std::size_t num_workers{std::max(1U, std::thread::hardware_concurrency())}; | ||
| 416 | const std::size_t bucket_size{transferable->size() / num_workers}; | ||
| 417 | std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers); | ||
| 418 | std::vector<std::thread> threads(num_workers); | ||
| 419 | for (std::size_t i = 0; i < num_workers; ++i) { | ||
| 420 | const bool is_last_worker = i + 1 == num_workers; | ||
| 421 | const std::size_t start{bucket_size * i}; | ||
| 422 | const std::size_t end{is_last_worker ? transferable->size() : start + bucket_size}; | ||
| 423 | |||
| 424 | // On some platforms the shared context has to be created from the GUI thread | ||
| 425 | contexts[i] = emu_window.CreateSharedContext(); | ||
| 426 | threads[i] = std::thread(worker, contexts[i].get(), start, end); | ||
| 427 | } | 368 | } |
| 428 | for (auto& thread : threads) { | 369 | const auto& qmd{kepler_compute.launch_description}; |
| 429 | thread.join(); | 370 | const ComputePipelineKey key{ |
| 371 | .unique_hash = shader->unique_hash, | ||
| 372 | .shared_memory_size = qmd.shared_alloc, | ||
| 373 | .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, | ||
| 374 | }; | ||
| 375 | const auto [pair, is_new]{compute_cache.try_emplace(key)}; | ||
| 376 | auto& pipeline{pair->second}; | ||
| 377 | if (!is_new) { | ||
| 378 | return pipeline.get(); | ||
| 430 | } | 379 | } |
| 380 | pipeline = CreateComputePipeline(key, shader); | ||
| 381 | return pipeline.get(); | ||
| 382 | } | ||
| 431 | 383 | ||
| 432 | if (gl_cache_failed) { | 384 | std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline() { |
| 433 | // Invalidate the precompiled cache if a shader dumped shader was rejected | 385 | GraphicsEnvironments environments; |
| 434 | disk_cache.InvalidatePrecompiled(); | 386 | GetGraphicsEnvironments(environments, graphics_key.unique_hashes); |
| 435 | precompiled_cache_altered = true; | ||
| 436 | return; | ||
| 437 | } | ||
| 438 | if (stop_loading.stop_requested()) { | ||
| 439 | return; | ||
| 440 | } | ||
| 441 | 387 | ||
| 442 | if (device.UseAssemblyShaders() || device.UseDriverCache()) { | 388 | main_pools.ReleaseContents(); |
| 443 | // Don't store precompiled binaries for assembly shaders or when using the driver cache | 389 | auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), |
| 444 | return; | 390 | use_asynchronous_shaders)}; |
| 391 | if (!pipeline || shader_cache_filename.empty()) { | ||
| 392 | return pipeline; | ||
| 445 | } | 393 | } |
| 446 | 394 | boost::container::static_vector<const GenericEnvironment*, Maxwell::MaxShaderProgram> env_ptrs; | |
| 447 | // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw | 395 | for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { |
| 448 | // before precompiling them | 396 | if (graphics_key.unique_hashes[index] != 0) { |
| 449 | 397 | env_ptrs.push_back(&environments.envs[index]); | |
| 450 | for (std::size_t i = 0; i < transferable->size(); ++i) { | ||
| 451 | const u64 id = (*transferable)[i].unique_identifier; | ||
| 452 | const auto it = find_precompiled(id); | ||
| 453 | if (it == gl_cache.end()) { | ||
| 454 | const GLuint program = runtime_cache.at(id).program->source_program.handle; | ||
| 455 | disk_cache.SavePrecompiled(id, program); | ||
| 456 | precompiled_cache_altered = true; | ||
| 457 | } | 398 | } |
| 458 | } | 399 | } |
| 459 | 400 | SerializePipeline(graphics_key, env_ptrs, shader_cache_filename, CACHE_VERSION); | |
| 460 | if (precompiled_cache_altered) { | 401 | return pipeline; |
| 461 | disk_cache.SaveVirtualPrecompiledFile(); | ||
| 462 | } | ||
| 463 | } | ||
| 464 | |||
| 465 | ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram( | ||
| 466 | const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, | ||
| 467 | const std::unordered_set<GLenum>& supported_formats) { | ||
| 468 | if (!supported_formats.contains(precompiled_entry.binary_format)) { | ||
| 469 | LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing"); | ||
| 470 | return {}; | ||
| 471 | } | ||
| 472 | |||
| 473 | auto program = std::make_shared<ProgramHandle>(); | ||
| 474 | GLuint& handle = program->source_program.handle; | ||
| 475 | handle = glCreateProgram(); | ||
| 476 | glProgramParameteri(handle, GL_PROGRAM_SEPARABLE, GL_TRUE); | ||
| 477 | glProgramBinary(handle, precompiled_entry.binary_format, precompiled_entry.binary.data(), | ||
| 478 | static_cast<GLsizei>(precompiled_entry.binary.size())); | ||
| 479 | |||
| 480 | GLint link_status; | ||
| 481 | glGetProgramiv(handle, GL_LINK_STATUS, &link_status); | ||
| 482 | if (link_status == GL_FALSE) { | ||
| 483 | LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing"); | ||
| 484 | return {}; | ||
| 485 | } | ||
| 486 | |||
| 487 | return program; | ||
| 488 | } | 402 | } |
| 489 | 403 | ||
| 490 | Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program, | 404 | std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( |
| 491 | VideoCommon::Shader::AsyncShaders& async_shaders) { | 405 | ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key, |
| 492 | if (!maxwell3d.dirty.flags[Dirty::Shaders]) { | 406 | std::span<Shader::Environment* const> envs, bool build_in_parallel) try { |
| 493 | auto* last_shader = last_shaders[static_cast<std::size_t>(program)]; | 407 | LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); |
| 494 | if (last_shader->IsBuilt()) { | 408 | size_t env_index{}; |
| 495 | return last_shader; | 409 | u32 total_storage_buffers{}; |
| 410 | std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs; | ||
| 411 | const bool uses_vertex_a{key.unique_hashes[0] != 0}; | ||
| 412 | const bool uses_vertex_b{key.unique_hashes[1] != 0}; | ||
| 413 | for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | ||
| 414 | if (key.unique_hashes[index] == 0) { | ||
| 415 | continue; | ||
| 496 | } | 416 | } |
| 497 | } | 417 | Shader::Environment& env{*envs[env_index]}; |
| 418 | ++env_index; | ||
| 498 | 419 | ||
| 499 | const GPUVAddr address{GetShaderAddress(maxwell3d, program)}; | 420 | const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))}; |
| 421 | Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); | ||
| 422 | if (!uses_vertex_a || index != 1) { | ||
| 423 | // Normal path | ||
| 424 | programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info); | ||
| 500 | 425 | ||
| 501 | if (device.UseAsynchronousShaders() && async_shaders.HasCompletedWork()) { | 426 | for (const auto& desc : programs[index].info.storage_buffers_descriptors) { |
| 502 | auto completed_work = async_shaders.GetCompletedWork(); | 427 | total_storage_buffers += desc.count; |
| 503 | for (auto& work : completed_work) { | ||
| 504 | Shader* shader = TryGet(work.cpu_address); | ||
| 505 | gpu.ShaderNotify().MarkShaderComplete(); | ||
| 506 | if (shader == nullptr) { | ||
| 507 | continue; | ||
| 508 | } | 428 | } |
| 509 | using namespace VideoCommon::Shader; | 429 | } else { |
| 510 | if (work.backend == AsyncShaders::Backend::OpenGL) { | 430 | // VertexB path when VertexA is present. |
| 511 | shader->AsyncOpenGLBuilt(std::move(work.program.opengl)); | 431 | auto& program_va{programs[0]}; |
| 512 | } else if (work.backend == AsyncShaders::Backend::GLASM) { | 432 | auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; |
| 513 | shader->AsyncGLASMBuilt(std::move(work.program.glasm)); | 433 | for (const auto& desc : program_vb.info.storage_buffers_descriptors) { |
| 434 | total_storage_buffers += desc.count; | ||
| 514 | } | 435 | } |
| 515 | 436 | programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); | |
| 516 | auto& registry = shader->GetRegistry(); | ||
| 517 | |||
| 518 | ShaderDiskCacheEntry entry; | ||
| 519 | entry.type = work.shader_type; | ||
| 520 | entry.code = std::move(work.code); | ||
| 521 | entry.code_b = std::move(work.code_b); | ||
| 522 | entry.unique_identifier = work.uid; | ||
| 523 | entry.bound_buffer = registry.GetBoundBuffer(); | ||
| 524 | entry.graphics_info = registry.GetGraphicsInfo(); | ||
| 525 | entry.keys = registry.GetKeys(); | ||
| 526 | entry.bound_samplers = registry.GetBoundSamplers(); | ||
| 527 | entry.bindless_samplers = registry.GetBindlessSamplers(); | ||
| 528 | disk_cache.SaveEntry(std::move(entry)); | ||
| 529 | } | 437 | } |
| 530 | } | 438 | } |
| 531 | 439 | const u32 glasm_storage_buffer_limit{device.GetMaxGLASMStorageBufferBlocks()}; | |
| 532 | // Look up shader in the cache based on address | 440 | const bool glasm_use_storage_buffers{total_storage_buffers <= glasm_storage_buffer_limit}; |
| 533 | const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(address)}; | 441 | |
| 534 | if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) { | 442 | std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{}; |
| 535 | return last_shaders[static_cast<std::size_t>(program)] = shader; | 443 | |
| 536 | } | 444 | OGLProgram source_program; |
| 537 | 445 | std::array<std::string, 5> sources; | |
| 538 | const u8* const host_ptr{gpu_memory.GetPointer(address)}; | 446 | std::array<std::vector<u32>, 5> sources_spirv; |
| 539 | 447 | Shader::Backend::Bindings binding; | |
| 540 | // No shader found - create a new one | 448 | Shader::IR::Program* previous_program{}; |
| 541 | ProgramCode code{GetShaderCode(gpu_memory, address, host_ptr, false)}; | 449 | const bool use_glasm{device.UseAssemblyShaders()}; |
| 542 | ProgramCode code_b; | 450 | const size_t first_index = uses_vertex_a && uses_vertex_b ? 1 : 0; |
| 543 | if (program == Maxwell::ShaderProgram::VertexA) { | 451 | for (size_t index = first_index; index < Maxwell::MaxShaderProgram; ++index) { |
| 544 | const GPUVAddr address_b{GetShaderAddress(maxwell3d, Maxwell::ShaderProgram::VertexB)}; | 452 | if (key.unique_hashes[index] == 0) { |
| 545 | const u8* host_ptr_b = gpu_memory.GetPointer(address_b); | 453 | continue; |
| 546 | code_b = GetShaderCode(gpu_memory, address_b, host_ptr_b, false); | 454 | } |
| 547 | } | 455 | UNIMPLEMENTED_IF(index == 0); |
| 548 | const std::size_t code_size = code.size() * sizeof(u64); | 456 | |
| 549 | 457 | Shader::IR::Program& program{programs[index]}; | |
| 550 | const u64 unique_identifier = GetUniqueIdentifier( | 458 | const size_t stage_index{index - 1}; |
| 551 | GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); | 459 | infos[stage_index] = &program.info; |
| 552 | 460 | ||
| 553 | const ShaderParameters params{gpu, maxwell3d, disk_cache, device, | 461 | const auto runtime_info{ |
| 554 | *cpu_addr, host_ptr, unique_identifier}; | 462 | MakeRuntimeInfo(key, program, previous_program, glasm_use_storage_buffers, use_glasm)}; |
| 555 | 463 | switch (device.GetShaderBackend()) { | |
| 556 | std::unique_ptr<Shader> shader; | 464 | case Settings::ShaderBackend::GLSL: |
| 557 | const auto found = runtime_cache.find(unique_identifier); | 465 | sources[stage_index] = EmitGLSL(profile, runtime_info, program, binding); |
| 558 | if (found == runtime_cache.end()) { | 466 | break; |
| 559 | shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b), | 467 | case Settings::ShaderBackend::GLASM: |
| 560 | async_shaders, cpu_addr.value_or(0)); | 468 | sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding); |
| 561 | } else { | 469 | break; |
| 562 | shader = Shader::CreateFromCache(params, found->second); | 470 | case Settings::ShaderBackend::SPIRV: |
| 563 | } | 471 | sources_spirv[stage_index] = EmitSPIRV(profile, runtime_info, program, binding); |
| 564 | 472 | break; | |
| 565 | Shader* const result = shader.get(); | 473 | } |
| 566 | if (cpu_addr) { | 474 | previous_program = &program; |
| 567 | Register(std::move(shader), *cpu_addr, code_size); | ||
| 568 | } else { | ||
| 569 | null_shader = std::move(shader); | ||
| 570 | } | 475 | } |
| 476 | auto* const thread_worker{build_in_parallel ? workers.get() : nullptr}; | ||
| 477 | return std::make_unique<GraphicsPipeline>( | ||
| 478 | device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, | ||
| 479 | thread_worker, &shader_notify, sources, sources_spirv, infos, key); | ||
| 571 | 480 | ||
| 572 | return last_shaders[static_cast<std::size_t>(program)] = result; | 481 | } catch (Shader::Exception& exception) { |
| 482 | LOG_ERROR(Render_OpenGL, "{}", exception.what()); | ||
| 483 | return nullptr; | ||
| 573 | } | 484 | } |
| 574 | 485 | ||
| 575 | Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { | 486 | std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline( |
| 576 | const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(code_addr)}; | 487 | const ComputePipelineKey& key, const VideoCommon::ShaderInfo* shader) { |
| 577 | 488 | const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; | |
| 578 | if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) { | 489 | const auto& qmd{kepler_compute.launch_description}; |
| 579 | return kernel; | 490 | ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; |
| 580 | } | 491 | env.SetCachedSize(shader->size_bytes); |
| 581 | 492 | ||
| 582 | // No kernel found, create a new one | 493 | main_pools.ReleaseContents(); |
| 583 | const u8* host_ptr{gpu_memory.GetPointer(code_addr)}; | 494 | auto pipeline{CreateComputePipeline(main_pools, key, env)}; |
| 584 | ProgramCode code{GetShaderCode(gpu_memory, code_addr, host_ptr, true)}; | 495 | if (!pipeline || shader_cache_filename.empty()) { |
| 585 | const std::size_t code_size{code.size() * sizeof(u64)}; | 496 | return pipeline; |
| 586 | const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; | 497 | } |
| 587 | 498 | SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env}, shader_cache_filename, | |
| 588 | const ShaderParameters params{gpu, kepler_compute, disk_cache, device, | 499 | CACHE_VERSION); |
| 589 | *cpu_addr, host_ptr, unique_identifier}; | 500 | return pipeline; |
| 501 | } | ||
| 590 | 502 | ||
| 591 | std::unique_ptr<Shader> kernel; | 503 | std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline( |
| 592 | const auto found = runtime_cache.find(unique_identifier); | 504 | ShaderContext::ShaderPools& pools, const ComputePipelineKey& key, |
| 593 | if (found == runtime_cache.end()) { | 505 | Shader::Environment& env) try { |
| 594 | kernel = Shader::CreateKernelFromMemory(params, std::move(code)); | 506 | LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); |
| 595 | } else { | 507 | |
| 596 | kernel = Shader::CreateFromCache(params, found->second); | 508 | Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; |
| 597 | } | 509 | auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; |
| 510 | |||
| 511 | u32 num_storage_buffers{}; | ||
| 512 | for (const auto& desc : program.info.storage_buffers_descriptors) { | ||
| 513 | num_storage_buffers += desc.count; | ||
| 514 | } | ||
| 515 | Shader::RuntimeInfo info; | ||
| 516 | info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); | ||
| 517 | |||
| 518 | std::string code{}; | ||
| 519 | std::vector<u32> code_spirv; | ||
| 520 | switch (device.GetShaderBackend()) { | ||
| 521 | case Settings::ShaderBackend::GLSL: | ||
| 522 | code = EmitGLSL(profile, program); | ||
| 523 | break; | ||
| 524 | case Settings::ShaderBackend::GLASM: | ||
| 525 | code = EmitGLASM(profile, info, program); | ||
| 526 | break; | ||
| 527 | case Settings::ShaderBackend::SPIRV: | ||
| 528 | code_spirv = EmitSPIRV(profile, program); | ||
| 529 | break; | ||
| 530 | } | ||
| 531 | |||
| 532 | return std::make_unique<ComputePipeline>(device, texture_cache, buffer_cache, gpu_memory, | ||
| 533 | kepler_compute, program_manager, program.info, code, | ||
| 534 | code_spirv); | ||
| 535 | } catch (Shader::Exception& exception) { | ||
| 536 | LOG_ERROR(Render_OpenGL, "{}", exception.what()); | ||
| 537 | return nullptr; | ||
| 538 | } | ||
| 598 | 539 | ||
| 599 | Shader* const result = kernel.get(); | 540 | std::unique_ptr<ShaderWorker> ShaderCache::CreateWorkers() const { |
| 600 | if (cpu_addr) { | 541 | return std::make_unique<ShaderWorker>(std::max(std::thread::hardware_concurrency(), 2U) - 1, |
| 601 | Register(std::move(kernel), *cpu_addr, code_size); | 542 | "yuzu:ShaderBuilder", |
| 602 | } else { | 543 | [this] { return Context{emu_window}; }); |
| 603 | null_kernel = std::move(kernel); | ||
| 604 | } | ||
| 605 | return result; | ||
| 606 | } | 544 | } |
| 607 | 545 | ||
| 608 | } // namespace OpenGL | 546 | } // namespace OpenGL |
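One idiom in the new gl_shader_cache.cpp above deserves a callout: CurrentGraphicsPipelineSlowPath() and CurrentComputePipeline() both call try_emplace on the pipeline map, which finds an existing entry or reserves an empty slot in a single hash lookup. A self-contained sketch of the same idiom, using hypothetical Key and Pipeline stand-ins rather than the real pipeline types:

    #include <cstddef>
    #include <memory>
    #include <unordered_map>

    struct Key {
        unsigned long long hash;
        bool operator==(const Key&) const = default;
    };
    struct KeyHasher {
        std::size_t operator()(const Key& k) const noexcept {
            return static_cast<std::size_t>(k.hash);
        }
    };
    struct Pipeline {};

    std::unordered_map<Key, std::unique_ptr<Pipeline>, KeyHasher> cache;

    Pipeline* GetOrCreate(const Key& key) {
        // try_emplace hashes the key once: it either finds the existing entry
        // or default-constructs an empty unique_ptr slot to be filled below.
        const auto [it, is_new] = cache.try_emplace(key);
        auto& pipeline = it->second;
        if (is_new) {
            pipeline = std::make_unique<Pipeline>(); // Compile only on a miss
        }
        return pipeline.get();
    }

A side effect also visible in the real code: when CreateGraphicsPipeline() or CreateComputePipeline() catches a Shader::Exception and returns nullptr, the null entry stays in the map, so a shader that failed to translate is not recompiled on every draw.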
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index b30308b6f..a34110b37 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -5,157 +5,93 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <atomic> | 8 | #include <filesystem> |
| 9 | #include <bitset> | 9 | #include <stop_token> |
| 10 | #include <memory> | ||
| 11 | #include <string> | ||
| 12 | #include <tuple> | ||
| 13 | #include <unordered_map> | 10 | #include <unordered_map> |
| 14 | #include <unordered_set> | ||
| 15 | #include <vector> | ||
| 16 | 11 | ||
| 17 | #include <glad/glad.h> | 12 | #include <glad/glad.h> |
| 18 | 13 | ||
| 19 | #include "common/common_types.h" | 14 | #include "common/common_types.h" |
| 20 | #include "video_core/engines/shader_type.h" | 15 | #include "common/thread_worker.h" |
| 21 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 16 | #include "shader_recompiler/frontend/ir/value.h" |
| 22 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 17 | #include "shader_recompiler/host_translate_info.h" |
| 23 | #include "video_core/renderer_opengl/gl_shader_disk_cache.h" | 18 | #include "shader_recompiler/object_pool.h" |
| 24 | #include "video_core/shader/registry.h" | 19 | #include "shader_recompiler/profile.h" |
| 25 | #include "video_core/shader/shader_ir.h" | 20 | #include "video_core/renderer_opengl/gl_compute_pipeline.h" |
| 21 | #include "video_core/renderer_opengl/gl_graphics_pipeline.h" | ||
| 22 | #include "video_core/renderer_opengl/gl_shader_context.h" | ||
| 26 | #include "video_core/shader_cache.h" | 23 | #include "video_core/shader_cache.h" |
| 27 | 24 | ||
| 28 | namespace Tegra { | 25 | namespace Tegra { |
| 29 | class MemoryManager; | 26 | class MemoryManager; |
| 30 | } | 27 | } |
| 31 | 28 | ||
| 32 | namespace Core::Frontend { | ||
| 33 | class EmuWindow; | ||
| 34 | } | ||
| 35 | |||
| 36 | namespace VideoCommon::Shader { | ||
| 37 | class AsyncShaders; | ||
| 38 | } | ||
| 39 | |||
| 40 | namespace OpenGL { | 29 | namespace OpenGL { |
| 41 | 30 | ||
| 42 | class Device; | 31 | class Device; |
| 32 | class ProgramManager; | ||
| 43 | class RasterizerOpenGL; | 33 | class RasterizerOpenGL; |
| 34 | using ShaderWorker = Common::StatefulThreadWorker<ShaderContext::Context>; | ||
| 44 | 35 | ||
| 45 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 36 | class ShaderCache : public VideoCommon::ShaderCache { |
| 46 | |||
| 47 | struct ProgramHandle { | ||
| 48 | OGLProgram source_program; | ||
| 49 | OGLAssemblyProgram assembly_program; | ||
| 50 | }; | ||
| 51 | using ProgramSharedPtr = std::shared_ptr<ProgramHandle>; | ||
| 52 | |||
| 53 | struct PrecompiledShader { | ||
| 54 | ProgramSharedPtr program; | ||
| 55 | std::shared_ptr<VideoCommon::Shader::Registry> registry; | ||
| 56 | ShaderEntries entries; | ||
| 57 | }; | ||
| 58 | |||
| 59 | struct ShaderParameters { | ||
| 60 | Tegra::GPU& gpu; | ||
| 61 | Tegra::Engines::ConstBufferEngineInterface& engine; | ||
| 62 | ShaderDiskCacheOpenGL& disk_cache; | ||
| 63 | const Device& device; | ||
| 64 | VAddr cpu_addr; | ||
| 65 | const u8* host_ptr; | ||
| 66 | u64 unique_identifier; | ||
| 67 | }; | ||
| 68 | |||
| 69 | ProgramSharedPtr BuildShader(const Device& device, Tegra::Engines::ShaderType shader_type, | ||
| 70 | u64 unique_identifier, const VideoCommon::Shader::ShaderIR& ir, | ||
| 71 | const VideoCommon::Shader::Registry& registry, | ||
| 72 | bool hint_retrievable = false); | ||
| 73 | |||
| 74 | class Shader final { | ||
| 75 | public: | 37 | public: |
| 76 | ~Shader(); | 38 | explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, |
| 77 | 39 | Tegra::Engines::Maxwell3D& maxwell3d_, | |
| 78 | /// Gets the GL program handle for the shader | 40 | Tegra::Engines::KeplerCompute& kepler_compute_, |
| 79 | GLuint GetHandle() const; | 41 | Tegra::MemoryManager& gpu_memory_, const Device& device_, |
| 80 | 42 | TextureCache& texture_cache_, BufferCache& buffer_cache_, | |
| 81 | bool IsBuilt() const; | 43 | ProgramManager& program_manager_, StateTracker& state_tracker_, |
| 82 | 44 | VideoCore::ShaderNotify& shader_notify_); | |
| 83 | /// Gets the shader entries for the shader | 45 | ~ShaderCache(); |
| 84 | const ShaderEntries& GetEntries() const { | ||
| 85 | return entries; | ||
| 86 | } | ||
| 87 | |||
| 88 | const VideoCommon::Shader::Registry& GetRegistry() const { | ||
| 89 | return *registry; | ||
| 90 | } | ||
| 91 | |||
| 92 | /// Mark a OpenGL shader as built | ||
| 93 | void AsyncOpenGLBuilt(OGLProgram new_program); | ||
| 94 | 46 | ||
| 95 | /// Mark a GLASM shader as built | 47 | void LoadDiskResources(u64 title_id, std::stop_token stop_loading, |
| 96 | void AsyncGLASMBuilt(OGLAssemblyProgram new_program); | 48 | const VideoCore::DiskResourceLoadCallback& callback); |
| 97 | 49 | ||
| 98 | static std::unique_ptr<Shader> CreateStageFromMemory( | 50 | [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline(); |
| 99 | const ShaderParameters& params, Maxwell::ShaderProgram program_type, | ||
| 100 | ProgramCode program_code, ProgramCode program_code_b, | ||
| 101 | VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr); | ||
| 102 | 51 | ||
| 103 | static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params, | 52 | [[nodiscard]] ComputePipeline* CurrentComputePipeline(); |
| 104 | ProgramCode code); | ||
| 105 | |||
| 106 | static std::unique_ptr<Shader> CreateFromCache(const ShaderParameters& params, | ||
| 107 | const PrecompiledShader& precompiled_shader); | ||
| 108 | 53 | ||
| 109 | private: | 54 | private: |
| 110 | explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries, | 55 | GraphicsPipeline* CurrentGraphicsPipelineSlowPath(); |
| 111 | ProgramSharedPtr program, bool is_built_ = true); | ||
| 112 | |||
| 113 | std::shared_ptr<VideoCommon::Shader::Registry> registry; | ||
| 114 | ShaderEntries entries; | ||
| 115 | ProgramSharedPtr program; | ||
| 116 | GLuint handle = 0; | ||
| 117 | bool is_built{}; | ||
| 118 | }; | ||
| 119 | 56 | ||
| 120 | class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> { | 57 | [[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept; |
| 121 | public: | ||
| 122 | explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, | ||
| 123 | Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu, | ||
| 124 | Tegra::Engines::Maxwell3D& maxwell3d_, | ||
| 125 | Tegra::Engines::KeplerCompute& kepler_compute_, | ||
| 126 | Tegra::MemoryManager& gpu_memory_, const Device& device_); | ||
| 127 | ~ShaderCacheOpenGL() override; | ||
| 128 | 58 | ||
| 129 | /// Loads disk cache for the current game | 59 | std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(); |
| 130 | void LoadDiskCache(u64 title_id, std::stop_token stop_loading, | ||
| 131 | const VideoCore::DiskResourceLoadCallback& callback); | ||
| 132 | 60 | ||
| 133 | /// Gets the current specified shader stage program | 61 | std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline( |
| 134 | Shader* GetStageProgram(Maxwell::ShaderProgram program, | 62 | ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key, |
| 135 | VideoCommon::Shader::AsyncShaders& async_shaders); | 63 | std::span<Shader::Environment* const> envs, bool build_in_parallel); |
| 136 | 64 | ||
| 137 | /// Gets a compute kernel in the passed address | 65 | std::unique_ptr<ComputePipeline> CreateComputePipeline(const ComputePipelineKey& key, |
| 138 | Shader* GetComputeKernel(GPUVAddr code_addr); | 66 | const VideoCommon::ShaderInfo* shader); |
| 139 | 67 | ||
| 140 | private: | 68 | std::unique_ptr<ComputePipeline> CreateComputePipeline(ShaderContext::ShaderPools& pools, |
| 141 | ProgramSharedPtr GeneratePrecompiledProgram( | 69 | const ComputePipelineKey& key, |
| 142 | const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, | 70 | Shader::Environment& env); |
| 143 | const std::unordered_set<GLenum>& supported_formats); | 71 | |
| 72 | std::unique_ptr<ShaderWorker> CreateWorkers() const; | ||
| 144 | 73 | ||
| 145 | Core::Frontend::EmuWindow& emu_window; | 74 | Core::Frontend::EmuWindow& emu_window; |
| 146 | Tegra::GPU& gpu; | ||
| 147 | Tegra::MemoryManager& gpu_memory; | ||
| 148 | Tegra::Engines::Maxwell3D& maxwell3d; | ||
| 149 | Tegra::Engines::KeplerCompute& kepler_compute; | ||
| 150 | const Device& device; | 75 | const Device& device; |
| 76 | TextureCache& texture_cache; | ||
| 77 | BufferCache& buffer_cache; | ||
| 78 | ProgramManager& program_manager; | ||
| 79 | StateTracker& state_tracker; | ||
| 80 | VideoCore::ShaderNotify& shader_notify; | ||
| 81 | const bool use_asynchronous_shaders; | ||
| 82 | |||
| 83 | GraphicsPipelineKey graphics_key{}; | ||
| 84 | GraphicsPipeline* current_pipeline{}; | ||
| 151 | 85 | ||
| 152 | ShaderDiskCacheOpenGL disk_cache; | 86 | ShaderContext::ShaderPools main_pools; |
| 153 | std::unordered_map<u64, PrecompiledShader> runtime_cache; | 87 | std::unordered_map<GraphicsPipelineKey, std::unique_ptr<GraphicsPipeline>> graphics_cache; |
| 88 | std::unordered_map<ComputePipelineKey, std::unique_ptr<ComputePipeline>> compute_cache; | ||
| 154 | 89 | ||
| 155 | std::unique_ptr<Shader> null_shader; | 90 | Shader::Profile profile; |
| 156 | std::unique_ptr<Shader> null_kernel; | 91 | Shader::HostTranslateInfo host_info; |
| 157 | 92 | ||
| 158 | std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{}; | 93 | std::filesystem::path shader_cache_filename; |
| 94 | std::unique_ptr<ShaderWorker> workers; | ||
| 159 | }; | 95 | }; |
| 160 | 96 | ||
| 161 | } // namespace OpenGL | 97 | } // namespace OpenGL |
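For orientation, the slow path declared in this header plausibly ties the members together as follows. This is a hedged sketch that only borrows names from the header above; the actual implementation lives in the corresponding .cpp and may differ:

    GraphicsPipeline* ShaderCache::CurrentGraphicsPipelineSlowPath() {
        // Look up (or default-construct) the cache entry for the current key.
        const auto [it, is_new] = graphics_cache.try_emplace(graphics_key);
        if (is_new) {
            it->second = CreateGraphicsPipeline(); // translates using main_pools
        }
        current_pipeline = it->second.get();
        // BuiltPipeline presumably filters out pipelines still compiling asynchronously.
        return BuiltPipeline(current_pipeline);
    }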
diff --git a/src/video_core/renderer_opengl/gl_shader_context.h b/src/video_core/renderer_opengl/gl_shader_context.h new file mode 100644 index 000000000..6ff34e5d6 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_shader_context.h | |||
| @@ -0,0 +1,33 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "core/frontend/emu_window.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/control_flow.h" | ||
| 10 | |||
| 11 | namespace OpenGL::ShaderContext { | ||
| 12 | struct ShaderPools { | ||
| 13 | void ReleaseContents() { | ||
| 14 | flow_block.ReleaseContents(); | ||
| 15 | block.ReleaseContents(); | ||
| 16 | inst.ReleaseContents(); | ||
| 17 | } | ||
| 18 | |||
| 19 | Shader::ObjectPool<Shader::IR::Inst> inst; | ||
| 20 | Shader::ObjectPool<Shader::IR::Block> block; | ||
| 21 | Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block; | ||
| 22 | }; | ||
| 23 | |||
| 24 | struct Context { | ||
| 25 | explicit Context(Core::Frontend::EmuWindow& emu_window) | ||
| 26 | : gl_context{emu_window.CreateSharedContext()}, scoped{*gl_context} {} | ||
| 27 | |||
| 28 | std::unique_ptr<Core::Frontend::GraphicsContext> gl_context; | ||
| 29 | Core::Frontend::GraphicsContext::Scoped scoped; | ||
| 30 | ShaderPools pools; | ||
| 31 | }; | ||
| 32 | |||
| 33 | } // namespace OpenGL::ShaderContext | ||
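The Context type above is the unit of state each shader-compilation worker owns: a shared GL context made current through `scoped`, plus per-thread object pools for IR storage. An illustrative worker body (the worker plumbing here is hypothetical; Context and ShaderPools are from the header above):

    void WorkerThread(Core::Frontend::EmuWindow& emu_window) {
        OpenGL::ShaderContext::Context ctx{emu_window}; // shared GL context becomes current here
        // ... translate Maxwell shaders into IR with ctx.pools and build GL programs ...
        ctx.pools.ReleaseContents(); // recycle IR node storage before the next job
    }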
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp deleted file mode 100644 index 9c28498e8..000000000 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ /dev/null | |||
| @@ -1,2986 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | #include <string> | ||
| 7 | #include <string_view> | ||
| 8 | #include <utility> | ||
| 9 | #include <variant> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include <fmt/format.h> | ||
| 13 | |||
| 14 | #include "common/alignment.h" | ||
| 15 | #include "common/assert.h" | ||
| 16 | #include "common/common_types.h" | ||
| 17 | #include "common/div_ceil.h" | ||
| 18 | #include "common/logging/log.h" | ||
| 19 | #include "video_core/engines/maxwell_3d.h" | ||
| 20 | #include "video_core/engines/shader_type.h" | ||
| 21 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 22 | #include "video_core/renderer_opengl/gl_rasterizer.h" | ||
| 23 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||
| 24 | #include "video_core/shader/ast.h" | ||
| 25 | #include "video_core/shader/node.h" | ||
| 26 | #include "video_core/shader/shader_ir.h" | ||
| 27 | #include "video_core/shader/transform_feedback.h" | ||
| 28 | |||
| 29 | namespace OpenGL { | ||
| 30 | |||
| 31 | namespace { | ||
| 32 | |||
| 33 | using Tegra::Engines::ShaderType; | ||
| 34 | using Tegra::Shader::Attribute; | ||
| 35 | using Tegra::Shader::Header; | ||
| 36 | using Tegra::Shader::IpaInterpMode; | ||
| 37 | using Tegra::Shader::IpaMode; | ||
| 38 | using Tegra::Shader::IpaSampleMode; | ||
| 39 | using Tegra::Shader::PixelImap; | ||
| 40 | using Tegra::Shader::Register; | ||
| 41 | using Tegra::Shader::TextureType; | ||
| 42 | |||
| 43 | using namespace VideoCommon::Shader; | ||
| 44 | using namespace std::string_literals; | ||
| 45 | |||
| 46 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 47 | using Operation = const OperationNode&; | ||
| 48 | |||
| 49 | class ASTDecompiler; | ||
| 50 | class ExprDecompiler; | ||
| 51 | |||
| 52 | enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; | ||
| 53 | |||
| 54 | constexpr std::array FLOAT_TYPES{"float", "vec2", "vec3", "vec4"}; | ||
| 55 | |||
| 56 | constexpr std::string_view INPUT_ATTRIBUTE_NAME = "in_attr"; | ||
| 57 | constexpr std::string_view OUTPUT_ATTRIBUTE_NAME = "out_attr"; | ||
| 58 | |||
| 59 | struct TextureOffset {}; | ||
| 60 | struct TextureDerivates {}; | ||
| 61 | using TextureArgument = std::pair<Type, Node>; | ||
| 62 | using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>; | ||
| 63 | |||
| 64 | constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32); | ||
| 65 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32); | ||
| 66 | |||
| 67 | constexpr std::string_view COMMON_DECLARATIONS = R"(#define ftoi floatBitsToInt | ||
| 68 | #define ftou floatBitsToUint | ||
| 69 | #define itof intBitsToFloat | ||
| 70 | #define utof uintBitsToFloat | ||
| 71 | |||
| 72 | bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{ | ||
| 73 | bvec2 is_nan1 = isnan(pair1); | ||
| 74 | bvec2 is_nan2 = isnan(pair2); | ||
| 75 | return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y); | ||
| 76 | }} | ||
| 77 | |||
| 78 | const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f ); | ||
| 79 | const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f ); | ||
| 80 | )"; | ||
| 81 | |||
| 82 | class ShaderWriter final { | ||
| 83 | public: | ||
| 84 | void AddExpression(std::string_view text) { | ||
| 85 | DEBUG_ASSERT(scope >= 0); | ||
| 86 | if (!text.empty()) { | ||
| 87 | AppendIndentation(); | ||
| 88 | } | ||
| 89 | shader_source += text; | ||
| 90 | } | ||
| 91 | |||
| 92 | // Forwards all arguments directly to libfmt. | ||
| 93 | // Note that all formatting requirements for fmt must be | ||
| 94 | // obeyed when using this function. (e.g. {{ must be used when | ||
| 95 | // printing the character '{' is desired. Ditto for }} and '}', | ||
| 96 | // etc). | ||
| 97 | template <typename... Args> | ||
| 98 | void AddLine(std::string_view text, Args&&... args) { | ||
| 99 | AddExpression(fmt::format(fmt::runtime(text), std::forward<Args>(args)...)); | ||
| 100 | AddNewLine(); | ||
| 101 | } | ||
| 102 | |||
| 103 | void AddNewLine() { | ||
| 104 | DEBUG_ASSERT(scope >= 0); | ||
| 105 | shader_source += '\n'; | ||
| 106 | } | ||
| 107 | |||
| 108 | std::string GenerateTemporary() { | ||
| 109 | return fmt::format("tmp{}", temporary_index++); | ||
| 110 | } | ||
| 111 | |||
| 112 | std::string GetResult() { | ||
| 113 | return std::move(shader_source); | ||
| 114 | } | ||
| 115 | |||
| 116 | s32 scope = 0; | ||
| 117 | |||
| 118 | private: | ||
| 119 | void AppendIndentation() { | ||
| 120 | shader_source.append(static_cast<std::size_t>(scope) * 4, ' '); | ||
| 121 | } | ||
| 122 | |||
| 123 | std::string shader_source; | ||
| 124 | u32 temporary_index = 1; | ||
| 125 | }; | ||
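A short usage sketch of ShaderWriter, grounded in the members above, showing the fmt escaping rule from the comment and how `scope` drives indentation:

    ShaderWriter code;
    code.AddLine("void main() {{");  // "{{" prints a literal '{'
    ++code.scope;                    // following lines indent by 4 spaces
    code.AddLine("float {} = 0.0f;", code.GenerateTemporary());
    --code.scope;
    code.AddLine("}}");
    // code.GetResult() now holds:
    //   void main() {
    //       float tmp1 = 0.0f;
    //   }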
| 126 | |||
| 127 | class Expression final { | ||
| 128 | public: | ||
| 129 | Expression(std::string code_, Type type_) : code{std::move(code_)}, type{type_} { | ||
| 130 | ASSERT(type != Type::Void); | ||
| 131 | } | ||
| 132 | Expression() : type{Type::Void} {} | ||
| 133 | |||
| 134 | Type GetType() const { | ||
| 135 | return type; | ||
| 136 | } | ||
| 137 | |||
| 138 | std::string GetCode() const { | ||
| 139 | return code; | ||
| 140 | } | ||
| 141 | |||
| 142 | void CheckVoid() const { | ||
| 143 | ASSERT(type == Type::Void); | ||
| 144 | } | ||
| 145 | |||
| 146 | std::string As(Type type_) const { | ||
| 147 | switch (type_) { | ||
| 148 | case Type::Bool: | ||
| 149 | return AsBool(); | ||
| 150 | case Type::Bool2: | ||
| 151 | return AsBool2(); | ||
| 152 | case Type::Float: | ||
| 153 | return AsFloat(); | ||
| 154 | case Type::Int: | ||
| 155 | return AsInt(); | ||
| 156 | case Type::Uint: | ||
| 157 | return AsUint(); | ||
| 158 | case Type::HalfFloat: | ||
| 159 | return AsHalfFloat(); | ||
| 160 | default: | ||
| 161 | UNREACHABLE_MSG("Invalid type"); | ||
| 162 | return code; | ||
| 163 | } | ||
| 164 | } | ||
| 165 | |||
| 166 | std::string AsBool() const { | ||
| 167 | switch (type) { | ||
| 168 | case Type::Bool: | ||
| 169 | return code; | ||
| 170 | default: | ||
| 171 | UNREACHABLE_MSG("Incompatible types"); | ||
| 172 | return code; | ||
| 173 | } | ||
| 174 | } | ||
| 175 | |||
| 176 | std::string AsBool2() const { | ||
| 177 | switch (type) { | ||
| 178 | case Type::Bool2: | ||
| 179 | return code; | ||
| 180 | default: | ||
| 181 | UNREACHABLE_MSG("Incompatible types"); | ||
| 182 | return code; | ||
| 183 | } | ||
| 184 | } | ||
| 185 | |||
| 186 | std::string AsFloat() const { | ||
| 187 | switch (type) { | ||
| 188 | case Type::Float: | ||
| 189 | return code; | ||
| 190 | case Type::Uint: | ||
| 191 | return fmt::format("utof({})", code); | ||
| 192 | case Type::Int: | ||
| 193 | return fmt::format("itof({})", code); | ||
| 194 | case Type::HalfFloat: | ||
| 195 | return fmt::format("utof(packHalf2x16({}))", code); | ||
| 196 | default: | ||
| 197 | UNREACHABLE_MSG("Incompatible types"); | ||
| 198 | return code; | ||
| 199 | } | ||
| 200 | } | ||
| 201 | |||
| 202 | std::string AsInt() const { | ||
| 203 | switch (type) { | ||
| 204 | case Type::Float: | ||
| 205 | return fmt::format("ftoi({})", code); | ||
| 206 | case Type::Uint: | ||
| 207 | return fmt::format("int({})", code); | ||
| 208 | case Type::Int: | ||
| 209 | return code; | ||
| 210 | case Type::HalfFloat: | ||
| 211 | return fmt::format("int(packHalf2x16({}))", code); | ||
| 212 | default: | ||
| 213 | UNREACHABLE_MSG("Incompatible types"); | ||
| 214 | return code; | ||
| 215 | } | ||
| 216 | } | ||
| 217 | |||
| 218 | std::string AsUint() const { | ||
| 219 | switch (type) { | ||
| 220 | case Type::Float: | ||
| 221 | return fmt::format("ftou({})", code); | ||
| 222 | case Type::Uint: | ||
| 223 | return code; | ||
| 224 | case Type::Int: | ||
| 225 | return fmt::format("uint({})", code); | ||
| 226 | case Type::HalfFloat: | ||
| 227 | return fmt::format("packHalf2x16({})", code); | ||
| 228 | default: | ||
| 229 | UNREACHABLE_MSG("Incompatible types"); | ||
| 230 | return code; | ||
| 231 | } | ||
| 232 | } | ||
| 233 | |||
| 234 | std::string AsHalfFloat() const { | ||
| 235 | switch (type) { | ||
| 236 | case Type::Float: | ||
| 237 | return fmt::format("unpackHalf2x16(ftou({}))", code); | ||
| 238 | case Type::Uint: | ||
| 239 | return fmt::format("unpackHalf2x16({})", code); | ||
| 240 | case Type::Int: | ||
| 241 | return fmt::format("unpackHalf2x16(int({}))", code); | ||
| 242 | case Type::HalfFloat: | ||
| 243 | return code; | ||
| 244 | default: | ||
| 245 | UNREACHABLE_MSG("Incompatible types"); | ||
| 246 | return code; | ||
| 247 | } | ||
| 248 | } | ||
| 249 | |||
| 250 | private: | ||
| 251 | std::string code; | ||
| 252 | Type type{}; | ||
| 253 | }; | ||
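For example, the conversion helpers compose the bit-cast macros from COMMON_DECLARATIONS; each call below returns the GLSL string shown in its comment:

    Expression reg{"r5", Type::Float};
    reg.AsFloat();     // "r5"
    reg.AsUint();      // "ftou(r5)"
    reg.AsHalfFloat(); // "unpackHalf2x16(ftou(r5))"
    Expression imm{"0x3FU", Type::Uint};
    imm.AsInt();       // "int(0x3FU)"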
| 254 | |||
| 255 | const char* GetTypeString(Type type) { | ||
| 256 | switch (type) { | ||
| 257 | case Type::Bool: | ||
| 258 | return "bool"; | ||
| 259 | case Type::Bool2: | ||
| 260 | return "bvec2"; | ||
| 261 | case Type::Float: | ||
| 262 | return "float"; | ||
| 263 | case Type::Int: | ||
| 264 | return "int"; | ||
| 265 | case Type::Uint: | ||
| 266 | return "uint"; | ||
| 267 | case Type::HalfFloat: | ||
| 268 | return "vec2"; | ||
| 269 | default: | ||
| 270 | UNREACHABLE_MSG("Invalid type"); | ||
| 271 | return "<invalid type>"; | ||
| 272 | } | ||
| 273 | } | ||
| 274 | |||
| 275 | const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) { | ||
| 276 | switch (image_type) { | ||
| 277 | case Tegra::Shader::ImageType::Texture1D: | ||
| 278 | return "1D"; | ||
| 279 | case Tegra::Shader::ImageType::TextureBuffer: | ||
| 280 | return "Buffer"; | ||
| 281 | case Tegra::Shader::ImageType::Texture1DArray: | ||
| 282 | return "1DArray"; | ||
| 283 | case Tegra::Shader::ImageType::Texture2D: | ||
| 284 | return "2D"; | ||
| 285 | case Tegra::Shader::ImageType::Texture2DArray: | ||
| 286 | return "2DArray"; | ||
| 287 | case Tegra::Shader::ImageType::Texture3D: | ||
| 288 | return "3D"; | ||
| 289 | default: | ||
| 290 | UNREACHABLE(); | ||
| 291 | return "1D"; | ||
| 292 | } | ||
| 293 | } | ||
| 294 | |||
| 295 | /// Describes primitive behavior in geometry shaders | ||
| 296 | std::pair<const char*, u32> GetPrimitiveDescription(Maxwell::PrimitiveTopology topology) { | ||
| 297 | switch (topology) { | ||
| 298 | case Maxwell::PrimitiveTopology::Points: | ||
| 299 | return {"points", 1}; | ||
| 300 | case Maxwell::PrimitiveTopology::Lines: | ||
| 301 | case Maxwell::PrimitiveTopology::LineStrip: | ||
| 302 | return {"lines", 2}; | ||
| 303 | case Maxwell::PrimitiveTopology::LinesAdjacency: | ||
| 304 | case Maxwell::PrimitiveTopology::LineStripAdjacency: | ||
| 305 | return {"lines_adjacency", 4}; | ||
| 306 | case Maxwell::PrimitiveTopology::Triangles: | ||
| 307 | case Maxwell::PrimitiveTopology::TriangleStrip: | ||
| 308 | case Maxwell::PrimitiveTopology::TriangleFan: | ||
| 309 | return {"triangles", 3}; | ||
| 310 | case Maxwell::PrimitiveTopology::TrianglesAdjacency: | ||
| 311 | case Maxwell::PrimitiveTopology::TriangleStripAdjacency: | ||
| 312 | return {"triangles_adjacency", 6}; | ||
| 313 | default: | ||
| 314 | UNIMPLEMENTED_MSG("topology={}", topology); | ||
| 315 | return {"points", 1}; | ||
| 316 | } | ||
| 317 | } | ||
| 318 | |||
| 319 | /// Generates code to use for a swizzle operation. | ||
| 320 | constexpr const char* GetSwizzle(std::size_t element) { | ||
| 321 | constexpr std::array swizzle = {".x", ".y", ".z", ".w"}; | ||
| 322 | return swizzle.at(element); | ||
| 323 | } | ||
| 324 | |||
| 325 | constexpr const char* GetColorSwizzle(std::size_t element) { | ||
| 326 | constexpr std::array swizzle = {".r", ".g", ".b", ".a"}; | ||
| 327 | return swizzle.at(element); | ||
| 328 | } | ||
| 329 | |||
| 330 | /// Translates an output topology to its GLSL layout name | ||
| 331 | std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { | ||
| 332 | switch (topology) { | ||
| 333 | case Tegra::Shader::OutputTopology::PointList: | ||
| 334 | return "points"; | ||
| 335 | case Tegra::Shader::OutputTopology::LineStrip: | ||
| 336 | return "line_strip"; | ||
| 337 | case Tegra::Shader::OutputTopology::TriangleStrip: | ||
| 338 | return "triangle_strip"; | ||
| 339 | default: | ||
| 340 | UNIMPLEMENTED_MSG("Unknown output topology: {}", topology); | ||
| 341 | return "points"; | ||
| 342 | } | ||
| 343 | } | ||
| 344 | |||
| 345 | /// Returns true if an object has to be treated as precise | ||
| 346 | bool IsPrecise(Operation operand) { | ||
| 347 | const auto& meta{operand.GetMeta()}; | ||
| 348 | if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) { | ||
| 349 | return arithmetic->precise; | ||
| 350 | } | ||
| 351 | return false; | ||
| 352 | } | ||
| 353 | |||
| 354 | bool IsPrecise(const Node& node) { | ||
| 355 | if (const auto operation = std::get_if<OperationNode>(&*node)) { | ||
| 356 | return IsPrecise(*operation); | ||
| 357 | } | ||
| 358 | return false; | ||
| 359 | } | ||
| 360 | |||
| 361 | constexpr bool IsGenericAttribute(Attribute::Index index) { | ||
| 362 | return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31; | ||
| 363 | } | ||
| 364 | |||
| 365 | constexpr bool IsLegacyTexCoord(Attribute::Index index) { | ||
| 366 | return static_cast<int>(index) >= static_cast<int>(Attribute::Index::TexCoord_0) && | ||
| 367 | static_cast<int>(index) <= static_cast<int>(Attribute::Index::TexCoord_7); | ||
| 368 | } | ||
| 369 | |||
| 370 | constexpr Attribute::Index ToGenericAttribute(u64 value) { | ||
| 371 | return static_cast<Attribute::Index>(value + static_cast<u64>(Attribute::Index::Attribute_0)); | ||
| 372 | } | ||
| 373 | |||
| 374 | constexpr int GetLegacyTexCoordIndex(Attribute::Index index) { | ||
| 375 | return static_cast<int>(index) - static_cast<int>(Attribute::Index::TexCoord_0); | ||
| 376 | } | ||
| 377 | |||
| 378 | u32 GetGenericAttributeIndex(Attribute::Index index) { | ||
| 379 | ASSERT(IsGenericAttribute(index)); | ||
| 380 | return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); | ||
| 381 | } | ||
| 382 | |||
| 383 | constexpr const char* GetFlowStackPrefix(MetaStackClass stack) { | ||
| 384 | switch (stack) { | ||
| 385 | case MetaStackClass::Ssy: | ||
| 386 | return "ssy"; | ||
| 387 | case MetaStackClass::Pbk: | ||
| 388 | return "pbk"; | ||
| 389 | } | ||
| 390 | return {}; | ||
| 391 | } | ||
| 392 | |||
| 393 | std::string FlowStackName(MetaStackClass stack) { | ||
| 394 | return fmt::format("{}_flow_stack", GetFlowStackPrefix(stack)); | ||
| 395 | } | ||
| 396 | |||
| 397 | std::string FlowStackTopName(MetaStackClass stack) { | ||
| 398 | return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); | ||
| 399 | } | ||
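For concreteness, these helpers produce names such as:

    FlowStackName(MetaStackClass::Ssy)     // "ssy_flow_stack"
    FlowStackTopName(MetaStackClass::Pbk)  // "pbk_flow_stack_top"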
| 400 | |||
| 401 | struct GenericVaryingDescription { | ||
| 402 | std::string name; | ||
| 403 | u8 first_element = 0; | ||
| 404 | bool is_scalar = false; | ||
| 405 | }; | ||
| 406 | |||
| 407 | class GLSLDecompiler final { | ||
| 408 | public: | ||
| 409 | explicit GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, | ||
| 410 | ShaderType stage_, std::string_view identifier_, | ||
| 411 | std::string_view suffix_) | ||
| 412 | : device{device_}, ir{ir_}, registry{registry_}, stage{stage_}, | ||
| 413 | identifier{identifier_}, suffix{suffix_}, header{ir.GetHeader()} { | ||
| 414 | if (stage != ShaderType::Compute) { | ||
| 415 | transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); | ||
| 416 | } | ||
| 417 | } | ||
| 418 | |||
| 419 | void Decompile() { | ||
| 420 | DeclareHeader(); | ||
| 421 | DeclareVertex(); | ||
| 422 | DeclareGeometry(); | ||
| 423 | DeclareFragment(); | ||
| 424 | DeclareCompute(); | ||
| 425 | DeclareInputAttributes(); | ||
| 426 | DeclareOutputAttributes(); | ||
| 427 | DeclareImages(); | ||
| 428 | DeclareSamplers(); | ||
| 429 | DeclareGlobalMemory(); | ||
| 430 | DeclareConstantBuffers(); | ||
| 431 | DeclareLocalMemory(); | ||
| 432 | DeclareRegisters(); | ||
| 433 | DeclarePredicates(); | ||
| 434 | DeclareInternalFlags(); | ||
| 435 | DeclareCustomVariables(); | ||
| 436 | DeclarePhysicalAttributeReader(); | ||
| 437 | |||
| 438 | code.AddLine("void main() {{"); | ||
| 439 | ++code.scope; | ||
| 440 | |||
| 441 | if (stage == ShaderType::Vertex) { | ||
| 442 | code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);"); | ||
| 443 | } | ||
| 444 | |||
| 445 | if (ir.IsDecompiled()) { | ||
| 446 | DecompileAST(); | ||
| 447 | } else { | ||
| 448 | DecompileBranchMode(); | ||
| 449 | } | ||
| 450 | |||
| 451 | --code.scope; | ||
| 452 | code.AddLine("}}"); | ||
| 453 | } | ||
| 454 | |||
| 455 | std::string GetResult() { | ||
| 456 | return code.GetResult(); | ||
| 457 | } | ||
| 458 | |||
| 459 | private: | ||
| 460 | friend class ASTDecompiler; | ||
| 461 | friend class ExprDecompiler; | ||
| 462 | |||
| 463 | void DecompileBranchMode() { | ||
| 464 | // VM's program counter | ||
| 465 | const auto first_address = ir.GetBasicBlocks().begin()->first; | ||
| 466 | code.AddLine("uint jmp_to = {}U;", first_address); | ||
| 467 | |||
| 468 | // TODO(Subv): Figure out the actual depth of the flow stack; for now it seems | ||
| 469 | // unlikely that shaders will use 20 nested SSYs and PBKs. | ||
| 470 | constexpr u32 FLOW_STACK_SIZE = 20; | ||
| 471 | if (!ir.IsFlowStackDisabled()) { | ||
| 472 | for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { | ||
| 473 | code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); | ||
| 474 | code.AddLine("uint {} = 0U;", FlowStackTopName(stack)); | ||
| 475 | } | ||
| 476 | } | ||
| 477 | |||
| 478 | code.AddLine("while (true) {{"); | ||
| 479 | ++code.scope; | ||
| 480 | |||
| 481 | code.AddLine("switch (jmp_to) {{"); | ||
| 482 | |||
| 483 | for (const auto& pair : ir.GetBasicBlocks()) { | ||
| 484 | const auto& [address, bb] = pair; | ||
| 485 | code.AddLine("case 0x{:X}U: {{", address); | ||
| 486 | ++code.scope; | ||
| 487 | |||
| 488 | VisitBlock(bb); | ||
| 489 | |||
| 490 | --code.scope; | ||
| 491 | code.AddLine("}}"); | ||
| 492 | } | ||
| 493 | |||
| 494 | code.AddLine("default: return;"); | ||
| 495 | code.AddLine("}}"); | ||
| 496 | |||
| 497 | --code.scope; | ||
| 498 | code.AddLine("}}"); | ||
| 499 | } | ||
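Concretely, DecompileBranchMode emits GLSL of roughly the following shape, with one case per basic block (the address and block body are illustrative):

    uint jmp_to = 0xA0U;
    uint ssy_flow_stack[20]; uint ssy_flow_stack_top = 0U;
    uint pbk_flow_stack[20]; uint pbk_flow_stack_top = 0U;
    while (true) {
        switch (jmp_to) {
        case 0xA0U: {
            // block body; a taken branch assigns jmp_to and breaks out of the switch
        }
        default: return;
        }
    }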
| 500 | |||
| 501 | void DecompileAST(); | ||
| 502 | |||
| 503 | void DeclareHeader() { | ||
| 504 | if (!identifier.empty()) { | ||
| 505 | code.AddLine("// {}", identifier); | ||
| 506 | } | ||
| 507 | const bool use_compatibility = ir.UsesLegacyVaryings() || ir.UsesYNegate(); | ||
| 508 | code.AddLine("#version 440 {}", use_compatibility ? "compatibility" : "core"); | ||
| 509 | code.AddLine("#extension GL_ARB_separate_shader_objects : enable"); | ||
| 510 | if (device.HasShaderBallot()) { | ||
| 511 | code.AddLine("#extension GL_ARB_shader_ballot : require"); | ||
| 512 | } | ||
| 513 | if (device.HasVertexViewportLayer()) { | ||
| 514 | code.AddLine("#extension GL_ARB_shader_viewport_layer_array : require"); | ||
| 515 | } | ||
| 516 | if (device.HasImageLoadFormatted()) { | ||
| 517 | code.AddLine("#extension GL_EXT_shader_image_load_formatted : require"); | ||
| 518 | } | ||
| 519 | if (device.HasTextureShadowLod()) { | ||
| 520 | code.AddLine("#extension GL_EXT_texture_shadow_lod : require"); | ||
| 521 | } | ||
| 522 | if (device.HasWarpIntrinsics()) { | ||
| 523 | code.AddLine("#extension GL_NV_gpu_shader5 : require"); | ||
| 524 | code.AddLine("#extension GL_NV_shader_thread_group : require"); | ||
| 525 | code.AddLine("#extension GL_NV_shader_thread_shuffle : require"); | ||
| 526 | } | ||
| 527 | // This pragma stops Nvidia's driver from over-optimizing math (probably using fp16 | ||
| 528 | // operations) in places where we don't want it to. | ||
| 529 | // Thanks to Ryujinx for finding this workaround. | ||
| 530 | code.AddLine("#pragma optionNV(fastmath off)"); | ||
| 531 | |||
| 532 | code.AddNewLine(); | ||
| 533 | |||
| 534 | code.AddLine(COMMON_DECLARATIONS); | ||
| 535 | } | ||
| 536 | |||
| 537 | void DeclareVertex() { | ||
| 538 | if (stage != ShaderType::Vertex) { | ||
| 539 | return; | ||
| 540 | } | ||
| 541 | |||
| 542 | DeclareVertexRedeclarations(); | ||
| 543 | } | ||
| 544 | |||
| 545 | void DeclareGeometry() { | ||
| 546 | if (stage != ShaderType::Geometry) { | ||
| 547 | return; | ||
| 548 | } | ||
| 549 | |||
| 550 | const auto& info = registry.GetGraphicsInfo(); | ||
| 551 | const auto input_topology = info.primitive_topology; | ||
| 552 | const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(input_topology); | ||
| 553 | max_input_vertices = max_vertices; | ||
| 554 | code.AddLine("layout ({}) in;", glsl_topology); | ||
| 555 | |||
| 556 | const auto topology = GetTopologyName(header.common3.output_topology); | ||
| 557 | const auto max_output_vertices = header.common4.max_output_vertices.Value(); | ||
| 558 | code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_output_vertices); | ||
| 559 | code.AddNewLine(); | ||
| 560 | |||
| 561 | code.AddLine("in gl_PerVertex {{"); | ||
| 562 | ++code.scope; | ||
| 563 | code.AddLine("vec4 gl_Position;"); | ||
| 564 | --code.scope; | ||
| 565 | code.AddLine("}} gl_in[];"); | ||
| 566 | |||
| 567 | DeclareVertexRedeclarations(); | ||
| 568 | } | ||
| 569 | |||
| 570 | void DeclareFragment() { | ||
| 571 | if (stage != ShaderType::Fragment) { | ||
| 572 | return; | ||
| 573 | } | ||
| 574 | if (ir.UsesLegacyVaryings()) { | ||
| 575 | code.AddLine("in gl_PerFragment {{"); | ||
| 576 | ++code.scope; | ||
| 577 | code.AddLine("vec4 gl_TexCoord[8];"); | ||
| 578 | code.AddLine("vec4 gl_Color;"); | ||
| 579 | code.AddLine("vec4 gl_SecondaryColor;"); | ||
| 580 | --code.scope; | ||
| 581 | code.AddLine("}};"); | ||
| 582 | } | ||
| 583 | |||
| 584 | for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { | ||
| 585 | code.AddLine("layout (location = {}) out vec4 frag_color{};", rt, rt); | ||
| 586 | } | ||
| 587 | } | ||
| 588 | |||
| 589 | void DeclareCompute() { | ||
| 590 | if (stage != ShaderType::Compute) { | ||
| 591 | return; | ||
| 592 | } | ||
| 593 | const auto& info = registry.GetComputeInfo(); | ||
| 594 | if (u32 size = info.shared_memory_size_in_words * 4; size > 0) { | ||
| 595 | const u32 limit = device.GetMaxComputeSharedMemorySize(); | ||
| 596 | if (size > limit) { | ||
| 597 | LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}", | ||
| 598 | size, limit); | ||
| 599 | size = limit; | ||
| 600 | } | ||
| 601 | |||
| 602 | code.AddLine("shared uint smem[{}];", size / 4); | ||
| 603 | code.AddNewLine(); | ||
| 604 | } | ||
| 605 | code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;", | ||
| 606 | info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]); | ||
| 607 | code.AddNewLine(); | ||
| 608 | } | ||
| 609 | |||
| 610 | void DeclareVertexRedeclarations() { | ||
| 611 | code.AddLine("out gl_PerVertex {{"); | ||
| 612 | ++code.scope; | ||
| 613 | |||
| 614 | auto pos_xfb = GetTransformFeedbackDecoration(Attribute::Index::Position); | ||
| 615 | if (!pos_xfb.empty()) { | ||
| 616 | pos_xfb = fmt::format("layout ({}) ", pos_xfb); | ||
| 617 | } | ||
| 618 | const char* pos_type = | ||
| 619 | FLOAT_TYPES.at(GetNumComponents(Attribute::Index::Position).value_or(4) - 1); | ||
| 620 | code.AddLine("{}{} gl_Position;", pos_xfb, pos_type); | ||
| 621 | |||
| 622 | for (const auto attribute : ir.GetOutputAttributes()) { | ||
| 623 | if (attribute == Attribute::Index::ClipDistances0123 || | ||
| 624 | attribute == Attribute::Index::ClipDistances4567) { | ||
| 625 | code.AddLine("float gl_ClipDistance[];"); | ||
| 626 | break; | ||
| 627 | } | ||
| 628 | } | ||
| 629 | |||
| 630 | if (stage != ShaderType::Geometry && | ||
| 631 | (stage != ShaderType::Vertex || device.HasVertexViewportLayer())) { | ||
| 632 | if (ir.UsesLayer()) { | ||
| 633 | code.AddLine("int gl_Layer;"); | ||
| 634 | } | ||
| 635 | if (ir.UsesViewportIndex()) { | ||
| 636 | code.AddLine("int gl_ViewportIndex;"); | ||
| 637 | } | ||
| 638 | } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && stage == ShaderType::Vertex && | ||
| 639 | !device.HasVertexViewportLayer()) { | ||
| 640 | LOG_ERROR( | ||
| 641 | Render_OpenGL, | ||
| 642 | "GL_ARB_shader_viewport_layer_array is not available and its required by a shader"); | ||
| 643 | } | ||
| 644 | |||
| 645 | if (ir.UsesPointSize()) { | ||
| 646 | code.AddLine("float gl_PointSize;"); | ||
| 647 | } | ||
| 648 | |||
| 649 | if (ir.UsesLegacyVaryings()) { | ||
| 650 | code.AddLine("vec4 gl_TexCoord[8];"); | ||
| 651 | code.AddLine("vec4 gl_FrontColor;"); | ||
| 652 | code.AddLine("vec4 gl_FrontSecondaryColor;"); | ||
| 653 | code.AddLine("vec4 gl_BackColor;"); | ||
| 654 | code.AddLine("vec4 gl_BackSecondaryColor;"); | ||
| 655 | } | ||
| 656 | |||
| 657 | --code.scope; | ||
| 658 | code.AddLine("}};"); | ||
| 659 | code.AddNewLine(); | ||
| 660 | |||
| 661 | if (stage == ShaderType::Geometry) { | ||
| 662 | if (ir.UsesLayer()) { | ||
| 663 | code.AddLine("out int gl_Layer;"); | ||
| 664 | } | ||
| 665 | if (ir.UsesViewportIndex()) { | ||
| 666 | code.AddLine("out int gl_ViewportIndex;"); | ||
| 667 | } | ||
| 668 | } | ||
| 669 | code.AddNewLine(); | ||
| 670 | } | ||
| 671 | |||
| 672 | void DeclareRegisters() { | ||
| 673 | const auto& registers = ir.GetRegisters(); | ||
| 674 | for (const u32 gpr : registers) { | ||
| 675 | code.AddLine("float {} = 0.0f;", GetRegister(gpr)); | ||
| 676 | } | ||
| 677 | if (!registers.empty()) { | ||
| 678 | code.AddNewLine(); | ||
| 679 | } | ||
| 680 | } | ||
| 681 | |||
| 682 | void DeclareCustomVariables() { | ||
| 683 | const u32 num_custom_variables = ir.GetNumCustomVariables(); | ||
| 684 | for (u32 i = 0; i < num_custom_variables; ++i) { | ||
| 685 | code.AddLine("float {} = 0.0f;", GetCustomVariable(i)); | ||
| 686 | } | ||
| 687 | if (num_custom_variables > 0) { | ||
| 688 | code.AddNewLine(); | ||
| 689 | } | ||
| 690 | } | ||
| 691 | |||
| 692 | void DeclarePredicates() { | ||
| 693 | const auto& predicates = ir.GetPredicates(); | ||
| 694 | for (const auto pred : predicates) { | ||
| 695 | code.AddLine("bool {} = false;", GetPredicate(pred)); | ||
| 696 | } | ||
| 697 | if (!predicates.empty()) { | ||
| 698 | code.AddNewLine(); | ||
| 699 | } | ||
| 700 | } | ||
| 701 | |||
| 702 | void DeclareLocalMemory() { | ||
| 703 | u64 local_memory_size = 0; | ||
| 704 | if (stage == ShaderType::Compute) { | ||
| 705 | local_memory_size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL; | ||
| 706 | } else { | ||
| 707 | local_memory_size = header.GetLocalMemorySize(); | ||
| 708 | } | ||
| 709 | if (local_memory_size == 0) { | ||
| 710 | return; | ||
| 711 | } | ||
| 712 | const u64 element_count = Common::AlignUp(local_memory_size, 4) / 4; | ||
| 713 | code.AddLine("uint {}[{}];", GetLocalMemory(), element_count); | ||
| 714 | code.AddNewLine(); | ||
| 715 | } | ||
| 716 | |||
| 717 | void DeclareInternalFlags() { | ||
| 718 | for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) { | ||
| 719 | const auto flag_code = static_cast<InternalFlag>(flag); | ||
| 720 | code.AddLine("bool {} = false;", GetInternalFlag(flag_code)); | ||
| 721 | } | ||
| 722 | code.AddNewLine(); | ||
| 723 | } | ||
| 724 | |||
| 725 | const char* GetInputFlags(PixelImap attribute) { | ||
| 726 | switch (attribute) { | ||
| 727 | case PixelImap::Perspective: | ||
| 728 | return "smooth"; | ||
| 729 | case PixelImap::Constant: | ||
| 730 | return "flat"; | ||
| 731 | case PixelImap::ScreenLinear: | ||
| 732 | return "noperspective"; | ||
| 733 | case PixelImap::Unused: | ||
| 734 | break; | ||
| 735 | } | ||
| 736 | UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute); | ||
| 737 | return {}; | ||
| 738 | } | ||
| 739 | |||
| 740 | void DeclareInputAttributes() { | ||
| 741 | if (ir.HasPhysicalAttributes()) { | ||
| 742 | const u32 num_inputs{GetNumPhysicalInputAttributes()}; | ||
| 743 | for (u32 i = 0; i < num_inputs; ++i) { | ||
| 744 | DeclareInputAttribute(ToGenericAttribute(i), true); | ||
| 745 | } | ||
| 746 | code.AddNewLine(); | ||
| 747 | return; | ||
| 748 | } | ||
| 749 | |||
| 750 | const auto& attributes = ir.GetInputAttributes(); | ||
| 751 | for (const auto index : attributes) { | ||
| 752 | if (IsGenericAttribute(index)) { | ||
| 753 | DeclareInputAttribute(index, false); | ||
| 754 | } | ||
| 755 | } | ||
| 756 | if (!attributes.empty()) { | ||
| 757 | code.AddNewLine(); | ||
| 758 | } | ||
| 759 | } | ||
| 760 | |||
| 761 | void DeclareInputAttribute(Attribute::Index index, bool skip_unused) { | ||
| 762 | const u32 location{GetGenericAttributeIndex(index)}; | ||
| 763 | |||
| 764 | std::string name{GetGenericInputAttribute(index)}; | ||
| 765 | if (stage == ShaderType::Geometry) { | ||
| 766 | name = "gs_" + name + "[]"; | ||
| 767 | } | ||
| 768 | |||
| 769 | std::string suffix_; | ||
| 770 | if (stage == ShaderType::Fragment) { | ||
| 771 | const auto input_mode{header.ps.GetPixelImap(location)}; | ||
| 772 | if (input_mode == PixelImap::Unused) { | ||
| 773 | return; | ||
| 774 | } | ||
| 775 | suffix_ = GetInputFlags(input_mode); | ||
| 776 | } | ||
| 777 | |||
| 778 | code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix_, name); | ||
| 779 | } | ||
| 780 | |||
| 781 | void DeclareOutputAttributes() { | ||
| 782 | if (ir.HasPhysicalAttributes() && stage != ShaderType::Fragment) { | ||
| 783 | for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) { | ||
| 784 | DeclareOutputAttribute(ToGenericAttribute(i)); | ||
| 785 | } | ||
| 786 | code.AddNewLine(); | ||
| 787 | return; | ||
| 788 | } | ||
| 789 | |||
| 790 | const auto& attributes = ir.GetOutputAttributes(); | ||
| 791 | for (const auto index : attributes) { | ||
| 792 | if (IsGenericAttribute(index)) { | ||
| 793 | DeclareOutputAttribute(index); | ||
| 794 | } | ||
| 795 | } | ||
| 796 | if (!attributes.empty()) { | ||
| 797 | code.AddNewLine(); | ||
| 798 | } | ||
| 799 | } | ||
| 800 | |||
| 801 | std::optional<std::size_t> GetNumComponents(Attribute::Index index, u8 element = 0) const { | ||
| 802 | const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element); | ||
| 803 | const auto it = transform_feedback.find(location); | ||
| 804 | if (it == transform_feedback.end()) { | ||
| 805 | return std::nullopt; | ||
| 806 | } | ||
| 807 | return it->second.components; | ||
| 808 | } | ||
| 809 | |||
| 810 | std::string GetTransformFeedbackDecoration(Attribute::Index index, u8 element = 0) const { | ||
| 811 | const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element); | ||
| 812 | const auto it = transform_feedback.find(location); | ||
| 813 | if (it == transform_feedback.end()) { | ||
| 814 | return {}; | ||
| 815 | } | ||
| 816 | |||
| 817 | const VaryingTFB& tfb = it->second; | ||
| 818 | return fmt::format("xfb_buffer = {}, xfb_offset = {}, xfb_stride = {}", tfb.buffer, | ||
| 819 | tfb.offset, tfb.stride); | ||
| 820 | } | ||
| 821 | |||
| 822 | void DeclareOutputAttribute(Attribute::Index index) { | ||
| 823 | static constexpr std::string_view swizzle = "xyzw"; | ||
| 824 | u8 element = 0; | ||
| 825 | while (element < 4) { | ||
| 826 | auto xfb = GetTransformFeedbackDecoration(index, element); | ||
| 827 | if (!xfb.empty()) { | ||
| 828 | xfb = fmt::format(", {}", xfb); | ||
| 829 | } | ||
| 830 | const std::size_t remainder = 4 - element; | ||
| 831 | const std::size_t num_components = GetNumComponents(index, element).value_or(remainder); | ||
| 832 | const char* const type = FLOAT_TYPES.at(num_components - 1); | ||
| 833 | |||
| 834 | const u32 location = GetGenericAttributeIndex(index); | ||
| 835 | |||
| 836 | GenericVaryingDescription description; | ||
| 837 | description.first_element = static_cast<u8>(element); | ||
| 838 | description.is_scalar = num_components == 1; | ||
| 839 | description.name = AppendSuffix(location, OUTPUT_ATTRIBUTE_NAME); | ||
| 840 | if (element != 0 || num_components != 4) { | ||
| 841 | const std::string_view name_swizzle = swizzle.substr(element, num_components); | ||
| 842 | description.name = fmt::format("{}_{}", description.name, name_swizzle); | ||
| 843 | } | ||
| 844 | for (std::size_t i = 0; i < num_components; ++i) { | ||
| 845 | const u8 offset = static_cast<u8>(location * 4 + element + i); | ||
| 846 | varying_description.insert({offset, description}); | ||
| 847 | } | ||
| 848 | |||
| 849 | code.AddLine("layout (location = {}, component = {}{}) out {} {};", location, element, | ||
| 850 | xfb, type, description.name); | ||
| 851 | |||
| 852 | element = static_cast<u8>(static_cast<std::size_t>(element) + num_components); | ||
| 853 | } | ||
| 854 | } | ||
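For example, a generic varying at location 0 whose first two components are captured by transform feedback would come out as two split declarations, roughly as below (the out_attr stem is OUTPUT_ATTRIBUTE_NAME; the exact suffixing is done by AppendSuffix, which is defined elsewhere in this file, so the names are an assumption):

    layout (location = 0, component = 0, xfb_buffer = 0, xfb_offset = 16, xfb_stride = 32) out vec2 out_attr0_xy;
    layout (location = 0, component = 2) out vec2 out_attr0_zw;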
| 855 | |||
| 856 | void DeclareConstantBuffers() { | ||
| 857 | u32 binding = device.GetBaseBindings(stage).uniform_buffer; | ||
| 858 | for (const auto& [index, info] : ir.GetConstantBuffers()) { | ||
| 859 | const u32 num_elements = Common::DivCeil(info.GetSize(), 4 * sizeof(u32)); | ||
| 860 | const u32 size = info.IsIndirect() ? MAX_CONSTBUFFER_ELEMENTS : num_elements; | ||
| 861 | code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++, | ||
| 862 | GetConstBufferBlock(index)); | ||
| 863 | code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), size); | ||
| 864 | code.AddLine("}};"); | ||
| 865 | code.AddNewLine(); | ||
| 866 | } | ||
| 867 | } | ||
| 868 | |||
| 869 | void DeclareGlobalMemory() { | ||
| 870 | u32 binding = device.GetBaseBindings(stage).shader_storage_buffer; | ||
| 871 | for (const auto& [base, usage] : ir.GetGlobalMemory()) { | ||
| 872 | // Since we don't know how the shader will use the memory, hint the driver to disable as | ||
| 873 | // many optimizations as possible | ||
| 874 | std::string qualifier = "coherent volatile"; | ||
| 875 | if (usage.is_read && !usage.is_written) { | ||
| 876 | qualifier += " readonly"; | ||
| 877 | } else if (usage.is_written && !usage.is_read) { | ||
| 878 | qualifier += " writeonly"; | ||
| 879 | } | ||
| 880 | |||
| 881 | code.AddLine("layout (std430, binding = {}) {} buffer {} {{", binding++, qualifier, | ||
| 882 | GetGlobalMemoryBlock(base)); | ||
| 883 | code.AddLine(" uint {}[];", GetGlobalMemory(base)); | ||
| 884 | code.AddLine("}};"); | ||
| 885 | code.AddNewLine(); | ||
| 886 | } | ||
| 887 | } | ||
| 888 | |||
| 889 | void DeclareSamplers() { | ||
| 890 | u32 binding = device.GetBaseBindings(stage).sampler; | ||
| 891 | for (const auto& sampler : ir.GetSamplers()) { | ||
| 892 | const std::string name = GetSampler(sampler); | ||
| 893 | const std::string description = fmt::format("layout (binding = {}) uniform", binding); | ||
| 894 | binding += sampler.is_indexed ? sampler.size : 1; | ||
| 895 | |||
| 896 | std::string sampler_type = [&]() { | ||
| 897 | if (sampler.is_buffer) { | ||
| 898 | return "samplerBuffer"; | ||
| 899 | } | ||
| 900 | switch (sampler.type) { | ||
| 901 | case TextureType::Texture1D: | ||
| 902 | return "sampler1D"; | ||
| 903 | case TextureType::Texture2D: | ||
| 904 | return "sampler2D"; | ||
| 905 | case TextureType::Texture3D: | ||
| 906 | return "sampler3D"; | ||
| 907 | case TextureType::TextureCube: | ||
| 908 | return "samplerCube"; | ||
| 909 | default: | ||
| 910 | UNREACHABLE(); | ||
| 911 | return "sampler2D"; | ||
| 912 | } | ||
| 913 | }(); | ||
| 914 | if (sampler.is_array) { | ||
| 915 | sampler_type += "Array"; | ||
| 916 | } | ||
| 917 | if (sampler.is_shadow) { | ||
| 918 | sampler_type += "Shadow"; | ||
| 919 | } | ||
| 920 | |||
| 921 | if (!sampler.is_indexed) { | ||
| 922 | code.AddLine("{} {} {};", description, sampler_type, name); | ||
| 923 | } else { | ||
| 924 | code.AddLine("{} {} {}[{}];", description, sampler_type, name, sampler.size); | ||
| 925 | } | ||
| 926 | } | ||
| 927 | if (!ir.GetSamplers().empty()) { | ||
| 928 | code.AddNewLine(); | ||
| 929 | } | ||
| 930 | } | ||
| 931 | |||
| 932 | void DeclarePhysicalAttributeReader() { | ||
| 933 | if (!ir.HasPhysicalAttributes()) { | ||
| 934 | return; | ||
| 935 | } | ||
| 936 | code.AddLine("float ReadPhysicalAttribute(uint physical_address) {{"); | ||
| 937 | ++code.scope; | ||
| 938 | code.AddLine("switch (physical_address) {{"); | ||
| 939 | |||
| 940 | // Just declare generic attributes for now. | ||
| 941 | const auto num_attributes{static_cast<u32>(GetNumPhysicalInputAttributes())}; | ||
| 942 | for (u32 index = 0; index < num_attributes; ++index) { | ||
| 943 | const auto attribute{ToGenericAttribute(index)}; | ||
| 944 | for (u32 element = 0; element < 4; ++element) { | ||
| 945 | constexpr u32 generic_base = 0x80; | ||
| 946 | constexpr u32 generic_stride = 16; | ||
| 947 | constexpr u32 element_stride = 4; | ||
| 948 | const u32 address{generic_base + index * generic_stride + element * element_stride}; | ||
| 949 | |||
| 950 | const bool declared = stage != ShaderType::Fragment || | ||
| 951 | header.ps.GetPixelImap(index) != PixelImap::Unused; | ||
| 952 | const std::string value = | ||
| 953 | declared ? ReadAttribute(attribute, element).AsFloat() : "0.0f"; | ||
| 954 | code.AddLine("case 0x{:X}U: return {};", address, value); | ||
| 955 | } | ||
| 956 | } | ||
| 957 | |||
| 958 | code.AddLine("default: return 0;"); | ||
| 959 | |||
| 960 | code.AddLine("}}"); | ||
| 961 | --code.scope; | ||
| 962 | code.AddLine("}}"); | ||
| 963 | code.AddNewLine(); | ||
| 964 | } | ||
| 965 | |||
| 966 | void DeclareImages() { | ||
| 967 | u32 binding = device.GetBaseBindings(stage).image; | ||
| 968 | for (const auto& image : ir.GetImages()) { | ||
| 969 | std::string qualifier = "coherent volatile"; | ||
| 970 | if (image.is_read && !image.is_written) { | ||
| 971 | qualifier += " readonly"; | ||
| 972 | } else if (image.is_written && !image.is_read) { | ||
| 973 | qualifier += " writeonly"; | ||
| 974 | } | ||
| 975 | |||
| 976 | const char* format = image.is_atomic ? "r32ui, " : ""; | ||
| 977 | const char* type_declaration = GetImageTypeDeclaration(image.type); | ||
| 978 | code.AddLine("layout ({}binding = {}) {} uniform uimage{} {};", format, binding++, | ||
| 979 | qualifier, type_declaration, GetImage(image)); | ||
| 980 | } | ||
| 981 | if (!ir.GetImages().empty()) { | ||
| 982 | code.AddNewLine(); | ||
| 983 | } | ||
| 984 | } | ||
| 985 | |||
| 986 | void VisitBlock(const NodeBlock& bb) { | ||
| 987 | for (const auto& node : bb) { | ||
| 988 | Visit(node).CheckVoid(); | ||
| 989 | } | ||
| 990 | } | ||
| 991 | |||
| 992 | Expression Visit(const Node& node) { | ||
| 993 | if (const auto operation = std::get_if<OperationNode>(&*node)) { | ||
| 994 | if (const auto amend_index = operation->GetAmendIndex()) { | ||
| 995 | Visit(ir.GetAmendNode(*amend_index)).CheckVoid(); | ||
| 996 | } | ||
| 997 | const auto operation_index = static_cast<std::size_t>(operation->GetCode()); | ||
| 998 | if (operation_index >= operation_decompilers.size()) { | ||
| 999 | UNREACHABLE_MSG("Out of bounds operation: {}", operation_index); | ||
| 1000 | return {}; | ||
| 1001 | } | ||
| 1002 | const auto decompiler = operation_decompilers[operation_index]; | ||
| 1003 | if (decompiler == nullptr) { | ||
| 1004 | UNREACHABLE_MSG("Undefined operation: {}", operation_index); | ||
| 1005 | return {}; | ||
| 1006 | } | ||
| 1007 | return (this->*decompiler)(*operation); | ||
| 1008 | } | ||
| 1009 | |||
| 1010 | if (const auto gpr = std::get_if<GprNode>(&*node)) { | ||
| 1011 | const u32 index = gpr->GetIndex(); | ||
| 1012 | if (index == Register::ZeroIndex) { | ||
| 1013 | return {"0U", Type::Uint}; | ||
| 1014 | } | ||
| 1015 | return {GetRegister(index), Type::Float}; | ||
| 1016 | } | ||
| 1017 | |||
| 1018 | if (const auto cv = std::get_if<CustomVarNode>(&*node)) { | ||
| 1019 | const u32 index = cv->GetIndex(); | ||
| 1020 | return {GetCustomVariable(index), Type::Float}; | ||
| 1021 | } | ||
| 1022 | |||
| 1023 | if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { | ||
| 1024 | const u32 value = immediate->GetValue(); | ||
| 1025 | if (value < 10) { | ||
| 1026 | // For readability, avoid hex formatting for single-digit values | ||
| 1027 | return {fmt::format("{}U", immediate->GetValue()), Type::Uint}; | ||
| 1028 | } | ||
| 1029 | return {fmt::format("0x{:X}U", immediate->GetValue()), Type::Uint}; | ||
| 1030 | } | ||
| 1031 | |||
| 1032 | if (const auto predicate = std::get_if<PredicateNode>(&*node)) { | ||
| 1033 | const auto value = [&]() -> std::string { | ||
| 1034 | switch (const auto index = predicate->GetIndex(); index) { | ||
| 1035 | case Tegra::Shader::Pred::UnusedIndex: | ||
| 1036 | return "true"; | ||
| 1037 | case Tegra::Shader::Pred::NeverExecute: | ||
| 1038 | return "false"; | ||
| 1039 | default: | ||
| 1040 | return GetPredicate(index); | ||
| 1041 | } | ||
| 1042 | }(); | ||
| 1043 | if (predicate->IsNegated()) { | ||
| 1044 | return {fmt::format("!({})", value), Type::Bool}; | ||
| 1045 | } | ||
| 1046 | return {value, Type::Bool}; | ||
| 1047 | } | ||
| 1048 | |||
| 1049 | if (const auto abuf = std::get_if<AbufNode>(&*node)) { | ||
| 1050 | UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderType::Geometry, | ||
| 1051 | "Physical attributes in geometry shaders are not implemented"); | ||
| 1052 | if (abuf->IsPhysicalBuffer()) { | ||
| 1053 | return {fmt::format("ReadPhysicalAttribute({})", | ||
| 1054 | Visit(abuf->GetPhysicalAddress()).AsUint()), | ||
| 1055 | Type::Float}; | ||
| 1056 | } | ||
| 1057 | return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer()); | ||
| 1058 | } | ||
| 1059 | |||
| 1060 | if (const auto cbuf = std::get_if<CbufNode>(&*node)) { | ||
| 1061 | const Node offset = cbuf->GetOffset(); | ||
| 1062 | |||
| 1063 | if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { | ||
| 1064 | // Direct access | ||
| 1065 | const u32 offset_imm = immediate->GetValue(); | ||
| 1066 | ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access"); | ||
| 1067 | return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), | ||
| 1068 | offset_imm / (4 * 4), (offset_imm / 4) % 4), | ||
| 1069 | Type::Uint}; | ||
| 1070 | } | ||
| 1071 | |||
| 1072 | // Indirect access | ||
| 1073 | const std::string final_offset = code.GenerateTemporary(); | ||
| 1074 | code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint()); | ||
| 1075 | |||
| 1076 | if (!device.HasComponentIndexingBug()) { | ||
| 1077 | return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()), | ||
| 1078 | final_offset, final_offset), | ||
| 1079 | Type::Uint}; | ||
| 1080 | } | ||
| 1081 | |||
| 1082 | // AMD's proprietary GLSL compiler emits broken code for variable component access. | ||
| 1083 | // To bypass this driver bug, generate four ifs, one per component. | ||
| 1084 | const std::string pack = code.GenerateTemporary(); | ||
| 1085 | code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()), | ||
| 1086 | final_offset); | ||
| 1087 | |||
| 1088 | const std::string result = code.GenerateTemporary(); | ||
| 1089 | code.AddLine("uint {};", result); | ||
| 1090 | for (u32 swizzle = 0; swizzle < 4; ++swizzle) { | ||
| 1091 | code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, pack, | ||
| 1092 | GetSwizzle(swizzle)); | ||
| 1093 | } | ||
| 1094 | return {result, Type::Uint}; | ||
| 1095 | } | ||
| 1096 | |||
| 1097 | if (const auto gmem = std::get_if<GmemNode>(&*node)) { | ||
| 1098 | const std::string real = Visit(gmem->GetRealAddress()).AsUint(); | ||
| 1099 | const std::string base = Visit(gmem->GetBaseAddress()).AsUint(); | ||
| 1100 | const std::string final_offset = fmt::format("({} - {}) >> 2", real, base); | ||
| 1101 | return {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset), | ||
| 1102 | Type::Uint}; | ||
| 1103 | } | ||
| 1104 | |||
| 1105 | if (const auto lmem = std::get_if<LmemNode>(&*node)) { | ||
| 1106 | return { | ||
| 1107 | fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), | ||
| 1108 | Type::Uint}; | ||
| 1109 | } | ||
| 1110 | |||
| 1111 | if (const auto smem = std::get_if<SmemNode>(&*node)) { | ||
| 1112 | return {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint}; | ||
| 1113 | } | ||
| 1114 | |||
| 1115 | if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { | ||
| 1116 | return {GetInternalFlag(internal_flag->GetFlag()), Type::Bool}; | ||
| 1117 | } | ||
| 1118 | |||
| 1119 | if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { | ||
| 1120 | if (const auto amend_index = conditional->GetAmendIndex()) { | ||
| 1121 | Visit(ir.GetAmendNode(*amend_index)).CheckVoid(); | ||
| 1122 | } | ||
| 1123 | // It's invalid to call conditional on nested nodes; use an operation instead | ||
| 1124 | code.AddLine("if ({}) {{", Visit(conditional->GetCondition()).AsBool()); | ||
| 1125 | ++code.scope; | ||
| 1126 | |||
| 1127 | VisitBlock(conditional->GetCode()); | ||
| 1128 | |||
| 1129 | --code.scope; | ||
| 1130 | code.AddLine("}}"); | ||
| 1131 | return {}; | ||
| 1132 | } | ||
| 1133 | |||
| 1134 | if (const auto comment = std::get_if<CommentNode>(&*node)) { | ||
| 1135 | code.AddLine("// " + comment->GetText()); | ||
| 1136 | return {}; | ||
| 1137 | } | ||
| 1138 | |||
| 1139 | UNREACHABLE(); | ||
| 1140 | return {}; | ||
| 1141 | } | ||
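To make the component-indexing workaround above concrete, an indirect constant buffer read on an affected driver decompiles to roughly the following GLSL (the buffer and temporary names are illustrative):

    uint tmp1 = dynamic_offset >> 2;
    uvec4 tmp2 = cbuf0[tmp1 >> 2];
    uint tmp3;
    if ((tmp1 & 3) == 0) tmp3 = tmp2.x;
    if ((tmp1 & 3) == 1) tmp3 = tmp2.y;
    if ((tmp1 & 3) == 2) tmp3 = tmp2.z;
    if ((tmp1 & 3) == 3) tmp3 = tmp2.w;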
| 1142 | |||
| 1143 | Expression ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) { | ||
| 1144 | const auto GeometryPass = [&](std::string_view name) { | ||
| 1145 | if (stage == ShaderType::Geometry && buffer) { | ||
| 1146 | // TODO(Rodrigo): Guard geometry inputs against out-of-bounds reads. Some games | ||
| 1147 | // set a 0x80000000 index for those and the shader fails to build. Find out why | ||
| 1148 | // this happens and what its intent is. | ||
| 1149 | return fmt::format("gs_{}[{} % {}]", name, Visit(buffer).AsUint(), | ||
| 1150 | max_input_vertices.value()); | ||
| 1151 | } | ||
| 1152 | return std::string(name); | ||
| 1153 | }; | ||
| 1154 | |||
| 1155 | switch (attribute) { | ||
| 1156 | case Attribute::Index::Position: | ||
| 1157 | switch (stage) { | ||
| 1158 | case ShaderType::Geometry: | ||
| 1159 | return {fmt::format("gl_in[{}].gl_Position{}", Visit(buffer).AsUint(), | ||
| 1160 | GetSwizzle(element)), | ||
| 1161 | Type::Float}; | ||
| 1162 | case ShaderType::Fragment: | ||
| 1163 | return {"gl_FragCoord"s + GetSwizzle(element), Type::Float}; | ||
| 1164 | default: | ||
| 1165 | UNREACHABLE(); | ||
| 1166 | return {"0", Type::Int}; | ||
| 1167 | } | ||
| 1168 | case Attribute::Index::FrontColor: | ||
| 1169 | return {"gl_Color"s + GetSwizzle(element), Type::Float}; | ||
| 1170 | case Attribute::Index::FrontSecondaryColor: | ||
| 1171 | return {"gl_SecondaryColor"s + GetSwizzle(element), Type::Float}; | ||
| 1172 | case Attribute::Index::PointCoord: | ||
| 1173 | switch (element) { | ||
| 1174 | case 0: | ||
| 1175 | return {"gl_PointCoord.x", Type::Float}; | ||
| 1176 | case 1: | ||
| 1177 | return {"gl_PointCoord.y", Type::Float}; | ||
| 1178 | case 2: | ||
| 1179 | case 3: | ||
| 1180 | return {"0.0f", Type::Float}; | ||
| 1181 | } | ||
| 1182 | UNREACHABLE(); | ||
| 1183 | return {"0", Type::Int}; | ||
| 1184 | case Attribute::Index::TessCoordInstanceIDVertexID: | ||
| 1185 | // TODO(Subv): Find out what the values are for the first two elements when inside a | ||
| 1186 | // vertex shader, and what the value of the fourth element is when inside a Tess Eval | ||
| 1187 | // shader. | ||
| 1188 | ASSERT(stage == ShaderType::Vertex); | ||
| 1189 | switch (element) { | ||
| 1190 | case 2: | ||
| 1191 | // Config pack's first value is instance_id. | ||
| 1192 | return {"gl_InstanceID", Type::Int}; | ||
| 1193 | case 3: | ||
| 1194 | return {"gl_VertexID", Type::Int}; | ||
| 1195 | } | ||
| 1196 | UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); | ||
| 1197 | return {"0", Type::Int}; | ||
| 1198 | case Attribute::Index::FrontFacing: | ||
| 1199 | // TODO(Subv): Find out what the values are for the other elements. | ||
| 1200 | ASSERT(stage == ShaderType::Fragment); | ||
| 1201 | switch (element) { | ||
| 1202 | case 3: | ||
| 1203 | return {"(gl_FrontFacing ? -1 : 0)", Type::Int}; | ||
| 1204 | } | ||
| 1205 | UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); | ||
| 1206 | return {"0", Type::Int}; | ||
| 1207 | default: | ||
| 1208 | if (IsGenericAttribute(attribute)) { | ||
| 1209 | return {GeometryPass(GetGenericInputAttribute(attribute)) + GetSwizzle(element), | ||
| 1210 | Type::Float}; | ||
| 1211 | } | ||
| 1212 | if (IsLegacyTexCoord(attribute)) { | ||
| 1213 | UNIMPLEMENTED_IF(stage == ShaderType::Geometry); | ||
| 1214 | return {fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute), | ||
| 1215 | GetSwizzle(element)), | ||
| 1216 | Type::Float}; | ||
| 1217 | } | ||
| 1218 | break; | ||
| 1219 | } | ||
| 1220 | UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute); | ||
| 1221 | return {"0", Type::Int}; | ||
| 1222 | } | ||
| 1223 | |||
| 1224 | Expression ApplyPrecise(Operation operation, std::string value, Type type) { | ||
| 1225 | if (!IsPrecise(operation)) { | ||
| 1226 | return {std::move(value), type}; | ||
| 1227 | } | ||
| 1228 | // Old Nvidia drivers have a bug with precise and texture sampling. These are more likely to | ||
| 1229 | // be found in fragment shaders, so we disable precise there. There are vertex shaders that | ||
| 1230 | // also fail to build but nobody seems to care about those. | ||
| 1231 | // Note: Only bugged drivers will skip precise. | ||
| 1232 | const bool disable_precise = device.HasPreciseBug() && stage == ShaderType::Fragment; | ||
| 1233 | |||
| 1234 | std::string temporary = code.GenerateTemporary(); | ||
| 1235 | code.AddLine("{}{} {} = {};", disable_precise ? "" : "precise ", GetTypeString(type), | ||
| 1236 | temporary, value); | ||
| 1237 | return {std::move(temporary), type}; | ||
| 1238 | } | ||
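In effect, a precise operation such as "(a + b)" is routed through a qualified temporary, e.g.:

    precise float tmp1 = (a + b);

and "tmp1" replaces the expression; on drivers with HasPreciseBug(), fragment shaders drop the precise qualifier instead.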
| 1239 | |||
| 1240 | Expression VisitOperand(Operation operation, std::size_t operand_index) { | ||
| 1241 | const auto& operand = operation[operand_index]; | ||
| 1242 | const bool parent_precise = IsPrecise(operation); | ||
| 1243 | const bool child_precise = IsPrecise(operand); | ||
| 1244 | const bool child_trivial = !std::holds_alternative<OperationNode>(*operand); | ||
| 1245 | if (!parent_precise || child_precise || child_trivial) { | ||
| 1246 | return Visit(operand); | ||
| 1247 | } | ||
| 1248 | |||
| 1249 | Expression value = Visit(operand); | ||
| 1250 | std::string temporary = code.GenerateTemporary(); | ||
| 1251 | code.AddLine("{} {} = {};", GetTypeString(value.GetType()), temporary, value.GetCode()); | ||
| 1252 | return {std::move(temporary), value.GetType()}; | ||
| 1253 | } | ||
| 1254 | |||
| 1255 | std::optional<Expression> GetOutputAttribute(const AbufNode* abuf) { | ||
| 1256 | const u32 element = abuf->GetElement(); | ||
| 1257 | switch (const auto attribute = abuf->GetIndex()) { | ||
| 1258 | case Attribute::Index::Position: | ||
| 1259 | return {{"gl_Position"s + GetSwizzle(element), Type::Float}}; | ||
| 1260 | case Attribute::Index::LayerViewportPointSize: | ||
| 1261 | switch (element) { | ||
| 1262 | case 0: | ||
| 1263 | UNIMPLEMENTED(); | ||
| 1264 | return std::nullopt; | ||
| 1265 | case 1: | ||
| 1266 | if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) { | ||
| 1267 | return std::nullopt; | ||
| 1268 | } | ||
| 1269 | return {{"gl_Layer", Type::Int}}; | ||
| 1270 | case 2: | ||
| 1271 | if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) { | ||
| 1272 | return std::nullopt; | ||
| 1273 | } | ||
| 1274 | return {{"gl_ViewportIndex", Type::Int}}; | ||
| 1275 | case 3: | ||
| 1276 | return {{"gl_PointSize", Type::Float}}; | ||
| 1277 | } | ||
| 1278 | return std::nullopt; | ||
| 1279 | case Attribute::Index::FrontColor: | ||
| 1280 | return {{"gl_FrontColor"s + GetSwizzle(element), Type::Float}}; | ||
| 1281 | case Attribute::Index::FrontSecondaryColor: | ||
| 1282 | return {{"gl_FrontSecondaryColor"s + GetSwizzle(element), Type::Float}}; | ||
| 1283 | case Attribute::Index::BackColor: | ||
| 1284 | return {{"gl_BackColor"s + GetSwizzle(element), Type::Float}}; | ||
| 1285 | case Attribute::Index::BackSecondaryColor: | ||
| 1286 | return {{"gl_BackSecondaryColor"s + GetSwizzle(element), Type::Float}}; | ||
| 1287 | case Attribute::Index::ClipDistances0123: | ||
| 1288 | return {{fmt::format("gl_ClipDistance[{}]", element), Type::Float}}; | ||
| 1289 | case Attribute::Index::ClipDistances4567: | ||
| 1290 | return {{fmt::format("gl_ClipDistance[{}]", element + 4), Type::Float}}; | ||
| 1291 | default: | ||
| 1292 | if (IsGenericAttribute(attribute)) { | ||
| 1293 | return {{GetGenericOutputAttribute(attribute, element), Type::Float}}; | ||
| 1294 | } | ||
| 1295 | if (IsLegacyTexCoord(attribute)) { | ||
| 1296 | return {{fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute), | ||
| 1297 | GetSwizzle(element)), | ||
| 1298 | Type::Float}}; | ||
| 1299 | } | ||
| 1300 | UNIMPLEMENTED_MSG("Unhandled output attribute: {}", attribute); | ||
| 1301 | return std::nullopt; | ||
| 1302 | } | ||
| 1303 | } | ||
| 1304 | |||
| 1305 | Expression GenerateUnary(Operation operation, std::string_view func, Type result_type, | ||
| 1306 | Type type_a) { | ||
| 1307 | std::string op_str = fmt::format("{}({})", func, VisitOperand(operation, 0).As(type_a)); | ||
| 1308 | return ApplyPrecise(operation, std::move(op_str), result_type); | ||
| 1309 | } | ||
| 1310 | |||
| 1311 | Expression GenerateBinaryInfix(Operation operation, std::string_view func, Type result_type, | ||
| 1312 | Type type_a, Type type_b) { | ||
| 1313 | const std::string op_a = VisitOperand(operation, 0).As(type_a); | ||
| 1314 | const std::string op_b = VisitOperand(operation, 1).As(type_b); | ||
| 1315 | std::string op_str = fmt::format("({} {} {})", op_a, func, op_b); | ||
| 1316 | |||
| 1317 | return ApplyPrecise(operation, std::move(op_str), result_type); | ||
| 1318 | } | ||
| 1319 | |||
| 1320 | Expression GenerateBinaryCall(Operation operation, std::string_view func, Type result_type, | ||
| 1321 | Type type_a, Type type_b) { | ||
| 1322 | const std::string op_a = VisitOperand(operation, 0).As(type_a); | ||
| 1323 | const std::string op_b = VisitOperand(operation, 1).As(type_b); | ||
| 1324 | std::string op_str = fmt::format("{}({}, {})", func, op_a, op_b); | ||
| 1325 | |||
| 1326 | return ApplyPrecise(operation, std::move(op_str), result_type); | ||
| 1327 | } | ||
| 1328 | |||
| 1329 | Expression GenerateTernary(Operation operation, std::string_view func, Type result_type, | ||
| 1330 | Type type_a, Type type_b, Type type_c) { | ||
| 1331 | const std::string op_a = VisitOperand(operation, 0).As(type_a); | ||
| 1332 | const std::string op_b = VisitOperand(operation, 1).As(type_b); | ||
| 1333 | const std::string op_c = VisitOperand(operation, 2).As(type_c); | ||
| 1334 | std::string op_str = fmt::format("{}({}, {}, {})", func, op_a, op_b, op_c); | ||
| 1335 | |||
| 1336 | return ApplyPrecise(operation, std::move(op_str), result_type); | ||
| 1337 | } | ||
| 1338 | |||
| 1339 | Expression GenerateQuaternary(Operation operation, const std::string& func, Type result_type, | ||
| 1340 | Type type_a, Type type_b, Type type_c, Type type_d) { | ||
| 1341 | const std::string op_a = VisitOperand(operation, 0).As(type_a); | ||
| 1342 | const std::string op_b = VisitOperand(operation, 1).As(type_b); | ||
| 1343 | const std::string op_c = VisitOperand(operation, 2).As(type_c); | ||
| 1344 | const std::string op_d = VisitOperand(operation, 3).As(type_d); | ||
| 1345 | std::string op_str = fmt::format("{}({}, {}, {}, {})", func, op_a, op_b, op_c, op_d); | ||
| 1346 | |||
| 1347 | return ApplyPrecise(operation, std::move(op_str), result_type); | ||
| 1348 | } | ||
| 1349 | |||
| 1350 | std::string GenerateTexture(Operation operation, const std::string& function_suffix, | ||
| 1351 | const std::vector<TextureIR>& extras, bool separate_dc = false) { | ||
| 1352 | constexpr std::array coord_constructors = {"float", "vec2", "vec3", "vec4"}; | ||
| 1353 | |||
| 1354 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | ||
| 1355 | ASSERT(meta); | ||
| 1356 | |||
| 1357 | const std::size_t count = operation.GetOperandsCount(); | ||
| 1358 | const bool has_array = meta->sampler.is_array; | ||
| 1359 | const bool has_shadow = meta->sampler.is_shadow; | ||
| 1360 | const bool workaround_lod_array_shadow_as_grad = | ||
| 1361 | !device.HasTextureShadowLod() && function_suffix == "Lod" && meta->sampler.is_shadow && | ||
| 1362 | ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) || | ||
| 1363 | meta->sampler.type == TextureType::TextureCube); | ||
| 1364 | |||
| 1365 | std::string expr = "texture"; | ||
| 1366 | |||
| 1367 | if (workaround_lod_array_shadow_as_grad) { | ||
| 1368 | expr += "Grad"; | ||
| 1369 | } else { | ||
| 1370 | expr += function_suffix; | ||
| 1371 | } | ||
| 1372 | |||
| 1373 | if (!meta->aoffi.empty()) { | ||
| 1374 | expr += "Offset"; | ||
| 1375 | } else if (!meta->ptp.empty()) { | ||
| 1376 | expr += "Offsets"; | ||
| 1377 | } | ||
| 1378 | if (!meta->sampler.is_indexed) { | ||
| 1379 | expr += '(' + GetSampler(meta->sampler) + ", "; | ||
| 1380 | } else { | ||
| 1381 | expr += '(' + GetSampler(meta->sampler) + '[' + Visit(meta->index).AsUint() + "], "; | ||
| 1382 | } | ||
| 1383 | expr += coord_constructors.at(count + (has_array ? 1 : 0) + | ||
| 1384 | (has_shadow && !separate_dc ? 1 : 0) - 1); | ||
| 1385 | expr += '('; | ||
| 1386 | for (std::size_t i = 0; i < count; ++i) { | ||
| 1387 | expr += Visit(operation[i]).AsFloat(); | ||
| 1388 | |||
| 1389 | const std::size_t next = i + 1; | ||
| 1390 | if (next < count) { | ||
| 1391 | expr += ", "; | ||
| | } | ||
| 1392 | } | ||
| 1393 | if (has_array) { | ||
| 1394 | expr += ", float(" + Visit(meta->array).AsInt() + ')'; | ||
| 1395 | } | ||
| 1396 | if (has_shadow) { | ||
| 1397 | if (separate_dc) { | ||
| 1398 | expr += "), " + Visit(meta->depth_compare).AsFloat(); | ||
| 1399 | } else { | ||
| 1400 | expr += ", " + Visit(meta->depth_compare).AsFloat() + ')'; | ||
| 1401 | } | ||
| 1402 | } else { | ||
| 1403 | expr += ')'; | ||
| 1404 | } | ||
| 1405 | |||
| 1406 | if (workaround_lod_array_shadow_as_grad) { | ||
| 1407 | switch (meta->sampler.type) { | ||
| 1408 | case TextureType::Texture2D: | ||
| 1409 | return expr + ", vec2(0.0), vec2(0.0))"; | ||
| 1410 | case TextureType::TextureCube: | ||
| 1411 | return expr + ", vec3(0.0), vec3(0.0))"; | ||
| 1412 | default: | ||
| 1413 | UNREACHABLE(); | ||
| 1414 | break; | ||
| 1415 | } | ||
| 1416 | } | ||
| 1417 | |||
| 1418 | for (const auto& variant : extras) { | ||
| 1419 | if (const auto argument = std::get_if<TextureArgument>(&variant)) { | ||
| 1420 | expr += GenerateTextureArgument(*argument); | ||
| 1421 | } else if (std::holds_alternative<TextureOffset>(variant)) { | ||
| 1422 | if (!meta->aoffi.empty()) { | ||
| 1423 | expr += GenerateTextureAoffi(meta->aoffi); | ||
| 1424 | } else if (!meta->ptp.empty()) { | ||
| 1425 | expr += GenerateTexturePtp(meta->ptp); | ||
| 1426 | } | ||
| 1427 | } else if (std::holds_alternative<TextureDerivates>(variant)) { | ||
| 1428 | expr += GenerateTextureDerivates(meta->derivates); | ||
| 1429 | } else { | ||
| 1430 | UNREACHABLE(); | ||
| 1431 | } | ||
| 1432 | } | ||
| 1433 | |||
| 1434 | return expr + ')'; | ||
| 1435 | } | ||
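| | // A hypothetical assembled call, for a non-shadow sampled 2D array with a bias extra: | ||
| | // "texture(sampler0, vec3(u, v, float(layer)), bias)". The identifier and coordinate | ||
| | // names are illustrative only. | ||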
| 1436 | |||
| 1437 | std::string GenerateTextureArgument(const TextureArgument& argument) { | ||
| 1438 | const auto& [type, operand] = argument; | ||
| 1439 | if (operand == nullptr) { | ||
| 1440 | return {}; | ||
| 1441 | } | ||
| 1442 | |||
| 1443 | std::string expr = ", "; | ||
| 1444 | switch (type) { | ||
| 1445 | case Type::Int: | ||
| 1446 | if (const auto immediate = std::get_if<ImmediateNode>(&*operand)) { | ||
| 1447 | // Inline the value as an immediate integer in GLSL (some extra arguments are | ||
| 1448 | // required to be constant). | ||
| 1449 | expr += std::to_string(static_cast<s32>(immediate->GetValue())); | ||
| 1450 | } else { | ||
| 1451 | expr += Visit(operand).AsInt(); | ||
| 1452 | } | ||
| 1453 | break; | ||
| 1454 | case Type::Float: | ||
| 1455 | expr += Visit(operand).AsFloat(); | ||
| 1456 | break; | ||
| 1457 | default: { | ||
| 1458 | const auto type_int = static_cast<u32>(type); | ||
| 1459 | UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int); | ||
| 1460 | expr += '0'; | ||
| 1461 | break; | ||
| 1462 | } | ||
| 1463 | } | ||
| 1464 | return expr; | ||
| 1465 | } | ||
| 1466 | |||
| 1467 | std::string ReadTextureOffset(const Node& value) { | ||
| 1468 | if (const auto immediate = std::get_if<ImmediateNode>(&*value)) { | ||
| 1469 | // Inline the value as an immediate integer in GLSL (AOFFI arguments are required | ||
| 1470 | // to be constant by the standard). | ||
| 1471 | return std::to_string(static_cast<s32>(immediate->GetValue())); | ||
| 1472 | } else if (device.HasVariableAoffi()) { | ||
| 1473 | // Avoid using variable AOFFI on unsupported devices. | ||
| 1474 | return Visit(value).AsInt(); | ||
| 1475 | } else { | ||
| 1476 | // Insert 0 on devices not supporting variable AOFFI. | ||
| 1477 | return "0"; | ||
| 1478 | } | ||
| 1479 | } | ||
| 1480 | |||
| 1481 | std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) { | ||
| 1482 | if (aoffi.empty()) { | ||
| 1483 | return {}; | ||
| 1484 | } | ||
| 1485 | constexpr std::array coord_constructors = {"int", "ivec2", "ivec3"}; | ||
| 1486 | std::string expr = ", "; | ||
| 1487 | expr += coord_constructors.at(aoffi.size() - 1); | ||
| 1488 | expr += '('; | ||
| 1489 | |||
| 1490 | for (std::size_t index = 0; index < aoffi.size(); ++index) { | ||
| 1491 | expr += ReadTextureOffset(aoffi.at(index)); | ||
| 1492 | if (index + 1 < aoffi.size()) { | ||
| 1493 | expr += ", "; | ||
| 1494 | } | ||
| 1495 | } | ||
| 1496 | expr += ')'; | ||
| 1497 | |||
| 1498 | return expr; | ||
| 1499 | } | ||
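| | // e.g. a two-component offset of immediates 1 and -2 yields ", ivec2(1, -2)". | ||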
| 1500 | |||
| 1501 | std::string GenerateTexturePtp(const std::vector<Node>& ptp) { | ||
| 1502 | static constexpr std::size_t num_vectors = 4; | ||
| 1503 | ASSERT(ptp.size() == num_vectors * 2); | ||
| 1504 | |||
| 1505 | std::string expr = ", ivec2[]("; | ||
| 1506 | for (std::size_t vector = 0; vector < num_vectors; ++vector) { | ||
| 1507 | const bool has_next = vector + 1 < num_vectors; | ||
| 1508 | expr += fmt::format("ivec2({}, {}){}", ReadTextureOffset(ptp.at(vector * 2)), | ||
| 1509 | ReadTextureOffset(ptp.at(vector * 2 + 1)), has_next ? ", " : ""); | ||
| 1510 | } | ||
| 1511 | expr += ')'; | ||
| 1512 | return expr; | ||
| 1513 | } | ||
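| | // The eight PTP values are consumed pairwise, producing the four-offset form | ||
| | // ", ivec2[](ivec2(x0, y0), ivec2(x1, y1), ivec2(x2, y2), ivec2(x3, y3))". | ||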
| 1514 | |||
| 1515 | std::string GenerateTextureDerivates(const std::vector<Node>& derivates) { | ||
| 1516 | if (derivates.empty()) { | ||
| 1517 | return {}; | ||
| 1518 | } | ||
| 1519 | constexpr std::array coord_constructors = {"float", "vec2", "vec3"}; | ||
| 1520 | std::string expr = ", "; | ||
| 1521 | const std::size_t components = derivates.size() / 2; | ||
| 1522 | std::string dx = coord_constructors.at(components - 1); | ||
| 1523 | std::string dy = coord_constructors.at(components - 1); | ||
| 1524 | dx += '('; | ||
| 1525 | dy += '('; | ||
| 1526 | |||
| 1527 | for (std::size_t index = 0; index < components; ++index) { | ||
| 1528 | const auto& operand_x{derivates.at(index * 2)}; | ||
| 1529 | const auto& operand_y{derivates.at(index * 2 + 1)}; | ||
| 1530 | dx += Visit(operand_x).AsFloat(); | ||
| 1531 | dy += Visit(operand_y).AsFloat(); | ||
| 1532 | |||
| 1533 | if (index + 1 < components) { | ||
| 1534 | dx += ", "; | ||
| 1535 | dy += ", "; | ||
| 1536 | } | ||
| 1537 | } | ||
| 1538 | dx += ')'; | ||
| 1539 | dy += ')'; | ||
| 1540 | expr += dx + ", " + dy; | ||
| 1541 | |||
| 1542 | return expr; | ||
| 1543 | } | ||
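| | // The interleaved operands (x0, y0, x1, y1, ...) are split into the two textureGrad | ||
| | // derivative arguments, e.g. ", vec2(x0, x1), vec2(y0, y1)" for two components. | ||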
| 1544 | |||
| 1545 | std::string BuildIntegerCoordinates(Operation operation) { | ||
| 1546 | constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("}; | ||
| 1547 | const std::size_t coords_count{operation.GetOperandsCount()}; | ||
| 1548 | std::string expr = constructors.at(coords_count - 1); | ||
| 1549 | for (std::size_t i = 0; i < coords_count; ++i) { | ||
| 1550 | expr += VisitOperand(operation, i).AsInt(); | ||
| 1551 | if (i + 1 < coords_count) { | ||
| 1552 | expr += ", "; | ||
| 1553 | } | ||
| 1554 | } | ||
| 1555 | expr += ')'; | ||
| 1556 | return expr; | ||
| 1557 | } | ||
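| | // e.g. two coordinate operands produce "ivec2(x, y)" for the image operations below. | ||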
| 1558 | |||
| 1559 | std::string BuildImageValues(Operation operation) { | ||
| 1560 | constexpr std::array constructors{"uint", "uvec2", "uvec3", "uvec4"}; | ||
| 1561 | const auto& meta{std::get<MetaImage>(operation.GetMeta())}; | ||
| 1562 | |||
| 1563 | const std::size_t values_count{meta.values.size()}; | ||
| 1564 | std::string expr = fmt::format("{}(", constructors.at(values_count - 1)); | ||
| 1565 | for (std::size_t i = 0; i < values_count; ++i) { | ||
| 1566 | expr += Visit(meta.values.at(i)).AsUint(); | ||
| 1567 | if (i + 1 < values_count) { | ||
| 1568 | expr += ", "; | ||
| 1569 | } | ||
| 1570 | } | ||
| 1571 | expr += ')'; | ||
| 1572 | return expr; | ||
| 1573 | } | ||
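| | // e.g. a four-component write becomes "uvec4(r, g, b, a)" for imageStore below. | ||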
| 1574 | |||
| 1575 | Expression Assign(Operation operation) { | ||
| 1576 | const Node& dest = operation[0]; | ||
| 1577 | const Node& src = operation[1]; | ||
| 1578 | |||
| 1579 | Expression target; | ||
| 1580 | if (const auto gpr = std::get_if<GprNode>(&*dest)) { | ||
| 1581 | if (gpr->GetIndex() == Register::ZeroIndex) { | ||
| 1582 | // Writing to Register::ZeroIndex is a no-op, but we still have to visit the source | ||
| 1583 | // as it might have side effects. | ||
| 1584 | code.AddLine("{};", Visit(src).GetCode()); | ||
| 1585 | return {}; | ||
| 1586 | } | ||
| 1587 | target = {GetRegister(gpr->GetIndex()), Type::Float}; | ||
| 1588 | } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { | ||
| 1589 | UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer()); | ||
| 1590 | auto output = GetOutputAttribute(abuf); | ||
| 1591 | if (!output) { | ||
| 1592 | return {}; | ||
| 1593 | } | ||
| 1594 | target = std::move(*output); | ||
| 1595 | } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { | ||
| 1596 | target = { | ||
| 1597 | fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), | ||
| 1598 | Type::Uint}; | ||
| 1599 | } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { | ||
| 1600 | ASSERT(stage == ShaderType::Compute); | ||
| 1601 | target = {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint}; | ||
| 1602 | } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { | ||
| 1603 | const std::string real = Visit(gmem->GetRealAddress()).AsUint(); | ||
| 1604 | const std::string base = Visit(gmem->GetBaseAddress()).AsUint(); | ||
| 1605 | const std::string final_offset = fmt::format("({} - {}) >> 2", real, base); | ||
| 1606 | target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset), | ||
| 1607 | Type::Uint}; | ||
| 1608 | } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) { | ||
| 1609 | target = {GetCustomVariable(cv->GetIndex()), Type::Float}; | ||
| 1610 | } else { | ||
| 1611 | UNREACHABLE_MSG("Assign called without a proper target"); | ||
| 1612 | } | ||
| 1613 | |||
| 1614 | code.AddLine("{} = {};", target.GetCode(), Visit(src).As(target.GetType())); | ||
| 1615 | return {}; | ||
| 1616 | } | ||
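| | // Note the addressing convention shared above: local, shared and global memory are | ||
| | // arrays of u32 words, so byte addresses are converted with ">> 2" before indexing. | ||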
| 1617 | |||
| 1618 | template <Type type> | ||
| 1619 | Expression Add(Operation operation) { | ||
| 1620 | return GenerateBinaryInfix(operation, "+", type, type, type); | ||
| 1621 | } | ||
| 1622 | |||
| 1623 | template <Type type> | ||
| 1624 | Expression Mul(Operation operation) { | ||
| 1625 | return GenerateBinaryInfix(operation, "*", type, type, type); | ||
| 1626 | } | ||
| 1627 | |||
| 1628 | template <Type type> | ||
| 1629 | Expression Div(Operation operation) { | ||
| 1630 | return GenerateBinaryInfix(operation, "/", type, type, type); | ||
| 1631 | } | ||
| 1632 | |||
| 1633 | template <Type type> | ||
| 1634 | Expression Fma(Operation operation) { | ||
| 1635 | return GenerateTernary(operation, "fma", type, type, type, type); | ||
| 1636 | } | ||
| 1637 | |||
| 1638 | template <Type type> | ||
| 1639 | Expression Negate(Operation operation) { | ||
| 1640 | return GenerateUnary(operation, "-", type, type); | ||
| 1641 | } | ||
| 1642 | |||
| 1643 | template <Type type> | ||
| 1644 | Expression Absolute(Operation operation) { | ||
| 1645 | return GenerateUnary(operation, "abs", type, type); | ||
| 1646 | } | ||
| 1647 | |||
| 1648 | Expression FClamp(Operation operation) { | ||
| 1649 | return GenerateTernary(operation, "clamp", Type::Float, Type::Float, Type::Float, | ||
| 1650 | Type::Float); | ||
| 1651 | } | ||
| 1652 | |||
| 1653 | Expression FCastHalf0(Operation operation) { | ||
| 1654 | return {fmt::format("({})[0]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float}; | ||
| 1655 | } | ||
| 1656 | |||
| 1657 | Expression FCastHalf1(Operation operation) { | ||
| 1658 | return {fmt::format("({})[1]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float}; | ||
| 1659 | } | ||
| 1660 | |||
| 1661 | template <Type type> | ||
| 1662 | Expression Min(Operation operation) { | ||
| 1663 | return GenerateBinaryCall(operation, "min", type, type, type); | ||
| 1664 | } | ||
| 1665 | |||
| 1666 | template <Type type> | ||
| 1667 | Expression Max(Operation operation) { | ||
| 1668 | return GenerateBinaryCall(operation, "max", type, type, type); | ||
| 1669 | } | ||
| 1670 | |||
| 1671 | Expression Select(Operation operation) { | ||
| 1672 | const std::string condition = Visit(operation[0]).AsBool(); | ||
| 1673 | const std::string true_case = Visit(operation[1]).AsUint(); | ||
| 1674 | const std::string false_case = Visit(operation[2]).AsUint(); | ||
| 1675 | std::string op_str = fmt::format("({} ? {} : {})", condition, true_case, false_case); | ||
| 1676 | |||
| 1677 | return ApplyPrecise(operation, std::move(op_str), Type::Uint); | ||
| 1678 | } | ||
| 1679 | |||
| 1680 | Expression FCos(Operation operation) { | ||
| 1681 | return GenerateUnary(operation, "cos", Type::Float, Type::Float); | ||
| 1682 | } | ||
| 1683 | |||
| 1684 | Expression FSin(Operation operation) { | ||
| 1685 | return GenerateUnary(operation, "sin", Type::Float, Type::Float); | ||
| 1686 | } | ||
| 1687 | |||
| 1688 | Expression FExp2(Operation operation) { | ||
| 1689 | return GenerateUnary(operation, "exp2", Type::Float, Type::Float); | ||
| 1690 | } | ||
| 1691 | |||
| 1692 | Expression FLog2(Operation operation) { | ||
| 1693 | return GenerateUnary(operation, "log2", Type::Float, Type::Float); | ||
| 1694 | } | ||
| 1695 | |||
| 1696 | Expression FInverseSqrt(Operation operation) { | ||
| 1697 | return GenerateUnary(operation, "inversesqrt", Type::Float, Type::Float); | ||
| 1698 | } | ||
| 1699 | |||
| 1700 | Expression FSqrt(Operation operation) { | ||
| 1701 | return GenerateUnary(operation, "sqrt", Type::Float, Type::Float); | ||
| 1702 | } | ||
| 1703 | |||
| 1704 | Expression FRoundEven(Operation operation) { | ||
| 1705 | return GenerateUnary(operation, "roundEven", Type::Float, Type::Float); | ||
| 1706 | } | ||
| 1707 | |||
| 1708 | Expression FFloor(Operation operation) { | ||
| 1709 | return GenerateUnary(operation, "floor", Type::Float, Type::Float); | ||
| 1710 | } | ||
| 1711 | |||
| 1712 | Expression FCeil(Operation operation) { | ||
| 1713 | return GenerateUnary(operation, "ceil", Type::Float, Type::Float); | ||
| 1714 | } | ||
| 1715 | |||
| 1716 | Expression FTrunc(Operation operation) { | ||
| 1717 | return GenerateUnary(operation, "trunc", Type::Float, Type::Float); | ||
| 1718 | } | ||
| 1719 | |||
| 1720 | template <Type type> | ||
| 1721 | Expression FCastInteger(Operation operation) { | ||
| 1722 | return GenerateUnary(operation, "float", Type::Float, type); | ||
| 1723 | } | ||
| 1724 | |||
| 1725 | Expression FSwizzleAdd(Operation operation) { | ||
| 1726 | const std::string op_a = VisitOperand(operation, 0).AsFloat(); | ||
| 1727 | const std::string op_b = VisitOperand(operation, 1).AsFloat(); | ||
| 1728 | |||
| 1729 | if (!device.HasShaderBallot()) { | ||
| 1730 | LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); | ||
| 1731 | return {fmt::format("{} + {}", op_a, op_b), Type::Float}; | ||
| 1732 | } | ||
| 1733 | |||
| 1734 | const std::string instr_mask = VisitOperand(operation, 2).AsUint(); | ||
| 1735 | const std::string mask = code.GenerateTemporary(); | ||
| 1736 | code.AddLine("uint {} = ({} >> ((gl_SubGroupInvocationARB & 3) << 1)) & 3;", mask, | ||
| 1737 | instr_mask); | ||
| 1738 | |||
| 1739 | const std::string modifier_a = fmt::format("fswzadd_modifiers_a[{}]", mask); | ||
| 1740 | const std::string modifier_b = fmt::format("fswzadd_modifiers_b[{}]", mask); | ||
| 1741 | return {fmt::format("(({} * {}) + ({} * {}))", op_a, modifier_a, op_b, modifier_b), | ||
| 1742 | Type::Float}; | ||
| 1743 | } | ||
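| | // Each invocation extracts its own 2-bit selector from instr_mask using its lane id | ||
| | // (gl_SubGroupInvocationARB & 3), then indexes the fswzadd modifier tables with it. | ||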
| 1744 | |||
| 1745 | Expression ICastFloat(Operation operation) { | ||
| 1746 | return GenerateUnary(operation, "int", Type::Int, Type::Float); | ||
| 1747 | } | ||
| 1748 | |||
| 1749 | Expression ICastUnsigned(Operation operation) { | ||
| 1750 | return GenerateUnary(operation, "int", Type::Int, Type::Uint); | ||
| 1751 | } | ||
| 1752 | |||
| 1753 | template <Type type> | ||
| 1754 | Expression LogicalShiftLeft(Operation operation) { | ||
| 1755 | return GenerateBinaryInfix(operation, "<<", type, type, Type::Uint); | ||
| 1756 | } | ||
| 1757 | |||
| 1758 | Expression ILogicalShiftRight(Operation operation) { | ||
| 1759 | const std::string op_a = VisitOperand(operation, 0).AsUint(); | ||
| 1760 | const std::string op_b = VisitOperand(operation, 1).AsUint(); | ||
| 1761 | std::string op_str = fmt::format("int({} >> {})", op_a, op_b); | ||
| 1762 | |||
| 1763 | return ApplyPrecise(operation, std::move(op_str), Type::Int); | ||
| 1764 | } | ||
| 1765 | |||
| 1766 | Expression IArithmeticShiftRight(Operation operation) { | ||
| 1767 | return GenerateBinaryInfix(operation, ">>", Type::Int, Type::Int, Type::Uint); | ||
| 1768 | } | ||
| 1769 | |||
| 1770 | template <Type type> | ||
| 1771 | Expression BitwiseAnd(Operation operation) { | ||
| 1772 | return GenerateBinaryInfix(operation, "&", type, type, type); | ||
| 1773 | } | ||
| 1774 | |||
| 1775 | template <Type type> | ||
| 1776 | Expression BitwiseOr(Operation operation) { | ||
| 1777 | return GenerateBinaryInfix(operation, "|", type, type, type); | ||
| 1778 | } | ||
| 1779 | |||
| 1780 | template <Type type> | ||
| 1781 | Expression BitwiseXor(Operation operation) { | ||
| 1782 | return GenerateBinaryInfix(operation, "^", type, type, type); | ||
| 1783 | } | ||
| 1784 | |||
| 1785 | template <Type type> | ||
| 1786 | Expression BitwiseNot(Operation operation) { | ||
| 1787 | return GenerateUnary(operation, "~", type, type); | ||
| 1788 | } | ||
| 1789 | |||
| 1790 | Expression UCastFloat(Operation operation) { | ||
| 1791 | return GenerateUnary(operation, "uint", Type::Uint, Type::Float); | ||
| 1792 | } | ||
| 1793 | |||
| 1794 | Expression UCastSigned(Operation operation) { | ||
| 1795 | return GenerateUnary(operation, "uint", Type::Uint, Type::Int); | ||
| 1796 | } | ||
| 1797 | |||
| 1798 | Expression UShiftRight(Operation operation) { | ||
| 1799 | return GenerateBinaryInfix(operation, ">>", Type::Uint, Type::Uint, Type::Uint); | ||
| 1800 | } | ||
| 1801 | |||
| 1802 | template <Type type> | ||
| 1803 | Expression BitfieldInsert(Operation operation) { | ||
| 1804 | return GenerateQuaternary(operation, "bitfieldInsert", type, type, type, Type::Int, | ||
| 1805 | Type::Int); | ||
| 1806 | } | ||
| 1807 | |||
| 1808 | template <Type type> | ||
| 1809 | Expression BitfieldExtract(Operation operation) { | ||
| 1810 | return GenerateTernary(operation, "bitfieldExtract", type, type, Type::Int, Type::Int); | ||
| 1811 | } | ||
| 1812 | |||
| 1813 | template <Type type> | ||
| 1814 | Expression BitCount(Operation operation) { | ||
| 1815 | return GenerateUnary(operation, "bitCount", type, type); | ||
| 1816 | } | ||
| 1817 | |||
| 1818 | template <Type type> | ||
| 1819 | Expression BitMSB(Operation operation) { | ||
| 1820 | return GenerateUnary(operation, "findMSB", type, type); | ||
| 1821 | } | ||
| 1822 | |||
| 1823 | Expression HNegate(Operation operation) { | ||
| 1824 | const auto GetNegate = [&](std::size_t index) { | ||
| 1825 | return VisitOperand(operation, index).AsBool() + " ? -1 : 1"; | ||
| 1826 | }; | ||
| 1827 | return {fmt::format("({} * vec2({}, {}))", VisitOperand(operation, 0).AsHalfFloat(), | ||
| 1828 | GetNegate(1), GetNegate(2)), | ||
| 1829 | Type::HalfFloat}; | ||
| 1830 | } | ||
| 1831 | |||
| 1832 | Expression HClamp(Operation operation) { | ||
| 1833 | const std::string value = VisitOperand(operation, 0).AsHalfFloat(); | ||
| 1834 | const std::string min = VisitOperand(operation, 1).AsFloat(); | ||
| 1835 | const std::string max = VisitOperand(operation, 2).AsFloat(); | ||
| 1836 | std::string clamped = fmt::format("clamp({}, vec2({}), vec2({}))", value, min, max); | ||
| 1837 | |||
| 1838 | return ApplyPrecise(operation, std::move(clamped), Type::HalfFloat); | ||
| 1839 | } | ||
| 1840 | |||
| 1841 | Expression HCastFloat(Operation operation) { | ||
| 1842 | return {fmt::format("vec2({}, 0.0f)", VisitOperand(operation, 0).AsFloat()), | ||
| 1843 | Type::HalfFloat}; | ||
| 1844 | } | ||
| 1845 | |||
| 1846 | Expression HUnpack(Operation operation) { | ||
| 1847 | Expression operand = VisitOperand(operation, 0); | ||
| 1848 | switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) { | ||
| 1849 | case Tegra::Shader::HalfType::H0_H1: | ||
| 1850 | return operand; | ||
| 1851 | case Tegra::Shader::HalfType::F32: | ||
| 1852 | return {fmt::format("vec2({})", operand.AsFloat()), Type::HalfFloat}; | ||
| 1853 | case Tegra::Shader::HalfType::H0_H0: | ||
| 1854 | return {fmt::format("vec2({}[0])", operand.AsHalfFloat()), Type::HalfFloat}; | ||
| 1855 | case Tegra::Shader::HalfType::H1_H1: | ||
| 1856 | return {fmt::format("vec2({}[1])", operand.AsHalfFloat()), Type::HalfFloat}; | ||
| 1857 | } | ||
| 1858 | UNREACHABLE(); | ||
| 1859 | return {"0", Type::Int}; | ||
| 1860 | } | ||
| 1861 | |||
| 1862 | Expression HMergeF32(Operation operation) { | ||
| 1863 | return {fmt::format("float({}[0])", VisitOperand(operation, 0).AsHalfFloat()), Type::Float}; | ||
| 1864 | } | ||
| 1865 | |||
| 1866 | Expression HMergeH0(Operation operation) { | ||
| 1867 | const std::string dest = VisitOperand(operation, 0).AsUint(); | ||
| 1868 | const std::string src = VisitOperand(operation, 1).AsUint(); | ||
| 1869 | return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", src, dest), | ||
| 1870 | Type::HalfFloat}; | ||
| 1871 | } | ||
| 1872 | |||
| 1873 | Expression HMergeH1(Operation operation) { | ||
| 1874 | const std::string dest = VisitOperand(operation, 0).AsUint(); | ||
| 1875 | const std::string src = VisitOperand(operation, 1).AsUint(); | ||
| 1876 | return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", dest, src), | ||
| 1877 | Type::HalfFloat}; | ||
| 1878 | } | ||
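| | // HMergeH0 takes the low half from src and keeps the high half of dest; HMergeH1 is | ||
| | // the mirror case. For dest=(d0, d1), src=(s0, s1): H0 -> (s0, d1), H1 -> (d0, s1). | ||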
| 1879 | |||
| 1880 | Expression HPack2(Operation operation) { | ||
| 1881 | return {fmt::format("vec2({}, {})", VisitOperand(operation, 0).AsFloat(), | ||
| 1882 | VisitOperand(operation, 1).AsFloat()), | ||
| 1883 | Type::HalfFloat}; | ||
| 1884 | } | ||
| 1885 | |||
| 1886 | template <const std::string_view& op, Type type, bool unordered = false> | ||
| 1887 | Expression Comparison(Operation operation) { | ||
| 1888 | static_assert(!unordered || type == Type::Float); | ||
| 1889 | |||
| 1890 | Expression expr = GenerateBinaryInfix(operation, op, Type::Bool, type, type); | ||
| 1891 | |||
| 1892 | if constexpr (op.compare("!=") == 0 && type == Type::Float && !unordered) { | ||
| 1893 | // GLSL's operator!=(float, float) doesn't seem to be ordered. This happens on both AMD's | ||
| 1894 | // and Nvidia's proprietary stacks. Manually force an ordered comparison. | ||
| 1895 | return {fmt::format("({} && !isnan({}) && !isnan({}))", expr.AsBool(), | ||
| 1896 | VisitOperand(operation, 0).AsFloat(), | ||
| 1897 | VisitOperand(operation, 1).AsFloat()), | ||
| 1898 | Type::Bool}; | ||
| 1899 | } | ||
| 1900 | if constexpr (!unordered) { | ||
| 1901 | return expr; | ||
| 1902 | } | ||
| 1903 | // Unordered comparisons are always true for NaN operands. | ||
| 1904 | return {fmt::format("({} || isnan({}) || isnan({}))", expr.AsBool(), | ||
| 1905 | VisitOperand(operation, 0).AsFloat(), | ||
| 1906 | VisitOperand(operation, 1).AsFloat()), | ||
| 1907 | Type::Bool}; | ||
| 1908 | } | ||
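| | // In IEEE terms: an ordered comparison is false when either operand is NaN, while the | ||
| | // unordered variants above are forced true, e.g. "NaN != x" is false ordered but true | ||
| | // unordered. | ||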
| 1909 | |||
| 1910 | Expression FOrdered(Operation operation) { | ||
| 1911 | return {fmt::format("(!isnan({}) && !isnan({}))", VisitOperand(operation, 0).AsFloat(), | ||
| 1912 | VisitOperand(operation, 1).AsFloat()), | ||
| 1913 | Type::Bool}; | ||
| 1914 | } | ||
| 1915 | |||
| 1916 | Expression FUnordered(Operation operation) { | ||
| 1917 | return {fmt::format("(isnan({}) || isnan({}))", VisitOperand(operation, 0).AsFloat(), | ||
| 1918 | VisitOperand(operation, 1).AsFloat()), | ||
| 1919 | Type::Bool}; | ||
| 1920 | } | ||
| 1921 | |||
| 1922 | Expression LogicalAddCarry(Operation operation) { | ||
| 1923 | const std::string carry = code.GenerateTemporary(); | ||
| 1924 | code.AddLine("uint {};", carry); | ||
| 1925 | code.AddLine("uaddCarry({}, {}, {});", VisitOperand(operation, 0).AsUint(), | ||
| 1926 | VisitOperand(operation, 1).AsUint(), carry); | ||
| 1927 | return {fmt::format("({} != 0)", carry), Type::Bool}; | ||
| 1928 | } | ||
| 1929 | |||
| 1930 | Expression LogicalAssign(Operation operation) { | ||
| 1931 | const Node& dest = operation[0]; | ||
| 1932 | const Node& src = operation[1]; | ||
| 1933 | |||
| 1934 | std::string target; | ||
| 1935 | |||
| 1936 | if (const auto pred = std::get_if<PredicateNode>(&*dest)) { | ||
| 1937 | ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); | ||
| 1938 | |||
| 1939 | const auto index = pred->GetIndex(); | ||
| 1940 | switch (index) { | ||
| 1941 | case Tegra::Shader::Pred::NeverExecute: | ||
| 1942 | case Tegra::Shader::Pred::UnusedIndex: | ||
| 1943 | // Writing to these predicates is a no-op | ||
| 1944 | return {}; | ||
| 1945 | } | ||
| 1946 | target = GetPredicate(index); | ||
| 1947 | } else if (const auto flag = std::get_if<InternalFlagNode>(&*dest)) { | ||
| 1948 | target = GetInternalFlag(flag->GetFlag()); | ||
| 1949 | } | ||
| 1950 | |||
| 1951 | code.AddLine("{} = {};", target, Visit(src).AsBool()); | ||
| 1952 | return {}; | ||
| 1953 | } | ||
| 1954 | |||
| 1955 | Expression LogicalAnd(Operation operation) { | ||
| 1956 | return GenerateBinaryInfix(operation, "&&", Type::Bool, Type::Bool, Type::Bool); | ||
| 1957 | } | ||
| 1958 | |||
| 1959 | Expression LogicalOr(Operation operation) { | ||
| 1960 | return GenerateBinaryInfix(operation, "||", Type::Bool, Type::Bool, Type::Bool); | ||
| 1961 | } | ||
| 1962 | |||
| 1963 | Expression LogicalXor(Operation operation) { | ||
| 1964 | return GenerateBinaryInfix(operation, "^^", Type::Bool, Type::Bool, Type::Bool); | ||
| 1965 | } | ||
| 1966 | |||
| 1967 | Expression LogicalNegate(Operation operation) { | ||
| 1968 | return GenerateUnary(operation, "!", Type::Bool, Type::Bool); | ||
| 1969 | } | ||
| 1970 | |||
| 1971 | Expression LogicalPick2(Operation operation) { | ||
| 1972 | return {fmt::format("{}[{}]", VisitOperand(operation, 0).AsBool2(), | ||
| 1973 | VisitOperand(operation, 1).AsUint()), | ||
| 1974 | Type::Bool}; | ||
| 1975 | } | ||
| 1976 | |||
| 1977 | Expression LogicalAnd2(Operation operation) { | ||
| 1978 | return GenerateUnary(operation, "all", Type::Bool, Type::Bool2); | ||
| 1979 | } | ||
| 1980 | |||
| 1981 | template <bool with_nan> | ||
| 1982 | Expression GenerateHalfComparison(Operation operation, std::string_view compare_op) { | ||
| 1983 | Expression comparison = GenerateBinaryCall(operation, compare_op, Type::Bool2, | ||
| 1984 | Type::HalfFloat, Type::HalfFloat); | ||
| 1985 | if constexpr (!with_nan) { | ||
| 1986 | return comparison; | ||
| 1987 | } | ||
| 1988 | return {fmt::format("HalfFloatNanComparison({}, {}, {})", comparison.AsBool2(), | ||
| 1989 | VisitOperand(operation, 0).AsHalfFloat(), | ||
| 1990 | VisitOperand(operation, 1).AsHalfFloat()), | ||
| 1991 | Type::Bool2}; | ||
| 1992 | } | ||
| 1993 | |||
| 1994 | template <bool with_nan> | ||
| 1995 | Expression Logical2HLessThan(Operation operation) { | ||
| 1996 | return GenerateHalfComparison<with_nan>(operation, "lessThan"); | ||
| 1997 | } | ||
| 1998 | |||
| 1999 | template <bool with_nan> | ||
| 2000 | Expression Logical2HEqual(Operation operation) { | ||
| 2001 | return GenerateHalfComparison<with_nan>(operation, "equal"); | ||
| 2002 | } | ||
| 2003 | |||
| 2004 | template <bool with_nan> | ||
| 2005 | Expression Logical2HLessEqual(Operation operation) { | ||
| 2006 | return GenerateHalfComparison<with_nan>(operation, "lessThanEqual"); | ||
| 2007 | } | ||
| 2008 | |||
| 2009 | template <bool with_nan> | ||
| 2010 | Expression Logical2HGreaterThan(Operation operation) { | ||
| 2011 | return GenerateHalfComparison<with_nan>(operation, "greaterThan"); | ||
| 2012 | } | ||
| 2013 | |||
| 2014 | template <bool with_nan> | ||
| 2015 | Expression Logical2HNotEqual(Operation operation) { | ||
| 2016 | return GenerateHalfComparison<with_nan>(operation, "notEqual"); | ||
| 2017 | } | ||
| 2018 | |||
| 2019 | template <bool with_nan> | ||
| 2020 | Expression Logical2HGreaterEqual(Operation operation) { | ||
| 2021 | return GenerateHalfComparison<with_nan>(operation, "greaterThanEqual"); | ||
| 2022 | } | ||
| 2023 | |||
| 2024 | Expression Texture(Operation operation) { | ||
| 2025 | const auto meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 2026 | const bool separate_dc = meta.sampler.type == TextureType::TextureCube && | ||
| 2027 | meta.sampler.is_array && meta.sampler.is_shadow; | ||
| 2028 | // TODO: Replace this with an array and make GenerateTexture use C++20 std::span | ||
| 2029 | const std::vector<TextureIR> extras{ | ||
| 2030 | TextureOffset{}, | ||
| 2031 | TextureArgument{Type::Float, meta.bias}, | ||
| 2032 | }; | ||
| 2033 | std::string expr = GenerateTexture(operation, "", extras, separate_dc); | ||
| 2034 | if (meta.sampler.is_shadow) { | ||
| 2035 | expr = fmt::format("vec4({})", expr); | ||
| 2036 | } | ||
| 2037 | return {expr + GetSwizzle(meta.element), Type::Float}; | ||
| 2038 | } | ||
| 2039 | |||
| 2040 | Expression TextureLod(Operation operation) { | ||
| 2041 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | ||
| 2042 | ASSERT(meta); | ||
| 2043 | |||
| 2044 | std::string expr{}; | ||
| 2045 | |||
| 2046 | if (!device.HasTextureShadowLod() && meta->sampler.is_shadow && | ||
| 2047 | ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) || | ||
| 2048 | meta->sampler.type == TextureType::TextureCube)) { | ||
| 2049 | LOG_ERROR(Render_OpenGL, | ||
| 2050 | "Device lacks GL_EXT_texture_shadow_lod, using textureGrad as a workaround"); | ||
| 2051 | expr = GenerateTexture(operation, "Lod", {}); | ||
| 2052 | } else { | ||
| 2053 | expr = GenerateTexture(operation, "Lod", | ||
| 2054 | {TextureArgument{Type::Float, meta->lod}, TextureOffset{}}); | ||
| 2055 | } | ||
| 2056 | |||
| 2057 | if (meta->sampler.is_shadow) { | ||
| 2058 | expr = "vec4(" + expr + ')'; | ||
| 2059 | } | ||
| 2060 | return {expr + GetSwizzle(meta->element), Type::Float}; | ||
| 2061 | } | ||
| 2062 | |||
| 2063 | Expression TextureGather(Operation operation) { | ||
| 2064 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 2065 | |||
| 2066 | const auto type = meta.sampler.is_shadow ? Type::Float : Type::Int; | ||
| 2067 | const bool separate_dc = meta.sampler.is_shadow; | ||
| 2068 | |||
| 2069 | std::vector<TextureIR> ir_; | ||
| 2070 | if (meta.sampler.is_shadow) { | ||
| 2071 | ir_ = {TextureOffset{}}; | ||
| 2072 | } else { | ||
| 2073 | ir_ = {TextureOffset{}, TextureArgument{type, meta.component}}; | ||
| 2074 | } | ||
| 2075 | return {GenerateTexture(operation, "Gather", ir_, separate_dc) + GetSwizzle(meta.element), | ||
| 2076 | Type::Float}; | ||
| 2077 | } | ||
| 2078 | |||
| 2079 | Expression TextureQueryDimensions(Operation operation) { | ||
| 2080 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | ||
| 2081 | ASSERT(meta); | ||
| 2082 | |||
| 2083 | const std::string sampler = GetSampler(meta->sampler); | ||
| 2084 | const std::string lod = VisitOperand(operation, 0).AsInt(); | ||
| 2085 | |||
| 2086 | switch (meta->element) { | ||
| 2087 | case 0: | ||
| 2088 | case 1: | ||
| 2089 | return {fmt::format("textureSize({}, {}){}", sampler, lod, GetSwizzle(meta->element)), | ||
| 2090 | Type::Int}; | ||
| 2091 | case 3: | ||
| 2092 | return {fmt::format("textureQueryLevels({})", sampler), Type::Int}; | ||
| 2093 | } | ||
| 2094 | UNREACHABLE(); | ||
| 2095 | return {"0", Type::Int}; | ||
| 2096 | } | ||
| 2097 | |||
| 2098 | Expression TextureQueryLod(Operation operation) { | ||
| 2099 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | ||
| 2100 | ASSERT(meta); | ||
| 2101 | |||
| 2102 | if (meta->element < 2) { | ||
| 2103 | return {fmt::format("int(({} * vec2(256)){})", | ||
| 2104 | GenerateTexture(operation, "QueryLod", {}), | ||
| 2105 | GetSwizzle(meta->element)), | ||
| 2106 | Type::Int}; | ||
| 2107 | } | ||
| 2108 | return {"0", Type::Int}; | ||
| 2109 | } | ||
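| | // The multiply by 256 presumably converts the floating-point LOD returned by | ||
| | // textureQueryLod into the fixed-point (8.8) format the guest shader expects. | ||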
| 2110 | |||
| 2111 | Expression TexelFetch(Operation operation) { | ||
| 2112 | constexpr std::array constructors = {"int", "ivec2", "ivec3", "ivec4"}; | ||
| 2113 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | ||
| 2114 | ASSERT(meta); | ||
| 2115 | UNIMPLEMENTED_IF(meta->sampler.is_array); | ||
| 2116 | const std::size_t count = operation.GetOperandsCount(); | ||
| 2117 | |||
| 2118 | std::string expr = "texelFetch("; | ||
| 2119 | expr += GetSampler(meta->sampler); | ||
| 2120 | expr += ", "; | ||
| 2121 | |||
| 2122 | expr += constructors.at(count + (meta->array ? 1 : 0) - 1); | ||
| 2123 | expr += '('; | ||
| 2124 | for (std::size_t i = 0; i < count; ++i) { | ||
| 2125 | if (i > 0) { | ||
| 2126 | expr += ", "; | ||
| 2127 | } | ||
| 2128 | expr += VisitOperand(operation, i).AsInt(); | ||
| 2129 | } | ||
| 2130 | if (meta->array) { | ||
| 2131 | expr += ", "; | ||
| 2132 | expr += Visit(meta->array).AsInt(); | ||
| 2133 | } | ||
| 2134 | expr += ')'; | ||
| 2135 | |||
| 2136 | if (meta->lod && !meta->sampler.is_buffer) { | ||
| 2137 | expr += ", "; | ||
| 2138 | expr += Visit(meta->lod).AsInt(); | ||
| 2139 | } | ||
| 2140 | expr += ')'; | ||
| 2141 | expr += GetSwizzle(meta->element); | ||
| 2142 | |||
| 2143 | return {std::move(expr), Type::Float}; | ||
| 2144 | } | ||
| 2145 | |||
| 2146 | Expression TextureGradient(Operation operation) { | ||
| 2147 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 2148 | std::string expr = | ||
| 2149 | GenerateTexture(operation, "Grad", {TextureDerivates{}, TextureOffset{}}); | ||
| 2150 | return {std::move(expr) + GetSwizzle(meta.element), Type::Float}; | ||
| 2151 | } | ||
| 2152 | |||
| 2153 | Expression ImageLoad(Operation operation) { | ||
| 2154 | if (!device.HasImageLoadFormatted()) { | ||
| 2155 | LOG_ERROR(Render_OpenGL, | ||
| 2156 | "Device lacks GL_EXT_shader_image_load_formatted, stubbing image load"); | ||
| 2157 | return {"0", Type::Int}; | ||
| 2158 | } | ||
| 2159 | |||
| 2160 | const auto& meta{std::get<MetaImage>(operation.GetMeta())}; | ||
| 2161 | return {fmt::format("imageLoad({}, {}){}", GetImage(meta.image), | ||
| 2162 | BuildIntegerCoordinates(operation), GetSwizzle(meta.element)), | ||
| 2163 | Type::Uint}; | ||
| 2164 | } | ||
| 2165 | |||
| 2166 | Expression ImageStore(Operation operation) { | ||
| 2167 | const auto& meta{std::get<MetaImage>(operation.GetMeta())}; | ||
| 2168 | code.AddLine("imageStore({}, {}, {});", GetImage(meta.image), | ||
| 2169 | BuildIntegerCoordinates(operation), BuildImageValues(operation)); | ||
| 2170 | return {}; | ||
| 2171 | } | ||
| 2172 | |||
| 2173 | template <const std::string_view& opname> | ||
| 2174 | Expression AtomicImage(Operation operation) { | ||
| 2175 | const auto& meta{std::get<MetaImage>(operation.GetMeta())}; | ||
| 2176 | ASSERT(meta.values.size() == 1); | ||
| 2177 | |||
| 2178 | return {fmt::format("imageAtomic{}({}, {}, {})", opname, GetImage(meta.image), | ||
| 2179 | BuildIntegerCoordinates(operation), Visit(meta.values[0]).AsUint()), | ||
| 2180 | Type::Uint}; | ||
| 2181 | } | ||
| 2182 | |||
| 2183 | template <const std::string_view& opname, Type type> | ||
| 2184 | Expression Atomic(Operation operation) { | ||
| 2185 | if ((opname == Func::Min || opname == Func::Max) && type == Type::Int) { | ||
| 2186 | UNIMPLEMENTED_MSG("Unimplemented Min & Max for atomic operations"); | ||
| 2187 | return {}; | ||
| 2188 | } | ||
| 2189 | return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(), | ||
| 2190 | Visit(operation[1]).AsUint()), | ||
| 2191 | Type::Uint}; | ||
| 2192 | } | ||
| 2193 | |||
| 2194 | template <const std::string_view& opname, Type type> | ||
| 2195 | Expression Reduce(Operation operation) { | ||
| 2196 | code.AddLine("{};", Atomic<opname, type>(operation).GetCode()); | ||
| 2197 | return {}; | ||
| 2198 | } | ||
| 2199 | |||
| 2200 | Expression Branch(Operation operation) { | ||
| 2201 | const auto target = std::get_if<ImmediateNode>(&*operation[0]); | ||
| 2202 | UNIMPLEMENTED_IF(!target); | ||
| 2203 | |||
| 2204 | code.AddLine("jmp_to = 0x{:X}U;", target->GetValue()); | ||
| 2205 | code.AddLine("break;"); | ||
| 2206 | return {}; | ||
| 2207 | } | ||
| 2208 | |||
| 2209 | Expression BranchIndirect(Operation operation) { | ||
| 2210 | const std::string op_a = VisitOperand(operation, 0).AsUint(); | ||
| 2211 | |||
| 2212 | code.AddLine("jmp_to = {};", op_a); | ||
| 2213 | code.AddLine("break;"); | ||
| 2214 | return {}; | ||
| 2215 | } | ||
| 2216 | |||
| 2217 | Expression PushFlowStack(Operation operation) { | ||
| 2218 | const auto stack = std::get<MetaStackClass>(operation.GetMeta()); | ||
| 2219 | const auto target = std::get_if<ImmediateNode>(&*operation[0]); | ||
| 2220 | UNIMPLEMENTED_IF(!target); | ||
| 2221 | |||
| 2222 | code.AddLine("{}[{}++] = 0x{:X}U;", FlowStackName(stack), FlowStackTopName(stack), | ||
| 2223 | target->GetValue()); | ||
| 2224 | return {}; | ||
| 2225 | } | ||
| 2226 | |||
| 2227 | Expression PopFlowStack(Operation operation) { | ||
| 2228 | const auto stack = std::get<MetaStackClass>(operation.GetMeta()); | ||
| 2229 | code.AddLine("jmp_to = {}[--{}];", FlowStackName(stack), FlowStackTopName(stack)); | ||
| 2230 | code.AddLine("break;"); | ||
| 2231 | return {}; | ||
| 2232 | } | ||
| 2233 | |||
| 2234 | void PreExit() { | ||
| 2235 | if (stage != ShaderType::Fragment) { | ||
| 2236 | return; | ||
| 2237 | } | ||
| 2238 | const auto& used_registers = ir.GetRegisters(); | ||
| 2239 | const auto SafeGetRegister = [&](u32 reg) -> Expression { | ||
| 2240 | // TODO(Rodrigo): Replace with contains once C++20 releases | ||
| 2241 | if (used_registers.find(reg) != used_registers.end()) { | ||
| 2242 | return {GetRegister(reg), Type::Float}; | ||
| 2243 | } | ||
| 2244 | return {"0.0f", Type::Float}; | ||
| 2245 | }; | ||
| 2246 | |||
| 2247 | UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented"); | ||
| 2248 | |||
| 2249 | // Write the color outputs using the data in the shader registers; disabled | ||
| 2250 | // render targets/components are skipped in the register assignment. | ||
| 2251 | u32 current_reg = 0; | ||
| 2252 | for (u32 render_target = 0; render_target < Maxwell::NumRenderTargets; ++render_target) { | ||
| 2253 | // TODO(Subv): Figure out how dual-source blending is configured in the Switch. | ||
| 2254 | for (u32 component = 0; component < 4; ++component) { | ||
| 2255 | if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { | ||
| 2256 | code.AddLine("frag_color{}{} = {};", render_target, GetColorSwizzle(component), | ||
| 2257 | SafeGetRegister(current_reg).AsFloat()); | ||
| 2258 | ++current_reg; | ||
| 2259 | } | ||
| 2260 | } | ||
| 2261 | } | ||
| 2262 | if (header.ps.omap.depth) { | ||
| 2263 | // The depth output is always 2 registers after the last color output, and current_reg | ||
| 2264 | // already contains one past the last color register. | ||
| 2265 | code.AddLine("gl_FragDepth = {};", SafeGetRegister(current_reg + 1).AsFloat()); | ||
| 2266 | } | ||
| 2267 | } | ||
| 2268 | |||
| 2269 | Expression Exit(Operation operation) { | ||
| 2270 | PreExit(); | ||
| 2271 | code.AddLine("return;"); | ||
| 2272 | return {}; | ||
| 2273 | } | ||
| 2274 | |||
| 2275 | Expression Discard(Operation operation) { | ||
| 2276 | // Enclose "discard" in a conditional, so that GLSL compilation does not complain | ||
| 2277 | // about unexecuted instructions that may follow this. | ||
| 2278 | code.AddLine("if (true) {{"); | ||
| 2279 | ++code.scope; | ||
| 2280 | code.AddLine("discard;"); | ||
| 2281 | --code.scope; | ||
| 2282 | code.AddLine("}}"); | ||
| 2283 | return {}; | ||
| 2284 | } | ||
| 2285 | |||
| 2286 | Expression EmitVertex(Operation operation) { | ||
| 2287 | ASSERT_MSG(stage == ShaderType::Geometry, | ||
| 2288 | "EmitVertex is expected to be used in a geometry shader."); | ||
| 2289 | code.AddLine("EmitVertex();"); | ||
| 2290 | return {}; | ||
| 2291 | } | ||
| 2292 | |||
| 2293 | Expression EndPrimitive(Operation operation) { | ||
| 2294 | ASSERT_MSG(stage == ShaderType::Geometry, | ||
| 2295 | "EndPrimitive is expected to be used in a geometry shader."); | ||
| 2296 | code.AddLine("EndPrimitive();"); | ||
| 2297 | return {}; | ||
| 2298 | } | ||
| 2299 | |||
| 2300 | Expression InvocationId(Operation operation) { | ||
| 2301 | return {"gl_InvocationID", Type::Int}; | ||
| 2302 | } | ||
| 2303 | |||
| 2304 | Expression YNegate(Operation operation) { | ||
| 2305 | // Y_NEGATE is mapped to this uniform value | ||
| 2306 | return {"gl_FrontMaterial.ambient.a", Type::Float}; | ||
| 2307 | } | ||
| 2308 | |||
| 2309 | template <u32 element> | ||
| 2310 | Expression LocalInvocationId(Operation) { | ||
| 2311 | return {"gl_LocalInvocationID"s + GetSwizzle(element), Type::Uint}; | ||
| 2312 | } | ||
| 2313 | |||
| 2314 | template <u32 element> | ||
| 2315 | Expression WorkGroupId(Operation) { | ||
| 2316 | return {"gl_WorkGroupID"s + GetSwizzle(element), Type::Uint}; | ||
| 2317 | } | ||
| 2318 | |||
| 2319 | Expression BallotThread(Operation operation) { | ||
| 2320 | const std::string value = VisitOperand(operation, 0).AsBool(); | ||
| 2321 | if (!device.HasWarpIntrinsics()) { | ||
| 2322 | LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); | ||
| 2323 | // Stub on non-Nvidia devices by simulating all threads voting the same as the active | ||
| 2324 | // one. | ||
| 2325 | return {fmt::format("({} ? 0xFFFFFFFFU : 0U)", value), Type::Uint}; | ||
| 2326 | } | ||
| 2327 | return {fmt::format("ballotThreadNV({})", value), Type::Uint}; | ||
| 2328 | } | ||
| 2329 | |||
| 2330 | Expression Vote(Operation operation, const char* func) { | ||
| 2331 | const std::string value = VisitOperand(operation, 0).AsBool(); | ||
| 2332 | if (!device.HasWarpIntrinsics()) { | ||
| 2333 | LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); | ||
| 2334 | // Stub with a warp size of one. | ||
| 2335 | return {value, Type::Bool}; | ||
| 2336 | } | ||
| 2337 | return {fmt::format("{}({})", func, value), Type::Bool}; | ||
| 2338 | } | ||
| 2339 | |||
| 2340 | Expression VoteAll(Operation operation) { | ||
| 2341 | return Vote(operation, "allThreadsNV"); | ||
| 2342 | } | ||
| 2343 | |||
| 2344 | Expression VoteAny(Operation operation) { | ||
| 2345 | return Vote(operation, "anyThreadNV"); | ||
| 2346 | } | ||
| 2347 | |||
| 2348 | Expression VoteEqual(Operation operation) { | ||
| 2349 | if (!device.HasWarpIntrinsics()) { | ||
| 2350 | LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); | ||
| 2351 | // We must return true here: we stub with a theoretical warp size of 1, and a | ||
| 2352 | // single thread always votes equally with itself. | ||
| 2353 | return {"true", Type::Bool}; | ||
| 2354 | } | ||
| 2355 | return Vote(operation, "allThreadsEqualNV"); | ||
| 2356 | } | ||
| 2357 | |||
| 2358 | Expression ThreadId(Operation operation) { | ||
| 2359 | if (!device.HasShaderBallot()) { | ||
| 2360 | LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); | ||
| 2361 | return {"0U", Type::Uint}; | ||
| 2362 | } | ||
| 2363 | return {"gl_SubGroupInvocationARB", Type::Uint}; | ||
| 2364 | } | ||
| 2365 | |||
| 2366 | template <const std::string_view& comparison> | ||
| 2367 | Expression ThreadMask(Operation) { | ||
| 2368 | if (device.HasWarpIntrinsics()) { | ||
| 2369 | return {fmt::format("gl_Thread{}MaskNV", comparison), Type::Uint}; | ||
| 2370 | } | ||
| 2371 | if (device.HasShaderBallot()) { | ||
| 2372 | return {fmt::format("uint(gl_SubGroup{}MaskARB)", comparison), Type::Uint}; | ||
| 2373 | } | ||
| 2374 | LOG_ERROR(Render_OpenGL, "Thread mask intrinsics are required by the shader"); | ||
| 2375 | return {"0U", Type::Uint}; | ||
| 2376 | } | ||
| 2377 | |||
| 2378 | Expression ShuffleIndexed(Operation operation) { | ||
| 2379 | std::string value = VisitOperand(operation, 0).AsFloat(); | ||
| 2380 | |||
| 2381 | if (!device.HasShaderBallot()) { | ||
| 2382 | LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); | ||
| 2383 | return {std::move(value), Type::Float}; | ||
| 2384 | } | ||
| 2385 | |||
| 2386 | const std::string index = VisitOperand(operation, 1).AsUint(); | ||
| 2387 | return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float}; | ||
| 2388 | } | ||
| 2389 | |||
| 2390 | Expression Barrier(Operation) { | ||
| 2391 | if (!ir.IsDecompiled()) { | ||
| 2392 | LOG_ERROR(Render_OpenGL, "barrier() used but shader is not decompiled"); | ||
| 2393 | return {}; | ||
| 2394 | } | ||
| 2395 | code.AddLine("barrier();"); | ||
| 2396 | return {}; | ||
| 2397 | } | ||
| 2398 | |||
| 2399 | Expression MemoryBarrierGroup(Operation) { | ||
| 2400 | code.AddLine("groupMemoryBarrier();"); | ||
| 2401 | return {}; | ||
| 2402 | } | ||
| 2403 | |||
| 2404 | Expression MemoryBarrierGlobal(Operation) { | ||
| 2405 | code.AddLine("memoryBarrier();"); | ||
| 2406 | return {}; | ||
| 2407 | } | ||
| 2408 | |||
| 2409 | struct Func final { | ||
| 2410 | Func() = delete; | ||
| 2411 | ~Func() = delete; | ||
| 2412 | |||
| 2413 | static constexpr std::string_view LessThan = "<"; | ||
| 2414 | static constexpr std::string_view Equal = "=="; | ||
| 2415 | static constexpr std::string_view LessEqual = "<="; | ||
| 2416 | static constexpr std::string_view GreaterThan = ">"; | ||
| 2417 | static constexpr std::string_view NotEqual = "!="; | ||
| 2418 | static constexpr std::string_view GreaterEqual = ">="; | ||
| 2419 | |||
| 2420 | static constexpr std::string_view Eq = "Eq"; | ||
| 2421 | static constexpr std::string_view Ge = "Ge"; | ||
| 2422 | static constexpr std::string_view Gt = "Gt"; | ||
| 2423 | static constexpr std::string_view Le = "Le"; | ||
| 2424 | static constexpr std::string_view Lt = "Lt"; | ||
| 2425 | |||
| 2426 | static constexpr std::string_view Add = "Add"; | ||
| 2427 | static constexpr std::string_view Min = "Min"; | ||
| 2428 | static constexpr std::string_view Max = "Max"; | ||
| 2429 | static constexpr std::string_view And = "And"; | ||
| 2430 | static constexpr std::string_view Or = "Or"; | ||
| 2431 | static constexpr std::string_view Xor = "Xor"; | ||
| 2432 | static constexpr std::string_view Exchange = "Exchange"; | ||
| 2433 | }; | ||
| 2434 | |||
| 2435 | static constexpr std::array operation_decompilers = { | ||
| 2436 | &GLSLDecompiler::Assign, | ||
| 2437 | |||
| 2438 | &GLSLDecompiler::Select, | ||
| 2439 | |||
| 2440 | &GLSLDecompiler::Add<Type::Float>, | ||
| 2441 | &GLSLDecompiler::Mul<Type::Float>, | ||
| 2442 | &GLSLDecompiler::Div<Type::Float>, | ||
| 2443 | &GLSLDecompiler::Fma<Type::Float>, | ||
| 2444 | &GLSLDecompiler::Negate<Type::Float>, | ||
| 2445 | &GLSLDecompiler::Absolute<Type::Float>, | ||
| 2446 | &GLSLDecompiler::FClamp, | ||
| 2447 | &GLSLDecompiler::FCastHalf0, | ||
| 2448 | &GLSLDecompiler::FCastHalf1, | ||
| 2449 | &GLSLDecompiler::Min<Type::Float>, | ||
| 2450 | &GLSLDecompiler::Max<Type::Float>, | ||
| 2451 | &GLSLDecompiler::FCos, | ||
| 2452 | &GLSLDecompiler::FSin, | ||
| 2453 | &GLSLDecompiler::FExp2, | ||
| 2454 | &GLSLDecompiler::FLog2, | ||
| 2455 | &GLSLDecompiler::FInverseSqrt, | ||
| 2456 | &GLSLDecompiler::FSqrt, | ||
| 2457 | &GLSLDecompiler::FRoundEven, | ||
| 2458 | &GLSLDecompiler::FFloor, | ||
| 2459 | &GLSLDecompiler::FCeil, | ||
| 2460 | &GLSLDecompiler::FTrunc, | ||
| 2461 | &GLSLDecompiler::FCastInteger<Type::Int>, | ||
| 2462 | &GLSLDecompiler::FCastInteger<Type::Uint>, | ||
| 2463 | &GLSLDecompiler::FSwizzleAdd, | ||
| 2464 | |||
| 2465 | &GLSLDecompiler::Add<Type::Int>, | ||
| 2466 | &GLSLDecompiler::Mul<Type::Int>, | ||
| 2467 | &GLSLDecompiler::Div<Type::Int>, | ||
| 2468 | &GLSLDecompiler::Negate<Type::Int>, | ||
| 2469 | &GLSLDecompiler::Absolute<Type::Int>, | ||
| 2470 | &GLSLDecompiler::Min<Type::Int>, | ||
| 2471 | &GLSLDecompiler::Max<Type::Int>, | ||
| 2472 | |||
| 2473 | &GLSLDecompiler::ICastFloat, | ||
| 2474 | &GLSLDecompiler::ICastUnsigned, | ||
| 2475 | &GLSLDecompiler::LogicalShiftLeft<Type::Int>, | ||
| 2476 | &GLSLDecompiler::ILogicalShiftRight, | ||
| 2477 | &GLSLDecompiler::IArithmeticShiftRight, | ||
| 2478 | &GLSLDecompiler::BitwiseAnd<Type::Int>, | ||
| 2479 | &GLSLDecompiler::BitwiseOr<Type::Int>, | ||
| 2480 | &GLSLDecompiler::BitwiseXor<Type::Int>, | ||
| 2481 | &GLSLDecompiler::BitwiseNot<Type::Int>, | ||
| 2482 | &GLSLDecompiler::BitfieldInsert<Type::Int>, | ||
| 2483 | &GLSLDecompiler::BitfieldExtract<Type::Int>, | ||
| 2484 | &GLSLDecompiler::BitCount<Type::Int>, | ||
| 2485 | &GLSLDecompiler::BitMSB<Type::Int>, | ||
| 2486 | |||
| 2487 | &GLSLDecompiler::Add<Type::Uint>, | ||
| 2488 | &GLSLDecompiler::Mul<Type::Uint>, | ||
| 2489 | &GLSLDecompiler::Div<Type::Uint>, | ||
| 2490 | &GLSLDecompiler::Min<Type::Uint>, | ||
| 2491 | &GLSLDecompiler::Max<Type::Uint>, | ||
| 2492 | &GLSLDecompiler::UCastFloat, | ||
| 2493 | &GLSLDecompiler::UCastSigned, | ||
| 2494 | &GLSLDecompiler::LogicalShiftLeft<Type::Uint>, | ||
| 2495 | &GLSLDecompiler::UShiftRight, | ||
| 2496 | &GLSLDecompiler::UShiftRight, | ||
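| | // Both the logical and the arithmetic shift-right opcodes map to UShiftRight here, | ||
| | // since the two shifts coincide for unsigned operands (compare the separate Int | ||
| | // entries above). | ||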
| 2497 | &GLSLDecompiler::BitwiseAnd<Type::Uint>, | ||
| 2498 | &GLSLDecompiler::BitwiseOr<Type::Uint>, | ||
| 2499 | &GLSLDecompiler::BitwiseXor<Type::Uint>, | ||
| 2500 | &GLSLDecompiler::BitwiseNot<Type::Uint>, | ||
| 2501 | &GLSLDecompiler::BitfieldInsert<Type::Uint>, | ||
| 2502 | &GLSLDecompiler::BitfieldExtract<Type::Uint>, | ||
| 2503 | &GLSLDecompiler::BitCount<Type::Uint>, | ||
| 2504 | &GLSLDecompiler::BitMSB<Type::Uint>, | ||
| 2505 | |||
| 2506 | &GLSLDecompiler::Add<Type::HalfFloat>, | ||
| 2507 | &GLSLDecompiler::Mul<Type::HalfFloat>, | ||
| 2508 | &GLSLDecompiler::Fma<Type::HalfFloat>, | ||
| 2509 | &GLSLDecompiler::Absolute<Type::HalfFloat>, | ||
| 2510 | &GLSLDecompiler::HNegate, | ||
| 2511 | &GLSLDecompiler::HClamp, | ||
| 2512 | &GLSLDecompiler::HCastFloat, | ||
| 2513 | &GLSLDecompiler::HUnpack, | ||
| 2514 | &GLSLDecompiler::HMergeF32, | ||
| 2515 | &GLSLDecompiler::HMergeH0, | ||
| 2516 | &GLSLDecompiler::HMergeH1, | ||
| 2517 | &GLSLDecompiler::HPack2, | ||
| 2518 | |||
| 2519 | &GLSLDecompiler::LogicalAssign, | ||
| 2520 | &GLSLDecompiler::LogicalAnd, | ||
| 2521 | &GLSLDecompiler::LogicalOr, | ||
| 2522 | &GLSLDecompiler::LogicalXor, | ||
| 2523 | &GLSLDecompiler::LogicalNegate, | ||
| 2524 | &GLSLDecompiler::LogicalPick2, | ||
| 2525 | &GLSLDecompiler::LogicalAnd2, | ||
| 2526 | |||
| 2527 | &GLSLDecompiler::Comparison<Func::LessThan, Type::Float, false>, | ||
| 2528 | &GLSLDecompiler::Comparison<Func::Equal, Type::Float, false>, | ||
| 2529 | &GLSLDecompiler::Comparison<Func::LessEqual, Type::Float, false>, | ||
| 2530 | &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Float, false>, | ||
| 2531 | &GLSLDecompiler::Comparison<Func::NotEqual, Type::Float, false>, | ||
| 2532 | &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Float, false>, | ||
| 2533 | &GLSLDecompiler::FOrdered, | ||
| 2534 | &GLSLDecompiler::FUnordered, | ||
| 2535 | &GLSLDecompiler::Comparison<Func::LessThan, Type::Float, true>, | ||
| 2536 | &GLSLDecompiler::Comparison<Func::Equal, Type::Float, true>, | ||
| 2537 | &GLSLDecompiler::Comparison<Func::LessEqual, Type::Float, true>, | ||
| 2538 | &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Float, true>, | ||
| 2539 | &GLSLDecompiler::Comparison<Func::NotEqual, Type::Float, true>, | ||
| 2540 | &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Float, true>, | ||
| 2541 | |||
| 2542 | &GLSLDecompiler::Comparison<Func::LessThan, Type::Int>, | ||
| 2543 | &GLSLDecompiler::Comparison<Func::Equal, Type::Int>, | ||
| 2544 | &GLSLDecompiler::Comparison<Func::LessEqual, Type::Int>, | ||
| 2545 | &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Int>, | ||
| 2546 | &GLSLDecompiler::Comparison<Func::NotEqual, Type::Int>, | ||
| 2547 | &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Int>, | ||
| 2548 | |||
| 2549 | &GLSLDecompiler::Comparison<Func::LessThan, Type::Uint>, | ||
| 2550 | &GLSLDecompiler::Comparison<Func::Equal, Type::Uint>, | ||
| 2551 | &GLSLDecompiler::Comparison<Func::LessEqual, Type::Uint>, | ||
| 2552 | &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Uint>, | ||
| 2553 | &GLSLDecompiler::Comparison<Func::NotEqual, Type::Uint>, | ||
| 2554 | &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Uint>, | ||
| 2555 | |||
| 2556 | &GLSLDecompiler::LogicalAddCarry, | ||
| 2557 | |||
| 2558 | &GLSLDecompiler::Logical2HLessThan<false>, | ||
| 2559 | &GLSLDecompiler::Logical2HEqual<false>, | ||
| 2560 | &GLSLDecompiler::Logical2HLessEqual<false>, | ||
| 2561 | &GLSLDecompiler::Logical2HGreaterThan<false>, | ||
| 2562 | &GLSLDecompiler::Logical2HNotEqual<false>, | ||
| 2563 | &GLSLDecompiler::Logical2HGreaterEqual<false>, | ||
| 2564 | &GLSLDecompiler::Logical2HLessThan<true>, | ||
| 2565 | &GLSLDecompiler::Logical2HEqual<true>, | ||
| 2566 | &GLSLDecompiler::Logical2HLessEqual<true>, | ||
| 2567 | &GLSLDecompiler::Logical2HGreaterThan<true>, | ||
| 2568 | &GLSLDecompiler::Logical2HNotEqual<true>, | ||
| 2569 | &GLSLDecompiler::Logical2HGreaterEqual<true>, | ||
| 2570 | |||
| 2571 | &GLSLDecompiler::Texture, | ||
| 2572 | &GLSLDecompiler::TextureLod, | ||
| 2573 | &GLSLDecompiler::TextureGather, | ||
| 2574 | &GLSLDecompiler::TextureQueryDimensions, | ||
| 2575 | &GLSLDecompiler::TextureQueryLod, | ||
| 2576 | &GLSLDecompiler::TexelFetch, | ||
| 2577 | &GLSLDecompiler::TextureGradient, | ||
| 2578 | |||
| 2579 | &GLSLDecompiler::ImageLoad, | ||
| 2580 | &GLSLDecompiler::ImageStore, | ||
| 2581 | |||
| 2582 | &GLSLDecompiler::AtomicImage<Func::Add>, | ||
| 2583 | &GLSLDecompiler::AtomicImage<Func::And>, | ||
| 2584 | &GLSLDecompiler::AtomicImage<Func::Or>, | ||
| 2585 | &GLSLDecompiler::AtomicImage<Func::Xor>, | ||
| 2586 | &GLSLDecompiler::AtomicImage<Func::Exchange>, | ||
| 2587 | |||
| 2588 | &GLSLDecompiler::Atomic<Func::Exchange, Type::Uint>, | ||
| 2589 | &GLSLDecompiler::Atomic<Func::Add, Type::Uint>, | ||
| 2590 | &GLSLDecompiler::Atomic<Func::Min, Type::Uint>, | ||
| 2591 | &GLSLDecompiler::Atomic<Func::Max, Type::Uint>, | ||
| 2592 | &GLSLDecompiler::Atomic<Func::And, Type::Uint>, | ||
| 2593 | &GLSLDecompiler::Atomic<Func::Or, Type::Uint>, | ||
| 2594 | &GLSLDecompiler::Atomic<Func::Xor, Type::Uint>, | ||
| 2595 | |||
| 2596 | &GLSLDecompiler::Atomic<Func::Exchange, Type::Int>, | ||
| 2597 | &GLSLDecompiler::Atomic<Func::Add, Type::Int>, | ||
| 2598 | &GLSLDecompiler::Atomic<Func::Min, Type::Int>, | ||
| 2599 | &GLSLDecompiler::Atomic<Func::Max, Type::Int>, | ||
| 2600 | &GLSLDecompiler::Atomic<Func::And, Type::Int>, | ||
| 2601 | &GLSLDecompiler::Atomic<Func::Or, Type::Int>, | ||
| 2602 | &GLSLDecompiler::Atomic<Func::Xor, Type::Int>, | ||
| 2603 | |||
| 2604 | &GLSLDecompiler::Reduce<Func::Add, Type::Uint>, | ||
| 2605 | &GLSLDecompiler::Reduce<Func::Min, Type::Uint>, | ||
| 2606 | &GLSLDecompiler::Reduce<Func::Max, Type::Uint>, | ||
| 2607 | &GLSLDecompiler::Reduce<Func::And, Type::Uint>, | ||
| 2608 | &GLSLDecompiler::Reduce<Func::Or, Type::Uint>, | ||
| 2609 | &GLSLDecompiler::Reduce<Func::Xor, Type::Uint>, | ||
| 2610 | |||
| 2611 | &GLSLDecompiler::Reduce<Func::Add, Type::Int>, | ||
| 2612 | &GLSLDecompiler::Reduce<Func::Min, Type::Int>, | ||
| 2613 | &GLSLDecompiler::Reduce<Func::Max, Type::Int>, | ||
| 2614 | &GLSLDecompiler::Reduce<Func::And, Type::Int>, | ||
| 2615 | &GLSLDecompiler::Reduce<Func::Or, Type::Int>, | ||
| 2616 | &GLSLDecompiler::Reduce<Func::Xor, Type::Int>, | ||
| 2617 | |||
| 2618 | &GLSLDecompiler::Branch, | ||
| 2619 | &GLSLDecompiler::BranchIndirect, | ||
| 2620 | &GLSLDecompiler::PushFlowStack, | ||
| 2621 | &GLSLDecompiler::PopFlowStack, | ||
| 2622 | &GLSLDecompiler::Exit, | ||
| 2623 | &GLSLDecompiler::Discard, | ||
| 2624 | |||
| 2625 | &GLSLDecompiler::EmitVertex, | ||
| 2626 | &GLSLDecompiler::EndPrimitive, | ||
| 2627 | |||
| 2628 | &GLSLDecompiler::InvocationId, | ||
| 2629 | &GLSLDecompiler::YNegate, | ||
| 2630 | &GLSLDecompiler::LocalInvocationId<0>, | ||
| 2631 | &GLSLDecompiler::LocalInvocationId<1>, | ||
| 2632 | &GLSLDecompiler::LocalInvocationId<2>, | ||
| 2633 | &GLSLDecompiler::WorkGroupId<0>, | ||
| 2634 | &GLSLDecompiler::WorkGroupId<1>, | ||
| 2635 | &GLSLDecompiler::WorkGroupId<2>, | ||
| 2636 | |||
| 2637 | &GLSLDecompiler::BallotThread, | ||
| 2638 | &GLSLDecompiler::VoteAll, | ||
| 2639 | &GLSLDecompiler::VoteAny, | ||
| 2640 | &GLSLDecompiler::VoteEqual, | ||
| 2641 | |||
| 2642 | &GLSLDecompiler::ThreadId, | ||
| 2643 | &GLSLDecompiler::ThreadMask<Func::Eq>, | ||
| 2644 | &GLSLDecompiler::ThreadMask<Func::Ge>, | ||
| 2645 | &GLSLDecompiler::ThreadMask<Func::Gt>, | ||
| 2646 | &GLSLDecompiler::ThreadMask<Func::Le>, | ||
| 2647 | &GLSLDecompiler::ThreadMask<Func::Lt>, | ||
| 2648 | &GLSLDecompiler::ShuffleIndexed, | ||
| 2649 | |||
| 2650 | &GLSLDecompiler::Barrier, | ||
| 2651 | &GLSLDecompiler::MemoryBarrierGroup, | ||
| 2652 | &GLSLDecompiler::MemoryBarrierGlobal, | ||
| 2653 | }; | ||
| 2654 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); | ||
| 2655 | |||
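The table above maps every IR OperationCode, in declaration order, to a GLSLDecompiler member-function pointer, and the static_assert keeps the table and the enum in lockstep. A minimal standalone sketch of the same pattern (the names here are illustrative, not yuzu API):

    #include <array>
    #include <cstdio>

    struct Decompiler {
        void Add() { std::puts("add"); }
        void Mul() { std::puts("mul"); }
    };

    enum class Op { Add, Mul, Amount };

    // Enum-indexed jump table of member-function pointers, mirroring
    // operation_decompilers above.
    constexpr std::array<void (Decompiler::*)(), 2> table{&Decompiler::Add, &Decompiler::Mul};
    static_assert(table.size() == static_cast<std::size_t>(Op::Amount));

    int main() {
        Decompiler d;
        (d.*table[static_cast<std::size_t>(Op::Mul)])(); // prints "mul"
    }

Dispatch is then a single indexed call, which is why the entries must stay in exactly the order OperationCode declares them.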
| 2656 | std::string GetRegister(u32 index) const { | ||
| 2657 | return AppendSuffix(index, "gpr"); | ||
| 2658 | } | ||
| 2659 | |||
| 2660 | std::string GetCustomVariable(u32 index) const { | ||
| 2661 | return AppendSuffix(index, "custom_var"); | ||
| 2662 | } | ||
| 2663 | |||
| 2664 | std::string GetPredicate(Tegra::Shader::Pred pred) const { | ||
| 2665 | return AppendSuffix(static_cast<u32>(pred), "pred"); | ||
| 2666 | } | ||
| 2667 | |||
| 2668 | std::string GetGenericInputAttribute(Attribute::Index attribute) const { | ||
| 2669 | return AppendSuffix(GetGenericAttributeIndex(attribute), INPUT_ATTRIBUTE_NAME); | ||
| 2670 | } | ||
| 2671 | |||
| 2672 | std::unordered_map<u8, GenericVaryingDescription> varying_description; | ||
| 2673 | |||
| 2674 | std::string GetGenericOutputAttribute(Attribute::Index attribute, std::size_t element) const { | ||
| 2675 | const u8 offset = static_cast<u8>(GetGenericAttributeIndex(attribute) * 4 + element); | ||
| 2676 | const auto& description = varying_description.at(offset); | ||
| 2677 | if (description.is_scalar) { | ||
| 2678 | return description.name; | ||
| 2679 | } | ||
| 2680 | return fmt::format("{}[{}]", description.name, element - description.first_element); | ||
| 2681 | } | ||
| 2682 | |||
| 2683 | std::string GetConstBuffer(u32 index) const { | ||
| 2684 | return AppendSuffix(index, "cbuf"); | ||
| 2685 | } | ||
| 2686 | |||
| 2687 | std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const { | ||
| 2688 | return fmt::format("gmem_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, suffix); | ||
| 2689 | } | ||
| 2690 | |||
| 2691 | std::string GetGlobalMemoryBlock(const GlobalMemoryBase& descriptor) const { | ||
| 2692 | return fmt::format("gmem_block_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, | ||
| 2693 | suffix); | ||
| 2694 | } | ||
| 2695 | |||
| 2696 | std::string GetConstBufferBlock(u32 index) const { | ||
| 2697 | return AppendSuffix(index, "cbuf_block"); | ||
| 2698 | } | ||
| 2699 | |||
| 2700 | std::string GetLocalMemory() const { | ||
| 2701 | if (suffix.empty()) { | ||
| 2702 | return "lmem"; | ||
| 2703 | } else { | ||
| 2704 | return "lmem_" + std::string{suffix}; | ||
| 2705 | } | ||
| 2706 | } | ||
| 2707 | |||
| 2708 | std::string GetInternalFlag(InternalFlag flag) const { | ||
| 2709 | constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag", | ||
| 2710 | "overflow_flag"}; | ||
| 2711 | const auto index = static_cast<u32>(flag); | ||
| 2712 | ASSERT(index < static_cast<u32>(InternalFlag::Amount)); | ||
| 2713 | |||
| 2714 | if (suffix.empty()) { | ||
| 2715 | return InternalFlagNames[index]; | ||
| 2716 | } else { | ||
| 2717 | return fmt::format("{}_{}", InternalFlagNames[index], suffix); | ||
| 2718 | } | ||
| 2719 | } | ||
| 2720 | |||
| 2721 | std::string GetSampler(const SamplerEntry& sampler) const { | ||
| 2722 | return AppendSuffix(sampler.index, "sampler"); | ||
| 2723 | } | ||
| 2724 | |||
| 2725 | std::string GetImage(const ImageEntry& image) const { | ||
| 2726 | return AppendSuffix(image.index, "image"); | ||
| 2727 | } | ||
| 2728 | |||
| 2729 | std::string AppendSuffix(u32 index, std::string_view name) const { | ||
| 2730 | if (suffix.empty()) { | ||
| 2731 | return fmt::format("{}{}", name, index); | ||
| 2732 | } else { | ||
| 2733 | return fmt::format("{}{}_{}", name, index, suffix); | ||
| 2734 | } | ||
| 2735 | } | ||
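AppendSuffix is the single point where GLSL identifiers are derived, so registers, predicates, and buffers all share one naming scheme. A sketch of the resulting names, assuming suffix == "vertex":

    // AppendSuffix(3, "gpr")        -> "gpr3_vertex"
    // AppendSuffix(0, "cbuf_block") -> "cbuf_block0_vertex"
    // With an empty suffix the plain forms "gpr3" / "cbuf_block0" are produced.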
| 2736 | |||
| 2737 | u32 GetNumPhysicalInputAttributes() const { | ||
| 2738 | return stage == ShaderType::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); | ||
| 2739 | } | ||
| 2740 | |||
| 2741 | u32 GetNumPhysicalAttributes() const { | ||
| 2742 | return std::min<u32>(device.GetMaxVertexAttributes(), Maxwell::NumVertexAttributes); | ||
| 2743 | } | ||
| 2744 | |||
| 2745 | u32 GetNumPhysicalVaryings() const { | ||
| 2746 | return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings); | ||
| 2747 | } | ||
| 2748 | |||
| 2749 | const Device& device; | ||
| 2750 | const ShaderIR& ir; | ||
| 2751 | const Registry& registry; | ||
| 2752 | const ShaderType stage; | ||
| 2753 | const std::string_view identifier; | ||
| 2754 | const std::string_view suffix; | ||
| 2755 | const Header header; | ||
| 2756 | std::unordered_map<u8, VaryingTFB> transform_feedback; | ||
| 2757 | |||
| 2758 | ShaderWriter code; | ||
| 2759 | |||
| 2760 | std::optional<u32> max_input_vertices; | ||
| 2761 | }; | ||
| 2762 | |||
| 2763 | std::string GetFlowVariable(u32 index) { | ||
| 2764 | return fmt::format("flow_var{}", index); | ||
| 2765 | } | ||
| 2766 | |||
| 2767 | class ExprDecompiler { | ||
| 2768 | public: | ||
| 2769 | explicit ExprDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {} | ||
| 2770 | |||
| 2771 | void operator()(const ExprAnd& expr) { | ||
| 2772 | inner += '('; | ||
| 2773 | std::visit(*this, *expr.operand1); | ||
| 2774 | inner += " && "; | ||
| 2775 | std::visit(*this, *expr.operand2); | ||
| 2776 | inner += ')'; | ||
| 2777 | } | ||
| 2778 | |||
| 2779 | void operator()(const ExprOr& expr) { | ||
| 2780 | inner += '('; | ||
| 2781 | std::visit(*this, *expr.operand1); | ||
| 2782 | inner += " || "; | ||
| 2783 | std::visit(*this, *expr.operand2); | ||
| 2784 | inner += ')'; | ||
| 2785 | } | ||
| 2786 | |||
| 2787 | void operator()(const ExprNot& expr) { | ||
| 2788 | inner += '!'; | ||
| 2789 | std::visit(*this, *expr.operand1); | ||
| 2790 | } | ||
| 2791 | |||
| 2792 | void operator()(const ExprPredicate& expr) { | ||
| 2793 | const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate); | ||
| 2794 | inner += decomp.GetPredicate(pred); | ||
| 2795 | } | ||
| 2796 | |||
| 2797 | void operator()(const ExprCondCode& expr) { | ||
| 2798 | inner += decomp.Visit(decomp.ir.GetConditionCode(expr.cc)).AsBool(); | ||
| 2799 | } | ||
| 2800 | |||
| 2801 | void operator()(const ExprVar& expr) { | ||
| 2802 | inner += GetFlowVariable(expr.var_index); | ||
| 2803 | } | ||
| 2804 | |||
| 2805 | void operator()(const ExprBoolean& expr) { | ||
| 2806 | inner += expr.value ? "true" : "false"; | ||
| 2807 | } | ||
| 2808 | |||
| 2809 | void operator()(const VideoCommon::Shader::ExprGprEqual& expr) { | ||
| 2810 | inner += fmt::format("(ftou({}) == {})", decomp.GetRegister(expr.gpr), expr.value); | ||
| 2811 | } | ||
| 2812 | |||
| 2813 | const std::string& GetResult() const { | ||
| 2814 | return inner; | ||
| 2815 | } | ||
| 2816 | |||
| 2817 | private: | ||
| 2818 | GLSLDecompiler& decomp; | ||
| 2819 | std::string inner; | ||
| 2820 | }; | ||
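ExprDecompiler is the visitor that std::visit applies to a condition-expression tree; it concatenates one parenthesized GLSL expression into inner. As a hedged example, a tree meaning "predicate 1 and not flow variable 0" would print as follows (suffix assumed empty):

    // ExprAnd(ExprPredicate{1}, ExprNot(ExprVar{0}))
    // GetResult() -> "(pred1 && !flow_var0)"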
| 2821 | |||
| 2822 | class ASTDecompiler { | ||
| 2823 | public: | ||
| 2824 | explicit ASTDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {} | ||
| 2825 | |||
| 2826 | void operator()(const ASTProgram& ast) { | ||
| 2827 | ASTNode current = ast.nodes.GetFirst(); | ||
| 2828 | while (current) { | ||
| 2829 | Visit(current); | ||
| 2830 | current = current->GetNext(); | ||
| 2831 | } | ||
| 2832 | } | ||
| 2833 | |||
| 2834 | void operator()(const ASTIfThen& ast) { | ||
| 2835 | ExprDecompiler expr_parser{decomp}; | ||
| 2836 | std::visit(expr_parser, *ast.condition); | ||
| 2837 | decomp.code.AddLine("if ({}) {{", expr_parser.GetResult()); | ||
| 2838 | decomp.code.scope++; | ||
| 2839 | ASTNode current = ast.nodes.GetFirst(); | ||
| 2840 | while (current) { | ||
| 2841 | Visit(current); | ||
| 2842 | current = current->GetNext(); | ||
| 2843 | } | ||
| 2844 | decomp.code.scope--; | ||
| 2845 | decomp.code.AddLine("}}"); | ||
| 2846 | } | ||
| 2847 | |||
| 2848 | void operator()(const ASTIfElse& ast) { | ||
| 2849 | decomp.code.AddLine("else {{"); | ||
| 2850 | decomp.code.scope++; | ||
| 2851 | ASTNode current = ast.nodes.GetFirst(); | ||
| 2852 | while (current) { | ||
| 2853 | Visit(current); | ||
| 2854 | current = current->GetNext(); | ||
| 2855 | } | ||
| 2856 | decomp.code.scope--; | ||
| 2857 | decomp.code.AddLine("}}"); | ||
| 2858 | } | ||
| 2859 | |||
| 2860 | void operator()([[maybe_unused]] const ASTBlockEncoded& ast) { | ||
| 2861 | UNREACHABLE(); | ||
| 2862 | } | ||
| 2863 | |||
| 2864 | void operator()(const ASTBlockDecoded& ast) { | ||
| 2865 | decomp.VisitBlock(ast.nodes); | ||
| 2866 | } | ||
| 2867 | |||
| 2868 | void operator()(const ASTVarSet& ast) { | ||
| 2869 | ExprDecompiler expr_parser{decomp}; | ||
| 2870 | std::visit(expr_parser, *ast.condition); | ||
| 2871 | decomp.code.AddLine("{} = {};", GetFlowVariable(ast.index), expr_parser.GetResult()); | ||
| 2872 | } | ||
| 2873 | |||
| 2874 | void operator()(const ASTLabel& ast) { | ||
| 2875 | decomp.code.AddLine("// Label_{}:", ast.index); | ||
| 2876 | } | ||
| 2877 | |||
| 2878 | void operator()([[maybe_unused]] const ASTGoto& ast) { | ||
| 2879 | UNREACHABLE(); | ||
| 2880 | } | ||
| 2881 | |||
| 2882 | void operator()(const ASTDoWhile& ast) { | ||
| 2883 | ExprDecompiler expr_parser{decomp}; | ||
| 2884 | std::visit(expr_parser, *ast.condition); | ||
| 2885 | decomp.code.AddLine("do {{"); | ||
| 2886 | decomp.code.scope++; | ||
| 2887 | ASTNode current = ast.nodes.GetFirst(); | ||
| 2888 | while (current) { | ||
| 2889 | Visit(current); | ||
| 2890 | current = current->GetNext(); | ||
| 2891 | } | ||
| 2892 | decomp.code.scope--; | ||
| 2893 | decomp.code.AddLine("}} while({});", expr_parser.GetResult()); | ||
| 2894 | } | ||
| 2895 | |||
| 2896 | void operator()(const ASTReturn& ast) { | ||
| 2897 | const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition); | ||
| 2898 | if (!is_true) { | ||
| 2899 | ExprDecompiler expr_parser{decomp}; | ||
| 2900 | std::visit(expr_parser, *ast.condition); | ||
| 2901 | decomp.code.AddLine("if ({}) {{", expr_parser.GetResult()); | ||
| 2902 | decomp.code.scope++; | ||
| 2903 | } | ||
| 2904 | if (ast.kills) { | ||
| 2905 | decomp.code.AddLine("discard;"); | ||
| 2906 | } else { | ||
| 2907 | decomp.PreExit(); | ||
| 2908 | decomp.code.AddLine("return;"); | ||
| 2909 | } | ||
| 2910 | if (!is_true) { | ||
| 2911 | decomp.code.scope--; | ||
| 2912 | decomp.code.AddLine("}}"); | ||
| 2913 | } | ||
| 2914 | } | ||
| 2915 | |||
| 2916 | void operator()(const ASTBreak& ast) { | ||
| 2917 | const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition); | ||
| 2918 | if (!is_true) { | ||
| 2919 | ExprDecompiler expr_parser{decomp}; | ||
| 2920 | std::visit(expr_parser, *ast.condition); | ||
| 2921 | decomp.code.AddLine("if ({}) {{", expr_parser.GetResult()); | ||
| 2922 | decomp.code.scope++; | ||
| 2923 | } | ||
| 2924 | decomp.code.AddLine("break;"); | ||
| 2925 | if (!is_true) { | ||
| 2926 | decomp.code.scope--; | ||
| 2927 | decomp.code.AddLine("}}"); | ||
| 2928 | } | ||
| 2929 | } | ||
| 2930 | |||
| 2931 | void Visit(const ASTNode& node) { | ||
| 2932 | std::visit(*this, *node->GetInnerData()); | ||
| 2933 | } | ||
| 2934 | |||
| 2935 | private: | ||
| 2936 | GLSLDecompiler& decomp; | ||
| 2937 | }; | ||
| 2938 | |||
| 2939 | void GLSLDecompiler::DecompileAST() { | ||
| 2940 | const u32 num_flow_variables = ir.GetASTNumVariables(); | ||
| 2941 | for (u32 i = 0; i < num_flow_variables; i++) { | ||
| 2942 | code.AddLine("bool {} = false;", GetFlowVariable(i)); | ||
| 2943 | } | ||
| 2944 | |||
| 2945 | ASTDecompiler decompiler{*this}; | ||
| 2946 | decompiler.Visit(ir.GetASTProgram()); | ||
| 2947 | } | ||
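DecompileAST declares one bool per flow variable up front and then walks the structured AST, so the emitted GLSL uses real control flow instead of the flat branch/flow-stack loop of the non-AST path. A rough illustration of the output shape (invented, not taken from a real shader):

    // bool flow_var0 = false;
    // do {
    //     if (ftou(gpr0) == 4u) {
    //         flow_var0 = true;
    //     }
    // } while (!flow_var0);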
| 2948 | |||
| 2949 | } // Anonymous namespace | ||
| 2950 | |||
| 2951 | ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType stage) { | ||
| 2952 | ShaderEntries entries; | ||
| 2953 | for (const auto& cbuf : ir.GetConstantBuffers()) { | ||
| 2954 | entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), | ||
| 2955 | cbuf.first); | ||
| 2956 | } | ||
| 2957 | for (const auto& [base, usage] : ir.GetGlobalMemory()) { | ||
| 2958 | entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_read, | ||
| 2959 | usage.is_written); | ||
| 2960 | } | ||
| 2961 | for (const auto& sampler : ir.GetSamplers()) { | ||
| 2962 | entries.samplers.emplace_back(sampler); | ||
| 2963 | } | ||
| 2964 | for (const auto& image : ir.GetImages()) { | ||
| 2965 | entries.images.emplace_back(image); | ||
| 2966 | } | ||
| 2967 | const auto clip_distances = ir.GetClipDistances(); | ||
| 2968 | for (std::size_t i = 0; i < std::size(clip_distances); ++i) { | ||
| 2969 | entries.clip_distances |= (clip_distances[i] ? 1U : 0U) << i; | ||
| 2970 | } | ||
| 2971 | for (const auto& buffer : entries.const_buffers) { | ||
| 2972 | entries.enabled_uniform_buffers |= 1U << buffer.GetIndex(); | ||
| 2973 | } | ||
| 2974 | entries.shader_length = ir.GetLength(); | ||
| 2975 | return entries; | ||
| 2976 | } | ||
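The clip-distance loop folds the per-distance flags into a single bitmask, bit i for distance i. A small worked example, assuming distances 0 and 2 are written by the shader:

    u32 mask = 0;
    mask |= 1U << 0; // clip_distances[0] is true
    mask |= 1U << 2; // clip_distances[2] is true
    // mask == 0b101, so entries.clip_distances would be 5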
| 2977 | |||
| 2978 | std::string DecompileShader(const Device& device, const ShaderIR& ir, const Registry& registry, | ||
| 2979 | ShaderType stage, std::string_view identifier, | ||
| 2980 | std::string_view suffix) { | ||
| 2981 | GLSLDecompiler decompiler(device, ir, registry, stage, identifier, suffix); | ||
| 2982 | decompiler.Decompile(); | ||
| 2983 | return decompiler.GetResult(); | ||
| 2984 | } | ||
| 2985 | |||
| 2986 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h deleted file mode 100644 index 0397a000c..000000000 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ /dev/null | |||
| @@ -1,69 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <string> | ||
| 9 | #include <string_view> | ||
| 10 | #include <utility> | ||
| 11 | #include <vector> | ||
| 12 | #include "common/common_types.h" | ||
| 13 | #include "video_core/engines/maxwell_3d.h" | ||
| 14 | #include "video_core/engines/shader_type.h" | ||
| 15 | #include "video_core/shader/registry.h" | ||
| 16 | #include "video_core/shader/shader_ir.h" | ||
| 17 | |||
| 18 | namespace OpenGL { | ||
| 19 | |||
| 20 | class Device; | ||
| 21 | |||
| 22 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 23 | using SamplerEntry = VideoCommon::Shader::SamplerEntry; | ||
| 24 | using ImageEntry = VideoCommon::Shader::ImageEntry; | ||
| 25 | |||
| 26 | class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { | ||
| 27 | public: | ||
| 28 | explicit ConstBufferEntry(u32 max_offset_, bool is_indirect_, u32 index_) | ||
| 29 | : ConstBuffer{max_offset_, is_indirect_}, index{index_} {} | ||
| 30 | |||
| 31 | u32 GetIndex() const { | ||
| 32 | return index; | ||
| 33 | } | ||
| 34 | |||
| 35 | private: | ||
| 36 | u32 index = 0; | ||
| 37 | }; | ||
| 38 | |||
| 39 | struct GlobalMemoryEntry { | ||
| 40 | constexpr explicit GlobalMemoryEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_read_, | ||
| 41 | bool is_written_) | ||
| 42 | : cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_read{is_read_}, is_written{ | ||
| 43 | is_written_} {} | ||
| 44 | |||
| 45 | u32 cbuf_index = 0; | ||
| 46 | u32 cbuf_offset = 0; | ||
| 47 | bool is_read = false; | ||
| 48 | bool is_written = false; | ||
| 49 | }; | ||
| 50 | |||
| 51 | struct ShaderEntries { | ||
| 52 | std::vector<ConstBufferEntry> const_buffers; | ||
| 53 | std::vector<GlobalMemoryEntry> global_memory_entries; | ||
| 54 | std::vector<SamplerEntry> samplers; | ||
| 55 | std::vector<ImageEntry> images; | ||
| 56 | std::size_t shader_length{}; | ||
| 57 | u32 clip_distances{}; | ||
| 58 | u32 enabled_uniform_buffers{}; | ||
| 59 | }; | ||
| 60 | |||
| 61 | ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | ||
| 62 | Tegra::Engines::ShaderType stage); | ||
| 63 | |||
| 64 | std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | ||
| 65 | const VideoCommon::Shader::Registry& registry, | ||
| 66 | Tegra::Engines::ShaderType stage, std::string_view identifier, | ||
| 67 | std::string_view suffix = {}); | ||
| 68 | |||
| 69 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp deleted file mode 100644 index 0deb86517..000000000 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ /dev/null | |||
| @@ -1,482 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <cstring> | ||
| 6 | |||
| 7 | #include <fmt/format.h> | ||
| 8 | |||
| 9 | #include "common/assert.h" | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "common/fs/file.h" | ||
| 12 | #include "common/fs/fs.h" | ||
| 13 | #include "common/fs/path_util.h" | ||
| 14 | #include "common/logging/log.h" | ||
| 15 | #include "common/scm_rev.h" | ||
| 16 | #include "common/settings.h" | ||
| 17 | #include "common/zstd_compression.h" | ||
| 18 | #include "core/core.h" | ||
| 19 | #include "core/hle/kernel/k_process.h" | ||
| 20 | #include "video_core/engines/shader_type.h" | ||
| 21 | #include "video_core/renderer_opengl/gl_shader_cache.h" | ||
| 22 | #include "video_core/renderer_opengl/gl_shader_disk_cache.h" | ||
| 23 | |||
| 24 | namespace OpenGL { | ||
| 25 | |||
| 26 | using Tegra::Engines::ShaderType; | ||
| 27 | using VideoCommon::Shader::BindlessSamplerMap; | ||
| 28 | using VideoCommon::Shader::BoundSamplerMap; | ||
| 29 | using VideoCommon::Shader::KeyMap; | ||
| 30 | using VideoCommon::Shader::SeparateSamplerKey; | ||
| 31 | using ShaderCacheVersionHash = std::array<u8, 64>; | ||
| 32 | |||
| 33 | struct ConstBufferKey { | ||
| 34 | u32 cbuf = 0; | ||
| 35 | u32 offset = 0; | ||
| 36 | u32 value = 0; | ||
| 37 | }; | ||
| 38 | |||
| 39 | struct BoundSamplerEntry { | ||
| 40 | u32 offset = 0; | ||
| 41 | Tegra::Engines::SamplerDescriptor sampler; | ||
| 42 | }; | ||
| 43 | |||
| 44 | struct SeparateSamplerEntry { | ||
| 45 | u32 cbuf1 = 0; | ||
| 46 | u32 cbuf2 = 0; | ||
| 47 | u32 offset1 = 0; | ||
| 48 | u32 offset2 = 0; | ||
| 49 | Tegra::Engines::SamplerDescriptor sampler; | ||
| 50 | }; | ||
| 51 | |||
| 52 | struct BindlessSamplerEntry { | ||
| 53 | u32 cbuf = 0; | ||
| 54 | u32 offset = 0; | ||
| 55 | Tegra::Engines::SamplerDescriptor sampler; | ||
| 56 | }; | ||
| 57 | |||
| 58 | namespace { | ||
| 59 | |||
| 60 | constexpr u32 NativeVersion = 21; | ||
| 61 | |||
| 62 | ShaderCacheVersionHash GetShaderCacheVersionHash() { | ||
| 63 | ShaderCacheVersionHash hash{}; | ||
| 64 | const std::size_t length = std::min(std::strlen(Common::g_shader_cache_version), hash.size()); | ||
| 65 | std::memcpy(hash.data(), Common::g_shader_cache_version, length); | ||
| 66 | return hash; | ||
| 67 | } | ||
| 68 | |||
| 69 | } // Anonymous namespace | ||
| 70 | |||
| 71 | ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default; | ||
| 72 | |||
| 73 | ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default; | ||
| 74 | |||
| 75 | bool ShaderDiskCacheEntry::Load(Common::FS::IOFile& file) { | ||
| 76 | if (!file.ReadObject(type)) { | ||
| 77 | return false; | ||
| 78 | } | ||
| 79 | u32 code_size; | ||
| 80 | u32 code_size_b; | ||
| 81 | if (!file.ReadObject(code_size) || !file.ReadObject(code_size_b)) { | ||
| 82 | return false; | ||
| 83 | } | ||
| 84 | code.resize(code_size); | ||
| 85 | code_b.resize(code_size_b); | ||
| 86 | if (file.Read(code) != code_size) { | ||
| 87 | return false; | ||
| 88 | } | ||
| 89 | if (HasProgramA() && file.Read(code_b) != code_size_b) { | ||
| 90 | return false; | ||
| 91 | } | ||
| 92 | |||
| 93 | u8 is_texture_handler_size_known; | ||
| 94 | u32 texture_handler_size_value; | ||
| 95 | u32 num_keys; | ||
| 96 | u32 num_bound_samplers; | ||
| 97 | u32 num_separate_samplers; | ||
| 98 | u32 num_bindless_samplers; | ||
| 99 | if (!file.ReadObject(unique_identifier) || !file.ReadObject(bound_buffer) || | ||
| 100 | !file.ReadObject(is_texture_handler_size_known) || | ||
| 101 | !file.ReadObject(texture_handler_size_value) || !file.ReadObject(graphics_info) || | ||
| 102 | !file.ReadObject(compute_info) || !file.ReadObject(num_keys) || | ||
| 103 | !file.ReadObject(num_bound_samplers) || !file.ReadObject(num_separate_samplers) || | ||
| 104 | !file.ReadObject(num_bindless_samplers)) { | ||
| 105 | return false; | ||
| 106 | } | ||
| 107 | if (is_texture_handler_size_known) { | ||
| 108 | texture_handler_size = texture_handler_size_value; | ||
| 109 | } | ||
| 110 | |||
| 111 | std::vector<ConstBufferKey> flat_keys(num_keys); | ||
| 112 | std::vector<BoundSamplerEntry> flat_bound_samplers(num_bound_samplers); | ||
| 113 | std::vector<SeparateSamplerEntry> flat_separate_samplers(num_separate_samplers); | ||
| 114 | std::vector<BindlessSamplerEntry> flat_bindless_samplers(num_bindless_samplers); | ||
| 115 | if (file.Read(flat_keys) != flat_keys.size() || | ||
| 116 | file.Read(flat_bound_samplers) != flat_bound_samplers.size() || | ||
| 117 | file.Read(flat_separate_samplers) != flat_separate_samplers.size() || | ||
| 118 | file.Read(flat_bindless_samplers) != flat_bindless_samplers.size()) { | ||
| 119 | return false; | ||
| 120 | } | ||
| 121 | for (const auto& entry : flat_keys) { | ||
| 122 | keys.insert({{entry.cbuf, entry.offset}, entry.value}); | ||
| 123 | } | ||
| 124 | for (const auto& entry : flat_bound_samplers) { | ||
| 125 | bound_samplers.emplace(entry.offset, entry.sampler); | ||
| 126 | } | ||
| 127 | for (const auto& entry : flat_separate_samplers) { | ||
| 128 | SeparateSamplerKey key; | ||
| 129 | key.buffers = {entry.cbuf1, entry.cbuf2}; | ||
| 130 | key.offsets = {entry.offset1, entry.offset2}; | ||
| 131 | separate_samplers.emplace(key, entry.sampler); | ||
| 132 | } | ||
| 133 | for (const auto& entry : flat_bindless_samplers) { | ||
| 134 | bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler}); | ||
| 135 | } | ||
| 136 | |||
| 137 | return true; | ||
| 138 | } | ||
| 139 | |||
| 140 | bool ShaderDiskCacheEntry::Save(Common::FS::IOFile& file) const { | ||
| 141 | if (!file.WriteObject(static_cast<u32>(type)) || | ||
| 142 | !file.WriteObject(static_cast<u32>(code.size())) || | ||
| 143 | !file.WriteObject(static_cast<u32>(code_b.size()))) { | ||
| 144 | return false; | ||
| 145 | } | ||
| 146 | if (file.Write(code) != code.size()) { | ||
| 147 | return false; | ||
| 148 | } | ||
| 149 | if (HasProgramA() && file.Write(code_b) != code_b.size()) { | ||
| 150 | return false; | ||
| 151 | } | ||
| 152 | |||
| 153 | if (!file.WriteObject(unique_identifier) || !file.WriteObject(bound_buffer) || | ||
| 154 | !file.WriteObject(static_cast<u8>(texture_handler_size.has_value())) || | ||
| 155 | !file.WriteObject(texture_handler_size.value_or(0)) || !file.WriteObject(graphics_info) || | ||
| 156 | !file.WriteObject(compute_info) || !file.WriteObject(static_cast<u32>(keys.size())) || | ||
| 157 | !file.WriteObject(static_cast<u32>(bound_samplers.size())) || | ||
| 158 | !file.WriteObject(static_cast<u32>(separate_samplers.size())) || | ||
| 159 | !file.WriteObject(static_cast<u32>(bindless_samplers.size()))) { | ||
| 160 | return false; | ||
| 161 | } | ||
| 162 | |||
| 163 | std::vector<ConstBufferKey> flat_keys; | ||
| 164 | flat_keys.reserve(keys.size()); | ||
| 165 | for (const auto& [address, value] : keys) { | ||
| 166 | flat_keys.push_back(ConstBufferKey{address.first, address.second, value}); | ||
| 167 | } | ||
| 168 | |||
| 169 | std::vector<BoundSamplerEntry> flat_bound_samplers; | ||
| 170 | flat_bound_samplers.reserve(bound_samplers.size()); | ||
| 171 | for (const auto& [address, sampler] : bound_samplers) { | ||
| 172 | flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler}); | ||
| 173 | } | ||
| 174 | |||
| 175 | std::vector<SeparateSamplerEntry> flat_separate_samplers; | ||
| 176 | flat_separate_samplers.reserve(separate_samplers.size()); | ||
| 177 | for (const auto& [key, sampler] : separate_samplers) { | ||
| 178 | SeparateSamplerEntry entry; | ||
| 179 | std::tie(entry.cbuf1, entry.cbuf2) = key.buffers; | ||
| 180 | std::tie(entry.offset1, entry.offset2) = key.offsets; | ||
| 181 | entry.sampler = sampler; | ||
| 182 | flat_separate_samplers.push_back(entry); | ||
| 183 | } | ||
| 184 | |||
| 185 | std::vector<BindlessSamplerEntry> flat_bindless_samplers; | ||
| 186 | flat_bindless_samplers.reserve(bindless_samplers.size()); | ||
| 187 | for (const auto& [address, sampler] : bindless_samplers) { | ||
| 188 | flat_bindless_samplers.push_back( | ||
| 189 | BindlessSamplerEntry{address.first, address.second, sampler}); | ||
| 190 | } | ||
| 191 | |||
| 192 | return file.Write(flat_keys) == flat_keys.size() && | ||
| 193 | file.Write(flat_bound_samplers) == flat_bound_samplers.size() && | ||
| 194 | file.Write(flat_separate_samplers) == flat_separate_samplers.size() && | ||
| 195 | file.Write(flat_bindless_samplers) == flat_bindless_samplers.size(); | ||
| 196 | } | ||
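Load and Save round-trip a simple length-prefixed record stream. Reconstructed from the reads and writes above, a transferable file looks roughly like this (a sketch, not a formal spec; the file itself is prefixed with a u32 NativeVersion when it is first created):

    // per entry:
    //   [u32 type][u32 code_size][u32 code_size_b]
    //   [code: code_size words][code_b: code_size_b words, only if program A exists]
    //   [unique_identifier][bound_buffer]
    //   [u8 texture_handler_size_known][u32 texture_handler_size]
    //   [graphics_info][compute_info]
    //   [u32 num_keys][u32 num_bound_samplers][u32 num_separate_samplers][u32 num_bindless_samplers]
    //   [flat ConstBufferKey / BoundSamplerEntry / SeparateSamplerEntry / BindlessSamplerEntry arrays]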
| 197 | |||
| 198 | ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL() = default; | ||
| 199 | |||
| 200 | ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default; | ||
| 201 | |||
| 202 | void ShaderDiskCacheOpenGL::BindTitleID(u64 title_id_) { | ||
| 203 | title_id = title_id_; | ||
| 204 | } | ||
| 205 | |||
| 206 | std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() { | ||
| 207 | // Skip games without title id | ||
| 208 | const bool has_title_id = title_id != 0; | ||
| 209 | if (!Settings::values.use_disk_shader_cache.GetValue() || !has_title_id) { | ||
| 210 | return std::nullopt; | ||
| 211 | } | ||
| 212 | |||
| 213 | Common::FS::IOFile file{GetTransferablePath(), Common::FS::FileAccessMode::Read, | ||
| 214 | Common::FS::FileType::BinaryFile}; | ||
| 215 | if (!file.IsOpen()) { | ||
| 216 | LOG_INFO(Render_OpenGL, "No transferable shader cache found"); | ||
| 217 | is_usable = true; | ||
| 218 | return std::nullopt; | ||
| 219 | } | ||
| 220 | |||
| 221 | u32 version{}; | ||
| 222 | if (!file.ReadObject(version)) { | ||
| 223 | LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it"); | ||
| 224 | return std::nullopt; | ||
| 225 | } | ||
| 226 | |||
| 227 | if (version < NativeVersion) { | ||
| 228 | LOG_INFO(Render_OpenGL, "Transferable shader cache is old, removing"); | ||
| 229 | file.Close(); | ||
| 230 | InvalidateTransferable(); | ||
| 231 | is_usable = true; | ||
| 232 | return std::nullopt; | ||
| 233 | } | ||
| 234 | if (version > NativeVersion) { | ||
| 235 | LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version " | ||
| 236 | "of the emulator, skipping"); | ||
| 237 | return std::nullopt; | ||
| 238 | } | ||
| 239 | |||
| 240 | // Version is valid, load the shaders | ||
| 241 | std::vector<ShaderDiskCacheEntry> entries; | ||
| 242 | while (static_cast<u64>(file.Tell()) < file.GetSize()) { | ||
| 243 | ShaderDiskCacheEntry& entry = entries.emplace_back(); | ||
| 244 | if (!entry.Load(file)) { | ||
| 245 | LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping"); | ||
| 246 | return std::nullopt; | ||
| 247 | } | ||
| 248 | } | ||
| 249 | |||
| 250 | is_usable = true; | ||
| 251 | return {std::move(entries)}; | ||
| 252 | } | ||
| 253 | |||
| 254 | std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled() { | ||
| 255 | if (!is_usable) { | ||
| 256 | return {}; | ||
| 257 | } | ||
| 258 | |||
| 259 | Common::FS::IOFile file{GetPrecompiledPath(), Common::FS::FileAccessMode::Read, | ||
| 260 | Common::FS::FileType::BinaryFile}; | ||
| 261 | if (!file.IsOpen()) { | ||
| 262 | LOG_INFO(Render_OpenGL, "No precompiled shader cache found"); | ||
| 263 | return {}; | ||
| 264 | } | ||
| 265 | |||
| 266 | if (const auto result = LoadPrecompiledFile(file)) { | ||
| 267 | return *result; | ||
| 268 | } | ||
| 269 | |||
| 270 | LOG_INFO(Render_OpenGL, "Failed to load precompiled cache"); | ||
| 271 | file.Close(); | ||
| 272 | InvalidatePrecompiled(); | ||
| 273 | return {}; | ||
| 274 | } | ||
| 275 | |||
| 276 | std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::LoadPrecompiledFile( | ||
| 277 | Common::FS::IOFile& file) { | ||
| 278 | // Read compressed file from disk and decompress to virtual precompiled cache file | ||
| 279 | std::vector<u8> compressed(file.GetSize()); | ||
| 280 | if (file.Read(compressed) != file.GetSize()) { | ||
| 281 | return std::nullopt; | ||
| 282 | } | ||
| 283 | const std::vector<u8> decompressed = Common::Compression::DecompressDataZSTD(compressed); | ||
| 284 | SaveArrayToPrecompiled(decompressed.data(), decompressed.size()); | ||
| 285 | precompiled_cache_virtual_file_offset = 0; | ||
| 286 | |||
| 287 | ShaderCacheVersionHash file_hash{}; | ||
| 288 | if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) { | ||
| 289 | precompiled_cache_virtual_file_offset = 0; | ||
| 290 | return std::nullopt; | ||
| 291 | } | ||
| 292 | if (GetShaderCacheVersionHash() != file_hash) { | ||
| 293 | LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator"); | ||
| 294 | precompiled_cache_virtual_file_offset = 0; | ||
| 295 | return std::nullopt; | ||
| 296 | } | ||
| 297 | |||
| 298 | std::vector<ShaderDiskCachePrecompiled> entries; | ||
| 299 | while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { | ||
| 300 | u32 binary_size; | ||
| 301 | auto& entry = entries.emplace_back(); | ||
| 302 | if (!LoadObjectFromPrecompiled(entry.unique_identifier) || | ||
| 303 | !LoadObjectFromPrecompiled(entry.binary_format) || | ||
| 304 | !LoadObjectFromPrecompiled(binary_size)) { | ||
| 305 | return std::nullopt; | ||
| 306 | } | ||
| 307 | |||
| 308 | entry.binary.resize(binary_size); | ||
| 309 | if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) { | ||
| 310 | return std::nullopt; | ||
| 311 | } | ||
| 312 | } | ||
| 313 | return entries; | ||
| 314 | } | ||
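After zstd decompression, the virtual precompiled file parsed here has this approximate layout (again a sketch reconstructed from the loads above):

    // [64-byte shader cache version hash]
    // per entry:
    //   [u64 unique_identifier][GLenum binary_format][u32 binary_size]
    //   [binary_size bytes of driver program binary]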
| 315 | |||
| 316 | void ShaderDiskCacheOpenGL::InvalidateTransferable() { | ||
| 317 | if (!Common::FS::RemoveFile(GetTransferablePath())) { | ||
| 318 | LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}", | ||
| 319 | Common::FS::PathToUTF8String(GetTransferablePath())); | ||
| 320 | } | ||
| 321 | InvalidatePrecompiled(); | ||
| 322 | } | ||
| 323 | |||
| 324 | void ShaderDiskCacheOpenGL::InvalidatePrecompiled() { | ||
| 325 | // Clear the virtual precompiled cache file | ||
| 326 | precompiled_cache_virtual_file.Resize(0); | ||
| 327 | |||
| 328 | if (!Common::FS::RemoveFile(GetPrecompiledPath())) { | ||
| 329 | LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}", | ||
| 330 | Common::FS::PathToUTF8String(GetPrecompiledPath())); | ||
| 331 | } | ||
| 332 | } | ||
| 333 | |||
| 334 | void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) { | ||
| 335 | if (!is_usable) { | ||
| 336 | return; | ||
| 337 | } | ||
| 338 | |||
| 339 | const u64 id = entry.unique_identifier; | ||
| 340 | if (stored_transferable.contains(id)) { | ||
| 341 | // The shader already exists | ||
| 342 | return; | ||
| 343 | } | ||
| 344 | |||
| 345 | Common::FS::IOFile file = AppendTransferableFile(); | ||
| 346 | if (!file.IsOpen()) { | ||
| 347 | return; | ||
| 348 | } | ||
| 349 | if (!entry.Save(file)) { | ||
| 350 | LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing"); | ||
| 351 | file.Close(); | ||
| 352 | InvalidateTransferable(); | ||
| 353 | return; | ||
| 354 | } | ||
| 355 | |||
| 356 | stored_transferable.insert(id); | ||
| 357 | } | ||
| 358 | |||
| 359 | void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint program) { | ||
| 360 | if (!is_usable) { | ||
| 361 | return; | ||
| 362 | } | ||
| 363 | |||
| 364 | // TODO(Rodrigo): This is a design smell. I shouldn't be having to manually write the header | ||
| 365 | // when writing the dump. This should be done the moment I get access to write to the virtual | ||
| 366 | // file. | ||
| 367 | if (precompiled_cache_virtual_file.GetSize() == 0) { | ||
| 368 | SavePrecompiledHeaderToVirtualPrecompiledCache(); | ||
| 369 | } | ||
| 370 | |||
| 371 | GLint binary_length; | ||
| 372 | glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); | ||
| 373 | |||
| 374 | GLenum binary_format; | ||
| 375 | std::vector<u8> binary(binary_length); | ||
| 376 | glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); | ||
| 377 | |||
| 378 | if (!SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(binary_format) || | ||
| 379 | !SaveObjectToPrecompiled(static_cast<u32>(binary.size())) || | ||
| 380 | !SaveArrayToPrecompiled(binary.data(), binary.size())) { | ||
| 381 | LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing", | ||
| 382 | unique_identifier); | ||
| 383 | InvalidatePrecompiled(); | ||
| 384 | } | ||
| 385 | } | ||
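SavePrecompiled dumps the driver-linked program with glGetProgramBinary; on a later boot the counterpart operation is a glProgramBinary upload. A minimal sketch of that reload, assuming entry is one ShaderDiskCachePrecompiled record (error handling trimmed):

    GLuint program = glCreateProgram();
    glProgramBinary(program, entry.binary_format, entry.binary.data(),
                    static_cast<GLsizei>(entry.binary.size()));

    GLint link_status = 0;
    glGetProgramiv(program, GL_LINK_STATUS, &link_status);
    if (link_status != GL_TRUE) {
        // The driver or GPU changed and rejected the binary:
        // delete it and recompile from GLSL source instead.
        glDeleteProgram(program);
    }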
| 386 | |||
| 387 | Common::FS::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const { | ||
| 388 | if (!EnsureDirectories()) { | ||
| 389 | return {}; | ||
| 390 | } | ||
| 391 | |||
| 392 | const auto transferable_path{GetTransferablePath()}; | ||
| 393 | const bool existed = Common::FS::Exists(transferable_path); | ||
| 394 | |||
| 395 | Common::FS::IOFile file{transferable_path, Common::FS::FileAccessMode::Append, | ||
| 396 | Common::FS::FileType::BinaryFile}; | ||
| 397 | if (!file.IsOpen()) { | ||
| 398 | LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}", | ||
| 399 | Common::FS::PathToUTF8String(transferable_path)); | ||
| 400 | return {}; | ||
| 401 | } | ||
| 402 | if (!existed || file.GetSize() == 0) { | ||
| 403 | // If the file didn't exist, write its version | ||
| 404 | if (!file.WriteObject(NativeVersion)) { | ||
| 405 | LOG_ERROR(Render_OpenGL, "Failed to write transferable cache version in path={}", | ||
| 406 | Common::FS::PathToUTF8String(transferable_path)); | ||
| 407 | return {}; | ||
| 408 | } | ||
| 409 | } | ||
| 410 | return file; | ||
| 411 | } | ||
| 412 | |||
| 413 | void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() { | ||
| 414 | const auto hash{GetShaderCacheVersionHash()}; | ||
| 415 | if (!SaveArrayToPrecompiled(hash.data(), hash.size())) { | ||
| 416 | LOG_ERROR( | ||
| 417 | Render_OpenGL, | ||
| 418 | "Failed to write precompiled cache version hash to virtual precompiled cache file"); | ||
| 419 | } | ||
| 420 | } | ||
| 421 | |||
| 422 | void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() { | ||
| 423 | precompiled_cache_virtual_file_offset = 0; | ||
| 424 | const std::vector<u8> uncompressed = precompiled_cache_virtual_file.ReadAllBytes(); | ||
| 425 | const std::vector<u8> compressed = | ||
| 426 | Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size()); | ||
| 427 | |||
| 428 | const auto precompiled_path = GetPrecompiledPath(); | ||
| 429 | Common::FS::IOFile file{precompiled_path, Common::FS::FileAccessMode::Write, | ||
| 430 | Common::FS::FileType::BinaryFile}; | ||
| 431 | |||
| 432 | if (!file.IsOpen()) { | ||
| 433 | LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}", | ||
| 434 | Common::FS::PathToUTF8String(precompiled_path)); | ||
| 435 | return; | ||
| 436 | } | ||
| 437 | if (file.Write(compressed) != compressed.size()) { | ||
| 438 | LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache in path={}", | ||
| 439 | Common::FS::PathToUTF8String(precompiled_path)); | ||
| 440 | } | ||
| 441 | } | ||
| 442 | |||
| 443 | bool ShaderDiskCacheOpenGL::EnsureDirectories() const { | ||
| 444 | const auto CreateDir = [](const std::filesystem::path& dir) { | ||
| 445 | if (!Common::FS::CreateDir(dir)) { | ||
| 446 | LOG_ERROR(Render_OpenGL, "Failed to create directory={}", | ||
| 447 | Common::FS::PathToUTF8String(dir)); | ||
| 448 | return false; | ||
| 449 | } | ||
| 450 | return true; | ||
| 451 | }; | ||
| 452 | |||
| 453 | return CreateDir(Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)) && | ||
| 454 | CreateDir(GetBaseDir()) && CreateDir(GetTransferableDir()) && | ||
| 455 | CreateDir(GetPrecompiledDir()); | ||
| 456 | } | ||
| 457 | |||
| 458 | std::filesystem::path ShaderDiskCacheOpenGL::GetTransferablePath() const { | ||
| 459 | return GetTransferableDir() / fmt::format("{}.bin", GetTitleID()); | ||
| 460 | } | ||
| 461 | |||
| 462 | std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledPath() const { | ||
| 463 | return GetPrecompiledDir() / fmt::format("{}.bin", GetTitleID()); | ||
| 464 | } | ||
| 465 | |||
| 466 | std::filesystem::path ShaderDiskCacheOpenGL::GetTransferableDir() const { | ||
| 467 | return GetBaseDir() / "transferable"; | ||
| 468 | } | ||
| 469 | |||
| 470 | std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledDir() const { | ||
| 471 | return GetBaseDir() / "precompiled"; | ||
| 472 | } | ||
| 473 | |||
| 474 | std::filesystem::path ShaderDiskCacheOpenGL::GetBaseDir() const { | ||
| 475 | return Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir) / "opengl"; | ||
| 476 | } | ||
| 477 | |||
| 478 | std::string ShaderDiskCacheOpenGL::GetTitleID() const { | ||
| 479 | return fmt::format("{:016X}", title_id); | ||
| 480 | } | ||
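Taken together, these path helpers give each title its own pair of cache files under the yuzu shader directory, e.g. (title ID hypothetical):

    // <shader dir>/opengl/transferable/0100000000010000.bin
    // <shader dir>/opengl/precompiled/0100000000010000.bin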
| 481 | |||
| 482 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h deleted file mode 100644 index f8bc23868..000000000 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ /dev/null | |||
| @@ -1,176 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <filesystem> | ||
| 8 | #include <optional> | ||
| 9 | #include <string> | ||
| 10 | #include <tuple> | ||
| 11 | #include <type_traits> | ||
| 12 | #include <unordered_map> | ||
| 13 | #include <unordered_set> | ||
| 14 | #include <utility> | ||
| 15 | #include <vector> | ||
| 16 | |||
| 17 | #include <glad/glad.h> | ||
| 18 | |||
| 19 | #include "common/assert.h" | ||
| 20 | #include "common/common_types.h" | ||
| 21 | #include "core/file_sys/vfs_vector.h" | ||
| 22 | #include "video_core/engines/shader_type.h" | ||
| 23 | #include "video_core/shader/registry.h" | ||
| 24 | |||
| 25 | namespace Common::FS { | ||
| 26 | class IOFile; | ||
| 27 | } | ||
| 28 | |||
| 29 | namespace OpenGL { | ||
| 30 | |||
| 31 | using ProgramCode = std::vector<u64>; | ||
| 32 | |||
| 33 | /// Describes a shader and how it's used by the guest GPU | ||
| 34 | struct ShaderDiskCacheEntry { | ||
| 35 | ShaderDiskCacheEntry(); | ||
| 36 | ~ShaderDiskCacheEntry(); | ||
| 37 | |||
| 38 | bool Load(Common::FS::IOFile& file); | ||
| 39 | |||
| 40 | bool Save(Common::FS::IOFile& file) const; | ||
| 41 | |||
| 42 | bool HasProgramA() const { | ||
| 43 | return !code.empty() && !code_b.empty(); | ||
| 44 | } | ||
| 45 | |||
| 46 | Tegra::Engines::ShaderType type{}; | ||
| 47 | ProgramCode code; | ||
| 48 | ProgramCode code_b; | ||
| 49 | |||
| 50 | u64 unique_identifier = 0; | ||
| 51 | std::optional<u32> texture_handler_size; | ||
| 52 | u32 bound_buffer = 0; | ||
| 53 | VideoCommon::Shader::GraphicsInfo graphics_info; | ||
| 54 | VideoCommon::Shader::ComputeInfo compute_info; | ||
| 55 | VideoCommon::Shader::KeyMap keys; | ||
| 56 | VideoCommon::Shader::BoundSamplerMap bound_samplers; | ||
| 57 | VideoCommon::Shader::SeparateSamplerMap separate_samplers; | ||
| 58 | VideoCommon::Shader::BindlessSamplerMap bindless_samplers; | ||
| 59 | }; | ||
| 60 | |||
| 61 | /// Contains an OpenGL dumped binary program | ||
| 62 | struct ShaderDiskCachePrecompiled { | ||
| 63 | u64 unique_identifier = 0; | ||
| 64 | GLenum binary_format = 0; | ||
| 65 | std::vector<u8> binary; | ||
| 66 | }; | ||
| 67 | |||
| 68 | class ShaderDiskCacheOpenGL { | ||
| 69 | public: | ||
| 70 | explicit ShaderDiskCacheOpenGL(); | ||
| 71 | ~ShaderDiskCacheOpenGL(); | ||
| 72 | |||
| 73 | /// Binds a title ID for all future operations. | ||
| 74 | void BindTitleID(u64 title_id); | ||
| 75 | |||
| 76 | /// Loads the transferable cache. If the file has an old version or fails to load, it is deleted. | ||
| 77 | std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable(); | ||
| 78 | |||
| 79 | /// Loads current game's precompiled cache. Invalidates on failure. | ||
| 80 | std::vector<ShaderDiskCachePrecompiled> LoadPrecompiled(); | ||
| 81 | |||
| 82 | /// Removes the transferable (and precompiled) cache file. | ||
| 83 | void InvalidateTransferable(); | ||
| 84 | |||
| 85 | /// Removes the precompiled cache file and clears virtual precompiled cache file. | ||
| 86 | void InvalidatePrecompiled(); | ||
| 87 | |||
| 88 | /// Saves a raw dump to the transferable file. Checks for collisions. | ||
| 89 | void SaveEntry(const ShaderDiskCacheEntry& entry); | ||
| 90 | |||
| 91 | /// Saves a dump entry to the precompiled file. Does not check for collisions. | ||
| 92 | void SavePrecompiled(u64 unique_identifier, GLuint program); | ||
| 93 | |||
| 94 | /// Serializes virtual precompiled shader cache file to real file | ||
| 95 | void SaveVirtualPrecompiledFile(); | ||
| 96 | |||
| 97 | private: | ||
| 98 | /// Loads the precompiled cache file. Returns empty on failure. | ||
| 99 | std::optional<std::vector<ShaderDiskCachePrecompiled>> LoadPrecompiledFile( | ||
| 100 | Common::FS::IOFile& file); | ||
| 101 | |||
| 102 | /// Opens the current game's transferable file and writes its header if it doesn't exist | ||
| 103 | Common::FS::IOFile AppendTransferableFile() const; | ||
| 104 | |||
| 105 | /// Saves the precompiled header to the virtual precompiled cache file | ||
| 106 | void SavePrecompiledHeaderToVirtualPrecompiledCache(); | ||
| 107 | |||
| 108 | /// Create shader disk cache directories. Returns true on success. | ||
| 109 | bool EnsureDirectories() const; | ||
| 110 | |||
| 111 | /// Gets current game's transferable file path | ||
| 112 | std::filesystem::path GetTransferablePath() const; | ||
| 113 | |||
| 114 | /// Gets current game's precompiled file path | ||
| 115 | std::filesystem::path GetPrecompiledPath() const; | ||
| 116 | |||
| 117 | /// Get user's transferable directory path | ||
| 118 | std::filesystem::path GetTransferableDir() const; | ||
| 119 | |||
| 120 | /// Get user's precompiled directory path | ||
| 121 | std::filesystem::path GetPrecompiledDir() const; | ||
| 122 | |||
| 123 | /// Get user's shader directory path | ||
| 124 | std::filesystem::path GetBaseDir() const; | ||
| 125 | |||
| 126 | /// Get current game's title id | ||
| 127 | std::string GetTitleID() const; | ||
| 128 | |||
| 129 | template <typename T> | ||
| 130 | bool SaveArrayToPrecompiled(const T* data, std::size_t length) { | ||
| 131 | const std::size_t write_length = precompiled_cache_virtual_file.WriteArray( | ||
| 132 | data, length, precompiled_cache_virtual_file_offset); | ||
| 133 | precompiled_cache_virtual_file_offset += write_length; | ||
| 134 | return write_length == sizeof(T) * length; | ||
| 135 | } | ||
| 136 | |||
| 137 | template <typename T> | ||
| 138 | bool LoadArrayFromPrecompiled(T* data, std::size_t length) { | ||
| 139 | const std::size_t read_length = precompiled_cache_virtual_file.ReadArray( | ||
| 140 | data, length, precompiled_cache_virtual_file_offset); | ||
| 141 | precompiled_cache_virtual_file_offset += read_length; | ||
| 142 | return read_length == sizeof(T) * length; | ||
| 143 | } | ||
| 144 | |||
| 145 | template <typename T> | ||
| 146 | bool SaveObjectToPrecompiled(const T& object) { | ||
| 147 | return SaveArrayToPrecompiled(&object, 1); | ||
| 148 | } | ||
| 149 | |||
| 150 | bool SaveObjectToPrecompiled(bool object) { | ||
| 151 | const auto value = static_cast<u8>(object); | ||
| 152 | return SaveArrayToPrecompiled(&value, 1); | ||
| 153 | } | ||
| 154 | |||
| 155 | template <typename T> | ||
| 156 | bool LoadObjectFromPrecompiled(T& object) { | ||
| 157 | return LoadArrayFromPrecompiled(&object, 1); | ||
| 158 | } | ||
| 159 | |||
| 160 | // Stores the whole precompiled cache, which is read from or saved to the precompiled cache | ||
| 161 | // file | ||
| 162 | FileSys::VectorVfsFile precompiled_cache_virtual_file; | ||
| 163 | // Stores the current offset of the precompiled cache file for IO purposes | ||
| 164 | std::size_t precompiled_cache_virtual_file_offset = 0; | ||
| 165 | |||
| 166 | // Stored transferable shaders | ||
| 167 | std::unordered_set<u64> stored_transferable; | ||
| 168 | |||
| 169 | /// Title ID to operate on | ||
| 170 | u64 title_id = 0; | ||
| 171 | |||
| 172 | // Whether the transferable cache was loaded correctly at boot | ||
| 173 | bool is_usable = false; | ||
| 174 | }; | ||
| 175 | |||
| 176 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 553e6e8d6..399959afb 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp | |||
| @@ -1,149 +1,3 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | 1 | // Copyright 2018 yuzu Emulator Project |
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | |||
| 5 | #include <glad/glad.h> | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "video_core/engines/maxwell_3d.h" | ||
| 9 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 10 | #include "video_core/renderer_opengl/gl_shader_manager.h" | ||
| 11 | |||
| 12 | namespace OpenGL { | ||
| 13 | |||
| 14 | namespace { | ||
| 15 | |||
| 16 | void BindProgram(GLenum stage, GLuint current, GLuint old, bool& enabled) { | ||
| 17 | if (current == old) { | ||
| 18 | return; | ||
| 19 | } | ||
| 20 | if (current == 0) { | ||
| 21 | if (enabled) { | ||
| 22 | enabled = false; | ||
| 23 | glDisable(stage); | ||
| 24 | } | ||
| 25 | return; | ||
| 26 | } | ||
| 27 | if (!enabled) { | ||
| 28 | enabled = true; | ||
| 29 | glEnable(stage); | ||
| 30 | } | ||
| 31 | glBindProgramARB(stage, current); | ||
| 32 | } | ||
| 33 | |||
| 34 | } // Anonymous namespace | ||
| 35 | |||
| 36 | ProgramManager::ProgramManager(const Device& device) | ||
| 37 | : use_assembly_programs{device.UseAssemblyShaders()} { | ||
| 38 | if (use_assembly_programs) { | ||
| 39 | glEnable(GL_COMPUTE_PROGRAM_NV); | ||
| 40 | } else { | ||
| 41 | graphics_pipeline.Create(); | ||
| 42 | glBindProgramPipeline(graphics_pipeline.handle); | ||
| 43 | } | ||
| 44 | } | ||
| 45 | |||
| 46 | ProgramManager::~ProgramManager() = default; | ||
| 47 | |||
| 48 | void ProgramManager::BindCompute(GLuint program) { | ||
| 49 | if (use_assembly_programs) { | ||
| 50 | glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program); | ||
| 51 | } else { | ||
| 52 | is_graphics_bound = false; | ||
| 53 | glUseProgram(program); | ||
| 54 | } | ||
| 55 | } | ||
| 56 | |||
| 57 | void ProgramManager::BindGraphicsPipeline() { | ||
| 58 | if (!use_assembly_programs) { | ||
| 59 | UpdateSourcePrograms(); | ||
| 60 | } | ||
| 61 | } | ||
| 62 | |||
| 63 | void ProgramManager::BindHostPipeline(GLuint pipeline) { | ||
| 64 | if (use_assembly_programs) { | ||
| 65 | if (geometry_enabled) { | ||
| 66 | geometry_enabled = false; | ||
| 67 | old_state.geometry = 0; | ||
| 68 | glDisable(GL_GEOMETRY_PROGRAM_NV); | ||
| 69 | } | ||
| 70 | } else { | ||
| 71 | if (!is_graphics_bound) { | ||
| 72 | glUseProgram(0); | ||
| 73 | } | ||
| 74 | } | ||
| 75 | glBindProgramPipeline(pipeline); | ||
| 76 | } | ||
| 77 | |||
| 78 | void ProgramManager::RestoreGuestPipeline() { | ||
| 79 | if (use_assembly_programs) { | ||
| 80 | glBindProgramPipeline(0); | ||
| 81 | } else { | ||
| 82 | glBindProgramPipeline(graphics_pipeline.handle); | ||
| 83 | } | ||
| 84 | } | ||
| 85 | |||
| 86 | void ProgramManager::BindHostCompute(GLuint program) { | ||
| 87 | if (use_assembly_programs) { | ||
| 88 | glDisable(GL_COMPUTE_PROGRAM_NV); | ||
| 89 | } | ||
| 90 | glUseProgram(program); | ||
| 91 | is_graphics_bound = false; | ||
| 92 | } | ||
| 93 | |||
| 94 | void ProgramManager::RestoreGuestCompute() { | ||
| 95 | if (use_assembly_programs) { | ||
| 96 | glEnable(GL_COMPUTE_PROGRAM_NV); | ||
| 97 | glUseProgram(0); | ||
| 98 | } | ||
| 99 | } | ||
| 100 | |||
| 101 | void ProgramManager::UseVertexShader(GLuint program) { | ||
| 102 | if (use_assembly_programs) { | ||
| 103 | BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled); | ||
| 104 | } | ||
| 105 | current_state.vertex = program; | ||
| 106 | } | ||
| 107 | |||
| 108 | void ProgramManager::UseGeometryShader(GLuint program) { | ||
| 109 | if (use_assembly_programs) { | ||
| 110 | BindProgram(GL_GEOMETRY_PROGRAM_NV, program, current_state.geometry, geometry_enabled); | ||
| 111 | } | ||
| 112 | current_state.geometry = program; | ||
| 113 | } | ||
| 114 | |||
| 115 | void ProgramManager::UseFragmentShader(GLuint program) { | ||
| 116 | if (use_assembly_programs) { | ||
| 117 | BindProgram(GL_FRAGMENT_PROGRAM_NV, program, current_state.fragment, fragment_enabled); | ||
| 118 | } | ||
| 119 | current_state.fragment = program; | ||
| 120 | } | ||
| 121 | |||
| 122 | void ProgramManager::UpdateSourcePrograms() { | ||
| 123 | if (!is_graphics_bound) { | ||
| 124 | is_graphics_bound = true; | ||
| 125 | glUseProgram(0); | ||
| 126 | } | ||
| 127 | |||
| 128 | const GLuint handle = graphics_pipeline.handle; | ||
| 129 | const auto update_state = [handle](GLenum stage, GLuint current, GLuint old) { | ||
| 130 | if (current == old) { | ||
| 131 | return; | ||
| 132 | } | ||
| 133 | glUseProgramStages(handle, stage, current); | ||
| 134 | }; | ||
| 135 | update_state(GL_VERTEX_SHADER_BIT, current_state.vertex, old_state.vertex); | ||
| 136 | update_state(GL_GEOMETRY_SHADER_BIT, current_state.geometry, old_state.geometry); | ||
| 137 | update_state(GL_FRAGMENT_SHADER_BIT, current_state.fragment, old_state.fragment); | ||
| 138 | |||
| 139 | old_state = current_state; | ||
| 140 | } | ||
| 141 | |||
| 142 | void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) { | ||
| 143 | const auto& regs = maxwell.regs; | ||
| 144 | |||
| 145 | // Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value. | ||
| 146 | y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f; | ||
| 147 | } | ||
| 148 | |||
| 149 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index ad42cce74..d7ef0775d 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h | |||
| @@ -4,79 +4,142 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <cstddef> | 7 | #include <array> |
| 8 | #include <span> | ||
| 8 | 9 | ||
| 9 | #include <glad/glad.h> | 10 | #include <glad/glad.h> |
| 10 | 11 | ||
| 12 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 11 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 13 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 12 | #include "video_core/renderer_opengl/maxwell_to_gl.h" | ||
| 13 | 14 | ||
| 14 | namespace OpenGL { | 15 | namespace OpenGL { |
| 15 | 16 | ||
| 16 | class Device; | ||
| 17 | |||
| 18 | /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned | ||
| 19 | /// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at | ||
| 20 | /// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. | ||
| 21 | /// Not following that rule will cause problems on some AMD drivers. | ||
| 22 | struct alignas(16) MaxwellUniformData { | ||
| 23 | void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell); | ||
| 24 | |||
| 25 | GLfloat y_direction; | ||
| 26 | }; | ||
| 27 | static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect"); | ||
| 28 | static_assert(sizeof(MaxwellUniformData) < 16384, | ||
| 29 | "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec"); | ||
| 30 | |||
| 31 | class ProgramManager { | 17 | class ProgramManager { |
| 32 | public: | 18 | static constexpr size_t NUM_STAGES = 5; |
| 33 | explicit ProgramManager(const Device& device); | ||
| 34 | ~ProgramManager(); | ||
| 35 | |||
| 36 | /// Binds a compute program | ||
| 37 | void BindCompute(GLuint program); | ||
| 38 | |||
| 39 | /// Updates bound programs. | ||
| 40 | void BindGraphicsPipeline(); | ||
| 41 | |||
| 42 | /// Binds an OpenGL pipeline object unsynchronized with the guest state. | ||
| 43 | void BindHostPipeline(GLuint pipeline); | ||
| 44 | |||
| 45 | /// Rewinds BindHostPipeline state changes. | ||
| 46 | void RestoreGuestPipeline(); | ||
| 47 | |||
| 48 | /// Binds an OpenGL GLSL program object unsynchronized with the guest state. | ||
| 49 | void BindHostCompute(GLuint program); | ||
| 50 | 19 | ||
| 51 | /// Rewinds BindHostCompute state changes. | 20 | static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{ |
| 52 | void RestoreGuestCompute(); | 21 | GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, |
| 53 | 22 | GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, | |
| 54 | void UseVertexShader(GLuint program); | ||
| 55 | void UseGeometryShader(GLuint program); | ||
| 56 | void UseFragmentShader(GLuint program); | ||
| 57 | |||
| 58 | private: | ||
| 59 | struct PipelineState { | ||
| 60 | GLuint vertex = 0; | ||
| 61 | GLuint geometry = 0; | ||
| 62 | GLuint fragment = 0; | ||
| 63 | }; | 23 | }; |
| 64 | 24 | ||
| 65 | /// Update GLSL programs. | 25 | public: |
| 66 | void UpdateSourcePrograms(); | 26 | explicit ProgramManager(const Device& device) { |
| 67 | 27 | glCreateProgramPipelines(1, &pipeline.handle); | |
| 68 | OGLPipeline graphics_pipeline; | 28 | if (device.UseAssemblyShaders()) { |
| 69 | 29 | glEnable(GL_COMPUTE_PROGRAM_NV); | |
| 70 | PipelineState current_state; | 30 | } |
| 71 | PipelineState old_state; | 31 | } |
| 72 | 32 | ||
| 73 | bool use_assembly_programs = false; | 33 | void BindComputeProgram(GLuint program) { |
| 74 | 34 | glUseProgram(program); | |
| 75 | bool is_graphics_bound = true; | 35 | is_compute_bound = true; |
| 36 | } | ||
| 37 | |||
| 38 | void BindComputeAssemblyProgram(GLuint program) { | ||
| 39 | if (current_assembly_compute_program != program) { | ||
| 40 | current_assembly_compute_program = program; | ||
| 41 | glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program); | ||
| 42 | } | ||
| 43 | UnbindPipeline(); | ||
| 44 | } | ||
| 45 | |||
| 46 | void BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs) { | ||
| 47 | static constexpr std::array<GLenum, 5> stage_enums{ | ||
| 48 | GL_VERTEX_SHADER_BIT, GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT, | ||
| 49 | GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT, | ||
| 50 | }; | ||
| 51 | for (size_t stage = 0; stage < NUM_STAGES; ++stage) { | ||
| 52 | if (current_programs[stage] != programs[stage].handle) { | ||
| 53 | current_programs[stage] = programs[stage].handle; | ||
| 54 | glUseProgramStages(pipeline.handle, stage_enums[stage], programs[stage].handle); | ||
| 55 | } | ||
| 56 | } | ||
| 57 | BindPipeline(); | ||
| 58 | } | ||
| 59 | |||
| 60 | void BindPresentPrograms(GLuint vertex, GLuint fragment) { | ||
| 61 | if (current_programs[0] != vertex) { | ||
| 62 | current_programs[0] = vertex; | ||
| 63 | glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex); | ||
| 64 | } | ||
| 65 | if (current_programs[4] != fragment) { | ||
| 66 | current_programs[4] = fragment; | ||
| 67 | glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment); | ||
| 68 | } | ||
| 69 | glUseProgramStages( | ||
| 70 | pipeline.handle, | ||
| 71 | GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT | GL_GEOMETRY_SHADER_BIT, 0); | ||
| 72 | current_programs[1] = 0; | ||
| 73 | current_programs[2] = 0; | ||
| 74 | current_programs[3] = 0; | ||
| 75 | |||
| 76 | if (current_stage_mask != 0) { | ||
| 77 | current_stage_mask = 0; | ||
| 78 | for (const GLenum program_type : ASSEMBLY_PROGRAM_ENUMS) { | ||
| 79 | glDisable(program_type); | ||
| 80 | } | ||
| 81 | } | ||
| 82 | BindPipeline(); | ||
| 83 | } | ||
| 84 | |||
| 85 | void BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NUM_STAGES> programs, | ||
| 86 | u32 stage_mask) { | ||
| 87 | const u32 changed_mask = current_stage_mask ^ stage_mask; | ||
| 88 | current_stage_mask = stage_mask; | ||
| 89 | |||
| 90 | if (changed_mask != 0) { | ||
| 91 | for (size_t stage = 0; stage < NUM_STAGES; ++stage) { | ||
| 92 | if (((changed_mask >> stage) & 1) != 0) { | ||
| 93 | if (((stage_mask >> stage) & 1) != 0) { | ||
| 94 | glEnable(ASSEMBLY_PROGRAM_ENUMS[stage]); | ||
| 95 | } else { | ||
| 96 | glDisable(ASSEMBLY_PROGRAM_ENUMS[stage]); | ||
| 97 | } | ||
| 98 | } | ||
| 99 | } | ||
| 100 | } | ||
| 101 | for (size_t stage = 0; stage < NUM_STAGES; ++stage) { | ||
| 102 | if (current_programs[stage] != programs[stage].handle) { | ||
| 103 | current_programs[stage] = programs[stage].handle; | ||
| 104 | glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle); | ||
| 105 | } | ||
| 106 | } | ||
| 107 | UnbindPipeline(); | ||
| 108 | } | ||
| 109 | |||
| 110 | void RestoreGuestCompute() {} | ||
| 76 | 111 | ||
| 77 | bool vertex_enabled = false; | 112 | private: |
| 78 | bool geometry_enabled = false; | 113 | void BindPipeline() { |
| 79 | bool fragment_enabled = false; | 114 | if (!is_pipeline_bound) { |
| 115 | is_pipeline_bound = true; | ||
| 116 | glBindProgramPipeline(pipeline.handle); | ||
| 117 | } | ||
| 118 | UnbindCompute(); | ||
| 119 | } | ||
| 120 | |||
| 121 | void UnbindPipeline() { | ||
| 122 | if (is_pipeline_bound) { | ||
| 123 | is_pipeline_bound = false; | ||
| 124 | glBindProgramPipeline(0); | ||
| 125 | } | ||
| 126 | UnbindCompute(); | ||
| 127 | } | ||
| 128 | |||
| 129 | void UnbindCompute() { | ||
| 130 | if (is_compute_bound) { | ||
| 131 | is_compute_bound = false; | ||
| 132 | glUseProgram(0); | ||
| 133 | } | ||
| 134 | } | ||
| 135 | |||
| 136 | OGLPipeline pipeline; | ||
| 137 | bool is_pipeline_bound{}; | ||
| 138 | bool is_compute_bound{}; | ||
| 139 | |||
| 140 | u32 current_stage_mask = 0; | ||
| 141 | std::array<GLuint, NUM_STAGES> current_programs{}; | ||
| 142 | GLuint current_assembly_compute_program = 0; | ||
| 80 | }; | 143 | }; |
| 81 | 144 | ||
| 82 | } // namespace OpenGL | 145 | } // namespace OpenGL |
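A hedged usage sketch for the new header-only ProgramManager: BindSourcePrograms takes exactly one OGLProgram per stage (vertex, tessellation control, tessellation evaluation, geometry, fragment), and BindComputeProgram leans on the GL rule that a program bound with glUseProgram takes precedence over the bound pipeline. Everything outside the diff (function name, call sites) is hypothetical:

    #include <span>
    #include <glad/glad.h>
    #include "video_core/renderer_opengl/gl_shader_manager.h"

    // Drive a draw with separable GLSL programs, then a compute dispatch.
    void DrawThenDispatch(OpenGL::ProgramManager& manager,
                          std::span<const OpenGL::OGLProgram, 5> stages,
                          GLuint compute_program) {
        manager.BindSourcePrograms(stages);          // binds the pipeline, unbinds compute
        // ... glDrawArrays / glDrawElements calls here ...
        manager.BindComputeProgram(compute_program); // glUseProgram overrides the pipeline
        // ... glDispatchCompute(x, y, z) here ...
    }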
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index 4bf0d6090..d432072ad 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp | |||
| @@ -5,57 +5,108 @@ | |||
| 5 | #include <string_view> | 5 | #include <string_view> |
| 6 | #include <vector> | 6 | #include <vector> |
| 7 | #include <glad/glad.h> | 7 | #include <glad/glad.h> |
| 8 | |||
| 8 | #include "common/assert.h" | 9 | #include "common/assert.h" |
| 9 | #include "common/logging/log.h" | 10 | #include "common/logging/log.h" |
| 11 | #include "common/settings.h" | ||
| 10 | #include "video_core/renderer_opengl/gl_shader_util.h" | 12 | #include "video_core/renderer_opengl/gl_shader_util.h" |
| 11 | 13 | ||
| 12 | namespace OpenGL::GLShader { | 14 | namespace OpenGL { |
| 13 | 15 | ||
| 14 | namespace { | 16 | static OGLProgram LinkSeparableProgram(GLuint shader) { |
| 17 | OGLProgram program; | ||
| 18 | program.handle = glCreateProgram(); | ||
| 19 | glProgramParameteri(program.handle, GL_PROGRAM_SEPARABLE, GL_TRUE); | ||
| 20 | glAttachShader(program.handle, shader); | ||
| 21 | glLinkProgram(program.handle); | ||
| 22 | if (!Settings::values.renderer_debug) { | ||
| 23 | return program; | ||
| 24 | } | ||
| 25 | GLint link_status{}; | ||
| 26 | glGetProgramiv(program.handle, GL_LINK_STATUS, &link_status); | ||
| 15 | 27 | ||
| 16 | std::string_view StageDebugName(GLenum type) { | 28 | GLint log_length{}; |
| 17 | switch (type) { | 29 | glGetProgramiv(program.handle, GL_INFO_LOG_LENGTH, &log_length); |
| 18 | case GL_VERTEX_SHADER: | 30 | if (log_length == 0) { |
| 19 | return "vertex"; | 31 | return program; |
| 20 | case GL_GEOMETRY_SHADER: | 32 | } |
| 21 | return "geometry"; | 33 | std::string log(log_length, 0); |
| 22 | case GL_FRAGMENT_SHADER: | 34 | glGetProgramInfoLog(program.handle, log_length, nullptr, log.data()); |
| 23 | return "fragment"; | 35 | if (link_status == GL_FALSE) { |
| 24 | case GL_COMPUTE_SHADER: | 36 | LOG_ERROR(Render_OpenGL, "{}", log); |
| 25 | return "compute"; | 37 | } else { |
| 38 | LOG_WARNING(Render_OpenGL, "{}", log); | ||
| 26 | } | 39 | } |
| 27 | UNIMPLEMENTED(); | 40 | return program; |
| 28 | return "unknown"; | ||
| 29 | } | 41 | } |
| 30 | 42 | ||
| 31 | } // Anonymous namespace | 43 | static void LogShader(GLuint shader, std::string_view code = {}) { |
| 44 | GLint shader_status{}; | ||
| 45 | glGetShaderiv(shader, GL_COMPILE_STATUS, &shader_status); | ||
| 46 | if (shader_status == GL_FALSE) { | ||
| 47 | LOG_ERROR(Render_OpenGL, "Failed to build shader"); | ||
| 48 | } | ||
| 49 | GLint log_length{}; | ||
| 50 | glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length); | ||
| 51 | if (log_length == 0) { | ||
| 52 | return; | ||
| 53 | } | ||
| 54 | std::string log(log_length, 0); | ||
| 55 | glGetShaderInfoLog(shader, log_length, nullptr, log.data()); | ||
| 56 | if (shader_status == GL_FALSE) { | ||
| 57 | LOG_ERROR(Render_OpenGL, "{}", log); | ||
| 58 | if (!code.empty()) { | ||
| 59 | LOG_INFO(Render_OpenGL, "\n{}", code); | ||
| 60 | } | ||
| 61 | } else { | ||
| 62 | LOG_WARNING(Render_OpenGL, "{}", log); | ||
| 63 | } | ||
| 64 | } | ||
| 32 | 65 | ||
| 33 | GLuint LoadShader(std::string_view source, GLenum type) { | 66 | OGLProgram CreateProgram(std::string_view code, GLenum stage) { |
| 34 | const std::string_view debug_type = StageDebugName(type); | 67 | OGLShader shader; |
| 35 | const GLuint shader_id = glCreateShader(type); | 68 | shader.handle = glCreateShader(stage); |
| 36 | 69 | ||
| 37 | const GLchar* source_string = source.data(); | 70 | const GLint length = static_cast<GLint>(code.size()); |
| 38 | const GLint source_length = static_cast<GLint>(source.size()); | 71 | const GLchar* const code_ptr = code.data(); |
| 72 | glShaderSource(shader.handle, 1, &code_ptr, &length); | ||
| 73 | glCompileShader(shader.handle); | ||
| 74 | if (Settings::values.renderer_debug) { | ||
| 75 | LogShader(shader.handle, code); | ||
| 76 | } | ||
| 77 | return LinkSeparableProgram(shader.handle); | ||
| 78 | } | ||
| 39 | 79 | ||
| 40 | glShaderSource(shader_id, 1, &source_string, &source_length); | 80 | OGLProgram CreateProgram(std::span<const u32> code, GLenum stage) { |
| 41 | LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type); | 81 | OGLShader shader; |
| 42 | glCompileShader(shader_id); | 82 | shader.handle = glCreateShader(stage); |
| 43 | 83 | ||
| 44 | GLint result = GL_FALSE; | 84 | glShaderBinary(1, &shader.handle, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, code.data(), |
| 45 | GLint info_log_length; | 85 | static_cast<GLsizei>(code.size_bytes())); |
| 46 | glGetShaderiv(shader_id, GL_COMPILE_STATUS, &result); | 86 | glSpecializeShader(shader.handle, "main", 0, nullptr, nullptr); |
| 47 | glGetShaderiv(shader_id, GL_INFO_LOG_LENGTH, &info_log_length); | 87 | if (Settings::values.renderer_debug) { |
| 88 | LogShader(shader.handle); | ||
| 89 | } | ||
| 90 | return LinkSeparableProgram(shader.handle); | ||
| 91 | } | ||
| 48 | 92 | ||
| 49 | if (info_log_length > 1) { | 93 | OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target) { |
| 50 | std::string shader_error(info_log_length, ' '); | 94 | OGLAssemblyProgram program; |
| 51 | glGetShaderInfoLog(shader_id, info_log_length, nullptr, &shader_error[0]); | 95 | glGenProgramsARB(1, &program.handle); |
| 52 | if (result == GL_TRUE) { | 96 | glNamedProgramStringEXT(program.handle, target, GL_PROGRAM_FORMAT_ASCII_ARB, |
| 53 | LOG_DEBUG(Render_OpenGL, "{}", shader_error); | 97 | static_cast<GLsizei>(code.size()), code.data()); |
| 54 | } else { | 98 | if (Settings::values.renderer_debug) { |
| 55 | LOG_ERROR(Render_OpenGL, "Error compiling {} shader:\n{}", debug_type, shader_error); | 99 | const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV)); |
| 100 | if (err && *err) { | ||
| 101 | if (std::strstr(err, "error")) { | ||
| 102 | LOG_CRITICAL(Render_OpenGL, "\n{}", err); | ||
| 103 | LOG_INFO(Render_OpenGL, "\n{}", code); | ||
| 104 | } else { | ||
| 105 | LOG_WARNING(Render_OpenGL, "\n{}", err); | ||
| 106 | } | ||
| 56 | } | 107 | } |
| 57 | } | 108 | } |
| 58 | return shader_id; | 109 | return program; |
| 59 | } | 110 | } |
| 60 | 111 | ||
| 61 | } // namespace OpenGL::GLShader | 112 | } // namespace OpenGL |
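A caller-side sketch of the rewritten helper; the diagnostics above only run when Settings::values.renderer_debug is set, so release runs skip the glGetShaderiv/glGetProgramiv queries entirely. (If CompileProgram is copied out standalone, note it also needs <cstring> for std::strstr.) The wrapper below is hypothetical:

    #include <string_view>
    #include <glad/glad.h>
    #include "video_core/renderer_opengl/gl_shader_util.h"

    // Compile one separable fragment program from GLSL source.
    OpenGL::OGLProgram MakeFragment(std::string_view glsl) {
        // CreateProgram compiles the shader, links it with GL_PROGRAM_SEPARABLE,
        // and logs compile/link output only in renderer-debug runs.
        return OpenGL::CreateProgram(glsl, GL_FRAGMENT_SHADER);
    }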
diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h index 1b770532e..4e1a2a8e1 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.h +++ b/src/video_core/renderer_opengl/gl_shader_util.h | |||
| @@ -4,92 +4,23 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <span> | ||
| 7 | #include <string> | 8 | #include <string> |
| 9 | #include <string_view> | ||
| 8 | #include <vector> | 10 | #include <vector> |
| 11 | |||
| 9 | #include <glad/glad.h> | 12 | #include <glad/glad.h> |
| 13 | |||
| 10 | #include "common/assert.h" | 14 | #include "common/assert.h" |
| 11 | #include "common/logging/log.h" | 15 | #include "common/logging/log.h" |
| 16 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 12 | 17 | ||
| 13 | namespace OpenGL::GLShader { | 18 | namespace OpenGL { |
| 14 | |||
| 15 | /** | ||
| 16 | * Utility function to log the source code of a list of shaders. | ||
| 17 | * @param shaders The OpenGL shaders whose source we will print. | ||
| 18 | */ | ||
| 19 | template <typename... T> | ||
| 20 | void LogShaderSource(T... shaders) { | ||
| 21 | auto shader_list = {shaders...}; | ||
| 22 | |||
| 23 | for (const auto& shader : shader_list) { | ||
| 24 | if (shader == 0) | ||
| 25 | continue; | ||
| 26 | |||
| 27 | GLint source_length; | ||
| 28 | glGetShaderiv(shader, GL_SHADER_SOURCE_LENGTH, &source_length); | ||
| 29 | |||
| 30 | std::string source(source_length, ' '); | ||
| 31 | glGetShaderSource(shader, source_length, nullptr, &source[0]); | ||
| 32 | LOG_INFO(Render_OpenGL, "Shader source {}", source); | ||
| 33 | } | ||
| 34 | } | ||
| 35 | |||
| 36 | /** | ||
| 37 | * Utility function to create and compile an OpenGL GLSL shader | ||
| 38 | * @param source String of the GLSL shader program | ||
| 39 | * @param type Type of the shader (GL_VERTEX_SHADER, GL_GEOMETRY_SHADER or GL_FRAGMENT_SHADER) | ||
| 40 | */ | ||
| 41 | GLuint LoadShader(std::string_view source, GLenum type); | ||
| 42 | |||
| 43 | /** | ||
| 44 | * Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader) | ||
| 45 | * @param separable_program whether to create a separable program | ||
| 46 | * @param shaders ID of shaders to attach to the program | ||
| 47 | * @returns Handle of the newly created OpenGL program object | ||
| 48 | */ | ||
| 49 | template <typename... T> | ||
| 50 | GLuint LoadProgram(bool separable_program, bool hint_retrievable, T... shaders) { | ||
| 51 | // Link the program | ||
| 52 | LOG_DEBUG(Render_OpenGL, "Linking program..."); | ||
| 53 | |||
| 54 | GLuint program_id = glCreateProgram(); | ||
| 55 | |||
| 56 | ((shaders == 0 ? (void)0 : glAttachShader(program_id, shaders)), ...); | ||
| 57 | |||
| 58 | if (separable_program) { | ||
| 59 | glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE); | ||
| 60 | } | ||
| 61 | if (hint_retrievable) { | ||
| 62 | glProgramParameteri(program_id, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE); | ||
| 63 | } | ||
| 64 | |||
| 65 | glLinkProgram(program_id); | ||
| 66 | |||
| 67 | // Check the program | ||
| 68 | GLint result = GL_FALSE; | ||
| 69 | GLint info_log_length; | ||
| 70 | glGetProgramiv(program_id, GL_LINK_STATUS, &result); | ||
| 71 | glGetProgramiv(program_id, GL_INFO_LOG_LENGTH, &info_log_length); | ||
| 72 | |||
| 73 | if (info_log_length > 1) { | ||
| 74 | std::string program_error(info_log_length, ' '); | ||
| 75 | glGetProgramInfoLog(program_id, info_log_length, nullptr, &program_error[0]); | ||
| 76 | if (result == GL_TRUE) { | ||
| 77 | LOG_DEBUG(Render_OpenGL, "{}", program_error); | ||
| 78 | } else { | ||
| 79 | LOG_ERROR(Render_OpenGL, "Error linking shader:\n{}", program_error); | ||
| 80 | } | ||
| 81 | } | ||
| 82 | |||
| 83 | if (result == GL_FALSE) { | ||
| 84 | // There was a problem linking the shader, print the source for debugging purposes. | ||
| 85 | LogShaderSource(shaders...); | ||
| 86 | } | ||
| 87 | 19 | ||
| 88 | ASSERT_MSG(result == GL_TRUE, "Shader not linked"); | 20 | OGLProgram CreateProgram(std::string_view code, GLenum stage); |
| 89 | 21 | ||
| 90 | ((shaders == 0 ? (void)0 : glDetachShader(program_id, shaders)), ...); | 22 | OGLProgram CreateProgram(std::span<const u32> code, GLenum stage); |
| 91 | 23 | ||
| 92 | return program_id; | 24 | OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target); |
| 93 | } | ||
| 94 | 25 | ||
| 95 | } // namespace OpenGL::GLShader | 26 | } // namespace OpenGL |
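The trimmed header leaves three entry points: GLSL source, SPIR-V words, and NV assembly text. A sketch of the two non-GLSL paths, assuming the device exposes ARB_gl_spirv and the NV program extensions (wrapper names hypothetical; u32 comes from yuzu's common types):

    #include <span>
    #include <string_view>
    #include "common/common_types.h" // u32 alias (assumed location)
    #include "video_core/renderer_opengl/gl_shader_util.h"

    // SPIR-V path: glShaderBinary + glSpecializeShader with a fixed "main" entry.
    OpenGL::OGLProgram MakeVertexFromSpirv(std::span<const u32> words) {
        return OpenGL::CreateProgram(words, GL_VERTEX_SHADER);
    }

    // Assembly path: glNamedProgramStringEXT with GL_PROGRAM_FORMAT_ASCII_ARB.
    OpenGL::OGLAssemblyProgram MakeVertexAssembly(std::string_view arb_source) {
        return OpenGL::CompileProgram(arb_source, GL_VERTEX_PROGRAM_NV);
    }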
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp index dbdf5230f..586da84e3 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.cpp +++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp | |||
| @@ -83,11 +83,6 @@ void SetupDirtyScissors(Tables& tables) { | |||
| 83 | FillBlock(tables[1], OFF(scissor_test), NUM(scissor_test), Scissors); | 83 | FillBlock(tables[1], OFF(scissor_test), NUM(scissor_test), Scissors); |
| 84 | } | 84 | } |
| 85 | 85 | ||
| 86 | void SetupDirtyShaders(Tables& tables) { | ||
| 87 | FillBlock(tables[0], OFF(shader_config[0]), NUM(shader_config[0]) * Regs::MaxShaderProgram, | ||
| 88 | Shaders); | ||
| 89 | } | ||
| 90 | |||
| 91 | void SetupDirtyPolygonModes(Tables& tables) { | 86 | void SetupDirtyPolygonModes(Tables& tables) { |
| 92 | tables[0][OFF(polygon_mode_front)] = PolygonModeFront; | 87 | tables[0][OFF(polygon_mode_front)] = PolygonModeFront; |
| 93 | tables[0][OFF(polygon_mode_back)] = PolygonModeBack; | 88 | tables[0][OFF(polygon_mode_back)] = PolygonModeBack; |
| @@ -217,7 +212,6 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} | |||
| 217 | SetupDirtyScissors(tables); | 212 | SetupDirtyScissors(tables); |
| 218 | SetupDirtyVertexInstances(tables); | 213 | SetupDirtyVertexInstances(tables); |
| 219 | SetupDirtyVertexFormat(tables); | 214 | SetupDirtyVertexFormat(tables); |
| 220 | SetupDirtyShaders(tables); | ||
| 221 | SetupDirtyPolygonModes(tables); | 215 | SetupDirtyPolygonModes(tables); |
| 222 | SetupDirtyDepthTest(tables); | 216 | SetupDirtyDepthTest(tables); |
| 223 | SetupDirtyStencilTest(tables); | 217 | SetupDirtyStencilTest(tables); |
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h index 94c905116..5864c7c07 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.h +++ b/src/video_core/renderer_opengl/gl_state_tracker.h | |||
| @@ -52,7 +52,6 @@ enum : u8 { | |||
| 52 | BlendState0, | 52 | BlendState0, |
| 53 | BlendState7 = BlendState0 + 7, | 53 | BlendState7 = BlendState0 + 7, |
| 54 | 54 | ||
| 55 | Shaders, | ||
| 56 | ClipDistances, | 55 | ClipDistances, |
| 57 | 56 | ||
| 58 | PolygonModes, | 57 | PolygonModes, |
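Context for the two removals above: the tracker maps every Maxwell register offset to a dirty-flag id, and SetupDirtyShaders raised the now-deleted Shaders flag whenever a shader_config register was written; after this change nothing raises or consumes that flag. A simplified sketch of the table-filling idea, with hypothetical sizes and types:

    #include <array>
    #include <cstddef>
    #include <cstdint>

    constexpr std::size_t NUM_REGS = 0x1000;           // hypothetical register count
    using Table = std::array<std::uint8_t, NUM_REGS>;  // one flag id per register

    // A write to any register in [offset, offset + count) marks `flag` dirty.
    void FillBlock(Table& table, std::size_t offset, std::size_t count,
                   std::uint8_t flag) {
        for (std::size_t i = 0; i < count; ++i) {
            table[offset + i] = flag;
        }
    }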
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index ff0f03e99..c373c9cb4 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp | |||
| @@ -24,9 +24,7 @@ | |||
| 24 | #include "video_core/textures/decoders.h" | 24 | #include "video_core/textures/decoders.h" |
| 25 | 25 | ||
| 26 | namespace OpenGL { | 26 | namespace OpenGL { |
| 27 | |||
| 28 | namespace { | 27 | namespace { |
| 29 | |||
| 30 | using Tegra::Texture::SwizzleSource; | 28 | using Tegra::Texture::SwizzleSource; |
| 31 | using Tegra::Texture::TextureMipmapFilter; | 29 | using Tegra::Texture::TextureMipmapFilter; |
| 32 | using Tegra::Texture::TextureType; | 30 | using Tegra::Texture::TextureType; |
| @@ -59,107 +57,6 @@ struct CopyRegion { | |||
| 59 | GLsizei depth; | 57 | GLsizei depth; |
| 60 | }; | 58 | }; |
| 61 | 59 | ||
| 62 | struct FormatTuple { | ||
| 63 | GLenum internal_format; | ||
| 64 | GLenum format = GL_NONE; | ||
| 65 | GLenum type = GL_NONE; | ||
| 66 | }; | ||
| 67 | |||
| 68 | constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{ | ||
| 69 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM | ||
| 70 | {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM | ||
| 71 | {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT | ||
| 72 | {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT | ||
| 73 | {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM | ||
| 74 | {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM | ||
| 75 | {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM | ||
| 76 | {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM | ||
| 77 | {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT | ||
| 78 | {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM | ||
| 79 | {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM | ||
| 80 | {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM | ||
| 81 | {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT | ||
| 82 | {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT | ||
| 83 | {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT | ||
| 84 | {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM | ||
| 85 | {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM | ||
| 86 | {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT | ||
| 87 | {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT | ||
| 88 | {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT | ||
| 89 | {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT | ||
| 90 | {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM | ||
| 91 | {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM | ||
| 92 | {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM | ||
| 93 | {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM | ||
| 94 | {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM | ||
| 95 | {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM | ||
| 96 | {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM | ||
| 97 | {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM | ||
| 98 | {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT | ||
| 99 | {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT | ||
| 100 | {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM | ||
| 101 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM | ||
| 102 | {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT | ||
| 103 | {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT | ||
| 104 | {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT | ||
| 105 | {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT | ||
| 106 | {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT | ||
| 107 | {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT | ||
| 108 | {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM | ||
| 109 | {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM | ||
| 110 | {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT | ||
| 111 | {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT | ||
| 112 | {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM | ||
| 113 | {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT | ||
| 114 | {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT | ||
| 115 | {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT | ||
| 116 | {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM | ||
| 117 | {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT | ||
| 118 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB | ||
| 119 | {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM | ||
| 120 | {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM | ||
| 121 | {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT | ||
| 122 | {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT | ||
| 123 | {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT | ||
| 124 | {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT | ||
| 125 | {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT | ||
| 126 | {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT | ||
| 127 | {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM | ||
| 128 | {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM | ||
| 129 | {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM | ||
| 130 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB | ||
| 131 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB | ||
| 132 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB | ||
| 133 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB | ||
| 134 | {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB | ||
| 135 | {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM | ||
| 136 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB | ||
| 137 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB | ||
| 138 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB | ||
| 139 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB | ||
| 140 | {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM | ||
| 141 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB | ||
| 142 | {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM | ||
| 143 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB | ||
| 144 | {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM | ||
| 145 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB | ||
| 146 | {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM | ||
| 147 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB | ||
| 148 | {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM | ||
| 149 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB | ||
| 150 | {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM | ||
| 151 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB | ||
| 152 | {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM | ||
| 153 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB | ||
| 154 | {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT | ||
| 155 | {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT | ||
| 156 | {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM | ||
| 157 | {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT | ||
| 158 | {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM | ||
| 159 | {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, | ||
| 160 | GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT | ||
| 161 | }}; | ||
| 162 | |||
| 163 | constexpr std::array ACCELERATED_FORMATS{ | 60 | constexpr std::array ACCELERATED_FORMATS{ |
| 164 | GL_RGBA32F, GL_RGBA16F, GL_RG32F, GL_RG16F, GL_R11F_G11F_B10F, GL_R32F, | 61 | GL_RGBA32F, GL_RGBA16F, GL_RG32F, GL_RG16F, GL_R11F_G11F_B10F, GL_R32F, |
| 165 | GL_R16F, GL_RGBA32UI, GL_RGBA16UI, GL_RGB10_A2UI, GL_RGBA8UI, GL_RG32UI, | 62 | GL_R16F, GL_RGBA32UI, GL_RGBA16UI, GL_RGB10_A2UI, GL_RGBA8UI, GL_RG32UI, |
| @@ -170,11 +67,6 @@ constexpr std::array ACCELERATED_FORMATS{ | |||
| 170 | GL_RG8_SNORM, GL_R16_SNORM, GL_R8_SNORM, | 67 | GL_RG8_SNORM, GL_R16_SNORM, GL_R8_SNORM, |
| 171 | }; | 68 | }; |
| 172 | 69 | ||
| 173 | const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { | ||
| 174 | ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size()); | ||
| 175 | return FORMAT_TABLE[static_cast<size_t>(pixel_format)]; | ||
| 176 | } | ||
| 177 | |||
| 178 | GLenum ImageTarget(const VideoCommon::ImageInfo& info) { | 70 | GLenum ImageTarget(const VideoCommon::ImageInfo& info) { |
| 179 | switch (info.type) { | 71 | switch (info.type) { |
| 180 | case ImageType::e1D: | 72 | case ImageType::e1D: |
| @@ -195,26 +87,24 @@ GLenum ImageTarget(const VideoCommon::ImageInfo& info) { | |||
| 195 | return GL_NONE; | 87 | return GL_NONE; |
| 196 | } | 88 | } |
| 197 | 89 | ||
| 198 | GLenum ImageTarget(ImageViewType type, int num_samples = 1) { | 90 | GLenum ImageTarget(Shader::TextureType type, int num_samples = 1) { |
| 199 | const bool is_multisampled = num_samples > 1; | 91 | const bool is_multisampled = num_samples > 1; |
| 200 | switch (type) { | 92 | switch (type) { |
| 201 | case ImageViewType::e1D: | 93 | case Shader::TextureType::Color1D: |
| 202 | return GL_TEXTURE_1D; | 94 | return GL_TEXTURE_1D; |
| 203 | case ImageViewType::e2D: | 95 | case Shader::TextureType::Color2D: |
| 204 | return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D; | 96 | return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D; |
| 205 | case ImageViewType::Cube: | 97 | case Shader::TextureType::ColorCube: |
| 206 | return GL_TEXTURE_CUBE_MAP; | 98 | return GL_TEXTURE_CUBE_MAP; |
| 207 | case ImageViewType::e3D: | 99 | case Shader::TextureType::Color3D: |
| 208 | return GL_TEXTURE_3D; | 100 | return GL_TEXTURE_3D; |
| 209 | case ImageViewType::e1DArray: | 101 | case Shader::TextureType::ColorArray1D: |
| 210 | return GL_TEXTURE_1D_ARRAY; | 102 | return GL_TEXTURE_1D_ARRAY; |
| 211 | case ImageViewType::e2DArray: | 103 | case Shader::TextureType::ColorArray2D: |
| 212 | return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY; | 104 | return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY; |
| 213 | case ImageViewType::CubeArray: | 105 | case Shader::TextureType::ColorArrayCube: |
| 214 | return GL_TEXTURE_CUBE_MAP_ARRAY; | 106 | return GL_TEXTURE_CUBE_MAP_ARRAY; |
| 215 | case ImageViewType::Rect: | 107 | case Shader::TextureType::Buffer: |
| 216 | return GL_TEXTURE_RECTANGLE; | ||
| 217 | case ImageViewType::Buffer: | ||
| 218 | return GL_TEXTURE_BUFFER; | 108 | return GL_TEXTURE_BUFFER; |
| 219 | } | 109 | } |
| 220 | UNREACHABLE_MSG("Invalid image view type={}", type); | 110 | UNREACHABLE_MSG("Invalid image view type={}", type); |
| @@ -322,7 +212,7 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4 | |||
| 322 | default: | 212 | default: |
| 323 | return false; | 213 | return false; |
| 324 | } | 214 | } |
| 325 | const GLenum internal_format = GetFormatTuple(info.format).internal_format; | 215 | const GLenum internal_format = MaxwellToGL::GetFormatTuple(info.format).internal_format; |
| 326 | const auto& format_info = runtime.FormatInfo(info.type, internal_format); | 216 | const auto& format_info = runtime.FormatInfo(info.type, internal_format); |
| 327 | if (format_info.is_compressed) { | 217 | if (format_info.is_compressed) { |
| 328 | return false; | 218 | return false; |
| @@ -414,11 +304,10 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4 | |||
| 414 | 304 | ||
| 415 | void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) { | 305 | void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) { |
| 416 | if (False(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) { | 306 | if (False(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) { |
| 417 | const GLuint texture = image_view->DefaultHandle(); | 307 | glNamedFramebufferTexture(fbo, attachment, image_view->DefaultHandle(), 0); |
| 418 | glNamedFramebufferTexture(fbo, attachment, texture, 0); | ||
| 419 | return; | 308 | return; |
| 420 | } | 309 | } |
| 421 | const GLuint texture = image_view->Handle(ImageViewType::e3D); | 310 | const GLuint texture = image_view->Handle(Shader::TextureType::Color3D); |
| 422 | if (image_view->range.extent.layers > 1) { | 311 | if (image_view->range.extent.layers > 1) { |
| 423 | // TODO: OpenGL doesn't support rendering to a fixed number of slices | 312 | // TODO: OpenGL doesn't support rendering to a fixed number of slices |
| 424 | glNamedFramebufferTexture(fbo, attachment, texture, 0); | 313 | glNamedFramebufferTexture(fbo, attachment, texture, 0); |
| @@ -439,6 +328,28 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) { | |||
| 439 | } | 328 | } |
| 440 | } | 329 | } |
| 441 | 330 | ||
| 331 | [[nodiscard]] GLenum ShaderFormat(Shader::ImageFormat format) { | ||
| 332 | switch (format) { | ||
| 333 | case Shader::ImageFormat::Typeless: | ||
| 334 | break; | ||
| 335 | case Shader::ImageFormat::R8_SINT: | ||
| 336 | return GL_R8I; | ||
| 337 | case Shader::ImageFormat::R8_UINT: | ||
| 338 | return GL_R8UI; | ||
| 339 | case Shader::ImageFormat::R16_UINT: | ||
| 340 | return GL_R16UI; | ||
| 341 | case Shader::ImageFormat::R16_SINT: | ||
| 342 | return GL_R16I; | ||
| 343 | case Shader::ImageFormat::R32_UINT: | ||
| 344 | return GL_R32UI; | ||
| 345 | case Shader::ImageFormat::R32G32_UINT: | ||
| 346 | return GL_RG32UI; | ||
| 347 | case Shader::ImageFormat::R32G32B32A32_UINT: | ||
| 348 | return GL_RGBA32UI; | ||
| 349 | } | ||
| 350 | UNREACHABLE_MSG("Invalid image format={}", format); | ||
| 351 | return GL_R32UI; | ||
| 352 | } | ||
| 442 | } // Anonymous namespace | 353 | } // Anonymous namespace |
| 443 | 354 | ||
| 444 | ImageBufferMap::~ImageBufferMap() { | 355 | ImageBufferMap::~ImageBufferMap() { |
| @@ -453,7 +364,7 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& | |||
| 453 | static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D}; | 364 | static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D}; |
| 454 | for (size_t i = 0; i < TARGETS.size(); ++i) { | 365 | for (size_t i = 0; i < TARGETS.size(); ++i) { |
| 455 | const GLenum target = TARGETS[i]; | 366 | const GLenum target = TARGETS[i]; |
| 456 | for (const FormatTuple& tuple : FORMAT_TABLE) { | 367 | for (const MaxwellToGL::FormatTuple& tuple : MaxwellToGL::FORMAT_TABLE) { |
| 457 | const GLenum format = tuple.internal_format; | 368 | const GLenum format = tuple.internal_format; |
| 458 | GLint compat_class; | 369 | GLint compat_class; |
| 459 | GLint compat_type; | 370 | GLint compat_type; |
| @@ -475,11 +386,9 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& | |||
| 475 | null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY); | 386 | null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY); |
| 476 | null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY); | 387 | null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY); |
| 477 | null_image_3d.Create(GL_TEXTURE_3D); | 388 | null_image_3d.Create(GL_TEXTURE_3D); |
| 478 | null_image_rect.Create(GL_TEXTURE_RECTANGLE); | ||
| 479 | glTextureStorage2D(null_image_1d_array.handle, 1, GL_R8, 1, 1); | 389 | glTextureStorage2D(null_image_1d_array.handle, 1, GL_R8, 1, 1); |
| 480 | glTextureStorage3D(null_image_cube_array.handle, 1, GL_R8, 1, 1, 6); | 390 | glTextureStorage3D(null_image_cube_array.handle, 1, GL_R8, 1, 1, 6); |
| 481 | glTextureStorage3D(null_image_3d.handle, 1, GL_R8, 1, 1, 1); | 391 | glTextureStorage3D(null_image_3d.handle, 1, GL_R8, 1, 1, 1); |
| 482 | glTextureStorage2D(null_image_rect.handle, 1, GL_R8, 1, 1); | ||
| 483 | 392 | ||
| 484 | std::array<GLuint, 4> new_handles; | 393 | std::array<GLuint, 4> new_handles; |
| 485 | glGenTextures(static_cast<GLsizei>(new_handles.size()), new_handles.data()); | 394 | glGenTextures(static_cast<GLsizei>(new_handles.size()), new_handles.data()); |
| @@ -496,29 +405,28 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& | |||
| 496 | glTextureView(null_image_view_cube.handle, GL_TEXTURE_CUBE_MAP, null_image_cube_array.handle, | 405 | glTextureView(null_image_view_cube.handle, GL_TEXTURE_CUBE_MAP, null_image_cube_array.handle, |
| 497 | GL_R8, 0, 1, 0, 6); | 406 | GL_R8, 0, 1, 0, 6); |
| 498 | const std::array texture_handles{ | 407 | const std::array texture_handles{ |
| 499 | null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle, | 408 | null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle, |
| 500 | null_image_rect.handle, null_image_view_1d.handle, null_image_view_2d.handle, | 409 | null_image_view_1d.handle, null_image_view_2d.handle, null_image_view_2d_array.handle, |
| 501 | null_image_view_2d_array.handle, null_image_view_cube.handle, | 410 | null_image_view_cube.handle, |
| 502 | }; | 411 | }; |
| 503 | for (const GLuint handle : texture_handles) { | 412 | for (const GLuint handle : texture_handles) { |
| 504 | static constexpr std::array NULL_SWIZZLE{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO}; | 413 | static constexpr std::array NULL_SWIZZLE{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO}; |
| 505 | glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, NULL_SWIZZLE.data()); | 414 | glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, NULL_SWIZZLE.data()); |
| 506 | } | 415 | } |
| 507 | const auto set_view = [this](ImageViewType type, GLuint handle) { | 416 | const auto set_view = [this](Shader::TextureType type, GLuint handle) { |
| 508 | if (device.HasDebuggingToolAttached()) { | 417 | if (device.HasDebuggingToolAttached()) { |
| 509 | const std::string name = fmt::format("NullImage {}", type); | 418 | const std::string name = fmt::format("NullImage {}", type); |
| 510 | glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data()); | 419 | glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data()); |
| 511 | } | 420 | } |
| 512 | null_image_views[static_cast<size_t>(type)] = handle; | 421 | null_image_views[static_cast<size_t>(type)] = handle; |
| 513 | }; | 422 | }; |
| 514 | set_view(ImageViewType::e1D, null_image_view_1d.handle); | 423 | set_view(Shader::TextureType::Color1D, null_image_view_1d.handle); |
| 515 | set_view(ImageViewType::e2D, null_image_view_2d.handle); | 424 | set_view(Shader::TextureType::Color2D, null_image_view_2d.handle); |
| 516 | set_view(ImageViewType::Cube, null_image_view_cube.handle); | 425 | set_view(Shader::TextureType::ColorCube, null_image_view_cube.handle); |
| 517 | set_view(ImageViewType::e3D, null_image_3d.handle); | 426 | set_view(Shader::TextureType::Color3D, null_image_3d.handle); |
| 518 | set_view(ImageViewType::e1DArray, null_image_1d_array.handle); | 427 | set_view(Shader::TextureType::ColorArray1D, null_image_1d_array.handle); |
| 519 | set_view(ImageViewType::e2DArray, null_image_view_2d_array.handle); | 428 | set_view(Shader::TextureType::ColorArray2D, null_image_view_2d_array.handle); |
| 520 | set_view(ImageViewType::CubeArray, null_image_cube_array.handle); | 429 | set_view(Shader::TextureType::ColorArrayCube, null_image_cube_array.handle); |
| 521 | set_view(ImageViewType::Rect, null_image_rect.handle); | ||
| 522 | } | 430 | } |
| 523 | 431 | ||
| 524 | TextureCacheRuntime::~TextureCacheRuntime() = default; | 432 | TextureCacheRuntime::~TextureCacheRuntime() = default; |
| @@ -710,7 +618,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, | |||
| 710 | gl_format = GL_RGBA; | 618 | gl_format = GL_RGBA; |
| 711 | gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; | 619 | gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; |
| 712 | } else { | 620 | } else { |
| 713 | const auto& tuple = GetFormatTuple(info.format); | 621 | const auto& tuple = MaxwellToGL::GetFormatTuple(info.format); |
| 714 | gl_internal_format = tuple.internal_format; | 622 | gl_internal_format = tuple.internal_format; |
| 715 | gl_format = tuple.format; | 623 | gl_format = tuple.format; |
| 716 | gl_type = tuple.type; | 624 | gl_type = tuple.type; |
| @@ -750,8 +658,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, | |||
| 750 | glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth); | 658 | glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth); |
| 751 | break; | 659 | break; |
| 752 | case GL_TEXTURE_BUFFER: | 660 | case GL_TEXTURE_BUFFER: |
| 753 | buffer.Create(); | 661 | UNREACHABLE(); |
| 754 | glNamedBufferStorage(buffer.handle, guest_size_bytes, nullptr, 0); | ||
| 755 | break; | 662 | break; |
| 756 | default: | 663 | default: |
| 757 | UNREACHABLE_MSG("Invalid target=0x{:x}", target); | 664 | UNREACHABLE_MSG("Invalid target=0x{:x}", target); |
| @@ -789,14 +696,6 @@ void Image::UploadMemory(const ImageBufferMap& map, | |||
| 789 | } | 696 | } |
| 790 | } | 697 | } |
| 791 | 698 | ||
| 792 | void Image::UploadMemory(const ImageBufferMap& map, | ||
| 793 | std::span<const VideoCommon::BufferCopy> copies) { | ||
| 794 | for (const VideoCommon::BufferCopy& copy : copies) { | ||
| 795 | glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + map.offset, | ||
| 796 | copy.dst_offset, copy.size); | ||
| 797 | } | ||
| 798 | } | ||
| 799 | |||
| 800 | void Image::DownloadMemory(ImageBufferMap& map, | 699 | void Image::DownloadMemory(ImageBufferMap& map, |
| 801 | std::span<const VideoCommon::BufferImageCopy> copies) { | 700 | std::span<const VideoCommon::BufferImageCopy> copies) { |
| 802 | glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API | 701 | glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API |
| @@ -958,23 +857,30 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI | |||
| 958 | if (True(image.flags & ImageFlagBits::Converted)) { | 857 | if (True(image.flags & ImageFlagBits::Converted)) { |
| 959 | internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; | 858 | internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; |
| 960 | } else { | 859 | } else { |
| 961 | internal_format = GetFormatTuple(format).internal_format; | 860 | internal_format = MaxwellToGL::GetFormatTuple(format).internal_format; |
| 861 | } | ||
| 862 | full_range = info.range; | ||
| 863 | flat_range = info.range; | ||
| 864 | set_object_label = device.HasDebuggingToolAttached(); | ||
| 865 | is_render_target = info.IsRenderTarget(); | ||
| 866 | original_texture = image.texture.handle; | ||
| 867 | num_samples = image.info.num_samples; | ||
| 868 | if (!is_render_target) { | ||
| 869 | swizzle[0] = info.x_source; | ||
| 870 | swizzle[1] = info.y_source; | ||
| 871 | swizzle[2] = info.z_source; | ||
| 872 | swizzle[3] = info.w_source; | ||
| 962 | } | 873 | } |
| 963 | VideoCommon::SubresourceRange flatten_range = info.range; | ||
| 964 | std::array<GLuint, 2> handles; | ||
| 965 | stored_views.reserve(2); | ||
| 966 | |||
| 967 | switch (info.type) { | 874 | switch (info.type) { |
| 968 | case ImageViewType::e1DArray: | 875 | case ImageViewType::e1DArray: |
| 969 | flatten_range.extent.layers = 1; | 876 | flat_range.extent.layers = 1; |
| 970 | [[fallthrough]]; | 877 | [[fallthrough]]; |
| 971 | case ImageViewType::e1D: | 878 | case ImageViewType::e1D: |
| 972 | glGenTextures(2, handles.data()); | 879 | SetupView(Shader::TextureType::Color1D); |
| 973 | SetupView(device, image, ImageViewType::e1D, handles[0], info, flatten_range); | 880 | SetupView(Shader::TextureType::ColorArray1D); |
| 974 | SetupView(device, image, ImageViewType::e1DArray, handles[1], info, info.range); | ||
| 975 | break; | 881 | break; |
| 976 | case ImageViewType::e2DArray: | 882 | case ImageViewType::e2DArray: |
| 977 | flatten_range.extent.layers = 1; | 883 | flat_range.extent.layers = 1; |
| 978 | [[fallthrough]]; | 884 | [[fallthrough]]; |
| 979 | case ImageViewType::e2D: | 885 | case ImageViewType::e2D: |
| 980 | if (True(flags & VideoCommon::ImageViewFlagBits::Slice)) { | 886 | if (True(flags & VideoCommon::ImageViewFlagBits::Slice)) { |
| @@ -984,63 +890,126 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI | |||
| 984 | .base = {.level = info.range.base.level, .layer = 0}, | 890 | .base = {.level = info.range.base.level, .layer = 0}, |
| 985 | .extent = {.levels = 1, .layers = 1}, | 891 | .extent = {.levels = 1, .layers = 1}, |
| 986 | }; | 892 | }; |
| 987 | glGenTextures(1, handles.data()); | 893 | full_range = slice_range; |
| 988 | SetupView(device, image, ImageViewType::e3D, handles[0], info, slice_range); | 894 | |
| 989 | break; | 895 | SetupView(Shader::TextureType::Color3D); |
| 896 | } else { | ||
| 897 | SetupView(Shader::TextureType::Color2D); | ||
| 898 | SetupView(Shader::TextureType::ColorArray2D); | ||
| 990 | } | 899 | } |
| 991 | glGenTextures(2, handles.data()); | ||
| 992 | SetupView(device, image, ImageViewType::e2D, handles[0], info, flatten_range); | ||
| 993 | SetupView(device, image, ImageViewType::e2DArray, handles[1], info, info.range); | ||
| 994 | break; | 900 | break; |
| 995 | case ImageViewType::e3D: | 901 | case ImageViewType::e3D: |
| 996 | glGenTextures(1, handles.data()); | 902 | SetupView(Shader::TextureType::Color3D); |
| 997 | SetupView(device, image, ImageViewType::e3D, handles[0], info, info.range); | ||
| 998 | break; | 903 | break; |
| 999 | case ImageViewType::CubeArray: | 904 | case ImageViewType::CubeArray: |
| 1000 | flatten_range.extent.layers = 6; | 905 | flat_range.extent.layers = 6; |
| 1001 | [[fallthrough]]; | 906 | [[fallthrough]]; |
| 1002 | case ImageViewType::Cube: | 907 | case ImageViewType::Cube: |
| 1003 | glGenTextures(2, handles.data()); | 908 | SetupView(Shader::TextureType::ColorCube); |
| 1004 | SetupView(device, image, ImageViewType::Cube, handles[0], info, flatten_range); | 909 | SetupView(Shader::TextureType::ColorArrayCube); |
| 1005 | SetupView(device, image, ImageViewType::CubeArray, handles[1], info, info.range); | ||
| 1006 | break; | 910 | break; |
| 1007 | case ImageViewType::Rect: | 911 | case ImageViewType::Rect: |
| 1008 | glGenTextures(1, handles.data()); | 912 | UNIMPLEMENTED(); |
| 1009 | SetupView(device, image, ImageViewType::Rect, handles[0], info, info.range); | ||
| 1010 | break; | 913 | break; |
| 1011 | case ImageViewType::Buffer: | 914 | case ImageViewType::Buffer: |
| 1012 | glCreateTextures(GL_TEXTURE_BUFFER, 1, handles.data()); | 915 | UNREACHABLE(); |
| 1013 | SetupView(device, image, ImageViewType::Buffer, handles[0], info, info.range); | 916 | break; |
| 917 | } | ||
| 918 | switch (info.type) { | ||
| 919 | case ImageViewType::e1D: | ||
| 920 | default_handle = Handle(Shader::TextureType::Color1D); | ||
| 921 | break; | ||
| 922 | case ImageViewType::e1DArray: | ||
| 923 | default_handle = Handle(Shader::TextureType::ColorArray1D); | ||
| 924 | break; | ||
| 925 | case ImageViewType::e2D: | ||
| 926 | default_handle = Handle(Shader::TextureType::Color2D); | ||
| 927 | break; | ||
| 928 | case ImageViewType::e2DArray: | ||
| 929 | default_handle = Handle(Shader::TextureType::ColorArray2D); | ||
| 930 | break; | ||
| 931 | case ImageViewType::e3D: | ||
| 932 | default_handle = Handle(Shader::TextureType::Color3D); | ||
| 933 | break; | ||
| 934 | case ImageViewType::Cube: | ||
| 935 | default_handle = Handle(Shader::TextureType::ColorCube); | ||
| 936 | break; | ||
| 937 | case ImageViewType::CubeArray: | ||
| 938 | default_handle = Handle(Shader::TextureType::ColorArrayCube); | ||
| 939 | break; | ||
| 940 | default: | ||
| 1014 | break; | 941 | break; |
| 1015 | } | 942 | } |
| 1016 | default_handle = Handle(info.type); | ||
| 1017 | } | 943 | } |
| 1018 | 944 | ||
| 945 | ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, | ||
| 946 | const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_) | ||
| 947 | : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_}, | ||
| 948 | buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {} | ||
| 949 | |||
| 950 | ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, | ||
| 951 | const VideoCommon::ImageViewInfo& view_info) | ||
| 952 | : VideoCommon::ImageViewBase{info, view_info} {} | ||
| 953 | |||
| 1019 | ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params) | 954 | ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params) |
| 1020 | : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {} | 955 | : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {} |
| 1021 | 956 | ||
| 1022 | void ImageView::SetupView(const Device& device, Image& image, ImageViewType view_type, | 957 | GLuint ImageView::StorageView(Shader::TextureType texture_type, Shader::ImageFormat image_format) { |
| 1023 | GLuint handle, const VideoCommon::ImageViewInfo& info, | 958 | if (image_format == Shader::ImageFormat::Typeless) { |
| 1024 | VideoCommon::SubresourceRange view_range) { | 959 | return Handle(texture_type); |
| 1025 | if (info.type == ImageViewType::Buffer) { | 960 | } |
| 1026 | // TODO: Take offset from buffer cache | 961 | const bool is_signed{image_format == Shader::ImageFormat::R8_SINT || |
| 1027 | glTextureBufferRange(handle, internal_format, image.buffer.handle, 0, | 962 | image_format == Shader::ImageFormat::R16_SINT}; |
| 1028 | image.guest_size_bytes); | 963 | if (!storage_views) { |
| 1029 | } else { | 964 | storage_views = std::make_unique<StorageViews>(); |
| 1030 | const GLuint parent = image.texture.handle; | 965 | } |
| 1031 | const GLenum target = ImageTarget(view_type, image.info.num_samples); | 966 | auto& type_views{is_signed ? storage_views->signeds : storage_views->unsigneds}; |
| 1032 | glTextureView(handle, target, parent, internal_format, view_range.base.level, | 967 | GLuint& view{type_views[static_cast<size_t>(texture_type)]}; |
| 1033 | view_range.extent.levels, view_range.base.layer, view_range.extent.layers); | 968 | if (view == 0) { |
| 1034 | if (!info.IsRenderTarget()) { | 969 | view = MakeView(texture_type, ShaderFormat(image_format)); |
| 1035 | ApplySwizzle(handle, format, info.Swizzle()); | 970 | } |
| 1036 | } | 971 | return view; |
| 972 | } | ||
| 973 | |||
| 974 | void ImageView::SetupView(Shader::TextureType view_type) { | ||
| 975 | views[static_cast<size_t>(view_type)] = MakeView(view_type, internal_format); | ||
| 976 | } | ||
| 977 | |||
| 978 | GLuint ImageView::MakeView(Shader::TextureType view_type, GLenum view_format) { | ||
| 979 | VideoCommon::SubresourceRange view_range; | ||
| 980 | switch (view_type) { | ||
| 981 | case Shader::TextureType::Color1D: | ||
| 982 | case Shader::TextureType::Color2D: | ||
| 983 | case Shader::TextureType::ColorCube: | ||
| 984 | view_range = flat_range; | ||
| 985 | break; | ||
| 986 | case Shader::TextureType::ColorArray1D: | ||
| 987 | case Shader::TextureType::ColorArray2D: | ||
| 988 | case Shader::TextureType::Color3D: | ||
| 989 | case Shader::TextureType::ColorArrayCube: | ||
| 990 | view_range = full_range; | ||
| 991 | break; | ||
| 992 | default: | ||
| 993 | UNREACHABLE(); | ||
| 1037 | } | 994 | } |
| 1038 | if (device.HasDebuggingToolAttached()) { | 995 | OGLTextureView& view = stored_views.emplace_back(); |
| 1039 | const std::string name = VideoCommon::Name(*this, view_type); | 996 | view.Create(); |
| 1040 | glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data()); | 997 | |
| 998 | const GLenum target = ImageTarget(view_type, num_samples); | ||
| 999 | glTextureView(view.handle, target, original_texture, view_format, view_range.base.level, | ||
| 1000 | view_range.extent.levels, view_range.base.layer, view_range.extent.layers); | ||
| 1001 | if (!is_render_target) { | ||
| 1002 | std::array<SwizzleSource, 4> casted_swizzle; | ||
| 1003 | std::ranges::transform(swizzle, casted_swizzle.begin(), [](u8 component_swizzle) { | ||
| 1004 | return static_cast<SwizzleSource>(component_swizzle); | ||
| 1005 | }); | ||
| 1006 | ApplySwizzle(view.handle, format, casted_swizzle); | ||
| 1007 | } | ||
| 1008 | if (set_object_label) { | ||
| 1009 | const std::string name = VideoCommon::Name(*this); | ||
| 1010 | glObjectLabel(GL_TEXTURE, view.handle, static_cast<GLsizei>(name.size()), name.data()); | ||
| 1041 | } | 1011 | } |
| 1042 | stored_views.emplace_back().handle = handle; | 1012 | return view.handle; |
| 1043 | views[static_cast<size_t>(view_type)] = handle; | ||
| 1044 | } | 1013 | } |
| 1045 | 1014 | ||
| 1046 | Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) { | 1015 | Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) { |
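The new StorageView machinery above allocates its signed/unsigned tables lazily and creates each reinterpreting glTextureView at most once. A trimmed sketch of that cache pattern, detached from ImageView (array size and callback are simplifications):

    #include <array>
    #include <cstddef>
    #include <functional>
    #include <memory>
    #include <glad/glad.h>

    struct StorageViews {
        // Array length mirrors Shader::NUM_TEXTURE_TYPES (exact value assumed).
        std::array<GLuint, 9> signeds{};
        std::array<GLuint, 9> unsigneds{};
    };

    // Lazily materialize one GL texture view per (signedness, type) slot.
    GLuint GetOrMakeView(std::unique_ptr<StorageViews>& cache, bool is_signed,
                         std::size_t type_index,
                         const std::function<GLuint()>& make_view) {
        if (!cache) {
            cache = std::make_unique<StorageViews>(); // pay for the tables on first use
        }
        GLuint& slot = (is_signed ? cache->signeds : cache->unsigneds)[type_index];
        if (slot == 0) {
            slot = make_view(); // wraps glTextureView; runs at most once per slot
        }
        return slot;
    }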
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index cf3b789e3..921072ebe 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | 9 | ||
| 10 | #include <glad/glad.h> | 10 | #include <glad/glad.h> |
| 11 | 11 | ||
| 12 | #include "shader_recompiler/shader_info.h" | ||
| 12 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 13 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 13 | #include "video_core/renderer_opengl/util_shaders.h" | 14 | #include "video_core/renderer_opengl/util_shaders.h" |
| 14 | #include "video_core/texture_cache/texture_cache.h" | 15 | #include "video_core/texture_cache/texture_cache.h" |
| @@ -127,13 +128,12 @@ private: | |||
| 127 | OGLTexture null_image_1d_array; | 128 | OGLTexture null_image_1d_array; |
| 128 | OGLTexture null_image_cube_array; | 129 | OGLTexture null_image_cube_array; |
| 129 | OGLTexture null_image_3d; | 130 | OGLTexture null_image_3d; |
| 130 | OGLTexture null_image_rect; | ||
| 131 | OGLTextureView null_image_view_1d; | 131 | OGLTextureView null_image_view_1d; |
| 132 | OGLTextureView null_image_view_2d; | 132 | OGLTextureView null_image_view_2d; |
| 133 | OGLTextureView null_image_view_2d_array; | 133 | OGLTextureView null_image_view_2d_array; |
| 134 | OGLTextureView null_image_view_cube; | 134 | OGLTextureView null_image_view_cube; |
| 135 | 135 | ||
| 136 | std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> null_image_views; | 136 | std::array<GLuint, Shader::NUM_TEXTURE_TYPES> null_image_views{}; |
| 137 | }; | 137 | }; |
| 138 | 138 | ||
| 139 | class Image : public VideoCommon::ImageBase { | 139 | class Image : public VideoCommon::ImageBase { |
| @@ -154,8 +154,6 @@ public: | |||
| 154 | void UploadMemory(const ImageBufferMap& map, | 154 | void UploadMemory(const ImageBufferMap& map, |
| 155 | std::span<const VideoCommon::BufferImageCopy> copies); | 155 | std::span<const VideoCommon::BufferImageCopy> copies); |
| 156 | 156 | ||
| 157 | void UploadMemory(const ImageBufferMap& map, std::span<const VideoCommon::BufferCopy> copies); | ||
| 158 | |||
| 159 | void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies); | 157 | void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies); |
| 160 | 158 | ||
| 161 | GLuint StorageHandle() noexcept; | 159 | GLuint StorageHandle() noexcept; |
| @@ -170,7 +168,6 @@ private: | |||
| 170 | void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); | 168 | void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); |
| 171 | 169 | ||
| 172 | OGLTexture texture; | 170 | OGLTexture texture; |
| 173 | OGLBuffer buffer; | ||
| 174 | OGLTextureView store_view; | 171 | OGLTextureView store_view; |
| 175 | GLenum gl_internal_format = GL_NONE; | 172 | GLenum gl_internal_format = GL_NONE; |
| 176 | GLenum gl_format = GL_NONE; | 173 | GLenum gl_format = GL_NONE; |
| @@ -182,10 +179,17 @@ class ImageView : public VideoCommon::ImageViewBase { | |||
| 182 | 179 | ||
| 183 | public: | 180 | public: |
| 184 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); | 181 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); |
| 182 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&, | ||
| 183 | const VideoCommon::ImageViewInfo&, GPUVAddr); | ||
| 184 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, | ||
| 185 | const VideoCommon::ImageViewInfo& view_info); | ||
| 185 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); | 186 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); |
| 186 | 187 | ||
| 187 | [[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept { | 188 | [[nodiscard]] GLuint StorageView(Shader::TextureType texture_type, |
| 188 | return views[static_cast<size_t>(query_type)]; | 189 | Shader::ImageFormat image_format); |
| 190 | |||
| 191 | [[nodiscard]] GLuint Handle(Shader::TextureType handle_type) const noexcept { | ||
| 192 | return views[static_cast<size_t>(handle_type)]; | ||
| 189 | } | 193 | } |
| 190 | 194 | ||
| 191 | [[nodiscard]] GLuint DefaultHandle() const noexcept { | 195 | [[nodiscard]] GLuint DefaultHandle() const noexcept { |
| @@ -196,15 +200,38 @@ public: | |||
| 196 | return internal_format; | 200 | return internal_format; |
| 197 | } | 201 | } |
| 198 | 202 | ||
| 203 | [[nodiscard]] GPUVAddr GpuAddr() const noexcept { | ||
| 204 | return gpu_addr; | ||
| 205 | } | ||
| 206 | |||
| 207 | [[nodiscard]] u32 BufferSize() const noexcept { | ||
| 208 | return buffer_size; | ||
| 209 | } | ||
| 210 | |||
| 199 | private: | 211 | private: |
| 200 | void SetupView(const Device& device, Image& image, ImageViewType view_type, GLuint handle, | 212 | struct StorageViews { |
| 201 | const VideoCommon::ImageViewInfo& info, | 213 | std::array<GLuint, Shader::NUM_TEXTURE_TYPES> signeds{}; |
| 202 | VideoCommon::SubresourceRange view_range); | 214 | std::array<GLuint, Shader::NUM_TEXTURE_TYPES> unsigneds{}; |
| 215 | }; | ||
| 216 | |||
| 217 | void SetupView(Shader::TextureType view_type); | ||
| 218 | |||
| 219 | GLuint MakeView(Shader::TextureType view_type, GLenum view_format); | ||
| 203 | 220 | ||
| 204 | std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> views{}; | 221 | std::array<GLuint, Shader::NUM_TEXTURE_TYPES> views{}; |
| 205 | std::vector<OGLTextureView> stored_views; | 222 | std::vector<OGLTextureView> stored_views; |
| 206 | GLuint default_handle = 0; | 223 | std::unique_ptr<StorageViews> storage_views; |
| 207 | GLenum internal_format = GL_NONE; | 224 | GLenum internal_format = GL_NONE; |
| 225 | GLuint default_handle = 0; | ||
| 226 | GPUVAddr gpu_addr = 0; | ||
| 227 | u32 buffer_size = 0; | ||
| 228 | GLuint original_texture = 0; | ||
| 229 | int num_samples = 0; | ||
| 230 | VideoCommon::SubresourceRange flat_range; | ||
| 231 | VideoCommon::SubresourceRange full_range; | ||
| 232 | std::array<u8, 4> swizzle{}; | ||
| 233 | bool set_object_label = false; | ||
| 234 | bool is_render_target = false; | ||
| 208 | }; | 235 | }; |
| 209 | 236 | ||
| 210 | class ImageAlloc : public VideoCommon::ImageAllocBase {}; | 237 | class ImageAlloc : public VideoCommon::ImageAllocBase {}; |
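Note on the hunk above: image view handles are no longer indexed by `VideoCommon::ImageViewType` but by `Shader::TextureType`, the type the new shader recompiler declares, and signed/unsigned storage views are split out into a lazily allocated `StorageViews` struct. A minimal caller sketch (the helper name is hypothetical; `Handle()` is the accessor added above):

    // Fetch the GL view matching the shader-declared texture type.
    GLuint Color2DHandle(const OpenGL::ImageView& view) {
        return view.Handle(Shader::TextureType::Color2D);
    }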
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index f7ad8f370..672f94bfc 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h | |||
| @@ -5,12 +5,120 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <glad/glad.h> | 7 | #include <glad/glad.h> |
| 8 | |||
| 8 | #include "video_core/engines/maxwell_3d.h" | 9 | #include "video_core/engines/maxwell_3d.h" |
| 10 | #include "video_core/surface.h" | ||
| 9 | 11 | ||
| 10 | namespace OpenGL::MaxwellToGL { | 12 | namespace OpenGL::MaxwellToGL { |
| 11 | 13 | ||
| 12 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 14 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 13 | 15 | ||
| 16 | struct FormatTuple { | ||
| 17 | GLenum internal_format; | ||
| 18 | GLenum format = GL_NONE; | ||
| 19 | GLenum type = GL_NONE; | ||
| 20 | }; | ||
| 21 | |||
| 22 | constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TABLE = {{ | ||
| 23 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM | ||
| 24 | {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM | ||
| 25 | {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT | ||
| 26 | {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT | ||
| 27 | {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM | ||
| 28 | {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM | ||
| 29 | {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM | ||
| 30 | {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM | ||
| 31 | {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT | ||
| 32 | {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM | ||
| 33 | {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM | ||
| 34 | {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM | ||
| 35 | {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT | ||
| 36 | {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT | ||
| 37 | {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT | ||
| 38 | {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM | ||
| 39 | {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM | ||
| 40 | {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT | ||
| 41 | {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT | ||
| 42 | {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT | ||
| 43 | {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT | ||
| 44 | {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM | ||
| 45 | {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM | ||
| 46 | {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM | ||
| 47 | {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM | ||
| 48 | {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM | ||
| 49 | {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM | ||
| 50 | {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM | ||
| 51 | {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM | ||
| 52 | {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT | ||
| 53 | {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT | ||
| 54 | {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM | ||
| 55 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM | ||
| 56 | {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT | ||
| 57 | {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT | ||
| 58 | {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT | ||
| 59 | {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT | ||
| 60 | {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT | ||
| 61 | {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT | ||
| 62 | {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM | ||
| 63 | {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM | ||
| 64 | {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT | ||
| 65 | {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT | ||
| 66 | {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM | ||
| 67 | {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT | ||
| 68 | {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT | ||
| 69 | {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT | ||
| 70 | {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM | ||
| 71 | {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT | ||
| 72 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB | ||
| 73 | {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM | ||
| 74 | {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM | ||
| 75 | {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT | ||
| 76 | {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT | ||
| 77 | {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT | ||
| 78 | {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT | ||
| 79 | {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT | ||
| 80 | {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT | ||
| 81 | {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM | ||
| 82 | {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM | ||
| 83 | {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM | ||
| 84 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB | ||
| 85 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB | ||
| 86 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB | ||
| 87 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB | ||
| 88 | {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB | ||
| 89 | {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM | ||
| 90 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB | ||
| 91 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB | ||
| 92 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB | ||
| 93 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB | ||
| 94 | {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM | ||
| 95 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB | ||
| 96 | {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM | ||
| 97 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB | ||
| 98 | {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM | ||
| 99 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB | ||
| 100 | {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM | ||
| 101 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB | ||
| 102 | {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM | ||
| 103 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB | ||
| 104 | {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM | ||
| 105 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB | ||
| 106 | {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM | ||
| 107 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB | ||
| 108 | {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT | ||
| 109 | {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT | ||
| 110 | {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM | ||
| 111 | {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT | ||
| 112 | {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM | ||
| 113 | {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, | ||
| 114 | GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT | ||
| 115 | }}; | ||
| 116 | |||
| 117 | inline const FormatTuple& GetFormatTuple(VideoCore::Surface::PixelFormat pixel_format) { | ||
| 118 | ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size()); | ||
| 119 | return FORMAT_TABLE[static_cast<size_t>(pixel_format)]; | ||
| 120 | } | ||
| 121 | |||
| 14 | inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) { | 122 | inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) { |
| 15 | switch (attrib.type) { | 123 | switch (attrib.type) { |
| 16 | case Maxwell::VertexAttribute::Type::UnsignedNorm: | 124 | case Maxwell::VertexAttribute::Type::UnsignedNorm: |
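The new `FORMAT_TABLE` maps each `VideoCore::Surface::PixelFormat` (named in the row comments) to its GL internal format plus, for uncompressed formats, a client `format`/`type` pair; compressed rows leave those two members as the `GL_NONE` default. A usage sketch, assuming `texture`, `width`, `height`, and `data` are caller-provided placeholders:

    using VideoCore::Surface::PixelFormat;
    const auto& tuple = OpenGL::MaxwellToGL::GetFormatTuple(PixelFormat::A8B8G8R8_UNORM);
    // Row 0 of FORMAT_TABLE: GL_RGBA / GL_UNSIGNED_INT_8_8_8_8_REV
    glTextureSubImage2D(texture, 0, 0, 0, width, height, tuple.format, tuple.type, data);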
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index c12929de6..285e78384 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -25,6 +25,7 @@ | |||
| 25 | #include "video_core/host_shaders/opengl_present_vert.h" | 25 | #include "video_core/host_shaders/opengl_present_vert.h" |
| 26 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 26 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 27 | #include "video_core/renderer_opengl/gl_shader_manager.h" | 27 | #include "video_core/renderer_opengl/gl_shader_manager.h" |
| 28 | #include "video_core/renderer_opengl/gl_shader_util.h" | ||
| 28 | #include "video_core/renderer_opengl/renderer_opengl.h" | 29 | #include "video_core/renderer_opengl/renderer_opengl.h" |
| 29 | #include "video_core/textures/decoders.h" | 30 | #include "video_core/textures/decoders.h" |
| 30 | 31 | ||
| @@ -139,6 +140,26 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, | |||
| 139 | } | 140 | } |
| 140 | AddTelemetryFields(); | 141 | AddTelemetryFields(); |
| 141 | InitOpenGLObjects(); | 142 | InitOpenGLObjects(); |
| 143 | |||
| 144 | // Initialize default attributes to match hardware's disabled attributes | ||
| 145 | GLint max_attribs{}; | ||
| 146 | glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &max_attribs); | ||
| 147 | for (GLint attrib = 0; attrib < max_attribs; ++attrib) { | ||
| 148 | glVertexAttrib4f(attrib, 0.0f, 0.0f, 0.0f, 1.0f); | ||
| 149 | } | ||
| 150 | // Enable seamless cubemaps when per texture parameters are not available | ||
| 151 | if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) { | ||
| 152 | glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS); | ||
| 153 | } | ||
| 154 | // Enable unified vertex attributes and query vertex buffer address when the driver supports it | ||
| 155 | if (device.HasVertexBufferUnifiedMemory()) { | ||
| 156 | glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); | ||
| 157 | glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); | ||
| 158 | |||
| 159 | glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); | ||
| 160 | glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, | ||
| 161 | &vertex_buffer_address); | ||
| 162 | } | ||
| 142 | } | 163 | } |
| 143 | 164 | ||
| 144 | RendererOpenGL::~RendererOpenGL() = default; | 165 | RendererOpenGL::~RendererOpenGL() = default; |
| @@ -230,18 +251,8 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color | |||
| 230 | 251 | ||
| 231 | void RendererOpenGL::InitOpenGLObjects() { | 252 | void RendererOpenGL::InitOpenGLObjects() { |
| 232 | // Create shader programs | 253 | // Create shader programs |
| 233 | OGLShader vertex_shader; | 254 | present_vertex = CreateProgram(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER); |
| 234 | vertex_shader.Create(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER); | 255 | present_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); |
| 235 | |||
| 236 | OGLShader fragment_shader; | ||
| 237 | fragment_shader.Create(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); | ||
| 238 | |||
| 239 | vertex_program.Create(true, false, vertex_shader.handle); | ||
| 240 | fragment_program.Create(true, false, fragment_shader.handle); | ||
| 241 | |||
| 242 | pipeline.Create(); | ||
| 243 | glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle); | ||
| 244 | glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle); | ||
| 245 | 256 | ||
| 246 | // Generate presentation sampler | 257 | // Generate presentation sampler |
| 247 | present_sampler.Create(); | 258 | present_sampler.Create(); |
| @@ -263,21 +274,6 @@ void RendererOpenGL::InitOpenGLObjects() { | |||
| 263 | 274 | ||
| 264 | // Clear screen to black | 275 | // Clear screen to black |
| 265 | LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); | 276 | LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); |
| 266 | |||
| 267 | // Enable seamless cubemaps when per texture parameters are not available | ||
| 268 | if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) { | ||
| 269 | glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS); | ||
| 270 | } | ||
| 271 | |||
| 272 | // Enable unified vertex attributes and query vertex buffer address when the driver supports it | ||
| 273 | if (device.HasVertexBufferUnifiedMemory()) { | ||
| 274 | glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); | ||
| 275 | glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); | ||
| 276 | |||
| 277 | glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); | ||
| 278 | glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, | ||
| 279 | &vertex_buffer_address); | ||
| 280 | } | ||
| 281 | } | 277 | } |
| 282 | 278 | ||
| 283 | void RendererOpenGL::AddTelemetryFields() { | 279 | void RendererOpenGL::AddTelemetryFields() { |
| @@ -342,8 +338,9 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { | |||
| 342 | // Set projection matrix | 338 | // Set projection matrix |
| 343 | const std::array ortho_matrix = | 339 | const std::array ortho_matrix = |
| 344 | MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height)); | 340 | MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height)); |
| 345 | glProgramUniformMatrix3x2fv(vertex_program.handle, ModelViewMatrixLocation, 1, GL_FALSE, | 341 | program_manager.BindPresentPrograms(present_vertex.handle, present_fragment.handle); |
| 346 | std::data(ortho_matrix)); | 342 | glProgramUniformMatrix3x2fv(present_vertex.handle, ModelViewMatrixLocation, 1, GL_FALSE, |
| 343 | ortho_matrix.data()); | ||
| 347 | 344 | ||
| 348 | const auto& texcoords = screen_info.display_texcoords; | 345 | const auto& texcoords = screen_info.display_texcoords; |
| 349 | auto left = texcoords.left; | 346 | auto left = texcoords.left; |
| @@ -404,8 +401,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { | |||
| 404 | state_tracker.NotifyClipControl(); | 401 | state_tracker.NotifyClipControl(); |
| 405 | state_tracker.NotifyAlphaTest(); | 402 | state_tracker.NotifyAlphaTest(); |
| 406 | 403 | ||
| 407 | program_manager.BindHostPipeline(pipeline.handle); | ||
| 408 | |||
| 409 | state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); | 404 | state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); |
| 410 | glEnable(GL_CULL_FACE); | 405 | glEnable(GL_CULL_FACE); |
| 411 | if (screen_info.display_srgb) { | 406 | if (screen_info.display_srgb) { |
| @@ -453,7 +448,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { | |||
| 453 | glClear(GL_COLOR_BUFFER_BIT); | 448 | glClear(GL_COLOR_BUFFER_BIT); |
| 454 | glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); | 449 | glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); |
| 455 | 450 | ||
| 456 | program_manager.RestoreGuestPipeline(); | 451 | // TODO |
| 452 | // program_manager.RestoreGuestPipeline(); | ||
| 457 | } | 453 | } |
| 458 | 454 | ||
| 459 | void RendererOpenGL::RenderScreenshot() { | 455 | void RendererOpenGL::RenderScreenshot() { |
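The constructor hunk above front-loads GL state that previously lived in `InitOpenGLObjects()` and adds one new piece: every generic vertex attribute is reset to (0, 0, 0, 1). That is the GL specification's initial value for a disabled generic attribute, so a shader reading an attribute the guest never enabled observes the same constant the hardware would supply. The pattern in isolation:

    GLint max_attribs = 0;
    glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &max_attribs);
    for (GLint attrib = 0; attrib < max_attribs; ++attrib) {
        glVertexAttrib4f(attrib, 0.0f, 0.0f, 0.0f, 1.0f); // GL default for a disabled attribute
    }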
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 0b66f8332..d455f572f 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h | |||
| @@ -12,7 +12,6 @@ | |||
| 12 | #include "video_core/renderer_opengl/gl_device.h" | 12 | #include "video_core/renderer_opengl/gl_device.h" |
| 13 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 13 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 14 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 14 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 15 | #include "video_core/renderer_opengl/gl_shader_manager.h" | ||
| 16 | #include "video_core/renderer_opengl/gl_state_tracker.h" | 15 | #include "video_core/renderer_opengl/gl_state_tracker.h" |
| 17 | 16 | ||
| 18 | namespace Core { | 17 | namespace Core { |
| @@ -111,9 +110,8 @@ private: | |||
| 111 | // OpenGL object IDs | 110 | // OpenGL object IDs |
| 112 | OGLSampler present_sampler; | 111 | OGLSampler present_sampler; |
| 113 | OGLBuffer vertex_buffer; | 112 | OGLBuffer vertex_buffer; |
| 114 | OGLProgram vertex_program; | 113 | OGLProgram present_vertex; |
| 115 | OGLProgram fragment_program; | 114 | OGLProgram present_fragment; |
| 116 | OGLPipeline pipeline; | ||
| 117 | OGLFramebuffer screenshot_framebuffer; | 115 | OGLFramebuffer screenshot_framebuffer; |
| 118 | 116 | ||
| 119 | // GPU address of the vertex buffer | 117 | // GPU address of the vertex buffer |
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 8fb5be393..37a4d1d9d 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp | |||
| @@ -16,8 +16,8 @@ | |||
| 16 | #include "video_core/host_shaders/opengl_copy_bc4_comp.h" | 16 | #include "video_core/host_shaders/opengl_copy_bc4_comp.h" |
| 17 | #include "video_core/host_shaders/opengl_copy_bgra_comp.h" | 17 | #include "video_core/host_shaders/opengl_copy_bgra_comp.h" |
| 18 | #include "video_core/host_shaders/pitch_unswizzle_comp.h" | 18 | #include "video_core/host_shaders/pitch_unswizzle_comp.h" |
| 19 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 20 | #include "video_core/renderer_opengl/gl_shader_manager.h" | 19 | #include "video_core/renderer_opengl/gl_shader_manager.h" |
| 20 | #include "video_core/renderer_opengl/gl_shader_util.h" | ||
| 21 | #include "video_core/renderer_opengl/gl_texture_cache.h" | 21 | #include "video_core/renderer_opengl/gl_texture_cache.h" |
| 22 | #include "video_core/renderer_opengl/util_shaders.h" | 22 | #include "video_core/renderer_opengl/util_shaders.h" |
| 23 | #include "video_core/texture_cache/accelerated_swizzle.h" | 23 | #include "video_core/texture_cache/accelerated_swizzle.h" |
| @@ -41,21 +41,14 @@ using VideoCommon::Accelerated::MakeBlockLinearSwizzle3DParams; | |||
| 41 | using VideoCore::Surface::BytesPerBlock; | 41 | using VideoCore::Surface::BytesPerBlock; |
| 42 | 42 | ||
| 43 | namespace { | 43 | namespace { |
| 44 | |||
| 45 | OGLProgram MakeProgram(std::string_view source) { | 44 | OGLProgram MakeProgram(std::string_view source) { |
| 46 | OGLShader shader; | 45 | return CreateProgram(source, GL_COMPUTE_SHADER); |
| 47 | shader.Create(source, GL_COMPUTE_SHADER); | ||
| 48 | |||
| 49 | OGLProgram program; | ||
| 50 | program.Create(true, false, shader.handle); | ||
| 51 | return program; | ||
| 52 | } | 46 | } |
| 53 | 47 | ||
| 54 | size_t NumPixelsInCopy(const VideoCommon::ImageCopy& copy) { | 48 | size_t NumPixelsInCopy(const VideoCommon::ImageCopy& copy) { |
| 55 | return static_cast<size_t>(copy.extent.width * copy.extent.height * | 49 | return static_cast<size_t>(copy.extent.width * copy.extent.height * |
| 56 | copy.src_subresource.num_layers); | 50 | copy.src_subresource.num_layers); |
| 57 | } | 51 | } |
| 58 | |||
| 59 | } // Anonymous namespace | 52 | } // Anonymous namespace |
| 60 | 53 | ||
| 61 | UtilShaders::UtilShaders(ProgramManager& program_manager_) | 54 | UtilShaders::UtilShaders(ProgramManager& program_manager_) |
| @@ -86,7 +79,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, | |||
| 86 | .width = VideoCore::Surface::DefaultBlockWidth(image.info.format), | 79 | .width = VideoCore::Surface::DefaultBlockWidth(image.info.format), |
| 87 | .height = VideoCore::Surface::DefaultBlockHeight(image.info.format), | 80 | .height = VideoCore::Surface::DefaultBlockHeight(image.info.format), |
| 88 | }; | 81 | }; |
| 89 | program_manager.BindHostCompute(astc_decoder_program.handle); | 82 | program_manager.BindComputeProgram(astc_decoder_program.handle); |
| 90 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); | 83 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); |
| 91 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle); | 84 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle); |
| 92 | 85 | ||
| @@ -134,7 +127,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, | |||
| 134 | static constexpr GLuint BINDING_INPUT_BUFFER = 1; | 127 | static constexpr GLuint BINDING_INPUT_BUFFER = 1; |
| 135 | static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; | 128 | static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; |
| 136 | 129 | ||
| 137 | program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle); | 130 | program_manager.BindComputeProgram(block_linear_unswizzle_2d_program.handle); |
| 138 | glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); | 131 | glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); |
| 139 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); | 132 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); |
| 140 | 133 | ||
| @@ -173,7 +166,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, | |||
| 173 | static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; | 166 | static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; |
| 174 | 167 | ||
| 175 | glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); | 168 | glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); |
| 176 | program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle); | 169 | program_manager.BindComputeProgram(block_linear_unswizzle_3d_program.handle); |
| 177 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); | 170 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); |
| 178 | 171 | ||
| 179 | const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); | 172 | const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); |
| @@ -222,7 +215,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, | |||
| 222 | UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block), | 215 | UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block), |
| 223 | "Non-power of two images are not implemented"); | 216 | "Non-power of two images are not implemented"); |
| 224 | 217 | ||
| 225 | program_manager.BindHostCompute(pitch_unswizzle_program.handle); | 218 | program_manager.BindComputeProgram(pitch_unswizzle_program.handle); |
| 226 | glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); | 219 | glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); |
| 227 | glUniform2ui(LOC_ORIGIN, 0, 0); | 220 | glUniform2ui(LOC_ORIGIN, 0, 0); |
| 228 | glUniform2i(LOC_DESTINATION, 0, 0); | 221 | glUniform2i(LOC_DESTINATION, 0, 0); |
| @@ -250,7 +243,7 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im | |||
| 250 | static constexpr GLuint LOC_SRC_OFFSET = 0; | 243 | static constexpr GLuint LOC_SRC_OFFSET = 0; |
| 251 | static constexpr GLuint LOC_DST_OFFSET = 1; | 244 | static constexpr GLuint LOC_DST_OFFSET = 1; |
| 252 | 245 | ||
| 253 | program_manager.BindHostCompute(copy_bc4_program.handle); | 246 | program_manager.BindComputeProgram(copy_bc4_program.handle); |
| 254 | 247 | ||
| 255 | for (const ImageCopy& copy : copies) { | 248 | for (const ImageCopy& copy : copies) { |
| 256 | ASSERT(copy.src_subresource.base_layer == 0); | 249 | ASSERT(copy.src_subresource.base_layer == 0); |
| @@ -286,7 +279,7 @@ void UtilShaders::CopyBGR(Image& dst_image, Image& src_image, | |||
| 286 | break; | 279 | break; |
| 287 | case 4: { | 280 | case 4: { |
| 288 | // BGRA8 copy | 281 | // BGRA8 copy |
| 289 | program_manager.BindHostCompute(copy_bgra_program.handle); | 282 | program_manager.BindComputeProgram(copy_bgra_program.handle); |
| 290 | constexpr GLenum FORMAT = GL_RGBA8; | 283 | constexpr GLenum FORMAT = GL_RGBA8; |
| 291 | for (const ImageCopy& copy : copies) { | 284 | for (const ImageCopy& copy : copies) { |
| 292 | ASSERT(copy.src_offset == zero_offset); | 285 | ASSERT(copy.src_offset == zero_offset); |
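`MakeProgram` now forwards to `CreateProgram` from the new `gl_shader_util.h`, which is not part of this hunk; the deleted body above shows exactly what the helper replaces. A hedged reconstruction of the equivalent steps, mirroring the removed code (the name `CreateProgramSketch` is hypothetical):

    OGLProgram CreateProgramSketch(std::string_view source, GLenum stage) {
        OGLShader shader;
        shader.Create(source, stage);               // compile a single stage
        OGLProgram program;
        program.Create(true, false, shader.handle); // link it as a separable program
        return program;
    }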
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index b7f5b8bc2..6c1b2f063 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp | |||
| @@ -49,6 +49,16 @@ constexpr VkDescriptorSetLayoutCreateInfo ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREA | |||
| 49 | .bindingCount = 1, | 49 | .bindingCount = 1, |
| 50 | .pBindings = &TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>, | 50 | .pBindings = &TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>, |
| 51 | }; | 51 | }; |
| 52 | template <u32 num_textures> | ||
| 53 | inline constexpr DescriptorBankInfo TEXTURE_DESCRIPTOR_BANK_INFO{ | ||
| 54 | .uniform_buffers = 0, | ||
| 55 | .storage_buffers = 0, | ||
| 56 | .texture_buffers = 0, | ||
| 57 | .image_buffers = 0, | ||
| 58 | .textures = num_textures, | ||
| 59 | .images = 0, | ||
| 60 | .score = 2, | ||
| 61 | }; | ||
| 52 | constexpr VkDescriptorSetLayoutCreateInfo TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{ | 62 | constexpr VkDescriptorSetLayoutCreateInfo TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{ |
| 53 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, | 63 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, |
| 54 | .pNext = nullptr, | 64 | .pNext = nullptr, |
| @@ -323,18 +333,19 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi | |||
| 323 | cmdbuf.SetScissor(0, scissor); | 333 | cmdbuf.SetScissor(0, scissor); |
| 324 | cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); | 334 | cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); |
| 325 | } | 335 | } |
| 326 | |||
| 327 | } // Anonymous namespace | 336 | } // Anonymous namespace |
| 328 | 337 | ||
| 329 | BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, | 338 | BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, |
| 330 | StateTracker& state_tracker_, VKDescriptorPool& descriptor_pool) | 339 | StateTracker& state_tracker_, DescriptorPool& descriptor_pool) |
| 331 | : device{device_}, scheduler{scheduler_}, state_tracker{state_tracker_}, | 340 | : device{device_}, scheduler{scheduler_}, state_tracker{state_tracker_}, |
| 332 | one_texture_set_layout(device.GetLogical().CreateDescriptorSetLayout( | 341 | one_texture_set_layout(device.GetLogical().CreateDescriptorSetLayout( |
| 333 | ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)), | 342 | ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)), |
| 334 | two_textures_set_layout(device.GetLogical().CreateDescriptorSetLayout( | 343 | two_textures_set_layout(device.GetLogical().CreateDescriptorSetLayout( |
| 335 | TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)), | 344 | TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)), |
| 336 | one_texture_descriptor_allocator(descriptor_pool, *one_texture_set_layout), | 345 | one_texture_descriptor_allocator{ |
| 337 | two_textures_descriptor_allocator(descriptor_pool, *two_textures_set_layout), | 346 | descriptor_pool.Allocator(*one_texture_set_layout, TEXTURE_DESCRIPTOR_BANK_INFO<1>)}, |
| 347 | two_textures_descriptor_allocator{ | ||
| 348 | descriptor_pool.Allocator(*two_textures_set_layout, TEXTURE_DESCRIPTOR_BANK_INFO<2>)}, | ||
| 338 | one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout( | 349 | one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout( |
| 339 | PipelineLayoutCreateInfo(one_texture_set_layout.address()))), | 350 | PipelineLayoutCreateInfo(one_texture_set_layout.address()))), |
| 340 | two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout( | 351 | two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout( |
| @@ -362,14 +373,14 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageV | |||
| 362 | .operation = operation, | 373 | .operation = operation, |
| 363 | }; | 374 | }; |
| 364 | const VkPipelineLayout layout = *one_texture_pipeline_layout; | 375 | const VkPipelineLayout layout = *one_texture_pipeline_layout; |
| 365 | const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D); | 376 | const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D); |
| 366 | const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler; | 377 | const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler; |
| 367 | const VkPipeline pipeline = FindOrEmplacePipeline(key); | 378 | const VkPipeline pipeline = FindOrEmplacePipeline(key); |
| 368 | const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); | ||
| 369 | scheduler.RequestRenderpass(dst_framebuffer); | 379 | scheduler.RequestRenderpass(dst_framebuffer); |
| 370 | scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_view, descriptor_set, | 380 | scheduler.Record([this, dst_region, src_region, pipeline, layout, sampler, |
| 371 | &device = device](vk::CommandBuffer cmdbuf) { | 381 | src_view](vk::CommandBuffer cmdbuf) { |
| 372 | // TODO: Barriers | 382 | // TODO: Barriers |
| 383 | const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); | ||
| 373 | UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); | 384 | UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); |
| 374 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); | 385 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); |
| 375 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, | 386 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, |
| @@ -391,12 +402,11 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer, | |||
| 391 | const VkPipelineLayout layout = *two_textures_pipeline_layout; | 402 | const VkPipelineLayout layout = *two_textures_pipeline_layout; |
| 392 | const VkSampler sampler = *nearest_sampler; | 403 | const VkSampler sampler = *nearest_sampler; |
| 393 | const VkPipeline pipeline = BlitDepthStencilPipeline(dst_framebuffer->RenderPass()); | 404 | const VkPipeline pipeline = BlitDepthStencilPipeline(dst_framebuffer->RenderPass()); |
| 394 | const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit(); | ||
| 395 | scheduler.RequestRenderpass(dst_framebuffer); | 405 | scheduler.RequestRenderpass(dst_framebuffer); |
| 396 | scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view, | 406 | scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view, |
| 397 | src_stencil_view, descriptor_set, | 407 | src_stencil_view, this](vk::CommandBuffer cmdbuf) { |
| 398 | &device = device](vk::CommandBuffer cmdbuf) { | ||
| 399 | // TODO: Barriers | 408 | // TODO: Barriers |
| 409 | const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit(); | ||
| 400 | UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view, | 410 | UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view, |
| 401 | src_stencil_view); | 411 | src_stencil_view); |
| 402 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); | 412 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); |
| @@ -416,7 +426,6 @@ void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer, | |||
| 416 | 426 | ||
| 417 | void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer, | 427 | void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer, |
| 418 | const ImageView& src_image_view) { | 428 | const ImageView& src_image_view) { |
| 419 | |||
| 420 | ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass()); | 429 | ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass()); |
| 421 | Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view); | 430 | Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view); |
| 422 | } | 431 | } |
| @@ -436,16 +445,14 @@ void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer, | |||
| 436 | void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, | 445 | void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, |
| 437 | const ImageView& src_image_view) { | 446 | const ImageView& src_image_view) { |
| 438 | const VkPipelineLayout layout = *one_texture_pipeline_layout; | 447 | const VkPipelineLayout layout = *one_texture_pipeline_layout; |
| 439 | const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D); | 448 | const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D); |
| 440 | const VkSampler sampler = *nearest_sampler; | 449 | const VkSampler sampler = *nearest_sampler; |
| 441 | const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); | ||
| 442 | const VkExtent2D extent{ | 450 | const VkExtent2D extent{ |
| 443 | .width = src_image_view.size.width, | 451 | .width = src_image_view.size.width, |
| 444 | .height = src_image_view.size.height, | 452 | .height = src_image_view.size.height, |
| 445 | }; | 453 | }; |
| 446 | scheduler.RequestRenderpass(dst_framebuffer); | 454 | scheduler.RequestRenderpass(dst_framebuffer); |
| 447 | scheduler.Record([pipeline, layout, sampler, src_view, descriptor_set, extent, | 455 | scheduler.Record([pipeline, layout, sampler, src_view, extent, this](vk::CommandBuffer cmdbuf) { |
| 448 | &device = device](vk::CommandBuffer cmdbuf) { | ||
| 449 | const VkOffset2D offset{ | 456 | const VkOffset2D offset{ |
| 450 | .x = 0, | 457 | .x = 0, |
| 451 | .y = 0, | 458 | .y = 0, |
| @@ -466,6 +473,7 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb | |||
| 466 | .tex_scale = {viewport.width, viewport.height}, | 473 | .tex_scale = {viewport.width, viewport.height}, |
| 467 | .tex_offset = {0.0f, 0.0f}, | 474 | .tex_offset = {0.0f, 0.0f}, |
| 468 | }; | 475 | }; |
| 476 | const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); | ||
| 469 | UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); | 477 | UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); |
| 470 | 478 | ||
| 471 | // TODO: Barriers | 479 | // TODO: Barriers |
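A recurring change in `blit_image.cpp`: the `Commit()` calls on the descriptor allocators move from the call site into the `scheduler.Record` lambdas, so descriptor sets are allocated when the deferred command buffer is actually recorded rather than when the blit is queued, and the lambdas now capture `this` to reach the allocator and `device`. The pattern in isolation, a sketch based on the hunks above:

    scheduler.Record([this, pipeline, layout, sampler, src_view](vk::CommandBuffer cmdbuf) {
        // Allocated at record time, on the scheduler's thread
        const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
        UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view);
        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
        // ... bind descriptor_set and draw, as in the hunks above
    });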
diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index 0d81a06ed..33ee095c1 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h | |||
| @@ -31,7 +31,7 @@ struct BlitImagePipelineKey { | |||
| 31 | class BlitImageHelper { | 31 | class BlitImageHelper { |
| 32 | public: | 32 | public: |
| 33 | explicit BlitImageHelper(const Device& device, VKScheduler& scheduler, | 33 | explicit BlitImageHelper(const Device& device, VKScheduler& scheduler, |
| 34 | StateTracker& state_tracker, VKDescriptorPool& descriptor_pool); | 34 | StateTracker& state_tracker, DescriptorPool& descriptor_pool); |
| 35 | ~BlitImageHelper(); | 35 | ~BlitImageHelper(); |
| 36 | 36 | ||
| 37 | void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, | 37 | void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, |
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 362278f01..d70153df3 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | |||
| @@ -15,9 +15,7 @@ | |||
| 15 | #include "video_core/renderer_vulkan/vk_state_tracker.h" | 15 | #include "video_core/renderer_vulkan/vk_state_tracker.h" |
| 16 | 16 | ||
| 17 | namespace Vulkan { | 17 | namespace Vulkan { |
| 18 | |||
| 19 | namespace { | 18 | namespace { |
| 20 | |||
| 21 | constexpr size_t POINT = 0; | 19 | constexpr size_t POINT = 0; |
| 22 | constexpr size_t LINE = 1; | 20 | constexpr size_t LINE = 1; |
| 23 | constexpr size_t POLYGON = 2; | 21 | constexpr size_t POLYGON = 2; |
| @@ -39,10 +37,20 @@ constexpr std::array POLYGON_OFFSET_ENABLE_LUT = { | |||
| 39 | POLYGON, // Patches | 37 | POLYGON, // Patches |
| 40 | }; | 38 | }; |
| 41 | 39 | ||
| 40 | void RefreshXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs) { | ||
| 41 | std::ranges::transform(regs.tfb_layouts, state.layouts.begin(), [](const auto& layout) { | ||
| 42 | return VideoCommon::TransformFeedbackState::Layout{ | ||
| 43 | .stream = layout.stream, | ||
| 44 | .varying_count = layout.varying_count, | ||
| 45 | .stride = layout.stride, | ||
| 46 | }; | ||
| 47 | }); | ||
| 48 | state.varyings = regs.tfb_varying_locs; | ||
| 49 | } | ||
| 42 | } // Anonymous namespace | 50 | } // Anonymous namespace |
| 43 | 51 | ||
| 44 | void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, | 52 | void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, |
| 45 | bool has_extended_dynamic_state) { | 53 | bool has_extended_dynamic_state, bool has_dynamic_vertex_input) { |
| 46 | const Maxwell& regs = maxwell3d.regs; | 54 | const Maxwell& regs = maxwell3d.regs; |
| 47 | const std::array enabled_lut{ | 55 | const std::array enabled_lut{ |
| 48 | regs.polygon_offset_point_enable, | 56 | regs.polygon_offset_point_enable, |
| @@ -52,6 +60,9 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, | |||
| 52 | const u32 topology_index = static_cast<u32>(regs.draw.topology.Value()); | 60 | const u32 topology_index = static_cast<u32>(regs.draw.topology.Value()); |
| 53 | 61 | ||
| 54 | raw1 = 0; | 62 | raw1 = 0; |
| 63 | extended_dynamic_state.Assign(has_extended_dynamic_state ? 1 : 0); | ||
| 64 | dynamic_vertex_input.Assign(has_dynamic_vertex_input ? 1 : 0); | ||
| 65 | xfb_enabled.Assign(regs.tfb_enabled != 0); | ||
| 55 | primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0); | 66 | primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0); |
| 56 | depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0); | 67 | depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0); |
| 57 | depth_clamp_disabled.Assign(regs.view_volume_clip_control.depth_clamp_disabled.Value()); | 68 | depth_clamp_disabled.Assign(regs.view_volume_clip_control.depth_clamp_disabled.Value()); |
| @@ -63,37 +74,66 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, | |||
| 63 | tessellation_clockwise.Assign(regs.tess_mode.cw.Value()); | 74 | tessellation_clockwise.Assign(regs.tess_mode.cw.Value()); |
| 64 | logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0); | 75 | logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0); |
| 65 | logic_op.Assign(PackLogicOp(regs.logic_op.operation)); | 76 | logic_op.Assign(PackLogicOp(regs.logic_op.operation)); |
| 66 | rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0); | ||
| 67 | topology.Assign(regs.draw.topology); | 77 | topology.Assign(regs.draw.topology); |
| 68 | msaa_mode.Assign(regs.multisample_mode); | 78 | msaa_mode.Assign(regs.multisample_mode); |
| 69 | 79 | ||
| 70 | raw2 = 0; | 80 | raw2 = 0; |
| 81 | rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0); | ||
| 71 | const auto test_func = | 82 | const auto test_func = |
| 72 | regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always; | 83 | regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always; |
| 73 | alpha_test_func.Assign(PackComparisonOp(test_func)); | 84 | alpha_test_func.Assign(PackComparisonOp(test_func)); |
| 74 | early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); | 85 | early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); |
| 75 | 86 | depth_enabled.Assign(regs.zeta_enable != 0 ? 1 : 0); | |
| 87 | depth_format.Assign(static_cast<u32>(regs.zeta.format)); | ||
| 88 | y_negate.Assign(regs.screen_y_control.y_negate != 0 ? 1 : 0); | ||
| 89 | provoking_vertex_last.Assign(regs.provoking_vertex_last != 0 ? 1 : 0); | ||
| 90 | conservative_raster_enable.Assign(regs.conservative_raster_enable != 0 ? 1 : 0); | ||
| 91 | smooth_lines.Assign(regs.line_smooth_enable != 0 ? 1 : 0); | ||
| 92 | |||
| 93 | for (size_t i = 0; i < regs.rt.size(); ++i) { | ||
| 94 | color_formats[i] = static_cast<u8>(regs.rt[i].format); | ||
| 95 | } | ||
| 76 | alpha_test_ref = Common::BitCast<u32>(regs.alpha_test_ref); | 96 | alpha_test_ref = Common::BitCast<u32>(regs.alpha_test_ref); |
| 77 | point_size = Common::BitCast<u32>(regs.point_size); | 97 | point_size = Common::BitCast<u32>(regs.point_size); |
| 78 | 98 | ||
| 79 | if (maxwell3d.dirty.flags[Dirty::InstanceDivisors]) { | 99 | if (maxwell3d.dirty.flags[Dirty::VertexInput]) { |
| 80 | maxwell3d.dirty.flags[Dirty::InstanceDivisors] = false; | 100 | if (has_dynamic_vertex_input) { |
| 81 | for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { | 101 | // Dirty flag will be reset by the command buffer update |
| 82 | const bool is_enabled = regs.instanced_arrays.IsInstancingEnabled(index); | 102 | static constexpr std::array LUT{ |
| 83 | binding_divisors[index] = is_enabled ? regs.vertex_array[index].divisor : 0; | 103 | 0u, // Invalid |
| 84 | } | 104 | 1u, // SignedNorm |
| 85 | } | 105 | 1u, // UnsignedNorm |
| 86 | if (maxwell3d.dirty.flags[Dirty::VertexAttributes]) { | 106 | 2u, // SignedInt |
| 87 | maxwell3d.dirty.flags[Dirty::VertexAttributes] = false; | 107 | 3u, // UnsignedInt |
| 88 | for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { | 108 | 1u, // UnsignedScaled |
| 89 | const auto& input = regs.vertex_attrib_format[index]; | 109 | 1u, // SignedScaled |
| 90 | auto& attribute = attributes[index]; | 110 | 1u, // Float |
| 91 | attribute.raw = 0; | 111 | }; |
| 92 | attribute.enabled.Assign(input.IsConstant() ? 0 : 1); | 112 | const auto& attrs = regs.vertex_attrib_format; |
| 93 | attribute.buffer.Assign(input.buffer); | 113 | attribute_types = 0; |
| 94 | attribute.offset.Assign(input.offset); | 114 | for (size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { |
| 95 | attribute.type.Assign(static_cast<u32>(input.type.Value())); | 115 | const u32 mask = attrs[i].constant != 0 ? 0 : 3; |
| 96 | attribute.size.Assign(static_cast<u32>(input.size.Value())); | 116 | const u32 type = LUT[static_cast<size_t>(attrs[i].type.Value())]; |
| 117 | attribute_types |= static_cast<u64>(type & mask) << (i * 2); | ||
| 118 | } | ||
| 119 | } else { | ||
| 120 | maxwell3d.dirty.flags[Dirty::VertexInput] = false; | ||
| 121 | enabled_divisors = 0; | ||
| 122 | for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { | ||
| 123 | const bool is_enabled = regs.instanced_arrays.IsInstancingEnabled(index); | ||
| 124 | binding_divisors[index] = is_enabled ? regs.vertex_array[index].divisor : 0; | ||
| 125 | enabled_divisors |= (is_enabled ? u64{1} : 0) << index; | ||
| 126 | } | ||
| 127 | for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { | ||
| 128 | const auto& input = regs.vertex_attrib_format[index]; | ||
| 129 | auto& attribute = attributes[index]; | ||
| 130 | attribute.raw = 0; | ||
| 131 | attribute.enabled.Assign(input.constant ? 0 : 1); | ||
| 132 | attribute.buffer.Assign(input.buffer); | ||
| 133 | attribute.offset.Assign(input.offset); | ||
| 134 | attribute.type.Assign(static_cast<u32>(input.type.Value())); | ||
| 135 | attribute.size.Assign(static_cast<u32>(input.size.Value())); | ||
| 136 | } | ||
| 97 | } | 137 | } |
| 98 | } | 138 | } |
| 99 | if (maxwell3d.dirty.flags[Dirty::Blending]) { | 139 | if (maxwell3d.dirty.flags[Dirty::Blending]) { |
| @@ -109,10 +149,12 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, | |||
| 109 | return static_cast<u16>(viewport.swizzle.raw); | 149 | return static_cast<u16>(viewport.swizzle.raw); |
| 110 | }); | 150 | }); |
| 111 | } | 151 | } |
| 112 | if (!has_extended_dynamic_state) { | 152 | if (!extended_dynamic_state) { |
| 113 | no_extended_dynamic_state.Assign(1); | ||
| 114 | dynamic_state.Refresh(regs); | 153 | dynamic_state.Refresh(regs); |
| 115 | } | 154 | } |
| 155 | if (xfb_enabled) { | ||
| 156 | RefreshXfbState(xfb_state, regs); | ||
| 157 | } | ||
| 116 | } | 158 | } |
| 117 | 159 | ||
| 118 | void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t index) { | 160 | void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t index) { |
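When `VK_EXT_vertex_input_dynamic_state` is available, the refresh above no longer snapshots full per-attribute state; it packs one of four classes per attribute into two bits of `attribute_types` (0 = constant/disabled via the mask, 1 = float-like, 2 = signed integer, 3 = unsigned integer, per the `LUT`). A worked example of the packing and the matching `DynamicAttributeType` extraction:

    u64 attribute_types = 0;
    attribute_types |= u64{1} << (0 * 2); // attribute 0: float-like
    attribute_types |= u64{3} << (5 * 2); // attribute 5: unsigned integer
    const u32 attr5 = static_cast<u32>((attribute_types >> (5 * 2)) & 0b11); // == 3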
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index a0eb83a68..c9be37935 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | 12 | ||
| 13 | #include "video_core/engines/maxwell_3d.h" | 13 | #include "video_core/engines/maxwell_3d.h" |
| 14 | #include "video_core/surface.h" | 14 | #include "video_core/surface.h" |
| 15 | #include "video_core/transform_feedback.h" | ||
| 15 | 16 | ||
| 16 | namespace Vulkan { | 17 | namespace Vulkan { |
| 17 | 18 | ||
| @@ -60,7 +61,7 @@ struct FixedPipelineState { | |||
| 60 | 61 | ||
| 61 | void Refresh(const Maxwell& regs, size_t index); | 62 | void Refresh(const Maxwell& regs, size_t index); |
| 62 | 63 | ||
| 63 | constexpr std::array<bool, 4> Mask() const noexcept { | 64 | std::array<bool, 4> Mask() const noexcept { |
| 64 | return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0}; | 65 | return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0}; |
| 65 | } | 66 | } |
| 66 | 67 | ||
| @@ -97,11 +98,11 @@ struct FixedPipelineState { | |||
| 97 | BitField<20, 3, u32> type; | 98 | BitField<20, 3, u32> type; |
| 98 | BitField<23, 6, u32> size; | 99 | BitField<23, 6, u32> size; |
| 99 | 100 | ||
| 100 | constexpr Maxwell::VertexAttribute::Type Type() const noexcept { | 101 | Maxwell::VertexAttribute::Type Type() const noexcept { |
| 101 | return static_cast<Maxwell::VertexAttribute::Type>(type.Value()); | 102 | return static_cast<Maxwell::VertexAttribute::Type>(type.Value()); |
| 102 | } | 103 | } |
| 103 | 104 | ||
| 104 | constexpr Maxwell::VertexAttribute::Size Size() const noexcept { | 105 | Maxwell::VertexAttribute::Size Size() const noexcept { |
| 105 | return static_cast<Maxwell::VertexAttribute::Size>(size.Value()); | 106 | return static_cast<Maxwell::VertexAttribute::Size>(size.Value()); |
| 106 | } | 107 | } |
| 107 | }; | 108 | }; |
| @@ -167,37 +168,53 @@ struct FixedPipelineState { | |||
| 167 | 168 | ||
| 168 | union { | 169 | union { |
| 169 | u32 raw1; | 170 | u32 raw1; |
| 170 | BitField<0, 1, u32> no_extended_dynamic_state; | 171 | BitField<0, 1, u32> extended_dynamic_state; |
| 171 | BitField<2, 1, u32> primitive_restart_enable; | 172 | BitField<1, 1, u32> dynamic_vertex_input; |
| 172 | BitField<3, 1, u32> depth_bias_enable; | 173 | BitField<2, 1, u32> xfb_enabled; |
| 173 | BitField<4, 1, u32> depth_clamp_disabled; | 174 | BitField<3, 1, u32> primitive_restart_enable; |
| 174 | BitField<5, 1, u32> ndc_minus_one_to_one; | 175 | BitField<4, 1, u32> depth_bias_enable; |
| 175 | BitField<6, 2, u32> polygon_mode; | 176 | BitField<5, 1, u32> depth_clamp_disabled; |
| 176 | BitField<8, 5, u32> patch_control_points_minus_one; | 177 | BitField<6, 1, u32> ndc_minus_one_to_one; |
| 177 | BitField<13, 2, u32> tessellation_primitive; | 178 | BitField<7, 2, u32> polygon_mode; |
| 178 | BitField<15, 2, u32> tessellation_spacing; | 179 | BitField<9, 5, u32> patch_control_points_minus_one; |
| 179 | BitField<17, 1, u32> tessellation_clockwise; | 180 | BitField<14, 2, u32> tessellation_primitive; |
| 180 | BitField<18, 1, u32> logic_op_enable; | 181 | BitField<16, 2, u32> tessellation_spacing; |
| 181 | BitField<19, 4, u32> logic_op; | 182 | BitField<18, 1, u32> tessellation_clockwise; |
| 182 | BitField<23, 1, u32> rasterize_enable; | 183 | BitField<19, 1, u32> logic_op_enable; |
| 184 | BitField<20, 4, u32> logic_op; | ||
| 183 | BitField<24, 4, Maxwell::PrimitiveTopology> topology; | 185 | BitField<24, 4, Maxwell::PrimitiveTopology> topology; |
| 184 | BitField<28, 4, Tegra::Texture::MsaaMode> msaa_mode; | 186 | BitField<28, 4, Tegra::Texture::MsaaMode> msaa_mode; |
| 185 | }; | 187 | }; |
| 186 | union { | 188 | union { |
| 187 | u32 raw2; | 189 | u32 raw2; |
| 188 | BitField<0, 3, u32> alpha_test_func; | 190 | BitField<0, 1, u32> rasterize_enable; |
| 189 | BitField<3, 1, u32> early_z; | 191 | BitField<1, 3, u32> alpha_test_func; |
| 192 | BitField<4, 1, u32> early_z; | ||
| 193 | BitField<5, 1, u32> depth_enabled; | ||
| 194 | BitField<6, 5, u32> depth_format; | ||
| 195 | BitField<11, 1, u32> y_negate; | ||
| 196 | BitField<12, 1, u32> provoking_vertex_last; | ||
| 197 | BitField<13, 1, u32> conservative_raster_enable; | ||
| 198 | BitField<14, 1, u32> smooth_lines; | ||
| 190 | }; | 199 | }; |
| 200 | std::array<u8, Maxwell::NumRenderTargets> color_formats; | ||
| 191 | 201 | ||
| 192 | u32 alpha_test_ref; | 202 | u32 alpha_test_ref; |
| 193 | u32 point_size; | 203 | u32 point_size; |
| 194 | std::array<u32, Maxwell::NumVertexArrays> binding_divisors; | ||
| 195 | std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes; | ||
| 196 | std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments; | 204 | std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments; |
| 197 | std::array<u16, Maxwell::NumViewports> viewport_swizzles; | 205 | std::array<u16, Maxwell::NumViewports> viewport_swizzles; |
| 206 | union { | ||
| 207 | u64 attribute_types; // Used with VK_EXT_vertex_input_dynamic_state | ||
| 208 | u64 enabled_divisors; | ||
| 209 | }; | ||
| 210 | std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes; | ||
| 211 | std::array<u32, Maxwell::NumVertexArrays> binding_divisors; | ||
| 212 | |||
| 198 | DynamicState dynamic_state; | 213 | DynamicState dynamic_state; |
| 214 | VideoCommon::TransformFeedbackState xfb_state; | ||
| 199 | 215 | ||
| 200 | void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state); | 216 | void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state, |
| 217 | bool has_dynamic_vertex_input); | ||
| 201 | 218 | ||
| 202 | size_t Hash() const noexcept; | 219 | size_t Hash() const noexcept; |
| 203 | 220 | ||
| @@ -208,8 +225,24 @@ struct FixedPipelineState { | |||
| 208 | } | 225 | } |
| 209 | 226 | ||
| 210 | size_t Size() const noexcept { | 227 | size_t Size() const noexcept { |
| 211 | const size_t total_size = sizeof *this; | 228 | if (xfb_enabled) { |
| 212 | return total_size - (no_extended_dynamic_state != 0 ? 0 : sizeof(DynamicState)); | 229 | // When transform feedback is enabled, use the whole struct |
| 230 | return sizeof(*this); | ||
| 231 | } | ||
| 232 | if (dynamic_vertex_input) { | ||
| 233 | // Exclude dynamic state and attributes | ||
| 234 | return offsetof(FixedPipelineState, attributes); | ||
| 235 | } | ||
| 236 | if (extended_dynamic_state) { | ||
| 237 | // Exclude dynamic state | ||
| 238 | return offsetof(FixedPipelineState, dynamic_state); | ||
| 239 | } | ||
| 240 | // Default | ||
| 241 | return offsetof(FixedPipelineState, xfb_state); | ||
| 242 | } | ||
| 243 | |||
| 244 | u32 DynamicAttributeType(size_t index) const noexcept { | ||
| 245 | return (attribute_types >> (index * 2)) & 0b11; | ||
| 213 | } | 246 | } |
| 214 | }; | 247 | }; |
| 215 | static_assert(std::has_unique_object_representations_v<FixedPipelineState>); | 248 | static_assert(std::has_unique_object_representations_v<FixedPipelineState>); |
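The rewritten `Size()` exploits field ordering: members are laid out so that progressively more capable drivers need progressively shorter prefixes, and `offsetof` truncates the struct at the first field the active feature set makes redundant, while the `static_assert` above guarantees unique object representations (no padding), which makes byte-wise hashing of that prefix sound. A hedged sketch of how a hash could consume it (the real `Hash()` body lives in the .cpp and is not shown here; `Common::CityHash64` is an assumed helper):

    size_t HashSketch(const FixedPipelineState& state) {
        const auto* data = reinterpret_cast<const char*>(&state);
        return Common::CityHash64(data, state.Size()); // hash only the meaningful prefix
    }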
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index f088447e9..68a23b602 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp | |||
| @@ -157,7 +157,7 @@ struct FormatTuple { | |||
| 157 | {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32_FLOAT | 157 | {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32_FLOAT |
| 158 | {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16_FLOAT | 158 | {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16_FLOAT |
| 159 | {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16_UNORM | 159 | {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16_UNORM |
| 160 | {VK_FORMAT_UNDEFINED}, // R16_SNORM | 160 | {VK_FORMAT_R16_SNORM, Attachable | Storage}, // R16_SNORM |
| 161 | {VK_FORMAT_R16_UINT, Attachable | Storage}, // R16_UINT | 161 | {VK_FORMAT_R16_UINT, Attachable | Storage}, // R16_UINT |
| 162 | {VK_FORMAT_UNDEFINED}, // R16_SINT | 162 | {VK_FORMAT_UNDEFINED}, // R16_SINT |
| 163 | {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM | 163 | {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM |
| @@ -266,19 +266,20 @@ FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with | |||
| 266 | return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage}; | 266 | return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage}; |
| 267 | } | 267 | } |
| 268 | 268 | ||
| 269 | VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) { | 269 | VkShaderStageFlagBits ShaderStage(Shader::Stage stage) { |
| 270 | switch (stage) { | 270 | switch (stage) { |
| 271 | case Tegra::Engines::ShaderType::Vertex: | 271 | case Shader::Stage::VertexA: |
| 272 | case Shader::Stage::VertexB: | ||
| 272 | return VK_SHADER_STAGE_VERTEX_BIT; | 273 | return VK_SHADER_STAGE_VERTEX_BIT; |
| 273 | case Tegra::Engines::ShaderType::TesselationControl: | 274 | case Shader::Stage::TessellationControl: |
| 274 | return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; | 275 | return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; |
| 275 | case Tegra::Engines::ShaderType::TesselationEval: | 276 | case Shader::Stage::TessellationEval: |
| 276 | return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; | 277 | return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; |
| 277 | case Tegra::Engines::ShaderType::Geometry: | 278 | case Shader::Stage::Geometry: |
| 278 | return VK_SHADER_STAGE_GEOMETRY_BIT; | 279 | return VK_SHADER_STAGE_GEOMETRY_BIT; |
| 279 | case Tegra::Engines::ShaderType::Fragment: | 280 | case Shader::Stage::Fragment: |
| 280 | return VK_SHADER_STAGE_FRAGMENT_BIT; | 281 | return VK_SHADER_STAGE_FRAGMENT_BIT; |
| 281 | case Tegra::Engines::ShaderType::Compute: | 282 | case Shader::Stage::Compute: |
| 282 | return VK_SHADER_STAGE_COMPUTE_BIT; | 283 | return VK_SHADER_STAGE_COMPUTE_BIT; |
| 283 | } | 284 | } |
| 284 | UNIMPLEMENTED_MSG("Unimplemented shader stage={}", stage); | 285 | UNIMPLEMENTED_MSG("Unimplemented shader stage={}", stage); |
| @@ -685,6 +686,19 @@ VkCullModeFlagBits CullFace(Maxwell::CullFace cull_face) { | |||
| 685 | return {}; | 686 | return {}; |
| 686 | } | 687 | } |
| 687 | 688 | ||
| 689 | VkPolygonMode PolygonMode(Maxwell::PolygonMode polygon_mode) { | ||
| 690 | switch (polygon_mode) { | ||
| 691 | case Maxwell::PolygonMode::Point: | ||
| 692 | return VK_POLYGON_MODE_POINT; | ||
| 693 | case Maxwell::PolygonMode::Line: | ||
| 694 | return VK_POLYGON_MODE_LINE; | ||
| 695 | case Maxwell::PolygonMode::Fill: | ||
| 696 | return VK_POLYGON_MODE_FILL; | ||
| 697 | } | ||
| 698 | UNIMPLEMENTED_MSG("Unimplemented polygon mode={}", polygon_mode); | ||
| 699 | return {}; | ||
| 700 | } | ||
| 701 | |||
| 688 | VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) { | 702 | VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) { |
| 689 | switch (swizzle) { | 703 | switch (swizzle) { |
| 690 | case Tegra::Texture::SwizzleSource::Zero: | 704 | case Tegra::Texture::SwizzleSource::Zero: |
| @@ -741,4 +755,28 @@ VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reducti | |||
| 741 | return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT; | 755 | return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT; |
| 742 | } | 756 | } |
| 743 | 757 | ||
| 758 | VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode) { | ||
| 759 | switch (msaa_mode) { | ||
| 760 | case Tegra::Texture::MsaaMode::Msaa1x1: | ||
| 761 | return VK_SAMPLE_COUNT_1_BIT; | ||
| 762 | case Tegra::Texture::MsaaMode::Msaa2x1: | ||
| 763 | case Tegra::Texture::MsaaMode::Msaa2x1_D3D: | ||
| 764 | return VK_SAMPLE_COUNT_2_BIT; | ||
| 765 | case Tegra::Texture::MsaaMode::Msaa2x2: | ||
| 766 | case Tegra::Texture::MsaaMode::Msaa2x2_VC4: | ||
| 767 | case Tegra::Texture::MsaaMode::Msaa2x2_VC12: | ||
| 768 | return VK_SAMPLE_COUNT_4_BIT; | ||
| 769 | case Tegra::Texture::MsaaMode::Msaa4x2: | ||
| 770 | case Tegra::Texture::MsaaMode::Msaa4x2_D3D: | ||
| 771 | case Tegra::Texture::MsaaMode::Msaa4x2_VC8: | ||
| 772 | case Tegra::Texture::MsaaMode::Msaa4x2_VC24: | ||
| 773 | return VK_SAMPLE_COUNT_8_BIT; | ||
| 774 | case Tegra::Texture::MsaaMode::Msaa4x4: | ||
| 775 | return VK_SAMPLE_COUNT_16_BIT; | ||
| 776 | default: | ||
| 777 | UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode)); | ||
| 778 | return VK_SAMPLE_COUNT_1_BIT; | ||
| 779 | } | ||
| 780 | } | ||
| 781 | |||
| 744 | } // namespace Vulkan::MaxwellToVK | 782 | } // namespace Vulkan::MaxwellToVK |
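Since VkSampleCountFlagBits encodes each count as a power-of-two flag whose numeric value equals the count, callers of the new MsaaMode helper can recover the sample count directly; a minimal sketch (assumed utility, not in the patch):

    #include <cstdint>
    #include <vulkan/vulkan.h>

    // VK_SAMPLE_COUNT_n_BIT has the numeric value n, so the cast is the count.
    constexpr std::uint32_t SampleCount(VkSampleCountFlagBits bits) {
        return static_cast<std::uint32_t>(bits);
    }

    static_assert(SampleCount(VK_SAMPLE_COUNT_8_BIT) == 8); // e.g. Msaa4x2 -> 8 samples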
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index e3e06ba38..8a9616039 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include "common/common_types.h" | 7 | #include "common/common_types.h" |
| 8 | #include "shader_recompiler/stage.h" | ||
| 8 | #include "video_core/engines/maxwell_3d.h" | 9 | #include "video_core/engines/maxwell_3d.h" |
| 9 | #include "video_core/surface.h" | 10 | #include "video_core/surface.h" |
| 10 | #include "video_core/textures/texture.h" | 11 | #include "video_core/textures/texture.h" |
| @@ -45,7 +46,7 @@ struct FormatInfo { | |||
| 45 | [[nodiscard]] FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with_srgb, | 46 | [[nodiscard]] FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with_srgb, |
| 46 | PixelFormat pixel_format); | 47 | PixelFormat pixel_format); |
| 47 | 48 | ||
| 48 | VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage); | 49 | VkShaderStageFlagBits ShaderStage(Shader::Stage stage); |
| 49 | 50 | ||
| 50 | VkPrimitiveTopology PrimitiveTopology(const Device& device, Maxwell::PrimitiveTopology topology); | 51 | VkPrimitiveTopology PrimitiveTopology(const Device& device, Maxwell::PrimitiveTopology topology); |
| 51 | 52 | ||
| @@ -65,10 +66,14 @@ VkFrontFace FrontFace(Maxwell::FrontFace front_face); | |||
| 65 | 66 | ||
| 66 | VkCullModeFlagBits CullFace(Maxwell::CullFace cull_face); | 67 | VkCullModeFlagBits CullFace(Maxwell::CullFace cull_face); |
| 67 | 68 | ||
| 69 | VkPolygonMode PolygonMode(Maxwell::PolygonMode polygon_mode); | ||
| 70 | |||
| 68 | VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle); | 71 | VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle); |
| 69 | 72 | ||
| 70 | VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle); | 73 | VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle); |
| 71 | 74 | ||
| 72 | VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction); | 75 | VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction); |
| 73 | 76 | ||
| 77 | VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode); | ||
| 78 | |||
| 74 | } // namespace Vulkan::MaxwellToVK | 79 | } // namespace Vulkan::MaxwellToVK |
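A hedged call-site sketch for the retyped ShaderStage, showing the recompiler enum flowing straight into the translation layer (surrounding context assumed):

    #include "video_core/renderer_vulkan/maxwell_to_vk.h"

    // Both vertex variants collapse to the same Vulkan stage bit.
    const VkShaderStageFlagBits vertex_bit =
        Vulkan::MaxwellToVK::ShaderStage(Shader::Stage::VertexB);
    // vertex_bit == VK_SHADER_STAGE_VERTEX_BIT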
diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h new file mode 100644 index 000000000..4847db6b6 --- /dev/null +++ b/src/video_core/renderer_vulkan/pipeline_helper.h | |||
| @@ -0,0 +1,154 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <cstddef> | ||
| 8 | |||
| 9 | #include <boost/container/small_vector.hpp> | ||
| 10 | |||
| 11 | #include "common/assert.h" | ||
| 12 | #include "common/common_types.h" | ||
| 13 | #include "shader_recompiler/shader_info.h" | ||
| 14 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | ||
| 15 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | ||
| 16 | #include "video_core/texture_cache/texture_cache.h" | ||
| 17 | #include "video_core/texture_cache/types.h" | ||
| 18 | #include "video_core/textures/texture.h" | ||
| 19 | #include "video_core/vulkan_common/vulkan_device.h" | ||
| 20 | |||
| 21 | namespace Vulkan { | ||
| 22 | |||
| 23 | class DescriptorLayoutBuilder { | ||
| 24 | public: | ||
| 25 | DescriptorLayoutBuilder(const Device& device_) : device{&device_} {} | ||
| 26 | |||
| 27 | bool CanUsePushDescriptor() const noexcept { | ||
| 28 | return device->IsKhrPushDescriptorSupported() && | ||
| 29 | num_descriptors <= device->MaxPushDescriptors(); | ||
| 30 | } | ||
| 31 | |||
| 32 | vk::DescriptorSetLayout CreateDescriptorSetLayout(bool use_push_descriptor) const { | ||
| 33 | if (bindings.empty()) { | ||
| 34 | return nullptr; | ||
| 35 | } | ||
| 36 | const VkDescriptorSetLayoutCreateFlags flags = | ||
| 37 | use_push_descriptor ? VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR : 0; | ||
| 38 | return device->GetLogical().CreateDescriptorSetLayout({ | ||
| 39 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, | ||
| 40 | .pNext = nullptr, | ||
| 41 | .flags = flags, | ||
| 42 | .bindingCount = static_cast<u32>(bindings.size()), | ||
| 43 | .pBindings = bindings.data(), | ||
| 44 | }); | ||
| 45 | } | ||
| 46 | |||
| 47 | vk::DescriptorUpdateTemplateKHR CreateTemplate(VkDescriptorSetLayout descriptor_set_layout, | ||
| 48 | VkPipelineLayout pipeline_layout, | ||
| 49 | bool use_push_descriptor) const { | ||
| 50 | if (entries.empty()) { | ||
| 51 | return nullptr; | ||
| 52 | } | ||
| 53 | const VkDescriptorUpdateTemplateType type = | ||
| 54 | use_push_descriptor ? VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR | ||
| 55 | : VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR; | ||
| 56 | return device->GetLogical().CreateDescriptorUpdateTemplateKHR({ | ||
| 57 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, | ||
| 58 | .pNext = nullptr, | ||
| 59 | .flags = 0, | ||
| 60 | .descriptorUpdateEntryCount = static_cast<u32>(entries.size()), | ||
| 61 | .pDescriptorUpdateEntries = entries.data(), | ||
| 62 | .templateType = type, | ||
| 63 | .descriptorSetLayout = descriptor_set_layout, | ||
| 64 | .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, | ||
| 65 | .pipelineLayout = pipeline_layout, | ||
| 66 | .set = 0, | ||
| 67 | }); | ||
| 68 | } | ||
| 69 | |||
| 70 | vk::PipelineLayout CreatePipelineLayout(VkDescriptorSetLayout descriptor_set_layout) const { | ||
| 71 | return device->GetLogical().CreatePipelineLayout({ | ||
| 72 | .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, | ||
| 73 | .pNext = nullptr, | ||
| 74 | .flags = 0, | ||
| 75 | .setLayoutCount = descriptor_set_layout ? 1U : 0U, | ||
| 76 | .pSetLayouts = bindings.empty() ? nullptr : &descriptor_set_layout, | ||
| 77 | .pushConstantRangeCount = 0, | ||
| 78 | .pPushConstantRanges = nullptr, | ||
| 79 | }); | ||
| 80 | } | ||
| 81 | |||
| 82 | void Add(const Shader::Info& info, VkShaderStageFlags stage) { | ||
| 83 | Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage, info.constant_buffer_descriptors); | ||
| 84 | Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage, info.storage_buffers_descriptors); | ||
| 85 | Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage, info.texture_buffer_descriptors); | ||
| 86 | Add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, stage, info.image_buffer_descriptors); | ||
| 87 | Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage, info.texture_descriptors); | ||
| 88 | Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage, info.image_descriptors); | ||
| 89 | } | ||
| 90 | |||
| 91 | private: | ||
| 92 | template <typename Descriptors> | ||
| 93 | void Add(VkDescriptorType type, VkShaderStageFlags stage, const Descriptors& descriptors) { | ||
| 94 | const size_t num{descriptors.size()}; | ||
| 95 | for (size_t i = 0; i < num; ++i) { | ||
| 96 | bindings.push_back({ | ||
| 97 | .binding = binding, | ||
| 98 | .descriptorType = type, | ||
| 99 | .descriptorCount = descriptors[i].count, | ||
| 100 | .stageFlags = stage, | ||
| 101 | .pImmutableSamplers = nullptr, | ||
| 102 | }); | ||
| 103 | entries.push_back({ | ||
| 104 | .dstBinding = binding, | ||
| 105 | .dstArrayElement = 0, | ||
| 106 | .descriptorCount = descriptors[i].count, | ||
| 107 | .descriptorType = type, | ||
| 108 | .offset = offset, | ||
| 109 | .stride = sizeof(DescriptorUpdateEntry), | ||
| 110 | }); | ||
| 111 | ++binding; | ||
| 112 | num_descriptors += descriptors[i].count; | ||
| 113 | offset += sizeof(DescriptorUpdateEntry); | ||
| 114 | } | ||
| 115 | } | ||
| 116 | |||
| 117 | const Device* device{}; | ||
| 118 | boost::container::small_vector<VkDescriptorSetLayoutBinding, 32> bindings; | ||
| 119 | boost::container::small_vector<VkDescriptorUpdateTemplateEntryKHR, 32> entries; | ||
| 120 | u32 binding{}; | ||
| 121 | u32 num_descriptors{}; | ||
| 122 | size_t offset{}; | ||
| 123 | }; | ||
| 124 | |||
| 125 | inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& samplers, | ||
| 126 | const ImageId*& image_view_ids, TextureCache& texture_cache, | ||
| 127 | VKUpdateDescriptorQueue& update_descriptor_queue) { | ||
| 128 | for (const auto& desc : info.texture_buffer_descriptors) { | ||
| 129 | image_view_ids += desc.count; | ||
| 130 | } | ||
| 131 | for (const auto& desc : info.image_buffer_descriptors) { | ||
| 132 | image_view_ids += desc.count; | ||
| 133 | } | ||
| 134 | for (const auto& desc : info.texture_descriptors) { | ||
| 135 | for (u32 index = 0; index < desc.count; ++index) { | ||
| 136 | const VkSampler sampler{*(samplers++)}; | ||
| 137 | ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; | ||
| 138 | const VkImageView vk_image_view{image_view.Handle(desc.type)}; | ||
| 139 | update_descriptor_queue.AddSampledImage(vk_image_view, sampler); | ||
| 140 | } | ||
| 141 | } | ||
| 142 | for (const auto& desc : info.image_descriptors) { | ||
| 143 | for (u32 index = 0; index < desc.count; ++index) { | ||
| 144 | ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; | ||
| 145 | if (desc.is_written) { | ||
| 146 | texture_cache.MarkModification(image_view.image_id); | ||
| 147 | } | ||
| 148 | const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)}; | ||
| 149 | update_descriptor_queue.AddImage(vk_image_view); | ||
| 150 | } | ||
| 151 | } | ||
| 152 | } | ||
| 153 | |||
| 154 | } // namespace Vulkan | ||
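Order matters when driving the new DescriptorLayoutBuilder: Add() must see every stage's Shader::Info before any Create* call, since the same bindings/entries vectors feed all three created objects. A usage sketch under that assumption (the stage infos are placeholders, not names from the patch):

    // Hypothetical call site; vertex_info and fragment_info are Shader::Info values.
    DescriptorLayoutBuilder builder{device};
    builder.Add(vertex_info, VK_SHADER_STAGE_VERTEX_BIT);
    builder.Add(fragment_info, VK_SHADER_STAGE_FRAGMENT_BIT);

    const bool use_push = builder.CanUsePushDescriptor();
    const vk::DescriptorSetLayout set_layout = builder.CreateDescriptorSetLayout(use_push);
    const vk::PipelineLayout pipeline_layout = builder.CreatePipelineLayout(*set_layout);
    const vk::DescriptorUpdateTemplateKHR update_template =
        builder.CreateTemplate(*set_layout, *pipeline_layout, use_push);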
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index bec3a81d9..a8d04dc61 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp | |||
| @@ -130,35 +130,45 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | |||
| 130 | if (!framebuffer) { | 130 | if (!framebuffer) { |
| 131 | return; | 131 | return; |
| 132 | } | 132 | } |
| 133 | const auto& layout = render_window.GetFramebufferLayout(); | 133 | SCOPE_EXIT({ render_window.OnFrameDisplayed(); }); |
| 134 | if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) { | 134 | if (!render_window.IsShown()) { |
| 135 | const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; | 135 | return; |
| 136 | const bool use_accelerated = | 136 | } |
| 137 | rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); | 137 | const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; |
| 138 | const bool is_srgb = use_accelerated && screen_info.is_srgb; | 138 | const bool use_accelerated = |
| 139 | if (swapchain.HasFramebufferChanged(layout) || swapchain.GetSrgbState() != is_srgb) { | 139 | rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); |
| 140 | swapchain.Create(layout.width, layout.height, is_srgb); | 140 | const bool is_srgb = use_accelerated && screen_info.is_srgb; |
| 141 | blit_screen.Recreate(); | 141 | |
| 142 | } | 142 | bool has_been_recreated = false; |
| 143 | 143 | const auto recreate_swapchain = [&] { | |
| 144 | scheduler.WaitWorker(); | 144 | if (!has_been_recreated) { |
| 145 | 145 | has_been_recreated = true; | |
| 146 | while (!swapchain.AcquireNextImage()) { | 146 | scheduler.WaitWorker(); |
| 147 | swapchain.Create(layout.width, layout.height, is_srgb); | ||
| 148 | blit_screen.Recreate(); | ||
| 149 | } | 147 | } |
| 150 | const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated); | 148 | const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout(); |
| 151 | 149 | swapchain.Create(layout.width, layout.height, is_srgb); | |
| 152 | scheduler.Flush(render_semaphore); | 150 | }; |
| 153 | 151 | if (swapchain.IsSubOptimal() || swapchain.HasColorSpaceChanged(is_srgb)) { | |
| 154 | if (swapchain.Present(render_semaphore)) { | 152 | recreate_swapchain(); |
| 155 | blit_screen.Recreate(); | 153 | } |
| 154 | bool is_outdated; | ||
| 155 | do { | ||
| 156 | swapchain.AcquireNextImage(); | ||
| 157 | is_outdated = swapchain.IsOutDated(); | ||
| 158 | if (is_outdated) { | ||
| 159 | recreate_swapchain(); | ||
| 156 | } | 160 | } |
| 157 | gpu.RendererFrameEndNotify(); | 161 | } while (is_outdated); |
| 158 | rasterizer.TickFrame(); | 162 | if (has_been_recreated) { |
| 163 | blit_screen.Recreate(); | ||
| 159 | } | 164 | } |
| 165 | const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated); | ||
| 166 | scheduler.Flush(render_semaphore); | ||
| 167 | scheduler.WaitWorker(); | ||
| 168 | swapchain.Present(render_semaphore); | ||
| 160 | 169 | ||
| 161 | render_window.OnFrameDisplayed(); | 170 | gpu.RendererFrameEndNotify(); |
| 171 | rasterizer.TickFrame(); | ||
| 162 | } | 172 | } |
| 163 | 173 | ||
| 164 | void RendererVulkan::Report() const { | 174 | void RendererVulkan::Report() const { |
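The rewritten SwapBuffers reduces to an early-out plus a recreate-then-reacquire loop; schematically (same names as in the hunk above, with recreation memoized so WaitWorker runs at most once per frame):

    SCOPE_EXIT({ render_window.OnFrameDisplayed(); }); // runs on every exit path
    if (!render_window.IsShown()) {
        return;
    }
    if (swapchain.IsSubOptimal() || swapchain.HasColorSpaceChanged(is_srgb)) {
        recreate_swapchain();
    }
    bool is_outdated;
    do {
        swapchain.AcquireNextImage();
        is_outdated = swapchain.IsOutDated();
        if (is_outdated) {
            recreate_swapchain(); // acquire again on the next iteration
        }
    } while (is_outdated);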
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 363134129..516f428e7 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp | |||
| @@ -184,47 +184,43 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool | |||
| 184 | .depth = 1, | 184 | .depth = 1, |
| 185 | }, | 185 | }, |
| 186 | }; | 186 | }; |
| 187 | scheduler.Record( | 187 | scheduler.Record([this, copy, image_index](vk::CommandBuffer cmdbuf) { |
| 188 | [buffer = *buffer, image = *raw_images[image_index], copy](vk::CommandBuffer cmdbuf) { | 188 | const VkImage image = *raw_images[image_index]; |
| 189 | const VkImageMemoryBarrier base_barrier{ | 189 | const VkImageMemoryBarrier base_barrier{ |
| 190 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | 190 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, |
| 191 | .pNext = nullptr, | 191 | .pNext = nullptr, |
| 192 | .srcAccessMask = 0, | 192 | .srcAccessMask = 0, |
| 193 | .dstAccessMask = 0, | 193 | .dstAccessMask = 0, |
| 194 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | 194 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, |
| 195 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | 195 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, |
| 196 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 196 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, |
| 197 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 197 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, |
| 198 | .image = image, | 198 | .image = image, |
| 199 | .subresourceRange = | 199 | .subresourceRange{ |
| 200 | { | 200 | .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, |
| 201 | .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, | 201 | .baseMipLevel = 0, |
| 202 | .baseMipLevel = 0, | 202 | .levelCount = 1, |
| 203 | .levelCount = 1, | 203 | .baseArrayLayer = 0, |
| 204 | .baseArrayLayer = 0, | 204 | .layerCount = 1, |
| 205 | .layerCount = 1, | 205 | }, |
| 206 | }, | 206 | }; |
| 207 | }; | 207 | VkImageMemoryBarrier read_barrier = base_barrier; |
| 208 | VkImageMemoryBarrier read_barrier = base_barrier; | 208 | read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; |
| 209 | read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; | 209 | read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; |
| 210 | read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; | 210 | read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; |
| 211 | read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; | 211 | |
| 212 | 212 | VkImageMemoryBarrier write_barrier = base_barrier; | |
| 213 | VkImageMemoryBarrier write_barrier = base_barrier; | 213 | write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; |
| 214 | write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; | 214 | write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; |
| 215 | write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; | 215 | |
| 216 | 216 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, | |
| 217 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, | 217 | read_barrier); |
| 218 | 0, read_barrier); | 218 | cmdbuf.CopyBufferToImage(*buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy); |
| 219 | cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy); | 219 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, |
| 220 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, | 220 | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier); |
| 221 | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier); | 221 | }); |
| 222 | }); | ||
| 223 | } | 222 | } |
| 224 | scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index], | 223 | scheduler.Record([this, image_index, size = swapchain.GetSize()](vk::CommandBuffer cmdbuf) { |
| 225 | descriptor_set = descriptor_sets[image_index], buffer = *buffer, | ||
| 226 | size = swapchain.GetSize(), pipeline = *pipeline, | ||
| 227 | layout = *pipeline_layout](vk::CommandBuffer cmdbuf) { | ||
| 228 | const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f; | 224 | const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f; |
| 229 | const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f; | 225 | const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f; |
| 230 | const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f; | 226 | const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f; |
| @@ -234,8 +230,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool | |||
| 234 | const VkRenderPassBeginInfo renderpass_bi{ | 230 | const VkRenderPassBeginInfo renderpass_bi{ |
| 235 | .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, | 231 | .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, |
| 236 | .pNext = nullptr, | 232 | .pNext = nullptr, |
| 237 | .renderPass = renderpass, | 233 | .renderPass = *renderpass, |
| 238 | .framebuffer = framebuffer, | 234 | .framebuffer = *framebuffers[image_index], |
| 239 | .renderArea = | 235 | .renderArea = |
| 240 | { | 236 | { |
| 241 | .offset = {0, 0}, | 237 | .offset = {0, 0}, |
| @@ -257,12 +253,13 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool | |||
| 257 | .extent = size, | 253 | .extent = size, |
| 258 | }; | 254 | }; |
| 259 | cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); | 255 | cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); |
| 260 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); | 256 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); |
| 261 | cmdbuf.SetViewport(0, viewport); | 257 | cmdbuf.SetViewport(0, viewport); |
| 262 | cmdbuf.SetScissor(0, scissor); | 258 | cmdbuf.SetScissor(0, scissor); |
| 263 | 259 | ||
| 264 | cmdbuf.BindVertexBuffer(0, buffer, offsetof(BufferData, vertices)); | 260 | cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices)); |
| 265 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, {}); | 261 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, |
| 262 | descriptor_sets[image_index], {}); | ||
| 266 | cmdbuf.Draw(4, 1, 0, 0); | 263 | cmdbuf.Draw(4, 1, 0, 0); |
| 267 | cmdbuf.EndRenderPass(); | 264 | cmdbuf.EndRenderPass(); |
| 268 | }); | 265 | }); |
| @@ -304,8 +301,7 @@ void VKBlitScreen::CreateShaders() { | |||
| 304 | 301 | ||
| 305 | void VKBlitScreen::CreateSemaphores() { | 302 | void VKBlitScreen::CreateSemaphores() { |
| 306 | semaphores.resize(image_count); | 303 | semaphores.resize(image_count); |
| 307 | std::generate(semaphores.begin(), semaphores.end(), | 304 | std::ranges::generate(semaphores, [this] { return device.GetLogical().CreateSemaphore(); }); |
| 308 | [this] { return device.GetLogical().CreateSemaphore(); }); | ||
| 309 | } | 305 | } |
| 310 | 306 | ||
| 311 | void VKBlitScreen::CreateDescriptorPool() { | 307 | void VKBlitScreen::CreateDescriptorPool() { |
| @@ -633,8 +629,8 @@ void VKBlitScreen::CreateFramebuffers() { | |||
| 633 | } | 629 | } |
| 634 | 630 | ||
| 635 | void VKBlitScreen::ReleaseRawImages() { | 631 | void VKBlitScreen::ReleaseRawImages() { |
| 636 | for (std::size_t i = 0; i < raw_images.size(); ++i) { | 632 | for (const u64 tick : resource_ticks) { |
| 637 | scheduler.Wait(resource_ticks.at(i)); | 633 | scheduler.Wait(tick); |
| 638 | } | 634 | } |
| 639 | raw_images.clear(); | 635 | raw_images.clear(); |
| 640 | raw_buffer_commits.clear(); | 636 | raw_buffer_commits.clear(); |
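The Record lambdas above now capture this instead of copying every handle; that is sound only because VKBlitScreen waits on resource_ticks before releasing the images those recordings touch. Reduced shape of the pattern (copy is the VkBufferImageCopy built earlier in Draw):

    // Deferred recording: the owner must outlive every queued command recording.
    scheduler.Record([this, copy, image_index](vk::CommandBuffer cmdbuf) {
        cmdbuf.CopyBufferToImage(*buffer, *raw_images[image_index],
                                 VK_IMAGE_LAYOUT_GENERAL, copy);
    });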
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 0def1e769..f4b3ee95c 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp | |||
| @@ -60,38 +60,74 @@ std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) { | |||
| 60 | } | 60 | } |
| 61 | return indices; | 61 | return indices; |
| 62 | } | 62 | } |
| 63 | } // Anonymous namespace | ||
| 64 | |||
| 65 | Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) | ||
| 66 | : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {} | ||
| 67 | 63 | ||
| 68 | Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, | 64 | vk::Buffer CreateBuffer(const Device& device, u64 size) { |
| 69 | VAddr cpu_addr_, u64 size_bytes_) | 65 | VkBufferUsageFlags flags = |
| 70 | : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) { | 66 | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | |
| 71 | buffer = runtime.device.GetLogical().CreateBuffer(VkBufferCreateInfo{ | 67 | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | |
| 68 | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | | ||
| 69 | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; | ||
| 70 | if (device.IsExtTransformFeedbackSupported()) { | ||
| 71 | flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; | ||
| 72 | } | ||
| 73 | return device.GetLogical().CreateBuffer({ | ||
| 72 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | 74 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, |
| 73 | .pNext = nullptr, | 75 | .pNext = nullptr, |
| 74 | .flags = 0, | 76 | .flags = 0, |
| 75 | .size = SizeBytes(), | 77 | .size = size, |
| 76 | .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | | 78 | .usage = flags, |
| 77 | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | | ||
| 78 | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | | ||
| 79 | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | | ||
| 80 | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, | ||
| 81 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | 79 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, |
| 82 | .queueFamilyIndexCount = 0, | 80 | .queueFamilyIndexCount = 0, |
| 83 | .pQueueFamilyIndices = nullptr, | 81 | .pQueueFamilyIndices = nullptr, |
| 84 | }); | 82 | }); |
| 83 | } | ||
| 84 | } // Anonymous namespace | ||
| 85 | |||
| 86 | Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) | ||
| 87 | : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {} | ||
| 88 | |||
| 89 | Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, | ||
| 90 | VAddr cpu_addr_, u64 size_bytes_) | ||
| 91 | : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_), | ||
| 92 | device{&runtime.device}, buffer{CreateBuffer(*device, SizeBytes())}, | ||
| 93 | commit{runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal)} { | ||
| 85 | if (runtime.device.HasDebuggingToolAttached()) { | 94 | if (runtime.device.HasDebuggingToolAttached()) { |
| 86 | buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); | 95 | buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); |
| 87 | } | 96 | } |
| 88 | commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); | 97 | } |
| 98 | |||
| 99 | VkBufferView Buffer::View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format) { | ||
| 100 | if (!device) { | ||
| 101 | // Null buffer, return a null descriptor | ||
| 102 | return VK_NULL_HANDLE; | ||
| 103 | } | ||
| 104 | const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) { | ||
| 105 | return offset == view.offset && size == view.size && format == view.format; | ||
| 106 | })}; | ||
| 107 | if (it != views.end()) { | ||
| 108 | return *it->handle; | ||
| 109 | } | ||
| 110 | views.push_back({ | ||
| 111 | .offset = offset, | ||
| 112 | .size = size, | ||
| 113 | .format = format, | ||
| 114 | .handle = device->GetLogical().CreateBufferView({ | ||
| 115 | .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, | ||
| 116 | .pNext = nullptr, | ||
| 117 | .flags = 0, | ||
| 118 | .buffer = *buffer, | ||
| 119 | .format = MaxwellToVK::SurfaceFormat(*device, FormatType::Buffer, false, format).format, | ||
| 120 | .offset = offset, | ||
| 121 | .range = size, | ||
| 122 | }), | ||
| 123 | }); | ||
| 124 | return *views.back().handle; | ||
| 89 | } | 125 | } |
| 90 | 126 | ||
| 91 | BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_, | 127 | BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_, |
| 92 | VKScheduler& scheduler_, StagingBufferPool& staging_pool_, | 128 | VKScheduler& scheduler_, StagingBufferPool& staging_pool_, |
| 93 | VKUpdateDescriptorQueue& update_descriptor_queue_, | 129 | VKUpdateDescriptorQueue& update_descriptor_queue_, |
| 94 | VKDescriptorPool& descriptor_pool) | 130 | DescriptorPool& descriptor_pool) |
| 95 | : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, | 131 | : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, |
| 96 | staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_}, | 132 | staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_}, |
| 97 | uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | 133 | uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), |
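Buffer::View above is a find-or-create cache keyed on (offset, size, format); the linear search is deliberate, since each buffer holds only a handful of views. The same idiom in generic form (illustrative only, not code from the patch):

    #include <algorithm>
    #include <utility>
    #include <vector>

    // Linear-scan cache: cheap for small N, no hashing or ordering requirements.
    template <typename Key, typename Value, typename Make>
    Value& FindOrCreate(std::vector<std::pair<Key, Value>>& cache, const Key& key,
                        Make&& make) {
        const auto it = std::ranges::find_if(
            cache, [&key](const auto& entry) { return entry.first == key; });
        if (it != cache.end()) {
            return it->second;
        }
        return cache.emplace_back(key, make()).second;
    }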
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 3bb81d5b3..c27402ff0 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h | |||
| @@ -9,13 +9,14 @@ | |||
| 9 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | 9 | #include "video_core/renderer_vulkan/vk_compute_pass.h" |
| 10 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 10 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 11 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 11 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 12 | #include "video_core/surface.h" | ||
| 12 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | 13 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" |
| 13 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 14 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 14 | 15 | ||
| 15 | namespace Vulkan { | 16 | namespace Vulkan { |
| 16 | 17 | ||
| 17 | class Device; | 18 | class Device; |
| 18 | class VKDescriptorPool; | 19 | class DescriptorPool; |
| 19 | class VKScheduler; | 20 | class VKScheduler; |
| 20 | 21 | ||
| 21 | class BufferCacheRuntime; | 22 | class BufferCacheRuntime; |
| @@ -26,6 +27,8 @@ public: | |||
| 26 | explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, | 27 | explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, |
| 27 | VAddr cpu_addr_, u64 size_bytes_); | 28 | VAddr cpu_addr_, u64 size_bytes_); |
| 28 | 29 | ||
| 30 | [[nodiscard]] VkBufferView View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format); | ||
| 31 | |||
| 29 | [[nodiscard]] VkBuffer Handle() const noexcept { | 32 | [[nodiscard]] VkBuffer Handle() const noexcept { |
| 30 | return *buffer; | 33 | return *buffer; |
| 31 | } | 34 | } |
| @@ -35,8 +38,17 @@ public: | |||
| 35 | } | 38 | } |
| 36 | 39 | ||
| 37 | private: | 40 | private: |
| 41 | struct BufferView { | ||
| 42 | u32 offset; | ||
| 43 | u32 size; | ||
| 44 | VideoCore::Surface::PixelFormat format; | ||
| 45 | vk::BufferView handle; | ||
| 46 | }; | ||
| 47 | |||
| 48 | const Device* device{}; | ||
| 38 | vk::Buffer buffer; | 49 | vk::Buffer buffer; |
| 39 | MemoryCommit commit; | 50 | MemoryCommit commit; |
| 51 | std::vector<BufferView> views; | ||
| 40 | }; | 52 | }; |
| 41 | 53 | ||
| 42 | class BufferCacheRuntime { | 54 | class BufferCacheRuntime { |
| @@ -49,7 +61,7 @@ public: | |||
| 49 | explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_manager_, | 61 | explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_manager_, |
| 50 | VKScheduler& scheduler_, StagingBufferPool& staging_pool_, | 62 | VKScheduler& scheduler_, StagingBufferPool& staging_pool_, |
| 51 | VKUpdateDescriptorQueue& update_descriptor_queue_, | 63 | VKUpdateDescriptorQueue& update_descriptor_queue_, |
| 52 | VKDescriptorPool& descriptor_pool); | 64 | DescriptorPool& descriptor_pool); |
| 53 | 65 | ||
| 54 | void Finish(); | 66 | void Finish(); |
| 55 | 67 | ||
| @@ -87,6 +99,11 @@ public: | |||
| 87 | BindBuffer(buffer, offset, size); | 99 | BindBuffer(buffer, offset, size); |
| 88 | } | 100 | } |
| 89 | 101 | ||
| 102 | void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, | ||
| 103 | VideoCore::Surface::PixelFormat format) { | ||
| 104 | update_descriptor_queue.AddTexelBuffer(buffer.View(offset, size, format)); | ||
| 105 | } | ||
| 106 | |||
| 90 | private: | 107 | private: |
| 91 | void BindBuffer(VkBuffer buffer, u32 offset, u32 size) { | 108 | void BindBuffer(VkBuffer buffer, u32 offset, u32 size) { |
| 92 | update_descriptor_queue.AddBuffer(buffer, offset, size); | 109 | update_descriptor_queue.AddBuffer(buffer, offset, size); |
| @@ -124,6 +141,7 @@ struct BufferCacheParams { | |||
| 124 | static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false; | 141 | static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false; |
| 125 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = false; | 142 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = false; |
| 126 | static constexpr bool USE_MEMORY_MAPS = true; | 143 | static constexpr bool USE_MEMORY_MAPS = true; |
| 144 | static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false; | ||
| 127 | }; | 145 | }; |
| 128 | 146 | ||
| 129 | using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; | 147 | using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; |
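With the new View/BindTextureBuffer plumbing, texel-buffer descriptors are created lazily and cached per buffer; a hedged usage sketch (the offset, size, and format values are examples only):

    // Repeated binds with identical (offset, size, format) reuse one VkBufferView.
    runtime.BindTextureBuffer(buffer, /*offset=*/0, /*size=*/4096,
                              VideoCore::Surface::PixelFormat::R32_FLOAT);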
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 4181d83ee..8e426ce2c 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp | |||
| @@ -41,80 +41,92 @@ constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 2; | |||
| 41 | constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 3; | 41 | constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 3; |
| 42 | constexpr size_t ASTC_NUM_BINDINGS = 4; | 42 | constexpr size_t ASTC_NUM_BINDINGS = 4; |
| 43 | 43 | ||
| 44 | VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { | 44 | template <size_t size> |
| 45 | return { | 45 | inline constexpr VkPushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{ |
| 46 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | 46 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, |
| 47 | .offset = 0, | 47 | .offset = 0, |
| 48 | .size = static_cast<u32>(size), | 48 | .size = static_cast<u32>(size), |
| 49 | }; | 49 | }; |
| 50 | } | ||
| 51 | |||
| 52 | std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() { | ||
| 53 | return {{ | ||
| 54 | { | ||
| 55 | .binding = 0, | ||
| 56 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 57 | .descriptorCount = 1, | ||
| 58 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 59 | .pImmutableSamplers = nullptr, | ||
| 60 | }, | ||
| 61 | { | ||
| 62 | .binding = 1, | ||
| 63 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 64 | .descriptorCount = 1, | ||
| 65 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 66 | .pImmutableSamplers = nullptr, | ||
| 67 | }, | ||
| 68 | }}; | ||
| 69 | } | ||
| 70 | 50 | ||
| 71 | std::array<VkDescriptorSetLayoutBinding, ASTC_NUM_BINDINGS> BuildASTCDescriptorSetBindings() { | 51 | constexpr std::array<VkDescriptorSetLayoutBinding, 2> INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS{{ |
| 72 | return {{ | 52 | { |
| 73 | { | 53 | .binding = 0, |
| 74 | .binding = ASTC_BINDING_INPUT_BUFFER, | 54 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, |
| 75 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | 55 | .descriptorCount = 1, |
| 76 | .descriptorCount = 1, | 56 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, |
| 77 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | 57 | .pImmutableSamplers = nullptr, |
| 78 | .pImmutableSamplers = nullptr, | 58 | }, |
| 79 | }, | 59 | { |
| 80 | { | 60 | .binding = 1, |
| 81 | .binding = ASTC_BINDING_ENC_BUFFER, | 61 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, |
| 82 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | 62 | .descriptorCount = 1, |
| 83 | .descriptorCount = 1, | 63 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, |
| 84 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | 64 | .pImmutableSamplers = nullptr, |
| 85 | .pImmutableSamplers = nullptr, | 65 | }, |
| 86 | }, | 66 | }}; |
| 87 | { | 67 | |
| 88 | .binding = ASTC_BINDING_SWIZZLE_BUFFER, | 68 | constexpr DescriptorBankInfo INPUT_OUTPUT_BANK_INFO{ |
| 89 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | 69 | .uniform_buffers = 0, |
| 90 | .descriptorCount = 1, | 70 | .storage_buffers = 2, |
| 91 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | 71 | .texture_buffers = 0, |
| 92 | .pImmutableSamplers = nullptr, | 72 | .image_buffers = 0, |
| 93 | }, | 73 | .textures = 0, |
| 94 | { | 74 | .images = 0, |
| 95 | .binding = ASTC_BINDING_OUTPUT_IMAGE, | 75 | .score = 2, |
| 96 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, | 76 | }; |
| 97 | .descriptorCount = 1, | ||
| 98 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 99 | .pImmutableSamplers = nullptr, | ||
| 100 | }, | ||
| 101 | }}; | ||
| 102 | } | ||
| 103 | 77 | ||
| 104 | VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() { | 78 | constexpr std::array<VkDescriptorSetLayoutBinding, 4> ASTC_DESCRIPTOR_SET_BINDINGS{{ |
| 105 | return { | 79 | { |
| 106 | .dstBinding = 0, | 80 | .binding = ASTC_BINDING_INPUT_BUFFER, |
| 107 | .dstArrayElement = 0, | ||
| 108 | .descriptorCount = 2, | ||
| 109 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | 81 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, |
| 110 | .offset = 0, | 82 | .descriptorCount = 1, |
| 111 | .stride = sizeof(DescriptorUpdateEntry), | 83 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, |
| 112 | }; | 84 | .pImmutableSamplers = nullptr, |
| 113 | } | 85 | }, |
| 86 | { | ||
| 87 | .binding = ASTC_BINDING_ENC_BUFFER, | ||
| 88 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 89 | .descriptorCount = 1, | ||
| 90 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 91 | .pImmutableSamplers = nullptr, | ||
| 92 | }, | ||
| 93 | { | ||
| 94 | .binding = ASTC_BINDING_SWIZZLE_BUFFER, | ||
| 95 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 96 | .descriptorCount = 1, | ||
| 97 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 98 | .pImmutableSamplers = nullptr, | ||
| 99 | }, | ||
| 100 | { | ||
| 101 | .binding = ASTC_BINDING_OUTPUT_IMAGE, | ||
| 102 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, | ||
| 103 | .descriptorCount = 1, | ||
| 104 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 105 | .pImmutableSamplers = nullptr, | ||
| 106 | }, | ||
| 107 | }}; | ||
| 108 | |||
| 109 | constexpr DescriptorBankInfo ASTC_BANK_INFO{ | ||
| 110 | .uniform_buffers = 0, | ||
| 111 | .storage_buffers = 3, | ||
| 112 | .texture_buffers = 0, | ||
| 113 | .image_buffers = 0, | ||
| 114 | .textures = 0, | ||
| 115 | .images = 1, | ||
| 116 | .score = 4, | ||
| 117 | }; | ||
| 114 | 118 | ||
| 115 | std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS> | 119 | constexpr VkDescriptorUpdateTemplateEntryKHR INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE{ |
| 116 | BuildASTCPassDescriptorUpdateTemplateEntry() { | 120 | .dstBinding = 0, |
| 117 | return {{ | 121 | .dstArrayElement = 0, |
| 122 | .descriptorCount = 2, | ||
| 123 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 124 | .offset = 0, | ||
| 125 | .stride = sizeof(DescriptorUpdateEntry), | ||
| 126 | }; | ||
| 127 | |||
| 128 | constexpr std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS> | ||
| 129 | ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY{{ | ||
| 118 | { | 130 | { |
| 119 | .dstBinding = ASTC_BINDING_INPUT_BUFFER, | 131 | .dstBinding = ASTC_BINDING_INPUT_BUFFER, |
| 120 | .dstArrayElement = 0, | 132 | .dstArrayElement = 0, |
| @@ -148,7 +160,6 @@ BuildASTCPassDescriptorUpdateTemplateEntry() { | |||
| 148 | .stride = sizeof(DescriptorUpdateEntry), | 160 | .stride = sizeof(DescriptorUpdateEntry), |
| 149 | }, | 161 | }, |
| 150 | }}; | 162 | }}; |
| 151 | } | ||
| 152 | 163 | ||
| 153 | struct AstcPushConstants { | 164 | struct AstcPushConstants { |
| 154 | std::array<u32, 2> blocks_dims; | 165 | std::array<u32, 2> blocks_dims; |
| @@ -159,14 +170,14 @@ struct AstcPushConstants { | |||
| 159 | u32 block_height; | 170 | u32 block_height; |
| 160 | u32 block_height_mask; | 171 | u32 block_height_mask; |
| 161 | }; | 172 | }; |
| 162 | |||
| 163 | } // Anonymous namespace | 173 | } // Anonymous namespace |
| 164 | 174 | ||
| 165 | VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, | 175 | ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool, |
| 166 | vk::Span<VkDescriptorSetLayoutBinding> bindings, | 176 | vk::Span<VkDescriptorSetLayoutBinding> bindings, |
| 167 | vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, | 177 | vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, |
| 168 | vk::Span<VkPushConstantRange> push_constants, | 178 | const DescriptorBankInfo& bank_info, |
| 169 | std::span<const u32> code) { | 179 | vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code) |
| 180 | : device{device_} { | ||
| 170 | descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({ | 181 | descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({ |
| 171 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, | 182 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, |
| 172 | .pNext = nullptr, | 183 | .pNext = nullptr, |
| @@ -196,8 +207,7 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_ | |||
| 196 | .pipelineLayout = *layout, | 207 | .pipelineLayout = *layout, |
| 197 | .set = 0, | 208 | .set = 0, |
| 198 | }); | 209 | }); |
| 199 | 210 | descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, bank_info); | |
| 200 | descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout); | ||
| 201 | } | 211 | } |
| 202 | module = device.GetLogical().CreateShaderModule({ | 212 | module = device.GetLogical().CreateShaderModule({ |
| 203 | .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, | 213 | .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, |
| @@ -206,43 +216,34 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_ | |||
| 206 | .codeSize = static_cast<u32>(code.size_bytes()), | 216 | .codeSize = static_cast<u32>(code.size_bytes()), |
| 207 | .pCode = code.data(), | 217 | .pCode = code.data(), |
| 208 | }); | 218 | }); |
| 219 | device.SaveShader(code); | ||
| 209 | pipeline = device.GetLogical().CreateComputePipeline({ | 220 | pipeline = device.GetLogical().CreateComputePipeline({ |
| 210 | .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, | 221 | .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, |
| 211 | .pNext = nullptr, | 222 | .pNext = nullptr, |
| 212 | .flags = 0, | 223 | .flags = 0, |
| 213 | .stage = | 224 | .stage{ |
| 214 | { | 225 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, |
| 215 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, | 226 | .pNext = nullptr, |
| 216 | .pNext = nullptr, | 227 | .flags = 0, |
| 217 | .flags = 0, | 228 | .stage = VK_SHADER_STAGE_COMPUTE_BIT, |
| 218 | .stage = VK_SHADER_STAGE_COMPUTE_BIT, | 229 | .module = *module, |
| 219 | .module = *module, | 230 | .pName = "main", |
| 220 | .pName = "main", | 231 | .pSpecializationInfo = nullptr, |
| 221 | .pSpecializationInfo = nullptr, | 232 | }, |
| 222 | }, | ||
| 223 | .layout = *layout, | 233 | .layout = *layout, |
| 224 | .basePipelineHandle = nullptr, | 234 | .basePipelineHandle = nullptr, |
| 225 | .basePipelineIndex = 0, | 235 | .basePipelineIndex = 0, |
| 226 | }); | 236 | }); |
| 227 | } | 237 | } |
| 228 | 238 | ||
| 229 | VKComputePass::~VKComputePass() = default; | 239 | ComputePass::~ComputePass() = default; |
| 230 | 240 | ||
| 231 | VkDescriptorSet VKComputePass::CommitDescriptorSet( | 241 | Uint8Pass::Uint8Pass(const Device& device_, VKScheduler& scheduler_, |
| 232 | VKUpdateDescriptorQueue& update_descriptor_queue) { | 242 | DescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_, |
| 233 | if (!descriptor_template) { | ||
| 234 | return nullptr; | ||
| 235 | } | ||
| 236 | const VkDescriptorSet set = descriptor_allocator->Commit(); | ||
| 237 | update_descriptor_queue.Send(*descriptor_template, set); | ||
| 238 | return set; | ||
| 239 | } | ||
| 240 | |||
| 241 | Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, | ||
| 242 | VKDescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_, | ||
| 243 | VKUpdateDescriptorQueue& update_descriptor_queue_) | 243 | VKUpdateDescriptorQueue& update_descriptor_queue_) |
| 244 | : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(), | 244 | : ComputePass(device_, descriptor_pool, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS, |
| 245 | BuildInputOutputDescriptorUpdateTemplate(), {}, VULKAN_UINT8_COMP_SPV), | 245 | INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, {}, |
| 246 | VULKAN_UINT8_COMP_SPV), | ||
| 246 | scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, | 247 | scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, |
| 247 | update_descriptor_queue{update_descriptor_queue_} {} | 248 | update_descriptor_queue{update_descriptor_queue_} {} |
| 248 | 249 | ||
| @@ -256,11 +257,11 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer | |||
| 256 | update_descriptor_queue.Acquire(); | 257 | update_descriptor_queue.Acquire(); |
| 257 | update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); | 258 | update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); |
| 258 | update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); | 259 | update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); |
| 259 | const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); | 260 | const void* const descriptor_data{update_descriptor_queue.UpdateData()}; |
| 261 | const VkBuffer buffer{staging.buffer}; | ||
| 260 | 262 | ||
| 261 | scheduler.RequestOutsideRenderPassOperationContext(); | 263 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 262 | scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, | 264 | scheduler.Record([this, buffer, descriptor_data, num_vertices](vk::CommandBuffer cmdbuf) { |
| 263 | num_vertices](vk::CommandBuffer cmdbuf) { | ||
| 264 | static constexpr u32 DISPATCH_SIZE = 1024; | 265 | static constexpr u32 DISPATCH_SIZE = 1024; |
| 265 | static constexpr VkMemoryBarrier WRITE_BARRIER{ | 266 | static constexpr VkMemoryBarrier WRITE_BARRIER{ |
| 266 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | 267 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, |
| @@ -268,8 +269,10 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer | |||
| 268 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, | 269 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, |
| 269 | .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, | 270 | .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, |
| 270 | }; | 271 | }; |
| 271 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); | 272 | const VkDescriptorSet set = descriptor_allocator.Commit(); |
| 272 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); | 273 | device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); |
| 274 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); | ||
| 275 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); | ||
| 273 | cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1); | 276 | cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1); |
| 274 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | 277 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, |
| 275 | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER); | 278 | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER); |
| @@ -278,12 +281,12 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer | |||
| 278 | } | 281 | } |
| 279 | 282 | ||
| 280 | QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, | 283 | QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, |
| 281 | VKDescriptorPool& descriptor_pool_, | 284 | DescriptorPool& descriptor_pool_, |
| 282 | StagingBufferPool& staging_buffer_pool_, | 285 | StagingBufferPool& staging_buffer_pool_, |
| 283 | VKUpdateDescriptorQueue& update_descriptor_queue_) | 286 | VKUpdateDescriptorQueue& update_descriptor_queue_) |
| 284 | : VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(), | 287 | : ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS, |
| 285 | BuildInputOutputDescriptorUpdateTemplate(), | 288 | INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, |
| 286 | BuildComputePushConstantRange(sizeof(u32) * 2), VULKAN_QUAD_INDEXED_COMP_SPV), | 289 | COMPUTE_PUSH_CONSTANT_RANGE<sizeof(u32) * 2>, VULKAN_QUAD_INDEXED_COMP_SPV), |
| 287 | scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, | 290 | scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, |
| 288 | update_descriptor_queue{update_descriptor_queue_} {} | 291 | update_descriptor_queue{update_descriptor_queue_} {} |
| 289 | 292 | ||
| @@ -313,11 +316,11 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( | |||
| 313 | update_descriptor_queue.Acquire(); | 316 | update_descriptor_queue.Acquire(); |
| 314 | update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); | 317 | update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); |
| 315 | update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); | 318 | update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); |
| 316 | const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); | 319 | const void* const descriptor_data{update_descriptor_queue.UpdateData()}; |
| 317 | 320 | ||
| 318 | scheduler.RequestOutsideRenderPassOperationContext(); | 321 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 319 | scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, | 322 | scheduler.Record([this, buffer = staging.buffer, descriptor_data, num_tri_vertices, base_vertex, |
| 320 | num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) { | 323 | index_shift](vk::CommandBuffer cmdbuf) { |
| 321 | static constexpr u32 DISPATCH_SIZE = 1024; | 324 | static constexpr u32 DISPATCH_SIZE = 1024; |
| 322 | static constexpr VkMemoryBarrier WRITE_BARRIER{ | 325 | static constexpr VkMemoryBarrier WRITE_BARRIER{ |
| 323 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | 326 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, |
| @@ -325,10 +328,12 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( | |||
| 325 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, | 328 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, |
| 326 | .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, | 329 | .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, |
| 327 | }; | 330 | }; |
| 328 | const std::array push_constants = {base_vertex, index_shift}; | 331 | const std::array push_constants{base_vertex, index_shift}; |
| 329 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); | 332 | const VkDescriptorSet set = descriptor_allocator.Commit(); |
| 330 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); | 333 | device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); |
| 331 | cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), | 334 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); |
| 335 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); | ||
| 336 | cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), | ||
| 332 | &push_constants); | 337 | &push_constants); |
| 333 | cmdbuf.Dispatch(Common::DivCeil(num_tri_vertices, DISPATCH_SIZE), 1, 1); | 338 | cmdbuf.Dispatch(Common::DivCeil(num_tri_vertices, DISPATCH_SIZE), 1, 1); |
| 334 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | 339 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, |
| @@ -338,15 +343,14 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( | |||
| 338 | } | 343 | } |
| 339 | 344 | ||
| 340 | ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, | 345 | ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, |
| 341 | VKDescriptorPool& descriptor_pool_, | 346 | DescriptorPool& descriptor_pool_, |
| 342 | StagingBufferPool& staging_buffer_pool_, | 347 | StagingBufferPool& staging_buffer_pool_, |
| 343 | VKUpdateDescriptorQueue& update_descriptor_queue_, | 348 | VKUpdateDescriptorQueue& update_descriptor_queue_, |
| 344 | MemoryAllocator& memory_allocator_) | 349 | MemoryAllocator& memory_allocator_) |
| 345 | : VKComputePass(device_, descriptor_pool_, BuildASTCDescriptorSetBindings(), | 350 | : ComputePass(device_, descriptor_pool_, ASTC_DESCRIPTOR_SET_BINDINGS, |
| 346 | BuildASTCPassDescriptorUpdateTemplateEntry(), | 351 | ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY, ASTC_BANK_INFO, |
| 347 | BuildComputePushConstantRange(sizeof(AstcPushConstants)), | 352 | COMPUTE_PUSH_CONSTANT_RANGE<sizeof(AstcPushConstants)>, ASTC_DECODER_COMP_SPV), |
| 348 | ASTC_DECODER_COMP_SPV), | 353 | scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, |
| 349 | device{device_}, scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, | ||
| 350 | update_descriptor_queue{update_descriptor_queue_}, memory_allocator{memory_allocator_} {} | 354 | update_descriptor_queue{update_descriptor_queue_}, memory_allocator{memory_allocator_} {} |
| 351 | 355 | ||
| 352 | ASTCDecoderPass::~ASTCDecoderPass() = default; | 356 | ASTCDecoderPass::~ASTCDecoderPass() = default; |
| @@ -444,16 +448,14 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, | |||
| 444 | update_descriptor_queue.AddBuffer(*data_buffer, sizeof(ASTC_ENCODINGS_VALUES), | 448 | update_descriptor_queue.AddBuffer(*data_buffer, sizeof(ASTC_ENCODINGS_VALUES), |
| 445 | sizeof(SWIZZLE_TABLE)); | 449 | sizeof(SWIZZLE_TABLE)); |
| 446 | update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); | 450 | update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); |
| 447 | 451 | const void* const descriptor_data{update_descriptor_queue.UpdateData()}; | |
| 448 | const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); | ||
| 449 | const VkPipelineLayout vk_layout = *layout; | ||
| 450 | 452 | ||
| 451 | // To unswizzle the ASTC data | 453 | // To unswizzle the ASTC data |
| 452 | const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); | 454 | const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); |
| 453 | ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0})); | 455 | ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0})); |
| 454 | ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0})); | 456 | ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0})); |
| 455 | scheduler.Record([vk_layout, num_dispatches_x, num_dispatches_y, num_dispatches_z, | 457 | scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, block_dims, |
| 456 | block_dims, params, set](vk::CommandBuffer cmdbuf) { | 458 | params, descriptor_data](vk::CommandBuffer cmdbuf) { |
| 457 | const AstcPushConstants uniforms{ | 459 | const AstcPushConstants uniforms{ |
| 458 | .blocks_dims = block_dims, | 460 | .blocks_dims = block_dims, |
| 459 | .bytes_per_block_log2 = params.bytes_per_block_log2, | 461 | .bytes_per_block_log2 = params.bytes_per_block_log2, |
| @@ -463,8 +465,10 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, | |||
| 463 | .block_height = params.block_height, | 465 | .block_height = params.block_height, |
| 464 | .block_height_mask = params.block_height_mask, | 466 | .block_height_mask = params.block_height_mask, |
| 465 | }; | 467 | }; |
| 466 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, vk_layout, 0, set, {}); | 468 | const VkDescriptorSet set = descriptor_allocator.Commit(); |
| 467 | cmdbuf.PushConstants(vk_layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms); | 469 | device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); |
| 470 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); | ||
| 471 | cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms); | ||
| 468 | cmdbuf.Dispatch(num_dispatches_x, num_dispatches_y, num_dispatches_z); | 472 | cmdbuf.Dispatch(num_dispatches_x, num_dispatches_y, num_dispatches_z); |
| 469 | }); | 473 | }); |
| 470 | } | 474 | } |
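Note: the hunks above all converge on one pattern — the descriptor set is no longer committed on the thread that assembles the pass; instead the recorded lambda commits a set from the pass-owned allocator and writes it through the update template immediately before binding. A minimal sketch of that deferred pattern, reusing the member names visible in the diff (descriptor_allocator, descriptor_template, layout, pipeline, device); the dispatch size is a hypothetical stand-in and this is not compilable outside the emulator:

```cpp
scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) {
    // Commit a fresh set from the pass-owned allocator (recycled through the
    // master semaphore) and fill it with the queued descriptor data.
    const VkDescriptorSet set = descriptor_allocator.Commit();
    device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
    cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
    cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
    cmdbuf.Dispatch(num_groups_x, 1, 1); // num_groups_x: hypothetical dispatch size
});
```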
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index 5ea187c30..114aef2bd 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h | |||
| @@ -4,7 +4,6 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <optional> | ||
| 8 | #include <span> | 7 | #include <span> |
| 9 | #include <utility> | 8 | #include <utility> |
| 10 | 9 | ||
| @@ -27,31 +26,31 @@ class VKUpdateDescriptorQueue; | |||
| 27 | class Image; | 26 | class Image; |
| 28 | struct StagingBufferRef; | 27 | struct StagingBufferRef; |
| 29 | 28 | ||
| 30 | class VKComputePass { | 29 | class ComputePass { |
| 31 | public: | 30 | public: |
| 32 | explicit VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, | 31 | explicit ComputePass(const Device& device, DescriptorPool& descriptor_pool, |
| 33 | vk::Span<VkDescriptorSetLayoutBinding> bindings, | 32 | vk::Span<VkDescriptorSetLayoutBinding> bindings, |
| 34 | vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, | 33 | vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, |
| 35 | vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code); | 34 | const DescriptorBankInfo& bank_info, |
| 36 | ~VKComputePass(); | 35 | vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code); |
| 36 | ~ComputePass(); | ||
| 37 | 37 | ||
| 38 | protected: | 38 | protected: |
| 39 | VkDescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue); | 39 | const Device& device; |
| 40 | |||
| 41 | vk::DescriptorUpdateTemplateKHR descriptor_template; | 40 | vk::DescriptorUpdateTemplateKHR descriptor_template; |
| 42 | vk::PipelineLayout layout; | 41 | vk::PipelineLayout layout; |
| 43 | vk::Pipeline pipeline; | 42 | vk::Pipeline pipeline; |
| 43 | vk::DescriptorSetLayout descriptor_set_layout; | ||
| 44 | DescriptorAllocator descriptor_allocator; | ||
| 44 | 45 | ||
| 45 | private: | 46 | private: |
| 46 | vk::DescriptorSetLayout descriptor_set_layout; | ||
| 47 | std::optional<DescriptorAllocator> descriptor_allocator; | ||
| 48 | vk::ShaderModule module; | 47 | vk::ShaderModule module; |
| 49 | }; | 48 | }; |
| 50 | 49 | ||
| 51 | class Uint8Pass final : public VKComputePass { | 50 | class Uint8Pass final : public ComputePass { |
| 52 | public: | 51 | public: |
| 53 | explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_, | 52 | explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_, |
| 54 | VKDescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, | 53 | DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, |
| 55 | VKUpdateDescriptorQueue& update_descriptor_queue_); | 54 | VKUpdateDescriptorQueue& update_descriptor_queue_); |
| 56 | ~Uint8Pass(); | 55 | ~Uint8Pass(); |
| 57 | 56 | ||
| @@ -66,10 +65,10 @@ private: | |||
| 66 | VKUpdateDescriptorQueue& update_descriptor_queue; | 65 | VKUpdateDescriptorQueue& update_descriptor_queue; |
| 67 | }; | 66 | }; |
| 68 | 67 | ||
| 69 | class QuadIndexedPass final : public VKComputePass { | 68 | class QuadIndexedPass final : public ComputePass { |
| 70 | public: | 69 | public: |
| 71 | explicit QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, | 70 | explicit QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, |
| 72 | VKDescriptorPool& descriptor_pool_, | 71 | DescriptorPool& descriptor_pool_, |
| 73 | StagingBufferPool& staging_buffer_pool_, | 72 | StagingBufferPool& staging_buffer_pool_, |
| 74 | VKUpdateDescriptorQueue& update_descriptor_queue_); | 73 | VKUpdateDescriptorQueue& update_descriptor_queue_); |
| 75 | ~QuadIndexedPass(); | 74 | ~QuadIndexedPass(); |
| @@ -84,10 +83,10 @@ private: | |||
| 84 | VKUpdateDescriptorQueue& update_descriptor_queue; | 83 | VKUpdateDescriptorQueue& update_descriptor_queue; |
| 85 | }; | 84 | }; |
| 86 | 85 | ||
| 87 | class ASTCDecoderPass final : public VKComputePass { | 86 | class ASTCDecoderPass final : public ComputePass { |
| 88 | public: | 87 | public: |
| 89 | explicit ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, | 88 | explicit ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, |
| 90 | VKDescriptorPool& descriptor_pool_, | 89 | DescriptorPool& descriptor_pool_, |
| 91 | StagingBufferPool& staging_buffer_pool_, | 90 | StagingBufferPool& staging_buffer_pool_, |
| 92 | VKUpdateDescriptorQueue& update_descriptor_queue_, | 91 | VKUpdateDescriptorQueue& update_descriptor_queue_, |
| 93 | MemoryAllocator& memory_allocator_); | 92 | MemoryAllocator& memory_allocator_); |
| @@ -99,7 +98,6 @@ public: | |||
| 99 | private: | 98 | private: |
| 100 | void MakeDataBuffer(); | 99 | void MakeDataBuffer(); |
| 101 | 100 | ||
| 102 | const Device& device; | ||
| 103 | VKScheduler& scheduler; | 101 | VKScheduler& scheduler; |
| 104 | StagingBufferPool& staging_buffer_pool; | 102 | StagingBufferPool& staging_buffer_pool; |
| 105 | VKUpdateDescriptorQueue& update_descriptor_queue; | 103 | VKUpdateDescriptorQueue& update_descriptor_queue; |
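The renamed ComputePass base now takes a DescriptorBankInfo and owns its DescriptorAllocator by value, so both the std::optional wrapper and the CommitDescriptorSet() helper disappear. A hypothetical subclass wiring under the new signature — the EXAMPLE_* constants are stand-ins for per-pass tables like ASTC_DESCRIPTOR_SET_BINDINGS seen earlier in the diff:

```cpp
class ExamplePass final : public ComputePass {
public:
    explicit ExamplePass(const Device& device_, DescriptorPool& descriptor_pool_)
        : ComputePass(device_, descriptor_pool_, EXAMPLE_BINDINGS, EXAMPLE_TEMPLATE_ENTRIES,
                      EXAMPLE_BANK_INFO, EXAMPLE_PUSH_CONSTANT_RANGE, EXAMPLE_COMP_SPV) {}
    // The protected members (device, layout, descriptor_template,
    // descriptor_allocator) are used directly inside recorded lambdas,
    // as in the .cpp hunks above.
};
```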
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 3a48219b7..70b84c7a6 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | |||
| @@ -2,152 +2,198 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <algorithm> | ||
| 5 | #include <vector> | 6 | #include <vector> |
| 6 | 7 | ||
| 8 | #include <boost/container/small_vector.hpp> | ||
| 9 | |||
| 10 | #include "video_core/renderer_vulkan/pipeline_helper.h" | ||
| 11 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||
| 7 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | 12 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" |
| 8 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 13 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 9 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | 14 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" |
| 10 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 15 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 11 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | ||
| 12 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 16 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 17 | #include "video_core/shader_notify.h" | ||
| 13 | #include "video_core/vulkan_common/vulkan_device.h" | 18 | #include "video_core/vulkan_common/vulkan_device.h" |
| 14 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 19 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 15 | 20 | ||
| 16 | namespace Vulkan { | 21 | namespace Vulkan { |
| 17 | 22 | ||
| 18 | VKComputePipeline::VKComputePipeline(const Device& device_, VKScheduler& scheduler_, | 23 | using Shader::ImageBufferDescriptor; |
| 19 | VKDescriptorPool& descriptor_pool_, | 24 | using Tegra::Texture::TexturePair; |
| 20 | VKUpdateDescriptorQueue& update_descriptor_queue_, | 25 | |
| 21 | const SPIRVShader& shader_) | 26 | ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool, |
| 22 | : device{device_}, scheduler{scheduler_}, entries{shader_.entries}, | 27 | VKUpdateDescriptorQueue& update_descriptor_queue_, |
| 23 | descriptor_set_layout{CreateDescriptorSetLayout()}, | 28 | Common::ThreadWorker* thread_worker, |
| 24 | descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, | 29 | VideoCore::ShaderNotify* shader_notify, const Shader::Info& info_, |
| 25 | update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, | 30 | vk::ShaderModule spv_module_) |
| 26 | descriptor_template{CreateDescriptorUpdateTemplate()}, | 31 | : device{device_}, update_descriptor_queue{update_descriptor_queue_}, info{info_}, |
| 27 | shader_module{CreateShaderModule(shader_.code)}, pipeline{CreatePipeline()} {} | 32 | spv_module(std::move(spv_module_)) { |
| 28 | 33 | if (shader_notify) { | |
| 29 | VKComputePipeline::~VKComputePipeline() = default; | 34 | shader_notify->MarkShaderBuilding(); |
| 30 | |||
| 31 | VkDescriptorSet VKComputePipeline::CommitDescriptorSet() { | ||
| 32 | if (!descriptor_template) { | ||
| 33 | return {}; | ||
| 34 | } | ||
| 35 | const VkDescriptorSet set = descriptor_allocator.Commit(); | ||
| 36 | update_descriptor_queue.Send(*descriptor_template, set); | ||
| 37 | return set; | ||
| 38 | } | ||
| 39 | |||
| 40 | vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const { | ||
| 41 | std::vector<VkDescriptorSetLayoutBinding> bindings; | ||
| 42 | u32 binding = 0; | ||
| 43 | const auto add_bindings = [&](VkDescriptorType descriptor_type, std::size_t num_entries) { | ||
| 44 | // TODO(Rodrigo): Maybe make individual bindings here? | ||
| 45 | for (u32 bindpoint = 0; bindpoint < static_cast<u32>(num_entries); ++bindpoint) { | ||
| 46 | bindings.push_back({ | ||
| 47 | .binding = binding++, | ||
| 48 | .descriptorType = descriptor_type, | ||
| 49 | .descriptorCount = 1, | ||
| 50 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 51 | .pImmutableSamplers = nullptr, | ||
| 52 | }); | ||
| 53 | } | ||
| 54 | }; | ||
| 55 | add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size()); | ||
| 56 | add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size()); | ||
| 57 | add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.uniform_texels.size()); | ||
| 58 | add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size()); | ||
| 59 | add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, entries.storage_texels.size()); | ||
| 60 | add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size()); | ||
| 61 | |||
| 62 | return device.GetLogical().CreateDescriptorSetLayout({ | ||
| 63 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, | ||
| 64 | .pNext = nullptr, | ||
| 65 | .flags = 0, | ||
| 66 | .bindingCount = static_cast<u32>(bindings.size()), | ||
| 67 | .pBindings = bindings.data(), | ||
| 68 | }); | ||
| 69 | } | ||
| 70 | |||
| 71 | vk::PipelineLayout VKComputePipeline::CreatePipelineLayout() const { | ||
| 72 | return device.GetLogical().CreatePipelineLayout({ | ||
| 73 | .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, | ||
| 74 | .pNext = nullptr, | ||
| 75 | .flags = 0, | ||
| 76 | .setLayoutCount = 1, | ||
| 77 | .pSetLayouts = descriptor_set_layout.address(), | ||
| 78 | .pushConstantRangeCount = 0, | ||
| 79 | .pPushConstantRanges = nullptr, | ||
| 80 | }); | ||
| 81 | } | ||
| 82 | |||
| 83 | vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplate() const { | ||
| 84 | std::vector<VkDescriptorUpdateTemplateEntryKHR> template_entries; | ||
| 85 | u32 binding = 0; | ||
| 86 | u32 offset = 0; | ||
| 87 | FillDescriptorUpdateTemplateEntries(entries, binding, offset, template_entries); | ||
| 88 | if (template_entries.empty()) { | ||
| 89 | // If the shader doesn't use descriptor sets, skip template creation. | ||
| 90 | return {}; | ||
| 91 | } | 35 | } |
| 92 | 36 | std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), | |
| 93 | return device.GetLogical().CreateDescriptorUpdateTemplateKHR({ | 37 | uniform_buffer_sizes.begin()); |
| 94 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, | 38 | |
| 95 | .pNext = nullptr, | 39 | auto func{[this, &descriptor_pool, shader_notify] { |
| 96 | .flags = 0, | 40 | DescriptorLayoutBuilder builder{device}; |
| 97 | .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()), | 41 | builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); |
| 98 | .pDescriptorUpdateEntries = template_entries.data(), | 42 | |
| 99 | .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, | 43 | descriptor_set_layout = builder.CreateDescriptorSetLayout(false); |
| 100 | .descriptorSetLayout = *descriptor_set_layout, | 44 | pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout); |
| 101 | .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, | 45 | descriptor_update_template = |
| 102 | .pipelineLayout = *layout, | 46 | builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout, false); |
| 103 | .set = DESCRIPTOR_SET, | 47 | descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info); |
| 104 | }); | 48 | const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ |
| 105 | } | 49 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, |
| 106 | 50 | .pNext = nullptr, | |
| 107 | vk::ShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const { | 51 | .requiredSubgroupSize = GuestWarpSize, |
| 108 | device.SaveShader(code); | 52 | }; |
| 109 | 53 | pipeline = device.GetLogical().CreateComputePipeline({ | |
| 110 | return device.GetLogical().CreateShaderModule({ | 54 | .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, |
| 111 | .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, | 55 | .pNext = nullptr, |
| 112 | .pNext = nullptr, | 56 | .flags = 0, |
| 113 | .flags = 0, | 57 | .stage{ |
| 114 | .codeSize = code.size() * sizeof(u32), | ||
| 115 | .pCode = code.data(), | ||
| 116 | }); | ||
| 117 | } | ||
| 118 | |||
| 119 | vk::Pipeline VKComputePipeline::CreatePipeline() const { | ||
| 120 | |||
| 121 | VkComputePipelineCreateInfo ci{ | ||
| 122 | .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, | ||
| 123 | .pNext = nullptr, | ||
| 124 | .flags = 0, | ||
| 125 | .stage = | ||
| 126 | { | ||
| 127 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, | 58 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, |
| 128 | .pNext = nullptr, | 59 | .pNext = device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr, |
| 129 | .flags = 0, | 60 | .flags = 0, |
| 130 | .stage = VK_SHADER_STAGE_COMPUTE_BIT, | 61 | .stage = VK_SHADER_STAGE_COMPUTE_BIT, |
| 131 | .module = *shader_module, | 62 | .module = *spv_module, |
| 132 | .pName = "main", | 63 | .pName = "main", |
| 133 | .pSpecializationInfo = nullptr, | 64 | .pSpecializationInfo = nullptr, |
| 134 | }, | 65 | }, |
| 135 | .layout = *layout, | 66 | .layout = *pipeline_layout, |
| 136 | .basePipelineHandle = nullptr, | 67 | .basePipelineHandle = 0, |
| 137 | .basePipelineIndex = 0, | 68 | .basePipelineIndex = 0, |
| 138 | }; | 69 | }); |
| 139 | 70 | std::lock_guard lock{build_mutex}; | |
| 140 | const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ | 71 | is_built = true; |
| 141 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, | 72 | build_condvar.notify_one(); |
| 142 | .pNext = nullptr, | 73 | if (shader_notify) { |
| 143 | .requiredSubgroupSize = GuestWarpSize, | 74 | shader_notify->MarkShaderComplete(); |
| 144 | }; | 75 | } |
| 145 | 76 | }}; | |
| 146 | if (entries.uses_warps && device.IsGuestWarpSizeSupported(VK_SHADER_STAGE_COMPUTE_BIT)) { | 77 | if (thread_worker) { |
| 147 | ci.stage.pNext = &subgroup_size_ci; | 78 | thread_worker->QueueWork(std::move(func)); |
| 79 | } else { | ||
| 80 | func(); | ||
| 81 | } | ||
| 82 | } | ||
| 83 | |||
| 84 | void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, | ||
| 85 | Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler, | ||
| 86 | BufferCache& buffer_cache, TextureCache& texture_cache) { | ||
| 87 | update_descriptor_queue.Acquire(); | ||
| 88 | |||
| 89 | buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes); | ||
| 90 | buffer_cache.UnbindComputeStorageBuffers(); | ||
| 91 | size_t ssbo_index{}; | ||
| 92 | for (const auto& desc : info.storage_buffers_descriptors) { | ||
| 93 | ASSERT(desc.count == 1); | ||
| 94 | buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, | ||
| 95 | desc.is_written); | ||
| 96 | ++ssbo_index; | ||
| 148 | } | 97 | } |
| 149 | 98 | ||
| 150 | return device.GetLogical().CreateComputePipeline(ci); | 99 | texture_cache.SynchronizeComputeDescriptors(); |
| 100 | |||
| 101 | static constexpr size_t max_elements = 64; | ||
| 102 | std::array<ImageId, max_elements> image_view_ids; | ||
| 103 | boost::container::static_vector<u32, max_elements> image_view_indices; | ||
| 104 | boost::container::static_vector<VkSampler, max_elements> samplers; | ||
| 105 | |||
| 106 | const auto& qmd{kepler_compute.launch_description}; | ||
| 107 | const auto& cbufs{qmd.const_buffer_config}; | ||
| 108 | const bool via_header_index{qmd.linked_tsc != 0}; | ||
| 109 | const auto read_handle{[&](const auto& desc, u32 index) { | ||
| 110 | ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0); | ||
| 111 | const u32 index_offset{index << desc.size_shift}; | ||
| 112 | const u32 offset{desc.cbuf_offset + index_offset}; | ||
| 113 | const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset}; | ||
| 114 | if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> || | ||
| 115 | std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) { | ||
| 116 | if (desc.has_secondary) { | ||
| 117 | ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0); | ||
| 118 | const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset}; | ||
| 119 | const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() + | ||
| 120 | secondary_offset}; | ||
| 121 | const u32 lhs_raw{gpu_memory.Read<u32>(addr)}; | ||
| 122 | const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)}; | ||
| 123 | return TexturePair(lhs_raw | rhs_raw, via_header_index); | ||
| 124 | } | ||
| 125 | } | ||
| 126 | return TexturePair(gpu_memory.Read<u32>(addr), via_header_index); | ||
| 127 | }}; | ||
| 128 | const auto add_image{[&](const auto& desc) { | ||
| 129 | for (u32 index = 0; index < desc.count; ++index) { | ||
| 130 | const auto handle{read_handle(desc, index)}; | ||
| 131 | image_view_indices.push_back(handle.first); | ||
| 132 | } | ||
| 133 | }}; | ||
| 134 | std::ranges::for_each(info.texture_buffer_descriptors, add_image); | ||
| 135 | std::ranges::for_each(info.image_buffer_descriptors, add_image); | ||
| 136 | for (const auto& desc : info.texture_descriptors) { | ||
| 137 | for (u32 index = 0; index < desc.count; ++index) { | ||
| 138 | const auto handle{read_handle(desc, index)}; | ||
| 139 | image_view_indices.push_back(handle.first); | ||
| 140 | |||
| 141 | Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); | ||
| 142 | samplers.push_back(sampler->Handle()); | ||
| 143 | } | ||
| 144 | } | ||
| 145 | std::ranges::for_each(info.image_descriptors, add_image); | ||
| 146 | |||
| 147 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); | ||
| 148 | texture_cache.FillComputeImageViews(indices_span, image_view_ids); | ||
| 149 | |||
| 150 | buffer_cache.UnbindComputeTextureBuffers(); | ||
| 151 | ImageId* texture_buffer_ids{image_view_ids.data()}; | ||
| 152 | size_t index{}; | ||
| 153 | const auto add_buffer{[&](const auto& desc) { | ||
| 154 | constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>; | ||
| 155 | for (u32 i = 0; i < desc.count; ++i) { | ||
| 156 | bool is_written{false}; | ||
| 157 | if constexpr (is_image) { | ||
| 158 | is_written = desc.is_written; | ||
| 159 | } | ||
| 160 | ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids); | ||
| 161 | buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(), | ||
| 162 | image_view.BufferSize(), image_view.format, | ||
| 163 | is_written, is_image); | ||
| 164 | ++texture_buffer_ids; | ||
| 165 | ++index; | ||
| 166 | } | ||
| 167 | }}; | ||
| 168 | std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); | ||
| 169 | std::ranges::for_each(info.image_buffer_descriptors, add_buffer); | ||
| 170 | |||
| 171 | buffer_cache.UpdateComputeBuffers(); | ||
| 172 | buffer_cache.BindHostComputeBuffers(); | ||
| 173 | |||
| 174 | const VkSampler* samplers_it{samplers.data()}; | ||
| 175 | const ImageId* views_it{image_view_ids.data()}; | ||
| 176 | PushImageDescriptors(info, samplers_it, views_it, texture_cache, update_descriptor_queue); | ||
| 177 | |||
| 178 | if (!is_built.load(std::memory_order::relaxed)) { | ||
| 179 | // Wait for the pipeline to be built | ||
| 180 | scheduler.Record([this](vk::CommandBuffer) { | ||
| 181 | std::unique_lock lock{build_mutex}; | ||
| 182 | build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); | ||
| 183 | }); | ||
| 184 | } | ||
| 185 | const void* const descriptor_data{update_descriptor_queue.UpdateData()}; | ||
| 186 | scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) { | ||
| 187 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); | ||
| 188 | if (!descriptor_set_layout) { | ||
| 189 | return; | ||
| 190 | } | ||
| 191 | const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; | ||
| 192 | const vk::Device& dev{device.GetLogical()}; | ||
| 193 | dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data); | ||
| 194 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0, | ||
| 195 | descriptor_set, nullptr); | ||
| 196 | }); | ||
| 151 | } | 197 | } |
| 152 | 198 | ||
| 153 | } // namespace Vulkan | 199 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 7e16575ac..52fec04d3 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h | |||
| @@ -4,61 +4,63 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <atomic> | ||
| 8 | #include <condition_variable> | ||
| 9 | #include <mutex> | ||
| 10 | |||
| 7 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "common/thread_worker.h" | ||
| 13 | #include "shader_recompiler/shader_info.h" | ||
| 14 | #include "video_core/memory_manager.h" | ||
| 15 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||
| 8 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 16 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 9 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | 17 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| 18 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | ||
| 10 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 19 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 11 | 20 | ||
| 21 | namespace VideoCore { | ||
| 22 | class ShaderNotify; | ||
| 23 | } | ||
| 24 | |||
| 12 | namespace Vulkan { | 25 | namespace Vulkan { |
| 13 | 26 | ||
| 14 | class Device; | 27 | class Device; |
| 15 | class VKScheduler; | 28 | class VKScheduler; |
| 16 | class VKUpdateDescriptorQueue; | ||
| 17 | 29 | ||
| 18 | class VKComputePipeline final { | 30 | class ComputePipeline { |
| 19 | public: | 31 | public: |
| 20 | explicit VKComputePipeline(const Device& device_, VKScheduler& scheduler_, | 32 | explicit ComputePipeline(const Device& device, DescriptorPool& descriptor_pool, |
| 21 | VKDescriptorPool& descriptor_pool_, | 33 | VKUpdateDescriptorQueue& update_descriptor_queue, |
| 22 | VKUpdateDescriptorQueue& update_descriptor_queue_, | 34 | Common::ThreadWorker* thread_worker, |
| 23 | const SPIRVShader& shader_); | 35 | VideoCore::ShaderNotify* shader_notify, const Shader::Info& info, |
| 24 | ~VKComputePipeline(); | 36 | vk::ShaderModule spv_module); |
| 25 | |||
| 26 | VkDescriptorSet CommitDescriptorSet(); | ||
| 27 | 37 | ||
| 28 | VkPipeline GetHandle() const { | 38 | ComputePipeline& operator=(ComputePipeline&&) noexcept = delete; |
| 29 | return *pipeline; | 39 | ComputePipeline(ComputePipeline&&) noexcept = delete; |
| 30 | } | ||
| 31 | 40 | ||
| 32 | VkPipelineLayout GetLayout() const { | 41 | ComputePipeline& operator=(const ComputePipeline&) = delete; |
| 33 | return *layout; | 42 | ComputePipeline(const ComputePipeline&) = delete; |
| 34 | } | ||
| 35 | 43 | ||
| 36 | const ShaderEntries& GetEntries() const { | 44 | void Configure(Tegra::Engines::KeplerCompute& kepler_compute, Tegra::MemoryManager& gpu_memory, |
| 37 | return entries; | 45 | VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache); |
| 38 | } | ||
| 39 | 46 | ||
| 40 | private: | 47 | private: |
| 41 | vk::DescriptorSetLayout CreateDescriptorSetLayout() const; | ||
| 42 | |||
| 43 | vk::PipelineLayout CreatePipelineLayout() const; | ||
| 44 | |||
| 45 | vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate() const; | ||
| 46 | |||
| 47 | vk::ShaderModule CreateShaderModule(const std::vector<u32>& code) const; | ||
| 48 | |||
| 49 | vk::Pipeline CreatePipeline() const; | ||
| 50 | |||
| 51 | const Device& device; | 48 | const Device& device; |
| 52 | VKScheduler& scheduler; | 49 | VKUpdateDescriptorQueue& update_descriptor_queue; |
| 53 | ShaderEntries entries; | 50 | Shader::Info info; |
| 54 | 51 | ||
| 52 | VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{}; | ||
| 53 | |||
| 54 | vk::ShaderModule spv_module; | ||
| 55 | vk::DescriptorSetLayout descriptor_set_layout; | 55 | vk::DescriptorSetLayout descriptor_set_layout; |
| 56 | DescriptorAllocator descriptor_allocator; | 56 | DescriptorAllocator descriptor_allocator; |
| 57 | VKUpdateDescriptorQueue& update_descriptor_queue; | 57 | vk::PipelineLayout pipeline_layout; |
| 58 | vk::PipelineLayout layout; | 58 | vk::DescriptorUpdateTemplateKHR descriptor_update_template; |
| 59 | vk::DescriptorUpdateTemplateKHR descriptor_template; | ||
| 60 | vk::ShaderModule shader_module; | ||
| 61 | vk::Pipeline pipeline; | 59 | vk::Pipeline pipeline; |
| 60 | |||
| 61 | std::condition_variable build_condvar; | ||
| 62 | std::mutex build_mutex; | ||
| 63 | std::atomic_bool is_built{false}; | ||
| 62 | }; | 64 | }; |
| 63 | 65 | ||
| 64 | } // namespace Vulkan | 66 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp index ef9fb5910..8e77e4796 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp | |||
| @@ -2,6 +2,8 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <mutex> | ||
| 6 | #include <span> | ||
| 5 | #include <vector> | 7 | #include <vector> |
| 6 | 8 | ||
| 7 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| @@ -13,79 +15,149 @@ | |||
| 13 | 15 | ||
| 14 | namespace Vulkan { | 16 | namespace Vulkan { |
| 15 | 17 | ||
| 16 | // Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines. | 18 | // Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines |
| 17 | constexpr std::size_t SETS_GROW_RATE = 0x20; | 19 | constexpr size_t SETS_GROW_RATE = 16; |
| 20 | constexpr s32 SCORE_THRESHOLD = 3; | ||
| 21 | constexpr u32 SETS_PER_POOL = 64; | ||
| 18 | 22 | ||
| 19 | DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool_, | 23 | struct DescriptorBank { |
| 20 | VkDescriptorSetLayout layout_) | 24 | DescriptorBankInfo info; |
| 21 | : ResourcePool(descriptor_pool_.master_semaphore, SETS_GROW_RATE), | 25 | std::vector<vk::DescriptorPool> pools; |
| 22 | descriptor_pool{descriptor_pool_}, layout{layout_} {} | 26 | }; |
| 23 | 27 | ||
| 24 | DescriptorAllocator::~DescriptorAllocator() = default; | 28 | bool DescriptorBankInfo::IsSuperset(const DescriptorBankInfo& subset) const noexcept { |
| 29 | return uniform_buffers >= subset.uniform_buffers && storage_buffers >= subset.storage_buffers && | ||
| 30 | texture_buffers >= subset.texture_buffers && image_buffers >= subset.image_buffers && | ||
| 31 | textures >= subset.textures && images >= subset.images; | ||
| 32 | } | ||
| 25 | 33 | ||
| 26 | VkDescriptorSet DescriptorAllocator::Commit() { | 34 | template <typename Descriptors> |
| 27 | const std::size_t index = CommitResource(); | 35 | static u32 Accumulate(const Descriptors& descriptors) { |
| 28 | return descriptors_allocations[index / SETS_GROW_RATE][index % SETS_GROW_RATE]; | 36 | u32 count = 0; |
| 37 | for (const auto& descriptor : descriptors) { | ||
| 38 | count += descriptor.count; | ||
| 39 | } | ||
| 40 | return count; | ||
| 29 | } | 41 | } |
| 30 | 42 | ||
| 31 | void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) { | 43 | static DescriptorBankInfo MakeBankInfo(std::span<const Shader::Info> infos) { |
| 32 | descriptors_allocations.push_back(descriptor_pool.AllocateDescriptors(layout, end - begin)); | 44 | DescriptorBankInfo bank; |
| 45 | for (const Shader::Info& info : infos) { | ||
| 46 | bank.uniform_buffers += Accumulate(info.constant_buffer_descriptors); | ||
| 47 | bank.storage_buffers += Accumulate(info.storage_buffers_descriptors); | ||
| 48 | bank.texture_buffers += Accumulate(info.texture_buffer_descriptors); | ||
| 49 | bank.image_buffers += Accumulate(info.image_buffer_descriptors); | ||
| 50 | bank.textures += Accumulate(info.texture_descriptors); | ||
| 51 | bank.images += Accumulate(info.image_descriptors); | ||
| 52 | } | ||
| 53 | bank.score = bank.uniform_buffers + bank.storage_buffers + bank.texture_buffers + | ||
| 54 | bank.image_buffers + bank.textures + bank.images; | ||
| 55 | return bank; | ||
| 33 | } | 56 | } |
| 34 | 57 | ||
| 35 | VKDescriptorPool::VKDescriptorPool(const Device& device_, VKScheduler& scheduler) | 58 | static void AllocatePool(const Device& device, DescriptorBank& bank) { |
| 36 | : device{device_}, master_semaphore{scheduler.GetMasterSemaphore()}, active_pool{ | 59 | std::array<VkDescriptorPoolSize, 6> pool_sizes; |
| 37 | AllocateNewPool()} {} | 60 | size_t pool_cursor{}; |
| 38 | 61 | const auto add = [&](VkDescriptorType type, u32 count) { | |
| 39 | VKDescriptorPool::~VKDescriptorPool() = default; | 62 | if (count > 0) { |
| 40 | 63 | pool_sizes[pool_cursor++] = { | |
| 41 | vk::DescriptorPool* VKDescriptorPool::AllocateNewPool() { | 64 | .type = type, |
| 42 | static constexpr u32 num_sets = 0x20000; | 65 | .descriptorCount = count * SETS_PER_POOL, |
| 43 | static constexpr VkDescriptorPoolSize pool_sizes[] = { | 66 | }; |
| 44 | {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, num_sets * 90}, | 67 | } |
| 45 | {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, num_sets * 60}, | ||
| 46 | {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64}, | ||
| 47 | {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64}, | ||
| 48 | {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, num_sets * 64}, | ||
| 49 | {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}, | ||
| 50 | }; | 68 | }; |
| 51 | 69 | const auto& info{bank.info}; | |
| 52 | const VkDescriptorPoolCreateInfo ci{ | 70 | add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, info.uniform_buffers); |
| 71 | add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, info.storage_buffers); | ||
| 72 | add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, info.texture_buffers); | ||
| 73 | add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, info.image_buffers); | ||
| 74 | add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, info.textures); | ||
| 75 | add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, info.images); | ||
| 76 | bank.pools.push_back(device.GetLogical().CreateDescriptorPool({ | ||
| 53 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, | 77 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, |
| 54 | .pNext = nullptr, | 78 | .pNext = nullptr, |
| 55 | .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, | 79 | .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, |
| 56 | .maxSets = num_sets, | 80 | .maxSets = SETS_PER_POOL, |
| 57 | .poolSizeCount = static_cast<u32>(std::size(pool_sizes)), | 81 | .poolSizeCount = static_cast<u32>(pool_cursor), |
| 58 | .pPoolSizes = std::data(pool_sizes), | 82 | .pPoolSizes = std::data(pool_sizes), |
| 59 | }; | 83 | })); |
| 60 | return &pools.emplace_back(device.GetLogical().CreateDescriptorPool(ci)); | 84 | } |
| 85 | |||
| 86 | DescriptorAllocator::DescriptorAllocator(const Device& device_, MasterSemaphore& master_semaphore_, | ||
| 87 | DescriptorBank& bank_, VkDescriptorSetLayout layout_) | ||
| 88 | : ResourcePool(master_semaphore_, SETS_GROW_RATE), device{&device_}, bank{&bank_}, | ||
| 89 | layout{layout_} {} | ||
| 90 | |||
| 91 | VkDescriptorSet DescriptorAllocator::Commit() { | ||
| 92 | const size_t index = CommitResource(); | ||
| 93 | return sets[index / SETS_GROW_RATE][index % SETS_GROW_RATE]; | ||
| 61 | } | 94 | } |
| 62 | 95 | ||
| 63 | vk::DescriptorSets VKDescriptorPool::AllocateDescriptors(VkDescriptorSetLayout layout, | 96 | void DescriptorAllocator::Allocate(size_t begin, size_t end) { |
| 64 | std::size_t count) { | 97 | sets.push_back(AllocateDescriptors(end - begin)); |
| 65 | const std::vector layout_copies(count, layout); | 98 | } |
| 66 | VkDescriptorSetAllocateInfo ai{ | 99 | |
| 100 | vk::DescriptorSets DescriptorAllocator::AllocateDescriptors(size_t count) { | ||
| 101 | const std::vector<VkDescriptorSetLayout> layouts(count, layout); | ||
| 102 | VkDescriptorSetAllocateInfo allocate_info{ | ||
| 67 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, | 103 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, |
| 68 | .pNext = nullptr, | 104 | .pNext = nullptr, |
| 69 | .descriptorPool = **active_pool, | 105 | .descriptorPool = *bank->pools.back(), |
| 70 | .descriptorSetCount = static_cast<u32>(count), | 106 | .descriptorSetCount = static_cast<u32>(count), |
| 71 | .pSetLayouts = layout_copies.data(), | 107 | .pSetLayouts = layouts.data(), |
| 72 | }; | 108 | }; |
| 73 | 109 | vk::DescriptorSets new_sets = bank->pools.back().Allocate(allocate_info); | |
| 74 | vk::DescriptorSets sets = active_pool->Allocate(ai); | 110 | if (!new_sets.IsOutOfPoolMemory()) { |
| 75 | if (!sets.IsOutOfPoolMemory()) { | 111 | return new_sets; |
| 76 | return sets; | ||
| 77 | } | 112 | } |
| 78 | |||
| 79 | // Our current pool is out of memory. Allocate a new one and retry | 113 | // Our current pool is out of memory. Allocate a new one and retry |
| 80 | active_pool = AllocateNewPool(); | 114 | AllocatePool(*device, *bank); |
| 81 | ai.descriptorPool = **active_pool; | 115 | allocate_info.descriptorPool = *bank->pools.back(); |
| 82 | sets = active_pool->Allocate(ai); | 116 | new_sets = bank->pools.back().Allocate(allocate_info); |
| 83 | if (!sets.IsOutOfPoolMemory()) { | 117 | if (!new_sets.IsOutOfPoolMemory()) { |
| 84 | return sets; | 118 | return new_sets; |
| 85 | } | 119 | } |
| 86 | |||
| 87 | // After allocating a new pool, we are out of memory again. We can't handle this from here. | 120 | // After allocating a new pool, we are out of memory again. We can't handle this from here. |
| 88 | throw vk::Exception(VK_ERROR_OUT_OF_POOL_MEMORY); | 121 | throw vk::Exception(VK_ERROR_OUT_OF_POOL_MEMORY); |
| 89 | } | 122 | } |
| 90 | 123 | ||
| 124 | DescriptorPool::DescriptorPool(const Device& device_, VKScheduler& scheduler) | ||
| 125 | : device{device_}, master_semaphore{scheduler.GetMasterSemaphore()} {} | ||
| 126 | |||
| 127 | DescriptorPool::~DescriptorPool() = default; | ||
| 128 | |||
| 129 | DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout, | ||
| 130 | std::span<const Shader::Info> infos) { | ||
| 131 | return Allocator(layout, MakeBankInfo(infos)); | ||
| 132 | } | ||
| 133 | |||
| 134 | DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout, | ||
| 135 | const Shader::Info& info) { | ||
| 136 | return Allocator(layout, MakeBankInfo(std::array{info})); | ||
| 137 | } | ||
| 138 | |||
| 139 | DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout, | ||
| 140 | const DescriptorBankInfo& info) { | ||
| 141 | return DescriptorAllocator(device, master_semaphore, Bank(info), layout); | ||
| 142 | } | ||
| 143 | |||
| 144 | DescriptorBank& DescriptorPool::Bank(const DescriptorBankInfo& reqs) { | ||
| 145 | std::shared_lock read_lock{banks_mutex}; | ||
| 146 | const auto it = std::ranges::find_if(bank_infos, [&reqs](const DescriptorBankInfo& bank) { | ||
| 147 | return std::abs(bank.score - reqs.score) < SCORE_THRESHOLD && bank.IsSuperset(reqs); | ||
| 148 | }); | ||
| 149 | if (it != bank_infos.end()) { | ||
| 150 | return *banks[std::distance(bank_infos.begin(), it)].get(); | ||
| 151 | } | ||
| 152 | read_lock.unlock(); | ||
| 153 | |||
| 154 | std::unique_lock write_lock{banks_mutex}; | ||
| 155 | bank_infos.push_back(reqs); | ||
| 156 | |||
| 157 | auto& bank = *banks.emplace_back(std::make_unique<DescriptorBank>()); | ||
| 158 | bank.info = reqs; | ||
| 159 | AllocatePool(device, bank); | ||
| 160 | return bank; | ||
| 161 | } | ||
| 162 | |||
| 91 | } // namespace Vulkan | 163 | } // namespace Vulkan |
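The pool no longer grows a single giant VkDescriptorPool; it keeps per-profile banks and reuses one when the request is close enough. The matching policy — total descriptor count (score) within SCORE_THRESHOLD, plus a per-type superset check — is self-contained and can be sketched as plain C++; BankInfo and FindBank are illustrative reductions of DescriptorBankInfo and DescriptorPool::Bank:

```cpp
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <vector>

// A bank is reused when its total descriptor count (score) is close to the
// request and it offers at least as many descriptors of every type.
struct BankInfo {
    std::uint32_t uniform_buffers{}, storage_buffers{}, textures{}, images{};
    std::int32_t score{};

    bool IsSuperset(const BankInfo& subset) const noexcept {
        return uniform_buffers >= subset.uniform_buffers &&
               storage_buffers >= subset.storage_buffers &&
               textures >= subset.textures && images >= subset.images;
    }
};

constexpr std::int32_t SCORE_THRESHOLD = 3; // same tolerance as the diff

// Returns the index of a reusable bank, or -1 when a new bank (and its first
// VkDescriptorPool, sized by SETS_PER_POOL) has to be created.
int FindBank(const std::vector<BankInfo>& banks, const BankInfo& reqs) {
    for (std::size_t i = 0; i < banks.size(); ++i) {
        if (std::abs(banks[i].score - reqs.score) < SCORE_THRESHOLD && banks[i].IsSuperset(reqs)) {
            return static_cast<int>(i);
        }
    }
    return -1;
}
```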
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h index f892be7be..59466aac5 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h | |||
| @@ -4,57 +4,85 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <shared_mutex> | ||
| 8 | #include <span> | ||
| 7 | #include <vector> | 9 | #include <vector> |
| 8 | 10 | ||
| 11 | #include "shader_recompiler/shader_info.h" | ||
| 9 | #include "video_core/renderer_vulkan/vk_resource_pool.h" | 12 | #include "video_core/renderer_vulkan/vk_resource_pool.h" |
| 10 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 13 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 11 | 14 | ||
| 12 | namespace Vulkan { | 15 | namespace Vulkan { |
| 13 | 16 | ||
| 14 | class Device; | 17 | class Device; |
| 15 | class VKDescriptorPool; | ||
| 16 | class VKScheduler; | 18 | class VKScheduler; |
| 17 | 19 | ||
| 20 | struct DescriptorBank; | ||
| 21 | |||
| 22 | struct DescriptorBankInfo { | ||
| 23 | [[nodiscard]] bool IsSuperset(const DescriptorBankInfo& subset) const noexcept; | ||
| 24 | |||
| 25 | u32 uniform_buffers{}; ///< Number of uniform buffer descriptors | ||
| 26 | u32 storage_buffers{}; ///< Number of storage buffer descriptors | ||
| 27 | u32 texture_buffers{}; ///< Number of texture buffer descriptors | ||
| 28 | u32 image_buffers{}; ///< Number of image buffer descriptors | ||
| 29 | u32 textures{}; ///< Number of texture descriptors | ||
| 30 | u32 images{}; ///< Number of image descriptors | ||
| 31 | s32 score{}; ///< Number of descriptors in total | ||
| 32 | }; | ||
| 33 | |||
| 18 | class DescriptorAllocator final : public ResourcePool { | 34 | class DescriptorAllocator final : public ResourcePool { |
| 35 | friend class DescriptorPool; | ||
| 36 | |||
| 19 | public: | 37 | public: |
| 20 | explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, VkDescriptorSetLayout layout); | 38 | explicit DescriptorAllocator() = default; |
| 21 | ~DescriptorAllocator() override; | 39 | ~DescriptorAllocator() override = default; |
| 40 | |||
| 41 | DescriptorAllocator& operator=(DescriptorAllocator&&) noexcept = default; | ||
| 42 | DescriptorAllocator(DescriptorAllocator&&) noexcept = default; | ||
| 22 | 43 | ||
| 23 | DescriptorAllocator& operator=(const DescriptorAllocator&) = delete; | 44 | DescriptorAllocator& operator=(const DescriptorAllocator&) = delete; |
| 24 | DescriptorAllocator(const DescriptorAllocator&) = delete; | 45 | DescriptorAllocator(const DescriptorAllocator&) = delete; |
| 25 | 46 | ||
| 26 | VkDescriptorSet Commit(); | 47 | VkDescriptorSet Commit(); |
| 27 | 48 | ||
| 28 | protected: | ||
| 29 | void Allocate(std::size_t begin, std::size_t end) override; | ||
| 30 | |||
| 31 | private: | 49 | private: |
| 32 | VKDescriptorPool& descriptor_pool; | 50 | explicit DescriptorAllocator(const Device& device_, MasterSemaphore& master_semaphore_, |
| 33 | const VkDescriptorSetLayout layout; | 51 | DescriptorBank& bank_, VkDescriptorSetLayout layout_); |
| 34 | 52 | ||
| 35 | std::vector<vk::DescriptorSets> descriptors_allocations; | 53 | void Allocate(size_t begin, size_t end) override; |
| 36 | }; | 54 | |
| 55 | vk::DescriptorSets AllocateDescriptors(size_t count); | ||
| 56 | |||
| 57 | const Device* device{}; | ||
| 58 | DescriptorBank* bank{}; | ||
| 59 | VkDescriptorSetLayout layout{}; | ||
| 37 | 60 | ||
| 38 | class VKDescriptorPool final { | 61 | std::vector<vk::DescriptorSets> sets; |
| 39 | friend DescriptorAllocator; | 62 | }; |
| 40 | 63 | ||
| 64 | class DescriptorPool { | ||
| 41 | public: | 65 | public: |
| 42 | explicit VKDescriptorPool(const Device& device, VKScheduler& scheduler); | 66 | explicit DescriptorPool(const Device& device, VKScheduler& scheduler); |
| 43 | ~VKDescriptorPool(); | 67 | ~DescriptorPool(); |
| 44 | 68 | ||
| 45 | VKDescriptorPool(const VKDescriptorPool&) = delete; | 69 | DescriptorPool& operator=(const DescriptorPool&) = delete; |
| 46 | VKDescriptorPool& operator=(const VKDescriptorPool&) = delete; | 70 | DescriptorPool(const DescriptorPool&) = delete; |
| 47 | 71 | ||
| 48 | private: | 72 | DescriptorAllocator Allocator(VkDescriptorSetLayout layout, |
| 49 | vk::DescriptorPool* AllocateNewPool(); | 73 | std::span<const Shader::Info> infos); |
| 74 | DescriptorAllocator Allocator(VkDescriptorSetLayout layout, const Shader::Info& info); | ||
| 75 | DescriptorAllocator Allocator(VkDescriptorSetLayout layout, const DescriptorBankInfo& info); | ||
| 50 | 76 | ||
| 51 | vk::DescriptorSets AllocateDescriptors(VkDescriptorSetLayout layout, std::size_t count); | 77 | private: |
| 78 | DescriptorBank& Bank(const DescriptorBankInfo& reqs); | ||
| 52 | 79 | ||
| 53 | const Device& device; | 80 | const Device& device; |
| 54 | MasterSemaphore& master_semaphore; | 81 | MasterSemaphore& master_semaphore; |
| 55 | 82 | ||
| 56 | std::vector<vk::DescriptorPool> pools; | 83 | std::shared_mutex banks_mutex; |
| 57 | vk::DescriptorPool* active_pool; | 84 | std::vector<DescriptorBankInfo> bank_infos; |
| 85 | std::vector<std::unique_ptr<DescriptorBank>> banks; | ||
| 58 | }; | 86 | }; |
| 59 | 87 | ||
| 60 | } // namespace Vulkan \ No newline at end of file | 88 | } // namespace Vulkan \ No newline at end of file |
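Hypothetical usage of the new interface, matching the header above: a pipeline requests an allocator sized from its Shader::Info once at creation, then commits one set per use, with recycling handled by the inherited ResourcePool/MasterSemaphore machinery:

```cpp
// At pipeline creation (descriptor_set_layout and shader_info belong to the
// pipeline being built; names here follow the header above):
DescriptorAllocator allocator = descriptor_pool.Allocator(*descriptor_set_layout, shader_info);
// Per draw/dispatch:
const VkDescriptorSet set = allocator.Commit();
```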
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index fc6dd83eb..18482e1d0 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | |||
| @@ -1,29 +1,58 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | 1 | // Copyright 2021 yuzu Emulator Project |
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <array> | 6 | #include <span> |
| 7 | #include <cstring> | ||
| 8 | #include <vector> | ||
| 9 | 7 | ||
| 10 | #include "common/common_types.h" | 8 | #include <boost/container/small_vector.hpp> |
| 11 | #include "common/microprofile.h" | 9 | #include <boost/container/static_vector.hpp> |
| 12 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | 10 | |
| 11 | #include "common/bit_field.h" | ||
| 13 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | 12 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" |
| 14 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 13 | #include "video_core/renderer_vulkan/pipeline_helper.h" |
| 14 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||
| 15 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | 15 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" |
| 16 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | 16 | #include "video_core/renderer_vulkan/vk_render_pass_cache.h" |
| 17 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 17 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 18 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | ||
| 18 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 19 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 20 | #include "video_core/shader_notify.h" | ||
| 19 | #include "video_core/vulkan_common/vulkan_device.h" | 21 | #include "video_core/vulkan_common/vulkan_device.h" |
| 20 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 21 | |||
| 22 | namespace Vulkan { | ||
| 23 | 22 | ||
| 24 | MICROPROFILE_DECLARE(Vulkan_PipelineCache); | 23 | #if defined(_MSC_VER) && defined(NDEBUG) |
| 24 | #define LAMBDA_FORCEINLINE [[msvc::forceinline]] | ||
| 25 | #else | ||
| 26 | #define LAMBDA_FORCEINLINE | ||
| 27 | #endif | ||
| 25 | 28 | ||
| 29 | namespace Vulkan { | ||
| 26 | namespace { | 30 | namespace { |
| 31 | using boost::container::small_vector; | ||
| 32 | using boost::container::static_vector; | ||
| 33 | using Shader::ImageBufferDescriptor; | ||
| 34 | using Tegra::Texture::TexturePair; | ||
| 35 | using VideoCore::Surface::PixelFormat; | ||
| 36 | using VideoCore::Surface::PixelFormatFromDepthFormat; | ||
| 37 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; | ||
| 38 | |||
| 39 | constexpr size_t NUM_STAGES = Maxwell::MaxShaderStage; | ||
| 40 | constexpr size_t MAX_IMAGE_ELEMENTS = 64; | ||
| 41 | |||
| 42 | DescriptorLayoutBuilder MakeBuilder(const Device& device, std::span<const Shader::Info> infos) { | ||
| 43 | DescriptorLayoutBuilder builder{device}; | ||
| 44 | for (size_t index = 0; index < infos.size(); ++index) { | ||
| 45 | static constexpr std::array stages{ | ||
| 46 | VK_SHADER_STAGE_VERTEX_BIT, | ||
| 47 | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT, | ||
| 48 | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT, | ||
| 49 | VK_SHADER_STAGE_GEOMETRY_BIT, | ||
| 50 | VK_SHADER_STAGE_FRAGMENT_BIT, | ||
| 51 | }; | ||
| 52 | builder.Add(infos[index], stages.at(index)); | ||
| 53 | } | ||
| 54 | return builder; | ||
| 55 | } | ||
| 27 | 56 | ||
| 28 | template <class StencilFace> | 57 | template <class StencilFace> |
| 29 | VkStencilOpState GetStencilFaceState(const StencilFace& face) { | 58 | VkStencilOpState GetStencilFaceState(const StencilFace& face) { |
| @@ -39,15 +68,24 @@ VkStencilOpState GetStencilFaceState(const StencilFace& face) { | |||
| 39 | } | 68 | } |
| 40 | 69 | ||
| 41 | bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) { | 70 | bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) { |
| 42 | static constexpr std::array unsupported_topologies = { | 71 | static constexpr std::array unsupported_topologies{ |
| 43 | VK_PRIMITIVE_TOPOLOGY_POINT_LIST, | 72 | VK_PRIMITIVE_TOPOLOGY_POINT_LIST, |
| 44 | VK_PRIMITIVE_TOPOLOGY_LINE_LIST, | 73 | VK_PRIMITIVE_TOPOLOGY_LINE_LIST, |
| 45 | VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, | 74 | VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, |
| 46 | VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY, | 75 | VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY, |
| 47 | VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY, | 76 | VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY, |
| 48 | VK_PRIMITIVE_TOPOLOGY_PATCH_LIST}; | 77 | VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, |
| 49 | return std::find(std::begin(unsupported_topologies), std::end(unsupported_topologies), | 78 | // VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT, |
| 50 | topology) == std::end(unsupported_topologies); | 79 | }; |
| 80 | return std::ranges::find(unsupported_topologies, topology) == unsupported_topologies.end(); | ||
| 81 | } | ||
| 82 | |||
| 83 | bool IsLine(VkPrimitiveTopology topology) { | ||
| 84 | static constexpr std::array line_topologies{ | ||
| 85 | VK_PRIMITIVE_TOPOLOGY_LINE_LIST, VK_PRIMITIVE_TOPOLOGY_LINE_STRIP, | ||
| 86 | // VK_PRIMITIVE_TOPOLOGY_LINE_LOOP_EXT, | ||
| 87 | }; | ||
| 88 | return std::ranges::find(line_topologies, topology) != line_topologies.end(); | ||
| 51 | } | 89 | } |
| 52 | 90 | ||
| 53 | VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { | 91 | VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { |
| @@ -59,8 +97,7 @@ VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { | |||
| 59 | BitField<12, 3, Maxwell::ViewportSwizzle> w; | 97 | BitField<12, 3, Maxwell::ViewportSwizzle> w; |
| 60 | }; | 98 | }; |
| 61 | const Swizzle unpacked{swizzle}; | 99 | const Swizzle unpacked{swizzle}; |
| 62 | 100 | return VkViewportSwizzleNV{ | |
| 63 | return { | ||
| 64 | .x = MaxwellToVK::ViewportSwizzle(unpacked.x), | 101 | .x = MaxwellToVK::ViewportSwizzle(unpacked.x), |
| 65 | .y = MaxwellToVK::ViewportSwizzle(unpacked.y), | 102 | .y = MaxwellToVK::ViewportSwizzle(unpacked.y), |
| 66 | .z = MaxwellToVK::ViewportSwizzle(unpacked.z), | 103 | .z = MaxwellToVK::ViewportSwizzle(unpacked.z), |
| @@ -68,193 +105,446 @@ VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { | |||
| 68 | }; | 105 | }; |
| 69 | } | 106 | } |
| 70 | 107 | ||
| 71 | VkSampleCountFlagBits ConvertMsaaMode(Tegra::Texture::MsaaMode msaa_mode) { | 108 | PixelFormat DecodeFormat(u8 encoded_format) { |
| 72 | switch (msaa_mode) { | 109 | const auto format{static_cast<Tegra::RenderTargetFormat>(encoded_format)}; |
| 73 | case Tegra::Texture::MsaaMode::Msaa1x1: | 110 | if (format == Tegra::RenderTargetFormat::NONE) { |
| 74 | return VK_SAMPLE_COUNT_1_BIT; | 111 | return PixelFormat::Invalid; |
| 75 | case Tegra::Texture::MsaaMode::Msaa2x1: | ||
| 76 | case Tegra::Texture::MsaaMode::Msaa2x1_D3D: | ||
| 77 | return VK_SAMPLE_COUNT_2_BIT; | ||
| 78 | case Tegra::Texture::MsaaMode::Msaa2x2: | ||
| 79 | case Tegra::Texture::MsaaMode::Msaa2x2_VC4: | ||
| 80 | case Tegra::Texture::MsaaMode::Msaa2x2_VC12: | ||
| 81 | return VK_SAMPLE_COUNT_4_BIT; | ||
| 82 | case Tegra::Texture::MsaaMode::Msaa4x2: | ||
| 83 | case Tegra::Texture::MsaaMode::Msaa4x2_D3D: | ||
| 84 | case Tegra::Texture::MsaaMode::Msaa4x2_VC8: | ||
| 85 | case Tegra::Texture::MsaaMode::Msaa4x2_VC24: | ||
| 86 | return VK_SAMPLE_COUNT_8_BIT; | ||
| 87 | case Tegra::Texture::MsaaMode::Msaa4x4: | ||
| 88 | return VK_SAMPLE_COUNT_16_BIT; | ||
| 89 | default: | ||
| 90 | UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode)); | ||
| 91 | return VK_SAMPLE_COUNT_1_BIT; | ||
| 92 | } | 112 | } |
| 113 | return PixelFormatFromRenderTargetFormat(format); | ||
| 93 | } | 114 | } |
| 94 | 115 | ||
| 95 | } // Anonymous namespace | 116 | RenderPassKey MakeRenderPassKey(const FixedPipelineState& state) { |
| 117 | RenderPassKey key; | ||
| 118 | std::ranges::transform(state.color_formats, key.color_formats.begin(), DecodeFormat); | ||
| 119 | if (state.depth_enabled != 0) { | ||
| 120 | const auto depth_format{static_cast<Tegra::DepthFormat>(state.depth_format.Value())}; | ||
| 121 | key.depth_format = PixelFormatFromDepthFormat(depth_format); | ||
| 122 | } else { | ||
| 123 | key.depth_format = PixelFormat::Invalid; | ||
| 124 | } | ||
| 125 | key.samples = MaxwellToVK::MsaaMode(state.msaa_mode); | ||
| 126 | return key; | ||
| 127 | } | ||
| 96 | 128 | ||
| 97 | VKGraphicsPipeline::VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_, | 129 | size_t NumAttachments(const FixedPipelineState& state) { |
| 98 | VKDescriptorPool& descriptor_pool_, | 130 | size_t num{}; |
| 99 | VKUpdateDescriptorQueue& update_descriptor_queue_, | 131 | for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { |
| 100 | const GraphicsPipelineCacheKey& key, | 132 | const auto format{static_cast<Tegra::RenderTargetFormat>(state.color_formats[index])}; |
| 101 | vk::Span<VkDescriptorSetLayoutBinding> bindings, | 133 | if (format != Tegra::RenderTargetFormat::NONE) { |
| 102 | const SPIRVProgram& program, u32 num_color_buffers) | 134 | num = index + 1; |
| 103 | : device{device_}, scheduler{scheduler_}, cache_key{key}, hash{cache_key.Hash()}, | 135 | } |
| 104 | descriptor_set_layout{CreateDescriptorSetLayout(bindings)}, | 136 | } |
| 105 | descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, | 137 | return num; |
| 106 | update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, | ||
| 107 | descriptor_template{CreateDescriptorUpdateTemplate(program)}, | ||
| 108 | modules(CreateShaderModules(program)), | ||
| 109 | pipeline(CreatePipeline(program, cache_key.renderpass, num_color_buffers)) {} | ||
| 110 | |||
| 111 | VKGraphicsPipeline::~VKGraphicsPipeline() = default; | ||
| 112 | |||
| 113 | VkDescriptorSet VKGraphicsPipeline::CommitDescriptorSet() { | ||
| 114 | if (!descriptor_template) { | ||
| 115 | return {}; | ||
| 116 | } | ||
| 117 | const VkDescriptorSet set = descriptor_allocator.Commit(); | ||
| 118 | update_descriptor_queue.Send(*descriptor_template, set); | ||
| 119 | return set; | ||
| 120 | } | 138 | } |
| 121 | 139 | ||
| 122 | vk::DescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout( | 140 | template <typename Spec> |
| 123 | vk::Span<VkDescriptorSetLayoutBinding> bindings) const { | 141 | bool Passes(const std::array<vk::ShaderModule, NUM_STAGES>& modules, |
| 124 | const VkDescriptorSetLayoutCreateInfo ci{ | 142 | const std::array<Shader::Info, NUM_STAGES>& stage_infos) { |
| 125 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, | 143 | for (size_t stage = 0; stage < NUM_STAGES; ++stage) { |
| 126 | .pNext = nullptr, | 144 | if (!Spec::enabled_stages[stage] && modules[stage]) { |
| 127 | .flags = 0, | 145 | return false; |
| 128 | .bindingCount = bindings.size(), | 146 | } |
| 129 | .pBindings = bindings.data(), | 147 | const auto& info{stage_infos[stage]}; |
| 130 | }; | 148 | if constexpr (!Spec::has_storage_buffers) { |
| 131 | return device.GetLogical().CreateDescriptorSetLayout(ci); | 149 | if (!info.storage_buffers_descriptors.empty()) { |
| 150 | return false; | ||
| 151 | } | ||
| 152 | } | ||
| 153 | if constexpr (!Spec::has_texture_buffers) { | ||
| 154 | if (!info.texture_buffer_descriptors.empty()) { | ||
| 155 | return false; | ||
| 156 | } | ||
| 157 | } | ||
| 158 | if constexpr (!Spec::has_image_buffers) { | ||
| 159 | if (!info.image_buffer_descriptors.empty()) { | ||
| 160 | return false; | ||
| 161 | } | ||
| 162 | } | ||
| 163 | if constexpr (!Spec::has_images) { | ||
| 164 | if (!info.image_descriptors.empty()) { | ||
| 165 | return false; | ||
| 166 | } | ||
| 167 | } | ||
| 168 | } | ||
| 169 | return true; | ||
| 132 | } | 170 | } |
| 133 | 171 | ||
| 134 | vk::PipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const { | 172 | using ConfigureFuncPtr = void (*)(GraphicsPipeline*, bool); |
| 135 | const VkPipelineLayoutCreateInfo ci{ | 173 | |
| 136 | .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, | 174 | template <typename Spec, typename... Specs> |
| 137 | .pNext = nullptr, | 175 | ConfigureFuncPtr FindSpec(const std::array<vk::ShaderModule, NUM_STAGES>& modules, |
| 138 | .flags = 0, | 176 | const std::array<Shader::Info, NUM_STAGES>& stage_infos) { |
| 139 | .setLayoutCount = 1, | 177 | if constexpr (sizeof...(Specs) > 0) { |
| 140 | .pSetLayouts = descriptor_set_layout.address(), | 178 | if (!Passes<Spec>(modules, stage_infos)) { |
| 141 | .pushConstantRangeCount = 0, | 179 | return FindSpec<Specs...>(modules, stage_infos); |
| 142 | .pPushConstantRanges = nullptr, | 180 | } |
| 143 | }; | 181 | } |
| 144 | return device.GetLogical().CreatePipelineLayout(ci); | 182 | return GraphicsPipeline::MakeConfigureSpecFunc<Spec>(); |
| 145 | } | 183 | } |
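`Passes` and `FindSpec` together implement a compile-time specialization ladder: each `Spec` type compiles a leaner variant of the configure path, and `FindSpec` walks the type list at runtime, returning a plain function pointer to the first variant whose compiled-out features are genuinely unused. The last spec in the list is accepted unconditionally, so it must be the fully general one. A cut-down sketch of the same mechanism (`Context`, `SlimSpec`, and `FullSpec` are illustrative names):

```cpp
#include <cstdio>

struct Context {
    bool uses_storage_buffers;
};

template <typename Spec>
bool Passes(const Context& ctx) {
    if constexpr (!Spec::has_storage_buffers) {
        if (ctx.uses_storage_buffers) {
            return false; // runtime state needs a feature this spec compiled out
        }
    }
    return true;
}

using FuncPtr = void (*)(const Context&);

template <typename Spec>
void Run(const Context&) {
    std::printf("running spec, storage buffers = %d\n", Spec::has_storage_buffers);
}

template <typename Spec, typename... Specs>
FuncPtr Find(const Context& ctx) {
    if constexpr (sizeof...(Specs) > 0) {
        if (!Passes<Spec>(ctx)) {
            return Find<Specs...>(ctx); // fall through to the next, more general spec
        }
    }
    return &Run<Spec>; // the final spec is accepted unconditionally
}

struct SlimSpec { static constexpr bool has_storage_buffers = false; };
struct FullSpec { static constexpr bool has_storage_buffers = true; };

int main() {
    const Context ctx{.uses_storage_buffers = true};
    Find<SlimSpec, FullSpec>(ctx)(ctx); // SlimSpec fails Passes, FullSpec is selected
}
```

The payoff is that common pipelines run a configure loop whose unused descriptor branches were removed entirely by `if constexpr`.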
| 146 | 184 | ||
| 147 | vk::DescriptorUpdateTemplateKHR VKGraphicsPipeline::CreateDescriptorUpdateTemplate( | 185 | struct SimpleVertexFragmentSpec { |
| 148 | const SPIRVProgram& program) const { | 186 | static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, true}; |
| 149 | std::vector<VkDescriptorUpdateTemplateEntry> template_entries; | 187 | static constexpr bool has_storage_buffers = false; |
| 150 | u32 binding = 0; | 188 | static constexpr bool has_texture_buffers = false; |
| 151 | u32 offset = 0; | 189 | static constexpr bool has_image_buffers = false; |
| 152 | for (const auto& stage : program) { | 190 | static constexpr bool has_images = false; |
| 153 | if (stage) { | 191 | }; |
| 154 | FillDescriptorUpdateTemplateEntries(stage->entries, binding, offset, template_entries); | 192 | |
| 193 | struct SimpleVertexSpec { | ||
| 194 | static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, false}; | ||
| 195 | static constexpr bool has_storage_buffers = false; | ||
| 196 | static constexpr bool has_texture_buffers = false; | ||
| 197 | static constexpr bool has_image_buffers = false; | ||
| 198 | static constexpr bool has_images = false; | ||
| 199 | }; | ||
| 200 | |||
| 201 | struct DefaultSpec { | ||
| 202 | static constexpr std::array<bool, 5> enabled_stages{true, true, true, true, true}; | ||
| 203 | static constexpr bool has_storage_buffers = true; | ||
| 204 | static constexpr bool has_texture_buffers = true; | ||
| 205 | static constexpr bool has_image_buffers = true; | ||
| 206 | static constexpr bool has_images = true; | ||
| 207 | }; | ||
| 208 | |||
| 209 | ConfigureFuncPtr ConfigureFunc(const std::array<vk::ShaderModule, NUM_STAGES>& modules, | ||
| 210 | const std::array<Shader::Info, NUM_STAGES>& infos) { | ||
| 211 | return FindSpec<SimpleVertexSpec, SimpleVertexFragmentSpec, DefaultSpec>(modules, infos); | ||
| 212 | } | ||
| 213 | } // Anonymous namespace | ||
| 214 | |||
| 215 | GraphicsPipeline::GraphicsPipeline( | ||
| 216 | Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, | ||
| 217 | VKScheduler& scheduler_, BufferCache& buffer_cache_, TextureCache& texture_cache_, | ||
| 218 | VideoCore::ShaderNotify* shader_notify, const Device& device_, DescriptorPool& descriptor_pool, | ||
| 219 | VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread, | ||
| 220 | RenderPassCache& render_pass_cache, const GraphicsPipelineCacheKey& key_, | ||
| 221 | std::array<vk::ShaderModule, NUM_STAGES> stages, | ||
| 222 | const std::array<const Shader::Info*, NUM_STAGES>& infos) | ||
| 223 | : key{key_}, maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, device{device_}, | ||
| 224 | texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, scheduler{scheduler_}, | ||
| 225 | update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} { | ||
| 226 | if (shader_notify) { | ||
| 227 | shader_notify->MarkShaderBuilding(); | ||
| 228 | } | ||
| 229 | for (size_t stage = 0; stage < NUM_STAGES; ++stage) { | ||
| 230 | const Shader::Info* const info{infos[stage]}; | ||
| 231 | if (!info) { | ||
| 232 | continue; | ||
| 155 | } | 233 | } |
| 234 | stage_infos[stage] = *info; | ||
| 235 | enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask; | ||
| 236 | std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); | ||
| 156 | } | 237 | } |
| 157 | if (template_entries.empty()) { | 238 | auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool] { |
| 158 | // If the shader doesn't use descriptor sets, skip template creation. | 239 | DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; |
| 159 | return {}; | 240 | uses_push_descriptor = builder.CanUsePushDescriptor(); |
| 241 | descriptor_set_layout = builder.CreateDescriptorSetLayout(uses_push_descriptor); | ||
| 242 | if (!uses_push_descriptor) { | ||
| 243 | descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos); | ||
| 244 | } | ||
| 245 | const VkDescriptorSetLayout set_layout{*descriptor_set_layout}; | ||
| 246 | pipeline_layout = builder.CreatePipelineLayout(set_layout); | ||
| 247 | descriptor_update_template = | ||
| 248 | builder.CreateTemplate(set_layout, *pipeline_layout, uses_push_descriptor); | ||
| 249 | |||
| 250 | const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))}; | ||
| 251 | Validate(); | ||
| 252 | MakePipeline(render_pass); | ||
| 253 | |||
| 254 | std::lock_guard lock{build_mutex}; | ||
| 255 | is_built = true; | ||
| 256 | build_condvar.notify_one(); | ||
| 257 | if (shader_notify) { | ||
| 258 | shader_notify->MarkShaderComplete(); | ||
| 259 | } | ||
| 260 | }}; | ||
| 261 | if (worker_thread) { | ||
| 262 | worker_thread->QueueWork(std::move(func)); | ||
| 263 | } else { | ||
| 264 | func(); | ||
| 160 | } | 265 | } |
| 266 | configure_func = ConfigureFunc(spv_modules, stage_infos); | ||
| 267 | } | ||
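The constructor defers the expensive Vulkan pipeline build to a worker thread when one is supplied, publishing completion through the `is_built` atomic plus a condition variable. The sketch below reproduces that pattern with a plain `std::thread` in place of yuzu's `Common::ThreadWorker` (`AsyncPipeline` and `BuildExpensiveState` are stand-ins):

```cpp
#include <atomic>
#include <condition_variable>
#include <mutex>
#include <thread>

class AsyncPipeline {
public:
    AsyncPipeline() {
        builder = std::thread([this] {
            BuildExpensiveState(); // stands in for MakePipeline()
            std::lock_guard lock{build_mutex};
            is_built.store(true, std::memory_order_relaxed);
            build_condvar.notify_all();
        });
    }

    ~AsyncPipeline() {
        builder.join();
    }

    void WaitUntilBuilt() {
        if (is_built.load(std::memory_order_relaxed)) {
            return; // fast path: no lock once the build has finished
        }
        std::unique_lock lock{build_mutex};
        build_condvar.wait(lock, [this] { return is_built.load(std::memory_order_relaxed); });
    }

private:
    void BuildExpensiveState() {}

    std::thread builder;
    std::mutex build_mutex;
    std::condition_variable build_condvar;
    std::atomic_bool is_built{false};
};

int main() {
    AsyncPipeline pipeline;
    pipeline.WaitUntilBuilt();
}
```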
| 161 | 268 | ||
| 162 | const VkDescriptorUpdateTemplateCreateInfoKHR ci{ | 269 | void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) { |
| 163 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, | 270 | transition_keys.push_back(transition->key); |
| 164 | .pNext = nullptr, | 271 | transitions.push_back(transition); |
| 165 | .flags = 0, | ||
| 166 | .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()), | ||
| 167 | .pDescriptorUpdateEntries = template_entries.data(), | ||
| 168 | .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, | ||
| 169 | .descriptorSetLayout = *descriptor_set_layout, | ||
| 170 | .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, | ||
| 171 | .pipelineLayout = *layout, | ||
| 172 | .set = DESCRIPTOR_SET, | ||
| 173 | }; | ||
| 174 | return device.GetLogical().CreateDescriptorUpdateTemplateKHR(ci); | ||
| 175 | } | 272 | } |
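`AddTransition` records likely pipeline successors in two parallel vectors, so the `Next()` lookup (defined in the header further below) scans a dense array of trivially comparable keys instead of chasing pipeline pointers. A reduced sketch, with `Key` standing in for `GraphicsPipelineCacheKey`:

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

struct Key {
    std::uint64_t hash;
    bool operator==(const Key&) const = default;
};

class Pipeline {
public:
    void AddTransition(Pipeline* transition, const Key& key) {
        transition_keys.push_back(key); // keys and pointers stay index-aligned
        transitions.push_back(transition);
    }

    Pipeline* Next(const Key& current_key) noexcept {
        const auto it = std::find(transition_keys.begin(), transition_keys.end(), current_key);
        return it != transition_keys.end()
                   ? transitions[std::distance(transition_keys.begin(), it)]
                   : nullptr; // unknown transition: caller falls back to the full cache
    }

private:
    std::vector<Key> transition_keys;
    std::vector<Pipeline*> transitions;
};
```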
| 176 | 273 | ||
| 177 | std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules( | 274 | template <typename Spec> |
| 178 | const SPIRVProgram& program) const { | 275 | void GraphicsPipeline::ConfigureImpl(bool is_indexed) { |
| 179 | VkShaderModuleCreateInfo ci{ | 276 | std::array<ImageId, MAX_IMAGE_ELEMENTS> image_view_ids; |
| 180 | .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, | 277 | std::array<u32, MAX_IMAGE_ELEMENTS> image_view_indices; |
| 181 | .pNext = nullptr, | 278 | std::array<VkSampler, MAX_IMAGE_ELEMENTS> samplers; |
| 182 | .flags = 0, | 279 | size_t sampler_index{}; |
| 183 | .codeSize = 0, | 280 | size_t image_index{}; |
| 184 | .pCode = nullptr, | 281 | |
| 185 | }; | 282 | texture_cache.SynchronizeGraphicsDescriptors(); |
| 283 | |||
| 284 | buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes); | ||
| 285 | |||
| 286 | const auto& regs{maxwell3d.regs}; | ||
| 287 | const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; | ||
| 288 | const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE { | ||
| 289 | const Shader::Info& info{stage_infos[stage]}; | ||
| 290 | buffer_cache.UnbindGraphicsStorageBuffers(stage); | ||
| 291 | if constexpr (Spec::has_storage_buffers) { | ||
| 292 | size_t ssbo_index{}; | ||
| 293 | for (const auto& desc : info.storage_buffers_descriptors) { | ||
| 294 | ASSERT(desc.count == 1); | ||
| 295 | buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index, | ||
| 296 | desc.cbuf_offset, desc.is_written); | ||
| 297 | ++ssbo_index; | ||
| 298 | } | ||
| 299 | } | ||
| 300 | const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; | ||
| 301 | const auto read_handle{[&](const auto& desc, u32 index) { | ||
| 302 | ASSERT(cbufs[desc.cbuf_index].enabled); | ||
| 303 | const u32 index_offset{index << desc.size_shift}; | ||
| 304 | const u32 offset{desc.cbuf_offset + index_offset}; | ||
| 305 | const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset}; | ||
| 306 | if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> || | ||
| 307 | std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) { | ||
| 308 | if (desc.has_secondary) { | ||
| 309 | ASSERT(cbufs[desc.secondary_cbuf_index].enabled); | ||
| 310 | const u32 second_offset{desc.secondary_cbuf_offset + index_offset}; | ||
| 311 | const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address + | ||
| 312 | second_offset}; | ||
| 313 | const u32 lhs_raw{gpu_memory.Read<u32>(addr)}; | ||
| 314 | const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)}; | ||
| 315 | const u32 raw{lhs_raw | rhs_raw}; | ||
| 316 | return TexturePair(raw, via_header_index); | ||
| 317 | } | ||
| 318 | } | ||
| 319 | return TexturePair(gpu_memory.Read<u32>(addr), via_header_index); | ||
| 320 | }}; | ||
| 321 | const auto add_image{[&](const auto& desc) { | ||
| 322 | for (u32 index = 0; index < desc.count; ++index) { | ||
| 323 | const auto handle{read_handle(desc, index)}; | ||
| 324 | image_view_indices[image_index++] = handle.first; | ||
| 325 | } | ||
| 326 | }}; | ||
| 327 | if constexpr (Spec::has_texture_buffers) { | ||
| 328 | for (const auto& desc : info.texture_buffer_descriptors) { | ||
| 329 | add_image(desc); | ||
| 330 | } | ||
| 331 | } | ||
| 332 | if constexpr (Spec::has_image_buffers) { | ||
| 333 | for (const auto& desc : info.image_buffer_descriptors) { | ||
| 334 | add_image(desc); | ||
| 335 | } | ||
| 336 | } | ||
| 337 | for (const auto& desc : info.texture_descriptors) { | ||
| 338 | for (u32 index = 0; index < desc.count; ++index) { | ||
| 339 | const auto handle{read_handle(desc, index)}; | ||
| 340 | image_view_indices[image_index++] = handle.first; | ||
| 186 | 341 | ||
| 187 | std::vector<vk::ShaderModule> shader_modules; | 342 | Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; |
| 188 | shader_modules.reserve(Maxwell::MaxShaderStage); | 343 | samplers[sampler_index++] = sampler->Handle(); |
| 189 | for (std::size_t i = 0; i < Maxwell::MaxShaderStage; ++i) { | 344 | } |
| 190 | const auto& stage = program[i]; | 345 | } |
| 191 | if (!stage) { | 346 | if constexpr (Spec::has_images) { |
| 192 | continue; | 347 | for (const auto& desc : info.image_descriptors) { |
| 348 | add_image(desc); | ||
| 349 | } | ||
| 193 | } | 350 | } |
| 351 | }}; | ||
| 352 | if constexpr (Spec::enabled_stages[0]) { | ||
| 353 | config_stage(0); | ||
| 354 | } | ||
| 355 | if constexpr (Spec::enabled_stages[1]) { | ||
| 356 | config_stage(1); | ||
| 357 | } | ||
| 358 | if constexpr (Spec::enabled_stages[2]) { | ||
| 359 | config_stage(2); | ||
| 360 | } | ||
| 361 | if constexpr (Spec::enabled_stages[3]) { | ||
| 362 | config_stage(3); | ||
| 363 | } | ||
| 364 | if constexpr (Spec::enabled_stages[4]) { | ||
| 365 | config_stage(4); | ||
| 366 | } | ||
| 367 | const std::span indices_span(image_view_indices.data(), image_index); | ||
| 368 | texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); | ||
| 369 | |||
| 370 | ImageId* texture_buffer_index{image_view_ids.data()}; | ||
| 371 | const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { | ||
| 372 | size_t index{}; | ||
| 373 | const auto add_buffer{[&](const auto& desc) { | ||
| 374 | constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>; | ||
| 375 | for (u32 i = 0; i < desc.count; ++i) { | ||
| 376 | bool is_written{false}; | ||
| 377 | if constexpr (is_image) { | ||
| 378 | is_written = desc.is_written; | ||
| 379 | } | ||
| 380 | ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; | ||
| 381 | buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), | ||
| 382 | image_view.BufferSize(), image_view.format, | ||
| 383 | is_written, is_image); | ||
| 384 | ++index; | ||
| 385 | ++texture_buffer_index; | ||
| 386 | } | ||
| 387 | }}; | ||
| 388 | buffer_cache.UnbindGraphicsTextureBuffers(stage); | ||
| 194 | 389 | ||
| 195 | device.SaveShader(stage->code); | 390 | const Shader::Info& info{stage_infos[stage]}; |
| 391 | if constexpr (Spec::has_texture_buffers) { | ||
| 392 | for (const auto& desc : info.texture_buffer_descriptors) { | ||
| 393 | add_buffer(desc); | ||
| 394 | } | ||
| 395 | } | ||
| 396 | if constexpr (Spec::has_image_buffers) { | ||
| 397 | for (const auto& desc : info.image_buffer_descriptors) { | ||
| 398 | add_buffer(desc); | ||
| 399 | } | ||
| 400 | } | ||
| 401 | for (const auto& desc : info.texture_descriptors) { | ||
| 402 | texture_buffer_index += desc.count; | ||
| 403 | } | ||
| 404 | if constexpr (Spec::has_images) { | ||
| 405 | for (const auto& desc : info.image_descriptors) { | ||
| 406 | texture_buffer_index += desc.count; | ||
| 407 | } | ||
| 408 | } | ||
| 409 | }}; | ||
| 410 | if constexpr (Spec::enabled_stages[0]) { | ||
| 411 | bind_stage_info(0); | ||
| 412 | } | ||
| 413 | if constexpr (Spec::enabled_stages[1]) { | ||
| 414 | bind_stage_info(1); | ||
| 415 | } | ||
| 416 | if constexpr (Spec::enabled_stages[2]) { | ||
| 417 | bind_stage_info(2); | ||
| 418 | } | ||
| 419 | if constexpr (Spec::enabled_stages[3]) { | ||
| 420 | bind_stage_info(3); | ||
| 421 | } | ||
| 422 | if constexpr (Spec::enabled_stages[4]) { | ||
| 423 | bind_stage_info(4); | ||
| 424 | } | ||
| 425 | |||
| 426 | buffer_cache.UpdateGraphicsBuffers(is_indexed); | ||
| 427 | buffer_cache.BindHostGeometryBuffers(is_indexed); | ||
| 196 | 428 | ||
| 197 | ci.codeSize = stage->code.size() * sizeof(u32); | 429 | update_descriptor_queue.Acquire(); |
| 198 | ci.pCode = stage->code.data(); | 430 | |
| 199 | shader_modules.push_back(device.GetLogical().CreateShaderModule(ci)); | 431 | const VkSampler* samplers_it{samplers.data()}; |
| 432 | const ImageId* views_it{image_view_ids.data()}; | ||
| 433 | const auto prepare_stage{[&](size_t stage) LAMBDA_FORCEINLINE { | ||
| 434 | buffer_cache.BindHostStageBuffers(stage); | ||
| 435 | PushImageDescriptors(stage_infos[stage], samplers_it, views_it, texture_cache, | ||
| 436 | update_descriptor_queue); | ||
| 437 | }}; | ||
| 438 | if constexpr (Spec::enabled_stages[0]) { | ||
| 439 | prepare_stage(0); | ||
| 440 | } | ||
| 441 | if constexpr (Spec::enabled_stages[1]) { | ||
| 442 | prepare_stage(1); | ||
| 200 | } | 443 | } |
| 201 | return shader_modules; | 444 | if constexpr (Spec::enabled_stages[2]) { |
| 445 | prepare_stage(2); | ||
| 446 | } | ||
| 447 | if constexpr (Spec::enabled_stages[3]) { | ||
| 448 | prepare_stage(3); | ||
| 449 | } | ||
| 450 | if constexpr (Spec::enabled_stages[4]) { | ||
| 451 | prepare_stage(4); | ||
| 452 | } | ||
| 453 | ConfigureDraw(); | ||
| 202 | } | 454 | } |
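`ConfigureImpl` is structured as two passes over the same descriptor order: gather every raw texture handle into a flat array, resolve them all in one `FillGraphicsImageViews` batch, then walk the descriptors a second time to bind. Stripped of the Vulkan details, the shape looks like this (`TextureCache`, `BindView`, and the handle translation are placeholders):

```cpp
#include <array>
#include <cstddef>
#include <cstdint>
#include <span>

using Handle = std::uint32_t;
using ViewId = std::uint32_t;

struct TextureCache {
    // Resolve raw handles to view ids in one batch, cheaper than one-by-one.
    void FillViews(std::span<const Handle> handles, std::span<ViewId> out) {
        for (std::size_t i = 0; i < handles.size(); ++i) {
            out[i] = handles[i] + 1; // placeholder translation
        }
    }
};

void BindView(ViewId) {}

constexpr std::size_t MAX_ELEMENTS = 64;

void Configure(TextureCache& cache, std::span<const Handle> descriptors) {
    std::array<Handle, MAX_ELEMENTS> handles{};
    std::array<ViewId, MAX_ELEMENTS> views{};
    std::size_t count = 0;
    for (const Handle handle : descriptors) { // pass 1: gather raw handles
        handles[count++] = handle;
    }
    cache.FillViews(std::span(handles.data(), count), views); // batch resolve
    for (std::size_t i = 0; i < count; ++i) {                 // pass 2: bind in the same order
        BindView(views[i]);
    }
}

int main() {
    TextureCache cache;
    const std::array<Handle, 3> descriptors{10, 20, 30};
    Configure(cache, descriptors);
}
```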
| 203 | 455 | ||
| 204 | vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, | 456 | void GraphicsPipeline::ConfigureDraw() { |
| 205 | VkRenderPass renderpass, | 457 | texture_cache.UpdateRenderTargets(false); |
| 206 | u32 num_color_buffers) const { | 458 | scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); |
| 207 | const auto& state = cache_key.fixed_state; | 459 | |
| 208 | const auto& viewport_swizzles = state.viewport_swizzles; | 460 | if (!is_built.load(std::memory_order::relaxed)) { |
| 209 | 461 | // Wait for the pipeline to be built | |
| 210 | FixedPipelineState::DynamicState dynamic; | 462 | scheduler.Record([this](vk::CommandBuffer) { |
| 211 | if (device.IsExtExtendedDynamicStateSupported()) { | 463 | std::unique_lock lock{build_mutex}; |
| 212 | // Insert dummy values; as long as they are valid they don't matter, since extended | 463 | std::unique_lock lock{build_mutex}; |
| 213 | // dynamic state is ignored | ||
| 214 | dynamic.raw1 = 0; | ||
| 215 | dynamic.raw2 = 0; | ||
| 216 | dynamic.vertex_strides.fill(0); | ||
| 217 | } else { | ||
| 218 | dynamic = state.dynamic_state; | ||
| 219 | } | ||
| 220 | |||
| 221 | std::vector<VkVertexInputBindingDescription> vertex_bindings; | ||
| 222 | std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors; | ||
| 223 | for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { | ||
| 224 | const bool instanced = state.binding_divisors[index] != 0; | ||
| 225 | const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; | ||
| 226 | vertex_bindings.push_back({ | ||
| 227 | .binding = static_cast<u32>(index), | ||
| 228 | .stride = dynamic.vertex_strides[index], | ||
| 229 | .inputRate = rate, | ||
| 230 | }); | 465 | }); |
| 231 | if (instanced) { | ||
| 232 | vertex_binding_divisors.push_back({ | ||
| 233 | .binding = static_cast<u32>(index), | ||
| 234 | .divisor = state.binding_divisors[index], | ||
| 235 | }); | ||
| 236 | } | ||
| 237 | } | 466 | } |
| 467 | const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)}; | ||
| 468 | const void* const descriptor_data{update_descriptor_queue.UpdateData()}; | ||
| 469 | scheduler.Record([this, descriptor_data, bind_pipeline](vk::CommandBuffer cmdbuf) { | ||
| 470 | if (bind_pipeline) { | ||
| 471 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); | ||
| 472 | } | ||
| 473 | if (!descriptor_set_layout) { | ||
| 474 | return; | ||
| 475 | } | ||
| 476 | if (uses_push_descriptor) { | ||
| 477 | cmdbuf.PushDescriptorSetWithTemplateKHR(*descriptor_update_template, *pipeline_layout, | ||
| 478 | 0, descriptor_data); | ||
| 479 | } else { | ||
| 480 | const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; | ||
| 481 | const vk::Device& dev{device.GetLogical()}; | ||
| 482 | dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data); | ||
| 483 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, | ||
| 484 | descriptor_set, nullptr); | ||
| 485 | } | ||
| 486 | }); | ||
| 487 | } | ||
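The tail of `ConfigureDraw` chooses between two descriptor submission paths. Below is a hedged sketch of that branch against the raw Vulkan API; the wrapper-free form is an assumption, since the real code goes through yuzu's `vk::` dispatch wrappers, and the `KHR` entry point would normally be fetched through the instance/device loader rather than linked directly.

```cpp
#include <vulkan/vulkan.h>

// Sketch only: either push the update-template payload straight into the
// command buffer (VK_KHR_push_descriptor), or materialize a descriptor set
// through the same template and bind it.
void SubmitDescriptors(VkCommandBuffer cmdbuf, VkDevice device, bool uses_push_descriptor,
                       VkDescriptorUpdateTemplate update_template, VkPipelineLayout layout,
                       VkDescriptorSet descriptor_set, const void* descriptor_data) {
    if (uses_push_descriptor) {
        // One call; no descriptor set object outlives this command.
        vkCmdPushDescriptorSetWithTemplateKHR(cmdbuf, update_template, layout, 0,
                                              descriptor_data);
    } else {
        // Classic path: write the set, then bind it for the graphics point.
        vkUpdateDescriptorSetWithTemplate(device, descriptor_set, update_template,
                                          descriptor_data);
        vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, 1,
                                &descriptor_set, 0, nullptr);
    }
}
```

Push descriptors skip per-draw set allocation entirely, which is why the constructor only creates a `descriptor_allocator` when the push path is unavailable.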
| 238 | 488 | ||
| 239 | std::vector<VkVertexInputAttributeDescription> vertex_attributes; | 489 | void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { |
| 240 | const auto& input_attributes = program[0]->entries.attributes; | 490 | FixedPipelineState::DynamicState dynamic{}; |
| 241 | for (std::size_t index = 0; index < state.attributes.size(); ++index) { | 491 | if (!key.state.extended_dynamic_state) { |
| 242 | const auto& attribute = state.attributes[index]; | 492 | dynamic = key.state.dynamic_state; |
| 243 | if (!attribute.enabled) { | 493 | } |
| 244 | continue; | 494 | static_vector<VkVertexInputBindingDescription, 32> vertex_bindings; |
| 495 | static_vector<VkVertexInputBindingDivisorDescriptionEXT, 32> vertex_binding_divisors; | ||
| 496 | static_vector<VkVertexInputAttributeDescription, 32> vertex_attributes; | ||
| 497 | if (key.state.dynamic_vertex_input) { | ||
| 498 | for (size_t index = 0; index < key.state.attributes.size(); ++index) { | ||
| 499 | const u32 type = key.state.DynamicAttributeType(index); | ||
| 500 | if (!stage_infos[0].loads.Generic(index) || type == 0) { | ||
| 501 | continue; | ||
| 502 | } | ||
| 503 | vertex_attributes.push_back({ | ||
| 504 | .location = static_cast<u32>(index), | ||
| 505 | .binding = 0, | ||
| 506 | .format = type == 1 ? VK_FORMAT_R32_SFLOAT | ||
| 507 | : type == 2 ? VK_FORMAT_R32_SINT : VK_FORMAT_R32_UINT, | ||
| 508 | .offset = 0, | ||
| 509 | }); | ||
| 245 | } | 510 | } |
| 246 | if (!input_attributes.contains(static_cast<u32>(index))) { | 511 | if (!vertex_attributes.empty()) { |
| 247 | // Skip attributes not used by the vertex shaders. | 512 | vertex_bindings.push_back({ |
| 248 | continue; | 513 | .binding = 0, |
| 514 | .stride = 4, | ||
| 515 | .inputRate = VK_VERTEX_INPUT_RATE_VERTEX, | ||
| 516 | }); | ||
| 517 | } | ||
| 518 | } else { | ||
| 519 | for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { | ||
| 520 | const bool instanced = key.state.binding_divisors[index] != 0; | ||
| 521 | const auto rate = | ||
| 522 | instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; | ||
| 523 | vertex_bindings.push_back({ | ||
| 524 | .binding = static_cast<u32>(index), | ||
| 525 | .stride = dynamic.vertex_strides[index], | ||
| 526 | .inputRate = rate, | ||
| 527 | }); | ||
| 528 | if (instanced) { | ||
| 529 | vertex_binding_divisors.push_back({ | ||
| 530 | .binding = static_cast<u32>(index), | ||
| 531 | .divisor = key.state.binding_divisors[index], | ||
| 532 | }); | ||
| 533 | } | ||
| 534 | } | ||
| 535 | for (size_t index = 0; index < key.state.attributes.size(); ++index) { | ||
| 536 | const auto& attribute = key.state.attributes[index]; | ||
| 537 | if (!attribute.enabled || !stage_infos[0].loads.Generic(index)) { | ||
| 538 | continue; | ||
| 539 | } | ||
| 540 | vertex_attributes.push_back({ | ||
| 541 | .location = static_cast<u32>(index), | ||
| 542 | .binding = attribute.buffer, | ||
| 543 | .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()), | ||
| 544 | .offset = attribute.offset, | ||
| 545 | }); | ||
| 249 | } | 546 | } |
| 250 | vertex_attributes.push_back({ | ||
| 251 | .location = static_cast<u32>(index), | ||
| 252 | .binding = attribute.buffer, | ||
| 253 | .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()), | ||
| 254 | .offset = attribute.offset, | ||
| 255 | }); | ||
| 256 | } | 547 | } |
| 257 | |||
| 258 | VkPipelineVertexInputStateCreateInfo vertex_input_ci{ | 548 | VkPipelineVertexInputStateCreateInfo vertex_input_ci{ |
| 259 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, | 549 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, |
| 260 | .pNext = nullptr, | 550 | .pNext = nullptr, |
| @@ -264,7 +554,6 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, | |||
| 264 | .vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()), | 554 | .vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()), |
| 265 | .pVertexAttributeDescriptions = vertex_attributes.data(), | 555 | .pVertexAttributeDescriptions = vertex_attributes.data(), |
| 266 | }; | 556 | }; |
| 267 | |||
| 268 | const VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci{ | 557 | const VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci{ |
| 269 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT, | 558 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT, |
| 270 | .pNext = nullptr, | 559 | .pNext = nullptr, |
| @@ -274,78 +563,113 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, | |||
| 274 | if (!vertex_binding_divisors.empty()) { | 563 | if (!vertex_binding_divisors.empty()) { |
| 275 | vertex_input_ci.pNext = &input_divisor_ci; | 564 | vertex_input_ci.pNext = &input_divisor_ci; |
| 276 | } | 565 | } |
| 277 | 566 | auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, key.state.topology); | |
| 278 | const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology); | 567 | if (input_assembly_topology == VK_PRIMITIVE_TOPOLOGY_PATCH_LIST) { |
| 568 | if (!spv_modules[1] && !spv_modules[2]) { | ||
| 569 | LOG_WARNING(Render_Vulkan, "Patch topology used without tessellation, using points"); | ||
| 570 | input_assembly_topology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST; | ||
| 571 | } | ||
| 572 | } | ||
| 279 | const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ | 573 | const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ |
| 280 | .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, | 574 | .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, |
| 281 | .pNext = nullptr, | 575 | .pNext = nullptr, |
| 282 | .flags = 0, | 576 | .flags = 0, |
| 283 | .topology = MaxwellToVK::PrimitiveTopology(device, state.topology), | 577 | .topology = input_assembly_topology, |
| 284 | .primitiveRestartEnable = state.primitive_restart_enable != 0 && | 578 | .primitiveRestartEnable = key.state.primitive_restart_enable != 0 && |
| 285 | SupportsPrimitiveRestart(input_assembly_topology), | 579 | SupportsPrimitiveRestart(input_assembly_topology), |
| 286 | }; | 580 | }; |
| 287 | |||
| 288 | const VkPipelineTessellationStateCreateInfo tessellation_ci{ | 581 | const VkPipelineTessellationStateCreateInfo tessellation_ci{ |
| 289 | .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, | 582 | .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, |
| 290 | .pNext = nullptr, | 583 | .pNext = nullptr, |
| 291 | .flags = 0, | 584 | .flags = 0, |
| 292 | .patchControlPoints = state.patch_control_points_minus_one.Value() + 1, | 585 | .patchControlPoints = key.state.patch_control_points_minus_one.Value() + 1, |
| 293 | }; | ||
| 294 | |||
| 295 | VkPipelineViewportStateCreateInfo viewport_ci{ | ||
| 296 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, | ||
| 297 | .pNext = nullptr, | ||
| 298 | .flags = 0, | ||
| 299 | .viewportCount = Maxwell::NumViewports, | ||
| 300 | .pViewports = nullptr, | ||
| 301 | .scissorCount = Maxwell::NumViewports, | ||
| 302 | .pScissors = nullptr, | ||
| 303 | }; | 586 | }; |
| 304 | 587 | ||
| 305 | std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles; | 588 | std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles; |
| 306 | std::ranges::transform(viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); | 589 | std::ranges::transform(key.state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); |
| 307 | VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ | 590 | const VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ |
| 308 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, | 591 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, |
| 309 | .pNext = nullptr, | 592 | .pNext = nullptr, |
| 310 | .flags = 0, | 593 | .flags = 0, |
| 311 | .viewportCount = Maxwell::NumViewports, | 594 | .viewportCount = Maxwell::NumViewports, |
| 312 | .pViewportSwizzles = swizzles.data(), | 595 | .pViewportSwizzles = swizzles.data(), |
| 313 | }; | 596 | }; |
| 314 | if (device.IsNvViewportSwizzleSupported()) { | 597 | const VkPipelineViewportStateCreateInfo viewport_ci{ |
| 315 | viewport_ci.pNext = &swizzle_ci; | 598 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, |
| 316 | } | 599 | .pNext = device.IsNvViewportSwizzleSupported() ? &swizzle_ci : nullptr, |
| 600 | .flags = 0, | ||
| 601 | .viewportCount = Maxwell::NumViewports, | ||
| 602 | .pViewports = nullptr, | ||
| 603 | .scissorCount = Maxwell::NumViewports, | ||
| 604 | .pScissors = nullptr, | ||
| 605 | }; | ||
| 317 | 606 | ||
| 318 | const VkPipelineRasterizationStateCreateInfo rasterization_ci{ | 607 | VkPipelineRasterizationStateCreateInfo rasterization_ci{ |
| 319 | .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, | 608 | .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, |
| 320 | .pNext = nullptr, | 609 | .pNext = nullptr, |
| 321 | .flags = 0, | 610 | .flags = 0, |
| 322 | .depthClampEnable = | 611 | .depthClampEnable = |
| 323 | static_cast<VkBool32>(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), | 612 | static_cast<VkBool32>(key.state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), |
| 324 | .rasterizerDiscardEnable = | 613 | .rasterizerDiscardEnable = |
| 325 | static_cast<VkBool32>(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), | 614 | static_cast<VkBool32>(key.state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), |
| 326 | .polygonMode = VK_POLYGON_MODE_FILL, | 615 | .polygonMode = |
| 616 | MaxwellToVK::PolygonMode(FixedPipelineState::UnpackPolygonMode(key.state.polygon_mode)), | ||
| 327 | .cullMode = static_cast<VkCullModeFlags>( | 617 | .cullMode = static_cast<VkCullModeFlags>( |
| 328 | dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), | 618 | dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), |
| 329 | .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), | 619 | .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), |
| 330 | .depthBiasEnable = state.depth_bias_enable, | 620 | .depthBiasEnable = key.state.depth_bias_enable, |
| 331 | .depthBiasConstantFactor = 0.0f, | 621 | .depthBiasConstantFactor = 0.0f, |
| 332 | .depthBiasClamp = 0.0f, | 622 | .depthBiasClamp = 0.0f, |
| 333 | .depthBiasSlopeFactor = 0.0f, | 623 | .depthBiasSlopeFactor = 0.0f, |
| 334 | .lineWidth = 1.0f, | 624 | .lineWidth = 1.0f, |
| 335 | }; | 625 | }; |
| 626 | VkPipelineRasterizationLineStateCreateInfoEXT line_state{ | ||
| 627 | .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT, | ||
| 628 | .pNext = nullptr, | ||
| 629 | .lineRasterizationMode = key.state.smooth_lines != 0 | ||
| 630 | ? VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT | ||
| 631 | : VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT, | ||
| 632 | .stippledLineEnable = VK_FALSE, // TODO | ||
| 633 | .lineStippleFactor = 0, | ||
| 634 | .lineStipplePattern = 0, | ||
| 635 | }; | ||
| 636 | VkPipelineRasterizationConservativeStateCreateInfoEXT conservative_raster{ | ||
| 637 | .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_CONSERVATIVE_STATE_CREATE_INFO_EXT, | ||
| 638 | .pNext = nullptr, | ||
| 639 | .flags = 0, | ||
| 640 | .conservativeRasterizationMode = key.state.conservative_raster_enable != 0 | ||
| 641 | ? VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT | ||
| 642 | : VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT, | ||
| 643 | .extraPrimitiveOverestimationSize = 0.0f, | ||
| 644 | }; | ||
| 645 | VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_vertex{ | ||
| 646 | .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT, | ||
| 647 | .pNext = nullptr, | ||
| 648 | .provokingVertexMode = key.state.provoking_vertex_last != 0 | ||
| 649 | ? VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT | ||
| 650 | : VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT, | ||
| 651 | }; | ||
| 652 | if (IsLine(input_assembly_topology) && device.IsExtLineRasterizationSupported()) { | ||
| 653 | line_state.pNext = std::exchange(rasterization_ci.pNext, &line_state); | ||
| 654 | } | ||
| 655 | if (device.IsExtConservativeRasterizationSupported()) { | ||
| 656 | conservative_raster.pNext = std::exchange(rasterization_ci.pNext, &conservative_raster); | ||
| 657 | } | ||
| 658 | if (device.IsExtProvokingVertexSupported()) { | ||
| 659 | provoking_vertex.pNext = std::exchange(rasterization_ci.pNext, &provoking_vertex); | ||
| 660 | } | ||
| 336 | 661 | ||
| 337 | const VkPipelineMultisampleStateCreateInfo multisample_ci{ | 662 | const VkPipelineMultisampleStateCreateInfo multisample_ci{ |
| 338 | .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, | 663 | .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, |
| 339 | .pNext = nullptr, | 664 | .pNext = nullptr, |
| 340 | .flags = 0, | 665 | .flags = 0, |
| 341 | .rasterizationSamples = ConvertMsaaMode(state.msaa_mode), | 666 | .rasterizationSamples = MaxwellToVK::MsaaMode(key.state.msaa_mode), |
| 342 | .sampleShadingEnable = VK_FALSE, | 667 | .sampleShadingEnable = VK_FALSE, |
| 343 | .minSampleShading = 0.0f, | 668 | .minSampleShading = 0.0f, |
| 344 | .pSampleMask = nullptr, | 669 | .pSampleMask = nullptr, |
| 345 | .alphaToCoverageEnable = VK_FALSE, | 670 | .alphaToCoverageEnable = VK_FALSE, |
| 346 | .alphaToOneEnable = VK_FALSE, | 671 | .alphaToOneEnable = VK_FALSE, |
| 347 | }; | 672 | }; |
| 348 | |||
| 349 | const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{ | 673 | const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{ |
| 350 | .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, | 674 | .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, |
| 351 | .pNext = nullptr, | 675 | .pNext = nullptr, |
| @@ -355,32 +679,32 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, | |||
| 355 | .depthCompareOp = dynamic.depth_test_enable | 679 | .depthCompareOp = dynamic.depth_test_enable |
| 356 | ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc()) | 680 | ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc()) |
| 357 | : VK_COMPARE_OP_ALWAYS, | 681 | : VK_COMPARE_OP_ALWAYS, |
| 358 | .depthBoundsTestEnable = dynamic.depth_bounds_enable, | 682 | .depthBoundsTestEnable = dynamic.depth_bounds_enable && device.IsDepthBoundsSupported(), |
| 359 | .stencilTestEnable = dynamic.stencil_enable, | 683 | .stencilTestEnable = dynamic.stencil_enable, |
| 360 | .front = GetStencilFaceState(dynamic.front), | 684 | .front = GetStencilFaceState(dynamic.front), |
| 361 | .back = GetStencilFaceState(dynamic.back), | 685 | .back = GetStencilFaceState(dynamic.back), |
| 362 | .minDepthBounds = 0.0f, | 686 | .minDepthBounds = 0.0f, |
| 363 | .maxDepthBounds = 0.0f, | 687 | .maxDepthBounds = 0.0f, |
| 364 | }; | 688 | }; |
| 365 | 689 | if (dynamic.depth_bounds_enable && !device.IsDepthBoundsSupported()) { | |
| 366 | std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments; | 690 | LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported"); |
| 367 | for (std::size_t index = 0; index < num_color_buffers; ++index) { | 691 | } |
| 368 | static constexpr std::array COMPONENT_TABLE{ | 692 | static_vector<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments; |
| 693 | const size_t num_attachments{NumAttachments(key.state)}; | ||
| 694 | for (size_t index = 0; index < num_attachments; ++index) { | ||
| 695 | static constexpr std::array mask_table{ | ||
| 369 | VK_COLOR_COMPONENT_R_BIT, | 696 | VK_COLOR_COMPONENT_R_BIT, |
| 370 | VK_COLOR_COMPONENT_G_BIT, | 697 | VK_COLOR_COMPONENT_G_BIT, |
| 371 | VK_COLOR_COMPONENT_B_BIT, | 698 | VK_COLOR_COMPONENT_B_BIT, |
| 372 | VK_COLOR_COMPONENT_A_BIT, | 699 | VK_COLOR_COMPONENT_A_BIT, |
| 373 | }; | 700 | }; |
| 374 | const auto& blend = state.attachments[index]; | 701 | const auto& blend{key.state.attachments[index]}; |
| 375 | 702 | const std::array mask{blend.Mask()}; | |
| 376 | VkColorComponentFlags color_components = 0; | 703 | VkColorComponentFlags write_mask{}; |
| 377 | for (std::size_t i = 0; i < COMPONENT_TABLE.size(); ++i) { | 704 | for (size_t i = 0; i < mask_table.size(); ++i) { |
| 378 | if (blend.Mask()[i]) { | 705 | write_mask |= mask[i] ? mask_table[i] : 0; |
| 379 | color_components |= COMPONENT_TABLE[i]; | ||
| 380 | } | ||
| 381 | } | 706 | } |
| 382 | 707 | cb_attachments.push_back({ | |
| 383 | cb_attachments[index] = { | ||
| 384 | .blendEnable = blend.enable != 0, | 708 | .blendEnable = blend.enable != 0, |
| 385 | .srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()), | 709 | .srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()), |
| 386 | .dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()), | 710 | .dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()), |
| @@ -388,28 +712,27 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, | |||
| 388 | .srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()), | 712 | .srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()), |
| 389 | .dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()), | 713 | .dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()), |
| 390 | .alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()), | 714 | .alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()), |
| 391 | .colorWriteMask = color_components, | 715 | .colorWriteMask = write_mask, |
| 392 | }; | 716 | }); |
| 393 | } | 717 | } |
| 394 | |||
| 395 | const VkPipelineColorBlendStateCreateInfo color_blend_ci{ | 718 | const VkPipelineColorBlendStateCreateInfo color_blend_ci{ |
| 396 | .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, | 719 | .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, |
| 397 | .pNext = nullptr, | 720 | .pNext = nullptr, |
| 398 | .flags = 0, | 721 | .flags = 0, |
| 399 | .logicOpEnable = VK_FALSE, | 722 | .logicOpEnable = VK_FALSE, |
| 400 | .logicOp = VK_LOGIC_OP_COPY, | 723 | .logicOp = VK_LOGIC_OP_COPY, |
| 401 | .attachmentCount = num_color_buffers, | 724 | .attachmentCount = static_cast<u32>(cb_attachments.size()), |
| 402 | .pAttachments = cb_attachments.data(), | 725 | .pAttachments = cb_attachments.data(), |
| 403 | .blendConstants = {}, | 726 | .blendConstants = {}, |
| 404 | }; | 727 | }; |
| 405 | 728 | static_vector<VkDynamicState, 19> dynamic_states{ | |
| 406 | std::vector dynamic_states{ | ||
| 407 | VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, | 729 | VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, |
| 408 | VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, | 730 | VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, |
| 409 | VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, | 731 | VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, |
| 410 | VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, | 732 | VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, |
| 733 | VK_DYNAMIC_STATE_LINE_WIDTH, | ||
| 411 | }; | 734 | }; |
| 412 | if (device.IsExtExtendedDynamicStateSupported()) { | 735 | if (key.state.extended_dynamic_state) { |
| 413 | static constexpr std::array extended{ | 736 | static constexpr std::array extended{ |
| 414 | VK_DYNAMIC_STATE_CULL_MODE_EXT, | 737 | VK_DYNAMIC_STATE_CULL_MODE_EXT, |
| 415 | VK_DYNAMIC_STATE_FRONT_FACE_EXT, | 738 | VK_DYNAMIC_STATE_FRONT_FACE_EXT, |
| @@ -421,9 +744,11 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, | |||
| 421 | VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT, | 744 | VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT, |
| 422 | VK_DYNAMIC_STATE_STENCIL_OP_EXT, | 745 | VK_DYNAMIC_STATE_STENCIL_OP_EXT, |
| 423 | }; | 746 | }; |
| 747 | if (key.state.dynamic_vertex_input) { | ||
| 748 | dynamic_states.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_EXT); | ||
| 749 | } | ||
| 424 | dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end()); | 750 | dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end()); |
| 425 | } | 751 | } |
| 426 | |||
| 427 | const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ | 752 | const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ |
| 428 | .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, | 753 | .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, |
| 429 | .pNext = nullptr, | 754 | .pNext = nullptr, |
| @@ -431,34 +756,33 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, | |||
| 431 | .dynamicStateCount = static_cast<u32>(dynamic_states.size()), | 756 | .dynamicStateCount = static_cast<u32>(dynamic_states.size()), |
| 432 | .pDynamicStates = dynamic_states.data(), | 757 | .pDynamicStates = dynamic_states.data(), |
| 433 | }; | 758 | }; |
| 434 | 759 | [[maybe_unused]] const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ | |
| 435 | const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ | ||
| 436 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, | 760 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, |
| 437 | .pNext = nullptr, | 761 | .pNext = nullptr, |
| 438 | .requiredSubgroupSize = GuestWarpSize, | 762 | .requiredSubgroupSize = GuestWarpSize, |
| 439 | }; | 763 | }; |
| 440 | 764 | static_vector<VkPipelineShaderStageCreateInfo, 5> shader_stages; | |
| 441 | std::vector<VkPipelineShaderStageCreateInfo> shader_stages; | 765 | for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { |
| 442 | std::size_t module_index = 0; | 766 | if (!spv_modules[stage]) { |
| 443 | for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { | ||
| 444 | if (!program[stage]) { | ||
| 445 | continue; | 767 | continue; |
| 446 | } | 768 | } |
| 447 | 769 | [[maybe_unused]] auto& stage_ci = | |
| 448 | VkPipelineShaderStageCreateInfo& stage_ci = shader_stages.emplace_back(); | 770 | shader_stages.emplace_back(VkPipelineShaderStageCreateInfo{ |
| 449 | stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; | 771 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, |
| 450 | stage_ci.pNext = nullptr; | 772 | .pNext = nullptr, |
| 451 | stage_ci.flags = 0; | 773 | .flags = 0, |
| 452 | stage_ci.stage = MaxwellToVK::ShaderStage(static_cast<Tegra::Engines::ShaderType>(stage)); | 774 | .stage = MaxwellToVK::ShaderStage(Shader::StageFromIndex(stage)), |
| 453 | stage_ci.module = *modules[module_index++]; | 775 | .module = *spv_modules[stage], |
| 454 | stage_ci.pName = "main"; | 776 | .pName = "main", |
| 455 | stage_ci.pSpecializationInfo = nullptr; | 777 | .pSpecializationInfo = nullptr, |
| 456 | 778 | }); | |
| 779 | /* | ||
| 457 | if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) { | 780 | if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) { |
| 458 | stage_ci.pNext = &subgroup_size_ci; | 781 | stage_ci.pNext = &subgroup_size_ci; |
| 459 | } | 782 | } |
| 783 | */ | ||
| 460 | } | 784 | } |
| 461 | return device.GetLogical().CreateGraphicsPipeline(VkGraphicsPipelineCreateInfo{ | 785 | pipeline = device.GetLogical().CreateGraphicsPipeline({ |
| 462 | .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, | 786 | .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, |
| 463 | .pNext = nullptr, | 787 | .pNext = nullptr, |
| 464 | .flags = 0, | 788 | .flags = 0, |
| @@ -473,12 +797,31 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, | |||
| 473 | .pDepthStencilState = &depth_stencil_ci, | 797 | .pDepthStencilState = &depth_stencil_ci, |
| 474 | .pColorBlendState = &color_blend_ci, | 798 | .pColorBlendState = &color_blend_ci, |
| 475 | .pDynamicState = &dynamic_state_ci, | 799 | .pDynamicState = &dynamic_state_ci, |
| 476 | .layout = *layout, | 800 | .layout = *pipeline_layout, |
| 477 | .renderPass = renderpass, | 801 | .renderPass = render_pass, |
| 478 | .subpass = 0, | 802 | .subpass = 0, |
| 479 | .basePipelineHandle = nullptr, | 803 | .basePipelineHandle = nullptr, |
| 480 | .basePipelineIndex = 0, | 804 | .basePipelineIndex = 0, |
| 481 | }); | 805 | }); |
| 482 | } | 806 | } |
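Worth calling out from `MakePipeline` is the `std::exchange` idiom for splicing optional extension structs into a `pNext` chain: each supported struct becomes the new head of the chain and inherits the previous head in a single line, so the capability checks can run in any order. A minimal compile-only sketch:

```cpp
#include <utility>
#include <vulkan/vulkan.h>

int main() {
    VkPipelineRasterizationStateCreateInfo rasterization{
        .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
        .pNext = nullptr,
    };
    VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_vertex{
        .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT,
        .pNext = nullptr,
        .provokingVertexMode = VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT,
    };
    const bool extension_supported = false; // stand-in for a device capability query
    if (extension_supported) {
        // provoking_vertex takes over whatever was chained before, and
        // rasterization.pNext now points at provoking_vertex.
        provoking_vertex.pNext = std::exchange(rasterization.pNext, &provoking_vertex);
    }
}
```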
| 483 | 807 | ||
| 808 | void GraphicsPipeline::Validate() { | ||
| 809 | size_t num_images{}; | ||
| 810 | for (const auto& info : stage_infos) { | ||
| 811 | for (const auto& desc : info.texture_buffer_descriptors) { | ||
| 812 | num_images += desc.count; | ||
| 813 | } | ||
| 814 | for (const auto& desc : info.image_buffer_descriptors) { | ||
| 815 | num_images += desc.count; | ||
| 816 | } | ||
| 817 | for (const auto& desc : info.texture_descriptors) { | ||
| 818 | num_images += desc.count; | ||
| 819 | } | ||
| 820 | for (const auto& desc : info.image_descriptors) { | ||
| 821 | num_images += desc.count; | ||
| 822 | } | ||
| 823 | } | ||
| 824 | ASSERT(num_images <= MAX_IMAGE_ELEMENTS); | ||
| 825 | } | ||
| 826 | |||
| 484 | } // namespace Vulkan | 827 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 8b6a98fe0..2bd48d697 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h | |||
| @@ -1,30 +1,36 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | 1 | // Copyright 2021 yuzu Emulator Project |
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <algorithm> | ||
| 7 | #include <array> | 8 | #include <array> |
| 8 | #include <optional> | 9 | #include <atomic> |
| 9 | #include <vector> | 10 | #include <condition_variable> |
| 11 | #include <mutex> | ||
| 12 | #include <type_traits> | ||
| 10 | 13 | ||
| 11 | #include "common/common_types.h" | 14 | #include "common/thread_worker.h" |
| 15 | #include "shader_recompiler/shader_info.h" | ||
| 12 | #include "video_core/engines/maxwell_3d.h" | 16 | #include "video_core/engines/maxwell_3d.h" |
| 13 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | 17 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" |
| 18 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||
| 14 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 19 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 15 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | 20 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| 16 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 21 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 17 | 22 | ||
| 18 | namespace Vulkan { | 23 | namespace VideoCore { |
| 24 | class ShaderNotify; | ||
| 25 | } | ||
| 19 | 26 | ||
| 20 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 27 | namespace Vulkan { |
| 21 | 28 | ||
| 22 | struct GraphicsPipelineCacheKey { | 29 | struct GraphicsPipelineCacheKey { |
| 23 | VkRenderPass renderpass; | 30 | std::array<u64, 6> unique_hashes; |
| 24 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders; | 31 | FixedPipelineState state; |
| 25 | FixedPipelineState fixed_state; | ||
| 26 | 32 | ||
| 27 | std::size_t Hash() const noexcept; | 33 | size_t Hash() const noexcept; |
| 28 | 34 | ||
| 29 | bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; | 35 | bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; |
| 30 | 36 | ||
| @@ -32,72 +38,115 @@ struct GraphicsPipelineCacheKey { | |||
| 32 | return !operator==(rhs); | 38 | return !operator==(rhs); |
| 33 | } | 39 | } |
| 34 | 40 | ||
| 35 | std::size_t Size() const noexcept { | 41 | size_t Size() const noexcept { |
| 36 | return sizeof(renderpass) + sizeof(shaders) + fixed_state.Size(); | 42 | return sizeof(unique_hashes) + state.Size(); |
| 37 | } | 43 | } |
| 38 | }; | 44 | }; |
| 39 | static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>); | 45 | static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>); |
| 40 | static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>); | 46 | static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>); |
| 41 | static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>); | 47 | static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>); |
| 42 | 48 | ||
| 49 | } // namespace Vulkan | ||
| 50 | |||
| 51 | namespace std { | ||
| 52 | template <> | ||
| 53 | struct hash<Vulkan::GraphicsPipelineCacheKey> { | ||
| 54 | size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept { | ||
| 55 | return k.Hash(); | ||
| 56 | } | ||
| 57 | }; | ||
| 58 | } // namespace std | ||
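Specializing `std::hash` to delegate to the key's own `Hash()` is what lets the cache key drive `std::unordered_map` directly. A self-contained sketch with a cut-down key; the FNV-1a mixing here is a stand-in, since the real implementation hashes the whole key, fixed pipeline state included, with CityHash:

```cpp
#include <array>
#include <cstddef>
#include <cstdint>
#include <unordered_map>

struct CacheKey {
    std::array<std::uint64_t, 6> unique_hashes{};

    std::size_t Hash() const noexcept {
        std::uint64_t hash = 0xcbf29ce484222325ULL; // FNV-1a offset basis
        for (const std::uint64_t value : unique_hashes) {
            hash = (hash ^ value) * 0x100000001b3ULL;
        }
        return static_cast<std::size_t>(hash);
    }
    bool operator==(const CacheKey&) const = default;
};

template <>
struct std::hash<CacheKey> {
    std::size_t operator()(const CacheKey& key) const noexcept {
        return key.Hash(); // delegate to the key's own hash
    }
};

int main() {
    std::unordered_map<CacheKey, int> cache;
    cache[CacheKey{.unique_hashes = {1, 2, 3}}] = 42;
}
```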
| 59 | |||
| 60 | namespace Vulkan { | ||
| 61 | |||
| 43 | class Device; | 62 | class Device; |
| 44 | class VKDescriptorPool; | 63 | class RenderPassCache; |
| 45 | class VKScheduler; | 64 | class VKScheduler; |
| 46 | class VKUpdateDescriptorQueue; | 65 | class VKUpdateDescriptorQueue; |
| 47 | 66 | ||
| 48 | using SPIRVProgram = std::array<std::optional<SPIRVShader>, Maxwell::MaxShaderStage>; | 67 | class GraphicsPipeline { |
| 68 | static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; | ||
| 49 | 69 | ||
| 50 | class VKGraphicsPipeline final { | ||
| 51 | public: | 70 | public: |
| 52 | explicit VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_, | 71 | explicit GraphicsPipeline( |
| 53 | VKDescriptorPool& descriptor_pool, | 72 | Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory, |
| 54 | VKUpdateDescriptorQueue& update_descriptor_queue_, | 73 | VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache, |
| 55 | const GraphicsPipelineCacheKey& key, | 74 | VideoCore::ShaderNotify* shader_notify, const Device& device, |
| 56 | vk::Span<VkDescriptorSetLayoutBinding> bindings, | 75 | DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, |
| 57 | const SPIRVProgram& program, u32 num_color_buffers); | 76 | Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache, |
| 58 | ~VKGraphicsPipeline(); | 77 | const GraphicsPipelineCacheKey& key, std::array<vk::ShaderModule, NUM_STAGES> stages, |
| 59 | 78 | const std::array<const Shader::Info*, NUM_STAGES>& infos); | |
| 60 | VkDescriptorSet CommitDescriptorSet(); | 79 | |
| 61 | 80 | GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete; | |
| 62 | VkPipeline GetHandle() const { | 81 | GraphicsPipeline(GraphicsPipeline&&) noexcept = delete; |
| 63 | return *pipeline; | 82 | |
| 83 | GraphicsPipeline& operator=(const GraphicsPipeline&) = delete; | ||
| 84 | GraphicsPipeline(const GraphicsPipeline&) = delete; | ||
| 85 | |||
| 86 | void AddTransition(GraphicsPipeline* transition); | ||
| 87 | |||
| 88 | void Configure(bool is_indexed) { | ||
| 89 | configure_func(this, is_indexed); | ||
| 64 | } | 90 | } |
| 65 | 91 | ||
| 66 | VkPipelineLayout GetLayout() const { | 92 | [[nodiscard]] GraphicsPipeline* Next(const GraphicsPipelineCacheKey& current_key) noexcept { |
| 67 | return *layout; | 93 | if (key == current_key) { |
| 94 | return this; | ||
| 95 | } | ||
| 96 | const auto it{std::find(transition_keys.begin(), transition_keys.end(), current_key)}; | ||
| 97 | return it != transition_keys.end() ? transitions[std::distance(transition_keys.begin(), it)] | ||
| 98 | : nullptr; | ||
| 68 | } | 99 | } |
| 69 | 100 | ||
| 70 | GraphicsPipelineCacheKey GetCacheKey() const { | 101 | [[nodiscard]] bool IsBuilt() const noexcept { |
| 71 | return cache_key; | 102 | return is_built.load(std::memory_order::relaxed); |
| 72 | } | 103 | } |
| 73 | 104 | ||
| 74 | private: | 105 | template <typename Spec> |
| 75 | vk::DescriptorSetLayout CreateDescriptorSetLayout( | 106 | static auto MakeConfigureSpecFunc() { |
| 76 | vk::Span<VkDescriptorSetLayoutBinding> bindings) const; | 107 | return [](GraphicsPipeline* pl, bool is_indexed) { pl->ConfigureImpl<Spec>(is_indexed); }; |
| 108 | } | ||
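`MakeConfigureSpecFunc` relies on the rule that a captureless lambda converts to a plain function pointer, which is how one `ConfigureImpl<Spec>` instantiation per spec can be stored in the raw `configure_func` pointer without `std::function` overhead. In isolation (`Pipeline` and `FastSpec` are illustrative):

```cpp
#include <cstdio>

struct Pipeline {
    template <typename Spec>
    void ConfigureImpl(bool is_indexed) {
        std::printf("indexed=%d fast=%d\n", is_indexed, Spec::fast);
    }

    void (*configure_func)(Pipeline*, bool) = nullptr;
};

struct FastSpec { static constexpr bool fast = true; };

template <typename Spec>
auto MakeConfigureSpecFunc() {
    // Captureless, so it converts to void (*)(Pipeline*, bool).
    return [](Pipeline* pl, bool is_indexed) { pl->ConfigureImpl<Spec>(is_indexed); };
}

int main() {
    Pipeline pipeline;
    pipeline.configure_func = MakeConfigureSpecFunc<FastSpec>();
    pipeline.configure_func(&pipeline, true);
}
```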
| 77 | 109 | ||
| 78 | vk::PipelineLayout CreatePipelineLayout() const; | 110 | private: |
| 111 | template <typename Spec> | ||
| 112 | void ConfigureImpl(bool is_indexed); | ||
| 79 | 113 | ||
| 80 | vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( | 114 | void ConfigureDraw(); |
| 81 | const SPIRVProgram& program) const; | ||
| 82 | 115 | ||
| 83 | std::vector<vk::ShaderModule> CreateShaderModules(const SPIRVProgram& program) const; | 116 | void MakePipeline(VkRenderPass render_pass); |
| 84 | 117 | ||
| 85 | vk::Pipeline CreatePipeline(const SPIRVProgram& program, VkRenderPass renderpass, | 118 | void Validate(); |
| 86 | u32 num_color_buffers) const; | ||
| 87 | 119 | ||
| 120 | const GraphicsPipelineCacheKey key; | ||
| 121 | Tegra::Engines::Maxwell3D& maxwell3d; | ||
| 122 | Tegra::MemoryManager& gpu_memory; | ||
| 88 | const Device& device; | 123 | const Device& device; |
| 124 | TextureCache& texture_cache; | ||
| 125 | BufferCache& buffer_cache; | ||
| 89 | VKScheduler& scheduler; | 126 | VKScheduler& scheduler; |
| 90 | const GraphicsPipelineCacheKey cache_key; | 127 | VKUpdateDescriptorQueue& update_descriptor_queue; |
| 91 | const u64 hash; | 128 | |
| 129 | void (*configure_func)(GraphicsPipeline*, bool){}; | ||
| 130 | |||
| 131 | std::vector<GraphicsPipelineCacheKey> transition_keys; | ||
| 132 | std::vector<GraphicsPipeline*> transitions; | ||
| 133 | |||
| 134 | std::array<vk::ShaderModule, NUM_STAGES> spv_modules; | ||
| 135 | |||
| 136 | std::array<Shader::Info, NUM_STAGES> stage_infos; | ||
| 137 | std::array<u32, 5> enabled_uniform_buffer_masks{}; | ||
| 138 | VideoCommon::UniformBufferSizes uniform_buffer_sizes{}; | ||
| 92 | 139 | ||
| 93 | vk::DescriptorSetLayout descriptor_set_layout; | 140 | vk::DescriptorSetLayout descriptor_set_layout; |
| 94 | DescriptorAllocator descriptor_allocator; | 141 | DescriptorAllocator descriptor_allocator; |
| 95 | VKUpdateDescriptorQueue& update_descriptor_queue; | 142 | vk::PipelineLayout pipeline_layout; |
| 96 | vk::PipelineLayout layout; | 143 | vk::DescriptorUpdateTemplateKHR descriptor_update_template; |
| 97 | vk::DescriptorUpdateTemplateKHR descriptor_template; | ||
| 98 | std::vector<vk::ShaderModule> modules; | ||
| 99 | |||
| 100 | vk::Pipeline pipeline; | 144 | vk::Pipeline pipeline; |
| 145 | |||
| 146 | std::condition_variable build_condvar; | ||
| 147 | std::mutex build_mutex; | ||
| 148 | std::atomic_bool is_built{false}; | ||
| 149 | bool uses_push_descriptor{false}; | ||
| 101 | }; | 150 | }; |
| 102 | 151 | ||
| 103 | } // namespace Vulkan | 152 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h index ee3cd35d0..4f8688118 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.h +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h | |||
| @@ -39,9 +39,9 @@ public: | |||
| 39 | return KnownGpuTick() >= tick; | 39 | return KnownGpuTick() >= tick; |
| 40 | } | 40 | } |
| 41 | 41 | ||
| 42 | /// Advance to the logical tick. | 42 | /// Advance the logical tick and return the previous value |
| 43 | void NextTick() noexcept { | 43 | [[nodiscard]] u64 NextTick() noexcept { |
| 44 | ++current_tick; | 44 | return current_tick.fetch_add(1, std::memory_order::relaxed); |
| 45 | } | 45 | } |
| 46 | 46 | ||
| 47 | /// Refresh the known GPU tick | 47 | /// Refresh the known GPU tick |
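The `NextTick()` change above folds "read the current tick, then advance it" into a single atomic `fetch_add`, which both returns the pre-increment value and removes the window a separate load-then-increment would leave between readers. A minimal sketch:

```cpp
#include <atomic>
#include <cassert>
#include <cstdint>

class TickSource {
public:
    [[nodiscard]] std::uint64_t NextTick() noexcept {
        // Atomically advance and hand back the tick that was just closed.
        return current_tick.fetch_add(1, std::memory_order_relaxed);
    }

private:
    std::atomic<std::uint64_t> current_tick{1};
};

int main() {
    TickSource ticks;
    assert(ticks.NextTick() == 1); // the old value comes back...
    assert(ticks.NextTick() == 2); // ...and the counter has advanced
}
```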
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 8991505ca..57b163247 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -4,444 +4,613 @@ | |||
| 4 | 4 | ||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <cstddef> | 6 | #include <cstddef> |
| 7 | #include <fstream> | ||
| 7 | #include <memory> | 8 | #include <memory> |
| 9 | #include <thread> | ||
| 8 | #include <vector> | 10 | #include <vector> |
| 9 | 11 | ||
| 10 | #include "common/bit_cast.h" | 12 | #include "common/bit_cast.h" |
| 11 | #include "common/cityhash.h" | 13 | #include "common/cityhash.h" |
| 14 | #include "common/fs/fs.h" | ||
| 15 | #include "common/fs/path_util.h" | ||
| 12 | #include "common/microprofile.h" | 16 | #include "common/microprofile.h" |
| 17 | #include "common/thread_worker.h" | ||
| 13 | #include "core/core.h" | 18 | #include "core/core.h" |
| 14 | #include "core/memory.h" | 19 | #include "core/memory.h" |
| 20 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 21 | #include "shader_recompiler/environment.h" | ||
| 22 | #include "shader_recompiler/frontend/maxwell/control_flow.h" | ||
| 23 | #include "shader_recompiler/frontend/maxwell/translate_program.h" | ||
| 24 | #include "shader_recompiler/program_header.h" | ||
| 25 | #include "video_core/dirty_flags.h" | ||
| 15 | #include "video_core/engines/kepler_compute.h" | 26 | #include "video_core/engines/kepler_compute.h" |
| 16 | #include "video_core/engines/maxwell_3d.h" | 27 | #include "video_core/engines/maxwell_3d.h" |
| 17 | #include "video_core/memory_manager.h" | 28 | #include "video_core/memory_manager.h" |
| 18 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | 29 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" |
| 19 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | 30 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" |
| 31 | #include "video_core/renderer_vulkan/pipeline_helper.h" | ||
| 20 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | 32 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" |
| 21 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 33 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 22 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | ||
| 23 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | 34 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" |
| 24 | #include "video_core/renderer_vulkan/vk_rasterizer.h" | 35 | #include "video_core/renderer_vulkan/vk_rasterizer.h" |
| 25 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 36 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 37 | #include "video_core/renderer_vulkan/vk_shader_util.h" | ||
| 26 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 38 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 27 | #include "video_core/shader/compiler_settings.h" | ||
| 28 | #include "video_core/shader/memory_util.h" | ||
| 29 | #include "video_core/shader_cache.h" | 39 | #include "video_core/shader_cache.h" |
| 40 | #include "video_core/shader_environment.h" | ||
| 30 | #include "video_core/shader_notify.h" | 41 | #include "video_core/shader_notify.h" |
| 31 | #include "video_core/vulkan_common/vulkan_device.h" | 42 | #include "video_core/vulkan_common/vulkan_device.h" |
| 32 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 43 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 33 | 44 | ||
| 34 | namespace Vulkan { | 45 | namespace Vulkan { |
| 35 | |||
| 36 | MICROPROFILE_DECLARE(Vulkan_PipelineCache); | 46 | MICROPROFILE_DECLARE(Vulkan_PipelineCache); |
| 37 | 47 | ||
| 38 | using Tegra::Engines::ShaderType; | ||
| 39 | using VideoCommon::Shader::GetShaderAddress; | ||
| 40 | using VideoCommon::Shader::GetShaderCode; | ||
| 41 | using VideoCommon::Shader::KERNEL_MAIN_OFFSET; | ||
| 42 | using VideoCommon::Shader::ProgramCode; | ||
| 43 | using VideoCommon::Shader::STAGE_MAIN_OFFSET; | ||
| 44 | |||
| 45 | namespace { | 48 | namespace { |
| 49 | using Shader::Backend::SPIRV::EmitSPIRV; | ||
| 50 | using Shader::Maxwell::MergeDualVertexPrograms; | ||
| 51 | using Shader::Maxwell::TranslateProgram; | ||
| 52 | using VideoCommon::ComputeEnvironment; | ||
| 53 | using VideoCommon::FileEnvironment; | ||
| 54 | using VideoCommon::GenericEnvironment; | ||
| 55 | using VideoCommon::GraphicsEnvironment; | ||
| 56 | |||
| 57 | constexpr u32 CACHE_VERSION = 5; | ||
| 58 | |||
| 59 | template <typename Container> | ||
| 60 | auto MakeSpan(Container& container) { | ||
| 61 | return std::span(container.data(), container.size()); | ||
| 62 | } | ||
| 46 | 63 | ||
| 47 | constexpr VkDescriptorType UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; | 64 | Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp comparison) { |
| 48 | constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; | 65 | switch (comparison) { |
| 49 | constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; | 66 | case Maxwell::ComparisonOp::Never: |
| 50 | constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; | 67 | case Maxwell::ComparisonOp::NeverOld: |
| 51 | constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; | 68 | return Shader::CompareFunction::Never; |
| 52 | constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; | 69 | case Maxwell::ComparisonOp::Less: |
| 53 | 70 | case Maxwell::ComparisonOp::LessOld: | |
| 54 | constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ | 71 | return Shader::CompareFunction::Less; |
| 55 | .depth = VideoCommon::Shader::CompileDepth::FullDecompile, | 72 | case Maxwell::ComparisonOp::Equal: |
| 56 | .disable_else_derivation = true, | 73 | case Maxwell::ComparisonOp::EqualOld: |
| 57 | }; | 74 | return Shader::CompareFunction::Equal; |
| 58 | 75 | case Maxwell::ComparisonOp::LessEqual: | |
| 59 | constexpr std::size_t GetStageFromProgram(std::size_t program) { | 76 | case Maxwell::ComparisonOp::LessEqualOld: |
| 60 | return program == 0 ? 0 : program - 1; | 77 | return Shader::CompareFunction::LessThanEqual; |
| 78 | case Maxwell::ComparisonOp::Greater: | ||
| 79 | case Maxwell::ComparisonOp::GreaterOld: | ||
| 80 | return Shader::CompareFunction::Greater; | ||
| 81 | case Maxwell::ComparisonOp::NotEqual: | ||
| 82 | case Maxwell::ComparisonOp::NotEqualOld: | ||
| 83 | return Shader::CompareFunction::NotEqual; | ||
| 84 | case Maxwell::ComparisonOp::GreaterEqual: | ||
| 85 | case Maxwell::ComparisonOp::GreaterEqualOld: | ||
| 86 | return Shader::CompareFunction::GreaterThanEqual; | ||
| 87 | case Maxwell::ComparisonOp::Always: | ||
| 88 | case Maxwell::ComparisonOp::AlwaysOld: | ||
| 89 | return Shader::CompareFunction::Always; | ||
| 90 | } | ||
| 91 | UNIMPLEMENTED_MSG("Unimplemented comparison op={}", comparison); | ||
| 92 | return {}; | ||
| 61 | } | 93 | } |
| 62 | 94 | ||
| 63 | constexpr ShaderType GetStageFromProgram(Maxwell::ShaderProgram program) { | 95 | Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { |
| 64 | return static_cast<ShaderType>(GetStageFromProgram(static_cast<std::size_t>(program))); | 96 | if (attr.enabled == 0) { |
| 97 | return Shader::AttributeType::Disabled; | ||
| 98 | } | ||
| 99 | switch (attr.Type()) { | ||
| 100 | case Maxwell::VertexAttribute::Type::SignedNorm: | ||
| 101 | case Maxwell::VertexAttribute::Type::UnsignedNorm: | ||
| 102 | case Maxwell::VertexAttribute::Type::UnsignedScaled: | ||
| 103 | case Maxwell::VertexAttribute::Type::SignedScaled: | ||
| 104 | case Maxwell::VertexAttribute::Type::Float: | ||
| 105 | return Shader::AttributeType::Float; | ||
| 106 | case Maxwell::VertexAttribute::Type::SignedInt: | ||
| 107 | return Shader::AttributeType::SignedInt; | ||
| 108 | case Maxwell::VertexAttribute::Type::UnsignedInt: | ||
| 109 | return Shader::AttributeType::UnsignedInt; | ||
| 110 | } | ||
| 111 | return Shader::AttributeType::Float; | ||
| 65 | } | 112 | } |
| 66 | 113 | ||
| 67 | ShaderType GetShaderType(Maxwell::ShaderProgram program) { | 114 | Shader::AttributeType AttributeType(const FixedPipelineState& state, size_t index) { |
| 68 | switch (program) { | 115 | switch (state.DynamicAttributeType(index)) { |
| 69 | case Maxwell::ShaderProgram::VertexB: | 116 | case 0: |
| 70 | return ShaderType::Vertex; | 117 | return Shader::AttributeType::Disabled; |
| 71 | case Maxwell::ShaderProgram::TesselationControl: | 118 | case 1: |
| 72 | return ShaderType::TesselationControl; | 119 | return Shader::AttributeType::Float; |
| 73 | case Maxwell::ShaderProgram::TesselationEval: | 120 | case 2: |
| 74 | return ShaderType::TesselationEval; | 121 | return Shader::AttributeType::SignedInt; |
| 75 | case Maxwell::ShaderProgram::Geometry: | 122 | case 3: |
| 76 | return ShaderType::Geometry; | 123 | return Shader::AttributeType::UnsignedInt; |
| 77 | case Maxwell::ShaderProgram::Fragment: | ||
| 78 | return ShaderType::Fragment; | ||
| 79 | default: | ||
| 80 | UNIMPLEMENTED_MSG("program={}", program); | ||
| 81 | return ShaderType::Vertex; | ||
| 82 | } | 124 | } |
| 125 | return Shader::AttributeType::Disabled; | ||
| 83 | } | 126 | } |
| 84 | 127 | ||
| 85 | template <VkDescriptorType descriptor_type, class Container> | 128 | Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> programs, |
| 86 | void AddBindings(std::vector<VkDescriptorSetLayoutBinding>& bindings, u32& binding, | 129 | const GraphicsPipelineCacheKey& key, |
| 87 | VkShaderStageFlags stage_flags, const Container& container) { | 130 | const Shader::IR::Program& program, |
| 88 | const u32 num_entries = static_cast<u32>(std::size(container)); | 131 | const Shader::IR::Program* previous_program) { |
| 89 | for (std::size_t i = 0; i < num_entries; ++i) { | 132 | Shader::RuntimeInfo info; |
| 90 | u32 count = 1; | 133 | if (previous_program) { |
| 91 | if constexpr (descriptor_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { | 134 | info.previous_stage_stores = previous_program->info.stores; |
| 92 | // Combined image samplers can be arrayed. | 135 | if (previous_program->is_geometry_passthrough) { |
| 93 | count = container[i].size; | 136 | info.previous_stage_stores.mask |= previous_program->info.passthrough.mask; |
| 94 | } | 137 | } |
| 95 | bindings.push_back({ | 138 | } else { |
| 96 | .binding = binding++, | 139 | info.previous_stage_stores.mask.set(); |
| 97 | .descriptorType = descriptor_type, | 140 | } |
| 98 | .descriptorCount = count, | 141 | const Shader::Stage stage{program.stage}; |
| 99 | .stageFlags = stage_flags, | 142 | const bool has_geometry{key.unique_hashes[4] != 0 && !programs[4].is_geometry_passthrough}; |
| 100 | .pImmutableSamplers = nullptr, | 143 | const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; |
| 101 | }); | 144 | const float point_size{Common::BitCast<float>(key.state.point_size)}; |
| 145 | switch (stage) { | ||
| 146 | case Shader::Stage::VertexB: | ||
| 147 | if (!has_geometry) { | ||
| 148 | if (key.state.topology == Maxwell::PrimitiveTopology::Points) { | ||
| 149 | info.fixed_state_point_size = point_size; | ||
| 150 | } | ||
| 151 | if (key.state.xfb_enabled) { | ||
| 152 | info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); | ||
| 153 | } | ||
| 154 | info.convert_depth_mode = gl_ndc; | ||
| 155 | } | ||
| 156 | if (key.state.dynamic_vertex_input) { | ||
| 157 | for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { | ||
| 158 | info.generic_input_types[index] = AttributeType(key.state, index); | ||
| 159 | } | ||
| 160 | } else { | ||
| 161 | std::ranges::transform(key.state.attributes, info.generic_input_types.begin(), | ||
| 162 | &CastAttributeType); | ||
| 163 | } | ||
| 164 | break; | ||
| 165 | case Shader::Stage::TessellationEval: | ||
| 166 | // The tessellation winding has to be flipped here; why is not yet understood. | ||
| 167 | info.tess_clockwise = key.state.tessellation_clockwise == 0; | ||
| 168 | info.tess_primitive = [&key] { | ||
| 169 | const u32 raw{key.state.tessellation_primitive.Value()}; | ||
| 170 | switch (static_cast<Maxwell::TessellationPrimitive>(raw)) { | ||
| 171 | case Maxwell::TessellationPrimitive::Isolines: | ||
| 172 | return Shader::TessPrimitive::Isolines; | ||
| 173 | case Maxwell::TessellationPrimitive::Triangles: | ||
| 174 | return Shader::TessPrimitive::Triangles; | ||
| 175 | case Maxwell::TessellationPrimitive::Quads: | ||
| 176 | return Shader::TessPrimitive::Quads; | ||
| 177 | } | ||
| 178 | UNREACHABLE(); | ||
| 179 | return Shader::TessPrimitive::Triangles; | ||
| 180 | }(); | ||
| 181 | info.tess_spacing = [&] { | ||
| 182 | const u32 raw{key.state.tessellation_spacing}; | ||
| 183 | switch (static_cast<Maxwell::TessellationSpacing>(raw)) { | ||
| 184 | case Maxwell::TessellationSpacing::Equal: | ||
| 185 | return Shader::TessSpacing::Equal; | ||
| 186 | case Maxwell::TessellationSpacing::FractionalOdd: | ||
| 187 | return Shader::TessSpacing::FractionalOdd; | ||
| 188 | case Maxwell::TessellationSpacing::FractionalEven: | ||
| 189 | return Shader::TessSpacing::FractionalEven; | ||
| 190 | } | ||
| 191 | UNREACHABLE(); | ||
| 192 | return Shader::TessSpacing::Equal; | ||
| 193 | }(); | ||
| 194 | break; | ||
| 195 | case Shader::Stage::Geometry: | ||
| 196 | if (program.output_topology == Shader::OutputTopology::PointList) { | ||
| 197 | info.fixed_state_point_size = point_size; | ||
| 198 | } | ||
| 199 | if (key.state.xfb_enabled != 0) { | ||
| 200 | info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); | ||
| 201 | } | ||
| 202 | info.convert_depth_mode = gl_ndc; | ||
| 203 | break; | ||
| 204 | case Shader::Stage::Fragment: | ||
| 205 | info.alpha_test_func = MaxwellToCompareFunction( | ||
| 206 | key.state.UnpackComparisonOp(key.state.alpha_test_func.Value())); | ||
| 207 | info.alpha_test_reference = Common::BitCast<float>(key.state.alpha_test_ref); | ||
| 208 | break; | ||
| 209 | default: | ||
| 210 | break; | ||
| 211 | } | ||
| 212 | switch (key.state.topology) { | ||
| 213 | case Maxwell::PrimitiveTopology::Points: | ||
| 214 | info.input_topology = Shader::InputTopology::Points; | ||
| 215 | break; | ||
| 216 | case Maxwell::PrimitiveTopology::Lines: | ||
| 217 | case Maxwell::PrimitiveTopology::LineLoop: | ||
| 218 | case Maxwell::PrimitiveTopology::LineStrip: | ||
| 219 | info.input_topology = Shader::InputTopology::Lines; | ||
| 220 | break; | ||
| 221 | case Maxwell::PrimitiveTopology::Triangles: | ||
| 222 | case Maxwell::PrimitiveTopology::TriangleStrip: | ||
| 223 | case Maxwell::PrimitiveTopology::TriangleFan: | ||
| 224 | case Maxwell::PrimitiveTopology::Quads: | ||
| 225 | case Maxwell::PrimitiveTopology::QuadStrip: | ||
| 226 | case Maxwell::PrimitiveTopology::Polygon: | ||
| 227 | case Maxwell::PrimitiveTopology::Patches: | ||
| 228 | info.input_topology = Shader::InputTopology::Triangles; | ||
| 229 | break; | ||
| 230 | case Maxwell::PrimitiveTopology::LinesAdjacency: | ||
| 231 | case Maxwell::PrimitiveTopology::LineStripAdjacency: | ||
| 232 | info.input_topology = Shader::InputTopology::LinesAdjacency; | ||
| 233 | break; | ||
| 234 | case Maxwell::PrimitiveTopology::TrianglesAdjacency: | ||
| 235 | case Maxwell::PrimitiveTopology::TriangleStripAdjacency: | ||
| 236 | info.input_topology = Shader::InputTopology::TrianglesAdjacency; | ||
| 237 | break; | ||
| 102 | } | 238 | } |
| 239 | info.force_early_z = key.state.early_z != 0; | ||
| 240 | info.y_negate = key.state.y_negate != 0; | ||
| 241 | return info; | ||
| 103 | } | 242 | } |
| 243 | } // Anonymous namespace | ||
| 104 | 244 | ||
| 105 | u32 FillDescriptorLayout(const ShaderEntries& entries, | 245 | size_t ComputePipelineCacheKey::Hash() const noexcept { |
| 106 | std::vector<VkDescriptorSetLayoutBinding>& bindings, | 246 | const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this); |
| 107 | Maxwell::ShaderProgram program_type, u32 base_binding) { | 247 | return static_cast<size_t>(hash); |
| 108 | const ShaderType stage = GetStageFromProgram(program_type); | ||
| 109 | const VkShaderStageFlags flags = MaxwellToVK::ShaderStage(stage); | ||
| 110 | |||
| 111 | u32 binding = base_binding; | ||
| 112 | AddBindings<UNIFORM_BUFFER>(bindings, binding, flags, entries.const_buffers); | ||
| 113 | AddBindings<STORAGE_BUFFER>(bindings, binding, flags, entries.global_buffers); | ||
| 114 | AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.uniform_texels); | ||
| 115 | AddBindings<COMBINED_IMAGE_SAMPLER>(bindings, binding, flags, entries.samplers); | ||
| 116 | AddBindings<STORAGE_TEXEL_BUFFER>(bindings, binding, flags, entries.storage_texels); | ||
| 117 | AddBindings<STORAGE_IMAGE>(bindings, binding, flags, entries.images); | ||
| 118 | return binding; | ||
| 119 | } | 248 | } |
| 120 | 249 | ||
| 121 | } // Anonymous namespace | 250 | bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept { |
| 251 | return std::memcmp(&rhs, this, sizeof *this) == 0; | ||
| 252 | } | ||
| 122 | 253 | ||
| 123 | std::size_t GraphicsPipelineCacheKey::Hash() const noexcept { | 254 | size_t GraphicsPipelineCacheKey::Hash() const noexcept { |
| 124 | const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size()); | 255 | const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size()); |
| 125 | return static_cast<std::size_t>(hash); | 256 | return static_cast<size_t>(hash); |
| 126 | } | 257 | } |
| 127 | 258 | ||
| 128 | bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept { | 259 | bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept { |
| 129 | return std::memcmp(&rhs, this, Size()) == 0; | 260 | return std::memcmp(&rhs, this, Size()) == 0; |
| 130 | } | 261 | } |
| 131 | 262 | ||
| 132 | std::size_t ComputePipelineCacheKey::Hash() const noexcept { | 263 | PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_, |
| 133 | const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this); | 264 | Tegra::Engines::KeplerCompute& kepler_compute_, |
| 134 | return static_cast<std::size_t>(hash); | 265 | Tegra::MemoryManager& gpu_memory_, const Device& device_, |
| 135 | } | 266 | VKScheduler& scheduler_, DescriptorPool& descriptor_pool_, |
| 136 | 267 | VKUpdateDescriptorQueue& update_descriptor_queue_, | |
| 137 | bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept { | 268 | RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_, |
| 138 | return std::memcmp(&rhs, this, sizeof *this) == 0; | 269 | TextureCache& texture_cache_, VideoCore::ShaderNotify& shader_notify_) |
| 270 | : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, | ||
| 271 | device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, | ||
| 272 | update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, | ||
| 273 | buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, shader_notify{shader_notify_}, | ||
| 274 | use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, | ||
| 275 | workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"), | ||
| 276 | serialization_thread(1, "yuzu:PipelineSerialization") { | ||
| 277 | const auto& float_control{device.FloatControlProperties()}; | ||
| 278 | const VkDriverIdKHR driver_id{device.GetDriverID()}; | ||
| 279 | profile = Shader::Profile{ | ||
| 280 | .supported_spirv = device.IsKhrSpirv1_4Supported() ? 0x00010400U : 0x00010000U, | ||
| 281 | .unified_descriptor_binding = true, | ||
| 282 | .support_descriptor_aliasing = true, | ||
| 283 | .support_int8 = true, | ||
| 284 | .support_int16 = device.IsShaderInt16Supported(), | ||
| 285 | .support_int64 = device.IsShaderInt64Supported(), | ||
| 286 | .support_vertex_instance_id = false, | ||
| 287 | .support_float_controls = true, | ||
| 288 | .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == | ||
| 289 | VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, | ||
| 290 | .support_separate_rounding_mode = | ||
| 291 | float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, | ||
| 292 | .support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE, | ||
| 293 | .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE, | ||
| 294 | .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE, | ||
| 295 | .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, | ||
| 296 | .support_fp16_signed_zero_nan_preserve = | ||
| 297 | float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE, | ||
| 298 | .support_fp32_signed_zero_nan_preserve = | ||
| 299 | float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, | ||
| 300 | .support_fp64_signed_zero_nan_preserve = | ||
| 301 | float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE, | ||
| 302 | .support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(), | ||
| 303 | .support_vote = true, | ||
| 304 | .support_viewport_index_layer_non_geometry = | ||
| 305 | device.IsExtShaderViewportIndexLayerSupported(), | ||
| 306 | .support_viewport_mask = device.IsNvViewportArray2Supported(), | ||
| 307 | .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), | ||
| 308 | .support_demote_to_helper_invocation = true, | ||
| 309 | .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), | ||
| 310 | .support_derivative_control = true, | ||
| 311 | .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), | ||
| 312 | |||
| 313 | .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), | ||
| 314 | |||
| 315 | .lower_left_origin_mode = false, | ||
| 316 | .need_declared_frag_colors = false, | ||
| 317 | |||
| 318 | .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, | ||
| 319 | .has_broken_unsigned_image_offsets = false, | ||
| 320 | .has_broken_signed_operations = false, | ||
| 321 | .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR, | ||
| 322 | .ignore_nan_fp_comparisons = false, | ||
| 323 | }; | ||
| 324 | host_info = Shader::HostTranslateInfo{ | ||
| 325 | .support_float16 = device.IsFloat16Supported(), | ||
| 326 | .support_int64 = device.IsShaderInt64Supported(), | ||
| 327 | }; | ||
| 139 | } | 328 | } |
| 140 | 329 | ||
| 141 | Shader::Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, ShaderType stage_, | 330 | PipelineCache::~PipelineCache() = default; |
| 142 | GPUVAddr gpu_addr_, VAddr cpu_addr_, ProgramCode program_code_, u32 main_offset_) | ||
| 143 | : gpu_addr(gpu_addr_), program_code(std::move(program_code_)), registry(stage_, engine_), | ||
| 144 | shader_ir(program_code, main_offset_, compiler_settings, registry), | ||
| 145 | entries(GenerateShaderEntries(shader_ir)) {} | ||
| 146 | |||
| 147 | Shader::~Shader() = default; | ||
| 148 | |||
| 149 | VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, | ||
| 150 | Tegra::Engines::Maxwell3D& maxwell3d_, | ||
| 151 | Tegra::Engines::KeplerCompute& kepler_compute_, | ||
| 152 | Tegra::MemoryManager& gpu_memory_, const Device& device_, | ||
| 153 | VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, | ||
| 154 | VKUpdateDescriptorQueue& update_descriptor_queue_) | ||
| 155 | : VideoCommon::ShaderCache<Shader>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, | ||
| 156 | kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, | ||
| 157 | scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{ | ||
| 158 | update_descriptor_queue_} {} | ||
| 159 | |||
| 160 | VKPipelineCache::~VKPipelineCache() = default; | ||
| 161 | 331 | ||
| 162 | std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { | 332 | GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { |
| 163 | std::array<Shader*, Maxwell::MaxShaderProgram> shaders{}; | 333 | MICROPROFILE_SCOPE(Vulkan_PipelineCache); |
| 164 | |||
| 165 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | ||
| 166 | const auto program{static_cast<Maxwell::ShaderProgram>(index)}; | ||
| 167 | |||
| 168 | // Skip stages that are not enabled | ||
| 169 | if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { | ||
| 170 | continue; | ||
| 171 | } | ||
| 172 | |||
| 173 | const GPUVAddr gpu_addr{GetShaderAddress(maxwell3d, program)}; | ||
| 174 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||
| 175 | ASSERT(cpu_addr); | ||
| 176 | |||
| 177 | Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); | ||
| 178 | if (!result) { | ||
| 179 | const u8* const host_ptr{gpu_memory.GetPointer(gpu_addr)}; | ||
| 180 | |||
| 181 | // No shader found - create a new one | ||
| 182 | static constexpr u32 stage_offset = STAGE_MAIN_OFFSET; | ||
| 183 | const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1); | ||
| 184 | ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, false); | ||
| 185 | const std::size_t size_in_bytes = code.size() * sizeof(u64); | ||
| 186 | |||
| 187 | auto shader = std::make_unique<Shader>(maxwell3d, stage, gpu_addr, *cpu_addr, | ||
| 188 | std::move(code), stage_offset); | ||
| 189 | result = shader.get(); | ||
| 190 | 334 | ||
| 191 | if (cpu_addr) { | 335 | if (!RefreshStages(graphics_key.unique_hashes)) { |
| 192 | Register(std::move(shader), *cpu_addr, size_in_bytes); | 336 | current_pipeline = nullptr; |
| 193 | } else { | 337 | return nullptr; |
| 194 | null_shader = std::move(shader); | 338 | } |
| 195 | } | 339 | graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported(), |
| 340 | device.IsExtVertexInputDynamicStateSupported()); | ||
| 341 | |||
| 342 | if (current_pipeline) { | ||
| 343 | GraphicsPipeline* const next{current_pipeline->Next(graphics_key)}; | ||
| 344 | if (next) { | ||
| 345 | current_pipeline = next; | ||
| 346 | return BuiltPipeline(current_pipeline); | ||
| 196 | } | 347 | } |
| 197 | shaders[index] = result; | ||
| 198 | } | 348 | } |
| 199 | return last_shaders = shaders; | 349 | return CurrentGraphicsPipelineSlowPath(); |
| 200 | } | 350 | } |
| 201 | 351 | ||
| 202 | VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( | 352 | ComputePipeline* PipelineCache::CurrentComputePipeline() { |
| 203 | const GraphicsPipelineCacheKey& key, u32 num_color_buffers, | ||
| 204 | VideoCommon::Shader::AsyncShaders& async_shaders) { | ||
| 205 | MICROPROFILE_SCOPE(Vulkan_PipelineCache); | 353 | MICROPROFILE_SCOPE(Vulkan_PipelineCache); |
| 206 | 354 | ||
| 207 | if (last_graphics_pipeline && last_graphics_key == key) { | 355 | const ShaderInfo* const shader{ComputeShader()}; |
| 208 | return last_graphics_pipeline; | 356 | if (!shader) { |
| 209 | } | 357 | return nullptr; |
| 210 | last_graphics_key = key; | ||
| 211 | |||
| 212 | if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(gpu)) { | ||
| 213 | std::unique_lock lock{pipeline_cache}; | ||
| 214 | const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); | ||
| 215 | if (is_cache_miss) { | ||
| 216 | gpu.ShaderNotify().MarkSharderBuilding(); | ||
| 217 | LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); | ||
| 218 | const auto [program, bindings] = DecompileShaders(key.fixed_state); | ||
| 219 | async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool, | ||
| 220 | update_descriptor_queue, bindings, program, key, | ||
| 221 | num_color_buffers); | ||
| 222 | } | ||
| 223 | last_graphics_pipeline = pair->second.get(); | ||
| 224 | return last_graphics_pipeline; | ||
| 225 | } | 358 | } |
| 226 | 359 | const auto& qmd{kepler_compute.launch_description}; | |
| 227 | const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); | 360 | const ComputePipelineCacheKey key{ |
| 228 | auto& entry = pair->second; | 361 | .unique_hash = shader->unique_hash, |
| 229 | if (is_cache_miss) { | 362 | .shared_memory_size = qmd.shared_alloc, |
| 230 | gpu.ShaderNotify().MarkSharderBuilding(); | 363 | .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, |
| 231 | LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); | 364 | }; |
| 232 | const auto [program, bindings] = DecompileShaders(key.fixed_state); | 365 | const auto [pair, is_new]{compute_cache.try_emplace(key)}; |
| 233 | entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool, | 366 | auto& pipeline{pair->second}; |
| 234 | update_descriptor_queue, key, bindings, | 367 | if (!is_new) { |
| 235 | program, num_color_buffers); | 368 | return pipeline.get(); |
| 236 | gpu.ShaderNotify().MarkShaderComplete(); | ||
| 237 | } | 369 | } |
| 238 | last_graphics_pipeline = entry.get(); | 370 | pipeline = CreateComputePipeline(key, shader); |
| 239 | return last_graphics_pipeline; | 371 | return pipeline.get(); |
| 240 | } | 372 | } |
| 241 | 373 | ||
| 242 | VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { | 374 | void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, |
| 243 | MICROPROFILE_SCOPE(Vulkan_PipelineCache); | 375 | const VideoCore::DiskResourceLoadCallback& callback) { |
| 244 | 376 | if (title_id == 0) { | |
| 245 | const auto [pair, is_cache_miss] = compute_cache.try_emplace(key); | 377 | return; |
| 246 | auto& entry = pair->second; | ||
| 247 | if (!is_cache_miss) { | ||
| 248 | return *entry; | ||
| 249 | } | 378 | } |
| 250 | LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); | 379 | const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)}; |
| 251 | 380 | const auto base_dir{shader_dir / fmt::format("{:016x}", title_id)}; | |
| 252 | const GPUVAddr gpu_addr = key.shader; | 381 | if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir)) { |
| 253 | 382 | LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories"); | |
| 254 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | 383 | return; |
| 255 | ASSERT(cpu_addr); | 384 | } |
| 385 | pipeline_cache_filename = base_dir / "vulkan.bin"; | ||
| 386 | |||
| 387 | struct { | ||
| 388 | std::mutex mutex; | ||
| 389 | size_t total{}; | ||
| 390 | size_t built{}; | ||
| 391 | bool has_loaded{}; | ||
| 392 | } state; | ||
| 393 | |||
| 394 | const auto load_compute{[&](std::ifstream& file, FileEnvironment env) { | ||
| 395 | ComputePipelineCacheKey key; | ||
| 396 | file.read(reinterpret_cast<char*>(&key), sizeof(key)); | ||
| 397 | |||
| 398 | workers.QueueWork([this, key, env = std::move(env), &state, &callback]() mutable { | ||
| 399 | ShaderPools pools; | ||
| 400 | auto pipeline{CreateComputePipeline(pools, key, env, false)}; | ||
| 401 | std::lock_guard lock{state.mutex}; | ||
| 402 | if (pipeline) { | ||
| 403 | compute_cache.emplace(key, std::move(pipeline)); | ||
| 404 | } | ||
| 405 | ++state.built; | ||
| 406 | if (state.has_loaded) { | ||
| 407 | callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); | ||
| 408 | } | ||
| 409 | }); | ||
| 410 | ++state.total; | ||
| 411 | }}; | ||
| 412 | const bool extended_dynamic_state = device.IsExtExtendedDynamicStateSupported(); | ||
| 413 | const bool dynamic_vertex_input = device.IsExtVertexInputDynamicStateSupported(); | ||
| 414 | const auto load_graphics{[&](std::ifstream& file, std::vector<FileEnvironment> envs) { | ||
| 415 | GraphicsPipelineCacheKey key; | ||
| 416 | file.read(reinterpret_cast<char*>(&key), sizeof(key)); | ||
| 417 | |||
| 418 | if ((key.state.extended_dynamic_state != 0) != extended_dynamic_state || | ||
| 419 | (key.state.dynamic_vertex_input != 0) != dynamic_vertex_input) { | ||
| 420 | return; | ||
| 421 | } | ||
| 422 | workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { | ||
| 423 | ShaderPools pools; | ||
| 424 | boost::container::static_vector<Shader::Environment*, 5> env_ptrs; | ||
| 425 | for (auto& env : envs) { | ||
| 426 | env_ptrs.push_back(&env); | ||
| 427 | } | ||
| 428 | auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)}; | ||
| 256 | 429 | ||
| 257 | Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get(); | 430 | std::lock_guard lock{state.mutex}; |
| 258 | if (!shader) { | 431 | graphics_cache.emplace(key, std::move(pipeline)); |
| 259 | // No shader found - create a new one | 432 | ++state.built; |
| 260 | const auto host_ptr = gpu_memory.GetPointer(gpu_addr); | 433 | if (state.has_loaded) { |
| 434 | callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); | ||
| 435 | } | ||
| 436 | }); | ||
| 437 | ++state.total; | ||
| 438 | }}; | ||
| 439 | VideoCommon::LoadPipelines(stop_loading, pipeline_cache_filename, CACHE_VERSION, load_compute, | ||
| 440 | load_graphics); | ||
| 261 | 441 | ||
| 262 | ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, true); | 442 | std::unique_lock lock{state.mutex}; |
| 263 | const std::size_t size_in_bytes = code.size() * sizeof(u64); | 443 | callback(VideoCore::LoadCallbackStage::Build, 0, state.total); |
| 444 | state.has_loaded = true; | ||
| 445 | lock.unlock(); | ||
| 264 | 446 | ||
| 265 | auto shader_info = std::make_unique<Shader>(kepler_compute, ShaderType::Compute, gpu_addr, | 447 | workers.WaitForRequests(); |
| 266 | *cpu_addr, std::move(code), KERNEL_MAIN_OFFSET); | 448 | } |
| 267 | shader = shader_info.get(); | ||
| 268 | 449 | ||
| 269 | if (cpu_addr) { | 450 | GraphicsPipeline* PipelineCache::CurrentGraphicsPipelineSlowPath() { |
| 270 | Register(std::move(shader_info), *cpu_addr, size_in_bytes); | 451 | const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; |
| 271 | } else { | 452 | auto& pipeline{pair->second}; |
| 272 | null_kernel = std::move(shader_info); | 453 | if (is_new) { |
| 273 | } | 454 | pipeline = CreateGraphicsPipeline(); |
| 274 | } | 455 | } |
| 275 | 456 | if (!pipeline) { | |
| 276 | const Specialization specialization{ | 457 | return nullptr; |
| 277 | .base_binding = 0, | 458 | } |
| 278 | .workgroup_size = key.workgroup_size, | 459 | if (current_pipeline) { |
| 279 | .shared_memory_size = key.shared_memory_size, | 460 | current_pipeline->AddTransition(pipeline.get()); |
| 280 | .point_size = std::nullopt, | 461 | } |
| 281 | .enabled_attributes = {}, | 462 | current_pipeline = pipeline.get(); |
| 282 | .attribute_types = {}, | 463 | return BuiltPipeline(current_pipeline); |
| 283 | .ndc_minus_one_to_one = false, | ||
| 284 | }; | ||
| 285 | const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute, | ||
| 286 | shader->GetRegistry(), specialization), | ||
| 287 | shader->GetEntries()}; | ||
| 288 | entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool, | ||
| 289 | update_descriptor_queue, spirv_shader); | ||
| 290 | return *entry; | ||
| 291 | } | 464 | } |
| 292 | 465 | ||
| 293 | void VKPipelineCache::EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline) { | 466 | GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const noexcept { |
| 294 | gpu.ShaderNotify().MarkShaderComplete(); | 467 | if (pipeline->IsBuilt()) { |
| 295 | std::unique_lock lock{pipeline_cache}; | 468 | return pipeline; |
| 296 | graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline); | 469 | } |
| 470 | if (!use_asynchronous_shaders) { | ||
| 471 | return pipeline; | ||
| 472 | } | ||
| 473 | // If depth is in use, we can assume the game is not issuing a one-off draw, so it | ||
| 474 | // is safe to skip the draw while the pipeline builds asynchronously. | ||
| 475 | if (maxwell3d.regs.zeta_enable) { | ||
| 476 | return nullptr; | ||
| 477 | } | ||
| 478 | // If the game is using a small index count, we can assume these are full-screen quads. | ||
| 479 | // Such shaders usually run only once, e.g. to build a texture, so they cannot be | ||
| 480 | // deferred and must be built synchronously. | ||
| 481 | if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) { | ||
| 482 | return pipeline; | ||
| 483 | } | ||
| 484 | return nullptr; | ||
| 297 | } | 485 | } |
| 298 | 486 | ||
| 299 | void VKPipelineCache::OnShaderRemoval(Shader* shader) { | 487 | std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( |
| 300 | bool finished = false; | 488 | ShaderPools& pools, const GraphicsPipelineCacheKey& key, |
| 301 | const auto Finish = [&] { | 489 | std::span<Shader::Environment* const> envs, bool build_in_parallel) try { |
| 302 | // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and | 490 | LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); |
| 303 | // flush. | 491 | size_t env_index{0}; |
| 304 | if (finished) { | 492 | std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs; |
| 305 | return; | 493 | const bool uses_vertex_a{key.unique_hashes[0] != 0}; |
| 306 | } | 494 | const bool uses_vertex_b{key.unique_hashes[1] != 0}; |
| 307 | finished = true; | 495 | for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { |
| 308 | scheduler.Finish(); | 496 | if (key.unique_hashes[index] == 0) { |
| 309 | }; | ||
| 310 | |||
| 311 | const GPUVAddr invalidated_addr = shader->GetGpuAddr(); | ||
| 312 | for (auto it = graphics_cache.begin(); it != graphics_cache.end();) { | ||
| 313 | auto& entry = it->first; | ||
| 314 | if (std::find(entry.shaders.begin(), entry.shaders.end(), invalidated_addr) == | ||
| 315 | entry.shaders.end()) { | ||
| 316 | ++it; | ||
| 317 | continue; | 497 | continue; |
| 318 | } | 498 | } |
| 319 | Finish(); | 499 | Shader::Environment& env{*envs[env_index]}; |
| 320 | it = graphics_cache.erase(it); | 500 | ++env_index; |
| 501 | |||
| 502 | const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))}; | ||
| 503 | Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); | ||
| 504 | if (!uses_vertex_a || index != 1) { | ||
| 505 | // Normal path | ||
| 506 | programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info); | ||
| 507 | } else { | ||
| 508 | // VertexB path when VertexA is present. | ||
| 509 | auto& program_va{programs[0]}; | ||
| 510 | auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; | ||
| 511 | programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); | ||
| 512 | } | ||
| 321 | } | 513 | } |
| 322 | for (auto it = compute_cache.begin(); it != compute_cache.end();) { | 514 | std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{}; |
| 323 | auto& entry = it->first; | 515 | std::array<vk::ShaderModule, Maxwell::MaxShaderStage> modules; |
| 324 | if (entry.shader != invalidated_addr) { | 516 | |
| 325 | ++it; | 517 | const Shader::IR::Program* previous_stage{}; |
| 518 | Shader::Backend::Bindings binding; | ||
| 519 | for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram; | ||
| 520 | ++index) { | ||
| 521 | if (key.unique_hashes[index] == 0) { | ||
| 326 | continue; | 522 | continue; |
| 327 | } | 523 | } |
| 328 | Finish(); | 524 | UNIMPLEMENTED_IF(index == 0); |
| 329 | it = compute_cache.erase(it); | 525 | |
| 526 | Shader::IR::Program& program{programs[index]}; | ||
| 527 | const size_t stage_index{index - 1}; | ||
| 528 | infos[stage_index] = &program.info; | ||
| 529 | |||
| 530 | const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)}; | ||
| 531 | const std::vector<u32> code{EmitSPIRV(profile, runtime_info, program, binding)}; | ||
| 532 | device.SaveShader(code); | ||
| 533 | modules[stage_index] = BuildShader(device, code); | ||
| 534 | if (device.HasDebuggingToolAttached()) { | ||
| 535 | const std::string name{fmt::format("Shader {:016x}", key.unique_hashes[index])}; | ||
| 536 | modules[stage_index].SetObjectNameEXT(name.c_str()); | ||
| 537 | } | ||
| 538 | previous_stage = &program; | ||
| 330 | } | 539 | } |
| 540 | Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; | ||
| 541 | return std::make_unique<GraphicsPipeline>( | ||
| 542 | maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, &shader_notify, device, | ||
| 543 | descriptor_pool, update_descriptor_queue, thread_worker, render_pass_cache, key, | ||
| 544 | std::move(modules), infos); | ||
| 545 | |||
| 546 | } catch (const Shader::Exception& exception) { | ||
| 547 | LOG_ERROR(Render_Vulkan, "{}", exception.what()); | ||
| 548 | return nullptr; | ||
| 331 | } | 549 | } |
| 332 | 550 | ||
| 333 | std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> | 551 | std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() { |
| 334 | VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) { | 552 | GraphicsEnvironments environments; |
| 335 | Specialization specialization; | 553 | GetGraphicsEnvironments(environments, graphics_key.unique_hashes); |
| 336 | if (fixed_state.topology == Maxwell::PrimitiveTopology::Points) { | ||
| 337 | float point_size; | ||
| 338 | std::memcpy(&point_size, &fixed_state.point_size, sizeof(float)); | ||
| 339 | specialization.point_size = point_size; | ||
| 340 | ASSERT(point_size != 0.0f); | ||
| 341 | } | ||
| 342 | for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { | ||
| 343 | const auto& attribute = fixed_state.attributes[i]; | ||
| 344 | specialization.enabled_attributes[i] = attribute.enabled.Value() != 0; | ||
| 345 | specialization.attribute_types[i] = attribute.Type(); | ||
| 346 | } | ||
| 347 | specialization.ndc_minus_one_to_one = fixed_state.ndc_minus_one_to_one; | ||
| 348 | specialization.early_fragment_tests = fixed_state.early_z; | ||
| 349 | |||
| 350 | // Alpha test | ||
| 351 | specialization.alpha_test_func = | ||
| 352 | FixedPipelineState::UnpackComparisonOp(fixed_state.alpha_test_func.Value()); | ||
| 353 | specialization.alpha_test_ref = Common::BitCast<float>(fixed_state.alpha_test_ref); | ||
| 354 | |||
| 355 | SPIRVProgram program; | ||
| 356 | std::vector<VkDescriptorSetLayoutBinding> bindings; | ||
| 357 | 554 | ||
| 358 | for (std::size_t index = 1; index < Maxwell::MaxShaderProgram; ++index) { | 555 | main_pools.ReleaseContents(); |
| 359 | const auto program_enum = static_cast<Maxwell::ShaderProgram>(index); | 556 | auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true)}; |
| 360 | // Skip stages that are not enabled | 557 | if (!pipeline || pipeline_cache_filename.empty()) { |
| 361 | if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { | 558 | return pipeline; |
| 362 | continue; | ||
| 363 | } | ||
| 364 | const GPUVAddr gpu_addr = GetShaderAddress(maxwell3d, program_enum); | ||
| 365 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||
| 366 | Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); | ||
| 367 | |||
| 368 | const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 | ||
| 369 | const ShaderType program_type = GetShaderType(program_enum); | ||
| 370 | const auto& entries = shader->GetEntries(); | ||
| 371 | program[stage] = { | ||
| 372 | Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization), | ||
| 373 | entries, | ||
| 374 | }; | ||
| 375 | |||
| 376 | const u32 old_binding = specialization.base_binding; | ||
| 377 | specialization.base_binding = | ||
| 378 | FillDescriptorLayout(entries, bindings, program_enum, specialization.base_binding); | ||
| 379 | ASSERT(old_binding + entries.NumBindings() == specialization.base_binding); | ||
| 380 | } | 559 | } |
| 381 | return {std::move(program), std::move(bindings)}; | 560 | serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(environments.envs)] { |
| 382 | } | 561 | boost::container::static_vector<const GenericEnvironment*, Maxwell::MaxShaderProgram> |
| 383 | 562 | env_ptrs; | |
| 384 | template <VkDescriptorType descriptor_type, class Container> | 563 | for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { |
| 385 | void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u32& binding, | 564 | if (key.unique_hashes[index] != 0) { |
| 386 | u32& offset, const Container& container) { | 565 | env_ptrs.push_back(&envs[index]); |
| 387 | static constexpr u32 entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry)); | 566 | } |
| 388 | const u32 count = static_cast<u32>(std::size(container)); | ||
| 389 | |||
| 390 | if constexpr (descriptor_type == COMBINED_IMAGE_SAMPLER) { | ||
| 391 | for (u32 i = 0; i < count; ++i) { | ||
| 392 | const u32 num_samplers = container[i].size; | ||
| 393 | template_entries.push_back({ | ||
| 394 | .dstBinding = binding, | ||
| 395 | .dstArrayElement = 0, | ||
| 396 | .descriptorCount = num_samplers, | ||
| 397 | .descriptorType = descriptor_type, | ||
| 398 | .offset = offset, | ||
| 399 | .stride = entry_size, | ||
| 400 | }); | ||
| 401 | |||
| 402 | ++binding; | ||
| 403 | offset += num_samplers * entry_size; | ||
| 404 | } | 567 | } |
| 405 | return; | 568 | SerializePipeline(key, env_ptrs, pipeline_cache_filename, CACHE_VERSION); |
| 406 | } | 569 | }); |
| 570 | return pipeline; | ||
| 571 | } | ||
| 407 | 572 | ||
| 408 | if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER || | 573 | std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( |
| 409 | descriptor_type == STORAGE_TEXEL_BUFFER) { | 574 | const ComputePipelineCacheKey& key, const ShaderInfo* shader) { |
| 410 | // Nvidia has a bug where updating multiple texels at once causes the driver to crash. | 575 | const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; |
| 411 | // Note: Fixed in driver Windows 443.24, Linux 440.66.15 | 576 | const auto& qmd{kepler_compute.launch_description}; |
| 412 | for (u32 i = 0; i < count; ++i) { | 577 | ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; |
| 413 | template_entries.push_back({ | 578 | env.SetCachedSize(shader->size_bytes); |
| 414 | .dstBinding = binding + i, | 579 | |
| 415 | .dstArrayElement = 0, | 580 | main_pools.ReleaseContents(); |
| 416 | .descriptorCount = 1, | 581 | auto pipeline{CreateComputePipeline(main_pools, key, env, true)}; |
| 417 | .descriptorType = descriptor_type, | 582 | if (!pipeline || pipeline_cache_filename.empty()) { |
| 418 | .offset = static_cast<std::size_t>(offset + i * entry_size), | 583 | return pipeline; |
| 419 | .stride = entry_size, | ||
| 420 | }); | ||
| 421 | } | ||
| 422 | } else if (count > 0) { | ||
| 423 | template_entries.push_back({ | ||
| 424 | .dstBinding = binding, | ||
| 425 | .dstArrayElement = 0, | ||
| 426 | .descriptorCount = count, | ||
| 427 | .descriptorType = descriptor_type, | ||
| 428 | .offset = offset, | ||
| 429 | .stride = entry_size, | ||
| 430 | }); | ||
| 431 | } | 584 | } |
| 432 | offset += count * entry_size; | 585 | serialization_thread.QueueWork([this, key, env = std::move(env)] { |
| 433 | binding += count; | 586 | SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env}, |
| 587 | pipeline_cache_filename, CACHE_VERSION); | ||
| 588 | }); | ||
| 589 | return pipeline; | ||
| 434 | } | 590 | } |
| 435 | 591 | ||
| 436 | void FillDescriptorUpdateTemplateEntries( | 592 | std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( |
| 437 | const ShaderEntries& entries, u32& binding, u32& offset, | 593 | ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env, |
| 438 | std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries) { | 594 | bool build_in_parallel) try { |
| 439 | AddEntry<UNIFORM_BUFFER>(template_entries, offset, binding, entries.const_buffers); | 595 | LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); |
| 440 | AddEntry<STORAGE_BUFFER>(template_entries, offset, binding, entries.global_buffers); | 596 | |
| 441 | AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.uniform_texels); | 597 | Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; |
| 442 | AddEntry<COMBINED_IMAGE_SAMPLER>(template_entries, offset, binding, entries.samplers); | 598 | auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; |
| 443 | AddEntry<STORAGE_TEXEL_BUFFER>(template_entries, offset, binding, entries.storage_texels); | 599 | const std::vector<u32> code{EmitSPIRV(profile, program)}; |
| 444 | AddEntry<STORAGE_IMAGE>(template_entries, offset, binding, entries.images); | 600 | device.SaveShader(code); |
| 601 | vk::ShaderModule spv_module{BuildShader(device, code)}; | ||
| 602 | if (device.HasDebuggingToolAttached()) { | ||
| 603 | const auto name{fmt::format("Shader {:016x}", key.unique_hash)}; | ||
| 604 | spv_module.SetObjectNameEXT(name.c_str()); | ||
| 605 | } | ||
| 606 | Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; | ||
| 607 | return std::make_unique<ComputePipeline>(device, descriptor_pool, update_descriptor_queue, | ||
| 608 | thread_worker, &shader_notify, program.info, | ||
| 609 | std::move(spv_module)); | ||
| 610 | |||
| 611 | } catch (const Shader::Exception& exception) { | ||
| 612 | LOG_ERROR(Render_Vulkan, "{}", exception.what()); | ||
| 613 | return nullptr; | ||
| 445 | } | 614 | } |
| 446 | 615 | ||
| 447 | } // namespace Vulkan | 616 | } // namespace Vulkan |
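The Hash()/operator== pair near the top of the new vk_pipeline_cache.cpp hashes the raw bytes of a trivially copyable key and compares keys with memcmp, which is what lets the keys drive the unordered_map caches directly. Here is a sketch of that pattern under the assumption that the struct has no padding bytes; FNV-1a stands in for the project-internal Common::CityHash64:

    #include <array>
    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <functional>
    #include <type_traits>
    #include <unordered_map>

    struct CacheKey {
        std::uint64_t unique_hash;
        std::uint32_t shared_memory_size;
        std::array<std::uint32_t, 3> workgroup_size;

        [[nodiscard]] std::size_t Hash() const noexcept {
            // Byte-wise hashing is only sound when the struct has no
            // padding bytes, hence the static_assert below.
            const auto* bytes = reinterpret_cast<const unsigned char*>(this);
            std::uint64_t hash = 0xcbf29ce484222325ULL; // FNV-1a offset basis
            for (std::size_t i = 0; i < sizeof *this; ++i) {
                hash = (hash ^ bytes[i]) * 0x100000001b3ULL; // FNV-1a prime
            }
            return static_cast<std::size_t>(hash);
        }

        bool operator==(const CacheKey& rhs) const noexcept {
            return std::memcmp(this, &rhs, sizeof *this) == 0;
        }
    };
    static_assert(std::has_unique_object_representations_v<CacheKey>);

    // Forwarding std::hash to Hash() makes the key usable directly as an
    // unordered_map key, as compute_cache does in the diff.
    template <>
    struct std::hash<CacheKey> {
        std::size_t operator()(const CacheKey& k) const noexcept {
            return k.Hash();
        }
    };

    std::unordered_map<CacheKey, int> example_cache;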
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 89d635a3d..efe5a7ed8 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h | |||
| @@ -6,24 +6,28 @@ | |||
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <cstddef> | 8 | #include <cstddef> |
| 9 | #include <filesystem> | ||
| 10 | #include <iosfwd> | ||
| 9 | #include <memory> | 11 | #include <memory> |
| 10 | #include <type_traits> | 12 | #include <type_traits> |
| 11 | #include <unordered_map> | 13 | #include <unordered_map> |
| 12 | #include <utility> | 14 | #include <utility> |
| 13 | #include <vector> | 15 | #include <vector> |
| 14 | 16 | ||
| 15 | #include <boost/functional/hash.hpp> | ||
| 16 | |||
| 17 | #include "common/common_types.h" | 17 | #include "common/common_types.h" |
| 18 | #include "video_core/engines/const_buffer_engine_interface.h" | 18 | #include "common/thread_worker.h" |
| 19 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 20 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 21 | #include "shader_recompiler/frontend/maxwell/control_flow.h" | ||
| 22 | #include "shader_recompiler/host_translate_info.h" | ||
| 23 | #include "shader_recompiler/object_pool.h" | ||
| 24 | #include "shader_recompiler/profile.h" | ||
| 19 | #include "video_core/engines/maxwell_3d.h" | 25 | #include "video_core/engines/maxwell_3d.h" |
| 20 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | 26 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" |
| 27 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||
| 28 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | ||
| 21 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | 29 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" |
| 22 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | 30 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| 23 | #include "video_core/shader/async_shaders.h" | ||
| 24 | #include "video_core/shader/memory_util.h" | ||
| 25 | #include "video_core/shader/registry.h" | ||
| 26 | #include "video_core/shader/shader_ir.h" | ||
| 27 | #include "video_core/shader_cache.h" | 31 | #include "video_core/shader_cache.h" |
| 28 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 32 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 29 | 33 | ||
| @@ -31,23 +35,24 @@ namespace Core { | |||
| 31 | class System; | 35 | class System; |
| 32 | } | 36 | } |
| 33 | 37 | ||
| 34 | namespace Vulkan { | 38 | namespace Shader::IR { |
| 39 | struct Program; | ||
| 40 | } | ||
| 35 | 41 | ||
| 36 | class Device; | 42 | namespace VideoCore { |
| 37 | class RasterizerVulkan; | 43 | class ShaderNotify; |
| 38 | class VKComputePipeline; | 44 | } |
| 39 | class VKDescriptorPool; | 45 | |
| 40 | class VKScheduler; | 46 | namespace Vulkan { |
| 41 | class VKUpdateDescriptorQueue; | ||
| 42 | 47 | ||
| 43 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 48 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 44 | 49 | ||
| 45 | struct ComputePipelineCacheKey { | 50 | struct ComputePipelineCacheKey { |
| 46 | GPUVAddr shader; | 51 | u64 unique_hash; |
| 47 | u32 shared_memory_size; | 52 | u32 shared_memory_size; |
| 48 | std::array<u32, 3> workgroup_size; | 53 | std::array<u32, 3> workgroup_size; |
| 49 | 54 | ||
| 50 | std::size_t Hash() const noexcept; | 55 | size_t Hash() const noexcept; |
| 51 | 56 | ||
| 52 | bool operator==(const ComputePipelineCacheKey& rhs) const noexcept; | 57 | bool operator==(const ComputePipelineCacheKey& rhs) const noexcept; |
| 53 | 58 | ||
| @@ -64,15 +69,8 @@ static_assert(std::is_trivially_constructible_v<ComputePipelineCacheKey>); | |||
| 64 | namespace std { | 69 | namespace std { |
| 65 | 70 | ||
| 66 | template <> | 71 | template <> |
| 67 | struct hash<Vulkan::GraphicsPipelineCacheKey> { | ||
| 68 | std::size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept { | ||
| 69 | return k.Hash(); | ||
| 70 | } | ||
| 71 | }; | ||
| 72 | |||
| 73 | template <> | ||
| 74 | struct hash<Vulkan::ComputePipelineCacheKey> { | 72 | struct hash<Vulkan::ComputePipelineCacheKey> { |
| 75 | std::size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept { | 73 | size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept { |
| 76 | return k.Hash(); | 74 | return k.Hash(); |
| 77 | } | 75 | } |
| 78 | }; | 76 | }; |
| @@ -81,94 +79,90 @@ struct hash<Vulkan::ComputePipelineCacheKey> { | |||
| 81 | 79 | ||
| 82 | namespace Vulkan { | 80 | namespace Vulkan { |
| 83 | 81 | ||
| 84 | class Shader { | 82 | class ComputePipeline; |
| 85 | public: | 83 | class Device; |
| 86 | explicit Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, | 84 | class DescriptorPool; |
| 87 | Tegra::Engines::ShaderType stage_, GPUVAddr gpu_addr, VAddr cpu_addr_, | 85 | class RasterizerVulkan; |
| 88 | VideoCommon::Shader::ProgramCode program_code, u32 main_offset_); | 86 | class RenderPassCache; |
| 89 | ~Shader(); | 87 | class VKScheduler; |
| 90 | 88 | class VKUpdateDescriptorQueue; | |
| 91 | GPUVAddr GetGpuAddr() const { | ||
| 92 | return gpu_addr; | ||
| 93 | } | ||
| 94 | |||
| 95 | VideoCommon::Shader::ShaderIR& GetIR() { | ||
| 96 | return shader_ir; | ||
| 97 | } | ||
| 98 | |||
| 99 | const VideoCommon::Shader::ShaderIR& GetIR() const { | ||
| 100 | return shader_ir; | ||
| 101 | } | ||
| 102 | 89 | ||
| 103 | const VideoCommon::Shader::Registry& GetRegistry() const { | 90 | using VideoCommon::ShaderInfo; |
| 104 | return registry; | ||
| 105 | } | ||
| 106 | 91 | ||
| 107 | const ShaderEntries& GetEntries() const { | 92 | struct ShaderPools { |
| 108 | return entries; | 93 | void ReleaseContents() { |
| 94 | flow_block.ReleaseContents(); | ||
| 95 | block.ReleaseContents(); | ||
| 96 | inst.ReleaseContents(); | ||
| 109 | } | 97 | } |
| 110 | 98 | ||
| 111 | private: | 99 | Shader::ObjectPool<Shader::IR::Inst> inst; |
| 112 | GPUVAddr gpu_addr{}; | 100 | Shader::ObjectPool<Shader::IR::Block> block; |
| 113 | VideoCommon::Shader::ProgramCode program_code; | 101 | Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block; |
| 114 | VideoCommon::Shader::Registry registry; | ||
| 115 | VideoCommon::Shader::ShaderIR shader_ir; | ||
| 116 | ShaderEntries entries; | ||
| 117 | }; | 102 | }; |
| 118 | 103 | ||
| 119 | class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> { | 104 | class PipelineCache : public VideoCommon::ShaderCache { |
| 120 | public: | 105 | public: |
| 121 | explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, | 106 | explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d, |
| 122 | Tegra::Engines::Maxwell3D& maxwell3d, | 107 | Tegra::Engines::KeplerCompute& kepler_compute, |
| 123 | Tegra::Engines::KeplerCompute& kepler_compute, | 108 | Tegra::MemoryManager& gpu_memory, const Device& device, |
| 124 | Tegra::MemoryManager& gpu_memory, const Device& device, | 109 | VKScheduler& scheduler, DescriptorPool& descriptor_pool, |
| 125 | VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, | 110 | VKUpdateDescriptorQueue& update_descriptor_queue, |
| 126 | VKUpdateDescriptorQueue& update_descriptor_queue); | 111 | RenderPassCache& render_pass_cache, BufferCache& buffer_cache, |
| 127 | ~VKPipelineCache() override; | 112 | TextureCache& texture_cache, VideoCore::ShaderNotify& shader_notify_); |
| 113 | ~PipelineCache(); | ||
| 114 | |||
| 115 | [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline(); | ||
| 128 | 116 | ||
| 129 | std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders(); | 117 | [[nodiscard]] ComputePipeline* CurrentComputePipeline(); |
| 130 | 118 | ||
| 131 | VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key, | 119 | void LoadDiskResources(u64 title_id, std::stop_token stop_loading, |
| 132 | u32 num_color_buffers, | 120 | const VideoCore::DiskResourceLoadCallback& callback); |
| 133 | VideoCommon::Shader::AsyncShaders& async_shaders); | ||
| 134 | 121 | ||
| 135 | VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); | 122 | private: |
| 123 | [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipelineSlowPath(); | ||
| 136 | 124 | ||
| 137 | void EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline); | 125 | [[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept; |
| 138 | 126 | ||
| 139 | protected: | 127 | std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(); |
| 140 | void OnShaderRemoval(Shader* shader) final; | ||
| 141 | 128 | ||
| 142 | private: | 129 | std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline( |
| 143 | std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders( | 130 | ShaderPools& pools, const GraphicsPipelineCacheKey& key, |
| 144 | const FixedPipelineState& fixed_state); | 131 | std::span<Shader::Environment* const> envs, bool build_in_parallel); |
| 145 | 132 | ||
| 146 | Tegra::GPU& gpu; | 133 | std::unique_ptr<ComputePipeline> CreateComputePipeline(const ComputePipelineCacheKey& key, |
| 147 | Tegra::Engines::Maxwell3D& maxwell3d; | 134 | const ShaderInfo* shader); |
| 148 | Tegra::Engines::KeplerCompute& kepler_compute; | 135 | |
| 149 | Tegra::MemoryManager& gpu_memory; | 136 | std::unique_ptr<ComputePipeline> CreateComputePipeline(ShaderPools& pools, |
| 137 | const ComputePipelineCacheKey& key, | ||
| 138 | Shader::Environment& env, | ||
| 139 | bool build_in_parallel); | ||
| 150 | 140 | ||
| 151 | const Device& device; | 141 | const Device& device; |
| 152 | VKScheduler& scheduler; | 142 | VKScheduler& scheduler; |
| 153 | VKDescriptorPool& descriptor_pool; | 143 | DescriptorPool& descriptor_pool; |
| 154 | VKUpdateDescriptorQueue& update_descriptor_queue; | 144 | VKUpdateDescriptorQueue& update_descriptor_queue; |
| 145 | RenderPassCache& render_pass_cache; | ||
| 146 | BufferCache& buffer_cache; | ||
| 147 | TextureCache& texture_cache; | ||
| 148 | VideoCore::ShaderNotify& shader_notify; | ||
| 149 | bool use_asynchronous_shaders{}; | ||
| 155 | 150 | ||
| 156 | std::unique_ptr<Shader> null_shader; | 151 | GraphicsPipelineCacheKey graphics_key{}; |
| 157 | std::unique_ptr<Shader> null_kernel; | 152 | GraphicsPipeline* current_pipeline{}; |
| 158 | 153 | ||
| 159 | std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{}; | 154 | std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache; |
| 155 | std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<GraphicsPipeline>> graphics_cache; | ||
| 160 | 156 | ||
| 161 | GraphicsPipelineCacheKey last_graphics_key; | 157 | ShaderPools main_pools; |
| 162 | VKGraphicsPipeline* last_graphics_pipeline = nullptr; | ||
| 163 | 158 | ||
| 164 | std::mutex pipeline_cache; | 159 | Shader::Profile profile; |
| 165 | std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>> | 160 | Shader::HostTranslateInfo host_info; |
| 166 | graphics_cache; | ||
| 167 | std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache; | ||
| 168 | }; | ||
| 169 | 161 | ||
| 170 | void FillDescriptorUpdateTemplateEntries( | 162 | std::filesystem::path pipeline_cache_filename; |
| 171 | const ShaderEntries& entries, u32& binding, u32& offset, | 163 | |
| 172 | std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries); | 164 | Common::ThreadWorker workers; |
| 165 | Common::ThreadWorker serialization_thread; | ||
| 166 | }; | ||
| 173 | 167 | ||
| 174 | } // namespace Vulkan | 168 | } // namespace Vulkan |
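The new `ShaderPools` struct above bundles the three IR object pools so a shader translation pass can allocate instructions, blocks, and flow blocks cheaply and then release them all in one call. Below is a minimal sketch of that arena-style pattern; this `ObjectPool` is a simplified stand-in for `Shader::ObjectPool`, whose real implementation additionally keeps its memory chunks alive across releases.

```cpp
#include <memory>
#include <utility>
#include <vector>

// Simplified stand-in for Shader::ObjectPool: objects are created cheaply and
// destroyed all at once instead of being freed one by one.
template <typename T>
class ObjectPool {
public:
    template <typename... Args>
    T* Create(Args&&... args) {
        storage.push_back(std::make_unique<T>(std::forward<Args>(args)...));
        return storage.back().get();
    }

    // Destroy every object; mirrors the intent of ReleaseContents() above.
    void ReleaseContents() {
        storage.clear();
    }

private:
    std::vector<std::unique_ptr<T>> storage;
};

struct Inst { int opcode{}; };
struct Block { std::vector<Inst*> insts; };

int main() {
    ObjectPool<Inst> inst;
    ObjectPool<Block> block;

    Block* b = block.Create();
    b->insts.push_back(inst.Create());

    // After a pipeline is built, the pools are reset in one shot.
    block.ReleaseContents();
    inst.ReleaseContents();
}
```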
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 7cadd5147..c9cb32d71 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp | |||
| @@ -114,14 +114,10 @@ void HostCounter::EndQuery() { | |||
| 114 | } | 114 | } |
| 115 | 115 | ||
| 116 | u64 HostCounter::BlockingQuery() const { | 116 | u64 HostCounter::BlockingQuery() const { |
| 117 | if (tick >= cache.GetScheduler().CurrentTick()) { | 117 | cache.GetScheduler().Wait(tick); |
| 118 | cache.GetScheduler().Flush(); | ||
| 119 | } | ||
| 120 | |||
| 121 | u64 data; | 118 | u64 data; |
| 122 | const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults( | 119 | const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults( |
| 123 | query.first, query.second, 1, sizeof(data), &data, sizeof(data), | 120 | query.first, query.second, 1, sizeof(data), &data, sizeof(data), VK_QUERY_RESULT_64_BIT); |
| 124 | VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); | ||
| 125 | 121 | ||
| 126 | switch (query_result) { | 122 | switch (query_result) { |
| 127 | case VK_SUCCESS: | 123 | case VK_SUCCESS: |
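The query-cache change above drops `VK_QUERY_RESULT_WAIT_BIT` and instead blocks on the scheduler before reading results. A hedged sketch of what such a host-side wait looks like with a Vulkan 1.2 timeline semaphore; the scheduler's actual wait mechanism is assumed, not shown in this diff.

```cpp
#include <cstdint>
#include <vulkan/vulkan.h>

// Block the host until the GPU has signalled `tick` on a timeline semaphore.
// This is the kind of wait VKScheduler::Wait(tick) is assumed to perform, so
// GetQueryResults() no longer needs VK_QUERY_RESULT_WAIT_BIT.
void WaitForTick(VkDevice device, VkSemaphore timeline, uint64_t tick) {
    const VkSemaphoreWaitInfo wait_info{
        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
        .pNext = nullptr,
        .flags = 0,
        .semaphoreCount = 1,
        .pSemaphores = &timeline,
        .pValues = &tick,
    };
    vkWaitSemaphores(device, &wait_info, UINT64_MAX);
}
```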
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f57c15b37..c7a07fdd8 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -24,7 +24,6 @@ | |||
| 24 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | 24 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 25 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | 25 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" |
| 26 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 26 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 27 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | ||
| 28 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | 27 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" |
| 29 | #include "video_core/renderer_vulkan/vk_rasterizer.h" | 28 | #include "video_core/renderer_vulkan/vk_rasterizer.h" |
| 30 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 29 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| @@ -55,11 +54,10 @@ struct DrawParams { | |||
| 55 | u32 num_instances; | 54 | u32 num_instances; |
| 56 | u32 base_vertex; | 55 | u32 base_vertex; |
| 57 | u32 num_vertices; | 56 | u32 num_vertices; |
| 57 | u32 first_index; | ||
| 58 | bool is_indexed; | 58 | bool is_indexed; |
| 59 | }; | 59 | }; |
| 60 | 60 | ||
| 61 | constexpr auto COMPUTE_SHADER_INDEX = static_cast<size_t>(Tegra::Engines::ShaderType::Compute); | ||
| 62 | |||
| 63 | VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index) { | 61 | VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index) { |
| 64 | const auto& src = regs.viewport_transform[index]; | 62 | const auto& src = regs.viewport_transform[index]; |
| 65 | const float width = src.scale_x * 2.0f; | 63 | const float width = src.scale_x * 2.0f; |
| @@ -97,118 +95,6 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index) { | |||
| 97 | return scissor; | 95 | return scissor; |
| 98 | } | 96 | } |
| 99 | 97 | ||
| 100 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses( | ||
| 101 | const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) { | ||
| 102 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses; | ||
| 103 | for (size_t i = 0; i < std::size(addresses); ++i) { | ||
| 104 | addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0; | ||
| 105 | } | ||
| 106 | return addresses; | ||
| 107 | } | ||
| 108 | |||
| 109 | struct TextureHandle { | ||
| 110 | constexpr TextureHandle(u32 data, bool via_header_index) { | ||
| 111 | const Tegra::Texture::TextureHandle handle{data}; | ||
| 112 | image = handle.tic_id; | ||
| 113 | sampler = via_header_index ? image : handle.tsc_id.Value(); | ||
| 114 | } | ||
| 115 | |||
| 116 | u32 image; | ||
| 117 | u32 sampler; | ||
| 118 | }; | ||
| 119 | |||
| 120 | template <typename Engine, typename Entry> | ||
| 121 | TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry, | ||
| 122 | size_t stage, size_t index = 0) { | ||
| 123 | const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage); | ||
| 124 | if constexpr (std::is_same_v<Entry, SamplerEntry>) { | ||
| 125 | if (entry.is_separated) { | ||
| 126 | const u32 buffer_1 = entry.buffer; | ||
| 127 | const u32 buffer_2 = entry.secondary_buffer; | ||
| 128 | const u32 offset_1 = entry.offset; | ||
| 129 | const u32 offset_2 = entry.secondary_offset; | ||
| 130 | const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); | ||
| 131 | const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); | ||
| 132 | return TextureHandle(handle_1 | handle_2, via_header_index); | ||
| 133 | } | ||
| 134 | } | ||
| 135 | if (entry.is_bindless) { | ||
| 136 | const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); | ||
| 137 | return TextureHandle(raw, via_header_index); | ||
| 138 | } | ||
| 139 | const u32 buffer = engine.GetBoundBuffer(); | ||
| 140 | const u64 offset = (entry.offset + index) * sizeof(u32); | ||
| 141 | return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); | ||
| 142 | } | ||
| 143 | |||
| 144 | ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { | ||
| 145 | if (entry.is_buffer) { | ||
| 146 | return ImageViewType::e2D; | ||
| 147 | } | ||
| 148 | switch (entry.type) { | ||
| 149 | case Tegra::Shader::TextureType::Texture1D: | ||
| 150 | return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D; | ||
| 151 | case Tegra::Shader::TextureType::Texture2D: | ||
| 152 | return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D; | ||
| 153 | case Tegra::Shader::TextureType::Texture3D: | ||
| 154 | return ImageViewType::e3D; | ||
| 155 | case Tegra::Shader::TextureType::TextureCube: | ||
| 156 | return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube; | ||
| 157 | } | ||
| 158 | UNREACHABLE(); | ||
| 159 | return ImageViewType::e2D; | ||
| 160 | } | ||
| 161 | |||
| 162 | ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) { | ||
| 163 | switch (entry.type) { | ||
| 164 | case Tegra::Shader::ImageType::Texture1D: | ||
| 165 | return ImageViewType::e1D; | ||
| 166 | case Tegra::Shader::ImageType::Texture1DArray: | ||
| 167 | return ImageViewType::e1DArray; | ||
| 168 | case Tegra::Shader::ImageType::Texture2D: | ||
| 169 | return ImageViewType::e2D; | ||
| 170 | case Tegra::Shader::ImageType::Texture2DArray: | ||
| 171 | return ImageViewType::e2DArray; | ||
| 172 | case Tegra::Shader::ImageType::Texture3D: | ||
| 173 | return ImageViewType::e3D; | ||
| 174 | case Tegra::Shader::ImageType::TextureBuffer: | ||
| 175 | return ImageViewType::Buffer; | ||
| 176 | } | ||
| 177 | UNREACHABLE(); | ||
| 178 | return ImageViewType::e2D; | ||
| 179 | } | ||
| 180 | |||
| 181 | void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_cache, | ||
| 182 | VKUpdateDescriptorQueue& update_descriptor_queue, | ||
| 183 | ImageViewId*& image_view_id_ptr, VkSampler*& sampler_ptr) { | ||
| 184 | for ([[maybe_unused]] const auto& entry : entries.uniform_texels) { | ||
| 185 | const ImageViewId image_view_id = *image_view_id_ptr++; | ||
| 186 | const ImageView& image_view = texture_cache.GetImageView(image_view_id); | ||
| 187 | update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); | ||
| 188 | } | ||
| 189 | for (const auto& entry : entries.samplers) { | ||
| 190 | for (size_t i = 0; i < entry.size; ++i) { | ||
| 191 | const VkSampler sampler = *sampler_ptr++; | ||
| 192 | const ImageViewId image_view_id = *image_view_id_ptr++; | ||
| 193 | const ImageView& image_view = texture_cache.GetImageView(image_view_id); | ||
| 194 | const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry)); | ||
| 195 | update_descriptor_queue.AddSampledImage(handle, sampler); | ||
| 196 | } | ||
| 197 | } | ||
| 198 | for ([[maybe_unused]] const auto& entry : entries.storage_texels) { | ||
| 199 | const ImageViewId image_view_id = *image_view_id_ptr++; | ||
| 200 | const ImageView& image_view = texture_cache.GetImageView(image_view_id); | ||
| 201 | update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); | ||
| 202 | } | ||
| 203 | for (const auto& entry : entries.images) { | ||
| 204 | // TODO: Mark as modified | ||
| 205 | const ImageViewId image_view_id = *image_view_id_ptr++; | ||
| 206 | const ImageView& image_view = texture_cache.GetImageView(image_view_id); | ||
| 207 | const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry)); | ||
| 208 | update_descriptor_queue.AddImage(handle); | ||
| 209 | } | ||
| 210 | } | ||
| 211 | |||
| 212 | DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced, | 98 | DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced, |
| 213 | bool is_indexed) { | 99 | bool is_indexed) { |
| 214 | DrawParams params{ | 100 | DrawParams params{ |
| @@ -216,6 +102,7 @@ DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instan | |||
| 216 | .num_instances = is_instanced ? num_instances : 1, | 102 | .num_instances = is_instanced ? num_instances : 1, |
| 217 | .base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first, | 103 | .base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first, |
| 218 | .num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count, | 104 | .num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count, |
| 105 | .first_index = is_indexed ? regs.index_array.first : 0, | ||
| 219 | .is_indexed = is_indexed, | 106 | .is_indexed = is_indexed, |
| 220 | }; | 107 | }; |
| 221 | if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) { | 108 | if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) { |
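`DrawParams` now carries `first_index`, so indexed draws honour `regs.index_array.first` instead of hardcoding 0. The mapping onto core Vulkan, shown here with the raw entry points rather than yuzu's `vk::CommandBuffer` wrapper:

```cpp
#include <cstdint>
#include <vulkan/vulkan.h>

struct DrawParams {
    uint32_t base_instance;
    uint32_t num_instances;
    uint32_t base_vertex;
    uint32_t num_vertices;
    uint32_t first_index;
    bool is_indexed;
};

void RecordDraw(VkCommandBuffer cmdbuf, const DrawParams& p) {
    if (p.is_indexed) {
        // first_index selects where reading starts in the bound index buffer;
        // base_vertex is added to every fetched index before vertex fetch.
        vkCmdDrawIndexed(cmdbuf, p.num_vertices, p.num_instances, p.first_index,
                         static_cast<int32_t>(p.base_vertex), p.base_instance);
    } else {
        vkCmdDraw(cmdbuf, p.num_vertices, p.num_instances, p.base_vertex, p.base_instance);
    }
}
```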
| @@ -243,21 +130,21 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra | |||
| 243 | blit_image(device, scheduler, state_tracker, descriptor_pool), | 130 | blit_image(device, scheduler, state_tracker, descriptor_pool), |
| 244 | astc_decoder_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue, | 131 | astc_decoder_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue, |
| 245 | memory_allocator), | 132 | memory_allocator), |
| 246 | texture_cache_runtime{device, scheduler, memory_allocator, | 133 | render_pass_cache(device), texture_cache_runtime{device, scheduler, |
| 247 | staging_pool, blit_image, astc_decoder_pass}, | 134 | memory_allocator, staging_pool, |
| 135 | blit_image, astc_decoder_pass, | ||
| 136 | render_pass_cache}, | ||
| 248 | texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), | 137 | texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), |
| 249 | buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, | 138 | buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, |
| 250 | update_descriptor_queue, descriptor_pool), | 139 | update_descriptor_queue, descriptor_pool), |
| 251 | buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), | 140 | buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), |
| 252 | pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, | 141 | pipeline_cache(*this, maxwell3d, kepler_compute, gpu_memory, device, scheduler, |
| 253 | descriptor_pool, update_descriptor_queue), | 142 | descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache, |
| 143 | texture_cache, gpu.ShaderNotify()), | ||
| 254 | query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache}, | 144 | query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache}, |
| 255 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), | 145 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), |
| 256 | wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { | 146 | wfi_event(device.GetLogical().CreateEvent()) { |
| 257 | scheduler.SetQueryCache(query_cache); | 147 | scheduler.SetQueryCache(query_cache); |
| 258 | if (device.UseAsynchronousShaders()) { | ||
| 259 | async_shaders.AllocateWorkers(); | ||
| 260 | } | ||
| 261 | } | 148 | } |
| 262 | 149 | ||
| 263 | RasterizerVulkan::~RasterizerVulkan() = default; | 150 | RasterizerVulkan::~RasterizerVulkan() = default; |
| @@ -270,53 +157,30 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||
| 270 | 157 | ||
| 271 | query_cache.UpdateCounters(); | 158 | query_cache.UpdateCounters(); |
| 272 | 159 | ||
| 273 | graphics_key.fixed_state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported()); | 160 | GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()}; |
| 274 | 161 | if (!pipeline) { | |
| 275 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||
| 276 | |||
| 277 | texture_cache.SynchronizeGraphicsDescriptors(); | ||
| 278 | texture_cache.UpdateRenderTargets(false); | ||
| 279 | |||
| 280 | const auto shaders = pipeline_cache.GetShaders(); | ||
| 281 | graphics_key.shaders = GetShaderAddresses(shaders); | ||
| 282 | |||
| 283 | SetupShaderDescriptors(shaders, is_indexed); | ||
| 284 | |||
| 285 | const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); | ||
| 286 | graphics_key.renderpass = framebuffer->RenderPass(); | ||
| 287 | |||
| 288 | VKGraphicsPipeline* const pipeline = pipeline_cache.GetGraphicsPipeline( | ||
| 289 | graphics_key, framebuffer->NumColorBuffers(), async_shaders); | ||
| 290 | if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) { | ||
| 291 | // Async graphics pipeline was not ready. | ||
| 292 | return; | 162 | return; |
| 293 | } | 163 | } |
| 164 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||
| 165 | pipeline->Configure(is_indexed); | ||
| 294 | 166 | ||
| 295 | BeginTransformFeedback(); | 167 | BeginTransformFeedback(); |
| 296 | 168 | ||
| 297 | scheduler.RequestRenderpass(framebuffer); | ||
| 298 | scheduler.BindGraphicsPipeline(pipeline->GetHandle()); | ||
| 299 | UpdateDynamicStates(); | 169 | UpdateDynamicStates(); |
| 300 | 170 | ||
| 301 | const auto& regs = maxwell3d.regs; | 171 | const auto& regs{maxwell3d.regs}; |
| 302 | const u32 num_instances = maxwell3d.mme_draw.instance_count; | 172 | const u32 num_instances{maxwell3d.mme_draw.instance_count}; |
| 303 | const DrawParams draw_params = MakeDrawParams(regs, num_instances, is_instanced, is_indexed); | 173 | const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_instanced, is_indexed)}; |
| 304 | const VkPipelineLayout pipeline_layout = pipeline->GetLayout(); | 174 | scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) { |
| 305 | const VkDescriptorSet descriptor_set = pipeline->CommitDescriptorSet(); | ||
| 306 | scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { | ||
| 307 | if (descriptor_set) { | ||
| 308 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, | ||
| 309 | DESCRIPTOR_SET, descriptor_set, nullptr); | ||
| 310 | } | ||
| 311 | if (draw_params.is_indexed) { | 175 | if (draw_params.is_indexed) { |
| 312 | cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0, | 176 | cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, |
| 313 | draw_params.base_vertex, draw_params.base_instance); | 177 | draw_params.first_index, draw_params.base_vertex, |
| 178 | draw_params.base_instance); | ||
| 314 | } else { | 179 | } else { |
| 315 | cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances, | 180 | cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances, |
| 316 | draw_params.base_vertex, draw_params.base_instance); | 181 | draw_params.base_vertex, draw_params.base_instance); |
| 317 | } | 182 | } |
| 318 | }); | 183 | }); |
| 319 | |||
| 320 | EndTransformFeedback(); | 184 | EndTransformFeedback(); |
| 321 | } | 185 | } |
| 322 | 186 | ||
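Note how the rewritten `Draw()` captures only the plain-data `DrawParams` by value; everything that touches engine state runs before `Record()`, so the lambda stays valid when the scheduler replays it later on its worker thread. A toy sketch of that deferred-recording pattern (this `Scheduler` is a stand-in, not `VKScheduler`):

```cpp
#include <functional>
#include <queue>

// Commands are recorded now and executed later, possibly on another thread,
// so recorded lambdas must capture self-contained values, never references
// to mutable engine state.
class Scheduler {
public:
    void Record(std::function<void()> command) {
        commands.push(std::move(command));
    }

    void DispatchWork() {
        while (!commands.empty()) {
            commands.front()();
            commands.pop();
        }
    }

private:
    std::queue<std::function<void()>> commands;
};

int main() {
    Scheduler scheduler;
    const unsigned num_vertices = 3; // snapshot of engine state, safe to copy
    scheduler.Record([num_vertices] { /* draw num_vertices vertices here */ });
    scheduler.DispatchWork();
}
```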
| @@ -326,6 +190,7 @@ void RasterizerVulkan::Clear() { | |||
| 326 | if (!maxwell3d.ShouldExecute()) { | 190 | if (!maxwell3d.ShouldExecute()) { |
| 327 | return; | 191 | return; |
| 328 | } | 192 | } |
| 193 | FlushWork(); | ||
| 329 | 194 | ||
| 330 | query_cache.UpdateCounters(); | 195 | query_cache.UpdateCounters(); |
| 331 | 196 | ||
| @@ -395,73 +260,20 @@ void RasterizerVulkan::Clear() { | |||
| 395 | }); | 260 | }); |
| 396 | } | 261 | } |
| 397 | 262 | ||
| 398 | void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { | 263 | void RasterizerVulkan::DispatchCompute() { |
| 399 | MICROPROFILE_SCOPE(Vulkan_Compute); | 264 | FlushWork(); |
| 400 | |||
| 401 | query_cache.UpdateCounters(); | ||
| 402 | 265 | ||
| 403 | const auto& launch_desc = kepler_compute.launch_description; | 266 | ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()}; |
| 404 | auto& pipeline = pipeline_cache.GetComputePipeline({ | 267 | if (!pipeline) { |
| 405 | .shader = code_addr, | 268 | return; |
| 406 | .shared_memory_size = launch_desc.shared_alloc, | 269 | } |
| 407 | .workgroup_size{ | 270 | std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex}; |
| 408 | launch_desc.block_dim_x, | 271 | pipeline->Configure(kepler_compute, gpu_memory, scheduler, buffer_cache, texture_cache); |
| 409 | launch_desc.block_dim_y, | ||
| 410 | launch_desc.block_dim_z, | ||
| 411 | }, | ||
| 412 | }); | ||
| 413 | 272 | ||
| 414 | // Compute dispatches can't be executed inside a renderpass | 273 | const auto& qmd{kepler_compute.launch_description}; |
| 274 | const std::array<u32, 3> dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z}; | ||
| 415 | scheduler.RequestOutsideRenderPassOperationContext(); | 275 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 416 | 276 | scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); }); | |
| 417 | image_view_indices.clear(); | ||
| 418 | sampler_handles.clear(); | ||
| 419 | |||
| 420 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||
| 421 | |||
| 422 | const auto& entries = pipeline.GetEntries(); | ||
| 423 | buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers); | ||
| 424 | buffer_cache.UnbindComputeStorageBuffers(); | ||
| 425 | u32 ssbo_index = 0; | ||
| 426 | for (const auto& buffer : entries.global_buffers) { | ||
| 427 | buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset, | ||
| 428 | buffer.is_written); | ||
| 429 | ++ssbo_index; | ||
| 430 | } | ||
| 431 | buffer_cache.UpdateComputeBuffers(); | ||
| 432 | |||
| 433 | texture_cache.SynchronizeComputeDescriptors(); | ||
| 434 | |||
| 435 | SetupComputeUniformTexels(entries); | ||
| 436 | SetupComputeTextures(entries); | ||
| 437 | SetupComputeStorageTexels(entries); | ||
| 438 | SetupComputeImages(entries); | ||
| 439 | |||
| 440 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); | ||
| 441 | texture_cache.FillComputeImageViews(indices_span, image_view_ids); | ||
| 442 | |||
| 443 | update_descriptor_queue.Acquire(); | ||
| 444 | |||
| 445 | buffer_cache.BindHostComputeBuffers(); | ||
| 446 | |||
| 447 | ImageViewId* image_view_id_ptr = image_view_ids.data(); | ||
| 448 | VkSampler* sampler_ptr = sampler_handles.data(); | ||
| 449 | PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, | ||
| 450 | sampler_ptr); | ||
| 451 | |||
| 452 | const VkPipeline pipeline_handle = pipeline.GetHandle(); | ||
| 453 | const VkPipelineLayout pipeline_layout = pipeline.GetLayout(); | ||
| 454 | const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet(); | ||
| 455 | scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y, | ||
| 456 | grid_z = launch_desc.grid_dim_z, pipeline_handle, pipeline_layout, | ||
| 457 | descriptor_set](vk::CommandBuffer cmdbuf) { | ||
| 458 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle); | ||
| 459 | if (descriptor_set) { | ||
| 460 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, | ||
| 461 | DESCRIPTOR_SET, descriptor_set, nullptr); | ||
| 462 | } | ||
| 463 | cmdbuf.Dispatch(grid_x, grid_y, grid_z); | ||
| 464 | }); | ||
| 465 | } | 277 | } |
| 466 | 278 | ||
| 467 | void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { | 279 | void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { |
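The removed comment still applies to the rewrite: `vkCmdDispatch` is illegal inside a render pass, which is why `RequestOutsideRenderPassOperationContext()` stays before the recorded dispatch. A bare-Vulkan sketch of the required ordering (function and parameter names are illustrative):

```cpp
#include <array>
#include <cstdint>
#include <vulkan/vulkan.h>

// Dispatch a compute grid. Any open render pass must be ended first because
// vkCmdDispatch is only valid outside of one.
void DispatchGrid(VkCommandBuffer cmdbuf, VkPipeline compute_pipeline,
                  bool render_pass_open, std::array<uint32_t, 3> dim) {
    if (render_pass_open) {
        vkCmdEndRenderPass(cmdbuf);
    }
    vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE, compute_pipeline);
    vkCmdDispatch(cmdbuf, dim[0], dim[1], dim[2]);
}
```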
| @@ -626,6 +438,7 @@ void RasterizerVulkan::WaitForIdle() { | |||
| 626 | 438 | ||
| 627 | void RasterizerVulkan::FragmentBarrier() { | 439 | void RasterizerVulkan::FragmentBarrier() { |
| 628 | // We already put barriers when a render pass finishes | 440 | // We already put barriers when a render pass finishes |
| 441 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 629 | } | 442 | } |
| 630 | 443 | ||
| 631 | void RasterizerVulkan::TiledCacheBarrier() { | 444 | void RasterizerVulkan::TiledCacheBarrier() { |
| @@ -633,10 +446,11 @@ void RasterizerVulkan::TiledCacheBarrier() { | |||
| 633 | } | 446 | } |
| 634 | 447 | ||
| 635 | void RasterizerVulkan::FlushCommands() { | 448 | void RasterizerVulkan::FlushCommands() { |
| 636 | if (draw_counter > 0) { | 449 | if (draw_counter == 0) { |
| 637 | draw_counter = 0; | 450 | return; |
| 638 | scheduler.Flush(); | ||
| 639 | } | 451 | } |
| 452 | draw_counter = 0; | ||
| 453 | scheduler.Flush(); | ||
| 640 | } | 454 | } |
| 641 | 455 | ||
| 642 | void RasterizerVulkan::TickFrame() { | 456 | void RasterizerVulkan::TickFrame() { |
| @@ -676,13 +490,18 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 676 | if (!image_view) { | 490 | if (!image_view) { |
| 677 | return false; | 491 | return false; |
| 678 | } | 492 | } |
| 679 | screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D); | 493 | screen_info.image_view = image_view->Handle(Shader::TextureType::Color2D); |
| 680 | screen_info.width = image_view->size.width; | 494 | screen_info.width = image_view->size.width; |
| 681 | screen_info.height = image_view->size.height; | 495 | screen_info.height = image_view->size.height; |
| 682 | screen_info.is_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); | 496 | screen_info.is_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); |
| 683 | return true; | 497 | return true; |
| 684 | } | 498 | } |
| 685 | 499 | ||
| 500 | void RasterizerVulkan::LoadDiskResources(u64 title_id, std::stop_token stop_loading, | ||
| 501 | const VideoCore::DiskResourceLoadCallback& callback) { | ||
| 502 | pipeline_cache.LoadDiskResources(title_id, stop_loading, callback); | ||
| 503 | } | ||
| 504 | |||
| 686 | void RasterizerVulkan::FlushWork() { | 505 | void RasterizerVulkan::FlushWork() { |
| 687 | static constexpr u32 DRAWS_TO_DISPATCH = 4096; | 506 | static constexpr u32 DRAWS_TO_DISPATCH = 4096; |
| 688 | 507 | ||
| @@ -691,13 +510,11 @@ void RasterizerVulkan::FlushWork() { | |||
| 691 | if ((++draw_counter & 7) != 7) { | 510 | if ((++draw_counter & 7) != 7) { |
| 692 | return; | 511 | return; |
| 693 | } | 512 | } |
| 694 | |||
| 695 | if (draw_counter < DRAWS_TO_DISPATCH) { | 513 | if (draw_counter < DRAWS_TO_DISPATCH) { |
| 696 | // Send recorded tasks to the worker thread | 514 | // Send recorded tasks to the worker thread |
| 697 | scheduler.DispatchWork(); | 515 | scheduler.DispatchWork(); |
| 698 | return; | 516 | return; |
| 699 | } | 517 | } |
| 700 | |||
| 701 | // Otherwise (every certain number of draws) flush execution. | 518 | // Otherwise (every certain number of draws) flush execution. |
| 702 | // This submits commands to the Vulkan driver. | 519 | // This submits commands to the Vulkan driver. |
| 703 | scheduler.Flush(); | 520 | scheduler.Flush(); |
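`FlushWork()` batches in two tiers: seven of every eight draws return immediately, the eighth hands recorded tasks to the worker thread, and only once the counter passes 4096 does it submit to the driver and reset. A small standalone rendition of that cadence, with comments standing in for the real calls:

```cpp
#include <cstdio>

int main() {
    constexpr unsigned DRAWS_TO_DISPATCH = 4096;
    unsigned draw_counter = 0;
    for (int draw = 1; draw <= 5000; ++draw) {
        if ((++draw_counter & 7) != 7) {
            continue; // seven out of eight draws do nothing extra
        }
        if (draw_counter < DRAWS_TO_DISPATCH) {
            // scheduler.DispatchWork(): hand tasks to the worker thread
            continue;
        }
        std::printf("flush at draw %d\n", draw); // scheduler.Flush()
        draw_counter = 0;
    }
}
```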
| @@ -716,52 +533,6 @@ bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 | |||
| 716 | return buffer_cache.DMACopy(src_address, dest_address, amount); | 533 | return buffer_cache.DMACopy(src_address, dest_address, amount); |
| 717 | } | 534 | } |
| 718 | 535 | ||
| 719 | void RasterizerVulkan::SetupShaderDescriptors( | ||
| 720 | const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, bool is_indexed) { | ||
| 721 | image_view_indices.clear(); | ||
| 722 | sampler_handles.clear(); | ||
| 723 | for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { | ||
| 724 | Shader* const shader = shaders[stage + 1]; | ||
| 725 | if (!shader) { | ||
| 726 | continue; | ||
| 727 | } | ||
| 728 | const ShaderEntries& entries = shader->GetEntries(); | ||
| 729 | SetupGraphicsUniformTexels(entries, stage); | ||
| 730 | SetupGraphicsTextures(entries, stage); | ||
| 731 | SetupGraphicsStorageTexels(entries, stage); | ||
| 732 | SetupGraphicsImages(entries, stage); | ||
| 733 | |||
| 734 | buffer_cache.SetEnabledUniformBuffers(stage, entries.enabled_uniform_buffers); | ||
| 735 | buffer_cache.UnbindGraphicsStorageBuffers(stage); | ||
| 736 | u32 ssbo_index = 0; | ||
| 737 | for (const auto& buffer : entries.global_buffers) { | ||
| 738 | buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index, | ||
| 739 | buffer.cbuf_offset, buffer.is_written); | ||
| 740 | ++ssbo_index; | ||
| 741 | } | ||
| 742 | } | ||
| 743 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); | ||
| 744 | buffer_cache.UpdateGraphicsBuffers(is_indexed); | ||
| 745 | texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); | ||
| 746 | |||
| 747 | buffer_cache.BindHostGeometryBuffers(is_indexed); | ||
| 748 | |||
| 749 | update_descriptor_queue.Acquire(); | ||
| 750 | |||
| 751 | ImageViewId* image_view_id_ptr = image_view_ids.data(); | ||
| 752 | VkSampler* sampler_ptr = sampler_handles.data(); | ||
| 753 | for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { | ||
| 754 | // Skip VertexA stage | ||
| 755 | Shader* const shader = shaders[stage + 1]; | ||
| 756 | if (!shader) { | ||
| 757 | continue; | ||
| 758 | } | ||
| 759 | buffer_cache.BindHostStageBuffers(stage); | ||
| 760 | PushImageDescriptors(shader->GetEntries(), texture_cache, update_descriptor_queue, | ||
| 761 | image_view_id_ptr, sampler_ptr); | ||
| 762 | } | ||
| 763 | } | ||
| 764 | |||
| 765 | void RasterizerVulkan::UpdateDynamicStates() { | 536 | void RasterizerVulkan::UpdateDynamicStates() { |
| 766 | auto& regs = maxwell3d.regs; | 537 | auto& regs = maxwell3d.regs; |
| 767 | UpdateViewportsState(regs); | 538 | UpdateViewportsState(regs); |
| @@ -770,6 +541,7 @@ void RasterizerVulkan::UpdateDynamicStates() { | |||
| 770 | UpdateBlendConstants(regs); | 541 | UpdateBlendConstants(regs); |
| 771 | UpdateDepthBounds(regs); | 542 | UpdateDepthBounds(regs); |
| 772 | UpdateStencilFaces(regs); | 543 | UpdateStencilFaces(regs); |
| 544 | UpdateLineWidth(regs); | ||
| 773 | if (device.IsExtExtendedDynamicStateSupported()) { | 545 | if (device.IsExtExtendedDynamicStateSupported()) { |
| 774 | UpdateCullMode(regs); | 546 | UpdateCullMode(regs); |
| 775 | UpdateDepthBoundsTestEnable(regs); | 547 | UpdateDepthBoundsTestEnable(regs); |
| @@ -779,6 +551,9 @@ void RasterizerVulkan::UpdateDynamicStates() { | |||
| 779 | UpdateFrontFace(regs); | 551 | UpdateFrontFace(regs); |
| 780 | UpdateStencilOp(regs); | 552 | UpdateStencilOp(regs); |
| 781 | UpdateStencilTestEnable(regs); | 553 | UpdateStencilTestEnable(regs); |
| 554 | if (device.IsExtVertexInputDynamicStateSupported()) { | ||
| 555 | UpdateVertexInput(regs); | ||
| 556 | } | ||
| 782 | } | 557 | } |
| 783 | } | 558 | } |
| 784 | 559 | ||
| @@ -810,89 +585,6 @@ void RasterizerVulkan::EndTransformFeedback() { | |||
| 810 | [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); | 585 | [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); |
| 811 | } | 586 | } |
| 812 | 587 | ||
| 813 | void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) { | ||
| 814 | const auto& regs = maxwell3d.regs; | ||
| 815 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | ||
| 816 | for (const auto& entry : entries.uniform_texels) { | ||
| 817 | const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); | ||
| 818 | image_view_indices.push_back(handle.image); | ||
| 819 | } | ||
| 820 | } | ||
| 821 | |||
| 822 | void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) { | ||
| 823 | const auto& regs = maxwell3d.regs; | ||
| 824 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | ||
| 825 | for (const auto& entry : entries.samplers) { | ||
| 826 | for (size_t index = 0; index < entry.size; ++index) { | ||
| 827 | const TextureHandle handle = | ||
| 828 | GetTextureInfo(maxwell3d, via_header_index, entry, stage, index); | ||
| 829 | image_view_indices.push_back(handle.image); | ||
| 830 | |||
| 831 | Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler); | ||
| 832 | sampler_handles.push_back(sampler->Handle()); | ||
| 833 | } | ||
| 834 | } | ||
| 835 | } | ||
| 836 | |||
| 837 | void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) { | ||
| 838 | const auto& regs = maxwell3d.regs; | ||
| 839 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | ||
| 840 | for (const auto& entry : entries.storage_texels) { | ||
| 841 | const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); | ||
| 842 | image_view_indices.push_back(handle.image); | ||
| 843 | } | ||
| 844 | } | ||
| 845 | |||
| 846 | void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) { | ||
| 847 | const auto& regs = maxwell3d.regs; | ||
| 848 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | ||
| 849 | for (const auto& entry : entries.images) { | ||
| 850 | const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); | ||
| 851 | image_view_indices.push_back(handle.image); | ||
| 852 | } | ||
| 853 | } | ||
| 854 | |||
| 855 | void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { | ||
| 856 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | ||
| 857 | for (const auto& entry : entries.uniform_texels) { | ||
| 858 | const TextureHandle handle = | ||
| 859 | GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); | ||
| 860 | image_view_indices.push_back(handle.image); | ||
| 861 | } | ||
| 862 | } | ||
| 863 | |||
| 864 | void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { | ||
| 865 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | ||
| 866 | for (const auto& entry : entries.samplers) { | ||
| 867 | for (size_t index = 0; index < entry.size; ++index) { | ||
| 868 | const TextureHandle handle = GetTextureInfo(kepler_compute, via_header_index, entry, | ||
| 869 | COMPUTE_SHADER_INDEX, index); | ||
| 870 | image_view_indices.push_back(handle.image); | ||
| 871 | |||
| 872 | Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); | ||
| 873 | sampler_handles.push_back(sampler->Handle()); | ||
| 874 | } | ||
| 875 | } | ||
| 876 | } | ||
| 877 | |||
| 878 | void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { | ||
| 879 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | ||
| 880 | for (const auto& entry : entries.storage_texels) { | ||
| 881 | const TextureHandle handle = | ||
| 882 | GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); | ||
| 883 | image_view_indices.push_back(handle.image); | ||
| 884 | } | ||
| 885 | } | ||
| 886 | |||
| 887 | void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { | ||
| 888 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | ||
| 889 | for (const auto& entry : entries.images) { | ||
| 890 | const TextureHandle handle = | ||
| 891 | GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); | ||
| 892 | image_view_indices.push_back(handle.image); | ||
| 893 | } | ||
| 894 | } | ||
| 895 | |||
| 896 | void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { | 588 | void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { |
| 897 | if (!state_tracker.TouchViewports()) { | 589 | if (!state_tracker.TouchViewports()) { |
| 898 | return; | 590 | return; |
| @@ -985,6 +677,14 @@ void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs) | |||
| 985 | } | 677 | } |
| 986 | } | 678 | } |
| 987 | 679 | ||
| 680 | void RasterizerVulkan::UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs) { | ||
| 681 | if (!state_tracker.TouchLineWidth()) { | ||
| 682 | return; | ||
| 683 | } | ||
| 684 | const float width = regs.line_smooth_enable ? regs.line_width_smooth : regs.line_width_aliased; | ||
| 685 | scheduler.Record([width](vk::CommandBuffer cmdbuf) { cmdbuf.SetLineWidth(width); }); | ||
| 686 | } | ||
| 687 | |||
| 988 | void RasterizerVulkan::UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs) { | 688 | void RasterizerVulkan::UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs) { |
| 989 | if (!state_tracker.TouchCullMode()) { | 689 | if (!state_tracker.TouchCullMode()) { |
| 990 | return; | 690 | return; |
| @@ -999,6 +699,11 @@ void RasterizerVulkan::UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Re | |||
| 999 | if (!state_tracker.TouchDepthBoundsTestEnable()) { | 699 | if (!state_tracker.TouchDepthBoundsTestEnable()) { |
| 1000 | return; | 700 | return; |
| 1001 | } | 701 | } |
| 702 | bool enabled = regs.depth_bounds_enable; | ||
| 703 | if (enabled && !device.IsDepthBoundsSupported()) { | ||
| 704 | LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported"); | ||
| 705 | enabled = false; | ||
| 706 | } | ||
| 1002 | scheduler.Record([enable = regs.depth_bounds_enable](vk::CommandBuffer cmdbuf) { | 707 | scheduler.Record([enable = enabled](vk::CommandBuffer cmdbuf) { |
| 1003 | cmdbuf.SetDepthBoundsTestEnableEXT(enable); | 708 | cmdbuf.SetDepthBoundsTestEnableEXT(enable); |
| 1004 | }); | 709 | }); |
| @@ -1086,4 +791,62 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& | |||
| 1086 | }); | 791 | }); |
| 1087 | } | 792 | } |
| 1088 | 793 | ||
| 794 | void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) { | ||
| 795 | auto& dirty{maxwell3d.dirty.flags}; | ||
| 796 | if (!dirty[Dirty::VertexInput]) { | ||
| 797 | return; | ||
| 798 | } | ||
| 799 | dirty[Dirty::VertexInput] = false; | ||
| 800 | |||
| 801 | boost::container::static_vector<VkVertexInputBindingDescription2EXT, 32> bindings; | ||
| 802 | boost::container::static_vector<VkVertexInputAttributeDescription2EXT, 32> attributes; | ||
| 803 | |||
| 804 | // There seems to be a bug in Nvidia's driver where updating only the higher attributes | ||
| 805 | // ends up generating dirty state. As a workaround, track the highest dirty attribute and | ||
| 806 | // update every attribute below it. | ||
| 807 | size_t highest_dirty_attr{}; | ||
| 808 | for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { | ||
| 809 | if (dirty[Dirty::VertexAttribute0 + index]) { | ||
| 810 | highest_dirty_attr = index; | ||
| 811 | } | ||
| 812 | } | ||
| 813 | for (size_t index = 0; index < highest_dirty_attr; ++index) { | ||
| 814 | const Maxwell::VertexAttribute attribute{regs.vertex_attrib_format[index]}; | ||
| 815 | const u32 binding{attribute.buffer}; | ||
| 816 | dirty[Dirty::VertexAttribute0 + index] = false; | ||
| 817 | dirty[Dirty::VertexBinding0 + static_cast<size_t>(binding)] = true; | ||
| 818 | if (!attribute.constant) { | ||
| 819 | attributes.push_back({ | ||
| 820 | .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT, | ||
| 821 | .pNext = nullptr, | ||
| 822 | .location = static_cast<u32>(index), | ||
| 823 | .binding = binding, | ||
| 824 | .format = MaxwellToVK::VertexFormat(attribute.type, attribute.size), | ||
| 825 | .offset = attribute.offset, | ||
| 826 | }); | ||
| 827 | } | ||
| 828 | } | ||
| 829 | for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { | ||
| 830 | if (!dirty[Dirty::VertexBinding0 + index]) { | ||
| 831 | continue; | ||
| 832 | } | ||
| 833 | dirty[Dirty::VertexBinding0 + index] = false; | ||
| 834 | |||
| 835 | const u32 binding{static_cast<u32>(index)}; | ||
| 836 | const auto& input_binding{regs.vertex_array[binding]}; | ||
| 837 | const bool is_instanced{regs.instanced_arrays.IsInstancingEnabled(binding)}; | ||
| 838 | bindings.push_back({ | ||
| 839 | .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_BINDING_DESCRIPTION_2_EXT, | ||
| 840 | .pNext = nullptr, | ||
| 841 | .binding = binding, | ||
| 842 | .stride = input_binding.stride, | ||
| 843 | .inputRate = is_instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX, | ||
| 844 | .divisor = is_instanced ? input_binding.divisor : 1, | ||
| 845 | }); | ||
| 846 | } | ||
| 847 | scheduler.Record([bindings, attributes](vk::CommandBuffer cmdbuf) { | ||
| 848 | cmdbuf.SetVertexInputEXT(bindings, attributes); | ||
| 849 | }); | ||
| 850 | } | ||
| 851 | |||
| 1089 | } // namespace Vulkan | 852 | } // namespace Vulkan |
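`UpdateVertexInput` builds on VK_EXT_vertex_input_dynamic_state, which moves the whole vertex-fetch layout out of the pipeline object and into a command. A minimal sketch of the extension call with one binding and one attribute; the stride and format are illustrative, and on real drivers the function pointer must be fetched with `vkGetDeviceProcAddr`:

```cpp
#include <vulkan/vulkan.h>

void SetSingleAttribute(VkCommandBuffer cmdbuf,
                        PFN_vkCmdSetVertexInputEXT set_vertex_input) {
    const VkVertexInputBindingDescription2EXT binding{
        .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_BINDING_DESCRIPTION_2_EXT,
        .pNext = nullptr,
        .binding = 0,
        .stride = 16, // illustrative: tightly packed vec4 positions
        .inputRate = VK_VERTEX_INPUT_RATE_VERTEX,
        .divisor = 1, // must be 1 when the binding is not instanced
    };
    const VkVertexInputAttributeDescription2EXT attribute{
        .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT,
        .pNext = nullptr,
        .location = 0,
        .binding = 0,
        .format = VK_FORMAT_R32G32B32A32_SFLOAT,
        .offset = 0,
    };
    set_vertex_input(cmdbuf, 1, &binding, 1, &attribute);
}
```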
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 2065209be..866827247 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -21,14 +21,13 @@ | |||
| 21 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | 21 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 22 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 22 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 23 | #include "video_core/renderer_vulkan/vk_fence_manager.h" | 23 | #include "video_core/renderer_vulkan/vk_fence_manager.h" |
| 24 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | ||
| 25 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | 24 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" |
| 26 | #include "video_core/renderer_vulkan/vk_query_cache.h" | 25 | #include "video_core/renderer_vulkan/vk_query_cache.h" |
| 26 | #include "video_core/renderer_vulkan/vk_render_pass_cache.h" | ||
| 27 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 27 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 28 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 28 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 29 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | 29 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| 30 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 30 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 31 | #include "video_core/shader/async_shaders.h" | ||
| 32 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | 31 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" |
| 33 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 32 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 34 | 33 | ||
| @@ -73,7 +72,7 @@ public: | |||
| 73 | 72 | ||
| 74 | void Draw(bool is_indexed, bool is_instanced) override; | 73 | void Draw(bool is_indexed, bool is_instanced) override; |
| 75 | void Clear() override; | 74 | void Clear() override; |
| 76 | void DispatchCompute(GPUVAddr code_addr) override; | 75 | void DispatchCompute() override; |
| 77 | void ResetCounter(VideoCore::QueryType type) override; | 76 | void ResetCounter(VideoCore::QueryType type) override; |
| 78 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; | 77 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; |
| 79 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | 78 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; |
| @@ -102,19 +101,8 @@ public: | |||
| 102 | Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; | 101 | Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; |
| 103 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, | 102 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, |
| 104 | u32 pixel_stride) override; | 103 | u32 pixel_stride) override; |
| 105 | 104 | void LoadDiskResources(u64 title_id, std::stop_token stop_loading, | |
| 106 | VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { | 105 | const VideoCore::DiskResourceLoadCallback& callback) override; |
| 107 | return async_shaders; | ||
| 108 | } | ||
| 109 | |||
| 110 | const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const { | ||
| 111 | return async_shaders; | ||
| 112 | } | ||
| 113 | |||
| 114 | /// Maximum supported size that a constbuffer can have in bytes. | ||
| 115 | static constexpr size_t MaxConstbufferSize = 0x10000; | ||
| 116 | static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0, | ||
| 117 | "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); | ||
| 118 | 106 | ||
| 119 | private: | 107 | private: |
| 120 | static constexpr size_t MAX_TEXTURES = 192; | 108 | static constexpr size_t MAX_TEXTURES = 192; |
| @@ -125,46 +113,19 @@ private: | |||
| 125 | 113 | ||
| 126 | void FlushWork(); | 114 | void FlushWork(); |
| 127 | 115 | ||
| 128 | /// Setup descriptors in the graphics pipeline. | ||
| 129 | void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, | ||
| 130 | bool is_indexed); | ||
| 131 | |||
| 132 | void UpdateDynamicStates(); | 116 | void UpdateDynamicStates(); |
| 133 | 117 | ||
| 134 | void BeginTransformFeedback(); | 118 | void BeginTransformFeedback(); |
| 135 | 119 | ||
| 136 | void EndTransformFeedback(); | 120 | void EndTransformFeedback(); |
| 137 | 121 | ||
| 138 | /// Setup uniform texels in the graphics pipeline. | ||
| 139 | void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage); | ||
| 140 | |||
| 141 | /// Setup textures in the graphics pipeline. | ||
| 142 | void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage); | ||
| 143 | |||
| 144 | /// Setup storage texels in the graphics pipeline. | ||
| 145 | void SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage); | ||
| 146 | |||
| 147 | /// Setup images in the graphics pipeline. | ||
| 148 | void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage); | ||
| 149 | |||
| 150 | /// Setup texel buffers in the compute pipeline. | ||
| 151 | void SetupComputeUniformTexels(const ShaderEntries& entries); | ||
| 152 | |||
| 153 | /// Setup textures in the compute pipeline. | ||
| 154 | void SetupComputeTextures(const ShaderEntries& entries); | ||
| 155 | |||
| 156 | /// Setup storage texels in the compute pipeline. | ||
| 157 | void SetupComputeStorageTexels(const ShaderEntries& entries); | ||
| 158 | |||
| 159 | /// Setup images in the compute pipeline. | ||
| 160 | void SetupComputeImages(const ShaderEntries& entries); | ||
| 161 | |||
| 162 | void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); | 122 | void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); |
| 163 | void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); | 123 | void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); |
| 164 | void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); | 124 | void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); |
| 165 | void UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs); | 125 | void UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs); |
| 166 | void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs); | 126 | void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs); |
| 167 | void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs); | 127 | void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs); |
| 128 | void UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs); | ||
| 168 | 129 | ||
| 169 | void UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs); | 130 | void UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs); |
| 170 | void UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); | 131 | void UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); |
| @@ -175,6 +136,8 @@ private: | |||
| 175 | void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); | 136 | void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); |
| 176 | void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); | 137 | void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); |
| 177 | 138 | ||
| 139 | void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs); | ||
| 140 | |||
| 178 | Tegra::GPU& gpu; | 141 | Tegra::GPU& gpu; |
| 179 | Tegra::MemoryManager& gpu_memory; | 142 | Tegra::MemoryManager& gpu_memory; |
| 180 | Tegra::Engines::Maxwell3D& maxwell3d; | 143 | Tegra::Engines::Maxwell3D& maxwell3d; |
| @@ -187,24 +150,22 @@ private: | |||
| 187 | VKScheduler& scheduler; | 150 | VKScheduler& scheduler; |
| 188 | 151 | ||
| 189 | StagingBufferPool staging_pool; | 152 | StagingBufferPool staging_pool; |
| 190 | VKDescriptorPool descriptor_pool; | 153 | DescriptorPool descriptor_pool; |
| 191 | VKUpdateDescriptorQueue update_descriptor_queue; | 154 | VKUpdateDescriptorQueue update_descriptor_queue; |
| 192 | BlitImageHelper blit_image; | 155 | BlitImageHelper blit_image; |
| 193 | ASTCDecoderPass astc_decoder_pass; | 156 | ASTCDecoderPass astc_decoder_pass; |
| 194 | 157 | RenderPassCache render_pass_cache; | |
| 195 | GraphicsPipelineCacheKey graphics_key; | ||
| 196 | 158 | ||
| 197 | TextureCacheRuntime texture_cache_runtime; | 159 | TextureCacheRuntime texture_cache_runtime; |
| 198 | TextureCache texture_cache; | 160 | TextureCache texture_cache; |
| 199 | BufferCacheRuntime buffer_cache_runtime; | 161 | BufferCacheRuntime buffer_cache_runtime; |
| 200 | BufferCache buffer_cache; | 162 | BufferCache buffer_cache; |
| 201 | VKPipelineCache pipeline_cache; | 163 | PipelineCache pipeline_cache; |
| 202 | VKQueryCache query_cache; | 164 | VKQueryCache query_cache; |
| 203 | AccelerateDMA accelerate_dma; | 165 | AccelerateDMA accelerate_dma; |
| 204 | VKFenceManager fence_manager; | 166 | VKFenceManager fence_manager; |
| 205 | 167 | ||
| 206 | vk::Event wfi_event; | 168 | vk::Event wfi_event; |
| 207 | VideoCommon::Shader::AsyncShaders async_shaders; | ||
| 208 | 169 | ||
| 209 | boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; | 170 | boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; |
| 210 | std::array<VideoCommon::ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; | 171 | std::array<VideoCommon::ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; |
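`LoadDiskResources` threads a `std::stop_token` through to the pipeline cache so shader precompilation can be cancelled cleanly, for example when the emulator is closed mid-load. A sketch of the cooperative-cancellation loop such a loader is assumed to run:

```cpp
#include <cstddef>
#include <stop_token>
#include <thread>
#include <vector>

// Pretend each entry is one serialized pipeline read back from disk.
void LoadEntries(std::stop_token stop_loading, const std::vector<int>& entries) {
    for (std::size_t i = 0; i < entries.size(); ++i) {
        if (stop_loading.stop_requested()) {
            return; // abort cleanly between pipelines
        }
        // ... deserialize and build entries[i] here ...
    }
}

int main() {
    const std::vector<int> entries(32);
    // std::jthread passes its stop_token as the first callable argument.
    std::jthread loader([&entries](std::stop_token token) { LoadEntries(token, entries); });
    loader.request_stop(); // e.g. the user closed the window while loading
}
```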
diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp new file mode 100644 index 000000000..451ffe019 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp | |||
| @@ -0,0 +1,96 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <unordered_map> | ||
| 6 | |||
| 7 | #include <boost/container/static_vector.hpp> | ||
| 8 | |||
| 9 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | ||
| 10 | #include "video_core/renderer_vulkan/vk_render_pass_cache.h" | ||
| 11 | #include "video_core/surface.h" | ||
| 12 | #include "video_core/vulkan_common/vulkan_device.h" | ||
| 13 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 14 | |||
| 15 | namespace Vulkan { | ||
| 16 | namespace { | ||
| 17 | using VideoCore::Surface::PixelFormat; | ||
| 18 | |||
| 19 | VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat format, | ||
| 20 | VkSampleCountFlagBits samples) { | ||
| 21 | using MaxwellToVK::SurfaceFormat; | ||
| 22 | return { | ||
| 23 | .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, | ||
| 24 | .format = SurfaceFormat(device, FormatType::Optimal, true, format).format, | ||
| 25 | .samples = samples, | ||
| 26 | .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, | ||
| 27 | .storeOp = VK_ATTACHMENT_STORE_OP_STORE, | ||
| 28 | .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, | ||
| 29 | .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, | ||
| 30 | .initialLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 31 | .finalLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 32 | }; | ||
| 33 | } | ||
| 34 | } // Anonymous namespace | ||
| 35 | |||
| 36 | RenderPassCache::RenderPassCache(const Device& device_) : device{&device_} {} | ||
| 37 | |||
| 38 | VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { | ||
| 39 | std::lock_guard lock{mutex}; | ||
| 40 | const auto [pair, is_new] = cache.try_emplace(key); | ||
| 41 | if (!is_new) { | ||
| 42 | return *pair->second; | ||
| 43 | } | ||
| 44 | boost::container::static_vector<VkAttachmentDescription, 9> descriptions; | ||
| 45 | std::array<VkAttachmentReference, 8> references{}; | ||
| 46 | u32 num_attachments{}; | ||
| 47 | u32 num_colors{}; | ||
| 48 | for (size_t index = 0; index < key.color_formats.size(); ++index) { | ||
| 49 | const PixelFormat format{key.color_formats[index]}; | ||
| 50 | const bool is_valid{format != PixelFormat::Invalid}; | ||
| 51 | references[index] = VkAttachmentReference{ | ||
| 52 | .attachment = is_valid ? num_colors : VK_ATTACHMENT_UNUSED, | ||
| 53 | .layout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 54 | }; | ||
| 55 | if (is_valid) { | ||
| 56 | descriptions.push_back(AttachmentDescription(*device, format, key.samples)); | ||
| 57 | num_attachments = static_cast<u32>(index + 1); | ||
| 58 | ++num_colors; | ||
| 59 | } | ||
| 60 | } | ||
| 61 | const bool has_depth{key.depth_format != PixelFormat::Invalid}; | ||
| 62 | VkAttachmentReference depth_reference{}; | ||
| 63 | if (has_depth) { | ||
| 64 | depth_reference = VkAttachmentReference{ | ||
| 65 | .attachment = num_colors, | ||
| 66 | .layout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 67 | }; | ||
| 68 | descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples)); | ||
| 69 | } | ||
| 70 | const VkSubpassDescription subpass{ | ||
| 71 | .flags = 0, | ||
| 72 | .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, | ||
| 73 | .inputAttachmentCount = 0, | ||
| 74 | .pInputAttachments = nullptr, | ||
| 75 | .colorAttachmentCount = num_attachments, | ||
| 76 | .pColorAttachments = references.data(), | ||
| 77 | .pResolveAttachments = nullptr, | ||
| 78 | .pDepthStencilAttachment = has_depth ? &depth_reference : nullptr, | ||
| 79 | .preserveAttachmentCount = 0, | ||
| 80 | .pPreserveAttachments = nullptr, | ||
| 81 | }; | ||
| 82 | pair->second = device->GetLogical().CreateRenderPass({ | ||
| 83 | .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, | ||
| 84 | .pNext = nullptr, | ||
| 85 | .flags = 0, | ||
| 86 | .attachmentCount = static_cast<u32>(descriptions.size()), | ||
| 87 | .pAttachments = descriptions.empty() ? nullptr : descriptions.data(), | ||
| 88 | .subpassCount = 1, | ||
| 89 | .pSubpasses = &subpass, | ||
| 90 | .dependencyCount = 0, | ||
| 91 | .pDependencies = nullptr, | ||
| 92 | }); | ||
| 93 | return *pair->second; | ||
| 94 | } | ||
| 95 | |||
| 96 | } // namespace Vulkan | ||
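`RenderPassCache::Get` uses a common create-once idiom: `try_emplace` inserts an empty handle under the lock, only the inserting thread pays the construction cost, and later hits return the cached object untouched. A generic sketch of the same pattern with placeholder types:

```cpp
#include <mutex>
#include <string>
#include <unordered_map>

// Create-once cache mirroring RenderPassCache::Get: the map owns the values
// and a repeated key returns the existing value without rebuilding it.
class ExpensiveValueCache {
public:
    const std::string& Get(int key) {
        std::lock_guard lock{mutex};
        const auto [it, is_new] = cache.try_emplace(key);
        if (is_new) {
            it->second = std::to_string(key); // expensive construction goes here
        }
        return it->second;
    }

private:
    std::unordered_map<int, std::string> cache;
    std::mutex mutex;
};
```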
diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.h b/src/video_core/renderer_vulkan/vk_render_pass_cache.h new file mode 100644 index 000000000..eaa0ed775 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.h | |||
| @@ -0,0 +1,55 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <mutex> | ||
| 8 | #include <unordered_map> | ||
| 9 | |||
| 10 | #include "video_core/surface.h" | ||
| 11 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 12 | |||
| 13 | namespace Vulkan { | ||
| 14 | |||
| 15 | struct RenderPassKey { | ||
| 16 | auto operator<=>(const RenderPassKey&) const noexcept = default; | ||
| 17 | |||
| 18 | std::array<VideoCore::Surface::PixelFormat, 8> color_formats; | ||
| 19 | VideoCore::Surface::PixelFormat depth_format; | ||
| 20 | VkSampleCountFlagBits samples; | ||
| 21 | }; | ||
| 22 | |||
| 23 | } // namespace Vulkan | ||
| 24 | |||
| 25 | namespace std { | ||
| 26 | template <> | ||
| 27 | struct hash<Vulkan::RenderPassKey> { | ||
| 28 | [[nodiscard]] size_t operator()(const Vulkan::RenderPassKey& key) const noexcept { | ||
| 29 | size_t value = static_cast<size_t>(key.depth_format) << 48; | ||
| 30 | value ^= static_cast<size_t>(key.samples) << 52; | ||
| 31 | for (size_t i = 0; i < key.color_formats.size(); ++i) { | ||
| 32 | value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6); | ||
| 33 | } | ||
| 34 | return value; | ||
| 35 | } | ||
| 36 | }; | ||
| 37 | } // namespace std | ||
| 38 | |||
| 39 | namespace Vulkan { | ||
| 40 | |||
| 41 | class Device; | ||
| 42 | |||
| 43 | class RenderPassCache { | ||
| 44 | public: | ||
| 45 | explicit RenderPassCache(const Device& device_); | ||
| 46 | |||
| 47 | VkRenderPass Get(const RenderPassKey& key); | ||
| 48 | |||
| 49 | private: | ||
| 50 | const Device* device{}; | ||
| 51 | std::unordered_map<RenderPassKey, vk::RenderPass> cache; | ||
| 52 | std::mutex mutex; | ||
| 53 | }; | ||
| 54 | |||
| 55 | } // namespace Vulkan | ||
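The std::hash specialization packs the key into mostly disjoint bit ranges: roughly 6 bits per color attachment slot, the depth format folded in at bit 48, and the sample count at bit 52. Occasional collisions are harmless because unordered_map re-checks candidates with the defaulted comparison operator. A standalone sketch of the same fold, with PixelFormat reduced to a plain integer for illustration:

#include <array>
#include <cstddef>
#include <cstdint>

struct SimpleKey {
    std::array<std::uint8_t, 8> color_formats;
    std::uint8_t depth_format;
    std::uint8_t samples;
};

std::size_t HashKey(const SimpleKey& key) {
    // Same layout as the specialization above: depth at bit 48,
    // samples at bit 52, each color slot shifted by 6 * index.
    std::size_t value = static_cast<std::size_t>(key.depth_format) << 48;
    value ^= static_cast<std::size_t>(key.samples) << 52;
    for (std::size_t i = 0; i < key.color_formats.size(); ++i) {
        value ^= static_cast<std::size_t>(key.color_formats[i]) << (i * 6);
    }
    return value;
}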
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp index a8bf7bda8..2dd514968 100644 --- a/src/video_core/renderer_vulkan/vk_resource_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp | |||
| @@ -10,18 +10,16 @@ | |||
| 10 | namespace Vulkan { | 10 | namespace Vulkan { |
| 11 | 11 | ||
| 12 | ResourcePool::ResourcePool(MasterSemaphore& master_semaphore_, size_t grow_step_) | 12 | ResourcePool::ResourcePool(MasterSemaphore& master_semaphore_, size_t grow_step_) |
| 13 | : master_semaphore{master_semaphore_}, grow_step{grow_step_} {} | 13 | : master_semaphore{&master_semaphore_}, grow_step{grow_step_} {} |
| 14 | |||
| 15 | ResourcePool::~ResourcePool() = default; | ||
| 16 | 14 | ||
| 17 | size_t ResourcePool::CommitResource() { | 15 | size_t ResourcePool::CommitResource() { |
| 18 | // Refresh semaphore to query updated results | 16 | // Refresh semaphore to query updated results |
| 19 | master_semaphore.Refresh(); | 17 | master_semaphore->Refresh(); |
| 20 | const u64 gpu_tick = master_semaphore.KnownGpuTick(); | 18 | const u64 gpu_tick = master_semaphore->KnownGpuTick(); |
| 21 | const auto search = [this, gpu_tick](size_t begin, size_t end) -> std::optional<size_t> { | 19 | const auto search = [this, gpu_tick](size_t begin, size_t end) -> std::optional<size_t> { |
| 22 | for (size_t iterator = begin; iterator < end; ++iterator) { | 20 | for (size_t iterator = begin; iterator < end; ++iterator) { |
| 23 | if (gpu_tick >= ticks[iterator]) { | 21 | if (gpu_tick >= ticks[iterator]) { |
| 24 | ticks[iterator] = master_semaphore.CurrentTick(); | 22 | ticks[iterator] = master_semaphore->CurrentTick(); |
| 25 | return iterator; | 23 | return iterator; |
| 26 | } | 24 | } |
| 27 | } | 25 | } |
| @@ -36,7 +34,7 @@ size_t ResourcePool::CommitResource() { | |||
| 36 | // Both searches failed, the pool is full; handle it. | 34 | // Both searches failed, the pool is full; handle it. |
| 37 | const size_t free_resource = ManageOverflow(); | 35 | const size_t free_resource = ManageOverflow(); |
| 38 | 36 | ||
| 39 | ticks[free_resource] = master_semaphore.CurrentTick(); | 37 | ticks[free_resource] = master_semaphore->CurrentTick(); |
| 40 | found = free_resource; | 38 | found = free_resource; |
| 41 | } | 39 | } |
| 42 | } | 40 | } |
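CommitResource() treats a pool slot as free once the GPU's known tick has reached the tick recorded when the slot was last handed out; the search runs from a hint first, wraps around, and only grows the pool when both ranges miss. A condensed sketch of that reuse test, with the MasterSemaphore queries replaced by plain counters (FindFreeSlot is a hypothetical helper, not yuzu API):

#include <cstddef>
#include <cstdint>
#include <optional>
#include <vector>

// gpu_tick stands in for MasterSemaphore::KnownGpuTick() and current_tick
// for MasterSemaphore::CurrentTick(); both are assumptions for the demo.
std::optional<std::size_t> FindFreeSlot(std::vector<std::uint64_t>& ticks,
                                        std::uint64_t gpu_tick,
                                        std::uint64_t current_tick,
                                        std::size_t begin, std::size_t end) {
    for (std::size_t i = begin; i < end; ++i) {
        if (gpu_tick >= ticks[i]) {  // the GPU is done with this slot
            ticks[i] = current_tick; // reserve it for the new work
            return i;
        }
    }
    return std::nullopt; // both ranges exhausted: caller grows the pool
}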
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.h b/src/video_core/renderer_vulkan/vk_resource_pool.h index 9d0bb3b4d..f0b80ad59 100644 --- a/src/video_core/renderer_vulkan/vk_resource_pool.h +++ b/src/video_core/renderer_vulkan/vk_resource_pool.h | |||
| @@ -18,8 +18,16 @@ class MasterSemaphore; | |||
| 18 | */ | 18 | */ |
| 19 | class ResourcePool { | 19 | class ResourcePool { |
| 20 | public: | 20 | public: |
| 21 | explicit ResourcePool() = default; | ||
| 21 | explicit ResourcePool(MasterSemaphore& master_semaphore, size_t grow_step); | 22 | explicit ResourcePool(MasterSemaphore& master_semaphore, size_t grow_step); |
| 22 | virtual ~ResourcePool(); | 23 | |
| 24 | virtual ~ResourcePool() = default; | ||
| 25 | |||
| 26 | ResourcePool& operator=(ResourcePool&&) noexcept = default; | ||
| 27 | ResourcePool(ResourcePool&&) noexcept = default; | ||
| 28 | |||
| 29 | ResourcePool& operator=(const ResourcePool&) = default; | ||
| 30 | ResourcePool(const ResourcePool&) = default; | ||
| 23 | 31 | ||
| 24 | protected: | 32 | protected: |
| 25 | size_t CommitResource(); | 33 | size_t CommitResource(); |
| @@ -34,7 +42,7 @@ private: | |||
| 34 | /// Allocates a new page of resources. | 42 | /// Allocates a new page of resources. |
| 35 | void Grow(); | 43 | void Grow(); |
| 36 | 44 | ||
| 37 | MasterSemaphore& master_semaphore; | 45 | MasterSemaphore* master_semaphore{}; |
| 38 | size_t grow_step = 0; ///< Number of new resources created after an overflow | 46 | size_t grow_step = 0; ///< Number of new resources created after an overflow |
| 39 | size_t hint_iterator = 0; ///< Hint to where the next free resource is likely to be found | 47 |
| 40 | std::vector<u64> ticks; ///< Ticks for each resource | 48 | std::vector<u64> ticks; ///< Ticks for each resource |
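The member change from MasterSemaphore& to MasterSemaphore* is what makes the newly defaulted special members legal: a reference member implicitly deletes copy and move assignment, while a pointer can be rebound. A minimal sketch of the distinction (Dep, ByRef, and ByPtr are placeholder types):

struct Dep {};

struct ByRef {
    Dep& dep; // reference member: copy/move assignment are implicitly deleted
};

struct ByPtr {
    ByPtr() = default;                        // default-constructible
    explicit ByPtr(Dep& dep_) : dep{&dep_} {}
    Dep* dep{};                               // rebindable, so defaulted
};                                            // assignment works

int main() {
    Dep d;
    ByPtr a{d};
    ByPtr b;
    b = a; // fine with a pointer member
    // ByRef x{d}, y{d}; y = x; // would not compile: assignment is deleted
}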
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index f35c120b0..4840962de 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp | |||
| @@ -31,7 +31,7 @@ void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) { | |||
| 31 | command->~Command(); | 31 | command->~Command(); |
| 32 | command = next; | 32 | command = next; |
| 33 | } | 33 | } |
| 34 | 34 | submit = false; | |
| 35 | command_offset = 0; | 35 | command_offset = 0; |
| 36 | first = nullptr; | 36 | first = nullptr; |
| 37 | last = nullptr; | 37 | last = nullptr; |
| @@ -42,13 +42,16 @@ VKScheduler::VKScheduler(const Device& device_, StateTracker& state_tracker_) | |||
| 42 | master_semaphore{std::make_unique<MasterSemaphore>(device)}, | 42 | master_semaphore{std::make_unique<MasterSemaphore>(device)}, |
| 43 | command_pool{std::make_unique<CommandPool>(*master_semaphore, device)} { | 43 | command_pool{std::make_unique<CommandPool>(*master_semaphore, device)} { |
| 44 | AcquireNewChunk(); | 44 | AcquireNewChunk(); |
| 45 | AllocateNewContext(); | 45 | AllocateWorkerCommandBuffer(); |
| 46 | worker_thread = std::thread(&VKScheduler::WorkerThread, this); | 46 | worker_thread = std::thread(&VKScheduler::WorkerThread, this); |
| 47 | } | 47 | } |
| 48 | 48 | ||
| 49 | VKScheduler::~VKScheduler() { | 49 | VKScheduler::~VKScheduler() { |
| 50 | quit = true; | 50 | { |
| 51 | cv.notify_all(); | 51 | std::lock_guard lock{work_mutex}; |
| 52 | quit = true; | ||
| 53 | } | ||
| 54 | work_cv.notify_all(); | ||
| 52 | worker_thread.join(); | 55 | worker_thread.join(); |
| 53 | } | 56 | } |
| 54 | 57 | ||
| @@ -60,6 +63,7 @@ void VKScheduler::Flush(VkSemaphore semaphore) { | |||
| 60 | void VKScheduler::Finish(VkSemaphore semaphore) { | 63 | void VKScheduler::Finish(VkSemaphore semaphore) { |
| 61 | const u64 presubmit_tick = CurrentTick(); | 64 | const u64 presubmit_tick = CurrentTick(); |
| 62 | SubmitExecution(semaphore); | 65 | SubmitExecution(semaphore); |
| 66 | WaitWorker(); | ||
| 63 | Wait(presubmit_tick); | 67 | Wait(presubmit_tick); |
| 64 | AllocateNewContext(); | 68 | AllocateNewContext(); |
| 65 | } | 69 | } |
| @@ -68,20 +72,19 @@ void VKScheduler::WaitWorker() { | |||
| 68 | MICROPROFILE_SCOPE(Vulkan_WaitForWorker); | 72 | MICROPROFILE_SCOPE(Vulkan_WaitForWorker); |
| 69 | DispatchWork(); | 73 | DispatchWork(); |
| 70 | 74 | ||
| 71 | bool finished = false; | 75 | std::unique_lock lock{work_mutex}; |
| 72 | do { | 76 | wait_cv.wait(lock, [this] { return work_queue.empty(); }); |
| 73 | cv.notify_all(); | ||
| 74 | std::unique_lock lock{mutex}; | ||
| 75 | finished = chunk_queue.Empty(); | ||
| 76 | } while (!finished); | ||
| 77 | } | 77 | } |
| 78 | 78 | ||
| 79 | void VKScheduler::DispatchWork() { | 79 | void VKScheduler::DispatchWork() { |
| 80 | if (chunk->Empty()) { | 80 | if (chunk->Empty()) { |
| 81 | return; | 81 | return; |
| 82 | } | 82 | } |
| 83 | chunk_queue.Push(std::move(chunk)); | 83 | { |
| 84 | cv.notify_all(); | 84 | std::lock_guard lock{work_mutex}; |
| 85 | work_queue.push(std::move(chunk)); | ||
| 86 | } | ||
| 87 | work_cv.notify_one(); | ||
| 85 | AcquireNewChunk(); | 88 | AcquireNewChunk(); |
| 86 | } | 89 | } |
| 87 | 90 | ||
| @@ -124,93 +127,101 @@ void VKScheduler::RequestOutsideRenderPassOperationContext() { | |||
| 124 | EndRenderPass(); | 127 | EndRenderPass(); |
| 125 | } | 128 | } |
| 126 | 129 | ||
| 127 | void VKScheduler::BindGraphicsPipeline(VkPipeline pipeline) { | 130 | bool VKScheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) { |
| 128 | if (state.graphics_pipeline == pipeline) { | 131 | if (state.graphics_pipeline == pipeline) { |
| 129 | return; | 132 | return false; |
| 130 | } | 133 | } |
| 131 | state.graphics_pipeline = pipeline; | 134 | state.graphics_pipeline = pipeline; |
| 132 | Record([pipeline](vk::CommandBuffer cmdbuf) { | 135 | return true; |
| 133 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); | ||
| 134 | }); | ||
| 135 | } | 136 | } |
| 136 | 137 | ||
| 137 | void VKScheduler::WorkerThread() { | 138 | void VKScheduler::WorkerThread() { |
| 138 | Common::SetCurrentThreadPriority(Common::ThreadPriority::High); | 139 | Common::SetCurrentThreadName("yuzu:VulkanWorker"); |
| 139 | std::unique_lock lock{mutex}; | ||
| 140 | do { | 140 | do { |
| 141 | cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; }); | 141 | if (work_queue.empty()) { |
| 142 | if (quit) { | 142 | wait_cv.notify_all(); |
| 143 | continue; | 143 | } |
| 144 | std::unique_ptr<CommandChunk> work; | ||
| 145 | { | ||
| 146 | std::unique_lock lock{work_mutex}; | ||
| 147 | work_cv.wait(lock, [this] { return !work_queue.empty() || quit; }); | ||
| 148 | if (quit) { | ||
| 149 | continue; | ||
| 150 | } | ||
| 151 | work = std::move(work_queue.front()); | ||
| 152 | work_queue.pop(); | ||
| 153 | } | ||
| 154 | const bool has_submit = work->HasSubmit(); | ||
| 155 | work->ExecuteAll(current_cmdbuf); | ||
| 156 | if (has_submit) { | ||
| 157 | AllocateWorkerCommandBuffer(); | ||
| 144 | } | 158 | } |
| 145 | auto extracted_chunk = std::move(chunk_queue.Front()); | 159 | std::lock_guard reserve_lock{reserve_mutex}; |
| 146 | chunk_queue.Pop(); | 160 | chunk_reserve.push_back(std::move(work)); |
| 147 | extracted_chunk->ExecuteAll(current_cmdbuf); | ||
| 148 | chunk_reserve.Push(std::move(extracted_chunk)); | ||
| 149 | } while (!quit); | 161 | } while (!quit); |
| 150 | } | 162 | } |
| 151 | 163 | ||
| 164 | void VKScheduler::AllocateWorkerCommandBuffer() { | ||
| 165 | current_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader()); | ||
| 166 | current_cmdbuf.Begin({ | ||
| 167 | .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, | ||
| 168 | .pNext = nullptr, | ||
| 169 | .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, | ||
| 170 | .pInheritanceInfo = nullptr, | ||
| 171 | }); | ||
| 172 | } | ||
| 173 | |||
| 152 | void VKScheduler::SubmitExecution(VkSemaphore semaphore) { | 174 | void VKScheduler::SubmitExecution(VkSemaphore semaphore) { |
| 153 | EndPendingOperations(); | 175 | EndPendingOperations(); |
| 154 | InvalidateState(); | 176 | InvalidateState(); |
| 155 | WaitWorker(); | ||
| 156 | 177 | ||
| 157 | std::unique_lock lock{mutex}; | 178 | const u64 signal_value = master_semaphore->NextTick(); |
| 179 | Record([semaphore, signal_value, this](vk::CommandBuffer cmdbuf) { | ||
| 180 | cmdbuf.End(); | ||
| 158 | 181 | ||
| 159 | current_cmdbuf.End(); | 182 | const u32 num_signal_semaphores = semaphore ? 2U : 1U; |
| 160 | 183 | ||
| 161 | const VkSemaphore timeline_semaphore = master_semaphore->Handle(); | 184 | const u64 wait_value = signal_value - 1; |
| 162 | const u32 num_signal_semaphores = semaphore ? 2U : 1U; | 185 | const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; |
| 163 | 186 | ||
| 164 | const u64 signal_value = master_semaphore->CurrentTick(); | 187 | const VkSemaphore timeline_semaphore = master_semaphore->Handle(); |
| 165 | const u64 wait_value = signal_value - 1; | 188 | const std::array signal_values{signal_value, u64(0)}; |
| 166 | const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; | 189 | const std::array signal_semaphores{timeline_semaphore, semaphore}; |
| 167 | 190 | ||
| 168 | master_semaphore->NextTick(); | 191 | const VkTimelineSemaphoreSubmitInfoKHR timeline_si{ |
| 169 | 192 | .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR, | |
| 170 | const std::array signal_values{signal_value, u64(0)}; | 193 | .pNext = nullptr, |
| 171 | const std::array signal_semaphores{timeline_semaphore, semaphore}; | 194 | .waitSemaphoreValueCount = 1, |
| 172 | 195 | .pWaitSemaphoreValues = &wait_value, | |
| 173 | const VkTimelineSemaphoreSubmitInfoKHR timeline_si{ | 196 | .signalSemaphoreValueCount = num_signal_semaphores, |
| 174 | .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR, | 197 | .pSignalSemaphoreValues = signal_values.data(), |
| 175 | .pNext = nullptr, | 198 | }; |
| 176 | .waitSemaphoreValueCount = 1, | 199 | const VkSubmitInfo submit_info{ |
| 177 | .pWaitSemaphoreValues = &wait_value, | 200 | .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, |
| 178 | .signalSemaphoreValueCount = num_signal_semaphores, | 201 | .pNext = &timeline_si, |
| 179 | .pSignalSemaphoreValues = signal_values.data(), | 202 | .waitSemaphoreCount = 1, |
| 180 | }; | 203 | .pWaitSemaphores = &timeline_semaphore, |
| 181 | const VkSubmitInfo submit_info{ | 204 | .pWaitDstStageMask = &wait_stage_mask, |
| 182 | .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, | 205 | .commandBufferCount = 1, |
| 183 | .pNext = &timeline_si, | 206 | .pCommandBuffers = cmdbuf.address(), |
| 184 | .waitSemaphoreCount = 1, | 207 | .signalSemaphoreCount = num_signal_semaphores, |
| 185 | .pWaitSemaphores = &timeline_semaphore, | 208 | .pSignalSemaphores = signal_semaphores.data(), |
| 186 | .pWaitDstStageMask = &wait_stage_mask, | 209 | }; |
| 187 | .commandBufferCount = 1, | 210 | switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) { |
| 188 | .pCommandBuffers = current_cmdbuf.address(), | 211 | case VK_SUCCESS: |
| 189 | .signalSemaphoreCount = num_signal_semaphores, | 212 | break; |
| 190 | .pSignalSemaphores = signal_semaphores.data(), | 213 | case VK_ERROR_DEVICE_LOST: |
| 191 | }; | 214 | device.ReportLoss(); |
| 192 | switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) { | 215 | [[fallthrough]]; |
| 193 | case VK_SUCCESS: | 216 | default: |
| 194 | break; | 217 | vk::Check(result); |
| 195 | case VK_ERROR_DEVICE_LOST: | 218 | } |
| 196 | device.ReportLoss(); | 219 | }); |
| 197 | [[fallthrough]]; | 220 | chunk->MarkSubmit(); |
| 198 | default: | 221 | DispatchWork(); |
| 199 | vk::Check(result); | ||
| 200 | } | ||
| 201 | } | 222 | } |
| 202 | 223 | ||
| 203 | void VKScheduler::AllocateNewContext() { | 224 | void VKScheduler::AllocateNewContext() { |
| 204 | std::unique_lock lock{mutex}; | ||
| 205 | |||
| 206 | current_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader()); | ||
| 207 | current_cmdbuf.Begin({ | ||
| 208 | .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, | ||
| 209 | .pNext = nullptr, | ||
| 210 | .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, | ||
| 211 | .pInheritanceInfo = nullptr, | ||
| 212 | }); | ||
| 213 | |||
| 214 | // Enable counters once again. These are disabled when a command buffer is finished. | 225 | // Enable counters once again. These are disabled when a command buffer is finished. |
| 215 | if (query_cache) { | 226 | if (query_cache) { |
| 216 | query_cache->UpdateCounters(); | 227 | query_cache->UpdateCounters(); |
| @@ -265,12 +276,13 @@ void VKScheduler::EndRenderPass() { | |||
| 265 | } | 276 | } |
| 266 | 277 | ||
| 267 | void VKScheduler::AcquireNewChunk() { | 278 | void VKScheduler::AcquireNewChunk() { |
| 268 | if (chunk_reserve.Empty()) { | 279 | std::lock_guard lock{reserve_mutex}; |
| 280 | if (chunk_reserve.empty()) { | ||
| 269 | chunk = std::make_unique<CommandChunk>(); | 281 | chunk = std::make_unique<CommandChunk>(); |
| 270 | return; | 282 | return; |
| 271 | } | 283 | } |
| 272 | chunk = std::move(chunk_reserve.Front()); | 284 | chunk = std::move(chunk_reserve.back()); |
| 273 | chunk_reserve.Pop(); | 285 | chunk_reserve.pop_back(); |
| 274 | } | 286 | } |
| 275 | 287 | ||
| 276 | } // namespace Vulkan | 288 | } // namespace Vulkan |
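The scheduler rework replaces the lock-free SPSC queue with a plain std::queue guarded by work_mutex and two condition variables: work_cv wakes the worker when chunks arrive, and wait_cv wakes WaitWorker() once the queue drains. A condensed sketch of that handoff with the command chunks replaced by std::function jobs (Worker is an illustrative stand-in, not the yuzu class):

#include <condition_variable>
#include <functional>
#include <mutex>
#include <queue>
#include <thread>

class Worker {
public:
    Worker() : thread{&Worker::Loop, this} {}

    ~Worker() {
        {
            std::lock_guard lock{mutex};
            quit = true; // set under the lock, as in the patched destructor
        }
        work_cv.notify_all();
        thread.join();
    }

    void Push(std::function<void()> job) {
        {
            std::lock_guard lock{mutex};
            queue.push(std::move(job));
        }
        work_cv.notify_one(); // mirrors DispatchWork()
    }

    void WaitIdle() {
        // Blocks until every queued job has been picked up (mirrors WaitWorker()).
        std::unique_lock lock{mutex};
        wait_cv.wait(lock, [this] { return queue.empty(); });
    }

private:
    void Loop() {
        for (;;) {
            std::function<void()> job;
            {
                std::unique_lock lock{mutex};
                if (queue.empty()) {
                    wait_cv.notify_all(); // tell waiters the queue drained
                }
                work_cv.wait(lock, [this] { return !queue.empty() || quit; });
                if (quit) {
                    return;
                }
                job = std::move(queue.front());
                queue.pop();
            }
            job(); // executed outside the lock, like ExecuteAll()
        }
    }

    std::mutex mutex;
    std::condition_variable work_cv;
    std::condition_variable wait_cv;
    std::queue<std::function<void()>> queue;
    bool quit = false;
    std::thread thread; // declared last so members exist before Loop() starts
};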
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 3ce48e9d2..cf39a2363 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h | |||
| @@ -8,12 +8,12 @@ | |||
| 8 | #include <condition_variable> | 8 | #include <condition_variable> |
| 9 | #include <cstddef> | 9 | #include <cstddef> |
| 10 | #include <memory> | 10 | #include <memory> |
| 11 | #include <stack> | ||
| 12 | #include <thread> | 11 | #include <thread> |
| 13 | #include <utility> | 12 | #include <utility> |
| 13 | #include <queue> | ||
| 14 | |||
| 14 | #include "common/alignment.h" | 15 | #include "common/alignment.h" |
| 15 | #include "common/common_types.h" | 16 | #include "common/common_types.h" |
| 16 | #include "common/threadsafe_queue.h" | ||
| 17 | #include "video_core/renderer_vulkan/vk_master_semaphore.h" | 17 | #include "video_core/renderer_vulkan/vk_master_semaphore.h" |
| 18 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 18 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 19 | 19 | ||
| @@ -22,6 +22,7 @@ namespace Vulkan { | |||
| 22 | class CommandPool; | 22 | class CommandPool; |
| 23 | class Device; | 23 | class Device; |
| 24 | class Framebuffer; | 24 | class Framebuffer; |
| 25 | class GraphicsPipeline; | ||
| 25 | class StateTracker; | 26 | class StateTracker; |
| 26 | class VKQueryCache; | 27 | class VKQueryCache; |
| 27 | 28 | ||
| @@ -52,8 +53,8 @@ public: | |||
| 52 | /// of a renderpass. | 53 | /// of a renderpass. |
| 53 | void RequestOutsideRenderPassOperationContext(); | 54 | void RequestOutsideRenderPassOperationContext(); |
| 54 | 55 | ||
| 55 | /// Binds a pipeline to the current execution context. | 56 | /// Updates the pipeline to the current execution context. |
| 56 | void BindGraphicsPipeline(VkPipeline pipeline); | 57 | bool UpdateGraphicsPipeline(GraphicsPipeline* pipeline); |
| 57 | 58 | ||
| 58 | /// Invalidates current command buffer state except for render passes | 59 | /// Invalidates current command buffer state except for render passes |
| 59 | void InvalidateState(); | 60 | void InvalidateState(); |
| @@ -85,6 +86,10 @@ public: | |||
| 85 | 86 | ||
| 86 | /// Waits for the given tick to trigger on the GPU. | 87 | /// Waits for the given tick to trigger on the GPU. |
| 87 | void Wait(u64 tick) { | 88 | void Wait(u64 tick) { |
| 89 | if (tick >= master_semaphore->CurrentTick()) { | ||
| 90 | // Make sure we are not waiting for the current tick without signalling | ||
| 91 | Flush(); | ||
| 92 | } | ||
| 88 | master_semaphore->Wait(tick); | 93 | master_semaphore->Wait(tick); |
| 89 | } | 94 | } |
| 90 | 95 | ||
| @@ -154,15 +159,24 @@ private: | |||
| 154 | return true; | 159 | return true; |
| 155 | } | 160 | } |
| 156 | 161 | ||
| 162 | void MarkSubmit() { | ||
| 163 | submit = true; | ||
| 164 | } | ||
| 165 | |||
| 157 | bool Empty() const { | 166 | bool Empty() const { |
| 158 | return command_offset == 0; | 167 | return command_offset == 0; |
| 159 | } | 168 | } |
| 160 | 169 | ||
| 170 | bool HasSubmit() const { | ||
| 171 | return submit; | ||
| 172 | } | ||
| 173 | |||
| 161 | private: | 174 | private: |
| 162 | Command* first = nullptr; | 175 | Command* first = nullptr; |
| 163 | Command* last = nullptr; | 176 | Command* last = nullptr; |
| 164 | 177 | ||
| 165 | size_t command_offset = 0; | 178 | size_t command_offset = 0; |
| 179 | bool submit = false; | ||
| 166 | alignas(std::max_align_t) std::array<u8, 0x8000> data{}; | 180 | alignas(std::max_align_t) std::array<u8, 0x8000> data{}; |
| 167 | }; | 181 | }; |
| 168 | 182 | ||
| @@ -170,11 +184,13 @@ private: | |||
| 170 | VkRenderPass renderpass = nullptr; | 184 | VkRenderPass renderpass = nullptr; |
| 171 | VkFramebuffer framebuffer = nullptr; | 185 | VkFramebuffer framebuffer = nullptr; |
| 172 | VkExtent2D render_area = {0, 0}; | 186 | VkExtent2D render_area = {0, 0}; |
| 173 | VkPipeline graphics_pipeline = nullptr; | 187 | GraphicsPipeline* graphics_pipeline = nullptr; |
| 174 | }; | 188 | }; |
| 175 | 189 | ||
| 176 | void WorkerThread(); | 190 | void WorkerThread(); |
| 177 | 191 | ||
| 192 | void AllocateWorkerCommandBuffer(); | ||
| 193 | |||
| 178 | void SubmitExecution(VkSemaphore semaphore); | 194 | void SubmitExecution(VkSemaphore semaphore); |
| 179 | 195 | ||
| 180 | void AllocateNewContext(); | 196 | void AllocateNewContext(); |
| @@ -204,11 +220,13 @@ private: | |||
| 204 | std::array<VkImage, 9> renderpass_images{}; | 220 | std::array<VkImage, 9> renderpass_images{}; |
| 205 | std::array<VkImageSubresourceRange, 9> renderpass_image_ranges{}; | 221 | std::array<VkImageSubresourceRange, 9> renderpass_image_ranges{}; |
| 206 | 222 | ||
| 207 | Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue; | 223 | std::queue<std::unique_ptr<CommandChunk>> work_queue; |
| 208 | Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve; | 224 | std::vector<std::unique_ptr<CommandChunk>> chunk_reserve; |
| 209 | std::mutex mutex; | 225 | std::mutex reserve_mutex; |
| 210 | std::condition_variable cv; | 226 | std::mutex work_mutex; |
| 211 | bool quit = false; | 227 | std::condition_variable work_cv; |
| 228 | std::condition_variable wait_cv; | ||
| 229 | std::atomic_bool quit{}; | ||
| 212 | }; | 230 | }; |
| 213 | 231 | ||
| 214 | } // namespace Vulkan | 232 | } // namespace Vulkan |
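The new guard in Wait() closes a subtle deadlock with timeline semaphores: ticks below CurrentTick() have already been handed to a submit and will eventually be signalled, but the current tick is only signalled by a future submit, so waiting on it without flushing first would block forever. A toy sketch of the invariant, with the semaphore reduced to a counter (FakeSemaphore and NeedsFlushBeforeWait are hypothetical):

#include <cstdint>

struct FakeSemaphore {
    std::uint64_t current_tick = 5; // tick being recorded, not yet submitted
    std::uint64_t CurrentTick() const { return current_tick; }
};

// Mirrors the guard in VKScheduler::Wait(): waiting on the current tick
// requires a Flush() so that a submit will eventually signal it.
bool NeedsFlushBeforeWait(const FakeSemaphore& sem, std::uint64_t tick) {
    return tick >= sem.CurrentTick();
}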
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp deleted file mode 100644 index c6846d886..000000000 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ /dev/null | |||
| @@ -1,3166 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <functional> | ||
| 6 | #include <limits> | ||
| 7 | #include <map> | ||
| 8 | #include <optional> | ||
| 9 | #include <type_traits> | ||
| 10 | #include <unordered_map> | ||
| 11 | #include <utility> | ||
| 12 | |||
| 13 | #include <fmt/format.h> | ||
| 14 | |||
| 15 | #include <sirit/sirit.h> | ||
| 16 | |||
| 17 | #include "common/alignment.h" | ||
| 18 | #include "common/assert.h" | ||
| 19 | #include "common/common_types.h" | ||
| 20 | #include "common/logging/log.h" | ||
| 21 | #include "video_core/engines/maxwell_3d.h" | ||
| 22 | #include "video_core/engines/shader_bytecode.h" | ||
| 23 | #include "video_core/engines/shader_header.h" | ||
| 24 | #include "video_core/engines/shader_type.h" | ||
| 25 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | ||
| 26 | #include "video_core/shader/node.h" | ||
| 27 | #include "video_core/shader/shader_ir.h" | ||
| 28 | #include "video_core/shader/transform_feedback.h" | ||
| 29 | #include "video_core/vulkan_common/vulkan_device.h" | ||
| 30 | |||
| 31 | namespace Vulkan { | ||
| 32 | |||
| 33 | namespace { | ||
| 34 | |||
| 35 | using Sirit::Id; | ||
| 36 | using Tegra::Engines::ShaderType; | ||
| 37 | using Tegra::Shader::Attribute; | ||
| 38 | using Tegra::Shader::PixelImap; | ||
| 39 | using Tegra::Shader::Register; | ||
| 40 | using namespace VideoCommon::Shader; | ||
| 41 | |||
| 42 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 43 | using Operation = const OperationNode&; | ||
| 44 | |||
| 45 | class ASTDecompiler; | ||
| 46 | class ExprDecompiler; | ||
| 47 | |||
| 48 | // TODO(Rodrigo): Use rasterizer's value | ||
| 49 | constexpr u32 MaxConstBufferFloats = 0x4000; | ||
| 50 | constexpr u32 MaxConstBufferElements = MaxConstBufferFloats / 4; | ||
| 51 | |||
| 52 | constexpr u32 NumInputPatches = 32; // This value seems to be the standard | ||
| 53 | |||
| 54 | enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; | ||
| 55 | |||
| 56 | class Expression final { | ||
| 57 | public: | ||
| 58 | Expression(Id id_, Type type_) : id{id_}, type{type_} { | ||
| 59 | ASSERT(type_ != Type::Void); | ||
| 60 | } | ||
| 61 | Expression() : type{Type::Void} {} | ||
| 62 | |||
| 63 | Id id{}; | ||
| 64 | Type type{}; | ||
| 65 | }; | ||
| 66 | static_assert(std::is_standard_layout_v<Expression>); | ||
| 67 | |||
| 68 | struct TexelBuffer { | ||
| 69 | Id image_type{}; | ||
| 70 | Id image{}; | ||
| 71 | }; | ||
| 72 | |||
| 73 | struct SampledImage { | ||
| 74 | Id image_type{}; | ||
| 75 | Id sampler_type{}; | ||
| 76 | Id sampler_pointer_type{}; | ||
| 77 | Id variable{}; | ||
| 78 | }; | ||
| 79 | |||
| 80 | struct StorageImage { | ||
| 81 | Id image_type{}; | ||
| 82 | Id image{}; | ||
| 83 | }; | ||
| 84 | |||
| 85 | struct AttributeType { | ||
| 86 | Type type; | ||
| 87 | Id scalar; | ||
| 88 | Id vector; | ||
| 89 | }; | ||
| 90 | |||
| 91 | struct VertexIndices { | ||
| 92 | std::optional<u32> position; | ||
| 93 | std::optional<u32> layer; | ||
| 94 | std::optional<u32> viewport; | ||
| 95 | std::optional<u32> point_size; | ||
| 96 | std::optional<u32> clip_distances; | ||
| 97 | }; | ||
| 98 | |||
| 99 | struct GenericVaryingDescription { | ||
| 100 | Id id = nullptr; | ||
| 101 | u32 first_element = 0; | ||
| 102 | bool is_scalar = false; | ||
| 103 | }; | ||
| 104 | |||
| 105 | spv::Dim GetSamplerDim(const SamplerEntry& sampler) { | ||
| 106 | ASSERT(!sampler.is_buffer); | ||
| 107 | switch (sampler.type) { | ||
| 108 | case Tegra::Shader::TextureType::Texture1D: | ||
| 109 | return spv::Dim::Dim1D; | ||
| 110 | case Tegra::Shader::TextureType::Texture2D: | ||
| 111 | return spv::Dim::Dim2D; | ||
| 112 | case Tegra::Shader::TextureType::Texture3D: | ||
| 113 | return spv::Dim::Dim3D; | ||
| 114 | case Tegra::Shader::TextureType::TextureCube: | ||
| 115 | return spv::Dim::Cube; | ||
| 116 | default: | ||
| 117 | UNIMPLEMENTED_MSG("Unimplemented sampler type={}", sampler.type); | ||
| 118 | return spv::Dim::Dim2D; | ||
| 119 | } | ||
| 120 | } | ||
| 121 | |||
| 122 | std::pair<spv::Dim, bool> GetImageDim(const ImageEntry& image) { | ||
| 123 | switch (image.type) { | ||
| 124 | case Tegra::Shader::ImageType::Texture1D: | ||
| 125 | return {spv::Dim::Dim1D, false}; | ||
| 126 | case Tegra::Shader::ImageType::TextureBuffer: | ||
| 127 | return {spv::Dim::Buffer, false}; | ||
| 128 | case Tegra::Shader::ImageType::Texture1DArray: | ||
| 129 | return {spv::Dim::Dim1D, true}; | ||
| 130 | case Tegra::Shader::ImageType::Texture2D: | ||
| 131 | return {spv::Dim::Dim2D, false}; | ||
| 132 | case Tegra::Shader::ImageType::Texture2DArray: | ||
| 133 | return {spv::Dim::Dim2D, true}; | ||
| 134 | case Tegra::Shader::ImageType::Texture3D: | ||
| 135 | return {spv::Dim::Dim3D, false}; | ||
| 136 | default: | ||
| 137 | UNIMPLEMENTED_MSG("Unimplemented image type={}", image.type); | ||
| 138 | return {spv::Dim::Dim2D, false}; | ||
| 139 | } | ||
| 140 | } | ||
| 141 | |||
| 142 | /// Returns the number of vertices present in a primitive topology. | ||
| 143 | u32 GetNumPrimitiveTopologyVertices(Maxwell::PrimitiveTopology primitive_topology) { | ||
| 144 | switch (primitive_topology) { | ||
| 145 | case Maxwell::PrimitiveTopology::Points: | ||
| 146 | return 1; | ||
| 147 | case Maxwell::PrimitiveTopology::Lines: | ||
| 148 | case Maxwell::PrimitiveTopology::LineLoop: | ||
| 149 | case Maxwell::PrimitiveTopology::LineStrip: | ||
| 150 | return 2; | ||
| 151 | case Maxwell::PrimitiveTopology::Triangles: | ||
| 152 | case Maxwell::PrimitiveTopology::TriangleStrip: | ||
| 153 | case Maxwell::PrimitiveTopology::TriangleFan: | ||
| 154 | return 3; | ||
| 155 | case Maxwell::PrimitiveTopology::LinesAdjacency: | ||
| 156 | case Maxwell::PrimitiveTopology::LineStripAdjacency: | ||
| 157 | return 4; | ||
| 158 | case Maxwell::PrimitiveTopology::TrianglesAdjacency: | ||
| 159 | case Maxwell::PrimitiveTopology::TriangleStripAdjacency: | ||
| 160 | return 6; | ||
| 161 | case Maxwell::PrimitiveTopology::Quads: | ||
| 162 | UNIMPLEMENTED_MSG("Quads"); | ||
| 163 | return 3; | ||
| 164 | case Maxwell::PrimitiveTopology::QuadStrip: | ||
| 165 | UNIMPLEMENTED_MSG("QuadStrip"); | ||
| 166 | return 3; | ||
| 167 | case Maxwell::PrimitiveTopology::Polygon: | ||
| 168 | UNIMPLEMENTED_MSG("Polygon"); | ||
| 169 | return 3; | ||
| 170 | case Maxwell::PrimitiveTopology::Patches: | ||
| 171 | UNIMPLEMENTED_MSG("Patches"); | ||
| 172 | return 3; | ||
| 173 | default: | ||
| 174 | UNREACHABLE(); | ||
| 175 | return 3; | ||
| 176 | } | ||
| 177 | } | ||
| 178 | |||
| 179 | spv::ExecutionMode GetExecutionMode(Maxwell::TessellationPrimitive primitive) { | ||
| 180 | switch (primitive) { | ||
| 181 | case Maxwell::TessellationPrimitive::Isolines: | ||
| 182 | return spv::ExecutionMode::Isolines; | ||
| 183 | case Maxwell::TessellationPrimitive::Triangles: | ||
| 184 | return spv::ExecutionMode::Triangles; | ||
| 185 | case Maxwell::TessellationPrimitive::Quads: | ||
| 186 | return spv::ExecutionMode::Quads; | ||
| 187 | } | ||
| 188 | UNREACHABLE(); | ||
| 189 | return spv::ExecutionMode::Triangles; | ||
| 190 | } | ||
| 191 | |||
| 192 | spv::ExecutionMode GetExecutionMode(Maxwell::TessellationSpacing spacing) { | ||
| 193 | switch (spacing) { | ||
| 194 | case Maxwell::TessellationSpacing::Equal: | ||
| 195 | return spv::ExecutionMode::SpacingEqual; | ||
| 196 | case Maxwell::TessellationSpacing::FractionalOdd: | ||
| 197 | return spv::ExecutionMode::SpacingFractionalOdd; | ||
| 198 | case Maxwell::TessellationSpacing::FractionalEven: | ||
| 199 | return spv::ExecutionMode::SpacingFractionalEven; | ||
| 200 | } | ||
| 201 | UNREACHABLE(); | ||
| 202 | return spv::ExecutionMode::SpacingEqual; | ||
| 203 | } | ||
| 204 | |||
| 205 | spv::ExecutionMode GetExecutionMode(Maxwell::PrimitiveTopology input_topology) { | ||
| 206 | switch (input_topology) { | ||
| 207 | case Maxwell::PrimitiveTopology::Points: | ||
| 208 | return spv::ExecutionMode::InputPoints; | ||
| 209 | case Maxwell::PrimitiveTopology::Lines: | ||
| 210 | case Maxwell::PrimitiveTopology::LineLoop: | ||
| 211 | case Maxwell::PrimitiveTopology::LineStrip: | ||
| 212 | return spv::ExecutionMode::InputLines; | ||
| 213 | case Maxwell::PrimitiveTopology::Triangles: | ||
| 214 | case Maxwell::PrimitiveTopology::TriangleStrip: | ||
| 215 | case Maxwell::PrimitiveTopology::TriangleFan: | ||
| 216 | return spv::ExecutionMode::Triangles; | ||
| 217 | case Maxwell::PrimitiveTopology::LinesAdjacency: | ||
| 218 | case Maxwell::PrimitiveTopology::LineStripAdjacency: | ||
| 219 | return spv::ExecutionMode::InputLinesAdjacency; | ||
| 220 | case Maxwell::PrimitiveTopology::TrianglesAdjacency: | ||
| 221 | case Maxwell::PrimitiveTopology::TriangleStripAdjacency: | ||
| 222 | return spv::ExecutionMode::InputTrianglesAdjacency; | ||
| 223 | case Maxwell::PrimitiveTopology::Quads: | ||
| 224 | UNIMPLEMENTED_MSG("Quads"); | ||
| 225 | return spv::ExecutionMode::Triangles; | ||
| 226 | case Maxwell::PrimitiveTopology::QuadStrip: | ||
| 227 | UNIMPLEMENTED_MSG("QuadStrip"); | ||
| 228 | return spv::ExecutionMode::Triangles; | ||
| 229 | case Maxwell::PrimitiveTopology::Polygon: | ||
| 230 | UNIMPLEMENTED_MSG("Polygon"); | ||
| 231 | return spv::ExecutionMode::Triangles; | ||
| 232 | case Maxwell::PrimitiveTopology::Patches: | ||
| 233 | UNIMPLEMENTED_MSG("Patches"); | ||
| 234 | return spv::ExecutionMode::Triangles; | ||
| 235 | } | ||
| 236 | UNREACHABLE(); | ||
| 237 | return spv::ExecutionMode::Triangles; | ||
| 238 | } | ||
| 239 | |||
| 240 | spv::ExecutionMode GetExecutionMode(Tegra::Shader::OutputTopology output_topology) { | ||
| 241 | switch (output_topology) { | ||
| 242 | case Tegra::Shader::OutputTopology::PointList: | ||
| 243 | return spv::ExecutionMode::OutputPoints; | ||
| 244 | case Tegra::Shader::OutputTopology::LineStrip: | ||
| 245 | return spv::ExecutionMode::OutputLineStrip; | ||
| 246 | case Tegra::Shader::OutputTopology::TriangleStrip: | ||
| 247 | return spv::ExecutionMode::OutputTriangleStrip; | ||
| 248 | default: | ||
| 249 | UNREACHABLE(); | ||
| 250 | return spv::ExecutionMode::OutputPoints; | ||
| 251 | } | ||
| 252 | } | ||
| 253 | |||
| 254 | /// Returns true if an attribute index is one of the 32 generic attributes | ||
| 255 | constexpr bool IsGenericAttribute(Attribute::Index attribute) { | ||
| 256 | return attribute >= Attribute::Index::Attribute_0 && | ||
| 257 | attribute <= Attribute::Index::Attribute_31; | ||
| 258 | } | ||
| 259 | |||
| 260 | /// Returns the location of a generic attribute | ||
| 261 | u32 GetGenericAttributeLocation(Attribute::Index attribute) { | ||
| 262 | ASSERT(IsGenericAttribute(attribute)); | ||
| 263 | return static_cast<u32>(attribute) - static_cast<u32>(Attribute::Index::Attribute_0); | ||
| 264 | } | ||
| 265 | |||
| 266 | /// Returns true if an object has to be treated as precise | ||
| 267 | bool IsPrecise(Operation operand) { | ||
| 268 | const auto& meta{operand.GetMeta()}; | ||
| 269 | if (std::holds_alternative<MetaArithmetic>(meta)) { | ||
| 270 | return std::get<MetaArithmetic>(meta).precise; | ||
| 271 | } | ||
| 272 | return false; | ||
| 273 | } | ||
| 274 | |||
| 275 | class SPIRVDecompiler final : public Sirit::Module { | ||
| 276 | public: | ||
| 277 | explicit SPIRVDecompiler(const Device& device_, const ShaderIR& ir_, ShaderType stage_, | ||
| 278 | const Registry& registry_, const Specialization& specialization_) | ||
| 279 | : Module(0x00010300), device{device_}, ir{ir_}, stage{stage_}, header{ir_.GetHeader()}, | ||
| 280 | registry{registry_}, specialization{specialization_} { | ||
| 281 | if (stage_ != ShaderType::Compute) { | ||
| 282 | transform_feedback = BuildTransformFeedback(registry_.GetGraphicsInfo()); | ||
| 283 | } | ||
| 284 | |||
| 285 | AddCapability(spv::Capability::Shader); | ||
| 286 | AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess); | ||
| 287 | AddCapability(spv::Capability::ImageQuery); | ||
| 288 | AddCapability(spv::Capability::Image1D); | ||
| 289 | AddCapability(spv::Capability::ImageBuffer); | ||
| 290 | AddCapability(spv::Capability::ImageGatherExtended); | ||
| 291 | AddCapability(spv::Capability::SampledBuffer); | ||
| 292 | AddCapability(spv::Capability::StorageImageWriteWithoutFormat); | ||
| 293 | AddCapability(spv::Capability::DrawParameters); | ||
| 294 | AddCapability(spv::Capability::SubgroupBallotKHR); | ||
| 295 | AddCapability(spv::Capability::SubgroupVoteKHR); | ||
| 296 | AddExtension("SPV_KHR_16bit_storage"); | ||
| 297 | AddExtension("SPV_KHR_shader_ballot"); | ||
| 298 | AddExtension("SPV_KHR_subgroup_vote"); | ||
| 299 | AddExtension("SPV_KHR_storage_buffer_storage_class"); | ||
| 300 | AddExtension("SPV_KHR_variable_pointers"); | ||
| 301 | AddExtension("SPV_KHR_shader_draw_parameters"); | ||
| 302 | |||
| 303 | if (!transform_feedback.empty()) { | ||
| 304 | if (device.IsExtTransformFeedbackSupported()) { | ||
| 305 | AddCapability(spv::Capability::TransformFeedback); | ||
| 306 | } else { | ||
| 307 | LOG_ERROR(Render_Vulkan, "Shader requires transform feedbacks but these are not " | ||
| 308 | "supported on this device"); | ||
| 309 | } | ||
| 310 | } | ||
| 311 | if (ir.UsesLayer() || ir.UsesViewportIndex()) { | ||
| 312 | if (ir.UsesViewportIndex()) { | ||
| 313 | AddCapability(spv::Capability::MultiViewport); | ||
| 314 | } | ||
| 315 | if (stage != ShaderType::Geometry && device.IsExtShaderViewportIndexLayerSupported()) { | ||
| 316 | AddExtension("SPV_EXT_shader_viewport_index_layer"); | ||
| 317 | AddCapability(spv::Capability::ShaderViewportIndexLayerEXT); | ||
| 318 | } | ||
| 319 | } | ||
| 320 | if (device.IsFormatlessImageLoadSupported()) { | ||
| 321 | AddCapability(spv::Capability::StorageImageReadWithoutFormat); | ||
| 322 | } | ||
| 323 | if (device.IsFloat16Supported()) { | ||
| 324 | AddCapability(spv::Capability::Float16); | ||
| 325 | } | ||
| 326 | t_scalar_half = Name(TypeFloat(device_.IsFloat16Supported() ? 16 : 32), "scalar_half"); | ||
| 327 | t_half = Name(TypeVector(t_scalar_half, 2), "half"); | ||
| 328 | |||
| 329 | const Id main = Decompile(); | ||
| 330 | |||
| 331 | switch (stage) { | ||
| 332 | case ShaderType::Vertex: | ||
| 333 | AddEntryPoint(spv::ExecutionModel::Vertex, main, "main", interfaces); | ||
| 334 | break; | ||
| 335 | case ShaderType::TesselationControl: | ||
| 336 | AddCapability(spv::Capability::Tessellation); | ||
| 337 | AddEntryPoint(spv::ExecutionModel::TessellationControl, main, "main", interfaces); | ||
| 338 | AddExecutionMode(main, spv::ExecutionMode::OutputVertices, | ||
| 339 | header.common2.threads_per_input_primitive); | ||
| 340 | break; | ||
| 341 | case ShaderType::TesselationEval: { | ||
| 342 | const auto& info = registry.GetGraphicsInfo(); | ||
| 343 | AddCapability(spv::Capability::Tessellation); | ||
| 344 | AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces); | ||
| 345 | AddExecutionMode(main, GetExecutionMode(info.tessellation_primitive)); | ||
| 346 | AddExecutionMode(main, GetExecutionMode(info.tessellation_spacing)); | ||
| 347 | AddExecutionMode(main, info.tessellation_clockwise | ||
| 348 | ? spv::ExecutionMode::VertexOrderCw | ||
| 349 | : spv::ExecutionMode::VertexOrderCcw); | ||
| 350 | break; | ||
| 351 | } | ||
| 352 | case ShaderType::Geometry: { | ||
| 353 | const auto& info = registry.GetGraphicsInfo(); | ||
| 354 | AddCapability(spv::Capability::Geometry); | ||
| 355 | AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces); | ||
| 356 | AddExecutionMode(main, GetExecutionMode(info.primitive_topology)); | ||
| 357 | AddExecutionMode(main, GetExecutionMode(header.common3.output_topology)); | ||
| 358 | AddExecutionMode(main, spv::ExecutionMode::OutputVertices, | ||
| 359 | header.common4.max_output_vertices); | ||
| 360 | // TODO(Rodrigo): Where can we get this info from? | ||
| 361 | AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U); | ||
| 362 | break; | ||
| 363 | } | ||
| 364 | case ShaderType::Fragment: | ||
| 365 | AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces); | ||
| 366 | AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); | ||
| 367 | if (header.ps.omap.depth) { | ||
| 368 | AddExecutionMode(main, spv::ExecutionMode::DepthReplacing); | ||
| 369 | } | ||
| 370 | if (specialization.early_fragment_tests) { | ||
| 371 | AddExecutionMode(main, spv::ExecutionMode::EarlyFragmentTests); | ||
| 372 | } | ||
| 373 | break; | ||
| 374 | case ShaderType::Compute: | ||
| 375 | const auto workgroup_size = specialization.workgroup_size; | ||
| 376 | AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0], | ||
| 377 | workgroup_size[1], workgroup_size[2]); | ||
| 378 | AddEntryPoint(spv::ExecutionModel::GLCompute, main, "main", interfaces); | ||
| 379 | break; | ||
| 380 | } | ||
| 381 | } | ||
| 382 | |||
| 383 | private: | ||
| 384 | Id Decompile() { | ||
| 385 | DeclareCommon(); | ||
| 386 | DeclareVertex(); | ||
| 387 | DeclareTessControl(); | ||
| 388 | DeclareTessEval(); | ||
| 389 | DeclareGeometry(); | ||
| 390 | DeclareFragment(); | ||
| 391 | DeclareCompute(); | ||
| 392 | DeclareRegisters(); | ||
| 393 | DeclareCustomVariables(); | ||
| 394 | DeclarePredicates(); | ||
| 395 | DeclareLocalMemory(); | ||
| 396 | DeclareSharedMemory(); | ||
| 397 | DeclareInternalFlags(); | ||
| 398 | DeclareInputAttributes(); | ||
| 399 | DeclareOutputAttributes(); | ||
| 400 | |||
| 401 | u32 binding = specialization.base_binding; | ||
| 402 | binding = DeclareConstantBuffers(binding); | ||
| 403 | binding = DeclareGlobalBuffers(binding); | ||
| 404 | binding = DeclareUniformTexels(binding); | ||
| 405 | binding = DeclareSamplers(binding); | ||
| 406 | binding = DeclareStorageTexels(binding); | ||
| 407 | binding = DeclareImages(binding); | ||
| 408 | |||
| 409 | const Id main = OpFunction(t_void, {}, TypeFunction(t_void)); | ||
| 410 | AddLabel(); | ||
| 411 | |||
| 412 | if (ir.IsDecompiled()) { | ||
| 413 | DeclareFlowVariables(); | ||
| 414 | DecompileAST(); | ||
| 415 | } else { | ||
| 416 | AllocateLabels(); | ||
| 417 | DecompileBranchMode(); | ||
| 418 | } | ||
| 419 | |||
| 420 | OpReturn(); | ||
| 421 | OpFunctionEnd(); | ||
| 422 | |||
| 423 | return main; | ||
| 424 | } | ||
| 425 | |||
| 426 | void DefinePrologue() { | ||
| 427 | if (stage == ShaderType::Vertex) { | ||
| 428 | // Clear Position to avoid reading trash on the Z conversion. | ||
| 429 | const auto position_index = out_indices.position.value(); | ||
| 430 | const Id position = AccessElement(t_out_float4, out_vertex, position_index); | ||
| 431 | OpStore(position, v_varying_default); | ||
| 432 | |||
| 433 | if (specialization.point_size) { | ||
| 434 | const u32 point_size_index = out_indices.point_size.value(); | ||
| 435 | const Id out_point_size = AccessElement(t_out_float, out_vertex, point_size_index); | ||
| 436 | OpStore(out_point_size, Constant(t_float, *specialization.point_size)); | ||
| 437 | } | ||
| 438 | } | ||
| 439 | } | ||
| 440 | |||
| 441 | void DecompileAST(); | ||
| 442 | |||
| 443 | void DecompileBranchMode() { | ||
| 444 | const u32 first_address = ir.GetBasicBlocks().begin()->first; | ||
| 445 | const Id loop_label = OpLabel("loop"); | ||
| 446 | const Id merge_label = OpLabel("merge"); | ||
| 447 | const Id dummy_label = OpLabel(); | ||
| 448 | const Id jump_label = OpLabel(); | ||
| 449 | continue_label = OpLabel("continue"); | ||
| 450 | |||
| 451 | std::vector<Sirit::Literal> literals; | ||
| 452 | std::vector<Id> branch_labels; | ||
| 453 | for (const auto& [literal, label] : labels) { | ||
| 454 | literals.push_back(literal); | ||
| 455 | branch_labels.push_back(label); | ||
| 456 | } | ||
| 457 | |||
| 458 | jmp_to = OpVariable(TypePointer(spv::StorageClass::Function, t_uint), | ||
| 459 | spv::StorageClass::Function, Constant(t_uint, first_address)); | ||
| 460 | AddLocalVariable(jmp_to); | ||
| 461 | |||
| 462 | std::tie(ssy_flow_stack, ssy_flow_stack_top) = CreateFlowStack(); | ||
| 463 | std::tie(pbk_flow_stack, pbk_flow_stack_top) = CreateFlowStack(); | ||
| 464 | |||
| 465 | Name(jmp_to, "jmp_to"); | ||
| 466 | Name(ssy_flow_stack, "ssy_flow_stack"); | ||
| 467 | Name(ssy_flow_stack_top, "ssy_flow_stack_top"); | ||
| 468 | Name(pbk_flow_stack, "pbk_flow_stack"); | ||
| 469 | Name(pbk_flow_stack_top, "pbk_flow_stack_top"); | ||
| 470 | |||
| 471 | DefinePrologue(); | ||
| 472 | |||
| 473 | OpBranch(loop_label); | ||
| 474 | AddLabel(loop_label); | ||
| 475 | OpLoopMerge(merge_label, continue_label, spv::LoopControlMask::MaskNone); | ||
| 476 | OpBranch(dummy_label); | ||
| 477 | |||
| 478 | AddLabel(dummy_label); | ||
| 479 | const Id default_branch = OpLabel(); | ||
| 480 | const Id jmp_to_load = OpLoad(t_uint, jmp_to); | ||
| 481 | OpSelectionMerge(jump_label, spv::SelectionControlMask::MaskNone); | ||
| 482 | OpSwitch(jmp_to_load, default_branch, literals, branch_labels); | ||
| 483 | |||
| 484 | AddLabel(default_branch); | ||
| 485 | OpReturn(); | ||
| 486 | |||
| 487 | for (const auto& [address, bb] : ir.GetBasicBlocks()) { | ||
| 488 | AddLabel(labels.at(address)); | ||
| 489 | |||
| 490 | VisitBasicBlock(bb); | ||
| 491 | |||
| 492 | const auto next_it = labels.lower_bound(address + 1); | ||
| 493 | const Id next_label = next_it != labels.end() ? next_it->second : default_branch; | ||
| 494 | OpBranch(next_label); | ||
| 495 | } | ||
| 496 | |||
| 497 | AddLabel(jump_label); | ||
| 498 | OpBranch(continue_label); | ||
| 499 | AddLabel(continue_label); | ||
| 500 | OpBranch(loop_label); | ||
| 501 | AddLabel(merge_label); | ||
| 502 | } | ||
| 503 | |||
| 504 | private: | ||
| 505 | friend class ASTDecompiler; | ||
| 506 | friend class ExprDecompiler; | ||
| 507 | |||
| 508 | static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount); | ||
| 509 | |||
| 510 | void AllocateLabels() { | ||
| 511 | for (const auto& pair : ir.GetBasicBlocks()) { | ||
| 512 | const u32 address = pair.first; | ||
| 513 | labels.emplace(address, OpLabel(fmt::format("label_0x{:x}", address))); | ||
| 514 | } | ||
| 515 | } | ||
| 516 | |||
| 517 | void DeclareCommon() { | ||
| 518 | thread_id = | ||
| 519 | DeclareInputBuiltIn(spv::BuiltIn::SubgroupLocalInvocationId, t_in_uint, "thread_id"); | ||
| 520 | thread_masks[0] = | ||
| 521 | DeclareInputBuiltIn(spv::BuiltIn::SubgroupEqMask, t_in_uint4, "thread_eq_mask"); | ||
| 522 | thread_masks[1] = | ||
| 523 | DeclareInputBuiltIn(spv::BuiltIn::SubgroupGeMask, t_in_uint4, "thread_ge_mask"); | ||
| 524 | thread_masks[2] = | ||
| 525 | DeclareInputBuiltIn(spv::BuiltIn::SubgroupGtMask, t_in_uint4, "thread_gt_mask"); | ||
| 526 | thread_masks[3] = | ||
| 527 | DeclareInputBuiltIn(spv::BuiltIn::SubgroupLeMask, t_in_uint4, "thread_le_mask"); | ||
| 528 | thread_masks[4] = | ||
| 529 | DeclareInputBuiltIn(spv::BuiltIn::SubgroupLtMask, t_in_uint4, "thread_lt_mask"); | ||
| 530 | } | ||
| 531 | |||
| 532 | void DeclareVertex() { | ||
| 533 | if (stage != ShaderType::Vertex) { | ||
| 534 | return; | ||
| 535 | } | ||
| 536 | Id out_vertex_struct; | ||
| 537 | std::tie(out_vertex_struct, out_indices) = DeclareVertexStruct(); | ||
| 538 | const Id vertex_ptr = TypePointer(spv::StorageClass::Output, out_vertex_struct); | ||
| 539 | out_vertex = OpVariable(vertex_ptr, spv::StorageClass::Output); | ||
| 540 | interfaces.push_back(AddGlobalVariable(Name(out_vertex, "out_vertex"))); | ||
| 541 | |||
| 542 | // Declare input attributes | ||
| 543 | vertex_index = DeclareInputBuiltIn(spv::BuiltIn::VertexIndex, t_in_int, "vertex_index"); | ||
| 544 | instance_index = | ||
| 545 | DeclareInputBuiltIn(spv::BuiltIn::InstanceIndex, t_in_int, "instance_index"); | ||
| 546 | base_vertex = DeclareInputBuiltIn(spv::BuiltIn::BaseVertex, t_in_int, "base_vertex"); | ||
| 547 | base_instance = DeclareInputBuiltIn(spv::BuiltIn::BaseInstance, t_in_int, "base_instance"); | ||
| 548 | } | ||
| 549 | |||
| 550 | void DeclareTessControl() { | ||
| 551 | if (stage != ShaderType::TesselationControl) { | ||
| 552 | return; | ||
| 553 | } | ||
| 554 | DeclareInputVertexArray(NumInputPatches); | ||
| 555 | DeclareOutputVertexArray(header.common2.threads_per_input_primitive); | ||
| 556 | |||
| 557 | tess_level_outer = DeclareBuiltIn( | ||
| 558 | spv::BuiltIn::TessLevelOuter, spv::StorageClass::Output, | ||
| 559 | TypePointer(spv::StorageClass::Output, TypeArray(t_float, Constant(t_uint, 4U))), | ||
| 560 | "tess_level_outer"); | ||
| 561 | Decorate(tess_level_outer, spv::Decoration::Patch); | ||
| 562 | |||
| 563 | tess_level_inner = DeclareBuiltIn( | ||
| 564 | spv::BuiltIn::TessLevelInner, spv::StorageClass::Output, | ||
| 565 | TypePointer(spv::StorageClass::Output, TypeArray(t_float, Constant(t_uint, 2U))), | ||
| 566 | "tess_level_inner"); | ||
| 567 | Decorate(tess_level_inner, spv::Decoration::Patch); | ||
| 568 | |||
| 569 | invocation_id = DeclareInputBuiltIn(spv::BuiltIn::InvocationId, t_in_int, "invocation_id"); | ||
| 570 | } | ||
| 571 | |||
| 572 | void DeclareTessEval() { | ||
| 573 | if (stage != ShaderType::TesselationEval) { | ||
| 574 | return; | ||
| 575 | } | ||
| 576 | DeclareInputVertexArray(NumInputPatches); | ||
| 577 | DeclareOutputVertex(); | ||
| 578 | |||
| 579 | tess_coord = DeclareInputBuiltIn(spv::BuiltIn::TessCoord, t_in_float3, "tess_coord"); | ||
| 580 | } | ||
| 581 | |||
| 582 | void DeclareGeometry() { | ||
| 583 | if (stage != ShaderType::Geometry) { | ||
| 584 | return; | ||
| 585 | } | ||
| 586 | const auto& info = registry.GetGraphicsInfo(); | ||
| 587 | const u32 num_input = GetNumPrimitiveTopologyVertices(info.primitive_topology); | ||
| 588 | DeclareInputVertexArray(num_input); | ||
| 589 | DeclareOutputVertex(); | ||
| 590 | } | ||
| 591 | |||
| 592 | void DeclareFragment() { | ||
| 593 | if (stage != ShaderType::Fragment) { | ||
| 594 | return; | ||
| 595 | } | ||
| 596 | |||
| 597 | for (u32 rt = 0; rt < static_cast<u32>(std::size(frag_colors)); ++rt) { | ||
| 598 | if (!IsRenderTargetEnabled(rt)) { | ||
| 599 | continue; | ||
| 600 | } | ||
| 601 | const Id id = AddGlobalVariable(OpVariable(t_out_float4, spv::StorageClass::Output)); | ||
| 602 | Name(id, fmt::format("frag_color{}", rt)); | ||
| 603 | Decorate(id, spv::Decoration::Location, rt); | ||
| 604 | |||
| 605 | frag_colors[rt] = id; | ||
| 606 | interfaces.push_back(id); | ||
| 607 | } | ||
| 608 | |||
| 609 | if (header.ps.omap.depth) { | ||
| 610 | frag_depth = AddGlobalVariable(OpVariable(t_out_float, spv::StorageClass::Output)); | ||
| 611 | Name(frag_depth, "frag_depth"); | ||
| 612 | Decorate(frag_depth, spv::Decoration::BuiltIn, | ||
| 613 | static_cast<u32>(spv::BuiltIn::FragDepth)); | ||
| 614 | |||
| 615 | interfaces.push_back(frag_depth); | ||
| 616 | } | ||
| 617 | |||
| 618 | frag_coord = DeclareInputBuiltIn(spv::BuiltIn::FragCoord, t_in_float4, "frag_coord"); | ||
| 619 | front_facing = DeclareInputBuiltIn(spv::BuiltIn::FrontFacing, t_in_bool, "front_facing"); | ||
| 620 | point_coord = DeclareInputBuiltIn(spv::BuiltIn::PointCoord, t_in_float2, "point_coord"); | ||
| 621 | } | ||
| 622 | |||
| 623 | void DeclareCompute() { | ||
| 624 | if (stage != ShaderType::Compute) { | ||
| 625 | return; | ||
| 626 | } | ||
| 627 | |||
| 628 | workgroup_id = DeclareInputBuiltIn(spv::BuiltIn::WorkgroupId, t_in_uint3, "workgroup_id"); | ||
| 629 | local_invocation_id = | ||
| 630 | DeclareInputBuiltIn(spv::BuiltIn::LocalInvocationId, t_in_uint3, "local_invocation_id"); | ||
| 631 | } | ||
| 632 | |||
| 633 | void DeclareRegisters() { | ||
| 634 | for (const u32 gpr : ir.GetRegisters()) { | ||
| 635 | const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero); | ||
| 636 | Name(id, fmt::format("gpr_{}", gpr)); | ||
| 637 | registers.emplace(gpr, AddGlobalVariable(id)); | ||
| 638 | } | ||
| 639 | } | ||
| 640 | |||
| 641 | void DeclareCustomVariables() { | ||
| 642 | const u32 num_custom_variables = ir.GetNumCustomVariables(); | ||
| 643 | for (u32 i = 0; i < num_custom_variables; ++i) { | ||
| 644 | const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero); | ||
| 645 | Name(id, fmt::format("custom_var_{}", i)); | ||
| 646 | custom_variables.emplace(i, AddGlobalVariable(id)); | ||
| 647 | } | ||
| 648 | } | ||
| 649 | |||
| 650 | void DeclarePredicates() { | ||
| 651 | for (const auto pred : ir.GetPredicates()) { | ||
| 652 | const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); | ||
| 653 | Name(id, fmt::format("pred_{}", static_cast<u32>(pred))); | ||
| 654 | predicates.emplace(pred, AddGlobalVariable(id)); | ||
| 655 | } | ||
| 656 | } | ||
| 657 | |||
| 658 | void DeclareFlowVariables() { | ||
| 659 | for (u32 i = 0; i < ir.GetASTNumVariables(); i++) { | ||
| 660 | const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); | ||
| 661 | Name(id, fmt::format("flow_var_{}", static_cast<u32>(i))); | ||
| 662 | flow_variables.emplace(i, AddGlobalVariable(id)); | ||
| 663 | } | ||
| 664 | } | ||
| 665 | |||
| 666 | void DeclareLocalMemory() { | ||
| 667 | // TODO(Rodrigo): Unstub kernel local memory size and pass it from a register at | ||
| 668 | // specialization time. | ||
| 669 | const u64 lmem_size = stage == ShaderType::Compute ? 0x400 : header.GetLocalMemorySize(); | ||
| 670 | if (lmem_size == 0) { | ||
| 671 | return; | ||
| 672 | } | ||
| 673 | const auto element_count = static_cast<u32>(Common::AlignUp(lmem_size, 4) / 4); | ||
| 674 | const Id type_array = TypeArray(t_float, Constant(t_uint, element_count)); | ||
| 675 | const Id type_pointer = TypePointer(spv::StorageClass::Private, type_array); | ||
| 676 | Name(type_pointer, "LocalMemory"); | ||
| 677 | |||
| 678 | local_memory = | ||
| 679 | OpVariable(type_pointer, spv::StorageClass::Private, ConstantNull(type_array)); | ||
| 680 | AddGlobalVariable(Name(local_memory, "local_memory")); | ||
| 681 | } | ||
| 682 | |||
| 683 | void DeclareSharedMemory() { | ||
| 684 | if (stage != ShaderType::Compute) { | ||
| 685 | return; | ||
| 686 | } | ||
| 687 | t_smem_uint = TypePointer(spv::StorageClass::Workgroup, t_uint); | ||
| 688 | |||
| 689 | u32 smem_size = specialization.shared_memory_size * 4; | ||
| 690 | if (smem_size == 0) { | ||
| 691 | // Avoid declaring an empty array. | ||
| 692 | return; | ||
| 693 | } | ||
| 694 | const u32 limit = device.GetMaxComputeSharedMemorySize(); | ||
| 695 | if (smem_size > limit) { | ||
| 696 | LOG_ERROR(Render_Vulkan, "Shared memory size {} is clamped to host's limit {}", | ||
| 697 | smem_size, limit); | ||
| 698 | smem_size = limit; | ||
| 699 | } | ||
| 700 | |||
| 701 | const Id type_array = TypeArray(t_uint, Constant(t_uint, smem_size / 4)); | ||
| 702 | const Id type_pointer = TypePointer(spv::StorageClass::Workgroup, type_array); | ||
| 703 | Name(type_pointer, "SharedMemory"); | ||
| 704 | |||
| 705 | shared_memory = OpVariable(type_pointer, spv::StorageClass::Workgroup); | ||
| 706 | AddGlobalVariable(Name(shared_memory, "shared_memory")); | ||
| 707 | } | ||
| 708 | |||
| 709 | void DeclareInternalFlags() { | ||
| 710 | static constexpr std::array names{"zero", "sign", "carry", "overflow"}; | ||
| 711 | |||
| 712 | for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) { | ||
| 713 | const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); | ||
| 714 | internal_flags[flag] = AddGlobalVariable(Name(id, names[flag])); | ||
| 715 | } | ||
| 716 | } | ||
| 717 | |||
| 718 | void DeclareInputVertexArray(u32 length) { | ||
| 719 | constexpr auto storage = spv::StorageClass::Input; | ||
| 720 | std::tie(in_indices, in_vertex) = DeclareVertexArray(storage, "in_indices", length); | ||
| 721 | } | ||
| 722 | |||
| 723 | void DeclareOutputVertexArray(u32 length) { | ||
| 724 | constexpr auto storage = spv::StorageClass::Output; | ||
| 725 | std::tie(out_indices, out_vertex) = DeclareVertexArray(storage, "out_indices", length); | ||
| 726 | } | ||
| 727 | |||
| 728 | std::tuple<VertexIndices, Id> DeclareVertexArray(spv::StorageClass storage_class, | ||
| 729 | std::string name, u32 length) { | ||
| 730 | const auto [struct_id, indices] = DeclareVertexStruct(); | ||
| 731 | const Id vertex_array = TypeArray(struct_id, Constant(t_uint, length)); | ||
| 732 | const Id vertex_ptr = TypePointer(storage_class, vertex_array); | ||
| 733 | const Id vertex = OpVariable(vertex_ptr, storage_class); | ||
| 734 | AddGlobalVariable(Name(vertex, std::move(name))); | ||
| 735 | interfaces.push_back(vertex); | ||
| 736 | return {indices, vertex}; | ||
| 737 | } | ||
| 738 | |||
| 739 | void DeclareOutputVertex() { | ||
| 740 | Id out_vertex_struct; | ||
| 741 | std::tie(out_vertex_struct, out_indices) = DeclareVertexStruct(); | ||
| 742 | const Id out_vertex_ptr = TypePointer(spv::StorageClass::Output, out_vertex_struct); | ||
| 743 | out_vertex = OpVariable(out_vertex_ptr, spv::StorageClass::Output); | ||
| 744 | interfaces.push_back(AddGlobalVariable(Name(out_vertex, "out_vertex"))); | ||
| 745 | } | ||
| 746 | |||
| 747 | void DeclareInputAttributes() { | ||
| 748 | for (const auto index : ir.GetInputAttributes()) { | ||
| 749 | if (!IsGenericAttribute(index)) { | ||
| 750 | continue; | ||
| 751 | } | ||
| 752 | const u32 location = GetGenericAttributeLocation(index); | ||
| 753 | if (!IsAttributeEnabled(location)) { | ||
| 754 | continue; | ||
| 755 | } | ||
| 756 | const auto type_descriptor = GetAttributeType(location); | ||
| 757 | Id type; | ||
| 758 | if (IsInputAttributeArray()) { | ||
| 759 | type = GetTypeVectorDefinitionLut(type_descriptor.type).at(3); | ||
| 760 | type = TypeArray(type, Constant(t_uint, GetNumInputVertices())); | ||
| 761 | type = TypePointer(spv::StorageClass::Input, type); | ||
| 762 | } else { | ||
| 763 | type = type_descriptor.vector; | ||
| 764 | } | ||
| 765 | const Id id = OpVariable(type, spv::StorageClass::Input); | ||
| 766 | AddGlobalVariable(Name(id, fmt::format("in_attr{}", location))); | ||
| 767 | input_attributes.emplace(index, id); | ||
| 768 | interfaces.push_back(id); | ||
| 769 | |||
| 770 | Decorate(id, spv::Decoration::Location, location); | ||
| 771 | |||
| 772 | if (stage != ShaderType::Fragment) { | ||
| 773 | continue; | ||
| 774 | } | ||
| 775 | switch (header.ps.GetPixelImap(location)) { | ||
| 776 | case PixelImap::Constant: | ||
| 777 | Decorate(id, spv::Decoration::Flat); | ||
| 778 | break; | ||
| 779 | case PixelImap::Perspective: | ||
| 780 | // Default | ||
| 781 | break; | ||
| 782 | case PixelImap::ScreenLinear: | ||
| 783 | Decorate(id, spv::Decoration::NoPerspective); | ||
| 784 | break; | ||
| 785 | default: | ||
| 786 | UNREACHABLE_MSG("Unused attribute being fetched"); | ||
| 787 | } | ||
| 788 | } | ||
| 789 | } | ||
| 790 | |||
| 791 | void DeclareOutputAttributes() { | ||
| 792 | if (stage == ShaderType::Compute || stage == ShaderType::Fragment) { | ||
| 793 | return; | ||
| 794 | } | ||
| 795 | |||
| 796 | UNIMPLEMENTED_IF(registry.GetGraphicsInfo().tfb_enabled && stage != ShaderType::Vertex); | ||
| 797 | for (const auto index : ir.GetOutputAttributes()) { | ||
| 798 | if (!IsGenericAttribute(index)) { | ||
| 799 | continue; | ||
| 800 | } | ||
| 801 | DeclareOutputAttribute(index); | ||
| 802 | } | ||
| 803 | } | ||
| 804 | |||
| 805 | void DeclareOutputAttribute(Attribute::Index index) { | ||
| 806 | static constexpr std::string_view swizzle = "xyzw"; | ||
| 807 | |||
| 808 | const u32 location = GetGenericAttributeLocation(index); | ||
| 809 | u8 element = 0; | ||
| 810 | while (element < 4) { | ||
| 811 | const std::size_t remainder = 4 - element; | ||
| 812 | |||
| 813 | std::size_t num_components = remainder; | ||
| 814 | const std::optional tfb = GetTransformFeedbackInfo(index, element); | ||
| 815 | if (tfb) { | ||
| 816 | num_components = tfb->components; | ||
| 817 | } | ||
| 818 | |||
| 819 | Id type = GetTypeVectorDefinitionLut(Type::Float).at(num_components - 1); | ||
| 820 | Id varying_default = v_varying_default; | ||
| 821 | if (IsOutputAttributeArray()) { | ||
| 822 | const u32 num = GetNumOutputVertices(); | ||
| 823 | type = TypeArray(type, Constant(t_uint, num)); | ||
| 824 | if (device.GetDriverID() != VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR) { | ||
| 825 | // Intel's proprietary driver fails to set up defaults for arrayed output | ||
| 826 | // attributes. | ||
| 827 | varying_default = ConstantComposite(type, std::vector(num, varying_default)); | ||
| 828 | } | ||
| 829 | } | ||
| 830 | type = TypePointer(spv::StorageClass::Output, type); | ||
| 831 | |||
| 832 | std::string name = fmt::format("out_attr{}", location); | ||
| 833 | if (num_components < 4 || element > 0) { | ||
| 834 | name = fmt::format("{}_{}", name, swizzle.substr(element, num_components)); | ||
| 835 | } | ||
| 836 | |||
| 837 | const Id id = OpVariable(type, spv::StorageClass::Output, varying_default); | ||
| 838 | Name(AddGlobalVariable(id), name); | ||
| 839 | |||
| 840 | GenericVaryingDescription description; | ||
| 841 | description.id = id; | ||
| 842 | description.first_element = element; | ||
| 843 | description.is_scalar = num_components == 1; | ||
| 844 | for (u32 i = 0; i < num_components; ++i) { | ||
| 845 | const u8 offset = static_cast<u8>(static_cast<u32>(index) * 4 + element + i); | ||
| 846 | output_attributes.emplace(offset, description); | ||
| 847 | } | ||
| 848 | interfaces.push_back(id); | ||
| 849 | |||
| 850 | Decorate(id, spv::Decoration::Location, location); | ||
| 851 | if (element > 0) { | ||
| 852 | Decorate(id, spv::Decoration::Component, static_cast<u32>(element)); | ||
| 853 | } | ||
| 854 | if (tfb && device.IsExtTransformFeedbackSupported()) { | ||
| 855 | Decorate(id, spv::Decoration::XfbBuffer, static_cast<u32>(tfb->buffer)); | ||
| 856 | Decorate(id, spv::Decoration::XfbStride, static_cast<u32>(tfb->stride)); | ||
| 857 | Decorate(id, spv::Decoration::Offset, static_cast<u32>(tfb->offset)); | ||
| 858 | } | ||
| 859 | |||
| 860 | element = static_cast<u8>(static_cast<std::size_t>(element) + num_components); | ||
| 861 | } | ||
| 862 | } | ||
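A small sketch of the naming step above: varyings that do not start at component x or do not cover all four components get a swizzle suffix, so a two-component slice starting at element 1 of location 3 is named out_attr3_yz (the location is illustrative).

    #include <cassert>
    #include <string>

    int main() {
        const std::string swizzle = "xyzw";
        const std::size_t element = 1;        // first component of the slice
        const std::size_t num_components = 2; // components covered by the slice
        std::string name = "out_attr3";
        if (num_components < 4 || element > 0) {
            name += "_" + swizzle.substr(element, num_components);
        }
        assert(name == "out_attr3_yz");
        return 0;
    }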
| 863 | |||
| 864 | std::optional<VaryingTFB> GetTransformFeedbackInfo(Attribute::Index index, u8 element = 0) { | ||
| 865 | const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element); | ||
| 866 | const auto it = transform_feedback.find(location); | ||
| 867 | if (it == transform_feedback.end()) { | ||
| 868 | return {}; | ||
| 869 | } | ||
| 870 | return it->second; | ||
| 871 | } | ||
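Both the declaration loop and this lookup key varyings by a flat component index, attribute * 4 + element; a sketch of the mapping with illustrative values:

    #include <cassert>
    #include <cstdint>

    // Flat component index used as the transform feedback map key.
    constexpr std::uint8_t FlatLocation(std::uint32_t attribute, std::uint32_t element) {
        return static_cast<std::uint8_t>(attribute * 4 + element);
    }

    int main() {
        assert(FlatLocation(5, 2) == 22); // attribute 5, component .z
        return 0;
    }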
| 872 | |||
| 873 | u32 DeclareConstantBuffers(u32 binding) { | ||
| 874 | for (const auto& [index, size] : ir.GetConstantBuffers()) { | ||
| 875 | const Id type = device.IsKhrUniformBufferStandardLayoutSupported() ? t_cbuf_scalar_ubo | ||
| 876 | : t_cbuf_std140_ubo; | ||
| 877 | const Id id = OpVariable(type, spv::StorageClass::Uniform); | ||
| 878 | AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index))); | ||
| 879 | |||
| 880 | Decorate(id, spv::Decoration::Binding, binding++); | ||
| 881 | Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); | ||
| 882 | constant_buffers.emplace(index, id); | ||
| 883 | } | ||
| 884 | return binding; | ||
| 885 | } | ||
| 886 | |||
| 887 | u32 DeclareGlobalBuffers(u32 binding) { | ||
| 888 | for (const auto& [base, usage] : ir.GetGlobalMemory()) { | ||
| 889 | const Id id = OpVariable(t_gmem_ssbo, spv::StorageClass::StorageBuffer); | ||
| 890 | AddGlobalVariable( | ||
| 891 | Name(id, fmt::format("gmem_{}_{}", base.cbuf_index, base.cbuf_offset))); | ||
| 892 | |||
| 893 | Decorate(id, spv::Decoration::Binding, binding++); | ||
| 894 | Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); | ||
| 895 | global_buffers.emplace(base, id); | ||
| 896 | } | ||
| 897 | return binding; | ||
| 898 | } | ||
| 899 | |||
| 900 | u32 DeclareUniformTexels(u32 binding) { | ||
| 901 | for (const auto& sampler : ir.GetSamplers()) { | ||
| 902 | if (!sampler.is_buffer) { | ||
| 903 | continue; | ||
| 904 | } | ||
| 905 | ASSERT(!sampler.is_array); | ||
| 906 | ASSERT(!sampler.is_shadow); | ||
| 907 | |||
| 908 | constexpr auto dim = spv::Dim::Buffer; | ||
| 909 | constexpr int depth = 0; | ||
| 910 | constexpr int arrayed = 0; | ||
| 911 | constexpr bool ms = false; | ||
| 912 | constexpr int sampled = 1; | ||
| 913 | constexpr auto format = spv::ImageFormat::Unknown; | ||
| 914 | const Id image_type = TypeImage(t_float, dim, depth, arrayed, ms, sampled, format); | ||
| 915 | const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type); | ||
| 916 | const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant); | ||
| 917 | AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.index))); | ||
| 918 | Decorate(id, spv::Decoration::Binding, binding++); | ||
| 919 | Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); | ||
| 920 | |||
| 921 | uniform_texels.emplace(sampler.index, TexelBuffer{image_type, id}); | ||
| 922 | } | ||
| 923 | return binding; | ||
| 924 | } | ||
| 925 | |||
| 926 | u32 DeclareSamplers(u32 binding) { | ||
| 927 | for (const auto& sampler : ir.GetSamplers()) { | ||
| 928 | if (sampler.is_buffer) { | ||
| 929 | continue; | ||
| 930 | } | ||
| 931 | const auto dim = GetSamplerDim(sampler); | ||
| 932 | const int depth = sampler.is_shadow ? 1 : 0; | ||
| 933 | const int arrayed = sampler.is_array ? 1 : 0; | ||
| 934 | constexpr bool ms = false; | ||
| 935 | constexpr int sampled = 1; | ||
| 936 | constexpr auto format = spv::ImageFormat::Unknown; | ||
| 937 | const Id image_type = TypeImage(t_float, dim, depth, arrayed, ms, sampled, format); | ||
| 938 | const Id sampler_type = TypeSampledImage(image_type); | ||
| 939 | const Id sampler_pointer_type = | ||
| 940 | TypePointer(spv::StorageClass::UniformConstant, sampler_type); | ||
| 941 | const Id type = sampler.is_indexed | ||
| 942 | ? TypeArray(sampler_type, Constant(t_uint, sampler.size)) | ||
| 943 | : sampler_type; | ||
| 944 | const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, type); | ||
| 945 | const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant); | ||
| 946 | AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.index))); | ||
| 947 | Decorate(id, spv::Decoration::Binding, binding++); | ||
| 948 | Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); | ||
| 949 | |||
| 950 | sampled_images.emplace( | ||
| 951 | sampler.index, SampledImage{image_type, sampler_type, sampler_pointer_type, id}); | ||
| 952 | } | ||
| 953 | return binding; | ||
| 954 | } | ||
| 955 | |||
| 956 | u32 DeclareStorageTexels(u32 binding) { | ||
| 957 | for (const auto& image : ir.GetImages()) { | ||
| 958 | if (image.type != Tegra::Shader::ImageType::TextureBuffer) { | ||
| 959 | continue; | ||
| 960 | } | ||
| 961 | DeclareImage(image, binding); | ||
| 962 | } | ||
| 963 | return binding; | ||
| 964 | } | ||
| 965 | |||
| 966 | u32 DeclareImages(u32 binding) { | ||
| 967 | for (const auto& image : ir.GetImages()) { | ||
| 968 | if (image.type == Tegra::Shader::ImageType::TextureBuffer) { | ||
| 969 | continue; | ||
| 970 | } | ||
| 971 | DeclareImage(image, binding); | ||
| 972 | } | ||
| 973 | return binding; | ||
| 974 | } | ||
| 975 | |||
| 976 | void DeclareImage(const ImageEntry& image, u32& binding) { | ||
| 977 | const auto [dim, arrayed] = GetImageDim(image); | ||
| 978 | constexpr int depth = 0; | ||
| 979 | constexpr bool ms = false; | ||
| 980 | constexpr int sampled = 2; // This won't be accessed with a sampler | ||
| 981 | const auto format = image.is_atomic ? spv::ImageFormat::R32ui : spv::ImageFormat::Unknown; | ||
| 982 | const Id image_type = TypeImage(t_uint, dim, depth, arrayed, ms, sampled, format, {}); | ||
| 983 | const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type); | ||
| 984 | const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant); | ||
| 985 | AddGlobalVariable(Name(id, fmt::format("image_{}", image.index))); | ||
| 986 | |||
| 987 | Decorate(id, spv::Decoration::Binding, binding++); | ||
| 988 | Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); | ||
| 989 | if (image.is_read && !image.is_written) { | ||
| 990 | Decorate(id, spv::Decoration::NonWritable); | ||
| 991 | } else if (image.is_written && !image.is_read) { | ||
| 992 | Decorate(id, spv::Decoration::NonReadable); | ||
| 993 | } | ||
| 994 | |||
| 995 | images.emplace(image.index, StorageImage{image_type, id}); | ||
| 996 | } | ||
| 997 | |||
| 998 | bool IsRenderTargetEnabled(u32 rt) const { | ||
| 999 | for (u32 component = 0; component < 4; ++component) { | ||
| 1000 | if (header.ps.IsColorComponentOutputEnabled(rt, component)) { | ||
| 1001 | return true; | ||
| 1002 | } | ||
| 1003 | } | ||
| 1004 | return false; | ||
| 1005 | } | ||
| 1006 | |||
| 1007 | bool IsInputAttributeArray() const { | ||
| 1008 | return stage == ShaderType::TesselationControl || stage == ShaderType::TesselationEval || | ||
| 1009 | stage == ShaderType::Geometry; | ||
| 1010 | } | ||
| 1011 | |||
| 1012 | bool IsOutputAttributeArray() const { | ||
| 1013 | return stage == ShaderType::TesselationControl; | ||
| 1014 | } | ||
| 1015 | |||
| 1016 | bool IsAttributeEnabled(u32 location) const { | ||
| 1017 | return stage != ShaderType::Vertex || specialization.enabled_attributes[location]; | ||
| 1018 | } | ||
| 1019 | |||
| 1020 | u32 GetNumInputVertices() const { | ||
| 1021 | switch (stage) { | ||
| 1022 | case ShaderType::Geometry: | ||
| 1023 | return GetNumPrimitiveTopologyVertices(registry.GetGraphicsInfo().primitive_topology); | ||
| 1024 | case ShaderType::TesselationControl: | ||
| 1025 | case ShaderType::TesselationEval: | ||
| 1026 | return NumInputPatches; | ||
| 1027 | default: | ||
| 1028 | UNREACHABLE(); | ||
| 1029 | return 1; | ||
| 1030 | } | ||
| 1031 | } | ||
| 1032 | |||
| 1033 | u32 GetNumOutputVertices() const { | ||
| 1034 | switch (stage) { | ||
| 1035 | case ShaderType::TesselationControl: | ||
| 1036 | return header.common2.threads_per_input_primitive; | ||
| 1037 | default: | ||
| 1038 | UNREACHABLE(); | ||
| 1039 | return 1; | ||
| 1040 | } | ||
| 1041 | } | ||
| 1042 | |||
| 1043 | std::tuple<Id, VertexIndices> DeclareVertexStruct() { | ||
| 1044 | struct BuiltIn { | ||
| 1045 | Id type; | ||
| 1046 | spv::BuiltIn builtin; | ||
| 1047 | const char* name; | ||
| 1048 | }; | ||
| 1049 | std::vector<BuiltIn> members; | ||
| 1050 | members.reserve(4); | ||
| 1051 | |||
| 1052 | const auto AddBuiltIn = [&](Id type, spv::BuiltIn builtin, const char* name) { | ||
| 1053 | const auto index = static_cast<u32>(members.size()); | ||
| 1054 | members.push_back(BuiltIn{type, builtin, name}); | ||
| 1055 | return index; | ||
| 1056 | }; | ||
| 1057 | |||
| 1058 | VertexIndices indices; | ||
| 1059 | indices.position = AddBuiltIn(t_float4, spv::BuiltIn::Position, "position"); | ||
| 1060 | |||
| 1061 | if (ir.UsesLayer()) { | ||
| 1062 | if (stage != ShaderType::Vertex || device.IsExtShaderViewportIndexLayerSupported()) { | ||
| 1063 | indices.layer = AddBuiltIn(t_int, spv::BuiltIn::Layer, "layer"); | ||
| 1064 | } else { | ||
| 1065 | LOG_ERROR( | ||
| 1066 | Render_Vulkan, | ||
| 1067 | "Shader requires Layer but it's not supported on this stage with this device."); | ||
| 1068 | } | ||
| 1069 | } | ||
| 1070 | |||
| 1071 | if (ir.UsesViewportIndex()) { | ||
| 1072 | if (stage != ShaderType::Vertex || device.IsExtShaderViewportIndexLayerSupported()) { | ||
| 1073 | indices.viewport = AddBuiltIn(t_int, spv::BuiltIn::ViewportIndex, "viewport_index"); | ||
| 1074 | } else { | ||
| 1075 | LOG_ERROR(Render_Vulkan, "Shader requires ViewportIndex but it's not supported on " | ||
| 1076 | "this stage with this device."); | ||
| 1077 | } | ||
| 1078 | } | ||
| 1079 | |||
| 1080 | if (ir.UsesPointSize() || specialization.point_size) { | ||
| 1081 | indices.point_size = AddBuiltIn(t_float, spv::BuiltIn::PointSize, "point_size"); | ||
| 1082 | } | ||
| 1083 | |||
| 1084 | const auto& ir_output_attributes = ir.GetOutputAttributes(); | ||
| 1085 | const bool declare_clip_distances = std::any_of( | ||
| 1086 | ir_output_attributes.begin(), ir_output_attributes.end(), [](const auto& index) { | ||
| 1087 | return index == Attribute::Index::ClipDistances0123 || | ||
| 1088 | index == Attribute::Index::ClipDistances4567; | ||
| 1089 | }); | ||
| 1090 | if (declare_clip_distances) { | ||
| 1091 | indices.clip_distances = AddBuiltIn(TypeArray(t_float, Constant(t_uint, 8)), | ||
| 1092 | spv::BuiltIn::ClipDistance, "clip_distances"); | ||
| 1093 | } | ||
| 1094 | |||
| 1095 | std::vector<Id> member_types; | ||
| 1096 | member_types.reserve(members.size()); | ||
| 1097 | for (std::size_t i = 0; i < members.size(); ++i) { | ||
| 1098 | member_types.push_back(members[i].type); | ||
| 1099 | } | ||
| 1100 | const Id per_vertex_struct = Name(TypeStruct(member_types), "PerVertex"); | ||
| 1101 | Decorate(per_vertex_struct, spv::Decoration::Block); | ||
| 1102 | |||
| 1103 | for (std::size_t index = 0; index < members.size(); ++index) { | ||
| 1104 | const auto& member = members[index]; | ||
| 1105 | MemberName(per_vertex_struct, static_cast<u32>(index), member.name); | ||
| 1106 | MemberDecorate(per_vertex_struct, static_cast<u32>(index), spv::Decoration::BuiltIn, | ||
| 1107 | static_cast<u32>(member.builtin)); | ||
| 1108 | } | ||
| 1109 | |||
| 1110 | return {per_vertex_struct, indices}; | ||
| 1111 | } | ||
| 1112 | |||
| 1113 | void VisitBasicBlock(const NodeBlock& bb) { | ||
| 1114 | for (const auto& node : bb) { | ||
| 1115 | Visit(node); | ||
| 1116 | } | ||
| 1117 | } | ||
| 1118 | |||
| 1119 | Expression Visit(const Node& node) { | ||
| 1120 | if (const auto operation = std::get_if<OperationNode>(&*node)) { | ||
| 1121 | if (const auto amend_index = operation->GetAmendIndex()) { | ||
| 1122 | [[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type; | ||
| 1123 | ASSERT(type == Type::Void); | ||
| 1124 | } | ||
| 1125 | const auto operation_index = static_cast<std::size_t>(operation->GetCode()); | ||
| 1126 | const auto decompiler = operation_decompilers[operation_index]; | ||
| 1127 | if (decompiler == nullptr) { | ||
| 1128 | UNREACHABLE_MSG("Operation decompiler {} not defined", operation_index); | ||
| 1129 | } | ||
| 1130 | return (this->*decompiler)(*operation); | ||
| 1131 | } | ||
| 1132 | |||
| 1133 | if (const auto gpr = std::get_if<GprNode>(&*node)) { | ||
| 1134 | const u32 index = gpr->GetIndex(); | ||
| 1135 | if (index == Register::ZeroIndex) { | ||
| 1136 | return {v_float_zero, Type::Float}; | ||
| 1137 | } | ||
| 1138 | return {OpLoad(t_float, registers.at(index)), Type::Float}; | ||
| 1139 | } | ||
| 1140 | |||
| 1141 | if (const auto cv = std::get_if<CustomVarNode>(&*node)) { | ||
| 1142 | const u32 index = cv->GetIndex(); | ||
| 1143 | return {OpLoad(t_float, custom_variables.at(index)), Type::Float}; | ||
| 1144 | } | ||
| 1145 | |||
| 1146 | if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { | ||
| 1147 | return {Constant(t_uint, immediate->GetValue()), Type::Uint}; | ||
| 1148 | } | ||
| 1149 | |||
| 1150 | if (const auto predicate = std::get_if<PredicateNode>(&*node)) { | ||
| 1151 | const auto value = [&]() -> Id { | ||
| 1152 | switch (const auto index = predicate->GetIndex(); index) { | ||
| 1153 | case Tegra::Shader::Pred::UnusedIndex: | ||
| 1154 | return v_true; | ||
| 1155 | case Tegra::Shader::Pred::NeverExecute: | ||
| 1156 | return v_false; | ||
| 1157 | default: | ||
| 1158 | return OpLoad(t_bool, predicates.at(index)); | ||
| 1159 | } | ||
| 1160 | }(); | ||
| 1161 | if (predicate->IsNegated()) { | ||
| 1162 | return {OpLogicalNot(t_bool, value), Type::Bool}; | ||
| 1163 | } | ||
| 1164 | return {value, Type::Bool}; | ||
| 1165 | } | ||
| 1166 | |||
| 1167 | if (const auto abuf = std::get_if<AbufNode>(&*node)) { | ||
| 1168 | const auto attribute = abuf->GetIndex(); | ||
| 1169 | const u32 element = abuf->GetElement(); | ||
| 1170 | const auto& buffer = abuf->GetBuffer(); | ||
| 1171 | |||
| 1172 | const auto ArrayPass = [&](Id pointer_type, Id composite, std::vector<u32> indices) { | ||
| 1173 | std::vector<Id> members; | ||
| 1174 | members.reserve(std::size(indices) + 1); | ||
| 1175 | |||
| 1176 | if (buffer && IsInputAttributeArray()) { | ||
| 1177 | members.push_back(AsUint(Visit(buffer))); | ||
| 1178 | } | ||
| 1179 | for (const u32 index : indices) { | ||
| 1180 | members.push_back(Constant(t_uint, index)); | ||
| 1181 | } | ||
| 1182 | return OpAccessChain(pointer_type, composite, members); | ||
| 1183 | }; | ||
| 1184 | |||
| 1185 | switch (attribute) { | ||
| 1186 | case Attribute::Index::Position: { | ||
| 1187 | if (stage == ShaderType::Fragment) { | ||
| 1188 | return {OpLoad(t_float, AccessElement(t_in_float, frag_coord, element)), | ||
| 1189 | Type::Float}; | ||
| 1190 | } | ||
| 1191 | const std::vector elements = {in_indices.position.value(), element}; | ||
| 1192 | return {OpLoad(t_float, ArrayPass(t_in_float, in_vertex, elements)), Type::Float}; | ||
| 1193 | } | ||
| 1194 | case Attribute::Index::PointCoord: { | ||
| 1195 | switch (element) { | ||
| 1196 | case 0: | ||
| 1197 | case 1: | ||
| 1198 | return {OpCompositeExtract(t_float, OpLoad(t_float2, point_coord), element), | ||
| 1199 | Type::Float}; | ||
| 1200 | } | ||
| 1201 | UNIMPLEMENTED_MSG("Unimplemented point coord element={}", element); | ||
| 1202 | return {v_float_zero, Type::Float}; | ||
| 1203 | } | ||
| 1204 | case Attribute::Index::TessCoordInstanceIDVertexID: | ||
| 1205 | // TODO(Subv): Find out what the values are for the first two elements when inside a | ||
| 1206 | // vertex shader, and what's the value of the fourth element when inside a Tess Eval | ||
| 1207 | // shader. | ||
| 1208 | switch (element) { | ||
| 1209 | case 0: | ||
| 1210 | case 1: | ||
| 1211 | return {OpLoad(t_float, AccessElement(t_in_float, tess_coord, element)), | ||
| 1212 | Type::Float}; | ||
| 1213 | case 2: | ||
| 1214 | return { | ||
| 1215 | OpISub(t_int, OpLoad(t_int, instance_index), OpLoad(t_int, base_instance)), | ||
| 1216 | Type::Int}; | ||
| 1217 | case 3: | ||
| 1218 | return {OpISub(t_int, OpLoad(t_int, vertex_index), OpLoad(t_int, base_vertex)), | ||
| 1219 | Type::Int}; | ||
| 1220 | } | ||
| 1221 | UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); | ||
| 1222 | return {Constant(t_uint, 0U), Type::Uint}; | ||
| 1223 | case Attribute::Index::FrontFacing: | ||
| 1224 | // TODO(Subv): Find out what the values are for the other elements. | ||
| 1225 | ASSERT(stage == ShaderType::Fragment); | ||
| 1226 | if (element == 3) { | ||
| 1227 | const Id is_front_facing = OpLoad(t_bool, front_facing); | ||
| 1228 | const Id true_value = Constant(t_int, static_cast<s32>(-1)); | ||
| 1229 | const Id false_value = Constant(t_int, 0); | ||
| 1230 | return {OpSelect(t_int, is_front_facing, true_value, false_value), Type::Int}; | ||
| 1231 | } | ||
| 1232 | UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); | ||
| 1233 | return {v_float_zero, Type::Float}; | ||
| 1234 | default: | ||
| 1235 | if (!IsGenericAttribute(attribute)) { | ||
| 1236 | break; | ||
| 1237 | } | ||
| 1238 | const u32 location = GetGenericAttributeLocation(attribute); | ||
| 1239 | if (!IsAttributeEnabled(location)) { | ||
| 1240 | // Disabled attributes (also known as constant attributes) always return zero. | ||
| 1241 | return {v_float_zero, Type::Float}; | ||
| 1242 | } | ||
| 1243 | const auto type_descriptor = GetAttributeType(location); | ||
| 1244 | const Type type = type_descriptor.type; | ||
| 1245 | const Id attribute_id = input_attributes.at(attribute); | ||
| 1246 | const std::vector elements = {element}; | ||
| 1247 | const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements); | ||
| 1248 | return {OpLoad(GetTypeDefinition(type), pointer), type}; | ||
| 1249 | } | ||
| 1250 | UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute); | ||
| 1251 | return {v_float_zero, Type::Float}; | ||
| 1252 | } | ||
| 1253 | |||
| 1254 | if (const auto cbuf = std::get_if<CbufNode>(&*node)) { | ||
| 1255 | const Node& offset = cbuf->GetOffset(); | ||
| 1256 | const Id buffer_id = constant_buffers.at(cbuf->GetIndex()); | ||
| 1257 | |||
| 1258 | Id pointer{}; | ||
| 1259 | if (device.IsKhrUniformBufferStandardLayoutSupported()) { | ||
| 1260 | const Id buffer_offset = | ||
| 1261 | OpShiftRightLogical(t_uint, AsUint(Visit(offset)), Constant(t_uint, 2U)); | ||
| 1262 | pointer = | ||
| 1263 | OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0U), buffer_offset); | ||
| 1264 | } else { | ||
| 1265 | Id buffer_index{}; | ||
| 1266 | Id buffer_element{}; | ||
| 1267 | if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { | ||
| 1268 | // Direct access | ||
| 1269 | const u32 offset_imm = immediate->GetValue(); | ||
| 1270 | ASSERT(offset_imm % 4 == 0); | ||
| 1271 | buffer_index = Constant(t_uint, offset_imm / 16); | ||
| 1272 | buffer_element = Constant(t_uint, (offset_imm / 4) % 4); | ||
| 1273 | } else if (std::holds_alternative<OperationNode>(*offset)) { | ||
| 1274 | // Indirect access | ||
| 1275 | const Id offset_id = AsUint(Visit(offset)); | ||
| 1276 | const Id unsafe_offset = OpUDiv(t_uint, offset_id, Constant(t_uint, 4)); | ||
| 1277 | const Id final_offset = | ||
| 1278 | OpUMod(t_uint, unsafe_offset, Constant(t_uint, MaxConstBufferElements - 1)); | ||
| 1279 | buffer_index = OpUDiv(t_uint, final_offset, Constant(t_uint, 4)); | ||
| 1280 | buffer_element = OpUMod(t_uint, final_offset, Constant(t_uint, 4)); | ||
| 1281 | } else { | ||
| 1282 | UNREACHABLE_MSG("Unmanaged offset node type"); | ||
| 1283 | } | ||
| 1284 | pointer = OpAccessChain(t_cbuf_float, buffer_id, v_uint_zero, buffer_index, | ||
| 1285 | buffer_element); | ||
| 1286 | } | ||
| 1287 | return {OpLoad(t_float, pointer), Type::Float}; | ||
| 1288 | } | ||
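For the std140 fallback above, the byte offset decomposes into a vec4 index and a component index; a sketch of that arithmetic, assuming the offset is 4-byte aligned as the ASSERT requires:

    #include <cassert>
    #include <cstdint>

    int main() {
        const std::uint32_t offset = 36; // byte offset into the constant buffer
        assert(offset % 4 == 0);
        const std::uint32_t buffer_index = offset / 16;        // which vec4
        const std::uint32_t buffer_element = (offset / 4) % 4; // which component
        assert(buffer_index == 2 && buffer_element == 1);      // maps to cbuf[2].y
        return 0;
    }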
| 1289 | |||
| 1290 | if (const auto gmem = std::get_if<GmemNode>(&*node)) { | ||
| 1291 | return {OpLoad(t_uint, GetGlobalMemoryPointer(*gmem)), Type::Uint}; | ||
| 1292 | } | ||
| 1293 | |||
| 1294 | if (const auto lmem = std::get_if<LmemNode>(&*node)) { | ||
| 1295 | Id address = AsUint(Visit(lmem->GetAddress())); | ||
| 1296 | address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); | ||
| 1297 | const Id pointer = OpAccessChain(t_prv_float, local_memory, address); | ||
| 1298 | return {OpLoad(t_float, pointer), Type::Float}; | ||
| 1299 | } | ||
| 1300 | |||
| 1301 | if (const auto smem = std::get_if<SmemNode>(&*node)) { | ||
| 1302 | return {OpLoad(t_uint, GetSharedMemoryPointer(*smem)), Type::Uint}; | ||
| 1303 | } | ||
| 1304 | |||
| 1305 | if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { | ||
| 1306 | const Id flag = internal_flags.at(static_cast<std::size_t>(internal_flag->GetFlag())); | ||
| 1307 | return {OpLoad(t_bool, flag), Type::Bool}; | ||
| 1308 | } | ||
| 1309 | |||
| 1310 | if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { | ||
| 1311 | if (const auto amend_index = conditional->GetAmendIndex()) { | ||
| 1312 | [[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type; | ||
| 1313 | ASSERT(type == Type::Void); | ||
| 1314 | } | ||
| 1315 | // It's invalid to call conditional on nested nodes; use an operation instead | ||
| 1316 | const Id true_label = OpLabel(); | ||
| 1317 | const Id skip_label = OpLabel(); | ||
| 1318 | const Id condition = AsBool(Visit(conditional->GetCondition())); | ||
| 1319 | OpSelectionMerge(skip_label, spv::SelectionControlMask::MaskNone); | ||
| 1320 | OpBranchConditional(condition, true_label, skip_label); | ||
| 1321 | AddLabel(true_label); | ||
| 1322 | |||
| 1323 | conditional_branch_set = true; | ||
| 1324 | inside_branch = false; | ||
| 1325 | VisitBasicBlock(conditional->GetCode()); | ||
| 1326 | conditional_branch_set = false; | ||
| 1327 | if (!inside_branch) { | ||
| 1328 | OpBranch(skip_label); | ||
| 1329 | } else { | ||
| 1330 | inside_branch = false; | ||
| 1331 | } | ||
| 1332 | AddLabel(skip_label); | ||
| 1333 | return {}; | ||
| 1334 | } | ||
| 1335 | |||
| 1336 | if (const auto comment = std::get_if<CommentNode>(&*node)) { | ||
| 1337 | if (device.HasDebuggingToolAttached()) { | ||
| 1338 | // We should insert comments with OpString instead of using named variables | ||
| 1339 | Name(OpUndef(t_int), comment->GetText()); | ||
| 1340 | } | ||
| 1341 | return {}; | ||
| 1342 | } | ||
| 1343 | |||
| 1344 | UNREACHABLE(); | ||
| 1345 | return {}; | ||
| 1346 | } | ||
| 1347 | |||
| 1348 | template <Id (Module::*func)(Id, Id), Type result_type, Type type_a = result_type> | ||
| 1349 | Expression Unary(Operation operation) { | ||
| 1350 | const Id type_def = GetTypeDefinition(result_type); | ||
| 1351 | const Id op_a = As(Visit(operation[0]), type_a); | ||
| 1352 | |||
| 1353 | const Id value = (this->*func)(type_def, op_a); | ||
| 1354 | if (IsPrecise(operation)) { | ||
| 1355 | Decorate(value, spv::Decoration::NoContraction); | ||
| 1356 | } | ||
| 1357 | return {value, result_type}; | ||
| 1358 | } | ||
| 1359 | |||
| 1360 | template <Id (Module::*func)(Id, Id, Id), Type result_type, Type type_a = result_type, | ||
| 1361 | Type type_b = type_a> | ||
| 1362 | Expression Binary(Operation operation) { | ||
| 1363 | const Id type_def = GetTypeDefinition(result_type); | ||
| 1364 | const Id op_a = As(Visit(operation[0]), type_a); | ||
| 1365 | const Id op_b = As(Visit(operation[1]), type_b); | ||
| 1366 | |||
| 1367 | const Id value = (this->*func)(type_def, op_a, op_b); | ||
| 1368 | if (IsPrecise(operation)) { | ||
| 1369 | Decorate(value, spv::Decoration::NoContraction); | ||
| 1370 | } | ||
| 1371 | return {value, result_type}; | ||
| 1372 | } | ||
| 1373 | |||
| 1374 | template <Id (Module::*func)(Id, Id, Id, Id), Type result_type, Type type_a = result_type, | ||
| 1375 | Type type_b = type_a, Type type_c = type_b> | ||
| 1376 | Expression Ternary(Operation operation) { | ||
| 1377 | const Id type_def = GetTypeDefinition(result_type); | ||
| 1378 | const Id op_a = As(Visit(operation[0]), type_a); | ||
| 1379 | const Id op_b = As(Visit(operation[1]), type_b); | ||
| 1380 | const Id op_c = As(Visit(operation[2]), type_c); | ||
| 1381 | |||
| 1382 | const Id value = (this->*func)(type_def, op_a, op_b, op_c); | ||
| 1383 | if (IsPrecise(operation)) { | ||
| 1384 | Decorate(value, spv::Decoration::NoContraction); | ||
| 1385 | } | ||
| 1386 | return {value, result_type}; | ||
| 1387 | } | ||
| 1388 | |||
| 1389 | template <Id (Module::*func)(Id, Id, Id, Id, Id), Type result_type, Type type_a = result_type, | ||
| 1390 | Type type_b = type_a, Type type_c = type_b, Type type_d = type_c> | ||
| 1391 | Expression Quaternary(Operation operation) { | ||
| 1392 | const Id type_def = GetTypeDefinition(result_type); | ||
| 1393 | const Id op_a = As(Visit(operation[0]), type_a); | ||
| 1394 | const Id op_b = As(Visit(operation[1]), type_b); | ||
| 1395 | const Id op_c = As(Visit(operation[2]), type_c); | ||
| 1396 | const Id op_d = As(Visit(operation[3]), type_d); | ||
| 1397 | |||
| 1398 | const Id value = (this->*func)(type_def, op_a, op_b, op_c, op_d); | ||
| 1399 | if (IsPrecise(operation)) { | ||
| 1400 | Decorate(value, spv::Decoration::NoContraction); | ||
| 1401 | } | ||
| 1402 | return {value, result_type}; | ||
| 1403 | } | ||
| 1404 | |||
| 1405 | Expression Assign(Operation operation) { | ||
| 1406 | const Node& dest = operation[0]; | ||
| 1407 | const Node& src = operation[1]; | ||
| 1408 | |||
| 1409 | Expression target{}; | ||
| 1410 | if (const auto gpr = std::get_if<GprNode>(&*dest)) { | ||
| 1411 | if (gpr->GetIndex() == Register::ZeroIndex) { | ||
| 1412 | // Writing to Register::ZeroIndex is a no-op, but we still have to visit its source | ||
| 1413 | // because it might have side effects. | ||
| 1414 | Visit(src); | ||
| 1415 | return {}; | ||
| 1416 | } | ||
| 1417 | target = {registers.at(gpr->GetIndex()), Type::Float}; | ||
| 1418 | |||
| 1419 | } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { | ||
| 1420 | const auto& buffer = abuf->GetBuffer(); | ||
| 1421 | const auto ArrayPass = [&](Id pointer_type, Id composite, std::vector<u32> indices) { | ||
| 1422 | std::vector<Id> members; | ||
| 1423 | members.reserve(std::size(indices) + 1); | ||
| 1424 | |||
| 1425 | if (buffer && IsOutputAttributeArray()) { | ||
| 1426 | members.push_back(AsUint(Visit(buffer))); | ||
| 1427 | } | ||
| 1428 | for (const u32 index : indices) { | ||
| 1429 | members.push_back(Constant(t_uint, index)); | ||
| 1430 | } | ||
| 1431 | return OpAccessChain(pointer_type, composite, members); | ||
| 1432 | }; | ||
| 1433 | |||
| 1434 | target = [&]() -> Expression { | ||
| 1435 | const u32 element = abuf->GetElement(); | ||
| 1436 | switch (const auto attribute = abuf->GetIndex(); attribute) { | ||
| 1437 | case Attribute::Index::Position: { | ||
| 1438 | const u32 index = out_indices.position.value(); | ||
| 1439 | return {ArrayPass(t_out_float, out_vertex, {index, element}), Type::Float}; | ||
| 1440 | } | ||
| 1441 | case Attribute::Index::LayerViewportPointSize: | ||
| 1442 | switch (element) { | ||
| 1443 | case 1: { | ||
| 1444 | if (!out_indices.layer) { | ||
| 1445 | return {}; | ||
| 1446 | } | ||
| 1447 | const u32 index = out_indices.layer.value(); | ||
| 1448 | return {AccessElement(t_out_int, out_vertex, index), Type::Int}; | ||
| 1449 | } | ||
| 1450 | case 2: { | ||
| 1451 | if (!out_indices.viewport) { | ||
| 1452 | return {}; | ||
| 1453 | } | ||
| 1454 | const u32 index = out_indices.viewport.value(); | ||
| 1455 | return {AccessElement(t_out_int, out_vertex, index), Type::Int}; | ||
| 1456 | } | ||
| 1457 | case 3: { | ||
| 1458 | const auto index = out_indices.point_size.value(); | ||
| 1459 | return {AccessElement(t_out_float, out_vertex, index), Type::Float}; | ||
| 1460 | } | ||
| 1461 | default: | ||
| 1462 | UNIMPLEMENTED_MSG("LayerViewportPoint element={}", abuf->GetElement()); | ||
| 1463 | return {}; | ||
| 1464 | } | ||
| 1465 | case Attribute::Index::ClipDistances0123: { | ||
| 1466 | const u32 index = out_indices.clip_distances.value(); | ||
| 1467 | return {AccessElement(t_out_float, out_vertex, index, element), Type::Float}; | ||
| 1468 | } | ||
| 1469 | case Attribute::Index::ClipDistances4567: { | ||
| 1470 | const u32 index = out_indices.clip_distances.value(); | ||
| 1471 | return {AccessElement(t_out_float, out_vertex, index, element + 4), | ||
| 1472 | Type::Float}; | ||
| 1473 | } | ||
| 1474 | default: | ||
| 1475 | if (IsGenericAttribute(attribute)) { | ||
| 1476 | const u8 offset = static_cast<u8>(static_cast<u8>(attribute) * 4 + element); | ||
| 1477 | const GenericVaryingDescription description = output_attributes.at(offset); | ||
| 1478 | const Id composite = description.id; | ||
| 1479 | std::vector<u32> indices; | ||
| 1480 | if (!description.is_scalar) { | ||
| 1481 | indices.push_back(element - description.first_element); | ||
| 1482 | } | ||
| 1483 | return {ArrayPass(t_out_float, composite, indices), Type::Float}; | ||
| 1484 | } | ||
| 1485 | UNIMPLEMENTED_MSG("Unhandled output attribute: {}", | ||
| 1486 | static_cast<u32>(attribute)); | ||
| 1487 | return {}; | ||
| 1488 | } | ||
| 1489 | }(); | ||
| 1490 | |||
| 1491 | } else if (const auto patch = std::get_if<PatchNode>(&*dest)) { | ||
| 1492 | target = [&]() -> Expression { | ||
| 1493 | const u32 offset = patch->GetOffset(); | ||
| 1494 | switch (offset) { | ||
| 1495 | case 0: | ||
| 1496 | case 1: | ||
| 1497 | case 2: | ||
| 1498 | case 3: | ||
| 1499 | return {AccessElement(t_out_float, tess_level_outer, offset % 4), Type::Float}; | ||
| 1500 | case 4: | ||
| 1501 | case 5: | ||
| 1502 | return {AccessElement(t_out_float, tess_level_inner, offset % 4), Type::Float}; | ||
| 1503 | } | ||
| 1504 | UNIMPLEMENTED_MSG("Unhandled patch output offset: {}", offset); | ||
| 1505 | return {}; | ||
| 1506 | }(); | ||
| 1507 | |||
| 1508 | } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { | ||
| 1509 | Id address = AsUint(Visit(lmem->GetAddress())); | ||
| 1510 | address = OpUDiv(t_uint, address, Constant(t_uint, 4)); | ||
| 1511 | target = {OpAccessChain(t_prv_float, local_memory, address), Type::Float}; | ||
| 1512 | |||
| 1513 | } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { | ||
| 1514 | target = {GetSharedMemoryPointer(*smem), Type::Uint}; | ||
| 1515 | |||
| 1516 | } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { | ||
| 1517 | target = {GetGlobalMemoryPointer(*gmem), Type::Uint}; | ||
| 1518 | |||
| 1519 | } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) { | ||
| 1520 | target = {custom_variables.at(cv->GetIndex()), Type::Float}; | ||
| 1521 | |||
| 1522 | } else { | ||
| 1523 | UNIMPLEMENTED(); | ||
| 1524 | } | ||
| 1525 | |||
| 1526 | if (!target.id) { | ||
| 1527 | // On failure the handlers above leave target.id null; skip the store. | ||
| 1528 | return {}; | ||
| 1529 | } | ||
| 1530 | |||
| 1531 | OpStore(target.id, As(Visit(src), target.type)); | ||
| 1532 | return {}; | ||
| 1533 | } | ||
| 1534 | |||
| 1535 | template <u32 offset> | ||
| 1536 | Expression FCastHalf(Operation operation) { | ||
| 1537 | const Id value = AsHalfFloat(Visit(operation[0])); | ||
| 1538 | return {GetFloatFromHalfScalar(OpCompositeExtract(t_scalar_half, value, offset)), | ||
| 1539 | Type::Float}; | ||
| 1540 | } | ||
| 1541 | |||
| 1542 | Expression FSwizzleAdd(Operation operation) { | ||
| 1543 | const Id minus = Constant(t_float, -1.0f); | ||
| 1544 | const Id plus = v_float_one; | ||
| 1545 | const Id zero = v_float_zero; | ||
| 1546 | const Id lut_a = ConstantComposite(t_float4, minus, plus, minus, zero); | ||
| 1547 | const Id lut_b = ConstantComposite(t_float4, minus, minus, plus, minus); | ||
| 1548 | |||
| 1549 | Id mask = OpLoad(t_uint, thread_id); | ||
| 1550 | mask = OpBitwiseAnd(t_uint, mask, Constant(t_uint, 3)); | ||
| 1551 | mask = OpShiftLeftLogical(t_uint, mask, Constant(t_uint, 1)); | ||
| 1552 | mask = OpShiftRightLogical(t_uint, AsUint(Visit(operation[2])), mask); | ||
| 1553 | mask = OpBitwiseAnd(t_uint, mask, Constant(t_uint, 3)); | ||
| 1554 | |||
| 1555 | const Id modifier_a = OpVectorExtractDynamic(t_float, lut_a, mask); | ||
| 1556 | const Id modifier_b = OpVectorExtractDynamic(t_float, lut_b, mask); | ||
| 1557 | |||
| 1558 | const Id op_a = OpFMul(t_float, AsFloat(Visit(operation[0])), modifier_a); | ||
| 1559 | const Id op_b = OpFMul(t_float, AsFloat(Visit(operation[1])), modifier_b); | ||
| 1560 | return {OpFAdd(t_float, op_a, op_b), Type::Float}; | ||
| 1561 | } | ||
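The shift sequence above extracts a per-lane 2-bit selector from the packed swizzle value in operation[2]; a sketch under that reading:

    #include <cassert>
    #include <cstdint>

    // Each of the four quad lanes owns two bits of the packed value.
    std::uint32_t LutSelector(std::uint32_t packed, std::uint32_t thread_id) {
        std::uint32_t mask = thread_id & 3; // lane index within the quad
        mask <<= 1;                         // two bits per lane
        return (packed >> mask) & 3;        // index into lut_a/lut_b
    }

    int main() {
        assert(LutSelector(0b11'10'01'00u, 2) == 2); // lane 2 reads bits [5:4]
        return 0;
    }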
| 1562 | |||
| 1563 | Expression HNegate(Operation operation) { | ||
| 1564 | const bool is_f16 = device.IsFloat16Supported(); | ||
| 1565 | const Id minus_one = Constant(t_scalar_half, is_f16 ? 0xbc00 : 0xbf800000); | ||
| 1566 | const Id one = Constant(t_scalar_half, is_f16 ? 0x3c00 : 0x3f800000); | ||
| 1567 | const auto GetNegate = [&](std::size_t index) { | ||
| 1568 | return OpSelect(t_scalar_half, AsBool(Visit(operation[index])), minus_one, one); | ||
| 1569 | }; | ||
| 1570 | const Id negation = OpCompositeConstruct(t_half, GetNegate(1), GetNegate(2)); | ||
| 1571 | return {OpFMul(t_half, AsHalfFloat(Visit(operation[0])), negation), Type::HalfFloat}; | ||
| 1572 | } | ||
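The constants above are the raw IEEE bit patterns of +1.0 and -1.0; a sketch checking the f32 pair (the f16 pair 0x3c00/0xbc00 has the same layout at half width):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    float FromBits(std::uint32_t bits) {
        float value;
        std::memcpy(&value, &bits, sizeof(value)); // well-defined type pun
        return value;
    }

    int main() {
        assert(FromBits(0x3f800000u) == 1.0f);
        assert(FromBits(0xbf800000u) == -1.0f);
        return 0;
    }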
| 1573 | |||
| 1574 | Expression HClamp(Operation operation) { | ||
| 1575 | const auto Pack = [&](std::size_t index) { | ||
| 1576 | const Id scalar = GetHalfScalarFromFloat(AsFloat(Visit(operation[index]))); | ||
| 1577 | return OpCompositeConstruct(t_half, scalar, scalar); | ||
| 1578 | }; | ||
| 1579 | const Id value = AsHalfFloat(Visit(operation[0])); | ||
| 1580 | const Id min = Pack(1); | ||
| 1581 | const Id max = Pack(2); | ||
| 1582 | |||
| 1583 | const Id clamped = OpFClamp(t_half, value, min, max); | ||
| 1584 | if (IsPrecise(operation)) { | ||
| 1585 | Decorate(clamped, spv::Decoration::NoContraction); | ||
| 1586 | } | ||
| 1587 | return {clamped, Type::HalfFloat}; | ||
| 1588 | } | ||
| 1589 | |||
| 1590 | Expression HCastFloat(Operation operation) { | ||
| 1591 | const Id value = GetHalfScalarFromFloat(AsFloat(Visit(operation[0]))); | ||
| 1592 | return {OpCompositeConstruct(t_half, value, Constant(t_scalar_half, 0)), Type::HalfFloat}; | ||
| 1593 | } | ||
| 1594 | |||
| 1595 | Expression HUnpack(Operation operation) { | ||
| 1596 | Expression operand = Visit(operation[0]); | ||
| 1597 | const auto type = std::get<Tegra::Shader::HalfType>(operation.GetMeta()); | ||
| 1598 | if (type == Tegra::Shader::HalfType::H0_H1) { | ||
| 1599 | return operand; | ||
| 1600 | } | ||
| 1601 | const auto value = [&] { | ||
| 1602 | switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) { | ||
| 1603 | case Tegra::Shader::HalfType::F32: | ||
| 1604 | return GetHalfScalarFromFloat(AsFloat(operand)); | ||
| 1605 | case Tegra::Shader::HalfType::H0_H0: | ||
| 1606 | return OpCompositeExtract(t_scalar_half, AsHalfFloat(operand), 0); | ||
| 1607 | case Tegra::Shader::HalfType::H1_H1: | ||
| 1608 | return OpCompositeExtract(t_scalar_half, AsHalfFloat(operand), 1); | ||
| 1609 | default: | ||
| 1610 | UNREACHABLE(); | ||
| 1611 | return ConstantNull(t_half); | ||
| 1612 | } | ||
| 1613 | }(); | ||
| 1614 | return {OpCompositeConstruct(t_half, value, value), Type::HalfFloat}; | ||
| 1615 | } | ||
| 1616 | |||
| 1617 | Expression HMergeF32(Operation operation) { | ||
| 1618 | const Id value = AsHalfFloat(Visit(operation[0])); | ||
| 1619 | return {GetFloatFromHalfScalar(OpCompositeExtract(t_scalar_half, value, 0)), Type::Float}; | ||
| 1620 | } | ||
| 1621 | |||
| 1622 | template <u32 offset> | ||
| 1623 | Expression HMergeHN(Operation operation) { | ||
| 1624 | const Id target = AsHalfFloat(Visit(operation[0])); | ||
| 1625 | const Id source = AsHalfFloat(Visit(operation[1])); | ||
| 1626 | const Id object = OpCompositeExtract(t_scalar_half, source, offset); | ||
| 1627 | return {OpCompositeInsert(t_half, object, target, offset), Type::HalfFloat}; | ||
| 1628 | } | ||
| 1629 | |||
| 1630 | Expression HPack2(Operation operation) { | ||
| 1631 | const Id low = GetHalfScalarFromFloat(AsFloat(Visit(operation[0]))); | ||
| 1632 | const Id high = GetHalfScalarFromFloat(AsFloat(Visit(operation[1]))); | ||
| 1633 | return {OpCompositeConstruct(t_half, low, high), Type::HalfFloat}; | ||
| 1634 | } | ||
| 1635 | |||
| 1636 | Expression LogicalAddCarry(Operation operation) { | ||
| 1637 | const Id op_a = AsUint(Visit(operation[0])); | ||
| 1638 | const Id op_b = AsUint(Visit(operation[1])); | ||
| 1639 | |||
| 1640 | const Id result = OpIAddCarry(TypeStruct({t_uint, t_uint}), op_a, op_b); | ||
| 1641 | const Id carry = OpCompositeExtract(t_uint, result, 1); | ||
| 1642 | return {OpINotEqual(t_bool, carry, v_uint_zero), Type::Bool}; | ||
| 1643 | } | ||
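OpIAddCarry yields a struct of the low sum and a 0/1 carry; the extracted comparison is equivalent to this plain integer test, shown as a sketch:

    #include <cassert>
    #include <cstdint>

    bool AddCarries(std::uint32_t a, std::uint32_t b) {
        const std::uint64_t sum = std::uint64_t{a} + b;
        return (sum >> 32) != 0; // carry out of bit 31, i.e. struct member 1 != 0
    }

    int main() {
        assert(AddCarries(0xffffffffu, 1u));
        assert(!AddCarries(2u, 3u));
        return 0;
    }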
| 1644 | |||
| 1645 | Expression LogicalAssign(Operation operation) { | ||
| 1646 | const Node& dest = operation[0]; | ||
| 1647 | const Node& src = operation[1]; | ||
| 1648 | |||
| 1649 | Id target{}; | ||
| 1650 | if (const auto pred = std::get_if<PredicateNode>(&*dest)) { | ||
| 1651 | ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); | ||
| 1652 | |||
| 1653 | const auto index = pred->GetIndex(); | ||
| 1654 | switch (index) { | ||
| 1655 | case Tegra::Shader::Pred::NeverExecute: | ||
| 1656 | case Tegra::Shader::Pred::UnusedIndex: | ||
| 1657 | // Writing to these predicates is a no-op | ||
| 1658 | return {}; | ||
| 1659 | } | ||
| 1660 | target = predicates.at(index); | ||
| 1661 | |||
| 1662 | } else if (const auto flag = std::get_if<InternalFlagNode>(&*dest)) { | ||
| 1663 | target = internal_flags.at(static_cast<u32>(flag->GetFlag())); | ||
| 1664 | } | ||
| 1665 | |||
| 1666 | OpStore(target, AsBool(Visit(src))); | ||
| 1667 | return {}; | ||
| 1668 | } | ||
| 1669 | |||
| 1670 | Expression LogicalFOrdered(Operation operation) { | ||
| 1671 | // Emulate SPIR-V's OpOrdered, which requires the Kernel capability | ||
| 1672 | const Id op_a = AsFloat(Visit(operation[0])); | ||
| 1673 | const Id op_b = AsFloat(Visit(operation[1])); | ||
| 1674 | const Id is_num_a = OpFOrdEqual(t_bool, op_a, op_a); | ||
| 1675 | const Id is_num_b = OpFOrdEqual(t_bool, op_b, op_b); | ||
| 1676 | return {OpLogicalAnd(t_bool, is_num_a, is_num_b), Type::Bool}; | ||
| 1677 | } | ||
| 1678 | |||
| 1679 | Expression LogicalFUnordered(Operation operation) { | ||
| 1680 | // Emulate SPIR-V's OpUnordered, which requires the Kernel capability | ||
| 1681 | const Id op_a = AsFloat(Visit(operation[0])); | ||
| 1682 | const Id op_b = AsFloat(Visit(operation[1])); | ||
| 1683 | const Id is_nan_a = OpIsNan(t_bool, op_a); | ||
| 1684 | const Id is_nan_b = OpIsNan(t_bool, op_b); | ||
| 1685 | return {OpLogicalOr(t_bool, is_nan_a, is_nan_b), Type::Bool}; | ||
| 1686 | } | ||
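Both emulations reduce to NaN tests, since x == x is false only for NaN; the same predicates on host floats, as a sketch:

    #include <cassert>
    #include <cmath>

    bool Ordered(float a, float b) {
        return a == a && b == b; // both operands are numbers
    }

    bool Unordered(float a, float b) {
        return std::isnan(a) || std::isnan(b); // at least one operand is NaN
    }

    int main() {
        assert(Ordered(1.0f, 2.0f));
        assert(Unordered(std::nanf(""), 2.0f));
        assert(Unordered(1.0f, 2.0f) == !Ordered(1.0f, 2.0f));
        return 0;
    }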
| 1687 | |||
| 1688 | Id GetTextureSampler(Operation operation) { | ||
| 1689 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1690 | ASSERT(!meta.sampler.is_buffer); | ||
| 1691 | |||
| 1692 | const auto& entry = sampled_images.at(meta.sampler.index); | ||
| 1693 | Id sampler = entry.variable; | ||
| 1694 | if (meta.sampler.is_indexed) { | ||
| 1695 | const Id index = AsInt(Visit(meta.index)); | ||
| 1696 | sampler = OpAccessChain(entry.sampler_pointer_type, sampler, index); | ||
| 1697 | } | ||
| 1698 | return OpLoad(entry.sampler_type, sampler); | ||
| 1699 | } | ||
| 1700 | |||
| 1701 | Id GetTextureImage(Operation operation) { | ||
| 1702 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1703 | const u32 index = meta.sampler.index; | ||
| 1704 | if (meta.sampler.is_buffer) { | ||
| 1705 | const auto& entry = uniform_texels.at(index); | ||
| 1706 | return OpLoad(entry.image_type, entry.image); | ||
| 1707 | } else { | ||
| 1708 | const auto& entry = sampled_images.at(index); | ||
| 1709 | return OpImage(entry.image_type, GetTextureSampler(operation)); | ||
| 1710 | } | ||
| 1711 | } | ||
| 1712 | |||
| 1713 | Id GetImage(Operation operation) { | ||
| 1714 | const auto& meta = std::get<MetaImage>(operation.GetMeta()); | ||
| 1715 | const auto entry = images.at(meta.image.index); | ||
| 1716 | return OpLoad(entry.image_type, entry.image); | ||
| 1717 | } | ||
| 1718 | |||
| 1719 | Id AssembleVector(const std::vector<Id>& coords, Type type) { | ||
| 1720 | const Id coords_type = GetTypeVectorDefinitionLut(type).at(coords.size() - 1); | ||
| 1721 | return coords.size() == 1 ? coords[0] : OpCompositeConstruct(coords_type, coords); | ||
| 1722 | } | ||
| 1723 | |||
| 1724 | Id GetCoordinates(Operation operation, Type type) { | ||
| 1725 | std::vector<Id> coords; | ||
| 1726 | for (std::size_t i = 0; i < operation.GetOperandsCount(); ++i) { | ||
| 1727 | coords.push_back(As(Visit(operation[i]), type)); | ||
| 1728 | } | ||
| 1729 | if (const auto meta = std::get_if<MetaTexture>(&operation.GetMeta())) { | ||
| 1730 | // Add array coordinate for textures | ||
| 1731 | if (meta->sampler.is_array) { | ||
| 1732 | Id array = AsInt(Visit(meta->array)); | ||
| 1733 | if (type == Type::Float) { | ||
| 1734 | array = OpConvertSToF(t_float, array); | ||
| 1735 | } | ||
| 1736 | coords.push_back(array); | ||
| 1737 | } | ||
| 1738 | } | ||
| 1739 | return AssembleVector(coords, type); | ||
| 1740 | } | ||
| 1741 | |||
| 1742 | Id GetOffsetCoordinates(Operation operation) { | ||
| 1743 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1744 | std::vector<Id> coords; | ||
| 1745 | coords.reserve(meta.aoffi.size()); | ||
| 1746 | for (const auto& coord : meta.aoffi) { | ||
| 1747 | coords.push_back(AsInt(Visit(coord))); | ||
| 1748 | } | ||
| 1749 | return AssembleVector(coords, Type::Int); | ||
| 1750 | } | ||
| 1751 | |||
| 1752 | std::pair<Id, Id> GetDerivatives(Operation operation) { | ||
| 1753 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1754 | const auto& derivatives = meta.derivates; | ||
| 1755 | ASSERT(derivatives.size() % 2 == 0); | ||
| 1756 | |||
| 1757 | const std::size_t components = derivatives.size() / 2; | ||
| 1758 | std::vector<Id> dx, dy; | ||
| 1759 | dx.reserve(components); | ||
| 1760 | dy.reserve(components); | ||
| 1761 | for (std::size_t index = 0; index < components; ++index) { | ||
| 1762 | dx.push_back(AsFloat(Visit(derivatives.at(index * 2 + 0)))); | ||
| 1763 | dy.push_back(AsFloat(Visit(derivatives.at(index * 2 + 1)))); | ||
| 1764 | } | ||
| 1765 | return {AssembleVector(dx, Type::Float), AssembleVector(dy, Type::Float)}; | ||
| 1766 | } | ||
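The derivative list arrives interleaved per component (dx0, dy0, dx1, dy1, ...); a sketch of the de-interleave into the two vectors the Grad image operand takes:

    #include <cassert>
    #include <vector>

    int main() {
        const std::vector<float> derivatives{0.1f, 0.2f, 0.3f, 0.4f};
        std::vector<float> dx;
        std::vector<float> dy;
        for (std::size_t index = 0; index < derivatives.size() / 2; ++index) {
            dx.push_back(derivatives[index * 2 + 0]);
            dy.push_back(derivatives[index * 2 + 1]);
        }
        assert((dx == std::vector<float>{0.1f, 0.3f}));
        assert((dy == std::vector<float>{0.2f, 0.4f}));
        return 0;
    }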
| 1767 | |||
| 1768 | Expression GetTextureElement(Operation operation, Id sample_value, Type type) { | ||
| 1769 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1770 | const auto type_def = GetTypeDefinition(type); | ||
| 1771 | return {OpCompositeExtract(type_def, sample_value, meta.element), type}; | ||
| 1772 | } | ||
| 1773 | |||
| 1774 | Expression Texture(Operation operation) { | ||
| 1775 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1776 | |||
| 1777 | const bool can_implicit = stage == ShaderType::Fragment; | ||
| 1778 | const Id sampler = GetTextureSampler(operation); | ||
| 1779 | const Id coords = GetCoordinates(operation, Type::Float); | ||
| 1780 | |||
| 1781 | std::vector<Id> operands; | ||
| 1782 | spv::ImageOperandsMask mask{}; | ||
| 1783 | if (meta.bias) { | ||
| 1784 | mask = mask | spv::ImageOperandsMask::Bias; | ||
| 1785 | operands.push_back(AsFloat(Visit(meta.bias))); | ||
| 1786 | } | ||
| 1787 | |||
| 1788 | if (!can_implicit) { | ||
| 1789 | mask = mask | spv::ImageOperandsMask::Lod; | ||
| 1790 | operands.push_back(v_float_zero); | ||
| 1791 | } | ||
| 1792 | |||
| 1793 | if (!meta.aoffi.empty()) { | ||
| 1794 | mask = mask | spv::ImageOperandsMask::Offset; | ||
| 1795 | operands.push_back(GetOffsetCoordinates(operation)); | ||
| 1796 | } | ||
| 1797 | |||
| 1798 | if (meta.depth_compare) { | ||
| 1799 | // Depth sampling | ||
| 1800 | UNIMPLEMENTED_IF(meta.bias); | ||
| 1801 | const Id dref = AsFloat(Visit(meta.depth_compare)); | ||
| 1802 | if (can_implicit) { | ||
| 1803 | return { | ||
| 1804 | OpImageSampleDrefImplicitLod(t_float, sampler, coords, dref, mask, operands), | ||
| 1805 | Type::Float}; | ||
| 1806 | } else { | ||
| 1807 | return { | ||
| 1808 | OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, operands), | ||
| 1809 | Type::Float}; | ||
| 1810 | } | ||
| 1811 | } | ||
| 1812 | |||
| 1813 | Id texture; | ||
| 1814 | if (can_implicit) { | ||
| 1815 | texture = OpImageSampleImplicitLod(t_float4, sampler, coords, mask, operands); | ||
| 1816 | } else { | ||
| 1817 | texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, operands); | ||
| 1818 | } | ||
| 1819 | return GetTextureElement(operation, texture, Type::Float); | ||
| 1820 | } | ||
| 1821 | |||
| 1822 | Expression TextureLod(Operation operation) { | ||
| 1823 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1824 | |||
| 1825 | const Id sampler = GetTextureSampler(operation); | ||
| 1826 | const Id coords = GetCoordinates(operation, Type::Float); | ||
| 1827 | const Id lod = AsFloat(Visit(meta.lod)); | ||
| 1828 | |||
| 1829 | spv::ImageOperandsMask mask = spv::ImageOperandsMask::Lod; | ||
| 1830 | std::vector<Id> operands{lod}; | ||
| 1831 | |||
| 1832 | if (!meta.aoffi.empty()) { | ||
| 1833 | mask = mask | spv::ImageOperandsMask::Offset; | ||
| 1834 | operands.push_back(GetOffsetCoordinates(operation)); | ||
| 1835 | } | ||
| 1836 | |||
| 1837 | if (meta.sampler.is_shadow) { | ||
| 1838 | const Id dref = AsFloat(Visit(meta.depth_compare)); | ||
| 1839 | return {OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, operands), | ||
| 1840 | Type::Float}; | ||
| 1841 | } | ||
| 1842 | const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, operands); | ||
| 1843 | return GetTextureElement(operation, texture, Type::Float); | ||
| 1844 | } | ||
| 1845 | |||
| 1846 | Expression TextureGather(Operation operation) { | ||
| 1847 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1848 | |||
| 1849 | const Id coords = GetCoordinates(operation, Type::Float); | ||
| 1850 | |||
| 1851 | spv::ImageOperandsMask mask = spv::ImageOperandsMask::MaskNone; | ||
| 1852 | std::vector<Id> operands; | ||
| 1853 | Id texture{}; | ||
| 1854 | |||
| 1855 | if (!meta.aoffi.empty()) { | ||
| 1856 | mask = mask | spv::ImageOperandsMask::Offset; | ||
| 1857 | operands.push_back(GetOffsetCoordinates(operation)); | ||
| 1858 | } | ||
| 1859 | |||
| 1860 | if (meta.sampler.is_shadow) { | ||
| 1861 | texture = OpImageDrefGather(t_float4, GetTextureSampler(operation), coords, | ||
| 1862 | AsFloat(Visit(meta.depth_compare)), mask, operands); | ||
| 1863 | } else { | ||
| 1864 | u32 component_value = 0; | ||
| 1865 | if (meta.component) { | ||
| 1866 | const auto component = std::get_if<ImmediateNode>(&*meta.component); | ||
| 1867 | ASSERT_MSG(component, "Component is not an immediate value"); | ||
| 1868 | component_value = component->GetValue(); | ||
| 1869 | } | ||
| 1870 | texture = OpImageGather(t_float4, GetTextureSampler(operation), coords, | ||
| 1871 | Constant(t_uint, component_value), mask, operands); | ||
| 1872 | } | ||
| 1873 | return GetTextureElement(operation, texture, Type::Float); | ||
| 1874 | } | ||
| 1875 | |||
| 1876 | Expression TextureQueryDimensions(Operation operation) { | ||
| 1877 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1878 | UNIMPLEMENTED_IF(!meta.aoffi.empty()); | ||
| 1879 | UNIMPLEMENTED_IF(meta.depth_compare); | ||
| 1880 | |||
| 1881 | const auto image_id = GetTextureImage(operation); | ||
| 1882 | if (meta.element == 3) { | ||
| 1883 | return {OpImageQueryLevels(t_int, image_id), Type::Int}; | ||
| 1884 | } | ||
| 1885 | |||
| 1886 | const Id lod = AsUint(Visit(operation[0])); | ||
| 1887 | const std::size_t coords_count = [&meta] { | ||
| 1888 | switch (const auto type = meta.sampler.type) { | ||
| 1889 | case Tegra::Shader::TextureType::Texture1D: | ||
| 1890 | return 1; | ||
| 1891 | case Tegra::Shader::TextureType::Texture2D: | ||
| 1892 | case Tegra::Shader::TextureType::TextureCube: | ||
| 1893 | return 2; | ||
| 1894 | case Tegra::Shader::TextureType::Texture3D: | ||
| 1895 | return 3; | ||
| 1896 | default: | ||
| 1897 | UNREACHABLE_MSG("Invalid texture type={}", type); | ||
| 1898 | return 2; | ||
| 1899 | } | ||
| 1900 | }(); | ||
| 1901 | |||
| 1902 | if (meta.element >= coords_count) { | ||
| 1903 | return {v_float_zero, Type::Float}; | ||
| 1904 | } | ||
| 1905 | |||
| 1906 | const std::array<Id, 3> types = {t_int, t_int2, t_int3}; | ||
| 1907 | const Id sizes = OpImageQuerySizeLod(types.at(coords_count - 1), image_id, lod); | ||
| 1908 | const Id size = OpCompositeExtract(t_int, sizes, meta.element); | ||
| 1909 | return {size, Type::Int}; | ||
| 1910 | } | ||
| 1911 | |||
| 1912 | Expression TextureQueryLod(Operation operation) { | ||
| 1913 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1914 | UNIMPLEMENTED_IF(!meta.aoffi.empty()); | ||
| 1915 | UNIMPLEMENTED_IF(meta.depth_compare); | ||
| 1916 | |||
| 1917 | if (meta.element >= 2) { | ||
| 1918 | UNREACHABLE_MSG("Invalid element"); | ||
| 1919 | return {v_float_zero, Type::Float}; | ||
| 1920 | } | ||
| 1921 | const auto sampler_id = GetTextureSampler(operation); | ||
| 1922 | |||
| 1923 | const Id multiplier = Constant(t_float, 256.0f); | ||
| 1924 | const Id multipliers = ConstantComposite(t_float2, multiplier, multiplier); | ||
| 1925 | |||
| 1926 | const Id coords = GetCoordinates(operation, Type::Float); | ||
| 1927 | Id size = OpImageQueryLod(t_float2, sampler_id, coords); | ||
| 1928 | size = OpFMul(t_float2, size, multipliers); | ||
| 1929 | size = OpConvertFToS(t_int2, size); | ||
| 1930 | return GetTextureElement(operation, size, Type::Int); | ||
| 1931 | } | ||
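The multiply by 256 followed by the float-to-int conversion turns the queried LOD into a fixed-point number with 8 fractional bits, which appears to be the format the guest's LOD query expects (an assumption, not stated in the code); a sketch of the conversion:

    #include <cassert>

    int main() {
        const float lod = 2.5f;                                 // queried LOD
        const int fixed_point = static_cast<int>(lod * 256.0f); // 8 fraction bits
        assert(fixed_point == 640);
        return 0;
    }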
| 1932 | |||
| 1933 | Expression TexelFetch(Operation operation) { | ||
| 1934 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1935 | UNIMPLEMENTED_IF(meta.depth_compare); | ||
| 1936 | |||
| 1937 | const Id image = GetTextureImage(operation); | ||
| 1938 | const Id coords = GetCoordinates(operation, Type::Int); | ||
| 1939 | |||
| 1940 | spv::ImageOperandsMask mask = spv::ImageOperandsMask::MaskNone; | ||
| 1941 | std::vector<Id> operands; | ||
| 1942 | Id fetch; | ||
| 1943 | |||
| 1944 | if (meta.lod && !meta.sampler.is_buffer) { | ||
| 1945 | mask = mask | spv::ImageOperandsMask::Lod; | ||
| 1946 | operands.push_back(AsInt(Visit(meta.lod))); | ||
| 1947 | } | ||
| 1948 | |||
| 1949 | if (!meta.aoffi.empty()) { | ||
| 1950 | mask = mask | spv::ImageOperandsMask::Offset; | ||
| 1951 | operands.push_back(GetOffsetCoordinates(operation)); | ||
| 1952 | } | ||
| 1953 | |||
| 1954 | fetch = OpImageFetch(t_float4, image, coords, mask, operands); | ||
| 1955 | return GetTextureElement(operation, fetch, Type::Float); | ||
| 1956 | } | ||
| 1957 | |||
| 1958 | Expression TextureGradient(Operation operation) { | ||
| 1959 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1960 | UNIMPLEMENTED_IF(!meta.aoffi.empty()); | ||
| 1961 | |||
| 1962 | const Id sampler = GetTextureSampler(operation); | ||
| 1963 | const Id coords = GetCoordinates(operation, Type::Float); | ||
| 1964 | const auto [dx, dy] = GetDerivatives(operation); | ||
| 1965 | const std::vector grad = {dx, dy}; | ||
| 1966 | |||
| 1967 | static constexpr auto mask = spv::ImageOperandsMask::Grad; | ||
| 1968 | const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, grad); | ||
| 1969 | return GetTextureElement(operation, texture, Type::Float); | ||
| 1970 | } | ||
| 1971 | |||
| 1972 | Expression ImageLoad(Operation operation) { | ||
| 1973 | if (!device.IsFormatlessImageLoadSupported()) { | ||
| 1974 | return {v_float_zero, Type::Float}; | ||
| 1975 | } | ||
| 1976 | |||
| 1977 | const auto& meta{std::get<MetaImage>(operation.GetMeta())}; | ||
| 1978 | |||
| 1979 | const Id coords = GetCoordinates(operation, Type::Int); | ||
| 1980 | const Id texel = OpImageRead(t_uint4, GetImage(operation), coords); | ||
| 1981 | |||
| 1982 | return {OpCompositeExtract(t_uint, texel, meta.element), Type::Uint}; | ||
| 1983 | } | ||
| 1984 | |||
| 1985 | Expression ImageStore(Operation operation) { | ||
| 1986 | const auto meta{std::get<MetaImage>(operation.GetMeta())}; | ||
| 1987 | std::vector<Id> colors; | ||
| 1988 | for (const auto& value : meta.values) { | ||
| 1989 | colors.push_back(AsUint(Visit(value))); | ||
| 1990 | } | ||
| 1991 | |||
| 1992 | const Id coords = GetCoordinates(operation, Type::Int); | ||
| 1993 | const Id texel = OpCompositeConstruct(t_uint4, colors); | ||
| 1994 | |||
| 1995 | OpImageWrite(GetImage(operation), coords, texel, {}); | ||
| 1996 | return {}; | ||
| 1997 | } | ||
| 1998 | |||
| 1999 | template <Id (Module::*func)(Id, Id, Id, Id, Id)> | ||
| 2000 | Expression AtomicImage(Operation operation) { | ||
| 2001 | const auto& meta{std::get<MetaImage>(operation.GetMeta())}; | ||
| 2002 | ASSERT(meta.values.size() == 1); | ||
| 2003 | |||
| 2004 | const Id coordinate = GetCoordinates(operation, Type::Int); | ||
| 2005 | const Id image = images.at(meta.image.index).image; | ||
| 2006 | const Id sample = v_uint_zero; | ||
| 2007 | const Id pointer = OpImageTexelPointer(t_image_uint, image, coordinate, sample); | ||
| 2008 | |||
| 2009 | const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device)); | ||
| 2010 | const Id semantics = v_uint_zero; | ||
| 2011 | const Id value = AsUint(Visit(meta.values[0])); | ||
| 2012 | return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint}; | ||
| 2013 | } | ||
| 2014 | |||
| 2015 | template <Id (Module::*func)(Id, Id, Id, Id, Id)> | ||
| 2016 | Expression Atomic(Operation operation) { | ||
| 2017 | Id pointer; | ||
| 2018 | if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { | ||
| 2019 | pointer = GetSharedMemoryPointer(*smem); | ||
| 2020 | } else if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { | ||
| 2021 | pointer = GetGlobalMemoryPointer(*gmem); | ||
| 2022 | } else { | ||
| 2023 | UNREACHABLE(); | ||
| 2024 | return {v_float_zero, Type::Float}; | ||
| 2025 | } | ||
| 2026 | const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device)); | ||
| 2027 | const Id semantics = v_uint_zero; | ||
| 2028 | const Id value = AsUint(Visit(operation[1])); | ||
| 2029 | |||
| 2030 | return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint}; | ||
| 2031 | } | ||
| 2032 | |||
| 2033 | template <Id (Module::*func)(Id, Id, Id, Id, Id)> | ||
| 2034 | Expression Reduce(Operation operation) { | ||
| 2035 | Atomic<func>(operation); | ||
| 2036 | return {}; | ||
| 2037 | } | ||
| 2038 | |||
| 2039 | Expression Branch(Operation operation) { | ||
| 2040 | const auto& target = std::get<ImmediateNode>(*operation[0]); | ||
| 2041 | OpStore(jmp_to, Constant(t_uint, target.GetValue())); | ||
| 2042 | OpBranch(continue_label); | ||
| 2043 | inside_branch = true; | ||
| 2044 | if (!conditional_branch_set) { | ||
| 2045 | AddLabel(); | ||
| 2046 | } | ||
| 2047 | return {}; | ||
| 2048 | } | ||
| 2049 | |||
| 2050 | Expression BranchIndirect(Operation operation) { | ||
| 2051 | const Id op_a = AsUint(Visit(operation[0])); | ||
| 2052 | |||
| 2053 | OpStore(jmp_to, op_a); | ||
| 2054 | OpBranch(continue_label); | ||
| 2055 | inside_branch = true; | ||
| 2056 | if (!conditional_branch_set) { | ||
| 2057 | AddLabel(); | ||
| 2058 | } | ||
| 2059 | return {}; | ||
| 2060 | } | ||
| 2061 | |||
| 2062 | Expression PushFlowStack(Operation operation) { | ||
| 2063 | const auto& target = std::get<ImmediateNode>(*operation[0]); | ||
| 2064 | const auto [flow_stack, flow_stack_top] = GetFlowStack(operation); | ||
| 2065 | const Id current = OpLoad(t_uint, flow_stack_top); | ||
| 2066 | const Id next = OpIAdd(t_uint, current, Constant(t_uint, 1)); | ||
| 2067 | const Id access = OpAccessChain(t_func_uint, flow_stack, current); | ||
| 2068 | |||
| 2069 | OpStore(access, Constant(t_uint, target.GetValue())); | ||
| 2070 | OpStore(flow_stack_top, next); | ||
| 2071 | return {}; | ||
| 2072 | } | ||
| 2073 | |||
| 2074 | Expression PopFlowStack(Operation operation) { | ||
| 2075 | const auto [flow_stack, flow_stack_top] = GetFlowStack(operation); | ||
| 2076 | const Id current = OpLoad(t_uint, flow_stack_top); | ||
| 2077 | const Id previous = OpISub(t_uint, current, Constant(t_uint, 1)); | ||
| 2078 | const Id access = OpAccessChain(t_func_uint, flow_stack, previous); | ||
| 2079 | const Id target = OpLoad(t_uint, access); | ||
| 2080 | |||
| 2081 | OpStore(flow_stack_top, previous); | ||
| 2082 | OpStore(jmp_to, target); | ||
| 2083 | OpBranch(continue_label); | ||
| 2084 | inside_branch = true; | ||
| 2085 | if (!conditional_branch_set) { | ||
| 2086 | AddLabel(); | ||
| 2087 | } | ||
| 2088 | return {}; | ||
| 2089 | } | ||
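Taken together, PushFlowStack and PopFlowStack implement a plain array-backed stack in SPIR-V. A hypothetical host-side model of the same discipline (names are illustrative, not emulator code):

```cpp
#include <array>
#include <cstdint>

struct FlowStack {
    std::array<std::uint32_t, 20> data{}; // matches FLOW_STACK_SIZE below
    std::uint32_t top = 0;

    // Push writes the return target at `top`, then increments it.
    void Push(std::uint32_t target) { data[top++] = target; }
    // Pop decrements first, then reads the same slot back.
    std::uint32_t Pop() { return data[--top]; }
};
```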
| 2090 | |||
| 2091 | Id MaxwellToSpirvComparison(Maxwell::ComparisonOp compare_op, Id operand_1, Id operand_2) { | ||
| 2092 | using Compare = Maxwell::ComparisonOp; | ||
| 2093 | switch (compare_op) { | ||
| 2094 | case Compare::NeverOld: | ||
| 2095 | return v_false; // Never let the test pass | ||
| 2096 | case Compare::LessOld: | ||
| 2097 | return OpFOrdLessThan(t_bool, operand_1, operand_2); | ||
| 2098 | case Compare::EqualOld: | ||
| 2099 | return OpFOrdEqual(t_bool, operand_1, operand_2); | ||
| 2100 | case Compare::LessEqualOld: | ||
| 2101 | return OpFOrdLessThanEqual(t_bool, operand_1, operand_2); | ||
| 2102 | case Compare::GreaterOld: | ||
| 2103 | return OpFOrdGreaterThan(t_bool, operand_1, operand_2); | ||
| 2104 | case Compare::NotEqualOld: | ||
| 2105 | return OpFOrdNotEqual(t_bool, operand_1, operand_2); | ||
| 2106 | case Compare::GreaterEqualOld: | ||
| 2107 | return OpFOrdGreaterThanEqual(t_bool, operand_1, operand_2); | ||
| 2108 | default: | ||
| 2109 | UNREACHABLE(); | ||
| 2110 | return v_true; | ||
| 2111 | } | ||
| 2112 | } | ||
| 2113 | |||
| 2114 | void AlphaTest(Id pointer) { | ||
| 2115 | if (specialization.alpha_test_func == Maxwell::ComparisonOp::AlwaysOld) { | ||
| 2116 | return; | ||
| 2117 | } | ||
| 2118 | const Id true_label = OpLabel(); | ||
| 2119 | const Id discard_label = OpLabel(); | ||
| 2120 | const Id alpha_reference = Constant(t_float, specialization.alpha_test_ref); | ||
| 2121 | const Id alpha_value = OpLoad(t_float, pointer); | ||
| 2122 | const Id condition = | ||
| 2123 | MaxwellToSpirvComparison(specialization.alpha_test_func, alpha_value, alpha_reference); | ||
| 2124 | |||
| 2125 | OpBranchConditional(condition, true_label, discard_label); | ||
| 2126 | AddLabel(discard_label); | ||
| 2127 | OpKill(); | ||
| 2128 | AddLabel(true_label); | ||
| 2129 | } | ||
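The control flow above amounts to a fragment kill on a failed comparison. A host-side restatement of the predicate being specialized (a sketch with a simplified enum, not the emitted SPIR-V):

```cpp
enum class CompareOp { Never, Less, Equal, LessEqual, Greater, NotEqual, GreaterEqual, Always };

// The fragment survives only when this returns true for the alpha value
// written to render target 0; the Always case is the early return above.
constexpr bool AlphaTestPasses(CompareOp op, float value, float ref) {
    switch (op) {
    case CompareOp::Never:        return false;
    case CompareOp::Less:         return value < ref;
    case CompareOp::Equal:        return value == ref;
    case CompareOp::LessEqual:    return value <= ref;
    case CompareOp::Greater:      return value > ref;
    case CompareOp::NotEqual:     return value != ref;
    case CompareOp::GreaterEqual: return value >= ref;
    case CompareOp::Always:       return true;
    }
    return true;
}
```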
| 2130 | |||
| 2131 | void PreExit() { | ||
| 2132 | if (stage == ShaderType::Vertex && specialization.ndc_minus_one_to_one) { | ||
| 2133 | const u32 position_index = out_indices.position.value(); | ||
| 2134 | const Id z_pointer = AccessElement(t_out_float, out_vertex, position_index, 2U); | ||
| 2135 | const Id w_pointer = AccessElement(t_out_float, out_vertex, position_index, 3U); | ||
| 2136 | Id depth = OpLoad(t_float, z_pointer); | ||
| 2137 | depth = OpFAdd(t_float, depth, OpLoad(t_float, w_pointer)); | ||
| 2138 | depth = OpFMul(t_float, depth, Constant(t_float, 0.5f)); | ||
| 2139 | OpStore(z_pointer, depth); | ||
| 2140 | } | ||
| 2141 | if (stage == ShaderType::Fragment) { | ||
| 2142 | const auto SafeGetRegister = [this](u32 reg) { | ||
| 2143 | if (const auto it = registers.find(reg); it != registers.end()) { | ||
| 2144 | return OpLoad(t_float, it->second); | ||
| 2145 | } | ||
| 2146 | return v_float_zero; | ||
| 2147 | }; | ||
| 2148 | |||
| 2149 | UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, | ||
| 2150 | "Sample mask write is unimplemented"); | ||
| 2151 | |||
| 2152 | // Write the color outputs using the data in the shader registers; disabled | ||
| 2153 | // render targets and components are skipped in the register assignment. | ||
| 2154 | u32 current_reg = 0; | ||
| 2155 | for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { | ||
| 2156 | // TODO(Subv): Figure out how dual-source blending is configured on the Switch. | ||
| 2157 | for (u32 component = 0; component < 4; ++component) { | ||
| 2158 | if (!header.ps.IsColorComponentOutputEnabled(rt, component)) { | ||
| 2159 | continue; | ||
| 2160 | } | ||
| 2161 | const Id pointer = AccessElement(t_out_float, frag_colors[rt], component); | ||
| 2162 | OpStore(pointer, SafeGetRegister(current_reg)); | ||
| 2163 | if (rt == 0 && component == 3) { | ||
| 2164 | AlphaTest(pointer); | ||
| 2165 | } | ||
| 2166 | ++current_reg; | ||
| 2167 | } | ||
| 2168 | } | ||
| 2169 | if (header.ps.omap.depth) { | ||
| 2170 | // The depth output is always 2 registers after the last color output, and | ||
| 2171 | // current_reg already contains one past the last color register. | ||
| 2172 | OpStore(frag_depth, SafeGetRegister(current_reg + 1)); | ||
| 2173 | } | ||
| 2174 | } | ||
| 2175 | } | ||
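A worked example of the register packing PreExit consumes, under a hypothetical framebuffer setup where RT0 writes RGBA and RT1 writes RG: registers 0..3 feed RT0 (the alpha test fires while storing register 3), registers 4..5 feed RT1, `current_reg` ends at 6, and the depth write reads register 7, i.e. two past the last color register:

```cpp
constexpr unsigned last_color_reg = 5;                // RT1.g in this example
constexpr unsigned current_reg = last_color_reg + 1;  // one past the last color register
static_assert(current_reg + 1 == last_color_reg + 2); // the "always 2 registers after" rule
```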
| 2176 | |||
| 2177 | Expression Exit(Operation) { | ||
| 2178 | PreExit(); | ||
| 2179 | inside_branch = true; | ||
| 2180 | if (conditional_branch_set) { | ||
| 2181 | OpReturn(); | ||
| 2182 | } else { | ||
| 2183 | const Id dummy = OpLabel(); | ||
| 2184 | OpBranch(dummy); | ||
| 2185 | AddLabel(dummy); | ||
| 2186 | OpReturn(); | ||
| 2187 | AddLabel(); | ||
| 2188 | } | ||
| 2189 | return {}; | ||
| 2190 | } | ||
| 2191 | |||
| 2192 | Expression Discard(Operation) { | ||
| 2193 | inside_branch = true; | ||
| 2194 | if (conditional_branch_set) { | ||
| 2195 | OpKill(); | ||
| 2196 | } else { | ||
| 2197 | const Id dummy = OpLabel(); | ||
| 2198 | OpBranch(dummy); | ||
| 2199 | AddLabel(dummy); | ||
| 2200 | OpKill(); | ||
| 2201 | AddLabel(); | ||
| 2202 | } | ||
| 2203 | return {}; | ||
| 2204 | } | ||
| 2205 | |||
| 2206 | Expression EmitVertex(Operation) { | ||
| 2207 | OpEmitVertex(); | ||
| 2208 | return {}; | ||
| 2209 | } | ||
| 2210 | |||
| 2211 | Expression EndPrimitive(Operation) { | ||
| 2212 | OpEndPrimitive(); | ||
| 2213 | return {}; | ||
| 2214 | } | ||
| 2215 | |||
| 2216 | Expression InvocationId(Operation) { | ||
| 2217 | return {OpLoad(t_int, invocation_id), Type::Int}; | ||
| 2218 | } | ||
| 2219 | |||
| 2220 | Expression YNegate(Operation) { | ||
| 2221 | LOG_WARNING(Render_Vulkan, "(STUBBED)"); | ||
| 2222 | return {Constant(t_float, 1.0f), Type::Float}; | ||
| 2223 | } | ||
| 2224 | |||
| 2225 | template <u32 element> | ||
| 2226 | Expression LocalInvocationId(Operation) { | ||
| 2227 | const Id id = OpLoad(t_uint3, local_invocation_id); | ||
| 2228 | return {OpCompositeExtract(t_uint, id, element), Type::Uint}; | ||
| 2229 | } | ||
| 2230 | |||
| 2231 | template <u32 element> | ||
| 2232 | Expression WorkGroupId(Operation) { | ||
| 2233 | const Id id = OpLoad(t_uint3, workgroup_id); | ||
| 2234 | return {OpCompositeExtract(t_uint, id, element), Type::Uint}; | ||
| 2235 | } | ||
| 2236 | |||
| 2237 | Expression BallotThread(Operation operation) { | ||
| 2238 | const Id predicate = AsBool(Visit(operation[0])); | ||
| 2239 | const Id ballot = OpSubgroupBallotKHR(t_uint4, predicate); | ||
| 2240 | |||
| 2241 | if (!device.IsWarpSizePotentiallyBiggerThanGuest()) { | ||
| 2242 | // Devices with the guest's warp size can simply return the first word. | ||
| 2243 | return {OpCompositeExtract(t_uint, ballot, 0U), Type::Uint}; | ||
| 2244 | } | ||
| 2245 | |||
| 2246 | // Other devices have to return the word that is local to the current thread. | ||
| 2247 | // For instance, a device with a warp size of 64 returns the upper uint when the | ||
| 2248 | // current thread index is 38. | ||
| 2249 | const Id tid = OpLoad(t_uint, thread_id); | ||
| 2250 | const Id thread_index = OpShiftRightLogical(t_uint, tid, Constant(t_uint, 5)); | ||
| 2251 | return {OpVectorExtractDynamic(t_uint, ballot, thread_index), Type::Uint}; | ||
| 2252 | } | ||
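The shift by 5 selects which 32-bit word of the ballot holds the current thread's bit, since each `uint` component covers 32 lanes. A standalone check of the example in the comment:

```cpp
#include <cstdint>

constexpr std::uint32_t BallotWordIndex(std::uint32_t tid) { return tid >> 5; }
constexpr std::uint32_t BallotBitIndex(std::uint32_t tid)  { return tid & 31; }

static_assert(BallotWordIndex(38) == 1); // thread 38 reads the upper word...
static_assert(BallotBitIndex(38) == 6);  // ...where its bit sits at position 6
```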
| 2253 | |||
| 2254 | template <Id (Module::*func)(Id, Id)> | ||
| 2255 | Expression Vote(Operation operation) { | ||
| 2256 | // TODO(Rodrigo): Handle devices with different warp sizes | ||
| 2257 | const Id predicate = AsBool(Visit(operation[0])); | ||
| 2258 | return {(this->*func)(t_bool, predicate), Type::Bool}; | ||
| 2259 | } | ||
| 2260 | |||
| 2261 | Expression ThreadId(Operation) { | ||
| 2262 | return {OpLoad(t_uint, thread_id), Type::Uint}; | ||
| 2263 | } | ||
| 2264 | |||
| 2265 | template <std::size_t index> | ||
| 2266 | Expression ThreadMask(Operation) { | ||
| 2267 | // TODO(Rodrigo): Handle devices with different warp sizes | ||
| 2268 | const Id mask = thread_masks[index]; | ||
| 2269 | return {OpLoad(t_uint, AccessElement(t_in_uint, mask, 0)), Type::Uint}; | ||
| 2270 | } | ||
| 2271 | |||
| 2272 | Expression ShuffleIndexed(Operation operation) { | ||
| 2273 | const Id value = AsFloat(Visit(operation[0])); | ||
| 2274 | const Id index = AsUint(Visit(operation[1])); | ||
| 2275 | return {OpSubgroupReadInvocationKHR(t_float, value, index), Type::Float}; | ||
| 2276 | } | ||
| 2277 | |||
| 2278 | Expression Barrier(Operation) { | ||
| 2279 | if (!ir.IsDecompiled()) { | ||
| 2280 | LOG_ERROR(Render_Vulkan, "OpBarrier used in a shader that is not decompiled"); | ||
| 2281 | return {}; | ||
| 2282 | } | ||
| 2283 | |||
| 2284 | const auto scope = spv::Scope::Workgroup; | ||
| 2285 | const auto memory = spv::Scope::Workgroup; | ||
| 2286 | const auto semantics = | ||
| 2287 | spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AcquireRelease; | ||
| 2288 | OpControlBarrier(Constant(t_uint, static_cast<u32>(scope)), | ||
| 2289 | Constant(t_uint, static_cast<u32>(memory)), | ||
| 2290 | Constant(t_uint, static_cast<u32>(semantics))); | ||
| 2291 | return {}; | ||
| 2292 | } | ||
| 2293 | |||
| 2294 | template <spv::Scope scope> | ||
| 2295 | Expression MemoryBarrier(Operation) { | ||
| 2296 | const auto semantics = | ||
| 2297 | spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory | | ||
| 2298 | spv::MemorySemanticsMask::WorkgroupMemory | | ||
| 2299 | spv::MemorySemanticsMask::AtomicCounterMemory | spv::MemorySemanticsMask::ImageMemory; | ||
| 2300 | |||
| 2301 | OpMemoryBarrier(Constant(t_uint, static_cast<u32>(scope)), | ||
| 2302 | Constant(t_uint, static_cast<u32>(semantics))); | ||
| 2303 | return {}; | ||
| 2304 | } | ||
| 2305 | |||
| 2306 | Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type, std::string name) { | ||
| 2307 | const Id id = OpVariable(type, storage); | ||
| 2308 | Decorate(id, spv::Decoration::BuiltIn, static_cast<u32>(builtin)); | ||
| 2309 | AddGlobalVariable(Name(id, std::move(name))); | ||
| 2310 | interfaces.push_back(id); | ||
| 2311 | return id; | ||
| 2312 | } | ||
| 2313 | |||
| 2314 | Id DeclareInputBuiltIn(spv::BuiltIn builtin, Id type, std::string name) { | ||
| 2315 | return DeclareBuiltIn(builtin, spv::StorageClass::Input, type, std::move(name)); | ||
| 2316 | } | ||
| 2317 | |||
| 2318 | template <typename... Args> | ||
| 2319 | Id AccessElement(Id pointer_type, Id composite, Args... elements_) { | ||
| 2320 | std::vector<Id> members; | ||
| 2321 | auto elements = {elements_...}; | ||
| 2322 | for (const auto element : elements) { | ||
| 2323 | members.push_back(Constant(t_uint, element)); | ||
| 2324 | } | ||
| 2325 | |||
| 2326 | return OpAccessChain(pointer_type, composite, members); | ||
| 2327 | } | ||
| 2328 | |||
| 2329 | Id As(Expression expr, Type wanted_type) { | ||
| 2330 | switch (wanted_type) { | ||
| 2331 | case Type::Bool: | ||
| 2332 | return AsBool(expr); | ||
| 2333 | case Type::Bool2: | ||
| 2334 | return AsBool2(expr); | ||
| 2335 | case Type::Float: | ||
| 2336 | return AsFloat(expr); | ||
| 2337 | case Type::Int: | ||
| 2338 | return AsInt(expr); | ||
| 2339 | case Type::Uint: | ||
| 2340 | return AsUint(expr); | ||
| 2341 | case Type::HalfFloat: | ||
| 2342 | return AsHalfFloat(expr); | ||
| 2343 | default: | ||
| 2344 | UNREACHABLE(); | ||
| 2345 | return expr.id; | ||
| 2346 | } | ||
| 2347 | } | ||
| 2348 | |||
| 2349 | Id AsBool(Expression expr) { | ||
| 2350 | ASSERT(expr.type == Type::Bool); | ||
| 2351 | return expr.id; | ||
| 2352 | } | ||
| 2353 | |||
| 2354 | Id AsBool2(Expression expr) { | ||
| 2355 | ASSERT(expr.type == Type::Bool2); | ||
| 2356 | return expr.id; | ||
| 2357 | } | ||
| 2358 | |||
| 2359 | Id AsFloat(Expression expr) { | ||
| 2360 | switch (expr.type) { | ||
| 2361 | case Type::Float: | ||
| 2362 | return expr.id; | ||
| 2363 | case Type::Int: | ||
| 2364 | case Type::Uint: | ||
| 2365 | return OpBitcast(t_float, expr.id); | ||
| 2366 | case Type::HalfFloat: | ||
| 2367 | if (device.IsFloat16Supported()) { | ||
| 2368 | return OpBitcast(t_float, expr.id); | ||
| 2369 | } | ||
| 2370 | return OpBitcast(t_float, OpPackHalf2x16(t_uint, expr.id)); | ||
| 2371 | default: | ||
| 2372 | UNREACHABLE(); | ||
| 2373 | return expr.id; | ||
| 2374 | } | ||
| 2375 | } | ||
| 2376 | |||
| 2377 | Id AsInt(Expression expr) { | ||
| 2378 | switch (expr.type) { | ||
| 2379 | case Type::Int: | ||
| 2380 | return expr.id; | ||
| 2381 | case Type::Float: | ||
| 2382 | case Type::Uint: | ||
| 2383 | return OpBitcast(t_int, expr.id); | ||
| 2384 | case Type::HalfFloat: | ||
| 2385 | if (device.IsFloat16Supported()) { | ||
| 2386 | return OpBitcast(t_int, expr.id); | ||
| 2387 | } | ||
| 2388 | return OpPackHalf2x16(t_int, expr.id); | ||
| 2389 | default: | ||
| 2390 | UNREACHABLE(); | ||
| 2391 | return expr.id; | ||
| 2392 | } | ||
| 2393 | } | ||
| 2394 | |||
| 2395 | Id AsUint(Expression expr) { | ||
| 2396 | switch (expr.type) { | ||
| 2397 | case Type::Uint: | ||
| 2398 | return expr.id; | ||
| 2399 | case Type::Float: | ||
| 2400 | case Type::Int: | ||
| 2401 | return OpBitcast(t_uint, expr.id); | ||
| 2402 | case Type::HalfFloat: | ||
| 2403 | if (device.IsFloat16Supported()) { | ||
| 2404 | return OpBitcast(t_uint, expr.id); | ||
| 2405 | } | ||
| 2406 | return OpPackHalf2x16(t_uint, expr.id); | ||
| 2407 | default: | ||
| 2408 | UNREACHABLE(); | ||
| 2409 | return expr.id; | ||
| 2410 | } | ||
| 2411 | } | ||
| 2412 | |||
| 2413 | Id AsHalfFloat(Expression expr) { | ||
| 2414 | switch (expr.type) { | ||
| 2415 | case Type::HalfFloat: | ||
| 2416 | return expr.id; | ||
| 2417 | case Type::Float: | ||
| 2418 | case Type::Int: | ||
| 2419 | case Type::Uint: | ||
| 2420 | if (device.IsFloat16Supported()) { | ||
| 2421 | return OpBitcast(t_half, expr.id); | ||
| 2422 | } | ||
| 2423 | return OpUnpackHalf2x16(t_half, AsUint(expr)); | ||
| 2424 | default: | ||
| 2425 | UNREACHABLE(); | ||
| 2426 | return expr.id; | ||
| 2427 | } | ||
| 2428 | } | ||
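When `device.IsFloat16Supported()` is false, the decompiler never materializes real 16-bit types: a HalfFloat pair travels as two 16-bit patterns packed into one 32-bit scalar, which is why the conversions above fall back to OpPackHalf2x16/OpUnpackHalf2x16 instead of plain bitcasts. A host-side illustration of that layout (bit patterns only; the real ops also convert between 32-bit and half precision):

```cpp
#include <cstdint>

// The first component lands in the low 16 bits, the second in the high bits.
constexpr std::uint32_t PackHalf2x16Bits(std::uint16_t lo, std::uint16_t hi) {
    return static_cast<std::uint32_t>(lo) | (static_cast<std::uint32_t>(hi) << 16);
}
static_assert(PackHalf2x16Bits(0x3C00, 0x4000) == 0x40003C00); // (1.0h, 2.0h)
```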
| 2429 | |||
| 2430 | Id GetHalfScalarFromFloat(Id value) { | ||
| 2431 | if (device.IsFloat16Supported()) { | ||
| 2432 | return OpFConvert(t_scalar_half, value); | ||
| 2433 | } | ||
| 2434 | return value; | ||
| 2435 | } | ||
| 2436 | |||
| 2437 | Id GetFloatFromHalfScalar(Id value) { | ||
| 2438 | if (device.IsFloat16Supported()) { | ||
| 2439 | return OpFConvert(t_float, value); | ||
| 2440 | } | ||
| 2441 | return value; | ||
| 2442 | } | ||
| 2443 | |||
| 2444 | AttributeType GetAttributeType(u32 location) const { | ||
| 2445 | if (stage != ShaderType::Vertex) { | ||
| 2446 | return {Type::Float, t_in_float, t_in_float4}; | ||
| 2447 | } | ||
| 2448 | switch (specialization.attribute_types.at(location)) { | ||
| 2449 | case Maxwell::VertexAttribute::Type::SignedNorm: | ||
| 2450 | case Maxwell::VertexAttribute::Type::UnsignedNorm: | ||
| 2451 | case Maxwell::VertexAttribute::Type::UnsignedScaled: | ||
| 2452 | case Maxwell::VertexAttribute::Type::SignedScaled: | ||
| 2453 | case Maxwell::VertexAttribute::Type::Float: | ||
| 2454 | return {Type::Float, t_in_float, t_in_float4}; | ||
| 2455 | case Maxwell::VertexAttribute::Type::SignedInt: | ||
| 2456 | return {Type::Int, t_in_int, t_in_int4}; | ||
| 2457 | case Maxwell::VertexAttribute::Type::UnsignedInt: | ||
| 2458 | return {Type::Uint, t_in_uint, t_in_uint4}; | ||
| 2459 | default: | ||
| 2460 | UNREACHABLE(); | ||
| 2461 | return {Type::Float, t_in_float, t_in_float4}; | ||
| 2462 | } | ||
| 2463 | } | ||
| 2464 | |||
| 2465 | Id GetTypeDefinition(Type type) const { | ||
| 2466 | switch (type) { | ||
| 2467 | case Type::Bool: | ||
| 2468 | return t_bool; | ||
| 2469 | case Type::Bool2: | ||
| 2470 | return t_bool2; | ||
| 2471 | case Type::Float: | ||
| 2472 | return t_float; | ||
| 2473 | case Type::Int: | ||
| 2474 | return t_int; | ||
| 2475 | case Type::Uint: | ||
| 2476 | return t_uint; | ||
| 2477 | case Type::HalfFloat: | ||
| 2478 | return t_half; | ||
| 2479 | default: | ||
| 2480 | UNREACHABLE(); | ||
| 2481 | return {}; | ||
| 2482 | } | ||
| 2483 | } | ||
| 2484 | |||
| 2485 | std::array<Id, 4> GetTypeVectorDefinitionLut(Type type) const { | ||
| 2486 | switch (type) { | ||
| 2487 | case Type::Float: | ||
| 2488 | return {t_float, t_float2, t_float3, t_float4}; | ||
| 2489 | case Type::Int: | ||
| 2490 | return {t_int, t_int2, t_int3, t_int4}; | ||
| 2491 | case Type::Uint: | ||
| 2492 | return {t_uint, t_uint2, t_uint3, t_uint4}; | ||
| 2493 | default: | ||
| 2494 | UNIMPLEMENTED(); | ||
| 2495 | return {}; | ||
| 2496 | } | ||
| 2497 | } | ||
| 2498 | |||
| 2499 | std::tuple<Id, Id> CreateFlowStack() { | ||
| 2500 | // TODO(Rodrigo): Figure out the actual depth of the flow stack; for now it seems unlikely | ||
| 2501 | // that shaders will use 20 nested SSYs and PBKs. | ||
| 2502 | constexpr u32 FLOW_STACK_SIZE = 20; | ||
| 2503 | constexpr auto storage_class = spv::StorageClass::Function; | ||
| 2504 | |||
| 2505 | const Id flow_stack_type = TypeArray(t_uint, Constant(t_uint, FLOW_STACK_SIZE)); | ||
| 2506 | const Id stack = OpVariable(TypePointer(storage_class, flow_stack_type), storage_class, | ||
| 2507 | ConstantNull(flow_stack_type)); | ||
| 2508 | const Id top = OpVariable(t_func_uint, storage_class, Constant(t_uint, 0)); | ||
| 2509 | AddLocalVariable(stack); | ||
| 2510 | AddLocalVariable(top); | ||
| 2511 | return std::tie(stack, top); | ||
| 2512 | } | ||
| 2513 | |||
| 2514 | std::pair<Id, Id> GetFlowStack(Operation operation) { | ||
| 2515 | const auto stack_class = std::get<MetaStackClass>(operation.GetMeta()); | ||
| 2516 | switch (stack_class) { | ||
| 2517 | case MetaStackClass::Ssy: | ||
| 2518 | return {ssy_flow_stack, ssy_flow_stack_top}; | ||
| 2519 | case MetaStackClass::Pbk: | ||
| 2520 | return {pbk_flow_stack, pbk_flow_stack_top}; | ||
| 2521 | } | ||
| 2522 | UNREACHABLE(); | ||
| 2523 | return {}; | ||
| 2524 | } | ||
| 2525 | |||
| 2526 | Id GetGlobalMemoryPointer(const GmemNode& gmem) { | ||
| 2527 | const Id real = AsUint(Visit(gmem.GetRealAddress())); | ||
| 2528 | const Id base = AsUint(Visit(gmem.GetBaseAddress())); | ||
| 2529 | const Id diff = OpISub(t_uint, real, base); | ||
| 2530 | const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2)); | ||
| 2531 | const Id buffer = global_buffers.at(gmem.GetDescriptor()); | ||
| 2532 | return OpAccessChain(t_gmem_uint, buffer, Constant(t_uint, 0), offset); | ||
| 2533 | } | ||
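Global memory is exposed to the shader as a runtime array of 32-bit words, so the pointer math above turns a byte distance from the region base into a word index. The same arithmetic, standalone:

```cpp
#include <cstdint>

constexpr std::uint32_t GmemWordIndex(std::uint32_t real, std::uint32_t base) {
    return (real - base) >> 2; // bytes -> 32-bit words
}
static_assert(GmemWordIndex(0x130, 0x100) == 12); // byte offset 0x30 is the 13th word
```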
| 2534 | |||
| 2535 | Id GetSharedMemoryPointer(const SmemNode& smem) { | ||
| 2536 | ASSERT(stage == ShaderType::Compute); | ||
| 2537 | Id address = AsUint(Visit(smem.GetAddress())); | ||
| 2538 | address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); | ||
| 2539 | return OpAccessChain(t_smem_uint, shared_memory, address); | ||
| 2540 | } | ||
| 2541 | |||
| 2542 | static constexpr std::array operation_decompilers = { | ||
| 2543 | &SPIRVDecompiler::Assign, | ||
| 2544 | |||
| 2545 | &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float, | ||
| 2546 | Type::Float>, | ||
| 2547 | |||
| 2548 | &SPIRVDecompiler::Binary<&Module::OpFAdd, Type::Float>, | ||
| 2549 | &SPIRVDecompiler::Binary<&Module::OpFMul, Type::Float>, | ||
| 2550 | &SPIRVDecompiler::Binary<&Module::OpFDiv, Type::Float>, | ||
| 2551 | &SPIRVDecompiler::Ternary<&Module::OpFma, Type::Float>, | ||
| 2552 | &SPIRVDecompiler::Unary<&Module::OpFNegate, Type::Float>, | ||
| 2553 | &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::Float>, | ||
| 2554 | &SPIRVDecompiler::Ternary<&Module::OpFClamp, Type::Float>, | ||
| 2555 | &SPIRVDecompiler::FCastHalf<0>, | ||
| 2556 | &SPIRVDecompiler::FCastHalf<1>, | ||
| 2557 | &SPIRVDecompiler::Binary<&Module::OpFMin, Type::Float>, | ||
| 2558 | &SPIRVDecompiler::Binary<&Module::OpFMax, Type::Float>, | ||
| 2559 | &SPIRVDecompiler::Unary<&Module::OpCos, Type::Float>, | ||
| 2560 | &SPIRVDecompiler::Unary<&Module::OpSin, Type::Float>, | ||
| 2561 | &SPIRVDecompiler::Unary<&Module::OpExp2, Type::Float>, | ||
| 2562 | &SPIRVDecompiler::Unary<&Module::OpLog2, Type::Float>, | ||
| 2563 | &SPIRVDecompiler::Unary<&Module::OpInverseSqrt, Type::Float>, | ||
| 2564 | &SPIRVDecompiler::Unary<&Module::OpSqrt, Type::Float>, | ||
| 2565 | &SPIRVDecompiler::Unary<&Module::OpRoundEven, Type::Float>, | ||
| 2566 | &SPIRVDecompiler::Unary<&Module::OpFloor, Type::Float>, | ||
| 2567 | &SPIRVDecompiler::Unary<&Module::OpCeil, Type::Float>, | ||
| 2568 | &SPIRVDecompiler::Unary<&Module::OpTrunc, Type::Float>, | ||
| 2569 | &SPIRVDecompiler::Unary<&Module::OpConvertSToF, Type::Float, Type::Int>, | ||
| 2570 | &SPIRVDecompiler::Unary<&Module::OpConvertUToF, Type::Float, Type::Uint>, | ||
| 2571 | &SPIRVDecompiler::FSwizzleAdd, | ||
| 2572 | |||
| 2573 | &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Int>, | ||
| 2574 | &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Int>, | ||
| 2575 | &SPIRVDecompiler::Binary<&Module::OpSDiv, Type::Int>, | ||
| 2576 | &SPIRVDecompiler::Unary<&Module::OpSNegate, Type::Int>, | ||
| 2577 | &SPIRVDecompiler::Unary<&Module::OpSAbs, Type::Int>, | ||
| 2578 | &SPIRVDecompiler::Binary<&Module::OpSMin, Type::Int>, | ||
| 2579 | &SPIRVDecompiler::Binary<&Module::OpSMax, Type::Int>, | ||
| 2580 | |||
| 2581 | &SPIRVDecompiler::Unary<&Module::OpConvertFToS, Type::Int, Type::Float>, | ||
| 2582 | &SPIRVDecompiler::Unary<&Module::OpBitcast, Type::Int, Type::Uint>, | ||
| 2583 | &SPIRVDecompiler::Binary<&Module::OpShiftLeftLogical, Type::Int, Type::Int, Type::Uint>, | ||
| 2584 | &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Int, Type::Int, Type::Uint>, | ||
| 2585 | &SPIRVDecompiler::Binary<&Module::OpShiftRightArithmetic, Type::Int, Type::Int, Type::Uint>, | ||
| 2586 | &SPIRVDecompiler::Binary<&Module::OpBitwiseAnd, Type::Int>, | ||
| 2587 | &SPIRVDecompiler::Binary<&Module::OpBitwiseOr, Type::Int>, | ||
| 2588 | &SPIRVDecompiler::Binary<&Module::OpBitwiseXor, Type::Int>, | ||
| 2589 | &SPIRVDecompiler::Unary<&Module::OpNot, Type::Int>, | ||
| 2590 | &SPIRVDecompiler::Quaternary<&Module::OpBitFieldInsert, Type::Int>, | ||
| 2591 | &SPIRVDecompiler::Ternary<&Module::OpBitFieldSExtract, Type::Int>, | ||
| 2592 | &SPIRVDecompiler::Unary<&Module::OpBitCount, Type::Int>, | ||
| 2593 | &SPIRVDecompiler::Unary<&Module::OpFindSMsb, Type::Int>, | ||
| 2594 | |||
| 2595 | &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Uint>, | ||
| 2596 | &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Uint>, | ||
| 2597 | &SPIRVDecompiler::Binary<&Module::OpUDiv, Type::Uint>, | ||
| 2598 | &SPIRVDecompiler::Binary<&Module::OpUMin, Type::Uint>, | ||
| 2599 | &SPIRVDecompiler::Binary<&Module::OpUMax, Type::Uint>, | ||
| 2600 | &SPIRVDecompiler::Unary<&Module::OpConvertFToU, Type::Uint, Type::Float>, | ||
| 2601 | &SPIRVDecompiler::Unary<&Module::OpBitcast, Type::Uint, Type::Int>, | ||
| 2602 | &SPIRVDecompiler::Binary<&Module::OpShiftLeftLogical, Type::Uint>, | ||
| 2603 | &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Uint>, | ||
| 2604 | &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Uint>, // arithmetic shift right of an unsigned value equals a logical shift | ||
| 2605 | &SPIRVDecompiler::Binary<&Module::OpBitwiseAnd, Type::Uint>, | ||
| 2606 | &SPIRVDecompiler::Binary<&Module::OpBitwiseOr, Type::Uint>, | ||
| 2607 | &SPIRVDecompiler::Binary<&Module::OpBitwiseXor, Type::Uint>, | ||
| 2608 | &SPIRVDecompiler::Unary<&Module::OpNot, Type::Uint>, | ||
| 2609 | &SPIRVDecompiler::Quaternary<&Module::OpBitFieldInsert, Type::Uint>, | ||
| 2610 | &SPIRVDecompiler::Ternary<&Module::OpBitFieldUExtract, Type::Uint>, | ||
| 2611 | &SPIRVDecompiler::Unary<&Module::OpBitCount, Type::Uint>, | ||
| 2612 | &SPIRVDecompiler::Unary<&Module::OpFindUMsb, Type::Uint>, | ||
| 2613 | |||
| 2614 | &SPIRVDecompiler::Binary<&Module::OpFAdd, Type::HalfFloat>, | ||
| 2615 | &SPIRVDecompiler::Binary<&Module::OpFMul, Type::HalfFloat>, | ||
| 2616 | &SPIRVDecompiler::Ternary<&Module::OpFma, Type::HalfFloat>, | ||
| 2617 | &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>, | ||
| 2618 | &SPIRVDecompiler::HNegate, | ||
| 2619 | &SPIRVDecompiler::HClamp, | ||
| 2620 | &SPIRVDecompiler::HCastFloat, | ||
| 2621 | &SPIRVDecompiler::HUnpack, | ||
| 2622 | &SPIRVDecompiler::HMergeF32, | ||
| 2623 | &SPIRVDecompiler::HMergeHN<0>, | ||
| 2624 | &SPIRVDecompiler::HMergeHN<1>, | ||
| 2625 | &SPIRVDecompiler::HPack2, | ||
| 2626 | |||
| 2627 | &SPIRVDecompiler::LogicalAssign, | ||
| 2628 | &SPIRVDecompiler::Binary<&Module::OpLogicalAnd, Type::Bool>, | ||
| 2629 | &SPIRVDecompiler::Binary<&Module::OpLogicalOr, Type::Bool>, | ||
| 2630 | &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>, | ||
| 2631 | &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>, | ||
| 2632 | &SPIRVDecompiler::Binary<&Module::OpVectorExtractDynamic, Type::Bool, Type::Bool2, | ||
| 2633 | Type::Uint>, | ||
| 2634 | &SPIRVDecompiler::Unary<&Module::OpAll, Type::Bool, Type::Bool2>, | ||
| 2635 | |||
| 2636 | &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>, | ||
| 2637 | &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>, | ||
| 2638 | &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool, Type::Float>, | ||
| 2639 | &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::Float>, | ||
| 2640 | &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::Float>, | ||
| 2641 | &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::Float>, | ||
| 2642 | &SPIRVDecompiler::LogicalFOrdered, | ||
| 2643 | &SPIRVDecompiler::LogicalFUnordered, | ||
| 2644 | &SPIRVDecompiler::Binary<&Module::OpFUnordLessThan, Type::Bool, Type::Float>, | ||
| 2645 | &SPIRVDecompiler::Binary<&Module::OpFUnordEqual, Type::Bool, Type::Float>, | ||
| 2646 | &SPIRVDecompiler::Binary<&Module::OpFUnordLessThanEqual, Type::Bool, Type::Float>, | ||
| 2647 | &SPIRVDecompiler::Binary<&Module::OpFUnordGreaterThan, Type::Bool, Type::Float>, | ||
| 2648 | &SPIRVDecompiler::Binary<&Module::OpFUnordNotEqual, Type::Bool, Type::Float>, | ||
| 2649 | &SPIRVDecompiler::Binary<&Module::OpFUnordGreaterThanEqual, Type::Bool, Type::Float>, | ||
| 2650 | |||
| 2651 | &SPIRVDecompiler::Binary<&Module::OpSLessThan, Type::Bool, Type::Int>, | ||
| 2652 | &SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Int>, | ||
| 2653 | &SPIRVDecompiler::Binary<&Module::OpSLessThanEqual, Type::Bool, Type::Int>, | ||
| 2654 | &SPIRVDecompiler::Binary<&Module::OpSGreaterThan, Type::Bool, Type::Int>, | ||
| 2655 | &SPIRVDecompiler::Binary<&Module::OpINotEqual, Type::Bool, Type::Int>, | ||
| 2656 | &SPIRVDecompiler::Binary<&Module::OpSGreaterThanEqual, Type::Bool, Type::Int>, | ||
| 2657 | |||
| 2658 | &SPIRVDecompiler::Binary<&Module::OpULessThan, Type::Bool, Type::Uint>, | ||
| 2659 | &SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Uint>, | ||
| 2660 | &SPIRVDecompiler::Binary<&Module::OpULessThanEqual, Type::Bool, Type::Uint>, | ||
| 2661 | &SPIRVDecompiler::Binary<&Module::OpUGreaterThan, Type::Bool, Type::Uint>, | ||
| 2662 | &SPIRVDecompiler::Binary<&Module::OpINotEqual, Type::Bool, Type::Uint>, | ||
| 2663 | &SPIRVDecompiler::Binary<&Module::OpUGreaterThanEqual, Type::Bool, Type::Uint>, | ||
| 2664 | |||
| 2665 | &SPIRVDecompiler::LogicalAddCarry, | ||
| 2666 | |||
| 2667 | &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool2, Type::HalfFloat>, | ||
| 2668 | &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool2, Type::HalfFloat>, | ||
| 2669 | &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool2, Type::HalfFloat>, | ||
| 2670 | &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool2, Type::HalfFloat>, | ||
| 2671 | &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool2, Type::HalfFloat>, | ||
| 2672 | &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool2, Type::HalfFloat>, | ||
| 2673 | // TODO(Rodrigo): Should these use the OpFUnord* variants? | ||
| 2674 | &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool2, Type::HalfFloat>, | ||
| 2675 | &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool2, Type::HalfFloat>, | ||
| 2676 | &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool2, Type::HalfFloat>, | ||
| 2677 | &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool2, Type::HalfFloat>, | ||
| 2678 | &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool2, Type::HalfFloat>, | ||
| 2679 | &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool2, Type::HalfFloat>, | ||
| 2680 | |||
| 2681 | &SPIRVDecompiler::Texture, | ||
| 2682 | &SPIRVDecompiler::TextureLod, | ||
| 2683 | &SPIRVDecompiler::TextureGather, | ||
| 2684 | &SPIRVDecompiler::TextureQueryDimensions, | ||
| 2685 | &SPIRVDecompiler::TextureQueryLod, | ||
| 2686 | &SPIRVDecompiler::TexelFetch, | ||
| 2687 | &SPIRVDecompiler::TextureGradient, | ||
| 2688 | |||
| 2689 | &SPIRVDecompiler::ImageLoad, | ||
| 2690 | &SPIRVDecompiler::ImageStore, | ||
| 2691 | &SPIRVDecompiler::AtomicImage<&Module::OpAtomicIAdd>, | ||
| 2692 | &SPIRVDecompiler::AtomicImage<&Module::OpAtomicAnd>, | ||
| 2693 | &SPIRVDecompiler::AtomicImage<&Module::OpAtomicOr>, | ||
| 2694 | &SPIRVDecompiler::AtomicImage<&Module::OpAtomicXor>, | ||
| 2695 | &SPIRVDecompiler::AtomicImage<&Module::OpAtomicExchange>, | ||
| 2696 | |||
| 2697 | &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>, | ||
| 2698 | &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>, | ||
| 2699 | &SPIRVDecompiler::Atomic<&Module::OpAtomicUMin>, | ||
| 2700 | &SPIRVDecompiler::Atomic<&Module::OpAtomicUMax>, | ||
| 2701 | &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>, | ||
| 2702 | &SPIRVDecompiler::Atomic<&Module::OpAtomicOr>, | ||
| 2703 | &SPIRVDecompiler::Atomic<&Module::OpAtomicXor>, | ||
| 2704 | |||
| 2705 | &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>, | ||
| 2706 | &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>, | ||
| 2707 | &SPIRVDecompiler::Atomic<&Module::OpAtomicSMin>, | ||
| 2708 | &SPIRVDecompiler::Atomic<&Module::OpAtomicSMax>, | ||
| 2709 | &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>, | ||
| 2710 | &SPIRVDecompiler::Atomic<&Module::OpAtomicOr>, | ||
| 2711 | &SPIRVDecompiler::Atomic<&Module::OpAtomicXor>, | ||
| 2712 | |||
| 2713 | &SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>, | ||
| 2714 | &SPIRVDecompiler::Reduce<&Module::OpAtomicUMin>, | ||
| 2715 | &SPIRVDecompiler::Reduce<&Module::OpAtomicUMax>, | ||
| 2716 | &SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>, | ||
| 2717 | &SPIRVDecompiler::Reduce<&Module::OpAtomicOr>, | ||
| 2718 | &SPIRVDecompiler::Reduce<&Module::OpAtomicXor>, | ||
| 2719 | |||
| 2720 | &SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>, | ||
| 2721 | &SPIRVDecompiler::Reduce<&Module::OpAtomicSMin>, | ||
| 2722 | &SPIRVDecompiler::Reduce<&Module::OpAtomicSMax>, | ||
| 2723 | &SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>, | ||
| 2724 | &SPIRVDecompiler::Reduce<&Module::OpAtomicOr>, | ||
| 2725 | &SPIRVDecompiler::Reduce<&Module::OpAtomicXor>, | ||
| 2726 | |||
| 2727 | &SPIRVDecompiler::Branch, | ||
| 2728 | &SPIRVDecompiler::BranchIndirect, | ||
| 2729 | &SPIRVDecompiler::PushFlowStack, | ||
| 2730 | &SPIRVDecompiler::PopFlowStack, | ||
| 2731 | &SPIRVDecompiler::Exit, | ||
| 2732 | &SPIRVDecompiler::Discard, | ||
| 2733 | |||
| 2734 | &SPIRVDecompiler::EmitVertex, | ||
| 2735 | &SPIRVDecompiler::EndPrimitive, | ||
| 2736 | |||
| 2737 | &SPIRVDecompiler::InvocationId, | ||
| 2738 | &SPIRVDecompiler::YNegate, | ||
| 2739 | &SPIRVDecompiler::LocalInvocationId<0>, | ||
| 2740 | &SPIRVDecompiler::LocalInvocationId<1>, | ||
| 2741 | &SPIRVDecompiler::LocalInvocationId<2>, | ||
| 2742 | &SPIRVDecompiler::WorkGroupId<0>, | ||
| 2743 | &SPIRVDecompiler::WorkGroupId<1>, | ||
| 2744 | &SPIRVDecompiler::WorkGroupId<2>, | ||
| 2745 | |||
| 2746 | &SPIRVDecompiler::BallotThread, | ||
| 2747 | &SPIRVDecompiler::Vote<&Module::OpSubgroupAllKHR>, | ||
| 2748 | &SPIRVDecompiler::Vote<&Module::OpSubgroupAnyKHR>, | ||
| 2749 | &SPIRVDecompiler::Vote<&Module::OpSubgroupAllEqualKHR>, | ||
| 2750 | |||
| 2751 | &SPIRVDecompiler::ThreadId, | ||
| 2752 | &SPIRVDecompiler::ThreadMask<0>, // Eq | ||
| 2753 | &SPIRVDecompiler::ThreadMask<1>, // Ge | ||
| 2754 | &SPIRVDecompiler::ThreadMask<2>, // Gt | ||
| 2755 | &SPIRVDecompiler::ThreadMask<3>, // Le | ||
| 2756 | &SPIRVDecompiler::ThreadMask<4>, // Lt | ||
| 2757 | &SPIRVDecompiler::ShuffleIndexed, | ||
| 2758 | |||
| 2759 | &SPIRVDecompiler::Barrier, | ||
| 2760 | &SPIRVDecompiler::MemoryBarrier<spv::Scope::Workgroup>, | ||
| 2761 | &SPIRVDecompiler::MemoryBarrier<spv::Scope::Device>, | ||
| 2762 | }; | ||
| 2763 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); | ||
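For context, a hedged sketch of how this table is consumed (the actual visitor sits in an earlier hunk and may differ in details): the operation's code is cast to an index into `operation_decompilers`, and the static_assert above guarantees the table stays in lock step with `OperationCode::Amount`.

```cpp
// Sketch only -- this member function and its parameters are assumptions
// reconstructed from the table above, not a quote of the real visitor.
Expression VisitOperation(Operation operation, OperationCode code) {
    const auto index = static_cast<std::size_t>(code);
    ASSERT(index < operation_decompilers.size());
    const auto decompiler = operation_decompilers[index];
    return (this->*decompiler)(operation);
}
```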
| 2764 | |||
| 2765 | const Device& device; | ||
| 2766 | const ShaderIR& ir; | ||
| 2767 | const ShaderType stage; | ||
| 2768 | const Tegra::Shader::Header header; | ||
| 2769 | const Registry& registry; | ||
| 2770 | const Specialization& specialization; | ||
| 2771 | std::unordered_map<u8, VaryingTFB> transform_feedback; | ||
| 2772 | |||
| 2773 | const Id t_void = Name(TypeVoid(), "void"); | ||
| 2774 | |||
| 2775 | const Id t_bool = Name(TypeBool(), "bool"); | ||
| 2776 | const Id t_bool2 = Name(TypeVector(t_bool, 2), "bool2"); | ||
| 2777 | |||
| 2778 | const Id t_int = Name(TypeInt(32, true), "int"); | ||
| 2779 | const Id t_int2 = Name(TypeVector(t_int, 2), "int2"); | ||
| 2780 | const Id t_int3 = Name(TypeVector(t_int, 3), "int3"); | ||
| 2781 | const Id t_int4 = Name(TypeVector(t_int, 4), "int4"); | ||
| 2782 | |||
| 2783 | const Id t_uint = Name(TypeInt(32, false), "uint"); | ||
| 2784 | const Id t_uint2 = Name(TypeVector(t_uint, 2), "uint2"); | ||
| 2785 | const Id t_uint3 = Name(TypeVector(t_uint, 3), "uint3"); | ||
| 2786 | const Id t_uint4 = Name(TypeVector(t_uint, 4), "uint4"); | ||
| 2787 | |||
| 2788 | const Id t_float = Name(TypeFloat(32), "float"); | ||
| 2789 | const Id t_float2 = Name(TypeVector(t_float, 2), "float2"); | ||
| 2790 | const Id t_float3 = Name(TypeVector(t_float, 3), "float3"); | ||
| 2791 | const Id t_float4 = Name(TypeVector(t_float, 4), "float4"); | ||
| 2792 | |||
| 2793 | const Id t_prv_bool = Name(TypePointer(spv::StorageClass::Private, t_bool), "prv_bool"); | ||
| 2794 | const Id t_prv_float = Name(TypePointer(spv::StorageClass::Private, t_float), "prv_float"); | ||
| 2795 | |||
| 2796 | const Id t_func_uint = Name(TypePointer(spv::StorageClass::Function, t_uint), "func_uint"); | ||
| 2797 | |||
| 2798 | const Id t_in_bool = Name(TypePointer(spv::StorageClass::Input, t_bool), "in_bool"); | ||
| 2799 | const Id t_in_int = Name(TypePointer(spv::StorageClass::Input, t_int), "in_int"); | ||
| 2800 | const Id t_in_int4 = Name(TypePointer(spv::StorageClass::Input, t_int4), "in_int4"); | ||
| 2801 | const Id t_in_uint = Name(TypePointer(spv::StorageClass::Input, t_uint), "in_uint"); | ||
| 2802 | const Id t_in_uint3 = Name(TypePointer(spv::StorageClass::Input, t_uint3), "in_uint3"); | ||
| 2803 | const Id t_in_uint4 = Name(TypePointer(spv::StorageClass::Input, t_uint4), "in_uint4"); | ||
| 2804 | const Id t_in_float = Name(TypePointer(spv::StorageClass::Input, t_float), "in_float"); | ||
| 2805 | const Id t_in_float2 = Name(TypePointer(spv::StorageClass::Input, t_float2), "in_float2"); | ||
| 2806 | const Id t_in_float3 = Name(TypePointer(spv::StorageClass::Input, t_float3), "in_float3"); | ||
| 2807 | const Id t_in_float4 = Name(TypePointer(spv::StorageClass::Input, t_float4), "in_float4"); | ||
| 2808 | |||
| 2809 | const Id t_out_int = Name(TypePointer(spv::StorageClass::Output, t_int), "out_int"); | ||
| 2810 | |||
| 2811 | const Id t_out_float = Name(TypePointer(spv::StorageClass::Output, t_float), "out_float"); | ||
| 2812 | const Id t_out_float4 = Name(TypePointer(spv::StorageClass::Output, t_float4), "out_float4"); | ||
| 2813 | |||
| 2814 | const Id t_cbuf_float = TypePointer(spv::StorageClass::Uniform, t_float); | ||
| 2815 | const Id t_cbuf_std140 = Decorate( | ||
| 2816 | Name(TypeArray(t_float4, Constant(t_uint, MaxConstBufferElements)), "CbufStd140Array"), | ||
| 2817 | spv::Decoration::ArrayStride, 16U); | ||
| 2818 | const Id t_cbuf_scalar = Decorate( | ||
| 2819 | Name(TypeArray(t_float, Constant(t_uint, MaxConstBufferFloats)), "CbufScalarArray"), | ||
| 2820 | spv::Decoration::ArrayStride, 4U); | ||
| 2821 | const Id t_cbuf_std140_struct = MemberDecorate( | ||
| 2822 | Decorate(TypeStruct(t_cbuf_std140), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); | ||
| 2823 | const Id t_cbuf_scalar_struct = MemberDecorate( | ||
| 2824 | Decorate(TypeStruct(t_cbuf_scalar), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); | ||
| 2825 | const Id t_cbuf_std140_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_std140_struct); | ||
| 2826 | const Id t_cbuf_scalar_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_scalar_struct); | ||
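Two views of the same constant buffer are declared here: a std140 one (an array of float4 with a 16-byte stride) and a scalar one (an array of float with a 4-byte stride, only legal when the device relaxes the std140 uniform rules). The practical difference is how a byte offset maps to an array index:

```cpp
#include <cstdint>

constexpr std::uint32_t Std140Index(std::uint32_t byte_offset) {
    return byte_offset / 16; // which float4 element holds the value
}
constexpr std::uint32_t ScalarIndex(std::uint32_t byte_offset) {
    return byte_offset / 4; // the float element directly
}
static_assert(Std140Index(36) == 2 && ScalarIndex(36) == 9);
```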
| 2827 | |||
| 2828 | Id t_smem_uint{}; | ||
| 2829 | |||
| 2830 | const Id t_gmem_uint = TypePointer(spv::StorageClass::StorageBuffer, t_uint); | ||
| 2831 | const Id t_gmem_array = | ||
| 2832 | Name(Decorate(TypeRuntimeArray(t_uint), spv::Decoration::ArrayStride, 4U), "GmemArray"); | ||
| 2833 | const Id t_gmem_struct = MemberDecorate( | ||
| 2834 | Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); | ||
| 2835 | const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct); | ||
| 2836 | |||
| 2837 | const Id t_image_uint = TypePointer(spv::StorageClass::Image, t_uint); | ||
| 2838 | |||
| 2839 | const Id v_float_zero = Constant(t_float, 0.0f); | ||
| 2840 | const Id v_float_one = Constant(t_float, 1.0f); | ||
| 2841 | const Id v_uint_zero = Constant(t_uint, 0); | ||
| 2842 | |||
| 2843 | // Nvidia uses these defaults for varyings (e.g. position and generic attributes) | ||
| 2844 | const Id v_varying_default = | ||
| 2845 | ConstantComposite(t_float4, v_float_zero, v_float_zero, v_float_zero, v_float_one); | ||
| 2846 | |||
| 2847 | const Id v_true = ConstantTrue(t_bool); | ||
| 2848 | const Id v_false = ConstantFalse(t_bool); | ||
| 2849 | |||
| 2850 | Id t_scalar_half{}; | ||
| 2851 | Id t_half{}; | ||
| 2852 | |||
| 2853 | Id out_vertex{}; | ||
| 2854 | Id in_vertex{}; | ||
| 2855 | std::map<u32, Id> registers; | ||
| 2856 | std::map<u32, Id> custom_variables; | ||
| 2857 | std::map<Tegra::Shader::Pred, Id> predicates; | ||
| 2858 | std::map<u32, Id> flow_variables; | ||
| 2859 | Id local_memory{}; | ||
| 2860 | Id shared_memory{}; | ||
| 2861 | std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{}; | ||
| 2862 | std::map<Attribute::Index, Id> input_attributes; | ||
| 2863 | std::unordered_map<u8, GenericVaryingDescription> output_attributes; | ||
| 2864 | std::map<u32, Id> constant_buffers; | ||
| 2865 | std::map<GlobalMemoryBase, Id> global_buffers; | ||
| 2866 | std::map<u32, TexelBuffer> uniform_texels; | ||
| 2867 | std::map<u32, SampledImage> sampled_images; | ||
| 2868 | std::map<u32, StorageImage> images; | ||
| 2869 | |||
| 2870 | std::array<Id, Maxwell::NumRenderTargets> frag_colors{}; | ||
| 2871 | Id instance_index{}; | ||
| 2872 | Id vertex_index{}; | ||
| 2873 | Id base_instance{}; | ||
| 2874 | Id base_vertex{}; | ||
| 2875 | Id frag_depth{}; | ||
| 2876 | Id frag_coord{}; | ||
| 2877 | Id front_facing{}; | ||
| 2878 | Id point_coord{}; | ||
| 2879 | Id tess_level_outer{}; | ||
| 2880 | Id tess_level_inner{}; | ||
| 2881 | Id tess_coord{}; | ||
| 2882 | Id invocation_id{}; | ||
| 2883 | Id workgroup_id{}; | ||
| 2884 | Id local_invocation_id{}; | ||
| 2885 | Id thread_id{}; | ||
| 2886 | std::array<Id, 5> thread_masks{}; // eq, ge, gt, le, lt | ||
| 2887 | |||
| 2888 | VertexIndices in_indices; | ||
| 2889 | VertexIndices out_indices; | ||
| 2890 | |||
| 2891 | std::vector<Id> interfaces; | ||
| 2892 | |||
| 2893 | Id jmp_to{}; | ||
| 2894 | Id ssy_flow_stack_top{}; | ||
| 2895 | Id pbk_flow_stack_top{}; | ||
| 2896 | Id ssy_flow_stack{}; | ||
| 2897 | Id pbk_flow_stack{}; | ||
| 2898 | Id continue_label{}; | ||
| 2899 | std::map<u32, Id> labels; | ||
| 2900 | |||
| 2901 | bool conditional_branch_set{}; | ||
| 2902 | bool inside_branch{}; | ||
| 2903 | }; | ||
| 2904 | |||
| 2905 | class ExprDecompiler { | ||
| 2906 | public: | ||
| 2907 | explicit ExprDecompiler(SPIRVDecompiler& decomp_) : decomp{decomp_} {} | ||
| 2908 | |||
| 2909 | Id operator()(const ExprAnd& expr) { | ||
| 2910 | const Id type_def = decomp.GetTypeDefinition(Type::Bool); | ||
| 2911 | const Id op1 = Visit(expr.operand1); | ||
| 2912 | const Id op2 = Visit(expr.operand2); | ||
| 2913 | return decomp.OpLogicalAnd(type_def, op1, op2); | ||
| 2914 | } | ||
| 2915 | |||
| 2916 | Id operator()(const ExprOr& expr) { | ||
| 2917 | const Id type_def = decomp.GetTypeDefinition(Type::Bool); | ||
| 2918 | const Id op1 = Visit(expr.operand1); | ||
| 2919 | const Id op2 = Visit(expr.operand2); | ||
| 2920 | return decomp.OpLogicalOr(type_def, op1, op2); | ||
| 2921 | } | ||
| 2922 | |||
| 2923 | Id operator()(const ExprNot& expr) { | ||
| 2924 | const Id type_def = decomp.GetTypeDefinition(Type::Bool); | ||
| 2925 | const Id op1 = Visit(expr.operand1); | ||
| 2926 | return decomp.OpLogicalNot(type_def, op1); | ||
| 2927 | } | ||
| 2928 | |||
| 2929 | Id operator()(const ExprPredicate& expr) { | ||
| 2930 | const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate); | ||
| 2931 | return decomp.OpLoad(decomp.t_bool, decomp.predicates.at(pred)); | ||
| 2932 | } | ||
| 2933 | |||
| 2934 | Id operator()(const ExprCondCode& expr) { | ||
| 2935 | return decomp.AsBool(decomp.Visit(decomp.ir.GetConditionCode(expr.cc))); | ||
| 2936 | } | ||
| 2937 | |||
| 2938 | Id operator()(const ExprVar& expr) { | ||
| 2939 | return decomp.OpLoad(decomp.t_bool, decomp.flow_variables.at(expr.var_index)); | ||
| 2940 | } | ||
| 2941 | |||
| 2942 | Id operator()(const ExprBoolean& expr) { | ||
| 2943 | return expr.value ? decomp.v_true : decomp.v_false; | ||
| 2944 | } | ||
| 2945 | |||
| 2946 | Id operator()(const ExprGprEqual& expr) { | ||
| 2947 | const Id target = decomp.Constant(decomp.t_uint, expr.value); | ||
| 2948 | Id gpr = decomp.OpLoad(decomp.t_float, decomp.registers.at(expr.gpr)); | ||
| 2949 | gpr = decomp.OpBitcast(decomp.t_uint, gpr); | ||
| 2950 | return decomp.OpIEqual(decomp.t_bool, gpr, target); | ||
| 2951 | } | ||
| 2952 | |||
| 2953 | Id Visit(const Expr& node) { | ||
| 2954 | return std::visit(*this, *node); | ||
| 2955 | } | ||
| 2956 | |||
| 2957 | private: | ||
| 2958 | SPIRVDecompiler& decomp; | ||
| 2959 | }; | ||
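ExprDecompiler is a classic std::variant visitor: `std::visit(*this, *node)` selects the `operator()` overload matching the variant's active alternative. A self-contained miniature of the pattern (the node types here are hypothetical stand-ins):

```cpp
#include <variant>

struct ExprAnd {};
struct ExprNot {};
using Expr = std::variant<ExprAnd, ExprNot>;

struct Visitor {
    int operator()(const ExprAnd&) const { return 1; } // one overload per alternative
    int operator()(const ExprNot&) const { return 2; }
};

int Dispatch(const Expr& node) {
    return std::visit(Visitor{}, node);
}
```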
| 2960 | |||
| 2961 | class ASTDecompiler { | ||
| 2962 | public: | ||
| 2963 | explicit ASTDecompiler(SPIRVDecompiler& decomp_) : decomp{decomp_} {} | ||
| 2964 | |||
| 2965 | void operator()(const ASTProgram& ast) { | ||
| 2966 | ASTNode current = ast.nodes.GetFirst(); | ||
| 2967 | while (current) { | ||
| 2968 | Visit(current); | ||
| 2969 | current = current->GetNext(); | ||
| 2970 | } | ||
| 2971 | } | ||
| 2972 | |||
| 2973 | void operator()(const ASTIfThen& ast) { | ||
| 2974 | ExprDecompiler expr_parser{decomp}; | ||
| 2975 | const Id condition = expr_parser.Visit(ast.condition); | ||
| 2976 | const Id then_label = decomp.OpLabel(); | ||
| 2977 | const Id endif_label = decomp.OpLabel(); | ||
| 2978 | decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone); | ||
| 2979 | decomp.OpBranchConditional(condition, then_label, endif_label); | ||
| 2980 | decomp.AddLabel(then_label); | ||
| 2981 | ASTNode current = ast.nodes.GetFirst(); | ||
| 2982 | while (current) { | ||
| 2983 | Visit(current); | ||
| 2984 | current = current->GetNext(); | ||
| 2985 | } | ||
| 2986 | decomp.OpBranch(endif_label); | ||
| 2987 | decomp.AddLabel(endif_label); | ||
| 2988 | } | ||
| 2989 | |||
| 2990 | void operator()([[maybe_unused]] const ASTIfElse& ast) { | ||
| 2991 | UNREACHABLE(); | ||
| 2992 | } | ||
| 2993 | |||
| 2994 | void operator()([[maybe_unused]] const ASTBlockEncoded& ast) { | ||
| 2995 | UNREACHABLE(); | ||
| 2996 | } | ||
| 2997 | |||
| 2998 | void operator()(const ASTBlockDecoded& ast) { | ||
| 2999 | decomp.VisitBasicBlock(ast.nodes); | ||
| 3000 | } | ||
| 3001 | |||
| 3002 | void operator()(const ASTVarSet& ast) { | ||
| 3003 | ExprDecompiler expr_parser{decomp}; | ||
| 3004 | const Id condition = expr_parser.Visit(ast.condition); | ||
| 3005 | decomp.OpStore(decomp.flow_variables.at(ast.index), condition); | ||
| 3006 | } | ||
| 3007 | |||
| 3008 | void operator()([[maybe_unused]] const ASTLabel& ast) { | ||
| 3009 | // Do nothing | ||
| 3010 | } | ||
| 3011 | |||
| 3012 | void operator()([[maybe_unused]] const ASTGoto& ast) { | ||
| 3013 | UNREACHABLE(); | ||
| 3014 | } | ||
| 3015 | |||
| 3016 | void operator()(const ASTDoWhile& ast) { | ||
| 3017 | const Id loop_label = decomp.OpLabel(); | ||
| 3018 | const Id endloop_label = decomp.OpLabel(); | ||
| 3019 | const Id loop_start_block = decomp.OpLabel(); | ||
| 3020 | const Id loop_continue_block = decomp.OpLabel(); | ||
| 3021 | current_loop_exit = endloop_label; | ||
| 3022 | decomp.OpBranch(loop_label); | ||
| 3023 | decomp.AddLabel(loop_label); | ||
| 3024 | decomp.OpLoopMerge(endloop_label, loop_continue_block, spv::LoopControlMask::MaskNone); | ||
| 3025 | decomp.OpBranch(loop_start_block); | ||
| 3026 | decomp.AddLabel(loop_start_block); | ||
| 3027 | ASTNode current = ast.nodes.GetFirst(); | ||
| 3028 | while (current) { | ||
| 3029 | Visit(current); | ||
| 3030 | current = current->GetNext(); | ||
| 3031 | } | ||
| 3032 | decomp.OpBranch(loop_continue_block); | ||
| 3033 | decomp.AddLabel(loop_continue_block); | ||
| 3034 | ExprDecompiler expr_parser{decomp}; | ||
| 3035 | const Id condition = expr_parser.Visit(ast.condition); | ||
| 3036 | decomp.OpBranchConditional(condition, loop_label, endloop_label); | ||
| 3037 | decomp.AddLabel(endloop_label); | ||
| 3038 | } | ||
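SPIR-V structured control flow requires `OpLoopMerge` to name both the merge block and the continue target before the back edge exists, which is why four labels are allocated up front. In host-language terms, the emitted blocks have the shape of a plain do-while (illustrative only):

```cpp
void DoWhileShape(bool (*Condition)()) {
    do {
        // ...decompiled loop body nodes (loop_start_block)...
    } while (Condition()); // evaluated in loop_continue_block; true takes the back edge
}
```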
| 3039 | |||
| 3040 | void operator()(const ASTReturn& ast) { | ||
| 3041 | if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) { | ||
| 3042 | ExprDecompiler expr_parser{decomp}; | ||
| 3043 | const Id condition = expr_parser.Visit(ast.condition); | ||
| 3044 | const Id then_label = decomp.OpLabel(); | ||
| 3045 | const Id endif_label = decomp.OpLabel(); | ||
| 3046 | decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone); | ||
| 3047 | decomp.OpBranchConditional(condition, then_label, endif_label); | ||
| 3048 | decomp.AddLabel(then_label); | ||
| 3049 | if (ast.kills) { | ||
| 3050 | decomp.OpKill(); | ||
| 3051 | } else { | ||
| 3052 | decomp.PreExit(); | ||
| 3053 | decomp.OpReturn(); | ||
| 3054 | } | ||
| 3055 | decomp.AddLabel(endif_label); | ||
| 3056 | } else { | ||
| 3057 | const Id next_block = decomp.OpLabel(); | ||
| 3058 | decomp.OpBranch(next_block); | ||
| 3059 | decomp.AddLabel(next_block); | ||
| 3060 | if (ast.kills) { | ||
| 3061 | decomp.OpKill(); | ||
| 3062 | } else { | ||
| 3063 | decomp.PreExit(); | ||
| 3064 | decomp.OpReturn(); | ||
| 3065 | } | ||
| 3066 | decomp.AddLabel(decomp.OpLabel()); | ||
| 3067 | } | ||
| 3068 | } | ||
| 3069 | |||
| 3070 | void operator()(const ASTBreak& ast) { | ||
| 3071 | if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) { | ||
| 3072 | ExprDecompiler expr_parser{decomp}; | ||
| 3073 | const Id condition = expr_parser.Visit(ast.condition); | ||
| 3074 | const Id then_label = decomp.OpLabel(); | ||
| 3075 | const Id endif_label = decomp.OpLabel(); | ||
| 3076 | decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone); | ||
| 3077 | decomp.OpBranchConditional(condition, then_label, endif_label); | ||
| 3078 | decomp.AddLabel(then_label); | ||
| 3079 | decomp.OpBranch(current_loop_exit); | ||
| 3080 | decomp.AddLabel(endif_label); | ||
| 3081 | } else { | ||
| 3082 | const Id next_block = decomp.OpLabel(); | ||
| 3083 | decomp.OpBranch(next_block); | ||
| 3084 | decomp.AddLabel(next_block); | ||
| 3085 | decomp.OpBranch(current_loop_exit); | ||
| 3086 | decomp.AddLabel(decomp.OpLabel()); | ||
| 3087 | } | ||
| 3088 | } | ||
| 3089 | |||
| 3090 | void Visit(const ASTNode& node) { | ||
| 3091 | std::visit(*this, *node->GetInnerData()); | ||
| 3092 | } | ||
| 3093 | |||
| 3094 | private: | ||
| 3095 | SPIRVDecompiler& decomp; | ||
| 3096 | Id current_loop_exit{}; | ||
| 3097 | }; | ||
| 3098 | |||
| 3099 | void SPIRVDecompiler::DecompileAST() { | ||
| 3100 | const u32 num_flow_variables = ir.GetASTNumVariables(); | ||
| 3101 | for (u32 i = 0; i < num_flow_variables; i++) { | ||
| 3102 | const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); | ||
| 3103 | Name(id, fmt::format("flow_var_{}", i)); | ||
| 3104 | flow_variables.emplace(i, AddGlobalVariable(id)); | ||
| 3105 | } | ||
| 3106 | |||
| 3107 | DefinePrologue(); | ||
| 3108 | |||
| 3109 | const ASTNode program = ir.GetASTProgram(); | ||
| 3110 | ASTDecompiler decompiler{*this}; | ||
| 3111 | decompiler.Visit(program); | ||
| 3112 | |||
| 3113 | const Id next_block = OpLabel(); | ||
| 3114 | OpBranch(next_block); | ||
| 3115 | AddLabel(next_block); | ||
| 3116 | } | ||
| 3117 | |||
| 3118 | } // Anonymous namespace | ||
| 3119 | |||
| 3120 | ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) { | ||
| 3121 | ShaderEntries entries; | ||
| 3122 | for (const auto& cbuf : ir.GetConstantBuffers()) { | ||
| 3123 | entries.const_buffers.emplace_back(cbuf.second, cbuf.first); | ||
| 3124 | } | ||
| 3125 | for (const auto& [base, usage] : ir.GetGlobalMemory()) { | ||
| 3126 | entries.global_buffers.emplace_back(GlobalBufferEntry{ | ||
| 3127 | .cbuf_index = base.cbuf_index, | ||
| 3128 | .cbuf_offset = base.cbuf_offset, | ||
| 3129 | .is_written = usage.is_written, | ||
| 3130 | }); | ||
| 3131 | } | ||
| 3132 | for (const auto& sampler : ir.GetSamplers()) { | ||
| 3133 | if (sampler.is_buffer) { | ||
| 3134 | entries.uniform_texels.emplace_back(sampler); | ||
| 3135 | } else { | ||
| 3136 | entries.samplers.emplace_back(sampler); | ||
| 3137 | } | ||
| 3138 | } | ||
| 3139 | for (const auto& image : ir.GetImages()) { | ||
| 3140 | if (image.type == Tegra::Shader::ImageType::TextureBuffer) { | ||
| 3141 | entries.storage_texels.emplace_back(image); | ||
| 3142 | } else { | ||
| 3143 | entries.images.emplace_back(image); | ||
| 3144 | } | ||
| 3145 | } | ||
| 3146 | for (const auto& attribute : ir.GetInputAttributes()) { | ||
| 3147 | if (IsGenericAttribute(attribute)) { | ||
| 3148 | entries.attributes.insert(GetGenericAttributeLocation(attribute)); | ||
| 3149 | } | ||
| 3150 | } | ||
| 3151 | for (const auto& buffer : entries.const_buffers) { | ||
| 3152 | entries.enabled_uniform_buffers |= 1U << buffer.GetIndex(); | ||
| 3153 | } | ||
| 3154 | entries.clip_distances = ir.GetClipDistances(); | ||
| 3155 | entries.shader_length = ir.GetLength(); | ||
| 3156 | entries.uses_warps = ir.UsesWarps(); | ||
| 3157 | return entries; | ||
| 3158 | } | ||
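The `enabled_uniform_buffers` accumulation folds every bound constant buffer into a bitmask keyed by buffer index. The same fold, standalone:

```cpp
#include <cstdint>
#include <initializer_list>

constexpr std::uint32_t UniformBufferMask(std::initializer_list<std::uint32_t> indices) {
    std::uint32_t mask = 0;
    for (const std::uint32_t index : indices) {
        mask |= 1U << index;
    }
    return mask;
}
static_assert(UniformBufferMask({0, 1, 4}) == 0b10011);
```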
| 3159 | |||
| 3160 | std::vector<u32> Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | ||
| 3161 | ShaderType stage, const VideoCommon::Shader::Registry& registry, | ||
| 3162 | const Specialization& specialization) { | ||
| 3163 | return SPIRVDecompiler(device, ir, stage, registry, specialization).Assemble(); | ||
| 3164 | } | ||
| 3165 | |||
| 3166 | } // namespace Vulkan | ||
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h deleted file mode 100644 index 5d94132a5..000000000 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ /dev/null | |||
| @@ -1,99 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <set> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "video_core/engines/maxwell_3d.h" | ||
| 13 | #include "video_core/engines/shader_type.h" | ||
| 14 | #include "video_core/shader/registry.h" | ||
| 15 | #include "video_core/shader/shader_ir.h" | ||
| 16 | |||
| 17 | namespace Vulkan { | ||
| 18 | |||
| 19 | class Device; | ||
| 20 | |||
| 21 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 22 | using UniformTexelEntry = VideoCommon::Shader::SamplerEntry; | ||
| 23 | using SamplerEntry = VideoCommon::Shader::SamplerEntry; | ||
| 24 | using StorageTexelEntry = VideoCommon::Shader::ImageEntry; | ||
| 25 | using ImageEntry = VideoCommon::Shader::ImageEntry; | ||
| 26 | |||
| 27 | constexpr u32 DESCRIPTOR_SET = 0; | ||
| 28 | |||
| 29 | class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { | ||
| 30 | public: | ||
| 31 | explicit constexpr ConstBufferEntry(const ConstBuffer& entry_, u32 index_) | ||
| 32 | : ConstBuffer{entry_}, index{index_} {} | ||
| 33 | |||
| 34 | constexpr u32 GetIndex() const { | ||
| 35 | return index; | ||
| 36 | } | ||
| 37 | |||
| 38 | private: | ||
| 39 | u32 index{}; | ||
| 40 | }; | ||
| 41 | |||
| 42 | struct GlobalBufferEntry { | ||
| 43 | u32 cbuf_index{}; | ||
| 44 | u32 cbuf_offset{}; | ||
| 45 | bool is_written{}; | ||
| 46 | }; | ||
| 47 | |||
| 48 | struct ShaderEntries { | ||
| 49 | u32 NumBindings() const { | ||
| 50 | return static_cast<u32>(const_buffers.size() + global_buffers.size() + | ||
| 51 | uniform_texels.size() + samplers.size() + storage_texels.size() + | ||
| 52 | images.size()); | ||
| 53 | } | ||
| 54 | |||
| 55 | std::vector<ConstBufferEntry> const_buffers; | ||
| 56 | std::vector<GlobalBufferEntry> global_buffers; | ||
| 57 | std::vector<UniformTexelEntry> uniform_texels; | ||
| 58 | std::vector<SamplerEntry> samplers; | ||
| 59 | std::vector<StorageTexelEntry> storage_texels; | ||
| 60 | std::vector<ImageEntry> images; | ||
| 61 | std::set<u32> attributes; | ||
| 62 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; | ||
| 63 | std::size_t shader_length{}; | ||
| 64 | u32 enabled_uniform_buffers{}; | ||
| 65 | bool uses_warps{}; | ||
| 66 | }; | ||
| 67 | |||
| 68 | struct Specialization final { | ||
| 69 | u32 base_binding{}; | ||
| 70 | |||
| 71 | // Compute specific | ||
| 72 | std::array<u32, 3> workgroup_size{}; | ||
| 73 | u32 shared_memory_size{}; | ||
| 74 | |||
| 75 | // Graphics specific | ||
| 76 | std::optional<float> point_size; | ||
| 77 | std::bitset<Maxwell::NumVertexAttributes> enabled_attributes; | ||
| 78 | std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{}; | ||
| 79 | bool ndc_minus_one_to_one{}; | ||
| 80 | bool early_fragment_tests{}; | ||
| 81 | float alpha_test_ref{}; | ||
| 82 | Maxwell::ComparisonOp alpha_test_func{}; | ||
| 83 | }; | ||
| 84 | // Old gcc versions don't consider this trivially copyable. | ||
| 85 | // static_assert(std::is_trivially_copyable_v<Specialization>); | ||
| 86 | |||
| 87 | struct SPIRVShader { | ||
| 88 | std::vector<u32> code; | ||
| 89 | ShaderEntries entries; | ||
| 90 | }; | ||
| 91 | |||
| 92 | ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir); | ||
| 93 | |||
| 94 | std::vector<u32> Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | ||
| 95 | Tegra::Engines::ShaderType stage, | ||
| 96 | const VideoCommon::Shader::Registry& registry, | ||
| 97 | const Specialization& specialization); | ||
| 98 | |||
| 99 | } // namespace Vulkan | ||
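Editor's note: in the header deleted above, ShaderEntries::NumBindings() counts one descriptor per entry across all six resource vectors, and Specialization::base_binding lets consecutive pipeline stages occupy disjoint binding ranges inside the single descriptor set (DESCRIPTOR_SET = 0). A rough restatement of that accounting, using simplified illustrative types rather than the real ShaderEntries:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Simplified stand-in for ShaderEntries: only the per-kind counts matter here.
    struct EntryCounts {
        std::size_t const_buffers{}, global_buffers{}, uniform_texels{};
        std::size_t samplers{}, storage_texels{}, images{};

        std::uint32_t NumBindings() const {
            return static_cast<std::uint32_t>(const_buffers + global_buffers + uniform_texels +
                                              samplers + storage_texels + images);
        }
    };

    // Give each stage a disjoint [base, base + NumBindings()) range, mirroring
    // how base_binding would advance from one stage to the next.
    std::vector<std::uint32_t> AssignBaseBindings(const std::vector<EntryCounts>& stages) {
        std::vector<std::uint32_t> bases;
        bases.reserve(stages.size());
        std::uint32_t base = 0;
        for (const EntryCounts& stage : stages) {
            bases.push_back(base); // this stage's Specialization::base_binding
            base += stage.NumBindings();
        }
        return bases;
    }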
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 0412b5234..555b12ed7 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp | |||
| @@ -91,7 +91,7 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem | |||
| 91 | .flags = 0, | 91 | .flags = 0, |
| 92 | .size = STREAM_BUFFER_SIZE, | 92 | .size = STREAM_BUFFER_SIZE, |
| 93 | .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | | 93 | .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | |
| 94 | VK_BUFFER_USAGE_INDEX_BUFFER_BIT, | 94 | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, |
| 95 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | 95 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, |
| 96 | .queueFamilyIndexCount = 0, | 96 | .queueFamilyIndexCount = 0, |
| 97 | .pQueueFamilyIndices = nullptr, | 97 | .pQueueFamilyIndices = nullptr, |
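Editor's note: the hunk above adds VK_BUFFER_USAGE_STORAGE_BUFFER_BIT to the stream buffer, so slices of it may legally back storage-buffer descriptors. For context, a sketch of the descriptor write that this usage bit makes valid; the binding index is illustrative and the handles are assumed to come from the surrounding code:

    #include <vulkan/vulkan.h>

    void WriteStorageBufferDescriptor(VkDevice device, VkDescriptorSet set, VkBuffer stream_buffer,
                                      VkDeviceSize offset, VkDeviceSize size) {
        const VkDescriptorBufferInfo info{
            .buffer = stream_buffer, // must have been created with STORAGE_BUFFER usage
            .offset = offset,
            .range = size,
        };
        const VkWriteDescriptorSet write{
            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
            .pNext = nullptr,
            .dstSet = set,
            .dstBinding = 0, // illustrative binding index
            .dstArrayElement = 0,
            .descriptorCount = 1,
            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .pImageInfo = nullptr,
            .pBufferInfo = &info,
            .pTexelBufferView = nullptr,
        };
        vkUpdateDescriptorSets(device, 1, &write, 0, nullptr);
    }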
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index 956f86845..e3b7dd61c 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp | |||
| @@ -29,9 +29,10 @@ using Flags = Maxwell3D::DirtyState::Flags; | |||
| 29 | 29 | ||
| 30 | Flags MakeInvalidationFlags() { | 30 | Flags MakeInvalidationFlags() { |
| 31 | static constexpr int INVALIDATION_FLAGS[]{ | 31 | static constexpr int INVALIDATION_FLAGS[]{ |
| 32 | Viewports, Scissors, DepthBias, BlendConstants, DepthBounds, | 32 | Viewports, Scissors, DepthBias, BlendConstants, DepthBounds, |
| 33 | StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable, | 33 | StencilProperties, LineWidth, CullMode, DepthBoundsEnable, DepthTestEnable, |
| 34 | DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, VertexBuffers, | 34 | DepthWriteEnable, DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, |
| 35 | VertexBuffers, VertexInput, | ||
| 35 | }; | 36 | }; |
| 36 | Flags flags{}; | 37 | Flags flags{}; |
| 37 | for (const int flag : INVALIDATION_FLAGS) { | 38 | for (const int flag : INVALIDATION_FLAGS) { |
| @@ -40,6 +41,12 @@ Flags MakeInvalidationFlags() { | |||
| 40 | for (int index = VertexBuffer0; index <= VertexBuffer31; ++index) { | 41 | for (int index = VertexBuffer0; index <= VertexBuffer31; ++index) { |
| 41 | flags[index] = true; | 42 | flags[index] = true; |
| 42 | } | 43 | } |
| 44 | for (int index = VertexAttribute0; index <= VertexAttribute31; ++index) { | ||
| 45 | flags[index] = true; | ||
| 46 | } | ||
| 47 | for (int index = VertexBinding0; index <= VertexBinding31; ++index) { | ||
| 48 | flags[index] = true; | ||
| 49 | } | ||
| 43 | return flags; | 50 | return flags; |
| 44 | } | 51 | } |
| 45 | 52 | ||
| @@ -79,6 +86,11 @@ void SetupDirtyStencilProperties(Tables& tables) { | |||
| 79 | table[OFF(stencil_back_func_mask)] = StencilProperties; | 86 | table[OFF(stencil_back_func_mask)] = StencilProperties; |
| 80 | } | 87 | } |
| 81 | 88 | ||
| 89 | void SetupDirtyLineWidth(Tables& tables) { | ||
| 90 | tables[0][OFF(line_width_smooth)] = LineWidth; | ||
| 91 | tables[0][OFF(line_width_aliased)] = LineWidth; | ||
| 92 | } | ||
| 93 | |||
| 82 | void SetupDirtyCullMode(Tables& tables) { | 94 | void SetupDirtyCullMode(Tables& tables) { |
| 83 | auto& table = tables[0]; | 95 | auto& table = tables[0]; |
| 84 | table[OFF(cull_face)] = CullMode; | 96 | table[OFF(cull_face)] = CullMode; |
| @@ -134,31 +146,38 @@ void SetupDirtyBlending(Tables& tables) { | |||
| 134 | FillBlock(tables[0], OFF(independent_blend), NUM(independent_blend), Blending); | 146 | FillBlock(tables[0], OFF(independent_blend), NUM(independent_blend), Blending); |
| 135 | } | 147 | } |
| 136 | 148 | ||
| 137 | void SetupDirtyInstanceDivisors(Tables& tables) { | 149 | void SetupDirtyViewportSwizzles(Tables& tables) { |
| 138 | static constexpr size_t divisor_offset = 3; | 150 | static constexpr size_t swizzle_offset = 6; |
| 139 | for (size_t index = 0; index < Regs::NumVertexArrays; ++index) { | 151 | for (size_t index = 0; index < Regs::NumViewports; ++index) { |
| 140 | tables[0][OFF(instanced_arrays) + index] = InstanceDivisors; | 152 | tables[0][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] = |
| 141 | tables[0][OFF(vertex_array) + index * NUM(vertex_array[0]) + divisor_offset] = | 153 | ViewportSwizzles; |
| 142 | InstanceDivisors; | ||
| 143 | } | 154 | } |
| 144 | } | 155 | } |
| 145 | 156 | ||
| 146 | void SetupDirtyVertexAttributes(Tables& tables) { | 157 | void SetupDirtyVertexAttributes(Tables& tables) { |
| 147 | FillBlock(tables[0], OFF(vertex_attrib_format), NUM(vertex_attrib_format), VertexAttributes); | 158 | for (size_t i = 0; i < Regs::NumVertexAttributes; ++i) { |
| 159 | const size_t offset = OFF(vertex_attrib_format) + i * NUM(vertex_attrib_format[0]); | ||
| 160 | FillBlock(tables[0], offset, NUM(vertex_attrib_format[0]), VertexAttribute0 + i); | ||
| 161 | } | ||
| 162 | FillBlock(tables[1], OFF(vertex_attrib_format), Regs::NumVertexAttributes, VertexInput); | ||
| 148 | } | 163 | } |
| 149 | 164 | ||
| 150 | void SetupDirtyViewportSwizzles(Tables& tables) { | 165 | void SetupDirtyVertexBindings(Tables& tables) { |
| 151 | static constexpr size_t swizzle_offset = 6; | 166 | // Do NOT include stride here, it's implicit in VertexBuffer |
| 152 | for (size_t index = 0; index < Regs::NumViewports; ++index) { | 167 | static constexpr size_t divisor_offset = 3; |
| 153 | tables[0][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] = | 168 | for (size_t i = 0; i < Regs::NumVertexArrays; ++i) { |
| 154 | ViewportSwizzles; | 169 | const u8 flag = static_cast<u8>(VertexBinding0 + i); |
| 170 | tables[0][OFF(instanced_arrays) + i] = VertexInput; | ||
| 171 | tables[1][OFF(instanced_arrays) + i] = flag; | ||
| 172 | tables[0][OFF(vertex_array) + i * NUM(vertex_array[0]) + divisor_offset] = VertexInput; | ||
| 173 | tables[1][OFF(vertex_array) + i * NUM(vertex_array[0]) + divisor_offset] = flag; | ||
| 155 | } | 174 | } |
| 156 | } | 175 | } |
| 157 | } // Anonymous namespace | 176 | } // Anonymous namespace |
| 158 | 177 | ||
| 159 | StateTracker::StateTracker(Tegra::GPU& gpu) | 178 | StateTracker::StateTracker(Tegra::GPU& gpu) |
| 160 | : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} { | 179 | : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} { |
| 161 | auto& tables = gpu.Maxwell3D().dirty.tables; | 180 | auto& tables{gpu.Maxwell3D().dirty.tables}; |
| 162 | SetupDirtyFlags(tables); | 181 | SetupDirtyFlags(tables); |
| 163 | SetupDirtyViewports(tables); | 182 | SetupDirtyViewports(tables); |
| 164 | SetupDirtyScissors(tables); | 183 | SetupDirtyScissors(tables); |
| @@ -166,6 +185,7 @@ StateTracker::StateTracker(Tegra::GPU& gpu) | |||
| 166 | SetupDirtyBlendConstants(tables); | 185 | SetupDirtyBlendConstants(tables); |
| 167 | SetupDirtyDepthBounds(tables); | 186 | SetupDirtyDepthBounds(tables); |
| 168 | SetupDirtyStencilProperties(tables); | 187 | SetupDirtyStencilProperties(tables); |
| 188 | SetupDirtyLineWidth(tables); | ||
| 169 | SetupDirtyCullMode(tables); | 189 | SetupDirtyCullMode(tables); |
| 170 | SetupDirtyDepthBoundsEnable(tables); | 190 | SetupDirtyDepthBoundsEnable(tables); |
| 171 | SetupDirtyDepthTestEnable(tables); | 191 | SetupDirtyDepthTestEnable(tables); |
| @@ -175,9 +195,9 @@ StateTracker::StateTracker(Tegra::GPU& gpu) | |||
| 175 | SetupDirtyStencilOp(tables); | 195 | SetupDirtyStencilOp(tables); |
| 176 | SetupDirtyStencilTestEnable(tables); | 196 | SetupDirtyStencilTestEnable(tables); |
| 177 | SetupDirtyBlending(tables); | 197 | SetupDirtyBlending(tables); |
| 178 | SetupDirtyInstanceDivisors(tables); | ||
| 179 | SetupDirtyVertexAttributes(tables); | ||
| 180 | SetupDirtyViewportSwizzles(tables); | 198 | SetupDirtyViewportSwizzles(tables); |
| 199 | SetupDirtyVertexAttributes(tables); | ||
| 200 | SetupDirtyVertexBindings(tables); | ||
| 181 | } | 201 | } |
| 182 | 202 | ||
| 183 | } // namespace Vulkan | 203 | } // namespace Vulkan |
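Editor's note: this rework replaces the coarse VertexAttributes and InstanceDivisors flags with per-slot VertexAttribute0..31 and VertexBinding0..31 flags plus an aggregate VertexInput flag, registered through two lookup tables so a single register write raises both the fine-grained flag and the aggregate one. A simplified stand-in for that mechanism; the sizes and the zero-means-no-flag convention are illustrative:

    #include <array>
    #include <bitset>
    #include <cstddef>
    #include <cstdint>

    constexpr std::size_t NUM_REGS = 0x1000; // illustrative register count
    constexpr std::size_t NUM_FLAGS = 256;   // dirty flags fit in a u8

    struct DirtyState {
        std::bitset<NUM_FLAGS> flags;
        // Two tables, as in the diff: e.g. tables[0] maps a register to
        // VertexInput while tables[1] maps the same register to VertexBinding{i}.
        std::array<std::array<std::uint8_t, NUM_REGS>, 2> tables{};
    };

    void OnRegisterWrite(DirtyState& dirty, std::size_t reg_offset) {
        for (const auto& table : dirty.tables) {
            if (const std::uint8_t flag = table[reg_offset]; flag != 0) {
                dirty.flags[flag] = true;
            }
        }
    }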
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index 84e918a71..5f78f6950 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h | |||
| @@ -19,12 +19,19 @@ namespace Dirty { | |||
| 19 | enum : u8 { | 19 | enum : u8 { |
| 20 | First = VideoCommon::Dirty::LastCommonEntry, | 20 | First = VideoCommon::Dirty::LastCommonEntry, |
| 21 | 21 | ||
| 22 | VertexInput, | ||
| 23 | VertexAttribute0, | ||
| 24 | VertexAttribute31 = VertexAttribute0 + 31, | ||
| 25 | VertexBinding0, | ||
| 26 | VertexBinding31 = VertexBinding0 + 31, | ||
| 27 | |||
| 22 | Viewports, | 28 | Viewports, |
| 23 | Scissors, | 29 | Scissors, |
| 24 | DepthBias, | 30 | DepthBias, |
| 25 | BlendConstants, | 31 | BlendConstants, |
| 26 | DepthBounds, | 32 | DepthBounds, |
| 27 | StencilProperties, | 33 | StencilProperties, |
| 34 | LineWidth, | ||
| 28 | 35 | ||
| 29 | CullMode, | 36 | CullMode, |
| 30 | DepthBoundsEnable, | 37 | DepthBoundsEnable, |
| @@ -36,11 +43,9 @@ enum : u8 { | |||
| 36 | StencilTestEnable, | 43 | StencilTestEnable, |
| 37 | 44 | ||
| 38 | Blending, | 45 | Blending, |
| 39 | InstanceDivisors, | ||
| 40 | VertexAttributes, | ||
| 41 | ViewportSwizzles, | 46 | ViewportSwizzles, |
| 42 | 47 | ||
| 43 | Last | 48 | Last, |
| 44 | }; | 49 | }; |
| 45 | static_assert(Last <= std::numeric_limits<u8>::max()); | 50 | static_assert(Last <= std::numeric_limits<u8>::max()); |
| 46 | 51 | ||
| @@ -89,6 +94,10 @@ public: | |||
| 89 | return Exchange(Dirty::StencilProperties, false); | 94 | return Exchange(Dirty::StencilProperties, false); |
| 90 | } | 95 | } |
| 91 | 96 | ||
| 97 | bool TouchLineWidth() const { | ||
| 98 | return Exchange(Dirty::LineWidth, false); | ||
| 99 | } | ||
| 100 | |||
| 92 | bool TouchCullMode() { | 101 | bool TouchCullMode() { |
| 93 | return Exchange(Dirty::CullMode, false); | 102 | return Exchange(Dirty::CullMode, false); |
| 94 | } | 103 | } |
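Editor's note: like the other Touch* helpers, the new TouchLineWidth() is a test-and-clear: it reports whether the flag was raised since the last query and lowers it in the same call, so the renderer re-records vkCmdSetLineWidth only when something actually changed. A generic sketch of that pattern over a plain bitset (yuzu routes it through Maxwell3D's dirty flags instead):

    #include <bitset>
    #include <cstddef>

    // Return the previous value of flags[id] and overwrite it with new_value.
    template <std::size_t N>
    bool Exchange(std::bitset<N>& flags, std::size_t id, bool new_value) {
        const bool was_set = flags[id];
        flags[id] = new_value;
        return was_set;
    }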
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index dfd5c65ba..d990eefba 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp | |||
| @@ -65,6 +65,9 @@ VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKSchedul | |||
| 65 | VKSwapchain::~VKSwapchain() = default; | 65 | VKSwapchain::~VKSwapchain() = default; |
| 66 | 66 | ||
| 67 | void VKSwapchain::Create(u32 width, u32 height, bool srgb) { | 67 | void VKSwapchain::Create(u32 width, u32 height, bool srgb) { |
| 68 | is_outdated = false; | ||
| 69 | is_suboptimal = false; | ||
| 70 | |||
| 68 | const auto physical_device = device.GetPhysical(); | 71 | const auto physical_device = device.GetPhysical(); |
| 69 | const auto capabilities{physical_device.GetSurfaceCapabilitiesKHR(surface)}; | 72 | const auto capabilities{physical_device.GetSurfaceCapabilitiesKHR(surface)}; |
| 70 | if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) { | 73 | if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) { |
| @@ -82,21 +85,31 @@ void VKSwapchain::Create(u32 width, u32 height, bool srgb) { | |||
| 82 | resource_ticks.resize(image_count); | 85 | resource_ticks.resize(image_count); |
| 83 | } | 86 | } |
| 84 | 87 | ||
| 85 | bool VKSwapchain::AcquireNextImage() { | 88 | void VKSwapchain::AcquireNextImage() { |
| 86 | const VkResult result = | 89 | const VkResult result = device.GetLogical().AcquireNextImageKHR( |
| 87 | device.GetLogical().AcquireNextImageKHR(*swapchain, std::numeric_limits<u64>::max(), | 90 | *swapchain, std::numeric_limits<u64>::max(), *present_semaphores[frame_index], |
| 88 | *present_semaphores[frame_index], {}, &image_index); | 91 | VK_NULL_HANDLE, &image_index); |
| 89 | 92 | switch (result) { | |
| 93 | case VK_SUCCESS: | ||
| 94 | break; | ||
| 95 | case VK_SUBOPTIMAL_KHR: | ||
| 96 | is_suboptimal = true; | ||
| 97 | break; | ||
| 98 | case VK_ERROR_OUT_OF_DATE_KHR: | ||
| 99 | is_outdated = true; | ||
| 100 | break; | ||
| 101 | default: | ||
| 102 | LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned {}", vk::ToString(result)); | ||
| 103 | break; | ||
| 104 | } | ||
| 90 | scheduler.Wait(resource_ticks[image_index]); | 105 | scheduler.Wait(resource_ticks[image_index]); |
| 91 | return result == VK_SUCCESS || result == VK_SUBOPTIMAL_KHR; | 106 | resource_ticks[image_index] = scheduler.CurrentTick(); |
| 92 | } | 107 | } |
| 93 | 108 | ||
| 94 | bool VKSwapchain::Present(VkSemaphore render_semaphore) { | 109 | void VKSwapchain::Present(VkSemaphore render_semaphore) { |
| 95 | const VkSemaphore present_semaphore{*present_semaphores[frame_index]}; | 110 | const VkSemaphore present_semaphore{*present_semaphores[frame_index]}; |
| 96 | const std::array<VkSemaphore, 2> semaphores{present_semaphore, render_semaphore}; | 111 | const std::array<VkSemaphore, 2> semaphores{present_semaphore, render_semaphore}; |
| 97 | const auto present_queue{device.GetPresentQueue()}; | 112 | const auto present_queue{device.GetPresentQueue()}; |
| 98 | bool recreated = false; | ||
| 99 | |||
| 100 | const VkPresentInfoKHR present_info{ | 113 | const VkPresentInfoKHR present_info{ |
| 101 | .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, | 114 | .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, |
| 102 | .pNext = nullptr, | 115 | .pNext = nullptr, |
| @@ -107,7 +120,6 @@ bool VKSwapchain::Present(VkSemaphore render_semaphore) { | |||
| 107 | .pImageIndices = &image_index, | 120 | .pImageIndices = &image_index, |
| 108 | .pResults = nullptr, | 121 | .pResults = nullptr, |
| 109 | }; | 122 | }; |
| 110 | |||
| 111 | switch (const VkResult result = present_queue.Present(present_info)) { | 123 | switch (const VkResult result = present_queue.Present(present_info)) { |
| 112 | case VK_SUCCESS: | 124 | case VK_SUCCESS: |
| 113 | break; | 125 | break; |
| @@ -115,24 +127,16 @@ bool VKSwapchain::Present(VkSemaphore render_semaphore) { | |||
| 115 | LOG_DEBUG(Render_Vulkan, "Suboptimal swapchain"); | 127 | LOG_DEBUG(Render_Vulkan, "Suboptimal swapchain"); |
| 116 | break; | 128 | break; |
| 117 | case VK_ERROR_OUT_OF_DATE_KHR: | 129 | case VK_ERROR_OUT_OF_DATE_KHR: |
| 118 | if (current_width > 0 && current_height > 0) { | 130 | is_outdated = true; |
| 119 | Create(current_width, current_height, current_srgb); | ||
| 120 | recreated = true; | ||
| 121 | } | ||
| 122 | break; | 131 | break; |
| 123 | default: | 132 | default: |
| 124 | LOG_CRITICAL(Render_Vulkan, "Failed to present with error {}", vk::ToString(result)); | 133 | LOG_CRITICAL(Render_Vulkan, "Failed to present with error {}", vk::ToString(result)); |
| 125 | break; | 134 | break; |
| 126 | } | 135 | } |
| 127 | 136 | ++frame_index; | |
| 128 | resource_ticks[image_index] = scheduler.CurrentTick(); | 137 | if (frame_index >= image_count) { |
| 129 | frame_index = (frame_index + 1) % static_cast<u32>(image_count); | 138 | frame_index = 0; |
| 130 | return recreated; | 139 | } |
| 131 | } | ||
| 132 | |||
| 133 | bool VKSwapchain::HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const { | ||
| 134 | // TODO(Rodrigo): Handle framebuffer pixel format changes | ||
| 135 | return framebuffer.width != current_width || framebuffer.height != current_height; | ||
| 136 | } | 140 | } |
| 137 | 141 | ||
| 138 | void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, | 142 | void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, |
| @@ -148,7 +152,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, | |||
| 148 | if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) { | 152 | if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) { |
| 149 | requested_image_count = capabilities.maxImageCount; | 153 | requested_image_count = capabilities.maxImageCount; |
| 150 | } | 154 | } |
| 151 | |||
| 152 | VkSwapchainCreateInfoKHR swapchain_ci{ | 155 | VkSwapchainCreateInfoKHR swapchain_ci{ |
| 153 | .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR, | 156 | .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR, |
| 154 | .pNext = nullptr, | 157 | .pNext = nullptr, |
| @@ -169,7 +172,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, | |||
| 169 | .clipped = VK_FALSE, | 172 | .clipped = VK_FALSE, |
| 170 | .oldSwapchain = nullptr, | 173 | .oldSwapchain = nullptr, |
| 171 | }; | 174 | }; |
| 172 | |||
| 173 | const u32 graphics_family{device.GetGraphicsFamily()}; | 175 | const u32 graphics_family{device.GetGraphicsFamily()}; |
| 174 | const u32 present_family{device.GetPresentFamily()}; | 176 | const u32 present_family{device.GetPresentFamily()}; |
| 175 | const std::array<u32, 2> queue_indices{graphics_family, present_family}; | 177 | const std::array<u32, 2> queue_indices{graphics_family, present_family}; |
| @@ -178,7 +180,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, | |||
| 178 | swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size()); | 180 | swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size()); |
| 179 | swapchain_ci.pQueueFamilyIndices = queue_indices.data(); | 181 | swapchain_ci.pQueueFamilyIndices = queue_indices.data(); |
| 180 | } | 182 | } |
| 181 | |||
| 182 | // Request the size again to reduce the possibility of a TOCTOU race condition. | 183 | // Request the size again to reduce the possibility of a TOCTOU race condition. |
| 183 | const auto updated_capabilities = physical_device.GetSurfaceCapabilitiesKHR(surface); | 184 | const auto updated_capabilities = physical_device.GetSurfaceCapabilitiesKHR(surface); |
| 184 | swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height); | 185 | swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height); |
| @@ -186,8 +187,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, | |||
| 186 | swapchain = device.GetLogical().CreateSwapchainKHR(swapchain_ci); | 187 | swapchain = device.GetLogical().CreateSwapchainKHR(swapchain_ci); |
| 187 | 188 | ||
| 188 | extent = swapchain_ci.imageExtent; | 189 | extent = swapchain_ci.imageExtent; |
| 189 | current_width = extent.width; | ||
| 190 | current_height = extent.height; | ||
| 191 | current_srgb = srgb; | 190 | current_srgb = srgb; |
| 192 | 191 | ||
| 193 | images = swapchain.GetImages(); | 192 | images = swapchain.GetImages(); |
| @@ -197,8 +196,8 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, | |||
| 197 | 196 | ||
| 198 | void VKSwapchain::CreateSemaphores() { | 197 | void VKSwapchain::CreateSemaphores() { |
| 199 | present_semaphores.resize(image_count); | 198 | present_semaphores.resize(image_count); |
| 200 | std::generate(present_semaphores.begin(), present_semaphores.end(), | 199 | std::ranges::generate(present_semaphores, |
| 201 | [this] { return device.GetLogical().CreateSemaphore(); }); | 200 | [this] { return device.GetLogical().CreateSemaphore(); }); |
| 202 | } | 201 | } |
| 203 | 202 | ||
| 204 | void VKSwapchain::CreateImageViews() { | 203 | void VKSwapchain::CreateImageViews() { |
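Editor's note: AcquireNextImage() now folds its VkResult into two sticky flags instead of returning a bool. The distinction the switch encodes: with VK_SUBOPTIMAL_KHR an image was still acquired and the frame can proceed, while VK_ERROR_OUT_OF_DATE_KHR acquired nothing and the swapchain must be recreated before the next attempt. A standalone restatement of that classification; the struct and function names are illustrative:

    #include <vulkan/vulkan.h>

    struct SwapchainStatus {
        bool outdated = false;
        bool suboptimal = false;
    };

    SwapchainStatus ClassifyAcquireResult(VkResult result) {
        SwapchainStatus status;
        switch (result) {
        case VK_SUCCESS:
            break; // image acquired, nothing to do
        case VK_SUBOPTIMAL_KHR:
            status.suboptimal = true; // image acquired; recreate when convenient
            break;
        case VK_ERROR_OUT_OF_DATE_KHR:
            status.outdated = true; // nothing acquired; recreate before retrying
            break;
        default:
            break; // the real code logs any other error
        }
        return status;
    }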
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index adc8d27cf..35c2cdc14 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h | |||
| @@ -28,14 +28,25 @@ public: | |||
| 28 | void Create(u32 width, u32 height, bool srgb); | 28 | void Create(u32 width, u32 height, bool srgb); |
| 29 | 29 | ||
| 30 | /// Acquires the next image in the swapchain, waits as needed. | 30 | /// Acquires the next image in the swapchain, waits as needed. |
| 31 | bool AcquireNextImage(); | 31 | void AcquireNextImage(); |
| 32 | 32 | ||
| 33 | /// Presents the rendered image to the swapchain. Returns true when the swapchain had to be | 33 | /// Presents the rendered image to the swapchain. |
| 34 | /// recreated. Takes responsibility for the ownership of the fence. | 34 | void Present(VkSemaphore render_semaphore); |
| 35 | bool Present(VkSemaphore render_semaphore); | ||
| 36 | 35 | ||
| 37 | /// Returns true when the framebuffer layout has changed. | 36 | /// Returns true when the color space has changed. |
| 38 | bool HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const; | 37 | bool HasColorSpaceChanged(bool is_srgb) const { |
| 38 | return current_srgb != is_srgb; | ||
| 39 | } | ||
| 40 | |||
| 41 | /// Returns true when the swapchain is outdated. | ||
| 42 | bool IsOutDated() const { | ||
| 43 | return is_outdated; | ||
| 44 | } | ||
| 45 | |||
| 46 | /// Returns true when the swapchain is suboptimal. | ||
| 47 | bool IsSubOptimal() const { | ||
| 48 | return is_suboptimal; | ||
| 49 | } | ||
| 39 | 50 | ||
| 40 | VkExtent2D GetSize() const { | 51 | VkExtent2D GetSize() const { |
| 41 | return extent; | 52 | return extent; |
| @@ -61,10 +72,6 @@ public: | |||
| 61 | return image_format; | 72 | return image_format; |
| 62 | } | 73 | } |
| 63 | 74 | ||
| 64 | bool GetSrgbState() const { | ||
| 65 | return current_srgb; | ||
| 66 | } | ||
| 67 | |||
| 68 | private: | 75 | private: |
| 69 | void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height, | 76 | void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height, |
| 70 | bool srgb); | 77 | bool srgb); |
| @@ -92,9 +99,9 @@ private: | |||
| 92 | VkFormat image_format{}; | 99 | VkFormat image_format{}; |
| 93 | VkExtent2D extent{}; | 100 | VkExtent2D extent{}; |
| 94 | 101 | ||
| 95 | u32 current_width{}; | ||
| 96 | u32 current_height{}; | ||
| 97 | bool current_srgb{}; | 102 | bool current_srgb{}; |
| 103 | bool is_outdated{}; | ||
| 104 | bool is_suboptimal{}; | ||
| 98 | }; | 105 | }; |
| 99 | 106 | ||
| 100 | } // namespace Vulkan | 107 | } // namespace Vulkan |
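Editor's note: with recreation moved out of Present(), the caller now owns the recovery policy. One way a render loop might drive the new interface; the function, its parameters, and the recreate-before-acquire ordering are an illustrative sketch, not the renderer's actual code (u32 is yuzu's alias from common/common_types.h):

    void PresentFrame(Vulkan::VKSwapchain& swapchain, VkSemaphore render_semaphore,
                      u32 width, u32 height, bool srgb) {
        if (swapchain.IsOutDated() || swapchain.IsSubOptimal() ||
            swapchain.HasColorSpaceChanged(srgb)) {
            swapchain.Create(width, height, srgb); // also clears the two sticky flags
        }
        swapchain.AcquireNextImage();
        // ... record and submit work that signals render_semaphore ...
        swapchain.Present(render_semaphore);
    }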
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 88ccf96f5..8e029bcb3 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp | |||
| @@ -15,6 +15,7 @@ | |||
| 15 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | 15 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" |
| 16 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | 16 | #include "video_core/renderer_vulkan/vk_compute_pass.h" |
| 17 | #include "video_core/renderer_vulkan/vk_rasterizer.h" | 17 | #include "video_core/renderer_vulkan/vk_rasterizer.h" |
| 18 | #include "video_core/renderer_vulkan/vk_render_pass_cache.h" | ||
| 18 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 19 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 19 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 20 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 20 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | 21 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| @@ -34,19 +35,6 @@ using VideoCommon::SubresourceRange; | |||
| 34 | using VideoCore::Surface::IsPixelFormatASTC; | 35 | using VideoCore::Surface::IsPixelFormatASTC; |
| 35 | 36 | ||
| 36 | namespace { | 37 | namespace { |
| 37 | |||
| 38 | constexpr std::array ATTACHMENT_REFERENCES{ | ||
| 39 | VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 40 | VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 41 | VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 42 | VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 43 | VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 44 | VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 45 | VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 46 | VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 47 | VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 48 | }; | ||
| 49 | |||
| 50 | constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { | 38 | constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { |
| 51 | if (color == std::array<float, 4>{0, 0, 0, 0}) { | 39 | if (color == std::array<float, 4>{0, 0, 0, 0}) { |
| 52 | return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; | 40 | return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; |
| @@ -174,25 +162,6 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { | |||
| 174 | return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info)); | 162 | return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info)); |
| 175 | } | 163 | } |
| 176 | 164 | ||
| 177 | [[nodiscard]] vk::Buffer MakeBuffer(const Device& device, const ImageInfo& info) { | ||
| 178 | if (info.type != ImageType::Buffer) { | ||
| 179 | return vk::Buffer{}; | ||
| 180 | } | ||
| 181 | const size_t bytes_per_block = VideoCore::Surface::BytesPerBlock(info.format); | ||
| 182 | return device.GetLogical().CreateBuffer(VkBufferCreateInfo{ | ||
| 183 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||
| 184 | .pNext = nullptr, | ||
| 185 | .flags = 0, | ||
| 186 | .size = info.size.width * bytes_per_block, | ||
| 187 | .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | | ||
| 188 | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | | ||
| 189 | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, | ||
| 190 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||
| 191 | .queueFamilyIndexCount = 0, | ||
| 192 | .pQueueFamilyIndices = nullptr, | ||
| 193 | }); | ||
| 194 | } | ||
| 195 | |||
| 196 | [[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) { | 165 | [[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) { |
| 197 | switch (VideoCore::Surface::GetFormatType(format)) { | 166 | switch (VideoCore::Surface::GetFormatType(format)) { |
| 198 | case VideoCore::Surface::SurfaceType::ColorTexture: | 167 | case VideoCore::Surface::SurfaceType::ColorTexture: |
| @@ -226,23 +195,6 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { | |||
| 226 | } | 195 | } |
| 227 | } | 196 | } |
| 228 | 197 | ||
| 229 | [[nodiscard]] VkAttachmentDescription AttachmentDescription(const Device& device, | ||
| 230 | const ImageView* image_view) { | ||
| 231 | using MaxwellToVK::SurfaceFormat; | ||
| 232 | const PixelFormat pixel_format = image_view->format; | ||
| 233 | return VkAttachmentDescription{ | ||
| 234 | .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, | ||
| 235 | .format = SurfaceFormat(device, FormatType::Optimal, true, pixel_format).format, | ||
| 236 | .samples = image_view->Samples(), | ||
| 237 | .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, | ||
| 238 | .storeOp = VK_ATTACHMENT_STORE_OP_STORE, | ||
| 239 | .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, | ||
| 240 | .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, | ||
| 241 | .initialLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 242 | .finalLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 243 | }; | ||
| 244 | } | ||
| 245 | |||
| 246 | [[nodiscard]] VkComponentSwizzle ComponentSwizzle(SwizzleSource swizzle) { | 198 | [[nodiscard]] VkComponentSwizzle ComponentSwizzle(SwizzleSource swizzle) { |
| 247 | switch (swizzle) { | 199 | switch (swizzle) { |
| 248 | case SwizzleSource::Zero: | 200 | case SwizzleSource::Zero: |
| @@ -263,6 +215,30 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { | |||
| 263 | return VK_COMPONENT_SWIZZLE_ZERO; | 215 | return VK_COMPONENT_SWIZZLE_ZERO; |
| 264 | } | 216 | } |
| 265 | 217 | ||
| 218 | [[nodiscard]] VkImageViewType ImageViewType(Shader::TextureType type) { | ||
| 219 | switch (type) { | ||
| 220 | case Shader::TextureType::Color1D: | ||
| 221 | return VK_IMAGE_VIEW_TYPE_1D; | ||
| 222 | case Shader::TextureType::Color2D: | ||
| 223 | return VK_IMAGE_VIEW_TYPE_2D; | ||
| 224 | case Shader::TextureType::ColorCube: | ||
| 225 | return VK_IMAGE_VIEW_TYPE_CUBE; | ||
| 226 | case Shader::TextureType::Color3D: | ||
| 227 | return VK_IMAGE_VIEW_TYPE_3D; | ||
| 228 | case Shader::TextureType::ColorArray1D: | ||
| 229 | return VK_IMAGE_VIEW_TYPE_1D_ARRAY; | ||
| 230 | case Shader::TextureType::ColorArray2D: | ||
| 231 | return VK_IMAGE_VIEW_TYPE_2D_ARRAY; | ||
| 232 | case Shader::TextureType::ColorArrayCube: | ||
| 233 | return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; | ||
| 234 | case Shader::TextureType::Buffer: | ||
| 235 | UNREACHABLE_MSG("Texture buffers can't be image views"); | ||
| 236 | return VK_IMAGE_VIEW_TYPE_1D; | ||
| 237 | } | ||
| 238 | UNREACHABLE_MSG("Invalid image view type={}", type); | ||
| 239 | return VK_IMAGE_VIEW_TYPE_2D; | ||
| 240 | } | ||
| 241 | |||
| 266 | [[nodiscard]] VkImageViewType ImageViewType(VideoCommon::ImageViewType type) { | 242 | [[nodiscard]] VkImageViewType ImageViewType(VideoCommon::ImageViewType type) { |
| 267 | switch (type) { | 243 | switch (type) { |
| 268 | case VideoCommon::ImageViewType::e1D: | 244 | case VideoCommon::ImageViewType::e1D: |
| @@ -280,7 +256,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { | |||
| 280 | case VideoCommon::ImageViewType::CubeArray: | 256 | case VideoCommon::ImageViewType::CubeArray: |
| 281 | return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; | 257 | return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; |
| 282 | case VideoCommon::ImageViewType::Rect: | 258 | case VideoCommon::ImageViewType::Rect: |
| 283 | LOG_WARNING(Render_Vulkan, "Unnormalized image view type not supported"); | 259 | UNIMPLEMENTED_MSG("Rect image view"); |
| 284 | return VK_IMAGE_VIEW_TYPE_2D; | 260 | return VK_IMAGE_VIEW_TYPE_2D; |
| 285 | case VideoCommon::ImageViewType::Buffer: | 261 | case VideoCommon::ImageViewType::Buffer: |
| 286 | UNREACHABLE_MSG("Texture buffers can't be image views"); | 262 | UNREACHABLE_MSG("Texture buffers can't be image views"); |
| @@ -327,7 +303,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { | |||
| 327 | }; | 303 | }; |
| 328 | } | 304 | } |
| 329 | 305 | ||
| 330 | [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies( | 306 | [[maybe_unused]] [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies( |
| 331 | std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) { | 307 | std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) { |
| 332 | std::vector<VkBufferCopy> result(copies.size()); | 308 | std::vector<VkBufferCopy> result(copies.size()); |
| 333 | std::ranges::transform( | 309 | std::ranges::transform( |
| @@ -587,6 +563,28 @@ struct RangedBarrierRange { | |||
| 587 | } | 563 | } |
| 588 | }; | 564 | }; |
| 589 | 565 | ||
| 566 | [[nodiscard]] VkFormat Format(Shader::ImageFormat format) { | ||
| 567 | switch (format) { | ||
| 568 | case Shader::ImageFormat::Typeless: | ||
| 569 | break; | ||
| 570 | case Shader::ImageFormat::R8_SINT: | ||
| 571 | return VK_FORMAT_R8_SINT; | ||
| 572 | case Shader::ImageFormat::R8_UINT: | ||
| 573 | return VK_FORMAT_R8_UINT; | ||
| 574 | case Shader::ImageFormat::R16_UINT: | ||
| 575 | return VK_FORMAT_R16_UINT; | ||
| 576 | case Shader::ImageFormat::R16_SINT: | ||
| 577 | return VK_FORMAT_R16_SINT; | ||
| 578 | case Shader::ImageFormat::R32_UINT: | ||
| 579 | return VK_FORMAT_R32_UINT; | ||
| 580 | case Shader::ImageFormat::R32G32_UINT: | ||
| 581 | return VK_FORMAT_R32G32_UINT; | ||
| 582 | case Shader::ImageFormat::R32G32B32A32_UINT: | ||
| 583 | return VK_FORMAT_R32G32B32A32_UINT; | ||
| 584 | } | ||
| 585 | UNREACHABLE_MSG("Invalid image format={}", format); | ||
| 586 | return VK_FORMAT_R32_UINT; | ||
| 587 | } | ||
| 590 | } // Anonymous namespace | 588 | } // Anonymous namespace |
| 591 | 589 | ||
| 592 | void TextureCacheRuntime::Finish() { | 590 | void TextureCacheRuntime::Finish() { |
| @@ -625,7 +623,7 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst | |||
| 625 | return; | 623 | return; |
| 626 | } | 624 | } |
| 627 | } | 625 | } |
| 628 | ASSERT(src.ImageFormat() == dst.ImageFormat()); | 626 | ASSERT(src.format == dst.format); |
| 629 | ASSERT(!(is_dst_msaa && !is_src_msaa)); | 627 | ASSERT(!(is_dst_msaa && !is_src_msaa)); |
| 630 | ASSERT(operation == Fermi2D::Operation::SrcCopy); | 628 | ASSERT(operation == Fermi2D::Operation::SrcCopy); |
| 631 | 629 | ||
| @@ -842,13 +840,9 @@ u64 TextureCacheRuntime::GetDeviceLocalMemory() const { | |||
| 842 | Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_, | 840 | Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_, |
| 843 | VAddr cpu_addr_) | 841 | VAddr cpu_addr_) |
| 844 | : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler}, | 842 | : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler}, |
| 845 | image(MakeImage(runtime.device, info)), buffer(MakeBuffer(runtime.device, info)), | 843 | image(MakeImage(runtime.device, info)), |
| 844 | commit(runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal)), | ||
| 846 | aspect_mask(ImageAspectMask(info.format)) { | 845 | aspect_mask(ImageAspectMask(info.format)) { |
| 847 | if (image) { | ||
| 848 | commit = runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal); | ||
| 849 | } else { | ||
| 850 | commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); | ||
| 851 | } | ||
| 852 | if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { | 846 | if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { |
| 853 | if (Settings::values.accelerate_astc.GetValue()) { | 847 | if (Settings::values.accelerate_astc.GetValue()) { |
| 854 | flags |= VideoCommon::ImageFlagBits::AcceleratedUpload; | 848 | flags |= VideoCommon::ImageFlagBits::AcceleratedUpload; |
| @@ -857,11 +851,7 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_ | |||
| 857 | } | 851 | } |
| 858 | } | 852 | } |
| 859 | if (runtime.device.HasDebuggingToolAttached()) { | 853 | if (runtime.device.HasDebuggingToolAttached()) { |
| 860 | if (image) { | 854 | image.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); |
| 861 | image.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); | ||
| 862 | } else { | ||
| 863 | buffer.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); | ||
| 864 | } | ||
| 865 | } | 855 | } |
| 866 | static constexpr VkImageViewUsageCreateInfo storage_image_view_usage_create_info{ | 856 | static constexpr VkImageViewUsageCreateInfo storage_image_view_usage_create_info{ |
| 867 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO, | 857 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO, |
| @@ -913,19 +903,6 @@ void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImag | |||
| 913 | }); | 903 | }); |
| 914 | } | 904 | } |
| 915 | 905 | ||
| 916 | void Image::UploadMemory(const StagingBufferRef& map, | ||
| 917 | std::span<const VideoCommon::BufferCopy> copies) { | ||
| 918 | // TODO: Move this to another API | ||
| 919 | scheduler->RequestOutsideRenderPassOperationContext(); | ||
| 920 | std::vector vk_copies = TransformBufferCopies(copies, map.offset); | ||
| 921 | const VkBuffer src_buffer = map.buffer; | ||
| 922 | const VkBuffer dst_buffer = *buffer; | ||
| 923 | scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) { | ||
| 924 | // TODO: Barriers | ||
| 925 | cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies); | ||
| 926 | }); | ||
| 927 | } | ||
| 928 | |||
| 929 | void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { | 906 | void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { |
| 930 | std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); | 907 | std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); |
| 931 | scheduler->RequestOutsideRenderPassOperationContext(); | 908 | scheduler->RequestOutsideRenderPassOperationContext(); |
| @@ -984,8 +961,9 @@ void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferIm | |||
| 984 | ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, | 961 | ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, |
| 985 | ImageId image_id_, Image& image) | 962 | ImageId image_id_, Image& image) |
| 986 | : VideoCommon::ImageViewBase{info, image.info, image_id_}, device{&runtime.device}, | 963 | : VideoCommon::ImageViewBase{info, image.info, image_id_}, device{&runtime.device}, |
| 987 | image_handle{image.Handle()}, image_format{image.info.format}, samples{ConvertSampleCount( | 964 | image_handle{image.Handle()}, samples{ConvertSampleCount(image.info.num_samples)} { |
| 988 | image.info.num_samples)} { | 965 | using Shader::TextureType; |
| 966 | |||
| 989 | const VkImageAspectFlags aspect_mask = ImageViewAspectMask(info); | 967 | const VkImageAspectFlags aspect_mask = ImageViewAspectMask(info); |
| 990 | std::array<SwizzleSource, 4> swizzle{ | 968 | std::array<SwizzleSource, 4> swizzle{ |
| 991 | SwizzleSource::R, | 969 | SwizzleSource::R, |
| @@ -1023,57 +1001,54 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI | |||
| 1023 | }, | 1001 | }, |
| 1024 | .subresourceRange = MakeSubresourceRange(aspect_mask, info.range), | 1002 | .subresourceRange = MakeSubresourceRange(aspect_mask, info.range), |
| 1025 | }; | 1003 | }; |
| 1026 | const auto create = [&](VideoCommon::ImageViewType view_type, std::optional<u32> num_layers) { | 1004 | const auto create = [&](TextureType tex_type, std::optional<u32> num_layers) { |
| 1027 | VkImageViewCreateInfo ci{create_info}; | 1005 | VkImageViewCreateInfo ci{create_info}; |
| 1028 | ci.viewType = ImageViewType(view_type); | 1006 | ci.viewType = ImageViewType(tex_type); |
| 1029 | if (num_layers) { | 1007 | if (num_layers) { |
| 1030 | ci.subresourceRange.layerCount = *num_layers; | 1008 | ci.subresourceRange.layerCount = *num_layers; |
| 1031 | } | 1009 | } |
| 1032 | vk::ImageView handle = device->GetLogical().CreateImageView(ci); | 1010 | vk::ImageView handle = device->GetLogical().CreateImageView(ci); |
| 1033 | if (device->HasDebuggingToolAttached()) { | 1011 | if (device->HasDebuggingToolAttached()) { |
| 1034 | handle.SetObjectNameEXT(VideoCommon::Name(*this, view_type).c_str()); | 1012 | handle.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); |
| 1035 | } | 1013 | } |
| 1036 | image_views[static_cast<size_t>(view_type)] = std::move(handle); | 1014 | image_views[static_cast<size_t>(tex_type)] = std::move(handle); |
| 1037 | }; | 1015 | }; |
| 1038 | switch (info.type) { | 1016 | switch (info.type) { |
| 1039 | case VideoCommon::ImageViewType::e1D: | 1017 | case VideoCommon::ImageViewType::e1D: |
| 1040 | case VideoCommon::ImageViewType::e1DArray: | 1018 | case VideoCommon::ImageViewType::e1DArray: |
| 1041 | create(VideoCommon::ImageViewType::e1D, 1); | 1019 | create(TextureType::Color1D, 1); |
| 1042 | create(VideoCommon::ImageViewType::e1DArray, std::nullopt); | 1020 | create(TextureType::ColorArray1D, std::nullopt); |
| 1043 | render_target = Handle(VideoCommon::ImageViewType::e1DArray); | 1021 | render_target = Handle(TextureType::ColorArray1D); |
| 1044 | break; | 1022 | break; |
| 1045 | case VideoCommon::ImageViewType::e2D: | 1023 | case VideoCommon::ImageViewType::e2D: |
| 1046 | case VideoCommon::ImageViewType::e2DArray: | 1024 | case VideoCommon::ImageViewType::e2DArray: |
| 1047 | create(VideoCommon::ImageViewType::e2D, 1); | 1025 | create(TextureType::Color2D, 1); |
| 1048 | create(VideoCommon::ImageViewType::e2DArray, std::nullopt); | 1026 | create(TextureType::ColorArray2D, std::nullopt); |
| 1049 | render_target = Handle(VideoCommon::ImageViewType::e2DArray); | 1027 | render_target = Handle(Shader::TextureType::ColorArray2D); |
| 1050 | break; | 1028 | break; |
| 1051 | case VideoCommon::ImageViewType::e3D: | 1029 | case VideoCommon::ImageViewType::e3D: |
| 1052 | create(VideoCommon::ImageViewType::e3D, std::nullopt); | 1030 | create(TextureType::Color3D, std::nullopt); |
| 1053 | render_target = Handle(VideoCommon::ImageViewType::e3D); | 1031 | render_target = Handle(Shader::TextureType::Color3D); |
| 1054 | break; | 1032 | break; |
| 1055 | case VideoCommon::ImageViewType::Cube: | 1033 | case VideoCommon::ImageViewType::Cube: |
| 1056 | case VideoCommon::ImageViewType::CubeArray: | 1034 | case VideoCommon::ImageViewType::CubeArray: |
| 1057 | create(VideoCommon::ImageViewType::Cube, 6); | 1035 | create(TextureType::ColorCube, 6); |
| 1058 | create(VideoCommon::ImageViewType::CubeArray, std::nullopt); | 1036 | create(TextureType::ColorArrayCube, std::nullopt); |
| 1059 | break; | 1037 | break; |
| 1060 | case VideoCommon::ImageViewType::Rect: | 1038 | case VideoCommon::ImageViewType::Rect: |
| 1061 | UNIMPLEMENTED(); | 1039 | UNIMPLEMENTED(); |
| 1062 | break; | 1040 | break; |
| 1063 | case VideoCommon::ImageViewType::Buffer: | 1041 | case VideoCommon::ImageViewType::Buffer: |
| 1064 | buffer_view = device->GetLogical().CreateBufferView(VkBufferViewCreateInfo{ | 1042 | UNREACHABLE(); |
| 1065 | .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, | ||
| 1066 | .pNext = nullptr, | ||
| 1067 | .flags = 0, | ||
| 1068 | .buffer = image.Buffer(), | ||
| 1069 | .format = format_info.format, | ||
| 1070 | .offset = 0, // TODO: Redesign buffer cache to support this | ||
| 1071 | .range = image.guest_size_bytes, | ||
| 1072 | }); | ||
| 1073 | break; | 1043 | break; |
| 1074 | } | 1044 | } |
| 1075 | } | 1045 | } |
| 1076 | 1046 | ||
| 1047 | ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, | ||
| 1048 | const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_) | ||
| 1049 | : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_}, | ||
| 1050 | buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {} | ||
| 1051 | |||
| 1077 | ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams& params) | 1052 | ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams& params) |
| 1078 | : VideoCommon::ImageViewBase{params} {} | 1053 | : VideoCommon::ImageViewBase{params} {} |
| 1079 | 1054 | ||
| @@ -1081,7 +1056,8 @@ VkImageView ImageView::DepthView() { | |||
| 1081 | if (depth_view) { | 1056 | if (depth_view) { |
| 1082 | return *depth_view; | 1057 | return *depth_view; |
| 1083 | } | 1058 | } |
| 1084 | depth_view = MakeDepthStencilView(VK_IMAGE_ASPECT_DEPTH_BIT); | 1059 | const auto& info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format); |
| 1060 | depth_view = MakeView(info.format, VK_IMAGE_ASPECT_DEPTH_BIT); | ||
| 1085 | return *depth_view; | 1061 | return *depth_view; |
| 1086 | } | 1062 | } |
| 1087 | 1063 | ||
| @@ -1089,18 +1065,38 @@ VkImageView ImageView::StencilView() { | |||
| 1089 | if (stencil_view) { | 1065 | if (stencil_view) { |
| 1090 | return *stencil_view; | 1066 | return *stencil_view; |
| 1091 | } | 1067 | } |
| 1092 | stencil_view = MakeDepthStencilView(VK_IMAGE_ASPECT_STENCIL_BIT); | 1068 | const auto& info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format); |
| 1069 | stencil_view = MakeView(info.format, VK_IMAGE_ASPECT_STENCIL_BIT); | ||
| 1093 | return *stencil_view; | 1070 | return *stencil_view; |
| 1094 | } | 1071 | } |
| 1095 | 1072 | ||
| 1096 | vk::ImageView ImageView::MakeDepthStencilView(VkImageAspectFlags aspect_mask) { | 1073 | VkImageView ImageView::StorageView(Shader::TextureType texture_type, |
| 1074 | Shader::ImageFormat image_format) { | ||
| 1075 | if (image_format == Shader::ImageFormat::Typeless) { | ||
| 1076 | return Handle(texture_type); | ||
| 1077 | } | ||
| 1078 | const bool is_signed{image_format == Shader::ImageFormat::R8_SINT || | ||
| 1079 | image_format == Shader::ImageFormat::R16_SINT}; | ||
| 1080 | if (!storage_views) { | ||
| 1081 | storage_views = std::make_unique<StorageViews>(); | ||
| 1082 | } | ||
| 1083 | auto& views{is_signed ? storage_views->signeds : storage_views->unsigneds}; | ||
| 1084 | auto& view{views[static_cast<size_t>(texture_type)]}; | ||
| 1085 | if (view) { | ||
| 1086 | return *view; | ||
| 1087 | } | ||
| 1088 | view = MakeView(Format(image_format), VK_IMAGE_ASPECT_COLOR_BIT); | ||
| 1089 | return *view; | ||
| 1090 | } | ||
| 1091 | |||
| 1092 | vk::ImageView ImageView::MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask) { | ||
| 1097 | return device->GetLogical().CreateImageView({ | 1093 | return device->GetLogical().CreateImageView({ |
| 1098 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, | 1094 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, |
| 1099 | .pNext = nullptr, | 1095 | .pNext = nullptr, |
| 1100 | .flags = 0, | 1096 | .flags = 0, |
| 1101 | .image = image_handle, | 1097 | .image = image_handle, |
| 1102 | .viewType = ImageViewType(type), | 1098 | .viewType = ImageViewType(type), |
| 1103 | .format = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format).format, | 1099 | .format = vk_format, |
| 1104 | .components{ | 1100 | .components{ |
| 1105 | .r = VK_COMPONENT_SWIZZLE_IDENTITY, | 1101 | .r = VK_COMPONENT_SWIZZLE_IDENTITY, |
| 1106 | .g = VK_COMPONENT_SWIZZLE_IDENTITY, | 1102 | .g = VK_COMPONENT_SWIZZLE_IDENTITY, |
| @@ -1164,7 +1160,6 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t | |||
| 1164 | 1160 | ||
| 1165 | Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers, | 1161 | Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers, |
| 1166 | ImageView* depth_buffer, const VideoCommon::RenderTargets& key) { | 1162 | ImageView* depth_buffer, const VideoCommon::RenderTargets& key) { |
| 1167 | std::vector<VkAttachmentDescription> descriptions; | ||
| 1168 | std::vector<VkImageView> attachments; | 1163 | std::vector<VkImageView> attachments; |
| 1169 | RenderPassKey renderpass_key{}; | 1164 | RenderPassKey renderpass_key{}; |
| 1170 | s32 num_layers = 1; | 1165 | s32 num_layers = 1; |
| @@ -1175,7 +1170,6 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM | |||
| 1175 | renderpass_key.color_formats[index] = PixelFormat::Invalid; | 1170 | renderpass_key.color_formats[index] = PixelFormat::Invalid; |
| 1176 | continue; | 1171 | continue; |
| 1177 | } | 1172 | } |
| 1178 | descriptions.push_back(AttachmentDescription(runtime.device, color_buffer)); | ||
| 1179 | attachments.push_back(color_buffer->RenderTarget()); | 1173 | attachments.push_back(color_buffer->RenderTarget()); |
| 1180 | renderpass_key.color_formats[index] = color_buffer->format; | 1174 | renderpass_key.color_formats[index] = color_buffer->format; |
| 1181 | num_layers = std::max(num_layers, color_buffer->range.extent.layers); | 1175 | num_layers = std::max(num_layers, color_buffer->range.extent.layers); |
| @@ -1185,10 +1179,7 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM | |||
| 1185 | ++num_images; | 1179 | ++num_images; |
| 1186 | } | 1180 | } |
| 1187 | const size_t num_colors = attachments.size(); | 1181 | const size_t num_colors = attachments.size(); |
| 1188 | const VkAttachmentReference* depth_attachment = | ||
| 1189 | depth_buffer ? &ATTACHMENT_REFERENCES[num_colors] : nullptr; | ||
| 1190 | if (depth_buffer) { | 1182 | if (depth_buffer) { |
| 1191 | descriptions.push_back(AttachmentDescription(runtime.device, depth_buffer)); | ||
| 1192 | attachments.push_back(depth_buffer->RenderTarget()); | 1183 | attachments.push_back(depth_buffer->RenderTarget()); |
| 1193 | renderpass_key.depth_format = depth_buffer->format; | 1184 | renderpass_key.depth_format = depth_buffer->format; |
| 1194 | num_layers = std::max(num_layers, depth_buffer->range.extent.layers); | 1185 | num_layers = std::max(num_layers, depth_buffer->range.extent.layers); |
| @@ -1201,40 +1192,14 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM | |||
| 1201 | } | 1192 | } |
| 1202 | renderpass_key.samples = samples; | 1193 | renderpass_key.samples = samples; |
| 1203 | 1194 | ||
| 1204 | const auto& device = runtime.device.GetLogical(); | 1195 | renderpass = runtime.render_pass_cache.Get(renderpass_key); |
| 1205 | const auto [cache_pair, is_new] = runtime.renderpass_cache.try_emplace(renderpass_key); | 1196 | |
| 1206 | if (is_new) { | ||
| 1207 | const VkSubpassDescription subpass{ | ||
| 1208 | .flags = 0, | ||
| 1209 | .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, | ||
| 1210 | .inputAttachmentCount = 0, | ||
| 1211 | .pInputAttachments = nullptr, | ||
| 1212 | .colorAttachmentCount = static_cast<u32>(num_colors), | ||
| 1213 | .pColorAttachments = num_colors != 0 ? ATTACHMENT_REFERENCES.data() : nullptr, | ||
| 1214 | .pResolveAttachments = nullptr, | ||
| 1215 | .pDepthStencilAttachment = depth_attachment, | ||
| 1216 | .preserveAttachmentCount = 0, | ||
| 1217 | .pPreserveAttachments = nullptr, | ||
| 1218 | }; | ||
| 1219 | cache_pair->second = device.CreateRenderPass(VkRenderPassCreateInfo{ | ||
| 1220 | .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, | ||
| 1221 | .pNext = nullptr, | ||
| 1222 | .flags = 0, | ||
| 1223 | .attachmentCount = static_cast<u32>(descriptions.size()), | ||
| 1224 | .pAttachments = descriptions.data(), | ||
| 1225 | .subpassCount = 1, | ||
| 1226 | .pSubpasses = &subpass, | ||
| 1227 | .dependencyCount = 0, | ||
| 1228 | .pDependencies = nullptr, | ||
| 1229 | }); | ||
| 1230 | } | ||
| 1231 | renderpass = *cache_pair->second; | ||
| 1232 | render_area = VkExtent2D{ | 1197 | render_area = VkExtent2D{ |
| 1233 | .width = key.size.width, | 1198 | .width = key.size.width, |
| 1234 | .height = key.size.height, | 1199 | .height = key.size.height, |
| 1235 | }; | 1200 | }; |
| 1236 | num_color_buffers = static_cast<u32>(num_colors); | 1201 | num_color_buffers = static_cast<u32>(num_colors); |
| 1237 | framebuffer = device.CreateFramebuffer(VkFramebufferCreateInfo{ | 1202 | framebuffer = runtime.device.GetLogical().CreateFramebuffer({ |
| 1238 | .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, | 1203 | .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, |
| 1239 | .pNext = nullptr, | 1204 | .pNext = nullptr, |
| 1240 | .flags = 0, | 1205 | .flags = 0, |
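Editor's note: the new StorageView() lazily builds and caches one reinterpreted image view per (signedness, texture type) pair, and allocates the StorageViews table only for image views that are actually used as typed storage images. A simplified restatement of that caching scheme; the handle type and array size are stand-ins for vk::ImageView and Shader::TextureType:

    #include <array>
    #include <cstddef>
    #include <cstdint>
    #include <memory>
    #include <optional>

    using Handle = std::uint64_t;                // stand-in for vk::ImageView
    constexpr std::size_t NUM_TEXTURE_TYPES = 8; // one slot per texture type

    struct StorageViews {
        std::array<std::optional<Handle>, NUM_TEXTURE_TYPES> signeds;
        std::array<std::optional<Handle>, NUM_TEXTURE_TYPES> unsigneds;
    };

    template <typename MakeView>
    Handle GetStorageView(std::unique_ptr<StorageViews>& views, bool is_signed,
                          std::size_t texture_type, MakeView&& make_view) {
        if (!views) {
            views = std::make_unique<StorageViews>(); // allocated only on first use
        }
        auto& slot = is_signed ? views->signeds[texture_type] : views->unsigneds[texture_type];
        if (!slot) {
            slot = make_view(); // build the reinterpreted view once, then reuse
        }
        return *slot;
    }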
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 172bcdf98..0b73d55f8 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include <compare> | 7 | #include <compare> |
| 8 | #include <span> | 8 | #include <span> |
| 9 | 9 | ||
| 10 | #include "shader_recompiler/shader_info.h" | ||
| 10 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 11 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 11 | #include "video_core/texture_cache/texture_cache.h" | 12 | #include "video_core/texture_cache/texture_cache.h" |
| 12 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | 13 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" |
| @@ -26,35 +27,10 @@ class Device; | |||
| 26 | class Image; | 27 | class Image; |
| 27 | class ImageView; | 28 | class ImageView; |
| 28 | class Framebuffer; | 29 | class Framebuffer; |
| 30 | class RenderPassCache; | ||
| 29 | class StagingBufferPool; | 31 | class StagingBufferPool; |
| 30 | class VKScheduler; | 32 | class VKScheduler; |
| 31 | 33 | ||
| 32 | struct RenderPassKey { | ||
| 33 | constexpr auto operator<=>(const RenderPassKey&) const noexcept = default; | ||
| 34 | |||
| 35 | std::array<PixelFormat, NUM_RT> color_formats; | ||
| 36 | PixelFormat depth_format; | ||
| 37 | VkSampleCountFlagBits samples; | ||
| 38 | }; | ||
| 39 | |||
| 40 | } // namespace Vulkan | ||
| 41 | |||
| 42 | namespace std { | ||
| 43 | template <> | ||
| 44 | struct hash<Vulkan::RenderPassKey> { | ||
| 45 | [[nodiscard]] constexpr size_t operator()(const Vulkan::RenderPassKey& key) const noexcept { | ||
| 46 | size_t value = static_cast<size_t>(key.depth_format) << 48; | ||
| 47 | value ^= static_cast<size_t>(key.samples) << 52; | ||
| 48 | for (size_t i = 0; i < key.color_formats.size(); ++i) { | ||
| 49 | value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6); | ||
| 50 | } | ||
| 51 | return value; | ||
| 52 | } | ||
| 53 | }; | ||
| 54 | } // namespace std | ||
| 55 | |||
| 56 | namespace Vulkan { | ||
| 57 | |||
| 58 | struct TextureCacheRuntime { | 34 | struct TextureCacheRuntime { |
| 59 | const Device& device; | 35 | const Device& device; |
| 60 | VKScheduler& scheduler; | 36 | VKScheduler& scheduler; |
| @@ -62,13 +38,13 @@ struct TextureCacheRuntime { | |||
| 62 | StagingBufferPool& staging_buffer_pool; | 38 | StagingBufferPool& staging_buffer_pool; |
| 63 | BlitImageHelper& blit_image_helper; | 39 | BlitImageHelper& blit_image_helper; |
| 64 | ASTCDecoderPass& astc_decoder_pass; | 40 | ASTCDecoderPass& astc_decoder_pass; |
| 65 | std::unordered_map<RenderPassKey, vk::RenderPass> renderpass_cache{}; | 41 | RenderPassCache& render_pass_cache; |
| 66 | 42 | ||
| 67 | void Finish(); | 43 | void Finish(); |
| 68 | 44 | ||
| 69 | [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); | 45 | StagingBufferRef UploadStagingBuffer(size_t size); |
| 70 | 46 | ||
| 71 | [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); | 47 | StagingBufferRef DownloadStagingBuffer(size_t size); |
| 72 | 48 | ||
| 73 | void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, | 49 | void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, |
| 74 | const Region2D& dst_region, const Region2D& src_region, | 50 | const Region2D& dst_region, const Region2D& src_region, |
| @@ -79,7 +55,7 @@ struct TextureCacheRuntime { | |||
| 79 | 55 | ||
| 80 | void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view); | 56 | void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view); |
| 81 | 57 | ||
| 82 | [[nodiscard]] bool CanAccelerateImageUpload(Image&) const noexcept { | 58 | bool CanAccelerateImageUpload(Image&) const noexcept { |
| 83 | return false; | 59 | return false; |
| 84 | } | 60 | } |
| 85 | 61 | ||
| @@ -117,8 +93,6 @@ public: | |||
| 117 | void UploadMemory(const StagingBufferRef& map, | 93 | void UploadMemory(const StagingBufferRef& map, |
| 118 | std::span<const VideoCommon::BufferImageCopy> copies); | 94 | std::span<const VideoCommon::BufferImageCopy> copies); |
| 119 | 95 | ||
| 120 | void UploadMemory(const StagingBufferRef& map, std::span<const VideoCommon::BufferCopy> copies); | ||
| 121 | |||
| 122 | void DownloadMemory(const StagingBufferRef& map, | 96 | void DownloadMemory(const StagingBufferRef& map, |
| 123 | std::span<const VideoCommon::BufferImageCopy> copies); | 97 | std::span<const VideoCommon::BufferImageCopy> copies); |
| 124 | 98 | ||
| @@ -126,10 +100,6 @@ public: | |||
| 126 | return *image; | 100 | return *image; |
| 127 | } | 101 | } |
| 128 | 102 | ||
| 129 | [[nodiscard]] VkBuffer Buffer() const noexcept { | ||
| 130 | return *buffer; | ||
| 131 | } | ||
| 132 | |||
| 133 | [[nodiscard]] VkImageAspectFlags AspectMask() const noexcept { | 103 | [[nodiscard]] VkImageAspectFlags AspectMask() const noexcept { |
| 134 | return aspect_mask; | 104 | return aspect_mask; |
| 135 | } | 105 | } |
| @@ -146,7 +116,6 @@ public: | |||
| 146 | private: | 116 | private: |
| 147 | VKScheduler* scheduler; | 117 | VKScheduler* scheduler; |
| 148 | vk::Image image; | 118 | vk::Image image; |
| 149 | vk::Buffer buffer; | ||
| 150 | MemoryCommit commit; | 119 | MemoryCommit commit; |
| 151 | vk::ImageView image_view; | 120 | vk::ImageView image_view; |
| 152 | std::vector<vk::ImageView> storage_image_views; | 121 | std::vector<vk::ImageView> storage_image_views; |
| @@ -157,18 +126,19 @@ private: | |||
| 157 | class ImageView : public VideoCommon::ImageViewBase { | 126 | class ImageView : public VideoCommon::ImageViewBase { |
| 158 | public: | 127 | public: |
| 159 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); | 128 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); |
| 129 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&, | ||
| 130 | const VideoCommon::ImageViewInfo&, GPUVAddr); | ||
| 160 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); | 131 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); |
| 161 | 132 | ||
| 162 | [[nodiscard]] VkImageView DepthView(); | 133 | [[nodiscard]] VkImageView DepthView(); |
| 163 | 134 | ||
| 164 | [[nodiscard]] VkImageView StencilView(); | 135 | [[nodiscard]] VkImageView StencilView(); |
| 165 | 136 | ||
| 166 | [[nodiscard]] VkImageView Handle(VideoCommon::ImageViewType query_type) const noexcept { | 137 | [[nodiscard]] VkImageView StorageView(Shader::TextureType texture_type, |
| 167 | return *image_views[static_cast<size_t>(query_type)]; | 138 | Shader::ImageFormat image_format); |
| 168 | } | ||
| 169 | 139 | ||
| 170 | [[nodiscard]] VkBufferView BufferView() const noexcept { | 140 | [[nodiscard]] VkImageView Handle(Shader::TextureType texture_type) const noexcept { |
| 171 | return *buffer_view; | 141 | return *image_views[static_cast<size_t>(texture_type)]; |
| 172 | } | 142 | } |
| 173 | 143 | ||
| 174 | [[nodiscard]] VkImage ImageHandle() const noexcept { | 144 | [[nodiscard]] VkImage ImageHandle() const noexcept { |
| @@ -179,26 +149,36 @@ public: | |||
| 179 | return render_target; | 149 | return render_target; |
| 180 | } | 150 | } |
| 181 | 151 | ||
| 182 | [[nodiscard]] PixelFormat ImageFormat() const noexcept { | ||
| 183 | return image_format; | ||
| 184 | } | ||
| 185 | |||
| 186 | [[nodiscard]] VkSampleCountFlagBits Samples() const noexcept { | 152 | [[nodiscard]] VkSampleCountFlagBits Samples() const noexcept { |
| 187 | return samples; | 153 | return samples; |
| 188 | } | 154 | } |
| 189 | 155 | ||
| 156 | [[nodiscard]] GPUVAddr GpuAddr() const noexcept { | ||
| 157 | return gpu_addr; | ||
| 158 | } | ||
| 159 | |||
| 160 | [[nodiscard]] u32 BufferSize() const noexcept { | ||
| 161 | return buffer_size; | ||
| 162 | } | ||
| 163 | |||
| 190 | private: | 164 | private: |
| 191 | [[nodiscard]] vk::ImageView MakeDepthStencilView(VkImageAspectFlags aspect_mask); | 165 | struct StorageViews { |
| 166 | std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> signeds; | ||
| 167 | std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> unsigneds; | ||
| 168 | }; | ||
| 169 | |||
| 170 | [[nodiscard]] vk::ImageView MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask); | ||
| 192 | 171 | ||
| 193 | const Device* device = nullptr; | 172 | const Device* device = nullptr; |
| 194 | std::array<vk::ImageView, VideoCommon::NUM_IMAGE_VIEW_TYPES> image_views; | 173 | std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> image_views; |
| 174 | std::unique_ptr<StorageViews> storage_views; | ||
| 195 | vk::ImageView depth_view; | 175 | vk::ImageView depth_view; |
| 196 | vk::ImageView stencil_view; | 176 | vk::ImageView stencil_view; |
| 197 | vk::BufferView buffer_view; | ||
| 198 | VkImage image_handle = VK_NULL_HANDLE; | 177 | VkImage image_handle = VK_NULL_HANDLE; |
| 199 | VkImageView render_target = VK_NULL_HANDLE; | 178 | VkImageView render_target = VK_NULL_HANDLE; |
| 200 | PixelFormat image_format = PixelFormat::Invalid; | ||
| 201 | VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; | 179 | VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; |
| 180 | GPUVAddr gpu_addr = 0; | ||
| 181 | u32 buffer_size = 0; | ||
| 202 | }; | 182 | }; |
| 203 | 183 | ||
| 204 | class ImageAlloc : public VideoCommon::ImageAllocBase {}; | 184 | class ImageAlloc : public VideoCommon::ImageAllocBase {}; |
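The hunk above moves render pass caching out of TextureCacheRuntime and into the new RenderPassCache. The removed RenderPassKey and its std::hash specialization show the keying scheme; a minimal sketch of the cache they now back might look as follows. MakeRenderPass is a hypothetical factory standing in for the real VkRenderPass creation, and a default-constructible vk::RenderPass wrapper is assumed:

    struct RenderPassKey {
        constexpr auto operator<=>(const RenderPassKey&) const noexcept = default;

        std::array<PixelFormat, NUM_RT> color_formats;
        PixelFormat depth_format;
        VkSampleCountFlagBits samples;
    };

    struct RenderPassKeyHash {
        size_t operator()(const RenderPassKey& key) const noexcept {
            // Same bit mixing as the std::hash specialization removed above
            size_t value = static_cast<size_t>(key.depth_format) << 48;
            value ^= static_cast<size_t>(key.samples) << 52;
            for (size_t i = 0; i < key.color_formats.size(); ++i) {
                value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6);
            }
            return value;
        }
    };

    class RenderPassCache {
    public:
        VkRenderPass Get(const RenderPassKey& key) {
            // try_emplace creates the render pass only on a cache miss
            const auto [it, is_new] = cache.try_emplace(key);
            if (is_new) {
                it->second = MakeRenderPass(key); // hypothetical factory
            }
            return *it->second;
        }

    private:
        std::unordered_map<RenderPassKey, vk::RenderPass, RenderPassKeyHash> cache;
    };

Keying on the attachment formats and sample count means pipelines with compatible framebuffers share one VkRenderPass instead of each texture cache runtime owning its own map.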
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp index dc45fdcb1..0df3a7fe9 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp | |||
| @@ -15,7 +15,9 @@ | |||
| 15 | namespace Vulkan { | 15 | namespace Vulkan { |
| 16 | 16 | ||
| 17 | VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const Device& device_, VKScheduler& scheduler_) | 17 | VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const Device& device_, VKScheduler& scheduler_) |
| 18 | : device{device_}, scheduler{scheduler_} {} | 18 | : device{device_}, scheduler{scheduler_} { |
| 19 | payload_cursor = payload.data(); | ||
| 20 | } | ||
| 19 | 21 | ||
| 20 | VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default; | 22 | VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default; |
| 21 | 23 | ||
| @@ -36,13 +38,4 @@ void VKUpdateDescriptorQueue::Acquire() { | |||
| 36 | upload_start = payload_cursor; | 38 | upload_start = payload_cursor; |
| 37 | } | 39 | } |
| 38 | 40 | ||
| 39 | void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template, | ||
| 40 | VkDescriptorSet set) { | ||
| 41 | const void* const data = upload_start; | ||
| 42 | const vk::Device* const logical = &device.GetLogical(); | ||
| 43 | scheduler.Record([data, logical, set, update_template](vk::CommandBuffer) { | ||
| 44 | logical->UpdateDescriptorSet(set, update_template, data); | ||
| 45 | }); | ||
| 46 | } | ||
| 47 | |||
| 48 | } // namespace Vulkan | 41 | } // namespace Vulkan |
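The constructor change above ensures payload_cursor points into the payload pool before the first Acquire() runs. A simplified sketch of the bump-cursor pattern this class implements; the pool size here is illustrative, and the union is a minimal stand-in for the real DescriptorUpdateEntry:

    // Minimal stand-in for yuzu's DescriptorUpdateEntry union
    union DescriptorUpdateEntry {
        DescriptorUpdateEntry() {}
        DescriptorUpdateEntry(VkDescriptorImageInfo image_) : image{image_} {}
        VkDescriptorImageInfo image;
        VkDescriptorBufferInfo buffer;
    };

    class UpdateQueueSketch {
    public:
        UpdateQueueSketch() {
            payload_cursor = payload.data(); // valid before the first Acquire()
        }

        void Acquire() {
            upload_start = payload_cursor; // entries added next belong to this set
        }

        void AddSampledImage(VkImageView image_view, VkSampler sampler) {
            // Each Add* helper appends one entry and bumps the cursor
            *(payload_cursor++) = VkDescriptorImageInfo{
                .sampler = sampler,
                .imageView = image_view,
                .imageLayout = VK_IMAGE_LAYOUT_GENERAL, // layout is an assumption
            };
        }

        const DescriptorUpdateEntry* UpdateData() const noexcept {
            return upload_start; // consumed by the caller, see the header below
        }

    private:
        std::array<DescriptorUpdateEntry, 0x1000> payload; // size is illustrative
        DescriptorUpdateEntry* payload_cursor = nullptr;
        const DescriptorUpdateEntry* upload_start = nullptr;
    };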
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h index d35e77c44..d7de4c490 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.h +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h | |||
| @@ -39,7 +39,9 @@ public: | |||
| 39 | 39 | ||
| 40 | void Acquire(); | 40 | void Acquire(); |
| 41 | 41 | ||
| 42 | void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set); | 42 | const DescriptorUpdateEntry* UpdateData() const noexcept { |
| 43 | return upload_start; | ||
| 44 | } | ||
| 43 | 45 | ||
| 44 | void AddSampledImage(VkImageView image_view, VkSampler sampler) { | 46 | void AddSampledImage(VkImageView image_view, VkSampler sampler) { |
| 45 | *(payload_cursor++) = VkDescriptorImageInfo{ | 47 | *(payload_cursor++) = VkDescriptorImageInfo{ |
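With Send() removed, callers fetch the staged entries through the new UpdateData() accessor and record the template update themselves, roughly reproducing the deleted Send() body at each call site. A sketch of that usage; the surrounding variables are assumptions standing in for the real pipeline call sites introduced by this merge:

    update_descriptor_queue.Acquire();
    update_descriptor_queue.AddSampledImage(image_view, sampler);

    const void* const data = update_descriptor_queue.UpdateData();
    const vk::Device* const logical = &device.GetLogical();
    scheduler.Record([data, logical, set, update_template](vk::CommandBuffer) {
        logical->UpdateDescriptorSet(set, update_template, data);
    });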
diff --git a/src/video_core/shader/ast.cpp b/src/video_core/shader/ast.cpp deleted file mode 100644 index db11144c7..000000000 --- a/src/video_core/shader/ast.cpp +++ /dev/null | |||
| @@ -1,752 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string> | ||
| 6 | #include <string_view> | ||
| 7 | |||
| 8 | #include <fmt/format.h> | ||
| 9 | |||
| 10 | #include "common/assert.h" | ||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "video_core/shader/ast.h" | ||
| 13 | #include "video_core/shader/expr.h" | ||
| 14 | |||
| 15 | namespace VideoCommon::Shader { | ||
| 16 | |||
| 17 | ASTZipper::ASTZipper() = default; | ||
| 18 | |||
| 19 | void ASTZipper::Init(const ASTNode new_first, const ASTNode parent) { | ||
| 20 | ASSERT(new_first->manager == nullptr); | ||
| 21 | first = new_first; | ||
| 22 | last = new_first; | ||
| 23 | |||
| 24 | ASTNode current = first; | ||
| 25 | while (current) { | ||
| 26 | current->manager = this; | ||
| 27 | current->parent = parent; | ||
| 28 | last = current; | ||
| 29 | current = current->next; | ||
| 30 | } | ||
| 31 | } | ||
| 32 | |||
| 33 | void ASTZipper::PushBack(const ASTNode new_node) { | ||
| 34 | ASSERT(new_node->manager == nullptr); | ||
| 35 | new_node->previous = last; | ||
| 36 | if (last) { | ||
| 37 | last->next = new_node; | ||
| 38 | } | ||
| 39 | new_node->next.reset(); | ||
| 40 | last = new_node; | ||
| 41 | if (!first) { | ||
| 42 | first = new_node; | ||
| 43 | } | ||
| 44 | new_node->manager = this; | ||
| 45 | } | ||
| 46 | |||
| 47 | void ASTZipper::PushFront(const ASTNode new_node) { | ||
| 48 | ASSERT(new_node->manager == nullptr); | ||
| 49 | new_node->previous.reset(); | ||
| 50 | new_node->next = first; | ||
| 51 | if (first) { | ||
| 52 | first->previous = new_node; | ||
| 53 | } | ||
| 54 | if (!last) { // only an empty list needs its tail initialized here | ||
| 55 | last = new_node; | ||
| 56 | } | ||
| 57 | first = new_node; | ||
| 58 | new_node->manager = this; | ||
| 59 | } | ||
| 60 | |||
| 61 | void ASTZipper::InsertAfter(const ASTNode new_node, const ASTNode at_node) { | ||
| 62 | ASSERT(new_node->manager == nullptr); | ||
| 63 | if (!at_node) { | ||
| 64 | PushFront(new_node); | ||
| 65 | return; | ||
| 66 | } | ||
| 67 | const ASTNode next = at_node->next; | ||
| 68 | if (next) { | ||
| 69 | next->previous = new_node; | ||
| 70 | } | ||
| 71 | new_node->previous = at_node; | ||
| 72 | if (at_node == last) { | ||
| 73 | last = new_node; | ||
| 74 | } | ||
| 75 | new_node->next = next; | ||
| 76 | at_node->next = new_node; | ||
| 77 | new_node->manager = this; | ||
| 78 | } | ||
| 79 | |||
| 80 | void ASTZipper::InsertBefore(const ASTNode new_node, const ASTNode at_node) { | ||
| 81 | ASSERT(new_node->manager == nullptr); | ||
| 82 | if (!at_node) { | ||
| 83 | PushBack(new_node); | ||
| 84 | return; | ||
| 85 | } | ||
| 86 | const ASTNode previous = at_node->previous; | ||
| 87 | if (previous) { | ||
| 88 | previous->next = new_node; | ||
| 89 | } | ||
| 90 | new_node->next = at_node; | ||
| 91 | if (at_node == first) { | ||
| 92 | first = new_node; | ||
| 93 | } | ||
| 94 | new_node->previous = previous; | ||
| 95 | at_node->previous = new_node; | ||
| 96 | new_node->manager = this; | ||
| 97 | } | ||
| 98 | |||
| 99 | void ASTZipper::DetachTail(ASTNode node) { | ||
| 100 | ASSERT(node->manager == this); | ||
| 101 | if (node == first) { | ||
| 102 | first.reset(); | ||
| 103 | last.reset(); | ||
| 104 | } else { | ||
| 105 | // Detach the tail while keeping the head of the list intact | ||
| 106 | last = node->previous; | ||
| 107 | last->next.reset(); | ||
| 108 | node->previous.reset(); | ||
| 109 | } | ||
| 110 | |||
| 111 | ASTNode current = std::move(node); | ||
| 112 | while (current) { | ||
| 113 | current->manager = nullptr; | ||
| 114 | current->parent.reset(); | ||
| 115 | current = current->next; | ||
| 116 | } | ||
| 117 | } | ||
| 118 | |||
| 119 | void ASTZipper::DetachSegment(const ASTNode start, const ASTNode end) { | ||
| 120 | ASSERT(start->manager == this && end->manager == this); | ||
| 121 | if (start == end) { | ||
| 122 | DetachSingle(start); | ||
| 123 | return; | ||
| 124 | } | ||
| 125 | const ASTNode prev = start->previous; | ||
| 126 | const ASTNode post = end->next; | ||
| 127 | if (!prev) { | ||
| 128 | first = post; | ||
| 129 | } else { | ||
| 130 | prev->next = post; | ||
| 131 | } | ||
| 132 | if (!post) { | ||
| 133 | last = prev; | ||
| 134 | } else { | ||
| 135 | post->previous = prev; | ||
| 136 | } | ||
| 137 | start->previous.reset(); | ||
| 138 | end->next.reset(); | ||
| 139 | ASTNode current = start; | ||
| 140 | bool found = false; | ||
| 141 | while (current) { | ||
| 142 | current->manager = nullptr; | ||
| 143 | current->parent.reset(); | ||
| 144 | found |= current == end; | ||
| 145 | current = current->next; | ||
| 146 | } | ||
| 147 | ASSERT(found); | ||
| 148 | } | ||
| 149 | |||
| 150 | void ASTZipper::DetachSingle(const ASTNode node) { | ||
| 151 | ASSERT(node->manager == this); | ||
| 152 | const ASTNode prev = node->previous; | ||
| 153 | const ASTNode post = node->next; | ||
| 154 | node->previous.reset(); | ||
| 155 | node->next.reset(); | ||
| 156 | if (!prev) { | ||
| 157 | first = post; | ||
| 158 | } else { | ||
| 159 | prev->next = post; | ||
| 160 | } | ||
| 161 | if (!post) { | ||
| 162 | last = prev; | ||
| 163 | } else { | ||
| 164 | post->previous = prev; | ||
| 165 | } | ||
| 166 | |||
| 167 | node->manager = nullptr; | ||
| 168 | node->parent.reset(); | ||
| 169 | } | ||
| 170 | |||
| 171 | void ASTZipper::Remove(const ASTNode node) { | ||
| 172 | ASSERT(node->manager == this); | ||
| 173 | const ASTNode next = node->next; | ||
| 174 | const ASTNode previous = node->previous; | ||
| 175 | if (previous) { | ||
| 176 | previous->next = next; | ||
| 177 | } | ||
| 178 | if (next) { | ||
| 179 | next->previous = previous; | ||
| 180 | } | ||
| 181 | node->parent.reset(); | ||
| 182 | node->manager = nullptr; | ||
| 183 | if (node == last) { | ||
| 184 | last = previous; | ||
| 185 | } | ||
| 186 | if (node == first) { | ||
| 187 | first = next; | ||
| 188 | } | ||
| 189 | } | ||
| 190 | |||
| 191 | class ExprPrinter final { | ||
| 192 | public: | ||
| 193 | void operator()(const ExprAnd& expr) { | ||
| 194 | inner += "( "; | ||
| 195 | std::visit(*this, *expr.operand1); | ||
| 196 | inner += " && "; | ||
| 197 | std::visit(*this, *expr.operand2); | ||
| 198 | inner += ')'; | ||
| 199 | } | ||
| 200 | |||
| 201 | void operator()(const ExprOr& expr) { | ||
| 202 | inner += "( "; | ||
| 203 | std::visit(*this, *expr.operand1); | ||
| 204 | inner += " || "; | ||
| 205 | std::visit(*this, *expr.operand2); | ||
| 206 | inner += ')'; | ||
| 207 | } | ||
| 208 | |||
| 209 | void operator()(const ExprNot& expr) { | ||
| 210 | inner += "!"; | ||
| 211 | std::visit(*this, *expr.operand1); | ||
| 212 | } | ||
| 213 | |||
| 214 | void operator()(const ExprPredicate& expr) { | ||
| 215 | inner += fmt::format("P{}", expr.predicate); | ||
| 216 | } | ||
| 217 | |||
| 218 | void operator()(const ExprCondCode& expr) { | ||
| 219 | inner += fmt::format("CC{}", expr.cc); | ||
| 220 | } | ||
| 221 | |||
| 222 | void operator()(const ExprVar& expr) { | ||
| 223 | inner += fmt::format("V{}", expr.var_index); | ||
| 224 | } | ||
| 225 | |||
| 226 | void operator()(const ExprBoolean& expr) { | ||
| 227 | inner += expr.value ? "true" : "false"; | ||
| 228 | } | ||
| 229 | |||
| 230 | void operator()(const ExprGprEqual& expr) { | ||
| 231 | inner += fmt::format("(gpr_{} == {})", expr.gpr, expr.value); | ||
| 232 | } | ||
| 233 | |||
| 234 | const std::string& GetResult() const { | ||
| 235 | return inner; | ||
| 236 | } | ||
| 237 | |||
| 238 | private: | ||
| 239 | std::string inner; | ||
| 240 | }; | ||
| 241 | |||
| 242 | class ASTPrinter { | ||
| 243 | public: | ||
| 244 | void operator()(const ASTProgram& ast) { | ||
| 245 | scope++; | ||
| 246 | inner += "program {\n"; | ||
| 247 | ASTNode current = ast.nodes.GetFirst(); | ||
| 248 | while (current) { | ||
| 249 | Visit(current); | ||
| 250 | current = current->GetNext(); | ||
| 251 | } | ||
| 252 | inner += "}\n"; | ||
| 253 | scope--; | ||
| 254 | } | ||
| 255 | |||
| 256 | void operator()(const ASTIfThen& ast) { | ||
| 257 | ExprPrinter expr_parser{}; | ||
| 258 | std::visit(expr_parser, *ast.condition); | ||
| 259 | inner += fmt::format("{}if ({}) {{\n", Indent(), expr_parser.GetResult()); | ||
| 260 | scope++; | ||
| 261 | ASTNode current = ast.nodes.GetFirst(); | ||
| 262 | while (current) { | ||
| 263 | Visit(current); | ||
| 264 | current = current->GetNext(); | ||
| 265 | } | ||
| 266 | scope--; | ||
| 267 | inner += fmt::format("{}}}\n", Indent()); | ||
| 268 | } | ||
| 269 | |||
| 270 | void operator()(const ASTIfElse& ast) { | ||
| 271 | inner += Indent(); | ||
| 272 | inner += "else {\n"; | ||
| 273 | |||
| 274 | scope++; | ||
| 275 | ASTNode current = ast.nodes.GetFirst(); | ||
| 276 | while (current) { | ||
| 277 | Visit(current); | ||
| 278 | current = current->GetNext(); | ||
| 279 | } | ||
| 280 | scope--; | ||
| 281 | |||
| 282 | inner += Indent(); | ||
| 283 | inner += "}\n"; | ||
| 284 | } | ||
| 285 | |||
| 286 | void operator()(const ASTBlockEncoded& ast) { | ||
| 287 | inner += fmt::format("{}Block({}, {});\n", Indent(), ast.start, ast.end); | ||
| 288 | } | ||
| 289 | |||
| 290 | void operator()([[maybe_unused]] const ASTBlockDecoded& ast) { | ||
| 291 | inner += Indent(); | ||
| 292 | inner += "Block;\n"; | ||
| 293 | } | ||
| 294 | |||
| 295 | void operator()(const ASTVarSet& ast) { | ||
| 296 | ExprPrinter expr_parser{}; | ||
| 297 | std::visit(expr_parser, *ast.condition); | ||
| 298 | inner += fmt::format("{}V{} := {};\n", Indent(), ast.index, expr_parser.GetResult()); | ||
| 299 | } | ||
| 300 | |||
| 301 | void operator()(const ASTLabel& ast) { | ||
| 302 | inner += fmt::format("Label_{}:\n", ast.index); | ||
| 303 | } | ||
| 304 | |||
| 305 | void operator()(const ASTGoto& ast) { | ||
| 306 | ExprPrinter expr_parser{}; | ||
| 307 | std::visit(expr_parser, *ast.condition); | ||
| 308 | inner += | ||
| 309 | fmt::format("{}({}) -> goto Label_{};\n", Indent(), expr_parser.GetResult(), ast.label); | ||
| 310 | } | ||
| 311 | |||
| 312 | void operator()(const ASTDoWhile& ast) { | ||
| 313 | ExprPrinter expr_parser{}; | ||
| 314 | std::visit(expr_parser, *ast.condition); | ||
| 315 | inner += fmt::format("{}do {{\n", Indent()); | ||
| 316 | scope++; | ||
| 317 | ASTNode current = ast.nodes.GetFirst(); | ||
| 318 | while (current) { | ||
| 319 | Visit(current); | ||
| 320 | current = current->GetNext(); | ||
| 321 | } | ||
| 322 | scope--; | ||
| 323 | inner += fmt::format("{}}} while ({});\n", Indent(), expr_parser.GetResult()); | ||
| 324 | } | ||
| 325 | |||
| 326 | void operator()(const ASTReturn& ast) { | ||
| 327 | ExprPrinter expr_parser{}; | ||
| 328 | std::visit(expr_parser, *ast.condition); | ||
| 329 | inner += fmt::format("{}({}) -> {};\n", Indent(), expr_parser.GetResult(), | ||
| 330 | ast.kills ? "discard" : "exit"); | ||
| 331 | } | ||
| 332 | |||
| 333 | void operator()(const ASTBreak& ast) { | ||
| 334 | ExprPrinter expr_parser{}; | ||
| 335 | std::visit(expr_parser, *ast.condition); | ||
| 336 | inner += fmt::format("{}({}) -> break;\n", Indent(), expr_parser.GetResult()); | ||
| 337 | } | ||
| 338 | |||
| 339 | void Visit(const ASTNode& node) { | ||
| 340 | std::visit(*this, *node->GetInnerData()); | ||
| 341 | } | ||
| 342 | |||
| 343 | const std::string& GetResult() const { | ||
| 344 | return inner; | ||
| 345 | } | ||
| 346 | |||
| 347 | private: | ||
| 348 | std::string_view Indent() { | ||
| 349 | if (space_segment_scope == scope) { | ||
| 350 | return space_segment; | ||
| 351 | } | ||
| 352 | |||
| 353 | // Ensure that we don't exceed our view. | ||
| 354 | ASSERT(scope * 2 < spaces.size()); | ||
| 355 | |||
| 356 | space_segment = spaces.substr(0, scope * 2); | ||
| 357 | space_segment_scope = scope; | ||
| 358 | return space_segment; | ||
| 359 | } | ||
| 360 | |||
| 361 | std::string inner{}; | ||
| 362 | std::string_view space_segment; | ||
| 363 | |||
| 364 | u32 scope{}; | ||
| 365 | u32 space_segment_scope{}; | ||
| 366 | |||
| 367 | static constexpr std::string_view spaces{" "}; | ||
| 368 | }; | ||
| 369 | |||
| 370 | std::string ASTManager::Print() const { | ||
| 371 | ASTPrinter printer{}; | ||
| 372 | printer.Visit(main_node); | ||
| 373 | return printer.GetResult(); | ||
| 374 | } | ||
| 375 | |||
| 376 | ASTManager::ASTManager(bool do_full_decompile, bool disable_else_derivation_) | ||
| 377 | : full_decompile{do_full_decompile}, disable_else_derivation{disable_else_derivation_} {} | ||
| 378 | |||
| 379 | ASTManager::~ASTManager() { | ||
| 380 | Clear(); | ||
| 381 | } | ||
| 382 | |||
| 383 | void ASTManager::Init() { | ||
| 384 | main_node = ASTBase::Make<ASTProgram>(ASTNode{}); | ||
| 385 | program = std::get_if<ASTProgram>(main_node->GetInnerData()); | ||
| 386 | false_condition = MakeExpr<ExprBoolean>(false); | ||
| 387 | } | ||
| 388 | |||
| 389 | void ASTManager::DeclareLabel(u32 address) { | ||
| 390 | const auto pair = labels_map.emplace(address, labels_count); | ||
| 391 | if (pair.second) { | ||
| 392 | labels_count++; | ||
| 393 | labels.resize(labels_count); | ||
| 394 | } | ||
| 395 | } | ||
| 396 | |||
| 397 | void ASTManager::InsertLabel(u32 address) { | ||
| 398 | const u32 index = labels_map[address]; | ||
| 399 | const ASTNode label = ASTBase::Make<ASTLabel>(main_node, index); | ||
| 400 | labels[index] = label; | ||
| 401 | program->nodes.PushBack(label); | ||
| 402 | } | ||
| 403 | |||
| 404 | void ASTManager::InsertGoto(Expr condition, u32 address) { | ||
| 405 | const u32 index = labels_map[address]; | ||
| 406 | const ASTNode goto_node = ASTBase::Make<ASTGoto>(main_node, std::move(condition), index); | ||
| 407 | gotos.push_back(goto_node); | ||
| 408 | program->nodes.PushBack(goto_node); | ||
| 409 | } | ||
| 410 | |||
| 411 | void ASTManager::InsertBlock(u32 start_address, u32 end_address) { | ||
| 412 | ASTNode block = ASTBase::Make<ASTBlockEncoded>(main_node, start_address, end_address); | ||
| 413 | program->nodes.PushBack(std::move(block)); | ||
| 414 | } | ||
| 415 | |||
| 416 | void ASTManager::InsertReturn(Expr condition, bool kills) { | ||
| 417 | ASTNode node = ASTBase::Make<ASTReturn>(main_node, std::move(condition), kills); | ||
| 418 | program->nodes.PushBack(std::move(node)); | ||
| 419 | } | ||
| 420 | |||
| 421 | // The decompile algorithm is based on | ||
| 422 | // "Taming control flow: A structured approach to eliminating goto statements" | ||
| 423 | // by A.M. Erosa and L.J. Hendren, 1994. In general, the idea is to move each goto | ||
| 424 | // to the same structural level as the label it jumps to. This is done | ||
| 425 | // through outward/inward movements and lifting. Once they are at the same | ||
| 426 | // level, you can enclose them in an "if" structure or a "do-while" structure. | ||
| 427 | void ASTManager::Decompile() { | ||
| 428 | auto it = gotos.begin(); | ||
| 429 | while (it != gotos.end()) { | ||
| 430 | const ASTNode goto_node = *it; | ||
| 431 | const auto label_index = goto_node->GetGotoLabel(); | ||
| 432 | if (!label_index) { | ||
| 433 | return; | ||
| 434 | } | ||
| 435 | const ASTNode label = labels[*label_index]; | ||
| 436 | if (!full_decompile) { | ||
| 437 | // We only decompile backward jumps | ||
| 438 | if (!IsBackwardsJump(goto_node, label)) { | ||
| 439 | it++; | ||
| 440 | continue; | ||
| 441 | } | ||
| 442 | } | ||
| 443 | if (IndirectlyRelated(goto_node, label)) { | ||
| 444 | while (!DirectlyRelated(goto_node, label)) { | ||
| 445 | MoveOutward(goto_node); | ||
| 446 | } | ||
| 447 | } | ||
| 448 | if (DirectlyRelated(goto_node, label)) { | ||
| 449 | u32 goto_level = goto_node->GetLevel(); | ||
| 450 | const u32 label_level = label->GetLevel(); | ||
| 451 | while (label_level < goto_level) { | ||
| 452 | MoveOutward(goto_node); | ||
| 453 | goto_level--; | ||
| 454 | } | ||
| 455 | // TODO(Blinkhawk): Implement Lifting and Inward Movements | ||
| 456 | } | ||
| 457 | if (label->GetParent() == goto_node->GetParent()) { | ||
| 458 | bool is_loop = false; | ||
| 459 | ASTNode current = goto_node->GetPrevious(); | ||
| 460 | while (current) { | ||
| 461 | if (current == label) { | ||
| 462 | is_loop = true; | ||
| 463 | break; | ||
| 464 | } | ||
| 465 | current = current->GetPrevious(); | ||
| 466 | } | ||
| 467 | |||
| 468 | if (is_loop) { | ||
| 469 | EncloseDoWhile(goto_node, label); | ||
| 470 | } else { | ||
| 471 | EncloseIfThen(goto_node, label); | ||
| 472 | } | ||
| 473 | it = gotos.erase(it); | ||
| 474 | continue; | ||
| 475 | } | ||
| 476 | it++; | ||
| 477 | } | ||
| 478 | if (full_decompile) { | ||
| 479 | for (const ASTNode& label : labels) { | ||
| 480 | auto& manager = label->GetManager(); | ||
| 481 | manager.Remove(label); | ||
| 482 | } | ||
| 483 | labels.clear(); | ||
| 484 | } else { | ||
| 485 | auto label_it = labels.begin(); | ||
| 486 | while (label_it != labels.end()) { | ||
| 487 | bool can_remove = true; | ||
| 488 | ASTNode label = *label_it; | ||
| 489 | for (const ASTNode& goto_node : gotos) { | ||
| 490 | const auto label_index = goto_node->GetGotoLabel(); | ||
| 491 | if (!label_index) { | ||
| 492 | return; | ||
| 493 | } | ||
| 494 | ASTNode& glabel = labels[*label_index]; | ||
| 495 | if (glabel == label) { | ||
| 496 | can_remove = false; | ||
| 497 | break; | ||
| 498 | } | ||
| 499 | } | ||
| 500 | if (can_remove) { | ||
| 501 | label->MarkLabelUnused(); | ||
| 502 | } ++label_it; // advance to the next label | ||
| 503 | } | ||
| 504 | } | ||
| 505 | } | ||
| 506 | |||
| 507 | bool ASTManager::IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const { | ||
| 508 | u32 goto_level = goto_node->GetLevel(); | ||
| 509 | u32 label_level = label_node->GetLevel(); | ||
| 510 | while (goto_level > label_level) { | ||
| 511 | goto_level--; | ||
| 512 | goto_node = goto_node->GetParent(); | ||
| 513 | } | ||
| 514 | while (label_level > goto_level) { | ||
| 515 | label_level--; | ||
| 516 | label_node = label_node->GetParent(); | ||
| 517 | } | ||
| 518 | while (goto_node->GetParent() != label_node->GetParent()) { | ||
| 519 | goto_node = goto_node->GetParent(); | ||
| 520 | label_node = label_node->GetParent(); | ||
| 521 | } | ||
| 522 | ASTNode current = goto_node->GetPrevious(); | ||
| 523 | while (current) { | ||
| 524 | if (current == label_node) { | ||
| 525 | return true; | ||
| 526 | } | ||
| 527 | current = current->GetPrevious(); | ||
| 528 | } | ||
| 529 | return false; | ||
| 530 | } | ||
| 531 | |||
| 532 | bool ASTManager::IndirectlyRelated(const ASTNode& first, const ASTNode& second) const { | ||
| 533 | return !(first->GetParent() == second->GetParent() || DirectlyRelated(first, second)); | ||
| 534 | } | ||
| 535 | |||
| 536 | bool ASTManager::DirectlyRelated(const ASTNode& first, const ASTNode& second) const { | ||
| 537 | if (first->GetParent() == second->GetParent()) { | ||
| 538 | return false; | ||
| 539 | } | ||
| 540 | const u32 first_level = first->GetLevel(); | ||
| 541 | const u32 second_level = second->GetLevel(); | ||
| 542 | u32 min_level; | ||
| 543 | u32 max_level; | ||
| 544 | ASTNode max; | ||
| 545 | ASTNode min; | ||
| 546 | if (first_level > second_level) { | ||
| 547 | min_level = second_level; | ||
| 548 | min = second; | ||
| 549 | max_level = first_level; | ||
| 550 | max = first; | ||
| 551 | } else { | ||
| 552 | min_level = first_level; | ||
| 553 | min = first; | ||
| 554 | max_level = second_level; | ||
| 555 | max = second; | ||
| 556 | } | ||
| 557 | |||
| 558 | while (max_level > min_level) { | ||
| 559 | max_level--; | ||
| 560 | max = max->GetParent(); | ||
| 561 | } | ||
| 562 | |||
| 563 | return min->GetParent() == max->GetParent(); | ||
| 564 | } | ||
| 565 | |||
| 566 | void ASTManager::ShowCurrentState(std::string_view state) const { | ||
| 567 | LOG_CRITICAL(HW_GPU, "\nState {}:\n\n{}\n", state, Print()); | ||
| 568 | SanityCheck(); | ||
| 569 | } | ||
| 570 | |||
| 571 | void ASTManager::SanityCheck() const { | ||
| 572 | for (const auto& label : labels) { | ||
| 573 | if (!label->GetParent()) { | ||
| 574 | LOG_CRITICAL(HW_GPU, "Sanity Check Failed"); | ||
| 575 | } | ||
| 576 | } | ||
| 577 | } | ||
| 578 | |||
| 579 | void ASTManager::EncloseDoWhile(ASTNode goto_node, ASTNode label) { | ||
| 580 | ASTZipper& zipper = goto_node->GetManager(); | ||
| 581 | const ASTNode loop_start = label->GetNext(); | ||
| 582 | if (loop_start == goto_node) { | ||
| 583 | zipper.Remove(goto_node); | ||
| 584 | return; | ||
| 585 | } | ||
| 586 | const ASTNode parent = label->GetParent(); | ||
| 587 | const Expr condition = goto_node->GetGotoCondition(); | ||
| 588 | zipper.DetachSegment(loop_start, goto_node); | ||
| 589 | const ASTNode do_while_node = ASTBase::Make<ASTDoWhile>(parent, condition); | ||
| 590 | ASTZipper* sub_zipper = do_while_node->GetSubNodes(); | ||
| 591 | sub_zipper->Init(loop_start, do_while_node); | ||
| 592 | zipper.InsertAfter(do_while_node, label); | ||
| 593 | sub_zipper->Remove(goto_node); | ||
| 594 | } | ||
| 595 | |||
| 596 | void ASTManager::EncloseIfThen(ASTNode goto_node, ASTNode label) { | ||
| 597 | ASTZipper& zipper = goto_node->GetManager(); | ||
| 598 | const ASTNode if_end = label->GetPrevious(); | ||
| 599 | if (if_end == goto_node) { | ||
| 600 | zipper.Remove(goto_node); | ||
| 601 | return; | ||
| 602 | } | ||
| 603 | const ASTNode prev = goto_node->GetPrevious(); | ||
| 604 | const Expr condition = goto_node->GetGotoCondition(); | ||
| 605 | bool do_else = false; | ||
| 606 | if (!disable_else_derivation && prev->IsIfThen()) { | ||
| 607 | const Expr if_condition = prev->GetIfCondition(); | ||
| 608 | do_else = ExprAreEqual(if_condition, condition); | ||
| 609 | } | ||
| 610 | const ASTNode parent = label->GetParent(); | ||
| 611 | zipper.DetachSegment(goto_node, if_end); | ||
| 612 | ASTNode if_node; | ||
| 613 | if (do_else) { | ||
| 614 | if_node = ASTBase::Make<ASTIfElse>(parent); | ||
| 615 | } else { | ||
| 616 | Expr neg_condition = MakeExprNot(condition); | ||
| 617 | if_node = ASTBase::Make<ASTIfThen>(parent, neg_condition); | ||
| 618 | } | ||
| 619 | ASTZipper* sub_zipper = if_node->GetSubNodes(); | ||
| 620 | sub_zipper->Init(goto_node, if_node); | ||
| 621 | zipper.InsertAfter(if_node, prev); | ||
| 622 | sub_zipper->Remove(goto_node); | ||
| 623 | } | ||
| 624 | |||
| 625 | void ASTManager::MoveOutward(ASTNode goto_node) { | ||
| 626 | ASTZipper& zipper = goto_node->GetManager(); | ||
| 627 | const ASTNode parent = goto_node->GetParent(); | ||
| 628 | ASTZipper& zipper2 = parent->GetManager(); | ||
| 629 | const ASTNode grandpa = parent->GetParent(); | ||
| 630 | const bool is_loop = parent->IsLoop(); | ||
| 631 | const bool is_else = parent->IsIfElse(); | ||
| 632 | const bool is_if = parent->IsIfThen(); | ||
| 633 | |||
| 634 | const ASTNode prev = goto_node->GetPrevious(); | ||
| 635 | const ASTNode post = goto_node->GetNext(); | ||
| 636 | |||
| 637 | const Expr condition = goto_node->GetGotoCondition(); | ||
| 638 | zipper.DetachSingle(goto_node); | ||
| 639 | if (is_loop) { | ||
| 640 | const u32 var_index = NewVariable(); | ||
| 641 | const Expr var_condition = MakeExpr<ExprVar>(var_index); | ||
| 642 | const ASTNode var_node = ASTBase::Make<ASTVarSet>(parent, var_index, condition); | ||
| 643 | const ASTNode var_node_init = ASTBase::Make<ASTVarSet>(parent, var_index, false_condition); | ||
| 644 | zipper2.InsertBefore(var_node_init, parent); | ||
| 645 | zipper.InsertAfter(var_node, prev); | ||
| 646 | goto_node->SetGotoCondition(var_condition); | ||
| 647 | const ASTNode break_node = ASTBase::Make<ASTBreak>(parent, var_condition); | ||
| 648 | zipper.InsertAfter(break_node, var_node); | ||
| 649 | } else if (is_if || is_else) { | ||
| 650 | const u32 var_index = NewVariable(); | ||
| 651 | const Expr var_condition = MakeExpr<ExprVar>(var_index); | ||
| 652 | const ASTNode var_node = ASTBase::Make<ASTVarSet>(parent, var_index, condition); | ||
| 653 | const ASTNode var_node_init = ASTBase::Make<ASTVarSet>(parent, var_index, false_condition); | ||
| 654 | if (is_if) { | ||
| 655 | zipper2.InsertBefore(var_node_init, parent); | ||
| 656 | } else { | ||
| 657 | zipper2.InsertBefore(var_node_init, parent->GetPrevious()); | ||
| 658 | } | ||
| 659 | zipper.InsertAfter(var_node, prev); | ||
| 660 | goto_node->SetGotoCondition(var_condition); | ||
| 661 | if (post) { | ||
| 662 | zipper.DetachTail(post); | ||
| 663 | const ASTNode if_node = ASTBase::Make<ASTIfThen>(parent, MakeExprNot(var_condition)); | ||
| 664 | ASTZipper* sub_zipper = if_node->GetSubNodes(); | ||
| 665 | sub_zipper->Init(post, if_node); | ||
| 666 | zipper.InsertAfter(if_node, var_node); | ||
| 667 | } | ||
| 668 | } else { | ||
| 669 | UNREACHABLE(); | ||
| 670 | } | ||
| 671 | const ASTNode next = parent->GetNext(); | ||
| 672 | if (is_if && next && next->IsIfElse()) { | ||
| 673 | zipper2.InsertAfter(goto_node, next); | ||
| 674 | goto_node->SetParent(grandpa); | ||
| 675 | return; | ||
| 676 | } | ||
| 677 | zipper2.InsertAfter(goto_node, parent); | ||
| 678 | goto_node->SetParent(grandpa); | ||
| 679 | } | ||
| 680 | |||
| 681 | class ASTClearer { | ||
| 682 | public: | ||
| 683 | ASTClearer() = default; | ||
| 684 | |||
| 685 | void operator()(const ASTProgram& ast) { | ||
| 686 | ASTNode current = ast.nodes.GetFirst(); | ||
| 687 | while (current) { | ||
| 688 | Visit(current); | ||
| 689 | current = current->GetNext(); | ||
| 690 | } | ||
| 691 | } | ||
| 692 | |||
| 693 | void operator()(const ASTIfThen& ast) { | ||
| 694 | ASTNode current = ast.nodes.GetFirst(); | ||
| 695 | while (current) { | ||
| 696 | Visit(current); | ||
| 697 | current = current->GetNext(); | ||
| 698 | } | ||
| 699 | } | ||
| 700 | |||
| 701 | void operator()(const ASTIfElse& ast) { | ||
| 702 | ASTNode current = ast.nodes.GetFirst(); | ||
| 703 | while (current) { | ||
| 704 | Visit(current); | ||
| 705 | current = current->GetNext(); | ||
| 706 | } | ||
| 707 | } | ||
| 708 | |||
| 709 | void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {} | ||
| 710 | |||
| 711 | void operator()(ASTBlockDecoded& ast) { | ||
| 712 | ast.nodes.clear(); | ||
| 713 | } | ||
| 714 | |||
| 715 | void operator()([[maybe_unused]] const ASTVarSet& ast) {} | ||
| 716 | |||
| 717 | void operator()([[maybe_unused]] const ASTLabel& ast) {} | ||
| 718 | |||
| 719 | void operator()([[maybe_unused]] const ASTGoto& ast) {} | ||
| 720 | |||
| 721 | void operator()(const ASTDoWhile& ast) { | ||
| 722 | ASTNode current = ast.nodes.GetFirst(); | ||
| 723 | while (current) { | ||
| 724 | Visit(current); | ||
| 725 | current = current->GetNext(); | ||
| 726 | } | ||
| 727 | } | ||
| 728 | |||
| 729 | void operator()([[maybe_unused]] const ASTReturn& ast) {} | ||
| 730 | |||
| 731 | void operator()([[maybe_unused]] const ASTBreak& ast) {} | ||
| 732 | |||
| 733 | void Visit(const ASTNode& node) { | ||
| 734 | std::visit(*this, *node->GetInnerData()); | ||
| 735 | node->Clear(); | ||
| 736 | } | ||
| 737 | }; | ||
| 738 | |||
| 739 | void ASTManager::Clear() { | ||
| 740 | if (!main_node) { | ||
| 741 | return; | ||
| 742 | } | ||
| 743 | ASTClearer clearer{}; | ||
| 744 | clearer.Visit(main_node); | ||
| 745 | main_node.reset(); | ||
| 746 | program = nullptr; | ||
| 747 | labels_map.clear(); | ||
| 748 | labels.clear(); | ||
| 749 | gotos.clear(); | ||
| 750 | } | ||
| 751 | |||
| 752 | } // namespace VideoCommon::Shader | ||
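The comment in Decompile() above summarizes the Erosa-Hendren structuring. As a worked illustration (not yuzu code; the helper names are made up), the two enclosure steps turn same-level gotos into structured control flow:

    void BlockA();
    bool Cond();

    // Backward jump: the label precedes the goto, so EncloseDoWhile() detaches
    // the segment between them and re-attaches it under an ASTDoWhile node.
    void Backward() {
    Label_0:
        BlockA();
        if (Cond()) {
            goto Label_0;
        }
    }

    void BackwardStructured() {
        do {
            BlockA();
        } while (Cond());
    }

    // Forward jump: the goto skips ahead to the label, so EncloseIfThen()
    // wraps the skipped region in an if guarded by the negated condition.
    void Forward() {
        if (Cond()) {
            goto Label_1;
        }
        BlockA();
    Label_1:;
    }

    void ForwardStructured() {
        if (!Cond()) {
            BlockA();
        }
    }

Gotos that are not yet siblings of their label are first pushed up the tree by MoveOutward(), which records the condition into a fresh variable so the jump can be re-tested at the outer level.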
diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h deleted file mode 100644 index dc49b369e..000000000 --- a/src/video_core/shader/ast.h +++ /dev/null | |||
| @@ -1,398 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <functional> | ||
| 8 | #include <list> | ||
| 9 | #include <memory> | ||
| 10 | #include <optional> | ||
| 11 | #include <string> | ||
| 12 | #include <unordered_map> | ||
| 13 | #include <vector> | ||
| 14 | |||
| 15 | #include "video_core/shader/expr.h" | ||
| 16 | #include "video_core/shader/node.h" | ||
| 17 | |||
| 18 | namespace VideoCommon::Shader { | ||
| 19 | |||
| 20 | class ASTBase; | ||
| 21 | class ASTBlockDecoded; | ||
| 22 | class ASTBlockEncoded; | ||
| 23 | class ASTBreak; | ||
| 24 | class ASTDoWhile; | ||
| 25 | class ASTGoto; | ||
| 26 | class ASTIfElse; | ||
| 27 | class ASTIfThen; | ||
| 28 | class ASTLabel; | ||
| 29 | class ASTProgram; | ||
| 30 | class ASTReturn; | ||
| 31 | class ASTVarSet; | ||
| 32 | |||
| 33 | using ASTData = std::variant<ASTProgram, ASTIfThen, ASTIfElse, ASTBlockEncoded, ASTBlockDecoded, | ||
| 34 | ASTVarSet, ASTGoto, ASTLabel, ASTDoWhile, ASTReturn, ASTBreak>; | ||
| 35 | |||
| 36 | using ASTNode = std::shared_ptr<ASTBase>; | ||
| 37 | |||
| 38 | enum class ASTZipperType : u32 { | ||
| 39 | Program, | ||
| 40 | IfThen, | ||
| 41 | IfElse, | ||
| 42 | Loop, | ||
| 43 | }; | ||
| 44 | |||
| 45 | class ASTZipper final { | ||
| 46 | public: | ||
| 47 | explicit ASTZipper(); | ||
| 48 | |||
| 49 | void Init(ASTNode first, ASTNode parent); | ||
| 50 | |||
| 51 | ASTNode GetFirst() const { | ||
| 52 | return first; | ||
| 53 | } | ||
| 54 | |||
| 55 | ASTNode GetLast() const { | ||
| 56 | return last; | ||
| 57 | } | ||
| 58 | |||
| 59 | void PushBack(ASTNode new_node); | ||
| 60 | void PushFront(ASTNode new_node); | ||
| 61 | void InsertAfter(ASTNode new_node, ASTNode at_node); | ||
| 62 | void InsertBefore(ASTNode new_node, ASTNode at_node); | ||
| 63 | void DetachTail(ASTNode node); | ||
| 64 | void DetachSingle(ASTNode node); | ||
| 65 | void DetachSegment(ASTNode start, ASTNode end); | ||
| 66 | void Remove(ASTNode node); | ||
| 67 | |||
| 68 | ASTNode first; | ||
| 69 | ASTNode last; | ||
| 70 | }; | ||
| 71 | |||
| 72 | class ASTProgram { | ||
| 73 | public: | ||
| 74 | ASTZipper nodes{}; | ||
| 75 | }; | ||
| 76 | |||
| 77 | class ASTIfThen { | ||
| 78 | public: | ||
| 79 | explicit ASTIfThen(Expr condition_) : condition{std::move(condition_)} {} | ||
| 80 | Expr condition; | ||
| 81 | ASTZipper nodes{}; | ||
| 82 | }; | ||
| 83 | |||
| 84 | class ASTIfElse { | ||
| 85 | public: | ||
| 86 | ASTZipper nodes{}; | ||
| 87 | }; | ||
| 88 | |||
| 89 | class ASTBlockEncoded { | ||
| 90 | public: | ||
| 91 | explicit ASTBlockEncoded(u32 start_, u32 end_) : start{start_}, end{end_} {} | ||
| 92 | u32 start; | ||
| 93 | u32 end; | ||
| 94 | }; | ||
| 95 | |||
| 96 | class ASTBlockDecoded { | ||
| 97 | public: | ||
| 98 | explicit ASTBlockDecoded(NodeBlock&& new_nodes_) : nodes(std::move(new_nodes_)) {} | ||
| 99 | NodeBlock nodes; | ||
| 100 | }; | ||
| 101 | |||
| 102 | class ASTVarSet { | ||
| 103 | public: | ||
| 104 | explicit ASTVarSet(u32 index_, Expr condition_) | ||
| 105 | : index{index_}, condition{std::move(condition_)} {} | ||
| 106 | |||
| 107 | u32 index; | ||
| 108 | Expr condition; | ||
| 109 | }; | ||
| 110 | |||
| 111 | class ASTLabel { | ||
| 112 | public: | ||
| 113 | explicit ASTLabel(u32 index_) : index{index_} {} | ||
| 114 | u32 index; | ||
| 115 | bool unused{}; | ||
| 116 | }; | ||
| 117 | |||
| 118 | class ASTGoto { | ||
| 119 | public: | ||
| 120 | explicit ASTGoto(Expr condition_, u32 label_) | ||
| 121 | : condition{std::move(condition_)}, label{label_} {} | ||
| 122 | |||
| 123 | Expr condition; | ||
| 124 | u32 label; | ||
| 125 | }; | ||
| 126 | |||
| 127 | class ASTDoWhile { | ||
| 128 | public: | ||
| 129 | explicit ASTDoWhile(Expr condition_) : condition{std::move(condition_)} {} | ||
| 130 | Expr condition; | ||
| 131 | ASTZipper nodes{}; | ||
| 132 | }; | ||
| 133 | |||
| 134 | class ASTReturn { | ||
| 135 | public: | ||
| 136 | explicit ASTReturn(Expr condition_, bool kills_) | ||
| 137 | : condition{std::move(condition_)}, kills{kills_} {} | ||
| 138 | |||
| 139 | Expr condition; | ||
| 140 | bool kills; | ||
| 141 | }; | ||
| 142 | |||
| 143 | class ASTBreak { | ||
| 144 | public: | ||
| 145 | explicit ASTBreak(Expr condition_) : condition{std::move(condition_)} {} | ||
| 146 | Expr condition; | ||
| 147 | }; | ||
| 148 | |||
| 149 | class ASTBase { | ||
| 150 | public: | ||
| 151 | explicit ASTBase(ASTNode parent_, ASTData data_) | ||
| 152 | : data{std::move(data_)}, parent{std::move(parent_)} {} | ||
| 153 | |||
| 154 | template <class U, class... Args> | ||
| 155 | static ASTNode Make(ASTNode parent, Args&&... args) { | ||
| 156 | return std::make_shared<ASTBase>(std::move(parent), | ||
| 157 | ASTData(U(std::forward<Args>(args)...))); | ||
| 158 | } | ||
| 159 | |||
| 160 | void SetParent(ASTNode new_parent) { | ||
| 161 | parent = std::move(new_parent); | ||
| 162 | } | ||
| 163 | |||
| 164 | ASTNode& GetParent() { | ||
| 165 | return parent; | ||
| 166 | } | ||
| 167 | |||
| 168 | const ASTNode& GetParent() const { | ||
| 169 | return parent; | ||
| 170 | } | ||
| 171 | |||
| 172 | u32 GetLevel() const { | ||
| 173 | u32 level = 0; | ||
| 174 | auto next_parent = parent; | ||
| 175 | while (next_parent) { | ||
| 176 | next_parent = next_parent->GetParent(); | ||
| 177 | level++; | ||
| 178 | } | ||
| 179 | return level; | ||
| 180 | } | ||
| 181 | |||
| 182 | ASTData* GetInnerData() { | ||
| 183 | return &data; | ||
| 184 | } | ||
| 185 | |||
| 186 | const ASTData* GetInnerData() const { | ||
| 187 | return &data; | ||
| 188 | } | ||
| 189 | |||
| 190 | ASTNode GetNext() const { | ||
| 191 | return next; | ||
| 192 | } | ||
| 193 | |||
| 194 | ASTNode GetPrevious() const { | ||
| 195 | return previous; | ||
| 196 | } | ||
| 197 | |||
| 198 | ASTZipper& GetManager() { | ||
| 199 | return *manager; | ||
| 200 | } | ||
| 201 | |||
| 202 | const ASTZipper& GetManager() const { | ||
| 203 | return *manager; | ||
| 204 | } | ||
| 205 | |||
| 206 | std::optional<u32> GetGotoLabel() const { | ||
| 207 | if (const auto* inner = std::get_if<ASTGoto>(&data)) { | ||
| 208 | return {inner->label}; | ||
| 209 | } | ||
| 210 | return std::nullopt; | ||
| 211 | } | ||
| 212 | |||
| 213 | Expr GetGotoCondition() const { | ||
| 214 | if (const auto* inner = std::get_if<ASTGoto>(&data)) { | ||
| 215 | return inner->condition; | ||
| 216 | } | ||
| 217 | return nullptr; | ||
| 218 | } | ||
| 219 | |||
| 220 | void MarkLabelUnused() { | ||
| 221 | if (auto* inner = std::get_if<ASTLabel>(&data)) { | ||
| 222 | inner->unused = true; | ||
| 223 | } | ||
| 224 | } | ||
| 225 | |||
| 226 | bool IsLabelUnused() const { | ||
| 227 | if (const auto* inner = std::get_if<ASTLabel>(&data)) { | ||
| 228 | return inner->unused; | ||
| 229 | } | ||
| 230 | return true; | ||
| 231 | } | ||
| 232 | |||
| 233 | std::optional<u32> GetLabelIndex() const { | ||
| 234 | if (const auto* inner = std::get_if<ASTLabel>(&data)) { | ||
| 235 | return {inner->index}; | ||
| 236 | } | ||
| 237 | return std::nullopt; | ||
| 238 | } | ||
| 239 | |||
| 240 | Expr GetIfCondition() const { | ||
| 241 | if (const auto* inner = std::get_if<ASTIfThen>(&data)) { | ||
| 242 | return inner->condition; | ||
| 243 | } | ||
| 244 | return nullptr; | ||
| 245 | } | ||
| 246 | |||
| 247 | void SetGotoCondition(Expr new_condition) { | ||
| 248 | if (auto* inner = std::get_if<ASTGoto>(&data)) { | ||
| 249 | inner->condition = std::move(new_condition); | ||
| 250 | } | ||
| 251 | } | ||
| 252 | |||
| 253 | bool IsIfThen() const { | ||
| 254 | return std::holds_alternative<ASTIfThen>(data); | ||
| 255 | } | ||
| 256 | |||
| 257 | bool IsIfElse() const { | ||
| 258 | return std::holds_alternative<ASTIfElse>(data); | ||
| 259 | } | ||
| 260 | |||
| 261 | bool IsBlockEncoded() const { | ||
| 262 | return std::holds_alternative<ASTBlockEncoded>(data); | ||
| 263 | } | ||
| 264 | |||
| 265 | void TransformBlockEncoded(NodeBlock&& nodes) { | ||
| 266 | data = ASTBlockDecoded(std::move(nodes)); | ||
| 267 | } | ||
| 268 | |||
| 269 | bool IsLoop() const { | ||
| 270 | return std::holds_alternative<ASTDoWhile>(data); | ||
| 271 | } | ||
| 272 | |||
| 273 | ASTZipper* GetSubNodes() { | ||
| 274 | if (std::holds_alternative<ASTProgram>(data)) { | ||
| 275 | return &std::get_if<ASTProgram>(&data)->nodes; | ||
| 276 | } | ||
| 277 | if (std::holds_alternative<ASTIfThen>(data)) { | ||
| 278 | return &std::get_if<ASTIfThen>(&data)->nodes; | ||
| 279 | } | ||
| 280 | if (std::holds_alternative<ASTIfElse>(data)) { | ||
| 281 | return &std::get_if<ASTIfElse>(&data)->nodes; | ||
| 282 | } | ||
| 283 | if (std::holds_alternative<ASTDoWhile>(data)) { | ||
| 284 | return &std::get_if<ASTDoWhile>(&data)->nodes; | ||
| 285 | } | ||
| 286 | return nullptr; | ||
| 287 | } | ||
| 288 | |||
| 289 | void Clear() { | ||
| 290 | next.reset(); | ||
| 291 | previous.reset(); | ||
| 292 | parent.reset(); | ||
| 293 | manager = nullptr; | ||
| 294 | } | ||
| 295 | |||
| 296 | private: | ||
| 297 | friend class ASTZipper; | ||
| 298 | |||
| 299 | ASTData data; | ||
| 300 | ASTNode parent; | ||
| 301 | ASTNode next; | ||
| 302 | ASTNode previous; | ||
| 303 | ASTZipper* manager{}; | ||
| 304 | }; | ||
| 305 | |||
| 306 | class ASTManager final { | ||
| 307 | public: | ||
| 308 | explicit ASTManager(bool do_full_decompile, bool disable_else_derivation_); | ||
| 309 | ~ASTManager(); | ||
| 310 | |||
| 311 | ASTManager(const ASTManager& o) = delete; | ||
| 312 | ASTManager& operator=(const ASTManager& other) = delete; | ||
| 313 | |||
| 314 | ASTManager(ASTManager&& other) noexcept = default; | ||
| 315 | ASTManager& operator=(ASTManager&& other) noexcept = default; | ||
| 316 | |||
| 317 | void Init(); | ||
| 318 | |||
| 319 | void DeclareLabel(u32 address); | ||
| 320 | |||
| 321 | void InsertLabel(u32 address); | ||
| 322 | |||
| 323 | void InsertGoto(Expr condition, u32 address); | ||
| 324 | |||
| 325 | void InsertBlock(u32 start_address, u32 end_address); | ||
| 326 | |||
| 327 | void InsertReturn(Expr condition, bool kills); | ||
| 328 | |||
| 329 | std::string Print() const; | ||
| 330 | |||
| 331 | void Decompile(); | ||
| 332 | |||
| 333 | void ShowCurrentState(std::string_view state) const; | ||
| 334 | |||
| 335 | void SanityCheck() const; | ||
| 336 | |||
| 337 | void Clear(); | ||
| 338 | |||
| 339 | bool IsFullyDecompiled() const { | ||
| 340 | if (full_decompile) { | ||
| 341 | return gotos.empty(); | ||
| 342 | } | ||
| 343 | |||
| 344 | for (ASTNode goto_node : gotos) { | ||
| 345 | auto label_index = goto_node->GetGotoLabel(); | ||
| 346 | if (!label_index) { | ||
| 347 | return false; | ||
| 348 | } | ||
| 349 | ASTNode glabel = labels[*label_index]; | ||
| 350 | if (IsBackwardsJump(goto_node, glabel)) { | ||
| 351 | return false; | ||
| 352 | } | ||
| 353 | } | ||
| 354 | return true; | ||
| 355 | } | ||
| 356 | |||
| 357 | ASTNode GetProgram() const { | ||
| 358 | return main_node; | ||
| 359 | } | ||
| 360 | |||
| 361 | u32 GetVariables() const { | ||
| 362 | return variables; | ||
| 363 | } | ||
| 364 | |||
| 365 | const std::vector<ASTNode>& GetLabels() const { | ||
| 366 | return labels; | ||
| 367 | } | ||
| 368 | |||
| 369 | private: | ||
| 370 | bool IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const; | ||
| 371 | |||
| 372 | bool IndirectlyRelated(const ASTNode& first, const ASTNode& second) const; | ||
| 373 | |||
| 374 | bool DirectlyRelated(const ASTNode& first, const ASTNode& second) const; | ||
| 375 | |||
| 376 | void EncloseDoWhile(ASTNode goto_node, ASTNode label); | ||
| 377 | |||
| 378 | void EncloseIfThen(ASTNode goto_node, ASTNode label); | ||
| 379 | |||
| 380 | void MoveOutward(ASTNode goto_node); | ||
| 381 | |||
| 382 | u32 NewVariable() { | ||
| 383 | return variables++; | ||
| 384 | } | ||
| 385 | |||
| 386 | bool full_decompile{}; | ||
| 387 | bool disable_else_derivation{}; | ||
| 388 | std::unordered_map<u32, u32> labels_map{}; | ||
| 389 | u32 labels_count{}; | ||
| 390 | std::vector<ASTNode> labels{}; | ||
| 391 | std::list<ASTNode> gotos{}; | ||
| 392 | u32 variables{}; | ||
| 393 | ASTProgram* program{}; | ||
| 394 | ASTNode main_node{}; | ||
| 395 | Expr false_condition{}; | ||
| 396 | }; | ||
| 397 | |||
| 398 | } // namespace VideoCommon::Shader | ||
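Taken together with ast.cpp above, the interface is driven in three phases: declare labels, emit nodes in program order, then structure the result. A minimal sketch of that lifecycle, with made-up addresses; MakeExpr comes from the expr.h header included above:

    ASTManager manager{/*do_full_decompile=*/true, /*disable_else_derivation_=*/false};
    manager.Init();

    manager.DeclareLabel(0x100);               // pre-pass: declare every jump target
    manager.InsertBlock(0x000, 0x0FC);         // straight-line code
    manager.InsertLabel(0x100);                // loop head
    manager.InsertBlock(0x100, 0x1FC);         // loop body
    manager.InsertGoto(MakeExpr<ExprBoolean>(true), 0x100); // backward jump
    manager.InsertReturn(MakeExpr<ExprBoolean>(true), false);

    manager.Decompile();                       // the backward goto becomes a do-while
    if (manager.IsFullyDecompiled()) {
        LOG_CRITICAL(HW_GPU, "{}", manager.Print());
    }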
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp deleted file mode 100644 index 02adcf9c7..000000000 --- a/src/video_core/shader/async_shaders.cpp +++ /dev/null | |||
| @@ -1,234 +0,0 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <condition_variable> | ||
| 6 | #include <mutex> | ||
| 7 | #include <thread> | ||
| 8 | #include <vector> | ||
| 9 | #include "video_core/engines/maxwell_3d.h" | ||
| 10 | #include "video_core/renderer_base.h" | ||
| 11 | #include "video_core/renderer_opengl/gl_shader_cache.h" | ||
| 12 | #include "video_core/shader/async_shaders.h" | ||
| 13 | |||
| 14 | namespace VideoCommon::Shader { | ||
| 15 | |||
| 16 | AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window_) : emu_window(emu_window_) {} | ||
| 17 | |||
| 18 | AsyncShaders::~AsyncShaders() { | ||
| 19 | KillWorkers(); | ||
| 20 | } | ||
| 21 | |||
| 22 | void AsyncShaders::AllocateWorkers() { | ||
| 23 | // Use at least one thread | ||
| 24 | u32 num_workers = 1; | ||
| 25 | |||
| 26 | // Deduce how many more threads we can use | ||
| 27 | const u32 thread_count = std::thread::hardware_concurrency(); | ||
| 28 | if (thread_count >= 8) { | ||
| 29 | // Add one extra worker, plus one more for every two hardware threads beyond eight | ||
| 30 | num_workers += 1 + (thread_count - 8) / 2; | ||
| 31 | } | ||
| 32 | |||
| 33 | // If the worker count is already what we want, there is nothing to do | ||
| 34 | if (num_workers == worker_threads.size()) { | ||
| 35 | return; | ||
| 36 | } | ||
| 37 | |||
| 38 | // If workers already exist, clear them | ||
| 39 | if (!worker_threads.empty()) { | ||
| 40 | FreeWorkers(); | ||
| 41 | } | ||
| 42 | |||
| 43 | // Create workers | ||
| 44 | for (std::size_t i = 0; i < num_workers; i++) { | ||
| 45 | context_list.push_back(emu_window.CreateSharedContext()); | ||
| 46 | worker_threads.emplace_back(&AsyncShaders::ShaderCompilerThread, this, | ||
| 47 | context_list[i].get()); | ||
| 48 | } | ||
| 49 | } | ||
| 50 | |||
| 51 | void AsyncShaders::FreeWorkers() { | ||
| 52 | // Mark all threads to quit | ||
| 53 | is_thread_exiting.store(true); | ||
| 54 | cv.notify_all(); | ||
| 55 | for (auto& thread : worker_threads) { | ||
| 56 | thread.join(); | ||
| 57 | } | ||
| 58 | // Clear our shared contexts | ||
| 59 | context_list.clear(); | ||
| 60 | |||
| 61 | // Clear our worker threads | ||
| 62 | worker_threads.clear(); | ||
| 63 | } | ||
| 64 | |||
| 65 | void AsyncShaders::KillWorkers() { | ||
| 66 | is_thread_exiting.store(true); | ||
| 67 | cv.notify_all(); | ||
| 68 | for (auto& thread : worker_threads) { | ||
| 69 | thread.detach(); | ||
| 70 | } | ||
| 71 | // Clear our shared contexts | ||
| 72 | context_list.clear(); | ||
| 73 | |||
| 74 | // Clear our worker threads | ||
| 75 | worker_threads.clear(); | ||
| 76 | } | ||
| 77 | |||
| 78 | bool AsyncShaders::HasWorkQueued() const { | ||
| 79 | return !pending_queue.empty(); | ||
| 80 | } | ||
| 81 | |||
| 82 | bool AsyncShaders::HasCompletedWork() const { | ||
| 83 | std::shared_lock lock{completed_mutex}; | ||
| 84 | return !finished_work.empty(); | ||
| 85 | } | ||
| 86 | |||
| 87 | bool AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const { | ||
| 88 | const auto& regs = gpu.Maxwell3D().regs; | ||
| 89 | |||
| 90 | // If depth is in use, we can assume the game is not rendering a one-off pass, | ||
| 91 | // so the shader is safe to build asynchronously. | ||
| 92 | if (regs.zeta_enable) { | ||
| 93 | return true; | ||
| 94 | } | ||
| 95 | |||
| 96 | // If the game is using a small index count, these draws are likely full-screen quads. | ||
| 97 | // Such shaders are usually run only once, to build textures, so they should not be | ||
| 98 | // built asynchronously. | ||
| 99 | if (regs.index_array.count <= 6 || regs.vertex_buffer.count <= 6) { | ||
| 100 | return false; | ||
| 101 | } | ||
| 102 | |||
| 103 | return true; | ||
| 104 | } | ||
| 105 | |||
| 106 | std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() { | ||
| 107 | std::vector<Result> results; | ||
| 108 | { | ||
| 109 | std::unique_lock lock{completed_mutex}; | ||
| 110 | results = std::move(finished_work); | ||
| 111 | finished_work.clear(); | ||
| 112 | } | ||
| 113 | return results; | ||
| 114 | } | ||
| 115 | |||
| 116 | void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device, | ||
| 117 | Tegra::Engines::ShaderType shader_type, u64 uid, | ||
| 118 | std::vector<u64> code, std::vector<u64> code_b, | ||
| 119 | u32 main_offset, CompilerSettings compiler_settings, | ||
| 120 | const Registry& registry, VAddr cpu_addr) { | ||
| 121 | std::unique_lock lock(queue_mutex); | ||
| 122 | pending_queue.push({ | ||
| 123 | .backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL, | ||
| 124 | .device = &device, | ||
| 125 | .shader_type = shader_type, | ||
| 126 | .uid = uid, | ||
| 127 | .code = std::move(code), | ||
| 128 | .code_b = std::move(code_b), | ||
| 129 | .main_offset = main_offset, | ||
| 130 | .compiler_settings = compiler_settings, | ||
| 131 | .registry = registry, | ||
| 132 | .cpu_address = cpu_addr, | ||
| 133 | .pp_cache = nullptr, | ||
| 134 | .vk_device = nullptr, | ||
| 135 | .scheduler = nullptr, | ||
| 136 | .descriptor_pool = nullptr, | ||
| 137 | .update_descriptor_queue = nullptr, | ||
| 138 | .bindings{}, | ||
| 139 | .program{}, | ||
| 140 | .key{}, | ||
| 141 | .num_color_buffers = 0, | ||
| 142 | }); | ||
| 143 | cv.notify_one(); | ||
| 144 | } | ||
| 145 | |||
| 146 | void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, | ||
| 147 | const Vulkan::Device& device, Vulkan::VKScheduler& scheduler, | ||
| 148 | Vulkan::VKDescriptorPool& descriptor_pool, | ||
| 149 | Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, | ||
| 150 | std::vector<VkDescriptorSetLayoutBinding> bindings, | ||
| 151 | Vulkan::SPIRVProgram program, | ||
| 152 | Vulkan::GraphicsPipelineCacheKey key, u32 num_color_buffers) { | ||
| 153 | std::unique_lock lock(queue_mutex); | ||
| 154 | pending_queue.push({ | ||
| 155 | .backend = Backend::Vulkan, | ||
| 156 | .device = nullptr, | ||
| 157 | .shader_type{}, | ||
| 158 | .uid = 0, | ||
| 159 | .code{}, | ||
| 160 | .code_b{}, | ||
| 161 | .main_offset = 0, | ||
| 162 | .compiler_settings{}, | ||
| 163 | .registry{}, | ||
| 164 | .cpu_address = 0, | ||
| 165 | .pp_cache = pp_cache, | ||
| 166 | .vk_device = &device, | ||
| 167 | .scheduler = &scheduler, | ||
| 168 | .descriptor_pool = &descriptor_pool, | ||
| 169 | .update_descriptor_queue = &update_descriptor_queue, | ||
| 170 | .bindings = std::move(bindings), | ||
| 171 | .program = std::move(program), | ||
| 172 | .key = key, | ||
| 173 | .num_color_buffers = num_color_buffers, | ||
| 174 | }); | ||
| 175 | cv.notify_one(); | ||
| 176 | } | ||
| 177 | |||
| 178 | void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) { | ||
| 179 | while (!is_thread_exiting.load(std::memory_order_relaxed)) { | ||
| 180 | std::unique_lock lock{queue_mutex}; | ||
| 181 | cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; }); | ||
| 182 | if (is_thread_exiting) { | ||
| 183 | return; | ||
| 184 | } | ||
| 185 | |||
| 186 | // The condition variable can wake spuriously; re-check for work under the lock | ||
| 187 | if (!HasWorkQueued()) { | ||
| 188 | continue; | ||
| 189 | } | ||
| 190 | // Another worker drained the queue first; go back to waiting | ||
| 191 | if (pending_queue.empty()) { | ||
| 192 | continue; | ||
| 193 | } | ||
| 194 | |||
| 195 | // Pull work from queue | ||
| 196 | WorkerParams work = std::move(pending_queue.front()); | ||
| 197 | pending_queue.pop(); | ||
| 198 | lock.unlock(); | ||
| 199 | |||
| 200 | if (work.backend == Backend::OpenGL || work.backend == Backend::GLASM) { | ||
| 201 | const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, *work.registry); | ||
| 202 | const auto scope = context->Acquire(); | ||
| 203 | auto program = | ||
| 204 | OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, *work.registry); | ||
| 205 | Result result{}; | ||
| 206 | result.backend = work.backend; | ||
| 207 | result.cpu_address = work.cpu_address; | ||
| 208 | result.uid = work.uid; | ||
| 209 | result.code = std::move(work.code); | ||
| 210 | result.code_b = std::move(work.code_b); | ||
| 211 | result.shader_type = work.shader_type; | ||
| 212 | |||
| 213 | if (work.backend == Backend::OpenGL) { | ||
| 214 | result.program.opengl = std::move(program->source_program); | ||
| 215 | } else if (work.backend == Backend::GLASM) { | ||
| 216 | result.program.glasm = std::move(program->assembly_program); | ||
| 217 | } | ||
| 218 | |||
| 219 | { | ||
| 220 | std::unique_lock complete_lock(completed_mutex); | ||
| 221 | finished_work.push_back(std::move(result)); | ||
| 222 | } | ||
| 223 | } else if (work.backend == Backend::Vulkan) { | ||
| 224 | auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>( | ||
| 225 | *work.vk_device, *work.scheduler, *work.descriptor_pool, | ||
| 226 | *work.update_descriptor_queue, work.key, work.bindings, work.program, | ||
| 227 | work.num_color_buffers); | ||
| 228 | |||
| 229 | work.pp_cache->EmplacePipeline(std::move(pipeline)); | ||
| 230 | } | ||
| 231 | } | ||
| 232 | } | ||
| 233 | |||
| 234 | } // namespace VideoCommon::Shader | ||
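
The ShaderCompilerThread loop above is a standard condition-variable producer/consumer queue: producers push a WorkerParams entry under queue_mutex and notify, workers wait until work is queued or shutdown is requested, then compile outside the lock. A minimal, self-contained sketch of that pattern follows; Job, WorkerPool, and the "compile" step are hypothetical stand-ins, not yuzu types.

#include <atomic>
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <queue>
#include <thread>

struct Job { int uid; }; // hypothetical stand-in for WorkerParams

class WorkerPool {
public:
    WorkerPool() : worker{[this] { Run(); }} {}
    ~WorkerPool() {
        is_exiting.store(true, std::memory_order_relaxed);
        cv.notify_all();
        worker.join();
    }
    void Queue(Job job) {
        {
            std::unique_lock lock{queue_mutex};
            pending.push(job);
        }
        cv.notify_one();
    }
private:
    void Run() {
        while (!is_exiting.load(std::memory_order_relaxed)) {
            std::unique_lock lock{queue_mutex};
            cv.wait(lock, [this] { return !pending.empty() || is_exiting; });
            if (is_exiting) {
                return;
            }
            Job job = pending.front();
            pending.pop();
            lock.unlock(); // compile outside the lock, as the code above does
            std::cout << "compiled shader " << job.uid << '\n';
        }
    }
    std::condition_variable cv;
    std::mutex queue_mutex;
    std::queue<Job> pending;
    std::atomic<bool> is_exiting{false};
    std::thread worker; // declared last so it starts after the other members exist
};

int main() {
    WorkerPool pool;
    pool.Queue(Job{1});
    pool.Queue(Job{2});
} // destructor signals shutdown; unprocessed jobs may be dropped, like KillWorkers
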
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h deleted file mode 100644 index 7fdff6e56..000000000 --- a/src/video_core/shader/async_shaders.h +++ /dev/null | |||
| @@ -1,138 +0,0 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <condition_variable> | ||
| 8 | #include <memory> | ||
| 9 | #include <shared_mutex> | ||
| 10 | #include <thread> | ||
| 11 | |||
| 12 | #include <glad/glad.h> | ||
| 13 | |||
| 14 | #include "common/common_types.h" | ||
| 15 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 16 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 17 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||
| 18 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | ||
| 19 | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||
| 20 | #include "video_core/vulkan_common/vulkan_device.h" | ||
| 21 | |||
| 22 | namespace Core::Frontend { | ||
| 23 | class EmuWindow; | ||
| 24 | class GraphicsContext; | ||
| 25 | } // namespace Core::Frontend | ||
| 26 | |||
| 27 | namespace Tegra { | ||
| 28 | class GPU; | ||
| 29 | } | ||
| 30 | |||
| 31 | namespace Vulkan { | ||
| 32 | class VKPipelineCache; | ||
| 33 | } | ||
| 34 | |||
| 35 | namespace VideoCommon::Shader { | ||
| 36 | |||
| 37 | class AsyncShaders { | ||
| 38 | public: | ||
| 39 | enum class Backend { | ||
| 40 | OpenGL, | ||
| 41 | GLASM, | ||
| 42 | Vulkan, | ||
| 43 | }; | ||
| 44 | |||
| 45 | struct ResultPrograms { | ||
| 46 | OpenGL::OGLProgram opengl; | ||
| 47 | OpenGL::OGLAssemblyProgram glasm; | ||
| 48 | }; | ||
| 49 | |||
| 50 | struct Result { | ||
| 51 | u64 uid; | ||
| 52 | VAddr cpu_address; | ||
| 53 | Backend backend; | ||
| 54 | ResultPrograms program; | ||
| 55 | std::vector<u64> code; | ||
| 56 | std::vector<u64> code_b; | ||
| 57 | Tegra::Engines::ShaderType shader_type; | ||
| 58 | }; | ||
| 59 | |||
| 60 | explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window_); | ||
| 61 | ~AsyncShaders(); | ||
| 62 | |||
| 63 | /// Start up shader worker threads | ||
| 64 | void AllocateWorkers(); | ||
| 65 | |||
| 66 | /// Clear the shader queue and kill all worker threads | ||
| 67 | void FreeWorkers(); | ||
| 68 | |||
| 69 | /// Forcibly terminate all worker threads | ||
| 70 | void KillWorkers(); | ||
| 71 | |||
| 72 | /// Check to see if any shaders have actually been compiled | ||
| 73 | [[nodiscard]] bool HasCompletedWork() const; | ||
| 74 | |||
| 75 | /// Deduce if a shader can be built on another thread or MUST be built in sync. We cannot build | ||
| 76 | /// every shader async as some shaders are only built and executed once. We try to "guess" which | ||
| 77 | /// shaders would be used only once | ||
| 78 | [[nodiscard]] bool IsShaderAsync(const Tegra::GPU& gpu) const; | ||
| 79 | |||
| 80 | /// Pulls completed compiled shaders | ||
| 81 | [[nodiscard]] std::vector<Result> GetCompletedWork(); | ||
| 82 | |||
| 83 | void QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type, | ||
| 84 | u64 uid, std::vector<u64> code, std::vector<u64> code_b, u32 main_offset, | ||
| 85 | CompilerSettings compiler_settings, const Registry& registry, | ||
| 86 | VAddr cpu_addr); | ||
| 87 | |||
| 88 | void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::Device& device, | ||
| 89 | Vulkan::VKScheduler& scheduler, | ||
| 90 | Vulkan::VKDescriptorPool& descriptor_pool, | ||
| 91 | Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, | ||
| 92 | std::vector<VkDescriptorSetLayoutBinding> bindings, | ||
| 93 | Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key, | ||
| 94 | u32 num_color_buffers); | ||
| 95 | |||
| 96 | private: | ||
| 97 | void ShaderCompilerThread(Core::Frontend::GraphicsContext* context); | ||
| 98 | |||
| 99 | /// Check our worker queue to see if we have any work queued already | ||
| 100 | [[nodiscard]] bool HasWorkQueued() const; | ||
| 101 | |||
| 102 | struct WorkerParams { | ||
| 103 | Backend backend; | ||
| 104 | // For OGL | ||
| 105 | const OpenGL::Device* device; | ||
| 106 | Tegra::Engines::ShaderType shader_type; | ||
| 107 | u64 uid; | ||
| 108 | std::vector<u64> code; | ||
| 109 | std::vector<u64> code_b; | ||
| 110 | u32 main_offset; | ||
| 111 | CompilerSettings compiler_settings; | ||
| 112 | std::optional<Registry> registry; | ||
| 113 | VAddr cpu_address; | ||
| 114 | |||
| 115 | // For Vulkan | ||
| 116 | Vulkan::VKPipelineCache* pp_cache; | ||
| 117 | const Vulkan::Device* vk_device; | ||
| 118 | Vulkan::VKScheduler* scheduler; | ||
| 119 | Vulkan::VKDescriptorPool* descriptor_pool; | ||
| 120 | Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue; | ||
| 121 | std::vector<VkDescriptorSetLayoutBinding> bindings; | ||
| 122 | Vulkan::SPIRVProgram program; | ||
| 123 | Vulkan::GraphicsPipelineCacheKey key; | ||
| 124 | u32 num_color_buffers; | ||
| 125 | }; | ||
| 126 | |||
| 127 | std::condition_variable cv; | ||
| 128 | mutable std::mutex queue_mutex; | ||
| 129 | mutable std::shared_mutex completed_mutex; | ||
| 130 | std::atomic<bool> is_thread_exiting{}; | ||
| 131 | std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list; | ||
| 132 | std::vector<std::thread> worker_threads; | ||
| 133 | std::queue<WorkerParams> pending_queue; | ||
| 134 | std::vector<Result> finished_work; | ||
| 135 | Core::Frontend::EmuWindow& emu_window; | ||
| 136 | }; | ||
| 137 | |||
| 138 | } // namespace VideoCommon::Shader | ||
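
The header above pairs queue_mutex/cv for pending work with a separate completed_mutex (a std::shared_mutex) for finished results. Below is a minimal sketch of the completed-work handoff that interface implies, with Result trimmed to a stand-in: readers take a shared lock for the cheap emptiness check, writers take an exclusive lock, and GetCompletedWork swaps the vector out so the caller takes ownership in O(1).

#include <iostream>
#include <shared_mutex>
#include <utility>
#include <vector>

struct Result { unsigned long long uid; }; // trimmed stand-in

class CompletedQueue {
public:
    bool HasCompletedWork() const {
        std::shared_lock lock{completed_mutex}; // shared: read-only check
        return !finished_work.empty();
    }
    void Push(Result r) {
        std::unique_lock lock{completed_mutex}; // exclusive: mutation
        finished_work.push_back(r);
    }
    std::vector<Result> GetCompletedWork() {
        std::vector<Result> results;
        std::unique_lock lock{completed_mutex};
        std::swap(results, finished_work); // O(1) handoff to the caller
        return results;
    }
private:
    mutable std::shared_mutex completed_mutex;
    std::vector<Result> finished_work;
};

int main() {
    CompletedQueue queue;
    queue.Push(Result{42});
    if (queue.HasCompletedWork()) {
        for (const Result& r : queue.GetCompletedWork()) {
            std::cout << "shader " << r.uid << " ready\n";
        }
    }
}
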
diff --git a/src/video_core/shader/compiler_settings.cpp b/src/video_core/shader/compiler_settings.cpp deleted file mode 100644 index cddcbd4f0..000000000 --- a/src/video_core/shader/compiler_settings.cpp +++ /dev/null | |||
| @@ -1,26 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "video_core/shader/compiler_settings.h" | ||
| 6 | |||
| 7 | namespace VideoCommon::Shader { | ||
| 8 | |||
| 9 | std::string CompileDepthAsString(const CompileDepth cd) { | ||
| 10 | switch (cd) { | ||
| 11 | case CompileDepth::BruteForce: | ||
| 12 | return "Brute Force Compile"; | ||
| 13 | case CompileDepth::FlowStack: | ||
| 14 | return "Simple Flow Stack Mode"; | ||
| 15 | case CompileDepth::NoFlowStack: | ||
| 16 | return "Remove Flow Stack"; | ||
| 17 | case CompileDepth::DecompileBackwards: | ||
| 18 | return "Decompile Backward Jumps"; | ||
| 19 | case CompileDepth::FullDecompile: | ||
| 20 | return "Full Decompilation"; | ||
| 21 | default: | ||
| 22 | return "Unknown Compiler Process"; | ||
| 23 | } | ||
| 24 | } | ||
| 25 | |||
| 26 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/compiler_settings.h b/src/video_core/shader/compiler_settings.h deleted file mode 100644 index 916018c01..000000000 --- a/src/video_core/shader/compiler_settings.h +++ /dev/null | |||
| @@ -1,26 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | |||
| 9 | namespace VideoCommon::Shader { | ||
| 10 | |||
| 11 | enum class CompileDepth : u32 { | ||
| 12 | BruteForce = 0, | ||
| 13 | FlowStack = 1, | ||
| 14 | NoFlowStack = 2, | ||
| 15 | DecompileBackwards = 3, | ||
| 16 | FullDecompile = 4, | ||
| 17 | }; | ||
| 18 | |||
| 19 | std::string CompileDepthAsString(CompileDepth cd); | ||
| 20 | |||
| 21 | struct CompilerSettings { | ||
| 22 | CompileDepth depth{CompileDepth::NoFlowStack}; | ||
| 23 | bool disable_else_derivation{true}; | ||
| 24 | }; | ||
| 25 | |||
| 26 | } // namespace VideoCommon::Shader | ||
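
CompileDepth orders the decompilation strategies from the BruteForce fallback up to FullDecompile, and ScanFlow downgrades when a deeper mode fails. The toy program below only illustrates that downgrade idea: TryDecompile is a hypothetical stub, and the real fallback jumps between specific modes rather than stepping down linearly.

#include <iostream>
#include <string>

enum class CompileDepth { BruteForce, FlowStack, NoFlowStack, DecompileBackwards, FullDecompile };

std::string AsString(CompileDepth d) {
    switch (d) {
    case CompileDepth::BruteForce: return "Brute Force Compile";
    case CompileDepth::FlowStack: return "Simple Flow Stack Mode";
    case CompileDepth::NoFlowStack: return "Remove Flow Stack";
    case CompileDepth::DecompileBackwards: return "Decompile Backward Jumps";
    case CompileDepth::FullDecompile: return "Full Decompilation";
    }
    return "Unknown Compiler Process";
}

// Hypothetical: pretend only the shallow modes succeed for this shader.
bool TryDecompile(CompileDepth d) {
    return d <= CompileDepth::NoFlowStack;
}

int main() {
    const CompileDepth requested = CompileDepth::FullDecompile;
    CompileDepth achieved = requested;
    // Fall back to shallower modes until one succeeds.
    while (!TryDecompile(achieved) && achieved != CompileDepth::BruteForce) {
        achieved = static_cast<CompileDepth>(static_cast<int>(achieved) - 1);
    }
    if (achieved != requested) {
        std::cout << "Downgraded from " << AsString(requested) << " to " << AsString(achieved)
                  << '\n';
    }
}
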
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp deleted file mode 100644 index 43d965f2f..000000000 --- a/src/video_core/shader/control_flow.cpp +++ /dev/null | |||
| @@ -1,751 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <list> | ||
| 6 | #include <map> | ||
| 7 | #include <set> | ||
| 8 | #include <stack> | ||
| 9 | #include <unordered_map> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include "common/assert.h" | ||
| 13 | #include "common/common_types.h" | ||
| 14 | #include "video_core/shader/ast.h" | ||
| 15 | #include "video_core/shader/control_flow.h" | ||
| 16 | #include "video_core/shader/memory_util.h" | ||
| 17 | #include "video_core/shader/registry.h" | ||
| 18 | #include "video_core/shader/shader_ir.h" | ||
| 19 | |||
| 20 | namespace VideoCommon::Shader { | ||
| 21 | |||
| 22 | namespace { | ||
| 23 | |||
| 24 | using Tegra::Shader::Instruction; | ||
| 25 | using Tegra::Shader::OpCode; | ||
| 26 | |||
| 27 | constexpr s32 unassigned_branch = -2; | ||
| 28 | |||
| 29 | struct Query { | ||
| 30 | u32 address{}; | ||
| 31 | std::stack<u32> ssy_stack{}; | ||
| 32 | std::stack<u32> pbk_stack{}; | ||
| 33 | }; | ||
| 34 | |||
| 35 | struct BlockStack { | ||
| 36 | BlockStack() = default; | ||
| 37 | explicit BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {} | ||
| 38 | std::stack<u32> ssy_stack{}; | ||
| 39 | std::stack<u32> pbk_stack{}; | ||
| 40 | }; | ||
| 41 | |||
| 42 | template <typename T, typename... Args> | ||
| 43 | BlockBranchInfo MakeBranchInfo(Args&&... args) { | ||
| 44 | static_assert(std::is_convertible_v<T, BranchData>); | ||
| 45 | return std::make_shared<BranchData>(T(std::forward<Args>(args)...)); | ||
| 46 | } | ||
| 47 | |||
| 48 | bool BlockBranchIsIgnored(BlockBranchInfo first) { | ||
| 49 | bool ignore = false; | ||
| 50 | if (std::holds_alternative<SingleBranch>(*first)) { | ||
| 51 | const auto branch = std::get_if<SingleBranch>(first.get()); | ||
| 52 | ignore = branch->ignore; | ||
| 53 | } | ||
| 54 | return ignore; | ||
| 55 | } | ||
| 56 | |||
| 57 | struct BlockInfo { | ||
| 58 | u32 start{}; | ||
| 59 | u32 end{}; | ||
| 60 | bool visited{}; | ||
| 61 | BlockBranchInfo branch{}; | ||
| 62 | |||
| 63 | bool IsInside(const u32 address) const { | ||
| 64 | return start <= address && address <= end; | ||
| 65 | } | ||
| 66 | }; | ||
| 67 | |||
| 68 | struct CFGRebuildState { | ||
| 69 | explicit CFGRebuildState(const ProgramCode& program_code_, u32 start_, Registry& registry_) | ||
| 70 | : program_code{program_code_}, registry{registry_}, start{start_} {} | ||
| 71 | |||
| 72 | const ProgramCode& program_code; | ||
| 73 | Registry& registry; | ||
| 74 | u32 start{}; | ||
| 75 | std::vector<BlockInfo> block_info; | ||
| 76 | std::list<u32> inspect_queries; | ||
| 77 | std::list<Query> queries; | ||
| 78 | std::unordered_map<u32, u32> registered; | ||
| 79 | std::set<u32> labels; | ||
| 80 | std::map<u32, u32> ssy_labels; | ||
| 81 | std::map<u32, u32> pbk_labels; | ||
| 82 | std::unordered_map<u32, BlockStack> stacks; | ||
| 83 | ASTManager* manager{}; | ||
| 84 | }; | ||
| 85 | |||
| 86 | enum class BlockCollision : u32 { None, Found, Inside }; | ||
| 87 | |||
| 88 | std::pair<BlockCollision, u32> TryGetBlock(CFGRebuildState& state, u32 address) { | ||
| 89 | const auto& blocks = state.block_info; | ||
| 90 | for (u32 index = 0; index < blocks.size(); index++) { | ||
| 91 | if (blocks[index].start == address) { | ||
| 92 | return {BlockCollision::Found, index}; | ||
| 93 | } | ||
| 94 | if (blocks[index].IsInside(address)) { | ||
| 95 | return {BlockCollision::Inside, index}; | ||
| 96 | } | ||
| 97 | } | ||
| 98 | return {BlockCollision::None, 0xFFFFFFFF}; | ||
| 99 | } | ||
| 100 | |||
| 101 | struct ParseInfo { | ||
| 102 | BlockBranchInfo branch_info{}; | ||
| 103 | u32 end_address{}; | ||
| 104 | }; | ||
| 105 | |||
| 106 | BlockInfo& CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) { | ||
| 107 | auto& it = state.block_info.emplace_back(); | ||
| 108 | it.start = start; | ||
| 109 | it.end = end; | ||
| 110 | const u32 index = static_cast<u32>(state.block_info.size() - 1); | ||
| 111 | state.registered.insert({start, index}); | ||
| 112 | return it; | ||
| 113 | } | ||
| 114 | |||
| 115 | Pred GetPredicate(u32 index, bool negated) { | ||
| 116 | return static_cast<Pred>(static_cast<u64>(index) + (negated ? 8ULL : 0ULL)); | ||
| 117 | } | ||
| 118 | |||
| 119 | enum class ParseResult : u32 { | ||
| 120 | ControlCaught, | ||
| 121 | BlockEnd, | ||
| 122 | AbnormalFlow, | ||
| 123 | }; | ||
| 124 | |||
| 125 | struct BranchIndirectInfo { | ||
| 126 | u32 buffer{}; | ||
| 127 | u32 offset{}; | ||
| 128 | u32 entries{}; | ||
| 129 | s32 relative_position{}; | ||
| 130 | }; | ||
| 131 | |||
| 132 | struct BufferInfo { | ||
| 133 | u32 index; | ||
| 134 | u32 offset; | ||
| 135 | }; | ||
| 136 | |||
| 137 | std::optional<std::pair<s32, u64>> GetBRXInfo(const CFGRebuildState& state, u32& pos) { | ||
| 138 | const Instruction instr = state.program_code[pos]; | ||
| 139 | const auto opcode = OpCode::Decode(instr); | ||
| 140 | if (opcode->get().GetId() != OpCode::Id::BRX) { | ||
| 141 | return std::nullopt; | ||
| 142 | } | ||
| 143 | if (instr.brx.constant_buffer != 0) { | ||
| 144 | return std::nullopt; | ||
| 145 | } | ||
| 146 | --pos; | ||
| 147 | return std::make_pair(instr.brx.GetBranchExtend(), instr.gpr8.Value()); | ||
| 148 | } | ||
| 149 | |||
| 150 | template <typename Result, typename TestCallable, typename PackCallable> | ||
| 151 | // requires std::predicate<TestCallable, Instruction, const OpCode::Matcher&> | ||
| 152 | // requires std::invocable<PackCallable, Instruction, const OpCode::Matcher&> | ||
| 153 | std::optional<Result> TrackInstruction(const CFGRebuildState& state, u32& pos, TestCallable test, | ||
| 154 | PackCallable pack) { | ||
| 155 | for (; pos >= state.start; --pos) { | ||
| 156 | if (IsSchedInstruction(pos, state.start)) { | ||
| 157 | continue; | ||
| 158 | } | ||
| 159 | const Instruction instr = state.program_code[pos]; | ||
| 160 | const auto opcode = OpCode::Decode(instr); | ||
| 161 | if (!opcode) { | ||
| 162 | continue; | ||
| 163 | } | ||
| 164 | if (test(instr, opcode->get())) { | ||
| 165 | --pos; | ||
| 166 | return std::make_optional(pack(instr, opcode->get())); | ||
| 167 | } | ||
| 168 | } | ||
| 169 | return std::nullopt; | ||
| 170 | } | ||
| 171 | |||
| 172 | std::optional<std::pair<BufferInfo, u64>> TrackLDC(const CFGRebuildState& state, u32& pos, | ||
| 173 | u64 brx_tracked_register) { | ||
| 174 | return TrackInstruction<std::pair<BufferInfo, u64>>( | ||
| 175 | state, pos, | ||
| 176 | [brx_tracked_register](auto instr, const auto& opcode) { | ||
| 177 | return opcode.GetId() == OpCode::Id::LD_C && | ||
| 178 | instr.gpr0.Value() == brx_tracked_register && | ||
| 179 | instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single; | ||
| 180 | }, | ||
| 181 | [](auto instr, const auto& opcode) { | ||
| 182 | const BufferInfo info = {static_cast<u32>(instr.cbuf36.index.Value()), | ||
| 183 | static_cast<u32>(instr.cbuf36.GetOffset())}; | ||
| 184 | return std::make_pair(info, instr.gpr8.Value()); | ||
| 185 | }); | ||
| 186 | } | ||
| 187 | |||
| 188 | std::optional<u64> TrackSHLRegister(const CFGRebuildState& state, u32& pos, | ||
| 189 | u64 ldc_tracked_register) { | ||
| 190 | return TrackInstruction<u64>( | ||
| 191 | state, pos, | ||
| 192 | [ldc_tracked_register](auto instr, const auto& opcode) { | ||
| 193 | return opcode.GetId() == OpCode::Id::SHL_IMM && | ||
| 194 | instr.gpr0.Value() == ldc_tracked_register; | ||
| 195 | }, | ||
| 196 | [](auto instr, const auto&) { return instr.gpr8.Value(); }); | ||
| 197 | } | ||
| 198 | |||
| 199 | std::optional<u32> TrackIMNMXValue(const CFGRebuildState& state, u32& pos, | ||
| 200 | u64 shl_tracked_register) { | ||
| 201 | return TrackInstruction<u32>( | ||
| 202 | state, pos, | ||
| 203 | [shl_tracked_register](auto instr, const auto& opcode) { | ||
| 204 | return opcode.GetId() == OpCode::Id::IMNMX_IMM && | ||
| 205 | instr.gpr0.Value() == shl_tracked_register; | ||
| 206 | }, | ||
| 207 | [](auto instr, const auto&) { | ||
| 208 | return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1); | ||
| 209 | }); | ||
| 210 | } | ||
| 211 | |||
| 212 | std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state, u32 pos) { | ||
| 213 | const auto brx_info = GetBRXInfo(state, pos); | ||
| 214 | if (!brx_info) { | ||
| 215 | return std::nullopt; | ||
| 216 | } | ||
| 217 | const auto [relative_position, brx_tracked_register] = *brx_info; | ||
| 218 | |||
| 219 | const auto ldc_info = TrackLDC(state, pos, brx_tracked_register); | ||
| 220 | if (!ldc_info) { | ||
| 221 | return std::nullopt; | ||
| 222 | } | ||
| 223 | const auto [buffer_info, ldc_tracked_register] = *ldc_info; | ||
| 224 | |||
| 225 | const auto shl_tracked_register = TrackSHLRegister(state, pos, ldc_tracked_register); | ||
| 226 | if (!shl_tracked_register) { | ||
| 227 | return std::nullopt; | ||
| 228 | } | ||
| 229 | |||
| 230 | const auto entries = TrackIMNMXValue(state, pos, *shl_tracked_register); | ||
| 231 | if (!entries) { | ||
| 232 | return std::nullopt; | ||
| 233 | } | ||
| 234 | |||
| 235 | return BranchIndirectInfo{buffer_info.index, buffer_info.offset, *entries, relative_position}; | ||
| 236 | } | ||
| 237 | |||
| 238 | std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) { | ||
| 239 | u32 offset = static_cast<u32>(address); | ||
| 240 | const u32 end_address = static_cast<u32>(state.program_code.size()); | ||
| 241 | ParseInfo parse_info{}; | ||
| 242 | SingleBranch single_branch{}; | ||
| 243 | |||
| 244 | const auto insert_label = [](CFGRebuildState& rebuild_state, u32 label_address) { | ||
| 245 | const auto pair = rebuild_state.labels.emplace(label_address); | ||
| 246 | if (pair.second) { | ||
| 247 | rebuild_state.inspect_queries.push_back(label_address); | ||
| 248 | } | ||
| 249 | }; | ||
| 250 | |||
| 251 | while (true) { | ||
| 252 | if (offset >= end_address) { | ||
| 253 | // ASSERT_OR_EXECUTE can't be used, as it ignores the break | ||
| 254 | ASSERT_MSG(false, "Shader decoding ran past the end of the program!"); | ||
| 255 | |||
| 256 | single_branch.address = exit_branch; | ||
| 257 | single_branch.ignore = false; | ||
| 258 | break; | ||
| 259 | } | ||
| 260 | if (state.registered.contains(offset)) { | ||
| 261 | single_branch.address = offset; | ||
| 262 | single_branch.ignore = true; | ||
| 263 | break; | ||
| 264 | } | ||
| 265 | if (IsSchedInstruction(offset, state.start)) { | ||
| 266 | offset++; | ||
| 267 | continue; | ||
| 268 | } | ||
| 269 | const Instruction instr = {state.program_code[offset]}; | ||
| 270 | const auto opcode = OpCode::Decode(instr); | ||
| 271 | if (!opcode || opcode->get().GetType() != OpCode::Type::Flow) { | ||
| 272 | offset++; | ||
| 273 | continue; | ||
| 274 | } | ||
| 275 | |||
| 276 | switch (opcode->get().GetId()) { | ||
| 277 | case OpCode::Id::EXIT: { | ||
| 278 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 279 | single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); | ||
| 280 | if (single_branch.condition.predicate == Pred::NeverExecute) { | ||
| 281 | offset++; | ||
| 282 | continue; | ||
| 283 | } | ||
| 284 | const ConditionCode cc = instr.flow_condition_code; | ||
| 285 | single_branch.condition.cc = cc; | ||
| 286 | if (cc == ConditionCode::F) { | ||
| 287 | offset++; | ||
| 288 | continue; | ||
| 289 | } | ||
| 290 | single_branch.address = exit_branch; | ||
| 291 | single_branch.kill = false; | ||
| 292 | single_branch.is_sync = false; | ||
| 293 | single_branch.is_brk = false; | ||
| 294 | single_branch.ignore = false; | ||
| 295 | parse_info.end_address = offset; | ||
| 296 | parse_info.branch_info = MakeBranchInfo<SingleBranch>( | ||
| 297 | single_branch.condition, single_branch.address, single_branch.kill, | ||
| 298 | single_branch.is_sync, single_branch.is_brk, single_branch.ignore); | ||
| 299 | |||
| 300 | return {ParseResult::ControlCaught, parse_info}; | ||
| 301 | } | ||
| 302 | case OpCode::Id::BRA: { | ||
| 303 | if (instr.bra.constant_buffer != 0) { | ||
| 304 | return {ParseResult::AbnormalFlow, parse_info}; | ||
| 305 | } | ||
| 306 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 307 | single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); | ||
| 308 | if (single_branch.condition.predicate == Pred::NeverExecute) { | ||
| 309 | offset++; | ||
| 310 | continue; | ||
| 311 | } | ||
| 312 | const ConditionCode cc = instr.flow_condition_code; | ||
| 313 | single_branch.condition.cc = cc; | ||
| 314 | if (cc == ConditionCode::F) { | ||
| 315 | offset++; | ||
| 316 | continue; | ||
| 317 | } | ||
| 318 | const u32 branch_offset = offset + instr.bra.GetBranchTarget(); | ||
| 319 | if (branch_offset == 0) { | ||
| 320 | single_branch.address = exit_branch; | ||
| 321 | } else { | ||
| 322 | single_branch.address = branch_offset; | ||
| 323 | } | ||
| 324 | insert_label(state, branch_offset); | ||
| 325 | single_branch.kill = false; | ||
| 326 | single_branch.is_sync = false; | ||
| 327 | single_branch.is_brk = false; | ||
| 328 | single_branch.ignore = false; | ||
| 329 | parse_info.end_address = offset; | ||
| 330 | parse_info.branch_info = MakeBranchInfo<SingleBranch>( | ||
| 331 | single_branch.condition, single_branch.address, single_branch.kill, | ||
| 332 | single_branch.is_sync, single_branch.is_brk, single_branch.ignore); | ||
| 333 | |||
| 334 | return {ParseResult::ControlCaught, parse_info}; | ||
| 335 | } | ||
| 336 | case OpCode::Id::SYNC: { | ||
| 337 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 338 | single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); | ||
| 339 | if (single_branch.condition.predicate == Pred::NeverExecute) { | ||
| 340 | offset++; | ||
| 341 | continue; | ||
| 342 | } | ||
| 343 | const ConditionCode cc = instr.flow_condition_code; | ||
| 344 | single_branch.condition.cc = cc; | ||
| 345 | if (cc == ConditionCode::F) { | ||
| 346 | offset++; | ||
| 347 | continue; | ||
| 348 | } | ||
| 349 | single_branch.address = unassigned_branch; | ||
| 350 | single_branch.kill = false; | ||
| 351 | single_branch.is_sync = true; | ||
| 352 | single_branch.is_brk = false; | ||
| 353 | single_branch.ignore = false; | ||
| 354 | parse_info.end_address = offset; | ||
| 355 | parse_info.branch_info = MakeBranchInfo<SingleBranch>( | ||
| 356 | single_branch.condition, single_branch.address, single_branch.kill, | ||
| 357 | single_branch.is_sync, single_branch.is_brk, single_branch.ignore); | ||
| 358 | |||
| 359 | return {ParseResult::ControlCaught, parse_info}; | ||
| 360 | } | ||
| 361 | case OpCode::Id::BRK: { | ||
| 362 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 363 | single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); | ||
| 364 | if (single_branch.condition.predicate == Pred::NeverExecute) { | ||
| 365 | offset++; | ||
| 366 | continue; | ||
| 367 | } | ||
| 368 | const ConditionCode cc = instr.flow_condition_code; | ||
| 369 | single_branch.condition.cc = cc; | ||
| 370 | if (cc == ConditionCode::F) { | ||
| 371 | offset++; | ||
| 372 | continue; | ||
| 373 | } | ||
| 374 | single_branch.address = unassigned_branch; | ||
| 375 | single_branch.kill = false; | ||
| 376 | single_branch.is_sync = false; | ||
| 377 | single_branch.is_brk = true; | ||
| 378 | single_branch.ignore = false; | ||
| 379 | parse_info.end_address = offset; | ||
| 380 | parse_info.branch_info = MakeBranchInfo<SingleBranch>( | ||
| 381 | single_branch.condition, single_branch.address, single_branch.kill, | ||
| 382 | single_branch.is_sync, single_branch.is_brk, single_branch.ignore); | ||
| 383 | |||
| 384 | return {ParseResult::ControlCaught, parse_info}; | ||
| 385 | } | ||
| 386 | case OpCode::Id::KIL: { | ||
| 387 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 388 | single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); | ||
| 389 | if (single_branch.condition.predicate == Pred::NeverExecute) { | ||
| 390 | offset++; | ||
| 391 | continue; | ||
| 392 | } | ||
| 393 | const ConditionCode cc = instr.flow_condition_code; | ||
| 394 | single_branch.condition.cc = cc; | ||
| 395 | if (cc == ConditionCode::F) { | ||
| 396 | offset++; | ||
| 397 | continue; | ||
| 398 | } | ||
| 399 | single_branch.address = exit_branch; | ||
| 400 | single_branch.kill = true; | ||
| 401 | single_branch.is_sync = false; | ||
| 402 | single_branch.is_brk = false; | ||
| 403 | single_branch.ignore = false; | ||
| 404 | parse_info.end_address = offset; | ||
| 405 | parse_info.branch_info = MakeBranchInfo<SingleBranch>( | ||
| 406 | single_branch.condition, single_branch.address, single_branch.kill, | ||
| 407 | single_branch.is_sync, single_branch.is_brk, single_branch.ignore); | ||
| 408 | |||
| 409 | return {ParseResult::ControlCaught, parse_info}; | ||
| 410 | } | ||
| 411 | case OpCode::Id::SSY: { | ||
| 412 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 413 | insert_label(state, target); | ||
| 414 | state.ssy_labels.emplace(offset, target); | ||
| 415 | break; | ||
| 416 | } | ||
| 417 | case OpCode::Id::PBK: { | ||
| 418 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 419 | insert_label(state, target); | ||
| 420 | state.pbk_labels.emplace(offset, target); | ||
| 421 | break; | ||
| 422 | } | ||
| 423 | case OpCode::Id::BRX: { | ||
| 424 | const auto tmp = TrackBranchIndirectInfo(state, offset); | ||
| 425 | if (!tmp) { | ||
| 426 | LOG_WARNING(HW_GPU, "BRX Track Unsuccessful"); | ||
| 427 | return {ParseResult::AbnormalFlow, parse_info}; | ||
| 428 | } | ||
| 429 | |||
| 430 | const auto result = *tmp; | ||
| 431 | const s32 pc_target = offset + result.relative_position; | ||
| 432 | std::vector<CaseBranch> branches; | ||
| 433 | for (u32 i = 0; i < result.entries; i++) { | ||
| 434 | auto key = state.registry.ObtainKey(result.buffer, result.offset + i * 4); | ||
| 435 | if (!key) { | ||
| 436 | return {ParseResult::AbnormalFlow, parse_info}; | ||
| 437 | } | ||
| 438 | u32 value = *key; | ||
| 439 | u32 target = static_cast<u32>((value >> 3) + pc_target); | ||
| 440 | insert_label(state, target); | ||
| 441 | branches.emplace_back(value, target); | ||
| 442 | } | ||
| 443 | parse_info.end_address = offset; | ||
| 444 | parse_info.branch_info = MakeBranchInfo<MultiBranch>( | ||
| 445 | static_cast<u32>(instr.gpr8.Value()), std::move(branches)); | ||
| 446 | |||
| 447 | return {ParseResult::ControlCaught, parse_info}; | ||
| 448 | } | ||
| 449 | default: | ||
| 450 | break; | ||
| 451 | } | ||
| 452 | |||
| 453 | offset++; | ||
| 454 | } | ||
| 455 | single_branch.kill = false; | ||
| 456 | single_branch.is_sync = false; | ||
| 457 | single_branch.is_brk = false; | ||
| 458 | parse_info.end_address = offset - 1; | ||
| 459 | parse_info.branch_info = MakeBranchInfo<SingleBranch>( | ||
| 460 | single_branch.condition, single_branch.address, single_branch.kill, single_branch.is_sync, | ||
| 461 | single_branch.is_brk, single_branch.ignore); | ||
| 462 | return {ParseResult::BlockEnd, parse_info}; | ||
| 463 | } | ||
| 464 | |||
| 465 | bool TryInspectAddress(CFGRebuildState& state) { | ||
| 466 | if (state.inspect_queries.empty()) { | ||
| 467 | return false; | ||
| 468 | } | ||
| 469 | |||
| 470 | const u32 address = state.inspect_queries.front(); | ||
| 471 | state.inspect_queries.pop_front(); | ||
| 472 | const auto [result, block_index] = TryGetBlock(state, address); | ||
| 473 | switch (result) { | ||
| 474 | case BlockCollision::Found: { | ||
| 475 | return true; | ||
| 476 | } | ||
| 477 | case BlockCollision::Inside: { | ||
| 478 | // This case is the tricky one: | ||
| 479 | // We need to split the block into 2 separate blocks | ||
| 480 | const u32 end = state.block_info[block_index].end; | ||
| 481 | BlockInfo& new_block = CreateBlockInfo(state, address, end); | ||
| 482 | BlockInfo& current_block = state.block_info[block_index]; | ||
| 483 | current_block.end = address - 1; | ||
| 484 | new_block.branch = std::move(current_block.branch); | ||
| 485 | BlockBranchInfo forward_branch = MakeBranchInfo<SingleBranch>(); | ||
| 486 | const auto branch = std::get_if<SingleBranch>(forward_branch.get()); | ||
| 487 | branch->address = address; | ||
| 488 | branch->ignore = true; | ||
| 489 | current_block.branch = std::move(forward_branch); | ||
| 490 | return true; | ||
| 491 | } | ||
| 492 | default: | ||
| 493 | break; | ||
| 494 | } | ||
| 495 | const auto [parse_result, parse_info] = ParseCode(state, address); | ||
| 496 | if (parse_result == ParseResult::AbnormalFlow) { | ||
| 497 | // On AbnormalFlow, return false to abort the CFG reconstruction | ||
| 498 | return false; | ||
| 499 | } | ||
| 500 | |||
| 501 | BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address); | ||
| 502 | block_info.branch = parse_info.branch_info; | ||
| 503 | if (std::holds_alternative<SingleBranch>(*block_info.branch)) { | ||
| 504 | const auto branch = std::get_if<SingleBranch>(block_info.branch.get()); | ||
| 505 | if (branch->condition.IsUnconditional()) { | ||
| 506 | return true; | ||
| 507 | } | ||
| 508 | const u32 fallthrough_address = parse_info.end_address + 1; | ||
| 509 | state.inspect_queries.push_front(fallthrough_address); | ||
| 510 | return true; | ||
| 511 | } | ||
| 512 | return true; | ||
| 513 | } | ||
| 514 | |||
| 515 | bool TryQuery(CFGRebuildState& state) { | ||
| 516 | const auto gather_labels = [](std::stack<u32>& cc, std::map<u32, u32>& labels, | ||
| 517 | BlockInfo& block) { | ||
| 518 | auto gather_start = labels.lower_bound(block.start); | ||
| 519 | const auto gather_end = labels.upper_bound(block.end); | ||
| 520 | while (gather_start != gather_end) { | ||
| 521 | cc.push(gather_start->second); | ||
| 522 | ++gather_start; | ||
| 523 | } | ||
| 524 | }; | ||
| 525 | if (state.queries.empty()) { | ||
| 526 | return false; | ||
| 527 | } | ||
| 528 | |||
| 529 | Query& q = state.queries.front(); | ||
| 530 | const u32 block_index = state.registered[q.address]; | ||
| 531 | BlockInfo& block = state.block_info[block_index]; | ||
| 532 | // If the block has already been visited, check that the stacks match; else gather the | ||
| 533 | // ssy/pbk labels into the current stack and check whether the branch at the end of the | ||
| 534 | // block consumes a label. Schedule new queries accordingly | ||
| 535 | if (block.visited) { | ||
| 536 | BlockStack& stack = state.stacks[q.address]; | ||
| 537 | const bool all_okay = (stack.ssy_stack.empty() || q.ssy_stack == stack.ssy_stack) && | ||
| 538 | (stack.pbk_stack.empty() || q.pbk_stack == stack.pbk_stack); | ||
| 539 | state.queries.pop_front(); | ||
| 540 | return all_okay; | ||
| 541 | } | ||
| 542 | block.visited = true; | ||
| 543 | state.stacks.insert_or_assign(q.address, BlockStack{q}); | ||
| 544 | |||
| 545 | Query q2(q); | ||
| 546 | state.queries.pop_front(); | ||
| 547 | gather_labels(q2.ssy_stack, state.ssy_labels, block); | ||
| 548 | gather_labels(q2.pbk_stack, state.pbk_labels, block); | ||
| 549 | if (std::holds_alternative<SingleBranch>(*block.branch)) { | ||
| 550 | auto* branch = std::get_if<SingleBranch>(block.branch.get()); | ||
| 551 | if (!branch->condition.IsUnconditional()) { | ||
| 552 | q2.address = block.end + 1; | ||
| 553 | state.queries.push_back(q2); | ||
| 554 | } | ||
| 555 | |||
| 556 | auto& conditional_query = state.queries.emplace_back(q2); | ||
| 557 | if (branch->is_sync) { | ||
| 558 | if (branch->address == unassigned_branch) { | ||
| 559 | branch->address = conditional_query.ssy_stack.top(); | ||
| 560 | } | ||
| 561 | conditional_query.ssy_stack.pop(); | ||
| 562 | } | ||
| 563 | if (branch->is_brk) { | ||
| 564 | if (branch->address == unassigned_branch) { | ||
| 565 | branch->address = conditional_query.pbk_stack.top(); | ||
| 566 | } | ||
| 567 | conditional_query.pbk_stack.pop(); | ||
| 568 | } | ||
| 569 | conditional_query.address = branch->address; | ||
| 570 | return true; | ||
| 571 | } | ||
| 572 | |||
| 573 | const auto* multi_branch = std::get_if<MultiBranch>(block.branch.get()); | ||
| 574 | for (const auto& branch_case : multi_branch->branches) { | ||
| 575 | auto& conditional_query = state.queries.emplace_back(q2); | ||
| 576 | conditional_query.address = branch_case.address; | ||
| 577 | } | ||
| 578 | |||
| 579 | return true; | ||
| 580 | } | ||
| 581 | |||
| 582 | void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) { | ||
| 583 | const auto get_expr = [](const Condition& cond) -> Expr { | ||
| 584 | Expr result; | ||
| 585 | if (cond.cc != ConditionCode::T) { | ||
| 586 | result = MakeExpr<ExprCondCode>(cond.cc); | ||
| 587 | } | ||
| 588 | if (cond.predicate != Pred::UnusedIndex) { | ||
| 589 | u32 pred = static_cast<u32>(cond.predicate); | ||
| 590 | bool negate = false; | ||
| 591 | if (pred > 7) { | ||
| 592 | negate = true; | ||
| 593 | pred -= 8; | ||
| 594 | } | ||
| 595 | Expr extra = MakeExpr<ExprPredicate>(pred); | ||
| 596 | if (negate) { | ||
| 597 | extra = MakeExpr<ExprNot>(std::move(extra)); | ||
| 598 | } | ||
| 599 | if (result) { | ||
| 600 | return MakeExpr<ExprAnd>(std::move(extra), std::move(result)); | ||
| 601 | } | ||
| 602 | return extra; | ||
| 603 | } | ||
| 604 | if (result) { | ||
| 605 | return result; | ||
| 606 | } | ||
| 607 | return MakeExpr<ExprBoolean>(true); | ||
| 608 | }; | ||
| 609 | |||
| 610 | if (std::holds_alternative<SingleBranch>(*branch_info)) { | ||
| 611 | const auto* branch = std::get_if<SingleBranch>(branch_info.get()); | ||
| 612 | if (branch->address < 0) { | ||
| 613 | if (branch->kill) { | ||
| 614 | mm.InsertReturn(get_expr(branch->condition), true); | ||
| 615 | return; | ||
| 616 | } | ||
| 617 | mm.InsertReturn(get_expr(branch->condition), false); | ||
| 618 | return; | ||
| 619 | } | ||
| 620 | mm.InsertGoto(get_expr(branch->condition), branch->address); | ||
| 621 | return; | ||
| 622 | } | ||
| 623 | const auto* multi_branch = std::get_if<MultiBranch>(branch_info.get()); | ||
| 624 | for (const auto& branch_case : multi_branch->branches) { | ||
| 625 | mm.InsertGoto(MakeExpr<ExprGprEqual>(multi_branch->gpr, branch_case.cmp_value), | ||
| 626 | branch_case.address); | ||
| 627 | } | ||
| 628 | } | ||
| 629 | |||
| 630 | void DecompileShader(CFGRebuildState& state) { | ||
| 631 | state.manager->Init(); | ||
| 632 | for (auto label : state.labels) { | ||
| 633 | state.manager->DeclareLabel(label); | ||
| 634 | } | ||
| 635 | for (const auto& block : state.block_info) { | ||
| 636 | if (state.labels.contains(block.start)) { | ||
| 637 | state.manager->InsertLabel(block.start); | ||
| 638 | } | ||
| 639 | const bool ignore = BlockBranchIsIgnored(block.branch); | ||
| 640 | const u32 end = ignore ? block.end + 1 : block.end; | ||
| 641 | state.manager->InsertBlock(block.start, end); | ||
| 642 | if (!ignore) { | ||
| 643 | InsertBranch(*state.manager, block.branch); | ||
| 644 | } | ||
| 645 | } | ||
| 646 | state.manager->Decompile(); | ||
| 647 | } | ||
| 648 | |||
| 649 | } // Anonymous namespace | ||
| 650 | |||
| 651 | std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, | ||
| 652 | const CompilerSettings& settings, | ||
| 653 | Registry& registry) { | ||
| 654 | auto result_out = std::make_unique<ShaderCharacteristics>(); | ||
| 655 | if (settings.depth == CompileDepth::BruteForce) { | ||
| 656 | result_out->settings.depth = CompileDepth::BruteForce; | ||
| 657 | return result_out; | ||
| 658 | } | ||
| 659 | |||
| 660 | CFGRebuildState state{program_code, start_address, registry}; | ||
| 661 | // Inspect Code and generate blocks | ||
| 662 | state.labels.clear(); | ||
| 663 | state.labels.emplace(start_address); | ||
| 664 | state.inspect_queries.push_back(state.start); | ||
| 665 | while (!state.inspect_queries.empty()) { | ||
| 666 | if (!TryInspectAddress(state)) { | ||
| 667 | result_out->settings.depth = CompileDepth::BruteForce; | ||
| 668 | return result_out; | ||
| 669 | } | ||
| 670 | } | ||
| 671 | |||
| 672 | bool use_flow_stack = true; | ||
| 673 | |||
| 674 | bool decompiled = false; | ||
| 675 | |||
| 676 | if (settings.depth != CompileDepth::FlowStack) { | ||
| 677 | // Decompile Stacks | ||
| 678 | state.queries.push_back(Query{state.start, {}, {}}); | ||
| 679 | decompiled = true; | ||
| 680 | while (!state.queries.empty()) { | ||
| 681 | if (!TryQuery(state)) { | ||
| 682 | decompiled = false; | ||
| 683 | break; | ||
| 684 | } | ||
| 685 | } | ||
| 686 | } | ||
| 687 | |||
| 688 | use_flow_stack = !decompiled; | ||
| 689 | |||
| 690 | // Sort and organize results | ||
| 691 | std::sort(state.block_info.begin(), state.block_info.end(), | ||
| 692 | [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; }); | ||
| 693 | if (decompiled && settings.depth != CompileDepth::NoFlowStack) { | ||
| 694 | ASTManager manager{settings.depth != CompileDepth::DecompileBackwards, | ||
| 695 | settings.disable_else_derivation}; | ||
| 696 | state.manager = &manager; | ||
| 697 | DecompileShader(state); | ||
| 698 | decompiled = state.manager->IsFullyDecompiled(); | ||
| 699 | if (!decompiled) { | ||
| 700 | if (settings.depth == CompileDepth::FullDecompile) { | ||
| 701 | LOG_CRITICAL(HW_GPU, "Failed to remove all the gotos!"); | ||
| 702 | } else { | ||
| 703 | LOG_CRITICAL(HW_GPU, "Failed to remove all backward gotos!"); | ||
| 704 | } | ||
| 705 | state.manager->ShowCurrentState("Of Shader"); | ||
| 706 | state.manager->Clear(); | ||
| 707 | } else { | ||
| 708 | auto characteristics = std::make_unique<ShaderCharacteristics>(); | ||
| 709 | characteristics->start = start_address; | ||
| 710 | characteristics->settings.depth = settings.depth; | ||
| 711 | characteristics->manager = std::move(manager); | ||
| 712 | characteristics->end = state.block_info.back().end + 1; | ||
| 713 | return characteristics; | ||
| 714 | } | ||
| 715 | } | ||
| 716 | |||
| 717 | result_out->start = start_address; | ||
| 718 | result_out->settings.depth = | ||
| 719 | use_flow_stack ? CompileDepth::FlowStack : CompileDepth::NoFlowStack; | ||
| 720 | result_out->blocks.clear(); | ||
| 721 | for (auto& block : state.block_info) { | ||
| 722 | ShaderBlock new_block{}; | ||
| 723 | new_block.start = block.start; | ||
| 724 | new_block.end = block.end; | ||
| 725 | new_block.ignore_branch = BlockBranchIsIgnored(block.branch); | ||
| 726 | if (!new_block.ignore_branch) { | ||
| 727 | new_block.branch = block.branch; | ||
| 728 | } | ||
| 729 | result_out->end = std::max(result_out->end, block.end); | ||
| 730 | result_out->blocks.push_back(new_block); | ||
| 731 | } | ||
| 732 | if (!use_flow_stack) { | ||
| 733 | result_out->labels = std::move(state.labels); | ||
| 734 | return result_out; | ||
| 735 | } | ||
| 736 | |||
| 737 | auto back = result_out->blocks.begin(); | ||
| 738 | auto next = std::next(back); | ||
| 739 | while (next != result_out->blocks.end()) { | ||
| 740 | if (!state.labels.contains(next->start) && next->start == back->end + 1) { | ||
| 741 | back->end = next->end; | ||
| 742 | next = result_out->blocks.erase(next); | ||
| 743 | continue; | ||
| 744 | } | ||
| 745 | back = next; | ||
| 746 | ++next; | ||
| 747 | } | ||
| 748 | |||
| 749 | return result_out; | ||
| 750 | } | ||
| 751 | } // namespace VideoCommon::Shader | ||
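
The ssy_labels/pbk_labels maps and per-query stacks above statically resolve what the hardware does at run time: SSY pushes a reconvergence address that a later SYNC consumes, and PBK/BRK behave the same way for loop breaks. A toy model of that stack discipline, with made-up addresses:

#include <cstdint>
#include <iostream>
#include <stack>

int main() {
    std::stack<std::uint32_t> ssy_stack;
    // SSY 0x40 at address 0x10: record where divergent flow reconverges.
    ssy_stack.push(0x40);
    // ... divergent code executes ...
    // SYNC at address 0x30: branch to the recorded reconvergence point.
    const std::uint32_t target = ssy_stack.top();
    ssy_stack.pop();
    std::cout << "SYNC branches to 0x" << std::hex << target << '\n'; // 0x40
}
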
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h deleted file mode 100644 index 37bf96492..000000000 --- a/src/video_core/shader/control_flow.h +++ /dev/null | |||
| @@ -1,117 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <list> | ||
| 8 | #include <optional> | ||
| 9 | #include <set> | ||
| 10 | #include <variant> | ||
| 11 | |||
| 12 | #include "video_core/engines/shader_bytecode.h" | ||
| 13 | #include "video_core/shader/ast.h" | ||
| 14 | #include "video_core/shader/compiler_settings.h" | ||
| 15 | #include "video_core/shader/registry.h" | ||
| 16 | #include "video_core/shader/shader_ir.h" | ||
| 17 | |||
| 18 | namespace VideoCommon::Shader { | ||
| 19 | |||
| 20 | using Tegra::Shader::ConditionCode; | ||
| 21 | using Tegra::Shader::Pred; | ||
| 22 | |||
| 23 | constexpr s32 exit_branch = -1; | ||
| 24 | |||
| 25 | struct Condition { | ||
| 26 | Pred predicate{Pred::UnusedIndex}; | ||
| 27 | ConditionCode cc{ConditionCode::T}; | ||
| 28 | |||
| 29 | bool IsUnconditional() const { | ||
| 30 | return predicate == Pred::UnusedIndex && cc == ConditionCode::T; | ||
| 31 | } | ||
| 32 | |||
| 33 | bool operator==(const Condition& other) const { | ||
| 34 | return std::tie(predicate, cc) == std::tie(other.predicate, other.cc); | ||
| 35 | } | ||
| 36 | |||
| 37 | bool operator!=(const Condition& other) const { | ||
| 38 | return !operator==(other); | ||
| 39 | } | ||
| 40 | }; | ||
| 41 | |||
| 42 | class SingleBranch { | ||
| 43 | public: | ||
| 44 | SingleBranch() = default; | ||
| 45 | explicit SingleBranch(Condition condition_, s32 address_, bool kill_, bool is_sync_, | ||
| 46 | bool is_brk_, bool ignore_) | ||
| 47 | : condition{condition_}, address{address_}, kill{kill_}, is_sync{is_sync_}, is_brk{is_brk_}, | ||
| 48 | ignore{ignore_} {} | ||
| 49 | |||
| 50 | bool operator==(const SingleBranch& b) const { | ||
| 51 | return std::tie(condition, address, kill, is_sync, is_brk, ignore) == | ||
| 52 | std::tie(b.condition, b.address, b.kill, b.is_sync, b.is_brk, b.ignore); | ||
| 53 | } | ||
| 54 | |||
| 55 | bool operator!=(const SingleBranch& b) const { | ||
| 56 | return !operator==(b); | ||
| 57 | } | ||
| 58 | |||
| 59 | Condition condition{}; | ||
| 60 | s32 address{exit_branch}; | ||
| 61 | bool kill{}; | ||
| 62 | bool is_sync{}; | ||
| 63 | bool is_brk{}; | ||
| 64 | bool ignore{}; | ||
| 65 | }; | ||
| 66 | |||
| 67 | struct CaseBranch { | ||
| 68 | explicit CaseBranch(u32 cmp_value_, u32 address_) : cmp_value{cmp_value_}, address{address_} {} | ||
| 69 | u32 cmp_value; | ||
| 70 | u32 address; | ||
| 71 | }; | ||
| 72 | |||
| 73 | class MultiBranch { | ||
| 74 | public: | ||
| 75 | explicit MultiBranch(u32 gpr_, std::vector<CaseBranch>&& branches_) | ||
| 76 | : gpr{gpr_}, branches{std::move(branches_)} {} | ||
| 77 | |||
| 78 | u32 gpr{}; | ||
| 79 | std::vector<CaseBranch> branches{}; | ||
| 80 | }; | ||
| 81 | |||
| 82 | using BranchData = std::variant<SingleBranch, MultiBranch>; | ||
| 83 | using BlockBranchInfo = std::shared_ptr<BranchData>; | ||
| 84 | |||
| 85 | bool BlockBranchInfoAreEqual(BlockBranchInfo first, BlockBranchInfo second); | ||
| 86 | |||
| 87 | struct ShaderBlock { | ||
| 88 | u32 start{}; | ||
| 89 | u32 end{}; | ||
| 90 | bool ignore_branch{}; | ||
| 91 | BlockBranchInfo branch{}; | ||
| 92 | |||
| 93 | bool operator==(const ShaderBlock& sb) const { | ||
| 94 | return std::tie(start, end, ignore_branch) == | ||
| 95 | std::tie(sb.start, sb.end, sb.ignore_branch) && | ||
| 96 | BlockBranchInfoAreEqual(branch, sb.branch); | ||
| 97 | } | ||
| 98 | |||
| 99 | bool operator!=(const ShaderBlock& sb) const { | ||
| 100 | return !operator==(sb); | ||
| 101 | } | ||
| 102 | }; | ||
| 103 | |||
| 104 | struct ShaderCharacteristics { | ||
| 105 | std::list<ShaderBlock> blocks{}; | ||
| 106 | std::set<u32> labels{}; | ||
| 107 | u32 start{}; | ||
| 108 | u32 end{}; | ||
| 109 | ASTManager manager{true, true}; | ||
| 110 | CompilerSettings settings{}; | ||
| 111 | }; | ||
| 112 | |||
| 113 | std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, | ||
| 114 | const CompilerSettings& settings, | ||
| 115 | Registry& registry); | ||
| 116 | |||
| 117 | } // namespace VideoCommon::Shader | ||
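
BranchData is a two-alternative variant behind a shared_ptr, and the deleted .cpp files dispatch on it with holds_alternative/get_if. A minimal sketch of that consumption pattern, with the structs trimmed to stand-ins for the full definitions above:

#include <iostream>
#include <memory>
#include <variant>
#include <vector>

struct SingleBranch { int address; };
struct CaseBranch { unsigned cmp_value; int address; };
struct MultiBranch { unsigned gpr; std::vector<CaseBranch> branches; };
using BranchData = std::variant<SingleBranch, MultiBranch>;
using BlockBranchInfo = std::shared_ptr<BranchData>;

void Insert(const BlockBranchInfo& info) {
    if (std::holds_alternative<SingleBranch>(*info)) {
        const auto* branch = std::get_if<SingleBranch>(info.get());
        std::cout << "goto " << branch->address << '\n';
        return;
    }
    // Otherwise it must hold the MultiBranch alternative: one goto per case.
    const auto* multi = std::get_if<MultiBranch>(info.get());
    for (const CaseBranch& c : multi->branches) {
        std::cout << "if r" << multi->gpr << " == " << c.cmp_value << " goto " << c.address
                  << '\n';
    }
}

int main() {
    Insert(std::make_shared<BranchData>(SingleBranch{0x20}));
    Insert(std::make_shared<BranchData>(MultiBranch{8, {{0, 0x30}, {1, 0x40}}}));
}
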
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp deleted file mode 100644 index 6576d1208..000000000 --- a/src/video_core/shader/decode.cpp +++ /dev/null | |||
| @@ -1,368 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <cstring> | ||
| 6 | #include <limits> | ||
| 7 | #include <set> | ||
| 8 | |||
| 9 | #include <fmt/format.h> | ||
| 10 | |||
| 11 | #include "common/assert.h" | ||
| 12 | #include "common/common_types.h" | ||
| 13 | #include "video_core/engines/shader_bytecode.h" | ||
| 14 | #include "video_core/engines/shader_header.h" | ||
| 15 | #include "video_core/shader/control_flow.h" | ||
| 16 | #include "video_core/shader/memory_util.h" | ||
| 17 | #include "video_core/shader/node_helper.h" | ||
| 18 | #include "video_core/shader/shader_ir.h" | ||
| 19 | |||
| 20 | namespace VideoCommon::Shader { | ||
| 21 | |||
| 22 | using Tegra::Shader::Instruction; | ||
| 23 | using Tegra::Shader::OpCode; | ||
| 24 | |||
| 25 | namespace { | ||
| 26 | |||
| 27 | void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver, | ||
| 28 | const std::list<SamplerEntry>& used_samplers) { | ||
| 29 | if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) { | ||
| 30 | return; | ||
| 31 | } | ||
| 32 | u32 count{}; | ||
| 33 | std::vector<u32> bound_offsets; | ||
| 34 | for (const auto& sampler : used_samplers) { | ||
| 35 | if (sampler.is_bindless) { | ||
| 36 | continue; | ||
| 37 | } | ||
| 38 | ++count; | ||
| 39 | bound_offsets.emplace_back(sampler.offset); | ||
| 40 | } | ||
| 41 | if (count > 1) { | ||
| 42 | gpu_driver.DeduceTextureHandlerSize(std::move(bound_offsets)); | ||
| 43 | } | ||
| 44 | } | ||
| 45 | |||
| 46 | std::optional<u32> TryDeduceSamplerSize(const SamplerEntry& sampler_to_deduce, | ||
| 47 | VideoCore::GuestDriverProfile& gpu_driver, | ||
| 48 | const std::list<SamplerEntry>& used_samplers) { | ||
| 49 | const u32 base_offset = sampler_to_deduce.offset; | ||
| 50 | u32 max_offset{std::numeric_limits<u32>::max()}; | ||
| 51 | for (const auto& sampler : used_samplers) { | ||
| 52 | if (sampler.is_bindless) { | ||
| 53 | continue; | ||
| 54 | } | ||
| 55 | if (sampler.offset > base_offset) { | ||
| 56 | max_offset = std::min(sampler.offset, max_offset); | ||
| 57 | } | ||
| 58 | } | ||
| 59 | if (max_offset == std::numeric_limits<u32>::max()) { | ||
| 60 | return std::nullopt; | ||
| 61 | } | ||
| 62 | return ((max_offset - base_offset) * 4) / gpu_driver.GetTextureHandlerSize(); | ||
| 63 | } | ||
| 64 | |||
| 65 | } // Anonymous namespace | ||
| 66 | |||
| 67 | class ASTDecoder { | ||
| 68 | public: | ||
| 69 | explicit ASTDecoder(ShaderIR& ir_) : ir(ir_) {} | ||
| 70 | |||
| 71 | void operator()(ASTProgram& ast) { | ||
| 72 | ASTNode current = ast.nodes.GetFirst(); | ||
| 73 | while (current) { | ||
| 74 | Visit(current); | ||
| 75 | current = current->GetNext(); | ||
| 76 | } | ||
| 77 | } | ||
| 78 | |||
| 79 | void operator()(ASTIfThen& ast) { | ||
| 80 | ASTNode current = ast.nodes.GetFirst(); | ||
| 81 | while (current) { | ||
| 82 | Visit(current); | ||
| 83 | current = current->GetNext(); | ||
| 84 | } | ||
| 85 | } | ||
| 86 | |||
| 87 | void operator()(ASTIfElse& ast) { | ||
| 88 | ASTNode current = ast.nodes.GetFirst(); | ||
| 89 | while (current) { | ||
| 90 | Visit(current); | ||
| 91 | current = current->GetNext(); | ||
| 92 | } | ||
| 93 | } | ||
| 94 | |||
| 95 | void operator()(ASTBlockEncoded& ast) {} | ||
| 96 | |||
| 97 | void operator()(ASTBlockDecoded& ast) {} | ||
| 98 | |||
| 99 | void operator()(ASTVarSet& ast) {} | ||
| 100 | |||
| 101 | void operator()(ASTLabel& ast) {} | ||
| 102 | |||
| 103 | void operator()(ASTGoto& ast) {} | ||
| 104 | |||
| 105 | void operator()(ASTDoWhile& ast) { | ||
| 106 | ASTNode current = ast.nodes.GetFirst(); | ||
| 107 | while (current) { | ||
| 108 | Visit(current); | ||
| 109 | current = current->GetNext(); | ||
| 110 | } | ||
| 111 | } | ||
| 112 | |||
| 113 | void operator()(ASTReturn& ast) {} | ||
| 114 | |||
| 115 | void operator()(ASTBreak& ast) {} | ||
| 116 | |||
| 117 | void Visit(ASTNode& node) { | ||
| 118 | std::visit(*this, *node->GetInnerData()); | ||
| 119 | if (node->IsBlockEncoded()) { | ||
| 120 | auto block = std::get_if<ASTBlockEncoded>(node->GetInnerData()); | ||
| 121 | NodeBlock bb = ir.DecodeRange(block->start, block->end); | ||
| 122 | node->TransformBlockEncoded(std::move(bb)); | ||
| 123 | } | ||
| 124 | } | ||
| 125 | |||
| 126 | private: | ||
| 127 | ShaderIR& ir; | ||
| 128 | }; | ||
| 129 | |||
| 130 | void ShaderIR::Decode() { | ||
| 131 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); | ||
| 132 | |||
| 133 | decompiled = false; | ||
| 134 | auto info = ScanFlow(program_code, main_offset, settings, registry); | ||
| 135 | auto& shader_info = *info; | ||
| 136 | coverage_begin = shader_info.start; | ||
| 137 | coverage_end = shader_info.end; | ||
| 138 | switch (shader_info.settings.depth) { | ||
| 139 | case CompileDepth::FlowStack: { | ||
| 140 | for (const auto& block : shader_info.blocks) { | ||
| 141 | basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)}); | ||
| 142 | } | ||
| 143 | break; | ||
| 144 | } | ||
| 145 | case CompileDepth::NoFlowStack: { | ||
| 146 | disable_flow_stack = true; | ||
| 147 | const auto insert_block = [this](NodeBlock& nodes, u32 label) { | ||
| 148 | if (label == static_cast<u32>(exit_branch)) { | ||
| 149 | return; | ||
| 150 | } | ||
| 151 | basic_blocks.insert({label, nodes}); | ||
| 152 | }; | ||
| 153 | const auto& blocks = shader_info.blocks; | ||
| 154 | NodeBlock current_block; | ||
| 155 | u32 current_label = static_cast<u32>(exit_branch); | ||
| 156 | for (const auto& block : blocks) { | ||
| 157 | if (shader_info.labels.contains(block.start)) { | ||
| 158 | insert_block(current_block, current_label); | ||
| 159 | current_block.clear(); | ||
| 160 | current_label = block.start; | ||
| 161 | } | ||
| 162 | if (!block.ignore_branch) { | ||
| 163 | DecodeRangeInner(current_block, block.start, block.end); | ||
| 164 | InsertControlFlow(current_block, block); | ||
| 165 | } else { | ||
| 166 | DecodeRangeInner(current_block, block.start, block.end + 1); | ||
| 167 | } | ||
| 168 | } | ||
| 169 | insert_block(current_block, current_label); | ||
| 170 | break; | ||
| 171 | } | ||
| 172 | case CompileDepth::DecompileBackwards: | ||
| 173 | case CompileDepth::FullDecompile: { | ||
| 174 | program_manager = std::move(shader_info.manager); | ||
| 175 | disable_flow_stack = true; | ||
| 176 | decompiled = true; | ||
| 177 | ASTDecoder decoder{*this}; | ||
| 178 | ASTNode program = GetASTProgram(); | ||
| 179 | decoder.Visit(program); | ||
| 180 | break; | ||
| 181 | } | ||
| 182 | default: | ||
| 183 | LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!"); | ||
| 184 | [[fallthrough]]; | ||
| 185 | case CompileDepth::BruteForce: { | ||
| 186 | const auto shader_end = static_cast<u32>(program_code.size()); | ||
| 187 | coverage_begin = main_offset; | ||
| 188 | coverage_end = shader_end; | ||
| 189 | for (u32 label = main_offset; label < shader_end; ++label) { | ||
| 190 | basic_blocks.insert({label, DecodeRange(label, label + 1)}); | ||
| 191 | } | ||
| 192 | break; | ||
| 193 | } | ||
| 194 | } | ||
| 195 | if (settings.depth != shader_info.settings.depth) { | ||
| 196 | LOG_WARNING( | ||
| 197 | HW_GPU, "Decompiling to this setting \"{}\" failed, downgrading to this setting \"{}\"", | ||
| 198 | CompileDepthAsString(settings.depth), CompileDepthAsString(shader_info.settings.depth)); | ||
| 199 | } | ||
| 200 | } | ||
| 201 | |||
| 202 | NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) { | ||
| 203 | NodeBlock basic_block; | ||
| 204 | DecodeRangeInner(basic_block, begin, end); | ||
| 205 | return basic_block; | ||
| 206 | } | ||
| 207 | |||
| 208 | void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) { | ||
| 209 | for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) { | ||
| 210 | pc = DecodeInstr(bb, pc); | ||
| 211 | } | ||
| 212 | } | ||
| 213 | |||
| 214 | void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { | ||
| 215 | const auto apply_conditions = [&](const Condition& cond, Node n) -> Node { | ||
| 216 | Node result = n; | ||
| 217 | if (cond.cc != ConditionCode::T) { | ||
| 218 | result = Conditional(GetConditionCode(cond.cc), {result}); | ||
| 219 | } | ||
| 220 | if (cond.predicate != Pred::UnusedIndex) { | ||
| 221 | u32 pred = static_cast<u32>(cond.predicate); | ||
| 222 | const bool is_neg = pred > 7; | ||
| 223 | if (is_neg) { | ||
| 224 | pred -= 8; | ||
| 225 | } | ||
| 226 | result = Conditional(GetPredicate(pred, is_neg), {result}); | ||
| 227 | } | ||
| 228 | return result; | ||
| 229 | }; | ||
| 230 | if (std::holds_alternative<SingleBranch>(*block.branch)) { | ||
| 231 | auto branch = std::get_if<SingleBranch>(block.branch.get()); | ||
| 232 | if (branch->address < 0) { | ||
| 233 | if (branch->kill) { | ||
| 234 | Node n = Operation(OperationCode::Discard); | ||
| 235 | n = apply_conditions(branch->condition, n); | ||
| 236 | bb.push_back(n); | ||
| 237 | global_code.push_back(n); | ||
| 238 | return; | ||
| 239 | } | ||
| 240 | Node n = Operation(OperationCode::Exit); | ||
| 241 | n = apply_conditions(branch->condition, n); | ||
| 242 | bb.push_back(n); | ||
| 243 | global_code.push_back(n); | ||
| 244 | return; | ||
| 245 | } | ||
| 246 | Node n = Operation(OperationCode::Branch, Immediate(branch->address)); | ||
| 247 | n = apply_conditions(branch->condition, n); | ||
| 248 | bb.push_back(n); | ||
| 249 | global_code.push_back(n); | ||
| 250 | return; | ||
| 251 | } | ||
| 252 | auto multi_branch = std::get_if<MultiBranch>(block.branch.get()); | ||
| 253 | Node op_a = GetRegister(multi_branch->gpr); | ||
| 254 | for (auto& branch_case : multi_branch->branches) { | ||
| 255 | Node n = Operation(OperationCode::Branch, Immediate(branch_case.address)); | ||
| 256 | Node op_b = Immediate(branch_case.cmp_value); | ||
| 257 | Node condition = | ||
| 258 | GetPredicateComparisonInteger(Tegra::Shader::PredCondition::EQ, false, op_a, op_b); | ||
| 259 | auto result = Conditional(condition, {n}); | ||
| 260 | bb.push_back(result); | ||
| 261 | global_code.push_back(result); | ||
| 262 | } | ||
| 263 | } | ||
| 264 | |||
| 265 | u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { | ||
| 266 | // Ignore sched instructions when generating code. | ||
| 267 | if (IsSchedInstruction(pc, main_offset)) { | ||
| 268 | return pc + 1; | ||
| 269 | } | ||
| 270 | |||
| 271 | const Instruction instr = {program_code[pc]}; | ||
| 272 | const auto opcode = OpCode::Decode(instr); | ||
| 273 | const u32 nv_address = ConvertAddressToNvidiaSpace(pc); | ||
| 274 | |||
| 275 | // Decoding failure | ||
| 276 | if (!opcode) { | ||
| 277 | UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value); | ||
| 278 | bb.push_back(Comment(fmt::format("{:05x} Unimplemented Shader instruction (0x{:016x})", | ||
| 279 | nv_address, instr.value))); | ||
| 280 | return pc + 1; | ||
| 281 | } | ||
| 282 | |||
| 283 | bb.push_back(Comment( | ||
| 284 | fmt::format("{:05x} {} (0x{:016x})", nv_address, opcode->get().GetName(), instr.value))); | ||
| 285 | |||
| 286 | using Tegra::Shader::Pred; | ||
| 287 | UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute, | ||
| 288 | "NeverExecute predicate not implemented"); | ||
| 289 | |||
| 290 | static const std::map<OpCode::Type, u32 (ShaderIR::*)(NodeBlock&, u32)> decoders = { | ||
| 291 | {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic}, | ||
| 292 | {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate}, | ||
| 293 | {OpCode::Type::Bfe, &ShaderIR::DecodeBfe}, | ||
| 294 | {OpCode::Type::Bfi, &ShaderIR::DecodeBfi}, | ||
| 295 | {OpCode::Type::Shift, &ShaderIR::DecodeShift}, | ||
| 296 | {OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger}, | ||
| 297 | {OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate}, | ||
| 298 | {OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf}, | ||
| 299 | {OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate}, | ||
| 300 | {OpCode::Type::Ffma, &ShaderIR::DecodeFfma}, | ||
| 301 | {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, | ||
| 302 | {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, | ||
| 303 | {OpCode::Type::Warp, &ShaderIR::DecodeWarp}, | ||
| 304 | {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, | ||
| 305 | {OpCode::Type::Texture, &ShaderIR::DecodeTexture}, | ||
| 306 | {OpCode::Type::Image, &ShaderIR::DecodeImage}, | ||
| 307 | {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate}, | ||
| 308 | {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate}, | ||
| 309 | {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate}, | ||
| 310 | {OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister}, | ||
| 311 | {OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate}, | ||
| 312 | {OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate}, | ||
| 313 | {OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet}, | ||
| 314 | {OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet}, | ||
| 315 | {OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet}, | ||
| 316 | {OpCode::Type::Video, &ShaderIR::DecodeVideo}, | ||
| 317 | {OpCode::Type::Xmad, &ShaderIR::DecodeXmad}, | ||
| 318 | }; | ||
| 319 | |||
| 320 | std::vector<Node> tmp_block; | ||
| 321 | if (const auto decoder = decoders.find(opcode->get().GetType()); decoder != decoders.end()) { | ||
| 322 | pc = (this->*decoder->second)(tmp_block, pc); | ||
| 323 | } else { | ||
| 324 | pc = DecodeOther(tmp_block, pc); | ||
| 325 | } | ||
| 326 | |||
| 327 | // Some instructions (like SSY) don't have a predicate field, they are always unconditionally | ||
| 328 | // executed. | ||
| 329 | const bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->get().GetId()); | ||
| 330 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 331 | |||
| 332 | if (can_be_predicated && pred_index != static_cast<u32>(Pred::UnusedIndex)) { | ||
| 333 | const Node conditional = | ||
| 334 | Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block)); | ||
| 335 | global_code.push_back(conditional); | ||
| 336 | bb.push_back(conditional); | ||
| 337 | } else { | ||
| 338 | for (auto& node : tmp_block) { | ||
| 339 | global_code.push_back(node); | ||
| 340 | bb.push_back(node); | ||
| 341 | } | ||
| 342 | } | ||
| 343 | |||
| 344 | return pc + 1; | ||
| 345 | } | ||
| 346 | |||
| 347 | void ShaderIR::PostDecode() { | ||
| 348 | // Deduce texture handler size if needed | ||
| 349 | auto gpu_driver = registry.AccessGuestDriverProfile(); | ||
| 350 | DeduceTextureHandlerSize(gpu_driver, used_samplers); | ||
| 351 | // Deduce Indexed Samplers | ||
| 352 | if (!uses_indexed_samplers) { | ||
| 353 | return; | ||
| 354 | } | ||
| 355 | for (auto& sampler : used_samplers) { | ||
| 356 | if (!sampler.is_indexed) { | ||
| 357 | continue; | ||
| 358 | } | ||
| 359 | if (const auto size = TryDeduceSamplerSize(sampler, gpu_driver, used_samplers)) { | ||
| 360 | sampler.size = *size; | ||
| 361 | } else { | ||
| 362 | LOG_CRITICAL(HW_GPU, "Failed to deduce size of indexed sampler"); | ||
| 363 | sampler.size = 1; | ||
| 364 | } | ||
| 365 | } | ||
| 366 | } | ||
| 367 | |||
| 368 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp deleted file mode 100644 index 15eb700e7..000000000 --- a/src/video_core/shader/decode/arithmetic.cpp +++ /dev/null | |||
| @@ -1,166 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "common/logging/log.h" | ||
| 8 | #include "video_core/engines/shader_bytecode.h" | ||
| 9 | #include "video_core/shader/node_helper.h" | ||
| 10 | #include "video_core/shader/shader_ir.h" | ||
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | using Tegra::Shader::Instruction; | ||
| 15 | using Tegra::Shader::OpCode; | ||
| 16 | using Tegra::Shader::SubOp; | ||
| 17 | |||
| 18 | u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { | ||
| 19 | const Instruction instr = {program_code[pc]}; | ||
| 20 | const auto opcode = OpCode::Decode(instr); | ||
| 21 | |||
| 22 | Node op_a = GetRegister(instr.gpr8); | ||
| 23 | |||
| 24 | Node op_b = [&] { | ||
| 25 | if (instr.is_b_imm) { | ||
| 26 | return GetImmediate19(instr); | ||
| 27 | } else if (instr.is_b_gpr) { | ||
| 28 | return GetRegister(instr.gpr20); | ||
| 29 | } else { | ||
| 30 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 31 | } | ||
| 32 | }(); | ||
| 33 | |||
| 34 | switch (opcode->get().GetId()) { | ||
| 35 | case OpCode::Id::MOV_C: | ||
| 36 | case OpCode::Id::MOV_R: { | ||
| 37 | // MOV does not have neither 'abs' nor 'neg' bits. | ||
| 38 | SetRegister(bb, instr.gpr0, op_b); | ||
| 39 | break; | ||
| 40 | } | ||
| 41 | case OpCode::Id::FMUL_C: | ||
| 42 | case OpCode::Id::FMUL_R: | ||
| 43 | case OpCode::Id::FMUL_IMM: { | ||
| 44 | // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. | ||
| 45 | if (instr.fmul.tab5cb8_2 != 0) { | ||
| 46 | LOG_DEBUG(HW_GPU, "FMUL tab5cb8_2({}) is not implemented", | ||
| 47 | instr.fmul.tab5cb8_2.Value()); | ||
| 48 | } | ||
| 49 | if (instr.fmul.tab5c68_0 != 1) { | ||
| 50 | LOG_DEBUG(HW_GPU, "FMUL tab5cb8_0({}) is not implemented", | ||
| 51 | instr.fmul.tab5c68_0.Value()); | ||
| 52 | } | ||
| 53 | |||
| 54 | op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); | ||
| 55 | |||
| 56 | static constexpr std::array FmulPostFactor = { | ||
| 57 | 1.000f, // None | ||
| 58 | 0.500f, // Divide 2 | ||
| 59 | 0.250f, // Divide 4 | ||
| 60 | 0.125f, // Divide 8 | ||
| 61 | 8.000f, // Mul 8 | ||
| 62 | 4.000f, // Mul 4 | ||
| 63 | 2.000f, // Mul 2 | ||
| 64 | }; | ||
| 65 | |||
| 66 | if (instr.fmul.postfactor != 0) { | ||
| 67 | op_a = Operation(OperationCode::FMul, NO_PRECISE, op_a, | ||
| 68 | Immediate(FmulPostFactor[instr.fmul.postfactor])); | ||
| 69 | } | ||
| 70 | |||
| 71 | // TODO(Rodrigo): Should precise be used when there's a postfactor? | ||
| 72 | Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b); | ||
| 73 | |||
| 74 | value = GetSaturatedFloat(value, instr.alu.saturate_d); | ||
| 75 | |||
| 76 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 77 | SetRegister(bb, instr.gpr0, value); | ||
| 78 | break; | ||
| 79 | } | ||
| 80 | case OpCode::Id::FADD_C: | ||
| 81 | case OpCode::Id::FADD_R: | ||
| 82 | case OpCode::Id::FADD_IMM: { | ||
| 83 | op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); | ||
| 84 | op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); | ||
| 85 | |||
| 86 | Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b); | ||
| 87 | value = GetSaturatedFloat(value, instr.alu.saturate_d); | ||
| 88 | |||
| 89 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 90 | SetRegister(bb, instr.gpr0, value); | ||
| 91 | break; | ||
| 92 | } | ||
| 93 | case OpCode::Id::MUFU: { | ||
| 94 | op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); | ||
| 95 | |||
| 96 | Node value = [&]() { | ||
| 97 | switch (instr.sub_op) { | ||
| 98 | case SubOp::Cos: | ||
| 99 | return Operation(OperationCode::FCos, PRECISE, op_a); | ||
| 100 | case SubOp::Sin: | ||
| 101 | return Operation(OperationCode::FSin, PRECISE, op_a); | ||
| 102 | case SubOp::Ex2: | ||
| 103 | return Operation(OperationCode::FExp2, PRECISE, op_a); | ||
| 104 | case SubOp::Lg2: | ||
| 105 | return Operation(OperationCode::FLog2, PRECISE, op_a); | ||
| 106 | case SubOp::Rcp: | ||
| 107 | return Operation(OperationCode::FDiv, PRECISE, Immediate(1.0f), op_a); | ||
| 108 | case SubOp::Rsq: | ||
| 109 | return Operation(OperationCode::FInverseSqrt, PRECISE, op_a); | ||
| 110 | case SubOp::Sqrt: | ||
| 111 | return Operation(OperationCode::FSqrt, PRECISE, op_a); | ||
| 112 | default: | ||
| 113 | UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}", instr.sub_op.Value()); | ||
| 114 | return Immediate(0); | ||
| 115 | } | ||
| 116 | }(); | ||
| 117 | value = GetSaturatedFloat(value, instr.alu.saturate_d); | ||
| 118 | |||
| 119 | SetRegister(bb, instr.gpr0, value); | ||
| 120 | break; | ||
| 121 | } | ||
| 122 | case OpCode::Id::FMNMX_C: | ||
| 123 | case OpCode::Id::FMNMX_R: | ||
| 124 | case OpCode::Id::FMNMX_IMM: { | ||
| 125 | op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); | ||
| 126 | op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); | ||
| 127 | |||
| 128 | const Node condition = GetPredicate(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0); | ||
| 129 | |||
| 130 | const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b); | ||
| 131 | const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b); | ||
| 132 | const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max); | ||
| 133 | |||
| 134 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 135 | SetRegister(bb, instr.gpr0, value); | ||
| 136 | break; | ||
| 137 | } | ||
| 138 | case OpCode::Id::FCMP_RR: | ||
| 139 | case OpCode::Id::FCMP_RC: | ||
| 140 | case OpCode::Id::FCMP_IMMR: { | ||
| 141 | UNIMPLEMENTED_IF(instr.fcmp.ftz == 0); | ||
| 142 | Node op_c = GetRegister(instr.gpr39); | ||
| 143 | Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f)); | ||
| 144 | SetRegister( | ||
| 145 | bb, instr.gpr0, | ||
| 146 | Operation(OperationCode::Select, std::move(comp), std::move(op_a), std::move(op_b))); | ||
| 147 | break; | ||
| 148 | } | ||
| 149 | case OpCode::Id::RRO_C: | ||
| 150 | case OpCode::Id::RRO_R: | ||
| 151 | case OpCode::Id::RRO_IMM: { | ||
| 152 | LOG_DEBUG(HW_GPU, "(STUBBED) RRO used"); | ||
| 153 | |||
| 154 | // Currently RRO is only implemented as a register move. | ||
| 155 | op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); | ||
| 156 | SetRegister(bb, instr.gpr0, op_b); | ||
| 157 | break; | ||
| 158 | } | ||
| 159 | default: | ||
| 160 | UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName()); | ||
| 161 | } | ||
| 162 | |||
| 163 | return pc; | ||
| 164 | } | ||
| 165 | |||
| 166 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp deleted file mode 100644 index 88103fede..000000000 --- a/src/video_core/shader/decode/arithmetic_half.cpp +++ /dev/null | |||
| @@ -1,101 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "common/logging/log.h" | ||
| 8 | #include "video_core/engines/shader_bytecode.h" | ||
| 9 | #include "video_core/shader/node_helper.h" | ||
| 10 | #include "video_core/shader/shader_ir.h" | ||
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | using Tegra::Shader::HalfType; | ||
| 15 | using Tegra::Shader::Instruction; | ||
| 16 | using Tegra::Shader::OpCode; | ||
| 17 | |||
| 18 | u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { | ||
| 19 | const Instruction instr = {program_code[pc]}; | ||
| 20 | const auto opcode = OpCode::Decode(instr); | ||
| 21 | |||
| 22 | bool negate_a = false; | ||
| 23 | bool negate_b = false; | ||
| 24 | bool absolute_a = false; | ||
| 25 | bool absolute_b = false; | ||
| 26 | |||
| 27 | switch (opcode->get().GetId()) { | ||
| 28 | case OpCode::Id::HADD2_R: | ||
| 29 | if (instr.alu_half.ftz == 0) { | ||
| 30 | LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); | ||
| 31 | } | ||
| 32 | negate_a = ((instr.value >> 43) & 1) != 0; | ||
| 33 | negate_b = ((instr.value >> 31) & 1) != 0; | ||
| 34 | absolute_a = ((instr.value >> 44) & 1) != 0; | ||
| 35 | absolute_b = ((instr.value >> 30) & 1) != 0; | ||
| 36 | break; | ||
| 37 | case OpCode::Id::HADD2_C: | ||
| 38 | if (instr.alu_half.ftz == 0) { | ||
| 39 | LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); | ||
| 40 | } | ||
| 41 | negate_a = ((instr.value >> 43) & 1) != 0; | ||
| 42 | negate_b = ((instr.value >> 56) & 1) != 0; | ||
| 43 | absolute_a = ((instr.value >> 44) & 1) != 0; | ||
| 44 | absolute_b = ((instr.value >> 54) & 1) != 0; | ||
| 45 | break; | ||
| 46 | case OpCode::Id::HMUL2_R: | ||
| 47 | negate_a = ((instr.value >> 43) & 1) != 0; | ||
| 48 | absolute_a = ((instr.value >> 44) & 1) != 0; | ||
| 49 | absolute_b = ((instr.value >> 30) & 1) != 0; | ||
| 50 | break; | ||
| 51 | case OpCode::Id::HMUL2_C: | ||
| 52 | negate_b = ((instr.value >> 31) & 1) != 0; | ||
| 53 | absolute_a = ((instr.value >> 44) & 1) != 0; | ||
| 54 | absolute_b = ((instr.value >> 54) & 1) != 0; | ||
| 55 | break; | ||
| 56 | default: | ||
| 57 | UNREACHABLE(); | ||
| 58 | break; | ||
| 59 | } | ||
| 60 | |||
| 61 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a); | ||
| 62 | op_a = GetOperandAbsNegHalf(op_a, absolute_a, negate_a); | ||
| 63 | |||
| 64 | auto [type_b, op_b] = [this, instr, opcode]() -> std::pair<HalfType, Node> { | ||
| 65 | switch (opcode->get().GetId()) { | ||
| 66 | case OpCode::Id::HADD2_C: | ||
| 67 | case OpCode::Id::HMUL2_C: | ||
| 68 | return {HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; | ||
| 69 | case OpCode::Id::HADD2_R: | ||
| 70 | case OpCode::Id::HMUL2_R: | ||
| 71 | return {instr.alu_half.type_b, GetRegister(instr.gpr20)}; | ||
| 72 | default: | ||
| 73 | UNREACHABLE(); | ||
| 74 | return {HalfType::F32, Immediate(0)}; | ||
| 75 | } | ||
| 76 | }(); | ||
| 77 | op_b = UnpackHalfFloat(op_b, type_b); | ||
| 78 | op_b = GetOperandAbsNegHalf(op_b, absolute_b, negate_b); | ||
| 79 | |||
| 80 | Node value = [this, opcode, op_a, op_b = op_b] { | ||
| 81 | switch (opcode->get().GetId()) { | ||
| 82 | case OpCode::Id::HADD2_C: | ||
| 83 | case OpCode::Id::HADD2_R: | ||
| 84 | return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); | ||
| 85 | case OpCode::Id::HMUL2_C: | ||
| 86 | case OpCode::Id::HMUL2_R: | ||
| 87 | return Operation(OperationCode::HMul, PRECISE, op_a, op_b); | ||
| 88 | default: | ||
| 89 | UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName()); | ||
| 90 | return Immediate(0); | ||
| 91 | } | ||
| 92 | }(); | ||
| 93 | value = GetSaturatedHalfFloat(value, instr.alu_half.saturate); | ||
| 94 | value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge); | ||
| 95 | |||
| 96 | SetRegister(bb, instr.gpr0, value); | ||
| 97 | |||
| 98 | return pc; | ||
| 99 | } | ||
| 100 | |||
| 101 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp deleted file mode 100644 index d179b9873..000000000 --- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp +++ /dev/null | |||
| @@ -1,54 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "common/logging/log.h" | ||
| 8 | #include "video_core/engines/shader_bytecode.h" | ||
| 9 | #include "video_core/shader/node_helper.h" | ||
| 10 | #include "video_core/shader/shader_ir.h" | ||
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | using Tegra::Shader::Instruction; | ||
| 15 | using Tegra::Shader::OpCode; | ||
| 16 | |||
| 17 | u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) { | ||
| 18 | const Instruction instr = {program_code[pc]}; | ||
| 19 | const auto opcode = OpCode::Decode(instr); | ||
| 20 | |||
| 21 | if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) { | ||
| 22 | if (instr.alu_half_imm.ftz == 0) { | ||
| 23 | LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); | ||
| 24 | } | ||
| 25 | } else { | ||
| 26 | if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::FTZ) { | ||
| 27 | LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); | ||
| 28 | } | ||
| 29 | } | ||
| 30 | |||
| 31 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a); | ||
| 32 | op_a = GetOperandAbsNegHalf(op_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a); | ||
| 33 | |||
| 34 | const Node op_b = UnpackHalfImmediate(instr, true); | ||
| 35 | |||
| 36 | Node value = [&]() { | ||
| 37 | switch (opcode->get().GetId()) { | ||
| 38 | case OpCode::Id::HADD2_IMM: | ||
| 39 | return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); | ||
| 40 | case OpCode::Id::HMUL2_IMM: | ||
| 41 | return Operation(OperationCode::HMul, PRECISE, op_a, op_b); | ||
| 42 | default: | ||
| 43 | UNREACHABLE(); | ||
| 44 | return Immediate(0); | ||
| 45 | } | ||
| 46 | }(); | ||
| 47 | |||
| 48 | value = GetSaturatedHalfFloat(value, instr.alu_half_imm.saturate); | ||
| 49 | value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge); | ||
| 50 | SetRegister(bb, instr.gpr0, value); | ||
| 51 | return pc; | ||
| 52 | } | ||
| 53 | |||
| 54 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp deleted file mode 100644 index f1875967c..000000000 --- a/src/video_core/shader/decode/arithmetic_immediate.cpp +++ /dev/null | |||
| @@ -1,53 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | |||
| 16 | u32 ShaderIR::DecodeArithmeticImmediate(NodeBlock& bb, u32 pc) { | ||
| 17 | const Instruction instr = {program_code[pc]}; | ||
| 18 | const auto opcode = OpCode::Decode(instr); | ||
| 19 | |||
| 20 | switch (opcode->get().GetId()) { | ||
| 21 | case OpCode::Id::MOV32_IMM: { | ||
| 22 | SetRegister(bb, instr.gpr0, GetImmediate32(instr)); | ||
| 23 | break; | ||
| 24 | } | ||
| 25 | case OpCode::Id::FMUL32_IMM: { | ||
| 26 | Node value = | ||
| 27 | Operation(OperationCode::FMul, PRECISE, GetRegister(instr.gpr8), GetImmediate32(instr)); | ||
| 28 | value = GetSaturatedFloat(value, instr.fmul32.saturate); | ||
| 29 | |||
| 30 | SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc); | ||
| 31 | SetRegister(bb, instr.gpr0, value); | ||
| 32 | break; | ||
| 33 | } | ||
| 34 | case OpCode::Id::FADD32I: { | ||
| 35 | const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fadd32i.abs_a, | ||
| 36 | instr.fadd32i.negate_a); | ||
| 37 | const Node op_b = GetOperandAbsNegFloat(GetImmediate32(instr), instr.fadd32i.abs_b, | ||
| 38 | instr.fadd32i.negate_b); | ||
| 39 | |||
| 40 | const Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b); | ||
| 41 | SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc); | ||
| 42 | SetRegister(bb, instr.gpr0, value); | ||
| 43 | break; | ||
| 44 | } | ||
| 45 | default: | ||
| 46 | UNIMPLEMENTED_MSG("Unhandled arithmetic immediate instruction: {}", | ||
| 47 | opcode->get().GetName()); | ||
| 48 | } | ||
| 49 | |||
| 50 | return pc; | ||
| 51 | } | ||
| 52 | |||
| 53 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp deleted file mode 100644 index 7b5bb7003..000000000 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ /dev/null | |||
| @@ -1,375 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::IAdd3Height; | ||
| 14 | using Tegra::Shader::Instruction; | ||
| 15 | using Tegra::Shader::OpCode; | ||
| 16 | using Tegra::Shader::Pred; | ||
| 17 | using Tegra::Shader::Register; | ||
| 18 | |||
| 19 | u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { | ||
| 20 | const Instruction instr = {program_code[pc]}; | ||
| 21 | const auto opcode = OpCode::Decode(instr); | ||
| 22 | |||
| 23 | Node op_a = GetRegister(instr.gpr8); | ||
| 24 | Node op_b = [&]() { | ||
| 25 | if (instr.is_b_imm) { | ||
| 26 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 27 | } else if (instr.is_b_gpr) { | ||
| 28 | return GetRegister(instr.gpr20); | ||
| 29 | } else { | ||
| 30 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 31 | } | ||
| 32 | }(); | ||
| 33 | |||
| 34 | switch (opcode->get().GetId()) { | ||
| 35 | case OpCode::Id::IADD_C: | ||
| 36 | case OpCode::Id::IADD_R: | ||
| 37 | case OpCode::Id::IADD_IMM: { | ||
| 38 | UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD.SAT"); | ||
| 39 | UNIMPLEMENTED_IF_MSG(instr.iadd.x && instr.generates_cc, "IADD.X Rd.CC"); | ||
| 40 | |||
| 41 | op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); | ||
| 42 | op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); | ||
| 43 | |||
| 44 | Node value = Operation(OperationCode::UAdd, op_a, op_b); | ||
| 45 | |||
| 46 | if (instr.iadd.x) { | ||
| 47 | Node carry = GetInternalFlag(InternalFlag::Carry); | ||
| 48 | Node x = Operation(OperationCode::Select, std::move(carry), Immediate(1), Immediate(0)); | ||
| 49 | value = Operation(OperationCode::UAdd, std::move(value), std::move(x)); | ||
| 50 | } | ||
| 51 | |||
| 52 | if (instr.generates_cc) { | ||
| 53 | const Node i0 = Immediate(0); | ||
| 54 | |||
| 55 | Node zero = Operation(OperationCode::LogicalIEqual, value, i0); | ||
| 56 | Node sign = Operation(OperationCode::LogicalILessThan, value, i0); | ||
| 57 | Node carry = Operation(OperationCode::LogicalAddCarry, op_a, op_b); | ||
| 58 | |||
| 59 | Node pos_a = Operation(OperationCode::LogicalIGreaterThan, op_a, i0); | ||
| 60 | Node pos_b = Operation(OperationCode::LogicalIGreaterThan, op_b, i0); | ||
| 61 | Node pos = Operation(OperationCode::LogicalAnd, std::move(pos_a), std::move(pos_b)); | ||
| 62 | Node overflow = Operation(OperationCode::LogicalAnd, pos, sign); | ||
| 63 | |||
| 64 | SetInternalFlag(bb, InternalFlag::Zero, std::move(zero)); | ||
| 65 | SetInternalFlag(bb, InternalFlag::Sign, std::move(sign)); | ||
| 66 | SetInternalFlag(bb, InternalFlag::Carry, std::move(carry)); | ||
| 67 | SetInternalFlag(bb, InternalFlag::Overflow, std::move(overflow)); | ||
| 68 | } | ||
| 69 | SetRegister(bb, instr.gpr0, std::move(value)); | ||
| 70 | break; | ||
| 71 | } | ||
| 72 | case OpCode::Id::IADD3_C: | ||
| 73 | case OpCode::Id::IADD3_R: | ||
| 74 | case OpCode::Id::IADD3_IMM: { | ||
| 75 | Node op_c = GetRegister(instr.gpr39); | ||
| 76 | |||
| 77 | const auto ApplyHeight = [&](IAdd3Height height, Node value) { | ||
| 78 | switch (height) { | ||
| 79 | case IAdd3Height::None: | ||
| 80 | return value; | ||
| 81 | case IAdd3Height::LowerHalfWord: | ||
| 82 | return BitfieldExtract(value, 0, 16); | ||
| 83 | case IAdd3Height::UpperHalfWord: | ||
| 84 | return BitfieldExtract(value, 16, 16); | ||
| 85 | default: | ||
| 86 | UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", height); | ||
| 87 | return Immediate(0); | ||
| 88 | } | ||
| 89 | }; | ||
| 90 | |||
| 91 | if (opcode->get().GetId() == OpCode::Id::IADD3_R) { | ||
| 92 | op_a = ApplyHeight(instr.iadd3.height_a, op_a); | ||
| 93 | op_b = ApplyHeight(instr.iadd3.height_b, op_b); | ||
| 94 | op_c = ApplyHeight(instr.iadd3.height_c, op_c); | ||
| 95 | } | ||
| 96 | |||
| 97 | op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd3.neg_a, true); | ||
| 98 | op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true); | ||
| 99 | op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true); | ||
| 100 | |||
| 101 | const Node value = [&] { | ||
| 102 | Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b); | ||
| 103 | if (opcode->get().GetId() != OpCode::Id::IADD3_R) { | ||
| 104 | return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c); | ||
| 105 | } | ||
| 106 | const Node shifted = [&] { | ||
| 107 | switch (instr.iadd3.mode) { | ||
| 108 | case Tegra::Shader::IAdd3Mode::RightShift: | ||
| 109 | // TODO(tech4me): According to | ||
| 110 | // https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3 | ||
| 111 | // The addition between op_a and op_b should be done in uint33, more | ||
| 112 | // investigation required | ||
| 113 | return Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, add_ab, | ||
| 114 | Immediate(16)); | ||
| 115 | case Tegra::Shader::IAdd3Mode::LeftShift: | ||
| 116 | return Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, add_ab, | ||
| 117 | Immediate(16)); | ||
| 118 | default: | ||
| 119 | return add_ab; | ||
| 120 | } | ||
| 121 | }(); | ||
| 122 | return Operation(OperationCode::IAdd, NO_PRECISE, shifted, op_c); | ||
| 123 | }(); | ||
| 124 | |||
| 125 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 126 | SetRegister(bb, instr.gpr0, value); | ||
| 127 | break; | ||
| 128 | } | ||
| 129 | case OpCode::Id::ISCADD_C: | ||
| 130 | case OpCode::Id::ISCADD_R: | ||
| 131 | case OpCode::Id::ISCADD_IMM: { | ||
| 132 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 133 | "Condition codes generation in ISCADD is not implemented"); | ||
| 134 | |||
| 135 | op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); | ||
| 136 | op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); | ||
| 137 | |||
| 138 | const Node shift = Immediate(static_cast<u32>(instr.alu_integer.shift_amount)); | ||
| 139 | const Node shifted_a = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, shift); | ||
| 140 | const Node value = Operation(OperationCode::IAdd, NO_PRECISE, shifted_a, op_b); | ||
| 141 | |||
| 142 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 143 | SetRegister(bb, instr.gpr0, value); | ||
| 144 | break; | ||
| 145 | } | ||
| 146 | case OpCode::Id::POPC_C: | ||
| 147 | case OpCode::Id::POPC_R: | ||
| 148 | case OpCode::Id::POPC_IMM: { | ||
| 149 | if (instr.popc.invert) { | ||
| 150 | op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); | ||
| 151 | } | ||
| 152 | const Node value = Operation(OperationCode::IBitCount, PRECISE, op_b); | ||
| 153 | SetRegister(bb, instr.gpr0, value); | ||
| 154 | break; | ||
| 155 | } | ||
| 156 | case OpCode::Id::FLO_R: | ||
| 157 | case OpCode::Id::FLO_C: | ||
| 158 | case OpCode::Id::FLO_IMM: { | ||
| 159 | Node value; | ||
| 160 | if (instr.flo.invert) { | ||
| 161 | op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b)); | ||
| 162 | } | ||
| 163 | if (instr.flo.is_signed) { | ||
| 164 | value = Operation(OperationCode::IBitMSB, NO_PRECISE, std::move(op_b)); | ||
| 165 | } else { | ||
| 166 | value = Operation(OperationCode::UBitMSB, NO_PRECISE, std::move(op_b)); | ||
| 167 | } | ||
| 168 | if (instr.flo.sh) { | ||
| 169 | value = | ||
| 170 | Operation(OperationCode::UBitwiseXor, NO_PRECISE, std::move(value), Immediate(31)); | ||
| 171 | } | ||
| 172 | SetRegister(bb, instr.gpr0, std::move(value)); | ||
| 173 | break; | ||
| 174 | } | ||
| 175 | case OpCode::Id::SEL_C: | ||
| 176 | case OpCode::Id::SEL_R: | ||
| 177 | case OpCode::Id::SEL_IMM: { | ||
| 178 | const Node condition = GetPredicate(instr.sel.pred, instr.sel.neg_pred != 0); | ||
| 179 | const Node value = Operation(OperationCode::Select, PRECISE, condition, op_a, op_b); | ||
| 180 | SetRegister(bb, instr.gpr0, value); | ||
| 181 | break; | ||
| 182 | } | ||
| 183 | case OpCode::Id::ICMP_CR: | ||
| 184 | case OpCode::Id::ICMP_R: | ||
| 185 | case OpCode::Id::ICMP_RC: | ||
| 186 | case OpCode::Id::ICMP_IMM: { | ||
| 187 | const Node zero = Immediate(0); | ||
| 188 | |||
| 189 | const auto [op_rhs, test] = [&]() -> std::pair<Node, Node> { | ||
| 190 | switch (opcode->get().GetId()) { | ||
| 191 | case OpCode::Id::ICMP_CR: | ||
| 192 | return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), | ||
| 193 | GetRegister(instr.gpr39)}; | ||
| 194 | case OpCode::Id::ICMP_R: | ||
| 195 | return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; | ||
| 196 | case OpCode::Id::ICMP_RC: | ||
| 197 | return {GetRegister(instr.gpr39), | ||
| 198 | GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; | ||
| 199 | case OpCode::Id::ICMP_IMM: | ||
| 200 | return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)}; | ||
| 201 | default: | ||
| 202 | UNREACHABLE(); | ||
| 203 | return {zero, zero}; | ||
| 204 | } | ||
| 205 | }(); | ||
| 206 | const Node op_lhs = GetRegister(instr.gpr8); | ||
| 207 | const Node comparison = | ||
| 208 | GetPredicateComparisonInteger(instr.icmp.cond, instr.icmp.is_signed != 0, test, zero); | ||
| 209 | SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_lhs, op_rhs)); | ||
| 210 | break; | ||
| 211 | } | ||
| 212 | case OpCode::Id::LOP_C: | ||
| 213 | case OpCode::Id::LOP_R: | ||
| 214 | case OpCode::Id::LOP_IMM: { | ||
| 215 | if (instr.alu.lop.invert_a) | ||
| 216 | op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a); | ||
| 217 | if (instr.alu.lop.invert_b) | ||
| 218 | op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); | ||
| 219 | |||
| 220 | WriteLogicOperation(bb, instr.gpr0, instr.alu.lop.operation, op_a, op_b, | ||
| 221 | instr.alu.lop.pred_result_mode, instr.alu.lop.pred48, | ||
| 222 | instr.generates_cc); | ||
| 223 | break; | ||
| 224 | } | ||
| 225 | case OpCode::Id::LOP3_C: | ||
| 226 | case OpCode::Id::LOP3_R: | ||
| 227 | case OpCode::Id::LOP3_IMM: { | ||
| 228 | const Node op_c = GetRegister(instr.gpr39); | ||
| 229 | const Node lut = [&]() { | ||
| 230 | if (opcode->get().GetId() == OpCode::Id::LOP3_R) { | ||
| 231 | return Immediate(instr.alu.lop3.GetImmLut28()); | ||
| 232 | } else { | ||
| 233 | return Immediate(instr.alu.lop3.GetImmLut48()); | ||
| 234 | } | ||
| 235 | }(); | ||
| 236 | |||
| 237 | WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc); | ||
| 238 | break; | ||
| 239 | } | ||
| 240 | case OpCode::Id::IMNMX_C: | ||
| 241 | case OpCode::Id::IMNMX_R: | ||
| 242 | case OpCode::Id::IMNMX_IMM: { | ||
| 243 | UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None); | ||
| 244 | |||
| 245 | const bool is_signed = instr.imnmx.is_signed; | ||
| 246 | |||
| 247 | const Node condition = GetPredicate(instr.imnmx.pred, instr.imnmx.negate_pred != 0); | ||
| 248 | const Node min = SignedOperation(OperationCode::IMin, is_signed, NO_PRECISE, op_a, op_b); | ||
| 249 | const Node max = SignedOperation(OperationCode::IMax, is_signed, NO_PRECISE, op_a, op_b); | ||
| 250 | const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max); | ||
| 251 | |||
| 252 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 253 | SetRegister(bb, instr.gpr0, value); | ||
| 254 | break; | ||
| 255 | } | ||
| 256 | case OpCode::Id::LEA_R2: | ||
| 257 | case OpCode::Id::LEA_R1: | ||
| 258 | case OpCode::Id::LEA_IMM: | ||
| 259 | case OpCode::Id::LEA_RZ: | ||
| 260 | case OpCode::Id::LEA_HI: { | ||
| 261 | auto [op_a_, op_b_, op_c_] = [&]() -> std::tuple<Node, Node, Node> { | ||
| 262 | switch (opcode->get().GetId()) { | ||
| 263 | case OpCode::Id::LEA_R2: { | ||
| 264 | return {GetRegister(instr.gpr20), GetRegister(instr.gpr39), | ||
| 265 | Immediate(static_cast<u32>(instr.lea.r2.entry_a))}; | ||
| 266 | } | ||
| 267 | case OpCode::Id::LEA_R1: { | ||
| 268 | const bool neg = instr.lea.r1.neg != 0; | ||
| 269 | return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), | ||
| 270 | GetRegister(instr.gpr20), | ||
| 271 | Immediate(static_cast<u32>(instr.lea.r1.entry_a))}; | ||
| 272 | } | ||
| 273 | case OpCode::Id::LEA_IMM: { | ||
| 274 | const bool neg = instr.lea.imm.neg != 0; | ||
| 275 | return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), | ||
| 276 | Immediate(static_cast<u32>(instr.lea.imm.entry_a)), | ||
| 277 | Immediate(static_cast<u32>(instr.lea.imm.entry_b))}; | ||
| 278 | } | ||
| 279 | case OpCode::Id::LEA_RZ: { | ||
| 280 | const bool neg = instr.lea.rz.neg != 0; | ||
| 281 | return {GetConstBuffer(instr.lea.rz.cb_index, instr.lea.rz.cb_offset), | ||
| 282 | GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), | ||
| 283 | Immediate(static_cast<u32>(instr.lea.rz.entry_a))}; | ||
| 284 | } | ||
| 285 | case OpCode::Id::LEA_HI: | ||
| 286 | default: | ||
| 287 | UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName()); | ||
| 288 | |||
| 289 | return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)), GetRegister(instr.gpr8), | ||
| 290 | Immediate(static_cast<u32>(instr.lea.imm.entry_b))}; | ||
| 291 | } | ||
| 292 | }(); | ||
| 293 | |||
| 294 | UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex), | ||
| 295 | "Unhandled LEA Predicate"); | ||
| 296 | |||
| 297 | Node value = | ||
| 298 | Operation(OperationCode::ILogicalShiftLeft, std::move(op_a_), std::move(op_c_)); | ||
| 299 | value = Operation(OperationCode::IAdd, std::move(op_b_), std::move(value)); | ||
| 300 | SetRegister(bb, instr.gpr0, std::move(value)); | ||
| 301 | |||
| 302 | break; | ||
| 303 | } | ||
| 304 | default: | ||
| 305 | UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName()); | ||
| 306 | } | ||
| 307 | |||
| 308 | return pc; | ||
| 309 | } | ||
| 310 | |||
| 311 | void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c, | ||
| 312 | Node imm_lut, bool sets_cc) { | ||
| 313 | const Node lop3_fast = [&](const Node na, const Node nb, const Node nc, const Node ttbl) { | ||
| 314 | Node value = Immediate(0); | ||
| 315 | const ImmediateNode imm = std::get<ImmediateNode>(*ttbl); | ||
| 316 | if (imm.GetValue() & 0x01) { | ||
| 317 | const Node a = Operation(OperationCode::IBitwiseNot, na); | ||
| 318 | const Node b = Operation(OperationCode::IBitwiseNot, nb); | ||
| 319 | const Node c = Operation(OperationCode::IBitwiseNot, nc); | ||
| 320 | Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b); | ||
| 321 | r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); | ||
| 322 | value = Operation(OperationCode::IBitwiseOr, value, r); | ||
| 323 | } | ||
| 324 | if (imm.GetValue() & 0x02) { | ||
| 325 | const Node a = Operation(OperationCode::IBitwiseNot, na); | ||
| 326 | const Node b = Operation(OperationCode::IBitwiseNot, nb); | ||
| 327 | Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b); | ||
| 328 | r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); | ||
| 329 | value = Operation(OperationCode::IBitwiseOr, value, r); | ||
| 330 | } | ||
| 331 | if (imm.GetValue() & 0x04) { | ||
| 332 | const Node a = Operation(OperationCode::IBitwiseNot, na); | ||
| 333 | const Node c = Operation(OperationCode::IBitwiseNot, nc); | ||
| 334 | Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb); | ||
| 335 | r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); | ||
| 336 | value = Operation(OperationCode::IBitwiseOr, value, r); | ||
| 337 | } | ||
| 338 | if (imm.GetValue() & 0x08) { | ||
| 339 | const Node a = Operation(OperationCode::IBitwiseNot, na); | ||
| 340 | Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb); | ||
| 341 | r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); | ||
| 342 | value = Operation(OperationCode::IBitwiseOr, value, r); | ||
| 343 | } | ||
| 344 | if (imm.GetValue() & 0x10) { | ||
| 345 | const Node b = Operation(OperationCode::IBitwiseNot, nb); | ||
| 346 | const Node c = Operation(OperationCode::IBitwiseNot, nc); | ||
| 347 | Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b); | ||
| 348 | r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); | ||
| 349 | value = Operation(OperationCode::IBitwiseOr, value, r); | ||
| 350 | } | ||
| 351 | if (imm.GetValue() & 0x20) { | ||
| 352 | const Node b = Operation(OperationCode::IBitwiseNot, nb); | ||
| 353 | Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b); | ||
| 354 | r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); | ||
| 355 | value = Operation(OperationCode::IBitwiseOr, value, r); | ||
| 356 | } | ||
| 357 | if (imm.GetValue() & 0x40) { | ||
| 358 | const Node c = Operation(OperationCode::IBitwiseNot, nc); | ||
| 359 | Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb); | ||
| 360 | r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); | ||
| 361 | value = Operation(OperationCode::IBitwiseOr, value, r); | ||
| 362 | } | ||
| 363 | if (imm.GetValue() & 0x80) { | ||
| 364 | Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb); | ||
| 365 | r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); | ||
| 366 | value = Operation(OperationCode::IBitwiseOr, value, r); | ||
| 367 | } | ||
| 368 | return value; | ||
| 369 | }(op_a, op_b, op_c, imm_lut); | ||
| 370 | |||
| 371 | SetInternalFlagsFromInteger(bb, lop3_fast, sets_cc); | ||
| 372 | SetRegister(bb, dest, lop3_fast); | ||
| 373 | } | ||
| 374 | |||
| 375 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp deleted file mode 100644 index 73580277a..000000000 --- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp +++ /dev/null | |||
| @@ -1,99 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::LogicOperation; | ||
| 15 | using Tegra::Shader::OpCode; | ||
| 16 | using Tegra::Shader::Pred; | ||
| 17 | using Tegra::Shader::PredicateResultMode; | ||
| 18 | using Tegra::Shader::Register; | ||
| 19 | |||
| 20 | u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) { | ||
| 21 | const Instruction instr = {program_code[pc]}; | ||
| 22 | const auto opcode = OpCode::Decode(instr); | ||
| 23 | |||
| 24 | Node op_a = GetRegister(instr.gpr8); | ||
| 25 | Node op_b = Immediate(static_cast<s32>(instr.alu.imm20_32)); | ||
| 26 | |||
| 27 | switch (opcode->get().GetId()) { | ||
| 28 | case OpCode::Id::IADD32I: { | ||
| 29 | UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented"); | ||
| 30 | |||
| 31 | op_a = GetOperandAbsNegInteger(std::move(op_a), false, instr.iadd32i.negate_a != 0, true); | ||
| 32 | |||
| 33 | Node value = Operation(OperationCode::IAdd, PRECISE, std::move(op_a), std::move(op_b)); | ||
| 34 | |||
| 35 | SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc != 0); | ||
| 36 | SetRegister(bb, instr.gpr0, std::move(value)); | ||
| 37 | break; | ||
| 38 | } | ||
| 39 | case OpCode::Id::LOP32I: { | ||
| 40 | if (instr.alu.lop32i.invert_a) { | ||
| 41 | op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_a)); | ||
| 42 | } | ||
| 43 | |||
| 44 | if (instr.alu.lop32i.invert_b) { | ||
| 45 | op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b)); | ||
| 46 | } | ||
| 47 | |||
| 48 | WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, std::move(op_a), | ||
| 49 | std::move(op_b), PredicateResultMode::None, Pred::UnusedIndex, | ||
| 50 | instr.op_32.generates_cc != 0); | ||
| 51 | break; | ||
| 52 | } | ||
| 53 | default: | ||
| 54 | UNIMPLEMENTED_MSG("Unhandled ArithmeticIntegerImmediate instruction: {}", | ||
| 55 | opcode->get().GetName()); | ||
| 56 | } | ||
| 57 | |||
| 58 | return pc; | ||
| 59 | } | ||
| 60 | |||
| 61 | void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation logic_op, Node op_a, | ||
| 62 | Node op_b, PredicateResultMode predicate_mode, Pred predicate, | ||
| 63 | bool sets_cc) { | ||
| 64 | Node result = [&] { | ||
| 65 | switch (logic_op) { | ||
| 66 | case LogicOperation::And: | ||
| 67 | return Operation(OperationCode::IBitwiseAnd, PRECISE, std::move(op_a), std::move(op_b)); | ||
| 68 | case LogicOperation::Or: | ||
| 69 | return Operation(OperationCode::IBitwiseOr, PRECISE, std::move(op_a), std::move(op_b)); | ||
| 70 | case LogicOperation::Xor: | ||
| 71 | return Operation(OperationCode::IBitwiseXor, PRECISE, std::move(op_a), std::move(op_b)); | ||
| 72 | case LogicOperation::PassB: | ||
| 73 | return op_b; | ||
| 74 | default: | ||
| 75 | UNIMPLEMENTED_MSG("Unimplemented logic operation={}", logic_op); | ||
| 76 | return Immediate(0); | ||
| 77 | } | ||
| 78 | }(); | ||
| 79 | |||
| 80 | SetInternalFlagsFromInteger(bb, result, sets_cc); | ||
| 81 | SetRegister(bb, dest, result); | ||
| 82 | |||
| 83 | // Write the predicate value depending on the predicate mode. | ||
| 84 | switch (predicate_mode) { | ||
| 85 | case PredicateResultMode::None: | ||
| 86 | // Do nothing. | ||
| 87 | return; | ||
| 88 | case PredicateResultMode::NotZero: { | ||
| 89 | // Set the predicate to true if the result is not zero. | ||
| 90 | Node compare = Operation(OperationCode::LogicalINotEqual, std::move(result), Immediate(0)); | ||
| 91 | SetPredicate(bb, static_cast<u64>(predicate), std::move(compare)); | ||
| 92 | break; | ||
| 93 | } | ||
| 94 | default: | ||
| 95 | UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}", predicate_mode); | ||
| 96 | } | ||
| 97 | } | ||
| 98 | |||
| 99 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp deleted file mode 100644 index 8e3b46e8e..000000000 --- a/src/video_core/shader/decode/bfe.cpp +++ /dev/null | |||
| @@ -1,77 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | |||
| 16 | u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) { | ||
| 17 | const Instruction instr = {program_code[pc]}; | ||
| 18 | const auto opcode = OpCode::Decode(instr); | ||
| 19 | |||
| 20 | Node op_a = GetRegister(instr.gpr8); | ||
| 21 | Node op_b = [&] { | ||
| 22 | switch (opcode->get().GetId()) { | ||
| 23 | case OpCode::Id::BFE_R: | ||
| 24 | return GetRegister(instr.gpr20); | ||
| 25 | case OpCode::Id::BFE_C: | ||
| 26 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 27 | case OpCode::Id::BFE_IMM: | ||
| 28 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 29 | default: | ||
| 30 | UNREACHABLE(); | ||
| 31 | return Immediate(0); | ||
| 32 | } | ||
| 33 | }(); | ||
| 34 | |||
| 35 | UNIMPLEMENTED_IF_MSG(instr.bfe.rd_cc, "Condition codes in BFE is not implemented"); | ||
| 36 | |||
| 37 | const bool is_signed = instr.bfe.is_signed; | ||
| 38 | |||
| 39 | // using reverse parallel method in | ||
| 40 | // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel | ||
| 41 | // note for later if possible to implement faster method. | ||
| 42 | if (instr.bfe.brev) { | ||
| 43 | const auto swap = [&](u32 s, u32 mask) { | ||
| 44 | Node v1 = | ||
| 45 | SignedOperation(OperationCode::ILogicalShiftRight, is_signed, op_a, Immediate(s)); | ||
| 46 | if (mask != 0) { | ||
| 47 | v1 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v1), | ||
| 48 | Immediate(mask)); | ||
| 49 | } | ||
| 50 | Node v2 = op_a; | ||
| 51 | if (mask != 0) { | ||
| 52 | v2 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v2), | ||
| 53 | Immediate(mask)); | ||
| 54 | } | ||
| 55 | v2 = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, std::move(v2), | ||
| 56 | Immediate(s)); | ||
| 57 | return SignedOperation(OperationCode::IBitwiseOr, is_signed, std::move(v1), | ||
| 58 | std::move(v2)); | ||
| 59 | }; | ||
| 60 | op_a = swap(1, 0x55555555U); | ||
| 61 | op_a = swap(2, 0x33333333U); | ||
| 62 | op_a = swap(4, 0x0F0F0F0FU); | ||
| 63 | op_a = swap(8, 0x00FF00FFU); | ||
| 64 | op_a = swap(16, 0); | ||
| 65 | } | ||
| 66 | |||
| 67 | const auto offset = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b, | ||
| 68 | Immediate(0), Immediate(8)); | ||
| 69 | const auto bits = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b, | ||
| 70 | Immediate(8), Immediate(8)); | ||
| 71 | auto result = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_a, offset, bits); | ||
| 72 | SetRegister(bb, instr.gpr0, std::move(result)); | ||
| 73 | |||
| 74 | return pc; | ||
| 75 | } | ||
| 76 | |||
| 77 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp deleted file mode 100644 index 70d1c055b..000000000 --- a/src/video_core/shader/decode/bfi.cpp +++ /dev/null | |||
| @@ -1,45 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | |||
| 16 | u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) { | ||
| 17 | const Instruction instr = {program_code[pc]}; | ||
| 18 | const auto opcode = OpCode::Decode(instr); | ||
| 19 | |||
| 20 | const auto [packed_shift, base] = [&]() -> std::pair<Node, Node> { | ||
| 21 | switch (opcode->get().GetId()) { | ||
| 22 | case OpCode::Id::BFI_RC: | ||
| 23 | return {GetRegister(instr.gpr39), | ||
| 24 | GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; | ||
| 25 | case OpCode::Id::BFI_IMM_R: | ||
| 26 | return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)}; | ||
| 27 | default: | ||
| 28 | UNREACHABLE(); | ||
| 29 | return {Immediate(0), Immediate(0)}; | ||
| 30 | } | ||
| 31 | }(); | ||
| 32 | const Node insert = GetRegister(instr.gpr8); | ||
| 33 | const Node offset = BitfieldExtract(packed_shift, 0, 8); | ||
| 34 | const Node bits = BitfieldExtract(packed_shift, 8, 8); | ||
| 35 | |||
| 36 | const Node value = | ||
| 37 | Operation(OperationCode::UBitfieldInsert, PRECISE, base, insert, offset, bits); | ||
| 38 | |||
| 39 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 40 | SetRegister(bb, instr.gpr0, value); | ||
| 41 | |||
| 42 | return pc; | ||
| 43 | } | ||
| 44 | |||
| 45 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp deleted file mode 100644 index fea7a54df..000000000 --- a/src/video_core/shader/decode/conversion.cpp +++ /dev/null | |||
| @@ -1,321 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <limits> | ||
| 6 | #include <optional> | ||
| 7 | #include <utility> | ||
| 8 | |||
| 9 | #include "common/assert.h" | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/engines/shader_bytecode.h" | ||
| 12 | #include "video_core/shader/node_helper.h" | ||
| 13 | #include "video_core/shader/shader_ir.h" | ||
| 14 | |||
| 15 | namespace VideoCommon::Shader { | ||
| 16 | |||
| 17 | using Tegra::Shader::Instruction; | ||
| 18 | using Tegra::Shader::OpCode; | ||
| 19 | using Tegra::Shader::Register; | ||
| 20 | |||
| 21 | namespace { | ||
| 22 | |||
| 23 | constexpr OperationCode GetFloatSelector(u64 selector) { | ||
| 24 | return selector == 0 ? OperationCode::FCastHalf0 : OperationCode::FCastHalf1; | ||
| 25 | } | ||
| 26 | |||
| 27 | constexpr u32 SizeInBits(Register::Size size) { | ||
| 28 | switch (size) { | ||
| 29 | case Register::Size::Byte: | ||
| 30 | return 8; | ||
| 31 | case Register::Size::Short: | ||
| 32 | return 16; | ||
| 33 | case Register::Size::Word: | ||
| 34 | return 32; | ||
| 35 | case Register::Size::Long: | ||
| 36 | return 64; | ||
| 37 | } | ||
| 38 | return 0; | ||
| 39 | } | ||
| 40 | |||
| 41 | constexpr std::optional<std::pair<s32, s32>> IntegerSaturateBounds(Register::Size src_size, | ||
| 42 | Register::Size dst_size, | ||
| 43 | bool src_signed, | ||
| 44 | bool dst_signed) { | ||
| 45 | const u32 dst_bits = SizeInBits(dst_size); | ||
| 46 | if (src_size == Register::Size::Word && dst_size == Register::Size::Word) { | ||
| 47 | if (src_signed == dst_signed) { | ||
| 48 | return std::nullopt; | ||
| 49 | } | ||
| 50 | return std::make_pair(0, std::numeric_limits<s32>::max()); | ||
| 51 | } | ||
| 52 | if (dst_signed) { | ||
| 53 | // Signed destination, clamp to [-128, 127] for instance | ||
| 54 | return std::make_pair(-(1 << (dst_bits - 1)), (1 << (dst_bits - 1)) - 1); | ||
| 55 | } else { | ||
| 56 | // Unsigned destination | ||
| 57 | if (dst_bits == 32) { | ||
| 58 | // Avoid shifting by 32, that is undefined behavior | ||
| 59 | return std::make_pair(0, s32(std::numeric_limits<u32>::max())); | ||
| 60 | } | ||
| 61 | return std::make_pair(0, (1 << dst_bits) - 1); | ||
| 62 | } | ||
| 63 | } | ||
| 64 | |||
| 65 | } // Anonymous namespace | ||
| 66 | |||
| 67 | u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | ||
| 68 | const Instruction instr = {program_code[pc]}; | ||
| 69 | const auto opcode = OpCode::Decode(instr); | ||
| 70 | |||
| 71 | switch (opcode->get().GetId()) { | ||
| 72 | case OpCode::Id::I2I_R: | ||
| 73 | case OpCode::Id::I2I_C: | ||
| 74 | case OpCode::Id::I2I_IMM: { | ||
| 75 | const bool src_signed = instr.conversion.is_input_signed; | ||
| 76 | const bool dst_signed = instr.conversion.is_output_signed; | ||
| 77 | const Register::Size src_size = instr.conversion.src_size; | ||
| 78 | const Register::Size dst_size = instr.conversion.dst_size; | ||
| 79 | const u32 selector = static_cast<u32>(instr.conversion.int_src.selector); | ||
| 80 | |||
| 81 | Node value = [this, instr, opcode] { | ||
| 82 | switch (opcode->get().GetId()) { | ||
| 83 | case OpCode::Id::I2I_R: | ||
| 84 | return GetRegister(instr.gpr20); | ||
| 85 | case OpCode::Id::I2I_C: | ||
| 86 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 87 | case OpCode::Id::I2I_IMM: | ||
| 88 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 89 | default: | ||
| 90 | UNREACHABLE(); | ||
| 91 | return Immediate(0); | ||
| 92 | } | ||
| 93 | }(); | ||
| 94 | |||
| 95 | // Ensure the source selector is valid | ||
| 96 | switch (instr.conversion.src_size) { | ||
| 97 | case Register::Size::Byte: | ||
| 98 | break; | ||
| 99 | case Register::Size::Short: | ||
| 100 | ASSERT(selector == 0 || selector == 2); | ||
| 101 | break; | ||
| 102 | default: | ||
| 103 | ASSERT(selector == 0); | ||
| 104 | break; | ||
| 105 | } | ||
| 106 | |||
| 107 | if (src_size != Register::Size::Word || selector != 0) { | ||
| 108 | value = SignedOperation(OperationCode::IBitfieldExtract, src_signed, std::move(value), | ||
| 109 | Immediate(selector * 8), Immediate(SizeInBits(src_size))); | ||
| 110 | } | ||
| 111 | |||
| 112 | value = GetOperandAbsNegInteger(std::move(value), instr.conversion.abs_a, | ||
| 113 | instr.conversion.negate_a, src_signed); | ||
| 114 | |||
| 115 | if (instr.alu.saturate_d) { | ||
| 116 | if (src_signed && !dst_signed) { | ||
| 117 | Node is_negative = Operation(OperationCode::LogicalUGreaterEqual, value, | ||
| 118 | Immediate(1 << (SizeInBits(src_size) - 1))); | ||
| 119 | value = Operation(OperationCode::Select, std::move(is_negative), Immediate(0), | ||
| 120 | std::move(value)); | ||
| 121 | |||
| 122 | // Simplify generated expressions, this can be removed without semantic impact | ||
| 123 | SetTemporary(bb, 0, std::move(value)); | ||
| 124 | value = GetTemporary(0); | ||
| 125 | |||
| 126 | if (dst_size != Register::Size::Word) { | ||
| 127 | const Node limit = Immediate((1 << SizeInBits(dst_size)) - 1); | ||
| 128 | Node is_large = | ||
| 129 | Operation(OperationCode::LogicalUGreaterThan, std::move(value), limit); | ||
| 130 | value = Operation(OperationCode::Select, std::move(is_large), limit, | ||
| 131 | std::move(value)); | ||
| 132 | } | ||
| 133 | } else if (const std::optional bounds = | ||
| 134 | IntegerSaturateBounds(src_size, dst_size, src_signed, dst_signed)) { | ||
| 135 | value = SignedOperation(OperationCode::IMax, src_signed, std::move(value), | ||
| 136 | Immediate(bounds->first)); | ||
| 137 | value = SignedOperation(OperationCode::IMin, src_signed, std::move(value), | ||
| 138 | Immediate(bounds->second)); | ||
| 139 | } | ||
| 140 | } else if (dst_size != Register::Size::Word) { | ||
| 141 | // No saturation, we only have to mask the result | ||
| 142 | Node mask = Immediate((1 << SizeInBits(dst_size)) - 1); | ||
| 143 | value = Operation(OperationCode::UBitwiseAnd, std::move(value), std::move(mask)); | ||
| 144 | } | ||
| 145 | |||
| 146 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 147 | SetRegister(bb, instr.gpr0, std::move(value)); | ||
| 148 | break; | ||
| 149 | } | ||
| 150 | case OpCode::Id::I2F_R: | ||
| 151 | case OpCode::Id::I2F_C: | ||
| 152 | case OpCode::Id::I2F_IMM: { | ||
| 153 | UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); | ||
| 154 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 155 | "Condition codes generation in I2F is not implemented"); | ||
| 156 | |||
| 157 | Node value = [&] { | ||
| 158 | switch (opcode->get().GetId()) { | ||
| 159 | case OpCode::Id::I2F_R: | ||
| 160 | return GetRegister(instr.gpr20); | ||
| 161 | case OpCode::Id::I2F_C: | ||
| 162 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 163 | case OpCode::Id::I2F_IMM: | ||
| 164 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 165 | default: | ||
| 166 | UNREACHABLE(); | ||
| 167 | return Immediate(0); | ||
| 168 | } | ||
| 169 | }(); | ||
| 170 | |||
| 171 | const bool input_signed = instr.conversion.is_input_signed; | ||
| 172 | |||
| 173 | if (const u32 offset = static_cast<u32>(instr.conversion.int_src.selector); offset > 0) { | ||
| 174 | ASSERT(instr.conversion.src_size == Register::Size::Byte || | ||
| 175 | instr.conversion.src_size == Register::Size::Short); | ||
| 176 | if (instr.conversion.src_size == Register::Size::Short) { | ||
| 177 | ASSERT(offset == 0 || offset == 2); | ||
| 178 | } | ||
| 179 | value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed, | ||
| 180 | std::move(value), Immediate(offset * 8)); | ||
| 181 | } | ||
| 182 | |||
| 183 | value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed); | ||
| 184 | value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed); | ||
| 185 | value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value); | ||
| 186 | value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a); | ||
| 187 | |||
| 188 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 189 | |||
| 190 | if (instr.conversion.dst_size == Register::Size::Short) { | ||
| 191 | value = Operation(OperationCode::HCastFloat, PRECISE, value); | ||
| 192 | } | ||
| 193 | |||
| 194 | SetRegister(bb, instr.gpr0, value); | ||
| 195 | break; | ||
| 196 | } | ||
| 197 | case OpCode::Id::F2F_R: | ||
| 198 | case OpCode::Id::F2F_C: | ||
| 199 | case OpCode::Id::F2F_IMM: { | ||
| 200 | UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); | ||
| 201 | UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); | ||
| 202 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 203 | "Condition codes generation in F2F is not implemented"); | ||
| 204 | |||
| 205 | Node value = [&]() { | ||
| 206 | switch (opcode->get().GetId()) { | ||
| 207 | case OpCode::Id::F2F_R: | ||
| 208 | return GetRegister(instr.gpr20); | ||
| 209 | case OpCode::Id::F2F_C: | ||
| 210 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 211 | case OpCode::Id::F2F_IMM: | ||
| 212 | return GetImmediate19(instr); | ||
| 213 | default: | ||
| 214 | UNREACHABLE(); | ||
| 215 | return Immediate(0); | ||
| 216 | } | ||
| 217 | }(); | ||
| 218 | |||
| 219 | if (instr.conversion.src_size == Register::Size::Short) { | ||
| 220 | value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE, | ||
| 221 | std::move(value)); | ||
| 222 | } else { | ||
| 223 | ASSERT(instr.conversion.float_src.selector == 0); | ||
| 224 | } | ||
| 225 | |||
| 226 | value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); | ||
| 227 | |||
| 228 | value = [&] { | ||
| 229 | if (instr.conversion.src_size != instr.conversion.dst_size) { | ||
| 230 | // Rounding operations only matter when the source and destination conversion | ||
| 231 | // sizes are the same. | ||
| 232 | return value; | ||
| 233 | } | ||
| 234 | switch (instr.conversion.f2f.GetRoundingMode()) { | ||
| 235 | case Tegra::Shader::F2fRoundingOp::None: | ||
| 236 | return value; | ||
| 237 | case Tegra::Shader::F2fRoundingOp::Round: | ||
| 238 | return Operation(OperationCode::FRoundEven, value); | ||
| 239 | case Tegra::Shader::F2fRoundingOp::Floor: | ||
| 240 | return Operation(OperationCode::FFloor, value); | ||
| 241 | case Tegra::Shader::F2fRoundingOp::Ceil: | ||
| 242 | return Operation(OperationCode::FCeil, value); | ||
| 243 | case Tegra::Shader::F2fRoundingOp::Trunc: | ||
| 244 | return Operation(OperationCode::FTrunc, value); | ||
| 245 | default: | ||
| 246 | UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", | ||
| 247 | instr.conversion.f2f.rounding.Value()); | ||
| 248 | return value; | ||
| 249 | } | ||
| 250 | }(); | ||
| 251 | value = GetSaturatedFloat(value, instr.alu.saturate_d); | ||
| 252 | |||
| 253 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 254 | |||
| 255 | if (instr.conversion.dst_size == Register::Size::Short) { | ||
| 256 | value = Operation(OperationCode::HCastFloat, PRECISE, value); | ||
| 257 | } | ||
| 258 | |||
| 259 | SetRegister(bb, instr.gpr0, value); | ||
| 260 | break; | ||
| 261 | } | ||
| 262 | case OpCode::Id::F2I_R: | ||
| 263 | case OpCode::Id::F2I_C: | ||
| 264 | case OpCode::Id::F2I_IMM: { | ||
| 265 | UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); | ||
| 266 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 267 | "Condition codes generation in F2I is not implemented"); | ||
| 268 | Node value = [&]() { | ||
| 269 | switch (opcode->get().GetId()) { | ||
| 270 | case OpCode::Id::F2I_R: | ||
| 271 | return GetRegister(instr.gpr20); | ||
| 272 | case OpCode::Id::F2I_C: | ||
| 273 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 274 | case OpCode::Id::F2I_IMM: | ||
| 275 | return GetImmediate19(instr); | ||
| 276 | default: | ||
| 277 | UNREACHABLE(); | ||
| 278 | return Immediate(0); | ||
| 279 | } | ||
| 280 | }(); | ||
| 281 | |||
| 282 | if (instr.conversion.src_size == Register::Size::Short) { | ||
| 283 | value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE, | ||
| 284 | std::move(value)); | ||
| 285 | } else { | ||
| 286 | ASSERT(instr.conversion.float_src.selector == 0); | ||
| 287 | } | ||
| 288 | |||
| 289 | value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); | ||
| 290 | |||
| 291 | value = [&]() { | ||
| 292 | switch (instr.conversion.f2i.rounding) { | ||
| 293 | case Tegra::Shader::F2iRoundingOp::RoundEven: | ||
| 294 | return Operation(OperationCode::FRoundEven, PRECISE, value); | ||
| 295 | case Tegra::Shader::F2iRoundingOp::Floor: | ||
| 296 | return Operation(OperationCode::FFloor, PRECISE, value); | ||
| 297 | case Tegra::Shader::F2iRoundingOp::Ceil: | ||
| 298 | return Operation(OperationCode::FCeil, PRECISE, value); | ||
| 299 | case Tegra::Shader::F2iRoundingOp::Trunc: | ||
| 300 | return Operation(OperationCode::FTrunc, PRECISE, value); | ||
| 301 | default: | ||
| 302 | UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}", | ||
| 303 | instr.conversion.f2i.rounding.Value()); | ||
| 304 | return Immediate(0); | ||
| 305 | } | ||
| 306 | }(); | ||
| 307 | const bool is_signed = instr.conversion.is_output_signed; | ||
| 308 | value = SignedOperation(OperationCode::ICastFloat, is_signed, PRECISE, value); | ||
| 309 | value = ConvertIntegerSize(value, instr.conversion.dst_size, is_signed); | ||
| 310 | |||
| 311 | SetRegister(bb, instr.gpr0, value); | ||
| 312 | break; | ||
| 313 | } | ||
| 314 | default: | ||
| 315 | UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName()); | ||
| 316 | } | ||
| 317 | |||
| 318 | return pc; | ||
| 319 | } | ||
| 320 | |||
| 321 | } // namespace VideoCommon::Shader | ||
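The saturated I2I path above clamps with an IMax/IMin pair against the destination type's representable range. A minimal standalone C++ sketch of that clamp; SaturateSigned is a hypothetical helper, and the bounds mirror what IntegerSaturateBounds is assumed to return for a signed-to-signed narrowing:

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Clamp a 32-bit signed source to the representable range of a narrower
// signed destination of dst_bits bits, mimicking the IMax/IMin pair.
int32_t SaturateSigned(int32_t value, int dst_bits) {
    const int32_t min = -(1 << (dst_bits - 1));    // e.g. -128 for 8 bits
    const int32_t max = (1 << (dst_bits - 1)) - 1; // e.g.  127 for 8 bits
    return std::clamp(value, min, max);
}

int main() {
    std::printf("%d\n", SaturateSigned(300, 8));  // prints 127
    std::printf("%d\n", SaturateSigned(-300, 8)); // prints -128
}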
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp deleted file mode 100644 index 5973588d6..000000000 --- a/src/video_core/shader/decode/ffma.cpp +++ /dev/null | |||
| @@ -1,62 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | |||
| 16 | u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) { | ||
| 17 | const Instruction instr = {program_code[pc]}; | ||
| 18 | const auto opcode = OpCode::Decode(instr); | ||
| 19 | |||
| 20 | UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); | ||
| 21 | if (instr.ffma.tab5980_0 != 1) { | ||
| 22 | LOG_DEBUG(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value()); | ||
| 23 | } | ||
| 24 | if (instr.ffma.tab5980_1 != 0) { | ||
| 25 | LOG_DEBUG(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value()); | ||
| 26 | } | ||
| 27 | |||
| 28 | const Node op_a = GetRegister(instr.gpr8); | ||
| 29 | |||
| 30 | auto [op_b, op_c] = [&]() -> std::tuple<Node, Node> { | ||
| 31 | switch (opcode->get().GetId()) { | ||
| 32 | case OpCode::Id::FFMA_CR: { | ||
| 33 | return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), | ||
| 34 | GetRegister(instr.gpr39)}; | ||
| 35 | } | ||
| 36 | case OpCode::Id::FFMA_RR: | ||
| 37 | return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; | ||
| 38 | case OpCode::Id::FFMA_RC: { | ||
| 39 | return {GetRegister(instr.gpr39), | ||
| 40 | GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; | ||
| 41 | } | ||
| 42 | case OpCode::Id::FFMA_IMM: | ||
| 43 | return {GetImmediate19(instr), GetRegister(instr.gpr39)}; | ||
| 44 | default: | ||
| 45 | UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName()); | ||
| 46 | return {Immediate(0), Immediate(0)}; | ||
| 47 | } | ||
| 48 | }(); | ||
| 49 | |||
| 50 | op_b = GetOperandAbsNegFloat(op_b, false, instr.ffma.negate_b); | ||
| 51 | op_c = GetOperandAbsNegFloat(op_c, false, instr.ffma.negate_c); | ||
| 52 | |||
| 53 | Node value = Operation(OperationCode::FFma, PRECISE, op_a, op_b, op_c); | ||
| 54 | value = GetSaturatedFloat(value, instr.alu.saturate_d); | ||
| 55 | |||
| 56 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 57 | SetRegister(bb, instr.gpr0, value); | ||
| 58 | |||
| 59 | return pc; | ||
| 60 | } | ||
| 61 | |||
| 62 | } // namespace VideoCommon::Shader | ||
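For context on the deleted decoder: FFMA models a fused multiply-add, op_a * op_b + op_c, rounded once. A plain C++ equivalent using the standard std::fma rather than the emulator's IR:

#include <cmath>
#include <cstdio>

int main() {
    const float a = 2.0f, b = 3.0f, c = 1.0f;
    // std::fma rounds once, matching the fused semantics FFMA models.
    std::printf("%f\n", std::fma(a, b, c)); // 7.000000
}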
diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp deleted file mode 100644 index 5614e8a0d..000000000 --- a/src/video_core/shader/decode/float_set.cpp +++ /dev/null | |||
| @@ -1,58 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | |||
| 16 | u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) { | ||
| 17 | const Instruction instr = {program_code[pc]}; | ||
| 18 | |||
| 19 | const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0, | ||
| 20 | instr.fset.neg_a != 0); | ||
| 21 | |||
| 22 | Node op_b = [&]() { | ||
| 23 | if (instr.is_b_imm) { | ||
| 24 | return GetImmediate19(instr); | ||
| 25 | } else if (instr.is_b_gpr) { | ||
| 26 | return GetRegister(instr.gpr20); | ||
| 27 | } else { | ||
| 28 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 29 | } | ||
| 30 | }(); | ||
| 31 | |||
| 32 | op_b = GetOperandAbsNegFloat(op_b, instr.fset.abs_b != 0, instr.fset.neg_b != 0); | ||
| 33 | |||
| 34 | // The fset instruction sets a register to 1.0f (when the bf bit is set) or to -1 | ||
| 35 | // (when it is clear) if the condition is true, and to 0 otherwise. | ||
| 36 | const Node second_pred = GetPredicate(instr.fset.pred39, instr.fset.neg_pred != 0); | ||
| 37 | |||
| 38 | const OperationCode combiner = GetPredicateCombiner(instr.fset.op); | ||
| 39 | const Node first_pred = GetPredicateComparisonFloat(instr.fset.cond, op_a, op_b); | ||
| 40 | |||
| 41 | const Node predicate = Operation(combiner, first_pred, second_pred); | ||
| 42 | |||
| 43 | const Node true_value = instr.fset.bf ? Immediate(1.0f) : Immediate(-1); | ||
| 44 | const Node false_value = instr.fset.bf ? Immediate(0.0f) : Immediate(0); | ||
| 45 | const Node value = | ||
| 46 | Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); | ||
| 47 | |||
| 48 | if (instr.fset.bf) { | ||
| 49 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 50 | } else { | ||
| 51 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 52 | } | ||
| 53 | SetRegister(bb, instr.gpr0, value); | ||
| 54 | |||
| 55 | return pc; | ||
| 56 | } | ||
| 57 | |||
| 58 | } // namespace VideoCommon::Shader | ||
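The bf-dependent result encoding above writes either float 1.0/0.0 or an integer all-ones/zero mask into the same 32-bit destination. A small standalone sketch; the condition is reduced to a plain bool for illustration:

#include <cstdint>
#include <cstdio>
#include <cstring>

// When bf is set, FSET writes float 1.0/0.0 into the destination register;
// otherwise it writes an all-ones integer mask (-1) or zero.
uint32_t FsetResult(bool condition, bool bf) {
    if (bf) {
        const float f = condition ? 1.0f : 0.0f;
        uint32_t bits;
        std::memcpy(&bits, &f, sizeof(bits));
        return bits;
    }
    return condition ? 0xffffffffu : 0u;
}

int main() {
    std::printf("%08x\n", FsetResult(true, true));  // 3f800000 (1.0f)
    std::printf("%08x\n", FsetResult(true, false)); // ffffffff (-1)
}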
diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp deleted file mode 100644 index 200c2c983..000000000 --- a/src/video_core/shader/decode/float_set_predicate.cpp +++ /dev/null | |||
| @@ -1,57 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | using Tegra::Shader::Pred; | ||
| 16 | |||
| 17 | u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) { | ||
| 18 | const Instruction instr = {program_code[pc]}; | ||
| 19 | |||
| 20 | Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0, | ||
| 21 | instr.fsetp.neg_a != 0); | ||
| 22 | Node op_b = [&]() { | ||
| 23 | if (instr.is_b_imm) { | ||
| 24 | return GetImmediate19(instr); | ||
| 25 | } else if (instr.is_b_gpr) { | ||
| 26 | return GetRegister(instr.gpr20); | ||
| 27 | } else { | ||
| 28 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 29 | } | ||
| 30 | }(); | ||
| 31 | op_b = GetOperandAbsNegFloat(std::move(op_b), instr.fsetp.abs_b, instr.fsetp.neg_b); | ||
| 32 | |||
| 33 | // We can't use the constant predicate as destination. | ||
| 34 | ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | ||
| 35 | |||
| 36 | const Node predicate = | ||
| 37 | GetPredicateComparisonFloat(instr.fsetp.cond, std::move(op_a), std::move(op_b)); | ||
| 38 | const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0); | ||
| 39 | |||
| 40 | const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op); | ||
| 41 | const Node value = Operation(combiner, predicate, second_pred); | ||
| 42 | |||
| 43 | // Set the primary predicate to the result of Predicate OP SecondPredicate | ||
| 44 | SetPredicate(bb, instr.fsetp.pred3, value); | ||
| 45 | |||
| 46 | if (instr.fsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 47 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, | ||
| 48 | // if enabled | ||
| 49 | const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate); | ||
| 50 | const Node second_value = Operation(combiner, negated_pred, second_pred); | ||
| 51 | SetPredicate(bb, instr.fsetp.pred0, second_value); | ||
| 52 | } | ||
| 53 | |||
| 54 | return pc; | ||
| 55 | } | ||
| 56 | |||
| 57 | } // namespace VideoCommon::Shader | ||
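The dual write-back above (the primary predicate gets comparison OP second, the secondary gets !comparison OP second) is shared across the setp family. A minimal sketch with the combiner passed as a function object, which is an illustrative simplification:

#include <cstdio>
#include <functional>

// FSETP-style dual write: the primary predicate receives (comparison OP
// second), the optional secondary predicate (!comparison OP second).
void SetPredicates(bool comparison, bool second,
                   const std::function<bool(bool, bool)>& op,
                   bool& primary, bool& secondary) {
    primary = op(comparison, second);
    secondary = op(!comparison, second);
}

int main() {
    bool p3 = false, p0 = false;
    SetPredicates(true, true, std::logical_and<bool>{}, p3, p0);
    std::printf("%d %d\n", p3, p0); // 1 0
}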
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp deleted file mode 100644 index fa83108cd..000000000 --- a/src/video_core/shader/decode/half_set.cpp +++ /dev/null | |||
| @@ -1,115 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | |||
| 7 | #include "common/assert.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "common/logging/log.h" | ||
| 10 | #include "video_core/engines/shader_bytecode.h" | ||
| 11 | #include "video_core/shader/node_helper.h" | ||
| 12 | #include "video_core/shader/shader_ir.h" | ||
| 13 | |||
| 14 | namespace VideoCommon::Shader { | ||
| 15 | |||
| 16 | using std::move; | ||
| 17 | using Tegra::Shader::Instruction; | ||
| 18 | using Tegra::Shader::OpCode; | ||
| 19 | using Tegra::Shader::PredCondition; | ||
| 20 | |||
| 21 | u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { | ||
| 22 | const Instruction instr = {program_code[pc]}; | ||
| 23 | const auto opcode = OpCode::Decode(instr); | ||
| 24 | |||
| 25 | PredCondition cond{}; | ||
| 26 | bool bf = false; | ||
| 27 | bool ftz = false; | ||
| 28 | bool neg_a = false; | ||
| 29 | bool abs_a = false; | ||
| 30 | bool neg_b = false; | ||
| 31 | bool abs_b = false; | ||
| 32 | switch (opcode->get().GetId()) { | ||
| 33 | case OpCode::Id::HSET2_C: | ||
| 34 | case OpCode::Id::HSET2_IMM: | ||
| 35 | cond = instr.hsetp2.cbuf_and_imm.cond; | ||
| 36 | bf = instr.Bit(53); | ||
| 37 | ftz = instr.Bit(54); | ||
| 38 | neg_a = instr.Bit(43); | ||
| 39 | abs_a = instr.Bit(44); | ||
| 40 | neg_b = instr.Bit(56); | ||
| 41 | abs_b = instr.Bit(54); | ||
| 42 | break; | ||
| 43 | case OpCode::Id::HSET2_R: | ||
| 44 | cond = instr.hsetp2.reg.cond; | ||
| 45 | bf = instr.Bit(49); | ||
| 46 | ftz = instr.Bit(50); | ||
| 47 | neg_a = instr.Bit(43); | ||
| 48 | abs_a = instr.Bit(44); | ||
| 49 | neg_b = instr.Bit(31); | ||
| 50 | abs_b = instr.Bit(30); | ||
| 51 | break; | ||
| 52 | default: | ||
| 53 | UNREACHABLE(); | ||
| 54 | } | ||
| 55 | |||
| 56 | Node op_b = [this, instr, opcode] { | ||
| 57 | switch (opcode->get().GetId()) { | ||
| 58 | case OpCode::Id::HSET2_C: | ||
| 59 | // Report as unimplemented, as this path is untested. | ||
| 60 | UNIMPLEMENTED_MSG("HSET2_C is not implemented"); | ||
| 61 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 62 | case OpCode::Id::HSET2_R: | ||
| 63 | return GetRegister(instr.gpr20); | ||
| 64 | case OpCode::Id::HSET2_IMM: | ||
| 65 | return UnpackHalfImmediate(instr, true); | ||
| 66 | default: | ||
| 67 | UNREACHABLE(); | ||
| 68 | return Node{}; | ||
| 69 | } | ||
| 70 | }(); | ||
| 71 | |||
| 72 | if (!ftz) { | ||
| 73 | LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); | ||
| 74 | } | ||
| 75 | |||
| 76 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a); | ||
| 77 | op_a = GetOperandAbsNegHalf(op_a, abs_a, neg_a); | ||
| 78 | |||
| 79 | switch (opcode->get().GetId()) { | ||
| 80 | case OpCode::Id::HSET2_R: | ||
| 81 | op_b = GetOperandAbsNegHalf(move(op_b), abs_b, neg_b); | ||
| 82 | [[fallthrough]]; | ||
| 83 | case OpCode::Id::HSET2_C: | ||
| 84 | op_b = UnpackHalfFloat(move(op_b), instr.hset2.type_b); | ||
| 85 | break; | ||
| 86 | default: | ||
| 87 | break; | ||
| 88 | } | ||
| 89 | |||
| 90 | Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred); | ||
| 91 | |||
| 92 | Node comparison_pair = GetPredicateComparisonHalf(cond, op_a, op_b); | ||
| 93 | |||
| 94 | const OperationCode combiner = GetPredicateCombiner(instr.hset2.op); | ||
| 95 | |||
| 96 | // HSET2 operates on each half float in the pack. | ||
| 97 | std::array<Node, 2> values; | ||
| 98 | for (u32 i = 0; i < 2; ++i) { | ||
| 99 | const u32 raw_value = bf ? 0x3c00 : 0xffff; | ||
| 100 | Node true_value = Immediate(raw_value << (i * 16)); | ||
| 101 | Node false_value = Immediate(0); | ||
| 102 | |||
| 103 | Node comparison = Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i)); | ||
| 104 | Node predicate = Operation(combiner, comparison, second_pred); | ||
| 105 | values[i] = | ||
| 106 | Operation(OperationCode::Select, predicate, move(true_value), move(false_value)); | ||
| 107 | } | ||
| 108 | |||
| 109 | Node value = Operation(OperationCode::UBitwiseOr, values[0], values[1]); | ||
| 110 | SetRegister(bb, instr.gpr0, move(value)); | ||
| 111 | |||
| 112 | return pc; | ||
| 113 | } | ||
| 114 | |||
| 115 | } // namespace VideoCommon::Shader | ||
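The loop at the end of DecodeHalfSet selects a per-lane 16-bit pattern (0x3c00, half-float 1.0, when bf is set; 0xffff otherwise) and ORs the two lanes together. A scalar sketch with the lane predicates reduced to plain bools:

#include <cstdint>
#include <cstdio>

// Pack two per-lane HSET2 results into one 32-bit register.
// bf=true  -> true lanes become half-float 1.0 (0x3c00)
// bf=false -> true lanes become an all-ones 16-bit mask (0xffff)
uint32_t PackHset2(bool lane0, bool lane1, bool bf) {
    const uint32_t raw = bf ? 0x3c00u : 0xffffu;
    const uint32_t lo = lane0 ? raw : 0u;
    const uint32_t hi = lane1 ? (raw << 16) : 0u;
    return lo | hi;
}

int main() {
    std::printf("%08x\n", PackHset2(true, false, true)); // 00003c00
    std::printf("%08x\n", PackHset2(true, true, false)); // ffffffff
}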
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp deleted file mode 100644 index 310655619..000000000 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ /dev/null | |||
| @@ -1,80 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "common/logging/log.h" | ||
| 8 | #include "video_core/engines/shader_bytecode.h" | ||
| 9 | #include "video_core/shader/node_helper.h" | ||
| 10 | #include "video_core/shader/shader_ir.h" | ||
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | using Tegra::Shader::Instruction; | ||
| 15 | using Tegra::Shader::OpCode; | ||
| 16 | using Tegra::Shader::Pred; | ||
| 17 | |||
| 18 | u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { | ||
| 19 | const Instruction instr = {program_code[pc]}; | ||
| 20 | const auto opcode = OpCode::Decode(instr); | ||
| 21 | |||
| 22 | if (instr.hsetp2.ftz != 0) { | ||
| 23 | LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); | ||
| 24 | } | ||
| 25 | |||
| 26 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); | ||
| 27 | op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); | ||
| 28 | |||
| 29 | Tegra::Shader::PredCondition cond{}; | ||
| 30 | bool h_and{}; | ||
| 31 | Node op_b{}; | ||
| 32 | switch (opcode->get().GetId()) { | ||
| 33 | case OpCode::Id::HSETP2_C: | ||
| 34 | cond = instr.hsetp2.cbuf_and_imm.cond; | ||
| 35 | h_and = instr.hsetp2.cbuf_and_imm.h_and; | ||
| 36 | op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), | ||
| 37 | instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b); | ||
| 38 | // F32 is hardcoded in hardware | ||
| 39 | op_b = UnpackHalfFloat(std::move(op_b), Tegra::Shader::HalfType::F32); | ||
| 40 | break; | ||
| 41 | case OpCode::Id::HSETP2_IMM: | ||
| 42 | cond = instr.hsetp2.cbuf_and_imm.cond; | ||
| 43 | h_and = instr.hsetp2.cbuf_and_imm.h_and; | ||
| 44 | op_b = UnpackHalfImmediate(instr, true); | ||
| 45 | break; | ||
| 46 | case OpCode::Id::HSETP2_R: | ||
| 47 | cond = instr.hsetp2.reg.cond; | ||
| 48 | h_and = instr.hsetp2.reg.h_and; | ||
| 49 | op_b = | ||
| 50 | GetOperandAbsNegHalf(UnpackHalfFloat(GetRegister(instr.gpr20), instr.hsetp2.reg.type_b), | ||
| 51 | instr.hsetp2.reg.abs_b, instr.hsetp2.reg.negate_b); | ||
| 52 | break; | ||
| 53 | default: | ||
| 54 | UNREACHABLE(); | ||
| 55 | op_b = Immediate(0); | ||
| 56 | } | ||
| 57 | |||
| 58 | const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op); | ||
| 59 | const Node combined_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred); | ||
| 60 | |||
| 61 | const auto Write = [&](u64 dest, Node src) { | ||
| 62 | SetPredicate(bb, dest, Operation(combiner, std::move(src), combined_pred)); | ||
| 63 | }; | ||
| 64 | |||
| 65 | const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b); | ||
| 66 | const u64 first = instr.hsetp2.pred3; | ||
| 67 | const u64 second = instr.hsetp2.pred0; | ||
| 68 | if (h_and) { | ||
| 69 | Node joined = Operation(OperationCode::LogicalAnd2, comparison); | ||
| 70 | Write(first, joined); | ||
| 71 | Write(second, Operation(OperationCode::LogicalNegate, std::move(joined))); | ||
| 72 | } else { | ||
| 73 | Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0U))); | ||
| 74 | Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1U))); | ||
| 75 | } | ||
| 76 | |||
| 77 | return pc; | ||
| 78 | } | ||
| 79 | |||
| 80 | } // namespace VideoCommon::Shader | ||
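HSETP2 writes its two destination predicates either from a LogicalAnd2 over the comparison pair (h_and set) or from LogicalPick2 of each lane. A scalar sketch that omits the combiner with the extra pred39 predicate for brevity:

#include <cstdio>
#include <utility>

// Returns {first predicate, second predicate} for the HSETP2 write-back.
std::pair<bool, bool> Hsetp2Results(bool cmp_lo, bool cmp_hi, bool h_and) {
    if (h_and) {
        const bool joined = cmp_lo && cmp_hi; // LogicalAnd2 over the pair
        return {joined, !joined};
    }
    return {cmp_lo, cmp_hi}; // LogicalPick2 of lanes 0 and 1
}

int main() {
    const auto [p3, p0] = Hsetp2Results(true, false, true);
    std::printf("%d %d\n", p3, p0); // 0 1
}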
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp deleted file mode 100644 index 5b44cb79c..000000000 --- a/src/video_core/shader/decode/hfma2.cpp +++ /dev/null | |||
| @@ -1,73 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <tuple> | ||
| 6 | |||
| 7 | #include "common/assert.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "video_core/engines/shader_bytecode.h" | ||
| 10 | #include "video_core/shader/node_helper.h" | ||
| 11 | #include "video_core/shader/shader_ir.h" | ||
| 12 | |||
| 13 | namespace VideoCommon::Shader { | ||
| 14 | |||
| 15 | using Tegra::Shader::HalfPrecision; | ||
| 16 | using Tegra::Shader::HalfType; | ||
| 17 | using Tegra::Shader::Instruction; | ||
| 18 | using Tegra::Shader::OpCode; | ||
| 19 | |||
| 20 | u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { | ||
| 21 | const Instruction instr = {program_code[pc]}; | ||
| 22 | const auto opcode = OpCode::Decode(instr); | ||
| 23 | |||
| 24 | if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) { | ||
| 25 | DEBUG_ASSERT(instr.hfma2.rr.precision == HalfPrecision::None); | ||
| 26 | } else { | ||
| 27 | DEBUG_ASSERT(instr.hfma2.precision == HalfPrecision::None); | ||
| 28 | } | ||
| 29 | |||
| 30 | constexpr auto identity = HalfType::H0_H1; | ||
| 31 | bool neg_b{}, neg_c{}; | ||
| 32 | auto [saturate, type_b, op_b, type_c, | ||
| 33 | op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> { | ||
| 34 | switch (opcode->get().GetId()) { | ||
| 35 | case OpCode::Id::HFMA2_CR: | ||
| 36 | neg_b = instr.hfma2.negate_b; | ||
| 37 | neg_c = instr.hfma2.negate_c; | ||
| 38 | return {instr.hfma2.saturate, HalfType::F32, | ||
| 39 | GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), | ||
| 40 | instr.hfma2.type_reg39, GetRegister(instr.gpr39)}; | ||
| 41 | case OpCode::Id::HFMA2_RC: | ||
| 42 | neg_b = instr.hfma2.negate_b; | ||
| 43 | neg_c = instr.hfma2.negate_c; | ||
| 44 | return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39), | ||
| 45 | HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; | ||
| 46 | case OpCode::Id::HFMA2_RR: | ||
| 47 | neg_b = instr.hfma2.rr.negate_b; | ||
| 48 | neg_c = instr.hfma2.rr.negate_c; | ||
| 49 | return {instr.hfma2.rr.saturate, instr.hfma2.type_b, GetRegister(instr.gpr20), | ||
| 50 | instr.hfma2.rr.type_c, GetRegister(instr.gpr39)}; | ||
| 51 | case OpCode::Id::HFMA2_IMM_R: | ||
| 52 | neg_c = instr.hfma2.negate_c; | ||
| 53 | return {instr.hfma2.saturate, identity, UnpackHalfImmediate(instr, true), | ||
| 54 | instr.hfma2.type_reg39, GetRegister(instr.gpr39)}; | ||
| 55 | default: | ||
| 56 | return {false, identity, Immediate(0), identity, Immediate(0)}; | ||
| 57 | } | ||
| 58 | }(); | ||
| 59 | |||
| 60 | const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a); | ||
| 61 | op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b); | ||
| 62 | op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c); | ||
| 63 | |||
| 64 | Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c); | ||
| 65 | value = GetSaturatedHalfFloat(value, saturate); | ||
| 66 | value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge); | ||
| 67 | |||
| 68 | SetRegister(bb, instr.gpr0, value); | ||
| 69 | |||
| 70 | return pc; | ||
| 71 | } | ||
| 72 | |||
| 73 | } // namespace VideoCommon::Shader | ||
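HFMA2 performs a fused multiply-add on each 16-bit lane of a packed pair. A scalar stand-in with floats in place of halves; f16 rounding and the register merge step are not modeled here:

#include <array>
#include <cmath>
#include <cstdio>

// Per-lane fused multiply-add over a packed pair, with floats standing in
// for the hardware's f16 lanes.
std::array<float, 2> Hfma2(std::array<float, 2> a, std::array<float, 2> b,
                           std::array<float, 2> c) {
    return {std::fma(a[0], b[0], c[0]), std::fma(a[1], b[1], c[1])};
}

int main() {
    const auto r = Hfma2({1.0f, 2.0f}, {3.0f, 4.0f}, {5.0f, 6.0f});
    std::printf("%f %f\n", r[0], r[1]); // 8.000000 14.000000
}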
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp deleted file mode 100644 index 5470e8cf4..000000000 --- a/src/video_core/shader/decode/image.cpp +++ /dev/null | |||
| @@ -1,536 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <vector> | ||
| 7 | #include <fmt/format.h> | ||
| 8 | |||
| 9 | #include "common/assert.h" | ||
| 10 | #include "common/bit_field.h" | ||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "common/logging/log.h" | ||
| 13 | #include "video_core/engines/shader_bytecode.h" | ||
| 14 | #include "video_core/shader/node_helper.h" | ||
| 15 | #include "video_core/shader/shader_ir.h" | ||
| 16 | #include "video_core/textures/texture.h" | ||
| 17 | |||
| 18 | namespace VideoCommon::Shader { | ||
| 19 | |||
| 20 | using Tegra::Shader::Instruction; | ||
| 21 | using Tegra::Shader::OpCode; | ||
| 22 | using Tegra::Shader::PredCondition; | ||
| 23 | using Tegra::Shader::StoreType; | ||
| 24 | using Tegra::Texture::ComponentType; | ||
| 25 | using Tegra::Texture::TextureFormat; | ||
| 26 | using Tegra::Texture::TICEntry; | ||
| 27 | |||
| 28 | namespace { | ||
| 29 | |||
| 30 | ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor, | ||
| 31 | std::size_t component) { | ||
| 32 | const TextureFormat format{descriptor.format}; | ||
| 33 | switch (format) { | ||
| 34 | case TextureFormat::R16G16B16A16: | ||
| 35 | case TextureFormat::R32G32B32A32: | ||
| 36 | case TextureFormat::R32G32B32: | ||
| 37 | case TextureFormat::R32G32: | ||
| 38 | case TextureFormat::R16G16: | ||
| 39 | case TextureFormat::R32: | ||
| 40 | case TextureFormat::R16: | ||
| 41 | case TextureFormat::R8: | ||
| 42 | case TextureFormat::R1: | ||
| 43 | if (component == 0) { | ||
| 44 | return descriptor.r_type; | ||
| 45 | } | ||
| 46 | if (component == 1) { | ||
| 47 | return descriptor.g_type; | ||
| 48 | } | ||
| 49 | if (component == 2) { | ||
| 50 | return descriptor.b_type; | ||
| 51 | } | ||
| 52 | if (component == 3) { | ||
| 53 | return descriptor.a_type; | ||
| 54 | } | ||
| 55 | break; | ||
| 56 | case TextureFormat::A8R8G8B8: | ||
| 57 | if (component == 0) { | ||
| 58 | return descriptor.a_type; | ||
| 59 | } | ||
| 60 | if (component == 1) { | ||
| 61 | return descriptor.r_type; | ||
| 62 | } | ||
| 63 | if (component == 2) { | ||
| 64 | return descriptor.g_type; | ||
| 65 | } | ||
| 66 | if (component == 3) { | ||
| 67 | return descriptor.b_type; | ||
| 68 | } | ||
| 69 | break; | ||
| 70 | case TextureFormat::A2B10G10R10: | ||
| 71 | case TextureFormat::A4B4G4R4: | ||
| 72 | case TextureFormat::A5B5G5R1: | ||
| 73 | case TextureFormat::A1B5G5R5: | ||
| 74 | if (component == 0) { | ||
| 75 | return descriptor.a_type; | ||
| 76 | } | ||
| 77 | if (component == 1) { | ||
| 78 | return descriptor.b_type; | ||
| 79 | } | ||
| 80 | if (component == 2) { | ||
| 81 | return descriptor.g_type; | ||
| 82 | } | ||
| 83 | if (component == 3) { | ||
| 84 | return descriptor.r_type; | ||
| 85 | } | ||
| 86 | break; | ||
| 87 | case TextureFormat::R32_B24G8: | ||
| 88 | if (component == 0) { | ||
| 89 | return descriptor.r_type; | ||
| 90 | } | ||
| 91 | if (component == 1) { | ||
| 92 | return descriptor.b_type; | ||
| 93 | } | ||
| 94 | if (component == 2) { | ||
| 95 | return descriptor.g_type; | ||
| 96 | } | ||
| 97 | break; | ||
| 98 | case TextureFormat::B5G6R5: | ||
| 99 | case TextureFormat::B6G5R5: | ||
| 100 | case TextureFormat::B10G11R11: | ||
| 101 | if (component == 0) { | ||
| 102 | return descriptor.b_type; | ||
| 103 | } | ||
| 104 | if (component == 1) { | ||
| 105 | return descriptor.g_type; | ||
| 106 | } | ||
| 107 | if (component == 2) { | ||
| 108 | return descriptor.r_type; | ||
| 109 | } | ||
| 110 | break; | ||
| 111 | case TextureFormat::R24G8: | ||
| 112 | case TextureFormat::R8G24: | ||
| 113 | case TextureFormat::R8G8: | ||
| 114 | case TextureFormat::G4R4: | ||
| 115 | if (component == 0) { | ||
| 116 | return descriptor.g_type; | ||
| 117 | } | ||
| 118 | if (component == 1) { | ||
| 119 | return descriptor.r_type; | ||
| 120 | } | ||
| 121 | break; | ||
| 122 | default: | ||
| 123 | break; | ||
| 124 | } | ||
| 125 | UNIMPLEMENTED_MSG("Texture format not implemented={}", format); | ||
| 126 | return ComponentType::FLOAT; | ||
| 127 | } | ||
| 128 | |||
| 129 | bool IsComponentEnabled(std::size_t component_mask, std::size_t component) { | ||
| 130 | constexpr u8 R = 0b0001; | ||
| 131 | constexpr u8 G = 0b0010; | ||
| 132 | constexpr u8 B = 0b0100; | ||
| 133 | constexpr u8 A = 0b1000; | ||
| 134 | constexpr std::array<u8, 16> mask = { | ||
| 135 | 0, (R), (G), (R | G), (B), (R | B), (G | B), (R | G | B), | ||
| 136 | (A), (R | A), (G | A), (R | G | A), (B | A), (R | B | A), (G | B | A), (R | G | B | A)}; | ||
| 137 | return std::bitset<4>{mask.at(component_mask)}.test(component); | ||
| 138 | } | ||
| 139 | |||
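Note that the 16-entry table above is the identity mapping (with R=1, G=2, B=4, A=8, entry i equals i), so the test reduces to checking one bit of the selector. An equivalent standalone sketch:

#include <cstddef>
#include <cstdio>

// Equivalent to the table-based IsComponentEnabled above: component 0..3
// (R, G, B, A) is enabled when the corresponding selector bit is set.
bool IsComponentEnabled(std::size_t component_mask, std::size_t component) {
    return ((component_mask >> component) & 1) != 0;
}

int main() {
    std::printf("%d\n", IsComponentEnabled(0b0101, 2)); // 1 (B enabled)
    std::printf("%d\n", IsComponentEnabled(0b0101, 1)); // 0 (G disabled)
}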
| 140 | u32 GetComponentSize(TextureFormat format, std::size_t component) { | ||
| 141 | switch (format) { | ||
| 142 | case TextureFormat::R32G32B32A32: | ||
| 143 | return 32; | ||
| 144 | case TextureFormat::R16G16B16A16: | ||
| 145 | return 16; | ||
| 146 | case TextureFormat::R32G32B32: | ||
| 147 | return component <= 2 ? 32 : 0; | ||
| 148 | case TextureFormat::R32G32: | ||
| 149 | return component <= 1 ? 32 : 0; | ||
| 150 | case TextureFormat::R16G16: | ||
| 151 | return component <= 1 ? 16 : 0; | ||
| 152 | case TextureFormat::R32: | ||
| 153 | return component == 0 ? 32 : 0; | ||
| 154 | case TextureFormat::R16: | ||
| 155 | return component == 0 ? 16 : 0; | ||
| 156 | case TextureFormat::R8: | ||
| 157 | return component == 0 ? 8 : 0; | ||
| 158 | case TextureFormat::R1: | ||
| 159 | return component == 0 ? 1 : 0; | ||
| 160 | case TextureFormat::A8R8G8B8: | ||
| 161 | return 8; | ||
| 162 | case TextureFormat::A2B10G10R10: | ||
| 163 | return (component == 3 || component == 2 || component == 1) ? 10 : 2; | ||
| 164 | case TextureFormat::A4B4G4R4: | ||
| 165 | return 4; | ||
| 166 | case TextureFormat::A5B5G5R1: | ||
| 167 | return (component == 0 || component == 1 || component == 2) ? 5 : 1; | ||
| 168 | case TextureFormat::A1B5G5R5: | ||
| 169 | return (component == 1 || component == 2 || component == 3) ? 5 : 1; | ||
| 170 | case TextureFormat::R32_B24G8: | ||
| 171 | if (component == 0) { | ||
| 172 | return 32; | ||
| 173 | } | ||
| 174 | if (component == 1) { | ||
| 175 | return 24; | ||
| 176 | } | ||
| 177 | if (component == 2) { | ||
| 178 | return 8; | ||
| 179 | } | ||
| 180 | return 0; | ||
| 181 | case TextureFormat::B5G6R5: | ||
| 182 | if (component == 0 || component == 2) { | ||
| 183 | return 5; | ||
| 184 | } | ||
| 185 | if (component == 1) { | ||
| 186 | return 6; | ||
| 187 | } | ||
| 188 | return 0; | ||
| 189 | case TextureFormat::B6G5R5: | ||
| 190 | if (component == 1 || component == 2) { | ||
| 191 | return 5; | ||
| 192 | } | ||
| 193 | if (component == 0) { | ||
| 194 | return 6; | ||
| 195 | } | ||
| 196 | return 0; | ||
| 197 | case TextureFormat::B10G11R11: | ||
| 198 | if (component == 1 || component == 2) { | ||
| 199 | return 11; | ||
| 200 | } | ||
| 201 | if (component == 0) { | ||
| 202 | return 10; | ||
| 203 | } | ||
| 204 | return 0; | ||
| 205 | case TextureFormat::R24G8: | ||
| 206 | if (component == 0) { | ||
| 207 | return 8; | ||
| 208 | } | ||
| 209 | if (component == 1) { | ||
| 210 | return 24; | ||
| 211 | } | ||
| 212 | return 0; | ||
| 213 | case TextureFormat::R8G24: | ||
| 214 | if (component == 0) { | ||
| 215 | return 24; | ||
| 216 | } | ||
| 217 | if (component == 1) { | ||
| 218 | return 8; | ||
| 219 | } | ||
| 220 | return 0; | ||
| 221 | case TextureFormat::R8G8: | ||
| 222 | return (component == 0 || component == 1) ? 8 : 0; | ||
| 223 | case TextureFormat::G4R4: | ||
| 224 | return (component == 0 || component == 1) ? 4 : 0; | ||
| 225 | default: | ||
| 226 | UNIMPLEMENTED_MSG("Texture format not implemented={}", format); | ||
| 227 | return 0; | ||
| 228 | } | ||
| 229 | } | ||
| 230 | |||
| 231 | std::size_t GetImageComponentMask(TextureFormat format) { | ||
| 232 | constexpr u8 R = 0b0001; | ||
| 233 | constexpr u8 G = 0b0010; | ||
| 234 | constexpr u8 B = 0b0100; | ||
| 235 | constexpr u8 A = 0b1000; | ||
| 236 | switch (format) { | ||
| 237 | case TextureFormat::R32G32B32A32: | ||
| 238 | case TextureFormat::R16G16B16A16: | ||
| 239 | case TextureFormat::A8R8G8B8: | ||
| 240 | case TextureFormat::A2B10G10R10: | ||
| 241 | case TextureFormat::A4B4G4R4: | ||
| 242 | case TextureFormat::A5B5G5R1: | ||
| 243 | case TextureFormat::A1B5G5R5: | ||
| 244 | return std::size_t{R | G | B | A}; | ||
| 245 | case TextureFormat::R32G32B32: | ||
| 246 | case TextureFormat::R32_B24G8: | ||
| 247 | case TextureFormat::B5G6R5: | ||
| 248 | case TextureFormat::B6G5R5: | ||
| 249 | case TextureFormat::B10G11R11: | ||
| 250 | return std::size_t{R | G | B}; | ||
| 251 | case TextureFormat::R32G32: | ||
| 252 | case TextureFormat::R16G16: | ||
| 253 | case TextureFormat::R24G8: | ||
| 254 | case TextureFormat::R8G24: | ||
| 255 | case TextureFormat::R8G8: | ||
| 256 | case TextureFormat::G4R4: | ||
| 257 | return std::size_t{R | G}; | ||
| 258 | case TextureFormat::R32: | ||
| 259 | case TextureFormat::R16: | ||
| 260 | case TextureFormat::R8: | ||
| 261 | case TextureFormat::R1: | ||
| 262 | return std::size_t{R}; | ||
| 263 | default: | ||
| 264 | UNIMPLEMENTED_MSG("Texture format not implemented={}", format); | ||
| 265 | return std::size_t{R | G | B | A}; | ||
| 266 | } | ||
| 267 | } | ||
| 268 | |||
| 269 | std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { | ||
| 270 | switch (image_type) { | ||
| 271 | case Tegra::Shader::ImageType::Texture1D: | ||
| 272 | case Tegra::Shader::ImageType::TextureBuffer: | ||
| 273 | return 1; | ||
| 274 | case Tegra::Shader::ImageType::Texture1DArray: | ||
| 275 | case Tegra::Shader::ImageType::Texture2D: | ||
| 276 | return 2; | ||
| 277 | case Tegra::Shader::ImageType::Texture2DArray: | ||
| 278 | case Tegra::Shader::ImageType::Texture3D: | ||
| 279 | return 3; | ||
| 280 | } | ||
| 281 | UNREACHABLE(); | ||
| 282 | return 1; | ||
| 283 | } | ||
| 284 | } // Anonymous namespace | ||
| 285 | |||
| 286 | std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type, u32 component_size, | ||
| 287 | Node original_value) { | ||
| 288 | switch (component_type) { | ||
| 289 | case ComponentType::SNORM: { | ||
| 290 | // range [-1.0, 1.0] | ||
| 291 | auto cnv_value = Operation(OperationCode::FMul, original_value, | ||
| 292 | Immediate(static_cast<float>(1 << component_size) / 2.f - 1.f)); | ||
| 293 | cnv_value = Operation(OperationCode::ICastFloat, std::move(cnv_value)); | ||
| 294 | return {BitfieldExtract(std::move(cnv_value), 0, component_size), true}; | ||
| 295 | } | ||
| 296 | case ComponentType::SINT: | ||
| 297 | case ComponentType::UNORM: { | ||
| 298 | bool is_signed = component_type == ComponentType::SINT; | ||
| 299 | // range [0.0, 1.0] | ||
| 300 | auto cnv_value = Operation(OperationCode::FMul, original_value, | ||
| 301 | Immediate(static_cast<float>(1 << component_size) - 1.f)); | ||
| 302 | return {SignedOperation(OperationCode::ICastFloat, is_signed, std::move(cnv_value)), | ||
| 303 | is_signed}; | ||
| 304 | } | ||
| 305 | case ComponentType::UINT: // range [0, (1 << component_size) - 1] | ||
| 306 | return {std::move(original_value), false}; | ||
| 307 | case ComponentType::FLOAT: | ||
| 308 | if (component_size == 16) { | ||
| 309 | return {Operation(OperationCode::HCastFloat, original_value), true}; | ||
| 310 | } else { | ||
| 311 | return {std::move(original_value), true}; | ||
| 312 | } | ||
| 313 | default: | ||
| 314 | UNIMPLEMENTED_MSG("Unimplemented component type={}", component_type); | ||
| 315 | return {std::move(original_value), true}; | ||
| 316 | } | ||
| 317 | } | ||
| 318 | |||
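The SNORM/UNORM branches above rescale a normalized float back to its integer encoding before the packed store. A scalar sketch of the same rescaling, with rounding simplified to a truncating cast:

#include <cstdint>
#include <cstdio>

// Rescale a normalized float back to its integer encoding, as the UNORM
// and SNORM branches of GetComponentValue do.
uint32_t EncodeUnorm(float v, unsigned bits) {
    return static_cast<uint32_t>(v * ((1u << bits) - 1));
}

uint32_t EncodeSnorm(float v, unsigned bits) {
    const int32_t scaled =
        static_cast<int32_t>(v * ((1 << bits) / 2.0f - 1.0f));
    return static_cast<uint32_t>(scaled) & ((1u << bits) - 1); // low bits only
}

int main() {
    std::printf("%u\n", EncodeUnorm(1.0f, 8));  // 255
    std::printf("%u\n", EncodeSnorm(-1.0f, 8)); // 129 (two's complement -127)
}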
| 319 | u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { | ||
| 320 | const Instruction instr = {program_code[pc]}; | ||
| 321 | const auto opcode = OpCode::Decode(instr); | ||
| 322 | |||
| 323 | const auto GetCoordinates = [this, instr](Tegra::Shader::ImageType image_type) { | ||
| 324 | std::vector<Node> coords; | ||
| 325 | const std::size_t num_coords{GetImageTypeNumCoordinates(image_type)}; | ||
| 326 | coords.reserve(num_coords); | ||
| 327 | for (std::size_t i = 0; i < num_coords; ++i) { | ||
| 328 | coords.push_back(GetRegister(instr.gpr8.Value() + i)); | ||
| 329 | } | ||
| 330 | return coords; | ||
| 331 | }; | ||
| 332 | |||
| 333 | switch (opcode->get().GetId()) { | ||
| 334 | case OpCode::Id::SULD: { | ||
| 335 | UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store != | ||
| 336 | Tegra::Shader::OutOfBoundsStore::Ignore); | ||
| 337 | |||
| 338 | const auto type{instr.suldst.image_type}; | ||
| 339 | auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type) | ||
| 340 | : GetBindlessImage(instr.gpr39, type)}; | ||
| 341 | image.MarkRead(); | ||
| 342 | |||
| 343 | if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::P) { | ||
| 344 | u32 indexer = 0; | ||
| 345 | for (u32 element = 0; element < 4; ++element) { | ||
| 346 | if (!instr.suldst.IsComponentEnabled(element)) { | ||
| 347 | continue; | ||
| 348 | } | ||
| 349 | MetaImage meta{image, {}, element}; | ||
| 350 | Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)); | ||
| 351 | SetTemporary(bb, indexer++, std::move(value)); | ||
| 352 | } | ||
| 353 | for (u32 i = 0; i < indexer; ++i) { | ||
| 354 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | ||
| 355 | } | ||
| 356 | } else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) { | ||
| 357 | UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32 && | ||
| 358 | instr.suldst.GetStoreDataLayout() != StoreType::Bits64); | ||
| 359 | |||
| 360 | auto descriptor = [this, instr] { | ||
| 361 | std::optional<Tegra::Engines::SamplerDescriptor> sampler_descriptor; | ||
| 362 | if (instr.suldst.is_immediate) { | ||
| 363 | sampler_descriptor = | ||
| 364 | registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value())); | ||
| 365 | } else { | ||
| 366 | const Node image_register = GetRegister(instr.gpr39); | ||
| 367 | const auto result = TrackCbuf(image_register, global_code, | ||
| 368 | static_cast<s64>(global_code.size())); | ||
| 369 | const auto buffer = std::get<1>(result); | ||
| 370 | const auto offset = std::get<2>(result); | ||
| 371 | sampler_descriptor = registry.ObtainBindlessSampler(buffer, offset); | ||
| 372 | } | ||
| 373 | if (!sampler_descriptor) { | ||
| 374 | UNREACHABLE_MSG("Failed to obtain image descriptor"); | ||
| 375 | } | ||
| 376 | return *sampler_descriptor; | ||
| 377 | }(); | ||
| 378 | |||
| 379 | const auto comp_mask = GetImageComponentMask(descriptor.format); | ||
| 380 | |||
| 381 | switch (instr.suldst.GetStoreDataLayout()) { | ||
| 382 | case StoreType::Bits32: | ||
| 383 | case StoreType::Bits64: { | ||
| 384 | u32 indexer = 0; | ||
| 385 | u32 shifted_counter = 0; | ||
| 386 | Node value = Immediate(0); | ||
| 387 | for (u32 element = 0; element < 4; ++element) { | ||
| 388 | if (!IsComponentEnabled(comp_mask, element)) { | ||
| 389 | continue; | ||
| 390 | } | ||
| 391 | const auto component_type = GetComponentType(descriptor, element); | ||
| 392 | const auto component_size = GetComponentSize(descriptor.format, element); | ||
| 393 | MetaImage meta{image, {}, element}; | ||
| 394 | |||
| 395 | auto [converted_value, is_signed] = GetComponentValue( | ||
| 396 | component_type, component_size, | ||
| 397 | Operation(OperationCode::ImageLoad, meta, GetCoordinates(type))); | ||
| 398 | |||
| 399 | // Shift the element into its correct position | ||
| 400 | const auto shifted = shifted_counter; | ||
| 401 | if (shifted > 0) { | ||
| 402 | converted_value = | ||
| 403 | SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, | ||
| 404 | std::move(converted_value), Immediate(shifted)); | ||
| 405 | } | ||
| 406 | shifted_counter += component_size; | ||
| 407 | |||
| 408 | // OR the value into the accumulated result | ||
| 409 | value = Operation(OperationCode::UBitwiseOr, value, std::move(converted_value)); | ||
| 410 | |||
| 411 | // Once a full 32-bit word has been accumulated, save it into a temporary | ||
| 412 | if (shifted_counter >= 32) { | ||
| 413 | SetTemporary(bb, indexer++, std::move(value)); | ||
| 414 | // Reset the counter and value to pack the next word | ||
| 415 | value = Immediate(0); | ||
| 416 | shifted_counter = 0; | ||
| 417 | } | ||
| 418 | } | ||
| 419 | for (u32 i = 0; i < indexer; ++i) { | ||
| 420 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | ||
| 421 | } | ||
| 422 | break; | ||
| 423 | } | ||
| 424 | default: | ||
| 425 | UNREACHABLE(); | ||
| 426 | break; | ||
| 427 | } | ||
| 428 | } | ||
| 429 | break; | ||
| 430 | } | ||
| 431 | case OpCode::Id::SUST: { | ||
| 432 | UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P); | ||
| 433 | UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store != | ||
| 434 | Tegra::Shader::OutOfBoundsStore::Ignore); | ||
| 435 | UNIMPLEMENTED_IF(instr.suldst.component_mask_selector != 0xf); // Ensure we have RGBA | ||
| 436 | |||
| 437 | std::vector<Node> values; | ||
| 438 | constexpr std::size_t hardcoded_size{4}; | ||
| 439 | for (std::size_t i = 0; i < hardcoded_size; ++i) { | ||
| 440 | values.push_back(GetRegister(instr.gpr0.Value() + i)); | ||
| 441 | } | ||
| 442 | |||
| 443 | const auto type{instr.suldst.image_type}; | ||
| 444 | auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type) | ||
| 445 | : GetBindlessImage(instr.gpr39, type)}; | ||
| 446 | image.MarkWrite(); | ||
| 447 | |||
| 448 | MetaImage meta{image, std::move(values)}; | ||
| 449 | bb.push_back(Operation(OperationCode::ImageStore, meta, GetCoordinates(type))); | ||
| 450 | break; | ||
| 451 | } | ||
| 452 | case OpCode::Id::SUATOM: { | ||
| 453 | UNIMPLEMENTED_IF(instr.suatom_d.is_ba != 0); | ||
| 454 | |||
| 455 | const OperationCode operation_code = [instr] { | ||
| 456 | switch (instr.suatom_d.operation_type) { | ||
| 457 | case Tegra::Shader::ImageAtomicOperationType::S32: | ||
| 458 | case Tegra::Shader::ImageAtomicOperationType::U32: | ||
| 459 | switch (instr.suatom_d.operation) { | ||
| 460 | case Tegra::Shader::ImageAtomicOperation::Add: | ||
| 461 | return OperationCode::AtomicImageAdd; | ||
| 462 | case Tegra::Shader::ImageAtomicOperation::And: | ||
| 463 | return OperationCode::AtomicImageAnd; | ||
| 464 | case Tegra::Shader::ImageAtomicOperation::Or: | ||
| 465 | return OperationCode::AtomicImageOr; | ||
| 466 | case Tegra::Shader::ImageAtomicOperation::Xor: | ||
| 467 | return OperationCode::AtomicImageXor; | ||
| 468 | case Tegra::Shader::ImageAtomicOperation::Exch: | ||
| 469 | return OperationCode::AtomicImageExchange; | ||
| 470 | default: | ||
| 471 | break; | ||
| 472 | } | ||
| 473 | break; | ||
| 474 | default: | ||
| 475 | break; | ||
| 476 | } | ||
| 477 | UNIMPLEMENTED_MSG("Unimplemented operation={}, type={}", | ||
| 478 | static_cast<u64>(instr.suatom_d.operation.Value()), | ||
| 479 | static_cast<u64>(instr.suatom_d.operation_type.Value())); | ||
| 480 | return OperationCode::AtomicImageAdd; | ||
| 481 | }(); | ||
| 482 | |||
| 483 | Node value = GetRegister(instr.gpr0); | ||
| 484 | |||
| 485 | const auto type = instr.suatom_d.image_type; | ||
| 486 | auto& image = GetImage(instr.image, type); | ||
| 487 | image.MarkAtomic(); | ||
| 488 | |||
| 489 | MetaImage meta{image, {std::move(value)}}; | ||
| 490 | SetRegister(bb, instr.gpr0, Operation(operation_code, meta, GetCoordinates(type))); | ||
| 491 | break; | ||
| 492 | } | ||
| 493 | default: | ||
| 494 | UNIMPLEMENTED_MSG("Unhandled image instruction: {}", opcode->get().GetName()); | ||
| 495 | } | ||
| 496 | |||
| 497 | return pc; | ||
| 498 | } | ||
| 499 | |||
| 500 | ImageEntry& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { | ||
| 501 | const auto offset = static_cast<u32>(image.index.Value()); | ||
| 502 | |||
| 503 | const auto it = | ||
| 504 | std::find_if(std::begin(used_images), std::end(used_images), | ||
| 505 | [offset](const ImageEntry& entry) { return entry.offset == offset; }); | ||
| 506 | if (it != std::end(used_images)) { | ||
| 507 | ASSERT(!it->is_bindless && it->type == type); | ||
| 508 | return *it; | ||
| 509 | } | ||
| 510 | |||
| 511 | const auto next_index = static_cast<u32>(used_images.size()); | ||
| 512 | return used_images.emplace_back(next_index, offset, type); | ||
| 513 | } | ||
| 514 | |||
| 515 | ImageEntry& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) { | ||
| 516 | const Node image_register = GetRegister(reg); | ||
| 517 | const auto result = | ||
| 518 | TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size())); | ||
| 519 | |||
| 520 | const auto buffer = std::get<1>(result); | ||
| 521 | const auto offset = std::get<2>(result); | ||
| 522 | |||
| 523 | const auto it = std::find_if(std::begin(used_images), std::end(used_images), | ||
| 524 | [buffer, offset](const ImageEntry& entry) { | ||
| 525 | return entry.buffer == buffer && entry.offset == offset; | ||
| 526 | }); | ||
| 527 | if (it != std::end(used_images)) { | ||
| 528 | ASSERT(it->is_bindless && it->type == type); | ||
| 529 | return *it; | ||
| 530 | } | ||
| 531 | |||
| 532 | const auto next_index = static_cast<u32>(used_images.size()); | ||
| 533 | return used_images.emplace_back(next_index, offset, buffer, type); | ||
| 534 | } | ||
| 535 | |||
| 536 | } // namespace VideoCommon::Shader | ||
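The Bits32/Bits64 path above shifts each converted component into place, ORs it into an accumulator, and flushes a temporary whenever 32 bits have been filled. A scalar sketch of that accumulation; the component list and widths are illustrative:

#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

// Pack variable-width components into 32-bit words, flushing each word once
// 32 bits have accumulated, mirroring the SULD D_BA loop above.
std::vector<uint32_t> PackComponents(
    const std::vector<std::pair<uint32_t, unsigned>>& comps) {
    std::vector<uint32_t> words;
    uint32_t value = 0;
    unsigned shifted = 0;
    for (const auto& [component, bits] : comps) {
        value |= component << shifted;
        shifted += bits;
        if (shifted >= 32) { // a full word is ready
            words.push_back(value);
            value = 0;
            shifted = 0;
        }
    }
    return words;
}

int main() {
    // Two 16-bit components fill exactly one 32-bit word.
    const auto words = PackComponents({{0x1234u, 16}, {0xabcdu, 16}});
    std::printf("%08x\n", words[0]); // abcd1234
}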
diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp deleted file mode 100644 index 59809bcd8..000000000 --- a/src/video_core/shader/decode/integer_set.cpp +++ /dev/null | |||
| @@ -1,49 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "video_core/engines/shader_bytecode.h" | ||
| 7 | #include "video_core/shader/node_helper.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | |||
| 15 | u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) { | ||
| 16 | const Instruction instr = {program_code[pc]}; | ||
| 17 | |||
| 18 | const Node op_a = GetRegister(instr.gpr8); | ||
| 19 | const Node op_b = [&]() { | ||
| 20 | if (instr.is_b_imm) { | ||
| 21 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 22 | } else if (instr.is_b_gpr) { | ||
| 23 | return GetRegister(instr.gpr20); | ||
| 24 | } else { | ||
| 25 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 26 | } | ||
| 27 | }(); | ||
| 28 | |||
| 29 | // The iset instruction sets a register to 1.0f (when the bf bit is set) or to -1 (when it | ||
| 30 | // is clear) if the condition is true, and to 0 otherwise. | ||
| 31 | const Node second_pred = GetPredicate(instr.iset.pred39, instr.iset.neg_pred != 0); | ||
| 32 | const Node first_pred = | ||
| 33 | GetPredicateComparisonInteger(instr.iset.cond, instr.iset.is_signed, op_a, op_b); | ||
| 34 | |||
| 35 | const OperationCode combiner = GetPredicateCombiner(instr.iset.op); | ||
| 36 | |||
| 37 | const Node predicate = Operation(combiner, first_pred, second_pred); | ||
| 38 | |||
| 39 | const Node true_value = instr.iset.bf ? Immediate(1.0f) : Immediate(-1); | ||
| 40 | const Node false_value = instr.iset.bf ? Immediate(0.0f) : Immediate(0); | ||
| 41 | const Node value = | ||
| 42 | Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); | ||
| 43 | |||
| 44 | SetRegister(bb, instr.gpr0, value); | ||
| 45 | |||
| 46 | return pc; | ||
| 47 | } | ||
| 48 | |||
| 49 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp deleted file mode 100644 index 25e48fef8..000000000 --- a/src/video_core/shader/decode/integer_set_predicate.cpp +++ /dev/null | |||
| @@ -1,53 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | using Tegra::Shader::Pred; | ||
| 16 | |||
| 17 | u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) { | ||
| 18 | const Instruction instr = {program_code[pc]}; | ||
| 19 | |||
| 20 | const Node op_a = GetRegister(instr.gpr8); | ||
| 21 | |||
| 22 | const Node op_b = [&]() { | ||
| 23 | if (instr.is_b_imm) { | ||
| 24 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 25 | } else if (instr.is_b_gpr) { | ||
| 26 | return GetRegister(instr.gpr20); | ||
| 27 | } else { | ||
| 28 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 29 | } | ||
| 30 | }(); | ||
| 31 | |||
| 32 | // We can't use the constant predicate as destination. | ||
| 33 | ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | ||
| 34 | |||
| 35 | const Node second_pred = GetPredicate(instr.isetp.pred39, instr.isetp.neg_pred != 0); | ||
| 36 | const Node predicate = | ||
| 37 | GetPredicateComparisonInteger(instr.isetp.cond, instr.isetp.is_signed, op_a, op_b); | ||
| 38 | |||
| 39 | // Set the primary predicate to the result of Predicate OP SecondPredicate | ||
| 40 | const OperationCode combiner = GetPredicateCombiner(instr.isetp.op); | ||
| 41 | const Node value = Operation(combiner, predicate, second_pred); | ||
| 42 | SetPredicate(bb, instr.isetp.pred3, value); | ||
| 43 | |||
| 44 | if (instr.isetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 45 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled | ||
| 46 | const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate); | ||
| 47 | SetPredicate(bb, instr.isetp.pred0, Operation(combiner, negated_pred, second_pred)); | ||
| 48 | } | ||
| 49 | |||
| 50 | return pc; | ||
| 51 | } | ||
| 52 | |||
| 53 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp deleted file mode 100644 index 7728f600e..000000000 --- a/src/video_core/shader/decode/memory.cpp +++ /dev/null | |||
| @@ -1,493 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <utility> | ||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include <fmt/format.h> | ||
| 10 | |||
| 11 | #include "common/alignment.h" | ||
| 12 | #include "common/assert.h" | ||
| 13 | #include "common/common_types.h" | ||
| 14 | #include "common/logging/log.h" | ||
| 15 | #include "video_core/engines/shader_bytecode.h" | ||
| 16 | #include "video_core/shader/node_helper.h" | ||
| 17 | #include "video_core/shader/shader_ir.h" | ||
| 18 | |||
| 19 | namespace VideoCommon::Shader { | ||
| 20 | |||
| 21 | using std::move; | ||
| 22 | using Tegra::Shader::AtomicOp; | ||
| 23 | using Tegra::Shader::AtomicType; | ||
| 24 | using Tegra::Shader::Attribute; | ||
| 25 | using Tegra::Shader::GlobalAtomicType; | ||
| 26 | using Tegra::Shader::Instruction; | ||
| 27 | using Tegra::Shader::OpCode; | ||
| 28 | using Tegra::Shader::Register; | ||
| 29 | using Tegra::Shader::StoreType; | ||
| 30 | |||
| 31 | namespace { | ||
| 32 | |||
| 33 | OperationCode GetAtomOperation(AtomicOp op) { | ||
| 34 | switch (op) { | ||
| 35 | case AtomicOp::Add: | ||
| 36 | return OperationCode::AtomicIAdd; | ||
| 37 | case AtomicOp::Min: | ||
| 38 | return OperationCode::AtomicIMin; | ||
| 39 | case AtomicOp::Max: | ||
| 40 | return OperationCode::AtomicIMax; | ||
| 41 | case AtomicOp::And: | ||
| 42 | return OperationCode::AtomicIAnd; | ||
| 43 | case AtomicOp::Or: | ||
| 44 | return OperationCode::AtomicIOr; | ||
| 45 | case AtomicOp::Xor: | ||
| 46 | return OperationCode::AtomicIXor; | ||
| 47 | case AtomicOp::Exch: | ||
| 48 | return OperationCode::AtomicIExchange; | ||
| 49 | default: | ||
| 50 | UNIMPLEMENTED_MSG("op={}", op); | ||
| 51 | return OperationCode::AtomicIAdd; | ||
| 52 | } | ||
| 53 | } | ||
| 54 | |||
| 55 | bool IsUnaligned(Tegra::Shader::UniformType uniform_type) { | ||
| 56 | return uniform_type == Tegra::Shader::UniformType::UnsignedByte || | ||
| 57 | uniform_type == Tegra::Shader::UniformType::UnsignedShort; | ||
| 58 | } | ||
| 59 | |||
| 60 | u32 GetUnalignedMask(Tegra::Shader::UniformType uniform_type) { | ||
| 61 | switch (uniform_type) { | ||
| 62 | case Tegra::Shader::UniformType::UnsignedByte: | ||
| 63 | return 0b11; | ||
| 64 | case Tegra::Shader::UniformType::UnsignedShort: | ||
| 65 | return 0b10; | ||
| 66 | default: | ||
| 67 | UNREACHABLE(); | ||
| 68 | return 0; | ||
| 69 | } | ||
| 70 | } | ||
| 71 | |||
| 72 | u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) { | ||
| 73 | switch (uniform_type) { | ||
| 74 | case Tegra::Shader::UniformType::UnsignedByte: | ||
| 75 | return 8; | ||
| 76 | case Tegra::Shader::UniformType::UnsignedShort: | ||
| 77 | return 16; | ||
| 78 | case Tegra::Shader::UniformType::Single: | ||
| 79 | return 32; | ||
| 80 | case Tegra::Shader::UniformType::Double: | ||
| 81 | return 64; | ||
| 82 | case Tegra::Shader::UniformType::Quad: | ||
| 83 | case Tegra::Shader::UniformType::UnsignedQuad: | ||
| 84 | return 128; | ||
| 85 | default: | ||
| 86 | UNIMPLEMENTED_MSG("Unimplemented size={}!", uniform_type); | ||
| 87 | return 32; | ||
| 88 | } | ||
| 89 | } | ||
| 90 | |||
| 91 | Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) { | ||
| 92 | Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask)); | ||
| 93 | offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3)); | ||
| 94 | return Operation(OperationCode::UBitfieldExtract, move(value), move(offset), Immediate(size)); | ||
| 95 | } | ||
| 96 | |||
| 97 | Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) { | ||
| 98 | Node offset = Operation(OperationCode::UBitwiseAnd, move(address), Immediate(mask)); | ||
| 99 | offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3)); | ||
| 100 | return Operation(OperationCode::UBitfieldInsert, move(dest), move(value), move(offset), | ||
| 101 | Immediate(size)); | ||
| 102 | } | ||
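For reference, a plain C++ sketch of the arithmetic the ExtractUnaligned node graph above builds, for an unsigned byte load (mask = 0b11, size = 8); the address value in the comment is hypothetical:

```cpp
#include <cstdint>
using u32 = std::uint32_t; // mirrors common/common_types.h

// Select the addressed byte inside the loaded 32-bit word.
u32 ExtractUnalignedByte(u32 loaded_word, u32 address) {
    const u32 bit_offset = (address & 0b11) << 3; // low address bits * 8
    return (loaded_word >> bit_offset) & 0xff;    // UBitfieldExtract(value, offset, 8)
    // e.g. address 0x1002 -> bit_offset 16 -> extracts bits [16, 24)
}
```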
| 103 | |||
| 104 | Node Sign16Extend(Node value) { | ||
| 105 | Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15)); | ||
| 106 | Node is_sign = Operation(OperationCode::LogicalUEqual, move(sign), Immediate(1U << 15)); | ||
| 107 | Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0)); | ||
| 108 | return Operation(OperationCode::UBitwiseOr, move(value), move(extend)); | ||
| 109 | } | ||
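Equivalently, in plain C++ (a sketch; it assumes the upper 16 bits of `value` are already zero, as the 16-bit ExtractUnaligned result guarantees):

```cpp
#include <cstdint>
using u32 = std::uint32_t;

// Sign-extend a 16-bit quantity sitting in the low half of `value`.
u32 Sign16ExtendExample(u32 value) {
    const u32 extend = (value & 0x8000) != 0 ? 0xFFFF0000 : 0;
    return value | extend; // bits [16, 32) assumed zero on entry
}
```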
| 110 | |||
| 111 | } // Anonymous namespace | ||
| 112 | |||
| 113 | u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | ||
| 114 | const Instruction instr = {program_code[pc]}; | ||
| 115 | const auto opcode = OpCode::Decode(instr); | ||
| 116 | |||
| 117 | switch (opcode->get().GetId()) { | ||
| 118 | case OpCode::Id::LD_A: { | ||
| 119 | // Note: Shouldn't this be interp mode flat? That is, with no interpolation performed. | ||
| 120 | UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, | ||
| 121 | "Indirect attribute loads are not supported"); | ||
| 122 | UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, | ||
| 123 | "Unaligned attribute loads are not supported"); | ||
| 124 | UNIMPLEMENTED_IF_MSG(instr.attribute.fmt20.IsPhysical() && | ||
| 125 | instr.attribute.fmt20.size != Tegra::Shader::AttributeSize::Word, | ||
| 126 | "Non-32 bits PHYS reads are not implemented"); | ||
| 127 | |||
| 128 | const Node buffer{GetRegister(instr.gpr39)}; | ||
| 129 | |||
| 130 | u64 next_element = instr.attribute.fmt20.element; | ||
| 131 | auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value()); | ||
| 132 | |||
| 133 | const auto LoadNextElement = [&](u32 reg_offset) { | ||
| 134 | const Node attribute{instr.attribute.fmt20.IsPhysical() | ||
| 135 | ? GetPhysicalInputAttribute(instr.gpr8, buffer) | ||
| 136 | : GetInputAttribute(static_cast<Attribute::Index>(next_index), | ||
| 137 | next_element, buffer)}; | ||
| 138 | |||
| 139 | SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute); | ||
| 140 | |||
| 141 | // Load the next attribute element into the following register. If the element | ||
| 142 | // to load goes beyond the vec4 size, load the first element of the next | ||
| 143 | // attribute. | ||
| 144 | next_element = (next_element + 1) % 4; | ||
| 145 | next_index = next_index + (next_element == 0 ? 1 : 0); | ||
| 146 | }; | ||
| 147 | |||
| 148 | const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1; | ||
| 149 | for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { | ||
| 150 | LoadNextElement(reg_offset); | ||
| 151 | } | ||
| 152 | break; | ||
| 153 | } | ||
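To illustrate the wrap-around handled above, a hypothetical walk starting at element 2 of some attribute with num_words = 4:

```cpp
// reg_offset 0 -> (index,     element 2)
// reg_offset 1 -> (index,     element 3)
// reg_offset 2 -> (index + 1, element 0)  // wrapped past the vec4
// reg_offset 3 -> (index + 1, element 1)
```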
| 154 | case OpCode::Id::LD_C: { | ||
| 155 | UNIMPLEMENTED_IF(instr.ld_c.unknown != 0); | ||
| 156 | |||
| 157 | Node index = GetRegister(instr.gpr8); | ||
| 158 | |||
| 159 | const Node op_a = | ||
| 160 | GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index); | ||
| 161 | |||
| 162 | switch (instr.ld_c.type.Value()) { | ||
| 163 | case Tegra::Shader::UniformType::Single: | ||
| 164 | SetRegister(bb, instr.gpr0, op_a); | ||
| 165 | break; | ||
| 166 | |||
| 167 | case Tegra::Shader::UniformType::Double: { | ||
| 168 | const Node op_b = | ||
| 169 | GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index); | ||
| 170 | |||
| 171 | SetTemporary(bb, 0, op_a); | ||
| 172 | SetTemporary(bb, 1, op_b); | ||
| 173 | SetRegister(bb, instr.gpr0, GetTemporary(0)); | ||
| 174 | SetRegister(bb, instr.gpr0.Value() + 1, GetTemporary(1)); | ||
| 175 | break; | ||
| 176 | } | ||
| 177 | default: | ||
| 178 | UNIMPLEMENTED_MSG("Unhandled type: {}", instr.ld_c.type.Value()); | ||
| 179 | } | ||
| 180 | break; | ||
| 181 | } | ||
| 182 | case OpCode::Id::LD_L: | ||
| 183 | LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", instr.ld_l.unknown); | ||
| 184 | [[fallthrough]]; | ||
| 185 | case OpCode::Id::LD_S: { | ||
| 186 | const auto GetAddress = [&](s32 offset) { | ||
| 187 | ASSERT(offset % 4 == 0); | ||
| 188 | const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset); | ||
| 189 | return Operation(OperationCode::IAdd, GetRegister(instr.gpr8), immediate_offset); | ||
| 190 | }; | ||
| 191 | const auto GetMemory = [&](s32 offset) { | ||
| 192 | return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(GetAddress(offset)) | ||
| 193 | : GetLocalMemory(GetAddress(offset)); | ||
| 194 | }; | ||
| 195 | |||
| 196 | switch (instr.ldst_sl.type.Value()) { | ||
| 197 | case StoreType::Signed16: | ||
| 198 | SetRegister(bb, instr.gpr0, | ||
| 199 | Sign16Extend(ExtractUnaligned(GetMemory(0), GetAddress(0), 0b10, 16))); | ||
| 200 | break; | ||
| 201 | case StoreType::Bits32: | ||
| 202 | case StoreType::Bits64: | ||
| 203 | case StoreType::Bits128: { | ||
| 204 | const u32 count = [&] { | ||
| 205 | switch (instr.ldst_sl.type.Value()) { | ||
| 206 | case StoreType::Bits32: | ||
| 207 | return 1; | ||
| 208 | case StoreType::Bits64: | ||
| 209 | return 2; | ||
| 210 | case StoreType::Bits128: | ||
| 211 | return 4; | ||
| 212 | default: | ||
| 213 | UNREACHABLE(); | ||
| 214 | return 0; | ||
| 215 | } | ||
| 216 | }(); | ||
| 217 | for (u32 i = 0; i < count; ++i) { | ||
| 218 | SetTemporary(bb, i, GetMemory(i * 4)); | ||
| 219 | } | ||
| 220 | for (u32 i = 0; i < count; ++i) { | ||
| 221 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | ||
| 222 | } | ||
| 223 | break; | ||
| 224 | } | ||
| 225 | default: | ||
| 226 | UNIMPLEMENTED_MSG("{} Unhandled type: {}", opcode->get().GetName(), | ||
| 227 | instr.ldst_sl.type.Value()); | ||
| 228 | } | ||
| 229 | break; | ||
| 230 | } | ||
| 231 | case OpCode::Id::LD: | ||
| 232 | case OpCode::Id::LDG: { | ||
| 233 | const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType { | ||
| 234 | switch (opcode->get().GetId()) { | ||
| 235 | case OpCode::Id::LD: | ||
| 236 | UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended LD is not implemented"); | ||
| 237 | return instr.generic.type; | ||
| 238 | case OpCode::Id::LDG: | ||
| 239 | return instr.ldg.type; | ||
| 240 | default: | ||
| 241 | UNREACHABLE(); | ||
| 242 | return {}; | ||
| 243 | } | ||
| 244 | }(); | ||
| 245 | |||
| 246 | const auto [real_address_base, base_address, descriptor] = | ||
| 247 | TrackGlobalMemory(bb, instr, true, false); | ||
| 248 | |||
| 249 | const u32 size = GetMemorySize(type); | ||
| 250 | const u32 count = Common::AlignUp(size, 32) / 32; | ||
| 251 | if (!real_address_base || !base_address) { | ||
| 252 | // Tracking failed, load zeroes. | ||
| 253 | for (u32 i = 0; i < count; ++i) { | ||
| 254 | SetRegister(bb, instr.gpr0.Value() + i, Immediate(0.0f)); | ||
| 255 | } | ||
| 256 | break; | ||
| 257 | } | ||
| 258 | |||
| 259 | for (u32 i = 0; i < count; ++i) { | ||
| 260 | const Node it_offset = Immediate(i * 4); | ||
| 261 | const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); | ||
| 262 | Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | ||
| 263 | |||
| 264 | // To handle unaligned loads, get the bytes used to dereference global memory and extract | ||
| 265 | // those bytes from the loaded u32. | ||
| 266 | if (IsUnaligned(type)) { | ||
| 267 | gmem = ExtractUnaligned(gmem, real_address, GetUnalignedMask(type), size); | ||
| 268 | } | ||
| 269 | |||
| 270 | SetTemporary(bb, i, gmem); | ||
| 271 | } | ||
| 272 | |||
| 273 | for (u32 i = 0; i < count; ++i) { | ||
| 274 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | ||
| 275 | } | ||
| 276 | break; | ||
| 277 | } | ||
| 278 | case OpCode::Id::ST_A: { | ||
| 279 | UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, | ||
| 280 | "Indirect attribute loads are not supported"); | ||
| 281 | UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, | ||
| 282 | "Unaligned attribute loads are not supported"); | ||
| 283 | |||
| 284 | u64 element = instr.attribute.fmt20.element; | ||
| 285 | auto index = static_cast<u64>(instr.attribute.fmt20.index.Value()); | ||
| 286 | |||
| 287 | const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1; | ||
| 288 | for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { | ||
| 289 | Node dest; | ||
| 290 | if (instr.attribute.fmt20.patch) { | ||
| 291 | const u32 offset = static_cast<u32>(index) * 4 + static_cast<u32>(element); | ||
| 292 | dest = MakeNode<PatchNode>(offset); | ||
| 293 | } else { | ||
| 294 | dest = GetOutputAttribute(static_cast<Attribute::Index>(index), element, | ||
| 295 | GetRegister(instr.gpr39)); | ||
| 296 | } | ||
| 297 | const auto src = GetRegister(instr.gpr0.Value() + reg_offset); | ||
| 298 | |||
| 299 | bb.push_back(Operation(OperationCode::Assign, dest, src)); | ||
| 300 | |||
| 301 | // Load the next attribute element into the following register. If the element to load | ||
| 302 | // goes beyond the vec4 size, load the first element of the next attribute. | ||
| 303 | element = (element + 1) % 4; | ||
| 304 | index = index + (element == 0 ? 1 : 0); | ||
| 305 | } | ||
| 306 | break; | ||
| 307 | } | ||
| 308 | case OpCode::Id::ST_L: | ||
| 309 | LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}", instr.st_l.cache_management.Value()); | ||
| 310 | [[fallthrough]]; | ||
| 311 | case OpCode::Id::ST_S: { | ||
| 312 | const auto GetAddress = [&](s32 offset) { | ||
| 313 | ASSERT(offset % 4 == 0); | ||
| 314 | const Node immediate = Immediate(static_cast<s32>(instr.smem_imm) + offset); | ||
| 315 | return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate); | ||
| 316 | }; | ||
| 317 | |||
| 318 | const bool is_local = opcode->get().GetId() == OpCode::Id::ST_L; | ||
| 319 | const auto set_memory = is_local ? &ShaderIR::SetLocalMemory : &ShaderIR::SetSharedMemory; | ||
| 320 | const auto get_memory = is_local ? &ShaderIR::GetLocalMemory : &ShaderIR::GetSharedMemory; | ||
| 321 | |||
| 322 | switch (instr.ldst_sl.type.Value()) { | ||
| 323 | case StoreType::Bits128: | ||
| 324 | (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3)); | ||
| 325 | (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2)); | ||
| 326 | [[fallthrough]]; | ||
| 327 | case StoreType::Bits64: | ||
| 328 | (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1)); | ||
| 329 | [[fallthrough]]; | ||
| 330 | case StoreType::Bits32: | ||
| 331 | (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0)); | ||
| 332 | break; | ||
| 333 | case StoreType::Unsigned16: | ||
| 334 | case StoreType::Signed16: { | ||
| 335 | Node address = GetAddress(0); | ||
| 336 | Node memory = (this->*get_memory)(address); | ||
| 337 | (this->*set_memory)( | ||
| 338 | bb, address, InsertUnaligned(memory, GetRegister(instr.gpr0), address, 0b10, 16)); | ||
| 339 | break; | ||
| 340 | } | ||
| 341 | default: | ||
| 342 | UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(), | ||
| 343 | instr.ldst_sl.type.Value()); | ||
| 344 | } | ||
| 345 | break; | ||
| 346 | } | ||
| 347 | case OpCode::Id::ST: | ||
| 348 | case OpCode::Id::STG: { | ||
| 349 | const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType { | ||
| 350 | switch (opcode->get().GetId()) { | ||
| 351 | case OpCode::Id::ST: | ||
| 352 | UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended ST is not implemented"); | ||
| 353 | return instr.generic.type; | ||
| 354 | case OpCode::Id::STG: | ||
| 355 | return instr.stg.type; | ||
| 356 | default: | ||
| 357 | UNREACHABLE(); | ||
| 358 | return {}; | ||
| 359 | } | ||
| 360 | }(); | ||
| 361 | |||
| 362 | // For unaligned reads we have to read memory too. | ||
| 363 | const bool is_read = IsUnaligned(type); | ||
| 364 | const auto [real_address_base, base_address, descriptor] = | ||
| 365 | TrackGlobalMemory(bb, instr, is_read, true); | ||
| 366 | if (!real_address_base || !base_address) { | ||
| 367 | // Tracking failed, skip the store. | ||
| 368 | break; | ||
| 369 | } | ||
| 370 | |||
| 371 | const u32 size = GetMemorySize(type); | ||
| 372 | const u32 count = Common::AlignUp(size, 32) / 32; | ||
| 373 | for (u32 i = 0; i < count; ++i) { | ||
| 374 | const Node it_offset = Immediate(i * 4); | ||
| 375 | const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); | ||
| 376 | const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | ||
| 377 | Node value = GetRegister(instr.gpr0.Value() + i); | ||
| 378 | |||
| 379 | if (IsUnaligned(type)) { | ||
| 380 | const u32 mask = GetUnalignedMask(type); | ||
| 381 | value = InsertUnaligned(gmem, move(value), real_address, mask, size); | ||
| 382 | } | ||
| 383 | |||
| 384 | bb.push_back(Operation(OperationCode::Assign, gmem, value)); | ||
| 385 | } | ||
| 386 | break; | ||
| 387 | } | ||
| 388 | case OpCode::Id::RED: { | ||
| 389 | UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32, "type={}", | ||
| 390 | instr.red.type.Value()); | ||
| 391 | const auto [real_address, base_address, descriptor] = | ||
| 392 | TrackGlobalMemory(bb, instr, true, true); | ||
| 393 | if (!real_address || !base_address) { | ||
| 394 | // Tracking failed, skip atomic. | ||
| 395 | break; | ||
| 396 | } | ||
| 397 | Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | ||
| 398 | Node value = GetRegister(instr.gpr0); | ||
| 399 | bb.push_back(Operation(GetAtomOperation(instr.red.operation), move(gmem), move(value))); | ||
| 400 | break; | ||
| 401 | } | ||
| 402 | case OpCode::Id::ATOM: { | ||
| 403 | UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc || | ||
| 404 | instr.atom.operation == AtomicOp::Dec || | ||
| 405 | instr.atom.operation == AtomicOp::SafeAdd, | ||
| 406 | "operation={}", instr.atom.operation.Value()); | ||
| 407 | UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 || | ||
| 408 | instr.atom.type == GlobalAtomicType::U64 || | ||
| 409 | instr.atom.type == GlobalAtomicType::F16x2_FTZ_RN || | ||
| 410 | instr.atom.type == GlobalAtomicType::F32_FTZ_RN, | ||
| 411 | "type={}", instr.atom.type.Value()); | ||
| 412 | |||
| 413 | const auto [real_address, base_address, descriptor] = | ||
| 414 | TrackGlobalMemory(bb, instr, true, true); | ||
| 415 | if (!real_address || !base_address) { | ||
| 416 | // Tracking failed, skip atomic. | ||
| 417 | break; | ||
| 418 | } | ||
| 419 | |||
| 420 | const bool is_signed = | ||
| 421 | instr.atom.type == GlobalAtomicType::S32 || instr.atom.type == GlobalAtomicType::S64; | ||
| 422 | Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | ||
| 423 | SetRegister(bb, instr.gpr0, | ||
| 424 | SignedOperation(GetAtomOperation(instr.atom.operation), is_signed, gmem, | ||
| 425 | GetRegister(instr.gpr20))); | ||
| 426 | break; | ||
| 427 | } | ||
| 428 | case OpCode::Id::ATOMS: { | ||
| 429 | UNIMPLEMENTED_IF_MSG(instr.atoms.operation == AtomicOp::Inc || | ||
| 430 | instr.atoms.operation == AtomicOp::Dec, | ||
| 431 | "operation={}", instr.atoms.operation.Value()); | ||
| 432 | UNIMPLEMENTED_IF_MSG(instr.atoms.type == AtomicType::S64 || | ||
| 433 | instr.atoms.type == AtomicType::U64, | ||
| 434 | "type={}", instr.atoms.type.Value()); | ||
| 435 | const bool is_signed = | ||
| 436 | instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64; | ||
| 437 | const s32 offset = instr.atoms.GetImmediateOffset(); | ||
| 438 | Node address = GetRegister(instr.gpr8); | ||
| 439 | address = Operation(OperationCode::IAdd, move(address), Immediate(offset)); | ||
| 440 | SetRegister(bb, instr.gpr0, | ||
| 441 | SignedOperation(GetAtomOperation(instr.atoms.operation), is_signed, | ||
| 442 | GetSharedMemory(move(address)), GetRegister(instr.gpr20))); | ||
| 443 | break; | ||
| 444 | } | ||
| 445 | case OpCode::Id::AL2P: { | ||
| 446 | // Ignore al2p.direction since we don't care about it. | ||
| 447 | |||
| 448 | // Calculate emulation fake physical address. | ||
| 449 | const Node fixed_address{Immediate(static_cast<u32>(instr.al2p.address))}; | ||
| 450 | const Node reg{GetRegister(instr.gpr8)}; | ||
| 451 | const Node fake_address{Operation(OperationCode::IAdd, NO_PRECISE, reg, fixed_address)}; | ||
| 452 | |||
| 453 | // Set the fake address to target register. | ||
| 454 | SetRegister(bb, instr.gpr0, fake_address); | ||
| 455 | |||
| 456 | // Signal the shader IR to declare all possible attributes and varyings | ||
| 457 | uses_physical_attributes = true; | ||
| 458 | break; | ||
| 459 | } | ||
| 460 | default: | ||
| 461 | UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); | ||
| 462 | } | ||
| 463 | |||
| 464 | return pc; | ||
| 465 | } | ||
| 466 | |||
| 467 | std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb, | ||
| 468 | Instruction instr, | ||
| 469 | bool is_read, bool is_write) { | ||
| 470 | const auto addr_register{GetRegister(instr.gmem.gpr)}; | ||
| 471 | const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; | ||
| 472 | |||
| 473 | const auto [base_address, index, offset] = | ||
| 474 | TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size())); | ||
| 475 | ASSERT_OR_EXECUTE_MSG( | ||
| 476 | base_address != nullptr, { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); }, | ||
| 477 | "Global memory tracking failed"); | ||
| 478 | |||
| 479 | bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset))); | ||
| 480 | |||
| 481 | const GlobalMemoryBase descriptor{index, offset}; | ||
| 482 | const auto& entry = used_global_memory.try_emplace(descriptor).first; | ||
| 483 | auto& usage = entry->second; | ||
| 484 | usage.is_written |= is_write; | ||
| 485 | usage.is_read |= is_read; | ||
| 486 | |||
| 487 | const auto real_address = | ||
| 488 | Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register); | ||
| 489 | |||
| 490 | return {real_address, base_address, descriptor}; | ||
| 491 | } | ||
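Every caller of TrackGlobalMemory follows the same guard pattern, as the LD/ST/RED/ATOM cases above show (sketched here as comments):

```cpp
// const auto [real_address, base_address, descriptor] =
//     TrackGlobalMemory(bb, instr, is_read, is_write);
// if (!real_address || !base_address) {
//     // tracking failed: loads substitute zeroes, stores/atomics are skipped
// }
// Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
```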
| 492 | |||
| 493 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp deleted file mode 100644 index 5f88537bc..000000000 --- a/src/video_core/shader/decode/other.cpp +++ /dev/null | |||
| @@ -1,322 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "common/logging/log.h" | ||
| 8 | #include "video_core/engines/shader_bytecode.h" | ||
| 9 | #include "video_core/shader/node_helper.h" | ||
| 10 | #include "video_core/shader/shader_ir.h" | ||
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | using std::move; | ||
| 15 | using Tegra::Shader::ConditionCode; | ||
| 16 | using Tegra::Shader::Instruction; | ||
| 17 | using Tegra::Shader::IpaInterpMode; | ||
| 18 | using Tegra::Shader::OpCode; | ||
| 19 | using Tegra::Shader::PixelImap; | ||
| 20 | using Tegra::Shader::Register; | ||
| 21 | using Tegra::Shader::SystemVariable; | ||
| 22 | |||
| 23 | using Index = Tegra::Shader::Attribute::Index; | ||
| 24 | |||
| 25 | u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | ||
| 26 | const Instruction instr = {program_code[pc]}; | ||
| 27 | const auto opcode = OpCode::Decode(instr); | ||
| 28 | |||
| 29 | switch (opcode->get().GetId()) { | ||
| 30 | case OpCode::Id::NOP: { | ||
| 31 | UNIMPLEMENTED_IF(instr.nop.cc != Tegra::Shader::ConditionCode::T); | ||
| 32 | UNIMPLEMENTED_IF(instr.nop.trigger != 0); | ||
| 33 | // With the previous preconditions, this instruction is a no-operation. | ||
| 34 | break; | ||
| 35 | } | ||
| 36 | case OpCode::Id::EXIT: { | ||
| 37 | const ConditionCode cc = instr.flow_condition_code; | ||
| 38 | UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "EXIT condition code used: {}", cc); | ||
| 39 | |||
| 40 | switch (instr.flow.cond) { | ||
| 41 | case Tegra::Shader::FlowCondition::Always: | ||
| 42 | bb.push_back(Operation(OperationCode::Exit)); | ||
| 43 | if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) { | ||
| 44 | // If this is an unconditional exit then just end processing here, | ||
| 45 | // otherwise we have to account for the possibility of the condition | ||
| 46 | // not being met, so continue processing the next instruction. | ||
| 47 | pc = MAX_PROGRAM_LENGTH - 1; | ||
| 48 | } | ||
| 49 | break; | ||
| 50 | |||
| 51 | case Tegra::Shader::FlowCondition::Fcsm_Tr: | ||
| 52 | // TODO(bunnei): What is this used for? If we assume this condition is not | ||
| 53 | // satisfied, dual vertex shaders in Farming Simulator make more sense | ||
| 54 | UNIMPLEMENTED_MSG("Skipping unknown FlowCondition::Fcsm_Tr"); | ||
| 55 | break; | ||
| 56 | |||
| 57 | default: | ||
| 58 | UNIMPLEMENTED_MSG("Unhandled flow condition: {}", instr.flow.cond.Value()); | ||
| 59 | } | ||
| 60 | break; | ||
| 61 | } | ||
| 62 | case OpCode::Id::KIL: { | ||
| 63 | UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always); | ||
| 64 | |||
| 65 | const ConditionCode cc = instr.flow_condition_code; | ||
| 66 | UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "KIL condition code used: {}", cc); | ||
| 67 | |||
| 68 | bb.push_back(Operation(OperationCode::Discard)); | ||
| 69 | break; | ||
| 70 | } | ||
| 71 | case OpCode::Id::S2R: { | ||
| 72 | const Node value = [this, instr] { | ||
| 73 | switch (instr.sys20) { | ||
| 74 | case SystemVariable::LaneId: | ||
| 75 | return Operation(OperationCode::ThreadId); | ||
| 76 | case SystemVariable::InvocationId: | ||
| 77 | return Operation(OperationCode::InvocationId); | ||
| 78 | case SystemVariable::Ydirection: | ||
| 79 | uses_y_negate = true; | ||
| 80 | return Operation(OperationCode::YNegate); | ||
| 81 | case SystemVariable::InvocationInfo: | ||
| 82 | LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete"); | ||
| 83 | return Immediate(0x00ff'0000U); | ||
| 84 | case SystemVariable::WscaleFactorXY: | ||
| 85 | UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented"); | ||
| 86 | return Immediate(0U); | ||
| 87 | case SystemVariable::WscaleFactorZ: | ||
| 88 | UNIMPLEMENTED_MSG("S2R WscaleFactorZ is not implemented"); | ||
| 89 | return Immediate(0U); | ||
| 90 | case SystemVariable::Tid: { | ||
| 91 | Node val = Immediate(0); | ||
| 92 | val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdX), 0, 9); | ||
| 93 | val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdY), 16, 9); | ||
| 94 | val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdZ), 26, 5); | ||
| 95 | return val; | ||
| 96 | } | ||
| 97 | case SystemVariable::TidX: | ||
| 98 | return Operation(OperationCode::LocalInvocationIdX); | ||
| 99 | case SystemVariable::TidY: | ||
| 100 | return Operation(OperationCode::LocalInvocationIdY); | ||
| 101 | case SystemVariable::TidZ: | ||
| 102 | return Operation(OperationCode::LocalInvocationIdZ); | ||
| 103 | case SystemVariable::CtaIdX: | ||
| 104 | return Operation(OperationCode::WorkGroupIdX); | ||
| 105 | case SystemVariable::CtaIdY: | ||
| 106 | return Operation(OperationCode::WorkGroupIdY); | ||
| 107 | case SystemVariable::CtaIdZ: | ||
| 108 | return Operation(OperationCode::WorkGroupIdZ); | ||
| 109 | case SystemVariable::EqMask: | ||
| 110 | case SystemVariable::LtMask: | ||
| 111 | case SystemVariable::LeMask: | ||
| 112 | case SystemVariable::GtMask: | ||
| 113 | case SystemVariable::GeMask: | ||
| 114 | uses_warps = true; | ||
| 115 | switch (instr.sys20) { | ||
| 116 | case SystemVariable::EqMask: | ||
| 117 | return Operation(OperationCode::ThreadEqMask); | ||
| 118 | case SystemVariable::LtMask: | ||
| 119 | return Operation(OperationCode::ThreadLtMask); | ||
| 120 | case SystemVariable::LeMask: | ||
| 121 | return Operation(OperationCode::ThreadLeMask); | ||
| 122 | case SystemVariable::GtMask: | ||
| 123 | return Operation(OperationCode::ThreadGtMask); | ||
| 124 | case SystemVariable::GeMask: | ||
| 125 | return Operation(OperationCode::ThreadGeMask); | ||
| 126 | default: | ||
| 127 | UNREACHABLE(); | ||
| 128 | return Immediate(0u); | ||
| 129 | } | ||
| 130 | default: | ||
| 131 | UNIMPLEMENTED_MSG("Unhandled system move: {}", instr.sys20.Value()); | ||
| 132 | return Immediate(0u); | ||
| 133 | } | ||
| 134 | }(); | ||
| 135 | SetRegister(bb, instr.gpr0, value); | ||
| 136 | |||
| 137 | break; | ||
| 138 | } | ||
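The Tid case above packs the three local invocation IDs into one register; a sketch of the layout it builds, with field positions and widths taken from the BitfieldInsert calls:

```cpp
#include <cstdint>
using u32 = std::uint32_t;

u32 TidX(u32 tid) { return tid & 0x1FF; }         // bits [0, 9)
u32 TidY(u32 tid) { return (tid >> 16) & 0x1FF; } // bits [16, 25)
u32 TidZ(u32 tid) { return (tid >> 26) & 0x1F; }  // bits [26, 31)
```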
| 139 | case OpCode::Id::BRA: { | ||
| 140 | Node branch; | ||
| 141 | if (instr.bra.constant_buffer == 0) { | ||
| 142 | const u32 target = pc + instr.bra.GetBranchTarget(); | ||
| 143 | branch = Operation(OperationCode::Branch, Immediate(target)); | ||
| 144 | } else { | ||
| 145 | const u32 target = pc + 1; | ||
| 146 | const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset()); | ||
| 147 | const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, | ||
| 148 | PRECISE, op_a, Immediate(3)); | ||
| 149 | const Node operand = | ||
| 150 | Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); | ||
| 151 | branch = Operation(OperationCode::BranchIndirect, operand); | ||
| 152 | } | ||
| 153 | |||
| 154 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; | ||
| 155 | if (cc != Tegra::Shader::ConditionCode::T) { | ||
| 156 | bb.push_back(Conditional(GetConditionCode(cc), {branch})); | ||
| 157 | } else { | ||
| 158 | bb.push_back(branch); | ||
| 159 | } | ||
| 160 | break; | ||
| 161 | } | ||
| 162 | case OpCode::Id::BRX: { | ||
| 163 | Node operand; | ||
| 164 | if (instr.brx.constant_buffer != 0) { | ||
| 165 | const s32 target = pc + 1; | ||
| 166 | const Node index = GetRegister(instr.gpr8); | ||
| 167 | const Node op_a = | ||
| 168 | GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index); | ||
| 169 | const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, | ||
| 170 | PRECISE, op_a, Immediate(3)); | ||
| 171 | operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); | ||
| 172 | } else { | ||
| 173 | const s32 target = pc + instr.brx.GetBranchExtend(); | ||
| 174 | const Node op_a = GetRegister(instr.gpr8); | ||
| 175 | const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, | ||
| 176 | PRECISE, op_a, Immediate(3)); | ||
| 177 | operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); | ||
| 178 | } | ||
| 179 | const Node branch = Operation(OperationCode::BranchIndirect, operand); | ||
| 180 | |||
| 181 | const ConditionCode cc = instr.flow_condition_code; | ||
| 182 | if (cc != ConditionCode::T) { | ||
| 183 | bb.push_back(Conditional(GetConditionCode(cc), {branch})); | ||
| 184 | } else { | ||
| 185 | bb.push_back(branch); | ||
| 186 | } | ||
| 187 | break; | ||
| 188 | } | ||
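Both indirect-branch paths shift the fetched value right by 3; a note on why, inferred from the surrounding code rather than documented hardware behaviour:

```cpp
// The fetched constant-buffer or register value is a byte offset, while pc
// counts 8-byte instruction words; >> 3 converts bytes to instruction indices,
// and the result is applied relative to the following instruction (pc + 1).
```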
| 189 | case OpCode::Id::SSY: { | ||
| 190 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | ||
| 191 | "Constant buffer flow is not supported"); | ||
| 192 | |||
| 193 | if (disable_flow_stack) { | ||
| 194 | break; | ||
| 195 | } | ||
| 196 | |||
| 197 | // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC. | ||
| 198 | const u32 target = pc + instr.bra.GetBranchTarget(); | ||
| 199 | bb.push_back( | ||
| 200 | Operation(OperationCode::PushFlowStack, MetaStackClass::Ssy, Immediate(target))); | ||
| 201 | break; | ||
| 202 | } | ||
| 203 | case OpCode::Id::PBK: { | ||
| 204 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | ||
| 205 | "Constant buffer PBK is not supported"); | ||
| 206 | |||
| 207 | if (disable_flow_stack) { | ||
| 208 | break; | ||
| 209 | } | ||
| 210 | |||
| 211 | // PBK pushes to a stack the address where BRK will jump to. | ||
| 212 | const u32 target = pc + instr.bra.GetBranchTarget(); | ||
| 213 | bb.push_back( | ||
| 214 | Operation(OperationCode::PushFlowStack, MetaStackClass::Pbk, Immediate(target))); | ||
| 215 | break; | ||
| 216 | } | ||
| 217 | case OpCode::Id::SYNC: { | ||
| 218 | const ConditionCode cc = instr.flow_condition_code; | ||
| 219 | UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "SYNC condition code used: {}", cc); | ||
| 220 | |||
| 221 | if (decompiled) { | ||
| 222 | break; | ||
| 223 | } | ||
| 224 | |||
| 225 | // The SYNC opcode jumps to the address previously set by the SSY opcode | ||
| 226 | bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy)); | ||
| 227 | break; | ||
| 228 | } | ||
| 229 | case OpCode::Id::BRK: { | ||
| 230 | const ConditionCode cc = instr.flow_condition_code; | ||
| 231 | UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "BRK condition code used: {}", cc); | ||
| 232 | if (decompiled) { | ||
| 233 | break; | ||
| 234 | } | ||
| 235 | |||
| 236 | // The BRK opcode jumps to the address previously set by the PBK opcode | ||
| 237 | bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk)); | ||
| 238 | break; | ||
| 239 | } | ||
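A minimal model of the two stacks the Push/PopFlowStack nodes address (a sketch, not the emulator's actual runtime structure):

```cpp
#include <cstdint>
#include <stack>

// SSY pushes a re-convergence address that a later SYNC pops and jumps to;
// PBK/BRK do the same on an independent stack. The two classes never mix.
struct FlowStacks {
    std::stack<std::uint32_t> ssy; // MetaStackClass::Ssy
    std::stack<std::uint32_t> pbk; // MetaStackClass::Pbk
};
```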
| 240 | case OpCode::Id::IPA: { | ||
| 241 | const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff; | ||
| 242 | const auto attribute = instr.attribute.fmt28; | ||
| 243 | const Index index = attribute.index; | ||
| 244 | |||
| 245 | Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8) | ||
| 246 | : GetInputAttribute(index, attribute.element); | ||
| 247 | |||
| 248 | // Code taken from Ryujinx. | ||
| 249 | if (index >= Index::Attribute_0 && index <= Index::Attribute_31) { | ||
| 250 | const u32 location = static_cast<u32>(index) - static_cast<u32>(Index::Attribute_0); | ||
| 251 | if (header.ps.GetPixelImap(location) == PixelImap::Perspective) { | ||
| 252 | Node position_w = GetInputAttribute(Index::Position, 3); | ||
| 253 | value = Operation(OperationCode::FMul, move(value), move(position_w)); | ||
| 254 | } | ||
| 255 | } | ||
| 256 | |||
| 257 | if (instr.ipa.interp_mode == IpaInterpMode::Multiply) { | ||
| 258 | value = Operation(OperationCode::FMul, move(value), GetRegister(instr.gpr20)); | ||
| 259 | } | ||
| 260 | |||
| 261 | value = GetSaturatedFloat(move(value), instr.ipa.saturate); | ||
| 262 | |||
| 263 | SetRegister(bb, instr.gpr0, move(value)); | ||
| 264 | break; | ||
| 265 | } | ||
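In evaluation order, the IPA case assembles the following pipeline (pseudocode over the emitted nodes):

```cpp
// value  = input attribute (physical or indexed)
// value *= Position.w      // only Attribute_0..31 flagged PixelImap::Perspective
// value *= gpr20           // only IpaInterpMode::Multiply
// value  = saturate(value) // only when instr.ipa.saturate is set
```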
| 266 | case OpCode::Id::OUT_R: { | ||
| 267 | UNIMPLEMENTED_IF_MSG(instr.gpr20.Value() != Register::ZeroIndex, | ||
| 268 | "Stream buffer is not supported"); | ||
| 269 | |||
| 270 | if (instr.out.emit) { | ||
| 271 | // gpr0 is used to store the next address and gpr8 contains the address to emit. | ||
| 272 | // Hardware uses pointers here, but we just ignore them | ||
| 273 | bb.push_back(Operation(OperationCode::EmitVertex)); | ||
| 274 | SetRegister(bb, instr.gpr0, Immediate(0)); | ||
| 275 | } | ||
| 276 | if (instr.out.cut) { | ||
| 277 | bb.push_back(Operation(OperationCode::EndPrimitive)); | ||
| 278 | } | ||
| 279 | break; | ||
| 280 | } | ||
| 281 | case OpCode::Id::ISBERD: { | ||
| 282 | UNIMPLEMENTED_IF(instr.isberd.o != 0); | ||
| 283 | UNIMPLEMENTED_IF(instr.isberd.skew != 0); | ||
| 284 | UNIMPLEMENTED_IF(instr.isberd.shift != Tegra::Shader::IsberdShift::None); | ||
| 285 | UNIMPLEMENTED_IF(instr.isberd.mode != Tegra::Shader::IsberdMode::None); | ||
| 286 | LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete"); | ||
| 287 | SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8)); | ||
| 288 | break; | ||
| 289 | } | ||
| 290 | case OpCode::Id::BAR: { | ||
| 291 | UNIMPLEMENTED_IF_MSG(instr.value != 0xF0A81B8000070000ULL, "BAR is not BAR.SYNC 0x0"); | ||
| 292 | bb.push_back(Operation(OperationCode::Barrier)); | ||
| 293 | break; | ||
| 294 | } | ||
| 295 | case OpCode::Id::MEMBAR: { | ||
| 296 | UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default); | ||
| 297 | const OperationCode type = [instr] { | ||
| 298 | switch (instr.membar.type) { | ||
| 299 | case Tegra::Shader::MembarType::CTA: | ||
| 300 | return OperationCode::MemoryBarrierGroup; | ||
| 301 | case Tegra::Shader::MembarType::GL: | ||
| 302 | return OperationCode::MemoryBarrierGlobal; | ||
| 303 | default: | ||
| 304 | UNIMPLEMENTED_MSG("MEMBAR type={}", instr.membar.type.Value()); | ||
| 305 | return OperationCode::MemoryBarrierGlobal; | ||
| 306 | } | ||
| 307 | }(); | ||
| 308 | bb.push_back(Operation(type)); | ||
| 309 | break; | ||
| 310 | } | ||
| 311 | case OpCode::Id::DEPBAR: { | ||
| 312 | LOG_DEBUG(HW_GPU, "DEPBAR instruction is stubbed"); | ||
| 313 | break; | ||
| 314 | } | ||
| 315 | default: | ||
| 316 | UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName()); | ||
| 317 | } | ||
| 318 | |||
| 319 | return pc; | ||
| 320 | } | ||
| 321 | |||
| 322 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp deleted file mode 100644 index 9290d22eb..000000000 --- a/src/video_core/shader/decode/predicate_set_predicate.cpp +++ /dev/null | |||
| @@ -1,68 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | using Tegra::Shader::Pred; | ||
| 16 | |||
| 17 | u32 ShaderIR::DecodePredicateSetPredicate(NodeBlock& bb, u32 pc) { | ||
| 18 | const Instruction instr = {program_code[pc]}; | ||
| 19 | const auto opcode = OpCode::Decode(instr); | ||
| 20 | |||
| 21 | switch (opcode->get().GetId()) { | ||
| 22 | case OpCode::Id::PSETP: { | ||
| 23 | const Node op_a = GetPredicate(instr.psetp.pred12, instr.psetp.neg_pred12 != 0); | ||
| 24 | const Node op_b = GetPredicate(instr.psetp.pred29, instr.psetp.neg_pred29 != 0); | ||
| 25 | |||
| 26 | // We can't use the constant predicate as destination. | ||
| 27 | ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | ||
| 28 | |||
| 29 | const Node second_pred = GetPredicate(instr.psetp.pred39, instr.psetp.neg_pred39 != 0); | ||
| 30 | |||
| 31 | const OperationCode combiner = GetPredicateCombiner(instr.psetp.op); | ||
| 32 | const Node predicate = Operation(combiner, op_a, op_b); | ||
| 33 | |||
| 34 | // Set the primary predicate to the result of Predicate OP SecondPredicate | ||
| 35 | SetPredicate(bb, instr.psetp.pred3, Operation(combiner, predicate, second_pred)); | ||
| 36 | |||
| 37 | if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 38 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if | ||
| 39 | // enabled | ||
| 40 | SetPredicate(bb, instr.psetp.pred0, | ||
| 41 | Operation(combiner, Operation(OperationCode::LogicalNegate, predicate), | ||
| 42 | second_pred)); | ||
| 43 | } | ||
| 44 | break; | ||
| 45 | } | ||
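A worked evaluation of the PSETP combiner chain above, with OP = LogicalAnd and hypothetical inputs:

```cpp
// op_a = true, op_b = false, second_pred = true:
//   predicate = op_a AND op_b              -> false
//   pred3     = predicate AND second_pred  -> false  (primary)
//   pred0     = !predicate AND second_pred -> true   (secondary, when enabled)
```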
| 46 | case OpCode::Id::CSETP: { | ||
| 47 | const Node pred = GetPredicate(instr.csetp.pred39, instr.csetp.neg_pred39 != 0); | ||
| 48 | const Node condition_code = GetConditionCode(instr.csetp.cc); | ||
| 49 | |||
| 50 | const OperationCode combiner = GetPredicateCombiner(instr.csetp.op); | ||
| 51 | |||
| 52 | if (instr.csetp.pred3 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 53 | SetPredicate(bb, instr.csetp.pred3, Operation(combiner, condition_code, pred)); | ||
| 54 | } | ||
| 55 | if (instr.csetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 56 | const Node neg_cc = Operation(OperationCode::LogicalNegate, condition_code); | ||
| 57 | SetPredicate(bb, instr.csetp.pred0, Operation(combiner, neg_cc, pred)); | ||
| 58 | } | ||
| 59 | break; | ||
| 60 | } | ||
| 61 | default: | ||
| 62 | UNIMPLEMENTED_MSG("Unhandled predicate instruction: {}", opcode->get().GetName()); | ||
| 63 | } | ||
| 64 | |||
| 65 | return pc; | ||
| 66 | } | ||
| 67 | |||
| 68 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp deleted file mode 100644 index 84dbc50fe..000000000 --- a/src/video_core/shader/decode/predicate_set_register.cpp +++ /dev/null | |||
| @@ -1,46 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | |||
| 16 | u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) { | ||
| 17 | const Instruction instr = {program_code[pc]}; | ||
| 18 | |||
| 19 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 20 | "Condition codes generation in PSET is not implemented"); | ||
| 21 | |||
| 22 | const Node op_a = GetPredicate(instr.pset.pred12, instr.pset.neg_pred12 != 0); | ||
| 23 | const Node op_b = GetPredicate(instr.pset.pred29, instr.pset.neg_pred29 != 0); | ||
| 24 | const Node first_pred = Operation(GetPredicateCombiner(instr.pset.cond), op_a, op_b); | ||
| 25 | |||
| 26 | const Node second_pred = GetPredicate(instr.pset.pred39, instr.pset.neg_pred39 != 0); | ||
| 27 | |||
| 28 | const OperationCode combiner = GetPredicateCombiner(instr.pset.op); | ||
| 29 | const Node predicate = Operation(combiner, first_pred, second_pred); | ||
| 30 | |||
| 31 | const Node true_value = instr.pset.bf ? Immediate(1.0f) : Immediate(0xffffffff); | ||
| 32 | const Node false_value = instr.pset.bf ? Immediate(0.0f) : Immediate(0); | ||
| 33 | const Node value = | ||
| 34 | Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); | ||
| 35 | |||
| 36 | if (instr.pset.bf) { | ||
| 37 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 38 | } else { | ||
| 39 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 40 | } | ||
| 41 | SetRegister(bb, instr.gpr0, value); | ||
| 42 | |||
| 43 | return pc; | ||
| 44 | } | ||
| 45 | |||
| 46 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp deleted file mode 100644 index 6116c31aa..000000000 --- a/src/video_core/shader/decode/register_set_predicate.cpp +++ /dev/null | |||
| @@ -1,86 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <utility> | ||
| 6 | |||
| 7 | #include "common/assert.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "video_core/engines/shader_bytecode.h" | ||
| 10 | #include "video_core/shader/node_helper.h" | ||
| 11 | #include "video_core/shader/shader_ir.h" | ||
| 12 | |||
| 13 | namespace VideoCommon::Shader { | ||
| 14 | |||
| 15 | using std::move; | ||
| 16 | using Tegra::Shader::Instruction; | ||
| 17 | using Tegra::Shader::OpCode; | ||
| 18 | |||
| 19 | namespace { | ||
| 20 | constexpr u64 NUM_CONDITION_CODES = 4; | ||
| 21 | constexpr u64 NUM_PREDICATES = 7; | ||
| 22 | } // namespace | ||
| 23 | |||
| 24 | u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) { | ||
| 25 | const Instruction instr = {program_code[pc]}; | ||
| 26 | const auto opcode = OpCode::Decode(instr); | ||
| 27 | |||
| 28 | Node apply_mask = [this, opcode, instr] { | ||
| 29 | switch (opcode->get().GetId()) { | ||
| 30 | case OpCode::Id::R2P_IMM: | ||
| 31 | case OpCode::Id::P2R_IMM: | ||
| 32 | return Immediate(static_cast<u32>(instr.p2r_r2p.immediate_mask)); | ||
| 33 | default: | ||
| 34 | UNREACHABLE(); | ||
| 35 | return Immediate(0); | ||
| 36 | } | ||
| 37 | }(); | ||
| 38 | |||
| 39 | const u32 offset = static_cast<u32>(instr.p2r_r2p.byte) * 8; | ||
| 40 | |||
| 41 | const bool cc = instr.p2r_r2p.mode == Tegra::Shader::R2pMode::Cc; | ||
| 42 | const u64 num_entries = cc ? NUM_CONDITION_CODES : NUM_PREDICATES; | ||
| 43 | const auto get_entry = [this, cc](u64 entry) { | ||
| 44 | return cc ? GetInternalFlag(static_cast<InternalFlag>(entry)) : GetPredicate(entry); | ||
| 45 | }; | ||
| 46 | |||
| 47 | switch (opcode->get().GetId()) { | ||
| 48 | case OpCode::Id::R2P_IMM: { | ||
| 49 | Node mask = GetRegister(instr.gpr8); | ||
| 50 | |||
| 51 | for (u64 entry = 0; entry < num_entries; ++entry) { | ||
| 52 | const u32 shift = static_cast<u32>(entry); | ||
| 53 | |||
| 54 | Node apply = BitfieldExtract(apply_mask, shift, 1); | ||
| 55 | Node condition = Operation(OperationCode::LogicalUNotEqual, apply, Immediate(0)); | ||
| 56 | |||
| 57 | Node compare = BitfieldExtract(mask, offset + shift, 1); | ||
| 58 | Node value = Operation(OperationCode::LogicalUNotEqual, move(compare), Immediate(0)); | ||
| 59 | |||
| 60 | Node code = Operation(OperationCode::LogicalAssign, get_entry(entry), move(value)); | ||
| 61 | bb.push_back(Conditional(condition, {move(code)})); | ||
| 62 | } | ||
| 63 | break; | ||
| 64 | } | ||
| 65 | case OpCode::Id::P2R_IMM: { | ||
| 66 | Node value = Immediate(0); | ||
| 67 | for (u64 entry = 0; entry < num_entries; ++entry) { | ||
| 68 | Node bit = Operation(OperationCode::Select, get_entry(entry), Immediate(1U << entry), | ||
| 69 | Immediate(0)); | ||
| 70 | value = Operation(OperationCode::UBitwiseOr, move(value), move(bit)); | ||
| 71 | } | ||
| 72 | value = Operation(OperationCode::UBitwiseAnd, move(value), apply_mask); | ||
| 73 | value = BitfieldInsert(GetRegister(instr.gpr8), move(value), offset, 8); | ||
| 74 | |||
| 75 | SetRegister(bb, instr.gpr0, move(value)); | ||
| 76 | break; | ||
| 77 | } | ||
| 78 | default: | ||
| 79 | UNIMPLEMENTED_MSG("Unhandled P2R/R2R instruction: {}", opcode->get().GetName()); | ||
| 80 | break; | ||
| 81 | } | ||
| 82 | |||
| 83 | return pc; | ||
| 84 | } | ||
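A plain C++ sketch of the R2P_IMM data movement built above, with the seven predicates modelled as a bool array (all values hypothetical):

```cpp
#include <cstdint>
using u32 = std::uint32_t;

void R2PExample(bool preds[7], u32 reg, u32 apply_mask, u32 byte) {
    const u32 offset = byte * 8; // which byte of the register to read
    for (u32 entry = 0; entry < 7; ++entry) {
        if ((apply_mask >> entry) & 1) {      // entry selected by the mask
            preds[entry] = (reg >> (offset + entry)) & 1;
        }
    }
}
```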
| 85 | |||
| 86 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp deleted file mode 100644 index a53819c15..000000000 --- a/src/video_core/shader/decode/shift.cpp +++ /dev/null | |||
| @@ -1,153 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using std::move; | ||
| 14 | using Tegra::Shader::Instruction; | ||
| 15 | using Tegra::Shader::OpCode; | ||
| 16 | using Tegra::Shader::ShfType; | ||
| 17 | using Tegra::Shader::ShfXmode; | ||
| 18 | |||
| 19 | namespace { | ||
| 20 | |||
| 21 | Node IsFull(Node shift) { | ||
| 22 | return Operation(OperationCode::LogicalIEqual, move(shift), Immediate(32)); | ||
| 23 | } | ||
| 24 | |||
| 25 | Node Shift(OperationCode opcode, Node value, Node shift) { | ||
| 26 | Node shifted = Operation(opcode, move(value), shift); | ||
| 27 | return Operation(OperationCode::Select, IsFull(move(shift)), Immediate(0), move(shifted)); | ||
| 28 | } | ||
| 29 | |||
| 30 | Node ClampShift(Node shift, s32 size = 32) { | ||
| 31 | shift = Operation(OperationCode::IMax, move(shift), Immediate(0)); | ||
| 32 | return Operation(OperationCode::IMin, move(shift), Immediate(size)); | ||
| 33 | } | ||
| 34 | |||
| 35 | Node WrapShift(Node shift, s32 size = 32) { | ||
| 36 | return Operation(OperationCode::UBitwiseAnd, move(shift), Immediate(size - 1)); | ||
| 37 | } | ||
| 38 | |||
| 39 | Node ShiftRight(Node low, Node high, Node shift, Node low_shift, ShfType type) { | ||
| 40 | // These values are used when the shift value is less than 32 | ||
| 41 | Node less_low = Shift(OperationCode::ILogicalShiftRight, low, shift); | ||
| 42 | Node less_high = Shift(OperationCode::ILogicalShiftLeft, high, low_shift); | ||
| 43 | Node less = Operation(OperationCode::IBitwiseOr, move(less_high), move(less_low)); | ||
| 44 | |||
| 45 | if (type == ShfType::Bits32) { | ||
| 46 | // On 32-bit shifts we are either full (shifting 32) or shifting less than 32 bits | ||
| 47 | return Operation(OperationCode::Select, IsFull(move(shift)), move(high), move(less)); | ||
| 48 | } | ||
| 49 | |||
| 50 | // And these when it's greater than or equal to 32 | ||
| 51 | const bool is_signed = type == ShfType::S64; | ||
| 52 | const auto opcode = SignedToUnsignedCode(OperationCode::IArithmeticShiftRight, is_signed); | ||
| 53 | Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32)); | ||
| 54 | Node greater = Shift(opcode, high, move(reduced)); | ||
| 55 | |||
| 56 | Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32)); | ||
| 57 | Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0)); | ||
| 58 | |||
| 59 | Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater)); | ||
| 60 | return Operation(OperationCode::Select, move(is_zero), move(high), move(value)); | ||
| 61 | } | ||
| 62 | |||
| 63 | Node ShiftLeft(Node low, Node high, Node shift, Node low_shift, ShfType type) { | ||
| 64 | // These values are used when the shift value is less than 32 | ||
| 65 | Node less_low = Operation(OperationCode::ILogicalShiftRight, low, low_shift); | ||
| 66 | Node less_high = Operation(OperationCode::ILogicalShiftLeft, high, shift); | ||
| 67 | Node less = Operation(OperationCode::IBitwiseOr, move(less_low), move(less_high)); | ||
| 68 | |||
| 69 | if (type == ShfType::Bits32) { | ||
| 70 | // On 32-bit shifts we are either full (shifting 32) or shifting less than 32 bits | ||
| 71 | return Operation(OperationCode::Select, IsFull(move(shift)), move(low), move(less)); | ||
| 72 | } | ||
| 73 | |||
| 74 | // And these when it's greater than or equal to 32 | ||
| 75 | Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32)); | ||
| 76 | Node greater = Shift(OperationCode::ILogicalShiftLeft, move(low), move(reduced)); | ||
| 77 | |||
| 78 | Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32)); | ||
| 79 | Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0)); | ||
| 80 | |||
| 81 | Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater)); | ||
| 82 | return Operation(OperationCode::Select, move(is_zero), move(high), move(value)); | ||
| 83 | } | ||
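A worked example of the composed 64-bit funnel right shift, for a shift of 8 (the less-than-32 path):

```cpp
// less_low  = low  >> 8   // supplies result bits [0, 24)
// less_high = high << 24  // low_shift = 32 - 8; supplies result bits [24, 32)
// result    = less_high | less_low
//           = bits [8, 40) of the 64-bit pair {high : low}
// For shift >= 32 the "greater" path shifts `high` by (shift - 32) instead.
```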
| 84 | |||
| 85 | } // Anonymous namespace | ||
| 86 | |||
| 87 | u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) { | ||
| 88 | const Instruction instr = {program_code[pc]}; | ||
| 89 | const auto opcode = OpCode::Decode(instr); | ||
| 90 | |||
| 91 | Node op_a = GetRegister(instr.gpr8); | ||
| 92 | Node op_b = [this, instr] { | ||
| 93 | if (instr.is_b_imm) { | ||
| 94 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 95 | } else if (instr.is_b_gpr) { | ||
| 96 | return GetRegister(instr.gpr20); | ||
| 97 | } else { | ||
| 98 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 99 | } | ||
| 100 | }(); | ||
| 101 | |||
| 102 | switch (const auto opid = opcode->get().GetId(); opid) { | ||
| 103 | case OpCode::Id::SHR_C: | ||
| 104 | case OpCode::Id::SHR_R: | ||
| 105 | case OpCode::Id::SHR_IMM: { | ||
| 106 | op_b = instr.shr.wrap ? WrapShift(move(op_b)) : ClampShift(move(op_b)); | ||
| 107 | |||
| 108 | Node value = SignedOperation(OperationCode::IArithmeticShiftRight, instr.shift.is_signed, | ||
| 109 | move(op_a), move(op_b)); | ||
| 110 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 111 | SetRegister(bb, instr.gpr0, move(value)); | ||
| 112 | break; | ||
| 113 | } | ||
| 114 | case OpCode::Id::SHL_C: | ||
| 115 | case OpCode::Id::SHL_R: | ||
| 116 | case OpCode::Id::SHL_IMM: { | ||
| 117 | Node value = Operation(OperationCode::ILogicalShiftLeft, op_a, op_b); | ||
| 118 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 119 | SetRegister(bb, instr.gpr0, move(value)); | ||
| 120 | break; | ||
| 121 | } | ||
| 122 | case OpCode::Id::SHF_RIGHT_R: | ||
| 123 | case OpCode::Id::SHF_RIGHT_IMM: | ||
| 124 | case OpCode::Id::SHF_LEFT_R: | ||
| 125 | case OpCode::Id::SHF_LEFT_IMM: { | ||
| 126 | UNIMPLEMENTED_IF(instr.generates_cc); | ||
| 127 | UNIMPLEMENTED_IF_MSG(instr.shf.xmode != ShfXmode::None, "xmode={}", | ||
| 128 | instr.shf.xmode.Value()); | ||
| 129 | |||
| 130 | if (instr.is_b_imm) { | ||
| 131 | op_b = Immediate(static_cast<u32>(instr.shf.immediate)); | ||
| 132 | } | ||
| 133 | const s32 size = instr.shf.type == ShfType::Bits32 ? 32 : 64; | ||
| 134 | Node shift = instr.shf.wrap ? WrapShift(move(op_b), size) : ClampShift(move(op_b), size); | ||
| 135 | |||
| 136 | Node negated_shift = Operation(OperationCode::INegate, shift); | ||
| 137 | Node low_shift = Operation(OperationCode::IAdd, move(negated_shift), Immediate(32)); | ||
| 138 | |||
| 139 | const bool is_right = opid == OpCode::Id::SHF_RIGHT_R || opid == OpCode::Id::SHF_RIGHT_IMM; | ||
| 140 | Node value = (is_right ? ShiftRight : ShiftLeft)( | ||
| 141 | move(op_a), GetRegister(instr.gpr39), move(shift), move(low_shift), instr.shf.type); | ||
| 142 | |||
| 143 | SetRegister(bb, instr.gpr0, move(value)); | ||
| 144 | break; | ||
| 145 | } | ||
| 146 | default: | ||
| 147 | UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName()); | ||
| 148 | } | ||
| 149 | |||
| 150 | return pc; | ||
| 151 | } | ||
| 152 | |||
| 153 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp deleted file mode 100644 index c69681e8d..000000000 --- a/src/video_core/shader/decode/texture.cpp +++ /dev/null | |||
| @@ -1,935 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <vector> | ||
| 7 | #include <fmt/format.h> | ||
| 8 | |||
| 9 | #include "common/assert.h" | ||
| 10 | #include "common/bit_field.h" | ||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "common/logging/log.h" | ||
| 13 | #include "video_core/engines/shader_bytecode.h" | ||
| 14 | #include "video_core/shader/node_helper.h" | ||
| 15 | #include "video_core/shader/registry.h" | ||
| 16 | #include "video_core/shader/shader_ir.h" | ||
| 17 | |||
| 18 | namespace VideoCommon::Shader { | ||
| 19 | |||
| 20 | using Tegra::Shader::Instruction; | ||
| 21 | using Tegra::Shader::OpCode; | ||
| 22 | using Tegra::Shader::Register; | ||
| 23 | using Tegra::Shader::TextureMiscMode; | ||
| 24 | using Tegra::Shader::TextureProcessMode; | ||
| 25 | using Tegra::Shader::TextureType; | ||
| 26 | |||
| 27 | static std::size_t GetCoordCount(TextureType texture_type) { | ||
| 28 | switch (texture_type) { | ||
| 29 | case TextureType::Texture1D: | ||
| 30 | return 1; | ||
| 31 | case TextureType::Texture2D: | ||
| 32 | return 2; | ||
| 33 | case TextureType::Texture3D: | ||
| 34 | case TextureType::TextureCube: | ||
| 35 | return 3; | ||
| 36 | default: | ||
| 37 | UNIMPLEMENTED_MSG("Unhandled texture type: {}", texture_type); | ||
| 38 | return 0; | ||
| 39 | } | ||
| 40 | } | ||
| 41 | |||
| 42 | u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | ||
| 43 | const Instruction instr = {program_code[pc]}; | ||
| 44 | const auto opcode = OpCode::Decode(instr); | ||
| 45 | bool is_bindless = false; | ||
| 46 | switch (opcode->get().GetId()) { | ||
| 47 | case OpCode::Id::TEX: { | ||
| 48 | const TextureType texture_type{instr.tex.texture_type}; | ||
| 49 | const bool is_array = instr.tex.array != 0; | ||
| 50 | const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); | ||
| 51 | const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); | ||
| 52 | const auto process_mode = instr.tex.GetTextureProcessMode(); | ||
| 53 | WriteTexInstructionFloat( | ||
| 54 | bb, instr, | ||
| 55 | GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi, {})); | ||
| 56 | break; | ||
| 57 | } | ||
| 58 | case OpCode::Id::TEX_B: { | ||
| 59 | UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 60 | "AOFFI is not implemented"); | ||
| 61 | |||
| 62 | const TextureType texture_type{instr.tex_b.texture_type}; | ||
| 63 | const bool is_array = instr.tex_b.array != 0; | ||
| 64 | const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); | ||
| 65 | const bool depth_compare = instr.tex_b.UsesMiscMode(TextureMiscMode::DC); | ||
| 66 | const auto process_mode = instr.tex_b.GetTextureProcessMode(); | ||
| 67 | WriteTexInstructionFloat(bb, instr, | ||
| 68 | GetTexCode(instr, texture_type, process_mode, depth_compare, | ||
| 69 | is_array, is_aoffi, {instr.gpr20})); | ||
| 70 | break; | ||
| 71 | } | ||
| 72 | case OpCode::Id::TEXS: { | ||
| 73 | const TextureType texture_type{instr.texs.GetTextureType()}; | ||
| 74 | const bool is_array{instr.texs.IsArrayTexture()}; | ||
| 75 | const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC); | ||
| 76 | const auto process_mode = instr.texs.GetTextureProcessMode(); | ||
| 77 | |||
| 78 | const Node4 components = | ||
| 79 | GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array); | ||
| 80 | |||
| 81 | if (instr.texs.fp32_flag) { | ||
| 82 | WriteTexsInstructionFloat(bb, instr, components); | ||
| 83 | } else { | ||
| 84 | WriteTexsInstructionHalfFloat(bb, instr, components); | ||
| 85 | } | ||
| 86 | break; | ||
| 87 | } | ||
| 88 | case OpCode::Id::TLD4_B: { | ||
| 89 | is_bindless = true; | ||
| 90 | [[fallthrough]]; | ||
| 91 | } | ||
| 92 | case OpCode::Id::TLD4: { | ||
| 93 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), | ||
| 94 | "NDV is not implemented"); | ||
| 95 | const auto texture_type = instr.tld4.texture_type.Value(); | ||
| 96 | const bool depth_compare = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC) | ||
| 97 | : instr.tld4.UsesMiscMode(TextureMiscMode::DC); | ||
| 98 | const bool is_array = instr.tld4.array != 0; | ||
| 99 | const bool is_aoffi = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::AOFFI) | ||
| 100 | : instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI); | ||
| 101 | const bool is_ptp = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::PTP) | ||
| 102 | : instr.tld4.UsesMiscMode(TextureMiscMode::PTP); | ||
| 103 | WriteTexInstructionFloat(bb, instr, | ||
| 104 | GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi, | ||
| 105 | is_ptp, is_bindless)); | ||
| 106 | break; | ||
| 107 | } | ||
| 108 | case OpCode::Id::TLD4S: { | ||
| 109 | constexpr std::size_t num_coords = 2; | ||
| 110 | const bool is_aoffi = instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI); | ||
| 111 | const bool is_depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC); | ||
| 112 | const Node op_a = GetRegister(instr.gpr8); | ||
| 113 | const Node op_b = GetRegister(instr.gpr20); | ||
| 114 | |||
| 115 | // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. | ||
| 116 | std::vector<Node> coords; | ||
| 117 | std::vector<Node> aoffi; | ||
| 118 | Node depth_compare; | ||
| 119 | if (is_depth_compare) { | ||
| 120 | // Note: TLD4S coordinate encoding works just like TEXS's | ||
| 121 | const Node op_y = GetRegister(instr.gpr8.Value() + 1); | ||
| 122 | coords.push_back(op_a); | ||
| 123 | coords.push_back(op_y); | ||
| 124 | if (is_aoffi) { | ||
| 125 | aoffi = GetAoffiCoordinates(op_b, num_coords, true); | ||
| 126 | depth_compare = GetRegister(instr.gpr20.Value() + 1); | ||
| 127 | } else { | ||
| 128 | depth_compare = op_b; | ||
| 129 | } | ||
| 130 | } else { | ||
| 131 | // There's no depth compare | ||
| 132 | coords.push_back(op_a); | ||
| 133 | if (is_aoffi) { | ||
| 134 | coords.push_back(GetRegister(instr.gpr8.Value() + 1)); | ||
| 135 | aoffi = GetAoffiCoordinates(op_b, num_coords, true); | ||
| 136 | } else { | ||
| 137 | coords.push_back(op_b); | ||
| 138 | } | ||
| 139 | } | ||
| 140 | const Node component = Immediate(static_cast<u32>(instr.tld4s.component)); | ||
| 141 | |||
| 142 | SamplerInfo info; | ||
| 143 | info.is_shadow = is_depth_compare; | ||
| 144 | const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info); | ||
| 145 | |||
| 146 | Node4 values; | ||
| 147 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 148 | MetaTexture meta{*sampler, {}, depth_compare, aoffi, {}, {}, | ||
| 149 | {}, {}, component, element, {}}; | ||
| 150 | values[element] = Operation(OperationCode::TextureGather, meta, coords); | ||
| 151 | } | ||
| 152 | |||
| 153 | if (instr.tld4s.fp16_flag) { | ||
| 154 | WriteTexsInstructionHalfFloat(bb, instr, values, true); | ||
| 155 | } else { | ||
| 156 | WriteTexsInstructionFloat(bb, instr, values, true); | ||
| 157 | } | ||
| 158 | break; | ||
| 159 | } | ||
| 160 | case OpCode::Id::TXD_B: | ||
| 161 | is_bindless = true; | ||
| 162 | [[fallthrough]]; | ||
| 163 | case OpCode::Id::TXD: { | ||
| 164 | UNIMPLEMENTED_IF_MSG(instr.txd.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 165 | "AOFFI is not implemented"); | ||
| 166 | |||
| 167 | const bool is_array = instr.txd.is_array != 0; | ||
| 168 | const auto derivate_reg = instr.gpr20.Value(); | ||
| 169 | const auto texture_type = instr.txd.texture_type.Value(); | ||
| 170 | const auto coord_count = GetCoordCount(texture_type); | ||
| 171 | u64 base_reg = instr.gpr8.Value(); | ||
| 172 | Node index_var; | ||
| 173 | SamplerInfo info; | ||
| 174 | info.type = texture_type; | ||
| 175 | info.is_array = is_array; | ||
| 176 | const std::optional<SamplerEntry> sampler = | ||
| 177 | is_bindless ? GetBindlessSampler(base_reg, info, index_var) | ||
| 178 | : GetSampler(instr.sampler, info); | ||
| 179 | Node4 values; | ||
| 180 | if (!sampler) { | ||
| 181 | std::generate(values.begin(), values.end(), [this] { return Immediate(0); }); | ||
| 182 | WriteTexInstructionFloat(bb, instr, values); | ||
| 183 | break; | ||
| 184 | } | ||
| 185 | |||
| 186 | if (is_bindless) { | ||
| 187 | base_reg++; | ||
| 188 | } | ||
| 189 | |||
| 190 | std::vector<Node> coords; | ||
| 191 | std::vector<Node> derivates; | ||
| 192 | for (std::size_t i = 0; i < coord_count; ++i) { | ||
| 193 | coords.push_back(GetRegister(base_reg + i)); | ||
| 194 | const std::size_t derivate = i * 2; | ||
| 195 | derivates.push_back(GetRegister(derivate_reg + derivate)); | ||
| 196 | derivates.push_back(GetRegister(derivate_reg + derivate + 1)); | ||
| 197 | } | ||
| 198 | |||
| 199 | Node array_node = {}; | ||
| 200 | if (is_array) { | ||
| 201 | const Node info_reg = GetRegister(base_reg + coord_count); | ||
| 202 | array_node = BitfieldExtract(info_reg, 0, 16); | ||
| 203 | } | ||
| 204 | |||
| 205 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 206 | MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates, | ||
| 207 | {}, {}, {}, element, index_var}; | ||
| 208 | values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords); | ||
| 209 | } | ||
| 210 | |||
| 211 | WriteTexInstructionFloat(bb, instr, values); | ||
| 212 | |||
| 213 | break; | ||
| 214 | } | ||
| 215 | case OpCode::Id::TXQ_B: | ||
| 216 | is_bindless = true; | ||
| 217 | [[fallthrough]]; | ||
| 218 | case OpCode::Id::TXQ: { | ||
| 219 | Node index_var; | ||
| 220 | const std::optional<SamplerEntry> sampler = | ||
| 221 | is_bindless ? GetBindlessSampler(instr.gpr8, {}, index_var) | ||
| 222 | : GetSampler(instr.sampler, {}); | ||
| 223 | |||
| 224 | if (!sampler) { | ||
| 225 | u32 indexer = 0; | ||
| 226 | for (u32 element = 0; element < 4; ++element) { | ||
| 227 | if (!instr.txq.IsComponentEnabled(element)) { | ||
| 228 | continue; | ||
| 229 | } | ||
| 230 | const Node value = Immediate(0); | ||
| 231 | SetTemporary(bb, indexer++, value); | ||
| 232 | } | ||
| 233 | for (u32 i = 0; i < indexer; ++i) { | ||
| 234 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | ||
| 235 | } | ||
| 236 | break; | ||
| 237 | } | ||
| 238 | |||
| 239 | u32 indexer = 0; | ||
| 240 | switch (instr.txq.query_type) { | ||
| 241 | case Tegra::Shader::TextureQueryType::Dimension: { | ||
| 242 | for (u32 element = 0; element < 4; ++element) { | ||
| 243 | if (!instr.txq.IsComponentEnabled(element)) { | ||
| 244 | continue; | ||
| 245 | } | ||
| 246 | MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; | ||
| 247 | const Node value = | ||
| 248 | Operation(OperationCode::TextureQueryDimensions, meta, | ||
| 249 | GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); | ||
| 250 | SetTemporary(bb, indexer++, value); | ||
| 251 | } | ||
| 252 | for (u32 i = 0; i < indexer; ++i) { | ||
| 253 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | ||
| 254 | } | ||
| 255 | break; | ||
| 256 | } | ||
| 257 | default: | ||
| 258 | UNIMPLEMENTED_MSG("Unhandled texture query type: {}", instr.txq.query_type.Value()); | ||
| 259 | } | ||
| 260 | break; | ||
| 261 | } | ||
| 262 | case OpCode::Id::TMML_B: | ||
| 263 | is_bindless = true; | ||
| 264 | [[fallthrough]]; | ||
| 265 | case OpCode::Id::TMML: { | ||
| 266 | UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), | ||
| 267 | "NDV is not implemented"); | ||
| 268 | |||
| 269 | const auto texture_type = instr.tmml.texture_type.Value(); | ||
| 270 | const bool is_array = instr.tmml.array != 0; | ||
| 271 | SamplerInfo info; | ||
| 272 | info.type = texture_type; | ||
| 273 | info.is_array = is_array; | ||
| 274 | Node index_var; | ||
| 275 | const std::optional<SamplerEntry> sampler = | ||
| 276 | is_bindless ? GetBindlessSampler(instr.gpr20, info, index_var) | ||
| 277 | : GetSampler(instr.sampler, info); | ||
| 278 | |||
| 279 | if (!sampler) { | ||
| 280 | u32 indexer = 0; | ||
| 281 | for (u32 element = 0; element < 2; ++element) { | ||
| 282 | if (!instr.tmml.IsComponentEnabled(element)) { | ||
| 283 | continue; | ||
| 284 | } | ||
| 285 | const Node value = Immediate(0); | ||
| 286 | SetTemporary(bb, indexer++, value); | ||
| 287 | } | ||
| 288 | for (u32 i = 0; i < indexer; ++i) { | ||
| 289 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | ||
| 290 | } | ||
| 291 | break; | ||
| 292 | } | ||
| 293 | |||
| 294 | const u64 base_index = is_array ? 1 : 0; | ||
| 295 | const u64 num_components = [texture_type] { | ||
| 296 | switch (texture_type) { | ||
| 297 | case TextureType::Texture1D: | ||
| 298 | return 1; | ||
| 299 | case TextureType::Texture2D: | ||
| 300 | return 2; | ||
| 301 | case TextureType::TextureCube: | ||
| 302 | return 3; | ||
| 303 | default: | ||
| 304 | UNIMPLEMENTED_MSG("Unhandled texture type {}", texture_type); | ||
| 305 | return 2; | ||
| 306 | } | ||
| 307 | }(); | ||
| 308 | // TODO: What's the array component used for? | ||
| 309 | |||
| 310 | std::vector<Node> coords; | ||
| 311 | coords.reserve(num_components); | ||
| 312 | for (u64 component = 0; component < num_components; ++component) { | ||
| 313 | coords.push_back(GetRegister(instr.gpr8.Value() + base_index + component)); | ||
| 314 | } | ||
| 315 | |||
| 316 | u32 indexer = 0; | ||
| 317 | for (u32 element = 0; element < 2; ++element) { | ||
| 318 | if (!instr.tmml.IsComponentEnabled(element)) { | ||
| 319 | continue; | ||
| 320 | } | ||
| 321 | MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; | ||
| 322 | Node value = Operation(OperationCode::TextureQueryLod, meta, coords); | ||
| 323 | SetTemporary(bb, indexer++, std::move(value)); | ||
| 324 | } | ||
| 325 | for (u32 i = 0; i < indexer; ++i) { | ||
| 326 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | ||
| 327 | } | ||
| 328 | break; | ||
| 329 | } | ||
| 330 | case OpCode::Id::TLD: { | ||
| 331 | UNIMPLEMENTED_IF_MSG(instr.tld.aoffi, "AOFFI is not implemented"); | ||
| 332 | UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented"); | ||
| 333 | UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented"); | ||
| 334 | |||
| 335 | WriteTexInstructionFloat(bb, instr, GetTldCode(instr)); | ||
| 336 | break; | ||
| 337 | } | ||
| 338 | case OpCode::Id::TLDS: { | ||
| 339 | const TextureType texture_type{instr.tlds.GetTextureType()}; | ||
| 340 | const bool is_array{instr.tlds.IsArrayTexture()}; | ||
| 341 | |||
| 342 | UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 343 | "AOFFI is not implemented"); | ||
| 344 | UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented"); | ||
| 345 | |||
| 346 | const Node4 components = GetTldsCode(instr, texture_type, is_array); | ||
| 347 | |||
| 348 | if (instr.tlds.fp32_flag) { | ||
| 349 | WriteTexsInstructionFloat(bb, instr, components); | ||
| 350 | } else { | ||
| 351 | WriteTexsInstructionHalfFloat(bb, instr, components); | ||
| 352 | } | ||
| 353 | break; | ||
| 354 | } | ||
| 355 | default: | ||
| 356 | UNIMPLEMENTED_MSG("Unhandled texture instruction: {}", opcode->get().GetName()); | ||
| 357 | } | ||
| 358 | |||
| 359 | return pc; | ||
| 360 | } | ||
| 361 | |||
| 362 | ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo( | ||
| 363 | SamplerInfo info, std::optional<Tegra::Engines::SamplerDescriptor> sampler) { | ||
| 364 | if (info.IsComplete()) { | ||
| 365 | return info; | ||
| 366 | } | ||
| 367 | if (!sampler) { | ||
| 368 | LOG_WARNING(HW_GPU, "Unknown sampler info"); | ||
| 369 | info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D); | ||
| 370 | info.is_array = info.is_array.value_or(false); | ||
| 371 | info.is_shadow = info.is_shadow.value_or(false); | ||
| 372 | info.is_buffer = info.is_buffer.value_or(false); | ||
| 373 | return info; | ||
| 374 | } | ||
| 375 | info.type = info.type.value_or(sampler->texture_type); | ||
| 376 | info.is_array = info.is_array.value_or(sampler->is_array != 0); | ||
| 377 | info.is_shadow = info.is_shadow.value_or(sampler->is_shadow != 0); | ||
| 378 | info.is_buffer = info.is_buffer.value_or(sampler->is_buffer != 0); | ||
| 379 | return info; | ||
| 380 | } | ||
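The value_or cascade above fills in only the fields the shader has not already pinned down; descriptor data never overrides explicit shader-side knowledge. A minimal standalone sketch of the same pattern, using a hypothetical Info struct rather than the file's SamplerInfo:

#include <optional>

struct Info {
    std::optional<bool> is_array;
    std::optional<bool> is_shadow;
};

// Only unset fields are taken from the fallback descriptor; fields the
// shader already determined are left untouched.
Info Complete(Info info, bool desc_is_array, bool desc_is_shadow) {
    info.is_array = info.is_array.value_or(desc_is_array);
    info.is_shadow = info.is_shadow.value_or(desc_is_shadow);
    return info;
}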
| 381 | |||
| 382 | std::optional<SamplerEntry> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler, | ||
| 383 | SamplerInfo sampler_info) { | ||
| 384 | const u32 offset = static_cast<u32>(sampler.index.Value()); | ||
| 385 | const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset)); | ||
| 386 | |||
| 387 | // If this sampler has already been used, return the existing mapping. | ||
| 388 | const auto it = | ||
| 389 | std::find_if(used_samplers.begin(), used_samplers.end(), | ||
| 390 | [offset](const SamplerEntry& entry) { return entry.offset == offset; }); | ||
| 391 | if (it != used_samplers.end()) { | ||
| 392 | ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array && | ||
| 393 | it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); | ||
| 394 | return *it; | ||
| 395 | } | ||
| 396 | |||
| 397 | // Otherwise create a new mapping for this sampler | ||
| 398 | const auto next_index = static_cast<u32>(used_samplers.size()); | ||
| 399 | return used_samplers.emplace_back(next_index, offset, *info.type, *info.is_array, | ||
| 400 | *info.is_shadow, *info.is_buffer, false); | ||
| 401 | } | ||
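GetSampler is a find-or-create cache keyed by constant-buffer offset, handing out dense indices in first-use order. A self-contained sketch of that scheme, with a hypothetical Entry type standing in for SamplerEntry:

#include <algorithm>
#include <cstdint>
#include <vector>

struct Entry {
    std::uint32_t index;
    std::uint32_t offset;
};

// Find-or-create keyed by offset, assigning dense indices in first-use
// order -- the same de-duplication used for used_samplers above.
Entry& FindOrEmplace(std::vector<Entry>& entries, std::uint32_t offset) {
    const auto it = std::find_if(entries.begin(), entries.end(),
                                 [offset](const Entry& e) { return e.offset == offset; });
    if (it != entries.end()) {
        return *it;
    }
    return entries.emplace_back(Entry{static_cast<std::uint32_t>(entries.size()), offset});
}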
| 402 | |||
| 403 | std::optional<SamplerEntry> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, | ||
| 404 | SamplerInfo info, Node& index_var) { | ||
| 405 | const Node sampler_register = GetRegister(reg); | ||
| 406 | const auto [base_node, tracked_sampler_info] = | ||
| 407 | TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size())); | ||
| 408 | if (!base_node) { | ||
| 409 | UNREACHABLE(); | ||
| 410 | return std::nullopt; | ||
| 411 | } | ||
| 412 | |||
| 413 | if (const auto sampler_info = std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) { | ||
| 414 | const u32 buffer = sampler_info->index; | ||
| 415 | const u32 offset = sampler_info->offset; | ||
| 416 | info = GetSamplerInfo(info, registry.ObtainBindlessSampler(buffer, offset)); | ||
| 417 | |||
| 418 | // If this sampler has already been used, return the existing mapping. | ||
| 419 | const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), | ||
| 420 | [buffer, offset](const SamplerEntry& entry) { | ||
| 421 | return entry.buffer == buffer && entry.offset == offset; | ||
| 422 | }); | ||
| 423 | if (it != used_samplers.end()) { | ||
| 424 | ASSERT(it->is_bindless && it->type == info.type && it->is_array == info.is_array && | ||
| 425 | it->is_shadow == info.is_shadow); | ||
| 426 | return *it; | ||
| 427 | } | ||
| 428 | |||
| 429 | // Otherwise create a new mapping for this sampler | ||
| 430 | const auto next_index = static_cast<u32>(used_samplers.size()); | ||
| 431 | return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array, | ||
| 432 | *info.is_shadow, *info.is_buffer, false); | ||
| 433 | } | ||
| 434 | if (const auto sampler_info = std::get_if<SeparateSamplerNode>(&*tracked_sampler_info)) { | ||
| 435 | const std::pair indices = sampler_info->indices; | ||
| 436 | const std::pair offsets = sampler_info->offsets; | ||
| 437 | info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets)); | ||
| 438 | |||
| 439 | // Try to use an already created sampler if it exists | ||
| 440 | const auto it = | ||
| 441 | std::find_if(used_samplers.begin(), used_samplers.end(), | ||
| 442 | [indices, offsets](const SamplerEntry& entry) { | ||
| 443 | return offsets == std::pair{entry.offset, entry.secondary_offset} && | ||
| 444 | indices == std::pair{entry.buffer, entry.secondary_buffer}; | ||
| 445 | }); | ||
| 446 | if (it != used_samplers.end()) { | ||
| 447 | ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array && | ||
| 448 | it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); | ||
| 449 | return *it; | ||
| 450 | } | ||
| 451 | |||
| 452 | // Otherwise create a new mapping for this sampler | ||
| 453 | const u32 next_index = static_cast<u32>(used_samplers.size()); | ||
| 454 | return used_samplers.emplace_back(next_index, offsets, indices, *info.type, *info.is_array, | ||
| 455 | *info.is_shadow, *info.is_buffer); | ||
| 456 | } | ||
| 457 | if (const auto sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) { | ||
| 458 | const u32 base_offset = sampler_info->base_offset / 4; | ||
| 459 | index_var = GetCustomVariable(sampler_info->bindless_var); | ||
| 460 | info = GetSamplerInfo(info, registry.ObtainBoundSampler(base_offset)); | ||
| 461 | |||
| 462 | // If this sampler has already been used, return the existing mapping. | ||
| 463 | const auto it = std::find_if( | ||
| 464 | used_samplers.begin(), used_samplers.end(), | ||
| 465 | [base_offset](const SamplerEntry& entry) { return entry.offset == base_offset; }); | ||
| 466 | if (it != used_samplers.end()) { | ||
| 467 | ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array && | ||
| 468 | it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer && | ||
| 469 | it->is_indexed); | ||
| 470 | return *it; | ||
| 471 | } | ||
| 472 | |||
| 473 | uses_indexed_samplers = true; | ||
| 474 | // Otherwise create a new mapping for this sampler | ||
| 475 | const auto next_index = static_cast<u32>(used_samplers.size()); | ||
| 476 | return used_samplers.emplace_back(next_index, base_offset, *info.type, *info.is_array, | ||
| 477 | *info.is_shadow, *info.is_buffer, true); | ||
| 478 | } | ||
| 479 | return std::nullopt; | ||
| 480 | } | ||
| 481 | |||
| 482 | void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { | ||
| 483 | u32 dest_elem = 0; | ||
| 484 | for (u32 elem = 0; elem < 4; ++elem) { | ||
| 485 | if (!instr.tex.IsComponentEnabled(elem)) { | ||
| 486 | // Skip disabled components | ||
| 487 | continue; | ||
| 488 | } | ||
| 489 | SetTemporary(bb, dest_elem++, components[elem]); | ||
| 490 | } | ||
| 491 | // After writing values in temporaries, move them to the real registers | ||
| 492 | for (u32 i = 0; i < dest_elem; ++i) { | ||
| 493 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | ||
| 494 | } | ||
| 495 | } | ||
| 496 | |||
| 497 | void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components, | ||
| 498 | bool ignore_mask) { | ||
| 499 | // TEXS has two destination registers and a swizzle. The first two elements in the swizzle | ||
| 500 | // go into gpr0+0 and gpr0+1, and the rest go into gpr28+0 and gpr28+1 | ||
| 501 | |||
| 502 | u32 dest_elem = 0; | ||
| 503 | for (u32 component = 0; component < 4; ++component) { | ||
| 504 | if (!instr.texs.IsComponentEnabled(component) && !ignore_mask) | ||
| 505 | continue; | ||
| 506 | SetTemporary(bb, dest_elem++, components[component]); | ||
| 507 | } | ||
| 508 | |||
| 509 | for (u32 i = 0; i < dest_elem; ++i) { | ||
| 510 | if (i < 2) { | ||
| 511 | // Write the first two swizzle components to gpr0 and gpr0+1 | ||
| 512 | SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporary(i)); | ||
| 513 | } else { | ||
| 514 | ASSERT(instr.texs.HasTwoDestinations()); | ||
| 515 | // Write the rest of the swizzle components to gpr28 and gpr28+1 | ||
| 516 | SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporary(i)); | ||
| 517 | } | ||
| 518 | } | ||
| 519 | } | ||
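To make the dual-destination routing concrete, here is a standalone sketch that compacts the enabled components of a hypothetical swizzle mask and prints which register each lands in; the mask and register numbers are made up for illustration:

#include <cstdio>

// Sketch: compact enabled components and route them to the two TEXS
// destination register pairs (gpr0, gpr0+1, gpr28, gpr28+1).
int main() {
    const unsigned mask = 0b1011;        // hypothetical component mask (x, y, w)
    const unsigned gpr0 = 4, gpr28 = 28; // hypothetical register numbers
    unsigned dest_elem = 0;
    for (unsigned component = 0; component < 4; ++component) {
        if ((mask & (1U << component)) == 0) {
            continue; // skip disabled components
        }
        const unsigned reg = dest_elem < 2 ? gpr0 + dest_elem : gpr28 + (dest_elem - 2);
        std::printf("component %u -> r%u\n", component, reg);
        ++dest_elem;
    }
}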
| 520 | |||
| 521 | void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, | ||
| 522 | const Node4& components, bool ignore_mask) { | ||
| 523 | // TEXS.F16 destination registers are packed in pairs into two registers (just like any half | ||
| 524 | // float instruction). | ||
| 525 | |||
| 526 | Node4 values; | ||
| 527 | u32 dest_elem = 0; | ||
| 528 | for (u32 component = 0; component < 4; ++component) { | ||
| 529 | if (!instr.texs.IsComponentEnabled(component) && !ignore_mask) | ||
| 530 | continue; | ||
| 531 | values[dest_elem++] = components[component]; | ||
| 532 | } | ||
| 533 | if (dest_elem == 0) | ||
| 534 | return; | ||
| 535 | |||
| 536 | std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); }); | ||
| 537 | |||
| 538 | const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]); | ||
| 539 | if (dest_elem <= 2) { | ||
| 540 | SetRegister(bb, instr.gpr0, first_value); | ||
| 541 | return; | ||
| 542 | } | ||
| 543 | |||
| 544 | SetTemporary(bb, 0, first_value); | ||
| 545 | SetTemporary(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); | ||
| 546 | |||
| 547 | SetRegister(bb, instr.gpr0, GetTemporary(0)); | ||
| 548 | SetRegister(bb, instr.gpr28, GetTemporary(1)); | ||
| 549 | } | ||
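The HPack2 operation used above packs two half-float values into one 32-bit register, first component in the low half. A minimal sketch of that layout, operating on raw f16 bit patterns to stay self-contained:

#include <cstdint>

// Minimal sketch of the HPack2 layout: two 16-bit half-float bit patterns
// share one 32-bit register, first component in the low half.
std::uint32_t HPack2(std::uint16_t low_half, std::uint16_t high_half) {
    return static_cast<std::uint32_t>(low_half) | (static_cast<std::uint32_t>(high_half) << 16);
}
// Example: HPack2(0x3C00, 0x4000) == 0x40003C00 (1.0 in the low half, 2.0 in the high half).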
| 550 | |||
| 551 | Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | ||
| 552 | TextureProcessMode process_mode, std::vector<Node> coords, | ||
| 553 | Node array, Node depth_compare, u32 bias_offset, | ||
| 554 | std::vector<Node> aoffi, | ||
| 555 | std::optional<Tegra::Shader::Register> bindless_reg) { | ||
| 556 | const bool is_array = array != nullptr; | ||
| 557 | const bool is_shadow = depth_compare != nullptr; | ||
| 558 | const bool is_bindless = bindless_reg.has_value(); | ||
| 559 | |||
| 560 | ASSERT_MSG(texture_type != TextureType::Texture3D || !is_array || !is_shadow, | ||
| 561 | "Illegal texture type"); | ||
| 562 | |||
| 563 | SamplerInfo info; | ||
| 564 | info.type = texture_type; | ||
| 565 | info.is_array = is_array; | ||
| 566 | info.is_shadow = is_shadow; | ||
| 567 | info.is_buffer = false; | ||
| 568 | |||
| 569 | Node index_var; | ||
| 570 | const std::optional<SamplerEntry> sampler = | ||
| 571 | is_bindless ? GetBindlessSampler(*bindless_reg, info, index_var) | ||
| 572 | : GetSampler(instr.sampler, info); | ||
| 573 | if (!sampler) { | ||
| 574 | return {Immediate(0), Immediate(0), Immediate(0), Immediate(0)}; | ||
| 575 | } | ||
| 576 | |||
| 577 | const bool lod_needed = process_mode == TextureProcessMode::LZ || | ||
| 578 | process_mode == TextureProcessMode::LL || | ||
| 579 | process_mode == TextureProcessMode::LLA; | ||
| 580 | const OperationCode opcode = lod_needed ? OperationCode::TextureLod : OperationCode::Texture; | ||
| 581 | |||
| 582 | Node bias; | ||
| 583 | Node lod; | ||
| 584 | switch (process_mode) { | ||
| 585 | case TextureProcessMode::None: | ||
| 586 | break; | ||
| 587 | case TextureProcessMode::LZ: | ||
| 588 | lod = Immediate(0.0f); | ||
| 589 | break; | ||
| 590 | case TextureProcessMode::LB: | ||
| 591 | // If present, lod or bias is always stored in the register indexed by the gpr20 field, with | ||
| 592 | // an offset depending on the usage of the other registers. | ||
| 593 | bias = GetRegister(instr.gpr20.Value() + bias_offset); | ||
| 594 | break; | ||
| 595 | case TextureProcessMode::LL: | ||
| 596 | lod = GetRegister(instr.gpr20.Value() + bias_offset); | ||
| 597 | break; | ||
| 598 | default: | ||
| 599 | UNIMPLEMENTED_MSG("Unimplemented process mode={}", process_mode); | ||
| 600 | break; | ||
| 601 | } | ||
| 602 | |||
| 603 | Node4 values; | ||
| 604 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 605 | MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias, | ||
| 606 | lod, {}, element, index_var}; | ||
| 607 | values[element] = Operation(opcode, meta, coords); | ||
| 608 | } | ||
| 609 | |||
| 610 | return values; | ||
| 611 | } | ||
| 612 | |||
| 613 | Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, | ||
| 614 | TextureProcessMode process_mode, bool depth_compare, bool is_array, | ||
| 615 | bool is_aoffi, std::optional<Tegra::Shader::Register> bindless_reg) { | ||
| 616 | const bool lod_bias_enabled{ | ||
| 617 | (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)}; | ||
| 618 | |||
| 619 | const bool is_bindless = bindless_reg.has_value(); | ||
| 620 | |||
| 621 | u64 parameter_register = instr.gpr20.Value(); | ||
| 622 | if (is_bindless) { | ||
| 623 | ++parameter_register; | ||
| 624 | } | ||
| 625 | |||
| 626 | const u32 bias_lod_offset = (is_bindless ? 1 : 0); | ||
| 627 | if (lod_bias_enabled) { | ||
| 628 | ++parameter_register; | ||
| 629 | } | ||
| 630 | |||
| 631 | const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array, | ||
| 632 | lod_bias_enabled, 4, 5); | ||
| 633 | const auto coord_count = std::get<0>(coord_counts); | ||
| 634 | // If enabled, the array index is always stored in the gpr8 field | ||
| 635 | const u64 array_register = instr.gpr8.Value(); | ||
| 636 | // The first coordinate is in gpr8, or gpr8 + 1 when arrays are used | ||
| 637 | const u64 coord_register = array_register + (is_array ? 1 : 0); | ||
| 638 | |||
| 639 | std::vector<Node> coords; | ||
| 640 | for (std::size_t i = 0; i < coord_count; ++i) { | ||
| 641 | coords.push_back(GetRegister(coord_register + i)); | ||
| 642 | } | ||
| 643 | // For 1D.DC in OpenGL, the 2nd component is ignored. | ||
| 644 | if (depth_compare && !is_array && texture_type == TextureType::Texture1D) { | ||
| 645 | coords.push_back(Immediate(0.0f)); | ||
| 646 | } | ||
| 647 | |||
| 648 | const Node array = is_array ? GetRegister(array_register) : nullptr; | ||
| 649 | |||
| 650 | std::vector<Node> aoffi; | ||
| 651 | if (is_aoffi) { | ||
| 652 | aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false); | ||
| 653 | } | ||
| 654 | |||
| 655 | Node dc; | ||
| 656 | if (depth_compare) { | ||
| 657 | // Depth is always stored in the register signaled by gpr20 or in the next register if lod | ||
| 658 | // or bias is used | ||
| 659 | dc = GetRegister(parameter_register++); | ||
| 660 | } | ||
| 661 | |||
| 662 | return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_lod_offset, | ||
| 663 | aoffi, bindless_reg); | ||
| 664 | } | ||
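The parameter_register bookkeeping in GetTexCode amounts to walking a small gpr20-relative layout, with the lod/bias slot handled through bias_lod_offset in GetTextureCode. A sketch of that walk, as read from the code above, under the assumption that every optional feature is enabled (register number 20 is hypothetical):

#include <cstdint>
#include <cstdio>

// Each optional TEX parameter consumes one register starting at gpr20:
// bindless handle, then lod/bias, then packed AOFFI, then depth reference.
int main() {
    const std::uint64_t gpr20 = 20; // hypothetical register index
    std::uint64_t cursor = gpr20;
    const bool is_bindless = true, lod_bias_enabled = true, is_aoffi = true, depth_compare = true;
    if (is_bindless)
        std::printf("r%llu: bindless sampler handle\n", static_cast<unsigned long long>(cursor++));
    if (lod_bias_enabled)
        std::printf("r%llu: lod or bias\n", static_cast<unsigned long long>(cursor++));
    if (is_aoffi)
        std::printf("r%llu: packed AOFFI offsets\n", static_cast<unsigned long long>(cursor++));
    if (depth_compare)
        std::printf("r%llu: depth compare reference\n", static_cast<unsigned long long>(cursor++));
}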
| 665 | |||
| 666 | Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, | ||
| 667 | TextureProcessMode process_mode, bool depth_compare, bool is_array) { | ||
| 668 | const bool lod_bias_enabled = | ||
| 669 | (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); | ||
| 670 | |||
| 671 | const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array, | ||
| 672 | lod_bias_enabled, 4, 4); | ||
| 673 | const auto coord_count = std::get<0>(coord_counts); | ||
| 674 | |||
| 675 | // If enabled, the array index is always stored in the gpr8 field | ||
| 676 | const u64 array_register = instr.gpr8.Value(); | ||
| 677 | // The first coordinate is stored in the gpr8 field, or (gpr8 + 1) when arrays are used | ||
| 678 | const u64 coord_register = array_register + (is_array ? 1 : 0); | ||
| 679 | const u64 last_coord_register = | ||
| 680 | (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2)) | ||
| 681 | ? static_cast<u64>(instr.gpr20.Value()) | ||
| 682 | : coord_register + 1; | ||
| 683 | const u32 bias_offset = coord_count > 2 ? 1 : 0; | ||
| 684 | |||
| 685 | std::vector<Node> coords; | ||
| 686 | for (std::size_t i = 0; i < coord_count; ++i) { | ||
| 687 | const bool last = (i == (coord_count - 1)) && (coord_count > 1); | ||
| 688 | coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); | ||
| 689 | } | ||
| 690 | |||
| 691 | const Node array = is_array ? GetRegister(array_register) : nullptr; | ||
| 692 | |||
| 693 | Node dc; | ||
| 694 | if (depth_compare) { | ||
| 695 | // Depth is always stored in the register signaled by gpr20 or in the next register if lod | ||
| 696 | // or bias is used | ||
| 697 | const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); | ||
| 698 | dc = GetRegister(depth_register); | ||
| 699 | } | ||
| 700 | |||
| 701 | return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {}, | ||
| 702 | {}); | ||
| 703 | } | ||
| 704 | |||
| 705 | Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, | ||
| 706 | bool is_array, bool is_aoffi, bool is_ptp, bool is_bindless) { | ||
| 707 | ASSERT_MSG(!(is_aoffi && is_ptp), "AOFFI and PTP can't be enabled at the same time"); | ||
| 708 | |||
| 709 | const std::size_t coord_count = GetCoordCount(texture_type); | ||
| 710 | |||
| 711 | // If enabled, the array index is always stored in the gpr8 field | ||
| 712 | const u64 array_register = instr.gpr8.Value(); | ||
| 713 | // The first coordinate is in gpr8, or gpr8 + 1 when arrays are used | ||
| 714 | const u64 coord_register = array_register + (is_array ? 1 : 0); | ||
| 715 | |||
| 716 | std::vector<Node> coords; | ||
| 717 | for (std::size_t i = 0; i < coord_count; ++i) { | ||
| 718 | coords.push_back(GetRegister(coord_register + i)); | ||
| 719 | } | ||
| 720 | |||
| 721 | u64 parameter_register = instr.gpr20.Value(); | ||
| 722 | |||
| 723 | SamplerInfo info; | ||
| 724 | info.type = texture_type; | ||
| 725 | info.is_array = is_array; | ||
| 726 | info.is_shadow = depth_compare; | ||
| 727 | |||
| 728 | Node index_var; | ||
| 729 | const std::optional<SamplerEntry> sampler = | ||
| 730 | is_bindless ? GetBindlessSampler(parameter_register++, info, index_var) | ||
| 731 | : GetSampler(instr.sampler, info); | ||
| 732 | Node4 values; | ||
| 733 | if (!sampler) { | ||
| 734 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 735 | values[element] = Immediate(0); | ||
| 736 | } | ||
| 737 | return values; | ||
| 738 | } | ||
| 739 | |||
| 740 | std::vector<Node> aoffi, ptp; | ||
| 741 | if (is_aoffi) { | ||
| 742 | aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true); | ||
| 743 | } else if (is_ptp) { | ||
| 744 | ptp = GetPtpCoordinates( | ||
| 745 | {GetRegister(parameter_register++), GetRegister(parameter_register++)}); | ||
| 746 | } | ||
| 747 | |||
| 748 | Node dc; | ||
| 749 | if (depth_compare) { | ||
| 750 | dc = GetRegister(parameter_register++); | ||
| 751 | } | ||
| 752 | |||
| 753 | const Node component = is_bindless ? Immediate(static_cast<u32>(instr.tld4_b.component)) | ||
| 754 | : Immediate(static_cast<u32>(instr.tld4.component)); | ||
| 755 | |||
| 756 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 757 | auto coords_copy = coords; | ||
| 758 | MetaTexture meta{ | ||
| 759 | *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element, | ||
| 760 | index_var}; | ||
| 761 | values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); | ||
| 762 | } | ||
| 763 | |||
| 764 | return values; | ||
| 765 | } | ||
| 766 | |||
| 767 | Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { | ||
| 768 | const auto texture_type{instr.tld.texture_type}; | ||
| 769 | const bool is_array{instr.tld.is_array != 0}; | ||
| 770 | const bool lod_enabled{instr.tld.GetTextureProcessMode() == TextureProcessMode::LL}; | ||
| 771 | const std::size_t coord_count{GetCoordCount(texture_type)}; | ||
| 772 | |||
| 773 | u64 gpr8_cursor{instr.gpr8.Value()}; | ||
| 774 | const Node array_register{is_array ? GetRegister(gpr8_cursor++) : nullptr}; | ||
| 775 | |||
| 776 | std::vector<Node> coords; | ||
| 777 | coords.reserve(coord_count); | ||
| 778 | for (std::size_t i = 0; i < coord_count; ++i) { | ||
| 779 | coords.push_back(GetRegister(gpr8_cursor++)); | ||
| 780 | } | ||
| 781 | |||
| 782 | u64 gpr20_cursor{instr.gpr20.Value()}; | ||
| 783 | // const Node bindless_register{is_bindless ? GetRegister(gpr20_cursor++) : nullptr}; | ||
| 784 | const Node lod{lod_enabled ? GetRegister(gpr20_cursor++) : Immediate(0u)}; | ||
| 785 | // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; | ||
| 786 | // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; | ||
| 787 | |||
| 788 | const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, {}); | ||
| 789 | |||
| 790 | Node4 values; | ||
| 791 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 792 | auto coords_copy = coords; | ||
| 793 | MetaTexture meta{*sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element, {}}; | ||
| 794 | values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); | ||
| 795 | } | ||
| 796 | |||
| 797 | return values; | ||
| 798 | } | ||
| 799 | |||
| 800 | Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { | ||
| 801 | SamplerInfo info; | ||
| 802 | info.type = texture_type; | ||
| 803 | info.is_array = is_array; | ||
| 804 | info.is_shadow = false; | ||
| 805 | const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info); | ||
| 806 | |||
| 807 | const std::size_t type_coord_count = GetCoordCount(texture_type); | ||
| 808 | const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; | ||
| 809 | const bool aoffi_enabled = instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI); | ||
| 810 | |||
| 811 | // If enabled, the array index is always stored in the gpr8 field | ||
| 812 | const u64 array_register = instr.gpr8.Value(); | ||
| 813 | // For array textures the coordinates are read starting at gpr20 instead | ||
| 814 | const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value(); | ||
| 815 | |||
| 816 | const u64 last_coord_register = | ||
| 817 | ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array | ||
| 818 | ? static_cast<u64>(instr.gpr20.Value()) | ||
| 819 | : coord_register + 1; | ||
| 820 | |||
| 821 | std::vector<Node> coords; | ||
| 822 | for (std::size_t i = 0; i < type_coord_count; ++i) { | ||
| 823 | const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); | ||
| 824 | coords.push_back( | ||
| 825 | GetRegister(last && !aoffi_enabled ? last_coord_register : coord_register + i)); | ||
| 826 | } | ||
| 827 | |||
| 828 | const Node array = is_array ? GetRegister(array_register) : nullptr; | ||
| 829 | // When lod is used, it is always in gpr20 | ||
| 830 | const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); | ||
| 831 | |||
| 832 | std::vector<Node> aoffi; | ||
| 833 | if (aoffi_enabled) { | ||
| 834 | aoffi = GetAoffiCoordinates(GetRegister(instr.gpr20), type_coord_count, false); | ||
| 835 | } | ||
| 836 | |||
| 837 | Node4 values; | ||
| 838 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 839 | auto coords_copy = coords; | ||
| 840 | MetaTexture meta{*sampler, array, {}, aoffi, {}, {}, {}, lod, {}, element, {}}; | ||
| 841 | values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); | ||
| 842 | } | ||
| 843 | return values; | ||
| 844 | } | ||
| 845 | |||
| 846 | std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement( | ||
| 847 | TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled, | ||
| 848 | std::size_t max_coords, std::size_t max_inputs) { | ||
| 849 | const std::size_t coord_count = GetCoordCount(texture_type); | ||
| 850 | |||
| 851 | std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0); | ||
| 852 | const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0); | ||
| 853 | if (total_coord_count > max_coords || total_reg_count > max_inputs) { | ||
| 854 | UNIMPLEMENTED_MSG("Unsupported Texture operation"); | ||
| 855 | total_coord_count = std::min(total_coord_count, max_coords); | ||
| 856 | } | ||
| 857 | // For 1D.DC, OpenGL uses a vec3, but the 2nd component is ignored later. | ||
| 858 | total_coord_count += | ||
| 859 | (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0; | ||
| 860 | |||
| 861 | return {coord_count, total_coord_count}; | ||
| 862 | } | ||
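As a worked instance of this tally, take TEX on a 2D array texture with depth compare and LOD bias; the counts below are checked at compile time against the (max_coords = 4, max_inputs = 5) limits that GetTexCode passes in:

#include <cstddef>

// Worked tally for TEX on a 2D array texture with depth compare and bias.
constexpr std::size_t coord_count = 2;                         // Texture2D
constexpr std::size_t total_coord_count = coord_count + 1 + 1; // +array +depth compare = 4
constexpr std::size_t total_reg_count = total_coord_count + 1; // +lod/bias = 5
static_assert(total_coord_count <= 4 && total_reg_count <= 5);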
| 863 | |||
| 864 | std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, | ||
| 865 | bool is_tld4) { | ||
| 866 | const std::array coord_offsets = is_tld4 ? std::array{0U, 8U, 16U} : std::array{0U, 4U, 8U}; | ||
| 867 | const u32 size = is_tld4 ? 6 : 4; | ||
| 868 | const s32 wrap_value = is_tld4 ? 32 : 8; | ||
| 869 | const s32 diff_value = is_tld4 ? 64 : 16; | ||
| 870 | const u32 mask = (1U << size) - 1; | ||
| 871 | |||
| 872 | std::vector<Node> aoffi; | ||
| 873 | aoffi.reserve(coord_count); | ||
| 874 | |||
| 875 | const auto aoffi_immediate{ | ||
| 876 | TrackImmediate(aoffi_reg, global_code, static_cast<s64>(global_code.size()))}; | ||
| 877 | if (!aoffi_immediate) { | ||
| 878 | // Variable access, not supported on AMD. | ||
| 879 | LOG_WARNING(HW_GPU, | ||
| 880 | "AOFFI constant folding failed, some hardware might have graphical issues"); | ||
| 881 | for (std::size_t coord = 0; coord < coord_count; ++coord) { | ||
| 882 | const Node value = BitfieldExtract(aoffi_reg, coord_offsets[coord], size); | ||
| 883 | const Node condition = | ||
| 884 | Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value)); | ||
| 885 | const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value)); | ||
| 886 | aoffi.push_back(Operation(OperationCode::Select, condition, negative, value)); | ||
| 887 | } | ||
| 888 | return aoffi; | ||
| 889 | } | ||
| 890 | |||
| 891 | for (std::size_t coord = 0; coord < coord_count; ++coord) { | ||
| 892 | s32 value = (*aoffi_immediate >> coord_offsets[coord]) & mask; | ||
| 893 | if (value >= wrap_value) { | ||
| 894 | value -= diff_value; | ||
| 895 | } | ||
| 896 | aoffi.push_back(Immediate(value)); | ||
| 897 | } | ||
| 898 | return aoffi; | ||
| 899 | } | ||
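A self-contained sketch of the constant-folded path above: decode a packed AOFFI register into signed per-coordinate offsets, applying the same wrap-around correction. The packed value in main is hypothetical, chosen to show one negative and one positive offset:

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

// Decode a constant-folded AOFFI register: 4-bit signed fields at bits
// 0/4/8 for most texture ops, 6-bit fields at bits 0/8/16 for TLD4.
std::vector<std::int32_t> DecodeAoffiImmediate(std::uint32_t packed, std::size_t coord_count,
                                               bool is_tld4) {
    const std::uint32_t offsets[2][3] = {{0, 4, 8}, {0, 8, 16}};
    const std::uint32_t size = is_tld4 ? 6 : 4;
    const std::int32_t wrap = is_tld4 ? 32 : 8;  // values >= wrap are negative
    const std::int32_t diff = is_tld4 ? 64 : 16; // two's complement correction
    const std::uint32_t mask = (1U << size) - 1;

    std::vector<std::int32_t> aoffi;
    for (std::size_t coord = 0; coord < coord_count; ++coord) {
        std::int32_t value =
            static_cast<std::int32_t>((packed >> offsets[is_tld4][coord]) & mask);
        if (value >= wrap) {
            value -= diff;
        }
        aoffi.push_back(value);
    }
    return aoffi;
}

int main() {
    // Hypothetical packed value 0x1F: low nibble 0xF -> -1, next nibble 0x1 -> +1
    const auto aoffi = DecodeAoffiImmediate(0x1F, 2, false);
    std::printf("aoffi = (%d, %d)\n", aoffi[0], aoffi[1]); // prints aoffi = (-1, 1)
}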
| 900 | |||
| 901 | std::vector<Node> ShaderIR::GetPtpCoordinates(std::array<Node, 2> ptp_regs) { | ||
| 902 | static constexpr u32 num_entries = 8; | ||
| 903 | |||
| 904 | std::vector<Node> ptp; | ||
| 905 | ptp.reserve(num_entries); | ||
| 906 | |||
| 907 | const auto global_size = static_cast<s64>(global_code.size()); | ||
| 908 | const std::optional low = TrackImmediate(ptp_regs[0], global_code, global_size); | ||
| 909 | const std::optional high = TrackImmediate(ptp_regs[1], global_code, global_size); | ||
| 910 | if (!low || !high) { | ||
| 911 | for (u32 entry = 0; entry < num_entries; ++entry) { | ||
| 912 | const u32 reg = entry / 4; | ||
| 913 | const u32 offset = entry % 4; | ||
| 914 | const Node value = BitfieldExtract(ptp_regs[reg], offset * 8, 6); | ||
| 915 | const Node condition = | ||
| 916 | Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(32)); | ||
| 917 | const Node negative = Operation(OperationCode::IAdd, value, Immediate(-64)); | ||
| 918 | ptp.push_back(Operation(OperationCode::Select, condition, negative, value)); | ||
| 919 | } | ||
| 920 | return ptp; | ||
| 921 | } | ||
| 922 | |||
| 923 | const u64 immediate = (static_cast<u64>(*high) << 32) | static_cast<u64>(*low); | ||
| 924 | for (u32 entry = 0; entry < num_entries; ++entry) { | ||
| 925 | s32 value = (immediate >> (entry * 8)) & 0b111111; | ||
| 926 | if (value >= 32) { | ||
| 927 | value -= 64; | ||
| 928 | } | ||
| 929 | ptp.push_back(Immediate(value)); | ||
| 930 | } | ||
| 931 | |||
| 932 | return ptp; | ||
| 933 | } | ||
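The immediate path above can be mirrored in isolation: eight 6-bit signed entries sit at byte boundaries of the combined 64-bit immediate, with the same >= 32 wrap. A minimal sketch:

#include <array>
#include <cstdint>

// Decode the eight 6-bit signed PTP offsets that TLD4.PTP packs at byte
// boundaries of a 64-bit immediate (four per 32-bit source register).
std::array<std::int32_t, 8> DecodePtpImmediate(std::uint64_t immediate) {
    std::array<std::int32_t, 8> ptp{};
    for (std::uint32_t entry = 0; entry < 8; ++entry) {
        std::int32_t value = static_cast<std::int32_t>((immediate >> (entry * 8)) & 0b111111);
        if (value >= 32) { // 6-bit two's complement
            value -= 64;
        }
        ptp[entry] = value;
    }
    return ptp;
}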
| 934 | |||
| 935 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp deleted file mode 100644 index 1c0957277..000000000 --- a/src/video_core/shader/decode/video.cpp +++ /dev/null | |||
| @@ -1,169 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using std::move; | ||
| 14 | using Tegra::Shader::Instruction; | ||
| 15 | using Tegra::Shader::OpCode; | ||
| 16 | using Tegra::Shader::Pred; | ||
| 17 | using Tegra::Shader::VideoType; | ||
| 18 | using Tegra::Shader::VmadShr; | ||
| 19 | using Tegra::Shader::VmnmxOperation; | ||
| 20 | using Tegra::Shader::VmnmxType; | ||
| 21 | |||
| 22 | u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) { | ||
| 23 | const Instruction instr = {program_code[pc]}; | ||
| 24 | const auto opcode = OpCode::Decode(instr); | ||
| 25 | |||
| 26 | if (opcode->get().GetId() == OpCode::Id::VMNMX) { | ||
| 27 | DecodeVMNMX(bb, instr); | ||
| 28 | return pc; | ||
| 29 | } | ||
| 30 | |||
| 31 | const Node op_a = | ||
| 32 | GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a, | ||
| 33 | instr.video.type_a, instr.video.byte_height_a); | ||
| 34 | const Node op_b = [this, instr] { | ||
| 35 | if (instr.video.use_register_b) { | ||
| 36 | return GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b, | ||
| 37 | instr.video.signed_b, instr.video.type_b, | ||
| 38 | instr.video.byte_height_b); | ||
| 39 | } | ||
| 40 | if (instr.video.signed_b) { | ||
| 41 | const auto imm = static_cast<s16>(instr.alu.GetImm20_16()); | ||
| 42 | return Immediate(static_cast<u32>(imm)); | ||
| 43 | } else { | ||
| 44 | return Immediate(instr.alu.GetImm20_16()); | ||
| 45 | } | ||
| 46 | }(); | ||
| 47 | |||
| 48 | switch (opcode->get().GetId()) { | ||
| 49 | case OpCode::Id::VMAD: { | ||
| 50 | const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1; | ||
| 51 | const Node op_c = GetRegister(instr.gpr39); | ||
| 52 | |||
| 53 | Node value = SignedOperation(OperationCode::IMul, result_signed, NO_PRECISE, op_a, op_b); | ||
| 54 | value = SignedOperation(OperationCode::IAdd, result_signed, NO_PRECISE, value, op_c); | ||
| 55 | |||
| 56 | if (instr.vmad.shr == VmadShr::Shr7 || instr.vmad.shr == VmadShr::Shr15) { | ||
| 57 | const Node shift = Immediate(instr.vmad.shr == VmadShr::Shr7 ? 7 : 15); | ||
| 58 | value = | ||
| 59 | SignedOperation(OperationCode::IArithmeticShiftRight, result_signed, value, shift); | ||
| 60 | } | ||
| 61 | |||
| 62 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 63 | SetRegister(bb, instr.gpr0, value); | ||
| 64 | break; | ||
| 65 | } | ||
| 66 | case OpCode::Id::VSETP: { | ||
| 67 | // We can't use the constant predicate as destination. | ||
| 68 | ASSERT(instr.vsetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | ||
| 69 | |||
| 70 | const bool sign = instr.video.signed_a == 1 || instr.video.signed_b == 1; | ||
| 71 | const Node first_pred = GetPredicateComparisonInteger(instr.vsetp.cond, sign, op_a, op_b); | ||
| 72 | const Node second_pred = GetPredicate(instr.vsetp.pred39, false); | ||
| 73 | |||
| 74 | const OperationCode combiner = GetPredicateCombiner(instr.vsetp.op); | ||
| 75 | |||
| 76 | // Set the primary predicate to the result of Predicate OP SecondPredicate | ||
| 77 | SetPredicate(bb, instr.vsetp.pred3, Operation(combiner, first_pred, second_pred)); | ||
| 78 | |||
| 79 | if (instr.vsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 80 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, | ||
| 81 | // if enabled | ||
| 82 | const Node negate_pred = Operation(OperationCode::LogicalNegate, first_pred); | ||
| 83 | SetPredicate(bb, instr.vsetp.pred0, Operation(combiner, negate_pred, second_pred)); | ||
| 84 | } | ||
| 85 | break; | ||
| 86 | } | ||
| 87 | default: | ||
| 88 | UNIMPLEMENTED_MSG("Unhandled video instruction: {}", opcode->get().GetName()); | ||
| 89 | } | ||
| 90 | |||
| 91 | return pc; | ||
| 92 | } | ||
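Stripped of the IR plumbing, the VMAD data path above is a multiply-add with an optional arithmetic shift right by 7 or 15. A scalar sketch under that reading (a shr_amount of 0 stands in for VmadShr being absent):

#include <cstdint>

// Scalar sketch of the VMAD data path: multiply-add with an optional
// arithmetic shift right by 7 or 15.
std::int32_t Vmad(std::int32_t a, std::int32_t b, std::int32_t c, std::int32_t shr_amount) {
    std::int32_t value = a * b + c;
    if (shr_amount == 7 || shr_amount == 15) {
        value >>= shr_amount; // arithmetic shift on signed values
    }
    return value;
}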
| 93 | |||
| 94 | Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, VideoType type, | ||
| 95 | u64 byte_height) { | ||
| 96 | if (!is_chunk) { | ||
| 97 | return BitfieldExtract(op, static_cast<u32>(byte_height * 8), 8); | ||
| 98 | } | ||
| 99 | |||
| 100 | switch (type) { | ||
| 101 | case VideoType::Size16_Low: | ||
| 102 | return BitfieldExtract(op, 0, 16); | ||
| 103 | case VideoType::Size16_High: | ||
| 104 | return BitfieldExtract(op, 16, 16); | ||
| 105 | case VideoType::Size32: | ||
| 106 | // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used | ||
| 107 | // (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort. | ||
| 108 | UNIMPLEMENTED(); | ||
| 109 | return Immediate(0); | ||
| 110 | case VideoType::Invalid: | ||
| 111 | UNREACHABLE_MSG("Invalid instruction encoding"); | ||
| 112 | return Immediate(0); | ||
| 113 | default: | ||
| 114 | UNREACHABLE(); | ||
| 115 | return Immediate(0); | ||
| 116 | } | ||
| 117 | } | ||
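The non-chunk path above is plain byte-lane selection. A standalone equivalent of BitfieldExtract(op, byte_height * 8, 8):

#include <cstdint>

// Select one byte lane of a 32-bit video operand by byte_height.
std::uint32_t ExtractByteLane(std::uint32_t op, std::uint32_t byte_height) {
    return (op >> (byte_height * 8)) & 0xFFU;
}
// Example: ExtractByteLane(0xDDCCBBAA, 2) == 0xCC.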
| 118 | |||
| 119 | void ShaderIR::DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr) { | ||
| 120 | UNIMPLEMENTED_IF(!instr.vmnmx.is_op_b_register); | ||
| 121 | UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatA() != VmnmxType::Bits32); | ||
| 122 | UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatB() != VmnmxType::Bits32); | ||
| 123 | UNIMPLEMENTED_IF(instr.vmnmx.is_src_a_signed != instr.vmnmx.is_src_b_signed); | ||
| 124 | UNIMPLEMENTED_IF(instr.vmnmx.sat); | ||
| 125 | UNIMPLEMENTED_IF(instr.generates_cc); | ||
| 126 | |||
| 127 | Node op_a = GetRegister(instr.gpr8); | ||
| 128 | Node op_b = GetRegister(instr.gpr20); | ||
| 129 | Node op_c = GetRegister(instr.gpr39); | ||
| 130 | |||
| 131 | const bool is_oper1_signed = instr.vmnmx.is_src_a_signed; // Stubbed | ||
| 132 | const bool is_oper2_signed = instr.vmnmx.is_dest_signed; | ||
| 133 | |||
| 134 | const auto operation_a = instr.vmnmx.mx ? OperationCode::IMax : OperationCode::IMin; | ||
| 135 | Node value = SignedOperation(operation_a, is_oper1_signed, move(op_a), move(op_b)); | ||
| 136 | |||
| 137 | switch (instr.vmnmx.operation) { | ||
| 138 | case VmnmxOperation::Mrg_16H: | ||
| 139 | value = BitfieldInsert(move(op_c), move(value), 16, 16); | ||
| 140 | break; | ||
| 141 | case VmnmxOperation::Mrg_16L: | ||
| 142 | value = BitfieldInsert(move(op_c), move(value), 0, 16); | ||
| 143 | break; | ||
| 144 | case VmnmxOperation::Mrg_8B0: | ||
| 145 | value = BitfieldInsert(move(op_c), move(value), 0, 8); | ||
| 146 | break; | ||
| 147 | case VmnmxOperation::Mrg_8B2: | ||
| 148 | value = BitfieldInsert(move(op_c), move(value), 16, 8); | ||
| 149 | break; | ||
| 150 | case VmnmxOperation::Acc: | ||
| 151 | value = Operation(OperationCode::IAdd, move(value), move(op_c)); | ||
| 152 | break; | ||
| 153 | case VmnmxOperation::Min: | ||
| 154 | value = SignedOperation(OperationCode::IMin, is_oper2_signed, move(value), move(op_c)); | ||
| 155 | break; | ||
| 156 | case VmnmxOperation::Max: | ||
| 157 | value = SignedOperation(OperationCode::IMax, is_oper2_signed, move(value), move(op_c)); | ||
| 158 | break; | ||
| 159 | case VmnmxOperation::Nop: | ||
| 160 | break; | ||
| 161 | default: | ||
| 162 | UNREACHABLE(); | ||
| 163 | break; | ||
| 164 | } | ||
| 165 | |||
| 166 | SetRegister(bb, instr.gpr0, move(value)); | ||
| 167 | } | ||
| 168 | |||
| 169 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp deleted file mode 100644 index 37433d783..000000000 --- a/src/video_core/shader/decode/warp.cpp +++ /dev/null | |||
| @@ -1,117 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | using Tegra::Shader::Pred; | ||
| 16 | using Tegra::Shader::ShuffleOperation; | ||
| 17 | using Tegra::Shader::VoteOperation; | ||
| 18 | |||
| 19 | namespace { | ||
| 20 | |||
| 21 | OperationCode GetOperationCode(VoteOperation vote_op) { | ||
| 22 | switch (vote_op) { | ||
| 23 | case VoteOperation::All: | ||
| 24 | return OperationCode::VoteAll; | ||
| 25 | case VoteOperation::Any: | ||
| 26 | return OperationCode::VoteAny; | ||
| 27 | case VoteOperation::Eq: | ||
| 28 | return OperationCode::VoteEqual; | ||
| 29 | default: | ||
| 30 | UNREACHABLE_MSG("Invalid vote operation={}", vote_op); | ||
| 31 | return OperationCode::VoteAll; | ||
| 32 | } | ||
| 33 | } | ||
| 34 | |||
| 35 | } // Anonymous namespace | ||
| 36 | |||
| 37 | u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { | ||
| 38 | const Instruction instr = {program_code[pc]}; | ||
| 39 | const auto opcode = OpCode::Decode(instr); | ||
| 40 | |||
| 41 | // Signal the backend that this shader uses warp instructions. | ||
| 42 | uses_warps = true; | ||
| 43 | |||
| 44 | switch (opcode->get().GetId()) { | ||
| 45 | case OpCode::Id::VOTE: { | ||
| 46 | const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0); | ||
| 47 | const Node active = Operation(OperationCode::BallotThread, value); | ||
| 48 | const Node vote = Operation(GetOperationCode(instr.vote.operation), value); | ||
| 49 | SetRegister(bb, instr.gpr0, active); | ||
| 50 | SetPredicate(bb, instr.vote.dest_pred, vote); | ||
| 51 | break; | ||
| 52 | } | ||
| 53 | case OpCode::Id::SHFL: { | ||
| 54 | Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm)) | ||
| 55 | : GetRegister(instr.gpr39); | ||
| 56 | Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm)) | ||
| 57 | : GetRegister(instr.gpr20); | ||
| 58 | |||
| 59 | Node thread_id = Operation(OperationCode::ThreadId); | ||
| 60 | Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU)); | ||
| 61 | Node seg_mask = BitfieldExtract(mask, 8, 16); | ||
| 62 | |||
| 63 | Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask); | ||
| 64 | Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask); | ||
| 65 | Node max_thread_id = Operation(OperationCode::IBitwiseOr, min_thread_id, | ||
| 66 | Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask)); | ||
| 67 | |||
| 68 | Node src_thread_id = [instr, index, neg_seg_mask, min_thread_id, thread_id] { | ||
| 69 | switch (instr.shfl.operation) { | ||
| 70 | case ShuffleOperation::Idx: | ||
| 71 | return Operation(OperationCode::IBitwiseOr, | ||
| 72 | Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask), | ||
| 73 | min_thread_id); | ||
| 74 | case ShuffleOperation::Down: | ||
| 75 | return Operation(OperationCode::IAdd, thread_id, index); | ||
| 76 | case ShuffleOperation::Up: | ||
| 77 | return Operation(OperationCode::IAdd, thread_id, | ||
| 78 | Operation(OperationCode::INegate, index)); | ||
| 79 | case ShuffleOperation::Bfly: | ||
| 80 | return Operation(OperationCode::IBitwiseXor, thread_id, index); | ||
| 81 | } | ||
| 82 | UNREACHABLE(); | ||
| 83 | return Immediate(0U); | ||
| 84 | }(); | ||
| 85 | |||
| 86 | Node in_bounds = [instr, src_thread_id, min_thread_id, max_thread_id] { | ||
| 87 | if (instr.shfl.operation == ShuffleOperation::Up) { | ||
| 88 | return Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id); | ||
| 89 | } else { | ||
| 90 | return Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id); | ||
| 91 | } | ||
| 92 | }(); | ||
| 93 | |||
| 94 | SetPredicate(bb, instr.shfl.pred48, in_bounds); | ||
| 95 | SetRegister( | ||
| 96 | bb, instr.gpr0, | ||
| 97 | Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id)); | ||
| 98 | break; | ||
| 99 | } | ||
| 100 | case OpCode::Id::FSWZADD: { | ||
| 101 | UNIMPLEMENTED_IF(instr.fswzadd.ndv); | ||
| 102 | |||
| 103 | Node op_a = GetRegister(instr.gpr8); | ||
| 104 | Node op_b = GetRegister(instr.gpr20); | ||
| 105 | Node mask = Immediate(static_cast<u32>(instr.fswzadd.swizzle)); | ||
| 106 | SetRegister(bb, instr.gpr0, Operation(OperationCode::FSwizzleAdd, op_a, op_b, mask)); | ||
| 107 | break; | ||
| 108 | } | ||
| 109 | default: | ||
| 110 | UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName()); | ||
| 111 | break; | ||
| 112 | } | ||
| 113 | |||
| 114 | return pc; | ||
| 115 | } | ||
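The SHFL lowering above mirrors CUDA-style shuffle semantics: the clamp and segment mask both come out of one control word, and the source lane plus bounds check depend on the mode. A scalar sketch for a single thread, as read from the IR construction above:

#include <cstdint>

enum class Shuffle { Idx, Down, Up, Bfly };

// Compute the SHFL source lane for one thread and report whether it is
// in bounds, mirroring the mask/clamp/segmask arithmetic above.
std::uint32_t ShflSourceLane(std::uint32_t thread_id, std::uint32_t index, std::uint32_t mask,
                             Shuffle op, bool& in_bounds) {
    const std::uint32_t clamp = mask & 0x1FU;
    const std::uint32_t seg_mask = (mask >> 8) & 0xFFFFU; // BitfieldExtract(mask, 8, 16)
    const std::uint32_t neg_seg_mask = ~seg_mask;
    const std::uint32_t min_thread_id = thread_id & seg_mask;
    const std::uint32_t max_thread_id = min_thread_id | (clamp & neg_seg_mask);

    std::uint32_t src = 0;
    switch (op) {
    case Shuffle::Idx:
        src = (index & neg_seg_mask) | min_thread_id;
        break;
    case Shuffle::Down:
        src = thread_id + index;
        break;
    case Shuffle::Up:
        src = thread_id - index;
        break;
    case Shuffle::Bfly:
        src = thread_id ^ index;
        break;
    }
    in_bounds = (op == Shuffle::Up) ? src >= min_thread_id : src <= max_thread_id;
    return src;
}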
| 116 | |||
| 117 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp deleted file mode 100644 index 233b8fa42..000000000 --- a/src/video_core/shader/decode/xmad.cpp +++ /dev/null | |||
| @@ -1,156 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | using Tegra::Shader::PredCondition; | ||
| 16 | |||
| 17 | u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { | ||
| 18 | const Instruction instr = {program_code[pc]}; | ||
| 19 | const auto opcode = OpCode::Decode(instr); | ||
| 20 | |||
| 21 | UNIMPLEMENTED_IF(instr.xmad.sign_a); | ||
| 22 | UNIMPLEMENTED_IF(instr.xmad.sign_b); | ||
| 23 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 24 | "Condition codes generation in XMAD is not implemented"); | ||
| 25 | |||
| 26 | Node op_a = GetRegister(instr.gpr8); | ||
| 27 | |||
| 28 | // TODO(bunnei): Needs to be fixed once op_a or op_b is signed | ||
| 29 | UNIMPLEMENTED_IF(instr.xmad.sign_a != instr.xmad.sign_b); | ||
| 30 | const bool is_signed_a = instr.xmad.sign_a == 1; | ||
| 31 | const bool is_signed_b = instr.xmad.sign_b == 1; | ||
| 32 | const bool is_signed_c = is_signed_a; | ||
| 33 | |||
| 34 | auto [is_merge, is_psl, is_high_b, mode, op_b_binding, | ||
| 35 | op_c] = [&]() -> std::tuple<bool, bool, bool, Tegra::Shader::XmadMode, Node, Node> { | ||
| 36 | switch (opcode->get().GetId()) { | ||
| 37 | case OpCode::Id::XMAD_CR: | ||
| 38 | return {instr.xmad.merge_56, | ||
| 39 | instr.xmad.product_shift_left_second, | ||
| 40 | instr.xmad.high_b, | ||
| 41 | instr.xmad.mode_cbf, | ||
| 42 | GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), | ||
| 43 | GetRegister(instr.gpr39)}; | ||
| 44 | case OpCode::Id::XMAD_RR: | ||
| 45 | return {instr.xmad.merge_37, instr.xmad.product_shift_left, instr.xmad.high_b_rr, | ||
| 46 | instr.xmad.mode, GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; | ||
| 47 | case OpCode::Id::XMAD_RC: | ||
| 48 | return {false, | ||
| 49 | false, | ||
| 50 | instr.xmad.high_b, | ||
| 51 | instr.xmad.mode_cbf, | ||
| 52 | GetRegister(instr.gpr39), | ||
| 53 | GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; | ||
| 54 | case OpCode::Id::XMAD_IMM: | ||
| 55 | return {instr.xmad.merge_37, | ||
| 56 | instr.xmad.product_shift_left, | ||
| 57 | false, | ||
| 58 | instr.xmad.mode, | ||
| 59 | Immediate(static_cast<u32>(instr.xmad.imm20_16)), | ||
| 60 | GetRegister(instr.gpr39)}; | ||
| 61 | default: | ||
| 62 | UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName()); | ||
| 63 | return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)}; | ||
| 64 | } | ||
| 65 | }(); | ||
| 66 | |||
| 67 | op_a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(op_a), | ||
| 68 | instr.xmad.high_a ? Immediate(16) : Immediate(0), Immediate(16)); | ||
| 69 | |||
| 70 | const Node original_b = op_b_binding; | ||
| 71 | const Node op_b = | ||
| 72 | SignedOperation(OperationCode::IBitfieldExtract, is_signed_b, std::move(op_b_binding), | ||
| 73 | is_high_b ? Immediate(16) : Immediate(0), Immediate(16)); | ||
| 74 | |||
| 75 | // We already checked above whether sign_a and sign_b differ, so just use one of them here. | ||
| 76 | Node product = SignedOperation(OperationCode::IMul, is_signed_a, op_a, op_b); | ||
| 77 | if (is_psl) { | ||
| 78 | product = | ||
| 79 | SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_a, product, Immediate(16)); | ||
| 80 | } | ||
| 81 | SetTemporary(bb, 0, product); | ||
| 82 | product = GetTemporary(0); | ||
| 83 | |||
| 84 | Node original_c = op_c; | ||
| 85 | const Tegra::Shader::XmadMode set_mode = mode; // Workaround for a clang compile error | ||
| 86 | op_c = [&] { | ||
| 87 | switch (set_mode) { | ||
| 88 | case Tegra::Shader::XmadMode::None: | ||
| 89 | return original_c; | ||
| 90 | case Tegra::Shader::XmadMode::CLo: | ||
| 91 | return BitfieldExtract(std::move(original_c), 0, 16); | ||
| 92 | case Tegra::Shader::XmadMode::CHi: | ||
| 93 | return BitfieldExtract(std::move(original_c), 16, 16); | ||
| 94 | case Tegra::Shader::XmadMode::CBcc: { | ||
| 95 | Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, | ||
| 96 | original_b, Immediate(16)); | ||
| 97 | return SignedOperation(OperationCode::IAdd, is_signed_c, std::move(original_c), | ||
| 98 | std::move(shifted_b)); | ||
| 99 | } | ||
| 100 | case Tegra::Shader::XmadMode::CSfu: { | ||
| 101 | const Node comp_a = | ||
| 102 | GetPredicateComparisonInteger(PredCondition::EQ, is_signed_a, op_a, Immediate(0)); | ||
| 103 | const Node comp_b = | ||
| 104 | GetPredicateComparisonInteger(PredCondition::EQ, is_signed_b, op_b, Immediate(0)); | ||
| 105 | const Node comp = Operation(OperationCode::LogicalOr, comp_a, comp_b); | ||
| 106 | |||
| 107 | const Node comp_minus_a = GetPredicateComparisonInteger( | ||
| 108 | PredCondition::NE, is_signed_a, | ||
| 109 | SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, op_a, | ||
| 110 | Immediate(0x80000000)), | ||
| 111 | Immediate(0)); | ||
| 112 | const Node comp_minus_b = GetPredicateComparisonInteger( | ||
| 113 | PredCondition::NE, is_signed_b, | ||
| 114 | SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, op_b, | ||
| 115 | Immediate(0x80000000)), | ||
| 116 | Immediate(0)); | ||
| 117 | |||
| 118 | Node new_c = Operation( | ||
| 119 | OperationCode::Select, comp_minus_a, | ||
| 120 | SignedOperation(OperationCode::IAdd, is_signed_c, original_c, Immediate(-65536)), | ||
| 121 | original_c); | ||
| 122 | new_c = Operation( | ||
| 123 | OperationCode::Select, comp_minus_b, | ||
| 124 | SignedOperation(OperationCode::IAdd, is_signed_c, new_c, Immediate(-65536)), | ||
| 125 | std::move(new_c)); | ||
| 126 | |||
| 127 | return Operation(OperationCode::Select, comp, original_c, std::move(new_c)); | ||
| 128 | } | ||
| 129 | default: | ||
| 130 | UNREACHABLE(); | ||
| 131 | return Immediate(0); | ||
| 132 | } | ||
| 133 | }(); | ||
| 134 | |||
| 135 | SetTemporary(bb, 1, op_c); | ||
| 136 | op_c = GetTemporary(1); | ||
| 137 | |||
| 138 | // TODO(Rodrigo): Use an appropriate sign for this operation | ||
| 139 | Node sum = SignedOperation(OperationCode::IAdd, is_signed_a, product, std::move(op_c)); | ||
| 140 | SetTemporary(bb, 2, sum); | ||
| 141 | sum = GetTemporary(2); | ||
| 142 | if (is_merge) { | ||
| 143 | const Node a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(sum), | ||
| 144 | Immediate(0), Immediate(16)); | ||
| 145 | const Node b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, original_b, | ||
| 146 | Immediate(16)); | ||
| 147 | sum = SignedOperation(OperationCode::IBitwiseOr, is_signed_a, a, b); | ||
| 148 | } | ||
| 149 | |||
| 150 | SetInternalFlagsFromInteger(bb, sum, instr.generates_cc); | ||
| 151 | SetRegister(bb, instr.gpr0, std::move(sum)); | ||
| 152 | |||
| 153 | return pc; | ||
| 154 | } | ||
| 155 | |||
| 156 | } // namespace VideoCommon::Shader | ||
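For reference, the XMAD decode above models a 16x16-bit multiply-add: each source contributes one 16-bit half, the product may be shifted left by 16 (PSL), an accumulator is added, and the result may merge its low half with op_b's low half. A minimal standalone sketch of that core, assuming unsigned operands only and omitting the CLo/CHi/CBcc/CSfu accumulator modes handled above:

```cpp
#include <cstdint>

// Unsigned-only sketch of the XMAD semantics the decoder reproduces.
uint32_t XmadSketch(uint32_t a, uint32_t b, uint32_t c,
                    bool high_a, bool high_b, bool psl, bool merge) {
    const uint32_t op_a = (high_a ? a >> 16 : a) & 0xFFFF;
    const uint32_t op_b = (high_b ? b >> 16 : b) & 0xFFFF;
    uint32_t product = op_a * op_b;
    if (psl) {
        product <<= 16; // product_shift_left
    }
    uint32_t sum = product + c; // XmadMode::None; other modes adjust c first
    if (merge) {
        sum = (sum & 0xFFFF) | (b << 16); // merge_37 / merge_56
    }
    return sum;
}
```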
diff --git a/src/video_core/shader/expr.cpp b/src/video_core/shader/expr.cpp deleted file mode 100644 index 2647865d4..000000000 --- a/src/video_core/shader/expr.cpp +++ /dev/null | |||
| @@ -1,93 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <memory> | ||
| 6 | #include <variant> | ||
| 7 | |||
| 8 | #include "video_core/shader/expr.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | namespace { | ||
| 12 | bool ExprIsBoolean(const Expr& expr) { | ||
| 13 | return std::holds_alternative<ExprBoolean>(*expr); | ||
| 14 | } | ||
| 15 | |||
| 16 | bool ExprBooleanGet(const Expr& expr) { | ||
| 17 | return std::get_if<ExprBoolean>(expr.get())->value; | ||
| 18 | } | ||
| 19 | } // Anonymous namespace | ||
| 20 | |||
| 21 | bool ExprAnd::operator==(const ExprAnd& b) const { | ||
| 22 | return (*operand1 == *b.operand1) && (*operand2 == *b.operand2); | ||
| 23 | } | ||
| 24 | |||
| 25 | bool ExprAnd::operator!=(const ExprAnd& b) const { | ||
| 26 | return !operator==(b); | ||
| 27 | } | ||
| 28 | |||
| 29 | bool ExprOr::operator==(const ExprOr& b) const { | ||
| 30 | return (*operand1 == *b.operand1) && (*operand2 == *b.operand2); | ||
| 31 | } | ||
| 32 | |||
| 33 | bool ExprOr::operator!=(const ExprOr& b) const { | ||
| 34 | return !operator==(b); | ||
| 35 | } | ||
| 36 | |||
| 37 | bool ExprNot::operator==(const ExprNot& b) const { | ||
| 38 | return *operand1 == *b.operand1; | ||
| 39 | } | ||
| 40 | |||
| 41 | bool ExprNot::operator!=(const ExprNot& b) const { | ||
| 42 | return !operator==(b); | ||
| 43 | } | ||
| 44 | |||
| 45 | Expr MakeExprNot(Expr first) { | ||
| 46 | if (std::holds_alternative<ExprNot>(*first)) { | ||
| 47 | return std::get_if<ExprNot>(first.get())->operand1; | ||
| 48 | } | ||
| 49 | return MakeExpr<ExprNot>(std::move(first)); | ||
| 50 | } | ||
| 51 | |||
| 52 | Expr MakeExprAnd(Expr first, Expr second) { | ||
| 53 | if (ExprIsBoolean(first)) { | ||
| 54 | return ExprBooleanGet(first) ? second : first; | ||
| 55 | } | ||
| 56 | if (ExprIsBoolean(second)) { | ||
| 57 | return ExprBooleanGet(second) ? first : second; | ||
| 58 | } | ||
| 59 | return MakeExpr<ExprAnd>(std::move(first), std::move(second)); | ||
| 60 | } | ||
| 61 | |||
| 62 | Expr MakeExprOr(Expr first, Expr second) { | ||
| 63 | if (ExprIsBoolean(first)) { | ||
| 64 | return ExprBooleanGet(first) ? first : second; | ||
| 65 | } | ||
| 66 | if (ExprIsBoolean(second)) { | ||
| 67 | return ExprBooleanGet(second) ? second : first; | ||
| 68 | } | ||
| 69 | return MakeExpr<ExprOr>(std::move(first), std::move(second)); | ||
| 70 | } | ||
| 71 | |||
| 72 | bool ExprAreEqual(const Expr& first, const Expr& second) { | ||
| 73 | return (*first) == (*second); | ||
| 74 | } | ||
| 75 | |||
| 76 | bool ExprAreOpposite(const Expr& first, const Expr& second) { | ||
| 77 | if (std::holds_alternative<ExprNot>(*first)) { | ||
| 78 | return ExprAreEqual(std::get_if<ExprNot>(first.get())->operand1, second); | ||
| 79 | } | ||
| 80 | if (std::holds_alternative<ExprNot>(*second)) { | ||
| 81 | return ExprAreEqual(std::get_if<ExprNot>(second.get())->operand1, first); | ||
| 82 | } | ||
| 83 | return false; | ||
| 84 | } | ||
| 85 | |||
| 86 | bool ExprIsTrue(const Expr& first) { | ||
| 87 | if (ExprIsBoolean(first)) { | ||
| 88 | return ExprBooleanGet(first); | ||
| 89 | } | ||
| 90 | return false; | ||
| 91 | } | ||
| 92 | |||
| 93 | } // namespace VideoCommon::Shader | ||
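A hypothetical usage of the folding helpers above (the types come from expr.h, shown next): boolean constants are folded out of AND/OR, and MakeExprNot cancels a double negation.

```cpp
#include <cassert>

using namespace VideoCommon::Shader;

void FoldingExample() {
    const Expr t = MakeExpr<ExprBoolean>(true);
    const Expr p = MakeExpr<ExprPredicate>(3);

    const Expr anded = MakeExprAnd(t, p);            // true AND p folds to p
    const Expr notted = MakeExprNot(MakeExprNot(p)); // NOT(NOT(p)) folds to p

    assert(ExprAreEqual(anded, p));
    assert(ExprAreEqual(notted, p));
}
```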
diff --git a/src/video_core/shader/expr.h b/src/video_core/shader/expr.h deleted file mode 100644 index cda284c72..000000000 --- a/src/video_core/shader/expr.h +++ /dev/null | |||
| @@ -1,156 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <memory> | ||
| 8 | #include <variant> | ||
| 9 | |||
| 10 | #include "video_core/engines/shader_bytecode.h" | ||
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | using Tegra::Shader::ConditionCode; | ||
| 15 | using Tegra::Shader::Pred; | ||
| 16 | |||
| 17 | class ExprAnd; | ||
| 18 | class ExprBoolean; | ||
| 19 | class ExprCondCode; | ||
| 20 | class ExprGprEqual; | ||
| 21 | class ExprNot; | ||
| 22 | class ExprOr; | ||
| 23 | class ExprPredicate; | ||
| 24 | class ExprVar; | ||
| 25 | |||
| 26 | using ExprData = std::variant<ExprVar, ExprCondCode, ExprPredicate, ExprNot, ExprOr, ExprAnd, | ||
| 27 | ExprBoolean, ExprGprEqual>; | ||
| 28 | using Expr = std::shared_ptr<ExprData>; | ||
| 29 | |||
| 30 | class ExprAnd final { | ||
| 31 | public: | ||
| 32 | explicit ExprAnd(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {} | ||
| 33 | |||
| 34 | bool operator==(const ExprAnd& b) const; | ||
| 35 | bool operator!=(const ExprAnd& b) const; | ||
| 36 | |||
| 37 | Expr operand1; | ||
| 38 | Expr operand2; | ||
| 39 | }; | ||
| 40 | |||
| 41 | class ExprOr final { | ||
| 42 | public: | ||
| 43 | explicit ExprOr(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {} | ||
| 44 | |||
| 45 | bool operator==(const ExprOr& b) const; | ||
| 46 | bool operator!=(const ExprOr& b) const; | ||
| 47 | |||
| 48 | Expr operand1; | ||
| 49 | Expr operand2; | ||
| 50 | }; | ||
| 51 | |||
| 52 | class ExprNot final { | ||
| 53 | public: | ||
| 54 | explicit ExprNot(Expr a) : operand1{std::move(a)} {} | ||
| 55 | |||
| 56 | bool operator==(const ExprNot& b) const; | ||
| 57 | bool operator!=(const ExprNot& b) const; | ||
| 58 | |||
| 59 | Expr operand1; | ||
| 60 | }; | ||
| 61 | |||
| 62 | class ExprVar final { | ||
| 63 | public: | ||
| 64 | explicit ExprVar(u32 index) : var_index{index} {} | ||
| 65 | |||
| 66 | bool operator==(const ExprVar& b) const { | ||
| 67 | return var_index == b.var_index; | ||
| 68 | } | ||
| 69 | |||
| 70 | bool operator!=(const ExprVar& b) const { | ||
| 71 | return !operator==(b); | ||
| 72 | } | ||
| 73 | |||
| 74 | u32 var_index; | ||
| 75 | }; | ||
| 76 | |||
| 77 | class ExprPredicate final { | ||
| 78 | public: | ||
| 79 | explicit ExprPredicate(u32 predicate_) : predicate{predicate_} {} | ||
| 80 | |||
| 81 | bool operator==(const ExprPredicate& b) const { | ||
| 82 | return predicate == b.predicate; | ||
| 83 | } | ||
| 84 | |||
| 85 | bool operator!=(const ExprPredicate& b) const { | ||
| 86 | return !operator==(b); | ||
| 87 | } | ||
| 88 | |||
| 89 | u32 predicate; | ||
| 90 | }; | ||
| 91 | |||
| 92 | class ExprCondCode final { | ||
| 93 | public: | ||
| 94 | explicit ExprCondCode(ConditionCode condition_code) : cc{condition_code} {} | ||
| 95 | |||
| 96 | bool operator==(const ExprCondCode& b) const { | ||
| 97 | return cc == b.cc; | ||
| 98 | } | ||
| 99 | |||
| 100 | bool operator!=(const ExprCondCode& b) const { | ||
| 101 | return !operator==(b); | ||
| 102 | } | ||
| 103 | |||
| 104 | ConditionCode cc; | ||
| 105 | }; | ||
| 106 | |||
| 107 | class ExprBoolean final { | ||
| 108 | public: | ||
| 109 | explicit ExprBoolean(bool val) : value{val} {} | ||
| 110 | |||
| 111 | bool operator==(const ExprBoolean& b) const { | ||
| 112 | return value == b.value; | ||
| 113 | } | ||
| 114 | |||
| 115 | bool operator!=(const ExprBoolean& b) const { | ||
| 116 | return !operator==(b); | ||
| 117 | } | ||
| 118 | |||
| 119 | bool value; | ||
| 120 | }; | ||
| 121 | |||
| 122 | class ExprGprEqual final { | ||
| 123 | public: | ||
| 124 | explicit ExprGprEqual(u32 gpr_, u32 value_) : gpr{gpr_}, value{value_} {} | ||
| 125 | |||
| 126 | bool operator==(const ExprGprEqual& b) const { | ||
| 127 | return gpr == b.gpr && value == b.value; | ||
| 128 | } | ||
| 129 | |||
| 130 | bool operator!=(const ExprGprEqual& b) const { | ||
| 131 | return !operator==(b); | ||
| 132 | } | ||
| 133 | |||
| 134 | u32 gpr; | ||
| 135 | u32 value; | ||
| 136 | }; | ||
| 137 | |||
| 138 | template <typename T, typename... Args> | ||
| 139 | Expr MakeExpr(Args&&... args) { | ||
| 140 | static_assert(std::is_convertible_v<T, ExprData>); | ||
| 141 | return std::make_shared<ExprData>(T(std::forward<Args>(args)...)); | ||
| 142 | } | ||
| 143 | |||
| 144 | bool ExprAreEqual(const Expr& first, const Expr& second); | ||
| 145 | |||
| 146 | bool ExprAreOpposite(const Expr& first, const Expr& second); | ||
| 147 | |||
| 148 | Expr MakeExprNot(Expr first); | ||
| 149 | |||
| 150 | Expr MakeExprAnd(Expr first, Expr second); | ||
| 151 | |||
| 152 | Expr MakeExprOr(Expr first, Expr second); | ||
| 153 | |||
| 154 | bool ExprIsTrue(const Expr& first); | ||
| 155 | |||
| 156 | } // namespace VideoCommon::Shader | ||
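Since Expr is a shared_ptr to a std::variant, generic traversals can be written with std::visit. A sketch (not part of the header) that counts predicate leaves, assuming only the declarations above:

```cpp
#include <cstddef>
#include <type_traits>
#include <variant>

using namespace VideoCommon::Shader;

std::size_t CountPredicates(const Expr& expr) {
    return std::visit(
        [](const auto& e) -> std::size_t {
            using T = std::decay_t<decltype(e)>;
            if constexpr (std::is_same_v<T, ExprPredicate>) {
                return 1;
            } else if constexpr (std::is_same_v<T, ExprAnd> ||
                                 std::is_same_v<T, ExprOr>) {
                return CountPredicates(e.operand1) + CountPredicates(e.operand2);
            } else if constexpr (std::is_same_v<T, ExprNot>) {
                return CountPredicates(e.operand1);
            } else {
                return 0; // ExprVar, ExprCondCode, ExprBoolean, ExprGprEqual
            }
        },
        *expr);
}
```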
diff --git a/src/video_core/shader/memory_util.cpp b/src/video_core/shader/memory_util.cpp deleted file mode 100644 index e18ccba8e..000000000 --- a/src/video_core/shader/memory_util.cpp +++ /dev/null | |||
| @@ -1,76 +0,0 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <cstddef> | ||
| 7 | |||
| 8 | #include <boost/container_hash/hash.hpp> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "core/core.h" | ||
| 12 | #include "video_core/engines/maxwell_3d.h" | ||
| 13 | #include "video_core/memory_manager.h" | ||
| 14 | #include "video_core/shader/memory_util.h" | ||
| 15 | #include "video_core/shader/shader_ir.h" | ||
| 16 | |||
| 17 | namespace VideoCommon::Shader { | ||
| 18 | |||
| 19 | GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d, | ||
| 20 | Tegra::Engines::Maxwell3D::Regs::ShaderProgram program) { | ||
| 21 | const auto& shader_config{maxwell3d.regs.shader_config[static_cast<std::size_t>(program)]}; | ||
| 22 | return maxwell3d.regs.code_address.CodeAddress() + shader_config.offset; | ||
| 23 | } | ||
| 24 | |||
| 25 | bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { | ||
| 26 | // Sched instructions appear once every 4 instructions. | ||
| 27 | constexpr std::size_t SchedPeriod = 4; | ||
| 28 | const std::size_t absolute_offset = offset - main_offset; | ||
| 29 | return (absolute_offset % SchedPeriod) == 0; | ||
| 30 | } | ||
| 31 | |||
| 32 | std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute) { | ||
| 33 | // This is the encoded version of BRA that jumps to itself. All Nvidia | ||
| 34 | // shaders end with one. | ||
| 35 | static constexpr u64 SELF_JUMPING_BRANCH = 0xE2400FFFFF07000FULL; | ||
| 36 | static constexpr u64 MASK = 0xFFFFFFFFFF7FFFFFULL; | ||
| 37 | |||
| 38 | const std::size_t start_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; | ||
| 39 | std::size_t offset = start_offset; | ||
| 40 | while (offset < program.size()) { | ||
| 41 | const u64 instruction = program[offset]; | ||
| 42 | if (!IsSchedInstruction(offset, start_offset)) { | ||
| 43 | if ((instruction & MASK) == SELF_JUMPING_BRANCH) { | ||
| 44 | // End on Maxwell's "nop" instruction | ||
| 45 | break; | ||
| 46 | } | ||
| 47 | if (instruction == 0) { | ||
| 48 | break; | ||
| 49 | } | ||
| 50 | } | ||
| 51 | ++offset; | ||
| 52 | } | ||
| 53 | // The last instruction is included in the program size | ||
| 54 | return std::min(offset + 1, program.size()); | ||
| 55 | } | ||
| 56 | |||
| 57 | ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr, | ||
| 58 | const u8* host_ptr, bool is_compute) { | ||
| 59 | ProgramCode code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); | ||
| 60 | ASSERT_OR_EXECUTE(host_ptr != nullptr, { return code; }); | ||
| 61 | memory_manager.ReadBlockUnsafe(gpu_addr, code.data(), code.size() * sizeof(u64)); | ||
| 62 | code.resize(CalculateProgramSize(code, is_compute)); | ||
| 63 | return code; | ||
| 64 | } | ||
| 65 | |||
| 66 | u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code, | ||
| 67 | const ProgramCode& code_b) { | ||
| 68 | size_t unique_identifier = boost::hash_value(code); | ||
| 69 | if (is_a) { | ||
| 70 | // VertexA programs include two programs | ||
| 71 | boost::hash_combine(unique_identifier, boost::hash_value(code_b)); | ||
| 72 | } | ||
| 73 | return static_cast<u64>(unique_identifier); | ||
| 74 | } | ||
| 75 | |||
| 76 | } // namespace VideoCommon::Shader | ||
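Note on the terminator scan above: MASK clears bit 23 of the instruction word before comparing, so an encoding that differs from the self-jumping BRA only in that bit still ends the scan. A compile-time check of that property (illustrative value only):

```cpp
#include <cstdint>

constexpr uint64_t SELF_JUMPING_BRANCH = 0xE2400FFFFF07000FULL;
constexpr uint64_t MASK = 0xFFFFFFFFFF7FFFFFULL;

// The same encoding with bit 23 set still matches the terminator test.
static_assert((0xE2400FFFFF87000FULL & MASK) == SELF_JUMPING_BRANCH);
```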
diff --git a/src/video_core/shader/memory_util.h b/src/video_core/shader/memory_util.h deleted file mode 100644 index 4624d38e6..000000000 --- a/src/video_core/shader/memory_util.h +++ /dev/null | |||
| @@ -1,43 +0,0 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <cstddef> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/engines/maxwell_3d.h" | ||
| 12 | #include "video_core/engines/shader_type.h" | ||
| 13 | |||
| 14 | namespace Tegra { | ||
| 15 | class MemoryManager; | ||
| 16 | } | ||
| 17 | |||
| 18 | namespace VideoCommon::Shader { | ||
| 19 | |||
| 20 | using ProgramCode = std::vector<u64>; | ||
| 21 | |||
| 22 | constexpr u32 STAGE_MAIN_OFFSET = 10; | ||
| 23 | constexpr u32 KERNEL_MAIN_OFFSET = 0; | ||
| 24 | |||
| 25 | /// Gets the address for the specified shader stage program | ||
| 26 | GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d, | ||
| 27 | Tegra::Engines::Maxwell3D::Regs::ShaderProgram program); | ||
| 28 | |||
| 29 | /// Returns whether the given instruction offset is a scheduler instruction | ||
| 30 | bool IsSchedInstruction(std::size_t offset, std::size_t main_offset); | ||
| 31 | |||
| 32 | /// Calculates the size of a program stream | ||
| 33 | std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute); | ||
| 34 | |||
| 35 | /// Gets the shader program code from memory for the specified address | ||
| 36 | ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr, | ||
| 37 | const u8* host_ptr, bool is_compute); | ||
| 38 | |||
| 39 | /// Hashes one (or two) program streams | ||
| 40 | u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code, | ||
| 41 | const ProgramCode& code_b = {}); | ||
| 42 | |||
| 43 | } // namespace VideoCommon::Shader | ||
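A hypothetical call sequence tying these helpers together. The maxwell3d, memory_manager, program, and shader_type arguments are assumed to exist in the caller, and GetPointer is assumed to resolve a GPU address to host memory; this is a sketch, not code from the repository:

```cpp
using namespace VideoCommon::Shader;

ProgramCode FetchAndHash(Tegra::Engines::Maxwell3D& maxwell3d,
                         Tegra::MemoryManager& memory_manager,
                         Tegra::Engines::Maxwell3D::Regs::ShaderProgram program,
                         Tegra::Engines::ShaderType shader_type, u64& out_id) {
    const GPUVAddr addr = GetShaderAddress(maxwell3d, program);
    const u8* host_ptr = memory_manager.GetPointer(addr); // assumed accessor
    ProgramCode code = GetShaderCode(memory_manager, addr, host_ptr, false);
    out_id = GetUniqueIdentifier(shader_type, false, code);
    return code;
}
```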
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h deleted file mode 100644 index b54d33763..000000000 --- a/src/video_core/shader/node.h +++ /dev/null | |||
| @@ -1,701 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <cstddef> | ||
| 9 | #include <memory> | ||
| 10 | #include <optional> | ||
| 11 | #include <string> | ||
| 12 | #include <tuple> | ||
| 13 | #include <utility> | ||
| 14 | #include <variant> | ||
| 15 | #include <vector> | ||
| 16 | |||
| 17 | #include "common/common_types.h" | ||
| 18 | #include "video_core/engines/shader_bytecode.h" | ||
| 19 | |||
| 20 | namespace VideoCommon::Shader { | ||
| 21 | |||
| 22 | enum class OperationCode { | ||
| 23 | Assign, /// (float& dest, float src) -> void | ||
| 24 | |||
| 25 | Select, /// (MetaArithmetic, bool pred, float a, float b) -> float | ||
| 26 | |||
| 27 | FAdd, /// (MetaArithmetic, float a, float b) -> float | ||
| 28 | FMul, /// (MetaArithmetic, float a, float b) -> float | ||
| 29 | FDiv, /// (MetaArithmetic, float a, float b) -> float | ||
| 30 | FFma, /// (MetaArithmetic, float a, float b, float c) -> float | ||
| 31 | FNegate, /// (MetaArithmetic, float a) -> float | ||
| 32 | FAbsolute, /// (MetaArithmetic, float a) -> float | ||
| 33 | FClamp, /// (MetaArithmetic, float value, float min, float max) -> float | ||
| 34 | FCastHalf0, /// (MetaArithmetic, f16vec2 a) -> float | ||
| 35 | FCastHalf1, /// (MetaArithmetic, f16vec2 a) -> float | ||
| 36 | FMin, /// (MetaArithmetic, float a, float b) -> float | ||
| 37 | FMax, /// (MetaArithmetic, float a, float b) -> float | ||
| 38 | FCos, /// (MetaArithmetic, float a) -> float | ||
| 39 | FSin, /// (MetaArithmetic, float a) -> float | ||
| 40 | FExp2, /// (MetaArithmetic, float a) -> float | ||
| 41 | FLog2, /// (MetaArithmetic, float a) -> float | ||
| 42 | FInverseSqrt, /// (MetaArithmetic, float a) -> float | ||
| 43 | FSqrt, /// (MetaArithmetic, float a) -> float | ||
| 44 | FRoundEven, /// (MetaArithmetic, float a) -> float | ||
| 45 | FFloor, /// (MetaArithmetic, float a) -> float | ||
| 46 | FCeil, /// (MetaArithmetic, float a) -> float | ||
| 47 | FTrunc, /// (MetaArithmetic, float a) -> float | ||
| 48 | FCastInteger, /// (MetaArithmetic, int a) -> float | ||
| 49 | FCastUInteger, /// (MetaArithmetic, uint a) -> float | ||
| 50 | FSwizzleAdd, /// (float a, float b, uint mask) -> float | ||
| 51 | |||
| 52 | IAdd, /// (MetaArithmetic, int a, int b) -> int | ||
| 53 | IMul, /// (MetaArithmetic, int a, int b) -> int | ||
| 54 | IDiv, /// (MetaArithmetic, int a, int b) -> int | ||
| 55 | INegate, /// (MetaArithmetic, int a) -> int | ||
| 56 | IAbsolute, /// (MetaArithmetic, int a) -> int | ||
| 57 | IMin, /// (MetaArithmetic, int a, int b) -> int | ||
| 58 | IMax, /// (MetaArithmetic, int a, int b) -> int | ||
| 59 | ICastFloat, /// (MetaArithmetic, float a) -> int | ||
| 60 | ICastUnsigned, /// (MetaArithmetic, uint a) -> int | ||
| 61 | ILogicalShiftLeft, /// (MetaArithmetic, int a, uint b) -> int | ||
| 62 | ILogicalShiftRight, /// (MetaArithmetic, int a, uint b) -> int | ||
| 63 | IArithmeticShiftRight, /// (MetaArithmetic, int a, uint b) -> int | ||
| 64 | IBitwiseAnd, /// (MetaArithmetic, int a, int b) -> int | ||
| 65 | IBitwiseOr, /// (MetaArithmetic, int a, int b) -> int | ||
| 66 | IBitwiseXor, /// (MetaArithmetic, int a, int b) -> int | ||
| 67 | IBitwiseNot, /// (MetaArithmetic, int a) -> int | ||
| 68 | IBitfieldInsert, /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int | ||
| 69 | IBitfieldExtract, /// (MetaArithmetic, int value, int offset, int bits) -> int | ||
| 70 | IBitCount, /// (MetaArithmetic, int) -> int | ||
| 71 | IBitMSB, /// (MetaArithmetic, int) -> int | ||
| 72 | |||
| 73 | UAdd, /// (MetaArithmetic, uint a, uint b) -> uint | ||
| 74 | UMul, /// (MetaArithmetic, uint a, uint b) -> uint | ||
| 75 | UDiv, /// (MetaArithmetic, uint a, uint b) -> uint | ||
| 76 | UMin, /// (MetaArithmetic, uint a, uint b) -> uint | ||
| 77 | UMax, /// (MetaArithmetic, uint a, uint b) -> uint | ||
| 78 | UCastFloat, /// (MetaArithmetic, float a) -> uint | ||
| 79 | UCastSigned, /// (MetaArithmetic, int a) -> uint | ||
| 80 | ULogicalShiftLeft, /// (MetaArithmetic, uint a, uint b) -> uint | ||
| 81 | ULogicalShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint | ||
| 82 | UArithmeticShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint | ||
| 83 | UBitwiseAnd, /// (MetaArithmetic, uint a, uint b) -> uint | ||
| 84 | UBitwiseOr, /// (MetaArithmetic, uint a, uint b) -> uint | ||
| 85 | UBitwiseXor, /// (MetaArithmetic, uint a, uint b) -> uint | ||
| 86 | UBitwiseNot, /// (MetaArithmetic, uint a) -> uint | ||
| 87 | UBitfieldInsert, /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint | ||
| 88 | UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int bits) -> uint | ||
| 89 | UBitCount, /// (MetaArithmetic, uint) -> uint | ||
| 90 | UBitMSB, /// (MetaArithmetic, uint) -> uint | ||
| 91 | |||
| 92 | HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 | ||
| 93 | HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 | ||
| 94 | HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 | ||
| 95 | HAbsolute, /// (f16vec2 a) -> f16vec2 | ||
| 96 | HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 | ||
| 97 | HClamp, /// (f16vec2 src, float min, float max) -> f16vec2 | ||
| 98 | HCastFloat, /// (MetaArithmetic, float a) -> f16vec2 | ||
| 99 | HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2 | ||
| 100 | HMergeF32, /// (f16vec2 src) -> float | ||
| 101 | HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 | ||
| 102 | HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 | ||
| 103 | HPack2, /// (float a, float b) -> f16vec2 | ||
| 104 | |||
| 105 | LogicalAssign, /// (bool& dst, bool src) -> void | ||
| 106 | LogicalAnd, /// (bool a, bool b) -> bool | ||
| 107 | LogicalOr, /// (bool a, bool b) -> bool | ||
| 108 | LogicalXor, /// (bool a, bool b) -> bool | ||
| 109 | LogicalNegate, /// (bool a) -> bool | ||
| 110 | LogicalPick2, /// (bool2 pair, uint index) -> bool | ||
| 111 | LogicalAnd2, /// (bool2 a) -> bool | ||
| 112 | |||
| 113 | LogicalFOrdLessThan, /// (float a, float b) -> bool | ||
| 114 | LogicalFOrdEqual, /// (float a, float b) -> bool | ||
| 115 | LogicalFOrdLessEqual, /// (float a, float b) -> bool | ||
| 116 | LogicalFOrdGreaterThan, /// (float a, float b) -> bool | ||
| 117 | LogicalFOrdNotEqual, /// (float a, float b) -> bool | ||
| 118 | LogicalFOrdGreaterEqual, /// (float a, float b) -> bool | ||
| 119 | LogicalFOrdered, /// (float a, float b) -> bool | ||
| 120 | LogicalFUnordered, /// (float a, float b) -> bool | ||
| 121 | LogicalFUnordLessThan, /// (float a, float b) -> bool | ||
| 122 | LogicalFUnordEqual, /// (float a, float b) -> bool | ||
| 123 | LogicalFUnordLessEqual, /// (float a, float b) -> bool | ||
| 124 | LogicalFUnordGreaterThan, /// (float a, float b) -> bool | ||
| 125 | LogicalFUnordNotEqual, /// (float a, float b) -> bool | ||
| 126 | LogicalFUnordGreaterEqual, /// (float a, float b) -> bool | ||
| 127 | |||
| 128 | LogicalILessThan, /// (int a, int b) -> bool | ||
| 129 | LogicalIEqual, /// (int a, int b) -> bool | ||
| 130 | LogicalILessEqual, /// (int a, int b) -> bool | ||
| 131 | LogicalIGreaterThan, /// (int a, int b) -> bool | ||
| 132 | LogicalINotEqual, /// (int a, int b) -> bool | ||
| 133 | LogicalIGreaterEqual, /// (int a, int b) -> bool | ||
| 134 | |||
| 135 | LogicalULessThan, /// (uint a, uint b) -> bool | ||
| 136 | LogicalUEqual, /// (uint a, uint b) -> bool | ||
| 137 | LogicalULessEqual, /// (uint a, uint b) -> bool | ||
| 138 | LogicalUGreaterThan, /// (uint a, uint b) -> bool | ||
| 139 | LogicalUNotEqual, /// (uint a, uint b) -> bool | ||
| 140 | LogicalUGreaterEqual, /// (uint a, uint b) -> bool | ||
| 141 | |||
| 142 | LogicalAddCarry, /// (uint a, uint b) -> bool | ||
| 143 | |||
| 144 | Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | ||
| 145 | Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | ||
| 146 | Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | ||
| 147 | Logical2HGreaterThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | ||
| 148 | Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | ||
| 149 | Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | ||
| 150 | Logical2HLessThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | ||
| 151 | Logical2HEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | ||
| 152 | Logical2HLessEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | ||
| 153 | Logical2HGreaterThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | ||
| 154 | Logical2HNotEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | ||
| 155 | Logical2HGreaterEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | ||
| 156 | |||
| 157 | Texture, /// (MetaTexture, float[N] coords) -> float4 | ||
| 158 | TextureLod, /// (MetaTexture, float[N] coords) -> float4 | ||
| 159 | TextureGather, /// (MetaTexture, float[N] coords) -> float4 | ||
| 160 | TextureQueryDimensions, /// (MetaTexture, float a) -> float4 | ||
| 161 | TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 | ||
| 162 | TexelFetch, /// (MetaTexture, int[N], int) -> float4 | ||
| 163 | TextureGradient, /// (MetaTexture, float[N] coords, float[N*2] derivates) -> float4 | ||
| 164 | |||
| 165 | ImageLoad, /// (MetaImage, int[N] coords) -> void | ||
| 166 | ImageStore, /// (MetaImage, int[N] coords) -> void | ||
| 167 | |||
| 168 | AtomicImageAdd, /// (MetaImage, int[N] coords) -> void | ||
| 169 | AtomicImageAnd, /// (MetaImage, int[N] coords) -> void | ||
| 170 | AtomicImageOr, /// (MetaImage, int[N] coords) -> void | ||
| 171 | AtomicImageXor, /// (MetaImage, int[N] coords) -> void | ||
| 172 | AtomicImageExchange, /// (MetaImage, int[N] coords) -> void | ||
| 173 | |||
| 174 | AtomicUExchange, /// (memory, uint) -> uint | ||
| 175 | AtomicUAdd, /// (memory, uint) -> uint | ||
| 176 | AtomicUMin, /// (memory, uint) -> uint | ||
| 177 | AtomicUMax, /// (memory, uint) -> uint | ||
| 178 | AtomicUAnd, /// (memory, uint) -> uint | ||
| 179 | AtomicUOr, /// (memory, uint) -> uint | ||
| 180 | AtomicUXor, /// (memory, uint) -> uint | ||
| 181 | |||
| 182 | AtomicIExchange, /// (memory, int) -> int | ||
| 183 | AtomicIAdd, /// (memory, int) -> int | ||
| 184 | AtomicIMin, /// (memory, int) -> int | ||
| 185 | AtomicIMax, /// (memory, int) -> int | ||
| 186 | AtomicIAnd, /// (memory, int) -> int | ||
| 187 | AtomicIOr, /// (memory, int) -> int | ||
| 188 | AtomicIXor, /// (memory, int) -> int | ||
| 189 | |||
| 190 | ReduceUAdd, /// (memory, uint) -> void | ||
| 191 | ReduceUMin, /// (memory, uint) -> void | ||
| 192 | ReduceUMax, /// (memory, uint) -> void | ||
| 193 | ReduceUAnd, /// (memory, uint) -> void | ||
| 194 | ReduceUOr, /// (memory, uint) -> void | ||
| 195 | ReduceUXor, /// (memory, uint) -> void | ||
| 196 | |||
| 197 | ReduceIAdd, /// (memory, int) -> void | ||
| 198 | ReduceIMin, /// (memory, int) -> void | ||
| 199 | ReduceIMax, /// (memory, int) -> void | ||
| 200 | ReduceIAnd, /// (memory, int) -> void | ||
| 201 | ReduceIOr, /// (memory, int) -> void | ||
| 202 | ReduceIXor, /// (memory, int) -> void | ||
| 203 | |||
| 204 | Branch, /// (uint branch_target) -> void | ||
| 205 | BranchIndirect, /// (uint branch_target) -> void | ||
| 206 | PushFlowStack, /// (uint branch_target) -> void | ||
| 207 | PopFlowStack, /// () -> void | ||
| 208 | Exit, /// () -> void | ||
| 209 | Discard, /// () -> void | ||
| 210 | |||
| 211 | EmitVertex, /// () -> void | ||
| 212 | EndPrimitive, /// () -> void | ||
| 213 | |||
| 214 | InvocationId, /// () -> int | ||
| 215 | YNegate, /// () -> float | ||
| 216 | LocalInvocationIdX, /// () -> uint | ||
| 217 | LocalInvocationIdY, /// () -> uint | ||
| 218 | LocalInvocationIdZ, /// () -> uint | ||
| 219 | WorkGroupIdX, /// () -> uint | ||
| 220 | WorkGroupIdY, /// () -> uint | ||
| 221 | WorkGroupIdZ, /// () -> uint | ||
| 222 | |||
| 223 | BallotThread, /// (bool) -> uint | ||
| 224 | VoteAll, /// (bool) -> bool | ||
| 225 | VoteAny, /// (bool) -> bool | ||
| 226 | VoteEqual, /// (bool) -> bool | ||
| 227 | |||
| 228 | ThreadId, /// () -> uint | ||
| 229 | ThreadEqMask, /// () -> uint | ||
| 230 | ThreadGeMask, /// () -> uint | ||
| 231 | ThreadGtMask, /// () -> uint | ||
| 232 | ThreadLeMask, /// () -> uint | ||
| 233 | ThreadLtMask, /// () -> uint | ||
| 234 | ShuffleIndexed, /// (uint value, uint index) -> uint | ||
| 235 | |||
| 236 | Barrier, /// () -> void | ||
| 237 | MemoryBarrierGroup, /// () -> void | ||
| 238 | MemoryBarrierGlobal, /// () -> void | ||
| 239 | |||
| 240 | Amount, | ||
| 241 | }; | ||
| 242 | |||
| 243 | enum class InternalFlag { | ||
| 244 | Zero = 0, | ||
| 245 | Sign = 1, | ||
| 246 | Carry = 2, | ||
| 247 | Overflow = 3, | ||
| 248 | Amount = 4, | ||
| 249 | }; | ||
| 250 | |||
| 251 | enum class MetaStackClass { | ||
| 252 | Ssy, | ||
| 253 | Pbk, | ||
| 254 | }; | ||
| 255 | |||
| 256 | class OperationNode; | ||
| 257 | class ConditionalNode; | ||
| 258 | class GprNode; | ||
| 259 | class CustomVarNode; | ||
| 260 | class ImmediateNode; | ||
| 261 | class InternalFlagNode; | ||
| 262 | class PredicateNode; | ||
| 263 | class AbufNode; | ||
| 264 | class CbufNode; | ||
| 265 | class LmemNode; | ||
| 266 | class PatchNode; | ||
| 267 | class SmemNode; | ||
| 268 | class GmemNode; | ||
| 269 | class CommentNode; | ||
| 270 | |||
| 271 | using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, CustomVarNode, ImmediateNode, | ||
| 272 | InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode, | ||
| 273 | LmemNode, SmemNode, GmemNode, CommentNode>; | ||
| 274 | using Node = std::shared_ptr<NodeData>; | ||
| 275 | using Node4 = std::array<Node, 4>; | ||
| 276 | using NodeBlock = std::vector<Node>; | ||
| 277 | |||
| 278 | struct ArraySamplerNode; | ||
| 279 | struct BindlessSamplerNode; | ||
| 280 | struct SeparateSamplerNode; | ||
| 281 | |||
| 282 | using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>; | ||
| 283 | using TrackSampler = std::shared_ptr<TrackSamplerData>; | ||
| 284 | |||
| 285 | struct SamplerEntry { | ||
| 286 | /// Bound samplers constructor | ||
| 287 | explicit SamplerEntry(u32 index_, u32 offset_, Tegra::Shader::TextureType type_, bool is_array_, | ||
| 288 | bool is_shadow_, bool is_buffer_, bool is_indexed_) | ||
| 289 | : index{index_}, offset{offset_}, type{type_}, is_array{is_array_}, is_shadow{is_shadow_}, | ||
| 290 | is_buffer{is_buffer_}, is_indexed{is_indexed_} {} | ||
| 291 | |||
| 292 | /// Separate sampler constructor | ||
| 293 | explicit SamplerEntry(u32 index_, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers, | ||
| 294 | Tegra::Shader::TextureType type_, bool is_array_, bool is_shadow_, | ||
| 295 | bool is_buffer_) | ||
| 296 | : index{index_}, offset{offsets.first}, secondary_offset{offsets.second}, | ||
| 297 | buffer{buffers.first}, secondary_buffer{buffers.second}, type{type_}, is_array{is_array_}, | ||
| 298 | is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_separated{true} {} | ||
| 299 | |||
| 300 | /// Bindless samplers constructor | ||
| 301 | explicit SamplerEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::TextureType type_, | ||
| 302 | bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_) | ||
| 303 | : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_array{is_array_}, | ||
| 304 | is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_bindless{true}, is_indexed{is_indexed_} { | ||
| 305 | } | ||
| 306 | |||
| 307 | u32 index = 0; ///< Emulated index given for this sampler. | ||
| 308 | u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read. | ||
| 309 | u32 secondary_offset = 0; ///< Secondary offset in the const buffer. | ||
| 310 | u32 buffer = 0; ///< Buffer where the bindless sampler is read. | ||
| 311 | u32 secondary_buffer = 0; ///< Secondary buffer where the bindless sampler is read. | ||
| 312 | u32 size = 1; ///< Size of the sampler. | ||
| 313 | |||
| 314 | Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) | ||
| 315 | bool is_array = false; ///< Whether the texture is being sampled as an array texture or not. | ||
| 316 | bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not. | ||
| 317 | bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler. | ||
| 318 | bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not. | ||
| 319 | bool is_indexed = false; ///< Whether this sampler is an indexed array of textures. | ||
| 320 | bool is_separated = false; ///< Whether the image and sampler are separated or not. | ||
| 321 | }; | ||
| 322 | |||
| 323 | /// Represents a tracked indexed array of bindless samplers read from a direct const buffer | ||
| 324 | struct ArraySamplerNode { | ||
| 325 | u32 index; | ||
| 326 | u32 base_offset; | ||
| 327 | u32 bindless_var; | ||
| 328 | }; | ||
| 329 | |||
| 330 | /// Represents a tracked separate sampler image pair that was folded statically | ||
| 331 | struct SeparateSamplerNode { | ||
| 332 | std::pair<u32, u32> indices; | ||
| 333 | std::pair<u32, u32> offsets; | ||
| 334 | }; | ||
| 335 | |||
| 336 | /// Represents a tracked bindless sampler read from a direct const buffer | ||
| 337 | struct BindlessSamplerNode { | ||
| 338 | u32 index; | ||
| 339 | u32 offset; | ||
| 340 | }; | ||
| 341 | |||
| 342 | struct ImageEntry { | ||
| 343 | public: | ||
| 344 | /// Bound images constructor | ||
| 345 | explicit ImageEntry(u32 index_, u32 offset_, Tegra::Shader::ImageType type_) | ||
| 346 | : index{index_}, offset{offset_}, type{type_} {} | ||
| 347 | |||
| 348 | /// Bindless images constructor | ||
| 349 | explicit ImageEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::ImageType type_) | ||
| 350 | : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_bindless{true} {} | ||
| 351 | |||
| 352 | void MarkWrite() { | ||
| 353 | is_written = true; | ||
| 354 | } | ||
| 355 | |||
| 356 | void MarkRead() { | ||
| 357 | is_read = true; | ||
| 358 | } | ||
| 359 | |||
| 360 | void MarkAtomic() { | ||
| 361 | MarkWrite(); | ||
| 362 | MarkRead(); | ||
| 363 | is_atomic = true; | ||
| 364 | } | ||
| 365 | |||
| 366 | u32 index = 0; | ||
| 367 | u32 offset = 0; | ||
| 368 | u32 buffer = 0; | ||
| 369 | |||
| 370 | Tegra::Shader::ImageType type{}; | ||
| 371 | bool is_bindless = false; | ||
| 372 | bool is_written = false; | ||
| 373 | bool is_read = false; | ||
| 374 | bool is_atomic = false; | ||
| 375 | }; | ||
| 376 | |||
| 377 | struct GlobalMemoryBase { | ||
| 378 | u32 cbuf_index = 0; | ||
| 379 | u32 cbuf_offset = 0; | ||
| 380 | |||
| 381 | [[nodiscard]] bool operator<(const GlobalMemoryBase& rhs) const { | ||
| 382 | return std::tie(cbuf_index, cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset); | ||
| 383 | } | ||
| 384 | }; | ||
| 385 | |||
| 386 | /// Parameters describing an arithmetic operation | ||
| 387 | struct MetaArithmetic { | ||
| 388 | bool precise{}; ///< Whether the operation can be constrained or not | ||
| 389 | }; | ||
| 390 | |||
| 391 | /// Parameters describing a texture sampler | ||
| 392 | struct MetaTexture { | ||
| 393 | SamplerEntry sampler; | ||
| 394 | Node array; | ||
| 395 | Node depth_compare; | ||
| 396 | std::vector<Node> aoffi; | ||
| 397 | std::vector<Node> ptp; | ||
| 398 | std::vector<Node> derivates; | ||
| 399 | Node bias; | ||
| 400 | Node lod; | ||
| 401 | Node component; | ||
| 402 | u32 element{}; | ||
| 403 | Node index; | ||
| 404 | }; | ||
| 405 | |||
| 406 | struct MetaImage { | ||
| 407 | const ImageEntry& image; | ||
| 408 | std::vector<Node> values; | ||
| 409 | u32 element{}; | ||
| 410 | }; | ||
| 411 | |||
| 412 | /// Parameters that modify an operation but are not part of any particular operand | ||
| 413 | using Meta = | ||
| 414 | std::variant<MetaArithmetic, MetaTexture, MetaImage, MetaStackClass, Tegra::Shader::HalfType>; | ||
| 415 | |||
| 416 | class AmendNode { | ||
| 417 | public: | ||
| 418 | [[nodiscard]] std::optional<std::size_t> GetAmendIndex() const { | ||
| 419 | if (amend_index == amend_null_index) { | ||
| 420 | return std::nullopt; | ||
| 421 | } | ||
| 422 | return {amend_index}; | ||
| 423 | } | ||
| 424 | |||
| 425 | void SetAmendIndex(std::size_t index) { | ||
| 426 | amend_index = index; | ||
| 427 | } | ||
| 428 | |||
| 429 | void ClearAmend() { | ||
| 430 | amend_index = amend_null_index; | ||
| 431 | } | ||
| 432 | |||
| 433 | private: | ||
| 434 | static constexpr std::size_t amend_null_index = 0xFFFFFFFFFFFFFFFFULL; | ||
| 435 | std::size_t amend_index{amend_null_index}; | ||
| 436 | }; | ||
| 437 | |||
| 438 | /// Holds any kind of operation that can be done in the IR | ||
| 439 | class OperationNode final : public AmendNode { | ||
| 440 | public: | ||
| 441 | explicit OperationNode(OperationCode code_) : OperationNode(code_, Meta{}) {} | ||
| 442 | |||
| 443 | explicit OperationNode(OperationCode code_, Meta meta_) | ||
| 444 | : OperationNode(code_, std::move(meta_), std::vector<Node>{}) {} | ||
| 445 | |||
| 446 | explicit OperationNode(OperationCode code_, std::vector<Node> operands_) | ||
| 447 | : OperationNode(code_, Meta{}, std::move(operands_)) {} | ||
| 448 | |||
| 449 | explicit OperationNode(OperationCode code_, Meta meta_, std::vector<Node> operands_) | ||
| 450 | : code{code_}, meta{std::move(meta_)}, operands{std::move(operands_)} {} | ||
| 451 | |||
| 452 | template <typename... Args> | ||
| 453 | explicit OperationNode(OperationCode code_, Meta meta_, Args&&... operands_) | ||
| 454 | : code{code_}, meta{std::move(meta_)}, operands{operands_...} {} | ||
| 455 | |||
| 456 | [[nodiscard]] OperationCode GetCode() const { | ||
| 457 | return code; | ||
| 458 | } | ||
| 459 | |||
| 460 | [[nodiscard]] const Meta& GetMeta() const { | ||
| 461 | return meta; | ||
| 462 | } | ||
| 463 | |||
| 464 | [[nodiscard]] std::size_t GetOperandsCount() const { | ||
| 465 | return operands.size(); | ||
| 466 | } | ||
| 467 | |||
| 468 | [[nodiscard]] const Node& operator[](std::size_t operand_index) const { | ||
| 469 | return operands.at(operand_index); | ||
| 470 | } | ||
| 471 | |||
| 472 | private: | ||
| 473 | OperationCode code{}; | ||
| 474 | Meta meta{}; | ||
| 475 | std::vector<Node> operands; | ||
| 476 | }; | ||
| 477 | |||
| 478 | /// Encloses conditionally-executed code guarded by a node that returns a boolean | ||
| 479 | class ConditionalNode final : public AmendNode { | ||
| 480 | public: | ||
| 481 | explicit ConditionalNode(Node condition_, std::vector<Node>&& code_) | ||
| 482 | : condition{std::move(condition_)}, code{std::move(code_)} {} | ||
| 483 | |||
| 484 | [[nodiscard]] const Node& GetCondition() const { | ||
| 485 | return condition; | ||
| 486 | } | ||
| 487 | |||
| 488 | [[nodiscard]] const std::vector<Node>& GetCode() const { | ||
| 489 | return code; | ||
| 490 | } | ||
| 491 | |||
| 492 | private: | ||
| 493 | Node condition; ///< Condition to be satisfied | ||
| 494 | std::vector<Node> code; ///< Code to execute | ||
| 495 | }; | ||
| 496 | |||
| 497 | /// A general purpose register | ||
| 498 | class GprNode final { | ||
| 499 | public: | ||
| 500 | explicit constexpr GprNode(Tegra::Shader::Register index_) : index{index_} {} | ||
| 501 | |||
| 502 | [[nodiscard]] constexpr u32 GetIndex() const { | ||
| 503 | return static_cast<u32>(index); | ||
| 504 | } | ||
| 505 | |||
| 506 | private: | ||
| 507 | Tegra::Shader::Register index{}; | ||
| 508 | }; | ||
| 509 | |||
| 510 | /// A custom variable | ||
| 511 | class CustomVarNode final { | ||
| 512 | public: | ||
| 513 | explicit constexpr CustomVarNode(u32 index_) : index{index_} {} | ||
| 514 | |||
| 515 | [[nodiscard]] constexpr u32 GetIndex() const { | ||
| 516 | return index; | ||
| 517 | } | ||
| 518 | |||
| 519 | private: | ||
| 520 | u32 index{}; | ||
| 521 | }; | ||
| 522 | |||
| 523 | /// A 32-bit immediate value | ||
| 524 | class ImmediateNode final { | ||
| 525 | public: | ||
| 526 | explicit constexpr ImmediateNode(u32 value_) : value{value_} {} | ||
| 527 | |||
| 528 | [[nodiscard]] constexpr u32 GetValue() const { | ||
| 529 | return value; | ||
| 530 | } | ||
| 531 | |||
| 532 | private: | ||
| 533 | u32 value{}; | ||
| 534 | }; | ||
| 535 | |||
| 536 | /// One of Maxwell's internal flags | ||
| 537 | class InternalFlagNode final { | ||
| 538 | public: | ||
| 539 | explicit constexpr InternalFlagNode(InternalFlag flag_) : flag{flag_} {} | ||
| 540 | |||
| 541 | [[nodiscard]] constexpr InternalFlag GetFlag() const { | ||
| 542 | return flag; | ||
| 543 | } | ||
| 544 | |||
| 545 | private: | ||
| 546 | InternalFlag flag{}; | ||
| 547 | }; | ||
| 548 | |||
| 549 | /// A predicate register; it can be negated without additional nodes | ||
| 550 | class PredicateNode final { | ||
| 551 | public: | ||
| 552 | explicit constexpr PredicateNode(Tegra::Shader::Pred index_, bool negated_) | ||
| 553 | : index{index_}, negated{negated_} {} | ||
| 554 | |||
| 555 | [[nodiscard]] constexpr Tegra::Shader::Pred GetIndex() const { | ||
| 556 | return index; | ||
| 557 | } | ||
| 558 | |||
| 559 | [[nodiscard]] constexpr bool IsNegated() const { | ||
| 560 | return negated; | ||
| 561 | } | ||
| 562 | |||
| 563 | private: | ||
| 564 | Tegra::Shader::Pred index{}; | ||
| 565 | bool negated{}; | ||
| 566 | }; | ||
| 567 | |||
| 568 | /// Attribute buffer memory (known as attributes or varyings in GLSL terms) | ||
| 569 | class AbufNode final { | ||
| 570 | public: | ||
| 571 | // Initialize for standard attributes (index is explicit). | ||
| 572 | explicit AbufNode(Tegra::Shader::Attribute::Index index_, u32 element_, Node buffer_ = {}) | ||
| 573 | : buffer{std::move(buffer_)}, index{index_}, element{element_} {} | ||
| 574 | |||
| 575 | // Initialize for physical attributes (index is a variable value). | ||
| 576 | explicit AbufNode(Node physical_address_, Node buffer_ = {}) | ||
| 577 | : physical_address{std::move(physical_address_)}, buffer{std::move(buffer_)} {} | ||
| 578 | |||
| 579 | [[nodiscard]] Tegra::Shader::Attribute::Index GetIndex() const { | ||
| 580 | return index; | ||
| 581 | } | ||
| 582 | |||
| 583 | [[nodiscard]] u32 GetElement() const { | ||
| 584 | return element; | ||
| 585 | } | ||
| 586 | |||
| 587 | [[nodiscard]] const Node& GetBuffer() const { | ||
| 588 | return buffer; | ||
| 589 | } | ||
| 590 | |||
| 591 | [[nodiscard]] bool IsPhysicalBuffer() const { | ||
| 592 | return static_cast<bool>(physical_address); | ||
| 593 | } | ||
| 594 | |||
| 595 | [[nodiscard]] const Node& GetPhysicalAddress() const { | ||
| 596 | return physical_address; | ||
| 597 | } | ||
| 598 | |||
| 599 | private: | ||
| 600 | Node physical_address; | ||
| 601 | Node buffer; | ||
| 602 | Tegra::Shader::Attribute::Index index{}; | ||
| 603 | u32 element{}; | ||
| 604 | }; | ||
| 605 | |||
| 606 | /// Patch memory (used to communicate tessellation stages). | ||
| 607 | class PatchNode final { | ||
| 608 | public: | ||
| 609 | explicit constexpr PatchNode(u32 offset_) : offset{offset_} {} | ||
| 610 | |||
| 611 | [[nodiscard]] constexpr u32 GetOffset() const { | ||
| 612 | return offset; | ||
| 613 | } | ||
| 614 | |||
| 615 | private: | ||
| 616 | u32 offset{}; | ||
| 617 | }; | ||
| 618 | |||
| 619 | /// Constant buffer node, usually mapped to uniform buffers in GLSL | ||
| 620 | class CbufNode final { | ||
| 621 | public: | ||
| 622 | explicit CbufNode(u32 index_, Node offset_) : index{index_}, offset{std::move(offset_)} {} | ||
| 623 | |||
| 624 | [[nodiscard]] u32 GetIndex() const { | ||
| 625 | return index; | ||
| 626 | } | ||
| 627 | |||
| 628 | [[nodiscard]] const Node& GetOffset() const { | ||
| 629 | return offset; | ||
| 630 | } | ||
| 631 | |||
| 632 | private: | ||
| 633 | u32 index{}; | ||
| 634 | Node offset; | ||
| 635 | }; | ||
| 636 | |||
| 637 | /// Local memory node | ||
| 638 | class LmemNode final { | ||
| 639 | public: | ||
| 640 | explicit LmemNode(Node address_) : address{std::move(address_)} {} | ||
| 641 | |||
| 642 | [[nodiscard]] const Node& GetAddress() const { | ||
| 643 | return address; | ||
| 644 | } | ||
| 645 | |||
| 646 | private: | ||
| 647 | Node address; | ||
| 648 | }; | ||
| 649 | |||
| 650 | /// Shared memory node | ||
| 651 | class SmemNode final { | ||
| 652 | public: | ||
| 653 | explicit SmemNode(Node address_) : address{std::move(address_)} {} | ||
| 654 | |||
| 655 | [[nodiscard]] const Node& GetAddress() const { | ||
| 656 | return address; | ||
| 657 | } | ||
| 658 | |||
| 659 | private: | ||
| 660 | Node address; | ||
| 661 | }; | ||
| 662 | |||
| 663 | /// Global memory node | ||
| 664 | class GmemNode final { | ||
| 665 | public: | ||
| 666 | explicit GmemNode(Node real_address_, Node base_address_, const GlobalMemoryBase& descriptor_) | ||
| 667 | : real_address{std::move(real_address_)}, base_address{std::move(base_address_)}, | ||
| 668 | descriptor{descriptor_} {} | ||
| 669 | |||
| 670 | [[nodiscard]] const Node& GetRealAddress() const { | ||
| 671 | return real_address; | ||
| 672 | } | ||
| 673 | |||
| 674 | [[nodiscard]] const Node& GetBaseAddress() const { | ||
| 675 | return base_address; | ||
| 676 | } | ||
| 677 | |||
| 678 | [[nodiscard]] const GlobalMemoryBase& GetDescriptor() const { | ||
| 679 | return descriptor; | ||
| 680 | } | ||
| 681 | |||
| 682 | private: | ||
| 683 | Node real_address; | ||
| 684 | Node base_address; | ||
| 685 | GlobalMemoryBase descriptor; | ||
| 686 | }; | ||
| 687 | |||
| 688 | /// Commentary, can be dropped | ||
| 689 | class CommentNode final { | ||
| 690 | public: | ||
| 691 | explicit CommentNode(std::string text_) : text{std::move(text_)} {} | ||
| 692 | |||
| 693 | [[nodiscard]] const std::string& GetText() const { | ||
| 694 | return text; | ||
| 695 | } | ||
| 696 | |||
| 697 | private: | ||
| 698 | std::string text; | ||
| 699 | }; | ||
| 700 | |||
| 701 | } // namespace VideoCommon::Shader | ||
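Because Node wraps a std::variant, node kinds are queried with std::get_if. A small sketch (not in the header) of a helper that extracts an immediate's value, assuming only the declarations above:

```cpp
#include <optional>
#include <variant>

using namespace VideoCommon::Shader;

std::optional<u32> TryGetImmediate(const Node& node) {
    if (const auto* imm = std::get_if<ImmediateNode>(node.get())) {
        return imm->GetValue();
    }
    return std::nullopt;
}
```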
diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp deleted file mode 100644 index 6a5b6940d..000000000 --- a/src/video_core/shader/node_helper.cpp +++ /dev/null | |||
| @@ -1,115 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <cstring> | ||
| 6 | #include <vector> | ||
| 7 | |||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "video_core/shader/node_helper.h" | ||
| 10 | #include "video_core/shader/shader_ir.h" | ||
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | Node Conditional(Node condition, std::vector<Node> code) { | ||
| 15 | return MakeNode<ConditionalNode>(std::move(condition), std::move(code)); | ||
| 16 | } | ||
| 17 | |||
| 18 | Node Comment(std::string text) { | ||
| 19 | return MakeNode<CommentNode>(std::move(text)); | ||
| 20 | } | ||
| 21 | |||
| 22 | Node Immediate(u32 value) { | ||
| 23 | return MakeNode<ImmediateNode>(value); | ||
| 24 | } | ||
| 25 | |||
| 26 | Node Immediate(s32 value) { | ||
| 27 | return Immediate(static_cast<u32>(value)); | ||
| 28 | } | ||
| 29 | |||
| 30 | Node Immediate(f32 value) { | ||
| 31 | u32 integral; | ||
| 32 | std::memcpy(&integral, &value, sizeof(u32)); | ||
| 33 | return Immediate(integral); | ||
| 34 | } | ||
| 35 | |||
| 36 | OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed) { | ||
| 37 | if (is_signed) { | ||
| 38 | return operation_code; | ||
| 39 | } | ||
| 40 | switch (operation_code) { | ||
| 41 | case OperationCode::FCastInteger: | ||
| 42 | return OperationCode::FCastUInteger; | ||
| 43 | case OperationCode::IAdd: | ||
| 44 | return OperationCode::UAdd; | ||
| 45 | case OperationCode::IMul: | ||
| 46 | return OperationCode::UMul; | ||
| 47 | case OperationCode::IDiv: | ||
| 48 | return OperationCode::UDiv; | ||
| 49 | case OperationCode::IMin: | ||
| 50 | return OperationCode::UMin; | ||
| 51 | case OperationCode::IMax: | ||
| 52 | return OperationCode::UMax; | ||
| 53 | case OperationCode::ICastFloat: | ||
| 54 | return OperationCode::UCastFloat; | ||
| 55 | case OperationCode::ICastUnsigned: | ||
| 56 | return OperationCode::UCastSigned; | ||
| 57 | case OperationCode::ILogicalShiftLeft: | ||
| 58 | return OperationCode::ULogicalShiftLeft; | ||
| 59 | case OperationCode::ILogicalShiftRight: | ||
| 60 | return OperationCode::ULogicalShiftRight; | ||
| 61 | case OperationCode::IArithmeticShiftRight: | ||
| 62 | return OperationCode::UArithmeticShiftRight; | ||
| 63 | case OperationCode::IBitwiseAnd: | ||
| 64 | return OperationCode::UBitwiseAnd; | ||
| 65 | case OperationCode::IBitwiseOr: | ||
| 66 | return OperationCode::UBitwiseOr; | ||
| 67 | case OperationCode::IBitwiseXor: | ||
| 68 | return OperationCode::UBitwiseXor; | ||
| 69 | case OperationCode::IBitwiseNot: | ||
| 70 | return OperationCode::UBitwiseNot; | ||
| 71 | case OperationCode::IBitfieldExtract: | ||
| 72 | return OperationCode::UBitfieldExtract; | ||
| 73 | case OperationCode::IBitfieldInsert: | ||
| 74 | return OperationCode::UBitfieldInsert; | ||
| 75 | case OperationCode::IBitCount: | ||
| 76 | return OperationCode::UBitCount; | ||
| 77 | case OperationCode::LogicalILessThan: | ||
| 78 | return OperationCode::LogicalULessThan; | ||
| 79 | case OperationCode::LogicalIEqual: | ||
| 80 | return OperationCode::LogicalUEqual; | ||
| 81 | case OperationCode::LogicalILessEqual: | ||
| 82 | return OperationCode::LogicalULessEqual; | ||
| 83 | case OperationCode::LogicalIGreaterThan: | ||
| 84 | return OperationCode::LogicalUGreaterThan; | ||
| 85 | case OperationCode::LogicalINotEqual: | ||
| 86 | return OperationCode::LogicalUNotEqual; | ||
| 87 | case OperationCode::LogicalIGreaterEqual: | ||
| 88 | return OperationCode::LogicalUGreaterEqual; | ||
| 89 | case OperationCode::AtomicIExchange: | ||
| 90 | return OperationCode::AtomicUExchange; | ||
| 91 | case OperationCode::AtomicIAdd: | ||
| 92 | return OperationCode::AtomicUAdd; | ||
| 93 | case OperationCode::AtomicIMin: | ||
| 94 | return OperationCode::AtomicUMin; | ||
| 95 | case OperationCode::AtomicIMax: | ||
| 96 | return OperationCode::AtomicUMax; | ||
| 97 | case OperationCode::AtomicIAnd: | ||
| 98 | return OperationCode::AtomicUAnd; | ||
| 99 | case OperationCode::AtomicIOr: | ||
| 100 | return OperationCode::AtomicUOr; | ||
| 101 | case OperationCode::AtomicIXor: | ||
| 102 | return OperationCode::AtomicUXor; | ||
| 103 | case OperationCode::INegate: | ||
| 104 | UNREACHABLE_MSG("Can't negate an unsigned integer"); | ||
| 105 | return {}; | ||
| 106 | case OperationCode::IAbsolute: | ||
| 107 | UNREACHABLE_MSG("Can't apply absolute to an unsigned integer"); | ||
| 108 | return {}; | ||
| 109 | default: | ||
| 110 | UNREACHABLE_MSG("Unknown signed operation with code={}", operation_code); | ||
| 111 | return {}; | ||
| 112 | } | ||
| 113 | } | ||
| 114 | |||
| 115 | } // namespace VideoCommon::Shader | ||
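Illustration of the mapping above: with is_signed set the opcode passes through unchanged; otherwise it is rewritten to its U*/LogicalU*/AtomicU* counterpart.

```cpp
using namespace VideoCommon::Shader;

void MappingExample() {
    const OperationCode s = SignedToUnsignedCode(OperationCode::IAdd, true);  // IAdd
    const OperationCode u = SignedToUnsignedCode(OperationCode::IAdd, false); // UAdd
    const OperationCode c =
        SignedToUnsignedCode(OperationCode::LogicalILessThan, false); // LogicalULessThan
    (void)s; (void)u; (void)c;
}
```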
diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h deleted file mode 100644 index 1e0886185..000000000 --- a/src/video_core/shader/node_helper.h +++ /dev/null | |||
| @@ -1,71 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <memory> | ||
| 8 | #include <string> | ||
| 9 | #include <tuple> | ||
| 10 | #include <type_traits> | ||
| 11 | #include <utility> | ||
| 12 | #include <vector> | ||
| 13 | |||
| 14 | #include "common/common_types.h" | ||
| 15 | #include "video_core/shader/node.h" | ||
| 16 | |||
| 17 | namespace VideoCommon::Shader { | ||
| 18 | |||
| 19 | /// This arithmetic operation cannot be constrained | ||
| 20 | inline constexpr MetaArithmetic PRECISE = {true}; | ||
| 21 | /// This arithmetic operation can be optimized away | ||
| 22 | inline constexpr MetaArithmetic NO_PRECISE = {false}; | ||
| 23 | |||
| 24 | /// Creates a conditional node | ||
| 25 | Node Conditional(Node condition, std::vector<Node> code); | ||
| 26 | |||
| 27 | /// Creates a commentary node | ||
| 28 | Node Comment(std::string text); | ||
| 29 | |||
| 30 | /// Creates a u32 immediate | ||
| 31 | Node Immediate(u32 value); | ||
| 32 | |||
| 33 | /// Creates an s32 immediate | ||
| 34 | Node Immediate(s32 value); | ||
| 35 | |||
| 36 | /// Creates an f32 immediate | ||
| 37 | Node Immediate(f32 value); | ||
| 38 | |||
| 39 | /// Converts a signed operation code to its unsigned counterpart | ||
| 40 | OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed); | ||
| 41 | |||
| 42 | template <typename T, typename... Args> | ||
| 43 | Node MakeNode(Args&&... args) { | ||
| 44 | static_assert(std::is_convertible_v<T, NodeData>); | ||
| 45 | return std::make_shared<NodeData>(T(std::forward<Args>(args)...)); | ||
| 46 | } | ||
| 47 | |||
| 48 | template <typename T, typename... Args> | ||
| 49 | TrackSampler MakeTrackSampler(Args&&... args) { | ||
| 50 | static_assert(std::is_convertible_v<T, TrackSamplerData>); | ||
| 51 | return std::make_shared<TrackSamplerData>(T{std::forward<Args>(args)...}); | ||
| 52 | } | ||
| 53 | |||
| 54 | template <typename... Args> | ||
| 55 | Node Operation(OperationCode code, Args&&... args) { | ||
| 56 | if constexpr (sizeof...(args) == 0) { | ||
| 57 | return MakeNode<OperationNode>(code); | ||
| 58 | } else if constexpr (std::is_convertible_v<std::tuple_element_t<0, std::tuple<Args...>>, | ||
| 59 | Meta>) { | ||
| 60 | return MakeNode<OperationNode>(code, std::forward<Args>(args)...); | ||
| 61 | } else { | ||
| 62 | return MakeNode<OperationNode>(code, Meta{}, std::forward<Args>(args)...); | ||
| 63 | } | ||
| 64 | } | ||
| 65 | |||
| 66 | template <typename... Args> | ||
| 67 | Node SignedOperation(OperationCode code, bool is_signed, Args&&... args) { | ||
| 68 | return Operation(SignedToUnsignedCode(code, is_signed), std::forward<Args>(args)...); | ||
| 69 | } | ||
| 70 | |||
| 71 | } // namespace VideoCommon::Shader | ||
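Putting the helpers together, a hedged sketch of building the IR for r0 + 16 as an unsigned add. Constructing GprNode directly and the Register{0} initialization are assumptions for illustration; inside ShaderIR one would use GetRegister instead:

```cpp
using namespace VideoCommon::Shader;

Node BuildExample() {
    Node lhs = MakeNode<GprNode>(Tegra::Shader::Register{0});
    return SignedOperation(OperationCode::IAdd, /*is_signed=*/false,
                           std::move(lhs), Immediate(16));
}
```

Note how Operation's first-argument dispatch applies here: since a Node is not convertible to Meta, the call falls through to the branch that injects a default Meta{} before the operands.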
diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp deleted file mode 100644 index 148d91fcb..000000000 --- a/src/video_core/shader/registry.cpp +++ /dev/null | |||
| @@ -1,181 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <tuple> | ||
| 7 | |||
| 8 | #include "common/assert.h" | ||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/engines/kepler_compute.h" | ||
| 11 | #include "video_core/engines/maxwell_3d.h" | ||
| 12 | #include "video_core/engines/shader_type.h" | ||
| 13 | #include "video_core/shader/registry.h" | ||
| 14 | |||
| 15 | namespace VideoCommon::Shader { | ||
| 16 | |||
| 17 | using Tegra::Engines::ConstBufferEngineInterface; | ||
| 18 | using Tegra::Engines::SamplerDescriptor; | ||
| 19 | using Tegra::Engines::ShaderType; | ||
| 20 | |||
| 21 | namespace { | ||
| 22 | |||
| 23 | GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { | ||
| 24 | if (shader_stage == ShaderType::Compute) { | ||
| 25 | return {}; | ||
| 26 | } | ||
| 27 | |||
| 28 | auto& graphics = dynamic_cast<Tegra::Engines::Maxwell3D&>(engine); | ||
| 29 | |||
| 30 | return { | ||
| 31 | .tfb_layouts = graphics.regs.tfb_layouts, | ||
| 32 | .tfb_varying_locs = graphics.regs.tfb_varying_locs, | ||
| 33 | .primitive_topology = graphics.regs.draw.topology, | ||
| 34 | .tessellation_primitive = graphics.regs.tess_mode.prim, | ||
| 35 | .tessellation_spacing = graphics.regs.tess_mode.spacing, | ||
| 36 | .tfb_enabled = graphics.regs.tfb_enabled != 0, | ||
| 37 | .tessellation_clockwise = graphics.regs.tess_mode.cw.Value() != 0, | ||
| 38 | }; | ||
| 39 | } | ||
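| | // The dynamic_cast here and in MakeComputeInfo below is assumed safe: after the | ||
| | // stage check, graphics stages are only ever backed by Maxwell3D and compute | ||
| | // dispatches by KeplerCompute. | ||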
| 40 | |||
| 41 | ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { | ||
| 42 | if (shader_stage != ShaderType::Compute) { | ||
| 43 | return {}; | ||
| 44 | } | ||
| 45 | |||
| 46 | auto& compute = dynamic_cast<Tegra::Engines::KeplerCompute&>(engine); | ||
| 47 | const auto& launch = compute.launch_description; | ||
| 48 | |||
| 49 | return { | ||
| 50 | .workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z}, | ||
| 51 | .shared_memory_size_in_words = launch.shared_alloc, | ||
| 52 | .local_memory_size_in_words = launch.local_pos_alloc, | ||
| 53 | }; | ||
| 54 | } | ||
| 55 | |||
| 56 | } // Anonymous namespace | ||
| 57 | |||
| 58 | Registry::Registry(ShaderType shader_stage, const SerializedRegistryInfo& info) | ||
| 59 | : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile}, | ||
| 60 | bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {} | ||
| 61 | |||
| 62 | Registry::Registry(ShaderType shader_stage, ConstBufferEngineInterface& engine_) | ||
| 63 | : stage{shader_stage}, engine{&engine_}, bound_buffer{engine_.GetBoundBuffer()}, | ||
| 64 | graphics_info{MakeGraphicsInfo(shader_stage, engine_)}, compute_info{MakeComputeInfo( | ||
| 65 | shader_stage, engine_)} {} | ||
| 66 | |||
| 67 | Registry::~Registry() = default; | ||
| 68 | |||
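| | // The Obtain* helpers below share one memoization pattern: serve the value from | ||
| | // the local cache when present; otherwise, if a live engine is attached, read it | ||
| | // from GPU state and cache it. Deserialized registries have no engine, so a | ||
| | // cache miss yields std::nullopt. | ||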
| 69 | std::optional<u32> Registry::ObtainKey(u32 buffer, u32 offset) { | ||
| 70 | const std::pair<u32, u32> key = {buffer, offset}; | ||
| 71 | const auto iter = keys.find(key); | ||
| 72 | if (iter != keys.end()) { | ||
| 73 | return iter->second; | ||
| 74 | } | ||
| 75 | if (!engine) { | ||
| 76 | return std::nullopt; | ||
| 77 | } | ||
| 78 | const u32 value = engine->AccessConstBuffer32(stage, buffer, offset); | ||
| 79 | keys.emplace(key, value); | ||
| 80 | return value; | ||
| 81 | } | ||
| 82 | |||
| 83 | std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) { | ||
| 84 | const u32 key = offset; | ||
| 85 | const auto iter = bound_samplers.find(key); | ||
| 86 | if (iter != bound_samplers.end()) { | ||
| 87 | return iter->second; | ||
| 88 | } | ||
| 89 | if (!engine) { | ||
| 90 | return std::nullopt; | ||
| 91 | } | ||
| 92 | const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset); | ||
| 93 | bound_samplers.emplace(key, value); | ||
| 94 | return value; | ||
| 95 | } | ||
| 96 | |||
| 97 | std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainSeparateSampler( | ||
| 98 | std::pair<u32, u32> buffers, std::pair<u32, u32> offsets) { | ||
| 99 | SeparateSamplerKey key; | ||
| 100 | key.buffers = buffers; | ||
| 101 | key.offsets = offsets; | ||
| 102 | const auto iter = separate_samplers.find(key); | ||
| 103 | if (iter != separate_samplers.end()) { | ||
| 104 | return iter->second; | ||
| 105 | } | ||
| 106 | if (!engine) { | ||
| 107 | return std::nullopt; | ||
| 108 | } | ||
| 109 | |||
| 110 | const u32 handle_1 = engine->AccessConstBuffer32(stage, key.buffers.first, key.offsets.first); | ||
| 111 | const u32 handle_2 = engine->AccessConstBuffer32(stage, key.buffers.second, key.offsets.second); | ||
| 112 | const SamplerDescriptor value = engine->AccessSampler(handle_1 | handle_2); | ||
| 113 | separate_samplers.emplace(key, value); | ||
| 114 | return value; | ||
| 115 | } | ||
| 116 | |||
| 117 | std::optional<SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer, u32 offset) { | ||
| 118 | const std::pair key = {buffer, offset}; | ||
| 119 | const auto iter = bindless_samplers.find(key); | ||
| 120 | if (iter != bindless_samplers.end()) { | ||
| 121 | return iter->second; | ||
| 122 | } | ||
| 123 | if (!engine) { | ||
| 124 | return std::nullopt; | ||
| 125 | } | ||
| 126 | const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset); | ||
| 127 | bindless_samplers.emplace(key, value); | ||
| 128 | return value; | ||
| 129 | } | ||
| 130 | |||
| 131 | void Registry::InsertKey(u32 buffer, u32 offset, u32 value) { | ||
| 132 | keys.insert_or_assign({buffer, offset}, value); | ||
| 133 | } | ||
| 134 | |||
| 135 | void Registry::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) { | ||
| 136 | bound_samplers.insert_or_assign(offset, sampler); | ||
| 137 | } | ||
| 138 | |||
| 139 | void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) { | ||
| 140 | bindless_samplers.insert_or_assign({buffer, offset}, sampler); | ||
| 141 | } | ||
| 142 | |||
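| | // Consistency check: every cached key and sampler is re-read from the engine and | ||
| | // compared, presumably so a previously built shader can be revalidated against | ||
| | // the current GPU state before reuse. | ||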
| 143 | bool Registry::IsConsistent() const { | ||
| 144 | if (!engine) { | ||
| 145 | return true; | ||
| 146 | } | ||
| 147 | return std::all_of(keys.begin(), keys.end(), | ||
| 148 | [this](const auto& pair) { | ||
| 149 | const auto [cbuf, offset] = pair.first; | ||
| 150 | const auto value = pair.second; | ||
| 151 | return value == engine->AccessConstBuffer32(stage, cbuf, offset); | ||
| 152 | }) && | ||
| 153 | std::all_of(bound_samplers.begin(), bound_samplers.end(), | ||
| 154 | [this](const auto& sampler) { | ||
| 155 | const auto [key, value] = sampler; | ||
| 156 | return value == engine->AccessBoundSampler(stage, key); | ||
| 157 | }) && | ||
| 158 | std::all_of(bindless_samplers.begin(), bindless_samplers.end(), | ||
| 159 | [this](const auto& sampler) { | ||
| 160 | const auto [cbuf, offset] = sampler.first; | ||
| 161 | const auto value = sampler.second; | ||
| 162 | return value == engine->AccessBindlessSampler(stage, cbuf, offset); | ||
| 163 | }); | ||
| 164 | } | ||
| 165 | |||
| 166 | bool Registry::HasEqualKeys(const Registry& rhs) const { | ||
| 167 | return std::tie(keys, bound_samplers, bindless_samplers) == | ||
| 168 | std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers); | ||
| 169 | } | ||
| 170 | |||
| 171 | const GraphicsInfo& Registry::GetGraphicsInfo() const { | ||
| 172 | ASSERT(stage != Tegra::Engines::ShaderType::Compute); | ||
| 173 | return graphics_info; | ||
| 174 | } | ||
| 175 | |||
| 176 | const ComputeInfo& Registry::GetComputeInfo() const { | ||
| 177 | ASSERT(stage == Tegra::Engines::ShaderType::Compute); | ||
| 178 | return compute_info; | ||
| 179 | } | ||
| 180 | |||
| 181 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h deleted file mode 100644 index 4bebefdde..000000000 --- a/src/video_core/shader/registry.h +++ /dev/null | |||
| @@ -1,172 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <optional> | ||
| 9 | #include <type_traits> | ||
| 10 | #include <unordered_map> | ||
| 11 | #include <utility> | ||
| 12 | |||
| 13 | #include "common/common_types.h" | ||
| 14 | #include "common/hash.h" | ||
| 15 | #include "video_core/engines/const_buffer_engine_interface.h" | ||
| 16 | #include "video_core/engines/maxwell_3d.h" | ||
| 17 | #include "video_core/engines/shader_type.h" | ||
| 18 | #include "video_core/guest_driver.h" | ||
| 19 | |||
| 20 | namespace VideoCommon::Shader { | ||
| 21 | |||
| 22 | struct SeparateSamplerKey { | ||
| 23 | std::pair<u32, u32> buffers; | ||
| 24 | std::pair<u32, u32> offsets; | ||
| 25 | }; | ||
| 26 | |||
| 27 | } // namespace VideoCommon::Shader | ||
| 28 | |||
| 29 | namespace std { | ||
| 30 | |||
| 31 | template <> | ||
| 32 | struct hash<VideoCommon::Shader::SeparateSamplerKey> { | ||
| 33 | std::size_t operator()(const VideoCommon::Shader::SeparateSamplerKey& key) const noexcept { | ||
| 34 | return std::hash<u32>{}(key.buffers.first ^ key.buffers.second ^ key.offsets.first ^ | ||
| 35 | key.offsets.second); | ||
| 36 | } | ||
| 37 | }; | ||
| 38 | |||
| 39 | template <> | ||
| 40 | struct equal_to<VideoCommon::Shader::SeparateSamplerKey> { | ||
| 41 | bool operator()(const VideoCommon::Shader::SeparateSamplerKey& lhs, | ||
| 42 | const VideoCommon::Shader::SeparateSamplerKey& rhs) const noexcept { | ||
| 43 | return lhs.buffers == rhs.buffers && lhs.offsets == rhs.offsets; | ||
| 44 | } | ||
| 45 | }; | ||
| 46 | |||
| 47 | } // namespace std | ||
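| | // A note on the hash above: XOR-folding is cheap but symmetric, so keys with | ||
| | // swapped buffer/offset fields collide. The equal_to specialization still | ||
| | // compares the pairs exactly, so lookups remain correct at the cost of a few | ||
| | // extra collisions. | ||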
| 48 | |||
| 49 | namespace VideoCommon::Shader { | ||
| 50 | |||
| 51 | using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>; | ||
| 52 | using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>; | ||
| 53 | using SeparateSamplerMap = | ||
| 54 | std::unordered_map<SeparateSamplerKey, Tegra::Engines::SamplerDescriptor>; | ||
| 55 | using BindlessSamplerMap = | ||
| 56 | std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>; | ||
| 57 | |||
| 58 | struct GraphicsInfo { | ||
| 59 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 60 | |||
| 61 | std::array<Maxwell::TransformFeedbackLayout, Maxwell::NumTransformFeedbackBuffers> | ||
| 62 | tfb_layouts{}; | ||
| 63 | std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> tfb_varying_locs{}; | ||
| 64 | Maxwell::PrimitiveTopology primitive_topology{}; | ||
| 65 | Maxwell::TessellationPrimitive tessellation_primitive{}; | ||
| 66 | Maxwell::TessellationSpacing tessellation_spacing{}; | ||
| 67 | bool tfb_enabled = false; | ||
| 68 | bool tessellation_clockwise = false; | ||
| 69 | }; | ||
| 70 | static_assert(std::is_trivially_copyable_v<GraphicsInfo> && | ||
| 71 | std::is_standard_layout_v<GraphicsInfo>); | ||
| 72 | |||
| 73 | struct ComputeInfo { | ||
| 74 | std::array<u32, 3> workgroup_size{}; | ||
| 75 | u32 shared_memory_size_in_words = 0; | ||
| 76 | u32 local_memory_size_in_words = 0; | ||
| 77 | }; | ||
| 78 | static_assert(std::is_trivially_copyable_v<ComputeInfo> && std::is_standard_layout_v<ComputeInfo>); | ||
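| | // The trivially-copyable/standard-layout asserts suggest these structs are | ||
| | // serialized byte-for-byte (e.g. into the disk shader cache), which is only | ||
| | // safe for such types. | ||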
| 79 | |||
| 80 | struct SerializedRegistryInfo { | ||
| 81 | VideoCore::GuestDriverProfile guest_driver_profile; | ||
| 82 | u32 bound_buffer = 0; | ||
| 83 | GraphicsInfo graphics; | ||
| 84 | ComputeInfo compute; | ||
| 85 | }; | ||
| 86 | |||
| 87 | /** | ||
| 88 | * The Registry is a class used to interface the 3D and compute engines with the shader compiler. | ||
| 89 | * With it, the shader can obtain required data from GPU state and store it for disk shader | ||
| 90 | * compilation. | ||
| 91 | */ | ||
| 92 | class Registry { | ||
| 93 | public: | ||
| 94 | explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info); | ||
| 95 | |||
| 96 | explicit Registry(Tegra::Engines::ShaderType shader_stage, | ||
| 97 | Tegra::Engines::ConstBufferEngineInterface& engine_); | ||
| 98 | |||
| 99 | ~Registry(); | ||
| 100 | |||
| 101 | /// Retrieves a key from the registry. If it is registered, returns the stored value; | ||
| 102 | /// otherwise obtains it from Maxwell3D and registers it. | ||
| 103 | std::optional<u32> ObtainKey(u32 buffer, u32 offset); | ||
| 104 | |||
| 105 | std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset); | ||
| 106 | |||
| 107 | std::optional<Tegra::Engines::SamplerDescriptor> ObtainSeparateSampler( | ||
| 108 | std::pair<u32, u32> buffers, std::pair<u32, u32> offsets); | ||
| 109 | |||
| 110 | std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); | ||
| 111 | |||
| 112 | /// Inserts a key. | ||
| 113 | void InsertKey(u32 buffer, u32 offset, u32 value); | ||
| 114 | |||
| 115 | /// Inserts a bound sampler key. | ||
| 116 | void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler); | ||
| 117 | |||
| 118 | /// Inserts a bindless sampler key. | ||
| 119 | void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); | ||
| 120 | |||
| 121 | /// Checks keys and samplers against engine's current const buffers. | ||
| 122 | /// Returns true if they are the same value, false otherwise. | ||
| 123 | bool IsConsistent() const; | ||
| 124 | |||
| 125 | /// Returns true if the keys are equal to the other ones in the registry. | ||
| 126 | bool HasEqualKeys(const Registry& rhs) const; | ||
| 127 | |||
| 128 | /// Returns graphics information from this shader | ||
| 129 | const GraphicsInfo& GetGraphicsInfo() const; | ||
| 130 | |||
| 131 | /// Returns compute information from this shader | ||
| 132 | const ComputeInfo& GetComputeInfo() const; | ||
| 133 | |||
| 134 | /// Gets the const buffer key database. | ||
| 135 | const KeyMap& GetKeys() const { | ||
| 136 | return keys; | ||
| 137 | } | ||
| 138 | |||
| 139 | /// Gets the bound samplers database. | ||
| 140 | const BoundSamplerMap& GetBoundSamplers() const { | ||
| 141 | return bound_samplers; | ||
| 142 | } | ||
| 143 | |||
| 144 | /// Gets the bindless samplers database. | ||
| 145 | const BindlessSamplerMap& GetBindlessSamplers() const { | ||
| 146 | return bindless_samplers; | ||
| 147 | } | ||
| 148 | |||
| 149 | /// Gets the bound buffer used by this shader. | ||
| 150 | u32 GetBoundBuffer() const { | ||
| 151 | return bound_buffer; | ||
| 152 | } | ||
| 153 | |||
| 154 | /// Obtains access to the guest driver's profile. | ||
| 155 | VideoCore::GuestDriverProfile& AccessGuestDriverProfile() { | ||
| 156 | return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile; | ||
| 157 | } | ||
| 158 | |||
| 159 | private: | ||
| 160 | const Tegra::Engines::ShaderType stage; | ||
| 161 | VideoCore::GuestDriverProfile stored_guest_driver_profile; | ||
| 162 | Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; | ||
| 163 | KeyMap keys; | ||
| 164 | BoundSamplerMap bound_samplers; | ||
| 165 | SeparateSamplerMap separate_samplers; | ||
| 166 | BindlessSamplerMap bindless_samplers; | ||
| 167 | u32 bound_buffer; | ||
| 168 | GraphicsInfo graphics_info; | ||
| 169 | ComputeInfo compute_info; | ||
| 170 | }; | ||
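| | // Minimal usage sketch (hypothetical values; not part of the original header): | ||
| | //   Registry live{Tegra::Engines::ShaderType::Fragment, engine}; | ||
| | //   const std::optional<u32> key = live.ObtainKey(/*buffer=*/0, /*offset=*/16); | ||
| | //   Registry stored{Tegra::Engines::ShaderType::Fragment, serialized_info}; | ||
| | //   const bool same_keys = stored.HasEqualKeys(live); // disk-cache revalidation | ||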
| 171 | |||
| 172 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp deleted file mode 100644 index a4987ffc6..000000000 --- a/src/video_core/shader/shader_ir.cpp +++ /dev/null | |||
| @@ -1,464 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <cmath> | ||
| 8 | |||
| 9 | #include "common/assert.h" | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "common/logging/log.h" | ||
| 12 | #include "video_core/engines/shader_bytecode.h" | ||
| 13 | #include "video_core/shader/node.h" | ||
| 14 | #include "video_core/shader/node_helper.h" | ||
| 15 | #include "video_core/shader/registry.h" | ||
| 16 | #include "video_core/shader/shader_ir.h" | ||
| 17 | |||
| 18 | namespace VideoCommon::Shader { | ||
| 19 | |||
| 20 | using Tegra::Shader::Attribute; | ||
| 21 | using Tegra::Shader::Instruction; | ||
| 22 | using Tegra::Shader::IpaMode; | ||
| 23 | using Tegra::Shader::Pred; | ||
| 24 | using Tegra::Shader::PredCondition; | ||
| 25 | using Tegra::Shader::PredOperation; | ||
| 26 | using Tegra::Shader::Register; | ||
| 27 | |||
| 28 | ShaderIR::ShaderIR(const ProgramCode& program_code_, u32 main_offset_, CompilerSettings settings_, | ||
| 29 | Registry& registry_) | ||
| 30 | : program_code{program_code_}, main_offset{main_offset_}, settings{settings_}, registry{ | ||
| 31 | registry_} { | ||
| 32 | Decode(); | ||
| 33 | PostDecode(); | ||
| 34 | } | ||
| 35 | |||
| 36 | ShaderIR::~ShaderIR() = default; | ||
| 37 | |||
| 38 | Node ShaderIR::GetRegister(Register reg) { | ||
| 39 | if (reg != Register::ZeroIndex) { | ||
| 40 | used_registers.insert(static_cast<u32>(reg)); | ||
| 41 | } | ||
| 42 | return MakeNode<GprNode>(reg); | ||
| 43 | } | ||
| 44 | |||
| 45 | Node ShaderIR::GetCustomVariable(u32 id) { | ||
| 46 | return MakeNode<CustomVarNode>(id); | ||
| 47 | } | ||
| 48 | |||
| 49 | Node ShaderIR::GetImmediate19(Instruction instr) { | ||
| 50 | return Immediate(instr.alu.GetImm20_19()); | ||
| 51 | } | ||
| 52 | |||
| 53 | Node ShaderIR::GetImmediate32(Instruction instr) { | ||
| 54 | return Immediate(instr.alu.GetImm20_32()); | ||
| 55 | } | ||
| 56 | |||
| 57 | Node ShaderIR::GetConstBuffer(u64 index_, u64 offset_) { | ||
| 58 | const auto index = static_cast<u32>(index_); | ||
| 59 | const auto offset = static_cast<u32>(offset_); | ||
| 60 | |||
| 61 | used_cbufs.try_emplace(index).first->second.MarkAsUsed(offset); | ||
| 62 | |||
| 63 | return MakeNode<CbufNode>(index, Immediate(offset)); | ||
| 64 | } | ||
| 65 | |||
| 66 | Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) { | ||
| 67 | const auto index = static_cast<u32>(index_); | ||
| 68 | const auto offset = static_cast<u32>(offset_); | ||
| 69 | |||
| 70 | used_cbufs.try_emplace(index).first->second.MarkAsUsedIndirect(); | ||
| 71 | |||
| 72 | Node final_offset = [&] { | ||
| 73 | // Attempt to inline constant buffer without a variable offset. This is done to allow | ||
| 74 | // tracking LDC calls. | ||
| 75 | if (const auto gpr = std::get_if<GprNode>(&*node)) { | ||
| 76 | if (gpr->GetIndex() == Register::ZeroIndex) { | ||
| 77 | return Immediate(offset); | ||
| 78 | } | ||
| 79 | } | ||
| 80 | return Operation(OperationCode::UAdd, NO_PRECISE, std::move(node), Immediate(offset)); | ||
| 81 | }(); | ||
| 82 | return MakeNode<CbufNode>(index, std::move(final_offset)); | ||
| 83 | } | ||
| 84 | |||
| 85 | Node ShaderIR::GetPredicate(u64 pred_, bool negated) { | ||
| 86 | const auto pred = static_cast<Pred>(pred_); | ||
| 87 | if (pred != Pred::UnusedIndex && pred != Pred::NeverExecute) { | ||
| 88 | used_predicates.insert(pred); | ||
| 89 | } | ||
| 90 | |||
| 91 | return MakeNode<PredicateNode>(pred, negated); | ||
| 92 | } | ||
| 93 | |||
| 94 | Node ShaderIR::GetPredicate(bool immediate) { | ||
| 95 | return GetPredicate(static_cast<u64>(immediate ? Pred::UnusedIndex : Pred::NeverExecute)); | ||
| 96 | } | ||
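| | // Assumption behind the immediate predicates above: Pred::UnusedIndex always | ||
| | // evaluates true and Pred::NeverExecute always false, so they double as the | ||
| | // literals `true` and `false`. | ||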
| 97 | |||
| 98 | Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) { | ||
| 99 | MarkAttributeUsage(index, element); | ||
| 100 | used_input_attributes.emplace(index); | ||
| 101 | return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer)); | ||
| 102 | } | ||
| 103 | |||
| 104 | Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) { | ||
| 105 | uses_physical_attributes = true; | ||
| 106 | return MakeNode<AbufNode>(GetRegister(physical_address), buffer); | ||
| 107 | } | ||
| 108 | |||
| 109 | Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) { | ||
| 110 | MarkAttributeUsage(index, element); | ||
| 111 | used_output_attributes.insert(index); | ||
| 112 | return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer)); | ||
| 113 | } | ||
| 114 | |||
| 115 | Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) const { | ||
| 116 | Node node = MakeNode<InternalFlagNode>(flag); | ||
| 117 | if (negated) { | ||
| 118 | return Operation(OperationCode::LogicalNegate, std::move(node)); | ||
| 119 | } | ||
| 120 | return node; | ||
| 121 | } | ||
| 122 | |||
| 123 | Node ShaderIR::GetLocalMemory(Node address) { | ||
| 124 | return MakeNode<LmemNode>(std::move(address)); | ||
| 125 | } | ||
| 126 | |||
| 127 | Node ShaderIR::GetSharedMemory(Node address) { | ||
| 128 | return MakeNode<SmemNode>(std::move(address)); | ||
| 129 | } | ||
| 130 | |||
| 131 | Node ShaderIR::GetTemporary(u32 id) { | ||
| 132 | return GetRegister(Register::ZeroIndex + 1 + id); | ||
| 133 | } | ||
| 134 | |||
| 135 | Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) { | ||
| 136 | if (absolute) { | ||
| 137 | value = Operation(OperationCode::FAbsolute, NO_PRECISE, std::move(value)); | ||
| 138 | } | ||
| 139 | if (negate) { | ||
| 140 | value = Operation(OperationCode::FNegate, NO_PRECISE, std::move(value)); | ||
| 141 | } | ||
| 142 | return value; | ||
| 143 | } | ||
| 144 | |||
| 145 | Node ShaderIR::GetSaturatedFloat(Node value, bool saturate) { | ||
| 146 | if (!saturate) { | ||
| 147 | return value; | ||
| 148 | } | ||
| 149 | |||
| 150 | Node positive_zero = Immediate(std::copysignf(0, 1)); | ||
| 151 | Node positive_one = Immediate(1.0f); | ||
| 152 | return Operation(OperationCode::FClamp, NO_PRECISE, std::move(value), std::move(positive_zero), | ||
| 153 | std::move(positive_one)); | ||
| 154 | } | ||
| 155 | |||
| 156 | Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signed) { | ||
| 157 | switch (size) { | ||
| 158 | case Register::Size::Byte: | ||
| 159 | value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, | ||
| 160 | std::move(value), Immediate(24)); | ||
| 161 | value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, | ||
| 162 | std::move(value), Immediate(24)); | ||
| 163 | return value; | ||
| 164 | case Register::Size::Short: | ||
| 165 | value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, | ||
| 166 | std::move(value), Immediate(16)); | ||
| 167 | value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, | ||
| 168 | std::move(value), Immediate(16)); | ||
| 169 | return value; | ||
| 170 | case Register::Size::Word: | ||
| 171 | // Default - do nothing | ||
| 172 | return value; | ||
| 173 | default: | ||
| 174 | UNREACHABLE_MSG("Unimplemented conversion size: {}", size); | ||
| 175 | return value; | ||
| 176 | } | ||
| 177 | } | ||
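| | // Worked example for the shift pair above: for Register::Size::Byte with the | ||
| | // value 0x000000FF, | ||
| | //   signed:   (0xFF << 24) = 0xFF000000, arithmetic >> 24 -> 0xFFFFFFFF (-1) | ||
| | //   unsigned: (0xFF << 24) = 0xFF000000, logical    >> 24 -> 0x000000FF (255) | ||
| | // i.e. the pair sign- or zero-extends the low 8 (or 16) bits. | ||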
| 178 | |||
| 179 | Node ShaderIR::GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed) { | ||
| 180 | if (!is_signed) { | ||
| 181 | // Absolute or negate on an unsigned is pointless | ||
| 182 | return value; | ||
| 183 | } | ||
| 184 | if (absolute) { | ||
| 185 | value = Operation(OperationCode::IAbsolute, NO_PRECISE, std::move(value)); | ||
| 186 | } | ||
| 187 | if (negate) { | ||
| 188 | value = Operation(OperationCode::INegate, NO_PRECISE, std::move(value)); | ||
| 189 | } | ||
| 190 | return value; | ||
| 191 | } | ||
| 192 | |||
| 193 | Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) { | ||
| 194 | Node value = Immediate(instr.half_imm.PackImmediates()); | ||
| 195 | if (!has_negation) { | ||
| 196 | return value; | ||
| 197 | } | ||
| 198 | |||
| 199 | Node first_negate = GetPredicate(instr.half_imm.first_negate != 0); | ||
| 200 | Node second_negate = GetPredicate(instr.half_imm.second_negate != 0); | ||
| 201 | |||
| 202 | return Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), std::move(first_negate), | ||
| 203 | std::move(second_negate)); | ||
| 204 | } | ||
| 205 | |||
| 206 | Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) { | ||
| 207 | return Operation(OperationCode::HUnpack, type, std::move(value)); | ||
| 208 | } | ||
| 209 | |||
| 210 | Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { | ||
| 211 | switch (merge) { | ||
| 212 | case Tegra::Shader::HalfMerge::H0_H1: | ||
| 213 | return src; | ||
| 214 | case Tegra::Shader::HalfMerge::F32: | ||
| 215 | return Operation(OperationCode::HMergeF32, std::move(src)); | ||
| 216 | case Tegra::Shader::HalfMerge::Mrg_H0: | ||
| 217 | return Operation(OperationCode::HMergeH0, std::move(dest), std::move(src)); | ||
| 218 | case Tegra::Shader::HalfMerge::Mrg_H1: | ||
| 219 | return Operation(OperationCode::HMergeH1, std::move(dest), std::move(src)); | ||
| 220 | } | ||
| 221 | UNREACHABLE(); | ||
| 222 | return src; | ||
| 223 | } | ||
| 224 | |||
| 225 | Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) { | ||
| 226 | if (absolute) { | ||
| 227 | value = Operation(OperationCode::HAbsolute, NO_PRECISE, std::move(value)); | ||
| 228 | } | ||
| 229 | if (negate) { | ||
| 230 | value = Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), GetPredicate(true), | ||
| 231 | GetPredicate(true)); | ||
| 232 | } | ||
| 233 | return value; | ||
| 234 | } | ||
| 235 | |||
| 236 | Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) { | ||
| 237 | if (!saturate) { | ||
| 238 | return value; | ||
| 239 | } | ||
| 240 | |||
| 241 | Node positive_zero = Immediate(std::copysignf(0, 1)); | ||
| 242 | Node positive_one = Immediate(1.0f); | ||
| 243 | return Operation(OperationCode::HClamp, NO_PRECISE, std::move(value), std::move(positive_zero), | ||
| 244 | std::move(positive_one)); | ||
| 245 | } | ||
| 246 | |||
| 247 | Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { | ||
| 248 | if (condition == PredCondition::T) { | ||
| 249 | return GetPredicate(true); | ||
| 250 | } else if (condition == PredCondition::F) { | ||
| 251 | return GetPredicate(false); | ||
| 252 | } | ||
| 253 | |||
| 254 | static constexpr std::array comparison_table{ | ||
| 255 | OperationCode(0), | ||
| 256 | OperationCode::LogicalFOrdLessThan, // LT | ||
| 257 | OperationCode::LogicalFOrdEqual, // EQ | ||
| 258 | OperationCode::LogicalFOrdLessEqual, // LE | ||
| 259 | OperationCode::LogicalFOrdGreaterThan, // GT | ||
| 260 | OperationCode::LogicalFOrdNotEqual, // NE | ||
| 261 | OperationCode::LogicalFOrdGreaterEqual, // GE | ||
| 262 | OperationCode::LogicalFOrdered, // NUM | ||
| 263 | OperationCode::LogicalFUnordered, // NAN | ||
| 264 | OperationCode::LogicalFUnordLessThan, // LTU | ||
| 265 | OperationCode::LogicalFUnordEqual, // EQU | ||
| 266 | OperationCode::LogicalFUnordLessEqual, // LEU | ||
| 267 | OperationCode::LogicalFUnordGreaterThan, // GTU | ||
| 268 | OperationCode::LogicalFUnordNotEqual, // NEU | ||
| 269 | OperationCode::LogicalFUnordGreaterEqual, // GEU | ||
| 270 | }; | ||
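| | // FOrd* comparisons are false when either operand is NaN, while their FUnord* | ||
| | // counterparts are true; NUM and NAN test for "both ordered" and "either NaN" | ||
| | // respectively (mirroring the usual ordered/unordered float semantics). | ||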
| 271 | const std::size_t index = static_cast<std::size_t>(condition); | ||
| 272 | ASSERT_MSG(index < std::size(comparison_table), "Invalid condition={}", index); | ||
| 273 | |||
| 274 | return Operation(comparison_table[index], op_a, op_b); | ||
| 275 | } | ||
| 276 | |||
| 277 | Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a, | ||
| 278 | Node op_b) { | ||
| 279 | static constexpr std::array comparison_table{ | ||
| 280 | std::pair{PredCondition::LT, OperationCode::LogicalILessThan}, | ||
| 281 | std::pair{PredCondition::EQ, OperationCode::LogicalIEqual}, | ||
| 282 | std::pair{PredCondition::LE, OperationCode::LogicalILessEqual}, | ||
| 283 | std::pair{PredCondition::GT, OperationCode::LogicalIGreaterThan}, | ||
| 284 | std::pair{PredCondition::NE, OperationCode::LogicalINotEqual}, | ||
| 285 | std::pair{PredCondition::GE, OperationCode::LogicalIGreaterEqual}, | ||
| 286 | }; | ||
| 287 | |||
| 288 | const auto comparison = | ||
| 289 | std::find_if(comparison_table.cbegin(), comparison_table.cend(), | ||
| 290 | [condition](const auto entry) { return condition == entry.first; }); | ||
| 291 | UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(), | ||
| 292 | "Unknown predicate comparison operation"); | ||
| 293 | |||
| 294 | return SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a), | ||
| 295 | std::move(op_b)); | ||
| 296 | } | ||
| 297 | |||
| 298 | Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, | ||
| 299 | Node op_b) { | ||
| 300 | static constexpr std::array comparison_table{ | ||
| 301 | std::pair{PredCondition::LT, OperationCode::Logical2HLessThan}, | ||
| 302 | std::pair{PredCondition::EQ, OperationCode::Logical2HEqual}, | ||
| 303 | std::pair{PredCondition::LE, OperationCode::Logical2HLessEqual}, | ||
| 304 | std::pair{PredCondition::GT, OperationCode::Logical2HGreaterThan}, | ||
| 305 | std::pair{PredCondition::NE, OperationCode::Logical2HNotEqual}, | ||
| 306 | std::pair{PredCondition::GE, OperationCode::Logical2HGreaterEqual}, | ||
| 307 | std::pair{PredCondition::LTU, OperationCode::Logical2HLessThanWithNan}, | ||
| 308 | std::pair{PredCondition::LEU, OperationCode::Logical2HLessEqualWithNan}, | ||
| 309 | std::pair{PredCondition::GTU, OperationCode::Logical2HGreaterThanWithNan}, | ||
| 310 | std::pair{PredCondition::NEU, OperationCode::Logical2HNotEqualWithNan}, | ||
| 311 | std::pair{PredCondition::GEU, OperationCode::Logical2HGreaterEqualWithNan}, | ||
| 312 | }; | ||
| 313 | |||
| 314 | const auto comparison = | ||
| 315 | std::find_if(comparison_table.cbegin(), comparison_table.cend(), | ||
| 316 | [condition](const auto entry) { return condition == entry.first; }); | ||
| 317 | UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(), | ||
| 318 | "Unknown predicate comparison operation"); | ||
| 319 | |||
| 320 | return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b)); | ||
| 321 | } | ||
| 322 | |||
| 323 | OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { | ||
| 324 | static constexpr std::array operation_table{ | ||
| 325 | OperationCode::LogicalAnd, | ||
| 326 | OperationCode::LogicalOr, | ||
| 327 | OperationCode::LogicalXor, | ||
| 328 | }; | ||
| 329 | |||
| 330 | const auto index = static_cast<std::size_t>(operation); | ||
| 331 | if (index >= operation_table.size()) { | ||
| 332 | UNIMPLEMENTED_MSG("Unknown predicate operation."); | ||
| 333 | return {}; | ||
| 334 | } | ||
| 335 | |||
| 336 | return operation_table[index]; | ||
| 337 | } | ||
| 338 | |||
| 339 | Node ShaderIR::GetConditionCode(ConditionCode cc) const { | ||
| 340 | switch (cc) { | ||
| 341 | case ConditionCode::NEU: | ||
| 342 | return GetInternalFlag(InternalFlag::Zero, true); | ||
| 343 | case ConditionCode::FCSM_TR: | ||
| 344 | UNIMPLEMENTED_MSG("EXIT.FCSM_TR is not implemented"); | ||
| 345 | return MakeNode<PredicateNode>(Pred::NeverExecute, false); | ||
| 346 | default: | ||
| 347 | UNIMPLEMENTED_MSG("Unimplemented condition code: {}", cc); | ||
| 348 | return MakeNode<PredicateNode>(Pred::NeverExecute, false); | ||
| 349 | } | ||
| 350 | } | ||
| 351 | |||
| 352 | void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) { | ||
| 353 | bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), std::move(src))); | ||
| 354 | } | ||
| 355 | |||
| 356 | void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) { | ||
| 357 | bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), std::move(src))); | ||
| 358 | } | ||
| 359 | |||
| 360 | void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) { | ||
| 361 | bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), std::move(value))); | ||
| 362 | } | ||
| 363 | |||
| 364 | void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) { | ||
| 365 | bb.push_back( | ||
| 366 | Operation(OperationCode::Assign, GetLocalMemory(std::move(address)), std::move(value))); | ||
| 367 | } | ||
| 368 | |||
| 369 | void ShaderIR::SetSharedMemory(NodeBlock& bb, Node address, Node value) { | ||
| 370 | bb.push_back( | ||
| 371 | Operation(OperationCode::Assign, GetSharedMemory(std::move(address)), std::move(value))); | ||
| 372 | } | ||
| 373 | |||
| 374 | void ShaderIR::SetTemporary(NodeBlock& bb, u32 id, Node value) { | ||
| 375 | SetRegister(bb, Register::ZeroIndex + 1 + id, std::move(value)); | ||
| 376 | } | ||
| 377 | |||
| 378 | void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) { | ||
| 379 | if (!sets_cc) { | ||
| 380 | return; | ||
| 381 | } | ||
| 382 | Node zerop = Operation(OperationCode::LogicalFOrdEqual, std::move(value), Immediate(0.0f)); | ||
| 383 | SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop)); | ||
| 384 | LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); | ||
| 385 | } | ||
| 386 | |||
| 387 | void ShaderIR::SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc) { | ||
| 388 | if (!sets_cc) { | ||
| 389 | return; | ||
| 390 | } | ||
| 391 | Node zerop = Operation(OperationCode::LogicalIEqual, std::move(value), Immediate(0)); | ||
| 392 | SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop)); | ||
| 393 | LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); | ||
| 394 | } | ||
| 395 | |||
| 396 | Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) { | ||
| 397 | return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, std::move(value), | ||
| 398 | Immediate(offset), Immediate(bits)); | ||
| 399 | } | ||
| 400 | |||
| 401 | Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) { | ||
| 402 | return Operation(OperationCode::UBitfieldInsert, NO_PRECISE, base, insert, Immediate(offset), | ||
| 403 | Immediate(bits)); | ||
| 404 | } | ||
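| | // Illustrative semantics: BitfieldExtract(value, 8, 4) yields the unsigned bits | ||
| | // [8..11] of value, i.e. (value >> 8) & 0xF; BitfieldInsert(base, insert, 8, 4) | ||
| | // replaces those same bits of base with the low 4 bits of insert. | ||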
| 405 | |||
| 406 | void ShaderIR::MarkAttributeUsage(Attribute::Index index, u64 element) { | ||
| 407 | switch (index) { | ||
| 408 | case Attribute::Index::LayerViewportPointSize: | ||
| 409 | switch (element) { | ||
| 410 | case 0: | ||
| 411 | UNIMPLEMENTED(); | ||
| 412 | break; | ||
| 413 | case 1: | ||
| 414 | uses_layer = true; | ||
| 415 | break; | ||
| 416 | case 2: | ||
| 417 | uses_viewport_index = true; | ||
| 418 | break; | ||
| 419 | case 3: | ||
| 420 | uses_point_size = true; | ||
| 421 | break; | ||
| 422 | } | ||
| 423 | break; | ||
| 424 | case Attribute::Index::TessCoordInstanceIDVertexID: | ||
| 425 | switch (element) { | ||
| 426 | case 2: | ||
| 427 | uses_instance_id = true; | ||
| 428 | break; | ||
| 429 | case 3: | ||
| 430 | uses_vertex_id = true; | ||
| 431 | break; | ||
| 432 | } | ||
| 433 | break; | ||
| 434 | case Attribute::Index::ClipDistances0123: | ||
| 435 | case Attribute::Index::ClipDistances4567: { | ||
| 436 | const u64 clip_index = (index == Attribute::Index::ClipDistances4567 ? 4 : 0) + element; | ||
| 437 | used_clip_distances.at(clip_index) = true; | ||
| 438 | break; | ||
| 439 | } | ||
| 440 | case Attribute::Index::FrontColor: | ||
| 441 | case Attribute::Index::FrontSecondaryColor: | ||
| 442 | case Attribute::Index::BackColor: | ||
| 443 | case Attribute::Index::BackSecondaryColor: | ||
| 444 | uses_legacy_varyings = true; | ||
| 445 | break; | ||
| 446 | default: | ||
| 447 | if (index >= Attribute::Index::TexCoord_0 && index <= Attribute::Index::TexCoord_7) { | ||
| 448 | uses_legacy_varyings = true; | ||
| 449 | } | ||
| 450 | break; | ||
| 451 | } | ||
| 452 | } | ||
| 453 | |||
| 454 | std::size_t ShaderIR::DeclareAmend(Node new_amend) { | ||
| 455 | const auto id = amend_code.size(); | ||
| 456 | amend_code.push_back(std::move(new_amend)); | ||
| 457 | return id; | ||
| 458 | } | ||
| 459 | |||
| 460 | u32 ShaderIR::NewCustomVariable() { | ||
| 461 | return num_custom_variables++; | ||
| 462 | } | ||
| 463 | |||
| 464 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h deleted file mode 100644 index 1cd7c14d7..000000000 --- a/src/video_core/shader/shader_ir.h +++ /dev/null | |||
| @@ -1,479 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <list> | ||
| 9 | #include <map> | ||
| 10 | #include <optional> | ||
| 11 | #include <set> | ||
| 12 | #include <tuple> | ||
| 13 | #include <vector> | ||
| 14 | |||
| 15 | #include "common/common_types.h" | ||
| 16 | #include "video_core/engines/maxwell_3d.h" | ||
| 17 | #include "video_core/engines/shader_bytecode.h" | ||
| 18 | #include "video_core/engines/shader_header.h" | ||
| 19 | #include "video_core/shader/ast.h" | ||
| 20 | #include "video_core/shader/compiler_settings.h" | ||
| 21 | #include "video_core/shader/memory_util.h" | ||
| 22 | #include "video_core/shader/node.h" | ||
| 23 | #include "video_core/shader/registry.h" | ||
| 24 | |||
| 25 | namespace VideoCommon::Shader { | ||
| 26 | |||
| 27 | struct ShaderBlock; | ||
| 28 | |||
| 29 | constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; | ||
| 30 | |||
| 31 | struct ConstBuffer { | ||
| 32 | constexpr explicit ConstBuffer(u32 max_offset_, bool is_indirect_) | ||
| 33 | : max_offset{max_offset_}, is_indirect{is_indirect_} {} | ||
| 34 | |||
| 35 | constexpr ConstBuffer() = default; | ||
| 36 | |||
| 37 | void MarkAsUsed(u64 offset) { | ||
| 38 | max_offset = std::max(max_offset, static_cast<u32>(offset)); | ||
| 39 | } | ||
| 40 | |||
| 41 | void MarkAsUsedIndirect() { | ||
| 42 | is_indirect = true; | ||
| 43 | } | ||
| 44 | |||
| 45 | bool IsIndirect() const { | ||
| 46 | return is_indirect; | ||
| 47 | } | ||
| 48 | |||
| 49 | u32 GetSize() const { | ||
| 50 | return max_offset + static_cast<u32>(sizeof(float)); | ||
| 51 | } | ||
| 52 | |||
| 53 | u32 GetMaxOffset() const { | ||
| 54 | return max_offset; | ||
| 55 | } | ||
| 56 | |||
| 57 | private: | ||
| 58 | u32 max_offset = 0; | ||
| 59 | bool is_indirect = false; | ||
| 60 | }; | ||
| 61 | |||
| 62 | struct GlobalMemoryUsage { | ||
| 63 | bool is_read{}; | ||
| 64 | bool is_written{}; | ||
| 65 | }; | ||
| 66 | |||
| 67 | class ShaderIR final { | ||
| 68 | public: | ||
| 69 | explicit ShaderIR(const ProgramCode& program_code_, u32 main_offset_, | ||
| 70 | CompilerSettings settings_, Registry& registry_); | ||
| 71 | ~ShaderIR(); | ||
| 72 | |||
| 73 | const std::map<u32, NodeBlock>& GetBasicBlocks() const { | ||
| 74 | return basic_blocks; | ||
| 75 | } | ||
| 76 | |||
| 77 | const std::set<u32>& GetRegisters() const { | ||
| 78 | return used_registers; | ||
| 79 | } | ||
| 80 | |||
| 81 | const std::set<Tegra::Shader::Pred>& GetPredicates() const { | ||
| 82 | return used_predicates; | ||
| 83 | } | ||
| 84 | |||
| 85 | const std::set<Tegra::Shader::Attribute::Index>& GetInputAttributes() const { | ||
| 86 | return used_input_attributes; | ||
| 87 | } | ||
| 88 | |||
| 89 | const std::set<Tegra::Shader::Attribute::Index>& GetOutputAttributes() const { | ||
| 90 | return used_output_attributes; | ||
| 91 | } | ||
| 92 | |||
| 93 | const std::map<u32, ConstBuffer>& GetConstantBuffers() const { | ||
| 94 | return used_cbufs; | ||
| 95 | } | ||
| 96 | |||
| 97 | const std::list<SamplerEntry>& GetSamplers() const { | ||
| 98 | return used_samplers; | ||
| 99 | } | ||
| 100 | |||
| 101 | const std::list<ImageEntry>& GetImages() const { | ||
| 102 | return used_images; | ||
| 103 | } | ||
| 104 | |||
| 105 | const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& GetClipDistances() | ||
| 106 | const { | ||
| 107 | return used_clip_distances; | ||
| 108 | } | ||
| 109 | |||
| 110 | const std::map<GlobalMemoryBase, GlobalMemoryUsage>& GetGlobalMemory() const { | ||
| 111 | return used_global_memory; | ||
| 112 | } | ||
| 113 | |||
| 114 | std::size_t GetLength() const { | ||
| 115 | return static_cast<std::size_t>(coverage_end * sizeof(u64)); | ||
| 116 | } | ||
| 117 | |||
| 118 | bool UsesLayer() const { | ||
| 119 | return uses_layer; | ||
| 120 | } | ||
| 121 | |||
| 122 | bool UsesViewportIndex() const { | ||
| 123 | return uses_viewport_index; | ||
| 124 | } | ||
| 125 | |||
| 126 | bool UsesPointSize() const { | ||
| 127 | return uses_point_size; | ||
| 128 | } | ||
| 129 | |||
| 130 | bool UsesInstanceId() const { | ||
| 131 | return uses_instance_id; | ||
| 132 | } | ||
| 133 | |||
| 134 | bool UsesVertexId() const { | ||
| 135 | return uses_vertex_id; | ||
| 136 | } | ||
| 137 | |||
| 138 | bool UsesLegacyVaryings() const { | ||
| 139 | return uses_legacy_varyings; | ||
| 140 | } | ||
| 141 | |||
| 142 | bool UsesYNegate() const { | ||
| 143 | return uses_y_negate; | ||
| 144 | } | ||
| 145 | |||
| 146 | bool UsesWarps() const { | ||
| 147 | return uses_warps; | ||
| 148 | } | ||
| 149 | |||
| 150 | bool HasPhysicalAttributes() const { | ||
| 151 | return uses_physical_attributes; | ||
| 152 | } | ||
| 153 | |||
| 154 | const Tegra::Shader::Header& GetHeader() const { | ||
| 155 | return header; | ||
| 156 | } | ||
| 157 | |||
| 158 | bool IsFlowStackDisabled() const { | ||
| 159 | return disable_flow_stack; | ||
| 160 | } | ||
| 161 | |||
| 162 | bool IsDecompiled() const { | ||
| 163 | return decompiled; | ||
| 164 | } | ||
| 165 | |||
| 166 | const ASTManager& GetASTManager() const { | ||
| 167 | return program_manager; | ||
| 168 | } | ||
| 169 | |||
| 170 | ASTNode GetASTProgram() const { | ||
| 171 | return program_manager.GetProgram(); | ||
| 172 | } | ||
| 173 | |||
| 174 | u32 GetASTNumVariables() const { | ||
| 175 | return program_manager.GetVariables(); | ||
| 176 | } | ||
| 177 | |||
| 178 | u32 ConvertAddressToNvidiaSpace(u32 address) const { | ||
| 179 | return (address - main_offset) * static_cast<u32>(sizeof(Tegra::Shader::Instruction)); | ||
| 180 | } | ||
| 181 | |||
| 182 | /// Returns a condition code evaluated from internal flags | ||
| 183 | Node GetConditionCode(Tegra::Shader::ConditionCode cc) const; | ||
| 184 | |||
| 185 | const Node& GetAmendNode(std::size_t index) const { | ||
| 186 | return amend_code[index]; | ||
| 187 | } | ||
| 188 | |||
| 189 | u32 GetNumCustomVariables() const { | ||
| 190 | return num_custom_variables; | ||
| 191 | } | ||
| 192 | |||
| 193 | private: | ||
| 194 | friend class ASTDecoder; | ||
| 195 | |||
| 196 | struct SamplerInfo { | ||
| 197 | std::optional<Tegra::Shader::TextureType> type; | ||
| 198 | std::optional<bool> is_array; | ||
| 199 | std::optional<bool> is_shadow; | ||
| 200 | std::optional<bool> is_buffer; | ||
| 201 | |||
| 202 | constexpr bool IsComplete() const noexcept { | ||
| 203 | return type && is_array && is_shadow && is_buffer; | ||
| 204 | } | ||
| 205 | }; | ||
| 206 | |||
| 207 | void Decode(); | ||
| 208 | void PostDecode(); | ||
| 209 | |||
| 210 | NodeBlock DecodeRange(u32 begin, u32 end); | ||
| 211 | void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end); | ||
| 212 | void InsertControlFlow(NodeBlock& bb, const ShaderBlock& block); | ||
| 213 | |||
| 214 | /** | ||
| 215 | * Decodes a single instruction from Tegra to IR. | ||
| 216 | * @param bb Basic block where the nodes will be written to. | ||
| 217 | * @param pc Program counter. Offset to decode. | ||
| 218 | * @return Next address to decode. | ||
| 219 | */ | ||
| 220 | u32 DecodeInstr(NodeBlock& bb, u32 pc); | ||
| 221 | |||
| 222 | u32 DecodeArithmetic(NodeBlock& bb, u32 pc); | ||
| 223 | u32 DecodeArithmeticImmediate(NodeBlock& bb, u32 pc); | ||
| 224 | u32 DecodeBfe(NodeBlock& bb, u32 pc); | ||
| 225 | u32 DecodeBfi(NodeBlock& bb, u32 pc); | ||
| 226 | u32 DecodeShift(NodeBlock& bb, u32 pc); | ||
| 227 | u32 DecodeArithmeticInteger(NodeBlock& bb, u32 pc); | ||
| 228 | u32 DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc); | ||
| 229 | u32 DecodeArithmeticHalf(NodeBlock& bb, u32 pc); | ||
| 230 | u32 DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc); | ||
| 231 | u32 DecodeFfma(NodeBlock& bb, u32 pc); | ||
| 232 | u32 DecodeHfma2(NodeBlock& bb, u32 pc); | ||
| 233 | u32 DecodeConversion(NodeBlock& bb, u32 pc); | ||
| 234 | u32 DecodeWarp(NodeBlock& bb, u32 pc); | ||
| 235 | u32 DecodeMemory(NodeBlock& bb, u32 pc); | ||
| 236 | u32 DecodeTexture(NodeBlock& bb, u32 pc); | ||
| 237 | u32 DecodeImage(NodeBlock& bb, u32 pc); | ||
| 238 | u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc); | ||
| 239 | u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc); | ||
| 240 | u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc); | ||
| 241 | u32 DecodePredicateSetRegister(NodeBlock& bb, u32 pc); | ||
| 242 | u32 DecodePredicateSetPredicate(NodeBlock& bb, u32 pc); | ||
| 243 | u32 DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc); | ||
| 244 | u32 DecodeFloatSet(NodeBlock& bb, u32 pc); | ||
| 245 | u32 DecodeIntegerSet(NodeBlock& bb, u32 pc); | ||
| 246 | u32 DecodeHalfSet(NodeBlock& bb, u32 pc); | ||
| 247 | u32 DecodeVideo(NodeBlock& bb, u32 pc); | ||
| 248 | u32 DecodeXmad(NodeBlock& bb, u32 pc); | ||
| 249 | u32 DecodeOther(NodeBlock& bb, u32 pc); | ||
| 250 | |||
| 251 | /// Generates a node for a passed register. | ||
| 252 | Node GetRegister(Tegra::Shader::Register reg); | ||
| 253 | /// Generates a node for a custom variable | ||
| 254 | Node GetCustomVariable(u32 id); | ||
| 255 | /// Generates a node representing a 19-bit immediate value | ||
| 256 | Node GetImmediate19(Tegra::Shader::Instruction instr); | ||
| 257 | /// Generates a node representing a 32-bit immediate value | ||
| 258 | Node GetImmediate32(Tegra::Shader::Instruction instr); | ||
| 259 | /// Generates a node representing a constant buffer | ||
| 260 | Node GetConstBuffer(u64 index, u64 offset); | ||
| 261 | /// Generates a node representing a constant buffer with an indirect (variable) offset | ||
| 262 | Node GetConstBufferIndirect(u64 index, u64 offset, Node node); | ||
| 263 | /// Generates a node for a passed predicate. It can be optionally negated | ||
| 264 | Node GetPredicate(u64 pred, bool negated = false); | ||
| 265 | /// Generates a predicate node for an immediate true or false value | ||
| 266 | Node GetPredicate(bool immediate); | ||
| 267 | /// Generates a node representing an input attribute. Keeps track of used attributes. | ||
| 268 | Node GetInputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer = {}); | ||
| 269 | /// Generates a node representing a physical input attribute. | ||
| 270 | Node GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer = {}); | ||
| 271 | /// Generates a node representing an output attribute. Keeps track of used attributes. | ||
| 272 | Node GetOutputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer); | ||
| 273 | /// Generates a node representing an internal flag | ||
| 274 | Node GetInternalFlag(InternalFlag flag, bool negated = false) const; | ||
| 275 | /// Generates a node representing a local memory address | ||
| 276 | Node GetLocalMemory(Node address); | ||
| 277 | /// Generates a node representing a shared memory address | ||
| 278 | Node GetSharedMemory(Node address); | ||
| 279 | /// Generates a temporary; internally it uses a post-RZ register | ||
| 280 | Node GetTemporary(u32 id); | ||
| 281 | |||
| 282 | /// Sets a register. src value must be a number-evaluated node. | ||
| 283 | void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src); | ||
| 284 | /// Sets a predicate. src value must be a bool-evaluated node | ||
| 285 | void SetPredicate(NodeBlock& bb, u64 dest, Node src); | ||
| 286 | /// Sets an internal flag. src value must be a bool-evaluated node | ||
| 287 | void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value); | ||
| 288 | /// Sets a local memory address with a value. | ||
| 289 | void SetLocalMemory(NodeBlock& bb, Node address, Node value); | ||
| 290 | /// Sets a shared memory address with a value. | ||
| 291 | void SetSharedMemory(NodeBlock& bb, Node address, Node value); | ||
| 292 | /// Sets a temporary. Internally it uses a post-RZ register | ||
| 293 | void SetTemporary(NodeBlock& bb, u32 id, Node value); | ||
| 294 | |||
| 295 | /// Sets internal flags from a float | ||
| 296 | void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true); | ||
| 297 | /// Sets internal flags from an integer | ||
| 298 | void SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc = true); | ||
| 299 | |||
| 300 | /// Conditionally absolute/negated float. Absolute is applied first | ||
| 301 | Node GetOperandAbsNegFloat(Node value, bool absolute, bool negate); | ||
| 302 | /// Conditionally saturates a float | ||
| 303 | Node GetSaturatedFloat(Node value, bool saturate = true); | ||
| 304 | |||
| 305 | /// Converts an integer to different sizes. | ||
| 306 | Node ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed); | ||
| 307 | /// Conditionally absolute/negated integer. Absolute is applied first | ||
| 308 | Node GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed); | ||
| 309 | |||
| 310 | /// Unpacks a half immediate from an instruction | ||
| 311 | Node UnpackHalfImmediate(Tegra::Shader::Instruction instr, bool has_negation); | ||
| 312 | /// Unpacks a binary value into a half float pair with a type format | ||
| 313 | Node UnpackHalfFloat(Node value, Tegra::Shader::HalfType type); | ||
| 314 | /// Merges a half pair into another value | ||
| 315 | Node HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge); | ||
| 316 | /// Conditionally absolute/negated half float pair. Absolute is applied first | ||
| 317 | Node GetOperandAbsNegHalf(Node value, bool absolute, bool negate); | ||
| 318 | /// Conditionally saturates a half float pair | ||
| 319 | Node GetSaturatedHalfFloat(Node value, bool saturate = true); | ||
| 320 | |||
| 321 | /// Get image component value by type and size | ||
| 322 | std::pair<Node, bool> GetComponentValue(Tegra::Texture::ComponentType component_type, | ||
| 323 | u32 component_size, Node original_value); | ||
| 324 | |||
| 325 | /// Returns a predicate comparing two floats | ||
| 326 | Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); | ||
| 327 | /// Returns a predicate comparing two integers | ||
| 328 | Node GetPredicateComparisonInteger(Tegra::Shader::PredCondition condition, bool is_signed, | ||
| 329 | Node op_a, Node op_b); | ||
| 330 | /// Returns a predicate comparing two half floats; the condition determines how both pairs are compared | ||
| 331 | Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); | ||
| 332 | |||
| 333 | /// Returns a predicate combiner operation | ||
| 334 | OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation); | ||
| 335 | |||
| 336 | /// Queries the missing sampler info from the execution context. | ||
| 337 | SamplerInfo GetSamplerInfo(SamplerInfo info, | ||
| 338 | std::optional<Tegra::Engines::SamplerDescriptor> sampler); | ||
| 339 | |||
| 340 | /// Accesses a texture sampler. | ||
| 341 | std::optional<SamplerEntry> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info); | ||
| 342 | |||
| 343 | /// Accesses a texture sampler for a bindless texture. | ||
| 344 | std::optional<SamplerEntry> GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info, | ||
| 345 | Node& index_var); | ||
| 346 | |||
| 347 | /// Accesses an image. | ||
| 348 | ImageEntry& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); | ||
| 349 | |||
| 350 | /// Access a bindless image sampler. | ||
| 351 | ImageEntry& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type); | ||
| 352 | |||
| 353 | /// Extracts a sequence of bits from a node | ||
| 354 | Node BitfieldExtract(Node value, u32 offset, u32 bits); | ||
| 355 | |||
| 356 | /// Inserts a sequence of bits from a node | ||
| 357 | Node BitfieldInsert(Node base, Node insert, u32 offset, u32 bits); | ||
| 358 | |||
| 359 | /// Marks the usage of an input or output attribute. | ||
| 360 | void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element); | ||
| 361 | |||
| 362 | /// Decodes VMNMX instruction and inserts its code into the passed basic block. | ||
| 363 | void DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr); | ||
| 364 | |||
| 365 | void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, | ||
| 366 | const Node4& components); | ||
| 367 | |||
| 368 | void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, | ||
| 369 | const Node4& components, bool ignore_mask = false); | ||
| 370 | void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, | ||
| 371 | const Node4& components, bool ignore_mask = false); | ||
| 372 | |||
| 373 | Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | ||
| 374 | Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, | ||
| 375 | bool is_array, bool is_aoffi, | ||
| 376 | std::optional<Tegra::Shader::Register> bindless_reg); | ||
| 377 | |||
| 378 | Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | ||
| 379 | Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, | ||
| 380 | bool is_array); | ||
| 381 | |||
| 382 | Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | ||
| 383 | bool depth_compare, bool is_array, bool is_aoffi, bool is_ptp, | ||
| 384 | bool is_bindless); | ||
| 385 | |||
| 386 | Node4 GetTldCode(Tegra::Shader::Instruction instr); | ||
| 387 | |||
| 388 | Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | ||
| 389 | bool is_array); | ||
| 390 | |||
| 391 | std::tuple<std::size_t, std::size_t> ValidateAndGetCoordinateElement( | ||
| 392 | Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array, | ||
| 393 | bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); | ||
| 394 | |||
| 395 | std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4); | ||
| 396 | |||
| 397 | std::vector<Node> GetPtpCoordinates(std::array<Node, 2> ptp_regs); | ||
| 398 | |||
| 399 | Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | ||
| 400 | Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords, | ||
| 401 | Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi, | ||
| 402 | std::optional<Tegra::Shader::Register> bindless_reg); | ||
| 403 | |||
| 404 | Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, | ||
| 405 | u64 byte_height); | ||
| 406 | |||
| 407 | void WriteLogicOperation(NodeBlock& bb, Tegra::Shader::Register dest, | ||
| 408 | Tegra::Shader::LogicOperation logic_op, Node op_a, Node op_b, | ||
| 409 | Tegra::Shader::PredicateResultMode predicate_mode, | ||
| 410 | Tegra::Shader::Pred predicate, bool sets_cc); | ||
| 411 | void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, | ||
| 412 | Node op_c, Node imm_lut, bool sets_cc); | ||
| 413 | |||
| 414 | std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; | ||
| 415 | |||
| 416 | std::pair<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code, | ||
| 417 | s64 cursor); | ||
| 418 | |||
| 419 | std::pair<Node, TrackSampler> HandleBindlessIndirectRead(const CbufNode& cbuf, | ||
| 420 | const OperationNode& operation, | ||
| 421 | Node gpr, Node base_offset, | ||
| 422 | Node tracked, const NodeBlock& code, | ||
| 423 | s64 cursor); | ||
| 424 | |||
| 425 | std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; | ||
| 426 | |||
| 427 | std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, | ||
| 428 | s64 cursor) const; | ||
| 429 | |||
| 430 | std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb, | ||
| 431 | Tegra::Shader::Instruction instr, | ||
| 432 | bool is_read, bool is_write); | ||
| 433 | |||
| 434 | /// Registers new amend code and returns its reference id. | ||
| 435 | std::size_t DeclareAmend(Node new_amend); | ||
| 436 | |||
| 437 | u32 NewCustomVariable(); | ||
| 438 | |||
| 439 | const ProgramCode& program_code; | ||
| 440 | const u32 main_offset; | ||
| 441 | const CompilerSettings settings; | ||
| 442 | Registry& registry; | ||
| 443 | |||
| 444 | bool decompiled{}; | ||
| 445 | bool disable_flow_stack{}; | ||
| 446 | |||
| 447 | u32 coverage_begin{}; | ||
| 448 | u32 coverage_end{}; | ||
| 449 | |||
| 450 | std::map<u32, NodeBlock> basic_blocks; | ||
| 451 | NodeBlock global_code; | ||
| 452 | ASTManager program_manager{true, true}; | ||
| 453 | std::vector<Node> amend_code; | ||
| 454 | u32 num_custom_variables{}; | ||
| 455 | |||
| 456 | std::set<u32> used_registers; | ||
| 457 | std::set<Tegra::Shader::Pred> used_predicates; | ||
| 458 | std::set<Tegra::Shader::Attribute::Index> used_input_attributes; | ||
| 459 | std::set<Tegra::Shader::Attribute::Index> used_output_attributes; | ||
| 460 | std::map<u32, ConstBuffer> used_cbufs; | ||
| 461 | std::list<SamplerEntry> used_samplers; | ||
| 462 | std::list<ImageEntry> used_images; | ||
| 463 | std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; | ||
| 464 | std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory; | ||
| 465 | bool uses_layer{}; | ||
| 466 | bool uses_viewport_index{}; | ||
| 467 | bool uses_point_size{}; | ||
| 468 | bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes | ||
| 469 | bool uses_instance_id{}; | ||
| 470 | bool uses_vertex_id{}; | ||
| 471 | bool uses_legacy_varyings{}; | ||
| 472 | bool uses_y_negate{}; | ||
| 473 | bool uses_warps{}; | ||
| 474 | bool uses_indexed_samplers{}; | ||
| 475 | |||
| 476 | Tegra::Shader::Header header; | ||
| 477 | }; | ||
| 478 | |||
| 479 | } // namespace VideoCommon::Shader | ||
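The used_* containers declared above are the decoder's bookkeeping: every register, predicate, attribute, constant buffer, sampler, and image the shader touches is recorded while the IR is built, and the backends later walk these sets to emit declarations. A minimal standalone sketch of that consumption pattern, with a simplified stand-in for ConstBuffer (illustrative only, not yuzu code):

    #include <cstdio>
    #include <map>

    struct ConstBuffer {
        unsigned max_offset; // Highest byte offset the shader reads
    };

    int main() {
        // Assume the decoder recorded reads from constant buffers 0 and 3.
        const std::map<unsigned, ConstBuffer> used_cbufs{{0, {0x40}}, {3, {0x10}}};
        for (const auto& [index, cbuf] : used_cbufs) {
            // One uniform block per used constant buffer, sized to its usage.
            std::printf("layout(binding = %u) uniform cbuf_%u { uint data[%u]; };\n",
                        index, index, cbuf.max_offset / 4 + 1);
        }
    }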
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp deleted file mode 100644 index 6be3ea92b..000000000 --- a/src/video_core/shader/track.cpp +++ /dev/null | |||
| @@ -1,236 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <utility> | ||
| 7 | #include <variant> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/shader/node.h" | ||
| 11 | #include "video_core/shader/node_helper.h" | ||
| 12 | #include "video_core/shader/shader_ir.h" | ||
| 13 | |||
| 14 | namespace VideoCommon::Shader { | ||
| 15 | |||
| 16 | namespace { | ||
| 17 | |||
| 18 | std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, | ||
| 19 | OperationCode operation_code) { | ||
| 20 | for (; cursor >= 0; --cursor) { | ||
| 21 | Node node = code.at(cursor); | ||
| 22 | |||
| 23 | if (const auto operation = std::get_if<OperationNode>(&*node)) { | ||
| 24 | if (operation->GetCode() == operation_code) { | ||
| 25 | return {std::move(node), cursor}; | ||
| 26 | } | ||
| 27 | } | ||
| 28 | |||
| 29 | if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { | ||
| 30 | const auto& conditional_code = conditional->GetCode(); | ||
| 31 | auto result = FindOperation( | ||
| 32 | conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code); | ||
| 33 | auto& found = result.first; | ||
| 34 | if (found) { | ||
| 35 | return {std::move(found), cursor}; | ||
| 36 | } | ||
| 37 | } | ||
| 38 | } | ||
| 39 | return {}; | ||
| 40 | } | ||
| 41 | |||
| 42 | std::optional<std::pair<Node, Node>> DecoupleIndirectRead(const OperationNode& operation) { | ||
| 43 | if (operation.GetCode() != OperationCode::UAdd) { | ||
| 44 | return std::nullopt; | ||
| 45 | } | ||
| 46 | Node gpr; | ||
| 47 | Node offset; | ||
| 48 | ASSERT(operation.GetOperandsCount() == 2); | ||
| 49 | for (std::size_t i = 0; i < operation.GetOperandsCount(); i++) { | ||
| 50 | Node operand = operation[i]; | ||
| 51 | if (std::holds_alternative<ImmediateNode>(*operand)) { | ||
| 52 | offset = operation[i]; | ||
| 53 | } else if (std::holds_alternative<GprNode>(*operand)) { | ||
| 54 | gpr = operation[i]; | ||
| 55 | } | ||
| 56 | } | ||
| 57 | if (offset && gpr) { | ||
| 58 | return std::make_pair(gpr, offset); | ||
| 59 | } | ||
| 60 | return std::nullopt; | ||
| 61 | } | ||
| 62 | |||
| 63 | bool AmendNodeCv(std::size_t amend_index, Node node) { | ||
| 64 | if (const auto operation = std::get_if<OperationNode>(&*node)) { | ||
| 65 | operation->SetAmendIndex(amend_index); | ||
| 66 | return true; | ||
| 67 | } | ||
| 68 | if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { | ||
| 69 | conditional->SetAmendIndex(amend_index); | ||
| 70 | return true; | ||
| 71 | } | ||
| 72 | return false; | ||
| 73 | } | ||
| 74 | |||
| 75 | } // Anonymous namespace | ||
| 76 | |||
| 77 | std::pair<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code, | ||
| 78 | s64 cursor) { | ||
| 79 | if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { | ||
| 80 | const u32 cbuf_index = cbuf->GetIndex(); | ||
| 81 | |||
| 82 | // Constant buffer found, test if it's an immediate | ||
| 83 | const auto& offset = cbuf->GetOffset(); | ||
| 84 | if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { | ||
| 85 | auto track = MakeTrackSampler<BindlessSamplerNode>(cbuf_index, immediate->GetValue()); | ||
| 86 | return {tracked, track}; | ||
| 87 | } | ||
| 88 | if (const auto operation = std::get_if<OperationNode>(&*offset)) { | ||
| 89 | const u32 bound_buffer = registry.GetBoundBuffer(); | ||
| 90 | if (bound_buffer != cbuf_index) { | ||
| 91 | return {}; | ||
| 92 | } | ||
| 93 | if (const std::optional pair = DecoupleIndirectRead(*operation)) { | ||
| 94 | auto [gpr, base_offset] = *pair; | ||
| 95 | return HandleBindlessIndirectRead(*cbuf, *operation, gpr, base_offset, tracked, | ||
| 96 | code, cursor); | ||
| 97 | } | ||
| 98 | } | ||
| 99 | return {}; | ||
| 100 | } | ||
| 101 | if (const auto gpr = std::get_if<GprNode>(&*tracked)) { | ||
| 102 | if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { | ||
| 103 | return {}; | ||
| 104 | } | ||
| 105 | // Reduce the cursor by one to avoid infinite loops when the instruction sets the same | ||
| 106 | // register that it uses as an operand | ||
| 107 | const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); | ||
| 108 | if (!source) { | ||
| 109 | return {}; | ||
| 110 | } | ||
| 111 | return TrackBindlessSampler(source, code, new_cursor); | ||
| 112 | } | ||
| 113 | if (const auto operation = std::get_if<OperationNode>(&*tracked)) { | ||
| 114 | const OperationNode& op = *operation; | ||
| 115 | |||
| 116 | const OperationCode opcode = operation->GetCode(); | ||
| 117 | if (opcode == OperationCode::IBitwiseOr || opcode == OperationCode::UBitwiseOr) { | ||
| 118 | ASSERT(op.GetOperandsCount() == 2); | ||
| 119 | auto [node_a, index_a, offset_a] = TrackCbuf(op[0], code, cursor); | ||
| 120 | auto [node_b, index_b, offset_b] = TrackCbuf(op[1], code, cursor); | ||
| 121 | if (node_a && node_b) { | ||
| 122 | auto track = MakeTrackSampler<SeparateSamplerNode>(std::pair{index_a, index_b}, | ||
| 123 | std::pair{offset_a, offset_b}); | ||
| 124 | return {tracked, std::move(track)}; | ||
| 125 | } | ||
| 126 | } | ||
| 127 | std::size_t i = op.GetOperandsCount(); | ||
| 128 | while (i--) { | ||
| 129 | if (auto found = TrackBindlessSampler(op[i], code, cursor); std::get<0>(found)) { | ||
| 130 | // Constant buffer found in operand. | ||
| 131 | return found; | ||
| 132 | } | ||
| 133 | } | ||
| 134 | return {}; | ||
| 135 | } | ||
| 136 | if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) { | ||
| 137 | const auto& conditional_code = conditional->GetCode(); | ||
| 138 | return TrackBindlessSampler(tracked, conditional_code, | ||
| 139 | static_cast<s64>(conditional_code.size())); | ||
| 140 | } | ||
| 141 | return {}; | ||
| 142 | } | ||
| 143 | |||
| 144 | std::pair<Node, TrackSampler> ShaderIR::HandleBindlessIndirectRead( | ||
| 145 | const CbufNode& cbuf, const OperationNode& operation, Node gpr, Node base_offset, Node tracked, | ||
| 146 | const NodeBlock& code, s64 cursor) { | ||
| 147 | const auto offset_imm = std::get<ImmediateNode>(*base_offset); | ||
| 148 | const auto& gpu_driver = registry.AccessGuestDriverProfile(); | ||
| 149 | const u32 bindless_cv = NewCustomVariable(); | ||
| 150 | const u32 texture_handler_size = gpu_driver.GetTextureHandlerSize(); | ||
| 151 | Node op = Operation(OperationCode::UDiv, gpr, Immediate(texture_handler_size)); | ||
| 152 | |||
| 153 | Node cv_node = GetCustomVariable(bindless_cv); | ||
| 154 | Node amend_op = Operation(OperationCode::Assign, std::move(cv_node), std::move(op)); | ||
| 155 | const std::size_t amend_index = DeclareAmend(std::move(amend_op)); | ||
| 156 | AmendNodeCv(amend_index, code[cursor]); | ||
| 157 | |||
| 158 | // TODO: Implement bindless index custom variable | ||
| 159 | auto track = | ||
| 160 | MakeTrackSampler<ArraySamplerNode>(cbuf.GetIndex(), offset_imm.GetValue(), bindless_cv); | ||
| 161 | return {tracked, track}; | ||
| 162 | } | ||
| 163 | |||
| 164 | std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, | ||
| 165 | s64 cursor) const { | ||
| 166 | if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { | ||
| 167 | // Constant buffer found, test if it's an immediate | ||
| 168 | const auto& offset = cbuf->GetOffset(); | ||
| 169 | if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { | ||
| 170 | return {tracked, cbuf->GetIndex(), immediate->GetValue()}; | ||
| 171 | } | ||
| 172 | return {}; | ||
| 173 | } | ||
| 174 | if (const auto gpr = std::get_if<GprNode>(&*tracked)) { | ||
| 175 | if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { | ||
| 176 | return {}; | ||
| 177 | } | ||
| 178 | // Reduce the cursor by one to avoid infinite loops when the instruction sets the same | ||
| 179 | // register that it uses as an operand | ||
| 180 | const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); | ||
| 181 | if (!source) { | ||
| 182 | return {}; | ||
| 183 | } | ||
| 184 | return TrackCbuf(source, code, new_cursor); | ||
| 185 | } | ||
| 186 | if (const auto operation = std::get_if<OperationNode>(&*tracked)) { | ||
| 187 | for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { | ||
| 188 | if (auto found = TrackCbuf((*operation)[i - 1], code, cursor); std::get<0>(found)) { | ||
| 189 | // Cbuf found in operand. | ||
| 190 | return found; | ||
| 191 | } | ||
| 192 | } | ||
| 193 | return {}; | ||
| 194 | } | ||
| 195 | if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) { | ||
| 196 | const auto& conditional_code = conditional->GetCode(); | ||
| 197 | return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size())); | ||
| 198 | } | ||
| 199 | return {}; | ||
| 200 | } | ||
| 201 | |||
| 202 | std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const { | ||
| 203 | // Reduce the cursor by one to avoid infinite loops when the instruction sets the same register | ||
| 204 | // that it uses as an operand | ||
| 205 | const auto result = TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1); | ||
| 206 | const auto& found = result.first; | ||
| 207 | if (!found) { | ||
| 208 | return std::nullopt; | ||
| 209 | } | ||
| 210 | if (const auto immediate = std::get_if<ImmediateNode>(&*found)) { | ||
| 211 | return immediate->GetValue(); | ||
| 212 | } | ||
| 213 | return std::nullopt; | ||
| 214 | } | ||
| 215 | |||
| 216 | std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code, | ||
| 217 | s64 cursor) const { | ||
| 218 | for (; cursor >= 0; --cursor) { | ||
| 219 | const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign); | ||
| 220 | if (!found_node) { | ||
| 221 | return {}; | ||
| 222 | } | ||
| 223 | const auto operation = std::get_if<OperationNode>(&*found_node); | ||
| 224 | ASSERT(operation); | ||
| 225 | |||
| 226 | const auto& target = (*operation)[0]; | ||
| 227 | if (const auto gpr_target = std::get_if<GprNode>(&*target)) { | ||
| 228 | if (gpr_target->GetIndex() == tracked->GetIndex()) { | ||
| 229 | return {(*operation)[1], new_cursor}; | ||
| 230 | } | ||
| 231 | } | ||
| 232 | } | ||
| 233 | return {}; | ||
| 234 | } | ||
| 235 | |||
| 236 | } // namespace VideoCommon::Shader | ||
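The Track* routines above share one idea: walk the block backwards from the cursor, find the assignment that last wrote the value in question, and recurse on its source until a constant buffer access or an immediate is reached, or the search falls off the start of the block. The same backward copy-following in a standalone miniature, with simplified stand-ins for the Node variants (illustrative only, not yuzu code):

    #include <cstddef>
    #include <cstdio>
    #include <optional>
    #include <variant>
    #include <vector>

    struct Gpr { int index; };
    struct Imm { unsigned value; };
    using Source = std::variant<Gpr, Imm>;
    struct Assign { int dest; Source src; };

    std::optional<unsigned> TrackImmediate(const std::vector<Assign>& code, int reg, long cursor) {
        for (; cursor >= 0; --cursor) {
            const Assign& op = code[static_cast<std::size_t>(cursor)];
            if (op.dest != reg) {
                continue;
            }
            if (const auto* imm = std::get_if<Imm>(&op.src)) {
                return imm->value; // Constant found: tracking succeeded
            }
            // The value was copied from another register: keep following it,
            // resuming one instruction earlier to avoid self-assignment loops.
            reg = std::get<Gpr>(op.src).index;
        }
        return std::nullopt; // Ran off the start of the block
    }

    int main() {
        const std::vector<Assign> code{{0, Imm{0x42}}, {1, Gpr{0}}, {2, Gpr{1}}};
        if (const auto value = TrackImmediate(code, 2, 2)) {
            std::printf("r2 <- 0x%x\n", *value); // prints r2 <- 0x42
        }
    }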
diff --git a/src/video_core/shader/transform_feedback.cpp b/src/video_core/shader/transform_feedback.cpp deleted file mode 100644 index 22a933761..000000000 --- a/src/video_core/shader/transform_feedback.cpp +++ /dev/null | |||
| @@ -1,115 +0,0 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <unordered_map> | ||
| 8 | |||
| 9 | #include "common/assert.h" | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/engines/maxwell_3d.h" | ||
| 12 | #include "video_core/shader/registry.h" | ||
| 13 | #include "video_core/shader/transform_feedback.h" | ||
| 14 | |||
| 15 | namespace VideoCommon::Shader { | ||
| 16 | |||
| 17 | namespace { | ||
| 18 | |||
| 19 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 20 | |||
| 21 | // TODO(Rodrigo): Change this to constexpr std::unordered_set in C++20 | ||
| 22 | |||
| 23 | /// Attribute offsets that describe a vector | ||
| 24 | constexpr std::array VECTORS = { | ||
| 25 | 28, // gl_Position | ||
| 26 | 32, // Generic 0 | ||
| 27 | 36, // Generic 1 | ||
| 28 | 40, // Generic 2 | ||
| 29 | 44, // Generic 3 | ||
| 30 | 48, // Generic 4 | ||
| 31 | 52, // Generic 5 | ||
| 32 | 56, // Generic 6 | ||
| 33 | 60, // Generic 7 | ||
| 34 | 64, // Generic 8 | ||
| 35 | 68, // Generic 9 | ||
| 36 | 72, // Generic 10 | ||
| 37 | 76, // Generic 11 | ||
| 38 | 80, // Generic 12 | ||
| 39 | 84, // Generic 13 | ||
| 40 | 88, // Generic 14 | ||
| 41 | 92, // Generic 15 | ||
| 42 | 96, // Generic 16 | ||
| 43 | 100, // Generic 17 | ||
| 44 | 104, // Generic 18 | ||
| 45 | 108, // Generic 19 | ||
| 46 | 112, // Generic 20 | ||
| 47 | 116, // Generic 21 | ||
| 48 | 120, // Generic 22 | ||
| 49 | 124, // Generic 23 | ||
| 50 | 128, // Generic 24 | ||
| 51 | 132, // Generic 25 | ||
| 52 | 136, // Generic 26 | ||
| 53 | 140, // Generic 27 | ||
| 54 | 144, // Generic 28 | ||
| 55 | 148, // Generic 29 | ||
| 56 | 152, // Generic 30 | ||
| 57 | 156, // Generic 31 | ||
| 58 | 160, // gl_FrontColor | ||
| 59 | 164, // gl_FrontSecondaryColor | ||
| 60 | 160, // gl_BackColor | ||
| 61 | 164, // gl_BackSecondaryColor | ||
| 62 | 192, // gl_TexCoord[0] | ||
| 63 | 196, // gl_TexCoord[1] | ||
| 64 | 200, // gl_TexCoord[2] | ||
| 65 | 204, // gl_TexCoord[3] | ||
| 66 | 208, // gl_TexCoord[4] | ||
| 67 | 212, // gl_TexCoord[5] | ||
| 68 | 216, // gl_TexCoord[6] | ||
| 69 | 220, // gl_TexCoord[7] | ||
| 70 | }; | ||
| 71 | } // namespace | ||
| 72 | |||
| 73 | std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info) { | ||
| 74 | |||
| 75 | std::unordered_map<u8, VaryingTFB> tfb; | ||
| 76 | |||
| 77 | for (std::size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) { | ||
| 78 | const auto& locations = info.tfb_varying_locs[buffer]; | ||
| 79 | const auto& layout = info.tfb_layouts[buffer]; | ||
| 80 | const std::size_t varying_count = layout.varying_count; | ||
| 81 | |||
| 82 | std::size_t highest = 0; | ||
| 83 | |||
| 84 | for (std::size_t offset = 0; offset < varying_count; ++offset) { | ||
| 85 | const std::size_t base_offset = offset; | ||
| 86 | const u8 location = locations[offset]; | ||
| 87 | |||
| 88 | VaryingTFB varying; | ||
| 89 | varying.buffer = layout.stream; | ||
| 90 | varying.stride = layout.stride; | ||
| 91 | varying.offset = offset * sizeof(u32); | ||
| 92 | varying.components = 1; | ||
| 93 | |||
| 94 | if (std::find(VECTORS.begin(), VECTORS.end(), location / 4 * 4) != VECTORS.end()) { | ||
| 95 | UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB"); | ||
| 96 | |||
| 97 | const u8 base_index = location / 4; | ||
| 98 | while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) { | ||
| 99 | ++offset; | ||
| 100 | ++varying.components; | ||
| 101 | } | ||
| 102 | } | ||
| 103 | |||
| 104 | [[maybe_unused]] const bool inserted = tfb.emplace(location, varying).second; | ||
| 105 | UNIMPLEMENTED_IF_MSG(!inserted, "Varying already stored"); | ||
| 106 | |||
| 107 | highest = std::max(highest, (base_offset + varying.components) * sizeof(u32)); | ||
| 108 | } | ||
| 109 | |||
| 110 | UNIMPLEMENTED_IF(highest != layout.stride); | ||
| 111 | } | ||
| 112 | return tfb; | ||
| 113 | } | ||
| 114 | |||
| 115 | } // namespace VideoCommon::Shader | ||
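The merge loop above depends on transform feedback locations being component-granular: location / 4 picks the vec4 slot (the entries in VECTORS are the slot base offsets), so a run of consecutive locations inside one slot collapses into a single varying whose components count grows per extra location. Just the merge rule, standalone (assumed input values, not yuzu code):

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    int main() {
        // Locations 32..35 are the four components of generic attribute 0;
        // location 40 starts another slot.
        const std::vector<unsigned> locations{32, 33, 34, 35, 40};
        for (std::size_t offset = 0; offset < locations.size(); ++offset) {
            const unsigned base_index = locations[offset] / 4;
            unsigned components = 1;
            while (offset + 1 < locations.size() && base_index == locations[offset + 1] / 4) {
                ++offset;
                ++components;
            }
            std::printf("location=%u components=%u\n", base_index * 4, components);
        }
        // Prints: location=32 components=4
        //         location=40 components=1
    }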
diff --git a/src/video_core/shader/transform_feedback.h b/src/video_core/shader/transform_feedback.h deleted file mode 100644 index 77d05f64c..000000000 --- a/src/video_core/shader/transform_feedback.h +++ /dev/null | |||
| @@ -1,23 +0,0 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <unordered_map> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/shader/registry.h" | ||
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | struct VaryingTFB { | ||
| 15 | std::size_t buffer; | ||
| 16 | std::size_t stride; | ||
| 17 | std::size_t offset; | ||
| 18 | std::size_t components; | ||
| 19 | }; | ||
| 20 | |||
| 21 | std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info); | ||
| 22 | |||
| 23 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp new file mode 100644 index 000000000..78bf90c48 --- /dev/null +++ b/src/video_core/shader_cache.cpp | |||
| @@ -0,0 +1,250 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include "common/assert.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/control_flow.h" | ||
| 11 | #include "shader_recompiler/object_pool.h" | ||
| 12 | #include "video_core/dirty_flags.h" | ||
| 13 | #include "video_core/engines/kepler_compute.h" | ||
| 14 | #include "video_core/engines/maxwell_3d.h" | ||
| 15 | #include "video_core/memory_manager.h" | ||
| 16 | #include "video_core/shader_cache.h" | ||
| 17 | #include "video_core/shader_environment.h" | ||
| 18 | |||
| 19 | namespace VideoCommon { | ||
| 20 | |||
| 21 | void ShaderCache::InvalidateRegion(VAddr addr, size_t size) { | ||
| 22 | std::scoped_lock lock{invalidation_mutex}; | ||
| 23 | InvalidatePagesInRegion(addr, size); | ||
| 24 | RemovePendingShaders(); | ||
| 25 | } | ||
| 26 | |||
| 27 | void ShaderCache::OnCPUWrite(VAddr addr, size_t size) { | ||
| 28 | std::lock_guard lock{invalidation_mutex}; | ||
| 29 | InvalidatePagesInRegion(addr, size); | ||
| 30 | } | ||
| 31 | |||
| 32 | void ShaderCache::SyncGuestHost() { | ||
| 33 | std::scoped_lock lock{invalidation_mutex}; | ||
| 34 | RemovePendingShaders(); | ||
| 35 | } | ||
| 36 | |||
| 37 | ShaderCache::ShaderCache(VideoCore::RasterizerInterface& rasterizer_, | ||
| 38 | Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, | ||
| 39 | Tegra::Engines::KeplerCompute& kepler_compute_) | ||
| 40 | : gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, | ||
| 41 | rasterizer{rasterizer_} {} | ||
| 42 | |||
| 43 | bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) { | ||
| 44 | auto& dirty{maxwell3d.dirty.flags}; | ||
| 45 | if (!dirty[VideoCommon::Dirty::Shaders]) { | ||
| 46 | return last_shaders_valid; | ||
| 47 | } | ||
| 48 | dirty[VideoCommon::Dirty::Shaders] = false; | ||
| 49 | |||
| 50 | const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; | ||
| 51 | for (size_t index = 0; index < Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; ++index) { | ||
| 52 | if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { | ||
| 53 | unique_hashes[index] = 0; | ||
| 54 | continue; | ||
| 55 | } | ||
| 56 | const auto& shader_config{maxwell3d.regs.shader_config[index]}; | ||
| 57 | const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)}; | ||
| 58 | const GPUVAddr shader_addr{base_addr + shader_config.offset}; | ||
| 59 | const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; | ||
| 60 | if (!cpu_shader_addr) { | ||
| 61 | LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr); | ||
| 62 | last_shaders_valid = false; | ||
| 63 | return false; | ||
| 64 | } | ||
| 65 | const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)}; | ||
| 66 | if (!shader_info) { | ||
| 67 | const u32 start_address{shader_config.offset}; | ||
| 68 | GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address}; | ||
| 69 | shader_info = MakeShaderInfo(env, *cpu_shader_addr); | ||
| 70 | } | ||
| 71 | shader_infos[index] = shader_info; | ||
| 72 | unique_hashes[index] = shader_info->unique_hash; | ||
| 73 | } | ||
| 74 | last_shaders_valid = true; | ||
| 75 | return true; | ||
| 76 | } | ||
| 77 | |||
| 78 | const ShaderInfo* ShaderCache::ComputeShader() { | ||
| 79 | const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; | ||
| 80 | const auto& qmd{kepler_compute.launch_description}; | ||
| 81 | const GPUVAddr shader_addr{program_base + qmd.program_start}; | ||
| 82 | const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; | ||
| 83 | if (!cpu_shader_addr) { | ||
| 84 | LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr); | ||
| 85 | return nullptr; | ||
| 86 | } | ||
| 87 | if (const ShaderInfo* const shader = TryGet(*cpu_shader_addr)) { | ||
| 88 | return shader; | ||
| 89 | } | ||
| 90 | ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; | ||
| 91 | return MakeShaderInfo(env, *cpu_shader_addr); | ||
| 92 | } | ||
| 93 | |||
| 94 | void ShaderCache::GetGraphicsEnvironments(GraphicsEnvironments& result, | ||
| 95 | const std::array<u64, NUM_PROGRAMS>& unique_hashes) { | ||
| 96 | size_t env_index{}; | ||
| 97 | const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; | ||
| 98 | for (size_t index = 0; index < NUM_PROGRAMS; ++index) { | ||
| 99 | if (unique_hashes[index] == 0) { | ||
| 100 | continue; | ||
| 101 | } | ||
| 102 | const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)}; | ||
| 103 | auto& env{result.envs[index]}; | ||
| 104 | const u32 start_address{maxwell3d.regs.shader_config[index].offset}; | ||
| 105 | env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; | ||
| 106 | env.SetCachedSize(shader_infos[index]->size_bytes); | ||
| 107 | result.env_ptrs[env_index++] = &env; | ||
| 108 | } | ||
| 109 | } | ||
| 110 | |||
| 111 | ShaderInfo* ShaderCache::TryGet(VAddr addr) const { | ||
| 112 | std::scoped_lock lock{lookup_mutex}; | ||
| 113 | |||
| 114 | const auto it = lookup_cache.find(addr); | ||
| 115 | if (it == lookup_cache.end()) { | ||
| 116 | return nullptr; | ||
| 117 | } | ||
| 118 | return it->second->data; | ||
| 119 | } | ||
| 120 | |||
| 121 | void ShaderCache::Register(std::unique_ptr<ShaderInfo> data, VAddr addr, size_t size) { | ||
| 122 | std::scoped_lock lock{invalidation_mutex, lookup_mutex}; | ||
| 123 | |||
| 124 | const VAddr addr_end = addr + size; | ||
| 125 | Entry* const entry = NewEntry(addr, addr_end, data.get()); | ||
| 126 | |||
| 127 | const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS; | ||
| 128 | for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) { | ||
| 129 | invalidation_cache[page].push_back(entry); | ||
| 130 | } | ||
| 131 | |||
| 132 | storage.push_back(std::move(data)); | ||
| 133 | |||
| 134 | rasterizer.UpdatePagesCachedCount(addr, size, 1); | ||
| 135 | } | ||
| 136 | |||
| 137 | void ShaderCache::InvalidatePagesInRegion(VAddr addr, size_t size) { | ||
| 138 | const VAddr addr_end = addr + size; | ||
| 139 | const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS; | ||
| 140 | for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) { | ||
| 141 | auto it = invalidation_cache.find(page); | ||
| 142 | if (it == invalidation_cache.end()) { | ||
| 143 | continue; | ||
| 144 | } | ||
| 145 | InvalidatePageEntries(it->second, addr, addr_end); | ||
| 146 | } | ||
| 147 | } | ||
| 148 | |||
| 149 | void ShaderCache::RemovePendingShaders() { | ||
| 150 | if (marked_for_removal.empty()) { | ||
| 151 | return; | ||
| 152 | } | ||
| 153 | // Remove duplicates | ||
| 154 | std::ranges::sort(marked_for_removal); | ||
| 155 | marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()), | ||
| 156 | marked_for_removal.end()); | ||
| 157 | |||
| 158 | std::vector<ShaderInfo*> removed_shaders; | ||
| 159 | removed_shaders.reserve(marked_for_removal.size()); | ||
| 160 | |||
| 161 | std::scoped_lock lock{lookup_mutex}; | ||
| 162 | |||
| 163 | for (Entry* const entry : marked_for_removal) { | ||
| 164 | removed_shaders.push_back(entry->data); | ||
| 165 | |||
| 166 | const auto it = lookup_cache.find(entry->addr_start); | ||
| 167 | ASSERT(it != lookup_cache.end()); | ||
| 168 | lookup_cache.erase(it); | ||
| 169 | } | ||
| 170 | marked_for_removal.clear(); | ||
| 171 | |||
| 172 | if (!removed_shaders.empty()) { | ||
| 173 | RemoveShadersFromStorage(std::move(removed_shaders)); | ||
| 174 | } | ||
| 175 | } | ||
| 176 | |||
| 177 | void ShaderCache::InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) { | ||
| 178 | size_t index = 0; | ||
| 179 | while (index < entries.size()) { | ||
| 180 | Entry* const entry = entries[index]; | ||
| 181 | if (!entry->Overlaps(addr, addr_end)) { | ||
| 182 | ++index; | ||
| 183 | continue; | ||
| 184 | } | ||
| 185 | |||
| 186 | UnmarkMemory(entry); | ||
| 187 | RemoveEntryFromInvalidationCache(entry); | ||
| 188 | marked_for_removal.push_back(entry); | ||
| 189 | } | ||
| 190 | } | ||
| 191 | |||
| 192 | void ShaderCache::RemoveEntryFromInvalidationCache(const Entry* entry) { | ||
| 193 | const u64 page_end = (entry->addr_end + PAGE_SIZE - 1) >> PAGE_BITS; | ||
| 194 | for (u64 page = entry->addr_start >> PAGE_BITS; page < page_end; ++page) { | ||
| 195 | const auto entries_it = invalidation_cache.find(page); | ||
| 196 | ASSERT(entries_it != invalidation_cache.end()); | ||
| 197 | std::vector<Entry*>& entries = entries_it->second; | ||
| 198 | |||
| 199 | const auto entry_it = std::ranges::find(entries, entry); | ||
| 200 | ASSERT(entry_it != entries.end()); | ||
| 201 | entries.erase(entry_it); | ||
| 202 | } | ||
| 203 | } | ||
| 204 | |||
| 205 | void ShaderCache::UnmarkMemory(Entry* entry) { | ||
| 206 | if (!entry->is_memory_marked) { | ||
| 207 | return; | ||
| 208 | } | ||
| 209 | entry->is_memory_marked = false; | ||
| 210 | |||
| 211 | const VAddr addr = entry->addr_start; | ||
| 212 | const size_t size = entry->addr_end - addr; | ||
| 213 | rasterizer.UpdatePagesCachedCount(addr, size, -1); | ||
| 214 | } | ||
| 215 | |||
| 216 | void ShaderCache::RemoveShadersFromStorage(std::vector<ShaderInfo*> removed_shaders) { | ||
| 217 | // Remove them from the cache | ||
| 218 | std::erase_if(storage, [&removed_shaders](const std::unique_ptr<ShaderInfo>& shader) { | ||
| 219 | return std::ranges::find(removed_shaders, shader.get()) != removed_shaders.end(); | ||
| 220 | }); | ||
| 221 | } | ||
| 222 | |||
| 223 | ShaderCache::Entry* ShaderCache::NewEntry(VAddr addr, VAddr addr_end, ShaderInfo* data) { | ||
| 224 | auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data}); | ||
| 225 | Entry* const entry_pointer = entry.get(); | ||
| 226 | |||
| 227 | lookup_cache.emplace(addr, std::move(entry)); | ||
| 228 | return entry_pointer; | ||
| 229 | } | ||
| 230 | |||
| 231 | const ShaderInfo* ShaderCache::MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr) { | ||
| 232 | auto info = std::make_unique<ShaderInfo>(); | ||
| 233 | if (const std::optional<u64> cached_hash{env.Analyze()}) { | ||
| 234 | info->unique_hash = *cached_hash; | ||
| 235 | info->size_bytes = env.CachedSize(); | ||
| 236 | } else { | ||
| 237 | // Slow path, not really hit on commercial games | ||
| 238 | // Build a control flow graph to get the real shader size | ||
| 239 | Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block; | ||
| 240 | Shader::Maxwell::Flow::CFG cfg{env, flow_block, env.StartAddress()}; | ||
| 241 | info->unique_hash = env.CalculateHash(); | ||
| 242 | info->size_bytes = env.ReadSize(); | ||
| 243 | } | ||
| 244 | const size_t size_bytes{info->size_bytes}; | ||
| 245 | const ShaderInfo* const result{info.get()}; | ||
| 246 | Register(std::move(info), cpu_addr, size_bytes); | ||
| 247 | return result; | ||
| 248 | } | ||
| 249 | |||
| 250 | } // namespace VideoCommon | ||
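Register and InvalidatePagesInRegion above meet through the page index: with PAGE_BITS = 14, an entry is appended to the invalidation_cache bucket of every 16 KiB page it overlaps, so invalidating a region only visits the few buckets a write can actually touch. The page arithmetic in isolation (assumed addresses, not yuzu code):

    #include <cstdint>
    #include <cstdio>

    constexpr std::uint64_t PAGE_BITS = 14;
    constexpr std::uint64_t PAGE_SIZE = std::uint64_t(1) << PAGE_BITS;

    int main() {
        const std::uint64_t addr = 0x7000;
        const std::uint64_t size = 0x9000; // Region [0x7000, 0x10000)
        const std::uint64_t page_end = (addr + size + PAGE_SIZE - 1) >> PAGE_BITS;
        for (std::uint64_t page = addr >> PAGE_BITS; page < page_end; ++page) {
            std::printf("touches page %llu\n", static_cast<unsigned long long>(page));
        }
        // Prints pages 1, 2 and 3: the 16 KiB pages starting at
        // 0x4000, 0x8000 and 0xC000.
    }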
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h index 015a789d6..136fe294c 100644 --- a/src/video_core/shader_cache.h +++ b/src/video_core/shader_cache.h | |||
| @@ -5,226 +5,147 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <algorithm> | 7 | #include <algorithm> |
| 8 | #include <array> | ||
| 8 | #include <memory> | 9 | #include <memory> |
| 9 | #include <mutex> | 10 | #include <mutex> |
| 11 | #include <span> | ||
| 10 | #include <unordered_map> | 12 | #include <unordered_map> |
| 11 | #include <utility> | 13 | #include <utility> |
| 12 | #include <vector> | 14 | #include <vector> |
| 13 | 15 | ||
| 14 | #include "common/assert.h" | ||
| 15 | #include "common/common_types.h" | 16 | #include "common/common_types.h" |
| 16 | #include "video_core/rasterizer_interface.h" | 17 | #include "video_core/rasterizer_interface.h" |
| 18 | #include "video_core/shader_environment.h" | ||
| 19 | |||
| 20 | namespace Tegra { | ||
| 21 | class MemoryManager; | ||
| 22 | } | ||
| 17 | 23 | ||
| 18 | namespace VideoCommon { | 24 | namespace VideoCommon { |
| 19 | 25 | ||
| 20 | template <class T> | 26 | class GenericEnvironment; |
| 27 | |||
| 28 | struct ShaderInfo { | ||
| 29 | u64 unique_hash{}; | ||
| 30 | size_t size_bytes{}; | ||
| 31 | }; | ||
| 32 | |||
| 21 | class ShaderCache { | 33 | class ShaderCache { |
| 22 | static constexpr u64 PAGE_BITS = 14; | 34 | static constexpr u64 PAGE_BITS = 14; |
| 23 | static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS; | 35 | static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS; |
| 24 | 36 | ||
| 37 | static constexpr size_t NUM_PROGRAMS = 6; | ||
| 38 | |||
| 25 | struct Entry { | 39 | struct Entry { |
| 26 | VAddr addr_start; | 40 | VAddr addr_start; |
| 27 | VAddr addr_end; | 41 | VAddr addr_end; |
| 28 | T* data; | 42 | ShaderInfo* data; |
| 29 | 43 | ||
| 30 | bool is_memory_marked = true; | 44 | bool is_memory_marked = true; |
| 31 | 45 | ||
| 32 | constexpr bool Overlaps(VAddr start, VAddr end) const noexcept { | 46 | bool Overlaps(VAddr start, VAddr end) const noexcept { |
| 33 | return start < addr_end && addr_start < end; | 47 | return start < addr_end && addr_start < end; |
| 34 | } | 48 | } |
| 35 | }; | 49 | }; |
| 36 | 50 | ||
| 37 | public: | 51 | public: |
| 38 | virtual ~ShaderCache() = default; | ||
| 39 | |||
| 40 | /// @brief Removes shaders inside a given region | 52 | /// @brief Removes shaders inside a given region |
| 41 | /// @note Checks for ranges | 53 | /// @note Checks for ranges |
| 42 | /// @param addr Start address of the invalidation | 54 | /// @param addr Start address of the invalidation |
| 43 | /// @param size Number of bytes of the invalidation | 55 | /// @param size Number of bytes of the invalidation |
| 44 | void InvalidateRegion(VAddr addr, std::size_t size) { | 56 | void InvalidateRegion(VAddr addr, size_t size); |
| 45 | std::scoped_lock lock{invalidation_mutex}; | ||
| 46 | InvalidatePagesInRegion(addr, size); | ||
| 47 | RemovePendingShaders(); | ||
| 48 | } | ||
| 49 | 57 | ||
| 50 | /// @brief Unmarks a memory region as cached and marks it for removal | 58 | /// @brief Unmarks a memory region as cached and marks it for removal |
| 51 | /// @param addr Start address of the CPU write operation | 59 | /// @param addr Start address of the CPU write operation |
| 52 | /// @param size Number of bytes of the CPU write operation | 60 | /// @param size Number of bytes of the CPU write operation |
| 53 | void OnCPUWrite(VAddr addr, std::size_t size) { | 61 | void OnCPUWrite(VAddr addr, size_t size); |
| 54 | std::lock_guard lock{invalidation_mutex}; | ||
| 55 | InvalidatePagesInRegion(addr, size); | ||
| 56 | } | ||
| 57 | 62 | ||
| 58 | /// @brief Flushes delayed removal operations | 63 | /// @brief Flushes delayed removal operations |
| 59 | void SyncGuestHost() { | 64 | void SyncGuestHost(); |
| 60 | std::scoped_lock lock{invalidation_mutex}; | ||
| 61 | RemovePendingShaders(); | ||
| 62 | } | ||
| 63 | 65 | ||
| 64 | /// @brief Tries to obtain a cached shader starting in a given address | 66 | protected: |
| 65 | /// @note Doesn't check for ranges, the given address has to be the start of the shader | 67 | struct GraphicsEnvironments { |
| 66 | /// @param addr Start address of the shader, this doesn't cache for region | 68 | std::array<GraphicsEnvironment, NUM_PROGRAMS> envs; |
| 67 | /// @return Pointer to a valid shader, nullptr when nothing is found | 69 | std::array<Shader::Environment*, NUM_PROGRAMS> env_ptrs; |
| 68 | T* TryGet(VAddr addr) const { | ||
| 69 | std::scoped_lock lock{lookup_mutex}; | ||
| 70 | 70 | ||
| 71 | const auto it = lookup_cache.find(addr); | 71 | std::span<Shader::Environment* const> Span() const noexcept { |
| 72 | if (it == lookup_cache.end()) { | 72 | return std::span(env_ptrs.begin(), std::ranges::find(env_ptrs, nullptr)); |
| 73 | return nullptr; | ||
| 74 | } | 73 | } |
| 75 | return it->second->data; | 74 | }; |
| 76 | } | ||
| 77 | |||
| 78 | protected: | ||
| 79 | explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {} | ||
| 80 | 75 | ||
| 81 | /// @brief Register in the cache a given entry | 76 | explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_, |
| 82 | /// @param data Shader to store in the cache | 77 | Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, |
| 83 | /// @param addr Start address of the shader that will be registered | 78 | Tegra::Engines::KeplerCompute& kepler_compute_); |
| 84 | /// @param size Size in bytes of the shader | ||
| 85 | void Register(std::unique_ptr<T> data, VAddr addr, std::size_t size) { | ||
| 86 | std::scoped_lock lock{invalidation_mutex, lookup_mutex}; | ||
| 87 | 79 | ||
| 88 | const VAddr addr_end = addr + size; | 80 | /// @brief Update the hashes and information of shader stages |
| 89 | Entry* const entry = NewEntry(addr, addr_end, data.get()); | 81 | /// @param unique_hashes Shader hashes to store into when a stage is enabled |
| 82 | /// @return True on success, false on error | ||
| 83 | bool RefreshStages(std::array<u64, NUM_PROGRAMS>& unique_hashes); | ||
| 90 | 84 | ||
| 91 | const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS; | 85 | /// @brief Returns information about the current compute shader |
| 92 | for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) { | 86 | /// @return Pointer to a valid shader, nullptr on error |
| 93 | invalidation_cache[page].push_back(entry); | 87 | const ShaderInfo* ComputeShader(); |
| 94 | } | ||
| 95 | 88 | ||
| 96 | storage.push_back(std::move(data)); | 89 | /// @brief Collect the current graphics environments |
| 90 | void GetGraphicsEnvironments(GraphicsEnvironments& result, | ||
| 91 | const std::array<u64, NUM_PROGRAMS>& unique_hashes); | ||
| 97 | 92 | ||
| 98 | rasterizer.UpdatePagesCachedCount(addr, size, 1); | 93 | Tegra::MemoryManager& gpu_memory; |
| 99 | } | 94 | Tegra::Engines::Maxwell3D& maxwell3d; |
| 95 | Tegra::Engines::KeplerCompute& kepler_compute; | ||
| 100 | 96 | ||
| 101 | /// @brief Called when a shader is going to be removed | 97 | std::array<const ShaderInfo*, NUM_PROGRAMS> shader_infos{}; |
| 102 | /// @param shader Shader that will be removed | 98 | bool last_shaders_valid = false; |
| 103 | /// @pre invalidation_cache is locked | ||
| 104 | /// @pre lookup_mutex is locked | ||
| 105 | virtual void OnShaderRemoval([[maybe_unused]] T* shader) {} | ||
| 106 | 99 | ||
| 107 | private: | 100 | private: |
| 101 | /// @brief Tries to obtain a cached shader starting at a given address | ||
| 102 | /// @note Doesn't check for ranges; the given address has to be the start of the shader | ||
| 103 | /// @param addr Start address of the shader; lookups are exact, not by region | ||
| 104 | /// @return Pointer to a valid shader, nullptr when nothing is found | ||
| 105 | ShaderInfo* TryGet(VAddr addr) const; | ||
| 106 | |||
| 107 | /// @brief Register in the cache a given entry | ||
| 108 | /// @param data Shader to store in the cache | ||
| 109 | /// @param addr Start address of the shader that will be registered | ||
| 110 | /// @param size Size in bytes of the shader | ||
| 111 | void Register(std::unique_ptr<ShaderInfo> data, VAddr addr, size_t size); | ||
| 112 | |||
| 108 | /// @brief Invalidate pages in a given region | 113 | /// @brief Invalidate pages in a given region |
| 109 | /// @pre invalidation_mutex is locked | 114 | /// @pre invalidation_mutex is locked |
| 110 | void InvalidatePagesInRegion(VAddr addr, std::size_t size) { | 115 | void InvalidatePagesInRegion(VAddr addr, size_t size); |
| 111 | const VAddr addr_end = addr + size; | ||
| 112 | const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS; | ||
| 113 | for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) { | ||
| 114 | auto it = invalidation_cache.find(page); | ||
| 115 | if (it == invalidation_cache.end()) { | ||
| 116 | continue; | ||
| 117 | } | ||
| 118 | InvalidatePageEntries(it->second, addr, addr_end); | ||
| 119 | } | ||
| 120 | } | ||
| 121 | 116 | ||
| 122 | /// @brief Remove shaders marked for deletion | 117 | /// @brief Remove shaders marked for deletion |
| 123 | /// @pre invalidation_mutex is locked | 118 | /// @pre invalidation_mutex is locked |
| 124 | void RemovePendingShaders() { | 119 | void RemovePendingShaders(); |
| 125 | if (marked_for_removal.empty()) { | ||
| 126 | return; | ||
| 127 | } | ||
| 128 | // Remove duplicates | ||
| 129 | std::sort(marked_for_removal.begin(), marked_for_removal.end()); | ||
| 130 | marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()), | ||
| 131 | marked_for_removal.end()); | ||
| 132 | |||
| 133 | std::vector<T*> removed_shaders; | ||
| 134 | removed_shaders.reserve(marked_for_removal.size()); | ||
| 135 | |||
| 136 | std::scoped_lock lock{lookup_mutex}; | ||
| 137 | |||
| 138 | for (Entry* const entry : marked_for_removal) { | ||
| 139 | removed_shaders.push_back(entry->data); | ||
| 140 | |||
| 141 | const auto it = lookup_cache.find(entry->addr_start); | ||
| 142 | ASSERT(it != lookup_cache.end()); | ||
| 143 | lookup_cache.erase(it); | ||
| 144 | } | ||
| 145 | marked_for_removal.clear(); | ||
| 146 | |||
| 147 | if (!removed_shaders.empty()) { | ||
| 148 | RemoveShadersFromStorage(std::move(removed_shaders)); | ||
| 149 | } | ||
| 150 | } | ||
| 151 | 120 | ||
| 152 | /// @brief Invalidates entries in a given range for the passed page | 121 | /// @brief Invalidates entries in a given range for the passed page |
| 153 | /// @param entries Vector of entries in the page, it will be modified on overlaps | 122 | /// @param entries Vector of entries in the page, it will be modified on overlaps |
| 154 | /// @param addr Start address of the invalidation | 123 | /// @param addr Start address of the invalidation |
| 155 | /// @param addr_end Non-inclusive end address of the invalidation | 124 | /// @param addr_end Non-inclusive end address of the invalidation |
| 156 | /// @pre invalidation_mutex is locked | 125 | /// @pre invalidation_mutex is locked |
| 157 | void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) { | 126 | void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end); |
| 158 | std::size_t index = 0; | ||
| 159 | while (index < entries.size()) { | ||
| 160 | Entry* const entry = entries[index]; | ||
| 161 | if (!entry->Overlaps(addr, addr_end)) { | ||
| 162 | ++index; | ||
| 163 | continue; | ||
| 164 | } | ||
| 165 | |||
| 166 | UnmarkMemory(entry); | ||
| 167 | RemoveEntryFromInvalidationCache(entry); | ||
| 168 | marked_for_removal.push_back(entry); | ||
| 169 | } | ||
| 170 | } | ||
| 171 | 127 | ||
| 172 | /// @brief Removes all references to an entry in the invalidation cache | 128 | /// @brief Removes all references to an entry in the invalidation cache |
| 173 | /// @param entry Entry to remove from the invalidation cache | 129 | /// @param entry Entry to remove from the invalidation cache |
| 174 | /// @pre invalidation_mutex is locked | 130 | /// @pre invalidation_mutex is locked |
| 175 | void RemoveEntryFromInvalidationCache(const Entry* entry) { | 131 | void RemoveEntryFromInvalidationCache(const Entry* entry); |
| 176 | const u64 page_end = (entry->addr_end + PAGE_SIZE - 1) >> PAGE_BITS; | ||
| 177 | for (u64 page = entry->addr_start >> PAGE_BITS; page < page_end; ++page) { | ||
| 178 | const auto entries_it = invalidation_cache.find(page); | ||
| 179 | ASSERT(entries_it != invalidation_cache.end()); | ||
| 180 | std::vector<Entry*>& entries = entries_it->second; | ||
| 181 | |||
| 182 | const auto entry_it = std::find(entries.begin(), entries.end(), entry); | ||
| 183 | ASSERT(entry_it != entries.end()); | ||
| 184 | entries.erase(entry_it); | ||
| 185 | } | ||
| 186 | } | ||
| 187 | 132 | ||
| 188 | /// @brief Unmarks an entry from the rasterizer cache | 133 | /// @brief Unmarks an entry from the rasterizer cache |
| 189 | /// @param entry Entry to unmark from memory | 134 | /// @param entry Entry to unmark from memory |
| 190 | void UnmarkMemory(Entry* entry) { | 135 | void UnmarkMemory(Entry* entry); |
| 191 | if (!entry->is_memory_marked) { | ||
| 192 | return; | ||
| 193 | } | ||
| 194 | entry->is_memory_marked = false; | ||
| 195 | |||
| 196 | const VAddr addr = entry->addr_start; | ||
| 197 | const std::size_t size = entry->addr_end - addr; | ||
| 198 | rasterizer.UpdatePagesCachedCount(addr, size, -1); | ||
| 199 | } | ||
| 200 | 136 | ||
| 201 | /// @brief Removes a vector of shaders from a list | 137 | /// @brief Removes a vector of shaders from a list |
| 202 | /// @param removed_shaders Shaders to be removed from the storage | 138 | /// @param removed_shaders Shaders to be removed from the storage |
| 203 | /// @pre invalidation_mutex is locked | 139 | /// @pre invalidation_mutex is locked |
| 204 | /// @pre lookup_mutex is locked | 140 | /// @pre lookup_mutex is locked |
| 205 | void RemoveShadersFromStorage(std::vector<T*> removed_shaders) { | 141 | void RemoveShadersFromStorage(std::vector<ShaderInfo*> removed_shaders); |
| 206 | // Notify removals | ||
| 207 | for (T* const shader : removed_shaders) { | ||
| 208 | OnShaderRemoval(shader); | ||
| 209 | } | ||
| 210 | |||
| 211 | // Remove them from the cache | ||
| 212 | const auto is_removed = [&removed_shaders](const std::unique_ptr<T>& shader) { | ||
| 213 | return std::find(removed_shaders.begin(), removed_shaders.end(), shader.get()) != | ||
| 214 | removed_shaders.end(); | ||
| 215 | }; | ||
| 216 | std::erase_if(storage, is_removed); | ||
| 217 | } | ||
| 218 | 142 | ||
| 219 | /// @brief Creates a new entry in the lookup cache and returns its pointer | 143 | /// @brief Creates a new entry in the lookup cache and returns its pointer |
| 220 | /// @pre lookup_mutex is locked | 144 | /// @pre lookup_mutex is locked |
| 221 | Entry* NewEntry(VAddr addr, VAddr addr_end, T* data) { | 145 | Entry* NewEntry(VAddr addr, VAddr addr_end, ShaderInfo* data); |
| 222 | auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data}); | ||
| 223 | Entry* const entry_pointer = entry.get(); | ||
| 224 | 146 | ||
| 225 | lookup_cache.emplace(addr, std::move(entry)); | 147 | /// @brief Create a new shader entry and register it |
| 226 | return entry_pointer; | 148 | const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr); |
| 227 | } | ||
| 228 | 149 | ||
| 229 | VideoCore::RasterizerInterface& rasterizer; | 150 | VideoCore::RasterizerInterface& rasterizer; |
| 230 | 151 | ||
| @@ -233,7 +154,7 @@ private: | |||
| 233 | 154 | ||
| 234 | std::unordered_map<u64, std::unique_ptr<Entry>> lookup_cache; | 155 | std::unordered_map<u64, std::unique_ptr<Entry>> lookup_cache; |
| 235 | std::unordered_map<u64, std::vector<Entry*>> invalidation_cache; | 156 | std::unordered_map<u64, std::vector<Entry*>> invalidation_cache; |
| 236 | std::vector<std::unique_ptr<T>> storage; | 157 | std::vector<std::unique_ptr<ShaderInfo>> storage; |
| 237 | std::vector<Entry*> marked_for_removal; | 158 | std::vector<Entry*> marked_for_removal; |
| 238 | }; | 159 | }; |
| 239 | 160 | ||
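With the template parameter gone, the refresh flow in this header reads as: RefreshStages fills unique_hashes for the enabled stages (zero marks a disabled one), GetGraphicsEnvironments then populates env_ptrs front to back, and Span() trims the array at the first nullptr so callers only see the filled prefix. That span trick in isolation (illustrative stand-ins, not yuzu code):

    #include <algorithm>
    #include <array>
    #include <cstdio>
    #include <span>

    int main() {
        std::array<const char*, 6> env_ptrs{"vertex", "fragment"}; // Rest are nullptr
        const std::span populated(env_ptrs.begin(), std::ranges::find(env_ptrs, nullptr));
        for (const char* name : populated) {
            std::printf("%s\n", name); // Prints vertex, then fragment
        }
    }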
diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp new file mode 100644 index 000000000..8a4581c19 --- /dev/null +++ b/src/video_core/shader_environment.cpp | |||
| @@ -0,0 +1,460 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <filesystem> | ||
| 6 | #include <fstream> | ||
| 7 | #include <memory> | ||
| 8 | #include <optional> | ||
| 9 | #include <utility> | ||
| 10 | |||
| 11 | #include "common/assert.h" | ||
| 12 | #include "common/cityhash.h" | ||
| 13 | #include "common/common_types.h" | ||
| 14 | #include "common/div_ceil.h" | ||
| 15 | #include "common/fs/fs.h" | ||
| 16 | #include "common/logging/log.h" | ||
| 17 | #include "shader_recompiler/environment.h" | ||
| 18 | #include "video_core/memory_manager.h" | ||
| 19 | #include "video_core/shader_environment.h" | ||
| 20 | #include "video_core/textures/texture.h" | ||
| 21 | |||
| 22 | namespace VideoCommon { | ||
| 23 | |||
| 24 | constexpr std::array<char, 8> MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; | ||
| 25 | |||
| 26 | constexpr size_t INST_SIZE = sizeof(u64); | ||
| 27 | |||
| 28 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 29 | |||
| 30 | static u64 MakeCbufKey(u32 index, u32 offset) { | ||
| 31 | return (static_cast<u64>(index) << 32) | offset; | ||
| 32 | } | ||
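// Worked example of the packing above (values assumed for illustration):
// MakeCbufKey(1, 0x20) == 0x0000000100000020, giving one u64 map key per
// (constant buffer index, byte offset) pair.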
| 33 | |||
| 34 | static Shader::TextureType ConvertType(const Tegra::Texture::TICEntry& entry) { | ||
| 35 | switch (entry.texture_type) { | ||
| 36 | case Tegra::Texture::TextureType::Texture1D: | ||
| 37 | return Shader::TextureType::Color1D; | ||
| 38 | case Tegra::Texture::TextureType::Texture2D: | ||
| 39 | case Tegra::Texture::TextureType::Texture2DNoMipmap: | ||
| 40 | return Shader::TextureType::Color2D; | ||
| 41 | case Tegra::Texture::TextureType::Texture3D: | ||
| 42 | return Shader::TextureType::Color3D; | ||
| 43 | case Tegra::Texture::TextureType::TextureCubemap: | ||
| 44 | return Shader::TextureType::ColorCube; | ||
| 45 | case Tegra::Texture::TextureType::Texture1DArray: | ||
| 46 | return Shader::TextureType::ColorArray1D; | ||
| 47 | case Tegra::Texture::TextureType::Texture2DArray: | ||
| 48 | return Shader::TextureType::ColorArray2D; | ||
| 49 | case Tegra::Texture::TextureType::Texture1DBuffer: | ||
| 50 | return Shader::TextureType::Buffer; | ||
| 51 | case Tegra::Texture::TextureType::TextureCubeArray: | ||
| 52 | return Shader::TextureType::ColorArrayCube; | ||
| 53 | default: | ||
| 54 | throw Shader::NotImplementedException("Unknown texture type"); | ||
| 55 | } | ||
| 56 | } | ||
| 57 | |||
| 58 | GenericEnvironment::GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, | ||
| 59 | u32 start_address_) | ||
| 60 | : gpu_memory{&gpu_memory_}, program_base{program_base_} { | ||
| 61 | start_address = start_address_; | ||
| 62 | } | ||
| 63 | |||
| 64 | GenericEnvironment::~GenericEnvironment() = default; | ||
| 65 | |||
| 66 | u32 GenericEnvironment::TextureBoundBuffer() const { | ||
| 67 | return texture_bound; | ||
| 68 | } | ||
| 69 | |||
| 70 | u32 GenericEnvironment::LocalMemorySize() const { | ||
| 71 | return local_memory_size; | ||
| 72 | } | ||
| 73 | |||
| 74 | u32 GenericEnvironment::SharedMemorySize() const { | ||
| 75 | return shared_memory_size; | ||
| 76 | } | ||
| 77 | |||
| 78 | std::array<u32, 3> GenericEnvironment::WorkgroupSize() const { | ||
| 79 | return workgroup_size; | ||
| 80 | } | ||
| 81 | |||
| 82 | u64 GenericEnvironment::ReadInstruction(u32 address) { | ||
| 83 | read_lowest = std::min(read_lowest, address); | ||
| 84 | read_highest = std::max(read_highest, address); | ||
| 85 | |||
| 86 | if (address >= cached_lowest && address < cached_highest) { | ||
| 87 | return code[(address - cached_lowest) / INST_SIZE]; | ||
| 88 | } | ||
| 89 | has_unbound_instructions = true; | ||
| 90 | return gpu_memory->Read<u64>(program_base + address); | ||
| 91 | } | ||
| 92 | |||
| 93 | std::optional<u64> GenericEnvironment::Analyze() { | ||
| 94 | const std::optional<u64> size{TryFindSize()}; | ||
| 95 | if (!size) { | ||
| 96 | return std::nullopt; | ||
| 97 | } | ||
| 98 | cached_lowest = start_address; | ||
| 99 | cached_highest = start_address + static_cast<u32>(*size); | ||
| 100 | return Common::CityHash64(reinterpret_cast<const char*>(code.data()), *size); | ||
| 101 | } | ||
| 102 | |||
| 103 | void GenericEnvironment::SetCachedSize(size_t size_bytes) { | ||
| 104 | cached_lowest = start_address; | ||
| 105 | cached_highest = start_address + static_cast<u32>(size_bytes); | ||
| 106 | code.resize(CachedSize()); | ||
| 107 | gpu_memory->ReadBlock(program_base + cached_lowest, code.data(), code.size() * sizeof(u64)); | ||
| 108 | } | ||
| 109 | |||
| 110 | size_t GenericEnvironment::CachedSize() const noexcept { | ||
| 111 | return cached_highest - cached_lowest + INST_SIZE; | ||
| 112 | } | ||
| 113 | |||
| 114 | size_t GenericEnvironment::ReadSize() const noexcept { | ||
| 115 | return read_highest - read_lowest + INST_SIZE; | ||
| 116 | } | ||
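// Note on the two size helpers above: cached_highest and read_highest record
// the address of the last instruction touched, so both spans add INST_SIZE
// (8 bytes) to include it. With assumed values lowest = 0x00 and
// highest = 0x30, the size is 0x38 bytes, i.e. seven 64-bit instructions.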
| 117 | |||
| 118 | bool GenericEnvironment::CanBeSerialized() const noexcept { | ||
| 119 | return !has_unbound_instructions; | ||
| 120 | } | ||
| 121 | |||
| 122 | u64 GenericEnvironment::CalculateHash() const { | ||
| 123 | const size_t size{ReadSize()}; | ||
| 124 | const auto data{std::make_unique<char[]>(size)}; | ||
| 125 | gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size); | ||
| 126 | return Common::CityHash64(data.get(), size); | ||
| 127 | } | ||
| 128 | |||
| 129 | void GenericEnvironment::Serialize(std::ofstream& file) const { | ||
| 130 | const u64 code_size{static_cast<u64>(CachedSize())}; | ||
| 131 | const u64 num_texture_types{static_cast<u64>(texture_types.size())}; | ||
| 132 | const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())}; | ||
| 133 | |||
| 134 | file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size)) | ||
| 135 | .write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types)) | ||
| 136 | .write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values)) | ||
| 137 | .write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size)) | ||
| 138 | .write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound)) | ||
| 139 | .write(reinterpret_cast<const char*>(&start_address), sizeof(start_address)) | ||
| 140 | .write(reinterpret_cast<const char*>(&cached_lowest), sizeof(cached_lowest)) | ||
| 141 | .write(reinterpret_cast<const char*>(&cached_highest), sizeof(cached_highest)) | ||
| 142 | .write(reinterpret_cast<const char*>(&stage), sizeof(stage)) | ||
| 143 | .write(reinterpret_cast<const char*>(code.data()), code_size); | ||
| 144 | for (const auto [key, type] : texture_types) { | ||
| 145 | file.write(reinterpret_cast<const char*>(&key), sizeof(key)) | ||
| 146 | .write(reinterpret_cast<const char*>(&type), sizeof(type)); | ||
| 147 | } | ||
| 148 | for (const auto [key, type] : cbuf_values) { | ||
| 149 | file.write(reinterpret_cast<const char*>(&key), sizeof(key)) | ||
| 150 | .write(reinterpret_cast<const char*>(&type), sizeof(type)); | ||
| 151 | } | ||
| 152 | if (stage == Shader::Stage::Compute) { | ||
| 153 | file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size)) | ||
| 154 | .write(reinterpret_cast<const char*>(&shared_memory_size), sizeof(shared_memory_size)); | ||
| 155 | } else { | ||
| 156 | file.write(reinterpret_cast<const char*>(&sph), sizeof(sph)); | ||
| 157 | if (stage == Shader::Stage::Geometry) { | ||
| 158 | file.write(reinterpret_cast<const char*>(&gp_passthrough_mask), | ||
| 159 | sizeof(gp_passthrough_mask)); | ||
| 160 | } | ||
| 161 | } | ||
| 162 | } | ||
| 163 | |||
| 164 | std::optional<u64> GenericEnvironment::TryFindSize() { | ||
| 165 | static constexpr size_t BLOCK_SIZE = 0x1000; | ||
| 166 | static constexpr size_t MAXIMUM_SIZE = 0x100000; | ||
| 167 | |||
| 168 | static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; | ||
| 169 | static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; | ||
| 170 | |||
| 171 | GPUVAddr guest_addr{program_base + start_address}; | ||
| 172 | size_t offset{0}; | ||
| 173 | size_t size{BLOCK_SIZE}; | ||
| 174 | while (size <= MAXIMUM_SIZE) { | ||
| 175 | code.resize(size / INST_SIZE); | ||
| 176 | u64* const data = code.data() + offset / INST_SIZE; | ||
| 177 | gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE); | ||
| 178 | for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) { | ||
| 179 | const u64 inst = data[index / INST_SIZE]; | ||
| 180 | if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) { | ||
| 181 | return offset + index; | ||
| 182 | } | ||
| 183 | } | ||
| 184 | guest_addr += BLOCK_SIZE; | ||
| 185 | size += BLOCK_SIZE; | ||
| 186 | offset += BLOCK_SIZE; | ||
| 187 | } | ||
| 188 | return std::nullopt; | ||
| 189 | } | ||
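// SELF_BRANCH_A/B above appear to encode Maxwell's unconditional
// branch-to-self, which shader blobs conventionally emit after the last real
// instruction, so the first match bounds the program. The scan in miniature
// (illustrative, not yuzu code):
//
//   for (size_t index = 0; index < words; ++index) {
//       if (code[index] == SELF_BRANCH_A || code[index] == SELF_BRANCH_B) {
//           return index * INST_SIZE; // Byte offset of the terminator
//       }
//   }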
| 190 | |||
| 191 | Shader::TextureType GenericEnvironment::ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, | ||
| 192 | bool via_header_index, u32 raw) { | ||
| 193 | const auto handle{Tegra::Texture::TexturePair(raw, via_header_index)}; | ||
| 194 | const GPUVAddr descriptor_addr{tic_addr + handle.first * sizeof(Tegra::Texture::TICEntry)}; | ||
| 195 | Tegra::Texture::TICEntry entry; | ||
| 196 | gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry)); | ||
| 197 | const Shader::TextureType result{ConvertType(entry)}; | ||
| 198 | texture_types.emplace(raw, result); | ||
| 199 | return result; | ||
| 200 | } | ||
| 201 | |||
| 202 | GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, | ||
| 203 | Tegra::MemoryManager& gpu_memory_, | ||
| 204 | Maxwell::ShaderProgram program, GPUVAddr program_base_, | ||
| 205 | u32 start_address_) | ||
| 206 | : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} { | ||
| 207 | gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph)); | ||
| 208 | gp_passthrough_mask = maxwell3d->regs.gp_passthrough_mask; | ||
| 209 | switch (program) { | ||
| 210 | case Maxwell::ShaderProgram::VertexA: | ||
| 211 | stage = Shader::Stage::VertexA; | ||
| 212 | stage_index = 0; | ||
| 213 | break; | ||
| 214 | case Maxwell::ShaderProgram::VertexB: | ||
| 215 | stage = Shader::Stage::VertexB; | ||
| 216 | stage_index = 0; | ||
| 217 | break; | ||
| 218 | case Maxwell::ShaderProgram::TesselationControl: | ||
| 219 | stage = Shader::Stage::TessellationControl; | ||
| 220 | stage_index = 1; | ||
| 221 | break; | ||
| 222 | case Maxwell::ShaderProgram::TesselationEval: | ||
| 223 | stage = Shader::Stage::TessellationEval; | ||
| 224 | stage_index = 2; | ||
| 225 | break; | ||
| 226 | case Maxwell::ShaderProgram::Geometry: | ||
| 227 | stage = Shader::Stage::Geometry; | ||
| 228 | stage_index = 3; | ||
| 229 | break; | ||
| 230 | case Maxwell::ShaderProgram::Fragment: | ||
| 231 | stage = Shader::Stage::Fragment; | ||
| 232 | stage_index = 4; | ||
| 233 | break; | ||
| 234 | default: | ||
| 235 | UNREACHABLE_MSG("Invalid program={}", program); | ||
| 236 | break; | ||
| 237 | } | ||
| 238 | const u64 local_size{sph.LocalMemorySize()}; | ||
| 239 | ASSERT(local_size <= std::numeric_limits<u32>::max()); | ||
| 240 | local_memory_size = static_cast<u32>(local_size) + sph.common3.shader_local_memory_crs_size; | ||
| 241 | texture_bound = maxwell3d->regs.tex_cb_index; | ||
| 242 | } | ||
| 243 | |||
| 244 | u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { | ||
| 245 | const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]}; | ||
| 246 | ASSERT(cbuf.enabled); | ||
| 247 | u32 value{}; | ||
| 248 | if (cbuf_offset < cbuf.size) { | ||
| 249 | value = gpu_memory->Read<u32>(cbuf.address + cbuf_offset); | ||
| 250 | } | ||
| 251 | cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); | ||
| 252 | return value; | ||
| 253 | } | ||
| 254 | |||
| 255 | Shader::TextureType GraphicsEnvironment::ReadTextureType(u32 handle) { | ||
| 256 | const auto& regs{maxwell3d->regs}; | ||
| 257 | const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; | ||
| 258 | return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, handle); | ||
| 259 | } | ||
| 260 | |||
| 261 | ComputeEnvironment::ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_, | ||
| 262 | Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, | ||
| 263 | u32 start_address_) | ||
| 264 | : GenericEnvironment{gpu_memory_, program_base_, start_address_}, kepler_compute{ | ||
| 265 | &kepler_compute_} { | ||
| 266 | const auto& qmd{kepler_compute->launch_description}; | ||
| 267 | stage = Shader::Stage::Compute; | ||
| 268 | local_memory_size = qmd.local_pos_alloc + qmd.local_crs_alloc; | ||
| 269 | texture_bound = kepler_compute->regs.tex_cb_index; | ||
| 270 | shared_memory_size = qmd.shared_alloc; | ||
| 271 | workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; | ||
| 272 | } | ||
| 273 | |||
| 274 | u32 ComputeEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { | ||
| 275 | const auto& qmd{kepler_compute->launch_description}; | ||
| 276 | ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0); | ||
| 277 | const auto& cbuf{qmd.const_buffer_config[cbuf_index]}; | ||
| 278 | u32 value{}; | ||
| 279 | if (cbuf_offset < cbuf.size) { | ||
| 280 | value = gpu_memory->Read<u32>(cbuf.Address() + cbuf_offset); | ||
| 281 | } | ||
| 282 | cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); | ||
| 283 | return value; | ||
| 284 | } | ||
| 285 | |||
| 286 | Shader::TextureType ComputeEnvironment::ReadTextureType(u32 handle) { | ||
| 287 | const auto& regs{kepler_compute->regs}; | ||
| 288 | const auto& qmd{kepler_compute->launch_description}; | ||
| 289 | return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle); | ||
| 290 | } | ||
| 291 | |||
| 292 | void FileEnvironment::Deserialize(std::ifstream& file) { | ||
| 293 | u64 code_size{}; | ||
| 294 | u64 num_texture_types{}; | ||
| 295 | u64 num_cbuf_values{}; | ||
| 296 | file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size)) | ||
| 297 | .read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types)) | ||
| 298 | .read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values)) | ||
| 299 | .read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size)) | ||
| 300 | .read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound)) | ||
| 301 | .read(reinterpret_cast<char*>(&start_address), sizeof(start_address)) | ||
| 302 | .read(reinterpret_cast<char*>(&read_lowest), sizeof(read_lowest)) | ||
| 303 | .read(reinterpret_cast<char*>(&read_highest), sizeof(read_highest)) | ||
| 304 | .read(reinterpret_cast<char*>(&stage), sizeof(stage)); | ||
| 305 | code = std::make_unique<u64[]>(Common::DivCeil(code_size, sizeof(u64))); | ||
| 306 | file.read(reinterpret_cast<char*>(code.get()), code_size); | ||
| 307 | for (size_t i = 0; i < num_texture_types; ++i) { | ||
| 308 | u32 key; | ||
| 309 | Shader::TextureType type; | ||
| 310 | file.read(reinterpret_cast<char*>(&key), sizeof(key)) | ||
| 311 | .read(reinterpret_cast<char*>(&type), sizeof(type)); | ||
| 312 | texture_types.emplace(key, type); | ||
| 313 | } | ||
| 314 | for (size_t i = 0; i < num_cbuf_values; ++i) { | ||
| 315 | u64 key; | ||
| 316 | u32 value; | ||
| 317 | file.read(reinterpret_cast<char*>(&key), sizeof(key)) | ||
| 318 | .read(reinterpret_cast<char*>(&value), sizeof(value)); | ||
| 319 | cbuf_values.emplace(key, value); | ||
| 320 | } | ||
| 321 | if (stage == Shader::Stage::Compute) { | ||
| 322 | file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size)) | ||
| 323 | .read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size)); | ||
| 324 | } else { | ||
| 325 | file.read(reinterpret_cast<char*>(&sph), sizeof(sph)); | ||
| 326 | if (stage == Shader::Stage::Geometry) { | ||
| 327 | file.read(reinterpret_cast<char*>(&gp_passthrough_mask), sizeof(gp_passthrough_mask)); | ||
| 328 | } | ||
| 329 | } | ||
| 330 | } | ||
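Deserialize pins down the on-disk format of one environment record. Reconstructed from the reads above (field widths follow the member types; sizeof(stage) and sizeof(sph) depend on the shader_recompiler definitions):

    // Per-environment record, in stream order:
    //   u64 code_size;            // byte count of the code blob below
    //   u64 num_texture_types;
    //   u64 num_cbuf_values;
    //   u32 local_memory_size;
    //   u32 texture_bound;
    //   u32 start_address;
    //   u32 read_lowest;
    //   u32 read_highest;
    //   Shader::Stage stage;
    //   u8  code[code_size];
    //   { u32 key; Shader::TextureType type; }  x num_texture_types
    //   { u64 key; u32 value; }                 x num_cbuf_values
    //   compute:   u32 workgroup_size[3]; u32 shared_memory_size;
    //   otherwise: ProgramHeader sph; (geometry only) gp_passthrough_mask;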
| 331 | |||
| 332 | u64 FileEnvironment::ReadInstruction(u32 address) { | ||
| 333 | if (address < read_lowest || address > read_highest) { | ||
| 334 | throw Shader::LogicError("Out of bounds address {}", address); | ||
| 335 | } | ||
| 336 | return code[(address - read_lowest) / sizeof(u64)]; | ||
| 337 | } | ||
| 338 | |||
| 339 | u32 FileEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { | ||
| 340 | const auto it{cbuf_values.find(MakeCbufKey(cbuf_index, cbuf_offset))}; | ||
| 341 | if (it == cbuf_values.end()) { | ||
| 342 | throw Shader::LogicError("Uncached constant buffer value"); | ||
| 343 | } | ||
| 344 | return it->second; | ||
| 345 | } | ||
| 346 | |||
| 347 | Shader::TextureType FileEnvironment::ReadTextureType(u32 handle) { | ||
| 348 | const auto it{texture_types.find(handle)}; | ||
| 349 | if (it == texture_types.end()) { | ||
| 350 | throw Shader::LogicError("Uncached texture type"); | ||
| 351 | } | ||
| 352 | return it->second; | ||
| 353 | } | ||
| 354 | |||
| 355 | u32 FileEnvironment::LocalMemorySize() const { | ||
| 356 | return local_memory_size; | ||
| 357 | } | ||
| 358 | |||
| 359 | u32 FileEnvironment::SharedMemorySize() const { | ||
| 360 | return shared_memory_size; | ||
| 361 | } | ||
| 362 | |||
| 363 | u32 FileEnvironment::TextureBoundBuffer() const { | ||
| 364 | return texture_bound; | ||
| 365 | } | ||
| 366 | |||
| 367 | std::array<u32, 3> FileEnvironment::WorkgroupSize() const { | ||
| 368 | return workgroup_size; | ||
| 369 | } | ||
| 370 | |||
| 371 | void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs, | ||
| 372 | const std::filesystem::path& filename, u32 cache_version) try { | ||
| 373 | std::ofstream file(filename, std::ios::binary | std::ios::ate | std::ios::app); | ||
| 374 | if (!file.is_open()) { | ||
| 375 | LOG_ERROR(Common_Filesystem, "Failed to open pipeline cache file {}", | ||
| 376 | Common::FS::PathToUTF8String(filename)); | ||
| 377 | return; | ||
| 378 | } | ||
| 379 | file.exceptions(std::ofstream::failbit); | ||
| 380 | if (file.tellp() == 0) { | ||
| 381 | // Write header | ||
| 382 | file.write(MAGIC_NUMBER.data(), MAGIC_NUMBER.size()) | ||
| 383 | .write(reinterpret_cast<const char*>(&cache_version), sizeof(cache_version)); | ||
| 384 | } | ||
| 385 | if (!std::ranges::all_of(envs, &GenericEnvironment::CanBeSerialized)) { | ||
| 386 | return; | ||
| 387 | } | ||
| 388 | const u32 num_envs{static_cast<u32>(envs.size())}; | ||
| 389 | file.write(reinterpret_cast<const char*>(&num_envs), sizeof(num_envs)); | ||
| 390 | for (const GenericEnvironment* const env : envs) { | ||
| 391 | env->Serialize(file); | ||
| 392 | } | ||
| 393 | file.write(key.data(), key.size_bytes()); | ||
| 394 | |||
| 395 | } catch (const std::ios_base::failure& e) { | ||
| 396 | LOG_ERROR(Common_Filesystem, "{}", e.what()); | ||
| 397 | if (!Common::FS::RemoveFile(filename)) { | ||
| 398 | LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}", | ||
| 399 | Common::FS::PathToUTF8String(filename)); | ||
| 400 | } | ||
| 401 | } | ||
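Opening in append mode means the eight-byte magic and the u32 version are written only when the file is created (tellp() == 0); every later call appends one record. The overall layout, matching the reads in LoadPipelines below:

    //   char magic[8];        // MAGIC_NUMBER
    //   u32  cache_version;
    //   repeated until EOF:
    //     u32 num_envs;
    //     environment records x num_envs  // format per FileEnvironment::Deserialize
    //     key bytes                       // raw bytes of the pipeline key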
| 402 | |||
| 403 | void LoadPipelines( | ||
| 404 | std::stop_token stop_loading, const std::filesystem::path& filename, u32 expected_cache_version, | ||
| 405 | Common::UniqueFunction<void, std::ifstream&, FileEnvironment> load_compute, | ||
| 406 | Common::UniqueFunction<void, std::ifstream&, std::vector<FileEnvironment>> load_graphics) try { | ||
| 407 | std::ifstream file(filename, std::ios::binary | std::ios::ate); | ||
| 408 | if (!file.is_open()) { | ||
| 409 | return; | ||
| 410 | } | ||
| 411 | file.exceptions(std::ifstream::failbit); | ||
| 412 | const auto end{file.tellg()}; | ||
| 413 | file.seekg(0, std::ios::beg); | ||
| 414 | |||
| 415 | std::array<char, 8> magic_number; | ||
| 416 | u32 cache_version; | ||
| 417 | file.read(magic_number.data(), magic_number.size()) | ||
| 418 | .read(reinterpret_cast<char*>(&cache_version), sizeof(cache_version)); | ||
| 419 | if (magic_number != MAGIC_NUMBER || cache_version != expected_cache_version) { | ||
| 420 | file.close(); | ||
| 421 | if (Common::FS::RemoveFile(filename)) { | ||
| 422 | if (magic_number != MAGIC_NUMBER) { | ||
| 423 | LOG_ERROR(Common_Filesystem, "Invalid pipeline cache file"); | ||
| 424 | } | ||
| 425 | if (cache_version != expected_cache_version) { | ||
| 426 | LOG_INFO(Common_Filesystem, "Deleting old pipeline cache"); | ||
| 427 | } | ||
| 428 | } else { | ||
| 429 | LOG_ERROR(Common_Filesystem, | ||
| 430 | "Invalid pipeline cache file and failed to delete it in \"{}\"", | ||
| 431 | Common::FS::PathToUTF8String(filename)); | ||
| 432 | } | ||
| 433 | return; | ||
| 434 | } | ||
| 435 | while (file.tellg() != end) { | ||
| 436 | if (stop_loading.stop_requested()) { | ||
| 437 | return; | ||
| 438 | } | ||
| 439 | u32 num_envs{}; | ||
| 440 | file.read(reinterpret_cast<char*>(&num_envs), sizeof(num_envs)); | ||
| 441 | std::vector<FileEnvironment> envs(num_envs); | ||
| 442 | for (FileEnvironment& env : envs) { | ||
| 443 | env.Deserialize(file); | ||
| 444 | } | ||
| 445 | if (envs.front().ShaderStage() == Shader::Stage::Compute) { | ||
| 446 | load_compute(file, std::move(envs.front())); | ||
| 447 | } else { | ||
| 448 | load_graphics(file, std::move(envs)); | ||
| 449 | } | ||
| 450 | } | ||
| 451 | |||
| 452 | } catch (const std::ios_base::failure& e) { | ||
| 453 | LOG_ERROR(Common_Filesystem, "{}", e.what()); | ||
| 454 | if (!Common::FS::RemoveFile(filename)) { | ||
| 455 | LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}", | ||
| 456 | Common::FS::PathToUTF8String(filename)); | ||
| 457 | } | ||
| 458 | } | ||
| 459 | |||
| 460 | } // namespace VideoCommon | ||
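A minimal sketch of a caller, assuming a std::jthread supplies the stop token; cache_path, CACHE_VERSION, and the lambda bodies are illustrative placeholders, not the call site added elsewhere in this change:

    std::jthread worker{[&](std::stop_token stop) {
        VideoCommon::LoadPipelines(
            stop, cache_path, CACHE_VERSION,
            [&](std::ifstream& file, VideoCommon::FileEnvironment env) {
                // Rebuild one compute pipeline from its recorded environment.
            },
            [&](std::ifstream& file, std::vector<VideoCommon::FileEnvironment> envs) {
                // Rebuild one graphics pipeline; one environment per active stage.
            });
    }};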
diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h new file mode 100644 index 000000000..2079979db --- /dev/null +++ b/src/video_core/shader_environment.h | |||
| @@ -0,0 +1,183 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <atomic> | ||
| 9 | #include <filesystem> | ||
| 10 | #include <iosfwd> | ||
| 11 | #include <limits> | ||
| 12 | #include <memory> | ||
| 13 | #include <optional> | ||
| 14 | #include <span> | ||
| 15 | #include <type_traits> | ||
| 16 | #include <unordered_map> | ||
| 17 | #include <vector> | ||
| 18 | |||
| 19 | #include "common/common_types.h" | ||
| 20 | #include "common/unique_function.h" | ||
| 21 | #include "shader_recompiler/environment.h" | ||
| 22 | #include "video_core/engines/kepler_compute.h" | ||
| 23 | #include "video_core/engines/maxwell_3d.h" | ||
| 24 | #include "video_core/textures/texture.h" | ||
| 25 | |||
| 26 | namespace Tegra { | ||
| 27 | class MemoryManager; | ||
| 28 | } | ||
| 29 | |||
| 30 | namespace VideoCommon { | ||
| 31 | |||
| 32 | class GenericEnvironment : public Shader::Environment { | ||
| 33 | public: | ||
| 34 | explicit GenericEnvironment() = default; | ||
| 35 | explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, | ||
| 36 | u32 start_address_); | ||
| 37 | |||
| 38 | ~GenericEnvironment() override; | ||
| 39 | |||
| 40 | [[nodiscard]] u32 TextureBoundBuffer() const final; | ||
| 41 | |||
| 42 | [[nodiscard]] u32 LocalMemorySize() const final; | ||
| 43 | |||
| 44 | [[nodiscard]] u32 SharedMemorySize() const final; | ||
| 45 | |||
| 46 | [[nodiscard]] std::array<u32, 3> WorkgroupSize() const final; | ||
| 47 | |||
| 48 | [[nodiscard]] u64 ReadInstruction(u32 address) final; | ||
| 49 | |||
| 50 | [[nodiscard]] std::optional<u64> Analyze(); | ||
| 51 | |||
| 52 | void SetCachedSize(size_t size_bytes); | ||
| 53 | |||
| 54 | [[nodiscard]] size_t CachedSize() const noexcept; | ||
| 55 | |||
| 56 | [[nodiscard]] size_t ReadSize() const noexcept; | ||
| 57 | |||
| 58 | [[nodiscard]] bool CanBeSerialized() const noexcept; | ||
| 59 | |||
| 60 | [[nodiscard]] u64 CalculateHash() const; | ||
| 61 | |||
| 62 | void Serialize(std::ofstream& file) const; | ||
| 63 | |||
| 64 | protected: | ||
| 65 | std::optional<u64> TryFindSize(); | ||
| 66 | |||
| 67 | Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index, | ||
| 68 | u32 raw); | ||
| 69 | |||
| 70 | Tegra::MemoryManager* gpu_memory{}; | ||
| 71 | GPUVAddr program_base{}; | ||
| 72 | |||
| 73 | std::vector<u64> code; | ||
| 74 | std::unordered_map<u32, Shader::TextureType> texture_types; | ||
| 75 | std::unordered_map<u64, u32> cbuf_values; | ||
| 76 | |||
| 77 | u32 local_memory_size{}; | ||
| 78 | u32 texture_bound{}; | ||
| 79 | u32 shared_memory_size{}; | ||
| 80 | std::array<u32, 3> workgroup_size{}; | ||
| 81 | |||
| 82 | u32 read_lowest = std::numeric_limits<u32>::max(); | ||
| 83 | u32 read_highest = 0; | ||
| 84 | |||
| 85 | u32 cached_lowest = std::numeric_limits<u32>::max(); | ||
| 86 | u32 cached_highest = 0; | ||
| 87 | |||
| 88 | bool has_unbound_instructions = false; | ||
| 89 | }; | ||
| 90 | |||
| 91 | class GraphicsEnvironment final : public GenericEnvironment { | ||
| 92 | public: | ||
| 93 | explicit GraphicsEnvironment() = default; | ||
| 94 | explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, | ||
| 95 | Tegra::MemoryManager& gpu_memory_, | ||
| 96 | Tegra::Engines::Maxwell3D::Regs::ShaderProgram program, | ||
| 97 | GPUVAddr program_base_, u32 start_address_); | ||
| 98 | |||
| 99 | ~GraphicsEnvironment() override = default; | ||
| 100 | |||
| 101 | u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override; | ||
| 102 | |||
| 103 | Shader::TextureType ReadTextureType(u32 handle) override; | ||
| 104 | |||
| 105 | private: | ||
| 106 | Tegra::Engines::Maxwell3D* maxwell3d{}; | ||
| 107 | size_t stage_index{}; | ||
| 108 | }; | ||
| 109 | |||
| 110 | class ComputeEnvironment final : public GenericEnvironment { | ||
| 111 | public: | ||
| 112 | explicit ComputeEnvironment() = default; | ||
| 113 | explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_, | ||
| 114 | Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, | ||
| 115 | u32 start_address_); | ||
| 116 | |||
| 117 | ~ComputeEnvironment() override = default; | ||
| 118 | |||
| 119 | u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override; | ||
| 120 | |||
| 121 | Shader::TextureType ReadTextureType(u32 handle) override; | ||
| 122 | |||
| 123 | private: | ||
| 124 | Tegra::Engines::KeplerCompute* kepler_compute{}; | ||
| 125 | }; | ||
| 126 | |||
| 127 | class FileEnvironment final : public Shader::Environment { | ||
| 128 | public: | ||
| 129 | FileEnvironment() = default; | ||
| 130 | ~FileEnvironment() override = default; | ||
| 131 | |||
| 132 | FileEnvironment& operator=(FileEnvironment&&) noexcept = default; | ||
| 133 | FileEnvironment(FileEnvironment&&) noexcept = default; | ||
| 134 | |||
| 135 | FileEnvironment& operator=(const FileEnvironment&) = delete; | ||
| 136 | FileEnvironment(const FileEnvironment&) = delete; | ||
| 137 | |||
| 138 | void Deserialize(std::ifstream& file); | ||
| 139 | |||
| 140 | [[nodiscard]] u64 ReadInstruction(u32 address) override; | ||
| 141 | |||
| 142 | [[nodiscard]] u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override; | ||
| 143 | |||
| 144 | [[nodiscard]] Shader::TextureType ReadTextureType(u32 handle) override; | ||
| 145 | |||
| 146 | [[nodiscard]] u32 LocalMemorySize() const override; | ||
| 147 | |||
| 148 | [[nodiscard]] u32 SharedMemorySize() const override; | ||
| 149 | |||
| 150 | [[nodiscard]] u32 TextureBoundBuffer() const override; | ||
| 151 | |||
| 152 | [[nodiscard]] std::array<u32, 3> WorkgroupSize() const override; | ||
| 153 | |||
| 154 | private: | ||
| 155 | std::unique_ptr<u64[]> code; | ||
| 156 | std::unordered_map<u32, Shader::TextureType> texture_types; | ||
| 157 | std::unordered_map<u64, u32> cbuf_values; | ||
| 158 | std::array<u32, 3> workgroup_size{}; | ||
| 159 | u32 local_memory_size{}; | ||
| 160 | u32 shared_memory_size{}; | ||
| 161 | u32 texture_bound{}; | ||
| 162 | u32 read_lowest{}; | ||
| 163 | u32 read_highest{}; | ||
| 164 | }; | ||
| 165 | |||
| 166 | void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs, | ||
| 167 | const std::filesystem::path& filename, u32 cache_version); | ||
| 168 | |||
| 169 | template <typename Key, typename Envs> | ||
| 170 | void SerializePipeline(const Key& key, const Envs& envs, const std::filesystem::path& filename, | ||
| 171 | u32 cache_version) { | ||
| 172 | static_assert(std::is_trivially_copyable_v<Key>); | ||
| 173 | static_assert(std::has_unique_object_representations_v<Key>); | ||
| 174 | SerializePipeline(std::span(reinterpret_cast<const char*>(&key), sizeof(key)), | ||
| 175 | std::span(envs.data(), envs.size()), filename, cache_version); | ||
| 176 | } | ||
| 177 | |||
| 178 | void LoadPipelines( | ||
| 179 | std::stop_token stop_loading, const std::filesystem::path& filename, u32 expected_cache_version, | ||
| 180 | Common::UniqueFunction<void, std::ifstream&, FileEnvironment> load_compute, | ||
| 181 | Common::UniqueFunction<void, std::ifstream&, std::vector<FileEnvironment>> load_graphics); | ||
| 182 | |||
| 183 | } // namespace VideoCommon | ||
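The two static_asserts in the templated SerializePipeline are what make byte-copying the key sound: trivial copyability permits memcpy semantics, and unique object representations rule out padding bytes that would write indeterminate data into the cache file. An illustrative key type that passes both checks (not a type from this change):

    struct ExampleKey {
        u64 unique_hash; // e.g. a hash of the shader blobs
        u32 state_word;  // packed fixed-function state
        u32 reserved;    // explicit field where implicit padding would otherwise sit
    };
    static_assert(std::is_trivially_copyable_v<ExampleKey>);
    static_assert(std::has_unique_object_representations_v<ExampleKey>);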
diff --git a/src/video_core/shader_notify.cpp b/src/video_core/shader_notify.cpp index 693e47158..dc6995b46 100644 --- a/src/video_core/shader_notify.cpp +++ b/src/video_core/shader_notify.cpp | |||
| @@ -2,42 +2,35 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <mutex> | 5 | #include <atomic> |
| 6 | #include <chrono> | ||
| 7 | #include <optional> | ||
| 8 | |||
| 6 | #include "video_core/shader_notify.h" | 9 | #include "video_core/shader_notify.h" |
| 7 | 10 | ||
| 8 | using namespace std::chrono_literals; | 11 | using namespace std::chrono_literals; |
| 9 | 12 | ||
| 10 | namespace VideoCore { | 13 | namespace VideoCore { |
| 11 | namespace { | ||
| 12 | constexpr auto UPDATE_TICK = 32ms; | ||
| 13 | } | ||
| 14 | |||
| 15 | ShaderNotify::ShaderNotify() = default; | ||
| 16 | ShaderNotify::~ShaderNotify() = default; | ||
| 17 | 14 | ||
| 18 | std::size_t ShaderNotify::GetShadersBuilding() { | 15 | const auto TIME_TO_STOP_REPORTING = 2s; |
| 19 | const auto now = std::chrono::high_resolution_clock::now(); | 16 | |
| 20 | const auto diff = now - last_update; | 17 | int ShaderNotify::ShadersBuilding() noexcept { |
| 21 | if (diff > UPDATE_TICK) { | 18 | const int now_complete = num_complete.load(std::memory_order::relaxed); |
| 22 | std::shared_lock lock(mutex); | 19 | const int now_building = num_building.load(std::memory_order::relaxed); |
| 23 | last_updated_count = accurate_count; | 20 | if (now_complete == now_building) { |
| 21 | const auto now = std::chrono::high_resolution_clock::now(); | ||
| 22 | if (completed && num_complete == num_when_completed) { | ||
| 23 | if (now - complete_time > TIME_TO_STOP_REPORTING) { | ||
| 24 | report_base = now_complete; | ||
| 25 | completed = false; | ||
| 26 | } | ||
| 27 | } else { | ||
| 28 | completed = true; | ||
| 29 | num_when_completed = num_complete; | ||
| 30 | complete_time = now; | ||
| 31 | } | ||
| 24 | } | 32 | } |
| 25 | return last_updated_count; | 33 | return now_building - report_base; |
| 26 | } | ||
| 27 | |||
| 28 | std::size_t ShaderNotify::GetShadersBuildingAccurate() { | ||
| 29 | std::shared_lock lock{mutex}; | ||
| 30 | return accurate_count; | ||
| 31 | } | ||
| 32 | |||
| 33 | void ShaderNotify::MarkShaderComplete() { | ||
| 34 | std::unique_lock lock{mutex}; | ||
| 35 | accurate_count--; | ||
| 36 | } | ||
| 37 | |||
| 38 | void ShaderNotify::MarkSharderBuilding() { | ||
| 39 | std::unique_lock lock{mutex}; | ||
| 40 | accurate_count++; | ||
| 41 | } | 34 | } |
| 42 | 35 | ||
| 43 | } // namespace VideoCore | 36 | } // namespace VideoCore |
diff --git a/src/video_core/shader_notify.h b/src/video_core/shader_notify.h index a9c92d179..ad363bfb5 100644 --- a/src/video_core/shader_notify.h +++ b/src/video_core/shader_notify.h | |||
| @@ -4,26 +4,30 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <atomic> | ||
| 7 | #include <chrono> | 8 | #include <chrono> |
| 8 | #include <shared_mutex> | 9 | #include <optional> |
| 9 | #include "common/common_types.h" | ||
| 10 | 10 | ||
| 11 | namespace VideoCore { | 11 | namespace VideoCore { |
| 12 | class ShaderNotify { | 12 | class ShaderNotify { |
| 13 | public: | 13 | public: |
| 14 | ShaderNotify(); | 14 | [[nodiscard]] int ShadersBuilding() noexcept; |
| 15 | ~ShaderNotify(); | ||
| 16 | 15 | ||
| 17 | std::size_t GetShadersBuilding(); | 16 | void MarkShaderComplete() noexcept { |
| 18 | std::size_t GetShadersBuildingAccurate(); | 17 | ++num_complete; |
| 18 | } | ||
| 19 | 19 | ||
| 20 | void MarkShaderComplete(); | 20 | void MarkShaderBuilding() noexcept { |
| 21 | void MarkSharderBuilding(); | 21 | ++num_building; |
| 22 | } | ||
| 22 | 23 | ||
| 23 | private: | 24 | private: |
| 24 | std::size_t last_updated_count{}; | 25 | std::atomic_int num_building{}; |
| 25 | std::size_t accurate_count{}; | 26 | std::atomic_int num_complete{}; |
| 26 | std::shared_mutex mutex; | 27 | int report_base{}; |
| 27 | std::chrono::high_resolution_clock::time_point last_update{}; | 28 | |
| 29 | bool completed{}; | ||
| 30 | int num_when_completed{}; | ||
| 31 | std::chrono::high_resolution_clock::time_point complete_time; | ||
| 28 | }; | 32 | }; |
| 29 | } // namespace VideoCore | 33 | } // namespace VideoCore |
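The rewrite trades the shared_mutex for two relaxed atomics: builder threads only increment, and the single reader tolerates momentarily stale values. Once num_complete catches up to num_building, ShadersBuilding() keeps reporting the finished batch for TIME_TO_STOP_REPORTING (2s), then rebases report_base so the counter settles back to zero. A sketch of the intended split (the overlay call is hypothetical):

    VideoCore::ShaderNotify notify;

    // Any builder thread, per shader:
    notify.MarkShaderBuilding();
    // ... compile ...
    notify.MarkShaderComplete();

    // UI thread, once per frame:
    if (const int building = notify.ShadersBuilding(); building > 0) {
        DrawShaderCountOverlay(building); // hypothetical UI hook
    }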
diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp index d10ba4ccd..249cc4d0f 100644 --- a/src/video_core/texture_cache/formatter.cpp +++ b/src/video_core/texture_cache/formatter.cpp | |||
| @@ -43,7 +43,7 @@ std::string Name(const ImageBase& image) { | |||
| 43 | return "Invalid"; | 43 | return "Invalid"; |
| 44 | } | 44 | } |
| 45 | 45 | ||
| 46 | std::string Name(const ImageViewBase& image_view, std::optional<ImageViewType> type) { | 46 | std::string Name(const ImageViewBase& image_view) { |
| 47 | const u32 width = image_view.size.width; | 47 | const u32 width = image_view.size.width; |
| 48 | const u32 height = image_view.size.height; | 48 | const u32 height = image_view.size.height; |
| 49 | const u32 depth = image_view.size.depth; | 49 | const u32 depth = image_view.size.depth; |
| @@ -51,7 +51,7 @@ std::string Name(const ImageViewBase& image_view, std::optional<ImageViewType> t | |||
| 51 | const u32 num_layers = image_view.range.extent.layers; | 51 | const u32 num_layers = image_view.range.extent.layers; |
| 52 | 52 | ||
| 53 | const std::string level = num_levels > 1 ? fmt::format(":{}", num_levels) : ""; | 53 | const std::string level = num_levels > 1 ? fmt::format(":{}", num_levels) : ""; |
| 54 | switch (type.value_or(image_view.type)) { | 54 | switch (image_view.type) { |
| 55 | case ImageViewType::e1D: | 55 | case ImageViewType::e1D: |
| 56 | return fmt::format("ImageView 1D {}{}", width, level); | 56 | return fmt::format("ImageView 1D {}{}", width, level); |
| 57 | case ImageViewType::e2D: | 57 | case ImageViewType::e2D: |
diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h index a48413983..c6cf0583f 100644 --- a/src/video_core/texture_cache/formatter.h +++ b/src/video_core/texture_cache/formatter.h | |||
| @@ -255,8 +255,7 @@ struct RenderTargets; | |||
| 255 | 255 | ||
| 256 | [[nodiscard]] std::string Name(const ImageBase& image); | 256 | [[nodiscard]] std::string Name(const ImageBase& image); |
| 257 | 257 | ||
| 258 | [[nodiscard]] std::string Name(const ImageViewBase& image_view, | 258 | [[nodiscard]] std::string Name(const ImageViewBase& image_view); |
| 259 | std::optional<ImageViewType> type = std::nullopt); | ||
| 260 | 259 | ||
| 261 | [[nodiscard]] std::string Name(const RenderTargets& render_targets); | 260 | [[nodiscard]] std::string Name(const RenderTargets& render_targets); |
| 262 | 261 | ||
diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp index e8d632f9e..450becbeb 100644 --- a/src/video_core/texture_cache/image_view_base.cpp +++ b/src/video_core/texture_cache/image_view_base.cpp | |||
| @@ -36,6 +36,15 @@ ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_i | |||
| 36 | } | 36 | } |
| 37 | } | 37 | } |
| 38 | 38 | ||
| 39 | ImageViewBase::ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info) | ||
| 40 | : format{info.format}, type{ImageViewType::Buffer}, size{ | ||
| 41 | .width = info.size.width, | ||
| 42 | .height = 1, | ||
| 43 | .depth = 1, | ||
| 44 | } { | ||
| 45 | ASSERT_MSG(view_info.type == ImageViewType::Buffer, "Expected texture buffer"); | ||
| 46 | } | ||
| 47 | |||
| 39 | ImageViewBase::ImageViewBase(const NullImageParams&) {} | 48 | ImageViewBase::ImageViewBase(const NullImageParams&) {} |
| 40 | 49 | ||
| 41 | } // namespace VideoCommon | 50 | } // namespace VideoCommon |
diff --git a/src/video_core/texture_cache/image_view_base.h b/src/video_core/texture_cache/image_view_base.h index 73954167e..903f715c5 100644 --- a/src/video_core/texture_cache/image_view_base.h +++ b/src/video_core/texture_cache/image_view_base.h | |||
| @@ -27,6 +27,7 @@ DECLARE_ENUM_FLAG_OPERATORS(ImageViewFlagBits) | |||
| 27 | struct ImageViewBase { | 27 | struct ImageViewBase { |
| 28 | explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info, | 28 | explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info, |
| 29 | ImageId image_id); | 29 | ImageId image_id); |
| 30 | explicit ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info); | ||
| 30 | explicit ImageViewBase(const NullImageParams&); | 31 | explicit ImageViewBase(const NullImageParams&); |
| 31 | 32 | ||
| 32 | [[nodiscard]] bool IsBuffer() const noexcept { | 33 | [[nodiscard]] bool IsBuffer() const noexcept { |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 85ce06d56..f34c9d9ca 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -117,6 +117,9 @@ public: | |||
| 117 | /// Return a reference to the given image view id | 117 | /// Return a reference to the given image view id |
| 118 | [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept; | 118 | [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept; |
| 119 | 119 | ||
| 120 | /// Mark an image as modified from the GPU | ||
| 121 | void MarkModification(ImageId id) noexcept; | ||
| 122 | |||
| 120 | /// Fill image_view_ids with the graphics images in indices | 123 | /// Fill image_view_ids with the graphics images in indices |
| 121 | void FillGraphicsImageViews(std::span<const u32> indices, | 124 | void FillGraphicsImageViews(std::span<const u32> indices, |
| 122 | std::span<ImageViewId> image_view_ids); | 125 | std::span<ImageViewId> image_view_ids); |
| @@ -527,6 +530,11 @@ typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept { | |||
| 527 | } | 530 | } |
| 528 | 531 | ||
| 529 | template <class P> | 532 | template <class P> |
| 533 | void TextureCache<P>::MarkModification(ImageId id) noexcept { | ||
| 534 | MarkModification(slot_images[id]); | ||
| 535 | } | ||
| 536 | |||
| 537 | template <class P> | ||
| 530 | void TextureCache<P>::FillGraphicsImageViews(std::span<const u32> indices, | 538 | void TextureCache<P>::FillGraphicsImageViews(std::span<const u32> indices, |
| 531 | std::span<ImageViewId> image_view_ids) { | 539 | std::span<ImageViewId> image_view_ids) { |
| 532 | FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids); | 540 | FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids); |
| @@ -540,13 +548,13 @@ void TextureCache<P>::FillComputeImageViews(std::span<const u32> indices, | |||
| 540 | 548 | ||
| 541 | template <class P> | 549 | template <class P> |
| 542 | typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) { | 550 | typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) { |
| 543 | [[unlikely]] if (index > graphics_sampler_table.Limit()) { | 551 | if (index > graphics_sampler_table.Limit()) { |
| 544 | LOG_ERROR(HW_GPU, "Invalid sampler index={}", index); | 552 | LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); |
| 545 | return &slot_samplers[NULL_SAMPLER_ID]; | 553 | return &slot_samplers[NULL_SAMPLER_ID]; |
| 546 | } | 554 | } |
| 547 | const auto [descriptor, is_new] = graphics_sampler_table.Read(index); | 555 | const auto [descriptor, is_new] = graphics_sampler_table.Read(index); |
| 548 | SamplerId& id = graphics_sampler_ids[index]; | 556 | SamplerId& id = graphics_sampler_ids[index]; |
| 549 | [[unlikely]] if (is_new) { | 557 | if (is_new) { |
| 550 | id = FindSampler(descriptor); | 558 | id = FindSampler(descriptor); |
| 551 | } | 559 | } |
| 552 | return &slot_samplers[id]; | 560 | return &slot_samplers[id]; |
| @@ -554,13 +562,13 @@ typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) { | |||
| 554 | 562 | ||
| 555 | template <class P> | 563 | template <class P> |
| 556 | typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) { | 564 | typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) { |
| 557 | [[unlikely]] if (index > compute_sampler_table.Limit()) { | 565 | if (index > compute_sampler_table.Limit()) { |
| 558 | LOG_ERROR(HW_GPU, "Invalid sampler index={}", index); | 566 | LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); |
| 559 | return &slot_samplers[NULL_SAMPLER_ID]; | 567 | return &slot_samplers[NULL_SAMPLER_ID]; |
| 560 | } | 568 | } |
| 561 | const auto [descriptor, is_new] = compute_sampler_table.Read(index); | 569 | const auto [descriptor, is_new] = compute_sampler_table.Read(index); |
| 562 | SamplerId& id = compute_sampler_ids[index]; | 570 | SamplerId& id = compute_sampler_ids[index]; |
| 563 | [[unlikely]] if (is_new) { | 571 | if (is_new) { |
| 564 | id = FindSampler(descriptor); | 572 | id = FindSampler(descriptor); |
| 565 | } | 573 | } |
| 566 | return &slot_samplers[id]; | 574 | return &slot_samplers[id]; |
| @@ -661,7 +669,7 @@ ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table, | |||
| 661 | std::span<ImageViewId> cached_image_view_ids, | 669 | std::span<ImageViewId> cached_image_view_ids, |
| 662 | u32 index) { | 670 | u32 index) { |
| 663 | if (index > table.Limit()) { | 671 | if (index > table.Limit()) { |
| 664 | LOG_ERROR(HW_GPU, "Invalid image view index={}", index); | 672 | LOG_DEBUG(HW_GPU, "Invalid image view index={}", index); |
| 665 | return NULL_IMAGE_VIEW_ID; | 673 | return NULL_IMAGE_VIEW_ID; |
| 666 | } | 674 | } |
| 667 | const auto [descriptor, is_new] = table.Read(index); | 675 | const auto [descriptor, is_new] = table.Read(index); |
| @@ -968,9 +976,6 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) | |||
| 968 | auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); | 976 | auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); |
| 969 | ConvertImage(unswizzled_data, image.info, mapped_span, copies); | 977 | ConvertImage(unswizzled_data, image.info, mapped_span, copies); |
| 970 | image.UploadMemory(staging, copies); | 978 | image.UploadMemory(staging, copies); |
| 971 | } else if (image.info.type == ImageType::Buffer) { | ||
| 972 | const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)}; | ||
| 973 | image.UploadMemory(staging, copies); | ||
| 974 | } else { | 979 | } else { |
| 975 | const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); | 980 | const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); |
| 976 | image.UploadMemory(staging, copies); | 981 | image.UploadMemory(staging, copies); |
| @@ -993,7 +998,12 @@ ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) { | |||
| 993 | template <class P> | 998 | template <class P> |
| 994 | ImageViewId TextureCache<P>::CreateImageView(const TICEntry& config) { | 999 | ImageViewId TextureCache<P>::CreateImageView(const TICEntry& config) { |
| 995 | const ImageInfo info(config); | 1000 | const ImageInfo info(config); |
| 996 | const GPUVAddr image_gpu_addr = config.Address() - config.BaseLayer() * info.layer_stride; | 1001 | if (info.type == ImageType::Buffer) { |
| 1002 | const ImageViewInfo view_info(config, 0); | ||
| 1003 | return slot_image_views.insert(runtime, info, view_info, config.Address()); | ||
| 1004 | } | ||
| 1005 | const u32 layer_offset = config.BaseLayer() * info.layer_stride; | ||
| 1006 | const GPUVAddr image_gpu_addr = config.Address() - layer_offset; | ||
| 997 | const ImageId image_id = FindOrInsertImage(info, image_gpu_addr); | 1007 | const ImageId image_id = FindOrInsertImage(info, image_gpu_addr); |
| 998 | if (!image_id) { | 1008 | if (!image_id) { |
| 999 | return NULL_IMAGE_VIEW_ID; | 1009 | return NULL_IMAGE_VIEW_ID; |
| @@ -1801,6 +1811,9 @@ void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modifi | |||
| 1801 | return; | 1811 | return; |
| 1802 | } | 1812 | } |
| 1803 | const ImageViewBase& image_view = slot_image_views[image_view_id]; | 1813 | const ImageViewBase& image_view = slot_image_views[image_view_id]; |
| 1814 | if (image_view.IsBuffer()) { | ||
| 1815 | return; | ||
| 1816 | } | ||
| 1804 | PrepareImage(image_view.image_id, is_modification, invalidate); | 1817 | PrepareImage(image_view.image_id, is_modification, invalidate); |
| 1805 | } | 1818 | } |
| 1806 | 1819 | ||
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index c1d14335e..1a9399455 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h | |||
| @@ -154,6 +154,15 @@ union TextureHandle { | |||
| 154 | }; | 154 | }; |
| 155 | static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size"); | 155 | static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size"); |
| 156 | 156 | ||
| 157 | [[nodiscard]] inline std::pair<u32, u32> TexturePair(u32 raw, bool via_header_index) { | ||
| 158 | if (via_header_index) { | ||
| 159 | return {raw, raw}; | ||
| 160 | } else { | ||
| 161 | const Tegra::Texture::TextureHandle handle{raw}; | ||
| 162 | return {handle.tic_id, handle.tsc_id}; | ||
| 163 | } | ||
| 164 | } | ||
| 165 | |||
| 157 | struct TICEntry { | 166 | struct TICEntry { |
| 158 | union { | 167 | union { |
| 159 | struct { | 168 | struct { |
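TexturePair resolves a raw texture word into its {TIC index, TSC index} pair: when sampling via header index, the word itself is both indices; otherwise the packed TextureHandle bitfields supply them. A short illustration:

    // via_header_index == true: the raw word doubles as both indices.
    const auto [tic_a, tsc_a] = Tegra::Texture::TexturePair(raw, true);  // {raw, raw}
    // via_header_index == false: split the packed handle's bitfields.
    const auto [tic_b, tsc_b] = Tegra::Texture::TexturePair(raw, false); // {tic_id, tsc_id}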
diff --git a/src/video_core/transform_feedback.cpp b/src/video_core/transform_feedback.cpp new file mode 100644 index 000000000..ba26ac3f1 --- /dev/null +++ b/src/video_core/transform_feedback.cpp | |||
| @@ -0,0 +1,99 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include "common/alignment.h" | ||
| 10 | #include "common/assert.h" | ||
| 11 | #include "shader_recompiler/shader_info.h" | ||
| 12 | #include "video_core/transform_feedback.h" | ||
| 13 | |||
| 14 | namespace VideoCommon { | ||
| 15 | |||
| 16 | std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings( | ||
| 17 | const TransformFeedbackState& state) { | ||
| 18 | static constexpr std::array VECTORS{ | ||
| 19 | 28, // gl_Position | ||
| 20 | 32, // Generic 0 | ||
| 21 | 36, // Generic 1 | ||
| 22 | 40, // Generic 2 | ||
| 23 | 44, // Generic 3 | ||
| 24 | 48, // Generic 4 | ||
| 25 | 52, // Generic 5 | ||
| 26 | 56, // Generic 6 | ||
| 27 | 60, // Generic 7 | ||
| 28 | 64, // Generic 8 | ||
| 29 | 68, // Generic 9 | ||
| 30 | 72, // Generic 10 | ||
| 31 | 76, // Generic 11 | ||
| 32 | 80, // Generic 12 | ||
| 33 | 84, // Generic 13 | ||
| 34 | 88, // Generic 14 | ||
| 35 | 92, // Generic 15 | ||
| 36 | 96, // Generic 16 | ||
| 37 | 100, // Generic 17 | ||
| 38 | 104, // Generic 18 | ||
| 39 | 108, // Generic 19 | ||
| 40 | 112, // Generic 20 | ||
| 41 | 116, // Generic 21 | ||
| 42 | 120, // Generic 22 | ||
| 43 | 124, // Generic 23 | ||
| 44 | 128, // Generic 24 | ||
| 45 | 132, // Generic 25 | ||
| 46 | 136, // Generic 26 | ||
| 47 | 140, // Generic 27 | ||
| 48 | 144, // Generic 28 | ||
| 49 | 148, // Generic 29 | ||
| 50 | 152, // Generic 30 | ||
| 51 | 156, // Generic 31 | ||
| 52 | 160, // gl_FrontColor | ||
| 53 | 164, // gl_FrontSecondaryColor | ||
| 54 | 160, // gl_BackColor | ||
| 55 | 164, // gl_BackSecondaryColor | ||
| 56 | 192, // gl_TexCoord[0] | ||
| 57 | 196, // gl_TexCoord[1] | ||
| 58 | 200, // gl_TexCoord[2] | ||
| 59 | 204, // gl_TexCoord[3] | ||
| 60 | 208, // gl_TexCoord[4] | ||
| 61 | 212, // gl_TexCoord[5] | ||
| 62 | 216, // gl_TexCoord[6] | ||
| 63 | 220, // gl_TexCoord[7] | ||
| 64 | }; | ||
| 65 | std::vector<Shader::TransformFeedbackVarying> xfb(256); | ||
| 66 | for (size_t buffer = 0; buffer < state.layouts.size(); ++buffer) { | ||
| 67 | const auto& locations = state.varyings[buffer]; | ||
| 68 | const auto& layout = state.layouts[buffer]; | ||
| 69 | const u32 varying_count = layout.varying_count; | ||
| 70 | u32 highest = 0; | ||
| 71 | for (u32 offset = 0; offset < varying_count; ++offset) { | ||
| 72 | const u32 base_offset = offset; | ||
| 73 | const u8 location = locations[offset]; | ||
| 74 | |||
| 75 | UNIMPLEMENTED_IF_MSG(layout.stream != 0, "Stream is not zero: {}", layout.stream); | ||
| 76 | Shader::TransformFeedbackVarying varying{ | ||
| 77 | .buffer = static_cast<u32>(buffer), | ||
| 78 | .stride = layout.stride, | ||
| 79 | .offset = offset * 4, | ||
| 80 | .components = 1, | ||
| 81 | }; | ||
| 82 | if (std::ranges::find(VECTORS, Common::AlignDown(location, 4)) != VECTORS.end()) { | ||
| 83 | UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB"); | ||
| 84 | |||
| 85 | const u8 base_index = location / 4; | ||
| 86 | while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) { | ||
| 87 | ++offset; | ||
| 88 | ++varying.components; | ||
| 89 | } | ||
| 90 | } | ||
| 91 | xfb[location] = varying; | ||
| 92 | highest = std::max(highest, (base_offset + varying.components) * 4); | ||
| 93 | } | ||
| 94 | UNIMPLEMENTED_IF(highest != layout.stride); | ||
| 95 | } | ||
| 96 | return xfb; | ||
| 97 | } | ||
| 98 | |||
| 99 | } // namespace VideoCommon | ||
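The inner while-loop fuses consecutive locations that fall in the same vec4 register (any entry of VECTORS after aligning down to 4) into one multi-component varying. A worked example for a buffer streaming Generic 0 as a full vec4:

    // locations = {32, 33, 34, 35}, varying_count = 4, layout.stride = 16.
    // Offset 0: location 32 is in VECTORS; offsets 1..3 share base_index 8,
    // so the loop folds them into a single entry:
    //   TransformFeedbackVarying{.buffer = 0, .stride = 16, .offset = 0, .components = 4}
    // stored at xfb[32]; highest = (0 + 4) * 4 = 16, which matches the stride.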
diff --git a/src/video_core/transform_feedback.h b/src/video_core/transform_feedback.h new file mode 100644 index 000000000..8f6946d65 --- /dev/null +++ b/src/video_core/transform_feedback.h | |||
| @@ -0,0 +1,30 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "shader_recompiler/runtime_info.h" | ||
| 12 | #include "video_core/engines/maxwell_3d.h" | ||
| 13 | |||
| 14 | namespace VideoCommon { | ||
| 15 | |||
| 16 | struct TransformFeedbackState { | ||
| 17 | struct Layout { | ||
| 18 | u32 stream; | ||
| 19 | u32 varying_count; | ||
| 20 | u32 stride; | ||
| 21 | }; | ||
| 22 | std::array<Layout, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> layouts; | ||
| 23 | std::array<std::array<u8, 128>, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> | ||
| 24 | varyings; | ||
| 25 | }; | ||
| 26 | |||
| 27 | std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings( | ||
| 28 | const TransformFeedbackState& state); | ||
| 29 | |||
| 30 | } // namespace VideoCommon | ||
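A sketch of how a caller might fill this state from the 3D engine registers; the register field names (tfb_layouts, tfb_varying_locs) are assumed from elsewhere in the codebase, not shown in this diff:

    VideoCommon::TransformFeedbackState state;
    for (size_t i = 0; i < state.layouts.size(); ++i) {
        const auto& layout = regs.tfb_layouts[i]; // assumed register names
        state.layouts[i] = {
            .stream = layout.stream,
            .varying_count = layout.varying_count,
            .stride = layout.stride,
        };
        state.varyings[i] = regs.tfb_varying_locs[i];
    }
    const auto varyings = VideoCommon::MakeTransformFeedbackVaryings(state);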
diff --git a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp index 758c038ba..fdd1a5081 100644 --- a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp +++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp | |||
| @@ -73,12 +73,11 @@ NsightAftermathTracker::~NsightAftermathTracker() { | |||
| 73 | } | 73 | } |
| 74 | } | 74 | } |
| 75 | 75 | ||
| 76 | void NsightAftermathTracker::SaveShader(const std::vector<u32>& spirv) const { | 76 | void NsightAftermathTracker::SaveShader(std::span<const u32> spirv) const { |
| 77 | if (!initialized) { | 77 | if (!initialized) { |
| 78 | return; | 78 | return; |
| 79 | } | 79 | } |
| 80 | 80 | std::vector<u32> spirv_copy(spirv.begin(), spirv.end()); | |
| 81 | std::vector<u32> spirv_copy = spirv; | ||
| 82 | GFSDK_Aftermath_SpirvCode shader; | 81 | GFSDK_Aftermath_SpirvCode shader; |
| 83 | shader.pData = spirv_copy.data(); | 82 | shader.pData = spirv_copy.data(); |
| 84 | shader.size = static_cast<u32>(spirv_copy.size() * 4); | 83 | shader.size = static_cast<u32>(spirv_copy.size() * 4); |
| @@ -100,7 +99,7 @@ void NsightAftermathTracker::SaveShader(const std::vector<u32>& spirv) const { | |||
| 100 | LOG_ERROR(Render_Vulkan, "Failed to dump SPIR-V module with hash={:016x}", hash.hash); | 99 | LOG_ERROR(Render_Vulkan, "Failed to dump SPIR-V module with hash={:016x}", hash.hash); |
| 101 | return; | 100 | return; |
| 102 | } | 101 | } |
| 103 | if (file.Write(spirv) != spirv.size()) { | 102 | if (file.WriteSpan(spirv) != spirv.size()) { |
| 104 | LOG_ERROR(Render_Vulkan, "Failed to write SPIR-V module with hash={:016x}", hash.hash); | 103 | LOG_ERROR(Render_Vulkan, "Failed to write SPIR-V module with hash={:016x}", hash.hash); |
| 105 | return; | 104 | return; |
| 106 | } | 105 | } |
diff --git a/src/video_core/vulkan_common/nsight_aftermath_tracker.h b/src/video_core/vulkan_common/nsight_aftermath_tracker.h index 4fe2b14d9..eae1891dd 100644 --- a/src/video_core/vulkan_common/nsight_aftermath_tracker.h +++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | 6 | ||
| 7 | #include <filesystem> | 7 | #include <filesystem> |
| 8 | #include <mutex> | 8 | #include <mutex> |
| 9 | #include <span> | ||
| 9 | #include <string> | 10 | #include <string> |
| 10 | #include <vector> | 11 | #include <vector> |
| 11 | 12 | ||
| @@ -33,7 +34,7 @@ public: | |||
| 33 | NsightAftermathTracker(NsightAftermathTracker&&) = delete; | 34 | NsightAftermathTracker(NsightAftermathTracker&&) = delete; |
| 34 | NsightAftermathTracker& operator=(NsightAftermathTracker&&) = delete; | 35 | NsightAftermathTracker& operator=(NsightAftermathTracker&&) = delete; |
| 35 | 36 | ||
| 36 | void SaveShader(const std::vector<u32>& spirv) const; | 37 | void SaveShader(std::span<const u32> spirv) const; |
| 37 | 38 | ||
| 38 | private: | 39 | private: |
| 39 | #ifdef HAS_NSIGHT_AFTERMATH | 40 | #ifdef HAS_NSIGHT_AFTERMATH |
| @@ -61,21 +62,21 @@ private: | |||
| 61 | bool initialized = false; | 62 | bool initialized = false; |
| 62 | 63 | ||
| 63 | Common::DynamicLibrary dl; | 64 | Common::DynamicLibrary dl; |
| 64 | PFN_GFSDK_Aftermath_DisableGpuCrashDumps GFSDK_Aftermath_DisableGpuCrashDumps; | 65 | PFN_GFSDK_Aftermath_DisableGpuCrashDumps GFSDK_Aftermath_DisableGpuCrashDumps{}; |
| 65 | PFN_GFSDK_Aftermath_EnableGpuCrashDumps GFSDK_Aftermath_EnableGpuCrashDumps; | 66 | PFN_GFSDK_Aftermath_EnableGpuCrashDumps GFSDK_Aftermath_EnableGpuCrashDumps{}; |
| 66 | PFN_GFSDK_Aftermath_GetShaderDebugInfoIdentifier GFSDK_Aftermath_GetShaderDebugInfoIdentifier; | 67 | PFN_GFSDK_Aftermath_GetShaderDebugInfoIdentifier GFSDK_Aftermath_GetShaderDebugInfoIdentifier{}; |
| 67 | PFN_GFSDK_Aftermath_GetShaderHashSpirv GFSDK_Aftermath_GetShaderHashSpirv; | 68 | PFN_GFSDK_Aftermath_GetShaderHashSpirv GFSDK_Aftermath_GetShaderHashSpirv{}; |
| 68 | PFN_GFSDK_Aftermath_GpuCrashDump_CreateDecoder GFSDK_Aftermath_GpuCrashDump_CreateDecoder; | 69 | PFN_GFSDK_Aftermath_GpuCrashDump_CreateDecoder GFSDK_Aftermath_GpuCrashDump_CreateDecoder{}; |
| 69 | PFN_GFSDK_Aftermath_GpuCrashDump_DestroyDecoder GFSDK_Aftermath_GpuCrashDump_DestroyDecoder; | 70 | PFN_GFSDK_Aftermath_GpuCrashDump_DestroyDecoder GFSDK_Aftermath_GpuCrashDump_DestroyDecoder{}; |
| 70 | PFN_GFSDK_Aftermath_GpuCrashDump_GenerateJSON GFSDK_Aftermath_GpuCrashDump_GenerateJSON; | 71 | PFN_GFSDK_Aftermath_GpuCrashDump_GenerateJSON GFSDK_Aftermath_GpuCrashDump_GenerateJSON{}; |
| 71 | PFN_GFSDK_Aftermath_GpuCrashDump_GetJSON GFSDK_Aftermath_GpuCrashDump_GetJSON; | 72 | PFN_GFSDK_Aftermath_GpuCrashDump_GetJSON GFSDK_Aftermath_GpuCrashDump_GetJSON{}; |
| 72 | #endif | 73 | #endif |
| 73 | }; | 74 | }; |
| 74 | 75 | ||
| 75 | #ifndef HAS_NSIGHT_AFTERMATH | 76 | #ifndef HAS_NSIGHT_AFTERMATH |
| 76 | inline NsightAftermathTracker::NsightAftermathTracker() = default; | 77 | inline NsightAftermathTracker::NsightAftermathTracker() = default; |
| 77 | inline NsightAftermathTracker::~NsightAftermathTracker() = default; | 78 | inline NsightAftermathTracker::~NsightAftermathTracker() = default; |
| 78 | inline void NsightAftermathTracker::SaveShader(const std::vector<u32>&) const {} | 79 | inline void NsightAftermathTracker::SaveShader(std::span<const u32>) const {} |
| 79 | #endif | 80 | #endif |
| 80 | 81 | ||
| 81 | } // namespace Vulkan | 82 | } // namespace Vulkan |
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index f214510da..44afdc1cd 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp | |||
| @@ -2,6 +2,7 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <algorithm> | ||
| 5 | #include <bitset> | 6 | #include <bitset> |
| 6 | #include <chrono> | 7 | #include <chrono> |
| 7 | #include <optional> | 8 | #include <optional> |
| @@ -33,6 +34,12 @@ constexpr std::array DEPTH16_UNORM_STENCIL8_UINT{ | |||
| 33 | }; | 34 | }; |
| 34 | } // namespace Alternatives | 35 | } // namespace Alternatives |
| 35 | 36 | ||
| 37 | enum class NvidiaArchitecture { | ||
| 38 | AmpereOrNewer, | ||
| 39 | Turing, | ||
| 40 | VoltaOrOlder, | ||
| 41 | }; | ||
| 42 | |||
| 36 | constexpr std::array REQUIRED_EXTENSIONS{ | 43 | constexpr std::array REQUIRED_EXTENSIONS{ |
| 37 | VK_KHR_MAINTENANCE1_EXTENSION_NAME, | 44 | VK_KHR_MAINTENANCE1_EXTENSION_NAME, |
| 38 | VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME, | 45 | VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME, |
| @@ -43,11 +50,14 @@ constexpr std::array REQUIRED_EXTENSIONS{ | |||
| 43 | VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME, | 50 | VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME, |
| 44 | VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, | 51 | VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, |
| 45 | VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME, | 52 | VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME, |
| 53 | VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME, | ||
| 54 | VK_KHR_VARIABLE_POINTERS_EXTENSION_NAME, | ||
| 46 | VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, | 55 | VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, |
| 47 | VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, | 56 | VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, |
| 48 | VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, | 57 | VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, |
| 49 | VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, | 58 | VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, |
| 50 | VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, | 59 | VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, |
| 60 | VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME, | ||
| 51 | #ifdef _WIN32 | 61 | #ifdef _WIN32 |
| 52 | VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, | 62 | VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, |
| 53 | #endif | 63 | #endif |
| @@ -112,6 +122,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica | |||
| 112 | VK_FORMAT_R16G16_SFLOAT, | 122 | VK_FORMAT_R16G16_SFLOAT, |
| 113 | VK_FORMAT_R16G16_SINT, | 123 | VK_FORMAT_R16G16_SINT, |
| 114 | VK_FORMAT_R16_UNORM, | 124 | VK_FORMAT_R16_UNORM, |
| 125 | VK_FORMAT_R16_SNORM, | ||
| 115 | VK_FORMAT_R16_UINT, | 126 | VK_FORMAT_R16_UINT, |
| 116 | VK_FORMAT_R8G8B8A8_SRGB, | 127 | VK_FORMAT_R8G8B8A8_SRGB, |
| 117 | VK_FORMAT_R8G8_UNORM, | 128 | VK_FORMAT_R8G8_UNORM, |
| @@ -191,15 +202,47 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica | |||
| 191 | return format_properties; | 202 | return format_properties; |
| 192 | } | 203 | } |
| 193 | 204 | ||
| 205 | std::vector<std::string> GetSupportedExtensions(vk::PhysicalDevice physical) { | ||
| 206 | const std::vector extensions = physical.EnumerateDeviceExtensionProperties(); | ||
| 207 | std::vector<std::string> supported_extensions; | ||
| 208 | supported_extensions.reserve(extensions.size()); | ||
| 209 | for (const auto& extension : extensions) { | ||
| 210 | supported_extensions.emplace_back(extension.extensionName); | ||
| 211 | } | ||
| 212 | return supported_extensions; | ||
| 213 | } | ||
| 214 | |||
| 215 | NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical, | ||
| 216 | std::span<const std::string> exts) { | ||
| 217 | if (std::ranges::find(exts, VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME) != exts.end()) { | ||
| 218 | VkPhysicalDeviceFragmentShadingRatePropertiesKHR shading_rate_props{}; | ||
| 219 | shading_rate_props.sType = | ||
| 220 | VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR; | ||
| 221 | VkPhysicalDeviceProperties2KHR physical_properties{}; | ||
| 222 | physical_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; | ||
| 223 | physical_properties.pNext = &shading_rate_props; | ||
| 224 | physical.GetProperties2KHR(physical_properties); | ||
| 225 | if (shading_rate_props.primitiveFragmentShadingRateWithMultipleViewports) { | ||
| 226 | // Only Ampere and newer support this feature | ||
| 227 | return NvidiaArchitecture::AmpereOrNewer; | ||
| 228 | } | ||
| 229 | } | ||
| 230 | if (std::ranges::find(exts, VK_NV_SHADING_RATE_IMAGE_EXTENSION_NAME) != exts.end()) { | ||
| 231 | return NvidiaArchitecture::Turing; | ||
| 232 | } | ||
| 233 | return NvidiaArchitecture::VoltaOrOlder; | ||
| 234 | } | ||
| 194 | } // Anonymous namespace | 235 | } // Anonymous namespace |
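Vulkan exposes no direct GPU-generation query, so the helper infers it from feature fingerprints: primitive shading rate with multiple viewports first appears on Ampere, and VK_NV_shading_rate_image shipped with Turing. A sketch of a call site (the workaround flag is hypothetical):

    const NvidiaArchitecture arch = GetNvidiaArchitecture(physical, supported_extensions);
    const bool use_turing_path = arch == NvidiaArchitecture::Turing; // hypothetical driver workaround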
| 195 | 236 | ||
| 196 | Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR surface, | 237 | Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR surface, |
| 197 | const vk::InstanceDispatch& dld_) | 238 | const vk::InstanceDispatch& dld_) |
| 198 | : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, | 239 | : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, |
| 199 | format_properties{GetFormatProperties(physical)} { | 240 | supported_extensions{GetSupportedExtensions(physical)}, |
| 241 | format_properties(GetFormatProperties(physical)) { | ||
| 200 | CheckSuitability(surface != nullptr); | 242 | CheckSuitability(surface != nullptr); |
| 201 | SetupFamilies(surface); | 243 | SetupFamilies(surface); |
| 202 | SetupFeatures(); | 244 | SetupFeatures(); |
| 245 | SetupProperties(); | ||
| 203 | 246 | ||
| 204 | const auto queue_cis = GetDeviceQueueCreateInfos(); | 247 | const auto queue_cis = GetDeviceQueueCreateInfos(); |
| 205 | const std::vector extensions = LoadExtensions(surface != nullptr); | 248 | const std::vector extensions = LoadExtensions(surface != nullptr); |
| @@ -214,16 +257,16 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | |||
| 214 | .independentBlend = true, | 257 | .independentBlend = true, |
| 215 | .geometryShader = true, | 258 | .geometryShader = true, |
| 216 | .tessellationShader = true, | 259 | .tessellationShader = true, |
| 217 | .sampleRateShading = false, | 260 | .sampleRateShading = true, |
| 218 | .dualSrcBlend = false, | 261 | .dualSrcBlend = true, |
| 219 | .logicOp = false, | 262 | .logicOp = false, |
| 220 | .multiDrawIndirect = false, | 263 | .multiDrawIndirect = false, |
| 221 | .drawIndirectFirstInstance = false, | 264 | .drawIndirectFirstInstance = false, |
| 222 | .depthClamp = true, | 265 | .depthClamp = true, |
| 223 | .depthBiasClamp = true, | 266 | .depthBiasClamp = true, |
| 224 | .fillModeNonSolid = false, | 267 | .fillModeNonSolid = true, |
| 225 | .depthBounds = false, | 268 | .depthBounds = is_depth_bounds_supported, |
| 226 | .wideLines = false, | 269 | .wideLines = true, |
| 227 | .largePoints = true, | 270 | .largePoints = true, |
| 228 | .alphaToOne = false, | 271 | .alphaToOne = false, |
| 229 | .multiViewport = true, | 272 | .multiViewport = true, |
| @@ -245,11 +288,11 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | |||
| 245 | .shaderSampledImageArrayDynamicIndexing = false, | 288 | .shaderSampledImageArrayDynamicIndexing = false, |
| 246 | .shaderStorageBufferArrayDynamicIndexing = false, | 289 | .shaderStorageBufferArrayDynamicIndexing = false, |
| 247 | .shaderStorageImageArrayDynamicIndexing = false, | 290 | .shaderStorageImageArrayDynamicIndexing = false, |
| 248 | .shaderClipDistance = false, | 291 | .shaderClipDistance = true, |
| 249 | .shaderCullDistance = false, | 292 | .shaderCullDistance = true, |
| 250 | .shaderFloat64 = false, | 293 | .shaderFloat64 = is_shader_float64_supported, |
| 251 | .shaderInt64 = false, | 294 | .shaderInt64 = is_shader_int64_supported, |
| 252 | .shaderInt16 = false, | 295 | .shaderInt16 = is_shader_int16_supported, |
| 253 | .shaderResourceResidency = false, | 296 | .shaderResourceResidency = false, |
| 254 | .shaderResourceMinLod = false, | 297 | .shaderResourceMinLod = false, |
| 255 | .sparseBinding = false, | 298 | .sparseBinding = false, |
| @@ -278,7 +321,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | |||
| 278 | VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage{ | 321 | VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage{ |
| 279 | .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR, | 322 | .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR, |
| 280 | .pNext = nullptr, | 323 | .pNext = nullptr, |
| 281 | .storageBuffer16BitAccess = false, | 324 | .storageBuffer16BitAccess = true, |
| 282 | .uniformAndStorageBuffer16BitAccess = true, | 325 | .uniformAndStorageBuffer16BitAccess = true, |
| 283 | .storagePushConstant16 = false, | 326 | .storagePushConstant16 = false, |
| 284 | .storageInputOutput16 = false, | 327 | .storageInputOutput16 = false, |
| @@ -310,6 +353,21 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | |||
| 310 | }; | 353 | }; |
| 311 | SetNext(next, host_query_reset); | 354 | SetNext(next, host_query_reset); |
| 312 | 355 | ||
| 356 | VkPhysicalDeviceVariablePointerFeaturesKHR variable_pointers{ | ||
| 357 | .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES_KHR, | ||
| 358 | .pNext = nullptr, | ||
| 359 | .variablePointersStorageBuffer = VK_TRUE, | ||
| 360 | .variablePointers = VK_TRUE, | ||
| 361 | }; | ||
| 362 | SetNext(next, variable_pointers); | ||
| 363 | |||
| 364 | VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{ | ||
| 365 | .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT, | ||
| 366 | .pNext = nullptr, | ||
| 367 | .shaderDemoteToHelperInvocation = true, | ||
| 368 | }; | ||
| 369 | SetNext(next, demote); | ||
| 370 | |||
| 313 | VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8; | 371 | VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8; |
| 314 | if (is_float16_supported) { | 372 | if (is_float16_supported) { |
| 315 | float16_int8 = { | 373 | float16_int8 = { |
| @@ -327,6 +385,14 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | |||
| 327 | LOG_INFO(Render_Vulkan, "Device doesn't support viewport swizzles"); | 385 | LOG_INFO(Render_Vulkan, "Device doesn't support viewport swizzles"); |
| 328 | } | 386 | } |
| 329 | 387 | ||
| 388 | if (!nv_viewport_array2) { | ||
| 389 | LOG_INFO(Render_Vulkan, "Device doesn't support viewport masks"); | ||
| 390 | } | ||
| 391 | |||
| 392 | if (!nv_geometry_shader_passthrough) { | ||
| 393 | LOG_INFO(Render_Vulkan, "Device doesn't support passthrough geometry shaders"); | ||
| 394 | } | ||
| 395 | |||
| 330 | VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout; | 396 | VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout; |
| 331 | if (khr_uniform_buffer_standard_layout) { | 397 | if (khr_uniform_buffer_standard_layout) { |
| 332 | std430_layout = { | 398 | std430_layout = { |
| @@ -389,12 +455,83 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | |||
| 389 | LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); | 455 | LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); |
| 390 | } | 456 | } |
| 391 | 457 | ||
| 458 | VkPhysicalDeviceLineRasterizationFeaturesEXT line_raster; | ||
| 459 | if (ext_line_rasterization) { | ||
| 460 | line_raster = { | ||
| 461 | .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT, | ||
| 462 | .pNext = nullptr, | ||
| 463 | .rectangularLines = VK_TRUE, | ||
| 464 | .bresenhamLines = VK_FALSE, | ||
| 465 | .smoothLines = VK_TRUE, | ||
| 466 | .stippledRectangularLines = VK_FALSE, | ||
| 467 | .stippledBresenhamLines = VK_FALSE, | ||
| 468 | .stippledSmoothLines = VK_FALSE, | ||
| 469 | }; | ||
| 470 | SetNext(next, line_raster); | ||
| 471 | } else { | ||
| 472 | LOG_INFO(Render_Vulkan, "Device doesn't support smooth lines"); | ||
| 473 | } | ||
| 474 | |||
| 475 | if (!ext_conservative_rasterization) { | ||
| 476 | LOG_INFO(Render_Vulkan, "Device doesn't support conservative rasterization"); | ||
| 477 | } | ||
| 478 | |||
| 479 | VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex; | ||
| 480 | if (ext_provoking_vertex) { | ||
| 481 | provoking_vertex = { | ||
| 482 | .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT, | ||
| 483 | .pNext = nullptr, | ||
| 484 | .provokingVertexLast = VK_TRUE, | ||
| 485 | .transformFeedbackPreservesProvokingVertex = VK_TRUE, | ||
| 486 | }; | ||
| 487 | SetNext(next, provoking_vertex); | ||
| 488 | } else { | ||
| 489 | LOG_INFO(Render_Vulkan, "Device doesn't support provoking vertex last"); | ||
| 490 | } | ||
| 491 | |||
| 492 | VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT vertex_input_dynamic; | ||
| 493 | if (ext_vertex_input_dynamic_state) { | ||
| 494 | vertex_input_dynamic = { | ||
| 495 | .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT, | ||
| 496 | .pNext = nullptr, | ||
| 497 | .vertexInputDynamicState = VK_TRUE, | ||
| 498 | }; | ||
| 499 | SetNext(next, vertex_input_dynamic); | ||
| 500 | } else { | ||
| 501 | LOG_INFO(Render_Vulkan, "Device doesn't support vertex input dynamic state"); | ||
| 502 | } | ||
| 503 | |||
| 504 | VkPhysicalDeviceShaderAtomicInt64FeaturesKHR atomic_int64; | ||
| 505 | if (ext_shader_atomic_int64) { | ||
| 506 | atomic_int64 = { | ||
| 507 | .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR, | ||
| 508 | .pNext = nullptr, | ||
| 509 | .shaderBufferInt64Atomics = VK_TRUE, | ||
| 510 | .shaderSharedInt64Atomics = VK_TRUE, | ||
| 511 | }; | ||
| 512 | SetNext(next, atomic_int64); | ||
| 513 | } | ||
| 514 | |||
| 515 | VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR workgroup_layout; | ||
| 516 | if (khr_workgroup_memory_explicit_layout) { | ||
| 517 | workgroup_layout = { | ||
| 518 | .sType = | ||
| 519 | VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR, | ||
| 520 | .pNext = nullptr, | ||
| 521 | .workgroupMemoryExplicitLayout = VK_TRUE, | ||
| 522 | .workgroupMemoryExplicitLayoutScalarBlockLayout = VK_TRUE, | ||
| 523 | .workgroupMemoryExplicitLayout8BitAccess = VK_TRUE, | ||
| 524 | .workgroupMemoryExplicitLayout16BitAccess = VK_TRUE, | ||
| 525 | }; | ||
| 526 | SetNext(next, workgroup_layout); | ||
| 527 | } | ||
| 528 | |||
| 392 | if (!ext_depth_range_unrestricted) { | 529 | if (!ext_depth_range_unrestricted) { |
| 393 | LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); | 530 | LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); |
| 394 | } | 531 | } |
| 395 | 532 | ||
| 396 | VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv; | 533 | VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv; |
| 397 | if (nv_device_diagnostics_config) { | 534 | if (Settings::values.enable_nsight_aftermath && nv_device_diagnostics_config) { |
| 398 | nsight_aftermath_tracker = std::make_unique<NsightAftermathTracker>(); | 535 | nsight_aftermath_tracker = std::make_unique<NsightAftermathTracker>(); |
| 399 | 536 | ||
| 400 | diagnostics_nv = { | 537 | diagnostics_nv = { |
| @@ -412,11 +549,33 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | |||
| 412 | CollectTelemetryParameters(); | 549 | CollectTelemetryParameters(); |
| 413 | CollectToolingInfo(); | 550 | CollectToolingInfo(); |
| 414 | 551 | ||
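| | // Driver-specific workarounds below key off the VkDriverIdKHR gathered in CollectTelemetryParameters(). | ||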
| 552 | if (driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR) { | ||
| 553 | const auto arch = GetNvidiaArchitecture(physical, supported_extensions); | ||
| 554 | switch (arch) { | ||
| 555 | case NvidiaArchitecture::AmpereOrNewer: | ||
| 556 | LOG_WARNING(Render_Vulkan, "Blacklisting Ampere devices from float16 math"); | ||
| 557 | is_float16_supported = false; | ||
| 558 | break; | ||
| 559 | case NvidiaArchitecture::Turing: | ||
| 560 | break; | ||
| 561 | case NvidiaArchitecture::VoltaOrOlder: | ||
| 562 | LOG_WARNING(Render_Vulkan, "Blacklisting Volta and older from VK_KHR_push_descriptor"); | ||
| 563 | khr_push_descriptor = false; | ||
| 564 | break; | ||
| 565 | } | ||
| 566 | } | ||
| 415 | if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) { | 567 | if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) { |
| 416 | LOG_WARNING( | 568 | // Mask driver version variant |
| 417 | Render_Vulkan, | 569 | const u32 version = (properties.driverVersion << 3) >> 3; |
| 418 | "Blacklisting RADV for VK_EXT_extended_dynamic state, likely due to a bug in yuzu"); | 570 | if (version < VK_MAKE_API_VERSION(0, 21, 2, 0)) { |
| 419 | ext_extended_dynamic_state = false; | 571 | LOG_WARNING(Render_Vulkan, |
| 572 | "RADV versions older than 21.2 have broken VK_EXT_extended_dynamic_state"); | ||
| 573 | ext_extended_dynamic_state = false; | ||
| 574 | } | ||
| 575 | } | ||
| 576 | if (ext_vertex_input_dynamic_state && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { | ||
| 577 | LOG_WARNING(Render_Vulkan, "Blacklisting Intel for VK_EXT_vertex_input_dynamic_state"); | ||
| 578 | ext_vertex_input_dynamic_state = false; | ||
| 420 | } | 579 | } |
| 421 | if (is_float16_supported && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { | 580 | if (is_float16_supported && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { |
| 422 | // Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being. | 581 | // Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being. |
| @@ -426,8 +585,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | |||
| 426 | 585 | ||
| 427 | graphics_queue = logical.GetQueue(graphics_family); | 586 | graphics_queue = logical.GetQueue(graphics_family); |
| 428 | present_queue = logical.GetQueue(present_family); | 587 | present_queue = logical.GetQueue(present_family); |
| 429 | |||
| 430 | use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue(); | ||
| 431 | } | 588 | } |
| 432 | 589 | ||
| 433 | Device::~Device() = default; | 590 | Device::~Device() = default; |
| @@ -471,7 +628,7 @@ void Device::ReportLoss() const { | |||
| 471 | std::this_thread::sleep_for(std::chrono::seconds{15}); | 628 | std::this_thread::sleep_for(std::chrono::seconds{15}); |
| 472 | } | 629 | } |
| 473 | 630 | ||
| 474 | void Device::SaveShader(const std::vector<u32>& spirv) const { | 631 | void Device::SaveShader(std::span<const u32> spirv) const { |
| 475 | if (nsight_aftermath_tracker) { | 632 | if (nsight_aftermath_tracker) { |
| 476 | nsight_aftermath_tracker->SaveShader(spirv); | 633 | nsight_aftermath_tracker->SaveShader(spirv); |
| 477 | } | 634 | } |
| @@ -597,10 +754,20 @@ void Device::CheckSuitability(bool requires_swapchain) const { | |||
| 597 | throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); | 754 | throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); |
| 598 | } | 755 | } |
| 599 | } | 756 | } |
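| | // The structs below are chained through pNext (features2 -> robustness2 -> variable_pointers -> demote) so a single features query can fill them all. | ||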
| 757 | VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{}; | ||
| 758 | demote.sType = | ||
| 759 | VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT; | ||
| 760 | demote.pNext = nullptr; | ||
| 761 | |||
| 762 | VkPhysicalDeviceVariablePointerFeaturesKHR variable_pointers{}; | ||
| 763 | variable_pointers.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES_KHR; | ||
| 764 | variable_pointers.pNext = &demote; | ||
| 765 | |||
| 600 | VkPhysicalDeviceRobustness2FeaturesEXT robustness2{}; | 766 | VkPhysicalDeviceRobustness2FeaturesEXT robustness2{}; |
| 601 | robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; | 767 | robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; |
| 768 | robustness2.pNext = &variable_pointers; | ||
| 602 | 769 | ||
| 603 | VkPhysicalDeviceFeatures2 features2{}; | 770 | VkPhysicalDeviceFeatures2KHR features2{}; |
| 604 | features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; | 771 | features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; |
| 605 | features2.pNext = &robustness2; | 772 | features2.pNext = &robustness2; |
| 606 | 773 | ||
| @@ -610,7 +777,6 @@ void Device::CheckSuitability(bool requires_swapchain) const { | |||
| 610 | const std::array feature_report{ | 777 | const std::array feature_report{ |
| 611 | std::make_pair(features.robustBufferAccess, "robustBufferAccess"), | 778 | std::make_pair(features.robustBufferAccess, "robustBufferAccess"), |
| 612 | std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), | 779 | std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), |
| 613 | std::make_pair(features.robustBufferAccess, "robustBufferAccess"), | ||
| 614 | std::make_pair(features.imageCubeArray, "imageCubeArray"), | 780 | std::make_pair(features.imageCubeArray, "imageCubeArray"), |
| 615 | std::make_pair(features.independentBlend, "independentBlend"), | 781 | std::make_pair(features.independentBlend, "independentBlend"), |
| 616 | std::make_pair(features.depthClamp, "depthClamp"), | 782 | std::make_pair(features.depthClamp, "depthClamp"), |
| @@ -618,13 +784,23 @@ void Device::CheckSuitability(bool requires_swapchain) const { | |||
| 618 | std::make_pair(features.largePoints, "largePoints"), | 784 | std::make_pair(features.largePoints, "largePoints"), |
| 619 | std::make_pair(features.multiViewport, "multiViewport"), | 785 | std::make_pair(features.multiViewport, "multiViewport"), |
| 620 | std::make_pair(features.depthBiasClamp, "depthBiasClamp"), | 786 | std::make_pair(features.depthBiasClamp, "depthBiasClamp"), |
| 787 | std::make_pair(features.fillModeNonSolid, "fillModeNonSolid"), | ||
| 788 | std::make_pair(features.wideLines, "wideLines"), | ||
| 621 | std::make_pair(features.geometryShader, "geometryShader"), | 789 | std::make_pair(features.geometryShader, "geometryShader"), |
| 622 | std::make_pair(features.tessellationShader, "tessellationShader"), | 790 | std::make_pair(features.tessellationShader, "tessellationShader"), |
| 791 | std::make_pair(features.sampleRateShading, "sampleRateShading"), | ||
| 792 | std::make_pair(features.dualSrcBlend, "dualSrcBlend"), | ||
| 623 | std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"), | 793 | std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"), |
| 624 | std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"), | 794 | std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"), |
| 625 | std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), | 795 | std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), |
| 626 | std::make_pair(features.shaderStorageImageWriteWithoutFormat, | 796 | std::make_pair(features.shaderStorageImageWriteWithoutFormat, |
| 627 | "shaderStorageImageWriteWithoutFormat"), | 797 | "shaderStorageImageWriteWithoutFormat"), |
| 798 | std::make_pair(features.shaderClipDistance, "shaderClipDistance"), | ||
| 799 | std::make_pair(features.shaderCullDistance, "shaderCullDistance"), | ||
| 800 | std::make_pair(demote.shaderDemoteToHelperInvocation, "shaderDemoteToHelperInvocation"), | ||
| 801 | std::make_pair(variable_pointers.variablePointers, "variablePointers"), | ||
| 802 | std::make_pair(variable_pointers.variablePointersStorageBuffer, | ||
| 803 | "variablePointersStorageBuffer"), | ||
| 628 | std::make_pair(robustness2.robustBufferAccess2, "robustBufferAccess2"), | 804 | std::make_pair(robustness2.robustBufferAccess2, "robustBufferAccess2"), |
| 629 | std::make_pair(robustness2.robustImageAccess2, "robustImageAccess2"), | 805 | std::make_pair(robustness2.robustImageAccess2, "robustImageAccess2"), |
| 630 | std::make_pair(robustness2.nullDescriptor, "nullDescriptor"), | 806 | std::make_pair(robustness2.nullDescriptor, "nullDescriptor"), |
| @@ -647,14 +823,19 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { | |||
| 647 | } | 823 | } |
| 648 | 824 | ||
| 649 | bool has_khr_shader_float16_int8{}; | 825 | bool has_khr_shader_float16_int8{}; |
| 826 | bool has_khr_workgroup_memory_explicit_layout{}; | ||
| 650 | bool has_ext_subgroup_size_control{}; | 827 | bool has_ext_subgroup_size_control{}; |
| 651 | bool has_ext_transform_feedback{}; | 828 | bool has_ext_transform_feedback{}; |
| 652 | bool has_ext_custom_border_color{}; | 829 | bool has_ext_custom_border_color{}; |
| 653 | bool has_ext_extended_dynamic_state{}; | 830 | bool has_ext_extended_dynamic_state{}; |
| 654 | for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { | 831 | bool has_ext_shader_atomic_int64{}; |
| 832 | bool has_ext_provoking_vertex{}; | ||
| 833 | bool has_ext_vertex_input_dynamic_state{}; | ||
| 834 | bool has_ext_line_rasterization{}; | ||
| 835 | for (const std::string& extension : supported_extensions) { | ||
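| | // test(flag, name, push): sets 'flag' when this extension matches 'name'; push=true also appends it to the enabled list. | ||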
| 655 | const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name, | 836 | const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name, |
| 656 | bool push) { | 837 | bool push) { |
| 657 | if (extension.extensionName != std::string_view(name)) { | 838 | if (extension != name) { |
| 658 | return; | 839 | return; |
| 659 | } | 840 | } |
| 660 | if (push) { | 841 | if (push) { |
| @@ -665,8 +846,13 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { | |||
| 665 | } | 846 | } |
| 666 | }; | 847 | }; |
| 667 | test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true); | 848 | test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true); |
| 849 | test(nv_viewport_array2, VK_NV_VIEWPORT_ARRAY2_EXTENSION_NAME, true); | ||
| 850 | test(nv_geometry_shader_passthrough, VK_NV_GEOMETRY_SHADER_PASSTHROUGH_EXTENSION_NAME, | ||
| 851 | true); | ||
| 668 | test(khr_uniform_buffer_standard_layout, | 852 | test(khr_uniform_buffer_standard_layout, |
| 669 | VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); | 853 | VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); |
| 854 | test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true); | ||
| 855 | test(khr_push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, true); | ||
| 670 | test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); | 856 | test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); |
| 671 | test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); | 857 | test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); |
| 672 | test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); | 858 | test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); |
| @@ -675,16 +861,25 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { | |||
| 675 | true); | 861 | true); |
| 676 | test(ext_tooling_info, VK_EXT_TOOLING_INFO_EXTENSION_NAME, true); | 862 | test(ext_tooling_info, VK_EXT_TOOLING_INFO_EXTENSION_NAME, true); |
| 677 | test(ext_shader_stencil_export, VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME, true); | 863 | test(ext_shader_stencil_export, VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME, true); |
| 864 | test(ext_conservative_rasterization, VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME, | ||
| 865 | true); | ||
| 678 | test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false); | 866 | test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false); |
| 679 | test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); | 867 | test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); |
| 680 | test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); | 868 | test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); |
| 681 | test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); | 869 | test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); |
| 682 | if (Settings::values.renderer_debug) { | 870 | test(has_ext_provoking_vertex, VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME, false); |
| 871 | test(has_ext_vertex_input_dynamic_state, VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME, | ||
| 872 | false); | ||
| 873 | test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false); | ||
| 874 | test(has_khr_workgroup_memory_explicit_layout, | ||
| 875 | VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false); | ||
| 876 | test(has_ext_line_rasterization, VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME, false); | ||
| 877 | if (Settings::values.enable_nsight_aftermath) { | ||
| 683 | test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, | 878 | test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, |
| 684 | true); | 879 | true); |
| 685 | } | 880 | } |
| 686 | } | 881 | } |
| 687 | VkPhysicalDeviceFeatures2KHR features; | 882 | VkPhysicalDeviceFeatures2KHR features{}; |
| 688 | features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; | 883 | features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; |
| 689 | 884 | ||
| 690 | VkPhysicalDeviceProperties2KHR physical_properties; | 885 | VkPhysicalDeviceProperties2KHR physical_properties; |
| @@ -722,10 +917,49 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { | |||
| 722 | subgroup_properties.maxSubgroupSize >= GuestWarpSize) { | 917 | subgroup_properties.maxSubgroupSize >= GuestWarpSize) { |
| 723 | extensions.push_back(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME); | 918 | extensions.push_back(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME); |
| 724 | guest_warp_stages = subgroup_properties.requiredSubgroupSizeStages; | 919 | guest_warp_stages = subgroup_properties.requiredSubgroupSizeStages; |
| 920 | ext_subgroup_size_control = true; | ||
| 725 | } | 921 | } |
| 726 | } else { | 922 | } else { |
| 727 | is_warp_potentially_bigger = true; | 923 | is_warp_potentially_bigger = true; |
| 728 | } | 924 | } |
| 925 | if (has_ext_provoking_vertex) { | ||
| 926 | VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex; | ||
| 927 | provoking_vertex.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT; | ||
| 928 | provoking_vertex.pNext = nullptr; | ||
| 929 | features.pNext = &provoking_vertex; | ||
| 930 | physical.GetFeatures2KHR(features); | ||
| 931 | |||
| 932 | if (provoking_vertex.provokingVertexLast && | ||
| 933 | provoking_vertex.transformFeedbackPreservesProvokingVertex) { | ||
| 934 | extensions.push_back(VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME); | ||
| 935 | ext_provoking_vertex = true; | ||
| 936 | } | ||
| 937 | } | ||
| 938 | if (has_ext_vertex_input_dynamic_state) { | ||
| 939 | VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT vertex_input; | ||
| 940 | vertex_input.sType = | ||
| 941 | VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT; | ||
| 942 | vertex_input.pNext = nullptr; | ||
| 943 | features.pNext = &vertex_input; | ||
| 944 | physical.GetFeatures2KHR(features); | ||
| 945 | |||
| 946 | if (vertex_input.vertexInputDynamicState) { | ||
| 947 | extensions.push_back(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME); | ||
| 948 | ext_vertex_input_dynamic_state = true; | ||
| 949 | } | ||
| 950 | } | ||
| 951 | if (has_ext_shader_atomic_int64) { | ||
| 952 | VkPhysicalDeviceShaderAtomicInt64Features atomic_int64; | ||
| 953 | atomic_int64.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR; | ||
| 954 | atomic_int64.pNext = nullptr; | ||
| 955 | features.pNext = &atomic_int64; | ||
| 956 | physical.GetFeatures2KHR(features); | ||
| 957 | |||
| 958 | if (atomic_int64.shaderBufferInt64Atomics && atomic_int64.shaderSharedInt64Atomics) { | ||
| 959 | extensions.push_back(VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME); | ||
| 960 | ext_shader_atomic_int64 = true; | ||
| 961 | } | ||
| 962 | } | ||
| 729 | if (has_ext_transform_feedback) { | 963 | if (has_ext_transform_feedback) { |
| 730 | VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features; | 964 | VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features; |
| 731 | tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; | 965 | tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; |
| @@ -760,17 +994,55 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { | |||
| 760 | } | 994 | } |
| 761 | } | 995 | } |
| 762 | if (has_ext_extended_dynamic_state) { | 996 | if (has_ext_extended_dynamic_state) { |
| 763 | VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state; | 997 | VkPhysicalDeviceExtendedDynamicStateFeaturesEXT extended_dynamic_state; |
| 764 | dynamic_state.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; | 998 | extended_dynamic_state.sType = |
| 765 | dynamic_state.pNext = nullptr; | 999 | VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; |
| 766 | features.pNext = &dynamic_state; | 1000 | extended_dynamic_state.pNext = nullptr; |
| 1001 | features.pNext = &extended_dynamic_state; | ||
| 767 | physical.GetFeatures2KHR(features); | 1002 | physical.GetFeatures2KHR(features); |
| 768 | 1003 | ||
| 769 | if (dynamic_state.extendedDynamicState) { | 1004 | if (extended_dynamic_state.extendedDynamicState) { |
| 770 | extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME); | 1005 | extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME); |
| 771 | ext_extended_dynamic_state = true; | 1006 | ext_extended_dynamic_state = true; |
| 772 | } | 1007 | } |
| 773 | } | 1008 | } |
| 1009 | if (has_ext_line_rasterization) { | ||
| 1010 | VkPhysicalDeviceLineRasterizationFeaturesEXT line_raster; | ||
| 1011 | line_raster.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT; | ||
| 1012 | line_raster.pNext = nullptr; | ||
| 1013 | features.pNext = &line_raster; | ||
| 1014 | physical.GetFeatures2KHR(features); | ||
| 1015 | if (line_raster.rectangularLines && line_raster.smoothLines) { | ||
| 1016 | extensions.push_back(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME); | ||
| 1017 | ext_line_rasterization = true; | ||
| 1018 | } | ||
| 1019 | } | ||
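| | // All four explicit-layout features must be present before the extension is advertised. | ||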
| 1020 | if (has_khr_workgroup_memory_explicit_layout) { | ||
| 1021 | VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR layout; | ||
| 1022 | layout.sType = | ||
| 1023 | VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR; | ||
| 1024 | layout.pNext = nullptr; | ||
| 1025 | features.pNext = &layout; | ||
| 1026 | physical.GetFeatures2KHR(features); | ||
| 1027 | |||
| 1028 | if (layout.workgroupMemoryExplicitLayout && | ||
| 1029 | layout.workgroupMemoryExplicitLayout8BitAccess && | ||
| 1030 | layout.workgroupMemoryExplicitLayout16BitAccess && | ||
| 1031 | layout.workgroupMemoryExplicitLayoutScalarBlockLayout) { | ||
| 1032 | extensions.push_back(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME); | ||
| 1033 | khr_workgroup_memory_explicit_layout = true; | ||
| 1034 | } | ||
| 1035 | } | ||
| 1036 | if (khr_push_descriptor) { | ||
| 1037 | VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor; | ||
| 1038 | push_descriptor.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR; | ||
| 1039 | push_descriptor.pNext = nullptr; | ||
| 1040 | |||
| 1041 | physical_properties.pNext = &push_descriptor; | ||
| 1042 | physical.GetProperties2KHR(physical_properties); | ||
| 1043 | |||
| 1044 | max_push_descriptors = push_descriptor.maxPushDescriptors; | ||
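| | // (the extension guarantees maxPushDescriptors is at least 32) | ||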
| 1045 | } | ||
| 774 | return extensions; | 1046 | return extensions; |
| 775 | } | 1047 | } |
| 776 | 1048 | ||
| @@ -806,11 +1078,25 @@ void Device::SetupFamilies(VkSurfaceKHR surface) { | |||
| 806 | } | 1078 | } |
| 807 | 1079 | ||
| 808 | void Device::SetupFeatures() { | 1080 | void Device::SetupFeatures() { |
| 809 | const auto supported_features{physical.GetFeatures()}; | 1081 | const VkPhysicalDeviceFeatures features{physical.GetFeatures()}; |
| 810 | is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; | 1082 | is_depth_bounds_supported = features.depthBounds; |
| 811 | is_shader_storage_image_multisample = supported_features.shaderStorageImageMultisample; | 1083 | is_formatless_image_load_supported = features.shaderStorageImageReadWithoutFormat; |
| 1084 | is_shader_float64_supported = features.shaderFloat64; | ||
| 1085 | is_shader_int64_supported = features.shaderInt64; | ||
| 1086 | is_shader_int16_supported = features.shaderInt16; | ||
| 1087 | is_shader_storage_image_multisample = features.shaderStorageImageMultisample; | ||
| 812 | is_blit_depth_stencil_supported = TestDepthStencilBlits(); | 1088 | is_blit_depth_stencil_supported = TestDepthStencilBlits(); |
| 813 | is_optimal_astc_supported = IsOptimalAstcSupported(supported_features); | 1089 | is_optimal_astc_supported = IsOptimalAstcSupported(features); |
| 1090 | } | ||
| 1091 | |||
| 1092 | void Device::SetupProperties() { | ||
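| | // float_controls is a member; chaining it into this one-off properties2 query keeps the queried denorm/rounding capabilities available through FloatControlProperties(). | ||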
| 1093 | float_controls.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES_KHR; | ||
| 1094 | |||
| 1095 | VkPhysicalDeviceProperties2KHR properties2{}; | ||
| 1096 | properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; | ||
| 1097 | properties2.pNext = &float_controls; | ||
| 1098 | |||
| 1099 | physical.GetProperties2KHR(properties2); | ||
| 814 | } | 1100 | } |
| 815 | 1101 | ||
| 816 | void Device::CollectTelemetryParameters() { | 1102 | void Device::CollectTelemetryParameters() { |
| @@ -832,12 +1118,6 @@ void Device::CollectTelemetryParameters() { | |||
| 832 | 1118 | ||
| 833 | driver_id = driver.driverID; | 1119 | driver_id = driver.driverID; |
| 834 | vendor_name = driver.driverName; | 1120 | vendor_name = driver.driverName; |
| 835 | |||
| 836 | const std::vector extensions = physical.EnumerateDeviceExtensionProperties(); | ||
| 837 | reported_extensions.reserve(std::size(extensions)); | ||
| 838 | for (const auto& extension : extensions) { | ||
| 839 | reported_extensions.emplace_back(extension.extensionName); | ||
| 840 | } | ||
| 841 | } | 1121 | } |
| 842 | 1122 | ||
| 843 | void Device::CollectPhysicalMemoryInfo() { | 1123 | void Device::CollectPhysicalMemoryInfo() { |
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 96c0f8c60..df394e384 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <span> | ||
| 7 | #include <string> | 8 | #include <string> |
| 8 | #include <string_view> | 9 | #include <string_view> |
| 9 | #include <unordered_map> | 10 | #include <unordered_map> |
| @@ -43,7 +44,7 @@ public: | |||
| 43 | void ReportLoss() const; | 44 | void ReportLoss() const; |
| 44 | 45 | ||
| 45 | /// Reports a shader to Nsight Aftermath. | 46 | /// Reports a shader to Nsight Aftermath. |
| 46 | void SaveShader(const std::vector<u32>& spirv) const; | 47 | void SaveShader(std::span<const u32> spirv) const; |
| 47 | 48 | ||
| 48 | /// Returns the name of the VkDriverId reported from Vulkan. | 49 | /// Returns the name of the VkDriverId reported from Vulkan. |
| 49 | std::string GetDriverName() const; | 50 | std::string GetDriverName() const; |
| @@ -128,6 +129,11 @@ public: | |||
| 128 | return properties.limits.maxComputeSharedMemorySize; | 129 | return properties.limits.maxComputeSharedMemorySize; |
| 129 | } | 130 | } |
| 130 | 131 | ||
| 132 | /// Returns float control properties of the device. | ||
| 133 | const VkPhysicalDeviceFloatControlsPropertiesKHR& FloatControlProperties() const { | ||
| 134 | return float_controls; | ||
| 135 | } | ||
| 136 | |||
| 131 | /// Returns true if ASTC is natively supported. | 137 | /// Returns true if ASTC is natively supported. |
| 132 | bool IsOptimalAstcSupported() const { | 138 | bool IsOptimalAstcSupported() const { |
| 133 | return is_optimal_astc_supported; | 139 | return is_optimal_astc_supported; |
| @@ -148,11 +154,31 @@ public: | |||
| 148 | return guest_warp_stages & stage; | 154 | return guest_warp_stages & stage; |
| 149 | } | 155 | } |
| 150 | 156 | ||
| 157 | /// Returns the maximum number of push descriptors. | ||
| 158 | u32 MaxPushDescriptors() const { | ||
| 159 | return max_push_descriptors; | ||
| 160 | } | ||
| 161 | |||
| 151 | /// Returns true if formatless image load is supported. | 162 | /// Returns true if formatless image load is supported. |
| 152 | bool IsFormatlessImageLoadSupported() const { | 163 | bool IsFormatlessImageLoadSupported() const { |
| 153 | return is_formatless_image_load_supported; | 164 | return is_formatless_image_load_supported; |
| 154 | } | 165 | } |
| 155 | 166 | ||
| 167 | /// Returns true if shader int64 is supported. | ||
| 168 | bool IsShaderInt64Supported() const { | ||
| 169 | return is_shader_int64_supported; | ||
| 170 | } | ||
| 171 | |||
| 172 | /// Returns true if shader int16 is supported. | ||
| 173 | bool IsShaderInt16Supported() const { | ||
| 174 | return is_shader_int16_supported; | ||
| 175 | } | ||
| 176 | |||
| 177 | /// Returns true if depth bounds testing is supported. | ||
| 178 | bool IsDepthBoundsSupported() const { | ||
| 179 | return is_depth_bounds_supported; | ||
| 180 | } | ||
| 181 | |||
| 156 | /// Returns true when blitting from and to depth stencil images is supported. | 182 | /// Returns true when blitting from and to depth stencil images is supported. |
| 157 | bool IsBlitDepthStencilSupported() const { | 183 | bool IsBlitDepthStencilSupported() const { |
| 158 | return is_blit_depth_stencil_supported; | 184 | return is_blit_depth_stencil_supported; |
| @@ -163,11 +189,36 @@ public: | |||
| 163 | return nv_viewport_swizzle; | 189 | return nv_viewport_swizzle; |
| 164 | } | 190 | } |
| 165 | 191 | ||
| 166 | /// Returns true if the device supports VK_EXT_scalar_block_layout. | 192 | /// Returns true if the device supports VK_NV_viewport_array2. |
| 193 | bool IsNvViewportArray2Supported() const { | ||
| 194 | return nv_viewport_array2; | ||
| 195 | } | ||
| 196 | |||
| 197 | /// Returns true if the device supports VK_NV_geometry_shader_passthrough. | ||
| 198 | bool IsNvGeometryShaderPassthroughSupported() const { | ||
| 199 | return nv_geometry_shader_passthrough; | ||
| 200 | } | ||
| 201 | |||
| 202 | /// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout. | ||
| 167 | bool IsKhrUniformBufferStandardLayoutSupported() const { | 203 | bool IsKhrUniformBufferStandardLayoutSupported() const { |
| 168 | return khr_uniform_buffer_standard_layout; | 204 | return khr_uniform_buffer_standard_layout; |
| 169 | } | 205 | } |
| 170 | 206 | ||
| 207 | /// Returns true if the device supports VK_KHR_spirv_1_4. | ||
| 208 | bool IsKhrSpirv1_4Supported() const { | ||
| 209 | return khr_spirv_1_4; | ||
| 210 | } | ||
| 211 | |||
| 212 | /// Returns true if the device supports VK_KHR_push_descriptor. | ||
| 213 | bool IsKhrPushDescriptorSupported() const { | ||
| 214 | return khr_push_descriptor; | ||
| 215 | } | ||
| 216 | |||
| 217 | /// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout. | ||
| 218 | bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const { | ||
| 219 | return khr_workgroup_memory_explicit_layout; | ||
| 220 | } | ||
| 221 | |||
| 171 | /// Returns true if the device supports VK_EXT_index_type_uint8. | 222 | /// Returns true if the device supports VK_EXT_index_type_uint8. |
| 172 | bool IsExtIndexTypeUint8Supported() const { | 223 | bool IsExtIndexTypeUint8Supported() const { |
| 173 | return ext_index_type_uint8; | 224 | return ext_index_type_uint8; |
| @@ -188,6 +239,11 @@ public: | |||
| 188 | return ext_shader_viewport_index_layer; | 239 | return ext_shader_viewport_index_layer; |
| 189 | } | 240 | } |
| 190 | 241 | ||
| 242 | /// Returns true if the device supports VK_EXT_subgroup_size_control. | ||
| 243 | bool IsExtSubgroupSizeControlSupported() const { | ||
| 244 | return ext_subgroup_size_control; | ||
| 245 | } | ||
| 246 | |||
| 191 | /// Returns true if the device supports VK_EXT_transform_feedback. | 247 | /// Returns true if the device supports VK_EXT_transform_feedback. |
| 192 | bool IsExtTransformFeedbackSupported() const { | 248 | bool IsExtTransformFeedbackSupported() const { |
| 193 | return ext_transform_feedback; | 249 | return ext_transform_feedback; |
| @@ -203,11 +259,36 @@ public: | |||
| 203 | return ext_extended_dynamic_state; | 259 | return ext_extended_dynamic_state; |
| 204 | } | 260 | } |
| 205 | 261 | ||
| 262 | /// Returns true if the device supports VK_EXT_line_rasterization. | ||
| 263 | bool IsExtLineRasterizationSupported() const { | ||
| 264 | return ext_line_rasterization; | ||
| 265 | } | ||
| 266 | |||
| 267 | /// Returns true if the device supports VK_EXT_vertex_input_dynamic_state. | ||
| 268 | bool IsExtVertexInputDynamicStateSupported() const { | ||
| 269 | return ext_vertex_input_dynamic_state; | ||
| 270 | } | ||
| 271 | |||
| 206 | /// Returns true if the device supports VK_EXT_shader_stencil_export. | 272 | /// Returns true if the device supports VK_EXT_shader_stencil_export. |
| 207 | bool IsExtShaderStencilExportSupported() const { | 273 | bool IsExtShaderStencilExportSupported() const { |
| 208 | return ext_shader_stencil_export; | 274 | return ext_shader_stencil_export; |
| 209 | } | 275 | } |
| 210 | 276 | ||
| 277 | /// Returns true if the device supports VK_EXT_conservative_rasterization. | ||
| 278 | bool IsExtConservativeRasterizationSupported() const { | ||
| 279 | return ext_conservative_rasterization; | ||
| 280 | } | ||
| 281 | |||
| 282 | /// Returns true if the device supports VK_EXT_provoking_vertex. | ||
| 283 | bool IsExtProvokingVertexSupported() const { | ||
| 284 | return ext_provoking_vertex; | ||
| 285 | } | ||
| 286 | |||
| 287 | /// Returns true if the device supports VK_KHR_shader_atomic_int64. | ||
| 288 | bool IsExtShaderAtomicInt64Supported() const { | ||
| 289 | return ext_shader_atomic_int64; | ||
| 290 | } | ||
| 291 | |||
| 211 | /// Returns true when a known debugging tool is attached. | 292 | /// Returns true when a known debugging tool is attached. |
| 212 | bool HasDebuggingToolAttached() const { | 293 | bool HasDebuggingToolAttached() const { |
| 213 | return has_renderdoc || has_nsight_graphics; | 294 | return has_renderdoc || has_nsight_graphics; |
| @@ -220,12 +301,7 @@ public: | |||
| 220 | 301 | ||
| 221 | /// Returns the list of available extensions. | 302 | /// Returns the list of available extensions. |
| 222 | const std::vector<std::string>& GetAvailableExtensions() const { | 303 | const std::vector<std::string>& GetAvailableExtensions() const { |
| 223 | return reported_extensions; | 304 | return supported_extensions; |
| 224 | } | ||
| 225 | |||
| 226 | /// Returns true if the setting for async shader compilation is enabled. | ||
| 227 | bool UseAsynchronousShaders() const { | ||
| 228 | return use_asynchronous_shaders; | ||
| 229 | } | 305 | } |
| 230 | 306 | ||
| 231 | u64 GetDeviceLocalMemory() const { | 307 | u64 GetDeviceLocalMemory() const { |
| @@ -245,6 +321,9 @@ private: | |||
| 245 | /// Sets up device features. | 321 | /// Sets up device features. |
| 246 | void SetupFeatures(); | 322 | void SetupFeatures(); |
| 247 | 323 | ||
| 324 | /// Sets up device properties. | ||
| 325 | void SetupProperties(); | ||
| 326 | |||
| 248 | /// Collects telemetry information from the device. | 327 | /// Collects telemetry information from the device. |
| 249 | void CollectTelemetryParameters(); | 328 | void CollectTelemetryParameters(); |
| 250 | 329 | ||
| @@ -267,46 +346,60 @@ private: | |||
| 267 | bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, | 346 | bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, |
| 268 | FormatType format_type) const; | 347 | FormatType format_type) const; |
| 269 | 348 | ||
| 270 | VkInstance instance; ///< Vulkan instance. | 349 | VkInstance instance; ///< Vulkan instance. |
| 271 | vk::DeviceDispatch dld; ///< Device function pointers. | 350 | vk::DeviceDispatch dld; ///< Device function pointers. |
| 272 | vk::PhysicalDevice physical; ///< Physical device. | 351 | vk::PhysicalDevice physical; ///< Physical device. |
| 273 | VkPhysicalDeviceProperties properties; ///< Device properties. | 352 | VkPhysicalDeviceProperties properties; ///< Device properties. |
| 274 | vk::Device logical; ///< Logical device. | 353 | VkPhysicalDeviceFloatControlsPropertiesKHR float_controls{}; ///< Float control properties. |
| 275 | vk::Queue graphics_queue; ///< Main graphics queue. | 354 | vk::Device logical; ///< Logical device. |
| 276 | vk::Queue present_queue; ///< Main present queue. | 355 | vk::Queue graphics_queue; ///< Main graphics queue. |
| 277 | u32 instance_version{}; ///< Vulkan instance version. | 356 | vk::Queue present_queue; ///< Main present queue. |
| 357 | u32 instance_version{}; ///< Vulkan instance version. | ||
| 278 | u32 graphics_family{}; ///< Main graphics queue family index. | 358 | u32 graphics_family{}; ///< Main graphics queue family index. |
| 279 | u32 present_family{}; ///< Main present queue family index. | 359 | u32 present_family{}; ///< Main present queue family index. |
| 280 | VkDriverIdKHR driver_id{}; ///< Driver ID. | 360 | VkDriverIdKHR driver_id{}; ///< Driver ID. |
| 281 | VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced. | 361 | VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced. |
| 282 | u64 device_access_memory{}; ///< Total size of device local memory in bytes. | 362 | u64 device_access_memory{}; ///< Total size of device local memory in bytes. |
| 363 | u32 max_push_descriptors{}; ///< Maximum number of push descriptors | ||
| 283 | bool is_optimal_astc_supported{}; ///< Support for native ASTC. | 364 | bool is_optimal_astc_supported{}; ///< Support for native ASTC. |
| 284 | bool is_float16_supported{}; ///< Support for float16 arithmetics. | 365 | bool is_float16_supported{}; ///< Support for float16 arithmetics. |
| 285 | bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. | 366 | bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. |
| 286 | bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. | 367 | bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. |
| 368 | bool is_depth_bounds_supported{}; ///< Support for depth bounds. | ||
| 369 | bool is_shader_float64_supported{}; ///< Support for float64. | ||
| 370 | bool is_shader_int64_supported{}; ///< Support for int64. | ||
| 371 | bool is_shader_int16_supported{}; ///< Support for int16. | ||
| 287 | bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images. | 372 | bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images. |
| 288 | bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. | 373 | bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. |
| 289 | bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. | 374 | bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. |
| 290 | bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. | 375 | bool nv_viewport_array2{}; ///< Support for VK_NV_viewport_array2. |
| 291 | bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. | 376 | bool nv_geometry_shader_passthrough{}; ///< Support for VK_NV_geometry_shader_passthrough. |
| 292 | bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. | 377 | bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 layouts on UBOs. |
| 293 | bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. | 378 | bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4. |
| 294 | bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. | 379 | bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts. |
| 295 | bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info. | 380 | bool khr_push_descriptor{}; ///< Support for VK_KHR_push_descriptor. |
| 296 | bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. | 381 | bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. |
| 297 | bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. | 382 | bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. |
| 298 | bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. | 383 | bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. |
| 299 | bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. | 384 | bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. |
| 300 | bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. | 385 | bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info. |
| 301 | bool has_renderdoc{}; ///< Has RenderDoc attached | 386 | bool ext_subgroup_size_control{}; ///< Support for VK_EXT_subgroup_size_control. |
| 302 | bool has_nsight_graphics{}; ///< Has Nsight Graphics attached | 387 | bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. |
| 303 | 388 | bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. | |
| 304 | // Asynchronous Graphics Pipeline setting | 389 | bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. |
| 305 | bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline | 390 | bool ext_line_rasterization{}; ///< Support for VK_EXT_line_rasterization. |
| 391 | bool ext_vertex_input_dynamic_state{}; ///< Support for VK_EXT_vertex_input_dynamic_state. | ||
| 392 | bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. | ||
| 393 | bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64. | ||
| 394 | bool ext_conservative_rasterization{}; ///< Support for VK_EXT_conservative_rasterization. | ||
| 395 | bool ext_provoking_vertex{}; ///< Support for VK_EXT_provoking_vertex. | ||
| 396 | bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. | ||
| 397 | bool has_renderdoc{}; ///< Has RenderDoc attached | ||
| 398 | bool has_nsight_graphics{}; ///< Has Nsight Graphics attached | ||
| 306 | 399 | ||
| 307 | // Telemetry parameters | 400 | // Telemetry parameters |
| 308 | std::string vendor_name; ///< Device's driver name. | 401 | std::string vendor_name; ///< Device's driver name. |
| 309 | std::vector<std::string> reported_extensions; ///< Reported Vulkan extensions. | 402 | std::vector<std::string> supported_extensions; ///< Reported Vulkan extensions. |
| 310 | 403 | ||
| 311 | /// Format properties dictionary. | 404 | /// Format properties dictionary. |
| 312 | std::unordered_map<VkFormat, VkFormatProperties> format_properties; | 405 | std::unordered_map<VkFormat, VkFormatProperties> format_properties; |
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 2aa0ffbe6..bbf0fccae 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp | |||
| @@ -103,6 +103,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | |||
| 103 | X(vkCmdFillBuffer); | 103 | X(vkCmdFillBuffer); |
| 104 | X(vkCmdPipelineBarrier); | 104 | X(vkCmdPipelineBarrier); |
| 105 | X(vkCmdPushConstants); | 105 | X(vkCmdPushConstants); |
| 106 | X(vkCmdPushDescriptorSetWithTemplateKHR); | ||
| 106 | X(vkCmdSetBlendConstants); | 107 | X(vkCmdSetBlendConstants); |
| 107 | X(vkCmdSetDepthBias); | 108 | X(vkCmdSetDepthBias); |
| 108 | X(vkCmdSetDepthBounds); | 109 | X(vkCmdSetDepthBounds); |
| @@ -120,9 +121,11 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | |||
| 120 | X(vkCmdSetDepthTestEnableEXT); | 121 | X(vkCmdSetDepthTestEnableEXT); |
| 121 | X(vkCmdSetDepthWriteEnableEXT); | 122 | X(vkCmdSetDepthWriteEnableEXT); |
| 122 | X(vkCmdSetFrontFaceEXT); | 123 | X(vkCmdSetFrontFaceEXT); |
| 124 | X(vkCmdSetLineWidth); | ||
| 123 | X(vkCmdSetPrimitiveTopologyEXT); | 125 | X(vkCmdSetPrimitiveTopologyEXT); |
| 124 | X(vkCmdSetStencilOpEXT); | 126 | X(vkCmdSetStencilOpEXT); |
| 125 | X(vkCmdSetStencilTestEnableEXT); | 127 | X(vkCmdSetStencilTestEnableEXT); |
| 128 | X(vkCmdSetVertexInputEXT); | ||
| 126 | X(vkCmdResolveImage); | 129 | X(vkCmdResolveImage); |
| 127 | X(vkCreateBuffer); | 130 | X(vkCreateBuffer); |
| 128 | X(vkCreateBufferView); | 131 | X(vkCreateBufferView); |
| @@ -311,8 +314,6 @@ const char* ToString(VkResult result) noexcept { | |||
| 311 | return "VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT"; | 314 | return "VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT"; |
| 312 | case VkResult::VK_ERROR_UNKNOWN: | 315 | case VkResult::VK_ERROR_UNKNOWN: |
| 313 | return "VK_ERROR_UNKNOWN"; | 316 | return "VK_ERROR_UNKNOWN"; |
| 314 | case VkResult::VK_ERROR_INCOMPATIBLE_VERSION_KHR: | ||
| 315 | return "VK_ERROR_INCOMPATIBLE_VERSION_KHR"; | ||
| 316 | case VkResult::VK_THREAD_IDLE_KHR: | 317 | case VkResult::VK_THREAD_IDLE_KHR: |
| 317 | return "VK_THREAD_IDLE_KHR"; | 318 | return "VK_THREAD_IDLE_KHR"; |
| 318 | case VkResult::VK_THREAD_DONE_KHR: | 319 | case VkResult::VK_THREAD_DONE_KHR: |
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index 3e36d356a..d76bb4324 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h | |||
| @@ -193,15 +193,16 @@ struct DeviceDispatch : InstanceDispatch { | |||
| 193 | PFN_vkBeginCommandBuffer vkBeginCommandBuffer{}; | 193 | PFN_vkBeginCommandBuffer vkBeginCommandBuffer{}; |
| 194 | PFN_vkBindBufferMemory vkBindBufferMemory{}; | 194 | PFN_vkBindBufferMemory vkBindBufferMemory{}; |
| 195 | PFN_vkBindImageMemory vkBindImageMemory{}; | 195 | PFN_vkBindImageMemory vkBindImageMemory{}; |
| 196 | PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT{}; | ||
| 196 | PFN_vkCmdBeginQuery vkCmdBeginQuery{}; | 197 | PFN_vkCmdBeginQuery vkCmdBeginQuery{}; |
| 197 | PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass{}; | 198 | PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass{}; |
| 198 | PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT{}; | 199 | PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT{}; |
| 199 | PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT{}; | ||
| 200 | PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets{}; | 200 | PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets{}; |
| 201 | PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer{}; | 201 | PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer{}; |
| 202 | PFN_vkCmdBindPipeline vkCmdBindPipeline{}; | 202 | PFN_vkCmdBindPipeline vkCmdBindPipeline{}; |
| 203 | PFN_vkCmdBindTransformFeedbackBuffersEXT vkCmdBindTransformFeedbackBuffersEXT{}; | 203 | PFN_vkCmdBindTransformFeedbackBuffersEXT vkCmdBindTransformFeedbackBuffersEXT{}; |
| 204 | PFN_vkCmdBindVertexBuffers vkCmdBindVertexBuffers{}; | 204 | PFN_vkCmdBindVertexBuffers vkCmdBindVertexBuffers{}; |
| 205 | PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT{}; | ||
| 205 | PFN_vkCmdBlitImage vkCmdBlitImage{}; | 206 | PFN_vkCmdBlitImage vkCmdBlitImage{}; |
| 206 | PFN_vkCmdClearAttachments vkCmdClearAttachments{}; | 207 | PFN_vkCmdClearAttachments vkCmdClearAttachments{}; |
| 207 | PFN_vkCmdCopyBuffer vkCmdCopyBuffer{}; | 208 | PFN_vkCmdCopyBuffer vkCmdCopyBuffer{}; |
| @@ -211,34 +212,36 @@ struct DeviceDispatch : InstanceDispatch { | |||
| 211 | PFN_vkCmdDispatch vkCmdDispatch{}; | 212 | PFN_vkCmdDispatch vkCmdDispatch{}; |
| 212 | PFN_vkCmdDraw vkCmdDraw{}; | 213 | PFN_vkCmdDraw vkCmdDraw{}; |
| 213 | PFN_vkCmdDrawIndexed vkCmdDrawIndexed{}; | 214 | PFN_vkCmdDrawIndexed vkCmdDrawIndexed{}; |
| 215 | PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{}; | ||
| 214 | PFN_vkCmdEndQuery vkCmdEndQuery{}; | 216 | PFN_vkCmdEndQuery vkCmdEndQuery{}; |
| 215 | PFN_vkCmdEndRenderPass vkCmdEndRenderPass{}; | 217 | PFN_vkCmdEndRenderPass vkCmdEndRenderPass{}; |
| 216 | PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT{}; | 218 | PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT{}; |
| 217 | PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{}; | ||
| 218 | PFN_vkCmdFillBuffer vkCmdFillBuffer{}; | 219 | PFN_vkCmdFillBuffer vkCmdFillBuffer{}; |
| 219 | PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier{}; | 220 | PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier{}; |
| 220 | PFN_vkCmdPushConstants vkCmdPushConstants{}; | 221 | PFN_vkCmdPushConstants vkCmdPushConstants{}; |
| 222 | PFN_vkCmdPushDescriptorSetWithTemplateKHR vkCmdPushDescriptorSetWithTemplateKHR{}; | ||
| 223 | PFN_vkCmdResolveImage vkCmdResolveImage{}; | ||
| 221 | PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants{}; | 224 | PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants{}; |
| 225 | PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT{}; | ||
| 222 | PFN_vkCmdSetDepthBias vkCmdSetDepthBias{}; | 226 | PFN_vkCmdSetDepthBias vkCmdSetDepthBias{}; |
| 223 | PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds{}; | 227 | PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds{}; |
| 224 | PFN_vkCmdSetEvent vkCmdSetEvent{}; | ||
| 225 | PFN_vkCmdSetScissor vkCmdSetScissor{}; | ||
| 226 | PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask{}; | ||
| 227 | PFN_vkCmdSetStencilReference vkCmdSetStencilReference{}; | ||
| 228 | PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask{}; | ||
| 229 | PFN_vkCmdSetViewport vkCmdSetViewport{}; | ||
| 230 | PFN_vkCmdWaitEvents vkCmdWaitEvents{}; | ||
| 231 | PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT{}; | ||
| 232 | PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT{}; | ||
| 233 | PFN_vkCmdSetDepthBoundsTestEnableEXT vkCmdSetDepthBoundsTestEnableEXT{}; | 228 | PFN_vkCmdSetDepthBoundsTestEnableEXT vkCmdSetDepthBoundsTestEnableEXT{}; |
| 234 | PFN_vkCmdSetDepthCompareOpEXT vkCmdSetDepthCompareOpEXT{}; | 229 | PFN_vkCmdSetDepthCompareOpEXT vkCmdSetDepthCompareOpEXT{}; |
| 235 | PFN_vkCmdSetDepthTestEnableEXT vkCmdSetDepthTestEnableEXT{}; | 230 | PFN_vkCmdSetDepthTestEnableEXT vkCmdSetDepthTestEnableEXT{}; |
| 236 | PFN_vkCmdSetDepthWriteEnableEXT vkCmdSetDepthWriteEnableEXT{}; | 231 | PFN_vkCmdSetDepthWriteEnableEXT vkCmdSetDepthWriteEnableEXT{}; |
| 232 | PFN_vkCmdSetEvent vkCmdSetEvent{}; | ||
| 237 | PFN_vkCmdSetFrontFaceEXT vkCmdSetFrontFaceEXT{}; | 233 | PFN_vkCmdSetFrontFaceEXT vkCmdSetFrontFaceEXT{}; |
| 234 | PFN_vkCmdSetLineWidth vkCmdSetLineWidth{}; | ||
| 238 | PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT{}; | 235 | PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT{}; |
| 236 | PFN_vkCmdSetScissor vkCmdSetScissor{}; | ||
| 237 | PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask{}; | ||
| 239 | PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT{}; | 238 | PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT{}; |
| 239 | PFN_vkCmdSetStencilReference vkCmdSetStencilReference{}; | ||
| 240 | PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT{}; | 240 | PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT{}; |
| 241 | PFN_vkCmdResolveImage vkCmdResolveImage{}; | 241 | PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask{}; |
| 242 | PFN_vkCmdSetVertexInputEXT vkCmdSetVertexInputEXT{}; | ||
| 243 | PFN_vkCmdSetViewport vkCmdSetViewport{}; | ||
| 244 | PFN_vkCmdWaitEvents vkCmdWaitEvents{}; | ||
| 242 | PFN_vkCreateBuffer vkCreateBuffer{}; | 245 | PFN_vkCreateBuffer vkCreateBuffer{}; |
| 243 | PFN_vkCreateBufferView vkCreateBufferView{}; | 246 | PFN_vkCreateBufferView vkCreateBufferView{}; |
| 244 | PFN_vkCreateCommandPool vkCreateCommandPool{}; | 247 | PFN_vkCreateCommandPool vkCreateCommandPool{}; |
| @@ -989,6 +992,12 @@ public: | |||
| 989 | dynamic_offsets.size(), dynamic_offsets.data()); | 992 | dynamic_offsets.size(), dynamic_offsets.data()); |
| 990 | } | 993 | } |
| 991 | 994 | ||
| 995 | void PushDescriptorSetWithTemplateKHR(VkDescriptorUpdateTemplateKHR update_template, | ||
| 996 | VkPipelineLayout layout, u32 set, | ||
| 997 | const void* data) const noexcept { | ||
| 998 | dld->vkCmdPushDescriptorSetWithTemplateKHR(handle, update_template, layout, set, data); | ||
| 999 | } | ||
| 1000 | |||
| 992 | void BindPipeline(VkPipelineBindPoint bind_point, VkPipeline pipeline) const noexcept { | 1001 | void BindPipeline(VkPipelineBindPoint bind_point, VkPipeline pipeline) const noexcept { |
| 993 | dld->vkCmdBindPipeline(handle, bind_point, pipeline); | 1002 | dld->vkCmdBindPipeline(handle, bind_point, pipeline); |
| 994 | } | 1003 | } |
| @@ -1190,6 +1199,10 @@ public: | |||
| 1190 | dld->vkCmdSetFrontFaceEXT(handle, front_face); | 1199 | dld->vkCmdSetFrontFaceEXT(handle, front_face); |
| 1191 | } | 1200 | } |
| 1192 | 1201 | ||
| 1202 | void SetLineWidth(float line_width) const noexcept { | ||
| 1203 | dld->vkCmdSetLineWidth(handle, line_width); | ||
| 1204 | } | ||
| 1205 | |||
| 1193 | void SetPrimitiveTopologyEXT(VkPrimitiveTopology primitive_topology) const noexcept { | 1206 | void SetPrimitiveTopologyEXT(VkPrimitiveTopology primitive_topology) const noexcept { |
| 1194 | dld->vkCmdSetPrimitiveTopologyEXT(handle, primitive_topology); | 1207 | dld->vkCmdSetPrimitiveTopologyEXT(handle, primitive_topology); |
| 1195 | } | 1208 | } |
| @@ -1203,6 +1216,13 @@ public: | |||
| 1203 | dld->vkCmdSetStencilTestEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); | 1216 | dld->vkCmdSetStencilTestEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); |
| 1204 | } | 1217 | } |
| 1205 | 1218 | ||
| 1219 | void SetVertexInputEXT( | ||
| 1220 | vk::Span<VkVertexInputBindingDescription2EXT> bindings, | ||
| 1221 | vk::Span<VkVertexInputAttributeDescription2EXT> attributes) const noexcept { | ||
| 1222 | dld->vkCmdSetVertexInputEXT(handle, bindings.size(), bindings.data(), attributes.size(), | ||
| 1223 | attributes.data()); | ||
| 1224 | } | ||
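| | // Hypothetical usage sketch (the 'cmdbuf' name is assumed): with | ||
| | // VK_EXT_vertex_input_dynamic_state enabled, vertex layouts can change | ||
| | // per draw without rebuilding the pipeline: | ||
| | //   cmdbuf.SetVertexInputEXT(bindings, attributes); | ||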
| 1225 | |||
| 1206 | void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers, | 1226 | void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers, |
| 1207 | const VkDeviceSize* offsets, | 1227 | const VkDeviceSize* offsets, |
| 1208 | const VkDeviceSize* sizes) const noexcept { | 1228 | const VkDeviceSize* sizes) const noexcept { |
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index d72ca5acc..25b658b2a 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp | |||
| @@ -64,12 +64,13 @@ void EmuThread::run() { | |||
| 64 | 64 | ||
| 65 | emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0); | 65 | emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0); |
| 66 | 66 | ||
| 67 | system.Renderer().ReadRasterizer()->LoadDiskResources( | 67 | if (Settings::values.use_disk_shader_cache.GetValue()) { |
| 68 | system.CurrentProcess()->GetTitleID(), stop_token, | 68 | system.Renderer().ReadRasterizer()->LoadDiskResources( |
| 69 | [this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) { | 69 | system.CurrentProcess()->GetTitleID(), stop_token, |
| 70 | emit LoadProgress(stage, value, total); | 70 | [this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) { |
| 71 | }); | 71 | emit LoadProgress(stage, value, total); |
| 72 | 72 | }); | |
| 73 | } | ||
| 73 | emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0); | 74 | emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0); |
| 74 | 75 | ||
| 75 | gpu.ReleaseContext(); | 76 | gpu.ReleaseContext(); |
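[Editor's note] The hunk above makes the disk shader cache load conditional on use_disk_shader_cache while still emitting the final Complete progress event unconditionally, so the loading screen always closes. A self-contained sketch of the callback shape involved (names here are illustrative, not yuzu's):

    #include <cstddef>
    #include <functional>
    #include <stop_token>

    using ProgressCallback = std::function<void(std::size_t value, std::size_t total)>;

    // A long-running loader polls the std::stop_token so the emu thread can be
    // torn down mid-load, and reports progress through the supplied callback,
    // which the Qt side wraps in a lambda that re-emits a LoadProgress signal.
    void LoadDiskResourcesSketch(std::stop_token stop_token, const ProgressCallback& progress) {
        constexpr std::size_t total = 100;
        for (std::size_t value = 0; value < total; ++value) {
            if (stop_token.stop_requested()) {
                return; // cooperative cancellation
            }
            progress(value, total);
        }
    }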
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 9fe5613b6..85c37b842 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp | |||
| @@ -814,7 +814,7 @@ void Config::ReadRendererValues() { | |||
| 814 | ReadGlobalSetting(Settings::values.use_nvdec_emulation); | 814 | ReadGlobalSetting(Settings::values.use_nvdec_emulation); |
| 815 | ReadGlobalSetting(Settings::values.accelerate_astc); | 815 | ReadGlobalSetting(Settings::values.accelerate_astc); |
| 816 | ReadGlobalSetting(Settings::values.use_vsync); | 816 | ReadGlobalSetting(Settings::values.use_vsync); |
| 817 | ReadGlobalSetting(Settings::values.use_assembly_shaders); | 817 | ReadGlobalSetting(Settings::values.shader_backend); |
| 818 | ReadGlobalSetting(Settings::values.use_asynchronous_shaders); | 818 | ReadGlobalSetting(Settings::values.use_asynchronous_shaders); |
| 819 | ReadGlobalSetting(Settings::values.use_fast_gpu_time); | 819 | ReadGlobalSetting(Settings::values.use_fast_gpu_time); |
| 820 | ReadGlobalSetting(Settings::values.use_caches_gc); | 820 | ReadGlobalSetting(Settings::values.use_caches_gc); |
| @@ -824,6 +824,8 @@ void Config::ReadRendererValues() { | |||
| 824 | 824 | ||
| 825 | if (global) { | 825 | if (global) { |
| 826 | ReadBasicSetting(Settings::values.renderer_debug); | 826 | ReadBasicSetting(Settings::values.renderer_debug); |
| 827 | ReadBasicSetting(Settings::values.enable_nsight_aftermath); | ||
| 828 | ReadBasicSetting(Settings::values.disable_shader_loop_safety_checks); | ||
| 827 | } | 829 | } |
| 828 | 830 | ||
| 829 | qt_config->endGroup(); | 831 | qt_config->endGroup(); |
| @@ -1346,7 +1348,10 @@ void Config::SaveRendererValues() { | |||
| 1346 | WriteGlobalSetting(Settings::values.use_nvdec_emulation); | 1348 | WriteGlobalSetting(Settings::values.use_nvdec_emulation); |
| 1347 | WriteGlobalSetting(Settings::values.accelerate_astc); | 1349 | WriteGlobalSetting(Settings::values.accelerate_astc); |
| 1348 | WriteGlobalSetting(Settings::values.use_vsync); | 1350 | WriteGlobalSetting(Settings::values.use_vsync); |
| 1349 | WriteGlobalSetting(Settings::values.use_assembly_shaders); | 1351 | WriteSetting(QString::fromStdString(Settings::values.shader_backend.GetLabel()), |
| 1352 | static_cast<u32>(Settings::values.shader_backend.GetValue(global)), | ||
| 1353 | static_cast<u32>(Settings::values.shader_backend.GetDefault()), | ||
| 1354 | Settings::values.shader_backend.UsingGlobal()); | ||
| 1350 | WriteGlobalSetting(Settings::values.use_asynchronous_shaders); | 1355 | WriteGlobalSetting(Settings::values.use_asynchronous_shaders); |
| 1351 | WriteGlobalSetting(Settings::values.use_fast_gpu_time); | 1356 | WriteGlobalSetting(Settings::values.use_fast_gpu_time); |
| 1352 | WriteGlobalSetting(Settings::values.use_caches_gc); | 1357 | WriteGlobalSetting(Settings::values.use_caches_gc); |
| @@ -1356,6 +1361,8 @@ void Config::SaveRendererValues() { | |||
| 1356 | 1361 | ||
| 1357 | if (global) { | 1362 | if (global) { |
| 1358 | WriteBasicSetting(Settings::values.renderer_debug); | 1363 | WriteBasicSetting(Settings::values.renderer_debug); |
| 1364 | WriteBasicSetting(Settings::values.enable_nsight_aftermath); | ||
| 1365 | WriteBasicSetting(Settings::values.disable_shader_loop_safety_checks); | ||
| 1359 | } | 1366 | } |
| 1360 | 1367 | ||
| 1361 | qt_config->endGroup(); | 1368 | qt_config->endGroup(); |
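[Editor's note] shader_backend is enum-backed, so it bypasses the WriteGlobalSetting template: the value and default are widened to u32 by hand and written together with the use-global flag. A hedged sketch of what such a helper could look like on top of QSettings; the key layout below is an assumption for illustration, not yuzu's actual scheme:

    #include <QSettings>
    #include <QString>
    #include <cstdint>

    using u32 = std::uint32_t;

    // Hypothetical helper: records whether the value equals its default (so
    // reads can fall back cheaply), the value itself, and the use-global flag.
    void WriteSettingSketch(QSettings& qt_config, const QString& name, u32 value,
                            u32 default_value, bool use_global) {
        qt_config.setValue(name + QStringLiteral("/default"), value == default_value);
        qt_config.setValue(name, value);
        qt_config.setValue(name + QStringLiteral("/use_global"), use_global);
    }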
diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h index 24950e8f8..c1d7feb9f 100644 --- a/src/yuzu/configuration/config.h +++ b/src/yuzu/configuration/config.h | |||
| @@ -180,6 +180,7 @@ private: | |||
| 180 | 180 | ||
| 181 | // These metatype declarations cannot be in common/settings.h because core is devoid of QT | 181 | // These metatype declarations cannot be in common/settings.h because core is devoid of QT |
| 182 | Q_DECLARE_METATYPE(Settings::CPUAccuracy); | 182 | Q_DECLARE_METATYPE(Settings::CPUAccuracy); |
| 183 | Q_DECLARE_METATYPE(Settings::RendererBackend); | ||
| 184 | Q_DECLARE_METATYPE(Settings::GPUAccuracy); | 183 | Q_DECLARE_METATYPE(Settings::GPUAccuracy); |
| 185 | Q_DECLARE_METATYPE(Settings::FullscreenMode); | 184 | Q_DECLARE_METATYPE(Settings::FullscreenMode); |
| 185 | Q_DECLARE_METATYPE(Settings::RendererBackend); | ||
| 186 | Q_DECLARE_METATYPE(Settings::ShaderBackend); | ||
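[Editor's note] Q_DECLARE_METATYPE is what lets these enums round-trip through QVariant (for instance as QComboBox item data); the macro must appear at global scope, and common/settings.h must stay Qt-free, which is why the declarations live here. Minimal usage sketch with a stand-in enum:

    #include <QComboBox>
    #include <QMetaType>
    #include <QVariant>

    enum class ShaderBackendSketch { GLSL, GLASM, SPIRV }; // illustrative stand-in
    Q_DECLARE_METATYPE(ShaderBackendSketch);

    void Populate(QComboBox* combo) {
        // The metatype declaration makes QVariant::fromValue/value<>() well-formed:
        combo->addItem(QStringLiteral("GLASM"),
                       QVariant::fromValue(ShaderBackendSketch::GLASM));
        const auto backend = combo->itemData(0).value<ShaderBackendSketch>();
        (void)backend;
    }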
diff --git a/src/yuzu/configuration/configure_debug.cpp b/src/yuzu/configuration/configure_debug.cpp index 8fceb3878..f7e29dbd7 100644 --- a/src/yuzu/configuration/configure_debug.cpp +++ b/src/yuzu/configuration/configure_debug.cpp | |||
| @@ -45,8 +45,13 @@ void ConfigureDebug::SetConfiguration() { | |||
| 45 | ui->enable_graphics_debugging->setChecked(Settings::values.renderer_debug.GetValue()); | 45 | ui->enable_graphics_debugging->setChecked(Settings::values.renderer_debug.GetValue()); |
| 46 | ui->enable_cpu_debugging->setEnabled(runtime_lock); | 46 | ui->enable_cpu_debugging->setEnabled(runtime_lock); |
| 47 | ui->enable_cpu_debugging->setChecked(Settings::values.cpu_debug_mode.GetValue()); | 47 | ui->enable_cpu_debugging->setChecked(Settings::values.cpu_debug_mode.GetValue()); |
| 48 | ui->enable_nsight_aftermath->setEnabled(runtime_lock); | ||
| 49 | ui->enable_nsight_aftermath->setChecked(Settings::values.enable_nsight_aftermath.GetValue()); | ||
| 48 | ui->disable_macro_jit->setEnabled(runtime_lock); | 50 | ui->disable_macro_jit->setEnabled(runtime_lock); |
| 49 | ui->disable_macro_jit->setChecked(Settings::values.disable_macro_jit.GetValue()); | 51 | ui->disable_macro_jit->setChecked(Settings::values.disable_macro_jit.GetValue()); |
| 52 | ui->disable_loop_safety_checks->setEnabled(runtime_lock); | ||
| 53 | ui->disable_loop_safety_checks->setChecked( | ||
| 54 | Settings::values.disable_shader_loop_safety_checks.GetValue()); | ||
| 50 | ui->extended_logging->setChecked(Settings::values.extended_logging.GetValue()); | 55 | ui->extended_logging->setChecked(Settings::values.extended_logging.GetValue()); |
| 51 | } | 56 | } |
| 52 | 57 | ||
| @@ -61,6 +66,9 @@ void ConfigureDebug::ApplyConfiguration() { | |||
| 61 | Settings::values.use_auto_stub = ui->use_auto_stub->isChecked(); | 66 | Settings::values.use_auto_stub = ui->use_auto_stub->isChecked(); |
| 62 | Settings::values.renderer_debug = ui->enable_graphics_debugging->isChecked(); | 67 | Settings::values.renderer_debug = ui->enable_graphics_debugging->isChecked(); |
| 63 | Settings::values.cpu_debug_mode = ui->enable_cpu_debugging->isChecked(); | 68 | Settings::values.cpu_debug_mode = ui->enable_cpu_debugging->isChecked(); |
| 69 | Settings::values.enable_nsight_aftermath = ui->enable_nsight_aftermath->isChecked(); | ||
| 70 | Settings::values.disable_shader_loop_safety_checks = | ||
| 71 | ui->disable_loop_safety_checks->isChecked(); | ||
| 64 | Settings::values.disable_macro_jit = ui->disable_macro_jit->isChecked(); | 72 | Settings::values.disable_macro_jit = ui->disable_macro_jit->isChecked(); |
| 65 | Settings::values.extended_logging = ui->extended_logging->isChecked(); | 73 | Settings::values.extended_logging = ui->extended_logging->isChecked(); |
| 66 | Debugger::ToggleConsole(); | 74 | Debugger::ToggleConsole(); |
diff --git a/src/yuzu/configuration/configure_debug.ui b/src/yuzu/configuration/configure_debug.ui index 1260ad6f0..c8baf2921 100644 --- a/src/yuzu/configuration/configure_debug.ui +++ b/src/yuzu/configuration/configure_debug.ui | |||
| @@ -126,6 +126,16 @@ | |||
| 126 | </widget> | 126 | </widget> |
| 127 | </item> | 127 | </item> |
| 128 | <item> | 128 | <item> |
| 129 | <widget class="QCheckBox" name="enable_nsight_aftermath"> | ||
| 130 | <property name="toolTip"> | ||
| 131 | <string>When checked, it enables Nsight Aftermath crash dumps</string> | ||
| 132 | </property> | ||
| 133 | <property name="text"> | ||
| 134 | <string>Enable Nsight Aftermath</string> | ||
| 135 | </property> | ||
| 136 | </widget> | ||
| 137 | </item> | ||
| 138 | <item> | ||
| 129 | <widget class="QCheckBox" name="disable_macro_jit"> | 139 | <widget class="QCheckBox" name="disable_macro_jit"> |
| 130 | <property name="enabled"> | 140 | <property name="enabled"> |
| 131 | <bool>true</bool> | 141 | <bool>true</bool> |
| @@ -138,6 +148,16 @@ | |||
| 138 | </property> | 148 | </property> |
| 139 | </widget> | 149 | </widget> |
| 140 | </item> | 150 | </item> |
| 151 | <item> | ||
| 152 | <widget class="QCheckBox" name="disable_loop_safety_checks"> | ||
| 153 | <property name="toolTip"> | ||
| 154 | <string>When checked, it executes shaders without loop logic changes</string> | ||
| 155 | </property> | ||
| 156 | <property name="text"> | ||
| 157 | <string>Disable Loop safety checks</string> | ||
| 158 | </property> | ||
| 159 | </widget> | ||
| 160 | </item> | ||
| 141 | </layout> | 161 | </layout> |
| 142 | </widget> | 162 | </widget> |
| 143 | </item> | 163 | </item> |
| @@ -252,11 +272,17 @@ | |||
| 252 | <tabstops> | 272 | <tabstops> |
| 253 | <tabstop>log_filter_edit</tabstop> | 273 | <tabstop>log_filter_edit</tabstop> |
| 254 | <tabstop>toggle_console</tabstop> | 274 | <tabstop>toggle_console</tabstop> |
| 275 | <tabstop>extended_logging</tabstop> | ||
| 255 | <tabstop>open_log_button</tabstop> | 276 | <tabstop>open_log_button</tabstop> |
| 256 | <tabstop>homebrew_args_edit</tabstop> | 277 | <tabstop>homebrew_args_edit</tabstop> |
| 257 | <tabstop>enable_graphics_debugging</tabstop> | 278 | <tabstop>enable_graphics_debugging</tabstop> |
| 279 | <tabstop>enable_nsight_aftermath</tabstop> | ||
| 280 | <tabstop>disable_macro_jit</tabstop> | ||
| 281 | <tabstop>disable_loop_safety_checks</tabstop> | ||
| 258 | <tabstop>reporting_services</tabstop> | 282 | <tabstop>reporting_services</tabstop> |
| 259 | <tabstop>quest_flag</tabstop> | 283 | <tabstop>quest_flag</tabstop> |
| 284 | <tabstop>use_debug_asserts</tabstop> | ||
| 285 | <tabstop>use_auto_stub</tabstop> | ||
| 260 | </tabstops> | 286 | </tabstops> |
| 261 | <resources/> | 287 | <resources/> |
| 262 | <connections/> | 288 | <connections/> |
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index 6287a3caa..4a5b17740 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp | |||
| @@ -26,19 +26,29 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent) | |||
| 26 | 26 | ||
| 27 | ui->setupUi(this); | 27 | ui->setupUi(this); |
| 28 | 28 | ||
| 29 | for (const auto& device : vulkan_devices) { | ||
| 30 | ui->device->addItem(device); | ||
| 31 | } | ||
| 32 | |||
| 33 | ui->backend->addItem(QStringLiteral("GLSL")); | ||
| 34 | ui->backend->addItem(tr("GLASM (NVIDIA Only)")); | ||
| 35 | ui->backend->addItem(QStringLiteral("SPIR-V (Experimental, Mesa Only)")); | ||
| 36 | |||
| 29 | SetupPerGameUI(); | 37 | SetupPerGameUI(); |
| 30 | 38 | ||
| 31 | SetConfiguration(); | 39 | SetConfiguration(); |
| 32 | 40 | ||
| 33 | connect(ui->api, qOverload<int>(&QComboBox::currentIndexChanged), this, [this] { | 41 | connect(ui->api, qOverload<int>(&QComboBox::currentIndexChanged), this, [this] { |
| 34 | UpdateDeviceComboBox(); | 42 | UpdateAPILayout(); |
| 35 | if (!Settings::IsConfiguringGlobal()) { | 43 | if (!Settings::IsConfiguringGlobal()) { |
| 36 | ConfigurationShared::SetHighlight( | 44 | ConfigurationShared::SetHighlight( |
| 37 | ui->api_layout, ui->api->currentIndex() != ConfigurationShared::USE_GLOBAL_INDEX); | 45 | ui->api_widget, ui->api->currentIndex() != ConfigurationShared::USE_GLOBAL_INDEX); |
| 38 | } | 46 | } |
| 39 | }); | 47 | }); |
| 40 | connect(ui->device, qOverload<int>(&QComboBox::activated), this, | 48 | connect(ui->device, qOverload<int>(&QComboBox::activated), this, |
| 41 | [this](int device) { UpdateDeviceSelection(device); }); | 49 | [this](int device) { UpdateDeviceSelection(device); }); |
| 50 | connect(ui->backend, qOverload<int>(&QComboBox::activated), this, | ||
| 51 | [this](int backend) { UpdateShaderBackendSelection(backend); }); | ||
| 42 | 52 | ||
| 43 | connect(ui->bg_button, &QPushButton::clicked, this, [this] { | 53 | connect(ui->bg_button, &QPushButton::clicked, this, [this] { |
| 44 | const QColor new_bg_color = QColorDialog::getColor(bg_color); | 54 | const QColor new_bg_color = QColorDialog::getColor(bg_color); |
| @@ -61,12 +71,21 @@ void ConfigureGraphics::UpdateDeviceSelection(int device) { | |||
| 61 | } | 71 | } |
| 62 | } | 72 | } |
| 63 | 73 | ||
| 74 | void ConfigureGraphics::UpdateShaderBackendSelection(int backend) { | ||
| 75 | if (backend == -1) { | ||
| 76 | return; | ||
| 77 | } | ||
| 78 | if (GetCurrentGraphicsBackend() == Settings::RendererBackend::OpenGL) { | ||
| 79 | shader_backend = static_cast<Settings::ShaderBackend>(backend); | ||
| 80 | } | ||
| 81 | } | ||
| 82 | |||
| 64 | ConfigureGraphics::~ConfigureGraphics() = default; | 83 | ConfigureGraphics::~ConfigureGraphics() = default; |
| 65 | 84 | ||
| 66 | void ConfigureGraphics::SetConfiguration() { | 85 | void ConfigureGraphics::SetConfiguration() { |
| 67 | const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn(); | 86 | const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn(); |
| 68 | 87 | ||
| 69 | ui->api->setEnabled(runtime_lock); | 88 | ui->api_widget->setEnabled(runtime_lock); |
| 70 | ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock); | 89 | ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock); |
| 71 | ui->use_disk_shader_cache->setEnabled(runtime_lock); | 90 | ui->use_disk_shader_cache->setEnabled(runtime_lock); |
| 72 | ui->use_nvdec_emulation->setEnabled(runtime_lock); | 91 | ui->use_nvdec_emulation->setEnabled(runtime_lock); |
| @@ -84,7 +103,7 @@ void ConfigureGraphics::SetConfiguration() { | |||
| 84 | ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio.GetValue()); | 103 | ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio.GetValue()); |
| 85 | } else { | 104 | } else { |
| 86 | ConfigurationShared::SetPerGameSetting(ui->api, &Settings::values.renderer_backend); | 105 | ConfigurationShared::SetPerGameSetting(ui->api, &Settings::values.renderer_backend); |
| 87 | ConfigurationShared::SetHighlight(ui->api_layout, | 106 | ConfigurationShared::SetHighlight(ui->api_widget, |
| 88 | !Settings::values.renderer_backend.UsingGlobal()); | 107 | !Settings::values.renderer_backend.UsingGlobal()); |
| 89 | 108 | ||
| 90 | ConfigurationShared::SetPerGameSetting(ui->fullscreen_mode_combobox, | 109 | ConfigurationShared::SetPerGameSetting(ui->fullscreen_mode_combobox, |
| @@ -101,11 +120,10 @@ void ConfigureGraphics::SetConfiguration() { | |||
| 101 | ui->bg_button->setEnabled(!Settings::values.bg_red.UsingGlobal()); | 120 | ui->bg_button->setEnabled(!Settings::values.bg_red.UsingGlobal()); |
| 102 | ConfigurationShared::SetHighlight(ui->bg_layout, !Settings::values.bg_red.UsingGlobal()); | 121 | ConfigurationShared::SetHighlight(ui->bg_layout, !Settings::values.bg_red.UsingGlobal()); |
| 103 | } | 122 | } |
| 104 | |||
| 105 | UpdateBackgroundColorButton(QColor::fromRgb(Settings::values.bg_red.GetValue(), | 123 | UpdateBackgroundColorButton(QColor::fromRgb(Settings::values.bg_red.GetValue(), |
| 106 | Settings::values.bg_green.GetValue(), | 124 | Settings::values.bg_green.GetValue(), |
| 107 | Settings::values.bg_blue.GetValue())); | 125 | Settings::values.bg_blue.GetValue())); |
| 108 | UpdateDeviceComboBox(); | 126 | UpdateAPILayout(); |
| 109 | } | 127 | } |
| 110 | 128 | ||
| 111 | void ConfigureGraphics::ApplyConfiguration() { | 129 | void ConfigureGraphics::ApplyConfiguration() { |
| @@ -129,6 +147,9 @@ void ConfigureGraphics::ApplyConfiguration() { | |||
| 129 | if (Settings::values.renderer_backend.UsingGlobal()) { | 147 | if (Settings::values.renderer_backend.UsingGlobal()) { |
| 130 | Settings::values.renderer_backend.SetValue(GetCurrentGraphicsBackend()); | 148 | Settings::values.renderer_backend.SetValue(GetCurrentGraphicsBackend()); |
| 131 | } | 149 | } |
| 150 | if (Settings::values.shader_backend.UsingGlobal()) { | ||
| 151 | Settings::values.shader_backend.SetValue(shader_backend); | ||
| 152 | } | ||
| 132 | if (Settings::values.vulkan_device.UsingGlobal()) { | 153 | if (Settings::values.vulkan_device.UsingGlobal()) { |
| 133 | Settings::values.vulkan_device.SetValue(vulkan_device); | 154 | Settings::values.vulkan_device.SetValue(vulkan_device); |
| 134 | } | 155 | } |
| @@ -140,15 +161,22 @@ void ConfigureGraphics::ApplyConfiguration() { | |||
| 140 | } else { | 161 | } else { |
| 141 | if (ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { | 162 | if (ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { |
| 142 | Settings::values.renderer_backend.SetGlobal(true); | 163 | Settings::values.renderer_backend.SetGlobal(true); |
| 164 | Settings::values.shader_backend.SetGlobal(true); | ||
| 143 | Settings::values.vulkan_device.SetGlobal(true); | 165 | Settings::values.vulkan_device.SetGlobal(true); |
| 144 | } else { | 166 | } else { |
| 145 | Settings::values.renderer_backend.SetGlobal(false); | 167 | Settings::values.renderer_backend.SetGlobal(false); |
| 146 | Settings::values.renderer_backend.SetValue(GetCurrentGraphicsBackend()); | 168 | Settings::values.renderer_backend.SetValue(GetCurrentGraphicsBackend()); |
| 147 | if (GetCurrentGraphicsBackend() == Settings::RendererBackend::Vulkan) { | 169 | switch (GetCurrentGraphicsBackend()) { |
| 170 | case Settings::RendererBackend::OpenGL: | ||
| 171 | Settings::values.shader_backend.SetGlobal(false); | ||
| 172 | Settings::values.vulkan_device.SetGlobal(true); | ||
| 173 | Settings::values.shader_backend.SetValue(shader_backend); | ||
| 174 | break; | ||
| 175 | case Settings::RendererBackend::Vulkan: | ||
| 176 | Settings::values.shader_backend.SetGlobal(true); | ||
| 148 | Settings::values.vulkan_device.SetGlobal(false); | 177 | Settings::values.vulkan_device.SetGlobal(false); |
| 149 | Settings::values.vulkan_device.SetValue(vulkan_device); | 178 | Settings::values.vulkan_device.SetValue(vulkan_device); |
| 150 | } else { | 179 | break; |
| 151 | Settings::values.vulkan_device.SetGlobal(true); | ||
| 152 | } | 180 | } |
| 153 | } | 181 | } |
| 154 | 182 | ||
| @@ -189,32 +217,32 @@ void ConfigureGraphics::UpdateBackgroundColorButton(QColor color) { | |||
| 189 | ui->bg_button->setIcon(color_icon); | 217 | ui->bg_button->setIcon(color_icon); |
| 190 | } | 218 | } |
| 191 | 219 | ||
| 192 | void ConfigureGraphics::UpdateDeviceComboBox() { | 220 | void ConfigureGraphics::UpdateAPILayout() { |
| 193 | ui->device->clear(); | ||
| 194 | |||
| 195 | bool enabled = false; | ||
| 196 | |||
| 197 | if (!Settings::IsConfiguringGlobal() && | 221 | if (!Settings::IsConfiguringGlobal() && |
| 198 | ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { | 222 | ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { |
| 223 | vulkan_device = Settings::values.vulkan_device.GetValue(true); | ||
| 224 | shader_backend = Settings::values.shader_backend.GetValue(true); | ||
| 225 | ui->device_widget->setEnabled(false); | ||
| 226 | ui->backend_widget->setEnabled(false); | ||
| 227 | } else { | ||
| 199 | vulkan_device = Settings::values.vulkan_device.GetValue(); | 228 | vulkan_device = Settings::values.vulkan_device.GetValue(); |
| 229 | shader_backend = Settings::values.shader_backend.GetValue(); | ||
| 230 | ui->device_widget->setEnabled(true); | ||
| 231 | ui->backend_widget->setEnabled(true); | ||
| 200 | } | 232 | } |
| 233 | |||
| 201 | switch (GetCurrentGraphicsBackend()) { | 234 | switch (GetCurrentGraphicsBackend()) { |
| 202 | case Settings::RendererBackend::OpenGL: | 235 | case Settings::RendererBackend::OpenGL: |
| 203 | ui->device->addItem(tr("OpenGL Graphics Device")); | 236 | ui->backend->setCurrentIndex(static_cast<u32>(shader_backend)); |
| 204 | enabled = false; | 237 | ui->device_widget->setVisible(false); |
| 238 | ui->backend_widget->setVisible(true); | ||
| 205 | break; | 239 | break; |
| 206 | case Settings::RendererBackend::Vulkan: | 240 | case Settings::RendererBackend::Vulkan: |
| 207 | for (const auto& device : vulkan_devices) { | ||
| 208 | ui->device->addItem(device); | ||
| 209 | } | ||
| 210 | ui->device->setCurrentIndex(vulkan_device); | 241 | ui->device->setCurrentIndex(vulkan_device); |
| 211 | enabled = !vulkan_devices.empty(); | 242 | ui->device_widget->setVisible(true); |
| 243 | ui->backend_widget->setVisible(false); | ||
| 212 | break; | 244 | break; |
| 213 | } | 245 | } |
| 214 | // If in per-game config and use global is selected, don't enable. | ||
| 215 | enabled &= !(!Settings::IsConfiguringGlobal() && | ||
| 216 | ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX); | ||
| 217 | ui->device->setEnabled(enabled && !Core::System::GetInstance().IsPoweredOn()); | ||
| 218 | } | 246 | } |
| 219 | 247 | ||
| 220 | void ConfigureGraphics::RetrieveVulkanDevices() try { | 248 | void ConfigureGraphics::RetrieveVulkanDevices() try { |
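[Editor's note] In UpdateAPILayout the two branches differ only in which value is read and whether the widgets are enabled; the diff reads the global value explicitly via GetValue(true). Assuming that boolean parameter means "read the global value" (as the call sites above suggest), the branches collapse to a sketch like:

    // Sketch only: follows_global is true when a per-game config has "use
    // global" selected in the API combo box, in which case the widgets show
    // the global values and are greyed out.
    const bool follows_global = !Settings::IsConfiguringGlobal() &&
                                ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX;
    vulkan_device = Settings::values.vulkan_device.GetValue(follows_global);
    shader_backend = Settings::values.shader_backend.GetValue(follows_global);
    ui->device_widget->setEnabled(!follows_global);
    ui->backend_widget->setEnabled(!follows_global);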
diff --git a/src/yuzu/configuration/configure_graphics.h b/src/yuzu/configuration/configure_graphics.h index 6418115cf..c866b911b 100644 --- a/src/yuzu/configuration/configure_graphics.h +++ b/src/yuzu/configuration/configure_graphics.h | |||
| @@ -34,8 +34,9 @@ private: | |||
| 34 | void SetConfiguration(); | 34 | void SetConfiguration(); |
| 35 | 35 | ||
| 36 | void UpdateBackgroundColorButton(QColor color); | 36 | void UpdateBackgroundColorButton(QColor color); |
| 37 | void UpdateDeviceComboBox(); | 37 | void UpdateAPILayout(); |
| 38 | void UpdateDeviceSelection(int device); | 38 | void UpdateDeviceSelection(int device); |
| 39 | void UpdateShaderBackendSelection(int backend); | ||
| 39 | 40 | ||
| 40 | void RetrieveVulkanDevices(); | 41 | void RetrieveVulkanDevices(); |
| 41 | 42 | ||
| @@ -53,4 +54,5 @@ private: | |||
| 53 | 54 | ||
| 54 | std::vector<QString> vulkan_devices; | 55 | std::vector<QString> vulkan_devices; |
| 55 | u32 vulkan_device{}; | 56 | u32 vulkan_device{}; |
| 57 | Settings::ShaderBackend shader_backend{}; | ||
| 56 | }; | 58 | }; |
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index 5b999d84d..099ddbb7c 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui | |||
| @@ -23,7 +23,7 @@ | |||
| 23 | </property> | 23 | </property> |
| 24 | <layout class="QVBoxLayout" name="verticalLayout_3"> | 24 | <layout class="QVBoxLayout" name="verticalLayout_3"> |
| 25 | <item> | 25 | <item> |
| 26 | <widget class="QWidget" name="api_layout" native="true"> | 26 | <widget class="QWidget" name="api_widget" native="true"> |
| 27 | <layout class="QGridLayout" name="gridLayout"> | 27 | <layout class="QGridLayout" name="gridLayout"> |
| 28 | <property name="leftMargin"> | 28 | <property name="leftMargin"> |
| 29 | <number>0</number> | 29 | <number>0</number> |
| @@ -40,37 +40,107 @@ | |||
| 40 | <property name="horizontalSpacing"> | 40 | <property name="horizontalSpacing"> |
| 41 | <number>6</number> | 41 | <number>6</number> |
| 42 | </property> | 42 | </property> |
| 43 | <item row="0" column="0"> | 43 | <item row="4" column="0"> |
| 44 | <widget class="QLabel" name="api_label"> | 44 | <widget class="QWidget" name="backend_widget" native="true"> |
| 45 | <property name="text"> | 45 | <layout class="QHBoxLayout" name="backend_layout"> |
| 46 | <string>API:</string> | 46 | <property name="leftMargin"> |
| 47 | </property> | 47 | <number>0</number> |
| 48 | </property> | ||
| 49 | <property name="topMargin"> | ||
| 50 | <number>0</number> | ||
| 51 | </property> | ||
| 52 | <property name="rightMargin"> | ||
| 53 | <number>0</number> | ||
| 54 | </property> | ||
| 55 | <property name="bottomMargin"> | ||
| 56 | <number>0</number> | ||
| 57 | </property> | ||
| 58 | <item> | ||
| 59 | <widget class="QLabel" name="backend_label"> | ||
| 60 | <property name="text"> | ||
| 61 | <string>Shader Backend:</string> | ||
| 62 | </property> | ||
| 63 | </widget> | ||
| 64 | </item> | ||
| 65 | <item> | ||
| 66 | <widget class="QComboBox" name="backend"/> | ||
| 67 | </item> | ||
| 68 | </layout> | ||
| 48 | </widget> | 69 | </widget> |
| 49 | </item> | 70 | </item> |
| 50 | <item row="0" column="1"> | 71 | <item row="2" column="0"> |
| 51 | <widget class="QComboBox" name="api"> | 72 | <widget class="QWidget" name="device_widget" native="true"> |
| 52 | <item> | 73 | <layout class="QHBoxLayout" name="device_layout"> |
| 53 | <property name="text"> | 74 | <property name="leftMargin"> |
| 54 | <string notr="true">OpenGL</string> | 75 | <number>0</number> |
| 55 | </property> | 76 | </property> |
| 56 | </item> | 77 | <property name="topMargin"> |
| 57 | <item> | 78 | <number>0</number> |
| 58 | <property name="text"> | ||
| 59 | <string notr="true">Vulkan</string> | ||
| 60 | </property> | 79 | </property> |
| 61 | </item> | 80 | <property name="rightMargin"> |
| 81 | <number>0</number> | ||
| 82 | </property> | ||
| 83 | <property name="bottomMargin"> | ||
| 84 | <number>0</number> | ||
| 85 | </property> | ||
| 86 | <item> | ||
| 87 | <widget class="QLabel" name="device_label"> | ||
| 88 | <property name="text"> | ||
| 89 | <string>Device:</string> | ||
| 90 | </property> | ||
| 91 | </widget> | ||
| 92 | </item> | ||
| 93 | <item> | ||
| 94 | <widget class="QComboBox" name="device"/> | ||
| 95 | </item> | ||
| 96 | </layout> | ||
| 62 | </widget> | 97 | </widget> |
| 63 | </item> | 98 | </item> |
| 64 | <item row="1" column="0"> | 99 | <item row="0" column="0"> |
| 65 | <widget class="QLabel" name="device_label"> | 100 | <widget class="QWidget" name="api_layout_2" native="true"> |
| 66 | <property name="text"> | 101 | <layout class="QHBoxLayout" name="api_layout"> |
| 67 | <string>Device:</string> | 102 | <property name="leftMargin"> |
| 68 | </property> | 103 | <number>0</number> |
| 104 | </property> | ||
| 105 | <property name="topMargin"> | ||
| 106 | <number>0</number> | ||
| 107 | </property> | ||
| 108 | <property name="rightMargin"> | ||
| 109 | <number>0</number> | ||
| 110 | </property> | ||
| 111 | <property name="bottomMargin"> | ||
| 112 | <number>0</number> | ||
| 113 | </property> | ||
| 114 | <item> | ||
| 115 | <widget class="QLabel" name="api_label"> | ||
| 116 | <property name="text"> | ||
| 117 | <string>API:</string> | ||
| 118 | </property> | ||
| 119 | </widget> | ||
| 120 | </item> | ||
| 121 | <item> | ||
| 122 | <widget class="QComboBox" name="api"> | ||
| 123 | <property name="sizePolicy"> | ||
| 124 | <sizepolicy hsizetype="Preferred" vsizetype="Fixed"> | ||
| 125 | <horstretch>0</horstretch> | ||
| 126 | <verstretch>0</verstretch> | ||
| 127 | </sizepolicy> | ||
| 128 | </property> | ||
| 129 | <item> | ||
| 130 | <property name="text"> | ||
| 131 | <string notr="true">OpenGL</string> | ||
| 132 | </property> | ||
| 133 | </item> | ||
| 134 | <item> | ||
| 135 | <property name="text"> | ||
| 136 | <string notr="true">Vulkan</string> | ||
| 137 | </property> | ||
| 138 | </item> | ||
| 139 | </widget> | ||
| 140 | </item> | ||
| 141 | </layout> | ||
| 69 | </widget> | 142 | </widget> |
| 70 | </item> | 143 | </item> |
| 71 | <item row="1" column="1"> | ||
| 72 | <widget class="QComboBox" name="device"/> | ||
| 73 | </item> | ||
| 74 | </layout> | 144 | </layout> |
| 75 | </widget> | 145 | </widget> |
| 76 | </item> | 146 | </item> |
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp index e952777ab..a31b8e192 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.cpp +++ b/src/yuzu/configuration/configure_graphics_advanced.cpp | |||
| @@ -23,12 +23,10 @@ ConfigureGraphicsAdvanced::~ConfigureGraphicsAdvanced() = default; | |||
| 23 | void ConfigureGraphicsAdvanced::SetConfiguration() { | 23 | void ConfigureGraphicsAdvanced::SetConfiguration() { |
| 24 | const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn(); | 24 | const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn(); |
| 25 | ui->use_vsync->setEnabled(runtime_lock); | 25 | ui->use_vsync->setEnabled(runtime_lock); |
| 26 | ui->use_assembly_shaders->setEnabled(runtime_lock); | ||
| 27 | ui->use_asynchronous_shaders->setEnabled(runtime_lock); | 26 | ui->use_asynchronous_shaders->setEnabled(runtime_lock); |
| 28 | ui->anisotropic_filtering_combobox->setEnabled(runtime_lock); | 27 | ui->anisotropic_filtering_combobox->setEnabled(runtime_lock); |
| 29 | 28 | ||
| 30 | ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue()); | 29 | ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue()); |
| 31 | ui->use_assembly_shaders->setChecked(Settings::values.use_assembly_shaders.GetValue()); | ||
| 32 | ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue()); | 30 | ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue()); |
| 33 | ui->use_caches_gc->setChecked(Settings::values.use_caches_gc.GetValue()); | 31 | ui->use_caches_gc->setChecked(Settings::values.use_caches_gc.GetValue()); |
| 34 | ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue()); | 32 | ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue()); |
| @@ -54,8 +52,6 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() { | |||
| 54 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.max_anisotropy, | 52 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.max_anisotropy, |
| 55 | ui->anisotropic_filtering_combobox); | 53 | ui->anisotropic_filtering_combobox); |
| 56 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vsync, ui->use_vsync, use_vsync); | 54 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vsync, ui->use_vsync, use_vsync); |
| 57 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_assembly_shaders, | ||
| 58 | ui->use_assembly_shaders, use_assembly_shaders); | ||
| 59 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders, | 55 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders, |
| 60 | ui->use_asynchronous_shaders, | 56 | ui->use_asynchronous_shaders, |
| 61 | use_asynchronous_shaders); | 57 | use_asynchronous_shaders); |
| @@ -82,7 +78,6 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { | |||
| 82 | if (Settings::IsConfiguringGlobal()) { | 78 | if (Settings::IsConfiguringGlobal()) { |
| 83 | ui->gpu_accuracy->setEnabled(Settings::values.gpu_accuracy.UsingGlobal()); | 79 | ui->gpu_accuracy->setEnabled(Settings::values.gpu_accuracy.UsingGlobal()); |
| 84 | ui->use_vsync->setEnabled(Settings::values.use_vsync.UsingGlobal()); | 80 | ui->use_vsync->setEnabled(Settings::values.use_vsync.UsingGlobal()); |
| 85 | ui->use_assembly_shaders->setEnabled(Settings::values.use_assembly_shaders.UsingGlobal()); | ||
| 86 | ui->use_asynchronous_shaders->setEnabled( | 81 | ui->use_asynchronous_shaders->setEnabled( |
| 87 | Settings::values.use_asynchronous_shaders.UsingGlobal()); | 82 | Settings::values.use_asynchronous_shaders.UsingGlobal()); |
| 88 | ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal()); | 83 | ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal()); |
| @@ -94,8 +89,6 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { | |||
| 94 | } | 89 | } |
| 95 | 90 | ||
| 96 | ConfigurationShared::SetColoredTristate(ui->use_vsync, Settings::values.use_vsync, use_vsync); | 91 | ConfigurationShared::SetColoredTristate(ui->use_vsync, Settings::values.use_vsync, use_vsync); |
| 97 | ConfigurationShared::SetColoredTristate( | ||
| 98 | ui->use_assembly_shaders, Settings::values.use_assembly_shaders, use_assembly_shaders); | ||
| 99 | ConfigurationShared::SetColoredTristate(ui->use_asynchronous_shaders, | 92 | ConfigurationShared::SetColoredTristate(ui->use_asynchronous_shaders, |
| 100 | Settings::values.use_asynchronous_shaders, | 93 | Settings::values.use_asynchronous_shaders, |
| 101 | use_asynchronous_shaders); | 94 | use_asynchronous_shaders); |
diff --git a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h index 9148aacf2..7356e6916 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.h +++ b/src/yuzu/configuration/configure_graphics_advanced.h | |||
| @@ -35,7 +35,6 @@ private: | |||
| 35 | std::unique_ptr<Ui::ConfigureGraphicsAdvanced> ui; | 35 | std::unique_ptr<Ui::ConfigureGraphicsAdvanced> ui; |
| 36 | 36 | ||
| 37 | ConfigurationShared::CheckState use_vsync; | 37 | ConfigurationShared::CheckState use_vsync; |
| 38 | ConfigurationShared::CheckState use_assembly_shaders; | ||
| 39 | ConfigurationShared::CheckState use_asynchronous_shaders; | 38 | ConfigurationShared::CheckState use_asynchronous_shaders; |
| 40 | ConfigurationShared::CheckState use_fast_gpu_time; | 39 | ConfigurationShared::CheckState use_fast_gpu_time; |
| 41 | ConfigurationShared::CheckState use_caches_gc; | 40 | ConfigurationShared::CheckState use_caches_gc; |
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index ad0840355..379dc5d2e 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui | |||
| @@ -77,22 +77,12 @@ | |||
| 77 | </widget> | 77 | </widget> |
| 78 | </item> | 78 | </item> |
| 79 | <item> | 79 | <item> |
| 80 | <widget class="QCheckBox" name="use_assembly_shaders"> | ||
| 81 | <property name="toolTip"> | ||
| 82 | <string>Enabling this reduces shader stutter. Enables OpenGL assembly shaders on supported Nvidia devices (NV_gpu_program5 is required). This feature is experimental.</string> | ||
| 83 | </property> | ||
| 84 | <property name="text"> | ||
| 85 | <string>Use assembly shaders (experimental, Nvidia OpenGL only)</string> | ||
| 86 | </property> | ||
| 87 | </widget> | ||
| 88 | </item> | ||
| 89 | <item> | ||
| 90 | <widget class="QCheckBox" name="use_asynchronous_shaders"> | 80 | <widget class="QCheckBox" name="use_asynchronous_shaders"> |
| 91 | <property name="toolTip"> | 81 | <property name="toolTip"> |
| 92 | <string>Enables asynchronous shader compilation, which may reduce shader stutter. This feature is experimental.</string> | 82 | <string>Enables asynchronous shader compilation, which may reduce shader stutter. This feature is experimental.</string> |
| 93 | </property> | 83 | </property> |
| 94 | <property name="text"> | 84 | <property name="text"> |
| 95 | <string>Use asynchronous shader building (experimental)</string> | 85 | <string>Use asynchronous shader building</string> |
| 96 | </property> | 86 | </property> |
| 97 | </widget> | 87 | </widget> |
| 98 | </item> | 88 | </item> |
| @@ -144,22 +134,22 @@ | |||
| 144 | </item> | 134 | </item> |
| 145 | <item> | 135 | <item> |
| 146 | <property name="text"> | 136 | <property name="text"> |
| 147 | <string>2x</string> | 137 | <string>2x (WILL BREAK THINGS)</string> |
| 148 | </property> | 138 | </property> |
| 149 | </item> | 139 | </item> |
| 150 | <item> | 140 | <item> |
| 151 | <property name="text"> | 141 | <property name="text"> |
| 152 | <string>4x</string> | 142 | <string>4x (WILL BREAK THINGS)</string> |
| 153 | </property> | 143 | </property> |
| 154 | </item> | 144 | </item> |
| 155 | <item> | 145 | <item> |
| 156 | <property name="text"> | 146 | <property name="text"> |
| 157 | <string>8x</string> | 147 | <string>8x (WILL BREAK THINGS)</string> |
| 158 | </property> | 148 | </property> |
| 159 | </item> | 149 | </item> |
| 160 | <item> | 150 | <item> |
| 161 | <property name="text"> | 151 | <property name="text"> |
| 162 | <string>16x</string> | 152 | <string>16x (WILL BREAK THINGS)</string> |
| 163 | </property> | 153 | </property> |
| 164 | </item> | 154 | </item> |
| 165 | </widget> | 155 | </widget> |
diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp index 76c063c97..f746bd85d 100644 --- a/src/yuzu/game_list.cpp +++ b/src/yuzu/game_list.cpp | |||
| @@ -520,9 +520,11 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri | |||
| 520 | QMenu* remove_menu = context_menu.addMenu(tr("Remove")); | 520 | QMenu* remove_menu = context_menu.addMenu(tr("Remove")); |
| 521 | QAction* remove_update = remove_menu->addAction(tr("Remove Installed Update")); | 521 | QAction* remove_update = remove_menu->addAction(tr("Remove Installed Update")); |
| 522 | QAction* remove_dlc = remove_menu->addAction(tr("Remove All Installed DLC")); | 522 | QAction* remove_dlc = remove_menu->addAction(tr("Remove All Installed DLC")); |
| 523 | QAction* remove_shader_cache = remove_menu->addAction(tr("Remove Shader Cache")); | ||
| 524 | QAction* remove_custom_config = remove_menu->addAction(tr("Remove Custom Configuration")); | 523 | QAction* remove_custom_config = remove_menu->addAction(tr("Remove Custom Configuration")); |
| 524 | QAction* remove_gl_shader_cache = remove_menu->addAction(tr("Remove OpenGL Shader Cache")); | ||
| 525 | QAction* remove_vk_shader_cache = remove_menu->addAction(tr("Remove Vulkan Shader Cache")); | ||
| 525 | remove_menu->addSeparator(); | 526 | remove_menu->addSeparator(); |
| 527 | QAction* remove_shader_cache = remove_menu->addAction(tr("Remove All Shader Caches")); | ||
| 526 | QAction* remove_all_content = remove_menu->addAction(tr("Remove All Installed Contents")); | 528 | QAction* remove_all_content = remove_menu->addAction(tr("Remove All Installed Contents")); |
| 527 | QMenu* dump_romfs_menu = context_menu.addMenu(tr("Dump RomFS")); | 529 | QMenu* dump_romfs_menu = context_menu.addMenu(tr("Dump RomFS")); |
| 528 | QAction* dump_romfs = dump_romfs_menu->addAction(tr("Dump RomFS")); | 530 | QAction* dump_romfs = dump_romfs_menu->addAction(tr("Dump RomFS")); |
| @@ -540,6 +542,8 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri | |||
| 540 | open_transferable_shader_cache->setVisible(program_id != 0); | 542 | open_transferable_shader_cache->setVisible(program_id != 0); |
| 541 | remove_update->setVisible(program_id != 0); | 543 | remove_update->setVisible(program_id != 0); |
| 542 | remove_dlc->setVisible(program_id != 0); | 544 | remove_dlc->setVisible(program_id != 0); |
| 545 | remove_gl_shader_cache->setVisible(program_id != 0); | ||
| 546 | remove_vk_shader_cache->setVisible(program_id != 0); | ||
| 543 | remove_shader_cache->setVisible(program_id != 0); | 547 | remove_shader_cache->setVisible(program_id != 0); |
| 544 | remove_all_content->setVisible(program_id != 0); | 548 | remove_all_content->setVisible(program_id != 0); |
| 545 | auto it = FindMatchingCompatibilityEntry(compatibility_list, program_id); | 549 | auto it = FindMatchingCompatibilityEntry(compatibility_list, program_id); |
| @@ -569,8 +573,14 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri | |||
| 569 | connect(remove_dlc, &QAction::triggered, [this, program_id]() { | 573 | connect(remove_dlc, &QAction::triggered, [this, program_id]() { |
| 570 | emit RemoveInstalledEntryRequested(program_id, InstalledEntryType::AddOnContent); | 574 | emit RemoveInstalledEntryRequested(program_id, InstalledEntryType::AddOnContent); |
| 571 | }); | 575 | }); |
| 576 | connect(remove_gl_shader_cache, &QAction::triggered, [this, program_id, path]() { | ||
| 577 | emit RemoveFileRequested(program_id, GameListRemoveTarget::GlShaderCache, path); | ||
| 578 | }); | ||
| 579 | connect(remove_vk_shader_cache, &QAction::triggered, [this, program_id, path]() { | ||
| 580 | emit RemoveFileRequested(program_id, GameListRemoveTarget::VkShaderCache, path); | ||
| 581 | }); | ||
| 572 | connect(remove_shader_cache, &QAction::triggered, [this, program_id, path]() { | 582 | connect(remove_shader_cache, &QAction::triggered, [this, program_id, path]() { |
| 573 | emit RemoveFileRequested(program_id, GameListRemoveTarget::ShaderCache, path); | 583 | emit RemoveFileRequested(program_id, GameListRemoveTarget::AllShaderCache, path); |
| 574 | }); | 584 | }); |
| 575 | connect(remove_custom_config, &QAction::triggered, [this, program_id, path]() { | 585 | connect(remove_custom_config, &QAction::triggered, [this, program_id, path]() { |
| 576 | emit RemoveFileRequested(program_id, GameListRemoveTarget::CustomConfiguration, path); | 586 | emit RemoveFileRequested(program_id, GameListRemoveTarget::CustomConfiguration, path); |
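[Editor's note] Each new menu entry uses the same wiring idiom: QAction::triggered is connected to a lambda capturing program_id and path by value, since the context menu outlives this function and the lambda must own its own copies. Condensed sketch of the pattern:

    // Capture by value: the QAction (and its connection) persists after this
    // function returns, so references to locals would dangle.
    QAction* const remove_vk_cache = remove_menu->addAction(tr("Remove Vulkan Shader Cache"));
    connect(remove_vk_cache, &QAction::triggered, [this, program_id, path]() {
        emit RemoveFileRequested(program_id, GameListRemoveTarget::VkShaderCache, path);
    });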
diff --git a/src/yuzu/game_list.h b/src/yuzu/game_list.h index c9a9f4654..10339dcca 100644 --- a/src/yuzu/game_list.h +++ b/src/yuzu/game_list.h | |||
| @@ -41,7 +41,9 @@ enum class GameListOpenTarget { | |||
| 41 | }; | 41 | }; |
| 42 | 42 | ||
| 43 | enum class GameListRemoveTarget { | 43 | enum class GameListRemoveTarget { |
| 44 | ShaderCache, | 44 | GlShaderCache, |
| 45 | VkShaderCache, | ||
| 46 | AllShaderCache, | ||
| 45 | CustomConfiguration, | 47 | CustomConfiguration, |
| 46 | }; | 48 | }; |
| 47 | 49 | ||
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 96a301dda..f848b2982 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp | |||
| @@ -789,41 +789,28 @@ void GMainWindow::InitializeWidgets() { | |||
| 789 | dock_status_button->setChecked(Settings::values.use_docked_mode.GetValue()); | 789 | dock_status_button->setChecked(Settings::values.use_docked_mode.GetValue()); |
| 790 | statusBar()->insertPermanentWidget(0, dock_status_button); | 790 | statusBar()->insertPermanentWidget(0, dock_status_button); |
| 791 | 791 | ||
| 792 | // Setup ASync button | 792 | gpu_accuracy_button = new QPushButton(); |
| 793 | async_status_button = new QPushButton(); | 793 | gpu_accuracy_button->setObjectName(QStringLiteral("GPUStatusBarButton")); |
| 794 | async_status_button->setObjectName(QStringLiteral("TogglableStatusBarButton")); | 794 | gpu_accuracy_button->setCheckable(true); |
| 795 | async_status_button->setFocusPolicy(Qt::NoFocus); | 795 | gpu_accuracy_button->setFocusPolicy(Qt::NoFocus); |
| 796 | connect(async_status_button, &QPushButton::clicked, [&] { | 796 | connect(gpu_accuracy_button, &QPushButton::clicked, [this] { |
| 797 | if (emulation_running) { | 797 | switch (Settings::values.gpu_accuracy.GetValue()) { |
| 798 | return; | 798 | case Settings::GPUAccuracy::High: { |
| 799 | Settings::values.gpu_accuracy.SetValue(Settings::GPUAccuracy::Normal); | ||
| 800 | break; | ||
| 801 | } | ||
| 802 | case Settings::GPUAccuracy::Normal: | ||
| 803 | case Settings::GPUAccuracy::Extreme: | ||
| 804 | default: { | ||
| 805 | Settings::values.gpu_accuracy.SetValue(Settings::GPUAccuracy::High); | ||
| 799 | } | 806 | } |
| 800 | Settings::values.use_asynchronous_gpu_emulation.SetValue( | ||
| 801 | !Settings::values.use_asynchronous_gpu_emulation.GetValue()); | ||
| 802 | async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation.GetValue()); | ||
| 803 | Core::System::GetInstance().ApplySettings(); | ||
| 804 | }); | ||
| 805 | async_status_button->setText(tr("ASYNC")); | ||
| 806 | async_status_button->setCheckable(true); | ||
| 807 | async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation.GetValue()); | ||
| 808 | |||
| 809 | // Setup Multicore button | ||
| 810 | multicore_status_button = new QPushButton(); | ||
| 811 | multicore_status_button->setObjectName(QStringLiteral("TogglableStatusBarButton")); | ||
| 812 | multicore_status_button->setFocusPolicy(Qt::NoFocus); | ||
| 813 | connect(multicore_status_button, &QPushButton::clicked, [&] { | ||
| 814 | if (emulation_running) { | ||
| 815 | return; | ||
| 816 | } | 807 | } |
| 817 | Settings::values.use_multi_core.SetValue(!Settings::values.use_multi_core.GetValue()); | 808 | |
| 818 | multicore_status_button->setChecked(Settings::values.use_multi_core.GetValue()); | ||
| 819 | Core::System::GetInstance().ApplySettings(); | 809 | Core::System::GetInstance().ApplySettings(); |
| 810 | UpdateGPUAccuracyButton(); | ||
| 820 | }); | 811 | }); |
| 821 | multicore_status_button->setText(tr("MULTICORE")); | 812 | UpdateGPUAccuracyButton(); |
| 822 | multicore_status_button->setCheckable(true); | 813 | statusBar()->insertPermanentWidget(0, gpu_accuracy_button); |
| 823 | multicore_status_button->setChecked(Settings::values.use_multi_core.GetValue()); | ||
| 824 | |||
| 825 | statusBar()->insertPermanentWidget(0, multicore_status_button); | ||
| 826 | statusBar()->insertPermanentWidget(0, async_status_button); | ||
| 827 | 814 | ||
| 828 | // Setup Renderer API button | 815 | // Setup Renderer API button |
| 829 | renderer_status_button = new QPushButton(); | 816 | renderer_status_button = new QPushButton(); |
| @@ -1401,8 +1388,6 @@ void GMainWindow::BootGame(const QString& filename, u64 program_id, std::size_t | |||
| 1401 | game_list_placeholder->hide(); | 1388 | game_list_placeholder->hide(); |
| 1402 | } | 1389 | } |
| 1403 | status_bar_update_timer.start(500); | 1390 | status_bar_update_timer.start(500); |
| 1404 | async_status_button->setDisabled(true); | ||
| 1405 | multicore_status_button->setDisabled(true); | ||
| 1406 | renderer_status_button->setDisabled(true); | 1391 | renderer_status_button->setDisabled(true); |
| 1407 | 1392 | ||
| 1408 | if (UISettings::values.hide_mouse || Settings::values.mouse_panning) { | 1393 | if (UISettings::values.hide_mouse || Settings::values.mouse_panning) { |
| @@ -1506,8 +1491,6 @@ void GMainWindow::ShutdownGame() { | |||
| 1506 | emu_speed_label->setVisible(false); | 1491 | emu_speed_label->setVisible(false); |
| 1507 | game_fps_label->setVisible(false); | 1492 | game_fps_label->setVisible(false); |
| 1508 | emu_frametime_label->setVisible(false); | 1493 | emu_frametime_label->setVisible(false); |
| 1509 | async_status_button->setEnabled(true); | ||
| 1510 | multicore_status_button->setEnabled(true); | ||
| 1511 | renderer_status_button->setEnabled(true); | 1494 | renderer_status_button->setEnabled(true); |
| 1512 | 1495 | ||
| 1513 | emulation_running = false; | 1496 | emulation_running = false; |
| @@ -1654,35 +1637,15 @@ void GMainWindow::OnGameListOpenFolder(u64 program_id, GameListOpenTarget target | |||
| 1654 | 1637 | ||
| 1655 | void GMainWindow::OnTransferableShaderCacheOpenFile(u64 program_id) { | 1638 | void GMainWindow::OnTransferableShaderCacheOpenFile(u64 program_id) { |
| 1656 | const auto shader_cache_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir); | 1639 | const auto shader_cache_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir); |
| 1657 | const auto transferable_shader_cache_folder_path = shader_cache_dir / "opengl" / "transferable"; | 1640 | const auto shader_cache_folder_path{shader_cache_dir / fmt::format("{:016x}", program_id)}; |
| 1658 | const auto transferable_shader_cache_file_path = | 1641 | if (!Common::FS::CreateDirs(shader_cache_folder_path)) { |
| 1659 | transferable_shader_cache_folder_path / fmt::format("{:016X}.bin", program_id); | ||
| 1660 | |||
| 1661 | if (!Common::FS::Exists(transferable_shader_cache_file_path)) { | ||
| 1662 | QMessageBox::warning(this, tr("Error Opening Transferable Shader Cache"), | 1642 | QMessageBox::warning(this, tr("Error Opening Transferable Shader Cache"), |
| 1663 | tr("A shader cache for this title does not exist.")); | 1643 | tr("Filed to create the shader cache directory for this title.")); |
| 1664 | return; | 1644 | return; |
| 1665 | } | 1645 | } |
| 1666 | 1646 | const auto shader_path_string{Common::FS::PathToUTF8String(shader_cache_folder_path)}; | |
| 1667 | const auto qt_shader_cache_folder_path = | 1647 | const auto qt_shader_cache_path = QString::fromStdString(shader_path_string); |
| 1668 | QString::fromStdString(Common::FS::PathToUTF8String(transferable_shader_cache_folder_path)); | 1648 | QDesktopServices::openUrl(QUrl::fromLocalFile(qt_shader_cache_path)); |
| 1669 | const auto qt_shader_cache_file_path = | ||
| 1670 | QString::fromStdString(Common::FS::PathToUTF8String(transferable_shader_cache_file_path)); | ||
| 1671 | |||
| 1672 | // Windows supports opening a folder with selecting a specified file in explorer. On every other | ||
| 1673 | // OS we just open the transferable shader cache folder without preselecting the transferable | ||
| 1674 | // shader cache file for the selected game. | ||
| 1675 | #if defined(Q_OS_WIN) | ||
| 1676 | const QString explorer = QStringLiteral("explorer"); | ||
| 1677 | QStringList param; | ||
| 1678 | if (!QFileInfo(qt_shader_cache_file_path).isDir()) { | ||
| 1679 | param << QStringLiteral("/select,"); | ||
| 1680 | } | ||
| 1681 | param << QDir::toNativeSeparators(qt_shader_cache_file_path); | ||
| 1682 | QProcess::startDetached(explorer, param); | ||
| 1683 | #else | ||
| 1684 | QDesktopServices::openUrl(QUrl::fromLocalFile(qt_shader_cache_folder_path)); | ||
| 1685 | #endif | ||
| 1686 | } | 1649 | } |
| 1687 | 1650 | ||
| 1688 | static std::size_t CalculateRomFSEntrySize(const FileSys::VirtualDir& dir, bool full) { | 1651 | static std::size_t CalculateRomFSEntrySize(const FileSys::VirtualDir& dir, bool full) { |
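[Editor's note] The handler above reflects the new on-disk layout: one directory per title, named by the zero-padded lowercase-hex title ID and holding one cache file per backend (opengl.bin, vulkan.bin), created on demand so the folder opens even before any cache exists. The path construction, sketched (the example path is illustrative):

    #include <cstdint>
    #include <filesystem>
    #include <fmt/format.h>

    // e.g. TitleShaderDir("~/.local/share/yuzu/shader", 0x0100000000010000)
    //      -> "~/.local/share/yuzu/shader/0100000000010000"
    std::filesystem::path TitleShaderDir(const std::filesystem::path& shader_dir,
                                         std::uint64_t program_id) {
        return shader_dir / fmt::format("{:016x}", program_id);
    }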
| @@ -1825,8 +1788,12 @@ void GMainWindow::OnGameListRemoveFile(u64 program_id, GameListRemoveTarget targ | |||
| 1825 | const std::string& game_path) { | 1788 | const std::string& game_path) { |
| 1826 | const QString question = [this, target] { | 1789 | const QString question = [this, target] { |
| 1827 | switch (target) { | 1790 | switch (target) { |
| 1828 | case GameListRemoveTarget::ShaderCache: | 1791 | case GameListRemoveTarget::GlShaderCache: |
| 1829 | return tr("Delete Transferable Shader Cache?"); | 1792 | return tr("Delete OpenGL Transferable Shader Cache?"); |
| 1793 | case GameListRemoveTarget::VkShaderCache: | ||
| 1794 | return tr("Delete Vulkan Transferable Shader Cache?"); | ||
| 1795 | case GameListRemoveTarget::AllShaderCache: | ||
| 1796 | return tr("Delete All Transferable Shader Caches?"); | ||
| 1830 | case GameListRemoveTarget::CustomConfiguration: | 1797 | case GameListRemoveTarget::CustomConfiguration: |
| 1831 | return tr("Remove Custom Game Configuration?"); | 1798 | return tr("Remove Custom Game Configuration?"); |
| 1832 | default: | 1799 | default: |
| @@ -1840,8 +1807,12 @@ void GMainWindow::OnGameListRemoveFile(u64 program_id, GameListRemoveTarget targ | |||
| 1840 | } | 1807 | } |
| 1841 | 1808 | ||
| 1842 | switch (target) { | 1809 | switch (target) { |
| 1843 | case GameListRemoveTarget::ShaderCache: | 1810 | case GameListRemoveTarget::GlShaderCache: |
| 1844 | RemoveTransferableShaderCache(program_id); | 1811 | case GameListRemoveTarget::VkShaderCache: |
| 1812 | RemoveTransferableShaderCache(program_id, target); | ||
| 1813 | break; | ||
| 1814 | case GameListRemoveTarget::AllShaderCache: | ||
| 1815 | RemoveAllTransferableShaderCaches(program_id); | ||
| 1845 | break; | 1816 | break; |
| 1846 | case GameListRemoveTarget::CustomConfiguration: | 1817 | case GameListRemoveTarget::CustomConfiguration: |
| 1847 | RemoveCustomConfiguration(program_id, game_path); | 1818 | RemoveCustomConfiguration(program_id, game_path); |
| @@ -1849,18 +1820,27 @@ void GMainWindow::OnGameListRemoveFile(u64 program_id, GameListRemoveTarget targ | |||
| 1849 | } | 1820 | } |
| 1850 | } | 1821 | } |
| 1851 | 1822 | ||
| 1852 | void GMainWindow::RemoveTransferableShaderCache(u64 program_id) { | 1823 | void GMainWindow::RemoveTransferableShaderCache(u64 program_id, GameListRemoveTarget target) { |
| 1824 | const auto target_file_name = [target] { | ||
| 1825 | switch (target) { | ||
| 1826 | case GameListRemoveTarget::GlShaderCache: | ||
| 1827 | return "opengl.bin"; | ||
| 1828 | case GameListRemoveTarget::VkShaderCache: | ||
| 1829 | return "vulkan.bin"; | ||
| 1830 | default: | ||
| 1831 | return ""; | ||
| 1832 | } | ||
| 1833 | }(); | ||
| 1853 | const auto shader_cache_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir); | 1834 | const auto shader_cache_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir); |
| 1854 | const auto transferable_shader_cache_file_path = | 1835 | const auto shader_cache_folder_path = shader_cache_dir / fmt::format("{:016x}", program_id); |
| 1855 | shader_cache_dir / "opengl" / "transferable" / fmt::format("{:016X}.bin", program_id); | 1836 | const auto target_file = shader_cache_folder_path / target_file_name; |
| 1856 | 1837 | ||
| 1857 | if (!Common::FS::Exists(transferable_shader_cache_file_path)) { | 1838 | if (!Common::FS::Exists(target_file)) { |
| 1858 | QMessageBox::warning(this, tr("Error Removing Transferable Shader Cache"), | 1839 | QMessageBox::warning(this, tr("Error Removing Transferable Shader Cache"), |
| 1859 | tr("A shader cache for this title does not exist.")); | 1840 | tr("A shader cache for this title does not exist.")); |
| 1860 | return; | 1841 | return; |
| 1861 | } | 1842 | } |
| 1862 | 1843 | if (Common::FS::RemoveFile(target_file)) { | |
| 1863 | if (Common::FS::RemoveFile(transferable_shader_cache_file_path)) { | ||
| 1864 | QMessageBox::information(this, tr("Successfully Removed"), | 1844 | QMessageBox::information(this, tr("Successfully Removed"), |
| 1865 | tr("Successfully removed the transferable shader cache.")); | 1845 | tr("Successfully removed the transferable shader cache.")); |
| 1866 | } else { | 1846 | } else { |
| @@ -1869,6 +1849,24 @@ void GMainWindow::RemoveTransferableShaderCache(u64 program_id) { | |||
| 1869 | } | 1849 | } |
| 1870 | } | 1850 | } |
| 1871 | 1851 | ||
| 1852 | void GMainWindow::RemoveAllTransferableShaderCaches(u64 program_id) { | ||
| 1853 | const auto shader_cache_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir); | ||
| 1854 | const auto program_shader_cache_dir = shader_cache_dir / fmt::format("{:016x}", program_id); | ||
| 1855 | |||
| 1856 | if (!Common::FS::Exists(program_shader_cache_dir)) { | ||
| 1857 | QMessageBox::warning(this, tr("Error Removing Transferable Shader Caches"), | ||
| 1858 | tr("A shader cache for this title does not exist.")); | ||
| 1859 | return; | ||
| 1860 | } | ||
| 1861 | if (Common::FS::RemoveDirRecursively(program_shader_cache_dir)) { | ||
| 1862 | QMessageBox::information(this, tr("Successfully Removed"), | ||
| 1863 | tr("Successfully removed the transferable shader caches.")); | ||
| 1864 | } else { | ||
| 1865 | QMessageBox::warning(this, tr("Error Removing Transferable Shader Caches"), | ||
| 1866 | tr("Failed to remove the transferable shader cache directory.")); | ||
| 1867 | } | ||
| 1868 | } | ||
| 1869 | |||
| 1872 | void GMainWindow::RemoveCustomConfiguration(u64 program_id, const std::string& game_path) { | 1870 | void GMainWindow::RemoveCustomConfiguration(u64 program_id, const std::string& game_path) { |
| 1873 | const auto file_path = std::filesystem::path(Common::FS::ToU8String(game_path)); | 1871 | const auto file_path = std::filesystem::path(Common::FS::ToU8String(game_path)); |
| 1874 | const auto config_file_name = | 1872 | const auto config_file_name = |
| @@ -2823,7 +2821,7 @@ void GMainWindow::OnCaptureScreenshot() { | |||
| 2823 | QString::fromStdString(Common::FS::GetYuzuPathString(Common::FS::YuzuPath::ScreenshotsDir)); | 2821 | QString::fromStdString(Common::FS::GetYuzuPathString(Common::FS::YuzuPath::ScreenshotsDir)); |
| 2824 | const auto date = | 2822 | const auto date = |
| 2825 | QDateTime::currentDateTime().toString(QStringLiteral("yyyy-MM-dd_hh-mm-ss-zzz")); | 2823 | QDateTime::currentDateTime().toString(QStringLiteral("yyyy-MM-dd_hh-mm-ss-zzz")); |
| 2826 | QString filename = QStringLiteral("%1%2_%3.png") | 2824 | QString filename = QStringLiteral("%1/%2_%3.png") |
| 2827 | .arg(screenshot_path) | 2825 | .arg(screenshot_path) |
| 2828 | .arg(title_id, 16, 16, QLatin1Char{'0'}) | 2826 | .arg(title_id, 16, 16, QLatin1Char{'0'}) |
| 2829 | .arg(date); | 2827 | .arg(date); |
| @@ -2900,13 +2898,13 @@ void GMainWindow::UpdateStatusBar() { | |||
| 2900 | return; | 2898 | return; |
| 2901 | } | 2899 | } |
| 2902 | 2900 | ||
| 2903 | auto results = Core::System::GetInstance().GetAndResetPerfStats(); | 2901 | auto& system = Core::System::GetInstance(); |
| 2904 | auto& shader_notify = Core::System::GetInstance().GPU().ShaderNotify(); | 2902 | auto results = system.GetAndResetPerfStats(); |
| 2905 | const auto shaders_building = shader_notify.GetShadersBuilding(); | 2903 | auto& shader_notify = system.GPU().ShaderNotify(); |
| 2904 | const int shaders_building = shader_notify.ShadersBuilding(); | ||
| 2906 | 2905 | ||
| 2907 | if (shaders_building != 0) { | 2906 | if (shaders_building > 0) { |
| 2908 | shader_building_label->setText( | 2907 | shader_building_label->setText(tr("Building: %n shader(s)", "", shaders_building)); |
| 2909 | tr("Building: %n shader(s)", "", static_cast<int>(shaders_building))); | ||
| 2910 | shader_building_label->setVisible(true); | 2908 | shader_building_label->setVisible(true); |
| 2911 | } else { | 2909 | } else { |
| 2912 | shader_building_label->setVisible(false); | 2910 | shader_building_label->setVisible(false); |
| @@ -2932,12 +2930,35 @@ void GMainWindow::UpdateStatusBar() { | |||
| 2932 | emu_frametime_label->setVisible(true); | 2930 | emu_frametime_label->setVisible(true); |
| 2933 | } | 2931 | } |
| 2934 | 2932 | ||
| 2933 | void GMainWindow::UpdateGPUAccuracyButton() { | ||
| 2934 | switch (Settings::values.gpu_accuracy.GetValue()) { | ||
| 2935 | case Settings::GPUAccuracy::Normal: { | ||
| 2936 | gpu_accuracy_button->setText(tr("GPU NORMAL")); | ||
| 2937 | gpu_accuracy_button->setChecked(false); | ||
| 2938 | break; | ||
| 2939 | } | ||
| 2940 | case Settings::GPUAccuracy::High: { | ||
| 2941 | gpu_accuracy_button->setText(tr("GPU HIGH")); | ||
| 2942 | gpu_accuracy_button->setChecked(true); | ||
| 2943 | break; | ||
| 2944 | } | ||
| 2945 | case Settings::GPUAccuracy::Extreme: { | ||
| 2946 | gpu_accuracy_button->setText(tr("GPU EXTREME")); | ||
| 2947 | gpu_accuracy_button->setChecked(true); | ||
| 2948 | break; | ||
| 2949 | } | ||
| 2950 | default: { | ||
| 2951 | gpu_accuracy_button->setText(tr("GPU ERROR")); | ||
| 2952 | gpu_accuracy_button->setChecked(true); | ||
| 2953 | } | ||
| 2954 | } | ||
| 2955 | } | ||
| 2956 | |||
| 2935 | void GMainWindow::UpdateStatusButtons() { | 2957 | void GMainWindow::UpdateStatusButtons() { |
| 2936 | dock_status_button->setChecked(Settings::values.use_docked_mode.GetValue()); | 2958 | dock_status_button->setChecked(Settings::values.use_docked_mode.GetValue()); |
| 2937 | multicore_status_button->setChecked(Settings::values.use_multi_core.GetValue()); | ||
| 2938 | async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation.GetValue()); | ||
| 2939 | renderer_status_button->setChecked(Settings::values.renderer_backend.GetValue() == | 2959 | renderer_status_button->setChecked(Settings::values.renderer_backend.GetValue() == |
| 2940 | Settings::RendererBackend::Vulkan); | 2960 | Settings::RendererBackend::Vulkan); |
| 2961 | UpdateGPUAccuracyButton(); | ||
| 2941 | } | 2962 | } |
| 2942 | 2963 | ||
| 2943 | void GMainWindow::UpdateUISettings() { | 2964 | void GMainWindow::UpdateUISettings() { |
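The new UpdateGPUAccuracyButton maps the three GPUAccuracy levels to a label and checked state: Normal is unchecked, everything else is checked, with a GPU ERROR fallback for out-of-range values. The same mapping as a self-contained sketch (the enum mirrors Settings::GPUAccuracy and its ordering is an assumption here):

```cpp
#include <cstdio>

// Mirrors the three Settings::GPUAccuracy values used above (assumed order).
enum class GPUAccuracy { Normal, High, Extreme };

struct ButtonState {
    const char* text;
    bool checked;
};

// Same mapping as UpdateGPUAccuracyButton, written as a pure function.
ButtonState MapAccuracy(GPUAccuracy accuracy) {
    switch (accuracy) {
    case GPUAccuracy::Normal:
        return {"GPU NORMAL", false};
    case GPUAccuracy::High:
        return {"GPU HIGH", true};
    case GPUAccuracy::Extreme:
        return {"GPU EXTREME", true};
    }
    return {"GPU ERROR", true}; // out-of-range value, e.g. from a stale config
}

int main() {
    const ButtonState state = MapAccuracy(GPUAccuracy::High);
    std::printf("%s (checked=%d)\n", state.text, state.checked);
    return 0;
}
```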
diff --git a/src/yuzu/main.h b/src/yuzu/main.h index a50e5b9fe..38e66ccd0 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h | |||
| @@ -282,7 +282,8 @@ private: | |||
| 282 | void RemoveBaseContent(u64 program_id, const QString& entry_type); | 282 | void RemoveBaseContent(u64 program_id, const QString& entry_type); |
| 283 | void RemoveUpdateContent(u64 program_id, const QString& entry_type); | 283 | void RemoveUpdateContent(u64 program_id, const QString& entry_type); |
| 284 | void RemoveAddOnContent(u64 program_id, const QString& entry_type); | 284 | void RemoveAddOnContent(u64 program_id, const QString& entry_type); |
| 285 | void RemoveTransferableShaderCache(u64 program_id); | 285 | void RemoveTransferableShaderCache(u64 program_id, GameListRemoveTarget target); |
| 286 | void RemoveAllTransferableShaderCaches(u64 program_id); | ||
| 286 | void RemoveCustomConfiguration(u64 program_id, const std::string& game_path); | 287 | void RemoveCustomConfiguration(u64 program_id, const std::string& game_path); |
| 287 | std::optional<u64> SelectRomFSDumpTarget(const FileSys::ContentProvider&, u64 program_id); | 288 | std::optional<u64> SelectRomFSDumpTarget(const FileSys::ContentProvider&, u64 program_id); |
| 288 | InstallResult InstallNSPXCI(const QString& filename); | 289 | InstallResult InstallNSPXCI(const QString& filename); |
| @@ -291,6 +292,7 @@ private: | |||
| 291 | void UpdateWindowTitle(std::string_view title_name = {}, std::string_view title_version = {}, | 292 | void UpdateWindowTitle(std::string_view title_name = {}, std::string_view title_version = {}, |
| 292 | std::string_view gpu_vendor = {}); | 293 | std::string_view gpu_vendor = {}); |
| 293 | void UpdateStatusBar(); | 294 | void UpdateStatusBar(); |
| 295 | void UpdateGPUAccuracyButton(); | ||
| 294 | void UpdateStatusButtons(); | 296 | void UpdateStatusButtons(); |
| 295 | void UpdateUISettings(); | 297 | void UpdateUISettings(); |
| 296 | void HideMouseCursor(); | 298 | void HideMouseCursor(); |
| @@ -316,8 +318,7 @@ private: | |||
| 316 | QLabel* emu_speed_label = nullptr; | 318 | QLabel* emu_speed_label = nullptr; |
| 317 | QLabel* game_fps_label = nullptr; | 319 | QLabel* game_fps_label = nullptr; |
| 318 | QLabel* emu_frametime_label = nullptr; | 320 | QLabel* emu_frametime_label = nullptr; |
| 319 | QPushButton* async_status_button = nullptr; | 321 | QPushButton* gpu_accuracy_button = nullptr; |
| 320 | QPushButton* multicore_status_button = nullptr; | ||
| 321 | QPushButton* renderer_status_button = nullptr; | 322 | QPushButton* renderer_status_button = nullptr; |
| 322 | QPushButton* dock_status_button = nullptr; | 323 | QPushButton* dock_status_button = nullptr; |
| 323 | QTimer status_bar_update_timer; | 324 | QTimer status_bar_update_timer; |
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 3e22fee37..640d7d111 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp | |||
| @@ -444,6 +444,8 @@ void Config::ReadValues() { | |||
| 444 | // Renderer | 444 | // Renderer |
| 445 | ReadSetting("Renderer", Settings::values.renderer_backend); | 445 | ReadSetting("Renderer", Settings::values.renderer_backend); |
| 446 | ReadSetting("Renderer", Settings::values.renderer_debug); | 446 | ReadSetting("Renderer", Settings::values.renderer_debug); |
| 447 | ReadSetting("Renderer", Settings::values.enable_nsight_aftermath); | ||
| 448 | ReadSetting("Renderer", Settings::values.disable_shader_loop_safety_checks); | ||
| 447 | ReadSetting("Renderer", Settings::values.vulkan_device); | 449 | ReadSetting("Renderer", Settings::values.vulkan_device); |
| 448 | 450 | ||
| 449 | ReadSetting("Renderer", Settings::values.fullscreen_mode); | 451 | ReadSetting("Renderer", Settings::values.fullscreen_mode); |
| @@ -456,7 +458,7 @@ void Config::ReadValues() { | |||
| 456 | ReadSetting("Renderer", Settings::values.use_asynchronous_gpu_emulation); | 458 | ReadSetting("Renderer", Settings::values.use_asynchronous_gpu_emulation); |
| 457 | ReadSetting("Renderer", Settings::values.use_vsync); | 459 | ReadSetting("Renderer", Settings::values.use_vsync); |
| 458 | ReadSetting("Renderer", Settings::values.disable_fps_limit); | 460 | ReadSetting("Renderer", Settings::values.disable_fps_limit); |
| 459 | ReadSetting("Renderer", Settings::values.use_assembly_shaders); | 461 | ReadSetting("Renderer", Settings::values.shader_backend); |
| 460 | ReadSetting("Renderer", Settings::values.use_asynchronous_shaders); | 462 | ReadSetting("Renderer", Settings::values.use_asynchronous_shaders); |
| 461 | ReadSetting("Renderer", Settings::values.use_nvdec_emulation); | 463 | ReadSetting("Renderer", Settings::values.use_nvdec_emulation); |
| 462 | ReadSetting("Renderer", Settings::values.accelerate_astc); | 464 | ReadSetting("Renderer", Settings::values.accelerate_astc); |
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 88d33ecab..b7115b06a 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h | |||
| @@ -221,6 +221,14 @@ backend = | |||
| 221 | # 0 (default): Disabled, 1: Enabled | 221 | # 0 (default): Disabled, 1: Enabled |
| 222 | debug = | 222 | debug = |
| 223 | 223 | ||
| 224 | # Enable Nsight Aftermath crash dumps | ||
| 225 | # 0 (default): Disabled, 1: Enabled | ||
| 226 | nsight_aftermath = | ||
| 227 | |||
| 228 | # Disable shader loop safety checks, executing the shader without loop logic changes | ||
| 229 | # 0 (default): Disabled, 1: Enabled | ||
| 230 | disable_shader_loop_safety_checks = | ||
| 231 | |||
| 224 | # Which Vulkan physical device to use (defaults to 0) | 232 | # Which Vulkan physical device to use (defaults to 0) |
| 225 | vulkan_device = | 233 | vulkan_device = |
| 226 | 234 | ||
| @@ -240,9 +248,10 @@ max_anisotropy = | |||
| 240 | # 0 (default): Off, 1: On | 248 | # 0 (default): Off, 1: On |
| 241 | use_vsync = | 249 | use_vsync = |
| 242 | 250 | ||
| 243 | # Whether to use OpenGL assembly shaders or not. NV_gpu_program5 is required. | 251 | # Selects the OpenGL shader backend. NV_gpu_program5 is required for GLASM. If NV_gpu_program5 is |
| 244 | # 0: Off, 1 (default): On | 252 | # not available and GLASM is selected, GLSL will be used. |
| 245 | use_assembly_shaders = | 253 | # 0: GLSL, 1 (default): GLASM, 2: SPIR-V |
| 254 | shader_backend = | ||
| 246 | 255 | ||
| 247 | # Whether to allow asynchronous shader building. | 256 | # Whether to allow asynchronous shader building. |
| 248 | # 0 (default): Off, 1: On | 257 | # 0 (default): Off, 1: On |
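The new shader_backend comment documents a fallback: GLASM requires NV_gpu_program5 and drops to GLSL without it. That resolution step as a sketch (the enum values follow the INI comment; the extension query is an assumption for illustration):

```cpp
// Map the INI integer to a backend, applying the documented GLASM fallback.
enum class ShaderBackend { GLSL = 0, GLASM = 1, SPIRV = 2 };

ShaderBackend ResolveShaderBackend(int ini_value, bool has_nv_gpu_program5) {
    auto backend = static_cast<ShaderBackend>(ini_value);
    if (backend == ShaderBackend::GLASM && !has_nv_gpu_program5) {
        backend = ShaderBackend::GLSL; // fallback described in the comment above
    }
    return backend;
}
```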
diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp index ac4ea88d3..35ce23696 100644 --- a/src/yuzu_cmd/yuzu.cpp +++ b/src/yuzu_cmd/yuzu.cpp | |||
| @@ -218,9 +218,11 @@ int main(int argc, char** argv) { | |||
| 218 | // Core is loaded, start the GPU (makes the GPU contexts current to this thread) | 218 | // Core is loaded, start the GPU (makes the GPU contexts current to this thread) |
| 219 | system.GPU().Start(); | 219 | system.GPU().Start(); |
| 220 | 220 | ||
| 221 | system.Renderer().ReadRasterizer()->LoadDiskResources( | 221 | if (Settings::values.use_disk_shader_cache.GetValue()) { |
| 222 | system.CurrentProcess()->GetTitleID(), std::stop_token{}, | 222 | system.Renderer().ReadRasterizer()->LoadDiskResources( |
| 223 | [](VideoCore::LoadCallbackStage, size_t value, size_t total) {}); | 223 | system.CurrentProcess()->GetTitleID(), std::stop_token{}, |
| 224 | [](VideoCore::LoadCallbackStage, size_t value, size_t total) {}); | ||
| 225 | } | ||
| 224 | 226 | ||
| 225 | void(system.Run()); | 227 | void(system.Run()); |
| 226 | while (emu_window->IsOpen()) { | 228 | while (emu_window->IsOpen()) { |