diff options
30 files changed, 2276 insertions, 399 deletions
diff --git a/.gitmodules b/.gitmodules index 2ec9dda62..9ba8fe207 100644 --- a/.gitmodules +++ b/.gitmodules | |||
| @@ -13,6 +13,9 @@ | |||
| 13 | [submodule "soundtouch"] | 13 | [submodule "soundtouch"] |
| 14 | path = externals/soundtouch | 14 | path = externals/soundtouch |
| 15 | url = https://github.com/citra-emu/ext-soundtouch.git | 15 | url = https://github.com/citra-emu/ext-soundtouch.git |
| 16 | [submodule "libressl"] | ||
| 17 | path = externals/libressl | ||
| 18 | url = https://github.com/citra-emu/ext-libressl-portable.git | ||
| 16 | [submodule "discord-rpc"] | 19 | [submodule "discord-rpc"] |
| 17 | path = externals/discord-rpc | 20 | path = externals/discord-rpc |
| 18 | url = https://github.com/discordapp/discord-rpc.git | 21 | url = https://github.com/discordapp/discord-rpc.git |
diff --git a/CMakeLists.txt b/CMakeLists.txt index a9f669d56..b71071271 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt | |||
| @@ -152,7 +152,6 @@ macro(yuzu_find_packages) | |||
| 152 | "Boost 1.71 boost/1.72.0" | 152 | "Boost 1.71 boost/1.72.0" |
| 153 | "Catch2 2.11 catch2/2.11.0" | 153 | "Catch2 2.11 catch2/2.11.0" |
| 154 | "fmt 6.2 fmt/6.2.0" | 154 | "fmt 6.2 fmt/6.2.0" |
| 155 | "OpenSSL 1.1 openssl/1.1.1f" | ||
| 156 | # can't use until https://github.com/bincrafters/community/issues/1173 | 155 | # can't use until https://github.com/bincrafters/community/issues/1173 |
| 157 | #"libzip 1.5 libzip/1.5.2@bincrafters/stable" | 156 | #"libzip 1.5 libzip/1.5.2@bincrafters/stable" |
| 158 | "lz4 1.8 lz4/1.9.2" | 157 | "lz4 1.8 lz4/1.9.2" |
| @@ -312,15 +311,6 @@ elseif (TARGET Boost::boost) | |||
| 312 | add_library(boost ALIAS Boost::boost) | 311 | add_library(boost ALIAS Boost::boost) |
| 313 | endif() | 312 | endif() |
| 314 | 313 | ||
| 315 | if (NOT TARGET OpenSSL::SSL) | ||
| 316 | set_target_properties(OpenSSL::OpenSSL PROPERTIES IMPORTED_GLOBAL TRUE) | ||
| 317 | add_library(OpenSSL::SSL ALIAS OpenSSL::OpenSSL) | ||
| 318 | endif() | ||
| 319 | if (NOT TARGET OpenSSL::Crypto) | ||
| 320 | set_target_properties(OpenSSL::OpenSSL PROPERTIES IMPORTED_GLOBAL TRUE) | ||
| 321 | add_library(OpenSSL::Crypto ALIAS OpenSSL::OpenSSL) | ||
| 322 | endif() | ||
| 323 | |||
| 324 | if (TARGET sdl2::sdl2) | 314 | if (TARGET sdl2::sdl2) |
| 325 | # imported from the conan generated sdl2Config.cmake | 315 | # imported from the conan generated sdl2Config.cmake |
| 326 | set_target_properties(sdl2::sdl2 PROPERTIES IMPORTED_GLOBAL TRUE) | 316 | set_target_properties(sdl2::sdl2 PROPERTIES IMPORTED_GLOBAL TRUE) |
diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake index 83e4e9df2..311ba1c2e 100644 --- a/CMakeModules/GenerateSCMRev.cmake +++ b/CMakeModules/GenerateSCMRev.cmake | |||
| @@ -51,6 +51,8 @@ endif() | |||
| 51 | # The variable SRC_DIR must be passed into the script (since it uses the current build directory for all values of CMAKE_*_DIR) | 51 | # The variable SRC_DIR must be passed into the script (since it uses the current build directory for all values of CMAKE_*_DIR) |
| 52 | set(VIDEO_CORE "${SRC_DIR}/src/video_core") | 52 | set(VIDEO_CORE "${SRC_DIR}/src/video_core") |
| 53 | set(HASH_FILES | 53 | set(HASH_FILES |
| 54 | "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp" | ||
| 55 | "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h" | ||
| 54 | "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp" | 56 | "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp" |
| 55 | "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h" | 57 | "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h" |
| 56 | "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp" | 58 | "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp" |
diff --git a/dist/qt_themes/qdarkstyle/style.qss b/dist/qt_themes/qdarkstyle/style.qss index 7d088a719..2d5c9761f 100644 --- a/dist/qt_themes/qdarkstyle/style.qss +++ b/dist/qt_themes/qdarkstyle/style.qss | |||
| @@ -673,10 +673,6 @@ QTabWidget::pane { | |||
| 673 | border-bottom-left-radius: 2px; | 673 | border-bottom-left-radius: 2px; |
| 674 | } | 674 | } |
| 675 | 675 | ||
| 676 | QTabWidget::tab-bar { | ||
| 677 | overflow: visible; | ||
| 678 | } | ||
| 679 | |||
| 680 | QTabBar { | 676 | QTabBar { |
| 681 | qproperty-drawBase: 0; | 677 | qproperty-drawBase: 0; |
| 682 | border-radius: 3px; | 678 | border-radius: 3px; |
diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index 9be5b2780..b80b27605 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt | |||
| @@ -73,6 +73,15 @@ if (NOT LIBZIP_FOUND) | |||
| 73 | endif() | 73 | endif() |
| 74 | 74 | ||
| 75 | if (ENABLE_WEB_SERVICE) | 75 | if (ENABLE_WEB_SERVICE) |
| 76 | # LibreSSL | ||
| 77 | set(LIBRESSL_SKIP_INSTALL ON CACHE BOOL "") | ||
| 78 | add_subdirectory(libressl EXCLUDE_FROM_ALL) | ||
| 79 | target_include_directories(ssl INTERFACE ./libressl/include) | ||
| 80 | target_compile_definitions(ssl PRIVATE -DHAVE_INET_NTOP) | ||
| 81 | get_directory_property(OPENSSL_LIBRARIES | ||
| 82 | DIRECTORY libressl | ||
| 83 | DEFINITION OPENSSL_LIBS) | ||
| 84 | |||
| 76 | # lurlparser | 85 | # lurlparser |
| 77 | add_subdirectory(lurlparser EXCLUDE_FROM_ALL) | 86 | add_subdirectory(lurlparser EXCLUDE_FROM_ALL) |
| 78 | 87 | ||
| @@ -80,5 +89,5 @@ if (ENABLE_WEB_SERVICE) | |||
| 80 | add_library(httplib INTERFACE) | 89 | add_library(httplib INTERFACE) |
| 81 | target_include_directories(httplib INTERFACE ./httplib) | 90 | target_include_directories(httplib INTERFACE ./httplib) |
| 82 | target_compile_definitions(httplib INTERFACE -DCPPHTTPLIB_OPENSSL_SUPPORT) | 91 | target_compile_definitions(httplib INTERFACE -DCPPHTTPLIB_OPENSSL_SUPPORT) |
| 83 | target_link_libraries(httplib INTERFACE OpenSSL::SSL OpenSSL::Crypto) | 92 | target_link_libraries(httplib INTERFACE ${OPENSSL_LIBRARIES}) |
| 84 | endif() | 93 | endif() |
diff --git a/externals/libressl b/externals/libressl new file mode 160000 | |||
| Subproject 7d01cb01cb1a926ecb4c9c98b107ef3c26f59df | |||
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 24b7a083c..0a3e2f4d1 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt | |||
| @@ -32,6 +32,8 @@ add_custom_command(OUTPUT scm_rev.cpp | |||
| 32 | DEPENDS | 32 | DEPENDS |
| 33 | # WARNING! It was too much work to try and make a common location for this list, | 33 | # WARNING! It was too much work to try and make a common location for this list, |
| 34 | # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well | 34 | # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well |
| 35 | "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp" | ||
| 36 | "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h" | ||
| 35 | "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp" | 37 | "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp" |
| 36 | "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h" | 38 | "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h" |
| 37 | "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp" | 39 | "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp" |
diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h index 794da8a52..a5f5d4fc1 100644 --- a/src/common/x64/xbyak_abi.h +++ b/src/common/x64/xbyak_abi.h | |||
| @@ -11,7 +11,7 @@ | |||
| 11 | 11 | ||
| 12 | namespace Common::X64 { | 12 | namespace Common::X64 { |
| 13 | 13 | ||
| 14 | inline int RegToIndex(const Xbyak::Reg& reg) { | 14 | inline std::size_t RegToIndex(const Xbyak::Reg& reg) { |
| 15 | using Kind = Xbyak::Reg::Kind; | 15 | using Kind = Xbyak::Reg::Kind; |
| 16 | ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0, | 16 | ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0, |
| 17 | "RegSet only support GPRs and XMM registers."); | 17 | "RegSet only support GPRs and XMM registers."); |
| @@ -19,17 +19,17 @@ inline int RegToIndex(const Xbyak::Reg& reg) { | |||
| 19 | return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16); | 19 | return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16); |
| 20 | } | 20 | } |
| 21 | 21 | ||
| 22 | inline Xbyak::Reg64 IndexToReg64(int reg_index) { | 22 | inline Xbyak::Reg64 IndexToReg64(std::size_t reg_index) { |
| 23 | ASSERT(reg_index < 16); | 23 | ASSERT(reg_index < 16); |
| 24 | return Xbyak::Reg64(reg_index); | 24 | return Xbyak::Reg64(static_cast<int>(reg_index)); |
| 25 | } | 25 | } |
| 26 | 26 | ||
| 27 | inline Xbyak::Xmm IndexToXmm(int reg_index) { | 27 | inline Xbyak::Xmm IndexToXmm(std::size_t reg_index) { |
| 28 | ASSERT(reg_index >= 16 && reg_index < 32); | 28 | ASSERT(reg_index >= 16 && reg_index < 32); |
| 29 | return Xbyak::Xmm(reg_index - 16); | 29 | return Xbyak::Xmm(static_cast<int>(reg_index - 16)); |
| 30 | } | 30 | } |
| 31 | 31 | ||
| 32 | inline Xbyak::Reg IndexToReg(int reg_index) { | 32 | inline Xbyak::Reg IndexToReg(std::size_t reg_index) { |
| 33 | if (reg_index < 16) { | 33 | if (reg_index < 16) { |
| 34 | return IndexToReg64(reg_index); | 34 | return IndexToReg64(reg_index); |
| 35 | } else { | 35 | } else { |
| @@ -151,9 +151,13 @@ constexpr size_t ABI_SHADOW_SPACE = 0; | |||
| 151 | 151 | ||
| 152 | #endif | 152 | #endif |
| 153 | 153 | ||
| 154 | inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment, | 154 | struct ABIFrameInfo { |
| 155 | size_t needed_frame_size, s32* out_subtraction, | 155 | s32 subtraction; |
| 156 | s32* out_xmm_offset) { | 156 | s32 xmm_offset; |
| 157 | }; | ||
| 158 | |||
| 159 | inline ABIFrameInfo ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment, | ||
| 160 | size_t needed_frame_size) { | ||
| 157 | const auto count = (regs & ABI_ALL_GPRS).count(); | 161 | const auto count = (regs & ABI_ALL_GPRS).count(); |
| 158 | rsp_alignment -= count * 8; | 162 | rsp_alignment -= count * 8; |
| 159 | size_t subtraction = 0; | 163 | size_t subtraction = 0; |
| @@ -170,33 +174,28 @@ inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment, | |||
| 170 | rsp_alignment -= subtraction; | 174 | rsp_alignment -= subtraction; |
| 171 | subtraction += rsp_alignment & 0xF; | 175 | subtraction += rsp_alignment & 0xF; |
| 172 | 176 | ||
| 173 | *out_subtraction = (s32)subtraction; | 177 | return ABIFrameInfo{static_cast<s32>(subtraction), |
| 174 | *out_xmm_offset = (s32)(subtraction - xmm_base_subtraction); | 178 | static_cast<s32>(subtraction - xmm_base_subtraction)}; |
| 175 | } | 179 | } |
| 176 | 180 | ||
| 177 | inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, | 181 | inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, |
| 178 | size_t rsp_alignment, size_t needed_frame_size = 0) { | 182 | size_t rsp_alignment, size_t needed_frame_size = 0) { |
| 179 | s32 subtraction, xmm_offset; | 183 | auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size); |
| 180 | ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); | 184 | |
| 181 | for (std::size_t i = 0; i < regs.size(); ++i) { | 185 | for (std::size_t i = 0; i < regs.size(); ++i) { |
| 182 | if (regs[i] && ABI_ALL_GPRS[i]) { | 186 | if (regs[i] && ABI_ALL_GPRS[i]) { |
| 183 | code.push(IndexToReg64(static_cast<int>(i))); | 187 | code.push(IndexToReg64(i)); |
| 184 | } | 188 | } |
| 185 | } | 189 | } |
| 186 | if (subtraction != 0) { | ||
| 187 | code.sub(code.rsp, subtraction); | ||
| 188 | } | ||
| 189 | 190 | ||
| 190 | for (int i = 0; i < regs.count(); i++) { | 191 | if (frame_info.subtraction != 0) { |
| 191 | if (regs.test(i) & ABI_ALL_GPRS.test(i)) { | 192 | code.sub(code.rsp, frame_info.subtraction); |
| 192 | code.push(IndexToReg64(i)); | ||
| 193 | } | ||
| 194 | } | 193 | } |
| 195 | 194 | ||
| 196 | for (std::size_t i = 0; i < regs.size(); ++i) { | 195 | for (std::size_t i = 0; i < regs.size(); ++i) { |
| 197 | if (regs[i] && ABI_ALL_XMMS[i]) { | 196 | if (regs[i] && ABI_ALL_XMMS[i]) { |
| 198 | code.movaps(code.xword[code.rsp + xmm_offset], IndexToXmm(static_cast<int>(i))); | 197 | code.movaps(code.xword[code.rsp + frame_info.xmm_offset], IndexToXmm(i)); |
| 199 | xmm_offset += 0x10; | 198 | frame_info.xmm_offset += 0x10; |
| 200 | } | 199 | } |
| 201 | } | 200 | } |
| 202 | 201 | ||
| @@ -205,59 +204,23 @@ inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::b | |||
| 205 | 204 | ||
| 206 | inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, | 205 | inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, |
| 207 | size_t rsp_alignment, size_t needed_frame_size = 0) { | 206 | size_t rsp_alignment, size_t needed_frame_size = 0) { |
| 208 | s32 subtraction, xmm_offset; | 207 | auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size); |
| 209 | ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); | ||
| 210 | 208 | ||
| 211 | for (std::size_t i = 0; i < regs.size(); ++i) { | 209 | for (std::size_t i = 0; i < regs.size(); ++i) { |
| 212 | if (regs[i] && ABI_ALL_XMMS[i]) { | 210 | if (regs[i] && ABI_ALL_XMMS[i]) { |
| 213 | code.movaps(IndexToXmm(static_cast<int>(i)), code.xword[code.rsp + xmm_offset]); | 211 | code.movaps(IndexToXmm(i), code.xword[code.rsp + frame_info.xmm_offset]); |
| 214 | xmm_offset += 0x10; | 212 | frame_info.xmm_offset += 0x10; |
| 215 | } | 213 | } |
| 216 | } | 214 | } |
| 217 | 215 | ||
| 218 | if (subtraction != 0) { | 216 | if (frame_info.subtraction != 0) { |
| 219 | code.add(code.rsp, subtraction); | 217 | code.add(code.rsp, frame_info.subtraction); |
| 220 | } | 218 | } |
| 221 | 219 | ||
| 222 | // GPRs need to be popped in reverse order | 220 | // GPRs need to be popped in reverse order |
| 223 | for (int i = 15; i >= 0; i--) { | 221 | for (std::size_t j = 0; j < regs.size(); ++j) { |
| 224 | if (regs[i]) { | 222 | const std::size_t i = regs.size() - j - 1; |
| 225 | code.pop(IndexToReg64(i)); | ||
| 226 | } | ||
| 227 | } | ||
| 228 | } | ||
| 229 | |||
| 230 | inline size_t ABI_PushRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs, | ||
| 231 | size_t rsp_alignment, | ||
| 232 | size_t needed_frame_size = 0) { | ||
| 233 | s32 subtraction, xmm_offset; | ||
| 234 | ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); | ||
| 235 | |||
| 236 | for (std::size_t i = 0; i < regs.size(); ++i) { | ||
| 237 | if (regs[i] && ABI_ALL_GPRS[i]) { | 223 | if (regs[i] && ABI_ALL_GPRS[i]) { |
| 238 | code.push(IndexToReg64(static_cast<int>(i))); | ||
| 239 | } | ||
| 240 | } | ||
| 241 | |||
| 242 | if (subtraction != 0) { | ||
| 243 | code.sub(code.rsp, subtraction); | ||
| 244 | } | ||
| 245 | |||
| 246 | return ABI_SHADOW_SPACE; | ||
| 247 | } | ||
| 248 | |||
| 249 | inline void ABI_PopRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs, | ||
| 250 | size_t rsp_alignment, size_t needed_frame_size = 0) { | ||
| 251 | s32 subtraction, xmm_offset; | ||
| 252 | ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); | ||
| 253 | |||
| 254 | if (subtraction != 0) { | ||
| 255 | code.add(code.rsp, subtraction); | ||
| 256 | } | ||
| 257 | |||
| 258 | // GPRs need to be popped in reverse order | ||
| 259 | for (int i = 15; i >= 0; i--) { | ||
| 260 | if (regs[i]) { | ||
| 261 | code.pop(IndexToReg64(i)); | 224 | code.pop(IndexToReg64(i)); |
| 262 | } | 225 | } |
| 263 | } | 226 | } |
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index 9bc86e3b9..e7456a8c3 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp | |||
| @@ -50,7 +50,8 @@ public: | |||
| 50 | } | 50 | } |
| 51 | 51 | ||
| 52 | void InterpreterFallback(u32 pc, std::size_t num_instructions) override { | 52 | void InterpreterFallback(u32 pc, std::size_t num_instructions) override { |
| 53 | UNIMPLEMENTED(); | 53 | UNIMPLEMENTED_MSG("This should never happen, pc = {:08X}, code = {:08X}", pc, |
| 54 | MemoryReadCode(pc)); | ||
| 54 | } | 55 | } |
| 55 | 56 | ||
| 56 | void ExceptionRaised(u32 pc, Dynarmic::A32::Exception exception) override { | 57 | void ExceptionRaised(u32 pc, Dynarmic::A32::Exception exception) override { |
| @@ -89,8 +90,6 @@ public: | |||
| 89 | 90 | ||
| 90 | ARM_Dynarmic_32& parent; | 91 | ARM_Dynarmic_32& parent; |
| 91 | std::size_t num_interpreted_instructions{}; | 92 | std::size_t num_interpreted_instructions{}; |
| 92 | u64 tpidrro_el0{}; | ||
| 93 | u64 tpidr_el0{}; | ||
| 94 | }; | 93 | }; |
| 95 | 94 | ||
| 96 | std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable& page_table, | 95 | std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable& page_table, |
| @@ -99,7 +98,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable& | |||
| 99 | config.callbacks = cb.get(); | 98 | config.callbacks = cb.get(); |
| 100 | // TODO(bunnei): Implement page table for 32-bit | 99 | // TODO(bunnei): Implement page table for 32-bit |
| 101 | // config.page_table = &page_table.pointers; | 100 | // config.page_table = &page_table.pointers; |
| 102 | config.coprocessors[15] = std::make_shared<DynarmicCP15>((u32*)&CP15_regs[0]); | 101 | config.coprocessors[15] = cp15; |
| 103 | config.define_unpredictable_behaviour = true; | 102 | config.define_unpredictable_behaviour = true; |
| 104 | return std::make_unique<Dynarmic::A32::Jit>(config); | 103 | return std::make_unique<Dynarmic::A32::Jit>(config); |
| 105 | } | 104 | } |
| @@ -112,13 +111,13 @@ void ARM_Dynarmic_32::Run() { | |||
| 112 | } | 111 | } |
| 113 | 112 | ||
| 114 | void ARM_Dynarmic_32::Step() { | 113 | void ARM_Dynarmic_32::Step() { |
| 115 | cb->InterpreterFallback(jit->Regs()[15], 1); | 114 | jit->Step(); |
| 116 | } | 115 | } |
| 117 | 116 | ||
| 118 | ARM_Dynarmic_32::ARM_Dynarmic_32(System& system, ExclusiveMonitor& exclusive_monitor, | 117 | ARM_Dynarmic_32::ARM_Dynarmic_32(System& system, ExclusiveMonitor& exclusive_monitor, |
| 119 | std::size_t core_index) | 118 | std::size_t core_index) |
| 120 | : ARM_Interface{system}, | 119 | : ARM_Interface{system}, cb(std::make_unique<DynarmicCallbacks32>(*this)), |
| 121 | cb(std::make_unique<DynarmicCallbacks32>(*this)), core_index{core_index}, | 120 | cp15(std::make_shared<DynarmicCP15>(*this)), core_index{core_index}, |
| 122 | exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {} | 121 | exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {} |
| 123 | 122 | ||
| 124 | ARM_Dynarmic_32::~ARM_Dynarmic_32() = default; | 123 | ARM_Dynarmic_32::~ARM_Dynarmic_32() = default; |
| @@ -154,19 +153,19 @@ void ARM_Dynarmic_32::SetPSTATE(u32 cpsr) { | |||
| 154 | } | 153 | } |
| 155 | 154 | ||
| 156 | u64 ARM_Dynarmic_32::GetTlsAddress() const { | 155 | u64 ARM_Dynarmic_32::GetTlsAddress() const { |
| 157 | return CP15_regs[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)]; | 156 | return cp15->uro; |
| 158 | } | 157 | } |
| 159 | 158 | ||
| 160 | void ARM_Dynarmic_32::SetTlsAddress(VAddr address) { | 159 | void ARM_Dynarmic_32::SetTlsAddress(VAddr address) { |
| 161 | CP15_regs[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)] = static_cast<u32>(address); | 160 | cp15->uro = static_cast<u32>(address); |
| 162 | } | 161 | } |
| 163 | 162 | ||
| 164 | u64 ARM_Dynarmic_32::GetTPIDR_EL0() const { | 163 | u64 ARM_Dynarmic_32::GetTPIDR_EL0() const { |
| 165 | return cb->tpidr_el0; | 164 | return cp15->uprw; |
| 166 | } | 165 | } |
| 167 | 166 | ||
| 168 | void ARM_Dynarmic_32::SetTPIDR_EL0(u64 value) { | 167 | void ARM_Dynarmic_32::SetTPIDR_EL0(u64 value) { |
| 169 | cb->tpidr_el0 = value; | 168 | cp15->uprw = value; |
| 170 | } | 169 | } |
| 171 | 170 | ||
| 172 | void ARM_Dynarmic_32::SaveContext(ThreadContext32& ctx) { | 171 | void ARM_Dynarmic_32::SaveContext(ThreadContext32& ctx) { |
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.h b/src/core/arm/dynarmic/arm_dynarmic_32.h index 8ba9cea8f..e5b92d7bb 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.h +++ b/src/core/arm/dynarmic/arm_dynarmic_32.h | |||
| @@ -22,6 +22,7 @@ class Memory; | |||
| 22 | namespace Core { | 22 | namespace Core { |
| 23 | 23 | ||
| 24 | class DynarmicCallbacks32; | 24 | class DynarmicCallbacks32; |
| 25 | class DynarmicCP15; | ||
| 25 | class DynarmicExclusiveMonitor; | 26 | class DynarmicExclusiveMonitor; |
| 26 | class System; | 27 | class System; |
| 27 | 28 | ||
| @@ -66,12 +67,14 @@ private: | |||
| 66 | std::unordered_map<JitCacheKey, std::shared_ptr<Dynarmic::A32::Jit>, Common::PairHash>; | 67 | std::unordered_map<JitCacheKey, std::shared_ptr<Dynarmic::A32::Jit>, Common::PairHash>; |
| 67 | 68 | ||
| 68 | friend class DynarmicCallbacks32; | 69 | friend class DynarmicCallbacks32; |
| 70 | friend class DynarmicCP15; | ||
| 71 | |||
| 69 | std::unique_ptr<DynarmicCallbacks32> cb; | 72 | std::unique_ptr<DynarmicCallbacks32> cb; |
| 70 | JitCacheType jit_cache; | 73 | JitCacheType jit_cache; |
| 71 | std::shared_ptr<Dynarmic::A32::Jit> jit; | 74 | std::shared_ptr<Dynarmic::A32::Jit> jit; |
| 75 | std::shared_ptr<DynarmicCP15> cp15; | ||
| 72 | std::size_t core_index; | 76 | std::size_t core_index; |
| 73 | DynarmicExclusiveMonitor& exclusive_monitor; | 77 | DynarmicExclusiveMonitor& exclusive_monitor; |
| 74 | std::array<u32, 84> CP15_regs{}; | ||
| 75 | }; | 78 | }; |
| 76 | 79 | ||
| 77 | } // namespace Core | 80 | } // namespace Core |
diff --git a/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp b/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp index 3fdcdebde..d43e4dd70 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp | |||
| @@ -2,79 +2,132 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <fmt/format.h> | ||
| 6 | #include "common/logging/log.h" | ||
| 7 | #include "core/arm/dynarmic/arm_dynarmic_32.h" | ||
| 5 | #include "core/arm/dynarmic/arm_dynarmic_cp15.h" | 8 | #include "core/arm/dynarmic/arm_dynarmic_cp15.h" |
| 9 | #include "core/core.h" | ||
| 10 | #include "core/core_timing.h" | ||
| 11 | #include "core/core_timing_util.h" | ||
| 6 | 12 | ||
| 7 | using Callback = Dynarmic::A32::Coprocessor::Callback; | 13 | using Callback = Dynarmic::A32::Coprocessor::Callback; |
| 8 | using CallbackOrAccessOneWord = Dynarmic::A32::Coprocessor::CallbackOrAccessOneWord; | 14 | using CallbackOrAccessOneWord = Dynarmic::A32::Coprocessor::CallbackOrAccessOneWord; |
| 9 | using CallbackOrAccessTwoWords = Dynarmic::A32::Coprocessor::CallbackOrAccessTwoWords; | 15 | using CallbackOrAccessTwoWords = Dynarmic::A32::Coprocessor::CallbackOrAccessTwoWords; |
| 10 | 16 | ||
| 17 | template <> | ||
| 18 | struct fmt::formatter<Dynarmic::A32::CoprocReg> { | ||
| 19 | constexpr auto parse(format_parse_context& ctx) { | ||
| 20 | return ctx.begin(); | ||
| 21 | } | ||
| 22 | template <typename FormatContext> | ||
| 23 | auto format(const Dynarmic::A32::CoprocReg& reg, FormatContext& ctx) { | ||
| 24 | return format_to(ctx.out(), "cp{}", static_cast<size_t>(reg)); | ||
| 25 | } | ||
| 26 | }; | ||
| 27 | |||
| 28 | namespace Core { | ||
| 29 | |||
| 30 | static u32 dummy_value; | ||
| 31 | |||
| 11 | std::optional<Callback> DynarmicCP15::CompileInternalOperation(bool two, unsigned opc1, | 32 | std::optional<Callback> DynarmicCP15::CompileInternalOperation(bool two, unsigned opc1, |
| 12 | CoprocReg CRd, CoprocReg CRn, | 33 | CoprocReg CRd, CoprocReg CRn, |
| 13 | CoprocReg CRm, unsigned opc2) { | 34 | CoprocReg CRm, unsigned opc2) { |
| 35 | LOG_CRITICAL(Core_ARM, "CP15: cdp{} p15, {}, {}, {}, {}, {}", two ? "2" : "", opc1, CRd, CRn, | ||
| 36 | CRm, opc2); | ||
| 14 | return {}; | 37 | return {}; |
| 15 | } | 38 | } |
| 16 | 39 | ||
| 17 | CallbackOrAccessOneWord DynarmicCP15::CompileSendOneWord(bool two, unsigned opc1, CoprocReg CRn, | 40 | CallbackOrAccessOneWord DynarmicCP15::CompileSendOneWord(bool two, unsigned opc1, CoprocReg CRn, |
| 18 | CoprocReg CRm, unsigned opc2) { | 41 | CoprocReg CRm, unsigned opc2) { |
| 19 | // TODO(merry): Privileged CP15 registers | ||
| 20 | |||
| 21 | if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C5 && opc2 == 4) { | 42 | if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C5 && opc2 == 4) { |
| 43 | // CP15_FLUSH_PREFETCH_BUFFER | ||
| 22 | // This is a dummy write, we ignore the value written here. | 44 | // This is a dummy write, we ignore the value written here. |
| 23 | return &CP15[static_cast<std::size_t>(CP15Register::CP15_FLUSH_PREFETCH_BUFFER)]; | 45 | return &dummy_value; |
| 24 | } | 46 | } |
| 25 | 47 | ||
| 26 | if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C10) { | 48 | if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C10) { |
| 27 | switch (opc2) { | 49 | switch (opc2) { |
| 28 | case 4: | 50 | case 4: |
| 51 | // CP15_DATA_SYNC_BARRIER | ||
| 29 | // This is a dummy write, we ignore the value written here. | 52 | // This is a dummy write, we ignore the value written here. |
| 30 | return &CP15[static_cast<std::size_t>(CP15Register::CP15_DATA_SYNC_BARRIER)]; | 53 | return &dummy_value; |
| 31 | case 5: | 54 | case 5: |
| 55 | // CP15_DATA_MEMORY_BARRIER | ||
| 32 | // This is a dummy write, we ignore the value written here. | 56 | // This is a dummy write, we ignore the value written here. |
| 33 | return &CP15[static_cast<std::size_t>(CP15Register::CP15_DATA_MEMORY_BARRIER)]; | 57 | return &dummy_value; |
| 34 | default: | ||
| 35 | return {}; | ||
| 36 | } | 58 | } |
| 37 | } | 59 | } |
| 38 | 60 | ||
| 39 | if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0 && opc2 == 2) { | 61 | if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0 && opc2 == 2) { |
| 40 | return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_UPRW)]; | 62 | // CP15_THREAD_UPRW |
| 63 | return &uprw; | ||
| 41 | } | 64 | } |
| 42 | 65 | ||
| 66 | LOG_CRITICAL(Core_ARM, "CP15: mcr{} p15, {}, <Rt>, {}, {}, {}", two ? "2" : "", opc1, CRn, CRm, | ||
| 67 | opc2); | ||
| 43 | return {}; | 68 | return {}; |
| 44 | } | 69 | } |
| 45 | 70 | ||
| 46 | CallbackOrAccessTwoWords DynarmicCP15::CompileSendTwoWords(bool two, unsigned opc, CoprocReg CRm) { | 71 | CallbackOrAccessTwoWords DynarmicCP15::CompileSendTwoWords(bool two, unsigned opc, CoprocReg CRm) { |
| 72 | LOG_CRITICAL(Core_ARM, "CP15: mcrr{} p15, {}, <Rt>, <Rt2>, {}", two ? "2" : "", opc, CRm); | ||
| 47 | return {}; | 73 | return {}; |
| 48 | } | 74 | } |
| 49 | 75 | ||
| 50 | CallbackOrAccessOneWord DynarmicCP15::CompileGetOneWord(bool two, unsigned opc1, CoprocReg CRn, | 76 | CallbackOrAccessOneWord DynarmicCP15::CompileGetOneWord(bool two, unsigned opc1, CoprocReg CRn, |
| 51 | CoprocReg CRm, unsigned opc2) { | 77 | CoprocReg CRm, unsigned opc2) { |
| 52 | // TODO(merry): Privileged CP15 registers | ||
| 53 | |||
| 54 | if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0) { | 78 | if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0) { |
| 55 | switch (opc2) { | 79 | switch (opc2) { |
| 56 | case 2: | 80 | case 2: |
| 57 | return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_UPRW)]; | 81 | // CP15_THREAD_UPRW |
| 82 | return &uprw; | ||
| 58 | case 3: | 83 | case 3: |
| 59 | return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)]; | 84 | // CP15_THREAD_URO |
| 60 | default: | 85 | return &uro; |
| 61 | return {}; | ||
| 62 | } | 86 | } |
| 63 | } | 87 | } |
| 64 | 88 | ||
| 89 | LOG_CRITICAL(Core_ARM, "CP15: mrc{} p15, {}, <Rt>, {}, {}, {}", two ? "2" : "", opc1, CRn, CRm, | ||
| 90 | opc2); | ||
| 65 | return {}; | 91 | return {}; |
| 66 | } | 92 | } |
| 67 | 93 | ||
| 68 | CallbackOrAccessTwoWords DynarmicCP15::CompileGetTwoWords(bool two, unsigned opc, CoprocReg CRm) { | 94 | CallbackOrAccessTwoWords DynarmicCP15::CompileGetTwoWords(bool two, unsigned opc, CoprocReg CRm) { |
| 95 | if (!two && opc == 0 && CRm == CoprocReg::C14) { | ||
| 96 | // CNTPCT | ||
| 97 | const auto callback = static_cast<u64 (*)(Dynarmic::A32::Jit*, void*, u32, u32)>( | ||
| 98 | [](Dynarmic::A32::Jit*, void* arg, u32, u32) -> u64 { | ||
| 99 | ARM_Dynarmic_32& parent = *(ARM_Dynarmic_32*)arg; | ||
| 100 | return Timing::CpuCyclesToClockCycles(parent.system.CoreTiming().GetTicks()); | ||
| 101 | }); | ||
| 102 | return Dynarmic::A32::Coprocessor::Callback{callback, (void*)&parent}; | ||
| 103 | } | ||
| 104 | |||
| 105 | LOG_CRITICAL(Core_ARM, "CP15: mrrc{} p15, {}, <Rt>, <Rt2>, {}", two ? "2" : "", opc, CRm); | ||
| 69 | return {}; | 106 | return {}; |
| 70 | } | 107 | } |
| 71 | 108 | ||
| 72 | std::optional<Callback> DynarmicCP15::CompileLoadWords(bool two, bool long_transfer, CoprocReg CRd, | 109 | std::optional<Callback> DynarmicCP15::CompileLoadWords(bool two, bool long_transfer, CoprocReg CRd, |
| 73 | std::optional<u8> option) { | 110 | std::optional<u8> option) { |
| 111 | if (option) { | ||
| 112 | LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...], {}", two ? "2" : "", | ||
| 113 | long_transfer ? "l" : "", CRd, *option); | ||
| 114 | } else { | ||
| 115 | LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...]", two ? "2" : "", | ||
| 116 | long_transfer ? "l" : "", CRd); | ||
| 117 | } | ||
| 74 | return {}; | 118 | return {}; |
| 75 | } | 119 | } |
| 76 | 120 | ||
| 77 | std::optional<Callback> DynarmicCP15::CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd, | 121 | std::optional<Callback> DynarmicCP15::CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd, |
| 78 | std::optional<u8> option) { | 122 | std::optional<u8> option) { |
| 123 | if (option) { | ||
| 124 | LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...], {}", two ? "2" : "", | ||
| 125 | long_transfer ? "l" : "", CRd, *option); | ||
| 126 | } else { | ||
| 127 | LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...]", two ? "2" : "", | ||
| 128 | long_transfer ? "l" : "", CRd); | ||
| 129 | } | ||
| 79 | return {}; | 130 | return {}; |
| 80 | } | 131 | } |
| 132 | |||
| 133 | } // namespace Core | ||
diff --git a/src/core/arm/dynarmic/arm_dynarmic_cp15.h b/src/core/arm/dynarmic/arm_dynarmic_cp15.h index 07bcde5f9..7356d252e 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_cp15.h +++ b/src/core/arm/dynarmic/arm_dynarmic_cp15.h | |||
| @@ -10,128 +10,15 @@ | |||
| 10 | #include <dynarmic/A32/coprocessor.h> | 10 | #include <dynarmic/A32/coprocessor.h> |
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | 12 | ||
| 13 | enum class CP15Register { | 13 | namespace Core { |
| 14 | // c0 - Information registers | ||
| 15 | CP15_MAIN_ID, | ||
| 16 | CP15_CACHE_TYPE, | ||
| 17 | CP15_TCM_STATUS, | ||
| 18 | CP15_TLB_TYPE, | ||
| 19 | CP15_CPU_ID, | ||
| 20 | CP15_PROCESSOR_FEATURE_0, | ||
| 21 | CP15_PROCESSOR_FEATURE_1, | ||
| 22 | CP15_DEBUG_FEATURE_0, | ||
| 23 | CP15_AUXILIARY_FEATURE_0, | ||
| 24 | CP15_MEMORY_MODEL_FEATURE_0, | ||
| 25 | CP15_MEMORY_MODEL_FEATURE_1, | ||
| 26 | CP15_MEMORY_MODEL_FEATURE_2, | ||
| 27 | CP15_MEMORY_MODEL_FEATURE_3, | ||
| 28 | CP15_ISA_FEATURE_0, | ||
| 29 | CP15_ISA_FEATURE_1, | ||
| 30 | CP15_ISA_FEATURE_2, | ||
| 31 | CP15_ISA_FEATURE_3, | ||
| 32 | CP15_ISA_FEATURE_4, | ||
| 33 | 14 | ||
| 34 | // c1 - Control registers | 15 | class ARM_Dynarmic_32; |
| 35 | CP15_CONTROL, | ||
| 36 | CP15_AUXILIARY_CONTROL, | ||
| 37 | CP15_COPROCESSOR_ACCESS_CONTROL, | ||
| 38 | |||
| 39 | // c2 - Translation table registers | ||
| 40 | CP15_TRANSLATION_BASE_TABLE_0, | ||
| 41 | CP15_TRANSLATION_BASE_TABLE_1, | ||
| 42 | CP15_TRANSLATION_BASE_CONTROL, | ||
| 43 | CP15_DOMAIN_ACCESS_CONTROL, | ||
| 44 | CP15_RESERVED, | ||
| 45 | |||
| 46 | // c5 - Fault status registers | ||
| 47 | CP15_FAULT_STATUS, | ||
| 48 | CP15_INSTR_FAULT_STATUS, | ||
| 49 | CP15_COMBINED_DATA_FSR = CP15_FAULT_STATUS, | ||
| 50 | CP15_INST_FSR, | ||
| 51 | |||
| 52 | // c6 - Fault Address registers | ||
| 53 | CP15_FAULT_ADDRESS, | ||
| 54 | CP15_COMBINED_DATA_FAR = CP15_FAULT_ADDRESS, | ||
| 55 | CP15_WFAR, | ||
| 56 | CP15_IFAR, | ||
| 57 | |||
| 58 | // c7 - Cache operation registers | ||
| 59 | CP15_WAIT_FOR_INTERRUPT, | ||
| 60 | CP15_PHYS_ADDRESS, | ||
| 61 | CP15_INVALIDATE_INSTR_CACHE, | ||
| 62 | CP15_INVALIDATE_INSTR_CACHE_USING_MVA, | ||
| 63 | CP15_INVALIDATE_INSTR_CACHE_USING_INDEX, | ||
| 64 | CP15_FLUSH_PREFETCH_BUFFER, | ||
| 65 | CP15_FLUSH_BRANCH_TARGET_CACHE, | ||
| 66 | CP15_FLUSH_BRANCH_TARGET_CACHE_ENTRY, | ||
| 67 | CP15_INVALIDATE_DATA_CACHE, | ||
| 68 | CP15_INVALIDATE_DATA_CACHE_LINE_USING_MVA, | ||
| 69 | CP15_INVALIDATE_DATA_CACHE_LINE_USING_INDEX, | ||
| 70 | CP15_INVALIDATE_DATA_AND_INSTR_CACHE, | ||
| 71 | CP15_CLEAN_DATA_CACHE, | ||
| 72 | CP15_CLEAN_DATA_CACHE_LINE_USING_MVA, | ||
| 73 | CP15_CLEAN_DATA_CACHE_LINE_USING_INDEX, | ||
| 74 | CP15_DATA_SYNC_BARRIER, | ||
| 75 | CP15_DATA_MEMORY_BARRIER, | ||
| 76 | CP15_CLEAN_AND_INVALIDATE_DATA_CACHE, | ||
| 77 | CP15_CLEAN_AND_INVALIDATE_DATA_CACHE_LINE_USING_MVA, | ||
| 78 | CP15_CLEAN_AND_INVALIDATE_DATA_CACHE_LINE_USING_INDEX, | ||
| 79 | |||
| 80 | // c8 - TLB operations | ||
| 81 | CP15_INVALIDATE_ITLB, | ||
| 82 | CP15_INVALIDATE_ITLB_SINGLE_ENTRY, | ||
| 83 | CP15_INVALIDATE_ITLB_ENTRY_ON_ASID_MATCH, | ||
| 84 | CP15_INVALIDATE_ITLB_ENTRY_ON_MVA, | ||
| 85 | CP15_INVALIDATE_DTLB, | ||
| 86 | CP15_INVALIDATE_DTLB_SINGLE_ENTRY, | ||
| 87 | CP15_INVALIDATE_DTLB_ENTRY_ON_ASID_MATCH, | ||
| 88 | CP15_INVALIDATE_DTLB_ENTRY_ON_MVA, | ||
| 89 | CP15_INVALIDATE_UTLB, | ||
| 90 | CP15_INVALIDATE_UTLB_SINGLE_ENTRY, | ||
| 91 | CP15_INVALIDATE_UTLB_ENTRY_ON_ASID_MATCH, | ||
| 92 | CP15_INVALIDATE_UTLB_ENTRY_ON_MVA, | ||
| 93 | |||
| 94 | // c9 - Data cache lockdown register | ||
| 95 | CP15_DATA_CACHE_LOCKDOWN, | ||
| 96 | |||
| 97 | // c10 - TLB/Memory map registers | ||
| 98 | CP15_TLB_LOCKDOWN, | ||
| 99 | CP15_PRIMARY_REGION_REMAP, | ||
| 100 | CP15_NORMAL_REGION_REMAP, | ||
| 101 | |||
| 102 | // c13 - Thread related registers | ||
| 103 | CP15_PID, | ||
| 104 | CP15_CONTEXT_ID, | ||
| 105 | CP15_THREAD_UPRW, // Thread ID register - User/Privileged Read/Write | ||
| 106 | CP15_THREAD_URO, // Thread ID register - User Read Only (Privileged R/W) | ||
| 107 | CP15_THREAD_PRW, // Thread ID register - Privileged R/W only. | ||
| 108 | |||
| 109 | // c15 - Performance and TLB lockdown registers | ||
| 110 | CP15_PERFORMANCE_MONITOR_CONTROL, | ||
| 111 | CP15_CYCLE_COUNTER, | ||
| 112 | CP15_COUNT_0, | ||
| 113 | CP15_COUNT_1, | ||
| 114 | CP15_READ_MAIN_TLB_LOCKDOWN_ENTRY, | ||
| 115 | CP15_WRITE_MAIN_TLB_LOCKDOWN_ENTRY, | ||
| 116 | CP15_MAIN_TLB_LOCKDOWN_VIRT_ADDRESS, | ||
| 117 | CP15_MAIN_TLB_LOCKDOWN_PHYS_ADDRESS, | ||
| 118 | CP15_MAIN_TLB_LOCKDOWN_ATTRIBUTE, | ||
| 119 | CP15_TLB_DEBUG_CONTROL, | ||
| 120 | |||
| 121 | // Skyeye defined | ||
| 122 | CP15_TLB_FAULT_ADDR, | ||
| 123 | CP15_TLB_FAULT_STATUS, | ||
| 124 | |||
| 125 | // Not an actual register. | ||
| 126 | // All registers should be defined above this. | ||
| 127 | CP15_REGISTER_COUNT, | ||
| 128 | }; | ||
| 129 | 16 | ||
| 130 | class DynarmicCP15 final : public Dynarmic::A32::Coprocessor { | 17 | class DynarmicCP15 final : public Dynarmic::A32::Coprocessor { |
| 131 | public: | 18 | public: |
| 132 | using CoprocReg = Dynarmic::A32::CoprocReg; | 19 | using CoprocReg = Dynarmic::A32::CoprocReg; |
| 133 | 20 | ||
| 134 | explicit DynarmicCP15(u32* cp15) : CP15(cp15){}; | 21 | explicit DynarmicCP15(ARM_Dynarmic_32& parent) : parent(parent) {} |
| 135 | 22 | ||
| 136 | std::optional<Callback> CompileInternalOperation(bool two, unsigned opc1, CoprocReg CRd, | 23 | std::optional<Callback> CompileInternalOperation(bool two, unsigned opc1, CoprocReg CRd, |
| 137 | CoprocReg CRn, CoprocReg CRm, | 24 | CoprocReg CRn, CoprocReg CRm, |
| @@ -147,6 +34,9 @@ public: | |||
| 147 | std::optional<Callback> CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd, | 34 | std::optional<Callback> CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd, |
| 148 | std::optional<u8> option) override; | 35 | std::optional<u8> option) override; |
| 149 | 36 | ||
| 150 | private: | 37 | ARM_Dynarmic_32& parent; |
| 151 | u32* CP15{}; | 38 | u32 uprw; |
| 39 | u32 uro; | ||
| 152 | }; | 40 | }; |
| 41 | |||
| 42 | } // namespace Core | ||
diff --git a/src/core/settings.h b/src/core/settings.h index 36cd66fd4..33e1e06cd 100644 --- a/src/core/settings.h +++ b/src/core/settings.h | |||
| @@ -437,7 +437,7 @@ struct Values { | |||
| 437 | bool renderer_debug; | 437 | bool renderer_debug; |
| 438 | int vulkan_device; | 438 | int vulkan_device; |
| 439 | 439 | ||
| 440 | float resolution_factor; | 440 | u16 resolution_factor{1}; |
| 441 | int aspect_ratio; | 441 | int aspect_ratio; |
| 442 | int max_anisotropy; | 442 | int max_anisotropy; |
| 443 | bool use_frame_limit; | 443 | bool use_frame_limit; |
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 39d5d8401..099bb446e 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -52,6 +52,8 @@ add_library(video_core STATIC | |||
| 52 | rasterizer_interface.h | 52 | rasterizer_interface.h |
| 53 | renderer_base.cpp | 53 | renderer_base.cpp |
| 54 | renderer_base.h | 54 | renderer_base.h |
| 55 | renderer_opengl/gl_arb_decompiler.cpp | ||
| 56 | renderer_opengl/gl_arb_decompiler.h | ||
| 55 | renderer_opengl/gl_buffer_cache.cpp | 57 | renderer_opengl/gl_buffer_cache.cpp |
| 56 | renderer_opengl/gl_buffer_cache.h | 58 | renderer_opengl/gl_buffer_cache.h |
| 57 | renderer_opengl/gl_device.cpp | 59 | renderer_opengl/gl_device.cpp |
diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 11c1cc3be..d4a97ec7b 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp | |||
| @@ -14,22 +14,16 @@ MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255 | |||
| 14 | MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0)); | 14 | MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0)); |
| 15 | 15 | ||
| 16 | namespace Tegra { | 16 | namespace Tegra { |
| 17 | static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r9; | 17 | static const Xbyak::Reg64 STATE = Xbyak::util::rbx; |
| 18 | static const Xbyak::Reg64 REGISTERS = Xbyak::util::r10; | 18 | static const Xbyak::Reg32 RESULT = Xbyak::util::ebp; |
| 19 | static const Xbyak::Reg64 STATE = Xbyak::util::r11; | 19 | static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r12; |
| 20 | static const Xbyak::Reg64 NEXT_PARAMETER = Xbyak::util::r12; | ||
| 21 | static const Xbyak::Reg32 RESULT = Xbyak::util::r13d; | ||
| 22 | static const Xbyak::Reg64 RESULT_64 = Xbyak::util::r13; | ||
| 23 | static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; | 20 | static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; |
| 24 | static const Xbyak::Reg64 METHOD_ADDRESS_64 = Xbyak::util::r14; | ||
| 25 | static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; | 21 | static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; |
| 26 | 22 | ||
| 27 | static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ | 23 | static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ |
| 28 | PARAMETERS, | ||
| 29 | REGISTERS, | ||
| 30 | STATE, | 24 | STATE, |
| 31 | NEXT_PARAMETER, | ||
| 32 | RESULT, | 25 | RESULT, |
| 26 | PARAMETERS, | ||
| 33 | METHOD_ADDRESS, | 27 | METHOD_ADDRESS, |
| 34 | BRANCH_HOLDER, | 28 | BRANCH_HOLDER, |
| 35 | }); | 29 | }); |
| @@ -53,8 +47,7 @@ void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) { | |||
| 53 | JITState state{}; | 47 | JITState state{}; |
| 54 | state.maxwell3d = &maxwell3d; | 48 | state.maxwell3d = &maxwell3d; |
| 55 | state.registers = {}; | 49 | state.registers = {}; |
| 56 | state.parameters = parameters.data(); | 50 | program(&state, parameters.data()); |
| 57 | program(&state); | ||
| 58 | } | 51 | } |
| 59 | 52 | ||
| 60 | void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { | 53 | void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { |
| @@ -64,18 +57,18 @@ void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { | |||
| 64 | const bool is_move_operation = !is_a_zero && is_b_zero; | 57 | const bool is_move_operation = !is_a_zero && is_b_zero; |
| 65 | const bool has_zero_register = is_a_zero || is_b_zero; | 58 | const bool has_zero_register = is_a_zero || is_b_zero; |
| 66 | 59 | ||
| 67 | Xbyak::Reg64 src_a; | 60 | Xbyak::Reg32 src_a; |
| 68 | Xbyak::Reg32 src_b; | 61 | Xbyak::Reg32 src_b; |
| 69 | 62 | ||
| 70 | if (!optimizer.zero_reg_skip) { | 63 | if (!optimizer.zero_reg_skip) { |
| 71 | src_a = Compile_GetRegister(opcode.src_a, RESULT_64); | 64 | src_a = Compile_GetRegister(opcode.src_a, RESULT); |
| 72 | src_b = Compile_GetRegister(opcode.src_b, ebx); | 65 | src_b = Compile_GetRegister(opcode.src_b, eax); |
| 73 | } else { | 66 | } else { |
| 74 | if (!is_a_zero) { | 67 | if (!is_a_zero) { |
| 75 | src_a = Compile_GetRegister(opcode.src_a, RESULT_64); | 68 | src_a = Compile_GetRegister(opcode.src_a, RESULT); |
| 76 | } | 69 | } |
| 77 | if (!is_b_zero) { | 70 | if (!is_b_zero) { |
| 78 | src_b = Compile_GetRegister(opcode.src_b, ebx); | 71 | src_b = Compile_GetRegister(opcode.src_b, eax); |
| 79 | } | 72 | } |
| 80 | } | 73 | } |
| 81 | Xbyak::Label skip_carry{}; | 74 | Xbyak::Label skip_carry{}; |
| @@ -302,22 +295,22 @@ void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) { | |||
| 302 | sub(result, opcode.immediate * -1); | 295 | sub(result, opcode.immediate * -1); |
| 303 | } | 296 | } |
| 304 | } | 297 | } |
| 305 | Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0); | 298 | Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); |
| 306 | mov(Common::X64::ABI_PARAM1, qword[STATE]); | 299 | mov(Common::X64::ABI_PARAM1, qword[STATE]); |
| 307 | mov(Common::X64::ABI_PARAM2, RESULT); | 300 | mov(Common::X64::ABI_PARAM2, RESULT); |
| 308 | Common::X64::CallFarFunction(*this, &Read); | 301 | Common::X64::CallFarFunction(*this, &Read); |
| 309 | Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0); | 302 | Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); |
| 310 | mov(RESULT, Common::X64::ABI_RETURN.cvt32()); | 303 | mov(RESULT, Common::X64::ABI_RETURN.cvt32()); |
| 311 | Compile_ProcessResult(opcode.result_operation, opcode.dst); | 304 | Compile_ProcessResult(opcode.result_operation, opcode.dst); |
| 312 | } | 305 | } |
| 313 | 306 | ||
| 314 | void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { | 307 | void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { |
| 315 | Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0); | 308 | Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); |
| 316 | mov(Common::X64::ABI_PARAM1, qword[STATE]); | 309 | mov(Common::X64::ABI_PARAM1, qword[STATE]); |
| 317 | mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS); | 310 | mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS); |
| 318 | mov(Common::X64::ABI_PARAM3, value); | 311 | mov(Common::X64::ABI_PARAM3, value); |
| 319 | Common::X64::CallFarFunction(*this, &Send); | 312 | Common::X64::CallFarFunction(*this, &Send); |
| 320 | Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0); | 313 | Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); |
| 321 | 314 | ||
| 322 | Xbyak::Label dont_process{}; | 315 | Xbyak::Label dont_process{}; |
| 323 | // Get increment | 316 | // Get increment |
| @@ -329,7 +322,7 @@ void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { | |||
| 329 | and_(METHOD_ADDRESS, 0xfff); | 322 | and_(METHOD_ADDRESS, 0xfff); |
| 330 | shr(ecx, 12); | 323 | shr(ecx, 12); |
| 331 | and_(ecx, 0x3f); | 324 | and_(ecx, 0x3f); |
| 332 | lea(eax, ptr[rcx + METHOD_ADDRESS_64]); | 325 | lea(eax, ptr[rcx + METHOD_ADDRESS.cvt64()]); |
| 333 | sal(ecx, 12); | 326 | sal(ecx, 12); |
| 334 | or_(eax, ecx); | 327 | or_(eax, ecx); |
| 335 | 328 | ||
| @@ -421,19 +414,15 @@ void MacroJITx64Impl::Compile() { | |||
| 421 | bool keep_executing = true; | 414 | bool keep_executing = true; |
| 422 | labels.fill(Xbyak::Label()); | 415 | labels.fill(Xbyak::Label()); |
| 423 | 416 | ||
| 424 | Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); | 417 | Common::X64::ABI_PushRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); |
| 425 | // JIT state | 418 | // JIT state |
| 426 | mov(STATE, Common::X64::ABI_PARAM1); | 419 | mov(STATE, Common::X64::ABI_PARAM1); |
| 427 | mov(PARAMETERS, qword[Common::X64::ABI_PARAM1 + | 420 | mov(PARAMETERS, Common::X64::ABI_PARAM2); |
| 428 | static_cast<Xbyak::uint32>(offsetof(JITState, parameters))]); | ||
| 429 | mov(REGISTERS, Common::X64::ABI_PARAM1); | ||
| 430 | add(REGISTERS, static_cast<Xbyak::uint32>(offsetof(JITState, registers))); | ||
| 431 | xor_(RESULT, RESULT); | 421 | xor_(RESULT, RESULT); |
| 432 | xor_(METHOD_ADDRESS, METHOD_ADDRESS); | 422 | xor_(METHOD_ADDRESS, METHOD_ADDRESS); |
| 433 | xor_(NEXT_PARAMETER, NEXT_PARAMETER); | ||
| 434 | xor_(BRANCH_HOLDER, BRANCH_HOLDER); | 423 | xor_(BRANCH_HOLDER, BRANCH_HOLDER); |
| 435 | 424 | ||
| 436 | mov(dword[REGISTERS + 4], Compile_FetchParameter()); | 425 | mov(dword[STATE + offsetof(JITState, registers) + 4], Compile_FetchParameter()); |
| 437 | 426 | ||
| 438 | // Track get register for zero registers and mark it as no-op | 427 | // Track get register for zero registers and mark it as no-op |
| 439 | optimizer.zero_reg_skip = true; | 428 | optimizer.zero_reg_skip = true; |
| @@ -463,7 +452,7 @@ void MacroJITx64Impl::Compile() { | |||
| 463 | 452 | ||
| 464 | L(end_of_code); | 453 | L(end_of_code); |
| 465 | 454 | ||
| 466 | Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); | 455 | Common::X64::ABI_PopRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); |
| 467 | ret(); | 456 | ret(); |
| 468 | ready(); | 457 | ready(); |
| 469 | program = getCode<ProgramType>(); | 458 | program = getCode<ProgramType>(); |
| @@ -537,8 +526,8 @@ bool MacroJITx64Impl::Compile_NextInstruction() { | |||
| 537 | } | 526 | } |
| 538 | 527 | ||
| 539 | Xbyak::Reg32 Tegra::MacroJITx64Impl::Compile_FetchParameter() { | 528 | Xbyak::Reg32 Tegra::MacroJITx64Impl::Compile_FetchParameter() { |
| 540 | mov(eax, dword[PARAMETERS + NEXT_PARAMETER * sizeof(u32)]); | 529 | mov(eax, dword[PARAMETERS]); |
| 541 | inc(NEXT_PARAMETER); | 530 | add(PARAMETERS, sizeof(u32)); |
| 542 | return eax; | 531 | return eax; |
| 543 | } | 532 | } |
| 544 | 533 | ||
| @@ -547,31 +536,12 @@ Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) { | |||
| 547 | // Register 0 is always zero | 536 | // Register 0 is always zero |
| 548 | xor_(dst, dst); | 537 | xor_(dst, dst); |
| 549 | } else { | 538 | } else { |
| 550 | mov(dst, dword[REGISTERS + index * sizeof(u32)]); | 539 | mov(dst, dword[STATE + offsetof(JITState, registers) + index * sizeof(u32)]); |
| 551 | } | 540 | } |
| 552 | 541 | ||
| 553 | return dst; | 542 | return dst; |
| 554 | } | 543 | } |
| 555 | 544 | ||
| 556 | Xbyak::Reg64 Tegra::MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg64 dst) { | ||
| 557 | if (index == 0) { | ||
| 558 | // Register 0 is always zero | ||
| 559 | xor_(dst, dst); | ||
| 560 | } else { | ||
| 561 | mov(dst, dword[REGISTERS + index * sizeof(u32)]); | ||
| 562 | } | ||
| 563 | |||
| 564 | return dst; | ||
| 565 | } | ||
| 566 | |||
| 567 | void Tegra::MacroJITx64Impl::Compile_WriteCarry(Xbyak::Reg64 dst) { | ||
| 568 | Xbyak::Label zero{}, end{}; | ||
| 569 | xor_(ecx, ecx); | ||
| 570 | shr(dst, 32); | ||
| 571 | setne(cl); | ||
| 572 | mov(dword[STATE + offsetof(JITState, carry_flag)], ecx); | ||
| 573 | } | ||
| 574 | |||
| 575 | void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) { | 545 | void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) { |
| 576 | auto SetRegister = [=](u32 reg, Xbyak::Reg32 result) { | 546 | auto SetRegister = [=](u32 reg, Xbyak::Reg32 result) { |
| 577 | // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero | 547 | // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero |
| @@ -579,7 +549,7 @@ void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u3 | |||
| 579 | if (reg == 0) { | 549 | if (reg == 0) { |
| 580 | return; | 550 | return; |
| 581 | } | 551 | } |
| 582 | mov(dword[REGISTERS + reg * sizeof(u32)], result); | 552 | mov(dword[STATE + offsetof(JITState, registers) + reg * sizeof(u32)], result); |
| 583 | }; | 553 | }; |
| 584 | auto SetMethodAddress = [=](Xbyak::Reg32 reg) { mov(METHOD_ADDRESS, reg); }; | 554 | auto SetMethodAddress = [=](Xbyak::Reg32 reg) { mov(METHOD_ADDRESS, reg); }; |
| 585 | 555 | ||
diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h index 71f738b9a..51ec090b8 100644 --- a/src/video_core/macro/macro_jit_x64.h +++ b/src/video_core/macro/macro_jit_x64.h | |||
| @@ -55,8 +55,6 @@ private: | |||
| 55 | 55 | ||
| 56 | Xbyak::Reg32 Compile_FetchParameter(); | 56 | Xbyak::Reg32 Compile_FetchParameter(); |
| 57 | Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst); | 57 | Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst); |
| 58 | Xbyak::Reg64 Compile_GetRegister(u32 index, Xbyak::Reg64 dst); | ||
| 59 | void Compile_WriteCarry(Xbyak::Reg64 dst); | ||
| 60 | 58 | ||
| 61 | void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg); | 59 | void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg); |
| 62 | void Compile_Send(Xbyak::Reg32 value); | 60 | void Compile_Send(Xbyak::Reg32 value); |
| @@ -67,11 +65,10 @@ private: | |||
| 67 | struct JITState { | 65 | struct JITState { |
| 68 | Engines::Maxwell3D* maxwell3d{}; | 66 | Engines::Maxwell3D* maxwell3d{}; |
| 69 | std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{}; | 67 | std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{}; |
| 70 | const u32* parameters{}; | ||
| 71 | u32 carry_flag{}; | 68 | u32 carry_flag{}; |
| 72 | }; | 69 | }; |
| 73 | static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0"); | 70 | static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0"); |
| 74 | using ProgramType = void (*)(JITState*); | 71 | using ProgramType = void (*)(JITState*, const u32*); |
| 75 | 72 | ||
| 76 | struct OptimizerState { | 73 | struct OptimizerState { |
| 77 | bool can_skip_carry{}; | 74 | bool can_skip_carry{}; |
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp new file mode 100644 index 000000000..1e96b0310 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp | |||
| @@ -0,0 +1,2074 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <string> | ||
| 9 | #include <string_view> | ||
| 10 | #include <utility> | ||
| 11 | #include <variant> | ||
| 12 | |||
| 13 | #include <fmt/format.h> | ||
| 14 | |||
| 15 | #include "common/alignment.h" | ||
| 16 | #include "common/assert.h" | ||
| 17 | #include "common/common_types.h" | ||
| 18 | #include "video_core/renderer_opengl/gl_arb_decompiler.h" | ||
| 19 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 20 | #include "video_core/shader/registry.h" | ||
| 21 | #include "video_core/shader/shader_ir.h" | ||
| 22 | |||
| 23 | // Predicates in the decompiled code follow the convention that -1 means true and 0 means false. | ||
| 24 | // GLASM lacks booleans, so they have to be implemented as integers. | ||
| 25 | // Using -1 for true is useful because both CMP.S and NOT.U can negate it, and CMP.S can be used to | ||
| 26 | // select between two values, because -1 will be evaluated as true and 0 as false. | ||
| 27 | |||
| 28 | namespace OpenGL { | ||
| 29 | |||
| 30 | namespace { | ||
| 31 | |||
| 32 | using Tegra::Engines::ShaderType; | ||
| 33 | using Tegra::Shader::Attribute; | ||
| 34 | using Tegra::Shader::PixelImap; | ||
| 35 | using Tegra::Shader::Register; | ||
| 36 | using namespace VideoCommon::Shader; | ||
| 37 | using Operation = const OperationNode&; | ||
| 38 | |||
| 39 | constexpr std::array INTERNAL_FLAG_NAMES = {"ZERO", "SIGN", "CARRY", "OVERFLOW"}; | ||
| 40 | |||
| 41 | char Swizzle(std::size_t component) { | ||
| 42 | ASSERT(component < 4); | ||
| 43 | return component["xyzw"]; | ||
| 44 | } | ||
| 45 | |||
| 46 | constexpr bool IsGenericAttribute(Attribute::Index index) { | ||
| 47 | return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31; | ||
| 48 | } | ||
| 49 | |||
| 50 | u32 GetGenericAttributeIndex(Attribute::Index index) { | ||
| 51 | ASSERT(IsGenericAttribute(index)); | ||
| 52 | return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); | ||
| 53 | } | ||
| 54 | |||
| 55 | std::string_view Modifiers(Operation operation) { | ||
| 56 | const auto meta = std::get_if<MetaArithmetic>(&operation.GetMeta()); | ||
| 57 | if (meta && meta->precise) { | ||
| 58 | return ".PREC"; | ||
| 59 | } | ||
| 60 | return ""; | ||
| 61 | } | ||
| 62 | |||
| 63 | std::string_view GetInputFlags(PixelImap attribute) { | ||
| 64 | switch (attribute) { | ||
| 65 | case PixelImap::Perspective: | ||
| 66 | return ""; | ||
| 67 | case PixelImap::Constant: | ||
| 68 | return "FLAT "; | ||
| 69 | case PixelImap::ScreenLinear: | ||
| 70 | return "NOPERSPECTIVE "; | ||
| 71 | case PixelImap::Unused: | ||
| 72 | break; | ||
| 73 | } | ||
| 74 | UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<int>(attribute)); | ||
| 75 | return {}; | ||
| 76 | } | ||
| 77 | |||
| 78 | std::string_view ImageType(Tegra::Shader::ImageType image_type) { | ||
| 79 | switch (image_type) { | ||
| 80 | case Tegra::Shader::ImageType::Texture1D: | ||
| 81 | return "1D"; | ||
| 82 | case Tegra::Shader::ImageType::TextureBuffer: | ||
| 83 | return "BUFFER"; | ||
| 84 | case Tegra::Shader::ImageType::Texture1DArray: | ||
| 85 | return "ARRAY1D"; | ||
| 86 | case Tegra::Shader::ImageType::Texture2D: | ||
| 87 | return "2D"; | ||
| 88 | case Tegra::Shader::ImageType::Texture2DArray: | ||
| 89 | return "ARRAY2D"; | ||
| 90 | case Tegra::Shader::ImageType::Texture3D: | ||
| 91 | return "3D"; | ||
| 92 | } | ||
| 93 | UNREACHABLE(); | ||
| 94 | return {}; | ||
| 95 | } | ||
| 96 | |||
| 97 | std::string_view StackName(MetaStackClass stack) { | ||
| 98 | switch (stack) { | ||
| 99 | case MetaStackClass::Ssy: | ||
| 100 | return "SSY"; | ||
| 101 | case MetaStackClass::Pbk: | ||
| 102 | return "PBK"; | ||
| 103 | } | ||
| 104 | UNREACHABLE(); | ||
| 105 | return ""; | ||
| 106 | }; | ||
| 107 | |||
| 108 | std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) { | ||
| 109 | switch (topology) { | ||
| 110 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points: | ||
| 111 | return "POINTS"; | ||
| 112 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines: | ||
| 113 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip: | ||
| 114 | return "LINES"; | ||
| 115 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency: | ||
| 116 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency: | ||
| 117 | return "LINES_ADJACENCY"; | ||
| 118 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles: | ||
| 119 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip: | ||
| 120 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan: | ||
| 121 | return "TRIANGLES"; | ||
| 122 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency: | ||
| 123 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: | ||
| 124 | return "TRIANGLES_ADJACENCY"; | ||
| 125 | default: | ||
| 126 | UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology)); | ||
| 127 | return "POINTS"; | ||
| 128 | } | ||
| 129 | } | ||
| 130 | |||
| 131 | std::string_view TopologyName(Tegra::Shader::OutputTopology topology) { | ||
| 132 | switch (topology) { | ||
| 133 | case Tegra::Shader::OutputTopology::PointList: | ||
| 134 | return "POINTS"; | ||
| 135 | case Tegra::Shader::OutputTopology::LineStrip: | ||
| 136 | return "LINE_STRIP"; | ||
| 137 | case Tegra::Shader::OutputTopology::TriangleStrip: | ||
| 138 | return "TRIANGLE_STRIP"; | ||
| 139 | default: | ||
| 140 | UNIMPLEMENTED_MSG("Unknown output topology: {}", static_cast<u32>(topology)); | ||
| 141 | return "points"; | ||
| 142 | } | ||
| 143 | } | ||
| 144 | |||
| 145 | std::string_view StageInputName(ShaderType stage) { | ||
| 146 | switch (stage) { | ||
| 147 | case ShaderType::Vertex: | ||
| 148 | case ShaderType::Geometry: | ||
| 149 | return "vertex"; | ||
| 150 | case ShaderType::Fragment: | ||
| 151 | return "fragment"; | ||
| 152 | case ShaderType::Compute: | ||
| 153 | return "invocation"; | ||
| 154 | default: | ||
| 155 | UNREACHABLE(); | ||
| 156 | return ""; | ||
| 157 | } | ||
| 158 | } | ||
| 159 | |||
| 160 | std::string TextureType(const MetaTexture& meta) { | ||
| 161 | if (meta.sampler.is_buffer) { | ||
| 162 | return "BUFFER"; | ||
| 163 | } | ||
| 164 | std::string type; | ||
| 165 | if (meta.sampler.is_shadow) { | ||
| 166 | type += "SHADOW"; | ||
| 167 | } | ||
| 168 | if (meta.sampler.is_array) { | ||
| 169 | type += "ARRAY"; | ||
| 170 | } | ||
| 171 | type += [&meta] { | ||
| 172 | switch (meta.sampler.type) { | ||
| 173 | case Tegra::Shader::TextureType::Texture1D: | ||
| 174 | return "1D"; | ||
| 175 | case Tegra::Shader::TextureType::Texture2D: | ||
| 176 | return "2D"; | ||
| 177 | case Tegra::Shader::TextureType::Texture3D: | ||
| 178 | return "3D"; | ||
| 179 | case Tegra::Shader::TextureType::TextureCube: | ||
| 180 | return "CUBE"; | ||
| 181 | } | ||
| 182 | UNREACHABLE(); | ||
| 183 | return "2D"; | ||
| 184 | }(); | ||
| 185 | return type; | ||
| 186 | } | ||
| 187 | |||
| 188 | std::string GlobalMemoryName(const GlobalMemoryBase& base) { | ||
| 189 | return fmt::format("gmem{}_{}", base.cbuf_index, base.cbuf_offset); | ||
| 190 | } | ||
| 191 | |||
| 192 | class ARBDecompiler final { | ||
| 193 | public: | ||
| 194 | explicit ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, | ||
| 195 | ShaderType stage, std::string_view identifier); | ||
| 196 | |||
| 197 | std::string Code() const { | ||
| 198 | return shader_source; | ||
| 199 | } | ||
| 200 | |||
| 201 | private: | ||
| 202 | void DeclareHeader(); | ||
| 203 | void DeclareVertex(); | ||
| 204 | void DeclareGeometry(); | ||
| 205 | void DeclareFragment(); | ||
| 206 | void DeclareCompute(); | ||
| 207 | void DeclareInputAttributes(); | ||
| 208 | void DeclareOutputAttributes(); | ||
| 209 | void DeclareLocalMemory(); | ||
| 210 | void DeclareGlobalMemory(); | ||
| 211 | void DeclareConstantBuffers(); | ||
| 212 | void DeclareRegisters(); | ||
| 213 | void DeclareTemporaries(); | ||
| 214 | void DeclarePredicates(); | ||
| 215 | void DeclareInternalFlags(); | ||
| 216 | |||
| 217 | void InitializeVariables(); | ||
| 218 | |||
| 219 | void DecompileAST(); | ||
| 220 | void DecompileBranchMode(); | ||
| 221 | |||
| 222 | void VisitAST(const ASTNode& node); | ||
| 223 | std::string VisitExpression(const Expr& node); | ||
| 224 | |||
| 225 | void VisitBlock(const NodeBlock& bb); | ||
| 226 | |||
| 227 | std::string Visit(const Node& node); | ||
| 228 | |||
| 229 | std::pair<std::string, std::size_t> BuildCoords(Operation); | ||
| 230 | std::string BuildAoffi(Operation); | ||
| 231 | void Exit(); | ||
| 232 | |||
| 233 | std::string Assign(Operation); | ||
| 234 | std::string Select(Operation); | ||
| 235 | std::string FClamp(Operation); | ||
| 236 | std::string FCastHalf0(Operation); | ||
| 237 | std::string FCastHalf1(Operation); | ||
| 238 | std::string FSqrt(Operation); | ||
| 239 | std::string FSwizzleAdd(Operation); | ||
| 240 | std::string HAdd2(Operation); | ||
| 241 | std::string HMul2(Operation); | ||
| 242 | std::string HFma2(Operation); | ||
| 243 | std::string HAbsolute(Operation); | ||
| 244 | std::string HNegate(Operation); | ||
| 245 | std::string HClamp(Operation); | ||
| 246 | std::string HCastFloat(Operation); | ||
| 247 | std::string HUnpack(Operation); | ||
| 248 | std::string HMergeF32(Operation); | ||
| 249 | std::string HMergeH0(Operation); | ||
| 250 | std::string HMergeH1(Operation); | ||
| 251 | std::string HPack2(Operation); | ||
| 252 | std::string LogicalAssign(Operation); | ||
| 253 | std::string LogicalPick2(Operation); | ||
| 254 | std::string LogicalAnd2(Operation); | ||
| 255 | std::string FloatOrdered(Operation); | ||
| 256 | std::string FloatUnordered(Operation); | ||
| 257 | std::string LogicalAddCarry(Operation); | ||
| 258 | std::string Texture(Operation); | ||
| 259 | std::string TextureGather(Operation); | ||
| 260 | std::string TextureQueryDimensions(Operation); | ||
| 261 | std::string TextureQueryLod(Operation); | ||
| 262 | std::string TexelFetch(Operation); | ||
| 263 | std::string TextureGradient(Operation); | ||
| 264 | std::string ImageLoad(Operation); | ||
| 265 | std::string ImageStore(Operation); | ||
| 266 | std::string Branch(Operation); | ||
| 267 | std::string BranchIndirect(Operation); | ||
| 268 | std::string PushFlowStack(Operation); | ||
| 269 | std::string PopFlowStack(Operation); | ||
| 270 | std::string Exit(Operation); | ||
| 271 | std::string Discard(Operation); | ||
| 272 | std::string EmitVertex(Operation); | ||
| 273 | std::string EndPrimitive(Operation); | ||
| 274 | std::string InvocationId(Operation); | ||
| 275 | std::string YNegate(Operation); | ||
| 276 | std::string ThreadId(Operation); | ||
| 277 | std::string ShuffleIndexed(Operation); | ||
| 278 | std::string Barrier(Operation); | ||
| 279 | std::string MemoryBarrierGroup(Operation); | ||
| 280 | std::string MemoryBarrierGlobal(Operation); | ||
| 281 | |||
| 282 | template <const std::string_view& op> | ||
| 283 | std::string Unary(Operation operation) { | ||
| 284 | const std::string temporary = AllocTemporary(); | ||
| 285 | AddLine("{}{} {}, {};", op, Modifiers(operation), temporary, Visit(operation[0])); | ||
| 286 | return temporary; | ||
| 287 | } | ||
| 288 | |||
| 289 | template <const std::string_view& op> | ||
| 290 | std::string Binary(Operation operation) { | ||
| 291 | const std::string temporary = AllocTemporary(); | ||
| 292 | AddLine("{}{} {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]), | ||
| 293 | Visit(operation[1])); | ||
| 294 | return temporary; | ||
| 295 | } | ||
| 296 | |||
| 297 | template <const std::string_view& op> | ||
| 298 | std::string Trinary(Operation operation) { | ||
| 299 | const std::string temporary = AllocTemporary(); | ||
| 300 | AddLine("{}{} {}, {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]), | ||
| 301 | Visit(operation[1]), Visit(operation[2])); | ||
| 302 | return temporary; | ||
| 303 | } | ||
| 304 | |||
| 305 | template <const std::string_view& op, bool unordered> | ||
| 306 | std::string FloatComparison(Operation operation) { | ||
| 307 | const std::string temporary = AllocTemporary(); | ||
| 308 | AddLine("TRUNC.U.CC RC.x, {};", Binary<op>(operation)); | ||
| 309 | AddLine("MOV.S {}, 0;", temporary); | ||
| 310 | AddLine("MOV.S {} (NE.x), -1;", temporary); | ||
| 311 | |||
| 312 | const std::string op_a = Visit(operation[0]); | ||
| 313 | const std::string op_b = Visit(operation[1]); | ||
| 314 | if constexpr (unordered) { | ||
| 315 | AddLine("SNE.F RC.x, {}, {};", op_a, op_a); | ||
| 316 | AddLine("TRUNC.U.CC RC.x, RC.x;"); | ||
| 317 | AddLine("MOV.S {} (NE.x), -1;", temporary); | ||
| 318 | AddLine("SNE.F RC.x, {}, {};", op_b, op_b); | ||
| 319 | AddLine("TRUNC.U.CC RC.x, RC.x;"); | ||
| 320 | AddLine("MOV.S {} (NE.x), -1;", temporary); | ||
| 321 | } else if (op == SNE_F) { | ||
| 322 | AddLine("SNE.F RC.x, {}, {};", op_a, op_a); | ||
| 323 | AddLine("TRUNC.U.CC RC.x, RC.x;"); | ||
| 324 | AddLine("MOV.S {} (NE.x), 0;", temporary); | ||
| 325 | AddLine("SNE.F RC.x, {}, {};", op_b, op_b); | ||
| 326 | AddLine("TRUNC.U.CC RC.x, RC.x;"); | ||
| 327 | AddLine("MOV.S {} (NE.x), 0;", temporary); | ||
| 328 | } | ||
| 329 | return temporary; | ||
| 330 | } | ||
| 331 | |||
| 332 | template <const std::string_view& op, bool is_nan> | ||
| 333 | std::string HalfComparison(Operation operation) { | ||
| 334 | const std::string tmp1 = AllocVectorTemporary(); | ||
| 335 | const std::string tmp2 = AllocVectorTemporary(); | ||
| 336 | const std::string op_a = Visit(operation[0]); | ||
| 337 | const std::string op_b = Visit(operation[1]); | ||
| 338 | AddLine("UP2H.F {}, {};", tmp1, op_a); | ||
| 339 | AddLine("UP2H.F {}, {};", tmp2, op_b); | ||
| 340 | AddLine("{} {}, {}, {};", op, tmp1, tmp1, tmp2); | ||
| 341 | AddLine("TRUNC.U.CC RC.xy, {};", tmp1); | ||
| 342 | AddLine("MOV.S {}.xy, {{0, 0, 0, 0}};", tmp1); | ||
| 343 | AddLine("MOV.S {}.x (NE.x), -1;", tmp1); | ||
| 344 | AddLine("MOV.S {}.y (NE.y), -1;", tmp1); | ||
| 345 | if constexpr (is_nan) { | ||
| 346 | AddLine("MOVC.F RC.x, {};", op_a); | ||
| 347 | AddLine("MOV.S {}.x (NAN.x), -1;", tmp1); | ||
| 348 | AddLine("MOVC.F RC.x, {};", op_b); | ||
| 349 | AddLine("MOV.S {}.y (NAN.x), -1;", tmp1); | ||
| 350 | } | ||
| 351 | return tmp1; | ||
| 352 | } | ||
| 353 | |||
| 354 | template <const std::string_view& op, const std::string_view& type> | ||
| 355 | std::string AtomicImage(Operation operation) { | ||
| 356 | const auto& meta = std::get<MetaImage>(operation.GetMeta()); | ||
| 357 | const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; | ||
| 358 | const std::size_t num_coords = operation.GetOperandsCount(); | ||
| 359 | const std::size_t num_values = meta.values.size(); | ||
| 360 | |||
| 361 | const std::string coord = AllocVectorTemporary(); | ||
| 362 | const std::string value = AllocVectorTemporary(); | ||
| 363 | for (std::size_t i = 0; i < num_coords; ++i) { | ||
| 364 | AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i])); | ||
| 365 | } | ||
| 366 | for (std::size_t i = 0; i < num_values; ++i) { | ||
| 367 | AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i])); | ||
| 368 | } | ||
| 369 | |||
| 370 | const std::string result = coord; | ||
| 371 | AddLine("ATOMIM.{}.{} {}.x, {}, {}, image[{}], {};", op, type, result, value, coord, | ||
| 372 | image_id, ImageType(meta.image.type)); | ||
| 373 | return fmt::format("{}.x", result); | ||
| 374 | } | ||
| 375 | |||
| 376 | template <const std::string_view& op, const std::string_view& type> | ||
| 377 | std::string Atomic(Operation operation) { | ||
| 378 | const std::string temporary = AllocTemporary(); | ||
| 379 | std::string address; | ||
| 380 | std::string_view opname; | ||
| 381 | if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { | ||
| 382 | AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()), | ||
| 383 | Visit(gmem->GetBaseAddress())); | ||
| 384 | address = fmt::format("{}[{}]", GlobalMemoryName(gmem->GetDescriptor()), temporary); | ||
| 385 | opname = "ATOMB"; | ||
| 386 | } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { | ||
| 387 | address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress())); | ||
| 388 | opname = "ATOMS"; | ||
| 389 | } else { | ||
| 390 | UNREACHABLE(); | ||
| 391 | return "{0, 0, 0, 0}"; | ||
| 392 | } | ||
| 393 | AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address); | ||
| 394 | return temporary; | ||
| 395 | } | ||
| 396 | |||
| 397 | template <char type> | ||
| 398 | std::string Negate(Operation operation) { | ||
| 399 | const std::string temporary = AllocTemporary(); | ||
| 400 | if constexpr (type == 'F') { | ||
| 401 | AddLine("MOV.F32 {}, -{};", temporary, Visit(operation[0])); | ||
| 402 | } else { | ||
| 403 | AddLine("MOV.{} {}, -{};", type, temporary, Visit(operation[0])); | ||
| 404 | } | ||
| 405 | return temporary; | ||
| 406 | } | ||
| 407 | |||
| 408 | template <char type> | ||
| 409 | std::string Absolute(Operation operation) { | ||
| 410 | const std::string temporary = AllocTemporary(); | ||
| 411 | AddLine("MOV.{} {}, |{}|;", type, temporary, Visit(operation[0])); | ||
| 412 | return temporary; | ||
| 413 | } | ||
| 414 | |||
| 415 | template <char type> | ||
| 416 | std::string BitfieldInsert(Operation operation) { | ||
| 417 | const std::string temporary = AllocVectorTemporary(); | ||
| 418 | AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[3])); | ||
| 419 | AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[2])); | ||
| 420 | AddLine("BFI.{} {}.x, {}, {}, {};", type, temporary, temporary, Visit(operation[1]), | ||
| 421 | Visit(operation[0])); | ||
| 422 | return fmt::format("{}.x", temporary); | ||
| 423 | } | ||
| 424 | |||
| 425 | template <char type> | ||
| 426 | std::string BitfieldExtract(Operation operation) { | ||
| 427 | const std::string temporary = AllocVectorTemporary(); | ||
| 428 | AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[2])); | ||
| 429 | AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[1])); | ||
| 430 | AddLine("BFE.{} {}.x, {}, {};", type, temporary, temporary, Visit(operation[0])); | ||
| 431 | return fmt::format("{}.x", temporary); | ||
| 432 | } | ||
| 433 | |||
| 434 | template <char swizzle> | ||
| 435 | std::string LocalInvocationId(Operation) { | ||
| 436 | return fmt::format("invocation.localid.{}", swizzle); | ||
| 437 | } | ||
| 438 | |||
| 439 | template <char swizzle> | ||
| 440 | std::string WorkGroupId(Operation) { | ||
| 441 | return fmt::format("invocation.groupid.{}", swizzle); | ||
| 442 | } | ||
| 443 | |||
| 444 | template <char c1, char c2> | ||
| 445 | std::string ThreadMask(Operation) { | ||
| 446 | return fmt::format("{}.thread{}{}mask", StageInputName(stage), c1, c2); | ||
| 447 | } | ||
| 448 | |||
| 449 | template <typename... Args> | ||
| 450 | void AddExpression(std::string_view text, Args&&... args) { | ||
| 451 | shader_source += fmt::format(text, std::forward<Args>(args)...); | ||
| 452 | } | ||
| 453 | |||
| 454 | template <typename... Args> | ||
| 455 | void AddLine(std::string_view text, Args&&... args) { | ||
| 456 | AddExpression(text, std::forward<Args>(args)...); | ||
| 457 | shader_source += '\n'; | ||
| 458 | } | ||
| 459 | |||
| 460 | std::string AllocTemporary() { | ||
| 461 | max_temporaries = std::max(max_temporaries, num_temporaries + 1); | ||
| 462 | return fmt::format("T{}.x", num_temporaries++); | ||
| 463 | } | ||
| 464 | |||
| 465 | std::string AllocVectorTemporary() { | ||
| 466 | max_temporaries = std::max(max_temporaries, num_temporaries + 1); | ||
| 467 | return fmt::format("T{}", num_temporaries++); | ||
| 468 | } | ||
| 469 | |||
| 470 | void ResetTemporaries() noexcept { | ||
| 471 | num_temporaries = 0; | ||
| 472 | } | ||
| 473 | |||
| 474 | const Device& device; | ||
| 475 | const ShaderIR& ir; | ||
| 476 | const Registry& registry; | ||
| 477 | const ShaderType stage; | ||
| 478 | |||
| 479 | std::size_t num_temporaries = 0; | ||
| 480 | std::size_t max_temporaries = 0; | ||
| 481 | |||
| 482 | std::string shader_source; | ||
| 483 | |||
| 484 | static constexpr std::string_view ADD_F32 = "ADD.F32"; | ||
| 485 | static constexpr std::string_view ADD_S = "ADD.S"; | ||
| 486 | static constexpr std::string_view ADD_U = "ADD.U"; | ||
| 487 | static constexpr std::string_view MUL_F32 = "MUL.F32"; | ||
| 488 | static constexpr std::string_view MUL_S = "MUL.S"; | ||
| 489 | static constexpr std::string_view MUL_U = "MUL.U"; | ||
| 490 | static constexpr std::string_view DIV_F32 = "DIV.F32"; | ||
| 491 | static constexpr std::string_view DIV_S = "DIV.S"; | ||
| 492 | static constexpr std::string_view DIV_U = "DIV.U"; | ||
| 493 | static constexpr std::string_view MAD_F32 = "MAD.F32"; | ||
| 494 | static constexpr std::string_view RSQ_F32 = "RSQ.F32"; | ||
| 495 | static constexpr std::string_view COS_F32 = "COS.F32"; | ||
| 496 | static constexpr std::string_view SIN_F32 = "SIN.F32"; | ||
| 497 | static constexpr std::string_view EX2_F32 = "EX2.F32"; | ||
| 498 | static constexpr std::string_view LG2_F32 = "LG2.F32"; | ||
| 499 | static constexpr std::string_view SLT_F = "SLT.F32"; | ||
| 500 | static constexpr std::string_view SLT_S = "SLT.S"; | ||
| 501 | static constexpr std::string_view SLT_U = "SLT.U"; | ||
| 502 | static constexpr std::string_view SEQ_F = "SEQ.F32"; | ||
| 503 | static constexpr std::string_view SEQ_S = "SEQ.S"; | ||
| 504 | static constexpr std::string_view SEQ_U = "SEQ.U"; | ||
| 505 | static constexpr std::string_view SLE_F = "SLE.F32"; | ||
| 506 | static constexpr std::string_view SLE_S = "SLE.S"; | ||
| 507 | static constexpr std::string_view SLE_U = "SLE.U"; | ||
| 508 | static constexpr std::string_view SGT_F = "SGT.F32"; | ||
| 509 | static constexpr std::string_view SGT_S = "SGT.S"; | ||
| 510 | static constexpr std::string_view SGT_U = "SGT.U"; | ||
| 511 | static constexpr std::string_view SNE_F = "SNE.F32"; | ||
| 512 | static constexpr std::string_view SNE_S = "SNE.S"; | ||
| 513 | static constexpr std::string_view SNE_U = "SNE.U"; | ||
| 514 | static constexpr std::string_view SGE_F = "SGE.F32"; | ||
| 515 | static constexpr std::string_view SGE_S = "SGE.S"; | ||
| 516 | static constexpr std::string_view SGE_U = "SGE.U"; | ||
| 517 | static constexpr std::string_view AND_S = "AND.S"; | ||
| 518 | static constexpr std::string_view AND_U = "AND.U"; | ||
| 519 | static constexpr std::string_view TRUNC_F = "TRUNC.F"; | ||
| 520 | static constexpr std::string_view TRUNC_S = "TRUNC.S"; | ||
| 521 | static constexpr std::string_view TRUNC_U = "TRUNC.U"; | ||
| 522 | static constexpr std::string_view SHL_S = "SHL.S"; | ||
| 523 | static constexpr std::string_view SHL_U = "SHL.U"; | ||
| 524 | static constexpr std::string_view SHR_S = "SHR.S"; | ||
| 525 | static constexpr std::string_view SHR_U = "SHR.U"; | ||
| 526 | static constexpr std::string_view OR_S = "OR.S"; | ||
| 527 | static constexpr std::string_view OR_U = "OR.U"; | ||
| 528 | static constexpr std::string_view XOR_S = "XOR.S"; | ||
| 529 | static constexpr std::string_view XOR_U = "XOR.U"; | ||
| 530 | static constexpr std::string_view NOT_S = "NOT.S"; | ||
| 531 | static constexpr std::string_view NOT_U = "NOT.U"; | ||
| 532 | static constexpr std::string_view BTC_S = "BTC.S"; | ||
| 533 | static constexpr std::string_view BTC_U = "BTC.U"; | ||
| 534 | static constexpr std::string_view BTFM_S = "BTFM.S"; | ||
| 535 | static constexpr std::string_view BTFM_U = "BTFM.U"; | ||
| 536 | static constexpr std::string_view ROUND_F = "ROUND.F"; | ||
| 537 | static constexpr std::string_view CEIL_F = "CEIL.F"; | ||
| 538 | static constexpr std::string_view FLR_F = "FLR.F"; | ||
| 539 | static constexpr std::string_view I2F_S = "I2F.S"; | ||
| 540 | static constexpr std::string_view I2F_U = "I2F.U"; | ||
| 541 | static constexpr std::string_view MIN_F = "MIN.F"; | ||
| 542 | static constexpr std::string_view MIN_S = "MIN.S"; | ||
| 543 | static constexpr std::string_view MIN_U = "MIN.U"; | ||
| 544 | static constexpr std::string_view MAX_F = "MAX.F"; | ||
| 545 | static constexpr std::string_view MAX_S = "MAX.S"; | ||
| 546 | static constexpr std::string_view MAX_U = "MAX.U"; | ||
| 547 | static constexpr std::string_view MOV_U = "MOV.U"; | ||
| 548 | static constexpr std::string_view TGBALLOT_U = "TGBALLOT.U"; | ||
| 549 | static constexpr std::string_view TGALL_U = "TGALL.U"; | ||
| 550 | static constexpr std::string_view TGANY_U = "TGANY.U"; | ||
| 551 | static constexpr std::string_view TGEQ_U = "TGEQ.U"; | ||
| 552 | static constexpr std::string_view EXCH = "EXCH"; | ||
| 553 | static constexpr std::string_view ADD = "ADD"; | ||
| 554 | static constexpr std::string_view MIN = "MIN"; | ||
| 555 | static constexpr std::string_view MAX = "MAX"; | ||
| 556 | static constexpr std::string_view AND = "AND"; | ||
| 557 | static constexpr std::string_view OR = "OR"; | ||
| 558 | static constexpr std::string_view XOR = "XOR"; | ||
| 559 | static constexpr std::string_view U32 = "U32"; | ||
| 560 | static constexpr std::string_view S32 = "S32"; | ||
| 561 | |||
| 562 | static constexpr std::size_t NUM_ENTRIES = static_cast<std::size_t>(OperationCode::Amount); | ||
| 563 | using DecompilerType = std::string (ARBDecompiler::*)(Operation); | ||
| 564 | static constexpr std::array<DecompilerType, NUM_ENTRIES> OPERATION_DECOMPILERS = { | ||
| 565 | &ARBDecompiler::Assign, | ||
| 566 | |||
| 567 | &ARBDecompiler::Select, | ||
| 568 | |||
| 569 | &ARBDecompiler::Binary<ADD_F32>, | ||
| 570 | &ARBDecompiler::Binary<MUL_F32>, | ||
| 571 | &ARBDecompiler::Binary<DIV_F32>, | ||
| 572 | &ARBDecompiler::Trinary<MAD_F32>, | ||
| 573 | &ARBDecompiler::Negate<'F'>, | ||
| 574 | &ARBDecompiler::Absolute<'F'>, | ||
| 575 | &ARBDecompiler::FClamp, | ||
| 576 | &ARBDecompiler::FCastHalf0, | ||
| 577 | &ARBDecompiler::FCastHalf1, | ||
| 578 | &ARBDecompiler::Binary<MIN_F>, | ||
| 579 | &ARBDecompiler::Binary<MAX_F>, | ||
| 580 | &ARBDecompiler::Unary<COS_F32>, | ||
| 581 | &ARBDecompiler::Unary<SIN_F32>, | ||
| 582 | &ARBDecompiler::Unary<EX2_F32>, | ||
| 583 | &ARBDecompiler::Unary<LG2_F32>, | ||
| 584 | &ARBDecompiler::Unary<RSQ_F32>, | ||
| 585 | &ARBDecompiler::FSqrt, | ||
| 586 | &ARBDecompiler::Unary<ROUND_F>, | ||
| 587 | &ARBDecompiler::Unary<FLR_F>, | ||
| 588 | &ARBDecompiler::Unary<CEIL_F>, | ||
| 589 | &ARBDecompiler::Unary<TRUNC_F>, | ||
| 590 | &ARBDecompiler::Unary<I2F_S>, | ||
| 591 | &ARBDecompiler::Unary<I2F_U>, | ||
| 592 | &ARBDecompiler::FSwizzleAdd, | ||
| 593 | |||
| 594 | &ARBDecompiler::Binary<ADD_S>, | ||
| 595 | &ARBDecompiler::Binary<MUL_S>, | ||
| 596 | &ARBDecompiler::Binary<DIV_S>, | ||
| 597 | &ARBDecompiler::Negate<'S'>, | ||
| 598 | &ARBDecompiler::Absolute<'S'>, | ||
| 599 | &ARBDecompiler::Binary<MIN_S>, | ||
| 600 | &ARBDecompiler::Binary<MAX_S>, | ||
| 601 | |||
| 602 | &ARBDecompiler::Unary<TRUNC_S>, | ||
| 603 | &ARBDecompiler::Unary<MOV_U>, | ||
| 604 | &ARBDecompiler::Binary<SHL_S>, | ||
| 605 | &ARBDecompiler::Binary<SHR_U>, | ||
| 606 | &ARBDecompiler::Binary<SHR_S>, | ||
| 607 | &ARBDecompiler::Binary<AND_S>, | ||
| 608 | &ARBDecompiler::Binary<OR_S>, | ||
| 609 | &ARBDecompiler::Binary<XOR_S>, | ||
| 610 | &ARBDecompiler::Unary<NOT_S>, | ||
| 611 | &ARBDecompiler::BitfieldInsert<'S'>, | ||
| 612 | &ARBDecompiler::BitfieldExtract<'S'>, | ||
| 613 | &ARBDecompiler::Unary<BTC_S>, | ||
| 614 | &ARBDecompiler::Unary<BTFM_S>, | ||
| 615 | |||
| 616 | &ARBDecompiler::Binary<ADD_U>, | ||
| 617 | &ARBDecompiler::Binary<MUL_U>, | ||
| 618 | &ARBDecompiler::Binary<DIV_U>, | ||
| 619 | &ARBDecompiler::Binary<MIN_U>, | ||
| 620 | &ARBDecompiler::Binary<MAX_U>, | ||
| 621 | &ARBDecompiler::Unary<TRUNC_U>, | ||
| 622 | &ARBDecompiler::Unary<MOV_U>, | ||
| 623 | &ARBDecompiler::Binary<SHL_U>, | ||
| 624 | &ARBDecompiler::Binary<SHR_U>, | ||
| 625 | &ARBDecompiler::Binary<SHR_U>, | ||
| 626 | &ARBDecompiler::Binary<AND_U>, | ||
| 627 | &ARBDecompiler::Binary<OR_U>, | ||
| 628 | &ARBDecompiler::Binary<XOR_U>, | ||
| 629 | &ARBDecompiler::Unary<NOT_U>, | ||
| 630 | &ARBDecompiler::BitfieldInsert<'U'>, | ||
| 631 | &ARBDecompiler::BitfieldExtract<'U'>, | ||
| 632 | &ARBDecompiler::Unary<BTC_U>, | ||
| 633 | &ARBDecompiler::Unary<BTFM_U>, | ||
| 634 | |||
| 635 | &ARBDecompiler::HAdd2, | ||
| 636 | &ARBDecompiler::HMul2, | ||
| 637 | &ARBDecompiler::HFma2, | ||
| 638 | &ARBDecompiler::HAbsolute, | ||
| 639 | &ARBDecompiler::HNegate, | ||
| 640 | &ARBDecompiler::HClamp, | ||
| 641 | &ARBDecompiler::HCastFloat, | ||
| 642 | &ARBDecompiler::HUnpack, | ||
| 643 | &ARBDecompiler::HMergeF32, | ||
| 644 | &ARBDecompiler::HMergeH0, | ||
| 645 | &ARBDecompiler::HMergeH1, | ||
| 646 | &ARBDecompiler::HPack2, | ||
| 647 | |||
| 648 | &ARBDecompiler::LogicalAssign, | ||
| 649 | &ARBDecompiler::Binary<AND_U>, | ||
| 650 | &ARBDecompiler::Binary<OR_U>, | ||
| 651 | &ARBDecompiler::Binary<XOR_U>, | ||
| 652 | &ARBDecompiler::Unary<NOT_U>, | ||
| 653 | &ARBDecompiler::LogicalPick2, | ||
| 654 | &ARBDecompiler::LogicalAnd2, | ||
| 655 | |||
| 656 | &ARBDecompiler::FloatComparison<SLT_F, false>, | ||
| 657 | &ARBDecompiler::FloatComparison<SEQ_F, false>, | ||
| 658 | &ARBDecompiler::FloatComparison<SLE_F, false>, | ||
| 659 | &ARBDecompiler::FloatComparison<SGT_F, false>, | ||
| 660 | &ARBDecompiler::FloatComparison<SNE_F, false>, | ||
| 661 | &ARBDecompiler::FloatComparison<SGE_F, false>, | ||
| 662 | &ARBDecompiler::FloatOrdered, | ||
| 663 | &ARBDecompiler::FloatUnordered, | ||
| 664 | &ARBDecompiler::FloatComparison<SLT_F, true>, | ||
| 665 | &ARBDecompiler::FloatComparison<SEQ_F, true>, | ||
| 666 | &ARBDecompiler::FloatComparison<SLE_F, true>, | ||
| 667 | &ARBDecompiler::FloatComparison<SGT_F, true>, | ||
| 668 | &ARBDecompiler::FloatComparison<SNE_F, true>, | ||
| 669 | &ARBDecompiler::FloatComparison<SGE_F, true>, | ||
| 670 | |||
| 671 | &ARBDecompiler::Binary<SLT_S>, | ||
| 672 | &ARBDecompiler::Binary<SEQ_S>, | ||
| 673 | &ARBDecompiler::Binary<SLE_S>, | ||
| 674 | &ARBDecompiler::Binary<SGT_S>, | ||
| 675 | &ARBDecompiler::Binary<SNE_S>, | ||
| 676 | &ARBDecompiler::Binary<SGE_S>, | ||
| 677 | |||
| 678 | &ARBDecompiler::Binary<SLT_U>, | ||
| 679 | &ARBDecompiler::Binary<SEQ_U>, | ||
| 680 | &ARBDecompiler::Binary<SLE_U>, | ||
| 681 | &ARBDecompiler::Binary<SGT_U>, | ||
| 682 | &ARBDecompiler::Binary<SNE_U>, | ||
| 683 | &ARBDecompiler::Binary<SGE_U>, | ||
| 684 | |||
| 685 | &ARBDecompiler::LogicalAddCarry, | ||
| 686 | |||
| 687 | &ARBDecompiler::HalfComparison<SLT_F, false>, | ||
| 688 | &ARBDecompiler::HalfComparison<SEQ_F, false>, | ||
| 689 | &ARBDecompiler::HalfComparison<SLE_F, false>, | ||
| 690 | &ARBDecompiler::HalfComparison<SGT_F, false>, | ||
| 691 | &ARBDecompiler::HalfComparison<SNE_F, false>, | ||
| 692 | &ARBDecompiler::HalfComparison<SGE_F, false>, | ||
| 693 | &ARBDecompiler::HalfComparison<SLT_F, true>, | ||
| 694 | &ARBDecompiler::HalfComparison<SEQ_F, true>, | ||
| 695 | &ARBDecompiler::HalfComparison<SLE_F, true>, | ||
| 696 | &ARBDecompiler::HalfComparison<SGT_F, true>, | ||
| 697 | &ARBDecompiler::HalfComparison<SNE_F, true>, | ||
| 698 | &ARBDecompiler::HalfComparison<SGE_F, true>, | ||
| 699 | |||
| 700 | &ARBDecompiler::Texture, | ||
| 701 | &ARBDecompiler::Texture, | ||
| 702 | &ARBDecompiler::TextureGather, | ||
| 703 | &ARBDecompiler::TextureQueryDimensions, | ||
| 704 | &ARBDecompiler::TextureQueryLod, | ||
| 705 | &ARBDecompiler::TexelFetch, | ||
| 706 | &ARBDecompiler::TextureGradient, | ||
| 707 | |||
| 708 | &ARBDecompiler::ImageLoad, | ||
| 709 | &ARBDecompiler::ImageStore, | ||
| 710 | |||
| 711 | &ARBDecompiler::AtomicImage<ADD, U32>, | ||
| 712 | &ARBDecompiler::AtomicImage<AND, U32>, | ||
| 713 | &ARBDecompiler::AtomicImage<OR, U32>, | ||
| 714 | &ARBDecompiler::AtomicImage<XOR, U32>, | ||
| 715 | &ARBDecompiler::AtomicImage<EXCH, U32>, | ||
| 716 | |||
| 717 | &ARBDecompiler::Atomic<EXCH, U32>, | ||
| 718 | &ARBDecompiler::Atomic<ADD, U32>, | ||
| 719 | &ARBDecompiler::Atomic<MIN, U32>, | ||
| 720 | &ARBDecompiler::Atomic<MAX, U32>, | ||
| 721 | &ARBDecompiler::Atomic<AND, U32>, | ||
| 722 | &ARBDecompiler::Atomic<OR, U32>, | ||
| 723 | &ARBDecompiler::Atomic<XOR, U32>, | ||
| 724 | |||
| 725 | &ARBDecompiler::Atomic<EXCH, S32>, | ||
| 726 | &ARBDecompiler::Atomic<ADD, S32>, | ||
| 727 | &ARBDecompiler::Atomic<MIN, S32>, | ||
| 728 | &ARBDecompiler::Atomic<MAX, S32>, | ||
| 729 | &ARBDecompiler::Atomic<AND, S32>, | ||
| 730 | &ARBDecompiler::Atomic<OR, S32>, | ||
| 731 | &ARBDecompiler::Atomic<XOR, S32>, | ||
| 732 | |||
| 733 | &ARBDecompiler::Atomic<ADD, U32>, | ||
| 734 | &ARBDecompiler::Atomic<MIN, U32>, | ||
| 735 | &ARBDecompiler::Atomic<MAX, U32>, | ||
| 736 | &ARBDecompiler::Atomic<AND, U32>, | ||
| 737 | &ARBDecompiler::Atomic<OR, U32>, | ||
| 738 | &ARBDecompiler::Atomic<XOR, U32>, | ||
| 739 | |||
| 740 | &ARBDecompiler::Atomic<ADD, S32>, | ||
| 741 | &ARBDecompiler::Atomic<MIN, S32>, | ||
| 742 | &ARBDecompiler::Atomic<MAX, S32>, | ||
| 743 | &ARBDecompiler::Atomic<AND, S32>, | ||
| 744 | &ARBDecompiler::Atomic<OR, S32>, | ||
| 745 | &ARBDecompiler::Atomic<XOR, S32>, | ||
| 746 | |||
| 747 | &ARBDecompiler::Branch, | ||
| 748 | &ARBDecompiler::BranchIndirect, | ||
| 749 | &ARBDecompiler::PushFlowStack, | ||
| 750 | &ARBDecompiler::PopFlowStack, | ||
| 751 | &ARBDecompiler::Exit, | ||
| 752 | &ARBDecompiler::Discard, | ||
| 753 | |||
| 754 | &ARBDecompiler::EmitVertex, | ||
| 755 | &ARBDecompiler::EndPrimitive, | ||
| 756 | |||
| 757 | &ARBDecompiler::InvocationId, | ||
| 758 | &ARBDecompiler::YNegate, | ||
| 759 | &ARBDecompiler::LocalInvocationId<'x'>, | ||
| 760 | &ARBDecompiler::LocalInvocationId<'y'>, | ||
| 761 | &ARBDecompiler::LocalInvocationId<'z'>, | ||
| 762 | &ARBDecompiler::WorkGroupId<'x'>, | ||
| 763 | &ARBDecompiler::WorkGroupId<'y'>, | ||
| 764 | &ARBDecompiler::WorkGroupId<'z'>, | ||
| 765 | |||
| 766 | &ARBDecompiler::Unary<TGBALLOT_U>, | ||
| 767 | &ARBDecompiler::Unary<TGALL_U>, | ||
| 768 | &ARBDecompiler::Unary<TGANY_U>, | ||
| 769 | &ARBDecompiler::Unary<TGEQ_U>, | ||
| 770 | |||
| 771 | &ARBDecompiler::ThreadId, | ||
| 772 | &ARBDecompiler::ThreadMask<'e', 'q'>, | ||
| 773 | &ARBDecompiler::ThreadMask<'g', 'e'>, | ||
| 774 | &ARBDecompiler::ThreadMask<'g', 't'>, | ||
| 775 | &ARBDecompiler::ThreadMask<'l', 'e'>, | ||
| 776 | &ARBDecompiler::ThreadMask<'l', 't'>, | ||
| 777 | &ARBDecompiler::ShuffleIndexed, | ||
| 778 | |||
| 779 | &ARBDecompiler::Barrier, | ||
| 780 | &ARBDecompiler::MemoryBarrierGroup, | ||
| 781 | &ARBDecompiler::MemoryBarrierGlobal, | ||
| 782 | }; | ||
| 783 | }; | ||
| 784 | |||
| 785 | ARBDecompiler::ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, | ||
| 786 | ShaderType stage, std::string_view identifier) | ||
| 787 | : device{device}, ir{ir}, registry{registry}, stage{stage} { | ||
| 788 | AddLine("TEMP RC;"); | ||
| 789 | AddLine("TEMP FSWZA[4];"); | ||
| 790 | AddLine("TEMP FSWZB[4];"); | ||
| 791 | if (ir.IsDecompiled()) { | ||
| 792 | DecompileAST(); | ||
| 793 | } else { | ||
| 794 | DecompileBranchMode(); | ||
| 795 | } | ||
| 796 | AddLine("END"); | ||
| 797 | |||
| 798 | const std::string code = std::move(shader_source); | ||
| 799 | DeclareHeader(); | ||
| 800 | DeclareVertex(); | ||
| 801 | DeclareGeometry(); | ||
| 802 | DeclareFragment(); | ||
| 803 | DeclareCompute(); | ||
| 804 | DeclareInputAttributes(); | ||
| 805 | DeclareOutputAttributes(); | ||
| 806 | DeclareLocalMemory(); | ||
| 807 | DeclareGlobalMemory(); | ||
| 808 | DeclareConstantBuffers(); | ||
| 809 | DeclareRegisters(); | ||
| 810 | DeclareTemporaries(); | ||
| 811 | DeclarePredicates(); | ||
| 812 | DeclareInternalFlags(); | ||
| 813 | |||
| 814 | shader_source += code; | ||
| 815 | } | ||
| 816 | |||
| 817 | std::string_view HeaderStageName(ShaderType stage) { | ||
| 818 | switch (stage) { | ||
| 819 | case ShaderType::Vertex: | ||
| 820 | return "vp"; | ||
| 821 | case ShaderType::Geometry: | ||
| 822 | return "gp"; | ||
| 823 | case ShaderType::Fragment: | ||
| 824 | return "fp"; | ||
| 825 | case ShaderType::Compute: | ||
| 826 | return "cp"; | ||
| 827 | default: | ||
| 828 | UNREACHABLE(); | ||
| 829 | return ""; | ||
| 830 | } | ||
| 831 | } | ||
| 832 | |||
| 833 | void ARBDecompiler::DeclareHeader() { | ||
| 834 | AddLine("!!NV{}5.0", HeaderStageName(stage)); | ||
| 835 | // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D | ||
| 836 | AddLine("OPTION NV_internal;"); | ||
| 837 | AddLine("OPTION NV_gpu_program_fp64;"); | ||
| 838 | AddLine("OPTION NV_shader_storage_buffer;"); | ||
| 839 | AddLine("OPTION NV_shader_thread_group;"); | ||
| 840 | if (ir.UsesWarps() && device.HasWarpIntrinsics()) { | ||
| 841 | AddLine("OPTION NV_shader_thread_shuffle;"); | ||
| 842 | } | ||
| 843 | if (stage == ShaderType::Vertex) { | ||
| 844 | if (device.HasNvViewportArray2()) { | ||
| 845 | AddLine("OPTION NV_viewport_array2;"); | ||
| 846 | } | ||
| 847 | } | ||
| 848 | if (stage == ShaderType::Fragment) { | ||
| 849 | AddLine("OPTION ARB_draw_buffers;"); | ||
| 850 | } | ||
| 851 | if (device.HasImageLoadFormatted()) { | ||
| 852 | AddLine("OPTION EXT_shader_image_load_formatted;"); | ||
| 853 | } | ||
| 854 | } | ||
| 855 | |||
| 856 | void ARBDecompiler::DeclareVertex() { | ||
| 857 | if (stage != ShaderType::Vertex) { | ||
| 858 | return; | ||
| 859 | } | ||
| 860 | AddLine("OUTPUT result_clip[] = {{ result.clip[0..7] }};"); | ||
| 861 | } | ||
| 862 | |||
| 863 | void ARBDecompiler::DeclareGeometry() { | ||
| 864 | if (stage != ShaderType::Geometry) { | ||
| 865 | return; | ||
| 866 | } | ||
| 867 | const auto& info = registry.GetGraphicsInfo(); | ||
| 868 | const auto& header = ir.GetHeader(); | ||
| 869 | AddLine("PRIMITIVE_IN {};", PrimitiveDescription(info.primitive_topology)); | ||
| 870 | AddLine("PRIMITIVE_OUT {};", TopologyName(header.common3.output_topology)); | ||
| 871 | AddLine("VERTICES_OUT {};", header.common4.max_output_vertices.Value()); | ||
| 872 | AddLine("ATTRIB vertex_position = vertex.position;"); | ||
| 873 | } | ||
| 874 | |||
| 875 | void ARBDecompiler::DeclareFragment() { | ||
| 876 | if (stage != ShaderType::Fragment) { | ||
| 877 | return; | ||
| 878 | } | ||
| 879 | AddLine("OUTPUT result_color7 = result.color[7];"); | ||
| 880 | AddLine("OUTPUT result_color6 = result.color[6];"); | ||
| 881 | AddLine("OUTPUT result_color5 = result.color[5];"); | ||
| 882 | AddLine("OUTPUT result_color4 = result.color[4];"); | ||
| 883 | AddLine("OUTPUT result_color3 = result.color[3];"); | ||
| 884 | AddLine("OUTPUT result_color2 = result.color[2];"); | ||
| 885 | AddLine("OUTPUT result_color1 = result.color[1];"); | ||
| 886 | AddLine("OUTPUT result_color0 = result.color;"); | ||
| 887 | } | ||
| 888 | |||
| 889 | void ARBDecompiler::DeclareCompute() { | ||
| 890 | if (stage != ShaderType::Compute) { | ||
| 891 | return; | ||
| 892 | } | ||
| 893 | const ComputeInfo& info = registry.GetComputeInfo(); | ||
| 894 | AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1], | ||
| 895 | info.workgroup_size[2]); | ||
| 896 | if (info.shared_memory_size_in_words > 0) { | ||
| 897 | const u32 size_in_bytes = info.shared_memory_size_in_words * 4; | ||
| 898 | AddLine("SHARED_MEMORY {};", size_in_bytes); | ||
| 899 | AddLine("SHARED shared_mem[] = {{program.sharedmem}};"); | ||
| 900 | } | ||
| 901 | } | ||
| 902 | |||
| 903 | void ARBDecompiler::DeclareInputAttributes() { | ||
| 904 | if (stage == ShaderType::Compute) { | ||
| 905 | return; | ||
| 906 | } | ||
| 907 | const std::string_view stage_name = StageInputName(stage); | ||
| 908 | for (const auto attribute : ir.GetInputAttributes()) { | ||
| 909 | if (!IsGenericAttribute(attribute)) { | ||
| 910 | continue; | ||
| 911 | } | ||
| 912 | const u32 index = GetGenericAttributeIndex(attribute); | ||
| 913 | |||
| 914 | std::string_view suffix; | ||
| 915 | if (stage == ShaderType::Fragment) { | ||
| 916 | const auto input_mode{ir.GetHeader().ps.GetPixelImap(index)}; | ||
| 917 | if (input_mode == PixelImap::Unused) { | ||
| 918 | return; | ||
| 919 | } | ||
| 920 | suffix = GetInputFlags(input_mode); | ||
| 921 | } | ||
| 922 | AddLine("{}ATTRIB in_attr{}[] = {{ {}.attrib[{}..{}] }};", suffix, index, stage_name, index, | ||
| 923 | index); | ||
| 924 | } | ||
| 925 | } | ||
| 926 | |||
| 927 | void ARBDecompiler::DeclareOutputAttributes() { | ||
| 928 | if (stage == ShaderType::Compute) { | ||
| 929 | return; | ||
| 930 | } | ||
| 931 | for (const auto attribute : ir.GetOutputAttributes()) { | ||
| 932 | if (!IsGenericAttribute(attribute)) { | ||
| 933 | continue; | ||
| 934 | } | ||
| 935 | const u32 index = GetGenericAttributeIndex(attribute); | ||
| 936 | AddLine("OUTPUT out_attr{}[] = {{ result.attrib[{}..{}] }};", index, index, index); | ||
| 937 | } | ||
| 938 | } | ||
| 939 | |||
| 940 | void ARBDecompiler::DeclareLocalMemory() { | ||
| 941 | u64 size = 0; | ||
| 942 | if (stage == ShaderType::Compute) { | ||
| 943 | size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL; | ||
| 944 | } else { | ||
| 945 | size = ir.GetHeader().GetLocalMemorySize(); | ||
| 946 | } | ||
| 947 | if (size == 0) { | ||
| 948 | return; | ||
| 949 | } | ||
| 950 | const u64 element_count = Common::AlignUp(size, 4) / 4; | ||
| 951 | AddLine("TEMP lmem[{}];", element_count); | ||
| 952 | } | ||
| 953 | |||
| 954 | void ARBDecompiler::DeclareGlobalMemory() { | ||
| 955 | u32 binding = 0; // device.GetBaseBindings(stage).shader_storage_buffer; | ||
| 956 | for (const auto& pair : ir.GetGlobalMemory()) { | ||
| 957 | const auto& base = pair.first; | ||
| 958 | AddLine("STORAGE {}[] = {{ program.storage[{}] }};", GlobalMemoryName(base), binding); | ||
| 959 | ++binding; | ||
| 960 | } | ||
| 961 | } | ||
| 962 | |||
| 963 | void ARBDecompiler::DeclareConstantBuffers() { | ||
| 964 | u32 binding = 0; | ||
| 965 | for (const auto& cbuf : ir.GetConstantBuffers()) { | ||
| 966 | AddLine("CBUFFER cbuf{}[] = {{ program.buffer[{}] }};", cbuf.first, binding); | ||
| 967 | ++binding; | ||
| 968 | } | ||
| 969 | } | ||
| 970 | |||
| 971 | void ARBDecompiler::DeclareRegisters() { | ||
| 972 | for (const u32 gpr : ir.GetRegisters()) { | ||
| 973 | AddLine("TEMP R{};", gpr); | ||
| 974 | } | ||
| 975 | } | ||
| 976 | |||
| 977 | void ARBDecompiler::DeclareTemporaries() { | ||
| 978 | for (std::size_t i = 0; i < max_temporaries; ++i) { | ||
| 979 | AddLine("TEMP T{};", i); | ||
| 980 | } | ||
| 981 | } | ||
| 982 | |||
| 983 | void ARBDecompiler::DeclarePredicates() { | ||
| 984 | for (const Tegra::Shader::Pred pred : ir.GetPredicates()) { | ||
| 985 | AddLine("TEMP P{};", static_cast<u64>(pred)); | ||
| 986 | } | ||
| 987 | } | ||
| 988 | |||
| 989 | void ARBDecompiler::DeclareInternalFlags() { | ||
| 990 | for (const char* name : INTERNAL_FLAG_NAMES) { | ||
| 991 | AddLine("TEMP {};", name); | ||
| 992 | } | ||
| 993 | } | ||
| 994 | |||
| 995 | void ARBDecompiler::InitializeVariables() { | ||
| 996 | AddLine("MOV.F32 FSWZA[0], -1;"); | ||
| 997 | AddLine("MOV.F32 FSWZA[1], 1;"); | ||
| 998 | AddLine("MOV.F32 FSWZA[2], -1;"); | ||
| 999 | AddLine("MOV.F32 FSWZA[3], 0;"); | ||
| 1000 | AddLine("MOV.F32 FSWZB[0], -1;"); | ||
| 1001 | AddLine("MOV.F32 FSWZB[1], -1;"); | ||
| 1002 | AddLine("MOV.F32 FSWZB[2], 1;"); | ||
| 1003 | AddLine("MOV.F32 FSWZB[3], -1;"); | ||
| 1004 | |||
| 1005 | if (stage == ShaderType::Vertex || stage == ShaderType::Geometry) { | ||
| 1006 | AddLine("MOV.F result.position, {{0, 0, 0, 1}};"); | ||
| 1007 | } | ||
| 1008 | for (const auto attribute : ir.GetOutputAttributes()) { | ||
| 1009 | if (!IsGenericAttribute(attribute)) { | ||
| 1010 | continue; | ||
| 1011 | } | ||
| 1012 | const u32 index = GetGenericAttributeIndex(attribute); | ||
| 1013 | AddLine("MOV.F result.attrib[{}], {{0, 0, 0, 1}};", index); | ||
| 1014 | } | ||
| 1015 | for (const u32 gpr : ir.GetRegisters()) { | ||
| 1016 | AddLine("MOV.F R{}, {{0, 0, 0, 0}};", gpr); | ||
| 1017 | } | ||
| 1018 | for (const Tegra::Shader::Pred pred : ir.GetPredicates()) { | ||
| 1019 | AddLine("MOV.U P{}, {{0, 0, 0, 0}};", static_cast<u64>(pred)); | ||
| 1020 | } | ||
| 1021 | } | ||
| 1022 | |||
| 1023 | void ARBDecompiler::DecompileAST() { | ||
| 1024 | const u32 num_flow_variables = ir.GetASTNumVariables(); | ||
| 1025 | for (u32 i = 0; i < num_flow_variables; ++i) { | ||
| 1026 | AddLine("TEMP F{};", i); | ||
| 1027 | } | ||
| 1028 | for (u32 i = 0; i < num_flow_variables; ++i) { | ||
| 1029 | AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i); | ||
| 1030 | } | ||
| 1031 | |||
| 1032 | InitializeVariables(); | ||
| 1033 | |||
| 1034 | VisitAST(ir.GetASTProgram()); | ||
| 1035 | } | ||
| 1036 | |||
| 1037 | void ARBDecompiler::DecompileBranchMode() { | ||
| 1038 | static constexpr u32 FLOW_STACK_SIZE = 20; | ||
| 1039 | if (!ir.IsFlowStackDisabled()) { | ||
| 1040 | AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE); | ||
| 1041 | AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE); | ||
| 1042 | AddLine("TEMP SSY_TOP;"); | ||
| 1043 | AddLine("TEMP PBK_TOP;"); | ||
| 1044 | } | ||
| 1045 | |||
| 1046 | AddLine("TEMP PC;"); | ||
| 1047 | |||
| 1048 | if (!ir.IsFlowStackDisabled()) { | ||
| 1049 | AddLine("MOV.U SSY_TOP.x, 0;"); | ||
| 1050 | AddLine("MOV.U PBK_TOP.x, 0;"); | ||
| 1051 | } | ||
| 1052 | |||
| 1053 | InitializeVariables(); | ||
| 1054 | |||
| 1055 | const auto basic_block_end = ir.GetBasicBlocks().end(); | ||
| 1056 | auto basic_block_it = ir.GetBasicBlocks().begin(); | ||
| 1057 | const u32 first_address = basic_block_it->first; | ||
| 1058 | AddLine("MOV.U PC.x, {};", first_address); | ||
| 1059 | |||
| 1060 | AddLine("REP;"); | ||
| 1061 | |||
| 1062 | std::size_t num_blocks = 0; | ||
| 1063 | while (basic_block_it != basic_block_end) { | ||
| 1064 | const auto& [address, bb] = *basic_block_it; | ||
| 1065 | ++num_blocks; | ||
| 1066 | |||
| 1067 | AddLine("SEQ.S.CC RC.x, PC.x, {};", address); | ||
| 1068 | AddLine("IF NE.x;"); | ||
| 1069 | |||
| 1070 | VisitBlock(bb); | ||
| 1071 | |||
| 1072 | ++basic_block_it; | ||
| 1073 | |||
| 1074 | if (basic_block_it != basic_block_end) { | ||
| 1075 | const auto op = std::get_if<OperationNode>(&*bb[bb.size() - 1]); | ||
| 1076 | if (!op || op->GetCode() != OperationCode::Branch) { | ||
| 1077 | const u32 next_address = basic_block_it->first; | ||
| 1078 | AddLine("MOV.U PC.x, {};", next_address); | ||
| 1079 | AddLine("CONT;"); | ||
| 1080 | } | ||
| 1081 | } | ||
| 1082 | |||
| 1083 | AddLine("ELSE;"); | ||
| 1084 | } | ||
| 1085 | AddLine("RET;"); | ||
| 1086 | while (num_blocks--) { | ||
| 1087 | AddLine("ENDIF;"); | ||
| 1088 | } | ||
| 1089 | |||
| 1090 | AddLine("ENDREP;"); | ||
| 1091 | } | ||
| 1092 | |||
| 1093 | void ARBDecompiler::VisitAST(const ASTNode& node) { | ||
| 1094 | if (const auto ast = std::get_if<ASTProgram>(&*node->GetInnerData())) { | ||
| 1095 | for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { | ||
| 1096 | VisitAST(current); | ||
| 1097 | } | ||
| 1098 | } else if (const auto ast = std::get_if<ASTIfThen>(&*node->GetInnerData())) { | ||
| 1099 | const std::string condition = VisitExpression(ast->condition); | ||
| 1100 | ResetTemporaries(); | ||
| 1101 | |||
| 1102 | AddLine("MOVC.U RC.x, {};", condition); | ||
| 1103 | AddLine("IF NE.x;"); | ||
| 1104 | for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { | ||
| 1105 | VisitAST(current); | ||
| 1106 | } | ||
| 1107 | AddLine("ENDIF;"); | ||
| 1108 | } else if (const auto ast = std::get_if<ASTIfElse>(&*node->GetInnerData())) { | ||
| 1109 | AddLine("ELSE;"); | ||
| 1110 | for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { | ||
| 1111 | VisitAST(current); | ||
| 1112 | } | ||
| 1113 | } else if (const auto ast = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) { | ||
| 1114 | VisitBlock(ast->nodes); | ||
| 1115 | } else if (const auto ast = std::get_if<ASTVarSet>(&*node->GetInnerData())) { | ||
| 1116 | AddLine("MOV.U F{}, {};", ast->index, VisitExpression(ast->condition)); | ||
| 1117 | ResetTemporaries(); | ||
| 1118 | } else if (const auto ast = std::get_if<ASTDoWhile>(&*node->GetInnerData())) { | ||
| 1119 | const std::string condition = VisitExpression(ast->condition); | ||
| 1120 | ResetTemporaries(); | ||
| 1121 | AddLine("REP;"); | ||
| 1122 | for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { | ||
| 1123 | VisitAST(current); | ||
| 1124 | } | ||
| 1125 | AddLine("MOVC.U RC.x, {};", condition); | ||
| 1126 | AddLine("BRK (NE.x);"); | ||
| 1127 | AddLine("ENDREP;"); | ||
| 1128 | } else if (const auto ast = std::get_if<ASTReturn>(&*node->GetInnerData())) { | ||
| 1129 | const bool is_true = ExprIsTrue(ast->condition); | ||
| 1130 | if (!is_true) { | ||
| 1131 | AddLine("MOVC.U RC.x, {};", VisitExpression(ast->condition)); | ||
| 1132 | AddLine("IF NE.x;"); | ||
| 1133 | ResetTemporaries(); | ||
| 1134 | } | ||
| 1135 | if (ast->kills) { | ||
| 1136 | AddLine("KIL TR;"); | ||
| 1137 | } else { | ||
| 1138 | Exit(); | ||
| 1139 | } | ||
| 1140 | if (!is_true) { | ||
| 1141 | AddLine("ENDIF;"); | ||
| 1142 | } | ||
| 1143 | } else if (const auto ast = std::get_if<ASTBreak>(&*node->GetInnerData())) { | ||
| 1144 | if (ExprIsTrue(ast->condition)) { | ||
| 1145 | AddLine("BRK;"); | ||
| 1146 | } else { | ||
| 1147 | AddLine("MOVC.U RC.x, {};", VisitExpression(ast->condition)); | ||
| 1148 | AddLine("BRK (NE.x);"); | ||
| 1149 | ResetTemporaries(); | ||
| 1150 | } | ||
| 1151 | } else if (std::holds_alternative<ASTLabel>(*node->GetInnerData())) { | ||
| 1152 | // Nothing to do | ||
| 1153 | } else { | ||
| 1154 | UNREACHABLE(); | ||
| 1155 | } | ||
| 1156 | } | ||
| 1157 | |||
| 1158 | std::string ARBDecompiler::VisitExpression(const Expr& node) { | ||
| 1159 | const std::string result = AllocTemporary(); | ||
| 1160 | if (const auto expr = std::get_if<ExprAnd>(&*node)) { | ||
| 1161 | AddLine("AND.U {}, {}, {};", result, VisitExpression(expr->operand1), | ||
| 1162 | VisitExpression(expr->operand2)); | ||
| 1163 | return result; | ||
| 1164 | } | ||
| 1165 | if (const auto expr = std::get_if<ExprOr>(&*node)) { | ||
| 1166 | const std::string result = AllocTemporary(); | ||
| 1167 | AddLine("OR.U {}, {}, {};", result, VisitExpression(expr->operand1), | ||
| 1168 | VisitExpression(expr->operand2)); | ||
| 1169 | return result; | ||
| 1170 | } | ||
| 1171 | if (const auto expr = std::get_if<ExprNot>(&*node)) { | ||
| 1172 | const std::string result = AllocTemporary(); | ||
| 1173 | AddLine("CMP.S {}, {}, 0, -1;", result, VisitExpression(expr->operand1)); | ||
| 1174 | return result; | ||
| 1175 | } | ||
| 1176 | if (const auto expr = std::get_if<ExprPredicate>(&*node)) { | ||
| 1177 | return fmt::format("P{}.x", static_cast<u64>(expr->predicate)); | ||
| 1178 | } | ||
| 1179 | if (const auto expr = std::get_if<ExprCondCode>(&*node)) { | ||
| 1180 | return Visit(ir.GetConditionCode(expr->cc)); | ||
| 1181 | } | ||
| 1182 | if (const auto expr = std::get_if<ExprVar>(&*node)) { | ||
| 1183 | return fmt::format("F{}.x", expr->var_index); | ||
| 1184 | } | ||
| 1185 | if (const auto expr = std::get_if<ExprBoolean>(&*node)) { | ||
| 1186 | return expr->value ? "0xffffffff" : "0"; | ||
| 1187 | } | ||
| 1188 | if (const auto expr = std::get_if<ExprGprEqual>(&*node)) { | ||
| 1189 | const std::string result = AllocTemporary(); | ||
| 1190 | AddLine("SEQ.U {}, R{}.x, {};", result, expr->gpr, expr->value); | ||
| 1191 | return result; | ||
| 1192 | } | ||
| 1193 | UNREACHABLE(); | ||
| 1194 | return "0"; | ||
| 1195 | } | ||
| 1196 | |||
| 1197 | void ARBDecompiler::VisitBlock(const NodeBlock& bb) { | ||
| 1198 | for (const auto& node : bb) { | ||
| 1199 | Visit(node); | ||
| 1200 | } | ||
| 1201 | } | ||
| 1202 | |||
| 1203 | std::string ARBDecompiler::Visit(const Node& node) { | ||
| 1204 | if (const auto operation = std::get_if<OperationNode>(&*node)) { | ||
| 1205 | if (const auto amend_index = operation->GetAmendIndex()) { | ||
| 1206 | Visit(ir.GetAmendNode(*amend_index)); | ||
| 1207 | } | ||
| 1208 | const std::size_t index = static_cast<std::size_t>(operation->GetCode()); | ||
| 1209 | if (index >= OPERATION_DECOMPILERS.size()) { | ||
| 1210 | UNREACHABLE_MSG("Out of bounds operation: {}", index); | ||
| 1211 | return {}; | ||
| 1212 | } | ||
| 1213 | const auto decompiler = OPERATION_DECOMPILERS[index]; | ||
| 1214 | if (decompiler == nullptr) { | ||
| 1215 | UNREACHABLE_MSG("Undefined operation: {}", index); | ||
| 1216 | return {}; | ||
| 1217 | } | ||
| 1218 | return (this->*decompiler)(*operation); | ||
| 1219 | } | ||
| 1220 | |||
| 1221 | if (const auto gpr = std::get_if<GprNode>(&*node)) { | ||
| 1222 | const u32 index = gpr->GetIndex(); | ||
| 1223 | if (index == Register::ZeroIndex) { | ||
| 1224 | return "{0, 0, 0, 0}.x"; | ||
| 1225 | } | ||
| 1226 | return fmt::format("R{}.x", index); | ||
| 1227 | } | ||
| 1228 | |||
| 1229 | if (const auto cv = std::get_if<CustomVarNode>(&*node)) { | ||
| 1230 | return fmt::format("CV{}.x", cv->GetIndex()); | ||
| 1231 | } | ||
| 1232 | |||
| 1233 | if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { | ||
| 1234 | const std::string temporary = AllocTemporary(); | ||
| 1235 | AddLine("MOV.U {}, {};", temporary, immediate->GetValue()); | ||
| 1236 | return temporary; | ||
| 1237 | } | ||
| 1238 | |||
| 1239 | if (const auto predicate = std::get_if<PredicateNode>(&*node)) { | ||
| 1240 | const std::string temporary = AllocTemporary(); | ||
| 1241 | switch (const auto index = predicate->GetIndex(); index) { | ||
| 1242 | case Tegra::Shader::Pred::UnusedIndex: | ||
| 1243 | AddLine("MOV.S {}, -1;", temporary); | ||
| 1244 | break; | ||
| 1245 | case Tegra::Shader::Pred::NeverExecute: | ||
| 1246 | AddLine("MOV.S {}, 0;", temporary); | ||
| 1247 | break; | ||
| 1248 | default: | ||
| 1249 | AddLine("MOV.S {}, P{}.x;", temporary, static_cast<u64>(index)); | ||
| 1250 | break; | ||
| 1251 | } | ||
| 1252 | if (predicate->IsNegated()) { | ||
| 1253 | AddLine("CMP.S {}, {}, 0, -1;", temporary, temporary); | ||
| 1254 | } | ||
| 1255 | return temporary; | ||
| 1256 | } | ||
| 1257 | |||
| 1258 | if (const auto abuf = std::get_if<AbufNode>(&*node)) { | ||
| 1259 | if (abuf->IsPhysicalBuffer()) { | ||
| 1260 | UNIMPLEMENTED_MSG("Physical buffers are not implemented"); | ||
| 1261 | return "{0, 0, 0, 0}.x"; | ||
| 1262 | } | ||
| 1263 | |||
| 1264 | const auto buffer_index = [this, &abuf]() -> std::string { | ||
| 1265 | if (stage != ShaderType::Geometry) { | ||
| 1266 | return ""; | ||
| 1267 | } | ||
| 1268 | return fmt::format("[{}]", Visit(abuf->GetBuffer())); | ||
| 1269 | }; | ||
| 1270 | |||
| 1271 | const Attribute::Index index = abuf->GetIndex(); | ||
| 1272 | const u32 element = abuf->GetElement(); | ||
| 1273 | const char swizzle = Swizzle(element); | ||
| 1274 | switch (index) { | ||
| 1275 | case Attribute::Index::Position: { | ||
| 1276 | if (stage == ShaderType::Geometry) { | ||
| 1277 | return fmt::format("{}_position[{}].{}", StageInputName(stage), | ||
| 1278 | Visit(abuf->GetBuffer()), swizzle); | ||
| 1279 | } else { | ||
| 1280 | return fmt::format("{}.position.{}", StageInputName(stage), swizzle); | ||
| 1281 | } | ||
| 1282 | } | ||
| 1283 | case Attribute::Index::TessCoordInstanceIDVertexID: | ||
| 1284 | ASSERT(stage == ShaderType::Vertex); | ||
| 1285 | switch (element) { | ||
| 1286 | case 2: | ||
| 1287 | return "vertex.instance"; | ||
| 1288 | case 3: | ||
| 1289 | return "vertex.id"; | ||
| 1290 | } | ||
| 1291 | UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); | ||
| 1292 | break; | ||
| 1293 | case Attribute::Index::PointCoord: | ||
| 1294 | switch (element) { | ||
| 1295 | case 0: | ||
| 1296 | return "fragment.pointcoord.x"; | ||
| 1297 | case 1: | ||
| 1298 | return "fragment.pointcoord.y"; | ||
| 1299 | } | ||
| 1300 | UNIMPLEMENTED(); | ||
| 1301 | break; | ||
| 1302 | case Attribute::Index::FrontFacing: { | ||
| 1303 | ASSERT(stage == ShaderType::Fragment); | ||
| 1304 | ASSERT(element == 3); | ||
| 1305 | const std::string temporary = AllocVectorTemporary(); | ||
| 1306 | AddLine("SGT.S RC.x, fragment.facing, {{0, 0, 0, 0}};"); | ||
| 1307 | AddLine("MOV.U.CC RC.x, -RC;"); | ||
| 1308 | AddLine("MOV.S {}.x, 0;", temporary); | ||
| 1309 | AddLine("MOV.S {}.x (NE.x), -1;", temporary); | ||
| 1310 | return fmt::format("{}.x", temporary); | ||
| 1311 | } | ||
| 1312 | default: | ||
| 1313 | if (IsGenericAttribute(index)) { | ||
| 1314 | if (stage == ShaderType::Geometry) { | ||
| 1315 | return fmt::format("in_attr{}[{}][0].{}", GetGenericAttributeIndex(index), | ||
| 1316 | Visit(abuf->GetBuffer()), swizzle); | ||
| 1317 | } else { | ||
| 1318 | return fmt::format("{}.attrib[{}].{}", StageInputName(stage), | ||
| 1319 | GetGenericAttributeIndex(index), swizzle); | ||
| 1320 | } | ||
| 1321 | } | ||
| 1322 | UNIMPLEMENTED_MSG("Unimplemented input attribute={}", static_cast<int>(index)); | ||
| 1323 | break; | ||
| 1324 | } | ||
| 1325 | return "{0, 0, 0, 0}.x"; | ||
| 1326 | } | ||
| 1327 | |||
| 1328 | if (const auto cbuf = std::get_if<CbufNode>(&*node)) { | ||
| 1329 | std::string offset_string; | ||
| 1330 | const auto& offset = cbuf->GetOffset(); | ||
| 1331 | if (const auto imm = std::get_if<ImmediateNode>(&*offset)) { | ||
| 1332 | offset_string = std::to_string(imm->GetValue()); | ||
| 1333 | } else { | ||
| 1334 | offset_string = Visit(offset); | ||
| 1335 | } | ||
| 1336 | const std::string temporary = AllocTemporary(); | ||
| 1337 | AddLine("LDC.F32 {}, cbuf{}[{}];", temporary, cbuf->GetIndex(), offset_string); | ||
| 1338 | return temporary; | ||
| 1339 | } | ||
| 1340 | |||
| 1341 | if (const auto gmem = std::get_if<GmemNode>(&*node)) { | ||
| 1342 | const std::string temporary = AllocTemporary(); | ||
| 1343 | AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()), | ||
| 1344 | Visit(gmem->GetBaseAddress())); | ||
| 1345 | AddLine("LDB.U32 {}, {}[{}];", temporary, GlobalMemoryName(gmem->GetDescriptor()), | ||
| 1346 | temporary); | ||
| 1347 | return temporary; | ||
| 1348 | } | ||
| 1349 | |||
| 1350 | if (const auto lmem = std::get_if<LmemNode>(&*node)) { | ||
| 1351 | const std::string temporary = Visit(lmem->GetAddress()); | ||
| 1352 | AddLine("SHR.U {}, {}, 2;", temporary, temporary); | ||
| 1353 | AddLine("MOV.U {}, lmem[{}].x;", temporary, temporary); | ||
| 1354 | return temporary; | ||
| 1355 | } | ||
| 1356 | |||
| 1357 | if (const auto smem = std::get_if<SmemNode>(&*node)) { | ||
| 1358 | const std::string temporary = Visit(smem->GetAddress()); | ||
| 1359 | AddLine("LDS.U32 {}, shared_mem[{}];", temporary, temporary); | ||
| 1360 | return temporary; | ||
| 1361 | } | ||
| 1362 | |||
| 1363 | if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { | ||
| 1364 | const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag()); | ||
| 1365 | return fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]); | ||
| 1366 | } | ||
| 1367 | |||
| 1368 | if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { | ||
| 1369 | if (const auto amend_index = conditional->GetAmendIndex()) { | ||
| 1370 | Visit(ir.GetAmendNode(*amend_index)); | ||
| 1371 | } | ||
| 1372 | AddLine("MOVC.U RC.x, {};", Visit(conditional->GetCondition())); | ||
| 1373 | AddLine("IF NE.x;"); | ||
| 1374 | VisitBlock(conditional->GetCode()); | ||
| 1375 | AddLine("ENDIF;"); | ||
| 1376 | return {}; | ||
| 1377 | } | ||
| 1378 | |||
| 1379 | if (const auto cmt = std::get_if<CommentNode>(&*node)) { | ||
| 1380 | // Uncommenting this will generate invalid code. GLASM lacks comments. | ||
| 1381 | // AddLine("// {}", cmt->GetText()); | ||
| 1382 | return {}; | ||
| 1383 | } | ||
| 1384 | |||
| 1385 | UNIMPLEMENTED(); | ||
| 1386 | return {}; | ||
| 1387 | } | ||
| 1388 | |||
| 1389 | std::pair<std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operation) { | ||
| 1390 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1391 | UNIMPLEMENTED_IF(meta.sampler.is_indexed); | ||
| 1392 | UNIMPLEMENTED_IF(meta.sampler.is_shadow && meta.sampler.is_array && | ||
| 1393 | meta.sampler.type == Tegra::Shader::TextureType::TextureCube); | ||
| 1394 | |||
| 1395 | const std::size_t count = operation.GetOperandsCount(); | ||
| 1396 | std::string temporary = AllocVectorTemporary(); | ||
| 1397 | std::size_t i = 0; | ||
| 1398 | for (; i < count; ++i) { | ||
| 1399 | AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); | ||
| 1400 | } | ||
| 1401 | if (meta.sampler.is_array) { | ||
| 1402 | AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i++), Visit(meta.array)); | ||
| 1403 | } | ||
| 1404 | if (meta.sampler.is_shadow) { | ||
| 1405 | AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i++), Visit(meta.depth_compare)); | ||
| 1406 | } | ||
| 1407 | return {std::move(temporary), i}; | ||
| 1408 | } | ||
| 1409 | |||
| 1410 | std::string ARBDecompiler::BuildAoffi(Operation operation) { | ||
| 1411 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1412 | if (meta.aoffi.empty()) { | ||
| 1413 | return {}; | ||
| 1414 | } | ||
| 1415 | const std::string temporary = AllocVectorTemporary(); | ||
| 1416 | std::size_t i = 0; | ||
| 1417 | for (auto& node : meta.aoffi) { | ||
| 1418 | AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i++), Visit(node)); | ||
| 1419 | } | ||
| 1420 | return fmt::format(", offset({})", temporary); | ||
| 1421 | } | ||
| 1422 | |||
| 1423 | void ARBDecompiler::Exit() { | ||
| 1424 | if (stage != ShaderType::Fragment) { | ||
| 1425 | AddLine("RET;"); | ||
| 1426 | return; | ||
| 1427 | } | ||
| 1428 | |||
| 1429 | const auto safe_get_register = [this](u32 reg) -> std::string { | ||
| 1430 | // TODO(Rodrigo): Replace with contains once C++20 releases | ||
| 1431 | const auto& used_registers = ir.GetRegisters(); | ||
| 1432 | if (used_registers.find(reg) != used_registers.end()) { | ||
| 1433 | return fmt::format("R{}.x", reg); | ||
| 1434 | } | ||
| 1435 | return "{0, 0, 0, 0}.x"; | ||
| 1436 | }; | ||
| 1437 | |||
| 1438 | const auto& header = ir.GetHeader(); | ||
| 1439 | u32 current_reg = 0; | ||
| 1440 | for (u32 rt = 0; rt < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++rt) { | ||
| 1441 | for (u32 component = 0; component < 4; ++component) { | ||
| 1442 | if (!header.ps.IsColorComponentOutputEnabled(rt, component)) { | ||
| 1443 | continue; | ||
| 1444 | } | ||
| 1445 | AddLine("MOV.F result_color{}.{}, {};", rt, Swizzle(component), | ||
| 1446 | safe_get_register(current_reg)); | ||
| 1447 | ++current_reg; | ||
| 1448 | } | ||
| 1449 | } | ||
| 1450 | if (header.ps.omap.depth) { | ||
| 1451 | AddLine("MOV.F result.depth.z, {};", safe_get_register(current_reg + 1)); | ||
| 1452 | } | ||
| 1453 | |||
| 1454 | AddLine("RET;"); | ||
| 1455 | } | ||
| 1456 | |||
| 1457 | std::string ARBDecompiler::Assign(Operation operation) { | ||
| 1458 | const Node& dest = operation[0]; | ||
| 1459 | const Node& src = operation[1]; | ||
| 1460 | |||
| 1461 | std::string dest_name; | ||
| 1462 | if (const auto gpr = std::get_if<GprNode>(&*dest)) { | ||
| 1463 | if (gpr->GetIndex() == Register::ZeroIndex) { | ||
| 1464 | // Writing to Register::ZeroIndex is a no op | ||
| 1465 | return {}; | ||
| 1466 | } | ||
| 1467 | dest_name = fmt::format("R{}.x", gpr->GetIndex()); | ||
| 1468 | } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { | ||
| 1469 | const u32 element = abuf->GetElement(); | ||
| 1470 | const char swizzle = Swizzle(element); | ||
| 1471 | switch (const Attribute::Index index = abuf->GetIndex()) { | ||
| 1472 | case Attribute::Index::Position: | ||
| 1473 | dest_name = fmt::format("result.position.{}", swizzle); | ||
| 1474 | break; | ||
| 1475 | case Attribute::Index::LayerViewportPointSize: | ||
| 1476 | switch (element) { | ||
| 1477 | case 0: | ||
| 1478 | UNIMPLEMENTED(); | ||
| 1479 | return {}; | ||
| 1480 | case 1: | ||
| 1481 | case 2: | ||
| 1482 | if (!device.HasNvViewportArray2()) { | ||
| 1483 | LOG_ERROR( | ||
| 1484 | Render_OpenGL, | ||
| 1485 | "NV_viewport_array2 is missing. Maxwell gen 2 or better is required."); | ||
| 1486 | return {}; | ||
| 1487 | } | ||
| 1488 | dest_name = element == 1 ? "result.layer.x" : "result.viewport.x"; | ||
| 1489 | break; | ||
| 1490 | case 3: | ||
| 1491 | dest_name = "result.pointsize.x"; | ||
| 1492 | break; | ||
| 1493 | } | ||
| 1494 | break; | ||
| 1495 | case Attribute::Index::ClipDistances0123: | ||
| 1496 | dest_name = fmt::format("result.clip[{}].x", element); | ||
| 1497 | break; | ||
| 1498 | case Attribute::Index::ClipDistances4567: | ||
| 1499 | dest_name = fmt::format("result.clip[{}].x", element + 4); | ||
| 1500 | break; | ||
| 1501 | default: | ||
| 1502 | if (!IsGenericAttribute(index)) { | ||
| 1503 | UNREACHABLE(); | ||
| 1504 | return {}; | ||
| 1505 | } | ||
| 1506 | dest_name = | ||
| 1507 | fmt::format("result.attrib[{}].{}", GetGenericAttributeIndex(index), swizzle); | ||
| 1508 | break; | ||
| 1509 | } | ||
| 1510 | } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { | ||
| 1511 | const std::string address = Visit(lmem->GetAddress()); | ||
| 1512 | AddLine("SHR.U {}, {}, 2;", address, address); | ||
| 1513 | dest_name = fmt::format("lmem[{}].x", address); | ||
| 1514 | } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { | ||
| 1515 | AddLine("STS.U32 {}, shared_mem[{}];", Visit(src), Visit(smem->GetAddress())); | ||
| 1516 | ResetTemporaries(); | ||
| 1517 | return {}; | ||
| 1518 | } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { | ||
| 1519 | const std::string temporary = AllocTemporary(); | ||
| 1520 | AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()), | ||
| 1521 | Visit(gmem->GetBaseAddress())); | ||
| 1522 | AddLine("STB.U32 {}, {}[{}];", Visit(src), GlobalMemoryName(gmem->GetDescriptor()), | ||
| 1523 | temporary); | ||
| 1524 | ResetTemporaries(); | ||
| 1525 | return {}; | ||
| 1526 | } else { | ||
| 1527 | UNREACHABLE(); | ||
| 1528 | ResetTemporaries(); | ||
| 1529 | return {}; | ||
| 1530 | } | ||
| 1531 | |||
| 1532 | AddLine("MOV.U {}, {};", dest_name, Visit(src)); | ||
| 1533 | ResetTemporaries(); | ||
| 1534 | return {}; | ||
| 1535 | } | ||
| 1536 | |||
| 1537 | std::string ARBDecompiler::Select(Operation operation) { | ||
| 1538 | const std::string temporary = AllocTemporary(); | ||
| 1539 | AddLine("CMP.S {}, {}, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]), | ||
| 1540 | Visit(operation[2])); | ||
| 1541 | return temporary; | ||
| 1542 | } | ||
| 1543 | |||
| 1544 | std::string ARBDecompiler::FClamp(Operation operation) { | ||
| 1545 | // 1.0f in hex, replace with std::bit_cast on C++20 | ||
| 1546 | static constexpr u32 POSITIVE_ONE = 0x3f800000; | ||
| 1547 | |||
| 1548 | const std::string temporary = AllocTemporary(); | ||
| 1549 | const Node& value = operation[0]; | ||
| 1550 | const Node& low = operation[1]; | ||
| 1551 | const Node& high = operation[2]; | ||
| 1552 | const auto imm_low = std::get_if<ImmediateNode>(&*low); | ||
| 1553 | const auto imm_high = std::get_if<ImmediateNode>(&*high); | ||
| 1554 | if (imm_low && imm_high && imm_low->GetValue() == 0 && imm_high->GetValue() == POSITIVE_ONE) { | ||
| 1555 | AddLine("MOV.F32.SAT {}, {};", temporary, Visit(value)); | ||
| 1556 | } else { | ||
| 1557 | AddLine("MIN.F {}, {}, {};", temporary, Visit(value), Visit(high)); | ||
| 1558 | AddLine("MAX.F {}, {}, {};", temporary, temporary, Visit(low)); | ||
| 1559 | } | ||
| 1560 | return temporary; | ||
| 1561 | } | ||
| 1562 | |||
| 1563 | std::string ARBDecompiler::FCastHalf0(Operation operation) { | ||
| 1564 | const std::string temporary = AllocVectorTemporary(); | ||
| 1565 | AddLine("UP2H.F {}.x, {};", temporary, Visit(operation[0])); | ||
| 1566 | return fmt::format("{}.x", temporary); | ||
| 1567 | } | ||
| 1568 | |||
| 1569 | std::string ARBDecompiler::FCastHalf1(Operation operation) { | ||
| 1570 | const std::string temporary = AllocVectorTemporary(); | ||
| 1571 | AddLine("UP2H.F {}.y, {};", temporary, Visit(operation[0])); | ||
| 1572 | AddLine("MOV {}.x, {}.y;", temporary, temporary); | ||
| 1573 | return fmt::format("{}.x", temporary); | ||
| 1574 | } | ||
| 1575 | |||
| 1576 | std::string ARBDecompiler::FSqrt(Operation operation) { | ||
| 1577 | const std::string temporary = AllocTemporary(); | ||
| 1578 | AddLine("RSQ.F32 {}, {};", temporary, Visit(operation[0])); | ||
| 1579 | AddLine("RCP.F32 {}, {};", temporary, temporary); | ||
| 1580 | return temporary; | ||
| 1581 | } | ||
| 1582 | |||
| 1583 | std::string ARBDecompiler::FSwizzleAdd(Operation operation) { | ||
| 1584 | const std::string temporary = AllocVectorTemporary(); | ||
| 1585 | if (!device.HasWarpIntrinsics()) { | ||
| 1586 | LOG_ERROR(Render_OpenGL, | ||
| 1587 | "NV_shader_thread_shuffle is missing. Kepler or better is required."); | ||
| 1588 | AddLine("ADD.F {}.x, {}, {};", temporary, Visit(operation[0]), Visit(operation[1])); | ||
| 1589 | return fmt::format("{}.x", temporary); | ||
| 1590 | } | ||
| 1591 | const std::string lut = AllocVectorTemporary(); | ||
| 1592 | AddLine("AND.U {}.z, {}.threadid, 3;", temporary, StageInputName(stage)); | ||
| 1593 | AddLine("SHL.U {}.z, {}.z, 1;", temporary, temporary); | ||
| 1594 | AddLine("SHR.U {}.z, {}, {}.z;", temporary, Visit(operation[2]), temporary); | ||
| 1595 | AddLine("AND.U {}.z, {}.z, 3;", temporary, temporary); | ||
| 1596 | AddLine("MUL.F32 {}.x, {}, FSWZA[{}.z];", temporary, Visit(operation[0]), temporary); | ||
| 1597 | AddLine("MUL.F32 {}.y, {}, FSWZB[{}.z];", temporary, Visit(operation[1]), temporary); | ||
| 1598 | AddLine("ADD.F32 {}.x, {}.x, {}.y;", temporary, temporary, temporary); | ||
| 1599 | return fmt::format("{}.x", temporary); | ||
| 1600 | } | ||
| 1601 | |||
| 1602 | std::string ARBDecompiler::HAdd2(Operation operation) { | ||
| 1603 | const std::string tmp1 = AllocVectorTemporary(); | ||
| 1604 | const std::string tmp2 = AllocVectorTemporary(); | ||
| 1605 | AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); | ||
| 1606 | AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); | ||
| 1607 | AddLine("ADD.F16 {}, {}, {};", tmp1, tmp1, tmp2); | ||
| 1608 | AddLine("PK2H.F {}.x, {};", tmp1, tmp1); | ||
| 1609 | return fmt::format("{}.x", tmp1); | ||
| 1610 | } | ||
| 1611 | |||
| 1612 | std::string ARBDecompiler::HMul2(Operation operation) { | ||
| 1613 | const std::string tmp1 = AllocVectorTemporary(); | ||
| 1614 | const std::string tmp2 = AllocVectorTemporary(); | ||
| 1615 | AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); | ||
| 1616 | AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); | ||
| 1617 | AddLine("MUL.F16 {}, {}, {};", tmp1, tmp1, tmp2); | ||
| 1618 | AddLine("PK2H.F {}.x, {};", tmp1, tmp1); | ||
| 1619 | return fmt::format("{}.x", tmp1); | ||
| 1620 | } | ||
| 1621 | |||
| 1622 | std::string ARBDecompiler::HFma2(Operation operation) { | ||
| 1623 | const std::string tmp1 = AllocVectorTemporary(); | ||
| 1624 | const std::string tmp2 = AllocVectorTemporary(); | ||
| 1625 | const std::string tmp3 = AllocVectorTemporary(); | ||
| 1626 | AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); | ||
| 1627 | AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); | ||
| 1628 | AddLine("UP2H.F {}.xy, {};", tmp3, Visit(operation[2])); | ||
| 1629 | AddLine("MAD.F16 {}, {}, {}, {};", tmp1, tmp1, tmp2, tmp3); | ||
| 1630 | AddLine("PK2H.F {}.x, {};", tmp1, tmp1); | ||
| 1631 | return fmt::format("{}.x", tmp1); | ||
| 1632 | } | ||
| 1633 | |||
| 1634 | std::string ARBDecompiler::HAbsolute(Operation operation) { | ||
| 1635 | const std::string temporary = AllocVectorTemporary(); | ||
| 1636 | AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); | ||
| 1637 | AddLine("PK2H.F {}.x, |{}|;", temporary, temporary); | ||
| 1638 | return fmt::format("{}.x", temporary); | ||
| 1639 | } | ||
| 1640 | |||
| 1641 | std::string ARBDecompiler::HNegate(Operation operation) { | ||
| 1642 | const std::string temporary = AllocVectorTemporary(); | ||
| 1643 | AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); | ||
| 1644 | AddLine("MOVC.S RC.x, {};", Visit(operation[1])); | ||
| 1645 | AddLine("MOV.F {}.x (NE.x), -{}.x;", temporary, temporary); | ||
| 1646 | AddLine("MOVC.S RC.x, {};", Visit(operation[2])); | ||
| 1647 | AddLine("MOV.F {}.y (NE.x), -{}.y;", temporary, temporary); | ||
| 1648 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1649 | return fmt::format("{}.x", temporary); | ||
| 1650 | } | ||
| 1651 | |||
| 1652 | std::string ARBDecompiler::HClamp(Operation operation) { | ||
| 1653 | const std::string tmp1 = AllocVectorTemporary(); | ||
| 1654 | const std::string tmp2 = AllocVectorTemporary(); | ||
| 1655 | AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); | ||
| 1656 | AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[1])); | ||
| 1657 | AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2); | ||
| 1658 | AddLine("MAX.F {}, {}, {};", tmp1, tmp1, tmp2); | ||
| 1659 | AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[2])); | ||
| 1660 | AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2); | ||
| 1661 | AddLine("MIN.F {}, {}, {};", tmp1, tmp1, tmp2); | ||
| 1662 | AddLine("PK2H.F {}.x, {};", tmp1, tmp1); | ||
| 1663 | return fmt::format("{}.x", tmp1); | ||
| 1664 | } | ||
| 1665 | |||
| 1666 | std::string ARBDecompiler::HCastFloat(Operation operation) { | ||
| 1667 | const std::string temporary = AllocVectorTemporary(); | ||
| 1668 | AddLine("MOV.F {}.y, {{0, 0, 0, 0}};", temporary); | ||
| 1669 | AddLine("MOV.F {}.x, {};", temporary, Visit(operation[0])); | ||
| 1670 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1671 | return fmt::format("{}.x", temporary); | ||
| 1672 | } | ||
| 1673 | |||
| 1674 | std::string ARBDecompiler::HUnpack(Operation operation) { | ||
| 1675 | const std::string operand = Visit(operation[0]); | ||
| 1676 | switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) { | ||
| 1677 | case Tegra::Shader::HalfType::H0_H1: | ||
| 1678 | return operand; | ||
| 1679 | case Tegra::Shader::HalfType::F32: { | ||
| 1680 | const std::string temporary = AllocVectorTemporary(); | ||
| 1681 | AddLine("MOV.U {}.x, {};", temporary, operand); | ||
| 1682 | AddLine("MOV.U {}.y, {}.x;", temporary, temporary); | ||
| 1683 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1684 | return fmt::format("{}.x", temporary); | ||
| 1685 | } | ||
| 1686 | case Tegra::Shader::HalfType::H0_H0: { | ||
| 1687 | const std::string temporary = AllocVectorTemporary(); | ||
| 1688 | AddLine("UP2H.F {}.xy, {};", temporary, operand); | ||
| 1689 | AddLine("MOV.U {}.y, {}.x;", temporary, temporary); | ||
| 1690 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1691 | return fmt::format("{}.x", temporary); | ||
| 1692 | } | ||
| 1693 | case Tegra::Shader::HalfType::H1_H1: { | ||
| 1694 | const std::string temporary = AllocVectorTemporary(); | ||
| 1695 | AddLine("UP2H.F {}.xy, {};", temporary, operand); | ||
| 1696 | AddLine("MOV.U {}.x, {}.y;", temporary, temporary); | ||
| 1697 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1698 | return fmt::format("{}.x", temporary); | ||
| 1699 | } | ||
| 1700 | } | ||
| 1701 | UNREACHABLE(); | ||
| 1702 | return "{0, 0, 0, 0}.x"; | ||
| 1703 | } | ||
| 1704 | |||
| 1705 | std::string ARBDecompiler::HMergeF32(Operation operation) { | ||
| 1706 | const std::string temporary = AllocVectorTemporary(); | ||
| 1707 | AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); | ||
| 1708 | return fmt::format("{}.x", temporary); | ||
| 1709 | } | ||
| 1710 | |||
| 1711 | std::string ARBDecompiler::HMergeH0(Operation operation) { | ||
| 1712 | const std::string temporary = AllocVectorTemporary(); | ||
| 1713 | AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); | ||
| 1714 | AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1])); | ||
| 1715 | AddLine("MOV.U {}.x, {}.z;", temporary, temporary); | ||
| 1716 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1717 | return fmt::format("{}.x", temporary); | ||
| 1718 | } | ||
| 1719 | |||
| 1720 | std::string ARBDecompiler::HMergeH1(Operation operation) { | ||
| 1721 | const std::string temporary = AllocVectorTemporary(); | ||
| 1722 | AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); | ||
| 1723 | AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1])); | ||
| 1724 | AddLine("MOV.U {}.y, {}.w;", temporary, temporary); | ||
| 1725 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1726 | return fmt::format("{}.x", temporary); | ||
| 1727 | } | ||
| 1728 | |||
| 1729 | std::string ARBDecompiler::HPack2(Operation operation) { | ||
| 1730 | const std::string temporary = AllocVectorTemporary(); | ||
| 1731 | AddLine("MOV.U {}.x, {};", temporary, Visit(operation[0])); | ||
| 1732 | AddLine("MOV.U {}.y, {};", temporary, Visit(operation[1])); | ||
| 1733 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1734 | return fmt::format("{}.x", temporary); | ||
| 1735 | } | ||
| 1736 | |||
| 1737 | std::string ARBDecompiler::LogicalAssign(Operation operation) { | ||
| 1738 | const Node& dest = operation[0]; | ||
| 1739 | const Node& src = operation[1]; | ||
| 1740 | |||
| 1741 | std::string target; | ||
| 1742 | |||
| 1743 | if (const auto pred = std::get_if<PredicateNode>(&*dest)) { | ||
| 1744 | ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); | ||
| 1745 | |||
| 1746 | const Tegra::Shader::Pred index = pred->GetIndex(); | ||
| 1747 | switch (index) { | ||
| 1748 | case Tegra::Shader::Pred::NeverExecute: | ||
| 1749 | case Tegra::Shader::Pred::UnusedIndex: | ||
| 1750 | // Writing to these predicates is a no-op | ||
| 1751 | return {}; | ||
| 1752 | } | ||
| 1753 | target = fmt::format("P{}.x", static_cast<u64>(index)); | ||
| 1754 | } else if (const auto internal_flag = std::get_if<InternalFlagNode>(&*dest)) { | ||
| 1755 | const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag()); | ||
| 1756 | target = fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]); | ||
| 1757 | } else { | ||
| 1758 | UNREACHABLE(); | ||
| 1759 | ResetTemporaries(); | ||
| 1760 | return {}; | ||
| 1761 | } | ||
| 1762 | |||
| 1763 | AddLine("MOV.U {}, {};", target, Visit(src)); | ||
| 1764 | ResetTemporaries(); | ||
| 1765 | return {}; | ||
| 1766 | } | ||
| 1767 | |||
| 1768 | std::string ARBDecompiler::LogicalPick2(Operation operation) { | ||
| 1769 | const std::string temporary = AllocTemporary(); | ||
| 1770 | const u32 index = std::get<ImmediateNode>(*operation[1]).GetValue(); | ||
| 1771 | AddLine("MOV.U {}, {}.{};", temporary, Visit(operation[0]), Swizzle(index)); | ||
| 1772 | return temporary; | ||
| 1773 | } | ||
| 1774 | |||
| 1775 | std::string ARBDecompiler::LogicalAnd2(Operation operation) { | ||
| 1776 | const std::string temporary = AllocTemporary(); | ||
| 1777 | const std::string op = Visit(operation[0]); | ||
| 1778 | AddLine("AND.U {}, {}.x, {}.y;", temporary, op, op); | ||
| 1779 | return temporary; | ||
| 1780 | } | ||
| 1781 | |||
| 1782 | std::string ARBDecompiler::FloatOrdered(Operation operation) { | ||
| 1783 | const std::string temporary = AllocTemporary(); | ||
| 1784 | AddLine("MOVC.F32 RC.x, {};", Visit(operation[0])); | ||
| 1785 | AddLine("MOVC.F32 RC.y, {};", Visit(operation[1])); | ||
| 1786 | AddLine("MOV.S {}, -1;", temporary); | ||
| 1787 | AddLine("MOV.S {} (NAN.x), 0;", temporary); | ||
| 1788 | AddLine("MOV.S {} (NAN.y), 0;", temporary); | ||
| 1789 | return temporary; | ||
| 1790 | } | ||
| 1791 | |||
| 1792 | std::string ARBDecompiler::FloatUnordered(Operation operation) { | ||
| 1793 | const std::string temporary = AllocTemporary(); | ||
| 1794 | AddLine("MOVC.F32 RC.x, {};", Visit(operation[0])); | ||
| 1795 | AddLine("MOVC.F32 RC.y, {};", Visit(operation[1])); | ||
| 1796 | AddLine("MOV.S {}, 0;", temporary); | ||
| 1797 | AddLine("MOV.S {} (NAN.x), -1;", temporary); | ||
| 1798 | AddLine("MOV.S {} (NAN.y), -1;", temporary); | ||
| 1799 | return temporary; | ||
| 1800 | } | ||
| 1801 | |||
| 1802 | std::string ARBDecompiler::LogicalAddCarry(Operation operation) { | ||
| 1803 | const std::string temporary = AllocTemporary(); | ||
| 1804 | AddLine("ADDC.U RC, {}, {};", Visit(operation[0]), Visit(operation[1])); | ||
| 1805 | AddLine("MOV.S {}, 0;", temporary); | ||
| 1806 | AddLine("IF CF.x;"); | ||
| 1807 | AddLine("MOV.S {}, -1;", temporary); | ||
| 1808 | AddLine("ENDIF;"); | ||
| 1809 | return temporary; | ||
| 1810 | } | ||
| 1811 | |||
| 1812 | std::string ARBDecompiler::Texture(Operation operation) { | ||
| 1813 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1814 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1815 | const auto [temporary, swizzle] = BuildCoords(operation); | ||
| 1816 | |||
| 1817 | std::string_view opcode = "TEX"; | ||
| 1818 | std::string extra; | ||
| 1819 | if (meta.bias) { | ||
| 1820 | ASSERT(!meta.lod); | ||
| 1821 | opcode = "TXB"; | ||
| 1822 | |||
| 1823 | if (swizzle < 4) { | ||
| 1824 | AddLine("MOV.F {}.w, {};", temporary, Visit(meta.bias)); | ||
| 1825 | } else { | ||
| 1826 | const std::string bias = AllocTemporary(); | ||
| 1827 | AddLine("MOV.F {}, {};", bias, Visit(meta.bias)); | ||
| 1828 | extra = fmt::format(" {},", bias); | ||
| 1829 | } | ||
| 1830 | } | ||
| 1831 | if (meta.lod) { | ||
| 1832 | ASSERT(!meta.bias); | ||
| 1833 | opcode = "TXL"; | ||
| 1834 | |||
| 1835 | if (swizzle < 4) { | ||
| 1836 | AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod)); | ||
| 1837 | } else { | ||
| 1838 | const std::string lod = AllocTemporary(); | ||
| 1839 | AddLine("MOV.F {}, {};", lod, Visit(meta.lod)); | ||
| 1840 | extra = fmt::format(" {},", lod); | ||
| 1841 | } | ||
| 1842 | } | ||
| 1843 | |||
| 1844 | AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, temporary, extra, sampler_id, | ||
| 1845 | TextureType(meta), BuildAoffi(operation)); | ||
| 1846 | AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1847 | return fmt::format("{}.x", temporary); | ||
| 1848 | } | ||
| 1849 | |||
| 1850 | std::string ARBDecompiler::TextureGather(Operation operation) { | ||
| 1851 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1852 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1853 | const auto [temporary, swizzle] = BuildCoords(operation); | ||
| 1854 | |||
| 1855 | std::string comp; | ||
| 1856 | if (!meta.sampler.is_shadow) { | ||
| 1857 | const auto& immediate = std::get<ImmediateNode>(*meta.component); | ||
| 1858 | comp = fmt::format(".{}", Swizzle(immediate.GetValue())); | ||
| 1859 | } | ||
| 1860 | |||
| 1861 | AddLine("TXG.F {}, {}, texture[{}]{}, {}{};", temporary, temporary, sampler_id, comp, | ||
| 1862 | TextureType(meta), BuildAoffi(operation)); | ||
| 1863 | AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1864 | return fmt::format("{}.x", temporary); | ||
| 1865 | } | ||
| 1866 | |||
| 1867 | std::string ARBDecompiler::TextureQueryDimensions(Operation operation) { | ||
| 1868 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1869 | const std::string temporary = AllocVectorTemporary(); | ||
| 1870 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1871 | |||
| 1872 | ASSERT(!meta.sampler.is_array); | ||
| 1873 | |||
| 1874 | const std::string lod = operation.GetOperandsCount() > 0 ? Visit(operation[0]) : "0"; | ||
| 1875 | AddLine("TXQ {}, {}, texture[{}], {};", temporary, lod, sampler_id, TextureType(meta)); | ||
| 1876 | AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1877 | return fmt::format("{}.x", temporary); | ||
| 1878 | } | ||
| 1879 | |||
| 1880 | std::string ARBDecompiler::TextureQueryLod(Operation operation) { | ||
| 1881 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1882 | const std::string temporary = AllocVectorTemporary(); | ||
| 1883 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1884 | |||
| 1885 | ASSERT(!meta.sampler.is_array); | ||
| 1886 | |||
| 1887 | const std::size_t count = operation.GetOperandsCount(); | ||
| 1888 | for (std::size_t i = 0; i < count; ++i) { | ||
| 1889 | AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); | ||
| 1890 | } | ||
| 1891 | AddLine("LOD.F {}, {}, texture[{}], {};", temporary, temporary, sampler_id, TextureType(meta)); | ||
| 1892 | AddLine("MUL.F32 {}, {}, {{256, 256, 0, 0}};", temporary, temporary); | ||
| 1893 | AddLine("TRUNC.S {}, {};", temporary, temporary); | ||
| 1894 | AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1895 | return fmt::format("{}.x", temporary); | ||
| 1896 | } | ||
| 1897 | |||
| 1898 | std::string ARBDecompiler::TexelFetch(Operation operation) { | ||
| 1899 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1900 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1901 | const auto [temporary, swizzle] = BuildCoords(operation); | ||
| 1902 | |||
| 1903 | if (!meta.sampler.is_buffer) { | ||
| 1904 | ASSERT(swizzle < 4); | ||
| 1905 | AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod)); | ||
| 1906 | } | ||
| 1907 | AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, temporary, sampler_id, TextureType(meta), | ||
| 1908 | BuildAoffi(operation)); | ||
| 1909 | AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1910 | return fmt::format("{}.x", temporary); | ||
| 1911 | } | ||
| 1912 | |||
| 1913 | std::string ARBDecompiler::TextureGradient(Operation operation) { | ||
| 1914 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1915 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1916 | const std::string ddx = AllocVectorTemporary(); | ||
| 1917 | const std::string ddy = AllocVectorTemporary(); | ||
| 1918 | const std::string coord = BuildCoords(operation).first; | ||
| 1919 | |||
| 1920 | const std::size_t num_components = meta.derivates.size() / 2; | ||
| 1921 | for (std::size_t index = 0; index < num_components; ++index) { | ||
| 1922 | const char swizzle = Swizzle(index); | ||
| 1923 | AddLine("MOV.F {}.{}, {};", ddx, swizzle, Visit(meta.derivates[index * 2])); | ||
| 1924 | AddLine("MOV.F {}.{}, {};", ddy, swizzle, Visit(meta.derivates[index * 2 + 1])); | ||
| 1925 | } | ||
| 1926 | |||
| 1927 | const std::string_view result = coord; | ||
| 1928 | AddLine("TXD.F {}, {}, {}, {}, texture[{}], {}{};", result, coord, ddx, ddy, sampler_id, | ||
| 1929 | TextureType(meta), BuildAoffi(operation)); | ||
| 1930 | AddLine("MOV.F {}.x, {}.{};", result, result, Swizzle(meta.element)); | ||
| 1931 | return fmt::format("{}.x", result); | ||
| 1932 | } | ||
| 1933 | |||
| 1934 | std::string ARBDecompiler::ImageLoad(Operation operation) { | ||
| 1935 | const auto& meta = std::get<MetaImage>(operation.GetMeta()); | ||
| 1936 | const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; | ||
| 1937 | const std::size_t count = operation.GetOperandsCount(); | ||
| 1938 | const std::string_view type = ImageType(meta.image.type); | ||
| 1939 | |||
| 1940 | const std::string temporary = AllocVectorTemporary(); | ||
| 1941 | for (std::size_t i = 0; i < count; ++i) { | ||
| 1942 | AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); | ||
| 1943 | } | ||
| 1944 | AddLine("LOADIM.F {}, {}, image[{}], {};", temporary, temporary, image_id, type); | ||
| 1945 | AddLine("MOV.F {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1946 | return fmt::format("{}.x", temporary); | ||
| 1947 | } | ||
| 1948 | |||
| 1949 | std::string ARBDecompiler::ImageStore(Operation operation) { | ||
| 1950 | const auto& meta = std::get<MetaImage>(operation.GetMeta()); | ||
| 1951 | const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; | ||
| 1952 | const std::size_t num_coords = operation.GetOperandsCount(); | ||
| 1953 | const std::size_t num_values = meta.values.size(); | ||
| 1954 | const std::string_view type = ImageType(meta.image.type); | ||
| 1955 | |||
| 1956 | const std::string coord = AllocVectorTemporary(); | ||
| 1957 | const std::string value = AllocVectorTemporary(); | ||
| 1958 | for (std::size_t i = 0; i < num_coords; ++i) { | ||
| 1959 | AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i])); | ||
| 1960 | } | ||
| 1961 | for (std::size_t i = 0; i < num_values; ++i) { | ||
| 1962 | AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i])); | ||
| 1963 | } | ||
| 1964 | AddLine("STOREIM.F image[{}], {}, {}, {};", image_id, value, coord, type); | ||
| 1965 | return {}; | ||
| 1966 | } | ||
| 1967 | |||
| 1968 | std::string ARBDecompiler::Branch(Operation operation) { | ||
| 1969 | const auto target = std::get<ImmediateNode>(*operation[0]); | ||
| 1970 | AddLine("MOV.U PC.x, {};", target.GetValue()); | ||
| 1971 | AddLine("CONT;"); | ||
| 1972 | return {}; | ||
| 1973 | } | ||
| 1974 | |||
| 1975 | std::string ARBDecompiler::BranchIndirect(Operation operation) { | ||
| 1976 | AddLine("MOV.U PC.x, {};", Visit(operation[0])); | ||
| 1977 | AddLine("CONT;"); | ||
| 1978 | return {}; | ||
| 1979 | } | ||
| 1980 | |||
| 1981 | std::string ARBDecompiler::PushFlowStack(Operation operation) { | ||
| 1982 | const auto stack = std::get<MetaStackClass>(operation.GetMeta()); | ||
| 1983 | const u32 target = std::get<ImmediateNode>(*operation[0]).GetValue(); | ||
| 1984 | const std::string_view stack_name = StackName(stack); | ||
| 1985 | AddLine("MOV.U {}[{}_TOP.x].x, {};", stack_name, stack_name, target); | ||
| 1986 | AddLine("ADD.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name); | ||
| 1987 | return {}; | ||
| 1988 | } | ||
| 1989 | |||
| 1990 | std::string ARBDecompiler::PopFlowStack(Operation operation) { | ||
| 1991 | const auto stack = std::get<MetaStackClass>(operation.GetMeta()); | ||
| 1992 | const std::string_view stack_name = StackName(stack); | ||
| 1993 | AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name); | ||
| 1994 | AddLine("MOV.U PC.x, {}[{}_TOP.x].x;", stack_name, stack_name); | ||
| 1995 | AddLine("CONT;"); | ||
| 1996 | return {}; | ||
| 1997 | } | ||
| 1998 | |||
| 1999 | std::string ARBDecompiler::Exit(Operation) { | ||
| 2000 | Exit(); | ||
| 2001 | return {}; | ||
| 2002 | } | ||
| 2003 | |||
| 2004 | std::string ARBDecompiler::Discard(Operation) { | ||
| 2005 | AddLine("KIL TR;"); | ||
| 2006 | return {}; | ||
| 2007 | } | ||
| 2008 | |||
| 2009 | std::string ARBDecompiler::EmitVertex(Operation) { | ||
| 2010 | AddLine("EMIT;"); | ||
| 2011 | return {}; | ||
| 2012 | } | ||
| 2013 | |||
| 2014 | std::string ARBDecompiler::EndPrimitive(Operation) { | ||
| 2015 | AddLine("ENDPRIM;"); | ||
| 2016 | return {}; | ||
| 2017 | } | ||
| 2018 | |||
| 2019 | std::string ARBDecompiler::InvocationId(Operation) { | ||
| 2020 | return "primitive.invocation"; | ||
| 2021 | } | ||
| 2022 | |||
| 2023 | std::string ARBDecompiler::YNegate(Operation) { | ||
| 2024 | LOG_WARNING(Render_OpenGL, "(STUBBED)"); | ||
| 2025 | const std::string temporary = AllocTemporary(); | ||
| 2026 | AddLine("MOV.F {}, 1;", temporary); | ||
| 2027 | return temporary; | ||
| 2028 | } | ||
| 2029 | |||
| 2030 | std::string ARBDecompiler::ThreadId(Operation) { | ||
| 2031 | return fmt::format("{}.threadid", StageInputName(stage)); | ||
| 2032 | } | ||
| 2033 | |||
| 2034 | std::string ARBDecompiler::ShuffleIndexed(Operation operation) { | ||
| 2035 | if (!device.HasWarpIntrinsics()) { | ||
| 2036 | LOG_ERROR(Render_OpenGL, | ||
| 2037 | "NV_shader_thread_shuffle is missing. Kepler or better is required."); | ||
| 2038 | return Visit(operation[0]); | ||
| 2039 | } | ||
| 2040 | const std::string temporary = AllocVectorTemporary(); | ||
| 2041 | AddLine("SHFIDX.U {}, {}, {}, {{31, 0, 0, 0}};", temporary, Visit(operation[0]), | ||
| 2042 | Visit(operation[1])); | ||
| 2043 | AddLine("MOV.U {}.x, {}.y;", temporary, temporary); | ||
| 2044 | return fmt::format("{}.x", temporary); | ||
| 2045 | } | ||
| 2046 | |||
| 2047 | std::string ARBDecompiler::Barrier(Operation) { | ||
| 2048 | if (!ir.IsDecompiled()) { | ||
| 2049 | LOG_ERROR(Render_OpenGL, "BAR used but shader is not decompiled"); | ||
| 2050 | return {}; | ||
| 2051 | } | ||
| 2052 | AddLine("BAR;"); | ||
| 2053 | return {}; | ||
| 2054 | } | ||
| 2055 | |||
| 2056 | std::string ARBDecompiler::MemoryBarrierGroup(Operation) { | ||
| 2057 | AddLine("MEMBAR.CTA;"); | ||
| 2058 | return {}; | ||
| 2059 | } | ||
| 2060 | |||
| 2061 | std::string ARBDecompiler::MemoryBarrierGlobal(Operation) { | ||
| 2062 | AddLine("MEMBAR;"); | ||
| 2063 | return {}; | ||
| 2064 | } | ||
| 2065 | |||
| 2066 | } // Anonymous namespace | ||
| 2067 | |||
| 2068 | std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | ||
| 2069 | const VideoCommon::Shader::Registry& registry, | ||
| 2070 | Tegra::Engines::ShaderType stage, std::string_view identifier) { | ||
| 2071 | return ARBDecompiler(device, ir, registry, stage, identifier).Code(); | ||
| 2072 | } | ||
| 2073 | |||
| 2074 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.h b/src/video_core/renderer_opengl/gl_arb_decompiler.h new file mode 100644 index 000000000..6afc87220 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_arb_decompiler.h | |||
| @@ -0,0 +1,29 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <string> | ||
| 8 | #include <string_view> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | |||
| 12 | namespace Tegra::Engines { | ||
| 13 | enum class ShaderType : u32; | ||
| 14 | } | ||
| 15 | |||
| 16 | namespace VideoCommon::Shader { | ||
| 17 | class ShaderIR; | ||
| 18 | class Registry; | ||
| 19 | } // namespace VideoCommon::Shader | ||
| 20 | |||
| 21 | namespace OpenGL { | ||
| 22 | |||
| 23 | class Device; | ||
| 24 | |||
| 25 | std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | ||
| 26 | const VideoCommon::Shader::Registry& registry, | ||
| 27 | Tegra::Engines::ShaderType stage, std::string_view identifier); | ||
| 28 | |||
| 29 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 21a4f4def..b31d604e4 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -221,6 +221,7 @@ Device::Device() | |||
| 221 | has_component_indexing_bug = is_amd; | 221 | has_component_indexing_bug = is_amd; |
| 222 | has_precise_bug = TestPreciseBug(); | 222 | has_precise_bug = TestPreciseBug(); |
| 223 | has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; | 223 | has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; |
| 224 | has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; | ||
| 224 | use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && | 225 | use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && |
| 225 | GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && | 226 | GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && |
| 226 | GLAD_GL_NV_transform_feedback2; | 227 | GLAD_GL_NV_transform_feedback2; |
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 98cca0254..145347943 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -88,6 +88,10 @@ public: | |||
| 88 | return has_fast_buffer_sub_data; | 88 | return has_fast_buffer_sub_data; |
| 89 | } | 89 | } |
| 90 | 90 | ||
| 91 | bool HasNvViewportArray2() const { | ||
| 92 | return has_nv_viewport_array2; | ||
| 93 | } | ||
| 94 | |||
| 91 | bool UseAssemblyShaders() const { | 95 | bool UseAssemblyShaders() const { |
| 92 | return use_assembly_shaders; | 96 | return use_assembly_shaders; |
| 93 | } | 97 | } |
| @@ -111,6 +115,7 @@ private: | |||
| 111 | bool has_component_indexing_bug{}; | 115 | bool has_component_indexing_bug{}; |
| 112 | bool has_precise_bug{}; | 116 | bool has_precise_bug{}; |
| 113 | bool has_fast_buffer_sub_data{}; | 117 | bool has_fast_buffer_sub_data{}; |
| 118 | bool has_nv_viewport_array2{}; | ||
| 114 | bool use_assembly_shaders{}; | 119 | bool use_assembly_shaders{}; |
| 115 | }; | 120 | }; |
| 116 | 121 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index c28486b1d..46e780a06 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include "video_core/engines/maxwell_3d.h" | 20 | #include "video_core/engines/maxwell_3d.h" |
| 21 | #include "video_core/engines/shader_type.h" | 21 | #include "video_core/engines/shader_type.h" |
| 22 | #include "video_core/memory_manager.h" | 22 | #include "video_core/memory_manager.h" |
| 23 | #include "video_core/renderer_opengl/gl_arb_decompiler.h" | ||
| 23 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 24 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 24 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 25 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| 25 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 26 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| @@ -148,7 +149,8 @@ ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 u | |||
| 148 | auto program = std::make_shared<ProgramHandle>(); | 149 | auto program = std::make_shared<ProgramHandle>(); |
| 149 | 150 | ||
| 150 | if (device.UseAssemblyShaders()) { | 151 | if (device.UseAssemblyShaders()) { |
| 151 | const std::string arb = "Not implemented"; | 152 | const std::string arb = |
| 153 | DecompileAssemblyShader(device, ir, registry, shader_type, shader_id); | ||
| 152 | 154 | ||
| 153 | GLuint& arb_prog = program->assembly_program.handle; | 155 | GLuint& arb_prog = program->assembly_program.handle; |
| 154 | 156 | ||
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 7e9073cc3..32c81dc70 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp | |||
| @@ -631,13 +631,11 @@ void Config::ReadRendererValues() { | |||
| 631 | static_cast<Settings::RendererBackend>(ReadSetting(QStringLiteral("backend"), 0).toInt()); | 631 | static_cast<Settings::RendererBackend>(ReadSetting(QStringLiteral("backend"), 0).toInt()); |
| 632 | Settings::values.renderer_debug = ReadSetting(QStringLiteral("debug"), false).toBool(); | 632 | Settings::values.renderer_debug = ReadSetting(QStringLiteral("debug"), false).toBool(); |
| 633 | Settings::values.vulkan_device = ReadSetting(QStringLiteral("vulkan_device"), 0).toInt(); | 633 | Settings::values.vulkan_device = ReadSetting(QStringLiteral("vulkan_device"), 0).toInt(); |
| 634 | Settings::values.resolution_factor = | ||
| 635 | ReadSetting(QStringLiteral("resolution_factor"), 1.0).toFloat(); | ||
| 636 | Settings::values.aspect_ratio = ReadSetting(QStringLiteral("aspect_ratio"), 0).toInt(); | 634 | Settings::values.aspect_ratio = ReadSetting(QStringLiteral("aspect_ratio"), 0).toInt(); |
| 637 | Settings::values.max_anisotropy = ReadSetting(QStringLiteral("max_anisotropy"), 0).toInt(); | 635 | Settings::values.max_anisotropy = ReadSetting(QStringLiteral("max_anisotropy"), 0).toInt(); |
| 638 | Settings::values.use_frame_limit = | 636 | Settings::values.use_frame_limit = |
| 639 | ReadSetting(QStringLiteral("use_frame_limit"), true).toBool(); | 637 | ReadSetting(QStringLiteral("use_frame_limit"), true).toBool(); |
| 640 | Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt(); | 638 | Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toUInt(); |
| 641 | Settings::values.use_disk_shader_cache = | 639 | Settings::values.use_disk_shader_cache = |
| 642 | ReadSetting(QStringLiteral("use_disk_shader_cache"), true).toBool(); | 640 | ReadSetting(QStringLiteral("use_disk_shader_cache"), true).toBool(); |
| 643 | const int gpu_accuracy_level = ReadSetting(QStringLiteral("gpu_accuracy"), 0).toInt(); | 641 | const int gpu_accuracy_level = ReadSetting(QStringLiteral("gpu_accuracy"), 0).toInt(); |
| @@ -722,8 +720,6 @@ void Config::ReadUIValues() { | |||
| 722 | .toString(); | 720 | .toString(); |
| 723 | UISettings::values.enable_discord_presence = | 721 | UISettings::values.enable_discord_presence = |
| 724 | ReadSetting(QStringLiteral("enable_discord_presence"), true).toBool(); | 722 | ReadSetting(QStringLiteral("enable_discord_presence"), true).toBool(); |
| 725 | UISettings::values.screenshot_resolution_factor = | ||
| 726 | static_cast<u16>(ReadSetting(QStringLiteral("screenshot_resolution_factor"), 0).toUInt()); | ||
| 727 | UISettings::values.select_user_on_boot = | 723 | UISettings::values.select_user_on_boot = |
| 728 | ReadSetting(QStringLiteral("select_user_on_boot"), false).toBool(); | 724 | ReadSetting(QStringLiteral("select_user_on_boot"), false).toBool(); |
| 729 | 725 | ||
| @@ -1082,8 +1078,6 @@ void Config::SaveRendererValues() { | |||
| 1082 | WriteSetting(QStringLiteral("backend"), static_cast<int>(Settings::values.renderer_backend), 0); | 1078 | WriteSetting(QStringLiteral("backend"), static_cast<int>(Settings::values.renderer_backend), 0); |
| 1083 | WriteSetting(QStringLiteral("debug"), Settings::values.renderer_debug, false); | 1079 | WriteSetting(QStringLiteral("debug"), Settings::values.renderer_debug, false); |
| 1084 | WriteSetting(QStringLiteral("vulkan_device"), Settings::values.vulkan_device, 0); | 1080 | WriteSetting(QStringLiteral("vulkan_device"), Settings::values.vulkan_device, 0); |
| 1085 | WriteSetting(QStringLiteral("resolution_factor"), | ||
| 1086 | static_cast<double>(Settings::values.resolution_factor), 1.0); | ||
| 1087 | WriteSetting(QStringLiteral("aspect_ratio"), Settings::values.aspect_ratio, 0); | 1081 | WriteSetting(QStringLiteral("aspect_ratio"), Settings::values.aspect_ratio, 0); |
| 1088 | WriteSetting(QStringLiteral("max_anisotropy"), Settings::values.max_anisotropy, 0); | 1082 | WriteSetting(QStringLiteral("max_anisotropy"), Settings::values.max_anisotropy, 0); |
| 1089 | WriteSetting(QStringLiteral("use_frame_limit"), Settings::values.use_frame_limit, true); | 1083 | WriteSetting(QStringLiteral("use_frame_limit"), Settings::values.use_frame_limit, true); |
| @@ -1159,8 +1153,6 @@ void Config::SaveUIValues() { | |||
| 1159 | QString::fromUtf8(UISettings::themes[0].second)); | 1153 | QString::fromUtf8(UISettings::themes[0].second)); |
| 1160 | WriteSetting(QStringLiteral("enable_discord_presence"), | 1154 | WriteSetting(QStringLiteral("enable_discord_presence"), |
| 1161 | UISettings::values.enable_discord_presence, true); | 1155 | UISettings::values.enable_discord_presence, true); |
| 1162 | WriteSetting(QStringLiteral("screenshot_resolution_factor"), | ||
| 1163 | UISettings::values.screenshot_resolution_factor, 0); | ||
| 1164 | WriteSetting(QStringLiteral("select_user_on_boot"), UISettings::values.select_user_on_boot, | 1156 | WriteSetting(QStringLiteral("select_user_on_boot"), UISettings::values.select_user_on_boot, |
| 1165 | false); | 1157 | false); |
| 1166 | 1158 | ||
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index ea667caef..304625cd7 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp | |||
| @@ -19,47 +19,6 @@ | |||
| 19 | #include "video_core/renderer_vulkan/renderer_vulkan.h" | 19 | #include "video_core/renderer_vulkan/renderer_vulkan.h" |
| 20 | #endif | 20 | #endif |
| 21 | 21 | ||
| 22 | namespace { | ||
| 23 | enum class Resolution : int { | ||
| 24 | Auto, | ||
| 25 | Scale1x, | ||
| 26 | Scale2x, | ||
| 27 | Scale3x, | ||
| 28 | Scale4x, | ||
| 29 | }; | ||
| 30 | |||
| 31 | float ToResolutionFactor(Resolution option) { | ||
| 32 | switch (option) { | ||
| 33 | case Resolution::Auto: | ||
| 34 | return 0.f; | ||
| 35 | case Resolution::Scale1x: | ||
| 36 | return 1.f; | ||
| 37 | case Resolution::Scale2x: | ||
| 38 | return 2.f; | ||
| 39 | case Resolution::Scale3x: | ||
| 40 | return 3.f; | ||
| 41 | case Resolution::Scale4x: | ||
| 42 | return 4.f; | ||
| 43 | } | ||
| 44 | return 0.f; | ||
| 45 | } | ||
| 46 | |||
| 47 | Resolution FromResolutionFactor(float factor) { | ||
| 48 | if (factor == 0.f) { | ||
| 49 | return Resolution::Auto; | ||
| 50 | } else if (factor == 1.f) { | ||
| 51 | return Resolution::Scale1x; | ||
| 52 | } else if (factor == 2.f) { | ||
| 53 | return Resolution::Scale2x; | ||
| 54 | } else if (factor == 3.f) { | ||
| 55 | return Resolution::Scale3x; | ||
| 56 | } else if (factor == 4.f) { | ||
| 57 | return Resolution::Scale4x; | ||
| 58 | } | ||
| 59 | return Resolution::Auto; | ||
| 60 | } | ||
| 61 | } // Anonymous namespace | ||
| 62 | |||
| 63 | ConfigureGraphics::ConfigureGraphics(QWidget* parent) | 22 | ConfigureGraphics::ConfigureGraphics(QWidget* parent) |
| 64 | : QWidget(parent), ui(new Ui::ConfigureGraphics) { | 23 | : QWidget(parent), ui(new Ui::ConfigureGraphics) { |
| 65 | vulkan_device = Settings::values.vulkan_device; | 24 | vulkan_device = Settings::values.vulkan_device; |
| @@ -99,8 +58,6 @@ void ConfigureGraphics::SetConfiguration() { | |||
| 99 | 58 | ||
| 100 | ui->api->setEnabled(runtime_lock); | 59 | ui->api->setEnabled(runtime_lock); |
| 101 | ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend)); | 60 | ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend)); |
| 102 | ui->resolution_factor_combobox->setCurrentIndex( | ||
| 103 | static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor))); | ||
| 104 | ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio); | 61 | ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio); |
| 105 | ui->use_disk_shader_cache->setEnabled(runtime_lock); | 62 | ui->use_disk_shader_cache->setEnabled(runtime_lock); |
| 106 | ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache); | 63 | ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache); |
| @@ -114,8 +71,6 @@ void ConfigureGraphics::SetConfiguration() { | |||
| 114 | void ConfigureGraphics::ApplyConfiguration() { | 71 | void ConfigureGraphics::ApplyConfiguration() { |
| 115 | Settings::values.renderer_backend = GetCurrentGraphicsBackend(); | 72 | Settings::values.renderer_backend = GetCurrentGraphicsBackend(); |
| 116 | Settings::values.vulkan_device = vulkan_device; | 73 | Settings::values.vulkan_device = vulkan_device; |
| 117 | Settings::values.resolution_factor = | ||
| 118 | ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex())); | ||
| 119 | Settings::values.aspect_ratio = ui->aspect_ratio_combobox->currentIndex(); | 74 | Settings::values.aspect_ratio = ui->aspect_ratio_combobox->currentIndex(); |
| 120 | Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked(); | 75 | Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked(); |
| 121 | Settings::values.use_asynchronous_gpu_emulation = | 76 | Settings::values.use_asynchronous_gpu_emulation = |
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index c816d6108..6e75447a5 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui | |||
| @@ -85,46 +85,6 @@ | |||
| 85 | </widget> | 85 | </widget> |
| 86 | </item> | 86 | </item> |
| 87 | <item> | 87 | <item> |
| 88 | <layout class="QHBoxLayout" name="horizontalLayout_2"> | ||
| 89 | <item> | ||
| 90 | <widget class="QLabel" name="label"> | ||
| 91 | <property name="text"> | ||
| 92 | <string>Internal Resolution:</string> | ||
| 93 | </property> | ||
| 94 | </widget> | ||
| 95 | </item> | ||
| 96 | <item> | ||
| 97 | <widget class="QComboBox" name="resolution_factor_combobox"> | ||
| 98 | <item> | ||
| 99 | <property name="text"> | ||
| 100 | <string>Auto (Window Size)</string> | ||
| 101 | </property> | ||
| 102 | </item> | ||
| 103 | <item> | ||
| 104 | <property name="text"> | ||
| 105 | <string>Native (1280x720)</string> | ||
| 106 | </property> | ||
| 107 | </item> | ||
| 108 | <item> | ||
| 109 | <property name="text"> | ||
| 110 | <string>2x Native (2560x1440)</string> | ||
| 111 | </property> | ||
| 112 | </item> | ||
| 113 | <item> | ||
| 114 | <property name="text"> | ||
| 115 | <string>3x Native (3840x2160)</string> | ||
| 116 | </property> | ||
| 117 | </item> | ||
| 118 | <item> | ||
| 119 | <property name="text"> | ||
| 120 | <string>4x Native (5120x2880)</string> | ||
| 121 | </property> | ||
| 122 | </item> | ||
| 123 | </widget> | ||
| 124 | </item> | ||
| 125 | </layout> | ||
| 126 | </item> | ||
| 127 | <item> | ||
| 128 | <layout class="QHBoxLayout" name="horizontalLayout_6"> | 88 | <layout class="QHBoxLayout" name="horizontalLayout_6"> |
| 129 | <item> | 89 | <item> |
| 130 | <widget class="QLabel" name="ar_label"> | 90 | <widget class="QLabel" name="ar_label"> |
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp index 37aadf7f8..be5006ad3 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.cpp +++ b/src/yuzu/configuration/configure_graphics_advanced.cpp | |||
| @@ -12,9 +12,6 @@ ConfigureGraphicsAdvanced::ConfigureGraphicsAdvanced(QWidget* parent) | |||
| 12 | 12 | ||
| 13 | ui->setupUi(this); | 13 | ui->setupUi(this); |
| 14 | 14 | ||
| 15 | // TODO: Remove this after assembly shaders are fully integrated | ||
| 16 | ui->use_assembly_shaders->setVisible(false); | ||
| 17 | |||
| 18 | SetConfiguration(); | 15 | SetConfiguration(); |
| 19 | } | 16 | } |
| 20 | 17 | ||
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 270cccc77..4119d7907 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp | |||
| @@ -689,10 +689,7 @@ void GMainWindow::InitializeHotkeys() { | |||
| 689 | Settings::values.use_frame_limit = !Settings::values.use_frame_limit; | 689 | Settings::values.use_frame_limit = !Settings::values.use_frame_limit; |
| 690 | UpdateStatusBar(); | 690 | UpdateStatusBar(); |
| 691 | }); | 691 | }); |
| 692 | // TODO: Remove this comment/static whenever the next major release of | 692 | constexpr u16 SPEED_LIMIT_STEP = 5; |
| 693 | // MSVC occurs and we make it a requirement (see: | ||
| 694 | // https://developercommunity.visualstudio.com/content/problem/93922/constexprs-are-trying-to-be-captured-in-lambda-fun.html) | ||
| 695 | static constexpr u16 SPEED_LIMIT_STEP = 5; | ||
| 696 | connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Increase Speed Limit"), this), | 693 | connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Increase Speed Limit"), this), |
| 697 | &QShortcut::activated, this, [&] { | 694 | &QShortcut::activated, this, [&] { |
| 698 | if (Settings::values.frame_limit < 9999 - SPEED_LIMIT_STEP) { | 695 | if (Settings::values.frame_limit < 9999 - SPEED_LIMIT_STEP) { |
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 7240270f5..659b9f701 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp | |||
| @@ -380,8 +380,6 @@ void Config::ReadValues() { | |||
| 380 | Settings::values.renderer_debug = sdl2_config->GetBoolean("Renderer", "debug", false); | 380 | Settings::values.renderer_debug = sdl2_config->GetBoolean("Renderer", "debug", false); |
| 381 | Settings::values.vulkan_device = sdl2_config->GetInteger("Renderer", "vulkan_device", 0); | 381 | Settings::values.vulkan_device = sdl2_config->GetInteger("Renderer", "vulkan_device", 0); |
| 382 | 382 | ||
| 383 | Settings::values.resolution_factor = | ||
| 384 | static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0)); | ||
| 385 | Settings::values.aspect_ratio = | 383 | Settings::values.aspect_ratio = |
| 386 | static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0)); | 384 | static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0)); |
| 387 | Settings::values.max_anisotropy = | 385 | Settings::values.max_anisotropy = |
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 6f53e9659..45c07ed5d 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h | |||
| @@ -117,11 +117,6 @@ use_hw_renderer = | |||
| 117 | # 0: Interpreter (slow), 1 (default): JIT (fast) | 117 | # 0: Interpreter (slow), 1 (default): JIT (fast) |
| 118 | use_shader_jit = | 118 | use_shader_jit = |
| 119 | 119 | ||
| 120 | # Resolution scale factor | ||
| 121 | # 0: Auto (scales resolution to window size), 1: Native Switch screen resolution, Otherwise a scale | ||
| 122 | # factor for the Switch resolution | ||
| 123 | resolution_factor = | ||
| 124 | |||
| 125 | # Aspect ratio | 120 | # Aspect ratio |
| 126 | # 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window | 121 | # 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window |
| 127 | aspect_ratio = | 122 | aspect_ratio = |
diff --git a/src/yuzu_tester/config.cpp b/src/yuzu_tester/config.cpp index 3be58b15d..1566c2e3f 100644 --- a/src/yuzu_tester/config.cpp +++ b/src/yuzu_tester/config.cpp | |||
| @@ -116,8 +116,6 @@ void Config::ReadValues() { | |||
| 116 | Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false); | 116 | Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false); |
| 117 | 117 | ||
| 118 | // Renderer | 118 | // Renderer |
| 119 | Settings::values.resolution_factor = | ||
| 120 | static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0)); | ||
| 121 | Settings::values.aspect_ratio = | 119 | Settings::values.aspect_ratio = |
| 122 | static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0)); | 120 | static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0)); |
| 123 | Settings::values.max_anisotropy = | 121 | Settings::values.max_anisotropy = |
diff --git a/src/yuzu_tester/default_ini.h b/src/yuzu_tester/default_ini.h index ca203b64d..41bbbbf60 100644 --- a/src/yuzu_tester/default_ini.h +++ b/src/yuzu_tester/default_ini.h | |||
| @@ -21,11 +21,6 @@ use_hw_renderer = | |||
| 21 | # 0: Interpreter (slow), 1 (default): JIT (fast) | 21 | # 0: Interpreter (slow), 1 (default): JIT (fast) |
| 22 | use_shader_jit = | 22 | use_shader_jit = |
| 23 | 23 | ||
| 24 | # Resolution scale factor | ||
| 25 | # 0: Auto (scales resolution to window size), 1: Native Switch screen resolution, Otherwise a scale | ||
| 26 | # factor for the Switch resolution | ||
| 27 | resolution_factor = | ||
| 28 | |||
| 29 | # Aspect ratio | 24 | # Aspect ratio |
| 30 | # 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window | 25 | # 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window |
| 31 | aspect_ratio = | 26 | aspect_ratio = |