summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitmodules3
-rw-r--r--CMakeLists.txt10
-rw-r--r--CMakeModules/GenerateSCMRev.cmake2
-rw-r--r--dist/qt_themes/qdarkstyle/style.qss4
-rw-r--r--externals/CMakeLists.txt11
m---------externals/libressl0
-rw-r--r--src/common/CMakeLists.txt2
-rw-r--r--src/common/x64/xbyak_abi.h95
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic_32.cpp21
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic_32.h5
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic_cp15.cpp81
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic_cp15.h126
-rw-r--r--src/core/settings.h2
-rw-r--r--src/video_core/CMakeLists.txt2
-rw-r--r--src/video_core/macro/macro_jit_x64.cpp76
-rw-r--r--src/video_core/macro/macro_jit_x64.h5
-rw-r--r--src/video_core/renderer_opengl/gl_arb_decompiler.cpp2074
-rw-r--r--src/video_core/renderer_opengl/gl_arb_decompiler.h29
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_device.h5
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp4
-rw-r--r--src/yuzu/configuration/config.cpp10
-rw-r--r--src/yuzu/configuration/configure_graphics.cpp45
-rw-r--r--src/yuzu/configuration/configure_graphics.ui40
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.cpp3
-rw-r--r--src/yuzu/main.cpp5
-rw-r--r--src/yuzu_cmd/config.cpp2
-rw-r--r--src/yuzu_cmd/default_ini.h5
-rw-r--r--src/yuzu_tester/config.cpp2
-rw-r--r--src/yuzu_tester/default_ini.h5
30 files changed, 2276 insertions, 399 deletions
diff --git a/.gitmodules b/.gitmodules
index 2ec9dda62..9ba8fe207 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -13,6 +13,9 @@
13[submodule "soundtouch"] 13[submodule "soundtouch"]
14 path = externals/soundtouch 14 path = externals/soundtouch
15 url = https://github.com/citra-emu/ext-soundtouch.git 15 url = https://github.com/citra-emu/ext-soundtouch.git
16[submodule "libressl"]
17 path = externals/libressl
18 url = https://github.com/citra-emu/ext-libressl-portable.git
16[submodule "discord-rpc"] 19[submodule "discord-rpc"]
17 path = externals/discord-rpc 20 path = externals/discord-rpc
18 url = https://github.com/discordapp/discord-rpc.git 21 url = https://github.com/discordapp/discord-rpc.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
index a9f669d56..b71071271 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -152,7 +152,6 @@ macro(yuzu_find_packages)
152 "Boost 1.71 boost/1.72.0" 152 "Boost 1.71 boost/1.72.0"
153 "Catch2 2.11 catch2/2.11.0" 153 "Catch2 2.11 catch2/2.11.0"
154 "fmt 6.2 fmt/6.2.0" 154 "fmt 6.2 fmt/6.2.0"
155 "OpenSSL 1.1 openssl/1.1.1f"
156 # can't use until https://github.com/bincrafters/community/issues/1173 155 # can't use until https://github.com/bincrafters/community/issues/1173
157 #"libzip 1.5 libzip/1.5.2@bincrafters/stable" 156 #"libzip 1.5 libzip/1.5.2@bincrafters/stable"
158 "lz4 1.8 lz4/1.9.2" 157 "lz4 1.8 lz4/1.9.2"
@@ -312,15 +311,6 @@ elseif (TARGET Boost::boost)
312 add_library(boost ALIAS Boost::boost) 311 add_library(boost ALIAS Boost::boost)
313endif() 312endif()
314 313
315if (NOT TARGET OpenSSL::SSL)
316 set_target_properties(OpenSSL::OpenSSL PROPERTIES IMPORTED_GLOBAL TRUE)
317 add_library(OpenSSL::SSL ALIAS OpenSSL::OpenSSL)
318endif()
319if (NOT TARGET OpenSSL::Crypto)
320 set_target_properties(OpenSSL::OpenSSL PROPERTIES IMPORTED_GLOBAL TRUE)
321 add_library(OpenSSL::Crypto ALIAS OpenSSL::OpenSSL)
322endif()
323
324if (TARGET sdl2::sdl2) 314if (TARGET sdl2::sdl2)
325 # imported from the conan generated sdl2Config.cmake 315 # imported from the conan generated sdl2Config.cmake
326 set_target_properties(sdl2::sdl2 PROPERTIES IMPORTED_GLOBAL TRUE) 316 set_target_properties(sdl2::sdl2 PROPERTIES IMPORTED_GLOBAL TRUE)
diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake
index 83e4e9df2..311ba1c2e 100644
--- a/CMakeModules/GenerateSCMRev.cmake
+++ b/CMakeModules/GenerateSCMRev.cmake
@@ -51,6 +51,8 @@ endif()
51# The variable SRC_DIR must be passed into the script (since it uses the current build directory for all values of CMAKE_*_DIR) 51# The variable SRC_DIR must be passed into the script (since it uses the current build directory for all values of CMAKE_*_DIR)
52set(VIDEO_CORE "${SRC_DIR}/src/video_core") 52set(VIDEO_CORE "${SRC_DIR}/src/video_core")
53set(HASH_FILES 53set(HASH_FILES
54 "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp"
55 "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h"
54 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp" 56 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
55 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h" 57 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
56 "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp" 58 "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
diff --git a/dist/qt_themes/qdarkstyle/style.qss b/dist/qt_themes/qdarkstyle/style.qss
index 7d088a719..2d5c9761f 100644
--- a/dist/qt_themes/qdarkstyle/style.qss
+++ b/dist/qt_themes/qdarkstyle/style.qss
@@ -673,10 +673,6 @@ QTabWidget::pane {
673 border-bottom-left-radius: 2px; 673 border-bottom-left-radius: 2px;
674} 674}
675 675
676QTabWidget::tab-bar {
677 overflow: visible;
678}
679
680QTabBar { 676QTabBar {
681 qproperty-drawBase: 0; 677 qproperty-drawBase: 0;
682 border-radius: 3px; 678 border-radius: 3px;
diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt
index 9be5b2780..b80b27605 100644
--- a/externals/CMakeLists.txt
+++ b/externals/CMakeLists.txt
@@ -73,6 +73,15 @@ if (NOT LIBZIP_FOUND)
73endif() 73endif()
74 74
75if (ENABLE_WEB_SERVICE) 75if (ENABLE_WEB_SERVICE)
76 # LibreSSL
77 set(LIBRESSL_SKIP_INSTALL ON CACHE BOOL "")
78 add_subdirectory(libressl EXCLUDE_FROM_ALL)
79 target_include_directories(ssl INTERFACE ./libressl/include)
80 target_compile_definitions(ssl PRIVATE -DHAVE_INET_NTOP)
81 get_directory_property(OPENSSL_LIBRARIES
82 DIRECTORY libressl
83 DEFINITION OPENSSL_LIBS)
84
76 # lurlparser 85 # lurlparser
77 add_subdirectory(lurlparser EXCLUDE_FROM_ALL) 86 add_subdirectory(lurlparser EXCLUDE_FROM_ALL)
78 87
@@ -80,5 +89,5 @@ if (ENABLE_WEB_SERVICE)
80 add_library(httplib INTERFACE) 89 add_library(httplib INTERFACE)
81 target_include_directories(httplib INTERFACE ./httplib) 90 target_include_directories(httplib INTERFACE ./httplib)
82 target_compile_definitions(httplib INTERFACE -DCPPHTTPLIB_OPENSSL_SUPPORT) 91 target_compile_definitions(httplib INTERFACE -DCPPHTTPLIB_OPENSSL_SUPPORT)
83 target_link_libraries(httplib INTERFACE OpenSSL::SSL OpenSSL::Crypto) 92 target_link_libraries(httplib INTERFACE ${OPENSSL_LIBRARIES})
84endif() 93endif()
diff --git a/externals/libressl b/externals/libressl
new file mode 160000
Subproject 7d01cb01cb1a926ecb4c9c98b107ef3c26f59df
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 24b7a083c..0a3e2f4d1 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -32,6 +32,8 @@ add_custom_command(OUTPUT scm_rev.cpp
32 DEPENDS 32 DEPENDS
33 # WARNING! It was too much work to try and make a common location for this list, 33 # WARNING! It was too much work to try and make a common location for this list,
34 # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well 34 # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well
35 "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp"
36 "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h"
35 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp" 37 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
36 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h" 38 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
37 "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp" 39 "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h
index 794da8a52..a5f5d4fc1 100644
--- a/src/common/x64/xbyak_abi.h
+++ b/src/common/x64/xbyak_abi.h
@@ -11,7 +11,7 @@
11 11
12namespace Common::X64 { 12namespace Common::X64 {
13 13
14inline int RegToIndex(const Xbyak::Reg& reg) { 14inline std::size_t RegToIndex(const Xbyak::Reg& reg) {
15 using Kind = Xbyak::Reg::Kind; 15 using Kind = Xbyak::Reg::Kind;
16 ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0, 16 ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0,
17 "RegSet only support GPRs and XMM registers."); 17 "RegSet only support GPRs and XMM registers.");
@@ -19,17 +19,17 @@ inline int RegToIndex(const Xbyak::Reg& reg) {
19 return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16); 19 return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16);
20} 20}
21 21
22inline Xbyak::Reg64 IndexToReg64(int reg_index) { 22inline Xbyak::Reg64 IndexToReg64(std::size_t reg_index) {
23 ASSERT(reg_index < 16); 23 ASSERT(reg_index < 16);
24 return Xbyak::Reg64(reg_index); 24 return Xbyak::Reg64(static_cast<int>(reg_index));
25} 25}
26 26
27inline Xbyak::Xmm IndexToXmm(int reg_index) { 27inline Xbyak::Xmm IndexToXmm(std::size_t reg_index) {
28 ASSERT(reg_index >= 16 && reg_index < 32); 28 ASSERT(reg_index >= 16 && reg_index < 32);
29 return Xbyak::Xmm(reg_index - 16); 29 return Xbyak::Xmm(static_cast<int>(reg_index - 16));
30} 30}
31 31
32inline Xbyak::Reg IndexToReg(int reg_index) { 32inline Xbyak::Reg IndexToReg(std::size_t reg_index) {
33 if (reg_index < 16) { 33 if (reg_index < 16) {
34 return IndexToReg64(reg_index); 34 return IndexToReg64(reg_index);
35 } else { 35 } else {
@@ -151,9 +151,13 @@ constexpr size_t ABI_SHADOW_SPACE = 0;
151 151
152#endif 152#endif
153 153
154inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment, 154struct ABIFrameInfo {
155 size_t needed_frame_size, s32* out_subtraction, 155 s32 subtraction;
156 s32* out_xmm_offset) { 156 s32 xmm_offset;
157};
158
159inline ABIFrameInfo ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment,
160 size_t needed_frame_size) {
157 const auto count = (regs & ABI_ALL_GPRS).count(); 161 const auto count = (regs & ABI_ALL_GPRS).count();
158 rsp_alignment -= count * 8; 162 rsp_alignment -= count * 8;
159 size_t subtraction = 0; 163 size_t subtraction = 0;
@@ -170,33 +174,28 @@ inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment,
170 rsp_alignment -= subtraction; 174 rsp_alignment -= subtraction;
171 subtraction += rsp_alignment & 0xF; 175 subtraction += rsp_alignment & 0xF;
172 176
173 *out_subtraction = (s32)subtraction; 177 return ABIFrameInfo{static_cast<s32>(subtraction),
174 *out_xmm_offset = (s32)(subtraction - xmm_base_subtraction); 178 static_cast<s32>(subtraction - xmm_base_subtraction)};
175} 179}
176 180
177inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, 181inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
178 size_t rsp_alignment, size_t needed_frame_size = 0) { 182 size_t rsp_alignment, size_t needed_frame_size = 0) {
179 s32 subtraction, xmm_offset; 183 auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size);
180 ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); 184
181 for (std::size_t i = 0; i < regs.size(); ++i) { 185 for (std::size_t i = 0; i < regs.size(); ++i) {
182 if (regs[i] && ABI_ALL_GPRS[i]) { 186 if (regs[i] && ABI_ALL_GPRS[i]) {
183 code.push(IndexToReg64(static_cast<int>(i))); 187 code.push(IndexToReg64(i));
184 } 188 }
185 } 189 }
186 if (subtraction != 0) {
187 code.sub(code.rsp, subtraction);
188 }
189 190
190 for (int i = 0; i < regs.count(); i++) { 191 if (frame_info.subtraction != 0) {
191 if (regs.test(i) & ABI_ALL_GPRS.test(i)) { 192 code.sub(code.rsp, frame_info.subtraction);
192 code.push(IndexToReg64(i));
193 }
194 } 193 }
195 194
196 for (std::size_t i = 0; i < regs.size(); ++i) { 195 for (std::size_t i = 0; i < regs.size(); ++i) {
197 if (regs[i] && ABI_ALL_XMMS[i]) { 196 if (regs[i] && ABI_ALL_XMMS[i]) {
198 code.movaps(code.xword[code.rsp + xmm_offset], IndexToXmm(static_cast<int>(i))); 197 code.movaps(code.xword[code.rsp + frame_info.xmm_offset], IndexToXmm(i));
199 xmm_offset += 0x10; 198 frame_info.xmm_offset += 0x10;
200 } 199 }
201 } 200 }
202 201
@@ -205,59 +204,23 @@ inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::b
205 204
206inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, 205inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
207 size_t rsp_alignment, size_t needed_frame_size = 0) { 206 size_t rsp_alignment, size_t needed_frame_size = 0) {
208 s32 subtraction, xmm_offset; 207 auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size);
209 ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
210 208
211 for (std::size_t i = 0; i < regs.size(); ++i) { 209 for (std::size_t i = 0; i < regs.size(); ++i) {
212 if (regs[i] && ABI_ALL_XMMS[i]) { 210 if (regs[i] && ABI_ALL_XMMS[i]) {
213 code.movaps(IndexToXmm(static_cast<int>(i)), code.xword[code.rsp + xmm_offset]); 211 code.movaps(IndexToXmm(i), code.xword[code.rsp + frame_info.xmm_offset]);
214 xmm_offset += 0x10; 212 frame_info.xmm_offset += 0x10;
215 } 213 }
216 } 214 }
217 215
218 if (subtraction != 0) { 216 if (frame_info.subtraction != 0) {
219 code.add(code.rsp, subtraction); 217 code.add(code.rsp, frame_info.subtraction);
220 } 218 }
221 219
222 // GPRs need to be popped in reverse order 220 // GPRs need to be popped in reverse order
223 for (int i = 15; i >= 0; i--) { 221 for (std::size_t j = 0; j < regs.size(); ++j) {
224 if (regs[i]) { 222 const std::size_t i = regs.size() - j - 1;
225 code.pop(IndexToReg64(i));
226 }
227 }
228}
229
230inline size_t ABI_PushRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs,
231 size_t rsp_alignment,
232 size_t needed_frame_size = 0) {
233 s32 subtraction, xmm_offset;
234 ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
235
236 for (std::size_t i = 0; i < regs.size(); ++i) {
237 if (regs[i] && ABI_ALL_GPRS[i]) { 223 if (regs[i] && ABI_ALL_GPRS[i]) {
238 code.push(IndexToReg64(static_cast<int>(i)));
239 }
240 }
241
242 if (subtraction != 0) {
243 code.sub(code.rsp, subtraction);
244 }
245
246 return ABI_SHADOW_SPACE;
247}
248
249inline void ABI_PopRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs,
250 size_t rsp_alignment, size_t needed_frame_size = 0) {
251 s32 subtraction, xmm_offset;
252 ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
253
254 if (subtraction != 0) {
255 code.add(code.rsp, subtraction);
256 }
257
258 // GPRs need to be popped in reverse order
259 for (int i = 15; i >= 0; i--) {
260 if (regs[i]) {
261 code.pop(IndexToReg64(i)); 224 code.pop(IndexToReg64(i));
262 } 225 }
263 } 226 }
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
index 9bc86e3b9..e7456a8c3 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -50,7 +50,8 @@ public:
50 } 50 }
51 51
52 void InterpreterFallback(u32 pc, std::size_t num_instructions) override { 52 void InterpreterFallback(u32 pc, std::size_t num_instructions) override {
53 UNIMPLEMENTED(); 53 UNIMPLEMENTED_MSG("This should never happen, pc = {:08X}, code = {:08X}", pc,
54 MemoryReadCode(pc));
54 } 55 }
55 56
56 void ExceptionRaised(u32 pc, Dynarmic::A32::Exception exception) override { 57 void ExceptionRaised(u32 pc, Dynarmic::A32::Exception exception) override {
@@ -89,8 +90,6 @@ public:
89 90
90 ARM_Dynarmic_32& parent; 91 ARM_Dynarmic_32& parent;
91 std::size_t num_interpreted_instructions{}; 92 std::size_t num_interpreted_instructions{};
92 u64 tpidrro_el0{};
93 u64 tpidr_el0{};
94}; 93};
95 94
96std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable& page_table, 95std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable& page_table,
@@ -99,7 +98,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable&
99 config.callbacks = cb.get(); 98 config.callbacks = cb.get();
100 // TODO(bunnei): Implement page table for 32-bit 99 // TODO(bunnei): Implement page table for 32-bit
101 // config.page_table = &page_table.pointers; 100 // config.page_table = &page_table.pointers;
102 config.coprocessors[15] = std::make_shared<DynarmicCP15>((u32*)&CP15_regs[0]); 101 config.coprocessors[15] = cp15;
103 config.define_unpredictable_behaviour = true; 102 config.define_unpredictable_behaviour = true;
104 return std::make_unique<Dynarmic::A32::Jit>(config); 103 return std::make_unique<Dynarmic::A32::Jit>(config);
105} 104}
@@ -112,13 +111,13 @@ void ARM_Dynarmic_32::Run() {
112} 111}
113 112
114void ARM_Dynarmic_32::Step() { 113void ARM_Dynarmic_32::Step() {
115 cb->InterpreterFallback(jit->Regs()[15], 1); 114 jit->Step();
116} 115}
117 116
118ARM_Dynarmic_32::ARM_Dynarmic_32(System& system, ExclusiveMonitor& exclusive_monitor, 117ARM_Dynarmic_32::ARM_Dynarmic_32(System& system, ExclusiveMonitor& exclusive_monitor,
119 std::size_t core_index) 118 std::size_t core_index)
120 : ARM_Interface{system}, 119 : ARM_Interface{system}, cb(std::make_unique<DynarmicCallbacks32>(*this)),
121 cb(std::make_unique<DynarmicCallbacks32>(*this)), core_index{core_index}, 120 cp15(std::make_shared<DynarmicCP15>(*this)), core_index{core_index},
122 exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {} 121 exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {}
123 122
124ARM_Dynarmic_32::~ARM_Dynarmic_32() = default; 123ARM_Dynarmic_32::~ARM_Dynarmic_32() = default;
@@ -154,19 +153,19 @@ void ARM_Dynarmic_32::SetPSTATE(u32 cpsr) {
154} 153}
155 154
156u64 ARM_Dynarmic_32::GetTlsAddress() const { 155u64 ARM_Dynarmic_32::GetTlsAddress() const {
157 return CP15_regs[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)]; 156 return cp15->uro;
158} 157}
159 158
160void ARM_Dynarmic_32::SetTlsAddress(VAddr address) { 159void ARM_Dynarmic_32::SetTlsAddress(VAddr address) {
161 CP15_regs[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)] = static_cast<u32>(address); 160 cp15->uro = static_cast<u32>(address);
162} 161}
163 162
164u64 ARM_Dynarmic_32::GetTPIDR_EL0() const { 163u64 ARM_Dynarmic_32::GetTPIDR_EL0() const {
165 return cb->tpidr_el0; 164 return cp15->uprw;
166} 165}
167 166
168void ARM_Dynarmic_32::SetTPIDR_EL0(u64 value) { 167void ARM_Dynarmic_32::SetTPIDR_EL0(u64 value) {
169 cb->tpidr_el0 = value; 168 cp15->uprw = value;
170} 169}
171 170
172void ARM_Dynarmic_32::SaveContext(ThreadContext32& ctx) { 171void ARM_Dynarmic_32::SaveContext(ThreadContext32& ctx) {
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.h b/src/core/arm/dynarmic/arm_dynarmic_32.h
index 8ba9cea8f..e5b92d7bb 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.h
@@ -22,6 +22,7 @@ class Memory;
22namespace Core { 22namespace Core {
23 23
24class DynarmicCallbacks32; 24class DynarmicCallbacks32;
25class DynarmicCP15;
25class DynarmicExclusiveMonitor; 26class DynarmicExclusiveMonitor;
26class System; 27class System;
27 28
@@ -66,12 +67,14 @@ private:
66 std::unordered_map<JitCacheKey, std::shared_ptr<Dynarmic::A32::Jit>, Common::PairHash>; 67 std::unordered_map<JitCacheKey, std::shared_ptr<Dynarmic::A32::Jit>, Common::PairHash>;
67 68
68 friend class DynarmicCallbacks32; 69 friend class DynarmicCallbacks32;
70 friend class DynarmicCP15;
71
69 std::unique_ptr<DynarmicCallbacks32> cb; 72 std::unique_ptr<DynarmicCallbacks32> cb;
70 JitCacheType jit_cache; 73 JitCacheType jit_cache;
71 std::shared_ptr<Dynarmic::A32::Jit> jit; 74 std::shared_ptr<Dynarmic::A32::Jit> jit;
75 std::shared_ptr<DynarmicCP15> cp15;
72 std::size_t core_index; 76 std::size_t core_index;
73 DynarmicExclusiveMonitor& exclusive_monitor; 77 DynarmicExclusiveMonitor& exclusive_monitor;
74 std::array<u32, 84> CP15_regs{};
75}; 78};
76 79
77} // namespace Core 80} // namespace Core
diff --git a/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp b/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp
index 3fdcdebde..d43e4dd70 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp
@@ -2,79 +2,132 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <fmt/format.h>
6#include "common/logging/log.h"
7#include "core/arm/dynarmic/arm_dynarmic_32.h"
5#include "core/arm/dynarmic/arm_dynarmic_cp15.h" 8#include "core/arm/dynarmic/arm_dynarmic_cp15.h"
9#include "core/core.h"
10#include "core/core_timing.h"
11#include "core/core_timing_util.h"
6 12
7using Callback = Dynarmic::A32::Coprocessor::Callback; 13using Callback = Dynarmic::A32::Coprocessor::Callback;
8using CallbackOrAccessOneWord = Dynarmic::A32::Coprocessor::CallbackOrAccessOneWord; 14using CallbackOrAccessOneWord = Dynarmic::A32::Coprocessor::CallbackOrAccessOneWord;
9using CallbackOrAccessTwoWords = Dynarmic::A32::Coprocessor::CallbackOrAccessTwoWords; 15using CallbackOrAccessTwoWords = Dynarmic::A32::Coprocessor::CallbackOrAccessTwoWords;
10 16
17template <>
18struct fmt::formatter<Dynarmic::A32::CoprocReg> {
19 constexpr auto parse(format_parse_context& ctx) {
20 return ctx.begin();
21 }
22 template <typename FormatContext>
23 auto format(const Dynarmic::A32::CoprocReg& reg, FormatContext& ctx) {
24 return format_to(ctx.out(), "cp{}", static_cast<size_t>(reg));
25 }
26};
27
28namespace Core {
29
30static u32 dummy_value;
31
11std::optional<Callback> DynarmicCP15::CompileInternalOperation(bool two, unsigned opc1, 32std::optional<Callback> DynarmicCP15::CompileInternalOperation(bool two, unsigned opc1,
12 CoprocReg CRd, CoprocReg CRn, 33 CoprocReg CRd, CoprocReg CRn,
13 CoprocReg CRm, unsigned opc2) { 34 CoprocReg CRm, unsigned opc2) {
35 LOG_CRITICAL(Core_ARM, "CP15: cdp{} p15, {}, {}, {}, {}, {}", two ? "2" : "", opc1, CRd, CRn,
36 CRm, opc2);
14 return {}; 37 return {};
15} 38}
16 39
17CallbackOrAccessOneWord DynarmicCP15::CompileSendOneWord(bool two, unsigned opc1, CoprocReg CRn, 40CallbackOrAccessOneWord DynarmicCP15::CompileSendOneWord(bool two, unsigned opc1, CoprocReg CRn,
18 CoprocReg CRm, unsigned opc2) { 41 CoprocReg CRm, unsigned opc2) {
19 // TODO(merry): Privileged CP15 registers
20
21 if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C5 && opc2 == 4) { 42 if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C5 && opc2 == 4) {
43 // CP15_FLUSH_PREFETCH_BUFFER
22 // This is a dummy write, we ignore the value written here. 44 // This is a dummy write, we ignore the value written here.
23 return &CP15[static_cast<std::size_t>(CP15Register::CP15_FLUSH_PREFETCH_BUFFER)]; 45 return &dummy_value;
24 } 46 }
25 47
26 if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C10) { 48 if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C10) {
27 switch (opc2) { 49 switch (opc2) {
28 case 4: 50 case 4:
51 // CP15_DATA_SYNC_BARRIER
29 // This is a dummy write, we ignore the value written here. 52 // This is a dummy write, we ignore the value written here.
30 return &CP15[static_cast<std::size_t>(CP15Register::CP15_DATA_SYNC_BARRIER)]; 53 return &dummy_value;
31 case 5: 54 case 5:
55 // CP15_DATA_MEMORY_BARRIER
32 // This is a dummy write, we ignore the value written here. 56 // This is a dummy write, we ignore the value written here.
33 return &CP15[static_cast<std::size_t>(CP15Register::CP15_DATA_MEMORY_BARRIER)]; 57 return &dummy_value;
34 default:
35 return {};
36 } 58 }
37 } 59 }
38 60
39 if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0 && opc2 == 2) { 61 if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0 && opc2 == 2) {
40 return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_UPRW)]; 62 // CP15_THREAD_UPRW
63 return &uprw;
41 } 64 }
42 65
66 LOG_CRITICAL(Core_ARM, "CP15: mcr{} p15, {}, <Rt>, {}, {}, {}", two ? "2" : "", opc1, CRn, CRm,
67 opc2);
43 return {}; 68 return {};
44} 69}
45 70
46CallbackOrAccessTwoWords DynarmicCP15::CompileSendTwoWords(bool two, unsigned opc, CoprocReg CRm) { 71CallbackOrAccessTwoWords DynarmicCP15::CompileSendTwoWords(bool two, unsigned opc, CoprocReg CRm) {
72 LOG_CRITICAL(Core_ARM, "CP15: mcrr{} p15, {}, <Rt>, <Rt2>, {}", two ? "2" : "", opc, CRm);
47 return {}; 73 return {};
48} 74}
49 75
50CallbackOrAccessOneWord DynarmicCP15::CompileGetOneWord(bool two, unsigned opc1, CoprocReg CRn, 76CallbackOrAccessOneWord DynarmicCP15::CompileGetOneWord(bool two, unsigned opc1, CoprocReg CRn,
51 CoprocReg CRm, unsigned opc2) { 77 CoprocReg CRm, unsigned opc2) {
52 // TODO(merry): Privileged CP15 registers
53
54 if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0) { 78 if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0) {
55 switch (opc2) { 79 switch (opc2) {
56 case 2: 80 case 2:
57 return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_UPRW)]; 81 // CP15_THREAD_UPRW
82 return &uprw;
58 case 3: 83 case 3:
59 return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)]; 84 // CP15_THREAD_URO
60 default: 85 return &uro;
61 return {};
62 } 86 }
63 } 87 }
64 88
89 LOG_CRITICAL(Core_ARM, "CP15: mrc{} p15, {}, <Rt>, {}, {}, {}", two ? "2" : "", opc1, CRn, CRm,
90 opc2);
65 return {}; 91 return {};
66} 92}
67 93
68CallbackOrAccessTwoWords DynarmicCP15::CompileGetTwoWords(bool two, unsigned opc, CoprocReg CRm) { 94CallbackOrAccessTwoWords DynarmicCP15::CompileGetTwoWords(bool two, unsigned opc, CoprocReg CRm) {
95 if (!two && opc == 0 && CRm == CoprocReg::C14) {
96 // CNTPCT
97 const auto callback = static_cast<u64 (*)(Dynarmic::A32::Jit*, void*, u32, u32)>(
98 [](Dynarmic::A32::Jit*, void* arg, u32, u32) -> u64 {
99 ARM_Dynarmic_32& parent = *(ARM_Dynarmic_32*)arg;
100 return Timing::CpuCyclesToClockCycles(parent.system.CoreTiming().GetTicks());
101 });
102 return Dynarmic::A32::Coprocessor::Callback{callback, (void*)&parent};
103 }
104
105 LOG_CRITICAL(Core_ARM, "CP15: mrrc{} p15, {}, <Rt>, <Rt2>, {}", two ? "2" : "", opc, CRm);
69 return {}; 106 return {};
70} 107}
71 108
72std::optional<Callback> DynarmicCP15::CompileLoadWords(bool two, bool long_transfer, CoprocReg CRd, 109std::optional<Callback> DynarmicCP15::CompileLoadWords(bool two, bool long_transfer, CoprocReg CRd,
73 std::optional<u8> option) { 110 std::optional<u8> option) {
111 if (option) {
112 LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...], {}", two ? "2" : "",
113 long_transfer ? "l" : "", CRd, *option);
114 } else {
115 LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...]", two ? "2" : "",
116 long_transfer ? "l" : "", CRd);
117 }
74 return {}; 118 return {};
75} 119}
76 120
77std::optional<Callback> DynarmicCP15::CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd, 121std::optional<Callback> DynarmicCP15::CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd,
78 std::optional<u8> option) { 122 std::optional<u8> option) {
123 if (option) {
124 LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...], {}", two ? "2" : "",
125 long_transfer ? "l" : "", CRd, *option);
126 } else {
127 LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...]", two ? "2" : "",
128 long_transfer ? "l" : "", CRd);
129 }
79 return {}; 130 return {};
80} 131}
132
133} // namespace Core
diff --git a/src/core/arm/dynarmic/arm_dynarmic_cp15.h b/src/core/arm/dynarmic/arm_dynarmic_cp15.h
index 07bcde5f9..7356d252e 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_cp15.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_cp15.h
@@ -10,128 +10,15 @@
10#include <dynarmic/A32/coprocessor.h> 10#include <dynarmic/A32/coprocessor.h>
11#include "common/common_types.h" 11#include "common/common_types.h"
12 12
13enum class CP15Register { 13namespace Core {
14 // c0 - Information registers
15 CP15_MAIN_ID,
16 CP15_CACHE_TYPE,
17 CP15_TCM_STATUS,
18 CP15_TLB_TYPE,
19 CP15_CPU_ID,
20 CP15_PROCESSOR_FEATURE_0,
21 CP15_PROCESSOR_FEATURE_1,
22 CP15_DEBUG_FEATURE_0,
23 CP15_AUXILIARY_FEATURE_0,
24 CP15_MEMORY_MODEL_FEATURE_0,
25 CP15_MEMORY_MODEL_FEATURE_1,
26 CP15_MEMORY_MODEL_FEATURE_2,
27 CP15_MEMORY_MODEL_FEATURE_3,
28 CP15_ISA_FEATURE_0,
29 CP15_ISA_FEATURE_1,
30 CP15_ISA_FEATURE_2,
31 CP15_ISA_FEATURE_3,
32 CP15_ISA_FEATURE_4,
33 14
34 // c1 - Control registers 15class ARM_Dynarmic_32;
35 CP15_CONTROL,
36 CP15_AUXILIARY_CONTROL,
37 CP15_COPROCESSOR_ACCESS_CONTROL,
38
39 // c2 - Translation table registers
40 CP15_TRANSLATION_BASE_TABLE_0,
41 CP15_TRANSLATION_BASE_TABLE_1,
42 CP15_TRANSLATION_BASE_CONTROL,
43 CP15_DOMAIN_ACCESS_CONTROL,
44 CP15_RESERVED,
45
46 // c5 - Fault status registers
47 CP15_FAULT_STATUS,
48 CP15_INSTR_FAULT_STATUS,
49 CP15_COMBINED_DATA_FSR = CP15_FAULT_STATUS,
50 CP15_INST_FSR,
51
52 // c6 - Fault Address registers
53 CP15_FAULT_ADDRESS,
54 CP15_COMBINED_DATA_FAR = CP15_FAULT_ADDRESS,
55 CP15_WFAR,
56 CP15_IFAR,
57
58 // c7 - Cache operation registers
59 CP15_WAIT_FOR_INTERRUPT,
60 CP15_PHYS_ADDRESS,
61 CP15_INVALIDATE_INSTR_CACHE,
62 CP15_INVALIDATE_INSTR_CACHE_USING_MVA,
63 CP15_INVALIDATE_INSTR_CACHE_USING_INDEX,
64 CP15_FLUSH_PREFETCH_BUFFER,
65 CP15_FLUSH_BRANCH_TARGET_CACHE,
66 CP15_FLUSH_BRANCH_TARGET_CACHE_ENTRY,
67 CP15_INVALIDATE_DATA_CACHE,
68 CP15_INVALIDATE_DATA_CACHE_LINE_USING_MVA,
69 CP15_INVALIDATE_DATA_CACHE_LINE_USING_INDEX,
70 CP15_INVALIDATE_DATA_AND_INSTR_CACHE,
71 CP15_CLEAN_DATA_CACHE,
72 CP15_CLEAN_DATA_CACHE_LINE_USING_MVA,
73 CP15_CLEAN_DATA_CACHE_LINE_USING_INDEX,
74 CP15_DATA_SYNC_BARRIER,
75 CP15_DATA_MEMORY_BARRIER,
76 CP15_CLEAN_AND_INVALIDATE_DATA_CACHE,
77 CP15_CLEAN_AND_INVALIDATE_DATA_CACHE_LINE_USING_MVA,
78 CP15_CLEAN_AND_INVALIDATE_DATA_CACHE_LINE_USING_INDEX,
79
80 // c8 - TLB operations
81 CP15_INVALIDATE_ITLB,
82 CP15_INVALIDATE_ITLB_SINGLE_ENTRY,
83 CP15_INVALIDATE_ITLB_ENTRY_ON_ASID_MATCH,
84 CP15_INVALIDATE_ITLB_ENTRY_ON_MVA,
85 CP15_INVALIDATE_DTLB,
86 CP15_INVALIDATE_DTLB_SINGLE_ENTRY,
87 CP15_INVALIDATE_DTLB_ENTRY_ON_ASID_MATCH,
88 CP15_INVALIDATE_DTLB_ENTRY_ON_MVA,
89 CP15_INVALIDATE_UTLB,
90 CP15_INVALIDATE_UTLB_SINGLE_ENTRY,
91 CP15_INVALIDATE_UTLB_ENTRY_ON_ASID_MATCH,
92 CP15_INVALIDATE_UTLB_ENTRY_ON_MVA,
93
94 // c9 - Data cache lockdown register
95 CP15_DATA_CACHE_LOCKDOWN,
96
97 // c10 - TLB/Memory map registers
98 CP15_TLB_LOCKDOWN,
99 CP15_PRIMARY_REGION_REMAP,
100 CP15_NORMAL_REGION_REMAP,
101
102 // c13 - Thread related registers
103 CP15_PID,
104 CP15_CONTEXT_ID,
105 CP15_THREAD_UPRW, // Thread ID register - User/Privileged Read/Write
106 CP15_THREAD_URO, // Thread ID register - User Read Only (Privileged R/W)
107 CP15_THREAD_PRW, // Thread ID register - Privileged R/W only.
108
109 // c15 - Performance and TLB lockdown registers
110 CP15_PERFORMANCE_MONITOR_CONTROL,
111 CP15_CYCLE_COUNTER,
112 CP15_COUNT_0,
113 CP15_COUNT_1,
114 CP15_READ_MAIN_TLB_LOCKDOWN_ENTRY,
115 CP15_WRITE_MAIN_TLB_LOCKDOWN_ENTRY,
116 CP15_MAIN_TLB_LOCKDOWN_VIRT_ADDRESS,
117 CP15_MAIN_TLB_LOCKDOWN_PHYS_ADDRESS,
118 CP15_MAIN_TLB_LOCKDOWN_ATTRIBUTE,
119 CP15_TLB_DEBUG_CONTROL,
120
121 // Skyeye defined
122 CP15_TLB_FAULT_ADDR,
123 CP15_TLB_FAULT_STATUS,
124
125 // Not an actual register.
126 // All registers should be defined above this.
127 CP15_REGISTER_COUNT,
128};
129 16
130class DynarmicCP15 final : public Dynarmic::A32::Coprocessor { 17class DynarmicCP15 final : public Dynarmic::A32::Coprocessor {
131public: 18public:
132 using CoprocReg = Dynarmic::A32::CoprocReg; 19 using CoprocReg = Dynarmic::A32::CoprocReg;
133 20
134 explicit DynarmicCP15(u32* cp15) : CP15(cp15){}; 21 explicit DynarmicCP15(ARM_Dynarmic_32& parent) : parent(parent) {}
135 22
136 std::optional<Callback> CompileInternalOperation(bool two, unsigned opc1, CoprocReg CRd, 23 std::optional<Callback> CompileInternalOperation(bool two, unsigned opc1, CoprocReg CRd,
137 CoprocReg CRn, CoprocReg CRm, 24 CoprocReg CRn, CoprocReg CRm,
@@ -147,6 +34,9 @@ public:
147 std::optional<Callback> CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd, 34 std::optional<Callback> CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd,
148 std::optional<u8> option) override; 35 std::optional<u8> option) override;
149 36
150private: 37 ARM_Dynarmic_32& parent;
151 u32* CP15{}; 38 u32 uprw;
39 u32 uro;
152}; 40};
41
42} // namespace Core
diff --git a/src/core/settings.h b/src/core/settings.h
index 36cd66fd4..33e1e06cd 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -437,7 +437,7 @@ struct Values {
437 bool renderer_debug; 437 bool renderer_debug;
438 int vulkan_device; 438 int vulkan_device;
439 439
440 float resolution_factor; 440 u16 resolution_factor{1};
441 int aspect_ratio; 441 int aspect_ratio;
442 int max_anisotropy; 442 int max_anisotropy;
443 bool use_frame_limit; 443 bool use_frame_limit;
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 39d5d8401..099bb446e 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -52,6 +52,8 @@ add_library(video_core STATIC
52 rasterizer_interface.h 52 rasterizer_interface.h
53 renderer_base.cpp 53 renderer_base.cpp
54 renderer_base.h 54 renderer_base.h
55 renderer_opengl/gl_arb_decompiler.cpp
56 renderer_opengl/gl_arb_decompiler.h
55 renderer_opengl/gl_buffer_cache.cpp 57 renderer_opengl/gl_buffer_cache.cpp
56 renderer_opengl/gl_buffer_cache.h 58 renderer_opengl/gl_buffer_cache.h
57 renderer_opengl/gl_device.cpp 59 renderer_opengl/gl_device.cpp
diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp
index 11c1cc3be..d4a97ec7b 100644
--- a/src/video_core/macro/macro_jit_x64.cpp
+++ b/src/video_core/macro/macro_jit_x64.cpp
@@ -14,22 +14,16 @@ MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255
14MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0)); 14MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0));
15 15
16namespace Tegra { 16namespace Tegra {
17static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r9; 17static const Xbyak::Reg64 STATE = Xbyak::util::rbx;
18static const Xbyak::Reg64 REGISTERS = Xbyak::util::r10; 18static const Xbyak::Reg32 RESULT = Xbyak::util::ebp;
19static const Xbyak::Reg64 STATE = Xbyak::util::r11; 19static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r12;
20static const Xbyak::Reg64 NEXT_PARAMETER = Xbyak::util::r12;
21static const Xbyak::Reg32 RESULT = Xbyak::util::r13d;
22static const Xbyak::Reg64 RESULT_64 = Xbyak::util::r13;
23static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; 20static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d;
24static const Xbyak::Reg64 METHOD_ADDRESS_64 = Xbyak::util::r14;
25static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; 21static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15;
26 22
27static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ 23static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
28 PARAMETERS,
29 REGISTERS,
30 STATE, 24 STATE,
31 NEXT_PARAMETER,
32 RESULT, 25 RESULT,
26 PARAMETERS,
33 METHOD_ADDRESS, 27 METHOD_ADDRESS,
34 BRANCH_HOLDER, 28 BRANCH_HOLDER,
35}); 29});
@@ -53,8 +47,7 @@ void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) {
53 JITState state{}; 47 JITState state{};
54 state.maxwell3d = &maxwell3d; 48 state.maxwell3d = &maxwell3d;
55 state.registers = {}; 49 state.registers = {};
56 state.parameters = parameters.data(); 50 program(&state, parameters.data());
57 program(&state);
58} 51}
59 52
60void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { 53void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) {
@@ -64,18 +57,18 @@ void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) {
64 const bool is_move_operation = !is_a_zero && is_b_zero; 57 const bool is_move_operation = !is_a_zero && is_b_zero;
65 const bool has_zero_register = is_a_zero || is_b_zero; 58 const bool has_zero_register = is_a_zero || is_b_zero;
66 59
67 Xbyak::Reg64 src_a; 60 Xbyak::Reg32 src_a;
68 Xbyak::Reg32 src_b; 61 Xbyak::Reg32 src_b;
69 62
70 if (!optimizer.zero_reg_skip) { 63 if (!optimizer.zero_reg_skip) {
71 src_a = Compile_GetRegister(opcode.src_a, RESULT_64); 64 src_a = Compile_GetRegister(opcode.src_a, RESULT);
72 src_b = Compile_GetRegister(opcode.src_b, ebx); 65 src_b = Compile_GetRegister(opcode.src_b, eax);
73 } else { 66 } else {
74 if (!is_a_zero) { 67 if (!is_a_zero) {
75 src_a = Compile_GetRegister(opcode.src_a, RESULT_64); 68 src_a = Compile_GetRegister(opcode.src_a, RESULT);
76 } 69 }
77 if (!is_b_zero) { 70 if (!is_b_zero) {
78 src_b = Compile_GetRegister(opcode.src_b, ebx); 71 src_b = Compile_GetRegister(opcode.src_b, eax);
79 } 72 }
80 } 73 }
81 Xbyak::Label skip_carry{}; 74 Xbyak::Label skip_carry{};
@@ -302,22 +295,22 @@ void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) {
302 sub(result, opcode.immediate * -1); 295 sub(result, opcode.immediate * -1);
303 } 296 }
304 } 297 }
305 Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0); 298 Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
306 mov(Common::X64::ABI_PARAM1, qword[STATE]); 299 mov(Common::X64::ABI_PARAM1, qword[STATE]);
307 mov(Common::X64::ABI_PARAM2, RESULT); 300 mov(Common::X64::ABI_PARAM2, RESULT);
308 Common::X64::CallFarFunction(*this, &Read); 301 Common::X64::CallFarFunction(*this, &Read);
309 Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0); 302 Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
310 mov(RESULT, Common::X64::ABI_RETURN.cvt32()); 303 mov(RESULT, Common::X64::ABI_RETURN.cvt32());
311 Compile_ProcessResult(opcode.result_operation, opcode.dst); 304 Compile_ProcessResult(opcode.result_operation, opcode.dst);
312} 305}
313 306
314void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { 307void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) {
315 Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0); 308 Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
316 mov(Common::X64::ABI_PARAM1, qword[STATE]); 309 mov(Common::X64::ABI_PARAM1, qword[STATE]);
317 mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS); 310 mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS);
318 mov(Common::X64::ABI_PARAM3, value); 311 mov(Common::X64::ABI_PARAM3, value);
319 Common::X64::CallFarFunction(*this, &Send); 312 Common::X64::CallFarFunction(*this, &Send);
320 Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0); 313 Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
321 314
322 Xbyak::Label dont_process{}; 315 Xbyak::Label dont_process{};
323 // Get increment 316 // Get increment
@@ -329,7 +322,7 @@ void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) {
329 and_(METHOD_ADDRESS, 0xfff); 322 and_(METHOD_ADDRESS, 0xfff);
330 shr(ecx, 12); 323 shr(ecx, 12);
331 and_(ecx, 0x3f); 324 and_(ecx, 0x3f);
332 lea(eax, ptr[rcx + METHOD_ADDRESS_64]); 325 lea(eax, ptr[rcx + METHOD_ADDRESS.cvt64()]);
333 sal(ecx, 12); 326 sal(ecx, 12);
334 or_(eax, ecx); 327 or_(eax, ecx);
335 328
@@ -421,19 +414,15 @@ void MacroJITx64Impl::Compile() {
421 bool keep_executing = true; 414 bool keep_executing = true;
422 labels.fill(Xbyak::Label()); 415 labels.fill(Xbyak::Label());
423 416
424 Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); 417 Common::X64::ABI_PushRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8);
425 // JIT state 418 // JIT state
426 mov(STATE, Common::X64::ABI_PARAM1); 419 mov(STATE, Common::X64::ABI_PARAM1);
427 mov(PARAMETERS, qword[Common::X64::ABI_PARAM1 + 420 mov(PARAMETERS, Common::X64::ABI_PARAM2);
428 static_cast<Xbyak::uint32>(offsetof(JITState, parameters))]);
429 mov(REGISTERS, Common::X64::ABI_PARAM1);
430 add(REGISTERS, static_cast<Xbyak::uint32>(offsetof(JITState, registers)));
431 xor_(RESULT, RESULT); 421 xor_(RESULT, RESULT);
432 xor_(METHOD_ADDRESS, METHOD_ADDRESS); 422 xor_(METHOD_ADDRESS, METHOD_ADDRESS);
433 xor_(NEXT_PARAMETER, NEXT_PARAMETER);
434 xor_(BRANCH_HOLDER, BRANCH_HOLDER); 423 xor_(BRANCH_HOLDER, BRANCH_HOLDER);
435 424
436 mov(dword[REGISTERS + 4], Compile_FetchParameter()); 425 mov(dword[STATE + offsetof(JITState, registers) + 4], Compile_FetchParameter());
437 426
438 // Track get register for zero registers and mark it as no-op 427 // Track get register for zero registers and mark it as no-op
439 optimizer.zero_reg_skip = true; 428 optimizer.zero_reg_skip = true;
@@ -463,7 +452,7 @@ void MacroJITx64Impl::Compile() {
463 452
464 L(end_of_code); 453 L(end_of_code);
465 454
466 Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); 455 Common::X64::ABI_PopRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8);
467 ret(); 456 ret();
468 ready(); 457 ready();
469 program = getCode<ProgramType>(); 458 program = getCode<ProgramType>();
@@ -537,8 +526,8 @@ bool MacroJITx64Impl::Compile_NextInstruction() {
537} 526}
538 527
539Xbyak::Reg32 Tegra::MacroJITx64Impl::Compile_FetchParameter() { 528Xbyak::Reg32 Tegra::MacroJITx64Impl::Compile_FetchParameter() {
540 mov(eax, dword[PARAMETERS + NEXT_PARAMETER * sizeof(u32)]); 529 mov(eax, dword[PARAMETERS]);
541 inc(NEXT_PARAMETER); 530 add(PARAMETERS, sizeof(u32));
542 return eax; 531 return eax;
543} 532}
544 533
@@ -547,31 +536,12 @@ Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) {
547 // Register 0 is always zero 536 // Register 0 is always zero
548 xor_(dst, dst); 537 xor_(dst, dst);
549 } else { 538 } else {
550 mov(dst, dword[REGISTERS + index * sizeof(u32)]); 539 mov(dst, dword[STATE + offsetof(JITState, registers) + index * sizeof(u32)]);
551 } 540 }
552 541
553 return dst; 542 return dst;
554} 543}
555 544
556Xbyak::Reg64 Tegra::MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg64 dst) {
557 if (index == 0) {
558 // Register 0 is always zero
559 xor_(dst, dst);
560 } else {
561 mov(dst, dword[REGISTERS + index * sizeof(u32)]);
562 }
563
564 return dst;
565}
566
567void Tegra::MacroJITx64Impl::Compile_WriteCarry(Xbyak::Reg64 dst) {
568 Xbyak::Label zero{}, end{};
569 xor_(ecx, ecx);
570 shr(dst, 32);
571 setne(cl);
572 mov(dword[STATE + offsetof(JITState, carry_flag)], ecx);
573}
574
575void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) { 545void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) {
576 auto SetRegister = [=](u32 reg, Xbyak::Reg32 result) { 546 auto SetRegister = [=](u32 reg, Xbyak::Reg32 result) {
577 // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero 547 // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero
@@ -579,7 +549,7 @@ void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u3
579 if (reg == 0) { 549 if (reg == 0) {
580 return; 550 return;
581 } 551 }
582 mov(dword[REGISTERS + reg * sizeof(u32)], result); 552 mov(dword[STATE + offsetof(JITState, registers) + reg * sizeof(u32)], result);
583 }; 553 };
584 auto SetMethodAddress = [=](Xbyak::Reg32 reg) { mov(METHOD_ADDRESS, reg); }; 554 auto SetMethodAddress = [=](Xbyak::Reg32 reg) { mov(METHOD_ADDRESS, reg); };
585 555
diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h
index 71f738b9a..51ec090b8 100644
--- a/src/video_core/macro/macro_jit_x64.h
+++ b/src/video_core/macro/macro_jit_x64.h
@@ -55,8 +55,6 @@ private:
55 55
56 Xbyak::Reg32 Compile_FetchParameter(); 56 Xbyak::Reg32 Compile_FetchParameter();
57 Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst); 57 Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst);
58 Xbyak::Reg64 Compile_GetRegister(u32 index, Xbyak::Reg64 dst);
59 void Compile_WriteCarry(Xbyak::Reg64 dst);
60 58
61 void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg); 59 void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg);
62 void Compile_Send(Xbyak::Reg32 value); 60 void Compile_Send(Xbyak::Reg32 value);
@@ -67,11 +65,10 @@ private:
67 struct JITState { 65 struct JITState {
68 Engines::Maxwell3D* maxwell3d{}; 66 Engines::Maxwell3D* maxwell3d{};
69 std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{}; 67 std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{};
70 const u32* parameters{};
71 u32 carry_flag{}; 68 u32 carry_flag{};
72 }; 69 };
73 static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0"); 70 static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0");
74 using ProgramType = void (*)(JITState*); 71 using ProgramType = void (*)(JITState*, const u32*);
75 72
76 struct OptimizerState { 73 struct OptimizerState {
77 bool can_skip_carry{}; 74 bool can_skip_carry{};
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
new file mode 100644
index 000000000..1e96b0310
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
@@ -0,0 +1,2074 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <cstddef>
8#include <string>
9#include <string_view>
10#include <utility>
11#include <variant>
12
13#include <fmt/format.h>
14
15#include "common/alignment.h"
16#include "common/assert.h"
17#include "common/common_types.h"
18#include "video_core/renderer_opengl/gl_arb_decompiler.h"
19#include "video_core/renderer_opengl/gl_device.h"
20#include "video_core/shader/registry.h"
21#include "video_core/shader/shader_ir.h"
22
23// Predicates in the decompiled code follow the convention that -1 means true and 0 means false.
24// GLASM lacks booleans, so they have to be implemented as integers.
25// Using -1 for true is useful because both CMP.S and NOT.U can negate it, and CMP.S can be used to
26// select between two values, because -1 will be evaluated as true and 0 as false.
27
28namespace OpenGL {
29
30namespace {
31
32using Tegra::Engines::ShaderType;
33using Tegra::Shader::Attribute;
34using Tegra::Shader::PixelImap;
35using Tegra::Shader::Register;
36using namespace VideoCommon::Shader;
37using Operation = const OperationNode&;
38
39constexpr std::array INTERNAL_FLAG_NAMES = {"ZERO", "SIGN", "CARRY", "OVERFLOW"};
40
41char Swizzle(std::size_t component) {
42 ASSERT(component < 4);
43 return component["xyzw"];
44}
45
46constexpr bool IsGenericAttribute(Attribute::Index index) {
47 return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31;
48}
49
50u32 GetGenericAttributeIndex(Attribute::Index index) {
51 ASSERT(IsGenericAttribute(index));
52 return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
53}
54
55std::string_view Modifiers(Operation operation) {
56 const auto meta = std::get_if<MetaArithmetic>(&operation.GetMeta());
57 if (meta && meta->precise) {
58 return ".PREC";
59 }
60 return "";
61}
62
63std::string_view GetInputFlags(PixelImap attribute) {
64 switch (attribute) {
65 case PixelImap::Perspective:
66 return "";
67 case PixelImap::Constant:
68 return "FLAT ";
69 case PixelImap::ScreenLinear:
70 return "NOPERSPECTIVE ";
71 case PixelImap::Unused:
72 break;
73 }
74 UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<int>(attribute));
75 return {};
76}
77
78std::string_view ImageType(Tegra::Shader::ImageType image_type) {
79 switch (image_type) {
80 case Tegra::Shader::ImageType::Texture1D:
81 return "1D";
82 case Tegra::Shader::ImageType::TextureBuffer:
83 return "BUFFER";
84 case Tegra::Shader::ImageType::Texture1DArray:
85 return "ARRAY1D";
86 case Tegra::Shader::ImageType::Texture2D:
87 return "2D";
88 case Tegra::Shader::ImageType::Texture2DArray:
89 return "ARRAY2D";
90 case Tegra::Shader::ImageType::Texture3D:
91 return "3D";
92 }
93 UNREACHABLE();
94 return {};
95}
96
97std::string_view StackName(MetaStackClass stack) {
98 switch (stack) {
99 case MetaStackClass::Ssy:
100 return "SSY";
101 case MetaStackClass::Pbk:
102 return "PBK";
103 }
104 UNREACHABLE();
105 return "";
106};
107
108std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) {
109 switch (topology) {
110 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points:
111 return "POINTS";
112 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines:
113 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip:
114 return "LINES";
115 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency:
116 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency:
117 return "LINES_ADJACENCY";
118 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles:
119 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip:
120 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan:
121 return "TRIANGLES";
122 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency:
123 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency:
124 return "TRIANGLES_ADJACENCY";
125 default:
126 UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology));
127 return "POINTS";
128 }
129}
130
131std::string_view TopologyName(Tegra::Shader::OutputTopology topology) {
132 switch (topology) {
133 case Tegra::Shader::OutputTopology::PointList:
134 return "POINTS";
135 case Tegra::Shader::OutputTopology::LineStrip:
136 return "LINE_STRIP";
137 case Tegra::Shader::OutputTopology::TriangleStrip:
138 return "TRIANGLE_STRIP";
139 default:
140 UNIMPLEMENTED_MSG("Unknown output topology: {}", static_cast<u32>(topology));
141 return "points";
142 }
143}
144
145std::string_view StageInputName(ShaderType stage) {
146 switch (stage) {
147 case ShaderType::Vertex:
148 case ShaderType::Geometry:
149 return "vertex";
150 case ShaderType::Fragment:
151 return "fragment";
152 case ShaderType::Compute:
153 return "invocation";
154 default:
155 UNREACHABLE();
156 return "";
157 }
158}
159
160std::string TextureType(const MetaTexture& meta) {
161 if (meta.sampler.is_buffer) {
162 return "BUFFER";
163 }
164 std::string type;
165 if (meta.sampler.is_shadow) {
166 type += "SHADOW";
167 }
168 if (meta.sampler.is_array) {
169 type += "ARRAY";
170 }
171 type += [&meta] {
172 switch (meta.sampler.type) {
173 case Tegra::Shader::TextureType::Texture1D:
174 return "1D";
175 case Tegra::Shader::TextureType::Texture2D:
176 return "2D";
177 case Tegra::Shader::TextureType::Texture3D:
178 return "3D";
179 case Tegra::Shader::TextureType::TextureCube:
180 return "CUBE";
181 }
182 UNREACHABLE();
183 return "2D";
184 }();
185 return type;
186}
187
188std::string GlobalMemoryName(const GlobalMemoryBase& base) {
189 return fmt::format("gmem{}_{}", base.cbuf_index, base.cbuf_offset);
190}
191
192class ARBDecompiler final {
193public:
194 explicit ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
195 ShaderType stage, std::string_view identifier);
196
197 std::string Code() const {
198 return shader_source;
199 }
200
201private:
202 void DeclareHeader();
203 void DeclareVertex();
204 void DeclareGeometry();
205 void DeclareFragment();
206 void DeclareCompute();
207 void DeclareInputAttributes();
208 void DeclareOutputAttributes();
209 void DeclareLocalMemory();
210 void DeclareGlobalMemory();
211 void DeclareConstantBuffers();
212 void DeclareRegisters();
213 void DeclareTemporaries();
214 void DeclarePredicates();
215 void DeclareInternalFlags();
216
217 void InitializeVariables();
218
219 void DecompileAST();
220 void DecompileBranchMode();
221
222 void VisitAST(const ASTNode& node);
223 std::string VisitExpression(const Expr& node);
224
225 void VisitBlock(const NodeBlock& bb);
226
227 std::string Visit(const Node& node);
228
229 std::pair<std::string, std::size_t> BuildCoords(Operation);
230 std::string BuildAoffi(Operation);
231 void Exit();
232
233 std::string Assign(Operation);
234 std::string Select(Operation);
235 std::string FClamp(Operation);
236 std::string FCastHalf0(Operation);
237 std::string FCastHalf1(Operation);
238 std::string FSqrt(Operation);
239 std::string FSwizzleAdd(Operation);
240 std::string HAdd2(Operation);
241 std::string HMul2(Operation);
242 std::string HFma2(Operation);
243 std::string HAbsolute(Operation);
244 std::string HNegate(Operation);
245 std::string HClamp(Operation);
246 std::string HCastFloat(Operation);
247 std::string HUnpack(Operation);
248 std::string HMergeF32(Operation);
249 std::string HMergeH0(Operation);
250 std::string HMergeH1(Operation);
251 std::string HPack2(Operation);
252 std::string LogicalAssign(Operation);
253 std::string LogicalPick2(Operation);
254 std::string LogicalAnd2(Operation);
255 std::string FloatOrdered(Operation);
256 std::string FloatUnordered(Operation);
257 std::string LogicalAddCarry(Operation);
258 std::string Texture(Operation);
259 std::string TextureGather(Operation);
260 std::string TextureQueryDimensions(Operation);
261 std::string TextureQueryLod(Operation);
262 std::string TexelFetch(Operation);
263 std::string TextureGradient(Operation);
264 std::string ImageLoad(Operation);
265 std::string ImageStore(Operation);
266 std::string Branch(Operation);
267 std::string BranchIndirect(Operation);
268 std::string PushFlowStack(Operation);
269 std::string PopFlowStack(Operation);
270 std::string Exit(Operation);
271 std::string Discard(Operation);
272 std::string EmitVertex(Operation);
273 std::string EndPrimitive(Operation);
274 std::string InvocationId(Operation);
275 std::string YNegate(Operation);
276 std::string ThreadId(Operation);
277 std::string ShuffleIndexed(Operation);
278 std::string Barrier(Operation);
279 std::string MemoryBarrierGroup(Operation);
280 std::string MemoryBarrierGlobal(Operation);
281
282 template <const std::string_view& op>
283 std::string Unary(Operation operation) {
284 const std::string temporary = AllocTemporary();
285 AddLine("{}{} {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]));
286 return temporary;
287 }
288
289 template <const std::string_view& op>
290 std::string Binary(Operation operation) {
291 const std::string temporary = AllocTemporary();
292 AddLine("{}{} {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]),
293 Visit(operation[1]));
294 return temporary;
295 }
296
297 template <const std::string_view& op>
298 std::string Trinary(Operation operation) {
299 const std::string temporary = AllocTemporary();
300 AddLine("{}{} {}, {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]),
301 Visit(operation[1]), Visit(operation[2]));
302 return temporary;
303 }
304
305 template <const std::string_view& op, bool unordered>
306 std::string FloatComparison(Operation operation) {
307 const std::string temporary = AllocTemporary();
308 AddLine("TRUNC.U.CC RC.x, {};", Binary<op>(operation));
309 AddLine("MOV.S {}, 0;", temporary);
310 AddLine("MOV.S {} (NE.x), -1;", temporary);
311
312 const std::string op_a = Visit(operation[0]);
313 const std::string op_b = Visit(operation[1]);
314 if constexpr (unordered) {
315 AddLine("SNE.F RC.x, {}, {};", op_a, op_a);
316 AddLine("TRUNC.U.CC RC.x, RC.x;");
317 AddLine("MOV.S {} (NE.x), -1;", temporary);
318 AddLine("SNE.F RC.x, {}, {};", op_b, op_b);
319 AddLine("TRUNC.U.CC RC.x, RC.x;");
320 AddLine("MOV.S {} (NE.x), -1;", temporary);
321 } else if (op == SNE_F) {
322 AddLine("SNE.F RC.x, {}, {};", op_a, op_a);
323 AddLine("TRUNC.U.CC RC.x, RC.x;");
324 AddLine("MOV.S {} (NE.x), 0;", temporary);
325 AddLine("SNE.F RC.x, {}, {};", op_b, op_b);
326 AddLine("TRUNC.U.CC RC.x, RC.x;");
327 AddLine("MOV.S {} (NE.x), 0;", temporary);
328 }
329 return temporary;
330 }
331
332 template <const std::string_view& op, bool is_nan>
333 std::string HalfComparison(Operation operation) {
334 const std::string tmp1 = AllocVectorTemporary();
335 const std::string tmp2 = AllocVectorTemporary();
336 const std::string op_a = Visit(operation[0]);
337 const std::string op_b = Visit(operation[1]);
338 AddLine("UP2H.F {}, {};", tmp1, op_a);
339 AddLine("UP2H.F {}, {};", tmp2, op_b);
340 AddLine("{} {}, {}, {};", op, tmp1, tmp1, tmp2);
341 AddLine("TRUNC.U.CC RC.xy, {};", tmp1);
342 AddLine("MOV.S {}.xy, {{0, 0, 0, 0}};", tmp1);
343 AddLine("MOV.S {}.x (NE.x), -1;", tmp1);
344 AddLine("MOV.S {}.y (NE.y), -1;", tmp1);
345 if constexpr (is_nan) {
346 AddLine("MOVC.F RC.x, {};", op_a);
347 AddLine("MOV.S {}.x (NAN.x), -1;", tmp1);
348 AddLine("MOVC.F RC.x, {};", op_b);
349 AddLine("MOV.S {}.y (NAN.x), -1;", tmp1);
350 }
351 return tmp1;
352 }
353
354 template <const std::string_view& op, const std::string_view& type>
355 std::string AtomicImage(Operation operation) {
356 const auto& meta = std::get<MetaImage>(operation.GetMeta());
357 const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
358 const std::size_t num_coords = operation.GetOperandsCount();
359 const std::size_t num_values = meta.values.size();
360
361 const std::string coord = AllocVectorTemporary();
362 const std::string value = AllocVectorTemporary();
363 for (std::size_t i = 0; i < num_coords; ++i) {
364 AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i]));
365 }
366 for (std::size_t i = 0; i < num_values; ++i) {
367 AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i]));
368 }
369
370 const std::string result = coord;
371 AddLine("ATOMIM.{}.{} {}.x, {}, {}, image[{}], {};", op, type, result, value, coord,
372 image_id, ImageType(meta.image.type));
373 return fmt::format("{}.x", result);
374 }
375
376 template <const std::string_view& op, const std::string_view& type>
377 std::string Atomic(Operation operation) {
378 const std::string temporary = AllocTemporary();
379 std::string address;
380 std::string_view opname;
381 if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
382 AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()),
383 Visit(gmem->GetBaseAddress()));
384 address = fmt::format("{}[{}]", GlobalMemoryName(gmem->GetDescriptor()), temporary);
385 opname = "ATOMB";
386 } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
387 address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress()));
388 opname = "ATOMS";
389 } else {
390 UNREACHABLE();
391 return "{0, 0, 0, 0}";
392 }
393 AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address);
394 return temporary;
395 }
396
397 template <char type>
398 std::string Negate(Operation operation) {
399 const std::string temporary = AllocTemporary();
400 if constexpr (type == 'F') {
401 AddLine("MOV.F32 {}, -{};", temporary, Visit(operation[0]));
402 } else {
403 AddLine("MOV.{} {}, -{};", type, temporary, Visit(operation[0]));
404 }
405 return temporary;
406 }
407
408 template <char type>
409 std::string Absolute(Operation operation) {
410 const std::string temporary = AllocTemporary();
411 AddLine("MOV.{} {}, |{}|;", type, temporary, Visit(operation[0]));
412 return temporary;
413 }
414
415 template <char type>
416 std::string BitfieldInsert(Operation operation) {
417 const std::string temporary = AllocVectorTemporary();
418 AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[3]));
419 AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[2]));
420 AddLine("BFI.{} {}.x, {}, {}, {};", type, temporary, temporary, Visit(operation[1]),
421 Visit(operation[0]));
422 return fmt::format("{}.x", temporary);
423 }
424
425 template <char type>
426 std::string BitfieldExtract(Operation operation) {
427 const std::string temporary = AllocVectorTemporary();
428 AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[2]));
429 AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[1]));
430 AddLine("BFE.{} {}.x, {}, {};", type, temporary, temporary, Visit(operation[0]));
431 return fmt::format("{}.x", temporary);
432 }
433
434 template <char swizzle>
435 std::string LocalInvocationId(Operation) {
436 return fmt::format("invocation.localid.{}", swizzle);
437 }
438
439 template <char swizzle>
440 std::string WorkGroupId(Operation) {
441 return fmt::format("invocation.groupid.{}", swizzle);
442 }
443
444 template <char c1, char c2>
445 std::string ThreadMask(Operation) {
446 return fmt::format("{}.thread{}{}mask", StageInputName(stage), c1, c2);
447 }
448
449 template <typename... Args>
450 void AddExpression(std::string_view text, Args&&... args) {
451 shader_source += fmt::format(text, std::forward<Args>(args)...);
452 }
453
454 template <typename... Args>
455 void AddLine(std::string_view text, Args&&... args) {
456 AddExpression(text, std::forward<Args>(args)...);
457 shader_source += '\n';
458 }
459
460 std::string AllocTemporary() {
461 max_temporaries = std::max(max_temporaries, num_temporaries + 1);
462 return fmt::format("T{}.x", num_temporaries++);
463 }
464
465 std::string AllocVectorTemporary() {
466 max_temporaries = std::max(max_temporaries, num_temporaries + 1);
467 return fmt::format("T{}", num_temporaries++);
468 }
469
470 void ResetTemporaries() noexcept {
471 num_temporaries = 0;
472 }
473
474 const Device& device;
475 const ShaderIR& ir;
476 const Registry& registry;
477 const ShaderType stage;
478
479 std::size_t num_temporaries = 0;
480 std::size_t max_temporaries = 0;
481
482 std::string shader_source;
483
484 static constexpr std::string_view ADD_F32 = "ADD.F32";
485 static constexpr std::string_view ADD_S = "ADD.S";
486 static constexpr std::string_view ADD_U = "ADD.U";
487 static constexpr std::string_view MUL_F32 = "MUL.F32";
488 static constexpr std::string_view MUL_S = "MUL.S";
489 static constexpr std::string_view MUL_U = "MUL.U";
490 static constexpr std::string_view DIV_F32 = "DIV.F32";
491 static constexpr std::string_view DIV_S = "DIV.S";
492 static constexpr std::string_view DIV_U = "DIV.U";
493 static constexpr std::string_view MAD_F32 = "MAD.F32";
494 static constexpr std::string_view RSQ_F32 = "RSQ.F32";
495 static constexpr std::string_view COS_F32 = "COS.F32";
496 static constexpr std::string_view SIN_F32 = "SIN.F32";
497 static constexpr std::string_view EX2_F32 = "EX2.F32";
498 static constexpr std::string_view LG2_F32 = "LG2.F32";
499 static constexpr std::string_view SLT_F = "SLT.F32";
500 static constexpr std::string_view SLT_S = "SLT.S";
501 static constexpr std::string_view SLT_U = "SLT.U";
502 static constexpr std::string_view SEQ_F = "SEQ.F32";
503 static constexpr std::string_view SEQ_S = "SEQ.S";
504 static constexpr std::string_view SEQ_U = "SEQ.U";
505 static constexpr std::string_view SLE_F = "SLE.F32";
506 static constexpr std::string_view SLE_S = "SLE.S";
507 static constexpr std::string_view SLE_U = "SLE.U";
508 static constexpr std::string_view SGT_F = "SGT.F32";
509 static constexpr std::string_view SGT_S = "SGT.S";
510 static constexpr std::string_view SGT_U = "SGT.U";
511 static constexpr std::string_view SNE_F = "SNE.F32";
512 static constexpr std::string_view SNE_S = "SNE.S";
513 static constexpr std::string_view SNE_U = "SNE.U";
514 static constexpr std::string_view SGE_F = "SGE.F32";
515 static constexpr std::string_view SGE_S = "SGE.S";
516 static constexpr std::string_view SGE_U = "SGE.U";
517 static constexpr std::string_view AND_S = "AND.S";
518 static constexpr std::string_view AND_U = "AND.U";
519 static constexpr std::string_view TRUNC_F = "TRUNC.F";
520 static constexpr std::string_view TRUNC_S = "TRUNC.S";
521 static constexpr std::string_view TRUNC_U = "TRUNC.U";
522 static constexpr std::string_view SHL_S = "SHL.S";
523 static constexpr std::string_view SHL_U = "SHL.U";
524 static constexpr std::string_view SHR_S = "SHR.S";
525 static constexpr std::string_view SHR_U = "SHR.U";
526 static constexpr std::string_view OR_S = "OR.S";
527 static constexpr std::string_view OR_U = "OR.U";
528 static constexpr std::string_view XOR_S = "XOR.S";
529 static constexpr std::string_view XOR_U = "XOR.U";
530 static constexpr std::string_view NOT_S = "NOT.S";
531 static constexpr std::string_view NOT_U = "NOT.U";
532 static constexpr std::string_view BTC_S = "BTC.S";
533 static constexpr std::string_view BTC_U = "BTC.U";
534 static constexpr std::string_view BTFM_S = "BTFM.S";
535 static constexpr std::string_view BTFM_U = "BTFM.U";
536 static constexpr std::string_view ROUND_F = "ROUND.F";
537 static constexpr std::string_view CEIL_F = "CEIL.F";
538 static constexpr std::string_view FLR_F = "FLR.F";
539 static constexpr std::string_view I2F_S = "I2F.S";
540 static constexpr std::string_view I2F_U = "I2F.U";
541 static constexpr std::string_view MIN_F = "MIN.F";
542 static constexpr std::string_view MIN_S = "MIN.S";
543 static constexpr std::string_view MIN_U = "MIN.U";
544 static constexpr std::string_view MAX_F = "MAX.F";
545 static constexpr std::string_view MAX_S = "MAX.S";
546 static constexpr std::string_view MAX_U = "MAX.U";
547 static constexpr std::string_view MOV_U = "MOV.U";
548 static constexpr std::string_view TGBALLOT_U = "TGBALLOT.U";
549 static constexpr std::string_view TGALL_U = "TGALL.U";
550 static constexpr std::string_view TGANY_U = "TGANY.U";
551 static constexpr std::string_view TGEQ_U = "TGEQ.U";
552 static constexpr std::string_view EXCH = "EXCH";
553 static constexpr std::string_view ADD = "ADD";
554 static constexpr std::string_view MIN = "MIN";
555 static constexpr std::string_view MAX = "MAX";
556 static constexpr std::string_view AND = "AND";
557 static constexpr std::string_view OR = "OR";
558 static constexpr std::string_view XOR = "XOR";
559 static constexpr std::string_view U32 = "U32";
560 static constexpr std::string_view S32 = "S32";
561
562 static constexpr std::size_t NUM_ENTRIES = static_cast<std::size_t>(OperationCode::Amount);
563 using DecompilerType = std::string (ARBDecompiler::*)(Operation);
564 static constexpr std::array<DecompilerType, NUM_ENTRIES> OPERATION_DECOMPILERS = {
565 &ARBDecompiler::Assign,
566
567 &ARBDecompiler::Select,
568
569 &ARBDecompiler::Binary<ADD_F32>,
570 &ARBDecompiler::Binary<MUL_F32>,
571 &ARBDecompiler::Binary<DIV_F32>,
572 &ARBDecompiler::Trinary<MAD_F32>,
573 &ARBDecompiler::Negate<'F'>,
574 &ARBDecompiler::Absolute<'F'>,
575 &ARBDecompiler::FClamp,
576 &ARBDecompiler::FCastHalf0,
577 &ARBDecompiler::FCastHalf1,
578 &ARBDecompiler::Binary<MIN_F>,
579 &ARBDecompiler::Binary<MAX_F>,
580 &ARBDecompiler::Unary<COS_F32>,
581 &ARBDecompiler::Unary<SIN_F32>,
582 &ARBDecompiler::Unary<EX2_F32>,
583 &ARBDecompiler::Unary<LG2_F32>,
584 &ARBDecompiler::Unary<RSQ_F32>,
585 &ARBDecompiler::FSqrt,
586 &ARBDecompiler::Unary<ROUND_F>,
587 &ARBDecompiler::Unary<FLR_F>,
588 &ARBDecompiler::Unary<CEIL_F>,
589 &ARBDecompiler::Unary<TRUNC_F>,
590 &ARBDecompiler::Unary<I2F_S>,
591 &ARBDecompiler::Unary<I2F_U>,
592 &ARBDecompiler::FSwizzleAdd,
593
594 &ARBDecompiler::Binary<ADD_S>,
595 &ARBDecompiler::Binary<MUL_S>,
596 &ARBDecompiler::Binary<DIV_S>,
597 &ARBDecompiler::Negate<'S'>,
598 &ARBDecompiler::Absolute<'S'>,
599 &ARBDecompiler::Binary<MIN_S>,
600 &ARBDecompiler::Binary<MAX_S>,
601
602 &ARBDecompiler::Unary<TRUNC_S>,
603 &ARBDecompiler::Unary<MOV_U>,
604 &ARBDecompiler::Binary<SHL_S>,
605 &ARBDecompiler::Binary<SHR_U>,
606 &ARBDecompiler::Binary<SHR_S>,
607 &ARBDecompiler::Binary<AND_S>,
608 &ARBDecompiler::Binary<OR_S>,
609 &ARBDecompiler::Binary<XOR_S>,
610 &ARBDecompiler::Unary<NOT_S>,
611 &ARBDecompiler::BitfieldInsert<'S'>,
612 &ARBDecompiler::BitfieldExtract<'S'>,
613 &ARBDecompiler::Unary<BTC_S>,
614 &ARBDecompiler::Unary<BTFM_S>,
615
616 &ARBDecompiler::Binary<ADD_U>,
617 &ARBDecompiler::Binary<MUL_U>,
618 &ARBDecompiler::Binary<DIV_U>,
619 &ARBDecompiler::Binary<MIN_U>,
620 &ARBDecompiler::Binary<MAX_U>,
621 &ARBDecompiler::Unary<TRUNC_U>,
622 &ARBDecompiler::Unary<MOV_U>,
623 &ARBDecompiler::Binary<SHL_U>,
624 &ARBDecompiler::Binary<SHR_U>,
625 &ARBDecompiler::Binary<SHR_U>,
626 &ARBDecompiler::Binary<AND_U>,
627 &ARBDecompiler::Binary<OR_U>,
628 &ARBDecompiler::Binary<XOR_U>,
629 &ARBDecompiler::Unary<NOT_U>,
630 &ARBDecompiler::BitfieldInsert<'U'>,
631 &ARBDecompiler::BitfieldExtract<'U'>,
632 &ARBDecompiler::Unary<BTC_U>,
633 &ARBDecompiler::Unary<BTFM_U>,
634
635 &ARBDecompiler::HAdd2,
636 &ARBDecompiler::HMul2,
637 &ARBDecompiler::HFma2,
638 &ARBDecompiler::HAbsolute,
639 &ARBDecompiler::HNegate,
640 &ARBDecompiler::HClamp,
641 &ARBDecompiler::HCastFloat,
642 &ARBDecompiler::HUnpack,
643 &ARBDecompiler::HMergeF32,
644 &ARBDecompiler::HMergeH0,
645 &ARBDecompiler::HMergeH1,
646 &ARBDecompiler::HPack2,
647
648 &ARBDecompiler::LogicalAssign,
649 &ARBDecompiler::Binary<AND_U>,
650 &ARBDecompiler::Binary<OR_U>,
651 &ARBDecompiler::Binary<XOR_U>,
652 &ARBDecompiler::Unary<NOT_U>,
653 &ARBDecompiler::LogicalPick2,
654 &ARBDecompiler::LogicalAnd2,
655
656 &ARBDecompiler::FloatComparison<SLT_F, false>,
657 &ARBDecompiler::FloatComparison<SEQ_F, false>,
658 &ARBDecompiler::FloatComparison<SLE_F, false>,
659 &ARBDecompiler::FloatComparison<SGT_F, false>,
660 &ARBDecompiler::FloatComparison<SNE_F, false>,
661 &ARBDecompiler::FloatComparison<SGE_F, false>,
662 &ARBDecompiler::FloatOrdered,
663 &ARBDecompiler::FloatUnordered,
664 &ARBDecompiler::FloatComparison<SLT_F, true>,
665 &ARBDecompiler::FloatComparison<SEQ_F, true>,
666 &ARBDecompiler::FloatComparison<SLE_F, true>,
667 &ARBDecompiler::FloatComparison<SGT_F, true>,
668 &ARBDecompiler::FloatComparison<SNE_F, true>,
669 &ARBDecompiler::FloatComparison<SGE_F, true>,
670
671 &ARBDecompiler::Binary<SLT_S>,
672 &ARBDecompiler::Binary<SEQ_S>,
673 &ARBDecompiler::Binary<SLE_S>,
674 &ARBDecompiler::Binary<SGT_S>,
675 &ARBDecompiler::Binary<SNE_S>,
676 &ARBDecompiler::Binary<SGE_S>,
677
678 &ARBDecompiler::Binary<SLT_U>,
679 &ARBDecompiler::Binary<SEQ_U>,
680 &ARBDecompiler::Binary<SLE_U>,
681 &ARBDecompiler::Binary<SGT_U>,
682 &ARBDecompiler::Binary<SNE_U>,
683 &ARBDecompiler::Binary<SGE_U>,
684
685 &ARBDecompiler::LogicalAddCarry,
686
687 &ARBDecompiler::HalfComparison<SLT_F, false>,
688 &ARBDecompiler::HalfComparison<SEQ_F, false>,
689 &ARBDecompiler::HalfComparison<SLE_F, false>,
690 &ARBDecompiler::HalfComparison<SGT_F, false>,
691 &ARBDecompiler::HalfComparison<SNE_F, false>,
692 &ARBDecompiler::HalfComparison<SGE_F, false>,
693 &ARBDecompiler::HalfComparison<SLT_F, true>,
694 &ARBDecompiler::HalfComparison<SEQ_F, true>,
695 &ARBDecompiler::HalfComparison<SLE_F, true>,
696 &ARBDecompiler::HalfComparison<SGT_F, true>,
697 &ARBDecompiler::HalfComparison<SNE_F, true>,
698 &ARBDecompiler::HalfComparison<SGE_F, true>,
699
700 &ARBDecompiler::Texture,
701 &ARBDecompiler::Texture,
702 &ARBDecompiler::TextureGather,
703 &ARBDecompiler::TextureQueryDimensions,
704 &ARBDecompiler::TextureQueryLod,
705 &ARBDecompiler::TexelFetch,
706 &ARBDecompiler::TextureGradient,
707
708 &ARBDecompiler::ImageLoad,
709 &ARBDecompiler::ImageStore,
710
711 &ARBDecompiler::AtomicImage<ADD, U32>,
712 &ARBDecompiler::AtomicImage<AND, U32>,
713 &ARBDecompiler::AtomicImage<OR, U32>,
714 &ARBDecompiler::AtomicImage<XOR, U32>,
715 &ARBDecompiler::AtomicImage<EXCH, U32>,
716
717 &ARBDecompiler::Atomic<EXCH, U32>,
718 &ARBDecompiler::Atomic<ADD, U32>,
719 &ARBDecompiler::Atomic<MIN, U32>,
720 &ARBDecompiler::Atomic<MAX, U32>,
721 &ARBDecompiler::Atomic<AND, U32>,
722 &ARBDecompiler::Atomic<OR, U32>,
723 &ARBDecompiler::Atomic<XOR, U32>,
724
725 &ARBDecompiler::Atomic<EXCH, S32>,
726 &ARBDecompiler::Atomic<ADD, S32>,
727 &ARBDecompiler::Atomic<MIN, S32>,
728 &ARBDecompiler::Atomic<MAX, S32>,
729 &ARBDecompiler::Atomic<AND, S32>,
730 &ARBDecompiler::Atomic<OR, S32>,
731 &ARBDecompiler::Atomic<XOR, S32>,
732
733 &ARBDecompiler::Atomic<ADD, U32>,
734 &ARBDecompiler::Atomic<MIN, U32>,
735 &ARBDecompiler::Atomic<MAX, U32>,
736 &ARBDecompiler::Atomic<AND, U32>,
737 &ARBDecompiler::Atomic<OR, U32>,
738 &ARBDecompiler::Atomic<XOR, U32>,
739
740 &ARBDecompiler::Atomic<ADD, S32>,
741 &ARBDecompiler::Atomic<MIN, S32>,
742 &ARBDecompiler::Atomic<MAX, S32>,
743 &ARBDecompiler::Atomic<AND, S32>,
744 &ARBDecompiler::Atomic<OR, S32>,
745 &ARBDecompiler::Atomic<XOR, S32>,
746
747 &ARBDecompiler::Branch,
748 &ARBDecompiler::BranchIndirect,
749 &ARBDecompiler::PushFlowStack,
750 &ARBDecompiler::PopFlowStack,
751 &ARBDecompiler::Exit,
752 &ARBDecompiler::Discard,
753
754 &ARBDecompiler::EmitVertex,
755 &ARBDecompiler::EndPrimitive,
756
757 &ARBDecompiler::InvocationId,
758 &ARBDecompiler::YNegate,
759 &ARBDecompiler::LocalInvocationId<'x'>,
760 &ARBDecompiler::LocalInvocationId<'y'>,
761 &ARBDecompiler::LocalInvocationId<'z'>,
762 &ARBDecompiler::WorkGroupId<'x'>,
763 &ARBDecompiler::WorkGroupId<'y'>,
764 &ARBDecompiler::WorkGroupId<'z'>,
765
766 &ARBDecompiler::Unary<TGBALLOT_U>,
767 &ARBDecompiler::Unary<TGALL_U>,
768 &ARBDecompiler::Unary<TGANY_U>,
769 &ARBDecompiler::Unary<TGEQ_U>,
770
771 &ARBDecompiler::ThreadId,
772 &ARBDecompiler::ThreadMask<'e', 'q'>,
773 &ARBDecompiler::ThreadMask<'g', 'e'>,
774 &ARBDecompiler::ThreadMask<'g', 't'>,
775 &ARBDecompiler::ThreadMask<'l', 'e'>,
776 &ARBDecompiler::ThreadMask<'l', 't'>,
777 &ARBDecompiler::ShuffleIndexed,
778
779 &ARBDecompiler::Barrier,
780 &ARBDecompiler::MemoryBarrierGroup,
781 &ARBDecompiler::MemoryBarrierGlobal,
782 };
783};
784
785ARBDecompiler::ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
786 ShaderType stage, std::string_view identifier)
787 : device{device}, ir{ir}, registry{registry}, stage{stage} {
788 AddLine("TEMP RC;");
789 AddLine("TEMP FSWZA[4];");
790 AddLine("TEMP FSWZB[4];");
791 if (ir.IsDecompiled()) {
792 DecompileAST();
793 } else {
794 DecompileBranchMode();
795 }
796 AddLine("END");
797
798 const std::string code = std::move(shader_source);
799 DeclareHeader();
800 DeclareVertex();
801 DeclareGeometry();
802 DeclareFragment();
803 DeclareCompute();
804 DeclareInputAttributes();
805 DeclareOutputAttributes();
806 DeclareLocalMemory();
807 DeclareGlobalMemory();
808 DeclareConstantBuffers();
809 DeclareRegisters();
810 DeclareTemporaries();
811 DeclarePredicates();
812 DeclareInternalFlags();
813
814 shader_source += code;
815}
816
817std::string_view HeaderStageName(ShaderType stage) {
818 switch (stage) {
819 case ShaderType::Vertex:
820 return "vp";
821 case ShaderType::Geometry:
822 return "gp";
823 case ShaderType::Fragment:
824 return "fp";
825 case ShaderType::Compute:
826 return "cp";
827 default:
828 UNREACHABLE();
829 return "";
830 }
831}
832
833void ARBDecompiler::DeclareHeader() {
834 AddLine("!!NV{}5.0", HeaderStageName(stage));
835 // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D
836 AddLine("OPTION NV_internal;");
837 AddLine("OPTION NV_gpu_program_fp64;");
838 AddLine("OPTION NV_shader_storage_buffer;");
839 AddLine("OPTION NV_shader_thread_group;");
840 if (ir.UsesWarps() && device.HasWarpIntrinsics()) {
841 AddLine("OPTION NV_shader_thread_shuffle;");
842 }
843 if (stage == ShaderType::Vertex) {
844 if (device.HasNvViewportArray2()) {
845 AddLine("OPTION NV_viewport_array2;");
846 }
847 }
848 if (stage == ShaderType::Fragment) {
849 AddLine("OPTION ARB_draw_buffers;");
850 }
851 if (device.HasImageLoadFormatted()) {
852 AddLine("OPTION EXT_shader_image_load_formatted;");
853 }
854}
855
856void ARBDecompiler::DeclareVertex() {
857 if (stage != ShaderType::Vertex) {
858 return;
859 }
860 AddLine("OUTPUT result_clip[] = {{ result.clip[0..7] }};");
861}
862
863void ARBDecompiler::DeclareGeometry() {
864 if (stage != ShaderType::Geometry) {
865 return;
866 }
867 const auto& info = registry.GetGraphicsInfo();
868 const auto& header = ir.GetHeader();
869 AddLine("PRIMITIVE_IN {};", PrimitiveDescription(info.primitive_topology));
870 AddLine("PRIMITIVE_OUT {};", TopologyName(header.common3.output_topology));
871 AddLine("VERTICES_OUT {};", header.common4.max_output_vertices.Value());
872 AddLine("ATTRIB vertex_position = vertex.position;");
873}
874
875void ARBDecompiler::DeclareFragment() {
876 if (stage != ShaderType::Fragment) {
877 return;
878 }
879 AddLine("OUTPUT result_color7 = result.color[7];");
880 AddLine("OUTPUT result_color6 = result.color[6];");
881 AddLine("OUTPUT result_color5 = result.color[5];");
882 AddLine("OUTPUT result_color4 = result.color[4];");
883 AddLine("OUTPUT result_color3 = result.color[3];");
884 AddLine("OUTPUT result_color2 = result.color[2];");
885 AddLine("OUTPUT result_color1 = result.color[1];");
886 AddLine("OUTPUT result_color0 = result.color;");
887}
888
889void ARBDecompiler::DeclareCompute() {
890 if (stage != ShaderType::Compute) {
891 return;
892 }
893 const ComputeInfo& info = registry.GetComputeInfo();
894 AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1],
895 info.workgroup_size[2]);
896 if (info.shared_memory_size_in_words > 0) {
897 const u32 size_in_bytes = info.shared_memory_size_in_words * 4;
898 AddLine("SHARED_MEMORY {};", size_in_bytes);
899 AddLine("SHARED shared_mem[] = {{program.sharedmem}};");
900 }
901}
902
903void ARBDecompiler::DeclareInputAttributes() {
904 if (stage == ShaderType::Compute) {
905 return;
906 }
907 const std::string_view stage_name = StageInputName(stage);
908 for (const auto attribute : ir.GetInputAttributes()) {
909 if (!IsGenericAttribute(attribute)) {
910 continue;
911 }
912 const u32 index = GetGenericAttributeIndex(attribute);
913
914 std::string_view suffix;
915 if (stage == ShaderType::Fragment) {
916 const auto input_mode{ir.GetHeader().ps.GetPixelImap(index)};
917 if (input_mode == PixelImap::Unused) {
918 return;
919 }
920 suffix = GetInputFlags(input_mode);
921 }
922 AddLine("{}ATTRIB in_attr{}[] = {{ {}.attrib[{}..{}] }};", suffix, index, stage_name, index,
923 index);
924 }
925}
926
927void ARBDecompiler::DeclareOutputAttributes() {
928 if (stage == ShaderType::Compute) {
929 return;
930 }
931 for (const auto attribute : ir.GetOutputAttributes()) {
932 if (!IsGenericAttribute(attribute)) {
933 continue;
934 }
935 const u32 index = GetGenericAttributeIndex(attribute);
936 AddLine("OUTPUT out_attr{}[] = {{ result.attrib[{}..{}] }};", index, index, index);
937 }
938}
939
940void ARBDecompiler::DeclareLocalMemory() {
941 u64 size = 0;
942 if (stage == ShaderType::Compute) {
943 size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL;
944 } else {
945 size = ir.GetHeader().GetLocalMemorySize();
946 }
947 if (size == 0) {
948 return;
949 }
950 const u64 element_count = Common::AlignUp(size, 4) / 4;
951 AddLine("TEMP lmem[{}];", element_count);
952}
953
954void ARBDecompiler::DeclareGlobalMemory() {
955 u32 binding = 0; // device.GetBaseBindings(stage).shader_storage_buffer;
956 for (const auto& pair : ir.GetGlobalMemory()) {
957 const auto& base = pair.first;
958 AddLine("STORAGE {}[] = {{ program.storage[{}] }};", GlobalMemoryName(base), binding);
959 ++binding;
960 }
961}
962
963void ARBDecompiler::DeclareConstantBuffers() {
964 u32 binding = 0;
965 for (const auto& cbuf : ir.GetConstantBuffers()) {
966 AddLine("CBUFFER cbuf{}[] = {{ program.buffer[{}] }};", cbuf.first, binding);
967 ++binding;
968 }
969}
970
971void ARBDecompiler::DeclareRegisters() {
972 for (const u32 gpr : ir.GetRegisters()) {
973 AddLine("TEMP R{};", gpr);
974 }
975}
976
977void ARBDecompiler::DeclareTemporaries() {
978 for (std::size_t i = 0; i < max_temporaries; ++i) {
979 AddLine("TEMP T{};", i);
980 }
981}
982
983void ARBDecompiler::DeclarePredicates() {
984 for (const Tegra::Shader::Pred pred : ir.GetPredicates()) {
985 AddLine("TEMP P{};", static_cast<u64>(pred));
986 }
987}
988
989void ARBDecompiler::DeclareInternalFlags() {
990 for (const char* name : INTERNAL_FLAG_NAMES) {
991 AddLine("TEMP {};", name);
992 }
993}
994
995void ARBDecompiler::InitializeVariables() {
996 AddLine("MOV.F32 FSWZA[0], -1;");
997 AddLine("MOV.F32 FSWZA[1], 1;");
998 AddLine("MOV.F32 FSWZA[2], -1;");
999 AddLine("MOV.F32 FSWZA[3], 0;");
1000 AddLine("MOV.F32 FSWZB[0], -1;");
1001 AddLine("MOV.F32 FSWZB[1], -1;");
1002 AddLine("MOV.F32 FSWZB[2], 1;");
1003 AddLine("MOV.F32 FSWZB[3], -1;");
1004
1005 if (stage == ShaderType::Vertex || stage == ShaderType::Geometry) {
1006 AddLine("MOV.F result.position, {{0, 0, 0, 1}};");
1007 }
1008 for (const auto attribute : ir.GetOutputAttributes()) {
1009 if (!IsGenericAttribute(attribute)) {
1010 continue;
1011 }
1012 const u32 index = GetGenericAttributeIndex(attribute);
1013 AddLine("MOV.F result.attrib[{}], {{0, 0, 0, 1}};", index);
1014 }
1015 for (const u32 gpr : ir.GetRegisters()) {
1016 AddLine("MOV.F R{}, {{0, 0, 0, 0}};", gpr);
1017 }
1018 for (const Tegra::Shader::Pred pred : ir.GetPredicates()) {
1019 AddLine("MOV.U P{}, {{0, 0, 0, 0}};", static_cast<u64>(pred));
1020 }
1021}
1022
1023void ARBDecompiler::DecompileAST() {
1024 const u32 num_flow_variables = ir.GetASTNumVariables();
1025 for (u32 i = 0; i < num_flow_variables; ++i) {
1026 AddLine("TEMP F{};", i);
1027 }
1028 for (u32 i = 0; i < num_flow_variables; ++i) {
1029 AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i);
1030 }
1031
1032 InitializeVariables();
1033
1034 VisitAST(ir.GetASTProgram());
1035}
1036
1037void ARBDecompiler::DecompileBranchMode() {
1038 static constexpr u32 FLOW_STACK_SIZE = 20;
1039 if (!ir.IsFlowStackDisabled()) {
1040 AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE);
1041 AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE);
1042 AddLine("TEMP SSY_TOP;");
1043 AddLine("TEMP PBK_TOP;");
1044 }
1045
1046 AddLine("TEMP PC;");
1047
1048 if (!ir.IsFlowStackDisabled()) {
1049 AddLine("MOV.U SSY_TOP.x, 0;");
1050 AddLine("MOV.U PBK_TOP.x, 0;");
1051 }
1052
1053 InitializeVariables();
1054
1055 const auto basic_block_end = ir.GetBasicBlocks().end();
1056 auto basic_block_it = ir.GetBasicBlocks().begin();
1057 const u32 first_address = basic_block_it->first;
1058 AddLine("MOV.U PC.x, {};", first_address);
1059
1060 AddLine("REP;");
1061
1062 std::size_t num_blocks = 0;
1063 while (basic_block_it != basic_block_end) {
1064 const auto& [address, bb] = *basic_block_it;
1065 ++num_blocks;
1066
1067 AddLine("SEQ.S.CC RC.x, PC.x, {};", address);
1068 AddLine("IF NE.x;");
1069
1070 VisitBlock(bb);
1071
1072 ++basic_block_it;
1073
1074 if (basic_block_it != basic_block_end) {
1075 const auto op = std::get_if<OperationNode>(&*bb[bb.size() - 1]);
1076 if (!op || op->GetCode() != OperationCode::Branch) {
1077 const u32 next_address = basic_block_it->first;
1078 AddLine("MOV.U PC.x, {};", next_address);
1079 AddLine("CONT;");
1080 }
1081 }
1082
1083 AddLine("ELSE;");
1084 }
1085 AddLine("RET;");
1086 while (num_blocks--) {
1087 AddLine("ENDIF;");
1088 }
1089
1090 AddLine("ENDREP;");
1091}
1092
1093void ARBDecompiler::VisitAST(const ASTNode& node) {
1094 if (const auto ast = std::get_if<ASTProgram>(&*node->GetInnerData())) {
1095 for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
1096 VisitAST(current);
1097 }
1098 } else if (const auto ast = std::get_if<ASTIfThen>(&*node->GetInnerData())) {
1099 const std::string condition = VisitExpression(ast->condition);
1100 ResetTemporaries();
1101
1102 AddLine("MOVC.U RC.x, {};", condition);
1103 AddLine("IF NE.x;");
1104 for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
1105 VisitAST(current);
1106 }
1107 AddLine("ENDIF;");
1108 } else if (const auto ast = std::get_if<ASTIfElse>(&*node->GetInnerData())) {
1109 AddLine("ELSE;");
1110 for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
1111 VisitAST(current);
1112 }
1113 } else if (const auto ast = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) {
1114 VisitBlock(ast->nodes);
1115 } else if (const auto ast = std::get_if<ASTVarSet>(&*node->GetInnerData())) {
1116 AddLine("MOV.U F{}, {};", ast->index, VisitExpression(ast->condition));
1117 ResetTemporaries();
1118 } else if (const auto ast = std::get_if<ASTDoWhile>(&*node->GetInnerData())) {
1119 const std::string condition = VisitExpression(ast->condition);
1120 ResetTemporaries();
1121 AddLine("REP;");
1122 for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
1123 VisitAST(current);
1124 }
1125 AddLine("MOVC.U RC.x, {};", condition);
1126 AddLine("BRK (NE.x);");
1127 AddLine("ENDREP;");
1128 } else if (const auto ast = std::get_if<ASTReturn>(&*node->GetInnerData())) {
1129 const bool is_true = ExprIsTrue(ast->condition);
1130 if (!is_true) {
1131 AddLine("MOVC.U RC.x, {};", VisitExpression(ast->condition));
1132 AddLine("IF NE.x;");
1133 ResetTemporaries();
1134 }
1135 if (ast->kills) {
1136 AddLine("KIL TR;");
1137 } else {
1138 Exit();
1139 }
1140 if (!is_true) {
1141 AddLine("ENDIF;");
1142 }
1143 } else if (const auto ast = std::get_if<ASTBreak>(&*node->GetInnerData())) {
1144 if (ExprIsTrue(ast->condition)) {
1145 AddLine("BRK;");
1146 } else {
1147 AddLine("MOVC.U RC.x, {};", VisitExpression(ast->condition));
1148 AddLine("BRK (NE.x);");
1149 ResetTemporaries();
1150 }
1151 } else if (std::holds_alternative<ASTLabel>(*node->GetInnerData())) {
1152 // Nothing to do
1153 } else {
1154 UNREACHABLE();
1155 }
1156}
1157
1158std::string ARBDecompiler::VisitExpression(const Expr& node) {
1159 const std::string result = AllocTemporary();
1160 if (const auto expr = std::get_if<ExprAnd>(&*node)) {
1161 AddLine("AND.U {}, {}, {};", result, VisitExpression(expr->operand1),
1162 VisitExpression(expr->operand2));
1163 return result;
1164 }
1165 if (const auto expr = std::get_if<ExprOr>(&*node)) {
1166 const std::string result = AllocTemporary();
1167 AddLine("OR.U {}, {}, {};", result, VisitExpression(expr->operand1),
1168 VisitExpression(expr->operand2));
1169 return result;
1170 }
1171 if (const auto expr = std::get_if<ExprNot>(&*node)) {
1172 const std::string result = AllocTemporary();
1173 AddLine("CMP.S {}, {}, 0, -1;", result, VisitExpression(expr->operand1));
1174 return result;
1175 }
1176 if (const auto expr = std::get_if<ExprPredicate>(&*node)) {
1177 return fmt::format("P{}.x", static_cast<u64>(expr->predicate));
1178 }
1179 if (const auto expr = std::get_if<ExprCondCode>(&*node)) {
1180 return Visit(ir.GetConditionCode(expr->cc));
1181 }
1182 if (const auto expr = std::get_if<ExprVar>(&*node)) {
1183 return fmt::format("F{}.x", expr->var_index);
1184 }
1185 if (const auto expr = std::get_if<ExprBoolean>(&*node)) {
1186 return expr->value ? "0xffffffff" : "0";
1187 }
1188 if (const auto expr = std::get_if<ExprGprEqual>(&*node)) {
1189 const std::string result = AllocTemporary();
1190 AddLine("SEQ.U {}, R{}.x, {};", result, expr->gpr, expr->value);
1191 return result;
1192 }
1193 UNREACHABLE();
1194 return "0";
1195}
1196
1197void ARBDecompiler::VisitBlock(const NodeBlock& bb) {
1198 for (const auto& node : bb) {
1199 Visit(node);
1200 }
1201}
1202
1203std::string ARBDecompiler::Visit(const Node& node) {
1204 if (const auto operation = std::get_if<OperationNode>(&*node)) {
1205 if (const auto amend_index = operation->GetAmendIndex()) {
1206 Visit(ir.GetAmendNode(*amend_index));
1207 }
1208 const std::size_t index = static_cast<std::size_t>(operation->GetCode());
1209 if (index >= OPERATION_DECOMPILERS.size()) {
1210 UNREACHABLE_MSG("Out of bounds operation: {}", index);
1211 return {};
1212 }
1213 const auto decompiler = OPERATION_DECOMPILERS[index];
1214 if (decompiler == nullptr) {
1215 UNREACHABLE_MSG("Undefined operation: {}", index);
1216 return {};
1217 }
1218 return (this->*decompiler)(*operation);
1219 }
1220
1221 if (const auto gpr = std::get_if<GprNode>(&*node)) {
1222 const u32 index = gpr->GetIndex();
1223 if (index == Register::ZeroIndex) {
1224 return "{0, 0, 0, 0}.x";
1225 }
1226 return fmt::format("R{}.x", index);
1227 }
1228
1229 if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
1230 return fmt::format("CV{}.x", cv->GetIndex());
1231 }
1232
1233 if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
1234 const std::string temporary = AllocTemporary();
1235 AddLine("MOV.U {}, {};", temporary, immediate->GetValue());
1236 return temporary;
1237 }
1238
1239 if (const auto predicate = std::get_if<PredicateNode>(&*node)) {
1240 const std::string temporary = AllocTemporary();
1241 switch (const auto index = predicate->GetIndex(); index) {
1242 case Tegra::Shader::Pred::UnusedIndex:
1243 AddLine("MOV.S {}, -1;", temporary);
1244 break;
1245 case Tegra::Shader::Pred::NeverExecute:
1246 AddLine("MOV.S {}, 0;", temporary);
1247 break;
1248 default:
1249 AddLine("MOV.S {}, P{}.x;", temporary, static_cast<u64>(index));
1250 break;
1251 }
1252 if (predicate->IsNegated()) {
1253 AddLine("CMP.S {}, {}, 0, -1;", temporary, temporary);
1254 }
1255 return temporary;
1256 }
1257
1258 if (const auto abuf = std::get_if<AbufNode>(&*node)) {
1259 if (abuf->IsPhysicalBuffer()) {
1260 UNIMPLEMENTED_MSG("Physical buffers are not implemented");
1261 return "{0, 0, 0, 0}.x";
1262 }
1263
1264 const auto buffer_index = [this, &abuf]() -> std::string {
1265 if (stage != ShaderType::Geometry) {
1266 return "";
1267 }
1268 return fmt::format("[{}]", Visit(abuf->GetBuffer()));
1269 };
1270
1271 const Attribute::Index index = abuf->GetIndex();
1272 const u32 element = abuf->GetElement();
1273 const char swizzle = Swizzle(element);
1274 switch (index) {
1275 case Attribute::Index::Position: {
1276 if (stage == ShaderType::Geometry) {
1277 return fmt::format("{}_position[{}].{}", StageInputName(stage),
1278 Visit(abuf->GetBuffer()), swizzle);
1279 } else {
1280 return fmt::format("{}.position.{}", StageInputName(stage), swizzle);
1281 }
1282 }
1283 case Attribute::Index::TessCoordInstanceIDVertexID:
1284 ASSERT(stage == ShaderType::Vertex);
1285 switch (element) {
1286 case 2:
1287 return "vertex.instance";
1288 case 3:
1289 return "vertex.id";
1290 }
1291 UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
1292 break;
1293 case Attribute::Index::PointCoord:
1294 switch (element) {
1295 case 0:
1296 return "fragment.pointcoord.x";
1297 case 1:
1298 return "fragment.pointcoord.y";
1299 }
1300 UNIMPLEMENTED();
1301 break;
1302 case Attribute::Index::FrontFacing: {
1303 ASSERT(stage == ShaderType::Fragment);
1304 ASSERT(element == 3);
1305 const std::string temporary = AllocVectorTemporary();
1306 AddLine("SGT.S RC.x, fragment.facing, {{0, 0, 0, 0}};");
1307 AddLine("MOV.U.CC RC.x, -RC;");
1308 AddLine("MOV.S {}.x, 0;", temporary);
1309 AddLine("MOV.S {}.x (NE.x), -1;", temporary);
1310 return fmt::format("{}.x", temporary);
1311 }
1312 default:
1313 if (IsGenericAttribute(index)) {
1314 if (stage == ShaderType::Geometry) {
1315 return fmt::format("in_attr{}[{}][0].{}", GetGenericAttributeIndex(index),
1316 Visit(abuf->GetBuffer()), swizzle);
1317 } else {
1318 return fmt::format("{}.attrib[{}].{}", StageInputName(stage),
1319 GetGenericAttributeIndex(index), swizzle);
1320 }
1321 }
1322 UNIMPLEMENTED_MSG("Unimplemented input attribute={}", static_cast<int>(index));
1323 break;
1324 }
1325 return "{0, 0, 0, 0}.x";
1326 }
1327
1328 if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
1329 std::string offset_string;
1330 const auto& offset = cbuf->GetOffset();
1331 if (const auto imm = std::get_if<ImmediateNode>(&*offset)) {
1332 offset_string = std::to_string(imm->GetValue());
1333 } else {
1334 offset_string = Visit(offset);
1335 }
1336 const std::string temporary = AllocTemporary();
1337 AddLine("LDC.F32 {}, cbuf{}[{}];", temporary, cbuf->GetIndex(), offset_string);
1338 return temporary;
1339 }
1340
1341 if (const auto gmem = std::get_if<GmemNode>(&*node)) {
1342 const std::string temporary = AllocTemporary();
1343 AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()),
1344 Visit(gmem->GetBaseAddress()));
1345 AddLine("LDB.U32 {}, {}[{}];", temporary, GlobalMemoryName(gmem->GetDescriptor()),
1346 temporary);
1347 return temporary;
1348 }
1349
1350 if (const auto lmem = std::get_if<LmemNode>(&*node)) {
1351 const std::string temporary = Visit(lmem->GetAddress());
1352 AddLine("SHR.U {}, {}, 2;", temporary, temporary);
1353 AddLine("MOV.U {}, lmem[{}].x;", temporary, temporary);
1354 return temporary;
1355 }
1356
1357 if (const auto smem = std::get_if<SmemNode>(&*node)) {
1358 const std::string temporary = Visit(smem->GetAddress());
1359 AddLine("LDS.U32 {}, shared_mem[{}];", temporary, temporary);
1360 return temporary;
1361 }
1362
1363 if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
1364 const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag());
1365 return fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]);
1366 }
1367
1368 if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
1369 if (const auto amend_index = conditional->GetAmendIndex()) {
1370 Visit(ir.GetAmendNode(*amend_index));
1371 }
1372 AddLine("MOVC.U RC.x, {};", Visit(conditional->GetCondition()));
1373 AddLine("IF NE.x;");
1374 VisitBlock(conditional->GetCode());
1375 AddLine("ENDIF;");
1376 return {};
1377 }
1378
1379 if (const auto cmt = std::get_if<CommentNode>(&*node)) {
1380 // Uncommenting this will generate invalid code. GLASM lacks comments.
1381 // AddLine("// {}", cmt->GetText());
1382 return {};
1383 }
1384
1385 UNIMPLEMENTED();
1386 return {};
1387}
1388
1389std::pair<std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operation) {
1390 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1391 UNIMPLEMENTED_IF(meta.sampler.is_indexed);
1392 UNIMPLEMENTED_IF(meta.sampler.is_shadow && meta.sampler.is_array &&
1393 meta.sampler.type == Tegra::Shader::TextureType::TextureCube);
1394
1395 const std::size_t count = operation.GetOperandsCount();
1396 std::string temporary = AllocVectorTemporary();
1397 std::size_t i = 0;
1398 for (; i < count; ++i) {
1399 AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
1400 }
1401 if (meta.sampler.is_array) {
1402 AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i++), Visit(meta.array));
1403 }
1404 if (meta.sampler.is_shadow) {
1405 AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i++), Visit(meta.depth_compare));
1406 }
1407 return {std::move(temporary), i};
1408}
1409
1410std::string ARBDecompiler::BuildAoffi(Operation operation) {
1411 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1412 if (meta.aoffi.empty()) {
1413 return {};
1414 }
1415 const std::string temporary = AllocVectorTemporary();
1416 std::size_t i = 0;
1417 for (auto& node : meta.aoffi) {
1418 AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i++), Visit(node));
1419 }
1420 return fmt::format(", offset({})", temporary);
1421}
1422
1423void ARBDecompiler::Exit() {
1424 if (stage != ShaderType::Fragment) {
1425 AddLine("RET;");
1426 return;
1427 }
1428
1429 const auto safe_get_register = [this](u32 reg) -> std::string {
1430 // TODO(Rodrigo): Replace with contains once C++20 releases
1431 const auto& used_registers = ir.GetRegisters();
1432 if (used_registers.find(reg) != used_registers.end()) {
1433 return fmt::format("R{}.x", reg);
1434 }
1435 return "{0, 0, 0, 0}.x";
1436 };
1437
1438 const auto& header = ir.GetHeader();
1439 u32 current_reg = 0;
1440 for (u32 rt = 0; rt < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++rt) {
1441 for (u32 component = 0; component < 4; ++component) {
1442 if (!header.ps.IsColorComponentOutputEnabled(rt, component)) {
1443 continue;
1444 }
1445 AddLine("MOV.F result_color{}.{}, {};", rt, Swizzle(component),
1446 safe_get_register(current_reg));
1447 ++current_reg;
1448 }
1449 }
1450 if (header.ps.omap.depth) {
1451 AddLine("MOV.F result.depth.z, {};", safe_get_register(current_reg + 1));
1452 }
1453
1454 AddLine("RET;");
1455}
1456
1457std::string ARBDecompiler::Assign(Operation operation) {
1458 const Node& dest = operation[0];
1459 const Node& src = operation[1];
1460
1461 std::string dest_name;
1462 if (const auto gpr = std::get_if<GprNode>(&*dest)) {
1463 if (gpr->GetIndex() == Register::ZeroIndex) {
1464 // Writing to Register::ZeroIndex is a no op
1465 return {};
1466 }
1467 dest_name = fmt::format("R{}.x", gpr->GetIndex());
1468 } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
1469 const u32 element = abuf->GetElement();
1470 const char swizzle = Swizzle(element);
1471 switch (const Attribute::Index index = abuf->GetIndex()) {
1472 case Attribute::Index::Position:
1473 dest_name = fmt::format("result.position.{}", swizzle);
1474 break;
1475 case Attribute::Index::LayerViewportPointSize:
1476 switch (element) {
1477 case 0:
1478 UNIMPLEMENTED();
1479 return {};
1480 case 1:
1481 case 2:
1482 if (!device.HasNvViewportArray2()) {
1483 LOG_ERROR(
1484 Render_OpenGL,
1485 "NV_viewport_array2 is missing. Maxwell gen 2 or better is required.");
1486 return {};
1487 }
1488 dest_name = element == 1 ? "result.layer.x" : "result.viewport.x";
1489 break;
1490 case 3:
1491 dest_name = "result.pointsize.x";
1492 break;
1493 }
1494 break;
1495 case Attribute::Index::ClipDistances0123:
1496 dest_name = fmt::format("result.clip[{}].x", element);
1497 break;
1498 case Attribute::Index::ClipDistances4567:
1499 dest_name = fmt::format("result.clip[{}].x", element + 4);
1500 break;
1501 default:
1502 if (!IsGenericAttribute(index)) {
1503 UNREACHABLE();
1504 return {};
1505 }
1506 dest_name =
1507 fmt::format("result.attrib[{}].{}", GetGenericAttributeIndex(index), swizzle);
1508 break;
1509 }
1510 } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
1511 const std::string address = Visit(lmem->GetAddress());
1512 AddLine("SHR.U {}, {}, 2;", address, address);
1513 dest_name = fmt::format("lmem[{}].x", address);
1514 } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
1515 AddLine("STS.U32 {}, shared_mem[{}];", Visit(src), Visit(smem->GetAddress()));
1516 ResetTemporaries();
1517 return {};
1518 } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
1519 const std::string temporary = AllocTemporary();
1520 AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()),
1521 Visit(gmem->GetBaseAddress()));
1522 AddLine("STB.U32 {}, {}[{}];", Visit(src), GlobalMemoryName(gmem->GetDescriptor()),
1523 temporary);
1524 ResetTemporaries();
1525 return {};
1526 } else {
1527 UNREACHABLE();
1528 ResetTemporaries();
1529 return {};
1530 }
1531
1532 AddLine("MOV.U {}, {};", dest_name, Visit(src));
1533 ResetTemporaries();
1534 return {};
1535}
1536
1537std::string ARBDecompiler::Select(Operation operation) {
1538 const std::string temporary = AllocTemporary();
1539 AddLine("CMP.S {}, {}, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]),
1540 Visit(operation[2]));
1541 return temporary;
1542}
1543
1544std::string ARBDecompiler::FClamp(Operation operation) {
1545 // 1.0f in hex, replace with std::bit_cast on C++20
1546 static constexpr u32 POSITIVE_ONE = 0x3f800000;
1547
1548 const std::string temporary = AllocTemporary();
1549 const Node& value = operation[0];
1550 const Node& low = operation[1];
1551 const Node& high = operation[2];
1552 const auto imm_low = std::get_if<ImmediateNode>(&*low);
1553 const auto imm_high = std::get_if<ImmediateNode>(&*high);
1554 if (imm_low && imm_high && imm_low->GetValue() == 0 && imm_high->GetValue() == POSITIVE_ONE) {
1555 AddLine("MOV.F32.SAT {}, {};", temporary, Visit(value));
1556 } else {
1557 AddLine("MIN.F {}, {}, {};", temporary, Visit(value), Visit(high));
1558 AddLine("MAX.F {}, {}, {};", temporary, temporary, Visit(low));
1559 }
1560 return temporary;
1561}
1562
1563std::string ARBDecompiler::FCastHalf0(Operation operation) {
1564 const std::string temporary = AllocVectorTemporary();
1565 AddLine("UP2H.F {}.x, {};", temporary, Visit(operation[0]));
1566 return fmt::format("{}.x", temporary);
1567}
1568
1569std::string ARBDecompiler::FCastHalf1(Operation operation) {
1570 const std::string temporary = AllocVectorTemporary();
1571 AddLine("UP2H.F {}.y, {};", temporary, Visit(operation[0]));
1572 AddLine("MOV {}.x, {}.y;", temporary, temporary);
1573 return fmt::format("{}.x", temporary);
1574}
1575
1576std::string ARBDecompiler::FSqrt(Operation operation) {
1577 const std::string temporary = AllocTemporary();
1578 AddLine("RSQ.F32 {}, {};", temporary, Visit(operation[0]));
1579 AddLine("RCP.F32 {}, {};", temporary, temporary);
1580 return temporary;
1581}
1582
1583std::string ARBDecompiler::FSwizzleAdd(Operation operation) {
1584 const std::string temporary = AllocVectorTemporary();
1585 if (!device.HasWarpIntrinsics()) {
1586 LOG_ERROR(Render_OpenGL,
1587 "NV_shader_thread_shuffle is missing. Kepler or better is required.");
1588 AddLine("ADD.F {}.x, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]));
1589 return fmt::format("{}.x", temporary);
1590 }
1591 const std::string lut = AllocVectorTemporary();
1592 AddLine("AND.U {}.z, {}.threadid, 3;", temporary, StageInputName(stage));
1593 AddLine("SHL.U {}.z, {}.z, 1;", temporary, temporary);
1594 AddLine("SHR.U {}.z, {}, {}.z;", temporary, Visit(operation[2]), temporary);
1595 AddLine("AND.U {}.z, {}.z, 3;", temporary, temporary);
1596 AddLine("MUL.F32 {}.x, {}, FSWZA[{}.z];", temporary, Visit(operation[0]), temporary);
1597 AddLine("MUL.F32 {}.y, {}, FSWZB[{}.z];", temporary, Visit(operation[1]), temporary);
1598 AddLine("ADD.F32 {}.x, {}.x, {}.y;", temporary, temporary, temporary);
1599 return fmt::format("{}.x", temporary);
1600}
1601
1602std::string ARBDecompiler::HAdd2(Operation operation) {
1603 const std::string tmp1 = AllocVectorTemporary();
1604 const std::string tmp2 = AllocVectorTemporary();
1605 AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
1606 AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
1607 AddLine("ADD.F16 {}, {}, {};", tmp1, tmp1, tmp2);
1608 AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
1609 return fmt::format("{}.x", tmp1);
1610}
1611
1612std::string ARBDecompiler::HMul2(Operation operation) {
1613 const std::string tmp1 = AllocVectorTemporary();
1614 const std::string tmp2 = AllocVectorTemporary();
1615 AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
1616 AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
1617 AddLine("MUL.F16 {}, {}, {};", tmp1, tmp1, tmp2);
1618 AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
1619 return fmt::format("{}.x", tmp1);
1620}
1621
1622std::string ARBDecompiler::HFma2(Operation operation) {
1623 const std::string tmp1 = AllocVectorTemporary();
1624 const std::string tmp2 = AllocVectorTemporary();
1625 const std::string tmp3 = AllocVectorTemporary();
1626 AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
1627 AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
1628 AddLine("UP2H.F {}.xy, {};", tmp3, Visit(operation[2]));
1629 AddLine("MAD.F16 {}, {}, {}, {};", tmp1, tmp1, tmp2, tmp3);
1630 AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
1631 return fmt::format("{}.x", tmp1);
1632}
1633
1634std::string ARBDecompiler::HAbsolute(Operation operation) {
1635 const std::string temporary = AllocVectorTemporary();
1636 AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
1637 AddLine("PK2H.F {}.x, |{}|;", temporary, temporary);
1638 return fmt::format("{}.x", temporary);
1639}
1640
1641std::string ARBDecompiler::HNegate(Operation operation) {
1642 const std::string temporary = AllocVectorTemporary();
1643 AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
1644 AddLine("MOVC.S RC.x, {};", Visit(operation[1]));
1645 AddLine("MOV.F {}.x (NE.x), -{}.x;", temporary, temporary);
1646 AddLine("MOVC.S RC.x, {};", Visit(operation[2]));
1647 AddLine("MOV.F {}.y (NE.x), -{}.y;", temporary, temporary);
1648 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1649 return fmt::format("{}.x", temporary);
1650}
1651
1652std::string ARBDecompiler::HClamp(Operation operation) {
1653 const std::string tmp1 = AllocVectorTemporary();
1654 const std::string tmp2 = AllocVectorTemporary();
1655 AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
1656 AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[1]));
1657 AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2);
1658 AddLine("MAX.F {}, {}, {};", tmp1, tmp1, tmp2);
1659 AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[2]));
1660 AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2);
1661 AddLine("MIN.F {}, {}, {};", tmp1, tmp1, tmp2);
1662 AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
1663 return fmt::format("{}.x", tmp1);
1664}
1665
1666std::string ARBDecompiler::HCastFloat(Operation operation) {
1667 const std::string temporary = AllocVectorTemporary();
1668 AddLine("MOV.F {}.y, {{0, 0, 0, 0}};", temporary);
1669 AddLine("MOV.F {}.x, {};", temporary, Visit(operation[0]));
1670 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1671 return fmt::format("{}.x", temporary);
1672}
1673
1674std::string ARBDecompiler::HUnpack(Operation operation) {
1675 const std::string operand = Visit(operation[0]);
1676 switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
1677 case Tegra::Shader::HalfType::H0_H1:
1678 return operand;
1679 case Tegra::Shader::HalfType::F32: {
1680 const std::string temporary = AllocVectorTemporary();
1681 AddLine("MOV.U {}.x, {};", temporary, operand);
1682 AddLine("MOV.U {}.y, {}.x;", temporary, temporary);
1683 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1684 return fmt::format("{}.x", temporary);
1685 }
1686 case Tegra::Shader::HalfType::H0_H0: {
1687 const std::string temporary = AllocVectorTemporary();
1688 AddLine("UP2H.F {}.xy, {};", temporary, operand);
1689 AddLine("MOV.U {}.y, {}.x;", temporary, temporary);
1690 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1691 return fmt::format("{}.x", temporary);
1692 }
1693 case Tegra::Shader::HalfType::H1_H1: {
1694 const std::string temporary = AllocVectorTemporary();
1695 AddLine("UP2H.F {}.xy, {};", temporary, operand);
1696 AddLine("MOV.U {}.x, {}.y;", temporary, temporary);
1697 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1698 return fmt::format("{}.x", temporary);
1699 }
1700 }
1701 UNREACHABLE();
1702 return "{0, 0, 0, 0}.x";
1703}
1704
1705std::string ARBDecompiler::HMergeF32(Operation operation) {
1706 const std::string temporary = AllocVectorTemporary();
1707 AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
1708 return fmt::format("{}.x", temporary);
1709}
1710
1711std::string ARBDecompiler::HMergeH0(Operation operation) {
1712 const std::string temporary = AllocVectorTemporary();
1713 AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
1714 AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1]));
1715 AddLine("MOV.U {}.x, {}.z;", temporary, temporary);
1716 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1717 return fmt::format("{}.x", temporary);
1718}
1719
1720std::string ARBDecompiler::HMergeH1(Operation operation) {
1721 const std::string temporary = AllocVectorTemporary();
1722 AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
1723 AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1]));
1724 AddLine("MOV.U {}.y, {}.w;", temporary, temporary);
1725 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1726 return fmt::format("{}.x", temporary);
1727}
1728
1729std::string ARBDecompiler::HPack2(Operation operation) {
1730 const std::string temporary = AllocVectorTemporary();
1731 AddLine("MOV.U {}.x, {};", temporary, Visit(operation[0]));
1732 AddLine("MOV.U {}.y, {};", temporary, Visit(operation[1]));
1733 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1734 return fmt::format("{}.x", temporary);
1735}
1736
1737std::string ARBDecompiler::LogicalAssign(Operation operation) {
1738 const Node& dest = operation[0];
1739 const Node& src = operation[1];
1740
1741 std::string target;
1742
1743 if (const auto pred = std::get_if<PredicateNode>(&*dest)) {
1744 ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment");
1745
1746 const Tegra::Shader::Pred index = pred->GetIndex();
1747 switch (index) {
1748 case Tegra::Shader::Pred::NeverExecute:
1749 case Tegra::Shader::Pred::UnusedIndex:
1750 // Writing to these predicates is a no-op
1751 return {};
1752 }
1753 target = fmt::format("P{}.x", static_cast<u64>(index));
1754 } else if (const auto internal_flag = std::get_if<InternalFlagNode>(&*dest)) {
1755 const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag());
1756 target = fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]);
1757 } else {
1758 UNREACHABLE();
1759 ResetTemporaries();
1760 return {};
1761 }
1762
1763 AddLine("MOV.U {}, {};", target, Visit(src));
1764 ResetTemporaries();
1765 return {};
1766}
1767
1768std::string ARBDecompiler::LogicalPick2(Operation operation) {
1769 const std::string temporary = AllocTemporary();
1770 const u32 index = std::get<ImmediateNode>(*operation[1]).GetValue();
1771 AddLine("MOV.U {}, {}.{};", temporary, Visit(operation[0]), Swizzle(index));
1772 return temporary;
1773}
1774
1775std::string ARBDecompiler::LogicalAnd2(Operation operation) {
1776 const std::string temporary = AllocTemporary();
1777 const std::string op = Visit(operation[0]);
1778 AddLine("AND.U {}, {}.x, {}.y;", temporary, op, op);
1779 return temporary;
1780}
1781
1782std::string ARBDecompiler::FloatOrdered(Operation operation) {
1783 const std::string temporary = AllocTemporary();
1784 AddLine("MOVC.F32 RC.x, {};", Visit(operation[0]));
1785 AddLine("MOVC.F32 RC.y, {};", Visit(operation[1]));
1786 AddLine("MOV.S {}, -1;", temporary);
1787 AddLine("MOV.S {} (NAN.x), 0;", temporary);
1788 AddLine("MOV.S {} (NAN.y), 0;", temporary);
1789 return temporary;
1790}
1791
1792std::string ARBDecompiler::FloatUnordered(Operation operation) {
1793 const std::string temporary = AllocTemporary();
1794 AddLine("MOVC.F32 RC.x, {};", Visit(operation[0]));
1795 AddLine("MOVC.F32 RC.y, {};", Visit(operation[1]));
1796 AddLine("MOV.S {}, 0;", temporary);
1797 AddLine("MOV.S {} (NAN.x), -1;", temporary);
1798 AddLine("MOV.S {} (NAN.y), -1;", temporary);
1799 return temporary;
1800}
1801
1802std::string ARBDecompiler::LogicalAddCarry(Operation operation) {
1803 const std::string temporary = AllocTemporary();
1804 AddLine("ADDC.U RC, {}, {};", Visit(operation[0]), Visit(operation[1]));
1805 AddLine("MOV.S {}, 0;", temporary);
1806 AddLine("IF CF.x;");
1807 AddLine("MOV.S {}, -1;", temporary);
1808 AddLine("ENDIF;");
1809 return temporary;
1810}
1811
1812std::string ARBDecompiler::Texture(Operation operation) {
1813 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1814 const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
1815 const auto [temporary, swizzle] = BuildCoords(operation);
1816
1817 std::string_view opcode = "TEX";
1818 std::string extra;
1819 if (meta.bias) {
1820 ASSERT(!meta.lod);
1821 opcode = "TXB";
1822
1823 if (swizzle < 4) {
1824 AddLine("MOV.F {}.w, {};", temporary, Visit(meta.bias));
1825 } else {
1826 const std::string bias = AllocTemporary();
1827 AddLine("MOV.F {}, {};", bias, Visit(meta.bias));
1828 extra = fmt::format(" {},", bias);
1829 }
1830 }
1831 if (meta.lod) {
1832 ASSERT(!meta.bias);
1833 opcode = "TXL";
1834
1835 if (swizzle < 4) {
1836 AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod));
1837 } else {
1838 const std::string lod = AllocTemporary();
1839 AddLine("MOV.F {}, {};", lod, Visit(meta.lod));
1840 extra = fmt::format(" {},", lod);
1841 }
1842 }
1843
1844 AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, temporary, extra, sampler_id,
1845 TextureType(meta), BuildAoffi(operation));
1846 AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
1847 return fmt::format("{}.x", temporary);
1848}
1849
1850std::string ARBDecompiler::TextureGather(Operation operation) {
1851 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1852 const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
1853 const auto [temporary, swizzle] = BuildCoords(operation);
1854
1855 std::string comp;
1856 if (!meta.sampler.is_shadow) {
1857 const auto& immediate = std::get<ImmediateNode>(*meta.component);
1858 comp = fmt::format(".{}", Swizzle(immediate.GetValue()));
1859 }
1860
1861 AddLine("TXG.F {}, {}, texture[{}]{}, {}{};", temporary, temporary, sampler_id, comp,
1862 TextureType(meta), BuildAoffi(operation));
1863 AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
1864 return fmt::format("{}.x", temporary);
1865}
1866
1867std::string ARBDecompiler::TextureQueryDimensions(Operation operation) {
1868 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1869 const std::string temporary = AllocVectorTemporary();
1870 const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
1871
1872 ASSERT(!meta.sampler.is_array);
1873
1874 const std::string lod = operation.GetOperandsCount() > 0 ? Visit(operation[0]) : "0";
1875 AddLine("TXQ {}, {}, texture[{}], {};", temporary, lod, sampler_id, TextureType(meta));
1876 AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
1877 return fmt::format("{}.x", temporary);
1878}
1879
1880std::string ARBDecompiler::TextureQueryLod(Operation operation) {
1881 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1882 const std::string temporary = AllocVectorTemporary();
1883 const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
1884
1885 ASSERT(!meta.sampler.is_array);
1886
1887 const std::size_t count = operation.GetOperandsCount();
1888 for (std::size_t i = 0; i < count; ++i) {
1889 AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
1890 }
1891 AddLine("LOD.F {}, {}, texture[{}], {};", temporary, temporary, sampler_id, TextureType(meta));
1892 AddLine("MUL.F32 {}, {}, {{256, 256, 0, 0}};", temporary, temporary);
1893 AddLine("TRUNC.S {}, {};", temporary, temporary);
1894 AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
1895 return fmt::format("{}.x", temporary);
1896}
1897
1898std::string ARBDecompiler::TexelFetch(Operation operation) {
1899 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1900 const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
1901 const auto [temporary, swizzle] = BuildCoords(operation);
1902
1903 if (!meta.sampler.is_buffer) {
1904 ASSERT(swizzle < 4);
1905 AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod));
1906 }
1907 AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, temporary, sampler_id, TextureType(meta),
1908 BuildAoffi(operation));
1909 AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
1910 return fmt::format("{}.x", temporary);
1911}
1912
1913std::string ARBDecompiler::TextureGradient(Operation operation) {
1914 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1915 const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
1916 const std::string ddx = AllocVectorTemporary();
1917 const std::string ddy = AllocVectorTemporary();
1918 const std::string coord = BuildCoords(operation).first;
1919
1920 const std::size_t num_components = meta.derivates.size() / 2;
1921 for (std::size_t index = 0; index < num_components; ++index) {
1922 const char swizzle = Swizzle(index);
1923 AddLine("MOV.F {}.{}, {};", ddx, swizzle, Visit(meta.derivates[index * 2]));
1924 AddLine("MOV.F {}.{}, {};", ddy, swizzle, Visit(meta.derivates[index * 2 + 1]));
1925 }
1926
1927 const std::string_view result = coord;
1928 AddLine("TXD.F {}, {}, {}, {}, texture[{}], {}{};", result, coord, ddx, ddy, sampler_id,
1929 TextureType(meta), BuildAoffi(operation));
1930 AddLine("MOV.F {}.x, {}.{};", result, result, Swizzle(meta.element));
1931 return fmt::format("{}.x", result);
1932}
1933
1934std::string ARBDecompiler::ImageLoad(Operation operation) {
1935 const auto& meta = std::get<MetaImage>(operation.GetMeta());
1936 const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
1937 const std::size_t count = operation.GetOperandsCount();
1938 const std::string_view type = ImageType(meta.image.type);
1939
1940 const std::string temporary = AllocVectorTemporary();
1941 for (std::size_t i = 0; i < count; ++i) {
1942 AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
1943 }
1944 AddLine("LOADIM.F {}, {}, image[{}], {};", temporary, temporary, image_id, type);
1945 AddLine("MOV.F {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
1946 return fmt::format("{}.x", temporary);
1947}
1948
1949std::string ARBDecompiler::ImageStore(Operation operation) {
1950 const auto& meta = std::get<MetaImage>(operation.GetMeta());
1951 const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
1952 const std::size_t num_coords = operation.GetOperandsCount();
1953 const std::size_t num_values = meta.values.size();
1954 const std::string_view type = ImageType(meta.image.type);
1955
1956 const std::string coord = AllocVectorTemporary();
1957 const std::string value = AllocVectorTemporary();
1958 for (std::size_t i = 0; i < num_coords; ++i) {
1959 AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i]));
1960 }
1961 for (std::size_t i = 0; i < num_values; ++i) {
1962 AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i]));
1963 }
1964 AddLine("STOREIM.F image[{}], {}, {}, {};", image_id, value, coord, type);
1965 return {};
1966}
1967
1968std::string ARBDecompiler::Branch(Operation operation) {
1969 const auto target = std::get<ImmediateNode>(*operation[0]);
1970 AddLine("MOV.U PC.x, {};", target.GetValue());
1971 AddLine("CONT;");
1972 return {};
1973}
1974
1975std::string ARBDecompiler::BranchIndirect(Operation operation) {
1976 AddLine("MOV.U PC.x, {};", Visit(operation[0]));
1977 AddLine("CONT;");
1978 return {};
1979}
1980
1981std::string ARBDecompiler::PushFlowStack(Operation operation) {
1982 const auto stack = std::get<MetaStackClass>(operation.GetMeta());
1983 const u32 target = std::get<ImmediateNode>(*operation[0]).GetValue();
1984 const std::string_view stack_name = StackName(stack);
1985 AddLine("MOV.U {}[{}_TOP.x].x, {};", stack_name, stack_name, target);
1986 AddLine("ADD.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name);
1987 return {};
1988}
1989
1990std::string ARBDecompiler::PopFlowStack(Operation operation) {
1991 const auto stack = std::get<MetaStackClass>(operation.GetMeta());
1992 const std::string_view stack_name = StackName(stack);
1993 AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name);
1994 AddLine("MOV.U PC.x, {}[{}_TOP.x].x;", stack_name, stack_name);
1995 AddLine("CONT;");
1996 return {};
1997}
1998
1999std::string ARBDecompiler::Exit(Operation) {
2000 Exit();
2001 return {};
2002}
2003
2004std::string ARBDecompiler::Discard(Operation) {
2005 AddLine("KIL TR;");
2006 return {};
2007}
2008
2009std::string ARBDecompiler::EmitVertex(Operation) {
2010 AddLine("EMIT;");
2011 return {};
2012}
2013
2014std::string ARBDecompiler::EndPrimitive(Operation) {
2015 AddLine("ENDPRIM;");
2016 return {};
2017}
2018
2019std::string ARBDecompiler::InvocationId(Operation) {
2020 return "primitive.invocation";
2021}
2022
2023std::string ARBDecompiler::YNegate(Operation) {
2024 LOG_WARNING(Render_OpenGL, "(STUBBED)");
2025 const std::string temporary = AllocTemporary();
2026 AddLine("MOV.F {}, 1;", temporary);
2027 return temporary;
2028}
2029
2030std::string ARBDecompiler::ThreadId(Operation) {
2031 return fmt::format("{}.threadid", StageInputName(stage));
2032}
2033
2034std::string ARBDecompiler::ShuffleIndexed(Operation operation) {
2035 if (!device.HasWarpIntrinsics()) {
2036 LOG_ERROR(Render_OpenGL,
2037 "NV_shader_thread_shuffle is missing. Kepler or better is required.");
2038 return Visit(operation[0]);
2039 }
2040 const std::string temporary = AllocVectorTemporary();
2041 AddLine("SHFIDX.U {}, {}, {}, {{31, 0, 0, 0}};", temporary, Visit(operation[0]),
2042 Visit(operation[1]));
2043 AddLine("MOV.U {}.x, {}.y;", temporary, temporary);
2044 return fmt::format("{}.x", temporary);
2045}
2046
2047std::string ARBDecompiler::Barrier(Operation) {
2048 if (!ir.IsDecompiled()) {
2049 LOG_ERROR(Render_OpenGL, "BAR used but shader is not decompiled");
2050 return {};
2051 }
2052 AddLine("BAR;");
2053 return {};
2054}
2055
2056std::string ARBDecompiler::MemoryBarrierGroup(Operation) {
2057 AddLine("MEMBAR.CTA;");
2058 return {};
2059}
2060
2061std::string ARBDecompiler::MemoryBarrierGlobal(Operation) {
2062 AddLine("MEMBAR;");
2063 return {};
2064}
2065
2066} // Anonymous namespace
2067
2068std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
2069 const VideoCommon::Shader::Registry& registry,
2070 Tegra::Engines::ShaderType stage, std::string_view identifier) {
2071 return ARBDecompiler(device, ir, registry, stage, identifier).Code();
2072}
2073
2074} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.h b/src/video_core/renderer_opengl/gl_arb_decompiler.h
new file mode 100644
index 000000000..6afc87220
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_arb_decompiler.h
@@ -0,0 +1,29 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <string>
8#include <string_view>
9
10#include "common/common_types.h"
11
12namespace Tegra::Engines {
13enum class ShaderType : u32;
14}
15
16namespace VideoCommon::Shader {
17class ShaderIR;
18class Registry;
19} // namespace VideoCommon::Shader
20
21namespace OpenGL {
22
23class Device;
24
25std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
26 const VideoCommon::Shader::Registry& registry,
27 Tegra::Engines::ShaderType stage, std::string_view identifier);
28
29} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 21a4f4def..b31d604e4 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -221,6 +221,7 @@ Device::Device()
221 has_component_indexing_bug = is_amd; 221 has_component_indexing_bug = is_amd;
222 has_precise_bug = TestPreciseBug(); 222 has_precise_bug = TestPreciseBug();
223 has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; 223 has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data;
224 has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
224 use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && 225 use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 &&
225 GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && 226 GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback &&
226 GLAD_GL_NV_transform_feedback2; 227 GLAD_GL_NV_transform_feedback2;
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 98cca0254..145347943 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -88,6 +88,10 @@ public:
88 return has_fast_buffer_sub_data; 88 return has_fast_buffer_sub_data;
89 } 89 }
90 90
91 bool HasNvViewportArray2() const {
92 return has_nv_viewport_array2;
93 }
94
91 bool UseAssemblyShaders() const { 95 bool UseAssemblyShaders() const {
92 return use_assembly_shaders; 96 return use_assembly_shaders;
93 } 97 }
@@ -111,6 +115,7 @@ private:
111 bool has_component_indexing_bug{}; 115 bool has_component_indexing_bug{};
112 bool has_precise_bug{}; 116 bool has_precise_bug{};
113 bool has_fast_buffer_sub_data{}; 117 bool has_fast_buffer_sub_data{};
118 bool has_nv_viewport_array2{};
114 bool use_assembly_shaders{}; 119 bool use_assembly_shaders{};
115}; 120};
116 121
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index c28486b1d..46e780a06 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -20,6 +20,7 @@
20#include "video_core/engines/maxwell_3d.h" 20#include "video_core/engines/maxwell_3d.h"
21#include "video_core/engines/shader_type.h" 21#include "video_core/engines/shader_type.h"
22#include "video_core/memory_manager.h" 22#include "video_core/memory_manager.h"
23#include "video_core/renderer_opengl/gl_arb_decompiler.h"
23#include "video_core/renderer_opengl/gl_rasterizer.h" 24#include "video_core/renderer_opengl/gl_rasterizer.h"
24#include "video_core/renderer_opengl/gl_shader_cache.h" 25#include "video_core/renderer_opengl/gl_shader_cache.h"
25#include "video_core/renderer_opengl/gl_shader_decompiler.h" 26#include "video_core/renderer_opengl/gl_shader_decompiler.h"
@@ -148,7 +149,8 @@ ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 u
148 auto program = std::make_shared<ProgramHandle>(); 149 auto program = std::make_shared<ProgramHandle>();
149 150
150 if (device.UseAssemblyShaders()) { 151 if (device.UseAssemblyShaders()) {
151 const std::string arb = "Not implemented"; 152 const std::string arb =
153 DecompileAssemblyShader(device, ir, registry, shader_type, shader_id);
152 154
153 GLuint& arb_prog = program->assembly_program.handle; 155 GLuint& arb_prog = program->assembly_program.handle;
154 156
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index 7e9073cc3..32c81dc70 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -631,13 +631,11 @@ void Config::ReadRendererValues() {
631 static_cast<Settings::RendererBackend>(ReadSetting(QStringLiteral("backend"), 0).toInt()); 631 static_cast<Settings::RendererBackend>(ReadSetting(QStringLiteral("backend"), 0).toInt());
632 Settings::values.renderer_debug = ReadSetting(QStringLiteral("debug"), false).toBool(); 632 Settings::values.renderer_debug = ReadSetting(QStringLiteral("debug"), false).toBool();
633 Settings::values.vulkan_device = ReadSetting(QStringLiteral("vulkan_device"), 0).toInt(); 633 Settings::values.vulkan_device = ReadSetting(QStringLiteral("vulkan_device"), 0).toInt();
634 Settings::values.resolution_factor =
635 ReadSetting(QStringLiteral("resolution_factor"), 1.0).toFloat();
636 Settings::values.aspect_ratio = ReadSetting(QStringLiteral("aspect_ratio"), 0).toInt(); 634 Settings::values.aspect_ratio = ReadSetting(QStringLiteral("aspect_ratio"), 0).toInt();
637 Settings::values.max_anisotropy = ReadSetting(QStringLiteral("max_anisotropy"), 0).toInt(); 635 Settings::values.max_anisotropy = ReadSetting(QStringLiteral("max_anisotropy"), 0).toInt();
638 Settings::values.use_frame_limit = 636 Settings::values.use_frame_limit =
639 ReadSetting(QStringLiteral("use_frame_limit"), true).toBool(); 637 ReadSetting(QStringLiteral("use_frame_limit"), true).toBool();
640 Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt(); 638 Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toUInt();
641 Settings::values.use_disk_shader_cache = 639 Settings::values.use_disk_shader_cache =
642 ReadSetting(QStringLiteral("use_disk_shader_cache"), true).toBool(); 640 ReadSetting(QStringLiteral("use_disk_shader_cache"), true).toBool();
643 const int gpu_accuracy_level = ReadSetting(QStringLiteral("gpu_accuracy"), 0).toInt(); 641 const int gpu_accuracy_level = ReadSetting(QStringLiteral("gpu_accuracy"), 0).toInt();
@@ -722,8 +720,6 @@ void Config::ReadUIValues() {
722 .toString(); 720 .toString();
723 UISettings::values.enable_discord_presence = 721 UISettings::values.enable_discord_presence =
724 ReadSetting(QStringLiteral("enable_discord_presence"), true).toBool(); 722 ReadSetting(QStringLiteral("enable_discord_presence"), true).toBool();
725 UISettings::values.screenshot_resolution_factor =
726 static_cast<u16>(ReadSetting(QStringLiteral("screenshot_resolution_factor"), 0).toUInt());
727 UISettings::values.select_user_on_boot = 723 UISettings::values.select_user_on_boot =
728 ReadSetting(QStringLiteral("select_user_on_boot"), false).toBool(); 724 ReadSetting(QStringLiteral("select_user_on_boot"), false).toBool();
729 725
@@ -1082,8 +1078,6 @@ void Config::SaveRendererValues() {
1082 WriteSetting(QStringLiteral("backend"), static_cast<int>(Settings::values.renderer_backend), 0); 1078 WriteSetting(QStringLiteral("backend"), static_cast<int>(Settings::values.renderer_backend), 0);
1083 WriteSetting(QStringLiteral("debug"), Settings::values.renderer_debug, false); 1079 WriteSetting(QStringLiteral("debug"), Settings::values.renderer_debug, false);
1084 WriteSetting(QStringLiteral("vulkan_device"), Settings::values.vulkan_device, 0); 1080 WriteSetting(QStringLiteral("vulkan_device"), Settings::values.vulkan_device, 0);
1085 WriteSetting(QStringLiteral("resolution_factor"),
1086 static_cast<double>(Settings::values.resolution_factor), 1.0);
1087 WriteSetting(QStringLiteral("aspect_ratio"), Settings::values.aspect_ratio, 0); 1081 WriteSetting(QStringLiteral("aspect_ratio"), Settings::values.aspect_ratio, 0);
1088 WriteSetting(QStringLiteral("max_anisotropy"), Settings::values.max_anisotropy, 0); 1082 WriteSetting(QStringLiteral("max_anisotropy"), Settings::values.max_anisotropy, 0);
1089 WriteSetting(QStringLiteral("use_frame_limit"), Settings::values.use_frame_limit, true); 1083 WriteSetting(QStringLiteral("use_frame_limit"), Settings::values.use_frame_limit, true);
@@ -1159,8 +1153,6 @@ void Config::SaveUIValues() {
1159 QString::fromUtf8(UISettings::themes[0].second)); 1153 QString::fromUtf8(UISettings::themes[0].second));
1160 WriteSetting(QStringLiteral("enable_discord_presence"), 1154 WriteSetting(QStringLiteral("enable_discord_presence"),
1161 UISettings::values.enable_discord_presence, true); 1155 UISettings::values.enable_discord_presence, true);
1162 WriteSetting(QStringLiteral("screenshot_resolution_factor"),
1163 UISettings::values.screenshot_resolution_factor, 0);
1164 WriteSetting(QStringLiteral("select_user_on_boot"), UISettings::values.select_user_on_boot, 1156 WriteSetting(QStringLiteral("select_user_on_boot"), UISettings::values.select_user_on_boot,
1165 false); 1157 false);
1166 1158
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index ea667caef..304625cd7 100644
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -19,47 +19,6 @@
19#include "video_core/renderer_vulkan/renderer_vulkan.h" 19#include "video_core/renderer_vulkan/renderer_vulkan.h"
20#endif 20#endif
21 21
22namespace {
23enum class Resolution : int {
24 Auto,
25 Scale1x,
26 Scale2x,
27 Scale3x,
28 Scale4x,
29};
30
31float ToResolutionFactor(Resolution option) {
32 switch (option) {
33 case Resolution::Auto:
34 return 0.f;
35 case Resolution::Scale1x:
36 return 1.f;
37 case Resolution::Scale2x:
38 return 2.f;
39 case Resolution::Scale3x:
40 return 3.f;
41 case Resolution::Scale4x:
42 return 4.f;
43 }
44 return 0.f;
45}
46
47Resolution FromResolutionFactor(float factor) {
48 if (factor == 0.f) {
49 return Resolution::Auto;
50 } else if (factor == 1.f) {
51 return Resolution::Scale1x;
52 } else if (factor == 2.f) {
53 return Resolution::Scale2x;
54 } else if (factor == 3.f) {
55 return Resolution::Scale3x;
56 } else if (factor == 4.f) {
57 return Resolution::Scale4x;
58 }
59 return Resolution::Auto;
60}
61} // Anonymous namespace
62
63ConfigureGraphics::ConfigureGraphics(QWidget* parent) 22ConfigureGraphics::ConfigureGraphics(QWidget* parent)
64 : QWidget(parent), ui(new Ui::ConfigureGraphics) { 23 : QWidget(parent), ui(new Ui::ConfigureGraphics) {
65 vulkan_device = Settings::values.vulkan_device; 24 vulkan_device = Settings::values.vulkan_device;
@@ -99,8 +58,6 @@ void ConfigureGraphics::SetConfiguration() {
99 58
100 ui->api->setEnabled(runtime_lock); 59 ui->api->setEnabled(runtime_lock);
101 ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend)); 60 ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend));
102 ui->resolution_factor_combobox->setCurrentIndex(
103 static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor)));
104 ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio); 61 ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio);
105 ui->use_disk_shader_cache->setEnabled(runtime_lock); 62 ui->use_disk_shader_cache->setEnabled(runtime_lock);
106 ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache); 63 ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache);
@@ -114,8 +71,6 @@ void ConfigureGraphics::SetConfiguration() {
114void ConfigureGraphics::ApplyConfiguration() { 71void ConfigureGraphics::ApplyConfiguration() {
115 Settings::values.renderer_backend = GetCurrentGraphicsBackend(); 72 Settings::values.renderer_backend = GetCurrentGraphicsBackend();
116 Settings::values.vulkan_device = vulkan_device; 73 Settings::values.vulkan_device = vulkan_device;
117 Settings::values.resolution_factor =
118 ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex()));
119 Settings::values.aspect_ratio = ui->aspect_ratio_combobox->currentIndex(); 74 Settings::values.aspect_ratio = ui->aspect_ratio_combobox->currentIndex();
120 Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked(); 75 Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked();
121 Settings::values.use_asynchronous_gpu_emulation = 76 Settings::values.use_asynchronous_gpu_emulation =
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui
index c816d6108..6e75447a5 100644
--- a/src/yuzu/configuration/configure_graphics.ui
+++ b/src/yuzu/configuration/configure_graphics.ui
@@ -85,46 +85,6 @@
85 </widget> 85 </widget>
86 </item> 86 </item>
87 <item> 87 <item>
88 <layout class="QHBoxLayout" name="horizontalLayout_2">
89 <item>
90 <widget class="QLabel" name="label">
91 <property name="text">
92 <string>Internal Resolution:</string>
93 </property>
94 </widget>
95 </item>
96 <item>
97 <widget class="QComboBox" name="resolution_factor_combobox">
98 <item>
99 <property name="text">
100 <string>Auto (Window Size)</string>
101 </property>
102 </item>
103 <item>
104 <property name="text">
105 <string>Native (1280x720)</string>
106 </property>
107 </item>
108 <item>
109 <property name="text">
110 <string>2x Native (2560x1440)</string>
111 </property>
112 </item>
113 <item>
114 <property name="text">
115 <string>3x Native (3840x2160)</string>
116 </property>
117 </item>
118 <item>
119 <property name="text">
120 <string>4x Native (5120x2880)</string>
121 </property>
122 </item>
123 </widget>
124 </item>
125 </layout>
126 </item>
127 <item>
128 <layout class="QHBoxLayout" name="horizontalLayout_6"> 88 <layout class="QHBoxLayout" name="horizontalLayout_6">
129 <item> 89 <item>
130 <widget class="QLabel" name="ar_label"> 90 <widget class="QLabel" name="ar_label">
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp
index 37aadf7f8..be5006ad3 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.cpp
+++ b/src/yuzu/configuration/configure_graphics_advanced.cpp
@@ -12,9 +12,6 @@ ConfigureGraphicsAdvanced::ConfigureGraphicsAdvanced(QWidget* parent)
12 12
13 ui->setupUi(this); 13 ui->setupUi(this);
14 14
15 // TODO: Remove this after assembly shaders are fully integrated
16 ui->use_assembly_shaders->setVisible(false);
17
18 SetConfiguration(); 15 SetConfiguration();
19} 16}
20 17
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 270cccc77..4119d7907 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -689,10 +689,7 @@ void GMainWindow::InitializeHotkeys() {
689 Settings::values.use_frame_limit = !Settings::values.use_frame_limit; 689 Settings::values.use_frame_limit = !Settings::values.use_frame_limit;
690 UpdateStatusBar(); 690 UpdateStatusBar();
691 }); 691 });
692 // TODO: Remove this comment/static whenever the next major release of 692 constexpr u16 SPEED_LIMIT_STEP = 5;
693 // MSVC occurs and we make it a requirement (see:
694 // https://developercommunity.visualstudio.com/content/problem/93922/constexprs-are-trying-to-be-captured-in-lambda-fun.html)
695 static constexpr u16 SPEED_LIMIT_STEP = 5;
696 connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Increase Speed Limit"), this), 693 connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Increase Speed Limit"), this),
697 &QShortcut::activated, this, [&] { 694 &QShortcut::activated, this, [&] {
698 if (Settings::values.frame_limit < 9999 - SPEED_LIMIT_STEP) { 695 if (Settings::values.frame_limit < 9999 - SPEED_LIMIT_STEP) {
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 7240270f5..659b9f701 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -380,8 +380,6 @@ void Config::ReadValues() {
380 Settings::values.renderer_debug = sdl2_config->GetBoolean("Renderer", "debug", false); 380 Settings::values.renderer_debug = sdl2_config->GetBoolean("Renderer", "debug", false);
381 Settings::values.vulkan_device = sdl2_config->GetInteger("Renderer", "vulkan_device", 0); 381 Settings::values.vulkan_device = sdl2_config->GetInteger("Renderer", "vulkan_device", 0);
382 382
383 Settings::values.resolution_factor =
384 static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0));
385 Settings::values.aspect_ratio = 383 Settings::values.aspect_ratio =
386 static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0)); 384 static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0));
387 Settings::values.max_anisotropy = 385 Settings::values.max_anisotropy =
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index 6f53e9659..45c07ed5d 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -117,11 +117,6 @@ use_hw_renderer =
117# 0: Interpreter (slow), 1 (default): JIT (fast) 117# 0: Interpreter (slow), 1 (default): JIT (fast)
118use_shader_jit = 118use_shader_jit =
119 119
120# Resolution scale factor
121# 0: Auto (scales resolution to window size), 1: Native Switch screen resolution, Otherwise a scale
122# factor for the Switch resolution
123resolution_factor =
124
125# Aspect ratio 120# Aspect ratio
126# 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window 121# 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window
127aspect_ratio = 122aspect_ratio =
diff --git a/src/yuzu_tester/config.cpp b/src/yuzu_tester/config.cpp
index 3be58b15d..1566c2e3f 100644
--- a/src/yuzu_tester/config.cpp
+++ b/src/yuzu_tester/config.cpp
@@ -116,8 +116,6 @@ void Config::ReadValues() {
116 Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false); 116 Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false);
117 117
118 // Renderer 118 // Renderer
119 Settings::values.resolution_factor =
120 static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0));
121 Settings::values.aspect_ratio = 119 Settings::values.aspect_ratio =
122 static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0)); 120 static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0));
123 Settings::values.max_anisotropy = 121 Settings::values.max_anisotropy =
diff --git a/src/yuzu_tester/default_ini.h b/src/yuzu_tester/default_ini.h
index ca203b64d..41bbbbf60 100644
--- a/src/yuzu_tester/default_ini.h
+++ b/src/yuzu_tester/default_ini.h
@@ -21,11 +21,6 @@ use_hw_renderer =
21# 0: Interpreter (slow), 1 (default): JIT (fast) 21# 0: Interpreter (slow), 1 (default): JIT (fast)
22use_shader_jit = 22use_shader_jit =
23 23
24# Resolution scale factor
25# 0: Auto (scales resolution to window size), 1: Native Switch screen resolution, Otherwise a scale
26# factor for the Switch resolution
27resolution_factor =
28
29# Aspect ratio 24# Aspect ratio
30# 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window 25# 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window
31aspect_ratio = 26aspect_ratio =