summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/citra_qt/configure.ui12
-rw-r--r--src/common/file_util.cpp43
-rw-r--r--src/common/file_util.h26
-rw-r--r--src/common/thread.h46
-rw-r--r--src/common/x64/emitter.cpp28
-rw-r--r--src/common/x64/emitter.h2
-rw-r--r--src/core/hle/config_mem.cpp7
-rw-r--r--src/core/hle/hle.cpp2
-rw-r--r--src/core/hle/service/soc_u.cpp100
-rw-r--r--src/core/hw/y2r.cpp2
-rw-r--r--src/core/loader/3dsx.cpp6
-rw-r--r--src/core/loader/ncch.cpp4
-rw-r--r--src/video_core/command_processor.cpp4
-rw-r--r--src/video_core/debug_utils/debug_utils.cpp19
-rw-r--r--src/video_core/rasterizer.cpp99
-rw-r--r--src/video_core/shader/shader.cpp34
-rw-r--r--src/video_core/shader/shader.h3
-rw-r--r--src/video_core/shader/shader_jit_x64.cpp302
-rw-r--r--src/video_core/shader/shader_jit_x64.h58
19 files changed, 474 insertions, 323 deletions
diff --git a/src/citra_qt/configure.ui b/src/citra_qt/configure.ui
index 3c1f2ebba..6ae056ff9 100644
--- a/src/citra_qt/configure.ui
+++ b/src/citra_qt/configure.ui
@@ -10,24 +10,12 @@
10 <height>501</height> 10 <height>501</height>
11 </rect> 11 </rect>
12 </property> 12 </property>
13 <property name="minimumSize">
14 <size>
15 <width>370</width>
16 <height>219</height>
17 </size>
18 </property>
19 <property name="windowTitle"> 13 <property name="windowTitle">
20 <string>Citra Configuration</string> 14 <string>Citra Configuration</string>
21 </property> 15 </property>
22 <layout class="QVBoxLayout" name="verticalLayout"> 16 <layout class="QVBoxLayout" name="verticalLayout">
23 <item> 17 <item>
24 <widget class="QTabWidget" name="tabWidget"> 18 <widget class="QTabWidget" name="tabWidget">
25 <property name="minimumSize">
26 <size>
27 <width>371</width>
28 <height>221</height>
29 </size>
30 </property>
31 <property name="currentIndex"> 19 <property name="currentIndex">
32 <number>0</number> 20 <number>0</number>
33 </property> 21 </property>
diff --git a/src/common/file_util.cpp b/src/common/file_util.cpp
index 687b7ae5a..6e2867658 100644
--- a/src/common/file_util.cpp
+++ b/src/common/file_util.cpp
@@ -833,13 +833,12 @@ size_t WriteStringToFile(bool text_file, const std::string &str, const char *fil
833 833
834size_t ReadFileToString(bool text_file, const char *filename, std::string &str) 834size_t ReadFileToString(bool text_file, const char *filename, std::string &str)
835{ 835{
836 FileUtil::IOFile file(filename, text_file ? "r" : "rb"); 836 IOFile file(filename, text_file ? "r" : "rb");
837 auto const f = file.GetHandle();
838 837
839 if (!f) 838 if (!file)
840 return false; 839 return false;
841 840
842 str.resize(static_cast<u32>(GetSize(f))); 841 str.resize(static_cast<u32>(file.GetSize()));
843 return file.ReadArray(&str[0], str.size()); 842 return file.ReadArray(&str[0], str.size());
844} 843}
845 844
@@ -886,15 +885,10 @@ void SplitFilename83(const std::string& filename, std::array<char, 9>& short_nam
886} 885}
887 886
888IOFile::IOFile() 887IOFile::IOFile()
889 : m_file(nullptr), m_good(true) 888{
890{} 889}
891
892IOFile::IOFile(std::FILE* file)
893 : m_file(file), m_good(true)
894{}
895 890
896IOFile::IOFile(const std::string& filename, const char openmode[]) 891IOFile::IOFile(const std::string& filename, const char openmode[])
897 : m_file(nullptr), m_good(true)
898{ 892{
899 Open(filename, openmode); 893 Open(filename, openmode);
900} 894}
@@ -905,7 +899,6 @@ IOFile::~IOFile()
905} 899}
906 900
907IOFile::IOFile(IOFile&& other) 901IOFile::IOFile(IOFile&& other)
908 : m_file(nullptr), m_good(true)
909{ 902{
910 Swap(other); 903 Swap(other);
911} 904}
@@ -944,26 +937,12 @@ bool IOFile::Close()
944 return m_good; 937 return m_good;
945} 938}
946 939
947std::FILE* IOFile::ReleaseHandle() 940u64 IOFile::GetSize() const
948{
949 std::FILE* const ret = m_file;
950 m_file = nullptr;
951 return ret;
952}
953
954void IOFile::SetHandle(std::FILE* file)
955{
956 Close();
957 Clear();
958 m_file = file;
959}
960
961u64 IOFile::GetSize()
962{ 941{
963 if (IsOpen()) 942 if (IsOpen())
964 return FileUtil::GetSize(m_file); 943 return FileUtil::GetSize(m_file);
965 else 944
966 return 0; 945 return 0;
967} 946}
968 947
969bool IOFile::Seek(s64 off, int origin) 948bool IOFile::Seek(s64 off, int origin)
@@ -974,12 +953,12 @@ bool IOFile::Seek(s64 off, int origin)
974 return m_good; 953 return m_good;
975} 954}
976 955
977u64 IOFile::Tell() 956u64 IOFile::Tell() const
978{ 957{
979 if (IsOpen()) 958 if (IsOpen())
980 return ftello(m_file); 959 return ftello(m_file);
981 else 960
982 return -1; 961 return -1;
983} 962}
984 963
985bool IOFile::Flush() 964bool IOFile::Flush()
diff --git a/src/common/file_util.h b/src/common/file_util.h
index 880b8a1e3..b54a9fb72 100644
--- a/src/common/file_util.h
+++ b/src/common/file_util.h
@@ -176,7 +176,6 @@ class IOFile : public NonCopyable
176{ 176{
177public: 177public:
178 IOFile(); 178 IOFile();
179 explicit IOFile(std::FILE* file);
180 IOFile(const std::string& filename, const char openmode[]); 179 IOFile(const std::string& filename, const char openmode[]);
181 180
182 ~IOFile(); 181 ~IOFile();
@@ -192,6 +191,9 @@ public:
192 template <typename T> 191 template <typename T>
193 size_t ReadArray(T* data, size_t length) 192 size_t ReadArray(T* data, size_t length)
194 { 193 {
194 static_assert(std::is_standard_layout<T>(), "Given array does not consist of standard layout objects");
195 static_assert(std::is_trivially_copyable<T>(), "Given array does not consist of trivially copyable objects");
196
195 if (!IsOpen()) { 197 if (!IsOpen()) {
196 m_good = false; 198 m_good = false;
197 return -1; 199 return -1;
@@ -207,9 +209,8 @@ public:
207 template <typename T> 209 template <typename T>
208 size_t WriteArray(const T* data, size_t length) 210 size_t WriteArray(const T* data, size_t length)
209 { 211 {
210 static_assert(std::is_standard_layout<T>::value, "Given array does not consist of standard layout objects"); 212 static_assert(std::is_standard_layout<T>(), "Given array does not consist of standard layout objects");
211 // TODO: gcc 4.8 does not support is_trivially_copyable, but we really should check for it here. 213 static_assert(std::is_trivially_copyable<T>(), "Given array does not consist of trivially copyable objects");
212 //static_assert(std::is_trivially_copyable<T>::value, "Given array does not consist of trivially copyable objects");
213 214
214 if (!IsOpen()) { 215 if (!IsOpen()) {
215 m_good = false; 216 m_good = false;
@@ -243,25 +244,20 @@ public:
243 244
244 // m_good is set to false when a read, write or other function fails 245 // m_good is set to false when a read, write or other function fails
245 bool IsGood() const { return m_good; } 246 bool IsGood() const { return m_good; }
246 operator void*() { return m_good ? m_file : nullptr; } 247 explicit operator bool() const { return IsGood(); }
247
248 std::FILE* ReleaseHandle();
249
250 std::FILE* GetHandle() { return m_file; }
251
252 void SetHandle(std::FILE* file);
253 248
254 bool Seek(s64 off, int origin); 249 bool Seek(s64 off, int origin);
255 u64 Tell(); 250 u64 Tell() const;
256 u64 GetSize(); 251 u64 GetSize() const;
257 bool Resize(u64 size); 252 bool Resize(u64 size);
258 bool Flush(); 253 bool Flush();
259 254
260 // clear error state 255 // clear error state
261 void Clear() { m_good = true; std::clearerr(m_file); } 256 void Clear() { m_good = true; std::clearerr(m_file); }
262 257
263 std::FILE* m_file; 258private:
264 bool m_good; 259 std::FILE* m_file = nullptr;
260 bool m_good = true;
265}; 261};
266 262
267} // namespace 263} // namespace
diff --git a/src/common/thread.h b/src/common/thread.h
index 8255ee6d3..bbfa8befa 100644
--- a/src/common/thread.h
+++ b/src/common/thread.h
@@ -30,8 +30,7 @@
30# endif 30# endif
31#endif 31#endif
32 32
33namespace Common 33namespace Common {
34{
35 34
36int CurrentThreadId(); 35int CurrentThreadId();
37 36
@@ -43,55 +42,55 @@ public:
43 Event() : is_set(false) {} 42 Event() : is_set(false) {}
44 43
45 void Set() { 44 void Set() {
46 std::lock_guard<std::mutex> lk(m_mutex); 45 std::lock_guard<std::mutex> lk(mutex);
47 if (!is_set) { 46 if (!is_set) {
48 is_set = true; 47 is_set = true;
49 m_condvar.notify_one(); 48 condvar.notify_one();
50 } 49 }
51 } 50 }
52 51
53 void Wait() { 52 void Wait() {
54 std::unique_lock<std::mutex> lk(m_mutex); 53 std::unique_lock<std::mutex> lk(mutex);
55 m_condvar.wait(lk, [&]{ return is_set; }); 54 condvar.wait(lk, [&]{ return is_set; });
56 is_set = false; 55 is_set = false;
57 } 56 }
58 57
59 void Reset() { 58 void Reset() {
60 std::unique_lock<std::mutex> lk(m_mutex); 59 std::unique_lock<std::mutex> lk(mutex);
61 // no other action required, since wait loops on the predicate and any lingering signal will get cleared on the first iteration 60 // no other action required, since wait loops on the predicate and any lingering signal will get cleared on the first iteration
62 is_set = false; 61 is_set = false;
63 } 62 }
64 63
65private: 64private:
66 bool is_set; 65 bool is_set;
67 std::condition_variable m_condvar; 66 std::condition_variable condvar;
68 std::mutex m_mutex; 67 std::mutex mutex;
69}; 68};
70 69
71class Barrier { 70class Barrier {
72public: 71public:
73 Barrier(size_t count) : m_count(count), m_waiting(0) {} 72 explicit Barrier(size_t count_) : count(count_), waiting(0), generation(0) {}
74 73
75 /// Blocks until all "count" threads have called Sync() 74 /// Blocks until all "count" threads have called Sync()
76 void Sync() { 75 void Sync() {
77 std::unique_lock<std::mutex> lk(m_mutex); 76 std::unique_lock<std::mutex> lk(mutex);
77 const size_t current_generation = generation;
78 78
79 // TODO: broken when next round of Sync()s 79 if (++waiting == count) {
80 // is entered before all waiting threads return from the notify_all 80 generation++;
81 81 waiting = 0;
82 if (++m_waiting == m_count) { 82 condvar.notify_all();
83 m_waiting = 0;
84 m_condvar.notify_all();
85 } else { 83 } else {
86 m_condvar.wait(lk, [&]{ return m_waiting == 0; }); 84 condvar.wait(lk, [this, current_generation]{ return current_generation != generation; });
87 } 85 }
88 } 86 }
89 87
90private: 88private:
91 std::condition_variable m_condvar; 89 std::condition_variable condvar;
92 std::mutex m_mutex; 90 std::mutex mutex;
93 const size_t m_count; 91 const size_t count;
94 size_t m_waiting; 92 size_t waiting;
93 size_t generation; // Incremented once each time the barrier is used
95}; 94};
96 95
97void SleepCurrentThread(int ms); 96void SleepCurrentThread(int ms);
@@ -100,8 +99,7 @@ void SwitchCurrentThread(); // On Linux, this is equal to sleep 1ms
100// Use this function during a spin-wait to make the current thread 99// Use this function during a spin-wait to make the current thread
101// relax while another thread is working. This may be more efficient 100// relax while another thread is working. This may be more efficient
102// than using events because event functions use kernel calls. 101// than using events because event functions use kernel calls.
103inline void YieldCPU() 102inline void YieldCPU() {
104{
105 std::this_thread::yield(); 103 std::this_thread::yield();
106} 104}
107 105
diff --git a/src/common/x64/emitter.cpp b/src/common/x64/emitter.cpp
index 1dcf2416c..5662f7f86 100644
--- a/src/common/x64/emitter.cpp
+++ b/src/common/x64/emitter.cpp
@@ -455,6 +455,18 @@ void XEmitter::CALL(const void* fnptr)
455 Write32(u32(distance)); 455 Write32(u32(distance));
456} 456}
457 457
458FixupBranch XEmitter::CALL()
459{
460 FixupBranch branch;
461 branch.type = 1;
462 branch.ptr = code + 5;
463
464 Write8(0xE8);
465 Write32(0);
466
467 return branch;
468}
469
458FixupBranch XEmitter::J(bool force5bytes) 470FixupBranch XEmitter::J(bool force5bytes)
459{ 471{
460 FixupBranch branch; 472 FixupBranch branch;
@@ -531,6 +543,22 @@ void XEmitter::SetJumpTarget(const FixupBranch& branch)
531 } 543 }
532} 544}
533 545
546void XEmitter::SetJumpTarget(const FixupBranch& branch, const u8* target)
547{
548 if (branch.type == 0)
549 {
550 s64 distance = (s64)(target - branch.ptr);
551 ASSERT_MSG(distance >= -0x80 && distance < 0x80, "Jump target too far away, needs force5Bytes = true");
552 branch.ptr[-1] = (u8)(s8)distance;
553 }
554 else if (branch.type == 1)
555 {
556 s64 distance = (s64)(target - branch.ptr);
557 ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL, "Jump target too far away, needs indirect register");
558 ((s32*)branch.ptr)[-1] = (s32)distance;
559 }
560}
561
534//Single byte opcodes 562//Single byte opcodes
535//There is no PUSHAD/POPAD in 64-bit mode. 563//There is no PUSHAD/POPAD in 64-bit mode.
536void XEmitter::INT3() {Write8(0xCC);} 564void XEmitter::INT3() {Write8(0xCC);}
diff --git a/src/common/x64/emitter.h b/src/common/x64/emitter.h
index 7c6548fb5..a33724146 100644
--- a/src/common/x64/emitter.h
+++ b/src/common/x64/emitter.h
@@ -425,12 +425,14 @@ public:
425#undef CALL 425#undef CALL
426#endif 426#endif
427 void CALL(const void* fnptr); 427 void CALL(const void* fnptr);
428 FixupBranch CALL();
428 void CALLptr(OpArg arg); 429 void CALLptr(OpArg arg);
429 430
430 FixupBranch J_CC(CCFlags conditionCode, bool force5bytes = false); 431 FixupBranch J_CC(CCFlags conditionCode, bool force5bytes = false);
431 void J_CC(CCFlags conditionCode, const u8* addr, bool force5Bytes = false); 432 void J_CC(CCFlags conditionCode, const u8* addr, bool force5Bytes = false);
432 433
433 void SetJumpTarget(const FixupBranch& branch); 434 void SetJumpTarget(const FixupBranch& branch);
435 void SetJumpTarget(const FixupBranch& branch, const u8* target);
434 436
435 void SETcc(CCFlags flag, OpArg dest); 437 void SETcc(CCFlags flag, OpArg dest);
436 // Note: CMOV brings small if any benefit on current cpus. 438 // Note: CMOV brings small if any benefit on current cpus.
diff --git a/src/core/hle/config_mem.cpp b/src/core/hle/config_mem.cpp
index b1a72dc0c..ccd73cfcb 100644
--- a/src/core/hle/config_mem.cpp
+++ b/src/core/hle/config_mem.cpp
@@ -3,13 +3,6 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <cstring> 5#include <cstring>
6
7#include "common/assert.h"
8#include "common/common_types.h"
9#include "common/common_funcs.h"
10
11#include "core/core.h"
12#include "core/memory.h"
13#include "core/hle/config_mem.h" 6#include "core/hle/config_mem.h"
14 7
15//////////////////////////////////////////////////////////////////////////////////////////////////// 8////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/core/hle/hle.cpp b/src/core/hle/hle.cpp
index 331b1b22a..e545de3b5 100644
--- a/src/core/hle/hle.cpp
+++ b/src/core/hle/hle.cpp
@@ -8,8 +8,6 @@
8#include "core/arm/arm_interface.h" 8#include "core/arm/arm_interface.h"
9#include "core/core.h" 9#include "core/core.h"
10#include "core/hle/hle.h" 10#include "core/hle/hle.h"
11#include "core/hle/config_mem.h"
12#include "core/hle/shared_page.h"
13#include "core/hle/service/service.h" 11#include "core/hle/service/service.h"
14 12
15//////////////////////////////////////////////////////////////////////////////////////////////////// 13////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/core/hle/service/soc_u.cpp b/src/core/hle/service/soc_u.cpp
index ff0af8f12..d3e5d4bca 100644
--- a/src/core/hle/service/soc_u.cpp
+++ b/src/core/hle/service/soc_u.cpp
@@ -151,6 +151,34 @@ static int TranslateError(int error) {
151 return error; 151 return error;
152} 152}
153 153
154/// Holds the translation from system network socket options to 3DS network socket options
155/// Note: -1 = No effect/unavailable
156static const std::unordered_map<int, int> sockopt_map = { {
157 { 0x0004, SO_REUSEADDR },
158 { 0x0080, -1 },
159 { 0x0100, -1 },
160 { 0x1001, SO_SNDBUF },
161 { 0x1002, SO_RCVBUF },
162 { 0x1003, -1 },
163#ifdef _WIN32
164 /// Unsupported in WinSock2
165 { 0x1004, -1 },
166#else
167 { 0x1004, SO_RCVLOWAT },
168#endif
169 { 0x1008, SO_TYPE },
170 { 0x1009, SO_ERROR },
171}};
172
173/// Converts a socket option from 3ds-specific to platform-specific
174static int TranslateSockOpt(int console_opt_name) {
175 auto found = sockopt_map.find(console_opt_name);
176 if (found != sockopt_map.end()) {
177 return found->second;
178 }
179 return console_opt_name;
180}
181
154/// Holds information about a particular socket 182/// Holds information about a particular socket
155struct SocketHolder { 183struct SocketHolder {
156 u32 socket_fd; ///< The socket descriptor 184 u32 socket_fd; ///< The socket descriptor
@@ -568,7 +596,7 @@ static void RecvFrom(Service::Interface* self) {
568 socklen_t src_addr_len = sizeof(src_addr); 596 socklen_t src_addr_len = sizeof(src_addr);
569 int ret = ::recvfrom(socket_handle, (char*)output_buff, len, flags, &src_addr, &src_addr_len); 597 int ret = ::recvfrom(socket_handle, (char*)output_buff, len, flags, &src_addr, &src_addr_len);
570 598
571 if (buffer_parameters.output_src_address_buffer != 0) { 599 if (ret >= 0 && buffer_parameters.output_src_address_buffer != 0 && src_addr_len > 0) {
572 CTRSockAddr* ctr_src_addr = reinterpret_cast<CTRSockAddr*>(Memory::GetPointer(buffer_parameters.output_src_address_buffer)); 600 CTRSockAddr* ctr_src_addr = reinterpret_cast<CTRSockAddr*>(Memory::GetPointer(buffer_parameters.output_src_address_buffer));
573 *ctr_src_addr = CTRSockAddr::FromPlatform(src_addr); 601 *ctr_src_addr = CTRSockAddr::FromPlatform(src_addr);
574 } 602 }
@@ -724,6 +752,72 @@ static void ShutdownSockets(Service::Interface* self) {
724 cmd_buffer[1] = 0; 752 cmd_buffer[1] = 0;
725} 753}
726 754
755static void GetSockOpt(Service::Interface* self) {
756 u32* cmd_buffer = Kernel::GetCommandBuffer();
757 u32 socket_handle = cmd_buffer[1];
758 u32 level = cmd_buffer[2];
759 int optname = TranslateSockOpt(cmd_buffer[3]);
760 socklen_t optlen = (socklen_t)cmd_buffer[4];
761
762 int ret = -1;
763 int err = 0;
764
765 if(optname < 0) {
766#ifdef _WIN32
767 err = WSAEINVAL;
768#else
769 err = EINVAL;
770#endif
771 } else {
772 // 0x100 = static buffer offset (bytes)
773 // + 0x4 = 2nd pointer (u32) position
774 // >> 2 = convert to u32 offset instead of byte offset (cmd_buffer = u32*)
775 char* optval = reinterpret_cast<char *>(Memory::GetPointer(cmd_buffer[0x104 >> 2]));
776
777 ret = ::getsockopt(socket_handle, level, optname, optval, &optlen);
778 err = 0;
779 if (ret == SOCKET_ERROR_VALUE) {
780 err = TranslateError(GET_ERRNO);
781 }
782 }
783
784 cmd_buffer[0] = IPC::MakeHeader(0x11, 4, 2);
785 cmd_buffer[1] = ret;
786 cmd_buffer[2] = err;
787 cmd_buffer[3] = optlen;
788}
789
790static void SetSockOpt(Service::Interface* self) {
791 u32* cmd_buffer = Kernel::GetCommandBuffer();
792 u32 socket_handle = cmd_buffer[1];
793 u32 level = cmd_buffer[2];
794 int optname = TranslateSockOpt(cmd_buffer[3]);
795
796 int ret = -1;
797 int err = 0;
798
799 if(optname < 0) {
800#ifdef _WIN32
801 err = WSAEINVAL;
802#else
803 err = EINVAL;
804#endif
805 } else {
806 socklen_t optlen = static_cast<socklen_t>(cmd_buffer[4]);
807 const char* optval = reinterpret_cast<const char *>(Memory::GetPointer(cmd_buffer[8]));
808
809 ret = static_cast<u32>(::setsockopt(socket_handle, level, optname, optval, optlen));
810 err = 0;
811 if (ret == SOCKET_ERROR_VALUE) {
812 err = TranslateError(GET_ERRNO);
813 }
814 }
815
816 cmd_buffer[0] = IPC::MakeHeader(0x12, 4, 4);
817 cmd_buffer[1] = ret;
818 cmd_buffer[2] = err;
819}
820
727const Interface::FunctionInfo FunctionTable[] = { 821const Interface::FunctionInfo FunctionTable[] = {
728 {0x00010044, InitializeSockets, "InitializeSockets"}, 822 {0x00010044, InitializeSockets, "InitializeSockets"},
729 {0x000200C2, Socket, "Socket"}, 823 {0x000200C2, Socket, "Socket"},
@@ -741,8 +835,8 @@ const Interface::FunctionInfo FunctionTable[] = {
741 {0x000E00C2, nullptr, "GetHostByAddr"}, 835 {0x000E00C2, nullptr, "GetHostByAddr"},
742 {0x000F0106, nullptr, "GetAddrInfo"}, 836 {0x000F0106, nullptr, "GetAddrInfo"},
743 {0x00100102, nullptr, "GetNameInfo"}, 837 {0x00100102, nullptr, "GetNameInfo"},
744 {0x00110102, nullptr, "GetSockOpt"}, 838 {0x00110102, GetSockOpt, "GetSockOpt"},
745 {0x00120104, nullptr, "SetSockOpt"}, 839 {0x00120104, SetSockOpt, "SetSockOpt"},
746 {0x001300C2, Fcntl, "Fcntl"}, 840 {0x001300C2, Fcntl, "Fcntl"},
747 {0x00140084, Poll, "Poll"}, 841 {0x00140084, Poll, "Poll"},
748 {0x00150042, nullptr, "SockAtMark"}, 842 {0x00150042, nullptr, "SockAtMark"},
diff --git a/src/core/hw/y2r.cpp b/src/core/hw/y2r.cpp
index 48c45564f..083391e83 100644
--- a/src/core/hw/y2r.cpp
+++ b/src/core/hw/y2r.cpp
@@ -261,7 +261,7 @@ void PerformConversion(ConversionConfiguration& cvt) {
261 ASSERT(cvt.block_alignment != BlockAlignment::Block8x8 || cvt.input_lines % 8 == 0); 261 ASSERT(cvt.block_alignment != BlockAlignment::Block8x8 || cvt.input_lines % 8 == 0);
262 // Tiles per row 262 // Tiles per row
263 size_t num_tiles = cvt.input_line_width / 8; 263 size_t num_tiles = cvt.input_line_width / 8;
264 ASSERT(num_tiles < MAX_TILES); 264 ASSERT(num_tiles <= MAX_TILES);
265 265
266 // Buffer used as a CDMA source/target. 266 // Buffer used as a CDMA source/target.
267 std::unique_ptr<u8[]> data_buffer(new u8[cvt.input_line_width * 8 * 4]); 267 std::unique_ptr<u8[]> data_buffer(new u8[cvt.input_line_width * 8 * 4]);
diff --git a/src/core/loader/3dsx.cpp b/src/core/loader/3dsx.cpp
index 8eed6a50a..5fb3b9e2b 100644
--- a/src/core/loader/3dsx.cpp
+++ b/src/core/loader/3dsx.cpp
@@ -10,13 +10,9 @@
10#include "core/file_sys/archive_romfs.h" 10#include "core/file_sys/archive_romfs.h"
11#include "core/hle/kernel/process.h" 11#include "core/hle/kernel/process.h"
12#include "core/hle/kernel/resource_limit.h" 12#include "core/hle/kernel/resource_limit.h"
13#include "core/hle/service/fs/archive.h" 13#include "core/loader/3dsx.h"
14#include "core/loader/elf.h"
15#include "core/loader/ncch.h"
16#include "core/memory.h" 14#include "core/memory.h"
17 15
18#include "3dsx.h"
19
20namespace Loader { 16namespace Loader {
21 17
22/* 18/*
diff --git a/src/core/loader/ncch.cpp b/src/core/loader/ncch.cpp
index e63cab33f..a4b47ef8c 100644
--- a/src/core/loader/ncch.cpp
+++ b/src/core/loader/ncch.cpp
@@ -174,7 +174,7 @@ ResultStatus AppLoader_NCCH::LoadSectionExeFS(const char* name, std::vector<u8>&
174 return ResultStatus::Error; 174 return ResultStatus::Error;
175 175
176 LOG_DEBUG(Loader, "%d sections:", kMaxSections); 176 LOG_DEBUG(Loader, "%d sections:", kMaxSections);
177 // Iterate through the ExeFs archive until we find the .code file... 177 // Iterate through the ExeFs archive until we find a section with the specified name...
178 for (unsigned section_number = 0; section_number < kMaxSections; section_number++) { 178 for (unsigned section_number = 0; section_number < kMaxSections; section_number++) {
179 const auto& section = exefs_header.section[section_number]; 179 const auto& section = exefs_header.section[section_number];
180 180
@@ -186,7 +186,7 @@ ResultStatus AppLoader_NCCH::LoadSectionExeFS(const char* name, std::vector<u8>&
186 s64 section_offset = (section.offset + exefs_offset + sizeof(ExeFs_Header) + ncch_offset); 186 s64 section_offset = (section.offset + exefs_offset + sizeof(ExeFs_Header) + ncch_offset);
187 file.Seek(section_offset, SEEK_SET); 187 file.Seek(section_offset, SEEK_SET);
188 188
189 if (is_compressed) { 189 if (strcmp(section.name, ".code") == 0 && is_compressed) {
190 // Section is compressed, read compressed .code section... 190 // Section is compressed, read compressed .code section...
191 std::unique_ptr<u8[]> temp_buffer; 191 std::unique_ptr<u8[]> temp_buffer;
192 try { 192 try {
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 08ec2907a..3abe79c09 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -140,7 +140,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
140 immediate_attribute_id = 0; 140 immediate_attribute_id = 0;
141 141
142 Shader::UnitState<false> shader_unit; 142 Shader::UnitState<false> shader_unit;
143 Shader::Setup(shader_unit); 143 Shader::Setup();
144 144
145 if (g_debug_context) 145 if (g_debug_context)
146 g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, static_cast<void*>(&immediate_input)); 146 g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, static_cast<void*>(&immediate_input));
@@ -300,7 +300,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
300 vertex_cache_ids.fill(-1); 300 vertex_cache_ids.fill(-1);
301 301
302 Shader::UnitState<false> shader_unit; 302 Shader::UnitState<false> shader_unit;
303 Shader::Setup(shader_unit); 303 Shader::Setup();
304 304
305 for (unsigned int index = 0; index < regs.num_vertices; ++index) 305 for (unsigned int index = 0; index < regs.num_vertices; ++index)
306 { 306 {
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index 693f93597..c3a9c9598 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -286,7 +286,7 @@ void StartPicaTracing()
286 } 286 }
287 287
288 std::lock_guard<std::mutex> lock(pica_trace_mutex); 288 std::lock_guard<std::mutex> lock(pica_trace_mutex);
289 pica_trace = std::unique_ptr<PicaTrace>(new PicaTrace); 289 pica_trace = std::make_unique<PicaTrace>();
290 290
291 is_pica_tracing = true; 291 is_pica_tracing = true;
292} 292}
@@ -586,6 +586,21 @@ TextureInfo TextureInfo::FromPicaRegister(const Regs::TextureConfig& config,
586 return info; 586 return info;
587} 587}
588 588
589#ifdef HAVE_PNG
590// Adapter functions to libpng to write/flush to File::IOFile instances.
591static void WriteIOFile(png_structp png_ptr, png_bytep data, png_size_t length) {
592 auto* fp = static_cast<FileUtil::IOFile*>(png_get_io_ptr(png_ptr));
593 if (!fp->WriteBytes(data, length))
594 png_error(png_ptr, "Failed to write to output PNG file.");
595}
596
597static void FlushIOFile(png_structp png_ptr) {
598 auto* fp = static_cast<FileUtil::IOFile*>(png_get_io_ptr(png_ptr));
599 if (!fp->Flush())
600 png_error(png_ptr, "Failed to flush to output PNG file.");
601}
602#endif
603
589void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) { 604void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) {
590#ifndef HAVE_PNG 605#ifndef HAVE_PNG
591 return; 606 return;
@@ -629,7 +644,7 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) {
629 goto finalise; 644 goto finalise;
630 } 645 }
631 646
632 png_init_io(png_ptr, fp.GetHandle()); 647 png_set_write_fn(png_ptr, static_cast<void*>(&fp), WriteIOFile, FlushIOFile);
633 648
634 // Write header (8 bit color depth) 649 // Write header (8 bit color depth)
635 png_set_IHDR(png_ptr, info_ptr, texture_config.width, texture_config.height, 650 png_set_IHDR(png_ptr, info_ptr, texture_config.width, texture_config.height,
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 5b9ed7c64..0434ad05a 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -923,92 +923,72 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
923 if (output_merger.alphablend_enable) { 923 if (output_merger.alphablend_enable) {
924 auto params = output_merger.alpha_blending; 924 auto params = output_merger.alpha_blending;
925 925
926 auto LookupFactorRGB = [&](Regs::BlendFactor factor) -> Math::Vec3<u8> { 926 auto LookupFactor = [&](unsigned channel, Regs::BlendFactor factor) -> u8 {
927 DEBUG_ASSERT(channel < 4);
928
929 const Math::Vec4<u8> blend_const = {
930 static_cast<u8>(output_merger.blend_const.r),
931 static_cast<u8>(output_merger.blend_const.g),
932 static_cast<u8>(output_merger.blend_const.b),
933 static_cast<u8>(output_merger.blend_const.a)
934 };
935
927 switch (factor) { 936 switch (factor) {
928 case Regs::BlendFactor::Zero : 937 case Regs::BlendFactor::Zero:
929 return Math::Vec3<u8>(0, 0, 0); 938 return 0;
930 939
931 case Regs::BlendFactor::One : 940 case Regs::BlendFactor::One:
932 return Math::Vec3<u8>(255, 255, 255); 941 return 255;
933 942
934 case Regs::BlendFactor::SourceColor: 943 case Regs::BlendFactor::SourceColor:
935 return combiner_output.rgb(); 944 return combiner_output[channel];
936 945
937 case Regs::BlendFactor::OneMinusSourceColor: 946 case Regs::BlendFactor::OneMinusSourceColor:
938 return Math::Vec3<u8>(255 - combiner_output.r(), 255 - combiner_output.g(), 255 - combiner_output.b()); 947 return 255 - combiner_output[channel];
939 948
940 case Regs::BlendFactor::DestColor: 949 case Regs::BlendFactor::DestColor:
941 return dest.rgb(); 950 return dest[channel];
942 951
943 case Regs::BlendFactor::OneMinusDestColor: 952 case Regs::BlendFactor::OneMinusDestColor:
944 return Math::Vec3<u8>(255 - dest.r(), 255 - dest.g(), 255 - dest.b()); 953 return 255 - dest[channel];
945 954
946 case Regs::BlendFactor::SourceAlpha: 955 case Regs::BlendFactor::SourceAlpha:
947 return Math::Vec3<u8>(combiner_output.a(), combiner_output.a(), combiner_output.a()); 956 return combiner_output.a();
948 957
949 case Regs::BlendFactor::OneMinusSourceAlpha: 958 case Regs::BlendFactor::OneMinusSourceAlpha:
950 return Math::Vec3<u8>(255 - combiner_output.a(), 255 - combiner_output.a(), 255 - combiner_output.a()); 959 return 255 - combiner_output.a();
951 960
952 case Regs::BlendFactor::DestAlpha: 961 case Regs::BlendFactor::DestAlpha:
953 return Math::Vec3<u8>(dest.a(), dest.a(), dest.a()); 962 return dest.a();
954 963
955 case Regs::BlendFactor::OneMinusDestAlpha: 964 case Regs::BlendFactor::OneMinusDestAlpha:
956 return Math::Vec3<u8>(255 - dest.a(), 255 - dest.a(), 255 - dest.a()); 965 return 255 - dest.a();
957 966
958 case Regs::BlendFactor::ConstantColor: 967 case Regs::BlendFactor::ConstantColor:
959 return Math::Vec3<u8>(output_merger.blend_const.r, output_merger.blend_const.g, output_merger.blend_const.b); 968 return blend_const[channel];
960 969
961 case Regs::BlendFactor::OneMinusConstantColor: 970 case Regs::BlendFactor::OneMinusConstantColor:
962 return Math::Vec3<u8>(255 - output_merger.blend_const.r, 255 - output_merger.blend_const.g, 255 - output_merger.blend_const.b); 971 return 255 - blend_const[channel];
963 972
964 case Regs::BlendFactor::ConstantAlpha: 973 case Regs::BlendFactor::ConstantAlpha:
965 return Math::Vec3<u8>(output_merger.blend_const.a, output_merger.blend_const.a, output_merger.blend_const.a); 974 return blend_const.a();
966 975
967 case Regs::BlendFactor::OneMinusConstantAlpha: 976 case Regs::BlendFactor::OneMinusConstantAlpha:
968 return Math::Vec3<u8>(255 - output_merger.blend_const.a, 255 - output_merger.blend_const.a, 255 - output_merger.blend_const.a); 977 return 255 - blend_const.a();
969
970 default:
971 LOG_CRITICAL(HW_GPU, "Unknown color blend factor %x", factor);
972 UNIMPLEMENTED();
973 break;
974 }
975
976 return {};
977 };
978
979 auto LookupFactorA = [&](Regs::BlendFactor factor) -> u8 {
980 switch (factor) {
981 case Regs::BlendFactor::Zero:
982 return 0;
983
984 case Regs::BlendFactor::One:
985 return 255;
986
987 case Regs::BlendFactor::SourceAlpha:
988 return combiner_output.a();
989
990 case Regs::BlendFactor::OneMinusSourceAlpha:
991 return 255 - combiner_output.a();
992 978
993 case Regs::BlendFactor::DestAlpha: 979 case Regs::BlendFactor::SourceAlphaSaturate:
994 return dest.a(); 980 // Returns 1.0 for the alpha channel
995 981 if (channel == 3)
996 case Regs::BlendFactor::OneMinusDestAlpha: 982 return 255;
997 return 255 - dest.a(); 983 return std::min(combiner_output.a(), static_cast<u8>(255 - dest.a()));
998
999 case Regs::BlendFactor::ConstantAlpha:
1000 return output_merger.blend_const.a;
1001
1002 case Regs::BlendFactor::OneMinusConstantAlpha:
1003 return 255 - output_merger.blend_const.a;
1004 984
1005 default: 985 default:
1006 LOG_CRITICAL(HW_GPU, "Unknown alpha blend factor %x", factor); 986 LOG_CRITICAL(HW_GPU, "Unknown blend factor %x", factor);
1007 UNIMPLEMENTED(); 987 UNIMPLEMENTED();
1008 break; 988 break;
1009 } 989 }
1010 990
1011 return {}; 991 return combiner_output[channel];
1012 }; 992 };
1013 993
1014 static auto EvaluateBlendEquation = [](const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor, 994 static auto EvaluateBlendEquation = [](const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor,
@@ -1060,10 +1040,15 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
1060 MathUtil::Clamp(result.a(), 0, 255)); 1040 MathUtil::Clamp(result.a(), 0, 255));
1061 }; 1041 };
1062 1042
1063 auto srcfactor = Math::MakeVec(LookupFactorRGB(params.factor_source_rgb), 1043 auto srcfactor = Math::MakeVec(LookupFactor(0, params.factor_source_rgb),
1064 LookupFactorA(params.factor_source_a)); 1044 LookupFactor(1, params.factor_source_rgb),
1065 auto dstfactor = Math::MakeVec(LookupFactorRGB(params.factor_dest_rgb), 1045 LookupFactor(2, params.factor_source_rgb),
1066 LookupFactorA(params.factor_dest_a)); 1046 LookupFactor(3, params.factor_source_a));
1047
1048 auto dstfactor = Math::MakeVec(LookupFactor(0, params.factor_dest_rgb),
1049 LookupFactor(1, params.factor_dest_rgb),
1050 LookupFactor(2, params.factor_dest_rgb),
1051 LookupFactor(3, params.factor_dest_a));
1067 1052
1068 blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_rgb); 1053 blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_rgb);
1069 blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_a).a(); 1054 blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_a).a();
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index 78d295c76..75301accd 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -28,36 +28,24 @@ namespace Pica {
28namespace Shader { 28namespace Shader {
29 29
30#ifdef ARCHITECTURE_x86_64 30#ifdef ARCHITECTURE_x86_64
31static std::unordered_map<u64, CompiledShader*> shader_map; 31static std::unordered_map<u64, std::unique_ptr<JitShader>> shader_map;
32static JitCompiler jit; 32static const JitShader* jit_shader;
33static CompiledShader* jit_shader;
34
35static void ClearCache() {
36 shader_map.clear();
37 jit.Clear();
38 LOG_INFO(HW_GPU, "Shader JIT cache cleared");
39}
40#endif // ARCHITECTURE_x86_64 33#endif // ARCHITECTURE_x86_64
41 34
42void Setup(UnitState<false>& state) { 35void Setup() {
43#ifdef ARCHITECTURE_x86_64 36#ifdef ARCHITECTURE_x86_64
44 if (VideoCore::g_shader_jit_enabled) { 37 if (VideoCore::g_shader_jit_enabled) {
45 u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ 38 u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^
46 Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data)) ^ 39 Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data)));
47 g_state.regs.vs.main_offset);
48 40
49 auto iter = shader_map.find(cache_key); 41 auto iter = shader_map.find(cache_key);
50 if (iter != shader_map.end()) { 42 if (iter != shader_map.end()) {
51 jit_shader = iter->second; 43 jit_shader = iter->second.get();
52 } else { 44 } else {
53 // Check if remaining JIT code space is enough for at least one more (massive) shader 45 auto shader = std::make_unique<JitShader>();
54 if (jit.GetSpaceLeft() < jit_shader_size) { 46 shader->Compile();
55 // If not, clear the cache of all previously compiled shaders 47 jit_shader = shader.get();
56 ClearCache(); 48 shader_map[cache_key] = std::move(shader);
57 }
58
59 jit_shader = jit.Compile();
60 shader_map.emplace(cache_key, jit_shader);
61 } 49 }
62 } 50 }
63#endif // ARCHITECTURE_x86_64 51#endif // ARCHITECTURE_x86_64
@@ -65,7 +53,7 @@ void Setup(UnitState<false>& state) {
65 53
66void Shutdown() { 54void Shutdown() {
67#ifdef ARCHITECTURE_x86_64 55#ifdef ARCHITECTURE_x86_64
68 ClearCache(); 56 shader_map.clear();
69#endif // ARCHITECTURE_x86_64 57#endif // ARCHITECTURE_x86_64
70} 58}
71 59
@@ -109,7 +97,7 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr
109 97
110#ifdef ARCHITECTURE_x86_64 98#ifdef ARCHITECTURE_x86_64
111 if (VideoCore::g_shader_jit_enabled) 99 if (VideoCore::g_shader_jit_enabled)
112 jit_shader(&state.registers); 100 jit_shader->Run(&state.registers, g_state.regs.vs.main_offset);
113 else 101 else
114 RunInterpreter(state); 102 RunInterpreter(state);
115#else 103#else
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index 7af8f1fa1..9c5bd97bd 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -339,9 +339,8 @@ struct UnitState {
339/** 339/**
340 * Performs any shader unit setup that only needs to happen once per shader (as opposed to once per 340 * Performs any shader unit setup that only needs to happen once per shader (as opposed to once per
341 * vertex, which would happen within the `Run` function). 341 * vertex, which would happen within the `Run` function).
342 * @param state Shader unit state, must be setup per shader and per shader unit
343 */ 342 */
344void Setup(UnitState<false>& state); 343void Setup();
345 344
346/// Performs any cleanup when the emulator is shutdown 345/// Performs any cleanup when the emulator is shutdown
347void Shutdown(); 346void Shutdown();
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index dffe051ef..b47d3beda 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -2,6 +2,7 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
5#include <smmintrin.h> 6#include <smmintrin.h>
6 7
7#include "common/x64/abi.h" 8#include "common/x64/abi.h"
@@ -19,73 +20,73 @@ namespace Shader {
19 20
20using namespace Gen; 21using namespace Gen;
21 22
22typedef void (JitCompiler::*JitFunction)(Instruction instr); 23typedef void (JitShader::*JitFunction)(Instruction instr);
23 24
24const JitFunction instr_table[64] = { 25const JitFunction instr_table[64] = {
25 &JitCompiler::Compile_ADD, // add 26 &JitShader::Compile_ADD, // add
26 &JitCompiler::Compile_DP3, // dp3 27 &JitShader::Compile_DP3, // dp3
27 &JitCompiler::Compile_DP4, // dp4 28 &JitShader::Compile_DP4, // dp4
28 &JitCompiler::Compile_DPH, // dph 29 &JitShader::Compile_DPH, // dph
29 nullptr, // unknown 30 nullptr, // unknown
30 &JitCompiler::Compile_EX2, // ex2 31 &JitShader::Compile_EX2, // ex2
31 &JitCompiler::Compile_LG2, // lg2 32 &JitShader::Compile_LG2, // lg2
32 nullptr, // unknown 33 nullptr, // unknown
33 &JitCompiler::Compile_MUL, // mul 34 &JitShader::Compile_MUL, // mul
34 &JitCompiler::Compile_SGE, // sge 35 &JitShader::Compile_SGE, // sge
35 &JitCompiler::Compile_SLT, // slt 36 &JitShader::Compile_SLT, // slt
36 &JitCompiler::Compile_FLR, // flr 37 &JitShader::Compile_FLR, // flr
37 &JitCompiler::Compile_MAX, // max 38 &JitShader::Compile_MAX, // max
38 &JitCompiler::Compile_MIN, // min 39 &JitShader::Compile_MIN, // min
39 &JitCompiler::Compile_RCP, // rcp 40 &JitShader::Compile_RCP, // rcp
40 &JitCompiler::Compile_RSQ, // rsq 41 &JitShader::Compile_RSQ, // rsq
41 nullptr, // unknown 42 nullptr, // unknown
42 nullptr, // unknown 43 nullptr, // unknown
43 &JitCompiler::Compile_MOVA, // mova 44 &JitShader::Compile_MOVA, // mova
44 &JitCompiler::Compile_MOV, // mov 45 &JitShader::Compile_MOV, // mov
45 nullptr, // unknown 46 nullptr, // unknown
46 nullptr, // unknown 47 nullptr, // unknown
47 nullptr, // unknown 48 nullptr, // unknown
48 nullptr, // unknown 49 nullptr, // unknown
49 &JitCompiler::Compile_DPH, // dphi 50 &JitShader::Compile_DPH, // dphi
50 nullptr, // unknown 51 nullptr, // unknown
51 &JitCompiler::Compile_SGE, // sgei 52 &JitShader::Compile_SGE, // sgei
52 &JitCompiler::Compile_SLT, // slti 53 &JitShader::Compile_SLT, // slti
53 nullptr, // unknown 54 nullptr, // unknown
54 nullptr, // unknown 55 nullptr, // unknown
55 nullptr, // unknown 56 nullptr, // unknown
56 nullptr, // unknown 57 nullptr, // unknown
57 nullptr, // unknown 58 nullptr, // unknown
58 &JitCompiler::Compile_NOP, // nop 59 &JitShader::Compile_NOP, // nop
59 &JitCompiler::Compile_END, // end 60 &JitShader::Compile_END, // end
60 nullptr, // break 61 nullptr, // break
61 &JitCompiler::Compile_CALL, // call 62 &JitShader::Compile_CALL, // call
62 &JitCompiler::Compile_CALLC, // callc 63 &JitShader::Compile_CALLC, // callc
63 &JitCompiler::Compile_CALLU, // callu 64 &JitShader::Compile_CALLU, // callu
64 &JitCompiler::Compile_IF, // ifu 65 &JitShader::Compile_IF, // ifu
65 &JitCompiler::Compile_IF, // ifc 66 &JitShader::Compile_IF, // ifc
66 &JitCompiler::Compile_LOOP, // loop 67 &JitShader::Compile_LOOP, // loop
67 nullptr, // emit 68 nullptr, // emit
68 nullptr, // sete 69 nullptr, // sete
69 &JitCompiler::Compile_JMP, // jmpc 70 &JitShader::Compile_JMP, // jmpc
70 &JitCompiler::Compile_JMP, // jmpu 71 &JitShader::Compile_JMP, // jmpu
71 &JitCompiler::Compile_CMP, // cmp 72 &JitShader::Compile_CMP, // cmp
72 &JitCompiler::Compile_CMP, // cmp 73 &JitShader::Compile_CMP, // cmp
73 &JitCompiler::Compile_MAD, // madi 74 &JitShader::Compile_MAD, // madi
74 &JitCompiler::Compile_MAD, // madi 75 &JitShader::Compile_MAD, // madi
75 &JitCompiler::Compile_MAD, // madi 76 &JitShader::Compile_MAD, // madi
76 &JitCompiler::Compile_MAD, // madi 77 &JitShader::Compile_MAD, // madi
77 &JitCompiler::Compile_MAD, // madi 78 &JitShader::Compile_MAD, // madi
78 &JitCompiler::Compile_MAD, // madi 79 &JitShader::Compile_MAD, // madi
79 &JitCompiler::Compile_MAD, // madi 80 &JitShader::Compile_MAD, // madi
80 &JitCompiler::Compile_MAD, // madi 81 &JitShader::Compile_MAD, // madi
81 &JitCompiler::Compile_MAD, // mad 82 &JitShader::Compile_MAD, // mad
82 &JitCompiler::Compile_MAD, // mad 83 &JitShader::Compile_MAD, // mad
83 &JitCompiler::Compile_MAD, // mad 84 &JitShader::Compile_MAD, // mad
84 &JitCompiler::Compile_MAD, // mad 85 &JitShader::Compile_MAD, // mad
85 &JitCompiler::Compile_MAD, // mad 86 &JitShader::Compile_MAD, // mad
86 &JitCompiler::Compile_MAD, // mad 87 &JitShader::Compile_MAD, // mad
87 &JitCompiler::Compile_MAD, // mad 88 &JitShader::Compile_MAD, // mad
88 &JitCompiler::Compile_MAD, // mad 89 &JitShader::Compile_MAD, // mad
89}; 90};
90 91
91// The following is used to alias some commonly used registers. Generally, RAX-RDX and XMM0-XMM3 can 92// The following is used to alias some commonly used registers. Generally, RAX-RDX and XMM0-XMM3 can
@@ -138,13 +139,32 @@ static const u8 NO_SRC_REG_SWIZZLE = 0x1b;
138static const u8 NO_DEST_REG_MASK = 0xf; 139static const u8 NO_DEST_REG_MASK = 0xf;
139 140
140/** 141/**
142 * Get the vertex shader instruction for a given offset in the current shader program
143 * @param offset Offset in the current shader program of the instruction
144 * @return Instruction at the specified offset
145 */
146static Instruction GetVertexShaderInstruction(size_t offset) {
147 return { g_state.vs.program_code[offset] };
148}
149
150static void LogCritical(const char* msg) {
151 LOG_CRITICAL(HW_GPU, msg);
152}
153
154void JitShader::Compile_Assert(bool condition, const char* msg) {
155 if (!condition) {
156 ABI_CallFunctionP(reinterpret_cast<const void*>(LogCritical), const_cast<char*>(msg));
157 }
158}
159
160/**
141 * Loads and swizzles a source register into the specified XMM register. 161 * Loads and swizzles a source register into the specified XMM register.
142 * @param instr VS instruction, used for determining how to load the source register 162 * @param instr VS instruction, used for determining how to load the source register
143 * @param src_num Number indicating which source register to load (1 = src1, 2 = src2, 3 = src3) 163 * @param src_num Number indicating which source register to load (1 = src1, 2 = src2, 3 = src3)
144 * @param src_reg SourceRegister object corresponding to the source register to load 164 * @param src_reg SourceRegister object corresponding to the source register to load
145 * @param dest Destination XMM register to store the loaded, swizzled source register 165 * @param dest Destination XMM register to store the loaded, swizzled source register
146 */ 166 */
147void JitCompiler::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, X64Reg dest) { 167void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, X64Reg dest) {
148 X64Reg src_ptr; 168 X64Reg src_ptr;
149 size_t src_offset; 169 size_t src_offset;
150 170
@@ -216,7 +236,7 @@ void JitCompiler::Compile_SwizzleSrc(Instruction instr, unsigned src_num, Source
216 } 236 }
217} 237}
218 238
219void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) { 239void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) {
220 DestRegister dest; 240 DestRegister dest;
221 unsigned operand_desc_id; 241 unsigned operand_desc_id;
222 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD || 242 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD ||
@@ -263,7 +283,7 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) {
263 } 283 }
264} 284}
265 285
266void JitCompiler::Compile_SanitizedMul(Gen::X64Reg src1, Gen::X64Reg src2, Gen::X64Reg scratch) { 286void JitShader::Compile_SanitizedMul(Gen::X64Reg src1, Gen::X64Reg src2, Gen::X64Reg scratch) {
267 MOVAPS(scratch, R(src1)); 287 MOVAPS(scratch, R(src1));
268 CMPPS(scratch, R(src2), CMP_ORD); 288 CMPPS(scratch, R(src2), CMP_ORD);
269 289
@@ -276,7 +296,7 @@ void JitCompiler::Compile_SanitizedMul(Gen::X64Reg src1, Gen::X64Reg src2, Gen::
276 ANDPS(src1, R(scratch)); 296 ANDPS(src1, R(scratch));
277} 297}
278 298
279void JitCompiler::Compile_EvaluateCondition(Instruction instr) { 299void JitShader::Compile_EvaluateCondition(Instruction instr) {
280 // Note: NXOR is used below to check for equality 300 // Note: NXOR is used below to check for equality
281 switch (instr.flow_control.op) { 301 switch (instr.flow_control.op) {
282 case Instruction::FlowControlType::Or: 302 case Instruction::FlowControlType::Or:
@@ -307,23 +327,23 @@ void JitCompiler::Compile_EvaluateCondition(Instruction instr) {
307 } 327 }
308} 328}
309 329
310void JitCompiler::Compile_UniformCondition(Instruction instr) { 330void JitShader::Compile_UniformCondition(Instruction instr) {
311 int offset = offsetof(decltype(g_state.vs.uniforms), b) + (instr.flow_control.bool_uniform_id * sizeof(bool)); 331 int offset = offsetof(decltype(g_state.vs.uniforms), b) + (instr.flow_control.bool_uniform_id * sizeof(bool));
312 CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0)); 332 CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0));
313} 333}
314 334
315BitSet32 JitCompiler::PersistentCallerSavedRegs() { 335BitSet32 JitShader::PersistentCallerSavedRegs() {
316 return persistent_regs & ABI_ALL_CALLER_SAVED; 336 return persistent_regs & ABI_ALL_CALLER_SAVED;
317} 337}
318 338
319void JitCompiler::Compile_ADD(Instruction instr) { 339void JitShader::Compile_ADD(Instruction instr) {
320 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 340 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
321 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); 341 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
322 ADDPS(SRC1, R(SRC2)); 342 ADDPS(SRC1, R(SRC2));
323 Compile_DestEnable(instr, SRC1); 343 Compile_DestEnable(instr, SRC1);
324} 344}
325 345
326void JitCompiler::Compile_DP3(Instruction instr) { 346void JitShader::Compile_DP3(Instruction instr) {
327 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 347 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
328 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); 348 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
329 349
@@ -342,7 +362,7 @@ void JitCompiler::Compile_DP3(Instruction instr) {
342 Compile_DestEnable(instr, SRC1); 362 Compile_DestEnable(instr, SRC1);
343} 363}
344 364
345void JitCompiler::Compile_DP4(Instruction instr) { 365void JitShader::Compile_DP4(Instruction instr) {
346 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 366 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
347 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); 367 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
348 368
@@ -359,7 +379,7 @@ void JitCompiler::Compile_DP4(Instruction instr) {
359 Compile_DestEnable(instr, SRC1); 379 Compile_DestEnable(instr, SRC1);
360} 380}
361 381
362void JitCompiler::Compile_DPH(Instruction instr) { 382void JitShader::Compile_DPH(Instruction instr) {
363 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::DPHI) { 383 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::DPHI) {
364 Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); 384 Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
365 Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); 385 Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
@@ -391,7 +411,7 @@ void JitCompiler::Compile_DPH(Instruction instr) {
391 Compile_DestEnable(instr, SRC1); 411 Compile_DestEnable(instr, SRC1);
392} 412}
393 413
394void JitCompiler::Compile_EX2(Instruction instr) { 414void JitShader::Compile_EX2(Instruction instr) {
395 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 415 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
396 MOVSS(XMM0, R(SRC1)); 416 MOVSS(XMM0, R(SRC1));
397 417
@@ -404,7 +424,7 @@ void JitCompiler::Compile_EX2(Instruction instr) {
404 Compile_DestEnable(instr, SRC1); 424 Compile_DestEnable(instr, SRC1);
405} 425}
406 426
407void JitCompiler::Compile_LG2(Instruction instr) { 427void JitShader::Compile_LG2(Instruction instr) {
408 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 428 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
409 MOVSS(XMM0, R(SRC1)); 429 MOVSS(XMM0, R(SRC1));
410 430
@@ -417,14 +437,14 @@ void JitCompiler::Compile_LG2(Instruction instr) {
417 Compile_DestEnable(instr, SRC1); 437 Compile_DestEnable(instr, SRC1);
418} 438}
419 439
420void JitCompiler::Compile_MUL(Instruction instr) { 440void JitShader::Compile_MUL(Instruction instr) {
421 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 441 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
422 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); 442 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
423 Compile_SanitizedMul(SRC1, SRC2, SCRATCH); 443 Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
424 Compile_DestEnable(instr, SRC1); 444 Compile_DestEnable(instr, SRC1);
425} 445}
426 446
427void JitCompiler::Compile_SGE(Instruction instr) { 447void JitShader::Compile_SGE(Instruction instr) {
428 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SGEI) { 448 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SGEI) {
429 Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); 449 Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
430 Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); 450 Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
@@ -439,7 +459,7 @@ void JitCompiler::Compile_SGE(Instruction instr) {
439 Compile_DestEnable(instr, SRC2); 459 Compile_DestEnable(instr, SRC2);
440} 460}
441 461
442void JitCompiler::Compile_SLT(Instruction instr) { 462void JitShader::Compile_SLT(Instruction instr) {
443 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SLTI) { 463 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SLTI) {
444 Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); 464 Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
445 Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); 465 Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
@@ -454,7 +474,7 @@ void JitCompiler::Compile_SLT(Instruction instr) {
454 Compile_DestEnable(instr, SRC1); 474 Compile_DestEnable(instr, SRC1);
455} 475}
456 476
457void JitCompiler::Compile_FLR(Instruction instr) { 477void JitShader::Compile_FLR(Instruction instr) {
458 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 478 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
459 479
460 if (Common::GetCPUCaps().sse4_1) { 480 if (Common::GetCPUCaps().sse4_1) {
@@ -467,7 +487,7 @@ void JitCompiler::Compile_FLR(Instruction instr) {
467 Compile_DestEnable(instr, SRC1); 487 Compile_DestEnable(instr, SRC1);
468} 488}
469 489
470void JitCompiler::Compile_MAX(Instruction instr) { 490void JitShader::Compile_MAX(Instruction instr) {
471 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 491 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
472 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); 492 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
473 // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned. 493 // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned.
@@ -475,7 +495,7 @@ void JitCompiler::Compile_MAX(Instruction instr) {
475 Compile_DestEnable(instr, SRC1); 495 Compile_DestEnable(instr, SRC1);
476} 496}
477 497
478void JitCompiler::Compile_MIN(Instruction instr) { 498void JitShader::Compile_MIN(Instruction instr) {
479 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 499 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
480 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); 500 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
481 // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned. 501 // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned.
@@ -483,7 +503,7 @@ void JitCompiler::Compile_MIN(Instruction instr) {
483 Compile_DestEnable(instr, SRC1); 503 Compile_DestEnable(instr, SRC1);
484} 504}
485 505
486void JitCompiler::Compile_MOVA(Instruction instr) { 506void JitShader::Compile_MOVA(Instruction instr) {
487 SwizzlePattern swiz = { g_state.vs.swizzle_data[instr.common.operand_desc_id] }; 507 SwizzlePattern swiz = { g_state.vs.swizzle_data[instr.common.operand_desc_id] };
488 508
489 if (!swiz.DestComponentEnabled(0) && !swiz.DestComponentEnabled(1)) { 509 if (!swiz.DestComponentEnabled(0) && !swiz.DestComponentEnabled(1)) {
@@ -528,12 +548,12 @@ void JitCompiler::Compile_MOVA(Instruction instr) {
528 } 548 }
529} 549}
530 550
531void JitCompiler::Compile_MOV(Instruction instr) { 551void JitShader::Compile_MOV(Instruction instr) {
532 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 552 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
533 Compile_DestEnable(instr, SRC1); 553 Compile_DestEnable(instr, SRC1);
534} 554}
535 555
536void JitCompiler::Compile_RCP(Instruction instr) { 556void JitShader::Compile_RCP(Instruction instr) {
537 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 557 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
538 558
539 // TODO(bunnei): RCPSS is a pretty rough approximation, this might cause problems if Pica 559 // TODO(bunnei): RCPSS is a pretty rough approximation, this might cause problems if Pica
@@ -544,7 +564,7 @@ void JitCompiler::Compile_RCP(Instruction instr) {
544 Compile_DestEnable(instr, SRC1); 564 Compile_DestEnable(instr, SRC1);
545} 565}
546 566
547void JitCompiler::Compile_RSQ(Instruction instr) { 567void JitShader::Compile_RSQ(Instruction instr) {
548 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 568 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
549 569
550 // TODO(bunnei): RSQRTSS is a pretty rough approximation, this might cause problems if Pica 570 // TODO(bunnei): RSQRTSS is a pretty rough approximation, this might cause problems if Pica
@@ -555,36 +575,41 @@ void JitCompiler::Compile_RSQ(Instruction instr) {
555 Compile_DestEnable(instr, SRC1); 575 Compile_DestEnable(instr, SRC1);
556} 576}
557 577
558void JitCompiler::Compile_NOP(Instruction instr) { 578void JitShader::Compile_NOP(Instruction instr) {
559} 579}
560 580
561void JitCompiler::Compile_END(Instruction instr) { 581void JitShader::Compile_END(Instruction instr) {
562 ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); 582 ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
563 RET(); 583 RET();
564} 584}
565 585
566void JitCompiler::Compile_CALL(Instruction instr) { 586void JitShader::Compile_CALL(Instruction instr) {
567 unsigned offset = instr.flow_control.dest_offset; 587 // Push offset of the return
568 while (offset < (instr.flow_control.dest_offset + instr.flow_control.num_instructions)) { 588 PUSH(64, Imm32(instr.flow_control.dest_offset + instr.flow_control.num_instructions));
569 Compile_NextInstr(&offset); 589
570 } 590 // Call the subroutine
591 FixupBranch b = CALL();
592 fixup_branches.push_back({ b, instr.flow_control.dest_offset });
593
594 // Skip over the return offset that's on the stack
595 ADD(64, R(RSP), Imm32(8));
571} 596}
572 597
573void JitCompiler::Compile_CALLC(Instruction instr) { 598void JitShader::Compile_CALLC(Instruction instr) {
574 Compile_EvaluateCondition(instr); 599 Compile_EvaluateCondition(instr);
575 FixupBranch b = J_CC(CC_Z, true); 600 FixupBranch b = J_CC(CC_Z, true);
576 Compile_CALL(instr); 601 Compile_CALL(instr);
577 SetJumpTarget(b); 602 SetJumpTarget(b);
578} 603}
579 604
580void JitCompiler::Compile_CALLU(Instruction instr) { 605void JitShader::Compile_CALLU(Instruction instr) {
581 Compile_UniformCondition(instr); 606 Compile_UniformCondition(instr);
582 FixupBranch b = J_CC(CC_Z, true); 607 FixupBranch b = J_CC(CC_Z, true);
583 Compile_CALL(instr); 608 Compile_CALL(instr);
584 SetJumpTarget(b); 609 SetJumpTarget(b);
585} 610}
586 611
587void JitCompiler::Compile_CMP(Instruction instr) { 612void JitShader::Compile_CMP(Instruction instr) {
588 using Op = Instruction::Common::CompareOpType::Op; 613 using Op = Instruction::Common::CompareOpType::Op;
589 Op op_x = instr.common.compare_op.x; 614 Op op_x = instr.common.compare_op.x;
590 Op op_y = instr.common.compare_op.y; 615 Op op_y = instr.common.compare_op.y;
@@ -627,7 +652,7 @@ void JitCompiler::Compile_CMP(Instruction instr) {
627 SHR(64, R(COND1), Imm8(63)); 652 SHR(64, R(COND1), Imm8(63));
628} 653}
629 654
630void JitCompiler::Compile_MAD(Instruction instr) { 655void JitShader::Compile_MAD(Instruction instr) {
631 Compile_SwizzleSrc(instr, 1, instr.mad.src1, SRC1); 656 Compile_SwizzleSrc(instr, 1, instr.mad.src1, SRC1);
632 657
633 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) { 658 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) {
@@ -644,9 +669,8 @@ void JitCompiler::Compile_MAD(Instruction instr) {
644 Compile_DestEnable(instr, SRC1); 669 Compile_DestEnable(instr, SRC1);
645} 670}
646 671
647void JitCompiler::Compile_IF(Instruction instr) { 672void JitShader::Compile_IF(Instruction instr) {
648 ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards if-statements (%d -> %d) not supported", 673 Compile_Assert(instr.flow_control.dest_offset >= program_counter, "Backwards if-statements not supported");
649 *offset_ptr, instr.flow_control.dest_offset.Value());
650 674
651 // Evaluate the "IF" condition 675 // Evaluate the "IF" condition
652 if (instr.opcode.Value() == OpCode::Id::IFU) { 676 if (instr.opcode.Value() == OpCode::Id::IFU) {
@@ -676,10 +700,9 @@ void JitCompiler::Compile_IF(Instruction instr) {
676 SetJumpTarget(b2); 700 SetJumpTarget(b2);
677} 701}
678 702
679void JitCompiler::Compile_LOOP(Instruction instr) { 703void JitShader::Compile_LOOP(Instruction instr) {
680 ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards loops (%d -> %d) not supported", 704 Compile_Assert(instr.flow_control.dest_offset >= program_counter, "Backwards loops not supported");
681 *offset_ptr, instr.flow_control.dest_offset.Value()); 705 Compile_Assert(!looping, "Nested loops not supported");
682 ASSERT_MSG(!looping, "Nested loops not supported");
683 706
684 looping = true; 707 looping = true;
685 708
@@ -705,10 +728,7 @@ void JitCompiler::Compile_LOOP(Instruction instr) {
705 looping = false; 728 looping = false;
706} 729}
707 730
708void JitCompiler::Compile_JMP(Instruction instr) { 731void JitShader::Compile_JMP(Instruction instr) {
709 ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards jumps (%d -> %d) not supported",
710 *offset_ptr, instr.flow_control.dest_offset.Value());
711
712 if (instr.opcode.Value() == OpCode::Id::JMPC) 732 if (instr.opcode.Value() == OpCode::Id::JMPC)
713 Compile_EvaluateCondition(instr); 733 Compile_EvaluateCondition(instr);
714 else if (instr.opcode.Value() == OpCode::Id::JMPU) 734 else if (instr.opcode.Value() == OpCode::Id::JMPU)
@@ -718,30 +738,38 @@ void JitCompiler::Compile_JMP(Instruction instr) {
718 738
719 bool inverted_condition = (instr.opcode.Value() == OpCode::Id::JMPU) && 739 bool inverted_condition = (instr.opcode.Value() == OpCode::Id::JMPU) &&
720 (instr.flow_control.num_instructions & 1); 740 (instr.flow_control.num_instructions & 1);
741
721 FixupBranch b = J_CC(inverted_condition ? CC_Z : CC_NZ, true); 742 FixupBranch b = J_CC(inverted_condition ? CC_Z : CC_NZ, true);
743 fixup_branches.push_back({ b, instr.flow_control.dest_offset });
744}
722 745
723 Compile_Block(instr.flow_control.dest_offset); 746void JitShader::Compile_Block(unsigned end) {
747 while (program_counter < end) {
748 Compile_NextInstr();
749 }
750}
751
752void JitShader::Compile_Return() {
753 // Peek return offset on the stack and check if we're at that offset
754 MOV(64, R(RAX), MDisp(RSP, 8));
755 CMP(32, R(RAX), Imm32(program_counter));
724 756
757 // If so, jump back to before CALL
758 FixupBranch b = J_CC(CC_NZ, true);
759 RET();
725 SetJumpTarget(b); 760 SetJumpTarget(b);
726} 761}
727 762
728void JitCompiler::Compile_Block(unsigned end) { 763void JitShader::Compile_NextInstr() {
729 // Save current offset pointer 764 if (std::binary_search(return_offsets.begin(), return_offsets.end(), program_counter)) {
730 unsigned* prev_offset_ptr = offset_ptr; 765 Compile_Return();
731 unsigned offset = *prev_offset_ptr; 766 }
732 767
733 while (offset < end) 768 ASSERT_MSG(code_ptr[program_counter] == nullptr, "Tried to compile already compiled shader location!");
734 Compile_NextInstr(&offset); 769 code_ptr[program_counter] = GetCodePtr();
735 770
736 // Restore current offset pointer 771 Instruction instr = GetVertexShaderInstruction(program_counter++);
737 offset_ptr = prev_offset_ptr;
738 *offset_ptr = offset;
739}
740 772
741void JitCompiler::Compile_NextInstr(unsigned* offset) {
742 offset_ptr = offset;
743
744 Instruction instr = *(Instruction*)&g_state.vs.program_code[(*offset_ptr)++];
745 OpCode::Id opcode = instr.opcode.Value(); 773 OpCode::Id opcode = instr.opcode.Value();
746 auto instr_func = instr_table[static_cast<unsigned>(opcode)]; 774 auto instr_func = instr_table[static_cast<unsigned>(opcode)];
747 775
@@ -755,9 +783,35 @@ void JitCompiler::Compile_NextInstr(unsigned* offset) {
755 } 783 }
756} 784}
757 785
758CompiledShader* JitCompiler::Compile() { 786void JitShader::FindReturnOffsets() {
759 const u8* start = GetCodePtr(); 787 return_offsets.clear();
760 unsigned offset = g_state.regs.vs.main_offset; 788
789 for (size_t offset = 0; offset < g_state.vs.program_code.size(); ++offset) {
790 Instruction instr = GetVertexShaderInstruction(offset);
791
792 switch (instr.opcode.Value()) {
793 case OpCode::Id::CALL:
794 case OpCode::Id::CALLC:
795 case OpCode::Id::CALLU:
796 return_offsets.push_back(instr.flow_control.dest_offset + instr.flow_control.num_instructions);
797 break;
798 }
799 }
800
801 // Sort for efficient binary search later
802 std::sort(return_offsets.begin(), return_offsets.end());
803}
804
805void JitShader::Compile() {
806 // Reset flow control state
807 program = (CompiledShader*)GetCodePtr();
808 program_counter = 0;
809 looping = false;
810 code_ptr.fill(nullptr);
811 fixup_branches.clear();
812
813 // Find all `CALL` instructions and identify return locations
814 FindReturnOffsets();
761 815
762 // The stack pointer is 8 modulo 16 at the entry of a procedure 816 // The stack pointer is 8 modulo 16 at the entry of a procedure
763 ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); 817 ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
@@ -780,21 +834,31 @@ CompiledShader* JitCompiler::Compile() {
780 MOV(PTRBITS, R(RAX), ImmPtr(&neg)); 834 MOV(PTRBITS, R(RAX), ImmPtr(&neg));
781 MOVAPS(NEGBIT, MatR(RAX)); 835 MOVAPS(NEGBIT, MatR(RAX));
782 836
783 looping = false; 837 // Jump to start of the shader program
838 JMPptr(R(ABI_PARAM2));
839
840 // Compile entire program
841 Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size()));
784 842
785 while (offset < g_state.vs.program_code.size()) { 843 // Set the target for any incomplete branches now that the entire shader program has been emitted
786 Compile_NextInstr(&offset); 844 for (const auto& branch : fixup_branches) {
845 SetJumpTarget(branch.first, code_ptr[branch.second]);
787 } 846 }
788 847
789 return (CompiledShader*)start; 848 // Free memory that's no longer needed
790} 849 return_offsets.clear();
850 return_offsets.shrink_to_fit();
851 fixup_branches.clear();
852 fixup_branches.shrink_to_fit();
853
854 uintptr_t size = reinterpret_cast<uintptr_t>(GetCodePtr()) - reinterpret_cast<uintptr_t>(program);
855 ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!");
791 856
792JitCompiler::JitCompiler() { 857 LOG_DEBUG(HW_GPU, "Compiled shader size=%d", size);
793 AllocCodeSpace(jit_cache_size);
794} 858}
795 859
796void JitCompiler::Clear() { 860JitShader::JitShader() {
797 ClearCodeSpace(); 861 AllocCodeSpace(MAX_SHADER_SIZE);
798} 862}
799 863
800} // namespace Shader 864} // namespace Shader
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h
index 5357c964b..cd6280ade 100644
--- a/src/video_core/shader/shader_jit_x64.h
+++ b/src/video_core/shader/shader_jit_x64.h
@@ -4,6 +4,9 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <utility>
8#include <vector>
9
7#include <nihstro/shader_bytecode.h> 10#include <nihstro/shader_bytecode.h>
8 11
9#include "common/x64/emitter.h" 12#include "common/x64/emitter.h"
@@ -19,24 +22,22 @@ namespace Pica {
19 22
20namespace Shader { 23namespace Shader {
21 24
22/// Memory needed to be available to compile the next shader (otherwise, clear the cache) 25/// Memory allocated for each compiled shader (64 KiB)
23constexpr size_t jit_shader_size = 1024 * 512; 26constexpr size_t MAX_SHADER_SIZE = 1024 * 64;
24/// Memory allocated for the JIT code space cache
25constexpr size_t jit_cache_size = 1024 * 1024 * 8;
26
27using CompiledShader = void(void* registers);
28 27
29/** 28/**
30 * This class implements the shader JIT compiler. It recompiles a Pica shader program into x86_64 29 * This class implements the shader JIT compiler. It recompiles a Pica shader program into x86_64
31 * code that can be executed on the host machine directly. 30 * code that can be executed on the host machine directly.
32 */ 31 */
33class JitCompiler : public Gen::XCodeBlock { 32class JitShader : public Gen::XCodeBlock {
34public: 33public:
35 JitCompiler(); 34 JitShader();
36 35
37 CompiledShader* Compile(); 36 void Run(void* registers, unsigned offset) const {
37 program(registers, code_ptr[offset]);
38 }
38 39
39 void Clear(); 40 void Compile();
40 41
41 void Compile_ADD(Instruction instr); 42 void Compile_ADD(Instruction instr);
42 void Compile_DP3(Instruction instr); 43 void Compile_DP3(Instruction instr);
@@ -66,8 +67,9 @@ public:
66 void Compile_MAD(Instruction instr); 67 void Compile_MAD(Instruction instr);
67 68
68private: 69private:
70
69 void Compile_Block(unsigned end); 71 void Compile_Block(unsigned end);
70 void Compile_NextInstr(unsigned* offset); 72 void Compile_NextInstr();
71 73
72 void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest); 74 void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest);
73 void Compile_DestEnable(Instruction instr, Gen::X64Reg dest); 75 void Compile_DestEnable(Instruction instr, Gen::X64Reg dest);
@@ -81,13 +83,39 @@ private:
81 void Compile_EvaluateCondition(Instruction instr); 83 void Compile_EvaluateCondition(Instruction instr);
82 void Compile_UniformCondition(Instruction instr); 84 void Compile_UniformCondition(Instruction instr);
83 85
86 /**
87 * Emits the code to conditionally return from a subroutine invoked by the `CALL` instruction.
88 */
89 void Compile_Return();
90
84 BitSet32 PersistentCallerSavedRegs(); 91 BitSet32 PersistentCallerSavedRegs();
85 92
86 /// Pointer to the variable that stores the current Pica code offset. Used to handle nested code blocks. 93 /**
87 unsigned* offset_ptr = nullptr; 94 * Assertion evaluated at compile-time, but only triggered if executed at runtime.
95 * @param msg Message to be logged if the assertion fails.
96 */
97 void Compile_Assert(bool condition, const char* msg);
98
99 /**
100 * Analyzes the entire shader program for `CALL` instructions before emitting any code,
101 * identifying the locations where a return needs to be inserted.
102 */
103 void FindReturnOffsets();
104
105 /// Mapping of Pica VS instructions to pointers in the emitted code
106 std::array<const u8*, 1024> code_ptr;
107
108 /// Offsets in code where a return needs to be inserted
109 std::vector<unsigned> return_offsets;
110
111 unsigned program_counter = 0; ///< Offset of the next instruction to decode
112 bool looping = false; ///< True if compiling a loop, used to check for nested loops
113
114 /// Branches that need to be fixed up once the entire shader program is compiled
115 std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches;
88 116
89 /// Set to true if currently in a loop, used to check for the existence of nested loops 117 using CompiledShader = void(void* registers, const u8* start_addr);
90 bool looping = false; 118 CompiledShader* program = nullptr;
91}; 119};
92 120
93} // Shader 121} // Shader