diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/citra_qt/configure.ui | 12 | ||||
| -rw-r--r-- | src/common/file_util.cpp | 43 | ||||
| -rw-r--r-- | src/common/file_util.h | 26 | ||||
| -rw-r--r-- | src/common/thread.h | 46 | ||||
| -rw-r--r-- | src/common/x64/emitter.cpp | 28 | ||||
| -rw-r--r-- | src/common/x64/emitter.h | 2 | ||||
| -rw-r--r-- | src/core/hle/config_mem.cpp | 7 | ||||
| -rw-r--r-- | src/core/hle/hle.cpp | 2 | ||||
| -rw-r--r-- | src/core/hle/service/soc_u.cpp | 100 | ||||
| -rw-r--r-- | src/core/hw/y2r.cpp | 2 | ||||
| -rw-r--r-- | src/core/loader/3dsx.cpp | 6 | ||||
| -rw-r--r-- | src/core/loader/ncch.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/command_processor.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/debug_utils/debug_utils.cpp | 19 | ||||
| -rw-r--r-- | src/video_core/rasterizer.cpp | 99 | ||||
| -rw-r--r-- | src/video_core/shader/shader.cpp | 34 | ||||
| -rw-r--r-- | src/video_core/shader/shader.h | 3 | ||||
| -rw-r--r-- | src/video_core/shader/shader_jit_x64.cpp | 302 | ||||
| -rw-r--r-- | src/video_core/shader/shader_jit_x64.h | 58 |
19 files changed, 474 insertions, 323 deletions
diff --git a/src/citra_qt/configure.ui b/src/citra_qt/configure.ui index 3c1f2ebba..6ae056ff9 100644 --- a/src/citra_qt/configure.ui +++ b/src/citra_qt/configure.ui | |||
| @@ -10,24 +10,12 @@ | |||
| 10 | <height>501</height> | 10 | <height>501</height> |
| 11 | </rect> | 11 | </rect> |
| 12 | </property> | 12 | </property> |
| 13 | <property name="minimumSize"> | ||
| 14 | <size> | ||
| 15 | <width>370</width> | ||
| 16 | <height>219</height> | ||
| 17 | </size> | ||
| 18 | </property> | ||
| 19 | <property name="windowTitle"> | 13 | <property name="windowTitle"> |
| 20 | <string>Citra Configuration</string> | 14 | <string>Citra Configuration</string> |
| 21 | </property> | 15 | </property> |
| 22 | <layout class="QVBoxLayout" name="verticalLayout"> | 16 | <layout class="QVBoxLayout" name="verticalLayout"> |
| 23 | <item> | 17 | <item> |
| 24 | <widget class="QTabWidget" name="tabWidget"> | 18 | <widget class="QTabWidget" name="tabWidget"> |
| 25 | <property name="minimumSize"> | ||
| 26 | <size> | ||
| 27 | <width>371</width> | ||
| 28 | <height>221</height> | ||
| 29 | </size> | ||
| 30 | </property> | ||
| 31 | <property name="currentIndex"> | 19 | <property name="currentIndex"> |
| 32 | <number>0</number> | 20 | <number>0</number> |
| 33 | </property> | 21 | </property> |
diff --git a/src/common/file_util.cpp b/src/common/file_util.cpp index 687b7ae5a..6e2867658 100644 --- a/src/common/file_util.cpp +++ b/src/common/file_util.cpp | |||
| @@ -833,13 +833,12 @@ size_t WriteStringToFile(bool text_file, const std::string &str, const char *fil | |||
| 833 | 833 | ||
| 834 | size_t ReadFileToString(bool text_file, const char *filename, std::string &str) | 834 | size_t ReadFileToString(bool text_file, const char *filename, std::string &str) |
| 835 | { | 835 | { |
| 836 | FileUtil::IOFile file(filename, text_file ? "r" : "rb"); | 836 | IOFile file(filename, text_file ? "r" : "rb"); |
| 837 | auto const f = file.GetHandle(); | ||
| 838 | 837 | ||
| 839 | if (!f) | 838 | if (!file) |
| 840 | return false; | 839 | return false; |
| 841 | 840 | ||
| 842 | str.resize(static_cast<u32>(GetSize(f))); | 841 | str.resize(static_cast<u32>(file.GetSize())); |
| 843 | return file.ReadArray(&str[0], str.size()); | 842 | return file.ReadArray(&str[0], str.size()); |
| 844 | } | 843 | } |
| 845 | 844 | ||
| @@ -886,15 +885,10 @@ void SplitFilename83(const std::string& filename, std::array<char, 9>& short_nam | |||
| 886 | } | 885 | } |
| 887 | 886 | ||
| 888 | IOFile::IOFile() | 887 | IOFile::IOFile() |
| 889 | : m_file(nullptr), m_good(true) | 888 | { |
| 890 | {} | 889 | } |
| 891 | |||
| 892 | IOFile::IOFile(std::FILE* file) | ||
| 893 | : m_file(file), m_good(true) | ||
| 894 | {} | ||
| 895 | 890 | ||
| 896 | IOFile::IOFile(const std::string& filename, const char openmode[]) | 891 | IOFile::IOFile(const std::string& filename, const char openmode[]) |
| 897 | : m_file(nullptr), m_good(true) | ||
| 898 | { | 892 | { |
| 899 | Open(filename, openmode); | 893 | Open(filename, openmode); |
| 900 | } | 894 | } |
| @@ -905,7 +899,6 @@ IOFile::~IOFile() | |||
| 905 | } | 899 | } |
| 906 | 900 | ||
| 907 | IOFile::IOFile(IOFile&& other) | 901 | IOFile::IOFile(IOFile&& other) |
| 908 | : m_file(nullptr), m_good(true) | ||
| 909 | { | 902 | { |
| 910 | Swap(other); | 903 | Swap(other); |
| 911 | } | 904 | } |
| @@ -944,26 +937,12 @@ bool IOFile::Close() | |||
| 944 | return m_good; | 937 | return m_good; |
| 945 | } | 938 | } |
| 946 | 939 | ||
| 947 | std::FILE* IOFile::ReleaseHandle() | 940 | u64 IOFile::GetSize() const |
| 948 | { | ||
| 949 | std::FILE* const ret = m_file; | ||
| 950 | m_file = nullptr; | ||
| 951 | return ret; | ||
| 952 | } | ||
| 953 | |||
| 954 | void IOFile::SetHandle(std::FILE* file) | ||
| 955 | { | ||
| 956 | Close(); | ||
| 957 | Clear(); | ||
| 958 | m_file = file; | ||
| 959 | } | ||
| 960 | |||
| 961 | u64 IOFile::GetSize() | ||
| 962 | { | 941 | { |
| 963 | if (IsOpen()) | 942 | if (IsOpen()) |
| 964 | return FileUtil::GetSize(m_file); | 943 | return FileUtil::GetSize(m_file); |
| 965 | else | 944 | |
| 966 | return 0; | 945 | return 0; |
| 967 | } | 946 | } |
| 968 | 947 | ||
| 969 | bool IOFile::Seek(s64 off, int origin) | 948 | bool IOFile::Seek(s64 off, int origin) |
| @@ -974,12 +953,12 @@ bool IOFile::Seek(s64 off, int origin) | |||
| 974 | return m_good; | 953 | return m_good; |
| 975 | } | 954 | } |
| 976 | 955 | ||
| 977 | u64 IOFile::Tell() | 956 | u64 IOFile::Tell() const |
| 978 | { | 957 | { |
| 979 | if (IsOpen()) | 958 | if (IsOpen()) |
| 980 | return ftello(m_file); | 959 | return ftello(m_file); |
| 981 | else | 960 | |
| 982 | return -1; | 961 | return -1; |
| 983 | } | 962 | } |
| 984 | 963 | ||
| 985 | bool IOFile::Flush() | 964 | bool IOFile::Flush() |
diff --git a/src/common/file_util.h b/src/common/file_util.h index 880b8a1e3..b54a9fb72 100644 --- a/src/common/file_util.h +++ b/src/common/file_util.h | |||
| @@ -176,7 +176,6 @@ class IOFile : public NonCopyable | |||
| 176 | { | 176 | { |
| 177 | public: | 177 | public: |
| 178 | IOFile(); | 178 | IOFile(); |
| 179 | explicit IOFile(std::FILE* file); | ||
| 180 | IOFile(const std::string& filename, const char openmode[]); | 179 | IOFile(const std::string& filename, const char openmode[]); |
| 181 | 180 | ||
| 182 | ~IOFile(); | 181 | ~IOFile(); |
| @@ -192,6 +191,9 @@ public: | |||
| 192 | template <typename T> | 191 | template <typename T> |
| 193 | size_t ReadArray(T* data, size_t length) | 192 | size_t ReadArray(T* data, size_t length) |
| 194 | { | 193 | { |
| 194 | static_assert(std::is_standard_layout<T>(), "Given array does not consist of standard layout objects"); | ||
| 195 | static_assert(std::is_trivially_copyable<T>(), "Given array does not consist of trivially copyable objects"); | ||
| 196 | |||
| 195 | if (!IsOpen()) { | 197 | if (!IsOpen()) { |
| 196 | m_good = false; | 198 | m_good = false; |
| 197 | return -1; | 199 | return -1; |
| @@ -207,9 +209,8 @@ public: | |||
| 207 | template <typename T> | 209 | template <typename T> |
| 208 | size_t WriteArray(const T* data, size_t length) | 210 | size_t WriteArray(const T* data, size_t length) |
| 209 | { | 211 | { |
| 210 | static_assert(std::is_standard_layout<T>::value, "Given array does not consist of standard layout objects"); | 212 | static_assert(std::is_standard_layout<T>(), "Given array does not consist of standard layout objects"); |
| 211 | // TODO: gcc 4.8 does not support is_trivially_copyable, but we really should check for it here. | 213 | static_assert(std::is_trivially_copyable<T>(), "Given array does not consist of trivially copyable objects"); |
| 212 | //static_assert(std::is_trivially_copyable<T>::value, "Given array does not consist of trivially copyable objects"); | ||
| 213 | 214 | ||
| 214 | if (!IsOpen()) { | 215 | if (!IsOpen()) { |
| 215 | m_good = false; | 216 | m_good = false; |
| @@ -243,25 +244,20 @@ public: | |||
| 243 | 244 | ||
| 244 | // m_good is set to false when a read, write or other function fails | 245 | // m_good is set to false when a read, write or other function fails |
| 245 | bool IsGood() const { return m_good; } | 246 | bool IsGood() const { return m_good; } |
| 246 | operator void*() { return m_good ? m_file : nullptr; } | 247 | explicit operator bool() const { return IsGood(); } |
| 247 | |||
| 248 | std::FILE* ReleaseHandle(); | ||
| 249 | |||
| 250 | std::FILE* GetHandle() { return m_file; } | ||
| 251 | |||
| 252 | void SetHandle(std::FILE* file); | ||
| 253 | 248 | ||
| 254 | bool Seek(s64 off, int origin); | 249 | bool Seek(s64 off, int origin); |
| 255 | u64 Tell(); | 250 | u64 Tell() const; |
| 256 | u64 GetSize(); | 251 | u64 GetSize() const; |
| 257 | bool Resize(u64 size); | 252 | bool Resize(u64 size); |
| 258 | bool Flush(); | 253 | bool Flush(); |
| 259 | 254 | ||
| 260 | // clear error state | 255 | // clear error state |
| 261 | void Clear() { m_good = true; std::clearerr(m_file); } | 256 | void Clear() { m_good = true; std::clearerr(m_file); } |
| 262 | 257 | ||
| 263 | std::FILE* m_file; | 258 | private: |
| 264 | bool m_good; | 259 | std::FILE* m_file = nullptr; |
| 260 | bool m_good = true; | ||
| 265 | }; | 261 | }; |
| 266 | 262 | ||
| 267 | } // namespace | 263 | } // namespace |
diff --git a/src/common/thread.h b/src/common/thread.h index 8255ee6d3..bbfa8befa 100644 --- a/src/common/thread.h +++ b/src/common/thread.h | |||
| @@ -30,8 +30,7 @@ | |||
| 30 | # endif | 30 | # endif |
| 31 | #endif | 31 | #endif |
| 32 | 32 | ||
| 33 | namespace Common | 33 | namespace Common { |
| 34 | { | ||
| 35 | 34 | ||
| 36 | int CurrentThreadId(); | 35 | int CurrentThreadId(); |
| 37 | 36 | ||
| @@ -43,55 +42,55 @@ public: | |||
| 43 | Event() : is_set(false) {} | 42 | Event() : is_set(false) {} |
| 44 | 43 | ||
| 45 | void Set() { | 44 | void Set() { |
| 46 | std::lock_guard<std::mutex> lk(m_mutex); | 45 | std::lock_guard<std::mutex> lk(mutex); |
| 47 | if (!is_set) { | 46 | if (!is_set) { |
| 48 | is_set = true; | 47 | is_set = true; |
| 49 | m_condvar.notify_one(); | 48 | condvar.notify_one(); |
| 50 | } | 49 | } |
| 51 | } | 50 | } |
| 52 | 51 | ||
| 53 | void Wait() { | 52 | void Wait() { |
| 54 | std::unique_lock<std::mutex> lk(m_mutex); | 53 | std::unique_lock<std::mutex> lk(mutex); |
| 55 | m_condvar.wait(lk, [&]{ return is_set; }); | 54 | condvar.wait(lk, [&]{ return is_set; }); |
| 56 | is_set = false; | 55 | is_set = false; |
| 57 | } | 56 | } |
| 58 | 57 | ||
| 59 | void Reset() { | 58 | void Reset() { |
| 60 | std::unique_lock<std::mutex> lk(m_mutex); | 59 | std::unique_lock<std::mutex> lk(mutex); |
| 61 | // no other action required, since wait loops on the predicate and any lingering signal will get cleared on the first iteration | 60 | // no other action required, since wait loops on the predicate and any lingering signal will get cleared on the first iteration |
| 62 | is_set = false; | 61 | is_set = false; |
| 63 | } | 62 | } |
| 64 | 63 | ||
| 65 | private: | 64 | private: |
| 66 | bool is_set; | 65 | bool is_set; |
| 67 | std::condition_variable m_condvar; | 66 | std::condition_variable condvar; |
| 68 | std::mutex m_mutex; | 67 | std::mutex mutex; |
| 69 | }; | 68 | }; |
| 70 | 69 | ||
| 71 | class Barrier { | 70 | class Barrier { |
| 72 | public: | 71 | public: |
| 73 | Barrier(size_t count) : m_count(count), m_waiting(0) {} | 72 | explicit Barrier(size_t count_) : count(count_), waiting(0), generation(0) {} |
| 74 | 73 | ||
| 75 | /// Blocks until all "count" threads have called Sync() | 74 | /// Blocks until all "count" threads have called Sync() |
| 76 | void Sync() { | 75 | void Sync() { |
| 77 | std::unique_lock<std::mutex> lk(m_mutex); | 76 | std::unique_lock<std::mutex> lk(mutex); |
| 77 | const size_t current_generation = generation; | ||
| 78 | 78 | ||
| 79 | // TODO: broken when next round of Sync()s | 79 | if (++waiting == count) { |
| 80 | // is entered before all waiting threads return from the notify_all | 80 | generation++; |
| 81 | 81 | waiting = 0; | |
| 82 | if (++m_waiting == m_count) { | 82 | condvar.notify_all(); |
| 83 | m_waiting = 0; | ||
| 84 | m_condvar.notify_all(); | ||
| 85 | } else { | 83 | } else { |
| 86 | m_condvar.wait(lk, [&]{ return m_waiting == 0; }); | 84 | condvar.wait(lk, [this, current_generation]{ return current_generation != generation; }); |
| 87 | } | 85 | } |
| 88 | } | 86 | } |
| 89 | 87 | ||
| 90 | private: | 88 | private: |
| 91 | std::condition_variable m_condvar; | 89 | std::condition_variable condvar; |
| 92 | std::mutex m_mutex; | 90 | std::mutex mutex; |
| 93 | const size_t m_count; | 91 | const size_t count; |
| 94 | size_t m_waiting; | 92 | size_t waiting; |
| 93 | size_t generation; // Incremented once each time the barrier is used | ||
| 95 | }; | 94 | }; |
| 96 | 95 | ||
| 97 | void SleepCurrentThread(int ms); | 96 | void SleepCurrentThread(int ms); |
| @@ -100,8 +99,7 @@ void SwitchCurrentThread(); // On Linux, this is equal to sleep 1ms | |||
| 100 | // Use this function during a spin-wait to make the current thread | 99 | // Use this function during a spin-wait to make the current thread |
| 101 | // relax while another thread is working. This may be more efficient | 100 | // relax while another thread is working. This may be more efficient |
| 102 | // than using events because event functions use kernel calls. | 101 | // than using events because event functions use kernel calls. |
| 103 | inline void YieldCPU() | 102 | inline void YieldCPU() { |
| 104 | { | ||
| 105 | std::this_thread::yield(); | 103 | std::this_thread::yield(); |
| 106 | } | 104 | } |
| 107 | 105 | ||
diff --git a/src/common/x64/emitter.cpp b/src/common/x64/emitter.cpp index 1dcf2416c..5662f7f86 100644 --- a/src/common/x64/emitter.cpp +++ b/src/common/x64/emitter.cpp | |||
| @@ -455,6 +455,18 @@ void XEmitter::CALL(const void* fnptr) | |||
| 455 | Write32(u32(distance)); | 455 | Write32(u32(distance)); |
| 456 | } | 456 | } |
| 457 | 457 | ||
| 458 | FixupBranch XEmitter::CALL() | ||
| 459 | { | ||
| 460 | FixupBranch branch; | ||
| 461 | branch.type = 1; | ||
| 462 | branch.ptr = code + 5; | ||
| 463 | |||
| 464 | Write8(0xE8); | ||
| 465 | Write32(0); | ||
| 466 | |||
| 467 | return branch; | ||
| 468 | } | ||
| 469 | |||
| 458 | FixupBranch XEmitter::J(bool force5bytes) | 470 | FixupBranch XEmitter::J(bool force5bytes) |
| 459 | { | 471 | { |
| 460 | FixupBranch branch; | 472 | FixupBranch branch; |
| @@ -531,6 +543,22 @@ void XEmitter::SetJumpTarget(const FixupBranch& branch) | |||
| 531 | } | 543 | } |
| 532 | } | 544 | } |
| 533 | 545 | ||
| 546 | void XEmitter::SetJumpTarget(const FixupBranch& branch, const u8* target) | ||
| 547 | { | ||
| 548 | if (branch.type == 0) | ||
| 549 | { | ||
| 550 | s64 distance = (s64)(target - branch.ptr); | ||
| 551 | ASSERT_MSG(distance >= -0x80 && distance < 0x80, "Jump target too far away, needs force5Bytes = true"); | ||
| 552 | branch.ptr[-1] = (u8)(s8)distance; | ||
| 553 | } | ||
| 554 | else if (branch.type == 1) | ||
| 555 | { | ||
| 556 | s64 distance = (s64)(target - branch.ptr); | ||
| 557 | ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL, "Jump target too far away, needs indirect register"); | ||
| 558 | ((s32*)branch.ptr)[-1] = (s32)distance; | ||
| 559 | } | ||
| 560 | } | ||
| 561 | |||
| 534 | //Single byte opcodes | 562 | //Single byte opcodes |
| 535 | //There is no PUSHAD/POPAD in 64-bit mode. | 563 | //There is no PUSHAD/POPAD in 64-bit mode. |
| 536 | void XEmitter::INT3() {Write8(0xCC);} | 564 | void XEmitter::INT3() {Write8(0xCC);} |
diff --git a/src/common/x64/emitter.h b/src/common/x64/emitter.h index 7c6548fb5..a33724146 100644 --- a/src/common/x64/emitter.h +++ b/src/common/x64/emitter.h | |||
| @@ -425,12 +425,14 @@ public: | |||
| 425 | #undef CALL | 425 | #undef CALL |
| 426 | #endif | 426 | #endif |
| 427 | void CALL(const void* fnptr); | 427 | void CALL(const void* fnptr); |
| 428 | FixupBranch CALL(); | ||
| 428 | void CALLptr(OpArg arg); | 429 | void CALLptr(OpArg arg); |
| 429 | 430 | ||
| 430 | FixupBranch J_CC(CCFlags conditionCode, bool force5bytes = false); | 431 | FixupBranch J_CC(CCFlags conditionCode, bool force5bytes = false); |
| 431 | void J_CC(CCFlags conditionCode, const u8* addr, bool force5Bytes = false); | 432 | void J_CC(CCFlags conditionCode, const u8* addr, bool force5Bytes = false); |
| 432 | 433 | ||
| 433 | void SetJumpTarget(const FixupBranch& branch); | 434 | void SetJumpTarget(const FixupBranch& branch); |
| 435 | void SetJumpTarget(const FixupBranch& branch, const u8* target); | ||
| 434 | 436 | ||
| 435 | void SETcc(CCFlags flag, OpArg dest); | 437 | void SETcc(CCFlags flag, OpArg dest); |
| 436 | // Note: CMOV brings small if any benefit on current cpus. | 438 | // Note: CMOV brings small if any benefit on current cpus. |
diff --git a/src/core/hle/config_mem.cpp b/src/core/hle/config_mem.cpp index b1a72dc0c..ccd73cfcb 100644 --- a/src/core/hle/config_mem.cpp +++ b/src/core/hle/config_mem.cpp | |||
| @@ -3,13 +3,6 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <cstring> | 5 | #include <cstring> |
| 6 | |||
| 7 | #include "common/assert.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "common/common_funcs.h" | ||
| 10 | |||
| 11 | #include "core/core.h" | ||
| 12 | #include "core/memory.h" | ||
| 13 | #include "core/hle/config_mem.h" | 6 | #include "core/hle/config_mem.h" |
| 14 | 7 | ||
| 15 | //////////////////////////////////////////////////////////////////////////////////////////////////// | 8 | //////////////////////////////////////////////////////////////////////////////////////////////////// |
diff --git a/src/core/hle/hle.cpp b/src/core/hle/hle.cpp index 331b1b22a..e545de3b5 100644 --- a/src/core/hle/hle.cpp +++ b/src/core/hle/hle.cpp | |||
| @@ -8,8 +8,6 @@ | |||
| 8 | #include "core/arm/arm_interface.h" | 8 | #include "core/arm/arm_interface.h" |
| 9 | #include "core/core.h" | 9 | #include "core/core.h" |
| 10 | #include "core/hle/hle.h" | 10 | #include "core/hle/hle.h" |
| 11 | #include "core/hle/config_mem.h" | ||
| 12 | #include "core/hle/shared_page.h" | ||
| 13 | #include "core/hle/service/service.h" | 11 | #include "core/hle/service/service.h" |
| 14 | 12 | ||
| 15 | //////////////////////////////////////////////////////////////////////////////////////////////////// | 13 | //////////////////////////////////////////////////////////////////////////////////////////////////// |
diff --git a/src/core/hle/service/soc_u.cpp b/src/core/hle/service/soc_u.cpp index ff0af8f12..d3e5d4bca 100644 --- a/src/core/hle/service/soc_u.cpp +++ b/src/core/hle/service/soc_u.cpp | |||
| @@ -151,6 +151,34 @@ static int TranslateError(int error) { | |||
| 151 | return error; | 151 | return error; |
| 152 | } | 152 | } |
| 153 | 153 | ||
| 154 | /// Holds the translation from 3DS network socket options to system (platform) network socket options | ||
| 155 | /// Note: -1 = No effect/unavailable | ||
| 156 | static const std::unordered_map<int, int> sockopt_map = { { | ||
| 157 | { 0x0004, SO_REUSEADDR }, | ||
| 158 | { 0x0080, -1 }, | ||
| 159 | { 0x0100, -1 }, | ||
| 160 | { 0x1001, SO_SNDBUF }, | ||
| 161 | { 0x1002, SO_RCVBUF }, | ||
| 162 | { 0x1003, -1 }, | ||
| 163 | #ifdef _WIN32 | ||
| 164 | /// Unsupported in WinSock2 | ||
| 165 | { 0x1004, -1 }, | ||
| 166 | #else | ||
| 167 | { 0x1004, SO_RCVLOWAT }, | ||
| 168 | #endif | ||
| 169 | { 0x1008, SO_TYPE }, | ||
| 170 | { 0x1009, SO_ERROR }, | ||
| 171 | }}; | ||
| 172 | |||
| 173 | /// Converts a socket option from 3ds-specific to platform-specific | ||
| 174 | static int TranslateSockOpt(int console_opt_name) { | ||
| 175 | auto found = sockopt_map.find(console_opt_name); | ||
| 176 | if (found != sockopt_map.end()) { | ||
| 177 | return found->second; | ||
| 178 | } | ||
| 179 | return console_opt_name; | ||
| 180 | } | ||
| 181 | |||
| 154 | /// Holds information about a particular socket | 182 | /// Holds information about a particular socket |
| 155 | struct SocketHolder { | 183 | struct SocketHolder { |
| 156 | u32 socket_fd; ///< The socket descriptor | 184 | u32 socket_fd; ///< The socket descriptor |
| @@ -568,7 +596,7 @@ static void RecvFrom(Service::Interface* self) { | |||
| 568 | socklen_t src_addr_len = sizeof(src_addr); | 596 | socklen_t src_addr_len = sizeof(src_addr); |
| 569 | int ret = ::recvfrom(socket_handle, (char*)output_buff, len, flags, &src_addr, &src_addr_len); | 597 | int ret = ::recvfrom(socket_handle, (char*)output_buff, len, flags, &src_addr, &src_addr_len); |
| 570 | 598 | ||
| 571 | if (buffer_parameters.output_src_address_buffer != 0) { | 599 | if (ret >= 0 && buffer_parameters.output_src_address_buffer != 0 && src_addr_len > 0) { |
| 572 | CTRSockAddr* ctr_src_addr = reinterpret_cast<CTRSockAddr*>(Memory::GetPointer(buffer_parameters.output_src_address_buffer)); | 600 | CTRSockAddr* ctr_src_addr = reinterpret_cast<CTRSockAddr*>(Memory::GetPointer(buffer_parameters.output_src_address_buffer)); |
| 573 | *ctr_src_addr = CTRSockAddr::FromPlatform(src_addr); | 601 | *ctr_src_addr = CTRSockAddr::FromPlatform(src_addr); |
| 574 | } | 602 | } |
| @@ -724,6 +752,72 @@ static void ShutdownSockets(Service::Interface* self) { | |||
| 724 | cmd_buffer[1] = 0; | 752 | cmd_buffer[1] = 0; |
| 725 | } | 753 | } |
| 726 | 754 | ||
| 755 | static void GetSockOpt(Service::Interface* self) { | ||
| 756 | u32* cmd_buffer = Kernel::GetCommandBuffer(); | ||
| 757 | u32 socket_handle = cmd_buffer[1]; | ||
| 758 | u32 level = cmd_buffer[2]; | ||
| 759 | int optname = TranslateSockOpt(cmd_buffer[3]); | ||
| 760 | socklen_t optlen = (socklen_t)cmd_buffer[4]; | ||
| 761 | |||
| 762 | int ret = -1; | ||
| 763 | int err = 0; | ||
| 764 | |||
| 765 | if(optname < 0) { | ||
| 766 | #ifdef _WIN32 | ||
| 767 | err = WSAEINVAL; | ||
| 768 | #else | ||
| 769 | err = EINVAL; | ||
| 770 | #endif | ||
| 771 | } else { | ||
| 772 | // 0x100 = static buffer offset (bytes) | ||
| 773 | // + 0x4 = 2nd pointer (u32) position | ||
| 774 | // >> 2 = convert to u32 offset instead of byte offset (cmd_buffer = u32*) | ||
| 775 | char* optval = reinterpret_cast<char *>(Memory::GetPointer(cmd_buffer[0x104 >> 2])); | ||
| 776 | |||
| 777 | ret = ::getsockopt(socket_handle, level, optname, optval, &optlen); | ||
| 778 | err = 0; | ||
| 779 | if (ret == SOCKET_ERROR_VALUE) { | ||
| 780 | err = TranslateError(GET_ERRNO); | ||
| 781 | } | ||
| 782 | } | ||
| 783 | |||
| 784 | cmd_buffer[0] = IPC::MakeHeader(0x11, 4, 2); | ||
| 785 | cmd_buffer[1] = ret; | ||
| 786 | cmd_buffer[2] = err; | ||
| 787 | cmd_buffer[3] = optlen; | ||
| 788 | } | ||
| 789 | |||
| 790 | static void SetSockOpt(Service::Interface* self) { | ||
| 791 | u32* cmd_buffer = Kernel::GetCommandBuffer(); | ||
| 792 | u32 socket_handle = cmd_buffer[1]; | ||
| 793 | u32 level = cmd_buffer[2]; | ||
| 794 | int optname = TranslateSockOpt(cmd_buffer[3]); | ||
| 795 | |||
| 796 | int ret = -1; | ||
| 797 | int err = 0; | ||
| 798 | |||
| 799 | if(optname < 0) { | ||
| 800 | #ifdef _WIN32 | ||
| 801 | err = WSAEINVAL; | ||
| 802 | #else | ||
| 803 | err = EINVAL; | ||
| 804 | #endif | ||
| 805 | } else { | ||
| 806 | socklen_t optlen = static_cast<socklen_t>(cmd_buffer[4]); | ||
| 807 | const char* optval = reinterpret_cast<const char *>(Memory::GetPointer(cmd_buffer[8])); | ||
| 808 | |||
| 809 | ret = static_cast<u32>(::setsockopt(socket_handle, level, optname, optval, optlen)); | ||
| 810 | err = 0; | ||
| 811 | if (ret == SOCKET_ERROR_VALUE) { | ||
| 812 | err = TranslateError(GET_ERRNO); | ||
| 813 | } | ||
| 814 | } | ||
| 815 | |||
| 816 | cmd_buffer[0] = IPC::MakeHeader(0x12, 4, 4); | ||
| 817 | cmd_buffer[1] = ret; | ||
| 818 | cmd_buffer[2] = err; | ||
| 819 | } | ||
| 820 | |||
| 727 | const Interface::FunctionInfo FunctionTable[] = { | 821 | const Interface::FunctionInfo FunctionTable[] = { |
| 728 | {0x00010044, InitializeSockets, "InitializeSockets"}, | 822 | {0x00010044, InitializeSockets, "InitializeSockets"}, |
| 729 | {0x000200C2, Socket, "Socket"}, | 823 | {0x000200C2, Socket, "Socket"}, |
| @@ -741,8 +835,8 @@ const Interface::FunctionInfo FunctionTable[] = { | |||
| 741 | {0x000E00C2, nullptr, "GetHostByAddr"}, | 835 | {0x000E00C2, nullptr, "GetHostByAddr"}, |
| 742 | {0x000F0106, nullptr, "GetAddrInfo"}, | 836 | {0x000F0106, nullptr, "GetAddrInfo"}, |
| 743 | {0x00100102, nullptr, "GetNameInfo"}, | 837 | {0x00100102, nullptr, "GetNameInfo"}, |
| 744 | {0x00110102, nullptr, "GetSockOpt"}, | 838 | {0x00110102, GetSockOpt, "GetSockOpt"}, |
| 745 | {0x00120104, nullptr, "SetSockOpt"}, | 839 | {0x00120104, SetSockOpt, "SetSockOpt"}, |
| 746 | {0x001300C2, Fcntl, "Fcntl"}, | 840 | {0x001300C2, Fcntl, "Fcntl"}, |
| 747 | {0x00140084, Poll, "Poll"}, | 841 | {0x00140084, Poll, "Poll"}, |
| 748 | {0x00150042, nullptr, "SockAtMark"}, | 842 | {0x00150042, nullptr, "SockAtMark"}, |
diff --git a/src/core/hw/y2r.cpp b/src/core/hw/y2r.cpp index 48c45564f..083391e83 100644 --- a/src/core/hw/y2r.cpp +++ b/src/core/hw/y2r.cpp | |||
| @@ -261,7 +261,7 @@ void PerformConversion(ConversionConfiguration& cvt) { | |||
| 261 | ASSERT(cvt.block_alignment != BlockAlignment::Block8x8 || cvt.input_lines % 8 == 0); | 261 | ASSERT(cvt.block_alignment != BlockAlignment::Block8x8 || cvt.input_lines % 8 == 0); |
| 262 | // Tiles per row | 262 | // Tiles per row |
| 263 | size_t num_tiles = cvt.input_line_width / 8; | 263 | size_t num_tiles = cvt.input_line_width / 8; |
| 264 | ASSERT(num_tiles < MAX_TILES); | 264 | ASSERT(num_tiles <= MAX_TILES); |
| 265 | 265 | ||
| 266 | // Buffer used as a CDMA source/target. | 266 | // Buffer used as a CDMA source/target. |
| 267 | std::unique_ptr<u8[]> data_buffer(new u8[cvt.input_line_width * 8 * 4]); | 267 | std::unique_ptr<u8[]> data_buffer(new u8[cvt.input_line_width * 8 * 4]); |
diff --git a/src/core/loader/3dsx.cpp b/src/core/loader/3dsx.cpp index 8eed6a50a..5fb3b9e2b 100644 --- a/src/core/loader/3dsx.cpp +++ b/src/core/loader/3dsx.cpp | |||
| @@ -10,13 +10,9 @@ | |||
| 10 | #include "core/file_sys/archive_romfs.h" | 10 | #include "core/file_sys/archive_romfs.h" |
| 11 | #include "core/hle/kernel/process.h" | 11 | #include "core/hle/kernel/process.h" |
| 12 | #include "core/hle/kernel/resource_limit.h" | 12 | #include "core/hle/kernel/resource_limit.h" |
| 13 | #include "core/hle/service/fs/archive.h" | 13 | #include "core/loader/3dsx.h" |
| 14 | #include "core/loader/elf.h" | ||
| 15 | #include "core/loader/ncch.h" | ||
| 16 | #include "core/memory.h" | 14 | #include "core/memory.h" |
| 17 | 15 | ||
| 18 | #include "3dsx.h" | ||
| 19 | |||
| 20 | namespace Loader { | 16 | namespace Loader { |
| 21 | 17 | ||
| 22 | /* | 18 | /* |
diff --git a/src/core/loader/ncch.cpp b/src/core/loader/ncch.cpp index e63cab33f..a4b47ef8c 100644 --- a/src/core/loader/ncch.cpp +++ b/src/core/loader/ncch.cpp | |||
| @@ -174,7 +174,7 @@ ResultStatus AppLoader_NCCH::LoadSectionExeFS(const char* name, std::vector<u8>& | |||
| 174 | return ResultStatus::Error; | 174 | return ResultStatus::Error; |
| 175 | 175 | ||
| 176 | LOG_DEBUG(Loader, "%d sections:", kMaxSections); | 176 | LOG_DEBUG(Loader, "%d sections:", kMaxSections); |
| 177 | // Iterate through the ExeFs archive until we find the .code file... | 177 | // Iterate through the ExeFs archive until we find a section with the specified name... |
| 178 | for (unsigned section_number = 0; section_number < kMaxSections; section_number++) { | 178 | for (unsigned section_number = 0; section_number < kMaxSections; section_number++) { |
| 179 | const auto& section = exefs_header.section[section_number]; | 179 | const auto& section = exefs_header.section[section_number]; |
| 180 | 180 | ||
| @@ -186,7 +186,7 @@ ResultStatus AppLoader_NCCH::LoadSectionExeFS(const char* name, std::vector<u8>& | |||
| 186 | s64 section_offset = (section.offset + exefs_offset + sizeof(ExeFs_Header) + ncch_offset); | 186 | s64 section_offset = (section.offset + exefs_offset + sizeof(ExeFs_Header) + ncch_offset); |
| 187 | file.Seek(section_offset, SEEK_SET); | 187 | file.Seek(section_offset, SEEK_SET); |
| 188 | 188 | ||
| 189 | if (is_compressed) { | 189 | if (strcmp(section.name, ".code") == 0 && is_compressed) { |
| 190 | // Section is compressed, read compressed .code section... | 190 | // Section is compressed, read compressed .code section... |
| 191 | std::unique_ptr<u8[]> temp_buffer; | 191 | std::unique_ptr<u8[]> temp_buffer; |
| 192 | try { | 192 | try { |
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 08ec2907a..3abe79c09 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp | |||
| @@ -140,7 +140,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 140 | immediate_attribute_id = 0; | 140 | immediate_attribute_id = 0; |
| 141 | 141 | ||
| 142 | Shader::UnitState<false> shader_unit; | 142 | Shader::UnitState<false> shader_unit; |
| 143 | Shader::Setup(shader_unit); | 143 | Shader::Setup(); |
| 144 | 144 | ||
| 145 | if (g_debug_context) | 145 | if (g_debug_context) |
| 146 | g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, static_cast<void*>(&immediate_input)); | 146 | g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, static_cast<void*>(&immediate_input)); |
| @@ -300,7 +300,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 300 | vertex_cache_ids.fill(-1); | 300 | vertex_cache_ids.fill(-1); |
| 301 | 301 | ||
| 302 | Shader::UnitState<false> shader_unit; | 302 | Shader::UnitState<false> shader_unit; |
| 303 | Shader::Setup(shader_unit); | 303 | Shader::Setup(); |
| 304 | 304 | ||
| 305 | for (unsigned int index = 0; index < regs.num_vertices; ++index) | 305 | for (unsigned int index = 0; index < regs.num_vertices; ++index) |
| 306 | { | 306 | { |
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index 693f93597..c3a9c9598 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp | |||
| @@ -286,7 +286,7 @@ void StartPicaTracing() | |||
| 286 | } | 286 | } |
| 287 | 287 | ||
| 288 | std::lock_guard<std::mutex> lock(pica_trace_mutex); | 288 | std::lock_guard<std::mutex> lock(pica_trace_mutex); |
| 289 | pica_trace = std::unique_ptr<PicaTrace>(new PicaTrace); | 289 | pica_trace = std::make_unique<PicaTrace>(); |
| 290 | 290 | ||
| 291 | is_pica_tracing = true; | 291 | is_pica_tracing = true; |
| 292 | } | 292 | } |
| @@ -586,6 +586,21 @@ TextureInfo TextureInfo::FromPicaRegister(const Regs::TextureConfig& config, | |||
| 586 | return info; | 586 | return info; |
| 587 | } | 587 | } |
| 588 | 588 | ||
| 589 | #ifdef HAVE_PNG | ||
| 590 | // Adapter functions for libpng to write/flush to FileUtil::IOFile instances. | ||
| 591 | static void WriteIOFile(png_structp png_ptr, png_bytep data, png_size_t length) { | ||
| 592 | auto* fp = static_cast<FileUtil::IOFile*>(png_get_io_ptr(png_ptr)); | ||
| 593 | if (!fp->WriteBytes(data, length)) | ||
| 594 | png_error(png_ptr, "Failed to write to output PNG file."); | ||
| 595 | } | ||
| 596 | |||
| 597 | static void FlushIOFile(png_structp png_ptr) { | ||
| 598 | auto* fp = static_cast<FileUtil::IOFile*>(png_get_io_ptr(png_ptr)); | ||
| 599 | if (!fp->Flush()) | ||
| 600 | png_error(png_ptr, "Failed to flush to output PNG file."); | ||
| 601 | } | ||
| 602 | #endif | ||
| 603 | |||
| 589 | void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) { | 604 | void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) { |
| 590 | #ifndef HAVE_PNG | 605 | #ifndef HAVE_PNG |
| 591 | return; | 606 | return; |
| @@ -629,7 +644,7 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) { | |||
| 629 | goto finalise; | 644 | goto finalise; |
| 630 | } | 645 | } |
| 631 | 646 | ||
| 632 | png_init_io(png_ptr, fp.GetHandle()); | 647 | png_set_write_fn(png_ptr, static_cast<void*>(&fp), WriteIOFile, FlushIOFile); |
| 633 | 648 | ||
| 634 | // Write header (8 bit color depth) | 649 | // Write header (8 bit color depth) |
| 635 | png_set_IHDR(png_ptr, info_ptr, texture_config.width, texture_config.height, | 650 | png_set_IHDR(png_ptr, info_ptr, texture_config.width, texture_config.height, |
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 5b9ed7c64..0434ad05a 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp | |||
| @@ -923,92 +923,72 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 923 | if (output_merger.alphablend_enable) { | 923 | if (output_merger.alphablend_enable) { |
| 924 | auto params = output_merger.alpha_blending; | 924 | auto params = output_merger.alpha_blending; |
| 925 | 925 | ||
| 926 | auto LookupFactorRGB = [&](Regs::BlendFactor factor) -> Math::Vec3<u8> { | 926 | auto LookupFactor = [&](unsigned channel, Regs::BlendFactor factor) -> u8 { |
| 927 | DEBUG_ASSERT(channel < 4); | ||
| 928 | |||
| 929 | const Math::Vec4<u8> blend_const = { | ||
| 930 | static_cast<u8>(output_merger.blend_const.r), | ||
| 931 | static_cast<u8>(output_merger.blend_const.g), | ||
| 932 | static_cast<u8>(output_merger.blend_const.b), | ||
| 933 | static_cast<u8>(output_merger.blend_const.a) | ||
| 934 | }; | ||
| 935 | |||
| 927 | switch (factor) { | 936 | switch (factor) { |
| 928 | case Regs::BlendFactor::Zero : | 937 | case Regs::BlendFactor::Zero: |
| 929 | return Math::Vec3<u8>(0, 0, 0); | 938 | return 0; |
| 930 | 939 | ||
| 931 | case Regs::BlendFactor::One : | 940 | case Regs::BlendFactor::One: |
| 932 | return Math::Vec3<u8>(255, 255, 255); | 941 | return 255; |
| 933 | 942 | ||
| 934 | case Regs::BlendFactor::SourceColor: | 943 | case Regs::BlendFactor::SourceColor: |
| 935 | return combiner_output.rgb(); | 944 | return combiner_output[channel]; |
| 936 | 945 | ||
| 937 | case Regs::BlendFactor::OneMinusSourceColor: | 946 | case Regs::BlendFactor::OneMinusSourceColor: |
| 938 | return Math::Vec3<u8>(255 - combiner_output.r(), 255 - combiner_output.g(), 255 - combiner_output.b()); | 947 | return 255 - combiner_output[channel]; |
| 939 | 948 | ||
| 940 | case Regs::BlendFactor::DestColor: | 949 | case Regs::BlendFactor::DestColor: |
| 941 | return dest.rgb(); | 950 | return dest[channel]; |
| 942 | 951 | ||
| 943 | case Regs::BlendFactor::OneMinusDestColor: | 952 | case Regs::BlendFactor::OneMinusDestColor: |
| 944 | return Math::Vec3<u8>(255 - dest.r(), 255 - dest.g(), 255 - dest.b()); | 953 | return 255 - dest[channel]; |
| 945 | 954 | ||
| 946 | case Regs::BlendFactor::SourceAlpha: | 955 | case Regs::BlendFactor::SourceAlpha: |
| 947 | return Math::Vec3<u8>(combiner_output.a(), combiner_output.a(), combiner_output.a()); | 956 | return combiner_output.a(); |
| 948 | 957 | ||
| 949 | case Regs::BlendFactor::OneMinusSourceAlpha: | 958 | case Regs::BlendFactor::OneMinusSourceAlpha: |
| 950 | return Math::Vec3<u8>(255 - combiner_output.a(), 255 - combiner_output.a(), 255 - combiner_output.a()); | 959 | return 255 - combiner_output.a(); |
| 951 | 960 | ||
| 952 | case Regs::BlendFactor::DestAlpha: | 961 | case Regs::BlendFactor::DestAlpha: |
| 953 | return Math::Vec3<u8>(dest.a(), dest.a(), dest.a()); | 962 | return dest.a(); |
| 954 | 963 | ||
| 955 | case Regs::BlendFactor::OneMinusDestAlpha: | 964 | case Regs::BlendFactor::OneMinusDestAlpha: |
| 956 | return Math::Vec3<u8>(255 - dest.a(), 255 - dest.a(), 255 - dest.a()); | 965 | return 255 - dest.a(); |
| 957 | 966 | ||
| 958 | case Regs::BlendFactor::ConstantColor: | 967 | case Regs::BlendFactor::ConstantColor: |
| 959 | return Math::Vec3<u8>(output_merger.blend_const.r, output_merger.blend_const.g, output_merger.blend_const.b); | 968 | return blend_const[channel]; |
| 960 | 969 | ||
| 961 | case Regs::BlendFactor::OneMinusConstantColor: | 970 | case Regs::BlendFactor::OneMinusConstantColor: |
| 962 | return Math::Vec3<u8>(255 - output_merger.blend_const.r, 255 - output_merger.blend_const.g, 255 - output_merger.blend_const.b); | 971 | return 255 - blend_const[channel]; |
| 963 | 972 | ||
| 964 | case Regs::BlendFactor::ConstantAlpha: | 973 | case Regs::BlendFactor::ConstantAlpha: |
| 965 | return Math::Vec3<u8>(output_merger.blend_const.a, output_merger.blend_const.a, output_merger.blend_const.a); | 974 | return blend_const.a(); |
| 966 | 975 | ||
| 967 | case Regs::BlendFactor::OneMinusConstantAlpha: | 976 | case Regs::BlendFactor::OneMinusConstantAlpha: |
| 968 | return Math::Vec3<u8>(255 - output_merger.blend_const.a, 255 - output_merger.blend_const.a, 255 - output_merger.blend_const.a); | 977 | return 255 - blend_const.a(); |
| 969 | |||
| 970 | default: | ||
| 971 | LOG_CRITICAL(HW_GPU, "Unknown color blend factor %x", factor); | ||
| 972 | UNIMPLEMENTED(); | ||
| 973 | break; | ||
| 974 | } | ||
| 975 | |||
| 976 | return {}; | ||
| 977 | }; | ||
| 978 | |||
| 979 | auto LookupFactorA = [&](Regs::BlendFactor factor) -> u8 { | ||
| 980 | switch (factor) { | ||
| 981 | case Regs::BlendFactor::Zero: | ||
| 982 | return 0; | ||
| 983 | |||
| 984 | case Regs::BlendFactor::One: | ||
| 985 | return 255; | ||
| 986 | |||
| 987 | case Regs::BlendFactor::SourceAlpha: | ||
| 988 | return combiner_output.a(); | ||
| 989 | |||
| 990 | case Regs::BlendFactor::OneMinusSourceAlpha: | ||
| 991 | return 255 - combiner_output.a(); | ||
| 992 | 978 | ||
| 993 | case Regs::BlendFactor::DestAlpha: | 979 | case Regs::BlendFactor::SourceAlphaSaturate: |
| 994 | return dest.a(); | 980 | // Returns 1.0 for the alpha channel |
| 995 | 981 | if (channel == 3) | |
| 996 | case Regs::BlendFactor::OneMinusDestAlpha: | 982 | return 255; |
| 997 | return 255 - dest.a(); | 983 | return std::min(combiner_output.a(), static_cast<u8>(255 - dest.a())); |
| 998 | |||
| 999 | case Regs::BlendFactor::ConstantAlpha: | ||
| 1000 | return output_merger.blend_const.a; | ||
| 1001 | |||
| 1002 | case Regs::BlendFactor::OneMinusConstantAlpha: | ||
| 1003 | return 255 - output_merger.blend_const.a; | ||
| 1004 | 984 | ||
| 1005 | default: | 985 | default: |
| 1006 | LOG_CRITICAL(HW_GPU, "Unknown alpha blend factor %x", factor); | 986 | LOG_CRITICAL(HW_GPU, "Unknown blend factor %x", factor); |
| 1007 | UNIMPLEMENTED(); | 987 | UNIMPLEMENTED(); |
| 1008 | break; | 988 | break; |
| 1009 | } | 989 | } |
| 1010 | 990 | ||
| 1011 | return {}; | 991 | return combiner_output[channel]; |
| 1012 | }; | 992 | }; |
| 1013 | 993 | ||
| 1014 | static auto EvaluateBlendEquation = [](const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor, | 994 | static auto EvaluateBlendEquation = [](const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor, |
| @@ -1060,10 +1040,15 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 1060 | MathUtil::Clamp(result.a(), 0, 255)); | 1040 | MathUtil::Clamp(result.a(), 0, 255)); |
| 1061 | }; | 1041 | }; |
| 1062 | 1042 | ||
| 1063 | auto srcfactor = Math::MakeVec(LookupFactorRGB(params.factor_source_rgb), | 1043 | auto srcfactor = Math::MakeVec(LookupFactor(0, params.factor_source_rgb), |
| 1064 | LookupFactorA(params.factor_source_a)); | 1044 | LookupFactor(1, params.factor_source_rgb), |
| 1065 | auto dstfactor = Math::MakeVec(LookupFactorRGB(params.factor_dest_rgb), | 1045 | LookupFactor(2, params.factor_source_rgb), |
| 1066 | LookupFactorA(params.factor_dest_a)); | 1046 | LookupFactor(3, params.factor_source_a)); |
| 1047 | |||
| 1048 | auto dstfactor = Math::MakeVec(LookupFactor(0, params.factor_dest_rgb), | ||
| 1049 | LookupFactor(1, params.factor_dest_rgb), | ||
| 1050 | LookupFactor(2, params.factor_dest_rgb), | ||
| 1051 | LookupFactor(3, params.factor_dest_a)); | ||
| 1067 | 1052 | ||
| 1068 | blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_rgb); | 1053 | blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_rgb); |
| 1069 | blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_a).a(); | 1054 | blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_a).a(); |
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 78d295c76..75301accd 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp | |||
| @@ -28,36 +28,24 @@ namespace Pica { | |||
| 28 | namespace Shader { | 28 | namespace Shader { |
| 29 | 29 | ||
| 30 | #ifdef ARCHITECTURE_x86_64 | 30 | #ifdef ARCHITECTURE_x86_64 |
| 31 | static std::unordered_map<u64, CompiledShader*> shader_map; | 31 | static std::unordered_map<u64, std::unique_ptr<JitShader>> shader_map; |
| 32 | static JitCompiler jit; | 32 | static const JitShader* jit_shader; |
| 33 | static CompiledShader* jit_shader; | ||
| 34 | |||
| 35 | static void ClearCache() { | ||
| 36 | shader_map.clear(); | ||
| 37 | jit.Clear(); | ||
| 38 | LOG_INFO(HW_GPU, "Shader JIT cache cleared"); | ||
| 39 | } | ||
| 40 | #endif // ARCHITECTURE_x86_64 | 33 | #endif // ARCHITECTURE_x86_64 |
| 41 | 34 | ||
| 42 | void Setup(UnitState<false>& state) { | 35 | void Setup() { |
| 43 | #ifdef ARCHITECTURE_x86_64 | 36 | #ifdef ARCHITECTURE_x86_64 |
| 44 | if (VideoCore::g_shader_jit_enabled) { | 37 | if (VideoCore::g_shader_jit_enabled) { |
| 45 | u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ | 38 | u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ |
| 46 | Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data)) ^ | 39 | Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data))); |
| 47 | g_state.regs.vs.main_offset); | ||
| 48 | 40 | ||
| 49 | auto iter = shader_map.find(cache_key); | 41 | auto iter = shader_map.find(cache_key); |
| 50 | if (iter != shader_map.end()) { | 42 | if (iter != shader_map.end()) { |
| 51 | jit_shader = iter->second; | 43 | jit_shader = iter->second.get(); |
| 52 | } else { | 44 | } else { |
| 53 | // Check if remaining JIT code space is enough for at least one more (massive) shader | 45 | auto shader = std::make_unique<JitShader>(); |
| 54 | if (jit.GetSpaceLeft() < jit_shader_size) { | 46 | shader->Compile(); |
| 55 | // If not, clear the cache of all previously compiled shaders | 47 | jit_shader = shader.get(); |
| 56 | ClearCache(); | 48 | shader_map[cache_key] = std::move(shader); |
| 57 | } | ||
| 58 | |||
| 59 | jit_shader = jit.Compile(); | ||
| 60 | shader_map.emplace(cache_key, jit_shader); | ||
| 61 | } | 49 | } |
| 62 | } | 50 | } |
| 63 | #endif // ARCHITECTURE_x86_64 | 51 | #endif // ARCHITECTURE_x86_64 |
| @@ -65,7 +53,7 @@ void Setup(UnitState<false>& state) { | |||
| 65 | 53 | ||
| 66 | void Shutdown() { | 54 | void Shutdown() { |
| 67 | #ifdef ARCHITECTURE_x86_64 | 55 | #ifdef ARCHITECTURE_x86_64 |
| 68 | ClearCache(); | 56 | shader_map.clear(); |
| 69 | #endif // ARCHITECTURE_x86_64 | 57 | #endif // ARCHITECTURE_x86_64 |
| 70 | } | 58 | } |
| 71 | 59 | ||
| @@ -109,7 +97,7 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr | |||
| 109 | 97 | ||
| 110 | #ifdef ARCHITECTURE_x86_64 | 98 | #ifdef ARCHITECTURE_x86_64 |
| 111 | if (VideoCore::g_shader_jit_enabled) | 99 | if (VideoCore::g_shader_jit_enabled) |
| 112 | jit_shader(&state.registers); | 100 | jit_shader->Run(&state.registers, g_state.regs.vs.main_offset); |
| 113 | else | 101 | else |
| 114 | RunInterpreter(state); | 102 | RunInterpreter(state); |
| 115 | #else | 103 | #else |
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 7af8f1fa1..9c5bd97bd 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h | |||
| @@ -339,9 +339,8 @@ struct UnitState { | |||
| 339 | /** | 339 | /** |
| 340 | * Performs any shader unit setup that only needs to happen once per shader (as opposed to once per | 340 | * Performs any shader unit setup that only needs to happen once per shader (as opposed to once per |
| 341 | * vertex, which would happen within the `Run` function). | 341 | * vertex, which would happen within the `Run` function). |
| 342 | * @param state Shader unit state, must be setup per shader and per shader unit | ||
| 343 | */ | 342 | */ |
| 344 | void Setup(UnitState<false>& state); | 343 | void Setup(); |
| 345 | 344 | ||
| 346 | /// Performs any cleanup when the emulator is shutdown | 345 | /// Performs any cleanup when the emulator is shutdown |
| 347 | void Shutdown(); | 346 | void Shutdown(); |
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index dffe051ef..b47d3beda 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp | |||
| @@ -2,6 +2,7 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <algorithm> | ||
| 5 | #include <smmintrin.h> | 6 | #include <smmintrin.h> |
| 6 | 7 | ||
| 7 | #include "common/x64/abi.h" | 8 | #include "common/x64/abi.h" |
| @@ -19,73 +20,73 @@ namespace Shader { | |||
| 19 | 20 | ||
| 20 | using namespace Gen; | 21 | using namespace Gen; |
| 21 | 22 | ||
| 22 | typedef void (JitCompiler::*JitFunction)(Instruction instr); | 23 | typedef void (JitShader::*JitFunction)(Instruction instr); |
| 23 | 24 | ||
| 24 | const JitFunction instr_table[64] = { | 25 | const JitFunction instr_table[64] = { |
| 25 | &JitCompiler::Compile_ADD, // add | 26 | &JitShader::Compile_ADD, // add |
| 26 | &JitCompiler::Compile_DP3, // dp3 | 27 | &JitShader::Compile_DP3, // dp3 |
| 27 | &JitCompiler::Compile_DP4, // dp4 | 28 | &JitShader::Compile_DP4, // dp4 |
| 28 | &JitCompiler::Compile_DPH, // dph | 29 | &JitShader::Compile_DPH, // dph |
| 29 | nullptr, // unknown | 30 | nullptr, // unknown |
| 30 | &JitCompiler::Compile_EX2, // ex2 | 31 | &JitShader::Compile_EX2, // ex2 |
| 31 | &JitCompiler::Compile_LG2, // lg2 | 32 | &JitShader::Compile_LG2, // lg2 |
| 32 | nullptr, // unknown | 33 | nullptr, // unknown |
| 33 | &JitCompiler::Compile_MUL, // mul | 34 | &JitShader::Compile_MUL, // mul |
| 34 | &JitCompiler::Compile_SGE, // sge | 35 | &JitShader::Compile_SGE, // sge |
| 35 | &JitCompiler::Compile_SLT, // slt | 36 | &JitShader::Compile_SLT, // slt |
| 36 | &JitCompiler::Compile_FLR, // flr | 37 | &JitShader::Compile_FLR, // flr |
| 37 | &JitCompiler::Compile_MAX, // max | 38 | &JitShader::Compile_MAX, // max |
| 38 | &JitCompiler::Compile_MIN, // min | 39 | &JitShader::Compile_MIN, // min |
| 39 | &JitCompiler::Compile_RCP, // rcp | 40 | &JitShader::Compile_RCP, // rcp |
| 40 | &JitCompiler::Compile_RSQ, // rsq | 41 | &JitShader::Compile_RSQ, // rsq |
| 41 | nullptr, // unknown | 42 | nullptr, // unknown |
| 42 | nullptr, // unknown | 43 | nullptr, // unknown |
| 43 | &JitCompiler::Compile_MOVA, // mova | 44 | &JitShader::Compile_MOVA, // mova |
| 44 | &JitCompiler::Compile_MOV, // mov | 45 | &JitShader::Compile_MOV, // mov |
| 45 | nullptr, // unknown | 46 | nullptr, // unknown |
| 46 | nullptr, // unknown | 47 | nullptr, // unknown |
| 47 | nullptr, // unknown | 48 | nullptr, // unknown |
| 48 | nullptr, // unknown | 49 | nullptr, // unknown |
| 49 | &JitCompiler::Compile_DPH, // dphi | 50 | &JitShader::Compile_DPH, // dphi |
| 50 | nullptr, // unknown | 51 | nullptr, // unknown |
| 51 | &JitCompiler::Compile_SGE, // sgei | 52 | &JitShader::Compile_SGE, // sgei |
| 52 | &JitCompiler::Compile_SLT, // slti | 53 | &JitShader::Compile_SLT, // slti |
| 53 | nullptr, // unknown | 54 | nullptr, // unknown |
| 54 | nullptr, // unknown | 55 | nullptr, // unknown |
| 55 | nullptr, // unknown | 56 | nullptr, // unknown |
| 56 | nullptr, // unknown | 57 | nullptr, // unknown |
| 57 | nullptr, // unknown | 58 | nullptr, // unknown |
| 58 | &JitCompiler::Compile_NOP, // nop | 59 | &JitShader::Compile_NOP, // nop |
| 59 | &JitCompiler::Compile_END, // end | 60 | &JitShader::Compile_END, // end |
| 60 | nullptr, // break | 61 | nullptr, // break |
| 61 | &JitCompiler::Compile_CALL, // call | 62 | &JitShader::Compile_CALL, // call |
| 62 | &JitCompiler::Compile_CALLC, // callc | 63 | &JitShader::Compile_CALLC, // callc |
| 63 | &JitCompiler::Compile_CALLU, // callu | 64 | &JitShader::Compile_CALLU, // callu |
| 64 | &JitCompiler::Compile_IF, // ifu | 65 | &JitShader::Compile_IF, // ifu |
| 65 | &JitCompiler::Compile_IF, // ifc | 66 | &JitShader::Compile_IF, // ifc |
| 66 | &JitCompiler::Compile_LOOP, // loop | 67 | &JitShader::Compile_LOOP, // loop |
| 67 | nullptr, // emit | 68 | nullptr, // emit |
| 68 | nullptr, // sete | 69 | nullptr, // sete |
| 69 | &JitCompiler::Compile_JMP, // jmpc | 70 | &JitShader::Compile_JMP, // jmpc |
| 70 | &JitCompiler::Compile_JMP, // jmpu | 71 | &JitShader::Compile_JMP, // jmpu |
| 71 | &JitCompiler::Compile_CMP, // cmp | 72 | &JitShader::Compile_CMP, // cmp |
| 72 | &JitCompiler::Compile_CMP, // cmp | 73 | &JitShader::Compile_CMP, // cmp |
| 73 | &JitCompiler::Compile_MAD, // madi | 74 | &JitShader::Compile_MAD, // madi |
| 74 | &JitCompiler::Compile_MAD, // madi | 75 | &JitShader::Compile_MAD, // madi |
| 75 | &JitCompiler::Compile_MAD, // madi | 76 | &JitShader::Compile_MAD, // madi |
| 76 | &JitCompiler::Compile_MAD, // madi | 77 | &JitShader::Compile_MAD, // madi |
| 77 | &JitCompiler::Compile_MAD, // madi | 78 | &JitShader::Compile_MAD, // madi |
| 78 | &JitCompiler::Compile_MAD, // madi | 79 | &JitShader::Compile_MAD, // madi |
| 79 | &JitCompiler::Compile_MAD, // madi | 80 | &JitShader::Compile_MAD, // madi |
| 80 | &JitCompiler::Compile_MAD, // madi | 81 | &JitShader::Compile_MAD, // madi |
| 81 | &JitCompiler::Compile_MAD, // mad | 82 | &JitShader::Compile_MAD, // mad |
| 82 | &JitCompiler::Compile_MAD, // mad | 83 | &JitShader::Compile_MAD, // mad |
| 83 | &JitCompiler::Compile_MAD, // mad | 84 | &JitShader::Compile_MAD, // mad |
| 84 | &JitCompiler::Compile_MAD, // mad | 85 | &JitShader::Compile_MAD, // mad |
| 85 | &JitCompiler::Compile_MAD, // mad | 86 | &JitShader::Compile_MAD, // mad |
| 86 | &JitCompiler::Compile_MAD, // mad | 87 | &JitShader::Compile_MAD, // mad |
| 87 | &JitCompiler::Compile_MAD, // mad | 88 | &JitShader::Compile_MAD, // mad |
| 88 | &JitCompiler::Compile_MAD, // mad | 89 | &JitShader::Compile_MAD, // mad |
| 89 | }; | 90 | }; |
| 90 | 91 | ||
| 91 | // The following is used to alias some commonly used registers. Generally, RAX-RDX and XMM0-XMM3 can | 92 | // The following is used to alias some commonly used registers. Generally, RAX-RDX and XMM0-XMM3 can |
| @@ -138,13 +139,32 @@ static const u8 NO_SRC_REG_SWIZZLE = 0x1b; | |||
| 138 | static const u8 NO_DEST_REG_MASK = 0xf; | 139 | static const u8 NO_DEST_REG_MASK = 0xf; |
| 139 | 140 | ||
| 140 | /** | 141 | /** |
| 142 | * Get the vertex shader instruction for a given offset in the current shader program | ||
| 143 | * @param offset Offset in the current shader program of the instruction | ||
| 144 | * @return Instruction at the specified offset | ||
| 145 | */ | ||
| 146 | static Instruction GetVertexShaderInstruction(size_t offset) { | ||
| 147 | return { g_state.vs.program_code[offset] }; | ||
| 148 | } | ||
| 149 | |||
| 150 | static void LogCritical(const char* msg) { | ||
| 151 | LOG_CRITICAL(HW_GPU, msg); | ||
| 152 | } | ||
| 153 | |||
| 154 | void JitShader::Compile_Assert(bool condition, const char* msg) { | ||
| 155 | if (!condition) { | ||
| 156 | ABI_CallFunctionP(reinterpret_cast<const void*>(LogCritical), const_cast<char*>(msg)); | ||
| 157 | } | ||
| 158 | } | ||
| 159 | |||
| 160 | /** | ||
| 141 | * Loads and swizzles a source register into the specified XMM register. | 161 | * Loads and swizzles a source register into the specified XMM register. |
| 142 | * @param instr VS instruction, used for determining how to load the source register | 162 | * @param instr VS instruction, used for determining how to load the source register |
| 143 | * @param src_num Number indicating which source register to load (1 = src1, 2 = src2, 3 = src3) | 163 | * @param src_num Number indicating which source register to load (1 = src1, 2 = src2, 3 = src3) |
| 144 | * @param src_reg SourceRegister object corresponding to the source register to load | 164 | * @param src_reg SourceRegister object corresponding to the source register to load |
| 145 | * @param dest Destination XMM register to store the loaded, swizzled source register | 165 | * @param dest Destination XMM register to store the loaded, swizzled source register |
| 146 | */ | 166 | */ |
| 147 | void JitCompiler::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, X64Reg dest) { | 167 | void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, X64Reg dest) { |
| 148 | X64Reg src_ptr; | 168 | X64Reg src_ptr; |
| 149 | size_t src_offset; | 169 | size_t src_offset; |
| 150 | 170 | ||
| @@ -216,7 +236,7 @@ void JitCompiler::Compile_SwizzleSrc(Instruction instr, unsigned src_num, Source | |||
| 216 | } | 236 | } |
| 217 | } | 237 | } |
| 218 | 238 | ||
| 219 | void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) { | 239 | void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) { |
| 220 | DestRegister dest; | 240 | DestRegister dest; |
| 221 | unsigned operand_desc_id; | 241 | unsigned operand_desc_id; |
| 222 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD || | 242 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD || |
| @@ -263,7 +283,7 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) { | |||
| 263 | } | 283 | } |
| 264 | } | 284 | } |
| 265 | 285 | ||
| 266 | void JitCompiler::Compile_SanitizedMul(Gen::X64Reg src1, Gen::X64Reg src2, Gen::X64Reg scratch) { | 286 | void JitShader::Compile_SanitizedMul(Gen::X64Reg src1, Gen::X64Reg src2, Gen::X64Reg scratch) { |
| 267 | MOVAPS(scratch, R(src1)); | 287 | MOVAPS(scratch, R(src1)); |
| 268 | CMPPS(scratch, R(src2), CMP_ORD); | 288 | CMPPS(scratch, R(src2), CMP_ORD); |
| 269 | 289 | ||
| @@ -276,7 +296,7 @@ void JitCompiler::Compile_SanitizedMul(Gen::X64Reg src1, Gen::X64Reg src2, Gen:: | |||
| 276 | ANDPS(src1, R(scratch)); | 296 | ANDPS(src1, R(scratch)); |
| 277 | } | 297 | } |
| 278 | 298 | ||
| 279 | void JitCompiler::Compile_EvaluateCondition(Instruction instr) { | 299 | void JitShader::Compile_EvaluateCondition(Instruction instr) { |
| 280 | // Note: NXOR is used below to check for equality | 300 | // Note: NXOR is used below to check for equality |
| 281 | switch (instr.flow_control.op) { | 301 | switch (instr.flow_control.op) { |
| 282 | case Instruction::FlowControlType::Or: | 302 | case Instruction::FlowControlType::Or: |
| @@ -307,23 +327,23 @@ void JitCompiler::Compile_EvaluateCondition(Instruction instr) { | |||
| 307 | } | 327 | } |
| 308 | } | 328 | } |
| 309 | 329 | ||
| 310 | void JitCompiler::Compile_UniformCondition(Instruction instr) { | 330 | void JitShader::Compile_UniformCondition(Instruction instr) { |
| 311 | int offset = offsetof(decltype(g_state.vs.uniforms), b) + (instr.flow_control.bool_uniform_id * sizeof(bool)); | 331 | int offset = offsetof(decltype(g_state.vs.uniforms), b) + (instr.flow_control.bool_uniform_id * sizeof(bool)); |
| 312 | CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0)); | 332 | CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0)); |
| 313 | } | 333 | } |
| 314 | 334 | ||
| 315 | BitSet32 JitCompiler::PersistentCallerSavedRegs() { | 335 | BitSet32 JitShader::PersistentCallerSavedRegs() { |
| 316 | return persistent_regs & ABI_ALL_CALLER_SAVED; | 336 | return persistent_regs & ABI_ALL_CALLER_SAVED; |
| 317 | } | 337 | } |
| 318 | 338 | ||
| 319 | void JitCompiler::Compile_ADD(Instruction instr) { | 339 | void JitShader::Compile_ADD(Instruction instr) { |
| 320 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | 340 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |
| 321 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | 341 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); |
| 322 | ADDPS(SRC1, R(SRC2)); | 342 | ADDPS(SRC1, R(SRC2)); |
| 323 | Compile_DestEnable(instr, SRC1); | 343 | Compile_DestEnable(instr, SRC1); |
| 324 | } | 344 | } |
| 325 | 345 | ||
| 326 | void JitCompiler::Compile_DP3(Instruction instr) { | 346 | void JitShader::Compile_DP3(Instruction instr) { |
| 327 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | 347 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |
| 328 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | 348 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); |
| 329 | 349 | ||
| @@ -342,7 +362,7 @@ void JitCompiler::Compile_DP3(Instruction instr) { | |||
| 342 | Compile_DestEnable(instr, SRC1); | 362 | Compile_DestEnable(instr, SRC1); |
| 343 | } | 363 | } |
| 344 | 364 | ||
| 345 | void JitCompiler::Compile_DP4(Instruction instr) { | 365 | void JitShader::Compile_DP4(Instruction instr) { |
| 346 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | 366 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |
| 347 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | 367 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); |
| 348 | 368 | ||
| @@ -359,7 +379,7 @@ void JitCompiler::Compile_DP4(Instruction instr) { | |||
| 359 | Compile_DestEnable(instr, SRC1); | 379 | Compile_DestEnable(instr, SRC1); |
| 360 | } | 380 | } |
| 361 | 381 | ||
| 362 | void JitCompiler::Compile_DPH(Instruction instr) { | 382 | void JitShader::Compile_DPH(Instruction instr) { |
| 363 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::DPHI) { | 383 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::DPHI) { |
| 364 | Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); | 384 | Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); |
| 365 | Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); | 385 | Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); |
| @@ -391,7 +411,7 @@ void JitCompiler::Compile_DPH(Instruction instr) { | |||
| 391 | Compile_DestEnable(instr, SRC1); | 411 | Compile_DestEnable(instr, SRC1); |
| 392 | } | 412 | } |
| 393 | 413 | ||
| 394 | void JitCompiler::Compile_EX2(Instruction instr) { | 414 | void JitShader::Compile_EX2(Instruction instr) { |
| 395 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | 415 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |
| 396 | MOVSS(XMM0, R(SRC1)); | 416 | MOVSS(XMM0, R(SRC1)); |
| 397 | 417 | ||
| @@ -404,7 +424,7 @@ void JitCompiler::Compile_EX2(Instruction instr) { | |||
| 404 | Compile_DestEnable(instr, SRC1); | 424 | Compile_DestEnable(instr, SRC1); |
| 405 | } | 425 | } |
| 406 | 426 | ||
| 407 | void JitCompiler::Compile_LG2(Instruction instr) { | 427 | void JitShader::Compile_LG2(Instruction instr) { |
| 408 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | 428 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |
| 409 | MOVSS(XMM0, R(SRC1)); | 429 | MOVSS(XMM0, R(SRC1)); |
| 410 | 430 | ||
| @@ -417,14 +437,14 @@ void JitCompiler::Compile_LG2(Instruction instr) { | |||
| 417 | Compile_DestEnable(instr, SRC1); | 437 | Compile_DestEnable(instr, SRC1); |
| 418 | } | 438 | } |
| 419 | 439 | ||
| 420 | void JitCompiler::Compile_MUL(Instruction instr) { | 440 | void JitShader::Compile_MUL(Instruction instr) { |
| 421 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | 441 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |
| 422 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | 442 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); |
| 423 | Compile_SanitizedMul(SRC1, SRC2, SCRATCH); | 443 | Compile_SanitizedMul(SRC1, SRC2, SCRATCH); |
| 424 | Compile_DestEnable(instr, SRC1); | 444 | Compile_DestEnable(instr, SRC1); |
| 425 | } | 445 | } |
| 426 | 446 | ||
| 427 | void JitCompiler::Compile_SGE(Instruction instr) { | 447 | void JitShader::Compile_SGE(Instruction instr) { |
| 428 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SGEI) { | 448 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SGEI) { |
| 429 | Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); | 449 | Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); |
| 430 | Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); | 450 | Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); |
| @@ -439,7 +459,7 @@ void JitCompiler::Compile_SGE(Instruction instr) { | |||
| 439 | Compile_DestEnable(instr, SRC2); | 459 | Compile_DestEnable(instr, SRC2); |
| 440 | } | 460 | } |
| 441 | 461 | ||
| 442 | void JitCompiler::Compile_SLT(Instruction instr) { | 462 | void JitShader::Compile_SLT(Instruction instr) { |
| 443 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SLTI) { | 463 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SLTI) { |
| 444 | Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); | 464 | Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); |
| 445 | Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); | 465 | Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); |
| @@ -454,7 +474,7 @@ void JitCompiler::Compile_SLT(Instruction instr) { | |||
| 454 | Compile_DestEnable(instr, SRC1); | 474 | Compile_DestEnable(instr, SRC1); |
| 455 | } | 475 | } |
| 456 | 476 | ||
| 457 | void JitCompiler::Compile_FLR(Instruction instr) { | 477 | void JitShader::Compile_FLR(Instruction instr) { |
| 458 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | 478 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |
| 459 | 479 | ||
| 460 | if (Common::GetCPUCaps().sse4_1) { | 480 | if (Common::GetCPUCaps().sse4_1) { |
| @@ -467,7 +487,7 @@ void JitCompiler::Compile_FLR(Instruction instr) { | |||
| 467 | Compile_DestEnable(instr, SRC1); | 487 | Compile_DestEnable(instr, SRC1); |
| 468 | } | 488 | } |
| 469 | 489 | ||
| 470 | void JitCompiler::Compile_MAX(Instruction instr) { | 490 | void JitShader::Compile_MAX(Instruction instr) { |
| 471 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | 491 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |
| 472 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | 492 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); |
| 473 | // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned. | 493 | // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned. |
| @@ -475,7 +495,7 @@ void JitCompiler::Compile_MAX(Instruction instr) { | |||
| 475 | Compile_DestEnable(instr, SRC1); | 495 | Compile_DestEnable(instr, SRC1); |
| 476 | } | 496 | } |
| 477 | 497 | ||
| 478 | void JitCompiler::Compile_MIN(Instruction instr) { | 498 | void JitShader::Compile_MIN(Instruction instr) { |
| 479 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | 499 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |
| 480 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | 500 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); |
| 481 | // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned. | 501 | // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned. |
| @@ -483,7 +503,7 @@ void JitCompiler::Compile_MIN(Instruction instr) { | |||
| 483 | Compile_DestEnable(instr, SRC1); | 503 | Compile_DestEnable(instr, SRC1); |
| 484 | } | 504 | } |
| 485 | 505 | ||
| 486 | void JitCompiler::Compile_MOVA(Instruction instr) { | 506 | void JitShader::Compile_MOVA(Instruction instr) { |
| 487 | SwizzlePattern swiz = { g_state.vs.swizzle_data[instr.common.operand_desc_id] }; | 507 | SwizzlePattern swiz = { g_state.vs.swizzle_data[instr.common.operand_desc_id] }; |
| 488 | 508 | ||
| 489 | if (!swiz.DestComponentEnabled(0) && !swiz.DestComponentEnabled(1)) { | 509 | if (!swiz.DestComponentEnabled(0) && !swiz.DestComponentEnabled(1)) { |
| @@ -528,12 +548,12 @@ void JitCompiler::Compile_MOVA(Instruction instr) { | |||
| 528 | } | 548 | } |
| 529 | } | 549 | } |
| 530 | 550 | ||
| 531 | void JitCompiler::Compile_MOV(Instruction instr) { | 551 | void JitShader::Compile_MOV(Instruction instr) { |
| 532 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | 552 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |
| 533 | Compile_DestEnable(instr, SRC1); | 553 | Compile_DestEnable(instr, SRC1); |
| 534 | } | 554 | } |
| 535 | 555 | ||
| 536 | void JitCompiler::Compile_RCP(Instruction instr) { | 556 | void JitShader::Compile_RCP(Instruction instr) { |
| 537 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | 557 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |
| 538 | 558 | ||
| 539 | // TODO(bunnei): RCPSS is a pretty rough approximation, this might cause problems if Pica | 559 | // TODO(bunnei): RCPSS is a pretty rough approximation, this might cause problems if Pica |
| @@ -544,7 +564,7 @@ void JitCompiler::Compile_RCP(Instruction instr) { | |||
| 544 | Compile_DestEnable(instr, SRC1); | 564 | Compile_DestEnable(instr, SRC1); |
| 545 | } | 565 | } |
| 546 | 566 | ||
| 547 | void JitCompiler::Compile_RSQ(Instruction instr) { | 567 | void JitShader::Compile_RSQ(Instruction instr) { |
| 548 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | 568 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |
| 549 | 569 | ||
| 550 | // TODO(bunnei): RSQRTSS is a pretty rough approximation, this might cause problems if Pica | 570 | // TODO(bunnei): RSQRTSS is a pretty rough approximation, this might cause problems if Pica |
| @@ -555,36 +575,41 @@ void JitCompiler::Compile_RSQ(Instruction instr) { | |||
| 555 | Compile_DestEnable(instr, SRC1); | 575 | Compile_DestEnable(instr, SRC1); |
| 556 | } | 576 | } |
| 557 | 577 | ||
| 558 | void JitCompiler::Compile_NOP(Instruction instr) { | 578 | void JitShader::Compile_NOP(Instruction instr) { |
| 559 | } | 579 | } |
| 560 | 580 | ||
| 561 | void JitCompiler::Compile_END(Instruction instr) { | 581 | void JitShader::Compile_END(Instruction instr) { |
| 562 | ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); | 582 | ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); |
| 563 | RET(); | 583 | RET(); |
| 564 | } | 584 | } |
| 565 | 585 | ||
| 566 | void JitCompiler::Compile_CALL(Instruction instr) { | 586 | void JitShader::Compile_CALL(Instruction instr) { |
| 567 | unsigned offset = instr.flow_control.dest_offset; | 587 | // Push offset of the return |
| 568 | while (offset < (instr.flow_control.dest_offset + instr.flow_control.num_instructions)) { | 588 | PUSH(64, Imm32(instr.flow_control.dest_offset + instr.flow_control.num_instructions)); |
| 569 | Compile_NextInstr(&offset); | 589 | |
| 570 | } | 590 | // Call the subroutine |
| 591 | FixupBranch b = CALL(); | ||
| 592 | fixup_branches.push_back({ b, instr.flow_control.dest_offset }); | ||
| 593 | |||
| 594 | // Skip over the return offset that's on the stack | ||
| 595 | ADD(64, R(RSP), Imm32(8)); | ||
| 571 | } | 596 | } |
| 572 | 597 | ||
| 573 | void JitCompiler::Compile_CALLC(Instruction instr) { | 598 | void JitShader::Compile_CALLC(Instruction instr) { |
| 574 | Compile_EvaluateCondition(instr); | 599 | Compile_EvaluateCondition(instr); |
| 575 | FixupBranch b = J_CC(CC_Z, true); | 600 | FixupBranch b = J_CC(CC_Z, true); |
| 576 | Compile_CALL(instr); | 601 | Compile_CALL(instr); |
| 577 | SetJumpTarget(b); | 602 | SetJumpTarget(b); |
| 578 | } | 603 | } |
| 579 | 604 | ||
| 580 | void JitCompiler::Compile_CALLU(Instruction instr) { | 605 | void JitShader::Compile_CALLU(Instruction instr) { |
| 581 | Compile_UniformCondition(instr); | 606 | Compile_UniformCondition(instr); |
| 582 | FixupBranch b = J_CC(CC_Z, true); | 607 | FixupBranch b = J_CC(CC_Z, true); |
| 583 | Compile_CALL(instr); | 608 | Compile_CALL(instr); |
| 584 | SetJumpTarget(b); | 609 | SetJumpTarget(b); |
| 585 | } | 610 | } |
| 586 | 611 | ||
| 587 | void JitCompiler::Compile_CMP(Instruction instr) { | 612 | void JitShader::Compile_CMP(Instruction instr) { |
| 588 | using Op = Instruction::Common::CompareOpType::Op; | 613 | using Op = Instruction::Common::CompareOpType::Op; |
| 589 | Op op_x = instr.common.compare_op.x; | 614 | Op op_x = instr.common.compare_op.x; |
| 590 | Op op_y = instr.common.compare_op.y; | 615 | Op op_y = instr.common.compare_op.y; |
| @@ -627,7 +652,7 @@ void JitCompiler::Compile_CMP(Instruction instr) { | |||
| 627 | SHR(64, R(COND1), Imm8(63)); | 652 | SHR(64, R(COND1), Imm8(63)); |
| 628 | } | 653 | } |
| 629 | 654 | ||
| 630 | void JitCompiler::Compile_MAD(Instruction instr) { | 655 | void JitShader::Compile_MAD(Instruction instr) { |
| 631 | Compile_SwizzleSrc(instr, 1, instr.mad.src1, SRC1); | 656 | Compile_SwizzleSrc(instr, 1, instr.mad.src1, SRC1); |
| 632 | 657 | ||
| 633 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) { | 658 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) { |
| @@ -644,9 +669,8 @@ void JitCompiler::Compile_MAD(Instruction instr) { | |||
| 644 | Compile_DestEnable(instr, SRC1); | 669 | Compile_DestEnable(instr, SRC1); |
| 645 | } | 670 | } |
| 646 | 671 | ||
| 647 | void JitCompiler::Compile_IF(Instruction instr) { | 672 | void JitShader::Compile_IF(Instruction instr) { |
| 648 | ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards if-statements (%d -> %d) not supported", | 673 | Compile_Assert(instr.flow_control.dest_offset >= program_counter, "Backwards if-statements not supported"); |
| 649 | *offset_ptr, instr.flow_control.dest_offset.Value()); | ||
| 650 | 674 | ||
| 651 | // Evaluate the "IF" condition | 675 | // Evaluate the "IF" condition |
| 652 | if (instr.opcode.Value() == OpCode::Id::IFU) { | 676 | if (instr.opcode.Value() == OpCode::Id::IFU) { |
| @@ -676,10 +700,9 @@ void JitCompiler::Compile_IF(Instruction instr) { | |||
| 676 | SetJumpTarget(b2); | 700 | SetJumpTarget(b2); |
| 677 | } | 701 | } |
| 678 | 702 | ||
| 679 | void JitCompiler::Compile_LOOP(Instruction instr) { | 703 | void JitShader::Compile_LOOP(Instruction instr) { |
| 680 | ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards loops (%d -> %d) not supported", | 704 | Compile_Assert(instr.flow_control.dest_offset >= program_counter, "Backwards loops not supported"); |
| 681 | *offset_ptr, instr.flow_control.dest_offset.Value()); | 705 | Compile_Assert(!looping, "Nested loops not supported"); |
| 682 | ASSERT_MSG(!looping, "Nested loops not supported"); | ||
| 683 | 706 | ||
| 684 | looping = true; | 707 | looping = true; |
| 685 | 708 | ||
| @@ -705,10 +728,7 @@ void JitCompiler::Compile_LOOP(Instruction instr) { | |||
| 705 | looping = false; | 728 | looping = false; |
| 706 | } | 729 | } |
| 707 | 730 | ||
| 708 | void JitCompiler::Compile_JMP(Instruction instr) { | 731 | void JitShader::Compile_JMP(Instruction instr) { |
| 709 | ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards jumps (%d -> %d) not supported", | ||
| 710 | *offset_ptr, instr.flow_control.dest_offset.Value()); | ||
| 711 | |||
| 712 | if (instr.opcode.Value() == OpCode::Id::JMPC) | 732 | if (instr.opcode.Value() == OpCode::Id::JMPC) |
| 713 | Compile_EvaluateCondition(instr); | 733 | Compile_EvaluateCondition(instr); |
| 714 | else if (instr.opcode.Value() == OpCode::Id::JMPU) | 734 | else if (instr.opcode.Value() == OpCode::Id::JMPU) |
| @@ -718,30 +738,38 @@ void JitCompiler::Compile_JMP(Instruction instr) { | |||
| 718 | 738 | ||
| 719 | bool inverted_condition = (instr.opcode.Value() == OpCode::Id::JMPU) && | 739 | bool inverted_condition = (instr.opcode.Value() == OpCode::Id::JMPU) && |
| 720 | (instr.flow_control.num_instructions & 1); | 740 | (instr.flow_control.num_instructions & 1); |
| 741 | |||
| 721 | FixupBranch b = J_CC(inverted_condition ? CC_Z : CC_NZ, true); | 742 | FixupBranch b = J_CC(inverted_condition ? CC_Z : CC_NZ, true); |
| 743 | fixup_branches.push_back({ b, instr.flow_control.dest_offset }); | ||
| 744 | } | ||
| 722 | 745 | ||
| 723 | Compile_Block(instr.flow_control.dest_offset); | 746 | void JitShader::Compile_Block(unsigned end) { |
| 747 | while (program_counter < end) { | ||
| 748 | Compile_NextInstr(); | ||
| 749 | } | ||
| 750 | } | ||
| 751 | |||
| 752 | void JitShader::Compile_Return() { | ||
| 753 | // Peek return offset on the stack and check if we're at that offset | ||
| 754 | MOV(64, R(RAX), MDisp(RSP, 8)); | ||
| 755 | CMP(32, R(RAX), Imm32(program_counter)); | ||
| 724 | 756 | ||
| 757 | // If so, jump back to before CALL | ||
| 758 | FixupBranch b = J_CC(CC_NZ, true); | ||
| 759 | RET(); | ||
| 725 | SetJumpTarget(b); | 760 | SetJumpTarget(b); |
| 726 | } | 761 | } |
| 727 | 762 | ||
| 728 | void JitCompiler::Compile_Block(unsigned end) { | 763 | void JitShader::Compile_NextInstr() { |
| 729 | // Save current offset pointer | 764 | if (std::binary_search(return_offsets.begin(), return_offsets.end(), program_counter)) { |
| 730 | unsigned* prev_offset_ptr = offset_ptr; | 765 | Compile_Return(); |
| 731 | unsigned offset = *prev_offset_ptr; | 766 | } |
| 732 | 767 | ||
| 733 | while (offset < end) | 768 | ASSERT_MSG(code_ptr[program_counter] == nullptr, "Tried to compile already compiled shader location!"); |
| 734 | Compile_NextInstr(&offset); | 769 | code_ptr[program_counter] = GetCodePtr(); |
| 735 | 770 | ||
| 736 | // Restore current offset pointer | 771 | Instruction instr = GetVertexShaderInstruction(program_counter++); |
| 737 | offset_ptr = prev_offset_ptr; | ||
| 738 | *offset_ptr = offset; | ||
| 739 | } | ||
| 740 | 772 | ||
| 741 | void JitCompiler::Compile_NextInstr(unsigned* offset) { | ||
| 742 | offset_ptr = offset; | ||
| 743 | |||
| 744 | Instruction instr = *(Instruction*)&g_state.vs.program_code[(*offset_ptr)++]; | ||
| 745 | OpCode::Id opcode = instr.opcode.Value(); | 773 | OpCode::Id opcode = instr.opcode.Value(); |
| 746 | auto instr_func = instr_table[static_cast<unsigned>(opcode)]; | 774 | auto instr_func = instr_table[static_cast<unsigned>(opcode)]; |
| 747 | 775 | ||
| @@ -755,9 +783,35 @@ void JitCompiler::Compile_NextInstr(unsigned* offset) { | |||
| 755 | } | 783 | } |
| 756 | } | 784 | } |
| 757 | 785 | ||
| 758 | CompiledShader* JitCompiler::Compile() { | 786 | void JitShader::FindReturnOffsets() { |
| 759 | const u8* start = GetCodePtr(); | 787 | return_offsets.clear(); |
| 760 | unsigned offset = g_state.regs.vs.main_offset; | 788 | |
| 789 | for (size_t offset = 0; offset < g_state.vs.program_code.size(); ++offset) { | ||
| 790 | Instruction instr = GetVertexShaderInstruction(offset); | ||
| 791 | |||
| 792 | switch (instr.opcode.Value()) { | ||
| 793 | case OpCode::Id::CALL: | ||
| 794 | case OpCode::Id::CALLC: | ||
| 795 | case OpCode::Id::CALLU: | ||
| 796 | return_offsets.push_back(instr.flow_control.dest_offset + instr.flow_control.num_instructions); | ||
| 797 | break; | ||
| 798 | } | ||
| 799 | } | ||
| 800 | |||
| 801 | // Sort for efficient binary search later | ||
| 802 | std::sort(return_offsets.begin(), return_offsets.end()); | ||
| 803 | } | ||
| 804 | |||
| 805 | void JitShader::Compile() { | ||
| 806 | // Reset flow control state | ||
| 807 | program = (CompiledShader*)GetCodePtr(); | ||
| 808 | program_counter = 0; | ||
| 809 | looping = false; | ||
| 810 | code_ptr.fill(nullptr); | ||
| 811 | fixup_branches.clear(); | ||
| 812 | |||
| 813 | // Find all `CALL` instructions and identify return locations | ||
| 814 | FindReturnOffsets(); | ||
| 761 | 815 | ||
| 762 | // The stack pointer is 8 modulo 16 at the entry of a procedure | 816 | // The stack pointer is 8 modulo 16 at the entry of a procedure |
| 763 | ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); | 817 | ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); |
| @@ -780,21 +834,31 @@ CompiledShader* JitCompiler::Compile() { | |||
| 780 | MOV(PTRBITS, R(RAX), ImmPtr(&neg)); | 834 | MOV(PTRBITS, R(RAX), ImmPtr(&neg)); |
| 781 | MOVAPS(NEGBIT, MatR(RAX)); | 835 | MOVAPS(NEGBIT, MatR(RAX)); |
| 782 | 836 | ||
| 783 | looping = false; | 837 | // Jump to start of the shader program |
| 838 | JMPptr(R(ABI_PARAM2)); | ||
| 839 | |||
| 840 | // Compile entire program | ||
| 841 | Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size())); | ||
| 784 | 842 | ||
| 785 | while (offset < g_state.vs.program_code.size()) { | 843 | // Set the target for any incomplete branches now that the entire shader program has been emitted |
| 786 | Compile_NextInstr(&offset); | 844 | for (const auto& branch : fixup_branches) { |
| 845 | SetJumpTarget(branch.first, code_ptr[branch.second]); | ||
| 787 | } | 846 | } |
| 788 | 847 | ||
| 789 | return (CompiledShader*)start; | 848 | // Free memory that's no longer needed |
| 790 | } | 849 | return_offsets.clear(); |
| 850 | return_offsets.shrink_to_fit(); | ||
| 851 | fixup_branches.clear(); | ||
| 852 | fixup_branches.shrink_to_fit(); | ||
| 853 | |||
| 854 | uintptr_t size = reinterpret_cast<uintptr_t>(GetCodePtr()) - reinterpret_cast<uintptr_t>(program); | ||
| 855 | ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!"); | ||
| 791 | 856 | ||
| 792 | JitCompiler::JitCompiler() { | 857 | LOG_DEBUG(HW_GPU, "Compiled shader size=%d", size); |
| 793 | AllocCodeSpace(jit_cache_size); | ||
| 794 | } | 858 | } |
| 795 | 859 | ||
| 796 | void JitCompiler::Clear() { | 860 | JitShader::JitShader() { |
| 797 | ClearCodeSpace(); | 861 | AllocCodeSpace(MAX_SHADER_SIZE); |
| 798 | } | 862 | } |
| 799 | 863 | ||
| 800 | } // namespace Shader | 864 | } // namespace Shader |
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index 5357c964b..cd6280ade 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h | |||
| @@ -4,6 +4,9 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <utility> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 7 | #include <nihstro/shader_bytecode.h> | 10 | #include <nihstro/shader_bytecode.h> |
| 8 | 11 | ||
| 9 | #include "common/x64/emitter.h" | 12 | #include "common/x64/emitter.h" |
| @@ -19,24 +22,22 @@ namespace Pica { | |||
| 19 | 22 | ||
| 20 | namespace Shader { | 23 | namespace Shader { |
| 21 | 24 | ||
| 22 | /// Memory needed to be available to compile the next shader (otherwise, clear the cache) | 25 | /// Memory allocated for each compiled shader (64Kb) |
| 23 | constexpr size_t jit_shader_size = 1024 * 512; | 26 | constexpr size_t MAX_SHADER_SIZE = 1024 * 64; |
| 24 | /// Memory allocated for the JIT code space cache | ||
| 25 | constexpr size_t jit_cache_size = 1024 * 1024 * 8; | ||
| 26 | |||
| 27 | using CompiledShader = void(void* registers); | ||
| 28 | 27 | ||
| 29 | /** | 28 | /** |
| 30 | * This class implements the shader JIT compiler. It recompiles a Pica shader program into x86_64 | 29 | * This class implements the shader JIT compiler. It recompiles a Pica shader program into x86_64 |
| 31 | * code that can be executed on the host machine directly. | 30 | * code that can be executed on the host machine directly. |
| 32 | */ | 31 | */ |
| 33 | class JitCompiler : public Gen::XCodeBlock { | 32 | class JitShader : public Gen::XCodeBlock { |
| 34 | public: | 33 | public: |
| 35 | JitCompiler(); | 34 | JitShader(); |
| 36 | 35 | ||
| 37 | CompiledShader* Compile(); | 36 | void Run(void* registers, unsigned offset) const { |
| 37 | program(registers, code_ptr[offset]); | ||
| 38 | } | ||
| 38 | 39 | ||
| 39 | void Clear(); | 40 | void Compile(); |
| 40 | 41 | ||
| 41 | void Compile_ADD(Instruction instr); | 42 | void Compile_ADD(Instruction instr); |
| 42 | void Compile_DP3(Instruction instr); | 43 | void Compile_DP3(Instruction instr); |
| @@ -66,8 +67,9 @@ public: | |||
| 66 | void Compile_MAD(Instruction instr); | 67 | void Compile_MAD(Instruction instr); |
| 67 | 68 | ||
| 68 | private: | 69 | private: |
| 70 | |||
| 69 | void Compile_Block(unsigned end); | 71 | void Compile_Block(unsigned end); |
| 70 | void Compile_NextInstr(unsigned* offset); | 72 | void Compile_NextInstr(); |
| 71 | 73 | ||
| 72 | void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest); | 74 | void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest); |
| 73 | void Compile_DestEnable(Instruction instr, Gen::X64Reg dest); | 75 | void Compile_DestEnable(Instruction instr, Gen::X64Reg dest); |
| @@ -81,13 +83,39 @@ private: | |||
| 81 | void Compile_EvaluateCondition(Instruction instr); | 83 | void Compile_EvaluateCondition(Instruction instr); |
| 82 | void Compile_UniformCondition(Instruction instr); | 84 | void Compile_UniformCondition(Instruction instr); |
| 83 | 85 | ||
| 86 | /** | ||
| 87 | * Emits the code to conditionally return from a subroutine envoked by the `CALL` instruction. | ||
| 88 | */ | ||
| 89 | void Compile_Return(); | ||
| 90 | |||
| 84 | BitSet32 PersistentCallerSavedRegs(); | 91 | BitSet32 PersistentCallerSavedRegs(); |
| 85 | 92 | ||
| 86 | /// Pointer to the variable that stores the current Pica code offset. Used to handle nested code blocks. | 93 | /** |
| 87 | unsigned* offset_ptr = nullptr; | 94 | * Assertion evaluated at compile-time, but only triggered if executed at runtime. |
| 95 | * @param msg Message to be logged if the assertion fails. | ||
| 96 | */ | ||
| 97 | void Compile_Assert(bool condition, const char* msg); | ||
| 98 | |||
| 99 | /** | ||
| 100 | * Analyzes the entire shader program for `CALL` instructions before emitting any code, | ||
| 101 | * identifying the locations where a return needs to be inserted. | ||
| 102 | */ | ||
| 103 | void FindReturnOffsets(); | ||
| 104 | |||
| 105 | /// Mapping of Pica VS instructions to pointers in the emitted code | ||
| 106 | std::array<const u8*, 1024> code_ptr; | ||
| 107 | |||
| 108 | /// Offsets in code where a return needs to be inserted | ||
| 109 | std::vector<unsigned> return_offsets; | ||
| 110 | |||
| 111 | unsigned program_counter = 0; ///< Offset of the next instruction to decode | ||
| 112 | bool looping = false; ///< True if compiling a loop, used to check for nested loops | ||
| 113 | |||
| 114 | /// Branches that need to be fixed up once the entire shader program is compiled | ||
| 115 | std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches; | ||
| 88 | 116 | ||
| 89 | /// Set to true if currently in a loop, used to check for the existence of nested loops | 117 | using CompiledShader = void(void* registers, const u8* start_addr); |
| 90 | bool looping = false; | 118 | CompiledShader* program = nullptr; |
| 91 | }; | 119 | }; |
| 92 | 120 | ||
| 93 | } // Shader | 121 | } // Shader |