summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/common/CMakeLists.txt6
-rw-r--r--src/common/bit_set.h244
-rw-r--r--src/common/math_util.h16
-rw-r--r--src/common/thread.cpp35
-rw-r--r--src/common/thread.h20
-rw-r--r--src/common/x64/xbyak_abi.h222
-rw-r--r--src/common/x64/xbyak_util.h47
-rw-r--r--src/core/CMakeLists.txt4
-rw-r--r--src/core/core.cpp119
-rw-r--r--src/core/cpu_core_manager.cpp142
-rw-r--r--src/core/cpu_core_manager.h59
-rw-r--r--src/core/file_sys/patch_manager.cpp49
-rw-r--r--src/core/gdbstub/gdbstub.cpp123
-rw-r--r--src/core/hle/kernel/handle_table.cpp11
-rw-r--r--src/core/hle/kernel/handle_table.h15
-rw-r--r--src/core/hle/kernel/svc.cpp3
-rw-r--r--src/core/hle/service/am/am.cpp42
-rw-r--r--src/core/hle/service/am/applets/applets.cpp6
-rw-r--r--src/core/hle/service/am/applets/applets.h50
-rw-r--r--src/core/hle/service/am/applets/software_keyboard.cpp24
-rw-r--r--src/core/hle/service/am/applets/software_keyboard.h7
-rw-r--r--src/core/hle/service/am/applets/stub_applet.cpp70
-rw-r--r--src/core/hle/service/am/applets/stub_applet.h24
-rw-r--r--src/core/hle/service/audio/audout_u.cpp28
-rw-r--r--src/core/hle/service/audio/audout_u.h3
-rw-r--r--src/core/hle/service/filesystem/filesystem.cpp5
-rw-r--r--src/core/hle/service/filesystem/filesystem.h1
-rw-r--r--src/core/hle/service/hid/controllers/debug_pad.cpp5
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp13
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h6
-rw-r--r--src/core/hle/service/nvdrv/interface.cpp18
-rw-r--r--src/core/hle/service/nvdrv/interface.h2
-rw-r--r--src/core/hle/service/sm/sm.cpp54
-rw-r--r--src/core/hle/service/sm/sm.h3
-rw-r--r--src/core/hle/service/vi/vi.cpp32
-rw-r--r--src/core/settings.h1
-rw-r--r--src/video_core/CMakeLists.txt3
-rw-r--r--src/video_core/engines/maxwell_3d.h19
-rw-r--r--src/video_core/engines/shader_bytecode.h24
-rw-r--r--src/video_core/engines/shader_header.h11
-rw-r--r--src/video_core/gpu.cpp2
-rw-r--r--src/video_core/macro_interpreter.cpp29
-rw-r--r--src/video_core/macro_interpreter.h4
-rw-r--r--src/video_core/memory_manager.cpp7
-rw-r--r--src/video_core/memory_manager.h3
-rw-r--r--src/video_core/morton.cpp353
-rw-r--r--src/video_core/morton.h21
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp49
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h31
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp240
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h1
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h3
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp1166
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.h1
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp37
-rw-r--r--src/video_core/renderer_opengl/gl_state.h9
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h5
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp10
-rw-r--r--src/video_core/utils.h164
-rw-r--r--src/yuzu/applets/software_keyboard.cpp16
-rw-r--r--src/yuzu/applets/software_keyboard.h7
-rw-r--r--src/yuzu/bootmanager.cpp2
-rw-r--r--src/yuzu/configuration/config.cpp2
-rw-r--r--src/yuzu/configuration/configure_debug.cpp2
-rw-r--r--src/yuzu/configuration/configure_debug.ui10
-rw-r--r--src/yuzu/configuration/configure_graphics.ui94
-rw-r--r--src/yuzu/main.cpp18
-rw-r--r--src/yuzu_cmd/config.cpp1
-rw-r--r--src/yuzu_cmd/default_ini.h2
-rw-r--r--src/yuzu_cmd/emu_window/emu_window_sdl2.cpp16
71 files changed, 2028 insertions, 1844 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index eccd8f64a..a5e71d879 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -44,7 +44,6 @@ add_library(common STATIC
44 detached_tasks.cpp 44 detached_tasks.cpp
45 detached_tasks.h 45 detached_tasks.h
46 bit_field.h 46 bit_field.h
47 bit_set.h
48 cityhash.cpp 47 cityhash.cpp
49 cityhash.h 48 cityhash.h
50 color.h 49 color.h
@@ -95,14 +94,9 @@ if(ARCHITECTURE_x86_64)
95 PRIVATE 94 PRIVATE
96 x64/cpu_detect.cpp 95 x64/cpu_detect.cpp
97 x64/cpu_detect.h 96 x64/cpu_detect.h
98 x64/xbyak_abi.h
99 x64/xbyak_util.h
100 ) 97 )
101endif() 98endif()
102 99
103create_target_directory_groups(common) 100create_target_directory_groups(common)
104 101
105target_link_libraries(common PUBLIC Boost::boost fmt microprofile) 102target_link_libraries(common PUBLIC Boost::boost fmt microprofile)
106if (ARCHITECTURE_x86_64)
107 target_link_libraries(common PRIVATE xbyak)
108endif()
diff --git a/src/common/bit_set.h b/src/common/bit_set.h
deleted file mode 100644
index 5cd1352b2..000000000
--- a/src/common/bit_set.h
+++ /dev/null
@@ -1,244 +0,0 @@
1// This file is under the public domain.
2
3#pragma once
4
5#include <cstddef>
6#ifdef _WIN32
7#include <intrin.h>
8#endif
9#include <initializer_list>
10#include <new>
11#include <type_traits>
12#include "common/common_types.h"
13
14// namespace avoids conflict with OS X Carbon; don't use BitSet<T> directly
15namespace Common {
16
17// Helper functions:
18
19#ifdef _MSC_VER
// Counts the set bits of `v` with the parallel bit-summing technique from
// https://graphics.stanford.edu/~seander/bithacks.html
// GCC has a builtin for this, but MSVC's intrinsic will only emit the actual
// POPCNT instruction, which we're not depending on.
template <typename T>
static inline int CountSetBits(T v) {
    // Repeating bit patterns sized to T, all derived from the all-ones value.
    const T all_ones = (T) ~(T)0;
    const T pairs_mask = (T)(all_ones / 3);          // 0b0101...
    const T quads_mask = (T)(all_ones / 15 * 3);     // 0b0011...
    const T nibbles_mask = (T)(all_ones / 255 * 15); // 0b00001111...
    const T byte_ones = (T)(all_ones / 255);         // 0x0101...

    // Sum adjacent bits into 2-bit fields, then 4-bit fields, then per byte.
    v = v - ((v >> 1) & pairs_mask);
    v = (v & quads_mask) + ((v >> 2) & quads_mask);
    v = (v + (v >> 4)) & nibbles_mask;

    // The multiply accumulates every byte count into the most significant byte.
    const T accumulated = (T)(v * byte_ones);
    return accumulated >> (sizeof(T) - 1) * 8;
}
30static inline int LeastSignificantSetBit(u8 val) {
31 unsigned long index;
32 _BitScanForward(&index, val);
33 return (int)index;
34}
35static inline int LeastSignificantSetBit(u16 val) {
36 unsigned long index;
37 _BitScanForward(&index, val);
38 return (int)index;
39}
40static inline int LeastSignificantSetBit(u32 val) {
41 unsigned long index;
42 _BitScanForward(&index, val);
43 return (int)index;
44}
45static inline int LeastSignificantSetBit(u64 val) {
46 unsigned long index;
47 _BitScanForward64(&index, val);
48 return (int)index;
49}
50#else
51static inline int CountSetBits(u8 val) {
52 return __builtin_popcount(val);
53}
54static inline int CountSetBits(u16 val) {
55 return __builtin_popcount(val);
56}
57static inline int CountSetBits(u32 val) {
58 return __builtin_popcount(val);
59}
60static inline int CountSetBits(u64 val) {
61 return __builtin_popcountll(val);
62}
63static inline int LeastSignificantSetBit(u8 val) {
64 return __builtin_ctz(val);
65}
66static inline int LeastSignificantSetBit(u16 val) {
67 return __builtin_ctz(val);
68}
69static inline int LeastSignificantSetBit(u32 val) {
70 return __builtin_ctz(val);
71}
72static inline int LeastSignificantSetBit(u64 val) {
73 return __builtin_ctzll(val);
74}
75#endif
76
77// Similar to std::bitset, this is a class which encapsulates a bitset, i.e.
78// using the set bits of an integer to represent a set of integers. Like that
79// class, it acts like an array of bools:
80// BitSet32 bs;
81// bs[1] = true;
82// but also like the underlying integer ([0] = least significant bit):
83// BitSet32 bs2 = ...;
84// bs = (bs ^ bs2) & BitSet32(0xffff);
85// The following additional functionality is provided:
86// - Construction using an initializer list.
87// BitSet bs { 1, 2, 4, 8 };
88// - Efficiently iterating through the set bits:
89// for (int i : bs)
90// [i is the *index* of a set bit]
91// (This uses the appropriate CPU instruction to find the next set bit in one
92// operation.)
93// - Counting set bits using .Count() - see comment on that method.
94
95// TODO: use constexpr when MSVC gets out of the Dark Ages
96
97template <typename IntTy>
98class BitSet {
99 static_assert(!std::is_signed_v<IntTy>, "BitSet should not be used with signed types");
100
101public:
102 // A reference to a particular bit, returned from operator[].
103 class Ref {
104 public:
105 Ref(Ref&& other) : m_bs(other.m_bs), m_mask(other.m_mask) {}
106 Ref(BitSet* bs, IntTy mask) : m_bs(bs), m_mask(mask) {}
107 operator bool() const {
108 return (m_bs->m_val & m_mask) != 0;
109 }
110 bool operator=(bool set) {
111 m_bs->m_val = (m_bs->m_val & ~m_mask) | (set ? m_mask : 0);
112 return set;
113 }
114
115 private:
116 BitSet* m_bs;
117 IntTy m_mask;
118 };
119
120 // A STL-like iterator is required to be able to use range-based for loops.
121 class Iterator {
122 public:
123 Iterator(const Iterator& other) : m_val(other.m_val), m_bit(other.m_bit) {}
124 Iterator(IntTy val) : m_val(val), m_bit(0) {}
125 Iterator& operator=(Iterator other) {
126 new (this) Iterator(other);
127 return *this;
128 }
129 int operator*() {
130 return m_bit + ComputeLsb();
131 }
132 Iterator& operator++() {
133 int lsb = ComputeLsb();
134 m_val >>= lsb + 1;
135 m_bit += lsb + 1;
136 m_has_lsb = false;
137 return *this;
138 }
139 Iterator operator++(int _) {
140 Iterator other(*this);
141 ++*this;
142 return other;
143 }
144 bool operator==(Iterator other) const {
145 return m_val == other.m_val;
146 }
147 bool operator!=(Iterator other) const {
148 return m_val != other.m_val;
149 }
150
151 private:
152 int ComputeLsb() {
153 if (!m_has_lsb) {
154 m_lsb = LeastSignificantSetBit(m_val);
155 m_has_lsb = true;
156 }
157 return m_lsb;
158 }
159 IntTy m_val;
160 int m_bit;
161 int m_lsb = -1;
162 bool m_has_lsb = false;
163 };
164
165 BitSet() : m_val(0) {}
166 explicit BitSet(IntTy val) : m_val(val) {}
167 BitSet(std::initializer_list<int> init) {
168 m_val = 0;
169 for (int bit : init)
170 m_val |= (IntTy)1 << bit;
171 }
172
173 static BitSet AllTrue(std::size_t count) {
174 return BitSet(count == sizeof(IntTy) * 8 ? ~(IntTy)0 : (((IntTy)1 << count) - 1));
175 }
176
177 Ref operator[](std::size_t bit) {
178 return Ref(this, (IntTy)1 << bit);
179 }
180 const Ref operator[](std::size_t bit) const {
181 return (*const_cast<BitSet*>(this))[bit];
182 }
183 bool operator==(BitSet other) const {
184 return m_val == other.m_val;
185 }
186 bool operator!=(BitSet other) const {
187 return m_val != other.m_val;
188 }
189 bool operator<(BitSet other) const {
190 return m_val < other.m_val;
191 }
192 bool operator>(BitSet other) const {
193 return m_val > other.m_val;
194 }
195 BitSet operator|(BitSet other) const {
196 return BitSet(m_val | other.m_val);
197 }
198 BitSet operator&(BitSet other) const {
199 return BitSet(m_val & other.m_val);
200 }
201 BitSet operator^(BitSet other) const {
202 return BitSet(m_val ^ other.m_val);
203 }
204 BitSet operator~() const {
205 return BitSet(~m_val);
206 }
207 BitSet& operator|=(BitSet other) {
208 return *this = *this | other;
209 }
210 BitSet& operator&=(BitSet other) {
211 return *this = *this & other;
212 }
213 BitSet& operator^=(BitSet other) {
214 return *this = *this ^ other;
215 }
216 operator u32() = delete;
217 operator bool() {
218 return m_val != 0;
219 }
220
221 // Warning: Even though on modern CPUs this is a single fast instruction,
222 // Dolphin's official builds do not currently assume POPCNT support on x86,
223 // so slower explicit bit twiddling is generated. Still should generally
224 // be faster than a loop.
225 unsigned int Count() const {
226 return CountSetBits(m_val);
227 }
228
229 Iterator begin() const {
230 return Iterator(m_val);
231 }
232 Iterator end() const {
233 return Iterator(0);
234 }
235
236 IntTy m_val;
237};
238
239} // namespace Common
240
241typedef Common::BitSet<u8> BitSet8;
242typedef Common::BitSet<u16> BitSet16;
243typedef Common::BitSet<u32> BitSet32;
244typedef Common::BitSet<u64> BitSet64;
diff --git a/src/common/math_util.h b/src/common/math_util.h
index 343cdd902..94b4394c5 100644
--- a/src/common/math_util.h
+++ b/src/common/math_util.h
@@ -4,18 +4,12 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <algorithm>
8#include <cstdlib> 7#include <cstdlib>
9#include <type_traits> 8#include <type_traits>
10 9
11namespace MathUtil { 10namespace MathUtil {
12 11
13static constexpr float PI = 3.14159265f; 12constexpr float PI = 3.14159265f;
14
15inline bool IntervalsIntersect(unsigned start0, unsigned length0, unsigned start1,
16 unsigned length1) {
17 return (std::max(start0, start1) < std::min(start0 + length0, start1 + length1));
18}
19 13
20template <class T> 14template <class T>
21struct Rectangle { 15struct Rectangle {
@@ -24,16 +18,16 @@ struct Rectangle {
24 T right{}; 18 T right{};
25 T bottom{}; 19 T bottom{};
26 20
27 Rectangle() = default; 21 constexpr Rectangle() = default;
28 22
29 Rectangle(T left, T top, T right, T bottom) 23 constexpr Rectangle(T left, T top, T right, T bottom)
30 : left(left), top(top), right(right), bottom(bottom) {} 24 : left(left), top(top), right(right), bottom(bottom) {}
31 25
32 T GetWidth() const { 26 T GetWidth() const {
33 return std::abs(static_cast<typename std::make_signed<T>::type>(right - left)); 27 return std::abs(static_cast<std::make_signed_t<T>>(right - left));
34 } 28 }
35 T GetHeight() const { 29 T GetHeight() const {
36 return std::abs(static_cast<typename std::make_signed<T>::type>(bottom - top)); 30 return std::abs(static_cast<std::make_signed_t<T>>(bottom - top));
37 } 31 }
38 Rectangle<T> TranslateX(const T x) const { 32 Rectangle<T> TranslateX(const T x) const {
39 return Rectangle{left + x, top, right + x, bottom}; 33 return Rectangle{left + x, top, right + x, bottom};
diff --git a/src/common/thread.cpp b/src/common/thread.cpp
index 9e207118f..5144c0d9f 100644
--- a/src/common/thread.cpp
+++ b/src/common/thread.cpp
@@ -25,23 +25,6 @@
25 25
26namespace Common { 26namespace Common {
27 27
28int CurrentThreadId() {
29#ifdef _MSC_VER
30 return GetCurrentThreadId();
31#elif defined __APPLE__
32 return mach_thread_self();
33#else
34 return 0;
35#endif
36}
37
38#ifdef _WIN32
39// Supporting functions
40void SleepCurrentThread(int ms) {
41 Sleep(ms);
42}
43#endif
44
45#ifdef _MSC_VER 28#ifdef _MSC_VER
46 29
47void SetThreadAffinity(std::thread::native_handle_type thread, u32 mask) { 30void SetThreadAffinity(std::thread::native_handle_type thread, u32 mask) {
@@ -62,7 +45,7 @@ void SwitchCurrentThread() {
62 45
63// This is implemented much nicer in upcoming msvc++, see: 46// This is implemented much nicer in upcoming msvc++, see:
64// http://msdn.microsoft.com/en-us/library/xcb2z8hs(VS.100).aspx 47// http://msdn.microsoft.com/en-us/library/xcb2z8hs(VS.100).aspx
65void SetCurrentThreadName(const char* szThreadName) { 48void SetCurrentThreadName(const char* name) {
66 static const DWORD MS_VC_EXCEPTION = 0x406D1388; 49 static const DWORD MS_VC_EXCEPTION = 0x406D1388;
67 50
68#pragma pack(push, 8) 51#pragma pack(push, 8)
@@ -75,7 +58,7 @@ void SetCurrentThreadName(const char* szThreadName) {
75#pragma pack(pop) 58#pragma pack(pop)
76 59
77 info.dwType = 0x1000; 60 info.dwType = 0x1000;
78 info.szName = szThreadName; 61 info.szName = name;
79 info.dwThreadID = -1; // dwThreadID; 62 info.dwThreadID = -1; // dwThreadID;
80 info.dwFlags = 0; 63 info.dwFlags = 0;
81 64
@@ -107,10 +90,6 @@ void SetCurrentThreadAffinity(u32 mask) {
107} 90}
108 91
109#ifndef _WIN32 92#ifndef _WIN32
110void SleepCurrentThread(int ms) {
111 usleep(1000 * ms);
112}
113
114void SwitchCurrentThread() { 93void SwitchCurrentThread() {
115 usleep(1000 * 1); 94 usleep(1000 * 1);
116} 95}
@@ -118,15 +97,15 @@ void SwitchCurrentThread() {
118 97
119// MinGW with the POSIX threading model does not support pthread_setname_np 98// MinGW with the POSIX threading model does not support pthread_setname_np
120#if !defined(_WIN32) || defined(_MSC_VER) 99#if !defined(_WIN32) || defined(_MSC_VER)
121void SetCurrentThreadName(const char* szThreadName) { 100void SetCurrentThreadName(const char* name) {
122#ifdef __APPLE__ 101#ifdef __APPLE__
123 pthread_setname_np(szThreadName); 102 pthread_setname_np(name);
124#elif defined(__Bitrig__) || defined(__DragonFly__) || defined(__FreeBSD__) || defined(__OpenBSD__) 103#elif defined(__Bitrig__) || defined(__DragonFly__) || defined(__FreeBSD__) || defined(__OpenBSD__)
125 pthread_set_name_np(pthread_self(), szThreadName); 104 pthread_set_name_np(pthread_self(), name);
126#elif defined(__NetBSD__) 105#elif defined(__NetBSD__)
127 pthread_setname_np(pthread_self(), "%s", (void*)szThreadName); 106 pthread_setname_np(pthread_self(), "%s", (void*)name);
128#else 107#else
129 pthread_setname_np(pthread_self(), szThreadName); 108 pthread_setname_np(pthread_self(), name);
130#endif 109#endif
131} 110}
132#endif 111#endif
diff --git a/src/common/thread.h b/src/common/thread.h
index 6cbdb96a3..2cf74452d 100644
--- a/src/common/thread.h
+++ b/src/common/thread.h
@@ -13,15 +13,8 @@
13 13
14namespace Common { 14namespace Common {
15 15
16int CurrentThreadId();
17
18void SetThreadAffinity(std::thread::native_handle_type thread, u32 mask);
19void SetCurrentThreadAffinity(u32 mask);
20
21class Event { 16class Event {
22public: 17public:
23 Event() : is_set(false) {}
24
25 void Set() { 18 void Set() {
26 std::lock_guard<std::mutex> lk(mutex); 19 std::lock_guard<std::mutex> lk(mutex);
27 if (!is_set) { 20 if (!is_set) {
@@ -53,14 +46,14 @@ public:
53 } 46 }
54 47
55private: 48private:
56 bool is_set; 49 bool is_set = false;
57 std::condition_variable condvar; 50 std::condition_variable condvar;
58 std::mutex mutex; 51 std::mutex mutex;
59}; 52};
60 53
61class Barrier { 54class Barrier {
62public: 55public:
63 explicit Barrier(std::size_t count_) : count(count_), waiting(0), generation(0) {} 56 explicit Barrier(std::size_t count_) : count(count_) {}
64 57
65 /// Blocks until all "count" threads have called Sync() 58 /// Blocks until all "count" threads have called Sync()
66 void Sync() { 59 void Sync() {
@@ -80,12 +73,13 @@ public:
80private: 73private:
81 std::condition_variable condvar; 74 std::condition_variable condvar;
82 std::mutex mutex; 75 std::mutex mutex;
83 const std::size_t count; 76 std::size_t count;
84 std::size_t waiting; 77 std::size_t waiting = 0;
85 std::size_t generation; // Incremented once each time the barrier is used 78 std::size_t generation = 0; // Incremented once each time the barrier is used
86}; 79};
87 80
88void SleepCurrentThread(int ms); 81void SetThreadAffinity(std::thread::native_handle_type thread, u32 mask);
82void SetCurrentThreadAffinity(u32 mask);
89void SwitchCurrentThread(); // On Linux, this is equal to sleep 1ms 83void SwitchCurrentThread(); // On Linux, this is equal to sleep 1ms
90void SetCurrentThreadName(const char* name); 84void SetCurrentThreadName(const char* name);
91 85
diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h
deleted file mode 100644
index 636a5c0f9..000000000
--- a/src/common/x64/xbyak_abi.h
+++ /dev/null
@@ -1,222 +0,0 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <initializer_list>
8#include <xbyak.h>
9#include "common/assert.h"
10#include "common/bit_set.h"
11
12namespace Common::X64 {
13
14inline int RegToIndex(const Xbyak::Reg& reg) {
15 using Kind = Xbyak::Reg::Kind;
16 ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0,
17 "RegSet only support GPRs and XMM registers.");
18 ASSERT_MSG(reg.getIdx() < 16, "RegSet only supports XXM0-15.");
19 return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16);
20}
21
22inline Xbyak::Reg64 IndexToReg64(int reg_index) {
23 ASSERT(reg_index < 16);
24 return Xbyak::Reg64(reg_index);
25}
26
27inline Xbyak::Xmm IndexToXmm(int reg_index) {
28 ASSERT(reg_index >= 16 && reg_index < 32);
29 return Xbyak::Xmm(reg_index - 16);
30}
31
32inline Xbyak::Reg IndexToReg(int reg_index) {
33 if (reg_index < 16) {
34 return IndexToReg64(reg_index);
35 } else {
36 return IndexToXmm(reg_index);
37 }
38}
39
40inline BitSet32 BuildRegSet(std::initializer_list<Xbyak::Reg> regs) {
41 BitSet32 bits;
42 for (const Xbyak::Reg& reg : regs) {
43 bits[RegToIndex(reg)] = true;
44 }
45 return bits;
46}
47
48const BitSet32 ABI_ALL_GPRS(0x0000FFFF);
49const BitSet32 ABI_ALL_XMMS(0xFFFF0000);
50
51#ifdef _WIN32
52
53// Microsoft x64 ABI
54const Xbyak::Reg ABI_RETURN = Xbyak::util::rax;
55const Xbyak::Reg ABI_PARAM1 = Xbyak::util::rcx;
56const Xbyak::Reg ABI_PARAM2 = Xbyak::util::rdx;
57const Xbyak::Reg ABI_PARAM3 = Xbyak::util::r8;
58const Xbyak::Reg ABI_PARAM4 = Xbyak::util::r9;
59
60const BitSet32 ABI_ALL_CALLER_SAVED = BuildRegSet({
61 // GPRs
62 Xbyak::util::rcx,
63 Xbyak::util::rdx,
64 Xbyak::util::r8,
65 Xbyak::util::r9,
66 Xbyak::util::r10,
67 Xbyak::util::r11,
68 // XMMs
69 Xbyak::util::xmm0,
70 Xbyak::util::xmm1,
71 Xbyak::util::xmm2,
72 Xbyak::util::xmm3,
73 Xbyak::util::xmm4,
74 Xbyak::util::xmm5,
75});
76
77const BitSet32 ABI_ALL_CALLEE_SAVED = BuildRegSet({
78 // GPRs
79 Xbyak::util::rbx,
80 Xbyak::util::rsi,
81 Xbyak::util::rdi,
82 Xbyak::util::rbp,
83 Xbyak::util::r12,
84 Xbyak::util::r13,
85 Xbyak::util::r14,
86 Xbyak::util::r15,
87 // XMMs
88 Xbyak::util::xmm6,
89 Xbyak::util::xmm7,
90 Xbyak::util::xmm8,
91 Xbyak::util::xmm9,
92 Xbyak::util::xmm10,
93 Xbyak::util::xmm11,
94 Xbyak::util::xmm12,
95 Xbyak::util::xmm13,
96 Xbyak::util::xmm14,
97 Xbyak::util::xmm15,
98});
99
100constexpr std::size_t ABI_SHADOW_SPACE = 0x20;
101
102#else
103
104// System V x86-64 ABI
105const Xbyak::Reg ABI_RETURN = Xbyak::util::rax;
106const Xbyak::Reg ABI_PARAM1 = Xbyak::util::rdi;
107const Xbyak::Reg ABI_PARAM2 = Xbyak::util::rsi;
108const Xbyak::Reg ABI_PARAM3 = Xbyak::util::rdx;
109const Xbyak::Reg ABI_PARAM4 = Xbyak::util::rcx;
110
111const BitSet32 ABI_ALL_CALLER_SAVED = BuildRegSet({
112 // GPRs
113 Xbyak::util::rcx,
114 Xbyak::util::rdx,
115 Xbyak::util::rdi,
116 Xbyak::util::rsi,
117 Xbyak::util::r8,
118 Xbyak::util::r9,
119 Xbyak::util::r10,
120 Xbyak::util::r11,
121 // XMMs
122 Xbyak::util::xmm0,
123 Xbyak::util::xmm1,
124 Xbyak::util::xmm2,
125 Xbyak::util::xmm3,
126 Xbyak::util::xmm4,
127 Xbyak::util::xmm5,
128 Xbyak::util::xmm6,
129 Xbyak::util::xmm7,
130 Xbyak::util::xmm8,
131 Xbyak::util::xmm9,
132 Xbyak::util::xmm10,
133 Xbyak::util::xmm11,
134 Xbyak::util::xmm12,
135 Xbyak::util::xmm13,
136 Xbyak::util::xmm14,
137 Xbyak::util::xmm15,
138});
139
140const BitSet32 ABI_ALL_CALLEE_SAVED = BuildRegSet({
141 // GPRs
142 Xbyak::util::rbx,
143 Xbyak::util::rbp,
144 Xbyak::util::r12,
145 Xbyak::util::r13,
146 Xbyak::util::r14,
147 Xbyak::util::r15,
148});
149
150constexpr std::size_t ABI_SHADOW_SPACE = 0;
151
152#endif
153
154inline void ABI_CalculateFrameSize(BitSet32 regs, std::size_t rsp_alignment,
155 std::size_t needed_frame_size, s32* out_subtraction,
156 s32* out_xmm_offset) {
157 int count = (regs & ABI_ALL_GPRS).Count();
158 rsp_alignment -= count * 8;
159 std::size_t subtraction = 0;
160 int xmm_count = (regs & ABI_ALL_XMMS).Count();
161 if (xmm_count) {
162 // If we have any XMMs to save, we must align the stack here.
163 subtraction = rsp_alignment & 0xF;
164 }
165 subtraction += 0x10 * xmm_count;
166 std::size_t xmm_base_subtraction = subtraction;
167 subtraction += needed_frame_size;
168 subtraction += ABI_SHADOW_SPACE;
169 // Final alignment.
170 rsp_alignment -= subtraction;
171 subtraction += rsp_alignment & 0xF;
172
173 *out_subtraction = (s32)subtraction;
174 *out_xmm_offset = (s32)(subtraction - xmm_base_subtraction);
175}
176
177inline std::size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, BitSet32 regs,
178 std::size_t rsp_alignment,
179 std::size_t needed_frame_size = 0) {
180 s32 subtraction, xmm_offset;
181 ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
182
183 for (int reg_index : (regs & ABI_ALL_GPRS)) {
184 code.push(IndexToReg64(reg_index));
185 }
186
187 if (subtraction != 0) {
188 code.sub(code.rsp, subtraction);
189 }
190
191 for (int reg_index : (regs & ABI_ALL_XMMS)) {
192 code.movaps(code.xword[code.rsp + xmm_offset], IndexToXmm(reg_index));
193 xmm_offset += 0x10;
194 }
195
196 return ABI_SHADOW_SPACE;
197}
198
199inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, BitSet32 regs,
200 std::size_t rsp_alignment,
201 std::size_t needed_frame_size = 0) {
202 s32 subtraction, xmm_offset;
203 ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
204
205 for (int reg_index : (regs & ABI_ALL_XMMS)) {
206 code.movaps(IndexToXmm(reg_index), code.xword[code.rsp + xmm_offset]);
207 xmm_offset += 0x10;
208 }
209
210 if (subtraction != 0) {
211 code.add(code.rsp, subtraction);
212 }
213
214 // GPRs need to be popped in reverse order
215 for (int reg_index = 15; reg_index >= 0; reg_index--) {
216 if (regs[reg_index]) {
217 code.pop(IndexToReg64(reg_index));
218 }
219 }
220}
221
222} // namespace Common::X64
diff --git a/src/common/x64/xbyak_util.h b/src/common/x64/xbyak_util.h
deleted file mode 100644
index 5cc8a8c76..000000000
--- a/src/common/x64/xbyak_util.h
+++ /dev/null
@@ -1,47 +0,0 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <type_traits>
8#include <xbyak.h>
9#include "common/x64/xbyak_abi.h"
10
11namespace Common::X64 {
12
// Comparison predicate immediates for use with cmpps/cmpss
enum {
    CMP_EQ = 0,    // equal
    CMP_LT = 1,    // less than
    CMP_LE = 2,    // less than or equal
    CMP_UNORD = 3, // unordered
    CMP_NEQ = 4,   // not equal
    CMP_NLT = 5,   // not less than
    CMP_NLE = 6,   // not less than or equal
    CMP_ORD = 7,   // ordered
};
24
25inline bool IsWithin2G(uintptr_t ref, uintptr_t target) {
26 u64 distance = target - (ref + 5);
27 return !(distance >= 0x8000'0000ULL && distance <= ~0x8000'0000ULL);
28}
29
30inline bool IsWithin2G(const Xbyak::CodeGenerator& code, uintptr_t target) {
31 return IsWithin2G(reinterpret_cast<uintptr_t>(code.getCurr()), target);
32}
33
34template <typename T>
35inline void CallFarFunction(Xbyak::CodeGenerator& code, const T f) {
36 static_assert(std::is_pointer_v<T>, "Argument must be a (function) pointer.");
37 std::size_t addr = reinterpret_cast<std::size_t>(f);
38 if (IsWithin2G(code, addr)) {
39 code.call(f);
40 } else {
41 // ABI_RETURN is a safe temp register to use before a call
42 code.mov(ABI_RETURN, addr);
43 code.call(ABI_RETURN);
44 }
45}
46
47} // namespace Common::X64
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index a355eaca6..e1f21a764 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -12,6 +12,8 @@ add_library(core STATIC
12 core_timing.h 12 core_timing.h
13 core_timing_util.cpp 13 core_timing_util.cpp
14 core_timing_util.h 14 core_timing_util.h
15 cpu_core_manager.cpp
16 cpu_core_manager.h
15 crypto/aes_util.cpp 17 crypto/aes_util.cpp
16 crypto/aes_util.h 18 crypto/aes_util.h
17 crypto/encryption_layer.cpp 19 crypto/encryption_layer.cpp
@@ -156,6 +158,8 @@ add_library(core STATIC
156 hle/service/am/applets/applets.h 158 hle/service/am/applets/applets.h
157 hle/service/am/applets/software_keyboard.cpp 159 hle/service/am/applets/software_keyboard.cpp
158 hle/service/am/applets/software_keyboard.h 160 hle/service/am/applets/software_keyboard.h
161 hle/service/am/applets/stub_applet.cpp
162 hle/service/am/applets/stub_applet.h
159 hle/service/am/idle.cpp 163 hle/service/am/idle.cpp
160 hle/service/am/idle.h 164 hle/service/am/idle.h
161 hle/service/am/omm.cpp 165 hle/service/am/omm.cpp
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 6c72fdf4a..795fabc65 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -14,6 +14,7 @@
14#include "core/core.h" 14#include "core/core.h"
15#include "core/core_cpu.h" 15#include "core/core_cpu.h"
16#include "core/core_timing.h" 16#include "core/core_timing.h"
17#include "core/cpu_core_manager.h"
17#include "core/file_sys/mode.h" 18#include "core/file_sys/mode.h"
18#include "core/file_sys/vfs_concat.h" 19#include "core/file_sys/vfs_concat.h"
19#include "core/file_sys/vfs_real.h" 20#include "core/file_sys/vfs_real.h"
@@ -28,7 +29,6 @@
28#include "core/hle/service/sm/sm.h" 29#include "core/hle/service/sm/sm.h"
29#include "core/loader/loader.h" 30#include "core/loader/loader.h"
30#include "core/perf_stats.h" 31#include "core/perf_stats.h"
31#include "core/settings.h"
32#include "core/telemetry_session.h" 32#include "core/telemetry_session.h"
33#include "frontend/applets/software_keyboard.h" 33#include "frontend/applets/software_keyboard.h"
34#include "video_core/debug_utils/debug_utils.h" 34#include "video_core/debug_utils/debug_utils.h"
@@ -71,64 +71,22 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs,
71 71
72 return vfs->OpenFile(path, FileSys::Mode::Read); 72 return vfs->OpenFile(path, FileSys::Mode::Read);
73} 73}
74
75/// Runs a CPU core while the system is powered on
76void RunCpuCore(Cpu& cpu_state) {
77 while (Core::System::GetInstance().IsPoweredOn()) {
78 cpu_state.RunLoop(true);
79 }
80}
81} // Anonymous namespace 74} // Anonymous namespace
82 75
83struct System::Impl { 76struct System::Impl {
84 Cpu& CurrentCpuCore() { 77 Cpu& CurrentCpuCore() {
85 if (Settings::values.use_multi_core) { 78 return cpu_core_manager.GetCurrentCore();
86 const auto& search = thread_to_cpu.find(std::this_thread::get_id());
87 ASSERT(search != thread_to_cpu.end());
88 ASSERT(search->second);
89 return *search->second;
90 }
91
92 // Otherwise, use single-threaded mode active_core variable
93 return *cpu_cores[active_core];
94 } 79 }
95 80
96 ResultStatus RunLoop(bool tight_loop) { 81 ResultStatus RunLoop(bool tight_loop) {
97 status = ResultStatus::Success; 82 status = ResultStatus::Success;
98 83
99 // Update thread_to_cpu in case Core 0 is run from a different host thread 84 cpu_core_manager.RunLoop(tight_loop);
100 thread_to_cpu[std::this_thread::get_id()] = cpu_cores[0].get();
101
102 if (GDBStub::IsServerEnabled()) {
103 GDBStub::HandlePacket();
104
105 // If the loop is halted and we want to step, use a tiny (1) number of instructions to
106 // execute. Otherwise, get out of the loop function.
107 if (GDBStub::GetCpuHaltFlag()) {
108 if (GDBStub::GetCpuStepFlag()) {
109 tight_loop = false;
110 } else {
111 return ResultStatus::Success;
112 }
113 }
114 }
115
116 for (active_core = 0; active_core < NUM_CPU_CORES; ++active_core) {
117 cpu_cores[active_core]->RunLoop(tight_loop);
118 if (Settings::values.use_multi_core) {
119 // Cores 1-3 are run on other threads in this mode
120 break;
121 }
122 }
123
124 if (GDBStub::IsServerEnabled()) {
125 GDBStub::SetCpuStepFlag(false);
126 }
127 85
128 return status; 86 return status;
129 } 87 }
130 88
131 ResultStatus Init(Frontend::EmuWindow& emu_window) { 89 ResultStatus Init(System& system, Frontend::EmuWindow& emu_window) {
132 LOG_DEBUG(HW_Memory, "initialized OK"); 90 LOG_DEBUG(HW_Memory, "initialized OK");
133 91
134 CoreTiming::Init(); 92 CoreTiming::Init();
@@ -145,12 +103,6 @@ struct System::Impl {
145 auto main_process = Kernel::Process::Create(kernel, "main"); 103 auto main_process = Kernel::Process::Create(kernel, "main");
146 kernel.MakeCurrentProcess(main_process.get()); 104 kernel.MakeCurrentProcess(main_process.get());
147 105
148 cpu_barrier = std::make_unique<CpuBarrier>();
149 cpu_exclusive_monitor = Cpu::MakeExclusiveMonitor(cpu_cores.size());
150 for (std::size_t index = 0; index < cpu_cores.size(); ++index) {
151 cpu_cores[index] = std::make_unique<Cpu>(*cpu_exclusive_monitor, *cpu_barrier, index);
152 }
153
154 telemetry_session = std::make_unique<Core::TelemetrySession>(); 106 telemetry_session = std::make_unique<Core::TelemetrySession>();
155 service_manager = std::make_shared<Service::SM::ServiceManager>(); 107 service_manager = std::make_shared<Service::SM::ServiceManager>();
156 108
@@ -164,17 +116,8 @@ struct System::Impl {
164 116
165 gpu_core = std::make_unique<Tegra::GPU>(renderer->Rasterizer()); 117 gpu_core = std::make_unique<Tegra::GPU>(renderer->Rasterizer());
166 118
167 // Create threads for CPU cores 1-3, and build thread_to_cpu map 119 cpu_core_manager.Initialize(system);
168 // CPU core 0 is run on the main thread 120 is_powered_on = true;
169 thread_to_cpu[std::this_thread::get_id()] = cpu_cores[0].get();
170 if (Settings::values.use_multi_core) {
171 for (std::size_t index = 0; index < cpu_core_threads.size(); ++index) {
172 cpu_core_threads[index] =
173 std::make_unique<std::thread>(RunCpuCore, std::ref(*cpu_cores[index + 1]));
174 thread_to_cpu[cpu_core_threads[index]->get_id()] = cpu_cores[index + 1].get();
175 }
176 }
177
178 LOG_DEBUG(Core, "Initialized OK"); 121 LOG_DEBUG(Core, "Initialized OK");
179 122
180 // Reset counters and set time origin to current frame 123 // Reset counters and set time origin to current frame
@@ -184,7 +127,8 @@ struct System::Impl {
184 return ResultStatus::Success; 127 return ResultStatus::Success;
185 } 128 }
186 129
187 ResultStatus Load(Frontend::EmuWindow& emu_window, const std::string& filepath) { 130 ResultStatus Load(System& system, Frontend::EmuWindow& emu_window,
131 const std::string& filepath) {
188 app_loader = Loader::GetLoader(GetGameFileFromPath(virtual_filesystem, filepath)); 132 app_loader = Loader::GetLoader(GetGameFileFromPath(virtual_filesystem, filepath));
189 133
190 if (!app_loader) { 134 if (!app_loader) {
@@ -201,7 +145,7 @@ struct System::Impl {
201 return ResultStatus::ErrorSystemMode; 145 return ResultStatus::ErrorSystemMode;
202 } 146 }
203 147
204 ResultStatus init_result{Init(emu_window)}; 148 ResultStatus init_result{Init(system, emu_window)};
205 if (init_result != ResultStatus::Success) { 149 if (init_result != ResultStatus::Success) {
206 LOG_CRITICAL(Core, "Failed to initialize system (Error {})!", 150 LOG_CRITICAL(Core, "Failed to initialize system (Error {})!",
207 static_cast<int>(init_result)); 151 static_cast<int>(init_result));
@@ -231,6 +175,8 @@ struct System::Impl {
231 Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime", 175 Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime",
232 perf_results.frametime * 1000.0); 176 perf_results.frametime * 1000.0);
233 177
178 is_powered_on = false;
179
234 // Shutdown emulation session 180 // Shutdown emulation session
235 renderer.reset(); 181 renderer.reset();
236 GDBStub::Shutdown(); 182 GDBStub::Shutdown();
@@ -240,19 +186,7 @@ struct System::Impl {
240 gpu_core.reset(); 186 gpu_core.reset();
241 187
242 // Close all CPU/threading state 188 // Close all CPU/threading state
243 cpu_barrier->NotifyEnd(); 189 cpu_core_manager.Shutdown();
244 if (Settings::values.use_multi_core) {
245 for (auto& thread : cpu_core_threads) {
246 thread->join();
247 thread.reset();
248 }
249 }
250 thread_to_cpu.clear();
251 for (auto& cpu_core : cpu_cores) {
252 cpu_core.reset();
253 }
254 cpu_exclusive_monitor.reset();
255 cpu_barrier.reset();
256 190
257 // Shutdown kernel and core timing 191 // Shutdown kernel and core timing
258 kernel.Shutdown(); 192 kernel.Shutdown();
@@ -289,11 +223,8 @@ struct System::Impl {
289 std::unique_ptr<VideoCore::RendererBase> renderer; 223 std::unique_ptr<VideoCore::RendererBase> renderer;
290 std::unique_ptr<Tegra::GPU> gpu_core; 224 std::unique_ptr<Tegra::GPU> gpu_core;
291 std::shared_ptr<Tegra::DebugContext> debug_context; 225 std::shared_ptr<Tegra::DebugContext> debug_context;
292 std::unique_ptr<ExclusiveMonitor> cpu_exclusive_monitor; 226 CpuCoreManager cpu_core_manager;
293 std::unique_ptr<CpuBarrier> cpu_barrier; 227 bool is_powered_on = false;
294 std::array<std::unique_ptr<Cpu>, NUM_CPU_CORES> cpu_cores;
295 std::array<std::unique_ptr<std::thread>, NUM_CPU_CORES - 1> cpu_core_threads;
296 std::size_t active_core{}; ///< Active core, only used in single thread mode
297 228
298 /// Frontend applets 229 /// Frontend applets
299 std::unique_ptr<Core::Frontend::SoftwareKeyboardApplet> software_keyboard; 230 std::unique_ptr<Core::Frontend::SoftwareKeyboardApplet> software_keyboard;
@@ -307,9 +238,6 @@ struct System::Impl {
307 ResultStatus status = ResultStatus::Success; 238 ResultStatus status = ResultStatus::Success;
308 std::string status_details = ""; 239 std::string status_details = "";
309 240
310 /// Map of guest threads to CPU cores
311 std::map<std::thread::id, Cpu*> thread_to_cpu;
312
313 Core::PerfStats perf_stats; 241 Core::PerfStats perf_stats;
314 Core::FrameLimiter frame_limiter; 242 Core::FrameLimiter frame_limiter;
315}; 243};
@@ -334,17 +262,15 @@ System::ResultStatus System::SingleStep() {
334} 262}
335 263
336void System::InvalidateCpuInstructionCaches() { 264void System::InvalidateCpuInstructionCaches() {
337 for (auto& cpu : impl->cpu_cores) { 265 impl->cpu_core_manager.InvalidateAllInstructionCaches();
338 cpu->ArmInterface().ClearInstructionCache();
339 }
340} 266}
341 267
342System::ResultStatus System::Load(Frontend::EmuWindow& emu_window, const std::string& filepath) { 268System::ResultStatus System::Load(Frontend::EmuWindow& emu_window, const std::string& filepath) {
343 return impl->Load(emu_window, filepath); 269 return impl->Load(*this, emu_window, filepath);
344} 270}
345 271
346bool System::IsPoweredOn() const { 272bool System::IsPoweredOn() const {
347 return impl->cpu_barrier && impl->cpu_barrier->IsAlive(); 273 return impl->is_powered_on;
348} 274}
349 275
350void System::PrepareReschedule() { 276void System::PrepareReschedule() {
@@ -408,21 +334,20 @@ const ARM_Interface& System::ArmInterface(std::size_t core_index) const {
408} 334}
409 335
410Cpu& System::CpuCore(std::size_t core_index) { 336Cpu& System::CpuCore(std::size_t core_index) {
411 ASSERT(core_index < NUM_CPU_CORES); 337 return impl->cpu_core_manager.GetCore(core_index);
412 return *impl->cpu_cores[core_index];
413} 338}
414 339
415const Cpu& System::CpuCore(std::size_t core_index) const { 340const Cpu& System::CpuCore(std::size_t core_index) const {
416 ASSERT(core_index < NUM_CPU_CORES); 341 ASSERT(core_index < NUM_CPU_CORES);
417 return *impl->cpu_cores[core_index]; 342 return impl->cpu_core_manager.GetCore(core_index);
418} 343}
419 344
420ExclusiveMonitor& System::Monitor() { 345ExclusiveMonitor& System::Monitor() {
421 return *impl->cpu_exclusive_monitor; 346 return impl->cpu_core_manager.GetExclusiveMonitor();
422} 347}
423 348
424const ExclusiveMonitor& System::Monitor() const { 349const ExclusiveMonitor& System::Monitor() const {
425 return *impl->cpu_exclusive_monitor; 350 return impl->cpu_core_manager.GetExclusiveMonitor();
426} 351}
427 352
428Tegra::GPU& System::GPU() { 353Tegra::GPU& System::GPU() {
@@ -506,7 +431,7 @@ const Core::Frontend::SoftwareKeyboardApplet& System::GetSoftwareKeyboard() cons
506} 431}
507 432
508System::ResultStatus System::Init(Frontend::EmuWindow& emu_window) { 433System::ResultStatus System::Init(Frontend::EmuWindow& emu_window) {
509 return impl->Init(emu_window); 434 return impl->Init(*this, emu_window);
510} 435}
511 436
512void System::Shutdown() { 437void System::Shutdown() {
diff --git a/src/core/cpu_core_manager.cpp b/src/core/cpu_core_manager.cpp
new file mode 100644
index 000000000..769a6fefa
--- /dev/null
+++ b/src/core/cpu_core_manager.cpp
@@ -0,0 +1,142 @@
1// Copyright 2018 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "core/arm/exclusive_monitor.h"
7#include "core/core.h"
8#include "core/core_cpu.h"
9#include "core/cpu_core_manager.h"
10#include "core/gdbstub/gdbstub.h"
11#include "core/settings.h"
12
13namespace Core {
14namespace {
15void RunCpuCore(const System& system, Cpu& cpu_state) {
16 while (system.IsPoweredOn()) {
17 cpu_state.RunLoop(true);
18 }
19}
20} // Anonymous namespace
21
// Both special members are defaulted in this translation unit rather than in
// the header. The header only forward-declares Cpu, CpuBarrier and
// ExclusiveMonitor while holding std::unique_ptr members of them; presumably
// they are defined here so the complete types (included above) are visible.
22CpuCoreManager::CpuCoreManager() = default;
23CpuCoreManager::~CpuCoreManager() = default;
24
25void CpuCoreManager::Initialize(System& system) {
26 barrier = std::make_unique<CpuBarrier>();
27 exclusive_monitor = Cpu::MakeExclusiveMonitor(cores.size());
28
29 for (std::size_t index = 0; index < cores.size(); ++index) {
30 cores[index] = std::make_unique<Cpu>(*exclusive_monitor, *barrier, index);
31 }
32
33 // Create threads for CPU cores 1-3, and build thread_to_cpu map
34 // CPU core 0 is run on the main thread
35 thread_to_cpu[std::this_thread::get_id()] = cores[0].get();
36 if (!Settings::values.use_multi_core) {
37 return;
38 }
39
40 for (std::size_t index = 0; index < core_threads.size(); ++index) {
41 core_threads[index] = std::make_unique<std::thread>(RunCpuCore, std::cref(system),
42 std::ref(*cores[index + 1]));
43 thread_to_cpu[core_threads[index]->get_id()] = cores[index + 1].get();
44 }
45}
46
47void CpuCoreManager::Shutdown() {
48 barrier->NotifyEnd();
49 if (Settings::values.use_multi_core) {
50 for (auto& thread : core_threads) {
51 thread->join();
52 thread.reset();
53 }
54 }
55
56 thread_to_cpu.clear();
57 for (auto& cpu_core : cores) {
58 cpu_core.reset();
59 }
60
61 exclusive_monitor.reset();
62 barrier.reset();
63}
64
65Cpu& CpuCoreManager::GetCore(std::size_t index) {
66 return *cores.at(index);
67}
68
69const Cpu& CpuCoreManager::GetCore(std::size_t index) const {
70 return *cores.at(index);
71}
72
73ExclusiveMonitor& CpuCoreManager::GetExclusiveMonitor() {
74 return *exclusive_monitor;
75}
76
77const ExclusiveMonitor& CpuCoreManager::GetExclusiveMonitor() const {
78 return *exclusive_monitor;
79}
80
81Cpu& CpuCoreManager::GetCurrentCore() {
82 if (Settings::values.use_multi_core) {
83 const auto& search = thread_to_cpu.find(std::this_thread::get_id());
84 ASSERT(search != thread_to_cpu.end());
85 ASSERT(search->second);
86 return *search->second;
87 }
88
89 // Otherwise, use single-threaded mode active_core variable
90 return *cores[active_core];
91}
92
93const Cpu& CpuCoreManager::GetCurrentCore() const {
94 if (Settings::values.use_multi_core) {
95 const auto& search = thread_to_cpu.find(std::this_thread::get_id());
96 ASSERT(search != thread_to_cpu.end());
97 ASSERT(search->second);
98 return *search->second;
99 }
100
101 // Otherwise, use single-threaded mode active_core variable
102 return *cores[active_core];
103}
104
105void CpuCoreManager::RunLoop(bool tight_loop) {
106 // Update thread_to_cpu in case Core 0 is run from a different host thread
107 thread_to_cpu[std::this_thread::get_id()] = cores[0].get();
108
109 if (GDBStub::IsServerEnabled()) {
110 GDBStub::HandlePacket();
111
112 // If the loop is halted and we want to step, use a tiny (1) number of instructions to
113 // execute. Otherwise, get out of the loop function.
114 if (GDBStub::GetCpuHaltFlag()) {
115 if (GDBStub::GetCpuStepFlag()) {
116 tight_loop = false;
117 } else {
118 return;
119 }
120 }
121 }
122
123 for (active_core = 0; active_core < NUM_CPU_CORES; ++active_core) {
124 cores[active_core]->RunLoop(tight_loop);
125 if (Settings::values.use_multi_core) {
126 // Cores 1-3 are run on other threads in this mode
127 break;
128 }
129 }
130
131 if (GDBStub::IsServerEnabled()) {
132 GDBStub::SetCpuStepFlag(false);
133 }
134}
135
136void CpuCoreManager::InvalidateAllInstructionCaches() {
137 for (auto& cpu : cores) {
138 cpu->ArmInterface().ClearInstructionCache();
139 }
140}
141
142} // namespace Core
diff --git a/src/core/cpu_core_manager.h b/src/core/cpu_core_manager.h
new file mode 100644
index 000000000..a4d70ec56
--- /dev/null
+++ b/src/core/cpu_core_manager.h
@@ -0,0 +1,59 @@
1// Copyright 2018 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <map>
9#include <memory>
10#include <thread>
11
12namespace Core {
13
14class Cpu;
15class CpuBarrier;
16class ExclusiveMonitor;
17class System;
18
/// Owns the emulated CPU cores together with the barrier, exclusive monitor
/// and host threads they use, and tracks which host thread runs which core.
/// Instances are neither copyable nor movable.
19class CpuCoreManager {
20public:
21 CpuCoreManager();
22 CpuCoreManager(const CpuCoreManager&) = delete;
23 CpuCoreManager(CpuCoreManager&&) = delete;
24
25 ~CpuCoreManager();
26
27 CpuCoreManager& operator=(const CpuCoreManager&) = delete;
28 CpuCoreManager& operator=(CpuCoreManager&&) = delete;
29
    /// Creates the barrier, exclusive monitor and cores; in multi-core mode
    /// also starts the host threads for the cores other than core 0.
30 void Initialize(System& system);
    /// Joins the host threads and releases everything Initialize() created.
31 void Shutdown();
32
    /// Returns the core at the given index (bounds-checked).
33 Cpu& GetCore(std::size_t index);
34 const Cpu& GetCore(std::size_t index) const;
35
    /// Returns the core associated with the calling host thread (multi-core
    /// mode) or the core selected by active_core (single-core mode).
36 Cpu& GetCurrentCore();
37 const Cpu& GetCurrentCore() const;
38
    /// Returns the exclusive monitor shared by all cores.
39 ExclusiveMonitor& GetExclusiveMonitor();
40 const ExclusiveMonitor& GetExclusiveMonitor() const;
41
    /// Runs core 0 (multi-core mode) or all cores in turn (single-core mode)
    /// on the calling thread.
42 void RunLoop(bool tight_loop);
43
    /// Clears the instruction cache of every core.
44 void InvalidateAllInstructionCaches();
45
46private:
    /// Number of emulated CPU cores managed by this class.
47 static constexpr std::size_t NUM_CPU_CORES = 4;
48
    // Note: members are destroyed in reverse declaration order, so the
    // declaration order below is significant.
49 std::unique_ptr<ExclusiveMonitor> exclusive_monitor;
50 std::unique_ptr<CpuBarrier> barrier;
51 std::array<std::unique_ptr<Cpu>, NUM_CPU_CORES> cores;
    /// Host threads for cores 1..NUM_CPU_CORES-1; entry i runs cores[i + 1].
52 std::array<std::unique_ptr<std::thread>, NUM_CPU_CORES - 1> core_threads;
53 std::size_t active_core{}; ///< Active core, only used in single thread mode
54
55 /// Map of host thread ids to the CPU core run by that host thread
56 std::map<std::thread::id, Cpu*> thread_to_cpu;
57};
58
59} // namespace Core
diff --git a/src/core/file_sys/patch_manager.cpp b/src/core/file_sys/patch_manager.cpp
index 8d062eb3e..e8df08724 100644
--- a/src/core/file_sys/patch_manager.cpp
+++ b/src/core/file_sys/patch_manager.cpp
@@ -26,6 +26,11 @@ namespace FileSys {
26constexpr u64 SINGLE_BYTE_MODULUS = 0x100; 26constexpr u64 SINGLE_BYTE_MODULUS = 0x100;
27constexpr u64 DLC_BASE_TITLE_ID_MASK = 0xFFFFFFFFFFFFE000; 27constexpr u64 DLC_BASE_TITLE_ID_MASK = 0xFFFFFFFFFFFFE000;
28 28
// File names compared against the entries of a mod's exefs directory when
// listing patch types; a match marks the mod as a "LayeredExeFS" patch.
29constexpr std::array<const char*, 14> EXEFS_FILE_NAMES{
30 "main", "main.npdm", "rtld", "sdk", "subsdk0", "subsdk1", "subsdk2",
31 "subsdk3", "subsdk4", "subsdk5", "subsdk6", "subsdk7", "subsdk8", "subsdk9",
32};
33
29struct NSOBuildHeader { 34struct NSOBuildHeader {
30 u32_le magic; 35 u32_le magic;
31 INSERT_PADDING_BYTES(0x3C); 36 INSERT_PADDING_BYTES(0x3C);
@@ -57,6 +62,15 @@ VirtualDir PatchManager::PatchExeFS(VirtualDir exefs) const {
57 if (exefs == nullptr) 62 if (exefs == nullptr)
58 return exefs; 63 return exefs;
59 64
65 if (Settings::values.dump_exefs) {
66 LOG_INFO(Loader, "Dumping ExeFS for title_id={:016X}", title_id);
67 const auto dump_dir = Service::FileSystem::GetModificationDumpRoot(title_id);
68 if (dump_dir != nullptr) {
69 const auto exefs_dir = GetOrCreateDirectoryRelative(dump_dir, "/exefs");
70 VfsRawCopyD(exefs, exefs_dir);
71 }
72 }
73
60 const auto installed = Service::FileSystem::GetUnionContents(); 74 const auto installed = Service::FileSystem::GetUnionContents();
61 75
62 // Game Updates 76 // Game Updates
@@ -70,6 +84,30 @@ VirtualDir PatchManager::PatchExeFS(VirtualDir exefs) const {
70 exefs = update->GetExeFS(); 84 exefs = update->GetExeFS();
71 } 85 }
72 86
87 // LayeredExeFS
88 const auto load_dir = Service::FileSystem::GetModificationLoadRoot(title_id);
89 if (load_dir != nullptr && load_dir->GetSize() > 0) {
90 auto patch_dirs = load_dir->GetSubdirectories();
91 std::sort(
92 patch_dirs.begin(), patch_dirs.end(),
93 [](const VirtualDir& l, const VirtualDir& r) { return l->GetName() < r->GetName(); });
94
95 std::vector<VirtualDir> layers;
96 layers.reserve(patch_dirs.size() + 1);
97 for (const auto& subdir : patch_dirs) {
98 auto exefs_dir = subdir->GetSubdirectory("exefs");
99 if (exefs_dir != nullptr)
100 layers.push_back(std::move(exefs_dir));
101 }
102 layers.push_back(exefs);
103
104 auto layered = LayeredVfsDirectory::MakeLayeredDirectory(std::move(layers));
105 if (layered != nullptr) {
106 LOG_INFO(Loader, " ExeFS: LayeredExeFS patches applied successfully");
107 exefs = std::move(layered);
108 }
109 }
110
73 return exefs; 111 return exefs;
74} 112}
75 113
@@ -314,18 +352,25 @@ std::map<std::string, std::string, std::less<>> PatchManager::GetPatchVersionNam
314 if (IsDirValidAndNonEmpty(exefs_dir)) { 352 if (IsDirValidAndNonEmpty(exefs_dir)) {
315 bool ips = false; 353 bool ips = false;
316 bool ipswitch = false; 354 bool ipswitch = false;
355 bool layeredfs = false;
317 356
318 for (const auto& file : exefs_dir->GetFiles()) { 357 for (const auto& file : exefs_dir->GetFiles()) {
319 if (file->GetExtension() == "ips") 358 if (file->GetExtension() == "ips") {
320 ips = true; 359 ips = true;
321 else if (file->GetExtension() == "pchtxt") 360 } else if (file->GetExtension() == "pchtxt") {
322 ipswitch = true; 361 ipswitch = true;
362 } else if (std::find(EXEFS_FILE_NAMES.begin(), EXEFS_FILE_NAMES.end(),
363 file->GetName()) != EXEFS_FILE_NAMES.end()) {
364 layeredfs = true;
365 }
323 } 366 }
324 367
325 if (ips) 368 if (ips)
326 AppendCommaIfNotEmpty(types, "IPS"); 369 AppendCommaIfNotEmpty(types, "IPS");
327 if (ipswitch) 370 if (ipswitch)
328 AppendCommaIfNotEmpty(types, "IPSwitch"); 371 AppendCommaIfNotEmpty(types, "IPSwitch");
372 if (layeredfs)
373 AppendCommaIfNotEmpty(types, "LayeredExeFS");
329 } 374 }
330 if (IsDirValidAndNonEmpty(mod->GetSubdirectory("romfs"))) 375 if (IsDirValidAndNonEmpty(mod->GetSubdirectory("romfs")))
331 AppendCommaIfNotEmpty(types, "LayeredFS"); 376 AppendCommaIfNotEmpty(types, "LayeredFS");
diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp
index bdcc889e0..687dea409 100644
--- a/src/core/gdbstub/gdbstub.cpp
+++ b/src/core/gdbstub/gdbstub.cpp
@@ -71,10 +71,6 @@ constexpr u32 PSTATE_REGISTER = 33;
71constexpr u32 UC_ARM64_REG_Q0 = 34; 71constexpr u32 UC_ARM64_REG_Q0 = 34;
72constexpr u32 FPCR_REGISTER = 66; 72constexpr u32 FPCR_REGISTER = 66;
73 73
74// TODO/WiP - Used while working on support for FPU
75constexpr u32 TODO_DUMMY_REG_997 = 997;
76constexpr u32 TODO_DUMMY_REG_998 = 998;
77
78// For sample XML files see the GDB source /gdb/features 74// For sample XML files see the GDB source /gdb/features
79// GDB also wants the l character at the start 75// GDB also wants the l character at the start
80// This XML defines what the registers are for this specific ARM device 76// This XML defines what the registers are for this specific ARM device
@@ -260,6 +256,36 @@ static void RegWrite(std::size_t id, u64 val, Kernel::Thread* thread = nullptr)
260 } 256 }
261} 257}
262 258
259static u128 FpuRead(std::size_t id, Kernel::Thread* thread = nullptr) {
260 if (!thread) {
261 return u128{0};
262 }
263
264 auto& thread_context = thread->GetContext();
265
266 if (id >= UC_ARM64_REG_Q0 && id < FPCR_REGISTER) {
267 return thread_context.vector_registers[id - UC_ARM64_REG_Q0];
268 } else if (id == FPCR_REGISTER) {
269 return u128{thread_context.fpcr, 0};
270 } else {
271 return u128{0};
272 }
273}
274
275static void FpuWrite(std::size_t id, u128 val, Kernel::Thread* thread = nullptr) {
276 if (!thread) {
277 return;
278 }
279
280 auto& thread_context = thread->GetContext();
281
282 if (id >= UC_ARM64_REG_Q0 && id < FPCR_REGISTER) {
283 thread_context.vector_registers[id - UC_ARM64_REG_Q0] = val;
284 } else if (id == FPCR_REGISTER) {
285 thread_context.fpcr = val[0];
286 }
287}
288
263/** 289/**
264 * Turns hex string character into the equivalent byte. 290 * Turns hex string character into the equivalent byte.
265 * 291 *
@@ -409,6 +435,27 @@ static u64 GdbHexToLong(const u8* src) {
409 return output; 435 return output;
410} 436}
411 437
438/**
439 * Convert a gdb-formatted hex string into a u128.
440 *
441 * @param src Pointer to hex string.
442 */
443static u128 GdbHexToU128(const u8* src) {
444 u128 output;
445
446 for (int i = 0; i < 16; i += 2) {
447 output[0] = (output[0] << 4) | HexCharToValue(src[15 - i - 1]);
448 output[0] = (output[0] << 4) | HexCharToValue(src[15 - i]);
449 }
450
451 for (int i = 0; i < 16; i += 2) {
452 output[1] = (output[1] << 4) | HexCharToValue(src[16 + 15 - i - 1]);
453 output[1] = (output[1] << 4) | HexCharToValue(src[16 + 15 - i]);
454 }
455
456 return output;
457}
458
412/// Read a byte from the gdb client. 459/// Read a byte from the gdb client.
413static u8 ReadByte() { 460static u8 ReadByte() {
414 u8 c; 461 u8 c;
@@ -599,8 +646,7 @@ static void HandleQuery() {
599 for (u32 core = 0; core < Core::NUM_CPU_CORES; core++) { 646 for (u32 core = 0; core < Core::NUM_CPU_CORES; core++) {
600 const auto& threads = Core::System::GetInstance().Scheduler(core).GetThreadList(); 647 const auto& threads = Core::System::GetInstance().Scheduler(core).GetThreadList();
601 for (const auto& thread : threads) { 648 for (const auto& thread : threads) {
602 val += fmt::format("{:x}", thread->GetThreadID()); 649 val += fmt::format("{:x},", thread->GetThreadID());
603 val += ",";
604 } 650 }
605 } 651 }
606 val.pop_back(); 652 val.pop_back();
@@ -791,11 +837,15 @@ static void ReadRegister() {
791 } else if (id == PSTATE_REGISTER) { 837 } else if (id == PSTATE_REGISTER) {
792 IntToGdbHex(reply, static_cast<u32>(RegRead(id, current_thread))); 838 IntToGdbHex(reply, static_cast<u32>(RegRead(id, current_thread)));
793 } else if (id >= UC_ARM64_REG_Q0 && id < FPCR_REGISTER) { 839 } else if (id >= UC_ARM64_REG_Q0 && id < FPCR_REGISTER) {
794 LongToGdbHex(reply, RegRead(id, current_thread)); 840 u128 r = FpuRead(id, current_thread);
841 LongToGdbHex(reply, r[0]);
842 LongToGdbHex(reply + 16, r[1]);
795 } else if (id == FPCR_REGISTER) { 843 } else if (id == FPCR_REGISTER) {
796 LongToGdbHex(reply, RegRead(TODO_DUMMY_REG_998, current_thread)); 844 u128 r = FpuRead(id, current_thread);
797 } else { 845 IntToGdbHex(reply, static_cast<u32>(r[0]));
798 LongToGdbHex(reply, RegRead(TODO_DUMMY_REG_997, current_thread)); 846 } else if (id == FPCR_REGISTER + 1) {
847 u128 r = FpuRead(id, current_thread);
848 IntToGdbHex(reply, static_cast<u32>(r[0] >> 32));
799 } 849 }
800 850
801 SendReply(reinterpret_cast<char*>(reply)); 851 SendReply(reinterpret_cast<char*>(reply));
@@ -822,13 +872,18 @@ static void ReadRegisters() {
822 872
823 bufptr += 8; 873 bufptr += 8;
824 874
825 for (u32 reg = UC_ARM64_REG_Q0; reg <= UC_ARM64_REG_Q0 + 31; reg++) { 875 u128 r;
826 LongToGdbHex(bufptr + reg * 16, RegRead(reg, current_thread)); 876
877 for (u32 reg = UC_ARM64_REG_Q0; reg < FPCR_REGISTER; reg++) {
878 r = FpuRead(reg, current_thread);
879 LongToGdbHex(bufptr + reg * 32, r[0]);
880 LongToGdbHex(bufptr + reg * 32 + 16, r[1]);
827 } 881 }
828 882
829 bufptr += 32 * 32; 883 bufptr += 32 * 32;
830 884
831 LongToGdbHex(bufptr, RegRead(TODO_DUMMY_REG_998, current_thread)); 885 r = FpuRead(FPCR_REGISTER, current_thread);
886 IntToGdbHex(bufptr, static_cast<u32>(r[0]));
832 887
833 bufptr += 8; 888 bufptr += 8;
834 889
@@ -853,14 +908,12 @@ static void WriteRegister() {
853 } else if (id == PSTATE_REGISTER) { 908 } else if (id == PSTATE_REGISTER) {
854 RegWrite(id, GdbHexToInt(buffer_ptr), current_thread); 909 RegWrite(id, GdbHexToInt(buffer_ptr), current_thread);
855 } else if (id >= UC_ARM64_REG_Q0 && id < FPCR_REGISTER) { 910 } else if (id >= UC_ARM64_REG_Q0 && id < FPCR_REGISTER) {
856 RegWrite(id, GdbHexToLong(buffer_ptr), current_thread); 911 FpuWrite(id, GdbHexToU128(buffer_ptr), current_thread);
857 } else if (id == FPCR_REGISTER) { 912 } else if (id == FPCR_REGISTER) {
858 RegWrite(TODO_DUMMY_REG_998, GdbHexToLong(buffer_ptr), current_thread); 913 } else if (id == FPCR_REGISTER + 1) {
859 } else {
860 RegWrite(TODO_DUMMY_REG_997, GdbHexToLong(buffer_ptr), current_thread);
861 } 914 }
862 915
863 // Update Unicorn context skipping scheduler, no running threads at this point 916 // Update ARM context, skipping scheduler - no running threads at this point
864 Core::System::GetInstance() 917 Core::System::GetInstance()
865 .ArmInterface(current_core) 918 .ArmInterface(current_core)
866 .LoadContext(current_thread->GetContext()); 919 .LoadContext(current_thread->GetContext());
@@ -885,13 +938,13 @@ static void WriteRegisters() {
885 } else if (reg >= UC_ARM64_REG_Q0 && reg < FPCR_REGISTER) { 938 } else if (reg >= UC_ARM64_REG_Q0 && reg < FPCR_REGISTER) {
886 RegWrite(reg, GdbHexToLong(buffer_ptr + i * 16), current_thread); 939 RegWrite(reg, GdbHexToLong(buffer_ptr + i * 16), current_thread);
887 } else if (reg == FPCR_REGISTER) { 940 } else if (reg == FPCR_REGISTER) {
888 RegWrite(TODO_DUMMY_REG_998, GdbHexToLong(buffer_ptr + i * 16), current_thread); 941 RegWrite(FPCR_REGISTER, GdbHexToLong(buffer_ptr + i * 16), current_thread);
889 } else { 942 } else if (reg == FPCR_REGISTER + 1) {
890 UNIMPLEMENTED(); 943 RegWrite(FPCR_REGISTER, GdbHexToLong(buffer_ptr + i * 16), current_thread);
891 } 944 }
892 } 945 }
893 946
894 // Update Unicorn context skipping scheduler, no running threads at this point 947 // Update ARM context, skipping scheduler - no running threads at this point
895 Core::System::GetInstance() 948 Core::System::GetInstance()
896 .ArmInterface(current_core) 949 .ArmInterface(current_core)
897 .LoadContext(current_thread->GetContext()); 950 .LoadContext(current_thread->GetContext());
@@ -917,12 +970,6 @@ static void ReadMemory() {
917 SendReply("E01"); 970 SendReply("E01");
918 } 971 }
919 972
920 const auto& vm_manager = Core::CurrentProcess()->VMManager();
921 if (addr < vm_manager.GetCodeRegionBaseAddress() ||
922 addr >= vm_manager.GetMapRegionEndAddress()) {
923 return SendReply("E00");
924 }
925
926 if (!Memory::IsValidVirtualAddress(addr)) { 973 if (!Memory::IsValidVirtualAddress(addr)) {
927 return SendReply("E00"); 974 return SendReply("E00");
928 } 975 }
@@ -967,7 +1014,7 @@ void Break(bool is_memory_break) {
967static void Step() { 1014static void Step() {
968 if (command_length > 1) { 1015 if (command_length > 1) {
969 RegWrite(PC_REGISTER, GdbHexToLong(command_buffer + 1), current_thread); 1016 RegWrite(PC_REGISTER, GdbHexToLong(command_buffer + 1), current_thread);
970 // Update Unicorn context skipping scheduler, no running threads at this point 1017 // Update ARM context, skipping scheduler - no running threads at this point
971 Core::System::GetInstance() 1018 Core::System::GetInstance()
972 .ArmInterface(current_core) 1019 .ArmInterface(current_core)
973 .LoadContext(current_thread->GetContext()); 1020 .LoadContext(current_thread->GetContext());
@@ -1010,7 +1057,7 @@ static bool CommitBreakpoint(BreakpointType type, VAddr addr, u64 len) {
1010 breakpoint.addr = addr; 1057 breakpoint.addr = addr;
1011 breakpoint.len = len; 1058 breakpoint.len = len;
1012 Memory::ReadBlock(addr, breakpoint.inst.data(), breakpoint.inst.size()); 1059 Memory::ReadBlock(addr, breakpoint.inst.data(), breakpoint.inst.size());
1013 static constexpr std::array<u8, 4> btrap{{0x00, 0x7d, 0x20, 0xd4}}; 1060 static constexpr std::array<u8, 4> btrap{0x00, 0x7d, 0x20, 0xd4};
1014 Memory::WriteBlock(addr, btrap.data(), btrap.size()); 1061 Memory::WriteBlock(addr, btrap.data(), btrap.size());
1015 Core::System::GetInstance().InvalidateCpuInstructionCaches(); 1062 Core::System::GetInstance().InvalidateCpuInstructionCaches();
1016 p.insert({addr, breakpoint}); 1063 p.insert({addr, breakpoint});
@@ -1321,13 +1368,15 @@ void SetCpuStepFlag(bool is_step) {
1321} 1368}
1322 1369
1323void SendTrap(Kernel::Thread* thread, int trap) { 1370void SendTrap(Kernel::Thread* thread, int trap) {
1324 if (send_trap) { 1371 if (!send_trap) {
1325 if (!halt_loop || current_thread == thread) { 1372 return;
1326 current_thread = thread;
1327 SendSignal(thread, trap);
1328 }
1329 halt_loop = true;
1330 send_trap = false;
1331 } 1373 }
1374
1375 if (!halt_loop || current_thread == thread) {
1376 current_thread = thread;
1377 SendSignal(thread, trap);
1378 }
1379 halt_loop = true;
1380 send_trap = false;
1332} 1381}
1333}; // namespace GDBStub 1382}; // namespace GDBStub
diff --git a/src/core/hle/kernel/handle_table.cpp b/src/core/hle/kernel/handle_table.cpp
index 5ee5c05e3..1bf79b692 100644
--- a/src/core/hle/kernel/handle_table.cpp
+++ b/src/core/hle/kernel/handle_table.cpp
@@ -12,12 +12,23 @@
12#include "core/hle/kernel/thread.h" 12#include "core/hle/kernel/thread.h"
13 13
14namespace Kernel { 14namespace Kernel {
15namespace {
16constexpr u16 GetSlot(Handle handle) {
17 return handle >> 15;
18}
19
20constexpr u16 GetGeneration(Handle handle) {
21 return handle & 0x7FFF;
22}
23} // Anonymous namespace
15 24
16HandleTable::HandleTable() { 25HandleTable::HandleTable() {
17 next_generation = 1; 26 next_generation = 1;
18 Clear(); 27 Clear();
19} 28}
20 29
30HandleTable::~HandleTable() = default;
31
21ResultVal<Handle> HandleTable::Create(SharedPtr<Object> obj) { 32ResultVal<Handle> HandleTable::Create(SharedPtr<Object> obj) {
22 DEBUG_ASSERT(obj != nullptr); 33 DEBUG_ASSERT(obj != nullptr);
23 34
diff --git a/src/core/hle/kernel/handle_table.h b/src/core/hle/kernel/handle_table.h
index 9e2f33e8a..e3f3e3fb8 100644
--- a/src/core/hle/kernel/handle_table.h
+++ b/src/core/hle/kernel/handle_table.h
@@ -43,6 +43,7 @@ enum KernelHandle : Handle {
43class HandleTable final : NonCopyable { 43class HandleTable final : NonCopyable {
44public: 44public:
45 HandleTable(); 45 HandleTable();
46 ~HandleTable();
46 47
47 /** 48 /**
48 * Allocates a handle for the given object. 49 * Allocates a handle for the given object.
@@ -89,18 +90,8 @@ public:
89 void Clear(); 90 void Clear();
90 91
91private: 92private:
92 /** 93 /// This is the maximum limit of handles allowed per process in Horizon
93 * This is the maximum limit of handles allowed per process in CTR-OS. It can be further 94 static constexpr std::size_t MAX_COUNT = 1024;
94 * reduced by ExHeader values, but this is not emulated here.
95 */
96 static const std::size_t MAX_COUNT = 4096;
97
98 static u16 GetSlot(Handle handle) {
99 return handle >> 15;
100 }
101 static u16 GetGeneration(Handle handle) {
102 return handle & 0x7FFF;
103 }
104 95
105 /// Stores the Object referenced by the handle or null if the slot is empty. 96 /// Stores the Object referenced by the handle or null if the slot is empty.
106 std::array<SharedPtr<Object>, MAX_COUNT> objects; 97 std::array<SharedPtr<Object>, MAX_COUNT> objects;
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index b8b6b4d49..f287f7c97 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -671,7 +671,8 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
671 break; 671 break;
672 } 672 }
673 default: 673 default:
674 UNIMPLEMENTED(); 674 LOG_WARNING(Kernel_SVC, "(STUBBED) Unimplemented svcGetInfo id=0x{:016X}", info_id);
675 return ERR_INVALID_ENUM_VALUE;
675 } 676 }
676 677
677 return RESULT_SUCCESS; 678 return RESULT_SUCCESS;
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index 7b4af9eb7..4f17b52f9 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -6,8 +6,6 @@
6#include <cinttypes> 6#include <cinttypes>
7#include <cstring> 7#include <cstring>
8#include <stack> 8#include <stack>
9#include "applets/applets.h"
10#include "applets/software_keyboard.h"
11#include "audio_core/audio_renderer.h" 9#include "audio_core/audio_renderer.h"
12#include "core/core.h" 10#include "core/core.h"
13#include "core/hle/ipc_helpers.h" 11#include "core/hle/ipc_helpers.h"
@@ -18,6 +16,9 @@
18#include "core/hle/service/am/am.h" 16#include "core/hle/service/am/am.h"
19#include "core/hle/service/am/applet_ae.h" 17#include "core/hle/service/am/applet_ae.h"
20#include "core/hle/service/am/applet_oe.h" 18#include "core/hle/service/am/applet_oe.h"
19#include "core/hle/service/am/applets/applets.h"
20#include "core/hle/service/am/applets/software_keyboard.h"
21#include "core/hle/service/am/applets/stub_applet.h"
21#include "core/hle/service/am/idle.h" 22#include "core/hle/service/am/idle.h"
22#include "core/hle/service/am/omm.h" 23#include "core/hle/service/am/omm.h"
23#include "core/hle/service/am/spsm.h" 24#include "core/hle/service/am/spsm.h"
@@ -482,11 +483,15 @@ void ICommonStateGetter::GetDefaultDisplayResolution(Kernel::HLERequestContext&
482 rb.Push(RESULT_SUCCESS); 483 rb.Push(RESULT_SUCCESS);
483 484
484 if (Settings::values.use_docked_mode) { 485 if (Settings::values.use_docked_mode) {
485 rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedWidth)); 486 rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedWidth) *
486 rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedHeight)); 487 static_cast<u32>(Settings::values.resolution_factor));
488 rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedHeight) *
489 static_cast<u32>(Settings::values.resolution_factor));
487 } else { 490 } else {
488 rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedWidth)); 491 rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedWidth) *
489 rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedHeight)); 492 static_cast<u32>(Settings::values.resolution_factor));
493 rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedHeight) *
494 static_cast<u32>(Settings::values.resolution_factor));
490 } 495 }
491 496
492 LOG_DEBUG(Service_AM, "called"); 497 LOG_DEBUG(Service_AM, "called");
@@ -532,8 +537,7 @@ void ICommonStateGetter::GetPerformanceMode(Kernel::HLERequestContext& ctx) {
532class ILibraryAppletAccessor final : public ServiceFramework<ILibraryAppletAccessor> { 537class ILibraryAppletAccessor final : public ServiceFramework<ILibraryAppletAccessor> {
533public: 538public:
534 explicit ILibraryAppletAccessor(std::shared_ptr<Applets::Applet> applet) 539 explicit ILibraryAppletAccessor(std::shared_ptr<Applets::Applet> applet)
535 : ServiceFramework("ILibraryAppletAccessor"), applet(std::move(applet)), 540 : ServiceFramework("ILibraryAppletAccessor"), applet(std::move(applet)) {
536 broker(std::make_shared<Applets::AppletDataBroker>()) {
537 // clang-format off 541 // clang-format off
538 static const FunctionInfo functions[] = { 542 static const FunctionInfo functions[] = {
539 {0, &ILibraryAppletAccessor::GetAppletStateChangedEvent, "GetAppletStateChangedEvent"}, 543 {0, &ILibraryAppletAccessor::GetAppletStateChangedEvent, "GetAppletStateChangedEvent"},
@@ -562,7 +566,7 @@ public:
562 566
563private: 567private:
564 void GetAppletStateChangedEvent(Kernel::HLERequestContext& ctx) { 568 void GetAppletStateChangedEvent(Kernel::HLERequestContext& ctx) {
565 const auto event = broker->GetStateChangedEvent(); 569 const auto event = applet->GetBroker().GetStateChangedEvent();
566 event->Signal(); 570 event->Signal();
567 571
568 IPC::ResponseBuilder rb{ctx, 2, 1}; 572 IPC::ResponseBuilder rb{ctx, 2, 1};
@@ -590,7 +594,7 @@ private:
590 void Start(Kernel::HLERequestContext& ctx) { 594 void Start(Kernel::HLERequestContext& ctx) {
591 ASSERT(applet != nullptr); 595 ASSERT(applet != nullptr);
592 596
593 applet->Initialize(broker); 597 applet->Initialize();
594 applet->Execute(); 598 applet->Execute();
595 599
596 IPC::ResponseBuilder rb{ctx, 2}; 600 IPC::ResponseBuilder rb{ctx, 2};
@@ -601,7 +605,7 @@ private:
601 605
602 void PushInData(Kernel::HLERequestContext& ctx) { 606 void PushInData(Kernel::HLERequestContext& ctx) {
603 IPC::RequestParser rp{ctx}; 607 IPC::RequestParser rp{ctx};
604 broker->PushNormalDataFromGame(*rp.PopIpcInterface<IStorage>()); 608 applet->GetBroker().PushNormalDataFromGame(*rp.PopIpcInterface<IStorage>());
605 609
606 IPC::ResponseBuilder rb{ctx, 2}; 610 IPC::ResponseBuilder rb{ctx, 2};
607 rb.Push(RESULT_SUCCESS); 611 rb.Push(RESULT_SUCCESS);
@@ -612,7 +616,7 @@ private:
612 void PopOutData(Kernel::HLERequestContext& ctx) { 616 void PopOutData(Kernel::HLERequestContext& ctx) {
613 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 617 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
614 618
615 const auto storage = broker->PopNormalDataToGame(); 619 const auto storage = applet->GetBroker().PopNormalDataToGame();
616 if (storage == nullptr) { 620 if (storage == nullptr) {
617 rb.Push(ERR_NO_DATA_IN_CHANNEL); 621 rb.Push(ERR_NO_DATA_IN_CHANNEL);
618 return; 622 return;
@@ -626,7 +630,7 @@ private:
626 630
627 void PushInteractiveInData(Kernel::HLERequestContext& ctx) { 631 void PushInteractiveInData(Kernel::HLERequestContext& ctx) {
628 IPC::RequestParser rp{ctx}; 632 IPC::RequestParser rp{ctx};
629 broker->PushInteractiveDataFromGame(*rp.PopIpcInterface<IStorage>()); 633 applet->GetBroker().PushInteractiveDataFromGame(*rp.PopIpcInterface<IStorage>());
630 634
631 ASSERT(applet->IsInitialized()); 635 ASSERT(applet->IsInitialized());
632 applet->ExecuteInteractive(); 636 applet->ExecuteInteractive();
@@ -641,7 +645,7 @@ private:
641 void PopInteractiveOutData(Kernel::HLERequestContext& ctx) { 645 void PopInteractiveOutData(Kernel::HLERequestContext& ctx) {
642 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 646 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
643 647
644 const auto storage = broker->PopInteractiveDataToGame(); 648 const auto storage = applet->GetBroker().PopInteractiveDataToGame();
645 if (storage == nullptr) { 649 if (storage == nullptr) {
646 rb.Push(ERR_NO_DATA_IN_CHANNEL); 650 rb.Push(ERR_NO_DATA_IN_CHANNEL);
647 return; 651 return;
@@ -656,7 +660,7 @@ private:
656 void GetPopOutDataEvent(Kernel::HLERequestContext& ctx) { 660 void GetPopOutDataEvent(Kernel::HLERequestContext& ctx) {
657 IPC::ResponseBuilder rb{ctx, 2, 1}; 661 IPC::ResponseBuilder rb{ctx, 2, 1};
658 rb.Push(RESULT_SUCCESS); 662 rb.Push(RESULT_SUCCESS);
659 rb.PushCopyObjects(broker->GetNormalDataEvent()); 663 rb.PushCopyObjects(applet->GetBroker().GetNormalDataEvent());
660 664
661 LOG_DEBUG(Service_AM, "called"); 665 LOG_DEBUG(Service_AM, "called");
662 } 666 }
@@ -664,13 +668,12 @@ private:
664 void GetPopInteractiveOutDataEvent(Kernel::HLERequestContext& ctx) { 668 void GetPopInteractiveOutDataEvent(Kernel::HLERequestContext& ctx) {
665 IPC::ResponseBuilder rb{ctx, 2, 1}; 669 IPC::ResponseBuilder rb{ctx, 2, 1};
666 rb.Push(RESULT_SUCCESS); 670 rb.Push(RESULT_SUCCESS);
667 rb.PushCopyObjects(broker->GetInteractiveDataEvent()); 671 rb.PushCopyObjects(applet->GetBroker().GetInteractiveDataEvent());
668 672
669 LOG_DEBUG(Service_AM, "called"); 673 LOG_DEBUG(Service_AM, "called");
670 } 674 }
671 675
672 std::shared_ptr<Applets::Applet> applet; 676 std::shared_ptr<Applets::Applet> applet;
673 std::shared_ptr<Applets::AppletDataBroker> broker;
674}; 677};
675 678
676void IStorage::Open(Kernel::HLERequestContext& ctx) { 679void IStorage::Open(Kernel::HLERequestContext& ctx) {
@@ -763,8 +766,9 @@ static std::shared_ptr<Applets::Applet> GetAppletFromId(AppletId id) {
763 case AppletId::SoftwareKeyboard: 766 case AppletId::SoftwareKeyboard:
764 return std::make_shared<Applets::SoftwareKeyboard>(); 767 return std::make_shared<Applets::SoftwareKeyboard>();
765 default: 768 default:
766 UNREACHABLE_MSG("Unimplemented AppletId [{:08X}]!", static_cast<u32>(id)); 769 LOG_ERROR(Service_AM, "Unimplemented AppletId [{:08X}]! -- Falling back to stub!",
767 return nullptr; 770 static_cast<u32>(id));
771 return std::make_shared<Applets::StubApplet>();
768 } 772 }
769} 773}
770 774
diff --git a/src/core/hle/service/am/applets/applets.cpp b/src/core/hle/service/am/applets/applets.cpp
index 8adb81823..becbadd06 100644
--- a/src/core/hle/service/am/applets/applets.cpp
+++ b/src/core/hle/service/am/applets/applets.cpp
@@ -98,10 +98,8 @@ Applet::Applet() = default;
98 98
99Applet::~Applet() = default; 99Applet::~Applet() = default;
100 100
101void Applet::Initialize(std::shared_ptr<AppletDataBroker> broker_) { 101void Applet::Initialize() {
102 broker = std::move(broker_); 102 const auto common = broker.PopNormalDataToApplet();
103
104 const auto common = broker->PopNormalDataToApplet();
105 ASSERT(common != nullptr); 103 ASSERT(common != nullptr);
106 104
107 const auto common_data = common->GetData(); 105 const auto common_data = common->GetData();
diff --git a/src/core/hle/service/am/applets/applets.h b/src/core/hle/service/am/applets/applets.h
index 136445649..f65ea119c 100644
--- a/src/core/hle/service/am/applets/applets.h
+++ b/src/core/hle/service/am/applets/applets.h
@@ -4,14 +4,17 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <functional>
8#include <memory> 7#include <memory>
9#include <queue> 8#include <queue>
10#include "common/swap.h" 9#include "common/swap.h"
11#include "core/hle/kernel/event.h" 10#include "core/hle/kernel/kernel.h"
12 11
13union ResultCode; 12union ResultCode;
14 13
14namespace Kernel {
15class Event;
16}
17
15namespace Service::AM { 18namespace Service::AM {
16 19
17class IStorage; 20class IStorage;
@@ -43,19 +46,26 @@ public:
43 46
44private: 47private:
45 // Queues are named from applet's perspective 48 // Queues are named from applet's perspective
46 std::queue<std::unique_ptr<IStorage>> 49
47 in_channel; // PopNormalDataToApplet and PushNormalDataFromGame 50 // PopNormalDataToApplet and PushNormalDataFromGame
48 std::queue<std::unique_ptr<IStorage>> 51 std::queue<std::unique_ptr<IStorage>> in_channel;
49 out_channel; // PopNormalDataToGame and PushNormalDataFromApplet 52
50 std::queue<std::unique_ptr<IStorage>> 53 // PopNormalDataToGame and PushNormalDataFromApplet
51 in_interactive_channel; // PopInteractiveDataToApplet and PushInteractiveDataFromGame 54 std::queue<std::unique_ptr<IStorage>> out_channel;
52 std::queue<std::unique_ptr<IStorage>> 55
53 out_interactive_channel; // PopInteractiveDataToGame and PushInteractiveDataFromApplet 56 // PopInteractiveDataToApplet and PushInteractiveDataFromGame
57 std::queue<std::unique_ptr<IStorage>> in_interactive_channel;
58
59 // PopInteractiveDataToGame and PushInteractiveDataFromApplet
60 std::queue<std::unique_ptr<IStorage>> out_interactive_channel;
54 61
55 Kernel::SharedPtr<Kernel::Event> state_changed_event; 62 Kernel::SharedPtr<Kernel::Event> state_changed_event;
56 Kernel::SharedPtr<Kernel::Event> pop_out_data_event; // Signaled on PushNormalDataFromApplet 63
57 Kernel::SharedPtr<Kernel::Event> 64 // Signaled on PushNormalDataFromApplet
58 pop_interactive_out_data_event; // Signaled on PushInteractiveDataFromApplet 65 Kernel::SharedPtr<Kernel::Event> pop_out_data_event;
66
67 // Signaled on PushInteractiveDataFromApplet
68 Kernel::SharedPtr<Kernel::Event> pop_interactive_out_data_event;
59}; 69};
60 70
61class Applet { 71class Applet {
@@ -63,7 +73,7 @@ public:
63 Applet(); 73 Applet();
64 virtual ~Applet(); 74 virtual ~Applet();
65 75
66 virtual void Initialize(std::shared_ptr<AppletDataBroker> broker); 76 virtual void Initialize();
67 77
68 virtual bool TransactionComplete() const = 0; 78 virtual bool TransactionComplete() const = 0;
69 virtual ResultCode GetStatus() const = 0; 79 virtual ResultCode GetStatus() const = 0;
@@ -74,6 +84,14 @@ public:
74 return initialized; 84 return initialized;
75 } 85 }
76 86
87 AppletDataBroker& GetBroker() {
88 return broker;
89 }
90
91 const AppletDataBroker& GetBroker() const {
92 return broker;
93 }
94
77protected: 95protected:
78 struct CommonArguments { 96 struct CommonArguments {
79 u32_le arguments_version; 97 u32_le arguments_version;
@@ -85,8 +103,8 @@ protected:
85 }; 103 };
86 static_assert(sizeof(CommonArguments) == 0x20, "CommonArguments has incorrect size."); 104 static_assert(sizeof(CommonArguments) == 0x20, "CommonArguments has incorrect size.");
87 105
88 CommonArguments common_args; 106 CommonArguments common_args{};
89 std::shared_ptr<AppletDataBroker> broker; 107 AppletDataBroker broker;
90 bool initialized = false; 108 bool initialized = false;
91}; 109};
92 110
diff --git a/src/core/hle/service/am/applets/software_keyboard.cpp b/src/core/hle/service/am/applets/software_keyboard.cpp
index c4b76a515..981bdec51 100644
--- a/src/core/hle/service/am/applets/software_keyboard.cpp
+++ b/src/core/hle/service/am/applets/software_keyboard.cpp
@@ -42,21 +42,21 @@ SoftwareKeyboard::SoftwareKeyboard() = default;
42 42
43SoftwareKeyboard::~SoftwareKeyboard() = default; 43SoftwareKeyboard::~SoftwareKeyboard() = default;
44 44
45void SoftwareKeyboard::Initialize(std::shared_ptr<AppletDataBroker> broker_) { 45void SoftwareKeyboard::Initialize() {
46 complete = false; 46 complete = false;
47 initial_text.clear(); 47 initial_text.clear();
48 final_data.clear(); 48 final_data.clear();
49 49
50 Applet::Initialize(std::move(broker_)); 50 Applet::Initialize();
51 51
52 const auto keyboard_config_storage = broker->PopNormalDataToApplet(); 52 const auto keyboard_config_storage = broker.PopNormalDataToApplet();
53 ASSERT(keyboard_config_storage != nullptr); 53 ASSERT(keyboard_config_storage != nullptr);
54 const auto& keyboard_config = keyboard_config_storage->GetData(); 54 const auto& keyboard_config = keyboard_config_storage->GetData();
55 55
56 ASSERT(keyboard_config.size() >= sizeof(KeyboardConfig)); 56 ASSERT(keyboard_config.size() >= sizeof(KeyboardConfig));
57 std::memcpy(&config, keyboard_config.data(), sizeof(KeyboardConfig)); 57 std::memcpy(&config, keyboard_config.data(), sizeof(KeyboardConfig));
58 58
59 const auto work_buffer_storage = broker->PopNormalDataToApplet(); 59 const auto work_buffer_storage = broker.PopNormalDataToApplet();
60 ASSERT(work_buffer_storage != nullptr); 60 ASSERT(work_buffer_storage != nullptr);
61 const auto& work_buffer = work_buffer_storage->GetData(); 61 const auto& work_buffer = work_buffer_storage->GetData();
62 62
@@ -81,7 +81,7 @@ void SoftwareKeyboard::ExecuteInteractive() {
81 if (complete) 81 if (complete)
82 return; 82 return;
83 83
84 const auto storage = broker->PopInteractiveDataToApplet(); 84 const auto storage = broker.PopInteractiveDataToApplet();
85 ASSERT(storage != nullptr); 85 ASSERT(storage != nullptr);
86 const auto data = storage->GetData(); 86 const auto data = storage->GetData();
87 const auto status = static_cast<bool>(data[0]); 87 const auto status = static_cast<bool>(data[0]);
@@ -95,13 +95,13 @@ void SoftwareKeyboard::ExecuteInteractive() {
95 std::memcpy(string.data(), data.data() + 4, string.size() * 2); 95 std::memcpy(string.data(), data.data() + 4, string.size() * 2);
96 frontend.SendTextCheckDialog( 96 frontend.SendTextCheckDialog(
97 Common::UTF16StringFromFixedZeroTerminatedBuffer(string.data(), string.size()), 97 Common::UTF16StringFromFixedZeroTerminatedBuffer(string.data(), string.size()),
98 [this] { broker->SignalStateChanged(); }); 98 [this] { broker.SignalStateChanged(); });
99 } 99 }
100} 100}
101 101
102void SoftwareKeyboard::Execute() { 102void SoftwareKeyboard::Execute() {
103 if (complete) { 103 if (complete) {
104 broker->PushNormalDataFromApplet(IStorage{final_data}); 104 broker.PushNormalDataFromApplet(IStorage{final_data});
105 return; 105 return;
106 } 106 }
107 107
@@ -145,17 +145,17 @@ void SoftwareKeyboard::WriteText(std::optional<std::u16string> text) {
145 final_data = output_main; 145 final_data = output_main;
146 146
147 if (complete) { 147 if (complete) {
148 broker->PushNormalDataFromApplet(IStorage{output_main}); 148 broker.PushNormalDataFromApplet(IStorage{output_main});
149 } else { 149 } else {
150 broker->PushInteractiveDataFromApplet(IStorage{output_sub}); 150 broker.PushInteractiveDataFromApplet(IStorage{output_sub});
151 } 151 }
152 152
153 broker->SignalStateChanged(); 153 broker.SignalStateChanged();
154 } else { 154 } else {
155 output_main[0] = 1; 155 output_main[0] = 1;
156 complete = true; 156 complete = true;
157 broker->PushNormalDataFromApplet(IStorage{output_main}); 157 broker.PushNormalDataFromApplet(IStorage{output_main});
158 broker->SignalStateChanged(); 158 broker.SignalStateChanged();
159 } 159 }
160} 160}
161} // namespace Service::AM::Applets 161} // namespace Service::AM::Applets
diff --git a/src/core/hle/service/am/applets/software_keyboard.h b/src/core/hle/service/am/applets/software_keyboard.h
index 16e1fff66..efd5753a1 100644
--- a/src/core/hle/service/am/applets/software_keyboard.h
+++ b/src/core/hle/service/am/applets/software_keyboard.h
@@ -4,7 +4,12 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
8#include <string>
9#include <vector>
10
7#include "common/common_funcs.h" 11#include "common/common_funcs.h"
12#include "common/swap.h"
8#include "core/hle/service/am/am.h" 13#include "core/hle/service/am/am.h"
9#include "core/hle/service/am/applets/applets.h" 14#include "core/hle/service/am/applets/applets.h"
10 15
@@ -50,7 +55,7 @@ public:
50 SoftwareKeyboard(); 55 SoftwareKeyboard();
51 ~SoftwareKeyboard() override; 56 ~SoftwareKeyboard() override;
52 57
53 void Initialize(std::shared_ptr<AppletDataBroker> broker) override; 58 void Initialize() override;
54 59
55 bool TransactionComplete() const override; 60 bool TransactionComplete() const override;
56 ResultCode GetStatus() const override; 61 ResultCode GetStatus() const override;
diff --git a/src/core/hle/service/am/applets/stub_applet.cpp b/src/core/hle/service/am/applets/stub_applet.cpp
new file mode 100644
index 000000000..ed166b87d
--- /dev/null
+++ b/src/core/hle/service/am/applets/stub_applet.cpp
@@ -0,0 +1,70 @@
1// Copyright 2018 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string>
6
7#include "common/hex_util.h"
8#include "common/logging/log.h"
9#include "core/hle/result.h"
10#include "core/hle/service/am/am.h"
11#include "core/hle/service/am/applets/stub_applet.h"
12
13namespace Service::AM::Applets {
14
15static void LogCurrentStorage(AppletDataBroker& broker, std::string prefix) {
16 std::unique_ptr<IStorage> storage = broker.PopNormalDataToApplet();
17 for (; storage != nullptr; storage = broker.PopNormalDataToApplet()) {
18 const auto data = storage->GetData();
19 LOG_INFO(Service_AM,
20 "called (STUBBED), during {} recieved normal data with size={:08X}, data={}",
21 prefix, data.size(), Common::HexVectorToString(data));
22 }
23
24 storage = broker.PopInteractiveDataToApplet();
25 for (; storage != nullptr; storage = broker.PopInteractiveDataToApplet()) {
26 const auto data = storage->GetData();
27 LOG_INFO(Service_AM,
28 "called (STUBBED), during {} recieved interactive data with size={:08X}, data={}",
29 prefix, data.size(), Common::HexVectorToString(data));
30 }
31}
32
33StubApplet::StubApplet() = default;
34
35StubApplet::~StubApplet() = default;
36
37void StubApplet::Initialize() {
38 LOG_WARNING(Service_AM, "called (STUBBED)");
39 Applet::Initialize();
40 LogCurrentStorage(broker, "Initialize");
41}
42
43bool StubApplet::TransactionComplete() const {
44 LOG_WARNING(Service_AM, "called (STUBBED)");
45 return true;
46}
47
48ResultCode StubApplet::GetStatus() const {
49 LOG_WARNING(Service_AM, "called (STUBBED)");
50 return RESULT_SUCCESS;
51}
52
53void StubApplet::ExecuteInteractive() {
54 LOG_WARNING(Service_AM, "called (STUBBED)");
55 LogCurrentStorage(broker, "ExecuteInteractive");
56
57 broker.PushNormalDataFromApplet(IStorage{std::vector<u8>(0x1000)});
58 broker.PushInteractiveDataFromApplet(IStorage{std::vector<u8>(0x1000)});
59 broker.SignalStateChanged();
60}
61
62void StubApplet::Execute() {
63 LOG_WARNING(Service_AM, "called (STUBBED)");
64 LogCurrentStorage(broker, "Execute");
65
66 broker.PushNormalDataFromApplet(IStorage{std::vector<u8>(0x1000)});
67 broker.PushInteractiveDataFromApplet(IStorage{std::vector<u8>(0x1000)});
68 broker.SignalStateChanged();
69}
70} // namespace Service::AM::Applets
diff --git a/src/core/hle/service/am/applets/stub_applet.h b/src/core/hle/service/am/applets/stub_applet.h
new file mode 100644
index 000000000..7d8dc968d
--- /dev/null
+++ b/src/core/hle/service/am/applets/stub_applet.h
@@ -0,0 +1,24 @@
1// Copyright 2018 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "core/hle/service/am/applets/applets.h"
8
9namespace Service::AM::Applets {
10
11class StubApplet final : public Applet {
12public:
13 StubApplet();
14 ~StubApplet() override;
15
16 void Initialize() override;
17
18 bool TransactionComplete() const override;
19 ResultCode GetStatus() const override;
20 void ExecuteInteractive() override;
21 void Execute() override;
22};
23
24} // namespace Service::AM::Applets
diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp
index ff1edefbb..23e1f1165 100644
--- a/src/core/hle/service/audio/audout_u.cpp
+++ b/src/core/hle/service/audio/audout_u.cpp
@@ -44,8 +44,10 @@ enum class AudioState : u32 {
44 44
45class IAudioOut final : public ServiceFramework<IAudioOut> { 45class IAudioOut final : public ServiceFramework<IAudioOut> {
46public: 46public:
47 IAudioOut(AudoutParams audio_params, AudioCore::AudioOut& audio_core) 47 IAudioOut(AudoutParams audio_params, AudioCore::AudioOut& audio_core, std::string&& device_name,
48 : ServiceFramework("IAudioOut"), audio_core(audio_core), audio_params(audio_params) { 48 std::string&& unique_name)
49 : ServiceFramework("IAudioOut"), audio_core(audio_core), audio_params(audio_params),
50 device_name(std::move(device_name)) {
49 51
50 static const FunctionInfo functions[] = { 52 static const FunctionInfo functions[] = {
51 {0, &IAudioOut::GetAudioOutState, "GetAudioOutState"}, 53 {0, &IAudioOut::GetAudioOutState, "GetAudioOutState"},
@@ -69,7 +71,7 @@ public:
69 Kernel::Event::Create(kernel, Kernel::ResetType::Sticky, "IAudioOutBufferReleased"); 71 Kernel::Event::Create(kernel, Kernel::ResetType::Sticky, "IAudioOutBufferReleased");
70 72
71 stream = audio_core.OpenStream(audio_params.sample_rate, audio_params.channel_count, 73 stream = audio_core.OpenStream(audio_params.sample_rate, audio_params.channel_count,
72 "IAudioOut", [=]() { buffer_event->Signal(); }); 74 std::move(unique_name), [=]() { buffer_event->Signal(); });
73 } 75 }
74 76
75private: 77private:
@@ -177,6 +179,7 @@ private:
177 179
178 AudioCore::AudioOut& audio_core; 180 AudioCore::AudioOut& audio_core;
179 AudioCore::StreamPtr stream; 181 AudioCore::StreamPtr stream;
182 std::string device_name;
180 183
181 AudoutParams audio_params{}; 184 AudoutParams audio_params{};
182 185
@@ -199,7 +202,15 @@ void AudOutU::ListAudioOutsImpl(Kernel::HLERequestContext& ctx) {
199void AudOutU::OpenAudioOutImpl(Kernel::HLERequestContext& ctx) { 202void AudOutU::OpenAudioOutImpl(Kernel::HLERequestContext& ctx) {
200 LOG_DEBUG(Service_Audio, "called"); 203 LOG_DEBUG(Service_Audio, "called");
201 204
202 ctx.WriteBuffer(DefaultDevice); 205 const auto device_name_data{ctx.ReadBuffer()};
206 std::string device_name;
207 if (device_name_data[0] != '\0') {
208 device_name.assign(device_name_data.begin(), device_name_data.end());
209 } else {
210 device_name.assign(DefaultDevice.begin(), DefaultDevice.end());
211 }
212 ctx.WriteBuffer(device_name);
213
203 IPC::RequestParser rp{ctx}; 214 IPC::RequestParser rp{ctx};
204 auto params{rp.PopRaw<AudoutParams>()}; 215 auto params{rp.PopRaw<AudoutParams>()};
205 if (params.channel_count <= 2) { 216 if (params.channel_count <= 2) {
@@ -212,10 +223,9 @@ void AudOutU::OpenAudioOutImpl(Kernel::HLERequestContext& ctx) {
212 params.sample_rate = DefaultSampleRate; 223 params.sample_rate = DefaultSampleRate;
213 } 224 }
214 225
215 // TODO(bunnei): Support more than one IAudioOut interface. When we add this, ListAudioOutsImpl 226 std::string unique_name{fmt::format("{}-{}", device_name, audio_out_interfaces.size())};
216 // will likely need to be updated as well. 227 auto audio_out_interface = std::make_shared<IAudioOut>(
217 ASSERT_MSG(!audio_out_interface, "Unimplemented"); 228 params, *audio_core, std::move(device_name), std::move(unique_name));
218 audio_out_interface = std::make_shared<IAudioOut>(params, *audio_core);
219 229
220 IPC::ResponseBuilder rb{ctx, 6, 0, 1}; 230 IPC::ResponseBuilder rb{ctx, 6, 0, 1};
221 rb.Push(RESULT_SUCCESS); 231 rb.Push(RESULT_SUCCESS);
@@ -224,6 +234,8 @@ void AudOutU::OpenAudioOutImpl(Kernel::HLERequestContext& ctx) {
224 rb.Push<u32>(static_cast<u32>(AudioCore::Codec::PcmFormat::Int16)); 234 rb.Push<u32>(static_cast<u32>(AudioCore::Codec::PcmFormat::Int16));
225 rb.Push<u32>(static_cast<u32>(AudioState::Stopped)); 235 rb.Push<u32>(static_cast<u32>(AudioState::Stopped));
226 rb.PushIpcInterface<Audio::IAudioOut>(audio_out_interface); 236 rb.PushIpcInterface<Audio::IAudioOut>(audio_out_interface);
237
238 audio_out_interfaces.push_back(std::move(audio_out_interface));
227} 239}
228 240
229AudOutU::AudOutU() : ServiceFramework("audout:u") { 241AudOutU::AudOutU() : ServiceFramework("audout:u") {
diff --git a/src/core/hle/service/audio/audout_u.h b/src/core/hle/service/audio/audout_u.h
index dcaf64708..aed4c43b2 100644
--- a/src/core/hle/service/audio/audout_u.h
+++ b/src/core/hle/service/audio/audout_u.h
@@ -4,6 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <vector>
7#include "core/hle/service/service.h" 8#include "core/hle/service/service.h"
8 9
9namespace AudioCore { 10namespace AudioCore {
@@ -24,7 +25,7 @@ public:
24 ~AudOutU() override; 25 ~AudOutU() override;
25 26
26private: 27private:
27 std::shared_ptr<IAudioOut> audio_out_interface; 28 std::vector<std::shared_ptr<IAudioOut>> audio_out_interfaces;
28 std::unique_ptr<AudioCore::AudioOut> audio_core; 29 std::unique_ptr<AudioCore::AudioOut> audio_core;
29 30
30 void ListAudioOutsImpl(Kernel::HLERequestContext& ctx); 31 void ListAudioOutsImpl(Kernel::HLERequestContext& ctx);
diff --git a/src/core/hle/service/filesystem/filesystem.cpp b/src/core/hle/service/filesystem/filesystem.cpp
index 5d6294016..2aa77f68d 100644
--- a/src/core/hle/service/filesystem/filesystem.cpp
+++ b/src/core/hle/service/filesystem/filesystem.cpp
@@ -341,6 +341,10 @@ std::shared_ptr<FileSys::RegisteredCacheUnion> GetUnionContents() {
341 return registered_cache_union; 341 return registered_cache_union;
342} 342}
343 343
344void ClearUnionContents() {
345 registered_cache_union = nullptr;
346}
347
344FileSys::RegisteredCache* GetSystemNANDContents() { 348FileSys::RegisteredCache* GetSystemNANDContents() {
345 LOG_TRACE(Service_FS, "Opening System NAND Contents"); 349 LOG_TRACE(Service_FS, "Opening System NAND Contents");
346 350
@@ -391,6 +395,7 @@ void CreateFactories(FileSys::VfsFilesystem& vfs, bool overwrite) {
391 bis_factory = nullptr; 395 bis_factory = nullptr;
392 save_data_factory = nullptr; 396 save_data_factory = nullptr;
393 sdmc_factory = nullptr; 397 sdmc_factory = nullptr;
398 ClearUnionContents();
394 } 399 }
395 400
396 auto nand_directory = vfs.OpenDirectory(FileUtil::GetUserPath(FileUtil::UserPath::NANDDir), 401 auto nand_directory = vfs.OpenDirectory(FileUtil::GetUserPath(FileUtil::UserPath::NANDDir),
diff --git a/src/core/hle/service/filesystem/filesystem.h b/src/core/hle/service/filesystem/filesystem.h
index ff9182e84..0a6cb6635 100644
--- a/src/core/hle/service/filesystem/filesystem.h
+++ b/src/core/hle/service/filesystem/filesystem.h
@@ -49,6 +49,7 @@ ResultVal<FileSys::VirtualDir> OpenSaveDataSpace(FileSys::SaveDataSpaceId space)
49ResultVal<FileSys::VirtualDir> OpenSDMC(); 49ResultVal<FileSys::VirtualDir> OpenSDMC();
50 50
51std::shared_ptr<FileSys::RegisteredCacheUnion> GetUnionContents(); 51std::shared_ptr<FileSys::RegisteredCacheUnion> GetUnionContents();
52void ClearUnionContents();
52 53
53FileSys::RegisteredCache* GetSystemNANDContents(); 54FileSys::RegisteredCache* GetSystemNANDContents();
54FileSys::RegisteredCache* GetUserNANDContents(); 55FileSys::RegisteredCache* GetUserNANDContents();
diff --git a/src/core/hle/service/hid/controllers/debug_pad.cpp b/src/core/hle/service/hid/controllers/debug_pad.cpp
index e76c83aee..c22357d8c 100644
--- a/src/core/hle/service/hid/controllers/debug_pad.cpp
+++ b/src/core/hle/service/hid/controllers/debug_pad.cpp
@@ -71,8 +71,9 @@ void Controller_DebugPad::OnUpdate(u8* data, std::size_t size) {
71 71
72void Controller_DebugPad::OnLoadInputDevices() { 72void Controller_DebugPad::OnLoadInputDevices() {
73 std::transform(Settings::values.debug_pad_buttons.begin(), 73 std::transform(Settings::values.debug_pad_buttons.begin(),
74 Settings::values.debug_pad_buttons.end(), buttons.begin(), 74 Settings::values.debug_pad_buttons.begin() +
75 Input::CreateDevice<Input::ButtonDevice>); 75 Settings::NativeButton::NUM_BUTTONS_HID,
76 buttons.begin(), Input::CreateDevice<Input::ButtonDevice>);
76 std::transform(Settings::values.debug_pad_analogs.begin(), 77 std::transform(Settings::values.debug_pad_analogs.begin(),
77 Settings::values.debug_pad_analogs.end(), analogs.begin(), 78 Settings::values.debug_pad_analogs.end(), analogs.begin(),
78 Input::CreateDevice<Input::AnalogDevice>); 79 Input::CreateDevice<Input::AnalogDevice>);
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
index 7a88ae029..792d26e52 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
@@ -5,6 +5,8 @@
5#include <cstring> 5#include <cstring>
6#include "common/assert.h" 6#include "common/assert.h"
7#include "common/logging/log.h" 7#include "common/logging/log.h"
8#include "core/core_timing.h"
9#include "core/core_timing_util.h"
8#include "core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h" 10#include "core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h"
9 11
10namespace Service::Nvidia::Devices { 12namespace Service::Nvidia::Devices {
@@ -33,6 +35,8 @@ u32 nvhost_ctrl_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vec
33 return ZBCQueryTable(input, output); 35 return ZBCQueryTable(input, output);
34 case IoctlCommand::IocFlushL2: 36 case IoctlCommand::IocFlushL2:
35 return FlushL2(input, output); 37 return FlushL2(input, output);
38 case IoctlCommand::IocGetGpuTime:
39 return GetGpuTime(input, output);
36 } 40 }
37 UNIMPLEMENTED_MSG("Unimplemented ioctl"); 41 UNIMPLEMENTED_MSG("Unimplemented ioctl");
38 return 0; 42 return 0;
@@ -169,4 +173,13 @@ u32 nvhost_ctrl_gpu::FlushL2(const std::vector<u8>& input, std::vector<u8>& outp
169 return 0; 173 return 0;
170} 174}
171 175
176u32 nvhost_ctrl_gpu::GetGpuTime(const std::vector<u8>& input, std::vector<u8>& output) {
177 LOG_DEBUG(Service_NVDRV, "called");
178 IoctlGetGpuTime params{};
179 std::memcpy(&params, input.data(), input.size());
180 params.gpu_time = CoreTiming::cyclesToNs(CoreTiming::GetTicks());
181 std::memcpy(output.data(), &params, output.size());
182 return 0;
183}
184
172} // namespace Service::Nvidia::Devices 185} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
index 3bbf028ad..240435eea 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
@@ -156,6 +156,11 @@ private:
156 }; 156 };
157 static_assert(sizeof(IoctlFlushL2) == 8, "IoctlFlushL2 is incorrect size"); 157 static_assert(sizeof(IoctlFlushL2) == 8, "IoctlFlushL2 is incorrect size");
158 158
159 struct IoctlGetGpuTime {
160 u64_le gpu_time;
161 };
162 static_assert(sizeof(IoctlGetGpuTime) == 8, "IoctlGetGpuTime is incorrect size");
163
159 u32 GetCharacteristics(const std::vector<u8>& input, std::vector<u8>& output); 164 u32 GetCharacteristics(const std::vector<u8>& input, std::vector<u8>& output);
160 u32 GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output); 165 u32 GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output);
161 u32 GetActiveSlotMask(const std::vector<u8>& input, std::vector<u8>& output); 166 u32 GetActiveSlotMask(const std::vector<u8>& input, std::vector<u8>& output);
@@ -164,6 +169,7 @@ private:
164 u32 ZBCSetTable(const std::vector<u8>& input, std::vector<u8>& output); 169 u32 ZBCSetTable(const std::vector<u8>& input, std::vector<u8>& output);
165 u32 ZBCQueryTable(const std::vector<u8>& input, std::vector<u8>& output); 170 u32 ZBCQueryTable(const std::vector<u8>& input, std::vector<u8>& output);
166 u32 FlushL2(const std::vector<u8>& input, std::vector<u8>& output); 171 u32 FlushL2(const std::vector<u8>& input, std::vector<u8>& output);
172 u32 GetGpuTime(const std::vector<u8>& input, std::vector<u8>& output);
167}; 173};
168 174
169} // namespace Service::Nvidia::Devices 175} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/interface.cpp b/src/core/hle/service/nvdrv/interface.cpp
index ac3859353..602086eed 100644
--- a/src/core/hle/service/nvdrv/interface.cpp
+++ b/src/core/hle/service/nvdrv/interface.cpp
@@ -88,6 +88,20 @@ void NVDRV::FinishInitialize(Kernel::HLERequestContext& ctx) {
88 rb.Push(RESULT_SUCCESS); 88 rb.Push(RESULT_SUCCESS);
89} 89}
90 90
91void NVDRV::GetStatus(Kernel::HLERequestContext& ctx) {
92 LOG_WARNING(Service_NVDRV, "(STUBBED) called");
93 IPC::ResponseBuilder rb{ctx, 2};
94 rb.Push(RESULT_SUCCESS);
95}
96
97void NVDRV::DumpGraphicsMemoryInfo(Kernel::HLERequestContext& ctx) {
98 // According to SwitchBrew, this has no inputs and no outputs, so effectively does nothing on
99 // retail hardware.
100 LOG_DEBUG(Service_NVDRV, "called");
101 IPC::ResponseBuilder rb{ctx, 2};
102 rb.Push(RESULT_SUCCESS);
103}
104
91NVDRV::NVDRV(std::shared_ptr<Module> nvdrv, const char* name) 105NVDRV::NVDRV(std::shared_ptr<Module> nvdrv, const char* name)
92 : ServiceFramework(name), nvdrv(std::move(nvdrv)) { 106 : ServiceFramework(name), nvdrv(std::move(nvdrv)) {
93 static const FunctionInfo functions[] = { 107 static const FunctionInfo functions[] = {
@@ -97,10 +111,10 @@ NVDRV::NVDRV(std::shared_ptr<Module> nvdrv, const char* name)
97 {3, &NVDRV::Initialize, "Initialize"}, 111 {3, &NVDRV::Initialize, "Initialize"},
98 {4, &NVDRV::QueryEvent, "QueryEvent"}, 112 {4, &NVDRV::QueryEvent, "QueryEvent"},
99 {5, nullptr, "MapSharedMem"}, 113 {5, nullptr, "MapSharedMem"},
100 {6, nullptr, "GetStatus"}, 114 {6, &NVDRV::GetStatus, "GetStatus"},
101 {7, nullptr, "ForceSetClientPID"}, 115 {7, nullptr, "ForceSetClientPID"},
102 {8, &NVDRV::SetClientPID, "SetClientPID"}, 116 {8, &NVDRV::SetClientPID, "SetClientPID"},
103 {9, nullptr, "DumpGraphicsMemoryInfo"}, 117 {9, &NVDRV::DumpGraphicsMemoryInfo, "DumpGraphicsMemoryInfo"},
104 {10, nullptr, "InitializeDevtools"}, 118 {10, nullptr, "InitializeDevtools"},
105 {11, &NVDRV::Ioctl, "Ioctl2"}, 119 {11, &NVDRV::Ioctl, "Ioctl2"},
106 {12, nullptr, "Ioctl3"}, 120 {12, nullptr, "Ioctl3"},
diff --git a/src/core/hle/service/nvdrv/interface.h b/src/core/hle/service/nvdrv/interface.h
index d340893c2..5a1e4baa7 100644
--- a/src/core/hle/service/nvdrv/interface.h
+++ b/src/core/hle/service/nvdrv/interface.h
@@ -24,6 +24,8 @@ private:
24 void QueryEvent(Kernel::HLERequestContext& ctx); 24 void QueryEvent(Kernel::HLERequestContext& ctx);
25 void SetClientPID(Kernel::HLERequestContext& ctx); 25 void SetClientPID(Kernel::HLERequestContext& ctx);
26 void FinishInitialize(Kernel::HLERequestContext& ctx); 26 void FinishInitialize(Kernel::HLERequestContext& ctx);
27 void GetStatus(Kernel::HLERequestContext& ctx);
28 void DumpGraphicsMemoryInfo(Kernel::HLERequestContext& ctx);
27 29
28 std::shared_ptr<Module> nvdrv; 30 std::shared_ptr<Module> nvdrv;
29 31
diff --git a/src/core/hle/service/sm/sm.cpp b/src/core/hle/service/sm/sm.cpp
index 464e79d01..9ca8483a5 100644
--- a/src/core/hle/service/sm/sm.cpp
+++ b/src/core/hle/service/sm/sm.cpp
@@ -63,6 +63,17 @@ ResultVal<Kernel::SharedPtr<Kernel::ServerPort>> ServiceManager::RegisterService
63 return MakeResult<Kernel::SharedPtr<Kernel::ServerPort>>(std::move(server_port)); 63 return MakeResult<Kernel::SharedPtr<Kernel::ServerPort>>(std::move(server_port));
64} 64}
65 65
66ResultCode ServiceManager::UnregisterService(const std::string& name) {
67 CASCADE_CODE(ValidateServiceName(name));
68
69 const auto iter = registered_services.find(name);
70 if (iter == registered_services.end())
71 return ERR_SERVICE_NOT_REGISTERED;
72
73 registered_services.erase(iter);
74 return RESULT_SUCCESS;
75}
76
66ResultVal<Kernel::SharedPtr<Kernel::ClientPort>> ServiceManager::GetServicePort( 77ResultVal<Kernel::SharedPtr<Kernel::ClientPort>> ServiceManager::GetServicePort(
67 const std::string& name) { 78 const std::string& name) {
68 79
@@ -127,13 +138,52 @@ void SM::GetService(Kernel::HLERequestContext& ctx) {
127 } 138 }
128} 139}
129 140
141void SM::RegisterService(Kernel::HLERequestContext& ctx) {
142 IPC::RequestParser rp{ctx};
143
144 const auto name_buf = rp.PopRaw<std::array<char, 8>>();
145 const auto end = std::find(name_buf.begin(), name_buf.end(), '\0');
146
147 const std::string name(name_buf.begin(), end);
148
149 const auto unk_bool = static_cast<bool>(rp.PopRaw<u32>());
150 const auto session_count = rp.PopRaw<u32>();
151
152 LOG_DEBUG(Service_SM, "called with unk_bool={}", unk_bool);
153
154 auto handle = service_manager->RegisterService(name, session_count);
155 if (handle.Failed()) {
156 LOG_ERROR(Service_SM, "failed to register service with error_code={:08X}",
157 handle.Code().raw);
158 IPC::ResponseBuilder rb{ctx, 2};
159 rb.Push(handle.Code());
160 return;
161 }
162
163 IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles};
164 rb.Push(handle.Code());
165 rb.PushMoveObjects(std::move(handle).Unwrap());
166}
167
168void SM::UnregisterService(Kernel::HLERequestContext& ctx) {
169 IPC::RequestParser rp{ctx};
170
171 const auto name_buf = rp.PopRaw<std::array<char, 8>>();
172 const auto end = std::find(name_buf.begin(), name_buf.end(), '\0');
173
174 const std::string name(name_buf.begin(), end);
175
176 IPC::ResponseBuilder rb{ctx, 2};
177 rb.Push(service_manager->UnregisterService(name));
178}
179
130SM::SM(std::shared_ptr<ServiceManager> service_manager) 180SM::SM(std::shared_ptr<ServiceManager> service_manager)
131 : ServiceFramework("sm:", 4), service_manager(std::move(service_manager)) { 181 : ServiceFramework("sm:", 4), service_manager(std::move(service_manager)) {
132 static const FunctionInfo functions[] = { 182 static const FunctionInfo functions[] = {
133 {0x00000000, &SM::Initialize, "Initialize"}, 183 {0x00000000, &SM::Initialize, "Initialize"},
134 {0x00000001, &SM::GetService, "GetService"}, 184 {0x00000001, &SM::GetService, "GetService"},
135 {0x00000002, nullptr, "RegisterService"}, 185 {0x00000002, &SM::RegisterService, "RegisterService"},
136 {0x00000003, nullptr, "UnregisterService"}, 186 {0x00000003, &SM::UnregisterService, "UnregisterService"},
137 }; 187 };
138 RegisterHandlers(functions); 188 RegisterHandlers(functions);
139} 189}
diff --git a/src/core/hle/service/sm/sm.h b/src/core/hle/service/sm/sm.h
index 4f8145dda..bef25433e 100644
--- a/src/core/hle/service/sm/sm.h
+++ b/src/core/hle/service/sm/sm.h
@@ -35,6 +35,8 @@ public:
35private: 35private:
36 void Initialize(Kernel::HLERequestContext& ctx); 36 void Initialize(Kernel::HLERequestContext& ctx);
37 void GetService(Kernel::HLERequestContext& ctx); 37 void GetService(Kernel::HLERequestContext& ctx);
38 void RegisterService(Kernel::HLERequestContext& ctx);
39 void UnregisterService(Kernel::HLERequestContext& ctx);
38 40
39 std::shared_ptr<ServiceManager> service_manager; 41 std::shared_ptr<ServiceManager> service_manager;
40}; 42};
@@ -48,6 +50,7 @@ public:
48 50
49 ResultVal<Kernel::SharedPtr<Kernel::ServerPort>> RegisterService(std::string name, 51 ResultVal<Kernel::SharedPtr<Kernel::ServerPort>> RegisterService(std::string name,
50 unsigned int max_sessions); 52 unsigned int max_sessions);
53 ResultCode UnregisterService(const std::string& name);
51 ResultVal<Kernel::SharedPtr<Kernel::ClientPort>> GetServicePort(const std::string& name); 54 ResultVal<Kernel::SharedPtr<Kernel::ClientPort>> GetServicePort(const std::string& name);
52 ResultVal<Kernel::SharedPtr<Kernel::ClientSession>> ConnectToService(const std::string& name); 55 ResultVal<Kernel::SharedPtr<Kernel::ClientSession>> ConnectToService(const std::string& name);
53 56
diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp
index d25fdb1fe..a72416084 100644
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -510,7 +510,11 @@ private:
510 510
511 if (transaction == TransactionId::Connect) { 511 if (transaction == TransactionId::Connect) {
512 IGBPConnectRequestParcel request{ctx.ReadBuffer()}; 512 IGBPConnectRequestParcel request{ctx.ReadBuffer()};
513 IGBPConnectResponseParcel response{1280, 720}; 513 IGBPConnectResponseParcel response{
514 static_cast<u32>(static_cast<u32>(DisplayResolution::UndockedWidth) *
515 Settings::values.resolution_factor),
516 static_cast<u32>(static_cast<u32>(DisplayResolution::UndockedHeight) *
517 Settings::values.resolution_factor)};
514 ctx.WriteBuffer(response.Serialize()); 518 ctx.WriteBuffer(response.Serialize());
515 } else if (transaction == TransactionId::SetPreallocatedBuffer) { 519 } else if (transaction == TransactionId::SetPreallocatedBuffer) {
516 IGBPSetPreallocatedBufferRequestParcel request{ctx.ReadBuffer()}; 520 IGBPSetPreallocatedBufferRequestParcel request{ctx.ReadBuffer()};
@@ -692,11 +696,15 @@ private:
692 rb.Push(RESULT_SUCCESS); 696 rb.Push(RESULT_SUCCESS);
693 697
694 if (Settings::values.use_docked_mode) { 698 if (Settings::values.use_docked_mode) {
695 rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedWidth)); 699 rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedWidth) *
696 rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedHeight)); 700 static_cast<u32>(Settings::values.resolution_factor));
701 rb.Push(static_cast<u32>(Service::VI::DisplayResolution::DockedHeight) *
702 static_cast<u32>(Settings::values.resolution_factor));
697 } else { 703 } else {
698 rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedWidth)); 704 rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedWidth) *
699 rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedHeight)); 705 static_cast<u32>(Settings::values.resolution_factor));
706 rb.Push(static_cast<u32>(Service::VI::DisplayResolution::UndockedHeight) *
707 static_cast<u32>(Settings::values.resolution_factor));
700 } 708 }
701 709
702 rb.PushRaw<float>(60.0f); 710 rb.PushRaw<float>(60.0f);
@@ -901,11 +909,15 @@ private:
901 rb.Push(RESULT_SUCCESS); 909 rb.Push(RESULT_SUCCESS);
902 910
903 if (Settings::values.use_docked_mode) { 911 if (Settings::values.use_docked_mode) {
904 rb.Push(static_cast<u64>(DisplayResolution::DockedWidth)); 912 rb.Push(static_cast<u64>(DisplayResolution::DockedWidth) *
905 rb.Push(static_cast<u64>(DisplayResolution::DockedHeight)); 913 static_cast<u32>(Settings::values.resolution_factor));
914 rb.Push(static_cast<u64>(DisplayResolution::DockedHeight) *
915 static_cast<u32>(Settings::values.resolution_factor));
906 } else { 916 } else {
907 rb.Push(static_cast<u64>(DisplayResolution::UndockedWidth)); 917 rb.Push(static_cast<u64>(DisplayResolution::UndockedWidth) *
908 rb.Push(static_cast<u64>(DisplayResolution::UndockedHeight)); 918 static_cast<u32>(Settings::values.resolution_factor));
919 rb.Push(static_cast<u64>(DisplayResolution::UndockedHeight) *
920 static_cast<u32>(Settings::values.resolution_factor));
909 } 921 }
910 } 922 }
911 923
@@ -922,6 +934,8 @@ private:
922 void ListDisplays(Kernel::HLERequestContext& ctx) { 934 void ListDisplays(Kernel::HLERequestContext& ctx) {
923 IPC::RequestParser rp{ctx}; 935 IPC::RequestParser rp{ctx};
924 DisplayInfo display_info; 936 DisplayInfo display_info;
937 display_info.width *= static_cast<u64>(Settings::values.resolution_factor);
938 display_info.height *= static_cast<u64>(Settings::values.resolution_factor);
925 ctx.WriteBuffer(&display_info, sizeof(DisplayInfo)); 939 ctx.WriteBuffer(&display_info, sizeof(DisplayInfo));
926 IPC::ResponseBuilder rb{ctx, 4}; 940 IPC::ResponseBuilder rb{ctx, 4};
927 rb.Push(RESULT_SUCCESS); 941 rb.Push(RESULT_SUCCESS);
diff --git a/src/core/settings.h b/src/core/settings.h
index e63134f80..a0c5fd447 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -403,6 +403,7 @@ struct Values {
403 bool use_gdbstub; 403 bool use_gdbstub;
404 u16 gdbstub_port; 404 u16 gdbstub_port;
405 std::string program_args; 405 std::string program_args;
406 bool dump_exefs;
406 bool dump_nso; 407 bool dump_nso;
407 408
408 // WebService 409 // WebService
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index a780215c1..3f906a517 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -21,6 +21,8 @@ add_library(video_core STATIC
21 macro_interpreter.h 21 macro_interpreter.h
22 memory_manager.cpp 22 memory_manager.cpp
23 memory_manager.h 23 memory_manager.h
24 morton.cpp
25 morton.h
24 rasterizer_cache.cpp 26 rasterizer_cache.cpp
25 rasterizer_cache.h 27 rasterizer_cache.h
26 rasterizer_interface.h 28 rasterizer_interface.h
@@ -62,7 +64,6 @@ add_library(video_core STATIC
62 textures/decoders.cpp 64 textures/decoders.cpp
63 textures/decoders.h 65 textures/decoders.h
64 textures/texture.h 66 textures/texture.h
65 utils.h
66 video_core.cpp 67 video_core.cpp
67 video_core.h 68 video_core.h
68) 69)
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 9e480dc39..4f137e693 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -389,6 +389,13 @@ public:
389 ReverseSubtract = 3, 389 ReverseSubtract = 3,
390 Min = 4, 390 Min = 4,
391 Max = 5, 391 Max = 5,
392
393 // These values are used by Nouveau and some games.
394 AddGL = 0x8006,
395 SubtractGL = 0x8007,
396 ReverseSubtractGL = 0x8008,
397 MinGL = 0x800a,
398 MaxGL = 0x800b
392 }; 399 };
393 400
394 enum class Factor : u32 { 401 enum class Factor : u32 {
@@ -624,7 +631,16 @@ public:
624 } 631 }
625 } zeta; 632 } zeta;
626 633
627 INSERT_PADDING_WORDS(0x5B); 634 INSERT_PADDING_WORDS(0x41);
635
636 union {
637 BitField<0, 4, u32> stencil;
638 BitField<4, 4, u32> unknown;
639 BitField<8, 4, u32> scissor;
640 BitField<12, 4, u32> viewport;
641 } clear_flags;
642
643 INSERT_PADDING_WORDS(0x19);
628 644
629 std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format; 645 std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format;
630 646
@@ -1127,6 +1143,7 @@ ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7);
1127ASSERT_REG_POSITION(color_mask_common, 0x3E4); 1143ASSERT_REG_POSITION(color_mask_common, 0x3E4);
1128ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB); 1144ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
1129ASSERT_REG_POSITION(zeta, 0x3F8); 1145ASSERT_REG_POSITION(zeta, 0x3F8);
1146ASSERT_REG_POSITION(clear_flags, 0x43E);
1130ASSERT_REG_POSITION(vertex_attrib_format, 0x458); 1147ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
1131ASSERT_REG_POSITION(rt_control, 0x487); 1148ASSERT_REG_POSITION(rt_control, 0x487);
1132ASSERT_REG_POSITION(zeta_width, 0x48a); 1149ASSERT_REG_POSITION(zeta_width, 0x48a);
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 613fdc823..b9faaf8e0 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -82,6 +82,8 @@ union Attribute {
82 Position = 7, 82 Position = 7,
83 Attribute_0 = 8, 83 Attribute_0 = 8,
84 Attribute_31 = 39, 84 Attribute_31 = 39,
85 ClipDistances0123 = 44,
86 ClipDistances4567 = 45,
85 PointCoord = 46, 87 PointCoord = 46,
86 // This attribute contains a tuple of (~, ~, InstanceId, VertexId) when inside a vertex 88 // This attribute contains a tuple of (~, ~, InstanceId, VertexId) when inside a vertex
87 // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval 89 // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval
@@ -153,6 +155,7 @@ enum class PredCondition : u64 {
153 NotEqual = 5, 155 NotEqual = 5,
154 GreaterEqual = 6, 156 GreaterEqual = 6,
155 LessThanWithNan = 9, 157 LessThanWithNan = 9,
158 LessEqualWithNan = 11,
156 GreaterThanWithNan = 12, 159 GreaterThanWithNan = 12,
157 NotEqualWithNan = 13, 160 NotEqualWithNan = 13,
158 GreaterEqualWithNan = 14, 161 GreaterEqualWithNan = 14,
@@ -261,7 +264,7 @@ enum class FlowCondition : u64 {
261 Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for? 264 Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for?
262}; 265};
263 266
264enum class ControlCode : u64 { 267enum class ConditionCode : u64 {
265 F = 0, 268 F = 0,
266 LT = 1, 269 LT = 1,
267 EQ = 2, 270 EQ = 2,
@@ -365,6 +368,11 @@ enum class HalfPrecision : u64 {
365 FMZ = 2, 368 FMZ = 2,
366}; 369};
367 370
// Mode selector used by the `mode` bit field of the r2p instruction layout in Instruction
// (declared there as BitField<40, 1, R2pMode>).
// NOTE(review): Pr / Cc presumably select predicate registers vs. the condition code register as
// the destination -- not established by the code visible here; confirm against the decoder.
371enum class R2pMode : u64 {
372 Pr = 0,
373 Cc = 1,
374};
375
368enum class IpaInterpMode : u64 { 376enum class IpaInterpMode : u64 {
369 Linear = 0, 377 Linear = 0,
370 Perspective = 1, 378 Perspective = 1,
@@ -569,7 +577,6 @@ union Instruction {
569 BitField<39, 2, u64> tab5cb8_2; 577 BitField<39, 2, u64> tab5cb8_2;
570 BitField<41, 3, u64> tab5c68_1; 578 BitField<41, 3, u64> tab5c68_1;
571 BitField<44, 2, u64> tab5c68_0; 579 BitField<44, 2, u64> tab5c68_0;
572 BitField<47, 1, u64> cc;
573 BitField<48, 1, u64> negate_b; 580 BitField<48, 1, u64> negate_b;
574 } fmul; 581 } fmul;
575 582
@@ -831,7 +838,7 @@ union Instruction {
831 union { 838 union {
832 BitField<0, 3, u64> pred0; 839 BitField<0, 3, u64> pred0;
833 BitField<3, 3, u64> pred3; 840 BitField<3, 3, u64> pred3;
834 BitField<8, 5, ControlCode> cc; // flag in cc 841 BitField<8, 5, ConditionCode> cc; // flag in cc
835 BitField<39, 3, u64> pred39; 842 BitField<39, 3, u64> pred39;
836 BitField<42, 1, u64> neg_pred39; 843 BitField<42, 1, u64> neg_pred39;
837 BitField<45, 4, PredOperation> op; // op with pred39 844 BitField<45, 4, PredOperation> op; // op with pred39
@@ -855,6 +862,12 @@ union Instruction {
855 } hsetp2; 862 } hsetp2;
856 863
857 union { 864 union {
865 BitField<40, 1, R2pMode> mode;
866 BitField<41, 2, u64> byte;
867 BitField<20, 7, u64> immediate_mask;
868 } r2p;
869
870 union {
858 BitField<39, 3, u64> pred39; 871 BitField<39, 3, u64> pred39;
859 BitField<42, 1, u64> neg_pred; 872 BitField<42, 1, u64> neg_pred;
860 BitField<43, 1, u64> neg_a; 873 BitField<43, 1, u64> neg_a;
@@ -1235,7 +1248,7 @@ union Instruction {
1235 BitField<60, 1, u64> is_b_gpr; 1248 BitField<60, 1, u64> is_b_gpr;
1236 BitField<59, 1, u64> is_c_gpr; 1249 BitField<59, 1, u64> is_c_gpr;
1237 BitField<20, 24, s64> smem_imm; 1250 BitField<20, 24, s64> smem_imm;
1238 BitField<0, 5, ControlCode> flow_control_code; 1251 BitField<0, 5, ConditionCode> flow_condition_code;
1239 1252
1240 Attribute attribute; 1253 Attribute attribute;
1241 Sampler sampler; 1254 Sampler sampler;
@@ -1382,6 +1395,7 @@ public:
1382 PSETP, 1395 PSETP,
1383 PSET, 1396 PSET,
1384 CSETP, 1397 CSETP,
1398 R2P_IMM,
1385 XMAD_IMM, 1399 XMAD_IMM,
1386 XMAD_CR, 1400 XMAD_CR,
1387 XMAD_RC, 1401 XMAD_RC,
@@ -1412,6 +1426,7 @@ public:
1412 HalfSetPredicate, 1426 HalfSetPredicate,
1413 PredicateSetPredicate, 1427 PredicateSetPredicate,
1414 PredicateSetRegister, 1428 PredicateSetRegister,
1429 RegisterSetPredicate,
1415 Conversion, 1430 Conversion,
1416 Xmad, 1431 Xmad,
1417 Unknown, 1432 Unknown,
@@ -1650,6 +1665,7 @@ private:
1650 INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"), 1665 INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"),
1651 INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"), 1666 INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"),
1652 INST("010100001010----", Id::CSETP, Type::PredicateSetPredicate, "CSETP"), 1667 INST("010100001010----", Id::CSETP, Type::PredicateSetPredicate, "CSETP"),
1668 INST("0011100-11110---", Id::R2P_IMM, Type::RegisterSetPredicate, "R2P_IMM"),
1653 INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"), 1669 INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"),
1654 INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"), 1670 INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"),
1655 INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"), 1671 INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"),
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h
index a0e015c4b..99c34649f 100644
--- a/src/video_core/engines/shader_header.h
+++ b/src/video_core/engines/shader_header.h
@@ -62,7 +62,16 @@ struct Header {
62 INSERT_PADDING_BYTES(1); // ImapSystemValuesB 62 INSERT_PADDING_BYTES(1); // ImapSystemValuesB
63 INSERT_PADDING_BYTES(16); // ImapGenericVector[32] 63 INSERT_PADDING_BYTES(16); // ImapGenericVector[32]
64 INSERT_PADDING_BYTES(2); // ImapColor 64 INSERT_PADDING_BYTES(2); // ImapColor
65 INSERT_PADDING_BYTES(2); // ImapSystemValuesC 65 union {
66 BitField<0, 8, u16> clip_distances;
67 BitField<8, 1, u16> point_sprite_s;
68 BitField<9, 1, u16> point_sprite_t;
69 BitField<10, 1, u16> fog_coordinate;
70 BitField<12, 1, u16> tessellation_eval_point_u;
71 BitField<13, 1, u16> tessellation_eval_point_v;
72 BitField<14, 1, u16> instance_id;
73 BitField<15, 1, u16> vertex_id;
74 };
66 INSERT_PADDING_BYTES(5); // ImapFixedFncTexture[10] 75 INSERT_PADDING_BYTES(5); // ImapFixedFncTexture[10]
67 INSERT_PADDING_BYTES(1); // ImapReserved 76 INSERT_PADDING_BYTES(1); // ImapReserved
68 INSERT_PADDING_BYTES(3); // OmapSystemValuesA 77 INSERT_PADDING_BYTES(3); // OmapSystemValuesA
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 83c7e5b0b..51b3904f6 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -17,6 +17,8 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
17 switch (format) { 17 switch (format) {
18 case PixelFormat::ABGR8: 18 case PixelFormat::ABGR8:
19 return 4; 19 return 4;
20 default:
21 return 4;
20 } 22 }
21 23
22 UNREACHABLE(); 24 UNREACHABLE();
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp
index 335a8d407..2b0dea5cd 100644
--- a/src/video_core/macro_interpreter.cpp
+++ b/src/video_core/macro_interpreter.cpp
@@ -35,6 +35,7 @@ void MacroInterpreter::Reset() {
35 // The next parameter index starts at 1, because $r1 already has the value of the first 35 // The next parameter index starts at 1, because $r1 already has the value of the first
36 // parameter. 36 // parameter.
37 next_parameter_index = 1; 37 next_parameter_index = 1;
38 carry_flag = false;
38} 39}
39 40
40bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) { 41bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
@@ -135,14 +136,28 @@ MacroInterpreter::Opcode MacroInterpreter::GetOpcode(u32 offset) const {
135 return {macro_memory[offset + pc / sizeof(u32)]}; 136 return {macro_memory[offset + pc / sizeof(u32)]};
136} 137}
137 138
138u32 MacroInterpreter::GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const { 139u32 MacroInterpreter::GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) {
139 switch (operation) { 140 switch (operation) {
140 case ALUOperation::Add: 141 case ALUOperation::Add: {
141 return src_a + src_b; 142 const u64 result{static_cast<u64>(src_a) + src_b};
142 // TODO(Subv): Implement AddWithCarry 143 carry_flag = result > 0xffffffff;
143 case ALUOperation::Subtract: 144 return static_cast<u32>(result);
144 return src_a - src_b; 145 }
145 // TODO(Subv): Implement SubtractWithBorrow 146 case ALUOperation::AddWithCarry: {
147 const u64 result{static_cast<u64>(src_a) + src_b + (carry_flag ? 1ULL : 0ULL)};
148 carry_flag = result > 0xffffffff;
149 return static_cast<u32>(result);
150 }
151 case ALUOperation::Subtract: {
152 const u64 result{static_cast<u64>(src_a) - src_b};
153 carry_flag = result < 0x100000000;
154 return static_cast<u32>(result);
155 }
156 case ALUOperation::SubtractWithBorrow: {
157 const u64 result{static_cast<u64>(src_a) - src_b - (carry_flag ? 0ULL : 1ULL)};
158 carry_flag = result < 0x100000000;
159 return static_cast<u32>(result);
160 }
146 case ALUOperation::Xor: 161 case ALUOperation::Xor:
147 return src_a ^ src_b; 162 return src_a ^ src_b;
148 case ALUOperation::Or: 163 case ALUOperation::Or:
diff --git a/src/video_core/macro_interpreter.h b/src/video_core/macro_interpreter.h
index 62d1ce289..cde360288 100644
--- a/src/video_core/macro_interpreter.h
+++ b/src/video_core/macro_interpreter.h
@@ -117,7 +117,7 @@ private:
117 bool Step(u32 offset, bool is_delay_slot); 117 bool Step(u32 offset, bool is_delay_slot);
118 118
119 /// Calculates the result of an ALU operation. src_a OP src_b; 119 /// Calculates the result of an ALU operation. src_a OP src_b;
120 u32 GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const; 120 u32 GetALUResult(ALUOperation operation, u32 src_a, u32 src_b);
121 121
122 /// Performs the result operation on the input result and stores it in the specified register 122 /// Performs the result operation on the input result and stores it in the specified register
123 /// (if necessary). 123 /// (if necessary).
@@ -165,5 +165,7 @@ private:
165 std::vector<u32> parameters; 165 std::vector<u32> parameters;
166 /// Index of the next parameter that will be fetched by the 'parm' instruction. 166 /// Index of the next parameter that will be fetched by the 'parm' instruction.
167 u32 next_parameter_index = 0; 167 u32 next_parameter_index = 0;
168
169 bool carry_flag{};
168}; 170};
169} // namespace Tegra 171} // namespace Tegra
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 77a20bb84..47247f097 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -9,6 +9,13 @@
9 9
10namespace Tegra { 10namespace Tegra {
11 11
12MemoryManager::MemoryManager() {
13 // Mark the first page as reserved, so that 0 is not a valid GPUVAddr. Otherwise, games might
14 // try to use 0 as a valid address, which is also used to mean nullptr. This fixes a bug with
15 // Undertale using 0 for a render target.
16 PageSlot(0) = static_cast<u64>(PageStatus::Reserved);
17}
18
12GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) { 19GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
13 const std::optional<GPUVAddr> gpu_addr{FindFreeBlock(0, size, align, PageStatus::Unmapped)}; 20 const std::optional<GPUVAddr> gpu_addr{FindFreeBlock(0, size, align, PageStatus::Unmapped)};
14 21
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 4eb338aa2..fb03497ca 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -18,7 +18,7 @@ using GPUVAddr = u64;
18 18
19class MemoryManager final { 19class MemoryManager final {
20public: 20public:
21 MemoryManager() = default; 21 MemoryManager();
22 22
23 GPUVAddr AllocateSpace(u64 size, u64 align); 23 GPUVAddr AllocateSpace(u64 size, u64 align);
24 GPUVAddr AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align); 24 GPUVAddr AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align);
@@ -37,6 +37,7 @@ private:
37 enum class PageStatus : u64 { 37 enum class PageStatus : u64 {
38 Unmapped = 0xFFFFFFFFFFFFFFFFULL, 38 Unmapped = 0xFFFFFFFFFFFFFFFFULL,
39 Allocated = 0xFFFFFFFFFFFFFFFEULL, 39 Allocated = 0xFFFFFFFFFFFFFFFEULL,
40 Reserved = 0xFFFFFFFFFFFFFFFDULL,
40 }; 41 };
41 42
42 std::optional<GPUVAddr> FindFreeBlock(GPUVAddr region_start, u64 size, u64 align, 43 std::optional<GPUVAddr> FindFreeBlock(GPUVAddr region_start, u64 size, u64 align,
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
new file mode 100644
index 000000000..f14abba7d
--- /dev/null
+++ b/src/video_core/morton.cpp
@@ -0,0 +1,353 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <cstring>
7#include "common/assert.h"
8#include "common/common_types.h"
9#include "core/memory.h"
10#include "video_core/morton.h"
11#include "video_core/surface.h"
12#include "video_core/textures/decoders.h"
13
14namespace VideoCore {
15
16using Surface::GetBytesPerPixel;
17using Surface::PixelFormat;
18
19using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr);
20using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>;
21
22template <bool morton_to_linear, PixelFormat format>
23static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth,
24 u8* buffer, std::size_t buffer_size, VAddr addr) {
25 constexpr u32 bytes_per_pixel = GetBytesPerPixel(format);
26
27 // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
28 // pixel values.
29 const u32 tile_size_x{GetDefaultBlockWidth(format)};
30 const u32 tile_size_y{GetDefaultBlockHeight(format)};
31
32 if constexpr (morton_to_linear) {
33 Tegra::Texture::UnswizzleTexture(buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel,
34 stride, height, depth, block_height, block_depth);
35 } else {
36 Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x,
37 (height + tile_size_y - 1) / tile_size_y, depth,
38 bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr),
39 buffer, false, block_height, block_depth);
40 }
41}
42
// Table of MortonCopy<true, ...> instantiations, one per listed pixel format. ConversionArray is
// a std::array of MortonCopyFn pointers sized by Surface::MaxPixelFormat.
// NOTE(review): the entries are presumably ordered to match the PixelFormat enumerators so the
// table can be indexed by format; the enum and the lookup site are not visible here -- confirm
// before adding, removing or reordering entries.
43static constexpr ConversionArray morton_to_linear_fns = {
44 // clang-format off
45 MortonCopy<true, PixelFormat::ABGR8U>,
46 MortonCopy<true, PixelFormat::ABGR8S>,
47 MortonCopy<true, PixelFormat::ABGR8UI>,
48 MortonCopy<true, PixelFormat::B5G6R5U>,
49 MortonCopy<true, PixelFormat::A2B10G10R10U>,
50 MortonCopy<true, PixelFormat::A1B5G5R5U>,
51 MortonCopy<true, PixelFormat::R8U>,
52 MortonCopy<true, PixelFormat::R8UI>,
53 MortonCopy<true, PixelFormat::RGBA16F>,
54 MortonCopy<true, PixelFormat::RGBA16U>,
55 MortonCopy<true, PixelFormat::RGBA16UI>,
56 MortonCopy<true, PixelFormat::R11FG11FB10F>,
57 MortonCopy<true, PixelFormat::RGBA32UI>,
58 MortonCopy<true, PixelFormat::DXT1>,
59 MortonCopy<true, PixelFormat::DXT23>,
60 MortonCopy<true, PixelFormat::DXT45>,
61 MortonCopy<true, PixelFormat::DXN1>,
62 MortonCopy<true, PixelFormat::DXN2UNORM>,
63 MortonCopy<true, PixelFormat::DXN2SNORM>,
64 MortonCopy<true, PixelFormat::BC7U>,
65 MortonCopy<true, PixelFormat::BC6H_UF16>,
66 MortonCopy<true, PixelFormat::BC6H_SF16>,
67 MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
68 MortonCopy<true, PixelFormat::G8R8U>,
69 MortonCopy<true, PixelFormat::G8R8S>,
70 MortonCopy<true, PixelFormat::BGRA8>,
71 MortonCopy<true, PixelFormat::RGBA32F>,
72 MortonCopy<true, PixelFormat::RG32F>,
73 MortonCopy<true, PixelFormat::R32F>,
74 MortonCopy<true, PixelFormat::R16F>,
75 MortonCopy<true, PixelFormat::R16U>,
76 MortonCopy<true, PixelFormat::R16S>,
77 MortonCopy<true, PixelFormat::R16UI>,
78 MortonCopy<true, PixelFormat::R16I>,
79 MortonCopy<true, PixelFormat::RG16>,
80 MortonCopy<true, PixelFormat::RG16F>,
81 MortonCopy<true, PixelFormat::RG16UI>,
82 MortonCopy<true, PixelFormat::RG16I>,
83 MortonCopy<true, PixelFormat::RG16S>,
84 MortonCopy<true, PixelFormat::RGB32F>,
85 MortonCopy<true, PixelFormat::RGBA8_SRGB>,
86 MortonCopy<true, PixelFormat::RG8U>,
87 MortonCopy<true, PixelFormat::RG8S>,
88 MortonCopy<true, PixelFormat::RG32UI>,
89 MortonCopy<true, PixelFormat::R32UI>,
90 MortonCopy<true, PixelFormat::ASTC_2D_8X8>,
91 MortonCopy<true, PixelFormat::ASTC_2D_8X5>,
92 MortonCopy<true, PixelFormat::ASTC_2D_5X4>,
93 MortonCopy<true, PixelFormat::BGRA8_SRGB>,
94 MortonCopy<true, PixelFormat::DXT1_SRGB>,
95 MortonCopy<true, PixelFormat::DXT23_SRGB>,
96 MortonCopy<true, PixelFormat::DXT45_SRGB>,
97 MortonCopy<true, PixelFormat::BC7U_SRGB>,
98 MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>,
99 MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>,
100 MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>,
101 MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>,
102 MortonCopy<true, PixelFormat::ASTC_2D_5X5>,
103 MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
104 MortonCopy<true, PixelFormat::ASTC_2D_10X8>,
105 MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>,
106 MortonCopy<true, PixelFormat::Z32F>,
107 MortonCopy<true, PixelFormat::Z16>,
108 MortonCopy<true, PixelFormat::Z24S8>,
109 MortonCopy<true, PixelFormat::S8Z24>,
110 MortonCopy<true, PixelFormat::Z32FS8>,
111 // clang-format on
112};
113
// Per-format copy routines for the linear -> Morton direction (the MortonCopy<false, ...>
// instantiations). GetSwizzleFunction indexes this table with the numeric value of the
// PixelFormat, so the entry order must stay in step with the morton_to_linear_fns table.
// The nullptr slots are the ASTC formats, for which no routine is provided in this direction.
114static constexpr ConversionArray linear_to_morton_fns = {
115 // clang-format off
116 MortonCopy<false, PixelFormat::ABGR8U>,
117 MortonCopy<false, PixelFormat::ABGR8S>,
118 MortonCopy<false, PixelFormat::ABGR8UI>,
119 MortonCopy<false, PixelFormat::B5G6R5U>,
120 MortonCopy<false, PixelFormat::A2B10G10R10U>,
121 MortonCopy<false, PixelFormat::A1B5G5R5U>,
122 MortonCopy<false, PixelFormat::R8U>,
123 MortonCopy<false, PixelFormat::R8UI>,
124 MortonCopy<false, PixelFormat::RGBA16F>,
125 MortonCopy<false, PixelFormat::RGBA16U>,
126 MortonCopy<false, PixelFormat::RGBA16UI>,
127 MortonCopy<false, PixelFormat::R11FG11FB10F>,
128 MortonCopy<false, PixelFormat::RGBA32UI>,
129 MortonCopy<false, PixelFormat::DXT1>,
130 MortonCopy<false, PixelFormat::DXT23>,
131 MortonCopy<false, PixelFormat::DXT45>,
132 MortonCopy<false, PixelFormat::DXN1>,
133 MortonCopy<false, PixelFormat::DXN2UNORM>,
134 MortonCopy<false, PixelFormat::DXN2SNORM>,
135 MortonCopy<false, PixelFormat::BC7U>,
136 MortonCopy<false, PixelFormat::BC6H_UF16>,
137 MortonCopy<false, PixelFormat::BC6H_SF16>,
138 // TODO(Subv): Swizzling ASTC formats is not supported
139 nullptr, // ASTC_2D_4X4
140 MortonCopy<false, PixelFormat::G8R8U>,
141 MortonCopy<false, PixelFormat::G8R8S>,
142 MortonCopy<false, PixelFormat::BGRA8>,
143 MortonCopy<false, PixelFormat::RGBA32F>,
144 MortonCopy<false, PixelFormat::RG32F>,
145 MortonCopy<false, PixelFormat::R32F>,
146 MortonCopy<false, PixelFormat::R16F>,
147 MortonCopy<false, PixelFormat::R16U>,
148 MortonCopy<false, PixelFormat::R16S>,
149 MortonCopy<false, PixelFormat::R16UI>,
150 MortonCopy<false, PixelFormat::R16I>,
151 MortonCopy<false, PixelFormat::RG16>,
152 MortonCopy<false, PixelFormat::RG16F>,
153 MortonCopy<false, PixelFormat::RG16UI>,
154 MortonCopy<false, PixelFormat::RG16I>,
155 MortonCopy<false, PixelFormat::RG16S>,
156 MortonCopy<false, PixelFormat::RGB32F>,
157 MortonCopy<false, PixelFormat::RGBA8_SRGB>,
158 MortonCopy<false, PixelFormat::RG8U>,
159 MortonCopy<false, PixelFormat::RG8S>,
160 MortonCopy<false, PixelFormat::RG32UI>,
161 MortonCopy<false, PixelFormat::R32UI>,
162 nullptr, // ASTC_2D_8X8
163 nullptr, // ASTC_2D_8X5
164 nullptr, // ASTC_2D_5X4
165 MortonCopy<false, PixelFormat::BGRA8_SRGB>,
166 MortonCopy<false, PixelFormat::DXT1_SRGB>,
167 MortonCopy<false, PixelFormat::DXT23_SRGB>,
168 MortonCopy<false, PixelFormat::DXT45_SRGB>,
169 MortonCopy<false, PixelFormat::BC7U_SRGB>,
170 nullptr, // ASTC_2D_4X4_SRGB
171 nullptr, // ASTC_2D_8X8_SRGB
172 nullptr, // ASTC_2D_8X5_SRGB
173 nullptr, // ASTC_2D_5X4_SRGB
174 nullptr, // ASTC_2D_5X5
175 nullptr, // ASTC_2D_5X5_SRGB
176 nullptr, // ASTC_2D_10X8
177 nullptr, // ASTC_2D_10X8_SRGB
178 MortonCopy<false, PixelFormat::Z32F>,
179 MortonCopy<false, PixelFormat::Z16>,
180 MortonCopy<false, PixelFormat::Z24S8>,
181 MortonCopy<false, PixelFormat::S8Z24>,
182 MortonCopy<false, PixelFormat::Z32FS8>,
183 // clang-format on
184};
185
186constexpr MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) {
187 switch (mode) {
188 case MortonSwizzleMode::MortonToLinear:
189 return morton_to_linear_fns[static_cast<std::size_t>(format)];
190 case MortonSwizzleMode::LinearToMorton:
191 return linear_to_morton_fns[static_cast<std::size_t>(format)];
192 }
193}
194
/// Z-order (Morton) index of a texel inside its 8x8 tile, built from the low three bits of each
/// coordinate
static u32 MortonInterleave(u32 x, u32 y) {
    // Bits 0..2 of x land on result bits 0, 2 and 4.
    static const u32 x_spread[] = {0x00, 0x01, 0x04, 0x05, 0x10, 0x11, 0x14, 0x15};
    // Bits 0..2 of y land on result bits 1, 3 and 5.
    static const u32 y_spread[] = {0x00, 0x02, 0x08, 0x0a, 0x20, 0x22, 0x28, 0x2a};

    const u32 x_in_tile = x & 7;
    const u32 y_in_tile = y & 7;
    // The two tables never set the same bit, so OR-ing them equals adding them.
    return x_spread[x_in_tile] | y_spread[y_in_tile];
}
201
202/// Calculates the offset of the position of the pixel in Morton order
203static u32 GetMortonOffset(u32 x, u32 y, u32 bytes_per_pixel) {
204 // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each
205 // of which is composed of four 2x2 subtiles each of which is composed of four texels.
206 // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g.
207 // texels are laid out in a 2x2 subtile like this:
208 // 2 3
209 // 0 1
210 //
211 // The full 8x8 tile has the texels arranged like this:
212 //
213 // 42 43 46 47 58 59 62 63
214 // 40 41 44 45 56 57 60 61
215 // 34 35 38 39 50 51 54 55
216 // 32 33 36 37 48 49 52 53
217 // 10 11 14 15 26 27 30 31
218 // 08 09 12 13 24 25 28 29
219 // 02 03 06 07 18 19 22 23
220 // 00 01 04 05 16 17 20 21
221 //
222 // This pattern is what's called Z-order curve, or Morton order.
223
224 const unsigned int block_height = 8;
225 const unsigned int coarse_x = x & ~7;
226
227 u32 i = MortonInterleave(x, y);
228
229 const unsigned int offset = coarse_x * block_height;
230
231 return (i + offset) * bytes_per_pixel;
232}
233
/// 128x128 Z-Order coordinate from 2D coordinates
///
/// Only the low seven bits of each coordinate take part, so the result is the index of the
/// texel inside its 128x128 tile (a 14-bit value). The coordinate bits are deposited as follows:
///
///   x bit:      0  1  2  3  4  5  6        y bit:      0  1  2  3  4  5  6
///   result bit: 0  1  3  6 11 12 13        result bit: 2  4  5  7  8  9 10
///
/// This computes what the previous lookup tables stored. Those tables held 384 literals each
/// while being indexed modulo 128, so two thirds of every table could never be read.
static u32 MortonInterleave128(u32 x, u32 y) {
    // The masks keep only bits 0..6, which also performs the reduction modulo 128.
    const u32 x_bits = (x & 0x03)           // x bits 0-1 -> result bits 0-1
                       | ((x & 0x04) << 1)  // x bit 2    -> result bit 3
                       | ((x & 0x08) << 3)  // x bit 3    -> result bit 6
                       | ((x & 0x70) << 7); // x bits 4-6 -> result bits 11-13

    const u32 y_bits = ((y & 0x01) << 2)    // y bit 0    -> result bit 2
                       | ((y & 0x06) << 3)  // y bits 1-2 -> result bits 4-5
                       | ((y & 0x78) << 4); // y bits 3-6 -> result bits 7-10

    // The two groups occupy disjoint bits, so OR-ing them equals the original addition.
    return x_bits | y_bits;
}
312
313static u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) {
314 // Calculates the offset of the position of the pixel in Morton order
315 // Framebuffer images are split into 128x128 tiles.
316
317 constexpr u32 block_height = 128;
318 const u32 coarse_x = x & ~127;
319
320 const u32 i = MortonInterleave128(x, y);
321
322 const u32 offset = coarse_x * block_height;
323
324 return (i + offset) * bytes_per_pixel;
325}
326
327void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride,
328 u32 block_height, u32 height, u32 block_depth, u32 depth, u8* buffer,
329 std::size_t buffer_size, VAddr addr) {
330
331 GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth, buffer,
332 buffer_size, addr);
333}
334
335void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 linear_bytes_per_pixel,
336 u8* morton_data, u8* linear_data, bool morton_to_linear) {
337 u8* data_ptrs[2];
338 for (u32 y = 0; y < height; ++y) {
339 for (u32 x = 0; x < width; ++x) {
340 const u32 coarse_y = y & ~127;
341 const u32 morton_offset =
342 GetMortonOffset128(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel;
343 const u32 linear_pixel_index = (x + y * width) * linear_bytes_per_pixel;
344
345 data_ptrs[morton_to_linear ? 1 : 0] = morton_data + morton_offset;
346 data_ptrs[morton_to_linear ? 0 : 1] = &linear_data[linear_pixel_index];
347
348 std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
349 }
350 }
351}
352
353} // namespace VideoCore \ No newline at end of file
diff --git a/src/video_core/morton.h b/src/video_core/morton.h
new file mode 100644
index 000000000..b9b9eca86
--- /dev/null
+++ b/src/video_core/morton.h
@@ -0,0 +1,21 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "video_core/surface.h"
9
10namespace VideoCore {
11
/// Direction of a MortonSwizzle conversion. Each enumerator selects one of the two per-format
/// routine tables (Morton -> linear, or linear -> Morton).
12enum class MortonSwizzleMode { MortonToLinear, LinearToMorton };
13
/// Looks up the copy routine for the given mode and pixel format and invokes it, forwarding
/// stride, block_height, height, block_depth, depth, buffer, buffer_size and addr unchanged.
/// NOTE(review): the linear -> Morton table has no routine (nullptr) for the ASTC formats -
/// confirm callers never request that combination.
14void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride,
15 u32 block_height, u32 height, u32 block_depth, u32 depth, u8* buffer,
16 std::size_t buffer_size, VAddr addr);
17
/// Copies a width x height image one pixel at a time between morton_data (addressed in
/// 128x128 Morton-ordered tiles) and linear_data (row-major). bytes_per_pixel bytes are copied
/// per pixel; linear_bytes_per_pixel is the pixel pitch used on the linear side. When
/// morton_to_linear is true the copy goes from morton_data to linear_data, otherwise the
/// other way round.
18void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 linear_bytes_per_pixel,
19 u8* morton_data, u8* linear_data, bool morton_to_linear);
20
21} // namespace VideoCore \ No newline at end of file
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index ae6aaee4c..630a58e49 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -98,14 +98,9 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo
98 has_ARB_direct_state_access = true; 98 has_ARB_direct_state_access = true;
99 } else if (extension == "GL_ARB_multi_bind") { 99 } else if (extension == "GL_ARB_multi_bind") {
100 has_ARB_multi_bind = true; 100 has_ARB_multi_bind = true;
101 } else if (extension == "GL_ARB_separate_shader_objects") {
102 has_ARB_separate_shader_objects = true;
103 } else if (extension == "GL_ARB_vertex_attrib_binding") {
104 has_ARB_vertex_attrib_binding = true;
105 } 101 }
106 } 102 }
107 103
108 ASSERT_MSG(has_ARB_separate_shader_objects, "has_ARB_separate_shader_objects is unsupported");
109 OpenGLState::ApplyDefaultState(); 104 OpenGLState::ApplyDefaultState();
110 105
111 // Create render framebuffer 106 // Create render framebuffer
@@ -542,6 +537,30 @@ void RasterizerOpenGL::Clear() {
542 ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!"); 537 ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!");
543 use_stencil = true; 538 use_stencil = true;
544 clear_state.stencil.test_enabled = true; 539 clear_state.stencil.test_enabled = true;
540 if (regs.clear_flags.stencil) {
541 // Stencil affects the clear so fill it with the used masks
542 clear_state.stencil.front.test_func = GL_ALWAYS;
543 clear_state.stencil.front.test_mask = regs.stencil_front_func_mask;
544 clear_state.stencil.front.action_stencil_fail = GL_KEEP;
545 clear_state.stencil.front.action_depth_fail = GL_KEEP;
546 clear_state.stencil.front.action_depth_pass = GL_KEEP;
547 clear_state.stencil.front.write_mask = regs.stencil_front_mask;
548 if (regs.stencil_two_side_enable) {
549 clear_state.stencil.back.test_func = GL_ALWAYS;
550 clear_state.stencil.back.test_mask = regs.stencil_back_func_mask;
551 clear_state.stencil.back.action_stencil_fail = GL_KEEP;
552 clear_state.stencil.back.action_depth_fail = GL_KEEP;
553 clear_state.stencil.back.action_depth_pass = GL_KEEP;
554 clear_state.stencil.back.write_mask = regs.stencil_back_mask;
555 } else {
556 clear_state.stencil.back.test_func = GL_ALWAYS;
557 clear_state.stencil.back.test_mask = 0xFFFFFFFF;
558 clear_state.stencil.back.write_mask = 0xFFFFFFFF;
559 clear_state.stencil.back.action_stencil_fail = GL_KEEP;
560 clear_state.stencil.back.action_depth_fail = GL_KEEP;
561 clear_state.stencil.back.action_depth_pass = GL_KEEP;
562 }
563 }
545 } 564 }
546 565
547 if (!use_color && !use_depth && !use_stencil) { 566 if (!use_color && !use_depth && !use_stencil) {
@@ -553,6 +572,14 @@ void RasterizerOpenGL::Clear() {
553 572
554 ConfigureFramebuffers(clear_state, use_color, use_depth || use_stencil, false, 573 ConfigureFramebuffers(clear_state, use_color, use_depth || use_stencil, false,
555 regs.clear_buffers.RT.Value()); 574 regs.clear_buffers.RT.Value());
575 if (regs.clear_flags.scissor) {
576 SyncScissorTest(clear_state);
577 }
578
579 if (regs.clear_flags.viewport) {
580 clear_state.EmulateViewportWithScissor();
581 }
582
556 clear_state.Apply(); 583 clear_state.Apply();
557 584
558 if (use_color) { 585 if (use_color) {
@@ -588,7 +615,7 @@ void RasterizerOpenGL::DrawArrays() {
588 SyncLogicOpState(); 615 SyncLogicOpState();
589 SyncCullMode(); 616 SyncCullMode();
590 SyncPrimitiveRestart(); 617 SyncPrimitiveRestart();
591 SyncScissorTest(); 618 SyncScissorTest(state);
592 // Alpha Testing is synced on shaders. 619 // Alpha Testing is synced on shaders.
593 SyncTransformFeedback(); 620 SyncTransformFeedback();
594 SyncPointState(); 621 SyncPointState();
@@ -815,7 +842,7 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr
815 } 842 }
816 const u32 bias = config.mip_lod_bias.Value(); 843 const u32 bias = config.mip_lod_bias.Value();
817 // Sign extend the 13-bit value. 844 // Sign extend the 13-bit value.
818 const u32 mask = 1U << (13 - 1); 845 constexpr u32 mask = 1U << (13 - 1);
819 const float bias_lod = static_cast<s32>((bias ^ mask) - mask) / 256.f; 846 const float bias_lod = static_cast<s32>((bias ^ mask) - mask) / 256.f;
820 if (lod_bias != bias_lod) { 847 if (lod_bias != bias_lod) {
821 lod_bias = bias_lod; 848 lod_bias = bias_lod;
@@ -947,8 +974,8 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
947 auto& viewport = current_state.viewports[i]; 974 auto& viewport = current_state.viewports[i];
948 viewport.x = viewport_rect.left; 975 viewport.x = viewport_rect.left;
949 viewport.y = viewport_rect.bottom; 976 viewport.y = viewport_rect.bottom;
950 viewport.width = static_cast<GLfloat>(viewport_rect.GetWidth()); 977 viewport.width = viewport_rect.GetWidth();
951 viewport.height = static_cast<GLfloat>(viewport_rect.GetHeight()); 978 viewport.height = viewport_rect.GetHeight();
952 viewport.depth_range_far = regs.viewports[i].depth_range_far; 979 viewport.depth_range_far = regs.viewports[i].depth_range_far;
953 viewport.depth_range_near = regs.viewports[i].depth_range_near; 980 viewport.depth_range_near = regs.viewports[i].depth_range_near;
954 } 981 }
@@ -1120,11 +1147,11 @@ void RasterizerOpenGL::SyncLogicOpState() {
1120 state.logic_op.operation = MaxwellToGL::LogicOp(regs.logic_op.operation); 1147 state.logic_op.operation = MaxwellToGL::LogicOp(regs.logic_op.operation);
1121} 1148}
1122 1149
1123void RasterizerOpenGL::SyncScissorTest() { 1150void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) {
1124 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1151 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
1125 for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumViewports; i++) { 1152 for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumViewports; i++) {
1126 const auto& src = regs.scissor_test[i]; 1153 const auto& src = regs.scissor_test[i];
1127 auto& dst = state.viewports[i].scissor; 1154 auto& dst = current_state.viewports[i].scissor;
1128 dst.enabled = (src.enable != 0); 1155 dst.enabled = (src.enable != 0);
1129 if (dst.enabled == 0) { 1156 if (dst.enabled == 0) {
1130 return; 1157 return;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 6e78ab4cd..f4354289c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -91,19 +91,20 @@ private:
91 void SyncWithConfig(const Tegra::Texture::TSCEntry& info); 91 void SyncWithConfig(const Tegra::Texture::TSCEntry& info);
92 92
93 private: 93 private:
94 Tegra::Texture::TextureFilter mag_filter; 94 Tegra::Texture::TextureFilter mag_filter = Tegra::Texture::TextureFilter::Nearest;
95 Tegra::Texture::TextureFilter min_filter; 95 Tegra::Texture::TextureFilter min_filter = Tegra::Texture::TextureFilter::Nearest;
96 Tegra::Texture::TextureMipmapFilter mip_filter; 96 Tegra::Texture::TextureMipmapFilter mip_filter = Tegra::Texture::TextureMipmapFilter::None;
97 Tegra::Texture::WrapMode wrap_u; 97 Tegra::Texture::WrapMode wrap_u = Tegra::Texture::WrapMode::ClampToEdge;
98 Tegra::Texture::WrapMode wrap_v; 98 Tegra::Texture::WrapMode wrap_v = Tegra::Texture::WrapMode::ClampToEdge;
99 Tegra::Texture::WrapMode wrap_p; 99 Tegra::Texture::WrapMode wrap_p = Tegra::Texture::WrapMode::ClampToEdge;
100 bool uses_depth_compare; 100 bool uses_depth_compare = false;
101 Tegra::Texture::DepthCompareFunc depth_compare_func; 101 Tegra::Texture::DepthCompareFunc depth_compare_func =
102 GLvec4 border_color; 102 Tegra::Texture::DepthCompareFunc::Always;
103 float min_lod; 103 GLvec4 border_color = {};
104 float max_lod; 104 float min_lod = 0.0f;
105 float lod_bias; 105 float max_lod = 16.0f;
106 float max_anisotropic; 106 float lod_bias = 0.0f;
107 float max_anisotropic = 1.0f;
107 }; 108 };
108 109
109 /** 110 /**
@@ -171,7 +172,7 @@ private:
171 void SyncMultiSampleState(); 172 void SyncMultiSampleState();
172 173
173 /// Syncs the scissor test state to match the guest state 174 /// Syncs the scissor test state to match the guest state
174 void SyncScissorTest(); 175 void SyncScissorTest(OpenGLState& current_state);
175 176
176 /// Syncs the transform feedback state to match the guest state 177 /// Syncs the transform feedback state to match the guest state
177 void SyncTransformFeedback(); 178 void SyncTransformFeedback();
@@ -187,8 +188,6 @@ private:
187 188
188 bool has_ARB_direct_state_access = false; 189 bool has_ARB_direct_state_access = false;
189 bool has_ARB_multi_bind = false; 190 bool has_ARB_multi_bind = false;
190 bool has_ARB_separate_shader_objects = false;
191 bool has_ARB_vertex_attrib_binding = false;
192 191
193 OpenGLState state; 192 OpenGLState state;
194 193
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 9ca82c06c..d458f77e4 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -15,6 +15,7 @@
15#include "core/memory.h" 15#include "core/memory.h"
16#include "core/settings.h" 16#include "core/settings.h"
17#include "video_core/engines/maxwell_3d.h" 17#include "video_core/engines/maxwell_3d.h"
18#include "video_core/morton.h"
18#include "video_core/renderer_opengl/gl_rasterizer.h" 19#include "video_core/renderer_opengl/gl_rasterizer.h"
19#include "video_core/renderer_opengl/gl_rasterizer_cache.h" 20#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
20#include "video_core/renderer_opengl/gl_state.h" 21#include "video_core/renderer_opengl/gl_state.h"
@@ -22,10 +23,11 @@
22#include "video_core/surface.h" 23#include "video_core/surface.h"
23#include "video_core/textures/astc.h" 24#include "video_core/textures/astc.h"
24#include "video_core/textures/decoders.h" 25#include "video_core/textures/decoders.h"
25#include "video_core/utils.h"
26 26
27namespace OpenGL { 27namespace OpenGL {
28 28
29using VideoCore::MortonSwizzle;
30using VideoCore::MortonSwizzleMode;
29using VideoCore::Surface::ComponentTypeFromDepthFormat; 31using VideoCore::Surface::ComponentTypeFromDepthFormat;
30using VideoCore::Surface::ComponentTypeFromRenderTarget; 32using VideoCore::Surface::ComponentTypeFromRenderTarget;
31using VideoCore::Surface::ComponentTypeFromTexture; 33using VideoCore::Surface::ComponentTypeFromTexture;
@@ -265,11 +267,11 @@ static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex
265 {GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, 267 {GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
266 true}, // DXN2UNORM 268 true}, // DXN2UNORM
267 {GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, ComponentType::SNorm, true}, // DXN2SNORM 269 {GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, ComponentType::SNorm, true}, // DXN2SNORM
268 {GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, 270 {GL_COMPRESSED_RGBA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
269 true}, // BC7U 271 true}, // BC7U
270 {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT_ARB, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, 272 {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float,
271 ComponentType::Float, true}, // BC6H_UF16 273 true}, // BC6H_UF16
272 {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT_ARB, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float, 274 {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float,
273 true}, // BC6H_SF16 275 true}, // BC6H_SF16
274 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4 276 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4
275 {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // G8R8U 277 {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // G8R8U
@@ -306,8 +308,8 @@ static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex
306 true}, // DXT23_SRGB 308 true}, // DXT23_SRGB
307 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, 309 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
308 true}, // DXT45_SRGB 310 true}, // DXT45_SRGB
309 {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM_ARB, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, 311 {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
310 ComponentType::UNorm, true}, // BC7U_SRGB 312 true}, // BC7U_SRGB
311 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4_SRGB 313 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4_SRGB
312 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8_SRGB 314 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8_SRGB
313 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5_SRGB 315 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5_SRGB
@@ -346,7 +348,7 @@ static GLenum SurfaceTargetToGL(SurfaceTarget target) {
346 case SurfaceTarget::TextureCubemap: 348 case SurfaceTarget::TextureCubemap:
347 return GL_TEXTURE_CUBE_MAP; 349 return GL_TEXTURE_CUBE_MAP;
348 case SurfaceTarget::TextureCubeArray: 350 case SurfaceTarget::TextureCubeArray:
349 return GL_TEXTURE_CUBE_MAP_ARRAY_ARB; 351 return GL_TEXTURE_CUBE_MAP_ARRAY;
350 } 352 }
351 LOG_CRITICAL(Render_OpenGL, "Unimplemented texture target={}", static_cast<u32>(target)); 353 LOG_CRITICAL(Render_OpenGL, "Unimplemented texture target={}", static_cast<u32>(target));
352 UNREACHABLE(); 354 UNREACHABLE();
@@ -370,174 +372,7 @@ MathUtil::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
370 return {0, actual_height, MipWidth(mip_level), 0}; 372 return {0, actual_height, MipWidth(mip_level), 0};
371} 373}
372 374
373template <bool morton_to_gl, PixelFormat format> 375void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
374void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth, u8* gl_buffer,
375 std::size_t gl_buffer_size, VAddr addr) {
376 constexpr u32 bytes_per_pixel = GetBytesPerPixel(format);
377
378 // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
379 // pixel values.
380 const u32 tile_size_x{GetDefaultBlockWidth(format)};
381 const u32 tile_size_y{GetDefaultBlockHeight(format)};
382
383 if (morton_to_gl) {
384 Tegra::Texture::UnswizzleTexture(gl_buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel,
385 stride, height, depth, block_height, block_depth);
386 } else {
387 Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x,
388 (height + tile_size_y - 1) / tile_size_y, depth,
389 bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr),
390 gl_buffer, false, block_height, block_depth);
391 }
392}
393
394using GLConversionArray = std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr),
395 VideoCore::Surface::MaxPixelFormat>;
396
397static constexpr GLConversionArray morton_to_gl_fns = {
398 // clang-format off
399 MortonCopy<true, PixelFormat::ABGR8U>,
400 MortonCopy<true, PixelFormat::ABGR8S>,
401 MortonCopy<true, PixelFormat::ABGR8UI>,
402 MortonCopy<true, PixelFormat::B5G6R5U>,
403 MortonCopy<true, PixelFormat::A2B10G10R10U>,
404 MortonCopy<true, PixelFormat::A1B5G5R5U>,
405 MortonCopy<true, PixelFormat::R8U>,
406 MortonCopy<true, PixelFormat::R8UI>,
407 MortonCopy<true, PixelFormat::RGBA16F>,
408 MortonCopy<true, PixelFormat::RGBA16U>,
409 MortonCopy<true, PixelFormat::RGBA16UI>,
410 MortonCopy<true, PixelFormat::R11FG11FB10F>,
411 MortonCopy<true, PixelFormat::RGBA32UI>,
412 MortonCopy<true, PixelFormat::DXT1>,
413 MortonCopy<true, PixelFormat::DXT23>,
414 MortonCopy<true, PixelFormat::DXT45>,
415 MortonCopy<true, PixelFormat::DXN1>,
416 MortonCopy<true, PixelFormat::DXN2UNORM>,
417 MortonCopy<true, PixelFormat::DXN2SNORM>,
418 MortonCopy<true, PixelFormat::BC7U>,
419 MortonCopy<true, PixelFormat::BC6H_UF16>,
420 MortonCopy<true, PixelFormat::BC6H_SF16>,
421 MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
422 MortonCopy<true, PixelFormat::G8R8U>,
423 MortonCopy<true, PixelFormat::G8R8S>,
424 MortonCopy<true, PixelFormat::BGRA8>,
425 MortonCopy<true, PixelFormat::RGBA32F>,
426 MortonCopy<true, PixelFormat::RG32F>,
427 MortonCopy<true, PixelFormat::R32F>,
428 MortonCopy<true, PixelFormat::R16F>,
429 MortonCopy<true, PixelFormat::R16U>,
430 MortonCopy<true, PixelFormat::R16S>,
431 MortonCopy<true, PixelFormat::R16UI>,
432 MortonCopy<true, PixelFormat::R16I>,
433 MortonCopy<true, PixelFormat::RG16>,
434 MortonCopy<true, PixelFormat::RG16F>,
435 MortonCopy<true, PixelFormat::RG16UI>,
436 MortonCopy<true, PixelFormat::RG16I>,
437 MortonCopy<true, PixelFormat::RG16S>,
438 MortonCopy<true, PixelFormat::RGB32F>,
439 MortonCopy<true, PixelFormat::RGBA8_SRGB>,
440 MortonCopy<true, PixelFormat::RG8U>,
441 MortonCopy<true, PixelFormat::RG8S>,
442 MortonCopy<true, PixelFormat::RG32UI>,
443 MortonCopy<true, PixelFormat::R32UI>,
444 MortonCopy<true, PixelFormat::ASTC_2D_8X8>,
445 MortonCopy<true, PixelFormat::ASTC_2D_8X5>,
446 MortonCopy<true, PixelFormat::ASTC_2D_5X4>,
447 MortonCopy<true, PixelFormat::BGRA8_SRGB>,
448 MortonCopy<true, PixelFormat::DXT1_SRGB>,
449 MortonCopy<true, PixelFormat::DXT23_SRGB>,
450 MortonCopy<true, PixelFormat::DXT45_SRGB>,
451 MortonCopy<true, PixelFormat::BC7U_SRGB>,
452 MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>,
453 MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>,
454 MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>,
455 MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>,
456 MortonCopy<true, PixelFormat::ASTC_2D_5X5>,
457 MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
458 MortonCopy<true, PixelFormat::ASTC_2D_10X8>,
459 MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>,
460 MortonCopy<true, PixelFormat::Z32F>,
461 MortonCopy<true, PixelFormat::Z16>,
462 MortonCopy<true, PixelFormat::Z24S8>,
463 MortonCopy<true, PixelFormat::S8Z24>,
464 MortonCopy<true, PixelFormat::Z32FS8>,
465 // clang-format on
466};
467
468static constexpr GLConversionArray gl_to_morton_fns = {
469 // clang-format off
470 MortonCopy<false, PixelFormat::ABGR8U>,
471 MortonCopy<false, PixelFormat::ABGR8S>,
472 MortonCopy<false, PixelFormat::ABGR8UI>,
473 MortonCopy<false, PixelFormat::B5G6R5U>,
474 MortonCopy<false, PixelFormat::A2B10G10R10U>,
475 MortonCopy<false, PixelFormat::A1B5G5R5U>,
476 MortonCopy<false, PixelFormat::R8U>,
477 MortonCopy<false, PixelFormat::R8UI>,
478 MortonCopy<false, PixelFormat::RGBA16F>,
479 MortonCopy<false, PixelFormat::RGBA16U>,
480 MortonCopy<false, PixelFormat::RGBA16UI>,
481 MortonCopy<false, PixelFormat::R11FG11FB10F>,
482 MortonCopy<false, PixelFormat::RGBA32UI>,
483 MortonCopy<false, PixelFormat::DXT1>,
484 MortonCopy<false, PixelFormat::DXT23>,
485 MortonCopy<false, PixelFormat::DXT45>,
486 MortonCopy<false, PixelFormat::DXN1>,
487 MortonCopy<false, PixelFormat::DXN2UNORM>,
488 MortonCopy<false, PixelFormat::DXN2SNORM>,
489 MortonCopy<false, PixelFormat::BC7U>,
490 MortonCopy<false, PixelFormat::BC6H_UF16>,
491 MortonCopy<false, PixelFormat::BC6H_SF16>,
492 // TODO(Subv): Swizzling ASTC formats are not supported
493 nullptr,
494 MortonCopy<false, PixelFormat::G8R8U>,
495 MortonCopy<false, PixelFormat::G8R8S>,
496 MortonCopy<false, PixelFormat::BGRA8>,
497 MortonCopy<false, PixelFormat::RGBA32F>,
498 MortonCopy<false, PixelFormat::RG32F>,
499 MortonCopy<false, PixelFormat::R32F>,
500 MortonCopy<false, PixelFormat::R16F>,
501 MortonCopy<false, PixelFormat::R16U>,
502 MortonCopy<false, PixelFormat::R16S>,
503 MortonCopy<false, PixelFormat::R16UI>,
504 MortonCopy<false, PixelFormat::R16I>,
505 MortonCopy<false, PixelFormat::RG16>,
506 MortonCopy<false, PixelFormat::RG16F>,
507 MortonCopy<false, PixelFormat::RG16UI>,
508 MortonCopy<false, PixelFormat::RG16I>,
509 MortonCopy<false, PixelFormat::RG16S>,
510 MortonCopy<false, PixelFormat::RGB32F>,
511 MortonCopy<false, PixelFormat::RGBA8_SRGB>,
512 MortonCopy<false, PixelFormat::RG8U>,
513 MortonCopy<false, PixelFormat::RG8S>,
514 MortonCopy<false, PixelFormat::RG32UI>,
515 MortonCopy<false, PixelFormat::R32UI>,
516 nullptr,
517 nullptr,
518 nullptr,
519 MortonCopy<false, PixelFormat::BGRA8_SRGB>,
520 MortonCopy<false, PixelFormat::DXT1_SRGB>,
521 MortonCopy<false, PixelFormat::DXT23_SRGB>,
522 MortonCopy<false, PixelFormat::DXT45_SRGB>,
523 MortonCopy<false, PixelFormat::BC7U_SRGB>,
524 nullptr,
525 nullptr,
526 nullptr,
527 nullptr,
528 nullptr,
529 nullptr,
530 nullptr,
531 nullptr,
532 MortonCopy<false, PixelFormat::Z32F>,
533 MortonCopy<false, PixelFormat::Z16>,
534 MortonCopy<false, PixelFormat::Z24S8>,
535 MortonCopy<false, PixelFormat::S8Z24>,
536 MortonCopy<false, PixelFormat::Z32FS8>,
537 // clang-format on
538};
539
540void SwizzleFunc(const GLConversionArray& functions, const SurfaceParams& params,
541 std::vector<u8>& gl_buffer, u32 mip_level) { 376 std::vector<u8>& gl_buffer, u32 mip_level) {
542 u32 depth = params.MipDepth(mip_level); 377 u32 depth = params.MipDepth(mip_level);
543 if (params.target == SurfaceTarget::Texture2D) { 378 if (params.target == SurfaceTarget::Texture2D) {
@@ -550,19 +385,19 @@ void SwizzleFunc(const GLConversionArray& functions, const SurfaceParams& params
550 const u64 layer_size = params.LayerMemorySize(); 385 const u64 layer_size = params.LayerMemorySize();
551 const u64 gl_size = params.LayerSizeGL(mip_level); 386 const u64 gl_size = params.LayerSizeGL(mip_level);
552 for (u32 i = 0; i < params.depth; i++) { 387 for (u32 i = 0; i < params.depth; i++) {
553 functions[static_cast<std::size_t>(params.pixel_format)]( 388 MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
554 params.MipWidth(mip_level), params.MipBlockHeight(mip_level), 389 params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
555 params.MipHeight(mip_level), params.MipBlockDepth(mip_level), 1, 390 params.MipBlockDepth(mip_level), 1, gl_buffer.data() + offset_gl, gl_size,
556 gl_buffer.data() + offset_gl, gl_size, params.addr + offset); 391 params.addr + offset);
557 offset += layer_size; 392 offset += layer_size;
558 offset_gl += gl_size; 393 offset_gl += gl_size;
559 } 394 }
560 } else { 395 } else {
561 const u64 offset = params.GetMipmapLevelOffset(mip_level); 396 const u64 offset = params.GetMipmapLevelOffset(mip_level);
562 functions[static_cast<std::size_t>(params.pixel_format)]( 397 MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
563 params.MipWidth(mip_level), params.MipBlockHeight(mip_level), 398 params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
564 params.MipHeight(mip_level), params.MipBlockDepth(mip_level), depth, gl_buffer.data(), 399 params.MipBlockDepth(mip_level), depth, gl_buffer.data(), gl_buffer.size(),
565 gl_buffer.size(), params.addr + offset); 400 params.addr + offset);
566 } 401 }
567} 402}
568 403
@@ -726,7 +561,7 @@ static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
726 const std::size_t buffer_size = std::max(src_params.size_in_bytes, dst_params.size_in_bytes); 561 const std::size_t buffer_size = std::max(src_params.size_in_bytes, dst_params.size_in_bytes);
727 562
728 glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle); 563 glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle);
729 glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW_ARB); 564 glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW);
730 if (source_format.compressed) { 565 if (source_format.compressed) {
731 glGetCompressedTextureImage(src_surface->Texture().handle, src_attachment, 566 glGetCompressedTextureImage(src_surface->Texture().handle, src_attachment,
732 static_cast<GLsizei>(src_params.size_in_bytes), nullptr); 567 static_cast<GLsizei>(src_params.size_in_bytes), nullptr);
@@ -996,7 +831,7 @@ void CachedSurface::LoadGLBuffer() {
996 ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", 831 ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
997 params.block_width, static_cast<u32>(params.target)); 832 params.block_width, static_cast<u32>(params.target));
998 for (u32 i = 0; i < params.max_mip_level; i++) 833 for (u32 i = 0; i < params.max_mip_level; i++)
999 SwizzleFunc(morton_to_gl_fns, params, gl_buffer[i], i); 834 SwizzleFunc(MortonSwizzleMode::MortonToLinear, params, gl_buffer[i], i);
1000 } else { 835 } else {
1001 const auto texture_src_data{Memory::GetPointer(params.addr)}; 836 const auto texture_src_data{Memory::GetPointer(params.addr)};
1002 const auto texture_src_data_end{texture_src_data + params.size_in_bytes_gl}; 837 const auto texture_src_data_end{texture_src_data + params.size_in_bytes_gl};
@@ -1035,7 +870,7 @@ void CachedSurface::FlushGLBuffer() {
1035 ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", 870 ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
1036 params.block_width, static_cast<u32>(params.target)); 871 params.block_width, static_cast<u32>(params.target));
1037 872
1038 SwizzleFunc(gl_to_morton_fns, params, gl_buffer[0], 0); 873 SwizzleFunc(MortonSwizzleMode::LinearToMorton, params, gl_buffer[0], 0);
1039 } else { 874 } else {
1040 std::memcpy(Memory::GetPointer(GetAddr()), gl_buffer[0].data(), GetSizeInBytes()); 875 std::memcpy(Memory::GetPointer(GetAddr()), gl_buffer[0].data(), GetSizeInBytes());
1041 } 876 }
@@ -1275,6 +1110,31 @@ Surface RasterizerCacheOpenGL::GetUncachedSurface(const SurfaceParams& params) {
1275 return surface; 1110 return surface;
1276} 1111}
1277 1112
1113void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
1114 const Surface& dst_surface) {
1115 const auto& init_params{src_surface->GetSurfaceParams()};
1116 const auto& dst_params{dst_surface->GetSurfaceParams()};
1117 VAddr address = init_params.addr;
1118 const std::size_t layer_size = dst_params.LayerMemorySize();
1119 for (u32 layer = 0; layer < dst_params.depth; layer++) {
1120 for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) {
1121 const VAddr sub_address = address + dst_params.GetMipmapLevelOffset(mipmap);
1122 const Surface& copy = TryGet(sub_address);
1123 if (!copy)
1124 continue;
1125 const auto& src_params{copy->GetSurfaceParams()};
1126 const u32 width{std::min(src_params.width, dst_params.MipWidth(mipmap))};
1127 const u32 height{std::min(src_params.height, dst_params.MipHeight(mipmap))};
1128
1129 glCopyImageSubData(copy->Texture().handle, SurfaceTargetToGL(src_params.target), 0, 0,
1130 0, 0, dst_surface->Texture().handle,
1131 SurfaceTargetToGL(dst_params.target), mipmap, 0, 0, layer, width,
1132 height, 1);
1133 }
1134 address += layer_size;
1135 }
1136}
1137
1278void RasterizerCacheOpenGL::FermiCopySurface( 1138void RasterizerCacheOpenGL::FermiCopySurface(
1279 const Tegra::Engines::Fermi2D::Regs::Surface& src_config, 1139 const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
1280 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config) { 1140 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config) {
@@ -1340,11 +1200,13 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
1340 CopySurface(old_surface, new_surface, copy_pbo.handle); 1200 CopySurface(old_surface, new_surface, copy_pbo.handle);
1341 } 1201 }
1342 break; 1202 break;
1343 case SurfaceTarget::TextureCubemap:
1344 case SurfaceTarget::Texture3D: 1203 case SurfaceTarget::Texture3D:
1204 AccurateCopySurface(old_surface, new_surface);
1205 break;
1206 case SurfaceTarget::TextureCubemap:
1345 case SurfaceTarget::Texture2DArray: 1207 case SurfaceTarget::Texture2DArray:
1346 case SurfaceTarget::TextureCubeArray: 1208 case SurfaceTarget::TextureCubeArray:
1347 AccurateCopySurface(old_surface, new_surface); 1209 FastLayeredCopySurface(old_surface, new_surface);
1348 break; 1210 break;
1349 default: 1211 default:
1350 LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", 1212 LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 494f6b903..9ac79c5a4 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -350,6 +350,7 @@ private:
350 350
351 /// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data 351 /// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data
352 void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface); 352 void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface);
353 void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface);
353 354
354 /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have 355 /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
355 /// previously been used. This is to prevent surfaces from being constantly created and 356 /// previously been used. This is to prevent surfaces from being constantly created and
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index a85a7c0c5..038b25c75 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -84,6 +84,7 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)
84 } 84 }
85 85
86 entries = program_result.second; 86 entries = program_result.second;
87 shader_length = entries.shader_length;
87 88
88 if (program_type != Maxwell::ShaderProgram::Geometry) { 89 if (program_type != Maxwell::ShaderProgram::Geometry) {
89 OGLShader shader; 90 OGLShader shader;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index ffbf21831..08f470de3 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -30,7 +30,7 @@ public:
30 } 30 }
31 31
32 std::size_t GetSizeInBytes() const override { 32 std::size_t GetSizeInBytes() const override {
33 return GLShader::MAX_PROGRAM_CODE_LENGTH * sizeof(u64); 33 return shader_length;
34 } 34 }
35 35
36 // We do not have to flush this cache as things in it are never modified by us. 36 // We do not have to flush this cache as things in it are never modified by us.
@@ -82,6 +82,7 @@ private:
82 u32 max_vertices, const std::string& debug_name); 82 u32 max_vertices, const std::string& debug_name);
83 83
84 VAddr addr; 84 VAddr addr;
85 std::size_t shader_length;
85 Maxwell::ShaderProgram program_type; 86 Maxwell::ShaderProgram program_type;
86 GLShader::ShaderSetup setup; 87 GLShader::ShaderSetup setup;
87 GLShader::ShaderEntries entries; 88 GLShader::ShaderEntries entries;
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 4c662eedb..05fe2d370 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -34,6 +34,17 @@ constexpr u32 PROGRAM_HEADER_SIZE = sizeof(Tegra::Shader::Header);
34constexpr u32 MAX_GEOMETRY_BUFFERS = 6; 34constexpr u32 MAX_GEOMETRY_BUFFERS = 6;
35constexpr u32 MAX_ATTRIBUTES = 0x100; // Size in vec4s, this value is untested 35constexpr u32 MAX_ATTRIBUTES = 0x100; // Size in vec4s, this value is untested
36 36
37static const char* INTERNAL_FLAG_NAMES[] = {"zero_flag", "sign_flag", "carry_flag",
38 "overflow_flag"};
39
40enum class InternalFlag : u64 {
41 ZeroFlag = 0,
42 SignFlag = 1,
43 CarryFlag = 2,
44 OverflowFlag = 3,
45 Amount
46};
47
37class DecompileFail : public std::runtime_error { 48class DecompileFail : public std::runtime_error {
38public: 49public:
39 using std::runtime_error::runtime_error; 50 using std::runtime_error::runtime_error;
@@ -49,8 +60,7 @@ static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
49 case Tegra::Shader::OutputTopology::TriangleStrip: 60 case Tegra::Shader::OutputTopology::TriangleStrip:
50 return "triangle_strip"; 61 return "triangle_strip";
51 default: 62 default:
52 LOG_CRITICAL(Render_OpenGL, "Unknown output topology {}", static_cast<u32>(topology)); 63 UNIMPLEMENTED_MSG("Unknown output topology: {}", static_cast<u32>(topology));
53 UNREACHABLE();
54 return "points"; 64 return "points";
55 } 65 }
56} 66}
@@ -85,7 +95,8 @@ struct Subroutine {
85class ControlFlowAnalyzer { 95class ControlFlowAnalyzer {
86public: 96public:
87 ControlFlowAnalyzer(const ProgramCode& program_code, u32 main_offset, const std::string& suffix) 97 ControlFlowAnalyzer(const ProgramCode& program_code, u32 main_offset, const std::string& suffix)
88 : program_code(program_code) { 98 : program_code(program_code), shader_coverage_begin(main_offset),
99 shader_coverage_end(main_offset + 1) {
89 100
90 // Recursively finds all subroutines. 101 // Recursively finds all subroutines.
91 const Subroutine& program_main = AddSubroutine(main_offset, PROGRAM_END, suffix); 102 const Subroutine& program_main = AddSubroutine(main_offset, PROGRAM_END, suffix);
@@ -97,10 +108,16 @@ public:
97 return std::move(subroutines); 108 return std::move(subroutines);
98 } 109 }
99 110
111 std::size_t GetShaderLength() const {
112 return shader_coverage_end * sizeof(u64);
113 }
114
100private: 115private:
101 const ProgramCode& program_code; 116 const ProgramCode& program_code;
102 std::set<Subroutine> subroutines; 117 std::set<Subroutine> subroutines;
103 std::map<std::pair<u32, u32>, ExitMethod> exit_method_map; 118 std::map<std::pair<u32, u32>, ExitMethod> exit_method_map;
119 u32 shader_coverage_begin;
120 u32 shader_coverage_end;
104 121
105 /// Adds and analyzes a new subroutine if it is not added yet. 122 /// Adds and analyzes a new subroutine if it is not added yet.
106 const Subroutine& AddSubroutine(u32 begin, u32 end, const std::string& suffix) { 123 const Subroutine& AddSubroutine(u32 begin, u32 end, const std::string& suffix) {
@@ -142,6 +159,9 @@ private:
142 return exit_method; 159 return exit_method;
143 160
144 for (u32 offset = begin; offset != end && offset != PROGRAM_END; ++offset) { 161 for (u32 offset = begin; offset != end && offset != PROGRAM_END; ++offset) {
162 shader_coverage_begin = std::min(shader_coverage_begin, offset);
163 shader_coverage_end = std::max(shader_coverage_end, offset + 1);
164
145 const Instruction instr = {program_code[offset]}; 165 const Instruction instr = {program_code[offset]};
146 if (const auto opcode = OpCode::Decode(instr)) { 166 if (const auto opcode = OpCode::Decode(instr)) {
147 switch (opcode->get().GetId()) { 167 switch (opcode->get().GetId()) {
@@ -167,8 +187,8 @@ private:
167 case OpCode::Id::SSY: 187 case OpCode::Id::SSY:
168 case OpCode::Id::PBK: { 188 case OpCode::Id::PBK: {
169 // The SSY and PBK use a similar encoding as the BRA instruction. 189 // The SSY and PBK use a similar encoding as the BRA instruction.
170 ASSERT_MSG(instr.bra.constant_buffer == 0, 190 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
171 "Constant buffer branching is not supported"); 191 "Constant buffer branching is not supported");
172 const u32 target = offset + instr.bra.GetBranchTarget(); 192 const u32 target = offset + instr.bra.GetBranchTarget();
173 labels.insert(target); 193 labels.insert(target);
174 // Continue scanning for an exit method. 194 // Continue scanning for an exit method.
@@ -258,14 +278,6 @@ private:
258 const std::string& suffix; 278 const std::string& suffix;
259}; 279};
260 280
261enum class InternalFlag : u64 {
262 ZeroFlag = 0,
263 CarryFlag = 1,
264 OverflowFlag = 2,
265 NaNFlag = 3,
266 Amount
267};
268
269/** 281/**
270 * Used to manage shader registers that are emulated with GLSL. This class keeps track of the state 282 * Used to manage shader registers that are emulated with GLSL. This class keeps track of the state
271 * of all registers (e.g. whether they are currently being used as Floats or Integers), and 283 * of all registers (e.g. whether they are currently being used as Floats or Integers), and
@@ -299,8 +311,7 @@ public:
299 // Default - do nothing 311 // Default - do nothing
300 return value; 312 return value;
301 default: 313 default:
302 LOG_CRITICAL(HW_GPU, "Unimplemented conversion size {}", static_cast<u32>(size)); 314 UNIMPLEMENTED_MSG("Unimplemented conversion size: {}", static_cast<u32>(size));
303 UNREACHABLE();
304 } 315 }
305 } 316 }
306 317
@@ -363,7 +374,7 @@ public:
363 u64 value_num_components, bool is_saturated = false, 374 u64 value_num_components, bool is_saturated = false,
364 u64 dest_elem = 0, Register::Size size = Register::Size::Word, 375 u64 dest_elem = 0, Register::Size size = Register::Size::Word,
365 bool sets_cc = false) { 376 bool sets_cc = false) {
366 ASSERT_MSG(!is_saturated, "Unimplemented"); 377 UNIMPLEMENTED_IF(is_saturated);
367 378
368 const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"}; 379 const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"};
369 380
@@ -373,7 +384,7 @@ public:
373 if (sets_cc) { 384 if (sets_cc) {
374 const std::string zero_condition = "( " + ConvertIntegerSize(value, size) + " == 0 )"; 385 const std::string zero_condition = "( " + ConvertIntegerSize(value, size) + " == 0 )";
375 SetInternalFlag(InternalFlag::ZeroFlag, zero_condition); 386 SetInternalFlag(InternalFlag::ZeroFlag, zero_condition);
376 LOG_WARNING(HW_GPU, "Control Codes Imcomplete."); 387 LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete.");
377 } 388 }
378 } 389 }
379 390
@@ -392,7 +403,7 @@ public:
392 Tegra::Shader::HalfMerge merge, u64 dest_num_components, 403 Tegra::Shader::HalfMerge merge, u64 dest_num_components,
393 u64 value_num_components, bool is_saturated = false, 404 u64 value_num_components, bool is_saturated = false,
394 u64 dest_elem = 0) { 405 u64 dest_elem = 0) {
395 ASSERT_MSG(!is_saturated, "Unimplemented"); 406 UNIMPLEMENTED_IF(is_saturated);
396 407
397 const std::string result = [&]() { 408 const std::string result = [&]() {
398 switch (merge) { 409 switch (merge) {
@@ -456,24 +467,25 @@ public:
456 shader.AddLine("lmem[" + index + "] = " + func + '(' + value + ");"); 467 shader.AddLine("lmem[" + index + "] = " + func + '(' + value + ");");
457 } 468 }
458 469
459 std::string GetControlCode(const Tegra::Shader::ControlCode cc) const { 470 std::string GetConditionCode(const Tegra::Shader::ConditionCode cc) const {
460 switch (cc) { 471 switch (cc) {
461 case Tegra::Shader::ControlCode::NEU: 472 case Tegra::Shader::ConditionCode::NEU:
462 return "!(" + GetInternalFlag(InternalFlag::ZeroFlag) + ')'; 473 return "!(" + GetInternalFlag(InternalFlag::ZeroFlag) + ')';
463 default: 474 default:
464 LOG_CRITICAL(HW_GPU, "Unimplemented Control Code {}", static_cast<u32>(cc)); 475 UNIMPLEMENTED_MSG("Unimplemented condition code: {}", static_cast<u32>(cc));
465 UNREACHABLE();
466 return "false"; 476 return "false";
467 } 477 }
468 } 478 }
469 479
470 std::string GetInternalFlag(const InternalFlag ii) const { 480 std::string GetInternalFlag(const InternalFlag flag) const {
471 const u32 code = static_cast<u32>(ii); 481 const auto index = static_cast<u32>(flag);
472 return "internalFlag_" + std::to_string(code) + suffix; 482 ASSERT(index < static_cast<u32>(InternalFlag::Amount));
483
484 return std::string(INTERNAL_FLAG_NAMES[index]) + '_' + suffix;
473 } 485 }
474 486
475 void SetInternalFlag(const InternalFlag ii, const std::string& value) const { 487 void SetInternalFlag(const InternalFlag flag, const std::string& value) const {
476 shader.AddLine(GetInternalFlag(ii) + " = " + value + ';'); 488 shader.AddLine(GetInternalFlag(flag) + " = " + value + ';');
477 } 489 }
478 490
479 /** 491 /**
@@ -488,27 +500,42 @@ public:
488 const Register& buf_reg) { 500 const Register& buf_reg) {
489 const std::string dest = GetOutputAttribute(attribute); 501 const std::string dest = GetOutputAttribute(attribute);
490 const std::string src = GetRegisterAsFloat(val_reg); 502 const std::string src = GetRegisterAsFloat(val_reg);
503 if (dest.empty())
504 return;
491 505
492 if (!dest.empty()) { 506 // Can happen with unknown/unimplemented output attributes, in which case we ignore the
493 // Can happen with unknown/unimplemented output attributes, in which case we ignore the 507 // instruction for now.
494 // instruction for now. 508 if (stage == Maxwell3D::Regs::ShaderStage::Geometry) {
495 if (stage == Maxwell3D::Regs::ShaderStage::Geometry) { 509 // TODO(Rodrigo): nouveau sets some attributes after setting emitting a geometry
496 // TODO(Rodrigo): nouveau sets some attributes after setting emitting a geometry 510 // shader. These instructions use a dirty register as buffer index, to avoid some
497 // shader. These instructions use a dirty register as buffer index, to avoid some 511 // drivers from complaining about out of boundary writes, guard them.
498 // drivers from complaining about out of boundary writes, guard them. 512 const std::string buf_index{"((" + GetRegisterAsInteger(buf_reg) + ") % " +
499 const std::string buf_index{"((" + GetRegisterAsInteger(buf_reg) + ") % " + 513 std::to_string(MAX_GEOMETRY_BUFFERS) + ')'};
500 std::to_string(MAX_GEOMETRY_BUFFERS) + ')'}; 514 shader.AddLine("amem[" + buf_index + "][" +
501 shader.AddLine("amem[" + buf_index + "][" + 515 std::to_string(static_cast<u32>(attribute)) + ']' + GetSwizzle(elem) +
502 std::to_string(static_cast<u32>(attribute)) + ']' + 516 " = " + src + ';');
503 GetSwizzle(elem) + " = " + src + ';'); 517 return;
504 } else { 518 }
505 if (attribute == Attribute::Index::PointSize) { 519
506 fixed_pipeline_output_attributes_used.insert(attribute); 520 switch (attribute) {
507 shader.AddLine(dest + " = " + src + ';'); 521 case Attribute::Index::ClipDistances0123:
508 } else { 522 case Attribute::Index::ClipDistances4567: {
509 shader.AddLine(dest + GetSwizzle(elem) + " = " + src + ';'); 523 const u64 index = attribute == Attribute::Index::ClipDistances4567 ? 4 : 0 + elem;
510 } 524 UNIMPLEMENTED_IF_MSG(
511 } 525 ((header.vtg.clip_distances >> index) & 1) == 0,
526 "Shader is setting gl_ClipDistance{} without enabling it in the header", index);
527
528 fixed_pipeline_output_attributes_used.insert(attribute);
529 shader.AddLine(dest + '[' + std::to_string(index) + "] = " + src + ';');
530 break;
531 }
532 case Attribute::Index::PointSize:
533 fixed_pipeline_output_attributes_used.insert(attribute);
534 shader.AddLine(dest + " = " + src + ';');
535 break;
536 default:
537 shader.AddLine(dest + GetSwizzle(elem) + " = " + src + ';');
538 break;
512 } 539 }
513 } 540 }
514 541
@@ -624,8 +651,8 @@ private:
624 651
625 /// Generates declarations for internal flags. 652 /// Generates declarations for internal flags.
626 void GenerateInternalFlags() { 653 void GenerateInternalFlags() {
627 for (u32 ii = 0; ii < static_cast<u64>(InternalFlag::Amount); ii++) { 654 for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) {
628 const InternalFlag code = static_cast<InternalFlag>(ii); 655 const InternalFlag code = static_cast<InternalFlag>(flag);
629 declarations.AddLine("bool " + GetInternalFlag(code) + " = false;"); 656 declarations.AddLine("bool " + GetInternalFlag(code) + " = false;");
630 } 657 }
631 declarations.AddNewLine(); 658 declarations.AddNewLine();
@@ -728,12 +755,19 @@ private:
728 void GenerateVertex() { 755 void GenerateVertex() {
729 if (stage != Maxwell3D::Regs::ShaderStage::Vertex) 756 if (stage != Maxwell3D::Regs::ShaderStage::Vertex)
730 return; 757 return;
758 bool clip_distances_declared = false;
759
731 declarations.AddLine("out gl_PerVertex {"); 760 declarations.AddLine("out gl_PerVertex {");
732 ++declarations.scope; 761 ++declarations.scope;
733 declarations.AddLine("vec4 gl_Position;"); 762 declarations.AddLine("vec4 gl_Position;");
734 for (auto& o : fixed_pipeline_output_attributes_used) { 763 for (auto& o : fixed_pipeline_output_attributes_used) {
735 if (o == Attribute::Index::PointSize) 764 if (o == Attribute::Index::PointSize)
736 declarations.AddLine("float gl_PointSize;"); 765 declarations.AddLine("float gl_PointSize;");
766 if (!clip_distances_declared && (o == Attribute::Index::ClipDistances0123 ||
767 o == Attribute::Index::ClipDistances4567)) {
768 declarations.AddLine("float gl_ClipDistance[];");
769 clip_distances_declared = true;
770 }
737 } 771 }
738 --declarations.scope; 772 --declarations.scope;
739 declarations.AddLine("};"); 773 declarations.AddLine("};");
@@ -761,8 +795,7 @@ private:
761 u64 dest_num_components, u64 value_num_components, u64 dest_elem, 795 u64 dest_num_components, u64 value_num_components, u64 dest_elem,
762 bool precise) { 796 bool precise) {
763 if (reg == Register::ZeroIndex) { 797 if (reg == Register::ZeroIndex) {
764 LOG_CRITICAL(HW_GPU, "Cannot set Register::ZeroIndex"); 798 // Setting RZ is a nop in hardware.
765 UNREACHABLE();
766 return; 799 return;
767 } 800 }
768 801
@@ -847,16 +880,13 @@ private:
847 if (declr_input_attribute.count(attribute) == 0) { 880 if (declr_input_attribute.count(attribute) == 0) {
848 declr_input_attribute[attribute] = input_mode; 881 declr_input_attribute[attribute] = input_mode;
849 } else { 882 } else {
850 if (declr_input_attribute[attribute] != input_mode) { 883 UNIMPLEMENTED_IF_MSG(declr_input_attribute[attribute] != input_mode,
851 LOG_CRITICAL(HW_GPU, "Same Input multiple input modes"); 884 "Multiple input modes for the same attribute");
852 UNREACHABLE();
853 }
854 } 885 }
855 return GeometryPass("input_attribute_" + std::to_string(index)); 886 return GeometryPass("input_attribute_" + std::to_string(index));
856 } 887 }
857 888
858 LOG_CRITICAL(HW_GPU, "Unhandled input attribute: {}", static_cast<u32>(attribute)); 889 UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute));
859 UNREACHABLE();
860 } 890 }
861 891
862 return "vec4(0, 0, 0, 0)"; 892 return "vec4(0, 0, 0, 0)";
@@ -882,24 +912,20 @@ private:
882 break; 912 break;
883 } 913 }
884 default: { 914 default: {
885 LOG_CRITICAL(HW_GPU, "Unhandled Ipa InterpMode: {}", static_cast<u32>(interp_mode)); 915 UNIMPLEMENTED_MSG("Unhandled IPA interp mode: {}", static_cast<u32>(interp_mode));
886 UNREACHABLE();
887 } 916 }
888 } 917 }
889 switch (sample_mode) { 918 switch (sample_mode) {
890 case Tegra::Shader::IpaSampleMode::Centroid: { 919 case Tegra::Shader::IpaSampleMode::Centroid:
891 // Note not implemented, it can be implemented with the "centroid " keyword in glsl; 920 // It can be implemented with the "centroid " keyword in glsl
892 LOG_CRITICAL(HW_GPU, "Ipa Sampler Mode: centroid, not implemented"); 921 UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode centroid");
893 UNREACHABLE();
894 break; 922 break;
895 } 923 case Tegra::Shader::IpaSampleMode::Default:
896 case Tegra::Shader::IpaSampleMode::Default: {
897 // Default, n/a 924 // Default, n/a
898 break; 925 break;
899 }
900 default: { 926 default: {
901 LOG_CRITICAL(HW_GPU, "Unhandled Ipa SampleMode: {}", static_cast<u32>(sample_mode)); 927 UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode: {}", static_cast<u32>(sample_mode));
902 UNREACHABLE(); 928 break;
903 } 929 }
904 } 930 }
905 return out; 931 return out;
@@ -912,6 +938,10 @@ private:
912 return "gl_PointSize"; 938 return "gl_PointSize";
913 case Attribute::Index::Position: 939 case Attribute::Index::Position:
914 return "position"; 940 return "position";
941 case Attribute::Index::ClipDistances0123:
942 case Attribute::Index::ClipDistances4567: {
943 return "gl_ClipDistance";
944 }
915 default: 945 default:
916 const u32 index{static_cast<u32>(attribute) - 946 const u32 index{static_cast<u32>(attribute) -
917 static_cast<u32>(Attribute::Index::Attribute_0)}; 947 static_cast<u32>(Attribute::Index::Attribute_0)};
@@ -920,8 +950,7 @@ private:
920 return "output_attribute_" + std::to_string(index); 950 return "output_attribute_" + std::to_string(index);
921 } 951 }
922 952
923 LOG_CRITICAL(HW_GPU, "Unhandled output attribute: {}", index); 953 UNIMPLEMENTED_MSG("Unhandled output attribute={}", index);
924 UNREACHABLE();
925 return {}; 954 return {};
926 } 955 }
927 } 956 }
@@ -951,9 +980,10 @@ private:
951class GLSLGenerator { 980class GLSLGenerator {
952public: 981public:
953 GLSLGenerator(const std::set<Subroutine>& subroutines, const ProgramCode& program_code, 982 GLSLGenerator(const std::set<Subroutine>& subroutines, const ProgramCode& program_code,
954 u32 main_offset, Maxwell3D::Regs::ShaderStage stage, const std::string& suffix) 983 u32 main_offset, Maxwell3D::Regs::ShaderStage stage, const std::string& suffix,
984 std::size_t shader_length)
955 : subroutines(subroutines), program_code(program_code), main_offset(main_offset), 985 : subroutines(subroutines), program_code(program_code), main_offset(main_offset),
956 stage(stage), suffix(suffix) { 986 stage(stage), suffix(suffix), shader_length(shader_length) {
957 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); 987 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
958 local_memory_size = header.GetLocalMemorySize(); 988 local_memory_size = header.GetLocalMemorySize();
959 regs.SetLocalMemory(local_memory_size); 989 regs.SetLocalMemory(local_memory_size);
@@ -966,7 +996,7 @@ public:
966 996
967 /// Returns entries in the shader that are useful for external functions 997 /// Returns entries in the shader that are useful for external functions
968 ShaderEntries GetEntries() const { 998 ShaderEntries GetEntries() const {
969 return {regs.GetConstBuffersDeclarations(), regs.GetSamplers()}; 999 return {regs.GetConstBuffersDeclarations(), regs.GetSamplers(), shader_length};
970 } 1000 }
971 1001
972private: 1002private:
@@ -1071,19 +1101,26 @@ private:
1071 const std::string& op_a, const std::string& op_b) const { 1101 const std::string& op_a, const std::string& op_b) const {
1072 using Tegra::Shader::PredCondition; 1102 using Tegra::Shader::PredCondition;
1073 static const std::unordered_map<PredCondition, const char*> PredicateComparisonStrings = { 1103 static const std::unordered_map<PredCondition, const char*> PredicateComparisonStrings = {
1074 {PredCondition::LessThan, "<"}, {PredCondition::Equal, "=="}, 1104 {PredCondition::LessThan, "<"},
1075 {PredCondition::LessEqual, "<="}, {PredCondition::GreaterThan, ">"}, 1105 {PredCondition::Equal, "=="},
1076 {PredCondition::NotEqual, "!="}, {PredCondition::GreaterEqual, ">="}, 1106 {PredCondition::LessEqual, "<="},
1077 {PredCondition::LessThanWithNan, "<"}, {PredCondition::NotEqualWithNan, "!="}, 1107 {PredCondition::GreaterThan, ">"},
1078 {PredCondition::GreaterThanWithNan, ">"}, {PredCondition::GreaterEqualWithNan, ">="}}; 1108 {PredCondition::NotEqual, "!="},
1109 {PredCondition::GreaterEqual, ">="},
1110 {PredCondition::LessThanWithNan, "<"},
1111 {PredCondition::NotEqualWithNan, "!="},
1112 {PredCondition::LessEqualWithNan, "<="},
1113 {PredCondition::GreaterThanWithNan, ">"},
1114 {PredCondition::GreaterEqualWithNan, ">="}};
1079 1115
1080 const auto& comparison{PredicateComparisonStrings.find(condition)}; 1116 const auto& comparison{PredicateComparisonStrings.find(condition)};
1081 ASSERT_MSG(comparison != PredicateComparisonStrings.end(), 1117 UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonStrings.end(),
1082 "Unknown predicate comparison operation"); 1118 "Unknown predicate comparison operation");
1083 1119
1084 std::string predicate{'(' + op_a + ") " + comparison->second + " (" + op_b + ')'}; 1120 std::string predicate{'(' + op_a + ") " + comparison->second + " (" + op_b + ')'};
1085 if (condition == PredCondition::LessThanWithNan || 1121 if (condition == PredCondition::LessThanWithNan ||
1086 condition == PredCondition::NotEqualWithNan || 1122 condition == PredCondition::NotEqualWithNan ||
1123 condition == PredCondition::LessEqualWithNan ||
1087 condition == PredCondition::GreaterThanWithNan || 1124 condition == PredCondition::GreaterThanWithNan ||
1088 condition == PredCondition::GreaterEqualWithNan) { 1125 condition == PredCondition::GreaterEqualWithNan) {
1089 predicate += " || isnan(" + op_a + ") || isnan(" + op_b + ')'; 1126 predicate += " || isnan(" + op_a + ") || isnan(" + op_b + ')';
@@ -1107,7 +1144,7 @@ private:
1107 }; 1144 };
1108 1145
1109 auto op = PredicateOperationStrings.find(operation); 1146 auto op = PredicateOperationStrings.find(operation);
1110 ASSERT_MSG(op != PredicateOperationStrings.end(), "Unknown predicate operation"); 1147 UNIMPLEMENTED_IF_MSG(op == PredicateOperationStrings.end(), "Unknown predicate operation");
1111 return op->second; 1148 return op->second;
1112 } 1149 }
1113 1150
@@ -1205,8 +1242,7 @@ private:
1205 break; 1242 break;
1206 } 1243 }
1207 default: 1244 default:
1208 LOG_CRITICAL(HW_GPU, "Unimplemented logic operation: {}", static_cast<u32>(logic_op)); 1245 UNIMPLEMENTED_MSG("Unimplemented logic operation={}", static_cast<u32>(logic_op));
1209 UNREACHABLE();
1210 } 1246 }
1211 1247
1212 if (dest != Tegra::Shader::Register::ZeroIndex) { 1248 if (dest != Tegra::Shader::Register::ZeroIndex) {
@@ -1224,9 +1260,8 @@ private:
1224 SetPredicate(static_cast<u64>(predicate), '(' + result + ") != 0"); 1260 SetPredicate(static_cast<u64>(predicate), '(' + result + ") != 0");
1225 break; 1261 break;
1226 default: 1262 default:
1227 LOG_CRITICAL(HW_GPU, "Unimplemented predicate result mode: {}", 1263 UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}",
1228 static_cast<u32>(predicate_mode)); 1264 static_cast<u32>(predicate_mode));
1229 UNREACHABLE();
1230 } 1265 }
1231 } 1266 }
1232 1267
@@ -1264,6 +1299,7 @@ private:
1264 shader.AddLine('{'); 1299 shader.AddLine('{');
1265 ++shader.scope; 1300 ++shader.scope;
1266 shader.AddLine(coord); 1301 shader.AddLine(coord);
1302 shader.AddLine("vec4 texture_tmp = " + texture + ';');
1267 1303
1268 // TEXS has two destination registers and a swizzle. The first two elements in the swizzle 1304 // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
1269 // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 1305 // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
@@ -1276,37 +1312,32 @@ private:
1276 1312
1277 if (written_components < 2) { 1313 if (written_components < 2) {
1278 // Write the first two swizzle components to gpr0 and gpr0+1 1314 // Write the first two swizzle components to gpr0 and gpr0+1
1279 regs.SetRegisterToFloat(instr.gpr0, component, texture, 1, 4, false, 1315 regs.SetRegisterToFloat(instr.gpr0, component, "texture_tmp", 1, 4, false,
1280 written_components % 2); 1316 written_components % 2);
1281 } else { 1317 } else {
1282 ASSERT(instr.texs.HasTwoDestinations()); 1318 ASSERT(instr.texs.HasTwoDestinations());
1283 // Write the rest of the swizzle components to gpr28 and gpr28+1 1319 // Write the rest of the swizzle components to gpr28 and gpr28+1
1284 regs.SetRegisterToFloat(instr.gpr28, component, texture, 1, 4, false, 1320 regs.SetRegisterToFloat(instr.gpr28, component, "texture_tmp", 1, 4, false,
1285 written_components % 2); 1321 written_components % 2);
1286 } 1322 }
1287 1323
1288 ++written_components; 1324 ++written_components;
1289 } 1325 }
1290
1291 --shader.scope; 1326 --shader.scope;
1292 shader.AddLine('}'); 1327 shader.AddLine('}');
1293 } 1328 }
1294 1329
1295 static u32 TextureCoordinates(Tegra::Shader::TextureType texture_type) { 1330 static u32 TextureCoordinates(Tegra::Shader::TextureType texture_type) {
1296 switch (texture_type) { 1331 switch (texture_type) {
1297 case Tegra::Shader::TextureType::Texture1D: { 1332 case Tegra::Shader::TextureType::Texture1D:
1298 return 1; 1333 return 1;
1299 } 1334 case Tegra::Shader::TextureType::Texture2D:
1300 case Tegra::Shader::TextureType::Texture2D: {
1301 return 2; 1335 return 2;
1302 }
1303 case Tegra::Shader::TextureType::Texture3D: 1336 case Tegra::Shader::TextureType::Texture3D:
1304 case Tegra::Shader::TextureType::TextureCube: { 1337 case Tegra::Shader::TextureType::TextureCube:
1305 return 3; 1338 return 3;
1306 }
1307 default: 1339 default:
1308 LOG_CRITICAL(HW_GPU, "Unhandled texture type {}", static_cast<u32>(texture_type)); 1340 UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type));
1309 UNREACHABLE();
1310 return 0; 1341 return 0;
1311 } 1342 }
1312 } 1343 }
@@ -1342,7 +1373,7 @@ private:
1342 void EmitFragmentOutputsWrite() { 1373 void EmitFragmentOutputsWrite() {
1343 ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment); 1374 ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment);
1344 1375
1345 ASSERT_MSG(header.ps.omap.sample_mask == 0, "Samplemask write is unimplemented"); 1376 UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Samplemask write is unimplemented");
1346 1377
1347 shader.AddLine("if (alpha_test[0] != 0) {"); 1378 shader.AddLine("if (alpha_test[0] != 0) {");
1348 ++shader.scope; 1379 ++shader.scope;
@@ -1408,7 +1439,7 @@ private:
1408 case Tegra::Shader::VideoType::Size32: 1439 case Tegra::Shader::VideoType::Size32:
1409 // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when 1440 // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when
1410 // this type is used (1 * 1 + 0 == 0x5b800000). Until a better 1441 // this type is used (1 * 1 + 0 == 0x5b800000). Until a better
1411 // explanation is found: assert. 1442 // explanation is found: abort.
1412 UNIMPLEMENTED(); 1443 UNIMPLEMENTED();
1413 return zero; 1444 return zero;
1414 case Tegra::Shader::VideoType::Invalid: 1445 case Tegra::Shader::VideoType::Invalid:
@@ -1464,8 +1495,7 @@ private:
1464 1495
1465 // Decoding failure 1496 // Decoding failure
1466 if (!opcode) { 1497 if (!opcode) {
1467 LOG_CRITICAL(HW_GPU, "Unhandled instruction: {0:x}", instr.value); 1498 UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value);
1468 UNREACHABLE();
1469 return offset + 1; 1499 return offset + 1;
1470 } 1500 }
1471 1501
@@ -1473,8 +1503,8 @@ private:
1473 fmt::format("// {}: {} (0x{:016x})", offset, opcode->get().GetName(), instr.value)); 1503 fmt::format("// {}: {} (0x{:016x})", offset, opcode->get().GetName(), instr.value));
1474 1504
1475 using Tegra::Shader::Pred; 1505 using Tegra::Shader::Pred;
1476 ASSERT_MSG(instr.pred.full_pred != Pred::NeverExecute, 1506 UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute,
1477 "NeverExecute predicate not implemented"); 1507 "NeverExecute predicate not implemented");
1478 1508
1479 // Some instructions (like SSY) don't have a predicate field, they are always 1509 // Some instructions (like SSY) don't have a predicate field, they are always
1480 // unconditionally executed. 1510 // unconditionally executed.
@@ -1517,37 +1547,36 @@ private:
1517 case OpCode::Id::FMUL_R: 1547 case OpCode::Id::FMUL_R:
1518 case OpCode::Id::FMUL_IMM: { 1548 case OpCode::Id::FMUL_IMM: {
1519 // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. 1549 // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
1520 ASSERT_MSG(instr.fmul.tab5cb8_2 == 0, "FMUL tab5cb8_2({}) is not implemented", 1550 UNIMPLEMENTED_IF_MSG(instr.fmul.tab5cb8_2 != 0,
1521 instr.fmul.tab5cb8_2.Value()); 1551 "FMUL tab5cb8_2({}) is not implemented",
1522 ASSERT_MSG(instr.fmul.tab5c68_1 == 0, "FMUL tab5cb8_1({}) is not implemented", 1552 instr.fmul.tab5cb8_2.Value());
1523 instr.fmul.tab5c68_1.Value()); 1553 UNIMPLEMENTED_IF_MSG(instr.fmul.tab5c68_1 != 0,
1524 ASSERT_MSG(instr.fmul.tab5c68_0 == 1, "FMUL tab5cb8_0({}) is not implemented", 1554 "FMUL tab5cb8_1({}) is not implemented",
1525 instr.fmul.tab5c68_0 1555 instr.fmul.tab5c68_1.Value());
1526 .Value()); // SMO typical sends 1 here which seems to be the default 1556 UNIMPLEMENTED_IF_MSG(
1527 ASSERT_MSG(instr.fmul.cc == 0, "FMUL cc is not implemented"); 1557 instr.fmul.tab5c68_0 != 1, "FMUL tab5cb8_0({}) is not implemented",
1558 instr.fmul.tab5c68_0
1559 .Value()); // SMO typical sends 1 here which seems to be the default
1560 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
1561 "Condition codes generation in FMUL is not implemented");
1528 1562
1529 op_b = GetOperandAbsNeg(op_b, false, instr.fmul.negate_b); 1563 op_b = GetOperandAbsNeg(op_b, false, instr.fmul.negate_b);
1530 1564
1531 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1, 1565 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1,
1532 instr.alu.saturate_d, 0, true); 1566 instr.alu.saturate_d, 0, true);
1533 if (instr.generates_cc) {
1534 LOG_CRITICAL(HW_GPU, "FMUL Generates an unhandled Control Code");
1535 UNREACHABLE();
1536 }
1537 break; 1567 break;
1538 } 1568 }
1539 case OpCode::Id::FADD_C: 1569 case OpCode::Id::FADD_C:
1540 case OpCode::Id::FADD_R: 1570 case OpCode::Id::FADD_R:
1541 case OpCode::Id::FADD_IMM: { 1571 case OpCode::Id::FADD_IMM: {
1572 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
1573 "Condition codes generation in FADD is not implemented");
1574
1542 op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a); 1575 op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a);
1543 op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b); 1576 op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b);
1544 1577
1545 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, 1578 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1,
1546 instr.alu.saturate_d, 0, true); 1579 instr.alu.saturate_d, 0, true);
1547 if (instr.generates_cc) {
1548 LOG_CRITICAL(HW_GPU, "FADD Generates an unhandled Control Code");
1549 UNREACHABLE();
1550 }
1551 break; 1580 break;
1552 } 1581 }
1553 case OpCode::Id::MUFU: { 1582 case OpCode::Id::MUFU: {
@@ -1582,15 +1611,17 @@ private:
1582 instr.alu.saturate_d, 0, true); 1611 instr.alu.saturate_d, 0, true);
1583 break; 1612 break;
1584 default: 1613 default:
1585 LOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {0:x}", 1614 UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}",
1586 static_cast<unsigned>(instr.sub_op.Value())); 1615 static_cast<unsigned>(instr.sub_op.Value()));
1587 UNREACHABLE();
1588 } 1616 }
1589 break; 1617 break;
1590 } 1618 }
1591 case OpCode::Id::FMNMX_C: 1619 case OpCode::Id::FMNMX_C:
1592 case OpCode::Id::FMNMX_R: 1620 case OpCode::Id::FMNMX_R:
1593 case OpCode::Id::FMNMX_IMM: { 1621 case OpCode::Id::FMNMX_IMM: {
1622 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
1623 "Condition codes generation in FMNMX is not implemented");
1624
1594 op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a); 1625 op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a);
1595 op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b); 1626 op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b);
1596 1627
@@ -1601,10 +1632,6 @@ private:
1601 '(' + condition + ") ? min(" + parameters + ") : max(" + 1632 '(' + condition + ") ? min(" + parameters + ") : max(" +
1602 parameters + ')', 1633 parameters + ')',
1603 1, 1, false, 0, true); 1634 1, 1, false, 0, true);
1604 if (instr.generates_cc) {
1605 LOG_CRITICAL(HW_GPU, "FMNMX Generates an unhandled Control Code");
1606 UNREACHABLE();
1607 }
1608 break; 1635 break;
1609 } 1636 }
1610 case OpCode::Id::RRO_C: 1637 case OpCode::Id::RRO_C:
@@ -1617,9 +1644,7 @@ private:
1617 break; 1644 break;
1618 } 1645 }
1619 default: { 1646 default: {
1620 LOG_CRITICAL(HW_GPU, "Unhandled arithmetic instruction: {}", 1647 UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName());
1621 opcode->get().GetName());
1622 UNREACHABLE();
1623 } 1648 }
1624 } 1649 }
1625 break; 1650 break;
@@ -1631,17 +1656,19 @@ private:
1631 break; 1656 break;
1632 } 1657 }
1633 case OpCode::Id::FMUL32_IMM: { 1658 case OpCode::Id::FMUL32_IMM: {
1659 UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc,
1660 "Condition codes generation in FMUL32 is not implemented");
1661
1634 regs.SetRegisterToFloat(instr.gpr0, 0, 1662 regs.SetRegisterToFloat(instr.gpr0, 0,
1635 regs.GetRegisterAsFloat(instr.gpr8) + " * " + 1663 regs.GetRegisterAsFloat(instr.gpr8) + " * " +
1636 GetImmediate32(instr), 1664 GetImmediate32(instr),
1637 1, 1, instr.fmul32.saturate, 0, true); 1665 1, 1, instr.fmul32.saturate, 0, true);
1638 if (instr.op_32.generates_cc) {
1639 LOG_CRITICAL(HW_GPU, "FMUL32 Generates an unhandled Control Code");
1640 UNREACHABLE();
1641 }
1642 break; 1666 break;
1643 } 1667 }
1644 case OpCode::Id::FADD32I: { 1668 case OpCode::Id::FADD32I: {
1669 UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc,
1670 "Condition codes generation in FADD32I is not implemented");
1671
1645 std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); 1672 std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
1646 std::string op_b = GetImmediate32(instr); 1673 std::string op_b = GetImmediate32(instr);
1647 1674
@@ -1662,23 +1689,22 @@ private:
1662 } 1689 }
1663 1690
1664 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, false, 0, true); 1691 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, false, 0, true);
1665 if (instr.op_32.generates_cc) {
1666 LOG_CRITICAL(HW_GPU, "FADD32 Generates an unhandled Control Code");
1667 UNREACHABLE();
1668 }
1669 break; 1692 break;
1670 } 1693 }
1671 } 1694 }
1672 break; 1695 break;
1673 } 1696 }
1674 case OpCode::Type::Bfe: { 1697 case OpCode::Type::Bfe: {
1675 ASSERT_MSG(!instr.bfe.negate_b, "Unimplemented"); 1698 UNIMPLEMENTED_IF(instr.bfe.negate_b);
1676 1699
1677 std::string op_a = instr.bfe.negate_a ? "-" : ""; 1700 std::string op_a = instr.bfe.negate_a ? "-" : "";
1678 op_a += regs.GetRegisterAsInteger(instr.gpr8); 1701 op_a += regs.GetRegisterAsInteger(instr.gpr8);
1679 1702
1680 switch (opcode->get().GetId()) { 1703 switch (opcode->get().GetId()) {
1681 case OpCode::Id::BFE_IMM: { 1704 case OpCode::Id::BFE_IMM: {
1705 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
1706 "Condition codes generation in BFE is not implemented");
1707
1682 std::string inner_shift = 1708 std::string inner_shift =
1683 '(' + op_a + " << " + std::to_string(instr.bfe.GetLeftShiftValue()) + ')'; 1709 '(' + op_a + " << " + std::to_string(instr.bfe.GetLeftShiftValue()) + ')';
1684 std::string outer_shift = 1710 std::string outer_shift =
@@ -1686,15 +1712,10 @@ private:
1686 std::to_string(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position) + ')'; 1712 std::to_string(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position) + ')';
1687 1713
1688 regs.SetRegisterToInteger(instr.gpr0, true, 0, outer_shift, 1, 1); 1714 regs.SetRegisterToInteger(instr.gpr0, true, 0, outer_shift, 1, 1);
1689 if (instr.generates_cc) {
1690 LOG_CRITICAL(HW_GPU, "BFE Generates an unhandled Control Code");
1691 UNREACHABLE();
1692 }
1693 break; 1715 break;
1694 } 1716 }
1695 default: { 1717 default: {
1696 LOG_CRITICAL(HW_GPU, "Unhandled BFE instruction: {}", opcode->get().GetName()); 1718 UNIMPLEMENTED_MSG("Unhandled BFE instruction: {}", opcode->get().GetName());
1697 UNREACHABLE();
1698 } 1719 }
1699 } 1720 }
1700 1721
@@ -1739,6 +1760,9 @@ private:
1739 case OpCode::Id::SHR_C: 1760 case OpCode::Id::SHR_C:
1740 case OpCode::Id::SHR_R: 1761 case OpCode::Id::SHR_R:
1741 case OpCode::Id::SHR_IMM: { 1762 case OpCode::Id::SHR_IMM: {
1763 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
1764 "Condition codes generation in SHR is not implemented");
1765
1742 if (!instr.shift.is_signed) { 1766 if (!instr.shift.is_signed) {
1743 // Logical shift right 1767 // Logical shift right
1744 op_a = "uint(" + op_a + ')'; 1768 op_a = "uint(" + op_a + ')';
@@ -1747,24 +1771,17 @@ private:
1747 // Cast to int is superfluous for arithmetic shift, it's only for a logical shift 1771 // Cast to int is superfluous for arithmetic shift, it's only for a logical shift
1748 regs.SetRegisterToInteger(instr.gpr0, true, 0, "int(" + op_a + " >> " + op_b + ')', 1772 regs.SetRegisterToInteger(instr.gpr0, true, 0, "int(" + op_a + " >> " + op_b + ')',
1749 1, 1); 1773 1, 1);
1750 if (instr.generates_cc) {
1751 LOG_CRITICAL(HW_GPU, "SHR Generates an unhandled Control Code");
1752 UNREACHABLE();
1753 }
1754 break; 1774 break;
1755 } 1775 }
1756 case OpCode::Id::SHL_C: 1776 case OpCode::Id::SHL_C:
1757 case OpCode::Id::SHL_R: 1777 case OpCode::Id::SHL_R:
1758 case OpCode::Id::SHL_IMM: 1778 case OpCode::Id::SHL_IMM:
1779 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
1780 "Condition codes generation in SHL is not implemented");
1759 regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " << " + op_b, 1, 1); 1781 regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " << " + op_b, 1, 1);
1760 if (instr.generates_cc) {
1761 LOG_CRITICAL(HW_GPU, "SHL Generates an unhandled Control Code");
1762 UNREACHABLE();
1763 }
1764 break; 1782 break;
1765 default: { 1783 default: {
1766 LOG_CRITICAL(HW_GPU, "Unhandled shift instruction: {}", opcode->get().GetName()); 1784 UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName());
1767 UNREACHABLE();
1768 } 1785 }
1769 } 1786 }
1770 break; 1787 break;
@@ -1775,17 +1792,19 @@ private:
1775 1792
1776 switch (opcode->get().GetId()) { 1793 switch (opcode->get().GetId()) {
1777 case OpCode::Id::IADD32I: 1794 case OpCode::Id::IADD32I:
1795 UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc,
1796 "Condition codes generation in IADD32I is not implemented");
1797
1778 if (instr.iadd32i.negate_a) 1798 if (instr.iadd32i.negate_a)
1779 op_a = "-(" + op_a + ')'; 1799 op_a = "-(" + op_a + ')';
1780 1800
1781 regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1, 1801 regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1,
1782 instr.iadd32i.saturate != 0); 1802 instr.iadd32i.saturate != 0);
1783 if (instr.op_32.generates_cc) {
1784 LOG_CRITICAL(HW_GPU, "IADD32 Generates an unhandled Control Code");
1785 UNREACHABLE();
1786 }
1787 break; 1803 break;
1788 case OpCode::Id::LOP32I: { 1804 case OpCode::Id::LOP32I: {
1805 UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc,
1806 "Condition codes generation in LOP32I is not implemented");
1807
1789 if (instr.alu.lop32i.invert_a) 1808 if (instr.alu.lop32i.invert_a)
1790 op_a = "~(" + op_a + ')'; 1809 op_a = "~(" + op_a + ')';
1791 1810
@@ -1795,16 +1814,11 @@ private:
1795 WriteLogicOperation(instr.gpr0, instr.alu.lop32i.operation, op_a, op_b, 1814 WriteLogicOperation(instr.gpr0, instr.alu.lop32i.operation, op_a, op_b,
1796 Tegra::Shader::PredicateResultMode::None, 1815 Tegra::Shader::PredicateResultMode::None,
1797 Tegra::Shader::Pred::UnusedIndex); 1816 Tegra::Shader::Pred::UnusedIndex);
1798 if (instr.op_32.generates_cc) {
1799 LOG_CRITICAL(HW_GPU, "LOP32I Generates an unhandled Control Code");
1800 UNREACHABLE();
1801 }
1802 break; 1817 break;
1803 } 1818 }
1804 default: { 1819 default: {
1805 LOG_CRITICAL(HW_GPU, "Unhandled ArithmeticIntegerImmediate instruction: {}", 1820 UNIMPLEMENTED_MSG("Unhandled ArithmeticIntegerImmediate instruction: {}",
1806 opcode->get().GetName()); 1821 opcode->get().GetName());
1807 UNREACHABLE();
1808 } 1822 }
1809 } 1823 }
1810 break; 1824 break;
@@ -1827,6 +1841,9 @@ private:
1827 case OpCode::Id::IADD_C: 1841 case OpCode::Id::IADD_C:
1828 case OpCode::Id::IADD_R: 1842 case OpCode::Id::IADD_R:
1829 case OpCode::Id::IADD_IMM: { 1843 case OpCode::Id::IADD_IMM: {
1844 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
1845 "Condition codes generation in IADD is not implemented");
1846
1830 if (instr.alu_integer.negate_a) 1847 if (instr.alu_integer.negate_a)
1831 op_a = "-(" + op_a + ')'; 1848 op_a = "-(" + op_a + ')';
1832 1849
@@ -1835,15 +1852,14 @@ private:
1835 1852
1836 regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1, 1853 regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1,
1837 instr.alu.saturate_d); 1854 instr.alu.saturate_d);
1838 if (instr.generates_cc) {
1839 LOG_CRITICAL(HW_GPU, "IADD Generates an unhandled Control Code");
1840 UNREACHABLE();
1841 }
1842 break; 1855 break;
1843 } 1856 }
1844 case OpCode::Id::IADD3_C: 1857 case OpCode::Id::IADD3_C:
1845 case OpCode::Id::IADD3_R: 1858 case OpCode::Id::IADD3_R:
1846 case OpCode::Id::IADD3_IMM: { 1859 case OpCode::Id::IADD3_IMM: {
1860 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
1861 "Condition codes generation in IADD3 is not implemented");
1862
1847 std::string op_c = regs.GetRegisterAsInteger(instr.gpr39); 1863 std::string op_c = regs.GetRegisterAsInteger(instr.gpr39);
1848 1864
1849 auto apply_height = [](auto height, auto& oprand) { 1865 auto apply_height = [](auto height, auto& oprand) {
@@ -1857,9 +1873,8 @@ private:
1857 oprand = "((" + oprand + ") >> 16)"; 1873 oprand = "((" + oprand + ") >> 16)";
1858 break; 1874 break;
1859 default: 1875 default:
1860 LOG_CRITICAL(HW_GPU, "Unhandled IADD3 height: {}", 1876 UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}",
1861 static_cast<u32>(height.Value())); 1877 static_cast<u32>(height.Value()));
1862 UNREACHABLE();
1863 } 1878 }
1864 }; 1879 };
1865 1880
@@ -1900,16 +1915,14 @@ private:
1900 } 1915 }
1901 1916
1902 regs.SetRegisterToInteger(instr.gpr0, true, 0, result, 1, 1); 1917 regs.SetRegisterToInteger(instr.gpr0, true, 0, result, 1, 1);
1903
1904 if (instr.generates_cc) {
1905 LOG_CRITICAL(HW_GPU, "IADD3 Generates an unhandled Control Code");
1906 UNREACHABLE();
1907 }
1908 break; 1918 break;
1909 } 1919 }
1910 case OpCode::Id::ISCADD_C: 1920 case OpCode::Id::ISCADD_C:
1911 case OpCode::Id::ISCADD_R: 1921 case OpCode::Id::ISCADD_R:
1912 case OpCode::Id::ISCADD_IMM: { 1922 case OpCode::Id::ISCADD_IMM: {
1923 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
1924 "Condition codes generation in ISCADD is not implemented");
1925
1913 if (instr.alu_integer.negate_a) 1926 if (instr.alu_integer.negate_a)
1914 op_a = "-(" + op_a + ')'; 1927 op_a = "-(" + op_a + ')';
1915 1928
@@ -1920,10 +1933,6 @@ private:
1920 1933
1921 regs.SetRegisterToInteger(instr.gpr0, true, 0, 1934 regs.SetRegisterToInteger(instr.gpr0, true, 0,
1922 "((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1); 1935 "((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1);
1923 if (instr.generates_cc) {
1924 LOG_CRITICAL(HW_GPU, "ISCADD Generates an unhandled Control Code");
1925 UNREACHABLE();
1926 }
1927 break; 1936 break;
1928 } 1937 }
1929 case OpCode::Id::POPC_C: 1938 case OpCode::Id::POPC_C:
@@ -1947,6 +1956,9 @@ private:
1947 case OpCode::Id::LOP_C: 1956 case OpCode::Id::LOP_C:
1948 case OpCode::Id::LOP_R: 1957 case OpCode::Id::LOP_R:
1949 case OpCode::Id::LOP_IMM: { 1958 case OpCode::Id::LOP_IMM: {
1959 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
1960 "Condition codes generation in LOP is not implemented");
1961
1950 if (instr.alu.lop.invert_a) 1962 if (instr.alu.lop.invert_a)
1951 op_a = "~(" + op_a + ')'; 1963 op_a = "~(" + op_a + ')';
1952 1964
@@ -1955,15 +1967,14 @@ private:
1955 1967
1956 WriteLogicOperation(instr.gpr0, instr.alu.lop.operation, op_a, op_b, 1968 WriteLogicOperation(instr.gpr0, instr.alu.lop.operation, op_a, op_b,
1957 instr.alu.lop.pred_result_mode, instr.alu.lop.pred48); 1969 instr.alu.lop.pred_result_mode, instr.alu.lop.pred48);
1958 if (instr.generates_cc) {
1959 LOG_CRITICAL(HW_GPU, "LOP Generates an unhandled Control Code");
1960 UNREACHABLE();
1961 }
1962 break; 1970 break;
1963 } 1971 }
1964 case OpCode::Id::LOP3_C: 1972 case OpCode::Id::LOP3_C:
1965 case OpCode::Id::LOP3_R: 1973 case OpCode::Id::LOP3_R:
1966 case OpCode::Id::LOP3_IMM: { 1974 case OpCode::Id::LOP3_IMM: {
1975 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
1976 "Condition codes generation in LOP3 is not implemented");
1977
1967 const std::string op_c = regs.GetRegisterAsInteger(instr.gpr39); 1978 const std::string op_c = regs.GetRegisterAsInteger(instr.gpr39);
1968 std::string lut; 1979 std::string lut;
1969 1980
@@ -1974,17 +1985,15 @@ private:
1974 } 1985 }
1975 1986
1976 WriteLop3Instruction(instr.gpr0, op_a, op_b, op_c, lut); 1987 WriteLop3Instruction(instr.gpr0, op_a, op_b, op_c, lut);
1977 if (instr.generates_cc) {
1978 LOG_CRITICAL(HW_GPU, "LOP3 Generates an unhandled Control Code");
1979 UNREACHABLE();
1980 }
1981 break; 1988 break;
1982 } 1989 }
1983 case OpCode::Id::IMNMX_C: 1990 case OpCode::Id::IMNMX_C:
1984 case OpCode::Id::IMNMX_R: 1991 case OpCode::Id::IMNMX_R:
1985 case OpCode::Id::IMNMX_IMM: { 1992 case OpCode::Id::IMNMX_IMM: {
1986 ASSERT_MSG(instr.imnmx.exchange == Tegra::Shader::IMinMaxExchange::None, 1993 UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None);
1987 "Unimplemented"); 1994 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
1995 "Condition codes generation in IMNMX is not implemented");
1996
1988 const std::string condition = 1997 const std::string condition =
1989 GetPredicateCondition(instr.imnmx.pred, instr.imnmx.negate_pred != 0); 1998 GetPredicateCondition(instr.imnmx.pred, instr.imnmx.negate_pred != 0);
1990 const std::string parameters = op_a + ',' + op_b; 1999 const std::string parameters = op_a + ',' + op_b;
@@ -1992,10 +2001,6 @@ private:
1992 '(' + condition + ") ? min(" + parameters + ") : max(" + 2001 '(' + condition + ") ? min(" + parameters + ") : max(" +
1993 parameters + ')', 2002 parameters + ')',
1994 1, 1); 2003 1, 1);
1995 if (instr.generates_cc) {
1996 LOG_CRITICAL(HW_GPU, "IMNMX Generates an unhandled Control Code");
1997 UNREACHABLE();
1998 }
1999 break; 2004 break;
2000 } 2005 }
2001 case OpCode::Id::LEA_R2: 2006 case OpCode::Id::LEA_R2:
@@ -2050,24 +2055,19 @@ private:
2050 op_b = regs.GetRegisterAsInteger(instr.gpr8); 2055 op_b = regs.GetRegisterAsInteger(instr.gpr8);
2051 op_a = std::to_string(instr.lea.imm.entry_a); 2056 op_a = std::to_string(instr.lea.imm.entry_a);
2052 op_c = std::to_string(instr.lea.imm.entry_b); 2057 op_c = std::to_string(instr.lea.imm.entry_b);
2053 LOG_CRITICAL(HW_GPU, "Unhandled LEA subinstruction: {}", 2058 UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName());
2054 opcode->get().GetName());
2055 UNREACHABLE();
2056 } 2059 }
2057 } 2060 }
2058 if (instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex)) { 2061 UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex),
2059 LOG_ERROR(HW_GPU, "Unhandled LEA Predicate"); 2062 "Unhandled LEA Predicate");
2060 UNREACHABLE();
2061 }
2062 const std::string value = '(' + op_a + " + (" + op_b + "*(1 << " + op_c + ")))"; 2063 const std::string value = '(' + op_a + " + (" + op_b + "*(1 << " + op_c + ")))";
2063 regs.SetRegisterToInteger(instr.gpr0, true, 0, value, 1, 1); 2064 regs.SetRegisterToInteger(instr.gpr0, true, 0, value, 1, 1);
2064 2065
2065 break; 2066 break;
2066 } 2067 }
2067 default: { 2068 default: {
2068 LOG_CRITICAL(HW_GPU, "Unhandled ArithmeticInteger instruction: {}", 2069 UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}",
2069 opcode->get().GetName()); 2070 opcode->get().GetName());
2070 UNREACHABLE();
2071 } 2071 }
2072 } 2072 }
2073 2073
@@ -2076,7 +2076,7 @@ private:
2076 case OpCode::Type::ArithmeticHalf: { 2076 case OpCode::Type::ArithmeticHalf: {
2077 if (opcode->get().GetId() == OpCode::Id::HADD2_C || 2077 if (opcode->get().GetId() == OpCode::Id::HADD2_C ||
2078 opcode->get().GetId() == OpCode::Id::HADD2_R) { 2078 opcode->get().GetId() == OpCode::Id::HADD2_R) {
2079 ASSERT_MSG(instr.alu_half.ftz == 0, "Unimplemented"); 2079 UNIMPLEMENTED_IF(instr.alu_half.ftz != 0);
2080 } 2080 }
2081 const bool negate_a = 2081 const bool negate_a =
2082 opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0; 2082 opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0;
@@ -2114,9 +2114,8 @@ private:
2114 case OpCode::Id::HMUL2_R: 2114 case OpCode::Id::HMUL2_R:
2115 return '(' + op_a + " * " + op_b + ')'; 2115 return '(' + op_a + " * " + op_b + ')';
2116 default: 2116 default:
2117 LOG_CRITICAL(HW_GPU, "Unhandled half float instruction: {}", 2117 UNIMPLEMENTED_MSG("Unhandled half float instruction: {}",
2118 opcode->get().GetName()); 2118 opcode->get().GetName());
2119 UNREACHABLE();
2120 return std::string("0"); 2119 return std::string("0");
2121 } 2120 }
2122 }(); 2121 }();
@@ -2127,10 +2126,10 @@ private:
2127 } 2126 }
2128 case OpCode::Type::ArithmeticHalfImmediate: { 2127 case OpCode::Type::ArithmeticHalfImmediate: {
2129 if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) { 2128 if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) {
2130 ASSERT_MSG(instr.alu_half_imm.ftz == 0, "Unimplemented"); 2129 UNIMPLEMENTED_IF(instr.alu_half_imm.ftz != 0);
2131 } else { 2130 } else {
2132 ASSERT_MSG(instr.alu_half_imm.precision == Tegra::Shader::HalfPrecision::None, 2131 UNIMPLEMENTED_IF(instr.alu_half_imm.precision !=
2133 "Unimplemented"); 2132 Tegra::Shader::HalfPrecision::None);
2134 } 2133 }
2135 2134
2136 const std::string op_a = GetHalfFloat( 2135 const std::string op_a = GetHalfFloat(
@@ -2160,11 +2159,14 @@ private:
2160 std::string op_b = instr.ffma.negate_b ? "-" : ""; 2159 std::string op_b = instr.ffma.negate_b ? "-" : "";
2161 std::string op_c = instr.ffma.negate_c ? "-" : ""; 2160 std::string op_c = instr.ffma.negate_c ? "-" : "";
2162 2161
2163 ASSERT_MSG(instr.ffma.cc == 0, "FFMA cc not implemented"); 2162 UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented");
2164 ASSERT_MSG(instr.ffma.tab5980_0 == 1, "FFMA tab5980_0({}) not implemented", 2163 UNIMPLEMENTED_IF_MSG(
2165 instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO 2164 instr.ffma.tab5980_0 != 1, "FFMA tab5980_0({}) not implemented",
2166 ASSERT_MSG(instr.ffma.tab5980_1 == 0, "FFMA tab5980_1({}) not implemented", 2165 instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO
2167 instr.ffma.tab5980_1.Value()); 2166 UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA tab5980_1({}) not implemented",
2167 instr.ffma.tab5980_1.Value());
2168 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
2169 "Condition codes generation in FFMA is not implemented");
2168 2170
2169 switch (opcode->get().GetId()) { 2171 switch (opcode->get().GetId()) {
2170 case OpCode::Id::FFMA_CR: { 2172 case OpCode::Id::FFMA_CR: {
@@ -2190,27 +2192,19 @@ private:
2190 break; 2192 break;
2191 } 2193 }
2192 default: { 2194 default: {
2193 LOG_CRITICAL(HW_GPU, "Unhandled FFMA instruction: {}", opcode->get().GetName()); 2195 UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName());
2194 UNREACHABLE();
2195 } 2196 }
2196 } 2197 }
2197 2198
2198 regs.SetRegisterToFloat(instr.gpr0, 0, "fma(" + op_a + ", " + op_b + ", " + op_c + ')', 2199 regs.SetRegisterToFloat(instr.gpr0, 0, "fma(" + op_a + ", " + op_b + ", " + op_c + ')',
2199 1, 1, instr.alu.saturate_d, 0, true); 2200 1, 1, instr.alu.saturate_d, 0, true);
2200 if (instr.generates_cc) {
2201 LOG_CRITICAL(HW_GPU, "FFMA Generates an unhandled Control Code");
2202 UNREACHABLE();
2203 }
2204
2205 break; 2201 break;
2206 } 2202 }
2207 case OpCode::Type::Hfma2: { 2203 case OpCode::Type::Hfma2: {
2208 if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) { 2204 if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) {
2209 ASSERT_MSG(instr.hfma2.rr.precision == Tegra::Shader::HalfPrecision::None, 2205 UNIMPLEMENTED_IF(instr.hfma2.rr.precision != Tegra::Shader::HalfPrecision::None);
2210 "Unimplemented");
2211 } else { 2206 } else {
2212 ASSERT_MSG(instr.hfma2.precision == Tegra::Shader::HalfPrecision::None, 2207 UNIMPLEMENTED_IF(instr.hfma2.precision != Tegra::Shader::HalfPrecision::None);
2213 "Unimplemented");
2214 } 2208 }
2215 const bool saturate = opcode->get().GetId() == OpCode::Id::HFMA2_RR 2209 const bool saturate = opcode->get().GetId() == OpCode::Id::HFMA2_RR
2216 ? instr.hfma2.rr.saturate != 0 2210 ? instr.hfma2.rr.saturate != 0
@@ -2260,7 +2254,7 @@ private:
2260 case OpCode::Type::Conversion: { 2254 case OpCode::Type::Conversion: {
2261 switch (opcode->get().GetId()) { 2255 switch (opcode->get().GetId()) {
2262 case OpCode::Id::I2I_R: { 2256 case OpCode::Id::I2I_R: {
2263 ASSERT_MSG(!instr.conversion.selector, "Unimplemented"); 2257 UNIMPLEMENTED_IF(instr.conversion.selector);
2264 2258
2265 std::string op_a = regs.GetRegisterAsInteger( 2259 std::string op_a = regs.GetRegisterAsInteger(
2266 instr.gpr20, 0, instr.conversion.is_input_signed, instr.conversion.src_size); 2260 instr.gpr20, 0, instr.conversion.is_input_signed, instr.conversion.src_size);
@@ -2280,8 +2274,10 @@ private:
2280 } 2274 }
2281 case OpCode::Id::I2F_R: 2275 case OpCode::Id::I2F_R:
2282 case OpCode::Id::I2F_C: { 2276 case OpCode::Id::I2F_C: {
2283 ASSERT_MSG(instr.conversion.dest_size == Register::Size::Word, "Unimplemented"); 2277 UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word);
2284 ASSERT_MSG(!instr.conversion.selector, "Unimplemented"); 2278 UNIMPLEMENTED_IF(instr.conversion.selector);
2279 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
2280 "Condition codes generation in I2F is not implemented");
2285 2281
2286 std::string op_a{}; 2282 std::string op_a{};
2287 2283
@@ -2306,16 +2302,13 @@ private:
2306 } 2302 }
2307 2303
2308 regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1); 2304 regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
2309
2310 if (instr.generates_cc) {
2311 LOG_CRITICAL(HW_GPU, "I2F Generates an unhandled Control Code");
2312 UNREACHABLE();
2313 }
2314 break; 2305 break;
2315 } 2306 }
2316 case OpCode::Id::F2F_R: { 2307 case OpCode::Id::F2F_R: {
2317 ASSERT_MSG(instr.conversion.dest_size == Register::Size::Word, "Unimplemented"); 2308 UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word);
2318 ASSERT_MSG(instr.conversion.src_size == Register::Size::Word, "Unimplemented"); 2309 UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word);
2310 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
2311 "Condition codes generation in F2F is not implemented");
2319 std::string op_a = regs.GetRegisterAsFloat(instr.gpr20); 2312 std::string op_a = regs.GetRegisterAsFloat(instr.gpr20);
2320 2313
2321 if (instr.conversion.abs_a) { 2314 if (instr.conversion.abs_a) {
@@ -2342,23 +2335,19 @@ private:
2342 op_a = "trunc(" + op_a + ')'; 2335 op_a = "trunc(" + op_a + ')';
2343 break; 2336 break;
2344 default: 2337 default:
2345 LOG_CRITICAL(HW_GPU, "Unimplemented f2f rounding mode {}", 2338 UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
2346 static_cast<u32>(instr.conversion.f2f.rounding.Value())); 2339 static_cast<u32>(instr.conversion.f2f.rounding.Value()));
2347 UNREACHABLE();
2348 break; 2340 break;
2349 } 2341 }
2350 2342
2351 regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1, instr.alu.saturate_d); 2343 regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1, instr.alu.saturate_d);
2352
2353 if (instr.generates_cc) {
2354 LOG_CRITICAL(HW_GPU, "F2F Generates an unhandled Control Code");
2355 UNREACHABLE();
2356 }
2357 break; 2344 break;
2358 } 2345 }
2359 case OpCode::Id::F2I_R: 2346 case OpCode::Id::F2I_R:
2360 case OpCode::Id::F2I_C: { 2347 case OpCode::Id::F2I_C: {
2361 ASSERT_MSG(instr.conversion.src_size == Register::Size::Word, "Unimplemented"); 2348 UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word);
2349 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
2350 "Condition codes generation in F2I is not implemented");
2362 std::string op_a{}; 2351 std::string op_a{};
2363 2352
2364 if (instr.is_b_gpr) { 2353 if (instr.is_b_gpr) {
@@ -2389,9 +2378,8 @@ private:
2389 op_a = "trunc(" + op_a + ')'; 2378 op_a = "trunc(" + op_a + ')';
2390 break; 2379 break;
2391 default: 2380 default:
2392 LOG_CRITICAL(HW_GPU, "Unimplemented f2i rounding mode {}", 2381 UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}",
2393 static_cast<u32>(instr.conversion.f2i.rounding.Value())); 2382 static_cast<u32>(instr.conversion.f2i.rounding.Value()));
2394 UNREACHABLE();
2395 break; 2383 break;
2396 } 2384 }
2397 2385
@@ -2403,16 +2391,10 @@ private:
2403 2391
2404 regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1, 2392 regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1,
2405 1, false, 0, instr.conversion.dest_size); 2393 1, false, 0, instr.conversion.dest_size);
2406 if (instr.generates_cc) {
2407 LOG_CRITICAL(HW_GPU, "F2I Generates an unhandled Control Code");
2408 UNREACHABLE();
2409 }
2410 break; 2394 break;
2411 } 2395 }
2412 default: { 2396 default: {
2413 LOG_CRITICAL(HW_GPU, "Unhandled conversion instruction: {}", 2397 UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName());
2414 opcode->get().GetName());
2415 UNREACHABLE();
2416 } 2398 }
2417 } 2399 }
2418 break; 2400 break;
@@ -2421,10 +2403,10 @@ private:
2421 switch (opcode->get().GetId()) { 2403 switch (opcode->get().GetId()) {
2422 case OpCode::Id::LD_A: { 2404 case OpCode::Id::LD_A: {
2423 // Note: Shouldn't this be interp mode flat? As in no interpolation made. 2405 // Note: Shouldn't this be interp mode flat? As in no interpolation made.
2424 ASSERT_MSG(instr.gpr8.Value() == Register::ZeroIndex, 2406 UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
2425 "Indirect attribute loads are not supported"); 2407 "Indirect attribute loads are not supported");
2426 ASSERT_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) == 0, 2408 UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
2427 "Unaligned attribute loads are not supported"); 2409 "Unaligned attribute loads are not supported");
2428 2410
2429 Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective, 2411 Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective,
2430 Tegra::Shader::IpaSampleMode::Default}; 2412 Tegra::Shader::IpaSampleMode::Default};
@@ -2451,7 +2433,7 @@ private:
2451 break; 2433 break;
2452 } 2434 }
2453 case OpCode::Id::LD_C: { 2435 case OpCode::Id::LD_C: {
2454 ASSERT_MSG(instr.ld_c.unknown == 0, "Unimplemented"); 2436 UNIMPLEMENTED_IF(instr.ld_c.unknown != 0);
2455 2437
2456 // Add an extra scope and declare the index register inside to prevent 2438 // Add an extra scope and declare the index register inside to prevent
2457 // overwriting it in case it is used as an output of the LD instruction. 2439 // overwriting it in case it is used as an output of the LD instruction.
@@ -2479,9 +2461,8 @@ private:
2479 break; 2461 break;
2480 } 2462 }
2481 default: 2463 default:
2482 LOG_CRITICAL(HW_GPU, "Unhandled type: {}", 2464 UNIMPLEMENTED_MSG("Unhandled type: {}",
2483 static_cast<unsigned>(instr.ld_c.type.Value())); 2465 static_cast<unsigned>(instr.ld_c.type.Value()));
2484 UNREACHABLE();
2485 } 2466 }
2486 2467
2487 --shader.scope; 2468 --shader.scope;
@@ -2489,6 +2470,9 @@ private:
2489 break; 2470 break;
2490 } 2471 }
2491 case OpCode::Id::LD_L: { 2472 case OpCode::Id::LD_L: {
2473 UNIMPLEMENTED_IF_MSG(instr.ld_l.unknown == 1, "LD_L Unhandled mode: {}",
2474 static_cast<unsigned>(instr.ld_l.unknown.Value()));
2475
2492 // Add an extra scope and declare the index register inside to prevent 2476 // Add an extra scope and declare the index register inside to prevent
2493 // overwriting it in case it is used as an output of the LD instruction. 2477 // overwriting it in case it is used as an output of the LD instruction.
2494 shader.AddLine('{'); 2478 shader.AddLine('{');
@@ -2501,20 +2485,13 @@ private:
2501 2485
2502 const std::string op_a = regs.GetLocalMemoryAsFloat("index"); 2486 const std::string op_a = regs.GetLocalMemoryAsFloat("index");
2503 2487
2504 if (instr.ld_l.unknown != 1) {
2505 LOG_CRITICAL(HW_GPU, "LD_L Unhandled mode: {}",
2506 static_cast<unsigned>(instr.ld_l.unknown.Value()));
2507 UNREACHABLE();
2508 }
2509
2510 switch (instr.ldst_sl.type.Value()) { 2488 switch (instr.ldst_sl.type.Value()) {
2511 case Tegra::Shader::StoreType::Bytes32: 2489 case Tegra::Shader::StoreType::Bytes32:
2512 regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1); 2490 regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
2513 break; 2491 break;
2514 default: 2492 default:
2515 LOG_CRITICAL(HW_GPU, "LD_L Unhandled type: {}", 2493 UNIMPLEMENTED_MSG("LD_L Unhandled type: {}",
2516 static_cast<unsigned>(instr.ldst_sl.type.Value())); 2494 static_cast<unsigned>(instr.ldst_sl.type.Value()));
2517 UNREACHABLE();
2518 } 2495 }
2519 2496
2520 --shader.scope; 2497 --shader.scope;
@@ -2522,10 +2499,10 @@ private:
2522 break; 2499 break;
2523 } 2500 }
2524 case OpCode::Id::ST_A: { 2501 case OpCode::Id::ST_A: {
2525 ASSERT_MSG(instr.gpr8.Value() == Register::ZeroIndex, 2502 UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
2526 "Indirect attribute loads are not supported"); 2503 "Indirect attribute loads are not supported");
2527 ASSERT_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) == 0, 2504 UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
2528 "Unaligned attribute loads are not supported"); 2505 "Unaligned attribute loads are not supported");
2529 2506
2530 u64 next_element = instr.attribute.fmt20.element; 2507 u64 next_element = instr.attribute.fmt20.element;
2531 u64 next_index = static_cast<u64>(instr.attribute.fmt20.index.Value()); 2508 u64 next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
@@ -2550,6 +2527,9 @@ private:
2550 break; 2527 break;
2551 } 2528 }
2552 case OpCode::Id::ST_L: { 2529 case OpCode::Id::ST_L: {
2530 UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}",
2531 static_cast<unsigned>(instr.st_l.unknown.Value()));
2532
2553 // Add an extra scope and declare the index register inside to prevent 2533 // Add an extra scope and declare the index register inside to prevent
2554 // overwriting it in case it is used as an output of the LD instruction. 2534 // overwriting it in case it is used as an output of the LD instruction.
2555 shader.AddLine('{'); 2535 shader.AddLine('{');
@@ -2560,20 +2540,13 @@ private:
2560 2540
2561 shader.AddLine("uint index = (" + op + " / 4);"); 2541 shader.AddLine("uint index = (" + op + " / 4);");
2562 2542
2563 if (instr.st_l.unknown != 0) {
2564 LOG_CRITICAL(HW_GPU, "ST_L Unhandled mode: {}",
2565 static_cast<unsigned>(instr.st_l.unknown.Value()));
2566 UNREACHABLE();
2567 }
2568
2569 switch (instr.ldst_sl.type.Value()) { 2543 switch (instr.ldst_sl.type.Value()) {
2570 case Tegra::Shader::StoreType::Bytes32: 2544 case Tegra::Shader::StoreType::Bytes32:
2571 regs.SetLocalMemoryAsFloat("index", regs.GetRegisterAsFloat(instr.gpr0)); 2545 regs.SetLocalMemoryAsFloat("index", regs.GetRegisterAsFloat(instr.gpr0));
2572 break; 2546 break;
2573 default: 2547 default:
2574 LOG_CRITICAL(HW_GPU, "ST_L Unhandled type: {}", 2548 UNIMPLEMENTED_MSG("ST_L Unhandled type: {}",
2575 static_cast<unsigned>(instr.ldst_sl.type.Value())); 2549 static_cast<unsigned>(instr.ldst_sl.type.Value()));
2576 UNREACHABLE();
2577 } 2550 }
2578 2551
2579 --shader.scope; 2552 --shader.scope;
@@ -2585,78 +2558,99 @@ private:
2585 std::string coord; 2558 std::string coord;
2586 const bool is_array = instr.tex.array != 0; 2559 const bool is_array = instr.tex.array != 0;
2587 2560
2588 ASSERT_MSG(!instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), 2561 UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
2589 "NODEP is not implemented"); 2562 "NODEP is not implemented");
2590 ASSERT_MSG(!instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), 2563 UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
2591 "AOFFI is not implemented"); 2564 "AOFFI is not implemented");
2592 2565
2593 const bool depth_compare = 2566 const bool depth_compare =
2594 instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); 2567 instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
2595 u32 num_coordinates = TextureCoordinates(texture_type); 2568 u32 num_coordinates = TextureCoordinates(texture_type);
2596 if (depth_compare) 2569 u32 start_index = 0;
2597 num_coordinates += 1; 2570 std::string array_elem;
2571 if (is_array) {
2572 array_elem = regs.GetRegisterAsInteger(instr.gpr8);
2573 start_index = 1;
2574 }
2575 const auto process_mode = instr.tex.GetTextureProcessMode();
2576 u32 start_index_b = 0;
2577 std::string lod_value;
2578 if (process_mode != Tegra::Shader::TextureProcessMode::LZ &&
2579 process_mode != Tegra::Shader::TextureProcessMode::None) {
2580 start_index_b = 1;
2581 lod_value = regs.GetRegisterAsFloat(instr.gpr20);
2582 }
2583
2584 std::string depth_value;
2585 if (depth_compare) {
2586 depth_value = regs.GetRegisterAsFloat(instr.gpr20.Value() + start_index_b);
2587 }
2588
2589 bool depth_compare_extra = false;
2598 2590
2599 switch (num_coordinates) { 2591 switch (num_coordinates) {
2600 case 1: { 2592 case 1: {
2593 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index);
2601 if (is_array) { 2594 if (is_array) {
2602 const std::string index = regs.GetRegisterAsInteger(instr.gpr8); 2595 if (depth_compare) {
2603 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); 2596 coord = "vec3 coords = vec3(" + x + ", " + depth_value + ", " +
2604 coord = "vec2 coords = vec2(" + x + ", " + index + ");"; 2597 array_elem + ");";
2598 } else {
2599 coord = "vec2 coords = vec2(" + x + ", " + array_elem + ");";
2600 }
2605 } else { 2601 } else {
2606 const std::string x = regs.GetRegisterAsFloat(instr.gpr8); 2602 if (depth_compare) {
2607 coord = "float coords = " + x + ';'; 2603 coord = "vec2 coords = vec2(" + x + ", " + depth_value + ");";
2604 } else {
2605 coord = "float coords = " + x + ';';
2606 }
2608 } 2607 }
2609 break; 2608 break;
2610 } 2609 }
2611 case 2: { 2610 case 2: {
2611 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index);
2612 const std::string y =
2613 regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index + 1);
2612 if (is_array) { 2614 if (is_array) {
2613 const std::string index = regs.GetRegisterAsInteger(instr.gpr8); 2615 if (depth_compare) {
2614 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); 2616 coord = "vec4 coords = vec4(" + x + ", " + y + ", " + depth_value +
2615 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 2); 2617 ", " + array_elem + ");";
2616 coord = "vec3 coords = vec3(" + x + ", " + y + ", " + index + ");"; 2618 } else {
2619 coord = "vec3 coords = vec3(" + x + ", " + y + ", " + array_elem + ");";
2620 }
2617 } else { 2621 } else {
2618 const std::string x = regs.GetRegisterAsFloat(instr.gpr8); 2622 if (depth_compare) {
2619 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); 2623 coord =
2620 coord = "vec2 coords = vec2(" + x + ", " + y + ");"; 2624 "vec3 coords = vec3(" + x + ", " + y + ", " + depth_value + ");";
2625 } else {
2626 coord = "vec2 coords = vec2(" + x + ", " + y + ");";
2627 }
2621 } 2628 }
2622 break; 2629 break;
2623 } 2630 }
2624 case 3: { 2631 case 3: {
2625 if (depth_compare) { 2632 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index);
2626 if (is_array) { 2633 const std::string y =
2627 const std::string index = regs.GetRegisterAsInteger(instr.gpr8); 2634 regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index + 1);
2628 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); 2635 const std::string z =
2629 const std::string y = regs.GetRegisterAsFloat(instr.gpr20); 2636 regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index + 2);
2630 const std::string z = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1); 2637 if (is_array) {
2631 coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + index + 2638 depth_compare_extra = depth_compare;
2632 ");"; 2639 coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " +
2633 } else { 2640 array_elem + ");";
2634 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
2635 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2636 const std::string z = regs.GetRegisterAsFloat(instr.gpr20);
2637 coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
2638 }
2639 } else { 2641 } else {
2640 if (is_array) { 2642 if (depth_compare) {
2641 const std::string index = regs.GetRegisterAsInteger(instr.gpr8); 2643 coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " +
2642 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); 2644 depth_value + ");";
2643 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 2);
2644 const std::string z = regs.GetRegisterAsFloat(instr.gpr8.Value() + 3);
2645 coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + index +
2646 ");";
2647 } else { 2645 } else {
2648 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
2649 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2650 const std::string z = regs.GetRegisterAsFloat(instr.gpr8.Value() + 2);
2651 coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");"; 2646 coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
2652 } 2647 }
2653 } 2648 }
2654 break; 2649 break;
2655 } 2650 }
2656 default: 2651 default:
2657 LOG_CRITICAL(HW_GPU, "Unhandled coordinates number {}", 2652 UNIMPLEMENTED_MSG("Unhandled coordinates number {}",
2658 static_cast<u32>(num_coordinates)); 2653 static_cast<u32>(num_coordinates));
2659 UNREACHABLE();
2660 2654
2661 // Fallback to interpreting as a 2D texture for now 2655 // Fallback to interpreting as a 2D texture for now
2662 const std::string x = regs.GetRegisterAsFloat(instr.gpr8); 2656 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
@@ -2664,126 +2658,172 @@ private:
2664 coord = "vec2 coords = vec2(" + x + ", " + y + ");"; 2658 coord = "vec2 coords = vec2(" + x + ", " + y + ");";
2665 texture_type = Tegra::Shader::TextureType::Texture2D; 2659 texture_type = Tegra::Shader::TextureType::Texture2D;
2666 } 2660 }
2667 // TODO: make sure coordinates are always indexed to gpr8 and gpr20 is always bias
2668 // or lod.
2669 std::string op_c;
2670 2661
2671 const std::string sampler = 2662 const std::string sampler =
2672 GetSampler(instr.sampler, texture_type, is_array, depth_compare); 2663 GetSampler(instr.sampler, texture_type, is_array, depth_compare);
2673 // Add an extra scope and declare the texture coords inside to prevent 2664 // Add an extra scope and declare the texture coords inside to prevent
2674 // overwriting them in case they are used as outputs of the texs instruction. 2665 // overwriting them in case they are used as outputs of the texs instruction.
2675 2666
2676 shader.AddLine("{"); 2667 shader.AddLine('{');
2677 ++shader.scope; 2668 ++shader.scope;
2678 shader.AddLine(coord); 2669 shader.AddLine(coord);
2679 std::string texture; 2670 std::string texture;
2680 2671
2681 switch (instr.tex.GetTextureProcessMode()) { 2672 switch (instr.tex.GetTextureProcessMode()) {
2682 case Tegra::Shader::TextureProcessMode::None: { 2673 case Tegra::Shader::TextureProcessMode::None: {
2683 texture = "texture(" + sampler + ", coords)"; 2674 if (!depth_compare_extra) {
2675 texture = "texture(" + sampler + ", coords)";
2676 } else {
2677 texture = "texture(" + sampler + ", coords, " + depth_value + ')';
2678 }
2684 break; 2679 break;
2685 } 2680 }
2686 case Tegra::Shader::TextureProcessMode::LZ: { 2681 case Tegra::Shader::TextureProcessMode::LZ: {
2687 texture = "textureLod(" + sampler + ", coords, 0.0)"; 2682 if (!depth_compare_extra) {
2683 texture = "textureLod(" + sampler + ", coords, 0.0)";
2684 } else {
2685 texture = "texture(" + sampler + ", coords, " + depth_value + ')';
2686 }
2688 break; 2687 break;
2689 } 2688 }
2690 case Tegra::Shader::TextureProcessMode::LB: 2689 case Tegra::Shader::TextureProcessMode::LB:
2691 case Tegra::Shader::TextureProcessMode::LBA: { 2690 case Tegra::Shader::TextureProcessMode::LBA: {
2692 if (depth_compare) { 2691 // TODO: Figure if A suffix changes the equation at all.
2693 if (is_array) 2692 if (!depth_compare_extra) {
2694 op_c = regs.GetRegisterAsFloat(instr.gpr20.Value() + 2); 2693 texture = "texture(" + sampler + ", coords, " + lod_value + ')';
2695 else
2696 op_c = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1);
2697 } else { 2694 } else {
2698 op_c = regs.GetRegisterAsFloat(instr.gpr20); 2695 texture = "texture(" + sampler + ", coords, " + depth_value + ')';
2696 LOG_WARNING(HW_GPU,
2697 "OpenGL Limitation: can't set bias value along depth compare");
2699 } 2698 }
2700 // TODO: Figure if A suffix changes the equation at all.
2701 texture = "texture(" + sampler + ", coords, " + op_c + ')';
2702 break; 2699 break;
2703 } 2700 }
2704 case Tegra::Shader::TextureProcessMode::LL: 2701 case Tegra::Shader::TextureProcessMode::LL:
2705 case Tegra::Shader::TextureProcessMode::LLA: { 2702 case Tegra::Shader::TextureProcessMode::LLA: {
2706 if (num_coordinates <= 2) { 2703 // TODO: Figure if A suffix changes the equation at all.
2707 op_c = regs.GetRegisterAsFloat(instr.gpr20); 2704 if (!depth_compare_extra) {
2705 texture = "textureLod(" + sampler + ", coords, " + lod_value + ')';
2708 } else { 2706 } else {
2709 op_c = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1); 2707 texture = "texture(" + sampler + ", coords, " + depth_value + ')';
2708 LOG_WARNING(HW_GPU,
2709 "OpenGL Limitation: can't set lod value along depth compare");
2710 } 2710 }
2711 // TODO: Figure if A suffix changes the equation at all.
2712 texture = "textureLod(" + sampler + ", coords, " + op_c + ')';
2713 break; 2711 break;
2714 } 2712 }
2715 default: { 2713 default: {
2716 texture = "texture(" + sampler + ", coords)"; 2714 if (!depth_compare_extra) {
2717 LOG_CRITICAL(HW_GPU, "Unhandled texture process mode {}", 2715 texture = "texture(" + sampler + ", coords)";
2718 static_cast<u32>(instr.tex.GetTextureProcessMode())); 2716 } else {
2719 UNREACHABLE(); 2717 texture = "texture(" + sampler + ", coords, " + depth_value + ')';
2718 }
2719 UNIMPLEMENTED_MSG("Unhandled texture process mode {}",
2720 static_cast<u32>(instr.tex.GetTextureProcessMode()));
2720 } 2721 }
2721 } 2722 }
2722 if (!depth_compare) { 2723 if (!depth_compare) {
2724 shader.AddLine("vec4 texture_tmp = " + texture + ';');
2723 std::size_t dest_elem{}; 2725 std::size_t dest_elem{};
2724 for (std::size_t elem = 0; elem < 4; ++elem) { 2726 for (std::size_t elem = 0; elem < 4; ++elem) {
2725 if (!instr.tex.IsComponentEnabled(elem)) { 2727 if (!instr.tex.IsComponentEnabled(elem)) {
2726 // Skip disabled components 2728 // Skip disabled components
2727 continue; 2729 continue;
2728 } 2730 }
2729 regs.SetRegisterToFloat(instr.gpr0, elem, texture, 1, 4, false, dest_elem); 2731 regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false,
2732 dest_elem);
2730 ++dest_elem; 2733 ++dest_elem;
2731 } 2734 }
2732 } else { 2735 } else {
2733 regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1, false); 2736 regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1, false);
2734 } 2737 }
2735 --shader.scope; 2738 --shader.scope;
2736 shader.AddLine("}"); 2739 shader.AddLine('}');
2737 break; 2740 break;
2738 } 2741 }
2739 case OpCode::Id::TEXS: { 2742 case OpCode::Id::TEXS: {
2740 std::string coord;
2741 Tegra::Shader::TextureType texture_type{instr.texs.GetTextureType()}; 2743 Tegra::Shader::TextureType texture_type{instr.texs.GetTextureType()};
2742 bool is_array{instr.texs.IsArrayTexture()}; 2744 bool is_array{instr.texs.IsArrayTexture()};
2743 2745
2744 ASSERT_MSG(!instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), 2746 UNIMPLEMENTED_IF_MSG(instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
2745 "NODEP is not implemented"); 2747 "NODEP is not implemented");
2746 2748
2747 const bool depth_compare = 2749 const bool depth_compare =
2748 instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); 2750 instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
2749 u32 num_coordinates = TextureCoordinates(texture_type); 2751 u32 num_coordinates = TextureCoordinates(texture_type);
2750 if (depth_compare) 2752 const auto process_mode = instr.texs.GetTextureProcessMode();
2751 num_coordinates += 1; 2753 std::string lod_value;
2754 std::string coord;
2755 u32 lod_offset = 0;
2756 if (process_mode == Tegra::Shader::TextureProcessMode::LL) {
2757 if (num_coordinates > 2) {
2758 lod_value = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1);
2759 lod_offset = 2;
2760 } else {
2761 lod_value = regs.GetRegisterAsFloat(instr.gpr20);
2762 lod_offset = 1;
2763 }
2764 }
2752 2765
2753 switch (num_coordinates) { 2766 switch (num_coordinates) {
2767 case 1: {
2768 coord = "float coords = " + regs.GetRegisterAsFloat(instr.gpr8) + ';';
2769 break;
2770 }
2754 case 2: { 2771 case 2: {
2755 if (is_array) { 2772 if (is_array) {
2756 const std::string index = regs.GetRegisterAsInteger(instr.gpr8); 2773 if (depth_compare) {
2757 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); 2774 const std::string index = regs.GetRegisterAsInteger(instr.gpr8);
2758 const std::string y = regs.GetRegisterAsFloat(instr.gpr20); 2775 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2759 coord = "vec3 coords = vec3(" + x + ", " + y + ", " + index + ");"; 2776 const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
2777 const std::string z = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1);
2778 coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + index +
2779 ");";
2780 } else {
2781 const std::string index = regs.GetRegisterAsInteger(instr.gpr8);
2782 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2783 const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
2784 coord = "vec3 coords = vec3(" + x + ", " + y + ", " + index + ");";
2785 }
2760 } else { 2786 } else {
2761 const std::string x = regs.GetRegisterAsFloat(instr.gpr8); 2787 if (lod_offset != 0) {
2762 const std::string y = regs.GetRegisterAsFloat(instr.gpr20); 2788 if (depth_compare) {
2763 coord = "vec2 coords = vec2(" + x + ", " + y + ");"; 2789 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
2790 const std::string y =
2791 regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2792 const std::string z =
2793 regs.GetRegisterAsFloat(instr.gpr20.Value() + lod_offset);
2794 coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
2795 } else {
2796 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
2797 const std::string y =
2798 regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2799 coord = "vec2 coords = vec2(" + x + ", " + y + ");";
2800 }
2801 } else {
2802 if (depth_compare) {
2803 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
2804 const std::string y =
2805 regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2806 const std::string z = regs.GetRegisterAsFloat(instr.gpr20);
2807 coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
2808 } else {
2809 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
2810 const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
2811 coord = "vec2 coords = vec2(" + x + ", " + y + ");";
2812 }
2813 }
2764 } 2814 }
2765 break; 2815 break;
2766 } 2816 }
2767 case 3: { 2817 case 3: {
2768 if (is_array) { 2818 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
2769 const std::string index = regs.GetRegisterAsInteger(instr.gpr8); 2819 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2770 const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); 2820 const std::string z = regs.GetRegisterAsFloat(instr.gpr20);
2771 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 2); 2821 coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
2772 const std::string z = regs.GetRegisterAsFloat(instr.gpr20);
2773 coord =
2774 "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + index + ");";
2775 } else {
2776 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
2777 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2778 const std::string z = regs.GetRegisterAsFloat(instr.gpr20);
2779 coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
2780 }
2781 break; 2822 break;
2782 } 2823 }
2783 default: 2824 default:
2784 LOG_CRITICAL(HW_GPU, "Unhandled coordinates number {}", 2825 UNIMPLEMENTED_MSG("Unhandled coordinates number {}",
2785 static_cast<u32>(num_coordinates)); 2826 static_cast<u32>(num_coordinates));
2786 UNREACHABLE();
2787 2827
2788 // Fallback to interpreting as a 2D texture for now 2828 // Fallback to interpreting as a 2D texture for now
2789 const std::string x = regs.GetRegisterAsFloat(instr.gpr8); 2829 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
@@ -2795,7 +2835,7 @@ private:
2795 const std::string sampler = 2835 const std::string sampler =
2796 GetSampler(instr.sampler, texture_type, is_array, depth_compare); 2836 GetSampler(instr.sampler, texture_type, is_array, depth_compare);
2797 std::string texture; 2837 std::string texture;
2798 switch (instr.texs.GetTextureProcessMode()) { 2838 switch (process_mode) {
2799 case Tegra::Shader::TextureProcessMode::None: { 2839 case Tegra::Shader::TextureProcessMode::None: {
2800 texture = "texture(" + sampler + ", coords)"; 2840 texture = "texture(" + sampler + ", coords)";
2801 break; 2841 break;
@@ -2809,15 +2849,13 @@ private:
2809 break; 2849 break;
2810 } 2850 }
2811 case Tegra::Shader::TextureProcessMode::LL: { 2851 case Tegra::Shader::TextureProcessMode::LL: {
2812 const std::string op_c = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1); 2852 texture = "textureLod(" + sampler + ", coords, " + lod_value + ')';
2813 texture = "textureLod(" + sampler + ", coords, " + op_c + ')';
2814 break; 2853 break;
2815 } 2854 }
2816 default: { 2855 default: {
2817 texture = "texture(" + sampler + ", coords)"; 2856 texture = "texture(" + sampler + ", coords)";
2818 LOG_CRITICAL(HW_GPU, "Unhandled texture process mode {}", 2857 UNIMPLEMENTED_MSG("Unhandled texture process mode {}",
2819 static_cast<u32>(instr.texs.GetTextureProcessMode())); 2858 static_cast<u32>(instr.texs.GetTextureProcessMode()));
2820 UNREACHABLE();
2821 } 2859 }
2822 } 2860 }
2823 if (!depth_compare) { 2861 if (!depth_compare) {
@@ -2825,47 +2863,48 @@ private:
2825 } else { 2863 } else {
2826 WriteTexsInstruction(instr, coord, "vec4(" + texture + ')'); 2864 WriteTexsInstruction(instr, coord, "vec4(" + texture + ')');
2827 } 2865 }
2866
2828 break; 2867 break;
2829 } 2868 }
2830 case OpCode::Id::TLDS: { 2869 case OpCode::Id::TLDS: {
2831 std::string coord;
2832 const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()}; 2870 const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
2833 const bool is_array{instr.tlds.IsArrayTexture()}; 2871 const bool is_array{instr.tlds.IsArrayTexture()};
2834 2872
2835 ASSERT(texture_type == Tegra::Shader::TextureType::Texture2D); 2873 ASSERT(texture_type == Tegra::Shader::TextureType::Texture2D);
2836 ASSERT(is_array == false); 2874 ASSERT(is_array == false);
2837 2875
2838 ASSERT_MSG(!instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), 2876 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
2839 "NODEP is not implemented"); 2877 "NODEP is not implemented");
2840 ASSERT_MSG(!instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), 2878 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
2841 "AOFFI is not implemented"); 2879 "AOFFI is not implemented");
2842 ASSERT_MSG(!instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::MZ), 2880 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::MZ),
2843 "MZ is not implemented"); 2881 "MZ is not implemented");
2882
2883 u32 extra_op_offset = 0;
2844 2884
2845 u32 op_c_offset = 0; 2885 // Scope to avoid variable name overlaps.
2886 shader.AddLine('{');
2887 ++shader.scope;
2888 std::string coords;
2846 2889
2847 switch (texture_type) { 2890 switch (texture_type) {
2848 case Tegra::Shader::TextureType::Texture1D: { 2891 case Tegra::Shader::TextureType::Texture1D: {
2849 const std::string x = regs.GetRegisterAsInteger(instr.gpr8); 2892 const std::string x = regs.GetRegisterAsInteger(instr.gpr8);
2850 coord = "int coords = " + x + ';'; 2893 coords = "float coords = " + x + ';';
2851 break; 2894 break;
2852 } 2895 }
2853 case Tegra::Shader::TextureType::Texture2D: { 2896 case Tegra::Shader::TextureType::Texture2D: {
2854 if (is_array) { 2897 UNIMPLEMENTED_IF_MSG(is_array, "Unhandled 2d array texture");
2855 LOG_CRITICAL(HW_GPU, "Unhandled 2d array texture"); 2898
2856 UNREACHABLE(); 2899 const std::string x = regs.GetRegisterAsInteger(instr.gpr8);
2857 } else { 2900 const std::string y = regs.GetRegisterAsInteger(instr.gpr20);
2858 const std::string x = regs.GetRegisterAsInteger(instr.gpr8); 2901 // shader.AddLine("ivec2 coords = ivec2(" + x + ", " + y + ");");
2859 const std::string y = regs.GetRegisterAsInteger(instr.gpr20); 2902 coords = "ivec2 coords = ivec2(" + x + ", " + y + ");";
2860 coord = "ivec2 coords = ivec2(" + x + ", " + y + ");"; 2903 extra_op_offset = 1;
2861 op_c_offset = 1;
2862 }
2863 break; 2904 break;
2864 } 2905 }
2865 default: 2906 default:
2866 LOG_CRITICAL(HW_GPU, "Unhandled texture type {}", 2907 UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type));
2867 static_cast<u32>(texture_type));
2868 UNREACHABLE();
2869 } 2908 }
2870 const std::string sampler = 2909 const std::string sampler =
2871 GetSampler(instr.sampler, texture_type, is_array, false); 2910 GetSampler(instr.sampler, texture_type, is_array, false);
@@ -2876,19 +2915,22 @@ private:
2876 break; 2915 break;
2877 } 2916 }
2878 case Tegra::Shader::TextureProcessMode::LL: { 2917 case Tegra::Shader::TextureProcessMode::LL: {
2879 const std::string op_c = 2918 shader.AddLine(
2880 regs.GetRegisterAsInteger(instr.gpr20.Value() + op_c_offset); 2919 "float lod = " +
2881 texture = "texelFetch(" + sampler + ", coords, " + op_c + ')'; 2920 regs.GetRegisterAsInteger(instr.gpr20.Value() + extra_op_offset) + ';');
2921 texture = "texelFetch(" + sampler + ", coords, lod)";
2882 break; 2922 break;
2883 } 2923 }
2884 default: { 2924 default: {
2885 texture = "texelFetch(" + sampler + ", coords, 0)"; 2925 texture = "texelFetch(" + sampler + ", coords, 0)";
2886 LOG_CRITICAL(HW_GPU, "Unhandled texture process mode {}", 2926 UNIMPLEMENTED_MSG("Unhandled texture process mode {}",
2887 static_cast<u32>(instr.tlds.GetTextureProcessMode())); 2927 static_cast<u32>(instr.tlds.GetTextureProcessMode()));
2888 UNREACHABLE();
2889 } 2928 }
2890 } 2929 }
2891 WriteTexsInstruction(instr, coord, texture); 2930 WriteTexsInstruction(instr, coords, texture);
2931
2932 --shader.scope;
2933 shader.AddLine('}');
2892 break; 2934 break;
2893 } 2935 }
2894 case OpCode::Id::TLD4: { 2936 case OpCode::Id::TLD4: {
@@ -2896,14 +2938,14 @@ private:
2896 ASSERT(instr.tld4.array == 0); 2938 ASSERT(instr.tld4.array == 0);
2897 std::string coord; 2939 std::string coord;
2898 2940
2899 ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), 2941 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
2900 "NODEP is not implemented"); 2942 "NODEP is not implemented");
2901 ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), 2943 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
2902 "AOFFI is not implemented"); 2944 "AOFFI is not implemented");
2903 ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), 2945 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
2904 "NDV is not implemented"); 2946 "NDV is not implemented");
2905 ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::PTP), 2947 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::PTP),
2906 "PTP is not implemented"); 2948 "PTP is not implemented");
2907 const bool depth_compare = 2949 const bool depth_compare =
2908 instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); 2950 instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
2909 auto texture_type = instr.tld4.texture_type.Value(); 2951 auto texture_type = instr.tld4.texture_type.Value();
@@ -2911,61 +2953,71 @@ private:
2911 if (depth_compare) 2953 if (depth_compare)
2912 num_coordinates += 1; 2954 num_coordinates += 1;
2913 2955
2956 // Add an extra scope and declare the texture coords inside to prevent
2957 // overwriting them in case they are used as outputs of the texs instruction.
2958 shader.AddLine('{');
2959 ++shader.scope;
2960
2914 switch (num_coordinates) { 2961 switch (num_coordinates) {
2915 case 2: { 2962 case 2: {
2916 const std::string x = regs.GetRegisterAsFloat(instr.gpr8); 2963 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
2917 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); 2964 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2918 coord = "vec2 coords = vec2(" + x + ", " + y + ");"; 2965 shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");");
2919 break; 2966 break;
2920 } 2967 }
2921 case 3: { 2968 case 3: {
2922 const std::string x = regs.GetRegisterAsFloat(instr.gpr8); 2969 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
2923 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); 2970 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2924 const std::string z = regs.GetRegisterAsFloat(instr.gpr8.Value() + 2); 2971 const std::string z = regs.GetRegisterAsFloat(instr.gpr8.Value() + 2);
2925 coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");"; 2972 shader.AddLine("vec3 coords = vec3(" + x + ", " + y + ", " + z + ");");
2926 break; 2973 break;
2927 } 2974 }
2928 default: 2975 default:
2929 LOG_CRITICAL(HW_GPU, "Unhandled coordinates number {}", 2976 UNIMPLEMENTED_MSG("Unhandled coordinates number {}",
2930 static_cast<u32>(num_coordinates)); 2977 static_cast<u32>(num_coordinates));
2931 UNREACHABLE();
2932 const std::string x = regs.GetRegisterAsFloat(instr.gpr8); 2978 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
2933 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); 2979 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2934 coord = "vec2 coords = vec2(" + x + ", " + y + ");"; 2980 shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");");
2935 texture_type = Tegra::Shader::TextureType::Texture2D; 2981 texture_type = Tegra::Shader::TextureType::Texture2D;
2936 } 2982 }
2937 2983
2938 const std::string sampler = 2984 const std::string sampler =
2939 GetSampler(instr.sampler, texture_type, false, depth_compare); 2985 GetSampler(instr.sampler, texture_type, false, depth_compare);
2940 // Add an extra scope and declare the texture coords inside to prevent 2986
2941 // overwriting them in case they are used as outputs of the texs instruction.
2942 shader.AddLine("{");
2943 ++shader.scope;
2944 shader.AddLine(coord);
2945 const std::string texture = "textureGather(" + sampler + ", coords, " + 2987 const std::string texture = "textureGather(" + sampler + ", coords, " +
2946 std::to_string(instr.tld4.component) + ')'; 2988 std::to_string(instr.tld4.component) + ')';
2989
2947 if (!depth_compare) { 2990 if (!depth_compare) {
2991 shader.AddLine("vec4 texture_tmp = " + texture + ';');
2948 std::size_t dest_elem{}; 2992 std::size_t dest_elem{};
2949 for (std::size_t elem = 0; elem < 4; ++elem) { 2993 for (std::size_t elem = 0; elem < 4; ++elem) {
2950 if (!instr.tex.IsComponentEnabled(elem)) { 2994 if (!instr.tex.IsComponentEnabled(elem)) {
2951 // Skip disabled components 2995 // Skip disabled components
2952 continue; 2996 continue;
2953 } 2997 }
2954 regs.SetRegisterToFloat(instr.gpr0, elem, texture, 1, 4, false, dest_elem); 2998 regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false,
2999 dest_elem);
2955 ++dest_elem; 3000 ++dest_elem;
2956 } 3001 }
2957 } else { 3002 } else {
2958 regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1, false); 3003 regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1, false);
2959 } 3004 }
2960 --shader.scope; 3005 --shader.scope;
2961 shader.AddLine("}"); 3006 shader.AddLine('}');
2962 break; 3007 break;
2963 } 3008 }
2964 case OpCode::Id::TLD4S: { 3009 case OpCode::Id::TLD4S: {
2965 ASSERT_MSG(!instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), 3010 UNIMPLEMENTED_IF_MSG(
2966 "NODEP is not implemented"); 3011 instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
2967 ASSERT_MSG(!instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), 3012 "NODEP is not implemented");
2968 "AOFFI is not implemented"); 3013 UNIMPLEMENTED_IF_MSG(
3014 instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
3015 "AOFFI is not implemented");
3016
3017 // Scope to avoid variable name overlaps.
3018 shader.AddLine('{');
3019 ++shader.scope;
3020 std::string coords;
2969 3021
2970 const bool depth_compare = 3022 const bool depth_compare =
2971 instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); 3023 instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
@@ -2974,28 +3026,32 @@ private:
2974 // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. 3026 // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
2975 const std::string sampler = GetSampler( 3027 const std::string sampler = GetSampler(
2976 instr.sampler, Tegra::Shader::TextureType::Texture2D, false, depth_compare); 3028 instr.sampler, Tegra::Shader::TextureType::Texture2D, false, depth_compare);
2977 std::string coord;
2978 if (!depth_compare) { 3029 if (!depth_compare) {
2979 coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");"; 3030 coords = "vec2 coords = vec2(" + op_a + ", " + op_b + ");";
2980 } else { 3031 } else {
2981 // Note: TLD4S coordinate encoding works just like TEXS's 3032 // Note: TLD4S coordinate encoding works just like TEXS's
2982 const std::string op_c = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); 3033 const std::string op_y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
2983 coord = "vec3 coords = vec3(" + op_a + ", " + op_c + ", " + op_b + ");"; 3034 coords = "vec3 coords = vec3(" + op_a + ", " + op_y + ", " + op_b + ");";
2984 } 3035 }
2985 const std::string texture = "textureGather(" + sampler + ", coords, " + 3036 const std::string texture = "textureGather(" + sampler + ", coords, " +
2986 std::to_string(instr.tld4s.component) + ')'; 3037 std::to_string(instr.tld4s.component) + ')';
2987 3038
2988 if (!depth_compare) { 3039 if (!depth_compare) {
2989 WriteTexsInstruction(instr, coord, texture); 3040 WriteTexsInstruction(instr, coords, texture);
2990 } else { 3041 } else {
2991 WriteTexsInstruction(instr, coord, "vec4(" + texture + ')'); 3042 WriteTexsInstruction(instr, coords, "vec4(" + texture + ')');
2992 } 3043 }
3044
3045 --shader.scope;
3046 shader.AddLine('}');
2993 break; 3047 break;
2994 } 3048 }
2995 case OpCode::Id::TXQ: { 3049 case OpCode::Id::TXQ: {
2996 ASSERT_MSG(!instr.txq.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), 3050 UNIMPLEMENTED_IF_MSG(instr.txq.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
2997 "NODEP is not implemented"); 3051 "NODEP is not implemented");
2998 3052
3053 ++shader.scope;
3054 shader.AddLine('{');
2999 // TODO: the new commits on the texture refactor, change the way samplers work. 3055 // TODO: the new commits on the texture refactor, change the way samplers work.
3000 // Sadly, not all texture instructions specify the type of texture their sampler 3056 // Sadly, not all texture instructions specify the type of texture their sampler
3001 // uses. This must be fixed at a later instance. 3057 // uses. This must be fixed at a later instance.
@@ -3003,23 +3059,30 @@ private:
3003 GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false); 3059 GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
3004 switch (instr.txq.query_type) { 3060 switch (instr.txq.query_type) {
3005 case Tegra::Shader::TextureQueryType::Dimension: { 3061 case Tegra::Shader::TextureQueryType::Dimension: {
3006 const std::string texture = "textureQueryLevels(" + sampler + ')'; 3062 const std::string texture = "textureSize(" + sampler + ", " +
3007 regs.SetRegisterToInteger(instr.gpr0, true, 0, texture, 1, 1); 3063 regs.GetRegisterAsInteger(instr.gpr8) + ')';
3064 const std::string mip_level = "textureQueryLevels(" + sampler + ')';
3065 shader.AddLine("ivec2 sizes = " + texture + ';');
3066 regs.SetRegisterToInteger(instr.gpr0, true, 0, "sizes.x", 1, 1);
3067 regs.SetRegisterToInteger(instr.gpr0.Value() + 1, true, 0, "sizes.y", 1, 1);
3068 regs.SetRegisterToInteger(instr.gpr0.Value() + 2, true, 0, "0", 1, 1);
3069 regs.SetRegisterToInteger(instr.gpr0.Value() + 3, true, 0, mip_level, 1, 1);
3008 break; 3070 break;
3009 } 3071 }
3010 default: { 3072 default: {
3011 LOG_CRITICAL(HW_GPU, "Unhandled texture query type: {}", 3073 UNIMPLEMENTED_MSG("Unhandled texture query type: {}",
3012 static_cast<u32>(instr.txq.query_type.Value())); 3074 static_cast<u32>(instr.txq.query_type.Value()));
3013 UNREACHABLE();
3014 } 3075 }
3015 } 3076 }
3077 --shader.scope;
3078 shader.AddLine('}');
3016 break; 3079 break;
3017 } 3080 }
3018 case OpCode::Id::TMML: { 3081 case OpCode::Id::TMML: {
3019 ASSERT_MSG(!instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), 3082 UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
3020 "NODEP is not implemented"); 3083 "NODEP is not implemented");
3021 ASSERT_MSG(!instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), 3084 UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
3022 "NDV is not implemented"); 3085 "NDV is not implemented");
3023 3086
3024 const std::string x = regs.GetRegisterAsFloat(instr.gpr8); 3087 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
3025 const bool is_array = instr.tmml.array != 0; 3088 const bool is_array = instr.tmml.array != 0;
@@ -3041,9 +3104,7 @@ private:
3041 break; 3104 break;
3042 } 3105 }
3043 default: 3106 default:
3044 LOG_CRITICAL(HW_GPU, "Unhandled texture type {}", 3107 UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type));
3045 static_cast<u32>(texture_type));
3046 UNREACHABLE();
3047 3108
3048 // Fallback to interpreting as a 2D texture for now 3109 // Fallback to interpreting as a 2D texture for now
3049 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); 3110 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
@@ -3066,8 +3127,7 @@ private:
3066 break; 3127 break;
3067 } 3128 }
3068 default: { 3129 default: {
3069 LOG_CRITICAL(HW_GPU, "Unhandled memory instruction: {}", opcode->get().GetName()); 3130 UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
3070 UNREACHABLE();
3071 } 3131 }
3072 } 3132 }
3073 break; 3133 break;
@@ -3153,7 +3213,7 @@ private:
3153 break; 3213 break;
3154 } 3214 }
3155 case OpCode::Type::HalfSetPredicate: { 3215 case OpCode::Type::HalfSetPredicate: {
3156 ASSERT_MSG(instr.hsetp2.ftz == 0, "Unimplemented"); 3216 UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0);
3157 3217
3158 const std::string op_a = 3218 const std::string op_a =
3159 GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.hsetp2.type_a, 3219 GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.hsetp2.type_a,
@@ -3198,6 +3258,9 @@ private:
3198 break; 3258 break;
3199 } 3259 }
3200 case OpCode::Type::PredicateSetRegister: { 3260 case OpCode::Type::PredicateSetRegister: {
3261 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
3262 "Condition codes generation in PSET is not implemented");
3263
3201 const std::string op_a = 3264 const std::string op_a =
3202 GetPredicateCondition(instr.pset.pred12, instr.pset.neg_pred12 != 0); 3265 GetPredicateCondition(instr.pset.pred12, instr.pset.neg_pred12 != 0);
3203 const std::string op_b = 3266 const std::string op_b =
@@ -3218,12 +3281,6 @@ private:
3218 const std::string value = '(' + result + ") ? 1.0 : 0.0"; 3281 const std::string value = '(' + result + ") ? 1.0 : 0.0";
3219 regs.SetRegisterToFloat(instr.gpr0, 0, value, 1, 1); 3282 regs.SetRegisterToFloat(instr.gpr0, 0, value, 1, 1);
3220 } 3283 }
3221
3222 if (instr.generates_cc) {
3223 LOG_CRITICAL(HW_GPU, "PSET Generates an unhandled Control Code");
3224 UNREACHABLE();
3225 }
3226
3227 break; 3284 break;
3228 } 3285 }
3229 case OpCode::Type::PredicateSetPredicate: { 3286 case OpCode::Type::PredicateSetPredicate: {
@@ -3261,22 +3318,48 @@ private:
3261 const std::string pred = 3318 const std::string pred =
3262 GetPredicateCondition(instr.csetp.pred39, instr.csetp.neg_pred39 != 0); 3319 GetPredicateCondition(instr.csetp.pred39, instr.csetp.neg_pred39 != 0);
3263 const std::string combiner = GetPredicateCombiner(instr.csetp.op); 3320 const std::string combiner = GetPredicateCombiner(instr.csetp.op);
3264 const std::string control_code = regs.GetControlCode(instr.csetp.cc); 3321 const std::string condition_code = regs.GetConditionCode(instr.csetp.cc);
3265 if (instr.csetp.pred3 != static_cast<u64>(Pred::UnusedIndex)) { 3322 if (instr.csetp.pred3 != static_cast<u64>(Pred::UnusedIndex)) {
3266 SetPredicate(instr.csetp.pred3, 3323 SetPredicate(instr.csetp.pred3,
3267 '(' + control_code + ") " + combiner + " (" + pred + ')'); 3324 '(' + condition_code + ") " + combiner + " (" + pred + ')');
3268 } 3325 }
3269 if (instr.csetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { 3326 if (instr.csetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
3270 SetPredicate(instr.csetp.pred0, 3327 SetPredicate(instr.csetp.pred0,
3271 "!(" + control_code + ") " + combiner + " (" + pred + ')'); 3328 "!(" + condition_code + ") " + combiner + " (" + pred + ')');
3272 } 3329 }
3273 break; 3330 break;
3274 } 3331 }
3275 default: { 3332 default: {
3276 LOG_CRITICAL(HW_GPU, "Unhandled predicate instruction: {}", 3333 UNIMPLEMENTED_MSG("Unhandled predicate instruction: {}", opcode->get().GetName());
3277 opcode->get().GetName()); 3334 }
3278 UNREACHABLE();
3279 } 3335 }
3336 break;
3337 }
3338 case OpCode::Type::RegisterSetPredicate: {
3339 UNIMPLEMENTED_IF(instr.r2p.mode != Tegra::Shader::R2pMode::Pr);
3340
3341 const std::string apply_mask = [&]() {
3342 switch (opcode->get().GetId()) {
3343 case OpCode::Id::R2P_IMM:
3344 return std::to_string(instr.r2p.immediate_mask);
3345 default:
3346 UNREACHABLE();
3347 }
3348 }();
3349 const std::string mask = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) +
3350 " >> " + std::to_string(instr.r2p.byte) + ')';
3351
3352 constexpr u64 programmable_preds = 7;
3353 for (u64 pred = 0; pred < programmable_preds; ++pred) {
3354 const auto shift = std::to_string(1 << pred);
3355
3356 shader.AddLine("if ((" + apply_mask + " & " + shift + ") != 0) {");
3357 ++shader.scope;
3358
3359 SetPredicate(pred, '(' + mask + " & " + shift + ") != 0");
3360
3361 --shader.scope;
3362 shader.AddLine('}');
3280 } 3363 }
3281 break; 3364 break;
3282 } 3365 }
@@ -3355,7 +3438,7 @@ private:
3355 break; 3438 break;
3356 } 3439 }
3357 case OpCode::Type::HalfSet: { 3440 case OpCode::Type::HalfSet: {
3358 ASSERT_MSG(instr.hset2.ftz == 0, "Unimplemented"); 3441 UNIMPLEMENTED_IF(instr.hset2.ftz != 0);
3359 3442
3360 const std::string op_a = 3443 const std::string op_a =
3361 GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.hset2.type_a, 3444 GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.hset2.type_a,
@@ -3399,15 +3482,17 @@ private:
3399 break; 3482 break;
3400 } 3483 }
3401 case OpCode::Type::Xmad: { 3484 case OpCode::Type::Xmad: {
3402 ASSERT_MSG(!instr.xmad.sign_a, "Unimplemented"); 3485 UNIMPLEMENTED_IF(instr.xmad.sign_a);
3403 ASSERT_MSG(!instr.xmad.sign_b, "Unimplemented"); 3486 UNIMPLEMENTED_IF(instr.xmad.sign_b);
3487 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
3488 "Condition codes generation in XMAD is not implemented");
3404 3489
3405 std::string op_a{regs.GetRegisterAsInteger(instr.gpr8, 0, instr.xmad.sign_a)}; 3490 std::string op_a{regs.GetRegisterAsInteger(instr.gpr8, 0, instr.xmad.sign_a)};
3406 std::string op_b; 3491 std::string op_b;
3407 std::string op_c; 3492 std::string op_c;
3408 3493
3409 // TODO(bunnei): Needs to be fixed once op_a or op_b is signed 3494 // TODO(bunnei): Needs to be fixed once op_a or op_b is signed
3410 ASSERT_MSG(instr.xmad.sign_a == instr.xmad.sign_b, "Unimplemented"); 3495 UNIMPLEMENTED_IF(instr.xmad.sign_a != instr.xmad.sign_b);
3411 const bool is_signed{instr.xmad.sign_a == 1}; 3496 const bool is_signed{instr.xmad.sign_a == 1};
3412 3497
3413 bool is_merge{}; 3498 bool is_merge{};
@@ -3440,8 +3525,7 @@ private:
3440 break; 3525 break;
3441 } 3526 }
3442 default: { 3527 default: {
3443 LOG_CRITICAL(HW_GPU, "Unhandled XMAD instruction: {}", opcode->get().GetName()); 3528 UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
3444 UNREACHABLE();
3445 } 3529 }
3446 } 3530 }
3447 3531
@@ -3477,9 +3561,8 @@ private:
3477 op_c = "((" + op_c + ") + (" + src2 + "<< 16))"; 3561 op_c = "((" + op_c + ") + (" + src2 + "<< 16))";
3478 break; 3562 break;
3479 default: { 3563 default: {
3480 LOG_CRITICAL(HW_GPU, "Unhandled XMAD mode: {}", 3564 UNIMPLEMENTED_MSG("Unhandled XMAD mode: {}",
3481 static_cast<u32>(instr.xmad.mode.Value())); 3565 static_cast<u32>(instr.xmad.mode.Value()));
3482 UNREACHABLE();
3483 } 3566 }
3484 } 3567 }
3485 3568
@@ -3489,25 +3572,19 @@ private:
3489 } 3572 }
3490 3573
3491 regs.SetRegisterToInteger(instr.gpr0, is_signed, 0, sum, 1, 1); 3574 regs.SetRegisterToInteger(instr.gpr0, is_signed, 0, sum, 1, 1);
3492 if (instr.generates_cc) {
3493 LOG_CRITICAL(HW_GPU, "XMAD Generates an unhandled Control Code");
3494 UNREACHABLE();
3495 }
3496 break; 3575 break;
3497 } 3576 }
3498 default: { 3577 default: {
3499 switch (opcode->get().GetId()) { 3578 switch (opcode->get().GetId()) {
3500 case OpCode::Id::EXIT: { 3579 case OpCode::Id::EXIT: {
3580 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
3581 UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T,
3582 "EXIT condition code used: {}", static_cast<u32>(cc));
3583
3501 if (stage == Maxwell3D::Regs::ShaderStage::Fragment) { 3584 if (stage == Maxwell3D::Regs::ShaderStage::Fragment) {
3502 EmitFragmentOutputsWrite(); 3585 EmitFragmentOutputsWrite();
3503 } 3586 }
3504 3587
3505 const Tegra::Shader::ControlCode cc = instr.flow_control_code;
3506 if (cc != Tegra::Shader::ControlCode::T) {
3507 LOG_CRITICAL(HW_GPU, "EXIT Control Code used: {}", static_cast<u32>(cc));
3508 UNREACHABLE();
3509 }
3510
3511 switch (instr.flow.cond) { 3588 switch (instr.flow.cond) {
3512 case Tegra::Shader::FlowCondition::Always: 3589 case Tegra::Shader::FlowCondition::Always:
3513 shader.AddLine("return true;"); 3590 shader.AddLine("return true;");
@@ -3522,26 +3599,24 @@ private:
3522 case Tegra::Shader::FlowCondition::Fcsm_Tr: 3599 case Tegra::Shader::FlowCondition::Fcsm_Tr:
3523 // TODO(bunnei): What is this used for? If we assume this conditon is not 3600 // TODO(bunnei): What is this used for? If we assume this conditon is not
3524 // satisifed, dual vertex shaders in Farming Simulator make more sense 3601 // satisifed, dual vertex shaders in Farming Simulator make more sense
3525 LOG_CRITICAL(HW_GPU, "Skipping unknown FlowCondition::Fcsm_Tr"); 3602 UNIMPLEMENTED_MSG("Skipping unknown FlowCondition::Fcsm_Tr");
3526 break; 3603 break;
3527 3604
3528 default: 3605 default:
3529 LOG_CRITICAL(HW_GPU, "Unhandled flow condition: {}", 3606 UNIMPLEMENTED_MSG("Unhandled flow condition: {}",
3530 static_cast<u32>(instr.flow.cond.Value())); 3607 static_cast<u32>(instr.flow.cond.Value()));
3531 UNREACHABLE();
3532 } 3608 }
3533 break; 3609 break;
3534 } 3610 }
3535 case OpCode::Id::KIL: { 3611 case OpCode::Id::KIL: {
3536 ASSERT(instr.flow.cond == Tegra::Shader::FlowCondition::Always); 3612 UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always);
3613
3614 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
3615 UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T,
3616 "KIL condition code used: {}", static_cast<u32>(cc));
3537 3617
3538 // Enclose "discard" in a conditional, so that GLSL compilation does not complain 3618 // Enclose "discard" in a conditional, so that GLSL compilation does not complain
3539 // about unexecuted instructions that may follow this. 3619 // about unexecuted instructions that may follow this.
3540 const Tegra::Shader::ControlCode cc = instr.flow_control_code;
3541 if (cc != Tegra::Shader::ControlCode::T) {
3542 LOG_CRITICAL(HW_GPU, "KIL Control Code used: {}", static_cast<u32>(cc));
3543 UNREACHABLE();
3544 }
3545 shader.AddLine("if (true) {"); 3620 shader.AddLine("if (true) {");
3546 ++shader.scope; 3621 ++shader.scope;
3547 shader.AddLine("discard;"); 3622 shader.AddLine("discard;");
@@ -3551,7 +3626,8 @@ private:
3551 break; 3626 break;
3552 } 3627 }
3553 case OpCode::Id::OUT_R: { 3628 case OpCode::Id::OUT_R: {
3554 ASSERT(instr.gpr20.Value() == Register::ZeroIndex); 3629 UNIMPLEMENTED_IF_MSG(instr.gpr20.Value() != Register::ZeroIndex,
3630 "Stream buffer is not supported");
3555 ASSERT_MSG(stage == Maxwell3D::Regs::ShaderStage::Geometry, 3631 ASSERT_MSG(stage == Maxwell3D::Regs::ShaderStage::Geometry,
3556 "OUT is expected to be used in a geometry shader."); 3632 "OUT is expected to be used in a geometry shader.");
3557 3633
@@ -3578,18 +3654,17 @@ private:
3578 break; 3654 break;
3579 } 3655 }
3580 default: { 3656 default: {
3581 LOG_CRITICAL(HW_GPU, "Unhandled system move: {}", 3657 UNIMPLEMENTED_MSG("Unhandled system move: {}",
3582 static_cast<u32>(instr.sys20.Value())); 3658 static_cast<u32>(instr.sys20.Value()));
3583 UNREACHABLE();
3584 } 3659 }
3585 } 3660 }
3586 break; 3661 break;
3587 } 3662 }
3588 case OpCode::Id::ISBERD: { 3663 case OpCode::Id::ISBERD: {
3589 ASSERT(instr.isberd.o == 0); 3664 UNIMPLEMENTED_IF(instr.isberd.o != 0);
3590 ASSERT(instr.isberd.skew == 0); 3665 UNIMPLEMENTED_IF(instr.isberd.skew != 0);
3591 ASSERT(instr.isberd.shift == Tegra::Shader::IsberdShift::None); 3666 UNIMPLEMENTED_IF(instr.isberd.shift != Tegra::Shader::IsberdShift::None);
3592 ASSERT(instr.isberd.mode == Tegra::Shader::IsberdMode::None); 3667 UNIMPLEMENTED_IF(instr.isberd.mode != Tegra::Shader::IsberdMode::None);
3593 ASSERT_MSG(stage == Maxwell3D::Regs::ShaderStage::Geometry, 3668 ASSERT_MSG(stage == Maxwell3D::Regs::ShaderStage::Geometry,
3594 "ISBERD is expected to be used in a geometry shader."); 3669 "ISBERD is expected to be used in a geometry shader.");
3595 LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete"); 3670 LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete");
@@ -3597,13 +3672,13 @@ private:
3597 break; 3672 break;
3598 } 3673 }
3599 case OpCode::Id::BRA: { 3674 case OpCode::Id::BRA: {
3600 ASSERT_MSG(instr.bra.constant_buffer == 0, 3675 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
3601 "BRA with constant buffers are not implemented"); 3676 "BRA with constant buffers are not implemented");
3602 const Tegra::Shader::ControlCode cc = instr.flow_control_code; 3677
3603 if (cc != Tegra::Shader::ControlCode::T) { 3678 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
3604 LOG_CRITICAL(HW_GPU, "BRA Control Code used: {}", static_cast<u32>(cc)); 3679 UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T,
3605 UNREACHABLE(); 3680 "BRA condition code used: {}", static_cast<u32>(cc));
3606 } 3681
3607 const u32 target = offset + instr.bra.GetBranchTarget(); 3682 const u32 target = offset + instr.bra.GetBranchTarget();
3608 shader.AddLine("{ jmp_to = " + std::to_string(target) + "u; break; }"); 3683 shader.AddLine("{ jmp_to = " + std::to_string(target) + "u; break; }");
3609 break; 3684 break;
@@ -3626,7 +3701,8 @@ private:
3626 // The SSY opcode tells the GPU where to re-converge divergent execution paths, it 3701 // The SSY opcode tells the GPU where to re-converge divergent execution paths, it
3627 // sets the target of the jump that the SYNC instruction will make. The SSY opcode 3702 // sets the target of the jump that the SYNC instruction will make. The SSY opcode
3628 // has a similar structure to the BRA opcode. 3703 // has a similar structure to the BRA opcode.
3629 ASSERT_MSG(instr.bra.constant_buffer == 0, "Constant buffer flow is not supported"); 3704 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
3705 "Constant buffer flow is not supported");
3630 3706
3631 const u32 target = offset + instr.bra.GetBranchTarget(); 3707 const u32 target = offset + instr.bra.GetBranchTarget();
3632 EmitPushToFlowStack(target); 3708 EmitPushToFlowStack(target);
@@ -3636,29 +3712,28 @@ private:
3636 // PBK pushes to a stack the address where BRK will jump to. This shares stack with 3712 // PBK pushes to a stack the address where BRK will jump to. This shares stack with
3637 // SSY but using SYNC on a PBK address will kill the shader execution. We don't 3713 // SSY but using SYNC on a PBK address will kill the shader execution. We don't
3638 // emulate this because it's very unlikely a driver will emit such invalid shader. 3714 // emulate this because it's very unlikely a driver will emit such invalid shader.
3639 ASSERT_MSG(instr.bra.constant_buffer == 0, "Constant buffer PBK is not supported"); 3715 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
3716 "Constant buffer PBK is not supported");
3640 3717
3641 const u32 target = offset + instr.bra.GetBranchTarget(); 3718 const u32 target = offset + instr.bra.GetBranchTarget();
3642 EmitPushToFlowStack(target); 3719 EmitPushToFlowStack(target);
3643 break; 3720 break;
3644 } 3721 }
3645 case OpCode::Id::SYNC: { 3722 case OpCode::Id::SYNC: {
3723 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
3724 UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T,
3725 "SYNC condition code used: {}", static_cast<u32>(cc));
3726
3646 // The SYNC opcode jumps to the address previously set by the SSY opcode 3727 // The SYNC opcode jumps to the address previously set by the SSY opcode
3647 const Tegra::Shader::ControlCode cc = instr.flow_control_code;
3648 if (cc != Tegra::Shader::ControlCode::T) {
3649 LOG_CRITICAL(HW_GPU, "SYNC Control Code used: {}", static_cast<u32>(cc));
3650 UNREACHABLE();
3651 }
3652 EmitPopFromFlowStack(); 3728 EmitPopFromFlowStack();
3653 break; 3729 break;
3654 } 3730 }
3655 case OpCode::Id::BRK: { 3731 case OpCode::Id::BRK: {
3656 // The BRK opcode jumps to the address previously set by the PBK opcode 3732 // The BRK opcode jumps to the address previously set by the PBK opcode
3657 const Tegra::Shader::ControlCode cc = instr.flow_control_code; 3733 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
3658 if (cc != Tegra::Shader::ControlCode::T) { 3734 UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T,
3659 LOG_CRITICAL(HW_GPU, "BRK Control Code used: {}", static_cast<u32>(cc)); 3735 "BRK condition code used: {}", static_cast<u32>(cc));
3660 UNREACHABLE(); 3736
3661 }
3662 EmitPopFromFlowStack(); 3737 EmitPopFromFlowStack();
3663 break; 3738 break;
3664 } 3739 }
@@ -3669,6 +3744,9 @@ private:
3669 break; 3744 break;
3670 } 3745 }
3671 case OpCode::Id::VMAD: { 3746 case OpCode::Id::VMAD: {
3747 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
3748 "Condition codes generation in VMAD is not implemented");
3749
3672 const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1; 3750 const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1;
3673 const std::string op_a = GetVideoOperandA(instr); 3751 const std::string op_a = GetVideoOperandA(instr);
3674 const std::string op_b = GetVideoOperandB(instr); 3752 const std::string op_b = GetVideoOperandB(instr);
@@ -3688,11 +3766,6 @@ private:
3688 regs.SetRegisterToInteger(instr.gpr0, result_signed, 1, result, 1, 1, 3766 regs.SetRegisterToInteger(instr.gpr0, result_signed, 1, result, 1, 1,
3689 instr.vmad.saturate == 1, 0, Register::Size::Word, 3767 instr.vmad.saturate == 1, 0, Register::Size::Word,
3690 instr.vmad.cc); 3768 instr.vmad.cc);
3691 if (instr.generates_cc) {
3692 LOG_CRITICAL(HW_GPU, "VMAD Generates an unhandled Control Code");
3693 UNREACHABLE();
3694 }
3695
3696 break; 3769 break;
3697 } 3770 }
3698 case OpCode::Id::VSETP: { 3771 case OpCode::Id::VSETP: {
@@ -3719,10 +3792,7 @@ private:
3719 } 3792 }
3720 break; 3793 break;
3721 } 3794 }
3722 default: { 3795 default: { UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName()); }
3723 LOG_CRITICAL(HW_GPU, "Unhandled instruction: {}", opcode->get().GetName());
3724 UNREACHABLE();
3725 }
3726 } 3796 }
3727 3797
3728 break; 3798 break;
@@ -3847,6 +3917,7 @@ private:
3847 Maxwell3D::Regs::ShaderStage stage; 3917 Maxwell3D::Regs::ShaderStage stage;
3848 const std::string& suffix; 3918 const std::string& suffix;
3849 u64 local_memory_size; 3919 u64 local_memory_size;
3920 std::size_t shader_length;
3850 3921
3851 ShaderWriter shader; 3922 ShaderWriter shader;
3852 ShaderWriter declarations; 3923 ShaderWriter declarations;
@@ -3865,9 +3936,10 @@ std::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u
3865 Maxwell3D::Regs::ShaderStage stage, 3936 Maxwell3D::Regs::ShaderStage stage,
3866 const std::string& suffix) { 3937 const std::string& suffix) {
3867 try { 3938 try {
3868 const auto subroutines = 3939 ControlFlowAnalyzer analyzer(program_code, main_offset, suffix);
3869 ControlFlowAnalyzer(program_code, main_offset, suffix).GetSubroutines(); 3940 const auto subroutines = analyzer.GetSubroutines();
3870 GLSLGenerator generator(subroutines, program_code, main_offset, stage, suffix); 3941 GLSLGenerator generator(subroutines, program_code, main_offset, stage, suffix,
3942 analyzer.GetShaderLength());
3871 return ProgramResult{generator.GetShaderCode(), generator.GetEntries()}; 3943 return ProgramResult{generator.GetShaderCode(), generator.GetEntries()};
3872 } catch (const DecompileFail& exception) { 3944 } catch (const DecompileFail& exception) {
3873 LOG_ERROR(HW_GPU, "Shader decompilation failed: {}", exception.what()); 3945 LOG_ERROR(HW_GPU, "Shader decompilation failed: {}", exception.what());
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index 520b9d4e3..b425d98ae 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -163,6 +163,7 @@ private:
163struct ShaderEntries { 163struct ShaderEntries {
164 std::vector<ConstBufferEntry> const_buffer_entries; 164 std::vector<ConstBufferEntry> const_buffer_entries;
165 std::vector<SamplerEntry> texture_samplers; 165 std::vector<SamplerEntry> texture_samplers;
166 std::size_t shader_length;
166}; 167};
167 168
168using ProgramResult = std::pair<std::string, ShaderEntries>; 169using ProgramResult = std::pair<std::string, ShaderEntries>;
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index d9910c6e8..934f4db78 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -233,6 +233,28 @@ void OpenGLState::ApplyStencilTest() const {
233 config_stencil(GL_BACK, stencil.back, cur_state.stencil.back); 233 config_stencil(GL_BACK, stencil.back, cur_state.stencil.back);
234 } 234 }
235} 235}
236// Viewport does not affects glClearBuffer so emulate viewport using scissor test
237void OpenGLState::EmulateViewportWithScissor() {
238 auto& current = viewports[0];
239 if (current.scissor.enabled) {
240 const GLint left = std::max(current.x, current.scissor.x);
241 const GLint right =
242 std::max(current.x + current.width, current.scissor.x + current.scissor.width);
243 const GLint bottom = std::max(current.y, current.scissor.y);
244 const GLint top =
245 std::max(current.y + current.height, current.scissor.y + current.scissor.height);
246 current.scissor.x = std::max(left, 0);
247 current.scissor.y = std::max(bottom, 0);
248 current.scissor.width = std::max(right - left, 0);
249 current.scissor.height = std::max(top - bottom, 0);
250 } else {
251 current.scissor.enabled = true;
252 current.scissor.x = current.x;
253 current.scissor.y = current.y;
254 current.scissor.width = current.width;
255 current.scissor.height = current.height;
256 }
257}
236 258
237void OpenGLState::ApplyViewport() const { 259void OpenGLState::ApplyViewport() const {
238 if (GLAD_GL_ARB_viewport_array && geometry_shaders.enabled) { 260 if (GLAD_GL_ARB_viewport_array && geometry_shaders.enabled) {
@@ -242,7 +264,9 @@ void OpenGLState::ApplyViewport() const {
242 const auto& updated = viewports[i]; 264 const auto& updated = viewports[i];
243 if (updated.x != current.x || updated.y != current.y || 265 if (updated.x != current.x || updated.y != current.y ||
244 updated.width != current.width || updated.height != current.height) { 266 updated.width != current.width || updated.height != current.height) {
245 glViewportIndexedf(i, updated.x, updated.y, updated.width, updated.height); 267 glViewportIndexedf(
268 i, static_cast<GLfloat>(updated.x), static_cast<GLfloat>(updated.y),
269 static_cast<GLfloat>(updated.width), static_cast<GLfloat>(updated.height));
246 } 270 }
247 if (updated.depth_range_near != current.depth_range_near || 271 if (updated.depth_range_near != current.depth_range_near ||
248 updated.depth_range_far != current.depth_range_far) { 272 updated.depth_range_far != current.depth_range_far) {
@@ -270,8 +294,7 @@ void OpenGLState::ApplyViewport() const {
270 const auto& updated = viewports[0]; 294 const auto& updated = viewports[0];
271 if (updated.x != current.x || updated.y != current.y || updated.width != current.width || 295 if (updated.x != current.x || updated.y != current.y || updated.width != current.width ||
272 updated.height != current.height) { 296 updated.height != current.height) {
273 glViewport(static_cast<GLint>(updated.x), static_cast<GLint>(updated.y), 297 glViewport(updated.x, updated.y, updated.width, updated.height);
274 static_cast<GLsizei>(updated.width), static_cast<GLsizei>(updated.height));
275 } 298 }
276 if (updated.depth_range_near != current.depth_range_near || 299 if (updated.depth_range_near != current.depth_range_near ||
277 updated.depth_range_far != current.depth_range_far) { 300 updated.depth_range_far != current.depth_range_far) {
@@ -339,14 +362,14 @@ void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const {
339 if (blend_changed || updated.src_rgb_func != current.src_rgb_func || 362 if (blend_changed || updated.src_rgb_func != current.src_rgb_func ||
340 updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func || 363 updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func ||
341 updated.dst_a_func != current.dst_a_func) { 364 updated.dst_a_func != current.dst_a_func) {
342 glBlendFuncSeparateiARB(static_cast<GLuint>(target), updated.src_rgb_func, 365 glBlendFuncSeparatei(static_cast<GLuint>(target), updated.src_rgb_func,
343 updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func); 366 updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func);
344 } 367 }
345 368
346 if (blend_changed || updated.rgb_equation != current.rgb_equation || 369 if (blend_changed || updated.rgb_equation != current.rgb_equation ||
347 updated.a_equation != current.a_equation) { 370 updated.a_equation != current.a_equation) {
348 glBlendEquationSeparateiARB(static_cast<GLuint>(target), updated.rgb_equation, 371 glBlendEquationSeparatei(static_cast<GLuint>(target), updated.rgb_equation,
349 updated.a_equation); 372 updated.a_equation);
350 } 373 }
351} 374}
352 375
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index bdc743b0f..032fc43f0 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -156,10 +156,10 @@ public:
156 } draw; 156 } draw;
157 157
158 struct viewport { 158 struct viewport {
159 GLfloat x; 159 GLint x;
160 GLfloat y; 160 GLint y;
161 GLfloat width; 161 GLint width;
162 GLfloat height; 162 GLint height;
163 GLfloat depth_range_near; // GL_DEPTH_RANGE 163 GLfloat depth_range_near; // GL_DEPTH_RANGE
164 GLfloat depth_range_far; // GL_DEPTH_RANGE 164 GLfloat depth_range_far; // GL_DEPTH_RANGE
165 struct { 165 struct {
@@ -206,6 +206,7 @@ public:
206 OpenGLState& ResetBuffer(GLuint handle); 206 OpenGLState& ResetBuffer(GLuint handle);
207 OpenGLState& ResetVertexArray(GLuint handle); 207 OpenGLState& ResetVertexArray(GLuint handle);
208 OpenGLState& ResetFramebuffer(GLuint handle); 208 OpenGLState& ResetFramebuffer(GLuint handle);
209 void EmulateViewportWithScissor();
209 210
210private: 211private:
211 static OpenGLState cur_state; 212 static OpenGLState cur_state;
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index 065b3929c..a8833c06e 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -218,14 +218,19 @@ inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) {
218inline GLenum BlendEquation(Maxwell::Blend::Equation equation) { 218inline GLenum BlendEquation(Maxwell::Blend::Equation equation) {
219 switch (equation) { 219 switch (equation) {
220 case Maxwell::Blend::Equation::Add: 220 case Maxwell::Blend::Equation::Add:
221 case Maxwell::Blend::Equation::AddGL:
221 return GL_FUNC_ADD; 222 return GL_FUNC_ADD;
222 case Maxwell::Blend::Equation::Subtract: 223 case Maxwell::Blend::Equation::Subtract:
224 case Maxwell::Blend::Equation::SubtractGL:
223 return GL_FUNC_SUBTRACT; 225 return GL_FUNC_SUBTRACT;
224 case Maxwell::Blend::Equation::ReverseSubtract: 226 case Maxwell::Blend::Equation::ReverseSubtract:
227 case Maxwell::Blend::Equation::ReverseSubtractGL:
225 return GL_FUNC_REVERSE_SUBTRACT; 228 return GL_FUNC_REVERSE_SUBTRACT;
226 case Maxwell::Blend::Equation::Min: 229 case Maxwell::Blend::Equation::Min:
230 case Maxwell::Blend::Equation::MinGL:
227 return GL_MIN; 231 return GL_MIN;
228 case Maxwell::Blend::Equation::Max: 232 case Maxwell::Blend::Equation::Max:
233 case Maxwell::Blend::Equation::MaxGL:
229 return GL_MAX; 234 return GL_MAX;
230 } 235 }
231 LOG_ERROR(Render_OpenGL, "Unimplemented blend equation={}", static_cast<u32>(equation)); 236 LOG_ERROR(Render_OpenGL, "Unimplemented blend equation={}", static_cast<u32>(equation));
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index ea38da932..4fd0d66c5 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -19,9 +19,9 @@
19#include "core/settings.h" 19#include "core/settings.h"
20#include "core/telemetry_session.h" 20#include "core/telemetry_session.h"
21#include "core/tracer/recorder.h" 21#include "core/tracer/recorder.h"
22#include "video_core/morton.h"
22#include "video_core/renderer_opengl/gl_rasterizer.h" 23#include "video_core/renderer_opengl/gl_rasterizer.h"
23#include "video_core/renderer_opengl/renderer_opengl.h" 24#include "video_core/renderer_opengl/renderer_opengl.h"
24#include "video_core/utils.h"
25 25
26namespace OpenGL { 26namespace OpenGL {
27 27
@@ -304,6 +304,12 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
304 gl_framebuffer_data.resize(texture.width * texture.height * 4); 304 gl_framebuffer_data.resize(texture.width * texture.height * 4);
305 break; 305 break;
306 default: 306 default:
307 internal_format = GL_RGBA;
308 texture.gl_format = GL_RGBA;
309 texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
310 gl_framebuffer_data.resize(texture.width * texture.height * 4);
311 LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer pixel format: {}",
312 static_cast<u32>(framebuffer.pixel_format));
307 UNREACHABLE(); 313 UNREACHABLE();
308 } 314 }
309 315
@@ -484,7 +490,7 @@ bool RendererOpenGL::Init() {
484 Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model); 490 Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model);
485 Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version); 491 Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version);
486 492
487 if (!GLAD_GL_VERSION_3_3) { 493 if (!GLAD_GL_VERSION_4_3) {
488 return false; 494 return false;
489 } 495 }
490 496
diff --git a/src/video_core/utils.h b/src/video_core/utils.h
deleted file mode 100644
index e0a14d48f..000000000
--- a/src/video_core/utils.h
+++ /dev/null
@@ -1,164 +0,0 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
namespace VideoCore {

// 8x8 Z-Order coordinate from 2D coordinates. Both coordinates wrap modulo 8.
static inline u32 MortonInterleave(u32 x, u32 y) {
    // xlut spreads the three low bits of x onto bits 0/2/4, ylut those of y onto bits 1/3/5.
    static constexpr u32 xlut[] = {0x00, 0x01, 0x04, 0x05, 0x10, 0x11, 0x14, 0x15};
    static constexpr u32 ylut[] = {0x00, 0x02, 0x08, 0x0a, 0x20, 0x22, 0x28, 0x2a};
    return xlut[x % 8] + ylut[y % 8];
}

/**
 * Calculates the offset of the position of the pixel in Morton order
 *
 * @param x Horizontal texel coordinate
 * @param y Vertical texel coordinate; only its position inside the 8-row tile is used here
 * @param bytes_per_pixel Size of one texel in bytes
 * @return Byte offset of the texel, relative to the start of its row of 8x8 tiles
 */
static inline u32 GetMortonOffset(u32 x, u32 y, u32 bytes_per_pixel) {
    // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each
    // of which is composed of four 2x2 subtiles each of which is composed of four texels.
    // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g.
    // texels are laid out in a 2x2 subtile like this:
    // 2 3
    // 0 1
    //
    // The full 8x8 tile has the texels arranged like this:
    //
    // 42 43 46 47 58 59 62 63
    // 40 41 44 45 56 57 60 61
    // 34 35 38 39 50 51 54 55
    // 32 33 36 37 48 49 52 53
    // 10 11 14 15 26 27 30 31
    // 08 09 12 13 24 25 28 29
    // 02 03 06 07 18 19 22 23
    // 00 01 04 05 16 17 20 21
    //
    // This pattern is what's called Z-order curve, or Morton order.

    constexpr unsigned int block_height = 8;

    // Texels that belong to the tiles to the left of the one containing x.
    const unsigned int coarse_x = x & ~7u;
    const unsigned int tile_base = coarse_x * block_height;

    const u32 index_in_tile = MortonInterleave(x, y);
    return (index_in_tile + tile_base) * bytes_per_pixel;
}

// 128x128 Z-Order coordinate from 2D coordinates. Both coordinates wrap modulo 128.
static inline u32 MortonInterleave128(u32 x, u32 y) {
    // The previous lookup tables held 384 entries each although only indices 0..127 can be
    // reached through the modulo; the reachable part is exactly the bit scatter below:
    //   x bits 0,1,2,3,4,5,6 -> result bits 0,1,3,6,11,12,13
    //   y bits 0,1,2,3,4,5,6 -> result bits 2,4,5,7,8,9,10
    const u32 x_bits = (x & 0x03) | ((x & 0x04) << 1) | ((x & 0x08) << 3) | ((x & 0x70) << 7);
    const u32 y_bits = ((y & 0x01) << 2) | ((y & 0x06) << 3) | ((y & 0x78) << 4);

    // Both halves occupy disjoint bits, so the sum is a plain merge of the two.
    return x_bits + y_bits;
}

/**
 * Calculates the offset of the position of the pixel in Morton order.
 * Framebuffer images are split into 128x128 tiles.
 *
 * @param x Horizontal pixel coordinate
 * @param y Vertical pixel coordinate; only its position inside the 128-row tile is used here
 * @param bytes_per_pixel Size of one pixel in bytes
 * @return Byte offset of the pixel, relative to the start of its row of 128x128 tiles
 */
static inline u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) {
    constexpr unsigned int block_height = 128;

    // Pixels that belong to the tiles to the left of the one containing x.
    const unsigned int coarse_x = x & ~127u;
    const unsigned int tile_base = coarse_x * block_height;

    const u32 index_in_tile = MortonInterleave128(x, y);
    return (index_in_tile + tile_base) * bytes_per_pixel;
}

/**
 * Copies a width x height image between a 128x128-tiled Morton buffer and a linear buffer.
 *
 * @param width Image width in pixels
 * @param height Image height in pixels
 * @param bytes_per_pixel Pixel size in the Morton buffer; also the number of bytes copied
 *                        for every pixel
 * @param gl_bytes_per_pixel Pixel stride of the linear buffer
 * @param morton_data Tiled buffer
 * @param gl_data Linear, row-major buffer
 * @param morton_to_gl true copies morton_data into gl_data, false copies the other way
 */
static inline void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel,
                                       u32 gl_bytes_per_pixel, u8* morton_data, u8* gl_data,
                                       bool morton_to_gl) {
    for (unsigned y = 0; y < height; ++y) {
        // Every complete band of 128 rows above this one holds `width` pixels per row.
        const u32 coarse_y = y & ~127u;
        const u32 band_offset = coarse_y * width * bytes_per_pixel;

        for (unsigned x = 0; x < width; ++x) {
            const u32 morton_offset = GetMortonOffset128(x, y, bytes_per_pixel) + band_offset;
            const u32 gl_pixel_index = (x + y * width) * gl_bytes_per_pixel;

            u8* const morton_pixel = morton_data + morton_offset;
            u8* const gl_pixel = &gl_data[gl_pixel_index];

            if (morton_to_gl) {
                memcpy(gl_pixel, morton_pixel, bytes_per_pixel);
            } else {
                memcpy(morton_pixel, gl_pixel, bytes_per_pixel);
            }
        }
    }
}

} // namespace VideoCore
diff --git a/src/yuzu/applets/software_keyboard.cpp b/src/yuzu/applets/software_keyboard.cpp
index efefb1f99..8a26fdff1 100644
--- a/src/yuzu/applets/software_keyboard.cpp
+++ b/src/yuzu/applets/software_keyboard.cpp
@@ -82,8 +82,8 @@ QtSoftwareKeyboardDialog::QtSoftwareKeyboardDialog(
82 : QString::fromStdU16String(parameters.submit_text), 82 : QString::fromStdU16String(parameters.submit_text),
83 QDialogButtonBox::AcceptRole); 83 QDialogButtonBox::AcceptRole);
84 84
85 connect(buttons, &QDialogButtonBox::accepted, this, &QtSoftwareKeyboardDialog::Submit); 85 connect(buttons, &QDialogButtonBox::accepted, this, &QtSoftwareKeyboardDialog::accept);
86 connect(buttons, &QDialogButtonBox::rejected, this, &QtSoftwareKeyboardDialog::Reject); 86 connect(buttons, &QDialogButtonBox::rejected, this, &QtSoftwareKeyboardDialog::reject);
87 layout->addWidget(header_label); 87 layout->addWidget(header_label);
88 layout->addWidget(sub_label); 88 layout->addWidget(sub_label);
89 layout->addWidget(guide_label); 89 layout->addWidget(guide_label);
@@ -96,16 +96,16 @@ QtSoftwareKeyboardDialog::QtSoftwareKeyboardDialog(
96 96
97QtSoftwareKeyboardDialog::~QtSoftwareKeyboardDialog() = default; 97QtSoftwareKeyboardDialog::~QtSoftwareKeyboardDialog() = default;
98 98
99void QtSoftwareKeyboardDialog::Submit() { 99void QtSoftwareKeyboardDialog::accept() {
100 ok = true; 100 ok = true;
101 text = line_edit->text().toStdU16String(); 101 text = line_edit->text().toStdU16String();
102 accept(); 102 QDialog::accept();
103} 103}
104 104
105void QtSoftwareKeyboardDialog::Reject() { 105void QtSoftwareKeyboardDialog::reject() {
106 ok = false; 106 ok = false;
107 text.clear(); 107 text.clear();
108 accept(); 108 QDialog::reject();
109} 109}
110 110
111std::u16string QtSoftwareKeyboardDialog::GetText() const { 111std::u16string QtSoftwareKeyboardDialog::GetText() const {
@@ -129,13 +129,13 @@ QtSoftwareKeyboard::~QtSoftwareKeyboard() = default;
129 129
130void QtSoftwareKeyboard::RequestText(std::function<void(std::optional<std::u16string>)> out, 130void QtSoftwareKeyboard::RequestText(std::function<void(std::optional<std::u16string>)> out,
131 Core::Frontend::SoftwareKeyboardParameters parameters) const { 131 Core::Frontend::SoftwareKeyboardParameters parameters) const {
132 text_output = out; 132 text_output = std::move(out);
133 emit MainWindowGetText(parameters); 133 emit MainWindowGetText(parameters);
134} 134}
135 135
136void QtSoftwareKeyboard::SendTextCheckDialog(std::u16string error_message, 136void QtSoftwareKeyboard::SendTextCheckDialog(std::u16string error_message,
137 std::function<void()> finished_check) const { 137 std::function<void()> finished_check) const {
138 this->finished_check = finished_check; 138 this->finished_check = std::move(finished_check);
139 emit MainWindowTextCheckDialog(error_message); 139 emit MainWindowTextCheckDialog(error_message);
140} 140}
141 141
diff --git a/src/yuzu/applets/software_keyboard.h b/src/yuzu/applets/software_keyboard.h
index 73f56714f..c63720ba4 100644
--- a/src/yuzu/applets/software_keyboard.h
+++ b/src/yuzu/applets/software_keyboard.h
@@ -33,8 +33,8 @@ public:
33 Core::Frontend::SoftwareKeyboardParameters parameters); 33 Core::Frontend::SoftwareKeyboardParameters parameters);
34 ~QtSoftwareKeyboardDialog() override; 34 ~QtSoftwareKeyboardDialog() override;
35 35
36 void Submit(); 36 void accept() override;
37 void Reject(); 37 void reject() override;
38 38
39 std::u16string GetText() const; 39 std::u16string GetText() const;
40 bool GetStatus() const; 40 bool GetStatus() const;
@@ -70,11 +70,10 @@ signals:
70 void MainWindowGetText(Core::Frontend::SoftwareKeyboardParameters parameters) const; 70 void MainWindowGetText(Core::Frontend::SoftwareKeyboardParameters parameters) const;
71 void MainWindowTextCheckDialog(std::u16string error_message) const; 71 void MainWindowTextCheckDialog(std::u16string error_message) const;
72 72
73public slots: 73private:
74 void MainWindowFinishedText(std::optional<std::u16string> text); 74 void MainWindowFinishedText(std::optional<std::u16string> text);
75 void MainWindowFinishedCheckDialog(); 75 void MainWindowFinishedCheckDialog();
76 76
77private:
78 mutable std::function<void(std::optional<std::u16string>)> text_output; 77 mutable std::function<void(std::optional<std::u16string>)> text_output;
79 mutable std::function<void()> finished_check; 78 mutable std::function<void()> finished_check;
80}; 79};
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index 39eef8858..384e17921 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -310,7 +310,7 @@ void GRenderWindow::InitRenderTarget() {
310 // TODO: One of these flags might be interesting: WA_OpaquePaintEvent, WA_NoBackground, 310 // TODO: One of these flags might be interesting: WA_OpaquePaintEvent, WA_NoBackground,
311 // WA_DontShowOnScreen, WA_DeleteOnClose 311 // WA_DontShowOnScreen, WA_DeleteOnClose
312 QGLFormat fmt; 312 QGLFormat fmt;
313 fmt.setVersion(3, 3); 313 fmt.setVersion(4, 3);
314 fmt.setProfile(QGLFormat::CoreProfile); 314 fmt.setProfile(QGLFormat::CoreProfile);
315 fmt.setSwapInterval(false); 315 fmt.setSwapInterval(false);
316 316
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index e24ed5f2b..83ebbd1fe 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -432,6 +432,7 @@ void Config::ReadValues() {
432 Settings::values.use_gdbstub = qt_config->value("use_gdbstub", false).toBool(); 432 Settings::values.use_gdbstub = qt_config->value("use_gdbstub", false).toBool();
433 Settings::values.gdbstub_port = qt_config->value("gdbstub_port", 24689).toInt(); 433 Settings::values.gdbstub_port = qt_config->value("gdbstub_port", 24689).toInt();
434 Settings::values.program_args = qt_config->value("program_args", "").toString().toStdString(); 434 Settings::values.program_args = qt_config->value("program_args", "").toString().toStdString();
435 Settings::values.dump_exefs = qt_config->value("dump_exefs", false).toBool();
435 Settings::values.dump_nso = qt_config->value("dump_nso", false).toBool(); 436 Settings::values.dump_nso = qt_config->value("dump_nso", false).toBool();
436 qt_config->endGroup(); 437 qt_config->endGroup();
437 438
@@ -638,6 +639,7 @@ void Config::SaveValues() {
638 qt_config->setValue("use_gdbstub", Settings::values.use_gdbstub); 639 qt_config->setValue("use_gdbstub", Settings::values.use_gdbstub);
639 qt_config->setValue("gdbstub_port", Settings::values.gdbstub_port); 640 qt_config->setValue("gdbstub_port", Settings::values.gdbstub_port);
640 qt_config->setValue("program_args", QString::fromStdString(Settings::values.program_args)); 641 qt_config->setValue("program_args", QString::fromStdString(Settings::values.program_args));
642 qt_config->setValue("dump_exefs", Settings::values.dump_exefs);
641 qt_config->setValue("dump_nso", Settings::values.dump_nso); 643 qt_config->setValue("dump_nso", Settings::values.dump_nso);
642 qt_config->endGroup(); 644 qt_config->endGroup();
643 645
diff --git a/src/yuzu/configuration/configure_debug.cpp b/src/yuzu/configuration/configure_debug.cpp
index fd5876b41..aa7de7b54 100644
--- a/src/yuzu/configuration/configure_debug.cpp
+++ b/src/yuzu/configuration/configure_debug.cpp
@@ -34,6 +34,7 @@ void ConfigureDebug::setConfiguration() {
34 ui->toggle_console->setChecked(UISettings::values.show_console); 34 ui->toggle_console->setChecked(UISettings::values.show_console);
35 ui->log_filter_edit->setText(QString::fromStdString(Settings::values.log_filter)); 35 ui->log_filter_edit->setText(QString::fromStdString(Settings::values.log_filter));
36 ui->homebrew_args_edit->setText(QString::fromStdString(Settings::values.program_args)); 36 ui->homebrew_args_edit->setText(QString::fromStdString(Settings::values.program_args));
37 ui->dump_exefs->setChecked(Settings::values.dump_exefs);
37 ui->dump_decompressed_nso->setChecked(Settings::values.dump_nso); 38 ui->dump_decompressed_nso->setChecked(Settings::values.dump_nso);
38} 39}
39 40
@@ -43,6 +44,7 @@ void ConfigureDebug::applyConfiguration() {
43 UISettings::values.show_console = ui->toggle_console->isChecked(); 44 UISettings::values.show_console = ui->toggle_console->isChecked();
44 Settings::values.log_filter = ui->log_filter_edit->text().toStdString(); 45 Settings::values.log_filter = ui->log_filter_edit->text().toStdString();
45 Settings::values.program_args = ui->homebrew_args_edit->text().toStdString(); 46 Settings::values.program_args = ui->homebrew_args_edit->text().toStdString();
47 Settings::values.dump_exefs = ui->dump_exefs->isChecked();
46 Settings::values.dump_nso = ui->dump_decompressed_nso->isChecked(); 48 Settings::values.dump_nso = ui->dump_decompressed_nso->isChecked();
47 Debugger::ToggleConsole(); 49 Debugger::ToggleConsole();
48 Log::Filter filter; 50 Log::Filter filter;
diff --git a/src/yuzu/configuration/configure_debug.ui b/src/yuzu/configuration/configure_debug.ui
index 9c5b702f8..758a92335 100644
--- a/src/yuzu/configuration/configure_debug.ui
+++ b/src/yuzu/configuration/configure_debug.ui
@@ -145,6 +145,16 @@
145 </property> 145 </property>
146 </widget> 146 </widget>
147 </item> 147 </item>
148 <item>
149 <widget class="QCheckBox" name="dump_exefs">
150 <property name="whatsThis">
151 <string>When checked, any game that yuzu loads will have its ExeFS dumped to the yuzu/dump directory.</string>
152 </property>
153 <property name="text">
154 <string>Dump ExeFS</string>
155 </property>
156 </widget>
157 </item>
148 </layout> 158 </layout>
149 </widget> 159 </widget>
150 </item> 160 </item>
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui
index 91fcad994..e278cdd05 100644
--- a/src/yuzu/configuration/configure_graphics.ui
+++ b/src/yuzu/configuration/configure_graphics.ui
@@ -23,31 +23,31 @@
23 </property> 23 </property>
24 <layout class="QVBoxLayout" name="verticalLayout_2"> 24 <layout class="QVBoxLayout" name="verticalLayout_2">
25 <item> 25 <item>
26 <layout class="QHBoxLayout" name="horizontalLayout_2"> 26 <layout class="QHBoxLayout" name="horizontalLayout_2">
27 <item> 27 <item>
28 <widget class="QCheckBox" name="toggle_frame_limit"> 28 <widget class="QCheckBox" name="toggle_frame_limit">
29 <property name="text"> 29 <property name="text">
30 <string>Limit Speed Percent</string> 30 <string>Limit Speed Percent</string>
31 </property> 31 </property>
32 </widget> 32 </widget>
33 </item> 33 </item>
34 <item> 34 <item>
35 <widget class="QSpinBox" name="frame_limit"> 35 <widget class="QSpinBox" name="frame_limit">
36 <property name="suffix"> 36 <property name="suffix">
37 <string>%</string> 37 <string>%</string>
38 </property> 38 </property>
39 <property name="minimum"> 39 <property name="minimum">
40 <number>1</number> 40 <number>1</number>
41 </property> 41 </property>
42 <property name="maximum"> 42 <property name="maximum">
43 <number>9999</number> 43 <number>9999</number>
44 </property> 44 </property>
45 <property name="value"> 45 <property name="value">
46 <number>100</number> 46 <number>100</number>
47 </property> 47 </property>
48 </widget> 48 </widget>
49 </item> 49 </item>
50 </layout> 50 </layout>
51 </item> 51 </item>
52 <item> 52 <item>
53 <widget class="QCheckBox" name="use_accurate_gpu_emulation"> 53 <widget class="QCheckBox" name="use_accurate_gpu_emulation">
@@ -61,7 +61,7 @@
61 <item> 61 <item>
62 <widget class="QLabel" name="label"> 62 <widget class="QLabel" name="label">
63 <property name="text"> 63 <property name="text">
64 <string>Internal Resolution:(Currently does nothing.)</string> 64 <string>Internal Resolution</string>
65 </property> 65 </property>
66 </widget> 66 </widget>
67 </item> 67 </item>
@@ -96,27 +96,27 @@
96 </item> 96 </item>
97 </layout> 97 </layout>
98 </item> 98 </item>
99 <item> 99 <item>
100 <layout class="QHBoxLayout" name="horizontalLayout_6"> 100 <layout class="QHBoxLayout" name="horizontalLayout_6">
101 <item> 101 <item>
102 <widget class="QLabel" name="bg_label"> 102 <widget class="QLabel" name="bg_label">
103 <property name="text"> 103 <property name="text">
104 <string>Background Color:</string> 104 <string>Background Color:</string>
105 </property> 105 </property>
106 </widget> 106 </widget>
107 </item> 107 </item>
108 <item> 108 <item>
109 <widget class="QPushButton" name="bg_button"> 109 <widget class="QPushButton" name="bg_button">
110 <property name="maximumSize"> 110 <property name="maximumSize">
111 <size> 111 <size>
112 <width>40</width> 112 <width>40</width>
113 <height>16777215</height> 113 <height>16777215</height>
114 </size> 114 </size>
115 </property> 115 </property>
116 </widget> 116 </widget>
117 </item> 117 </item>
118 </layout> 118 </layout>
119 </item> 119 </item>
120 </layout> 120 </layout>
121 </widget> 121 </widget>
122 </item> 122 </item>
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 9e13bbf7c..9c6d150a5 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -518,32 +518,18 @@ void GMainWindow::OnDisplayTitleBars(bool show) {
518QStringList GMainWindow::GetUnsupportedGLExtensions() { 518QStringList GMainWindow::GetUnsupportedGLExtensions() {
519 QStringList unsupported_ext; 519 QStringList unsupported_ext;
520 520
521 if (!GLAD_GL_ARB_program_interface_query)
522 unsupported_ext.append("ARB_program_interface_query");
523 if (!GLAD_GL_ARB_separate_shader_objects)
524 unsupported_ext.append("ARB_separate_shader_objects");
525 if (!GLAD_GL_ARB_vertex_attrib_binding)
526 unsupported_ext.append("ARB_vertex_attrib_binding");
527 if (!GLAD_GL_ARB_vertex_type_10f_11f_11f_rev) 521 if (!GLAD_GL_ARB_vertex_type_10f_11f_11f_rev)
528 unsupported_ext.append("ARB_vertex_type_10f_11f_11f_rev"); 522 unsupported_ext.append("ARB_vertex_type_10f_11f_11f_rev");
529 if (!GLAD_GL_ARB_texture_mirror_clamp_to_edge) 523 if (!GLAD_GL_ARB_texture_mirror_clamp_to_edge)
530 unsupported_ext.append("ARB_texture_mirror_clamp_to_edge"); 524 unsupported_ext.append("ARB_texture_mirror_clamp_to_edge");
531 if (!GLAD_GL_ARB_base_instance)
532 unsupported_ext.append("ARB_base_instance");
533 if (!GLAD_GL_ARB_texture_storage)
534 unsupported_ext.append("ARB_texture_storage");
535 if (!GLAD_GL_ARB_multi_bind) 525 if (!GLAD_GL_ARB_multi_bind)
536 unsupported_ext.append("ARB_multi_bind"); 526 unsupported_ext.append("ARB_multi_bind");
537 if (!GLAD_GL_ARB_copy_image)
538 unsupported_ext.append("ARB_copy_image");
539 527
540 // Extensions required to support some texture formats. 528 // Extensions required to support some texture formats.
541 if (!GLAD_GL_EXT_texture_compression_s3tc) 529 if (!GLAD_GL_EXT_texture_compression_s3tc)
542 unsupported_ext.append("EXT_texture_compression_s3tc"); 530 unsupported_ext.append("EXT_texture_compression_s3tc");
543 if (!GLAD_GL_ARB_texture_compression_rgtc) 531 if (!GLAD_GL_ARB_texture_compression_rgtc)
544 unsupported_ext.append("ARB_texture_compression_rgtc"); 532 unsupported_ext.append("ARB_texture_compression_rgtc");
545 if (!GLAD_GL_ARB_texture_compression_bptc)
546 unsupported_ext.append("ARB_texture_compression_bptc");
547 if (!GLAD_GL_ARB_depth_buffer_float) 533 if (!GLAD_GL_ARB_depth_buffer_float)
548 unsupported_ext.append("ARB_depth_buffer_float"); 534 unsupported_ext.append("ARB_depth_buffer_float");
549 535
@@ -562,8 +548,8 @@ bool GMainWindow::LoadROM(const QString& filename) {
562 render_window->MakeCurrent(); 548 render_window->MakeCurrent();
563 549
564 if (!gladLoadGL()) { 550 if (!gladLoadGL()) {
565 QMessageBox::critical(this, tr("Error while initializing OpenGL 3.3 Core!"), 551 QMessageBox::critical(this, tr("Error while initializing OpenGL 4.3 Core!"),
566 tr("Your GPU may not support OpenGL 3.3, or you do not " 552 tr("Your GPU may not support OpenGL 4.3, or you do not "
567 "have the latest graphics driver.")); 553 "have the latest graphics driver."));
568 return false; 554 return false;
569 } 555 }
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index c66353a65..097c1fbe3 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -366,6 +366,7 @@ void Config::ReadValues() {
366 Settings::values.gdbstub_port = 366 Settings::values.gdbstub_port =
367 static_cast<u16>(sdl2_config->GetInteger("Debugging", "gdbstub_port", 24689)); 367 static_cast<u16>(sdl2_config->GetInteger("Debugging", "gdbstub_port", 24689));
368 Settings::values.program_args = sdl2_config->Get("Debugging", "program_args", ""); 368 Settings::values.program_args = sdl2_config->Get("Debugging", "program_args", "");
369 Settings::values.dump_exefs = sdl2_config->GetBoolean("Debugging", "dump_exefs", false);
369 Settings::values.dump_nso = sdl2_config->GetBoolean("Debugging", "dump_nso", false); 370 Settings::values.dump_nso = sdl2_config->GetBoolean("Debugging", "dump_nso", false);
370 371
371 // Web Service 372 // Web Service
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index ecf625e7b..d73669f36 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -206,6 +206,8 @@ log_filter = *:Trace
206# Port for listening to GDB connections. 206# Port for listening to GDB connections.
207use_gdbstub=false 207use_gdbstub=false
208gdbstub_port=24689 208gdbstub_port=24689
209# Determines whether or not yuzu will dump the ExeFS of all games it attempts to load while loading them
210dump_exefs=false
209# Determines whether or not yuzu will dump all NSOs it attempts to load while loading them 211# Determines whether or not yuzu will dump all NSOs it attempts to load while loading them
210dump_nso=false 212dump_nso=false
211 213
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
index a9ad92a80..2d6f8cced 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
@@ -111,32 +111,18 @@ void EmuWindow_SDL2::Fullscreen() {
111bool EmuWindow_SDL2::SupportsRequiredGLExtensions() { 111bool EmuWindow_SDL2::SupportsRequiredGLExtensions() {
112 std::vector<std::string> unsupported_ext; 112 std::vector<std::string> unsupported_ext;
113 113
114 if (!GLAD_GL_ARB_program_interface_query)
115 unsupported_ext.push_back("ARB_program_interface_query");
116 if (!GLAD_GL_ARB_separate_shader_objects)
117 unsupported_ext.push_back("ARB_separate_shader_objects");
118 if (!GLAD_GL_ARB_vertex_attrib_binding)
119 unsupported_ext.push_back("ARB_vertex_attrib_binding");
120 if (!GLAD_GL_ARB_vertex_type_10f_11f_11f_rev) 114 if (!GLAD_GL_ARB_vertex_type_10f_11f_11f_rev)
121 unsupported_ext.push_back("ARB_vertex_type_10f_11f_11f_rev"); 115 unsupported_ext.push_back("ARB_vertex_type_10f_11f_11f_rev");
122 if (!GLAD_GL_ARB_texture_mirror_clamp_to_edge) 116 if (!GLAD_GL_ARB_texture_mirror_clamp_to_edge)
123 unsupported_ext.push_back("ARB_texture_mirror_clamp_to_edge"); 117 unsupported_ext.push_back("ARB_texture_mirror_clamp_to_edge");
124 if (!GLAD_GL_ARB_base_instance)
125 unsupported_ext.push_back("ARB_base_instance");
126 if (!GLAD_GL_ARB_texture_storage)
127 unsupported_ext.push_back("ARB_texture_storage");
128 if (!GLAD_GL_ARB_multi_bind) 118 if (!GLAD_GL_ARB_multi_bind)
129 unsupported_ext.push_back("ARB_multi_bind"); 119 unsupported_ext.push_back("ARB_multi_bind");
130 if (!GLAD_GL_ARB_copy_image)
131 unsupported_ext.push_back("ARB_copy_image");
132 120
133 // Extensions required to support some texture formats. 121 // Extensions required to support some texture formats.
134 if (!GLAD_GL_EXT_texture_compression_s3tc) 122 if (!GLAD_GL_EXT_texture_compression_s3tc)
135 unsupported_ext.push_back("EXT_texture_compression_s3tc"); 123 unsupported_ext.push_back("EXT_texture_compression_s3tc");
136 if (!GLAD_GL_ARB_texture_compression_rgtc) 124 if (!GLAD_GL_ARB_texture_compression_rgtc)
137 unsupported_ext.push_back("ARB_texture_compression_rgtc"); 125 unsupported_ext.push_back("ARB_texture_compression_rgtc");
138 if (!GLAD_GL_ARB_texture_compression_bptc)
139 unsupported_ext.push_back("ARB_texture_compression_bptc");
140 if (!GLAD_GL_ARB_depth_buffer_float) 126 if (!GLAD_GL_ARB_depth_buffer_float)
141 unsupported_ext.push_back("ARB_depth_buffer_float"); 127 unsupported_ext.push_back("ARB_depth_buffer_float");
142 128
@@ -157,7 +143,7 @@ EmuWindow_SDL2::EmuWindow_SDL2(bool fullscreen) {
157 exit(1); 143 exit(1);
158 } 144 }
159 145
160 SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 3); 146 SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 4);
161 SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 3); 147 SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 3);
162 SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE); 148 SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE);
163 SDL_GL_SetAttribute(SDL_GL_DOUBLEBUFFER, 1); 149 SDL_GL_SetAttribute(SDL_GL_DOUBLEBUFFER, 1);