summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/core/arm/arm_interface.h53
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic.cpp35
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic.h10
-rw-r--r--src/core/arm/unicorn/arm_unicorn.cpp27
-rw-r--r--src/core/arm/unicorn/arm_unicorn.h10
-rw-r--r--src/core/gdbstub/gdbstub.cpp50
-rw-r--r--src/core/hle/kernel/thread.cpp4
-rw-r--r--src/video_core/CMakeLists.txt1
-rw-r--r--src/video_core/engines/shader_header.h103
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp31
10 files changed, 199 insertions, 125 deletions
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index 0b2af2a9b..867e34932 100644
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -10,7 +10,7 @@
10 10
11namespace Core { 11namespace Core {
12 12
13/// Generic ARM11 CPU interface 13/// Generic ARMv8 CPU interface
14class ARM_Interface : NonCopyable { 14class ARM_Interface : NonCopyable {
15public: 15public:
16 virtual ~ARM_Interface() {} 16 virtual ~ARM_Interface() {}
@@ -19,9 +19,9 @@ public:
19 std::array<u64, 31> cpu_registers; 19 std::array<u64, 31> cpu_registers;
20 u64 sp; 20 u64 sp;
21 u64 pc; 21 u64 pc;
22 u64 cpsr; 22 u64 pstate;
23 std::array<u128, 32> fpu_registers; 23 std::array<u128, 32> vector_registers;
24 u64 fpscr; 24 u64 fpcr;
25 }; 25 };
26 26
27 /// Runs the CPU until an event happens 27 /// Runs the CPU until an event happens
@@ -69,42 +69,50 @@ public:
69 */ 69 */
70 virtual void SetReg(int index, u64 value) = 0; 70 virtual void SetReg(int index, u64 value) = 0;
71 71
72 virtual u128 GetExtReg(int index) const = 0;
73
74 virtual void SetExtReg(int index, u128 value) = 0;
75
76 /** 72 /**
77 * Gets the value of a VFP register 73 * Gets the value of a specified vector register.
78 * @param index Register index (0-31) 74 *
79 * @return Returns the value in the register 75 * @param index The index of the vector register.
76 * @return the value within the vector register.
80 */ 77 */
81 virtual u32 GetVFPReg(int index) const = 0; 78 virtual u128 GetVectorReg(int index) const = 0;
82 79
83 /** 80 /**
84 * Sets a VFP register to the given value 81 * Sets a given value into a vector register.
85 * @param index Register index (0-31) 82 *
86 * @param value Value to set register to 83 * @param index The index of the vector register.
84 * @param value The new value to place in the register.
87 */ 85 */
88 virtual void SetVFPReg(int index, u32 value) = 0; 86 virtual void SetVectorReg(int index, u128 value) = 0;
89 87
90 /** 88 /**
91 * Get the current CPSR register 89 * Get the current PSTATE register
92 * @return Returns the value of the CPSR register 90 * @return Returns the value of the PSTATE register
93 */ 91 */
94 virtual u32 GetCPSR() const = 0; 92 virtual u32 GetPSTATE() const = 0;
95 93
96 /** 94 /**
97 * Set the current CPSR register 95 * Set the current PSTATE register
98 * @param cpsr Value to set CPSR to 96 * @param pstate Value to set PSTATE to
99 */ 97 */
100 virtual void SetCPSR(u32 cpsr) = 0; 98 virtual void SetPSTATE(u32 pstate) = 0;
101 99
102 virtual VAddr GetTlsAddress() const = 0; 100 virtual VAddr GetTlsAddress() const = 0;
103 101
104 virtual void SetTlsAddress(VAddr address) = 0; 102 virtual void SetTlsAddress(VAddr address) = 0;
105 103
104 /**
105 * Gets the value within the TPIDR_EL0 (read/write software thread ID) register.
106 *
107 * @return the value within the register.
108 */
106 virtual u64 GetTPIDR_EL0() const = 0; 109 virtual u64 GetTPIDR_EL0() const = 0;
107 110
111 /**
112 * Sets a new value within the TPIDR_EL0 (read/write software thread ID) register.
113 *
114 * @param value The new value to place in the register.
115 */
108 virtual void SetTPIDR_EL0(u64 value) = 0; 116 virtual void SetTPIDR_EL0(u64 value) = 0;
109 117
110 /** 118 /**
@@ -119,6 +127,7 @@ public:
119 */ 127 */
120 virtual void LoadContext(const ThreadContext& ctx) = 0; 128 virtual void LoadContext(const ThreadContext& ctx) = 0;
121 129
130 /// Clears the exclusive monitor's state.
122 virtual void ClearExclusiveState() = 0; 131 virtual void ClearExclusiveState() = 0;
123 132
124 /// Prepare core for thread reschedule (if needed to correctly handle state) 133 /// Prepare core for thread reschedule (if needed to correctly handle state)
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp
index 0c175d872..3f072c51f 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -194,29 +194,20 @@ void ARM_Dynarmic::SetReg(int index, u64 value) {
194 jit->SetRegister(index, value); 194 jit->SetRegister(index, value);
195} 195}
196 196
197u128 ARM_Dynarmic::GetExtReg(int index) const { 197u128 ARM_Dynarmic::GetVectorReg(int index) const {
198 return jit->GetVector(index); 198 return jit->GetVector(index);
199} 199}
200 200
201void ARM_Dynarmic::SetExtReg(int index, u128 value) { 201void ARM_Dynarmic::SetVectorReg(int index, u128 value) {
202 jit->SetVector(index, value); 202 jit->SetVector(index, value);
203} 203}
204 204
205u32 ARM_Dynarmic::GetVFPReg(int /*index*/) const { 205u32 ARM_Dynarmic::GetPSTATE() const {
206 UNIMPLEMENTED();
207 return {};
208}
209
210void ARM_Dynarmic::SetVFPReg(int /*index*/, u32 /*value*/) {
211 UNIMPLEMENTED();
212}
213
214u32 ARM_Dynarmic::GetCPSR() const {
215 return jit->GetPstate(); 206 return jit->GetPstate();
216} 207}
217 208
218void ARM_Dynarmic::SetCPSR(u32 cpsr) { 209void ARM_Dynarmic::SetPSTATE(u32 pstate) {
219 jit->SetPstate(cpsr); 210 jit->SetPstate(pstate);
220} 211}
221 212
222u64 ARM_Dynarmic::GetTlsAddress() const { 213u64 ARM_Dynarmic::GetTlsAddress() const {
@@ -239,18 +230,18 @@ void ARM_Dynarmic::SaveContext(ThreadContext& ctx) {
239 ctx.cpu_registers = jit->GetRegisters(); 230 ctx.cpu_registers = jit->GetRegisters();
240 ctx.sp = jit->GetSP(); 231 ctx.sp = jit->GetSP();
241 ctx.pc = jit->GetPC(); 232 ctx.pc = jit->GetPC();
242 ctx.cpsr = jit->GetPstate(); 233 ctx.pstate = jit->GetPstate();
243 ctx.fpu_registers = jit->GetVectors(); 234 ctx.vector_registers = jit->GetVectors();
244 ctx.fpscr = jit->GetFpcr(); 235 ctx.fpcr = jit->GetFpcr();
245} 236}
246 237
247void ARM_Dynarmic::LoadContext(const ThreadContext& ctx) { 238void ARM_Dynarmic::LoadContext(const ThreadContext& ctx) {
248 jit->SetRegisters(ctx.cpu_registers); 239 jit->SetRegisters(ctx.cpu_registers);
249 jit->SetSP(ctx.sp); 240 jit->SetSP(ctx.sp);
250 jit->SetPC(ctx.pc); 241 jit->SetPC(ctx.pc);
251 jit->SetPstate(static_cast<u32>(ctx.cpsr)); 242 jit->SetPstate(static_cast<u32>(ctx.pstate));
252 jit->SetVectors(ctx.fpu_registers); 243 jit->SetVectors(ctx.vector_registers);
253 jit->SetFpcr(static_cast<u32>(ctx.fpscr)); 244 jit->SetFpcr(static_cast<u32>(ctx.fpcr));
254} 245}
255 246
256void ARM_Dynarmic::PrepareReschedule() { 247void ARM_Dynarmic::PrepareReschedule() {
@@ -304,8 +295,8 @@ bool DynarmicExclusiveMonitor::ExclusiveWrite64(std::size_t core_index, VAddr va
304 295
305bool DynarmicExclusiveMonitor::ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) { 296bool DynarmicExclusiveMonitor::ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) {
306 return monitor.DoExclusiveOperation(core_index, vaddr, 16, [&] { 297 return monitor.DoExclusiveOperation(core_index, vaddr, 16, [&] {
307 Memory::Write64(vaddr, value[0]); 298 Memory::Write64(vaddr + 0, value[0]);
308 Memory::Write64(vaddr, value[1]); 299 Memory::Write64(vaddr + 8, value[1]);
309 }); 300 });
310} 301}
311 302
diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic.h
index 56c60c853..e61382d3d 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.h
+++ b/src/core/arm/dynarmic/arm_dynarmic.h
@@ -29,14 +29,12 @@ public:
29 u64 GetPC() const override; 29 u64 GetPC() const override;
30 u64 GetReg(int index) const override; 30 u64 GetReg(int index) const override;
31 void SetReg(int index, u64 value) override; 31 void SetReg(int index, u64 value) override;
32 u128 GetExtReg(int index) const override; 32 u128 GetVectorReg(int index) const override;
33 void SetExtReg(int index, u128 value) override; 33 void SetVectorReg(int index, u128 value) override;
34 u32 GetVFPReg(int index) const override; 34 u32 GetPSTATE() const override;
35 void SetVFPReg(int index, u32 value) override; 35 void SetPSTATE(u32 pstate) override;
36 u32 GetCPSR() const override;
37 void Run() override; 36 void Run() override;
38 void Step() override; 37 void Step() override;
39 void SetCPSR(u32 cpsr) override;
40 VAddr GetTlsAddress() const override; 38 VAddr GetTlsAddress() const override;
41 void SetTlsAddress(VAddr address) override; 39 void SetTlsAddress(VAddr address) override;
42 void SetTPIDR_EL0(u64 value) override; 40 void SetTPIDR_EL0(u64 value) override;
diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp
index 4e02b7cd4..e218a0b15 100644
--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@@ -131,33 +131,24 @@ void ARM_Unicorn::SetReg(int regn, u64 val) {
131 CHECKED(uc_reg_write(uc, treg, &val)); 131 CHECKED(uc_reg_write(uc, treg, &val));
132} 132}
133 133
134u128 ARM_Unicorn::GetExtReg(int /*index*/) const { 134u128 ARM_Unicorn::GetVectorReg(int /*index*/) const {
135 UNIMPLEMENTED(); 135 UNIMPLEMENTED();
136 static constexpr u128 res{}; 136 static constexpr u128 res{};
137 return res; 137 return res;
138} 138}
139 139
140void ARM_Unicorn::SetExtReg(int /*index*/, u128 /*value*/) { 140void ARM_Unicorn::SetVectorReg(int /*index*/, u128 /*value*/) {
141 UNIMPLEMENTED(); 141 UNIMPLEMENTED();
142} 142}
143 143
144u32 ARM_Unicorn::GetVFPReg(int /*index*/) const { 144u32 ARM_Unicorn::GetPSTATE() const {
145 UNIMPLEMENTED();
146 return {};
147}
148
149void ARM_Unicorn::SetVFPReg(int /*index*/, u32 /*value*/) {
150 UNIMPLEMENTED();
151}
152
153u32 ARM_Unicorn::GetCPSR() const {
154 u64 nzcv{}; 145 u64 nzcv{};
155 CHECKED(uc_reg_read(uc, UC_ARM64_REG_NZCV, &nzcv)); 146 CHECKED(uc_reg_read(uc, UC_ARM64_REG_NZCV, &nzcv));
156 return static_cast<u32>(nzcv); 147 return static_cast<u32>(nzcv);
157} 148}
158 149
159void ARM_Unicorn::SetCPSR(u32 cpsr) { 150void ARM_Unicorn::SetPSTATE(u32 pstate) {
160 u64 nzcv = cpsr; 151 u64 nzcv = pstate;
161 CHECKED(uc_reg_write(uc, UC_ARM64_REG_NZCV, &nzcv)); 152 CHECKED(uc_reg_write(uc, UC_ARM64_REG_NZCV, &nzcv));
162} 153}
163 154
@@ -219,7 +210,7 @@ void ARM_Unicorn::SaveContext(ThreadContext& ctx) {
219 210
220 CHECKED(uc_reg_read(uc, UC_ARM64_REG_SP, &ctx.sp)); 211 CHECKED(uc_reg_read(uc, UC_ARM64_REG_SP, &ctx.sp));
221 CHECKED(uc_reg_read(uc, UC_ARM64_REG_PC, &ctx.pc)); 212 CHECKED(uc_reg_read(uc, UC_ARM64_REG_PC, &ctx.pc));
222 CHECKED(uc_reg_read(uc, UC_ARM64_REG_NZCV, &ctx.cpsr)); 213 CHECKED(uc_reg_read(uc, UC_ARM64_REG_NZCV, &ctx.pstate));
223 214
224 for (auto i = 0; i < 29; ++i) { 215 for (auto i = 0; i < 29; ++i) {
225 uregs[i] = UC_ARM64_REG_X0 + i; 216 uregs[i] = UC_ARM64_REG_X0 + i;
@@ -234,7 +225,7 @@ void ARM_Unicorn::SaveContext(ThreadContext& ctx) {
234 225
235 for (int i = 0; i < 32; ++i) { 226 for (int i = 0; i < 32; ++i) {
236 uregs[i] = UC_ARM64_REG_Q0 + i; 227 uregs[i] = UC_ARM64_REG_Q0 + i;
237 tregs[i] = &ctx.fpu_registers[i]; 228 tregs[i] = &ctx.vector_registers[i];
238 } 229 }
239 230
240 CHECKED(uc_reg_read_batch(uc, uregs, tregs, 32)); 231 CHECKED(uc_reg_read_batch(uc, uregs, tregs, 32));
@@ -246,7 +237,7 @@ void ARM_Unicorn::LoadContext(const ThreadContext& ctx) {
246 237
247 CHECKED(uc_reg_write(uc, UC_ARM64_REG_SP, &ctx.sp)); 238 CHECKED(uc_reg_write(uc, UC_ARM64_REG_SP, &ctx.sp));
248 CHECKED(uc_reg_write(uc, UC_ARM64_REG_PC, &ctx.pc)); 239 CHECKED(uc_reg_write(uc, UC_ARM64_REG_PC, &ctx.pc));
249 CHECKED(uc_reg_write(uc, UC_ARM64_REG_NZCV, &ctx.cpsr)); 240 CHECKED(uc_reg_write(uc, UC_ARM64_REG_NZCV, &ctx.pstate));
250 241
251 for (int i = 0; i < 29; ++i) { 242 for (int i = 0; i < 29; ++i) {
252 uregs[i] = UC_ARM64_REG_X0 + i; 243 uregs[i] = UC_ARM64_REG_X0 + i;
@@ -261,7 +252,7 @@ void ARM_Unicorn::LoadContext(const ThreadContext& ctx) {
261 252
262 for (auto i = 0; i < 32; ++i) { 253 for (auto i = 0; i < 32; ++i) {
263 uregs[i] = UC_ARM64_REG_Q0 + i; 254 uregs[i] = UC_ARM64_REG_Q0 + i;
264 tregs[i] = (void*)&ctx.fpu_registers[i]; 255 tregs[i] = (void*)&ctx.vector_registers[i];
265 } 256 }
266 257
267 CHECKED(uc_reg_write_batch(uc, uregs, tregs, 32)); 258 CHECKED(uc_reg_write_batch(uc, uregs, tregs, 32));
diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h
index d6f7cf4ab..75761950b 100644
--- a/src/core/arm/unicorn/arm_unicorn.h
+++ b/src/core/arm/unicorn/arm_unicorn.h
@@ -22,12 +22,10 @@ public:
22 u64 GetPC() const override; 22 u64 GetPC() const override;
23 u64 GetReg(int index) const override; 23 u64 GetReg(int index) const override;
24 void SetReg(int index, u64 value) override; 24 void SetReg(int index, u64 value) override;
25 u128 GetExtReg(int index) const override; 25 u128 GetVectorReg(int index) const override;
26 void SetExtReg(int index, u128 value) override; 26 void SetVectorReg(int index, u128 value) override;
27 u32 GetVFPReg(int index) const override; 27 u32 GetPSTATE() const override;
28 void SetVFPReg(int index, u32 value) override; 28 void SetPSTATE(u32 pstate) override;
29 u32 GetCPSR() const override;
30 void SetCPSR(u32 cpsr) override;
31 VAddr GetTlsAddress() const override; 29 VAddr GetTlsAddress() const override;
32 void SetTlsAddress(VAddr address) override; 30 void SetTlsAddress(VAddr address) override;
33 void SetTPIDR_EL0(u64 value) override; 31 void SetTPIDR_EL0(u64 value) override;
diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp
index cfaf20a88..1b04f68bf 100644
--- a/src/core/gdbstub/gdbstub.cpp
+++ b/src/core/gdbstub/gdbstub.cpp
@@ -65,9 +65,9 @@ constexpr u32 MSG_WAITALL = 8;
65constexpr u32 LR_REGISTER = 30; 65constexpr u32 LR_REGISTER = 30;
66constexpr u32 SP_REGISTER = 31; 66constexpr u32 SP_REGISTER = 31;
67constexpr u32 PC_REGISTER = 32; 67constexpr u32 PC_REGISTER = 32;
68constexpr u32 CPSR_REGISTER = 33; 68constexpr u32 PSTATE_REGISTER = 33;
69constexpr u32 UC_ARM64_REG_Q0 = 34; 69constexpr u32 UC_ARM64_REG_Q0 = 34;
70constexpr u32 FPSCR_REGISTER = 66; 70constexpr u32 FPCR_REGISTER = 66;
71 71
72// TODO/WiP - Used while working on support for FPU 72// TODO/WiP - Used while working on support for FPU
73constexpr u32 TODO_DUMMY_REG_997 = 997; 73constexpr u32 TODO_DUMMY_REG_997 = 997;
@@ -116,7 +116,7 @@ constexpr char target_xml[] =
116 116
117 <reg name="pc" bitsize="64" type="code_ptr"/> 117 <reg name="pc" bitsize="64" type="code_ptr"/>
118 118
119 <flags id="cpsr_flags" size="4"> 119 <flags id="pstate_flags" size="4">
120 <field name="SP" start="0" end="0"/> 120 <field name="SP" start="0" end="0"/>
121 <field name="" start="1" end="1"/> 121 <field name="" start="1" end="1"/>
122 <field name="EL" start="2" end="3"/> 122 <field name="EL" start="2" end="3"/>
@@ -135,7 +135,7 @@ constexpr char target_xml[] =
135 <field name="Z" start="30" end="30"/> 135 <field name="Z" start="30" end="30"/>
136 <field name="N" start="31" end="31"/> 136 <field name="N" start="31" end="31"/>
137 </flags> 137 </flags>
138 <reg name="cpsr" bitsize="32" type="cpsr_flags"/> 138 <reg name="pstate" bitsize="32" type="pstate_flags"/>
139 </feature> 139 </feature>
140 <feature name="org.gnu.gdb.aarch64.fpu"> 140 <feature name="org.gnu.gdb.aarch64.fpu">
141 </feature> 141 </feature>
@@ -227,10 +227,10 @@ static u64 RegRead(std::size_t id, Kernel::Thread* thread = nullptr) {
227 return thread->context.sp; 227 return thread->context.sp;
228 } else if (id == PC_REGISTER) { 228 } else if (id == PC_REGISTER) {
229 return thread->context.pc; 229 return thread->context.pc;
230 } else if (id == CPSR_REGISTER) { 230 } else if (id == PSTATE_REGISTER) {
231 return thread->context.cpsr; 231 return thread->context.pstate;
232 } else if (id > CPSR_REGISTER && id < FPSCR_REGISTER) { 232 } else if (id > PSTATE_REGISTER && id < FPCR_REGISTER) {
233 return thread->context.fpu_registers[id - UC_ARM64_REG_Q0][0]; 233 return thread->context.vector_registers[id - UC_ARM64_REG_Q0][0];
234 } else { 234 } else {
235 return 0; 235 return 0;
236 } 236 }
@@ -247,10 +247,10 @@ static void RegWrite(std::size_t id, u64 val, Kernel::Thread* thread = nullptr)
247 thread->context.sp = val; 247 thread->context.sp = val;
248 } else if (id == PC_REGISTER) { 248 } else if (id == PC_REGISTER) {
249 thread->context.pc = val; 249 thread->context.pc = val;
250 } else if (id == CPSR_REGISTER) { 250 } else if (id == PSTATE_REGISTER) {
251 thread->context.cpsr = val; 251 thread->context.pstate = val;
252 } else if (id > CPSR_REGISTER && id < FPSCR_REGISTER) { 252 } else if (id > PSTATE_REGISTER && id < FPCR_REGISTER) {
253 thread->context.fpu_registers[id - (CPSR_REGISTER + 1)][0] = val; 253 thread->context.vector_registers[id - (PSTATE_REGISTER + 1)][0] = val;
254 } 254 }
255} 255}
256 256
@@ -781,11 +781,11 @@ static void ReadRegister() {
781 LongToGdbHex(reply, RegRead(id, current_thread)); 781 LongToGdbHex(reply, RegRead(id, current_thread));
782 } else if (id == PC_REGISTER) { 782 } else if (id == PC_REGISTER) {
783 LongToGdbHex(reply, RegRead(id, current_thread)); 783 LongToGdbHex(reply, RegRead(id, current_thread));
784 } else if (id == CPSR_REGISTER) { 784 } else if (id == PSTATE_REGISTER) {
785 IntToGdbHex(reply, (u32)RegRead(id, current_thread)); 785 IntToGdbHex(reply, static_cast<u32>(RegRead(id, current_thread)));
786 } else if (id >= UC_ARM64_REG_Q0 && id < FPSCR_REGISTER) { 786 } else if (id >= UC_ARM64_REG_Q0 && id < FPCR_REGISTER) {
787 LongToGdbHex(reply, RegRead(id, current_thread)); 787 LongToGdbHex(reply, RegRead(id, current_thread));
788 } else if (id == FPSCR_REGISTER) { 788 } else if (id == FPCR_REGISTER) {
789 LongToGdbHex(reply, RegRead(TODO_DUMMY_REG_998, current_thread)); 789 LongToGdbHex(reply, RegRead(TODO_DUMMY_REG_998, current_thread));
790 } else { 790 } else {
791 LongToGdbHex(reply, RegRead(TODO_DUMMY_REG_997, current_thread)); 791 LongToGdbHex(reply, RegRead(TODO_DUMMY_REG_997, current_thread));
@@ -811,7 +811,7 @@ static void ReadRegisters() {
811 811
812 bufptr += 16; 812 bufptr += 16;
813 813
814 IntToGdbHex(bufptr, (u32)RegRead(CPSR_REGISTER, current_thread)); 814 IntToGdbHex(bufptr, static_cast<u32>(RegRead(PSTATE_REGISTER, current_thread)));
815 815
816 bufptr += 8; 816 bufptr += 8;
817 817
@@ -843,11 +843,11 @@ static void WriteRegister() {
843 RegWrite(id, GdbHexToLong(buffer_ptr), current_thread); 843 RegWrite(id, GdbHexToLong(buffer_ptr), current_thread);
844 } else if (id == PC_REGISTER) { 844 } else if (id == PC_REGISTER) {
845 RegWrite(id, GdbHexToLong(buffer_ptr), current_thread); 845 RegWrite(id, GdbHexToLong(buffer_ptr), current_thread);
846 } else if (id == CPSR_REGISTER) { 846 } else if (id == PSTATE_REGISTER) {
847 RegWrite(id, GdbHexToInt(buffer_ptr), current_thread); 847 RegWrite(id, GdbHexToInt(buffer_ptr), current_thread);
848 } else if (id >= UC_ARM64_REG_Q0 && id < FPSCR_REGISTER) { 848 } else if (id >= UC_ARM64_REG_Q0 && id < FPCR_REGISTER) {
849 RegWrite(id, GdbHexToLong(buffer_ptr), current_thread); 849 RegWrite(id, GdbHexToLong(buffer_ptr), current_thread);
850 } else if (id == FPSCR_REGISTER) { 850 } else if (id == FPCR_REGISTER) {
851 RegWrite(TODO_DUMMY_REG_998, GdbHexToLong(buffer_ptr), current_thread); 851 RegWrite(TODO_DUMMY_REG_998, GdbHexToLong(buffer_ptr), current_thread);
852 } else { 852 } else {
853 RegWrite(TODO_DUMMY_REG_997, GdbHexToLong(buffer_ptr), current_thread); 853 RegWrite(TODO_DUMMY_REG_997, GdbHexToLong(buffer_ptr), current_thread);
@@ -866,16 +866,16 @@ static void WriteRegisters() {
866 if (command_buffer[0] != 'G') 866 if (command_buffer[0] != 'G')
867 return SendReply("E01"); 867 return SendReply("E01");
868 868
869 for (u32 i = 0, reg = 0; reg <= FPSCR_REGISTER; i++, reg++) { 869 for (u32 i = 0, reg = 0; reg <= FPCR_REGISTER; i++, reg++) {
870 if (reg <= SP_REGISTER) { 870 if (reg <= SP_REGISTER) {
871 RegWrite(reg, GdbHexToLong(buffer_ptr + i * 16), current_thread); 871 RegWrite(reg, GdbHexToLong(buffer_ptr + i * 16), current_thread);
872 } else if (reg == PC_REGISTER) { 872 } else if (reg == PC_REGISTER) {
873 RegWrite(PC_REGISTER, GdbHexToLong(buffer_ptr + i * 16), current_thread); 873 RegWrite(PC_REGISTER, GdbHexToLong(buffer_ptr + i * 16), current_thread);
874 } else if (reg == CPSR_REGISTER) { 874 } else if (reg == PSTATE_REGISTER) {
875 RegWrite(CPSR_REGISTER, GdbHexToInt(buffer_ptr + i * 16), current_thread); 875 RegWrite(PSTATE_REGISTER, GdbHexToInt(buffer_ptr + i * 16), current_thread);
876 } else if (reg >= UC_ARM64_REG_Q0 && reg < FPSCR_REGISTER) { 876 } else if (reg >= UC_ARM64_REG_Q0 && reg < FPCR_REGISTER) {
877 RegWrite(reg, GdbHexToLong(buffer_ptr + i * 16), current_thread); 877 RegWrite(reg, GdbHexToLong(buffer_ptr + i * 16), current_thread);
878 } else if (reg == FPSCR_REGISTER) { 878 } else if (reg == FPCR_REGISTER) {
879 RegWrite(TODO_DUMMY_REG_998, GdbHexToLong(buffer_ptr + i * 16), current_thread); 879 RegWrite(TODO_DUMMY_REG_998, GdbHexToLong(buffer_ptr + i * 16), current_thread);
880 } else { 880 } else {
881 UNIMPLEMENTED(); 881 UNIMPLEMENTED();
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 89cd5f401..d4183d6e3 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -217,8 +217,8 @@ static void ResetThreadContext(Core::ARM_Interface::ThreadContext& context, VAdd
217 context.cpu_registers[0] = arg; 217 context.cpu_registers[0] = arg;
218 context.pc = entry_point; 218 context.pc = entry_point;
219 context.sp = stack_top; 219 context.sp = stack_top;
220 context.cpsr = 0; 220 context.pstate = 0;
221 context.fpscr = 0; 221 context.fpcr = 0;
222} 222}
223 223
224ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name, VAddr entry_point, 224ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name, VAddr entry_point,
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 4a79ce39c..f5ae57039 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -14,6 +14,7 @@ add_library(video_core STATIC
14 engines/maxwell_dma.cpp 14 engines/maxwell_dma.cpp
15 engines/maxwell_dma.h 15 engines/maxwell_dma.h
16 engines/shader_bytecode.h 16 engines/shader_bytecode.h
17 engines/shader_header.h
17 gpu.cpp 18 gpu.cpp
18 gpu.h 19 gpu.h
19 macro_interpreter.cpp 20 macro_interpreter.cpp
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h
new file mode 100644
index 000000000..a885ee3cf
--- /dev/null
+++ b/src/video_core/engines/shader_header.h
@@ -0,0 +1,103 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/bit_field.h"
8#include "common/common_funcs.h"
9#include "common/common_types.h"
10
11namespace Tegra::Shader {
12
13enum class OutputTopology : u32 {
14 PointList = 1,
15 LineStrip = 6,
16 TriangleStrip = 7,
17};
18
19// Documentation in:
20// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture
21struct Header {
22 union {
23 BitField<0, 5, u32> sph_type;
24 BitField<5, 5, u32> version;
25 BitField<10, 4, u32> shader_type;
26 BitField<14, 1, u32> mrt_enable;
27 BitField<15, 1, u32> kills_pixels;
28 BitField<16, 1, u32> does_global_store;
29 BitField<17, 4, u32> sass_version;
30 BitField<21, 5, u32> reserved;
31 BitField<26, 1, u32> does_load_or_store;
32 BitField<27, 1, u32> does_fp64;
33 BitField<28, 4, u32> stream_out_mask;
34 } common0;
35
36 union {
37 BitField<0, 24, u32> shader_local_memory_low_size;
38 BitField<24, 8, u32> per_patch_attribute_count;
39 } common1;
40
41 union {
42 BitField<0, 24, u32> shader_local_memory_high_size;
43 BitField<24, 8, u32> threads_per_input_primitive;
44 } common2;
45
46 union {
47 BitField<0, 24, u32> shader_local_memory_crs_size;
48 BitField<24, 4, OutputTopology> output_topology;
49 BitField<28, 4, u32> reserved;
50 } common3;
51
52 union {
53 BitField<0, 12, u32> max_output_vertices;
54 BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
55 BitField<24, 4, u32> reserved;
56 BitField<12, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
57 } common4;
58
59 union {
60 struct {
61 INSERT_PADDING_BYTES(3); // ImapSystemValuesA
62 INSERT_PADDING_BYTES(1); // ImapSystemValuesB
63 INSERT_PADDING_BYTES(16); // ImapGenericVector[32]
64 INSERT_PADDING_BYTES(2); // ImapColor
65 INSERT_PADDING_BYTES(2); // ImapSystemValuesC
66 INSERT_PADDING_BYTES(5); // ImapFixedFncTexture[10]
67 INSERT_PADDING_BYTES(1); // ImapReserved
68 INSERT_PADDING_BYTES(3); // OmapSystemValuesA
69 INSERT_PADDING_BYTES(1); // OmapSystemValuesB
70 INSERT_PADDING_BYTES(16); // OmapGenericVector[32]
71 INSERT_PADDING_BYTES(2); // OmapColor
72 INSERT_PADDING_BYTES(2); // OmapSystemValuesC
73 INSERT_PADDING_BYTES(5); // OmapFixedFncTexture[10]
74 INSERT_PADDING_BYTES(1); // OmapReserved
75 } vtg;
76
77 struct {
78 INSERT_PADDING_BYTES(3); // ImapSystemValuesA
79 INSERT_PADDING_BYTES(1); // ImapSystemValuesB
80 INSERT_PADDING_BYTES(32); // ImapGenericVector[32]
81 INSERT_PADDING_BYTES(2); // ImapColor
82 INSERT_PADDING_BYTES(2); // ImapSystemValuesC
83 INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10]
84 INSERT_PADDING_BYTES(2); // ImapReserved
85 struct {
86 u32 target;
87 union {
88 BitField<0, 1, u32> sample_mask;
89 BitField<1, 1, u32> depth;
90 BitField<2, 30, u32> reserved;
91 };
92 } omap;
93 bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const {
94 const u32 bit = render_target * 4 + component;
95 return omap.target & (1 << bit);
96 }
97 } ps;
98 };
99};
100
101static_assert(sizeof(Header) == 0x50, "Incorrect structure size");
102
103} // namespace Tegra::Shader
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 252ff18fc..a1638c12e 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -12,6 +12,7 @@
12#include "common/assert.h" 12#include "common/assert.h"
13#include "common/common_types.h" 13#include "common/common_types.h"
14#include "video_core/engines/shader_bytecode.h" 14#include "video_core/engines/shader_bytecode.h"
15#include "video_core/engines/shader_header.h"
15#include "video_core/renderer_opengl/gl_rasterizer.h" 16#include "video_core/renderer_opengl/gl_rasterizer.h"
16#include "video_core/renderer_opengl/gl_shader_decompiler.h" 17#include "video_core/renderer_opengl/gl_shader_decompiler.h"
17 18
@@ -26,7 +27,7 @@ using Tegra::Shader::Sampler;
26using Tegra::Shader::SubOp; 27using Tegra::Shader::SubOp;
27 28
28constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH; 29constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH;
29constexpr u32 PROGRAM_HEADER_SIZE = 0x50; 30constexpr u32 PROGRAM_HEADER_SIZE = sizeof(Tegra::Shader::Header);
30 31
31class DecompileFail : public std::runtime_error { 32class DecompileFail : public std::runtime_error {
32public: 33public:
@@ -674,7 +675,7 @@ public:
674 u32 main_offset, Maxwell3D::Regs::ShaderStage stage, const std::string& suffix) 675 u32 main_offset, Maxwell3D::Regs::ShaderStage stage, const std::string& suffix)
675 : subroutines(subroutines), program_code(program_code), main_offset(main_offset), 676 : subroutines(subroutines), program_code(program_code), main_offset(main_offset),
676 stage(stage), suffix(suffix) { 677 stage(stage), suffix(suffix) {
677 678 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
678 Generate(suffix); 679 Generate(suffix);
679 } 680 }
680 681
@@ -688,23 +689,6 @@ public:
688 } 689 }
689 690
690private: 691private:
691 // Shader program header for a Fragment Shader.
692 struct FragmentHeader {
693 INSERT_PADDING_WORDS(5);
694 INSERT_PADDING_WORDS(13);
695 u32 enabled_color_outputs;
696 union {
697 BitField<0, 1, u32> writes_samplemask;
698 BitField<1, 1, u32> writes_depth;
699 };
700
701 bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const {
702 const u32 bit = render_target * 4 + component;
703 return enabled_color_outputs & (1 << bit);
704 }
705 };
706 static_assert(sizeof(FragmentHeader) == PROGRAM_HEADER_SIZE, "FragmentHeader size is wrong");
707
708 /// Gets the Subroutine object corresponding to the specified address. 692 /// Gets the Subroutine object corresponding to the specified address.
709 const Subroutine& GetSubroutine(u32 begin, u32 end) const { 693 const Subroutine& GetSubroutine(u32 begin, u32 end) const {
710 const auto iter = subroutines.find(Subroutine{begin, end, suffix}); 694 const auto iter = subroutines.find(Subroutine{begin, end, suffix});
@@ -1010,10 +994,8 @@ private:
1010 /// Writes the output values from a fragment shader to the corresponding GLSL output variables. 994 /// Writes the output values from a fragment shader to the corresponding GLSL output variables.
1011 void EmitFragmentOutputsWrite() { 995 void EmitFragmentOutputsWrite() {
1012 ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment); 996 ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment);
1013 FragmentHeader header;
1014 std::memcpy(&header, program_code.data(), PROGRAM_HEADER_SIZE);
1015 997
1016 ASSERT_MSG(header.writes_samplemask == 0, "Samplemask write is unimplemented"); 998 ASSERT_MSG(header.ps.omap.sample_mask == 0, "Samplemask write is unimplemented");
1017 999
1018 // Write the color outputs using the data in the shader registers, disabled 1000 // Write the color outputs using the data in the shader registers, disabled
1019 // rendertargets/components are skipped in the register assignment. 1001 // rendertargets/components are skipped in the register assignment.
@@ -1022,7 +1004,7 @@ private:
1022 ++render_target) { 1004 ++render_target) {
1023 // TODO(Subv): Figure out how dual-source blending is configured in the Switch. 1005 // TODO(Subv): Figure out how dual-source blending is configured in the Switch.
1024 for (u32 component = 0; component < 4; ++component) { 1006 for (u32 component = 0; component < 4; ++component) {
1025 if (header.IsColorComponentOutputEnabled(render_target, component)) { 1007 if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
1026 shader.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component, 1008 shader.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component,
1027 regs.GetRegisterAsFloat(current_reg))); 1009 regs.GetRegisterAsFloat(current_reg)));
1028 ++current_reg; 1010 ++current_reg;
@@ -1030,7 +1012,7 @@ private:
1030 } 1012 }
1031 } 1013 }
1032 1014
1033 if (header.writes_depth) { 1015 if (header.ps.omap.depth) {
1034 // The depth output is always 2 registers after the last color output, and current_reg 1016 // The depth output is always 2 registers after the last color output, and current_reg
1035 // already contains one past the last color register. 1017 // already contains one past the last color register.
1036 1018
@@ -2666,6 +2648,7 @@ private:
2666private: 2648private:
2667 const std::set<Subroutine>& subroutines; 2649 const std::set<Subroutine>& subroutines;
2668 const ProgramCode& program_code; 2650 const ProgramCode& program_code;
2651 Tegra::Shader::Header header;
2669 const u32 main_offset; 2652 const u32 main_offset;
2670 Maxwell3D::Regs::ShaderStage stage; 2653 Maxwell3D::Regs::ShaderStage stage;
2671 const std::string& suffix; 2654 const std::string& suffix;