-rw-r--r--  src/core/arm/dyncom/arm_dyncom_interpreter.cpp |  75
-rw-r--r--  src/core/arm/skyeye_common/vfp/vfp_helper.h    | 117
-rw-r--r--  src/core/arm/skyeye_common/vfp/vfpdouble.cpp   |  35
-rw-r--r--  src/core/arm/skyeye_common/vfp/vfpsingle.cpp   |  30
-rw-r--r--  src/core/core.cpp                              |   4
-rw-r--r--  src/core/hle/kernel/kernel.cpp                 |   6
-rw-r--r--  src/core/hle/kernel/process.cpp                |   2
-rw-r--r--  src/core/hle/kernel/process.h                  |   8
-rw-r--r--  src/core/hle/kernel/thread.cpp                 |  71
-rw-r--r--  src/core/hle/kernel/thread.h                   |  21
-rw-r--r--  src/core/hle/svc.cpp                           |  33
-rw-r--r--  src/core/mem_map.h                             |   8
-rw-r--r--  src/video_core/rasterizer.cpp                  |   5
-rw-r--r--  src/video_core/vertex_shader.cpp               |   5
14 files changed, 269 insertions, 151 deletions
diff --git a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
index c2973fb39..315b4cc91 100644
--- a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
+++ b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
@@ -992,6 +992,14 @@ typedef struct _mcr_inst {
992 unsigned int inst; 992 unsigned int inst;
993} mcr_inst; 993} mcr_inst;
994 994
995typedef struct mcrr_inst {
996 unsigned int opcode_1;
997 unsigned int cp_num;
998 unsigned int crm;
999 unsigned int rt;
1000 unsigned int rt2;
1001} mcrr_inst;
1002
995typedef struct _mrs_inst { 1003typedef struct _mrs_inst {
996 unsigned int R; 1004 unsigned int R;
997 unsigned int Rd; 1005 unsigned int Rd;
@@ -1261,11 +1269,6 @@ static get_addr_fp_t get_calc_addr_op(unsigned int inst) {
1261#define CHECK_RM (inst_cream->Rm == 15) 1269#define CHECK_RM (inst_cream->Rm == 15)
1262#define CHECK_RS (inst_cream->Rs == 15) 1270#define CHECK_RS (inst_cream->Rs == 15)
1263 1271
1264#define UNIMPLEMENTED_INSTRUCTION(mnemonic) \
1265 LOG_ERROR(Core_ARM11, "unimplemented instruction: %s", mnemonic); \
1266 CITRA_IGNORE_EXIT(-1); \
1267 return nullptr;
1268
1269static ARM_INST_PTR INTERPRETER_TRANSLATE(adc)(unsigned int inst, int index) 1272static ARM_INST_PTR INTERPRETER_TRANSLATE(adc)(unsigned int inst, int index)
1270{ 1273{
1271 arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(adc_inst)); 1274 arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(adc_inst));
@@ -1871,7 +1874,26 @@ static ARM_INST_PTR INTERPRETER_TRANSLATE(mcr)(unsigned int inst, int index)
1871 inst_cream->inst = inst; 1874 inst_cream->inst = inst;
1872 return inst_base; 1875 return inst_base;
1873} 1876}
1874static ARM_INST_PTR INTERPRETER_TRANSLATE(mcrr)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("MCRR"); } 1877
1878static ARM_INST_PTR INTERPRETER_TRANSLATE(mcrr)(unsigned int inst, int index)
1879{
1880 arm_inst* const inst_base = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(mcrr_inst));
1881 mcrr_inst* const inst_cream = (mcrr_inst*)inst_base->component;
1882
1883 inst_base->cond = BITS(inst, 28, 31);
1884 inst_base->idx = index;
1885 inst_base->br = NON_BRANCH;
1886 inst_base->load_r15 = 0;
1887
1888 inst_cream->crm = BITS(inst, 0, 3);
1889 inst_cream->opcode_1 = BITS(inst, 4, 7);
1890 inst_cream->cp_num = BITS(inst, 8, 11);
1891 inst_cream->rt = BITS(inst, 12, 15);
1892 inst_cream->rt2 = BITS(inst, 16, 19);
1893
1894 return inst_base;
1895}
1896
1875static ARM_INST_PTR INTERPRETER_TRANSLATE(mla)(unsigned int inst, int index) 1897static ARM_INST_PTR INTERPRETER_TRANSLATE(mla)(unsigned int inst, int index)
1876{ 1898{
1877 arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(mla_inst)); 1899 arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(mla_inst));
@@ -1930,7 +1952,12 @@ static ARM_INST_PTR INTERPRETER_TRANSLATE(mrc)(unsigned int inst, int index)
1930 inst_cream->inst = inst; 1952 inst_cream->inst = inst;
1931 return inst_base; 1953 return inst_base;
1932} 1954}
1933static ARM_INST_PTR INTERPRETER_TRANSLATE(mrrc)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("MRRC"); } 1955
1956static ARM_INST_PTR INTERPRETER_TRANSLATE(mrrc)(unsigned int inst, int index)
1957{
1958 return INTERPRETER_TRANSLATE(mcrr)(inst, index);
1959}
1960
1934static ARM_INST_PTR INTERPRETER_TRANSLATE(mrs)(unsigned int inst, int index) 1961static ARM_INST_PTR INTERPRETER_TRANSLATE(mrs)(unsigned int inst, int index)
1935{ 1962{
1936 arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(mrs_inst)); 1963 arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(mrs_inst));
@@ -4754,7 +4781,24 @@ unsigned InterpreterMainLoop(ARMul_State* cpu) {
4754 FETCH_INST; 4781 FETCH_INST;
4755 GOTO_NEXT_INST; 4782 GOTO_NEXT_INST;
4756 } 4783 }
4784
4757 MCRR_INST: 4785 MCRR_INST:
4786 {
4787 // Stubbed, as the MPCore doesn't have any registers that are accessible
4788 // through this instruction.
4789 if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) {
4790 mcrr_inst* const inst_cream = (mcrr_inst*)inst_base->component;
4791
4792 LOG_ERROR(Core_ARM11, "MCRR executed | Coprocessor: %u, CRm %u, opc1: %u, Rt: %u, Rt2: %u",
4793 inst_cream->cp_num, inst_cream->crm, inst_cream->opcode_1, inst_cream->rt, inst_cream->rt2);
4794 }
4795
4796 cpu->Reg[15] += GET_INST_SIZE(cpu);
4797 INC_PC(sizeof(mcrr_inst));
4798 FETCH_INST;
4799 GOTO_NEXT_INST;
4800 }
4801
4758 MLA_INST: 4802 MLA_INST:
4759 { 4803 {
4760 if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) { 4804 if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) {
@@ -4830,7 +4874,24 @@ unsigned InterpreterMainLoop(ARMul_State* cpu) {
4830 FETCH_INST; 4874 FETCH_INST;
4831 GOTO_NEXT_INST; 4875 GOTO_NEXT_INST;
4832 } 4876 }
4877
4833 MRRC_INST: 4878 MRRC_INST:
4879 {
4880 // Stubbed, as the MPCore doesn't have any registers that are accessible
4881 // through this instruction.
4882 if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) {
4883 mcrr_inst* const inst_cream = (mcrr_inst*)inst_base->component;
4884
4885 LOG_ERROR(Core_ARM11, "MRRC executed | Coprocessor: %u, CRm %u, opc1: %u, Rt: %u, Rt2: %u",
4886 inst_cream->cp_num, inst_cream->crm, inst_cream->opcode_1, inst_cream->rt, inst_cream->rt2);
4887 }
4888
4889 cpu->Reg[15] += GET_INST_SIZE(cpu);
4890 INC_PC(sizeof(mcrr_inst));
4891 FETCH_INST;
4892 GOTO_NEXT_INST;
4893 }
4894
4834 MRS_INST: 4895 MRS_INST:
4835 { 4896 {
4836 if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) { 4897 if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) {
diff --git a/src/core/arm/skyeye_common/vfp/vfp_helper.h b/src/core/arm/skyeye_common/vfp/vfp_helper.h
index 6b3dae280..ccc0212ab 100644
--- a/src/core/arm/skyeye_common/vfp/vfp_helper.h
+++ b/src/core/arm/skyeye_common/vfp/vfp_helper.h
@@ -35,6 +35,7 @@
35#include <cstdio> 35#include <cstdio>
36#include "common/common_types.h" 36#include "common/common_types.h"
37#include "core/arm/skyeye_common/armdefs.h" 37#include "core/arm/skyeye_common/armdefs.h"
38#include "core/arm/skyeye_common/vfp/asm_vfp.h"
38 39
39#define do_div(n, base) {n/=base;} 40#define do_div(n, base) {n/=base;}
40 41
@@ -236,33 +237,6 @@ struct vfp_single {
236#define vfp_single_packed_exponent(v) (((v) >> VFP_SINGLE_MANTISSA_BITS) & ((1 << VFP_SINGLE_EXPONENT_BITS) - 1)) 237#define vfp_single_packed_exponent(v) (((v) >> VFP_SINGLE_MANTISSA_BITS) & ((1 << VFP_SINGLE_EXPONENT_BITS) - 1))
237#define vfp_single_packed_mantissa(v) ((v) & ((1 << VFP_SINGLE_MANTISSA_BITS) - 1)) 238#define vfp_single_packed_mantissa(v) ((v) & ((1 << VFP_SINGLE_MANTISSA_BITS) - 1))
238 239
239// Unpack a single-precision float. Note that this returns the magnitude
240// of the single-precision float mantissa with the 1. if necessary,
241// aligned to bit 30.
242static inline void vfp_single_unpack(vfp_single* s, s32 val)
243{
244 u32 significand;
245
246 s->sign = vfp_single_packed_sign(val) >> 16,
247 s->exponent = vfp_single_packed_exponent(val);
248
249 significand = (u32) val;
250 significand = (significand << (32 - VFP_SINGLE_MANTISSA_BITS)) >> 2;
251 if (s->exponent && s->exponent != 255)
252 significand |= 0x40000000;
253 s->significand = significand;
254}
255
256// Re-pack a single-precision float. This assumes that the float is
257// already normalised such that the MSB is bit 30, _not_ bit 31.
258static inline s32 vfp_single_pack(vfp_single* s)
259{
260 u32 val = (s->sign << 16) +
261 (s->exponent << VFP_SINGLE_MANTISSA_BITS) +
262 (s->significand >> VFP_SINGLE_LOW_BITS);
263 return (s32)val;
264}
265
266enum : u32 { 240enum : u32 {
267 VFP_NUMBER = (1 << 0), 241 VFP_NUMBER = (1 << 0),
268 VFP_ZERO = (1 << 1), 242 VFP_ZERO = (1 << 1),
@@ -294,6 +268,39 @@ static inline int vfp_single_type(vfp_single* s)
294 return type; 268 return type;
295} 269}
296 270
271// Unpack a single-precision float. Note that this returns the magnitude
272// of the single-precision float mantissa with the 1. if necessary,
273// aligned to bit 30.
274static inline void vfp_single_unpack(vfp_single* s, s32 val, u32* fpscr)
275{
276 s->sign = vfp_single_packed_sign(val) >> 16,
277 s->exponent = vfp_single_packed_exponent(val);
278
279 u32 significand = ((u32)val << (32 - VFP_SINGLE_MANTISSA_BITS)) >> 2;
280 if (s->exponent && s->exponent != 255)
281 significand |= 0x40000000;
282 s->significand = significand;
283
284 // If flush-to-zero mode is enabled, turn the denormal into zero.
285 // On a VFPv2 architecture, the sign of the zero is always positive.
286 if ((*fpscr & FPSCR_FLUSH_TO_ZERO) != 0 && (vfp_single_type(s) & VFP_DENORMAL) != 0) {
287 s->sign = 0;
288 s->exponent = 0;
289 s->significand = 0;
290 *fpscr |= FPSCR_IDC;
291 }
292}
293
294// Re-pack a single-precision float. This assumes that the float is
295// already normalised such that the MSB is bit 30, _not_ bit 31.
296static inline s32 vfp_single_pack(vfp_single* s)
297{
298 u32 val = (s->sign << 16) +
299 (s->exponent << VFP_SINGLE_MANTISSA_BITS) +
300 (s->significand >> VFP_SINGLE_LOW_BITS);
301 return (s32)val;
302}
303
297 304
298u32 vfp_single_normaliseround(ARMul_State* state, int sd, vfp_single* vs, u32 fpscr, u32 exceptions, const char* func); 305u32 vfp_single_normaliseround(ARMul_State* state, int sd, vfp_single* vs, u32 fpscr, u32 exceptions, const char* func);
299 306
@@ -328,24 +335,49 @@ struct vfp_double {
328#define vfp_double_packed_exponent(v) (((v) >> VFP_DOUBLE_MANTISSA_BITS) & ((1 << VFP_DOUBLE_EXPONENT_BITS) - 1)) 335#define vfp_double_packed_exponent(v) (((v) >> VFP_DOUBLE_MANTISSA_BITS) & ((1 << VFP_DOUBLE_EXPONENT_BITS) - 1))
329#define vfp_double_packed_mantissa(v) ((v) & ((1ULL << VFP_DOUBLE_MANTISSA_BITS) - 1)) 336#define vfp_double_packed_mantissa(v) ((v) & ((1ULL << VFP_DOUBLE_MANTISSA_BITS) - 1))
330 337
338static inline int vfp_double_type(vfp_double* s)
339{
340 int type = VFP_NUMBER;
341 if (s->exponent == 2047) {
342 if (s->significand == 0)
343 type = VFP_INFINITY;
344 else if (s->significand & VFP_DOUBLE_SIGNIFICAND_QNAN)
345 type = VFP_QNAN;
346 else
347 type = VFP_SNAN;
348 } else if (s->exponent == 0) {
349 if (s->significand == 0)
350 type |= VFP_ZERO;
351 else
352 type |= VFP_DENORMAL;
353 }
354 return type;
355}
356
331// Unpack a double-precision float. Note that this returns the magnitude 357// Unpack a double-precision float. Note that this returns the magnitude
332// of the double-precision float mantissa with the 1. if necessary, 358// of the double-precision float mantissa with the 1. if necessary,
333// aligned to bit 62. 359// aligned to bit 62.
334static inline void vfp_double_unpack(vfp_double* s, s64 val) 360static inline void vfp_double_unpack(vfp_double* s, s64 val, u32* fpscr)
335{ 361{
336 u64 significand;
337
338 s->sign = vfp_double_packed_sign(val) >> 48; 362 s->sign = vfp_double_packed_sign(val) >> 48;
339 s->exponent = vfp_double_packed_exponent(val); 363 s->exponent = vfp_double_packed_exponent(val);
340 364
341 significand = (u64) val; 365 u64 significand = ((u64)val << (64 - VFP_DOUBLE_MANTISSA_BITS)) >> 2;
342 significand = (significand << (64 - VFP_DOUBLE_MANTISSA_BITS)) >> 2;
343 if (s->exponent && s->exponent != 2047) 366 if (s->exponent && s->exponent != 2047)
344 significand |= (1ULL << 62); 367 significand |= (1ULL << 62);
345 s->significand = significand; 368 s->significand = significand;
369
370 // If flush-to-zero mode is enabled, turn the denormal into zero.
371 // On a VFPv2 architecture, the sign of the zero is always positive.
372 if ((*fpscr & FPSCR_FLUSH_TO_ZERO) != 0 && (vfp_double_type(s) & VFP_DENORMAL) != 0) {
373 s->sign = 0;
374 s->exponent = 0;
375 s->significand = 0;
376 *fpscr |= FPSCR_IDC;
377 }
346} 378}
347 379
348// Re-pack a double-precision float. This assumes that the float is 380// Re-pack a double-precision float. This assumes that the float is
349// already normalised such that the MSB is bit 30, _not_ bit 31. 381// already normalised such that the MSB is bit 30, _not_ bit 31.
350static inline s64 vfp_double_pack(vfp_double* s) 382static inline s64 vfp_double_pack(vfp_double* s)
351{ 383{
@@ -355,25 +387,6 @@ static inline s64 vfp_double_pack(vfp_double* s)
355 return (s64)val; 387 return (s64)val;
356} 388}
357 389
358static inline int vfp_double_type(vfp_double* s)
359{
360 int type = VFP_NUMBER;
361 if (s->exponent == 2047) {
362 if (s->significand == 0)
363 type = VFP_INFINITY;
364 else if (s->significand & VFP_DOUBLE_SIGNIFICAND_QNAN)
365 type = VFP_QNAN;
366 else
367 type = VFP_SNAN;
368 } else if (s->exponent == 0) {
369 if (s->significand == 0)
370 type |= VFP_ZERO;
371 else
372 type |= VFP_DENORMAL;
373 }
374 return type;
375}
376
377u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand); 390u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand);
378 391
379// A special flag to tell the normalisation code not to normalise. 392// A special flag to tell the normalisation code not to normalise.
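
The behavioural change in the two unpack helpers is easiest to see on a packed IEEE-754 single outside the emulator. The sketch below mirrors the logic added above: if the value is denormal (zero exponent, non-zero mantissa) and flush-to-zero is enabled, it is replaced with +0 and the input-denormal flag is raised. The FZ and IDC bit positions used here (24 and 7) follow the usual ARM FPSCR layout but are assumptions for this sketch, not values taken from asm_vfp.h.

#include <cassert>
#include <cstdint>

// Assumed FPSCR bit positions for this sketch (see lead-in above).
constexpr std::uint32_t kFpscrFlushToZero = 1u << 24;  // FZ
constexpr std::uint32_t kFpscrInputDenorm = 1u << 7;   // IDC

// Flush a packed single-precision value to +0 if it is denormal and FZ is set,
// raising IDC in the status word - the same policy vfp_single_unpack() now applies.
std::uint32_t FlushIfDenormal(std::uint32_t packed, std::uint32_t* fpscr) {
    const std::uint32_t exponent = (packed >> 23) & 0xFFu;
    const std::uint32_t mantissa = packed & 0x7FFFFFu;
    if ((*fpscr & kFpscrFlushToZero) != 0 && exponent == 0 && mantissa != 0) {
        *fpscr |= kFpscrInputDenorm;
        return 0;  // on VFPv2 the flushed zero is always positive
    }
    return packed;
}

int main() {
    std::uint32_t fpscr = kFpscrFlushToZero;
    assert(FlushIfDenormal(0x80000001u, &fpscr) == 0);            // -denormal -> +0
    assert((fpscr & kFpscrInputDenorm) != 0);                     // IDC raised
    assert(FlushIfDenormal(0x3F800000u, &fpscr) == 0x3F800000u);  // 1.0f untouched
}
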
diff --git a/src/core/arm/skyeye_common/vfp/vfpdouble.cpp b/src/core/arm/skyeye_common/vfp/vfpdouble.cpp
index d76d37fd4..ab9fec39d 100644
--- a/src/core/arm/skyeye_common/vfp/vfpdouble.cpp
+++ b/src/core/arm/skyeye_common/vfp/vfpdouble.cpp
@@ -291,7 +291,8 @@ static u32 vfp_double_fsqrt(ARMul_State* state, int dd, int unused, int dm, u32
291 vfp_double vdm, vdd, *vdp; 291 vfp_double vdm, vdd, *vdp;
292 int ret, tm; 292 int ret, tm;
293 293
294 vfp_double_unpack(&vdm, vfp_get_double(state, dm)); 294 vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr);
295
295 tm = vfp_double_type(&vdm); 296 tm = vfp_double_type(&vdm);
296 if (tm & (VFP_NAN|VFP_INFINITY)) { 297 if (tm & (VFP_NAN|VFP_INFINITY)) {
297 vdp = &vdd; 298 vdp = &vdd;
@@ -473,7 +474,7 @@ static u32 vfp_double_fcvts(ARMul_State* state, int sd, int unused, int dm, u32
473 u32 exceptions = 0; 474 u32 exceptions = 0;
474 475
475 LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__); 476 LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__);
476 vfp_double_unpack(&vdm, vfp_get_double(state, dm)); 477 vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr);
477 478
478 tm = vfp_double_type(&vdm); 479 tm = vfp_double_type(&vdm);
479 480
@@ -543,7 +544,7 @@ static u32 vfp_double_ftoui(ARMul_State* state, int sd, int unused, int dm, u32
543 int tm; 544 int tm;
544 545
545 LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__); 546 LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__);
546 vfp_double_unpack(&vdm, vfp_get_double(state, dm)); 547 vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr);
547 548
548 /* 549 /*
549 * Do we have a denormalised number? 550 * Do we have a denormalised number?
@@ -624,7 +625,7 @@ static u32 vfp_double_ftosi(ARMul_State* state, int sd, int unused, int dm, u32
624 int tm; 625 int tm;
625 626
626 LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__); 627 LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__);
627 vfp_double_unpack(&vdm, vfp_get_double(state, dm)); 628 vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr);
628 vfp_double_dump("VDM", &vdm); 629 vfp_double_dump("VDM", &vdm);
629 630
630 /* 631 /*
@@ -896,11 +897,11 @@ vfp_double_multiply_accumulate(ARMul_State* state, int dd, int dn, int dm, u32 f
896 struct vfp_double vdd, vdp, vdn, vdm; 897 struct vfp_double vdd, vdp, vdn, vdm;
897 u32 exceptions; 898 u32 exceptions;
898 899
899 vfp_double_unpack(&vdn, vfp_get_double(state, dn)); 900 vfp_double_unpack(&vdn, vfp_get_double(state, dn), &fpscr);
900 if (vdn.exponent == 0 && vdn.significand) 901 if (vdn.exponent == 0 && vdn.significand)
901 vfp_double_normalise_denormal(&vdn); 902 vfp_double_normalise_denormal(&vdn);
902 903
903 vfp_double_unpack(&vdm, vfp_get_double(state, dm)); 904 vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr);
904 if (vdm.exponent == 0 && vdm.significand) 905 if (vdm.exponent == 0 && vdm.significand)
905 vfp_double_normalise_denormal(&vdm); 906 vfp_double_normalise_denormal(&vdm);
906 907
@@ -908,7 +909,7 @@ vfp_double_multiply_accumulate(ARMul_State* state, int dd, int dn, int dm, u32 f
908 if (negate & NEG_MULTIPLY) 909 if (negate & NEG_MULTIPLY)
909 vdp.sign = vfp_sign_negate(vdp.sign); 910 vdp.sign = vfp_sign_negate(vdp.sign);
910 911
911 vfp_double_unpack(&vdn, vfp_get_double(state, dd)); 912 vfp_double_unpack(&vdn, vfp_get_double(state, dd), &fpscr);
912 if (vdn.exponent == 0 && vdn.significand != 0) 913 if (vdn.exponent == 0 && vdn.significand != 0)
913 vfp_double_normalise_denormal(&vdn); 914 vfp_double_normalise_denormal(&vdn);
914 915
@@ -969,11 +970,11 @@ static u32 vfp_double_fmul(ARMul_State* state, int dd, int dn, int dm, u32 fpscr
969 u32 exceptions; 970 u32 exceptions;
970 971
971 LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__); 972 LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__);
972 vfp_double_unpack(&vdn, vfp_get_double(state, dn)); 973 vfp_double_unpack(&vdn, vfp_get_double(state, dn), &fpscr);
973 if (vdn.exponent == 0 && vdn.significand) 974 if (vdn.exponent == 0 && vdn.significand)
974 vfp_double_normalise_denormal(&vdn); 975 vfp_double_normalise_denormal(&vdn);
975 976
976 vfp_double_unpack(&vdm, vfp_get_double(state, dm)); 977 vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr);
977 if (vdm.exponent == 0 && vdm.significand) 978 if (vdm.exponent == 0 && vdm.significand)
978 vfp_double_normalise_denormal(&vdm); 979 vfp_double_normalise_denormal(&vdm);
979 980
@@ -990,11 +991,11 @@ static u32 vfp_double_fnmul(ARMul_State* state, int dd, int dn, int dm, u32 fpsc
990 u32 exceptions; 991 u32 exceptions;
991 992
992 LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__); 993 LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__);
993 vfp_double_unpack(&vdn, vfp_get_double(state, dn)); 994 vfp_double_unpack(&vdn, vfp_get_double(state, dn), &fpscr);
994 if (vdn.exponent == 0 && vdn.significand) 995 if (vdn.exponent == 0 && vdn.significand)
995 vfp_double_normalise_denormal(&vdn); 996 vfp_double_normalise_denormal(&vdn);
996 997
997 vfp_double_unpack(&vdm, vfp_get_double(state, dm)); 998 vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr);
998 if (vdm.exponent == 0 && vdm.significand) 999 if (vdm.exponent == 0 && vdm.significand)
999 vfp_double_normalise_denormal(&vdm); 1000 vfp_double_normalise_denormal(&vdm);
1000 1001
@@ -1013,11 +1014,11 @@ static u32 vfp_double_fadd(ARMul_State* state, int dd, int dn, int dm, u32 fpscr
1013 u32 exceptions; 1014 u32 exceptions;
1014 1015
1015 LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__); 1016 LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__);
1016 vfp_double_unpack(&vdn, vfp_get_double(state, dn)); 1017 vfp_double_unpack(&vdn, vfp_get_double(state, dn), &fpscr);
1017 if (vdn.exponent == 0 && vdn.significand) 1018 if (vdn.exponent == 0 && vdn.significand)
1018 vfp_double_normalise_denormal(&vdn); 1019 vfp_double_normalise_denormal(&vdn);
1019 1020
1020 vfp_double_unpack(&vdm, vfp_get_double(state, dm)); 1021 vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr);
1021 if (vdm.exponent == 0 && vdm.significand) 1022 if (vdm.exponent == 0 && vdm.significand)
1022 vfp_double_normalise_denormal(&vdm); 1023 vfp_double_normalise_denormal(&vdm);
1023 1024
@@ -1035,11 +1036,11 @@ static u32 vfp_double_fsub(ARMul_State* state, int dd, int dn, int dm, u32 fpscr
1035 u32 exceptions; 1036 u32 exceptions;
1036 1037
1037 LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__); 1038 LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__);
1038 vfp_double_unpack(&vdn, vfp_get_double(state, dn)); 1039 vfp_double_unpack(&vdn, vfp_get_double(state, dn), &fpscr);
1039 if (vdn.exponent == 0 && vdn.significand) 1040 if (vdn.exponent == 0 && vdn.significand)
1040 vfp_double_normalise_denormal(&vdn); 1041 vfp_double_normalise_denormal(&vdn);
1041 1042
1042 vfp_double_unpack(&vdm, vfp_get_double(state, dm)); 1043 vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr);
1043 if (vdm.exponent == 0 && vdm.significand) 1044 if (vdm.exponent == 0 && vdm.significand)
1044 vfp_double_normalise_denormal(&vdm); 1045 vfp_double_normalise_denormal(&vdm);
1045 1046
@@ -1063,8 +1064,8 @@ static u32 vfp_double_fdiv(ARMul_State* state, int dd, int dn, int dm, u32 fpscr
1063 int tm, tn; 1064 int tm, tn;
1064 1065
1065 LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__); 1066 LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__);
1066 vfp_double_unpack(&vdn, vfp_get_double(state, dn)); 1067 vfp_double_unpack(&vdn, vfp_get_double(state, dn), &fpscr);
1067 vfp_double_unpack(&vdm, vfp_get_double(state, dm)); 1068 vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr);
1068 1069
1069 vdd.sign = vdn.sign ^ vdm.sign; 1070 vdd.sign = vdn.sign ^ vdm.sign;
1070 1071
diff --git a/src/core/arm/skyeye_common/vfp/vfpsingle.cpp b/src/core/arm/skyeye_common/vfp/vfpsingle.cpp
index a78bdc430..4dfe0254d 100644
--- a/src/core/arm/skyeye_common/vfp/vfpsingle.cpp
+++ b/src/core/arm/skyeye_common/vfp/vfpsingle.cpp
@@ -330,7 +330,7 @@ static u32 vfp_single_fsqrt(ARMul_State* state, int sd, int unused, s32 m, u32 f
330 struct vfp_single vsm, vsd, *vsp; 330 struct vfp_single vsm, vsd, *vsp;
331 int ret, tm; 331 int ret, tm;
332 332
333 vfp_single_unpack(&vsm, m); 333 vfp_single_unpack(&vsm, m, &fpscr);
334 tm = vfp_single_type(&vsm); 334 tm = vfp_single_type(&vsm);
335 if (tm & (VFP_NAN|VFP_INFINITY)) { 335 if (tm & (VFP_NAN|VFP_INFINITY)) {
336 vsp = &vsd; 336 vsp = &vsd;
@@ -498,7 +498,7 @@ static u32 vfp_single_fcvtd(ARMul_State* state, int dd, int unused, s32 m, u32 f
498 int tm; 498 int tm;
499 u32 exceptions = 0; 499 u32 exceptions = 0;
500 500
501 vfp_single_unpack(&vsm, m); 501 vfp_single_unpack(&vsm, m, &fpscr);
502 502
503 tm = vfp_single_type(&vsm); 503 tm = vfp_single_type(&vsm);
504 504
@@ -563,7 +563,7 @@ static u32 vfp_single_ftoui(ARMul_State* state, int sd, int unused, s32 m, u32 f
563 int rmode = fpscr & FPSCR_RMODE_MASK; 563 int rmode = fpscr & FPSCR_RMODE_MASK;
564 int tm; 564 int tm;
565 565
566 vfp_single_unpack(&vsm, m); 566 vfp_single_unpack(&vsm, m, &fpscr);
567 vfp_single_dump("VSM", &vsm); 567 vfp_single_dump("VSM", &vsm);
568 568
569 /* 569 /*
@@ -643,7 +643,7 @@ static u32 vfp_single_ftosi(ARMul_State* state, int sd, int unused, s32 m, u32 f
643 int rmode = fpscr & FPSCR_RMODE_MASK; 643 int rmode = fpscr & FPSCR_RMODE_MASK;
644 int tm; 644 int tm;
645 645
646 vfp_single_unpack(&vsm, m); 646 vfp_single_unpack(&vsm, m, &fpscr);
647 vfp_single_dump("VSM", &vsm); 647 vfp_single_dump("VSM", &vsm);
648 648
649 /* 649 /*
@@ -925,11 +925,11 @@ vfp_single_multiply_accumulate(ARMul_State* state, int sd, int sn, s32 m, u32 fp
925 925
926 v = vfp_get_float(state, sn); 926 v = vfp_get_float(state, sn);
927 LOG_DEBUG(Core_ARM11, "s%u = %08x", sn, v); 927 LOG_DEBUG(Core_ARM11, "s%u = %08x", sn, v);
928 vfp_single_unpack(&vsn, v); 928 vfp_single_unpack(&vsn, v, &fpscr);
929 if (vsn.exponent == 0 && vsn.significand) 929 if (vsn.exponent == 0 && vsn.significand)
930 vfp_single_normalise_denormal(&vsn); 930 vfp_single_normalise_denormal(&vsn);
931 931
932 vfp_single_unpack(&vsm, m); 932 vfp_single_unpack(&vsm, m, &fpscr);
933 if (vsm.exponent == 0 && vsm.significand) 933 if (vsm.exponent == 0 && vsm.significand)
934 vfp_single_normalise_denormal(&vsm); 934 vfp_single_normalise_denormal(&vsm);
935 935
@@ -940,7 +940,7 @@ vfp_single_multiply_accumulate(ARMul_State* state, int sd, int sn, s32 m, u32 fp
940 940
941 v = vfp_get_float(state, sd); 941 v = vfp_get_float(state, sd);
942 LOG_DEBUG(Core_ARM11, "s%u = %08x", sd, v); 942 LOG_DEBUG(Core_ARM11, "s%u = %08x", sd, v);
943 vfp_single_unpack(&vsn, v); 943 vfp_single_unpack(&vsn, v, &fpscr);
944 if (vsn.exponent == 0 && vsn.significand != 0) 944 if (vsn.exponent == 0 && vsn.significand != 0)
945 vfp_single_normalise_denormal(&vsn); 945 vfp_single_normalise_denormal(&vsn);
946 946
@@ -1004,11 +1004,11 @@ static u32 vfp_single_fmul(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
1004 1004
1005 LOG_DEBUG(Core_ARM11, "s%u = %08x", sn, n); 1005 LOG_DEBUG(Core_ARM11, "s%u = %08x", sn, n);
1006 1006
1007 vfp_single_unpack(&vsn, n); 1007 vfp_single_unpack(&vsn, n, &fpscr);
1008 if (vsn.exponent == 0 && vsn.significand) 1008 if (vsn.exponent == 0 && vsn.significand)
1009 vfp_single_normalise_denormal(&vsn); 1009 vfp_single_normalise_denormal(&vsn);
1010 1010
1011 vfp_single_unpack(&vsm, m); 1011 vfp_single_unpack(&vsm, m, &fpscr);
1012 if (vsm.exponent == 0 && vsm.significand) 1012 if (vsm.exponent == 0 && vsm.significand)
1013 vfp_single_normalise_denormal(&vsm); 1013 vfp_single_normalise_denormal(&vsm);
1014 1014
@@ -1027,11 +1027,11 @@ static u32 vfp_single_fnmul(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr
1027 1027
1028 LOG_DEBUG(Core_ARM11, "s%u = %08x", sn, n); 1028 LOG_DEBUG(Core_ARM11, "s%u = %08x", sn, n);
1029 1029
1030 vfp_single_unpack(&vsn, n); 1030 vfp_single_unpack(&vsn, n, &fpscr);
1031 if (vsn.exponent == 0 && vsn.significand) 1031 if (vsn.exponent == 0 && vsn.significand)
1032 vfp_single_normalise_denormal(&vsn); 1032 vfp_single_normalise_denormal(&vsn);
1033 1033
1034 vfp_single_unpack(&vsm, m); 1034 vfp_single_unpack(&vsm, m, &fpscr);
1035 if (vsm.exponent == 0 && vsm.significand) 1035 if (vsm.exponent == 0 && vsm.significand)
1036 vfp_single_normalise_denormal(&vsm); 1036 vfp_single_normalise_denormal(&vsm);
1037 1037
@@ -1054,11 +1054,11 @@ static u32 vfp_single_fadd(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
1054 /* 1054 /*
1055 * Unpack and normalise denormals. 1055 * Unpack and normalise denormals.
1056 */ 1056 */
1057 vfp_single_unpack(&vsn, n); 1057 vfp_single_unpack(&vsn, n, &fpscr);
1058 if (vsn.exponent == 0 && vsn.significand) 1058 if (vsn.exponent == 0 && vsn.significand)
1059 vfp_single_normalise_denormal(&vsn); 1059 vfp_single_normalise_denormal(&vsn);
1060 1060
1061 vfp_single_unpack(&vsm, m); 1061 vfp_single_unpack(&vsm, m, &fpscr);
1062 if (vsm.exponent == 0 && vsm.significand) 1062 if (vsm.exponent == 0 && vsm.significand)
1063 vfp_single_normalise_denormal(&vsm); 1063 vfp_single_normalise_denormal(&vsm);
1064 1064
@@ -1094,8 +1094,8 @@ static u32 vfp_single_fdiv(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
1094 1094
1095 LOG_DEBUG(Core_ARM11, "s%u = %08x", sn, n); 1095 LOG_DEBUG(Core_ARM11, "s%u = %08x", sn, n);
1096 1096
1097 vfp_single_unpack(&vsn, n); 1097 vfp_single_unpack(&vsn, n, &fpscr);
1098 vfp_single_unpack(&vsm, m); 1098 vfp_single_unpack(&vsm, m, &fpscr);
1099 1099
1100 vsd.sign = vsn.sign ^ vsm.sign; 1100 vsd.sign = vsn.sign ^ vsm.sign;
1101 1101
diff --git a/src/core/core.cpp b/src/core/core.cpp
index b5c258230..53aae8c2f 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -24,9 +24,9 @@ ARM_Interface* g_sys_core = nullptr; ///< ARM11 system (OS) core
24 24
25/// Run the core CPU loop 25/// Run the core CPU loop
26void RunLoop(int tight_loop) { 26void RunLoop(int tight_loop) {
27 // If the current thread is an idle thread, then don't execute instructions, 27 // If we don't have a currently active thread then don't execute instructions,
28 // instead advance to the next event and try to yield to the next thread 28 // instead advance to the next event and try to yield to the next thread
29 if (Kernel::GetCurrentThread()->IsIdle()) { 29 if (Kernel::GetCurrentThread() == nullptr) {
30 LOG_TRACE(Core_ARM11, "Idling"); 30 LOG_TRACE(Core_ARM11, "Idling");
31 CoreTiming::Idle(); 31 CoreTiming::Idle();
32 CoreTiming::Advance(); 32 CoreTiming::Advance();
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index a3715e555..b5c98b249 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -115,8 +115,7 @@ SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const {
115 if (handle == CurrentThread) { 115 if (handle == CurrentThread) {
116 return GetCurrentThread(); 116 return GetCurrentThread();
117 } else if (handle == CurrentProcess) { 117 } else if (handle == CurrentProcess) {
118 LOG_ERROR(Kernel, "Current process (%08X) pseudo-handle not supported", CurrentProcess); 118 return g_current_process;
119 return nullptr;
120 } 119 }
121 120
122 if (!IsValid(handle)) { 121 if (!IsValid(handle)) {
@@ -139,6 +138,9 @@ void Init() {
139 Kernel::TimersInit(); 138 Kernel::TimersInit();
140 139
141 Object::next_object_id = 0; 140 Object::next_object_id = 0;
141 // TODO(Subv): Start the process ids from 10 for now, as lower PIDs are
142 // reserved for low-level services
143 Process::next_process_id = 10;
142} 144}
143 145
144/// Shutdown the kernel 146/// Shutdown the kernel
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index efae4a179..1e439db9e 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -12,6 +12,8 @@
12 12
13namespace Kernel { 13namespace Kernel {
14 14
15u32 Process::next_process_id;
16
15SharedPtr<Process> Process::Create(std::string name, u64 program_id) { 17SharedPtr<Process> Process::Create(std::string name, u64 program_id) {
16 SharedPtr<Process> process(new Process); 18 SharedPtr<Process> process(new Process);
17 19
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index 88ed9a5a5..90881054c 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -55,6 +55,8 @@ public:
55 static const HandleType HANDLE_TYPE = HandleType::Process; 55 static const HandleType HANDLE_TYPE = HandleType::Process;
56 HandleType GetHandleType() const override { return HANDLE_TYPE; } 56 HandleType GetHandleType() const override { return HANDLE_TYPE; }
57 57
58 static u32 next_process_id;
59
58 /// Name of the process 60 /// Name of the process
59 std::string name; 61 std::string name;
60 /// Title ID corresponding to the process 62 /// Title ID corresponding to the process
@@ -69,6 +71,12 @@ public:
69 boost::container::static_vector<AddressMapping, 8> address_mappings; 71 boost::container::static_vector<AddressMapping, 8> address_mappings;
70 ProcessFlags flags; 72 ProcessFlags flags;
71 73
74 /// The id of this process
75 u32 process_id = next_process_id++;
76
77 /// Bitmask of the used TLS slots
78 std::bitset<300> used_tls_slots;
79
72 /** 80 /**
73 * Parses a list of kernel capability descriptors (as found in the ExHeader) and applies them 81 * Parses a list of kernel capability descriptors (as found in the ExHeader) and applies them
74 * to this process. 82 * to this process.
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 957cbdfee..afaf0cd5d 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -17,6 +17,7 @@
17#include "core/core_timing.h" 17#include "core/core_timing.h"
18#include "core/hle/hle.h" 18#include "core/hle/hle.h"
19#include "core/hle/kernel/kernel.h" 19#include "core/hle/kernel/kernel.h"
20#include "core/hle/kernel/process.h"
20#include "core/hle/kernel/thread.h" 21#include "core/hle/kernel/thread.h"
21#include "core/hle/kernel/mutex.h" 22#include "core/hle/kernel/mutex.h"
22#include "core/hle/result.h" 23#include "core/hle/result.h"
@@ -106,6 +107,8 @@ void Thread::Stop() {
106 for (auto& wait_object : wait_objects) { 107 for (auto& wait_object : wait_objects) {
107 wait_object->RemoveWaitingThread(this); 108 wait_object->RemoveWaitingThread(this);
108 } 109 }
110
111 Kernel::g_current_process->used_tls_slots[tls_index] = false;
109} 112}
110 113
111Thread* ArbitrateHighestPriorityThread(u32 address) { 114Thread* ArbitrateHighestPriorityThread(u32 address) {
@@ -157,7 +160,7 @@ static void PriorityBoostStarvedThreads() {
157 160
158 u64 delta = current_ticks - thread->last_running_ticks; 161 u64 delta = current_ticks - thread->last_running_ticks;
159 162
160 if (thread->status == THREADSTATUS_READY && delta > boost_timeout && !thread->idle) { 163 if (thread->status == THREADSTATUS_READY && delta > boost_timeout) {
161 const s32 priority = std::max(ready_queue.get_first()->current_priority - 1, 0); 164 const s32 priority = std::max(ready_queue.get_first()->current_priority - 1, 0);
162 thread->BoostPriority(priority); 165 thread->BoostPriority(priority);
163 } 166 }
@@ -169,8 +172,6 @@ static void PriorityBoostStarvedThreads() {
169 * @param new_thread The thread to switch to 172 * @param new_thread The thread to switch to
170 */ 173 */
171static void SwitchContext(Thread* new_thread) { 174static void SwitchContext(Thread* new_thread) {
172 DEBUG_ASSERT_MSG(new_thread->status == THREADSTATUS_READY, "Thread must be ready to become running.");
173
174 Thread* previous_thread = GetCurrentThread(); 175 Thread* previous_thread = GetCurrentThread();
175 176
176 // Save context for previous thread 177 // Save context for previous thread
@@ -188,6 +189,8 @@ static void SwitchContext(Thread* new_thread) {
188 189
189 // Load context of new thread 190 // Load context of new thread
190 if (new_thread) { 191 if (new_thread) {
192 DEBUG_ASSERT_MSG(new_thread->status == THREADSTATUS_READY, "Thread must be ready to become running.");
193
191 current_thread = new_thread; 194 current_thread = new_thread;
192 195
193 ready_queue.remove(new_thread->current_priority, new_thread); 196 ready_queue.remove(new_thread->current_priority, new_thread);
@@ -215,6 +218,10 @@ static Thread* PopNextReadyThread() {
215 // We have to do better than the current thread. 218 // We have to do better than the current thread.
216 // This call returns null when that's not possible. 219 // This call returns null when that's not possible.
217 next = ready_queue.pop_first_better(thread->current_priority); 220 next = ready_queue.pop_first_better(thread->current_priority);
221 if (!next) {
222 // Otherwise just keep going with the current thread
223 next = thread;
224 }
218 } else { 225 } else {
219 next = ready_queue.pop_first(); 226 next = ready_queue.pop_first();
220 } 227 }
@@ -402,12 +409,20 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point,
402 thread->wait_address = 0; 409 thread->wait_address = 0;
403 thread->name = std::move(name); 410 thread->name = std::move(name);
404 thread->callback_handle = wakeup_callback_handle_table.Create(thread).MoveFrom(); 411 thread->callback_handle = wakeup_callback_handle_table.Create(thread).MoveFrom();
412 thread->owner_process = g_current_process;
413 thread->tls_index = -1;
414
415 // Find the next available TLS index, and mark it as used
416 auto& used_tls_slots = Kernel::g_current_process->used_tls_slots;
417 for (unsigned int i = 0; i < used_tls_slots.size(); ++i) {
418 if (used_tls_slots[i] == false) {
419 thread->tls_index = i;
420 used_tls_slots[i] = true;
421 break;
422 }
423 }
405 424
406 VAddr tls_address = Memory::TLS_AREA_VADDR + (thread->thread_id - 1) * 0x200; 425 ASSERT_MSG(thread->tls_index != -1, "Out of TLS space");
407
408 ASSERT_MSG(tls_address < Memory::TLS_AREA_VADDR_END, "Too many threads");
409
410 thread->tls_address = tls_address;
411 426
412 // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used 427 // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used
413 // to initialize the context 428 // to initialize the context
@@ -439,6 +454,8 @@ void Thread::SetPriority(s32 priority) {
439 // If thread was ready, adjust queues 454 // If thread was ready, adjust queues
440 if (status == THREADSTATUS_READY) 455 if (status == THREADSTATUS_READY)
441 ready_queue.move(this, current_priority, priority); 456 ready_queue.move(this, current_priority, priority);
457 else
458 ready_queue.prepare(priority);
442 459
443 nominal_priority = current_priority = priority; 460 nominal_priority = current_priority = priority;
444} 461}
@@ -448,16 +465,6 @@ void Thread::BoostPriority(s32 priority) {
448 current_priority = priority; 465 current_priority = priority;
449} 466}
450 467
451SharedPtr<Thread> SetupIdleThread() {
452 // We need to pass a few valid values to get around parameter checking in Thread::Create.
453 // TODO(yuriks): Figure out a way to avoid passing the bogus VAddr parameter
454 auto thread = Thread::Create("idle", Memory::TLS_AREA_VADDR, THREADPRIO_LOWEST, 0,
455 THREADPROCESSORID_0, 0).MoveFrom();
456
457 thread->idle = true;
458 return thread;
459}
460
461SharedPtr<Thread> SetupMainThread(u32 entry_point, s32 priority) { 468SharedPtr<Thread> SetupMainThread(u32 entry_point, s32 priority) {
462 DEBUG_ASSERT(!GetCurrentThread()); 469 DEBUG_ASSERT(!GetCurrentThread());
463 470
@@ -474,24 +481,25 @@ SharedPtr<Thread> SetupMainThread(u32 entry_point, s32 priority) {
474} 481}
475 482
476void Reschedule() { 483void Reschedule() {
477 Thread* prev = GetCurrentThread();
478
479 PriorityBoostStarvedThreads(); 484 PriorityBoostStarvedThreads();
480 485
486 Thread* cur = GetCurrentThread();
481 Thread* next = PopNextReadyThread(); 487 Thread* next = PopNextReadyThread();
482 HLE::g_reschedule = false; 488 HLE::g_reschedule = false;
483 489
484 if (next != nullptr) { 490 // Don't bother switching to the same thread
485 LOG_TRACE(Kernel, "context switch %u -> %u", prev->GetObjectId(), next->GetObjectId()); 491 if (next == cur)
486 SwitchContext(next); 492 return;
487 } else {
488 LOG_TRACE(Kernel, "cannot context switch from %u, no higher priority thread!", prev->GetObjectId());
489 493
490 for (auto& thread : thread_list) { 494 if (cur && next) {
491 LOG_TRACE(Kernel, "\tid=%u prio=0x%02X, status=0x%08X", thread->GetObjectId(), 495 LOG_TRACE(Kernel, "context switch %u -> %u", cur->GetObjectId(), next->GetObjectId());
492 thread->current_priority, thread->status); 496 } else if (cur) {
493 } 497 LOG_TRACE(Kernel, "context switch %u -> idle", cur->GetObjectId());
498 } else {
499 LOG_TRACE(Kernel, "context switch idle -> %u", next->GetObjectId());
494 } 500 }
501
502 SwitchContext(next);
495} 503}
496 504
497void Thread::SetWaitSynchronizationResult(ResultCode result) { 505void Thread::SetWaitSynchronizationResult(ResultCode result) {
@@ -503,7 +511,7 @@ void Thread::SetWaitSynchronizationOutput(s32 output) {
503} 511}
504 512
505VAddr Thread::GetTLSAddress() const { 513VAddr Thread::GetTLSAddress() const {
506 return tls_address; 514 return Memory::TLS_AREA_VADDR + tls_index * 0x200;
507} 515}
508 516
509//////////////////////////////////////////////////////////////////////////////////////////////////// 517////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -516,9 +524,6 @@ void ThreadingInit() {
516 524
517 thread_list.clear(); 525 thread_list.clear();
518 ready_queue.clear(); 526 ready_queue.clear();
519
520 // Setup the idle thread
521 SetupIdleThread();
522} 527}
523 528
524void ThreadingShutdown() { 529void ThreadingShutdown() {
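
The thread.cpp changes replace the per-thread tls_address with a per-process slot index. Here is a minimal sketch of that bookkeeping, using the same 300-slot bitset, 0x200-byte stride and 0x1FF82000 base as this commit; the allocator type itself is illustrative, not the emulator's Process or Thread class.

#include <bitset>
#include <cassert>
#include <cstdint>

constexpr std::uint32_t kTlsAreaVAddr = 0x1FF82000;  // TLS_AREA_VADDR after this commit
constexpr std::uint32_t kTlsSlotSize  = 0x200;       // one TLS buffer per thread

// Stand-in for Process::used_tls_slots plus the allocate/release steps that
// Thread::Create() and Thread::Stop() now perform.
struct TlsSlots {
    std::bitset<300> used;

    int Allocate() {
        for (std::size_t i = 0; i < used.size(); ++i) {
            if (!used[i]) {
                used[i] = true;
                return static_cast<int>(i);
            }
        }
        return -1;  // "Out of TLS space"
    }

    void Release(int index) { used[index] = false; }
};

// Equivalent of Thread::GetTLSAddress() under the new scheme.
std::uint32_t TlsAddress(int tls_index) {
    return kTlsAreaVAddr + static_cast<std::uint32_t>(tls_index) * kTlsSlotSize;
}

int main() {
    TlsSlots slots;
    const int a = slots.Allocate();
    const int b = slots.Allocate();
    assert(a == 0 && b == 1);
    assert(TlsAddress(b) == kTlsAreaVAddr + 0x200);
    slots.Release(a);               // freed on Thread::Stop()
    assert(slots.Allocate() == 0);  // slot is reused by the next thread
}
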
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index afdaf8511..6b329c12a 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -45,6 +45,7 @@ enum ThreadStatus {
45namespace Kernel { 45namespace Kernel {
46 46
47class Mutex; 47class Mutex;
48class Process;
48 49
49class Thread final : public WaitObject { 50class Thread final : public WaitObject {
50public: 51public:
@@ -72,12 +73,6 @@ public:
72 void Acquire() override; 73 void Acquire() override;
73 74
74 /** 75 /**
75 * Checks if the thread is an idle (stub) thread
76 * @return True if the thread is an idle (stub) thread, false otherwise
77 */
78 inline bool IsIdle() const { return idle; }
79
80 /**
81 * Gets the thread's current priority 76 * Gets the thread's current priority
82 * @return The current thread's priority 77 * @return The current thread's priority
83 */ 78 */
@@ -156,11 +151,12 @@ public:
156 151
157 s32 processor_id; 152 s32 processor_id;
158 153
159 VAddr tls_address; ///< Address of the Thread Local Storage of the thread 154 s32 tls_index; ///< Index of the Thread Local Storage of the thread
160 155
161 /// Mutexes currently held by this thread, which will be released when it exits. 156 /// Mutexes currently held by this thread, which will be released when it exits.
162 boost::container::flat_set<SharedPtr<Mutex>> held_mutexes; 157 boost::container::flat_set<SharedPtr<Mutex>> held_mutexes;
163 158
159 SharedPtr<Process> owner_process; ///< Process that owns this thread
164 std::vector<SharedPtr<WaitObject>> wait_objects; ///< Objects that the thread is waiting on 160 std::vector<SharedPtr<WaitObject>> wait_objects; ///< Objects that the thread is waiting on
165 VAddr wait_address; ///< If waiting on an AddressArbiter, this is the arbitration address 161 VAddr wait_address; ///< If waiting on an AddressArbiter, this is the arbitration address
166 bool wait_all; ///< True if the thread is waiting on all objects before resuming 162 bool wait_all; ///< True if the thread is waiting on all objects before resuming
@@ -168,9 +164,6 @@ public:
168 164
169 std::string name; 165 std::string name;
170 166
171 /// Whether this thread is intended to never actually be executed, i.e. always idle
172 bool idle = false;
173
174private: 167private:
175 Thread(); 168 Thread();
176 ~Thread() override; 169 ~Thread() override;
@@ -229,14 +222,6 @@ void WaitCurrentThread_WaitSynchronization(std::vector<SharedPtr<WaitObject>> wa
229void WaitCurrentThread_ArbitrateAddress(VAddr wait_address); 222void WaitCurrentThread_ArbitrateAddress(VAddr wait_address);
230 223
231/** 224/**
232 * Sets up the idle thread, this is a thread that is intended to never execute instructions,
233 * only to advance the timing. It is scheduled when there are no other ready threads in the thread queue
234 * and will try to yield on every call.
235 * @return The handle of the idle thread
236 */
237SharedPtr<Thread> SetupIdleThread();
238
239/**
240 * Initialize threading 225 * Initialize threading
241 */ 226 */
242void ThreadingInit(); 227void ThreadingInit();
diff --git a/src/core/hle/svc.cpp b/src/core/hle/svc.cpp
index 1ec6599c7..e8159fbdb 100644
--- a/src/core/hle/svc.cpp
+++ b/src/core/hle/svc.cpp
@@ -16,6 +16,7 @@
16#include "core/hle/kernel/address_arbiter.h" 16#include "core/hle/kernel/address_arbiter.h"
17#include "core/hle/kernel/event.h" 17#include "core/hle/kernel/event.h"
18#include "core/hle/kernel/mutex.h" 18#include "core/hle/kernel/mutex.h"
19#include "core/hle/kernel/process.h"
19#include "core/hle/kernel/semaphore.h" 20#include "core/hle/kernel/semaphore.h"
20#include "core/hle/kernel/shared_memory.h" 21#include "core/hle/kernel/shared_memory.h"
21#include "core/hle/kernel/thread.h" 22#include "core/hle/kernel/thread.h"
@@ -424,6 +425,34 @@ static ResultCode ReleaseMutex(Handle handle) {
424 return RESULT_SUCCESS; 425 return RESULT_SUCCESS;
425} 426}
426 427
428/// Get the ID of the specified process
429static ResultCode GetProcessId(u32* process_id, Handle process_handle) {
430 LOG_TRACE(Kernel_SVC, "called process=0x%08X", process_handle);
431
432 const SharedPtr<Kernel::Process> process = Kernel::g_handle_table.Get<Kernel::Process>(process_handle);
433 if (process == nullptr)
434 return ERR_INVALID_HANDLE;
435
436 *process_id = process->process_id;
437 return RESULT_SUCCESS;
438}
439
440/// Get the ID of the process that owns the specified thread
441static ResultCode GetProcessIdOfThread(u32* process_id, Handle thread_handle) {
442 LOG_TRACE(Kernel_SVC, "called thread=0x%08X", thread_handle);
443
444 const SharedPtr<Kernel::Thread> thread = Kernel::g_handle_table.Get<Kernel::Thread>(thread_handle);
445 if (thread == nullptr)
446 return ERR_INVALID_HANDLE;
447
448 const SharedPtr<Kernel::Process> process = thread->owner_process;
449
450 ASSERT_MSG(process != nullptr, "Invalid parent process for thread=0x%08X", thread_handle);
451
452 *process_id = process->process_id;
453 return RESULT_SUCCESS;
454}
455
427/// Get the ID for the specified thread. 456/// Get the ID for the specified thread.
428static ResultCode GetThreadId(u32* thread_id, Handle handle) { 457static ResultCode GetThreadId(u32* thread_id, Handle handle) {
429 LOG_TRACE(Kernel_SVC, "called thread=0x%08X", handle); 458 LOG_TRACE(Kernel_SVC, "called thread=0x%08X", handle);
@@ -674,8 +703,8 @@ static const FunctionDef SVC_Table[] = {
674 {0x32, HLE::Wrap<SendSyncRequest>, "SendSyncRequest"}, 703 {0x32, HLE::Wrap<SendSyncRequest>, "SendSyncRequest"},
675 {0x33, nullptr, "OpenProcess"}, 704 {0x33, nullptr, "OpenProcess"},
676 {0x34, nullptr, "OpenThread"}, 705 {0x34, nullptr, "OpenThread"},
677 {0x35, nullptr, "GetProcessId"}, 706 {0x35, HLE::Wrap<GetProcessId>, "GetProcessId"},
678 {0x36, nullptr, "GetProcessIdOfThread"}, 707 {0x36, HLE::Wrap<GetProcessIdOfThread>, "GetProcessIdOfThread"},
679 {0x37, HLE::Wrap<GetThreadId>, "GetThreadId"}, 708 {0x37, HLE::Wrap<GetThreadId>, "GetThreadId"},
680 {0x38, HLE::Wrap<GetResourceLimit>, "GetResourceLimit"}, 709 {0x38, HLE::Wrap<GetResourceLimit>, "GetResourceLimit"},
681 {0x39, nullptr, "GetResourceLimitLimitValues"}, 710 {0x39, nullptr, "GetResourceLimitLimitValues"},
diff --git a/src/core/mem_map.h b/src/core/mem_map.h
index 64de76c39..71f90cb8a 100644
--- a/src/core/mem_map.h
+++ b/src/core/mem_map.h
@@ -94,10 +94,12 @@ enum : VAddr {
94 SHARED_PAGE_SIZE = 0x00001000, 94 SHARED_PAGE_SIZE = 0x00001000,
95 SHARED_PAGE_VADDR_END = SHARED_PAGE_VADDR + SHARED_PAGE_SIZE, 95 SHARED_PAGE_VADDR_END = SHARED_PAGE_VADDR + SHARED_PAGE_SIZE,
96 96
97 // TODO(yuriks): The exact location and size of this area is uncomfirmed. 97 // TODO(yuriks): The size of this area is dynamic, the kernel grows
98 // it as more and more threads are created. For now we'll just use a
99 // hardcoded value.
98 /// Area where TLS (Thread-Local Storage) buffers are allocated. 100 /// Area where TLS (Thread-Local Storage) buffers are allocated.
99 TLS_AREA_VADDR = 0x1FFA0000, 101 TLS_AREA_VADDR = 0x1FF82000,
100 TLS_AREA_SIZE = 0x00002000, // Each TLS buffer is 0x200 bytes, allows for 16 threads 102 TLS_AREA_SIZE = 0x00030000, // Each TLS buffer is 0x200 bytes, allows for 300 threads
101 TLS_AREA_VADDR_END = TLS_AREA_VADDR + TLS_AREA_SIZE, 103 TLS_AREA_VADDR_END = TLS_AREA_VADDR + TLS_AREA_SIZE,
102}; 104};
103 105
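
As a quick size check on the new region: 300 slots x 0x200 bytes = 0x25800 bytes, which fits comfortably inside the 0x30000 bytes reserved here (the region could hold up to 0x30000 / 0x200 = 384 slots), so the 300-entry bitset added to Process never reaches past TLS_AREA_VADDR_END.
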
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 46a326bb4..02a08b20e 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -6,6 +6,7 @@
6 6
7#include "common/common_types.h" 7#include "common/common_types.h"
8#include "common/math_util.h" 8#include "common/math_util.h"
9#include "common/profiler.h"
9 10
10#include "core/hw/gpu.h" 11#include "core/hw/gpu.h"
11#include "debug_utils/debug_utils.h" 12#include "debug_utils/debug_utils.h"
@@ -186,6 +187,8 @@ static int SignedArea (const Math::Vec2<Fix12P4>& vtx1,
186 return Math::Cross(vec1, vec2).z; 187 return Math::Cross(vec1, vec2).z;
187}; 188};
188 189
190static Common::Profiling::TimingCategory rasterization_category("Rasterization");
191
189/** 192/**
190 * Helper function for ProcessTriangle with the "reversed" flag to allow for implementing 193 * Helper function for ProcessTriangle with the "reversed" flag to allow for implementing
191 * culling via recursion. 194 * culling via recursion.
@@ -195,6 +198,8 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
195 const VertexShader::OutputVertex& v2, 198 const VertexShader::OutputVertex& v2,
196 bool reversed = false) 199 bool reversed = false)
197{ 200{
201 Common::Profiling::ScopeTimer timer(rasterization_category);
202
198 // vertex positions in rasterizer coordinates 203 // vertex positions in rasterizer coordinates
199 static auto FloatToFix = [](float24 flt) { 204 static auto FloatToFix = [](float24 flt) {
200 // TODO: Rounding here is necessary to prevent garbage pixels at 205 // TODO: Rounding here is necessary to prevent garbage pixels at
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp
index 885b7de59..4734e546a 100644
--- a/src/video_core/vertex_shader.cpp
+++ b/src/video_core/vertex_shader.cpp
@@ -12,6 +12,7 @@
12 12
13#include <nihstro/shader_bytecode.h> 13#include <nihstro/shader_bytecode.h>
14 14
15#include "common/profiler.h"
15 16
16#include "pica.h" 17#include "pica.h"
17#include "vertex_shader.h" 18#include "vertex_shader.h"
@@ -574,7 +575,11 @@ static void ProcessShaderCode(VertexShaderState& state) {
574 } 575 }
575} 576}
576 577
578static Common::Profiling::TimingCategory shader_category("Vertex Shader");
579
577OutputVertex RunShader(const InputVertex& input, int num_attributes) { 580OutputVertex RunShader(const InputVertex& input, int num_attributes) {
581 Common::Profiling::ScopeTimer timer(shader_category);
582
578 VertexShaderState state; 583 VertexShaderState state;
579 584
580 const u32* main = &shader_memory[registers.vs_main_offset]; 585 const u32* main = &shader_memory[registers.vs_main_offset];
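
Both video_core changes follow the same two-line profiling pattern, shown below in isolation. Only the constructor calls visible in this diff (a TimingCategory built from a name and a ScopeTimer built from a category) are relied on; the category name and function here are placeholders, and the header is the project's own, so this is a usage sketch rather than a standalone program.

#include "common/profiler.h"

// One static category per profiled subsystem ("Rasterization" and "Vertex Shader" above).
static Common::Profiling::TimingCategory example_category("Example Subsystem");  // placeholder name

static void ExpensiveFunction() {
    // RAII timer: constructed on entry, presumably stopping when the scope is
    // left (hence the name), with the elapsed time attributed to example_category.
    Common::Profiling::ScopeTimer timer(example_category);

    // ... work to be measured ...
}
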