diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/common/logging/log.h | 2 | ||||
| -rw-r--r-- | src/core/arm/arm_interface.h | 9 | ||||
| -rw-r--r-- | src/core/arm/dyncom/arm_dyncom.cpp | 10 | ||||
| -rw-r--r-- | src/core/arm/dyncom/arm_dyncom.h | 57 | ||||
| -rw-r--r-- | src/core/arm/skyeye_common/vfp/vfp.cpp | 4 | ||||
| -rw-r--r-- | src/core/arm/skyeye_common/vfp/vfp_helper.h | 654 | ||||
| -rw-r--r-- | src/core/arm/skyeye_common/vfp/vfpdouble.cpp | 187 | ||||
| -rw-r--r-- | src/core/arm/skyeye_common/vfp/vfpsingle.cpp | 54 | ||||
| -rw-r--r-- | src/core/hle/kernel/kernel.cpp | 6 | ||||
| -rw-r--r-- | src/core/hle/kernel/mutex.cpp | 33 | ||||
| -rw-r--r-- | src/core/hle/kernel/mutex.h | 3 | ||||
| -rw-r--r-- | src/core/hle/kernel/thread.cpp | 360 | ||||
| -rw-r--r-- | src/core/hle/kernel/thread.h | 115 | ||||
| -rw-r--r-- | src/core/hle/svc.cpp | 22 |
14 files changed, 634 insertions, 882 deletions
diff --git a/src/common/logging/log.h b/src/common/logging/log.h index 3d94bf0d9..897ef36b8 100644 --- a/src/common/logging/log.h +++ b/src/common/logging/log.h | |||
| @@ -32,7 +32,7 @@ typedef u8 ClassType; | |||
| 32 | /** | 32 | /** |
| 33 | * Specifies the sub-system that generated the log message. | 33 | * Specifies the sub-system that generated the log message. |
| 34 | * | 34 | * |
| 35 | * @note If you add a new entry here, also add a corresponding one to `ALL_LOG_CLASSES` in log.cpp. | 35 | * @note If you add a new entry here, also add a corresponding one to `ALL_LOG_CLASSES` in backend.cpp. |
| 36 | */ | 36 | */ |
| 37 | enum class Class : ClassType { | 37 | enum class Class : ClassType { |
| 38 | Log, ///< Messages about the log system itself | 38 | Log, ///< Messages about the log system itself |
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h index e612f7439..ef37ee055 100644 --- a/src/core/arm/arm_interface.h +++ b/src/core/arm/arm_interface.h | |||
| @@ -86,6 +86,15 @@ public: | |||
| 86 | virtual void AddTicks(u64 ticks) = 0; | 86 | virtual void AddTicks(u64 ticks) = 0; |
| 87 | 87 | ||
| 88 | /** | 88 | /** |
| 89 | * Initializes a CPU context for use on this CPU | ||
| 90 | * @param context Thread context to reset | ||
| 91 | * @param stack_top Pointer to the top of the stack | ||
| 92 | * @param entry_point Entry point for execution | ||
| 93 | * @param arg User argument for thread | ||
| 94 | */ | ||
| 95 | virtual void ResetContext(Core::ThreadContext& context, u32 stack_top, u32 entry_point, u32 arg) = 0; | ||
| 96 | |||
| 97 | /** | ||
| 89 | * Saves the current CPU context | 98 | * Saves the current CPU context |
| 90 | * @param ctx Thread context to save | 99 | * @param ctx Thread context to save |
| 91 | */ | 100 | */ |
diff --git a/src/core/arm/dyncom/arm_dyncom.cpp b/src/core/arm/dyncom/arm_dyncom.cpp index f6628ca33..68fddc94f 100644 --- a/src/core/arm/dyncom/arm_dyncom.cpp +++ b/src/core/arm/dyncom/arm_dyncom.cpp | |||
| @@ -93,6 +93,16 @@ void ARM_DynCom::ExecuteInstructions(int num_instructions) { | |||
| 93 | AddTicks(ticks_executed); | 93 | AddTicks(ticks_executed); |
| 94 | } | 94 | } |
| 95 | 95 | ||
| 96 | void ARM_DynCom::ResetContext(Core::ThreadContext& context, u32 stack_top, u32 entry_point, u32 arg) { | ||
| 97 | memset(&context, 0, sizeof(Core::ThreadContext)); | ||
| 98 | |||
| 99 | context.cpu_registers[0] = arg; | ||
| 100 | context.pc = entry_point; | ||
| 101 | context.sp = stack_top; | ||
| 102 | context.cpsr = 0x1F; // Usermode | ||
| 103 | context.mode = 8; // Instructs dyncom CPU core to start execution as if it's "resuming" a thread. | ||
| 104 | } | ||
| 105 | |||
| 96 | void ARM_DynCom::SaveContext(Core::ThreadContext& ctx) { | 106 | void ARM_DynCom::SaveContext(Core::ThreadContext& ctx) { |
| 97 | memcpy(ctx.cpu_registers, state->Reg, sizeof(ctx.cpu_registers)); | 107 | memcpy(ctx.cpu_registers, state->Reg, sizeof(ctx.cpu_registers)); |
| 98 | memcpy(ctx.fpu_registers, state->ExtReg, sizeof(ctx.fpu_registers)); | 108 | memcpy(ctx.fpu_registers, state->ExtReg, sizeof(ctx.fpu_registers)); |
diff --git a/src/core/arm/dyncom/arm_dyncom.h b/src/core/arm/dyncom/arm_dyncom.h index f16fb070c..9e2dda843 100644 --- a/src/core/arm/dyncom/arm_dyncom.h +++ b/src/core/arm/dyncom/arm_dyncom.h | |||
| @@ -13,79 +13,24 @@ | |||
| 13 | 13 | ||
| 14 | class ARM_DynCom final : virtual public ARM_Interface { | 14 | class ARM_DynCom final : virtual public ARM_Interface { |
| 15 | public: | 15 | public: |
| 16 | |||
| 17 | ARM_DynCom(); | 16 | ARM_DynCom(); |
| 18 | ~ARM_DynCom(); | 17 | ~ARM_DynCom(); |
| 19 | 18 | ||
| 20 | /** | ||
| 21 | * Set the Program Counter to an address | ||
| 22 | * @param pc Address to set PC to | ||
| 23 | */ | ||
| 24 | void SetPC(u32 pc) override; | 19 | void SetPC(u32 pc) override; |
| 25 | |||
| 26 | /* | ||
| 27 | * Get the current Program Counter | ||
| 28 | * @return Returns current PC | ||
| 29 | */ | ||
| 30 | u32 GetPC() const override; | 20 | u32 GetPC() const override; |
| 31 | |||
| 32 | /** | ||
| 33 | * Get an ARM register | ||
| 34 | * @param index Register index (0-15) | ||
| 35 | * @return Returns the value in the register | ||
| 36 | */ | ||
| 37 | u32 GetReg(int index) const override; | 21 | u32 GetReg(int index) const override; |
| 38 | |||
| 39 | /** | ||
| 40 | * Set an ARM register | ||
| 41 | * @param index Register index (0-15) | ||
| 42 | * @param value Value to set register to | ||
| 43 | */ | ||
| 44 | void SetReg(int index, u32 value) override; | 22 | void SetReg(int index, u32 value) override; |
| 45 | |||
| 46 | /** | ||
| 47 | * Get the current CPSR register | ||
| 48 | * @return Returns the value of the CPSR register | ||
| 49 | */ | ||
| 50 | u32 GetCPSR() const override; | 23 | u32 GetCPSR() const override; |
| 51 | |||
| 52 | /** | ||
| 53 | * Set the current CPSR register | ||
| 54 | * @param cpsr Value to set CPSR to | ||
| 55 | */ | ||
| 56 | void SetCPSR(u32 cpsr) override; | 24 | void SetCPSR(u32 cpsr) override; |
| 57 | 25 | ||
| 58 | /** | ||
| 59 | * Returns the number of clock ticks since the last reset | ||
| 60 | * @return Returns number of clock ticks | ||
| 61 | */ | ||
| 62 | u64 GetTicks() const override; | 26 | u64 GetTicks() const override; |
| 63 | |||
| 64 | /** | ||
| 65 | * Advance the CPU core by the specified number of ticks (e.g. to simulate CPU execution time) | ||
| 66 | * @param ticks Number of ticks to advance the CPU core | ||
| 67 | */ | ||
| 68 | void AddTicks(u64 ticks) override; | 27 | void AddTicks(u64 ticks) override; |
| 69 | 28 | ||
| 70 | /** | 29 | void ResetContext(Core::ThreadContext& context, u32 stack_top, u32 entry_point, u32 arg); |
| 71 | * Saves the current CPU context | ||
| 72 | * @param ctx Thread context to save | ||
| 73 | */ | ||
| 74 | void SaveContext(Core::ThreadContext& ctx) override; | 30 | void SaveContext(Core::ThreadContext& ctx) override; |
| 75 | |||
| 76 | /** | ||
| 77 | * Loads a CPU context | ||
| 78 | * @param ctx Thread context to load | ||
| 79 | */ | ||
| 80 | void LoadContext(const Core::ThreadContext& ctx) override; | 31 | void LoadContext(const Core::ThreadContext& ctx) override; |
| 81 | 32 | ||
| 82 | /// Prepare core for thread reschedule (if needed to correctly handle state) | ||
| 83 | void PrepareReschedule() override; | 33 | void PrepareReschedule() override; |
| 84 | |||
| 85 | /** | ||
| 86 | * Executes the given number of instructions | ||
| 87 | * @param num_instructions Number of instructions to execute | ||
| 88 | */ | ||
| 89 | void ExecuteInstructions(int num_instructions) override; | 34 | void ExecuteInstructions(int num_instructions) override; |
| 90 | 35 | ||
| 91 | private: | 36 | private: |
diff --git a/src/core/arm/skyeye_common/vfp/vfp.cpp b/src/core/arm/skyeye_common/vfp/vfp.cpp index 888709124..1cf146c53 100644 --- a/src/core/arm/skyeye_common/vfp/vfp.cpp +++ b/src/core/arm/skyeye_common/vfp/vfp.cpp | |||
| @@ -773,8 +773,8 @@ void vfp_raise_exceptions(ARMul_State* state, u32 exceptions, u32 inst, u32 fpsc | |||
| 773 | * Comparison instructions always return at least one of | 773 | * Comparison instructions always return at least one of |
| 774 | * these flags set. | 774 | * these flags set. |
| 775 | */ | 775 | */ |
| 776 | if (exceptions & (FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V)) | 776 | if (exceptions & (FPSCR_NFLAG|FPSCR_ZFLAG|FPSCR_CFLAG|FPSCR_VFLAG)) |
| 777 | fpscr &= ~(FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V); | 777 | fpscr &= ~(FPSCR_NFLAG|FPSCR_ZFLAG|FPSCR_CFLAG|FPSCR_VFLAG); |
| 778 | 778 | ||
| 779 | fpscr |= exceptions; | 779 | fpscr |= exceptions; |
| 780 | 780 | ||
diff --git a/src/core/arm/skyeye_common/vfp/vfp_helper.h b/src/core/arm/skyeye_common/vfp/vfp_helper.h index 581f0358f..b68090b80 100644 --- a/src/core/arm/skyeye_common/vfp/vfp_helper.h +++ b/src/core/arm/skyeye_common/vfp/vfp_helper.h | |||
| @@ -45,444 +45,400 @@ | |||
| 45 | 45 | ||
| 46 | #define do_div(n, base) {n/=base;} | 46 | #define do_div(n, base) {n/=base;} |
| 47 | 47 | ||
| 48 | /* From vfpinstr.h */ | 48 | enum : u32 { |
| 49 | 49 | FOP_MASK = 0x00b00040, | |
| 50 | #define INST_CPRTDO(inst) (((inst) & 0x0f000000) == 0x0e000000) | 50 | FOP_FMAC = 0x00000000, |
| 51 | #define INST_CPRT(inst) ((inst) & (1 << 4)) | 51 | FOP_FNMAC = 0x00000040, |
| 52 | #define INST_CPRT_L(inst) ((inst) & (1 << 20)) | 52 | FOP_FMSC = 0x00100000, |
| 53 | #define INST_CPRT_Rd(inst) (((inst) & (15 << 12)) >> 12) | 53 | FOP_FNMSC = 0x00100040, |
| 54 | #define INST_CPRT_OP(inst) (((inst) >> 21) & 7) | 54 | FOP_FMUL = 0x00200000, |
| 55 | #define INST_CPNUM(inst) ((inst) & 0xf00) | 55 | FOP_FNMUL = 0x00200040, |
| 56 | #define CPNUM(cp) ((cp) << 8) | 56 | FOP_FADD = 0x00300000, |
| 57 | 57 | FOP_FSUB = 0x00300040, | |
| 58 | #define FOP_MASK (0x00b00040) | 58 | FOP_FDIV = 0x00800000, |
| 59 | #define FOP_FMAC (0x00000000) | 59 | FOP_EXT = 0x00b00040 |
| 60 | #define FOP_FNMAC (0x00000040) | 60 | }; |
| 61 | #define FOP_FMSC (0x00100000) | 61 | |
| 62 | #define FOP_FNMSC (0x00100040) | 62 | #define FOP_TO_IDX(inst) ((inst & 0x00b00000) >> 20 | (inst & (1 << 6)) >> 4) |
| 63 | #define FOP_FMUL (0x00200000) | 63 | |
| 64 | #define FOP_FNMUL (0x00200040) | 64 | enum : u32 { |
| 65 | #define FOP_FADD (0x00300000) | 65 | FEXT_MASK = 0x000f0080, |
| 66 | #define FOP_FSUB (0x00300040) | 66 | FEXT_FCPY = 0x00000000, |
| 67 | #define FOP_FDIV (0x00800000) | 67 | FEXT_FABS = 0x00000080, |
| 68 | #define FOP_EXT (0x00b00040) | 68 | FEXT_FNEG = 0x00010000, |
| 69 | 69 | FEXT_FSQRT = 0x00010080, | |
| 70 | #define FOP_TO_IDX(inst) ((inst & 0x00b00000) >> 20 | (inst & (1 << 6)) >> 4) | 70 | FEXT_FCMP = 0x00040000, |
| 71 | 71 | FEXT_FCMPE = 0x00040080, | |
| 72 | #define FEXT_MASK (0x000f0080) | 72 | FEXT_FCMPZ = 0x00050000, |
| 73 | #define FEXT_FCPY (0x00000000) | 73 | FEXT_FCMPEZ = 0x00050080, |
| 74 | #define FEXT_FABS (0x00000080) | 74 | FEXT_FCVT = 0x00070080, |
| 75 | #define FEXT_FNEG (0x00010000) | 75 | FEXT_FUITO = 0x00080000, |
| 76 | #define FEXT_FSQRT (0x00010080) | 76 | FEXT_FSITO = 0x00080080, |
| 77 | #define FEXT_FCMP (0x00040000) | 77 | FEXT_FTOUI = 0x000c0000, |
| 78 | #define FEXT_FCMPE (0x00040080) | 78 | FEXT_FTOUIZ = 0x000c0080, |
| 79 | #define FEXT_FCMPZ (0x00050000) | 79 | FEXT_FTOSI = 0x000d0000, |
| 80 | #define FEXT_FCMPEZ (0x00050080) | 80 | FEXT_FTOSIZ = 0x000d0080 |
| 81 | #define FEXT_FCVT (0x00070080) | 81 | }; |
| 82 | #define FEXT_FUITO (0x00080000) | 82 | |
| 83 | #define FEXT_FSITO (0x00080080) | 83 | #define FEXT_TO_IDX(inst) ((inst & 0x000f0000) >> 15 | (inst & (1 << 7)) >> 7) |
| 84 | #define FEXT_FTOUI (0x000c0000) | 84 | |
| 85 | #define FEXT_FTOUIZ (0x000c0080) | 85 | #define vfp_get_sd(inst) ((inst & 0x0000f000) >> 11 | (inst & (1 << 22)) >> 22) |
| 86 | #define FEXT_FTOSI (0x000d0000) | 86 | #define vfp_get_dd(inst) ((inst & 0x0000f000) >> 12 | (inst & (1 << 22)) >> 18) |
| 87 | #define FEXT_FTOSIZ (0x000d0080) | 87 | #define vfp_get_sm(inst) ((inst & 0x0000000f) << 1 | (inst & (1 << 5)) >> 5) |
| 88 | 88 | #define vfp_get_dm(inst) ((inst & 0x0000000f) | (inst & (1 << 5)) >> 1) | |
| 89 | #define FEXT_TO_IDX(inst) ((inst & 0x000f0000) >> 15 | (inst & (1 << 7)) >> 7) | 89 | #define vfp_get_sn(inst) ((inst & 0x000f0000) >> 15 | (inst & (1 << 7)) >> 7) |
| 90 | 90 | #define vfp_get_dn(inst) ((inst & 0x000f0000) >> 16 | (inst & (1 << 7)) >> 3) | |
| 91 | #define vfp_get_sd(inst) ((inst & 0x0000f000) >> 11 | (inst & (1 << 22)) >> 22) | 91 | |
| 92 | #define vfp_get_dd(inst) ((inst & 0x0000f000) >> 12 | (inst & (1 << 22)) >> 18) | 92 | #define vfp_single(inst) (((inst) & 0x0000f00) == 0xa00) |
| 93 | #define vfp_get_sm(inst) ((inst & 0x0000000f) << 1 | (inst & (1 << 5)) >> 5) | ||
| 94 | #define vfp_get_dm(inst) ((inst & 0x0000000f) | (inst & (1 << 5)) >> 1) | ||
| 95 | #define vfp_get_sn(inst) ((inst & 0x000f0000) >> 15 | (inst & (1 << 7)) >> 7) | ||
| 96 | #define vfp_get_dn(inst) ((inst & 0x000f0000) >> 16 | (inst & (1 << 7)) >> 3) | ||
| 97 | |||
| 98 | #define vfp_single(inst) (((inst) & 0x0000f00) == 0xa00) | ||
| 99 | |||
| 100 | #define FPSCR_N (1 << 31) | ||
| 101 | #define FPSCR_Z (1 << 30) | ||
| 102 | #define FPSCR_C (1 << 29) | ||
| 103 | #define FPSCR_V (1 << 28) | ||
| 104 | 93 | ||
| 105 | static inline u32 vfp_shiftright32jamming(u32 val, unsigned int shift) | 94 | static inline u32 vfp_shiftright32jamming(u32 val, unsigned int shift) |
| 106 | { | 95 | { |
| 107 | if (shift) { | 96 | if (shift) { |
| 108 | if (shift < 32) | 97 | if (shift < 32) |
| 109 | val = val >> shift | ((val << (32 - shift)) != 0); | 98 | val = val >> shift | ((val << (32 - shift)) != 0); |
| 110 | else | 99 | else |
| 111 | val = val != 0; | 100 | val = val != 0; |
| 112 | } | 101 | } |
| 113 | return val; | 102 | return val; |
| 114 | } | 103 | } |
| 115 | 104 | ||
| 116 | static inline u64 vfp_shiftright64jamming(u64 val, unsigned int shift) | 105 | static inline u64 vfp_shiftright64jamming(u64 val, unsigned int shift) |
| 117 | { | 106 | { |
| 118 | if (shift) { | 107 | if (shift) { |
| 119 | if (shift < 64) | 108 | if (shift < 64) |
| 120 | val = val >> shift | ((val << (64 - shift)) != 0); | 109 | val = val >> shift | ((val << (64 - shift)) != 0); |
| 121 | else | 110 | else |
| 122 | val = val != 0; | 111 | val = val != 0; |
| 123 | } | 112 | } |
| 124 | return val; | 113 | return val; |
| 125 | } | 114 | } |
| 126 | 115 | ||
| 127 | static inline u32 vfp_hi64to32jamming(u64 val) | 116 | static inline u32 vfp_hi64to32jamming(u64 val) |
| 128 | { | 117 | { |
| 129 | u32 v; | 118 | u32 v; |
| 130 | u32 highval = val >> 32; | 119 | u32 highval = val >> 32; |
| 131 | u32 lowval = val & 0xffffffff; | 120 | u32 lowval = val & 0xffffffff; |
| 132 | 121 | ||
| 133 | if (lowval >= 1) | 122 | if (lowval >= 1) |
| 134 | v = highval | 1; | 123 | v = highval | 1; |
| 135 | else | 124 | else |
| 136 | v = highval; | 125 | v = highval; |
| 137 | 126 | ||
| 138 | return v; | 127 | return v; |
| 139 | } | 128 | } |
| 140 | 129 | ||
| 141 | static inline void add128(u64 *resh, u64 *resl, u64 nh, u64 nl, u64 mh, u64 ml) | 130 | static inline void add128(u64* resh, u64* resl, u64 nh, u64 nl, u64 mh, u64 ml) |
| 142 | { | 131 | { |
| 143 | *resl = nl + ml; | 132 | *resl = nl + ml; |
| 144 | *resh = nh + mh; | 133 | *resh = nh + mh; |
| 145 | if (*resl < nl) | 134 | if (*resl < nl) |
| 146 | *resh += 1; | 135 | *resh += 1; |
| 147 | } | 136 | } |
| 148 | 137 | ||
| 149 | static inline void sub128(u64 *resh, u64 *resl, u64 nh, u64 nl, u64 mh, u64 ml) | 138 | static inline void sub128(u64* resh, u64* resl, u64 nh, u64 nl, u64 mh, u64 ml) |
| 150 | { | 139 | { |
| 151 | *resl = nl - ml; | 140 | *resl = nl - ml; |
| 152 | *resh = nh - mh; | 141 | *resh = nh - mh; |
| 153 | if (*resl > nl) | 142 | if (*resl > nl) |
| 154 | *resh -= 1; | 143 | *resh -= 1; |
| 155 | } | 144 | } |
| 156 | 145 | ||
| 157 | static inline void mul64to128(u64 *resh, u64 *resl, u64 n, u64 m) | 146 | static inline void mul64to128(u64* resh, u64* resl, u64 n, u64 m) |
| 158 | { | 147 | { |
| 159 | u32 nh, nl, mh, ml; | 148 | u32 nh, nl, mh, ml; |
| 160 | u64 rh, rma, rmb, rl; | 149 | u64 rh, rma, rmb, rl; |
| 161 | 150 | ||
| 162 | nl = n; | 151 | nl = n; |
| 163 | ml = m; | 152 | ml = m; |
| 164 | rl = (u64)nl * ml; | 153 | rl = (u64)nl * ml; |
| 165 | 154 | ||
| 166 | nh = n >> 32; | 155 | nh = n >> 32; |
| 167 | rma = (u64)nh * ml; | 156 | rma = (u64)nh * ml; |
| 168 | 157 | ||
| 169 | mh = m >> 32; | 158 | mh = m >> 32; |
| 170 | rmb = (u64)nl * mh; | 159 | rmb = (u64)nl * mh; |
| 171 | rma += rmb; | 160 | rma += rmb; |
| 172 | 161 | ||
| 173 | rh = (u64)nh * mh; | 162 | rh = (u64)nh * mh; |
| 174 | rh += ((u64)(rma < rmb) << 32) + (rma >> 32); | 163 | rh += ((u64)(rma < rmb) << 32) + (rma >> 32); |
| 175 | 164 | ||
| 176 | rma <<= 32; | 165 | rma <<= 32; |
| 177 | rl += rma; | 166 | rl += rma; |
| 178 | rh += (rl < rma); | 167 | rh += (rl < rma); |
| 179 | 168 | ||
| 180 | *resl = rl; | 169 | *resl = rl; |
| 181 | *resh = rh; | 170 | *resh = rh; |
| 182 | } | 171 | } |
| 183 | 172 | ||
| 184 | static inline void shift64left(u64 *resh, u64 *resl, u64 n) | 173 | static inline void shift64left(u64* resh, u64* resl, u64 n) |
| 185 | { | 174 | { |
| 186 | *resh = n >> 63; | 175 | *resh = n >> 63; |
| 187 | *resl = n << 1; | 176 | *resl = n << 1; |
| 188 | } | 177 | } |
| 189 | 178 | ||
| 190 | static inline u64 vfp_hi64multiply64(u64 n, u64 m) | 179 | static inline u64 vfp_hi64multiply64(u64 n, u64 m) |
| 191 | { | 180 | { |
| 192 | u64 rh, rl; | 181 | u64 rh, rl; |
| 193 | mul64to128(&rh, &rl, n, m); | 182 | mul64to128(&rh, &rl, n, m); |
| 194 | return rh | (rl != 0); | 183 | return rh | (rl != 0); |
| 195 | } | 184 | } |
| 196 | 185 | ||
| 197 | static inline u64 vfp_estimate_div128to64(u64 nh, u64 nl, u64 m) | 186 | static inline u64 vfp_estimate_div128to64(u64 nh, u64 nl, u64 m) |
| 198 | { | 187 | { |
| 199 | u64 mh, ml, remh, reml, termh, terml, z; | 188 | u64 mh, ml, remh, reml, termh, terml, z; |
| 200 | 189 | ||
| 201 | if (nh >= m) | 190 | if (nh >= m) |
| 202 | return ~0ULL; | 191 | return ~0ULL; |
| 203 | mh = m >> 32; | 192 | mh = m >> 32; |
| 204 | if (mh << 32 <= nh) { | 193 | if (mh << 32 <= nh) { |
| 205 | z = 0xffffffff00000000ULL; | 194 | z = 0xffffffff00000000ULL; |
| 206 | } else { | 195 | } else { |
| 207 | z = nh; | 196 | z = nh; |
| 208 | do_div(z, mh); | 197 | do_div(z, mh); |
| 209 | z <<= 32; | 198 | z <<= 32; |
| 210 | } | 199 | } |
| 211 | mul64to128(&termh, &terml, m, z); | 200 | mul64to128(&termh, &terml, m, z); |
| 212 | sub128(&remh, &reml, nh, nl, termh, terml); | 201 | sub128(&remh, &reml, nh, nl, termh, terml); |
| 213 | ml = m << 32; | 202 | ml = m << 32; |
| 214 | while ((s64)remh < 0) { | 203 | while ((s64)remh < 0) { |
| 215 | z -= 0x100000000ULL; | 204 | z -= 0x100000000ULL; |
| 216 | add128(&remh, &reml, remh, reml, mh, ml); | 205 | add128(&remh, &reml, remh, reml, mh, ml); |
| 217 | } | 206 | } |
| 218 | remh = (remh << 32) | (reml >> 32); | 207 | remh = (remh << 32) | (reml >> 32); |
| 219 | if (mh << 32 <= remh) { | 208 | if (mh << 32 <= remh) { |
| 220 | z |= 0xffffffff; | 209 | z |= 0xffffffff; |
| 221 | } else { | 210 | } else { |
| 222 | do_div(remh, mh); | 211 | do_div(remh, mh); |
| 223 | z |= remh; | 212 | z |= remh; |
| 224 | } | 213 | } |
| 225 | return z; | 214 | return z; |
| 226 | } | 215 | } |
| 227 | 216 | ||
| 228 | /* | 217 | // Operations on unpacked elements |
| 229 | * Operations on unpacked elements | 218 | #define vfp_sign_negate(sign) (sign ^ 0x8000) |
| 230 | */ | ||
| 231 | #define vfp_sign_negate(sign) (sign ^ 0x8000) | ||
| 232 | 219 | ||
| 233 | /* | 220 | // Single-precision |
| 234 | * Single-precision | ||
| 235 | */ | ||
| 236 | struct vfp_single { | 221 | struct vfp_single { |
| 237 | s16 exponent; | 222 | s16 exponent; |
| 238 | u16 sign; | 223 | u16 sign; |
| 239 | u32 significand; | 224 | u32 significand; |
| 240 | }; | 225 | }; |
| 241 | 226 | ||
| 242 | /* | 227 | // VFP_SINGLE_MANTISSA_BITS - number of bits in the mantissa |
| 243 | * VFP_SINGLE_MANTISSA_BITS - number of bits in the mantissa | 228 | // VFP_SINGLE_EXPONENT_BITS - number of bits in the exponent |
| 244 | * VFP_SINGLE_EXPONENT_BITS - number of bits in the exponent | 229 | // VFP_SINGLE_LOW_BITS - number of low bits in the unpacked significand |
| 245 | * VFP_SINGLE_LOW_BITS - number of low bits in the unpacked significand | 230 | // which are not propagated to the float upon packing. |
| 246 | * which are not propagated to the float upon packing. | 231 | #define VFP_SINGLE_MANTISSA_BITS (23) |
| 247 | */ | 232 | #define VFP_SINGLE_EXPONENT_BITS (8) |
| 248 | #define VFP_SINGLE_MANTISSA_BITS (23) | 233 | #define VFP_SINGLE_LOW_BITS (32 - VFP_SINGLE_MANTISSA_BITS - 2) |
| 249 | #define VFP_SINGLE_EXPONENT_BITS (8) | 234 | #define VFP_SINGLE_LOW_BITS_MASK ((1 << VFP_SINGLE_LOW_BITS) - 1) |
| 250 | #define VFP_SINGLE_LOW_BITS (32 - VFP_SINGLE_MANTISSA_BITS - 2) | ||
| 251 | #define VFP_SINGLE_LOW_BITS_MASK ((1 << VFP_SINGLE_LOW_BITS) - 1) | ||
| 252 | 235 | ||
| 253 | /* | 236 | // The bit in an unpacked float which indicates that it is a quiet NaN |
| 254 | * The bit in an unpacked float which indicates that it is a quiet NaN | ||
| 255 | */ | ||
| 256 | #define VFP_SINGLE_SIGNIFICAND_QNAN (1 << (VFP_SINGLE_MANTISSA_BITS - 1 + VFP_SINGLE_LOW_BITS)) | 237 | #define VFP_SINGLE_SIGNIFICAND_QNAN (1 << (VFP_SINGLE_MANTISSA_BITS - 1 + VFP_SINGLE_LOW_BITS)) |
| 257 | 238 | ||
| 258 | /* | 239 | // Operations on packed single-precision numbers |
| 259 | * Operations on packed single-precision numbers | 240 | #define vfp_single_packed_sign(v) ((v) & 0x80000000) |
| 260 | */ | 241 | #define vfp_single_packed_negate(v) ((v) ^ 0x80000000) |
| 261 | #define vfp_single_packed_sign(v) ((v) & 0x80000000) | 242 | #define vfp_single_packed_abs(v) ((v) & ~0x80000000) |
| 262 | #define vfp_single_packed_negate(v) ((v) ^ 0x80000000) | 243 | #define vfp_single_packed_exponent(v) (((v) >> VFP_SINGLE_MANTISSA_BITS) & ((1 << VFP_SINGLE_EXPONENT_BITS) - 1)) |
| 263 | #define vfp_single_packed_abs(v) ((v) & ~0x80000000) | 244 | #define vfp_single_packed_mantissa(v) ((v) & ((1 << VFP_SINGLE_MANTISSA_BITS) - 1)) |
| 264 | #define vfp_single_packed_exponent(v) (((v) >> VFP_SINGLE_MANTISSA_BITS) & ((1 << VFP_SINGLE_EXPONENT_BITS) - 1)) | 245 | |
| 265 | #define vfp_single_packed_mantissa(v) ((v) & ((1 << VFP_SINGLE_MANTISSA_BITS) - 1)) | 246 | // Unpack a single-precision float. Note that this returns the magnitude |
| 266 | 247 | // of the single-precision float mantissa with the 1. if necessary, | |
| 267 | /* | 248 | // aligned to bit 30. |
| 268 | * Unpack a single-precision float. Note that this returns the magnitude | 249 | static inline void vfp_single_unpack(vfp_single* s, s32 val) |
| 269 | * of the single-precision float mantissa with the 1. if necessary, | ||
| 270 | * aligned to bit 30. | ||
| 271 | */ | ||
| 272 | static inline void vfp_single_unpack(struct vfp_single *s, s32 val) | ||
| 273 | { | 250 | { |
| 274 | u32 significand; | 251 | u32 significand; |
| 275 | 252 | ||
| 276 | s->sign = vfp_single_packed_sign(val) >> 16, | 253 | s->sign = vfp_single_packed_sign(val) >> 16, |
| 277 | s->exponent = vfp_single_packed_exponent(val); | 254 | s->exponent = vfp_single_packed_exponent(val); |
| 278 | 255 | ||
| 279 | significand = (u32) val; | 256 | significand = (u32) val; |
| 280 | significand = (significand << (32 - VFP_SINGLE_MANTISSA_BITS)) >> 2; | 257 | significand = (significand << (32 - VFP_SINGLE_MANTISSA_BITS)) >> 2; |
| 281 | if (s->exponent && s->exponent != 255) | 258 | if (s->exponent && s->exponent != 255) |
| 282 | significand |= 0x40000000; | 259 | significand |= 0x40000000; |
| 283 | s->significand = significand; | 260 | s->significand = significand; |
| 284 | } | 261 | } |
| 285 | 262 | ||
| 286 | /* | 263 | // Re-pack a single-precision float. This assumes that the float is |
| 287 | * Re-pack a single-precision float. This assumes that the float is | 264 | // already normalised such that the MSB is bit 30, _not_ bit 31. |
| 288 | * already normalised such that the MSB is bit 30, _not_ bit 31. | 265 | static inline s32 vfp_single_pack(vfp_single* s) |
| 289 | */ | ||
| 290 | static inline s32 vfp_single_pack(struct vfp_single *s) | ||
| 291 | { | 266 | { |
| 292 | u32 val; | 267 | u32 val = (s->sign << 16) + |
| 293 | val = (s->sign << 16) + | 268 | (s->exponent << VFP_SINGLE_MANTISSA_BITS) + |
| 294 | (s->exponent << VFP_SINGLE_MANTISSA_BITS) + | 269 | (s->significand >> VFP_SINGLE_LOW_BITS); |
| 295 | (s->significand >> VFP_SINGLE_LOW_BITS); | 270 | return (s32)val; |
| 296 | return (s32)val; | ||
| 297 | } | 271 | } |
| 298 | 272 | ||
| 299 | #define VFP_NUMBER (1<<0) | 273 | enum : u32 { |
| 300 | #define VFP_ZERO (1<<1) | 274 | VFP_NUMBER = (1 << 0), |
| 301 | #define VFP_DENORMAL (1<<2) | 275 | VFP_ZERO = (1 << 1), |
| 302 | #define VFP_INFINITY (1<<3) | 276 | VFP_DENORMAL = (1 << 2), |
| 303 | #define VFP_NAN (1<<4) | 277 | VFP_INFINITY = (1 << 3), |
| 304 | #define VFP_NAN_SIGNAL (1<<5) | 278 | VFP_NAN = (1 << 4), |
| 279 | VFP_NAN_SIGNAL = (1 << 5), | ||
| 305 | 280 | ||
| 306 | #define VFP_QNAN (VFP_NAN) | 281 | VFP_QNAN = (VFP_NAN), |
| 307 | #define VFP_SNAN (VFP_NAN|VFP_NAN_SIGNAL) | 282 | VFP_SNAN = (VFP_NAN|VFP_NAN_SIGNAL) |
| 283 | }; | ||
| 308 | 284 | ||
| 309 | static inline int vfp_single_type(struct vfp_single *s) | 285 | static inline int vfp_single_type(vfp_single* s) |
| 310 | { | 286 | { |
| 311 | int type = VFP_NUMBER; | 287 | int type = VFP_NUMBER; |
| 312 | if (s->exponent == 255) { | 288 | if (s->exponent == 255) { |
| 313 | if (s->significand == 0) | 289 | if (s->significand == 0) |
| 314 | type = VFP_INFINITY; | 290 | type = VFP_INFINITY; |
| 315 | else if (s->significand & VFP_SINGLE_SIGNIFICAND_QNAN) | 291 | else if (s->significand & VFP_SINGLE_SIGNIFICAND_QNAN) |
| 316 | type = VFP_QNAN; | 292 | type = VFP_QNAN; |
| 317 | else | 293 | else |
| 318 | type = VFP_SNAN; | 294 | type = VFP_SNAN; |
| 319 | } else if (s->exponent == 0) { | 295 | } else if (s->exponent == 0) { |
| 320 | if (s->significand == 0) | 296 | if (s->significand == 0) |
| 321 | type |= VFP_ZERO; | 297 | type |= VFP_ZERO; |
| 322 | else | 298 | else |
| 323 | type |= VFP_DENORMAL; | 299 | type |= VFP_DENORMAL; |
| 324 | } | 300 | } |
| 325 | return type; | 301 | return type; |
| 326 | } | 302 | } |
| 327 | 303 | ||
| 328 | 304 | ||
| 329 | u32 vfp_single_normaliseround(ARMul_State* state, int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func); | 305 | u32 vfp_single_normaliseround(ARMul_State* state, int sd, vfp_single* vs, u32 fpscr, u32 exceptions, const char* func); |
| 330 | 306 | ||
| 331 | /* | 307 | // Double-precision |
| 332 | * Double-precision | ||
| 333 | */ | ||
| 334 | struct vfp_double { | 308 | struct vfp_double { |
| 335 | s16 exponent; | 309 | s16 exponent; |
| 336 | u16 sign; | 310 | u16 sign; |
| 337 | u64 significand; | 311 | u64 significand; |
| 338 | }; | 312 | }; |
| 339 | 313 | ||
| 340 | /* | 314 | // VFP_REG_ZERO is a special register number for vfp_get_double |
| 341 | * VFP_REG_ZERO is a special register number for vfp_get_double | 315 | // which returns (double)0.0. This is useful for the compare with |
| 342 | * which returns (double)0.0. This is useful for the compare with | 316 | // zero instructions. |
| 343 | * zero instructions. | ||
| 344 | */ | ||
| 345 | #ifdef CONFIG_VFPv3 | 317 | #ifdef CONFIG_VFPv3 |
| 346 | #define VFP_REG_ZERO 32 | 318 | #define VFP_REG_ZERO 32 |
| 347 | #else | 319 | #else |
| 348 | #define VFP_REG_ZERO 16 | 320 | #define VFP_REG_ZERO 16 |
| 349 | #endif | 321 | #endif |
| 350 | 322 | ||
| 351 | #define VFP_DOUBLE_MANTISSA_BITS (52) | 323 | #define VFP_DOUBLE_MANTISSA_BITS (52) |
| 352 | #define VFP_DOUBLE_EXPONENT_BITS (11) | 324 | #define VFP_DOUBLE_EXPONENT_BITS (11) |
| 353 | #define VFP_DOUBLE_LOW_BITS (64 - VFP_DOUBLE_MANTISSA_BITS - 2) | 325 | #define VFP_DOUBLE_LOW_BITS (64 - VFP_DOUBLE_MANTISSA_BITS - 2) |
| 354 | #define VFP_DOUBLE_LOW_BITS_MASK ((1 << VFP_DOUBLE_LOW_BITS) - 1) | 326 | #define VFP_DOUBLE_LOW_BITS_MASK ((1 << VFP_DOUBLE_LOW_BITS) - 1) |
| 355 | 327 | ||
| 356 | /* | 328 | // The bit in an unpacked double which indicates that it is a quiet NaN |
| 357 | * The bit in an unpacked double which indicates that it is a quiet NaN | 329 | #define VFP_DOUBLE_SIGNIFICAND_QNAN (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1 + VFP_DOUBLE_LOW_BITS)) |
| 358 | */ | 330 | |
| 359 | #define VFP_DOUBLE_SIGNIFICAND_QNAN (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1 + VFP_DOUBLE_LOW_BITS)) | 331 | // Operations on packed double-precision numbers |
| 360 | 332 | #define vfp_double_packed_sign(v) ((v) & (1ULL << 63)) | |
| 361 | /* | 333 | #define vfp_double_packed_negate(v) ((v) ^ (1ULL << 63)) |
| 362 | * Operations on packed double-precision numbers | 334 | #define vfp_double_packed_abs(v) ((v) & ~(1ULL << 63)) |
| 363 | */ | 335 | #define vfp_double_packed_exponent(v) (((v) >> VFP_DOUBLE_MANTISSA_BITS) & ((1 << VFP_DOUBLE_EXPONENT_BITS) - 1)) |
| 364 | #define vfp_double_packed_sign(v) ((v) & (1ULL << 63)) | 336 | #define vfp_double_packed_mantissa(v) ((v) & ((1ULL << VFP_DOUBLE_MANTISSA_BITS) - 1)) |
| 365 | #define vfp_double_packed_negate(v) ((v) ^ (1ULL << 63)) | 337 | |
| 366 | #define vfp_double_packed_abs(v) ((v) & ~(1ULL << 63)) | 338 | // Unpack a double-precision float. Note that this returns the magnitude |
| 367 | #define vfp_double_packed_exponent(v) (((v) >> VFP_DOUBLE_MANTISSA_BITS) & ((1 << VFP_DOUBLE_EXPONENT_BITS) - 1)) | 339 | // of the double-precision float mantissa with the 1. if necessary, |
| 368 | #define vfp_double_packed_mantissa(v) ((v) & ((1ULL << VFP_DOUBLE_MANTISSA_BITS) - 1)) | 340 | // aligned to bit 62. |
| 369 | 341 | static inline void vfp_double_unpack(vfp_double* s, s64 val) | |
| 370 | /* | ||
| 371 | * Unpack a double-precision float. Note that this returns the magnitude | ||
| 372 | * of the double-precision float mantissa with the 1. if necessary, | ||
| 373 | * aligned to bit 62. | ||
| 374 | */ | ||
| 375 | static inline void vfp_double_unpack(struct vfp_double *s, s64 val) | ||
| 376 | { | 342 | { |
| 377 | u64 significand; | 343 | u64 significand; |
| 378 | 344 | ||
| 379 | s->sign = vfp_double_packed_sign(val) >> 48; | 345 | s->sign = vfp_double_packed_sign(val) >> 48; |
| 380 | s->exponent = vfp_double_packed_exponent(val); | 346 | s->exponent = vfp_double_packed_exponent(val); |
| 381 | 347 | ||
| 382 | significand = (u64) val; | 348 | significand = (u64) val; |
| 383 | significand = (significand << (64 - VFP_DOUBLE_MANTISSA_BITS)) >> 2; | 349 | significand = (significand << (64 - VFP_DOUBLE_MANTISSA_BITS)) >> 2; |
| 384 | if (s->exponent && s->exponent != 2047) | 350 | if (s->exponent && s->exponent != 2047) |
| 385 | significand |= (1ULL << 62); | 351 | significand |= (1ULL << 62); |
| 386 | s->significand = significand; | 352 | s->significand = significand; |
| 387 | } | 353 | } |
| 388 | 354 | ||
| 389 | /* | 355 | // Re-pack a double-precision float. This assumes that the float is |
| 390 | * Re-pack a double-precision float. This assumes that the float is | 356 | // already normalised such that the MSB is bit 30, _not_ bit 31. |
| 391 | * already normalised such that the MSB is bit 30, _not_ bit 31. | 357 | static inline s64 vfp_double_pack(vfp_double* s) |
| 392 | */ | ||
| 393 | static inline s64 vfp_double_pack(struct vfp_double *s) | ||
| 394 | { | 358 | { |
| 395 | u64 val; | 359 | u64 val = ((u64)s->sign << 48) + |
| 396 | val = ((u64)s->sign << 48) + | 360 | ((u64)s->exponent << VFP_DOUBLE_MANTISSA_BITS) + |
| 397 | ((u64)s->exponent << VFP_DOUBLE_MANTISSA_BITS) + | 361 | (s->significand >> VFP_DOUBLE_LOW_BITS); |
| 398 | (s->significand >> VFP_DOUBLE_LOW_BITS); | 362 | return (s64)val; |
| 399 | return (s64)val; | ||
| 400 | } | 363 | } |
| 401 | 364 | ||
| 402 | static inline int vfp_double_type(struct vfp_double *s) | 365 | static inline int vfp_double_type(vfp_double* s) |
| 403 | { | 366 | { |
| 404 | int type = VFP_NUMBER; | 367 | int type = VFP_NUMBER; |
| 405 | if (s->exponent == 2047) { | 368 | if (s->exponent == 2047) { |
| 406 | if (s->significand == 0) | 369 | if (s->significand == 0) |
| 407 | type = VFP_INFINITY; | 370 | type = VFP_INFINITY; |
| 408 | else if (s->significand & VFP_DOUBLE_SIGNIFICAND_QNAN) | 371 | else if (s->significand & VFP_DOUBLE_SIGNIFICAND_QNAN) |
| 409 | type = VFP_QNAN; | 372 | type = VFP_QNAN; |
| 410 | else | 373 | else |
| 411 | type = VFP_SNAN; | 374 | type = VFP_SNAN; |
| 412 | } else if (s->exponent == 0) { | 375 | } else if (s->exponent == 0) { |
| 413 | if (s->significand == 0) | 376 | if (s->significand == 0) |
| 414 | type |= VFP_ZERO; | 377 | type |= VFP_ZERO; |
| 415 | else | 378 | else |
| 416 | type |= VFP_DENORMAL; | 379 | type |= VFP_DENORMAL; |
| 417 | } | 380 | } |
| 418 | return type; | 381 | return type; |
| 419 | } | 382 | } |
| 420 | 383 | ||
| 421 | u32 vfp_double_normaliseround(ARMul_State* state, int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func); | ||
| 422 | |||
| 423 | u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand); | 384 | u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand); |
| 424 | 385 | ||
| 425 | /* | 386 | // A special flag to tell the normalisation code not to normalise. |
| 426 | * A special flag to tell the normalisation code not to normalise. | 387 | #define VFP_NAN_FLAG 0x100 |
| 427 | */ | 388 | |
| 428 | #define VFP_NAN_FLAG 0x100 | 389 | // A bit pattern used to indicate the initial (unset) value of the |
| 429 | 390 | // exception mask, in case nothing handles an instruction. This | |
| 430 | /* | 391 | // doesn't include the NAN flag, which get masked out before |
| 431 | * A bit pattern used to indicate the initial (unset) value of the | 392 | // we check for an error. |
| 432 | * exception mask, in case nothing handles an instruction. This | 393 | #define VFP_EXCEPTION_ERROR ((u32)-1 & ~VFP_NAN_FLAG) |
| 433 | * doesn't include the NAN flag, which get masked out before | 394 | |
| 434 | * we check for an error. | 395 | // A flag to tell vfp instruction type. |
| 435 | */ | 396 | // OP_SCALAR - This operation always operates in scalar mode |
| 436 | #define VFP_EXCEPTION_ERROR ((u32)-1 & ~VFP_NAN_FLAG) | 397 | // OP_SD - The instruction exceptionally writes to a single precision result. |
| 437 | 398 | // OP_DD - The instruction exceptionally writes to a double precision result. | |
| 438 | /* | 399 | // OP_SM - The instruction exceptionally reads from a single precision operand. |
| 439 | * A flag to tell vfp instruction type. | 400 | enum : u32 { |
| 440 | * OP_SCALAR - this operation always operates in scalar mode | 401 | OP_SCALAR = (1 << 0), |
| 441 | * OP_SD - the instruction exceptionally writes to a single precision result. | 402 | OP_SD = (1 << 1), |
| 442 | * OP_DD - the instruction exceptionally writes to a double precision result. | 403 | OP_DD = (1 << 1), |
| 443 | * OP_SM - the instruction exceptionally reads from a single precision operand. | 404 | OP_SM = (1 << 2) |
| 444 | */ | 405 | }; |
| 445 | #define OP_SCALAR (1 << 0) | ||
| 446 | #define OP_SD (1 << 1) | ||
| 447 | #define OP_DD (1 << 1) | ||
| 448 | #define OP_SM (1 << 2) | ||
| 449 | 406 | ||
| 450 | struct op { | 407 | struct op { |
| 451 | u32 (* const fn)(ARMul_State* state, int dd, int dn, int dm, u32 fpscr); | 408 | u32 (* const fn)(ARMul_State* state, int dd, int dn, int dm, u32 fpscr); |
| 452 | u32 flags; | 409 | u32 flags; |
| 453 | }; | 410 | }; |
| 454 | 411 | ||
| 455 | static inline u32 fls(ARMword x) | 412 | static inline u32 fls(ARMword x) |
| 456 | { | 413 | { |
| 457 | int r = 32; | 414 | int r = 32; |
| 458 | 415 | ||
| 459 | if (!x) | 416 | if (!x) |
| 460 | return 0; | 417 | return 0; |
| 461 | if (!(x & 0xffff0000u)) { | 418 | if (!(x & 0xffff0000u)) { |
| 462 | x <<= 16; | 419 | x <<= 16; |
| 463 | r -= 16; | 420 | r -= 16; |
| 464 | } | 421 | } |
| 465 | if (!(x & 0xff000000u)) { | 422 | if (!(x & 0xff000000u)) { |
| 466 | x <<= 8; | 423 | x <<= 8; |
| 467 | r -= 8; | 424 | r -= 8; |
| 468 | } | 425 | } |
| 469 | if (!(x & 0xf0000000u)) { | 426 | if (!(x & 0xf0000000u)) { |
| 470 | x <<= 4; | 427 | x <<= 4; |
| 471 | r -= 4; | 428 | r -= 4; |
| 472 | } | 429 | } |
| 473 | if (!(x & 0xc0000000u)) { | 430 | if (!(x & 0xc0000000u)) { |
| 474 | x <<= 2; | 431 | x <<= 2; |
| 475 | r -= 2; | 432 | r -= 2; |
| 476 | } | 433 | } |
| 477 | if (!(x & 0x80000000u)) { | 434 | if (!(x & 0x80000000u)) { |
| 478 | x <<= 1; | 435 | x <<= 1; |
| 479 | r -= 1; | 436 | r -= 1; |
| 480 | } | 437 | } |
| 481 | return r; | 438 | return r; |
| 482 | 439 | ||
| 483 | } | 440 | } |
| 484 | 441 | ||
| 485 | u32 vfp_double_normaliseroundintern(ARMul_State* state, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func); | 442 | u32 vfp_double_multiply(vfp_double* vdd, vfp_double* vdn, vfp_double* vdm, u32 fpscr); |
| 486 | u32 vfp_double_multiply(struct vfp_double *vdd, struct vfp_double *vdn, struct vfp_double *vdm, u32 fpscr); | 443 | u32 vfp_double_add(vfp_double* vdd, vfp_double* vdn, vfp_double *vdm, u32 fpscr); |
| 487 | u32 vfp_double_add(struct vfp_double *vdd, struct vfp_double *vdn, struct vfp_double *vdm, u32 fpscr); | 444 | u32 vfp_double_normaliseround(ARMul_State* state, int dd, vfp_double* vd, u32 fpscr, u32 exceptions, const char* func); |
| 488 | u32 vfp_double_fcvtsinterncutting(ARMul_State* state, int sd, struct vfp_double* dm, u32 fpscr); | ||
diff --git a/src/core/arm/skyeye_common/vfp/vfpdouble.cpp b/src/core/arm/skyeye_common/vfp/vfpdouble.cpp index d35ca510a..2c15db12b 100644 --- a/src/core/arm/skyeye_common/vfp/vfpdouble.cpp +++ b/src/core/arm/skyeye_common/vfp/vfpdouble.cpp | |||
| @@ -83,134 +83,6 @@ static void vfp_double_normalise_denormal(struct vfp_double *vd) | |||
| 83 | vfp_double_dump("normalise_denormal: out", vd); | 83 | vfp_double_dump("normalise_denormal: out", vd); |
| 84 | } | 84 | } |
| 85 | 85 | ||
| 86 | u32 vfp_double_normaliseroundintern(ARMul_State* state, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func) | ||
| 87 | { | ||
| 88 | u64 significand, incr; | ||
| 89 | int exponent, shift, underflow; | ||
| 90 | u32 rmode; | ||
| 91 | |||
| 92 | vfp_double_dump("pack: in", vd); | ||
| 93 | |||
| 94 | /* | ||
| 95 | * Infinities and NaNs are a special case. | ||
| 96 | */ | ||
| 97 | if (vd->exponent == 2047 && (vd->significand == 0 || exceptions)) | ||
| 98 | goto pack; | ||
| 99 | |||
| 100 | /* | ||
| 101 | * Special-case zero. | ||
| 102 | */ | ||
| 103 | if (vd->significand == 0) { | ||
| 104 | vd->exponent = 0; | ||
| 105 | goto pack; | ||
| 106 | } | ||
| 107 | |||
| 108 | exponent = vd->exponent; | ||
| 109 | significand = vd->significand; | ||
| 110 | |||
| 111 | shift = 32 - fls((ARMword)(significand >> 32)); | ||
| 112 | if (shift == 32) | ||
| 113 | shift = 64 - fls((ARMword)significand); | ||
| 114 | if (shift) { | ||
| 115 | exponent -= shift; | ||
| 116 | significand <<= shift; | ||
| 117 | } | ||
| 118 | |||
| 119 | #if 1 | ||
| 120 | vd->exponent = exponent; | ||
| 121 | vd->significand = significand; | ||
| 122 | vfp_double_dump("pack: normalised", vd); | ||
| 123 | #endif | ||
| 124 | |||
| 125 | /* | ||
| 126 | * Tiny number? | ||
| 127 | */ | ||
| 128 | underflow = exponent < 0; | ||
| 129 | if (underflow) { | ||
| 130 | significand = vfp_shiftright64jamming(significand, -exponent); | ||
| 131 | exponent = 0; | ||
| 132 | #if 1 | ||
| 133 | vd->exponent = exponent; | ||
| 134 | vd->significand = significand; | ||
| 135 | vfp_double_dump("pack: tiny number", vd); | ||
| 136 | #endif | ||
| 137 | if (!(significand & ((1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1))) | ||
| 138 | underflow = 0; | ||
| 139 | } | ||
| 140 | |||
| 141 | /* | ||
| 142 | * Select rounding increment. | ||
| 143 | */ | ||
| 144 | incr = 0; | ||
| 145 | rmode = fpscr & FPSCR_RMODE_MASK; | ||
| 146 | |||
| 147 | if (rmode == FPSCR_ROUND_NEAREST) { | ||
| 148 | incr = 1ULL << VFP_DOUBLE_LOW_BITS; | ||
| 149 | if ((significand & (1ULL << (VFP_DOUBLE_LOW_BITS + 1))) == 0) | ||
| 150 | incr -= 1; | ||
| 151 | } | ||
| 152 | else if (rmode == FPSCR_ROUND_TOZERO) { | ||
| 153 | incr = 0; | ||
| 154 | } | ||
| 155 | else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vd->sign != 0)) | ||
| 156 | incr = (1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1; | ||
| 157 | |||
| 158 | LOG_TRACE(Core_ARM11, "VFP: rounding increment = 0x%08llx\n", incr); | ||
| 159 | |||
| 160 | /* | ||
| 161 | * Is our rounding going to overflow? | ||
| 162 | */ | ||
| 163 | if ((significand + incr) < significand) { | ||
| 164 | exponent += 1; | ||
| 165 | significand = (significand >> 1) | (significand & 1); | ||
| 166 | incr >>= 1; | ||
| 167 | #if 1 | ||
| 168 | vd->exponent = exponent; | ||
| 169 | vd->significand = significand; | ||
| 170 | vfp_double_dump("pack: overflow", vd); | ||
| 171 | #endif | ||
| 172 | } | ||
| 173 | |||
| 174 | /* | ||
| 175 | * If any of the low bits (which will be shifted out of the | ||
| 176 | * number) are non-zero, the result is inexact. | ||
| 177 | */ | ||
| 178 | if (significand & ((1 << (VFP_DOUBLE_LOW_BITS + 1)) - 1)) | ||
| 179 | exceptions |= FPSCR_IXC; | ||
| 180 | |||
| 181 | /* | ||
| 182 | * Do our rounding. | ||
| 183 | */ | ||
| 184 | significand += incr; | ||
| 185 | |||
| 186 | /* | ||
| 187 | * Infinity? | ||
| 188 | */ | ||
| 189 | if (exponent >= 2046) { | ||
| 190 | exceptions |= FPSCR_OFC | FPSCR_IXC; | ||
| 191 | if (incr == 0) { | ||
| 192 | vd->exponent = 2045; | ||
| 193 | vd->significand = 0x7fffffffffffffffULL; | ||
| 194 | } | ||
| 195 | else { | ||
| 196 | vd->exponent = 2047; /* infinity */ | ||
| 197 | vd->significand = 0; | ||
| 198 | } | ||
| 199 | } | ||
| 200 | else { | ||
| 201 | if (significand >> (VFP_DOUBLE_LOW_BITS + 1) == 0) | ||
| 202 | exponent = 0; | ||
| 203 | if (exponent || significand > 0x8000000000000000ULL) | ||
| 204 | underflow = 0; | ||
| 205 | if (underflow) | ||
| 206 | exceptions |= FPSCR_UFC; | ||
| 207 | vd->exponent = exponent; | ||
| 208 | vd->significand = significand >> 1; | ||
| 209 | } | ||
| 210 | pack: | ||
| 211 | return 0; | ||
| 212 | } | ||
| 213 | |||
| 214 | u32 vfp_double_normaliseround(ARMul_State* state, int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func) | 86 | u32 vfp_double_normaliseround(ARMul_State* state, int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func) |
| 215 | { | 87 | { |
| 216 | u64 significand, incr; | 88 | u64 significand, incr; |
| @@ -511,7 +383,7 @@ static u32 vfp_compare(ARMul_State* state, int dd, int signal_on_qnan, int dm, u | |||
| 511 | LOG_TRACE(Core_ARM11, "In %s, state=0x%x, fpscr=0x%x\n", __FUNCTION__, state, fpscr); | 383 | LOG_TRACE(Core_ARM11, "In %s, state=0x%x, fpscr=0x%x\n", __FUNCTION__, state, fpscr); |
| 512 | m = vfp_get_double(state, dm); | 384 | m = vfp_get_double(state, dm); |
| 513 | if (vfp_double_packed_exponent(m) == 2047 && vfp_double_packed_mantissa(m)) { | 385 | if (vfp_double_packed_exponent(m) == 2047 && vfp_double_packed_mantissa(m)) { |
| 514 | ret |= FPSCR_C | FPSCR_V; | 386 | ret |= FPSCR_CFLAG | FPSCR_VFLAG; |
| 515 | if (signal_on_qnan || !(vfp_double_packed_mantissa(m) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1)))) | 387 | if (signal_on_qnan || !(vfp_double_packed_mantissa(m) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1)))) |
| 516 | /* | 388 | /* |
| 517 | * Signalling NaN, or signalling on quiet NaN | 389 | * Signalling NaN, or signalling on quiet NaN |
| @@ -521,7 +393,7 @@ static u32 vfp_compare(ARMul_State* state, int dd, int signal_on_qnan, int dm, u | |||
| 521 | 393 | ||
| 522 | d = vfp_get_double(state, dd); | 394 | d = vfp_get_double(state, dd); |
| 523 | if (vfp_double_packed_exponent(d) == 2047 && vfp_double_packed_mantissa(d)) { | 395 | if (vfp_double_packed_exponent(d) == 2047 && vfp_double_packed_mantissa(d)) { |
| 524 | ret |= FPSCR_C | FPSCR_V; | 396 | ret |= FPSCR_CFLAG | FPSCR_VFLAG; |
| 525 | if (signal_on_qnan || !(vfp_double_packed_mantissa(d) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1)))) | 397 | if (signal_on_qnan || !(vfp_double_packed_mantissa(d) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1)))) |
| 526 | /* | 398 | /* |
| 527 | * Signalling NaN, or signalling on quiet NaN | 399 | * Signalling NaN, or signalling on quiet NaN |
| @@ -535,7 +407,7 @@ static u32 vfp_compare(ARMul_State* state, int dd, int signal_on_qnan, int dm, u | |||
| 535 | /* | 407 | /* |
| 536 | * equal | 408 | * equal |
| 537 | */ | 409 | */ |
| 538 | ret |= FPSCR_Z | FPSCR_C; | 410 | ret |= FPSCR_ZFLAG | FPSCR_CFLAG; |
| 539 | //printf("In %s,1 ret=0x%x\n", __FUNCTION__, ret); | 411 | //printf("In %s,1 ret=0x%x\n", __FUNCTION__, ret); |
| 540 | } else if (vfp_double_packed_sign(d ^ m)) { | 412 | } else if (vfp_double_packed_sign(d ^ m)) { |
| 541 | /* | 413 | /* |
| @@ -545,22 +417,22 @@ static u32 vfp_compare(ARMul_State* state, int dd, int signal_on_qnan, int dm, u | |||
| 545 | /* | 417 | /* |
| 546 | * d is negative, so d < m | 418 | * d is negative, so d < m |
| 547 | */ | 419 | */ |
| 548 | ret |= FPSCR_N; | 420 | ret |= FPSCR_NFLAG; |
| 549 | else | 421 | else |
| 550 | /* | 422 | /* |
| 551 | * d is positive, so d > m | 423 | * d is positive, so d > m |
| 552 | */ | 424 | */ |
| 553 | ret |= FPSCR_C; | 425 | ret |= FPSCR_CFLAG; |
| 554 | } else if ((vfp_double_packed_sign(d) != 0) ^ (d < m)) { | 426 | } else if ((vfp_double_packed_sign(d) != 0) ^ (d < m)) { |
| 555 | /* | 427 | /* |
| 556 | * d < m | 428 | * d < m |
| 557 | */ | 429 | */ |
| 558 | ret |= FPSCR_N; | 430 | ret |= FPSCR_NFLAG; |
| 559 | } else if ((vfp_double_packed_sign(d) != 0) ^ (d > m)) { | 431 | } else if ((vfp_double_packed_sign(d) != 0) ^ (d > m)) { |
| 560 | /* | 432 | /* |
| 561 | * d > m | 433 | * d > m |
| 562 | */ | 434 | */ |
| 563 | ret |= FPSCR_C; | 435 | ret |= FPSCR_CFLAG; |
| 564 | } | 436 | } |
| 565 | } | 437 | } |
| 566 | LOG_TRACE(Core_ARM11, "In %s, state=0x%x, ret=0x%x\n", __FUNCTION__, state, ret); | 438 | LOG_TRACE(Core_ARM11, "In %s, state=0x%x, ret=0x%x\n", __FUNCTION__, state, ret); |
| @@ -592,49 +464,6 @@ static u32 vfp_double_fcmpez(ARMul_State* state, int dd, int unused, int dm, u32 | |||
| 592 | return vfp_compare(state, dd, 1, VFP_REG_ZERO, fpscr); | 464 | return vfp_compare(state, dd, 1, VFP_REG_ZERO, fpscr); |
| 593 | } | 465 | } |
| 594 | 466 | ||
| 595 | u32 vfp_double_fcvtsinterncutting(ARMul_State* state, int sd, struct vfp_double* dm, u32 fpscr) //ichfly for internal use only | ||
| 596 | { | ||
| 597 | struct vfp_single vsd; | ||
| 598 | int tm; | ||
| 599 | u32 exceptions = 0; | ||
| 600 | |||
| 601 | LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__); | ||
| 602 | |||
| 603 | tm = vfp_double_type(dm); | ||
| 604 | |||
| 605 | /* | ||
| 606 | * If we have a signalling NaN, signal invalid operation. | ||
| 607 | */ | ||
| 608 | if (tm == VFP_SNAN) | ||
| 609 | exceptions = FPSCR_IOC; | ||
| 610 | |||
| 611 | if (tm & VFP_DENORMAL) | ||
| 612 | vfp_double_normalise_denormal(dm); | ||
| 613 | |||
| 614 | vsd.sign = dm->sign; | ||
| 615 | vsd.significand = vfp_hi64to32jamming(dm->significand); | ||
| 616 | |||
| 617 | /* | ||
| 618 | * If we have an infinity or a NaN, the exponent must be 255 | ||
| 619 | */ | ||
| 620 | if (tm & (VFP_INFINITY | VFP_NAN)) { | ||
| 621 | vsd.exponent = 255; | ||
| 622 | if (tm == VFP_QNAN) | ||
| 623 | vsd.significand |= VFP_SINGLE_SIGNIFICAND_QNAN; | ||
| 624 | goto pack_nan; | ||
| 625 | } | ||
| 626 | else if (tm & VFP_ZERO) | ||
| 627 | vsd.exponent = 0; | ||
| 628 | else | ||
| 629 | vsd.exponent = dm->exponent - (1023 - 127); | ||
| 630 | |||
| 631 | return vfp_single_normaliseround(state, sd, &vsd, fpscr, exceptions, "fcvts"); | ||
| 632 | |||
| 633 | pack_nan: | ||
| 634 | vfp_put_float(state, vfp_single_pack(&vsd), sd); | ||
| 635 | return exceptions; | ||
| 636 | } | ||
| 637 | |||
| 638 | static u32 vfp_double_fcvts(ARMul_State* state, int sd, int unused, int dm, u32 fpscr) | 467 | static u32 vfp_double_fcvts(ARMul_State* state, int sd, int unused, int dm, u32 fpscr) |
| 639 | { | 468 | { |
| 640 | struct vfp_double vdm; | 469 | struct vfp_double vdm; |
| @@ -723,7 +552,7 @@ static u32 vfp_double_ftoui(ARMul_State* state, int sd, int unused, int dm, u32 | |||
| 723 | exceptions |= FPSCR_IDC; | 552 | exceptions |= FPSCR_IDC; |
| 724 | 553 | ||
| 725 | if (tm & VFP_NAN) | 554 | if (tm & VFP_NAN) |
| 726 | vdm.sign = 0; | 555 | vdm.sign = 1; |
| 727 | 556 | ||
| 728 | if (vdm.exponent >= 1023 + 32) { | 557 | if (vdm.exponent >= 1023 + 32) { |
| 729 | d = vdm.sign ? 0 : 0xffffffff; | 558 | d = vdm.sign ? 0 : 0xffffffff; |
diff --git a/src/core/arm/skyeye_common/vfp/vfpsingle.cpp b/src/core/arm/skyeye_common/vfp/vfpsingle.cpp index b7872bdc4..678b63f51 100644 --- a/src/core/arm/skyeye_common/vfp/vfpsingle.cpp +++ b/src/core/arm/skyeye_common/vfp/vfpsingle.cpp | |||
| @@ -419,7 +419,7 @@ static u32 vfp_compare(ARMul_State* state, int sd, int signal_on_qnan, s32 m, u3 | |||
| 419 | 419 | ||
| 420 | d = vfp_get_float(state, sd); | 420 | d = vfp_get_float(state, sd); |
| 421 | if (vfp_single_packed_exponent(m) == 255 && vfp_single_packed_mantissa(m)) { | 421 | if (vfp_single_packed_exponent(m) == 255 && vfp_single_packed_mantissa(m)) { |
| 422 | ret |= FPSCR_C | FPSCR_V; | 422 | ret |= FPSCR_CFLAG | FPSCR_VFLAG; |
| 423 | if (signal_on_qnan || !(vfp_single_packed_mantissa(m) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1)))) | 423 | if (signal_on_qnan || !(vfp_single_packed_mantissa(m) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1)))) |
| 424 | /* | 424 | /* |
| 425 | * Signalling NaN, or signalling on quiet NaN | 425 | * Signalling NaN, or signalling on quiet NaN |
| @@ -428,7 +428,7 @@ static u32 vfp_compare(ARMul_State* state, int sd, int signal_on_qnan, s32 m, u3 | |||
| 428 | } | 428 | } |
| 429 | 429 | ||
| 430 | if (vfp_single_packed_exponent(d) == 255 && vfp_single_packed_mantissa(d)) { | 430 | if (vfp_single_packed_exponent(d) == 255 && vfp_single_packed_mantissa(d)) { |
| 431 | ret |= FPSCR_C | FPSCR_V; | 431 | ret |= FPSCR_CFLAG | FPSCR_VFLAG; |
| 432 | if (signal_on_qnan || !(vfp_single_packed_mantissa(d) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1)))) | 432 | if (signal_on_qnan || !(vfp_single_packed_mantissa(d) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1)))) |
| 433 | /* | 433 | /* |
| 434 | * Signalling NaN, or signalling on quiet NaN | 434 | * Signalling NaN, or signalling on quiet NaN |
| @@ -441,7 +441,7 @@ static u32 vfp_compare(ARMul_State* state, int sd, int signal_on_qnan, s32 m, u3 | |||
| 441 | /* | 441 | /* |
| 442 | * equal | 442 | * equal |
| 443 | */ | 443 | */ |
| 444 | ret |= FPSCR_Z | FPSCR_C; | 444 | ret |= FPSCR_ZFLAG | FPSCR_CFLAG; |
| 445 | } else if (vfp_single_packed_sign(d ^ m)) { | 445 | } else if (vfp_single_packed_sign(d ^ m)) { |
| 446 | /* | 446 | /* |
| 447 | * different signs | 447 | * different signs |
| @@ -450,22 +450,22 @@ static u32 vfp_compare(ARMul_State* state, int sd, int signal_on_qnan, s32 m, u3 | |||
| 450 | /* | 450 | /* |
| 451 | * d is negative, so d < m | 451 | * d is negative, so d < m |
| 452 | */ | 452 | */ |
| 453 | ret |= FPSCR_N; | 453 | ret |= FPSCR_NFLAG; |
| 454 | else | 454 | else |
| 455 | /* | 455 | /* |
| 456 | * d is positive, so d > m | 456 | * d is positive, so d > m |
| 457 | */ | 457 | */ |
| 458 | ret |= FPSCR_C; | 458 | ret |= FPSCR_CFLAG; |
| 459 | } else if ((vfp_single_packed_sign(d) != 0) ^ (d < m)) { | 459 | } else if ((vfp_single_packed_sign(d) != 0) ^ (d < m)) { |
| 460 | /* | 460 | /* |
| 461 | * d < m | 461 | * d < m |
| 462 | */ | 462 | */ |
| 463 | ret |= FPSCR_N; | 463 | ret |= FPSCR_NFLAG; |
| 464 | } else if ((vfp_single_packed_sign(d) != 0) ^ (d > m)) { | 464 | } else if ((vfp_single_packed_sign(d) != 0) ^ (d > m)) { |
| 465 | /* | 465 | /* |
| 466 | * d > m | 466 | * d > m |
| 467 | */ | 467 | */ |
| 468 | ret |= FPSCR_C; | 468 | ret |= FPSCR_CFLAG; |
| 469 | } | 469 | } |
| 470 | } | 470 | } |
| 471 | return ret; | 471 | return ret; |
| @@ -491,46 +491,6 @@ static u32 vfp_single_fcmpez(ARMul_State* state, int sd, int unused, s32 m, u32 | |||
| 491 | return vfp_compare(state, sd, 1, 0, fpscr); | 491 | return vfp_compare(state, sd, 1, 0, fpscr); |
| 492 | } | 492 | } |
| 493 | 493 | ||
| 494 | static s64 vfp_single_to_doubleintern(ARMul_State* state, s32 m, u32 fpscr) //ichfly for internal use only | ||
| 495 | { | ||
| 496 | struct vfp_single vsm; | ||
| 497 | struct vfp_double vdd; | ||
| 498 | int tm; | ||
| 499 | u32 exceptions = 0; | ||
| 500 | |||
| 501 | vfp_single_unpack(&vsm, m); | ||
| 502 | |||
| 503 | tm = vfp_single_type(&vsm); | ||
| 504 | |||
| 505 | /* | ||
| 506 | * If we have a signalling NaN, signal invalid operation. | ||
| 507 | */ | ||
| 508 | if (tm == VFP_SNAN) | ||
| 509 | exceptions = FPSCR_IOC; | ||
| 510 | |||
| 511 | if (tm & VFP_DENORMAL) | ||
| 512 | vfp_single_normalise_denormal(&vsm); | ||
| 513 | |||
| 514 | vdd.sign = vsm.sign; | ||
| 515 | vdd.significand = (u64)vsm.significand << 32; | ||
| 516 | |||
| 517 | /* | ||
| 518 | * If we have an infinity or NaN, the exponent must be 2047. | ||
| 519 | */ | ||
| 520 | if (tm & (VFP_INFINITY | VFP_NAN)) { | ||
| 521 | vdd.exponent = 2047; | ||
| 522 | if (tm == VFP_QNAN) | ||
| 523 | vdd.significand |= VFP_DOUBLE_SIGNIFICAND_QNAN; | ||
| 524 | goto pack_nan; | ||
| 525 | } else if (tm & VFP_ZERO) | ||
| 526 | vdd.exponent = 0; | ||
| 527 | else | ||
| 528 | vdd.exponent = vsm.exponent + (1023 - 127); | ||
| 529 | pack_nan: | ||
| 530 | vfp_double_normaliseroundintern(state, &vdd, fpscr, exceptions, "fcvtd"); | ||
| 531 | return vfp_double_pack(&vdd); | ||
| 532 | } | ||
| 533 | |||
| 534 | static u32 vfp_single_fcvtd(ARMul_State* state, int dd, int unused, s32 m, u32 fpscr) | 494 | static u32 vfp_single_fcvtd(ARMul_State* state, int dd, int unused, s32 m, u32 fpscr) |
| 535 | { | 495 | { |
| 536 | struct vfp_single vsm; | 496 | struct vfp_single vsm; |
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 52dca4dd8..a2ffbcdb7 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp | |||
| @@ -153,12 +153,8 @@ void Shutdown() { | |||
| 153 | * @return True on success, otherwise false | 153 | * @return True on success, otherwise false |
| 154 | */ | 154 | */ |
| 155 | bool LoadExec(u32 entry_point) { | 155 | bool LoadExec(u32 entry_point) { |
| 156 | Core::g_app_core->SetPC(entry_point); | ||
| 157 | |||
| 158 | // 0x30 is the typical main thread priority I've seen used so far | 156 | // 0x30 is the typical main thread priority I've seen used so far |
| 159 | g_main_thread = Kernel::SetupMainThread(0x30, Kernel::DEFAULT_STACK_SIZE); | 157 | g_main_thread = Kernel::SetupMainThread(Kernel::DEFAULT_STACK_SIZE, entry_point, 0x30); |
| 160 | // Setup the idle thread | ||
| 161 | Kernel::SetupIdleThread(); | ||
| 162 | 158 | ||
| 163 | return true; | 159 | return true; |
| 164 | } | 160 | } |
diff --git a/src/core/hle/kernel/mutex.cpp b/src/core/hle/kernel/mutex.cpp index 9f7166ca4..a811db392 100644 --- a/src/core/hle/kernel/mutex.cpp +++ b/src/core/hle/kernel/mutex.cpp | |||
| @@ -21,7 +21,7 @@ namespace Kernel { | |||
| 21 | */ | 21 | */ |
| 22 | static void ResumeWaitingThread(Mutex* mutex) { | 22 | static void ResumeWaitingThread(Mutex* mutex) { |
| 23 | // Reset mutex lock thread handle, nothing is waiting | 23 | // Reset mutex lock thread handle, nothing is waiting |
| 24 | mutex->locked = false; | 24 | mutex->lock_count = 0; |
| 25 | mutex->holding_thread = nullptr; | 25 | mutex->holding_thread = nullptr; |
| 26 | 26 | ||
| 27 | // Find the next waiting thread for the mutex... | 27 | // Find the next waiting thread for the mutex... |
| @@ -44,8 +44,7 @@ Mutex::~Mutex() {} | |||
| 44 | SharedPtr<Mutex> Mutex::Create(bool initial_locked, std::string name) { | 44 | SharedPtr<Mutex> Mutex::Create(bool initial_locked, std::string name) { |
| 45 | SharedPtr<Mutex> mutex(new Mutex); | 45 | SharedPtr<Mutex> mutex(new Mutex); |
| 46 | 46 | ||
| 47 | mutex->initial_locked = initial_locked; | 47 | mutex->lock_count = 0; |
| 48 | mutex->locked = false; | ||
| 49 | mutex->name = std::move(name); | 48 | mutex->name = std::move(name); |
| 50 | mutex->holding_thread = nullptr; | 49 | mutex->holding_thread = nullptr; |
| 51 | 50 | ||
| @@ -57,7 +56,7 @@ SharedPtr<Mutex> Mutex::Create(bool initial_locked, std::string name) { | |||
| 57 | } | 56 | } |
| 58 | 57 | ||
| 59 | bool Mutex::ShouldWait() { | 58 | bool Mutex::ShouldWait() { |
| 60 | return locked && holding_thread != GetCurrentThread(); | 59 | return lock_count > 0 && holding_thread != GetCurrentThread();; |
| 61 | } | 60 | } |
| 62 | 61 | ||
| 63 | void Mutex::Acquire() { | 62 | void Mutex::Acquire() { |
| @@ -66,21 +65,27 @@ void Mutex::Acquire() { | |||
| 66 | 65 | ||
| 67 | void Mutex::Acquire(SharedPtr<Thread> thread) { | 66 | void Mutex::Acquire(SharedPtr<Thread> thread) { |
| 68 | _assert_msg_(Kernel, !ShouldWait(), "object unavailable!"); | 67 | _assert_msg_(Kernel, !ShouldWait(), "object unavailable!"); |
| 69 | if (locked) | ||
| 70 | return; | ||
| 71 | 68 | ||
| 72 | locked = true; | 69 | // Actually "acquire" the mutex only if we don't already have it... |
| 70 | if (lock_count == 0) { | ||
| 71 | thread->held_mutexes.insert(this); | ||
| 72 | holding_thread = std::move(thread); | ||
| 73 | } | ||
| 73 | 74 | ||
| 74 | thread->held_mutexes.insert(this); | 75 | lock_count++; |
| 75 | holding_thread = std::move(thread); | ||
| 76 | } | 76 | } |
| 77 | 77 | ||
| 78 | void Mutex::Release() { | 78 | void Mutex::Release() { |
| 79 | if (!locked) | 79 | // Only release if the mutex is held... |
| 80 | return; | 80 | if (lock_count > 0) { |
| 81 | 81 | lock_count--; | |
| 82 | holding_thread->held_mutexes.erase(this); | 82 | |
| 83 | ResumeWaitingThread(this); | 83 | // Yield to the next thread only if we've fully released the mutex... |
| 84 | if (lock_count == 0) { | ||
| 85 | holding_thread->held_mutexes.erase(this); | ||
| 86 | ResumeWaitingThread(this); | ||
| 87 | } | ||
| 88 | } | ||
| 84 | } | 89 | } |
| 85 | 90 | ||
| 86 | } // namespace | 91 | } // namespace |
diff --git a/src/core/hle/kernel/mutex.h b/src/core/hle/kernel/mutex.h index 548403614..d6d5328be 100644 --- a/src/core/hle/kernel/mutex.h +++ b/src/core/hle/kernel/mutex.h | |||
| @@ -30,8 +30,7 @@ public: | |||
| 30 | static const HandleType HANDLE_TYPE = HandleType::Mutex; | 30 | static const HandleType HANDLE_TYPE = HandleType::Mutex; |
| 31 | HandleType GetHandleType() const override { return HANDLE_TYPE; } | 31 | HandleType GetHandleType() const override { return HANDLE_TYPE; } |
| 32 | 32 | ||
| 33 | bool initial_locked; ///< Initial lock state when mutex was created | 33 | int lock_count; ///< Number of times the mutex has been acquired |
| 34 | bool locked; ///< Current locked state | ||
| 35 | std::string name; ///< Name of mutex (optional) | 34 | std::string name; ///< Name of mutex (optional) |
| 36 | SharedPtr<Thread> holding_thread; ///< Thread that has acquired the mutex | 35 | SharedPtr<Thread> holding_thread; ///< Thread that has acquired the mutex |
| 37 | 36 | ||
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 3987f9608..7f629c20e 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp | |||
| @@ -21,8 +21,11 @@ | |||
| 21 | 21 | ||
| 22 | namespace Kernel { | 22 | namespace Kernel { |
| 23 | 23 | ||
| 24 | /// Event type for the thread wake up event | ||
| 25 | static int ThreadWakeupEventType = -1; | ||
| 26 | |||
| 24 | bool Thread::ShouldWait() { | 27 | bool Thread::ShouldWait() { |
| 25 | return status != THREADSTATUS_DORMANT; | 28 | return status != THREADSTATUS_DEAD; |
| 26 | } | 29 | } |
| 27 | 30 | ||
| 28 | void Thread::Acquire() { | 31 | void Thread::Acquire() { |
| @@ -33,12 +36,20 @@ void Thread::Acquire() { | |||
| 33 | static std::vector<SharedPtr<Thread>> thread_list; | 36 | static std::vector<SharedPtr<Thread>> thread_list; |
| 34 | 37 | ||
| 35 | // Lists only ready thread ids. | 38 | // Lists only ready thread ids. |
| 36 | static Common::ThreadQueueList<Thread*, THREADPRIO_LOWEST+1> thread_ready_queue; | 39 | static Common::ThreadQueueList<Thread*, THREADPRIO_LOWEST+1> ready_queue; |
| 37 | 40 | ||
| 38 | static Thread* current_thread; | 41 | static Thread* current_thread; |
| 39 | 42 | ||
| 40 | static const u32 INITIAL_THREAD_ID = 1; ///< The first available thread id at startup | 43 | // The first available thread id at startup |
| 41 | static u32 next_thread_id; ///< The next available thread id | 44 | static u32 next_thread_id = 1; |
| 45 | |||
| 46 | /** | ||
| 47 | * Creates a new thread ID | ||
| 48 | * @return The new thread ID | ||
| 49 | */ | ||
| 50 | inline static u32 const NewThreadId() { | ||
| 51 | return next_thread_id++; | ||
| 52 | } | ||
| 42 | 53 | ||
| 43 | Thread::Thread() {} | 54 | Thread::Thread() {} |
| 44 | Thread::~Thread() {} | 55 | Thread::~Thread() {} |
| @@ -47,86 +58,53 @@ Thread* GetCurrentThread() { | |||
| 47 | return current_thread; | 58 | return current_thread; |
| 48 | } | 59 | } |
| 49 | 60 | ||
| 50 | /// Resets a thread | 61 | /** |
| 51 | static void ResetThread(Thread* t, u32 arg, s32 lowest_priority) { | 62 | * Check if a thread is waiting on the specified wait object |
| 52 | memset(&t->context, 0, sizeof(Core::ThreadContext)); | 63 | * @param thread The thread to test |
| 53 | 64 | * @param wait_object The object to test against | |
| 54 | t->context.cpu_registers[0] = arg; | 65 | * @return True if the thread is waiting, false otherwise |
| 55 | t->context.pc = t->entry_point; | 66 | */ |
| 56 | t->context.sp = t->stack_top; | ||
| 57 | t->context.cpsr = 0x1F; // Usermode | ||
| 58 | |||
| 59 | // TODO(bunnei): This instructs the CPU core to start the execution as if it is "resuming" a | ||
| 60 | // thread. This is somewhat Sky-Eye specific, and should be re-architected in the future to be | ||
| 61 | // agnostic of the CPU core. | ||
| 62 | t->context.mode = 8; | ||
| 63 | |||
| 64 | if (t->current_priority < lowest_priority) { | ||
| 65 | t->current_priority = t->initial_priority; | ||
| 66 | } | ||
| 67 | |||
| 68 | t->wait_objects.clear(); | ||
| 69 | t->wait_address = 0; | ||
| 70 | } | ||
| 71 | |||
| 72 | /// Change a thread to "ready" state | ||
| 73 | static void ChangeReadyState(Thread* t, bool ready) { | ||
| 74 | if (t->IsReady()) { | ||
| 75 | if (!ready) { | ||
| 76 | thread_ready_queue.remove(t->current_priority, t); | ||
| 77 | } | ||
| 78 | } else if (ready) { | ||
| 79 | if (t->IsRunning()) { | ||
| 80 | thread_ready_queue.push_front(t->current_priority, t); | ||
| 81 | } else { | ||
| 82 | thread_ready_queue.push_back(t->current_priority, t); | ||
| 83 | } | ||
| 84 | t->status = THREADSTATUS_READY; | ||
| 85 | } | ||
| 86 | } | ||
| 87 | |||
| 88 | /// Check if a thread is waiting on a the specified wait object | ||
| 89 | static bool CheckWait_WaitObject(const Thread* thread, WaitObject* wait_object) { | 67 | static bool CheckWait_WaitObject(const Thread* thread, WaitObject* wait_object) { |
| 90 | auto itr = std::find(thread->wait_objects.begin(), thread->wait_objects.end(), wait_object); | 68 | if (thread->status != THREADSTATUS_WAIT_SYNCH) |
| 69 | return false; | ||
| 91 | 70 | ||
| 92 | if (itr != thread->wait_objects.end()) | 71 | auto itr = std::find(thread->wait_objects.begin(), thread->wait_objects.end(), wait_object); |
| 93 | return thread->IsWaiting(); | 72 | return itr != thread->wait_objects.end(); |
| 94 | |||
| 95 | return false; | ||
| 96 | } | 73 | } |
| 97 | 74 | ||
| 98 | /// Check if the specified thread is waiting on the specified address to be arbitrated | 75 | /** |
| 76 | * Check if the specified thread is waiting on the specified address to be arbitrated | ||
| 77 | * @param thread The thread to test | ||
| 78 | * @param wait_address The address to test against | ||
| 79 | * @return True if the thread is waiting, false otherwise | ||
| 80 | */ | ||
| 99 | static bool CheckWait_AddressArbiter(const Thread* thread, VAddr wait_address) { | 81 | static bool CheckWait_AddressArbiter(const Thread* thread, VAddr wait_address) { |
| 100 | return thread->IsWaiting() && thread->wait_objects.empty() && wait_address == thread->wait_address; | 82 | return thread->status == THREADSTATUS_WAIT_ARB && wait_address == thread->wait_address; |
| 101 | } | 83 | } |
| 102 | 84 | ||
| 103 | /// Stops the current thread | 85 | void Thread::Stop() { |
| 104 | void Thread::Stop(const char* reason) { | ||
| 105 | // Release all the mutexes that this thread holds | 86 | // Release all the mutexes that this thread holds |
| 106 | ReleaseThreadMutexes(this); | 87 | ReleaseThreadMutexes(this); |
| 107 | 88 | ||
| 108 | ChangeReadyState(this, false); | 89 | // Cancel any outstanding wakeup events for this thread |
| 109 | status = THREADSTATUS_DORMANT; | 90 | CoreTiming::UnscheduleEvent(ThreadWakeupEventType, callback_handle); |
| 91 | |||
| 92 | // Clean up thread from ready queue | ||
| 93 | // This is only needed when the thread is terminated forcefully (SVC TerminateProcess) | ||
| 94 | if (status == THREADSTATUS_READY){ | ||
| 95 | ready_queue.remove(current_priority, this); | ||
| 96 | } | ||
| 97 | |||
| 98 | status = THREADSTATUS_DEAD; | ||
| 99 | |||
| 110 | WakeupAllWaitingThreads(); | 100 | WakeupAllWaitingThreads(); |
| 111 | 101 | ||
| 112 | // Stopped threads are never waiting. | 102 | // Clean up any dangling references in objects that this thread was waiting for |
| 113 | for (auto& wait_object : wait_objects) { | 103 | for (auto& wait_object : wait_objects) { |
| 114 | wait_object->RemoveWaitingThread(this); | 104 | wait_object->RemoveWaitingThread(this); |
| 115 | } | 105 | } |
| 116 | wait_objects.clear(); | ||
| 117 | wait_address = 0; | ||
| 118 | } | ||
| 119 | |||
| 120 | /// Changes a threads state | ||
| 121 | static void ChangeThreadState(Thread* t, ThreadStatus new_status) { | ||
| 122 | if (!t || t->status == new_status) { | ||
| 123 | return; | ||
| 124 | } | ||
| 125 | ChangeReadyState(t, (new_status & THREADSTATUS_READY) != 0); | ||
| 126 | t->status = new_status; | ||
| 127 | } | 106 | } |
| 128 | 107 | ||
| 129 | /// Arbitrate the highest priority thread that is waiting | ||
| 130 | Thread* ArbitrateHighestPriorityThread(u32 address) { | 108 | Thread* ArbitrateHighestPriorityThread(u32 address) { |
| 131 | Thread* highest_priority_thread = nullptr; | 109 | Thread* highest_priority_thread = nullptr; |
| 132 | s32 priority = THREADPRIO_LOWEST; | 110 | s32 priority = THREADPRIO_LOWEST; |
| @@ -153,108 +131,113 @@ Thread* ArbitrateHighestPriorityThread(u32 address) { | |||
| 153 | return highest_priority_thread; | 131 | return highest_priority_thread; |
| 154 | } | 132 | } |
| 155 | 133 | ||
| 156 | /// Arbitrate all threads currently waiting | ||
| 157 | void ArbitrateAllThreads(u32 address) { | 134 | void ArbitrateAllThreads(u32 address) { |
| 158 | 135 | // Resume all threads found to be waiting on the address | |
| 159 | // Iterate through threads, find highest priority thread that is waiting to be arbitrated... | ||
| 160 | for (auto& thread : thread_list) { | 136 | for (auto& thread : thread_list) { |
| 161 | if (CheckWait_AddressArbiter(thread.get(), address)) | 137 | if (CheckWait_AddressArbiter(thread.get(), address)) |
| 162 | thread->ResumeFromWait(); | 138 | thread->ResumeFromWait(); |
| 163 | } | 139 | } |
| 164 | } | 140 | } |
| 165 | 141 | ||
| 166 | /// Calls a thread by marking it as "ready" (note: will not actually execute until current thread yields) | 142 | /** |
| 167 | static void CallThread(Thread* t) { | 143 | * Switches the CPU's active thread context to that of the specified thread |
| 168 | // Stop waiting | 144 | * @param new_thread The thread to switch to |
| 169 | ChangeThreadState(t, THREADSTATUS_READY); | 145 | */ |
| 170 | } | 146 | static void SwitchContext(Thread* new_thread) { |
| 147 | _dbg_assert_msg_(Kernel, new_thread->status == THREADSTATUS_READY, "Thread must be ready to become running."); | ||
| 171 | 148 | ||
| 172 | /// Switches CPU context to that of the specified thread | 149 | Thread* previous_thread = GetCurrentThread(); |
| 173 | static void SwitchContext(Thread* t) { | ||
| 174 | Thread* cur = GetCurrentThread(); | ||
| 175 | 150 | ||
| 176 | // Save context for current thread | 151 | // Save context for previous thread |
| 177 | if (cur) { | 152 | if (previous_thread) { |
| 178 | Core::g_app_core->SaveContext(cur->context); | 153 | Core::g_app_core->SaveContext(previous_thread->context); |
| 179 | 154 | ||
| 180 | if (cur->IsRunning()) { | 155 | if (previous_thread->status == THREADSTATUS_RUNNING) { |
| 181 | ChangeReadyState(cur, true); | 156 | // This is only the case when a reschedule is triggered without the current thread |
| 157 | // yielding execution (i.e. an event triggered, system core time-sliced, etc) | ||
| 158 | ready_queue.push_front(previous_thread->current_priority, previous_thread); | ||
| 159 | previous_thread->status = THREADSTATUS_READY; | ||
| 182 | } | 160 | } |
| 183 | } | 161 | } |
| 162 | |||
| 184 | // Load context of new thread | 163 | // Load context of new thread |
| 185 | if (t) { | 164 | if (new_thread) { |
| 186 | current_thread = t; | 165 | current_thread = new_thread; |
| 187 | ChangeReadyState(t, false); | 166 | |
| 188 | t->status = (t->status | THREADSTATUS_RUNNING) & ~THREADSTATUS_READY; | 167 | ready_queue.remove(new_thread->current_priority, new_thread); |
| 189 | Core::g_app_core->LoadContext(t->context); | 168 | new_thread->status = THREADSTATUS_RUNNING; |
| 169 | |||
| 170 | Core::g_app_core->LoadContext(new_thread->context); | ||
| 190 | } else { | 171 | } else { |
| 191 | current_thread = nullptr; | 172 | current_thread = nullptr; |
| 192 | } | 173 | } |
| 193 | } | 174 | } |
| 194 | 175 | ||
| 195 | /// Gets the next thread that is ready to be run by priority | 176 | /** |
| 196 | static Thread* NextThread() { | 177 | * Pops and returns the next thread from the thread queue |
| 178 | * @return A pointer to the next ready thread | ||
| 179 | */ | ||
| 180 | static Thread* PopNextReadyThread() { | ||
| 197 | Thread* next; | 181 | Thread* next; |
| 198 | Thread* cur = GetCurrentThread(); | 182 | Thread* thread = GetCurrentThread(); |
| 199 | 183 | ||
| 200 | if (cur && cur->IsRunning()) { | 184 | if (thread && thread->status == THREADSTATUS_RUNNING) { |
| 201 | next = thread_ready_queue.pop_first_better(cur->current_priority); | 185 | // We have to do better than the current thread. |
| 186 | // This call returns null when that's not possible. | ||
| 187 | next = ready_queue.pop_first_better(thread->current_priority); | ||
| 202 | } else { | 188 | } else { |
| 203 | next = thread_ready_queue.pop_first(); | 189 | next = ready_queue.pop_first(); |
| 204 | } | ||
| 205 | if (next == 0) { | ||
| 206 | return nullptr; | ||
| 207 | } | 190 | } |
| 191 | |||
| 208 | return next; | 192 | return next; |
| 209 | } | 193 | } |
| 210 | 194 | ||
| 211 | void WaitCurrentThread_Sleep() { | 195 | void WaitCurrentThread_Sleep() { |
| 212 | Thread* thread = GetCurrentThread(); | 196 | Thread* thread = GetCurrentThread(); |
| 213 | ChangeThreadState(thread, ThreadStatus(THREADSTATUS_WAIT | (thread->status & THREADSTATUS_SUSPEND))); | 197 | thread->status = THREADSTATUS_WAIT_SLEEP; |
| 214 | } | 198 | } |
| 215 | 199 | ||
| 216 | void WaitCurrentThread_WaitSynchronization(SharedPtr<WaitObject> wait_object, bool wait_set_output, bool wait_all) { | 200 | void WaitCurrentThread_WaitSynchronization(std::vector<SharedPtr<WaitObject>> wait_objects, bool wait_set_output, bool wait_all) { |
| 217 | Thread* thread = GetCurrentThread(); | 201 | Thread* thread = GetCurrentThread(); |
| 218 | thread->wait_set_output = wait_set_output; | 202 | thread->wait_set_output = wait_set_output; |
| 219 | thread->wait_all = wait_all; | 203 | thread->wait_all = wait_all; |
| 220 | 204 | thread->wait_objects = std::move(wait_objects); | |
| 221 | // It's possible to call WaitSynchronizationN without any objects passed in... | 205 | thread->status = THREADSTATUS_WAIT_SYNCH; |
| 222 | if (wait_object != nullptr) | ||
| 223 | thread->wait_objects.push_back(wait_object); | ||
| 224 | |||
| 225 | ChangeThreadState(thread, ThreadStatus(THREADSTATUS_WAIT | (thread->status & THREADSTATUS_SUSPEND))); | ||
| 226 | } | 206 | } |
| 227 | 207 | ||
| 228 | void WaitCurrentThread_ArbitrateAddress(VAddr wait_address) { | 208 | void WaitCurrentThread_ArbitrateAddress(VAddr wait_address) { |
| 229 | Thread* thread = GetCurrentThread(); | 209 | Thread* thread = GetCurrentThread(); |
| 230 | thread->wait_address = wait_address; | 210 | thread->wait_address = wait_address; |
| 231 | ChangeThreadState(thread, ThreadStatus(THREADSTATUS_WAIT | (thread->status & THREADSTATUS_SUSPEND))); | 211 | thread->status = THREADSTATUS_WAIT_ARB; |
| 232 | } | 212 | } |
| 233 | 213 | ||
| 234 | /// Event type for the thread wake up event | ||
| 235 | static int ThreadWakeupEventType = -1; | ||
| 236 | // TODO(yuriks): This can be removed if Thread objects are explicitly pooled in the future, allowing | 214 | // TODO(yuriks): This can be removed if Thread objects are explicitly pooled in the future, allowing |
| 237 | // us to simply use a pool index or similar. | 215 | // us to simply use a pool index or similar. |
| 238 | static Kernel::HandleTable wakeup_callback_handle_table; | 216 | static Kernel::HandleTable wakeup_callback_handle_table; |
| 239 | 217 | ||
| 240 | /// Callback that will wake up the thread it was scheduled for | 218 | /** |
| 219 | * Callback that will wake up the thread it was scheduled for | ||
| 220 | * @param thread_handle The handle of the thread that's been awoken | ||
| 221 | * @param cycles_late The number of CPU cycles that have passed since the desired wakeup time | ||
| 222 | */ | ||
| 241 | static void ThreadWakeupCallback(u64 thread_handle, int cycles_late) { | 223 | static void ThreadWakeupCallback(u64 thread_handle, int cycles_late) { |
| 242 | SharedPtr<Thread> thread = wakeup_callback_handle_table.Get<Thread>((Handle)thread_handle); | 224 | SharedPtr<Thread> thread = wakeup_callback_handle_table.Get<Thread>((Handle)thread_handle); |
| 243 | if (thread == nullptr) { | 225 | if (thread == nullptr) { |
| 244 | LOG_CRITICAL(Kernel, "Callback fired for invalid thread %08X", thread_handle); | 226 | LOG_CRITICAL(Kernel, "Callback fired for invalid thread %08X", (Handle)thread_handle); |
| 245 | return; | 227 | return; |
| 246 | } | 228 | } |
| 247 | 229 | ||
| 248 | thread->SetWaitSynchronizationResult(ResultCode(ErrorDescription::Timeout, ErrorModule::OS, | 230 | if (thread->status == THREADSTATUS_WAIT_SYNCH) { |
| 249 | ErrorSummary::StatusChanged, ErrorLevel::Info)); | 231 | thread->SetWaitSynchronizationResult(ResultCode(ErrorDescription::Timeout, ErrorModule::OS, |
| 232 | ErrorSummary::StatusChanged, ErrorLevel::Info)); | ||
| 250 | 233 | ||
| 251 | if (thread->wait_set_output) | 234 | if (thread->wait_set_output) |
| 252 | thread->SetWaitSynchronizationOutput(-1); | 235 | thread->SetWaitSynchronizationOutput(-1); |
| 236 | } | ||
| 253 | 237 | ||
| 254 | thread->ResumeFromWait(); | 238 | thread->ResumeFromWait(); |
| 255 | } | 239 | } |
| 256 | 240 | ||
| 257 | |||
| 258 | void Thread::WakeAfterDelay(s64 nanoseconds) { | 241 | void Thread::WakeAfterDelay(s64 nanoseconds) { |
| 259 | // Don't schedule a wakeup if the thread wants to wait forever | 242 | // Don't schedule a wakeup if the thread wants to wait forever |
| 260 | if (nanoseconds == -1) | 243 | if (nanoseconds == -1) |
| @@ -265,7 +248,7 @@ void Thread::WakeAfterDelay(s64 nanoseconds) { | |||
| 265 | } | 248 | } |
| 266 | 249 | ||
| 267 | void Thread::ReleaseWaitObject(WaitObject* wait_object) { | 250 | void Thread::ReleaseWaitObject(WaitObject* wait_object) { |
| 268 | if (wait_objects.empty()) { | 251 | if (status != THREADSTATUS_WAIT_SYNCH || wait_objects.empty()) { |
| 269 | LOG_CRITICAL(Kernel, "thread is not waiting on any objects!"); | 252 | LOG_CRITICAL(Kernel, "thread is not waiting on any objects!"); |
| 270 | return; | 253 | return; |
| 271 | } | 254 | } |
| @@ -307,34 +290,48 @@ void Thread::ReleaseWaitObject(WaitObject* wait_object) { | |||
| 307 | } | 290 | } |
| 308 | 291 | ||
| 309 | void Thread::ResumeFromWait() { | 292 | void Thread::ResumeFromWait() { |
| 310 | // Cancel any outstanding wakeup events | 293 | // Cancel any outstanding wakeup events for this thread |
| 311 | CoreTiming::UnscheduleEvent(ThreadWakeupEventType, callback_handle); | 294 | CoreTiming::UnscheduleEvent(ThreadWakeupEventType, callback_handle); |
| 312 | 295 | ||
| 313 | status &= ~THREADSTATUS_WAIT; | 296 | switch (status) { |
| 314 | 297 | case THREADSTATUS_WAIT_SYNCH: | |
| 315 | // Remove this thread from all other WaitObjects | 298 | // Remove this thread from all other WaitObjects |
| 316 | for (auto wait_object : wait_objects) | 299 | for (auto wait_object : wait_objects) |
| 317 | wait_object->RemoveWaitingThread(this); | 300 | wait_object->RemoveWaitingThread(this); |
| 318 | 301 | break; | |
| 319 | wait_objects.clear(); | 302 | case THREADSTATUS_WAIT_ARB: |
| 320 | wait_set_output = false; | 303 | case THREADSTATUS_WAIT_SLEEP: |
| 321 | wait_all = false; | 304 | break; |
| 322 | wait_address = 0; | 305 | case THREADSTATUS_RUNNING: |
| 323 | 306 | case THREADSTATUS_READY: | |
| 324 | if (!(status & (THREADSTATUS_WAITSUSPEND | THREADSTATUS_DORMANT | THREADSTATUS_DEAD))) { | 307 | LOG_ERROR(Kernel, "Thread with object id %u has already resumed.", GetObjectId()); |
| 325 | ChangeReadyState(this, true); | 308 | _dbg_assert_(Kernel, false); |
| 309 | return; | ||
| 310 | case THREADSTATUS_DEAD: | ||
| 311 | // This should never happen, as threads must complete before being stopped. | ||
| 312 | LOG_CRITICAL(Kernel, "Thread with object id %u cannot be resumed because it's DEAD.", | ||
| 313 | GetObjectId()); | ||
| 314 | _dbg_assert_(Kernel, false); | ||
| 315 | return; | ||
| 326 | } | 316 | } |
| 317 | |||
| 318 | ready_queue.push_back(current_priority, this); | ||
| 319 | status = THREADSTATUS_READY; | ||
| 327 | } | 320 | } |
| 328 | 321 | ||
| 329 | /// Prints the thread queue for debugging purposes | 322 | /** |
| 323 | * Prints the thread queue for debugging purposes | ||
| 324 | */ | ||
| 330 | static void DebugThreadQueue() { | 325 | static void DebugThreadQueue() { |
| 331 | Thread* thread = GetCurrentThread(); | 326 | Thread* thread = GetCurrentThread(); |
| 332 | if (!thread) { | 327 | if (!thread) { |
| 333 | return; | 328 | LOG_DEBUG(Kernel, "Current: NO CURRENT THREAD"); |
| 329 | } else { | ||
| 330 | LOG_DEBUG(Kernel, "0x%02X %u (current)", thread->current_priority, GetCurrentThread()->GetObjectId()); | ||
| 334 | } | 331 | } |
| 335 | LOG_DEBUG(Kernel, "0x%02X %u (current)", thread->current_priority, GetCurrentThread()->GetObjectId()); | 332 | |
| 336 | for (auto& t : thread_list) { | 333 | for (auto& t : thread_list) { |
| 337 | s32 priority = thread_ready_queue.contains(t.get()); | 334 | s32 priority = ready_queue.contains(t.get()); |
| 338 | if (priority != -1) { | 335 | if (priority != -1) { |
| 339 | LOG_DEBUG(Kernel, "0x%02X %u", priority, t->GetObjectId()); | 336 | LOG_DEBUG(Kernel, "0x%02X %u", priority, t->GetObjectId()); |
| 340 | } | 337 | } |
| @@ -342,14 +339,7 @@ static void DebugThreadQueue() { | |||
| 342 | } | 339 | } |
| 343 | 340 | ||
| 344 | ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point, s32 priority, | 341 | ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point, s32 priority, |
| 345 | u32 arg, s32 processor_id, VAddr stack_top, u32 stack_size) { | 342 | u32 arg, s32 processor_id, VAddr stack_top) { |
| 346 | if (stack_size < 0x200) { | ||
| 347 | LOG_ERROR(Kernel, "(name=%s): invalid stack_size=0x%08X", name.c_str(), stack_size); | ||
| 348 | // TODO: Verify error | ||
| 349 | return ResultCode(ErrorDescription::InvalidSize, ErrorModule::Kernel, | ||
| 350 | ErrorSummary::InvalidArgument, ErrorLevel::Permanent); | ||
| 351 | } | ||
| 352 | |||
| 353 | if (priority < THREADPRIO_HIGHEST || priority > THREADPRIO_LOWEST) { | 343 | if (priority < THREADPRIO_HIGHEST || priority > THREADPRIO_LOWEST) { |
| 354 | s32 new_priority = CLAMP(priority, THREADPRIO_HIGHEST, THREADPRIO_LOWEST); | 344 | s32 new_priority = CLAMP(priority, THREADPRIO_HIGHEST, THREADPRIO_LOWEST); |
| 355 | LOG_WARNING(Kernel_SVC, "(name=%s): invalid priority=%d, clamping to %d", | 345 | LOG_WARNING(Kernel_SVC, "(name=%s): invalid priority=%d, clamping to %d", |
| @@ -369,13 +359,12 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point, | |||
| 369 | SharedPtr<Thread> thread(new Thread); | 359 | SharedPtr<Thread> thread(new Thread); |
| 370 | 360 | ||
| 371 | thread_list.push_back(thread); | 361 | thread_list.push_back(thread); |
| 372 | thread_ready_queue.prepare(priority); | 362 | ready_queue.prepare(priority); |
| 373 | 363 | ||
| 374 | thread->thread_id = next_thread_id++; | 364 | thread->thread_id = NewThreadId(); |
| 375 | thread->status = THREADSTATUS_DORMANT; | 365 | thread->status = THREADSTATUS_DORMANT; |
| 376 | thread->entry_point = entry_point; | 366 | thread->entry_point = entry_point; |
| 377 | thread->stack_top = stack_top; | 367 | thread->stack_top = stack_top; |
| 378 | thread->stack_size = stack_size; | ||
| 379 | thread->initial_priority = thread->current_priority = priority; | 368 | thread->initial_priority = thread->current_priority = priority; |
| 380 | thread->processor_id = processor_id; | 369 | thread->processor_id = processor_id; |
| 381 | thread->wait_set_output = false; | 370 | thread->wait_set_output = false; |
| @@ -385,75 +374,74 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point, | |||
| 385 | thread->name = std::move(name); | 374 | thread->name = std::move(name); |
| 386 | thread->callback_handle = wakeup_callback_handle_table.Create(thread).MoveFrom(); | 375 | thread->callback_handle = wakeup_callback_handle_table.Create(thread).MoveFrom(); |
| 387 | 376 | ||
| 388 | ResetThread(thread.get(), arg, 0); | 377 | // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used |
| 389 | CallThread(thread.get()); | 378 | // to initialize the context |
| 379 | Core::g_app_core->ResetContext(thread->context, stack_top, entry_point, arg); | ||
| 380 | |||
| 381 | ready_queue.push_back(thread->current_priority, thread.get()); | ||
| 382 | thread->status = THREADSTATUS_READY; | ||
| 390 | 383 | ||
| 391 | return MakeResult<SharedPtr<Thread>>(std::move(thread)); | 384 | return MakeResult<SharedPtr<Thread>>(std::move(thread)); |
| 392 | } | 385 | } |
| 393 | 386 | ||
| 394 | /// Set the priority of the thread specified by handle | 387 | // TODO(peachum): Remove this. Range checking should be done, and an appropriate error should be returned. |
| 395 | void Thread::SetPriority(s32 priority) { | 388 | static void ClampPriority(const Thread* thread, s32* priority) { |
| 396 | // If priority is invalid, clamp to valid range | 389 | if (*priority < THREADPRIO_HIGHEST || *priority > THREADPRIO_LOWEST) { |
| 397 | if (priority < THREADPRIO_HIGHEST || priority > THREADPRIO_LOWEST) { | 390 | _dbg_assert_msg_(Kernel, false, "Application passed an out of range priority. An error should be returned."); |
| 398 | s32 new_priority = CLAMP(priority, THREADPRIO_HIGHEST, THREADPRIO_LOWEST); | 391 | |
| 399 | LOG_WARNING(Kernel_SVC, "invalid priority=%d, clamping to %d", priority, new_priority); | 392 | s32 new_priority = CLAMP(*priority, THREADPRIO_HIGHEST, THREADPRIO_LOWEST); |
| 393 | LOG_WARNING(Kernel_SVC, "(name=%s): invalid priority=%d, clamping to %d", | ||
| 394 | thread->name.c_str(), *priority, new_priority); | ||
| 400 | // TODO(bunnei): Clamping to a valid priority is not necessarily correct behavior... Confirm | 395 | // TODO(bunnei): Clamping to a valid priority is not necessarily correct behavior... Confirm |
| 401 | // validity of this | 396 | // validity of this |
| 402 | priority = new_priority; | 397 | *priority = new_priority; |
| 403 | } | 398 | } |
| 399 | } | ||
| 404 | 400 | ||
| 405 | // Change thread priority | 401 | void Thread::SetPriority(s32 priority) { |
| 406 | s32 old = current_priority; | 402 | ClampPriority(this, &priority); |
| 407 | thread_ready_queue.remove(old, this); | ||
| 408 | current_priority = priority; | ||
| 409 | thread_ready_queue.prepare(current_priority); | ||
| 410 | 403 | ||
| 411 | // Change thread status to "ready" and push to ready queue | 404 | if (current_priority == priority) { |
| 412 | if (IsRunning()) { | 405 | return; |
| 413 | status = (status & ~THREADSTATUS_RUNNING) | THREADSTATUS_READY; | ||
| 414 | } | 406 | } |
| 415 | if (IsReady()) { | 407 | |
| 416 | thread_ready_queue.push_back(current_priority, this); | 408 | if (status == THREADSTATUS_READY) { |
| 409 | // If thread was ready, adjust queues | ||
| 410 | ready_queue.remove(current_priority, this); | ||
| 411 | ready_queue.prepare(priority); | ||
| 412 | ready_queue.push_back(priority, this); | ||
| 417 | } | 413 | } |
| 414 | |||
| 415 | current_priority = priority; | ||
| 418 | } | 416 | } |
| 419 | 417 | ||
| 420 | SharedPtr<Thread> SetupIdleThread() { | 418 | SharedPtr<Thread> SetupIdleThread() { |
| 421 | // We need to pass a few valid values to get around parameter checking in Thread::Create. | 419 | // We need to pass a few valid values to get around parameter checking in Thread::Create. |
| 422 | auto thread = Thread::Create("idle", Memory::KERNEL_MEMORY_VADDR, THREADPRIO_LOWEST, 0, | 420 | auto thread = Thread::Create("idle", Memory::KERNEL_MEMORY_VADDR, THREADPRIO_LOWEST, 0, |
| 423 | THREADPROCESSORID_0, 0, Kernel::DEFAULT_STACK_SIZE).MoveFrom(); | 421 | THREADPROCESSORID_0, 0).MoveFrom(); |
| 424 | 422 | ||
| 425 | thread->idle = true; | 423 | thread->idle = true; |
| 426 | CallThread(thread.get()); | ||
| 427 | return thread; | 424 | return thread; |
| 428 | } | 425 | } |
| 429 | 426 | ||
| 430 | SharedPtr<Thread> SetupMainThread(s32 priority, u32 stack_size) { | 427 | SharedPtr<Thread> SetupMainThread(u32 stack_size, u32 entry_point, s32 priority) { |
| 428 | _dbg_assert_(Kernel, !GetCurrentThread()); | ||
| 429 | |||
| 431 | // Initialize new "main" thread | 430 | // Initialize new "main" thread |
| 432 | auto thread_res = Thread::Create("main", Core::g_app_core->GetPC(), priority, 0, | 431 | auto thread_res = Thread::Create("main", entry_point, priority, 0, |
| 433 | THREADPROCESSORID_0, Memory::SCRATCHPAD_VADDR_END, stack_size); | 432 | THREADPROCESSORID_0, Memory::SCRATCHPAD_VADDR_END); |
| 434 | // TODO(yuriks): Propagate error | 433 | |
| 435 | _dbg_assert_(Kernel, thread_res.Succeeded()); | 434 | SharedPtr<Thread> thread = thread_res.MoveFrom(); |
| 436 | SharedPtr<Thread> thread = std::move(*thread_res); | ||
| 437 | |||
| 438 | // If running another thread already, set it to "ready" state | ||
| 439 | Thread* cur = GetCurrentThread(); | ||
| 440 | if (cur && cur->IsRunning()) { | ||
| 441 | ChangeReadyState(cur, true); | ||
| 442 | } | ||
| 443 | 435 | ||
| 444 | // Run new "main" thread | 436 | // Run new "main" thread |
| 445 | current_thread = thread.get(); | 437 | SwitchContext(thread.get()); |
| 446 | thread->status = THREADSTATUS_RUNNING; | ||
| 447 | Core::g_app_core->LoadContext(thread->context); | ||
| 448 | 438 | ||
| 449 | return thread; | 439 | return thread; |
| 450 | } | 440 | } |
| 451 | 441 | ||
| 452 | |||
| 453 | /// Reschedules to the next available thread (call after current thread is suspended) | ||
| 454 | void Reschedule() { | 442 | void Reschedule() { |
| 455 | Thread* prev = GetCurrentThread(); | 443 | Thread* prev = GetCurrentThread(); |
| 456 | Thread* next = NextThread(); | 444 | Thread* next = PopNextReadyThread(); |
| 457 | HLE::g_reschedule = false; | 445 | HLE::g_reschedule = false; |
| 458 | 446 | ||
| 459 | if (next != nullptr) { | 447 | if (next != nullptr) { |
| @@ -480,8 +468,10 @@ void Thread::SetWaitSynchronizationOutput(s32 output) { | |||
| 480 | //////////////////////////////////////////////////////////////////////////////////////////////////// | 468 | //////////////////////////////////////////////////////////////////////////////////////////////////// |
| 481 | 469 | ||
| 482 | void ThreadingInit() { | 470 | void ThreadingInit() { |
| 483 | next_thread_id = INITIAL_THREAD_ID; | ||
| 484 | ThreadWakeupEventType = CoreTiming::RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback); | 471 | ThreadWakeupEventType = CoreTiming::RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback); |
| 472 | |||
| 473 | // Setup the idle thread | ||
| 474 | SetupIdleThread(); | ||
| 485 | } | 475 | } |
| 486 | 476 | ||
| 487 | void ThreadingShutdown() { | 477 | void ThreadingShutdown() { |
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index 633bb7c98..cfd073a70 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h | |||
| @@ -31,13 +31,13 @@ enum ThreadProcessorId { | |||
| 31 | }; | 31 | }; |
| 32 | 32 | ||
| 33 | enum ThreadStatus { | 33 | enum ThreadStatus { |
| 34 | THREADSTATUS_RUNNING = 1, | 34 | THREADSTATUS_RUNNING, ///< Currently running |
| 35 | THREADSTATUS_READY = 2, | 35 | THREADSTATUS_READY, ///< Ready to run |
| 36 | THREADSTATUS_WAIT = 4, | 36 | THREADSTATUS_WAIT_ARB, ///< Waiting on an address arbiter |
| 37 | THREADSTATUS_SUSPEND = 8, | 37 | THREADSTATUS_WAIT_SLEEP, ///< Waiting due to a SleepThread SVC |
| 38 | THREADSTATUS_DORMANT = 16, | 38 | THREADSTATUS_WAIT_SYNCH, ///< Waiting due to a WaitSynchronization SVC |
| 39 | THREADSTATUS_DEAD = 32, | 39 | THREADSTATUS_DORMANT, ///< Created but not yet made ready |
| 40 | THREADSTATUS_WAITSUSPEND = THREADSTATUS_WAIT | THREADSTATUS_SUSPEND | 40 | THREADSTATUS_DEAD ///< Run to completion, or forcefully terminated |
| 41 | }; | 41 | }; |
| 42 | 42 | ||
| 43 | namespace Kernel { | 43 | namespace Kernel { |
| @@ -46,8 +46,19 @@ class Mutex; | |||
| 46 | 46 | ||
| 47 | class Thread final : public WaitObject { | 47 | class Thread final : public WaitObject { |
| 48 | public: | 48 | public: |
| 49 | /** | ||
| 50 | * Creates and returns a new thread. The new thread is immediately scheduled | ||
| 51 | * @param name The friendly name desired for the thread | ||
| 52 | * @param entry_point The address at which the thread should start execution | ||
| 53 | * @param priority The thread's priority | ||
| 54 | * @param arg User data to pass to the thread | ||
| 55 | * @param processor_id The ID(s) of the processors on which the thread is desired to be run | ||
| 56 | * @param stack_top The address of the thread's stack top | ||
| 57 | * @param stack_size The size of the thread's stack | ||
| 58 | * @return A shared pointer to the newly created thread | ||
| 59 | */ | ||
| 49 | static ResultVal<SharedPtr<Thread>> Create(std::string name, VAddr entry_point, s32 priority, | 60 | static ResultVal<SharedPtr<Thread>> Create(std::string name, VAddr entry_point, s32 priority, |
| 50 | u32 arg, s32 processor_id, VAddr stack_top, u32 stack_size); | 61 | u32 arg, s32 processor_id, VAddr stack_top); |
| 51 | 62 | ||
| 52 | std::string GetName() const override { return name; } | 63 | std::string GetName() const override { return name; } |
| 53 | std::string GetTypeName() const override { return "Thread"; } | 64 | std::string GetTypeName() const override { return "Thread"; } |
| @@ -55,22 +66,32 @@ public: | |||
| 55 | static const HandleType HANDLE_TYPE = HandleType::Thread; | 66 | static const HandleType HANDLE_TYPE = HandleType::Thread; |
| 56 | HandleType GetHandleType() const override { return HANDLE_TYPE; } | 67 | HandleType GetHandleType() const override { return HANDLE_TYPE; } |
| 57 | 68 | ||
| 58 | inline bool IsRunning() const { return (status & THREADSTATUS_RUNNING) != 0; } | ||
| 59 | inline bool IsStopped() const { return (status & THREADSTATUS_DORMANT) != 0; } | ||
| 60 | inline bool IsReady() const { return (status & THREADSTATUS_READY) != 0; } | ||
| 61 | inline bool IsWaiting() const { return (status & THREADSTATUS_WAIT) != 0; } | ||
| 62 | inline bool IsSuspended() const { return (status & THREADSTATUS_SUSPEND) != 0; } | ||
| 63 | inline bool IsIdle() const { return idle; } | ||
| 64 | |||
| 65 | bool ShouldWait() override; | 69 | bool ShouldWait() override; |
| 66 | void Acquire() override; | 70 | void Acquire() override; |
| 67 | 71 | ||
| 72 | /** | ||
| 73 | * Checks if the thread is an idle (stub) thread | ||
| 74 | * @return True if the thread is an idle (stub) thread, false otherwise | ||
| 75 | */ | ||
| 76 | inline bool IsIdle() const { return idle; } | ||
| 77 | |||
| 78 | /** | ||
| 79 | * Gets the thread's current priority | ||
| 80 | * @return The current thread's priority | ||
| 81 | */ | ||
| 68 | s32 GetPriority() const { return current_priority; } | 82 | s32 GetPriority() const { return current_priority; } |
| 83 | |||
| 84 | /** | ||
| 85 | * Sets the thread's current priority | ||
| 86 | * @param priority The new priority | ||
| 87 | */ | ||
| 69 | void SetPriority(s32 priority); | 88 | void SetPriority(s32 priority); |
| 70 | 89 | ||
| 90 | /** | ||
| 91 | * Gets the thread's thread ID | ||
| 92 | * @return The thread's ID | ||
| 93 | */ | ||
| 71 | u32 GetThreadId() const { return thread_id; } | 94 | u32 GetThreadId() const { return thread_id; } |
| 72 | |||
| 73 | void Stop(const char* reason); | ||
| 74 | 95 | ||
| 75 | /** | 96 | /** |
| 76 | * Release an acquired wait object | 97 | * Release an acquired wait object |
| @@ -78,12 +99,14 @@ public: | |||
| 78 | */ | 99 | */ |
| 79 | void ReleaseWaitObject(WaitObject* wait_object); | 100 | void ReleaseWaitObject(WaitObject* wait_object); |
| 80 | 101 | ||
| 81 | /// Resumes a thread from waiting by marking it as "ready" | 102 | /** |
| 103 | * Resumes a thread from waiting | ||
| 104 | */ | ||
| 82 | void ResumeFromWait(); | 105 | void ResumeFromWait(); |
| 83 | 106 | ||
| 84 | /** | 107 | /** |
| 85 | * Schedules an event to wake up the specified thread after the specified delay. | 108 | * Schedules an event to wake up the specified thread after the specified delay |
| 86 | * @param nanoseconds The time this thread will be allowed to sleep for. | 109 | * @param nanoseconds The time this thread will be allowed to sleep for |
| 87 | */ | 110 | */ |
| 88 | void WakeAfterDelay(s64 nanoseconds); | 111 | void WakeAfterDelay(s64 nanoseconds); |
| 89 | 112 | ||
| @@ -99,6 +122,11 @@ public: | |||
| 99 | */ | 122 | */ |
| 100 | void SetWaitSynchronizationOutput(s32 output); | 123 | void SetWaitSynchronizationOutput(s32 output); |
| 101 | 124 | ||
| 125 | /** | ||
| 126 | * Stops a thread, invalidating it from further use | ||
| 127 | */ | ||
| 128 | void Stop(); | ||
| 129 | |||
| 102 | Core::ThreadContext context; | 130 | Core::ThreadContext context; |
| 103 | 131 | ||
| 104 | u32 thread_id; | 132 | u32 thread_id; |
| @@ -106,7 +134,6 @@ public: | |||
| 106 | u32 status; | 134 | u32 status; |
| 107 | u32 entry_point; | 135 | u32 entry_point; |
| 108 | u32 stack_top; | 136 | u32 stack_top; |
| 109 | u32 stack_size; | ||
| 110 | 137 | ||
| 111 | s32 initial_priority; | 138 | s32 initial_priority; |
| 112 | s32 current_priority; | 139 | s32 current_priority; |
| @@ -136,31 +163,49 @@ private: | |||
| 136 | 163 | ||
| 137 | extern SharedPtr<Thread> g_main_thread; | 164 | extern SharedPtr<Thread> g_main_thread; |
| 138 | 165 | ||
| 139 | /// Sets up the primary application thread | 166 | /** |
| 140 | SharedPtr<Thread> SetupMainThread(s32 priority, u32 stack_size); | 167 | * Sets up the primary application thread |
| 168 | * @param stack_size The size of the thread's stack | ||
| 169 | * @param entry_point The address at which the thread should start execution | ||
| 170 | * @param priority The priority to give the main thread | ||
| 171 | * @return A shared pointer to the main thread | ||
| 172 | */ | ||
| 173 | SharedPtr<Thread> SetupMainThread(u32 stack_size, u32 entry_point, s32 priority); | ||
| 141 | 174 | ||
| 142 | /// Reschedules to the next available thread (call after current thread is suspended) | 175 | /** |
| 176 | * Reschedules to the next available thread (call after current thread is suspended) | ||
| 177 | */ | ||
| 143 | void Reschedule(); | 178 | void Reschedule(); |
| 144 | 179 | ||
| 145 | /// Arbitrate the highest priority thread that is waiting | 180 | /** |
| 181 | * Arbitrate the highest priority thread that is waiting | ||
| 182 | * @param address The address for which waiting threads should be arbitrated | ||
| 183 | */ | ||
| 146 | Thread* ArbitrateHighestPriorityThread(u32 address); | 184 | Thread* ArbitrateHighestPriorityThread(u32 address); |
| 147 | 185 | ||
| 148 | /// Arbitrate all threads currently waiting... | 186 | /** |
| 187 | * Arbitrate all threads currently waiting. | ||
| 188 | * @param address The address for which waiting threads should be arbitrated | ||
| 189 | */ | ||
| 149 | void ArbitrateAllThreads(u32 address); | 190 | void ArbitrateAllThreads(u32 address); |
| 150 | 191 | ||
| 151 | /// Gets the current thread | 192 | /** |
| 193 | * Gets the current thread | ||
| 194 | */ | ||
| 152 | Thread* GetCurrentThread(); | 195 | Thread* GetCurrentThread(); |
| 153 | 196 | ||
| 154 | /// Waits the current thread on a sleep | 197 | /** |
| 198 | * Waits the current thread on a sleep | ||
| 199 | */ | ||
| 155 | void WaitCurrentThread_Sleep(); | 200 | void WaitCurrentThread_Sleep(); |
| 156 | 201 | ||
| 157 | /** | 202 | /** |
| 158 | * Waits the current thread from a WaitSynchronization call | 203 | * Waits the current thread from a WaitSynchronization call |
| 159 | * @param wait_object Kernel object that we are waiting on | 204 | * @param wait_objects Kernel objects that we are waiting on |
| 160 | * @param wait_set_output If true, set the output parameter on thread wakeup (for WaitSynchronizationN only) | 205 | * @param wait_set_output If true, set the output parameter on thread wakeup (for WaitSynchronizationN only) |
| 161 | * @param wait_all If true, wait on all objects before resuming (for WaitSynchronizationN only) | 206 | * @param wait_all If true, wait on all objects before resuming (for WaitSynchronizationN only) |
| 162 | */ | 207 | */ |
| 163 | void WaitCurrentThread_WaitSynchronization(SharedPtr<WaitObject> wait_object, bool wait_set_output, bool wait_all); | 208 | void WaitCurrentThread_WaitSynchronization(std::vector<SharedPtr<WaitObject>> wait_objects, bool wait_set_output, bool wait_all); |
| 164 | 209 | ||
| 165 | /** | 210 | /** |
| 166 | * Waits the current thread from an ArbitrateAddress call | 211 | * Waits the current thread from an ArbitrateAddress call |
| @@ -172,14 +217,18 @@ void WaitCurrentThread_ArbitrateAddress(VAddr wait_address); | |||
| 172 | * Sets up the idle thread, this is a thread that is intended to never execute instructions, | 217 | * Sets up the idle thread, this is a thread that is intended to never execute instructions, |
| 173 | * only to advance the timing. It is scheduled when there are no other ready threads in the thread queue | 218 | * only to advance the timing. It is scheduled when there are no other ready threads in the thread queue |
| 174 | * and will try to yield on every call. | 219 | * and will try to yield on every call. |
| 175 | * @returns The handle of the idle thread | 220 | * @return A shared pointer to the idle thread |
| 176 | */ | 221 | */ |
| 177 | SharedPtr<Thread> SetupIdleThread(); | 222 | SharedPtr<Thread> SetupIdleThread(); |
| 178 | 223 | ||
| 179 | /// Initialize threading | 224 | /** |
| 225 | * Initialize threading | ||
| 226 | */ | ||
| 180 | void ThreadingInit(); | 227 | void ThreadingInit(); |
| 181 | 228 | ||
| 182 | /// Shutdown threading | 229 | /** |
| 230 | * Shutdown threading | ||
| 231 | */ | ||
| 183 | void ThreadingShutdown(); | 232 | void ThreadingShutdown(); |
| 184 | 233 | ||
| 185 | } // namespace | 234 | } // namespace |
diff --git a/src/core/hle/svc.cpp b/src/core/hle/svc.cpp index 34a27917f..96da29923 100644 --- a/src/core/hle/svc.cpp +++ b/src/core/hle/svc.cpp | |||
| @@ -144,17 +144,17 @@ static ResultCode WaitSynchronization1(Handle handle, s64 nano_seconds) { | |||
| 144 | LOG_TRACE(Kernel_SVC, "called handle=0x%08X(%s:%s), nanoseconds=%lld", handle, | 144 | LOG_TRACE(Kernel_SVC, "called handle=0x%08X(%s:%s), nanoseconds=%lld", handle, |
| 145 | object->GetTypeName().c_str(), object->GetName().c_str(), nano_seconds); | 145 | object->GetTypeName().c_str(), object->GetName().c_str(), nano_seconds); |
| 146 | 146 | ||
| 147 | HLE::Reschedule(__func__); | ||
| 148 | |||
| 147 | // Check for next thread to schedule | 149 | // Check for next thread to schedule |
| 148 | if (object->ShouldWait()) { | 150 | if (object->ShouldWait()) { |
| 149 | 151 | ||
| 150 | object->AddWaitingThread(Kernel::GetCurrentThread()); | 152 | object->AddWaitingThread(Kernel::GetCurrentThread()); |
| 151 | Kernel::WaitCurrentThread_WaitSynchronization(object, false, false); | 153 | Kernel::WaitCurrentThread_WaitSynchronization({ object }, false, false); |
| 152 | 154 | ||
| 153 | // Create an event to wake the thread up after the specified nanosecond delay has passed | 155 | // Create an event to wake the thread up after the specified nanosecond delay has passed |
| 154 | Kernel::GetCurrentThread()->WakeAfterDelay(nano_seconds); | 156 | Kernel::GetCurrentThread()->WakeAfterDelay(nano_seconds); |
| 155 | 157 | ||
| 156 | HLE::Reschedule(__func__); | ||
| 157 | |||
| 158 | // NOTE: output of this SVC will be set later depending on how the thread resumes | 158 | // NOTE: output of this SVC will be set later depending on how the thread resumes |
| 159 | return RESULT_INVALID; | 159 | return RESULT_INVALID; |
| 160 | } | 160 | } |
| @@ -212,25 +212,29 @@ static ResultCode WaitSynchronizationN(s32* out, Handle* handles, s32 handle_cou | |||
| 212 | // NOTE: This should deadlock the current thread if no timeout was specified | 212 | // NOTE: This should deadlock the current thread if no timeout was specified |
| 213 | if (!wait_all) { | 213 | if (!wait_all) { |
| 214 | wait_thread = true; | 214 | wait_thread = true; |
| 215 | Kernel::WaitCurrentThread_WaitSynchronization(nullptr, true, wait_all); | ||
| 216 | } | 215 | } |
| 217 | } | 216 | } |
| 218 | 217 | ||
| 218 | HLE::Reschedule(__func__); | ||
| 219 | |||
| 219 | // If thread should wait, then set its state to waiting and then reschedule... | 220 | // If thread should wait, then set its state to waiting and then reschedule... |
| 220 | if (wait_thread) { | 221 | if (wait_thread) { |
| 221 | 222 | ||
| 222 | // Actually wait the current thread on each object if we decided to wait... | 223 | // Actually wait the current thread on each object if we decided to wait... |
| 224 | std::vector<SharedPtr<Kernel::WaitObject>> wait_objects; | ||
| 225 | wait_objects.reserve(handle_count); | ||
| 226 | |||
| 223 | for (int i = 0; i < handle_count; ++i) { | 227 | for (int i = 0; i < handle_count; ++i) { |
| 224 | auto object = Kernel::g_handle_table.GetWaitObject(handles[i]); | 228 | auto object = Kernel::g_handle_table.GetWaitObject(handles[i]); |
| 225 | object->AddWaitingThread(Kernel::GetCurrentThread()); | 229 | object->AddWaitingThread(Kernel::GetCurrentThread()); |
| 226 | Kernel::WaitCurrentThread_WaitSynchronization(object, true, wait_all); | 230 | wait_objects.push_back(object); |
| 227 | } | 231 | } |
| 228 | 232 | ||
| 233 | Kernel::WaitCurrentThread_WaitSynchronization(std::move(wait_objects), true, wait_all); | ||
| 234 | |||
| 229 | // Create an event to wake the thread up after the specified nanosecond delay has passed | 235 | // Create an event to wake the thread up after the specified nanosecond delay has passed |
| 230 | Kernel::GetCurrentThread()->WakeAfterDelay(nano_seconds); | 236 | Kernel::GetCurrentThread()->WakeAfterDelay(nano_seconds); |
| 231 | 237 | ||
| 232 | HLE::Reschedule(__func__); | ||
| 233 | |||
| 234 | // NOTE: output of this SVC will be set later depending on how the thread resumes | 238 | // NOTE: output of this SVC will be set later depending on how the thread resumes |
| 235 | return RESULT_INVALID; | 239 | return RESULT_INVALID; |
| 236 | } | 240 | } |
| @@ -319,7 +323,7 @@ static ResultCode CreateThread(u32* out_handle, u32 priority, u32 entry_point, u | |||
| 319 | } | 323 | } |
| 320 | 324 | ||
| 321 | CASCADE_RESULT(SharedPtr<Thread> thread, Kernel::Thread::Create( | 325 | CASCADE_RESULT(SharedPtr<Thread> thread, Kernel::Thread::Create( |
| 322 | name, entry_point, priority, arg, processor_id, stack_top, Kernel::DEFAULT_STACK_SIZE)); | 326 | name, entry_point, priority, arg, processor_id, stack_top)); |
| 323 | CASCADE_RESULT(*out_handle, Kernel::g_handle_table.Create(std::move(thread))); | 327 | CASCADE_RESULT(*out_handle, Kernel::g_handle_table.Create(std::move(thread))); |
| 324 | 328 | ||
| 325 | LOG_TRACE(Kernel_SVC, "called entrypoint=0x%08X (%s), arg=0x%08X, stacktop=0x%08X, " | 329 | LOG_TRACE(Kernel_SVC, "called entrypoint=0x%08X (%s), arg=0x%08X, stacktop=0x%08X, " |
| @@ -338,7 +342,7 @@ static ResultCode CreateThread(u32* out_handle, u32 priority, u32 entry_point, u | |||
| 338 | static void ExitThread() { | 342 | static void ExitThread() { |
| 339 | LOG_TRACE(Kernel_SVC, "called, pc=0x%08X", Core::g_app_core->GetPC()); | 343 | LOG_TRACE(Kernel_SVC, "called, pc=0x%08X", Core::g_app_core->GetPC()); |
| 340 | 344 | ||
| 341 | Kernel::GetCurrentThread()->Stop(__func__); | 345 | Kernel::GetCurrentThread()->Stop(); |
| 342 | HLE::Reschedule(__func__); | 346 | HLE::Reschedule(__func__); |
| 343 | } | 347 | } |
| 344 | 348 | ||