| -rw-r--r-- | src/core/arm/skyeye_common/vfp/vfp.cpp | 4 |
| -rw-r--r-- | src/core/arm/skyeye_common/vfp/vfp_helper.h | 654 |
| -rw-r--r-- | src/core/arm/skyeye_common/vfp/vfpdouble.cpp | 187 |
| -rw-r--r-- | src/core/arm/skyeye_common/vfp/vfpsingle.cpp | 54 |
4 files changed, 322 insertions(+), 577 deletions(-)
diff --git a/src/core/arm/skyeye_common/vfp/vfp.cpp b/src/core/arm/skyeye_common/vfp/vfp.cpp
index 888709124..1cf146c53 100644
--- a/src/core/arm/skyeye_common/vfp/vfp.cpp
+++ b/src/core/arm/skyeye_common/vfp/vfp.cpp
| @@ -773,8 +773,8 @@ void vfp_raise_exceptions(ARMul_State* state, u32 exceptions, u32 inst, u32 fpsc | |||
| 773 | * Comparison instructions always return at least one of | 773 | * Comparison instructions always return at least one of |
| 774 | * these flags set. | 774 | * these flags set. |
| 775 | */ | 775 | */ |
| 776 | if (exceptions & (FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V)) | 776 | if (exceptions & (FPSCR_NFLAG|FPSCR_ZFLAG|FPSCR_CFLAG|FPSCR_VFLAG)) |
| 777 | fpscr &= ~(FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V); | 777 | fpscr &= ~(FPSCR_NFLAG|FPSCR_ZFLAG|FPSCR_CFLAG|FPSCR_VFLAG); |
| 778 | 778 | ||
| 779 | fpscr |= exceptions; | 779 | fpscr |= exceptions; |
| 780 | 780 | ||
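Note: this hunk is a pure rename of the condition-flag constants. The old FPSCR_N/Z/C/V macros are deleted from vfp_helper.h below; the FPSCR_*FLAG replacements are defined in a shared header outside this diff, presumably with the same values. A sketch of what the renamed constants amount to, with the bit positions taken from the deleted macros (N=31, Z=30, C=29, V=28, matching the ARM FPSCR layout):

```cpp
// Hypothetical restatement of the constants this hunk switches to; the
// authoritative definitions live in a header not shown in this diff.
enum : u32 {
    FPSCR_NFLAG = 1u << 31, // Negative condition flag
    FPSCR_ZFLAG = 1u << 30, // Zero condition flag
    FPSCR_CFLAG = 1u << 29, // Carry condition flag
    FPSCR_VFLAG = 1u << 28, // Overflow condition flag
};
```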
diff --git a/src/core/arm/skyeye_common/vfp/vfp_helper.h b/src/core/arm/skyeye_common/vfp/vfp_helper.h
index 581f0358f..b68090b80 100644
--- a/src/core/arm/skyeye_common/vfp/vfp_helper.h
+++ b/src/core/arm/skyeye_common/vfp/vfp_helper.h
| @@ -45,444 +45,400 @@ | |||
| 45 | 45 | ||
| 46 | #define do_div(n, base) {n/=base;} | 46 | #define do_div(n, base) {n/=base;} |
| 47 | 47 | ||
| 48 | /* From vfpinstr.h */ | 48 | enum : u32 { |
| 49 | 49 | FOP_MASK = 0x00b00040, | |
| 50 | #define INST_CPRTDO(inst) (((inst) & 0x0f000000) == 0x0e000000) | 50 | FOP_FMAC = 0x00000000, |
| 51 | #define INST_CPRT(inst) ((inst) & (1 << 4)) | 51 | FOP_FNMAC = 0x00000040, |
| 52 | #define INST_CPRT_L(inst) ((inst) & (1 << 20)) | 52 | FOP_FMSC = 0x00100000, |
| 53 | #define INST_CPRT_Rd(inst) (((inst) & (15 << 12)) >> 12) | 53 | FOP_FNMSC = 0x00100040, |
| 54 | #define INST_CPRT_OP(inst) (((inst) >> 21) & 7) | 54 | FOP_FMUL = 0x00200000, |
| 55 | #define INST_CPNUM(inst) ((inst) & 0xf00) | 55 | FOP_FNMUL = 0x00200040, |
| 56 | #define CPNUM(cp) ((cp) << 8) | 56 | FOP_FADD = 0x00300000, |
| 57 | 57 | FOP_FSUB = 0x00300040, | |
| 58 | #define FOP_MASK (0x00b00040) | 58 | FOP_FDIV = 0x00800000, |
| 59 | #define FOP_FMAC (0x00000000) | 59 | FOP_EXT = 0x00b00040 |
| 60 | #define FOP_FNMAC (0x00000040) | 60 | }; |
| 61 | #define FOP_FMSC (0x00100000) | 61 | |
| 62 | #define FOP_FNMSC (0x00100040) | 62 | #define FOP_TO_IDX(inst) ((inst & 0x00b00000) >> 20 | (inst & (1 << 6)) >> 4) |
| 63 | #define FOP_FMUL (0x00200000) | 63 | |
| 64 | #define FOP_FNMUL (0x00200040) | 64 | enum : u32 { |
| 65 | #define FOP_FADD (0x00300000) | 65 | FEXT_MASK = 0x000f0080, |
| 66 | #define FOP_FSUB (0x00300040) | 66 | FEXT_FCPY = 0x00000000, |
| 67 | #define FOP_FDIV (0x00800000) | 67 | FEXT_FABS = 0x00000080, |
| 68 | #define FOP_EXT (0x00b00040) | 68 | FEXT_FNEG = 0x00010000, |
| 69 | 69 | FEXT_FSQRT = 0x00010080, | |
| 70 | #define FOP_TO_IDX(inst) ((inst & 0x00b00000) >> 20 | (inst & (1 << 6)) >> 4) | 70 | FEXT_FCMP = 0x00040000, |
| 71 | 71 | FEXT_FCMPE = 0x00040080, | |
| 72 | #define FEXT_MASK (0x000f0080) | 72 | FEXT_FCMPZ = 0x00050000, |
| 73 | #define FEXT_FCPY (0x00000000) | 73 | FEXT_FCMPEZ = 0x00050080, |
| 74 | #define FEXT_FABS (0x00000080) | 74 | FEXT_FCVT = 0x00070080, |
| 75 | #define FEXT_FNEG (0x00010000) | 75 | FEXT_FUITO = 0x00080000, |
| 76 | #define FEXT_FSQRT (0x00010080) | 76 | FEXT_FSITO = 0x00080080, |
| 77 | #define FEXT_FCMP (0x00040000) | 77 | FEXT_FTOUI = 0x000c0000, |
| 78 | #define FEXT_FCMPE (0x00040080) | 78 | FEXT_FTOUIZ = 0x000c0080, |
| 79 | #define FEXT_FCMPZ (0x00050000) | 79 | FEXT_FTOSI = 0x000d0000, |
| 80 | #define FEXT_FCMPEZ (0x00050080) | 80 | FEXT_FTOSIZ = 0x000d0080 |
| 81 | #define FEXT_FCVT (0x00070080) | 81 | }; |
| 82 | #define FEXT_FUITO (0x00080000) | 82 | |
| 83 | #define FEXT_FSITO (0x00080080) | 83 | #define FEXT_TO_IDX(inst) ((inst & 0x000f0000) >> 15 | (inst & (1 << 7)) >> 7) |
| 84 | #define FEXT_FTOUI (0x000c0000) | 84 | |
| 85 | #define FEXT_FTOUIZ (0x000c0080) | 85 | #define vfp_get_sd(inst) ((inst & 0x0000f000) >> 11 | (inst & (1 << 22)) >> 22) |
| 86 | #define FEXT_FTOSI (0x000d0000) | 86 | #define vfp_get_dd(inst) ((inst & 0x0000f000) >> 12 | (inst & (1 << 22)) >> 18) |
| 87 | #define FEXT_FTOSIZ (0x000d0080) | 87 | #define vfp_get_sm(inst) ((inst & 0x0000000f) << 1 | (inst & (1 << 5)) >> 5) |
| 88 | 88 | #define vfp_get_dm(inst) ((inst & 0x0000000f) | (inst & (1 << 5)) >> 1) | |
| 89 | #define FEXT_TO_IDX(inst) ((inst & 0x000f0000) >> 15 | (inst & (1 << 7)) >> 7) | 89 | #define vfp_get_sn(inst) ((inst & 0x000f0000) >> 15 | (inst & (1 << 7)) >> 7) |
| 90 | 90 | #define vfp_get_dn(inst) ((inst & 0x000f0000) >> 16 | (inst & (1 << 7)) >> 3) | |
| 91 | #define vfp_get_sd(inst) ((inst & 0x0000f000) >> 11 | (inst & (1 << 22)) >> 22) | 91 | |
| 92 | #define vfp_get_dd(inst) ((inst & 0x0000f000) >> 12 | (inst & (1 << 22)) >> 18) | 92 | #define vfp_single(inst) (((inst) & 0x0000f00) == 0xa00) |
| 93 | #define vfp_get_sm(inst) ((inst & 0x0000000f) << 1 | (inst & (1 << 5)) >> 5) | ||
| 94 | #define vfp_get_dm(inst) ((inst & 0x0000000f) | (inst & (1 << 5)) >> 1) | ||
| 95 | #define vfp_get_sn(inst) ((inst & 0x000f0000) >> 15 | (inst & (1 << 7)) >> 7) | ||
| 96 | #define vfp_get_dn(inst) ((inst & 0x000f0000) >> 16 | (inst & (1 << 7)) >> 3) | ||
| 97 | |||
| 98 | #define vfp_single(inst) (((inst) & 0x0000f00) == 0xa00) | ||
| 99 | |||
| 100 | #define FPSCR_N (1 << 31) | ||
| 101 | #define FPSCR_Z (1 << 30) | ||
| 102 | #define FPSCR_C (1 << 29) | ||
| 103 | #define FPSCR_V (1 << 28) | ||
| 104 | 93 | ||
| 105 | static inline u32 vfp_shiftright32jamming(u32 val, unsigned int shift) | 94 | static inline u32 vfp_shiftright32jamming(u32 val, unsigned int shift) |
| 106 | { | 95 | { |
| 107 | if (shift) { | 96 | if (shift) { |
| 108 | if (shift < 32) | 97 | if (shift < 32) |
| 109 | val = val >> shift | ((val << (32 - shift)) != 0); | 98 | val = val >> shift | ((val << (32 - shift)) != 0); |
| 110 | else | 99 | else |
| 111 | val = val != 0; | 100 | val = val != 0; |
| 112 | } | 101 | } |
| 113 | return val; | 102 | return val; |
| 114 | } | 103 | } |
| 115 | 104 | ||
| 116 | static inline u64 vfp_shiftright64jamming(u64 val, unsigned int shift) | 105 | static inline u64 vfp_shiftright64jamming(u64 val, unsigned int shift) |
| 117 | { | 106 | { |
| 118 | if (shift) { | 107 | if (shift) { |
| 119 | if (shift < 64) | 108 | if (shift < 64) |
| 120 | val = val >> shift | ((val << (64 - shift)) != 0); | 109 | val = val >> shift | ((val << (64 - shift)) != 0); |
| 121 | else | 110 | else |
| 122 | val = val != 0; | 111 | val = val != 0; |
| 123 | } | 112 | } |
| 124 | return val; | 113 | return val; |
| 125 | } | 114 | } |
| 126 | 115 | ||
| 127 | static inline u32 vfp_hi64to32jamming(u64 val) | 116 | static inline u32 vfp_hi64to32jamming(u64 val) |
| 128 | { | 117 | { |
| 129 | u32 v; | 118 | u32 v; |
| 130 | u32 highval = val >> 32; | 119 | u32 highval = val >> 32; |
| 131 | u32 lowval = val & 0xffffffff; | 120 | u32 lowval = val & 0xffffffff; |
| 132 | 121 | ||
| 133 | if (lowval >= 1) | 122 | if (lowval >= 1) |
| 134 | v = highval | 1; | 123 | v = highval | 1; |
| 135 | else | 124 | else |
| 136 | v = highval; | 125 | v = highval; |
| 137 | 126 | ||
| 138 | return v; | 127 | return v; |
| 139 | } | 128 | } |
| 140 | 129 | ||
| 141 | static inline void add128(u64 *resh, u64 *resl, u64 nh, u64 nl, u64 mh, u64 ml) | 130 | static inline void add128(u64* resh, u64* resl, u64 nh, u64 nl, u64 mh, u64 ml) |
| 142 | { | 131 | { |
| 143 | *resl = nl + ml; | 132 | *resl = nl + ml; |
| 144 | *resh = nh + mh; | 133 | *resh = nh + mh; |
| 145 | if (*resl < nl) | 134 | if (*resl < nl) |
| 146 | *resh += 1; | 135 | *resh += 1; |
| 147 | } | 136 | } |
| 148 | 137 | ||
| 149 | static inline void sub128(u64 *resh, u64 *resl, u64 nh, u64 nl, u64 mh, u64 ml) | 138 | static inline void sub128(u64* resh, u64* resl, u64 nh, u64 nl, u64 mh, u64 ml) |
| 150 | { | 139 | { |
| 151 | *resl = nl - ml; | 140 | *resl = nl - ml; |
| 152 | *resh = nh - mh; | 141 | *resh = nh - mh; |
| 153 | if (*resl > nl) | 142 | if (*resl > nl) |
| 154 | *resh -= 1; | 143 | *resh -= 1; |
| 155 | } | 144 | } |
| 156 | 145 | ||
| 157 | static inline void mul64to128(u64 *resh, u64 *resl, u64 n, u64 m) | 146 | static inline void mul64to128(u64* resh, u64* resl, u64 n, u64 m) |
| 158 | { | 147 | { |
| 159 | u32 nh, nl, mh, ml; | 148 | u32 nh, nl, mh, ml; |
| 160 | u64 rh, rma, rmb, rl; | 149 | u64 rh, rma, rmb, rl; |
| 161 | 150 | ||
| 162 | nl = n; | 151 | nl = n; |
| 163 | ml = m; | 152 | ml = m; |
| 164 | rl = (u64)nl * ml; | 153 | rl = (u64)nl * ml; |
| 165 | 154 | ||
| 166 | nh = n >> 32; | 155 | nh = n >> 32; |
| 167 | rma = (u64)nh * ml; | 156 | rma = (u64)nh * ml; |
| 168 | 157 | ||
| 169 | mh = m >> 32; | 158 | mh = m >> 32; |
| 170 | rmb = (u64)nl * mh; | 159 | rmb = (u64)nl * mh; |
| 171 | rma += rmb; | 160 | rma += rmb; |
| 172 | 161 | ||
| 173 | rh = (u64)nh * mh; | 162 | rh = (u64)nh * mh; |
| 174 | rh += ((u64)(rma < rmb) << 32) + (rma >> 32); | 163 | rh += ((u64)(rma < rmb) << 32) + (rma >> 32); |
| 175 | 164 | ||
| 176 | rma <<= 32; | 165 | rma <<= 32; |
| 177 | rl += rma; | 166 | rl += rma; |
| 178 | rh += (rl < rma); | 167 | rh += (rl < rma); |
| 179 | 168 | ||
| 180 | *resl = rl; | 169 | *resl = rl; |
| 181 | *resh = rh; | 170 | *resh = rh; |
| 182 | } | 171 | } |
| 183 | 172 | ||
| 184 | static inline void shift64left(u64 *resh, u64 *resl, u64 n) | 173 | static inline void shift64left(u64* resh, u64* resl, u64 n) |
| 185 | { | 174 | { |
| 186 | *resh = n >> 63; | 175 | *resh = n >> 63; |
| 187 | *resl = n << 1; | 176 | *resl = n << 1; |
| 188 | } | 177 | } |
| 189 | 178 | ||
| 190 | static inline u64 vfp_hi64multiply64(u64 n, u64 m) | 179 | static inline u64 vfp_hi64multiply64(u64 n, u64 m) |
| 191 | { | 180 | { |
| 192 | u64 rh, rl; | 181 | u64 rh, rl; |
| 193 | mul64to128(&rh, &rl, n, m); | 182 | mul64to128(&rh, &rl, n, m); |
| 194 | return rh | (rl != 0); | 183 | return rh | (rl != 0); |
| 195 | } | 184 | } |
| 196 | 185 | ||
| 197 | static inline u64 vfp_estimate_div128to64(u64 nh, u64 nl, u64 m) | 186 | static inline u64 vfp_estimate_div128to64(u64 nh, u64 nl, u64 m) |
| 198 | { | 187 | { |
| 199 | u64 mh, ml, remh, reml, termh, terml, z; | 188 | u64 mh, ml, remh, reml, termh, terml, z; |
| 200 | 189 | ||
| 201 | if (nh >= m) | 190 | if (nh >= m) |
| 202 | return ~0ULL; | 191 | return ~0ULL; |
| 203 | mh = m >> 32; | 192 | mh = m >> 32; |
| 204 | if (mh << 32 <= nh) { | 193 | if (mh << 32 <= nh) { |
| 205 | z = 0xffffffff00000000ULL; | 194 | z = 0xffffffff00000000ULL; |
| 206 | } else { | 195 | } else { |
| 207 | z = nh; | 196 | z = nh; |
| 208 | do_div(z, mh); | 197 | do_div(z, mh); |
| 209 | z <<= 32; | 198 | z <<= 32; |
| 210 | } | 199 | } |
| 211 | mul64to128(&termh, &terml, m, z); | 200 | mul64to128(&termh, &terml, m, z); |
| 212 | sub128(&remh, &reml, nh, nl, termh, terml); | 201 | sub128(&remh, &reml, nh, nl, termh, terml); |
| 213 | ml = m << 32; | 202 | ml = m << 32; |
| 214 | while ((s64)remh < 0) { | 203 | while ((s64)remh < 0) { |
| 215 | z -= 0x100000000ULL; | 204 | z -= 0x100000000ULL; |
| 216 | add128(&remh, &reml, remh, reml, mh, ml); | 205 | add128(&remh, &reml, remh, reml, mh, ml); |
| 217 | } | 206 | } |
| 218 | remh = (remh << 32) | (reml >> 32); | 207 | remh = (remh << 32) | (reml >> 32); |
| 219 | if (mh << 32 <= remh) { | 208 | if (mh << 32 <= remh) { |
| 220 | z |= 0xffffffff; | 209 | z |= 0xffffffff; |
| 221 | } else { | 210 | } else { |
| 222 | do_div(remh, mh); | 211 | do_div(remh, mh); |
| 223 | z |= remh; | 212 | z |= remh; |
| 224 | } | 213 | } |
| 225 | return z; | 214 | return z; |
| 226 | } | 215 | } |
| 227 | 216 | ||
| 228 | /* | 217 | // Operations on unpacked elements |
| 229 | * Operations on unpacked elements | 218 | #define vfp_sign_negate(sign) (sign ^ 0x8000) |
| 230 | */ | ||
| 231 | #define vfp_sign_negate(sign) (sign ^ 0x8000) | ||
| 232 | 219 | ||
| 233 | /* | 220 | // Single-precision |
| 234 | * Single-precision | ||
| 235 | */ | ||
| 236 | struct vfp_single { | 221 | struct vfp_single { |
| 237 | s16 exponent; | 222 | s16 exponent; |
| 238 | u16 sign; | 223 | u16 sign; |
| 239 | u32 significand; | 224 | u32 significand; |
| 240 | }; | 225 | }; |
| 241 | 226 | ||
| 242 | /* | 227 | // VFP_SINGLE_MANTISSA_BITS - number of bits in the mantissa |
| 243 | * VFP_SINGLE_MANTISSA_BITS - number of bits in the mantissa | 228 | // VFP_SINGLE_EXPONENT_BITS - number of bits in the exponent |
| 244 | * VFP_SINGLE_EXPONENT_BITS - number of bits in the exponent | 229 | // VFP_SINGLE_LOW_BITS - number of low bits in the unpacked significand |
| 245 | * VFP_SINGLE_LOW_BITS - number of low bits in the unpacked significand | 230 | // which are not propagated to the float upon packing. |
| 246 | * which are not propagated to the float upon packing. | 231 | #define VFP_SINGLE_MANTISSA_BITS (23) |
| 247 | */ | 232 | #define VFP_SINGLE_EXPONENT_BITS (8) |
| 248 | #define VFP_SINGLE_MANTISSA_BITS (23) | 233 | #define VFP_SINGLE_LOW_BITS (32 - VFP_SINGLE_MANTISSA_BITS - 2) |
| 249 | #define VFP_SINGLE_EXPONENT_BITS (8) | 234 | #define VFP_SINGLE_LOW_BITS_MASK ((1 << VFP_SINGLE_LOW_BITS) - 1) |
| 250 | #define VFP_SINGLE_LOW_BITS (32 - VFP_SINGLE_MANTISSA_BITS - 2) | ||
| 251 | #define VFP_SINGLE_LOW_BITS_MASK ((1 << VFP_SINGLE_LOW_BITS) - 1) | ||
| 252 | 235 | ||
| 253 | /* | 236 | // The bit in an unpacked float which indicates that it is a quiet NaN |
| 254 | * The bit in an unpacked float which indicates that it is a quiet NaN | ||
| 255 | */ | ||
| 256 | #define VFP_SINGLE_SIGNIFICAND_QNAN (1 << (VFP_SINGLE_MANTISSA_BITS - 1 + VFP_SINGLE_LOW_BITS)) | 237 | #define VFP_SINGLE_SIGNIFICAND_QNAN (1 << (VFP_SINGLE_MANTISSA_BITS - 1 + VFP_SINGLE_LOW_BITS)) |
| 257 | 238 | ||
| 258 | /* | 239 | // Operations on packed single-precision numbers |
| 259 | * Operations on packed single-precision numbers | 240 | #define vfp_single_packed_sign(v) ((v) & 0x80000000) |
| 260 | */ | 241 | #define vfp_single_packed_negate(v) ((v) ^ 0x80000000) |
| 261 | #define vfp_single_packed_sign(v) ((v) & 0x80000000) | 242 | #define vfp_single_packed_abs(v) ((v) & ~0x80000000) |
| 262 | #define vfp_single_packed_negate(v) ((v) ^ 0x80000000) | 243 | #define vfp_single_packed_exponent(v) (((v) >> VFP_SINGLE_MANTISSA_BITS) & ((1 << VFP_SINGLE_EXPONENT_BITS) - 1)) |
| 263 | #define vfp_single_packed_abs(v) ((v) & ~0x80000000) | 244 | #define vfp_single_packed_mantissa(v) ((v) & ((1 << VFP_SINGLE_MANTISSA_BITS) - 1)) |
| 264 | #define vfp_single_packed_exponent(v) (((v) >> VFP_SINGLE_MANTISSA_BITS) & ((1 << VFP_SINGLE_EXPONENT_BITS) - 1)) | 245 | |
| 265 | #define vfp_single_packed_mantissa(v) ((v) & ((1 << VFP_SINGLE_MANTISSA_BITS) - 1)) | 246 | // Unpack a single-precision float. Note that this returns the magnitude |
| 266 | 247 | // of the single-precision float mantissa with the 1. if necessary, | |
| 267 | /* | 248 | // aligned to bit 30. |
| 268 | * Unpack a single-precision float. Note that this returns the magnitude | 249 | static inline void vfp_single_unpack(vfp_single* s, s32 val) |
| 269 | * of the single-precision float mantissa with the 1. if necessary, | ||
| 270 | * aligned to bit 30. | ||
| 271 | */ | ||
| 272 | static inline void vfp_single_unpack(struct vfp_single *s, s32 val) | ||
| 273 | { | 250 | { |
| 274 | u32 significand; | 251 | u32 significand; |
| 275 | 252 | ||
| 276 | s->sign = vfp_single_packed_sign(val) >> 16, | 253 | s->sign = vfp_single_packed_sign(val) >> 16, |
| 277 | s->exponent = vfp_single_packed_exponent(val); | 254 | s->exponent = vfp_single_packed_exponent(val); |
| 278 | 255 | ||
| 279 | significand = (u32) val; | 256 | significand = (u32) val; |
| 280 | significand = (significand << (32 - VFP_SINGLE_MANTISSA_BITS)) >> 2; | 257 | significand = (significand << (32 - VFP_SINGLE_MANTISSA_BITS)) >> 2; |
| 281 | if (s->exponent && s->exponent != 255) | 258 | if (s->exponent && s->exponent != 255) |
| 282 | significand |= 0x40000000; | 259 | significand |= 0x40000000; |
| 283 | s->significand = significand; | 260 | s->significand = significand; |
| 284 | } | 261 | } |
| 285 | 262 | ||
| 286 | /* | 263 | // Re-pack a single-precision float. This assumes that the float is |
| 287 | * Re-pack a single-precision float. This assumes that the float is | 264 | // already normalised such that the MSB is bit 30, _not_ bit 31. |
| 288 | * already normalised such that the MSB is bit 30, _not_ bit 31. | 265 | static inline s32 vfp_single_pack(vfp_single* s) |
| 289 | */ | ||
| 290 | static inline s32 vfp_single_pack(struct vfp_single *s) | ||
| 291 | { | 266 | { |
| 292 | u32 val; | 267 | u32 val = (s->sign << 16) + |
| 293 | val = (s->sign << 16) + | 268 | (s->exponent << VFP_SINGLE_MANTISSA_BITS) + |
| 294 | (s->exponent << VFP_SINGLE_MANTISSA_BITS) + | 269 | (s->significand >> VFP_SINGLE_LOW_BITS); |
| 295 | (s->significand >> VFP_SINGLE_LOW_BITS); | 270 | return (s32)val; |
| 296 | return (s32)val; | ||
| 297 | } | 271 | } |
| 298 | 272 | ||
| 299 | #define VFP_NUMBER (1<<0) | 273 | enum : u32 { |
| 300 | #define VFP_ZERO (1<<1) | 274 | VFP_NUMBER = (1 << 0), |
| 301 | #define VFP_DENORMAL (1<<2) | 275 | VFP_ZERO = (1 << 1), |
| 302 | #define VFP_INFINITY (1<<3) | 276 | VFP_DENORMAL = (1 << 2), |
| 303 | #define VFP_NAN (1<<4) | 277 | VFP_INFINITY = (1 << 3), |
| 304 | #define VFP_NAN_SIGNAL (1<<5) | 278 | VFP_NAN = (1 << 4), |
| 279 | VFP_NAN_SIGNAL = (1 << 5), | ||
| 305 | 280 | ||
| 306 | #define VFP_QNAN (VFP_NAN) | 281 | VFP_QNAN = (VFP_NAN), |
| 307 | #define VFP_SNAN (VFP_NAN|VFP_NAN_SIGNAL) | 282 | VFP_SNAN = (VFP_NAN|VFP_NAN_SIGNAL) |
| 283 | }; | ||
| 308 | 284 | ||
| 309 | static inline int vfp_single_type(struct vfp_single *s) | 285 | static inline int vfp_single_type(vfp_single* s) |
| 310 | { | 286 | { |
| 311 | int type = VFP_NUMBER; | 287 | int type = VFP_NUMBER; |
| 312 | if (s->exponent == 255) { | 288 | if (s->exponent == 255) { |
| 313 | if (s->significand == 0) | 289 | if (s->significand == 0) |
| 314 | type = VFP_INFINITY; | 290 | type = VFP_INFINITY; |
| 315 | else if (s->significand & VFP_SINGLE_SIGNIFICAND_QNAN) | 291 | else if (s->significand & VFP_SINGLE_SIGNIFICAND_QNAN) |
| 316 | type = VFP_QNAN; | 292 | type = VFP_QNAN; |
| 317 | else | 293 | else |
| 318 | type = VFP_SNAN; | 294 | type = VFP_SNAN; |
| 319 | } else if (s->exponent == 0) { | 295 | } else if (s->exponent == 0) { |
| 320 | if (s->significand == 0) | 296 | if (s->significand == 0) |
| 321 | type |= VFP_ZERO; | 297 | type |= VFP_ZERO; |
| 322 | else | 298 | else |
| 323 | type |= VFP_DENORMAL; | 299 | type |= VFP_DENORMAL; |
| 324 | } | 300 | } |
| 325 | return type; | 301 | return type; |
| 326 | } | 302 | } |
| 327 | 303 | ||
| 328 | 304 | ||
| 329 | u32 vfp_single_normaliseround(ARMul_State* state, int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func); | 305 | u32 vfp_single_normaliseround(ARMul_State* state, int sd, vfp_single* vs, u32 fpscr, u32 exceptions, const char* func); |
| 330 | 306 | ||
| 331 | /* | 307 | // Double-precision |
| 332 | * Double-precision | ||
| 333 | */ | ||
| 334 | struct vfp_double { | 308 | struct vfp_double { |
| 335 | s16 exponent; | 309 | s16 exponent; |
| 336 | u16 sign; | 310 | u16 sign; |
| 337 | u64 significand; | 311 | u64 significand; |
| 338 | }; | 312 | }; |
| 339 | 313 | ||
| 340 | /* | 314 | // VFP_REG_ZERO is a special register number for vfp_get_double |
| 341 | * VFP_REG_ZERO is a special register number for vfp_get_double | 315 | // which returns (double)0.0. This is useful for the compare with |
| 342 | * which returns (double)0.0. This is useful for the compare with | 316 | // zero instructions. |
| 343 | * zero instructions. | ||
| 344 | */ | ||
| 345 | #ifdef CONFIG_VFPv3 | 317 | #ifdef CONFIG_VFPv3 |
| 346 | #define VFP_REG_ZERO 32 | 318 | #define VFP_REG_ZERO 32 |
| 347 | #else | 319 | #else |
| 348 | #define VFP_REG_ZERO 16 | 320 | #define VFP_REG_ZERO 16 |
| 349 | #endif | 321 | #endif |
| 350 | 322 | ||
| 351 | #define VFP_DOUBLE_MANTISSA_BITS (52) | 323 | #define VFP_DOUBLE_MANTISSA_BITS (52) |
| 352 | #define VFP_DOUBLE_EXPONENT_BITS (11) | 324 | #define VFP_DOUBLE_EXPONENT_BITS (11) |
| 353 | #define VFP_DOUBLE_LOW_BITS (64 - VFP_DOUBLE_MANTISSA_BITS - 2) | 325 | #define VFP_DOUBLE_LOW_BITS (64 - VFP_DOUBLE_MANTISSA_BITS - 2) |
| 354 | #define VFP_DOUBLE_LOW_BITS_MASK ((1 << VFP_DOUBLE_LOW_BITS) - 1) | 326 | #define VFP_DOUBLE_LOW_BITS_MASK ((1 << VFP_DOUBLE_LOW_BITS) - 1) |
| 355 | 327 | ||
| 356 | /* | 328 | // The bit in an unpacked double which indicates that it is a quiet NaN |
| 357 | * The bit in an unpacked double which indicates that it is a quiet NaN | 329 | #define VFP_DOUBLE_SIGNIFICAND_QNAN (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1 + VFP_DOUBLE_LOW_BITS)) |
| 358 | */ | 330 | |
| 359 | #define VFP_DOUBLE_SIGNIFICAND_QNAN (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1 + VFP_DOUBLE_LOW_BITS)) | 331 | // Operations on packed double-precision numbers |
| 360 | 332 | #define vfp_double_packed_sign(v) ((v) & (1ULL << 63)) | |
| 361 | /* | 333 | #define vfp_double_packed_negate(v) ((v) ^ (1ULL << 63)) |
| 362 | * Operations on packed single-precision numbers | 334 | #define vfp_double_packed_abs(v) ((v) & ~(1ULL << 63)) |
| 363 | */ | 335 | #define vfp_double_packed_exponent(v) (((v) >> VFP_DOUBLE_MANTISSA_BITS) & ((1 << VFP_DOUBLE_EXPONENT_BITS) - 1)) |
| 364 | #define vfp_double_packed_sign(v) ((v) & (1ULL << 63)) | 336 | #define vfp_double_packed_mantissa(v) ((v) & ((1ULL << VFP_DOUBLE_MANTISSA_BITS) - 1)) |
| 365 | #define vfp_double_packed_negate(v) ((v) ^ (1ULL << 63)) | 337 | |
| 366 | #define vfp_double_packed_abs(v) ((v) & ~(1ULL << 63)) | 338 | // Unpack a double-precision float. Note that this returns the magnitude |
| 367 | #define vfp_double_packed_exponent(v) (((v) >> VFP_DOUBLE_MANTISSA_BITS) & ((1 << VFP_DOUBLE_EXPONENT_BITS) - 1)) | 339 | // of the double-precision float mantissa with the 1. if necessary, |
| 368 | #define vfp_double_packed_mantissa(v) ((v) & ((1ULL << VFP_DOUBLE_MANTISSA_BITS) - 1)) | 340 | // aligned to bit 62. |
| 369 | 341 | static inline void vfp_double_unpack(vfp_double* s, s64 val) | |
| 370 | /* | ||
| 371 | * Unpack a double-precision float. Note that this returns the magnitude | ||
| 372 | * of the double-precision float mantissa with the 1. if necessary, | ||
| 373 | * aligned to bit 62. | ||
| 374 | */ | ||
| 375 | static inline void vfp_double_unpack(struct vfp_double *s, s64 val) | ||
| 376 | { | 342 | { |
| 377 | u64 significand; | 343 | u64 significand; |
| 378 | 344 | ||
| 379 | s->sign = vfp_double_packed_sign(val) >> 48; | 345 | s->sign = vfp_double_packed_sign(val) >> 48; |
| 380 | s->exponent = vfp_double_packed_exponent(val); | 346 | s->exponent = vfp_double_packed_exponent(val); |
| 381 | 347 | ||
| 382 | significand = (u64) val; | 348 | significand = (u64) val; |
| 383 | significand = (significand << (64 - VFP_DOUBLE_MANTISSA_BITS)) >> 2; | 349 | significand = (significand << (64 - VFP_DOUBLE_MANTISSA_BITS)) >> 2; |
| 384 | if (s->exponent && s->exponent != 2047) | 350 | if (s->exponent && s->exponent != 2047) |
| 385 | significand |= (1ULL << 62); | 351 | significand |= (1ULL << 62); |
| 386 | s->significand = significand; | 352 | s->significand = significand; |
| 387 | } | 353 | } |
| 388 | 354 | ||
| 389 | /* | 355 | // Re-pack a double-precision float. This assumes that the float is |
| 390 | * Re-pack a double-precision float. This assumes that the float is | 356 | // already normalised such that the MSB is bit 30, _not_ bit 31. |
| 391 | * already normalised such that the MSB is bit 30, _not_ bit 31. | 357 | static inline s64 vfp_double_pack(vfp_double* s) |
| 392 | */ | ||
| 393 | static inline s64 vfp_double_pack(struct vfp_double *s) | ||
| 394 | { | 358 | { |
| 395 | u64 val; | 359 | u64 val = ((u64)s->sign << 48) + |
| 396 | val = ((u64)s->sign << 48) + | 360 | ((u64)s->exponent << VFP_DOUBLE_MANTISSA_BITS) + |
| 397 | ((u64)s->exponent << VFP_DOUBLE_MANTISSA_BITS) + | 361 | (s->significand >> VFP_DOUBLE_LOW_BITS); |
| 398 | (s->significand >> VFP_DOUBLE_LOW_BITS); | 362 | return (s64)val; |
| 399 | return (s64)val; | ||
| 400 | } | 363 | } |
| 401 | 364 | ||
| 402 | static inline int vfp_double_type(struct vfp_double *s) | 365 | static inline int vfp_double_type(vfp_double* s) |
| 403 | { | 366 | { |
| 404 | int type = VFP_NUMBER; | 367 | int type = VFP_NUMBER; |
| 405 | if (s->exponent == 2047) { | 368 | if (s->exponent == 2047) { |
| 406 | if (s->significand == 0) | 369 | if (s->significand == 0) |
| 407 | type = VFP_INFINITY; | 370 | type = VFP_INFINITY; |
| 408 | else if (s->significand & VFP_DOUBLE_SIGNIFICAND_QNAN) | 371 | else if (s->significand & VFP_DOUBLE_SIGNIFICAND_QNAN) |
| 409 | type = VFP_QNAN; | 372 | type = VFP_QNAN; |
| 410 | else | 373 | else |
| 411 | type = VFP_SNAN; | 374 | type = VFP_SNAN; |
| 412 | } else if (s->exponent == 0) { | 375 | } else if (s->exponent == 0) { |
| 413 | if (s->significand == 0) | 376 | if (s->significand == 0) |
| 414 | type |= VFP_ZERO; | 377 | type |= VFP_ZERO; |
| 415 | else | 378 | else |
| 416 | type |= VFP_DENORMAL; | 379 | type |= VFP_DENORMAL; |
| 417 | } | 380 | } |
| 418 | return type; | 381 | return type; |
| 419 | } | 382 | } |
| 420 | 383 | ||
| 421 | u32 vfp_double_normaliseround(ARMul_State* state, int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func); | ||
| 422 | |||
| 423 | u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand); | 384 | u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand); |
| 424 | 385 | ||
| 425 | /* | 386 | // A special flag to tell the normalisation code not to normalise. |
| 426 | * A special flag to tell the normalisation code not to normalise. | 387 | #define VFP_NAN_FLAG 0x100 |
| 427 | */ | 388 | |
| 428 | #define VFP_NAN_FLAG 0x100 | 389 | // A bit pattern used to indicate the initial (unset) value of the |
| 429 | 390 | // exception mask, in case nothing handles an instruction. This | |
| 430 | /* | 391 | // doesn't include the NAN flag, which get masked out before |
| 431 | * A bit pattern used to indicate the initial (unset) value of the | 392 | // we check for an error. |
| 432 | * exception mask, in case nothing handles an instruction. This | 393 | #define VFP_EXCEPTION_ERROR ((u32)-1 & ~VFP_NAN_FLAG) |
| 433 | * doesn't include the NAN flag, which get masked out before | 392 | // doesn't include the NAN flag, which gets masked out before |
| 434 | * we check for an error. | 395 | // A flag to tell vfp instruction type. |
| 435 | */ | 396 | // OP_SCALAR - This operation always operates in scalar mode |
| 436 | #define VFP_EXCEPTION_ERROR ((u32)-1 & ~VFP_NAN_FLAG) | 397 | // OP_SD - The instruction exceptionally writes to a single precision result. |
| 437 | 398 | // OP_DD - The instruction exceptionally writes to a double precision result. | |
| 438 | /* | 399 | // OP_SM - The instruction exceptionally reads from a single precision operand. |
| 439 | * A flag to tell vfp instruction type. | 396 | // Flags to tell the VFP instruction type. |
| 440 | * OP_SCALAR - this operation always operates in scalar mode | 401 | OP_SCALAR = (1 << 0), |
| 441 | * OP_SD - the instruction exceptionally writes to a single precision result. | 402 | OP_SD = (1 << 1), |
| 442 | * OP_DD - the instruction exceptionally writes to a double precision result. | 403 | OP_DD = (1 << 1), |
| 443 | * OP_SM - the instruction exceptionally reads from a single precision operand. | 404 | OP_SM = (1 << 2) |
| 444 | */ | 405 | }; |
| 445 | #define OP_SCALAR (1 << 0) | ||
| 446 | #define OP_SD (1 << 1) | ||
| 447 | #define OP_DD (1 << 1) | ||
| 448 | #define OP_SM (1 << 2) | ||
| 449 | 406 | ||
| 450 | struct op { | 407 | struct op { |
| 451 | u32 (* const fn)(ARMul_State* state, int dd, int dn, int dm, u32 fpscr); | 408 | u32 (* const fn)(ARMul_State* state, int dd, int dn, int dm, u32 fpscr); |
| 452 | u32 flags; | 409 | u32 flags; |
| 453 | }; | 410 | }; |
| 454 | 411 | ||
| 455 | static inline u32 fls(ARMword x) | 412 | static inline u32 fls(ARMword x) |
| 456 | { | 413 | { |
| 457 | int r = 32; | 414 | int r = 32; |
| 458 | 415 | ||
| 459 | if (!x) | 416 | if (!x) |
| 460 | return 0; | 417 | return 0; |
| 461 | if (!(x & 0xffff0000u)) { | 418 | if (!(x & 0xffff0000u)) { |
| 462 | x <<= 16; | 419 | x <<= 16; |
| 463 | r -= 16; | 420 | r -= 16; |
| 464 | } | 421 | } |
| 465 | if (!(x & 0xff000000u)) { | 422 | if (!(x & 0xff000000u)) { |
| 466 | x <<= 8; | 423 | x <<= 8; |
| 467 | r -= 8; | 424 | r -= 8; |
| 468 | } | 425 | } |
| 469 | if (!(x & 0xf0000000u)) { | 426 | if (!(x & 0xf0000000u)) { |
| 470 | x <<= 4; | 427 | x <<= 4; |
| 471 | r -= 4; | 428 | r -= 4; |
| 472 | } | 429 | } |
| 473 | if (!(x & 0xc0000000u)) { | 430 | if (!(x & 0xc0000000u)) { |
| 474 | x <<= 2; | 431 | x <<= 2; |
| 475 | r -= 2; | 432 | r -= 2; |
| 476 | } | 433 | } |
| 477 | if (!(x & 0x80000000u)) { | 434 | if (!(x & 0x80000000u)) { |
| 478 | x <<= 1; | 435 | x <<= 1; |
| 479 | r -= 1; | 436 | r -= 1; |
| 480 | } | 437 | } |
| 481 | return r; | 438 | return r; |
| 482 | 439 | ||
| 483 | } | 440 | } |
| 484 | 441 | ||
| 485 | u32 vfp_double_normaliseroundintern(ARMul_State* state, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func); | 442 | u32 vfp_double_multiply(vfp_double* vdd, vfp_double* vdn, vfp_double* vdm, u32 fpscr); |
| 486 | u32 vfp_double_multiply(struct vfp_double *vdd, struct vfp_double *vdn, struct vfp_double *vdm, u32 fpscr); | 443 | u32 vfp_double_add(vfp_double* vdd, vfp_double* vdn, vfp_double *vdm, u32 fpscr); |
| 487 | u32 vfp_double_add(struct vfp_double *vdd, struct vfp_double *vdn, struct vfp_double *vdm, u32 fpscr); | 444 | u32 vfp_double_normaliseround(ARMul_State* state, int dd, vfp_double* vd, u32 fpscr, u32 exceptions, const char* func); |
| 488 | u32 vfp_double_fcvtsinterncutting(ARMul_State* state, int sd, struct vfp_double* dm, u32 fpscr); | ||
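The decode macros retained above are easiest to sanity-check with a hand-built instruction word. A standalone sketch (the sample encoding below is invented for illustration and is not part of this commit):

```cpp
#include <cstdint>
#include <cstdio>

using u32 = std::uint32_t;

// Copied verbatim from vfp_helper.h above.
#define FOP_TO_IDX(inst) ((inst & 0x00b00000) >> 20 | (inst & (1 << 6)) >> 4)
#define vfp_get_sd(inst) ((inst & 0x0000f000) >> 11 | (inst & (1 << 22)) >> 22)
#define vfp_get_sn(inst) ((inst & 0x000f0000) >> 15 | (inst & (1 << 7)) >> 7)
#define vfp_get_sm(inst) ((inst & 0x0000000f) << 1 | (inst & (1 << 5)) >> 5)

int main() {
    // Hand-assembled FADD-like word: FOP_FADD opcode bits (0x00300000),
    // Fd = 4 with D = 1 (so Sd = 9), Fn = 1 with N = 0 (Sn = 2),
    // Fm = 3 with M = 1 (Sm = 7).
    u32 inst = 0x00300000 | (1u << 22) | (0x4u << 12) | (0x1u << 16) | (1u << 5) | 0x3u;

    std::printf("fop idx = %u\n", FOP_TO_IDX(inst)); // 3, the FADD slot
    std::printf("Sd=%u Sn=%u Sm=%u\n",
                vfp_get_sd(inst), vfp_get_sn(inst), vfp_get_sm(inst)); // 9 2 7
}
```

The single-precision register number interleaves a 4-bit field with one extra bit (Sd is the concatenation Fd:D), which is why each extractor shifts the two pieces into adjacent positions.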
diff --git a/src/core/arm/skyeye_common/vfp/vfpdouble.cpp b/src/core/arm/skyeye_common/vfp/vfpdouble.cpp
index d35ca510a..2c15db12b 100644
--- a/src/core/arm/skyeye_common/vfp/vfpdouble.cpp
+++ b/src/core/arm/skyeye_common/vfp/vfpdouble.cpp
| @@ -83,134 +83,6 @@ static void vfp_double_normalise_denormal(struct vfp_double *vd) | |||
| 83 | vfp_double_dump("normalise_denormal: out", vd); | 83 | vfp_double_dump("normalise_denormal: out", vd); |
| 84 | } | 84 | } |
| 85 | 85 | ||
| 86 | u32 vfp_double_normaliseroundintern(ARMul_State* state, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func) | ||
| 87 | { | ||
| 88 | u64 significand, incr; | ||
| 89 | int exponent, shift, underflow; | ||
| 90 | u32 rmode; | ||
| 91 | |||
| 92 | vfp_double_dump("pack: in", vd); | ||
| 93 | |||
| 94 | /* | ||
| 95 | * Infinities and NaNs are a special case. | ||
| 96 | */ | ||
| 97 | if (vd->exponent == 2047 && (vd->significand == 0 || exceptions)) | ||
| 98 | goto pack; | ||
| 99 | |||
| 100 | /* | ||
| 101 | * Special-case zero. | ||
| 102 | */ | ||
| 103 | if (vd->significand == 0) { | ||
| 104 | vd->exponent = 0; | ||
| 105 | goto pack; | ||
| 106 | } | ||
| 107 | |||
| 108 | exponent = vd->exponent; | ||
| 109 | significand = vd->significand; | ||
| 110 | |||
| 111 | shift = 32 - fls((ARMword)(significand >> 32)); | ||
| 112 | if (shift == 32) | ||
| 113 | shift = 64 - fls((ARMword)significand); | ||
| 114 | if (shift) { | ||
| 115 | exponent -= shift; | ||
| 116 | significand <<= shift; | ||
| 117 | } | ||
| 118 | |||
| 119 | #if 1 | ||
| 120 | vd->exponent = exponent; | ||
| 121 | vd->significand = significand; | ||
| 122 | vfp_double_dump("pack: normalised", vd); | ||
| 123 | #endif | ||
| 124 | |||
| 125 | /* | ||
| 126 | * Tiny number? | ||
| 127 | */ | ||
| 128 | underflow = exponent < 0; | ||
| 129 | if (underflow) { | ||
| 130 | significand = vfp_shiftright64jamming(significand, -exponent); | ||
| 131 | exponent = 0; | ||
| 132 | #if 1 | ||
| 133 | vd->exponent = exponent; | ||
| 134 | vd->significand = significand; | ||
| 135 | vfp_double_dump("pack: tiny number", vd); | ||
| 136 | #endif | ||
| 137 | if (!(significand & ((1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1))) | ||
| 138 | underflow = 0; | ||
| 139 | } | ||
| 140 | |||
| 141 | /* | ||
| 142 | * Select rounding increment. | ||
| 143 | */ | ||
| 144 | incr = 0; | ||
| 145 | rmode = fpscr & FPSCR_RMODE_MASK; | ||
| 146 | |||
| 147 | if (rmode == FPSCR_ROUND_NEAREST) { | ||
| 148 | incr = 1ULL << VFP_DOUBLE_LOW_BITS; | ||
| 149 | if ((significand & (1ULL << (VFP_DOUBLE_LOW_BITS + 1))) == 0) | ||
| 150 | incr -= 1; | ||
| 151 | } | ||
| 152 | else if (rmode == FPSCR_ROUND_TOZERO) { | ||
| 153 | incr = 0; | ||
| 154 | } | ||
| 155 | else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vd->sign != 0)) | ||
| 156 | incr = (1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1; | ||
| 157 | |||
| 158 | LOG_TRACE(Core_ARM11, "VFP: rounding increment = 0x%08llx\n", incr); | ||
| 159 | |||
| 160 | /* | ||
| 161 | * Is our rounding going to overflow? | ||
| 162 | */ | ||
| 163 | if ((significand + incr) < significand) { | ||
| 164 | exponent += 1; | ||
| 165 | significand = (significand >> 1) | (significand & 1); | ||
| 166 | incr >>= 1; | ||
| 167 | #if 1 | ||
| 168 | vd->exponent = exponent; | ||
| 169 | vd->significand = significand; | ||
| 170 | vfp_double_dump("pack: overflow", vd); | ||
| 171 | #endif | ||
| 172 | } | ||
| 173 | |||
| 174 | /* | ||
| 175 | * If any of the low bits (which will be shifted out of the | ||
| 176 | * number) are non-zero, the result is inexact. | ||
| 177 | */ | ||
| 178 | if (significand & ((1 << (VFP_DOUBLE_LOW_BITS + 1)) - 1)) | ||
| 179 | exceptions |= FPSCR_IXC; | ||
| 180 | |||
| 181 | /* | ||
| 182 | * Do our rounding. | ||
| 183 | */ | ||
| 184 | significand += incr; | ||
| 185 | |||
| 186 | /* | ||
| 187 | * Infinity? | ||
| 188 | */ | ||
| 189 | if (exponent >= 2046) { | ||
| 190 | exceptions |= FPSCR_OFC | FPSCR_IXC; | ||
| 191 | if (incr == 0) { | ||
| 192 | vd->exponent = 2045; | ||
| 193 | vd->significand = 0x7fffffffffffffffULL; | ||
| 194 | } | ||
| 195 | else { | ||
| 196 | vd->exponent = 2047; /* infinity */ | ||
| 197 | vd->significand = 0; | ||
| 198 | } | ||
| 199 | } | ||
| 200 | else { | ||
| 201 | if (significand >> (VFP_DOUBLE_LOW_BITS + 1) == 0) | ||
| 202 | exponent = 0; | ||
| 203 | if (exponent || significand > 0x8000000000000000ULL) | ||
| 204 | underflow = 0; | ||
| 205 | if (underflow) | ||
| 206 | exceptions |= FPSCR_UFC; | ||
| 207 | vd->exponent = exponent; | ||
| 208 | vd->significand = significand >> 1; | ||
| 209 | } | ||
| 210 | pack: | ||
| 211 | return 0; | ||
| 212 | } | ||
| 213 | |||
| 214 | u32 vfp_double_normaliseround(ARMul_State* state, int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func) | 86 | u32 vfp_double_normaliseround(ARMul_State* state, int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func) |
| 215 | { | 87 | { |
| 216 | u64 significand, incr; | 88 | u64 significand, incr; |
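The 128 lines removed above were vfp_double_normaliseroundintern(), which appears to be a near-copy of the vfp_double_normaliseround() whose context begins at the end of this hunk, minus the final register write; it existed only for the also-deleted vfp_single_to_doubleintern() in vfpsingle.cpp. The heart of the duplicated logic is the rounding-increment selection. A minimal sketch of that step, with VFP_DOUBLE_LOW_BITS inlined as 10 (64 - 52 - 2, per the header) and hypothetical stand-ins for the FPSCR rounding-mode values:

```cpp
#include <cstdint>

constexpr int kLowBits = 10; // VFP_DOUBLE_LOW_BITS = 64 - 52 - 2

// Stand-in rounding modes; the real FPSCR_ROUND_* values live elsewhere.
enum RoundMode { kNearest, kToZero, kPlusInf, kMinusInf };

// Amount added to the 64-bit working significand before its low bits are
// shifted out, mirroring the selection in the deleted helper (and in the
// surviving vfp_double_normaliseround).
std::uint64_t rounding_increment(std::uint64_t significand, bool negative, RoundMode rmode) {
    std::uint64_t incr = 0;
    if (rmode == kNearest) {
        // Round to nearest, ties to even: add half a ULP, but one less
        // when the bit that becomes the result's LSB is 0, so an exact
        // tie does not round up.
        incr = 1ULL << kLowBits;
        if ((significand & (1ULL << (kLowBits + 1))) == 0)
            incr -= 1;
    } else if (rmode == kToZero) {
        incr = 0; // truncate
    } else if ((rmode == kPlusInf) ^ negative) {
        // Rounding away from zero toward the chosen infinity: add just
        // under a full ULP so any nonzero low bits bump the result.
        incr = (1ULL << (kLowBits + 1)) - 1;
    }
    return incr;
}
```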
| @@ -511,7 +383,7 @@ static u32 vfp_compare(ARMul_State* state, int dd, int signal_on_qnan, int dm, u | |||
| 511 | LOG_TRACE(Core_ARM11, "In %s, state=0x%x, fpscr=0x%x\n", __FUNCTION__, state, fpscr); | 383 | LOG_TRACE(Core_ARM11, "In %s, state=0x%x, fpscr=0x%x\n", __FUNCTION__, state, fpscr); |
| 512 | m = vfp_get_double(state, dm); | 384 | m = vfp_get_double(state, dm); |
| 513 | if (vfp_double_packed_exponent(m) == 2047 && vfp_double_packed_mantissa(m)) { | 385 | if (vfp_double_packed_exponent(m) == 2047 && vfp_double_packed_mantissa(m)) { |
| 514 | ret |= FPSCR_C | FPSCR_V; | 386 | ret |= FPSCR_CFLAG | FPSCR_VFLAG; |
| 515 | if (signal_on_qnan || !(vfp_double_packed_mantissa(m) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1)))) | 387 | if (signal_on_qnan || !(vfp_double_packed_mantissa(m) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1)))) |
| 516 | /* | 388 | /* |
| 517 | * Signalling NaN, or signalling on quiet NaN | 389 | * Signalling NaN, or signalling on quiet NaN |
| @@ -521,7 +393,7 @@ static u32 vfp_compare(ARMul_State* state, int dd, int signal_on_qnan, int dm, u | |||
| 521 | 393 | ||
| 522 | d = vfp_get_double(state, dd); | 394 | d = vfp_get_double(state, dd); |
| 523 | if (vfp_double_packed_exponent(d) == 2047 && vfp_double_packed_mantissa(d)) { | 395 | if (vfp_double_packed_exponent(d) == 2047 && vfp_double_packed_mantissa(d)) { |
| 524 | ret |= FPSCR_C | FPSCR_V; | 396 | ret |= FPSCR_CFLAG | FPSCR_VFLAG; |
| 525 | if (signal_on_qnan || !(vfp_double_packed_mantissa(d) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1)))) | 397 | if (signal_on_qnan || !(vfp_double_packed_mantissa(d) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1)))) |
| 526 | /* | 398 | /* |
| 527 | * Signalling NaN, or signalling on quiet NaN | 399 | * Signalling NaN, or signalling on quiet NaN |
| @@ -535,7 +407,7 @@ static u32 vfp_compare(ARMul_State* state, int dd, int signal_on_qnan, int dm, u | |||
| 535 | /* | 407 | /* |
| 536 | * equal | 408 | * equal |
| 537 | */ | 409 | */ |
| 538 | ret |= FPSCR_Z | FPSCR_C; | 410 | ret |= FPSCR_ZFLAG | FPSCR_CFLAG; |
| 539 | //printf("In %s,1 ret=0x%x\n", __FUNCTION__, ret); | 411 | //printf("In %s,1 ret=0x%x\n", __FUNCTION__, ret); |
| 540 | } else if (vfp_double_packed_sign(d ^ m)) { | 412 | } else if (vfp_double_packed_sign(d ^ m)) { |
| 541 | /* | 413 | /* |
| @@ -545,22 +417,22 @@ static u32 vfp_compare(ARMul_State* state, int dd, int signal_on_qnan, int dm, u | |||
| 545 | /* | 417 | /* |
| 546 | * d is negative, so d < m | 418 | * d is negative, so d < m |
| 547 | */ | 419 | */ |
| 548 | ret |= FPSCR_N; | 420 | ret |= FPSCR_NFLAG; |
| 549 | else | 421 | else |
| 550 | /* | 422 | /* |
| 551 | * d is positive, so d > m | 423 | * d is positive, so d > m |
| 552 | */ | 424 | */ |
| 553 | ret |= FPSCR_C; | 425 | ret |= FPSCR_CFLAG; |
| 554 | } else if ((vfp_double_packed_sign(d) != 0) ^ (d < m)) { | 426 | } else if ((vfp_double_packed_sign(d) != 0) ^ (d < m)) { |
| 555 | /* | 427 | /* |
| 556 | * d < m | 428 | * d < m |
| 557 | */ | 429 | */ |
| 558 | ret |= FPSCR_N; | 430 | ret |= FPSCR_NFLAG; |
| 559 | } else if ((vfp_double_packed_sign(d) != 0) ^ (d > m)) { | 431 | } else if ((vfp_double_packed_sign(d) != 0) ^ (d > m)) { |
| 560 | /* | 432 | /* |
| 561 | * d > m | 433 | * d > m |
| 562 | */ | 434 | */ |
| 563 | ret |= FPSCR_C; | 435 | ret |= FPSCR_CFLAG; |
| 564 | } | 436 | } |
| 565 | } | 437 | } |
| 566 | LOG_TRACE(Core_ARM11, "In %s, state=0x%x, ret=0x%x\n", __FUNCTION__, state, ret); | 438 | LOG_TRACE(Core_ARM11, "In %s, state=0x%x, ret=0x%x\n", __FUNCTION__, state, ret); |
| @@ -592,49 +464,6 @@ static u32 vfp_double_fcmpez(ARMul_State* state, int dd, int unused, int dm, u32 | |||
| 592 | return vfp_compare(state, dd, 1, VFP_REG_ZERO, fpscr); | 464 | return vfp_compare(state, dd, 1, VFP_REG_ZERO, fpscr); |
| 593 | } | 465 | } |
| 594 | 466 | ||
| 595 | u32 vfp_double_fcvtsinterncutting(ARMul_State* state, int sd, struct vfp_double* dm, u32 fpscr) //ichfly for internal use only | ||
| 596 | { | ||
| 597 | struct vfp_single vsd; | ||
| 598 | int tm; | ||
| 599 | u32 exceptions = 0; | ||
| 600 | |||
| 601 | LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__); | ||
| 602 | |||
| 603 | tm = vfp_double_type(dm); | ||
| 604 | |||
| 605 | /* | ||
| 606 | * If we have a signalling NaN, signal invalid operation. | ||
| 607 | */ | ||
| 608 | if (tm == VFP_SNAN) | ||
| 609 | exceptions = FPSCR_IOC; | ||
| 610 | |||
| 611 | if (tm & VFP_DENORMAL) | ||
| 612 | vfp_double_normalise_denormal(dm); | ||
| 613 | |||
| 614 | vsd.sign = dm->sign; | ||
| 615 | vsd.significand = vfp_hi64to32jamming(dm->significand); | ||
| 616 | |||
| 617 | /* | ||
| 618 | * If we have an infinity or a NaN, the exponent must be 255 | ||
| 619 | */ | ||
| 620 | if (tm & (VFP_INFINITY | VFP_NAN)) { | ||
| 621 | vsd.exponent = 255; | ||
| 622 | if (tm == VFP_QNAN) | ||
| 623 | vsd.significand |= VFP_SINGLE_SIGNIFICAND_QNAN; | ||
| 624 | goto pack_nan; | ||
| 625 | } | ||
| 626 | else if (tm & VFP_ZERO) | ||
| 627 | vsd.exponent = 0; | ||
| 628 | else | ||
| 629 | vsd.exponent = dm->exponent - (1023 - 127); | ||
| 630 | |||
| 631 | return vfp_single_normaliseround(state, sd, &vsd, fpscr, exceptions, "fcvts"); | ||
| 632 | |||
| 633 | pack_nan: | ||
| 634 | vfp_put_float(state, vfp_single_pack(&vsd), sd); | ||
| 635 | return exceptions; | ||
| 636 | } | ||
| 637 | |||
| 638 | static u32 vfp_double_fcvts(ARMul_State* state, int sd, int unused, int dm, u32 fpscr) | 467 | static u32 vfp_double_fcvts(ARMul_State* state, int sd, int unused, int dm, u32 fpscr) |
| 639 | { | 468 | { |
| 640 | struct vfp_double vdm; | 469 | struct vfp_double vdm; |
| @@ -723,7 +552,7 @@ static u32 vfp_double_ftoui(ARMul_State* state, int sd, int unused, int dm, u32 | |||
| 723 | exceptions |= FPSCR_IDC; | 552 | exceptions |= FPSCR_IDC; |
| 724 | 553 | ||
| 725 | if (tm & VFP_NAN) | 554 | if (tm & VFP_NAN) |
| 726 | vdm.sign = 0; | 555 | vdm.sign = 1; |
| 727 | 556 | ||
| 728 | if (vdm.exponent >= 1023 + 32) { | 557 | if (vdm.exponent >= 1023 + 32) { |
| 729 | d = vdm.sign ? 0 : 0xffffffff; | 558 | d = vdm.sign ? 0 : 0xffffffff; |
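Note on the one-line change in this hunk: forcing `vdm.sign = 1` for NaN inputs combines with the saturation branch `d = vdm.sign ? 0 : 0xffffffff` so that FTOUI of a NaN now yields 0 rather than 0xffffffff, which matches the architecturally expected integer result for a NaN operand. In effect:

```cpp
#include <cstdint>

// Effect of the changed line on the saturation path shown above:
// before the change, sign == 0 saturated NaN to 0xffffffff;
// after it, sign == 1 makes NaN convert to 0.
std::uint32_t nan_ftoui_result(bool sign) {
    return sign ? 0u : 0xffffffffu; // mirrors: d = vdm.sign ? 0 : 0xffffffff
}
```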
diff --git a/src/core/arm/skyeye_common/vfp/vfpsingle.cpp b/src/core/arm/skyeye_common/vfp/vfpsingle.cpp
index b7872bdc4..678b63f51 100644
--- a/src/core/arm/skyeye_common/vfp/vfpsingle.cpp
+++ b/src/core/arm/skyeye_common/vfp/vfpsingle.cpp
| @@ -419,7 +419,7 @@ static u32 vfp_compare(ARMul_State* state, int sd, int signal_on_qnan, s32 m, u3 | |||
| 419 | 419 | ||
| 420 | d = vfp_get_float(state, sd); | 420 | d = vfp_get_float(state, sd); |
| 421 | if (vfp_single_packed_exponent(m) == 255 && vfp_single_packed_mantissa(m)) { | 421 | if (vfp_single_packed_exponent(m) == 255 && vfp_single_packed_mantissa(m)) { |
| 422 | ret |= FPSCR_C | FPSCR_V; | 422 | ret |= FPSCR_CFLAG | FPSCR_VFLAG; |
| 423 | if (signal_on_qnan || !(vfp_single_packed_mantissa(m) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1)))) | 423 | if (signal_on_qnan || !(vfp_single_packed_mantissa(m) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1)))) |
| 424 | /* | 424 | /* |
| 425 | * Signalling NaN, or signalling on quiet NaN | 425 | * Signalling NaN, or signalling on quiet NaN |
| @@ -428,7 +428,7 @@ static u32 vfp_compare(ARMul_State* state, int sd, int signal_on_qnan, s32 m, u3 | |||
| 428 | } | 428 | } |
| 429 | 429 | ||
| 430 | if (vfp_single_packed_exponent(d) == 255 && vfp_single_packed_mantissa(d)) { | 430 | if (vfp_single_packed_exponent(d) == 255 && vfp_single_packed_mantissa(d)) { |
| 431 | ret |= FPSCR_C | FPSCR_V; | 431 | ret |= FPSCR_CFLAG | FPSCR_VFLAG; |
| 432 | if (signal_on_qnan || !(vfp_single_packed_mantissa(d) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1)))) | 432 | if (signal_on_qnan || !(vfp_single_packed_mantissa(d) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1)))) |
| 433 | /* | 433 | /* |
| 434 | * Signalling NaN, or signalling on quiet NaN | 434 | * Signalling NaN, or signalling on quiet NaN |
| @@ -441,7 +441,7 @@ static u32 vfp_compare(ARMul_State* state, int sd, int signal_on_qnan, s32 m, u3 | |||
| 441 | /* | 441 | /* |
| 442 | * equal | 442 | * equal |
| 443 | */ | 443 | */ |
| 444 | ret |= FPSCR_Z | FPSCR_C; | 444 | ret |= FPSCR_ZFLAG | FPSCR_CFLAG; |
| 445 | } else if (vfp_single_packed_sign(d ^ m)) { | 445 | } else if (vfp_single_packed_sign(d ^ m)) { |
| 446 | /* | 446 | /* |
| 447 | * different signs | 447 | * different signs |
| @@ -450,22 +450,22 @@ static u32 vfp_compare(ARMul_State* state, int sd, int signal_on_qnan, s32 m, u3 | |||
| 450 | /* | 450 | /* |
| 451 | * d is negative, so d < m | 451 | * d is negative, so d < m |
| 452 | */ | 452 | */ |
| 453 | ret |= FPSCR_N; | 453 | ret |= FPSCR_NFLAG; |
| 454 | else | 454 | else |
| 455 | /* | 455 | /* |
| 456 | * d is positive, so d > m | 456 | * d is positive, so d > m |
| 457 | */ | 457 | */ |
| 458 | ret |= FPSCR_C; | 458 | ret |= FPSCR_CFLAG; |
| 459 | } else if ((vfp_single_packed_sign(d) != 0) ^ (d < m)) { | 459 | } else if ((vfp_single_packed_sign(d) != 0) ^ (d < m)) { |
| 460 | /* | 460 | /* |
| 461 | * d < m | 461 | * d < m |
| 462 | */ | 462 | */ |
| 463 | ret |= FPSCR_N; | 463 | ret |= FPSCR_NFLAG; |
| 464 | } else if ((vfp_single_packed_sign(d) != 0) ^ (d > m)) { | 464 | } else if ((vfp_single_packed_sign(d) != 0) ^ (d > m)) { |
| 465 | /* | 465 | /* |
| 466 | * d > m | 466 | * d > m |
| 467 | */ | 467 | */ |
| 468 | ret |= FPSCR_C; | 468 | ret |= FPSCR_CFLAG; |
| 469 | } | 469 | } |
| 470 | } | 470 | } |
| 471 | return ret; | 471 | return ret; |
| @@ -491,46 +491,6 @@ static u32 vfp_single_fcmpez(ARMul_State* state, int sd, int unused, s32 m, u32 | |||
| 491 | return vfp_compare(state, sd, 1, 0, fpscr); | 491 | return vfp_compare(state, sd, 1, 0, fpscr); |
| 492 | } | 492 | } |
| 493 | 493 | ||
| 494 | static s64 vfp_single_to_doubleintern(ARMul_State* state, s32 m, u32 fpscr) //ichfly for internal use only | ||
| 495 | { | ||
| 496 | struct vfp_single vsm; | ||
| 497 | struct vfp_double vdd; | ||
| 498 | int tm; | ||
| 499 | u32 exceptions = 0; | ||
| 500 | |||
| 501 | vfp_single_unpack(&vsm, m); | ||
| 502 | |||
| 503 | tm = vfp_single_type(&vsm); | ||
| 504 | |||
| 505 | /* | ||
| 506 | * If we have a signalling NaN, signal invalid operation. | ||
| 507 | */ | ||
| 508 | if (tm == VFP_SNAN) | ||
| 509 | exceptions = FPSCR_IOC; | ||
| 510 | |||
| 511 | if (tm & VFP_DENORMAL) | ||
| 512 | vfp_single_normalise_denormal(&vsm); | ||
| 513 | |||
| 514 | vdd.sign = vsm.sign; | ||
| 515 | vdd.significand = (u64)vsm.significand << 32; | ||
| 516 | |||
| 517 | /* | ||
| 518 | * If we have an infinity or NaN, the exponent must be 2047. | ||
| 519 | */ | ||
| 520 | if (tm & (VFP_INFINITY | VFP_NAN)) { | ||
| 521 | vdd.exponent = 2047; | ||
| 522 | if (tm == VFP_QNAN) | ||
| 523 | vdd.significand |= VFP_DOUBLE_SIGNIFICAND_QNAN; | ||
| 524 | goto pack_nan; | ||
| 525 | } else if (tm & VFP_ZERO) | ||
| 526 | vdd.exponent = 0; | ||
| 527 | else | ||
| 528 | vdd.exponent = vsm.exponent + (1023 - 127); | ||
| 529 | pack_nan: | ||
| 530 | vfp_double_normaliseroundintern(state, &vdd, fpscr, exceptions, "fcvtd"); | ||
| 531 | return vfp_double_pack(&vdd); | ||
| 532 | } | ||
| 533 | |||
| 534 | static u32 vfp_single_fcvtd(ARMul_State* state, int dd, int unused, s32 m, u32 fpscr) | 494 | static u32 vfp_single_fcvtd(ARMul_State* state, int dd, int unused, s32 m, u32 fpscr) |
| 535 | { | 495 | { |
| 536 | struct vfp_single vsm; | 496 | struct vfp_single vsm; |
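The deleted vfp_single_to_doubleintern() was a register-less variant of the vfp_single_fcvtd() whose context begins here. Its essential job, visible in the deleted lines, is widening the significand and re-biasing the exponent from single (bias 127) to double (bias 1023): `vdd.exponent = vsm.exponent + (1023 - 127)`. A standalone, host-side illustration of that same re-biasing on packed IEEE-754 bits (a demonstration of the arithmetic, not emulator code; normal numbers only):

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

// Widen an IEEE-754 binary32 bit pattern to binary64 by hand:
// exponent' = exponent + (1023 - 127), mantissa' = mantissa << (52 - 23).
// Handles normal numbers only; zeros, denormals, infinities and NaNs need
// the special cases the emulator code deals with.
std::uint64_t widen(std::uint32_t s) {
    std::uint64_t sign = (std::uint64_t)(s >> 31) << 63;
    std::uint64_t exp  = ((s >> 23) & 0xff) + (1023 - 127);
    std::uint64_t man  = (std::uint64_t)(s & 0x7fffff) << (52 - 23);
    return sign | (exp << 52) | man;
}

int main() {
    float f = 1.5f;
    std::uint32_t s;
    std::memcpy(&s, &f, sizeof s);
    std::uint64_t d = widen(s);
    double check;
    std::memcpy(&check, &d, sizeof check);
    std::printf("%f -> %f\n", f, check); // 1.500000 -> 1.500000
}
```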