author     bunnei   2014-05-15 22:54:17 -0400
committer  bunnei   2014-05-15 22:54:17 -0400
commit     3e1eafa244dc2ea6a1d8de6e841370c83c362dda (patch)
tree       f4fb8778ccb4d71cc73c10a7ad4d0be62acc4533 /src
parent     Merge pull request #15 from bunnei/hle-services (diff)
- Moved the MMU code to the arm/interpreter folder
- Added initial VFP code from SkyEye
Diffstat (limited to 'src')
-rw-r--r--  src/core/CMakeLists.txt  |    6
-rw-r--r--  src/core/arm/interpreter/mmu/arm1176jzf_s_mmu.cpp (renamed from src/core/arm/mmu/arm1176jzf_s_mmu.cpp)  |    0
-rw-r--r--  src/core/arm/interpreter/mmu/arm1176jzf_s_mmu.h (renamed from src/core/arm/mmu/arm1176jzf_s_mmu.h)  |    0
-rw-r--r--  src/core/arm/interpreter/mmu/cache.h (renamed from src/core/arm/mmu/cache.h)  |    0
-rw-r--r--  src/core/arm/interpreter/mmu/rb.h (renamed from src/core/arm/mmu/rb.h)  |    0
-rw-r--r--  src/core/arm/interpreter/mmu/tlb.h (renamed from src/core/arm/mmu/tlb.h)  |    0
-rw-r--r--  src/core/arm/interpreter/mmu/wb.h (renamed from src/core/arm/mmu/wb.h)  |    0
-rw-r--r--  src/core/arm/interpreter/vfp/asm_vfp.h  |   84
-rw-r--r--  src/core/arm/interpreter/vfp/vfp.cpp  |  357
-rw-r--r--  src/core/arm/interpreter/vfp/vfp.h  |  111
-rw-r--r--  src/core/arm/interpreter/vfp/vfp_helper.h  |  541
-rw-r--r--  src/core/arm/interpreter/vfp/vfpdouble.cpp  | 1262
-rw-r--r--  src/core/arm/interpreter/vfp/vfpinstr.cpp  | 5123
-rw-r--r--  src/core/arm/interpreter/vfp/vfpsingle.cpp  | 1277
-rw-r--r--  src/core/core.vcxproj  |    7
-rw-r--r--  src/core/core.vcxproj.filters  |   24
16 files changed, 8791 insertions(+), 1 deletion(-)
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index eb4fef381..0d4a0ca7a 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -14,7 +14,11 @@ set(SRCS core.cpp
 arm/interpreter/armsupp.cpp
 arm/interpreter/armvirt.cpp
 arm/interpreter/thumbemu.cpp
-arm/mmu/arm1176jzf_s_mmu.cpp
+arm/interpreter/vfp/vfp.cpp
+arm/interpreter/vfp/vfpdouble.cpp
+arm/interpreter/vfp/vfpinsr.cpp
+arm/interpreter/vfp/vfpsingle.cpp
+arm/interpreter/mmu/arm1176jzf_s_mmu.cpp
 elf/elf_reader.cpp
 file_sys/directory_file_system.cpp
 file_sys/meta_file_system.cpp
diff --git a/src/core/arm/mmu/arm1176jzf_s_mmu.cpp b/src/core/arm/interpreter/mmu/arm1176jzf_s_mmu.cpp
index a32f076b9..a32f076b9 100644
--- a/src/core/arm/mmu/arm1176jzf_s_mmu.cpp
+++ b/src/core/arm/interpreter/mmu/arm1176jzf_s_mmu.cpp
diff --git a/src/core/arm/mmu/arm1176jzf_s_mmu.h b/src/core/arm/interpreter/mmu/arm1176jzf_s_mmu.h
index 299c6b46b..299c6b46b 100644
--- a/src/core/arm/mmu/arm1176jzf_s_mmu.h
+++ b/src/core/arm/interpreter/mmu/arm1176jzf_s_mmu.h
diff --git a/src/core/arm/mmu/cache.h b/src/core/arm/interpreter/mmu/cache.h
index d308d9b87..d308d9b87 100644
--- a/src/core/arm/mmu/cache.h
+++ b/src/core/arm/interpreter/mmu/cache.h
diff --git a/src/core/arm/mmu/rb.h b/src/core/arm/interpreter/mmu/rb.h
index 7bf0ebb26..7bf0ebb26 100644
--- a/src/core/arm/mmu/rb.h
+++ b/src/core/arm/interpreter/mmu/rb.h
diff --git a/src/core/arm/mmu/tlb.h b/src/core/arm/interpreter/mmu/tlb.h
index 938c01786..938c01786 100644
--- a/src/core/arm/mmu/tlb.h
+++ b/src/core/arm/interpreter/mmu/tlb.h
diff --git a/src/core/arm/mmu/wb.h b/src/core/arm/interpreter/mmu/wb.h
index 8fb7de946..8fb7de946 100644
--- a/src/core/arm/mmu/wb.h
+++ b/src/core/arm/interpreter/mmu/wb.h
diff --git a/src/core/arm/interpreter/vfp/asm_vfp.h b/src/core/arm/interpreter/vfp/asm_vfp.h
new file mode 100644
index 000000000..f4ab34fd4
--- /dev/null
+++ b/src/core/arm/interpreter/vfp/asm_vfp.h
@@ -0,0 +1,84 @@
1/*
2 * arch/arm/include/asm/vfp.h
3 *
4 * VFP register definitions.
5 * First, the standard VFP set.
6 */
7
8#define FPSID cr0
9#define FPSCR cr1
10#define MVFR1 cr6
11#define MVFR0 cr7
12#define FPEXC cr8
13#define FPINST cr9
14#define FPINST2 cr10
15
16/* FPSID bits */
17#define FPSID_IMPLEMENTER_BIT (24)
18#define FPSID_IMPLEMENTER_MASK (0xff << FPSID_IMPLEMENTER_BIT)
19#define FPSID_SOFTWARE (1<<23)
20#define FPSID_FORMAT_BIT (21)
21#define FPSID_FORMAT_MASK (0x3 << FPSID_FORMAT_BIT)
22#define FPSID_NODOUBLE (1<<20)
23#define FPSID_ARCH_BIT (16)
24#define FPSID_ARCH_MASK (0xF << FPSID_ARCH_BIT)
25#define FPSID_PART_BIT (8)
26#define FPSID_PART_MASK (0xFF << FPSID_PART_BIT)
27#define FPSID_VARIANT_BIT (4)
28#define FPSID_VARIANT_MASK (0xF << FPSID_VARIANT_BIT)
29#define FPSID_REV_BIT (0)
30#define FPSID_REV_MASK (0xF << FPSID_REV_BIT)
31
32/* FPEXC bits */
33#define FPEXC_EX (1 << 31)
34#define FPEXC_EN (1 << 30)
35#define FPEXC_DEX (1 << 29)
36#define FPEXC_FP2V (1 << 28)
37#define FPEXC_VV (1 << 27)
38#define FPEXC_TFV (1 << 26)
39#define FPEXC_LENGTH_BIT (8)
40#define FPEXC_LENGTH_MASK (7 << FPEXC_LENGTH_BIT)
41#define FPEXC_IDF (1 << 7)
42#define FPEXC_IXF (1 << 4)
43#define FPEXC_UFF (1 << 3)
44#define FPEXC_OFF (1 << 2)
45#define FPEXC_DZF (1 << 1)
46#define FPEXC_IOF (1 << 0)
47#define FPEXC_TRAP_MASK (FPEXC_IDF|FPEXC_IXF|FPEXC_UFF|FPEXC_OFF|FPEXC_DZF|FPEXC_IOF)
48
49/* FPSCR bits */
50#define FPSCR_DEFAULT_NAN (1<<25)
51#define FPSCR_FLUSHTOZERO (1<<24)
52#define FPSCR_ROUND_NEAREST (0<<22)
53#define FPSCR_ROUND_PLUSINF (1<<22)
54#define FPSCR_ROUND_MINUSINF (2<<22)
55#define FPSCR_ROUND_TOZERO (3<<22)
56#define FPSCR_RMODE_BIT (22)
57#define FPSCR_RMODE_MASK (3 << FPSCR_RMODE_BIT)
58#define FPSCR_STRIDE_BIT (20)
59#define FPSCR_STRIDE_MASK (3 << FPSCR_STRIDE_BIT)
60#define FPSCR_LENGTH_BIT (16)
61#define FPSCR_LENGTH_MASK (7 << FPSCR_LENGTH_BIT)
62#define FPSCR_IOE (1<<8)
63#define FPSCR_DZE (1<<9)
64#define FPSCR_OFE (1<<10)
65#define FPSCR_UFE (1<<11)
66#define FPSCR_IXE (1<<12)
67#define FPSCR_IDE (1<<15)
68#define FPSCR_IOC (1<<0)
69#define FPSCR_DZC (1<<1)
70#define FPSCR_OFC (1<<2)
71#define FPSCR_UFC (1<<3)
72#define FPSCR_IXC (1<<4)
73#define FPSCR_IDC (1<<7)
74
75/* MVFR0 bits */
76#define MVFR0_A_SIMD_BIT (0)
77#define MVFR0_A_SIMD_MASK (0xf << MVFR0_A_SIMD_BIT)
78
79/* Bit patterns for decoding the packaged operation descriptors */
80#define VFPOPDESC_LENGTH_BIT (9)
81#define VFPOPDESC_LENGTH_MASK (0x07 << VFPOPDESC_LENGTH_BIT)
82#define VFPOPDESC_UNUSED_BIT (24)
83#define VFPOPDESC_UNUSED_MASK (0xFF << VFPOPDESC_UNUSED_BIT)
84#define VFPOPDESC_OPDESC_MASK (~(VFPOPDESC_LENGTH_MASK | VFPOPDESC_UNUSED_MASK))
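[Editor's illustration, not part of this commit] A minimal standalone sketch of how the FPSCR rounding-mode field defined above is read and modified. The helper name fpscr_set_rmode is hypothetical; the macro values simply mirror asm_vfp.h.

#include <assert.h>
#include <stdint.h>

#define FPSCR_RMODE_BIT    (22)
#define FPSCR_RMODE_MASK   (3 << FPSCR_RMODE_BIT)
#define FPSCR_ROUND_TOZERO (3 << 22)

/* Replace the rounding-mode field of an FPSCR image, leaving all other bits untouched. */
static uint32_t fpscr_set_rmode(uint32_t fpscr, uint32_t rmode_bits)
{
    return (fpscr & ~FPSCR_RMODE_MASK) | (rmode_bits & FPSCR_RMODE_MASK);
}

int main(void)
{
    uint32_t fpscr = 0;                                  /* reset value: round-to-nearest */
    fpscr = fpscr_set_rmode(fpscr, FPSCR_ROUND_TOZERO);  /* switch to round-toward-zero */
    assert(((fpscr & FPSCR_RMODE_MASK) >> FPSCR_RMODE_BIT) == 3);
    return 0;
}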
diff --git a/src/core/arm/interpreter/vfp/vfp.cpp b/src/core/arm/interpreter/vfp/vfp.cpp
new file mode 100644
index 000000000..eea5e24a9
--- /dev/null
+++ b/src/core/arm/interpreter/vfp/vfp.cpp
@@ -0,0 +1,357 @@
1/*
2 armvfp.c - ARM VFPv3 emulation unit
3 Copyright (C) 2003 Skyeye Develop Group
4 for help please send mail to <skyeye-developer@lists.gro.clinux.org>
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19*/
20
21/* Note: this file handles the interface between the ARM core and the VFP registers */
22
23/* Opens debug for classic interpreter only */
24//#define DEBUG
25
26#include "common/common.h"
27
28#include "core/arm/interpreter/armdefs.h"
29#include "core/arm/interpreter/vfp/vfp.h"
30
31//ARMul_State* persistent_state; /* function calls from SoftFloat lib don't have an access to ARMul_state. */
32
33unsigned
34VFPInit (ARMul_State *state)
35{
36 state->VFP[VFP_OFFSET(VFP_FPSID)] = VFP_FPSID_IMPLMEN<<24 | VFP_FPSID_SW<<23 | VFP_FPSID_SUBARCH<<16 |
37 VFP_FPSID_PARTNUM<<8 | VFP_FPSID_VARIANT<<4 | VFP_FPSID_REVISION;
38 state->VFP[VFP_OFFSET(VFP_FPEXC)] = 0;
39 state->VFP[VFP_OFFSET(VFP_FPSCR)] = 0;
40
41 //persistent_state = state;
42 /* Reset only specify VFP_FPEXC_EN = '0' */
43
44 return No_exp;
45}
46
47unsigned
48VFPMRC (ARMul_State * state, unsigned type, ARMword instr, ARMword * value)
49{
50 /* MRC<c> <coproc>,<opc1>,<Rt>,<CRn>,<CRm>{,<opc2>} */
51 int CoProc = BITS (8, 11); /* 10 or 11 */
52 int OPC_1 = BITS (21, 23);
53 int Rt = BITS (12, 15);
54 int CRn = BITS (16, 19);
55 int CRm = BITS (0, 3);
56 int OPC_2 = BITS (5, 7);
57
58 /* TODO check access permission */
59
60 /* CRn/opc1 CRm/opc2 */
61
62 if (CoProc == 10 || CoProc == 11)
63 {
64 #define VFP_MRC_TRANS
65 #include "core/arm/interpreter/vfp/vfpinstr.cpp"
66 #undef VFP_MRC_TRANS
67 }
68 DEBUG_LOG(ARM11, "Can't identify %x, CoProc %x, OPC_1 %x, Rt %x, CRn %x, CRm %x, OPC_2 %x\n",
69 instr, CoProc, OPC_1, Rt, CRn, CRm, OPC_2);
70
71 return ARMul_CANT;
72}
73
74unsigned
75VFPMCR (ARMul_State * state, unsigned type, ARMword instr, ARMword value)
76{
77 /* MCR<c> <coproc>,<opc1>,<Rt>,<CRn>,<CRm>{,<opc2>} */
78 int CoProc = BITS (8, 11); /* 10 or 11 */
79 int OPC_1 = BITS (21, 23);
80 int Rt = BITS (12, 15);
81 int CRn = BITS (16, 19);
82 int CRm = BITS (0, 3);
83 int OPC_2 = BITS (5, 7);
84
85 /* TODO check access permission */
86
87 /* CRn/opc1 CRm/opc2 */
88 if (CoProc == 10 || CoProc == 11)
89 {
90 #define VFP_MCR_TRANS
91 #include "core/arm/interpreter/vfp/vfpinstr.cpp"
92 #undef VFP_MCR_TRANS
93 }
94 DEBUG_LOG(ARM11, "Can't identify %x, CoProc %x, OPC_1 %x, Rt %x, CRn %x, CRm %x, OPC_2 %x\n",
95 instr, CoProc, OPC_1, Rt, CRn, CRm, OPC_2);
96
97 return ARMul_CANT;
98}
99
100unsigned
101VFPMRRC (ARMul_State * state, unsigned type, ARMword instr, ARMword * value1, ARMword * value2)
102{
103 /* MCRR<c> <coproc>,<opc1>,<Rt>,<Rt2>,<CRm> */
104 int CoProc = BITS (8, 11); /* 10 or 11 */
105 int OPC_1 = BITS (4, 7);
106 int Rt = BITS (12, 15);
107 int Rt2 = BITS (16, 19);
108 int CRm = BITS (0, 3);
109
110 if (CoProc == 10 || CoProc == 11)
111 {
112 #define VFP_MRRC_TRANS
113 #include "core/arm/interpreter/vfp/vfpinstr.cpp"
114 #undef VFP_MRRC_TRANS
115 }
116 DEBUG_LOG(ARM11, "Can't identify %x, CoProc %x, OPC_1 %x, Rt %x, Rt2 %x, CRm %x\n",
117 instr, CoProc, OPC_1, Rt, Rt2, CRm);
118
119 return ARMul_CANT;
120}
121
122unsigned
123VFPMCRR (ARMul_State * state, unsigned type, ARMword instr, ARMword value1, ARMword value2)
124{
125 /* MCRR<c> <coproc>,<opc1>,<Rt>,<Rt2>,<CRm> */
126 int CoProc = BITS (8, 11); /* 10 or 11 */
127 int OPC_1 = BITS (4, 7);
128 int Rt = BITS (12, 15);
129 int Rt2 = BITS (16, 19);
130 int CRm = BITS (0, 3);
131
132 /* TODO check access permission */
133
134 /* CRn/opc1 CRm/opc2 */
135
136 if (CoProc == 11 || CoProc == 10)
137 {
138 #define VFP_MCRR_TRANS
139 #include "core/arm/interpreter/vfp/vfpinstr.cpp"
140 #undef VFP_MCRR_TRANS
141 }
142 DEBUG_LOG(ARM11, "Can't identify %x, CoProc %x, OPC_1 %x, Rt %x, Rt2 %x, CRm %x\n",
143 instr, CoProc, OPC_1, Rt, Rt2, CRm);
144
145 return ARMul_CANT;
146}
147
148unsigned
149VFPSTC (ARMul_State * state, unsigned type, ARMword instr, ARMword * value)
150{
151 /* STC{L}<c> <coproc>,<CRd>,[<Rn>],<option> */
152 int CoProc = BITS (8, 11); /* 10 or 11 */
153 int CRd = BITS (12, 15);
154 int Rn = BITS (16, 19);
155 int imm8 = BITS (0, 7);
156 int P = BIT(24);
157 int U = BIT(23);
158 int D = BIT(22);
159 int W = BIT(21);
160
161 /* TODO check access permission */
162
163 /* VSTM */
164 if ( (P|U|D|W) == 0 )
165 {
166 DEBUG_LOG(ARM11, "In %s, UNDEFINED\n", __FUNCTION__); exit(-1);
167 }
168 if (CoProc == 10 || CoProc == 11)
169 {
170 #if 1
171 if (P == 0 && U == 0 && W == 0)
172 {
173 DEBUG_LOG(ARM11, "VSTM Related encodings\n"); exit(-1);
174 }
175 if (P == U && W == 1)
176 {
177 DEBUG_LOG(ARM11, "UNDEFINED\n"); exit(-1);
178 }
179 #endif
180
181 #define VFP_STC_TRANS
182 #include "core/arm/interpreter/vfp/vfpinstr.cpp"
183 #undef VFP_STC_TRANS
184 }
185 DEBUG_LOG(ARM11, "Can't identify %x, CoProc %x, CRd %x, Rn %x, imm8 %x, P %x, U %x, D %x, W %x\n",
186 instr, CoProc, CRd, Rn, imm8, P, U, D, W);
187
188 return ARMul_CANT;
189}
190
191unsigned
192VFPLDC (ARMul_State * state, unsigned type, ARMword instr, ARMword value)
193{
194 /* LDC{L}<c> <coproc>,<CRd>,[<Rn>] */
195 int CoProc = BITS (8, 11); /* 10 or 11 */
196 int CRd = BITS (12, 15);
197 int Rn = BITS (16, 19);
198 int imm8 = BITS (0, 7);
199 int P = BIT(24);
200 int U = BIT(23);
201 int D = BIT(22);
202 int W = BIT(21);
203
204 /* TODO check access permission */
205
206 if ( (P|U|D|W) == 0 )
207 {
208 DEBUG_LOG(ARM11, "In %s, UNDEFINED\n", __FUNCTION__); exit(-1);
209 }
210 if (CoProc == 10 || CoProc == 11)
211 {
212 #define VFP_LDC_TRANS
213 #include "core/arm/interpreter/vfp/vfpinstr.cpp"
214 #undef VFP_LDC_TRANS
215 }
216 DEBUG_LOG(ARM11, "Can't identify %x, CoProc %x, CRd %x, Rn %x, imm8 %x, P %x, U %x, D %x, W %x\n",
217 instr, CoProc, CRd, Rn, imm8, P, U, D, W);
218
219 return ARMul_CANT;
220}
221
222unsigned
223VFPCDP (ARMul_State * state, unsigned type, ARMword instr)
224{
225 /* CDP<c> <coproc>,<opc1>,<CRd>,<CRn>,<CRm>,<opc2> */
226 int CoProc = BITS (8, 11); /* 10 or 11 */
227 int OPC_1 = BITS (20, 23);
228 int CRd = BITS (12, 15);
229 int CRn = BITS (16, 19);
230 int CRm = BITS (0, 3);
231 int OPC_2 = BITS (5, 7);
232
233 /* TODO check access permission */
234
235 /* CRn/opc1 CRm/opc2 */
236
237 if (CoProc == 10 || CoProc == 11)
238 {
239 #define VFP_CDP_TRANS
240 #include "core/arm/interpreter/vfp/vfpinstr.cpp"
241 #undef VFP_CDP_TRANS
242
243 int exceptions = 0;
244 if (CoProc == 10)
245 exceptions = vfp_single_cpdo(state, instr, state->VFP[VFP_OFFSET(VFP_FPSCR)]);
246 else
247 exceptions = vfp_double_cpdo(state, instr, state->VFP[VFP_OFFSET(VFP_FPSCR)]);
248
249 vfp_raise_exceptions(state, exceptions, instr, state->VFP[VFP_OFFSET(VFP_FPSCR)]);
250
251 return ARMul_DONE;
252 }
253 DEBUG_LOG(ARM11, "Can't identify %x\n", instr);
254 return ARMul_CANT;
255}
256
257
258/* ----------- MRC ------------ */
259#define VFP_MRC_IMPL
260#include "core/arm/interpreter/vfp/vfpinstr.cpp"
261#undef VFP_MRC_IMPL
262
263#define VFP_MRRC_IMPL
264#include "core/arm/interpreter/vfp/vfpinstr.cpp"
265#undef VFP_MRRC_IMPL
266
267
268/* ----------- MCR ------------ */
269#define VFP_MCR_IMPL
270#include "core/arm/interpreter/vfp/vfpinstr.cpp"
271#undef VFP_MCR_IMPL
272
273#define VFP_MCRR_IMPL
274#include "core/arm/interpreter/vfp/vfpinstr.cpp"
275#undef VFP_MCRR_IMPL
276
277/* Memory operations are not inlined, because the old interpreter and the fast
278 interpreter do not share the same memory-operation interface.
279 The old interpreter framework performs one coprocessor access per data item and
280 already handles the data write, as well as the address computation,
281 which is not the case for the fast interpreter. Therefore, the VFP memory
282 instructions are implemented separately for the two interpreters. */
283
284/* ----------- STC ------------ */
285#define VFP_STC_IMPL
286#include "core/arm/interpreter/vfp/vfpinstr.cpp"
287#undef VFP_STC_IMPL
288
289
290/* ----------- LDC ------------ */
291#define VFP_LDC_IMPL
292#include "core/arm/interpreter/vfp/vfpinstr.cpp"
293#undef VFP_LDC_IMPL
294
295
296/* ----------- CDP ------------ */
297#define VFP_CDP_IMPL
298#include "core/arm/interpreter/vfp/vfpinstr.cpp"
299#undef VFP_CDP_IMPL
300
301/* Miscellaneous functions */
302int32_t vfp_get_float(arm_core_t* state, unsigned int reg)
303{
304 DBG("VFP get float: s%d=[%08x]\n", reg, state->ExtReg[reg]);
305 return state->ExtReg[reg];
306}
307
308void vfp_put_float(arm_core_t* state, int32_t val, unsigned int reg)
309{
310 DBG("VFP put float: s%d <= [%08x]\n", reg, val);
311 state->ExtReg[reg] = val;
312}
313
314uint64_t vfp_get_double(arm_core_t* state, unsigned int reg)
315{
316 uint64_t result;
317 result = ((uint64_t) state->ExtReg[reg*2+1])<<32 | state->ExtReg[reg*2];
318 DBG("VFP get double: s[%d-%d]=[%016llx]\n", reg*2+1, reg*2, result);
319 return result;
320}
321
322void vfp_put_double(arm_core_t* state, uint64_t val, unsigned int reg)
323{
324 DBG("VFP put double: s[%d-%d] <= [%08x-%08x]\n", reg*2+1, reg*2, (uint32_t) (val>>32), (uint32_t) (val & 0xffffffff));
325 state->ExtReg[reg*2] = (uint32_t) (val & 0xffffffff);
326 state->ExtReg[reg*2+1] = (uint32_t) (val>>32);
327}
328
329
330
331/*
332 * Process bitmask of exception conditions. (from vfpmodule.c)
333 */
334void vfp_raise_exceptions(ARMul_State* state, u32 exceptions, u32 inst, u32 fpscr)
335{
336 int si_code = 0;
337
338 vfpdebug("VFP: raising exceptions %08x\n", exceptions);
339
340 if (exceptions == VFP_EXCEPTION_ERROR) {
341 DEBUG_LOG(ARM11, "unhandled bounce %x\n", inst);
342 exit(-1);
343 return;
344 }
345
346 /*
347 * If any of the status flags are set, update the FPSCR.
348 * Comparison instructions always return at least one of
349 * these flags set.
350 */
351 if (exceptions & (FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V))
352 fpscr &= ~(FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V);
353
354 fpscr |= exceptions;
355
356 state->VFP[VFP_OFFSET(VFP_FPSCR)] = fpscr;
357}
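[Editor's illustration, not part of this commit] A standalone sketch of the double <-> register-pair mapping implemented by vfp_get_double()/vfp_put_double() above, with a plain uint32_t array standing in for ARMul_State::ExtReg (the array name and size are assumptions for the example).

#include <assert.h>
#include <stdint.h>

static uint32_t ext_reg[64];   /* stand-in for state->ExtReg */

static void put_double(uint64_t val, unsigned reg)
{
    ext_reg[reg * 2]     = (uint32_t)(val & 0xffffffff);  /* low word first   */
    ext_reg[reg * 2 + 1] = (uint32_t)(val >> 32);         /* high word second */
}

static uint64_t get_double(unsigned reg)
{
    return ((uint64_t)ext_reg[reg * 2 + 1] << 32) | ext_reg[reg * 2];
}

int main(void)
{
    put_double(0x3ff0000000000000ULL, 5);   /* bit pattern of 1.0 into "d5" */
    assert(get_double(5) == 0x3ff0000000000000ULL);
    assert(ext_reg[10] == 0x00000000u && ext_reg[11] == 0x3ff00000u);
    return 0;
}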
diff --git a/src/core/arm/interpreter/vfp/vfp.h b/src/core/arm/interpreter/vfp/vfp.h
new file mode 100644
index 000000000..e16076310
--- /dev/null
+++ b/src/core/arm/interpreter/vfp/vfp.h
@@ -0,0 +1,111 @@
1/*
2 vfp/vfp.h - ARM VFPv3 emulation unit - vfp interface
3 Copyright (C) 2003 Skyeye Develop Group
4 for help please send mail to <skyeye-developer@lists.gro.clinux.org>
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19*/
20
21#ifndef __VFP_H__
22#define __VFP_H__
23
24#define DBG(...) DEBUG_LOG(ARM11, __VA_ARGS__)
25
26#define vfpdebug //printf
27
28#include "core/arm/interpreter/vfp/vfp_helper.h" /* for references to cdp SoftFloat functions */
29
30unsigned VFPInit (ARMul_State *state);
31unsigned VFPMRC (ARMul_State * state, unsigned type, ARMword instr, ARMword * value);
32unsigned VFPMCR (ARMul_State * state, unsigned type, ARMword instr, ARMword value);
33unsigned VFPMRRC (ARMul_State * state, unsigned type, ARMword instr, ARMword * value1, ARMword * value2);
34unsigned VFPMCRR (ARMul_State * state, unsigned type, ARMword instr, ARMword value1, ARMword value2);
35unsigned VFPSTC (ARMul_State * state, unsigned type, ARMword instr, ARMword * value);
36unsigned VFPLDC (ARMul_State * state, unsigned type, ARMword instr, ARMword value);
37unsigned VFPCDP (ARMul_State * state, unsigned type, ARMword instr);
38
39/* FPSID Information */
40#define VFP_FPSID_IMPLMEN 0 /* should be the same as cp15 0 c0 0*/
41#define VFP_FPSID_SW 0
42#define VFP_FPSID_SUBARCH 0x2 /* VFP version. Current is v3 (not strict) */
43#define VFP_FPSID_PARTNUM 0x1
44#define VFP_FPSID_VARIANT 0x1
45#define VFP_FPSID_REVISION 0x1
46
47/* FPEXC Flags */
48#define VFP_FPEXC_EX 1<<31
49#define VFP_FPEXC_EN 1<<30
50
51/* FPSCR Flags */
52#define VFP_FPSCR_NFLAG 1<<31
53#define VFP_FPSCR_ZFLAG 1<<30
54#define VFP_FPSCR_CFLAG 1<<29
55#define VFP_FPSCR_VFLAG 1<<28
56
57#define VFP_FPSCR_AHP 1<<26 /* Alternative Half Precision */
58#define VFP_FPSCR_DN 1<<25 /* Default NaN */
59#define VFP_FPSCR_FZ 1<<24 /* Flush-to-zero */
60#define VFP_FPSCR_RMODE 3<<22 /* Rounding Mode */
61#define VFP_FPSCR_STRIDE 3<<20 /* Stride (vector) */
62#define VFP_FPSCR_LEN 7<<16 /* Length (vector) */
63
64#define VFP_FPSCR_IDE 1<<15 /* Input Denormal exc */
65#define VFP_FPSCR_IXE 1<<12 /* Inexact exc */
66#define VFP_FPSCR_UFE 1<<11 /* Underflow exc */
67#define VFP_FPSCR_OFE 1<<10 /* Overflow exc */
68#define VFP_FPSCR_DZE 1<<9 /* Division by Zero exc */
69#define VFP_FPSCR_IOE 1<<8 /* Invalid Operation exc */
70
71#define VFP_FPSCR_IDC 1<<7 /* Input Denormal cum exc */
72#define VFP_FPSCR_IXC 1<<4 /* Inexact cum exc */
73#define VFP_FPSCR_UFC 1<<3 /* Underflow cum exc */
74#define VFP_FPSCR_OFC 1<<2 /* Overflow cum exc */
75#define VFP_FPSCR_DZC 1<<1 /* Division by Zero cum exc */
76#define VFP_FPSCR_IOC 1<<0 /* Invalid Operation cum exc */
77
78/* Inline instructions. Note: Used in a cpp file as well */
79#ifdef __cplusplus
80 extern "C" {
81#endif
82int32_t vfp_get_float(ARMul_State * state, unsigned int reg);
83void vfp_put_float(ARMul_State * state, int32_t val, unsigned int reg);
84uint64_t vfp_get_double(ARMul_State * state, unsigned int reg);
85void vfp_put_double(ARMul_State * state, uint64_t val, unsigned int reg);
86void vfp_raise_exceptions(ARMul_State * state, uint32_t exceptions, uint32_t inst, uint32_t fpscr);
87extern uint32_t vfp_single_cpdo(ARMul_State * state, uint32_t inst, uint32_t fpscr);
88extern uint32_t vfp_double_cpdo(ARMul_State * state, uint32_t inst, uint32_t fpscr);
89
90/* MRC */
91inline void VMRS(ARMul_State * state, ARMword reg, ARMword Rt, ARMword *value);
92inline void VMOVBRS(ARMul_State * state, ARMword to_arm, ARMword t, ARMword n, ARMword *value);
93inline void VMOVBRRD(ARMul_State * state, ARMword to_arm, ARMword t, ARMword t2, ARMword n, ARMword *value1, ARMword *value2);
94inline void VMOVI(ARMul_State * state, ARMword single, ARMword d, ARMword imm);
95inline void VMOVR(ARMul_State * state, ARMword single, ARMword d, ARMword imm);
96/* MCR */
97inline void VMSR(ARMul_State * state, ARMword reg, ARMword Rt);
98/* STC */
99inline int VSTM(ARMul_State * state, int type, ARMword instr, ARMword* value);
100inline int VPUSH(ARMul_State * state, int type, ARMword instr, ARMword* value);
101inline int VSTR(ARMul_State * state, int type, ARMword instr, ARMword* value);
102/* LDC */
103inline int VLDM(ARMul_State * state, int type, ARMword instr, ARMword value);
104inline int VPOP(ARMul_State * state, int type, ARMword instr, ARMword value);
105inline int VLDR(ARMul_State * state, int type, ARMword instr, ARMword value);
106
107#ifdef __cplusplus
108 }
109#endif
110
111#endif
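[Editor's illustration, not part of this commit] A standalone check of the FPSID reset value that VFPInit() in vfp.cpp assembles from the field constants above.

#include <assert.h>
#include <stdint.h>

#define VFP_FPSID_IMPLMEN  0
#define VFP_FPSID_SW       0
#define VFP_FPSID_SUBARCH  0x2
#define VFP_FPSID_PARTNUM  0x1
#define VFP_FPSID_VARIANT  0x1
#define VFP_FPSID_REVISION 0x1

int main(void)
{
    uint32_t fpsid = VFP_FPSID_IMPLMEN << 24 | VFP_FPSID_SW << 23 |
                     VFP_FPSID_SUBARCH << 16 | VFP_FPSID_PARTNUM << 8 |
                     VFP_FPSID_VARIANT << 4 | VFP_FPSID_REVISION;
    assert(fpsid == 0x00020111);   /* implementer 0, subarch 2, part 1, variant 1, revision 1 */
    return 0;
}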
diff --git a/src/core/arm/interpreter/vfp/vfp_helper.h b/src/core/arm/interpreter/vfp/vfp_helper.h
new file mode 100644
index 000000000..80f9a93f4
--- /dev/null
+++ b/src/core/arm/interpreter/vfp/vfp_helper.h
@@ -0,0 +1,541 @@
1/*
2 vfp/vfp.h - ARM VFPv3 emulation unit - SoftFloat lib helper
3 Copyright (C) 2003 Skyeye Develop Group
4 for help please send mail to <skyeye-developer@lists.gro.clinux.org>
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19*/
20
21/*
22 * The following code is derivative from Linux Android kernel vfp
23 * floating point support.
24 *
25 * Copyright (C) 2004 ARM Limited.
26 * Written by Deep Blue Solutions Limited.
27 *
28 * This program is free software; you can redistribute it and/or modify
29 * it under the terms of the GNU General Public License version 2 as
30 * published by the Free Software Foundation.
31 */
32
33#ifndef __VFP_HELPER_H__
34#define __VFP_HELPER_H__
35
36/* Custom edit */
37
38#include <stdint.h>
39#include <stdio.h>
40
41#include "core/arm/interpreter/armdefs.h"
42
43#define u16 uint16_t
44#define u32 uint32_t
45#define u64 uint64_t
46#define s16 int16_t
47#define s32 int32_t
48#define s64 int64_t
49
50#define pr_info //printf
51#define pr_debug //printf
52
53static u32 fls(int x);
54#define do_div(n, base) {n/=base;}
55
56/* From vfpinstr.h */
57
58#define INST_CPRTDO(inst) (((inst) & 0x0f000000) == 0x0e000000)
59#define INST_CPRT(inst) ((inst) & (1 << 4))
60#define INST_CPRT_L(inst) ((inst) & (1 << 20))
61#define INST_CPRT_Rd(inst) (((inst) & (15 << 12)) >> 12)
62#define INST_CPRT_OP(inst) (((inst) >> 21) & 7)
63#define INST_CPNUM(inst) ((inst) & 0xf00)
64#define CPNUM(cp) ((cp) << 8)
65
66#define FOP_MASK (0x00b00040)
67#define FOP_FMAC (0x00000000)
68#define FOP_FNMAC (0x00000040)
69#define FOP_FMSC (0x00100000)
70#define FOP_FNMSC (0x00100040)
71#define FOP_FMUL (0x00200000)
72#define FOP_FNMUL (0x00200040)
73#define FOP_FADD (0x00300000)
74#define FOP_FSUB (0x00300040)
75#define FOP_FDIV (0x00800000)
76#define FOP_EXT (0x00b00040)
77
78#define FOP_TO_IDX(inst) ((inst & 0x00b00000) >> 20 | (inst & (1 << 6)) >> 4)
79
80#define FEXT_MASK (0x000f0080)
81#define FEXT_FCPY (0x00000000)
82#define FEXT_FABS (0x00000080)
83#define FEXT_FNEG (0x00010000)
84#define FEXT_FSQRT (0x00010080)
85#define FEXT_FCMP (0x00040000)
86#define FEXT_FCMPE (0x00040080)
87#define FEXT_FCMPZ (0x00050000)
88#define FEXT_FCMPEZ (0x00050080)
89#define FEXT_FCVT (0x00070080)
90#define FEXT_FUITO (0x00080000)
91#define FEXT_FSITO (0x00080080)
92#define FEXT_FTOUI (0x000c0000)
93#define FEXT_FTOUIZ (0x000c0080)
94#define FEXT_FTOSI (0x000d0000)
95#define FEXT_FTOSIZ (0x000d0080)
96
97#define FEXT_TO_IDX(inst) ((inst & 0x000f0000) >> 15 | (inst & (1 << 7)) >> 7)
98
99#define vfp_get_sd(inst) ((inst & 0x0000f000) >> 11 | (inst & (1 << 22)) >> 22)
100#define vfp_get_dd(inst) ((inst & 0x0000f000) >> 12 | (inst & (1 << 22)) >> 18)
101#define vfp_get_sm(inst) ((inst & 0x0000000f) << 1 | (inst & (1 << 5)) >> 5)
102#define vfp_get_dm(inst) ((inst & 0x0000000f) | (inst & (1 << 5)) >> 1)
103#define vfp_get_sn(inst) ((inst & 0x000f0000) >> 15 | (inst & (1 << 7)) >> 7)
104#define vfp_get_dn(inst) ((inst & 0x000f0000) >> 16 | (inst & (1 << 7)) >> 3)
105
106#define vfp_single(inst) (((inst) & 0x0000f00) == 0xa00)
107
108#define FPSCR_N (1 << 31)
109#define FPSCR_Z (1 << 30)
110#define FPSCR_C (1 << 29)
111#define FPSCR_V (1 << 28)
112
113/* -------------- */
114
115/* From asm/include/vfp.h */
116
117/* FPSCR bits */
118#define FPSCR_DEFAULT_NAN (1<<25)
119#define FPSCR_FLUSHTOZERO (1<<24)
120#define FPSCR_ROUND_NEAREST (0<<22)
121#define FPSCR_ROUND_PLUSINF (1<<22)
122#define FPSCR_ROUND_MINUSINF (2<<22)
123#define FPSCR_ROUND_TOZERO (3<<22)
124#define FPSCR_RMODE_BIT (22)
125#define FPSCR_RMODE_MASK (3 << FPSCR_RMODE_BIT)
126#define FPSCR_STRIDE_BIT (20)
127#define FPSCR_STRIDE_MASK (3 << FPSCR_STRIDE_BIT)
128#define FPSCR_LENGTH_BIT (16)
129#define FPSCR_LENGTH_MASK (7 << FPSCR_LENGTH_BIT)
130#define FPSCR_IOE (1<<8)
131#define FPSCR_DZE (1<<9)
132#define FPSCR_OFE (1<<10)
133#define FPSCR_UFE (1<<11)
134#define FPSCR_IXE (1<<12)
135#define FPSCR_IDE (1<<15)
136#define FPSCR_IOC (1<<0)
137#define FPSCR_DZC (1<<1)
138#define FPSCR_OFC (1<<2)
139#define FPSCR_UFC (1<<3)
140#define FPSCR_IXC (1<<4)
141#define FPSCR_IDC (1<<7)
142
143/* ---------------- */
144
145static inline u32 vfp_shiftright32jamming(u32 val, unsigned int shift)
146{
147 if (shift) {
148 if (shift < 32)
149 val = val >> shift | ((val << (32 - shift)) != 0);
150 else
151 val = val != 0;
152 }
153 return val;
154}
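/*
 * Editor's note (illustration, not part of this commit): the "jamming" shift
 * above is a sticky right shift -- any bits shifted out are collapsed into
 * the result's least significant bit, so later rounding code can still tell
 * that precision was lost.  For example:
 *
 *   vfp_shiftright32jamming(0x80000001, 4)  == 0x08000001   (a low bit was lost -> sticky 1)
 *   vfp_shiftright32jamming(0x80000000, 4)  == 0x08000000   (nothing lost -> plain shift)
 *   vfp_shiftright32jamming(0x00000001, 40) == 0x00000001   (shift >= 32 -> "was it non-zero?")
 */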
155
156static inline u64 vfp_shiftright64jamming(u64 val, unsigned int shift)
157{
158 if (shift) {
159 if (shift < 64)
160 val = val >> shift | ((val << (64 - shift)) != 0);
161 else
162 val = val != 0;
163 }
164 return val;
165}
166
167static inline u32 vfp_hi64to32jamming(u64 val)
168{
169 u32 v;
170 u32 highval = val >> 32;
171 u32 lowval = val & 0xffffffff;
172
173 if (lowval >= 1)
174 v = highval | 1;
175 else
176 v = highval;
177
178 return v;
179}
180
181static inline void add128(u64 *resh, u64 *resl, u64 nh, u64 nl, u64 mh, u64 ml)
182{
183 *resl = nl + ml;
184 *resh = nh + mh;
185 if (*resl < nl)
186 *resh += 1;
187}
188
189static inline void sub128(u64 *resh, u64 *resl, u64 nh, u64 nl, u64 mh, u64 ml)
190{
191 *resl = nl - ml;
192 *resh = nh - mh;
193 if (*resl > nl)
194 *resh -= 1;
195}
196
197static inline void mul64to128(u64 *resh, u64 *resl, u64 n, u64 m)
198{
199 u32 nh, nl, mh, ml;
200 u64 rh, rma, rmb, rl;
201
202 nl = n;
203 ml = m;
204 rl = (u64)nl * ml;
205
206 nh = n >> 32;
207 rma = (u64)nh * ml;
208
209 mh = m >> 32;
210 rmb = (u64)nl * mh;
211 rma += rmb;
212
213 rh = (u64)nh * mh;
214 rh += ((u64)(rma < rmb) << 32) + (rma >> 32);
215
216 rma <<= 32;
217 rl += rma;
218 rh += (rl < rma);
219
220 *resl = rl;
221 *resh = rh;
222}
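/*
 * Editor's note (illustration, not part of this commit): mul64to128() builds
 * the full 128-bit product from four 32x32->64 partial products.  Two quick
 * checks of the result (resh:resl):
 *
 *   mul64to128(&h, &l, 0x100000000ULL, 0x100000000ULL)
 *       -> h = 0x0000000000000001, l = 0x0000000000000000     (2^32 * 2^32 = 2^64)
 *   mul64to128(&h, &l, ~0ULL, ~0ULL)
 *       -> h = 0xfffffffffffffffe, l = 0x0000000000000001     ((2^64 - 1)^2)
 */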
223
224static inline void shift64left(u64 *resh, u64 *resl, u64 n)
225{
226 *resh = n >> 63;
227 *resl = n << 1;
228}
229
230static inline u64 vfp_hi64multiply64(u64 n, u64 m)
231{
232 u64 rh, rl;
233 mul64to128(&rh, &rl, n, m);
234 return rh | (rl != 0);
235}
236
237static inline u64 vfp_estimate_div128to64(u64 nh, u64 nl, u64 m)
238{
239 u64 mh, ml, remh, reml, termh, terml, z;
240
241 if (nh >= m)
242 return ~0ULL;
243 mh = m >> 32;
244 if (mh << 32 <= nh) {
245 z = 0xffffffff00000000ULL;
246 } else {
247 z = nh;
248 do_div(z, mh);
249 z <<= 32;
250 }
251 mul64to128(&termh, &terml, m, z);
252 sub128(&remh, &reml, nh, nl, termh, terml);
253 ml = m << 32;
254 while ((s64)remh < 0) {
255 z -= 0x100000000ULL;
256 add128(&remh, &reml, remh, reml, mh, ml);
257 }
258 remh = (remh << 32) | (reml >> 32);
259 if (mh << 32 <= remh) {
260 z |= 0xffffffff;
261 } else {
262 do_div(remh, mh);
263 z |= remh;
264 }
265 return z;
266}
267
268/*
269 * Operations on unpacked elements
270 */
271#define vfp_sign_negate(sign) (sign ^ 0x8000)
272
273/*
274 * Single-precision
275 */
276struct vfp_single {
277 s16 exponent;
278 u16 sign;
279 u32 significand;
280};
281
282#ifdef __cplusplus
283 extern "C" {
284#endif
285extern s32 vfp_get_float(ARMul_State * state, unsigned int reg);
286extern void vfp_put_float(ARMul_State * state, s32 val, unsigned int reg);
287#ifdef __cplusplus
288 }
289#endif
290
291/*
292 * VFP_SINGLE_MANTISSA_BITS - number of bits in the mantissa
293 * VFP_SINGLE_EXPONENT_BITS - number of bits in the exponent
294 * VFP_SINGLE_LOW_BITS - number of low bits in the unpacked significand
295 * which are not propagated to the float upon packing.
296 */
297#define VFP_SINGLE_MANTISSA_BITS (23)
298#define VFP_SINGLE_EXPONENT_BITS (8)
299#define VFP_SINGLE_LOW_BITS (32 - VFP_SINGLE_MANTISSA_BITS - 2)
300#define VFP_SINGLE_LOW_BITS_MASK ((1 << VFP_SINGLE_LOW_BITS) - 1)
301
302/*
303 * The bit in an unpacked float which indicates that it is a quiet NaN
304 */
305#define VFP_SINGLE_SIGNIFICAND_QNAN (1 << (VFP_SINGLE_MANTISSA_BITS - 1 + VFP_SINGLE_LOW_BITS))
306
307/*
308 * Operations on packed single-precision numbers
309 */
310#define vfp_single_packed_sign(v) ((v) & 0x80000000)
311#define vfp_single_packed_negate(v) ((v) ^ 0x80000000)
312#define vfp_single_packed_abs(v) ((v) & ~0x80000000)
313#define vfp_single_packed_exponent(v) (((v) >> VFP_SINGLE_MANTISSA_BITS) & ((1 << VFP_SINGLE_EXPONENT_BITS) - 1))
314#define vfp_single_packed_mantissa(v) ((v) & ((1 << VFP_SINGLE_MANTISSA_BITS) - 1))
315
316/*
317 * Unpack a single-precision float. Note that this returns the magnitude
318 * of the single-precision float mantissa with the 1. if necessary,
319 * aligned to bit 30.
320 */
321static inline void vfp_single_unpack(struct vfp_single *s, s32 val)
322{
323 u32 significand;
324
325 s->sign = vfp_single_packed_sign(val) >> 16,
326 s->exponent = vfp_single_packed_exponent(val);
327
328 significand = (u32) val;
329 significand = (significand << (32 - VFP_SINGLE_MANTISSA_BITS)) >> 2;
330 if (s->exponent && s->exponent != 255)
331 significand |= 0x40000000;
332 s->significand = significand;
333}
334
335/*
336 * Re-pack a single-precision float. This assumes that the float is
337 * already normalised such that the MSB is bit 30, _not_ bit 31.
338 */
339static inline s32 vfp_single_pack(struct vfp_single *s)
340{
341 u32 val;
342 val = (s->sign << 16) +
343 (s->exponent << VFP_SINGLE_MANTISSA_BITS) +
344 (s->significand >> VFP_SINGLE_LOW_BITS);
345 return (s32)val;
346}
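/*
 * Editor's note (illustration, not part of this commit): unpacking 1.0f
 * (packed value 0x3f800000) with vfp_single_unpack() yields
 *   sign = 0, exponent = 127, significand = 0x40000000
 * i.e. the implicit leading 1 is made explicit and aligned to bit 30.  For
 * normal numbers vfp_single_pack() is not the direct inverse: the leading-one
 * bit carries into the exponent field, which is why computed results go
 * through vfp_single_normaliseround() instead.  In this code, direct
 * unpack-then-pack appears only on the special-value paths (zero, infinity,
 * NaN), where no implicit bit is inserted and the round trip is exact.
 */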
347
348#define VFP_NUMBER (1<<0)
349#define VFP_ZERO (1<<1)
350#define VFP_DENORMAL (1<<2)
351#define VFP_INFINITY (1<<3)
352#define VFP_NAN (1<<4)
353#define VFP_NAN_SIGNAL (1<<5)
354
355#define VFP_QNAN (VFP_NAN)
356#define VFP_SNAN (VFP_NAN|VFP_NAN_SIGNAL)
357
358static inline int vfp_single_type(struct vfp_single *s)
359{
360 int type = VFP_NUMBER;
361 if (s->exponent == 255) {
362 if (s->significand == 0)
363 type = VFP_INFINITY;
364 else if (s->significand & VFP_SINGLE_SIGNIFICAND_QNAN)
365 type = VFP_QNAN;
366 else
367 type = VFP_SNAN;
368 } else if (s->exponent == 0) {
369 if (s->significand == 0)
370 type |= VFP_ZERO;
371 else
372 type |= VFP_DENORMAL;
373 }
374 return type;
375}
376
377
378u32 vfp_single_normaliseround(ARMul_State* state, int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func);
379
380/*
381 * Double-precision
382 */
383struct vfp_double {
384 s16 exponent;
385 u16 sign;
386 u64 significand;
387};
388
389/*
390 * VFP_REG_ZERO is a special register number for vfp_get_double
391 * which returns (double)0.0. This is useful for the compare with
392 * zero instructions.
393 */
394#ifdef CONFIG_VFPv3
395#define VFP_REG_ZERO 32
396#else
397#define VFP_REG_ZERO 16
398#endif
399#ifdef __cplusplus
400 extern "C" {
401#endif
402extern u64 vfp_get_double(ARMul_State * state, unsigned int reg);
403extern void vfp_put_double(ARMul_State * state, u64 val, unsigned int reg);
404#ifdef __cplusplus
405 }
406#endif
407#define VFP_DOUBLE_MANTISSA_BITS (52)
408#define VFP_DOUBLE_EXPONENT_BITS (11)
409#define VFP_DOUBLE_LOW_BITS (64 - VFP_DOUBLE_MANTISSA_BITS - 2)
410#define VFP_DOUBLE_LOW_BITS_MASK ((1 << VFP_DOUBLE_LOW_BITS) - 1)
411
412/*
413 * The bit in an unpacked double which indicates that it is a quiet NaN
414 */
415#define VFP_DOUBLE_SIGNIFICAND_QNAN (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1 + VFP_DOUBLE_LOW_BITS))
416
417/*
418 * Operations on packed double-precision numbers
419 */
420#define vfp_double_packed_sign(v) ((v) & (1ULL << 63))
421#define vfp_double_packed_negate(v) ((v) ^ (1ULL << 63))
422#define vfp_double_packed_abs(v) ((v) & ~(1ULL << 63))
423#define vfp_double_packed_exponent(v) (((v) >> VFP_DOUBLE_MANTISSA_BITS) & ((1 << VFP_DOUBLE_EXPONENT_BITS) - 1))
424#define vfp_double_packed_mantissa(v) ((v) & ((1ULL << VFP_DOUBLE_MANTISSA_BITS) - 1))
425
426/*
427 * Unpack a double-precision float. Note that this returns the magnitude
428 * of the double-precision float mantissa with the 1. if necessary,
429 * aligned to bit 62.
430 */
431static inline void vfp_double_unpack(struct vfp_double *s, s64 val)
432{
433 u64 significand;
434
435 s->sign = vfp_double_packed_sign(val) >> 48;
436 s->exponent = vfp_double_packed_exponent(val);
437
438 significand = (u64) val;
439 significand = (significand << (64 - VFP_DOUBLE_MANTISSA_BITS)) >> 2;
440 if (s->exponent && s->exponent != 2047)
441 significand |= (1ULL << 62);
442 s->significand = significand;
443}
444
445/*
446 * Re-pack a double-precision float. This assumes that the float is
447 * already normalised such that the MSB is bit 62, _not_ bit 63.
448 */
449static inline s64 vfp_double_pack(struct vfp_double *s)
450{
451 u64 val;
452 val = ((u64)s->sign << 48) +
453 ((u64)s->exponent << VFP_DOUBLE_MANTISSA_BITS) +
454 (s->significand >> VFP_DOUBLE_LOW_BITS);
455 return (s64)val;
456}
457
458static inline int vfp_double_type(struct vfp_double *s)
459{
460 int type = VFP_NUMBER;
461 if (s->exponent == 2047) {
462 if (s->significand == 0)
463 type = VFP_INFINITY;
464 else if (s->significand & VFP_DOUBLE_SIGNIFICAND_QNAN)
465 type = VFP_QNAN;
466 else
467 type = VFP_SNAN;
468 } else if (s->exponent == 0) {
469 if (s->significand == 0)
470 type |= VFP_ZERO;
471 else
472 type |= VFP_DENORMAL;
473 }
474 return type;
475}
476
477u32 vfp_double_normaliseround(ARMul_State* state, int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func);
478
479u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand);
480
481/*
482 * A special flag to tell the normalisation code not to normalise.
483 */
484#define VFP_NAN_FLAG 0x100
485
486/*
487 * A bit pattern used to indicate the initial (unset) value of the
488 * exception mask, in case nothing handles an instruction. This
489 * doesn't include the NAN flag, which gets masked out before
490 * we check for an error.
491 */
492#define VFP_EXCEPTION_ERROR ((u32)-1 & ~VFP_NAN_FLAG)
493
494/*
495 * A flag to tell vfp instruction type.
496 * OP_SCALAR - this operation always operates in scalar mode
497 * OP_SD - the instruction exceptionally writes to a single precision result.
498 * OP_DD - the instruction exceptionally writes to a double precision result.
499 * OP_SM - the instruction exceptionally reads from a single precision operand.
500 */
501#define OP_SCALAR (1 << 0)
502#define OP_SD (1 << 1)
503#define OP_DD (1 << 1)
504#define OP_SM (1 << 2)
505
506struct op {
507 u32 (* const fn)(ARMul_State* state, int dd, int dn, int dm, u32 fpscr);
508 u32 flags;
509};
510
511static inline u32 fls(int x)
512{
513 int r = 32;
514
515 if (!x)
516 return 0;
517 if (!(x & 0xffff0000u)) {
518 x <<= 16;
519 r -= 16;
520 }
521 if (!(x & 0xff000000u)) {
522 x <<= 8;
523 r -= 8;
524 }
525 if (!(x & 0xf0000000u)) {
526 x <<= 4;
527 r -= 4;
528 }
529 if (!(x & 0xc0000000u)) {
530 x <<= 2;
531 r -= 2;
532 }
533 if (!(x & 0x80000000u)) {
534 x <<= 1;
535 r -= 1;
536 }
537 return r;
538
539}
540
541#endif
diff --git a/src/core/arm/interpreter/vfp/vfpdouble.cpp b/src/core/arm/interpreter/vfp/vfpdouble.cpp
new file mode 100644
index 000000000..37fd28829
--- /dev/null
+++ b/src/core/arm/interpreter/vfp/vfpdouble.cpp
@@ -0,0 +1,1262 @@
1/*
2 vfp/vfpdouble.c - ARM VFPv3 emulation unit - SoftFloat double instruction
3 Copyright (C) 2003 Skyeye Develop Group
4 for help please send mail to <skyeye-developer@lists.gro.clinux.org>
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19*/
20
21/*
22 * This code is derived in part from :
23 * - Android kernel
24 * - John R. Housers softfloat library, which
25 * carries the following notice:
26 *
27 * ===========================================================================
28 * This C source file is part of the SoftFloat IEC/IEEE Floating-point
29 * Arithmetic Package, Release 2.
30 *
31 * Written by John R. Hauser. This work was made possible in part by the
32 * International Computer Science Institute, located at Suite 600, 1947 Center
33 * Street, Berkeley, California 94704. Funding was partially provided by the
34 * National Science Foundation under grant MIP-9311980. The original version
35 * of this code was written as part of a project to build a fixed-point vector
36 * processor in collaboration with the University of California at Berkeley,
37 * overseen by Profs. Nelson Morgan and John Wawrzynek. More information
38 * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
39 * arithmetic/softfloat.html'.
40 *
41 * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
42 * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
43 * TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
44 * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
45 * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
46 *
47 * Derivative works are acceptable, even for commercial purposes, so long as
48 * (1) they include prominent notice that the work is derivative, and (2) they
49 * include prominent notice akin to these three paragraphs for those parts of
50 * this code that are retained.
51 * ===========================================================================
52 */
53
54#include "vfp_helper.h"
55#include "asm_vfp.h"
56
57static struct vfp_double vfp_double_default_qnan = {
58 //.exponent = 2047,
59 //.sign = 0,
60 //.significand = VFP_DOUBLE_SIGNIFICAND_QNAN,
61};
62
63static void vfp_double_dump(const char *str, struct vfp_double *d)
64{
65 pr_debug("VFP: %s: sign=%d exponent=%d significand=%016llx\n",
66 str, d->sign != 0, d->exponent, d->significand);
67}
68
69static void vfp_double_normalise_denormal(struct vfp_double *vd)
70{
71 int bits = 31 - fls(vd->significand >> 32);
72 if (bits == 31)
73 bits = 63 - fls(vd->significand);
74
75 vfp_double_dump("normalise_denormal: in", vd);
76
77 if (bits) {
78 vd->exponent -= bits - 1;
79 vd->significand <<= bits;
80 }
81
82 vfp_double_dump("normalise_denormal: out", vd);
83}
84
85u32 vfp_double_normaliseround(ARMul_State* state, int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func)
86{
87 u64 significand, incr;
88 int exponent, shift, underflow;
89 u32 rmode;
90
91 vfp_double_dump("pack: in", vd);
92
93 /*
94 * Infinities and NaNs are a special case.
95 */
96 if (vd->exponent == 2047 && (vd->significand == 0 || exceptions))
97 goto pack;
98
99 /*
100 * Special-case zero.
101 */
102 if (vd->significand == 0) {
103 vd->exponent = 0;
104 goto pack;
105 }
106
107 exponent = vd->exponent;
108 significand = vd->significand;
109
110 shift = 32 - fls(significand >> 32);
111 if (shift == 32)
112 shift = 64 - fls(significand);
113 if (shift) {
114 exponent -= shift;
115 significand <<= shift;
116 }
117
118#if 1
119 vd->exponent = exponent;
120 vd->significand = significand;
121 vfp_double_dump("pack: normalised", vd);
122#endif
123
124 /*
125 * Tiny number?
126 */
127 underflow = exponent < 0;
128 if (underflow) {
129 significand = vfp_shiftright64jamming(significand, -exponent);
130 exponent = 0;
131#if 1
132 vd->exponent = exponent;
133 vd->significand = significand;
134 vfp_double_dump("pack: tiny number", vd);
135#endif
136 if (!(significand & ((1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1)))
137 underflow = 0;
138 }
139
140 /*
141 * Select rounding increment.
142 */
143 incr = 0;
144 rmode = fpscr & FPSCR_RMODE_MASK;
145
146 if (rmode == FPSCR_ROUND_NEAREST) {
147 incr = 1ULL << VFP_DOUBLE_LOW_BITS;
148 if ((significand & (1ULL << (VFP_DOUBLE_LOW_BITS + 1))) == 0)
149 incr -= 1;
150 } else if (rmode == FPSCR_ROUND_TOZERO) {
151 incr = 0;
152 } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vd->sign != 0))
153 incr = (1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1;
154
155 pr_debug("VFP: rounding increment = 0x%08llx\n", incr);
156
157 /*
158 * Is our rounding going to overflow?
159 */
160 if ((significand + incr) < significand) {
161 exponent += 1;
162 significand = (significand >> 1) | (significand & 1);
163 incr >>= 1;
164#if 1
165 vd->exponent = exponent;
166 vd->significand = significand;
167 vfp_double_dump("pack: overflow", vd);
168#endif
169 }
170
171 /*
172 * If any of the low bits (which will be shifted out of the
173 * number) are non-zero, the result is inexact.
174 */
175 if (significand & ((1 << (VFP_DOUBLE_LOW_BITS + 1)) - 1))
176 exceptions |= FPSCR_IXC;
177
178 /*
179 * Do our rounding.
180 */
181 significand += incr;
182
183 /*
184 * Infinity?
185 */
186 if (exponent >= 2046) {
187 exceptions |= FPSCR_OFC | FPSCR_IXC;
188 if (incr == 0) {
189 vd->exponent = 2045;
190 vd->significand = 0x7fffffffffffffffULL;
191 } else {
192 vd->exponent = 2047; /* infinity */
193 vd->significand = 0;
194 }
195 } else {
196 if (significand >> (VFP_DOUBLE_LOW_BITS + 1) == 0)
197 exponent = 0;
198 if (exponent || significand > 0x8000000000000000ULL)
199 underflow = 0;
200 if (underflow)
201 exceptions |= FPSCR_UFC;
202 vd->exponent = exponent;
203 vd->significand = significand >> 1;
204 }
205
206 pack:
207 vfp_double_dump("pack: final", vd);
208 {
209 s64 d = vfp_double_pack(vd);
210 pr_debug("VFP: %s: d(d%d)=%016llx exceptions=%08x\n", func,
211 dd, d, exceptions);
212 vfp_put_double(state, d, dd);
213 }
214 return exceptions;
215}
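/*
 * Editor's note (illustration, not part of this commit): the
 * FPSCR_ROUND_NEAREST case above implements round-to-nearest-even.  The low
 * (VFP_DOUBLE_LOW_BITS + 1) bits of 'significand' are discarded on packing,
 * so bit (VFP_DOUBLE_LOW_BITS + 1) is the LSB of the final mantissa.  'incr'
 * starts at half a ULP (1 << VFP_DOUBLE_LOW_BITS); when that final LSB is
 * already 0 the increment is reduced by one, so an exact halfway pattern does
 * not round up -- ties go to the even mantissa.
 */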
216
217/*
218 * Propagate the NaN, setting exceptions if it is signalling.
219 * 'n' is always a NaN. 'm' may be a number, NaN or infinity.
220 */
221static u32
222vfp_propagate_nan(struct vfp_double *vdd, struct vfp_double *vdn,
223 struct vfp_double *vdm, u32 fpscr)
224{
225 struct vfp_double *nan;
226 int tn, tm = 0;
227
228 tn = vfp_double_type(vdn);
229
230 if (vdm)
231 tm = vfp_double_type(vdm);
232
233 if (fpscr & FPSCR_DEFAULT_NAN)
234 /*
235 * Default NaN mode - always returns a quiet NaN
236 */
237 nan = &vfp_double_default_qnan;
238 else {
239 /*
240 * Contemporary mode - select the first signalling
241 * NAN, or if neither are signalling, the first
242 * quiet NAN.
243 */
244 if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN))
245 nan = vdn;
246 else
247 nan = vdm;
248 /*
249 * Make the NaN quiet.
250 */
251 nan->significand |= VFP_DOUBLE_SIGNIFICAND_QNAN;
252 }
253
254 *vdd = *nan;
255
256 /*
257 * If one was a signalling NAN, raise invalid operation.
258 */
259 return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG;
260}
261
262/*
263 * Extended operations
264 */
265static u32 vfp_double_fabs(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
266{
267 pr_debug("In %s\n", __FUNCTION__);
268 vfp_put_double(state, vfp_double_packed_abs(vfp_get_double(state, dm)), dd);
269 return 0;
270}
271
272static u32 vfp_double_fcpy(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
273{
274 pr_debug("In %s\n", __FUNCTION__);
275 vfp_put_double(state, vfp_get_double(state, dm), dd);
276 return 0;
277}
278
279static u32 vfp_double_fneg(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
280{
281 pr_debug("In %s\n", __FUNCTION__);
282 vfp_put_double(state, vfp_double_packed_negate(vfp_get_double(state, dm)), dd);
283 return 0;
284}
285
286static u32 vfp_double_fsqrt(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
287{
288 pr_debug("In %s\n", __FUNCTION__);
289 struct vfp_double vdm, vdd;
290 int ret, tm;
291
292 vfp_double_unpack(&vdm, vfp_get_double(state, dm));
293 tm = vfp_double_type(&vdm);
294 if (tm & (VFP_NAN|VFP_INFINITY)) {
295 struct vfp_double *vdp = &vdd;
296
297 if (tm & VFP_NAN)
298 ret = vfp_propagate_nan(vdp, &vdm, NULL, fpscr);
299 else if (vdm.sign == 0) {
300 sqrt_copy:
301 vdp = &vdm;
302 ret = 0;
303 } else {
304 sqrt_invalid:
305 vdp = &vfp_double_default_qnan;
306 ret = FPSCR_IOC;
307 }
308 vfp_put_double(state, vfp_double_pack(vdp), dd);
309 return ret;
310 }
311
312 /*
313 * sqrt(+/- 0) == +/- 0
314 */
315 if (tm & VFP_ZERO)
316 goto sqrt_copy;
317
318 /*
319 * Normalise a denormalised number
320 */
321 if (tm & VFP_DENORMAL)
322 vfp_double_normalise_denormal(&vdm);
323
324 /*
325 * sqrt(<0) = invalid
326 */
327 if (vdm.sign)
328 goto sqrt_invalid;
329
330 vfp_double_dump("sqrt", &vdm);
331
332 /*
333 * Estimate the square root.
334 */
335 vdd.sign = 0;
336 vdd.exponent = ((vdm.exponent - 1023) >> 1) + 1023;
337 vdd.significand = (u64)vfp_estimate_sqrt_significand(vdm.exponent, vdm.significand >> 32) << 31;
338
339 vfp_double_dump("sqrt estimate1", &vdd);
340
341 vdm.significand >>= 1 + (vdm.exponent & 1);
342 vdd.significand += 2 + vfp_estimate_div128to64(vdm.significand, 0, vdd.significand);
343
344 vfp_double_dump("sqrt estimate2", &vdd);
345
346 /*
347 * And now adjust.
348 */
349 if ((vdd.significand & VFP_DOUBLE_LOW_BITS_MASK) <= 5) {
350 if (vdd.significand < 2) {
351 vdd.significand = ~0ULL;
352 } else {
353 u64 termh, terml, remh, reml;
354 vdm.significand <<= 2;
355 mul64to128(&termh, &terml, vdd.significand, vdd.significand);
356 sub128(&remh, &reml, vdm.significand, 0, termh, terml);
357 while ((s64)remh < 0) {
358 vdd.significand -= 1;
359 shift64left(&termh, &terml, vdd.significand);
360 terml |= 1;
361 add128(&remh, &reml, remh, reml, termh, terml);
362 }
363 vdd.significand |= (remh | reml) != 0;
364 }
365 }
366 vdd.significand = vfp_shiftright64jamming(vdd.significand, 1);
367
368 return vfp_double_normaliseround(state, dd, &vdd, fpscr, 0, "fsqrt");
369}
370
371/*
372 * Equal := ZC
373 * Less than := N
374 * Greater than := C
375 * Unordered := CV
376 */
377static u32 vfp_compare(ARMul_State* state, int dd, int signal_on_qnan, int dm, u32 fpscr)
378{
379 s64 d, m;
380 u32 ret = 0;
381
382 pr_debug("In %s, state=0x%x, fpscr=0x%x\n", __FUNCTION__, state, fpscr);
383 m = vfp_get_double(state, dm);
384 if (vfp_double_packed_exponent(m) == 2047 && vfp_double_packed_mantissa(m)) {
385 ret |= FPSCR_C | FPSCR_V;
386 if (signal_on_qnan || !(vfp_double_packed_mantissa(m) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1))))
387 /*
388 * Signalling NaN, or signalling on quiet NaN
389 */
390 ret |= FPSCR_IOC;
391 }
392
393 d = vfp_get_double(state, dd);
394 if (vfp_double_packed_exponent(d) == 2047 && vfp_double_packed_mantissa(d)) {
395 ret |= FPSCR_C | FPSCR_V;
396 if (signal_on_qnan || !(vfp_double_packed_mantissa(d) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1))))
397 /*
398 * Signalling NaN, or signalling on quiet NaN
399 */
400 ret |= FPSCR_IOC;
401 }
402
403 if (ret == 0) {
404 //printf("In %s, d=%lld, m =%lld\n ", __FUNCTION__, d, m);
405 if (d == m || vfp_double_packed_abs(d | m) == 0) {
406 /*
407 * equal
408 */
409 ret |= FPSCR_Z | FPSCR_C;
410 //printf("In %s,1 ret=0x%x\n", __FUNCTION__, ret);
411 } else if (vfp_double_packed_sign(d ^ m)) {
412 /*
413 * different signs
414 */
415 if (vfp_double_packed_sign(d))
416 /*
417 * d is negative, so d < m
418 */
419 ret |= FPSCR_N;
420 else
421 /*
422 * d is positive, so d > m
423 */
424 ret |= FPSCR_C;
425 } else if ((vfp_double_packed_sign(d) != 0) ^ (d < m)) {
426 /*
427 * d < m
428 */
429 ret |= FPSCR_N;
430 } else if ((vfp_double_packed_sign(d) != 0) ^ (d > m)) {
431 /*
432 * d > m
433 */
434 ret |= FPSCR_C;
435 }
436 }
437 pr_debug("In %s, state=0x%x, ret=0x%x\n", __FUNCTION__, state, ret);
438
439 return ret;
440}
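/*
 * Editor's note (illustration, not part of this commit): concrete outcomes of
 * vfp_compare(), matching the flag table above ("Equal := ZC", ...):
 *
 *   d = 1.0,  m = 1.0          -> FPSCR_Z | FPSCR_C              (equal; +0 vs -0 also compares equal)
 *   d = -2.0, m = 1.0          -> FPSCR_N                        (d < m)
 *   d = 2.0,  m = 1.0          -> FPSCR_C                        (d > m)
 *   m = quiet NaN              -> FPSCR_C | FPSCR_V              (unordered)
 *   m = quiet NaN, FCMPE path  -> FPSCR_C | FPSCR_V | FPSCR_IOC  (signal_on_qnan set)
 */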
441
442static u32 vfp_double_fcmp(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
443{
444 pr_debug("In %s\n", __FUNCTION__);
445 return vfp_compare(state, dd, 0, dm, fpscr);
446}
447
448static u32 vfp_double_fcmpe(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
449{
450 pr_debug("In %s\n", __FUNCTION__);
451 return vfp_compare(state, dd, 1, dm, fpscr);
452}
453
454static u32 vfp_double_fcmpz(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
455{
456 pr_debug("In %s\n", __FUNCTION__);
457 return vfp_compare(state, dd, 0, VFP_REG_ZERO, fpscr);
458}
459
460static u32 vfp_double_fcmpez(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
461{
462 pr_debug("In %s\n", __FUNCTION__);
463 return vfp_compare(state, dd, 1, VFP_REG_ZERO, fpscr);
464}
465
466static u32 vfp_double_fcvts(ARMul_State* state, int sd, int unused, int dm, u32 fpscr)
467{
468 struct vfp_double vdm;
469 struct vfp_single vsd;
470 int tm;
471 u32 exceptions = 0;
472
473 pr_debug("In %s\n", __FUNCTION__);
474 vfp_double_unpack(&vdm, vfp_get_double(state, dm));
475
476 tm = vfp_double_type(&vdm);
477
478 /*
479 * If we have a signalling NaN, signal invalid operation.
480 */
481 if (tm == VFP_SNAN)
482 exceptions = FPSCR_IOC;
483
484 if (tm & VFP_DENORMAL)
485 vfp_double_normalise_denormal(&vdm);
486
487 vsd.sign = vdm.sign;
488 vsd.significand = vfp_hi64to32jamming(vdm.significand);
489
490 /*
491 * If we have an infinity or a NaN, the exponent must be 255
492 */
493 if (tm & (VFP_INFINITY|VFP_NAN)) {
494 vsd.exponent = 255;
495 if (tm == VFP_QNAN)
496 vsd.significand |= VFP_SINGLE_SIGNIFICAND_QNAN;
497 goto pack_nan;
498 } else if (tm & VFP_ZERO)
499 vsd.exponent = 0;
500 else
501 vsd.exponent = vdm.exponent - (1023 - 127);
502
503 return vfp_single_normaliseround(state, sd, &vsd, fpscr, exceptions, "fcvts");
504
505 pack_nan:
506 vfp_put_float(state, vfp_single_pack(&vsd), sd);
507 return exceptions;
508}
509
510static u32 vfp_double_fuito(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
511{
512 struct vfp_double vdm;
513 u32 m = vfp_get_float(state, dm);
514
515 pr_debug("In %s\n", __FUNCTION__);
516 vdm.sign = 0;
517 vdm.exponent = 1023 + 63 - 1;
518 vdm.significand = (u64)m;
519
520 return vfp_double_normaliseround(state, dd, &vdm, fpscr, 0, "fuito");
521}
522
523static u32 vfp_double_fsito(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
524{
525 struct vfp_double vdm;
526 u32 m = vfp_get_float(state, dm);
527
528 pr_debug("In %s\n", __FUNCTION__);
529 vdm.sign = (m & 0x80000000) >> 16;
530 vdm.exponent = 1023 + 63 - 1;
531 vdm.significand = vdm.sign ? -m : m;
532
533 return vfp_double_normaliseround(state, dd, &vdm, fpscr, 0, "fsito");
534}
535
536static u32 vfp_double_ftoui(ARMul_State* state, int sd, int unused, int dm, u32 fpscr)
537{
538 struct vfp_double vdm;
539 u32 d, exceptions = 0;
540 int rmode = fpscr & FPSCR_RMODE_MASK;
541 int tm;
542
543 pr_debug("In %s\n", __FUNCTION__);
544 vfp_double_unpack(&vdm, vfp_get_double(state, dm));
545
546 /*
547 * Do we have a denormalised number?
548 */
549 tm = vfp_double_type(&vdm);
550 if (tm & VFP_DENORMAL)
551 exceptions |= FPSCR_IDC;
552
553 if (tm & VFP_NAN)
554 vdm.sign = 0;
555
556 if (vdm.exponent >= 1023 + 32) {
557 d = vdm.sign ? 0 : 0xffffffff;
558 exceptions = FPSCR_IOC;
559 } else if (vdm.exponent >= 1023 - 1) {
560 int shift = 1023 + 63 - vdm.exponent;
561 u64 rem, incr = 0;
562
563 /*
564 * 2^0 <= m < 2^32-2^8
565 */
566 d = (vdm.significand << 1) >> shift;
567 rem = vdm.significand << (65 - shift);
568
569 if (rmode == FPSCR_ROUND_NEAREST) {
570 incr = 0x8000000000000000ULL;
571 if ((d & 1) == 0)
572 incr -= 1;
573 } else if (rmode == FPSCR_ROUND_TOZERO) {
574 incr = 0;
575 } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) {
576 incr = ~0ULL;
577 }
578
579 if ((rem + incr) < rem) {
580 if (d < 0xffffffff)
581 d += 1;
582 else
583 exceptions |= FPSCR_IOC;
584 }
585
586 if (d && vdm.sign) {
587 d = 0;
588 exceptions |= FPSCR_IOC;
589 } else if (rem)
590 exceptions |= FPSCR_IXC;
591 } else {
592 d = 0;
593 if (vdm.exponent | vdm.significand) {
594 exceptions |= FPSCR_IXC;
595 if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0)
596 d = 1;
597 else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign) {
598 d = 0;
599 exceptions |= FPSCR_IOC;
600 }
601 }
602 }
603
604 pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);
605
606 vfp_put_float(state, d, sd);
607
608 return exceptions;
609}
610
611static u32 vfp_double_ftouiz(ARMul_State* state, int sd, int unused, int dm, u32 fpscr)
612{
613 pr_debug("In %s\n", __FUNCTION__);
614 return vfp_double_ftoui(state, sd, unused, dm, FPSCR_ROUND_TOZERO);
615}
616
617static u32 vfp_double_ftosi(ARMul_State* state, int sd, int unused, int dm, u32 fpscr)
618{
619 struct vfp_double vdm;
620 u32 d, exceptions = 0;
621 int rmode = fpscr & FPSCR_RMODE_MASK;
622 int tm;
623
624 pr_debug("In %s\n", __FUNCTION__);
625 vfp_double_unpack(&vdm, vfp_get_double(state, dm));
626 vfp_double_dump("VDM", &vdm);
627
628 /*
629 * Do we have denormalised number?
630 */
631 tm = vfp_double_type(&vdm);
632 if (tm & VFP_DENORMAL)
633 exceptions |= FPSCR_IDC;
634
635 if (tm & VFP_NAN) {
636 d = 0;
637 exceptions |= FPSCR_IOC;
638 } else if (vdm.exponent >= 1023 + 32) {
639 d = 0x7fffffff;
640 if (vdm.sign)
641 d = ~d;
642 exceptions |= FPSCR_IOC;
643 } else if (vdm.exponent >= 1023 - 1) {
644 int shift = 1023 + 63 - vdm.exponent; /* 58 */
645 u64 rem, incr = 0;
646
647 d = (vdm.significand << 1) >> shift;
648 rem = vdm.significand << (65 - shift);
649
650 if (rmode == FPSCR_ROUND_NEAREST) {
651 incr = 0x8000000000000000ULL;
652 if ((d & 1) == 0)
653 incr -= 1;
654 } else if (rmode == FPSCR_ROUND_TOZERO) {
655 incr = 0;
656 } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) {
657 incr = ~0ULL;
658 }
659
660 if ((rem + incr) < rem && d < 0xffffffff)
661 d += 1;
662 if (d > 0x7fffffff + (vdm.sign != 0)) {
663 d = 0x7fffffff + (vdm.sign != 0);
664 exceptions |= FPSCR_IOC;
665 } else if (rem)
666 exceptions |= FPSCR_IXC;
667
668 if (vdm.sign)
669 d = -d;
670 } else {
671 d = 0;
672 if (vdm.exponent | vdm.significand) {
673 exceptions |= FPSCR_IXC;
674 if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0)
675 d = 1;
676 else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign)
677 d = -1;
678 }
679 }
680
681 pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);
682
683 vfp_put_float(state, (s32)d, sd);
684
685 return exceptions;
686}
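
/*
 * Sketch of the saturation used by ftosi above (illustrative helper, not an
 * emulator function): a positive result saturates at 0x7fffffff, while a
 * negative one may reach magnitude 0x80000000, so the bound is widened by
 * one before the two's-complement negate.
 */
#include <stdint.h>

static uint32_t saturate_signed32(uint32_t magnitude, int negative, int *invalid)
{
    uint32_t limit = 0x7fffffffu + (negative != 0);
    if (magnitude > limit) {
        magnitude = limit;
        *invalid = 1;                        /* would raise FPSCR_IOC */
    }
    return negative ? 0u - magnitude : magnitude;
}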
687
688static u32 vfp_double_ftosiz(ARMul_State* state, int dd, int unused, int dm, u32 fpscr)
689{
690 pr_debug("In %s\n", __FUNCTION__);
691 return vfp_double_ftosi(state, dd, unused, dm, FPSCR_ROUND_TOZERO);
692}
693
694static struct op fops_ext[] = {
695 { vfp_double_fcpy, 0 }, //0x00000000 - FEXT_FCPY
696 { vfp_double_fabs, 0 }, //0x00000001 - FEXT_FABS
697 { vfp_double_fneg, 0 }, //0x00000002 - FEXT_FNEG
698 { vfp_double_fsqrt, 0 }, //0x00000003 - FEXT_FSQRT
699 { NULL, 0 },
700 { NULL, 0 },
701 { NULL, 0 },
702 { NULL, 0 },
703 { vfp_double_fcmp, OP_SCALAR }, //0x00000008 - FEXT_FCMP
704 { vfp_double_fcmpe, OP_SCALAR }, //0x00000009 - FEXT_FCMPE
705 { vfp_double_fcmpz, OP_SCALAR }, //0x0000000A - FEXT_FCMPZ
706 { vfp_double_fcmpez, OP_SCALAR }, //0x0000000B - FEXT_FCMPEZ
707 { NULL, 0 },
708 { NULL, 0 },
709 { NULL, 0 },
710 { vfp_double_fcvts, OP_SCALAR|OP_DD }, //0x0000000F - FEXT_FCVT
711 { vfp_double_fuito, OP_SCALAR }, //0x00000010 - FEXT_FUITO
712 { vfp_double_fsito, OP_SCALAR }, //0x00000011 - FEXT_FSITO
713 { NULL, 0 },
714 { NULL, 0 },
715 { NULL, 0 },
716 { NULL, 0 },
717 { NULL, 0 },
718 { NULL, 0 },
719 { vfp_double_ftoui, OP_SCALAR }, //0x00000018 - FEXT_FTOUI
720 { vfp_double_ftouiz, OP_SCALAR }, //0x00000019 - FEXT_FTOUIZ
721 { vfp_double_ftosi, OP_SCALAR }, //0x0000001A - FEXT_FTOSI
722 { vfp_double_ftosiz, OP_SCALAR }, //0x0000001B - FEXT_FTOSIZ
723};
724
725
726
727
728static u32
729vfp_double_fadd_nonnumber(struct vfp_double *vdd, struct vfp_double *vdn,
730 struct vfp_double *vdm, u32 fpscr)
731{
732 struct vfp_double *vdp;
733 u32 exceptions = 0;
734 int tn, tm;
735
736 tn = vfp_double_type(vdn);
737 tm = vfp_double_type(vdm);
738
739 if (tn & tm & VFP_INFINITY) {
740 /*
741 * Two infinities. Are they different signs?
742 */
743 if (vdn->sign ^ vdm->sign) {
744 /*
745 * different signs -> invalid
746 */
747 exceptions = FPSCR_IOC;
748 vdp = &vfp_double_default_qnan;
749 } else {
750 /*
751 * same signs -> valid
752 */
753 vdp = vdn;
754 }
755 } else if (tn & VFP_INFINITY && tm & VFP_NUMBER) {
756 /*
757 * One infinity and one number -> infinity
758 */
759 vdp = vdn;
760 } else {
761 /*
762 * 'n' is a NaN of some type
763 */
764 return vfp_propagate_nan(vdd, vdn, vdm, fpscr);
765 }
766 *vdd = *vdp;
767 return exceptions;
768}
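
/*
 * Reference sketch of the infinity handling above, written with host
 * doubles purely for illustration (the emulator's exception flags are
 * reduced to a single 'invalid' output): opposite-signed infinities make
 * the addition invalid and produce the default quiet NaN, a NaN operand is
 * propagated, and otherwise the infinite operand simply wins.
 */
#include <math.h>

static double add_nonnumber_reference(double n, double m, int *invalid)
{
    if (isnan(n) || isnan(m))
        return n + m;                        /* NaN propagation */
    if (isinf(n) && isinf(m) && signbit(n) != signbit(m)) {
        *invalid = 1;                        /* FPSCR_IOC */
        return NAN;                          /* default qNaN */
    }
    return isinf(n) ? n : m;                 /* the infinite operand wins */
}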
769
770static u32
771vfp_double_add(struct vfp_double *vdd, struct vfp_double *vdn,
772 struct vfp_double *vdm, u32 fpscr)
773{
774 u32 exp_diff;
775 u64 m_sig;
776
777 if (vdn->significand & (1ULL << 63) ||
778 vdm->significand & (1ULL << 63)) {
779 pr_info("VFP: bad FP values\n");
780 vfp_double_dump("VDN", vdn);
781 vfp_double_dump("VDM", vdm);
782 }
783
784 /*
785 * Ensure that 'n' is the largest magnitude number. Note that
786 * if 'n' and 'm' have equal exponents, we do not swap them.
787 * This ensures that NaN propagation works correctly.
788 */
789 if (vdn->exponent < vdm->exponent) {
790 struct vfp_double *t = vdn;
791 vdn = vdm;
792 vdm = t;
793 }
794
795 /*
796 * Is 'n' an infinity or a NaN? Note that 'm' may be a number,
797 * infinity or a NaN here.
798 */
799 if (vdn->exponent == 2047)
800 return vfp_double_fadd_nonnumber(vdd, vdn, vdm, fpscr);
801
802 /*
803 * We have two proper numbers, where 'vdn' is the larger magnitude.
804 *
805 * Copy 'n' to 'd' before doing the arithmetic.
806 */
807 *vdd = *vdn;
808
809 /*
810 * Align 'm' with the result.
811 */
812 exp_diff = vdn->exponent - vdm->exponent;
813 m_sig = vfp_shiftright64jamming(vdm->significand, exp_diff);
814
815 /*
816 * If the signs are different, we are really subtracting.
817 */
818 if (vdn->sign ^ vdm->sign) {
819 m_sig = vdn->significand - m_sig;
820 if ((s64)m_sig < 0) {
821 vdd->sign = vfp_sign_negate(vdd->sign);
822 m_sig = -m_sig;
823 } else if (m_sig == 0) {
824 vdd->sign = (fpscr & FPSCR_RMODE_MASK) ==
825 FPSCR_ROUND_MINUSINF ? 0x8000 : 0;
826 }
827 } else {
828 m_sig += vdn->significand;
829 }
830 vdd->significand = m_sig;
831
832 return 0;
833}
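
/*
 * Sketch of the semantics assumed for vfp_shiftright64jamming above: while
 * aligning the smaller operand, every bit shifted out is folded back into
 * bit 0 ("jammed") so the later rounding step can still tell that the
 * discarded tail was non-zero.
 */
#include <stdint.h>

static uint64_t shiftright64_jamming_sketch(uint64_t val, unsigned shift)
{
    if (shift == 0)
        return val;
    if (shift < 64)
        return (val >> shift) | ((val << (64 - shift)) != 0);
    return val != 0;                         /* everything became sticky */
}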
834
835static u32
836vfp_double_multiply(struct vfp_double *vdd, struct vfp_double *vdn,
837 struct vfp_double *vdm, u32 fpscr)
838{
839 vfp_double_dump("VDN", vdn);
840 vfp_double_dump("VDM", vdm);
841
842 /*
843 * Ensure that 'n' is the largest magnitude number. Note that
844 * if 'n' and 'm' have equal exponents, we do not swap them.
845 * This ensures that NaN propagation works correctly.
846 */
847 if (vdn->exponent < vdm->exponent) {
848 struct vfp_double *t = vdn;
849 vdn = vdm;
850 vdm = t;
851 pr_debug("VFP: swapping M <-> N\n");
852 }
853
854 vdd->sign = vdn->sign ^ vdm->sign;
855
856 /*
857 * If 'n' is an infinity or NaN, handle it. 'm' may be anything.
858 */
859 if (vdn->exponent == 2047) {
860 if (vdn->significand || (vdm->exponent == 2047 && vdm->significand))
861 return vfp_propagate_nan(vdd, vdn, vdm, fpscr);
862 if ((vdm->exponent | vdm->significand) == 0) {
863 *vdd = vfp_double_default_qnan;
864 return FPSCR_IOC;
865 }
866 vdd->exponent = vdn->exponent;
867 vdd->significand = 0;
868 return 0;
869 }
870
871 /*
872 * If 'm' is zero, the result is always zero. In this case,
873 * 'n' may be zero or a number, but it doesn't matter which.
874 */
875 if ((vdm->exponent | vdm->significand) == 0) {
876 vdd->exponent = 0;
877 vdd->significand = 0;
878 return 0;
879 }
880
881 /*
882 * We add 2 to the destination exponent for the same reason
883 * as the addition case - though this time we have +1 from
884 * each input operand.
885 */
886 vdd->exponent = vdn->exponent + vdm->exponent - 1023 + 2;
887 vdd->significand = vfp_hi64multiply64(vdn->significand, vdm->significand);
888
889 vfp_double_dump("VDD", vdd);
890 return 0;
891}
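
/*
 * Sketch of what vfp_hi64multiply64 is expected to compute, written with
 * the GCC/Clang __int128 extension purely for illustration: the 128-bit
 * product of the two significands is truncated to its top 64 bits, with any
 * non-zero low bits OR-ed into bit 0 as a sticky bit for rounding.
 */
#include <stdint.h>

static uint64_t hi64multiply64_sketch(uint64_t n, uint64_t m)
{
    unsigned __int128 product = (unsigned __int128)n * m;
    uint64_t hi = (uint64_t)(product >> 64);
    uint64_t lo = (uint64_t)product;
    return hi | (lo != 0);
}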
892
893#define NEG_MULTIPLY (1 << 0)
894#define NEG_SUBTRACT (1 << 1)
895
896static u32
897vfp_double_multiply_accumulate(ARMul_State* state, int dd, int dn, int dm, u32 fpscr, u32 negate, char *func)
898{
899 struct vfp_double vdd, vdp, vdn, vdm;
900 u32 exceptions;
901
902 vfp_double_unpack(&vdn, vfp_get_double(state, dn));
903 if (vdn.exponent == 0 && vdn.significand)
904 vfp_double_normalise_denormal(&vdn);
905
906 vfp_double_unpack(&vdm, vfp_get_double(state, dm));
907 if (vdm.exponent == 0 && vdm.significand)
908 vfp_double_normalise_denormal(&vdm);
909
910 exceptions = vfp_double_multiply(&vdp, &vdn, &vdm, fpscr);
911 if (negate & NEG_MULTIPLY)
912 vdp.sign = vfp_sign_negate(vdp.sign);
913
914 vfp_double_unpack(&vdn, vfp_get_double(state, dd));
915 if (negate & NEG_SUBTRACT)
916 vdn.sign = vfp_sign_negate(vdn.sign);
917
918 exceptions |= vfp_double_add(&vdd, &vdn, &vdp, fpscr);
919
920 return vfp_double_normaliseround(state, dd, &vdd, fpscr, exceptions, func);
921}
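
/*
 * Reference sketch of how the two NEG_* flags defined below select the four
 * multiply-accumulate variants (host doubles, ignoring the emulator's
 * rounding and exception semantics):
 *     0                           -> fmac  :  d + (n * m)
 *     NEG_MULTIPLY                -> fnmac :  d - (n * m)
 *     NEG_SUBTRACT                -> fmsc  : -d + (n * m)
 *     NEG_SUBTRACT | NEG_MULTIPLY -> fnmsc : -d - (n * m)
 */
static double multiply_accumulate_reference(double d, double n, double m, unsigned negate)
{
    double product = n * m;
    if (negate & (1 << 0))                   /* NEG_MULTIPLY */
        product = -product;
    if (negate & (1 << 1))                   /* NEG_SUBTRACT */
        d = -d;
    return d + product;
}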
922
923/*
924 * Standard operations
925 */
926
927/*
928 * dd = dd + (dn * dm)
929 */
930static u32 vfp_double_fmac(ARMul_State* state, int dd, int dn, int dm, u32 fpscr)
931{
932 pr_debug("In %s\n", __FUNCTION__);
933 return vfp_double_multiply_accumulate(state, dd, dn, dm, fpscr, 0, "fmac");
934}
935
936/*
937 * dd = dd - (dn * dm)
938 */
939static u32 vfp_double_fnmac(ARMul_State* state, int dd, int dn, int dm, u32 fpscr)
940{
941 pr_debug("In %s\n", __FUNCTION__);
942 return vfp_double_multiply_accumulate(state, dd, dn, dm, fpscr, NEG_MULTIPLY, "fnmac");
943}
944
945/*
946 * dd = -dd + (dn * dm)
947 */
948static u32 vfp_double_fmsc(ARMul_State* state, int dd, int dn, int dm, u32 fpscr)
949{
950 pr_debug("In %s\n", __FUNCTION__);
951 return vfp_double_multiply_accumulate(state, dd, dn, dm, fpscr, NEG_SUBTRACT, "fmsc");
952}
953
954/*
955 * dd = -dd - (dn * dm)
956 */
957static u32 vfp_double_fnmsc(ARMul_State* state, int dd, int dn, int dm, u32 fpscr)
958{
959 pr_debug("In %s\n", __FUNCTION__);
960 return vfp_double_multiply_accumulate(state, dd, dn, dm, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc");
961}
962
963/*
964 * dd = dn * dm
965 */
966static u32 vfp_double_fmul(ARMul_State* state, int dd, int dn, int dm, u32 fpscr)
967{
968 struct vfp_double vdd, vdn, vdm;
969 u32 exceptions;
970
971 pr_debug("In %s\n", __FUNCTION__);
972 vfp_double_unpack(&vdn, vfp_get_double(state, dn));
973 if (vdn.exponent == 0 && vdn.significand)
974 vfp_double_normalise_denormal(&vdn);
975
976 vfp_double_unpack(&vdm, vfp_get_double(state, dm));
977 if (vdm.exponent == 0 && vdm.significand)
978 vfp_double_normalise_denormal(&vdm);
979
980 exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr);
981 return vfp_double_normaliseround(state, dd, &vdd, fpscr, exceptions, "fmul");
982}
983
984/*
985 * dd = -(dn * dm)
986 */
987static u32 vfp_double_fnmul(ARMul_State* state, int dd, int dn, int dm, u32 fpscr)
988{
989 struct vfp_double vdd, vdn, vdm;
990 u32 exceptions;
991
992 pr_debug("In %s\n", __FUNCTION__);
993 vfp_double_unpack(&vdn, vfp_get_double(state, dn));
994 if (vdn.exponent == 0 && vdn.significand)
995 vfp_double_normalise_denormal(&vdn);
996
997 vfp_double_unpack(&vdm, vfp_get_double(state, dm));
998 if (vdm.exponent == 0 && vdm.significand)
999 vfp_double_normalise_denormal(&vdm);
1000
1001 exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr);
1002 vdd.sign = vfp_sign_negate(vdd.sign);
1003
1004 return vfp_double_normaliseround(state, dd, &vdd, fpscr, exceptions, "fnmul");
1005}
1006
1007/*
1008 * dd = dn + dm
1009 */
1010static u32 vfp_double_fadd(ARMul_State* state, int dd, int dn, int dm, u32 fpscr)
1011{
1012 struct vfp_double vdd, vdn, vdm;
1013 u32 exceptions;
1014
1015 pr_debug("In %s\n", __FUNCTION__);
1016 vfp_double_unpack(&vdn, vfp_get_double(state, dn));
1017 if (vdn.exponent == 0 && vdn.significand)
1018 vfp_double_normalise_denormal(&vdn);
1019
1020 vfp_double_unpack(&vdm, vfp_get_double(state, dm));
1021 if (vdm.exponent == 0 && vdm.significand)
1022 vfp_double_normalise_denormal(&vdm);
1023
1024 exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr);
1025
1026 return vfp_double_normaliseround(state, dd, &vdd, fpscr, exceptions, "fadd");
1027}
1028
1029/*
1030 * dd = dn - dm
1031 */
1032static u32 vfp_double_fsub(ARMul_State* state, int dd, int dn, int dm, u32 fpscr)
1033{
1034 struct vfp_double vdd, vdn, vdm;
1035 u32 exceptions;
1036
1037 pr_debug("In %s\n", __FUNCTION__);
1038 vfp_double_unpack(&vdn, vfp_get_double(state, dn));
1039 if (vdn.exponent == 0 && vdn.significand)
1040 vfp_double_normalise_denormal(&vdn);
1041
1042 vfp_double_unpack(&vdm, vfp_get_double(state, dm));
1043 if (vdm.exponent == 0 && vdm.significand)
1044 vfp_double_normalise_denormal(&vdm);
1045
1046 /*
1047 * Subtraction is like addition, but with a negated operand.
1048 */
1049 vdm.sign = vfp_sign_negate(vdm.sign);
1050
1051 exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr);
1052
1053 return vfp_double_normaliseround(state, dd, &vdd, fpscr, exceptions, "fsub");
1054}
1055
1056/*
1057 * dd = dn / dm
1058 */
1059static u32 vfp_double_fdiv(ARMul_State* state, int dd, int dn, int dm, u32 fpscr)
1060{
1061 struct vfp_double vdd, vdn, vdm;
1062 u32 exceptions = 0;
1063 int tm, tn;
1064
1065 pr_debug("In %s\n", __FUNCTION__);
1066 vfp_double_unpack(&vdn, vfp_get_double(state, dn));
1067 vfp_double_unpack(&vdm, vfp_get_double(state, dm));
1068
1069 vdd.sign = vdn.sign ^ vdm.sign;
1070
1071 tn = vfp_double_type(&vdn);
1072 tm = vfp_double_type(&vdm);
1073
1074 /*
1075 * Is n a NAN?
1076 */
1077 if (tn & VFP_NAN)
1078 goto vdn_nan;
1079
1080 /*
1081 * Is m a NAN?
1082 */
1083 if (tm & VFP_NAN)
1084 goto vdm_nan;
1085
1086 /*
1087 * If n and m are infinity, the result is invalid
1088 * If n and m are zero, the result is invalid
1089 */
1090 if (tm & tn & (VFP_INFINITY|VFP_ZERO))
1091 goto invalid;
1092
1093 /*
1094 * If n is infinity, the result is infinity
1095 */
1096 if (tn & VFP_INFINITY)
1097 goto infinity;
1098
1099 /*
1100 * If m is zero, raise div0 exceptions
1101 */
1102 if (tm & VFP_ZERO)
1103 goto divzero;
1104
1105 /*
1106 * If m is infinity, or n is zero, the result is zero
1107 */
1108 if (tm & VFP_INFINITY || tn & VFP_ZERO)
1109 goto zero;
1110
1111 if (tn & VFP_DENORMAL)
1112 vfp_double_normalise_denormal(&vdn);
1113 if (tm & VFP_DENORMAL)
1114 vfp_double_normalise_denormal(&vdm);
1115
1116 /*
1117 * Ok, we have two numbers, we can perform division.
1118 */
1119 vdd.exponent = vdn.exponent - vdm.exponent + 1023 - 1;
1120 vdm.significand <<= 1;
1121 if (vdm.significand <= (2 * vdn.significand)) {
1122 vdn.significand >>= 1;
1123 vdd.exponent++;
1124 }
1125 vdd.significand = vfp_estimate_div128to64(vdn.significand, 0, vdm.significand);
1126 if ((vdd.significand & 0x1ff) <= 2) {
1127 u64 termh, terml, remh, reml;
1128 mul64to128(&termh, &terml, vdm.significand, vdd.significand);
1129 sub128(&remh, &reml, vdn.significand, 0, termh, terml);
1130 while ((s64)remh < 0) {
1131 vdd.significand -= 1;
1132 add128(&remh, &reml, remh, reml, 0, vdm.significand);
1133 }
1134 vdd.significand |= (reml != 0);
1135 }
1136 return vfp_double_normaliseround(state, dd, &vdd, fpscr, 0, "fdiv");
1137
1138 vdn_nan:
1139 exceptions = vfp_propagate_nan(&vdd, &vdn, &vdm, fpscr);
1140 pack:
1141 vfp_put_double(state, vfp_double_pack(&vdd), dd);
1142 return exceptions;
1143
1144 vdm_nan:
1145 exceptions = vfp_propagate_nan(&vdd, &vdm, &vdn, fpscr);
1146 goto pack;
1147
1148 zero:
1149 vdd.exponent = 0;
1150 vdd.significand = 0;
1151 goto pack;
1152
1153 divzero:
1154 exceptions = FPSCR_DZC;
1155 infinity:
1156 vdd.exponent = 2047;
1157 vdd.significand = 0;
1158 goto pack;
1159
1160 invalid:
1161 vfp_put_double(state, vfp_double_pack(&vfp_double_default_qnan), dd);
1162 return FPSCR_IOC;
1163}
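
/*
 * Sketch of the correction step in fdiv above, using __int128 only for
 * illustration: vfp_estimate_div128to64 may return a quotient that is one
 * or two too large, so when the estimate sits near a rounding boundary
 * (low nine bits <= 2) the remainder is recomputed exactly, the quotient is
 * stepped down until the remainder is non-negative, and any leftover
 * remainder is kept as a sticky bit.
 */
#include <stdint.h>

static uint64_t correct_div_estimate(uint64_t n_hi, uint64_t m, uint64_t q)
{
    unsigned __int128 num = (unsigned __int128)n_hi << 64;   /* n : 0 */
    __int128 rem = (__int128)(num - (unsigned __int128)m * q);
    while (rem < 0) {                        /* estimate was too large */
        q -= 1;
        rem += m;
    }
    return q | (rem != 0);                   /* fold in sticky bit */
}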
1164
1165static struct op fops[] = {
1166 { vfp_double_fmac, 0 },
1167 { vfp_double_fmsc, 0 },
1168 { vfp_double_fmul, 0 },
1169 { vfp_double_fadd, 0 },
1170 { vfp_double_fnmac, 0 },
1171 { vfp_double_fnmsc, 0 },
1172 { vfp_double_fnmul, 0 },
1173 { vfp_double_fsub, 0 },
1174 { vfp_double_fdiv, 0 },
1175};
1176
1177#define FREG_BANK(x) ((x) & 0x0c)
1178#define FREG_IDX(x) ((x) & 3)
1179
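/*
 * Sketch of the short-vector register stepping these masks are used for in
 * the loop below: a register advances by the vector stride but wraps inside
 * its own four-register bank, and a scalar operand in bank 0 is never
 * stepped at all.
 */
static unsigned int step_vector_reg(unsigned int reg, unsigned int stride)
{
    return FREG_BANK(reg) + ((FREG_IDX(reg) + stride) & 3);
}
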
1180u32 vfp_double_cpdo(ARMul_State* state, u32 inst, u32 fpscr)
1181{
1182 u32 op = inst & FOP_MASK;
1183 u32 exceptions = 0;
1184 unsigned int dest;
1185 unsigned int dn = vfp_get_dn(inst);
1186 unsigned int dm;
1187 unsigned int vecitr, veclen, vecstride;
1188 struct op *fop;
1189
1190 pr_debug("In %s\n", __FUNCTION__);
1191 vecstride = (1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK));
1192
1193 fop = (op == FOP_EXT) ? &fops_ext[FEXT_TO_IDX(inst)] : &fops[FOP_TO_IDX(op)];
1194
1195 /*
1196 * fcvtds takes an sN register number as destination, not dN.
1197 * It also always operates on scalars.
1198 */
1199 if (fop->flags & OP_SD)
1200 dest = vfp_get_sd(inst);
1201 else
1202 dest = vfp_get_dd(inst);
1203
1204 /*
1205 * f[us]ito takes an sN operand, not a dN operand.
1206 */
1207 if (fop->flags & OP_SM)
1208 dm = vfp_get_sm(inst);
1209 else
1210 dm = vfp_get_dm(inst);
1211
1212 /*
1213 * If destination bank is zero, vector length is always '1'.
1214 * ARM DDI0100F C5.1.3, C5.3.2.
1215 */
1216 if ((fop->flags & OP_SCALAR) || (FREG_BANK(dest) == 0))
1217 veclen = 0;
1218 else
1219 veclen = fpscr & FPSCR_LENGTH_MASK;
1220
1221 pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride,
1222 (veclen >> FPSCR_LENGTH_BIT) + 1);
1223
1224 if (!fop->fn) {
1225 printf("VFP: could not find double op %d\n", FEXT_TO_IDX(inst));
1226 goto invalid;
1227 }
1228
1229 for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) {
1230 u32 except;
1231 char type;
1232
1233 type = fop->flags & OP_SD ? 's' : 'd';
1234 if (op == FOP_EXT)
1235 pr_debug("VFP: itr%d (%c%u) = op[%u] (d%u)\n",
1236 vecitr >> FPSCR_LENGTH_BIT,
1237 type, dest, dn, dm);
1238 else
1239 pr_debug("VFP: itr%d (%c%u) = (d%u) op[%u] (d%u)\n",
1240 vecitr >> FPSCR_LENGTH_BIT,
1241 type, dest, dn, FOP_TO_IDX(op), dm);
1242
1243 except = fop->fn(state, dest, dn, dm, fpscr);
1244 pr_debug("VFP: itr%d: exceptions=%08x\n",
1245 vecitr >> FPSCR_LENGTH_BIT, except);
1246
1247 exceptions |= except;
1248
1249 /*
1250 * CHECK: It appears to be undefined whether we stop when
1251 * we encounter an exception. We continue.
1252 */
1253 dest = FREG_BANK(dest) + ((FREG_IDX(dest) + vecstride) & 3);
1254 dn = FREG_BANK(dn) + ((FREG_IDX(dn) + vecstride) & 3);
1255 if (FREG_BANK(dm) != 0)
1256 dm = FREG_BANK(dm) + ((FREG_IDX(dm) + vecstride) & 3);
1257 }
1258 return exceptions;
1259
1260 invalid:
1261 return ~0;
1262}
diff --git a/src/core/arm/interpreter/vfp/vfpinstr.cpp b/src/core/arm/interpreter/vfp/vfpinstr.cpp
new file mode 100644
index 000000000..a57047911
--- /dev/null
+++ b/src/core/arm/interpreter/vfp/vfpinstr.cpp
@@ -0,0 +1,5123 @@
1/*
2 vfp/vfpinstr.c - ARM VFPv3 emulation unit - Individual instructions data
3 Copyright (C) 2003 Skyeye Develop Group
4 for help please send mail to <skyeye-developer@lists.gro.clinux.org>
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19*/
20
21/* Notice: this file should not be compiled as is, and is meant to be
22 included in other files only. */
23
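/*
 * A minimal sketch of how this fragment is intended to be consumed (the
 * surrounding context is hypothetical; only the VFP_* selector macros are
 * the ones used in this file): an including translation unit defines one
 * selector, includes the file, and thereby collects just that slice of
 * every instruction block, e.g. all decode entries or all interpreter
 * structs.
 *
 *     #define VFP_INTERPRETER_STRUCT
 *     #include "vfpinstr.cpp"
 *     #undef  VFP_INTERPRETER_STRUCT
 *
 *     #define VFP_INTERPRETER_TRANS
 *     #include "vfpinstr.cpp"
 *     #undef  VFP_INTERPRETER_TRANS
 */
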
24/* ----------------------------------------------------------------------- */
25/* CDP instructions */
26/* cond 1110 opc1 CRn- CRd- copr op20 CRm- CDP */
27
28/* ----------------------------------------------------------------------- */
29/* VMLA */
30/* cond 1110 0D00 Vn-- Vd-- 101X N0M0 Vm-- */
31#define vfpinstr vmla
32#define vfpinstr_inst vmla_inst
33#define VFPLABEL_INST VMLA_INST
34#ifdef VFP_DECODE
35{"vmla", 4, ARMVFP2, 23, 27, 0x1c, 20, 21, 0x0, 9, 11, 0x5, 4, 4, 0},
36#endif
37#ifdef VFP_DECODE_EXCLUSION
38{"vmla", 0, ARMVFP2, 0},
39#endif
40#ifdef VFP_INTERPRETER_TABLE
41INTERPRETER_TRANSLATE(vfpinstr),
42#endif
43#ifdef VFP_INTERPRETER_LABEL
44&&VFPLABEL_INST,
45#endif
46#ifdef VFP_INTERPRETER_STRUCT
47typedef struct _vmla_inst {
48 unsigned int instr;
49 unsigned int dp_operation;
50} vfpinstr_inst;
51#endif
52#ifdef VFP_INTERPRETER_TRANS
53ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
54{
55 VFP_DEBUG_TRANSLATE;
56
57 arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
58 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
59
60 inst_base->cond = BITS(inst, 28, 31);
61 inst_base->idx = index;
62 inst_base->br = NON_BRANCH;
63 inst_base->load_r15 = 0;
64
65 inst_cream->dp_operation = BIT(inst, 8);
66 inst_cream->instr = inst;
67
68 return inst_base;
69}
70#endif
71#ifdef VFP_INTERPRETER_IMPL
72VFPLABEL_INST:
73{
74 INC_ICOUNTER;
75 if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
76 CHECK_VFP_ENABLED;
77
78 DBG("VMLA :\n");
79
80 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
81
82 int ret;
83
84 if (inst_cream->dp_operation)
85 ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
86 else
87 ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
88
89 CHECK_VFP_CDP_RET;
90 }
91 cpu->Reg[15] += GET_INST_SIZE(cpu);
92 INC_PC(sizeof(vfpinstr_inst));
93 FETCH_INST;
94 GOTO_NEXT_INST;
95}
96#endif
97#ifdef VFP_CDP_TRANS
98if ((OPC_1 & 0xB) == 0 && (OPC_2 & 0x2) == 0)
99{
100 DBG("VMLA :\n");
101}
102#endif
103#ifdef VFP_DYNCOM_TABLE
104DYNCOM_FILL_ACTION(vfpinstr),
105#endif
106#ifdef VFP_DYNCOM_TAG
107int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
108{
109 int instr_size = INSTR_SIZE;
110 //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
111 //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
112 arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
113 return instr_size;
114}
115#endif
116#ifdef VFP_DYNCOM_TRANS
117int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
118 //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
119 //arch_arm_undef(cpu, bb, instr);
120 int m;
121 int n;
122 int d ;
123 int add = (BIT(6) == 0);
124 int s = BIT(8) == 0;
125 Value *mm;
126 Value *nn;
127 Value *tmp;
128 if(s){
129 m = BIT(5) | BITS(0,3) << 1;
130 n = BIT(7) | BITS(16,19) << 1;
131 d = BIT(22) | BITS(12,15) << 1;
132 mm = FR32(m);
133 nn = FR32(n);
134 tmp = FPMUL(nn,mm);
135 if(!add)
136 tmp = FPNEG32(tmp);
137 mm = FR32(d);
138 tmp = FPADD(mm,tmp);
139 //LETS(d,tmp);
140 LETFPS(d,tmp);
141 }else {
142 m = BITS(0,3) | BIT(5) << 4;
143 n = BITS(16,19) | BIT(7) << 4;
144 d = BIT(22) << 4 | BITS(12,15);
145 //mm = SITOFP(32,RSPR(m));
146 //LETS(d,tmp);
147 mm = ZEXT64(IBITCAST32(FR32(2 * m)));
148 nn = ZEXT64(IBITCAST32(FR32(2 * m + 1)));
149 tmp = OR(SHL(nn,CONST64(32)),mm);
150 mm = FPBITCAST64(tmp);
151 tmp = ZEXT64(IBITCAST32(FR32(2 * n)));
152 nn = ZEXT64(IBITCAST32(FR32(2 * n + 1)));
153 nn = OR(SHL(nn,CONST64(32)),tmp);
154 nn = FPBITCAST64(nn);
155 tmp = FPMUL(nn,mm);
156 if(!add)
157 tmp = FPNEG64(tmp);
158 mm = ZEXT64(IBITCAST32(FR32(2 * d)));
159 nn = ZEXT64(IBITCAST32(FR32(2 * d + 1)));
160 mm = OR(SHL(nn,CONST64(32)),mm);
161 mm = FPBITCAST64(mm);
162 tmp = FPADD(mm,tmp);
163 mm = TRUNC32(LSHR(IBITCAST64(tmp),CONST64(32)));
164 nn = TRUNC32(AND(IBITCAST64(tmp),CONST64(0xffffffff)));
165 LETFPS(2*d ,FPBITCAST32(nn));
166 LETFPS(d*2 + 1 , FPBITCAST32(mm));
167 }
168 return No_exp;
169}
170#endif
171#undef vfpinstr
172#undef vfpinstr_inst
173#undef VFPLABEL_INST
174
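/*
 * Standalone sketch (plain C, not the LLVM IR builders used above) of the
 * register packing the VMLA translation performs on the double-precision
 * path: a D register is backed by two single registers, so its bit pattern
 * is rebuilt by bit-casting both floats to 32-bit integers and
 * concatenating them, mirroring the IBITCAST32 / ZEXT64 / SHL / OR /
 * FPBITCAST64 sequence above.
 */
#include <stdint.h>
#include <string.h>

static double pack_double_from_singles(float lo_f, float hi_f)
{
    uint32_t lo, hi;
    uint64_t bits;
    double d;

    memcpy(&lo, &lo_f, sizeof lo);           /* IBITCAST32(FR32(2*m))     */
    memcpy(&hi, &hi_f, sizeof hi);           /* IBITCAST32(FR32(2*m + 1)) */
    bits = ((uint64_t)hi << 32) | lo;        /* OR(SHL(hi, 32), lo)       */
    memcpy(&d, &bits, sizeof d);             /* FPBITCAST64               */
    return d;
}
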
175/* ----------------------------------------------------------------------- */
176/* VMLS */
177/* cond 1110 0D00 Vn-- Vd-- 101X N1M0 Vm-- */
178#define vfpinstr vmls
179#define vfpinstr_inst vmls_inst
180#define VFPLABEL_INST VMLS_INST
181#ifdef VFP_DECODE
182{"vmls", 7, ARMVFP2, 28 , 31, 0xF, 25, 27, 0x1, 23, 23, 1, 11, 11, 0, 8, 9, 0x2, 6, 6, 1, 4, 4, 0},
183#endif
184#ifdef VFP_DECODE_EXCLUSION
185{"vmls", 0, ARMVFP2, 0},
186#endif
187#ifdef VFP_INTERPRETER_TABLE
188INTERPRETER_TRANSLATE(vfpinstr),
189#endif
190#ifdef VFP_INTERPRETER_LABEL
191&&VFPLABEL_INST,
192#endif
193#ifdef VFP_INTERPRETER_STRUCT
194typedef struct _vmls_inst {
195 unsigned int instr;
196 unsigned int dp_operation;
197} vfpinstr_inst;
198#endif
199#ifdef VFP_INTERPRETER_TRANS
200ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
201{
202 VFP_DEBUG_TRANSLATE;
203
204 arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
205 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
206
207 inst_base->cond = BITS(inst, 28, 31);
208 inst_base->idx = index;
209 inst_base->br = NON_BRANCH;
210 inst_base->load_r15 = 0;
211
212 inst_cream->dp_operation = BIT(inst, 8);
213 inst_cream->instr = inst;
214
215 return inst_base;
216}
217#endif
218#ifdef VFP_INTERPRETER_IMPL
219VFPLABEL_INST:
220{
221 INC_ICOUNTER;
222 if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
223 CHECK_VFP_ENABLED;
224
225 DBG("VMLS :\n");
226
227 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
228
229 int ret;
230
231 if (inst_cream->dp_operation)
232 ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
233 else
234 ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
235
236 CHECK_VFP_CDP_RET;
237 }
238 cpu->Reg[15] += GET_INST_SIZE(cpu);
239 INC_PC(sizeof(vfpinstr_inst));
240 FETCH_INST;
241 GOTO_NEXT_INST;
242}
243#endif
244#ifdef VFP_CDP_TRANS
245if ((OPC_1 & 0xB) == 0 && (OPC_2 & 0x2) == 2)
246{
247 DBG("VMLS :\n");
248}
249#endif
250#ifdef VFP_DYNCOM_TABLE
251DYNCOM_FILL_ACTION(vfpinstr),
252#endif
253#ifdef VFP_DYNCOM_TAG
254int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
255{
256 int instr_size = INSTR_SIZE;
257 //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
258 //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
259 arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
260 return instr_size;
261}
262#endif
263#ifdef VFP_DYNCOM_TRANS
264int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
265 DBG("\t\tin %s VMLS instruction is executed out of here.\n", __FUNCTION__);
266 //arch_arm_undef(cpu, bb, instr);
267 int m;
268 int n;
269 int d ;
270 int add = (BIT(6) == 0);
271 int s = BIT(8) == 0;
272 Value *mm;
273 Value *nn;
274 Value *tmp;
275 if(s){
276 m = BIT(5) | BITS(0,3) << 1;
277 n = BIT(7) | BITS(16,19) << 1;
278 d = BIT(22) | BITS(12,15) << 1;
279 mm = FR32(m);
280 nn = FR32(n);
281 tmp = FPMUL(nn,mm);
282 if(!add)
283 tmp = FPNEG32(tmp);
284 mm = FR32(d);
285 tmp = FPADD(mm,tmp);
286 //LETS(d,tmp);
287 LETFPS(d,tmp);
288 }else {
289 m = BITS(0,3) | BIT(5) << 4;
290 n = BITS(16,19) | BIT(7) << 4;
291 d = BIT(22) << 4 | BITS(12,15);
292 //mm = SITOFP(32,RSPR(m));
293 //LETS(d,tmp);
294 mm = ZEXT64(IBITCAST32(FR32(2 * m)));
295 nn = ZEXT64(IBITCAST32(FR32(2 * m + 1)));
296 tmp = OR(SHL(nn,CONST64(32)),mm);
297 mm = FPBITCAST64(tmp);
298 tmp = ZEXT64(IBITCAST32(FR32(2 * n)));
299 nn = ZEXT64(IBITCAST32(FR32(2 * n + 1)));
300 nn = OR(SHL(nn,CONST64(32)),tmp);
301 nn = FPBITCAST64(nn);
302 tmp = FPMUL(nn,mm);
303 if(!add)
304 tmp = FPNEG64(tmp);
305 mm = ZEXT64(IBITCAST32(FR32(2 * d)));
306 nn = ZEXT64(IBITCAST32(FR32(2 * d + 1)));
307 mm = OR(SHL(nn,CONST64(32)),mm);
308 mm = FPBITCAST64(mm);
309 tmp = FPADD(mm,tmp);
310 mm = TRUNC32(LSHR(IBITCAST64(tmp),CONST64(32)));
311 nn = TRUNC32(AND(IBITCAST64(tmp),CONST64(0xffffffff)));
312 LETFPS(2*d ,FPBITCAST32(nn));
313 LETFPS(d*2 + 1 , FPBITCAST32(mm));
314 }
315 return No_exp;
316}
317#endif
318#undef vfpinstr
319#undef vfpinstr_inst
320#undef VFPLABEL_INST
321
322/* ----------------------------------------------------------------------- */
323/* VNMLA */
324/* cond 1110 0D01 Vn-- Vd-- 101X N1M0 Vm-- */
325#define vfpinstr vnmla
326#define vfpinstr_inst vnmla_inst
327#define VFPLABEL_INST VNMLA_INST
328#ifdef VFP_DECODE
329//{"vnmla", 5, ARMVFP2, 23, 27, 0x1c, 20, 21, 0x0, 9, 11, 0x5, 6, 6, 1, 4, 4, 0},
330{"vnmla", 4, ARMVFP2, 23, 27, 0x1c, 20, 21, 0x1, 9, 11, 0x5, 4, 4, 0},
331{"vnmla", 5, ARMVFP2, 23, 27, 0x1c, 20, 21, 0x2, 9, 11, 0x5, 6, 6, 1, 4, 4, 0},
332//{"vnmla", 5, ARMVFP2, 23, 27, 0x1c, 20, 21, 0x2, 9, 11, 0x5, 6, 6, 1, 4, 4, 0},
333#endif
334#ifdef VFP_DECODE_EXCLUSION
335{"vnmla", 0, ARMVFP2, 0},
336{"vnmla", 0, ARMVFP2, 0},
337#endif
338#ifdef VFP_INTERPRETER_TABLE
339INTERPRETER_TRANSLATE(vfpinstr),
340INTERPRETER_TRANSLATE(vfpinstr),
341#endif
342#ifdef VFP_INTERPRETER_LABEL
343&&VFPLABEL_INST,
344&&VFPLABEL_INST,
345#endif
346#ifdef VFP_INTERPRETER_STRUCT
347typedef struct _vnmla_inst {
348 unsigned int instr;
349 unsigned int dp_operation;
350} vfpinstr_inst;
351#endif
352#ifdef VFP_INTERPRETER_TRANS
353ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
354{
355 VFP_DEBUG_TRANSLATE;
356
357 arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
358 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
359
360 inst_base->cond = BITS(inst, 28, 31);
361 inst_base->idx = index;
362 inst_base->br = NON_BRANCH;
363 inst_base->load_r15 = 0;
364
365 inst_cream->dp_operation = BIT(inst, 8);
366 inst_cream->instr = inst;
367
368 return inst_base;
369}
370#endif
371#ifdef VFP_INTERPRETER_IMPL
372VFPLABEL_INST:
373{
374 INC_ICOUNTER;
375 if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
376 CHECK_VFP_ENABLED;
377
378 DBG("VNMLA :\n");
379
380 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
381
382 int ret;
383
384 if (inst_cream->dp_operation)
385 ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
386 else
387 ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
388
389 CHECK_VFP_CDP_RET;
390 }
391 cpu->Reg[15] += GET_INST_SIZE(cpu);
392 INC_PC(sizeof(vfpinstr_inst));
393 FETCH_INST;
394 GOTO_NEXT_INST;
395}
396#endif
397#ifdef VFP_CDP_TRANS
398if ((OPC_1 & 0xB) == 1 && (OPC_2 & 0x2) == 2)
399{
400 DBG("VNMLA :\n");
401}
402#endif
403#ifdef VFP_DYNCOM_TABLE
404DYNCOM_FILL_ACTION(vfpinstr),
405DYNCOM_FILL_ACTION(vfpinstr),
406#endif
407#ifdef VFP_DYNCOM_TAG
408int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
409{
410 int instr_size = INSTR_SIZE;
411 //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
412 //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
413 arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
414 return instr_size;
415}
416#endif
417#ifdef VFP_DYNCOM_TRANS
418int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
419 DBG("\t\tin %s VNMLA instruction is executed out of here.\n", __FUNCTION__);
420 //arch_arm_undef(cpu, bb, instr);
421 int m;
422 int n;
423 int d ;
424 int add = (BIT(6) == 0);
425 int s = BIT(8) == 0;
426 Value *mm;
427 Value *nn;
428 Value *tmp;
429 if(s){
430 m = BIT(5) | BITS(0,3) << 1;
431 n = BIT(7) | BITS(16,19) << 1;
432 d = BIT(22) | BITS(12,15) << 1;
433 mm = FR32(m);
434 nn = FR32(n);
435 tmp = FPMUL(nn,mm);
436 if(!add)
437 tmp = FPNEG32(tmp);
438 mm = FR32(d);
439 tmp = FPADD(FPNEG32(mm),tmp);
440 //LETS(d,tmp);
441 LETFPS(d,tmp);
442 }else {
443 m = BITS(0,3) | BIT(5) << 4;
444 n = BITS(16,19) | BIT(7) << 4;
445 d = BIT(22) << 4 | BITS(12,15);
446 //mm = SITOFP(32,RSPR(m));
447 //LETS(d,tmp);
448 mm = ZEXT64(IBITCAST32(FR32(2 * m)));
449 nn = ZEXT64(IBITCAST32(FR32(2 * m + 1)));
450 tmp = OR(SHL(nn,CONST64(32)),mm);
451 mm = FPBITCAST64(tmp);
452 tmp = ZEXT64(IBITCAST32(FR32(2 * n)));
453 nn = ZEXT64(IBITCAST32(FR32(2 * n + 1)));
454 nn = OR(SHL(nn,CONST64(32)),tmp);
455 nn = FPBITCAST64(nn);
456 tmp = FPMUL(nn,mm);
457 if(!add)
458 tmp = FPNEG64(tmp);
459 mm = ZEXT64(IBITCAST32(FR32(2 * d)));
460 nn = ZEXT64(IBITCAST32(FR32(2 * d + 1)));
461 mm = OR(SHL(nn,CONST64(32)),mm);
462 mm = FPBITCAST64(mm);
463 tmp = FPADD(FPNEG64(mm),tmp);
464 mm = TRUNC32(LSHR(IBITCAST64(tmp),CONST64(32)));
465 nn = TRUNC32(AND(IBITCAST64(tmp),CONST64(0xffffffff)));
466 LETFPS(2*d ,FPBITCAST32(nn));
467 LETFPS(d*2 + 1 , FPBITCAST32(mm));
468 }
469 return No_exp;
470}
471#endif
472#undef vfpinstr
473#undef vfpinstr_inst
474#undef VFPLABEL_INST
475
476/* ----------------------------------------------------------------------- */
477/* VNMLS */
478/* cond 1110 0D01 Vn-- Vd-- 101X N0M0 Vm-- */
479#define vfpinstr vnmls
480#define vfpinstr_inst vnmls_inst
481#define VFPLABEL_INST VNMLS_INST
482#ifdef VFP_DECODE
483{"vnmls", 5, ARMVFP2, 23, 27, 0x1c, 20, 21, 0x1, 9, 11, 0x5, 6, 6, 0, 4, 4, 0},
484#endif
485#ifdef VFP_DECODE_EXCLUSION
486{"vnmls", 0, ARMVFP2, 0},
487#endif
488#ifdef VFP_INTERPRETER_TABLE
489INTERPRETER_TRANSLATE(vfpinstr),
490#endif
491#ifdef VFP_INTERPRETER_LABEL
492&&VFPLABEL_INST,
493#endif
494#ifdef VFP_INTERPRETER_STRUCT
495typedef struct _vnmls_inst {
496 unsigned int instr;
497 unsigned int dp_operation;
498} vfpinstr_inst;
499#endif
500#ifdef VFP_INTERPRETER_TRANS
501ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
502{
503 VFP_DEBUG_TRANSLATE;
504
505 arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
506 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
507
508 inst_base->cond = BITS(inst, 28, 31);
509 inst_base->idx = index;
510 inst_base->br = NON_BRANCH;
511 inst_base->load_r15 = 0;
512
513 inst_cream->dp_operation = BIT(inst, 8);
514 inst_cream->instr = inst;
515
516 return inst_base;
517}
518#endif
519#ifdef VFP_INTERPRETER_IMPL
520VFPLABEL_INST:
521{
522 INC_ICOUNTER;
523 if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
524 CHECK_VFP_ENABLED;
525
526 DBG("VNMLS :\n");
527
528 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
529
530 int ret;
531
532 if (inst_cream->dp_operation)
533 ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
534 else
535 ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
536
537 CHECK_VFP_CDP_RET;
538 }
539 cpu->Reg[15] += GET_INST_SIZE(cpu);
540 INC_PC(sizeof(vfpinstr_inst));
541 FETCH_INST;
542 GOTO_NEXT_INST;
543}
544#endif
545#ifdef VFP_CDP_TRANS
546if ((OPC_1 & 0xB) == 1 && (OPC_2 & 0x2) == 0)
547{
548 DBG("VNMLS :\n");
549}
550#endif
551#ifdef VFP_DYNCOM_TABLE
552DYNCOM_FILL_ACTION(vfpinstr),
553#endif
554#ifdef VFP_DYNCOM_TAG
555int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
556{
557 int instr_size = INSTR_SIZE;
558 DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
559 //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
560 arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
561 return instr_size;
562}
563#endif
564#ifdef VFP_DYNCOM_TRANS
565int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
566 DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
567 //arch_arm_undef(cpu, bb, instr);
568 int m;
569 int n;
570 int d ;
571 int add = (BIT(6) == 0);
572 int s = BIT(8) == 0;
573 Value *mm;
574 Value *nn;
575 Value *tmp;
576 if(s){
577 m = BIT(5) | BITS(0,3) << 1;
578 n = BIT(7) | BITS(16,19) << 1;
579 d = BIT(22) | BITS(12,15) << 1;
580 mm = FR32(m);
581 nn = FR32(n);
582 tmp = FPMUL(nn,mm);
583 if(!add)
584 tmp = FPNEG32(tmp);
585 mm = FR32(d);
586 tmp = FPADD(FPNEG32(mm),tmp);
587 //LETS(d,tmp);
588 LETFPS(d,tmp);
589 }else {
590 m = BITS(0,3) | BIT(5) << 4;
591 n = BITS(16,19) | BIT(7) << 4;
592 d = BIT(22) << 4 | BITS(12,15);
593 //mm = SITOFP(32,RSPR(m));
594 //LETS(d,tmp);
595 mm = ZEXT64(IBITCAST32(FR32(2 * m)));
596 nn = ZEXT64(IBITCAST32(FR32(2 * m + 1)));
597 tmp = OR(SHL(nn,CONST64(32)),mm);
598 mm = FPBITCAST64(tmp);
599 tmp = ZEXT64(IBITCAST32(FR32(2 * n)));
600 nn = ZEXT64(IBITCAST32(FR32(2 * n + 1)));
601 nn = OR(SHL(nn,CONST64(32)),tmp);
602 nn = FPBITCAST64(nn);
603 tmp = FPMUL(nn,mm);
604 if(!add)
605 tmp = FPNEG64(tmp);
606 mm = ZEXT64(IBITCAST32(FR32(2 * d)));
607 nn = ZEXT64(IBITCAST32(FR32(2 * d + 1)));
608 mm = OR(SHL(nn,CONST64(32)),mm);
609 mm = FPBITCAST64(mm);
610 tmp = FPADD(FPNEG64(mm),tmp);
611 mm = TRUNC32(LSHR(IBITCAST64(tmp),CONST64(32)));
612 nn = TRUNC32(AND(IBITCAST64(tmp),CONST64(0xffffffff)));
613 LETFPS(2*d ,FPBITCAST32(nn));
614 LETFPS(d*2 + 1 , FPBITCAST32(mm));
615 }
616 return No_exp;
617}
618#endif
619#undef vfpinstr
620#undef vfpinstr_inst
621#undef VFPLABEL_INST
622
623/* ----------------------------------------------------------------------- */
624/* VNMUL */
625/* cond 1110 0D10 Vn-- Vd-- 101X N1M0 Vm-- */
626#define vfpinstr vnmul
627#define vfpinstr_inst vnmul_inst
628#define VFPLABEL_INST VNMUL_INST
629#ifdef VFP_DECODE
630{"vnmul", 5, ARMVFP2, 23, 27, 0x1c, 20, 21, 0x2, 9, 11, 0x5, 6, 6, 1, 4, 4, 0},
631#endif
632#ifdef VFP_DECODE_EXCLUSION
633{"vnmul", 0, ARMVFP2, 0},
634#endif
635#ifdef VFP_INTERPRETER_TABLE
636INTERPRETER_TRANSLATE(vfpinstr),
637#endif
638#ifdef VFP_INTERPRETER_LABEL
639&&VFPLABEL_INST,
640#endif
641#ifdef VFP_INTERPRETER_STRUCT
642typedef struct _vnmul_inst {
643 unsigned int instr;
644 unsigned int dp_operation;
645} vfpinstr_inst;
646#endif
647#ifdef VFP_INTERPRETER_TRANS
648ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
649{
650 VFP_DEBUG_TRANSLATE;
651
652 arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
653 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
654
655 inst_base->cond = BITS(inst, 28, 31);
656 inst_base->idx = index;
657 inst_base->br = NON_BRANCH;
658 inst_base->load_r15 = 0;
659
660 inst_cream->dp_operation = BIT(inst, 8);
661 inst_cream->instr = inst;
662
663 return inst_base;
664}
665#endif
666#ifdef VFP_INTERPRETER_IMPL
667VFPLABEL_INST:
668{
669 INC_ICOUNTER;
670 if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
671 CHECK_VFP_ENABLED;
672
673 DBG("VNMUL :\n");
674
675 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
676
677 int ret;
678
679 if (inst_cream->dp_operation)
680 ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
681 else
682 ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
683
684 CHECK_VFP_CDP_RET;
685 }
686 cpu->Reg[15] += GET_INST_SIZE(cpu);
687 INC_PC(sizeof(vfpinstr_inst));
688 FETCH_INST;
689 GOTO_NEXT_INST;
690}
691#endif
692#ifdef VFP_CDP_TRANS
693if ((OPC_1 & 0xB) == 2 && (OPC_2 & 0x2) == 2)
694{
695 DBG("VNMUL :\n");
696}
697#endif
698#ifdef VFP_DYNCOM_TABLE
699DYNCOM_FILL_ACTION(vfpinstr),
700#endif
701#ifdef VFP_DYNCOM_TAG
702int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
703{
704 int instr_size = INSTR_SIZE;
705 DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
706 //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
707 arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
708 return instr_size;
709}
710#endif
711#ifdef VFP_DYNCOM_TRANS
712int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
713 DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
714 //arch_arm_undef(cpu, bb, instr);
715 int m;
716 int n;
717 int d ;
718 int add = (BIT(6) == 0);
719 int s = BIT(8) == 0;
720 Value *mm;
721 Value *nn;
722 Value *tmp;
723 if(s){
724 m = BIT(5) | BITS(0,3) << 1;
725 n = BIT(7) | BITS(16,19) << 1;
726 d = BIT(22) | BITS(12,15) << 1;
727 mm = FR32(m);
728 nn = FR32(n);
729 tmp = FPMUL(nn,mm);
730 //LETS(d,tmp);
731 LETFPS(d,FPNEG32(tmp));
732 }else {
733 m = BITS(0,3) | BIT(5) << 4;
734 n = BITS(16,19) | BIT(7) << 4;
735 d = BIT(22) << 4 | BITS(12,15);
736 //mm = SITOFP(32,RSPR(m));
737 //LETS(d,tmp);
738 mm = ZEXT64(IBITCAST32(FR32(2 * m)));
739 nn = ZEXT64(IBITCAST32(FR32(2 * m + 1)));
740 tmp = OR(SHL(nn,CONST64(32)),mm);
741 mm = FPBITCAST64(tmp);
742 tmp = ZEXT64(IBITCAST32(FR32(2 * n)));
743 nn = ZEXT64(IBITCAST32(FR32(2 * n + 1)));
744 nn = OR(SHL(nn,CONST64(32)),tmp);
745 nn = FPBITCAST64(nn);
746 tmp = FPMUL(nn,mm);
747 tmp = FPNEG64(tmp);
748 mm = TRUNC32(LSHR(IBITCAST64(tmp),CONST64(32)));
749 nn = TRUNC32(AND(IBITCAST64(tmp),CONST64(0xffffffff)));
750 LETFPS(2*d ,FPBITCAST32(nn));
751 LETFPS(d*2 + 1 , FPBITCAST32(mm));
752 }
753 return No_exp;
754}
755#endif
756#undef vfpinstr
757#undef vfpinstr_inst
758#undef VFPLABEL_INST
759
760/* ----------------------------------------------------------------------- */
761/* VMUL */
762/* cond 1110 0D10 Vn-- Vd-- 101X N0M0 Vm-- */
763#define vfpinstr vmul
764#define vfpinstr_inst vmul_inst
765#define VFPLABEL_INST VMUL_INST
766#ifdef VFP_DECODE
767{"vmul", 5, ARMVFP2, 23, 27, 0x1c, 20, 21, 0x2, 9, 11, 0x5, 6, 6, 0, 4, 4, 0},
768#endif
769#ifdef VFP_DECODE_EXCLUSION
770{"vmul", 0, ARMVFP2, 0},
771#endif
772#ifdef VFP_INTERPRETER_TABLE
773INTERPRETER_TRANSLATE(vfpinstr),
774#endif
775#ifdef VFP_INTERPRETER_LABEL
776&&VFPLABEL_INST,
777#endif
778#ifdef VFP_INTERPRETER_STRUCT
779typedef struct _vmul_inst {
780 unsigned int instr;
781 unsigned int dp_operation;
782} vfpinstr_inst;
783#endif
784#ifdef VFP_INTERPRETER_TRANS
785ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
786{
787 VFP_DEBUG_TRANSLATE;
788
789 arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
790 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
791
792 inst_base->cond = BITS(inst, 28, 31);
793 inst_base->idx = index;
794 inst_base->br = NON_BRANCH;
795 inst_base->load_r15 = 0;
796
797 inst_cream->dp_operation = BIT(inst, 8);
798 inst_cream->instr = inst;
799
800 return inst_base;
801}
802#endif
803#ifdef VFP_INTERPRETER_IMPL
804VFPLABEL_INST:
805{
806 INC_ICOUNTER;
807 if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
808 CHECK_VFP_ENABLED;
809
810 DBG("VMUL :\n");
811
812 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
813
814 int ret;
815
816 if (inst_cream->dp_operation)
817 ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
818 else
819 ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
820
821 CHECK_VFP_CDP_RET;
822 }
823 cpu->Reg[15] += GET_INST_SIZE(cpu);
824 INC_PC(sizeof(vfpinstr_inst));
825 FETCH_INST;
826 GOTO_NEXT_INST;
827}
828#endif
829#ifdef VFP_CDP_TRANS
830if ((OPC_1 & 0xB) == 2 && (OPC_2 & 0x2) == 0)
831{
832 DBG("VMUL :\n");
833}
834#endif
835#ifdef VFP_DYNCOM_TABLE
836DYNCOM_FILL_ACTION(vfpinstr),
837#endif
838#ifdef VFP_DYNCOM_TAG
839int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
840{
841 int instr_size = INSTR_SIZE;
842 //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
843 //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
844 arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
845 return instr_size;
846}
847#endif
848#ifdef VFP_DYNCOM_TRANS
849int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
850 DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
851 //printf("\n\n\t\tin %s instruction is executed out.\n\n", __FUNCTION__);
852 //arch_arm_undef(cpu, bb, instr);
853 int m;
854 int n;
855 int d ;
856 int s = BIT(8) == 0;
857 Value *mm;
858 Value *nn;
859 Value *tmp;
860 if(s){
861 m = BIT(5) | BITS(0,3) << 1;
862 n = BIT(7) | BITS(16,19) << 1;
863 d = BIT(22) | BITS(12,15) << 1;
864 //mm = SITOFP(32,FR(m));
865 //nn = SITOFP(32,FRn));
866 mm = FR32(m);
867 nn = FR32(n);
868 tmp = FPMUL(nn,mm);
869 //LETS(d,tmp);
870 LETFPS(d,tmp);
871 }else {
872 m = BITS(0,3) | BIT(5) << 4;
873 n = BITS(16,19) | BIT(7) << 4;
874 d = BIT(22) << 4 | BITS(12,15);
875 //mm = SITOFP(32,RSPR(m));
876 //LETS(d,tmp);
877 Value *lo = FR32(2 * m);
878 Value *hi = FR32(2 * m + 1);
879 hi = IBITCAST32(hi);
880 lo = IBITCAST32(lo);
881 Value *hi64 = ZEXT64(hi);
882 Value* lo64 = ZEXT64(lo);
883 Value* v64 = OR(SHL(hi64,CONST64(32)),lo64);
884 Value* m0 = FPBITCAST64(v64);
885 lo = FR32(2 * n);
886 hi = FR32(2 * n + 1);
887 hi = IBITCAST32(hi);
888 lo = IBITCAST32(lo);
889 hi64 = ZEXT64(hi);
890 lo64 = ZEXT64(lo);
891 v64 = OR(SHL(hi64,CONST64(32)),lo64);
892 Value *n0 = FPBITCAST64(v64);
893 tmp = FPMUL(n0,m0);
894 Value *val64 = IBITCAST64(tmp);
895 hi = LSHR(val64,CONST64(32));
896 lo = AND(val64,CONST64(0xffffffff));
897 hi = TRUNC32(hi);
898 lo = TRUNC32(lo);
899 hi = FPBITCAST32(hi);
900 lo = FPBITCAST32(lo);
901 LETFPS(2*d ,lo);
902 LETFPS(d*2 + 1 , hi);
903 }
904 return No_exp;
905}
906#endif
907#undef vfpinstr
908#undef vfpinstr_inst
909#undef VFPLABEL_INST
910
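/*
 * Companion sketch to the packing helper shown after the VMLA block: once a
 * 64-bit result has been produced, its bit pattern is split back into the
 * two single registers that back the destination D register, mirroring the
 * IBITCAST64 / LSHR / TRUNC32 / FPBITCAST32 / LETFPS sequence above.
 */
#include <stdint.h>
#include <string.h>

static void unpack_double_to_singles(double d, float *lo_f, float *hi_f)
{
    uint64_t bits;
    uint32_t lo, hi;

    memcpy(&bits, &d, sizeof bits);          /* IBITCAST64(result)          */
    lo = (uint32_t)(bits & 0xffffffffu);     /* AND(..., 0xffffffff)        */
    hi = (uint32_t)(bits >> 32);             /* LSHR(..., 32)               */
    memcpy(lo_f, &lo, sizeof lo);            /* FPBITCAST32 -> FR32(2*d)    */
    memcpy(hi_f, &hi, sizeof hi);            /* FPBITCAST32 -> FR32(2*d+1)  */
}
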
911/* ----------------------------------------------------------------------- */
912/* VADD */
913/* cond 1110 0D11 Vn-- Vd-- 101X N0M0 Vm-- */
914#define vfpinstr vadd
915#define vfpinstr_inst vadd_inst
916#define VFPLABEL_INST VADD_INST
917#ifdef VFP_DECODE
918{"vadd", 5, ARMVFP2, 23, 27, 0x1c, 20, 21, 0x3, 9, 11, 0x5, 6, 6, 0, 4, 4, 0},
919#endif
920#ifdef VFP_DECODE_EXCLUSION
921{"vadd", 0, ARMVFP2, 0},
922#endif
923#ifdef VFP_INTERPRETER_TABLE
924INTERPRETER_TRANSLATE(vfpinstr),
925#endif
926#ifdef VFP_INTERPRETER_LABEL
927&&VFPLABEL_INST,
928#endif
929#ifdef VFP_INTERPRETER_STRUCT
930typedef struct _vadd_inst {
931 unsigned int instr;
932 unsigned int dp_operation;
933} vfpinstr_inst;
934#endif
935#ifdef VFP_INTERPRETER_TRANS
936ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
937{
938 VFP_DEBUG_TRANSLATE;
939
940 arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
941 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
942
943 inst_base->cond = BITS(inst, 28, 31);
944 inst_base->idx = index;
945 inst_base->br = NON_BRANCH;
946 inst_base->load_r15 = 0;
947
948 inst_cream->dp_operation = BIT(inst, 8);
949 inst_cream->instr = inst;
950
951 return inst_base;
952}
953#endif
954#ifdef VFP_INTERPRETER_IMPL
955VFPLABEL_INST:
956{
957 INC_ICOUNTER;
958 if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
959 CHECK_VFP_ENABLED;
960
961 DBG("VADD :\n");
962
963 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
964
965 int ret;
966
967 if (inst_cream->dp_operation)
968 ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
969 else
970 ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
971
972 CHECK_VFP_CDP_RET;
973 }
974 cpu->Reg[15] += GET_INST_SIZE(cpu);
975 INC_PC(sizeof(vfpinstr_inst));
976 FETCH_INST;
977 GOTO_NEXT_INST;
978}
979#endif
980#ifdef VFP_CDP_TRANS
981if ((OPC_1 & 0xB) == 3 && (OPC_2 & 0x2) == 0)
982{
983 DBG("VADD :\n");
984}
985#endif
986#ifdef VFP_DYNCOM_TABLE
987DYNCOM_FILL_ACTION(vfpinstr),
988#endif
989#ifdef VFP_DYNCOM_TAG
990int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
991{
992 int instr_size = INSTR_SIZE;
993 DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
994 //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
995 arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
996 return instr_size;
997}
998#endif
999#ifdef VFP_DYNCOM_TRANS
1000int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
1001 DBG("\t\tin %s instruction will implement out of JIT.\n", __FUNCTION__);
1002 //arch_arm_undef(cpu, bb, instr);
1003 int m;
1004 int n;
1005 int d ;
1006 int s = BIT(8) == 0;
1007 Value *mm;
1008 Value *nn;
1009 Value *tmp;
1010 if(s){
1011 m = BIT(5) | BITS(0,3) << 1;
1012 n = BIT(7) | BITS(16,19) << 1;
1013 d = BIT(22) | BITS(12,15) << 1;
1014 mm = FR32(m);
1015 nn = FR32(n);
1016 tmp = FPADD(nn,mm);
1017 LETFPS(d,tmp);
1018 }else {
1019 m = BITS(0,3) | BIT(5) << 4;
1020 n = BITS(16,19) | BIT(7) << 4;
1021 d = BIT(22) << 4 | BITS(12,15);
1022 Value *lo = FR32(2 * m);
1023 Value *hi = FR32(2 * m + 1);
1024 hi = IBITCAST32(hi);
1025 lo = IBITCAST32(lo);
1026 Value *hi64 = ZEXT64(hi);
1027 Value* lo64 = ZEXT64(lo);
1028 Value* v64 = OR(SHL(hi64,CONST64(32)),lo64);
1029 Value* m0 = FPBITCAST64(v64);
1030 lo = FR32(2 * n);
1031 hi = FR32(2 * n + 1);
1032 hi = IBITCAST32(hi);
1033 lo = IBITCAST32(lo);
1034 hi64 = ZEXT64(hi);
1035 lo64 = ZEXT64(lo);
1036 v64 = OR(SHL(hi64,CONST64(32)),lo64);
1037 Value *n0 = FPBITCAST64(v64);
1038 tmp = FPADD(n0,m0);
1039 Value *val64 = IBITCAST64(tmp);
1040 hi = LSHR(val64,CONST64(32));
1041 lo = AND(val64,CONST64(0xffffffff));
1042 hi = TRUNC32(hi);
1043 lo = TRUNC32(lo);
1044 hi = FPBITCAST32(hi);
1045 lo = FPBITCAST32(lo);
1046 LETFPS(2*d ,lo);
1047 LETFPS(d*2 + 1 , hi);
1048 }
1049 return No_exp;
1050}
1051#endif
1052#undef vfpinstr
1053#undef vfpinstr_inst
1054#undef VFPLABEL_INST
1055
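/*
 * Sketch of the operand register numbering used throughout these
 * translators (illustrative helper, not part of the decoder): a
 * single-precision Sm is encoded as Vm:M, so bit 5 supplies the low bit,
 * while a double-precision Dm is M:Vm, so bit 5 supplies the high bit.
 * The n and d fields follow the same pattern with bits 7 and 22.
 */
#include <stdint.h>

static unsigned int vfp_operand_m(uint32_t instr, int single)
{
    unsigned int vm = instr & 0xf;           /* BITS(0,3) */
    unsigned int m  = (instr >> 5) & 1;      /* BIT(5)    */
    return single ? (vm << 1) | m : (m << 4) | vm;
}
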
1056/* ----------------------------------------------------------------------- */
1057/* VSUB */
1058/* cond 1110 0D11 Vn-- Vd-- 101X N1M0 Vm-- */
1059#define vfpinstr vsub
1060#define vfpinstr_inst vsub_inst
1061#define VFPLABEL_INST VSUB_INST
1062#ifdef VFP_DECODE
1063{"vsub", 5, ARMVFP2, 23, 27, 0x1c, 20, 21, 0x3, 9, 11, 0x5, 6, 6, 1, 4, 4, 0},
1064#endif
1065#ifdef VFP_DECODE_EXCLUSION
1066{"vsub", 0, ARMVFP2, 0},
1067#endif
1068#ifdef VFP_INTERPRETER_TABLE
1069INTERPRETER_TRANSLATE(vfpinstr),
1070#endif
1071#ifdef VFP_INTERPRETER_LABEL
1072&&VFPLABEL_INST,
1073#endif
1074#ifdef VFP_INTERPRETER_STRUCT
1075typedef struct _vsub_inst {
1076 unsigned int instr;
1077 unsigned int dp_operation;
1078} vfpinstr_inst;
1079#endif
1080#ifdef VFP_INTERPRETER_TRANS
1081ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
1082{
1083 VFP_DEBUG_TRANSLATE;
1084
1085 arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
1086 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
1087
1088 inst_base->cond = BITS(inst, 28, 31);
1089 inst_base->idx = index;
1090 inst_base->br = NON_BRANCH;
1091 inst_base->load_r15 = 0;
1092
1093 inst_cream->dp_operation = BIT(inst, 8);
1094 inst_cream->instr = inst;
1095
1096 return inst_base;
1097}
1098#endif
1099#ifdef VFP_INTERPRETER_IMPL
1100VFPLABEL_INST:
1101{
1102 INC_ICOUNTER;
1103 if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
1104 CHECK_VFP_ENABLED;
1105
1106 DBG("VSUB :\n");
1107
1108 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
1109
1110 int ret;
1111
1112 if (inst_cream->dp_operation)
1113 ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
1114 else
1115 ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
1116
1117 CHECK_VFP_CDP_RET;
1118 }
1119 cpu->Reg[15] += GET_INST_SIZE(cpu);
1120 INC_PC(sizeof(vfpinstr_inst));
1121 FETCH_INST;
1122 GOTO_NEXT_INST;
1123}
1124#endif
1125#ifdef VFP_CDP_TRANS
1126if ((OPC_1 & 0xB) == 3 && (OPC_2 & 0x2) == 2)
1127{
1128 DBG("VSUB :\n");
1129}
1130#endif
1131#ifdef VFP_DYNCOM_TABLE
1132DYNCOM_FILL_ACTION(vfpinstr),
1133#endif
1134#ifdef VFP_DYNCOM_TAG
1135int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
1136{
1137 int instr_size = INSTR_SIZE;
1138 //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
1139 arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
1140 return instr_size;
1141}
1142#endif
1143#ifdef VFP_DYNCOM_TRANS
1144int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
1145 DBG("\t\tin %s instr=0x%x, instruction is executed out of JIT.\n", __FUNCTION__, instr);
1146 //arch_arm_undef(cpu, bb, instr);
1147 int m;
1148 int n;
1149 int d ;
1150 int s = BIT(8) == 0;
1151 Value *mm;
1152 Value *nn;
1153 Value *tmp;
1154 if(s){
1155 m = BIT(5) | BITS(0,3) << 1;
1156 n = BIT(7) | BITS(16,19) << 1;
1157 d = BIT(22) | BITS(12,15) << 1;
1158 mm = FR32(m);
1159 nn = FR32(n);
1160 tmp = FPSUB(nn,mm);
1161 LETFPS(d,tmp);
1162 }else {
1163 m = BITS(0,3) | BIT(5) << 4;
1164 n = BITS(16,19) | BIT(7) << 4;
1165 d = BIT(22) << 4 | BITS(12,15);
1166 Value *lo = FR32(2 * m);
1167 Value *hi = FR32(2 * m + 1);
1168 hi = IBITCAST32(hi);
1169 lo = IBITCAST32(lo);
1170 Value *hi64 = ZEXT64(hi);
1171 Value* lo64 = ZEXT64(lo);
1172 Value* v64 = OR(SHL(hi64,CONST64(32)),lo64);
1173 Value* m0 = FPBITCAST64(v64);
1174 lo = FR32(2 * n);
1175 hi = FR32(2 * n + 1);
1176 hi = IBITCAST32(hi);
1177 lo = IBITCAST32(lo);
1178 hi64 = ZEXT64(hi);
1179 lo64 = ZEXT64(lo);
1180 v64 = OR(SHL(hi64,CONST64(32)),lo64);
1181 Value *n0 = FPBITCAST64(v64);
1182 tmp = FPSUB(n0,m0);
1183 Value *val64 = IBITCAST64(tmp);
1184 hi = LSHR(val64,CONST64(32));
1185 lo = AND(val64,CONST64(0xffffffff));
1186 hi = TRUNC32(hi);
1187 lo = TRUNC32(lo);
1188 hi = FPBITCAST32(hi);
1189 lo = FPBITCAST32(lo);
1190 LETFPS(2*d ,lo);
1191 LETFPS(d*2 + 1 , hi);
1192 }
1193 return No_exp;
1194}
1195#endif
1196#undef vfpinstr
1197#undef vfpinstr_inst
1198#undef VFPLABEL_INST
1199
1200/* ----------------------------------------------------------------------- */
1201/* VDIV */
1202/* cond 1110 1D00 Vn-- Vd-- 101X N0M0 Vm-- */
1203#define vfpinstr vdiv
1204#define vfpinstr_inst vdiv_inst
1205#define VFPLABEL_INST VDIV_INST
1206#ifdef VFP_DECODE
1207{"vdiv", 5, ARMVFP2, 23, 27, 0x1d, 20, 21, 0x0, 9, 11, 0x5, 6, 6, 0, 4, 4, 0},
1208#endif
1209#ifdef VFP_DECODE_EXCLUSION
1210{"vdiv", 0, ARMVFP2, 0},
1211#endif
1212#ifdef VFP_INTERPRETER_TABLE
1213INTERPRETER_TRANSLATE(vfpinstr),
1214#endif
1215#ifdef VFP_INTERPRETER_LABEL
1216&&VFPLABEL_INST,
1217#endif
1218#ifdef VFP_INTERPRETER_STRUCT
1219typedef struct _vdiv_inst {
1220 unsigned int instr;
1221 unsigned int dp_operation;
1222} vfpinstr_inst;
1223#endif
1224#ifdef VFP_INTERPRETER_TRANS
1225ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
1226{
1227 VFP_DEBUG_TRANSLATE;
1228
1229 arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
1230 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
1231
1232 inst_base->cond = BITS(inst, 28, 31);
1233 inst_base->idx = index;
1234 inst_base->br = NON_BRANCH;
1235 inst_base->load_r15 = 0;
1236
1237 inst_cream->dp_operation = BIT(inst, 8);
1238 inst_cream->instr = inst;
1239
1240 return inst_base;
1241}
1242#endif
1243#ifdef VFP_INTERPRETER_IMPL
1244VFPLABEL_INST:
1245{
1246 INC_ICOUNTER;
1247 if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
1248 CHECK_VFP_ENABLED;
1249
1250 DBG("VDIV :\n");
1251
1252 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
1253
1254 int ret;
1255
1256 if (inst_cream->dp_operation)
1257 ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
1258 else
1259 ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
1260
1261 CHECK_VFP_CDP_RET;
1262 }
1263 cpu->Reg[15] += GET_INST_SIZE(cpu);
1264 INC_PC(sizeof(vfpinstr_inst));
1265 FETCH_INST;
1266 GOTO_NEXT_INST;
1267}
1268#endif
1269#ifdef VFP_CDP_TRANS
1270if ((OPC_1 & 0xB) == 0xA && (OPC_2 & 0x2) == 0)
1271{
1272 DBG("VDIV :\n");
1273}
1274#endif
1275#ifdef VFP_DYNCOM_TABLE
1276DYNCOM_FILL_ACTION(vfpinstr),
1277#endif
1278#ifdef VFP_DYNCOM_TAG
1279int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
1280{
1281 int instr_size = INSTR_SIZE;
1282 DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
1283 //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
1284 arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
1285 return instr_size;
1286}
1287#endif
1288#ifdef VFP_DYNCOM_TRANS
1289int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
1290 DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
1291 //arch_arm_undef(cpu, bb, instr);
1292 int m;
1293 int n;
1294 int d ;
1295 int s = BIT(8) == 0;
1296 Value *mm;
1297 Value *nn;
1298 Value *tmp;
1299 if(s){
1300 m = BIT(5) | BITS(0,3) << 1;
1301 n = BIT(7) | BITS(16,19) << 1;
1302 d = BIT(22) | BITS(12,15) << 1;
1303 mm = FR32(m);
1304 nn = FR32(n);
1305 tmp = FPDIV(nn,mm);
1306 LETFPS(d,tmp);
1307 }else {
1308 m = BITS(0,3) | BIT(5) << 4;
1309 n = BITS(16,19) | BIT(7) << 4;
1310 d = BIT(22) << 4 | BITS(12,15);
1311 Value *lo = FR32(2 * m);
1312 Value *hi = FR32(2 * m + 1);
1313 hi = IBITCAST32(hi);
1314 lo = IBITCAST32(lo);
1315 Value *hi64 = ZEXT64(hi);
1316 Value* lo64 = ZEXT64(lo);
1317 Value* v64 = OR(SHL(hi64,CONST64(32)),lo64);
1318 Value* m0 = FPBITCAST64(v64);
1319 lo = FR32(2 * n);
1320 hi = FR32(2 * n + 1);
1321 hi = IBITCAST32(hi);
1322 lo = IBITCAST32(lo);
1323 hi64 = ZEXT64(hi);
1324 lo64 = ZEXT64(lo);
1325 v64 = OR(SHL(hi64,CONST64(32)),lo64);
1326 Value *n0 = FPBITCAST64(v64);
1327 tmp = FPDIV(n0,m0);
1328 Value *val64 = IBITCAST64(tmp);
1329 hi = LSHR(val64,CONST64(32));
1330 lo = AND(val64,CONST64(0xffffffff));
1331 hi = TRUNC32(hi);
1332 lo = TRUNC32(lo);
1333 hi = FPBITCAST32(hi);
1334 lo = FPBITCAST32(lo);
1335 LETFPS(2*d ,lo);
1336 LETFPS(d*2 + 1 , hi);
1337 }
1338 return No_exp;
1339}
1340#endif
1341#undef vfpinstr
1342#undef vfpinstr_inst
1343#undef VFPLABEL_INST
1344
1345/* ----------------------------------------------------------------------- */
1346/* VMOVI move immediate */
1347/* cond 1110 1D11 im4H Vd-- 101X 0000 im4L */
1348/* cond 1110 opc1 CRn- CRd- copr op20 CRm- CDP */
1349#define vfpinstr vmovi
1350#define vfpinstr_inst vmovi_inst
1351#define VFPLABEL_INST VMOVI_INST
1352#ifdef VFP_DECODE
1353{"vmov(i)", 4, ARMVFP3, 23, 27, 0x1d, 20, 21, 0x3, 9, 11, 0x5, 4, 7, 0},
1354#endif
1355#ifdef VFP_DECODE_EXCLUSION
1356{"vmov(i)", 0, ARMVFP3, 0},
1357#endif
1358#ifdef VFP_INTERPRETER_TABLE
1359INTERPRETER_TRANSLATE(vfpinstr),
1360#endif
1361#ifdef VFP_INTERPRETER_LABEL
1362&&VFPLABEL_INST,
1363#endif
1364#ifdef VFP_INTERPRETER_STRUCT
1365typedef struct _vmovi_inst {
1366 unsigned int single;
1367 unsigned int d;
1368 unsigned int imm;
1369} vfpinstr_inst;
1370#endif
1371#ifdef VFP_INTERPRETER_TRANS
1372ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
1373{
1374 VFP_DEBUG_TRANSLATE;
1375
1376 arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
1377 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
1378
1379 inst_base->cond = BITS(inst, 28, 31);
1380 inst_base->idx = index;
1381 inst_base->br = NON_BRANCH;
1382 inst_base->load_r15 = 0;
1383
1384 inst_cream->single = BIT(inst, 8) == 0;
1385 inst_cream->d = (inst_cream->single ? BITS(inst,12,15)<<1 | BIT(inst,22) : BITS(inst,12,15) | BIT(inst,22)<<4);
1386 unsigned int imm8 = BITS(inst, 16, 19) << 4 | BITS(inst, 0, 3);
1387 if (inst_cream->single)
1388 inst_cream->imm = BIT(imm8, 7)<<31 | (BIT(imm8, 6)==0)<<30 | (BIT(imm8, 6) ? 0x1f : 0)<<25 | BITS(imm8, 0, 5)<<19;
1389 else
1390 inst_cream->imm = BIT(imm8, 7)<<31 | (BIT(imm8, 6)==0)<<30 | (BIT(imm8, 6) ? 0xff : 0)<<22 | BITS(imm8, 0, 5)<<16;
1391 return inst_base;
1392}
1393#endif
1394#ifdef VFP_INTERPRETER_IMPL
1395VFPLABEL_INST:
1396{
1397 INC_ICOUNTER;
1398 if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
1399 CHECK_VFP_ENABLED;
1400
1401 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
1402
1403 VMOVI(cpu, inst_cream->single, inst_cream->d, inst_cream->imm);
1404 }
1405 cpu->Reg[15] += GET_INST_SIZE(cpu);
1406 INC_PC(sizeof(vfpinstr_inst));
1407 FETCH_INST;
1408 GOTO_NEXT_INST;
1409}
1410#endif
1411#ifdef VFP_CDP_TRANS
1412if ( (OPC_1 & 0xb) == 0xb && BITS(4, 7) == 0)
1413{
1414 unsigned int single = BIT(8) == 0;
1415 unsigned int d = (single ? BITS(12,15)<<1 | BIT(22) : BITS(12,15) | BIT(22)<<4);
1416 unsigned int imm;
1417 instr = BITS(16, 19) << 4 | BITS(0, 3); /* FIXME dirty workaround to get a correct imm */
1418 if (single) {
1419 imm = BIT(7)<<31 | (BIT(6)==0)<<30 | (BIT(6) ? 0x1f : 0)<<25 | BITS(0, 5)<<19;
1420 } else {
1421 imm = BIT(7)<<31 | (BIT(6)==0)<<30 | (BIT(6) ? 0xff : 0)<<22 | BITS(0, 5)<<16;
1422 }
1423 VMOVI(state, single, d, imm);
1424 return ARMul_DONE;
1425}
1426#endif
1427#ifdef VFP_CDP_IMPL
1428void VMOVI(ARMul_State * state, ARMword single, ARMword d, ARMword imm)
1429{
1430 DBG("VMOV(I) :\n");
1431
1432 if (single)
1433 {
1434 DBG("\ts%d <= [%x]\n", d, imm);
1435 state->ExtReg[d] = imm;
1436 }
1437 else
1438 {
1439 /* Check endian please */
1440 DBG("\ts[%d-%d] <= [%x-%x]\n", d*2+1, d*2, imm, 0);
1441 state->ExtReg[d*2+1] = imm;
1442 state->ExtReg[d*2] = 0;
1443 }
1444}
1445#endif
1446#ifdef VFP_DYNCOM_TABLE
1447DYNCOM_FILL_ACTION(vfpinstr),
1448#endif
1449#ifdef VFP_DYNCOM_TAG
1450int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
1451{
1452 int instr_size = INSTR_SIZE;
1453 DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
1454 arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
1455 return instr_size;
1456}
1457#endif
1458#ifdef VFP_DYNCOM_TRANS
1459int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
1460 DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
1461 //arch_arm_undef(cpu, bb, instr);
1462 int single = (BIT(8) == 0);
1463 int d;
1464 int imm32;
1465 Value *v;
1466 Value *tmp;
1467 v = CONST32(BITS(0,3) | BITS(16,19) << 4);
1468 //v = CONST64(0x3ff0000000000000);
1469 if(single){
1470 d = BIT(22) | BITS(12,15) << 1;
1471 }else {
1472 d = BITS(12,15) | BIT(22) << 4;
1473 }
1474 if(single){
1475 LETFPS(d,FPBITCAST32(v));
1476 }else {
1477 //v = UITOFP(64,v);
1478 //tmp = IBITCAST64(v);
1479 LETFPS(d*2 ,FPBITCAST32(TRUNC32(AND(v,CONST64(0xffffffff)))));
1480 LETFPS(d * 2 + 1,FPBITCAST32(TRUNC32(LSHR(v,CONST64(32)))));
1481 }
1482 return No_exp;
1483}
1484#endif
1485#undef vfpinstr
1486#undef vfpinstr_inst
1487#undef VFPLABEL_INST
1488
1489/* ----------------------------------------------------------------------- */
1490/* VMOVR move register */
1491/* cond 1110 1D11 0000 Vd-- 101X 01M0 Vm-- */
1492/* cond 1110 opc1 CRn- CRd- copr op20 CRm- CDP */
1493#define vfpinstr vmovr
1494#define vfpinstr_inst vmovr_inst
1495#define VFPLABEL_INST VMOVR_INST
1496#ifdef VFP_DECODE
1497{"vmov(r)", 5, ARMVFP3, 23, 27, 0x1d, 16, 21, 0x30, 9, 11, 0x5, 6, 7, 1, 4, 4, 0},
1498#endif
1499#ifdef VFP_DECODE_EXCLUSION
1500{"vmov(r)", 0, ARMVFP3, 0},
1501#endif
1502#ifdef VFP_INTERPRETER_TABLE
1503INTERPRETER_TRANSLATE(vfpinstr),
1504#endif
1505#ifdef VFP_INTERPRETER_LABEL
1506&&VFPLABEL_INST,
1507#endif
1508#ifdef VFP_INTERPRETER_STRUCT
1509typedef struct _vmovr_inst {
1510 unsigned int single;
1511 unsigned int d;
1512 unsigned int m;
1513} vfpinstr_inst;
1514#endif
1515#ifdef VFP_INTERPRETER_TRANS
1516ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
1517{
1518 VFP_DEBUG_TRANSLATE;
1519 VFP_DEBUG_UNTESTED(VMOVR);
1520
1521 arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
1522 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
1523
1524 inst_base->cond = BITS(inst, 28, 31);
1525 inst_base->idx = index;
1526 inst_base->br = NON_BRANCH;
1527 inst_base->load_r15 = 0;
1528
1529 inst_cream->single = BIT(inst, 8) == 0;
1530 inst_cream->d = (inst_cream->single ? BITS(inst,12,15)<<1 | BIT(inst,22) : BITS(inst,12,15) | BIT(inst,22)<<4);
1531 inst_cream->m = (inst_cream->single ? BITS(inst, 0, 3)<<1 | BIT(inst, 5) : BITS(inst, 0, 3) | BIT(inst, 5)<<4);
1532 return inst_base;
1533}
1534#endif
1535#ifdef VFP_INTERPRETER_IMPL
1536VFPLABEL_INST:
1537{
1538 INC_ICOUNTER;
1539 if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
1540 CHECK_VFP_ENABLED;
1541
1542 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
1543
1544 VMOVR(cpu, inst_cream->single, inst_cream->d, inst_cream->m);
1545 }
1546 cpu->Reg[15] += GET_INST_SIZE(cpu);
1547 INC_PC(sizeof(vfpinstr_inst));
1548 FETCH_INST;
1549 GOTO_NEXT_INST;
1550}
1551#endif
1552#ifdef VFP_CDP_TRANS
1553if ( (OPC_1 & 0xb) == 0xb && CRn == 0 && (OPC_2 & 0x6) == 0x2 )
1554{
1555 unsigned int single = BIT(8) == 0;
1556 unsigned int d = (single ? BITS(12,15)<<1 | BIT(22) : BITS(12,15) | BIT(22)<<4);
1557    unsigned int m = (single ? BITS( 0, 3)<<1 | BIT( 5) : BITS( 0, 3) | BIT( 5)<<4);
1558 VMOVR(state, single, d, m);
1559 return ARMul_DONE;
1560}
1561#endif
1562#ifdef VFP_CDP_IMPL
1563void VMOVR(ARMul_State * state, ARMword single, ARMword d, ARMword m)
1564{
1565 DBG("VMOV(R) :\n");
1566
1567 if (single)
1568 {
1569 DBG("\ts%d <= s%d[%x]\n", d, m, state->ExtReg[m]);
1570 state->ExtReg[d] = state->ExtReg[m];
1571 }
1572 else
1573 {
1574 /* Check endian please */
1575 DBG("\ts[%d-%d] <= s[%d-%d][%x-%x]\n", d*2+1, d*2, m*2+1, m*2, state->ExtReg[m*2+1], state->ExtReg[m*2]);
1576 state->ExtReg[d*2+1] = state->ExtReg[m*2+1];
1577 state->ExtReg[d*2] = state->ExtReg[m*2];
1578 }
1579}
1580#endif
1581#ifdef VFP_DYNCOM_TABLE
1582DYNCOM_FILL_ACTION(vfpinstr),
1583#endif
1584#ifdef VFP_DYNCOM_TAG
1585int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
1586{
1587 int instr_size = INSTR_SIZE;
1588 arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
1589 DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc);
1590 if(instr >> 28 != 0xe)
1591 *tag |= TAG_CONDITIONAL;
1592
1593 return instr_size;
1594}
1595#endif
1596#ifdef VFP_DYNCOM_TRANS
1597int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
1598 DBG("\t\tin %s VMOV \n", __FUNCTION__);
1599 int single = BIT(8) == 0;
1600 int d = (single ? BITS(12,15)<<1 | BIT(22) : BIT(22) << 4 | BITS(12,15));
1601 int m = (single ? BITS(0, 3)<<1 | BIT(5) : BITS(0, 3) | BIT(5)<<4);
1602
1603 if (single)
1604 {
1605 LETFPS(d, FR32(m));
1606 }
1607 else
1608 {
1609 /* Check endian please */
1610 LETFPS((d*2 + 1), FR32(m*2 + 1));
1611 LETFPS((d * 2), FR32(m * 2));
1612 }
1613 return No_exp;
1614}
1615#endif
1616#undef vfpinstr
1617#undef vfpinstr_inst
1618#undef VFPLABEL_INST
1619
1620/* ----------------------------------------------------------------------- */
1621/* VABS */
1622/* cond 1110 1D11 0000 Vd-- 101X 11M0 Vm-- */
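/*
 * The interpreter path below simply hands the instruction to vfp_single_cpdo /
 * vfp_double_cpdo; only the dyncom translator open-codes it, as
 * select(m < 0.0 ? -m : m). Note that, unlike the architectural VABS (a plain
 * sign-bit clear), this select form leaves the sign of -0.0 and of NaN inputs
 * unchanged, since neither compares ordered-less-than zero.
 */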
1623#define vfpinstr vabs
1624#define vfpinstr_inst vabs_inst
1625#define VFPLABEL_INST VABS_INST
1626#ifdef VFP_DECODE
1627{"vabs", 5, ARMVFP2, 23, 27, 0x1d, 16, 21, 0x30, 9, 11, 0x5, 6, 7, 3, 4, 4, 0},
1628#endif
1629#ifdef VFP_DECODE_EXCLUSION
1630{"vabs", 0, ARMVFP2, 0},
1631#endif
1632#ifdef VFP_INTERPRETER_TABLE
1633INTERPRETER_TRANSLATE(vfpinstr),
1634#endif
1635#ifdef VFP_INTERPRETER_LABEL
1636&&VFPLABEL_INST,
1637#endif
1638#ifdef VFP_INTERPRETER_STRUCT
1639typedef struct _vabs_inst {
1640 unsigned int instr;
1641 unsigned int dp_operation;
1642} vfpinstr_inst;
1643#endif
1644#ifdef VFP_INTERPRETER_TRANS
1645ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
1646{
1647 VFP_DEBUG_TRANSLATE;VFP_DEBUG_UNTESTED(VABS);
1648
1649 arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
1650 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
1651
1652 inst_base->cond = BITS(inst, 28, 31);
1653 inst_base->idx = index;
1654 inst_base->br = NON_BRANCH;
1655 inst_base->load_r15 = 0;
1656
1657 inst_cream->dp_operation = BIT(inst, 8);
1658 inst_cream->instr = inst;
1659
1660 return inst_base;
1661}
1662#endif
1663#ifdef VFP_INTERPRETER_IMPL
1664VFPLABEL_INST:
1665{
1666 INC_ICOUNTER;
1667 if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
1668 CHECK_VFP_ENABLED;
1669
1670 DBG("VABS :\n");
1671
1672 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
1673
1674 int ret;
1675
1676 if (inst_cream->dp_operation)
1677 ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
1678 else
1679 ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
1680
1681 CHECK_VFP_CDP_RET;
1682 }
1683 cpu->Reg[15] += GET_INST_SIZE(cpu);
1684 INC_PC(sizeof(vfpinstr_inst));
1685 FETCH_INST;
1686 GOTO_NEXT_INST;
1687}
1688#endif
1689#ifdef VFP_CDP_TRANS
1690if ((OPC_1 & 0xB) == 0xB && CRn == 0 && (OPC_2 & 0x7) == 6)
1691{
1692 DBG("VABS :\n");
1693}
1694#endif
1695#ifdef VFP_DYNCOM_TABLE
1696DYNCOM_FILL_ACTION(vfpinstr),
1697#endif
1698#ifdef VFP_DYNCOM_TAG
1699int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
1700{
1701 int instr_size = INSTR_SIZE;
1702 //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
1703 //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
1704 arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
1705 return instr_size;
1706}
1707#endif
1708#ifdef VFP_DYNCOM_TRANS
1709int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
1710 //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
1711 //arch_arm_undef(cpu, bb, instr);
1712 int single = BIT(8) == 0;
1713 int d = (single ? BITS(12,15)<<1 | BIT(22) : BIT(22) << 4 | BITS(12,15));
1714 int m = (single ? BITS(0, 3)<<1 | BIT(5) : BITS(0, 3) | BIT(5)<<4);
1715 Value* m0;
1716 if (single)
1717 {
1718 m0 = FR32(m);
1719 m0 = SELECT(FPCMP_OLT(m0,FPCONST32(0.0)),FPNEG32(m0),m0);
1720 LETFPS(d,m0);
1721 }
1722 else
1723 {
1724 /* Check endian please */
1725 Value *lo = FR32(2 * m);
1726 Value *hi = FR32(2 * m + 1);
1727 hi = IBITCAST32(hi);
1728 lo = IBITCAST32(lo);
1729 Value *hi64 = ZEXT64(hi);
1730 Value* lo64 = ZEXT64(lo);
1731 Value* v64 = OR(SHL(hi64,CONST64(32)),lo64);
1732 m0 = FPBITCAST64(v64);
1733 m0 = SELECT(FPCMP_OLT(m0,FPCONST64(0.0)),FPNEG64(m0),m0);
1734 Value *val64 = IBITCAST64(m0);
1735 hi = LSHR(val64,CONST64(32));
1736 lo = AND(val64,CONST64(0xffffffff));
1737 hi = TRUNC32(hi);
1738 lo = TRUNC32(lo);
1739 hi = FPBITCAST32(hi);
1740 lo = FPBITCAST32(lo);
1741 LETFPS(2*d ,lo);
1742 LETFPS(d*2 + 1 , hi);
1743 }
1744 return No_exp;
1745}
1746#endif
1747#undef vfpinstr
1748#undef vfpinstr_inst
1749#undef VFPLABEL_INST
1750
1751/* ----------------------------------------------------------------------- */
1752/* VNEG */
1753/* cond 1110 1D11 0001 Vd-- 101X 01M0 Vm-- */
1754#define vfpinstr vneg
1755#define vfpinstr_inst vneg_inst
1756#define VFPLABEL_INST VNEG_INST
1757#ifdef VFP_DECODE
1758//{"vneg", 5, ARMVFP2, 23, 27, 0x1d, 16, 21, 0x30, 9, 11, 0x5, 6, 7, 1, 4, 4, 0},
1759{"vneg", 5, ARMVFP2, 23, 27, 0x1d, 17, 21, 0x18, 9, 11, 0x5, 6, 7, 1, 4, 4, 0},
1760#endif
1761#ifdef VFP_DECODE_EXCLUSION
1762{"vneg", 0, ARMVFP2, 0},
1763#endif
1764#ifdef VFP_INTERPRETER_TABLE
1765INTERPRETER_TRANSLATE(vfpinstr),
1766#endif
1767#ifdef VFP_INTERPRETER_LABEL
1768&&VFPLABEL_INST,
1769#endif
1770#ifdef VFP_INTERPRETER_STRUCT
1771typedef struct _vneg_inst {
1772 unsigned int instr;
1773 unsigned int dp_operation;
1774} vfpinstr_inst;
1775#endif
1776#ifdef VFP_INTERPRETER_TRANS
1777ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
1778{
1779 VFP_DEBUG_TRANSLATE;VFP_DEBUG_UNTESTED(VNEG);
1780
1781 arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
1782 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
1783
1784 inst_base->cond = BITS(inst, 28, 31);
1785 inst_base->idx = index;
1786 inst_base->br = NON_BRANCH;
1787 inst_base->load_r15 = 0;
1788
1789 inst_cream->dp_operation = BIT(inst, 8);
1790 inst_cream->instr = inst;
1791
1792 return inst_base;
1793}
1794#endif
1795#ifdef VFP_INTERPRETER_IMPL
1796VFPLABEL_INST:
1797{
1798 INC_ICOUNTER;
1799 if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
1800 CHECK_VFP_ENABLED;
1801
1802 DBG("VNEG :\n");
1803
1804 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
1805
1806 int ret;
1807
1808 if (inst_cream->dp_operation)
1809 ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
1810 else
1811 ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
1812
1813 CHECK_VFP_CDP_RET;
1814 }
1815 cpu->Reg[15] += GET_INST_SIZE(cpu);
1816 INC_PC(sizeof(vfpinstr_inst));
1817 FETCH_INST;
1818 GOTO_NEXT_INST;
1819}
1820#endif
1821#ifdef VFP_CDP_TRANS
1822if ((OPC_1 & 0xB) == 0xB && CRn == 1 && (OPC_2 & 0x7) == 2)
1823{
1824 DBG("VNEG :\n");
1825}
1826#endif
1827#ifdef VFP_DYNCOM_TABLE
1828DYNCOM_FILL_ACTION(vfpinstr),
1829#endif
1830#ifdef VFP_DYNCOM_TAG
1831int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
1832{
1833 int instr_size = INSTR_SIZE;
1834 DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
1835 //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
1836 arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
1837 return instr_size;
1838}
1839#endif
1840#ifdef VFP_DYNCOM_TRANS
1841int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
1842 DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
1843 //arch_arm_undef(cpu, bb, instr);
1844 int single = BIT(8) == 0;
1845 int d = (single ? BITS(12,15)<<1 | BIT(22) : BIT(22) << 4 | BITS(12,15));
1846 int m = (single ? BITS(0, 3)<<1 | BIT(5) : BITS(0, 3) | BIT(5)<<4);
1847 Value* m0;
1848 if (single)
1849 {
1850 m0 = FR32(m);
1851 m0 = FPNEG32(m0);
1852 LETFPS(d,m0);
1853 }
1854 else
1855 {
1856 /* Check endian please */
1857 Value *lo = FR32(2 * m);
1858 Value *hi = FR32(2 * m + 1);
1859 hi = IBITCAST32(hi);
1860 lo = IBITCAST32(lo);
1861 Value *hi64 = ZEXT64(hi);
1862 Value* lo64 = ZEXT64(lo);
1863 Value* v64 = OR(SHL(hi64,CONST64(32)),lo64);
1864 m0 = FPBITCAST64(v64);
1865 m0 = FPNEG64(m0);
1866 Value *val64 = IBITCAST64(m0);
1867 hi = LSHR(val64,CONST64(32));
1868 lo = AND(val64,CONST64(0xffffffff));
1869 hi = TRUNC32(hi);
1870 lo = TRUNC32(lo);
1871 hi = FPBITCAST32(hi);
1872 lo = FPBITCAST32(lo);
1873 LETFPS(2*d ,lo);
1874 LETFPS(d*2 + 1 , hi);
1875 }
1876 return No_exp;
1877}
1878#endif
1879#undef vfpinstr
1880#undef vfpinstr_inst
1881#undef VFPLABEL_INST
1882
1883/* ----------------------------------------------------------------------- */
1884/* VSQRT */
1885/* cond 1110 1D11 0001 Vd-- 101X 11M0 Vm-- */
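/*
 * In the dyncom translator below the double-precision case reassembles Dm from its
 * two 32-bit lanes, applies FPSQRT and splits the result back; the single-precision
 * case widens Sm to 64 bits with FPEXT, takes the square root there and truncates
 * back to 32 bits with FPTRUNC.
 */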
1886#define vfpinstr vsqrt
1887#define vfpinstr_inst vsqrt_inst
1888#define VFPLABEL_INST VSQRT_INST
1889#ifdef VFP_DECODE
1890{"vsqrt", 5, ARMVFP2, 23, 27, 0x1d, 16, 21, 0x31, 9, 11, 0x5, 6, 7, 3, 4, 4, 0},
1891#endif
1892#ifdef VFP_DECODE_EXCLUSION
1893{"vsqrt", 0, ARMVFP2, 0},
1894#endif
1895#ifdef VFP_INTERPRETER_TABLE
1896INTERPRETER_TRANSLATE(vfpinstr),
1897#endif
1898#ifdef VFP_INTERPRETER_LABEL
1899&&VFPLABEL_INST,
1900#endif
1901#ifdef VFP_INTERPRETER_STRUCT
1902typedef struct _vsqrt_inst {
1903 unsigned int instr;
1904 unsigned int dp_operation;
1905} vfpinstr_inst;
1906#endif
1907#ifdef VFP_INTERPRETER_TRANS
1908ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
1909{
1910 VFP_DEBUG_TRANSLATE;
1911
1912 arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
1913 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
1914
1915 inst_base->cond = BITS(inst, 28, 31);
1916 inst_base->idx = index;
1917 inst_base->br = NON_BRANCH;
1918 inst_base->load_r15 = 0;
1919
1920 inst_cream->dp_operation = BIT(inst, 8);
1921 inst_cream->instr = inst;
1922
1923 return inst_base;
1924}
1925#endif
1926#ifdef VFP_INTERPRETER_IMPL
1927VFPLABEL_INST:
1928{
1929 INC_ICOUNTER;
1930 if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
1931 CHECK_VFP_ENABLED;
1932
1933 DBG("VSQRT :\n");
1934
1935 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
1936
1937 int ret;
1938
1939 if (inst_cream->dp_operation)
1940 ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
1941 else
1942 ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
1943
1944 CHECK_VFP_CDP_RET;
1945 }
1946 cpu->Reg[15] += GET_INST_SIZE(cpu);
1947 INC_PC(sizeof(vfpinstr_inst));
1948 FETCH_INST;
1949 GOTO_NEXT_INST;
1950}
1951#endif
1952#ifdef VFP_CDP_TRANS
1953if ((OPC_1 & 0xB) == 0xB && CRn == 1 && (OPC_2 & 0x7) == 6)
1954{
1955 DBG("VSQRT :\n");
1956}
1957#endif
1958#ifdef VFP_DYNCOM_TABLE
1959DYNCOM_FILL_ACTION(vfpinstr),
1960#endif
1961#ifdef VFP_DYNCOM_TAG
1962int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
1963{
1964 int instr_size = INSTR_SIZE;
1965 DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
1966 //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
1967 arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
1968 return instr_size;
1969}
1970#endif
1971#ifdef VFP_DYNCOM_TRANS
1972int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
1973 DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
1974 //arch_arm_undef(cpu, bb, instr);
1975 int dp_op = (BIT(8) == 1);
1976 int d = dp_op ? BITS(12,15) | BIT(22) << 4 : BIT(22) | BITS(12,15) << 1;
1977 int m = dp_op ? BITS(0,3) | BIT(5) << 4 : BIT(5) | BITS(0,3) << 1;
1978 Value* v;
1979 Value* tmp;
1980 if(dp_op){
1981 v = SHL(ZEXT64(IBITCAST32(FR32(2 * m + 1))),CONST64(32));
1982 tmp = ZEXT64(IBITCAST32(FR32(2 * m)));
1983 v = OR(v,tmp);
1984 v = FPSQRT(FPBITCAST64(v));
1985 tmp = TRUNC32(LSHR(IBITCAST64(v),CONST64(32)));
1986 v = TRUNC32(AND(IBITCAST64(v),CONST64( 0xffffffff)));
1987 LETFPS(2 * d , FPBITCAST32(v));
1988 LETFPS(2 * d + 1, FPBITCAST32(tmp));
1989 }else {
1990 v = FR32(m);
1991 v = FPSQRT(FPEXT(64,v));
1992 v = FPTRUNC(32,v);
1993 LETFPS(d,v);
1994 }
1995 return No_exp;
1996}
1997#endif
1998#undef vfpinstr
1999#undef vfpinstr_inst
2000#undef VFPLABEL_INST
2001
2002/* ----------------------------------------------------------------------- */
2003/* VCMP VCMPE */
2004/* cond 1110 1D11 0100 Vd-- 101X E1M0 Vm-- Encoding 1 */
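/*
 * The dyncom translator below derives the FPSCR condition flags directly from
 * ordered/unordered comparisons of Vd against Vm: Z from "equal", N from
 * "less than", C from "greater than or equal" OR-ed with "unordered", and V from
 * "unordered" alone, so an unordered (NaN) compare yields N=0 Z=0 C=1 V=1. The
 * four bits are packed into FPSCR[31:28] while the lower 28 bits are preserved.
 */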
2005#define vfpinstr vcmp
2006#define vfpinstr_inst vcmp_inst
2007#define VFPLABEL_INST VCMP_INST
2008#ifdef VFP_DECODE
2009{"vcmp", 5, ARMVFP2, 23, 27, 0x1d, 16, 21, 0x34, 9, 11, 0x5, 6, 6, 1, 4, 4, 0},
2010#endif
2011#ifdef VFP_DECODE_EXCLUSION
2012{"vcmp", 0, ARMVFP2, 0},
2013#endif
2014#ifdef VFP_INTERPRETER_TABLE
2015INTERPRETER_TRANSLATE(vfpinstr),
2016#endif
2017#ifdef VFP_INTERPRETER_LABEL
2018&&VFPLABEL_INST,
2019#endif
2020#ifdef VFP_INTERPRETER_STRUCT
2021typedef struct _vcmp_inst {
2022 unsigned int instr;
2023 unsigned int dp_operation;
2024} vfpinstr_inst;
2025#endif
2026#ifdef VFP_INTERPRETER_TRANS
2027ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
2028{
2029 VFP_DEBUG_TRANSLATE;
2030
2031 arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
2032 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
2033
2034 inst_base->cond = BITS(inst, 28, 31);
2035 inst_base->idx = index;
2036 inst_base->br = NON_BRANCH;
2037 inst_base->load_r15 = 0;
2038
2039 inst_cream->dp_operation = BIT(inst, 8);
2040 inst_cream->instr = inst;
2041
2042 return inst_base;
2043}
2044#endif
2045#ifdef VFP_INTERPRETER_IMPL
2046VFPLABEL_INST:
2047{
2048 INC_ICOUNTER;
2049 if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
2050 CHECK_VFP_ENABLED;
2051
2052 DBG("VCMP(1) :\n");
2053
2054 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
2055
2056 int ret;
2057
2058 if (inst_cream->dp_operation)
2059 ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
2060 else
2061 ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
2062
2063 CHECK_VFP_CDP_RET;
2064 }
2065 cpu->Reg[15] += GET_INST_SIZE(cpu);
2066 INC_PC(sizeof(vfpinstr_inst));
2067 FETCH_INST;
2068 GOTO_NEXT_INST;
2069}
2070#endif
2071#ifdef VFP_CDP_TRANS
2072if ((OPC_1 & 0xB) == 0xB && CRn == 4 && (OPC_2 & 0x2) == 2)
2073{
2074 DBG("VCMP(1) :\n");
2075}
2076#endif
2077#ifdef VFP_DYNCOM_TABLE
2078DYNCOM_FILL_ACTION(vfpinstr),
2079#endif
2080#ifdef VFP_DYNCOM_TAG
2081int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
2082{
2083 int instr_size = INSTR_SIZE;
2084 //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
2085 arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
2086 return instr_size;
2087}
2088#endif
2089#ifdef VFP_DYNCOM_TRANS
2090int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
2091 DBG("\t\tin %s instruction is executed out of JIT.\n", __FUNCTION__);
2092 //arch_arm_undef(cpu, bb, instr);
2093 int dp_op = (BIT(8) == 1);
2094 int d = dp_op ? BITS(12,15) | BIT(22) << 4 : BIT(22) | BITS(12,15) << 1;
2095 int m = dp_op ? BITS(0,3) | BIT(5) << 4 : BIT(5) | BITS(0,3) << 1;
2096 Value* v;
2097 Value* tmp;
2098 Value* n;
2099 Value* z;
2100 Value* c;
2101 Value* vt;
2102 Value* v1;
2103 Value* nzcv;
2104 if(dp_op){
2105 v = SHL(ZEXT64(IBITCAST32(FR32(2 * m + 1))),CONST64(32));
2106 tmp = ZEXT64(IBITCAST32(FR32(2 * m)));
2107 v1 = OR(v,tmp);
2108 v = SHL(ZEXT64(IBITCAST32(FR32(2 * d + 1))),CONST64(32));
2109 tmp = ZEXT64(IBITCAST32(FR32(2 * d)));
2110 v = OR(v,tmp);
2111 z = FPCMP_OEQ(FPBITCAST64(v),FPBITCAST64(v1));
2112 n = FPCMP_OLT(FPBITCAST64(v),FPBITCAST64(v1));
2113 c = FPCMP_OGE(FPBITCAST64(v),FPBITCAST64(v1));
2114 tmp = FPCMP_UNO(FPBITCAST64(v),FPBITCAST64(v1));
2115 v1 = tmp;
2116 c = OR(c,tmp);
2117 n = SHL(ZEXT32(n),CONST32(31));
2118 z = SHL(ZEXT32(z),CONST32(30));
2119 c = SHL(ZEXT32(c),CONST32(29));
2120 v1 = SHL(ZEXT32(v1),CONST(28));
2121 nzcv = OR(OR(OR(n,z),c),v1);
2122 v = R(VFP_FPSCR);
2123 tmp = OR(nzcv,AND(v,CONST32(0x0fffffff)));
2124 LET(VFP_FPSCR,tmp);
2125 }else {
2126 z = FPCMP_OEQ(FR32(d),FR32(m));
2127 n = FPCMP_OLT(FR32(d),FR32(m));
2128 c = FPCMP_OGE(FR32(d),FR32(m));
2129 tmp = FPCMP_UNO(FR32(d),FR32(m));
2130 c = OR(c,tmp);
2131 v1 = tmp;
2132 n = SHL(ZEXT32(n),CONST32(31));
2133 z = SHL(ZEXT32(z),CONST32(30));
2134 c = SHL(ZEXT32(c),CONST32(29));
2135 v1 = SHL(ZEXT32(v1),CONST(28));
2136 nzcv = OR(OR(OR(n,z),c),v1);
2137 v = R(VFP_FPSCR);
2138 tmp = OR(nzcv,AND(v,CONST32(0x0fffffff)));
2139 LET(VFP_FPSCR,tmp);
2140 }
2141 return No_exp;
2142}
2143#endif
2144#undef vfpinstr
2145#undef vfpinstr_inst
2146#undef VFPLABEL_INST
2147
2148/* ----------------------------------------------------------------------- */
2149/* VCMP VCMPE */
2150/* cond 1110 1D11 0101 Vd-- 101X E1M0 Vm-- Encoding 2 */
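/*
 * Encoding 2 compares Vd against +0.0: the translator below uses the same flag
 * recipe as encoding 1, but with a constant-zero second operand in place of Vm.
 */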
2151#define vfpinstr vcmp2
2152#define vfpinstr_inst vcmp2_inst
2153#define VFPLABEL_INST VCMP2_INST
2154#ifdef VFP_DECODE
2155{"vcmp2", 5, ARMVFP2, 23, 27, 0x1d, 16, 21, 0x35, 9, 11, 0x5, 0, 6, 0x40},
2156#endif
2157#ifdef VFP_DECODE_EXCLUSION
2158{"vcmp2", 0, ARMVFP2, 0},
2159#endif
2160#ifdef VFP_INTERPRETER_TABLE
2161INTERPRETER_TRANSLATE(vfpinstr),
2162#endif
2163#ifdef VFP_INTERPRETER_LABEL
2164&&VFPLABEL_INST,
2165#endif
2166#ifdef VFP_INTERPRETER_STRUCT
2167typedef struct _vcmp2_inst {
2168 unsigned int instr;
2169 unsigned int dp_operation;
2170} vfpinstr_inst;
2171#endif
2172#ifdef VFP_INTERPRETER_TRANS
2173ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
2174{
2175 VFP_DEBUG_TRANSLATE;
2176
2177 arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
2178 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
2179
2180 inst_base->cond = BITS(inst, 28, 31);
2181 inst_base->idx = index;
2182 inst_base->br = NON_BRANCH;
2183 inst_base->load_r15 = 0;
2184
2185 inst_cream->dp_operation = BIT(inst, 8);
2186 inst_cream->instr = inst;
2187
2188 return inst_base;
2189}
2190#endif
2191#ifdef VFP_INTERPRETER_IMPL
2192VFPLABEL_INST:
2193{
2194 INC_ICOUNTER;
2195 if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
2196 CHECK_VFP_ENABLED;
2197
2198 DBG("VCMP(2) :\n");
2199
2200 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
2201
2202 int ret;
2203
2204 if (inst_cream->dp_operation)
2205 ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
2206 else
2207 ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
2208
2209 CHECK_VFP_CDP_RET;
2210 }
2211 cpu->Reg[15] += GET_INST_SIZE(cpu);
2212 INC_PC(sizeof(vfpinstr_inst));
2213 FETCH_INST;
2214 GOTO_NEXT_INST;
2215}
2216#endif
2217#ifdef VFP_CDP_TRANS
2218if ((OPC_1 & 0xB) == 0xB && CRn == 5 && (OPC_2 & 0x2) == 2 && CRm == 0)
2219{
2220 DBG("VCMP(2) :\n");
2221}
2222#endif
2223#ifdef VFP_DYNCOM_TABLE
2224DYNCOM_FILL_ACTION(vfpinstr),
2225#endif
2226#ifdef VFP_DYNCOM_TAG
2227int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
2228{
2229 int instr_size = INSTR_SIZE;
2230 //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
2231 arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
2232 return instr_size;
2233}
2234#endif
2235#ifdef VFP_DYNCOM_TRANS
2236int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
2237    DBG("\t\tin %s instruction will be executed out of JIT.\n", __FUNCTION__);
2238 //arch_arm_undef(cpu, bb, instr);
2239 int dp_op = (BIT(8) == 1);
2240 int d = dp_op ? BITS(12,15) | BIT(22) << 4 : BIT(22) | BITS(12,15) << 1;
2241 //int m = dp_op ? BITS(0,3) | BIT(5) << 4 : BIT(5) | BITS(0,3) << 1;
2242 Value* v;
2243 Value* tmp;
2244 Value* n;
2245 Value* z;
2246 Value* c;
2247 Value* vt;
2248 Value* v1;
2249 Value* nzcv;
2250 if(dp_op){
2251 v1 = CONST64(0);
2252 v = SHL(ZEXT64(IBITCAST32(FR32(2 * d + 1))),CONST64(32));
2253 tmp = ZEXT64(IBITCAST32(FR32(2 * d)));
2254 v = OR(v,tmp);
2255 z = FPCMP_OEQ(FPBITCAST64(v),FPBITCAST64(v1));
2256 n = FPCMP_OLT(FPBITCAST64(v),FPBITCAST64(v1));
2257 c = FPCMP_OGE(FPBITCAST64(v),FPBITCAST64(v1));
2258 tmp = FPCMP_UNO(FPBITCAST64(v),FPBITCAST64(v1));
2259 v1 = tmp;
2260 c = OR(c,tmp);
2261 n = SHL(ZEXT32(n),CONST32(31));
2262 z = SHL(ZEXT32(z),CONST32(30));
2263 c = SHL(ZEXT32(c),CONST32(29));
2264 v1 = SHL(ZEXT32(v1),CONST(28));
2265 nzcv = OR(OR(OR(n,z),c),v1);
2266 v = R(VFP_FPSCR);
2267 tmp = OR(nzcv,AND(v,CONST32(0x0fffffff)));
2268 LET(VFP_FPSCR,tmp);
2269 }else {
2270 v1 = CONST(0);
2271 v1 = FPBITCAST32(v1);
2272 z = FPCMP_OEQ(FR32(d),v1);
2273 n = FPCMP_OLT(FR32(d),v1);
2274 c = FPCMP_OGE(FR32(d),v1);
2275 tmp = FPCMP_UNO(FR32(d),v1);
2276 c = OR(c,tmp);
2277 v1 = tmp;
2278 n = SHL(ZEXT32(n),CONST32(31));
2279 z = SHL(ZEXT32(z),CONST32(30));
2280 c = SHL(ZEXT32(c),CONST32(29));
2281 v1 = SHL(ZEXT32(v1),CONST(28));
2282 nzcv = OR(OR(OR(n,z),c),v1);
2283 v = R(VFP_FPSCR);
2284 tmp = OR(nzcv,AND(v,CONST32(0x0fffffff)));
2285 LET(VFP_FPSCR,tmp);
2286 }
2287 return No_exp;
2288}
2289#endif
2290#undef vfpinstr
2291#undef vfpinstr_inst
2292#undef VFPLABEL_INST
2293
2294/* ----------------------------------------------------------------------- */
2295/* VCVTBDS between double and single */
2296/* cond 1110 1D11 0111 Vd-- 101X 11M0 Vm-- */
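/*
 * The conversion direction follows the sz bit (bit 8): when it is set the source is
 * a double, which the translator below rebuilds from the S(2m)/S(2m+1) pair and
 * narrows with FPTRUNC; when it is clear the single source Sm is widened with FPEXT
 * and the 64-bit result is written back as a register pair.
 */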
2297#define vfpinstr vcvtbds
2298#define vfpinstr_inst vcvtbds_inst
2299#define VFPLABEL_INST VCVTBDS_INST
2300#ifdef VFP_DECODE
2301{"vcvt(bds)", 5, ARMVFP2, 23, 27, 0x1d, 16, 21, 0x37, 9, 11, 0x5, 6, 7, 3, 4, 4, 0},
2302#endif
2303#ifdef VFP_DECODE_EXCLUSION
2304{"vcvt(bds)", 0, ARMVFP2, 0},
2305#endif
2306#ifdef VFP_INTERPRETER_TABLE
2307INTERPRETER_TRANSLATE(vfpinstr),
2308#endif
2309#ifdef VFP_INTERPRETER_LABEL
2310&&VFPLABEL_INST,
2311#endif
2312#ifdef VFP_INTERPRETER_STRUCT
2313typedef struct _vcvtbds_inst {
2314 unsigned int instr;
2315 unsigned int dp_operation;
2316} vfpinstr_inst;
2317#endif
2318#ifdef VFP_INTERPRETER_TRANS
2319ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
2320{
2321 VFP_DEBUG_TRANSLATE;
2322
2323 arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
2324 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
2325
2326 inst_base->cond = BITS(inst, 28, 31);
2327 inst_base->idx = index;
2328 inst_base->br = NON_BRANCH;
2329 inst_base->load_r15 = 0;
2330
2331 inst_cream->dp_operation = BIT(inst, 8);
2332 inst_cream->instr = inst;
2333
2334 return inst_base;
2335}
2336#endif
2337#ifdef VFP_INTERPRETER_IMPL
2338VFPLABEL_INST:
2339{
2340 INC_ICOUNTER;
2341 if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
2342 CHECK_VFP_ENABLED;
2343
2344 DBG("VCVT(BDS) :\n");
2345
2346 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
2347
2348 int ret;
2349
2350 if (inst_cream->dp_operation)
2351 ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
2352 else
2353 ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
2354
2355 CHECK_VFP_CDP_RET;
2356 }
2357 cpu->Reg[15] += GET_INST_SIZE(cpu);
2358 INC_PC(sizeof(vfpinstr_inst));
2359 FETCH_INST;
2360 GOTO_NEXT_INST;
2361}
2362#endif
2363#ifdef VFP_CDP_TRANS
2364if ((OPC_1 & 0xB) == 0xB && CRn == 7 && (OPC_2 & 0x6) == 6)
2365{
2366 DBG("VCVT(BDS) :\n");
2367}
2368#endif
2369#ifdef VFP_DYNCOM_TABLE
2370DYNCOM_FILL_ACTION(vfpinstr),
2371#endif
2372#ifdef VFP_DYNCOM_TAG
2373int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
2374{
2375 int instr_size = INSTR_SIZE;
2376 //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
2377 arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
2378 return instr_size;
2379}
2380#endif
2381#ifdef VFP_DYNCOM_TRANS
2382int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
2383    DBG("\t\tin %s instruction is executed out of JIT.\n", __FUNCTION__);
2384 //arch_arm_undef(cpu, bb, instr);
2385 int dp_op = (BIT(8) == 1);
2386 int d = dp_op ? BITS(12,15) << 1 | BIT(22) : BIT(22) << 4 | BITS(12,15);
2387 int m = dp_op ? BITS(0,3) | BIT(5) << 4 : BIT(5) | BITS(0,3) << 1;
2388 int d2s = dp_op;
2389 Value* v;
2390 Value* tmp;
2391 Value* v1;
2392 if(d2s){
2393 v = SHL(ZEXT64(IBITCAST32(FR32(2 * m + 1))),CONST64(32));
2394 tmp = ZEXT64(IBITCAST32(FR32(2 * m)));
2395 v1 = OR(v,tmp);
2396 tmp = FPTRUNC(32,FPBITCAST64(v1));
2397 LETFPS(d,tmp);
2398 }else {
2399 v = FR32(m);
2400 tmp = FPEXT(64,v);
2401 v = IBITCAST64(tmp);
2402 tmp = TRUNC32(AND(v,CONST64(0xffffffff)));
2403 v1 = TRUNC32(LSHR(v,CONST64(32)));
2404 LETFPS(2 * d, FPBITCAST32(tmp) );
2405 LETFPS(2 * d + 1, FPBITCAST32(v1));
2406 }
2407 return No_exp;
2408}
2409#endif
2410#undef vfpinstr
2411#undef vfpinstr_inst
2412#undef VFPLABEL_INST
2413
2414/* ----------------------------------------------------------------------- */
2415/* VCVTBFF between floating point and fixed point */
2416/* cond 1110 1D11 1op2 Vd-- 101X X1M0 Vm-- */
2417#define vfpinstr vcvtbff
2418#define vfpinstr_inst vcvtbff_inst
2419#define VFPLABEL_INST VCVTBFF_INST
2420#ifdef VFP_DECODE
2421{"vcvt(bff)", 6, ARMVFP3, 23, 27, 0x1d, 19, 21, 0x7, 17, 17, 0x1, 9, 11, 0x5, 6, 6, 1},
2422#endif
2423#ifdef VFP_DECODE_EXCLUSION
2424{"vcvt(bff)", 0, ARMVFP3, 4, 4, 1},
2425#endif
2426#ifdef VFP_INTERPRETER_TABLE
2427INTERPRETER_TRANSLATE(vfpinstr),
2428#endif
2429#ifdef VFP_INTERPRETER_LABEL
2430&&VFPLABEL_INST,
2431#endif
2432#ifdef VFP_INTERPRETER_STRUCT
2433typedef struct _vcvtbff_inst {
2434 unsigned int instr;
2435 unsigned int dp_operation;
2436} vfpinstr_inst;
2437#endif
2438#ifdef VFP_INTERPRETER_TRANS
2439ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
2440{
2441 VFP_DEBUG_TRANSLATE;VFP_DEBUG_UNTESTED(VCVTBFF);
2442
2443 arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
2444 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
2445
2446 inst_base->cond = BITS(inst, 28, 31);
2447 inst_base->idx = index;
2448 inst_base->br = NON_BRANCH;
2449 inst_base->load_r15 = 0;
2450
2451 inst_cream->dp_operation = BIT(inst, 8);
2452 inst_cream->instr = inst;
2453
2454 return inst_base;
2455}
2456#endif
2457#ifdef VFP_INTERPRETER_IMPL
2458VFPLABEL_INST:
2459{
2460 INC_ICOUNTER;
2461 if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
2462 CHECK_VFP_ENABLED;
2463
2464 DBG("VCVT(BFF) :\n");
2465
2466 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
2467
2468 int ret;
2469
2470 if (inst_cream->dp_operation)
2471 ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
2472 else
2473 ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
2474
2475 CHECK_VFP_CDP_RET;
2476 }
2477 cpu->Reg[15] += GET_INST_SIZE(cpu);
2478 INC_PC(sizeof(vfpinstr_inst));
2479 FETCH_INST;
2480 GOTO_NEXT_INST;
2481}
2482#endif
2483#ifdef VFP_CDP_TRANS
2484if ((OPC_1 & 0xB) == 0xB && CRn >= 0xA && (OPC_2 & 0x2) == 2)
2485{
2486 DBG("VCVT(BFF) :\n");
2487}
2488#endif
2489#ifdef VFP_DYNCOM_TABLE
2490DYNCOM_FILL_ACTION(vfpinstr),
2491#endif
2492#ifdef VFP_DYNCOM_TAG
2493int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
2494{
2495 int instr_size = INSTR_SIZE;
2496 DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
2497 arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
2498 return instr_size;
2499}
2500#endif
2501#ifdef VFP_DYNCOM_TRANS
2502int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
2503 DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
2504 arch_arm_undef(cpu, bb, instr);
2505 return No_exp;
2506}
2507#endif
2508#undef vfpinstr
2509#undef vfpinstr_inst
2510#undef VFPLABEL_INST
2511
2512/* ----------------------------------------------------------------------- */
2513/* VCVTBFI between floating point and integer */
2514/* cond 1110 1D11 1op2 Vd-- 101X X1M0 Vm-- */
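/*
 * In the dyncom translator below, opc2 (bits 18:16) selects the direction: values
 * 1xx convert a float to an integer, with bit 16 choosing signed (FPTOSI) versus
 * unsigned (FPTOUI); otherwise an integer held in Sm is converted to a float, with
 * bit 7 choosing signed (SITOFP) versus unsigned (UITOFP). The interpreter path
 * defers to vfp_single_cpdo / vfp_double_cpdo, and vcvtbfi_instr_impl() does the
 * same while also raising any resulting VFP exceptions.
 */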
2515#define vfpinstr vcvtbfi
2516#define vfpinstr_inst vcvtbfi_inst
2517#define VFPLABEL_INST VCVTBFI_INST
2518#ifdef VFP_DECODE
2519{"vcvt(bfi)", 5, ARMVFP2, 23, 27, 0x1d, 19, 21, 0x7, 9, 11, 0x5, 6, 6, 1, 4, 4, 0},
2520#endif
2521#ifdef VFP_DECODE_EXCLUSION
2522{"vcvt(bfi)", 0, ARMVFP2, 0},
2523#endif
2524#ifdef VFP_INTERPRETER_TABLE
2525INTERPRETER_TRANSLATE(vfpinstr),
2526#endif
2527#ifdef VFP_INTERPRETER_LABEL
2528&&VFPLABEL_INST,
2529#endif
2530#ifdef VFP_INTERPRETER_STRUCT
2531typedef struct _vcvtbfi_inst {
2532 unsigned int instr;
2533 unsigned int dp_operation;
2534} vfpinstr_inst;
2535#endif
2536#ifdef VFP_INTERPRETER_TRANS
2537ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
2538{
2539 VFP_DEBUG_TRANSLATE;
2540
2541 arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
2542 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
2543
2544 inst_base->cond = BITS(inst, 28, 31);
2545 inst_base->idx = index;
2546 inst_base->br = NON_BRANCH;
2547 inst_base->load_r15 = 0;
2548
2549 inst_cream->dp_operation = BIT(inst, 8);
2550 inst_cream->instr = inst;
2551
2552
2553 return inst_base;
2554}
2555#endif
2556#ifdef VFP_INTERPRETER_IMPL
2557VFPLABEL_INST:
2558{
2559 INC_ICOUNTER;
2560 if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
2561 CHECK_VFP_ENABLED;
2562
2563 DBG("VCVT(BFI) :\n");
2564
2565 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
2566
2567 int ret;
2568
2569 if (inst_cream->dp_operation)
2570 ret = vfp_double_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
2571 else
2572 ret = vfp_single_cpdo(cpu, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
2573
2574 CHECK_VFP_CDP_RET;
2575 }
2576 cpu->Reg[15] += GET_INST_SIZE(cpu);
2577 INC_PC(sizeof(vfpinstr_inst));
2578 FETCH_INST;
2579 GOTO_NEXT_INST;
2580}
2581#endif
2582#ifdef VFP_CDP_TRANS
2583if ((OPC_1 & 0xB) == 0xB && CRn > 7 && (OPC_2 & 0x2) == 2)
2584{
2585 DBG("VCVT(BFI) :\n");
2586}
2587#endif
2588#ifdef VFP_DYNCOM_TABLE
2589DYNCOM_FILL_ACTION(vfpinstr),
2590#endif
2591#ifdef VFP_DYNCOM_TAG
2592int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
2593{
2594 int instr_size = INSTR_SIZE;
2595 //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
2596 DBG("\t\tin %s, instruction will be executed out of JIT.\n", __FUNCTION__);
2597 //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
2598 arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
2599 return instr_size;
2600}
2601#endif
2602#ifdef VFP_DYNCOM_TRANS
2603int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
2604 DBG("\t\tin %s, instruction will be executed out of JIT.\n", __FUNCTION__);
2605 //arch_arm_undef(cpu, bb, instr);
2606 unsigned int opc2 = BITS(16,18);
2607 int to_integer = ((opc2 >> 2) == 1);
2608 int dp_op = (BIT(8) == 1);
2609 unsigned int op = BIT(7);
2610 int m,d;
2611 Value* v;
2612 Value* hi;
2613 Value* lo;
2614 Value* v64;
2615 if(to_integer){
2616 d = BIT(22) | (BITS(12,15) << 1);
2617 if(dp_op)
2618 m = BITS(0,3) | BIT(5) << 4;
2619 else
2620 m = BIT(5) | BITS(0,3) << 1;
2621 }else {
2622 m = BIT(5) | BITS(0,3) << 1;
2623 if(dp_op)
2624 d = BITS(12,15) | BIT(22) << 4;
2625 else
2626 d = BIT(22) | BITS(12,15) << 1;
2627 }
2628 if(to_integer){
2629 if(dp_op){
2630 lo = FR32(m * 2);
2631 hi = FR32(m * 2 + 1);
2632 hi = ZEXT64(IBITCAST32(hi));
2633 lo = ZEXT64(IBITCAST32(lo));
2634 v64 = OR(SHL(hi,CONST64(32)),lo);
2635 if(BIT(16)){
2636 v = FPTOSI(32,FPBITCAST64(v64));
2637 }
2638 else
2639 v = FPTOUI(32,FPBITCAST64(v64));
2640
2641 v = FPBITCAST32(v);
2642 LETFPS(d,v);
2643 }else {
2644 v = FR32(m);
2645 if(BIT(16)){
2646
2647 v = FPTOSI(32,v);
2648 }
2649 else
2650 v = FPTOUI(32,v);
2651 LETFPS(d,FPBITCAST32(v));
2652 }
2653 }else {
2654 if(dp_op){
2655 v = IBITCAST32(FR32(m));
2656 if(BIT(7))
2657 v64 = SITOFP(64,v);
2658 else
2659 v64 = UITOFP(64,v);
2660 v = IBITCAST64(v64);
2661 hi = FPBITCAST32(TRUNC32(LSHR(v,CONST64(32))));
2662 lo = FPBITCAST32(TRUNC32(AND(v,CONST64(0xffffffff))));
2663 LETFPS(2 * d , lo);
2664 LETFPS(2 * d + 1, hi);
2665 }else {
2666 v = IBITCAST32(FR32(m));
2667 if(BIT(7))
2668 v = SITOFP(32,v);
2669 else
2670 v = UITOFP(32,v);
2671 LETFPS(d,v);
2672 }
2673 }
2674 return No_exp;
2675}
2676
2677/**
2678* @brief The implementation of c language for vcvtbfi instruction of dyncom
2679*
2680* @param cpu
2681* @param instr
2682*
2683* @return
2684*/
2685int vcvtbfi_instr_impl(arm_core_t* cpu, uint32 instr){
2686 int dp_operation = BIT(8);
2687 int ret;
2688 if (dp_operation)
2689 ret = vfp_double_cpdo(cpu, instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
2690 else
2691 ret = vfp_single_cpdo(cpu, instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
2692
2693 vfp_raise_exceptions(cpu, ret, instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
2694 return 0;
2695}
2696#endif
2697#undef vfpinstr
2698#undef vfpinstr_inst
2699#undef VFPLABEL_INST
2700
2701/* ----------------------------------------------------------------------- */
2702/* MRC / MCR instructions */
2703/* cond 1110 AAAL XXXX XXXX 101C XBB1 XXXX */
2704/* cond 1110 op11 CRn- Rt-- copr op21 CRm- */
2705
2706/* ----------------------------------------------------------------------- */
2707/* VMOVBRS between register and single precision */
2708/* cond 1110 000o Vn-- Rt-- 1010 N001 0000 */
2709/* cond 1110 op11 CRn- Rt-- copr op21 CRm- MRC */
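/*
 * Bit 20 ("o" above) gives the direction: set means Sn -> Rt, clear means Rt -> Sn.
 * The single register index is Vn:N (BITS(16,19) << 1 | BIT(7)), and the 32-bit
 * pattern is moved unmodified, which is why the dyncom translator below is just a
 * pair of bitcasts between the integer and FP views of the register.
 */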
2710#define vfpinstr vmovbrs
2711#define vfpinstr_inst vmovbrs_inst
2712#define VFPLABEL_INST VMOVBRS_INST
2713#ifdef VFP_DECODE
2714{"vmovbrs", 3, ARMVFP2, 21, 27, 0x70, 8, 11, 0xA, 0, 6, 0x10},
2715#endif
2716#ifdef VFP_DECODE_EXCLUSION
2717{"vmovbrs", 0, ARMVFP2, 0},
2718#endif
2719#ifdef VFP_INTERPRETER_TABLE
2720INTERPRETER_TRANSLATE(vfpinstr),
2721#endif
2722#ifdef VFP_INTERPRETER_LABEL
2723&&VFPLABEL_INST,
2724#endif
2725#ifdef VFP_INTERPRETER_STRUCT
2726typedef struct _vmovbrs_inst {
2727 unsigned int to_arm;
2728 unsigned int t;
2729 unsigned int n;
2730} vfpinstr_inst;
2731#endif
2732#ifdef VFP_INTERPRETER_TRANS
2733ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
2734{
2735 VFP_DEBUG_TRANSLATE;
2736
2737 arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
2738 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
2739
2740 inst_base->cond = BITS(inst, 28, 31);
2741 inst_base->idx = index;
2742 inst_base->br = NON_BRANCH;
2743 inst_base->load_r15 = 0;
2744
2745 inst_cream->to_arm = BIT(inst, 20) == 1;
2746 inst_cream->t = BITS(inst, 12, 15);
2747 inst_cream->n = BIT(inst, 7) | BITS(inst, 16, 19)<<1;
2748
2749 return inst_base;
2750}
2751#endif
2752#ifdef VFP_INTERPRETER_IMPL
2753VFPLABEL_INST:
2754{
2755 INC_ICOUNTER;
2756 if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
2757 CHECK_VFP_ENABLED;
2758
2759 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
2760
2761 VMOVBRS(cpu, inst_cream->to_arm, inst_cream->t, inst_cream->n, &(cpu->Reg[inst_cream->t]));
2762 }
2763 cpu->Reg[15] += GET_INST_SIZE(cpu);
2764 INC_PC(sizeof(vfpinstr_inst));
2765 FETCH_INST;
2766 GOTO_NEXT_INST;
2767}
2768#endif
2769#ifdef VFP_MRC_TRANS
2770if (OPC_1 == 0x0 && CRm == 0 && (OPC_2 & 0x3) == 0)
2771{
2772 /* VMOV r to s */
2773    /* Transferring Rt is not mandatory, as the value of interest is pointed to by value */
2774 VMOVBRS(state, BIT(20), Rt, BIT(7)|CRn<<1, value);
2775 return ARMul_DONE;
2776}
2777#endif
2778#ifdef VFP_MCR_TRANS
2779if (OPC_1 == 0x0 && CRm == 0 && (OPC_2 & 0x3) == 0)
2780{
2781 /* VMOV s to r */
2782    /* Transferring Rt is not mandatory, as the value of interest is pointed to by value */
2783 VMOVBRS(state, BIT(20), Rt, BIT(7)|CRn<<1, &value);
2784 return ARMul_DONE;
2785}
2786#endif
2787#ifdef VFP_MRC_IMPL
2788void VMOVBRS(ARMul_State * state, ARMword to_arm, ARMword t, ARMword n, ARMword *value)
2789{
2790 DBG("VMOV(BRS) :\n");
2791 if (to_arm)
2792 {
2793 DBG("\tr%d <= s%d=[%x]\n", t, n, state->ExtReg[n]);
2794 *value = state->ExtReg[n];
2795 }
2796 else
2797 {
2798 DBG("\ts%d <= r%d=[%x]\n", n, t, *value);
2799 state->ExtReg[n] = *value;
2800 }
2801}
2802#endif
2803#ifdef VFP_DYNCOM_TABLE
2804DYNCOM_FILL_ACTION(vfpinstr),
2805#endif
2806#ifdef VFP_DYNCOM_TAG
2807int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
2808{
2809 int instr_size = INSTR_SIZE;
2810 //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
2811 arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
2812 return instr_size;
2813}
2814#endif
2815#ifdef VFP_DYNCOM_TRANS
2816int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
2817 DBG("VMOV(BRS) :\n");
2818 int to_arm = BIT(20) == 1;
2819 int t = BITS(12, 15);
2820 int n = BIT(7) | BITS(16, 19)<<1;
2821
2822 if (to_arm)
2823 {
2824 DBG("\tr%d <= s%d\n", t, n);
2825 LET(t, IBITCAST32(FR32(n)));
2826 }
2827 else
2828 {
2829 DBG("\ts%d <= r%d\n", n, t);
2830 LETFPS(n, FPBITCAST32(R(t)));
2831 }
2832 return No_exp;
2833}
2834#endif
2835#undef vfpinstr
2836#undef vfpinstr_inst
2837#undef VFPLABEL_INST
2838
2839/* ----------------------------------------------------------------------- */
2840/* VMSR */
2841/* cond 1110 1110 reg- Rt-- 1010 0001 0000 */
2842/* cond 1110 op10 CRn- Rt-- copr op21 CRm- MCR */
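/*
 * The reg field (bits 19:16) selects the destination system register: 1 writes
 * FPSCR and 8 writes FPEXC; everything else is treated as subarchitecture defined
 * and ignored here. Rt == 15 is not supported by the dyncom translator below.
 */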
2843#define vfpinstr vmsr
2844#define vfpinstr_inst vmsr_inst
2845#define VFPLABEL_INST VMSR_INST
2846#ifdef VFP_DECODE
2847{"vmsr", 2, ARMVFP2, 20, 27, 0xEE, 0, 11, 0xA10},
2848#endif
2849#ifdef VFP_DECODE_EXCLUSION
2850{"vmsr", 0, ARMVFP2, 0},
2851#endif
2852#ifdef VFP_INTERPRETER_TABLE
2853INTERPRETER_TRANSLATE(vfpinstr),
2854#endif
2855#ifdef VFP_INTERPRETER_LABEL
2856&&VFPLABEL_INST,
2857#endif
2858#ifdef VFP_INTERPRETER_STRUCT
2859typedef struct _vmsr_inst {
2860 unsigned int reg;
2861 unsigned int Rd;
2862} vfpinstr_inst;
2863#endif
2864#ifdef VFP_INTERPRETER_TRANS
2865ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
2866{
2867 VFP_DEBUG_TRANSLATE;
2868
2869 arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
2870 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
2871
2872 inst_base->cond = BITS(inst, 28, 31);
2873 inst_base->idx = index;
2874 inst_base->br = NON_BRANCH;
2875 inst_base->load_r15 = 0;
2876
2877 inst_cream->reg = BITS(inst, 16, 19);
2878 inst_cream->Rd = BITS(inst, 12, 15);
2879
2880 return inst_base;
2881}
2882#endif
2883#ifdef VFP_INTERPRETER_IMPL
2884VFPLABEL_INST:
2885{
2886 INC_ICOUNTER;
2887 if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
2888        /* FIXME: special case for access to FPSID and FPEXC, VFP must be disabled,
2889        and in privileged mode */
2890 /* Exceptions must be checked, according to v7 ref manual */
2891 CHECK_VFP_ENABLED;
2892
2893 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
2894
2895 VMSR(cpu, inst_cream->reg, inst_cream->Rd);
2896 }
2897 cpu->Reg[15] += GET_INST_SIZE(cpu);
2898 INC_PC(sizeof(vfpinstr_inst));
2899 FETCH_INST;
2900 GOTO_NEXT_INST;
2901}
2902#endif
2903#ifdef VFP_MCR_TRANS
2904if (OPC_1 == 0x7 && CRm == 0 && OPC_2 == 0)
2905{
2906 VMSR(state, CRn, Rt);
2907 return ARMul_DONE;
2908}
2909#endif
2910#ifdef VFP_MCR_IMPL
2911void VMSR(ARMul_State * state, ARMword reg, ARMword Rt)
2912{
2913 if (reg == 1)
2914 {
2915 DBG("VMSR :\tfpscr <= r%d=[%x]\n", Rt, state->Reg[Rt]);
2916 state->VFP[VFP_OFFSET(VFP_FPSCR)] = state->Reg[Rt];
2917 }
2918 else if (reg == 8)
2919 {
2920 DBG("VMSR :\tfpexc <= r%d=[%x]\n", Rt, state->Reg[Rt]);
2921 state->VFP[VFP_OFFSET(VFP_FPEXC)] = state->Reg[Rt];
2922 }
2923}
2924#endif
2925#ifdef VFP_DYNCOM_TABLE
2926DYNCOM_FILL_ACTION(vfpinstr),
2927#endif
2928#ifdef VFP_DYNCOM_TAG
2929int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
2930{
2931 int instr_size = INSTR_SIZE;
2932 //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
2933 //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
2934 arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
2935 return instr_size;
2936}
2937#endif
2938#ifdef VFP_DYNCOM_TRANS
2939int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
2940 //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
2941 //arch_arm_undef(cpu, bb, instr);
2942 DBG("VMSR :");
2943 if(RD == 15) {
2944        printf("in %s is not implemented.\n", __FUNCTION__);
2945 exit(-1);
2946 }
2947
2948 Value *data = NULL;
2949 int reg = RN;
2950 int Rt = RD;
2951 if (reg == 1)
2952 {
2953 LET(VFP_FPSCR, R(Rt));
2954 DBG("\tflags <= fpscr\n");
2955 }
2956 else
2957 {
2958 switch (reg)
2959 {
2960 case 8:
2961 LET(VFP_FPEXC, R(Rt));
2962 DBG("\tfpexc <= r%d \n", Rt);
2963 break;
2964 default:
2965 DBG("\tSUBARCHITECTURE DEFINED\n");
2966 break;
2967 }
2968 }
2969 return No_exp;
2970}
2971#endif
2972#undef vfpinstr
2973#undef vfpinstr_inst
2974#undef VFPLABEL_INST
2975
2976/* ----------------------------------------------------------------------- */
2977/* VMOVBRC register to scalar */
2978/* cond 1110 0XX0 Vd-- Rt-- 1011 DXX1 0000 */
2979/* cond 1110 op10 CRn- Rt-- copr op21 CRm- MCR */
2980#define vfpinstr vmovbrc
2981#define vfpinstr_inst vmovbrc_inst
2982#define VFPLABEL_INST VMOVBRC_INST
2983#ifdef VFP_DECODE
2984{"vmovbrc", 4, ARMVFP2, 23, 27, 0x1C, 20, 20, 0x0, 8,11,0xB, 0,4,0x10},
2985#endif
2986#ifdef VFP_DECODE_EXCLUSION
2987{"vmovbrc", 0, ARMVFP2, 0},
2988#endif
2989#ifdef VFP_INTERPRETER_TABLE
2990INTERPRETER_TRANSLATE(vfpinstr),
2991#endif
2992#ifdef VFP_INTERPRETER_LABEL
2993&&VFPLABEL_INST,
2994#endif
2995#ifdef VFP_INTERPRETER_STRUCT
2996typedef struct _vmovbrc_inst {
2997 unsigned int esize;
2998 unsigned int index;
2999 unsigned int d;
3000 unsigned int t;
3001} vfpinstr_inst;
3002#endif
3003#ifdef VFP_INTERPRETER_TRANS
3004ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
3005{
3006 VFP_DEBUG_TRANSLATE;
3007
3008 arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
3009 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
3010
3011 inst_base->cond = BITS(inst, 28, 31);
3012 inst_base->idx = index;
3013 inst_base->br = NON_BRANCH;
3014 inst_base->load_r15 = 0;
3015
3016 inst_cream->d = BITS(inst, 16, 19)|BIT(inst, 7)<<4;
3017 inst_cream->t = BITS(inst, 12, 15);
3018 /* VFP variant of instruction */
3019 inst_cream->esize = 32;
3020 inst_cream->index = BIT(inst, 21);
3021
3022 return inst_base;
3023}
3024#endif
3025#ifdef VFP_INTERPRETER_IMPL
3026VFPLABEL_INST:
3027{
3028 INC_ICOUNTER;
3029 if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
3030 CHECK_VFP_ENABLED;
3031
3032 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
3033
3034 VFP_DEBUG_UNIMPLEMENTED(VMOVBRC);
3035 }
3036 cpu->Reg[15] += GET_INST_SIZE(cpu);
3037 INC_PC(sizeof(vfpinstr_inst));
3038 FETCH_INST;
3039 GOTO_NEXT_INST;
3040}
3041#endif
3042#ifdef VFP_MCR_TRANS
3043if ((OPC_1 & 0x4) == 0 && CoProc == 11 && CRm == 0)
3044{
3045 VFP_DEBUG_UNIMPLEMENTED(VMOVBRC);
3046 return ARMul_DONE;
3047}
3048#endif
3049#ifdef VFP_DYNCOM_TABLE
3050DYNCOM_FILL_ACTION(vfpinstr),
3051#endif
3052#ifdef VFP_DYNCOM_TAG
3053int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
3054{
3055 int instr_size = INSTR_SIZE;
3056 DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
3057 arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
3058 return instr_size;
3059}
3060#endif
3061#ifdef VFP_DYNCOM_TRANS
3062int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
3063 DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
3064 arch_arm_undef(cpu, bb, instr);
3065 return No_exp;
3066}
3067#endif
3068#undef vfpinstr
3069#undef vfpinstr_inst
3070#undef VFPLABEL_INST
3071
3072/* ----------------------------------------------------------------------- */
3073/* VMRS */
3074/* cond 1110 1111 CRn- Rt-- 1010 0001 0000 */
3075/* cond 1110 op11 CRn- Rt-- copr op21 CRm- MRC */
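/*
 * The reg field selects the source: 0 reads FPSID, 1 reads FPSCR, 8 reads FPEXC,
 * while 6 and 7 (MVFR1/MVFR0) are left unimplemented here. For reg == 1 with
 * Rt == 15, the interpreter and dyncom paths below copy FPSCR[31:28] into the ARM
 * N/Z/C/V flags instead of writing a core register (the VMRS APSR_nzcv, FPSCR form).
 */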
3076#define vfpinstr vmrs
3077#define vfpinstr_inst vmrs_inst
3078#define VFPLABEL_INST VMRS_INST
3079#ifdef VFP_DECODE
3080{"vmrs", 2, ARMVFP2, 20, 27, 0xEF, 0, 11, 0xa10},
3081#endif
3082#ifdef VFP_DECODE_EXCLUSION
3083{"vmrs", 0, ARMVFP2, 0},
3084#endif
3085#ifdef VFP_INTERPRETER_TABLE
3086INTERPRETER_TRANSLATE(vfpinstr),
3087#endif
3088#ifdef VFP_INTERPRETER_LABEL
3089&&VFPLABEL_INST,
3090#endif
3091#ifdef VFP_INTERPRETER_STRUCT
3092typedef struct _vmrs_inst {
3093 unsigned int reg;
3094 unsigned int Rt;
3095} vfpinstr_inst;
3096#endif
3097#ifdef VFP_INTERPRETER_TRANS
3098ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
3099{
3100 VFP_DEBUG_TRANSLATE;
3101
3102 arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
3103 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
3104
3105 inst_base->cond = BITS(inst, 28, 31);
3106 inst_base->idx = index;
3107 inst_base->br = NON_BRANCH;
3108 inst_base->load_r15 = 0;
3109
3110 inst_cream->reg = BITS(inst, 16, 19);
3111 inst_cream->Rt = BITS(inst, 12, 15);
3112
3113 return inst_base;
3114}
3115#endif
3116#ifdef VFP_INTERPRETER_IMPL
3117VFPLABEL_INST:
3118{
3119 INC_ICOUNTER;
3120 if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
3121 /* FIXME: special case for access to FPSID and FPEXC, VFP must be disabled,
3122        and in privileged mode */
3123 /* Exceptions must be checked, according to v7 ref manual */
3124 CHECK_VFP_ENABLED;
3125
3126 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
3127
3128 DBG("VMRS :");
3129
3130 if (inst_cream->reg == 1) /* FPSCR */
3131 {
3132 if (inst_cream->Rt != 15)
3133 {
3134 cpu->Reg[inst_cream->Rt] = cpu->VFP[VFP_OFFSET(VFP_FPSCR)];
3135 DBG("\tr%d <= fpscr[%08x]\n", inst_cream->Rt, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]);
3136 }
3137 else
3138 {
3139 cpu->NFlag = (cpu->VFP[VFP_OFFSET(VFP_FPSCR)] >> 31) & 1;
3140 cpu->ZFlag = (cpu->VFP[VFP_OFFSET(VFP_FPSCR)] >> 30) & 1;
3141 cpu->CFlag = (cpu->VFP[VFP_OFFSET(VFP_FPSCR)] >> 29) & 1;
3142 cpu->VFlag = (cpu->VFP[VFP_OFFSET(VFP_FPSCR)] >> 28) & 1;
3143 DBG("\tflags <= fpscr[%1xxxxxxxx]\n", cpu->VFP[VFP_OFFSET(VFP_FPSCR)]>>28);
3144 }
3145 }
3146 else
3147 {
3148 switch (inst_cream->reg)
3149 {
3150 case 0:
3151 cpu->Reg[inst_cream->Rt] = cpu->VFP[VFP_OFFSET(VFP_FPSID)];
3152 DBG("\tr%d <= fpsid[%08x]\n", inst_cream->Rt, cpu->VFP[VFP_OFFSET(VFP_FPSID)]);
3153 break;
3154 case 6:
3155 /* MVFR1, VFPv3 only ? */
3156 DBG("\tr%d <= MVFR1 unimplemented\n", inst_cream->Rt);
3157 break;
3158 case 7:
3159 /* MVFR0, VFPv3 only? */
3160 DBG("\tr%d <= MVFR0 unimplemented\n", inst_cream->Rt);
3161 break;
3162 case 8:
3163 cpu->Reg[inst_cream->Rt] = cpu->VFP[VFP_OFFSET(VFP_FPEXC)];
3164 DBG("\tr%d <= fpexc[%08x]\n", inst_cream->Rt, cpu->VFP[VFP_OFFSET(VFP_FPEXC)]);
3165 break;
3166 default:
3167 DBG("\tSUBARCHITECTURE DEFINED\n");
3168 break;
3169 }
3170 }
3171 }
3172 cpu->Reg[15] += GET_INST_SIZE(cpu);
3173 INC_PC(sizeof(vfpinstr_inst));
3174 FETCH_INST;
3175 GOTO_NEXT_INST;
3176}
3177#endif
3178#ifdef VFP_MRC_TRANS
3179if (OPC_1 == 0x7 && CRm == 0 && OPC_2 == 0)
3180{
3181 VMRS(state, CRn, Rt, value);
3182 return ARMul_DONE;
3183}
3184#endif
3185#ifdef VFP_MRC_IMPL
3186void VMRS(ARMul_State * state, ARMword reg, ARMword Rt, ARMword * value)
3187{
3188 DBG("VMRS :");
3189 if (reg == 1)
3190 {
3191 if (Rt != 15)
3192 {
3193 *value = state->VFP[VFP_OFFSET(VFP_FPSCR)];
3194 DBG("\tr%d <= fpscr[%08x]\n", Rt, state->VFP[VFP_OFFSET(VFP_FPSCR)]);
3195 }
3196 else
3197 {
3198 *value = state->VFP[VFP_OFFSET(VFP_FPSCR)] ;
3199 DBG("\tflags <= fpscr[%1xxxxxxxx]\n", state->VFP[VFP_OFFSET(VFP_FPSCR)]>>28);
3200 }
3201 }
3202 else
3203 {
3204 switch (reg)
3205 {
3206 case 0:
3207 *value = state->VFP[VFP_OFFSET(VFP_FPSID)];
3208 DBG("\tr%d <= fpsid[%08x]\n", Rt, state->VFP[VFP_OFFSET(VFP_FPSID)]);
3209 break;
3210 case 6:
3211 /* MVFR1, VFPv3 only ? */
3212 DBG("\tr%d <= MVFR1 unimplemented\n", Rt);
3213 break;
3214 case 7:
3215 /* MVFR0, VFPv3 only? */
3216 DBG("\tr%d <= MVFR0 unimplemented\n", Rt);
3217 break;
3218 case 8:
3219 *value = state->VFP[VFP_OFFSET(VFP_FPEXC)];
3220 DBG("\tr%d <= fpexc[%08x]\n", Rt, state->VFP[VFP_OFFSET(VFP_FPEXC)]);
3221 break;
3222 default:
3223 DBG("\tSUBARCHITECTURE DEFINED\n");
3224 break;
3225 }
3226 }
3227}
3228#endif
3229#ifdef VFP_DYNCOM_TABLE
3230DYNCOM_FILL_ACTION(vfpinstr),
3231#endif
3232#ifdef VFP_DYNCOM_TAG
3233int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
3234{
3235 int instr_size = INSTR_SIZE;
3236 //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
3237 DBG("\t\tin %s .\n", __FUNCTION__);
3238 //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
3239 arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
3240 return instr_size;
3241}
3242#endif
3243#ifdef VFP_DYNCOM_TRANS
3244int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
3245 //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
3246 //arch_arm_undef(cpu, bb, instr);
3247
3248 Value *data = NULL;
3249    int reg = BITS(16, 19);
3250 int Rt = BITS(12, 15);
3251 DBG("VMRS : reg=%d, Rt=%d\n", reg, Rt);
3252 if (reg == 1)
3253 {
3254 if (Rt != 15)
3255 {
3256 LET(Rt, R(VFP_FPSCR));
3257 DBG("\tr%d <= fpscr\n", Rt);
3258 }
3259 else
3260 {
3261 //LET(Rt, R(VFP_FPSCR));
3262 update_cond_from_fpscr(cpu, instr, bb, pc);
3263 DBG("In %s, \tflags <= fpscr\n", __FUNCTION__);
3264 }
3265 }
3266 else
3267 {
3268 switch (reg)
3269 {
3270 case 0:
3271 LET(Rt, R(VFP_FPSID));
3272 DBG("\tr%d <= fpsid\n", Rt);
3273 break;
3274 case 6:
3275 /* MVFR1, VFPv3 only ? */
3276 DBG("\tr%d <= MVFR1 unimplemented\n", Rt);
3277 break;
3278 case 7:
3279 /* MVFR0, VFPv3 only? */
3280 DBG("\tr%d <= MVFR0 unimplemented\n", Rt);
3281 break;
3282 case 8:
3283 LET(Rt, R(VFP_FPEXC));
3284 DBG("\tr%d <= fpexc\n", Rt);
3285 break;
3286 default:
3287 DBG("\tSUBARCHITECTURE DEFINED\n");
3288 break;
3289 }
3290 }
3291
3292 return No_exp;
3293}
3294#endif
3295#undef vfpinstr
3296#undef vfpinstr_inst
3297#undef VFPLABEL_INST
3298
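/* Each instruction entry below follows the same multi-include pattern: the block is
 * re-expanded under the VFP_DECODE, VFP_INTERPRETER_*, VFP_MCR/MRC/MCRR/MRRC/LDC/STC_*
 * and VFP_DYNCOM_* guards, so a single definition feeds the decode table, the
 * interpreter and the dyncom translator. */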
3299/* ----------------------------------------------------------------------- */
3300/* VMOVBCR scalar to register */
3301/* cond 1110 XXX1 Vd-- Rt-- 1011 NXX1 0000 */
3302/* cond 1110 op11 CRn- Rt-- copr op21 CRm- MCR */
3303#define vfpinstr vmovbcr
3304#define vfpinstr_inst vmovbcr_inst
3305#define VFPLABEL_INST VMOVBCR_INST
3306#ifdef VFP_DECODE
3307{"vmovbcr", 4, ARMVFP2, 24, 27, 0xE, 20, 20, 1, 8, 11,0xB, 0,4, 0x10},
3308#endif
3309#ifdef VFP_DECODE_EXCLUSION
3310{"vmovbcr", 0, ARMVFP2, 0},
3311#endif
3312#ifdef VFP_INTERPRETER_TABLE
3313INTERPRETER_TRANSLATE(vfpinstr),
3314#endif
3315#ifdef VFP_INTERPRETER_LABEL
3316&&VFPLABEL_INST,
3317#endif
3318#ifdef VFP_INTERPRETER_STRUCT
3319typedef struct _vmovbcr_inst {
3320 unsigned int esize;
3321 unsigned int index;
3322 unsigned int d;
3323 unsigned int t;
3324} vfpinstr_inst;
3325#endif
3326#ifdef VFP_INTERPRETER_TRANS
3327ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
3328{
3329 VFP_DEBUG_TRANSLATE;
3330
3331 arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
3332 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
3333
3334 inst_base->cond = BITS(inst, 28, 31);
3335 inst_base->idx = index;
3336 inst_base->br = NON_BRANCH;
3337 inst_base->load_r15 = 0;
3338
3339 inst_cream->d = BITS(inst, 16, 19)|BIT(inst, 7)<<4;
3340 inst_cream->t = BITS(inst, 12, 15);
3341 /* VFP variant of instruction */
3342 inst_cream->esize = 32;
3343 inst_cream->index = BIT(inst, 21);
3344
3345 return inst_base;
3346}
3347#endif
3348#ifdef VFP_INTERPRETER_IMPL
3349VFPLABEL_INST:
3350{
3351 INC_ICOUNTER;
3352 if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
3353 CHECK_VFP_ENABLED;
3354
3355 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
3356
3357 VFP_DEBUG_UNIMPLEMENTED(VMOVBCR);
3358 }
3359 cpu->Reg[15] += GET_INST_SIZE(cpu);
3360 INC_PC(sizeof(vfpinstr_inst));
3361 FETCH_INST;
3362 GOTO_NEXT_INST;
3363}
3364#endif
3365#ifdef VFP_MCR_TRANS
3366if (CoProc == 11 && CRm == 0)
3367{
3368 VFP_DEBUG_UNIMPLEMENTED(VMOVBCR);
3369 return ARMul_DONE;
3370}
3371#endif
3372#ifdef VFP_DYNCOM_TABLE
3373DYNCOM_FILL_ACTION(vfpinstr),
3374#endif
3375#ifdef VFP_DYNCOM_TAG
3376int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
3377{
3378 int instr_size = INSTR_SIZE;
3379 DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
3380 arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
3381 return instr_size;
3382}
3383#endif
3384#ifdef VFP_DYNCOM_TRANS
3385int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
3386 DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
3387 arch_arm_undef(cpu, bb, instr);
3388 return No_exp;
3389}
3390#endif
3391#undef vfpinstr
3392#undef vfpinstr_inst
3393#undef VFPLABEL_INST
3394
3395/* ----------------------------------------------------------------------- */
3396/* MRRC / MCRR instructions */
3397/* cond 1100 0101 Rt2- Rt-- copr opc1 CRm- MRRC */
3398/* cond 1100 0100 Rt2- Rt-- copr opc1 CRm- MCRR */
3399
3400/* ----------------------------------------------------------------------- */
3401/* VMOVBRRSS between 2 registers to 2 singles */
3402/* cond 1100 010X Rt2- Rt-- 1010 00X1 Vm-- */
3403/* cond 1100 0101 Rt2- Rt-- copr opc1 CRm- MRRC */
3404#define vfpinstr vmovbrrss
3405#define vfpinstr_inst vmovbrrss_inst
3406#define VFPLABEL_INST VMOVBRRSS_INST
3407#ifdef VFP_DECODE
3408{"vmovbrrss", 3, ARMVFP2, 21, 27, 0x62, 8, 11, 0xA, 4, 4, 1},
3409#endif
3410#ifdef VFP_DECODE_EXCLUSION
3411{"vmovbrrss", 0, ARMVFP2, 0},
3412#endif
3413#ifdef VFP_INTERPRETER_TABLE
3414INTERPRETER_TRANSLATE(vfpinstr),
3415#endif
3416#ifdef VFP_INTERPRETER_LABEL
3417&&VFPLABEL_INST,
3418#endif
3419#ifdef VFP_INTERPRETER_STRUCT
3420typedef struct _vmovbrrss_inst {
3421 unsigned int to_arm;
3422 unsigned int t;
3423 unsigned int t2;
3424 unsigned int m;
3425} vfpinstr_inst;
3426#endif
3427#ifdef VFP_INTERPRETER_TRANS
3428ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
3429{
3430 VFP_DEBUG_TRANSLATE;
3431
3432 arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
3433 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
3434
3435 inst_base->cond = BITS(inst, 28, 31);
3436 inst_base->idx = index;
3437 inst_base->br = NON_BRANCH;
3438 inst_base->load_r15 = 0;
3439
3440 inst_cream->to_arm = BIT(inst, 20) == 1;
3441 inst_cream->t = BITS(inst, 12, 15);
3442 inst_cream->t2 = BITS(inst, 16, 19);
3443 inst_cream->m = BITS(inst, 0, 3)<<1|BIT(inst, 5);
3444
3445 return inst_base;
3446}
3447#endif
3448#ifdef VFP_INTERPRETER_IMPL
3449VFPLABEL_INST:
3450{
3451 INC_ICOUNTER;
3452 if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
3453 CHECK_VFP_ENABLED;
3454
3455 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
3456
3457 VFP_DEBUG_UNIMPLEMENTED(VMOVBRRSS);
3458 }
3459 cpu->Reg[15] += GET_INST_SIZE(cpu);
3460 INC_PC(sizeof(vfpinstr_inst));
3461 FETCH_INST;
3462 GOTO_NEXT_INST;
3463}
3464#endif
3465#ifdef VFP_MCRR_TRANS
3466if (CoProc == 10 && (OPC_1 & 0xD) == 1)
3467{
3468 VFP_DEBUG_UNIMPLEMENTED(VMOVBRRSS);
3469 return ARMul_DONE;
3470}
3471#endif
3472#ifdef VFP_MRRC_TRANS
3473if (CoProc == 10 && (OPC_1 & 0xD) == 1)
3474{
3475 VFP_DEBUG_UNIMPLEMENTED(VMOVBRRSS);
3476 return ARMul_DONE;
3477}
3478#endif
3479#ifdef VFP_DYNCOM_TABLE
3480DYNCOM_FILL_ACTION(vfpinstr),
3481#endif
3482#ifdef VFP_DYNCOM_TAG
3483int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
3484{
3485 int instr_size = INSTR_SIZE;
3486 DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
3487 arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
3488 return instr_size;
3489}
3490#endif
3491#ifdef VFP_DYNCOM_TRANS
3492int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
3493 DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
3494 arch_arm_undef(cpu, bb, instr);
3495 return No_exp;
3496}
3497#endif
3498#undef vfpinstr
3499#undef vfpinstr_inst
3500#undef VFPLABEL_INST
3501
3502/* ----------------------------------------------------------------------- */
3503/* VMOVBRRD between 2 registers and 1 double */
3504/* cond 1100 010X Rt2- Rt-- 1011 00X1 Vm-- */
3505/* cond 1100 0101 Rt2- Rt-- copr opc1 CRm- MRRC */
3506#define vfpinstr vmovbrrd
3507#define vfpinstr_inst vmovbrrd_inst
3508#define VFPLABEL_INST VMOVBRRD_INST
3509#ifdef VFP_DECODE
3510{"vmovbrrd", 3, ARMVFP2, 21, 27, 0x62, 6, 11, 0x2c, 4, 4, 1},
3511#endif
3512#ifdef VFP_DECODE_EXCLUSION
3513{"vmovbrrd", 0, ARMVFP2, 0},
3514#endif
3515#ifdef VFP_INTERPRETER_TABLE
3516INTERPRETER_TRANSLATE(vfpinstr),
3517#endif
3518#ifdef VFP_INTERPRETER_LABEL
3519&&VFPLABEL_INST,
3520#endif
3521#ifdef VFP_INTERPRETER_STRUCT
3522typedef struct _vmovbrrd_inst {
3523 unsigned int to_arm;
3524 unsigned int t;
3525 unsigned int t2;
3526 unsigned int m;
3527} vfpinstr_inst;
3528#endif
3529#ifdef VFP_INTERPRETER_TRANS
3530ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
3531{
3532 VFP_DEBUG_TRANSLATE;
3533
3534 arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
3535 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
3536
3537 inst_base->cond = BITS(inst, 28, 31);
3538 inst_base->idx = index;
3539 inst_base->br = NON_BRANCH;
3540 inst_base->load_r15 = 0;
3541
3542 inst_cream->to_arm = BIT(inst, 20) == 1;
3543 inst_cream->t = BITS(inst, 12, 15);
3544 inst_cream->t2 = BITS(inst, 16, 19);
3545 inst_cream->m = BIT(inst, 5)<<4 | BITS(inst, 0, 3);
3546
3547 return inst_base;
3548}
3549#endif
3550#ifdef VFP_INTERPRETER_IMPL
3551VFPLABEL_INST:
3552{
3553 INC_ICOUNTER;
3554 if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
3555 CHECK_VFP_ENABLED;
3556
3557 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
3558
3559 VMOVBRRD(cpu, inst_cream->to_arm, inst_cream->t, inst_cream->t2, inst_cream->m,
3560 &(cpu->Reg[inst_cream->t]), &(cpu->Reg[inst_cream->t2]));
3561 }
3562 cpu->Reg[15] += GET_INST_SIZE(cpu);
3563 INC_PC(sizeof(vfpinstr_inst));
3564 FETCH_INST;
3565 GOTO_NEXT_INST;
3566}
3567#endif
3568#ifdef VFP_MCRR_TRANS
3569if (CoProc == 11 && (OPC_1 & 0xD) == 1)
3570{
3571 /* Transferring Rt and Rt2 is not required, as the values of interest are pointed to by value1 and value2 */
3572 VMOVBRRD(state, BIT(20), Rt, Rt2, BIT(5)<<4|CRm, &value1, &value2);
3573 return ARMul_DONE;
3574}
3575#endif
3576#ifdef VFP_MRRC_TRANS
3577if (CoProc == 11 && (OPC_1 & 0xD) == 1)
3578{
3579 /* Transferring Rt and Rt2 is not required, as the values of interest are pointed to by value1 and value2 */
3580 VMOVBRRD(state, BIT(20), Rt, Rt2, BIT(5)<<4|CRm, value1, value2);
3581 return ARMul_DONE;
3582}
3583#endif
3584#ifdef VFP_MRRC_IMPL
3585void VMOVBRRD(ARMul_State * state, ARMword to_arm, ARMword t, ARMword t2, ARMword n, ARMword *value1, ARMword *value2)
3586{
3587 DBG("VMOV(BRRD) :\n");
3588 if (to_arm)
3589 {
3590 DBG("\tr[%d-%d] <= s[%d-%d]=[%x-%x]\n", t2, t, n*2+1, n*2, state->ExtReg[n*2+1], state->ExtReg[n*2]);
3591 *value2 = state->ExtReg[n*2+1];
3592 *value1 = state->ExtReg[n*2];
3593 }
3594 else
3595 {
3596 DBG("\ts[%d-%d] <= r[%d-%d]=[%x-%x]\n", n*2+1, n*2, t2, t, *value2, *value1);
3597 state->ExtReg[n*2+1] = *value2;
3598 state->ExtReg[n*2] = *value1;
3599 }
3600}
3601
3602#endif
3603#ifdef VFP_DYNCOM_TABLE
3604DYNCOM_FILL_ACTION(vfpinstr),
3605#endif
3606#ifdef VFP_DYNCOM_TAG
3607int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
3608{
3609 int instr_size = INSTR_SIZE;
3610 //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
3611 arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
3612 if(instr >> 28 != 0xe)
3613 *tag |= TAG_CONDITIONAL;
3614 return instr_size;
3615}
3616#endif
3617#ifdef VFP_DYNCOM_TRANS
3618int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
3619 //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
3620 //arch_arm_undef(cpu, bb, instr);
3621 int to_arm = BIT(20) == 1;
3622 int t = BITS(12, 15);
3623 int t2 = BITS(16, 19);
3624 int n = BIT(5)<<4 | BITS(0, 3);
3625 if(to_arm){
3626 LET(t, IBITCAST32(FR32(n * 2)));
3627 LET(t2, IBITCAST32(FR32(n * 2 + 1)));
3628 }
3629 else{
3630 LETFPS(n * 2, FPBITCAST32(R(t)));
3631 LETFPS(n * 2 + 1, FPBITCAST32(R(t2)));
3632 }
3633 return No_exp;
3634}
3635#endif
3636#undef vfpinstr
3637#undef vfpinstr_inst
3638#undef VFPLABEL_INST
3639
3640/* ----------------------------------------------------------------------- */
3641/* LDC/STC between 2 registers and 1 double */
3642/* cond 110X XXX1 Rn-- CRd- copr imm- imm- LDC */
3643/* cond 110X XXX0 Rn-- CRd- copr imm8 imm8 STC */
3644
3645/* ----------------------------------------------------------------------- */
3646/* VSTR */
3647/* cond 1101 UD00 Rn-- Vd-- 101X imm8 imm8 */
3648#define vfpinstr vstr
3649#define vfpinstr_inst vstr_inst
3650#define VFPLABEL_INST VSTR_INST
3651#ifdef VFP_DECODE
3652{"vstr", 3, ARMVFP2, 24, 27, 0xd, 20, 21, 0, 9, 11, 0x5},
3653#endif
3654#ifdef VFP_DECODE_EXCLUSION
3655{"vstr", 0, ARMVFP2, 0},
3656#endif
3657#ifdef VFP_INTERPRETER_TABLE
3658INTERPRETER_TRANSLATE(vfpinstr),
3659#endif
3660#ifdef VFP_INTERPRETER_LABEL
3661&&VFPLABEL_INST,
3662#endif
3663#ifdef VFP_INTERPRETER_STRUCT
3664typedef struct _vstr_inst {
3665 unsigned int single;
3666 unsigned int n;
3667 unsigned int d;
3668 unsigned int imm32;
3669 unsigned int add;
3670} vfpinstr_inst;
3671#endif
3672#ifdef VFP_INTERPRETER_TRANS
3673ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
3674{
3675 VFP_DEBUG_TRANSLATE;
3676
3677 arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
3678 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
3679
3680 inst_base->cond = BITS(inst, 28, 31);
3681 inst_base->idx = index;
3682 inst_base->br = NON_BRANCH;
3683 inst_base->load_r15 = 0;
3684
3685 inst_cream->single = BIT(inst, 8) == 0;
3686 inst_cream->add = BIT(inst, 23);
3687 inst_cream->imm32 = BITS(inst, 0,7) << 2;
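    /* Single-precision registers are indexed as Vd:D (Vd<<1|D), double-precision as D:Vd;
     * e.g. D=1, Vd=0b0010 selects s5 in single precision but d18 in double precision. */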
3688 inst_cream->d = (inst_cream->single ? BITS(inst, 12, 15)<<1|BIT(inst, 22) : BITS(inst, 12, 15)|BIT(inst, 22)<<4);
3689 inst_cream->n = BITS(inst, 16, 19);
3690
3691 return inst_base;
3692}
3693#endif
3694#ifdef VFP_INTERPRETER_IMPL
3695VFPLABEL_INST:
3696{
3697 INC_ICOUNTER;
3698 if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
3699 CHECK_VFP_ENABLED;
3700
3701 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
3702
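        /* A PC-relative VSTR uses Align(PC, 4) + 8 as the base, i.e. the address of
         * this instruction, word-aligned, plus 8. */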
3703 unsigned int base = (inst_cream->n == 15 ? (cpu->Reg[inst_cream->n] & 0xFFFFFFFC) + 8 : cpu->Reg[inst_cream->n]);
3704 addr = (inst_cream->add ? base + inst_cream->imm32 : base - inst_cream->imm32);
3705 DBG("VSTR :\n");
3706
3707
3708 if (inst_cream->single)
3709 {
3710 fault = check_address_validity(cpu, addr, &phys_addr, 0);
3711 if (fault) goto MMU_EXCEPTION;
3712 fault = interpreter_write_memory(core, addr, phys_addr, cpu->ExtReg[inst_cream->d], 32);
3713 if (fault) goto MMU_EXCEPTION;
3714 DBG("\taddr[%x] <= s%d=[%x]\n", addr, inst_cream->d, cpu->ExtReg[inst_cream->d]);
3715 }
3716 else
3717 {
3718 fault = check_address_validity(cpu, addr, &phys_addr, 0);
3719 if (fault) goto MMU_EXCEPTION;
3720
3721 /* Check endianness */
3722 fault = interpreter_write_memory(core, addr, phys_addr, cpu->ExtReg[inst_cream->d*2], 32);
3723 if (fault) goto MMU_EXCEPTION;
3724
3725 fault = check_address_validity(cpu, addr + 4, &phys_addr, 0);
3726 if (fault) goto MMU_EXCEPTION;
3727
3728 fault = interpreter_write_memory(core, addr + 4, phys_addr, cpu->ExtReg[inst_cream->d*2+1], 32);
3729 if (fault) goto MMU_EXCEPTION;
3730 DBG("\taddr[%x-%x] <= s[%d-%d]=[%x-%x]\n", addr+4, addr, inst_cream->d*2+1, inst_cream->d*2, cpu->ExtReg[inst_cream->d*2+1], cpu->ExtReg[inst_cream->d*2]);
3731 }
3732 }
3733 cpu->Reg[15] += GET_INST_SIZE(cpu);
3734 INC_PC(sizeof(vstr_inst));
3735 FETCH_INST;
3736 GOTO_NEXT_INST;
3737}
3738#endif
3739#ifdef VFP_STC_TRANS
3740if (P == 1 && W == 0)
3741{
3742 return VSTR(state, type, instr, value);
3743}
3744#endif
3745#ifdef VFP_STC_IMPL
3746int VSTR(ARMul_State * state, int type, ARMword instr, ARMword * value)
3747{
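    /* The ARMul STC hook calls this once with ARMul_FIRST to decode the instruction,
     * then once per transferred word with ARMul_DATA, so the decoded fields are kept
     * in statics between calls. */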
3748 static int i = 0;
3749 static int single_reg, add, d, n, imm32, regs;
3750 if (type == ARMul_FIRST)
3751 {
3752 single_reg = BIT(8) == 0; /* Single precision when bit 8 is clear */
3753 add = BIT(23); /* Add or subtract the offset */
3754 imm32 = BITS(0,7)<<2; /* Byte offset */
3755 d = single_reg ? BITS(12, 15)<<1|BIT(22) : BIT(22)<<4|BITS(12, 15); /* Source VFP register */
3756 n = BITS(16, 19); /* Base register */
3757
3758 DBG("VSTR :\n");
3759
3760 i = 0;
3761 regs = 1;
3762
3763 return ARMul_DONE;
3764 }
3765 else if (type == ARMul_DATA)
3766 {
3767 if (single_reg)
3768 {
3769 *value = state->ExtReg[d+i];
3770 DBG("\taddr[?] <= s%d=[%x]\n", d+i, state->ExtReg[d+i]);
3771 i++;
3772 if (i < regs)
3773 return ARMul_INC;
3774 else
3775 return ARMul_DONE;
3776 }
3777 else
3778 {
3779 /* FIXME Careful of endianness, may need to rework this */
3780 *value = state->ExtReg[d*2+i];
3781 DBG("\taddr[?] <= s[%d]=[%x]\n", d*2+i, state->ExtReg[d*2+i]);
3782 i++;
3783 if (i < regs*2)
3784 return ARMul_INC;
3785 else
3786 return ARMul_DONE;
3787 }
3788 }
3789
3790 return -1;
3791}
3792#endif
3793#ifdef VFP_DYNCOM_TABLE
3794DYNCOM_FILL_ACTION(vfpinstr),
3795#endif
3796#ifdef VFP_DYNCOM_TAG
3797int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
3798{
3799 int instr_size = INSTR_SIZE;
3800 arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
3801 DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc);
3802 *tag |= TAG_NEW_BB;
3803 if(instr >> 28 != 0xe)
3804 *tag |= TAG_CONDITIONAL;
3805
3806 return instr_size;
3807}
3808#endif
3809#ifdef VFP_DYNCOM_TRANS
3810int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
3811 int single = BIT(8) == 0;
3812 int add = BIT(23);
3813 int imm32 = BITS(0,7) << 2;
3814 int d = (single ? BITS(12, 15)<<1|BIT(22) : BITS(12, 15)|(BIT(22)<<4));
3815 int n = BITS(16, 19);
3816
3817 Value* base = (n == 15) ? ADD(AND(R(n), CONST(0xFFFFFFFC)), CONST(8)): R(n);
3818 Value* Addr = add ? ADD(base, CONST(imm32)) : SUB(base, CONST(imm32));
3819 DBG("VSTR :\n");
3820 //if(single)
3821 // bb = arch_check_mm(cpu, bb, Addr, 4, 0, cpu->dyncom_engine->bb_trap);
3822 //else
3823 // bb = arch_check_mm(cpu, bb, Addr, 8, 0, cpu->dyncom_engine->bb_trap);
3824 //Value* phys_addr;
3825 if(single){
3826 #if 0
3827 phys_addr = get_phys_addr(cpu, bb, Addr, 0);
3828 bb = cpu->dyncom_engine->bb;
3829 arch_write_memory(cpu, bb, phys_addr, RSPR(d), 32);
3830 #endif
3831 //memory_write(cpu, bb, Addr, RSPR(d), 32);
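        /* IBITCAST32 reinterprets the single-precision register as a raw 32-bit
         * pattern for the store; loads perform the inverse with FPBITCAST32. */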
3832 memory_write(cpu, bb, Addr, IBITCAST32(FR32(d)), 32);
3833 bb = cpu->dyncom_engine->bb;
3834 }
3835 else{
3836 #if 0
3837 phys_addr = get_phys_addr(cpu, bb, Addr, 0);
3838 bb = cpu->dyncom_engine->bb;
3839 arch_write_memory(cpu, bb, phys_addr, RSPR(d * 2), 32);
3840 #endif
3841 //memory_write(cpu, bb, Addr, RSPR(d * 2), 32);
3842 memory_write(cpu, bb, Addr, IBITCAST32(FR32(d * 2)), 32);
3843 bb = cpu->dyncom_engine->bb;
3844 #if 0
3845 phys_addr = get_phys_addr(cpu, bb, ADD(Addr, CONST(4)), 0);
3846 bb = cpu->dyncom_engine->bb;
3847 arch_write_memory(cpu, bb, phys_addr, RSPR(d * 2 + 1), 32);
3848 #endif
3849 //memory_write(cpu, bb, ADD(Addr, CONST(4)), RSPR(d * 2 + 1), 32);
3850 memory_write(cpu, bb, ADD(Addr, CONST(4)), IBITCAST32(FR32(d * 2 + 1)), 32);
3851 bb = cpu->dyncom_engine->bb;
3852 }
3853 return No_exp;
3854}
3855#endif
3856#undef vfpinstr
3857#undef vfpinstr_inst
3858#undef VFPLABEL_INST
3859
3860/* ----------------------------------------------------------------------- */
3861/* VPUSH */
3862/* cond 1101 0D10 1101 Vd-- 101X imm8 imm8 */
3863#define vfpinstr vpush
3864#define vfpinstr_inst vpush_inst
3865#define VFPLABEL_INST VPUSH_INST
3866#ifdef VFP_DECODE
3867{"vpush", 3, ARMVFP2, 23, 27, 0x1a, 16, 21, 0x2d, 9, 11, 0x5},
3868#endif
3869#ifdef VFP_DECODE_EXCLUSION
3870{"vpush", 0, ARMVFP2, 0},
3871#endif
3872#ifdef VFP_INTERPRETER_TABLE
3873INTERPRETER_TRANSLATE(vfpinstr),
3874#endif
3875#ifdef VFP_INTERPRETER_LABEL
3876&&VFPLABEL_INST,
3877#endif
3878#ifdef VFP_INTERPRETER_STRUCT
3879typedef struct _vpush_inst {
3880 unsigned int single;
3881 unsigned int d;
3882 unsigned int imm32;
3883 unsigned int regs;
3884} vfpinstr_inst;
3885#endif
3886#ifdef VFP_INTERPRETER_TRANS
3887ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
3888{
3889 VFP_DEBUG_TRANSLATE;
3890
3891 arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
3892 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
3893
3894 inst_base->cond = BITS(inst, 28, 31);
3895 inst_base->idx = index;
3896 inst_base->br = NON_BRANCH;
3897 inst_base->load_r15 = 0;
3898
3899 inst_cream->single = BIT(inst, 8) == 0;
3900 inst_cream->d = (inst_cream->single ? BITS(inst, 12, 15)<<1|BIT(inst, 22) : BITS(inst, 12, 15)|BIT(inst, 22)<<4);
3901 inst_cream->imm32 = BITS(inst, 0, 7)<<2;
3902 inst_cream->regs = (inst_cream->single ? BITS(inst, 0, 7) : BITS(inst, 1, 7));
3903
3904 return inst_base;
3905}
3906#endif
3907#ifdef VFP_INTERPRETER_IMPL
3908VFPLABEL_INST:
3909{
3910 INC_ICOUNTER;
3911 if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
3912 CHECK_VFP_ENABLED;
3913
3914 int i;
3915
3916 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
3917
3918 DBG("VPUSH :\n");
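        /* Store 'regs' consecutive registers into the block just below the current
         * stack pointer, then lower SP by imm32 once the transfers succeed. */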
3919
3920 addr = cpu->Reg[R13] - inst_cream->imm32;
3921
3922
3923 for (i = 0; i < inst_cream->regs; i++)
3924 {
3925 if (inst_cream->single)
3926 {
3927 fault = check_address_validity(cpu, addr, &phys_addr, 0);
3928 if (fault) goto MMU_EXCEPTION;
3929 fault = interpreter_write_memory(core, addr, phys_addr, cpu->ExtReg[inst_cream->d+i], 32);
3930 if (fault) goto MMU_EXCEPTION;
3931 DBG("\taddr[%x] <= s%d=[%x]\n", addr, inst_cream->d+i, cpu->ExtReg[inst_cream->d+i]);
3932 addr += 4;
3933 }
3934 else
3935 {
3936 /* Careful of endianness, little by default */
3937 fault = check_address_validity(cpu, addr, &phys_addr, 0);
3938 if (fault) goto MMU_EXCEPTION;
3939 fault = interpreter_write_memory(core, addr, phys_addr, cpu->ExtReg[(inst_cream->d+i)*2], 32);
3940 if (fault) goto MMU_EXCEPTION;
3941
3942 fault = check_address_validity(cpu, addr + 4, &phys_addr, 0);
3943 if (fault) goto MMU_EXCEPTION;
3944 fault = interpreter_write_memory(core, addr + 4, phys_addr, cpu->ExtReg[(inst_cream->d+i)*2 + 1], 32);
3945 if (fault) goto MMU_EXCEPTION;
3946 DBG("\taddr[%x-%x] <= s[%d-%d]=[%x-%x]\n", addr+4, addr, (inst_cream->d+i)*2+1, (inst_cream->d+i)*2, cpu->ExtReg[(inst_cream->d+i)*2+1], cpu->ExtReg[(inst_cream->d+i)*2]);
3947 addr += 8;
3948 }
3949 }
3950 DBG("\tsp[%x]", cpu->Reg[R13]);
3951 cpu->Reg[R13] = cpu->Reg[R13] - inst_cream->imm32;
3952 DBG("=>[%x]\n", cpu->Reg[R13]);
3953
3954 }
3955 cpu->Reg[15] += GET_INST_SIZE(cpu);
3956 INC_PC(sizeof(vpush_inst));
3957 FETCH_INST;
3958 GOTO_NEXT_INST;
3959}
3960#endif
3961#ifdef VFP_STC_TRANS
3962if (P == 1 && U == 0 && W == 1 && Rn == 0xD)
3963{
3964 return VPUSH(state, type, instr, value);
3965}
3966#endif
3967#ifdef VFP_STC_IMPL
3968int VPUSH(ARMul_State * state, int type, ARMword instr, ARMword * value)
3969{
3970 static int i = 0;
3971 static int single_regs, add, wback, d, n, imm32, regs;
3972 if (type == ARMul_FIRST)
3973 {
3974 single_regs = BIT(8) == 0; /* Single precision */
3975 d = single_regs ? BITS(12, 15)<<1|BIT(22) : BIT(22)<<4|BITS(12, 15); /* First VFP register to push */
3976 imm32 = BITS(0,7)<<2; /* Stack adjustment in bytes */
3977 regs = single_regs ? BITS(0, 7) : BITS(1, 7); /* FSTMX if regs is odd */
3978
3979 DBG("VPUSH :\n");
3980 DBG("\tsp[%x]", state->Reg[R13]);
3981 state->Reg[R13] = state->Reg[R13] - imm32;
3982 DBG("=>[%x]\n", state->Reg[R13]);
3983
3984 i = 0;
3985
3986 return ARMul_DONE;
3987 }
3988 else if (type == ARMul_DATA)
3989 {
3990 if (single_regs)
3991 {
3992 *value = state->ExtReg[d + i];
3993 DBG("\taddr[?] <= s%d=[%x]\n", d+i, state->ExtReg[d + i]);
3994 i++;
3995 if (i < regs)
3996 return ARMul_INC;
3997 else
3998 return ARMul_DONE;
3999 }
4000 else
4001 {
4002 /* FIXME Careful of endianness, may need to rework this */
4003 *value = state->ExtReg[d*2 + i];
4004 DBG("\taddr[?] <= s[%d]=[%x]\n", d*2 + i, state->ExtReg[d*2 + i]);
4005 i++;
4006 if (i < regs*2)
4007 return ARMul_INC;
4008 else
4009 return ARMul_DONE;
4010 }
4011 }
4012
4013 return -1;
4014}
4015#endif
4016#ifdef VFP_DYNCOM_TABLE
4017DYNCOM_FILL_ACTION(vfpinstr),
4018#endif
4019#ifdef VFP_DYNCOM_TAG
4020int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
4021{
4022 int instr_size = INSTR_SIZE;
4023 arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
4024 DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc);
4025 *tag |= TAG_NEW_BB;
4026 if(instr >> 28 != 0xe)
4027 *tag |= TAG_CONDITIONAL;
4028
4029 return instr_size;
4030}
4031#endif
4032#ifdef VFP_DYNCOM_TRANS
4033int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
4034 int single = BIT(8) == 0;
4035 int d = (single ? BITS(12, 15)<<1|BIT(22) : BITS(12, 15)|(BIT(22)<<4));
4036 int imm32 = BITS(0, 7)<<2;
4037 int regs = (single ? BITS(0, 7) : BITS(1, 7));
4038
4039 DBG("\t\tin %s \n", __FUNCTION__);
4040 Value* Addr = SUB(R(13), CONST(imm32));
4041 //if(single)
4042 // bb = arch_check_mm(cpu, bb, Addr, regs * 4, 0, cpu->dyncom_engine->bb_trap);
4043 //else
4044 // bb = arch_check_mm(cpu, bb, Addr, regs * 8, 0, cpu->dyncom_engine->bb_trap);
4045 //Value* phys_addr;
4046 int i;
4047 for (i = 0; i < regs; i++)
4048 {
4049 if (single)
4050 {
4051 //fault = interpreter_write_memory(core, addr, phys_addr, cpu->ExtReg[inst_cream->d+i], 32);
4052 #if 0
4053 phys_addr = get_phys_addr(cpu, bb, Addr, 0);
4054 bb = cpu->dyncom_engine->bb;
4055 arch_write_memory(cpu, bb, phys_addr, RSPR(d + i), 32);
4056 #endif
4057 //memory_write(cpu, bb, Addr, RSPR(d + i), 32);
4058 memory_write(cpu, bb, Addr, IBITCAST32(FR32(d + i)), 32);
4059 bb = cpu->dyncom_engine->bb;
4060 Addr = ADD(Addr, CONST(4));
4061 }
4062 else
4063 {
4064 /* Careful of endianness, little by default */
4065 #if 0
4066 phys_addr = get_phys_addr(cpu, bb, Addr, 0);
4067 bb = cpu->dyncom_engine->bb;
4068 arch_write_memory(cpu, bb, phys_addr, RSPR((d + i) * 2), 32);
4069 #endif
4070 //memory_write(cpu, bb, Addr, RSPR((d + i) * 2), 32);
4071 memory_write(cpu, bb, Addr, IBITCAST32(FR32((d + i) * 2)), 32);
4072 bb = cpu->dyncom_engine->bb;
4073 #if 0
4074 phys_addr = get_phys_addr(cpu, bb, ADD(Addr, CONST(4)), 0);
4075 bb = cpu->dyncom_engine->bb;
4076 arch_write_memory(cpu, bb, phys_addr, RSPR((d + i) * 2 + 1), 32);
4077 #endif
4078 //memory_write(cpu, bb, ADD(Addr, CONST(4)), RSPR((d + i) * 2 + 1), 32);
4079 memory_write(cpu, bb, ADD(Addr, CONST(4)), IBITCAST32(FR32((d + i) * 2 + 1)), 32);
4080 bb = cpu->dyncom_engine->bb;
4081
4082 Addr = ADD(Addr, CONST(8));
4083 }
4084 }
4085 LET(13, SUB(R(13), CONST(imm32)));
4086
4087 return No_exp;
4088}
4089#endif
4090#undef vfpinstr
4091#undef vfpinstr_inst
4092#undef VFPLABEL_INST
4093
4094/* ----------------------------------------------------------------------- */
4095/* VSTM */
4096/* cond 110P UDW0 Rn-- Vd-- 101X imm8 imm8 */
4097#define vfpinstr vstm
4098#define vfpinstr_inst vstm_inst
4099#define VFPLABEL_INST VSTM_INST
4100#ifdef VFP_DECODE
4101{"vstm", 3, ARMVFP2, 25, 27, 0x6, 20, 20, 0, 9, 11, 0x5},
4102#endif
4103#ifdef VFP_DECODE_EXCLUSION
4104{"vstm", 0, ARMVFP2, 0},
4105#endif
4106#ifdef VFP_INTERPRETER_TABLE
4107INTERPRETER_TRANSLATE(vfpinstr),
4108#endif
4109#ifdef VFP_INTERPRETER_LABEL
4110&&VFPLABEL_INST,
4111#endif
4112#ifdef VFP_INTERPRETER_STRUCT
4113typedef struct _vstm_inst {
4114 unsigned int single;
4115 unsigned int add;
4116 unsigned int wback;
4117 unsigned int d;
4118 unsigned int n;
4119 unsigned int imm32;
4120 unsigned int regs;
4121} vfpinstr_inst;
4122#endif
4123#ifdef VFP_INTERPRETER_TRANS
4124ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
4125{
4126 VFP_DEBUG_TRANSLATE;
4127
4128 arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
4129 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
4130
4131 inst_base->cond = BITS(inst, 28, 31);
4132 inst_base->idx = index;
4133 inst_base->br = NON_BRANCH;
4134 inst_base->load_r15 = 0;
4135
4136 inst_cream->single = BIT(inst, 8) == 0;
4137 inst_cream->add = BIT(inst, 23);
4138 inst_cream->wback = BIT(inst, 21);
4139 inst_cream->d = (inst_cream->single ? BITS(inst, 12, 15)<<1|BIT(inst, 22) : BITS(inst, 12, 15)|BIT(inst, 22)<<4);
4140 inst_cream->n = BITS(inst, 16, 19);
4141 inst_cream->imm32 = BITS(inst, 0, 7)<<2;
4142 inst_cream->regs = (inst_cream->single ? BITS(inst, 0, 7) : BITS(inst, 1, 7));
4143
4144 return inst_base;
4145}
4146#endif
4147#ifdef VFP_INTERPRETER_IMPL
4148VFPLABEL_INST: /* encoding 1 */
4149{
4150 INC_ICOUNTER;
4151 if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
4152 CHECK_VFP_ENABLED;
4153
4154 int i;
4155
4156 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
4157
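        /* Increment variants start at Rn; decrement-before variants start at Rn - imm32. */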
4158 addr = (inst_cream->add ? cpu->Reg[inst_cream->n] : cpu->Reg[inst_cream->n] - inst_cream->imm32);
4159 DBG("VSTM : addr[%x]\n", addr);
4160
4161
4162 for (i = 0; i < inst_cream->regs; i++)
4163 {
4164 if (inst_cream->single)
4165 {
4166 fault = check_address_validity(cpu, addr, &phys_addr, 0);
4167 if (fault) goto MMU_EXCEPTION;
4168
4169 fault = interpreter_write_memory(core, addr, phys_addr, cpu->ExtReg[inst_cream->d+i], 32);
4170 if (fault) goto MMU_EXCEPTION;
4171 DBG("\taddr[%x] <= s%d=[%x]\n", addr, inst_cream->d+i, cpu->ExtReg[inst_cream->d+i]);
4172 addr += 4;
4173 }
4174 else
4175 {
4176 /* Careful of endianness, little by default */
4177 fault = check_address_validity(cpu, addr, &phys_addr, 0);
4178 if (fault) goto MMU_EXCEPTION;
4179
4180 fault = interpreter_write_memory(core, addr, phys_addr, cpu->ExtReg[(inst_cream->d+i)*2], 32);
4181 if (fault) goto MMU_EXCEPTION;
4182
4183 fault = check_address_validity(cpu, addr + 4, &phys_addr, 0);
4184 if (fault) goto MMU_EXCEPTION;
4185
4186 fault = interpreter_write_memory(core, addr + 4, phys_addr, cpu->ExtReg[(inst_cream->d+i)*2 + 1], 32);
4187 if (fault) goto MMU_EXCEPTION;
4188 DBG("\taddr[%x-%x] <= s[%d-%d]=[%x-%x]\n", addr+4, addr, (inst_cream->d+i)*2+1, (inst_cream->d+i)*2, cpu->ExtReg[(inst_cream->d+i)*2+1], cpu->ExtReg[(inst_cream->d+i)*2]);
4189 addr += 8;
4190 }
4191 }
4192 if (inst_cream->wback){
4193 cpu->Reg[inst_cream->n] = (inst_cream->add ? cpu->Reg[inst_cream->n] + inst_cream->imm32 :
4194 cpu->Reg[inst_cream->n] - inst_cream->imm32);
4195 DBG("\twback r%d[%x]\n", inst_cream->n, cpu->Reg[inst_cream->n]);
4196 }
4197
4198 }
4199 cpu->Reg[15] += GET_INST_SIZE(cpu);
4200 INC_PC(sizeof(vstm_inst));
4201
4202 FETCH_INST;
4203 GOTO_NEXT_INST;
4204}
4205#endif
4206#ifdef VFP_STC_TRANS
4207/* Should be the last operation of STC */
4208return VSTM(state, type, instr, value);
4209#endif
4210#ifdef VFP_STC_IMPL
4211int VSTM(ARMul_State * state, int type, ARMword instr, ARMword * value)
4212{
4213 static int i = 0;
4214 static int single_regs, add, wback, d, n, imm32, regs;
4215 if (type == ARMul_FIRST)
4216 {
4217 single_regs = BIT(8) == 0; /* Single precision */
4218 add = BIT(23); /* */
4219 wback = BIT(21); /* write-back */
4220 d = single_regs ? BITS(12, 15)<<1|BIT(22) : BIT(22)<<4|BITS(12, 15); /* First VFP register to store */
4221 n = BITS(16, 19); /* Base register */
4222 imm32 = BITS(0,7) * 4; /* may not be used */
4223 regs = single_regs ? BITS(0, 7) : BITS(0, 7)>>1; /* FSTMX if regs is odd */
4224
4225 DBG("VSTM :\n");
4226
4227 if (wback) {
4228 state->Reg[n] = (add ? state->Reg[n] + imm32 : state->Reg[n] - imm32);
4229 DBG("\twback r%d[%x]\n", n, state->Reg[n]);
4230 }
4231
4232 i = 0;
4233
4234 return ARMul_DONE;
4235 }
4236 else if (type == ARMul_DATA)
4237 {
4238 if (single_regs)
4239 {
4240 *value = state->ExtReg[d + i];
4241 DBG("\taddr[?] <= s%d=[%x]\n", d+i, state->ExtReg[d + i]);
4242 i++;
4243 if (i < regs)
4244 return ARMul_INC;
4245 else
4246 return ARMul_DONE;
4247 }
4248 else
4249 {
4250 /* FIXME Careful of endianness, may need to rework this */
4251 *value = state->ExtReg[d*2 + i];
4252 DBG("\taddr[?] <= s[%d]=[%x]\n", d*2 + i, state->ExtReg[d*2 + i]);
4253 i++;
4254 if (i < regs*2)
4255 return ARMul_INC;
4256 else
4257 return ARMul_DONE;
4258 }
4259 }
4260
4261 return -1;
4262}
4263#endif
4264#ifdef VFP_DYNCOM_TABLE
4265DYNCOM_FILL_ACTION(vfpinstr),
4266#endif
4267#ifdef VFP_DYNCOM_TAG
4268int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
4269{
4270 int instr_size = INSTR_SIZE;
4271 //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
4272 //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
4273 arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
4274 DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc);
4275 *tag |= TAG_NEW_BB;
4276 if(instr >> 28 != 0xe)
4277 *tag |= TAG_CONDITIONAL;
4278
4279 return instr_size;
4280}
4281#endif
4282#ifdef VFP_DYNCOM_TRANS
4283int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
4284 //arch_arm_undef(cpu, bb, instr);
4285 int single = BIT(8) == 0;
4286 int add = BIT(23);
4287 int wback = BIT(21);
4288 int d = single ? BITS(12, 15)<<1|BIT(22) : BITS(12, 15)|(BIT(22)<<4);
4289 int n = BITS(16, 19);
4290 int imm32 = BITS(0, 7)<<2;
4291 int regs = single ? BITS(0, 7) : BITS(1, 7);
4292
4293 Value* Addr = SELECT(CONST1(add), R(n), SUB(R(n), CONST(imm32)));
4294 DBG("VSTM \n");
4295 //if(single)
4296 // bb = arch_check_mm(cpu, bb, Addr, regs * 4, 0, cpu->dyncom_engine->bb_trap);
4297 //else
4298 // bb = arch_check_mm(cpu, bb, Addr, regs * 8, 0, cpu->dyncom_engine->bb_trap);
4299
4300 int i;
4301 Value* phys_addr;
4302 for (i = 0; i < regs; i++)
4303 {
4304 if (single)
4305 {
4306
4307 //fault = interpreter_write_memory(core, addr, phys_addr, cpu->ExtReg[inst_cream->d+i], 32);
4308 /* if R(i) is R15? */
4309 #if 0
4310 phys_addr = get_phys_addr(cpu, bb, Addr, 0);
4311 bb = cpu->dyncom_engine->bb;
4312 arch_write_memory(cpu, bb, phys_addr, RSPR(d + i), 32);
4313 #endif
4314 //memory_write(cpu, bb, Addr, RSPR(d + i), 32);
4315 memory_write(cpu, bb, Addr, IBITCAST32(FR32(d + i)),32);
4316 bb = cpu->dyncom_engine->bb;
4317 //if (fault) goto MMU_EXCEPTION;
4318 //DBG("\taddr[%x] <= s%d=[%x]\n", addr, inst_cream->d+i, cpu->ExtReg[inst_cream->d+i]);
4319 Addr = ADD(Addr, CONST(4));
4320 }
4321 else
4322 {
4323
4324 //fault = interpreter_write_memory(core, addr, phys_addr, cpu->ExtReg[(inst_cream->d+i)*2], 32);
4325 #if 0
4326 phys_addr = get_phys_addr(cpu, bb, Addr, 0);
4327 bb = cpu->dyncom_engine->bb;
4328 arch_write_memory(cpu, bb, phys_addr, RSPR((d + i) * 2), 32);
4329 #endif
4330 //memory_write(cpu, bb, Addr, RSPR((d + i) * 2), 32);
4331 memory_write(cpu, bb, Addr, IBITCAST32(FR32((d + i) * 2)),32);
4332 bb = cpu->dyncom_engine->bb;
4333 //if (fault) goto MMU_EXCEPTION;
4334
4335 //fault = interpreter_write_memory(core, addr + 4, phys_addr, cpu->ExtReg[(inst_cream->d+i)*2 + 1], 32);
4336 #if 0
4337 phys_addr = get_phys_addr(cpu, bb, ADD(Addr, CONST(4)), 0);
4338 bb = cpu->dyncom_engine->bb;
4339 arch_write_memory(cpu, bb, phys_addr, RSPR((d + i) * 2 + 1), 32);
4340 #endif
4341 //memory_write(cpu, bb, ADD(Addr, CONST(4)), RSPR((d + i) * 2 + 1), 32);
4342 memory_write(cpu, bb, ADD(Addr, CONST(4)), IBITCAST32(FR32((d + i) * 2 + 1)), 32);
4343 bb = cpu->dyncom_engine->bb;
4344 //if (fault) goto MMU_EXCEPTION;
4345 //DBG("\taddr[%x-%x] <= s[%d-%d]=[%x-%x]\n", addr+4, addr, (inst_cream->d+i)*2+1, (inst_cream->d+i)*2, cpu->ExtReg[(inst_cream->d+i)*2+1], cpu->ExtReg[(inst_cream->d+i)*2]);
4346 //addr += 8;
4347 Addr = ADD(Addr, CONST(8));
4348 }
4349 }
4350 if (wback){
4351 //cpu->Reg[n] = (add ? cpu->Reg[n] + imm32 :
4352 // cpu->Reg[n] - imm32);
4353 LET(n, SELECT(CONST1(add), ADD(R(n), CONST(imm32)), SUB(R(n), CONST(imm32))));
4354 DBG("\twback r%d, add=%d, imm32=%d\n", n, add, imm32);
4355 }
4356 return No_exp;
4357}
4358#endif
4359#undef vfpinstr
4360#undef vfpinstr_inst
4361#undef VFPLABEL_INST
4362
4363/* ----------------------------------------------------------------------- */
4364/* VPOP */
4365/* cond 1100 1D11 1101 Vd-- 101X imm8 imm8 */
4366#define vfpinstr vpop
4367#define vfpinstr_inst vpop_inst
4368#define VFPLABEL_INST VPOP_INST
4369#ifdef VFP_DECODE
4370{"vpop", 3, ARMVFP2, 23, 27, 0x19, 16, 21, 0x3d, 9, 11, 0x5},
4371#endif
4372#ifdef VFP_DECODE_EXCLUSION
4373{"vpop", 0, ARMVFP2, 0},
4374#endif
4375#ifdef VFP_INTERPRETER_TABLE
4376INTERPRETER_TRANSLATE(vfpinstr),
4377#endif
4378#ifdef VFP_INTERPRETER_LABEL
4379&&VFPLABEL_INST,
4380#endif
4381#ifdef VFP_INTERPRETER_STRUCT
4382typedef struct _vpop_inst {
4383 unsigned int single;
4384 unsigned int d;
4385 unsigned int imm32;
4386 unsigned int regs;
4387} vfpinstr_inst;
4388#endif
4389#ifdef VFP_INTERPRETER_TRANS
4390ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
4391{
4392 VFP_DEBUG_TRANSLATE;
4393
4394 arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
4395 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
4396
4397 inst_base->cond = BITS(inst, 28, 31);
4398 inst_base->idx = index;
4399 inst_base->br = NON_BRANCH;
4400 inst_base->load_r15 = 0;
4401
4402 inst_cream->single = BIT(inst, 8) == 0;
4403 inst_cream->d = (inst_cream->single ? (BITS(inst, 12, 15)<<1)|BIT(inst, 22) : BITS(inst, 12, 15)|(BIT(inst, 22)<<4));
4404 inst_cream->imm32 = BITS(inst, 0, 7)<<2;
4405 inst_cream->regs = (inst_cream->single ? BITS(inst, 0, 7) : BITS(inst, 1, 7));
4406
4407 return inst_base;
4408}
4409#endif
4410#ifdef VFP_INTERPRETER_IMPL
4411VFPLABEL_INST:
4412{
4413 INC_ICOUNTER;
4414 if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
4415 CHECK_VFP_ENABLED;
4416
4417 int i;
4418 unsigned int value1, value2;
4419
4420 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
4421
4422 DBG("VPOP :\n");
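        /* Pop 'regs' consecutive registers from the stack, then raise SP by imm32. */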
4423
4424 addr = cpu->Reg[R13];
4425
4426
4427 for (i = 0; i < inst_cream->regs; i++)
4428 {
4429 if (inst_cream->single)
4430 {
4431 fault = check_address_validity(cpu, addr, &phys_addr, 1);
4432 if (fault) goto MMU_EXCEPTION;
4433
4434 fault = interpreter_read_memory(core, addr, phys_addr, value1, 32);
4435 if (fault) goto MMU_EXCEPTION;
4436 DBG("\ts%d <= [%x] addr[%x]\n", inst_cream->d+i, value1, addr);
4437 cpu->ExtReg[inst_cream->d+i] = value1;
4438 addr += 4;
4439 }
4440 else
4441 {
4442 /* Careful of endianness, little by default */
4443 fault = check_address_validity(cpu, addr, &phys_addr, 1);
4444 if (fault) goto MMU_EXCEPTION;
4445
4446 fault = interpreter_read_memory(core, addr, phys_addr, value1, 32);
4447 if (fault) goto MMU_EXCEPTION;
4448
4449 fault = check_address_validity(cpu, addr + 4, &phys_addr, 1);
4450 if (fault) goto MMU_EXCEPTION;
4451
4452 fault = interpreter_read_memory(core, addr + 4, phys_addr, value2, 32);
4453 if (fault) goto MMU_EXCEPTION;
4454 DBG("\ts[%d-%d] <= [%x-%x] addr[%x-%x]\n", (inst_cream->d+i)*2+1, (inst_cream->d+i)*2, value2, value1, addr+4, addr);
4455 cpu->ExtReg[(inst_cream->d+i)*2] = value1;
4456 cpu->ExtReg[(inst_cream->d+i)*2 + 1] = value2;
4457 addr += 8;
4458 }
4459 }
4460 DBG("\tsp[%x]", cpu->Reg[R13]);
4461 cpu->Reg[R13] = cpu->Reg[R13] + inst_cream->imm32;
4462 DBG("=>[%x]\n", cpu->Reg[R13]);
4463
4464 }
4465 cpu->Reg[15] += GET_INST_SIZE(cpu);
4466 INC_PC(sizeof(vpop_inst));
4467 FETCH_INST;
4468 GOTO_NEXT_INST;
4469}
4470#endif
4471#ifdef VFP_LDC_TRANS
4472if (P == 0 && U == 1 && W == 1 && Rn == 0xD)
4473{
4474 return VPOP(state, type, instr, value);
4475}
4476#endif
4477#ifdef VFP_LDC_IMPL
4478int VPOP(ARMul_State * state, int type, ARMword instr, ARMword value)
4479{
4480 static int i = 0;
4481 static int single_regs, add, wback, d, n, imm32, regs;
4482 if (type == ARMul_FIRST)
4483 {
4484 single_regs = BIT(8) == 0; /* Single precision */
4485 d = single_regs ? BITS(12, 15)<<1|BIT(22) : BIT(22)<<4|BITS(12, 15); /* First VFP register to load */
4486 imm32 = BITS(0,7)<<2; /* Stack adjustment in bytes */
4487 regs = single_regs ? BITS(0, 7) : BITS(1, 7); /* FLDMX if regs is odd */
4488
4489 DBG("VPOP :\n");
4490 DBG("\tsp[%x]", state->Reg[R13]);
4491 state->Reg[R13] = state->Reg[R13] + imm32;
4492 DBG("=>[%x]\n", state->Reg[R13]);
4493
4494 i = 0;
4495
4496 return ARMul_DONE;
4497 }
4498 else if (type == ARMul_TRANSFER)
4499 {
4500 return ARMul_DONE;
4501 }
4502 else if (type == ARMul_DATA)
4503 {
4504 if (single_regs)
4505 {
4506 state->ExtReg[d + i] = value;
4507 DBG("\ts%d <= [%x]\n", d + i, value);
4508 i++;
4509 if (i < regs)
4510 return ARMul_INC;
4511 else
4512 return ARMul_DONE;
4513 }
4514 else
4515 {
4516 /* FIXME Careful of endianness, may need to rework this */
4517 state->ExtReg[d*2 + i] = value;
4518 DBG("\ts%d <= [%x]\n", d*2 + i, value);
4519 i++;
4520 if (i < regs*2)
4521 return ARMul_INC;
4522 else
4523 return ARMul_DONE;
4524 }
4525 }
4526
4527 return -1;
4528}
4529#endif
4530#ifdef VFP_DYNCOM_TABLE
4531DYNCOM_FILL_ACTION(vfpinstr),
4532#endif
4533#ifdef VFP_DYNCOM_TAG
4534int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
4535{
4536 int instr_size = INSTR_SIZE;
4537 //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
4538 //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
4539 /* Should check if PC is destination register */
4540 arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
4541 DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc);
4542 *tag |= TAG_NEW_BB;
4543 if(instr >> 28 != 0xe)
4544 *tag |= TAG_CONDITIONAL;
4545
4546 return instr_size;
4547}
4548#endif
4549#ifdef VFP_DYNCOM_TRANS
4550int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
4551 DBG("\t\tin %s.\n", __FUNCTION__);
4552 //arch_arm_undef(cpu, bb, instr);
4553 int single = BIT(8) == 0;
4554 int d = (single ? BITS(12, 15)<<1|BIT(22) : BITS(12, 15)|(BIT(22)<<4));
4555 int imm32 = BITS(0, 7)<<2;
4556 int regs = (single ? BITS(0, 7) : BITS(1, 7));
4557
4558 int i;
4559 unsigned int value1, value2;
4560
4561 DBG("VPOP :\n");
4562
4563 Value* Addr = R(13);
4564 Value* val;
4565 //if(single)
4566 // bb = arch_check_mm(cpu, bb, Addr, regs * 4, 1, cpu->dyncom_engine->bb_trap);
4567 //else
4568 // bb = arch_check_mm(cpu, bb, Addr, regs * 4, 1, cpu->dyncom_engine->bb_trap);
4569 //Value* phys_addr;
4570 for (i = 0; i < regs; i++)
4571 {
4572 if (single)
4573 {
4574 #if 0
4575 phys_addr = get_phys_addr(cpu, bb, Addr, 1);
4576 bb = cpu->dyncom_engine->bb;
4577 val = arch_read_memory(cpu,bb,phys_addr,0,32);
4578 #endif
4579 memory_read(cpu, bb, Addr, 0, 32);
4580 bb = cpu->dyncom_engine->bb;
4581 val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
4582 LETFPS(d + i, FPBITCAST32(val));
4583 Addr = ADD(Addr, CONST(4));
4584 }
4585 else
4586 {
4587 /* Careful of endianness, little by default */
4588 #if 0
4589 phys_addr = get_phys_addr(cpu, bb, Addr, 1);
4590 bb = cpu->dyncom_engine->bb;
4591 val = arch_read_memory(cpu,bb,phys_addr,0,32);
4592 #endif
4593 memory_read(cpu, bb, Addr, 0, 32);
4594 bb = cpu->dyncom_engine->bb;
4595 val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
4596 LETFPS((d + i) * 2, FPBITCAST32(val));
4597 #if 0
4598 phys_addr = get_phys_addr(cpu, bb, ADD(Addr, CONST(4)), 1);
4599 bb = cpu->dyncom_engine->bb;
4600 val = arch_read_memory(cpu,bb,phys_addr,0,32);
4601 #endif
4602 memory_read(cpu, bb, ADD(Addr, CONST(4)), 0, 32);
4603 bb = cpu->dyncom_engine->bb;
4604 val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
4605 LETFPS((d + i) * 2 + 1, FPBITCAST32(val));
4606
4607 Addr = ADD(Addr, CONST(8));
4608 }
4609 }
4610 LET(13, ADD(R(13), CONST(imm32)));
4611 return No_exp;
4612}
4613#endif
4614#undef vfpinstr
4615#undef vfpinstr_inst
4616#undef VFPLABEL_INST
4617
4618/* ----------------------------------------------------------------------- */
4619/* VLDR */
4620/* cond 1101 UD01 Rn-- Vd-- 101X imm8 imm8 */
4621#define vfpinstr vldr
4622#define vfpinstr_inst vldr_inst
4623#define VFPLABEL_INST VLDR_INST
4624#ifdef VFP_DECODE
4625{"vldr", 3, ARMVFP2, 24, 27, 0xd, 20, 21, 0x1, 9, 11, 0x5},
4626#endif
4627#ifdef VFP_DECODE_EXCLUSION
4628{"vldr", 0, ARMVFP2, 0},
4629#endif
4630#ifdef VFP_INTERPRETER_TABLE
4631INTERPRETER_TRANSLATE(vfpinstr),
4632#endif
4633#ifdef VFP_INTERPRETER_LABEL
4634&&VFPLABEL_INST,
4635#endif
4636#ifdef VFP_INTERPRETER_STRUCT
4637typedef struct _vldr_inst {
4638 unsigned int single;
4639 unsigned int n;
4640 unsigned int d;
4641 unsigned int imm32;
4642 unsigned int add;
4643} vfpinstr_inst;
4644#endif
4645#ifdef VFP_INTERPRETER_TRANS
4646ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
4647{
4648 VFP_DEBUG_TRANSLATE;
4649
4650 arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
4651 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
4652
4653 inst_base->cond = BITS(inst, 28, 31);
4654 inst_base->idx = index;
4655 inst_base->br = NON_BRANCH;
4656 inst_base->load_r15 = 0;
4657
4658 inst_cream->single = BIT(inst, 8) == 0;
4659 inst_cream->add = BIT(inst, 23);
4660 inst_cream->imm32 = BITS(inst, 0,7) << 2;
4661 inst_cream->d = (inst_cream->single ? BITS(inst, 12, 15)<<1|BIT(inst, 22) : BITS(inst, 12, 15)|BIT(inst, 22)<<4);
4662 inst_cream->n = BITS(inst, 16, 19);
4663
4664 return inst_base;
4665}
4666#endif
4667#ifdef VFP_INTERPRETER_IMPL
4668VFPLABEL_INST:
4669{
4670 INC_ICOUNTER;
4671 if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
4672 CHECK_VFP_ENABLED;
4673
4674 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
4675
4676 unsigned int base = (inst_cream->n == 15 ? (cpu->Reg[inst_cream->n] & 0xFFFFFFFC) + 8 : cpu->Reg[inst_cream->n]);
4677 addr = (inst_cream->add ? base + inst_cream->imm32 : base - inst_cream->imm32);
4678 DBG("VLDR : addr[%x]\n", addr);
4679
4680
4681 if (inst_cream->single)
4682 {
4683 fault = check_address_validity(cpu, addr, &phys_addr, 1);
4684 if (fault) goto MMU_EXCEPTION;
4685 fault = interpreter_read_memory(core, addr, phys_addr, cpu->ExtReg[inst_cream->d], 32);
4686 if (fault) goto MMU_EXCEPTION;
4687 DBG("\ts%d <= [%x] addr[%x]\n", inst_cream->d, cpu->ExtReg[inst_cream->d], addr);
4688 }
4689 else
4690 {
4691 unsigned int word1, word2;
4692 fault = check_address_validity(cpu, addr, &phys_addr, 1);
4693 if (fault) goto MMU_EXCEPTION;
4694 fault = interpreter_read_memory(core, addr, phys_addr, word1, 32);
4695 if (fault) goto MMU_EXCEPTION;
4696
4697 fault = check_address_validity(cpu, addr + 4, &phys_addr, 1);
4698 if (fault) goto MMU_EXCEPTION;
4699 fault = interpreter_read_memory(core, addr + 4, phys_addr, word2, 32);
4700 if (fault) goto MMU_EXCEPTION;
4701 /* Check endianness */
4702 cpu->ExtReg[inst_cream->d*2] = word1;
4703 cpu->ExtReg[inst_cream->d*2+1] = word2;
4704 DBG("\ts[%d-%d] <= [%x-%x] addr[%x-%x]\n", inst_cream->d*2+1, inst_cream->d*2, word2, word1, addr+4, addr);
4705 }
4706 }
4707 cpu->Reg[15] += GET_INST_SIZE(cpu);
4708 INC_PC(sizeof(vldr_inst));
4709 FETCH_INST;
4710 GOTO_NEXT_INST;
4711}
4712#endif
4713#ifdef VFP_LDC_TRANS
4714if (P == 1 && W == 0)
4715{
4716 return VLDR(state, type, instr, value);
4717}
4718#endif
4719#ifdef VFP_LDC_IMPL
4720int VLDR(ARMul_State * state, int type, ARMword instr, ARMword value)
4721{
4722 static int i = 0;
4723 static int single_reg, add, d, n, imm32, regs;
4724 if (type == ARMul_FIRST)
4725 {
4726 single_reg = BIT(8) == 0; /* Single precision when bit 8 is clear */
4727 add = BIT(23); /* Add or subtract the offset */
4728 imm32 = BITS(0,7)<<2; /* Byte offset */
4729 d = single_reg ? BITS(12, 15)<<1|BIT(22) : BIT(22)<<4|BITS(12, 15); /* Destination VFP register */
4730 n = BITS(16, 19); /* Base register */
4731
4732 DBG("VLDR :\n");
4733
4734 i = 0;
4735 regs = 1;
4736
4737 return ARMul_DONE;
4738 }
4739 else if (type == ARMul_TRANSFER)
4740 {
4741 return ARMul_DONE;
4742 }
4743 else if (type == ARMul_DATA)
4744 {
4745 if (single_reg)
4746 {
4747 state->ExtReg[d+i] = value;
4748 DBG("\ts%d <= [%x]\n", d+i, value);
4749 i++;
4750 if (i < regs)
4751 return ARMul_INC;
4752 else
4753 return ARMul_DONE;
4754 }
4755 else
4756 {
4757 /* FIXME Careful of endianness, may need to rework this */
4758 state->ExtReg[d*2+i] = value;
4759 DBG("\ts[%d] <= [%x]\n", d*2+i, value);
4760 i++;
4761 if (i < regs*2)
4762 return ARMul_INC;
4763 else
4764 return ARMul_DONE;
4765 }
4766 }
4767
4768 return -1;
4769}
4770#endif
4771#ifdef VFP_DYNCOM_TABLE
4772DYNCOM_FILL_ACTION(vfpinstr),
4773#endif
4774#ifdef VFP_DYNCOM_TAG
4775int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
4776{
4777 int instr_size = INSTR_SIZE;
4778 //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
4779 //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
4780 /* Should check if PC is destination register */
4781 arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
4782 DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc);
4783 *tag |= TAG_NEW_BB;
4784 if(instr >> 28 != 0xe)
4785 *tag |= TAG_CONDITIONAL;
4786
4787 return instr_size;
4788}
4789#endif
4790#ifdef VFP_DYNCOM_TRANS
4791int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
4792 int single = BIT(8) == 0;
4793 int add = BIT(23);
4794 int wback = BIT(21);
4795 int d = (single ? BITS(12, 15)<<1|BIT(22) : BITS(12, 15)|(BIT(22)<<4));
4796 int n = BITS(16, 19);
4797 int imm32 = BITS(0, 7)<<2;
4798 int regs = (single ? BITS(0, 7) : BITS(1, 7));
4799 Value* base = R(n);
4800 DBG("\t\tin %s .\n", __FUNCTION__);
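    /* A PC-relative VLDR uses Align(PC, 4) + 8 as the base, matching the interpreter above. */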
4801 if(n == 15){
4802 base = ADD(AND(base, CONST(0xFFFFFFFC)), CONST(8));
4803 }
4804 Value* Addr = add ? (ADD(base, CONST(imm32))) : (SUB(base, CONST(imm32)));
4805 //if(single)
4806 // bb = arch_check_mm(cpu, bb, Addr, 4, 1, cpu->dyncom_engine->bb_trap);
4807 //else
4808 // bb = arch_check_mm(cpu, bb, Addr, 8, 1, cpu->dyncom_engine->bb_trap);
4809 //Value* phys_addr;
4810 Value* val;
4811 if(single){
4812 #if 0
4813 phys_addr = get_phys_addr(cpu, bb, Addr, 1);
4814 bb = cpu->dyncom_engine->bb;
4815 val = arch_read_memory(cpu,bb,phys_addr,0,32);
4816 #endif
4817 memory_read(cpu, bb, Addr, 0, 32);
4818 bb = cpu->dyncom_engine->bb;
4819 val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
4820 //LETS(d, val);
4821 LETFPS(d,FPBITCAST32(val));
4822 }
4823 else{
4824 #if 0
4825 phys_addr = get_phys_addr(cpu, bb, Addr, 1);
4826 bb = cpu->dyncom_engine->bb;
4827 val = arch_read_memory(cpu,bb,phys_addr,0,32);
4828 #endif
4829 memory_read(cpu, bb, Addr, 0, 32);
4830 bb = cpu->dyncom_engine->bb;
4831 val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
4832 //LETS(d * 2, val);
4833 LETFPS(d * 2,FPBITCAST32(val));
4834 #if 0
4835 phys_addr = get_phys_addr(cpu, bb, ADD(Addr, CONST(4)), 1);
4836 bb = cpu->dyncom_engine->bb;
4837 val = arch_read_memory(cpu,bb,phys_addr,0,32);
4838 #endif
4839 memory_read(cpu, bb, ADD(Addr, CONST(4)), 0,32);
4840 bb = cpu->dyncom_engine->bb;
4841 val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
4842 //LETS(d * 2 + 1, val);
4843 LETFPS( d * 2 + 1,FPBITCAST32(val));
4844 }
4845
4846 return No_exp;
4847}
4848#endif
4849#undef vfpinstr
4850#undef vfpinstr_inst
4851#undef VFPLABEL_INST
4852
4853/* ----------------------------------------------------------------------- */
4854/* VLDM */
4855/* cond 110P UDW1 Rn-- Vd-- 101X imm8 imm8 */
4856#define vfpinstr vldm
4857#define vfpinstr_inst vldm_inst
4858#define VFPLABEL_INST VLDM_INST
4859#ifdef VFP_DECODE
4860{"vldm", 3, ARMVFP2, 25, 27, 0x6, 20, 20, 1, 9, 11, 0x5},
4861#endif
4862#ifdef VFP_DECODE_EXCLUSION
4863{"vldm", 0, ARMVFP2, 0},
4864#endif
4865#ifdef VFP_INTERPRETER_TABLE
4866INTERPRETER_TRANSLATE(vfpinstr),
4867#endif
4868#ifdef VFP_INTERPRETER_LABEL
4869&&VFPLABEL_INST,
4870#endif
4871#ifdef VFP_INTERPRETER_STRUCT
4872typedef struct _vldm_inst {
4873 unsigned int single;
4874 unsigned int add;
4875 unsigned int wback;
4876 unsigned int d;
4877 unsigned int n;
4878 unsigned int imm32;
4879 unsigned int regs;
4880} vfpinstr_inst;
4881#endif
4882#ifdef VFP_INTERPRETER_TRANS
4883ARM_INST_PTR INTERPRETER_TRANSLATE(vfpinstr)(unsigned int inst, int index)
4884{
4885 VFP_DEBUG_TRANSLATE;
4886
4887 arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(vfpinstr_inst));
4888 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
4889
4890 inst_base->cond = BITS(inst, 28, 31);
4891 inst_base->idx = index;
4892 inst_base->br = NON_BRANCH;
4893 inst_base->load_r15 = 0;
4894
4895 inst_cream->single = BIT(inst, 8) == 0;
4896 inst_cream->add = BIT(inst, 23);
4897 inst_cream->wback = BIT(inst, 21);
4898 inst_cream->d = (inst_cream->single ? BITS(inst, 12, 15)<<1|BIT(inst, 22) : BITS(inst, 12, 15)|BIT(inst, 22)<<4);
4899 inst_cream->n = BITS(inst, 16, 19);
4900 inst_cream->imm32 = BITS(inst, 0, 7)<<2;
4901 inst_cream->regs = (inst_cream->single ? BITS(inst, 0, 7) : BITS(inst, 1, 7));
4902
4903 return inst_base;
4904}
4905#endif
4906#ifdef VFP_INTERPRETER_IMPL
4907VFPLABEL_INST:
4908{
4909 INC_ICOUNTER;
4910 if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) {
4911 CHECK_VFP_ENABLED;
4912
4913 int i;
4914
4915 vfpinstr_inst *inst_cream = (vfpinstr_inst *)inst_base->component;
4916
4917 addr = (inst_cream->add ? cpu->Reg[inst_cream->n] : cpu->Reg[inst_cream->n] - inst_cream->imm32);
4918 DBG("VLDM : addr[%x]\n", addr);
4919
4920 for (i = 0; i < inst_cream->regs; i++)
4921 {
4922 if (inst_cream->single)
4923 {
4924 fault = check_address_validity(cpu, addr, &phys_addr, 1);
4925 if (fault) goto MMU_EXCEPTION;
4926 fault = interpreter_read_memory(core, addr, phys_addr, cpu->ExtReg[inst_cream->d+i], 32);
4927 if (fault) goto MMU_EXCEPTION;
4928 DBG("\ts%d <= [%x] addr[%x]\n", inst_cream->d+i, cpu->ExtReg[inst_cream->d+i], addr);
4929 addr += 4;
4930 }
4931 else
4932 {
4933 /* Careful of endianness, little by default */
4934 fault = check_address_validity(cpu, addr, &phys_addr, 1);
4935 if (fault) goto MMU_EXCEPTION;
4936 fault = interpreter_read_memory(core, addr, phys_addr, cpu->ExtReg[(inst_cream->d+i)*2], 32);
4937 if (fault) goto MMU_EXCEPTION;
4938
4939 fault = check_address_validity(cpu, addr + 4, &phys_addr, 1);
4940 if (fault) goto MMU_EXCEPTION;
4941 fault = interpreter_read_memory(core, addr + 4, phys_addr, cpu->ExtReg[(inst_cream->d+i)*2 + 1], 32);
4942 if (fault) goto MMU_EXCEPTION;
4943 DBG("\ts[%d-%d] <= [%x-%x] addr[%x-%x]\n", (inst_cream->d+i)*2+1, (inst_cream->d+i)*2, cpu->ExtReg[(inst_cream->d+i)*2+1], cpu->ExtReg[(inst_cream->d+i)*2], addr+4, addr);
4944 addr += 8;
4945 }
4946 }
4947 if (inst_cream->wback){
4948 cpu->Reg[inst_cream->n] = (inst_cream->add ? cpu->Reg[inst_cream->n] + inst_cream->imm32 :
4949 cpu->Reg[inst_cream->n] - inst_cream->imm32);
4950 DBG("\twback r%d[%x]\n", inst_cream->n, cpu->Reg[inst_cream->n]);
4951 }
4952
4953 }
4954 cpu->Reg[15] += GET_INST_SIZE(cpu);
4955 INC_PC(sizeof(vfpinstr_inst));
4956 FETCH_INST;
4957 GOTO_NEXT_INST;
4958}
4959#endif
4960#ifdef VFP_LDC_TRANS
4961/* Should be the last operation of LDC */
4962return VLDM(state, type, instr, value);
4963#endif
4964#ifdef VFP_LDC_IMPL
4965int VLDM(ARMul_State * state, int type, ARMword instr, ARMword value)
4966{
4967 static int i = 0;
4968 static int single_regs, add, wback, d, n, imm32, regs;
4969 if (type == ARMul_FIRST)
4970 {
4971 single_regs = BIT(8) == 0; /* Single precision */
4972 add = BIT(23); /* */
4973 wback = BIT(21); /* write-back */
4974 d = single_regs ? BITS(12, 15)<<1|BIT(22) : BIT(22)<<4|BITS(12, 15); /* First VFP register to load */
4975 n = BITS(16, 19); /* Base register */
4976 imm32 = BITS(0,7) * 4; /* may not be used */
4977 regs = single_regs ? BITS(0, 7) : BITS(0, 7)>>1; /* FLDMX if regs is odd */
4978
4979 DBG("VLDM :\n");
4980
4981 if (wback) {
4982 state->Reg[n] = (add ? state->Reg[n] + imm32 : state->Reg[n] - imm32);
4983 DBG("\twback r%d[%x]\n", n, state->Reg[n]);
4984 }
4985
4986 i = 0;
4987
4988 return ARMul_DONE;
4989 }
4990 else if (type == ARMul_DATA)
4991 {
4992 if (single_regs)
4993 {
4994 state->ExtReg[d + i] = value;
4995 DBG("\ts%d <= [%x] addr[?]\n", d+i, state->ExtReg[d + i]);
4996 i++;
4997 if (i < regs)
4998 return ARMul_INC;
4999 else
5000 return ARMul_DONE;
5001 }
5002 else
5003 {
5004 /* FIXME Careful of endianness, may need to rework this */
5005 state->ExtReg[d*2 + i] = value;
5006 DBG("\ts[%d] <= [%x] addr[?]\n", d*2 + i, state->ExtReg[d*2 + i]);
5007 i++;
5008 if (i < regs*2)
5009 return ARMul_INC;
5010 else
5011 return ARMul_DONE;
5012 }
5013 }
5014
5015 return -1;
5016}
5017#endif
5018#ifdef VFP_DYNCOM_TABLE
5019DYNCOM_FILL_ACTION(vfpinstr),
5020#endif
5021#ifdef VFP_DYNCOM_TAG
5022int DYNCOM_TAG(vfpinstr)(cpu_t *cpu, addr_t pc, uint32_t instr, tag_t *tag, addr_t *new_pc, addr_t *next_pc)
5023{
5024 int instr_size = INSTR_SIZE;
5025 //DBG("\t\tin %s instruction is not implemented.\n", __FUNCTION__);
5026 //arm_tag_trap(cpu, pc, instr, tag, new_pc, next_pc);
5027 arm_tag_continue(cpu, pc, instr, tag, new_pc, next_pc);
5028 DBG("In %s, pc=0x%x, next_pc=0x%x\n", __FUNCTION__, pc, *next_pc);
5029 *tag |= TAG_NEW_BB;
5030 if(instr >> 28 != 0xe)
5031 *tag |= TAG_CONDITIONAL;
5032
5033 return instr_size;
5034}
5035#endif
5036#ifdef VFP_DYNCOM_TRANS
5037int DYNCOM_TRANS(vfpinstr)(cpu_t *cpu, uint32_t instr, BasicBlock *bb, addr_t pc){
5038 int single = BIT(8) == 0;
5039 int add = BIT(23);
5040 int wback = BIT(21);
5041 int d = single ? BITS(12, 15)<<1|BIT(22) : BITS(12, 15)|BIT(22)<<4;
5042 int n = BITS(16, 19);
5043 int imm32 = BITS(0, 7)<<2;
5044 int regs = single ? BITS(0, 7) : BITS(1, 7);
5045
5046 Value* Addr = SELECT(CONST1(add), R(n), SUB(R(n), CONST(imm32)));
5047 //if(single)
5048 // bb = arch_check_mm(cpu, bb, Addr, regs * 4, 1, cpu->dyncom_engine->bb_trap);
5049 //else
5050 // bb = arch_check_mm(cpu, bb, Addr, regs * 4, 1, cpu->dyncom_engine->bb_trap);
5051
5052 DBG("VLDM \n");
5053 int i;
5054 //Value* phys_addr;
5055 Value* val;
5056 for (i = 0; i < regs; i++)
5057 {
5058 if (single)
5059 {
5060
5061 //fault = interpreter_write_memory(core, addr, phys_addr, cpu->ExtReg[inst_cream->d+i], 32);
5062 /* if R(i) is R15? */
5063 #if 0
5064 phys_addr = get_phys_addr(cpu, bb, Addr, 1);
5065 bb = cpu->dyncom_engine->bb;
5066 val = arch_read_memory(cpu,bb,phys_addr,0,32);
5067 #endif
5068 memory_read(cpu, bb, Addr, 0, 32);
5069 bb = cpu->dyncom_engine->bb;
5070 val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
5071 //LETS(d + i, val);
5072 LETFPS(d + i, FPBITCAST32(val));
5073 //if (fault) goto MMU_EXCEPTION;
5074 //DBG("\taddr[%x] <= s%d=[%x]\n", addr, inst_cream->d+i, cpu->ExtReg[inst_cream->d+i]);
5075 Addr = ADD(Addr, CONST(4));
5076 }
5077 else
5078 {
5079 #if 0
5080 phys_addr = get_phys_addr(cpu, bb, Addr, 1);
5081 bb = cpu->dyncom_engine->bb;
5082 val = arch_read_memory(cpu,bb,phys_addr,0,32);
5083 #endif
5084 memory_read(cpu, bb, Addr, 0, 32);
5085 bb = cpu->dyncom_engine->bb;
5086 val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
5087 LETFPS((d + i) * 2, FPBITCAST32(val));
5088 #if 0
5089 phys_addr = get_phys_addr(cpu, bb, ADD(Addr, CONST(4)), 1);
5090 bb = cpu->dyncom_engine->bb;
5091 val = arch_read_memory(cpu,bb,phys_addr,0,32);
5092 #endif
5093 memory_read(cpu, bb, Addr, 0, 32);
5094 bb = cpu->dyncom_engine->bb;
5095 val = new LoadInst(cpu->dyncom_engine->read_value, "", false, bb);
5096 LETFPS((d + i) * 2 + 1, FPBITCAST32(val));
5097
5098 //fault = interpreter_write_memory(core, addr + 4, phys_addr, cpu->ExtReg[(inst_cream->d+i)*2 + 1], 32);
5099 //DBG("\taddr[%x-%x] <= s[%d-%d]=[%x-%x]\n", addr+4, addr, (inst_cream->d+i)*2+1, (inst_cream->d+i)*2, cpu->ExtReg[(inst_cream->d+i)*2+1], cpu->ExtReg[(inst_cream->d+i)*2]);
5100 //addr += 8;
5101 Addr = ADD(Addr, CONST(8));
5102 }
5103 }
5104 if (wback){
5105 //cpu->Reg[n] = (add ? cpu->Reg[n] + imm32 :
5106 // cpu->Reg[n] - imm32);
5107 LET(n, SELECT(CONST1(add), ADD(R(n), CONST(imm32)), SUB(R(n), CONST(imm32))));
5108 DBG("\twback r%d, add=%d, imm32=%d\n", n, add, imm32);
5109 }
5110 return No_exp;
5111}
5112#endif
5113#undef vfpinstr
5114#undef vfpinstr_inst
5115#undef VFPLABEL_INST
5116
5117#define VFP_DEBUG_TRANSLATE DBG("in func %s, %x\n", __FUNCTION__, inst);
5118#define VFP_DEBUG_UNIMPLEMENTED(x) do { printf("in func %s, " #x " unimplemented\n", __FUNCTION__); exit(-1); } while (0)
5119#define VFP_DEBUG_UNTESTED(x) printf("in func %s, " #x " untested\n", __FUNCTION__);
5120
5121#define CHECK_VFP_ENABLED
5122
5123#define CHECK_VFP_CDP_RET vfp_raise_exceptions(cpu, ret, inst_cream->instr, cpu->VFP[VFP_OFFSET(VFP_FPSCR)]); //if (ret == -1) {printf("VFP CDP FAILURE %x\n", inst_cream->instr); exit(-1);}
diff --git a/src/core/arm/interpreter/vfp/vfpsingle.cpp b/src/core/arm/interpreter/vfp/vfpsingle.cpp
new file mode 100644
index 000000000..24c0ded2b
--- /dev/null
+++ b/src/core/arm/interpreter/vfp/vfpsingle.cpp
@@ -0,0 +1,1277 @@
1/*
2 vfp/vfpsingle.c - ARM VFPv3 emulation unit - SoftFloat single instruction
3 Copyright (C) 2003 Skyeye Develop Group
4 for help please send mail to <skyeye-developer@lists.gro.clinux.org>
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19*/
20
21/*
22 * This code is derived in part from :
23 * - Android kernel
24 * - John R. Housers softfloat library, which
25 * carries the following notice:
26 *
27 * ===========================================================================
28 * This C source file is part of the SoftFloat IEC/IEEE Floating-point
29 * Arithmetic Package, Release 2.
30 *
31 * Written by John R. Hauser. This work was made possible in part by the
32 * International Computer Science Institute, located at Suite 600, 1947 Center
33 * Street, Berkeley, California 94704. Funding was partially provided by the
34 * National Science Foundation under grant MIP-9311980. The original version
35 * of this code was written as part of a project to build a fixed-point vector
36 * processor in collaboration with the University of California at Berkeley,
37 * overseen by Profs. Nelson Morgan and John Wawrzynek. More information
38 * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
39 * arithmetic/softfloat.html'.
40 *
41 * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
42 * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
43 * TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
44 * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
45 * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
46 *
47 * Derivative works are acceptable, even for commercial purposes, so long as
48 * (1) they include prominent notice that the work is derivative, and (2) they
49 * include prominent notice akin to these three paragraphs for those parts of
50 * this code that are retained.
51 * ===========================================================================
52 */
53
54#include "vfp_helper.h"
55#include "asm_vfp.h"
56
57static struct vfp_single vfp_single_default_qnan = {
58	255,				/* exponent */
59	0,				/* sign */
60	VFP_SINGLE_SIGNIFICAND_QNAN,	/* significand (quiet-NaN bit set) */
61};
62
63static void vfp_single_dump(const char *str, struct vfp_single *s)
64{
65 pr_debug("VFP: %s: sign=%d exponent=%d significand=%08x\n",
66 str, s->sign != 0, s->exponent, s->significand);
67}
68
69static void vfp_single_normalise_denormal(struct vfp_single *vs)
70{
71 int bits = 31 - fls(vs->significand);
72
73 vfp_single_dump("normalise_denormal: in", vs);
74
75 if (bits) {
76 vs->exponent -= bits - 1;
77 vs->significand <<= bits;
78 }
79
80 vfp_single_dump("normalise_denormal: out", vs);
81}
82
83
84u32 vfp_single_normaliseround(ARMul_State* state, int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func)
85{
86 u32 significand, incr, rmode;
87 int exponent, shift, underflow;
88
89 vfp_single_dump("pack: in", vs);
90
91 /*
92 * Infinities and NaNs are a special case.
93 */
94 if (vs->exponent == 255 && (vs->significand == 0 || exceptions))
95 goto pack;
96
97 /*
98 * Special-case zero.
99 */
100 if (vs->significand == 0) {
101 vs->exponent = 0;
102 goto pack;
103 }
104
105 exponent = vs->exponent;
106 significand = vs->significand;
107
108 /*
109 * Normalise first. Note that we shift the significand up to
110 * bit 31, so we have VFP_SINGLE_LOW_BITS + 1 below the least
111 * significant bit.
112 */
113 shift = 32 - fls(significand);
114 if (shift < 32 && shift) {
115 exponent -= shift;
116 significand <<= shift;
117 }
118
119#if 1
120 vs->exponent = exponent;
121 vs->significand = significand;
122 vfp_single_dump("pack: normalised", vs);
123#endif
124
125 /*
126 * Tiny number?
127 */
128 underflow = exponent < 0;
129 if (underflow) {
130 significand = vfp_shiftright32jamming(significand, -exponent);
131 exponent = 0;
132#if 1
133 vs->exponent = exponent;
134 vs->significand = significand;
135 vfp_single_dump("pack: tiny number", vs);
136#endif
137 if (!(significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1)))
138 underflow = 0;
139 }
140
141 /*
142 * Select rounding increment.
143 */
144 incr = 0;
145 rmode = fpscr & FPSCR_RMODE_MASK;
146
147 if (rmode == FPSCR_ROUND_NEAREST) {
148 incr = 1 << VFP_SINGLE_LOW_BITS;
149 if ((significand & (1 << (VFP_SINGLE_LOW_BITS + 1))) == 0)
150 incr -= 1;
151 } else if (rmode == FPSCR_ROUND_TOZERO) {
152 incr = 0;
153 } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vs->sign != 0))
154 incr = (1 << (VFP_SINGLE_LOW_BITS + 1)) - 1;
155
156 pr_debug("VFP: rounding increment = 0x%08x\n", incr);
157
158 /*
159 * Is our rounding going to overflow?
160 */
161 if ((significand + incr) < significand) {
162 exponent += 1;
163 significand = (significand >> 1) | (significand & 1);
164 incr >>= 1;
165#if 1
166 vs->exponent = exponent;
167 vs->significand = significand;
168 vfp_single_dump("pack: overflow", vs);
169#endif
170 }
171
172 /*
173 * If any of the low bits (which will be shifted out of the
174 * number) are non-zero, the result is inexact.
175 */
176 if (significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1))
177 exceptions |= FPSCR_IXC;
178
179 /*
180 * Do our rounding.
181 */
182 significand += incr;
183
184 /*
185 * Infinity?
186 */
187 if (exponent >= 254) {
188 exceptions |= FPSCR_OFC | FPSCR_IXC;
189 if (incr == 0) {
190 vs->exponent = 253;
191 vs->significand = 0x7fffffff;
192 } else {
193 vs->exponent = 255; /* infinity */
194 vs->significand = 0;
195 }
196 } else {
197 if (significand >> (VFP_SINGLE_LOW_BITS + 1) == 0)
198 exponent = 0;
199 if (exponent || significand > 0x80000000)
200 underflow = 0;
201 if (underflow)
202 exceptions |= FPSCR_UFC;
203 vs->exponent = exponent;
204 vs->significand = significand >> 1;
205 }
206
207 pack:
208 vfp_single_dump("pack: final", vs);
209 {
210 s32 d = vfp_single_pack(vs);
211#if 1
212 pr_debug("VFP: %s: d(s%d)=%08x exceptions=%08x\n", func,
213 sd, d, exceptions);
214#endif
215 vfp_put_float(state, d, sd);
216 }
217
218 return exceptions;
219}
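/*
 * Rounding illustration (assumes VFP_SINGLE_LOW_BITS == 7, as in the
 * kernel sources this file derives from): in round-to-nearest mode the
 * increment is 0x80, reduced to 0x7f when bit 8 (the eventual result LSB)
 * is already 0, so an exact halfway pattern of 0x80 in the low eight bits
 * fails to carry and the result rounds to the even neighbour, while any
 * non-zero bits in that low field raise FPSCR_IXC above.
 */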
220
221/*
222 * Propagate the NaN, setting exceptions if it is signalling.
223 * 'n' is always a NaN. 'm' may be a number, NaN or infinity.
224 */
225static u32
226vfp_propagate_nan(struct vfp_single *vsd, struct vfp_single *vsn,
227 struct vfp_single *vsm, u32 fpscr)
228{
229 struct vfp_single *nan;
230 int tn, tm = 0;
231
232 tn = vfp_single_type(vsn);
233
234 if (vsm)
235 tm = vfp_single_type(vsm);
236
237 if (fpscr & FPSCR_DEFAULT_NAN)
238 /*
239 * Default NaN mode - always returns a quiet NaN
240 */
241 nan = &vfp_single_default_qnan;
242 else {
243 /*
244 * Contemporary mode - select the first signalling
245 * NAN, or if neither are signalling, the first
246 * quiet NAN.
247 */
248 if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN))
249 nan = vsn;
250 else
251 nan = vsm;
252 /*
253 * Make the NaN quiet.
254 */
255 nan->significand |= VFP_SINGLE_SIGNIFICAND_QNAN;
256 }
257
258 *vsd = *nan;
259
260 /*
261 * If one was a signalling NAN, raise invalid operation.
262 */
263 return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG;
264}
265
266
267/*
268 * Extended operations
269 */
270static u32 vfp_single_fabs(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
271{
272 vfp_put_float(state, vfp_single_packed_abs(m), sd);
273 return 0;
274}
275
276static u32 vfp_single_fcpy(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
277{
278 vfp_put_float(state, m, sd);
279 return 0;
280}
281
282static u32 vfp_single_fneg(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
283{
284 vfp_put_float(state, vfp_single_packed_negate(m), sd);
285 return 0;
286}
287
288static const u16 sqrt_oddadjust[] = {
289 0x0004, 0x0022, 0x005d, 0x00b1, 0x011d, 0x019f, 0x0236, 0x02e0,
290 0x039c, 0x0468, 0x0545, 0x0631, 0x072b, 0x0832, 0x0946, 0x0a67
291};
292
293static const u16 sqrt_evenadjust[] = {
294 0x0a2d, 0x08af, 0x075a, 0x0629, 0x051a, 0x0429, 0x0356, 0x029e,
295 0x0200, 0x0179, 0x0109, 0x00af, 0x0068, 0x0034, 0x0012, 0x0002
296};
297
298u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand)
299{
300 int index;
301 u32 z, a;
302
303 if ((significand & 0xc0000000) != 0x40000000) {
304 pr_debug("VFP: estimate_sqrt: invalid significand\n");
305 }
306
307 a = significand << 1;
308 index = (a >> 27) & 15;
309 if (exponent & 1) {
310 z = 0x4000 + (a >> 17) - sqrt_oddadjust[index];
311 z = ((a / z) << 14) + (z << 15);
312 a >>= 1;
313 } else {
314 z = 0x8000 + (a >> 17) - sqrt_evenadjust[index];
315 z = a / z + z;
316 z = (z >= 0x20000) ? 0xffff8000 : (z << 15);
317 if (z <= a)
318 return (s32)a >> 1;
319 }
320 {
321 u64 v = (u64)a << 31;
322 do_div(v, z);
323 return v + (z >> 1);
324 }
325}
326
327static u32 vfp_single_fsqrt(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
328{
329 struct vfp_single vsm, vsd;
330 int ret, tm;
331
332 vfp_single_unpack(&vsm, m);
333 tm = vfp_single_type(&vsm);
334 if (tm & (VFP_NAN|VFP_INFINITY)) {
335 struct vfp_single *vsp = &vsd;
336
337 if (tm & VFP_NAN)
338 ret = vfp_propagate_nan(vsp, &vsm, NULL, fpscr);
339 else if (vsm.sign == 0) {
340 sqrt_copy:
341 vsp = &vsm;
342 ret = 0;
343 } else {
344 sqrt_invalid:
345 vsp = &vfp_single_default_qnan;
346 ret = FPSCR_IOC;
347 }
348 vfp_put_float(state, vfp_single_pack(vsp), sd);
349 return ret;
350 }
351
352 /*
353 * sqrt(+/- 0) == +/- 0
354 */
355 if (tm & VFP_ZERO)
356 goto sqrt_copy;
357
358 /*
359 * Normalise a denormalised number
360 */
361 if (tm & VFP_DENORMAL)
362 vfp_single_normalise_denormal(&vsm);
363
364 /*
365 * sqrt(<0) = invalid
366 */
367 if (vsm.sign)
368 goto sqrt_invalid;
369
370 vfp_single_dump("sqrt", &vsm);
371
372 /*
373 * Estimate the square root.
374 */
375 vsd.sign = 0;
376 vsd.exponent = ((vsm.exponent - 127) >> 1) + 127;
377 vsd.significand = vfp_estimate_sqrt_significand(vsm.exponent, vsm.significand) + 2;
378
379 vfp_single_dump("sqrt estimate", &vsd);
380
381 /*
382 * And now adjust.
383 */
384 if ((vsd.significand & VFP_SINGLE_LOW_BITS_MASK) <= 5) {
385 if (vsd.significand < 2) {
386 vsd.significand = 0xffffffff;
387 } else {
388 u64 term;
389 s64 rem;
390 vsm.significand <<= !(vsm.exponent & 1);
391 term = (u64)vsd.significand * vsd.significand;
392 rem = ((u64)vsm.significand << 32) - term;
393
394 pr_debug("VFP: term=%016llx rem=%016llx\n", term, rem);
395
396 while (rem < 0) {
397 vsd.significand -= 1;
398 rem += ((u64)vsd.significand << 1) | 1;
399 }
400 vsd.significand |= rem != 0;
401 }
402 }
403 vsd.significand = vfp_shiftright32jamming(vsd.significand, 1);
404
405 return vfp_single_normaliseround(state, sd, &vsd, fpscr, 0, "fsqrt");
406}
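/*
 * Worked example (illustration): for m = 16.0f the unpacked biased
 * exponent is 131 (2^4), so vsd.exponent = ((131 - 127) >> 1) + 127 = 129,
 * i.e. 2^2; after the estimate and adjustment steps the packed result
 * is 4.0f (0x40800000).
 */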
407
408/*
409 * Equal := ZC
410 * Less than := N
411 * Greater than := C
412 * Unordered := CV
413 */
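/*
 * Example (illustration): comparing d = 1.0f against m = 2.0f sets N
 * (less than); with the operands swapped it sets C (greater than);
 * equal operands, or +0 against -0, set Z and C; a NaN operand sets
 * C and V (unordered), exactly as the table above describes.
 */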
414static u32 vfp_compare(ARMul_State* state, int sd, int signal_on_qnan, s32 m, u32 fpscr)
415{
416 s32 d;
417 u32 ret = 0;
418
419 d = vfp_get_float(state, sd);
420 if (vfp_single_packed_exponent(m) == 255 && vfp_single_packed_mantissa(m)) {
421 ret |= FPSCR_C | FPSCR_V;
422 if (signal_on_qnan || !(vfp_single_packed_mantissa(m) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
423 /*
424 * Signalling NaN, or signalling on quiet NaN
425 */
426 ret |= FPSCR_IOC;
427 }
428
429 if (vfp_single_packed_exponent(d) == 255 && vfp_single_packed_mantissa(d)) {
430 ret |= FPSCR_C | FPSCR_V;
431 if (signal_on_qnan || !(vfp_single_packed_mantissa(d) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
432 /*
433 * Signalling NaN, or signalling on quiet NaN
434 */
435 ret |= FPSCR_IOC;
436 }
437
438 if (ret == 0) {
439 if (d == m || vfp_single_packed_abs(d | m) == 0) {
440 /*
441 * equal
442 */
443 ret |= FPSCR_Z | FPSCR_C;
444 } else if (vfp_single_packed_sign(d ^ m)) {
445 /*
446 * different signs
447 */
448 if (vfp_single_packed_sign(d))
449 /*
450 * d is negative, so d < m
451 */
452 ret |= FPSCR_N;
453 else
454 /*
455 * d is positive, so d > m
456 */
457 ret |= FPSCR_C;
458 } else if ((vfp_single_packed_sign(d) != 0) ^ (d < m)) {
459 /*
460 * d < m
461 */
462 ret |= FPSCR_N;
463 } else if ((vfp_single_packed_sign(d) != 0) ^ (d > m)) {
464 /*
465 * d > m
466 */
467 ret |= FPSCR_C;
468 }
469 }
470 return ret;
471}
472
473static u32 vfp_single_fcmp(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
474{
475 return vfp_compare(state, sd, 0, m, fpscr);
476}
477
478static u32 vfp_single_fcmpe(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
479{
480 return vfp_compare(state, sd, 1, m, fpscr);
481}
482
483static u32 vfp_single_fcmpz(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
484{
485 return vfp_compare(state, sd, 0, 0, fpscr);
486}
487
488static u32 vfp_single_fcmpez(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
489{
490 return vfp_compare(state, sd, 1, 0, fpscr);
491}
492
493static u32 vfp_single_fcvtd(ARMul_State* state, int dd, int unused, s32 m, u32 fpscr)
494{
495 struct vfp_single vsm;
496 struct vfp_double vdd;
497 int tm;
498 u32 exceptions = 0;
499
500 vfp_single_unpack(&vsm, m);
501
502 tm = vfp_single_type(&vsm);
503
504 /*
505 * If we have a signalling NaN, signal invalid operation.
506 */
507 if (tm == VFP_SNAN)
508 exceptions = FPSCR_IOC;
509
510 if (tm & VFP_DENORMAL)
511 vfp_single_normalise_denormal(&vsm);
512
513 vdd.sign = vsm.sign;
514 vdd.significand = (u64)vsm.significand << 32;
515
516 /*
517 * If we have an infinity or NaN, the exponent must be 2047.
518 */
519 if (tm & (VFP_INFINITY|VFP_NAN)) {
520 vdd.exponent = 2047;
521 if (tm == VFP_QNAN)
522 vdd.significand |= VFP_DOUBLE_SIGNIFICAND_QNAN;
523 goto pack_nan;
524 } else if (tm & VFP_ZERO)
525 vdd.exponent = 0;
526 else
527 vdd.exponent = vsm.exponent + (1023 - 127);
528
529 return vfp_double_normaliseround(state, dd, &vdd, fpscr, exceptions, "fcvtd");
530
531 pack_nan:
532 vfp_put_double(state, vfp_double_pack(&vdd), dd);
533 return exceptions;
534}
535
536static u32 vfp_single_fuito(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
537{
538 struct vfp_single vs;
539
540 vs.sign = 0;
541 vs.exponent = 127 + 31 - 1;
542 vs.significand = (u32)m;
543
544 return vfp_single_normaliseround(state, sd, &vs, fpscr, 0, "fuito");
545}
546
547static u32 vfp_single_fsito(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
548{
549 struct vfp_single vs;
550
551 vs.sign = (m & 0x80000000) >> 16;
552 vs.exponent = 127 + 31 - 1;
553 vs.significand = vs.sign ? -m : m;
554
555 return vfp_single_normaliseround(state, sd, &vs, fpscr, 0, "fsito");
556}
557
558static u32 vfp_single_ftoui(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
559{
560 struct vfp_single vsm;
561 u32 d, exceptions = 0;
562 int rmode = fpscr & FPSCR_RMODE_MASK;
563 int tm;
564
565 vfp_single_unpack(&vsm, m);
566 vfp_single_dump("VSM", &vsm);
567
568 /*
569 * Do we have a denormalised number?
570 */
571 tm = vfp_single_type(&vsm);
572 if (tm & VFP_DENORMAL)
573 exceptions |= FPSCR_IDC;
574
575 if (tm & VFP_NAN)
576 vsm.sign = 0;
577
578 if (vsm.exponent >= 127 + 32) {
579 d = vsm.sign ? 0 : 0xffffffff;
580 exceptions = FPSCR_IOC;
581 } else if (vsm.exponent >= 127 - 1) {
582 int shift = 127 + 31 - vsm.exponent;
583 u32 rem, incr = 0;
584
585 /*
586 * 2^0 <= m < 2^32-2^8
587 */
588 d = (vsm.significand << 1) >> shift;
589 rem = vsm.significand << (33 - shift);
590
591 if (rmode == FPSCR_ROUND_NEAREST) {
592 incr = 0x80000000;
593 if ((d & 1) == 0)
594 incr -= 1;
595 } else if (rmode == FPSCR_ROUND_TOZERO) {
596 incr = 0;
597 } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
598 incr = ~0;
599 }
600
601 if ((rem + incr) < rem) {
602 if (d < 0xffffffff)
603 d += 1;
604 else
605 exceptions |= FPSCR_IOC;
606 }
607
608 if (d && vsm.sign) {
609 d = 0;
610 exceptions |= FPSCR_IOC;
611 } else if (rem)
612 exceptions |= FPSCR_IXC;
613 } else {
614 d = 0;
615 if (vsm.exponent | vsm.significand) {
616 exceptions |= FPSCR_IXC;
617 if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
618 d = 1;
619 else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign) {
620 d = 0;
621 exceptions |= FPSCR_IOC;
622 }
623 }
624 }
625
626 pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);
627
628 vfp_put_float(state, d, sd);
629
630 return exceptions;
631}
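/*
 * Worked example (illustration): converting 2.5f (0x40200000) with
 * round-to-nearest gives d = 2 and rem = 0x80000000, an exact tie;
 * because d is even, incr becomes 0x7fffffff, rem + incr does not carry,
 * and the result stays 2 (ties-to-even) while the non-zero remainder
 * still raises FPSCR_IXC.
 */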
632
633static u32 vfp_single_ftouiz(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
634{
635 return vfp_single_ftoui(state, sd, unused, m, FPSCR_ROUND_TOZERO);
636}
637
638static u32 vfp_single_ftosi(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
639{
640 struct vfp_single vsm;
641 u32 d, exceptions = 0;
642 int rmode = fpscr & FPSCR_RMODE_MASK;
643 int tm;
644
645 vfp_single_unpack(&vsm, m);
646 vfp_single_dump("VSM", &vsm);
647
648 /*
649 * Do we have a denormalised number?
650 */
651 tm = vfp_single_type(&vsm);
652 	if (tm & VFP_DENORMAL)
653 exceptions |= FPSCR_IDC;
654
655 if (tm & VFP_NAN) {
656 d = 0;
657 exceptions |= FPSCR_IOC;
658 } else if (vsm.exponent >= 127 + 32) {
659 /*
660 * m >= 2^31-2^7: invalid
661 */
662 d = 0x7fffffff;
663 if (vsm.sign)
664 d = ~d;
665 exceptions |= FPSCR_IOC;
666 } else if (vsm.exponent >= 127 - 1) {
667 int shift = 127 + 31 - vsm.exponent;
668 u32 rem, incr = 0;
669
670 /* 2^0 <= m <= 2^31-2^7 */
671 d = (vsm.significand << 1) >> shift;
672 rem = vsm.significand << (33 - shift);
673
674 if (rmode == FPSCR_ROUND_NEAREST) {
675 incr = 0x80000000;
676 if ((d & 1) == 0)
677 incr -= 1;
678 } else if (rmode == FPSCR_ROUND_TOZERO) {
679 incr = 0;
680 } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
681 incr = ~0;
682 }
683
684 if ((rem + incr) < rem && d < 0xffffffff)
685 d += 1;
686 if (d > 0x7fffffff + (vsm.sign != 0)) {
687 d = 0x7fffffff + (vsm.sign != 0);
688 exceptions |= FPSCR_IOC;
689 } else if (rem)
690 exceptions |= FPSCR_IXC;
691
692 if (vsm.sign)
693 d = -d;
694 } else {
695 d = 0;
696 if (vsm.exponent | vsm.significand) {
697 exceptions |= FPSCR_IXC;
698 if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
699 d = 1;
700 else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign)
701 d = -1;
702 }
703 }
704
705 pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);
706
707 vfp_put_float(state, (s32)d, sd);
708
709 return exceptions;
710}
711
712static u32 vfp_single_ftosiz(ARMul_State* state, int sd, int unused, s32 m, u32 fpscr)
713{
714 return vfp_single_ftosi(state, sd, unused, m, FPSCR_ROUND_TOZERO);
715}
716
717static struct op fops_ext[] = {
718 { vfp_single_fcpy, 0 }, //0x00000000 - FEXT_FCPY
719 { vfp_single_fabs, 0 }, //0x00000001 - FEXT_FABS
720 { vfp_single_fneg, 0 }, //0x00000002 - FEXT_FNEG
721 { vfp_single_fsqrt, 0 }, //0x00000003 - FEXT_FSQRT
722 { NULL, 0 },
723 { NULL, 0 },
724 { NULL, 0 },
725 { NULL, 0 },
726 { vfp_single_fcmp, OP_SCALAR }, //0x00000008 - FEXT_FCMP
727 { vfp_single_fcmpe, OP_SCALAR }, //0x00000009 - FEXT_FCMPE
728 { vfp_single_fcmpz, OP_SCALAR }, //0x0000000A - FEXT_FCMPZ
729 { vfp_single_fcmpez, OP_SCALAR }, //0x0000000B - FEXT_FCMPEZ
730 { NULL, 0 },
731 { NULL, 0 },
732 { NULL, 0 },
733 { vfp_single_fcvtd, OP_SCALAR|OP_DD }, //0x0000000F - FEXT_FCVT
734 { vfp_single_fuito, OP_SCALAR }, //0x00000010 - FEXT_FUITO
735 { vfp_single_fsito, OP_SCALAR }, //0x00000011 - FEXT_FSITO
736 { NULL, 0 },
737 { NULL, 0 },
738 { NULL, 0 },
739 { NULL, 0 },
740 { NULL, 0 },
741 { NULL, 0 },
742 { vfp_single_ftoui, OP_SCALAR }, //0x00000018 - FEXT_FTOUI
743 { vfp_single_ftouiz, OP_SCALAR }, //0x00000019 - FEXT_FTOUIZ
744 { vfp_single_ftosi, OP_SCALAR }, //0x0000001A - FEXT_FTOSI
745 { vfp_single_ftosiz, OP_SCALAR }, //0x0000001B - FEXT_FTOSIZ
746};
747
748
749
750
751
752static u32
753vfp_single_fadd_nonnumber(struct vfp_single *vsd, struct vfp_single *vsn,
754 struct vfp_single *vsm, u32 fpscr)
755{
756 struct vfp_single *vsp;
757 u32 exceptions = 0;
758 int tn, tm;
759
760 tn = vfp_single_type(vsn);
761 tm = vfp_single_type(vsm);
762
763 if (tn & tm & VFP_INFINITY) {
764 /*
765 * Two infinities. Are they different signs?
766 */
767 if (vsn->sign ^ vsm->sign) {
768 /*
769 * different signs -> invalid
770 */
771 exceptions = FPSCR_IOC;
772 vsp = &vfp_single_default_qnan;
773 } else {
774 /*
775 * same signs -> valid
776 */
777 vsp = vsn;
778 }
779 } else if (tn & VFP_INFINITY && tm & VFP_NUMBER) {
780 /*
781 * One infinity and one number -> infinity
782 */
783 vsp = vsn;
784 } else {
785 /*
786 * 'n' is a NaN of some type
787 */
788 return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
789 }
790 *vsd = *vsp;
791 return exceptions;
792}
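/*
 * Examples (illustration): (+inf) + (-inf) takes the "different signs"
 * path above and yields the default qNaN with FPSCR_IOC, while
 * (+inf) + 5.0f simply copies the infinity through with no exception.
 */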
793
794static u32
795vfp_single_add(struct vfp_single *vsd, struct vfp_single *vsn,
796 struct vfp_single *vsm, u32 fpscr)
797{
798 u32 exp_diff, m_sig;
799
800 if (vsn->significand & 0x80000000 ||
801 vsm->significand & 0x80000000) {
802 pr_info("VFP: bad FP values\n");
803 vfp_single_dump("VSN", vsn);
804 vfp_single_dump("VSM", vsm);
805 }
806
807 /*
808 * Ensure that 'n' is the largest magnitude number. Note that
809 * if 'n' and 'm' have equal exponents, we do not swap them.
810 * This ensures that NaN propagation works correctly.
811 */
812 if (vsn->exponent < vsm->exponent) {
813 struct vfp_single *t = vsn;
814 vsn = vsm;
815 vsm = t;
816 }
817
818 /*
819 * Is 'n' an infinity or a NaN? Note that 'm' may be a number,
820 * infinity or a NaN here.
821 */
822 if (vsn->exponent == 255)
823 return vfp_single_fadd_nonnumber(vsd, vsn, vsm, fpscr);
824
825 /*
826 * We have two proper numbers, where 'vsn' is the larger magnitude.
827 *
828 * Copy 'n' to 'd' before doing the arithmetic.
829 */
830 *vsd = *vsn;
831
832 /*
833 * Align both numbers.
834 */
835 exp_diff = vsn->exponent - vsm->exponent;
836 m_sig = vfp_shiftright32jamming(vsm->significand, exp_diff);
837
838 /*
839 * If the signs are different, we are really subtracting.
840 */
841 if (vsn->sign ^ vsm->sign) {
842 m_sig = vsn->significand - m_sig;
843 if ((s32)m_sig < 0) {
844 vsd->sign = vfp_sign_negate(vsd->sign);
845 m_sig = -m_sig;
846 } else if (m_sig == 0) {
847 vsd->sign = (fpscr & FPSCR_RMODE_MASK) ==
848 FPSCR_ROUND_MINUSINF ? 0x8000 : 0;
849 }
850 } else {
851 m_sig = vsn->significand + m_sig;
852 }
853 vsd->significand = m_sig;
854
855 return 0;
856}
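/*
 * Worked example (illustration): adding 1.0f (biased exponent 127) and
 * 2^-40 (biased exponent 87) gives exp_diff = 40, so every bit of the
 * smaller significand is shifted out; vfp_shiftright32jamming() keeps a
 * sticky 1 in the lowest bit, which is what lets the rounding step still
 * flag the sum as inexact (FPSCR_IXC) even though the addend itself
 * vanished from the aligned significand.
 */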
857
858static u32
859vfp_single_multiply(struct vfp_single *vsd, struct vfp_single *vsn, struct vfp_single *vsm, u32 fpscr)
860{
861 vfp_single_dump("VSN", vsn);
862 vfp_single_dump("VSM", vsm);
863
864 /*
865 * Ensure that 'n' is the largest magnitude number. Note that
866 * if 'n' and 'm' have equal exponents, we do not swap them.
867 * This ensures that NaN propagation works correctly.
868 */
869 if (vsn->exponent < vsm->exponent) {
870 struct vfp_single *t = vsn;
871 vsn = vsm;
872 vsm = t;
873 pr_debug("VFP: swapping M <-> N\n");
874 }
875
876 vsd->sign = vsn->sign ^ vsm->sign;
877
878 /*
879 * If 'n' is an infinity or NaN, handle it. 'm' may be anything.
880 */
881 if (vsn->exponent == 255) {
882 if (vsn->significand || (vsm->exponent == 255 && vsm->significand))
883 return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
884 if ((vsm->exponent | vsm->significand) == 0) {
885 *vsd = vfp_single_default_qnan;
886 return FPSCR_IOC;
887 }
888 vsd->exponent = vsn->exponent;
889 vsd->significand = 0;
890 return 0;
891 }
892
893 /*
894 * If 'm' is zero, the result is always zero. In this case,
895 * 'n' may be zero or a number, but it doesn't matter which.
896 */
897 if ((vsm->exponent | vsm->significand) == 0) {
898 vsd->exponent = 0;
899 vsd->significand = 0;
900 return 0;
901 }
902
903 /*
904 * We add 2 to the destination exponent for the same reason as
905 * the addition case - though this time we have +1 from each
906 * input operand.
907 */
908 vsd->exponent = vsn->exponent + vsm->exponent - 127 + 2;
909 vsd->significand = vfp_hi64to32jamming((u64)vsn->significand * vsm->significand);
910
911 vfp_single_dump("VSD", vsd);
912 return 0;
913}
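/*
 * Worked example (illustration): for 2.0f * 3.0f both operands unpack
 * with biased exponent 128, so the code above computes
 * 128 + 128 - 127 + 2 = 131; vfp_single_normaliseround() then shifts the
 * product significand up three places (subtracting 3 from the exponent)
 * and repacks it as 0x40C00000, i.e. 6.0f.
 */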
914
915#define NEG_MULTIPLY (1 << 0)
916#define NEG_SUBTRACT (1 << 1)
917
918static u32
919vfp_single_multiply_accumulate(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr, u32 negate, char *func)
920{
921 struct vfp_single vsd, vsp, vsn, vsm;
922 u32 exceptions;
923 s32 v;
924
925 v = vfp_get_float(state, sn);
926 pr_debug("VFP: s%u = %08x\n", sn, v);
927 vfp_single_unpack(&vsn, v);
928 if (vsn.exponent == 0 && vsn.significand)
929 vfp_single_normalise_denormal(&vsn);
930
931 vfp_single_unpack(&vsm, m);
932 if (vsm.exponent == 0 && vsm.significand)
933 vfp_single_normalise_denormal(&vsm);
934
935 exceptions = vfp_single_multiply(&vsp, &vsn, &vsm, fpscr);
936 if (negate & NEG_MULTIPLY)
937 vsp.sign = vfp_sign_negate(vsp.sign);
938
939 v = vfp_get_float(state, sd);
940 pr_debug("VFP: s%u = %08x\n", sd, v);
941 vfp_single_unpack(&vsn, v);
942 if (negate & NEG_SUBTRACT)
943 vsn.sign = vfp_sign_negate(vsn.sign);
944
945 exceptions |= vfp_single_add(&vsd, &vsn, &vsp, fpscr);
946
947 return vfp_single_normaliseround(state, sd, &vsd, fpscr, exceptions, func);
948}
949
950/*
951 * Standard operations
952 */
953
954/*
955 * sd = sd + (sn * sm)
956 */
957static u32 vfp_single_fmac(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
958{
959 	pr_debug("In %s: sn=s%u sd=s%u\n", __FUNCTION__, sn, sd);
960 return vfp_single_multiply_accumulate(state, sd, sn, m, fpscr, 0, "fmac");
961}
962
963/*
964 * sd = sd - (sn * sm)
965 */
966static u32 vfp_single_fnmac(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
967{
968 	pr_debug("In %s: sd=s%u sn=s%u\n", __FUNCTION__, sd, sn);
969 return vfp_single_multiply_accumulate(state, sd, sn, m, fpscr, NEG_MULTIPLY, "fnmac");
970}
971
972/*
973 * sd = -sd + (sn * sm)
974 */
975static u32 vfp_single_fmsc(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
976{
977 	pr_debug("In %s: sn=s%u sd=s%u\n", __FUNCTION__, sn, sd);
978 return vfp_single_multiply_accumulate(state, sd, sn, m, fpscr, NEG_SUBTRACT, "fmsc");
979}
980
981/*
982 * sd = -sd - (sn * sm)
983 */
984static u32 vfp_single_fnmsc(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
985{
986 	pr_debug("In %s: sn=s%u sd=s%u\n", __FUNCTION__, sn, sd);
987 return vfp_single_multiply_accumulate(state, sd, sn, m, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc");
988}
989
990/*
991 * sd = sn * sm
992 */
993static u32 vfp_single_fmul(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
994{
995 struct vfp_single vsd, vsn, vsm;
996 u32 exceptions;
997 s32 n = vfp_get_float(state, sn);
998
999 	pr_debug("In %s: s%u = %08x\n", __FUNCTION__, sn, n);
1000
1001 vfp_single_unpack(&vsn, n);
1002 if (vsn.exponent == 0 && vsn.significand)
1003 vfp_single_normalise_denormal(&vsn);
1004
1005 vfp_single_unpack(&vsm, m);
1006 if (vsm.exponent == 0 && vsm.significand)
1007 vfp_single_normalise_denormal(&vsm);
1008
1009 exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr);
1010 return vfp_single_normaliseround(state, sd, &vsd, fpscr, exceptions, "fmul");
1011}
1012
1013/*
1014 * sd = -(sn * sm)
1015 */
1016static u32 vfp_single_fnmul(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
1017{
1018 struct vfp_single vsd, vsn, vsm;
1019 u32 exceptions;
1020 s32 n = vfp_get_float(state, sn);
1021
1022 pr_debug("VFP: s%u = %08x\n", sn, n);
1023
1024 vfp_single_unpack(&vsn, n);
1025 if (vsn.exponent == 0 && vsn.significand)
1026 vfp_single_normalise_denormal(&vsn);
1027
1028 vfp_single_unpack(&vsm, m);
1029 if (vsm.exponent == 0 && vsm.significand)
1030 vfp_single_normalise_denormal(&vsm);
1031
1032 exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr);
1033 vsd.sign = vfp_sign_negate(vsd.sign);
1034 return vfp_single_normaliseround(state, sd, &vsd, fpscr, exceptions, "fnmul");
1035}
1036
1037/*
1038 * sd = sn + sm
1039 */
1040static u32 vfp_single_fadd(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
1041{
1042 struct vfp_single vsd, vsn, vsm;
1043 u32 exceptions;
1044 s32 n = vfp_get_float(state, sn);
1045
1046 pr_debug("VFP: s%u = %08x\n", sn, n);
1047
1048 /*
1049 * Unpack and normalise denormals.
1050 */
1051 vfp_single_unpack(&vsn, n);
1052 if (vsn.exponent == 0 && vsn.significand)
1053 vfp_single_normalise_denormal(&vsn);
1054
1055 vfp_single_unpack(&vsm, m);
1056 if (vsm.exponent == 0 && vsm.significand)
1057 vfp_single_normalise_denormal(&vsm);
1058
1059 exceptions = vfp_single_add(&vsd, &vsn, &vsm, fpscr);
1060
1061 return vfp_single_normaliseround(state, sd, &vsd, fpscr, exceptions, "fadd");
1062}
1063
1064/*
1065 * sd = sn - sm
1066 */
1067static u32 vfp_single_fsub(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
1068{
1069 	pr_debug("In %s: sn=s%u sd=s%u\n", __FUNCTION__, sn, sd);
1070 /*
1071 * Subtraction is addition with one sign inverted.
1072 */
1073 return vfp_single_fadd(state, sd, sn, vfp_single_packed_negate(m), fpscr);
1074}
1075
1076/*
1077 * sd = sn / sm
1078 */
1079static u32 vfp_single_fdiv(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr)
1080{
1081 struct vfp_single vsd, vsn, vsm;
1082 u32 exceptions = 0;
1083 s32 n = vfp_get_float(state, sn);
1084 int tm, tn;
1085
1086 pr_debug("VFP: s%u = %08x\n", sn, n);
1087
1088 vfp_single_unpack(&vsn, n);
1089 vfp_single_unpack(&vsm, m);
1090
1091 vsd.sign = vsn.sign ^ vsm.sign;
1092
1093 tn = vfp_single_type(&vsn);
1094 tm = vfp_single_type(&vsm);
1095
1096 /*
1097 * Is n a NAN?
1098 */
1099 if (tn & VFP_NAN)
1100 goto vsn_nan;
1101
1102 /*
1103 * Is m a NAN?
1104 */
1105 if (tm & VFP_NAN)
1106 goto vsm_nan;
1107
1108 /*
1109 * If n and m are infinity, the result is invalid
1110 * If n and m are zero, the result is invalid
1111 */
1112 if (tm & tn & (VFP_INFINITY|VFP_ZERO))
1113 goto invalid;
1114
1115 /*
1116 * If n is infinity, the result is infinity
1117 */
1118 if (tn & VFP_INFINITY)
1119 goto infinity;
1120
1121 /*
1122 * If m is zero, raise div0 exception
1123 */
1124 if (tm & VFP_ZERO)
1125 goto divzero;
1126
1127 /*
1128 * If m is infinity, or n is zero, the result is zero
1129 */
1130 if (tm & VFP_INFINITY || tn & VFP_ZERO)
1131 goto zero;
1132
1133 if (tn & VFP_DENORMAL)
1134 vfp_single_normalise_denormal(&vsn);
1135 if (tm & VFP_DENORMAL)
1136 vfp_single_normalise_denormal(&vsm);
1137
1138 /*
1139 * Ok, we have two numbers, we can perform division.
1140 */
1141 vsd.exponent = vsn.exponent - vsm.exponent + 127 - 1;
1142 vsm.significand <<= 1;
1143 if (vsm.significand <= (2 * vsn.significand)) {
1144 vsn.significand >>= 1;
1145 vsd.exponent++;
1146 }
1147 {
1148 u64 significand = (u64)vsn.significand << 32;
1149 do_div(significand, vsm.significand);
1150 vsd.significand = significand;
1151 }
1152 if ((vsd.significand & 0x3f) == 0)
1153 vsd.significand |= ((u64)vsm.significand * vsd.significand != (u64)vsn.significand << 32);
1154
1155 return vfp_single_normaliseround(state, sd, &vsd, fpscr, 0, "fdiv");
1156
1157 vsn_nan:
1158 exceptions = vfp_propagate_nan(&vsd, &vsn, &vsm, fpscr);
1159 pack:
1160 vfp_put_float(state, vfp_single_pack(&vsd), sd);
1161 return exceptions;
1162
1163 vsm_nan:
1164 exceptions = vfp_propagate_nan(&vsd, &vsm, &vsn, fpscr);
1165 goto pack;
1166
1167 zero:
1168 vsd.exponent = 0;
1169 vsd.significand = 0;
1170 goto pack;
1171
1172 divzero:
1173 exceptions = FPSCR_DZC;
1174 infinity:
1175 vsd.exponent = 255;
1176 vsd.significand = 0;
1177 goto pack;
1178
1179 invalid:
1180 vfp_put_float(state, vfp_single_pack(&vfp_single_default_qnan), sd);
1181 return FPSCR_IOC;
1182}
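/*
 * Worked example (illustration): for 1.0f / 2.0f the exponent starts at
 * 127 - 128 + 127 - 1 = 125, the "vsm <= 2 * vsn" adjustment bumps it to
 * 126, the 64-by-32 division produces significand 0x40000000 exactly
 * (so no sticky bit is set), and normalise/pack yields 0x3f000000,
 * i.e. 0.5f.
 */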
1183
1184static struct op fops[] = {
1185 { vfp_single_fmac, 0 },
1186 { vfp_single_fmsc, 0 },
1187 { vfp_single_fmul, 0 },
1188 { vfp_single_fadd, 0 },
1189 { vfp_single_fnmac, 0 },
1190 { vfp_single_fnmsc, 0 },
1191 { vfp_single_fnmul, 0 },
1192 { vfp_single_fsub, 0 },
1193 { vfp_single_fdiv, 0 },
1194};
1195
1196#define FREG_BANK(x) ((x) & 0x18)
1197#define FREG_IDX(x) ((x) & 7)
1198
1199u32 vfp_single_cpdo(ARMul_State* state, u32 inst, u32 fpscr)
1200{
1201 u32 op = inst & FOP_MASK;
1202 u32 exceptions = 0;
1203 unsigned int dest;
1204 unsigned int sn = vfp_get_sn(inst);
1205 unsigned int sm = vfp_get_sm(inst);
1206 unsigned int vecitr, veclen, vecstride;
1207 struct op *fop;
1208 pr_debug("In %s\n", __FUNCTION__);
1209
1210 vecstride = 1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK);
1211
1212 fop = (op == FOP_EXT) ? &fops_ext[FEXT_TO_IDX(inst)] : &fops[FOP_TO_IDX(op)];
1213
1214 /*
1215 * fcvtsd takes a dN register number as destination, not sN.
1216 * Technically, if bit 0 of dd is set, this is an invalid
1217 * instruction. However, we ignore this for efficiency.
1218 * It also only operates on scalars.
1219 */
1220 if (fop->flags & OP_DD)
1221 dest = vfp_get_dd(inst);
1222 else
1223 dest = vfp_get_sd(inst);
1224
1225 /*
1226 * If destination bank is zero, vector length is always '1'.
1227 * ARM DDI0100F C5.1.3, C5.3.2.
1228 */
1229 if ((fop->flags & OP_SCALAR) || FREG_BANK(dest) == 0)
1230 veclen = 0;
1231 else
1232 veclen = fpscr & FPSCR_LENGTH_MASK;
1233
1234 pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride,
1235 (veclen >> FPSCR_LENGTH_BIT) + 1);
1236
1237 if (!fop->fn) {
1238 printf("VFP: could not find single op %d, inst=0x%x@0x%x\n", FEXT_TO_IDX(inst), inst, state->Reg[15]);
1239 exit(-1);
1240 goto invalid;
1241 }
1242
1243 for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) {
1244 s32 m = vfp_get_float(state, sm);
1245 u32 except;
1246 char type;
1247
1248 type = fop->flags & OP_DD ? 'd' : 's';
1249 if (op == FOP_EXT)
1250 pr_debug("VFP: itr%d (%c%u) = op[%u] (s%u=%08x)\n",
1251 vecitr >> FPSCR_LENGTH_BIT, type, dest, sn,
1252 sm, m);
1253 else
1254 pr_debug("VFP: itr%d (%c%u) = (s%u) op[%u] (s%u=%08x)\n",
1255 vecitr >> FPSCR_LENGTH_BIT, type, dest, sn,
1256 FOP_TO_IDX(op), sm, m);
1257
1258 except = fop->fn(state, dest, sn, m, fpscr);
1259 pr_debug("VFP: itr%d: exceptions=%08x\n",
1260 vecitr >> FPSCR_LENGTH_BIT, except);
1261
1262 exceptions |= except;
1263
1264 /*
1265 * CHECK: It appears to be undefined whether we stop when
1266 * we encounter an exception. We continue.
1267 */
1268 dest = FREG_BANK(dest) + ((FREG_IDX(dest) + vecstride) & 7);
1269 sn = FREG_BANK(sn) + ((FREG_IDX(sn) + vecstride) & 7);
1270 if (FREG_BANK(sm) != 0)
1271 sm = FREG_BANK(sm) + ((FREG_IDX(sm) + vecstride) & 7);
1272 }
1273 return exceptions;
1274
1275 invalid:
1276 return (u32)-1;
1277}
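/*
 * Vector iteration example (illustration): with a vector length of four,
 * stride one, and destination s9, successive iterations write s9, s10,
 * s11 and s12, since FREG_IDX() wraps modulo 8 within the bank selected
 * by FREG_BANK(), while an sm operand in bank 0 is reused unchanged on
 * every iteration (scalar broadcast).
 */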
diff --git a/src/core/core.vcxproj b/src/core/core.vcxproj
index b56661e48..1aaf28eee 100644
--- a/src/core/core.vcxproj
+++ b/src/core/core.vcxproj
@@ -147,6 +147,10 @@
147 <ClCompile Include="arm\interpreter\armvirt.cpp" /> 147 <ClCompile Include="arm\interpreter\armvirt.cpp" />
148 <ClCompile Include="arm\interpreter\arm_interpreter.cpp" /> 148 <ClCompile Include="arm\interpreter\arm_interpreter.cpp" />
149 <ClCompile Include="arm\interpreter\thumbemu.cpp" /> 149 <ClCompile Include="arm\interpreter\thumbemu.cpp" />
150 <ClCompile Include="arm\interpreter\vfp\vfp.cpp" />
151 <ClCompile Include="arm\interpreter\vfp\vfpdouble.cpp" />
152 <ClCompile Include="arm\interpreter\vfp\vfpinstr.cpp" />
153 <ClCompile Include="arm\interpreter\vfp\vfpsingle.cpp" />
150 <ClCompile Include="arm\mmu\arm1176jzf_s_mmu.cpp" /> 154 <ClCompile Include="arm\mmu\arm1176jzf_s_mmu.cpp" />
151 <ClCompile Include="core.cpp" /> 155 <ClCompile Include="core.cpp" />
152 <ClCompile Include="core_timing.cpp" /> 156 <ClCompile Include="core_timing.cpp" />
@@ -182,6 +186,9 @@
182 <ClInclude Include="arm\interpreter\arm_interpreter.h" /> 186 <ClInclude Include="arm\interpreter\arm_interpreter.h" />
183 <ClInclude Include="arm\interpreter\arm_regformat.h" /> 187 <ClInclude Include="arm\interpreter\arm_regformat.h" />
184 <ClInclude Include="arm\interpreter\skyeye_defs.h" /> 188 <ClInclude Include="arm\interpreter\skyeye_defs.h" />
189 <ClInclude Include="arm\interpreter\vfp\asm_vfp.h" />
190 <ClInclude Include="arm\interpreter\vfp\vfp.h" />
191 <ClInclude Include="arm\interpreter\vfp\vfp_helper.h" />
185 <ClInclude Include="arm\mmu\arm1176jzf_s_mmu.h" /> 192 <ClInclude Include="arm\mmu\arm1176jzf_s_mmu.h" />
186 <ClInclude Include="arm\mmu\cache.h" /> 193 <ClInclude Include="arm\mmu\cache.h" />
187 <ClInclude Include="arm\mmu\rb.h" /> 194 <ClInclude Include="arm\mmu\rb.h" />
diff --git a/src/core/core.vcxproj.filters b/src/core/core.vcxproj.filters
index 5c947ec23..7436b9ed0 100644
--- a/src/core/core.vcxproj.filters
+++ b/src/core/core.vcxproj.filters
@@ -28,6 +28,9 @@
28 <Filter Include="hle\service"> 28 <Filter Include="hle\service">
29 <UniqueIdentifier>{812c5189-ca49-4704-b842-3ffad09092d3}</UniqueIdentifier> 29 <UniqueIdentifier>{812c5189-ca49-4704-b842-3ffad09092d3}</UniqueIdentifier>
30 </Filter> 30 </Filter>
31 <Filter Include="arm\interpreter\vfp">
32 <UniqueIdentifier>{de62238f-a28e-4a33-8495-23fed6784588}</UniqueIdentifier>
33 </Filter>
31 </ItemGroup> 34 </ItemGroup>
32 <ItemGroup> 35 <ItemGroup>
33 <ClCompile Include="arm\disassembler\arm_disasm.cpp"> 36 <ClCompile Include="arm\disassembler\arm_disasm.cpp">
@@ -114,6 +117,18 @@
114 <ClCompile Include="hle\config_mem.cpp"> 117 <ClCompile Include="hle\config_mem.cpp">
115 <Filter>hle</Filter> 118 <Filter>hle</Filter>
116 </ClCompile> 119 </ClCompile>
120 <ClCompile Include="arm\interpreter\vfp\vfp.cpp">
121 <Filter>arm\interpreter\vfp</Filter>
122 </ClCompile>
123 <ClCompile Include="arm\interpreter\vfp\vfpinstr.cpp">
124 <Filter>arm\interpreter\vfp</Filter>
125 </ClCompile>
126 <ClCompile Include="arm\interpreter\vfp\vfpdouble.cpp">
127 <Filter>arm\interpreter\vfp</Filter>
128 </ClCompile>
129 <ClCompile Include="arm\interpreter\vfp\vfpsingle.cpp">
130 <Filter>arm\interpreter\vfp</Filter>
131 </ClCompile>
117 </ItemGroup> 132 </ItemGroup>
118 <ItemGroup> 133 <ItemGroup>
119 <ClInclude Include="arm\disassembler\arm_disasm.h"> 134 <ClInclude Include="arm\disassembler\arm_disasm.h">
@@ -223,6 +238,15 @@
223 <ClInclude Include="hle\config_mem.h"> 238 <ClInclude Include="hle\config_mem.h">
224 <Filter>hle</Filter> 239 <Filter>hle</Filter>
225 </ClInclude> 240 </ClInclude>
241 <ClInclude Include="arm\interpreter\vfp\asm_vfp.h">
242 <Filter>arm\interpreter\vfp</Filter>
243 </ClInclude>
244 <ClInclude Include="arm\interpreter\vfp\vfp.h">
245 <Filter>arm\interpreter\vfp</Filter>
246 </ClInclude>
247 <ClInclude Include="arm\interpreter\vfp\vfp_helper.h">
248 <Filter>arm\interpreter\vfp</Filter>
249 </ClInclude>
226 </ItemGroup> 250 </ItemGroup>
227 <ItemGroup> 251 <ItemGroup>
228 <Text Include="CMakeLists.txt" /> 252 <Text Include="CMakeLists.txt" />