Port DFG JIT to traditional ARM
https://bugs.webkit.org/show_bug.cgi?id=90198

Reviewed by Filip Pizlo.

Source/JavaScriptCore: 

This patch contains the macro assembler part of the
DFG JIT support on ARM systems with a fixed 32-bit instruction
width. A large amount of old code was refactored, and support
for ARMv4 and lower was removed from the macro assembler.

SunSpider is improved by 8%, and V8 by 92%.

* assembler/ARMAssembler.cpp:
(JSC::ARMAssembler::dataTransfer32):
(JSC::ARMAssembler::baseIndexTransfer32):
(JSC):
(JSC::ARMAssembler::dataTransfer16):
(JSC::ARMAssembler::baseIndexTransfer16):
(JSC::ARMAssembler::dataTransferFloat):
(JSC::ARMAssembler::baseIndexTransferFloat):
(JSC::ARMAssembler::executableCopy):
* assembler/ARMAssembler.h:
(JSC::ARMAssembler::ARMAssembler):
(JSC::ARMAssembler::emitInst):
(JSC::ARMAssembler::vmov_f64_r):
(ARMAssembler):
(JSC::ARMAssembler::vabs_f64_r):
(JSC::ARMAssembler::vneg_f64_r):
(JSC::ARMAssembler::ldr_imm):
(JSC::ARMAssembler::ldr_un_imm):
(JSC::ARMAssembler::dtr_u):
(JSC::ARMAssembler::dtr_ur):
(JSC::ARMAssembler::dtr_d):
(JSC::ARMAssembler::dtr_dr):
(JSC::ARMAssembler::dtrh_u):
(JSC::ARMAssembler::dtrh_ur):
(JSC::ARMAssembler::dtrh_d):
(JSC::ARMAssembler::dtrh_dr):
(JSC::ARMAssembler::fdtr_u):
(JSC::ARMAssembler::fdtr_d):
(JSC::ARMAssembler::push_r):
(JSC::ARMAssembler::pop_r):
(JSC::ARMAssembler::poke_r):
(JSC::ARMAssembler::peek_r):
(JSC::ARMAssembler::vmov_vfp64_r):
(JSC::ARMAssembler::vmov_arm64_r):
(JSC::ARMAssembler::vmov_vfp32_r):
(JSC::ARMAssembler::vmov_arm32_r):
(JSC::ARMAssembler::vcvt_u32_f64_r):
(JSC::ARMAssembler::vcvt_f64_f32_r):
(JSC::ARMAssembler::vcvt_f32_f64_r):
(JSC::ARMAssembler::clz_r):
(JSC::ARMAssembler::bkpt):
(JSC::ARMAssembler::bx):
(JSC::ARMAssembler::blx):
(JSC::ARMAssembler::labelIgnoringWatchpoints):
(JSC::ARMAssembler::labelForWatchpoint):
(JSC::ARMAssembler::label):
(JSC::ARMAssembler::getLdrImmAddress):
(JSC::ARMAssembler::replaceWithJump):
(JSC::ARMAssembler::maxJumpReplacementSize):
(JSC::ARMAssembler::getOp2Byte):
(JSC::ARMAssembler::getOp2Half):
(JSC::ARMAssembler::RM):
(JSC::ARMAssembler::RS):
(JSC::ARMAssembler::RD):
(JSC::ARMAssembler::RN):
* assembler/AssemblerBufferWithConstantPool.h:
(JSC::AssemblerBufferWithConstantPool::ensureSpaceForAnyInstruction):
* assembler/MacroAssemblerARM.cpp:
(JSC::MacroAssemblerARM::load32WithUnalignedHalfWords):
* assembler/MacroAssemblerARM.h:
(JSC::MacroAssemblerARM::add32):
(MacroAssemblerARM):
(JSC::MacroAssemblerARM::and32):
(JSC::MacroAssemblerARM::lshift32):
(JSC::MacroAssemblerARM::mul32):
(JSC::MacroAssemblerARM::neg32):
(JSC::MacroAssemblerARM::rshift32):
(JSC::MacroAssemblerARM::urshift32):
(JSC::MacroAssemblerARM::xor32):
(JSC::MacroAssemblerARM::load8):
(JSC::MacroAssemblerARM::load8Signed):
(JSC::MacroAssemblerARM::load16):
(JSC::MacroAssemblerARM::load16Signed):
(JSC::MacroAssemblerARM::load32):
(JSC::MacroAssemblerARM::load32WithAddressOffsetPatch):
(JSC::MacroAssemblerARM::store32WithAddressOffsetPatch):
(JSC::MacroAssemblerARM::store8):
(JSC::MacroAssemblerARM::store16):
(JSC::MacroAssemblerARM::store32):
(JSC::MacroAssemblerARM::move):
(JSC::MacroAssemblerARM::jump):
(JSC::MacroAssemblerARM::branchAdd32):
(JSC::MacroAssemblerARM::mull32):
(JSC::MacroAssemblerARM::branchMul32):
(JSC::MacroAssemblerARM::nearCall):
(JSC::MacroAssemblerARM::compare32):
(JSC::MacroAssemblerARM::test32):
(JSC::MacroAssemblerARM::sub32):
(JSC::MacroAssemblerARM::call):
(JSC::MacroAssemblerARM::loadFloat):
(JSC::MacroAssemblerARM::loadDouble):
(JSC::MacroAssemblerARM::storeFloat):
(JSC::MacroAssemblerARM::storeDouble):
(JSC::MacroAssemblerARM::moveDouble):
(JSC::MacroAssemblerARM::addDouble):
(JSC::MacroAssemblerARM::divDouble):
(JSC::MacroAssemblerARM::subDouble):
(JSC::MacroAssemblerARM::mulDouble):
(JSC::MacroAssemblerARM::absDouble):
(JSC::MacroAssemblerARM::negateDouble):
(JSC::MacroAssemblerARM::convertInt32ToDouble):
(JSC::MacroAssemblerARM::convertFloatToDouble):
(JSC::MacroAssemblerARM::convertDoubleToFloat):
(JSC::MacroAssemblerARM::branchTruncateDoubleToInt32):
(JSC::MacroAssemblerARM::branchTruncateDoubleToUint32):
(JSC::MacroAssemblerARM::truncateDoubleToInt32):
(JSC::MacroAssemblerARM::truncateDoubleToUint32):
(JSC::MacroAssemblerARM::branchConvertDoubleToInt32):
(JSC::MacroAssemblerARM::branchDoubleNonZero):
(JSC::MacroAssemblerARM::branchDoubleZeroOrNaN):
(JSC::MacroAssemblerARM::invert):
(JSC::MacroAssemblerARM::replaceWithJump):
(JSC::MacroAssemblerARM::maxJumpReplacementSize):
(JSC::MacroAssemblerARM::call32):
* assembler/SH4Assembler.h:
(JSC::SH4Assembler::label):
* dfg/DFGAssemblyHelpers.h:
(JSC::DFG::AssemblyHelpers::debugCall):
(JSC::DFG::AssemblyHelpers::boxDouble):
(JSC::DFG::AssemblyHelpers::unboxDouble):
* dfg/DFGCCallHelpers.h:
(CCallHelpers):
(JSC::DFG::CCallHelpers::setupArguments):
* dfg/DFGFPRInfo.h:
(DFG):
* dfg/DFGGPRInfo.h:
(DFG):
(GPRInfo):
* dfg/DFGOperations.cpp:
(JSC):
* dfg/DFGSpeculativeJIT.h:
(SpeculativeJIT):
(JSC::DFG::SpeculativeJIT::appendCallWithExceptionCheckSetResult):
(JSC::DFG::SpeculativeJIT::appendCallSetResult):
* jit/JITStubs.cpp:
(JSC):
* jit/JITStubs.h:
(JITStackFrame):
* jit/JSInterfaceJIT.h:
(JSInterfaceJIT):

Source/WTF: 

Enable the DFG JIT on ARM systems with a 32-bit instruction set.

* wtf/InlineASM.h:
* wtf/Platform.h:



git-svn-id: http://svn.webkit.org/repository/webkit/trunk@121885 268f45cc-cd09-0410-ab3c-d52691b4dbfc
diff --git a/Source/JavaScriptCore/assembler/ARMAssembler.cpp b/Source/JavaScriptCore/assembler/ARMAssembler.cpp
index 74809ca..362fcc6 100644
--- a/Source/JavaScriptCore/assembler/ARMAssembler.cpp
+++ b/Source/JavaScriptCore/assembler/ARMAssembler.cpp
@@ -262,86 +262,117 @@
 
 // Memory load/store helpers
 
-void ARMAssembler::dataTransfer32(bool isLoad, RegisterID srcDst, RegisterID base, int32_t offset, bool bytes)
+void ARMAssembler::dataTransfer32(DataTransferTypeA transferType, RegisterID srcDst, RegisterID base, int32_t offset)
 {
-    ARMWord transferFlag = bytes ? DT_BYTE : 0;
     if (offset >= 0) {
         if (offset <= 0xfff)
-            dtr_u(isLoad, srcDst, base, offset | transferFlag);
+            dtr_u(transferType, srcDst, base, offset);
         else if (offset <= 0xfffff) {
             add_r(ARMRegisters::S0, base, OP2_IMM | (offset >> 12) | (10 << 8));
-            dtr_u(isLoad, srcDst, ARMRegisters::S0, (offset & 0xfff) | transferFlag);
+            dtr_u(transferType, srcDst, ARMRegisters::S0, (offset & 0xfff));
         } else {
             moveImm(offset, ARMRegisters::S0);
-            dtr_ur(isLoad, srcDst, base, ARMRegisters::S0 | transferFlag);
+            dtr_ur(transferType, srcDst, base, ARMRegisters::S0);
         }
     } else {
         if (offset >= -0xfff)
-            dtr_d(isLoad, srcDst, base, -offset | transferFlag);
+            dtr_d(transferType, srcDst, base, -offset);
         else if (offset >= -0xfffff) {
             sub_r(ARMRegisters::S0, base, OP2_IMM | (-offset >> 12) | (10 << 8));
-            dtr_d(isLoad, srcDst, ARMRegisters::S0, (-offset & 0xfff) | transferFlag);
+            dtr_d(transferType, srcDst, ARMRegisters::S0, (-offset & 0xfff));
         } else {
             moveImm(offset, ARMRegisters::S0);
-            dtr_ur(isLoad, srcDst, base, ARMRegisters::S0 | transferFlag);
+            dtr_ur(transferType, srcDst, base, ARMRegisters::S0);
         }
     }
 }
 
-void ARMAssembler::baseIndexTransfer32(bool isLoad, RegisterID srcDst, RegisterID base, RegisterID index, int scale, int32_t offset, bool bytes)
+void ARMAssembler::baseIndexTransfer32(DataTransferTypeA transferType, RegisterID srcDst, RegisterID base, RegisterID index, int scale, int32_t offset)
 {
-    ARMWord op2;
-    ARMWord transferFlag = bytes ? DT_BYTE : 0;
-
     ASSERT(scale >= 0 && scale <= 3);
-    op2 = lsl(index, scale);
+    ARMWord op2 = lsl(index, scale);
 
-    if (offset >= 0 && offset <= 0xfff) {
-        add_r(ARMRegisters::S0, base, op2);
-        dtr_u(isLoad, srcDst, ARMRegisters::S0, offset | transferFlag);
-        return;
-    }
-    if (offset <= 0 && offset >= -0xfff) {
-        add_r(ARMRegisters::S0, base, op2);
-        dtr_d(isLoad, srcDst, ARMRegisters::S0, (-offset & 0xfff) | transferFlag);
+    if (!offset) {
+        dtr_ur(transferType, srcDst, base, op2);
         return;
     }
 
-    ldr_un_imm(ARMRegisters::S0, offset);
-    add_r(ARMRegisters::S0, ARMRegisters::S0, op2);
-    dtr_ur(isLoad, srcDst, base, ARMRegisters::S0 | transferFlag);
+    add_r(ARMRegisters::S1, base, op2);
+    dataTransfer32(transferType, srcDst, ARMRegisters::S1, offset);
 }
 
-void ARMAssembler::doubleTransfer(bool isLoad, FPRegisterID srcDst, RegisterID base, int32_t offset)
+void ARMAssembler::dataTransfer16(DataTransferTypeB transferType, RegisterID srcDst, RegisterID base, int32_t offset)
+{
+    if (offset >= 0) {
+        if (offset <= 0xff)
+            dtrh_u(transferType, srcDst, base, getOp2Half(offset));
+        else if (offset <= 0xffff) {
+            add_r(ARMRegisters::S0, base, OP2_IMM | (offset >> 8) | (12 << 8));
+            dtrh_u(transferType, srcDst, ARMRegisters::S0, getOp2Half(offset & 0xff));
+        } else {
+            moveImm(offset, ARMRegisters::S0);
+            dtrh_ur(transferType, srcDst, base, ARMRegisters::S0);
+        }
+    } else {
+        if (offset >= -0xff)
+            dtrh_d(transferType, srcDst, base, getOp2Half(-offset));
+        else if (offset >= -0xffff) {
+            sub_r(ARMRegisters::S0, base, OP2_IMM | (-offset >> 8) | (12 << 8));
+            dtrh_d(transferType, srcDst, ARMRegisters::S0, getOp2Half(-offset & 0xff));
+        } else {
+            moveImm(offset, ARMRegisters::S0);
+            dtrh_ur(transferType, srcDst, base, ARMRegisters::S0);
+        }
+    }
+}
+
+void ARMAssembler::baseIndexTransfer16(DataTransferTypeB transferType, RegisterID srcDst, RegisterID base, RegisterID index, int scale, int32_t offset)
+{
+    if (!scale && !offset) {
+        dtrh_ur(transferType, srcDst, base, index);
+        return;
+    }
+
+    add_r(ARMRegisters::S1, base, lsl(index, scale));
+    dataTransfer16(transferType, srcDst, ARMRegisters::S1, offset);
+}
+
+void ARMAssembler::dataTransferFloat(DataTransferTypeFloat transferType, FPRegisterID srcDst, RegisterID base, int32_t offset)
 {
     // VFP cannot directly access memory that is not four-byte-aligned
     if (!(offset & 0x3)) {
         if (offset <= 0x3ff && offset >= 0) {
-            fdtr_u(isLoad, srcDst, base, offset >> 2);
+            fdtr_u(transferType, srcDst, base, offset >> 2);
             return;
         }
         if (offset <= 0x3ffff && offset >= 0) {
             add_r(ARMRegisters::S0, base, OP2_IMM | (offset >> 10) | (11 << 8));
-            fdtr_u(isLoad, srcDst, ARMRegisters::S0, (offset >> 2) & 0xff);
+            fdtr_u(transferType, srcDst, ARMRegisters::S0, (offset >> 2) & 0xff);
             return;
         }
         offset = -offset;
 
         if (offset <= 0x3ff && offset >= 0) {
-            fdtr_d(isLoad, srcDst, base, offset >> 2);
+            fdtr_d(transferType, srcDst, base, offset >> 2);
             return;
         }
         if (offset <= 0x3ffff && offset >= 0) {
             sub_r(ARMRegisters::S0, base, OP2_IMM | (offset >> 10) | (11 << 8));
-            fdtr_d(isLoad, srcDst, ARMRegisters::S0, (offset >> 2) & 0xff);
+            fdtr_d(transferType, srcDst, ARMRegisters::S0, (offset >> 2) & 0xff);
             return;
         }
         offset = -offset;
     }
 
-    ldr_un_imm(ARMRegisters::S0, offset);
+    moveImm(offset, ARMRegisters::S0);
     add_r(ARMRegisters::S0, ARMRegisters::S0, base);
-    fdtr_u(isLoad, srcDst, ARMRegisters::S0, 0);
+    fdtr_u(transferType, srcDst, ARMRegisters::S0, 0);
+}
+
+void ARMAssembler::baseIndexTransferFloat(DataTransferTypeFloat transferType, FPRegisterID srcDst, RegisterID base, RegisterID index, int scale, int32_t offset)
+{
+    add_r(ARMRegisters::S1, base, lsl(index, scale));
+    dataTransferFloat(transferType, srcDst, ARMRegisters::S1, offset);
 }
 
 PassRefPtr<ExecutableMemoryHandle> ARMAssembler::executableCopy(JSGlobalData& globalData, void* ownerUID, JITCompilationEffort effort)
@@ -361,10 +392,10 @@
         ARMWord* addr = getLdrImmAddress(ldrAddr);
         if (*addr != InvalidBranchTarget) {
             if (!(iter->m_offset & 1)) {
-                int diff = reinterpret_cast_ptr<ARMWord*>(data + *addr) - (ldrAddr + DefaultPrefetching);
+                intptr_t difference = reinterpret_cast_ptr<ARMWord*>(data + *addr) - (ldrAddr + DefaultPrefetching);
 
-                if ((diff <= BOFFSET_MAX && diff >= BOFFSET_MIN)) {
-                    *ldrAddr = B | getConditionalField(*ldrAddr) | (diff & BRANCH_MASK);
+                if ((difference <= BOFFSET_MAX && difference >= BOFFSET_MIN)) {
+                    *ldrAddr = B | getConditionalField(*ldrAddr) | (difference & BRANCH_MASK);
                     continue;
                 }
             }
diff --git a/Source/JavaScriptCore/assembler/ARMAssembler.h b/Source/JavaScriptCore/assembler/ARMAssembler.h
index 16dc0cf..251d751 100644
--- a/Source/JavaScriptCore/assembler/ARMAssembler.h
+++ b/Source/JavaScriptCore/assembler/ARMAssembler.h
@@ -41,16 +41,16 @@
             r0 = 0,
             r1,
             r2,
-            r3, S0 = r3,
+            r3, S0 = r3, /* Same as thumb assembler. */
             r4,
             r5,
             r6,
             r7,
-            r8, S1 = r8,
+            r8,
             r9,
             r10,
             r11,
-            r12,
+            r12, S1 = r12,
             r13, sp = r13,
             r14, lr = r14,
             r15, pc = r15
@@ -60,11 +60,11 @@
             d0,
             d1,
             d2,
-            d3, SD0 = d3,
+            d3,
             d4,
             d5,
             d6,
-            d7,
+            d7, SD0 = d7, /* Same as thumb assembler. */
             d8,
             d9,
             d10,
@@ -100,7 +100,10 @@
         typedef AssemblerBufferWithConstantPool<2048, 4, 4, ARMAssembler> ARMBuffer;
         typedef SegmentedVector<AssemblerLabel, 64> Jumps;
 
-        ARMAssembler() { }
+        ARMAssembler()
+            : m_indexOfTailOfLastWatchpoint(1)
+        {
+        }
 
         // ARM conditional constants
         typedef enum {
@@ -141,33 +144,33 @@
             MVN = (0xf << 21),
             MUL = 0x00000090,
             MULL = 0x00c00090,
+            VMOV_F64 = 0x0eb00b40,
             VADD_F64 = 0x0e300b00,
             VDIV_F64 = 0x0e800b00,
             VSUB_F64 = 0x0e300b40,
             VMUL_F64 = 0x0e200b00,
             VCMP_F64 = 0x0eb40b40,
             VSQRT_F64 = 0x0eb10bc0,
-            DTR = 0x05000000,
-            LDRH = 0x00100090,
-            STRH = 0x00000090,
+            VABS_F64 = 0x0eb00bc0,
+            VNEG_F64 = 0x0eb10b40,
             STMDB = 0x09200000,
             LDMIA = 0x08b00000,
-            FDTR = 0x0d000b00,
             B = 0x0a000000,
             BL = 0x0b000000,
-#if WTF_ARM_ARCH_AT_LEAST(5) || defined(__ARM_ARCH_4T__)
             BX = 0x012fff10,
-#endif
-            VMOV_VFP = 0x0e000a10,
-            VMOV_ARM = 0x0e100a10,
+            VMOV_VFP64 = 0x0c400a10,
+            VMOV_ARM64 = 0x0c500a10,
+            VMOV_VFP32 = 0x0e000a10,
+            VMOV_ARM32 = 0x0e100a10,
             VCVT_F64_S32 = 0x0eb80bc0,
             VCVT_S32_F64 = 0x0ebd0b40,
+            VCVT_U32_F64 = 0x0ebc0b40,
+            VCVT_F32_F64 = 0x0eb70bc0,
+            VCVT_F64_F32 = 0x0eb70ac0,
             VMRS_APSR = 0x0ef1fa10,
-#if WTF_ARM_ARCH_AT_LEAST(5)
             CLZ = 0x016f0f10,
             BKPT = 0xe1200070,
             BLX = 0x012fff30,
-#endif
 #if WTF_ARM_ARCH_AT_LEAST(7)
             MOVW = 0x03000000,
             MOVT = 0x03400000,
@@ -177,17 +180,37 @@
 
         enum {
             OP2_IMM = (1 << 25),
-            OP2_IMMh = (1 << 22),
+            OP2_IMM_HALF = (1 << 22),
             OP2_INV_IMM = (1 << 26),
             SET_CC = (1 << 20),
             OP2_OFSREG = (1 << 25),
+            // Data transfer flags.
             DT_UP = (1 << 23),
-            DT_BYTE = (1 << 22),
             DT_WB = (1 << 21),
-            // This flag is inlcuded in LDR and STR
             DT_PRE = (1 << 24),
-            HDT_UH = (1 << 5),
             DT_LOAD = (1 << 20),
+            DT_BYTE = (1 << 22),
+        };
+
+        enum DataTransferTypeA {
+            LoadUint32 = 0x05000000 | DT_LOAD,
+            LoadUint8 = 0x05400000 | DT_LOAD,
+            StoreUint32 = 0x05000000,
+            StoreUint8 = 0x05400000,
+        };
+
+        enum DataTransferTypeB {
+            LoadUint16 = 0x010000b0 | DT_LOAD,
+            LoadInt16 = 0x010000f0 | DT_LOAD,
+            LoadInt8 = 0x010000d0 | DT_LOAD,
+            StoreUint16 = 0x010000b0,
+        };
+
+        enum DataTransferTypeFloat {
+            LoadFloat = 0x0d000a00 | DT_LOAD,
+            LoadDouble = 0x0d000b00 | DT_LOAD,
+            StoreFloat = 0x0d000a00,
+            StoreDouble = 0x0d000b00,
         };
 
         // Masks of ARM instructions
@@ -218,7 +241,7 @@
 
         void emitInst(ARMWord op, int rd, int rn, ARMWord op2)
         {
-            ASSERT(((op2 & ~OP2_IMM) <= 0xfff) || (((op2 & ~OP2_IMMh) <= 0xfff)));
+            ASSERT(((op2 & ~OP2_IMM) <= 0xfff) || (((op2 & ~OP2_IMM_HALF) <= 0xfff)));
             m_buffer.putInt(op | RN(rn) | RD(rd) | op2);
         }
 
@@ -407,6 +430,11 @@
             m_buffer.putInt(static_cast<ARMWord>(cc) | MULL | RN(rdhi) | RD(rdlo) | RS(rn) | RM(rm));
         }
 
+        void vmov_f64_r(int dd, int dm, Condition cc = AL)
+        {
+            emitDoublePrecisionInst(static_cast<ARMWord>(cc) | VMOV_F64, dd, 0, dm);
+        }
+
         void vadd_f64_r(int dd, int dn, int dm, Condition cc = AL)
         {
             emitDoublePrecisionInst(static_cast<ARMWord>(cc) | VADD_F64, dd, dn, dm);
@@ -437,100 +465,124 @@
             emitDoublePrecisionInst(static_cast<ARMWord>(cc) | VSQRT_F64, dd, 0, dm);
         }
 
+        void vabs_f64_r(int dd, int dm, Condition cc = AL)
+        {
+            emitDoublePrecisionInst(static_cast<ARMWord>(cc) | VABS_F64, dd, 0, dm);
+        }
+
+        void vneg_f64_r(int dd, int dm, Condition cc = AL)
+        {
+            emitDoublePrecisionInst(static_cast<ARMWord>(cc) | VNEG_F64, dd, 0, dm);
+        }
+
         void ldr_imm(int rd, ARMWord imm, Condition cc = AL)
         {
-            m_buffer.putIntWithConstantInt(static_cast<ARMWord>(cc) | DTR | DT_LOAD | DT_UP | RN(ARMRegisters::pc) | RD(rd), imm, true);
+            m_buffer.putIntWithConstantInt(static_cast<ARMWord>(cc) | LoadUint32 | DT_UP | RN(ARMRegisters::pc) | RD(rd), imm, true);
         }
 
         void ldr_un_imm(int rd, ARMWord imm, Condition cc = AL)
         {
-            m_buffer.putIntWithConstantInt(static_cast<ARMWord>(cc) | DTR | DT_LOAD | DT_UP | RN(ARMRegisters::pc) | RD(rd), imm);
+            m_buffer.putIntWithConstantInt(static_cast<ARMWord>(cc) | LoadUint32 | DT_UP | RN(ARMRegisters::pc) | RD(rd), imm);
         }
 
-        void dtr_u(bool isLoad, int rd, int rb, ARMWord op2, Condition cc = AL)
+        void dtr_u(DataTransferTypeA transferType, int rd, int rb, ARMWord op2, Condition cc = AL)
         {
-            emitInst(static_cast<ARMWord>(cc) | DTR | (isLoad ? DT_LOAD : 0) | DT_UP, rd, rb, op2);
+            emitInst(static_cast<ARMWord>(cc) | transferType | DT_UP, rd, rb, op2);
         }
 
-        void dtr_ur(bool isLoad, int rd, int rb, int rm, Condition cc = AL)
+        void dtr_ur(DataTransferTypeA transferType, int rd, int rb, int rm, Condition cc = AL)
         {
-            emitInst(static_cast<ARMWord>(cc) | DTR | (isLoad ? DT_LOAD : 0) | DT_UP | OP2_OFSREG, rd, rb, rm);
+            emitInst(static_cast<ARMWord>(cc) | transferType | DT_UP | OP2_OFSREG, rd, rb, rm);
         }
 
-        void dtr_d(bool isLoad, int rd, int rb, ARMWord op2, Condition cc = AL)
+        void dtr_d(DataTransferTypeA transferType, int rd, int rb, ARMWord op2, Condition cc = AL)
         {
-            emitInst(static_cast<ARMWord>(cc) | DTR | (isLoad ? DT_LOAD : 0), rd, rb, op2);
+            emitInst(static_cast<ARMWord>(cc) | transferType, rd, rb, op2);
         }
 
-        void dtr_dr(bool isLoad, int rd, int rb, int rm, Condition cc = AL)
+        void dtr_dr(DataTransferTypeA transferType, int rd, int rb, int rm, Condition cc = AL)
         {
-            emitInst(static_cast<ARMWord>(cc) | DTR | (isLoad ? DT_LOAD : 0) | OP2_OFSREG, rd, rb, rm);
+            emitInst(static_cast<ARMWord>(cc) | transferType | OP2_OFSREG, rd, rb, rm);
         }
 
-        void ldrh_r(int rd, int rn, int rm, Condition cc = AL)
+        void dtrh_u(DataTransferTypeB transferType, int rd, int rb, ARMWord op2, Condition cc = AL)
         {
-            emitInst(static_cast<ARMWord>(cc) | LDRH | HDT_UH | DT_UP | DT_PRE, rd, rn, rm);
+            emitInst(static_cast<ARMWord>(cc) | transferType | DT_UP, rd, rb, op2);
         }
 
-        void ldrh_d(int rd, int rb, ARMWord op2, Condition cc = AL)
+        void dtrh_ur(DataTransferTypeB transferType, int rd, int rn, int rm, Condition cc = AL)
         {
-            emitInst(static_cast<ARMWord>(cc) | LDRH | HDT_UH | DT_PRE, rd, rb, op2);
+            emitInst(static_cast<ARMWord>(cc) | transferType | DT_UP, rd, rn, rm);
         }
 
-        void ldrh_u(int rd, int rb, ARMWord op2, Condition cc = AL)
+        void dtrh_d(DataTransferTypeB transferType, int rd, int rb, ARMWord op2, Condition cc = AL)
         {
-            emitInst(static_cast<ARMWord>(cc) | LDRH | HDT_UH | DT_UP | DT_PRE, rd, rb, op2);
+            emitInst(static_cast<ARMWord>(cc) | transferType, rd, rb, op2);
         }
 
-        void strh_r(int rn, int rm, int rd, Condition cc = AL)
+        void dtrh_dr(DataTransferTypeB transferType, int rd, int rn, int rm, Condition cc = AL)
         {
-            emitInst(static_cast<ARMWord>(cc) | STRH | HDT_UH | DT_UP | DT_PRE, rd, rn, rm);
+            emitInst(static_cast<ARMWord>(cc) | transferType, rd, rn, rm);
         }
 
-        void fdtr_u(bool isLoad, int rd, int rb, ARMWord op2, Condition cc = AL)
+        void fdtr_u(DataTransferTypeFloat type, int rd, int rb, ARMWord op2, Condition cc = AL)
         {
-            ASSERT(op2 <= 0xff);
-            emitInst(static_cast<ARMWord>(cc) | FDTR | DT_UP | (isLoad ? DT_LOAD : 0), rd, rb, op2);
+            ASSERT(op2 <= 0xff && rd <= 15);
+            /* Only d0-d15 and s0, s2, s4 ... s30 are supported. */
+            m_buffer.putInt(static_cast<ARMWord>(cc) | DT_UP | type | (rd << 12) | RN(rb) | op2);
         }
 
-        void fdtr_d(bool isLoad, int rd, int rb, ARMWord op2, Condition cc = AL)
+        void fdtr_d(DataTransferTypeFloat type, int rd, int rb, ARMWord op2, Condition cc = AL)
         {
-            ASSERT(op2 <= 0xff);
-            emitInst(static_cast<ARMWord>(cc) | FDTR | (isLoad ? DT_LOAD : 0), rd, rb, op2);
+            ASSERT(op2 <= 0xff && rd <= 15);
+            /* Only d0-d15 and s0, s2, s4 ... s30 are supported. */
+            m_buffer.putInt(static_cast<ARMWord>(cc) | type | (rd << 12) | RN(rb) | op2);
         }
 
         void push_r(int reg, Condition cc = AL)
         {
             ASSERT(ARMWord(reg) <= 0xf);
-            m_buffer.putInt(cc | DTR | DT_WB | RN(ARMRegisters::sp) | RD(reg) | 0x4);
+            m_buffer.putInt(static_cast<ARMWord>(cc) | StoreUint32 | DT_WB | RN(ARMRegisters::sp) | RD(reg) | 0x4);
         }
 
         void pop_r(int reg, Condition cc = AL)
         {
             ASSERT(ARMWord(reg) <= 0xf);
-            m_buffer.putInt(cc | (DTR ^ DT_PRE) | DT_LOAD | DT_UP | RN(ARMRegisters::sp) | RD(reg) | 0x4);
+            m_buffer.putInt(static_cast<ARMWord>(cc) | (LoadUint32 ^ DT_PRE) | DT_UP | RN(ARMRegisters::sp) | RD(reg) | 0x4);
         }
 
         inline void poke_r(int reg, Condition cc = AL)
         {
-            dtr_d(false, ARMRegisters::sp, 0, reg, cc);
+            dtr_d(StoreUint32, ARMRegisters::sp, 0, reg, cc);
         }
 
         inline void peek_r(int reg, Condition cc = AL)
         {
-            dtr_u(true, reg, ARMRegisters::sp, 0, cc);
+            dtr_u(LoadUint32, reg, ARMRegisters::sp, 0, cc);
         }
 
-        void vmov_vfp_r(int sn, int rt, Condition cc = AL)
+        void vmov_vfp64_r(int sm, int rt, int rt2, Condition cc = AL)
         {
-            ASSERT(rt <= 15);
-            emitSinglePrecisionInst(static_cast<ARMWord>(cc) | VMOV_VFP, rt << 1, sn, 0);
+            ASSERT(rt != rt2);
+            m_buffer.putInt(static_cast<ARMWord>(cc) | VMOV_VFP64 | RN(rt2) | RD(rt) | (sm & 0xf) | ((sm & 0x10) << (5 - 4)));
         }
 
-        void vmov_arm_r(int rt, int sn, Condition cc = AL)
+        void vmov_arm64_r(int rt, int rt2, int sm, Condition cc = AL)
+        {
+            ASSERT(rt != rt2);
+            m_buffer.putInt(static_cast<ARMWord>(cc) | VMOV_ARM64 | RN(rt2) | RD(rt) | (sm & 0xf) | ((sm & 0x10) << (5 - 4)));
+        }
+
+        void vmov_vfp32_r(int sn, int rt, Condition cc = AL)
         {
             ASSERT(rt <= 15);
-            emitSinglePrecisionInst(static_cast<ARMWord>(cc) | VMOV_ARM, rt << 1, sn, 0);
+            emitSinglePrecisionInst(static_cast<ARMWord>(cc) | VMOV_VFP32, rt << 1, sn, 0);
+        }
+
+        void vmov_arm32_r(int rt, int sn, Condition cc = AL)
+        {
+            ASSERT(rt <= 15);
+            emitSinglePrecisionInst(static_cast<ARMWord>(cc) | VMOV_ARM32, rt << 1, sn, 0);
         }
 
         void vcvt_f64_s32_r(int dd, int sm, Condition cc = AL)
@@ -545,26 +597,37 @@
             emitDoublePrecisionInst(static_cast<ARMWord>(cc) | VCVT_S32_F64, (sd >> 1), 0, dm);
         }
 
+        void vcvt_u32_f64_r(int sd, int dm, Condition cc = AL)
+        {
+            ASSERT(!(sd & 0x1)); // sd must be divisible by 2
+            emitDoublePrecisionInst(static_cast<ARMWord>(cc) | VCVT_U32_F64, (sd >> 1), 0, dm);
+        }
+
+        void vcvt_f64_f32_r(int dd, int sm, Condition cc = AL)
+        {
+            ASSERT(dd <= 15 && sm <= 15);
+            emitDoublePrecisionInst(static_cast<ARMWord>(cc) | VCVT_F64_F32, dd, 0, sm);
+        }
+
+        void vcvt_f32_f64_r(int dd, int sm, Condition cc = AL)
+        {
+            ASSERT(dd <= 15 && sm <= 15);
+            emitDoublePrecisionInst(static_cast<ARMWord>(cc) | VCVT_F32_F64, dd, 0, sm);
+        }
+
         void vmrs_apsr(Condition cc = AL)
         {
             m_buffer.putInt(static_cast<ARMWord>(cc) | VMRS_APSR);
         }
 
-#if WTF_ARM_ARCH_AT_LEAST(5)
         void clz_r(int rd, int rm, Condition cc = AL)
         {
             m_buffer.putInt(static_cast<ARMWord>(cc) | CLZ | RD(rd) | RM(rm));
         }
-#endif
 
         void bkpt(ARMWord value)
         {
-#if WTF_ARM_ARCH_AT_LEAST(5)
             m_buffer.putInt(BKPT | ((value & 0xff0) << 4) | (value & 0xf));
-#else
-            // Cannot access to Zero memory address
-            dtr_dr(true, ARMRegisters::S0, ARMRegisters::S0, ARMRegisters::S0);
-#endif
         }
 
         void nop()
@@ -574,23 +637,12 @@
 
         void bx(int rm, Condition cc = AL)
         {
-#if WTF_ARM_ARCH_AT_LEAST(5) || defined(__ARM_ARCH_4T__)
             emitInst(static_cast<ARMWord>(cc) | BX, 0, 0, RM(rm));
-#else
-            mov_r(ARMRegisters::pc, RM(rm), cc);
-#endif
         }
 
         AssemblerLabel blx(int rm, Condition cc = AL)
         {
-#if WTF_ARM_ARCH_AT_LEAST(5)
             emitInst(static_cast<ARMWord>(cc) | BLX, 0, 0, RM(rm));
-#else
-            ASSERT(rm != 14);
-            ensureSpace(2 * sizeof(ARMWord), 0);
-            mov_r(ARMRegisters::lr, ARMRegisters::pc, cc);
-            bx(rm, cc);
-#endif
             return m_buffer.label();
         }
 
@@ -653,10 +705,31 @@
             return m_buffer.sizeOfConstantPool();
         }
 
+        AssemblerLabel labelIgnoringWatchpoints()
+        {
+            m_buffer.ensureSpaceForAnyInstruction();
+            return m_buffer.label();
+        }
+
+        AssemblerLabel labelForWatchpoint()
+        {
+            m_buffer.ensureSpaceForAnyInstruction(maxJumpReplacementSize() / sizeof(ARMWord));
+            AssemblerLabel result = m_buffer.label();
+            if (result.m_offset != (m_indexOfTailOfLastWatchpoint - maxJumpReplacementSize()))
+                result = label();
+            m_indexOfTailOfLastWatchpoint = result.m_offset + maxJumpReplacementSize();
+            return label();
+        }
+
         AssemblerLabel label()
         {
-            m_buffer.ensureSpaceForAnyOneInstruction();
-            return m_buffer.label();
+            AssemblerLabel result = labelIgnoringWatchpoints();
+            while (result.m_offset + 1 < m_indexOfTailOfLastWatchpoint) {
+                nop();
+                // Buffer space for these instructions was already ensured by labelForWatchpoint.
+                result = m_buffer.label();
+            }
+            return result;
         }
 
         AssemblerLabel align(int alignment)
@@ -684,18 +757,28 @@
 
         unsigned debugOffset() { return m_buffer.debugOffset(); }
 
+        // DFG assembly helpers for moving data between fp and registers.
+        void vmov(RegisterID rd1, RegisterID rd2, FPRegisterID rn)
+        {
+            vmov_arm64_r(rd1, rd2, rn);
+        }
+
+        void vmov(FPRegisterID rd, RegisterID rn1, RegisterID rn2)
+        {
+            vmov_vfp64_r(rd, rn1, rn2);
+        }
+
         // Patching helpers
 
         static ARMWord* getLdrImmAddress(ARMWord* insn)
         {
-#if WTF_ARM_ARCH_AT_LEAST(5)
             // Check for call
             if ((*insn & 0x0f7f0000) != 0x051f0000) {
                 // Must be BLX
                 ASSERT((*insn & 0x012fff30) == 0x012fff30);
                 insn--;
             }
-#endif
+
             // Must be an ldr ..., [pc +/- imm]
             ASSERT((*insn & 0x0f7f0000) == 0x051f0000);
 
@@ -799,6 +882,32 @@
             return reinterpret_cast<void*>(readPointer(reinterpret_cast<void*>(getAbsoluteJumpAddress(from))));
         }
 
+        static void replaceWithJump(void* instructionStart, void* to)
+        {
+            ARMWord* instruction = reinterpret_cast<ARMWord*>(instructionStart) - 1;
+            intptr_t difference = reinterpret_cast<intptr_t>(to) - (reinterpret_cast<intptr_t>(instruction) + DefaultPrefetching * sizeof(ARMWord));
+
+            if (!(difference & 1)) {
+                difference >>= 2;
+                if ((difference <= BOFFSET_MAX && difference >= BOFFSET_MIN)) {
+                     // Direct branch.
+                     instruction[0] = B | AL | (difference & BRANCH_MASK);
+                     cacheFlush(instruction, sizeof(ARMWord));
+                     return;
+                }
+            }
+
+            // Load target.
+            instruction[0] = LoadUint32 | AL | RN(ARMRegisters::pc) | RD(ARMRegisters::pc) | 4;
+            instruction[1] = reinterpret_cast<ARMWord>(to);
+            cacheFlush(instruction, sizeof(ARMWord) * 2);
+        }
+
+        static ptrdiff_t maxJumpReplacementSize()
+        {
+            return sizeof(ARMWord) * 2;
+        }
+
         // Address operations
 
         static void* getRelocatedAddress(void* code, AssemblerLabel label)
@@ -820,13 +929,20 @@
 
         // Handle immediates
 
+        static ARMWord getOp2(ARMWord imm);
+
+        // Fast case if imm is known to be between 0 and 0xff
         static ARMWord getOp2Byte(ARMWord imm)
         {
             ASSERT(imm <= 0xff);
-            return OP2_IMMh | (imm & 0x0f) | ((imm & 0xf0) << 4) ;
+            return OP2_IMM | imm;
         }
 
-        static ARMWord getOp2(ARMWord imm);
+        static ARMWord getOp2Half(ARMWord imm)
+        {
+            ASSERT(imm <= 0xff);
+            return OP2_IMM_HALF | (imm & 0x0f) | ((imm & 0xf0) << 4);
+        }
 
 #if WTF_ARM_ARCH_AT_LEAST(7)
         static ARMWord getImm16Op2(ARMWord imm)
@@ -840,20 +956,14 @@
         void moveImm(ARMWord imm, int dest);
         ARMWord encodeComplexImm(ARMWord imm, int dest);
 
-        ARMWord getOffsetForHalfwordDataTransfer(ARMWord imm, int tmpReg)
-        {
-            // Encode immediate data in the instruction if it is possible
-            if (imm <= 0xff)
-                return getOp2Byte(imm);
-            // Otherwise, store the data in a temporary register
-            return encodeComplexImm(imm, tmpReg);
-        }
-
         // Memory load/store helpers
 
-        void dataTransfer32(bool isLoad, RegisterID srcDst, RegisterID base, int32_t offset, bool bytes = false);
-        void baseIndexTransfer32(bool isLoad, RegisterID srcDst, RegisterID base, RegisterID index, int scale, int32_t offset, bool bytes = false);
-        void doubleTransfer(bool isLoad, FPRegisterID srcDst, RegisterID base, int32_t offset);
+        void dataTransfer32(DataTransferTypeA, RegisterID srcDst, RegisterID base, int32_t offset);
+        void baseIndexTransfer32(DataTransferTypeA, RegisterID srcDst, RegisterID base, RegisterID index, int scale, int32_t offset);
+        void dataTransfer16(DataTransferTypeB, RegisterID srcDst, RegisterID base, int32_t offset);
+        void baseIndexTransfer16(DataTransferTypeB, RegisterID srcDst, RegisterID base, RegisterID index, int scale, int32_t offset);
+        void dataTransferFloat(DataTransferTypeFloat, FPRegisterID srcDst, RegisterID base, int32_t offset);
+        void baseIndexTransferFloat(DataTransferTypeFloat, FPRegisterID srcDst, RegisterID base, RegisterID index, int scale, int32_t offset);
 
-        // Constant pool hnadlers
+        // Constant pool handlers
 
@@ -901,25 +1011,25 @@
 #endif
 
     private:
-        ARMWord RM(int reg)
+        static ARMWord RM(int reg)
         {
             ASSERT(reg <= ARMRegisters::pc);
             return reg;
         }
 
-        ARMWord RS(int reg)
+        static ARMWord RS(int reg)
         {
             ASSERT(reg <= ARMRegisters::pc);
             return reg << 8;
         }
 
-        ARMWord RD(int reg)
+        static ARMWord RD(int reg)
         {
             ASSERT(reg <= ARMRegisters::pc);
             return reg << 12;
         }
 
-        ARMWord RN(int reg)
+        static ARMWord RN(int reg)
         {
             ASSERT(reg <= ARMRegisters::pc);
             return reg << 16;
@@ -934,6 +1044,7 @@
 
         ARMBuffer m_buffer;
         Jumps m_jumps;
+        uint32_t m_indexOfTailOfLastWatchpoint;
     };
 
 } // namespace JSC
diff --git a/Source/JavaScriptCore/assembler/AssemblerBufferWithConstantPool.h b/Source/JavaScriptCore/assembler/AssemblerBufferWithConstantPool.h
index e2ea261..4301472 100644
--- a/Source/JavaScriptCore/assembler/AssemblerBufferWithConstantPool.h
+++ b/Source/JavaScriptCore/assembler/AssemblerBufferWithConstantPool.h
@@ -127,9 +127,9 @@
         AssemblerBuffer::ensureSpace(insnSpace);
     }
 
-    void ensureSpaceForAnyOneInstruction()
+    void ensureSpaceForAnyInstruction(int amount = 1)
     {
-        flushIfNoSpaceFor(maxInstructionSize, sizeof(uint64_t));
+        flushIfNoSpaceFor(amount * maxInstructionSize, amount * sizeof(uint64_t));
     }
 
     bool isAligned(int alignment)
diff --git a/Source/JavaScriptCore/assembler/MacroAssemblerARM.cpp b/Source/JavaScriptCore/assembler/MacroAssemblerARM.cpp
index 2db5df1..0b826cd 100644
--- a/Source/JavaScriptCore/assembler/MacroAssemblerARM.cpp
+++ b/Source/JavaScriptCore/assembler/MacroAssemblerARM.cpp
@@ -77,18 +77,18 @@
 
     if (address.offset >= 0 && address.offset + 0x2 <= 0xff) {
         m_assembler.add_r(ARMRegisters::S0, address.base, op2);
-        m_assembler.ldrh_u(dest, ARMRegisters::S0, ARMAssembler::getOp2Byte(address.offset));
-        m_assembler.ldrh_u(ARMRegisters::S0, ARMRegisters::S0, ARMAssembler::getOp2Byte(address.offset + 0x2));
+        m_assembler.dtrh_u(LoadUint16, dest, ARMRegisters::S0, ARMAssembler::getOp2Half(address.offset));
+        m_assembler.dtrh_u(LoadUint16, ARMRegisters::S0, ARMRegisters::S0, ARMAssembler::getOp2Half(address.offset + 0x2));
     } else if (address.offset < 0 && address.offset >= -0xff) {
         m_assembler.add_r(ARMRegisters::S0, address.base, op2);
-        m_assembler.ldrh_d(dest, ARMRegisters::S0, ARMAssembler::getOp2Byte(-address.offset));
-        m_assembler.ldrh_d(ARMRegisters::S0, ARMRegisters::S0, ARMAssembler::getOp2Byte(-address.offset - 0x2));
+        m_assembler.dtrh_d(LoadUint16, dest, ARMRegisters::S0, ARMAssembler::getOp2Byte(-address.offset));
+        m_assembler.dtrh_d(LoadUint16, ARMRegisters::S0, ARMRegisters::S0, ARMAssembler::getOp2Byte(-address.offset - 0x2));
     } else {
-        m_assembler.ldr_un_imm(ARMRegisters::S0, address.offset);
+        m_assembler.moveImm(address.offset, ARMRegisters::S0);
         m_assembler.add_r(ARMRegisters::S0, ARMRegisters::S0, op2);
-        m_assembler.ldrh_r(dest, address.base, ARMRegisters::S0);
+        m_assembler.dtrh_r(true, TYPE_UH, dest, address.base, ARMRegisters::S0);
         m_assembler.add_r(ARMRegisters::S0, ARMRegisters::S0, ARMAssembler::OP2_IMM | 0x2);
-        m_assembler.ldrh_r(ARMRegisters::S0, address.base, ARMRegisters::S0);
+        m_assembler.dtrh_r(true, TYPE_UH, ARMRegisters::S0, address.base, ARMRegisters::S0);
     }
     m_assembler.orr_r(dest, dest, m_assembler.lsl(ARMRegisters::S0, 16));
 }
diff --git a/Source/JavaScriptCore/assembler/MacroAssemblerARM.h b/Source/JavaScriptCore/assembler/MacroAssemblerARM.h
index 8ea29e3..4c6c1a4 100644
--- a/Source/JavaScriptCore/assembler/MacroAssemblerARM.h
+++ b/Source/JavaScriptCore/assembler/MacroAssemblerARM.h
@@ -90,6 +90,11 @@
         m_assembler.adds_r(dest, dest, src);
     }
 
+    void add32(RegisterID op1, RegisterID op2, RegisterID dest)
+    {
+        m_assembler.adds_r(dest, op1, op2);
+    }
+
     void add32(TrustedImm32 imm, Address address)
     {
         load32(address, ARMRegisters::S1);
@@ -118,6 +123,11 @@
         m_assembler.ands_r(dest, dest, src);
     }
 
+    void and32(RegisterID op1, RegisterID op2, RegisterID dest)
+    {
+        m_assembler.ands_r(dest, op1, op2);
+    }
+
     void and32(TrustedImm32 imm, RegisterID dest)
     {
         ARMWord w = m_assembler.getImm(imm.m_value, ARMRegisters::S0, true);
@@ -136,13 +146,17 @@
             m_assembler.ands_r(dest, src, w);
     }
 
-    void lshift32(RegisterID shift_amount, RegisterID dest)
+    void lshift32(RegisterID shiftAmount, RegisterID dest)
     {
-        ARMWord w = ARMAssembler::getOp2(0x1f);
-        ASSERT(w != ARMAssembler::INVALID_IMM);
-        m_assembler.and_r(ARMRegisters::S0, shift_amount, w);
+        lshift32(dest, shiftAmount, dest);
+    }
 
-        m_assembler.movs_r(dest, m_assembler.lsl_r(dest, ARMRegisters::S0));
+    void lshift32(RegisterID src, RegisterID shiftAmount, RegisterID dest)
+    {
+        ARMWord w = ARMAssembler::getOp2Byte(0x1f);
+        m_assembler.and_r(ARMRegisters::S0, shiftAmount, w);
+
+        m_assembler.movs_r(dest, m_assembler.lsl_r(src, ARMRegisters::S0));
     }
 
     void lshift32(TrustedImm32 imm, RegisterID dest)
@@ -155,13 +169,25 @@
         m_assembler.movs_r(dest, m_assembler.lsl(src, imm.m_value & 0x1f));
     }
 
+    void mul32(RegisterID op1, RegisterID op2, RegisterID dest)
+    {
+        if (op2 == dest) {
+            if (op1 == dest) {
+                move(op2, ARMRegisters::S0);
+                op2 = ARMRegisters::S0;
+            } else {
+                // Swap the operands.
+                RegisterID tmp = op1;
+                op1 = op2;
+                op2 = tmp;
+            }
+        }
+        m_assembler.muls_r(dest, op1, op2);
+    }
+
     void mul32(RegisterID src, RegisterID dest)
     {
-        if (src == dest) {
-            move(src, ARMRegisters::S0);
-            src = ARMRegisters::S0;
-        }
-        m_assembler.muls_r(dest, dest, src);
+        mul32(src, dest, dest);
     }
 
     void mul32(TrustedImm32 imm, RegisterID src, RegisterID dest)
@@ -172,7 +198,7 @@
 
     void neg32(RegisterID srcDest)
     {
-        m_assembler.rsbs_r(srcDest, srcDest, ARMAssembler::getOp2(0));
+        m_assembler.rsbs_r(srcDest, srcDest, ARMAssembler::getOp2Byte(0));
     }
 
     void or32(RegisterID src, RegisterID dest)
@@ -195,15 +221,19 @@
         m_assembler.orrs_r(dest, op1, op2);
     }
 
-    void rshift32(RegisterID shift_amount, RegisterID dest)
+    void rshift32(RegisterID shiftAmount, RegisterID dest)
     {
-        ARMWord w = ARMAssembler::getOp2(0x1f);
-        ASSERT(w != ARMAssembler::INVALID_IMM);
-        m_assembler.and_r(ARMRegisters::S0, shift_amount, w);
-
-        m_assembler.movs_r(dest, m_assembler.asr_r(dest, ARMRegisters::S0));
+        rshift32(dest, shiftAmount, dest);
     }
-    
+
+    void rshift32(RegisterID src, RegisterID shiftAmount, RegisterID dest)
+    {
+        ARMWord w = ARMAssembler::getOp2Byte(0x1f);
+        m_assembler.and_r(ARMRegisters::S0, shiftAmount, w);
+
+        m_assembler.movs_r(dest, m_assembler.asr_r(src, ARMRegisters::S0));
+    }
+
     void rshift32(TrustedImm32 imm, RegisterID dest)
     {
         rshift32(dest, imm, dest);
@@ -213,16 +243,20 @@
     {
         m_assembler.movs_r(dest, m_assembler.asr(src, imm.m_value & 0x1f));
     }
-    
-    void urshift32(RegisterID shift_amount, RegisterID dest)
+
+    void urshift32(RegisterID shiftAmount, RegisterID dest)
     {
-        ARMWord w = ARMAssembler::getOp2(0x1f);
-        ASSERT(w != ARMAssembler::INVALID_IMM);
-        m_assembler.and_r(ARMRegisters::S0, shift_amount, w);
-        
-        m_assembler.movs_r(dest, m_assembler.lsr_r(dest, ARMRegisters::S0));
+        urshift32(dest, shiftAmount, dest);
     }
-    
+
+    void urshift32(RegisterID src, RegisterID shiftAmount, RegisterID dest)
+    {
+        ARMWord w = ARMAssembler::getOp2Byte(0x1f);
+        m_assembler.and_r(ARMRegisters::S0, shiftAmount, w);
+
+        m_assembler.movs_r(dest, m_assembler.lsr_r(src, ARMRegisters::S0));
+    }
+
     void urshift32(TrustedImm32 imm, RegisterID dest)
     {
         m_assembler.movs_r(dest, m_assembler.lsr(dest, imm.m_value & 0x1f));
@@ -266,6 +300,11 @@
         m_assembler.eors_r(dest, dest, src);
     }
 
+    void xor32(RegisterID op1, RegisterID op2, RegisterID dest)
+    {
+        m_assembler.eors_r(dest, op1, op2);
+    }
+
     void xor32(TrustedImm32 imm, RegisterID dest)
     {
         if (imm.m_value == -1)
@@ -295,22 +334,42 @@
 
     void load8(ImplicitAddress address, RegisterID dest)
     {
-        m_assembler.dataTransfer32(true, dest, address.base, address.offset, true);
+        m_assembler.dataTransfer32(ARMAssembler::LoadUint8, dest, address.base, address.offset);
     }
 
     void load8(BaseIndex address, RegisterID dest)
     {
-        m_assembler.baseIndexTransfer32(true, dest, address.base, address.index, static_cast<int>(address.scale), address.offset, true);
+        m_assembler.baseIndexTransfer32(ARMAssembler::LoadUint8, dest, address.base, address.index, static_cast<int>(address.scale), address.offset);
+    }
+
+    void load8Signed(BaseIndex address, RegisterID dest)
+    {
+        m_assembler.baseIndexTransfer16(ARMAssembler::LoadInt8, dest, address.base, address.index, static_cast<int>(address.scale), address.offset);
+    }
+
+    void load16(ImplicitAddress address, RegisterID dest)
+    {
+        m_assembler.dataTransfer16(ARMAssembler::LoadUint16, dest, address.base, address.offset);
+    }
+
+    void load16(BaseIndex address, RegisterID dest)
+    {
+        m_assembler.baseIndexTransfer16(ARMAssembler::LoadUint16, dest, address.base, address.index, static_cast<int>(address.scale), address.offset);
+    }
+
+    void load16Signed(BaseIndex address, RegisterID dest)
+    {
+        m_assembler.baseIndexTransfer16(ARMAssembler::LoadInt16, dest, address.base, address.index, static_cast<int>(address.scale), address.offset);
     }
 
     void load32(ImplicitAddress address, RegisterID dest)
     {
-        m_assembler.dataTransfer32(true, dest, address.base, address.offset);
+        m_assembler.dataTransfer32(ARMAssembler::LoadUint32, dest, address.base, address.offset);
     }
 
     void load32(BaseIndex address, RegisterID dest)
     {
-        m_assembler.baseIndexTransfer32(true, dest, address.base, address.index, static_cast<int>(address.scale), address.offset);
+        m_assembler.baseIndexTransfer32(ARMAssembler::LoadUint32, dest, address.base, address.index, static_cast<int>(address.scale), address.offset);
     }
 
 #if CPU(ARMV5_OR_LOWER)
@@ -331,7 +390,7 @@
     {
         DataLabel32 dataLabel(this);
         m_assembler.ldr_un_imm(ARMRegisters::S0, 0);
-        m_assembler.dtr_ur(true, dest, address.base, ARMRegisters::S0);
+        m_assembler.dtr_ur(ARMAssembler::LoadUint32, dest, address.base, ARMRegisters::S0);
         return dataLabel;
     }
     
@@ -342,36 +401,32 @@
         return dataLabel;
     }
 
-    void load16(BaseIndex address, RegisterID dest)
-    {
-        m_assembler.add_r(ARMRegisters::S1, address.base, m_assembler.lsl(address.index, address.scale));
-        load16(Address(ARMRegisters::S1, address.offset), dest);
-    }
-    
-    void load16(ImplicitAddress address, RegisterID dest)
-    {
-        if (address.offset >= 0)
-            m_assembler.ldrh_u(dest, address.base, m_assembler.getOffsetForHalfwordDataTransfer(address.offset, ARMRegisters::S0));
-        else
-            m_assembler.ldrh_d(dest, address.base, m_assembler.getOffsetForHalfwordDataTransfer(-address.offset, ARMRegisters::S0));
-    }
-
     DataLabel32 store32WithAddressOffsetPatch(RegisterID src, Address address)
     {
         DataLabel32 dataLabel(this);
         m_assembler.ldr_un_imm(ARMRegisters::S0, 0);
-        m_assembler.dtr_ur(false, src, address.base, ARMRegisters::S0);
+        m_assembler.dtr_ur(ARMAssembler::StoreUint32, src, address.base, ARMRegisters::S0);
         return dataLabel;
     }
 
+    void store8(RegisterID src, BaseIndex address)
+    {
+        m_assembler.baseIndexTransfer32(ARMAssembler::StoreUint8, src, address.base, address.index, static_cast<int>(address.scale), address.offset);
+    }
+
+    void store16(RegisterID src, BaseIndex address)
+    {
+        m_assembler.baseIndexTransfer16(ARMAssembler::StoreUint16, src, address.base, address.index, static_cast<int>(address.scale), address.offset);
+    }
+
     void store32(RegisterID src, ImplicitAddress address)
     {
-        m_assembler.dataTransfer32(false, src, address.base, address.offset);
+        m_assembler.dataTransfer32(ARMAssembler::StoreUint32, src, address.base, address.offset);
     }
 
     void store32(RegisterID src, BaseIndex address)
     {
-        m_assembler.baseIndexTransfer32(false, src, address.base, address.index, static_cast<int>(address.scale), address.offset);
+        m_assembler.baseIndexTransfer32(ARMAssembler::StoreUint32, src, address.base, address.index, static_cast<int>(address.scale), address.offset);
     }
 
     void store32(TrustedImm32 imm, ImplicitAddress address)
@@ -380,17 +435,23 @@
         store32(ARMRegisters::S1, address);
     }
 
+    void store32(TrustedImm32 imm, BaseIndex address)
+    {
+        move(imm, ARMRegisters::S1);
+        m_assembler.baseIndexTransfer32(ARMAssembler::StoreUint32, ARMRegisters::S1, address.base, address.index, static_cast<int>(address.scale), address.offset);
+    }
+
     void store32(RegisterID src, void* address)
     {
         m_assembler.ldr_un_imm(ARMRegisters::S0, reinterpret_cast<ARMWord>(address));
-        m_assembler.dtr_u(false, src, ARMRegisters::S0, 0);
+        m_assembler.dtr_u(ARMAssembler::StoreUint32, src, ARMRegisters::S0, 0);
     }
 
     void store32(TrustedImm32 imm, void* address)
     {
         m_assembler.ldr_un_imm(ARMRegisters::S0, reinterpret_cast<ARMWord>(address));
         m_assembler.moveImm(imm.m_value, ARMRegisters::S1);
-        m_assembler.dtr_u(false, ARMRegisters::S1, ARMRegisters::S0, 0);
+        m_assembler.dtr_u(ARMAssembler::StoreUint32, ARMRegisters::S1, ARMRegisters::S0, 0);
     }
 
     void pop(RegisterID dest)
@@ -422,7 +483,8 @@
 
     void move(RegisterID src, RegisterID dest)
     {
-        m_assembler.mov_r(dest, src);
+        if (src != dest)
+            m_assembler.mov_r(dest, src);
     }
 
     void move(TrustedImmPtr imm, RegisterID dest)
@@ -566,6 +628,12 @@
         load32(address, ARMRegisters::pc);
     }
 
+    void jump(AbsoluteAddress address)
+    {
+        move(TrustedImmPtr(address.m_ptr), ARMRegisters::S0);
+        load32(Address(ARMRegisters::S0, 0), ARMRegisters::pc);
+    }
+
     Jump branchAdd32(ResultCondition cond, RegisterID src, RegisterID dest)
     {
         ASSERT((cond == Overflow) || (cond == Signed) || (cond == Zero) || (cond == NonZero));
@@ -573,6 +641,13 @@
         return Jump(m_assembler.jmp(ARMCondition(cond)));
     }
 
+    Jump branchAdd32(ResultCondition cond, RegisterID op1, RegisterID op2, RegisterID dest)
+    {
+        ASSERT((cond == Overflow) || (cond == Signed) || (cond == Zero) || (cond == NonZero));
+        add32(op1, op2, dest);
+        return Jump(m_assembler.jmp(ARMCondition(cond)));
+    }
+
     Jump branchAdd32(ResultCondition cond, TrustedImm32 imm, RegisterID dest)
     {
         ASSERT((cond == Overflow) || (cond == Signed) || (cond == Zero) || (cond == NonZero));
@@ -587,26 +662,45 @@
         return Jump(m_assembler.jmp(ARMCondition(cond)));
     }
 
-    void mull32(RegisterID src1, RegisterID src2, RegisterID dest)
+    Jump branchAdd32(ResultCondition cond, TrustedImm32 imm, AbsoluteAddress dest)
     {
-        if (src1 == dest) {
-            move(src1, ARMRegisters::S0);
-            src1 = ARMRegisters::S0;
+        ASSERT((cond == Overflow) || (cond == Signed) || (cond == Zero) || (cond == NonZero));
+        add32(imm, dest);
+        return Jump(m_assembler.jmp(ARMCondition(cond)));
+    }
+
+    void mull32(RegisterID op1, RegisterID op2, RegisterID dest)
+    {
+        if (op2 == dest) {
+            if (op1 == dest) {
+                move(op2, ARMRegisters::S0);
+                op2 = ARMRegisters::S0;
+            } else {
+                // Swap the operands.
+                RegisterID tmp = op1;
+                op1 = op2;
+                op2 = tmp;
+            }
         }
-        m_assembler.mull_r(ARMRegisters::S1, dest, src2, src1);
+        m_assembler.mull_r(ARMRegisters::S1, dest, op1, op2);
         m_assembler.cmp_r(ARMRegisters::S1, m_assembler.asr(dest, 31));
     }
 
+    Jump branchMul32(ResultCondition cond, RegisterID src1, RegisterID src2, RegisterID dest)
+    {
+        ASSERT((cond == Overflow) || (cond == Signed) || (cond == Zero) || (cond == NonZero));
+        if (cond == Overflow) {
+            mull32(src1, src2, dest);
+            cond = NonZero;
+        }
+        else
+            mul32(src1, src2, dest);
+        return Jump(m_assembler.jmp(ARMCondition(cond)));
+    }
+
     Jump branchMul32(ResultCondition cond, RegisterID src, RegisterID dest)
     {
-        ASSERT((cond == Overflow) || (cond == Signed) || (cond == Zero) || (cond == NonZero));
-        if (cond == Overflow) {
-            mull32(src, dest, dest);
-            cond = NonZero;
-        }
-        else
-            mul32(src, dest);
-        return Jump(m_assembler.jmp(ARMCondition(cond)));
+        return branchMul32(cond, src, dest, dest);
     }
 
     Jump branchMul32(ResultCondition cond, TrustedImm32 imm, RegisterID src, RegisterID dest)
@@ -671,14 +765,8 @@
 
     Call nearCall()
     {
-#if WTF_ARM_ARCH_AT_LEAST(5)
-        ensureSpace(2 * sizeof(ARMWord), sizeof(ARMWord));
         m_assembler.loadBranchTarget(ARMRegisters::S1, ARMAssembler::AL, true);
         return Call(m_assembler.blx(ARMRegisters::S1), Call::LinkableNear);
-#else
-        prepareCall();
-        return Call(m_assembler.jmp(ARMAssembler::AL, true), Call::LinkableNear);
-#endif
     }
 
     Call call(RegisterID target)
@@ -699,15 +787,15 @@
     void compare32(RelationalCondition cond, RegisterID left, RegisterID right, RegisterID dest)
     {
         m_assembler.cmp_r(left, right);
-        m_assembler.mov_r(dest, ARMAssembler::getOp2(0));
-        m_assembler.mov_r(dest, ARMAssembler::getOp2(1), ARMCondition(cond));
+        m_assembler.mov_r(dest, ARMAssembler::getOp2Byte(0));
+        m_assembler.mov_r(dest, ARMAssembler::getOp2Byte(1), ARMCondition(cond));
     }
 
     void compare32(RelationalCondition cond, RegisterID left, TrustedImm32 right, RegisterID dest)
     {
         m_assembler.cmp_r(left, m_assembler.getImm(right.m_value, ARMRegisters::S0));
-        m_assembler.mov_r(dest, ARMAssembler::getOp2(0));
-        m_assembler.mov_r(dest, ARMAssembler::getOp2(1), ARMCondition(cond));
+        m_assembler.mov_r(dest, ARMAssembler::getOp2Byte(0));
+        m_assembler.mov_r(dest, ARMAssembler::getOp2Byte(1), ARMCondition(cond));
     }
 
     void compare8(RelationalCondition cond, Address left, TrustedImm32 right, RegisterID dest)
@@ -722,8 +810,8 @@
             m_assembler.cmp_r(0, reg);
         else
             m_assembler.tst_r(reg, m_assembler.getImm(mask.m_value, ARMRegisters::S0));
-        m_assembler.mov_r(dest, ARMAssembler::getOp2(0));
-        m_assembler.mov_r(dest, ARMAssembler::getOp2(1), ARMCondition(cond));
+        m_assembler.mov_r(dest, ARMAssembler::getOp2Byte(0));
+        m_assembler.mov_r(dest, ARMAssembler::getOp2Byte(1), ARMCondition(cond));
     }
 
     void test32(ResultCondition cond, Address address, TrustedImm32 mask, RegisterID dest)
@@ -746,25 +834,25 @@
     void add32(TrustedImm32 imm, AbsoluteAddress address)
     {
         m_assembler.ldr_un_imm(ARMRegisters::S1, reinterpret_cast<ARMWord>(address.m_ptr));
-        m_assembler.dtr_u(true, ARMRegisters::S1, ARMRegisters::S1, 0);
+        m_assembler.dtr_u(ARMAssembler::LoadUint32, ARMRegisters::S1, ARMRegisters::S1, 0);
         add32(imm, ARMRegisters::S1);
         m_assembler.ldr_un_imm(ARMRegisters::S0, reinterpret_cast<ARMWord>(address.m_ptr));
-        m_assembler.dtr_u(false, ARMRegisters::S1, ARMRegisters::S0, 0);
+        m_assembler.dtr_u(ARMAssembler::StoreUint32, ARMRegisters::S1, ARMRegisters::S0, 0);
     }
 
     void sub32(TrustedImm32 imm, AbsoluteAddress address)
     {
         m_assembler.ldr_un_imm(ARMRegisters::S1, reinterpret_cast<ARMWord>(address.m_ptr));
-        m_assembler.dtr_u(true, ARMRegisters::S1, ARMRegisters::S1, 0);
+        m_assembler.dtr_u(ARMAssembler::LoadUint32, ARMRegisters::S1, ARMRegisters::S1, 0);
         sub32(imm, ARMRegisters::S1);
         m_assembler.ldr_un_imm(ARMRegisters::S0, reinterpret_cast<ARMWord>(address.m_ptr));
-        m_assembler.dtr_u(false, ARMRegisters::S1, ARMRegisters::S0, 0);
+        m_assembler.dtr_u(ARMAssembler::StoreUint32, ARMRegisters::S1, ARMRegisters::S0, 0);
     }
 
     void load32(const void* address, RegisterID dest)
     {
         m_assembler.ldr_un_imm(ARMRegisters::S0, reinterpret_cast<ARMWord>(address));
-        m_assembler.dtr_u(true, dest, ARMRegisters::S0, 0);
+        m_assembler.dtr_u(ARMAssembler::LoadUint32, dest, ARMRegisters::S0, 0);
     }
 
     Jump branch32(RelationalCondition cond, AbsoluteAddress left, RegisterID right)
@@ -790,14 +878,9 @@
 
     Call call()
     {
-#if WTF_ARM_ARCH_AT_LEAST(5)
         ensureSpace(2 * sizeof(ARMWord), sizeof(ARMWord));
         m_assembler.loadBranchTarget(ARMRegisters::S1, ARMAssembler::AL, true);
         return Call(m_assembler.blx(ARMRegisters::S1), Call::Linkable);
-#else
-        prepareCall();
-        return Call(m_assembler.jmp(ARMAssembler::AL, true), Call::Linkable);
-#endif
     }
 
     Call tailRecursiveCall()
@@ -861,20 +944,52 @@
     }
     static bool supportsFloatingPointAbs() { return false; }
 
+    void loadFloat(BaseIndex address, FPRegisterID dest)
+    {
+        m_assembler.baseIndexTransferFloat(ARMAssembler::LoadFloat, dest, address.base, address.index, static_cast<int>(address.scale), address.offset);
+    }
+
     void loadDouble(ImplicitAddress address, FPRegisterID dest)
     {
-        m_assembler.doubleTransfer(true, dest, address.base, address.offset);
+        m_assembler.dataTransferFloat(ARMAssembler::LoadDouble, dest, address.base, address.offset);
+    }
+
+    void loadDouble(BaseIndex address, FPRegisterID dest)
+    {
+        m_assembler.baseIndexTransferFloat(ARMAssembler::LoadDouble, dest, address.base, address.index, static_cast<int>(address.scale), address.offset);
     }
 
     void loadDouble(const void* address, FPRegisterID dest)
     {
-        m_assembler.ldr_un_imm(ARMRegisters::S0, (ARMWord)address);
-        m_assembler.fdtr_u(true, dest, ARMRegisters::S0, 0);
+        move(TrustedImm32(reinterpret_cast<ARMWord>(address)), ARMRegisters::S0);
+        m_assembler.fdtr_u(ARMAssembler::LoadDouble, dest, ARMRegisters::S0, 0);
+    }
+
+    void storeFloat(FPRegisterID src, BaseIndex address)
+    {
+        m_assembler.baseIndexTransferFloat(ARMAssembler::StoreFloat, src, address.base, address.index, static_cast<int>(address.scale), address.offset);
     }
 
     void storeDouble(FPRegisterID src, ImplicitAddress address)
     {
-        m_assembler.doubleTransfer(false, src, address.base, address.offset);
+        m_assembler.dataTransferFloat(ARMAssembler::StoreDouble, src, address.base, address.offset);
+    }
+
+    void storeDouble(FPRegisterID src, BaseIndex address)
+    {
+        m_assembler.baseIndexTransferFloat(ARMAssembler::StoreDouble, src, address.base, address.index, static_cast<int>(address.scale), address.offset);
+    }
+
+    void storeDouble(FPRegisterID src, const void* address)
+    {
+        move(TrustedImm32(reinterpret_cast<ARMWord>(address)), ARMRegisters::S0);
+        m_assembler.dataTransferFloat(ARMAssembler::StoreDouble, src, ARMRegisters::S0, 0);
+    }
+
+    void moveDouble(FPRegisterID src, FPRegisterID dest)
+    {
+        if (src != dest)
+            m_assembler.vmov_f64_r(dest, src);
     }
 
     void addDouble(FPRegisterID src, FPRegisterID dest)
@@ -882,17 +997,33 @@
         m_assembler.vadd_f64_r(dest, dest, src);
     }
 
+    void addDouble(FPRegisterID op1, FPRegisterID op2, FPRegisterID dest)
+    {
+        m_assembler.vadd_f64_r(dest, op1, op2);
+    }
+
     void addDouble(Address src, FPRegisterID dest)
     {
         loadDouble(src, ARMRegisters::SD0);
         addDouble(ARMRegisters::SD0, dest);
     }
 
+    void addDouble(AbsoluteAddress address, FPRegisterID dest)
+    {
+        loadDouble(address.m_ptr, ARMRegisters::SD0);
+        addDouble(ARMRegisters::SD0, dest);
+    }
+
     void divDouble(FPRegisterID src, FPRegisterID dest)
     {
         m_assembler.vdiv_f64_r(dest, dest, src);
     }
 
+    void divDouble(FPRegisterID op1, FPRegisterID op2, FPRegisterID dest)
+    {
+        m_assembler.vdiv_f64_r(dest, op1, op2);
+    }
+
     void divDouble(Address src, FPRegisterID dest)
     {
         ASSERT_NOT_REACHED(); // Untested
@@ -905,6 +1036,11 @@
         m_assembler.vsub_f64_r(dest, dest, src);
     }
 
+    void subDouble(FPRegisterID op1, FPRegisterID op2, FPRegisterID dest)
+    {
+        m_assembler.vsub_f64_r(dest, op1, op2);
+    }
+
     void subDouble(Address src, FPRegisterID dest)
     {
         loadDouble(src, ARMRegisters::SD0);
@@ -922,39 +1058,55 @@
         mulDouble(ARMRegisters::SD0, dest);
     }
 
+    void mulDouble(FPRegisterID op1, FPRegisterID op2, FPRegisterID dest)
+    {
+        m_assembler.vmul_f64_r(dest, op1, op2);
+    }
+
     void sqrtDouble(FPRegisterID src, FPRegisterID dest)
     {
         m_assembler.vsqrt_f64_r(dest, src);
     }
     
-    void absDouble(FPRegisterID, FPRegisterID)
+    void absDouble(FPRegisterID src, FPRegisterID dest)
     {
-        ASSERT_NOT_REACHED();
+        m_assembler.vabs_f64_r(dest, src);
+    }
+
+    void negateDouble(FPRegisterID src, FPRegisterID dest)
+    {
+        m_assembler.vneg_f64_r(dest, src);
     }
 
     void convertInt32ToDouble(RegisterID src, FPRegisterID dest)
     {
-        m_assembler.vmov_vfp_r(dest << 1, src);
+        m_assembler.vmov_vfp32_r(dest << 1, src);
         m_assembler.vcvt_f64_s32_r(dest, dest << 1);
     }
 
     void convertInt32ToDouble(Address src, FPRegisterID dest)
     {
-        ASSERT_NOT_REACHED(); // Untested
-        // flds does not worth the effort here
         load32(src, ARMRegisters::S1);
         convertInt32ToDouble(ARMRegisters::S1, dest);
     }
 
     void convertInt32ToDouble(AbsoluteAddress src, FPRegisterID dest)
     {
-        ASSERT_NOT_REACHED(); // Untested
-        // flds does not worth the effort here
-        m_assembler.ldr_un_imm(ARMRegisters::S1, (ARMWord)src.m_ptr);
-        m_assembler.dtr_u(true, ARMRegisters::S1, ARMRegisters::S1, 0);
+        move(TrustedImmPtr(src.m_ptr), ARMRegisters::S1);
+        load32(Address(ARMRegisters::S1), ARMRegisters::S1);
         convertInt32ToDouble(ARMRegisters::S1, dest);
     }
 
+    void convertFloatToDouble(FPRegisterID src, FPRegisterID dst)
+    {
+        m_assembler.vcvt_f64_f32_r(dst, src);
+    }
+
+    void convertDoubleToFloat(FPRegisterID src, FPRegisterID dst)
+    {
+        m_assembler.vcvt_f32_f64_r(dst, src);
+    }
+
     Jump branchDouble(DoubleCondition cond, FPRegisterID left, FPRegisterID right)
     {
         m_assembler.vcmp_f64_r(left, right);
@@ -968,12 +1120,42 @@
     // If the result is not representable as a 32 bit value, branch.
     // May also branch for some values that are representable in 32 bits
     // (specifically, in this case, INT_MIN).
-    Jump branchTruncateDoubleToInt32(FPRegisterID src, RegisterID dest)
+    enum BranchTruncateType { BranchIfTruncateFailed, BranchIfTruncateSuccessful };
+    Jump branchTruncateDoubleToInt32(FPRegisterID src, RegisterID dest, BranchTruncateType branchType = BranchIfTruncateFailed)
     {
-        UNUSED_PARAM(src);
-        UNUSED_PARAM(dest);
-        ASSERT_NOT_REACHED();
-        return jump();
+        truncateDoubleToInt32(src, dest);
+
+        m_assembler.add_r(ARMRegisters::S0, dest, ARMAssembler::getOp2Byte(1));
+        m_assembler.bic_r(ARMRegisters::S0, ARMRegisters::S0, ARMAssembler::getOp2Byte(1));
+
+        ARMWord w = ARMAssembler::getOp2(0x80000000);
+        ASSERT(w != ARMAssembler::INVALID_IMM);
+        m_assembler.cmp_r(ARMRegisters::S0, w);
+        return Jump(m_assembler.jmp(branchType == BranchIfTruncateFailed ? ARMAssembler::EQ : ARMAssembler::NE));
+    }
+
+    Jump branchTruncateDoubleToUint32(FPRegisterID src, RegisterID dest, BranchTruncateType branchType = BranchIfTruncateFailed)
+    {
+        truncateDoubleToUint32(src, dest);
+
+        m_assembler.add_r(ARMRegisters::S0, dest, ARMAssembler::getOp2Byte(1));
+        m_assembler.bic_r(ARMRegisters::S0, ARMRegisters::S0, ARMAssembler::getOp2Byte(1));
+
+        m_assembler.cmp_r(ARMRegisters::S0, ARMAssembler::getOp2Byte(0));
+        return Jump(m_assembler.jmp(branchType == BranchIfTruncateFailed ? ARMAssembler::EQ : ARMAssembler::NE));
+    }
+
+    // Result is undefined if the value is outside of the integer range.
+    void truncateDoubleToInt32(FPRegisterID src, RegisterID dest)
+    {
+        m_assembler.vcvt_s32_f64_r(ARMRegisters::SD0 << 1, src);
+        m_assembler.vmov_arm32_r(dest, ARMRegisters::SD0 << 1);
+    }
+
+    void truncateDoubleToUint32(FPRegisterID src, RegisterID dest)
+    {
+        m_assembler.vcvt_u32_f64_r(ARMRegisters::SD0 << 1, src);
+        m_assembler.vmov_arm32_r(dest, ARMRegisters::SD0 << 1);
     }
 
     // Convert 'src' to an integer, and places the resulting 'dest'.
@@ -983,7 +1165,7 @@
     void branchConvertDoubleToInt32(FPRegisterID src, RegisterID dest, JumpList& failureCases, FPRegisterID fpTemp)
     {
         m_assembler.vcvt_s32_f64_r(ARMRegisters::SD0 << 1, src);
-        m_assembler.vmov_arm_r(dest, ARMRegisters::SD0 << 1);
+        m_assembler.vmov_arm32_r(dest, ARMRegisters::SD0 << 1);
 
         // Convert the integer result back to float & compare to the original value - if not equal or unordered (NaN) then jump.
         m_assembler.vcvt_f64_s32_r(ARMRegisters::SD0, ARMRegisters::SD0 << 1);
@@ -995,18 +1177,25 @@
 
     Jump branchDoubleNonZero(FPRegisterID reg, FPRegisterID scratch)
     {
-        m_assembler.mov_r(ARMRegisters::S0, ARMAssembler::getOp2(0));
+        m_assembler.mov_r(ARMRegisters::S0, ARMAssembler::getOp2Byte(0)); // Put integer 0 in S0; it is converted to a double in 'scratch' and the branch is taken when 'reg' compares DoubleNotEqual to it.
         convertInt32ToDouble(ARMRegisters::S0, scratch);
         return branchDouble(DoubleNotEqual, reg, scratch);
     }
 
     Jump branchDoubleZeroOrNaN(FPRegisterID reg, FPRegisterID scratch)
     {
-        m_assembler.mov_r(ARMRegisters::S0, ARMAssembler::getOp2(0));
+        m_assembler.mov_r(ARMRegisters::S0, ARMAssembler::getOp2Byte(0)); // Put integer 0 in S0; it is converted to a double in 'scratch' and the branch is taken when 'reg' compares DoubleEqualOrUnordered to it.
         convertInt32ToDouble(ARMRegisters::S0, scratch);
         return branchDouble(DoubleEqualOrUnordered, reg, scratch);
     }
 
+    // Invert a relational condition, e.g. == becomes !=, < becomes >=, etc.
+    static RelationalCondition invert(RelationalCondition cond)
+    {
+        ASSERT((static_cast<uint32_t>(cond & 0x0fffffff)) == 0 && static_cast<uint32_t>(cond) < static_cast<uint32_t>(ARMAssembler::AL)); // 'cond' must have its low 28 bits clear and be numerically below ARMAssembler::AL.
+        return static_cast<RelationalCondition>(cond ^ 0x10000000); // Toggle bit 28 of the condition value to obtain its opposite.
+    }
+
     void nop()
     {
         m_assembler.nop();
@@ -1019,12 +1208,12 @@
 
     static void replaceWithJump(CodeLocationLabel instructionStart, CodeLocationLabel destination)
     {
-        ASSERT_NOT_REACHED();
+        ARMAssembler::replaceWithJump(instructionStart.dataLocation(), destination.dataLocation()); // Delegate to the assembler, handing it the dataLocation() of each code location.
     }
     
     static ptrdiff_t maxJumpReplacementSize()
     {
-        ASSERT_NOT_REACHED();
-        return 0;
+        // Report the size computed by ARMAssembler so callers see the real value rather than a constant placeholder.
+        return ARMAssembler::maxJumpReplacementSize();
     }
 
@@ -1049,58 +1238,10 @@
         return m_assembler.sizeOfConstantPool();
     }
 
-    void prepareCall()
-    {
-#if WTF_ARM_ARCH_VERSION < 5
-        ensureSpace(2 * sizeof(ARMWord), sizeof(ARMWord));
-
-        m_assembler.mov_r(linkRegister, ARMRegisters::pc);
-#endif
-    }
-
     void call32(RegisterID base, int32_t offset)
     {
-#if WTF_ARM_ARCH_AT_LEAST(5)
-        int targetReg = ARMRegisters::S1;
-#else
-        int targetReg = ARMRegisters::pc;
-#endif
-        int tmpReg = ARMRegisters::S1;
-
-        if (base == ARMRegisters::sp)
-            offset += 4;
-
-        if (offset >= 0) {
-            if (offset <= 0xfff) {
-                prepareCall();
-                m_assembler.dtr_u(true, targetReg, base, offset);
-            } else if (offset <= 0xfffff) {
-                m_assembler.add_r(tmpReg, base, ARMAssembler::OP2_IMM | (offset >> 12) | (10 << 8));
-                prepareCall();
-                m_assembler.dtr_u(true, targetReg, tmpReg, offset & 0xfff);
-            } else {
-                m_assembler.moveImm(offset, tmpReg);
-                prepareCall();
-                m_assembler.dtr_ur(true, targetReg, base, tmpReg);
-            }
-        } else  {
-            offset = -offset;
-            if (offset <= 0xfff) {
-                prepareCall();
-                m_assembler.dtr_d(true, targetReg, base, offset);
-            } else if (offset <= 0xfffff) {
-                m_assembler.sub_r(tmpReg, base, ARMAssembler::OP2_IMM | (offset >> 12) | (10 << 8));
-                prepareCall();
-                m_assembler.dtr_d(true, targetReg, tmpReg, offset & 0xfff);
-            } else {
-                m_assembler.moveImm(offset, tmpReg);
-                prepareCall();
-                m_assembler.dtr_dr(true, targetReg, base, tmpReg);
-            }
-        }
-#if WTF_ARM_ARCH_AT_LEAST(5)
-        m_assembler.blx(targetReg);
-#endif
+        load32(Address(base, offset), ARMRegisters::S1); // Fetch the call target stored at Address(base, offset) into S1; load32 takes over the offset handling.
+        m_assembler.blx(ARMRegisters::S1); // Emit a blx through S1 to perform the call.
     }
 
 private:
diff --git a/Source/JavaScriptCore/assembler/SH4Assembler.h b/Source/JavaScriptCore/assembler/SH4Assembler.h
index 59d0422..d55d393 100644
--- a/Source/JavaScriptCore/assembler/SH4Assembler.h
+++ b/Source/JavaScriptCore/assembler/SH4Assembler.h
@@ -1241,7 +1241,7 @@
 
     AssemblerLabel label()
     {
-        m_buffer.ensureSpaceForAnyOneInstruction();
+        m_buffer.ensureSpaceForAnyInstruction(); // Ask the buffer for instruction space before the label is taken (the exact guarantee is defined by the buffer class - confirm there).
         return m_buffer.label();
     }