2008-11-13  Maciej Stachowiak  <mjs@apple.com>

        Reviewed by Cameron Zwarich.
        
        - remove immediate checks from native codegen for known non-immediate cases like "this"
        
        ~0.5% speedup on v8 benchmarks
        
        In the future we can extend this model to remove all sorts of
        typechecks based on local type info or type inference.
        
        I also added an assertion to verify that all slow cases linked as
        many slow case jumps as the corresponding fast case generated, and
        fixed the pre-existing cases where this was not true.
        
        * VM/CTI.cpp:
        (JSC::CTI::emitJumpSlowCaseIfNotJSCell):
        (JSC::CTI::linkSlowCaseIfNotJSCell):
        (JSC::CTI::compileBinaryArithOp):
        (JSC::CTI::compileBinaryArithOpSlowCase):
        (JSC::CTI::privateCompileMainPass):
        (JSC::CTI::privateCompileSlowCases):
        * VM/CTI.h:
        * VM/CodeBlock.h:
        (JSC::CodeBlock::isKnownNotImmediate):



git-svn-id: http://svn.webkit.org/repository/webkit/trunk@38369 268f45cc-cd09-0410-ab3c-d52691b4dbfc
diff --git a/JavaScriptCore/ChangeLog b/JavaScriptCore/ChangeLog
index 2a364a0..8489d2e 100644
--- a/JavaScriptCore/ChangeLog
+++ b/JavaScriptCore/ChangeLog
@@ -1,3 +1,29 @@
+2008-11-13  Maciej Stachowiak  <mjs@apple.com>
+
+        Reviewed by Cameron Zwarich.
+        
+        - remove immediate checks from native codegen for known non-immediate cases like "this"
+        
+        ~0.5% speedup on v8 benchmarks
+        
+        In the future we can extend this model to remove all sorts of
+        typechecks based on local type info or type inference.
+        
+        I also added an assertion to verify that all slow cases linked as
+        many slow case jumps as the corresponding fast case generated, and
+        fixed the pre-existing cases where this was not true.
+        
+        * VM/CTI.cpp:
+        (JSC::CTI::emitJumpSlowCaseIfNotJSCell):
+        (JSC::CTI::linkSlowCaseIfNotJSCell):
+        (JSC::CTI::compileBinaryArithOp):
+        (JSC::CTI::compileBinaryArithOpSlowCase):
+        (JSC::CTI::privateCompileMainPass):
+        (JSC::CTI::privateCompileSlowCases):
+        * VM/CTI.h:
+        * VM/CodeBlock.h:
+        (JSC::CodeBlock::isKnownNotImmediate):
+
 2008-11-13  Cameron Zwarich  <zwarich@apple.com>
 
         Reviewed by Maciej Stachowiak.
diff --git a/JavaScriptCore/VM/CTI.cpp b/JavaScriptCore/VM/CTI.cpp
index c727585..31e35e9 100644
--- a/JavaScriptCore/VM/CTI.cpp
+++ b/JavaScriptCore/VM/CTI.cpp
@@ -505,6 +505,23 @@
     m_slowCases.append(SlowCaseEntry(m_jit.emitUnlinkedJne(), opcodeIndex));
 }
 
+ALWAYS_INLINE void CTI::emitJumpSlowCaseIfNotJSCell(X86Assembler::RegisterID reg, unsigned opcodeIndex, int vReg)
+{
+    if (m_codeBlock->isKnownNotImmediate(vReg))
+        return;
+
+    emitJumpSlowCaseIfNotJSCell(reg, opcodeIndex);
+}
+
+ALWAYS_INLINE bool CTI::linkSlowCaseIfNotJSCell(const Vector<SlowCaseEntry>::iterator& iter, int vReg)
+{
+    if (m_codeBlock->isKnownNotImmediate(vReg))
+        return false;
+    
+    m_jit.link(iter->from, m_jit.label());
+    return true;
+}
+
 ALWAYS_INLINE void CTI::emitJumpSlowCaseIfNotImmNum(X86Assembler::RegisterID reg, unsigned opcodeIndex)
 {
     m_jit.testl_i32r(JSImmediate::TagBitTypeInteger, reg);
@@ -856,7 +873,7 @@
         m_jit.testl_i32r(JSImmediate::TagBitTypeInteger, X86::edx);
         X86Assembler::JmpSrc op2imm = m_jit.emitUnlinkedJne();
         if (!types.second().definitelyIsNumber()) {
-            emitJumpSlowCaseIfNotJSCell(X86::edx, i);
+            emitJumpSlowCaseIfNotJSCell(X86::edx, i, src2);
             m_jit.cmpl_i32m(reinterpret_cast<unsigned>(numberStructureID), OBJECT_OFFSET(JSCell, m_structureID), X86::edx);
             m_slowCases.append(SlowCaseEntry(m_jit.emitUnlinkedJne(), i));
         }
@@ -866,7 +883,7 @@
         m_jit.testl_i32r(JSImmediate::TagBitTypeInteger, X86::eax);
         X86Assembler::JmpSrc op1imm = m_jit.emitUnlinkedJne();
         if (!types.first().definitelyIsNumber()) {
-            emitJumpSlowCaseIfNotJSCell(X86::eax, i);
+            emitJumpSlowCaseIfNotJSCell(X86::eax, i, src1);
             m_jit.cmpl_i32m(reinterpret_cast<unsigned>(numberStructureID), OBJECT_OFFSET(JSCell, m_structureID), X86::eax);
             m_slowCases.append(SlowCaseEntry(m_jit.emitUnlinkedJne(), i));
         }
@@ -903,7 +920,7 @@
         m_jit.testl_i32r(JSImmediate::TagBitTypeInteger, X86::eax);
         X86Assembler::JmpSrc op1imm = m_jit.emitUnlinkedJne();
         if (!types.first().definitelyIsNumber()) {
-            emitJumpSlowCaseIfNotJSCell(X86::eax, i);
+            emitJumpSlowCaseIfNotJSCell(X86::eax, i, src1);
             m_jit.cmpl_i32m(reinterpret_cast<unsigned>(numberStructureID), OBJECT_OFFSET(JSCell, m_structureID), X86::eax);
             m_slowCases.append(SlowCaseEntry(m_jit.emitUnlinkedJne(), i));
         }
@@ -913,7 +930,7 @@
         m_jit.testl_i32r(JSImmediate::TagBitTypeInteger, X86::edx);
         X86Assembler::JmpSrc op2imm = m_jit.emitUnlinkedJne();
         if (!types.second().definitelyIsNumber()) {
-            emitJumpSlowCaseIfNotJSCell(X86::edx, i);
+            emitJumpSlowCaseIfNotJSCell(X86::edx, i, src2);
             m_jit.cmpl_i32m(reinterpret_cast<unsigned>(numberStructureID), OBJECT_OFFSET(JSCell, m_structureID), X86::edx);
             m_slowCases.append(SlowCaseEntry(m_jit.emitUnlinkedJne(), i));
         }
@@ -998,22 +1015,26 @@
     m_jit.link(iter->from, here);
     if (types.second().isReusable() && isSSE2Present()) {
         if (!types.first().definitelyIsNumber()) {
-            m_jit.link((++iter)->from, here);
-            m_jit.link((++iter)->from, here);
+            if (linkSlowCaseIfNotJSCell(++iter, src2))
+                ++iter;
+            m_jit.link(iter->from, here);
         }
         if (!types.second().definitelyIsNumber()) {
-            m_jit.link((++iter)->from, here);
-            m_jit.link((++iter)->from, here);
+            if (linkSlowCaseIfNotJSCell(++iter, src1))
+                ++iter;
+            m_jit.link(iter->from, here);
         }
         m_jit.link((++iter)->from, here);
     } else if (types.first().isReusable() && isSSE2Present()) {
         if (!types.first().definitelyIsNumber()) {
-            m_jit.link((++iter)->from, here);
-            m_jit.link((++iter)->from, here);
+            if (linkSlowCaseIfNotJSCell(++iter, src1))
+                ++iter;
+            m_jit.link(iter->from, here);
         }
         if (!types.second().definitelyIsNumber()) {
-            m_jit.link((++iter)->from, here);
-            m_jit.link((++iter)->from, here);
+            if (linkSlowCaseIfNotJSCell(++iter, src2))
+                ++iter;
+            m_jit.link(iter->from, here);
         }
         m_jit.link((++iter)->from, here);
     } else
@@ -1183,16 +1204,19 @@
             // to just after the arguments have been loaded into registers 'hotPathBegin', and we generate code
             // such that the StructureID & offset are always at the same distance from this.
 
-            emitGetArg(instruction[i + 1].u.operand, X86::eax, i);
+            int baseVReg = instruction[i + 1].u.operand;
+            emitGetArg(baseVReg, X86::eax, i);
             emitGetArg(instruction[i + 3].u.operand, X86::edx, i);
 
             ASSERT(m_codeBlock->propertyAccessInstructions[propertyAccessInstructionIndex].opcodeIndex == i);
+
+            // Jump to a slow case if either the base object is an immediate, or if the StructureID does not match.
+            emitJumpSlowCaseIfNotJSCell(X86::eax, i, baseVReg);
+
             X86Assembler::JmpDst hotPathBegin = m_jit.label();
             m_propertyAccessCompilationInfo[propertyAccessInstructionIndex].hotPathBegin = hotPathBegin;
             ++propertyAccessInstructionIndex;
 
-            // Jump to a slow case if either the base object is an immediate, or if the StructureID does not match.
-            emitJumpSlowCaseIfNotJSCell(X86::eax, i);
             // It is important that the following instruction plants a 32bit immediate, in order that it can be patched over.
             m_jit.cmpl_i32m(repatchGetByIdDefaultStructureID, OBJECT_OFFSET(JSCell, m_structureID), X86::eax);
             ASSERT(X86Assembler::getDifferenceBetweenLabels(hotPathBegin, m_jit.label()) == repatchOffsetPutByIdStructureID);
@@ -1212,15 +1236,17 @@
             // to array-length / prototype access tranpolines, and finally we also the the property-map access offset as a label
             // to jump back to if one of these trampolies finds a match.
 
-            emitGetArg(instruction[i + 2].u.operand, X86::eax, i);
+            int baseVReg = instruction[i + 2].u.operand;
+            emitGetArg(baseVReg, X86::eax, i);
 
             ASSERT(m_codeBlock->propertyAccessInstructions[propertyAccessInstructionIndex].opcodeIndex == i);
 
+            emitJumpSlowCaseIfNotJSCell(X86::eax, i, baseVReg);
+
             X86Assembler::JmpDst hotPathBegin = m_jit.label();
             m_propertyAccessCompilationInfo[propertyAccessInstructionIndex].hotPathBegin = hotPathBegin;
             ++propertyAccessInstructionIndex;
 
-            emitJumpSlowCaseIfNotJSCell(X86::eax, i);
             m_jit.cmpl_i32m(repatchGetByIdDefaultStructureID, OBJECT_OFFSET(JSCell, m_structureID), X86::eax);
             ASSERT(X86Assembler::getDifferenceBetweenLabels(hotPathBegin, m_jit.label()) == repatchOffsetGetByIdStructureID);
             m_slowCases.append(SlowCaseEntry(m_jit.emitUnlinkedJne(), i));
@@ -1553,7 +1579,9 @@
             break;
         }
         case op_negate: {
-            emitGetArg(instruction[i + 2].u.operand, X86::eax, i);
+            int srcVReg = instruction[i + 2].u.operand;
+            emitGetArg(srcVReg, X86::eax, i);
+
             m_jit.testl_i32r(JSImmediate::TagBitTypeInteger, X86::eax);
             X86Assembler::JmpSrc notImmediate = m_jit.emitUnlinkedJe();
 
@@ -1579,7 +1607,7 @@
                 m_jit.link(notImmediate, m_jit.label());
                 ResultType resultType(instruction[i + 3].u.resultType);
                 if (!resultType.definitelyIsNumber()) {
-                    emitJumpSlowCaseIfNotJSCell(X86::eax, i);
+                    emitJumpSlowCaseIfNotJSCell(X86::eax, i, srcVReg);
                     StructureID* numberStructureID = m_globalData->numberStructureID.get();
                     m_jit.cmpl_i32m(reinterpret_cast<unsigned>(numberStructureID), OBJECT_OFFSET(JSCell, m_structureID), X86::eax);
                     m_slowCases.append(SlowCaseEntry(m_jit.emitUnlinkedJne(), i));
@@ -2027,12 +2055,13 @@
             break;
         }
         case op_to_jsnumber: {
-            emitGetArg(instruction[i + 2].u.operand, X86::eax, i);
+            int srcVReg = instruction[i + 2].u.operand;
+            emitGetArg(srcVReg, X86::eax, i);
             
             m_jit.testl_i32r(JSImmediate::TagBitTypeInteger, X86::eax);
             X86Assembler::JmpSrc wasImmediate = m_jit.emitUnlinkedJnz();
 
-            emitJumpSlowCaseIfNotJSCell(X86::eax, i);
+            emitJumpSlowCaseIfNotJSCell(X86::eax, i, srcVReg);
 
             m_jit.movl_mr(OBJECT_OFFSET(JSCell, m_structureID), X86::eax, X86::ecx);
             m_jit.cmpl_i32m(NumberType, OBJECT_OFFSET(StructureID, m_typeInfo.m_type), X86::ecx);
@@ -2338,6 +2367,18 @@
         i += 4; \
         break; \
     }
+
+#define CTI_COMPILE_BINARY_OP_SLOW_CASE_DOUBLE_ENTRY(name) \
+    case name: { \
+        m_jit.link(iter->from, m_jit.label()); \
+        m_jit.link((++iter)->from, m_jit.label());                \
+        emitGetPutArg(instruction[i + 2].u.operand, 0, X86::ecx); \
+        emitGetPutArg(instruction[i + 3].u.operand, 4, X86::ecx); \
+        emitCTICall(instruction + i, i, Machine::cti_##name); \
+        emitPutResult(instruction[i + 1].u.operand); \
+        i += 4; \
+        break; \
+    }
     
 void CTI::privateCompileSlowCases()
 {
@@ -2350,6 +2391,10 @@
         killLastResultRegister();
 
         unsigned i = iter->to;
+#ifndef NDEBUG
+        unsigned firstTo = i;
+#endif
+
         switch (OpcodeID opcodeID = m_machine->getOpcodeID(instruction[i].u.opcode)) {
         case op_convert_this: {
             m_jit.link(iter->from, m_jit.label());
@@ -2432,6 +2477,14 @@
         }
         case op_negate: {
             m_jit.link(iter->from, m_jit.label());
+            m_jit.link((++iter)->from, m_jit.label());
+            ResultType resultType(instruction[i + 3].u.resultType);
+            if (!resultType.definitelyIsNumber()) {
+                if (linkSlowCaseIfNotJSCell(++iter, instruction[i + 2].u.operand))
+                    ++iter;
+                m_jit.link(iter->from, m_jit.label());
+            }
+
             emitGetPutArg(instruction[i + 2].u.operand, 0, X86::ecx);
             emitCTICall(instruction + i, i, Machine::cti_op_negate);
             emitPutResult(instruction[i + 1].u.operand);
@@ -2488,8 +2541,9 @@
             break;
         }
         case op_put_by_id: {
+            if (linkSlowCaseIfNotJSCell(iter, instruction[i + 1].u.operand))
+                ++iter;
             m_jit.link(iter->from, m_jit.label());
-            m_jit.link((++iter)->from, m_jit.label());
 
             Identifier* ident = &(m_codeBlock->identifiers[instruction[i + 2].u.operand]);
             emitPutArgConstant(reinterpret_cast<unsigned>(ident), 4);
@@ -2512,8 +2566,9 @@
             // prototype access trampoline fail we want to bail out back to here.  To do so we can subtract back
             // the distance from the call to the head of the slow case.
 
+            if (linkSlowCaseIfNotJSCell(iter, instruction[i + 2].u.operand))
+                ++iter;
             m_jit.link(iter->from, m_jit.label());
-            m_jit.link((++iter)->from, m_jit.label());
 
 #ifndef NDEBUG
             X86Assembler::JmpDst coldPathBegin = m_jit.label();
@@ -2762,10 +2817,12 @@
             i += 4;
             break;
         }
-        CTI_COMPILE_BINARY_OP_SLOW_CASE(op_stricteq);
-        CTI_COMPILE_BINARY_OP_SLOW_CASE(op_nstricteq);
+        CTI_COMPILE_BINARY_OP_SLOW_CASE_DOUBLE_ENTRY(op_stricteq);
+        CTI_COMPILE_BINARY_OP_SLOW_CASE_DOUBLE_ENTRY(op_nstricteq);
         case op_instanceof: {
             m_jit.link(iter->from, m_jit.label());
+            m_jit.link((++iter)->from, m_jit.label());
+            m_jit.link((++iter)->from, m_jit.label());
             emitGetPutArg(instruction[i + 2].u.operand, 0, X86::ecx);
             emitGetPutArg(instruction[i + 3].u.operand, 4, X86::ecx);
             emitGetPutArg(instruction[i + 4].u.operand, 8, X86::ecx);
@@ -2798,6 +2855,7 @@
             int32_t value;
             if (src1Value && ((value = JSImmediate::intValue(src1Value)) > 0)) {
                 m_jit.link(iter->from, m_jit.label());
+                m_jit.link((++iter)->from, m_jit.label());
                 // There is an extra slow case for (op1 * -N) or (-N * op2), to check for 0 since this should produce a result of -0.
                 emitGetPutArg(src1, 0, X86::ecx);
                 emitGetPutArg(src2, 4, X86::ecx);
@@ -2805,6 +2863,7 @@
                 emitPutResult(dst);
             } else if (src2Value && ((value = JSImmediate::intValue(src2Value)) > 0)) {
                 m_jit.link(iter->from, m_jit.label());
+                m_jit.link((++iter)->from, m_jit.label());
                 // There is an extra slow case for (op1 * -N) or (-N * op2), to check for 0 since this should produce a result of -0.
                 emitGetPutArg(src1, 0, X86::ecx);
                 emitGetPutArg(src2, 4, X86::ecx);
@@ -2956,7 +3015,8 @@
             break;
         }
         case op_to_jsnumber: {
-            m_jit.link(iter->from, m_jit.label());
+            if (linkSlowCaseIfNotJSCell(iter, instruction[i + 2].u.operand))
+                ++iter;
             m_jit.link(iter->from, m_jit.label());
 
             emitPutArg(X86::eax, 0);
@@ -2972,6 +3032,9 @@
             break;
         }
 
+        ASSERT_WITH_MESSAGE((iter + 1) == m_slowCases.end() || firstTo != (iter + 1)->to,"Not enough jumps linked in slow case codegen.");
+        ASSERT_WITH_MESSAGE(firstTo == iter->to, "Too many jumps linked in slow case codegen.");
+
         m_jit.link(m_jit.emitUnlinkedJmp(), m_labels[i]);
     }
 
diff --git a/JavaScriptCore/VM/CTI.h b/JavaScriptCore/VM/CTI.h
index 909262c..3d65d9c 100644
--- a/JavaScriptCore/VM/CTI.h
+++ b/JavaScriptCore/VM/CTI.h
@@ -269,12 +269,12 @@
         static const int ctiArgumentInitSize = 0;
 #endif
         // These architecture specific value are used to enable repatching - see comment on op_put_by_id.
-        static const int repatchOffsetPutByIdStructureID = 19;
-        static const int repatchOffsetPutByIdPropertyMapOffset = 34;
+        static const int repatchOffsetPutByIdStructureID = 7;
+        static const int repatchOffsetPutByIdPropertyMapOffset = 22;
         // These architecture specific value are used to enable repatching - see comment on op_get_by_id.
-        static const int repatchOffsetGetByIdStructureID = 19;
-        static const int repatchOffsetGetByIdBranchToSlowCase = 25;
-        static const int repatchOffsetGetByIdPropertyMapOffset = 34;
+        static const int repatchOffsetGetByIdStructureID = 7;
+        static const int repatchOffsetGetByIdBranchToSlowCase = 13;
+        static const int repatchOffsetGetByIdPropertyMapOffset = 22;
 #if ENABLE(OPCODE_SAMPLING)
         static const int repatchOffsetGetByIdSlowCaseCall = 27 + 4 + ctiArgumentInitSize;
 #else
@@ -392,8 +392,9 @@
         JSValue* getConstantImmediateNumericArg(unsigned src);
         unsigned getDeTaggedConstantImmediate(JSValue* imm);
 
-        void emitJumpSlowCaseIfIsJSCell(X86Assembler::RegisterID reg, unsigned opcodeIndex);
-        void emitJumpSlowCaseIfNotJSCell(X86Assembler::RegisterID reg, unsigned opcodeIndex);
+        bool linkSlowCaseIfNotJSCell(const Vector<SlowCaseEntry>::iterator&, int vReg);
+        void emitJumpSlowCaseIfNotJSCell(X86Assembler::RegisterID, unsigned opcodeIndex);
+        void emitJumpSlowCaseIfNotJSCell(X86Assembler::RegisterID, unsigned opcodeIndex, int VReg);
 
         void emitJumpSlowCaseIfNotImmNum(X86Assembler::RegisterID, unsigned opcodeIndex);
         void emitJumpSlowCaseIfNotImmNums(X86Assembler::RegisterID, X86Assembler::RegisterID, unsigned opcodeIndex);
diff --git a/JavaScriptCore/VM/CodeBlock.h b/JavaScriptCore/VM/CodeBlock.h
index 41a749b..4ca6b97 100644
--- a/JavaScriptCore/VM/CodeBlock.h
+++ b/JavaScriptCore/VM/CodeBlock.h
@@ -255,6 +255,17 @@
             linkedCallerList.shrink(lastPos);
         }
 
+        inline bool isKnownNotImmediate(int index)
+        {
+            if (index == thisRegister)
+                return true;
+
+            if (isConstantRegisterIndex(index))
+                return !JSImmediate::isImmediate(getConstant(index));
+
+            return false;
+        }
+
         ALWAYS_INLINE bool isConstantRegisterIndex(int index)
         {
             return index >= numVars && index < numVars + numConstants;