2008-11-13 Maciej Stachowiak <mjs@apple.com>
Reviewed by Cameron Zwarich.
- remove immediate checks from native codegen for known non-immediate cases like "this"
~0.5% speedup on v8 benchmarks
In the future we can extend this model to remove all sorts of
typechecks based on local type info or type inference.
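
In outline, the idea is as follows (a simplified sketch with
illustrative names, not the actual JSC code; the real helpers are in
the diff below): immediates are distinguished from JSCell pointers by
low tag bits, so the runtime check is a tag-bit test plus a
conditional jump, and the emitter can skip emitting it entirely when
the operand is provably a cell.

    // Simplified sketch; names are illustrative, not the actual CTI API.
    struct CodeBlock {
        int thisRegister;
        // "this" always holds an object inside a function body; the real
        // version also treats non-immediate constants as known cells.
        bool isKnownNotImmediate(int vReg) const { return vReg == thisRegister; }
    };

    void emitRuntimeCellCheck() { /* emits: test tag bits; jne -> slow case */ }

    void emitJumpSlowCaseIfNotJSCell(const CodeBlock& cb, int vReg)
    {
        if (cb.isKnownNotImmediate(vReg))
            return; // proven a cell at compile time: emit no check at all
        emitRuntimeCellCheck();
    }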
I also added an assertion to verify that each slow case links exactly
as many slow-case jumps as the corresponding fast case generated, and
fixed the pre-existing cases where this was not true.
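
The invariant behind that assertion, sketched here with hypothetical
names (the real checks are the two ASSERT_WITH_MESSAGEs added to
privateCompileSlowCases in this patch): the fast pass appends one
SlowCaseEntry per slow-case jump, each tagged with its opcode index,
and the slow pass must advance across exactly that many entries before
jumping back to the fast path.

    #include <cassert>
    #include <vector>

    struct SlowCaseEntry { unsigned to; }; // opcode index the jump belongs to

    // 'last' indexes the final entry linked for the opcode at 'firstTo'.
    void verifyLinkCount(const std::vector<SlowCaseEntry>& cases,
                         size_t last, unsigned firstTo)
    {
        // Too many links: we walked onto an entry from a later opcode.
        assert(cases[last].to == firstTo);
        // Too few links: the next entry still belongs to this opcode.
        assert(last + 1 == cases.size() || cases[last + 1].to != firstTo);
    }
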
* VM/CTI.cpp:
(JSC::CTI::emitJumpSlowCaseIfNotJSCell):
(JSC::CTI::linkSlowCaseIfNotJSCell):
(JSC::CTI::compileBinaryArithOp):
(JSC::CTI::compileBinaryArithOpSlowCase):
(JSC::CTI::privateCompileMainPass):
(JSC::CTI::privateCompileSlowCases):
* VM/CTI.h:
* VM/CodeBlock.h:
(JSC::CodeBlock::isKnownNotImmediate):
git-svn-id: http://svn.webkit.org/repository/webkit/trunk@38369 268f45cc-cd09-0410-ab3c-d52691b4dbfc
diff --git a/JavaScriptCore/ChangeLog b/JavaScriptCore/ChangeLog
index 2a364a0..8489d2e 100644
--- a/JavaScriptCore/ChangeLog
+++ b/JavaScriptCore/ChangeLog
@@ -1,3 +1,29 @@
+2008-11-13 Maciej Stachowiak <mjs@apple.com>
+
+ Reviewed by Cameron Zwarich.
+
+ - remove immediate checks from native codegen for known non-immediate cases like "this"
+
+ ~0.5% speedup on v8 benchmarks
+
+ In the future we can extend this model to remove all sorts of
+ typechecks based on local type info or type inference.
+
+ I also added an assertion to verify that each slow case links exactly
+ as many slow-case jumps as the corresponding fast case generated, and
+ fixed the pre-existing cases where this was not true.
+
+ * VM/CTI.cpp:
+ (JSC::CTI::emitJumpSlowCaseIfNotJSCell):
+ (JSC::CTI::linkSlowCaseIfNotJSCell):
+ (JSC::CTI::compileBinaryArithOp):
+ (JSC::CTI::compileBinaryArithOpSlowCase):
+ (JSC::CTI::privateCompileMainPass):
+ (JSC::CTI::privateCompileSlowCases):
+ * VM/CTI.h:
+ * VM/CodeBlock.h:
+ (JSC::CodeBlock::isKnownNotImmediate):
+
2008-11-13 Cameron Zwarich <zwarich@apple.com>
Reviewed by Maciej Stachowiak.
diff --git a/JavaScriptCore/VM/CTI.cpp b/JavaScriptCore/VM/CTI.cpp
index c727585..31e35e9 100644
--- a/JavaScriptCore/VM/CTI.cpp
+++ b/JavaScriptCore/VM/CTI.cpp
@@ -505,6 +505,23 @@
m_slowCases.append(SlowCaseEntry(m_jit.emitUnlinkedJne(), opcodeIndex));
}
+ALWAYS_INLINE void CTI::emitJumpSlowCaseIfNotJSCell(X86Assembler::RegisterID reg, unsigned opcodeIndex, int vReg)
+{
+ if (m_codeBlock->isKnownNotImmediate(vReg))
+ return;
+
+ emitJumpSlowCaseIfNotJSCell(reg, opcodeIndex);
+}
+
+ALWAYS_INLINE bool CTI::linkSlowCaseIfNotJSCell(const Vector<SlowCaseEntry>::iterator& iter, int vReg)
+{
+ if (m_codeBlock->isKnownNotImmediate(vReg))
+ return false;
+
+ m_jit.link(iter->from, m_jit.label());
+ return true;
+}
+
ALWAYS_INLINE void CTI::emitJumpSlowCaseIfNotImmNum(X86Assembler::RegisterID reg, unsigned opcodeIndex)
{
m_jit.testl_i32r(JSImmediate::TagBitTypeInteger, reg);
@@ -856,7 +873,7 @@
m_jit.testl_i32r(JSImmediate::TagBitTypeInteger, X86::edx);
X86Assembler::JmpSrc op2imm = m_jit.emitUnlinkedJne();
if (!types.second().definitelyIsNumber()) {
- emitJumpSlowCaseIfNotJSCell(X86::edx, i);
+ emitJumpSlowCaseIfNotJSCell(X86::edx, i, src2);
m_jit.cmpl_i32m(reinterpret_cast<unsigned>(numberStructureID), OBJECT_OFFSET(JSCell, m_structureID), X86::edx);
m_slowCases.append(SlowCaseEntry(m_jit.emitUnlinkedJne(), i));
}
@@ -866,7 +883,7 @@
m_jit.testl_i32r(JSImmediate::TagBitTypeInteger, X86::eax);
X86Assembler::JmpSrc op1imm = m_jit.emitUnlinkedJne();
if (!types.first().definitelyIsNumber()) {
- emitJumpSlowCaseIfNotJSCell(X86::eax, i);
+ emitJumpSlowCaseIfNotJSCell(X86::eax, i, src1);
m_jit.cmpl_i32m(reinterpret_cast<unsigned>(numberStructureID), OBJECT_OFFSET(JSCell, m_structureID), X86::eax);
m_slowCases.append(SlowCaseEntry(m_jit.emitUnlinkedJne(), i));
}
@@ -903,7 +920,7 @@
m_jit.testl_i32r(JSImmediate::TagBitTypeInteger, X86::eax);
X86Assembler::JmpSrc op1imm = m_jit.emitUnlinkedJne();
if (!types.first().definitelyIsNumber()) {
- emitJumpSlowCaseIfNotJSCell(X86::eax, i);
+ emitJumpSlowCaseIfNotJSCell(X86::eax, i, src1);
m_jit.cmpl_i32m(reinterpret_cast<unsigned>(numberStructureID), OBJECT_OFFSET(JSCell, m_structureID), X86::eax);
m_slowCases.append(SlowCaseEntry(m_jit.emitUnlinkedJne(), i));
}
@@ -913,7 +930,7 @@
m_jit.testl_i32r(JSImmediate::TagBitTypeInteger, X86::edx);
X86Assembler::JmpSrc op2imm = m_jit.emitUnlinkedJne();
if (!types.second().definitelyIsNumber()) {
- emitJumpSlowCaseIfNotJSCell(X86::edx, i);
+ emitJumpSlowCaseIfNotJSCell(X86::edx, i, src2);
m_jit.cmpl_i32m(reinterpret_cast<unsigned>(numberStructureID), OBJECT_OFFSET(JSCell, m_structureID), X86::edx);
m_slowCases.append(SlowCaseEntry(m_jit.emitUnlinkedJne(), i));
}
@@ -998,22 +1015,26 @@
m_jit.link(iter->from, here);
if (types.second().isReusable() && isSSE2Present()) {
if (!types.first().definitelyIsNumber()) {
- m_jit.link((++iter)->from, here);
- m_jit.link((++iter)->from, here);
+ if (linkSlowCaseIfNotJSCell(++iter, src2))
+ ++iter;
+ m_jit.link(iter->from, here);
}
if (!types.second().definitelyIsNumber()) {
- m_jit.link((++iter)->from, here);
- m_jit.link((++iter)->from, here);
+ if (linkSlowCaseIfNotJSCell(++iter, src1))
+ ++iter;
+ m_jit.link(iter->from, here);
}
m_jit.link((++iter)->from, here);
} else if (types.first().isReusable() && isSSE2Present()) {
if (!types.first().definitelyIsNumber()) {
- m_jit.link((++iter)->from, here);
- m_jit.link((++iter)->from, here);
+ if (linkSlowCaseIfNotJSCell(++iter, src1))
+ ++iter;
+ m_jit.link(iter->from, here);
}
if (!types.second().definitelyIsNumber()) {
- m_jit.link((++iter)->from, here);
- m_jit.link((++iter)->from, here);
+ if (linkSlowCaseIfNotJSCell(++iter, src2))
+ ++iter;
+ m_jit.link(iter->from, here);
}
m_jit.link((++iter)->from, here);
} else
@@ -1183,16 +1204,19 @@
// to just after the arguments have been loaded into registers 'hotPathBegin', and we generate code
// such that the StructureID & offset are always at the same distance from this.
- emitGetArg(instruction[i + 1].u.operand, X86::eax, i);
+ int baseVReg = instruction[i + 1].u.operand;
+ emitGetArg(baseVReg, X86::eax, i);
emitGetArg(instruction[i + 3].u.operand, X86::edx, i);
ASSERT(m_codeBlock->propertyAccessInstructions[propertyAccessInstructionIndex].opcodeIndex == i);
+
+ // Jump to a slow case if either the base object is an immediate, or if the StructureID does not match.
+ emitJumpSlowCaseIfNotJSCell(X86::eax, i, baseVReg);
+
X86Assembler::JmpDst hotPathBegin = m_jit.label();
m_propertyAccessCompilationInfo[propertyAccessInstructionIndex].hotPathBegin = hotPathBegin;
++propertyAccessInstructionIndex;
- // Jump to a slow case if either the base object is an immediate, or if the StructureID does not match.
- emitJumpSlowCaseIfNotJSCell(X86::eax, i);
// It is important that the following instruction plants a 32-bit immediate, in order that it can be patched over.
m_jit.cmpl_i32m(repatchGetByIdDefaultStructureID, OBJECT_OFFSET(JSCell, m_structureID), X86::eax);
ASSERT(X86Assembler::getDifferenceBetweenLabels(hotPathBegin, m_jit.label()) == repatchOffsetPutByIdStructureID);
@@ -1212,15 +1236,17 @@
// to array-length / prototype access trampolines, and finally we also record the property-map access offset as a label
// to jump back to if one of these trampolines finds a match.
- emitGetArg(instruction[i + 2].u.operand, X86::eax, i);
+ int baseVReg = instruction[i + 2].u.operand;
+ emitGetArg(baseVReg, X86::eax, i);
ASSERT(m_codeBlock->propertyAccessInstructions[propertyAccessInstructionIndex].opcodeIndex == i);
+ emitJumpSlowCaseIfNotJSCell(X86::eax, i, baseVReg);
+
X86Assembler::JmpDst hotPathBegin = m_jit.label();
m_propertyAccessCompilationInfo[propertyAccessInstructionIndex].hotPathBegin = hotPathBegin;
++propertyAccessInstructionIndex;
- emitJumpSlowCaseIfNotJSCell(X86::eax, i);
m_jit.cmpl_i32m(repatchGetByIdDefaultStructureID, OBJECT_OFFSET(JSCell, m_structureID), X86::eax);
ASSERT(X86Assembler::getDifferenceBetweenLabels(hotPathBegin, m_jit.label()) == repatchOffsetGetByIdStructureID);
m_slowCases.append(SlowCaseEntry(m_jit.emitUnlinkedJne(), i));
@@ -1553,7 +1579,9 @@
break;
}
case op_negate: {
- emitGetArg(instruction[i + 2].u.operand, X86::eax, i);
+ int srcVReg = instruction[i + 2].u.operand;
+ emitGetArg(srcVReg, X86::eax, i);
+
m_jit.testl_i32r(JSImmediate::TagBitTypeInteger, X86::eax);
X86Assembler::JmpSrc notImmediate = m_jit.emitUnlinkedJe();
@@ -1579,7 +1607,7 @@
m_jit.link(notImmediate, m_jit.label());
ResultType resultType(instruction[i + 3].u.resultType);
if (!resultType.definitelyIsNumber()) {
- emitJumpSlowCaseIfNotJSCell(X86::eax, i);
+ emitJumpSlowCaseIfNotJSCell(X86::eax, i, srcVReg);
StructureID* numberStructureID = m_globalData->numberStructureID.get();
m_jit.cmpl_i32m(reinterpret_cast<unsigned>(numberStructureID), OBJECT_OFFSET(JSCell, m_structureID), X86::eax);
m_slowCases.append(SlowCaseEntry(m_jit.emitUnlinkedJne(), i));
@@ -2027,12 +2055,13 @@
break;
}
case op_to_jsnumber: {
- emitGetArg(instruction[i + 2].u.operand, X86::eax, i);
+ int srcVReg = instruction[i + 2].u.operand;
+ emitGetArg(srcVReg, X86::eax, i);
m_jit.testl_i32r(JSImmediate::TagBitTypeInteger, X86::eax);
X86Assembler::JmpSrc wasImmediate = m_jit.emitUnlinkedJnz();
- emitJumpSlowCaseIfNotJSCell(X86::eax, i);
+ emitJumpSlowCaseIfNotJSCell(X86::eax, i, srcVReg);
m_jit.movl_mr(OBJECT_OFFSET(JSCell, m_structureID), X86::eax, X86::ecx);
m_jit.cmpl_i32m(NumberType, OBJECT_OFFSET(StructureID, m_typeInfo.m_type), X86::ecx);
@@ -2338,6 +2367,18 @@
i += 4; \
break; \
}
+
+#define CTI_COMPILE_BINARY_OP_SLOW_CASE_DOUBLE_ENTRY(name) \
+ case name: { \
+ m_jit.link(iter->from, m_jit.label()); \
+ m_jit.link((++iter)->from, m_jit.label()); \
+ emitGetPutArg(instruction[i + 2].u.operand, 0, X86::ecx); \
+ emitGetPutArg(instruction[i + 3].u.operand, 4, X86::ecx); \
+ emitCTICall(instruction + i, i, Machine::cti_##name); \
+ emitPutResult(instruction[i + 1].u.operand); \
+ i += 4; \
+ break; \
+ }
void CTI::privateCompileSlowCases()
{
@@ -2350,6 +2391,10 @@
killLastResultRegister();
unsigned i = iter->to;
+#ifndef NDEBUG
+ unsigned firstTo = i;
+#endif
+
switch (OpcodeID opcodeID = m_machine->getOpcodeID(instruction[i].u.opcode)) {
case op_convert_this: {
m_jit.link(iter->from, m_jit.label());
@@ -2432,6 +2477,14 @@
}
case op_negate: {
m_jit.link(iter->from, m_jit.label());
+ m_jit.link((++iter)->from, m_jit.label());
+ ResultType resultType(instruction[i + 3].u.resultType);
+ if (!resultType.definitelyIsNumber()) {
+ if (linkSlowCaseIfNotJSCell(++iter, instruction[i + 2].u.operand))
+ ++iter;
+ m_jit.link(iter->from, m_jit.label());
+ }
+
emitGetPutArg(instruction[i + 2].u.operand, 0, X86::ecx);
emitCTICall(instruction + i, i, Machine::cti_op_negate);
emitPutResult(instruction[i + 1].u.operand);
@@ -2488,8 +2541,9 @@
break;
}
case op_put_by_id: {
+ if (linkSlowCaseIfNotJSCell(iter, instruction[i + 1].u.operand))
+ ++iter;
m_jit.link(iter->from, m_jit.label());
- m_jit.link((++iter)->from, m_jit.label());
Identifier* ident = &(m_codeBlock->identifiers[instruction[i + 2].u.operand]);
emitPutArgConstant(reinterpret_cast<unsigned>(ident), 4);
@@ -2512,8 +2566,9 @@
// prototype access trampoline fail we want to bail out back to here. To do so we can subtract back
// the distance from the call to the head of the slow case.
+ if (linkSlowCaseIfNotJSCell(iter, instruction[i + 2].u.operand))
+ ++iter;
m_jit.link(iter->from, m_jit.label());
- m_jit.link((++iter)->from, m_jit.label());
#ifndef NDEBUG
X86Assembler::JmpDst coldPathBegin = m_jit.label();
@@ -2762,10 +2817,12 @@
i += 4;
break;
}
- CTI_COMPILE_BINARY_OP_SLOW_CASE(op_stricteq);
- CTI_COMPILE_BINARY_OP_SLOW_CASE(op_nstricteq);
+ CTI_COMPILE_BINARY_OP_SLOW_CASE_DOUBLE_ENTRY(op_stricteq);
+ CTI_COMPILE_BINARY_OP_SLOW_CASE_DOUBLE_ENTRY(op_nstricteq);
case op_instanceof: {
m_jit.link(iter->from, m_jit.label());
+ m_jit.link((++iter)->from, m_jit.label());
+ m_jit.link((++iter)->from, m_jit.label());
emitGetPutArg(instruction[i + 2].u.operand, 0, X86::ecx);
emitGetPutArg(instruction[i + 3].u.operand, 4, X86::ecx);
emitGetPutArg(instruction[i + 4].u.operand, 8, X86::ecx);
@@ -2798,6 +2855,7 @@
int32_t value;
if (src1Value && ((value = JSImmediate::intValue(src1Value)) > 0)) {
m_jit.link(iter->from, m_jit.label());
+ m_jit.link((++iter)->from, m_jit.label());
// There is an extra slow case for (op1 * -N) or (-N * op2), to check for 0 since this should produce a result of -0.
emitGetPutArg(src1, 0, X86::ecx);
emitGetPutArg(src2, 4, X86::ecx);
@@ -2805,6 +2863,7 @@
emitPutResult(dst);
} else if (src2Value && ((value = JSImmediate::intValue(src2Value)) > 0)) {
m_jit.link(iter->from, m_jit.label());
+ m_jit.link((++iter)->from, m_jit.label());
// There is an extra slow case for (op1 * -N) or (-N * op2), to check for 0 since this should produce a result of -0.
emitGetPutArg(src1, 0, X86::ecx);
emitGetPutArg(src2, 4, X86::ecx);
@@ -2956,7 +3015,8 @@
break;
}
case op_to_jsnumber: {
- m_jit.link(iter->from, m_jit.label());
+ if (linkSlowCaseIfNotJSCell(iter, instruction[i + 2].u.operand))
+ ++iter;
m_jit.link(iter->from, m_jit.label());
emitPutArg(X86::eax, 0);
@@ -2972,6 +3032,9 @@
break;
}
+ ASSERT_WITH_MESSAGE((iter + 1) == m_slowCases.end() || firstTo != (iter + 1)->to, "Not enough jumps linked in slow case codegen.");
+ ASSERT_WITH_MESSAGE(firstTo == iter->to, "Too many jumps linked in slow case codegen.");
+
m_jit.link(m_jit.emitUnlinkedJmp(), m_labels[i]);
}
diff --git a/JavaScriptCore/VM/CTI.h b/JavaScriptCore/VM/CTI.h
index 909262c..3d65d9c 100644
--- a/JavaScriptCore/VM/CTI.h
+++ b/JavaScriptCore/VM/CTI.h
@@ -269,12 +269,12 @@
static const int ctiArgumentInitSize = 0;
#endif
// These architecture-specific values are used to enable repatching - see comment on op_put_by_id.
- static const int repatchOffsetPutByIdStructureID = 19;
- static const int repatchOffsetPutByIdPropertyMapOffset = 34;
+ static const int repatchOffsetPutByIdStructureID = 7;
+ static const int repatchOffsetPutByIdPropertyMapOffset = 22;
// These architecture-specific values are used to enable repatching - see comment on op_get_by_id.
- static const int repatchOffsetGetByIdStructureID = 19;
- static const int repatchOffsetGetByIdBranchToSlowCase = 25;
- static const int repatchOffsetGetByIdPropertyMapOffset = 34;
+ static const int repatchOffsetGetByIdStructureID = 7;
+ static const int repatchOffsetGetByIdBranchToSlowCase = 13;
+ static const int repatchOffsetGetByIdPropertyMapOffset = 22;
#if ENABLE(OPCODE_SAMPLING)
static const int repatchOffsetGetByIdSlowCaseCall = 27 + 4 + ctiArgumentInitSize;
#else
@@ -392,8 +392,9 @@
JSValue* getConstantImmediateNumericArg(unsigned src);
unsigned getDeTaggedConstantImmediate(JSValue* imm);
- void emitJumpSlowCaseIfIsJSCell(X86Assembler::RegisterID reg, unsigned opcodeIndex);
- void emitJumpSlowCaseIfNotJSCell(X86Assembler::RegisterID reg, unsigned opcodeIndex);
+ bool linkSlowCaseIfNotJSCell(const Vector<SlowCaseEntry>::iterator&, int vReg);
+ void emitJumpSlowCaseIfNotJSCell(X86Assembler::RegisterID, unsigned opcodeIndex);
+ void emitJumpSlowCaseIfNotJSCell(X86Assembler::RegisterID, unsigned opcodeIndex, int vReg);
void emitJumpSlowCaseIfNotImmNum(X86Assembler::RegisterID, unsigned opcodeIndex);
void emitJumpSlowCaseIfNotImmNums(X86Assembler::RegisterID, X86Assembler::RegisterID, unsigned opcodeIndex);
diff --git a/JavaScriptCore/VM/CodeBlock.h b/JavaScriptCore/VM/CodeBlock.h
index 41a749b..4ca6b97 100644
--- a/JavaScriptCore/VM/CodeBlock.h
+++ b/JavaScriptCore/VM/CodeBlock.h
@@ -255,6 +255,17 @@
linkedCallerList.shrink(lastPos);
}
+ inline bool isKnownNotImmediate(int index)
+ {
+ if (index == thisRegister)
+ return true;
+
+ if (isConstantRegisterIndex(index))
+ return !JSImmediate::isImmediate(getConstant(index));
+
+ return false;
+ }
+
ALWAYS_INLINE bool isConstantRegisterIndex(int index)
{
return index >= numVars && index < numVars + numConstants;