Tidy up card walking logic
https://bugs.webkit.org/show_bug.cgi?id=69883

Reviewed by Gavin Barraclough.

Special case common cell sizes when walking a block's
cards.

* heap/CardSet.h:
(JSC::::testAndClear):
* heap/Heap.cpp:
(JSC::GCTimer::GCCounter::GCCounter):
(JSC::GCTimer::GCCounter::count):
(JSC::GCTimer::GCCounter::~GCCounter):
(JSC::Heap::markRoots):
* heap/MarkStack.cpp:
(JSC::MarkStack::reset):
* heap/MarkStack.h:
(JSC::MarkStack::visitCount):
(JSC::MarkStack::MarkStack):
(JSC::MarkStack::append):
* heap/MarkedBlock.h:
(JSC::MarkedBlock::gatherDirtyCellsWithSize):
(JSC::MarkedBlock::gatherDirtyCells):
* runtime/Structure.h:
(JSC::MarkStack::internalAppend):

git-svn-id: http://svn.webkit.org/repository/webkit/trunk@97203 268f45cc-cd09-0410-ab3c-d52691b4dbfc
diff --git a/Source/JavaScriptCore/ChangeLog b/Source/JavaScriptCore/ChangeLog
index d6282c8..aabd78e 100644
--- a/Source/JavaScriptCore/ChangeLog
+++ b/Source/JavaScriptCore/ChangeLog
@@ -1,3 +1,32 @@
+2011-10-11  Oliver Hunt  <oliver@apple.com>
+
+        Tidy up card walking logic
+        https://bugs.webkit.org/show_bug.cgi?id=69883
+
+        Reviewed by Gavin Barraclough.
+
+        Special case common cell sizes when walking a block's
+        cards.
+
+        * heap/CardSet.h:
+        (JSC::::testAndClear):
+        * heap/Heap.cpp:
+        (JSC::GCTimer::GCCounter::GCCounter):
+        (JSC::GCTimer::GCCounter::count):
+        (JSC::GCTimer::GCCounter::~GCCounter):
+        (JSC::Heap::markRoots):
+        * heap/MarkStack.cpp:
+        (JSC::MarkStack::reset):
+        * heap/MarkStack.h:
+        (JSC::MarkStack::visitCount):
+        (JSC::MarkStack::MarkStack):
+        (JSC::MarkStack::append):
+        * heap/MarkedBlock.h:
+        (JSC::MarkedBlock::gatherDirtyCellsWithSize):
+        (JSC::MarkedBlock::gatherDirtyCells):
+        * runtime/Structure.h:
+        (JSC::MarkStack::internalAppend):
+
 2011-10-11  Filip Pizlo  <fpizlo@apple.com>
 
         DFG virtual register allocator should be more aggressive in
diff --git a/Source/JavaScriptCore/heap/CardSet.h b/Source/JavaScriptCore/heap/CardSet.h
index 9951e99..dc44c02 100644
--- a/Source/JavaScriptCore/heap/CardSet.h
+++ b/Source/JavaScriptCore/heap/CardSet.h
@@ -47,7 +47,7 @@
     void markCardForAtom(const void*);
     uint8_t& cardForAtom(const void*);
     bool isCardMarked(size_t);
-    void clearCard(size_t);
+    bool testAndClear(size_t);
 
 private:
     uint8_t m_cards[cardCount];
@@ -78,10 +78,12 @@
     return m_cards[i];
 }
 
-template <size_t cardSize, size_t blockSize> void CardSet<cardSize, blockSize>::clearCard(size_t i)
+template <size_t cardSize, size_t blockSize> bool CardSet<cardSize, blockSize>::testAndClear(size_t i)
 {
     ASSERT(i < cardCount);
+    bool result = m_cards[i];
     m_cards[i] = 0;
+    return result;
 }
 
 }
diff --git a/Source/JavaScriptCore/heap/Heap.cpp b/Source/JavaScriptCore/heap/Heap.cpp
index 94ddbb1..ea63b6e 100644
--- a/Source/JavaScriptCore/heap/Heap.cpp
+++ b/Source/JavaScriptCore/heap/Heap.cpp
@@ -43,9 +43,20 @@
 
 static const size_t largeHeapSize = 16 * 1024 * 1024;
 static const size_t smallHeapSize = 512 * 1024;
-
+#define ENABLE_GC_LOGGING 1
 #if ENABLE(GC_LOGGING)
-    
+#if COMPILER(CLANG)
+#define DEFINE_GC_LOGGING_GLOBAL(type, name, arguments) \
+_Pragma("clang diagnostic push") \
+_Pragma("clang diagnostic ignored \"-Wglobal-constructors\"") \
+_Pragma("clang diagnostic ignored \"-Wexit-time-destructors\"") \
+static type name arguments; \
+_Pragma("clang diagnostic pop")
+#else
+#define DEFINE_GC_LOGGING_GLOBAL(type, name, arguments) \
+static type name arguments;
+#endif // COMPILER(CLANG)
+
 struct GCTimer {
     GCTimer(const char* name)
         : m_time(0)
@@ -86,14 +97,45 @@
     double m_start;
 };
 
-#define GCPHASE(name) static GCTimer name##Timer(#name); GCTimerScope name##TimerScope(&name##Timer)
-#define COND_GCPHASE(cond, name1, name2) static GCTimer name1##Timer(#name1); static GCTimer name2##Timer(#name2); GCTimerScope name1##CondTimerScope(cond ? &name1##Timer : &name2##Timer)
+struct GCCounter {
+    GCCounter(const char* name)
+        : m_name(name)
+        , m_count(0)
+        , m_total(0)
+        , m_min(10000000)
+        , m_max(0)
+    {
+    }
+    
+    void count(size_t amount)
+    {
+        m_count++;
+        m_total += amount;
+        if (amount < m_min)
+            m_min = amount;
+        if (amount > m_max)
+            m_max = amount;
+    }
+    ~GCCounter()
+    {
+        printf("%s: %zu values (avg. %zu, min. %zu, max. %zu)\n", m_name, m_total, m_total / m_count, m_min, m_max);
+    }
+    const char* m_name;
+    size_t m_count;
+    size_t m_total;
+    size_t m_min;
+    size_t m_max;
+};
 
+#define GCPHASE(name) DEFINE_GC_LOGGING_GLOBAL(GCTimer, name##Timer, (#name)); GCTimerScope name##TimerScope(&name##Timer)
+#define COND_GCPHASE(cond, name1, name2) DEFINE_GC_LOGGING_GLOBAL(GCTimer, name1##Timer, (#name1)); DEFINE_GC_LOGGING_GLOBAL(GCTimer, name2##Timer, (#name2)); GCTimerScope name1##CondTimerScope(cond ? &name1##Timer : &name2##Timer)
+#define GCCOUNTER(name, value) do { DEFINE_GC_LOGGING_GLOBAL(GCCounter, name##Counter, (#name)); name##Counter.count(value); } while (false)
+    
 #else
 
 #define GCPHASE(name) do { } while (false)
 #define COND_GCPHASE(cond, name1, name2) do { } while (false)
-
+#define GCCOUNTER(name, value) do { } while (false)
 #endif
 
 static size_t heapSizeForHint(HeapSize heapSize)
@@ -548,8 +590,10 @@
     HeapRootVisitor heapRootVisitor(visitor);
 
 #if ENABLE(GGC)
-    if (size_t dirtyCellCount = dirtyCells.size()) {
+    {
+        size_t dirtyCellCount = dirtyCells.size();
         GCPHASE(VisitDirtyCells);
+        GCCOUNTER(DirtyCellCount, dirtyCellCount);
         for (size_t i = 0; i < dirtyCellCount; i++) {
             heapRootVisitor.visitChildren(dirtyCells[i]);
             visitor.drain();
@@ -621,6 +665,7 @@
             // If the set of opaque roots has grown, more weak handles may have become reachable.
         } while (lastOpaqueRootCount != visitor.opaqueRootCount());
     }
+    GCCOUNTER(VisitedValueCount, visitor.visitCount());
     visitor.reset();
 
     m_operationInProgress = NoOperation;
diff --git a/Source/JavaScriptCore/heap/MarkStack.cpp b/Source/JavaScriptCore/heap/MarkStack.cpp
index bf74abb..eaf3d6c 100644
--- a/Source/JavaScriptCore/heap/MarkStack.cpp
+++ b/Source/JavaScriptCore/heap/MarkStack.cpp
@@ -38,6 +38,7 @@
 
 void MarkStack::reset()
 {
+    m_visitCount = 0;
     m_values.shrinkAllocation(pageSize());
     m_markSets.shrinkAllocation(pageSize());
     m_opaqueRoots.clear();
diff --git a/Source/JavaScriptCore/heap/MarkStack.h b/Source/JavaScriptCore/heap/MarkStack.h
index 455d44c..477537f 100644
--- a/Source/JavaScriptCore/heap/MarkStack.h
+++ b/Source/JavaScriptCore/heap/MarkStack.h
@@ -103,6 +103,8 @@
 
         void reset();
 
+        size_t visitCount() const { return m_visitCount; }
+
 #if ENABLE(SIMPLE_HEAP_PROFILING)
         VTableSpectrum m_visitedTypeCounts;
 #endif
@@ -139,6 +141,8 @@
         bool m_isCheckingForDefaultMarkViolation;
         bool m_isDraining;
 #endif
+    protected:
+        size_t m_visitCount;
     };
 
     inline MarkStack::MarkStack(void* jsArrayVPtr)
@@ -148,6 +152,7 @@
         , m_isCheckingForDefaultMarkViolation(false)
         , m_isDraining(false)
 #endif
+        , m_visitCount(0)
     {
     }
 
@@ -264,6 +269,7 @@
     {
         if (!count)
             return;
+        m_visitCount += count;
 #if ENABLE(GC_VALIDATION)
         validateSet(slot, count);
 #endif
diff --git a/Source/JavaScriptCore/heap/MarkedBlock.h b/Source/JavaScriptCore/heap/MarkedBlock.h
index ed81a9c..0ad4aa2 100644
--- a/Source/JavaScriptCore/heap/MarkedBlock.h
+++ b/Source/JavaScriptCore/heap/MarkedBlock.h
@@ -75,7 +75,7 @@
 
         static const size_t atomsPerBlock = blockSize / atomSize; // ~0.4% overhead
         static const size_t atomMask = atomsPerBlock - 1;
-        static const int cardShift = 10; // This is log2 of bytes per card.
+        static const int cardShift = 8; // This is log2 of bytes per card.
         static const size_t bytesPerCard = 1 << cardShift;
         static const int cardCount = blockSize / bytesPerCard;
         static const int cardMask = cardCount - 1;
@@ -150,6 +150,7 @@
 
         typedef Vector<JSCell*, 32> DirtyCellVector;
         inline void gatherDirtyCells(DirtyCellVector&);
+        template <int size> inline void gatherDirtyCellsWithSize(DirtyCellVector&);
 #endif
 
         template <typename Functor> void forEachCell(Functor&);
@@ -316,6 +317,35 @@
     }
 
 #if ENABLE(GGC)
+template <int _cellSize> void MarkedBlock::gatherDirtyCellsWithSize(DirtyCellVector& dirtyCells)
+{
+    if (m_cards.testAndClear(0)) {
+        char* ptr = reinterpret_cast<char*>(&atoms()[firstAtom()]);
+        const char* end = reinterpret_cast<char*>(this) + bytesPerCard;
+        while (ptr < end) {
+            JSCell* cell = reinterpret_cast<JSCell*>(ptr);
+            if (isMarked(cell))
+                dirtyCells.append(cell);
+            ptr += _cellSize;
+        }
+    }
+    
+    const size_t cellOffset = firstAtom() * atomSize % _cellSize;
+    for (size_t i = 1; i < m_cards.cardCount; i++) {
+        if (!m_cards.testAndClear(i))
+            continue;
+        char* ptr = reinterpret_cast<char*>(this) + i * bytesPerCard + cellOffset;
+        char* end = reinterpret_cast<char*>(this) + (i + 1) * bytesPerCard;
+        
+        while (ptr < end) {
+            JSCell* cell = reinterpret_cast<JSCell*>(ptr);
+            if (isMarked(cell))
+                dirtyCells.append(cell);
+            ptr += _cellSize;
+        }
+    }
+}
+
 void MarkedBlock::gatherDirtyCells(DirtyCellVector& dirtyCells)
 {
     COMPILE_ASSERT((int)m_cards.cardCount == (int)cardCount, MarkedBlockCardCountsMatch);
@@ -330,26 +360,39 @@
         return;
     
     size_t cellSize = this->cellSize();
+    if (cellSize == 32) {
+        gatherDirtyCellsWithSize<32>(dirtyCells);
+        return;
+    }
+    if (cellSize == 64) {
+        gatherDirtyCellsWithSize<64>(dirtyCells);
+        return;
+    }
+
     const size_t firstCellOffset = firstAtom() * atomSize % cellSize;
     
-    for (size_t i = 0; i < m_cards.cardCount; i++) {
-        if (!m_cards.isCardMarked(i))
-            continue;
-        char* ptr = reinterpret_cast<char*>(this);
-        if (i)
-            ptr += firstCellOffset + cellSize * ((i * bytesPerCard + cellSize - 1 - firstCellOffset) / cellSize);
-        else
-            ptr += firstAtom() * atomSize;
-        char* end = reinterpret_cast<char*>(this) + std::min((i + 1) * bytesPerCard, m_endAtom * atomSize);
-        
+    if (m_cards.testAndClear(0)) {
+        char* ptr = reinterpret_cast<char*>(this) + firstAtom() * atomSize;
+        char* end = reinterpret_cast<char*>(this) + bytesPerCard;
         while (ptr < end) {
             JSCell* cell = reinterpret_cast<JSCell*>(ptr);
-            ASSERT(*addressOfCardFor(cell));
             if (isMarked(cell))
                 dirtyCells.append(cell);
             ptr += cellSize;
         }
-        m_cards.clearCard(i);
+    }
+    for (size_t i = 1; i < m_cards.cardCount; i++) {
+        if (!m_cards.testAndClear(i))
+            continue;
+        char* ptr = reinterpret_cast<char*>(this) + firstCellOffset + cellSize * ((i * bytesPerCard + cellSize - 1 - firstCellOffset) / cellSize);
+        char* end = reinterpret_cast<char*>(this) + std::min((i + 1) * bytesPerCard, m_endAtom * atomSize);
+        
+        while (ptr < end) {
+            JSCell* cell = reinterpret_cast<JSCell*>(ptr);
+            if (isMarked(cell))
+                dirtyCells.append(cell);
+            ptr += cellSize;
+        }
     }
 }
 #endif
diff --git a/Source/JavaScriptCore/runtime/Structure.h b/Source/JavaScriptCore/runtime/Structure.h
index 0d2ffa1..235b682 100644
--- a/Source/JavaScriptCore/runtime/Structure.h
+++ b/Source/JavaScriptCore/runtime/Structure.h
@@ -345,6 +345,7 @@
     {
         ASSERT(!m_isCheckingForDefaultMarkViolation);
         ASSERT(cell);
+        m_visitCount++;
         if (Heap::testAndSetMarked(cell))
             return;
         if (cell->structure() && cell->structure()->typeInfo().type() >= CompoundType)