| /* |
| * Copyright (C) 2015-2017 Apple Inc. All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * |
| * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
| * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
| * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
| * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| #include "config.h" |
| #include "AirAllocateRegistersByGraphColoring.h" |
| |
| #if ENABLE(B3_JIT) |
| |
| #include "AirCode.h" |
| #include "AirFixSpillsAfterTerminals.h" |
| #include "AirInsertionSet.h" |
| #include "AirInstInlines.h" |
| #include "AirLiveness.h" |
| #include "AirPadInterference.h" |
| #include "AirPhaseScope.h" |
| #include "AirTmpInlines.h" |
| #include "AirTmpWidth.h" |
| #include "AirUseCounts.h" |
| #include <wtf/ListDump.h> |
| |
| namespace JSC { namespace B3 { namespace Air { |
| |
| namespace { |
| |
| bool debug = false; |
| bool traceDebug = false; |
| bool reportStats = false; |
| |
| // The AbstractColoringAllocator defines all the code that is independent |
| // of the bank or register and can be shared when allocating registers. |
| template<typename IndexType, typename TmpMapper> |
| class AbstractColoringAllocator { |
| public: |
| AbstractColoringAllocator(Code& code, const Vector<Reg>& regsInPriorityOrder, IndexType lastPrecoloredRegisterIndex, unsigned tmpArraySize, const HashSet<unsigned>& unspillableTmps, const UseCounts<Tmp>& useCounts) |
| : m_regsInPriorityOrder(regsInPriorityOrder) |
| , m_lastPrecoloredRegisterIndex(lastPrecoloredRegisterIndex) |
| , m_unspillableTmps(unspillableTmps) |
| , m_useCounts(useCounts) |
| , m_code(code) |
| { |
| initializeDegrees(tmpArraySize); |
| |
| m_adjacencyList.resize(tmpArraySize); |
| m_moveList.resize(tmpArraySize); |
| m_coalescedTmps.fill(0, tmpArraySize); |
| m_isOnSelectStack.ensureSize(tmpArraySize); |
| m_spillWorklist.ensureSize(tmpArraySize); |
| } |
| |
| protected: |
| |
| unsigned registerCount() const { return m_regsInPriorityOrder.size(); } |
| |
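| // Resolve a Tmp index to its coalescing representative by chasing the alias chain. For |
| // example, if Tmp 5 was coalesced into Tmp 3 and Tmp 3 into Tmp 2, getAlias(5) returns 2. |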
| IndexType getAlias(IndexType tmpIndex) const |
| { |
| IndexType alias = tmpIndex; |
| while (IndexType nextAlias = m_coalescedTmps[alias]) |
| alias = nextAlias; |
| return alias; |
| } |
| |
| void addEdge(IndexType a, IndexType b) |
| { |
| if (a == b) |
| return; |
| addEdgeDistinct(a, b); |
| } |
| |
| void addToSpill(unsigned toSpill) |
| { |
| if (m_unspillableTmps.contains(toSpill)) |
| return; |
| |
| m_spillWorklist.add(toSpill); |
| } |
| |
| bool isPrecolored(IndexType tmpIndex) |
| { |
| return tmpIndex <= m_lastPrecoloredRegisterIndex; |
| } |
| |
| void initializeDegrees(unsigned tmpArraySize) |
| { |
| m_degrees.resize(tmpArraySize); |
| |
| // All precolored registers have an "infinite" degree. |
| unsigned firstNonRegIndex = m_lastPrecoloredRegisterIndex + 1; |
| for (unsigned i = 0; i < firstNonRegIndex; ++i) |
| m_degrees[i] = std::numeric_limits<unsigned>::max(); |
| |
| memset(m_degrees.data() + firstNonRegIndex, 0, (tmpArraySize - firstNonRegIndex) * sizeof(unsigned)); |
| } |
| |
| void addEdgeDistinct(IndexType a, IndexType b) |
| { |
| ASSERT(a != b); |
| bool isNewEdge = addInterferenceEdge(InterferenceEdge(a, b)); |
| if (isNewEdge) { |
| if (!isPrecolored(a)) { |
| ASSERT(!m_adjacencyList[a].contains(b)); |
| m_adjacencyList[a].append(b); |
| m_degrees[a]++; |
| } |
| |
| if (!isPrecolored(b)) { |
| ASSERT(!m_adjacencyList[b].contains(a)); |
| m_adjacencyList[b].append(a); |
| m_degrees[b]++; |
| } |
| } |
| } |
| |
| bool addEdgeDistinctWithoutDegreeChange(IndexType a, IndexType b) |
| { |
| ASSERT(a != b); |
| bool isNewEdge = addInterferenceEdge(InterferenceEdge(a, b)); |
| if (isNewEdge) { |
| if (!isPrecolored(a)) { |
| ASSERT(!m_adjacencyList[a].contains(b)); |
| m_adjacencyList[a].append(b); |
| } |
| |
| if (!isPrecolored(b)) { |
| ASSERT(!m_adjacencyList[b].contains(a)); |
| m_adjacencyList[b].append(a); |
| } |
| return true; |
| } |
| return false; |
| } |
| |
| template<typename Function> |
| void forEachAdjacent(IndexType tmpIndex, Function function) |
| { |
| for (IndexType adjacentTmpIndex : m_adjacencyList[tmpIndex]) { |
| if (!hasBeenSimplified(adjacentTmpIndex)) |
| function(adjacentTmpIndex); |
| } |
| } |
| |
| bool hasBeenSimplified(IndexType tmpIndex) |
| { |
| if (!ASSERT_DISABLED) { |
| if (!!m_coalescedTmps[tmpIndex]) |
| ASSERT(getAlias(tmpIndex) != tmpIndex); |
| } |
| |
| return m_isOnSelectStack.quickGet(tmpIndex) || !!m_coalescedTmps[tmpIndex]; |
| } |
| |
| bool canBeSafelyCoalesced(IndexType u, IndexType v) |
| { |
| ASSERT(!isPrecolored(v)); |
| if (isPrecolored(u)) |
| return precoloredCoalescingHeuristic(u, v); |
| return conservativeHeuristic(u, v); |
| } |
| |
| bool conservativeHeuristic(IndexType u, IndexType v) |
| { |
| // This uses Briggs' conservative coalescing rule: |
| // If the combined node has fewer than K adjacent nodes with a degree >= K, |
| // it is safe to combine the two nodes. The reason is that all the other (low-degree) |
| // neighbors can be simplified away first, which leaves the combined node with fewer |
| // than K neighbors, so a color is guaranteed to be available for it. |
| ASSERT(u != v); |
| ASSERT(!isPrecolored(u)); |
| ASSERT(!isPrecolored(v)); |
| |
| const auto& adjacentsOfU = m_adjacencyList[u]; |
| const auto& adjacentsOfV = m_adjacencyList[v]; |
| |
| if (adjacentsOfU.size() + adjacentsOfV.size() < registerCount()) { |
| // Shortcut: if the total number of adjacent nodes is less than the number of registers, the condition is always met. |
| return true; |
| } |
| |
| HashSet<IndexType> highOrderAdjacents; |
| |
| for (IndexType adjacentTmpIndex : adjacentsOfU) { |
| ASSERT(adjacentTmpIndex != v); |
| ASSERT(adjacentTmpIndex != u); |
| if (!hasBeenSimplified(adjacentTmpIndex) && m_degrees[adjacentTmpIndex] >= registerCount()) { |
| auto addResult = highOrderAdjacents.add(adjacentTmpIndex); |
| if (addResult.isNewEntry && highOrderAdjacents.size() >= registerCount()) |
| return false; |
| } |
| } |
| for (IndexType adjacentTmpIndex : adjacentsOfV) { |
| ASSERT(adjacentTmpIndex != u); |
| ASSERT(adjacentTmpIndex != v); |
| if (!hasBeenSimplified(adjacentTmpIndex) && m_degrees[adjacentTmpIndex] >= registerCount()) { |
| auto addResult = highOrderAdjacents.add(adjacentTmpIndex); |
| if (addResult.isNewEntry && highOrderAdjacents.size() >= registerCount()) |
| return false; |
| } |
| } |
| |
| ASSERT(highOrderAdjacents.size() < registerCount()); |
| return true; |
| } |
| |
| bool precoloredCoalescingHeuristic(IndexType u, IndexType v) |
| { |
| if (traceDebug) |
| dataLog(" Checking precoloredCoalescingHeuristic\n"); |
| ASSERT(isPrecolored(u)); |
| ASSERT(!isPrecolored(v)); |
| |
| // If any adjacent of the non-colored node is not an adjacent of the colored node AND has a degree >= K, |
| // there is a risk that this node needs to have the same color as our precolored node. If we coalesce such |
| // a move, we may create an uncolorable graph. |
| const auto& adjacentsOfV = m_adjacencyList[v]; |
| for (unsigned adjacentTmpIndex : adjacentsOfV) { |
| if (!isPrecolored(adjacentTmpIndex) |
| && !hasBeenSimplified(adjacentTmpIndex) |
| && m_degrees[adjacentTmpIndex] >= registerCount() |
| && !hasInterferenceEdge(InterferenceEdge(u, adjacentTmpIndex))) |
| return false; |
| } |
| return true; |
| } |
| |
| void addBias(IndexType u, IndexType v) |
| { |
| // We implement biased coloring as proposed by Briggs. See section |
| // 5.3.3 of his thesis for more information: http://www.cs.utexas.edu/users/mckinley/380C/lecs/briggs-thesis-1992.pdf |
| // The main idea of biased coloring is this: |
| // We try to coalesce a move between u and v, but the conservative heuristic |
| // fails. We don't want to coalesce the move because we don't want to risk |
| // creating an uncolorable graph. However, the conservative heuristic failing is not |
| // proof that the graph would be uncolorable if the move were indeed coalesced. |
| // So, when we go to color the tmps, we'll remember that we really want the |
| // same register for u and v, and if legal, we will assign them to the same register. |
| if (!isPrecolored(u)) |
| m_biases.add(u, IndexTypeSet()).iterator->value.add(v); |
| if (!isPrecolored(v)) |
| m_biases.add(v, IndexTypeSet()).iterator->value.add(u); |
| } |
| |
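| // Pick, among the Tmps in the spill worklist, the one with the highest spill score: spilling a |
| // high-degree Tmp relieves the most interference, while spilling a Tmp with few warm uses and |
| // defs costs the least in reload/store traffic. |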
| IndexType selectSpill() |
| { |
| if (!m_hasSelectedSpill) { |
| m_hasSelectedSpill = true; |
| |
| if (m_hasCoalescedNonTrivialMove) |
| m_coalescedTmpsAtSpill = m_coalescedTmps; |
| } |
| |
| auto iterator = m_spillWorklist.begin(); |
| |
| RELEASE_ASSERT_WITH_MESSAGE(iterator != m_spillWorklist.end(), "selectSpill() called when there was no spill."); |
| RELEASE_ASSERT_WITH_MESSAGE(!m_unspillableTmps.contains(*iterator), "trying to spill unspillable tmp"); |
| |
| // Higher score means more desirable to spill. Lower scores maximize the likelihood that a tmp |
| // gets a register. |
| auto score = [&] (Tmp tmp) -> double { |
| // Air exposes the concept of "fast tmps", and we interpret that to mean that the tmp |
| // should always be in a register. |
| if (m_code.isFastTmp(tmp)) |
| return 0; |
| |
| // All else being equal, the score should be directly related to the degree. |
| double degree = static_cast<double>(m_degrees[TmpMapper::absoluteIndex(tmp)]); |
| |
| // All else being equal, the score should be inversely related to the number of warm uses and |
| // defs. |
| const UseCounts<Tmp>::Counts* counts = m_useCounts[tmp]; |
| if (!counts) |
| return std::numeric_limits<double>::infinity(); |
| |
| double uses = counts->numWarmUses + counts->numDefs; |
| |
| // If it's a constant, then it's not as bad to spill. We can rematerialize it in many |
| // cases. |
| if (counts->numConstDefs == 1 && counts->numDefs == 1) |
| uses /= 2; |
| |
| return degree / uses; |
| }; |
| |
| auto victimIterator = iterator; |
| double maxScore = score(TmpMapper::tmpFromAbsoluteIndex(*iterator)); |
| |
| ++iterator; |
| for (;iterator != m_spillWorklist.end(); ++iterator) { |
| double tmpScore = score(TmpMapper::tmpFromAbsoluteIndex(*iterator)); |
| if (tmpScore > maxScore) { |
| ASSERT(!m_unspillableTmps.contains(*iterator)); |
| victimIterator = iterator; |
| maxScore = tmpScore; |
| } |
| } |
| |
| IndexType victimIndex = *victimIterator; |
| if (traceDebug) |
| dataLogLn("Selecting spill ", victimIndex); |
| ASSERT(!isPrecolored(victimIndex)); |
| return victimIndex; |
| } |
| |
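| // Pop the select stack and assign a register to each Tmp: first try a register recorded as a |
| // bias (so moves we failed to coalesce can still end up register-identical), then fall back to |
| // the first non-conflicting register in priority order. Tmps for which no register is free are |
| // appended to m_spilledTmps. |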
| void assignColors() |
| { |
| ASSERT(m_simplifyWorklist.isEmpty()); |
| ASSERT(m_spillWorklist.isEmpty()); |
| |
| // Reclaim as much memory as possible. |
| m_interferenceEdges.clear(); |
| |
| m_degrees.clear(); |
| m_moveList.clear(); |
| m_simplifyWorklist.clear(); |
| m_spillWorklist.clearAll(); |
| |
| // Try to color the Tmp on the stack. |
| m_coloredTmp.resize(m_adjacencyList.size()); |
| |
| { |
| Vector<IndexType, 4> nowAliasedBiases; |
| for (IndexType key : m_biases.keys()) { |
| if (key != getAlias(key)) |
| nowAliasedBiases.append(key); |
| } |
| for (IndexType key : nowAliasedBiases) { |
| IndexTypeSet keysBiases(m_biases.take(key)); |
| auto addResult = m_biases.add(getAlias(key), IndexTypeSet()); |
| if (addResult.isNewEntry) { |
| ASSERT(!addResult.iterator->value.size()); |
| addResult.iterator->value = WTFMove(keysBiases); |
| } else { |
| IndexTypeSet& setToAddTo = addResult.iterator->value; |
| for (IndexType tmp : keysBiases) |
| setToAddTo.add(tmp); |
| } |
| } |
| } |
| |
| while (!m_selectStack.isEmpty()) { |
| unsigned tmpIndex = m_selectStack.takeLast(); |
| ASSERT(!isPrecolored(tmpIndex)); |
| ASSERT(!m_coloredTmp[tmpIndex]); |
| ASSERT(getAlias(tmpIndex) == tmpIndex); |
| |
| RegisterSet coloredRegisters; |
| for (IndexType adjacentTmpIndex : m_adjacencyList[tmpIndex]) { |
| IndexType aliasTmpIndex = getAlias(adjacentTmpIndex); |
| Reg reg = m_coloredTmp[aliasTmpIndex]; |
| |
| ASSERT(!isPrecolored(aliasTmpIndex) || (isPrecolored(aliasTmpIndex) && reg)); |
| |
| if (reg) |
| coloredRegisters.set(reg); |
| } |
| |
| bool colorAssigned = false; |
| auto iter = m_biases.find(tmpIndex); |
| if (iter != m_biases.end()) { |
| for (IndexType desiredBias : iter->value) { |
| if (Reg desiredColor = m_coloredTmp[getAlias(desiredBias)]) { |
| if (!coloredRegisters.get(desiredColor)) { |
| m_coloredTmp[tmpIndex] = desiredColor; |
| colorAssigned = true; |
| break; |
| } |
| } |
| } |
| } |
| if (!colorAssigned) { |
| for (Reg reg : m_regsInPriorityOrder) { |
| if (!coloredRegisters.get(reg)) { |
| m_coloredTmp[tmpIndex] = reg; |
| colorAssigned = true; |
| break; |
| } |
| } |
| } |
| |
| if (!colorAssigned) |
| m_spilledTmps.append(tmpIndex); |
| } |
| |
| m_selectStack.clear(); |
| |
| if (m_spilledTmps.isEmpty()) |
| m_coalescedTmpsAtSpill.clear(); |
| else |
| m_coloredTmp.clear(); |
| } |
| |
| void dumpInterferenceGraphInDot(PrintStream& out) |
| { |
| out.print("graph InterferenceGraph { \n"); |
| |
| HashSet<Tmp> tmpsWithInterferences; |
| for (const auto& edge : m_interferenceEdges) { |
| tmpsWithInterferences.add(TmpMapper::tmpFromAbsoluteIndex(edge.first())); |
| tmpsWithInterferences.add(TmpMapper::tmpFromAbsoluteIndex(edge.second())); |
| } |
| |
| for (const auto& tmp : tmpsWithInterferences) { |
| unsigned tmpIndex = TmpMapper::absoluteIndex(tmp); |
| if (tmpIndex < m_degrees.size()) |
| out.print(" ", tmp.internalValue(), " [label=\"", tmp, " (", m_degrees[tmpIndex], ")\"];\n"); |
| else |
| out.print(" ", tmp.internalValue(), " [label=\"", tmp, "\"];\n"); |
| } |
| |
| for (const auto& edge : m_interferenceEdges) |
| out.print(" ", edge.first(), " -- ", edge.second(), ";\n"); |
| out.print("}\n"); |
| } |
| |
| // Interference edges are not directed. An edge between any two Tmps is represented |
| // by concatenating the two indices, the smaller one followed by the bigger one. |
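| // For example, with 32-bit indices, InterferenceEdge(7, 3) and InterferenceEdge(3, 7) both |
| // encode to 0x0000000300000007: the smaller index lands in the high 32 bits, so first() |
| // returns 3 and second() returns 7. |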
| class InterferenceEdge { |
| public: |
| InterferenceEdge() |
| { |
| } |
| |
| InterferenceEdge(IndexType a, IndexType b) |
| { |
| ASSERT(a); |
| ASSERT(b); |
| ASSERT_WITH_MESSAGE(a != b, "A Tmp can never interfere with itself. Doing so would force it to be the superposition of two registers."); |
| |
| if (b < a) |
| std::swap(a, b); |
| m_value = static_cast<uint64_t>(a) << 32 | b; |
| } |
| |
| InterferenceEdge(WTF::HashTableDeletedValueType) |
| : m_value(std::numeric_limits<uint64_t>::max()) |
| { |
| } |
| |
| IndexType first() const |
| { |
| return m_value >> 32 & 0xffffffff; |
| } |
| |
| IndexType second() const |
| { |
| return m_value & 0xffffffff; |
| } |
| |
| bool operator==(const InterferenceEdge& other) const |
| { |
| return m_value == other.m_value; |
| } |
| |
| bool isHashTableDeletedValue() const |
| { |
| return *this == InterferenceEdge(WTF::HashTableDeletedValue); |
| } |
| |
| unsigned hash() const |
| { |
| return WTF::IntHash<uint64_t>::hash(m_value); |
| } |
| |
| void dump(PrintStream& out) const |
| { |
| out.print(first(), "<=>", second()); |
| } |
| |
| private: |
| uint64_t m_value { 0 }; |
| }; |
| |
| bool addInterferenceEdge(InterferenceEdge edge) |
| { |
| return m_interferenceEdges.add(edge).isNewEntry; |
| } |
| |
| bool hasInterferenceEdge(InterferenceEdge edge) |
| { |
| return m_interferenceEdges.contains(edge); |
| } |
| |
| struct InterferenceEdgeHash { |
| static unsigned hash(const InterferenceEdge& key) { return key.hash(); } |
| static bool equal(const InterferenceEdge& a, const InterferenceEdge& b) { return a == b; } |
| static const bool safeToCompareToEmptyOrDeleted = true; |
| }; |
| typedef SimpleClassHashTraits<InterferenceEdge> InterferenceEdgeHashTraits; |
| |
| Vector<Reg> m_regsInPriorityOrder; |
| IndexType m_lastPrecoloredRegisterIndex { 0 }; |
| |
| // The interference graph. |
| HashSet<InterferenceEdge, InterferenceEdgeHash, InterferenceEdgeHashTraits> m_interferenceEdges; |
| |
| Vector<Vector<IndexType, 0, UnsafeVectorOverflow, 4>, 0, UnsafeVectorOverflow> m_adjacencyList; |
| Vector<IndexType, 0, UnsafeVectorOverflow> m_degrees; |
| |
| using IndexTypeSet = HashSet<IndexType, typename DefaultHash<IndexType>::Hash, WTF::UnsignedWithZeroKeyHashTraits<IndexType>>; |
| |
| HashMap<IndexType, IndexTypeSet, typename DefaultHash<IndexType>::Hash, WTF::UnsignedWithZeroKeyHashTraits<IndexType>> m_biases; |
| |
| // Instead of keeping track of the move instructions, we just keep their operands around and use the index |
| // in the vector as the "identifier" for the move. |
| struct MoveOperands { |
| IndexType srcIndex; |
| IndexType dstIndex; |
| }; |
| Vector<MoveOperands, 0, UnsafeVectorOverflow> m_coalescingCandidates; |
| |
| // List of every move instruction associated with a Tmp. |
| Vector<IndexTypeSet> m_moveList; |
| |
| // Colors. |
| Vector<Reg, 0, UnsafeVectorOverflow> m_coloredTmp; |
| Vector<IndexType> m_spilledTmps; |
| |
| // Values that have been coalesced with another value. |
| Vector<IndexType, 0, UnsafeVectorOverflow> m_coalescedTmps; |
| |
| // The stack of Tmps removed from the graph and ready for coloring. |
| BitVector m_isOnSelectStack; |
| Vector<IndexType> m_selectStack; |
| |
| // Low-degree, non-Move related. |
| Vector<IndexType> m_simplifyWorklist; |
| // High-degree Tmp. |
| BitVector m_spillWorklist; |
| |
| bool m_hasSelectedSpill { false }; |
| bool m_hasCoalescedNonTrivialMove { false }; |
| |
| // The mapping of each Tmp to its alias, for Moves that stay coalesced regardless of spilling. |
| Vector<IndexType, 0, UnsafeVectorOverflow> m_coalescedTmpsAtSpill; |
| |
| const HashSet<unsigned>& m_unspillableTmps; |
| const UseCounts<Tmp>& m_useCounts; |
| Code& m_code; |
| |
| Vector<Tmp, 4> m_pinnedRegs; |
| }; |
| |
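| // The Briggs allocator performs all of its coalescing up front, before simplification: it |
| // repeatedly walks the move worklists until no more moves can be conservatively coalesced, and |
| // only then builds the select stack by simplifying low-degree nodes, picking spill candidates |
| // when none remain. Coalescing decisions are never revisited during simplification. |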
| template <typename IndexType, typename TmpMapper> |
| class Briggs : public AbstractColoringAllocator<IndexType, TmpMapper> { |
| using Base = AbstractColoringAllocator<IndexType, TmpMapper>; |
| protected: |
| using Base::m_isOnSelectStack; |
| using Base::m_selectStack; |
| using Base::m_simplifyWorklist; |
| using Base::m_spillWorklist; |
| using Base::m_hasSelectedSpill; |
| using Base::m_hasCoalescedNonTrivialMove; |
| using Base::m_degrees; |
| using Base::registerCount; |
| using Base::m_coalescedTmps; |
| using Base::m_moveList; |
| using Base::m_useCounts; |
| using Base::m_coalescedTmpsAtSpill; |
| using Base::m_spilledTmps; |
| using MoveOperands = typename Base::MoveOperands; |
| using Base::m_coalescingCandidates; |
| using Base::m_lastPrecoloredRegisterIndex; |
| using Base::m_coloredTmp; |
| using Base::m_code; |
| using InterferenceEdge = typename Base::InterferenceEdge; |
| using Base::m_unspillableTmps; |
| using Base::hasInterferenceEdge; |
| using Base::getAlias; |
| using Base::addEdge; |
| using Base::isPrecolored; |
| using Base::canBeSafelyCoalesced; |
| using Base::addEdgeDistinctWithoutDegreeChange; |
| using Base::forEachAdjacent; |
| using Base::hasBeenSimplified; |
| using Base::addToSpill; |
| using Base::m_interferenceEdges; |
| using Base::addBias; |
| using Base::m_pinnedRegs; |
| using Base::m_regsInPriorityOrder; |
| |
| public: |
| Briggs(Code& code, const Vector<Reg>& regsInPriorityOrder, IndexType lastPrecoloredRegisterIndex, unsigned tmpArraySize, const HashSet<unsigned>& unspillableTmps, const UseCounts<Tmp>& useCounts) |
| : Base(code, regsInPriorityOrder, lastPrecoloredRegisterIndex, tmpArraySize, unspillableTmps, useCounts) |
| { |
| } |
| |
| void allocate() |
| { |
| bool changed = false; |
| |
| auto coalesceMove = [&] (unsigned& move) { |
| bool coalesced = coalesce(move); |
| if (coalesced) { |
| changed = true; |
| // We won't need to handle this move in the future since it's already coalesced. |
| move = UINT_MAX; |
| } |
| }; |
| |
| // We first coalesce until we can't coalesce any more. |
| do { |
| changed = false; |
| m_worklistMoves.forEachMove(coalesceMove); |
| } while (changed); |
| do { |
| changed = false; |
| m_worklistMoves.forEachLowPriorityMove(coalesceMove); |
| } while (changed); |
| |
| // Then we create our select stack. The invariant we start with here is that nodes in |
| // the interference graph with degree >= k are on the spill list. Nodes with degree < k |
| // are on the simplify worklist. A node can move from the spill list to the simplify |
| // list (but not the other way around, note that this is different than IRC because IRC |
| // runs this while coalescing, but we do all our coalescing before this). Once a node is |
| // added to the select stack, it's not on either list, but only on the select stack. |
| // Once on the select stack, logically, it's no longer in the interference graph. |
| auto assertInvariants = [&] () { |
| if (ASSERT_DISABLED) |
| return; |
| if (!shouldValidateIRAtEachPhase()) |
| return; |
| |
| IndexType firstNonRegIndex = m_lastPrecoloredRegisterIndex + 1; |
| unsigned registerCount = this->registerCount(); |
| for (IndexType i = firstNonRegIndex; i < m_degrees.size(); ++i) { |
| if (getAlias(i) != i) |
| continue; |
| if (m_isOnSelectStack.contains(i)) { |
| ASSERT(!m_simplifyWorklist.contains(i) && !m_spillWorklist.contains(i)); |
| continue; |
| } |
| unsigned degree = m_degrees[i]; |
| if (degree >= registerCount) { |
| ASSERT(m_unspillableTmps.contains(i) || m_spillWorklist.contains(i)); |
| ASSERT(!m_simplifyWorklist.contains(i)); |
| continue; |
| } |
| ASSERT(m_simplifyWorklist.contains(i)); |
| } |
| }; |
| |
| makeInitialWorklist(); |
| assertInvariants(); |
| do { |
| changed = false; |
| |
| while (m_simplifyWorklist.size()) { |
| simplify(); |
| assertInvariants(); |
| } |
| |
| if (!m_spillWorklist.isEmpty()) { |
| selectSpill(); |
| changed = true; |
| ASSERT(m_simplifyWorklist.size() == 1); |
| } |
| } while (changed); |
| |
| if (!ASSERT_DISABLED) { |
| ASSERT(!m_simplifyWorklist.size()); |
| ASSERT(m_spillWorklist.isEmpty()); |
| IndexType firstNonRegIndex = m_lastPrecoloredRegisterIndex + 1; |
| for (IndexType i = firstNonRegIndex; i < m_degrees.size(); ++i) |
| ASSERT(hasBeenSimplified(i)); |
| } |
| |
| assignColors(); |
| } |
| |
| protected: |
| |
| bool coalesce(unsigned& moveIndex) |
| { |
| const MoveOperands& moveOperands = m_coalescingCandidates[moveIndex]; |
| IndexType u = getAlias(moveOperands.srcIndex); |
| IndexType v = getAlias(moveOperands.dstIndex); |
| |
| if (isPrecolored(v)) |
| std::swap(u, v); |
| |
| if (traceDebug) |
| dataLog("Coalescing move at index ", moveIndex, " u = ", TmpMapper::tmpFromAbsoluteIndex(u), " v = ", TmpMapper::tmpFromAbsoluteIndex(v), " "); |
| |
| if (u == v) { |
| if (traceDebug) |
| dataLog("Already Coalesced. They're equal.\n"); |
| return false; |
| } |
| |
| if (isPrecolored(v) |
| || hasInterferenceEdge(InterferenceEdge(u, v))) { |
| |
| // No need to ever consider this move again if it interferes. |
| // No coalescing will remove the interference. |
| moveIndex = UINT_MAX; |
| |
| if (!ASSERT_DISABLED) { |
| if (isPrecolored(v)) |
| ASSERT(isPrecolored(u)); |
| } |
| |
| if (traceDebug) |
| dataLog("Constrained\n"); |
| |
| return false; |
| } |
| |
| if (canBeSafelyCoalesced(u, v)) { |
| combine(u, v); |
| m_hasCoalescedNonTrivialMove = true; |
| |
| if (traceDebug) |
| dataLog(" Safe Coalescing\n"); |
| return true; |
| } |
| |
| addBias(u, v); |
| |
| if (traceDebug) |
| dataLog(" Failed coalescing.\n"); |
| |
| return false; |
| } |
| |
| void combine(IndexType u, IndexType v) |
| { |
| ASSERT(!m_coalescedTmps[v]); |
| m_coalescedTmps[v] = u; |
| |
| auto& vMoves = m_moveList[v]; |
| m_moveList[u].add(vMoves.begin(), vMoves.end()); |
| |
| forEachAdjacent(v, [this, u] (IndexType adjacentTmpIndex) { |
| if (addEdgeDistinctWithoutDegreeChange(adjacentTmpIndex, u)) { |
| // If we added a new edge between the adjacentTmp and u, it replaces the edge |
| // that existed with v. |
| // The degree of adjacentTmp remains the same since the edge just changed from v to u. |
| // All we need to do is update the degree of u. |
| if (!isPrecolored(u)) |
| m_degrees[u]++; |
| } else { |
| // If we already had an edge between the adjacentTmp and u, the degree of u |
| // is already correct. The degree of the adjacentTmp decreases since the edge |
| // with v is no longer relevant (we can think of it as merged with the edge with u). |
| decrementDegree(adjacentTmpIndex); |
| } |
| }); |
| } |
| |
| |
| void makeInitialWorklist() |
| { |
| m_simplifyWorklist.clear(); |
| m_spillWorklist.clearAll(); |
| |
| if (traceDebug) |
| dataLogLn("------------------\nMaking initial worklist"); |
| |
| IndexType firstNonRegIndex = m_lastPrecoloredRegisterIndex + 1; |
| unsigned registerCount = this->registerCount(); |
| for (IndexType i = firstNonRegIndex; i < m_degrees.size(); ++i) { |
| ASSERT(!isPrecolored(i)); |
| if (hasBeenSimplified(i)) |
| continue; |
| unsigned degree = m_degrees[i]; |
| if (degree < registerCount) { |
| if (traceDebug) |
| dataLogLn("Adding ", TmpMapper::tmpFromAbsoluteIndex(i), " to simplify worklist"); |
| m_simplifyWorklist.append(i); |
| } else { |
| if (traceDebug) |
| dataLogLn("Adding ", TmpMapper::tmpFromAbsoluteIndex(i), " to spill worklist"); |
| addToSpill(i); |
| } |
| } |
| } |
| |
| // A low-degree vertex can always be colored: just pick any color not taken by its |
| // adjacent vertices. |
| // The "Simplify" phase takes a low-degree vertex out of the interference graph to simplify it. |
| void simplify() |
| { |
| IndexType lastIndex = m_simplifyWorklist.takeLast(); |
| |
| ASSERT(!m_selectStack.contains(lastIndex)); |
| ASSERT(!m_isOnSelectStack.get(lastIndex)); |
| ASSERT(!m_spillWorklist.contains(lastIndex)); |
| |
| m_selectStack.append(lastIndex); |
| m_isOnSelectStack.quickSet(lastIndex); |
| |
| if (traceDebug) |
| dataLogLn("Simplifying ", lastIndex, " by adding it to select stack"); |
| |
| forEachAdjacent(lastIndex, [this](IndexType adjacentTmpIndex) { |
| decrementDegreeInSimplification(adjacentTmpIndex); |
| }); |
| } |
| |
| void selectSpill() |
| { |
| IndexType victimIndex = Base::selectSpill(); |
| m_spillWorklist.quickClear(victimIndex); |
| m_simplifyWorklist.append(victimIndex); |
| } |
| |
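| // Move worklist used by the Briggs allocator. Moves are identified by their index in |
| // m_coalescingCandidates; a move that never needs to be reconsidered is marked by setting its |
| // entry to UINT_MAX in the coalescing callback (see allocate()). |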
| struct MoveSet { |
| unsigned addMove() |
| { |
| ASSERT(m_lowPriorityMoveList.isEmpty()); |
| |
| unsigned nextIndex = m_positionInMoveList++; |
| m_moveList.append(nextIndex); |
| return nextIndex; |
| } |
| |
| void startAddingLowPriorityMoves() |
| { |
| ASSERT(m_lowPriorityMoveList.isEmpty()); |
| } |
| |
| unsigned addLowPriorityMove() |
| { |
| unsigned nextIndex = m_positionInMoveList++; |
| m_lowPriorityMoveList.append(nextIndex); |
| |
| return nextIndex; |
| } |
| |
| // We use references to moves here because the function |
| // may do coalescing, indicating it doesn't need to see |
| // the move again. |
| template <typename Function> |
| void forEachMove(Function function) |
| { |
| for (unsigned& move : m_moveList) { |
| if (move != UINT_MAX) |
| function(move); |
| } |
| } |
| |
| template <typename Function> |
| void forEachLowPriorityMove(Function function) |
| { |
| for (unsigned& move : m_lowPriorityMoveList) { |
| if (move != UINT_MAX) |
| function(move); |
| } |
| } |
| |
| void clear() |
| { |
| m_positionInMoveList = 0; |
| m_moveList.clear(); |
| m_lowPriorityMoveList.clear(); |
| } |
| |
| private: |
| unsigned m_positionInMoveList; |
| Vector<unsigned, 0, UnsafeVectorOverflow> m_moveList; |
| Vector<unsigned, 0, UnsafeVectorOverflow> m_lowPriorityMoveList; |
| }; |
| |
| void decrementDegree(IndexType tmpIndex) |
| { |
| ASSERT(m_degrees[tmpIndex]); |
| --m_degrees[tmpIndex]; |
| } |
| |
| void decrementDegreeInSimplification(IndexType tmpIndex) |
| { |
| ASSERT(m_degrees[tmpIndex]); |
| unsigned oldDegree = m_degrees[tmpIndex]--; |
| |
| if (oldDegree == registerCount()) { |
| ASSERT(m_degrees[tmpIndex] < registerCount()); |
| if (traceDebug) |
| dataLogLn("Moving tmp ", tmpIndex, " from spill list to simplify list because it's degree is now less than k"); |
| |
| if (!ASSERT_DISABLED) |
| ASSERT(m_unspillableTmps.contains(tmpIndex) || m_spillWorklist.contains(tmpIndex)); |
| m_spillWorklist.quickClear(tmpIndex); |
| |
| ASSERT(!m_simplifyWorklist.contains(tmpIndex)); |
| m_simplifyWorklist.append(tmpIndex); |
| } |
| } |
| |
| void assignColors() |
| { |
| m_worklistMoves.clear(); |
| Base::assignColors(); |
| } |
| |
| // Set of "move" enabled for possible coalescing. |
| MoveSet m_worklistMoves; |
| }; |
| |
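| // The IRC (Iterated Register Coalescing) allocator interleaves simplification, coalescing, |
| // freezing and spill selection in a single worklist-driven loop, re-enabling failed moves for |
| // coalescing whenever the degree of one of their operands drops below the register count. |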
| template <typename IndexType, typename TmpMapper> |
| class IRC : public AbstractColoringAllocator<IndexType, TmpMapper> { |
| using Base = AbstractColoringAllocator<IndexType, TmpMapper>; |
| protected: |
| using Base::m_isOnSelectStack; |
| using Base::m_selectStack; |
| using Base::m_simplifyWorklist; |
| using Base::m_spillWorklist; |
| using Base::m_hasSelectedSpill; |
| using Base::m_hasCoalescedNonTrivialMove; |
| using Base::m_degrees; |
| using Base::registerCount; |
| using Base::m_coalescedTmps; |
| using Base::m_moveList; |
| using Base::m_useCounts; |
| using Base::m_coalescedTmpsAtSpill; |
| using Base::m_spilledTmps; |
| using MoveOperands = typename Base::MoveOperands; |
| using Base::m_coalescingCandidates; |
| using Base::m_lastPrecoloredRegisterIndex; |
| using Base::m_coloredTmp; |
| using Base::m_code; |
| using InterferenceEdge = typename Base::InterferenceEdge; |
| using Base::m_unspillableTmps; |
| using Base::hasInterferenceEdge; |
| using Base::getAlias; |
| using Base::addEdge; |
| using Base::isPrecolored; |
| using Base::canBeSafelyCoalesced; |
| using Base::addEdgeDistinctWithoutDegreeChange; |
| using Base::forEachAdjacent; |
| using Base::hasBeenSimplified; |
| using Base::addToSpill; |
| using Base::m_interferenceEdges; |
| using Base::m_adjacencyList; |
| using Base::dumpInterferenceGraphInDot; |
| using Base::addBias; |
| using Base::m_pinnedRegs; |
| using Base::m_regsInPriorityOrder; |
| |
| public: |
| IRC(Code& code, const Vector<Reg>& regsInPriorityOrder, IndexType lastPrecoloredRegisterIndex, unsigned tmpArraySize, const HashSet<unsigned>& unspillableTmps, const UseCounts<Tmp>& useCounts) |
| : Base(code, regsInPriorityOrder, lastPrecoloredRegisterIndex, tmpArraySize, unspillableTmps, useCounts) |
| { |
| } |
| |
| void allocate() |
| { |
| m_activeMoves.ensureSize(m_worklistMoves.totalNumberOfMoves()); |
| ASSERT_WITH_MESSAGE(m_activeMoves.size() >= m_coalescingCandidates.size(), "The activeMove set should be big enough for the quick operations of BitVector."); |
| |
| makeWorkList(); |
| |
| if (debug) { |
| dumpInterferenceGraphInDot(WTF::dataFile()); |
| dataLog("Coalescing candidates:\n"); |
| for (MoveOperands& moveOp : m_coalescingCandidates) { |
| dataLog(" ", TmpMapper::tmpFromAbsoluteIndex(moveOp.srcIndex), |
| " -> ", TmpMapper::tmpFromAbsoluteIndex(moveOp.dstIndex), "\n"); |
| } |
| dataLog("Initial work list\n"); |
| dumpWorkLists(WTF::dataFile()); |
| } |
| |
| do { |
| if (traceDebug) { |
| dataLog("Before Graph simplification iteration\n"); |
| dumpWorkLists(WTF::dataFile()); |
| } |
| |
| if (!m_simplifyWorklist.isEmpty()) |
| simplify(); |
| else if (!m_worklistMoves.isEmpty()) |
| coalesce(); |
| else if (!m_freezeWorklist.isEmpty()) |
| freeze(); |
| else if (!m_spillWorklist.isEmpty()) |
| selectSpill(); |
| |
| if (traceDebug) { |
| dataLog("After Graph simplification iteration\n"); |
| dumpWorkLists(WTF::dataFile()); |
| } |
| } while (!m_simplifyWorklist.isEmpty() || !m_worklistMoves.isEmpty() || !m_freezeWorklist.isEmpty() || !m_spillWorklist.isEmpty()); |
| |
| assignColors(); |
| } |
| |
| protected: |
| |
| void makeWorkList() |
| { |
| IndexType firstNonRegIndex = m_lastPrecoloredRegisterIndex + 1; |
| for (IndexType i = firstNonRegIndex; i < m_degrees.size(); ++i) { |
| unsigned degree = m_degrees[i]; |
| if (degree >= registerCount()) |
| addToSpill(i); |
| else if (!m_moveList[i].isEmpty()) |
| m_freezeWorklist.add(i); |
| else |
| m_simplifyWorklist.append(i); |
| } |
| } |
| |
| // A low-degree vertex can always be colored: just pick any color not taken by its |
| // adjacent vertices. |
| // The "Simplify" phase takes a low-degree vertex out of the interference graph to simplify it. |
| void simplify() |
| { |
| IndexType lastIndex = m_simplifyWorklist.takeLast(); |
| |
| ASSERT(!m_selectStack.contains(lastIndex)); |
| ASSERT(!m_isOnSelectStack.get(lastIndex)); |
| m_selectStack.append(lastIndex); |
| m_isOnSelectStack.quickSet(lastIndex); |
| |
| forEachAdjacent(lastIndex, [this](IndexType adjacentTmpIndex) { |
| decrementDegree(adjacentTmpIndex); |
| }); |
| } |
| |
| void coalesce() |
| { |
| unsigned moveIndex = m_worklistMoves.takeLastMove(); |
| const MoveOperands& moveOperands = m_coalescingCandidates[moveIndex]; |
| IndexType u = getAlias(moveOperands.srcIndex); |
| IndexType v = getAlias(moveOperands.dstIndex); |
| |
| if (isPrecolored(v)) |
| std::swap(u, v); |
| |
| if (traceDebug) |
| dataLog("Coalescing move at index ", moveIndex, " u = ", u, " v = ", v, "\n"); |
| |
| if (u == v) { |
| addWorkList(u); |
| |
| if (traceDebug) |
| dataLog(" Coalesced\n"); |
| } else if (isPrecolored(v) |
| || hasInterferenceEdge(InterferenceEdge(u, v))) { |
| addWorkList(u); |
| addWorkList(v); |
| |
| if (traceDebug) |
| dataLog(" Constrained\n"); |
| } else if (canBeSafelyCoalesced(u, v)) { |
| combine(u, v); |
| addWorkList(u); |
| m_hasCoalescedNonTrivialMove = true; |
| |
| if (traceDebug) |
| dataLog(" Safe Coalescing\n"); |
| } else { |
| m_activeMoves.quickSet(moveIndex); |
| if (traceDebug) |
| dataLog(" Failed coalescing, added to active moves.\n"); |
| |
| addBias(u, v); |
| } |
| } |
| |
| void addWorkList(IndexType tmpIndex) |
| { |
| if (!isPrecolored(tmpIndex) && m_degrees[tmpIndex] < registerCount() && !isMoveRelated(tmpIndex)) { |
| m_freezeWorklist.remove(tmpIndex); |
| m_simplifyWorklist.append(tmpIndex); |
| } |
| } |
| |
| void combine(IndexType u, IndexType v) |
| { |
| if (!m_freezeWorklist.remove(v)) |
| m_spillWorklist.quickClear(v); |
| |
| ASSERT(!m_coalescedTmps[v]); |
| m_coalescedTmps[v] = u; |
| |
| auto& vMoves = m_moveList[v]; |
| m_moveList[u].add(vMoves.begin(), vMoves.end()); |
| |
| forEachAdjacent(v, [this, u] (IndexType adjacentTmpIndex) { |
| if (addEdgeDistinctWithoutDegreeChange(adjacentTmpIndex, u)) { |
| // If we added a new edge between the adjacentTmp and u, it replaces the edge |
| // that existed with v. |
| // The degree of adjacentTmp remains the same since the edge just changed from v to u. |
| // All we need to do is update the degree of u. |
| if (!isPrecolored(u)) |
| m_degrees[u]++; |
| } else { |
| // If we already had an edge between the adjacentTmp and u, the degree of u |
| // is already correct. The degree of the adjacentTmp decreases since the edge |
| // with v is no longer relevant (we can think of it as merged with the edge with u). |
| decrementDegree(adjacentTmpIndex); |
| } |
| }); |
| |
| if (m_degrees[u] >= registerCount() && m_freezeWorklist.remove(u)) |
| addToSpill(u); |
| } |
| |
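| // When nothing can be simplified or coalesced, give up on the moves of one low-degree |
| // move-related Tmp ("freeze" it) so that it becomes eligible for simplification. |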
| void freeze() |
| { |
| IndexType victimIndex = m_freezeWorklist.takeAny(); |
| ASSERT_WITH_MESSAGE(getAlias(victimIndex) == victimIndex, "coalesce() should not leave aliased Tmp in the worklist."); |
| m_simplifyWorklist.append(victimIndex); |
| freezeMoves(victimIndex); |
| } |
| |
| void freezeMoves(IndexType tmpIndex) |
| { |
| forEachNodeMoves(tmpIndex, [this, tmpIndex] (IndexType moveIndex) { |
| if (!m_activeMoves.quickClear(moveIndex)) |
| m_worklistMoves.takeMove(moveIndex); |
| |
| const MoveOperands& moveOperands = m_coalescingCandidates[moveIndex]; |
| IndexType srcTmpIndex = moveOperands.srcIndex; |
| IndexType dstTmpIndex = moveOperands.dstIndex; |
| |
| IndexType originalOtherTmp = srcTmpIndex != tmpIndex ? srcTmpIndex : dstTmpIndex; |
| IndexType otherTmpIndex = getAlias(originalOtherTmp); |
| if (m_degrees[otherTmpIndex] < registerCount() && !isMoveRelated(otherTmpIndex)) { |
| if (m_freezeWorklist.remove(otherTmpIndex)) |
| m_simplifyWorklist.append(otherTmpIndex); |
| } |
| }); |
| } |
| |
| void decrementDegree(IndexType tmpIndex) |
| { |
| ASSERT(m_degrees[tmpIndex]); |
| |
| unsigned oldDegree = m_degrees[tmpIndex]--; |
| if (oldDegree == registerCount()) { |
| enableMovesOnValueAndAdjacents(tmpIndex); |
| m_spillWorklist.quickClear(tmpIndex); |
| if (isMoveRelated(tmpIndex)) |
| m_freezeWorklist.add(tmpIndex); |
| else |
| m_simplifyWorklist.append(tmpIndex); |
| } |
| } |
| |
| void selectSpill() |
| { |
| IndexType victimIndex = Base::selectSpill(); |
| m_spillWorklist.quickClear(victimIndex); |
| m_simplifyWorklist.append(victimIndex); |
| freezeMoves(victimIndex); |
| } |
| |
| void assignColors() |
| { |
| ASSERT(m_freezeWorklist.isEmpty()); |
| m_worklistMoves.clear(); |
| Base::assignColors(); |
| } |
| |
| |
| bool isMoveRelated(IndexType tmpIndex) |
| { |
| for (unsigned moveIndex : m_moveList[tmpIndex]) { |
| if (m_activeMoves.quickGet(moveIndex) || m_worklistMoves.contains(moveIndex)) |
| return true; |
| } |
| return false; |
| } |
| |
| template<typename Function> |
| void forEachNodeMoves(IndexType tmpIndex, Function function) |
| { |
| for (unsigned moveIndex : m_moveList[tmpIndex]) { |
| if (m_activeMoves.quickGet(moveIndex) || m_worklistMoves.contains(moveIndex)) |
| function(moveIndex); |
| } |
| } |
| |
| void enableMovesOnValue(IndexType tmpIndex) |
| { |
| for (unsigned moveIndex : m_moveList[tmpIndex]) { |
| if (m_activeMoves.quickClear(moveIndex)) |
| m_worklistMoves.returnMove(moveIndex); |
| } |
| } |
| |
| void enableMovesOnValueAndAdjacents(IndexType tmpIndex) |
| { |
| enableMovesOnValue(tmpIndex); |
| |
| forEachAdjacent(tmpIndex, [this] (IndexType adjacentTmpIndex) { |
| enableMovesOnValue(adjacentTmpIndex); |
| }); |
| } |
| |
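| // Move worklist used by the IRC allocator. Unlike Briggs' MoveSet, moves can be taken out and |
| // later returned (when a previously failed coalescing becomes worth retrying), so |
| // m_positionInMoveList tracks where each move currently sits and removal swaps the move with |
| // the last element of its list for O(1) deletion. |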
| struct OrderedMoveSet { |
| unsigned addMove() |
| { |
| ASSERT(m_lowPriorityMoveList.isEmpty()); |
| ASSERT(!m_firstLowPriorityMoveIndex); |
| |
| unsigned nextIndex = m_positionInMoveList.size(); |
| unsigned position = m_moveList.size(); |
| m_moveList.append(nextIndex); |
| m_positionInMoveList.append(position); |
| return nextIndex; |
| } |
| |
| void startAddingLowPriorityMoves() |
| { |
| ASSERT(m_lowPriorityMoveList.isEmpty()); |
| m_firstLowPriorityMoveIndex = m_moveList.size(); |
| } |
| |
| unsigned addLowPriorityMove() |
| { |
| ASSERT(m_firstLowPriorityMoveIndex == m_moveList.size()); |
| |
| unsigned nextIndex = m_positionInMoveList.size(); |
| unsigned position = m_lowPriorityMoveList.size(); |
| m_lowPriorityMoveList.append(nextIndex); |
| m_positionInMoveList.append(position); |
| |
| ASSERT(nextIndex >= m_firstLowPriorityMoveIndex); |
| |
| return nextIndex; |
| } |
| |
| bool isEmpty() const |
| { |
| return m_moveList.isEmpty() && m_lowPriorityMoveList.isEmpty(); |
| } |
| |
| bool contains(unsigned index) |
| { |
| return m_positionInMoveList[index] != std::numeric_limits<unsigned>::max(); |
| } |
| |
| void takeMove(unsigned moveIndex) |
| { |
| unsigned positionInMoveList = m_positionInMoveList[moveIndex]; |
| if (positionInMoveList == std::numeric_limits<unsigned>::max()) |
| return; |
| |
| if (moveIndex < m_firstLowPriorityMoveIndex) { |
| ASSERT(m_moveList[positionInMoveList] == moveIndex); |
| unsigned lastIndex = m_moveList.last(); |
| m_positionInMoveList[lastIndex] = positionInMoveList; |
| m_moveList[positionInMoveList] = lastIndex; |
| m_moveList.removeLast(); |
| } else { |
| ASSERT(m_lowPriorityMoveList[positionInMoveList] == moveIndex); |
| unsigned lastIndex = m_lowPriorityMoveList.last(); |
| m_positionInMoveList[lastIndex] = positionInMoveList; |
| m_lowPriorityMoveList[positionInMoveList] = lastIndex; |
| m_lowPriorityMoveList.removeLast(); |
| } |
| |
| m_positionInMoveList[moveIndex] = std::numeric_limits<unsigned>::max(); |
| |
| ASSERT(!contains(moveIndex)); |
| } |
| |
| unsigned takeLastMove() |
| { |
| ASSERT(!isEmpty()); |
| |
| unsigned lastIndex; |
| if (!m_moveList.isEmpty()) { |
| lastIndex = m_moveList.takeLast(); |
| ASSERT(m_positionInMoveList[lastIndex] == m_moveList.size()); |
| } else { |
| lastIndex = m_lowPriorityMoveList.takeLast(); |
| ASSERT(m_positionInMoveList[lastIndex] == m_lowPriorityMoveList.size()); |
| } |
| m_positionInMoveList[lastIndex] = std::numeric_limits<unsigned>::max(); |
| |
| ASSERT(!contains(lastIndex)); |
| return lastIndex; |
| } |
| |
| void returnMove(unsigned index) |
| { |
| // This assertion is a bit strict but that is how the move list should be used. The only kind of moves that can |
| // return to the list are the ones that we previously failed to coalesce with the conservative heuristics. |
| // Values should not be added back if they were never taken out when attempting coalescing. |
| ASSERT(!contains(index)); |
| |
| if (index < m_firstLowPriorityMoveIndex) { |
| unsigned position = m_moveList.size(); |
| m_moveList.append(index); |
| m_positionInMoveList[index] = position; |
| } else { |
| unsigned position = m_lowPriorityMoveList.size(); |
| m_lowPriorityMoveList.append(index); |
| m_positionInMoveList[index] = position; |
| } |
| |
| ASSERT(contains(index)); |
| } |
| |
| void clear() |
| { |
| m_positionInMoveList.clear(); |
| m_moveList.clear(); |
| m_lowPriorityMoveList.clear(); |
| } |
| |
| unsigned totalNumberOfMoves() |
| { |
| return m_moveList.size() + m_lowPriorityMoveList.size(); |
| } |
| |
| private: |
| Vector<unsigned, 0, UnsafeVectorOverflow> m_positionInMoveList; |
| Vector<unsigned, 0, UnsafeVectorOverflow> m_moveList; |
| Vector<unsigned, 0, UnsafeVectorOverflow> m_lowPriorityMoveList; |
| unsigned m_firstLowPriorityMoveIndex { 0 }; |
| }; |
| |
| void dumpWorkLists(PrintStream& out) |
| { |
| out.print("Simplify work list:\n"); |
| for (unsigned tmpIndex : m_simplifyWorklist) |
| out.print(" ", TmpMapper::tmpFromAbsoluteIndex(tmpIndex), "\n"); |
| out.printf("Moves work list is empty? %d\n", m_worklistMoves.isEmpty()); |
| out.print("Freeze work list:\n"); |
| for (unsigned tmpIndex : m_freezeWorklist) |
| out.print(" ", TmpMapper::tmpFromAbsoluteIndex(tmpIndex), "\n"); |
| out.print("Spill work list:\n"); |
| for (unsigned tmpIndex : m_spillWorklist) |
| out.print(" ", TmpMapper::tmpFromAbsoluteIndex(tmpIndex), "\n"); |
| } |
| |
| // Work lists. |
| // Low-degree, Move related. |
| HashSet<IndexType> m_freezeWorklist; |
| // Set of "move" enabled for possible coalescing. |
| OrderedMoveSet m_worklistMoves; |
| // Set of "move" not yet ready for coalescing. |
| BitVector m_activeMoves; |
| }; |
| |
| // This performs all the tasks that are specific to a given register bank. |
| template<Bank bank, template<typename, typename> class AllocatorType> |
| class ColoringAllocator : public AllocatorType<unsigned, AbsoluteTmpMapper<bank>> { |
| using TmpMapper = AbsoluteTmpMapper<bank>; |
| using Base = AllocatorType<unsigned, TmpMapper>; |
| using Base::m_isOnSelectStack; |
| using Base::m_selectStack; |
| using Base::m_simplifyWorklist; |
| using Base::m_spillWorklist; |
| using Base::m_hasSelectedSpill; |
| using Base::m_hasCoalescedNonTrivialMove; |
| using Base::m_degrees; |
| using Base::registerCount; |
| using Base::m_coalescedTmps; |
| using Base::m_moveList; |
| using Base::m_useCounts; |
| using Base::m_coalescedTmpsAtSpill; |
| using Base::m_spilledTmps; |
| using MoveOperands = typename Base::MoveOperands; |
| using Base::m_coalescingCandidates; |
| using Base::m_lastPrecoloredRegisterIndex; |
| using Base::m_coloredTmp; |
| using Base::m_code; |
| using InterferenceEdge = typename Base::InterferenceEdge; |
| using Base::m_worklistMoves; |
| using Base::hasInterferenceEdge; |
| using Base::getAlias; |
| using Base::addEdge; |
| using Base::m_interferenceEdges; |
| using Base::m_pinnedRegs; |
| using Base::m_regsInPriorityOrder; |
| |
| public: |
| |
| ColoringAllocator(Code& code, TmpWidth& tmpWidth, const UseCounts<Tmp>& useCounts, const HashSet<unsigned>& unspillableTmp) |
| : Base(code, code.regsInPriorityOrder(bank), TmpMapper::lastMachineRegisterIndex(), tmpArraySize(code), unspillableTmp, useCounts) |
| , m_tmpWidth(tmpWidth) |
| { |
| for (Reg reg : code.pinnedRegisters()) { |
| if ((bank == GP && reg.isGPR()) || (bank == FP && reg.isFPR())) { |
| m_pinnedRegs.append(Tmp(reg)); |
| ASSERT(!m_regsInPriorityOrder.contains(reg)); |
| m_regsInPriorityOrder.append(reg); |
| } |
| } |
| |
| initializePrecoloredTmp(); |
| build(); |
| } |
| |
| Tmp getAlias(Tmp tmp) const |
| { |
| return TmpMapper::tmpFromAbsoluteIndex(getAlias(TmpMapper::absoluteIndex(tmp))); |
| } |
| |
| // This tells you if a Move will be coalescable if the src and dst end up matching. This method |
| // relies on an analysis that is invalidated by register allocation, so it's only meaningful to |
| // call this *before* replacing the Tmp's in this Inst with registers or spill slots. |
| bool mayBeCoalescable(const Inst& inst) const |
| { |
| return mayBeCoalescableImpl(inst, &m_tmpWidth); |
| } |
| |
| bool isUselessMove(const Inst& inst) const |
| { |
| return mayBeCoalescableImpl(inst, nullptr) && inst.args[0].tmp() == inst.args[1].tmp(); |
| } |
| |
| Tmp getAliasWhenSpilling(Tmp tmp) const |
| { |
| ASSERT_WITH_MESSAGE(!m_spilledTmps.isEmpty(), "This function is only valid for coalescing during spilling."); |
| |
| if (m_coalescedTmpsAtSpill.isEmpty()) |
| return tmp; |
| |
| unsigned aliasIndex = TmpMapper::absoluteIndex(tmp); |
| while (unsigned nextAliasIndex = m_coalescedTmpsAtSpill[aliasIndex]) |
| aliasIndex = nextAliasIndex; |
| |
| Tmp alias = TmpMapper::tmpFromAbsoluteIndex(aliasIndex); |
| |
| ASSERT_WITH_MESSAGE(!m_spilledTmps.contains(aliasIndex) || alias == tmp, "The aliases at spill should always be colorable. Something went horribly wrong."); |
| |
| return alias; |
| } |
| |
| template<typename IndexIterator> |
| class IndexToTmpIteratorAdaptor { |
| public: |
| IndexToTmpIteratorAdaptor(IndexIterator&& indexIterator) |
| : m_indexIterator(WTFMove(indexIterator)) |
| { |
| } |
| |
| Tmp operator*() const { return TmpMapper::tmpFromAbsoluteIndex(*m_indexIterator); } |
| IndexToTmpIteratorAdaptor& operator++() { ++m_indexIterator; return *this; } |
| |
| bool operator==(const IndexToTmpIteratorAdaptor& other) const |
| { |
| return m_indexIterator == other.m_indexIterator; |
| } |
| |
| bool operator!=(const IndexToTmpIteratorAdaptor& other) const |
| { |
| return !(*this == other); |
| } |
| |
| private: |
| IndexIterator m_indexIterator; |
| }; |
| |
| template<typename Collection> |
| class IndexToTmpIterableAdaptor { |
| public: |
| IndexToTmpIterableAdaptor(const Collection& collection) |
| : m_collection(collection) |
| { |
| } |
| |
| IndexToTmpIteratorAdaptor<typename Collection::const_iterator> begin() const |
| { |
| return m_collection.begin(); |
| } |
| |
| IndexToTmpIteratorAdaptor<typename Collection::const_iterator> end() const |
| { |
| return m_collection.end(); |
| } |
| |
| private: |
| const Collection& m_collection; |
| }; |
| |
| IndexToTmpIterableAdaptor<Vector<unsigned>> spilledTmps() const { return m_spilledTmps; } |
| |
| bool requiresSpilling() const { return !m_spilledTmps.isEmpty(); } |
| |
| Reg allocatedReg(Tmp tmp) const |
| { |
| ASSERT(!tmp.isReg()); |
| ASSERT(m_coloredTmp.size()); |
| ASSERT(tmp.isGP() == (bank == GP)); |
| |
| Reg reg = m_coloredTmp[TmpMapper::absoluteIndex(tmp)]; |
| if (!reg) { |
| dataLog("FATAL: No color for ", tmp, "\n"); |
| dataLog("Code:\n"); |
| dataLog(m_code); |
| RELEASE_ASSERT_NOT_REACHED(); |
| } |
| return reg; |
| } |
| |
| protected: |
| static unsigned tmpArraySize(Code& code) |
| { |
| unsigned numTmps = code.numTmps(bank); |
| return TmpMapper::absoluteIndex(numTmps); |
| } |
| |
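| // Machine registers occupy the absolute indices 1..m_lastPrecoloredRegisterIndex and each is |
| // trivially "colored" with itself; index 0 is reserved as the empty/invalid value. |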
| void initializePrecoloredTmp() |
| { |
| m_coloredTmp.resize(m_lastPrecoloredRegisterIndex + 1); |
| for (unsigned i = 1; i <= m_lastPrecoloredRegisterIndex; ++i) { |
| Tmp tmp = TmpMapper::tmpFromAbsoluteIndex(i); |
| ASSERT(tmp.isReg()); |
| m_coloredTmp[i] = tmp.reg(); |
| } |
| } |
| |
| bool mayBeCoalesced(Arg left, Arg right) |
| { |
| if (!left.isTmp() || !right.isTmp()) |
| return false; |
| |
| Tmp leftTmp = left.tmp(); |
| Tmp rightTmp = right.tmp(); |
| |
| if (leftTmp == rightTmp) |
| return false; |
| |
| if (leftTmp.isGP() != (bank == GP) || rightTmp.isGP() != (bank == GP)) |
| return false; |
| |
| unsigned leftIndex = TmpMapper::absoluteIndex(leftTmp); |
| unsigned rightIndex = TmpMapper::absoluteIndex(rightTmp); |
| |
| return !hasInterferenceEdge(InterferenceEdge(leftIndex, rightIndex)); |
| } |
| |
| void addToLowPriorityCoalescingCandidates(Arg left, Arg right) |
| { |
| ASSERT(mayBeCoalesced(left, right)); |
| Tmp leftTmp = left.tmp(); |
| Tmp rightTmp = right.tmp(); |
| |
| unsigned leftIndex = TmpMapper::absoluteIndex(leftTmp); |
| unsigned rightIndex = TmpMapper::absoluteIndex(rightTmp); |
| |
| unsigned nextMoveIndex = m_coalescingCandidates.size(); |
| m_coalescingCandidates.append({ leftIndex, rightIndex }); |
| |
| unsigned newIndexInWorklist = m_worklistMoves.addLowPriorityMove(); |
| ASSERT_UNUSED(newIndexInWorklist, newIndexInWorklist == nextMoveIndex); |
| |
| m_moveList[leftIndex].add(nextMoveIndex); |
| m_moveList[rightIndex].add(nextMoveIndex); |
| } |
| |
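| // Build the interference graph and the coalescing candidates by walking each block backward |
| // while maintaining local liveness: at each instruction boundary, every def of the current |
| // bank interferes with everything live across that boundary. |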
| void build() |
| { |
| m_coalescingCandidates.clear(); |
| m_worklistMoves.clear(); |
| |
| // FIXME: It seems like we don't need to recompute liveness. We just need to update its data |
| // structures so that it knows that the newly introduced temporaries are not live at any basic |
| // block boundary. This should trivially be true for now. |
| TmpLiveness<bank> liveness(m_code); |
| for (BasicBlock* block : m_code) { |
| typename TmpLiveness<bank>::LocalCalc localCalc(liveness, block); |
| for (unsigned instIndex = block->size(); instIndex--;) { |
| Inst& inst = block->at(instIndex); |
| Inst* nextInst = block->get(instIndex + 1); |
| build(&inst, nextInst, localCalc); |
| localCalc.execute(instIndex); |
| } |
| build(nullptr, &block->at(0), localCalc); |
| } |
| buildLowPriorityMoveList(); |
| } |
| |
| void build(Inst* prevInst, Inst* nextInst, const typename TmpLiveness<bank>::LocalCalc& localCalc) |
| { |
| if (traceDebug) |
| dataLog("Building between ", pointerDump(prevInst), " and ", pointerDump(nextInst), ":\n"); |
| |
| Inst::forEachDefWithExtraClobberedRegs<Tmp>( |
| prevInst, nextInst, |
| [&] (const Tmp& arg, Arg::Role, Bank argBank, Width) { |
| if (argBank != bank) |
| return; |
| |
| // All the Def()s interfere with each other and with all the extra clobbered Tmps. |
| // We should not use forEachDefWithExtraClobberedRegs() here since colored Tmps |
| // do not need interference edges in our implementation. |
| Inst::forEachDef<Tmp>( |
| prevInst, nextInst, |
| [&] (Tmp& otherArg, Arg::Role, Bank argBank, Width) { |
| if (argBank != bank) |
| return; |
| |
| if (traceDebug) |
| dataLog(" Adding def-def edge: ", arg, ", ", otherArg, "\n"); |
| this->addEdge(arg, otherArg); |
| }); |
| }); |
| |
| if (prevInst && mayBeCoalescable(*prevInst)) { |
| // We do not want the Use() of this move to interfere with the Def(), even if it is live |
| // after the Move. If we were to add the interference edge, it would be impossible to |
| // coalesce the Move even if the two Tmps never interfere anywhere. |
| Tmp defTmp; |
| Tmp useTmp; |
| prevInst->forEachTmp([&defTmp, &useTmp] (Tmp& argTmp, Arg::Role role, Bank, Width) { |
| if (Arg::isLateDef(role)) |
| defTmp = argTmp; |
| else { |
| ASSERT(Arg::isEarlyUse(role)); |
| useTmp = argTmp; |
| } |
| }); |
| ASSERT(defTmp); |
| ASSERT(useTmp); |
| |
| unsigned nextMoveIndex = m_coalescingCandidates.size(); |
| m_coalescingCandidates.append({ TmpMapper::absoluteIndex(useTmp), TmpMapper::absoluteIndex(defTmp) }); |
| if (traceDebug) |
| dataLogLn("Move at index ", nextMoveIndex, " is: ", *prevInst); |
| |
| unsigned newIndexInWorklist = m_worklistMoves.addMove(); |
| ASSERT_UNUSED(newIndexInWorklist, newIndexInWorklist == nextMoveIndex); |
| |
| for (const Arg& arg : prevInst->args) { |
| auto& list = m_moveList[TmpMapper::absoluteIndex(arg.tmp())]; |
| list.add(nextMoveIndex); |
| } |
| |
| auto considerEdge = [&] (const Tmp& liveTmp) { |
| if (liveTmp != useTmp) { |
| if (traceDebug) |
| dataLog(" Adding def-live for coalescable: ", defTmp, ", ", liveTmp, "\n"); |
| addEdge(defTmp, liveTmp); |
| } |
| }; |
| |
| for (Tmp liveTmp : localCalc.live()) |
| considerEdge(liveTmp); |
| for (const Tmp& pinnedRegTmp : m_pinnedRegs) |
| considerEdge(pinnedRegTmp); |
| |
| // The next instruction could have early clobbers or early def's. We need to consider |
| // those now. |
| addEdges(nullptr, nextInst, localCalc.live()); |
| } else |
| addEdges(prevInst, nextInst, localCalc.live()); |
| } |
| |
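| // x86 instructions are mostly two-operand, so a three-operand Air instruction whose def ends |
| // up in the same register as one of its uses can avoid an extra move when lowered. For such |
| // instructions (reported by shouldTryAliasingDef()), we add low-priority coalescing candidates |
| // between the def argument and each of the two preceding use arguments. |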
| void buildLowPriorityMoveList() |
| { |
| if (!isX86()) |
| return; |
| |
| m_worklistMoves.startAddingLowPriorityMoves(); |
| for (BasicBlock* block : m_code) { |
| for (Inst& inst : *block) { |
| if (Optional<unsigned> defArgIndex = inst.shouldTryAliasingDef()) { |
| Arg op1 = inst.args[*defArgIndex - 2]; |
| Arg op2 = inst.args[*defArgIndex - 1]; |
| Arg dest = inst.args[*defArgIndex]; |
| |
| if (op1 == dest || op2 == dest) |
| continue; |
| |
| if (mayBeCoalesced(op1, dest)) |
| addToLowPriorityCoalescingCandidates(op1, dest); |
| if (op1 != op2 && mayBeCoalesced(op2, dest)) |
| addToLowPriorityCoalescingCandidates(op2, dest); |
| } |
| } |
| } |
| } |
| |
| void addEdges(Inst* prevInst, Inst* nextInst, typename TmpLiveness<bank>::LocalCalc::Iterable liveTmps) |
| { |
| // All the Def()s interfere with everything live. |
| Inst::forEachDefWithExtraClobberedRegs<Tmp>( |
| prevInst, nextInst, |
| [&] (const Tmp& arg, Arg::Role, Bank argBank, Width) { |
| if (argBank != bank) |
| return; |
| |
| for (Tmp liveTmp : liveTmps) { |
| ASSERT(liveTmp.isGP() == (bank == GP)); |
| |
| if (traceDebug) |
| dataLog(" Adding def-live edge: ", arg, ", ", liveTmp, "\n"); |
| |
| addEdge(arg, liveTmp); |
| } |
| for (const Tmp& pinnedRegTmp : m_pinnedRegs) |
| addEdge(arg, pinnedRegTmp); |
| }); |
| } |
| |
| void addEdge(Tmp a, Tmp b) |
| { |
| ASSERT_WITH_MESSAGE(a.isGP() == b.isGP(), "An interference between registers of different types does not make sense, it can lead to non-colorable graphs."); |
| |
| addEdge(TmpMapper::absoluteIndex(a), TmpMapper::absoluteIndex(b)); |
| } |
| |
| // Calling this without a tmpWidth will perform a more conservative coalescing analysis that assumes |
| // that Move32's are not coalescable. |
| static bool mayBeCoalescableImpl(const Inst& inst, TmpWidth* tmpWidth) |
| { |
| switch (bank) { |
| case GP: |
| switch (inst.kind.opcode) { |
| case Move: |
| case Move32: |
| break; |
| default: |
| return false; |
| } |
| break; |
| case FP: |
| switch (inst.kind.opcode) { |
| case MoveFloat: |
| case MoveDouble: |
| break; |
| default: |
| return false; |
| } |
| break; |
| } |
| |
| // Avoid the three-argument coalescable spill moves. |
| if (inst.args.size() != 2) |
| return false; |
| |
| if (!inst.args[0].isTmp() || !inst.args[1].isTmp()) |
| return false; |
| |
| ASSERT(inst.args[0].bank() == bank); |
| ASSERT(inst.args[1].bank() == bank); |
| |
| // We can coalesce a Move32 so long as either of the following holds: |
| // - The input is already zero-filled. |
| // - The output only cares about the low 32 bits. |
| // |
| // Note that the input property requires an analysis over ZDef's, so it's only valid so long |
| // as the input gets a register. We don't know if the input gets a register, but we do know |
| // that if it doesn't get a register then we will still emit this Move32. |
| if (inst.kind.opcode == Move32) { |
| if (!tmpWidth) |
| return false; |
| |
| if (tmpWidth->defWidth(inst.args[0].tmp()) > Width32 |
| && tmpWidth->useWidth(inst.args[1].tmp()) > Width32) |
| return false; |
| } |
| |
| return true; |
| } |
| |
| TmpWidth& m_tmpWidth; |
| }; |
| |
| class GraphColoringRegisterAllocation { |
| public: |
| GraphColoringRegisterAllocation(Code& code, UseCounts<Tmp>& useCounts) |
| : m_code(code) |
| , m_useCounts(useCounts) |
| { |
| } |
| |
| void run() |
| { |
| padInterference(m_code); |
| |
| allocateOnBank<GP>(); |
| m_numIterations = 0; |
| allocateOnBank<FP>(); |
| |
| fixSpillsAfterTerminals(m_code); |
| |
| if (reportStats) |
| dataLog("Num iterations = ", m_numIterations, "\n"); |
| } |
| |
| private: |
| template<Bank bank> |
| void allocateOnBank() |
| { |
| HashSet<unsigned> unspillableTmps = computeUnspillableTmps<bank>(); |
| |
| // FIXME: If a Tmp is used only from a Scratch role and that argument is !admitsStack, then |
| // we should add the Tmp to unspillableTmps. That will help avoid relooping only to turn the |
| // Tmp into an unspillable Tmp. |
| // https://bugs.webkit.org/show_bug.cgi?id=152699 |
| |
| while (true) { |
| ++m_numIterations; |
| |
| if (traceDebug) |
| dataLog("Code at iteration ", m_numIterations, ":\n", m_code); |
| |
| // FIXME: One way to optimize this code is to remove the recomputation inside the fixpoint. |
| // We need to recompute because spilling adds tmps, but we could just update tmpWidth when we |
| // add those tmps. Note that one easy way to remove the recomputation is to make any newly |
| // added Tmps get the same use/def widths that the original Tmp got. But, this may hurt the |
| // spill code we emit. Since we currently recompute TmpWidth after spilling, the newly |
| // created Tmps may get narrower use/def widths. On the other hand, the spiller already |
| // selects which move instruction to use based on the original Tmp's widths, so it may not |
| // matter that a subsequent iteration sees a conservative width for the new Tmps. Also, the |
| // recomputation may not actually be a performance problem; it's likely that a better way to |
| // improve performance of TmpWidth is to replace its HashMap with something else. It's |
| // possible that most of the TmpWidth overhead is from queries of TmpWidth rather than the |
| // recomputation, in which case speeding up the lookup would be a bigger win. |
| // https://bugs.webkit.org/show_bug.cgi?id=152478 |
| m_tmpWidth.recompute(m_code); |
| |
| auto doAllocation = [&] (auto& allocator) -> bool { |
| allocator.allocate(); |
| if (!allocator.requiresSpilling()) { |
| this->assignRegistersToTmp<bank>(allocator); |
| if (traceDebug) |
| dataLog("Successful allocation at iteration ", m_numIterations, ":\n", m_code); |
| |
| return true; |
| } |
| |
| this->addSpillAndFill<bank>(allocator, unspillableTmps); |
| return false; |
| }; |
| |
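| // Both allocator flavors expose the same interface; useIRC() merely selects between the |
| // iterated-register-coalescing and Briggs coalescing strategies. |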
| bool done; |
| if (useIRC()) { |
| ColoringAllocator<bank, IRC> allocator(m_code, m_tmpWidth, m_useCounts, unspillableTmps); |
| done = doAllocation(allocator); |
| } else { |
| ColoringAllocator<bank, Briggs> allocator(m_code, m_tmpWidth, m_useCounts, unspillableTmps); |
| done = doAllocation(allocator); |
| } |
| if (done) |
| return; |
| } |
| } |
| |
| template<Bank bank> |
| HashSet<unsigned> computeUnspillableTmps() |
| { |
| HashSet<unsigned> unspillableTmps; |
| |
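| // For each Tmp we track the global index of its first and last mention, how many times it is |
| // mentioned, and how many of those mentions could take a stack slot instead of a register. |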
| struct Range { |
| unsigned first { std::numeric_limits<unsigned>::max() }; |
| unsigned last { 0 }; |
| unsigned count { 0 }; |
| unsigned admitStackCount { 0 }; |
| }; |
| |
| unsigned numTmps = m_code.numTmps(bank); |
| unsigned arraySize = AbsoluteTmpMapper<bank>::absoluteIndex(numTmps); |
| |
| Vector<Range, 0, UnsafeVectorOverflow> ranges; |
| ranges.fill(Range(), arraySize); |
| |
| unsigned globalIndex = 0; |
| for (BasicBlock* block : m_code) { |
| for (Inst& inst : *block) { |
| inst.forEachArg([&] (Arg& arg, Arg::Role, Bank argBank, Width) { |
| if (arg.isTmp() && inst.admitsStack(arg)) { |
| if (argBank != bank) |
| return; |
| |
| Tmp tmp = arg.tmp(); |
| Range& range = ranges[AbsoluteTmpMapper<bank>::absoluteIndex(tmp)]; |
| range.count++; |
| range.admitStackCount++; |
| if (globalIndex < range.first) { |
| range.first = globalIndex; |
| range.last = globalIndex; |
| } else |
| range.last = globalIndex; |
| |
| return; |
| } |
| |
| arg.forEachTmpFast([&] (Tmp& tmp) { |
| if (tmp.isGP() != (bank == GP)) |
| return; |
| |
| Range& range = ranges[AbsoluteTmpMapper<bank>::absoluteIndex(tmp)]; |
| range.count++; |
| if (globalIndex < range.first) { |
| range.first = globalIndex; |
| range.last = globalIndex; |
| } else |
| range.last = globalIndex; |
| }); |
| }); |
| |
| ++globalIndex; |
| } |
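| // Bump the index an extra time at block boundaries so that mentions in the last instruction of |
| // one block and the first instruction of the next never look like a single tiny live range. |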
| ++globalIndex; |
| } |
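| // A Tmp whose mentions all fall within a span of at most two instructions, and which has at |
| // least one mention that cannot take a stack slot, gains nothing from spilling: the fill and |
| // spill code would sit right next to its uses and recreate a Tmp with the same tiny range. |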
| for (unsigned i = AbsoluteTmpMapper<bank>::lastMachineRegisterIndex() + 1; i < ranges.size(); ++i) { |
| Range& range = ranges[i]; |
| if (range.last - range.first <= 1 && range.count > range.admitStackCount) |
| unspillableTmps.add(i); |
| } |
| |
| return unspillableTmps; |
| } |
| |
| template<Bank bank, typename AllocatorType> |
| void assignRegistersToTmp(const AllocatorType& allocator) |
| { |
| for (BasicBlock* block : m_code) { |
| // Give each Tmp a valid register. |
| for (unsigned instIndex = 0; instIndex < block->size(); ++instIndex) { |
| Inst& inst = block->at(instIndex); |
| |
| // The mayBeCoalescable() method will change its mind for some operations after we |
| // complete register allocation. So, we record this before starting. |
| bool mayBeCoalescable = allocator.mayBeCoalescable(inst); |
| |
| // On some architectures (notably x86-64), Move32 is cheaper than a Move when the two are |
| // equivalent. They are equivalent if the destination's high bits are not observable or if the |
| // source's high bits are all zero. Note that we don't have the opposite optimization for |
| // architectures that may prefer Move over Move32, because Move is canonical already. |
| if (bank == GP && inst.kind.opcode == Move |
| && inst.args[0].isTmp() && inst.args[1].isTmp()) { |
| if (m_tmpWidth.useWidth(inst.args[1].tmp()) <= Width32 |
| || m_tmpWidth.defWidth(inst.args[0].tmp()) <= Width32) |
| inst.kind.opcode = Move32; |
| } |
| |
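| // Rewrite each remaining Tmp to the register chosen for its coalescing alias. |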
| inst.forEachTmpFast([&] (Tmp& tmp) { |
| if (tmp.isReg() || tmp.bank() != bank) |
| return; |
| |
| Tmp aliasTmp = allocator.getAlias(tmp); |
| Tmp assignedTmp; |
| if (aliasTmp.isReg()) |
| assignedTmp = Tmp(aliasTmp.reg()); |
| else { |
| auto reg = allocator.allocatedReg(aliasTmp); |
| ASSERT(reg); |
| assignedTmp = Tmp(reg); |
| } |
| ASSERT(assignedTmp.isReg()); |
| tmp = assignedTmp; |
| }); |
| |
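| // A coalescable move whose source and destination ended up in the same register is now a |
| // no-op; clear it so it gets removed below. |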
| if (mayBeCoalescable && inst.args[0].isTmp() && inst.args[1].isTmp() |
| && inst.args[0].tmp() == inst.args[1].tmp()) |
| inst = Inst(); |
| } |
| |
| // Remove all the useless moves we created in this block. |
| block->insts().removeAllMatching([&] (const Inst& inst) { |
| return !inst; |
| }); |
| } |
| } |
| |
| static unsigned stackSlotMinimumWidth(Width width) |
| { |
| return width <= Width32 ? 4 : 8; |
| } |
| |
| template<Bank bank, typename AllocatorType> |
| void addSpillAndFill(const AllocatorType& allocator, HashSet<unsigned>& unspillableTmps) |
| { |
| HashMap<Tmp, StackSlot*> stackSlots; |
| for (Tmp tmp : allocator.spilledTmps()) { |
| // All the spilled values become unspillable. |
| unspillableTmps.add(AbsoluteTmpMapper<bank>::absoluteIndex(tmp)); |
| |
| // Allocate stack slot for each spilled value. |
| StackSlot* stackSlot = m_code.addStackSlot( |
| stackSlotMinimumWidth(m_tmpWidth.requiredWidth(tmp)), StackSlotKind::Spill); |
| bool isNewTmp = stackSlots.add(tmp, stackSlot).isNewEntry; |
| ASSERT_UNUSED(isNewTmp, isNewTmp); |
| } |
| |
| // Rewrite the program to get rid of the spilled Tmp. |
| InsertionSet insertionSet(m_code); |
| for (BasicBlock* block : m_code) { |
| bool hasAliasedTmps = false; |
| |
| for (unsigned instIndex = 0; instIndex < block->size(); ++instIndex) { |
| Inst& inst = block->at(instIndex); |
| |
| // The TmpWidth analysis will say that a Move only stores 32 bits into the destination |
| // if the source only has 32 bits worth of non-zero bits. Same for the source: it will |
| // only claim to read 32 bits from the source if only 32 bits of the destination are |
| // read. Note that we only apply this logic if this turns into a load or store, since |
| // Move is the canonical way to move data between GPRs. |
| bool canUseMove32IfDidSpill = false; |
| bool didSpill = false; |
| bool needScratch = false; |
| if (bank == GP && inst.kind.opcode == Move) { |
| if ((inst.args[0].isTmp() && m_tmpWidth.width(inst.args[0].tmp()) <= Width32) |
| || (inst.args[1].isTmp() && m_tmpWidth.width(inst.args[1].tmp()) <= Width32)) |
| canUseMove32IfDidSpill = true; |
| } |
| |
| // Try to replace register uses with memory uses when possible. |
| inst.forEachArg( |
| [&] (Arg& arg, Arg::Role role, Bank argBank, Width width) { |
| if (!arg.isTmp()) |
| return; |
| if (argBank != bank) |
| return; |
| if (arg.isReg()) |
| return; |
| |
| auto stackSlotEntry = stackSlots.find(arg.tmp()); |
| if (stackSlotEntry == stackSlots.end()) |
| return; |
| bool needScratchIfSpilledInPlace = false; |
| if (!inst.admitsStack(arg)) { |
| if (traceDebug) |
| dataLog("Have an inst that won't admit stack: ", inst, "\n"); |
| switch (inst.kind.opcode) { |
| case Move: |
| case MoveDouble: |
| case MoveFloat: |
| case Move32: { |
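| // A two-argument move whose other operand is already a spill slot would become a |
| // memory-to-memory move if we spill this arg in place, so we will need a scratch register. |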
| unsigned argIndex = &arg - &inst.args[0]; |
| unsigned otherArgIndex = argIndex ^ 1; |
| Arg otherArg = inst.args[otherArgIndex]; |
| if (inst.args.size() == 2 |
| && otherArg.isStack() |
| && otherArg.stackSlot()->isSpill()) { |
| needScratchIfSpilledInPlace = true; |
| break; |
| } |
| return; |
| } |
| default: |
| return; |
| } |
| } |
| |
| // If the Tmp holds a constant then we want to rematerialize its |
| // value rather than loading it from the stack. In order for that |
| // optimization to kick in, we need to avoid placing the Tmp's stack |
| // address into the instruction. |
| if (!Arg::isColdUse(role)) { |
| const UseCounts<Tmp>::Counts* counts = m_useCounts[arg.tmp()]; |
| if (counts && counts->numConstDefs == 1 && counts->numDefs == 1) |
| return; |
| } |
| |
| Width spillWidth = m_tmpWidth.requiredWidth(arg.tmp()); |
| if (Arg::isAnyDef(role) && width < spillWidth) { |
| // Either there are users of this tmp who will use more than width, |
| // or there are producers who will produce more than width non-zero |
| // bits. |
| // FIXME: It's not clear why we should have to return here. We have |
| // a ZDef fixup in allocateStack. And if this isn't a ZDef, then it |
| // doesn't seem like it matters what happens to the high bits. Note |
| // that this isn't the case where we're storing more than what the |
| // spill slot can hold - we already got that covered because we |
| // stretch the spill slot on demand. One possibility is that it's ZDefs of |
| // smaller width than 32-bit. |
| // https://bugs.webkit.org/show_bug.cgi?id=169823 |
| return; |
| } |
| ASSERT(inst.kind.opcode == Move || !(Arg::isAnyUse(role) && width > spillWidth)); |
| |
| if (spillWidth != Width32) |
| canUseMove32IfDidSpill = false; |
| |
| stackSlotEntry->value->ensureSize( |
| canUseMove32IfDidSpill ? 4 : bytes(width)); |
| arg = Arg::stack(stackSlotEntry->value); |
| didSpill = true; |
| if (needScratchIfSpilledInPlace) |
| needScratch = true; |
| }); |
| |
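| // If we spilled and TmpWidth told us only the low 32 bits matter, narrow the Move to a |
| // Move32 so the resulting load/store only touches 4 bytes. |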
| if (didSpill && canUseMove32IfDidSpill) |
| inst.kind.opcode = Move32; |
| |
| if (needScratch) { |
| Bank instBank; |
| switch (inst.kind.opcode) { |
| case Move: |
| case Move32: |
| instBank = GP; |
| break; |
| case MoveDouble: |
| case MoveFloat: |
| instBank = FP; |
| break; |
| default: |
| RELEASE_ASSERT_NOT_REACHED(); |
| instBank = GP; |
| break; |
| } |
| |
| RELEASE_ASSERT(instBank == bank); |
| |
| Tmp tmp = m_code.newTmp(bank); |
| unspillableTmps.add(AbsoluteTmpMapper<bank>::absoluteIndex(tmp)); |
| inst.args.append(tmp); |
| RELEASE_ASSERT(inst.args.size() == 3); |
| |
| // Without this Nop, a chain of spill moves would need two registers, not one, because |
| // the scratch registers of successive moves would appear to interfere with each |
| // other. We also need it if the previous instruction had any late effects, since |
| // otherwise the scratch would appear to interfere with those. On the other hand, the |
| // late use added at the end of this spill move (previously it was just a late def) |
| // doesn't change the padding situation: the late def would have already caused it to |
| // report hasLateUseOrDef in Inst::needsPadding. |
| insertionSet.insert(instIndex, Nop, inst.origin); |
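| // The spilled arg was rewritten to its stack slot above, so there is nothing left to |
| // load or store for this instruction. |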
| continue; |
| } |
| |
| // For every other case, add Load/Store as needed. |
| inst.forEachTmp([&] (Tmp& tmp, Arg::Role role, Bank argBank, Width) { |
| if (tmp.isReg() || argBank != bank) |
| return; |
| |
| auto stackSlotEntry = stackSlots.find(tmp); |
| if (stackSlotEntry == stackSlots.end()) { |
| Tmp alias = allocator.getAliasWhenSpilling(tmp); |
| if (alias != tmp) { |
| tmp = alias; |
| hasAliasedTmps = true; |
| } |
| return; |
| } |
| |
| Width spillWidth = m_tmpWidth.requiredWidth(tmp); |
| Opcode move = Oops; |
| switch (stackSlotMinimumWidth(spillWidth)) { |
| case 4: |
| move = bank == GP ? Move32 : MoveFloat; |
| break; |
| case 8: |
| move = bank == GP ? Move : MoveDouble; |
| break; |
| default: |
| RELEASE_ASSERT_NOT_REACHED(); |
| break; |
| } |
| |
| tmp = m_code.newTmp(bank); |
| unspillableTmps.add(AbsoluteTmpMapper<bank>::absoluteIndex(tmp)); |
| |
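| // A Scratch role just needs a fresh register; there is no value to fill or spill. |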
| if (role == Arg::Scratch) |
| return; |
| |
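| // Fill the replacement Tmp from the spill slot before a use; store it back after a def. |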
| Arg arg = Arg::stack(stackSlotEntry->value); |
| if (Arg::isAnyUse(role)) |
| insertionSet.insert(instIndex, move, inst.origin, arg, tmp); |
| if (Arg::isAnyDef(role)) |
| insertionSet.insert(instIndex + 1, move, inst.origin, tmp, arg); |
| }); |
| } |
| insertionSet.execute(block); |
| |
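| // Rewriting spilled Tmps to their coalescing aliases above may have turned some moves into |
| // no-ops; drop the ones the allocator identifies as useless. |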
| if (hasAliasedTmps) { |
| block->insts().removeAllMatching([&] (const Inst& inst) { |
| return allocator.isUselessMove(inst); |
| }); |
| } |
| } |
| } |
| |
| Code& m_code; |
| TmpWidth m_tmpWidth; |
| UseCounts<Tmp>& m_useCounts; |
| unsigned m_numIterations { 0 }; |
| }; |
| |
| } // anonymous namespace |
| |
| void allocateRegistersByGraphColoring(Code& code) |
| { |
| PhaseScope phaseScope(code, "allocateRegistersByGraphColoring"); |
| |
| if (false) |
| dataLog("Code before graph coloring:\n", code); |
| |
| UseCounts<Tmp> useCounts(code); |
| GraphColoringRegisterAllocation graphColoringRegisterAllocation(code, useCounts); |
| graphColoringRegisterAllocation.run(); |
| } |
| |
| } } } // namespace JSC::B3::Air |
| |
| #endif // ENABLE(B3_JIT) |