// blob: 037c40fcc247718186d44624a8c71ab3da72e7b7 [file] [log] [blame]
/*
* Copyright (C) 2015-2017 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "AirFixPartialRegisterStalls.h"
#if ENABLE(B3_JIT)
#include "AirBasicBlock.h"
#include "AirCode.h"
#include "AirInsertionSet.h"
#include "AirInst.h"
#include "AirInstInlines.h"
#include "AirPhaseScope.h"
#include "MacroAssembler.h"
#include <wtf/IndexMap.h>
#include <wtf/IndexSet.h>
#include <wtf/Vector.h>
namespace JSC { namespace B3 { namespace Air {
namespace {
// Returns true when the instruction's opcode is one of the conversion, square-root,
// or rounding opcodes that this phase treats as a partial XMM register update.
bool hasPartialXmmRegUpdate(const Inst& inst)
{
    const auto opcode = inst.kind.opcode;

    // Floating-point precision changes and integer-to-floating-point conversions.
    const bool isConversion = opcode == ConvertDoubleToFloat
        || opcode == ConvertFloatToDouble
        || opcode == ConvertInt32ToDouble
        || opcode == ConvertInt64ToDouble
        || opcode == ConvertInt32ToFloat
        || opcode == ConvertInt64ToFloat;

    const bool isSquareRoot = opcode == SqrtDouble || opcode == SqrtFloat;

    const bool isRounding = opcode == CeilDouble
        || opcode == CeilFloat
        || opcode == FloorDouble
        || opcode == FloorFloat;

    return isConversion || isSquareRoot || isRounding;
}
// Returns true for the one opcode this phase recognizes as breaking a register dependency.
bool isDependencyBreaking(const Inst& inst)
{
    switch (inst.kind.opcode) {
    case MoveZeroToDouble:
        // "xorps reg, reg" is used by the frontend to remove the dependency on its argument.
        return true;
    default:
        return false;
    }
}
// Threshold used by this phase: a tracked definition distance strictly below this value
// is considered too short, and distances at or above it are not propagated or acted upon.
// Declared constexpr because nothing in this file is meant to modify it at runtime.
// FIXME: find a good distance per architecture experimentally.
// LLVM uses a distance of 16 but that comes from Nehalem.
constexpr unsigned char minimumSafeDistance = 16;
// Table holding one byte-sized distance per floating-point register, indexed by
// MacroAssembler::fpRegisterIndex(). Every slot starts at 255, which is also the value
// reset() restores.
struct FPDefDistance {
    FPDefDistance()
    {
        for (unsigned char& slot : distance)
            slot = 255;
    }

    // Puts the slot for reg back to its initial value of 255.
    void reset(FPRReg reg)
    {
        distance[MacroAssembler::fpRegisterIndex(reg)] = 255;
    }

    // Lowers the slot for reg to registerDistance, but only when that is strictly smaller
    // than what is already stored (so the stored value always fits in a byte).
    void add(FPRReg reg, unsigned registerDistance)
    {
        unsigned char& slot = distance[MacroAssembler::fpRegisterIndex(reg)];
        if (registerDistance >= slot)
            return;
        slot = static_cast<unsigned char>(registerDistance);
    }

    // Merges another table into this one. For each register, the other table's distance plus
    // constantOffset replaces ours when it is both below minimumSafeDistance and strictly
    // smaller than our current value. Returns true if at least one slot was lowered.
    bool updateFromPrecessor(FPDefDistance& precessorDistance, unsigned constantOffset = 0)
    {
        bool anySlotLowered = false;
        const unsigned registerCount = MacroAssembler::numberOfFPRegisters();
        for (unsigned index = 0; index != registerCount; ++index) {
            unsigned candidate = precessorDistance.distance[index] + constantOffset;
            if (candidate >= minimumSafeDistance || candidate >= distance[index])
                continue;
            distance[index] = candidate;
            anySlotLowered = true;
        }
        return anySlotLowered;
    }

    unsigned char distance[MacroAssembler::numberOfFPRegisters()];
};
// Accounts for one instruction while walking a block front to back.
// distanceToBlockEnd is decremented first, so it holds the number of instructions that
// follow inst in the block. A dependency-breaking instruction resets the slot of the
// register in its first argument; any other instruction records distanceToBlockEnd for
// every FP register it defines.
void updateDistances(Inst& inst, FPDefDistance& localDistance, unsigned& distanceToBlockEnd)
{
    --distanceToBlockEnd;

    if (!isDependencyBreaking(inst)) {
        inst.forEachTmp([&] (Tmp& tmp, Arg::Role role, Bank, Width) {
            ASSERT_WITH_MESSAGE(tmp.isReg(), "This phase must be run after register allocation.");
            if (!tmp.isFPR() || !Arg::isAnyDef(role))
                return;
            localDistance.add(tmp.fpr(), distanceToBlockEnd);
        });
        return;
    }

    localDistance.reset(inst.args[0].tmp().fpr());
}
}
// Phase entry point; does nothing unless isX86() is true.
// In every block that contains a partial-update instruction, a MoveZeroToDouble is inserted
// in front of such an instruction for each FP register it defines without using, whenever the
// tracked definition distance for that register is below minimumSafeDistance.
void fixPartialRegisterStalls(Code& code)
{
    if (!isX86())
        return;

    PhaseScope phaseScope(code, "fixPartialRegisterStalls");

    // Gather the blocks that contain at least one partial-update instruction.
    Vector<BasicBlock*> candidates;
    for (BasicBlock* block : code) {
        bool blockHasPartialUpdate = false;
        for (const Inst& inst : *block) {
            if (hasPartialXmmRegUpdate(inst)) {
                blockHasPartialUpdate = true;
                break;
            }
        }
        if (blockHasPartialUpdate)
            candidates.append(block);
    }

    // Instructions with a partial register update are uncommon, so bail out early when no
    // block contains one.
    if (candidates.isEmpty())
        return;

    // For each block, the distance to the last instruction setting each register,
    // as seen on block *entry*.
    IndexMap<BasicBlock*, FPDefDistance> lastDefDistance(code.size());

    // Blocks whose entry distances changed and still have to be pushed to their successors.
    IndexSet<BasicBlock*> dirty;

    // Step 1: compute each block's own distances and hand them to its successors.
    for (BasicBlock* block : code) {
        FPDefDistance blockLocal;
        unsigned remaining = block->size();
        for (Inst& inst : *block)
            updateDistances(inst, blockLocal, remaining);

        for (BasicBlock* successor : block->successorBlocks()) {
            if (!lastDefDistance[successor].updateFromPrecessor(blockLocal))
                continue;
            dirty.add(successor);
        }
    }

    // Step 2: propagate the minimums across blocks until nothing changes anymore.
    bool changed = true;
    while (changed) {
        changed = false;
        for (BasicBlock* block : code) {
            // The block is taken out of the dirty set first. A block that is at least
            // minimumSafeDistance long cannot contribute anything to its successors.
            if (!dirty.remove(block) || block->size() >= minimumSafeDistance)
                continue;

            const unsigned blockSize = block->size();
            FPDefDistance& entryDistance = lastDefDistance[block];
            for (BasicBlock* successor : block->successorBlocks()) {
                if (!lastDefDistance[successor].updateFromPrecessor(entryDistance, blockSize))
                    continue;
                dirty.add(successor);
                changed = true;
            }
        }
    }

    // Step 3: rewrite the candidate blocks.
    InsertionSet insertionSet(code);
    for (BasicBlock* block : candidates) {
        unsigned remaining = block->size();
        FPDefDistance& runningDistance = lastDefDistance[block];

        for (unsigned instIndex = 0; instIndex < block->size(); ++instIndex) {
            Inst& inst = block->at(instIndex);

            if (hasPartialXmmRegUpdate(inst)) {
                RegisterSet defs;
                RegisterSet uses;
                inst.forEachTmp([&] (Tmp& tmp, Arg::Role role, Bank, Width) {
                    if (!tmp.isFPR())
                        return;
                    if (Arg::isAnyDef(role))
                        defs.set(tmp.fpr());
                    if (Arg::isAnyUse(role))
                        uses.set(tmp.fpr());
                });

                // Only registers that are defined but not used matter. For a used register
                // we have to wait for the value to be resolved anyway.
                defs.exclude(uses);
                defs.forEach([&] (Reg reg) {
                    unsigned slot = MacroAssembler::fpRegisterIndex(reg.fpr());
                    if (runningDistance.distance[slot] < minimumSafeDistance)
                        insertionSet.insert(instIndex, MoveZeroToDouble, inst.origin, Tmp(reg));
                });
            }

            updateDistances(inst, runningDistance, remaining);
        }

        // Apply the queued insertions for this block before moving to the next one.
        insertionSet.execute(block);
    }
}
} } } // namespace JSC::B3::Air
#endif // ENABLE(B3_JIT)