blob: e9181c5d6e97a68a66f37b1ce4322bf69076ecd7 [file] [log] [blame]
/*
* Copyright (C) 2014, 2015 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "NFAToDFA.h"
#if ENABLE(CONTENT_EXTENSIONS)
#include "ContentExtensionsDebugging.h"
#include "DFANode.h"
#include "NFA.h"
#include <wtf/DataLog.h>
#include <wtf/HashMap.h>
#include <wtf/HashSet.h>
namespace WebCore {
namespace ContentExtensions {
// FIXME: set a better initial size.
// FIXME: include the hash inside NodeIdSet.
typedef NFANodeIndexSet NodeIdSet;
static inline void epsilonClosureExcludingSelf(const Vector<NFANode>& nfaGraph, unsigned nodeId, unsigned epsilonTransitionCharacter, Vector<unsigned>& output)
{
const auto& transitions = nfaGraph[nodeId].transitions;
auto epsilonTransitionSlot = transitions.find(epsilonTransitionCharacter);
if (epsilonTransitionSlot == transitions.end())
return;
NodeIdSet closure({ nodeId });
Vector<unsigned, 64> unprocessedNodes;
copyToVector(epsilonTransitionSlot->value, unprocessedNodes);
closure.add(unprocessedNodes.begin(), unprocessedNodes.end());
output = unprocessedNodes;
do {
unsigned unprocessedNodeId = unprocessedNodes.takeLast();
const NFANode& node = nfaGraph[unprocessedNodeId];
auto epsilonTransitionSlot = node.transitions.find(epsilonTransitionCharacter);
if (epsilonTransitionSlot != node.transitions.end()) {
for (unsigned targetNodeId : epsilonTransitionSlot->value) {
auto addResult = closure.add(targetNodeId);
if (addResult.isNewEntry) {
unprocessedNodes.append(targetNodeId);
output.append(targetNodeId);
}
}
}
} while (!unprocessedNodes.isEmpty());
}
static void resolveEpsilonClosures(Vector<NFANode>& nfaGraph, unsigned epsilonTransitionCharacter, Vector<Vector<unsigned>>& nfaNodeClosures)
{
unsigned nfaGraphSize = nfaGraph.size();
nfaNodeClosures.resize(nfaGraphSize);
for (unsigned nodeId = 0; nodeId < nfaGraphSize; ++nodeId)
epsilonClosureExcludingSelf(nfaGraph, nodeId, epsilonTransitionCharacter, nfaNodeClosures[nodeId]);
for (unsigned nodeId = 0; nodeId < nfaGraphSize; ++nodeId) {
if (!nfaNodeClosures[nodeId].isEmpty()) {
bool epsilonTransitionIsRemoved = nfaGraph[nodeId].transitions.remove(epsilonTransitionCharacter);
ASSERT_UNUSED(epsilonTransitionIsRemoved, epsilonTransitionIsRemoved);
}
}
}
static ALWAYS_INLINE void extendSetWithClosure(const Vector<Vector<unsigned>>& nfaNodeClosures, unsigned nodeId, NodeIdSet& set)
{
ASSERT(set.contains(nodeId));
const Vector<unsigned>& nodeClosure = nfaNodeClosures[nodeId];
if (!nodeClosure.isEmpty())
set.add(nodeClosure.begin(), nodeClosure.end());
}
struct UniqueNodeIdSetImpl {
unsigned* buffer()
{
return m_buffer;
}
const unsigned* buffer() const
{
return m_buffer;
}
unsigned m_size;
unsigned m_hash;
unsigned m_dfaNodeId;
private:
unsigned m_buffer[1];
};
class UniqueNodeIdSet {
public:
UniqueNodeIdSet() { }
enum EmptyValueTag { EmptyValue };
enum DeletedValueTag { DeletedValue };
UniqueNodeIdSet(EmptyValueTag) { }
UniqueNodeIdSet(DeletedValueTag)
: m_uniqueNodeIdSetBuffer(reinterpret_cast<UniqueNodeIdSetImpl*>(-1))
{
}
UniqueNodeIdSet(const NodeIdSet& nodeIdSet, unsigned hash, unsigned dfaNodeId)
{
ASSERT(nodeIdSet.size());
unsigned size = nodeIdSet.size();
size_t byteSize = sizeof(UniqueNodeIdSetImpl) + (size - 1) * sizeof(unsigned);
m_uniqueNodeIdSetBuffer = static_cast<UniqueNodeIdSetImpl*>(fastMalloc(byteSize));
m_uniqueNodeIdSetBuffer->m_size = size;
m_uniqueNodeIdSetBuffer->m_hash = hash;
m_uniqueNodeIdSetBuffer->m_dfaNodeId = dfaNodeId;
unsigned* buffer = m_uniqueNodeIdSetBuffer->buffer();
for (unsigned nodeId : nodeIdSet) {
*buffer = nodeId;
++buffer;
}
}
UniqueNodeIdSet(UniqueNodeIdSet&& other)
: m_uniqueNodeIdSetBuffer(other.m_uniqueNodeIdSetBuffer)
{
other.m_uniqueNodeIdSetBuffer = nullptr;
}
UniqueNodeIdSet& operator=(UniqueNodeIdSet&& other)
{
m_uniqueNodeIdSetBuffer = other.m_uniqueNodeIdSetBuffer;
other.m_uniqueNodeIdSetBuffer = nullptr;
return *this;
}
~UniqueNodeIdSet()
{
fastFree(m_uniqueNodeIdSetBuffer);
}
bool operator==(const UniqueNodeIdSet& other) const
{
return m_uniqueNodeIdSetBuffer == other.m_uniqueNodeIdSetBuffer;
}
bool operator==(const NodeIdSet& other) const
{
if (m_uniqueNodeIdSetBuffer->m_size != static_cast<unsigned>(other.size()))
return false;
unsigned* buffer = m_uniqueNodeIdSetBuffer->buffer();
for (unsigned i = 0; i < m_uniqueNodeIdSetBuffer->m_size; ++i) {
if (!other.contains(buffer[i]))
return false;
}
return true;
}
UniqueNodeIdSetImpl* impl() const { return m_uniqueNodeIdSetBuffer; }
unsigned hash() const { return m_uniqueNodeIdSetBuffer->m_hash; }
bool isEmptyValue() const { return !m_uniqueNodeIdSetBuffer; }
bool isDeletedValue() const { return m_uniqueNodeIdSetBuffer == reinterpret_cast<UniqueNodeIdSetImpl*>(-1); }
private:
UniqueNodeIdSetImpl* m_uniqueNodeIdSetBuffer = nullptr;
};
struct UniqueNodeIdSetHash {
static unsigned hash(const UniqueNodeIdSet& p)
{
return p.hash();
}
static bool equal(const UniqueNodeIdSet& a, const UniqueNodeIdSet& b)
{
return a == b;
}
// It would be fine to compare empty or deleted here, but not for the HashTranslator.
static const bool safeToCompareToEmptyOrDeleted = false;
};
struct UniqueNodeIdSetHashHashTraits : public WTF::CustomHashTraits<UniqueNodeIdSet> {
static const bool emptyValueIsZero = true;
// FIXME: Get a good size.
static const int minimumTableSize = 128;
};
typedef HashSet<std::unique_ptr<UniqueNodeIdSet>, UniqueNodeIdSetHash, UniqueNodeIdSetHashHashTraits> UniqueNodeIdSetTable;
struct NodeIdSetToUniqueNodeIdSetSource {
NodeIdSetToUniqueNodeIdSetSource(Vector<DFANode>& dfaGraph, const Vector<NFANode>& nfaGraph, const NodeIdSet& nodeIdSet)
: dfaGraph(dfaGraph)
, nfaGraph(nfaGraph)
, nodeIdSet(nodeIdSet)
{
// The hashing operation must be independant of the nodeId.
unsigned hash = 4207445155;
for (unsigned nodeId : nodeIdSet)
hash += nodeId;
this->hash = DefaultHash<unsigned>::Hash::hash(hash);
}
Vector<DFANode>& dfaGraph;
const Vector<NFANode>& nfaGraph;
const NodeIdSet& nodeIdSet;
unsigned hash;
};
struct NodeIdSetToUniqueNodeIdSetTranslator {
static unsigned hash(const NodeIdSetToUniqueNodeIdSetSource& source)
{
return source.hash;
}
static inline bool equal(const UniqueNodeIdSet& a, const NodeIdSetToUniqueNodeIdSetSource& b)
{
return a == b.nodeIdSet;
}
static void translate(UniqueNodeIdSet& location, const NodeIdSetToUniqueNodeIdSetSource& source, unsigned hash)
{
DFANode newDFANode;
HashSet<uint64_t, DefaultHash<uint64_t>::Hash, WTF::UnsignedWithZeroKeyHashTraits<uint64_t>> actions;
for (unsigned nfaNodeId : source.nodeIdSet) {
const NFANode& nfaNode = source.nfaGraph[nfaNodeId];
actions.add(nfaNode.finalRuleIds.begin(), nfaNode.finalRuleIds.end());
#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
newDFANode.correspondingNFANodes.append(nfaNodeId);
#endif
}
copyToVector(actions, newDFANode.actions);
unsigned dfaNodeId = source.dfaGraph.size();
source.dfaGraph.append(newDFANode);
new (NotNull, &location) UniqueNodeIdSet(source.nodeIdSet, hash, dfaNodeId);
ASSERT(location.impl());
}
};
class SetTransitions {
public:
NodeIdSet& operator[](unsigned index)
{
ASSERT(index < size());
return m_targets[index];
}
unsigned size() const
{
return WTF_ARRAY_LENGTH(m_targets);
}
NodeIdSet* begin()
{
return m_targets;
}
NodeIdSet* end()
{
return m_targets + size();
}
private:
NodeIdSet m_targets[128];
};
static inline void populateTransitions(SetTransitions& setTransitions, NodeIdSet& setFallbackTransition, const UniqueNodeIdSetImpl& sourceNodeSet, const Vector<NFANode>& graph, const Vector<Vector<unsigned>>& nfaNodeclosures)
{
ASSERT(!graph.isEmpty());
ASSERT(setFallbackTransition.isEmpty());
#if !ASSERT_DISABLED
for (const NodeIdSet& set : setTransitions)
ASSERT(set.isEmpty());
#endif
Vector<unsigned, 8> allFallbackTransitions;
const unsigned* buffer = sourceNodeSet.buffer();
for (unsigned i = 0; i < sourceNodeSet.m_size; ++i) {
unsigned nodeId = buffer[i];
const NFANode& nfaSourceNode = graph[nodeId];
for (unsigned targetTransition : nfaSourceNode.transitionsOnAnyCharacter)
allFallbackTransitions.append(targetTransition);
}
for (unsigned targetNodeId : allFallbackTransitions) {
auto addResult = setFallbackTransition.add(targetNodeId);
if (addResult.isNewEntry)
extendSetWithClosure(nfaNodeclosures, targetNodeId, setFallbackTransition);
}
for (unsigned i = 0; i < sourceNodeSet.m_size; ++i) {
unsigned nodeId = buffer[i];
for (const auto& transitionSlot : graph[nodeId].transitions) {
NodeIdSet& targetSet = setTransitions[transitionSlot.key];
for (unsigned targetNodId : transitionSlot.value) {
targetSet.add(targetNodId);
extendSetWithClosure(nfaNodeclosures, targetNodId, targetSet);
}
if (transitionSlot.key)
targetSet.add(setFallbackTransition.begin(), setFallbackTransition.end());
}
}
}
static ALWAYS_INLINE unsigned getOrCreateDFANode(const NodeIdSet& nfaNodeSet, const Vector<NFANode>& nfaGraph, Vector<DFANode>& dfaGraph, UniqueNodeIdSetTable& uniqueNodeIdSetTable, Vector<UniqueNodeIdSetImpl*>& unprocessedNodes)
{
NodeIdSetToUniqueNodeIdSetSource nodeIdSetToUniqueNodeIdSetSource(dfaGraph, nfaGraph, nfaNodeSet);
auto uniqueNodeIdAddResult = uniqueNodeIdSetTable.add<NodeIdSetToUniqueNodeIdSetTranslator>(nodeIdSetToUniqueNodeIdSetSource);
if (uniqueNodeIdAddResult.isNewEntry)
unprocessedNodes.append(uniqueNodeIdAddResult.iterator->impl());
return uniqueNodeIdAddResult.iterator->impl()->m_dfaNodeId;
}
DFA NFAToDFA::convert(NFA& nfa)
{
Vector<NFANode>& nfaGraph = nfa.m_nodes;
Vector<Vector<unsigned>> nfaNodeClosures;
resolveEpsilonClosures(nfaGraph, NFA::epsilonTransitionCharacter, nfaNodeClosures);
NodeIdSet initialSet({ nfa.root() });
extendSetWithClosure(nfaNodeClosures, nfa.root(), initialSet);
UniqueNodeIdSetTable uniqueNodeIdSetTable;
Vector<DFANode> dfaGraph;
NodeIdSetToUniqueNodeIdSetSource initialNodeIdSetToUniqueNodeIdSetSource(dfaGraph, nfaGraph, initialSet);
auto addResult = uniqueNodeIdSetTable.add<NodeIdSetToUniqueNodeIdSetTranslator>(initialNodeIdSetToUniqueNodeIdSetSource);
Vector<UniqueNodeIdSetImpl*> unprocessedNodes;
unprocessedNodes.append(addResult.iterator->impl());
SetTransitions transitionsFromClosedSet;
do {
UniqueNodeIdSetImpl* uniqueNodeIdSetImpl = unprocessedNodes.takeLast();
unsigned dfaNodeId = uniqueNodeIdSetImpl->m_dfaNodeId;
NodeIdSet setFallbackTransition;
populateTransitions(transitionsFromClosedSet, setFallbackTransition, *uniqueNodeIdSetImpl, nfaGraph, nfaNodeClosures);
for (unsigned key = 0; key < transitionsFromClosedSet.size(); ++key) {
NodeIdSet& targetNodeSet = transitionsFromClosedSet[key];
if (targetNodeSet.isEmpty())
continue;
unsigned targetNodeId = getOrCreateDFANode(targetNodeSet, nfaGraph, dfaGraph, uniqueNodeIdSetTable, unprocessedNodes);
const auto addResult = dfaGraph[dfaNodeId].transitions.add(key, targetNodeId);
ASSERT_UNUSED(addResult, addResult.isNewEntry);
targetNodeSet.clear();
}
if (!setFallbackTransition.isEmpty()) {
unsigned targetNodeId = getOrCreateDFANode(setFallbackTransition, nfaGraph, dfaGraph, uniqueNodeIdSetTable, unprocessedNodes);
DFANode& dfaSourceNode = dfaGraph[dfaNodeId];
ASSERT(!dfaSourceNode.hasFallbackTransition);
dfaSourceNode.hasFallbackTransition = true;
dfaSourceNode.fallbackTransition = targetNodeId;
}
} while (!unprocessedNodes.isEmpty());
return DFA(WTF::move(dfaGraph), 0);
}
} // namespace ContentExtensions
} // namespace WebCore
#endif // ENABLE(CONTENT_EXTENSIONS)