Source/WebCore/contentextensions/DFABytecodeCompiler.cpp - WebKit - Git at Google

 /*
  * Copyright (C) 2015 Apple Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  * THE POSSIBILITY OF SUCH DAMAGE.
  */

 #include "config.h"
 #include "DFABytecodeCompiler.h"

 #if ENABLE(CONTENT_EXTENSIONS)

 #include "ContentExtensionRule.h"
 #include "DFA.h"
 #include "DFANode.h"

 namespace WebCore {

 namespace ContentExtensions {

 template <typename IntType>
 inline void append(Vector<DFABytecode>& bytecode, IntType value)
 {
     bytecode.grow(bytecode.size() + sizeof(IntType));
     *reinterpret_cast<IntType*>(&bytecode[bytecode.size() - sizeof(IntType)]) = value;
 }

 inline void appendZeroes(Vector<DFABytecode>& bytecode, DFABytecodeJumpSize jumpSize)
 {
     switch (jumpSize) {
     case DFABytecodeJumpSize::Int8:
         append<int8_t>(bytecode, 0); // This value will be set when linking.
         break;
     case DFABytecodeJumpSize::Int16:
         append<int16_t>(bytecode, 0); // This value will be set when linking.
         break;
     case DFABytecodeJumpSize::Int24:
         append<uint16_t>(bytecode, 0);
         append<int8_t>(bytecode, 0); // These values will be set when linking.
         break;
     case DFABytecodeJumpSize::Int32:
         append<int32_t>(bytecode, 0); // This value will be set when linking.
         break;
     }
 }

 template <typename IntType>
 inline void setBits(Vector<DFABytecode>& bytecode, uint32_t index, IntType value)
 {
     RELEASE_ASSERT(index + sizeof(IntType) <= bytecode.size());
     ASSERT_WITH_MESSAGE(!*reinterpret_cast<IntType*>(&bytecode[index]), "Right now we should only be using setBits to overwrite values that were zero as a placeholder.");
     *reinterpret_cast<IntType*>(&bytecode[index]) = value;
 }

 static unsigned appendActionBytecodeSize(uint64_t action)
 {
     if (action & ActionFlagMask)
         return sizeof(DFABytecodeInstruction) + sizeof(uint16_t) + sizeof(uint32_t);
     return sizeof(DFABytecodeInstruction) + sizeof(uint32_t);
 }

 void DFABytecodeCompiler::emitAppendAction(uint64_t action)
 {
     // High bits are used to store flags. See compileRuleList.
     if (action & ActionFlagMask) {
         if (action & IfConditionFlag)
             append<DFABytecodeInstruction>(m_bytecode, DFABytecodeInstruction::TestFlagsAndAppendActionWithIfCondition);
         else
             append<DFABytecodeInstruction>(m_bytecode, DFABytecodeInstruction::TestFlagsAndAppendAction);
         append<uint16_t>(m_bytecode, static_cast<uint16_t>(action >> 32));
         append<uint32_t>(m_bytecode, static_cast<uint32_t>(action));
     } else {
         if (action & IfConditionFlag)
             append<DFABytecodeInstruction>(m_bytecode, DFABytecodeInstruction::AppendActionWithIfCondition);
         else
             append<DFABytecodeInstruction>(m_bytecode, DFABytecodeInstruction::AppendAction);
         append<uint32_t>(m_bytecode, static_cast<uint32_t>(action));
     }
 }

 int32_t DFABytecodeCompiler::longestPossibleJump(uint32_t instructionLocation, uint32_t sourceNodeIndex, uint32_t destinationNodeIndex)
 {
     if (m_nodeStartOffsets[destinationNodeIndex] == std::numeric_limits<uint32_t>::max()) {
         // Jumping to a node that hasn't been compiled yet, we don't know exactly how far forward we will need to jump,
         // so make sure we have enough room for the worst possible case, the farthest possible jump
         // which would be the distance if there were no compacted branches between this jump and its destination.
         ASSERT(instructionLocation >= m_nodeStartOffsets[sourceNodeIndex]);
         ASSERT(m_maxNodeStartOffsets[destinationNodeIndex] > m_maxNodeStartOffsets[sourceNodeIndex]);
         ASSERT(m_nodeStartOffsets[sourceNodeIndex] != std::numeric_limits<uint32_t>::max());
         return m_maxNodeStartOffsets[destinationNodeIndex] - m_maxNodeStartOffsets[sourceNodeIndex] - (m_nodeStartOffsets[sourceNodeIndex] - instructionLocation);
     }

     // Jumping to an already compiled node, we already know exactly where we will need to jump to.
     ASSERT(m_nodeStartOffsets[destinationNodeIndex] <= instructionLocation);
     return m_nodeStartOffsets[destinationNodeIndex] - instructionLocation;
 }

 void DFABytecodeCompiler::emitJump(uint32_t sourceNodeIndex, uint32_t destinationNodeIndex)
 {
     uint32_t instructionLocation = m_bytecode.size();
     uint32_t jumpLocation = instructionLocation + sizeof(uint8_t);
     int32_t longestPossibleJumpDistance = longestPossibleJump(instructionLocation, sourceNodeIndex, destinationNodeIndex);
     DFABytecodeJumpSize jumpSize = smallestPossibleJumpSize(longestPossibleJumpDistance);
     append<uint8_t>(m_bytecode, static_cast<uint8_t>(DFABytecodeInstruction::Jump) | jumpSize);

     m_linkRecords.append(LinkRecord({jumpSize, longestPossibleJumpDistance, instructionLocation, jumpLocation, destinationNodeIndex}));
     appendZeroes(m_bytecode, jumpSize);
 }

 void DFABytecodeCompiler::emitCheckValue(uint8_t value, uint32_t sourceNodeIndex, uint32_t destinationNodeIndex, bool caseSensitive)
 {
     uint32_t instructionLocation = m_bytecode.size();
     uint32_t jumpLocation = instructionLocation + 2 * sizeof(uint8_t);
     int32_t longestPossibleJumpDistance = longestPossibleJump(instructionLocation, sourceNodeIndex, destinationNodeIndex);
     DFABytecodeJumpSize jumpSize = smallestPossibleJumpSize(longestPossibleJumpDistance);
     DFABytecodeInstruction instruction = caseSensitive ? DFABytecodeInstruction::CheckValueCaseSensitive : DFABytecodeInstruction::CheckValueCaseInsensitive;
     append<uint8_t>(m_bytecode, static_cast<uint8_t>(instruction) | jumpSize);
     append<uint8_t>(m_bytecode, value);
     m_linkRecords.append(LinkRecord({jumpSize, longestPossibleJumpDistance, instructionLocation, jumpLocation, destinationNodeIndex}));
     appendZeroes(m_bytecode, jumpSize);
 }

 void DFABytecodeCompiler::emitCheckValueRange(uint8_t lowValue, uint8_t highValue, uint32_t sourceNodeIndex, uint32_t destinationNodeIndex, bool caseSensitive)
 {
     ASSERT_WITH_MESSAGE(lowValue < highValue, "The instruction semantic impose lowValue is strictly less than highValue.");

     uint32_t instructionLocation = m_bytecode.size();
     uint32_t jumpLocation = instructionLocation + 3 * sizeof(uint8_t);
     int32_t longestPossibleJumpDistance = longestPossibleJump(instructionLocation, sourceNodeIndex, destinationNodeIndex);
     DFABytecodeJumpSize jumpSize = smallestPossibleJumpSize(longestPossibleJumpDistance);
     DFABytecodeInstruction instruction = caseSensitive ? DFABytecodeInstruction::CheckValueRangeCaseSensitive : DFABytecodeInstruction::CheckValueRangeCaseInsensitive;
     append<uint8_t>(m_bytecode, static_cast<uint8_t>(instruction) | jumpSize);
     append<uint8_t>(m_bytecode, lowValue);
     append<uint8_t>(m_bytecode, highValue);
     m_linkRecords.append(LinkRecord({jumpSize, longestPossibleJumpDistance, instructionLocation, jumpLocation, destinationNodeIndex}));
     appendZeroes(m_bytecode, jumpSize);
 }

 void DFABytecodeCompiler::emitTerminate()
 {
     append<DFABytecodeInstruction>(m_bytecode, DFABytecodeInstruction::Terminate);
 }

 void DFABytecodeCompiler::compileNode(uint32_t index, bool root)
 {
     unsigned startSize = m_bytecode.size();

     const DFANode& node = m_dfa.nodes[index];
     if (node.isKilled()) {
         ASSERT(m_nodeStartOffsets[index] == std::numeric_limits<uint32_t>::max());
         return;
     }

     // Record starting index for linking.
     if (!root)
         m_nodeStartOffsets[index] = m_bytecode.size();

     for (uint64_t action : node.actions(m_dfa))
         emitAppendAction(action);

     // If we jump to the root, we don't want to re-add its actions to a HashSet.
     // We know we have already added them because the root is always compiled first and we always start interpreting at the beginning.
     if (root)
         m_nodeStartOffsets[index] = m_bytecode.size();

     compileNodeTransitions(index);

     ASSERT_UNUSED(startSize, m_bytecode.size() - startSize <= compiledNodeMaxBytecodeSize(index));
 }

 unsigned DFABytecodeCompiler::compiledNodeMaxBytecodeSize(uint32_t index)
 {
     const DFANode& node = m_dfa.nodes[index];
     if (node.isKilled())
         return 0;
     unsigned size = 0;
     for (uint64_t action : node.actions(m_dfa))
         size += appendActionBytecodeSize(action);
     size += nodeTransitionsMaxBytecodeSize(node);
     return size;
 }

 DFABytecodeCompiler::JumpTable DFABytecodeCompiler::extractJumpTable(Vector<DFABytecodeCompiler::Range>& ranges, unsigned firstRange, unsigned lastRange)
 {
     ASSERT(lastRange > firstRange);
     ASSERT(lastRange < ranges.size());

     JumpTable jumpTable;
     jumpTable.min = ranges[firstRange].min;
     jumpTable.max = ranges[lastRange].max;
     jumpTable.caseSensitive = ranges[lastRange].caseSensitive;

     unsigned size = lastRange - firstRange + 1;
     jumpTable.destinations.reserveInitialCapacity(size);
     for (unsigned i = firstRange; i <= lastRange; ++i) {
         const Range& range = ranges[i];

         ASSERT(range.caseSensitive == jumpTable.caseSensitive);
         ASSERT(range.min == range.max);
         ASSERT(range.min >= jumpTable.min);
         ASSERT(range.min <= jumpTable.max);

         jumpTable.destinations.uncheckedAppend(range.destination);
     }

     ranges.remove(firstRange, size);

     return jumpTable;
 }

 DFABytecodeCompiler::Transitions DFABytecodeCompiler::transitions(const DFANode& node)
 {
     Transitions transitions;

     uint32_t destinations[128];
     memset(destinations, 0xff, sizeof(destinations));
     const uint32_t noDestination = std::numeric_limits<uint32_t>::max();

     transitions.useFallbackTransition = node.canUseFallbackTransition(m_dfa);
     if (transitions.useFallbackTransition)
         transitions.fallbackTransitionTarget = node.bestFallbackTarget(m_dfa);

     for (const auto& transition : node.transitions(m_dfa)) {
         uint32_t targetNodeIndex = transition.target();
         if (transitions.useFallbackTransition && transitions.fallbackTransitionTarget == targetNodeIndex)
             continue;

         for (uint16_t i = transition.range().first; i <= transition.range().last; ++i)
             destinations[i] = targetNodeIndex;
     }

     Vector<Range>& ranges = transitions.ranges;
     uint8_t rangeMin;
     bool hasRangeMin = false;
     for (uint8_t i = 0; i < 128; i++) {
         if (hasRangeMin) {
             if (destinations[i] != destinations[rangeMin]) {

                 // This is the end of a range. Check if it can be case insensitive.
                 uint8_t rangeMax = i - 1;
                 bool caseSensitive = true;
                 if (rangeMin >= 'A' && rangeMax <= 'Z') {
                     caseSensitive = false;
                     for (uint8_t rangeIndex = rangeMin; rangeIndex <= rangeMax; rangeIndex++) {
                         if (destinations[rangeMin] != destinations[toASCIILower(rangeIndex)]) {
                             caseSensitive = true;
                             break;
                         }
                     }
                 }

                 if (!caseSensitive) {
                     // If all the lower-case destinations are the same as the upper-case destinations,
                     // then they will be covered by a case-insensitive range and will not need their own range.
                     for (uint8_t rangeIndex = rangeMin; rangeIndex <= rangeMax; rangeIndex++) {
                         ASSERT(destinations[rangeMin] == destinations[toASCIILower(rangeIndex)]);
                         destinations[toASCIILower(rangeIndex)] = noDestination;
                     }
                     ranges.append(Range(toASCIILower(rangeMin), toASCIILower(rangeMax), destinations[rangeMin], caseSensitive));
                 } else
                     ranges.append(Range(rangeMin, rangeMax, destinations[rangeMin], caseSensitive));

                 if (destinations[i] == noDestination)
                     hasRangeMin = false;
                 else
                     rangeMin = i;
             }
         } else {
             if (destinations[i] != noDestination) {
                 rangeMin = i;
                 hasRangeMin = true;
             }
         }
     }
     if (hasRangeMin) {
         // Ranges are appended after passing the end of them.
         // If a range goes to 127, we will have an uncommitted rangeMin because the loop does not check 128.
         // If a range goes to 127, there will never be values higher than it, so checking for case-insensitive ranges would always fail.
         ranges.append(Range(rangeMin, 127, destinations[rangeMin], true));
     }

     Vector<JumpTable>& jumpTables = transitions.jumpTables;
     unsigned rangePosition = 0;
     unsigned baseRangePosition = std::numeric_limits<unsigned>::max();
     Range* baseRange = nullptr;
     while (rangePosition < ranges.size()) {
         auto& range = ranges[rangePosition];
         if (baseRange) {
             if (range.min != range.max
                 || baseRange->caseSensitive != range.caseSensitive
                 || ranges[rangePosition - 1].max + 1 != range.min) {
                 if (rangePosition - baseRangePosition > 1) {
                     jumpTables.append(extractJumpTable(ranges, baseRangePosition, rangePosition - 1));
                     rangePosition = baseRangePosition;
                 }
                 baseRangePosition = std::numeric_limits<unsigned>::max();
                 baseRange = nullptr;
             }
         } else {
             if (range.min == range.max) {
                 baseRangePosition = rangePosition;
                 baseRange = &range;
             }
         }
         ++rangePosition;
     }

     if (baseRange && ranges.size() - baseRangePosition > 1)
         jumpTables.append(extractJumpTable(ranges, baseRangePosition, ranges.size() - 1));

     return transitions;
 }

 unsigned DFABytecodeCompiler::checkForJumpTableMaxBytecodeSize(const JumpTable& jumpTable)
 {
     unsigned baselineSize = sizeof(DFABytecodeInstruction::CheckValueRangeCaseInsensitive) + 2 * sizeof(uint8_t);
     unsigned targetsSize = (jumpTable.max - jumpTable.min + 1) * sizeof(uint32_t);
     return baselineSize + targetsSize;
 }

 unsigned DFABytecodeCompiler::checkForRangeMaxBytecodeSize(const Range& range)
 {
     if (range.min == range.max)
         return sizeof(DFABytecodeInstruction::CheckValueCaseInsensitive) + sizeof(uint8_t) + sizeof(uint32_t);
     return sizeof(DFABytecodeInstruction::CheckValueRangeCaseInsensitive) + 2 * sizeof(uint8_t) + sizeof(uint32_t);
 }

 void DFABytecodeCompiler::compileJumpTable(uint32_t nodeIndex, const JumpTable& jumpTable)
 {
     unsigned startSize = m_bytecode.size();
     ASSERT_WITH_MESSAGE(jumpTable.max < 128, "The DFA engine only supports the ASCII alphabet.");
     ASSERT(jumpTable.min <= jumpTable.max);

     uint32_t instructionLocation = m_bytecode.size();
     DFABytecodeJumpSize jumpSize = Int8;
     for (uint32_t destinationNodeIndex : jumpTable.destinations) {
         int32_t longestPossibleJumpDistance = longestPossibleJump(instructionLocation, nodeIndex, destinationNodeIndex);
         DFABytecodeJumpSize localJumpSize = smallestPossibleJumpSize(longestPossibleJumpDistance);
         jumpSize = std::max(jumpSize, localJumpSize);
     }

     DFABytecodeInstruction instruction = jumpTable.caseSensitive ? DFABytecodeInstruction::JumpTableCaseSensitive : DFABytecodeInstruction::JumpTableCaseInsensitive;
     append<uint8_t>(m_bytecode, static_cast<uint8_t>(instruction) | jumpSize);
     append<uint8_t>(m_bytecode, jumpTable.min);
     append<uint8_t>(m_bytecode, jumpTable.max);

     for (uint32_t destinationNodeIndex : jumpTable.destinations) {
         int32_t longestPossibleJumpDistance = longestPossibleJump(instructionLocation, nodeIndex, destinationNodeIndex);
         uint32_t jumpLocation = m_bytecode.size();
         m_linkRecords.append(LinkRecord({jumpSize, longestPossibleJumpDistance, instructionLocation, jumpLocation, destinationNodeIndex}));
         appendZeroes(m_bytecode, jumpSize);
     }

     ASSERT_UNUSED(startSize, m_bytecode.size() - startSize <= checkForJumpTableMaxBytecodeSize(jumpTable));
 }

 void DFABytecodeCompiler::compileCheckForRange(uint32_t nodeIndex, const Range& range)
 {
     unsigned startSize = m_bytecode.size();
     ASSERT_WITH_MESSAGE(range.max < 128, "The DFA engine only supports the ASCII alphabet.");
     ASSERT(range.min <= range.max);

     if (range.min == range.max)
         emitCheckValue(range.min, nodeIndex, range.destination, range.caseSensitive);
     else
         emitCheckValueRange(range.min, range.max, nodeIndex, range.destination, range.caseSensitive);

     ASSERT_UNUSED(startSize, m_bytecode.size() - startSize <= checkForRangeMaxBytecodeSize(range));
 }

 unsigned DFABytecodeCompiler::nodeTransitionsMaxBytecodeSize(const DFANode& node)
 {
     unsigned size = 0;
     Transitions nodeTransitions = transitions(node);
     for (const auto& jumpTable : nodeTransitions.jumpTables)
         size += checkForJumpTableMaxBytecodeSize(jumpTable);
     for (const auto& range : nodeTransitions.ranges)
         size += checkForRangeMaxBytecodeSize(range);
     if (nodeTransitions.useFallbackTransition)
         size += sizeof(DFABytecodeInstruction::Jump) + sizeof(uint32_t);
     else
         size += instructionSizeWithArguments(DFABytecodeInstruction::Terminate);
     return size;
 }

 void DFABytecodeCompiler::compileNodeTransitions(uint32_t nodeIndex)
 {
     const DFANode& node = m_dfa.nodes[nodeIndex];
     unsigned startSize = m_bytecode.size();

     Transitions nodeTransitions = transitions(node);
     for (const auto& jumpTable : nodeTransitions.jumpTables)
         compileJumpTable(nodeIndex, jumpTable);
     for (const auto& range : nodeTransitions.ranges)
         compileCheckForRange(nodeIndex, range);
     if (nodeTransitions.useFallbackTransition)
         emitJump(nodeIndex, nodeTransitions.fallbackTransitionTarget);
     else
         emitTerminate();

     ASSERT_UNUSED(startSize, m_bytecode.size() - startSize <= nodeTransitionsMaxBytecodeSize(node));
 }

 void DFABytecodeCompiler::compile()
 {
     uint32_t startLocation = m_bytecode.size();
     append<DFAHeader>(m_bytecode, 0); // This will be set when we are finished compiling this DFA.

     m_nodeStartOffsets.resize(m_dfa.nodes.size());
     for (unsigned i = 0; i < m_dfa.nodes.size(); ++i)
         m_nodeStartOffsets[i] = std::numeric_limits<uint32_t>::max();

     // Populate m_maxNodeStartOffsets with a worst-case index of where the node would be with no branch compaction.
     // Compacting the branches using 1-4 byte signed jump distances should only make nodes closer together than this.
     ASSERT(m_maxNodeStartOffsets.isEmpty());
     m_maxNodeStartOffsets.clear();
     m_maxNodeStartOffsets.resize(m_dfa.nodes.size());
     unsigned rootActionsSize = 0;
     for (uint64_t action : m_dfa.nodes[m_dfa.root].actions(m_dfa))
         rootActionsSize += appendActionBytecodeSize(action);
     m_maxNodeStartOffsets[m_dfa.root] = sizeof(DFAHeader) + rootActionsSize;
     unsigned nextIndex = sizeof(DFAHeader) + compiledNodeMaxBytecodeSize(m_dfa.root);
     for (uint32_t i = 0; i < m_dfa.nodes.size(); i++) {
         if (i != m_dfa.root) {
             m_maxNodeStartOffsets[i] = nextIndex;
             nextIndex += compiledNodeMaxBytecodeSize(i);
         }
     }

     // Make sure the root is always at the beginning of the bytecode.
     compileNode(m_dfa.root, true);
     for (uint32_t i = 0; i < m_dfa.nodes.size(); i++) {
         if (i != m_dfa.root)
             compileNode(i, false);
     }

     ASSERT(m_maxNodeStartOffsets.size() == m_nodeStartOffsets.size());
     for (unsigned i = 0; i < m_dfa.nodes.size(); ++i) {
         if (m_nodeStartOffsets[i] != std::numeric_limits<uint32_t>::max())
             ASSERT(m_maxNodeStartOffsets[i] >= m_nodeStartOffsets[i]);
     }

     // Link.
     for (const auto& linkRecord : m_linkRecords) {
         uint32_t destination = m_nodeStartOffsets[linkRecord.destinationNodeIndex];
         RELEASE_ASSERT(destination < static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
         int32_t distance = destination - linkRecord.instructionLocation;
         ASSERT(abs(distance) <= abs(linkRecord.longestPossibleJump));

         switch (linkRecord.jumpSize) {
         case Int8:
             RELEASE_ASSERT(distance == static_cast<int8_t>(distance));
             setBits<int8_t>(m_bytecode, linkRecord.jumpLocation, static_cast<int8_t>(distance));
             break;
         case Int16:
             RELEASE_ASSERT(distance == static_cast<int16_t>(distance));
             setBits<int16_t>(m_bytecode, linkRecord.jumpLocation, static_cast<int16_t>(distance));
             break;
         case Int24:
             RELEASE_ASSERT(distance >= Int24Min && distance <= Int24Max);
             setBits<uint16_t>(m_bytecode, linkRecord.jumpLocation, static_cast<uint16_t>(distance));
             setBits<int8_t>(m_bytecode, linkRecord.jumpLocation + sizeof(int16_t), static_cast<int8_t>(distance >> 16));
             break;
         case Int32:
             setBits<int32_t>(m_bytecode, linkRecord.jumpLocation, distance);
             break;
         }
     }

     setBits<DFAHeader>(m_bytecode, startLocation, m_bytecode.size() - startLocation);
 }

 } // namespace ContentExtensions

 } // namespace WebCore

 #endif // ENABLE(CONTENT_EXTENSIONS)
	/*
	* Copyright (C) 2015 Apple Inc. All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	*
	* THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
	* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
	* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
	* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
	* THE POSSIBILITY OF SUCH DAMAGE.
	*/

	#include "config.h"
	#include "DFABytecodeCompiler.h"

	#if ENABLE(CONTENT_EXTENSIONS)

	#include "ContentExtensionRule.h"
	#include "DFA.h"
	#include "DFANode.h"

	namespace WebCore {

	namespace ContentExtensions {

	template <typename IntType>
	inline void append(Vector<DFABytecode>& bytecode, IntType value)
	{
	bytecode.grow(bytecode.size() + sizeof(IntType));
	reinterpret_cast<IntType>(&bytecode[bytecode.size() - sizeof(IntType)]) = value;
	}

	inline void appendZeroes(Vector<DFABytecode>& bytecode, DFABytecodeJumpSize jumpSize)
	{
	switch (jumpSize) {
	case DFABytecodeJumpSize::Int8:
	append<int8_t>(bytecode, 0); // This value will be set when linking.
	break;
	case DFABytecodeJumpSize::Int16:
	append<int16_t>(bytecode, 0); // This value will be set when linking.
	break;
	case DFABytecodeJumpSize::Int24:
	append<uint16_t>(bytecode, 0);
	append<int8_t>(bytecode, 0); // These values will be set when linking.
	break;
	case DFABytecodeJumpSize::Int32:
	append<int32_t>(bytecode, 0); // This value will be set when linking.
	break;
	}
	}

	template <typename IntType>
	inline void setBits(Vector<DFABytecode>& bytecode, uint32_t index, IntType value)
	{
	RELEASE_ASSERT(index + sizeof(IntType) <= bytecode.size());
	ASSERT_WITH_MESSAGE(!reinterpret_cast<IntType>(&bytecode[index]), "Right now we should only be using setBits to overwrite values that were zero as a placeholder.");
	reinterpret_cast<IntType>(&bytecode[index]) = value;
	}

	static unsigned appendActionBytecodeSize(uint64_t action)
	{
	if (action & ActionFlagMask)
	return sizeof(DFABytecodeInstruction) + sizeof(uint16_t) + sizeof(uint32_t);
	return sizeof(DFABytecodeInstruction) + sizeof(uint32_t);
	}

	void DFABytecodeCompiler::emitAppendAction(uint64_t action)
	{
	// High bits are used to store flags. See compileRuleList.
	if (action & ActionFlagMask) {
	if (action & IfConditionFlag)
	append<DFABytecodeInstruction>(m_bytecode, DFABytecodeInstruction::TestFlagsAndAppendActionWithIfCondition);
	else
	append<DFABytecodeInstruction>(m_bytecode, DFABytecodeInstruction::TestFlagsAndAppendAction);
	append<uint16_t>(m_bytecode, static_cast<uint16_t>(action >> 32));
	append<uint32_t>(m_bytecode, static_cast<uint32_t>(action));
	} else {
	if (action & IfConditionFlag)
	append<DFABytecodeInstruction>(m_bytecode, DFABytecodeInstruction::AppendActionWithIfCondition);
	else
	append<DFABytecodeInstruction>(m_bytecode, DFABytecodeInstruction::AppendAction);
	append<uint32_t>(m_bytecode, static_cast<uint32_t>(action));
	}
	}

	int32_t DFABytecodeCompiler::longestPossibleJump(uint32_t instructionLocation, uint32_t sourceNodeIndex, uint32_t destinationNodeIndex)
	{
	if (m_nodeStartOffsets[destinationNodeIndex] == std::numeric_limits<uint32_t>::max()) {
	// Jumping to a node that hasn't been compiled yet, we don't know exactly how far forward we will need to jump,
	// so make sure we have enough room for the worst possible case, the farthest possible jump
	// which would be the distance if there were no compacted branches between this jump and its destination.
	ASSERT(instructionLocation >= m_nodeStartOffsets[sourceNodeIndex]);
	ASSERT(m_maxNodeStartOffsets[destinationNodeIndex] > m_maxNodeStartOffsets[sourceNodeIndex]);
	ASSERT(m_nodeStartOffsets[sourceNodeIndex] != std::numeric_limits<uint32_t>::max());
	return m_maxNodeStartOffsets[destinationNodeIndex] - m_maxNodeStartOffsets[sourceNodeIndex] - (m_nodeStartOffsets[sourceNodeIndex] - instructionLocation);
	}

	// Jumping to an already compiled node, we already know exactly where we will need to jump to.
	ASSERT(m_nodeStartOffsets[destinationNodeIndex] <= instructionLocation);
	return m_nodeStartOffsets[destinationNodeIndex] - instructionLocation;
	}

	void DFABytecodeCompiler::emitJump(uint32_t sourceNodeIndex, uint32_t destinationNodeIndex)
	{
	uint32_t instructionLocation = m_bytecode.size();
	uint32_t jumpLocation = instructionLocation + sizeof(uint8_t);
	int32_t longestPossibleJumpDistance = longestPossibleJump(instructionLocation, sourceNodeIndex, destinationNodeIndex);
	DFABytecodeJumpSize jumpSize = smallestPossibleJumpSize(longestPossibleJumpDistance);
	append<uint8_t>(m_bytecode, static_cast<uint8_t>(DFABytecodeInstruction::Jump) \| jumpSize);

	m_linkRecords.append(LinkRecord({jumpSize, longestPossibleJumpDistance, instructionLocation, jumpLocation, destinationNodeIndex}));
	appendZeroes(m_bytecode, jumpSize);
	}

	void DFABytecodeCompiler::emitCheckValue(uint8_t value, uint32_t sourceNodeIndex, uint32_t destinationNodeIndex, bool caseSensitive)
	{
	uint32_t instructionLocation = m_bytecode.size();
	uint32_t jumpLocation = instructionLocation + 2 * sizeof(uint8_t);
	int32_t longestPossibleJumpDistance = longestPossibleJump(instructionLocation, sourceNodeIndex, destinationNodeIndex);
	DFABytecodeJumpSize jumpSize = smallestPossibleJumpSize(longestPossibleJumpDistance);
	DFABytecodeInstruction instruction = caseSensitive ? DFABytecodeInstruction::CheckValueCaseSensitive : DFABytecodeInstruction::CheckValueCaseInsensitive;
	append<uint8_t>(m_bytecode, static_cast<uint8_t>(instruction) \| jumpSize);
	append<uint8_t>(m_bytecode, value);
	m_linkRecords.append(LinkRecord({jumpSize, longestPossibleJumpDistance, instructionLocation, jumpLocation, destinationNodeIndex}));
	appendZeroes(m_bytecode, jumpSize);
	}

	void DFABytecodeCompiler::emitCheckValueRange(uint8_t lowValue, uint8_t highValue, uint32_t sourceNodeIndex, uint32_t destinationNodeIndex, bool caseSensitive)
	{
	ASSERT_WITH_MESSAGE(lowValue < highValue, "The instruction semantic impose lowValue is strictly less than highValue.");

	uint32_t instructionLocation = m_bytecode.size();
	uint32_t jumpLocation = instructionLocation + 3 * sizeof(uint8_t);
	int32_t longestPossibleJumpDistance = longestPossibleJump(instructionLocation, sourceNodeIndex, destinationNodeIndex);
	DFABytecodeJumpSize jumpSize = smallestPossibleJumpSize(longestPossibleJumpDistance);
	DFABytecodeInstruction instruction = caseSensitive ? DFABytecodeInstruction::CheckValueRangeCaseSensitive : DFABytecodeInstruction::CheckValueRangeCaseInsensitive;
	append<uint8_t>(m_bytecode, static_cast<uint8_t>(instruction) \| jumpSize);
	append<uint8_t>(m_bytecode, lowValue);
	append<uint8_t>(m_bytecode, highValue);
	m_linkRecords.append(LinkRecord({jumpSize, longestPossibleJumpDistance, instructionLocation, jumpLocation, destinationNodeIndex}));
	appendZeroes(m_bytecode, jumpSize);
	}

	void DFABytecodeCompiler::emitTerminate()
	{
	append<DFABytecodeInstruction>(m_bytecode, DFABytecodeInstruction::Terminate);
	}

	void DFABytecodeCompiler::compileNode(uint32_t index, bool root)
	{
	unsigned startSize = m_bytecode.size();

	const DFANode& node = m_dfa.nodes[index];
	if (node.isKilled()) {
	ASSERT(m_nodeStartOffsets[index] == std::numeric_limits<uint32_t>::max());
	return;
	}

	// Record starting index for linking.
	if (!root)
	m_nodeStartOffsets[index] = m_bytecode.size();

	for (uint64_t action : node.actions(m_dfa))
	emitAppendAction(action);

	// If we jump to the root, we don't want to re-add its actions to a HashSet.
	// We know we have already added them because the root is always compiled first and we always start interpreting at the beginning.
	if (root)
	m_nodeStartOffsets[index] = m_bytecode.size();

	compileNodeTransitions(index);

	ASSERT_UNUSED(startSize, m_bytecode.size() - startSize <= compiledNodeMaxBytecodeSize(index));
	}

	unsigned DFABytecodeCompiler::compiledNodeMaxBytecodeSize(uint32_t index)
	{
	const DFANode& node = m_dfa.nodes[index];
	if (node.isKilled())
	return 0;
	unsigned size = 0;
	for (uint64_t action : node.actions(m_dfa))
	size += appendActionBytecodeSize(action);
	size += nodeTransitionsMaxBytecodeSize(node);
	return size;
	}

	DFABytecodeCompiler::JumpTable DFABytecodeCompiler::extractJumpTable(Vector<DFABytecodeCompiler::Range>& ranges, unsigned firstRange, unsigned lastRange)
	{
	ASSERT(lastRange > firstRange);
	ASSERT(lastRange < ranges.size());

	JumpTable jumpTable;
	jumpTable.min = ranges[firstRange].min;
	jumpTable.max = ranges[lastRange].max;
	jumpTable.caseSensitive = ranges[lastRange].caseSensitive;

	unsigned size = lastRange - firstRange + 1;
	jumpTable.destinations.reserveInitialCapacity(size);
	for (unsigned i = firstRange; i <= lastRange; ++i) {
	const Range& range = ranges[i];

	ASSERT(range.caseSensitive == jumpTable.caseSensitive);
	ASSERT(range.min == range.max);
	ASSERT(range.min >= jumpTable.min);
	ASSERT(range.min <= jumpTable.max);

	jumpTable.destinations.uncheckedAppend(range.destination);
	}

	ranges.remove(firstRange, size);

	return jumpTable;
	}

	DFABytecodeCompiler::Transitions DFABytecodeCompiler::transitions(const DFANode& node)
	{
	Transitions transitions;

	uint32_t destinations[128];
	memset(destinations, 0xff, sizeof(destinations));
	const uint32_t noDestination = std::numeric_limits<uint32_t>::max();

	transitions.useFallbackTransition = node.canUseFallbackTransition(m_dfa);
	if (transitions.useFallbackTransition)
	transitions.fallbackTransitionTarget = node.bestFallbackTarget(m_dfa);

	for (const auto& transition : node.transitions(m_dfa)) {
	uint32_t targetNodeIndex = transition.target();
	if (transitions.useFallbackTransition && transitions.fallbackTransitionTarget == targetNodeIndex)
	continue;

	for (uint16_t i = transition.range().first; i <= transition.range().last; ++i)
	destinations[i] = targetNodeIndex;
	}

	Vector<Range>& ranges = transitions.ranges;
	uint8_t rangeMin;
	bool hasRangeMin = false;
	for (uint8_t i = 0; i < 128; i++) {
	if (hasRangeMin) {
	if (destinations[i] != destinations[rangeMin]) {

	// This is the end of a range. Check if it can be case insensitive.
	uint8_t rangeMax = i - 1;
	bool caseSensitive = true;
	if (rangeMin >= 'A' && rangeMax <= 'Z') {
	caseSensitive = false;
	for (uint8_t rangeIndex = rangeMin; rangeIndex <= rangeMax; rangeIndex++) {
	if (destinations[rangeMin] != destinations[toASCIILower(rangeIndex)]) {
	caseSensitive = true;
	break;
	}
	}
	}

	if (!caseSensitive) {
	// If all the lower-case destinations are the same as the upper-case destinations,
	// then they will be covered by a case-insensitive range and will not need their own range.
	for (uint8_t rangeIndex = rangeMin; rangeIndex <= rangeMax; rangeIndex++) {
	ASSERT(destinations[rangeMin] == destinations[toASCIILower(rangeIndex)]);
	destinations[toASCIILower(rangeIndex)] = noDestination;
	}
	ranges.append(Range(toASCIILower(rangeMin), toASCIILower(rangeMax), destinations[rangeMin], caseSensitive));
	} else
	ranges.append(Range(rangeMin, rangeMax, destinations[rangeMin], caseSensitive));

	if (destinations[i] == noDestination)
	hasRangeMin = false;
	else
	rangeMin = i;
	}
	} else {
	if (destinations[i] != noDestination) {
	rangeMin = i;
	hasRangeMin = true;
	}
	}
	}
	if (hasRangeMin) {
	// Ranges are appended after passing the end of them.
	// If a range goes to 127, we will have an uncommitted rangeMin because the loop does not check 128.
	// If a range goes to 127, there will never be values higher than it, so checking for case-insensitive ranges would always fail.
	ranges.append(Range(rangeMin, 127, destinations[rangeMin], true));
	}

	Vector<JumpTable>& jumpTables = transitions.jumpTables;
	unsigned rangePosition = 0;
	unsigned baseRangePosition = std::numeric_limits<unsigned>::max();
	Range* baseRange = nullptr;
	while (rangePosition < ranges.size()) {
	auto& range = ranges[rangePosition];
	if (baseRange) {
	if (range.min != range.max
	\|\| baseRange->caseSensitive != range.caseSensitive
	\|\| ranges[rangePosition - 1].max + 1 != range.min) {
	if (rangePosition - baseRangePosition > 1) {
	jumpTables.append(extractJumpTable(ranges, baseRangePosition, rangePosition - 1));
	rangePosition = baseRangePosition;
	}
	baseRangePosition = std::numeric_limits<unsigned>::max();
	baseRange = nullptr;
	}
	} else {
	if (range.min == range.max) {
	baseRangePosition = rangePosition;
	baseRange = &range;
	}
	}
	++rangePosition;
	}

	if (baseRange && ranges.size() - baseRangePosition > 1)
	jumpTables.append(extractJumpTable(ranges, baseRangePosition, ranges.size() - 1));

	return transitions;
	}

	unsigned DFABytecodeCompiler::checkForJumpTableMaxBytecodeSize(const JumpTable& jumpTable)
	{
	unsigned baselineSize = sizeof(DFABytecodeInstruction::CheckValueRangeCaseInsensitive) + 2 * sizeof(uint8_t);
	unsigned targetsSize = (jumpTable.max - jumpTable.min + 1) * sizeof(uint32_t);
	return baselineSize + targetsSize;
	}

	unsigned DFABytecodeCompiler::checkForRangeMaxBytecodeSize(const Range& range)
	{
	if (range.min == range.max)
	return sizeof(DFABytecodeInstruction::CheckValueCaseInsensitive) + sizeof(uint8_t) + sizeof(uint32_t);
	return sizeof(DFABytecodeInstruction::CheckValueRangeCaseInsensitive) + 2 * sizeof(uint8_t) + sizeof(uint32_t);
	}

	void DFABytecodeCompiler::compileJumpTable(uint32_t nodeIndex, const JumpTable& jumpTable)
	{
	unsigned startSize = m_bytecode.size();
	ASSERT_WITH_MESSAGE(jumpTable.max < 128, "The DFA engine only supports the ASCII alphabet.");
	ASSERT(jumpTable.min <= jumpTable.max);

	uint32_t instructionLocation = m_bytecode.size();
	DFABytecodeJumpSize jumpSize = Int8;
	for (uint32_t destinationNodeIndex : jumpTable.destinations) {
	int32_t longestPossibleJumpDistance = longestPossibleJump(instructionLocation, nodeIndex, destinationNodeIndex);
	DFABytecodeJumpSize localJumpSize = smallestPossibleJumpSize(longestPossibleJumpDistance);
	jumpSize = std::max(jumpSize, localJumpSize);
	}

	DFABytecodeInstruction instruction = jumpTable.caseSensitive ? DFABytecodeInstruction::JumpTableCaseSensitive : DFABytecodeInstruction::JumpTableCaseInsensitive;
	append<uint8_t>(m_bytecode, static_cast<uint8_t>(instruction) \| jumpSize);
	append<uint8_t>(m_bytecode, jumpTable.min);
	append<uint8_t>(m_bytecode, jumpTable.max);

	for (uint32_t destinationNodeIndex : jumpTable.destinations) {
	int32_t longestPossibleJumpDistance = longestPossibleJump(instructionLocation, nodeIndex, destinationNodeIndex);
	uint32_t jumpLocation = m_bytecode.size();
	m_linkRecords.append(LinkRecord({jumpSize, longestPossibleJumpDistance, instructionLocation, jumpLocation, destinationNodeIndex}));
	appendZeroes(m_bytecode, jumpSize);
	}

	ASSERT_UNUSED(startSize, m_bytecode.size() - startSize <= checkForJumpTableMaxBytecodeSize(jumpTable));
	}

	void DFABytecodeCompiler::compileCheckForRange(uint32_t nodeIndex, const Range& range)
	{
	unsigned startSize = m_bytecode.size();
	ASSERT_WITH_MESSAGE(range.max < 128, "The DFA engine only supports the ASCII alphabet.");
	ASSERT(range.min <= range.max);

	if (range.min == range.max)
	emitCheckValue(range.min, nodeIndex, range.destination, range.caseSensitive);
	else
	emitCheckValueRange(range.min, range.max, nodeIndex, range.destination, range.caseSensitive);

	ASSERT_UNUSED(startSize, m_bytecode.size() - startSize <= checkForRangeMaxBytecodeSize(range));
	}

	unsigned DFABytecodeCompiler::nodeTransitionsMaxBytecodeSize(const DFANode& node)
	{
	unsigned size = 0;
	Transitions nodeTransitions = transitions(node);
	for (const auto& jumpTable : nodeTransitions.jumpTables)
	size += checkForJumpTableMaxBytecodeSize(jumpTable);
	for (const auto& range : nodeTransitions.ranges)
	size += checkForRangeMaxBytecodeSize(range);
	if (nodeTransitions.useFallbackTransition)
	size += sizeof(DFABytecodeInstruction::Jump) + sizeof(uint32_t);
	else
	size += instructionSizeWithArguments(DFABytecodeInstruction::Terminate);
	return size;
	}

	void DFABytecodeCompiler::compileNodeTransitions(uint32_t nodeIndex)
	{
	const DFANode& node = m_dfa.nodes[nodeIndex];
	unsigned startSize = m_bytecode.size();

	Transitions nodeTransitions = transitions(node);
	for (const auto& jumpTable : nodeTransitions.jumpTables)
	compileJumpTable(nodeIndex, jumpTable);
	for (const auto& range : nodeTransitions.ranges)
	compileCheckForRange(nodeIndex, range);
	if (nodeTransitions.useFallbackTransition)
	emitJump(nodeIndex, nodeTransitions.fallbackTransitionTarget);
	else
	emitTerminate();

	ASSERT_UNUSED(startSize, m_bytecode.size() - startSize <= nodeTransitionsMaxBytecodeSize(node));
	}

	void DFABytecodeCompiler::compile()
	{
	uint32_t startLocation = m_bytecode.size();
	append<DFAHeader>(m_bytecode, 0); // This will be set when we are finished compiling this DFA.

	m_nodeStartOffsets.resize(m_dfa.nodes.size());
	for (unsigned i = 0; i < m_dfa.nodes.size(); ++i)
	m_nodeStartOffsets[i] = std::numeric_limits<uint32_t>::max();

	// Populate m_maxNodeStartOffsets with a worst-case index of where the node would be with no branch compaction.
	// Compacting the branches using 1-4 byte signed jump distances should only make nodes closer together than this.
	ASSERT(m_maxNodeStartOffsets.isEmpty());
	m_maxNodeStartOffsets.clear();
	m_maxNodeStartOffsets.resize(m_dfa.nodes.size());
	unsigned rootActionsSize = 0;
	for (uint64_t action : m_dfa.nodes[m_dfa.root].actions(m_dfa))
	rootActionsSize += appendActionBytecodeSize(action);
	m_maxNodeStartOffsets[m_dfa.root] = sizeof(DFAHeader) + rootActionsSize;
	unsigned nextIndex = sizeof(DFAHeader) + compiledNodeMaxBytecodeSize(m_dfa.root);
	for (uint32_t i = 0; i < m_dfa.nodes.size(); i++) {
	if (i != m_dfa.root) {
	m_maxNodeStartOffsets[i] = nextIndex;
	nextIndex += compiledNodeMaxBytecodeSize(i);
	}
	}

	// Make sure the root is always at the beginning of the bytecode.
	compileNode(m_dfa.root, true);
	for (uint32_t i = 0; i < m_dfa.nodes.size(); i++) {
	if (i != m_dfa.root)
	compileNode(i, false);
	}

	ASSERT(m_maxNodeStartOffsets.size() == m_nodeStartOffsets.size());
	for (unsigned i = 0; i < m_dfa.nodes.size(); ++i) {
	if (m_nodeStartOffsets[i] != std::numeric_limits<uint32_t>::max())
	ASSERT(m_maxNodeStartOffsets[i] >= m_nodeStartOffsets[i]);
	}

	// Link.
	for (const auto& linkRecord : m_linkRecords) {
	uint32_t destination = m_nodeStartOffsets[linkRecord.destinationNodeIndex];
	RELEASE_ASSERT(destination < static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
	int32_t distance = destination - linkRecord.instructionLocation;
	ASSERT(abs(distance) <= abs(linkRecord.longestPossibleJump));

	switch (linkRecord.jumpSize) {
	case Int8:
	RELEASE_ASSERT(distance == static_cast<int8_t>(distance));
	setBits<int8_t>(m_bytecode, linkRecord.jumpLocation, static_cast<int8_t>(distance));
	break;
	case Int16:
	RELEASE_ASSERT(distance == static_cast<int16_t>(distance));
	setBits<int16_t>(m_bytecode, linkRecord.jumpLocation, static_cast<int16_t>(distance));
	break;
	case Int24:
	RELEASE_ASSERT(distance >= Int24Min && distance <= Int24Max);
	setBits<uint16_t>(m_bytecode, linkRecord.jumpLocation, static_cast<uint16_t>(distance));
	setBits<int8_t>(m_bytecode, linkRecord.jumpLocation + sizeof(int16_t), static_cast<int8_t>(distance >> 16));
	break;
	case Int32:
	setBits<int32_t>(m_bytecode, linkRecord.jumpLocation, distance);
	break;
	}
	}

	setBits<DFAHeader>(m_bytecode, startLocation, m_bytecode.size() - startLocation);
	}

	} // namespace ContentExtensions

	} // namespace WebCore

	#endif // ENABLE(CONTENT_EXTENSIONS)