JavaScriptCore/yarr/RegexInterpreter.cpp - WebKit - Git at Google

 /*
  * Copyright (C) 2009 Apple Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */

 #include "config.h"
 #include "RegexInterpreter.h"

 #include "RegexCompiler.h"
 #include "RegexPattern.h"

 #ifndef NDEBUG
 #include <stdio.h>
 #endif

 #if ENABLE(YARR)

 using namespace WTF;

 namespace JSC { namespace Yarr {

 class Interpreter {
 public:
     struct ParenthesesDisjunctionContext;

     struct BackTrackInfoPatternCharacter {
         uintptr_t matchAmount;
     };
     struct BackTrackInfoCharacterClass {
         uintptr_t matchAmount;
     };
     struct BackTrackInfoBackReference {
         uintptr_t begin; // Not really needed for greedy quantifiers.
         uintptr_t matchAmount; // Not really needed for fixed quantifiers.
     };
     struct BackTrackInfoAlternative {
         uintptr_t offset;
     };
     struct BackTrackInfoParentheticalAssertion {
         uintptr_t begin;
     };
     struct BackTrackInfoParenthesesOnce {
         uintptr_t inParentheses;
     };
     struct BackTrackInfoParentheses {
         uintptr_t matchAmount;
         ParenthesesDisjunctionContext* lastContext;
         uintptr_t prevBegin;
         uintptr_t prevEnd;
     };

     static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context)
     {
         context->next = backTrack->lastContext;
         backTrack->lastContext = context;
         ++backTrack->matchAmount;
     }

     static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack)
     {
         ASSERT(backTrack->matchAmount);
         ASSERT(backTrack->lastContext);
         backTrack->lastContext = backTrack->lastContext->next;
         --backTrack->matchAmount;
     }

     struct DisjunctionContext
     {
         DisjunctionContext()
             : term(0)
         {
         }

         void* operator new(size_t, void* where)
         {
             return where;
         }

         int term;
         unsigned matchBegin;
         unsigned matchEnd;
         uintptr_t frame[1];
     };

     DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction)
     {
         return new(malloc(sizeof(DisjunctionContext) + (disjunction->m_frameSize - 1) * sizeof(uintptr_t))) DisjunctionContext();
     }

     void freeDisjunctionContext(DisjunctionContext* context)
     {
         free(context);
     }

     struct ParenthesesDisjunctionContext
     {
         ParenthesesDisjunctionContext(int* output, ByteTerm& term)
             : next(0)
         {
             unsigned firstSubpatternId = term.atom.subpatternId;
             unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns;

             for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) {
                 subpatternBackup[i] = output[(firstSubpatternId << 1) + i];
                 output[(firstSubpatternId << 1) + i] = -1;
             }

             new(getDisjunctionContext(term)) DisjunctionContext();
         }

         void* operator new(size_t, void* where)
         {
             return where;
         }

         void restoreOutput(int* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns)
         {
             for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i)
                 output[(firstSubpatternId << 1) + i] = subpatternBackup[i];
         }

         DisjunctionContext* getDisjunctionContext(ByteTerm& term)
         {
             return reinterpret_cast<DisjunctionContext*>(&(subpatternBackup[term.atom.parenthesesDisjunction->m_numSubpatterns << 1]));
         }

         ParenthesesDisjunctionContext* next;
         int subpatternBackup[1];
     };

     ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, int* output, ByteTerm& term)
     {
         return new(malloc(sizeof(ParenthesesDisjunctionContext) + (((term.atom.parenthesesDisjunction->m_numSubpatterns << 1) - 1) * sizeof(int)) + sizeof(DisjunctionContext) + (disjunction->m_frameSize - 1) * sizeof(uintptr_t))) ParenthesesDisjunctionContext(output, term);
     }

     void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context)
     {
         free(context);
     }

     class InputStream {
     public:
         InputStream(const UChar* input, unsigned start, unsigned length)
             : input(input)
             , pos(start)
             , length(length)
         {
         }

         void next()
         {
             ++pos;
         }

         void rewind(unsigned amount)
         {
             ASSERT(pos >= amount);
             pos -= amount;
         }

         int read()
         {
             ASSERT(pos < length);
             if (pos < length)
                 return input[pos];
             return -1;
         }

         int readChecked(int position)
         {
             ASSERT(position < 0);
             ASSERT((unsigned)-position <= pos);
             unsigned p = pos + position;
             ASSERT(p < length);
             return input[p];
         }

         int reread(unsigned from)
         {
             ASSERT(from < length);
             return input[from];
         }

         int prev()
         {
             ASSERT(!(pos > length));
             if (pos && length)
                 return input[pos - 1];
             return -1;
         }

         unsigned getPos()
         {
             return pos;
         }

         void setPos(unsigned p)
         {
             pos = p;
         }

         bool atStart()
         {
             return pos == 0;
         }

         bool atEnd()
         {
             return pos == length;
         }

         bool checkInput(int count)
         {
             if ((pos + count) <= length) {
                 pos += count;
                 return true;
             } else
                 return false;
         }

         void uncheckInput(int count)
         {
             pos -= count;
         }

         bool atStart(int position)
         {
             return (pos + position) == 0;
         }

         bool atEnd(int position)
         {
             return (pos + position) == length;
         }

     private:
         const UChar* input;
         unsigned pos;
         unsigned length;
     };

     bool testCharacterClass(CharacterClass* characterClass, int ch)
     {
         if (ch & 0xFF80) {
             for (unsigned i = 0; i < characterClass->m_matchesUnicode.size(); ++i)
                 if (ch == characterClass->m_matchesUnicode[i])
                     return true;
             for (unsigned i = 0; i < characterClass->m_rangesUnicode.size(); ++i)
                 if ((ch >= characterClass->m_rangesUnicode[i].begin) && (ch <= characterClass->m_rangesUnicode[i].end))
                     return true;
         } else {
             for (unsigned i = 0; i < characterClass->m_matches.size(); ++i)
                 if (ch == characterClass->m_matches[i])
                     return true;
             for (unsigned i = 0; i < characterClass->m_ranges.size(); ++i)
                 if ((ch >= characterClass->m_ranges[i].begin) && (ch <= characterClass->m_ranges[i].end))
                     return true;
         }

         return false;
     }

     bool checkCharacter(int testChar, int inputPosition)
     {
         return testChar == input.readChecked(inputPosition);
     }

     bool checkCasedCharacter(int loChar, int hiChar, int inputPosition)
     {
         int ch = input.readChecked(inputPosition);
         return (loChar == ch) || (hiChar == ch);
     }

     bool checkCharacterClass(CharacterClass* characterClass, bool invert, int inputPosition)
     {
         bool match = testCharacterClass(characterClass, input.readChecked(inputPosition));
         return invert ? !match : match;
     }

     bool tryConsumeBackReference(int matchBegin, int matchEnd, int inputOffset)
     {
         int matchSize = matchEnd - matchBegin;

         if (!input.checkInput(matchSize))
             return false;

         for (int i = 0; i < matchSize; ++i) {
             if (!checkCharacter(input.reread(matchBegin + i), inputOffset - matchSize + i)) {
                 input.uncheckInput(matchSize);
                 return false;
             }
         }

         return true;
     }

     bool matchAssertionBOL(ByteTerm& term)
     {
         return (input.atStart(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition - 1)));
     }

     bool matchAssertionEOL(ByteTerm& term)
     {
         if (term.inputPosition)
             return (input.atEnd(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition)));
         else
             return (input.atEnd()) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.read()));
     }

     bool matchAssertionWordBoundary(ByteTerm& term)
     {
         bool prevIsWordchar = !input.atStart(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition - 1));
         bool readIsWordchar;
         if (term.inputPosition)
             readIsWordchar = !input.atEnd(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition));
         else
             readIsWordchar = !input.atEnd() && testCharacterClass(pattern->wordcharCharacterClass, input.read());

         bool wordBoundary = prevIsWordchar != readIsWordchar;
         return term.invert() ? !wordBoundary : wordBoundary;
     }

     bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context)
     {
         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);

         switch (term.atom.quantityType) {
         case QuantifierFixedCount:
             break;

         case QuantifierGreedy:
             if (backTrack->matchAmount) {
                 --backTrack->matchAmount;
                 input.uncheckInput(1);
                 return true;
             }
             break;

         case QuantifierNonGreedy:
             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
                 ++backTrack->matchAmount;
                 if (checkCharacter(term.atom.patternCharacter, term.inputPosition - 1))
                     return true;
             }
             input.uncheckInput(backTrack->matchAmount);
             break;
         }

         return false;
     }

     bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context)
     {
         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);

         switch (term.atom.quantityType) {
         case QuantifierFixedCount:
             break;

         case QuantifierGreedy:
             if (backTrack->matchAmount) {
                 --backTrack->matchAmount;
                 input.uncheckInput(1);
                 return true;
             }
             break;

         case QuantifierNonGreedy:
             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
                 ++backTrack->matchAmount;
                 if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition - 1))
                     return true;
             }
             input.uncheckInput(backTrack->matchAmount);
             break;
         }

         return false;
     }

     bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context)
     {
         ASSERT(term.type == ByteTerm::TypeCharacterClass);
         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);

         switch (term.atom.quantityType) {
         case QuantifierFixedCount: {
             for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
                 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + matchAmount))
                     return false;
             }
             return true;
         }

         case QuantifierGreedy: {
             unsigned matchAmount = 0;
             while ((matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
                 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - 1)) {
                     input.uncheckInput(1);
                     break;
                 }
                 ++matchAmount;
             }
             backTrack->matchAmount = matchAmount;

             return true;
         }

         case QuantifierNonGreedy:
             backTrack->matchAmount = 0;
             return true;
         }

         ASSERT_NOT_REACHED();
         return false;
     }

     bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context)
     {
         ASSERT(term.type == ByteTerm::TypeCharacterClass);
         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);

         switch (term.atom.quantityType) {
         case QuantifierFixedCount:
             break;

         case QuantifierGreedy:
             if (backTrack->matchAmount) {
                 --backTrack->matchAmount;
                 input.uncheckInput(1);
                 return true;
             }
             break;

         case QuantifierNonGreedy:
             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
                 ++backTrack->matchAmount;
                 if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - 1))
                     return true;
             }
             input.uncheckInput(backTrack->matchAmount);
             break;
         }

         return false;
     }

     bool matchBackReference(ByteTerm& term, DisjunctionContext* context)
     {
         ASSERT(term.type == ByteTerm::TypeBackReference);
         BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);

         int matchBegin = output[(term.atom.subpatternId << 1)];
         int matchEnd = output[(term.atom.subpatternId << 1) + 1];
         ASSERT((matchBegin == -1) == (matchEnd == -1));
         ASSERT(matchBegin <= matchEnd);

         if (matchBegin == matchEnd)
             return true;

         switch (term.atom.quantityType) {
         case QuantifierFixedCount: {
             backTrack->begin = input.getPos();
             for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
                 if (!tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
                     input.setPos(backTrack->begin);
                     return false;
                 }
             }
             return true;
         }

         case QuantifierGreedy: {
             unsigned matchAmount = 0;
             while ((matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition))
                 ++matchAmount;
             backTrack->matchAmount = matchAmount;
             return true;
         }

         case QuantifierNonGreedy:
             backTrack->begin = input.getPos();
             backTrack->matchAmount = 0;
             return true;
         }

         ASSERT_NOT_REACHED();
         return false;
     }

     bool backtrackBackReference(ByteTerm& term, DisjunctionContext* context)
     {
         ASSERT(term.type == ByteTerm::TypeBackReference);
         BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);

         int matchBegin = output[(term.atom.subpatternId << 1)];
         int matchEnd = output[(term.atom.subpatternId << 1) + 1];
         ASSERT((matchBegin == -1) == (matchEnd == -1));
         ASSERT(matchBegin <= matchEnd);

         if (matchBegin == matchEnd)
             return false;

         switch (term.atom.quantityType) {
         case QuantifierFixedCount:
             // for quantityCount == 1, could rewind.
             input.setPos(backTrack->begin);
             break;

         case QuantifierGreedy:
             if (backTrack->matchAmount) {
                 --backTrack->matchAmount;
                 input.rewind(matchEnd - matchBegin);
                 return true;
             }
             break;

         case QuantifierNonGreedy:
             if ((backTrack->matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
                 ++backTrack->matchAmount;
                 return true;
             } else
                 input.setPos(backTrack->begin);
             break;
         }

         return false;
     }

     void recordParenthesesMatch(ByteTerm& term, ParenthesesDisjunctionContext* context)
     {
         if (term.capture()) {
             unsigned subpatternId = term.atom.subpatternId;
             output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin + term.inputPosition;
             output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd + term.inputPosition;
         }
     }
     void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context)
     {
         unsigned firstSubpatternId = term.atom.subpatternId;
         unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;
         context->restoreOutput(output, firstSubpatternId, count);
     }
     void resetAssertionMatches(ByteTerm& term)
     {
         unsigned firstSubpatternId = term.atom.subpatternId;
         unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;
         for (unsigned i = 0; i < (count << 1); ++i)
             output[(firstSubpatternId << 1) + i] = -1;
     }
     bool parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack)
     {
         while (backTrack->matchAmount) {
             ParenthesesDisjunctionContext* context = backTrack->lastContext;

             if (matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true))
                 return true;

             resetMatches(term, context);
             popParenthesesDisjunctionContext(backTrack);
             freeParenthesesDisjunctionContext(context);
         }

         return false;
     }

     bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
     {
         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
         ASSERT(term.atom.quantityCount == 1);

         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);

         switch (term.atom.quantityType) {
         case QuantifierGreedy: {
             // set this speculatively; if we get to the parens end this will be true.
             backTrack->inParentheses = 1;
             break;
         }
         case QuantifierNonGreedy: {
             backTrack->inParentheses = 0;
             context->term += term.atom.parenthesesWidth;
             return true;
         }
         case QuantifierFixedCount:
             break;
         }

         if (term.capture()) {
             unsigned subpatternId = term.atom.subpatternId;
             output[(subpatternId << 1)] = input.getPos() + term.inputPosition;
         }

         return true;
     }

     bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext*)
     {
         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
         ASSERT(term.atom.quantityCount == 1);

         if (term.capture()) {
             unsigned subpatternId = term.atom.subpatternId;
             output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition;
         }
         return true;
     }

     bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
     {
         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
         ASSERT(term.atom.quantityCount == 1);

         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);

         if (term.capture()) {
             unsigned subpatternId = term.atom.subpatternId;
             output[(subpatternId << 1)] = -1;
             output[(subpatternId << 1) + 1] = -1;
         }

         switch (term.atom.quantityType) {
         case QuantifierGreedy:
             // if we backtrack to this point, there is another chance - try matching nothing.
             ASSERT(backTrack->inParentheses);
             backTrack->inParentheses = 0;
             context->term += term.atom.parenthesesWidth;
             return true;
         case QuantifierNonGreedy:
             ASSERT(backTrack->inParentheses);
         case QuantifierFixedCount:
             break;
         }

         return false;
     }

     bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
     {
         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
         ASSERT(term.atom.quantityCount == 1);

         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);

         switch (term.atom.quantityType) {
         case QuantifierGreedy:
             if (!backTrack->inParentheses) {
                 context->term -= term.atom.parenthesesWidth;
                 return false;
             }
         case QuantifierNonGreedy:
             if (!backTrack->inParentheses) {
                 // now try to match the parens; set this speculatively.
                 backTrack->inParentheses = 1;
                 if (term.capture()) {
                     unsigned subpatternId = term.atom.subpatternId;
                     output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition;
                 }
                 context->term -= term.atom.parenthesesWidth;
                 return true;
             }
         case QuantifierFixedCount:
             break;
         }

         return false;
     }

     bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
     {
         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
         ASSERT(term.atom.quantityCount == 1);

         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);

         backTrack->begin = input.getPos();
         return true;
     }

     bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
     {
         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
         ASSERT(term.atom.quantityCount == 1);

         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);

         input.setPos(backTrack->begin);

         // We've reached the end of the parens; if they are inverted, this is failure.
         if (term.invert()) {
             context->term -= term.atom.parenthesesWidth;
             return false;
         }

         return true;
     }

     bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
     {
         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
         ASSERT(term.atom.quantityCount == 1);

         // We've failed to match parens; if they are inverted, this is win!
         if (term.invert()) {
             context->term += term.atom.parenthesesWidth;
             return true;
         }

         return false;
     }

     bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
     {
         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
         ASSERT(term.atom.quantityCount == 1);

         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);

         input.setPos(backTrack->begin);

         context->term -= term.atom.parenthesesWidth;
         return false;
     }

     bool matchParentheses(ByteTerm& term, DisjunctionContext* context)
     {
         ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);

         BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);

         unsigned subpatternId = term.atom.subpatternId;
         ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;

         backTrack->prevBegin = output[(subpatternId << 1)];
         backTrack->prevEnd = output[(subpatternId << 1) + 1];

         backTrack->matchAmount = 0;
         backTrack->lastContext = 0;

         switch (term.atom.quantityType) {
         case QuantifierFixedCount: {
             // While we haven't yet reached our fixed limit,
             while (backTrack->matchAmount < term.atom.quantityCount) {
                 // Try to do a match, and it it succeeds, add it to the list.
                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
                 if (matchDisjunction(disjunctionBody, context->getDisjunctionContext(term)))
                     appendParenthesesDisjunctionContext(backTrack, context);
                 else {
                     // The match failed; try to find an alternate point to carry on from.
                     resetMatches(term, context);
                     freeParenthesesDisjunctionContext(context);
                     if (!parenthesesDoBacktrack(term, backTrack))
                         return false;
                 }
             }

             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
             ParenthesesDisjunctionContext* context = backTrack->lastContext;
             recordParenthesesMatch(term, context);
             return true;
         }

         case QuantifierGreedy: {
             while (backTrack->matchAmount < term.atom.quantityCount) {
                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
                 if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term)))
                     appendParenthesesDisjunctionContext(backTrack, context);
                 else {
                     resetMatches(term, context);
                     freeParenthesesDisjunctionContext(context);
                     break;
                 }
             }

             if (backTrack->matchAmount) {
                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
                 recordParenthesesMatch(term, context);
             }
             return true;
         }

         case QuantifierNonGreedy:
             return true;
         }

         ASSERT_NOT_REACHED();
         return false;
     }

     // Rules for backtracking differ depending on whether this is greedy or non-greedy.
     //
     // Greedy matches never should try just adding more - you should already have done
     // the 'more' cases.  Always backtrack, at least a leetle bit.  However cases where
     // you backtrack an item off the list needs checking, since we'll never have matched
     // the one less case.  Tracking forwards, still add as much as possible.
     //
     // Non-greedy, we've already done the one less case, so don't match on popping.
     // We haven't done the one more case, so always try to add that.
     //
     bool backtrackParentheses(ByteTerm& term, DisjunctionContext* context)
     {
         ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);

         BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);

         if (term.capture()) {
             unsigned subpatternId = term.atom.subpatternId;
             output[(subpatternId << 1)] = backTrack->prevBegin;
             output[(subpatternId << 1) + 1] = backTrack->prevEnd;
         }

         ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;

         switch (term.atom.quantityType) {
         case QuantifierFixedCount: {
             ASSERT(backTrack->matchAmount == term.atom.quantityCount);

             ParenthesesDisjunctionContext* context = 0;

             if (!parenthesesDoBacktrack(term, backTrack))
                 return false;

             // While we haven't yet reached our fixed limit,
             while (backTrack->matchAmount < term.atom.quantityCount) {
                 // Try to do a match, and it it succeeds, add it to the list.
                 context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
                 if (matchDisjunction(disjunctionBody, context->getDisjunctionContext(term)))
                     appendParenthesesDisjunctionContext(backTrack, context);
                 else {
                     // The match failed; try to find an alternate point to carry on from.
                     resetMatches(term, context);
                     freeParenthesesDisjunctionContext(context);
                     if (!parenthesesDoBacktrack(term, backTrack))
                         return false;
                 }
             }

             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
             context = backTrack->lastContext;
             recordParenthesesMatch(term, context);
             return true;
         }

         case QuantifierGreedy: {
             if (!backTrack->matchAmount)
                 return false;

             ParenthesesDisjunctionContext* context = backTrack->lastContext;
             if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true)) {
                 while (backTrack->matchAmount < term.atom.quantityCount) {
                     ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
                     if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term)))
                         appendParenthesesDisjunctionContext(backTrack, context);
                     else {
                         resetMatches(term, context);
                         freeParenthesesDisjunctionContext(context);
                         break;
                     }
                 }
             } else {
                 resetMatches(term, context);
                 popParenthesesDisjunctionContext(backTrack);
                 freeParenthesesDisjunctionContext(context);
             }

             if (backTrack->matchAmount) {
                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
                 recordParenthesesMatch(term, context);
             }
             return true;
         }

         case QuantifierNonGreedy: {
             // If we've not reached the limit, try to add one more match.
             if (backTrack->matchAmount < term.atom.quantityCount) {
                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
                 if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term))) {
                     appendParenthesesDisjunctionContext(backTrack, context);
                     recordParenthesesMatch(term, context);
                     return true;
                 } else {
                     resetMatches(term, context);
                     freeParenthesesDisjunctionContext(context);
                 }
             }

             // Nope - okay backtrack looking for an alternative.
             while (backTrack->matchAmount) {
                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
                 if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true)) {
                     // successful backtrack! we're back in the game!
                     if (backTrack->matchAmount) {
                         context = backTrack->lastContext;
                         recordParenthesesMatch(term, context);
                     }
                     return true;
                 }

                 // pop a match off the stack
                 resetMatches(term, context);
                 popParenthesesDisjunctionContext(backTrack);
                 freeParenthesesDisjunctionContext(context);
             }

             return false;
         }
         }

         ASSERT_NOT_REACHED();
         return false;
     }

 #define MATCH_NEXT() { ++context->term; goto matchAgain; }
 #define BACKTRACK() { --context->term; goto backtrack; }
 #define currentTerm() (disjunction->terms[context->term])
     bool matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
     {
         if (btrack)
             BACKTRACK();

         context->matchBegin = input.getPos();
         context->term = 0;

     matchAgain:
         ASSERT(context->term < static_cast<int>(disjunction->terms.size()));

         switch (currentTerm().type) {
         case ByteTerm::TypeSubpatternBegin:
             MATCH_NEXT();
         case ByteTerm::TypeSubpatternEnd:
             context->matchEnd = input.getPos();
             return true;

         case ByteTerm::TypeBodyAlternativeBegin:
             MATCH_NEXT();
         case ByteTerm::TypeBodyAlternativeDisjunction:
         case ByteTerm::TypeBodyAlternativeEnd:
             context->matchEnd = input.getPos();
             return true;

         case ByteTerm::TypeAlternativeBegin:
             MATCH_NEXT();
         case ByteTerm::TypeAlternativeDisjunction:
         case ByteTerm::TypeAlternativeEnd: {
             int offset = currentTerm().alternative.end;
             BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
             backTrack->offset = offset;
             context->term += offset;
             MATCH_NEXT();
         }

         case ByteTerm::TypeAssertionBOL:
             if (matchAssertionBOL(currentTerm()))
                 MATCH_NEXT();
             BACKTRACK();
         case ByteTerm::TypeAssertionEOL:
             if (matchAssertionEOL(currentTerm()))
                 MATCH_NEXT();
             BACKTRACK();
         case ByteTerm::TypeAssertionWordBoundary:
             if (matchAssertionWordBoundary(currentTerm()))
                 MATCH_NEXT();
             BACKTRACK();

         case ByteTerm::TypePatternCharacterOnce:
         case ByteTerm::TypePatternCharacterFixed: {
             for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
                 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + matchAmount))
                     BACKTRACK();
             }
             MATCH_NEXT();
         }
         case ByteTerm::TypePatternCharacterGreedy: {
             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
             unsigned matchAmount = 0;
             while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
                 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - 1)) {
                     input.uncheckInput(1);
                     break;
                 }
                 ++matchAmount;
             }
             backTrack->matchAmount = matchAmount;

             MATCH_NEXT();
         }
         case ByteTerm::TypePatternCharacterNonGreedy: {
             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
             backTrack->matchAmount = 0;
             MATCH_NEXT();
         }

         case ByteTerm::TypePatternCasedCharacterOnce:
         case ByteTerm::TypePatternCasedCharacterFixed: {
             for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
                 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + matchAmount))
                     BACKTRACK();
             }
             MATCH_NEXT();
         }
         case ByteTerm::TypePatternCasedCharacterGreedy: {
             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
             unsigned matchAmount = 0;
             while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
                 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - 1)) {
                     input.uncheckInput(1);
                     break;
                 }
                 ++matchAmount;
             }
             backTrack->matchAmount = matchAmount;

             MATCH_NEXT();
         }
         case ByteTerm::TypePatternCasedCharacterNonGreedy: {
             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
             backTrack->matchAmount = 0;
             MATCH_NEXT();
         }

         case ByteTerm::TypeCharacterClass:
             if (matchCharacterClass(currentTerm(), context))
                 MATCH_NEXT();
             BACKTRACK();
         case ByteTerm::TypeBackReference:
             if (matchBackReference(currentTerm(), context))
                 MATCH_NEXT();
             BACKTRACK();
         case ByteTerm::TypeParenthesesSubpattern:
             if (matchParentheses(currentTerm(), context))
                 MATCH_NEXT();
             BACKTRACK();
         case ByteTerm::TypeParenthesesSubpatternOnceBegin:
             if (matchParenthesesOnceBegin(currentTerm(), context))
                 MATCH_NEXT();
             BACKTRACK();
         case ByteTerm::TypeParenthesesSubpatternOnceEnd:
             if (matchParenthesesOnceEnd(currentTerm(), context))
                 MATCH_NEXT();
             BACKTRACK();
         case ByteTerm::TypeParentheticalAssertionBegin:
             if (matchParentheticalAssertionBegin(currentTerm(), context))
                 MATCH_NEXT();
             BACKTRACK();
         case ByteTerm::TypeParentheticalAssertionEnd:
             if (matchParentheticalAssertionEnd(currentTerm(), context))
                 MATCH_NEXT();
             BACKTRACK();

         case ByteTerm::TypeCheckInput:
             if (input.checkInput(currentTerm().checkInputCount))
                 MATCH_NEXT();
             BACKTRACK();
         }

         // We should never fall-through to here.
         ASSERT_NOT_REACHED();

     backtrack:
         ASSERT(context->term < static_cast<int>(disjunction->terms.size()));

         switch (currentTerm().type) {
         case ByteTerm::TypeSubpatternBegin:
             return false;
         case ByteTerm::TypeSubpatternEnd:
             ASSERT_NOT_REACHED();

         case ByteTerm::TypeBodyAlternativeBegin:
         case ByteTerm::TypeBodyAlternativeDisjunction: {
             int offset = currentTerm().alternative.next;
             context->term += offset;
             if (offset > 0)
                 MATCH_NEXT();

             if (input.atEnd())
                 return false;

             input.next();
             context->matchBegin = input.getPos();
             MATCH_NEXT();
         }
         case ByteTerm::TypeBodyAlternativeEnd:
             ASSERT_NOT_REACHED();

             case ByteTerm::TypeAlternativeBegin:
             case ByteTerm::TypeAlternativeDisjunction: {
                 int offset = currentTerm().alternative.next;
                 context->term += offset;
                 if (offset > 0)
                     MATCH_NEXT();
                 BACKTRACK();
             }
             case ByteTerm::TypeAlternativeEnd: {
                 // We should never backtrack back into an alternative of the main body of the regex.
                 BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
                 unsigned offset = backTrack->offset;
                 context->term -= offset;
                 BACKTRACK();
             }

             case ByteTerm::TypeAssertionBOL:
             case ByteTerm::TypeAssertionEOL:
             case ByteTerm::TypeAssertionWordBoundary:
                 BACKTRACK();

             case ByteTerm::TypePatternCharacterOnce:
             case ByteTerm::TypePatternCharacterFixed:
             case ByteTerm::TypePatternCharacterGreedy:
             case ByteTerm::TypePatternCharacterNonGreedy:
                 if (backtrackPatternCharacter(currentTerm(), context))
                     MATCH_NEXT();
                 BACKTRACK();
             case ByteTerm::TypePatternCasedCharacterOnce:
             case ByteTerm::TypePatternCasedCharacterFixed:
             case ByteTerm::TypePatternCasedCharacterGreedy:
             case ByteTerm::TypePatternCasedCharacterNonGreedy:
                 if (backtrackPatternCasedCharacter(currentTerm(), context))
                     MATCH_NEXT();
                 BACKTRACK();
             case ByteTerm::TypeCharacterClass:
                 if (backtrackCharacterClass(currentTerm(), context))
                     MATCH_NEXT();
                 BACKTRACK();
             case ByteTerm::TypeBackReference:
                 if (backtrackBackReference(currentTerm(), context))
                     MATCH_NEXT();
                 BACKTRACK();
             case ByteTerm::TypeParenthesesSubpattern:
                 if (backtrackParentheses(currentTerm(), context))
                     MATCH_NEXT();
                 BACKTRACK();
             case ByteTerm::TypeParenthesesSubpatternOnceBegin:
                 if (backtrackParenthesesOnceBegin(currentTerm(), context))
                     MATCH_NEXT();
                 BACKTRACK();
             case ByteTerm::TypeParenthesesSubpatternOnceEnd:
                 if (backtrackParenthesesOnceEnd(currentTerm(), context))
                     MATCH_NEXT();
                 BACKTRACK();
             case ByteTerm::TypeParentheticalAssertionBegin:
                 if (backtrackParentheticalAssertionBegin(currentTerm(), context))
                     MATCH_NEXT();
                 BACKTRACK();
             case ByteTerm::TypeParentheticalAssertionEnd:
                 if (backtrackParentheticalAssertionEnd(currentTerm(), context))
                     MATCH_NEXT();
                 BACKTRACK();

             case ByteTerm::TypeCheckInput:
                 input.uncheckInput(currentTerm().checkInputCount);
                 BACKTRACK();
         }

         ASSERT_NOT_REACHED();
         return false;
     }

     bool matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
     {
         if (matchDisjunction(disjunction, context, btrack)) {
             while (context->matchBegin == context->matchEnd) {
                 if (!matchDisjunction(disjunction, context, true))
                     return false;
             }
             return true;
         }

         return false;
     }

     int interpret()
     {
         for (unsigned i = 0; i < ((pattern->m_body->m_numSubpatterns + 1) << 1); ++i)
             output[i] = -1;

         DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());

         if (matchDisjunction(pattern->m_body.get(), context)) {
             output[0] = context->matchBegin;
             output[1] = context->matchEnd;
         }

         freeDisjunctionContext(context);

         return output[0];
     }

     Interpreter(BytecodePattern* pattern, int* output, const UChar* inputChar, unsigned start, unsigned length)
         : pattern(pattern)
         , output(output)
         , input(inputChar, start, length)
     {
     }

 private:
     BytecodePattern *pattern;
     int* output;
     InputStream input;
 };


 class ByteCompiler {
     struct ParenthesesStackEntry {
         unsigned beginTerm;
         unsigned savedAlternativeIndex;
         ParenthesesStackEntry(unsigned beginTerm, unsigned savedAlternativeIndex/*, unsigned subpatternId, bool capture = false*/)
             : beginTerm(beginTerm)
             , savedAlternativeIndex(savedAlternativeIndex)
         {
         }
     };

 public:
     ByteCompiler(RegexPattern& pattern)
         : m_pattern(pattern)
     {
         m_bodyDisjunction = 0;
         m_currentAlternativeIndex = 0;
     }

     BytecodePattern* compile()
     {
         regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize);
         emitDisjunction(m_pattern.m_body);
         regexEnd();

         return new BytecodePattern(m_bodyDisjunction, m_allParenthesesInfo, m_pattern);
     }

     void checkInput(unsigned count)
     {
         m_bodyDisjunction->terms.append(ByteTerm::CheckInput(count));
     }

     void assertionBOL(int inputPosition)
     {
         m_bodyDisjunction->terms.append(ByteTerm::BOL(inputPosition));
     }

     void assertionEOL(int inputPosition)
     {
         m_bodyDisjunction->terms.append(ByteTerm::EOL(inputPosition));
     }

     void assertionWordBoundary(bool invert, int inputPosition)
     {
         m_bodyDisjunction->terms.append(ByteTerm::WordBoundary(invert, inputPosition));
     }

     void atomPatternCharacter(UChar ch, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
     {
         if (m_pattern.m_ignoreCase) {
             UChar lo = Unicode::toLower(ch);
             UChar hi = Unicode::toUpper(ch);

             if (lo != hi) {
                 m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityCount, quantityType));
                 return;
             }
         }

         m_bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityCount, quantityType));
     }

     void atomCharacterClass(CharacterClass* characterClass, bool invert, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
     {
         m_bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition));

         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount;
         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
     }

     void atomBackReference(unsigned subpatternId, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
     {
         ASSERT(subpatternId);

         m_bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition));

         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount;
         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
     }

     void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, int inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
     {
         int beginTerm = m_bodyDisjunction->terms.size();

         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, inputPosition));
         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;

         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
         m_currentAlternativeIndex = beginTerm + 1;
     }

     void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation)
     {
         int beginTerm = m_bodyDisjunction->terms.size();

         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, invert, 0));
         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;

         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
         m_currentAlternativeIndex = beginTerm + 1;
     }

     unsigned popParenthesesStack()
     {
         ASSERT(m_parenthesesStack.size());
         int stackEnd = m_parenthesesStack.size() - 1;
         unsigned beginTerm = m_parenthesesStack[stackEnd].beginTerm;
         m_currentAlternativeIndex = m_parenthesesStack[stackEnd].savedAlternativeIndex;
         m_parenthesesStack.shrink(stackEnd);

         ASSERT(beginTerm < m_bodyDisjunction->terms.size());
         ASSERT(m_currentAlternativeIndex < m_bodyDisjunction->terms.size());

         return beginTerm;
     }

 #ifndef NDEBUG
     void dumpDisjunction(ByteDisjunction* disjunction)
     {
         printf("ByteDisjunction(%p):\n\t", disjunction);
         for (unsigned i = 0; i < disjunction->terms.size(); ++i)
             printf("{ %d } ", disjunction->terms[i].type);
         printf("\n");
     }
 #endif

     void closeAlternative(int beginTerm)
     {
         int origBeginTerm = beginTerm;
         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin);
         int endIndex = m_bodyDisjunction->terms.size();

         unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;

         if (!m_bodyDisjunction->terms[beginTerm].alternative.next)
             m_bodyDisjunction->terms.remove(beginTerm);
         else {
             while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
                 beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
                 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeDisjunction);
                 m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
                 m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
             }

             m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;

             m_bodyDisjunction->terms.append(ByteTerm::AlternativeEnd());
             m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
         }
     }

     void closeBodyAlternative()
     {
         int beginTerm = 0;
         int origBeginTerm = 0;
         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin);
         int endIndex = m_bodyDisjunction->terms.size();

         unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;

         while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
             beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
             ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction);
             m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
             m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
         }

         m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;

         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeEnd());
         m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
     }

     void atomParenthesesEnd(bool doInline, unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0)
     {
         unsigned beginTerm = popParenthesesStack();
         closeAlternative(beginTerm + 1);
         unsigned endTerm = m_bodyDisjunction->terms.size();

         bool isAssertion = m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin;
         bool invertOrCapture = m_bodyDisjunction->terms[beginTerm].invertOrCapture;
         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;

         m_bodyDisjunction->terms.append(ByteTerm(isAssertion ? ByteTerm::TypeParentheticalAssertionEnd : ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, invertOrCapture, inputPosition));
         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;

         if (doInline) {
             m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
             m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
             m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount;
             m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
         } else {
             ByteTerm& parenthesesBegin = m_bodyDisjunction->terms[beginTerm];
             ASSERT(parenthesesBegin.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);

             bool invertOrCapture = parenthesesBegin.invertOrCapture;
             unsigned subpatternId = parenthesesBegin.atom.subpatternId;

             unsigned numSubpatterns = lastSubpatternId - subpatternId + 1;
             ByteDisjunction* parenthesesDisjunction = new ByteDisjunction(numSubpatterns, callFrameSize);

             parenthesesDisjunction->terms.append(ByteTerm::SubpatternBegin());
             for (unsigned termInParentheses = beginTerm + 1; termInParentheses < endTerm; ++termInParentheses)
                 parenthesesDisjunction->terms.append(m_bodyDisjunction->terms[termInParentheses]);
             parenthesesDisjunction->terms.append(ByteTerm::SubpatternEnd());

             m_bodyDisjunction->terms.shrink(beginTerm);

             m_allParenthesesInfo.append(parenthesesDisjunction);
             m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, invertOrCapture, inputPosition));

             m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
             m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
             m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
         }
     }

     void regexBegin(unsigned numSubpatterns, unsigned callFrameSize)
     {
         m_bodyDisjunction = new ByteDisjunction(numSubpatterns, callFrameSize);
         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin());
         m_bodyDisjunction->terms[0].frameLocation = 0;
         m_currentAlternativeIndex = 0;
     }

     void regexEnd()
     {
         closeBodyAlternative();
     }

     void alternativeBodyDisjunction()
     {
         int newAlternativeIndex = m_bodyDisjunction->terms.size();
         m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeDisjunction());

         m_currentAlternativeIndex = newAlternativeIndex;
     }

     void alternativeDisjunction()
     {
         int newAlternativeIndex = m_bodyDisjunction->terms.size();
         m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
         m_bodyDisjunction->terms.append(ByteTerm::AlternativeDisjunction());

         m_currentAlternativeIndex = newAlternativeIndex;
     }

     void emitDisjunction(PatternDisjunction* disjunction, unsigned inputCountAlreadyChecked = 0, unsigned parenthesesInputCountAlreadyChecked = 0)
     {
         for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
             unsigned currentCountAlreadyChecked = inputCountAlreadyChecked;

             if (alt) {
                 if (disjunction == m_pattern.m_body)
                     alternativeBodyDisjunction();
                 else
                     alternativeDisjunction();
             }

             PatternAlternative* alternative = disjunction->m_alternatives[alt];
             unsigned minimumSize = alternative->m_minimumSize;

             ASSERT(minimumSize >= parenthesesInputCountAlreadyChecked);
             unsigned countToCheck = minimumSize - parenthesesInputCountAlreadyChecked;
             if (countToCheck)
                 checkInput(countToCheck);
             currentCountAlreadyChecked += countToCheck;

             for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
                 PatternTerm& term = alternative->m_terms[i];

                 switch (term.type) {
                 case PatternTerm::TypeAssertionBOL:
                     assertionBOL(term.inputPosition - currentCountAlreadyChecked);
                     break;

                 case PatternTerm::TypeAssertionEOL:
                     assertionEOL(term.inputPosition - currentCountAlreadyChecked);
                     break;

                 case PatternTerm::TypeAssertionWordBoundary:
                     assertionWordBoundary(term.invertOrCapture, term.inputPosition - currentCountAlreadyChecked);
                     break;

                 case PatternTerm::TypePatternCharacter:
                     atomPatternCharacter(term.patternCharacter, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
                     break;

                 case PatternTerm::TypeCharacterClass:
                     atomCharacterClass(term.characterClass, term.invertOrCapture, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
                     break;

                 case PatternTerm::TypeBackReference:
                     atomBackReference(term.subpatternId, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
                         break;

                 case PatternTerm::TypeForwardReference:
                     break;

                 case PatternTerm::TypeParenthesesSubpattern: {
                     unsigned disjunctionAlreadyCheckedCount = 0;
                     if ((term.quantityCount == 1) && !term.parentheses.isCopy) {
                         if (term.quantityType == QuantifierFixedCount) {
                             disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize;
                             unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
                             atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.invertOrCapture, delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, term.frameLocation);
                             emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, term.parentheses.disjunction->m_minimumSize);
                             atomParenthesesEnd(true, term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
                         } else {
                             unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
                             atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.invertOrCapture, delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, term.frameLocation + RegexStackSpaceForBackTrackInfoParenthesesOnce);
                             emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
                             atomParenthesesEnd(true, term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
                         }
                     } else {
                         unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
                         atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.invertOrCapture, delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, 0);
                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
                         atomParenthesesEnd(false, term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
                     }
                     break;
                 }

                 case PatternTerm::TypeParentheticalAssertion: {
                     unsigned alternativeFrameLocation = term.inputPosition + RegexStackSpaceForBackTrackInfoParentheticalAssertion;

                     atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invertOrCapture, term.frameLocation, alternativeFrameLocation);
                     emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
                     atomParenthesesEnd(true, term.parentheses.lastSubpatternId, 0, term.frameLocation, term.quantityCount, term.quantityType);
                     break;
                 }
                 }
             }
         }
     }

 private:
     RegexPattern& m_pattern;
     ByteDisjunction* m_bodyDisjunction;
     unsigned m_currentAlternativeIndex;
     Vector<ParenthesesStackEntry> m_parenthesesStack;
     Vector<ByteDisjunction*> m_allParenthesesInfo;
 };


 BytecodePattern* byteCompileRegex(const UString& patternString, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline)
 {
     RegexPattern pattern(ignoreCase, multiline);

     if ((error = compileRegex(patternString, pattern)))
         return 0;

     numSubpatterns = pattern.m_numSubpatterns;

     return ByteCompiler(pattern).compile();
 }

 int interpretRegex(BytecodePattern* regex, const UChar* input, unsigned start, unsigned length, int* output)
 {
     return Interpreter(regex, output, input, start, length).interpret();
 }


 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoPatternCharacter) == (RegexStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoPatternCharacter);
 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoCharacterClass) == (RegexStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoCharacterClass);
 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoBackReference) == (RegexStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoBackReference);
 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoAlternative) == (RegexStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoAlternative);
 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheticalAssertion) == (RegexStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoParentheticalAssertion);
 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParenthesesOnce) == (RegexStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoParenthesesOnce);
 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheses) == (RegexStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoParentheses);


 } }

 #endif