/*
 * Copyright (C) 2009 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
| |
| #ifndef RegexPattern_h |
| #define RegexPattern_h |
| |
| #include <wtf/Platform.h> |
| |
| #if ENABLE(YARR) |
| |
| #include <wtf/Vector.h> |
| #include <wtf/unicode/Unicode.h> |
| |
| |
| namespace JSC { namespace Yarr { |
| |
// Stack-space constants, one per kind of backtracking record.
// NOTE(review): the unit is not visible in this header (presumably stack
// slots) - confirm at the sites that consume these values.

// Terms that need space only when their quantifier is not of the fixed kind.
#define RegexStackSpaceForBackTrackInfoPatternCharacter 1
#define RegexStackSpaceForBackTrackInfoCharacterClass 1

#define RegexStackSpaceForBackTrackInfoBackReference 2

// This amount is needed once for every alternative.
#define RegexStackSpaceForBackTrackInfoAlternative 1

#define RegexStackSpaceForBackTrackInfoParentheticalAssertion 1

// As with characters and classes: only when the quantifier is not fixed.
#define RegexStackSpaceForBackTrackInfoParenthesesOnce 1

#define RegexStackSpaceForBackTrackInfoParentheses 4
| |
| struct PatternDisjunction; |
| |
| struct CharacterRange { |
| UChar begin; |
| UChar end; |
| |
| CharacterRange(UChar begin, UChar end) |
| : begin(begin) |
| , end(end) |
| { |
| } |
| }; |
| |
| struct CharacterClass { |
| Vector<UChar> m_matches; |
| Vector<CharacterRange> m_ranges; |
| Vector<UChar> m_matchesUnicode; |
| Vector<CharacterRange> m_rangesUnicode; |
| }; |
| |
// How a term's repeat count is to be applied. Every PatternTerm constructor
// starts a term off as QuantifierFixedCount with a count of 1;
// PatternTerm::quantify() replaces both values.
// No trailing comma after the last enumerator: that is only well-formed from
// C++11 onwards, and this header is otherwise written as pre-C++11 code.
enum QuantifierType {
    QuantifierFixedCount,
    QuantifierGreedy,
    QuantifierNonGreedy
};
| |
| struct PatternTerm { |
| enum Type { |
| TypeAssertionBOL, |
| TypeAssertionEOL, |
| TypeAssertionWordBoundary, |
| TypePatternCharacter, |
| TypeCharacterClass, |
| TypeBackReference, |
| TypeForwardReference, |
| TypeParenthesesSubpattern, |
| TypeParentheticalAssertion, |
| } type; |
| bool invertOrCapture; |
| union { |
| UChar patternCharacter; |
| CharacterClass* characterClass; |
| unsigned subpatternId; |
| struct { |
| PatternDisjunction* disjunction; |
| unsigned subpatternId; |
| unsigned lastSubpatternId; |
| bool isCopy; |
| } parentheses; |
| }; |
| QuantifierType quantityType; |
| unsigned quantityCount; |
| int inputPosition; |
| unsigned frameLocation; |
| |
| PatternTerm(UChar ch) |
| : type(PatternTerm::TypePatternCharacter) |
| { |
| patternCharacter = ch; |
| quantityType = QuantifierFixedCount; |
| quantityCount = 1; |
| } |
| |
| PatternTerm(CharacterClass* charClass, bool invert) |
| : type(PatternTerm::TypeCharacterClass) |
| , invertOrCapture(invert) |
| { |
| characterClass = charClass; |
| quantityType = QuantifierFixedCount; |
| quantityCount = 1; |
| } |
| |
| PatternTerm(Type type, unsigned subpatternId, PatternDisjunction* disjunction, bool invertOrCapture) |
| : type(type) |
| , invertOrCapture(invertOrCapture) |
| { |
| parentheses.disjunction = disjunction; |
| parentheses.subpatternId = subpatternId; |
| parentheses.isCopy = false; |
| quantityType = QuantifierFixedCount; |
| quantityCount = 1; |
| } |
| |
| PatternTerm(Type type, bool invert = false) |
| : type(type) |
| , invertOrCapture(invert) |
| { |
| quantityType = QuantifierFixedCount; |
| quantityCount = 1; |
| } |
| |
| PatternTerm(unsigned spatternId) |
| : type(TypeBackReference) |
| , invertOrCapture(invertOrCapture) |
| { |
| subpatternId = spatternId; |
| quantityType = QuantifierFixedCount; |
| quantityCount = 1; |
| } |
| |
| static PatternTerm ForwardReference() |
| { |
| return PatternTerm(TypeForwardReference); |
| } |
| |
| static PatternTerm BOL() |
| { |
| return PatternTerm(TypeAssertionBOL); |
| } |
| |
| static PatternTerm EOL() |
| { |
| return PatternTerm(TypeAssertionEOL); |
| } |
| |
| static PatternTerm WordBoundary(bool invert) |
| { |
| return PatternTerm(TypeAssertionWordBoundary, invert); |
| } |
| |
| bool invert() |
| { |
| return invertOrCapture; |
| } |
| |
| bool capture() |
| { |
| return invertOrCapture; |
| } |
| |
| void quantify(unsigned count, QuantifierType type) |
| { |
| quantityCount = count; |
| quantityType = type; |
| } |
| }; |
| |
| struct PatternAlternative { |
| PatternAlternative(PatternDisjunction* disjunction) |
| : m_parent(disjunction) |
| { |
| } |
| |
| PatternTerm& lastTerm() |
| { |
| ASSERT(m_terms.size()); |
| return m_terms[m_terms.size() - 1]; |
| } |
| |
| void removeLastTerm() |
| { |
| ASSERT(m_terms.size()); |
| m_terms.shrink(m_terms.size() - 1); |
| } |
| |
| Vector<PatternTerm> m_terms; |
| PatternDisjunction* m_parent; |
| unsigned m_minimumSize; |
| bool m_hasFixedSize; |
| }; |
| |
| struct PatternDisjunction { |
| PatternDisjunction(PatternAlternative* parent = 0) |
| : m_parent(parent) |
| { |
| } |
| |
| ~PatternDisjunction() |
| { |
| deleteAllValues(m_alternatives); |
| } |
| |
| PatternAlternative* addNewAlternative() |
| { |
| PatternAlternative* alternative = new PatternAlternative(this); |
| m_alternatives.append(alternative); |
| return alternative; |
| } |
| |
| Vector<PatternAlternative*> m_alternatives; |
| PatternAlternative* m_parent; |
| unsigned m_minimumSize; |
| unsigned m_callFrameSize; |
| bool m_hasFixedSize; |
| }; |
| |
| // You probably don't want to be calling these functions directly |
| // (please to be calling newlineCharacterClass() et al on your |
| // friendly neighborhood RegexPattern instance to get nicely |
| // cached copies). |
| CharacterClass* newlineCreate(); |
| CharacterClass* digitsCreate(); |
| CharacterClass* spacesCreate(); |
| CharacterClass* wordcharCreate(); |
| CharacterClass* nondigitsCreate(); |
| CharacterClass* nonspacesCreate(); |
| CharacterClass* nonwordcharCreate(); |
| |
| struct RegexPattern { |
| RegexPattern(bool ignoreCase, bool multiline) |
| : m_ignoreCase(ignoreCase) |
| , m_multiline(multiline) |
| , m_numSubpatterns(0) |
| , m_maxBackReference(0) |
| , newlineCached(0) |
| , digitsCached(0) |
| , spacesCached(0) |
| , wordcharCached(0) |
| , nondigitsCached(0) |
| , nonspacesCached(0) |
| , nonwordcharCached(0) |
| { |
| } |
| |
| ~RegexPattern() |
| { |
| deleteAllValues(m_disjunctions); |
| deleteAllValues(m_userCharacterClasses); |
| } |
| |
| void reset() |
| { |
| m_numSubpatterns = 0; |
| m_maxBackReference = 0; |
| |
| newlineCached = 0; |
| digitsCached = 0; |
| spacesCached = 0; |
| wordcharCached = 0; |
| nondigitsCached = 0; |
| nonspacesCached = 0; |
| nonwordcharCached = 0; |
| |
| deleteAllValues(m_disjunctions); |
| m_disjunctions.clear(); |
| deleteAllValues(m_userCharacterClasses); |
| m_userCharacterClasses.clear(); |
| } |
| |
| bool containsIllegalBackReference() |
| { |
| return m_maxBackReference > m_numSubpatterns; |
| } |
| |
| CharacterClass* newlineCharacterClass() |
| { |
| if (!newlineCached) |
| m_userCharacterClasses.append(newlineCached = newlineCreate()); |
| return newlineCached; |
| } |
| CharacterClass* digitsCharacterClass() |
| { |
| if (!digitsCached) |
| m_userCharacterClasses.append(digitsCached = digitsCreate()); |
| return digitsCached; |
| } |
| CharacterClass* spacesCharacterClass() |
| { |
| if (!spacesCached) |
| m_userCharacterClasses.append(spacesCached = spacesCreate()); |
| return spacesCached; |
| } |
| CharacterClass* wordcharCharacterClass() |
| { |
| if (!wordcharCached) |
| m_userCharacterClasses.append(wordcharCached = wordcharCreate()); |
| return wordcharCached; |
| } |
| CharacterClass* nondigitsCharacterClass() |
| { |
| if (!nondigitsCached) |
| m_userCharacterClasses.append(nondigitsCached = nondigitsCreate()); |
| return nondigitsCached; |
| } |
| CharacterClass* nonspacesCharacterClass() |
| { |
| if (!nonspacesCached) |
| m_userCharacterClasses.append(nonspacesCached = nonspacesCreate()); |
| return nonspacesCached; |
| } |
| CharacterClass* nonwordcharCharacterClass() |
| { |
| if (!nonwordcharCached) |
| m_userCharacterClasses.append(nonwordcharCached = nonwordcharCreate()); |
| return nonwordcharCached; |
| } |
| |
| bool m_ignoreCase; |
| bool m_multiline; |
| unsigned m_numSubpatterns; |
| unsigned m_maxBackReference; |
| PatternDisjunction* m_body; |
| Vector<PatternDisjunction*, 4> m_disjunctions; |
| Vector<CharacterClass*> m_userCharacterClasses; |
| |
| private: |
| CharacterClass* newlineCached; |
| CharacterClass* digitsCached; |
| CharacterClass* spacesCached; |
| CharacterClass* wordcharCached; |
| CharacterClass* nondigitsCached; |
| CharacterClass* nonspacesCached; |
| CharacterClass* nonwordcharCached; |
| }; |
| |
| } } // namespace JSC::Yarr |
| |
| #endif |
| |
| #endif // RegexPattern_h |