| /* |
| * Copyright (C) 2008 Apple Inc. All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * |
| * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
| * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
| * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
| * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| #include "config.h" |
| #include "WREC.h" |
| |
| #if ENABLE(WREC) |
| |
| #include "CharacterClassConstructor.h" |
| #include "Interpreter.h" |
| #include "WRECFunctors.h" |
| #include "WRECParser.h" |
| #include "pcre_internal.h" |
| |
| using namespace WTF; |
| |
| namespace JSC { namespace WREC { |
| |
| void Generator::generateEnter() |
| { |
| #if PLATFORM(X86) |
| // On x86 edi & esi are callee preserved registers. |
| push(X86Registers::edi); |
| push(X86Registers::esi); |
| |
| #if COMPILER(MSVC) |
| // Move the arguments into registers. |
| peek(input, 3); |
| peek(index, 4); |
| peek(length, 5); |
| peek(output, 6); |
| #else |
| // On gcc the function is regparm(3), so the input, index, and length registers |
| // (eax, edx, and ecx respectively) already contain the appropriate values. |
| // Just load the fourth argument (output) into edi |
| peek(output, 3); |
| #endif |
| #endif |
| } |
| |
| void Generator::generateReturnSuccess() |
| { |
| ASSERT(returnRegister != index); |
| ASSERT(returnRegister != output); |
| |
| // Set return value. |
| pop(returnRegister); // match begin |
| store32(returnRegister, output); |
| store32(index, Address(output, 4)); // match end |
| |
| // Restore callee save registers. |
| #if PLATFORM(X86) |
| pop(X86Registers::esi); |
| pop(X86Registers::edi); |
| #endif |
| ret(); |
| } |
| |
| void Generator::generateSaveIndex() |
| { |
| push(index); |
| } |
| |
| void Generator::generateIncrementIndex(Jump* failure) |
| { |
| peek(index); |
| if (failure) |
| *failure = branch32(Equal, length, index); |
| add32(Imm32(1), index); |
| poke(index); |
| } |
| |
| void Generator::generateLoadCharacter(JumpList& failures) |
| { |
| failures.append(branch32(Equal, length, index)); |
| load16(BaseIndex(input, index, TimesTwo), character); |
| } |
| |
| // For the sake of end-of-line assertions, we treat one-past-the-end as if it |
| // were part of the input string. |
| void Generator::generateJumpIfNotEndOfInput(Label target) |
| { |
| branch32(LessThanOrEqual, index, length, target); |
| } |
| |
| void Generator::generateReturnFailure() |
| { |
| pop(); |
| move(Imm32(-1), returnRegister); |
| |
| #if PLATFORM(X86) |
| pop(X86Registers::esi); |
| pop(X86Registers::edi); |
| #endif |
| ret(); |
| } |
| |
| void Generator::generateBacktrack1() |
| { |
| sub32(Imm32(1), index); |
| } |
| |
| void Generator::generateBacktrackBackreference(unsigned subpatternId) |
| { |
| sub32(Address(output, (2 * subpatternId + 1) * sizeof(int)), index); |
| add32(Address(output, (2 * subpatternId) * sizeof(int)), index); |
| } |
| |
| void Generator::generateBackreferenceQuantifier(JumpList& failures, Quantifier::Type quantifierType, unsigned subpatternId, unsigned min, unsigned max) |
| { |
| GenerateBackreferenceFunctor functor(subpatternId); |
| |
| load32(Address(output, (2 * subpatternId) * sizeof(int)), character); |
| Jump skipIfEmpty = branch32(Equal, Address(output, ((2 * subpatternId) + 1) * sizeof(int)), character); |
| |
| ASSERT(quantifierType == Quantifier::Greedy || quantifierType == Quantifier::NonGreedy); |
| if (quantifierType == Quantifier::Greedy) |
| generateGreedyQuantifier(failures, functor, min, max); |
| else |
| generateNonGreedyQuantifier(failures, functor, min, max); |
| |
| skipIfEmpty.link(this); |
| } |
| |
| void Generator::generateNonGreedyQuantifier(JumpList& failures, GenerateAtomFunctor& functor, unsigned min, unsigned max) |
| { |
| JumpList atomFailedList; |
| JumpList alternativeFailedList; |
| |
| // (0) Setup: Save, then init repeatCount. |
| push(repeatCount); |
| move(Imm32(0), repeatCount); |
| Jump start = jump(); |
| |
| // (4) Quantifier failed: No more atom reading possible. |
| Label quantifierFailed(this); |
| pop(repeatCount); |
| failures.append(jump()); |
| |
| // (3) Alternative failed: If we can, read another atom, then fall through to (2) to try again. |
| Label alternativeFailed(this); |
| pop(index); |
| if (max != Quantifier::Infinity) |
| branch32(Equal, repeatCount, Imm32(max), quantifierFailed); |
| |
| // (1) Read an atom. |
| if (min) |
| start.link(this); |
| Label readAtom(this); |
| functor.generateAtom(this, atomFailedList); |
| atomFailedList.linkTo(quantifierFailed, this); |
| add32(Imm32(1), repeatCount); |
| |
| // (2) Keep reading if we're under the minimum. |
| if (min > 1) |
| branch32(LessThan, repeatCount, Imm32(min), readAtom); |
| |
| // (3) Test the rest of the alternative. |
| if (!min) |
| start.link(this); |
| push(index); |
| m_parser.parseAlternative(alternativeFailedList); |
| alternativeFailedList.linkTo(alternativeFailed, this); |
| |
| pop(); |
| pop(repeatCount); |
| } |
| |
| void Generator::generateGreedyQuantifier(JumpList& failures, GenerateAtomFunctor& functor, unsigned min, unsigned max) |
| { |
| if (!max) |
| return; |
| |
| JumpList doneReadingAtomsList; |
| JumpList alternativeFailedList; |
| |
| // (0) Setup: Save, then init repeatCount. |
| push(repeatCount); |
| move(Imm32(0), repeatCount); |
| |
| // (1) Greedily read as many copies of the atom as possible, then jump to (2). |
| Label readAtom(this); |
| functor.generateAtom(this, doneReadingAtomsList); |
| add32(Imm32(1), repeatCount); |
| if (max == Quantifier::Infinity) |
| jump(readAtom); |
| else if (max == 1) |
| doneReadingAtomsList.append(jump()); |
| else { |
| branch32(NotEqual, repeatCount, Imm32(max), readAtom); |
| doneReadingAtomsList.append(jump()); |
| } |
| |
| // (5) Quantifier failed: No more backtracking possible. |
| Label quantifierFailed(this); |
| pop(repeatCount); |
| failures.append(jump()); |
| |
| // (4) Alternative failed: Backtrack, then fall through to (2) to try again. |
| Label alternativeFailed(this); |
| pop(index); |
| functor.backtrack(this); |
| sub32(Imm32(1), repeatCount); |
| |
| // (2) Verify that we have enough atoms. |
| doneReadingAtomsList.link(this); |
| branch32(LessThan, repeatCount, Imm32(min), quantifierFailed); |
| |
| // (3) Test the rest of the alternative. |
| push(index); |
| m_parser.parseAlternative(alternativeFailedList); |
| alternativeFailedList.linkTo(alternativeFailed, this); |
| |
| pop(); |
| pop(repeatCount); |
| } |
| |
| void Generator::generatePatternCharacterSequence(JumpList& failures, int* sequence, size_t count) |
| { |
| for (size_t i = 0; i < count;) { |
| if (i < count - 1) { |
| if (generatePatternCharacterPair(failures, sequence[i], sequence[i + 1])) { |
| i += 2; |
| continue; |
| } |
| } |
| |
| generatePatternCharacter(failures, sequence[i]); |
| ++i; |
| } |
| } |
| |
| bool Generator::generatePatternCharacterPair(JumpList& failures, int ch1, int ch2) |
| { |
| if (m_parser.ignoreCase()) { |
| // Non-trivial case folding requires more than one test, so we can't |
| // test as a pair with an adjacent character. |
| if (!isASCII(ch1) && Unicode::toLower(ch1) != Unicode::toUpper(ch1)) |
| return false; |
| if (!isASCII(ch2) && Unicode::toLower(ch2) != Unicode::toUpper(ch2)) |
| return false; |
| } |
| |
| // Optimistically consume 2 characters. |
| add32(Imm32(2), index); |
| failures.append(branch32(GreaterThan, index, length)); |
| |
| // Load the characters we just consumed, offset -2 characters from index. |
| load32(BaseIndex(input, index, TimesTwo, -2 * 2), character); |
| |
| if (m_parser.ignoreCase()) { |
| // Convert ASCII alphabet characters to upper case before testing for |
| // equality. (ASCII non-alphabet characters don't require upper-casing |
| // because they have no uppercase equivalents. Unicode characters don't |
| // require upper-casing because we only handle Unicode characters whose |
| // upper and lower cases are equal.) |
| int ch1Mask = 0; |
| if (isASCIIAlpha(ch1)) { |
| ch1 |= 32; |
| ch1Mask = 32; |
| } |
| |
| int ch2Mask = 0; |
| if (isASCIIAlpha(ch2)) { |
| ch2 |= 32; |
| ch2Mask = 32; |
| } |
| |
| int mask = ch1Mask | (ch2Mask << 16); |
| if (mask) |
| or32(Imm32(mask), character); |
| } |
| int pair = ch1 | (ch2 << 16); |
| |
| failures.append(branch32(NotEqual, character, Imm32(pair))); |
| return true; |
| } |
| |
| void Generator::generatePatternCharacter(JumpList& failures, int ch) |
| { |
| generateLoadCharacter(failures); |
| |
| // used for unicode case insensitive |
| bool hasUpper = false; |
| Jump isUpper; |
| |
| // if case insensitive match |
| if (m_parser.ignoreCase()) { |
| UChar lower, upper; |
| |
| // check for ascii case sensitive characters |
| if (isASCIIAlpha(ch)) { |
| or32(Imm32(32), character); |
| ch |= 32; |
| } else if (!isASCII(ch) && ((lower = Unicode::toLower(ch)) != (upper = Unicode::toUpper(ch)))) { |
| // handle unicode case sentitive characters - branch to success on upper |
| isUpper = branch32(Equal, character, Imm32(upper)); |
| hasUpper = true; |
| ch = lower; |
| } |
| } |
| |
| // checks for ch, or lower case version of ch, if insensitive |
| failures.append(branch32(NotEqual, character, Imm32((unsigned short)ch))); |
| |
| if (m_parser.ignoreCase() && hasUpper) { |
| // for unicode case insensitive matches, branch here if upper matches. |
| isUpper.link(this); |
| } |
| |
| // on success consume the char |
| add32(Imm32(1), index); |
| } |
| |
| void Generator::generateCharacterClassInvertedRange(JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount) |
| { |
| do { |
| // pick which range we're going to generate |
| int which = count >> 1; |
| char lo = ranges[which].begin; |
| char hi = ranges[which].end; |
| |
| // check if there are any ranges or matches below lo. If not, just jl to failure - |
| // if there is anything else to check, check that first, if it falls through jmp to failure. |
| if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) { |
| Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo)); |
| |
| // generate code for all ranges before this one |
| if (which) |
| generateCharacterClassInvertedRange(failures, matchDest, ranges, which, matchIndex, matches, matchCount); |
| |
| while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) { |
| matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex]))); |
| ++*matchIndex; |
| } |
| failures.append(jump()); |
| |
| loOrAbove.link(this); |
| } else if (which) { |
| Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo)); |
| |
| generateCharacterClassInvertedRange(failures, matchDest, ranges, which, matchIndex, matches, matchCount); |
| failures.append(jump()); |
| |
| loOrAbove.link(this); |
| } else |
| failures.append(branch32(LessThan, character, Imm32((unsigned short)lo))); |
| |
| while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi)) |
| ++*matchIndex; |
| |
| matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi))); |
| // fall through to here, the value is above hi. |
| |
| // shuffle along & loop around if there are any more matches to handle. |
| unsigned next = which + 1; |
| ranges += next; |
| count -= next; |
| } while (count); |
| } |
| |
| void Generator::generateCharacterClassInverted(JumpList& matchDest, const CharacterClass& charClass) |
| { |
| Jump unicodeFail; |
| if (charClass.numMatchesUnicode || charClass.numRangesUnicode) { |
| Jump isAscii = branch32(LessThanOrEqual, character, Imm32(0x7f)); |
| |
| if (charClass.numMatchesUnicode) { |
| for (unsigned i = 0; i < charClass.numMatchesUnicode; ++i) { |
| UChar ch = charClass.matchesUnicode[i]; |
| matchDest.append(branch32(Equal, character, Imm32(ch))); |
| } |
| } |
| |
| if (charClass.numRangesUnicode) { |
| for (unsigned i = 0; i < charClass.numRangesUnicode; ++i) { |
| UChar lo = charClass.rangesUnicode[i].begin; |
| UChar hi = charClass.rangesUnicode[i].end; |
| |
| Jump below = branch32(LessThan, character, Imm32(lo)); |
| matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi))); |
| below.link(this); |
| } |
| } |
| |
| unicodeFail = jump(); |
| isAscii.link(this); |
| } |
| |
| if (charClass.numRanges) { |
| unsigned matchIndex = 0; |
| JumpList failures; |
| generateCharacterClassInvertedRange(failures, matchDest, charClass.ranges, charClass.numRanges, &matchIndex, charClass.matches, charClass.numMatches); |
| while (matchIndex < charClass.numMatches) |
| matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass.matches[matchIndex++]))); |
| |
| failures.link(this); |
| } else if (charClass.numMatches) { |
| // optimization: gather 'a','A' etc back together, can mask & test once. |
| Vector<char> matchesAZaz; |
| |
| for (unsigned i = 0; i < charClass.numMatches; ++i) { |
| char ch = charClass.matches[i]; |
| if (m_parser.ignoreCase()) { |
| if (isASCIILower(ch)) { |
| matchesAZaz.append(ch); |
| continue; |
| } |
| if (isASCIIUpper(ch)) |
| continue; |
| } |
| matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch))); |
| } |
| |
| if (unsigned countAZaz = matchesAZaz.size()) { |
| or32(Imm32(32), character); |
| for (unsigned i = 0; i < countAZaz; ++i) |
| matchDest.append(branch32(Equal, character, Imm32(matchesAZaz[i]))); |
| } |
| } |
| |
| if (charClass.numMatchesUnicode || charClass.numRangesUnicode) |
| unicodeFail.link(this); |
| } |
| |
| void Generator::generateCharacterClass(JumpList& failures, const CharacterClass& charClass, bool invert) |
| { |
| generateLoadCharacter(failures); |
| |
| if (invert) |
| generateCharacterClassInverted(failures, charClass); |
| else { |
| JumpList successes; |
| generateCharacterClassInverted(successes, charClass); |
| failures.append(jump()); |
| successes.link(this); |
| } |
| |
| add32(Imm32(1), index); |
| } |
| |
| void Generator::generateParenthesesAssertion(JumpList& failures) |
| { |
| JumpList disjunctionFailed; |
| |
| push(index); |
| m_parser.parseDisjunction(disjunctionFailed); |
| Jump success = jump(); |
| |
| disjunctionFailed.link(this); |
| pop(index); |
| failures.append(jump()); |
| |
| success.link(this); |
| pop(index); |
| } |
| |
| void Generator::generateParenthesesInvertedAssertion(JumpList& failures) |
| { |
| JumpList disjunctionFailed; |
| |
| push(index); |
| m_parser.parseDisjunction(disjunctionFailed); |
| |
| // If the disjunction succeeded, the inverted assertion failed. |
| pop(index); |
| failures.append(jump()); |
| |
| // If the disjunction failed, the inverted assertion succeeded. |
| disjunctionFailed.link(this); |
| pop(index); |
| } |
| |
| void Generator::generateParenthesesNonGreedy(JumpList& failures, Label start, Jump success, Jump fail) |
| { |
| jump(start); |
| success.link(this); |
| failures.append(fail); |
| } |
| |
| Generator::Jump Generator::generateParenthesesResetTrampoline(JumpList& newFailures, unsigned subpatternIdBefore, unsigned subpatternIdAfter) |
| { |
| Jump skip = jump(); |
| newFailures.link(this); |
| for (unsigned i = subpatternIdBefore + 1; i <= subpatternIdAfter; ++i) { |
| store32(Imm32(-1), Address(output, (2 * i) * sizeof(int))); |
| store32(Imm32(-1), Address(output, (2 * i + 1) * sizeof(int))); |
| } |
| |
| Jump newFailJump = jump(); |
| skip.link(this); |
| |
| return newFailJump; |
| } |
| |
| void Generator::generateAssertionBOL(JumpList& failures) |
| { |
| if (m_parser.multiline()) { |
| JumpList previousIsNewline; |
| |
| // begin of input == success |
| previousIsNewline.append(branch32(Equal, index, Imm32(0))); |
| |
| // now check prev char against newline characters. |
| load16(BaseIndex(input, index, TimesTwo, -2), character); |
| generateCharacterClassInverted(previousIsNewline, CharacterClass::newline()); |
| |
| failures.append(jump()); |
| |
| previousIsNewline.link(this); |
| } else |
| failures.append(branch32(NotEqual, index, Imm32(0))); |
| } |
| |
| void Generator::generateAssertionEOL(JumpList& failures) |
| { |
| if (m_parser.multiline()) { |
| JumpList nextIsNewline; |
| |
| generateLoadCharacter(nextIsNewline); // end of input == success |
| generateCharacterClassInverted(nextIsNewline, CharacterClass::newline()); |
| failures.append(jump()); |
| nextIsNewline.link(this); |
| } else { |
| failures.append(branch32(NotEqual, length, index)); |
| } |
| } |
| |
| void Generator::generateAssertionWordBoundary(JumpList& failures, bool invert) |
| { |
| JumpList wordBoundary; |
| JumpList notWordBoundary; |
| |
| // (1) Check if the previous value was a word char |
| |
| // (1.1) check for begin of input |
| Jump atBegin = branch32(Equal, index, Imm32(0)); |
| // (1.2) load the last char, and chck if is word character |
| load16(BaseIndex(input, index, TimesTwo, -2), character); |
| JumpList previousIsWord; |
| generateCharacterClassInverted(previousIsWord, CharacterClass::wordchar()); |
| // (1.3) if we get here, previous is not a word char |
| atBegin.link(this); |
| |
| // (2) Handle situation where previous was NOT a \w |
| |
| generateLoadCharacter(notWordBoundary); |
| generateCharacterClassInverted(wordBoundary, CharacterClass::wordchar()); |
| // (2.1) If we get here, neither chars are word chars |
| notWordBoundary.append(jump()); |
| |
| // (3) Handle situation where previous was a \w |
| |
| // (3.0) link success in first match to here |
| previousIsWord.link(this); |
| generateLoadCharacter(wordBoundary); |
| generateCharacterClassInverted(notWordBoundary, CharacterClass::wordchar()); |
| // (3.1) If we get here, this is an end of a word, within the input. |
| |
| // (4) Link everything up |
| |
| if (invert) { |
| // handle the fall through case |
| wordBoundary.append(jump()); |
| |
| // looking for non word boundaries, so link boundary fails to here. |
| notWordBoundary.link(this); |
| |
| failures.append(wordBoundary); |
| } else { |
| // looking for word boundaries, so link successes here. |
| wordBoundary.link(this); |
| |
| failures.append(notWordBoundary); |
| } |
| } |
| |
| void Generator::generateBackreference(JumpList& failures, unsigned subpatternId) |
| { |
| push(index); |
| push(repeatCount); |
| |
| // get the start pos of the backref into repeatCount (multipurpose!) |
| load32(Address(output, (2 * subpatternId) * sizeof(int)), repeatCount); |
| |
| Jump skipIncrement = jump(); |
| Label topOfLoop(this); |
| |
| add32(Imm32(1), index); |
| add32(Imm32(1), repeatCount); |
| skipIncrement.link(this); |
| |
| // check if we're at the end of backref (if we are, success!) |
| Jump endOfBackRef = branch32(Equal, Address(output, ((2 * subpatternId) + 1) * sizeof(int)), repeatCount); |
| |
| load16(BaseIndex(input, repeatCount, MacroAssembler::TimesTwo), character); |
| |
| // check if we've run out of input (this would be a can o'fail) |
| Jump endOfInput = branch32(Equal, length, index); |
| |
| branch16(Equal, BaseIndex(input, index, TimesTwo), character, topOfLoop); |
| |
| endOfInput.link(this); |
| |
| // Failure |
| pop(repeatCount); |
| pop(index); |
| failures.append(jump()); |
| |
| // Success |
| endOfBackRef.link(this); |
| pop(repeatCount); |
| pop(); |
| } |
| |
| void Generator::terminateAlternative(JumpList& successes, JumpList& failures) |
| { |
| successes.append(jump()); |
| |
| failures.link(this); |
| peek(index); |
| } |
| |
| void Generator::terminateDisjunction(JumpList& successes) |
| { |
| successes.link(this); |
| } |
| |
| } } // namespace JSC::WREC |
| |
| #endif // ENABLE(WREC) |