| /* |
| * Copyright (C) 2009 Apple Inc. All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * |
| * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
| * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
| * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
| * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| #include "config.h" |
| #include "RegexJIT.h" |
| |
| #include "ASCIICType.h" |
| #include "JSGlobalData.h" |
| #include "LinkBuffer.h" |
| #include "MacroAssembler.h" |
| #include "RegexCompiler.h" |
| |
| #include "pcre.h" // temporary, remove when fallback is removed. |
| |
| #if ENABLE(YARR_JIT) |
| |
| using namespace WTF; |
| |
| namespace JSC { namespace Yarr { |
| |
| |
| class RegexGenerator : private MacroAssembler { |
| friend void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline); |
| |
| #if PLATFORM_ARM_ARCH(7) |
| static const RegisterID input = ARM::r0; |
| static const RegisterID index = ARM::r1; |
| static const RegisterID length = ARM::r2; |
| |
| static const RegisterID output = ARM::r4; |
| static const RegisterID regT0 = ARM::r5; |
| static const RegisterID regT1 = ARM::r6; |
| |
| static const RegisterID returnRegister = ARM::r0; |
| #endif |
| #if PLATFORM(X86) |
| static const RegisterID input = X86::eax; |
| static const RegisterID index = X86::edx; |
| static const RegisterID length = X86::ecx; |
| static const RegisterID output = X86::edi; |
| |
| static const RegisterID regT0 = X86::ebx; |
| static const RegisterID regT1 = X86::esi; |
| |
| static const RegisterID returnRegister = X86::eax; |
| #endif |
| #if PLATFORM(X86_64) |
| static const RegisterID input = X86::edi; |
| static const RegisterID index = X86::esi; |
| static const RegisterID length = X86::edx; |
| static const RegisterID output = X86::ecx; |
| |
| static const RegisterID regT0 = X86::eax; |
| static const RegisterID regT1 = X86::ebx; |
| |
| static const RegisterID returnRegister = X86::eax; |
| #endif |
| |
| void optimizeAlternative(PatternAlternative* alternative) |
| { |
| if (!alternative->m_terms.size()) |
| return; |
| |
| for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) { |
| PatternTerm& term = alternative->m_terms[i]; |
| PatternTerm& nextTerm = alternative->m_terms[i + 1]; |
| |
| if ((term.type == PatternTerm::TypeCharacterClass) |
| && (term.quantityType == QuantifierFixedCount) |
| && (nextTerm.type == PatternTerm::TypePatternCharacter) |
| && (nextTerm.quantityType == QuantifierFixedCount)) { |
| PatternTerm termCopy = term; |
| alternative->m_terms[i] = nextTerm; |
| alternative->m_terms[i + 1] = termCopy; |
| } |
| } |
| } |
| |
| void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount) |
| { |
| do { |
| // pick which range we're going to generate |
| int which = count >> 1; |
| char lo = ranges[which].begin; |
| char hi = ranges[which].end; |
| |
| // check if there are any ranges or matches below lo. If not, just jl to failure - |
| // if there is anything else to check, check that first, if it falls through jmp to failure. |
| if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) { |
| Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo)); |
| |
| // generate code for all ranges before this one |
| if (which) |
| matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount); |
| |
| while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) { |
| matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex]))); |
| ++*matchIndex; |
| } |
| failures.append(jump()); |
| |
| loOrAbove.link(this); |
| } else if (which) { |
| Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo)); |
| |
| matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount); |
| failures.append(jump()); |
| |
| loOrAbove.link(this); |
| } else |
| failures.append(branch32(LessThan, character, Imm32((unsigned short)lo))); |
| |
| while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi)) |
| ++*matchIndex; |
| |
| matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi))); |
| // fall through to here, the value is above hi. |
| |
| // shuffle along & loop around if there are any more matches to handle. |
| unsigned next = which + 1; |
| ranges += next; |
| count -= next; |
| } while (count); |
| } |
| |
| void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass) |
| { |
| Jump unicodeFail; |
| if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) { |
| Jump isAscii = branch32(LessThanOrEqual, character, Imm32(0x7f)); |
| |
| if (charClass->m_matchesUnicode.size()) { |
| for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) { |
| UChar ch = charClass->m_matchesUnicode[i]; |
| matchDest.append(branch32(Equal, character, Imm32(ch))); |
| } |
| } |
| |
| if (charClass->m_rangesUnicode.size()) { |
| for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) { |
| UChar lo = charClass->m_rangesUnicode[i].begin; |
| UChar hi = charClass->m_rangesUnicode[i].end; |
| |
| Jump below = branch32(LessThan, character, Imm32(lo)); |
| matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi))); |
| below.link(this); |
| } |
| } |
| |
| unicodeFail = jump(); |
| isAscii.link(this); |
| } |
| |
| if (charClass->m_ranges.size()) { |
| unsigned matchIndex = 0; |
| JumpList failures; |
| matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size()); |
| while (matchIndex < charClass->m_matches.size()) |
| matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++]))); |
| |
| failures.link(this); |
| } else if (charClass->m_matches.size()) { |
| // optimization: gather 'a','A' etc back together, can mask & test once. |
| Vector<char> matchesAZaz; |
| |
| for (unsigned i = 0; i < charClass->m_matches.size(); ++i) { |
| char ch = charClass->m_matches[i]; |
| if (m_pattern.m_ignoreCase) { |
| if (isASCIILower(ch)) { |
| matchesAZaz.append(ch); |
| continue; |
| } |
| if (isASCIIUpper(ch)) |
| continue; |
| } |
| matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch))); |
| } |
| |
| if (unsigned countAZaz = matchesAZaz.size()) { |
| or32(Imm32(32), character); |
| for (unsigned i = 0; i < countAZaz; ++i) |
| matchDest.append(branch32(Equal, character, Imm32(matchesAZaz[i]))); |
| } |
| } |
| |
| if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) |
| unicodeFail.link(this); |
| } |
| |
| // Jumps if input not available; will have (incorrectly) incremented already! |
| Jump jumpIfNoAvailableInput(unsigned countToCheck) |
| { |
| add32(Imm32(countToCheck), index); |
| return branch32(Above, index, length); |
| } |
| |
| Jump jumpIfAvailableInput(unsigned countToCheck) |
| { |
| add32(Imm32(countToCheck), index); |
| return branch32(BelowOrEqual, index, length); |
| } |
| |
| Jump checkInput() |
| { |
| return branch32(BelowOrEqual, index, length); |
| } |
| |
| Jump atEndOfInput() |
| { |
| return branch32(Equal, index, length); |
| } |
| |
| Jump notAtEndOfInput() |
| { |
| return branch32(NotEqual, index, length); |
| } |
| |
| Jump jumpIfCharEquals(UChar ch, int inputPosition) |
| { |
| return branch16(Equal, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch)); |
| } |
| |
| Jump jumpIfCharNotEquals(UChar ch, int inputPosition) |
| { |
| return branch16(NotEqual, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch)); |
| } |
| |
| void readCharacter(int inputPosition, RegisterID reg) |
| { |
| load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg); |
| } |
| |
| void storeToFrame(RegisterID reg, unsigned frameLocation) |
| { |
| poke(reg, frameLocation); |
| } |
| |
| void storeToFrame(Imm32 imm, unsigned frameLocation) |
| { |
| poke(imm, frameLocation); |
| } |
| |
| DataLabelPtr storeToFrameWithPatch(unsigned frameLocation) |
| { |
| return storePtrWithPatch(ImmPtr(0), Address(stackPointerRegister, frameLocation * sizeof(void*))); |
| } |
| |
| void loadFromFrame(unsigned frameLocation, RegisterID reg) |
| { |
| peek(reg, frameLocation); |
| } |
| |
| void loadFromFrameAndJump(unsigned frameLocation) |
| { |
| jump(Address(stackPointerRegister, frameLocation * sizeof(void*))); |
| } |
| |
| struct AlternativeBacktrackRecord { |
| DataLabelPtr dataLabel; |
| Label backtrackLocation; |
| |
| AlternativeBacktrackRecord(DataLabelPtr dataLabel, Label backtrackLocation) |
| : dataLabel(dataLabel) |
| , backtrackLocation(backtrackLocation) |
| { |
| } |
| }; |
| |
| struct TermGenerationState { |
| TermGenerationState(PatternDisjunction* disjunction, unsigned checkedTotal) |
| : disjunction(disjunction) |
| , checkedTotal(checkedTotal) |
| { |
| } |
| |
| void resetAlternative() |
| { |
| isBackTrackGenerated = false; |
| alt = 0; |
| } |
| bool alternativeValid() |
| { |
| return alt < disjunction->m_alternatives.size(); |
| } |
| void nextAlternative() |
| { |
| ++alt; |
| } |
| PatternAlternative* alternative() |
| { |
| return disjunction->m_alternatives[alt]; |
| } |
| |
| void resetTerm() |
| { |
| ASSERT(alternativeValid()); |
| t = 0; |
| } |
| bool termValid() |
| { |
| ASSERT(alternativeValid()); |
| return t < alternative()->m_terms.size(); |
| } |
| void nextTerm() |
| { |
| ASSERT(alternativeValid()); |
| ++t; |
| } |
| PatternTerm& term() |
| { |
| ASSERT(alternativeValid()); |
| return alternative()->m_terms[t]; |
| } |
| |
| PatternTerm& lookaheadTerm() |
| { |
| ASSERT(alternativeValid()); |
| ASSERT((t + 1) < alternative()->m_terms.size()); |
| return alternative()->m_terms[t + 1]; |
| } |
| bool isSinglePatternCharacterLookaheadTerm() |
| { |
| ASSERT(alternativeValid()); |
| return ((t + 1) < alternative()->m_terms.size()) |
| && (lookaheadTerm().type == PatternTerm::TypePatternCharacter) |
| && (lookaheadTerm().quantityType == QuantifierFixedCount) |
| && (lookaheadTerm().quantityCount == 1); |
| } |
| |
| int inputOffset() |
| { |
| return term().inputPosition - checkedTotal; |
| } |
| |
| void jumpToBacktrack(Jump jump, MacroAssembler* masm) |
| { |
| if (isBackTrackGenerated) |
| jump.linkTo(backtrackLabel, masm); |
| else |
| backTrackJumps.append(jump); |
| } |
| void jumpToBacktrack(JumpList& jumps, MacroAssembler* masm) |
| { |
| if (isBackTrackGenerated) |
| jumps.linkTo(backtrackLabel, masm); |
| else |
| backTrackJumps.append(jumps); |
| } |
| bool plantJumpToBacktrackIfExists(MacroAssembler* masm) |
| { |
| if (isBackTrackGenerated) { |
| masm->jump(backtrackLabel); |
| return true; |
| } |
| return false; |
| } |
| void addBacktrackJump(Jump jump) |
| { |
| backTrackJumps.append(jump); |
| } |
| void setBacktrackGenerated(Label label) |
| { |
| isBackTrackGenerated = true; |
| backtrackLabel = label; |
| } |
| void linkAlternativeBacktracks(MacroAssembler* masm) |
| { |
| isBackTrackGenerated = false; |
| backTrackJumps.link(masm); |
| } |
| void linkAlternativeBacktracksTo(Label label, MacroAssembler* masm) |
| { |
| isBackTrackGenerated = false; |
| backTrackJumps.linkTo(label, masm); |
| } |
| void propagateBacktrackingFrom(TermGenerationState& nestedParenthesesState, MacroAssembler* masm) |
| { |
| jumpToBacktrack(nestedParenthesesState.backTrackJumps, masm); |
| if (nestedParenthesesState.isBackTrackGenerated) |
| setBacktrackGenerated(nestedParenthesesState.backtrackLabel); |
| } |
| |
| PatternDisjunction* disjunction; |
| int checkedTotal; |
| private: |
| unsigned alt; |
| unsigned t; |
| JumpList backTrackJumps; |
| Label backtrackLabel; |
| bool isBackTrackGenerated; |
| }; |
| |
| void generateAssertionBOL(TermGenerationState& state) |
| { |
| PatternTerm& term = state.term(); |
| |
| if (m_pattern.m_multiline) { |
| const RegisterID character = regT0; |
| |
| JumpList matchDest; |
| if (!term.inputPosition) |
| matchDest.append(branch32(Equal, index, Imm32(state.checkedTotal))); |
| |
| readCharacter(state.inputOffset() - 1, character); |
| matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass()); |
| state.jumpToBacktrack(jump(), this); |
| |
| matchDest.link(this); |
| } else { |
| // Erk, really should poison out these alternatives early. :-/ |
| if (term.inputPosition) |
| state.jumpToBacktrack(jump(), this); |
| else |
| state.jumpToBacktrack(branch32(NotEqual, index, Imm32(state.checkedTotal)), this); |
| } |
| } |
| |
| void generateAssertionEOL(TermGenerationState& state) |
| { |
| PatternTerm& term = state.term(); |
| |
| if (m_pattern.m_multiline) { |
| const RegisterID character = regT0; |
| |
| JumpList matchDest; |
| if (term.inputPosition == state.checkedTotal) |
| matchDest.append(atEndOfInput()); |
| |
| readCharacter(state.inputOffset(), character); |
| matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass()); |
| state.jumpToBacktrack(jump(), this); |
| |
| matchDest.link(this); |
| } else { |
| if (term.inputPosition == state.checkedTotal) |
| state.jumpToBacktrack(notAtEndOfInput(), this); |
| // Erk, really should poison out these alternatives early. :-/ |
| else |
| state.jumpToBacktrack(jump(), this); |
| } |
| } |
| |
| // Also falls though on nextIsNotWordChar. |
| void matchAssertionWordchar(TermGenerationState& state, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar) |
| { |
| const RegisterID character = regT0; |
| PatternTerm& term = state.term(); |
| |
| if (term.inputPosition == state.checkedTotal) |
| nextIsNotWordChar.append(atEndOfInput()); |
| |
| readCharacter(state.inputOffset(), character); |
| matchCharacterClass(character, nextIsWordChar, m_pattern.wordcharCharacterClass()); |
| } |
| |
| void generateAssertionWordBoundary(TermGenerationState& state) |
| { |
| const RegisterID character = regT0; |
| PatternTerm& term = state.term(); |
| |
| Jump atBegin; |
| JumpList matchDest; |
| if (!term.inputPosition) |
| atBegin = branch32(Equal, index, Imm32(state.checkedTotal)); |
| readCharacter(state.inputOffset() - 1, character); |
| matchCharacterClass(character, matchDest, m_pattern.wordcharCharacterClass()); |
| if (!term.inputPosition) |
| atBegin.link(this); |
| |
| // We fall through to here if the last character was not a wordchar. |
| JumpList nonWordCharThenWordChar; |
| JumpList nonWordCharThenNonWordChar; |
| if (term.invertOrCapture) { |
| matchAssertionWordchar(state, nonWordCharThenNonWordChar, nonWordCharThenWordChar); |
| nonWordCharThenWordChar.append(jump()); |
| } else { |
| matchAssertionWordchar(state, nonWordCharThenWordChar, nonWordCharThenNonWordChar); |
| nonWordCharThenNonWordChar.append(jump()); |
| } |
| state.jumpToBacktrack(nonWordCharThenNonWordChar, this); |
| |
| // We jump here if the last character was a wordchar. |
| matchDest.link(this); |
| JumpList wordCharThenWordChar; |
| JumpList wordCharThenNonWordChar; |
| if (term.invertOrCapture) { |
| matchAssertionWordchar(state, wordCharThenNonWordChar, wordCharThenWordChar); |
| wordCharThenWordChar.append(jump()); |
| } else { |
| matchAssertionWordchar(state, wordCharThenWordChar, wordCharThenNonWordChar); |
| // This can fall-though! |
| } |
| |
| state.jumpToBacktrack(wordCharThenWordChar, this); |
| |
| nonWordCharThenWordChar.link(this); |
| wordCharThenNonWordChar.link(this); |
| } |
| |
| void generatePatternCharacterSingle(TermGenerationState& state) |
| { |
| const RegisterID character = regT0; |
| UChar ch = state.term().patternCharacter; |
| |
| if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { |
| readCharacter(state.inputOffset(), character); |
| or32(Imm32(32), character); |
| state.jumpToBacktrack(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))), this); |
| } else { |
| ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch))); |
| state.jumpToBacktrack(jumpIfCharNotEquals(ch, state.inputOffset()), this); |
| } |
| } |
| |
| void generatePatternCharacterPair(TermGenerationState& state) |
| { |
| const RegisterID character = regT0; |
| UChar ch1 = state.term().patternCharacter; |
| UChar ch2 = state.lookaheadTerm().patternCharacter; |
| |
| int mask = 0; |
| int chPair = ch1 | (ch2 << 16); |
| |
| if (m_pattern.m_ignoreCase) { |
| if (isASCIIAlpha(ch1)) |
| mask |= 32; |
| if (isASCIIAlpha(ch2)) |
| mask |= 32 << 16; |
| } |
| |
| if (mask) { |
| load32(BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), character); |
| or32(Imm32(mask), character); |
| state.jumpToBacktrack(branch32(NotEqual, character, Imm32(chPair | mask)), this); |
| } else |
| state.jumpToBacktrack(branch32(NotEqual, BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), Imm32(chPair)), this); |
| } |
| |
| void generatePatternCharacterFixed(TermGenerationState& state) |
| { |
| const RegisterID character = regT0; |
| const RegisterID countRegister = regT1; |
| PatternTerm& term = state.term(); |
| UChar ch = term.patternCharacter; |
| |
| move(index, countRegister); |
| sub32(Imm32(term.quantityCount), countRegister); |
| |
| Label loop(this); |
| if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { |
| load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character); |
| or32(Imm32(32), character); |
| state.jumpToBacktrack(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))), this); |
| } else { |
| ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch))); |
| state.jumpToBacktrack(branch16(NotEqual, BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), Imm32(ch)), this); |
| } |
| add32(Imm32(1), countRegister); |
| branch32(NotEqual, countRegister, index).linkTo(loop, this); |
| } |
| |
| void generatePatternCharacterGreedy(TermGenerationState& state) |
| { |
| const RegisterID character = regT0; |
| const RegisterID countRegister = regT1; |
| PatternTerm& term = state.term(); |
| UChar ch = term.patternCharacter; |
| |
| move(Imm32(0), countRegister); |
| |
| JumpList failures; |
| Label loop(this); |
| failures.append(atEndOfInput()); |
| if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { |
| readCharacter(state.inputOffset(), character); |
| or32(Imm32(32), character); |
| failures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch)))); |
| } else { |
| ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch))); |
| failures.append(jumpIfCharNotEquals(ch, state.inputOffset())); |
| } |
| add32(Imm32(1), countRegister); |
| add32(Imm32(1), index); |
| branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this); |
| failures.append(jump()); |
| |
| Label backtrackBegin(this); |
| loadFromFrame(term.frameLocation, countRegister); |
| state.jumpToBacktrack(branchTest32(Zero, countRegister), this); |
| sub32(Imm32(1), countRegister); |
| sub32(Imm32(1), index); |
| |
| failures.link(this); |
| |
| storeToFrame(countRegister, term.frameLocation); |
| |
| state.setBacktrackGenerated(backtrackBegin); |
| } |
| |
| void generatePatternCharacterNonGreedy(TermGenerationState& state) |
| { |
| const RegisterID character = regT0; |
| const RegisterID countRegister = regT1; |
| PatternTerm& term = state.term(); |
| UChar ch = term.patternCharacter; |
| |
| move(Imm32(0), countRegister); |
| |
| Jump firstTimeDoNothing = jump(); |
| |
| Label hardFail(this); |
| sub32(countRegister, index); |
| state.jumpToBacktrack(jump(), this); |
| |
| Label backtrackBegin(this); |
| loadFromFrame(term.frameLocation, countRegister); |
| |
| atEndOfInput().linkTo(hardFail, this); |
| branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail); |
| if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { |
| readCharacter(state.inputOffset(), character); |
| or32(Imm32(32), character); |
| branch32(NotEqual, character, Imm32(Unicode::toLower(ch))).linkTo(hardFail, this); |
| } else { |
| ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch))); |
| jumpIfCharNotEquals(ch, state.inputOffset()).linkTo(hardFail, this); |
| } |
| |
| add32(Imm32(1), countRegister); |
| add32(Imm32(1), index); |
| |
| firstTimeDoNothing.link(this); |
| storeToFrame(countRegister, term.frameLocation); |
| |
| state.setBacktrackGenerated(backtrackBegin); |
| } |
| |
| void generateCharacterClassSingle(TermGenerationState& state) |
| { |
| const RegisterID character = regT0; |
| PatternTerm& term = state.term(); |
| |
| JumpList matchDest; |
| readCharacter(state.inputOffset(), character); |
| matchCharacterClass(character, matchDest, term.characterClass); |
| |
| if (term.invertOrCapture) |
| state.jumpToBacktrack(matchDest, this); |
| else { |
| state.jumpToBacktrack(jump(), this); |
| matchDest.link(this); |
| } |
| } |
| |
| void generateCharacterClassFixed(TermGenerationState& state) |
| { |
| const RegisterID character = regT0; |
| const RegisterID countRegister = regT1; |
| PatternTerm& term = state.term(); |
| |
| move(index, countRegister); |
| sub32(Imm32(term.quantityCount), countRegister); |
| |
| Label loop(this); |
| JumpList matchDest; |
| load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character); |
| matchCharacterClass(character, matchDest, term.characterClass); |
| |
| if (term.invertOrCapture) |
| state.jumpToBacktrack(matchDest, this); |
| else { |
| state.jumpToBacktrack(jump(), this); |
| matchDest.link(this); |
| } |
| |
| add32(Imm32(1), countRegister); |
| branch32(NotEqual, countRegister, index).linkTo(loop, this); |
| } |
| |
| void generateCharacterClassGreedy(TermGenerationState& state) |
| { |
| const RegisterID character = regT0; |
| const RegisterID countRegister = regT1; |
| PatternTerm& term = state.term(); |
| |
| move(Imm32(0), countRegister); |
| |
| JumpList failures; |
| Label loop(this); |
| failures.append(atEndOfInput()); |
| |
| if (term.invertOrCapture) { |
| readCharacter(state.inputOffset(), character); |
| matchCharacterClass(character, failures, term.characterClass); |
| } else { |
| JumpList matchDest; |
| readCharacter(state.inputOffset(), character); |
| matchCharacterClass(character, matchDest, term.characterClass); |
| failures.append(jump()); |
| matchDest.link(this); |
| } |
| |
| add32(Imm32(1), countRegister); |
| add32(Imm32(1), index); |
| branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this); |
| failures.append(jump()); |
| |
| Label backtrackBegin(this); |
| loadFromFrame(term.frameLocation, countRegister); |
| state.jumpToBacktrack(branchTest32(Zero, countRegister), this); |
| sub32(Imm32(1), countRegister); |
| sub32(Imm32(1), index); |
| |
| failures.link(this); |
| |
| storeToFrame(countRegister, term.frameLocation); |
| |
| state.setBacktrackGenerated(backtrackBegin); |
| } |
| |
| void generateCharacterClassNonGreedy(TermGenerationState& state) |
| { |
| const RegisterID character = regT0; |
| const RegisterID countRegister = regT1; |
| PatternTerm& term = state.term(); |
| |
| move(Imm32(0), countRegister); |
| |
| Jump firstTimeDoNothing = jump(); |
| |
| Label hardFail(this); |
| sub32(countRegister, index); |
| state.jumpToBacktrack(jump(), this); |
| |
| Label backtrackBegin(this); |
| loadFromFrame(term.frameLocation, countRegister); |
| |
| atEndOfInput().linkTo(hardFail, this); |
| branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail); |
| |
| JumpList matchDest; |
| readCharacter(state.inputOffset(), character); |
| matchCharacterClass(character, matchDest, term.characterClass); |
| |
| if (term.invertOrCapture) |
| matchDest.linkTo(hardFail, this); |
| else { |
| jump(hardFail); |
| matchDest.link(this); |
| } |
| |
| add32(Imm32(1), countRegister); |
| add32(Imm32(1), index); |
| |
| firstTimeDoNothing.link(this); |
| storeToFrame(countRegister, term.frameLocation); |
| |
| state.setBacktrackGenerated(backtrackBegin); |
| } |
| |
| void generateParenthesesDisjunction(PatternTerm& parenthesesTerm, TermGenerationState& state, unsigned alternativeFrameLocation) |
| { |
| ASSERT((parenthesesTerm.type == PatternTerm::TypeParenthesesSubpattern) || (parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion)); |
| ASSERT(parenthesesTerm.quantityCount == 1); |
| |
| PatternDisjunction* disjunction = parenthesesTerm.parentheses.disjunction; |
| unsigned preCheckedCount = ((parenthesesTerm.quantityType == QuantifierFixedCount) && (parenthesesTerm.type != PatternTerm::TypeParentheticalAssertion)) ? disjunction->m_minimumSize : 0; |
| |
| if (disjunction->m_alternatives.size() == 1) { |
| state.resetAlternative(); |
| ASSERT(state.alternativeValid()); |
| PatternAlternative* alternative = state.alternative(); |
| optimizeAlternative(alternative); |
| |
| int countToCheck = alternative->m_minimumSize - preCheckedCount; |
| if (countToCheck) { |
| ASSERT((parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion) || (parenthesesTerm.quantityType != QuantifierFixedCount)); |
| |
| // FIXME: This is quite horrible. The call to 'plantJumpToBacktrackIfExists' |
| // will be forced to always trampoline into here, just to decrement the index. |
| // Ick. |
| Jump skip = jump(); |
| |
| Label backtrackBegin(this); |
| sub32(Imm32(countToCheck), index); |
| state.addBacktrackJump(jump()); |
| |
| skip.link(this); |
| |
| state.setBacktrackGenerated(backtrackBegin); |
| |
| state.jumpToBacktrack(jumpIfNoAvailableInput(countToCheck), this); |
| state.checkedTotal += countToCheck; |
| } |
| |
| for (state.resetTerm(); state.termValid(); state.nextTerm()) |
| generateTerm(state); |
| |
| state.checkedTotal -= countToCheck; |
| } else { |
| JumpList successes; |
| |
| for (state.resetAlternative(); state.alternativeValid(); state.nextAlternative()) { |
| |
| PatternAlternative* alternative = state.alternative(); |
| optimizeAlternative(alternative); |
| |
| ASSERT(alternative->m_minimumSize >= preCheckedCount); |
| int countToCheck = alternative->m_minimumSize - preCheckedCount; |
| if (countToCheck) { |
| state.addBacktrackJump(jumpIfNoAvailableInput(countToCheck)); |
| state.checkedTotal += countToCheck; |
| } |
| |
| for (state.resetTerm(); state.termValid(); state.nextTerm()) |
| generateTerm(state); |
| |
| // Matched an alternative. |
| DataLabelPtr dataLabel = storeToFrameWithPatch(alternativeFrameLocation); |
| successes.append(jump()); |
| |
| // Alternative did not match. |
| Label backtrackLocation(this); |
| |
| // Can we backtrack the alternative? - if so, do so. If not, just fall through to the next one. |
| state.plantJumpToBacktrackIfExists(this); |
| |
| state.linkAlternativeBacktracks(this); |
| |
| if (countToCheck) { |
| sub32(Imm32(countToCheck), index); |
| state.checkedTotal -= countToCheck; |
| } |
| |
| m_backtrackRecords.append(AlternativeBacktrackRecord(dataLabel, backtrackLocation)); |
| } |
| // We fall through to here when the last alternative fails. |
| // Add a backtrack out of here for the parenthese handling code to link up. |
| state.addBacktrackJump(jump()); |
| |
| // Generate a trampoline for the parens code to backtrack to, to retry the |
| // next alternative. |
| state.setBacktrackGenerated(label()); |
| loadFromFrameAndJump(alternativeFrameLocation); |
| |
| // FIXME: both of the above hooks are a little inefficient, in that you |
| // may end up trampolining here, just to trampoline back out to the |
| // parentheses code, or vice versa. We can probably eliminate a jump |
| // by restructuring, but coding this way for now for simplicity during |
| // development. |
| |
| successes.link(this); |
| } |
| } |
| |
| void generateParenthesesSingle(TermGenerationState& state) |
| { |
| const RegisterID indexTemporary = regT0; |
| PatternTerm& term = state.term(); |
| PatternDisjunction* disjunction = term.parentheses.disjunction; |
| ASSERT(term.quantityCount == 1); |
| |
| unsigned preCheckedCount = ((term.quantityCount == 1) && (term.quantityType == QuantifierFixedCount)) ? disjunction->m_minimumSize : 0; |
| |
| unsigned parenthesesFrameLocation = term.frameLocation; |
| unsigned alternativeFrameLocation = parenthesesFrameLocation; |
| if (term.quantityType != QuantifierFixedCount) |
| alternativeFrameLocation += RegexStackSpaceForBackTrackInfoParenthesesOnce; |
| |
| // optimized case - no capture & no quantifier can be handled in a light-weight manner. |
| if (!term.invertOrCapture && (term.quantityType == QuantifierFixedCount)) { |
| TermGenerationState parenthesesState(disjunction, state.checkedTotal); |
| generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation); |
| // this expects that any backtracks back out of the parentheses will be in the |
| // parenthesesState's backTrackJumps vector, and that if they need backtracking |
| // they will have set an entry point on the parenthesesState's backtrackLabel. |
| state.propagateBacktrackingFrom(parenthesesState, this); |
| } else { |
| Jump nonGreedySkipParentheses; |
| Label nonGreedyTryParentheses; |
| if (term.quantityType == QuantifierGreedy) |
| storeToFrame(Imm32(1), parenthesesFrameLocation); |
| else if (term.quantityType == QuantifierNonGreedy) { |
| storeToFrame(Imm32(0), parenthesesFrameLocation); |
| nonGreedySkipParentheses = jump(); |
| nonGreedyTryParentheses = label(); |
| storeToFrame(Imm32(1), parenthesesFrameLocation); |
| } |
| |
| // store the match start index |
| if (term.invertOrCapture) { |
| int inputOffset = state.inputOffset() - preCheckedCount; |
| if (inputOffset) { |
| move(index, indexTemporary); |
| add32(Imm32(inputOffset), indexTemporary); |
| store32(indexTemporary, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int))); |
| } else |
| store32(index, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int))); |
| } |
| |
| // generate the body of the parentheses |
| TermGenerationState parenthesesState(disjunction, state.checkedTotal); |
| generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation); |
| |
| // store the match end index |
| if (term.invertOrCapture) { |
| int inputOffset = state.inputOffset(); |
| if (inputOffset) { |
| move(index, indexTemporary); |
| add32(Imm32(state.inputOffset()), indexTemporary); |
| store32(indexTemporary, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int))); |
| } else |
| store32(index, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int))); |
| } |
| Jump success = jump(); |
| |
| // A failure AFTER the parens jumps here |
| Label backtrackFromAfterParens(this); |
| |
| if (term.quantityType == QuantifierGreedy) { |
| // If this is zero we have now tested with both with and without the parens. |
| loadFromFrame(parenthesesFrameLocation, indexTemporary); |
| state.jumpToBacktrack(branchTest32(Zero, indexTemporary), this); |
| } else if (term.quantityType == QuantifierNonGreedy) { |
| // If this is zero we have now tested with both with and without the parens. |
| loadFromFrame(parenthesesFrameLocation, indexTemporary); |
| branchTest32(Zero, indexTemporary).linkTo(nonGreedyTryParentheses, this); |
| } |
| |
| parenthesesState.plantJumpToBacktrackIfExists(this); |
| // A failure WITHIN the parens jumps here |
| parenthesesState.linkAlternativeBacktracks(this); |
| if (term.invertOrCapture) { |
| store32(Imm32(-1), Address(output, (term.parentheses.subpatternId << 1) * sizeof(int))); |
| store32(Imm32(-1), Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int))); |
| } |
| |
| if (term.quantityType == QuantifierGreedy) |
| storeToFrame(Imm32(0), parenthesesFrameLocation); |
| else |
| state.jumpToBacktrack(jump(), this); |
| |
| state.setBacktrackGenerated(backtrackFromAfterParens); |
| if (term.quantityType == QuantifierNonGreedy) |
| nonGreedySkipParentheses.link(this); |
| success.link(this); |
| } |
| } |
| |
| void generateParentheticalAssertion(TermGenerationState& state) |
| { |
| PatternTerm& term = state.term(); |
| PatternDisjunction* disjunction = term.parentheses.disjunction; |
| ASSERT(term.quantityCount == 1); |
| ASSERT(term.quantityType == QuantifierFixedCount); |
| |
| unsigned parenthesesFrameLocation = term.frameLocation; |
| unsigned alternativeFrameLocation = parenthesesFrameLocation + RegexStackSpaceForBackTrackInfoParentheticalAssertion; |
| |
| int countCheckedAfterAssertion = state.checkedTotal - term.inputPosition; |
| |
| if (term.invertOrCapture) { |
| // Inverted case |
| storeToFrame(index, parenthesesFrameLocation); |
| |
| state.checkedTotal -= countCheckedAfterAssertion; |
| if (countCheckedAfterAssertion) |
| sub32(Imm32(countCheckedAfterAssertion), index); |
| |
| TermGenerationState parenthesesState(disjunction, state.checkedTotal); |
| generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation); |
| // Success! - which means - Fail! |
| loadFromFrame(parenthesesFrameLocation, index); |
| state.jumpToBacktrack(jump(), this); |
| |
| // And fail means success. |
| parenthesesState.linkAlternativeBacktracks(this); |
| loadFromFrame(parenthesesFrameLocation, index); |
| |
| state.checkedTotal += countCheckedAfterAssertion; |
| } else { |
| // Normal case |
| storeToFrame(index, parenthesesFrameLocation); |
| |
| state.checkedTotal -= countCheckedAfterAssertion; |
| if (countCheckedAfterAssertion) |
| sub32(Imm32(countCheckedAfterAssertion), index); |
| |
| TermGenerationState parenthesesState(disjunction, state.checkedTotal); |
| generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation); |
| // Success! - which means - Success! |
| loadFromFrame(parenthesesFrameLocation, index); |
| Jump success = jump(); |
| |
| parenthesesState.linkAlternativeBacktracks(this); |
| loadFromFrame(parenthesesFrameLocation, index); |
| state.jumpToBacktrack(jump(), this); |
| |
| success.link(this); |
| |
| state.checkedTotal += countCheckedAfterAssertion; |
| } |
| } |
| |
| void generateTerm(TermGenerationState& state) |
| { |
| PatternTerm& term = state.term(); |
| |
| switch (term.type) { |
| case PatternTerm::TypeAssertionBOL: |
| generateAssertionBOL(state); |
| break; |
| |
| case PatternTerm::TypeAssertionEOL: |
| generateAssertionEOL(state); |
| break; |
| |
| case PatternTerm::TypeAssertionWordBoundary: |
| generateAssertionWordBoundary(state); |
| break; |
| |
| case PatternTerm::TypePatternCharacter: |
| switch (term.quantityType) { |
| case QuantifierFixedCount: |
| if (term.quantityCount == 1) { |
| if (state.isSinglePatternCharacterLookaheadTerm() && (state.lookaheadTerm().inputPosition == (term.inputPosition + 1))) { |
| generatePatternCharacterPair(state); |
| state.nextTerm(); |
| } else |
| generatePatternCharacterSingle(state); |
| } else |
| generatePatternCharacterFixed(state); |
| break; |
| case QuantifierGreedy: |
| generatePatternCharacterGreedy(state); |
| break; |
| case QuantifierNonGreedy: |
| generatePatternCharacterNonGreedy(state); |
| break; |
| } |
| break; |
| |
| case PatternTerm::TypeCharacterClass: |
| switch (term.quantityType) { |
| case QuantifierFixedCount: |
| if (term.quantityCount == 1) |
| generateCharacterClassSingle(state); |
| else |
| generateCharacterClassFixed(state); |
| break; |
| case QuantifierGreedy: |
| generateCharacterClassGreedy(state); |
| break; |
| case QuantifierNonGreedy: |
| generateCharacterClassNonGreedy(state); |
| break; |
| } |
| break; |
| |
| case PatternTerm::TypeBackReference: |
| m_generationFailed = true; |
| break; |
| |
| case PatternTerm::TypeForwardReference: |
| break; |
| |
| case PatternTerm::TypeParenthesesSubpattern: |
| if ((term.quantityCount == 1) && !term.parentheses.isCopy) |
| generateParenthesesSingle(state); |
| else |
| m_generationFailed = true; |
| break; |
| |
| case PatternTerm::TypeParentheticalAssertion: |
| generateParentheticalAssertion(state); |
| break; |
| } |
| } |
| |
| void generateDisjunction(PatternDisjunction* disjunction) |
| { |
| TermGenerationState state(disjunction, 0); |
| state.resetAlternative(); |
| |
| // Plant a check to see if there is sufficient input available to run the first alternative. |
| // Jumping back to the label 'firstAlternative' will get to this check, jumping to |
| // 'firstAlternativeInputChecked' will jump directly to matching the alternative having |
| // skipped this check. |
| |
| Label firstAlternative(this); |
| |
| // check availability for the next alternative |
| int countCheckedForCurrentAlternative = 0; |
| int countToCheckForFirstAlternative = 0; |
| bool hasShorterAlternatives = false; |
| JumpList notEnoughInputForPreviousAlternative; |
| |
| if (state.alternativeValid()) { |
| PatternAlternative* alternative = state.alternative(); |
| countToCheckForFirstAlternative = alternative->m_minimumSize; |
| state.checkedTotal += countToCheckForFirstAlternative; |
| if (countToCheckForFirstAlternative) |
| notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForFirstAlternative)); |
| countCheckedForCurrentAlternative = countToCheckForFirstAlternative; |
| } |
| |
| Label firstAlternativeInputChecked(this); |
| |
| while (state.alternativeValid()) { |
| // Track whether any alternatives are shorter than the first one. |
| hasShorterAlternatives = hasShorterAlternatives || (countCheckedForCurrentAlternative < countToCheckForFirstAlternative); |
| |
| PatternAlternative* alternative = state.alternative(); |
| optimizeAlternative(alternative); |
| |
| for (state.resetTerm(); state.termValid(); state.nextTerm()) |
| generateTerm(state); |
| |
| // If we get here, the alternative matched. |
| if (m_pattern.m_body->m_callFrameSize) |
| addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister); |
| |
| ASSERT(index != returnRegister); |
| if (m_pattern.m_body->m_hasFixedSize) { |
| move(index, returnRegister); |
| if (alternative->m_minimumSize) |
| sub32(Imm32(alternative->m_minimumSize), returnRegister); |
| } else |
| pop(returnRegister); |
| store32(index, Address(output, 4)); |
| store32(returnRegister, output); |
| |
| generateReturn(); |
| |
| state.nextAlternative(); |
| |
| // if there are any more alternatives, plant the check for input before looping. |
| if (state.alternativeValid()) { |
| PatternAlternative* nextAlternative = state.alternative(); |
| int countToCheckForNextAlternative = nextAlternative->m_minimumSize; |
| |
| if (countCheckedForCurrentAlternative > countToCheckForNextAlternative) { // CASE 1: current alternative was longer than the next one. |
| // If we get here, there the last input checked failed. |
| notEnoughInputForPreviousAlternative.link(this); |
| |
| // Check if sufficent input available to run the next alternative |
| notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative)); |
| // We are now in the correct state to enter the next alternative; this add is only required |
| // to mirror and revert operation of the sub32, just below. |
| add32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index); |
| |
| // If we get here, there the last input checked passed. |
| state.linkAlternativeBacktracks(this); |
| // No need to check if we can run the next alternative, since it is shorter - |
| // just update index. |
| sub32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index); |
| } else if (countCheckedForCurrentAlternative < countToCheckForNextAlternative) { // CASE 2: next alternative is longer than the current one. |
| // If we get here, there the last input checked failed. |
| // If there is insufficient input to run the current alternative, and the next alternative is longer, |
| // then there is definitely not enough input to run it - don't even check. Just adjust index, as if |
| // we had checked. |
| notEnoughInputForPreviousAlternative.link(this); |
| add32(Imm32(countToCheckForNextAlternative - countCheckedForCurrentAlternative), index); |
| notEnoughInputForPreviousAlternative.append(jump()); |
| |
| // The next alternative is longer than the current one; check the difference. |
| state.linkAlternativeBacktracks(this); |
| notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative)); |
| } else { // CASE 3: Both alternatives are the same length. |
| ASSERT(countCheckedForCurrentAlternative == countToCheckForNextAlternative); |
| |
| // If the next alterative is the same length as this one, then no need to check the input - |
| // if there was sufficent input to run the current alternative then there is sufficient |
| // input to run the next one; if not, there isn't. |
| state.linkAlternativeBacktracks(this); |
| } |
| |
| state.checkedTotal -= countCheckedForCurrentAlternative; |
| countCheckedForCurrentAlternative = countToCheckForNextAlternative; |
| state.checkedTotal += countCheckedForCurrentAlternative; |
| } |
| } |
| |
| // If we get here, all Alternatives failed... |
| |
| state.checkedTotal -= countCheckedForCurrentAlternative; |
| |
| // How much more input need there be to be able to retry from the first alternative? |
| // examples: |
| // /yarr_jit/ or /wrec|pcre/ |
| // In these examples we need check for one more input before looping. |
| // /yarr_jit|pcre/ |
| // In this case we need check for 5 more input to loop (+4 to allow for the first alterative |
| // being four longer than the last alternative checked, and another +1 to effectively move |
| // the start position along by one). |
| // /yarr|rules/ or /wrec|notsomuch/ |
| // In these examples, provided that there was sufficient input to have just been matching for |
| // the second alternative we can loop without checking for available input (since the second |
| // alternative is longer than the first). In the latter example we need to decrement index |
| // (by 4) so the start position is only progressed by 1 from the last iteration. |
| int incrementForNextIter = (countToCheckForFirstAlternative - countCheckedForCurrentAlternative) + 1; |
| |
| // First, deal with the cases where there was sufficient input to try the last alternative. |
| if (incrementForNextIter > 0) // We need to check for more input anyway, fall through to the checking below. |
| state.linkAlternativeBacktracks(this); |
| else if (m_pattern.m_body->m_hasFixedSize && !incrementForNextIter) // No need to update anything, link these backtracks straight to the to pof the loop! |
| state.linkAlternativeBacktracksTo(firstAlternativeInputChecked, this); |
| else { // no need to check the input, but we do have some bookkeeping to do first. |
| state.linkAlternativeBacktracks(this); |
| |
| // Where necessary update our preserved start position. |
| if (!m_pattern.m_body->m_hasFixedSize) { |
| move(index, regT0); |
| sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0); |
| poke(regT0, m_pattern.m_body->m_callFrameSize); |
| } |
| |
| // Update index if necessary, and loop (without checking). |
| if (incrementForNextIter) |
| add32(Imm32(incrementForNextIter), index); |
| jump().linkTo(firstAlternativeInputChecked, this); |
| } |
| |
| notEnoughInputForPreviousAlternative.link(this); |
| // Update our idea of the start position, if we're tracking this. |
| if (!m_pattern.m_body->m_hasFixedSize) { |
| if (countCheckedForCurrentAlternative - 1) { |
| move(index, regT0); |
| sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0); |
| poke(regT0, m_pattern.m_body->m_callFrameSize); |
| } else |
| poke(index, m_pattern.m_body->m_callFrameSize); |
| } |
| // Check if there is sufficent input to run the first alternative again. |
| jumpIfAvailableInput(incrementForNextIter).linkTo(firstAlternativeInputChecked, this); |
| // No - insufficent input to run the first alteranative, are there any other alternatives we |
| // might need to check? If so, the last check will have left the index incremented by |
| // (countToCheckForFirstAlternative + 1), so we need test whether countToCheckForFirstAlternative |
| // LESS input is available, to have the effect of just progressing the start position by 1 |
| // from the last iteration. If this check passes we can just jump up to the check associated |
| // with the first alternative in the loop. This is a bit sad, since we'll end up trying the |
| // first alternative again, and this check will fail (otherwise the check planted just above |
| // here would have passed). This is a bit sad, however it saves trying to do something more |
| // complex here in compilation, and in the common case we should end up coallescing the checks. |
| // |
| // FIXME: a nice improvement here may be to stop trying to match sooner, based on the least |
| // of the minimum-alterantive-lengths. E.g. if I have two alternatives of length 200 and 150, |
| // and a string of length 100, we'll end up looping index from 0 to 100, checking whether there |
| // is sufficient input to run either alternative (constantly failing). If there had been only |
| // one alternative, or if the shorter alternative had come first, we would have terminated |
| // immediately. :-/ |
| if (hasShorterAlternatives) |
| jumpIfAvailableInput(-countToCheckForFirstAlternative).linkTo(firstAlternative, this); |
| // index will now be a bit garbled (depending on whether 'hasShorterAlternatives' is true, |
| // it has either been incremented by 1 or by (countToCheckForFirstAlternative + 1) ... |
| // but since we're about to return a failure this doesn't really matter!) |
| |
| unsigned frameSize = m_pattern.m_body->m_callFrameSize; |
| if (!m_pattern.m_body->m_hasFixedSize) |
| ++frameSize; |
| if (frameSize) |
| addPtr(Imm32(frameSize * sizeof(void*)), stackPointerRegister); |
| |
| move(Imm32(-1), returnRegister); |
| |
| generateReturn(); |
| } |
| |
| void generateEnter() |
| { |
| #if PLATFORM(X86_64) |
| push(X86::ebp); |
| move(stackPointerRegister, X86::ebp); |
| push(X86::ebx); |
| #elif PLATFORM(X86) |
| push(X86::ebp); |
| move(stackPointerRegister, X86::ebp); |
| // TODO: do we need spill registers to fill the output pointer if there are no sub captures? |
| push(X86::ebx); |
| push(X86::edi); |
| push(X86::esi); |
| // load output into edi (2 = saved ebp + return address). |
| #if COMPILER(MSVC) |
| loadPtr(Address(X86::ebp, 2 * sizeof(void*)), input); |
| loadPtr(Address(X86::ebp, 3 * sizeof(void*)), index); |
| loadPtr(Address(X86::ebp, 4 * sizeof(void*)), length); |
| loadPtr(Address(X86::ebp, 5 * sizeof(void*)), output); |
| #else |
| loadPtr(Address(X86::ebp, 2 * sizeof(void*)), output); |
| #endif |
| #elif PLATFORM_ARM_ARCH(7) |
| push(ARM::r4); |
| push(ARM::r5); |
| push(ARM::r6); |
| move(ARM::r3, output); |
| #endif |
| } |
| |
| void generateReturn() |
| { |
| #if PLATFORM(X86_64) |
| pop(X86::ebx); |
| pop(X86::ebp); |
| #elif PLATFORM(X86) |
| pop(X86::esi); |
| pop(X86::edi); |
| pop(X86::ebx); |
| pop(X86::ebp); |
| #elif PLATFORM_ARM_ARCH(7) |
| pop(ARM::r6); |
| pop(ARM::r5); |
| pop(ARM::r4); |
| #endif |
| ret(); |
| } |
| |
| public: |
| RegexGenerator(RegexPattern& pattern) |
| : m_pattern(pattern) |
| , m_generationFailed(false) |
| { |
| } |
| |
| void generate() |
| { |
| generateEnter(); |
| |
| // TODO: do I really want this on the stack? |
| if (!m_pattern.m_body->m_hasFixedSize) |
| push(index); |
| |
| if (m_pattern.m_body->m_callFrameSize) |
| subPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister); |
| |
| generateDisjunction(m_pattern.m_body); |
| } |
| |
| void compile(JSGlobalData* globalData, RegexCodeBlock& jitObject) |
| { |
| generate(); |
| |
| LinkBuffer patchBuffer(this, globalData->executableAllocator.poolForSize(size())); |
| |
| for (unsigned i = 0; i < m_backtrackRecords.size(); ++i) |
| patchBuffer.patch(m_backtrackRecords[i].dataLabel, patchBuffer.locationOf(m_backtrackRecords[i].backtrackLocation)); |
| |
| jitObject.set(patchBuffer.finalizeCode()); |
| } |
| |
| bool generationFailed() |
| { |
| return m_generationFailed; |
| } |
| |
| private: |
| RegexPattern& m_pattern; |
| Vector<AlternativeBacktrackRecord> m_backtrackRecords; |
| bool m_generationFailed; |
| }; |
| |
| void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& patternString, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline) |
| { |
| RegexPattern pattern(ignoreCase, multiline); |
| |
| if ((error = compileRegex(patternString, pattern))) |
| return; |
| |
| numSubpatterns = pattern.m_numSubpatterns; |
| |
| RegexGenerator generator(pattern); |
| generator.compile(globalData, jitObject); |
| |
| if (generator.generationFailed()) { |
| JSRegExpIgnoreCaseOption ignoreCaseOption = ignoreCase ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase; |
| JSRegExpMultilineOption multilineOption = multiline ? JSRegExpMultiline : JSRegExpSingleLine; |
| jitObject.setFallback(jsRegExpCompile(reinterpret_cast<const UChar*>(patternString.data()), patternString.size(), ignoreCaseOption, multilineOption, &numSubpatterns, &error)); |
| } |
| } |
| |
| int executeRegex(RegexCodeBlock& jitObject, const UChar* input, unsigned start, unsigned length, int* output, int outputArraySize) |
| { |
| if (JSRegExp* fallback = jitObject.getFallback()) |
| return (jsRegExpExecute(fallback, input, length, start, output, outputArraySize) < 0) ? -1 : output[0]; |
| |
| return jitObject.execute(input, start, length, output); |
| } |
| |
| }} |
| |
| #endif |
| |
| |
| |
| |
| |