| /* |
| * Copyright 2005 Maksim Orlovich <maksim@kde.org> |
| * Copyright (C) 2006, 2013 Apple Inc. All rights reserved. |
| * Copyright (C) 2007 Alexey Proskuryakov <ap@webkit.org> |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
| * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
| * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
| * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
| * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| #include "config.h" |
| #include "XPathParser.h" |
| |
| #include "XPathEvaluator.h" |
| #include "XPathNSResolver.h" |
| #include "XPathPath.h" |
| #include "XPathStep.h" |
| #include <wtf/NeverDestroyed.h> |
| #include <wtf/RobinHoodHashMap.h> |
| #include <wtf/StdLibExtras.h> |
| #include <wtf/text/StringHash.h> |
| |
| extern int xpathyyparse(WebCore::XPath::Parser&); |
| |
| #include "XPathGrammar.h" |
| |
| namespace WebCore { |
| namespace XPath { |
| |
| struct Parser::Token { |
| int type; |
| String string; |
| Step::Axis axis; |
| NumericOp::Opcode numericOpcode; |
| EqTestOp::Opcode equalityTestOpcode; |
| |
| Token(int type) : type(type) { } |
| Token(int type, const String& string) : type(type), string(string) { } |
| Token(int type, Step::Axis axis) : type(type), axis(axis) { } |
| Token(int type, NumericOp::Opcode opcode) : type(type), numericOpcode(opcode) { } |
| Token(int type, EqTestOp::Opcode opcode) : type(type), equalityTestOpcode(opcode) { } |
| }; |
| |
| enum XMLCat { NameStart, NameCont, NotPartOfName }; |
| |
| static XMLCat charCat(UChar character) |
| { |
| if (character == '_') |
| return NameStart; |
| |
| if (character == '.' || character == '-') |
| return NameCont; |
| unsigned characterTypeMask = U_GET_GC_MASK(character); |
| if (characterTypeMask & (U_GC_LU_MASK | U_GC_LL_MASK | U_GC_LO_MASK | U_GC_LT_MASK | U_GC_NL_MASK)) |
| return NameStart; |
| if (characterTypeMask & (U_GC_M_MASK | U_GC_LM_MASK | U_GC_ND_MASK)) |
| return NameCont; |
| return NotPartOfName; |
| } |
| |
| static MemoryCompactLookupOnlyRobinHoodHashMap<String, Step::Axis> createAxisNamesMap() |
| { |
| struct AxisName { |
| ASCIILiteral name; |
| Step::Axis axis; |
| }; |
| const AxisName axisNameList[] = { |
| { "ancestor"_s, Step::AncestorAxis }, |
| { "ancestor-or-self"_s, Step::AncestorOrSelfAxis }, |
| { "attribute"_s, Step::AttributeAxis }, |
| { "child"_s, Step::ChildAxis }, |
| { "descendant"_s, Step::DescendantAxis }, |
| { "descendant-or-self"_s, Step::DescendantOrSelfAxis }, |
| { "following"_s, Step::FollowingAxis }, |
| { "following-sibling"_s, Step::FollowingSiblingAxis }, |
| { "namespace"_s, Step::NamespaceAxis }, |
| { "parent"_s, Step::ParentAxis }, |
| { "preceding"_s, Step::PrecedingAxis }, |
| { "preceding-sibling"_s, Step::PrecedingSiblingAxis }, |
| { "self"_s, Step::SelfAxis } |
| }; |
| MemoryCompactLookupOnlyRobinHoodHashMap<String, Step::Axis> map; |
| for (auto& axisName : axisNameList) |
| map.add(axisName.name, axisName.axis); |
| return map; |
| } |
| |
| static bool parseAxisName(const String& name, Step::Axis& type) |
| { |
| static NeverDestroyed axisNames = createAxisNamesMap(); |
| auto it = axisNames.get().find(name); |
| if (it == axisNames.get().end()) |
| return false; |
| type = it->value; |
| return true; |
| } |
| |
| // Returns whether the current token can possibly be a binary operator, given |
| // the previous token. Necessary to disambiguate some of the operators |
| // (* (multiply), div, and, or, mod) in the [32] Operator rule |
| // (check http://www.w3.org/TR/xpath#exprlex). |
| bool Parser::isBinaryOperatorContext() const |
| { |
| switch (m_lastTokenType) { |
| case 0: |
| case '@': case AXISNAME: case '(': case '[': case ',': |
| case AND: case OR: case MULOP: |
| case '/': case SLASHSLASH: case '|': case PLUS: case MINUS: |
| case EQOP: case RELOP: |
| return false; |
| default: |
| return true; |
| } |
| } |
| |
| void Parser::skipWS() |
| { |
| while (m_nextPos < m_data.length() && isSpaceOrNewline(m_data[m_nextPos])) |
| ++m_nextPos; |
| } |
| |
| Parser::Token Parser::makeTokenAndAdvance(int code, int advance) |
| { |
| m_nextPos += advance; |
| return Token(code); |
| } |
| |
| Parser::Token Parser::makeTokenAndAdvance(int code, NumericOp::Opcode val, int advance) |
| { |
| m_nextPos += advance; |
| return Token(code, val); |
| } |
| |
| Parser::Token Parser::makeTokenAndAdvance(int code, EqTestOp::Opcode val, int advance) |
| { |
| m_nextPos += advance; |
| return Token(code, val); |
| } |
| |
| // Returns next char if it's there and interesting, 0 otherwise. |
| char Parser::peekAheadHelper() |
| { |
| if (m_nextPos + 1 >= m_data.length()) |
| return 0; |
| UChar next = m_data[m_nextPos + 1]; |
| if (next >= 0xff) |
| return 0; |
| return next; |
| } |
| |
| char Parser::peekCurHelper() |
| { |
| if (m_nextPos >= m_data.length()) |
| return 0; |
| UChar next = m_data[m_nextPos]; |
| if (next >= 0xff) |
| return 0; |
| return next; |
| } |
| |
| Parser::Token Parser::lexString() |
| { |
| UChar delimiter = m_data[m_nextPos]; |
| int startPos = m_nextPos + 1; |
| |
| for (m_nextPos = startPos; m_nextPos < m_data.length(); ++m_nextPos) { |
| if (m_data[m_nextPos] == delimiter) { |
| String value = m_data.substring(startPos, m_nextPos - startPos); |
| if (value.isNull()) |
| value = emptyString(); |
| ++m_nextPos; // Consume the char. |
| return Token(LITERAL, value); |
| } |
| } |
| |
| // Ouch, went off the end -- report error. |
| return Token(XPATH_ERROR); |
| } |
| |
| Parser::Token Parser::lexNumber() |
| { |
| int startPos = m_nextPos; |
| bool seenDot = false; |
| |
| // Go until end or a non-digits character. |
| for (; m_nextPos < m_data.length(); ++m_nextPos) { |
| UChar aChar = m_data[m_nextPos]; |
| if (aChar >= 0xff) break; |
| |
| if (!isASCIIDigit(aChar)) { |
| if (aChar == '.' && !seenDot) |
| seenDot = true; |
| else |
| break; |
| } |
| } |
| |
| return Token(NUMBER, m_data.substring(startPos, m_nextPos - startPos)); |
| } |
| |
| bool Parser::lexNCName(String& name) |
| { |
| int startPos = m_nextPos; |
| if (m_nextPos >= m_data.length()) |
| return false; |
| |
| if (charCat(m_data[m_nextPos]) != NameStart) |
| return false; |
| |
| // Keep going until we get a character that's not good for names. |
| for (; m_nextPos < m_data.length(); ++m_nextPos) |
| if (charCat(m_data[m_nextPos]) == NotPartOfName) |
| break; |
| |
| name = m_data.substring(startPos, m_nextPos - startPos); |
| return true; |
| } |
| |
| bool Parser::lexQName(String& name) |
| { |
| String n1; |
| if (!lexNCName(n1)) |
| return false; |
| |
| skipWS(); |
| |
| // If the next character is :, what we just got it the prefix, if not, |
| // it's the whole thing. |
| if (peekAheadHelper() != ':') { |
| name = n1; |
| return true; |
| } |
| |
| String n2; |
| if (!lexNCName(n2)) |
| return false; |
| |
| name = n1 + ":" + n2; |
| return true; |
| } |
| |
| inline Parser::Token Parser::nextTokenInternal() |
| { |
| skipWS(); |
| |
| if (m_nextPos >= m_data.length()) |
| return Token(0); |
| |
| char code = peekCurHelper(); |
| switch (code) { |
| case '(': case ')': case '[': case ']': |
| case '@': case ',': case '|': |
| return makeTokenAndAdvance(code); |
| case '\'': |
| case '\"': |
| return lexString(); |
| case '0': case '1': case '2': case '3': case '4': |
| case '5': case '6': case '7': case '8': case '9': |
| return lexNumber(); |
| case '.': { |
| char next = peekAheadHelper(); |
| if (next == '.') |
| return makeTokenAndAdvance(DOTDOT, 2); |
| if (isASCIIDigit(next)) |
| return lexNumber(); |
| return makeTokenAndAdvance('.'); |
| } |
| case '/': |
| if (peekAheadHelper() == '/') |
| return makeTokenAndAdvance(SLASHSLASH, 2); |
| return makeTokenAndAdvance('/'); |
| case '+': |
| return makeTokenAndAdvance(PLUS); |
| case '-': |
| return makeTokenAndAdvance(MINUS); |
| case '=': |
| return makeTokenAndAdvance(EQOP, EqTestOp::OP_EQ); |
| case '!': |
| if (peekAheadHelper() == '=') |
| return makeTokenAndAdvance(EQOP, EqTestOp::OP_NE, 2); |
| return Token(XPATH_ERROR); |
| case '<': |
| if (peekAheadHelper() == '=') |
| return makeTokenAndAdvance(RELOP, EqTestOp::OP_LE, 2); |
| return makeTokenAndAdvance(RELOP, EqTestOp::OP_LT); |
| case '>': |
| if (peekAheadHelper() == '=') |
| return makeTokenAndAdvance(RELOP, EqTestOp::OP_GE, 2); |
| return makeTokenAndAdvance(RELOP, EqTestOp::OP_GT); |
| case '*': |
| if (isBinaryOperatorContext()) |
| return makeTokenAndAdvance(MULOP, NumericOp::OP_Mul); |
| ++m_nextPos; |
| return Token(NAMETEST, "*"_s); |
| case '$': { // $ QName |
| m_nextPos++; |
| String name; |
| if (!lexQName(name)) |
| return Token(XPATH_ERROR); |
| return Token(VARIABLEREFERENCE, name); |
| } |
| } |
| |
| String name; |
| if (!lexNCName(name)) |
| return Token(XPATH_ERROR); |
| |
| skipWS(); |
| // If we're in an operator context, check for any operator names |
| if (isBinaryOperatorContext()) { |
| if (name == "and"_s) // ### hash? |
| return Token(AND); |
| if (name == "or"_s) |
| return Token(OR); |
| if (name == "mod"_s) |
| return Token(MULOP, NumericOp::OP_Mod); |
| if (name == "div"_s) |
| return Token(MULOP, NumericOp::OP_Div); |
| } |
| |
| // See whether we are at a : |
| if (peekCurHelper() == ':') { |
| m_nextPos++; |
| // Any chance it's an axis name? |
| if (peekCurHelper() == ':') { |
| m_nextPos++; |
| |
| //It might be an axis name. |
| Step::Axis axis; |
| if (parseAxisName(name, axis)) |
| return Token(AXISNAME, axis); |
| // Ugh, :: is only valid in axis names -> error |
| return Token(XPATH_ERROR); |
| } |
| |
| // Seems like this is a fully qualified qname, or perhaps the * modified one from NameTest |
| skipWS(); |
| if (peekCurHelper() == '*') { |
| m_nextPos++; |
| return Token(NAMETEST, name + ":*"); |
| } |
| |
| // Make a full qname. |
| String n2; |
| if (!lexNCName(n2)) |
| return Token(XPATH_ERROR); |
| |
| name = name + ":" + n2; |
| } |
| |
| skipWS(); |
| |
| if (peekCurHelper() == '(') { |
| // note: we don't swallow the '(' here! |
| |
| // Either node type oor function name. |
| |
| if (name == "processing-instruction"_s) |
| return Token(PI); |
| if (name == "node"_s) |
| return Token(NODE); |
| if (name == "text"_s) |
| return Token(TEXT_); |
| if (name == "comment"_s) |
| return Token(COMMENT); |
| |
| return Token(FUNCTIONNAME, name); |
| } |
| |
| // At this point, it must be NAMETEST. |
| return Token(NAMETEST, name); |
| } |
| |
| inline Parser::Token Parser::nextToken() |
| { |
| Token token = nextTokenInternal(); |
| m_lastTokenType = token.type; |
| return token; |
| } |
| |
| Parser::Parser(const String& statement, RefPtr<XPathNSResolver>&& resolver) |
| : m_data(statement) |
| , m_resolver(WTFMove(resolver)) |
| { |
| } |
| |
| int Parser::lex(YYSTYPE& yylval) |
| { |
| Token token = nextToken(); |
| |
| switch (token.type) { |
| case AXISNAME: |
| yylval.axis = token.axis; |
| break; |
| case MULOP: |
| yylval.numericOpcode = token.numericOpcode; |
| break; |
| case RELOP: |
| case EQOP: |
| yylval.equalityTestOpcode = token.equalityTestOpcode; |
| break; |
| case NODETYPE: |
| case FUNCTIONNAME: |
| case LITERAL: |
| case VARIABLEREFERENCE: |
| case NUMBER: |
| case NAMETEST: |
| yylval.string = token.string.releaseImpl().leakRef(); |
| break; |
| } |
| |
| return token.type; |
| } |
| |
| bool Parser::expandQualifiedName(const String& qualifiedName, AtomString& localName, AtomString& namespaceURI) |
| { |
| size_t colon = qualifiedName.find(':'); |
| if (colon != notFound) { |
| if (!m_resolver) { |
| m_sawNamespaceError = true; |
| return false; |
| } |
| namespaceURI = m_resolver->lookupNamespaceURI(StringView(qualifiedName).left(colon).toAtomString()); |
| if (namespaceURI.isNull()) { |
| m_sawNamespaceError = true; |
| return false; |
| } |
| localName = StringView(qualifiedName).substring(colon + 1).toAtomString(); |
| } else |
| localName = AtomString { qualifiedName }; |
| return true; |
| } |
| |
| ExceptionOr<std::unique_ptr<Expression>> Parser::parseStatement(const String& statement, RefPtr<XPathNSResolver>&& resolver) |
| { |
| Parser parser { statement, WTFMove(resolver) }; |
| |
| int parseError = xpathyyparse(parser); |
| |
| if (parser.m_sawNamespaceError) |
| return Exception { NamespaceError }; |
| |
| if (parseError) |
| return Exception { SyntaxError }; |
| |
| return WTFMove(parser.m_result); |
| } |
| |
| } } |