WebCore/xml/XPathParser.cpp - WebKit - Git at Google

 /*
  * Copyright 2005 Maksim Orlovich <maksim@kde.org>
  * Copyright (C) 2006 Apple Computer, Inc.
  * Copyright (C) 2007 Alexey Proskuryakov <ap@webkit.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */

 #include "config.h"
 #include "XPathParser.h"

 #if ENABLE(XPATH)

 #include "ExceptionCode.h"
 #include "StringHash.h"
 #include "XPathEvaluator.h"
 #include "XPathException.h"
 #include "XPathNSResolver.h"
 #include "XPathStep.h"
 #include <wtf/StdLibExtras.h>

 int xpathyyparse(void*);

 using namespace WTF;
 using namespace Unicode;

 namespace WebCore {
 namespace XPath {

 class LocationPath;

 #include "XPathGrammar.h"

 Parser* Parser::currentParser = 0;

 enum XMLCat { NameStart, NameCont, NotPartOfName };

 typedef HashMap<String, Step::Axis> AxisNamesMap;

 static XMLCat charCat(UChar aChar)
 {
     //### might need to add some special cases from the XML spec.

     if (aChar == '_')
         return NameStart;

     if (aChar == '.' || aChar == '-')
         return NameCont;
     CharCategory category = Unicode::category(aChar);
     if (category & (Letter_Uppercase | Letter_Lowercase | Letter_Other | Letter_Titlecase | Number_Letter))
         return NameStart;
     if (category & (Mark_NonSpacing | Mark_SpacingCombining | Mark_Enclosing | Letter_Modifier | Number_DecimalDigit))
         return NameCont;
     return NotPartOfName;
 }

 static void setUpAxisNamesMap(AxisNamesMap& axisNames)
 {
     struct AxisName {
         const char* name;
         Step::Axis axis;
     };
     const AxisName axisNameList[] = {
         { "ancestor", Step::AncestorAxis },
         { "ancestor-or-self", Step::AncestorOrSelfAxis },
         { "attribute", Step::AttributeAxis },
         { "child", Step::ChildAxis },
         { "descendant", Step::DescendantAxis },
         { "descendant-or-self", Step::DescendantOrSelfAxis },
         { "following", Step::FollowingAxis },
         { "following-sibling", Step::FollowingSiblingAxis },
         { "namespace", Step::NamespaceAxis },
         { "parent", Step::ParentAxis },
         { "preceding", Step::PrecedingAxis },
         { "preceding-sibling", Step::PrecedingSiblingAxis },
         { "self", Step::SelfAxis }
     };
     for (unsigned i = 0; i < sizeof(axisNameList) / sizeof(axisNameList[0]); ++i)
         axisNames.set(axisNameList[i].name, axisNameList[i].axis);
 }

 static bool isAxisName(const String& name, Step::Axis& type)
 {
     DEFINE_STATIC_LOCAL(AxisNamesMap, axisNames, ());

     if (axisNames.isEmpty())
         setUpAxisNamesMap(axisNames);

     AxisNamesMap::iterator it = axisNames.find(name);
     if (it == axisNames.end())
         return false;
     type = it->second;
     return true;
 }

 static bool isNodeTypeName(const String& name)
 {
     DEFINE_STATIC_LOCAL(HashSet<String>, nodeTypeNames, ());
     if (nodeTypeNames.isEmpty()) {
         nodeTypeNames.add("comment");
         nodeTypeNames.add("text");
         nodeTypeNames.add("processing-instruction");
         nodeTypeNames.add("node");
     }
     return nodeTypeNames.contains(name);
 }

 /* Returns whether the last parsed token matches the [32] Operator rule
  * (check http://www.w3.org/TR/xpath#exprlex). Necessary to disambiguate
  * the tokens.
  */
 bool Parser::isOperatorContext() const
 {
     if (m_nextPos == 0)
         return false;

     switch (m_lastTokenType) {
         case AND: case OR: case MULOP:
         case '/': case SLASHSLASH: case '|': case PLUS: case MINUS:
         case EQOP: case RELOP:
         case '@': case AXISNAME:   case '(': case '[':
             return false;
         default:
             return true;
     }
 }

 void Parser::skipWS()
 {
     while (m_nextPos < m_data.length() && isSpaceOrNewline(m_data[m_nextPos]))
         ++m_nextPos;
 }

 Token Parser::makeTokenAndAdvance(int code, int advance)
 {
     m_nextPos += advance;
     return Token(code);
 }

 Token Parser::makeTokenAndAdvance(int code, NumericOp::Opcode val, int advance)
 {
     m_nextPos += advance;
     return Token(code, val);
 }

 Token Parser::makeTokenAndAdvance(int code, EqTestOp::Opcode val, int advance)
 {
     m_nextPos += advance;
     return Token(code, val);
 }

 // Returns next char if it's there and interesting, 0 otherwise
 char Parser::peekAheadHelper()
 {
     if (m_nextPos + 1 >= m_data.length())
         return 0;
     UChar next = m_data[m_nextPos + 1];
     if (next >= 0xff)
         return 0;
     return next;
 }

 char Parser::peekCurHelper()
 {
     if (m_nextPos >= m_data.length())
         return 0;
     UChar next = m_data[m_nextPos];
     if (next >= 0xff)
         return 0;
     return next;
 }

 Token Parser::lexString()
 {
     UChar delimiter = m_data[m_nextPos];
     int startPos = m_nextPos + 1;

     for (m_nextPos = startPos; m_nextPos < m_data.length(); ++m_nextPos) {
         if (m_data[m_nextPos] == delimiter) {
             String value = m_data.substring(startPos, m_nextPos - startPos);
             if (value.isNull())
                 value = "";
             ++m_nextPos; // Consume the char.
             return Token(LITERAL, value);
         }
     }

     // Ouch, went off the end -- report error.
     return Token(XPATH_ERROR);
 }

 Token Parser::lexNumber()
 {
     int startPos = m_nextPos;
     bool seenDot = false;

     // Go until end or a non-digits character.
     for (; m_nextPos < m_data.length(); ++m_nextPos) {
         UChar aChar = m_data[m_nextPos];
         if (aChar >= 0xff) break;

         if (aChar < '0' || aChar > '9') {
             if (aChar == '.' && !seenDot)
                 seenDot = true;
             else
                 break;
         }
     }

     return Token(NUMBER, m_data.substring(startPos, m_nextPos - startPos));
 }

 bool Parser::lexNCName(String& name)
 {
     int startPos = m_nextPos;
     if (m_nextPos >= m_data.length())
         return false;

     if (charCat(m_data[m_nextPos]) != NameStart)
         return false;

     // Keep going until we get a character that's not good for names.
     for (; m_nextPos < m_data.length(); ++m_nextPos)
         if (charCat(m_data[m_nextPos]) == NotPartOfName)
             break;

     name = m_data.substring(startPos, m_nextPos - startPos);
     return true;
 }

 bool Parser::lexQName(String& name)
 {
     String n1;
     if (!lexNCName(n1))
         return false;

     skipWS();

     // If the next character is :, what we just got it the prefix, if not,
     // it's the whole thing.
     if (peekAheadHelper() != ':') {
         name = n1;
         return true;
     }

     String n2;
     if (!lexNCName(n2))
         return false;

     name = n1 + ":" + n2;
     return true;
 }

 Token Parser::nextTokenInternal()
 {
     skipWS();

     if (m_nextPos >= m_data.length())
         return Token(0);

     char code = peekCurHelper();
     switch (code) {
         case '(': case ')': case '[': case ']':
         case '@': case ',': case '|':
             return makeTokenAndAdvance(code);
         case '\'':
         case '\"':
             return lexString();
         case '0': case '1': case '2': case '3': case '4':
         case '5': case '6': case '7': case '8': case '9':
             return lexNumber();
         case '.': {
             char next = peekAheadHelper();
             if (next == '.')
                 return makeTokenAndAdvance(DOTDOT, 2);
             if (next >= '0' && next <= '9')
                 return lexNumber();
             return makeTokenAndAdvance('.');
         }
         case '/':
             if (peekAheadHelper() == '/')
                 return makeTokenAndAdvance(SLASHSLASH, 2);
             return makeTokenAndAdvance('/');
         case '+':
             return makeTokenAndAdvance(PLUS);
         case '-':
             return makeTokenAndAdvance(MINUS);
         case '=':
             return makeTokenAndAdvance(EQOP, EqTestOp::OP_EQ);
         case '!':
             if (peekAheadHelper() == '=')
                 return makeTokenAndAdvance(EQOP, EqTestOp::OP_NE, 2);
             return Token(XPATH_ERROR);
         case '<':
             if (peekAheadHelper() == '=')
                 return makeTokenAndAdvance(RELOP, EqTestOp::OP_LE, 2);
             return makeTokenAndAdvance(RELOP, EqTestOp::OP_LT);
         case '>':
             if (peekAheadHelper() == '=')
                 return makeTokenAndAdvance(RELOP, EqTestOp::OP_GE, 2);
             return makeTokenAndAdvance(RELOP, EqTestOp::OP_GT);
         case '*':
             if (isOperatorContext())
                 return makeTokenAndAdvance(MULOP, NumericOp::OP_Mul);
             ++m_nextPos;
             return Token(NAMETEST, "*");
         case '$': { // $ QName
             m_nextPos++;
             String name;
             if (!lexQName(name))
                 return Token(XPATH_ERROR);
             return Token(VARIABLEREFERENCE, name);
         }
     }

     String name;
     if (!lexNCName(name))
         return Token(XPATH_ERROR);

     skipWS();
     // If we're in an operator context, check for any operator names
     if (isOperatorContext()) {
         if (name == "and") //### hash?
             return Token(AND);
         if (name == "or")
             return Token(OR);
         if (name == "mod")
             return Token(MULOP, NumericOp::OP_Mod);
         if (name == "div")
             return Token(MULOP, NumericOp::OP_Div);
     }

     // See whether we are at a :
     if (peekCurHelper() == ':') {
         m_nextPos++;
         // Any chance it's an axis name?
         if (peekCurHelper() == ':') {
             m_nextPos++;

             //It might be an axis name.
             Step::Axis axis;
             if (isAxisName(name, axis))
                 return Token(AXISNAME, axis);
             // Ugh, :: is only valid in axis names -> error
             return Token(XPATH_ERROR);
         }

         // Seems like this is a fully qualified qname, or perhaps the * modified one from NameTest
         skipWS();
         if (peekCurHelper() == '*') {
             m_nextPos++;
             return Token(NAMETEST, name + ":*");
         }

         // Make a full qname.
         String n2;
         if (!lexNCName(n2))
             return Token(XPATH_ERROR);

         name = name + ":" + n2;
     }

     skipWS();
     if (peekCurHelper() == '(') {
         //note: we don't swallow the (here!

         //either node type of function name
         if (isNodeTypeName(name)) {
             if (name == "processing-instruction")
                 return Token(PI, name);

             return Token(NODETYPE, name);
         }
         //must be a function name.
         return Token(FUNCTIONNAME, name);
     }

     // At this point, it must be NAMETEST.
     return Token(NAMETEST, name);
 }

 Token Parser::nextToken()
 {
     Token toRet = nextTokenInternal();
     m_lastTokenType = toRet.type;
     return toRet;
 }

 Parser::Parser()
 {
     reset(String());
 }

 void Parser::reset(const String& data)
 {
     m_nextPos = 0;
     m_data = data;
     m_lastTokenType = 0;

     m_topExpr = 0;
     m_gotNamespaceError = false;
 }

 int Parser::lex(void* data)
 {
     YYSTYPE* yylval = static_cast<YYSTYPE*>(data);
     Token tok = nextToken();

     switch (tok.type) {
         case AXISNAME:
             yylval->axis = tok.axis;
             break;
         case MULOP:
             yylval->numop = tok.numop;
             break;
         case RELOP:
         case EQOP:
             yylval->eqop = tok.eqop;
             break;
         case NODETYPE:
         case PI:
         case FUNCTIONNAME:
         case LITERAL:
         case VARIABLEREFERENCE:
         case NUMBER:
         case NAMETEST:
             yylval->str = new String(tok.str);
             registerString(yylval->str);
             break;
     }

     return tok.type;
 }

 bool Parser::expandQName(const String& qName, String& localName, String& namespaceURI)
 {
     int colon = qName.find(':');
     if (colon >= 0) {
         if (!m_resolver)
             return false;
         namespaceURI = m_resolver->lookupNamespaceURI(qName.left(colon));
         if (namespaceURI.isNull())
             return false;
         localName = qName.substring(colon + 1);
     } else
         localName = qName;

     return true;
 }

 Expression* Parser::parseStatement(const String& statement, PassRefPtr<XPathNSResolver> resolver, ExceptionCode& ec)
 {
     reset(statement);

     m_resolver = resolver;

     Parser* oldParser = currentParser;
     currentParser = this;
     int parseError = xpathyyparse(this);
     currentParser = oldParser;

     if (parseError) {
         deleteAllValues(m_parseNodes);
         m_parseNodes.clear();

         HashSet<Vector<Predicate*>*>::iterator pend = m_predicateVectors.end();
         for (HashSet<Vector<Predicate*>*>::iterator it = m_predicateVectors.begin(); it != pend; ++it) {
             deleteAllValues(**it);
             delete *it;
         }
         m_predicateVectors.clear();

         HashSet<Vector<Expression*>*>::iterator eend = m_expressionVectors.end();
         for (HashSet<Vector<Expression*>*>::iterator it = m_expressionVectors.begin(); it != eend; ++it) {
             deleteAllValues(**it);
             delete *it;
         }
         m_expressionVectors.clear();

         deleteAllValues(m_strings);
         m_strings.clear();

         deleteAllValues(m_nodeTests);
         m_nodeTests.clear();

         m_topExpr = 0;

         if (m_gotNamespaceError)
             ec = NAMESPACE_ERR;
         else
             ec = XPathException::INVALID_EXPRESSION_ERR;
         return 0;
     }

     ASSERT(m_parseNodes.size() == 1);
     ASSERT(*m_parseNodes.begin() == m_topExpr);
     ASSERT(m_expressionVectors.size() == 0);
     ASSERT(m_predicateVectors.size() == 0);
     ASSERT(m_strings.size() == 0);
     ASSERT(m_nodeTests.size() == 0);

     m_parseNodes.clear();
     Expression* result = m_topExpr;
     m_topExpr = 0;

     return result;
 }

 void Parser::registerParseNode(ParseNode* node)
 {
     if (node == 0)
         return;

     ASSERT(!m_parseNodes.contains(node));

     m_parseNodes.add(node);
 }

 void Parser::unregisterParseNode(ParseNode* node)
 {
     if (node == 0)
         return;

     ASSERT(m_parseNodes.contains(node));

     m_parseNodes.remove(node);
 }

 void Parser::registerPredicateVector(Vector<Predicate*>* vector)
 {
     if (vector == 0)
         return;

     ASSERT(!m_predicateVectors.contains(vector));

     m_predicateVectors.add(vector);
 }

 void Parser::deletePredicateVector(Vector<Predicate*>* vector)
 {
     if (vector == 0)
         return;

     ASSERT(m_predicateVectors.contains(vector));

     m_predicateVectors.remove(vector);
     delete vector;
 }


 void Parser::registerExpressionVector(Vector<Expression*>* vector)
 {
     if (vector == 0)
         return;

     ASSERT(!m_expressionVectors.contains(vector));

     m_expressionVectors.add(vector);
 }

 void Parser::deleteExpressionVector(Vector<Expression*>* vector)
 {
     if (vector == 0)
         return;

     ASSERT(m_expressionVectors.contains(vector));

     m_expressionVectors.remove(vector);
     delete vector;
 }

 void Parser::registerString(String* s)
 {
     if (s == 0)
         return;

     ASSERT(!m_strings.contains(s));

     m_strings.add(s);
 }

 void Parser::deleteString(String* s)
 {
     if (s == 0)
         return;

     ASSERT(m_strings.contains(s));

     m_strings.remove(s);
     delete s;
 }

 void Parser::registerNodeTest(Step::NodeTest* t)
 {
     if (t == 0)
         return;

     ASSERT(!m_nodeTests.contains(t));

     m_nodeTests.add(t);
 }

 void Parser::deleteNodeTest(Step::NodeTest* t)
 {
     if (t == 0)
         return;

     ASSERT(m_nodeTests.contains(t));

     m_nodeTests.remove(t);
     delete t;
 }

 }
 }

 #endif // ENABLE(XPATH)
	/*
	* Copyright 2005 Maksim Orlovich <maksim@kde.org>
	* Copyright (C) 2006 Apple Computer, Inc.
	* Copyright (C) 2007 Alexey Proskuryakov <ap@webkit.org>
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	*
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	*
	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	*/

	#include "config.h"
	#include "XPathParser.h"

	#if ENABLE(XPATH)

	#include "ExceptionCode.h"
	#include "StringHash.h"
	#include "XPathEvaluator.h"
	#include "XPathException.h"
	#include "XPathNSResolver.h"
	#include "XPathStep.h"
	#include <wtf/StdLibExtras.h>

	int xpathyyparse(void*);

	using namespace WTF;
	using namespace Unicode;

	namespace WebCore {
	namespace XPath {

	class LocationPath;

	#include "XPathGrammar.h"

	Parser* Parser::currentParser = 0;

	enum XMLCat { NameStart, NameCont, NotPartOfName };

	typedef HashMap<String, Step::Axis> AxisNamesMap;

	static XMLCat charCat(UChar aChar)
	{
	//### might need to add some special cases from the XML spec.

	if (aChar == '_')
	return NameStart;

	if (aChar == '.' \|\| aChar == '-')
	return NameCont;
	CharCategory category = Unicode::category(aChar);
	if (category & (Letter_Uppercase \| Letter_Lowercase \| Letter_Other \| Letter_Titlecase \| Number_Letter))
	return NameStart;
	if (category & (Mark_NonSpacing \| Mark_SpacingCombining \| Mark_Enclosing \| Letter_Modifier \| Number_DecimalDigit))
	return NameCont;
	return NotPartOfName;
	}

	static void setUpAxisNamesMap(AxisNamesMap& axisNames)
	{
	struct AxisName {
	const char* name;
	Step::Axis axis;
	};
	const AxisName axisNameList[] = {
	{ "ancestor", Step::AncestorAxis },
	{ "ancestor-or-self", Step::AncestorOrSelfAxis },
	{ "attribute", Step::AttributeAxis },
	{ "child", Step::ChildAxis },
	{ "descendant", Step::DescendantAxis },
	{ "descendant-or-self", Step::DescendantOrSelfAxis },
	{ "following", Step::FollowingAxis },
	{ "following-sibling", Step::FollowingSiblingAxis },
	{ "namespace", Step::NamespaceAxis },
	{ "parent", Step::ParentAxis },
	{ "preceding", Step::PrecedingAxis },
	{ "preceding-sibling", Step::PrecedingSiblingAxis },
	{ "self", Step::SelfAxis }
	};
	for (unsigned i = 0; i < sizeof(axisNameList) / sizeof(axisNameList[0]); ++i)
	axisNames.set(axisNameList[i].name, axisNameList[i].axis);
	}

	static bool isAxisName(const String& name, Step::Axis& type)
	{
	DEFINE_STATIC_LOCAL(AxisNamesMap, axisNames, ());

	if (axisNames.isEmpty())
	setUpAxisNamesMap(axisNames);

	AxisNamesMap::iterator it = axisNames.find(name);
	if (it == axisNames.end())
	return false;
	type = it->second;
	return true;
	}

	static bool isNodeTypeName(const String& name)
	{
	DEFINE_STATIC_LOCAL(HashSet<String>, nodeTypeNames, ());
	if (nodeTypeNames.isEmpty()) {
	nodeTypeNames.add("comment");
	nodeTypeNames.add("text");
	nodeTypeNames.add("processing-instruction");
	nodeTypeNames.add("node");
	}
	return nodeTypeNames.contains(name);
	}

	/* Returns whether the last parsed token matches the [32] Operator rule
	* (check http://www.w3.org/TR/xpath#exprlex). Necessary to disambiguate
	* the tokens.
	*/
	bool Parser::isOperatorContext() const
	{
	if (m_nextPos == 0)
	return false;

	switch (m_lastTokenType) {
	case AND: case OR: case MULOP:
	case '/': case SLASHSLASH: case '\|': case PLUS: case MINUS:
	case EQOP: case RELOP:
	case '@': case AXISNAME: case '(': case '[':
	return false;
	default:
	return true;
	}
	}

	void Parser::skipWS()
	{
	while (m_nextPos < m_data.length() && isSpaceOrNewline(m_data[m_nextPos]))
	++m_nextPos;
	}

	Token Parser::makeTokenAndAdvance(int code, int advance)
	{
	m_nextPos += advance;
	return Token(code);
	}

	Token Parser::makeTokenAndAdvance(int code, NumericOp::Opcode val, int advance)
	{
	m_nextPos += advance;
	return Token(code, val);
	}

	Token Parser::makeTokenAndAdvance(int code, EqTestOp::Opcode val, int advance)
	{
	m_nextPos += advance;
	return Token(code, val);
	}

	// Returns next char if it's there and interesting, 0 otherwise
	char Parser::peekAheadHelper()
	{
	if (m_nextPos + 1 >= m_data.length())
	return 0;
	UChar next = m_data[m_nextPos + 1];
	if (next >= 0xff)
	return 0;
	return next;
	}

	char Parser::peekCurHelper()
	{
	if (m_nextPos >= m_data.length())
	return 0;
	UChar next = m_data[m_nextPos];
	if (next >= 0xff)
	return 0;
	return next;
	}

	Token Parser::lexString()
	{
	UChar delimiter = m_data[m_nextPos];
	int startPos = m_nextPos + 1;

	for (m_nextPos = startPos; m_nextPos < m_data.length(); ++m_nextPos) {
	if (m_data[m_nextPos] == delimiter) {
	String value = m_data.substring(startPos, m_nextPos - startPos);
	if (value.isNull())
	value = "";
	++m_nextPos; // Consume the char.
	return Token(LITERAL, value);
	}
	}

	// Ouch, went off the end -- report error.
	return Token(XPATH_ERROR);
	}

	Token Parser::lexNumber()
	{
	int startPos = m_nextPos;
	bool seenDot = false;

	// Go until end or a non-digits character.
	for (; m_nextPos < m_data.length(); ++m_nextPos) {
	UChar aChar = m_data[m_nextPos];
	if (aChar >= 0xff) break;

	if (aChar < '0' \|\| aChar > '9') {
	if (aChar == '.' && !seenDot)
	seenDot = true;
	else
	break;
	}
	}

	return Token(NUMBER, m_data.substring(startPos, m_nextPos - startPos));
	}

	bool Parser::lexNCName(String& name)
	{
	int startPos = m_nextPos;
	if (m_nextPos >= m_data.length())
	return false;

	if (charCat(m_data[m_nextPos]) != NameStart)
	return false;

	// Keep going until we get a character that's not good for names.
	for (; m_nextPos < m_data.length(); ++m_nextPos)
	if (charCat(m_data[m_nextPos]) == NotPartOfName)
	break;

	name = m_data.substring(startPos, m_nextPos - startPos);
	return true;
	}

	bool Parser::lexQName(String& name)
	{
	String n1;
	if (!lexNCName(n1))
	return false;

	skipWS();

	// If the next character is :, what we just got it the prefix, if not,
	// it's the whole thing.
	if (peekAheadHelper() != ':') {
	name = n1;
	return true;
	}

	String n2;
	if (!lexNCName(n2))
	return false;

	name = n1 + ":" + n2;
	return true;
	}

	Token Parser::nextTokenInternal()
	{
	skipWS();

	if (m_nextPos >= m_data.length())
	return Token(0);

	char code = peekCurHelper();
	switch (code) {
	case '(': case ')': case '[': case ']':
	case '@': case ',': case '\|':
	return makeTokenAndAdvance(code);
	case '\'':
	case '\"':
	return lexString();
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
	return lexNumber();
	case '.': {
	char next = peekAheadHelper();
	if (next == '.')
	return makeTokenAndAdvance(DOTDOT, 2);
	if (next >= '0' && next <= '9')
	return lexNumber();
	return makeTokenAndAdvance('.');
	}
	case '/':
	if (peekAheadHelper() == '/')
	return makeTokenAndAdvance(SLASHSLASH, 2);
	return makeTokenAndAdvance('/');
	case '+':
	return makeTokenAndAdvance(PLUS);
	case '-':
	return makeTokenAndAdvance(MINUS);
	case '=':
	return makeTokenAndAdvance(EQOP, EqTestOp::OP_EQ);
	case '!':
	if (peekAheadHelper() == '=')
	return makeTokenAndAdvance(EQOP, EqTestOp::OP_NE, 2);
	return Token(XPATH_ERROR);
	case '<':
	if (peekAheadHelper() == '=')
	return makeTokenAndAdvance(RELOP, EqTestOp::OP_LE, 2);
	return makeTokenAndAdvance(RELOP, EqTestOp::OP_LT);
	case '>':
	if (peekAheadHelper() == '=')
	return makeTokenAndAdvance(RELOP, EqTestOp::OP_GE, 2);
	return makeTokenAndAdvance(RELOP, EqTestOp::OP_GT);
	case '*':
	if (isOperatorContext())
	return makeTokenAndAdvance(MULOP, NumericOp::OP_Mul);
	++m_nextPos;
	return Token(NAMETEST, "*");
	case '$': { // $ QName
	m_nextPos++;
	String name;
	if (!lexQName(name))
	return Token(XPATH_ERROR);
	return Token(VARIABLEREFERENCE, name);
	}
	}

	String name;
	if (!lexNCName(name))
	return Token(XPATH_ERROR);

	skipWS();
	// If we're in an operator context, check for any operator names
	if (isOperatorContext()) {
	if (name == "and") //### hash?
	return Token(AND);
	if (name == "or")
	return Token(OR);
	if (name == "mod")
	return Token(MULOP, NumericOp::OP_Mod);
	if (name == "div")
	return Token(MULOP, NumericOp::OP_Div);
	}

	// See whether we are at a :
	if (peekCurHelper() == ':') {
	m_nextPos++;
	// Any chance it's an axis name?
	if (peekCurHelper() == ':') {
	m_nextPos++;

	//It might be an axis name.
	Step::Axis axis;
	if (isAxisName(name, axis))
	return Token(AXISNAME, axis);
	// Ugh, :: is only valid in axis names -> error
	return Token(XPATH_ERROR);
	}

	// Seems like this is a fully qualified qname, or perhaps the * modified one from NameTest
	skipWS();
	if (peekCurHelper() == '*') {
	m_nextPos++;
	return Token(NAMETEST, name + ":*");
	}

	// Make a full qname.
	String n2;
	if (!lexNCName(n2))
	return Token(XPATH_ERROR);

	name = name + ":" + n2;
	}

	skipWS();
	if (peekCurHelper() == '(') {
	//note: we don't swallow the (here!

	//either node type of function name
	if (isNodeTypeName(name)) {
	if (name == "processing-instruction")
	return Token(PI, name);

	return Token(NODETYPE, name);
	}
	//must be a function name.
	return Token(FUNCTIONNAME, name);
	}

	// At this point, it must be NAMETEST.
	return Token(NAMETEST, name);
	}

	Token Parser::nextToken()
	{
	Token toRet = nextTokenInternal();
	m_lastTokenType = toRet.type;
	return toRet;
	}

	Parser::Parser()
	{
	reset(String());
	}

	void Parser::reset(const String& data)
	{
	m_nextPos = 0;
	m_data = data;
	m_lastTokenType = 0;

	m_topExpr = 0;
	m_gotNamespaceError = false;
	}

	int Parser::lex(void* data)
	{
	YYSTYPE* yylval = static_cast<YYSTYPE*>(data);
	Token tok = nextToken();

	switch (tok.type) {
	case AXISNAME:
	yylval->axis = tok.axis;
	break;
	case MULOP:
	yylval->numop = tok.numop;
	break;
	case RELOP:
	case EQOP:
	yylval->eqop = tok.eqop;
	break;
	case NODETYPE:
	case PI:
	case FUNCTIONNAME:
	case LITERAL:
	case VARIABLEREFERENCE:
	case NUMBER:
	case NAMETEST:
	yylval->str = new String(tok.str);
	registerString(yylval->str);
	break;
	}

	return tok.type;
	}

	bool Parser::expandQName(const String& qName, String& localName, String& namespaceURI)
	{
	int colon = qName.find(':');
	if (colon >= 0) {
	if (!m_resolver)
	return false;
	namespaceURI = m_resolver->lookupNamespaceURI(qName.left(colon));
	if (namespaceURI.isNull())
	return false;
	localName = qName.substring(colon + 1);
	} else
	localName = qName;

	return true;
	}

	Expression* Parser::parseStatement(const String& statement, PassRefPtr<XPathNSResolver> resolver, ExceptionCode& ec)
	{
	reset(statement);

	m_resolver = resolver;

	Parser* oldParser = currentParser;
	currentParser = this;
	int parseError = xpathyyparse(this);
	currentParser = oldParser;

	if (parseError) {
	deleteAllValues(m_parseNodes);
	m_parseNodes.clear();

	HashSet<Vector<Predicate>>::iterator pend = m_predicateVectors.end();
	for (HashSet<Vector<Predicate>>::iterator it = m_predicateVectors.begin(); it != pend; ++it) {
	deleteAllValues(**it);
	delete *it;
	}
	m_predicateVectors.clear();

	HashSet<Vector<Expression>>::iterator eend = m_expressionVectors.end();
	for (HashSet<Vector<Expression>>::iterator it = m_expressionVectors.begin(); it != eend; ++it) {
	deleteAllValues(**it);
	delete *it;
	}
	m_expressionVectors.clear();

	deleteAllValues(m_strings);
	m_strings.clear();

	deleteAllValues(m_nodeTests);
	m_nodeTests.clear();

	m_topExpr = 0;

	if (m_gotNamespaceError)
	ec = NAMESPACE_ERR;
	else
	ec = XPathException::INVALID_EXPRESSION_ERR;
	return 0;
	}

	ASSERT(m_parseNodes.size() == 1);
	ASSERT(*m_parseNodes.begin() == m_topExpr);
	ASSERT(m_expressionVectors.size() == 0);
	ASSERT(m_predicateVectors.size() == 0);
	ASSERT(m_strings.size() == 0);
	ASSERT(m_nodeTests.size() == 0);

	m_parseNodes.clear();
	Expression* result = m_topExpr;
	m_topExpr = 0;

	return result;
	}

	void Parser::registerParseNode(ParseNode* node)
	{
	if (node == 0)
	return;

	ASSERT(!m_parseNodes.contains(node));

	m_parseNodes.add(node);
	}

	void Parser::unregisterParseNode(ParseNode* node)
	{
	if (node == 0)
	return;

	ASSERT(m_parseNodes.contains(node));

	m_parseNodes.remove(node);
	}

	void Parser::registerPredicateVector(Vector<Predicate> vector)
	{
	if (vector == 0)
	return;

	ASSERT(!m_predicateVectors.contains(vector));

	m_predicateVectors.add(vector);
	}

	void Parser::deletePredicateVector(Vector<Predicate> vector)
	{
	if (vector == 0)
	return;

	ASSERT(m_predicateVectors.contains(vector));

	m_predicateVectors.remove(vector);
	delete vector;
	}


	void Parser::registerExpressionVector(Vector<Expression> vector)
	{
	if (vector == 0)
	return;

	ASSERT(!m_expressionVectors.contains(vector));

	m_expressionVectors.add(vector);
	}

	void Parser::deleteExpressionVector(Vector<Expression> vector)
	{
	if (vector == 0)
	return;

	ASSERT(m_expressionVectors.contains(vector));

	m_expressionVectors.remove(vector);
	delete vector;
	}

	void Parser::registerString(String* s)
	{
	if (s == 0)
	return;

	ASSERT(!m_strings.contains(s));

	m_strings.add(s);
	}

	void Parser::deleteString(String* s)
	{
	if (s == 0)
	return;

	ASSERT(m_strings.contains(s));

	m_strings.remove(s);
	delete s;
	}

	void Parser::registerNodeTest(Step::NodeTest* t)
	{
	if (t == 0)
	return;

	ASSERT(!m_nodeTests.contains(t));

	m_nodeTests.add(t);
	}

	void Parser::deleteNodeTest(Step::NodeTest* t)
	{
	if (t == 0)
	return;

	ASSERT(m_nodeTests.contains(t));

	m_nodeTests.remove(t);
	delete t;
	}

	}
	}

	#endif // ENABLE(XPATH)