SVGSupport/kdom/xpath/impl/tokenizer.cpp - WebKit - Git at Google

 /*
  * tokenizer.cc - Copyright 2005 Maksim Orlovich <maksim@kde.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 #include "config.h"
 #include "tokenizer.h"

 // Pairs the textual name of an XPath axis with the Step::AxisType value
 // that is stored for it in the axisNames table.
 struct AxisNameMapping
 {
     // Stores both arguments as given; the name pointer is kept, not copied.
     AxisNameMapping(const char *_name, Step::AxisType _type)
         : name(_name)
         , type(_type)
     {
     }

     const char *name;     // axis name as it is spelled in an expression
     Step::AxisType type;  // axis value associated with that name
 };

 // Table of the axis names the tokenizer recognises, each paired with its
 // Step::AxisType. isAxisName() copies these entries into a dictionary the
 // first time it is called.
 static AxisNameMapping axisNames[] = {
     AxisNameMapping("ancestor", Step::AncestorAxis),
     AxisNameMapping("ancestor-or-self", Step::AncestorOrSelfAxis),
     AxisNameMapping("attribute", Step::AttributeAxis),
     AxisNameMapping("child", Step::ChildAxis),
     AxisNameMapping("descendant", Step::DescendantAxis),
     AxisNameMapping("descendant-or-self", Step::DescendantOrSelfAxis),
     AxisNameMapping("following", Step::FollowingAxis),
     AxisNameMapping("following-sibling", Step::FollowingSiblingAxis),
     AxisNameMapping("namespace", Step::NamespaceAxis),
     AxisNameMapping("parent", Step::ParentAxis),
     AxisNameMapping("preceding", Step::PrecedingAxis),
     AxisNameMapping("preceding-sibling", Step::PrecedingSiblingAxis),
     AxisNameMapping("self", Step::SelfAxis)
 };
 // Number of entries in axisNames, derived from the array's size.
 static unsigned int axisNamesCount = sizeof(axisNames) / sizeof(axisNames[0]);

 #ifndef APPLE_COMPILE_HACK
 // Names that isNodeTypeName() accepts. The trailing 0 terminates the list
 // for the loop that walks it.
 static const char* const nodeTypeNames[] = {
     "comment",
     "text",
     "processing-instruction",
     "node",
     0
 };
 #endif

 // Singleton returned by Tokenizer::self(); created on the first call.
 Tokenizer* Tokenizer::s_instance = 0;

 // Lookup tables built on first use by isAxisName() and isNodeTypeName().
 Q3Dict<Step::AxisType>* Tokenizer::s_axisNamesDict     = 0;
 Q3Dict<char>* Tokenizer::s_nodeTypeNamesDict = 0;

 // Returns the shared Tokenizer, constructing it on the first call.
 Tokenizer &Tokenizer::self()
 {
     if (s_instance)
         return *s_instance;

     s_instance = new Tokenizer;
     return *s_instance;
 }

 // Classifies a character for name lexing: NameStart may begin a name,
 // NameCont may only continue one, and NotPartOfName ends it.
 Tokenizer::XMLCat Tokenizer::charCat(QChar aChar)
 {
     //### might need to add some special cases from the XML spec.

     // A few punctuation characters are classified by hand.
     switch (aChar.unicode()) {
         case '_':
             return NameStart;
         case '.':
         case '-':
             return NameCont;
         default:
             break;
     }

 #ifndef APPLE_COMPILE_HACK
     // Everything else is decided by its Unicode general category.
     switch (aChar.category()) {
         case QChar::Letter_Lowercase: //Ll
         case QChar::Letter_Uppercase: //Lu
         case QChar::Letter_Other:     //Lo
         case QChar::Letter_Titlecase: //Lt
         case QChar::Number_Letter:    //Nl
             return NameStart;

         case QChar::Mark_SpacingCombining: //Mc
         case QChar::Mark_Enclosing:        //Me
         case QChar::Mark_NonSpacing:       //Mn
         case QChar::Letter_Modifier:       //Lm
         case QChar::Number_DecimalDigit:   //Nd
             return NameCont;

         default:
             return NotPartOfName;
     }
 #else
     // Without category information nothing else counts as a name character.
     return NotPartOfName;
 #endif
 }

 // Reports whether `name` is one of the axis names in the axisNames table.
 // When it is, and `type` is non-null, the matching axis is written to *type.
 bool Tokenizer::isAxisName(QString name, Step::AxisType *type)
 {
     // Fill the dictionary from the table on first use. Auto-delete is
     // switched on, so the dictionary owns the AxisType copies made here.
     if (!s_axisNamesDict) {
         s_axisNamesDict = new Q3Dict<Step::AxisType>;
         s_axisNamesDict->setAutoDelete( true );

         unsigned int index = 0;
         while (index < axisNamesCount) {
             const AxisNameMapping &entry = axisNames[index];
             s_axisNamesDict->insert(QString::fromLatin1(entry.name),
                                     new Step::AxisType(entry.type));
             ++index;
         }
     }

     Step::AxisType *found = s_axisNamesDict->find(name);
     if (!found)
         return false;

     if (type)
         *type = *found;
     return true;
 }

 // Reports whether `name` is one of the entries of nodeTypeNames.
 // Always returns false when built with APPLE_COMPILE_HACK.
 bool Tokenizer::isNodeTypeName(QString name)
 {
 #ifndef APPLE_COMPILE_HACK
     if (!s_nodeTypeNamesDict) {
         // The dictionary is used as a set: only the presence of a key
         // matters. The stored item still has to be a non-null const char*,
         // so the name literal itself is used. Passing the whole
         // nodeTypeNames array (a const char* const*) is not a matching
         // pointer type for a Q3Dict<char> item.
         s_nodeTypeNamesDict = new Q3Dict<char>;
         for (int p = 0; nodeTypeNames[p]; ++p)
             s_nodeTypeNamesDict->insert(QString::fromLatin1(nodeTypeNames[p]),
                                         nodeTypeNames[p] /*dummy*/);
     }
     // find() yields the stored item or 0; make the bool conversion explicit.
     return s_nodeTypeNamesDict->find(name) != 0;
 #else
     return false;
 #endif
 }

 /* Returns whether the next '*' or NCName has to be read as an operator,
  * following the disambiguation rules of the XPath lexical structure
  * (check http://www.w3.org/TR/xpath#exprlex): that is the case only when
  * there is a preceding token and it is not one of @, ::, (, [, ',' or an
  * operator.
  */
 bool Tokenizer::isOperatorContext()
 {
     // reset() stores 0 in m_lastTokenType, so 0 means "no preceding token";
     // the first token of an expression can never be an operator.
     if (m_lastTokenType == 0)
         return false;

     switch ( m_lastTokenType ) {
         case AND: case OR: case MULOP:
         case '/': case SLASHSLASH: case '|': case PLUS: case MINUS:
         case EQOP: case RELOP:
         case '@': case AXISNAME:   case '(': case '[': case ',':
             return false;
         default:
             return true;
     }
 }

 // Moves m_nextPos forward past any whitespace at the current position.
 void Tokenizer::skipWS()
 {
     for (; m_nextPos < m_data.length(); ++m_nextPos) {
         if (!m_data[m_nextPos].isSpace())
             break;
     }
 }

 // Builds a token of type `code` with no value and moves the read position
 // forward by `advance` characters.
 Token Tokenizer::makeTokenAndAdvance(int code, int advance)
 {
     Token result(code);
     m_nextPos += advance;
     return result;
 }

 // Builds a token of type `code` carrying the integer `val` and moves the
 // read position forward by `advance` characters.
 Token Tokenizer::makeIntTokenAndAdvance(int code, int val, int advance)
 {
     Token result(code, val);
     m_nextPos += advance;
     return result;
 }

 // Returns the character one past the current position, or 0 when there is
 // no such character or its row() is non-zero (only the cell() is returned,
 // so characters outside row 0 are reported as "not interesting").
 char Tokenizer::peekAheadHelper()
 {
     if (m_nextPos + 1 < m_data.length()) {
         QChar next = m_data[m_nextPos + 1];
         if (next.row() == 0)
             return next.cell();
     }
     return 0;
 }

 // Returns the character at the current position, or 0 when the position is
 // at the end of the input or the character's row() is non-zero.
 char Tokenizer::peekCurHelper()
 {
     if (m_nextPos < m_data.length()) {
         QChar next = m_data[m_nextPos];
         if (next.row() == 0)
             return next.cell();
     }
     return 0;
 }

 // Lexes a quoted string. The character at m_nextPos on entry is taken as
 // the delimiter; the text up to its next occurrence becomes a LITERAL token
 // and the closing delimiter is consumed. If the delimiter never recurs, an
 // ERROR token is returned and m_nextPos is left at the end of the input.
 Token Tokenizer::lexString()
 {
     const QChar quote     = m_data[m_nextPos];
     const int   bodyStart = m_nextPos + 1;

     // Scan forward to the closing delimiter or the end of the input.
     m_nextPos = bodyStart;
     while (m_nextPos < m_data.length() && !(m_data[m_nextPos] == quote))
         ++m_nextPos;

     if (m_nextPos >= m_data.length()) {
         //Ouch, went off the end -- report error
         return Token(ERROR);
     }

     QString value = m_data.mid(bodyStart, m_nextPos - bodyStart);
     ++m_nextPos; //Consume the closing delimiter
     return Token(LITERAL, value);
 }

 // Lexes a run of ASCII digits containing at most one '.', starting at
 // m_nextPos, and returns it as a NUMBER token holding the matched text.
 Token Tokenizer::lexNumber()
 {
     const int firstPos = m_nextPos;
     bool dotConsumed = false;

     //Go until end or a non-digits character
     while (m_nextPos < m_data.length()) {
         QChar current = m_data[m_nextPos];
         if (current.row() != 0)
             break;

         const bool isDigit = current.cell() >= '0' && current.cell() <= '9';
         if (!isDigit) {
             // Only the first '.' belongs to the number.
             if (current.cell() != '.' || dotConsumed)
                 break;
             dotConsumed = true;
         }
         ++m_nextPos;
     }

     QString value = m_data.mid(firstPos, m_nextPos - firstPos);
     return Token(NUMBER, value);
 }

 // Lexes a name at m_nextPos: one NameStart character followed by any run
 // of characters that charCat() does not classify as NotPartOfName. Returns
 // a token built from the matched text, or an ERROR token (made through
 // makeTokenAndAdvance) when the current character cannot start a name.
 Token Tokenizer::lexNCName()
 {
     const int firstPos = m_nextPos;

     const bool startsName = m_nextPos < m_data.length()
                          && charCat(m_data[m_nextPos]) == NameStart;
     if (!startsName)
         return makeTokenAndAdvance(ERROR);

     //Keep going until we get a character that's not good for names.
     while (m_nextPos < m_data.length()
            && charCat(m_data[m_nextPos]) != NotPartOfName)
         ++m_nextPos;

     QString value = m_data.mid(firstPos, m_nextPos - firstPos);
     return Token(value);
 }

 // Lexes a possibly prefixed name ("local" or "prefix:local") at m_nextPos.
 // Returns a token whose value is the full name, or the ERROR token produced
 // by lexNCName() when either part is not a valid name.
 Token Tokenizer::lexQName()
 {
     Token t1 = lexNCName();
     if (t1.type == ERROR) return t1;
     skipWS();
     //If the current character is ':', what we just got is the prefix; if
     //not, it's the whole thing. lexNCName() leaves m_nextPos ON the first
     //character after the name, so that is the character to test (looking
     //one further ahead would inspect the start of the local part instead).
     if (peekCurHelper() != ':')
         return t1;

     //Consume the ':' so that the local part can be lexed.
     ++m_nextPos;

     Token t2 = lexNCName();
     if (t2.type == ERROR) return t2;

     return Token(t1.value + ":" + t2.value);
 }

 // Lexes and returns the next token starting at m_nextPos, after skipping
 // leading whitespace. Returns Token(0) at the end of the input and an ERROR
 // token when the input cannot be tokenized. isOperatorContext() decides
 // whether '*' and the names and/or/mod/div are read as operators.
 Token Tokenizer::nextTokenInternal()
 {
     skipWS();

     if (m_nextPos >= m_data.length()) {
         return Token(0);
     }

     // peekCurHelper() yields 0 for a character whose row() is non-zero; such
     // a character matches no case below and is handled by the name path.
     char code = peekCurHelper();
     switch (code) {
         // Single-character tokens use the character itself as token type.
         case '(': case ')': case '[': case ']':
         case '@': case ',': case '|':
             return makeTokenAndAdvance(code);
         case '\'':
         case '\"':
             return lexString();
         case '0': case '1': case '2': case '3': case '4':
         case '5': case '6': case '7': case '8': case '9':
             return lexNumber();
         case '.': {
             // '.' may start "..", a number such as ".5", or stand alone.
             char next = peekAheadHelper();
             if (next == '.')
                 return makeTokenAndAdvance(DOTDOT, 2);
             else if (next >= '0' && next <= '9')
                 return lexNumber();
             else
                 return makeTokenAndAdvance('.');
         }
         case '/':
             if (peekAheadHelper() == '/')
                 return makeTokenAndAdvance(SLASHSLASH, 2);
             else
                 return makeTokenAndAdvance('/');
         case '+':
             return makeTokenAndAdvance(PLUS);
         case '-':
             return makeTokenAndAdvance(MINUS);
         case '=':
             return makeIntTokenAndAdvance(EQOP, EqTestOp::OP_EQ);
         case '!':
             // '!' is only accepted as part of "!="; on error m_nextPos is
             // left where it is.
             if (peekAheadHelper() == '=')
                 return makeIntTokenAndAdvance(EQOP, EqTestOp::OP_NE, 2);
             else {
                 return Token(ERROR);
             }
         case '<':
             if (peekAheadHelper() == '=')
                 return makeIntTokenAndAdvance(RELOP, NumericOp::OP_LE, 2);
             else
                 return makeIntTokenAndAdvance(RELOP, NumericOp::OP_LT);
         case '>':
             if (peekAheadHelper() == '=')
                 return makeIntTokenAndAdvance(RELOP, NumericOp::OP_GE, 2);
             else
                 return makeIntTokenAndAdvance(RELOP, NumericOp::OP_GT);
         case '*':
             // Multiplication in operator context, otherwise the wildcard
             // name test.
             if (isOperatorContext())
                 return makeIntTokenAndAdvance(MULOP, NumericOp::OP_Mul);
             else {
                 ++m_nextPos;
                 return Token(NAMETEST, "*");
             }
         case '$': {//$ QName
             m_nextPos++;
             Token par = lexQName();
             if (par.type == ERROR)
                 return par;
             else
                 return Token(VARIABLEREFERENCE, par.value);
         }
     }

     // Anything else has to start with a name.
     Token t1 = lexNCName();
     if (t1.type == ERROR) return t1;

     skipWS();

     //If we're in an operator context, check for any operator names
     if (isOperatorContext()) {
         if (t1.value == QString::fromLatin1("and")) //### hash?
             return Token(AND);
         if (t1.value == QString::fromLatin1("or"))
             return Token(OR);
         if (t1.value == QString::fromLatin1("mod"))
             return Token(MULOP, NumericOp::OP_Mod);
         if (t1.value == QString::fromLatin1("div"))
             return Token(MULOP, NumericOp::OP_Div);
     }

     //See whether we are at a :
     if (peekCurHelper() == ':') {
         m_nextPos++;
         //Any chance it's an axis name?
         if (peekCurHelper() == ':') {
             m_nextPos++;

             //It might be an axis name.
             Step::AxisType axisType;
             if (isAxisName(t1.value, &axisType))
                 return Token(AXISNAME, axisType);
             //Ugh, :: is only valid in axis names -> error
             return Token(ERROR);
         }

         //Seems like this is a fully qualified qname, or perhaps the * modified one from NameTest
         skipWS();
         if (peekCurHelper() == '*') {
             m_nextPos++;
             return Token(NAMETEST, t1.value + ":*");
         }

         //Make a full qname..
         Token t2 = lexNCName();
         if (t2.type == ERROR) return t2;

         t1.value = t1.value + ':' + t2.value;
     }

     skipWS();
     if (peekCurHelper() == '(') {
         //note: we don't swallow the ( here!

         //either node type of function name
         if (isNodeTypeName(t1.value)) {
             if (t1.value == "processing-instruction")
                 return Token(PI, t1.value);
             else
                 return Token(NODETYPE, t1.value);
         }
         //must be a function name.
         return Token(FUNCTIONNAME, t1.value);
     }

     //At this point, it must be NAMETEST
     return Token(NAMETEST, t1.value);
 }

 // Produces the next token and records its type in m_lastTokenType, which
 // isOperatorContext() consults while the following token is lexed.
 Token Tokenizer::nextToken()
 {
     Token produced = nextTokenInternal();
     m_lastTokenType = produced.type;
     return produced;
 }

 // Starts out with empty input; reset() establishes the initial state.
 Tokenizer::Tokenizer()
 {
     const QString noInput;
     reset(noInput);
 }

 // Replaces the input with `data` and rewinds the tokenizer: the read
 // position returns to 0 and the remembered last token type is cleared.
 void Tokenizer::reset(QString data)
 {
     m_data = data;
     m_lastTokenType = 0;
     m_nextPos = 0;
 }

 // Lexer entry point: fetches the next token from the shared Tokenizer,
 // stores its value in xpathyylval and returns the token type.
 int xpathyylex()
 {
     Token tok = Tokenizer::self().nextToken();
     if (!tok.value.isEmpty()) {
         // Allocated with new and published through xpathyylval; nothing in
         // this function frees it.
         xpathyylval.str = new DomString(tok.value);
     } else {
         // Assign unconditionally. Skipping the store when the value is 0
         // would leave the previous token's value in xpathyylval, so a token
         // whose integer payload is legitimately 0 would be misread.
         xpathyylval.num = tok.intValue;
     }
     // NOTE(review): a token with an empty string value (e.g. the literal '')
     // also takes the integer branch and gets no DomString -- confirm how the
     // parser expects empty literals to be delivered.
     return tok.type;
 }

 // Points the shared Tokenizer at a new input string and rewinds it.
 void initTokenizer(QString string)
 {
     Tokenizer &lexer = Tokenizer::self();
     lexer.reset(string);
 }

 // Error callback: writes the message to stderr, prefixed with "error: ".
 void xpathyyerror(const char *str)
 {
     FILE *const sink = stderr;
     fprintf(sink, "error: %s\n", str);
 }

 // Helper whose destructor frees the Tokenizer singleton and both lookup
 // dictionaries. A single file-local instance is defined below, so the
 // cleanup runs when that static object is destroyed.
 struct TokenizerDeleter
 {
     ~TokenizerDeleter()
     {
         // Pointers that were never created are still 0; deleting a null
         // pointer does nothing.
         delete Tokenizer::s_instance;
         delete Tokenizer::s_axisNamesDict;
         delete Tokenizer::s_nodeTypeNamesDict;
     }
 };

 static TokenizerDeleter tokenizerDeleter;

 // kate: indent-width 4; replace-tabs off; tab-width 4; indent-spaces: off;
	/*
	* tokenizer.cc - Copyright 2005 Maksim Orlovich <maksim@kde.org>
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	*
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	*
	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	*/
	#include "config.h"
	#include "tokenizer.h"

	// Pairs the textual name of an XPath axis with the Step::AxisType value
	// that is stored for it in the axisNames table.
	struct AxisNameMapping
	{
	    // Stores both arguments as given; the name pointer is kept, not copied.
	    AxisNameMapping(const char *_name, Step::AxisType _type)
	        : name(_name)
	        , type(_type)
	    {
	    }

	    const char *name;     // axis name as it is spelled in an expression
	    Step::AxisType type;  // axis value associated with that name
	};

	// Table of the axis names the tokenizer recognises, each paired with its
	// Step::AxisType. isAxisName() copies these entries into a dictionary the
	// first time it is called.
	static AxisNameMapping axisNames[] = {
	AxisNameMapping("ancestor", Step::AncestorAxis),
	AxisNameMapping("ancestor-or-self", Step::AncestorOrSelfAxis),
	AxisNameMapping("attribute", Step::AttributeAxis),
	AxisNameMapping("child", Step::ChildAxis),
	AxisNameMapping("descendant", Step::DescendantAxis),
	AxisNameMapping("descendant-or-self", Step::DescendantOrSelfAxis),
	AxisNameMapping("following", Step::FollowingAxis),
	AxisNameMapping("following-sibling", Step::FollowingSiblingAxis),
	AxisNameMapping("namespace", Step::NamespaceAxis),
	AxisNameMapping("parent", Step::ParentAxis),
	AxisNameMapping("preceding", Step::PrecedingAxis),
	AxisNameMapping("preceding-sibling", Step::PrecedingSiblingAxis),
	AxisNameMapping("self", Step::SelfAxis)
	};
	// Number of entries in axisNames, derived from the array's size.
	static unsigned int axisNamesCount = sizeof(axisNames) / sizeof(axisNames[0]);

	#ifndef APPLE_COMPILE_HACK
	// Names that isNodeTypeName() accepts. The trailing 0 terminates the list
	// for the loop that walks it.
	static const char* const nodeTypeNames[] = {
	"comment",
	"text",
	"processing-instruction",
	"node",
	0
	};
	#endif

	// Singleton returned by Tokenizer::self(); created on the first call.
	Tokenizer* Tokenizer::s_instance = 0;

	// Lookup tables built on first use by isAxisName() and isNodeTypeName().
	Q3Dict<Step::AxisType>* Tokenizer::s_axisNamesDict = 0;
	Q3Dict<char>* Tokenizer::s_nodeTypeNamesDict = 0;

	// Returns the shared Tokenizer, constructing it on the first call.
	Tokenizer &Tokenizer::self()
	{
	    if (s_instance)
	        return *s_instance;

	    s_instance = new Tokenizer;
	    return *s_instance;
	}

	// Classifies a character for name lexing: NameStart may begin a name,
	// NameCont may only continue one, and NotPartOfName ends it.
	Tokenizer::XMLCat Tokenizer::charCat(QChar aChar)
	{
	    //### might need to add some special cases from the XML spec.

	    // A few punctuation characters are classified by hand. (The former
	    // test for '.' and '-' was written with an escaped "\|\|", which is
	    // not a C++ operator.)
	    switch (aChar.unicode()) {
	        case '_':
	            return NameStart;
	        case '.':
	        case '-':
	            return NameCont;
	        default:
	            break;
	    }

	#ifndef APPLE_COMPILE_HACK
	    // Everything else is decided by its Unicode general category.
	    switch (aChar.category()) {
	        case QChar::Letter_Lowercase: //Ll
	        case QChar::Letter_Uppercase: //Lu
	        case QChar::Letter_Other:     //Lo
	        case QChar::Letter_Titlecase: //Lt
	        case QChar::Number_Letter:    //Nl
	            return NameStart;

	        case QChar::Mark_SpacingCombining: //Mc
	        case QChar::Mark_Enclosing:        //Me
	        case QChar::Mark_NonSpacing:       //Mn
	        case QChar::Letter_Modifier:       //Lm
	        case QChar::Number_DecimalDigit:   //Nd
	            return NameCont;

	        default:
	            return NotPartOfName;
	    }
	#else
	    // Without category information nothing else counts as a name character.
	    return NotPartOfName;
	#endif
	}

	// Reports whether `name` is one of the axis names in the axisNames table.
	// When it is, and `type` is non-null, the matching axis is written to *type.
	bool Tokenizer::isAxisName(QString name, Step::AxisType *type)
	{
	    // Fill the dictionary from the table on first use. Auto-delete is
	    // switched on, so the dictionary owns the AxisType copies made here.
	    if (!s_axisNamesDict) {
	        s_axisNamesDict = new Q3Dict<Step::AxisType>;
	        s_axisNamesDict->setAutoDelete( true );
	        for (unsigned int p = 0; p < axisNamesCount; ++p)
	            s_axisNamesDict->insert(QString::fromLatin1(axisNames[p].name),
	                                    new Step::AxisType(axisNames[p].type));
	    }

	    Step::AxisType *t = s_axisNamesDict->find(name);
	    if ( t && type ) {
	        // Copy the value through the out-parameter. Assigning the pointer
	        // itself ("type = t") would only rebind the local parameter and
	        // the caller would never see the result.
	        *type = *t;
	    }
	    return t != 0;
	}

	// Reports whether `name` is one of the entries of nodeTypeNames.
	// Always returns false when built with APPLE_COMPILE_HACK.
	bool Tokenizer::isNodeTypeName(QString name)
	{
	#ifndef APPLE_COMPILE_HACK
	    if (!s_nodeTypeNamesDict) {
	        // The dictionary is used as a set: only the presence of a key
	        // matters. The stored item still has to be a non-null const char*,
	        // so the name literal itself is used. Passing the whole
	        // nodeTypeNames array (a const char* const*) is not a matching
	        // pointer type for a Q3Dict<char> item.
	        s_nodeTypeNamesDict = new Q3Dict<char>;
	        for (int p = 0; nodeTypeNames[p]; ++p)
	            s_nodeTypeNamesDict->insert(QString::fromLatin1(nodeTypeNames[p]),
	                                        nodeTypeNames[p] /*dummy*/);
	    }
	    // find() yields the stored item or 0; make the bool conversion explicit.
	    return s_nodeTypeNamesDict->find(name) != 0;
	#else
	    return false;
	#endif
	}

	/* Returns whether the next '*' or NCName has to be read as an operator,
	 * following the disambiguation rules of the XPath lexical structure
	 * (check http://www.w3.org/TR/xpath#exprlex): that is the case only when
	 * there is a preceding token and it is not one of @, ::, (, [, ',' or an
	 * operator.
	 */
	bool Tokenizer::isOperatorContext()
	{
	    // reset() stores 0 in m_lastTokenType, so 0 means "no preceding token";
	    // the first token of an expression can never be an operator.
	    if (m_lastTokenType == 0)
	        return false;

	    switch ( m_lastTokenType ) {
	        case AND: case OR: case MULOP:
	        case '/': case SLASHSLASH: case '|': case PLUS: case MINUS:
	        case EQOP: case RELOP:
	        case '@': case AXISNAME: case '(': case '[': case ',':
	            return false;
	        default:
	            return true;
	    }
	}

	// Moves m_nextPos forward past any whitespace at the current position.
	void Tokenizer::skipWS()
	{
	    for (; m_nextPos < m_data.length(); ++m_nextPos) {
	        if (!m_data[m_nextPos].isSpace())
	            break;
	    }
	}

	// Builds a token of type `code` with no value and moves the read position
	// forward by `advance` characters.
	Token Tokenizer::makeTokenAndAdvance(int code, int advance)
	{
	    Token result(code);
	    m_nextPos += advance;
	    return result;
	}

	// Builds a token of type `code` carrying the integer `val` and moves the
	// read position forward by `advance` characters.
	Token Tokenizer::makeIntTokenAndAdvance(int code, int val, int advance)
	{
	    Token result(code, val);
	    m_nextPos += advance;
	    return result;
	}

	// Returns the character one past the current position, or 0 when there is
	// no such character or its row() is non-zero (only the cell() is returned,
	// so characters outside row 0 are reported as "not interesting").
	char Tokenizer::peekAheadHelper()
	{
	    if (m_nextPos + 1 < m_data.length()) {
	        QChar next = m_data[m_nextPos + 1];
	        if (next.row() == 0)
	            return next.cell();
	    }
	    return 0;
	}

	// Returns the character at the current position, or 0 when the position is
	// at the end of the input or the character's row() is non-zero.
	char Tokenizer::peekCurHelper()
	{
	    if (m_nextPos < m_data.length()) {
	        QChar next = m_data[m_nextPos];
	        if (next.row() == 0)
	            return next.cell();
	    }
	    return 0;
	}

	// Lexes a quoted string. The character at m_nextPos on entry is taken as
	// the delimiter; the text up to its next occurrence becomes a LITERAL token
	// and the closing delimiter is consumed. If the delimiter never recurs, an
	// ERROR token is returned and m_nextPos is left at the end of the input.
	Token Tokenizer::lexString()
	{
	    const QChar quote     = m_data[m_nextPos];
	    const int   bodyStart = m_nextPos + 1;

	    // Scan forward to the closing delimiter or the end of the input.
	    m_nextPos = bodyStart;
	    while (m_nextPos < m_data.length() && !(m_data[m_nextPos] == quote))
	        ++m_nextPos;

	    if (m_nextPos >= m_data.length()) {
	        //Ouch, went off the end -- report error
	        return Token(ERROR);
	    }

	    QString value = m_data.mid(bodyStart, m_nextPos - bodyStart);
	    ++m_nextPos; //Consume the closing delimiter
	    return Token(LITERAL, value);
	}

	// Lexes a run of ASCII digits containing at most one '.', starting at
	// m_nextPos, and returns it as a NUMBER token holding the matched text.
	Token Tokenizer::lexNumber()
	{
	    int startPos = m_nextPos;
	    bool seenDot = false;

	    //Go until end or a non-digits character
	    for (; m_nextPos < m_data.length(); ++m_nextPos) {
	        QChar aChar = m_data[m_nextPos];
	        if (aChar.row() != 0) break;

	        // The digit test needs a real logical OR; an escaped "\|\|" is
	        // not a C++ operator.
	        if (aChar.cell() < '0' || aChar.cell() > '9') {
	            // Only the first '.' belongs to the number.
	            if (aChar.cell() == '.' && !seenDot)
	                seenDot = true;
	            else
	                break;
	        }
	    }

	    QString value = m_data.mid(startPos, m_nextPos - startPos);
	    return Token(NUMBER, value);
	}

	// Lexes a name at m_nextPos: one NameStart character followed by any run
	// of characters that charCat() does not classify as NotPartOfName. Returns
	// a token built from the matched text, or an ERROR token (made through
	// makeTokenAndAdvance) when the current character cannot start a name.
	Token Tokenizer::lexNCName()
	{
	    const int firstPos = m_nextPos;

	    const bool startsName = m_nextPos < m_data.length()
	                         && charCat(m_data[m_nextPos]) == NameStart;
	    if (!startsName)
	        return makeTokenAndAdvance(ERROR);

	    //Keep going until we get a character that's not good for names.
	    while (m_nextPos < m_data.length()
	           && charCat(m_data[m_nextPos]) != NotPartOfName)
	        ++m_nextPos;

	    QString value = m_data.mid(firstPos, m_nextPos - firstPos);
	    return Token(value);
	}

	// Lexes a possibly prefixed name ("local" or "prefix:local") at m_nextPos.
	// Returns a token whose value is the full name, or the ERROR token produced
	// by lexNCName() when either part is not a valid name.
	Token Tokenizer::lexQName()
	{
	    Token t1 = lexNCName();
	    if (t1.type == ERROR) return t1;
	    skipWS();
	    //If the current character is ':', what we just got is the prefix; if
	    //not, it's the whole thing. lexNCName() leaves m_nextPos ON the first
	    //character after the name, so that is the character to test (looking
	    //one further ahead would inspect the start of the local part instead).
	    if (peekCurHelper() != ':')
	        return t1;

	    //Consume the ':' so that the local part can be lexed.
	    ++m_nextPos;

	    Token t2 = lexNCName();
	    if (t2.type == ERROR) return t2;

	    return Token(t1.value + ":" + t2.value);
	}

	// Lexes and returns the next token starting at m_nextPos, after skipping
	// leading whitespace. Returns Token(0) at the end of the input and an ERROR
	// token when the input cannot be tokenized. isOperatorContext() decides
	// whether '*' and the names and/or/mod/div are read as operators.
	Token Tokenizer::nextTokenInternal()
	{
	    skipWS();

	    if (m_nextPos >= m_data.length()) {
	        return Token(0);
	    }

	    // peekCurHelper() yields 0 for a character whose row() is non-zero; such
	    // a character matches no case below and is handled by the name path.
	    char code = peekCurHelper();
	    switch (code) {
	        // Single-character tokens use the character itself as token type.
	        // The pipe is written as a plain '|'; an escaped form is not a
	        // valid character escape.
	        case '(': case ')': case '[': case ']':
	        case '@': case ',': case '|':
	            return makeTokenAndAdvance(code);
	        case '\'':
	        case '\"':
	            return lexString();
	        case '0': case '1': case '2': case '3': case '4':
	        case '5': case '6': case '7': case '8': case '9':
	            return lexNumber();
	        case '.': {
	            // '.' may start "..", a number such as ".5", or stand alone.
	            char next = peekAheadHelper();
	            if (next == '.')
	                return makeTokenAndAdvance(DOTDOT, 2);
	            else if (next >= '0' && next <= '9')
	                return lexNumber();
	            else
	                return makeTokenAndAdvance('.');
	        }
	        case '/':
	            if (peekAheadHelper() == '/')
	                return makeTokenAndAdvance(SLASHSLASH, 2);
	            else
	                return makeTokenAndAdvance('/');
	        case '+':
	            return makeTokenAndAdvance(PLUS);
	        case '-':
	            return makeTokenAndAdvance(MINUS);
	        case '=':
	            return makeIntTokenAndAdvance(EQOP, EqTestOp::OP_EQ);
	        case '!':
	            // '!' is only accepted as part of "!="; on error m_nextPos is
	            // left where it is.
	            if (peekAheadHelper() == '=')
	                return makeIntTokenAndAdvance(EQOP, EqTestOp::OP_NE, 2);
	            else {
	                return Token(ERROR);
	            }
	        case '<':
	            if (peekAheadHelper() == '=')
	                return makeIntTokenAndAdvance(RELOP, NumericOp::OP_LE, 2);
	            else
	                return makeIntTokenAndAdvance(RELOP, NumericOp::OP_LT);
	        case '>':
	            if (peekAheadHelper() == '=')
	                return makeIntTokenAndAdvance(RELOP, NumericOp::OP_GE, 2);
	            else
	                return makeIntTokenAndAdvance(RELOP, NumericOp::OP_GT);
	        case '*':
	            // Multiplication in operator context, otherwise the wildcard
	            // name test.
	            if (isOperatorContext())
	                return makeIntTokenAndAdvance(MULOP, NumericOp::OP_Mul);
	            else {
	                ++m_nextPos;
	                return Token(NAMETEST, "*");
	            }
	        case '$': {//$ QName
	            m_nextPos++;
	            Token par = lexQName();
	            if (par.type == ERROR)
	                return par;
	            else
	                return Token(VARIABLEREFERENCE, par.value);
	        }
	    }

	    // Anything else has to start with a name.
	    Token t1 = lexNCName();
	    if (t1.type == ERROR) return t1;

	    skipWS();

	    //If we're in an operator context, check for any operator names
	    if (isOperatorContext()) {
	        if (t1.value == QString::fromLatin1("and")) //### hash?
	            return Token(AND);
	        if (t1.value == QString::fromLatin1("or"))
	            return Token(OR);
	        if (t1.value == QString::fromLatin1("mod"))
	            return Token(MULOP, NumericOp::OP_Mod);
	        if (t1.value == QString::fromLatin1("div"))
	            return Token(MULOP, NumericOp::OP_Div);
	    }

	    //See whether we are at a :
	    if (peekCurHelper() == ':') {
	        m_nextPos++;
	        //Any chance it's an axis name?
	        if (peekCurHelper() == ':') {
	            m_nextPos++;

	            //It might be an axis name.
	            Step::AxisType axisType;
	            if (isAxisName(t1.value, &axisType))
	                return Token(AXISNAME, axisType);
	            //Ugh, :: is only valid in axis names -> error
	            return Token(ERROR);
	        }

	        //Seems like this is a fully qualified qname, or perhaps the * modified one from NameTest
	        skipWS();
	        if (peekCurHelper() == '*') {
	            m_nextPos++;
	            return Token(NAMETEST, t1.value + ":*");
	        }

	        //Make a full qname..
	        Token t2 = lexNCName();
	        if (t2.type == ERROR) return t2;

	        t1.value = t1.value + ':' + t2.value;
	    }

	    skipWS();
	    if (peekCurHelper() == '(') {
	        //note: we don't swallow the ( here!

	        //either node type of function name
	        if (isNodeTypeName(t1.value)) {
	            if (t1.value == "processing-instruction")
	                return Token(PI, t1.value);
	            else
	                return Token(NODETYPE, t1.value);
	        }
	        //must be a function name.
	        return Token(FUNCTIONNAME, t1.value);
	    }

	    //At this point, it must be NAMETEST
	    return Token(NAMETEST, t1.value);
	}

	// Produces the next token and records its type in m_lastTokenType, which
	// isOperatorContext() consults while the following token is lexed.
	Token Tokenizer::nextToken()
	{
	    Token produced = nextTokenInternal();
	    m_lastTokenType = produced.type;
	    return produced;
	}

	// Starts out with empty input; reset() establishes the initial state.
	Tokenizer::Tokenizer()
	{
	    const QString noInput;
	    reset(noInput);
	}

	// Replaces the input with `data` and rewinds the tokenizer: the read
	// position returns to 0 and the remembered last token type is cleared.
	void Tokenizer::reset(QString data)
	{
	    m_data = data;
	    m_lastTokenType = 0;
	    m_nextPos = 0;
	}

	// Lexer entry point: fetches the next token from the shared Tokenizer,
	// stores its value in xpathyylval and returns the token type.
	int xpathyylex()
	{
	    Token tok = Tokenizer::self().nextToken();
	    if (!tok.value.isEmpty()) {
	        // Allocated with new and published through xpathyylval; nothing in
	        // this function frees it.
	        xpathyylval.str = new DomString(tok.value);
	    } else {
	        // Assign unconditionally. Skipping the store when the value is 0
	        // would leave the previous token's value in xpathyylval, so a token
	        // whose integer payload is legitimately 0 would be misread.
	        xpathyylval.num = tok.intValue;
	    }
	    // NOTE(review): a token with an empty string value (e.g. the literal '')
	    // also takes the integer branch and gets no DomString -- confirm how the
	    // parser expects empty literals to be delivered.
	    return tok.type;
	}

	// Points the shared Tokenizer at a new input string and rewinds it.
	void initTokenizer(QString string)
	{
	    Tokenizer &lexer = Tokenizer::self();
	    lexer.reset(string);
	}

	// Error callback: writes the message to stderr, prefixed with "error: ".
	void xpathyyerror(const char *str)
	{
	    FILE *const sink = stderr;
	    fprintf(sink, "error: %s\n", str);
	}

	// Helper whose destructor frees the Tokenizer singleton and both lookup
	// dictionaries. A single file-local instance is defined below, so the
	// cleanup runs when that static object is destroyed.
	struct TokenizerDeleter
	{
	    ~TokenizerDeleter()
	    {
	        // Pointers that were never created are still 0; deleting a null
	        // pointer does nothing.
	        delete Tokenizer::s_instance;
	        delete Tokenizer::s_axisNamesDict;
	        delete Tokenizer::s_nodeTypeNamesDict;
	    }
	};

	static TokenizerDeleter tokenizerDeleter;

	// kate: indent-width 4; replace-tabs off; tab-width 4; indent-spaces: off;