2010-06-11  Eric Seidel  <eric@webkit.org>

        Reviewed by Adam Barth.

        Rename the rest of the *Tokenizer classes to *DocumentParser
        https://bugs.webkit.org/show_bug.cgi?id=40507

        * wtf/Platform.h:
         - fixed a comment to match new names.

2010-06-11  Eric Seidel  <eric@webkit.org>

        Reviewed by Adam Barth.

        Rename the rest of the *Tokenizer classes to *DocumentParser
        https://bugs.webkit.org/show_bug.cgi?id=40507

        This search/replace was done by do-webcore-rename.
        Just looking for a rubber stamp.

        XMLTokenizer => XMLDocumentParser
        XMLTokenizerLibxml2 => XMLDocumentParserLibxml2
        XMLTokenizerQt => XMLDocumentParserQt
        XMLTokenizerScope => XMLDocumentParserScope
        HTML5Tokenizer => HTML5DocumentParser
        HTMLTokenizer => HTMLDocumentParser

        No functional change, thus no tests.

        * Android.mk:
        * CMakeLists.txt:
        * GNUmakefile.am:
        * WebCore.gypi:
        * WebCore.pro:
        * WebCore.vcproj/WebCore.vcproj:
        * WebCore.xcodeproj/project.pbxproj:
        * css/CSSStyleSheet.cpp:
        (WebCore::CSSStyleSheet::checkLoaded):
        * dom/Document.cpp:
        (WebCore::Document::createTokenizer):
        (WebCore::Document::write):
        * dom/Document.h:
        (WebCore::Document::setXMLEncoding):
        * dom/DocumentParser.h:
        (WebCore::DocumentParser::asHTMLTokenizer):
        * dom/Element.cpp:
        * dom/ProcessingInstruction.cpp:
        * dom/XMLDocumentParser.cpp: Added.
        (WebCore::XMLDocumentParser::isWMLDocument):
        (WebCore::XMLDocumentParser::pushCurrentNode):
        (WebCore::XMLDocumentParser::popCurrentNode):
        (WebCore::XMLDocumentParser::clearCurrentNodeStack):
        (WebCore::XMLDocumentParser::write):
        (WebCore::XMLDocumentParser::handleError):
        (WebCore::XMLDocumentParser::enterText):
        (WebCore::toString):
        (WebCore::XMLDocumentParser::exitText):
        (WebCore::XMLDocumentParser::end):
        (WebCore::XMLDocumentParser::finish):
        (WebCore::createXHTMLParserErrorHeader):
        (WebCore::XMLDocumentParser::insertErrorMessageBlock):
        (WebCore::XMLDocumentParser::notifyFinished):
        (WebCore::XMLDocumentParser::isWaitingForScripts):
        (WebCore::XMLDocumentParser::pauseParsing):
        * dom/XMLDocumentParser.h: Added.
        (WebCore::XMLParserContext::context):
        (WebCore::XMLParserContext::XMLParserContext):
        (WebCore::XMLDocumentParser::):
        (WebCore::XMLDocumentParser::setIsXHTMLDocument):
        (WebCore::XMLDocumentParser::isXHTMLDocument):
        (WebCore::XMLDocumentParser::setIsXHTMLMPDocument):
        (WebCore::XMLDocumentParser::isXHTMLMPDocument):
        (WebCore::XMLDocumentParser::wellFormed):
        (WebCore::XMLDocumentParser::context):
        * dom/XMLDocumentParserLibxml2.cpp: Added.
        (WebCore::PendingCallbacks::~PendingCallbacks):
        (WebCore::PendingCallbacks::appendStartElementNSCallback):
        (WebCore::PendingCallbacks::appendEndElementNSCallback):
        (WebCore::PendingCallbacks::appendCharactersCallback):
        (WebCore::PendingCallbacks::appendProcessingInstructionCallback):
        (WebCore::PendingCallbacks::appendCDATABlockCallback):
        (WebCore::PendingCallbacks::appendCommentCallback):
        (WebCore::PendingCallbacks::appendInternalSubsetCallback):
        (WebCore::PendingCallbacks::appendErrorCallback):
        (WebCore::PendingCallbacks::callAndRemoveFirstCallback):
        (WebCore::PendingCallbacks::isEmpty):
        (WebCore::PendingCallbacks::PendingCallback::~PendingCallback):
        (WebCore::PendingCallbacks::PendingStartElementNSCallback::~PendingStartElementNSCallback):
        (WebCore::PendingCallbacks::PendingStartElementNSCallback::call):
        (WebCore::PendingCallbacks::PendingEndElementNSCallback::call):
        (WebCore::PendingCallbacks::PendingCharactersCallback::~PendingCharactersCallback):
        (WebCore::PendingCallbacks::PendingCharactersCallback::call):
        (WebCore::PendingCallbacks::PendingProcessingInstructionCallback::~PendingProcessingInstructionCallback):
        (WebCore::PendingCallbacks::PendingProcessingInstructionCallback::call):
        (WebCore::PendingCallbacks::PendingCDATABlockCallback::~PendingCDATABlockCallback):
        (WebCore::PendingCallbacks::PendingCDATABlockCallback::call):
        (WebCore::PendingCallbacks::PendingCommentCallback::~PendingCommentCallback):
        (WebCore::PendingCallbacks::PendingCommentCallback::call):
        (WebCore::PendingCallbacks::PendingInternalSubsetCallback::~PendingInternalSubsetCallback):
        (WebCore::PendingCallbacks::PendingInternalSubsetCallback::call):
        (WebCore::PendingCallbacks::):
        (WebCore::matchFunc):
        (WebCore::OffsetBuffer::OffsetBuffer):
        (WebCore::OffsetBuffer::readOutBytes):
        (WebCore::shouldAllowExternalLoad):
        (WebCore::openFunc):
        (WebCore::readFunc):
        (WebCore::writeFunc):
        (WebCore::closeFunc):
        (WebCore::errorFunc):
        (WebCore::XMLParserContext::createStringParser):
        (WebCore::XMLParserContext::createMemoryParser):
        (WebCore::XMLDocumentParser::XMLDocumentParser):
        (WebCore::XMLParserContext::~XMLParserContext):
        (WebCore::XMLDocumentParser::~XMLDocumentParser):
        (WebCore::XMLDocumentParser::doWrite):
        (WebCore::toString):
        (WebCore::handleElementNamespaces):
        (WebCore::handleElementAttributes):
        (WebCore::XMLDocumentParser::startElementNs):
        (WebCore::XMLDocumentParser::endElementNs):
        (WebCore::XMLDocumentParser::characters):
        (WebCore::XMLDocumentParser::error):
        (WebCore::XMLDocumentParser::processingInstruction):
        (WebCore::XMLDocumentParser::cdataBlock):
        (WebCore::XMLDocumentParser::comment):
        (WebCore::XMLDocumentParser::startDocument):
        (WebCore::XMLDocumentParser::endDocument):
        (WebCore::XMLDocumentParser::internalSubset):
        (WebCore::getTokenizer):
        (WebCore::hackAroundLibXMLEntityBug):
        (WebCore::startElementNsHandler):
        (WebCore::endElementNsHandler):
        (WebCore::charactersHandler):
        (WebCore::processingInstructionHandler):
        (WebCore::cdataBlockHandler):
        (WebCore::commentHandler):
        (WebCore::warningHandler):
        (WebCore::fatalErrorHandler):
        (WebCore::normalErrorHandler):
        (WebCore::):
        (WebCore::sharedXHTMLEntity):
        (WebCore::getXHTMLEntity):
        (WebCore::getEntityHandler):
        (WebCore::startDocumentHandler):
        (WebCore::endDocumentHandler):
        (WebCore::internalSubsetHandler):
        (WebCore::externalSubsetHandler):
        (WebCore::ignorableWhitespaceHandler):
        (WebCore::XMLDocumentParser::initializeParserContext):
        (WebCore::XMLDocumentParser::doEnd):
        (WebCore::xmlDocPtrForString):
        (WebCore::XMLDocumentParser::lineNumber):
        (WebCore::XMLDocumentParser::columnNumber):
        (WebCore::XMLDocumentParser::stopParsing):
        (WebCore::XMLDocumentParser::resumeParsing):
        (WebCore::parseXMLDocumentFragment):
        (WebCore::attributesStartElementNsHandler):
        (WebCore::parseAttributes):
        * dom/XMLDocumentParserQt.cpp: Added.
        (WebCore::EntityResolver::resolveUndeclaredEntity):
        (WebCore::XMLDocumentParser::XMLDocumentParser):
        (WebCore::XMLDocumentParser::~XMLDocumentParser):
        (WebCore::XMLDocumentParser::doWrite):
        (WebCore::XMLDocumentParser::initializeParserContext):
        (WebCore::XMLDocumentParser::doEnd):
        (WebCore::XMLDocumentParser::lineNumber):
        (WebCore::XMLDocumentParser::columnNumber):
        (WebCore::XMLDocumentParser::stopParsing):
        (WebCore::XMLDocumentParser::resumeParsing):
        (WebCore::parseXMLDocumentFragment):
        (WebCore::attributesStartElementNsHandler):
        (WebCore::parseAttributes):
        (WebCore::prefixFromQName):
        (WebCore::handleElementNamespaces):
        (WebCore::handleElementAttributes):
        (WebCore::XMLDocumentParser::parse):
        (WebCore::XMLDocumentParser::startDocument):
        (WebCore::XMLDocumentParser::parseStartElement):
        (WebCore::XMLDocumentParser::parseEndElement):
        (WebCore::XMLDocumentParser::parseCharacters):
        (WebCore::XMLDocumentParser::parseProcessingInstruction):
        (WebCore::XMLDocumentParser::parseCdata):
        (WebCore::XMLDocumentParser::parseComment):
        (WebCore::XMLDocumentParser::endDocument):
        (WebCore::XMLDocumentParser::hasError):
        (WebCore::XMLDocumentParser::parseDtd):
        * dom/XMLDocumentParserScope.cpp: Added.
        (WebCore::XMLDocumentParserScope::XMLDocumentParserScope):
        (WebCore::XMLDocumentParserScope::~XMLDocumentParserScope):
        * dom/XMLDocumentParserScope.h: Added.
        * dom/XMLTokenizer.cpp: Removed.
        * dom/XMLTokenizer.h: Removed.
        * dom/XMLTokenizerLibxml2.cpp: Removed.
        * dom/XMLTokenizerQt.cpp: Removed.
        * dom/XMLTokenizerScope.cpp: Removed.
        * dom/XMLTokenizerScope.h: Removed.
        * html/HTML5DocumentParser.cpp: Added.
        (WebCore::):
        (WebCore::HTML5DocumentParser::HTML5DocumentParser):
        (WebCore::HTML5DocumentParser::~HTML5DocumentParser):
        (WebCore::HTML5DocumentParser::begin):
        (WebCore::HTML5DocumentParser::pumpLexerIfPossible):
        (WebCore::HTML5DocumentParser::pumpLexer):
        (WebCore::HTML5DocumentParser::write):
        (WebCore::HTML5DocumentParser::end):
        (WebCore::HTML5DocumentParser::attemptToEnd):
        (WebCore::HTML5DocumentParser::endIfDelayed):
        (WebCore::HTML5DocumentParser::finish):
        (WebCore::HTML5DocumentParser::executingScript):
        (WebCore::HTML5DocumentParser::lineNumber):
        (WebCore::HTML5DocumentParser::columnNumber):
        (WebCore::HTML5DocumentParser::htmlParser):
        (WebCore::HTML5DocumentParser::isWaitingForScripts):
        (WebCore::HTML5DocumentParser::resumeParsingAfterScriptExecution):
        (WebCore::HTML5DocumentParser::watchForLoad):
        (WebCore::HTML5DocumentParser::stopWatchingForLoad):
        (WebCore::HTML5DocumentParser::shouldLoadExternalScriptFromSrc):
        (WebCore::HTML5DocumentParser::executeScript):
        (WebCore::HTML5DocumentParser::notifyFinished):
        (WebCore::HTML5DocumentParser::executeScriptsWaitingForStylesheets):
        (WebCore::HTML5DocumentParser::script):
        * html/HTML5DocumentParser.h: Added.
        (WebCore::HTML5DocumentParser::InputStream::InputStream):
        (WebCore::HTML5DocumentParser::InputStream::appendToEnd):
        (WebCore::HTML5DocumentParser::InputStream::insertAtCurrentInsertionPoint):
        (WebCore::HTML5DocumentParser::InputStream::close):
        (WebCore::HTML5DocumentParser::InputStream::current):
        (WebCore::HTML5DocumentParser::InputStream::splitInto):
        (WebCore::HTML5DocumentParser::InputStream::mergeFrom):
        (WebCore::HTML5DocumentParser::InsertionPointRecord::InsertionPointRecord):
        (WebCore::HTML5DocumentParser::InsertionPointRecord::~InsertionPointRecord):
        (WebCore::HTML5DocumentParser::inWrite):
        * html/HTML5Lexer.h:
        (WebCore::HTML5Lexer::columnNumber):
        * html/HTML5Tokenizer.cpp: Removed.
        * html/HTML5Tokenizer.h: Removed.
        * html/HTML5TreeBuilder.cpp:
        * html/HTMLDocument.cpp:
        (WebCore::HTMLDocument::createTokenizer):
        * html/HTMLDocumentParser.cpp: Added.
        (WebCore::):
        (WebCore::fixUpChar):
        (WebCore::tagMatch):
        (WebCore::Token::addAttribute):
        (WebCore::HTMLDocumentParser::HTMLDocumentParser):
        (WebCore::HTMLDocumentParser::reset):
        (WebCore::HTMLDocumentParser::begin):
        (WebCore::HTMLDocumentParser::setForceSynchronous):
        (WebCore::HTMLDocumentParser::processListing):
        (WebCore::HTMLDocumentParser::parseNonHTMLText):
        (WebCore::HTMLDocumentParser::scriptHandler):
        (WebCore::HTMLDocumentParser::scriptExecution):
        (WebCore::HTMLDocumentParser::parseComment):
        (WebCore::HTMLDocumentParser::parseServer):
        (WebCore::HTMLDocumentParser::parseProcessingInstruction):
        (WebCore::HTMLDocumentParser::parseText):
        (WebCore::HTMLDocumentParser::parseEntity):
        (WebCore::HTMLDocumentParser::parseDoctype):
        (WebCore::HTMLDocumentParser::parseTag):
        (WebCore::HTMLDocumentParser::continueProcessing):
        (WebCore::HTMLDocumentParser::advance):
        (WebCore::HTMLDocumentParser::willWriteHTML):
        (WebCore::HTMLDocumentParser::didWriteHTML):
        (WebCore::HTMLDocumentParser::write):
        (WebCore::HTMLDocumentParser::stopParsing):
        (WebCore::HTMLDocumentParser::processingData):
        (WebCore::HTMLDocumentParser::timerFired):
        (WebCore::HTMLDocumentParser::end):
        (WebCore::HTMLDocumentParser::finish):
        (WebCore::HTMLDocumentParser::processToken):
        (WebCore::HTMLDocumentParser::processDoctypeToken):
        (WebCore::HTMLDocumentParser::~HTMLDocumentParser):
        (WebCore::HTMLDocumentParser::enlargeBuffer):
        (WebCore::HTMLDocumentParser::enlargeScriptBuffer):
        (WebCore::HTMLDocumentParser::executeScriptsWaitingForStylesheets):
        (WebCore::HTMLDocumentParser::notifyFinished):
        (WebCore::HTMLDocumentParser::executeExternalScriptsIfReady):
        (WebCore::HTMLDocumentParser::executeExternalScriptsTimerFired):
        (WebCore::HTMLDocumentParser::continueExecutingExternalScripts):
        (WebCore::HTMLDocumentParser::isWaitingForScripts):
        (WebCore::HTMLDocumentParser::setSrc):
        (WebCore::parseHTMLDocumentFragment):
        (WebCore::decodeNamedEntity):
        * html/HTMLDocumentParser.h: Added.
        (WebCore::Token::Token):
        (WebCore::Token::~Token):
        (WebCore::Token::isOpenTag):
        (WebCore::Token::isCloseTag):
        (WebCore::Token::reset):
        (WebCore::Token::addViewSourceChar):
        (WebCore::):
        (WebCore::DoctypeToken::DoctypeToken):
        (WebCore::DoctypeToken::reset):
        (WebCore::DoctypeToken::state):
        (WebCore::DoctypeToken::setState):
        (WebCore::HTMLDocumentParser::forceSynchronous):
        (WebCore::HTMLDocumentParser::executingScript):
        (WebCore::HTMLDocumentParser::lineNumber):
        (WebCore::HTMLDocumentParser::columnNumber):
        (WebCore::HTMLDocumentParser::processingContentWrittenByScript):
        (WebCore::HTMLDocumentParser::htmlParser):
        (WebCore::HTMLDocumentParser::asHTMLTokenizer):
        (WebCore::HTMLDocumentParser::checkBuffer):
        (WebCore::HTMLDocumentParser::checkScriptBuffer):
        (WebCore::HTMLDocumentParser::):
        (WebCore::HTMLDocumentParser::State::State):
        (WebCore::HTMLDocumentParser::State::tagState):
        (WebCore::HTMLDocumentParser::State::setTagState):
        (WebCore::HTMLDocumentParser::State::entityState):
        (WebCore::HTMLDocumentParser::State::setEntityState):
        (WebCore::HTMLDocumentParser::State::inScript):
        (WebCore::HTMLDocumentParser::State::setInScript):
        (WebCore::HTMLDocumentParser::State::inStyle):
        (WebCore::HTMLDocumentParser::State::setInStyle):
        (WebCore::HTMLDocumentParser::State::inXmp):
        (WebCore::HTMLDocumentParser::State::setInXmp):
        (WebCore::HTMLDocumentParser::State::inTitle):
        (WebCore::HTMLDocumentParser::State::setInTitle):
        (WebCore::HTMLDocumentParser::State::inIFrame):
        (WebCore::HTMLDocumentParser::State::setInIFrame):
        (WebCore::HTMLDocumentParser::State::inPlainText):
        (WebCore::HTMLDocumentParser::State::setInPlainText):
        (WebCore::HTMLDocumentParser::State::inProcessingInstruction):
        (WebCore::HTMLDocumentParser::State::setInProcessingInstruction):
        (WebCore::HTMLDocumentParser::State::inComment):
        (WebCore::HTMLDocumentParser::State::setInComment):
        (WebCore::HTMLDocumentParser::State::inDoctype):
        (WebCore::HTMLDocumentParser::State::setInDoctype):
        (WebCore::HTMLDocumentParser::State::inTextArea):
        (WebCore::HTMLDocumentParser::State::setInTextArea):
        (WebCore::HTMLDocumentParser::State::escaped):
        (WebCore::HTMLDocumentParser::State::setEscaped):
        (WebCore::HTMLDocumentParser::State::inServer):
        (WebCore::HTMLDocumentParser::State::setInServer):
        (WebCore::HTMLDocumentParser::State::skipLF):
        (WebCore::HTMLDocumentParser::State::setSkipLF):
        (WebCore::HTMLDocumentParser::State::startTag):
        (WebCore::HTMLDocumentParser::State::setStartTag):
        (WebCore::HTMLDocumentParser::State::discardLF):
        (WebCore::HTMLDocumentParser::State::setDiscardLF):
        (WebCore::HTMLDocumentParser::State::allowYield):
        (WebCore::HTMLDocumentParser::State::setAllowYield):
        (WebCore::HTMLDocumentParser::State::loadingExtScript):
        (WebCore::HTMLDocumentParser::State::setLoadingExtScript):
        (WebCore::HTMLDocumentParser::State::forceSynchronous):
        (WebCore::HTMLDocumentParser::State::setForceSynchronous):
        (WebCore::HTMLDocumentParser::State::inAnyNonHTMLText):
        (WebCore::HTMLDocumentParser::State::hasTagState):
        (WebCore::HTMLDocumentParser::State::hasEntityState):
        (WebCore::HTMLDocumentParser::State::needsSpecialWriteHandling):
        (WebCore::HTMLDocumentParser::State::):
        (WebCore::HTMLDocumentParser::State::setBit):
        (WebCore::HTMLDocumentParser::State::testBit):
        * html/HTMLElement.cpp:
        * html/HTMLFormControlElement.cpp:
        * html/HTMLParser.cpp:
        (WebCore::HTMLParser::reportErrorToConsole):
        * html/HTMLParser.h:
        * html/HTMLTokenizer.cpp: Removed.
        * html/HTMLTokenizer.h: Removed.
        * html/HTMLViewSourceDocument.cpp:
        (WebCore::HTMLViewSourceDocument::createTokenizer):
        (WebCore::HTMLViewSourceDocument::addViewSourceToken):
        * html/HTMLViewSourceDocument.h:
        * loader/DocumentLoader.cpp:
        * loader/FTPDirectoryDocument.cpp:
        (WebCore::FTPDirectoryTokenizer::FTPDirectoryTokenizer):
        (WebCore::FTPDirectoryTokenizer::loadDocumentTemplate):
        (WebCore::FTPDirectoryTokenizer::finish):
        * loader/FrameLoader.cpp:
        * loader/ImageDocument.cpp:
        * loader/MediaDocument.cpp:
        * loader/PluginDocument.cpp:
        * loader/TextDocument.cpp:
        * page/XSSAuditor.h:
        * svg/SVGDocumentExtensions.cpp:
        * wml/WMLErrorHandling.cpp:
        (WebCore::reportWMLError):
        * xml/XSLStyleSheetLibxslt.cpp:
        (WebCore::XSLStyleSheet::parseString):
        * xml/XSLTProcessor.cpp:
        * xml/XSLTProcessorLibxslt.cpp:

2010-06-11  Eric Seidel  <eric@webkit.org>

        Reviewed by Adam Barth.

        Rename the rest of the *Tokenizer classes to *DocumentParser
        https://bugs.webkit.org/show_bug.cgi?id=40507

        * src/WebEntities.cpp:
         - Fix a comment to match new class names.

git-svn-id: http://svn.webkit.org/repository/webkit/trunk@61073 268f45cc-cd09-0410-ab3c-d52691b4dbfc
diff --git a/WebCore/html/HTMLDocumentParser.h b/WebCore/html/HTMLDocumentParser.h
new file mode 100644
index 0000000..9ccebe5
--- /dev/null
+++ b/WebCore/html/HTMLDocumentParser.h
@@ -0,0 +1,448 @@
+/*
+    Copyright (C) 1997 Martin Jones (mjones@kde.org)
+              (C) 1997 Torben Weis (weis@kde.org)
+              (C) 1998 Waldo Bastian (bastian@kde.org)
+              (C) 2001 Dirk Mueller (mueller@kde.org)
+    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Library General Public
+    License as published by the Free Software Foundation; either
+    version 2 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Library General Public License for more details.
+
+    You should have received a copy of the GNU Library General Public License
+    along with this library; see the file COPYING.LIB.  If not, write to
+    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+    Boston, MA 02110-1301, USA.
+*/
+
+#ifndef HTMLTokenizer_h
+#define HTMLTokenizer_h
+
+#include "CachedResourceClient.h"
+#include "CachedResourceHandle.h"
+#include "MappedAttributeEntry.h"
+#include "NamedNodeMap.h"
+#include "SegmentedString.h"
+#include "Timer.h"
+#include "DocumentParser.h"
+#include <wtf/Deque.h>
+#include <wtf/OwnPtr.h>
+#include <wtf/Vector.h>
+
+namespace WebCore {
+
+class CachedScript;
+class DocumentFragment;
+class Document;
+class HTMLDocument;
+class HTMLScriptElement;
+class HTMLViewSourceDocument;
+class FrameView;
+class HTMLParser;
+class Node;
+class PreloadScanner;
+class ScriptSourceCode;
+
+/**
+ * @internal
+ * Represents one HTML tag: a tag name (tagName) plus the list of attributes
+ * (attrs). Can also represent text, in which case text contains the text.
+ * NOTE(review): older wording mentioned a numerical id (0 for text); no such member exists here.
+ */
+struct Token {
+    Token()
+        : beginTag(true)
+        , selfClosingTag(false)
+        , brokenXMLStyle(false)
+        , m_sourceInfo(0)
+    { }
+    ~Token() { }
+
+    void addAttribute(AtomicString& attrName, const AtomicString& v, bool viewSourceMode);
+
+    bool isOpenTag(const QualifiedName& fullName) const { return beginTag && fullName.localName() == tagName; } // Begin tag whose tagName equals fullName's local name.
+    bool isCloseTag(const QualifiedName& fullName) const { return !beginTag && fullName.localName() == tagName; } // Non-begin tag whose tagName equals fullName's local name.
+
+    void reset() // Nulls attrs/text/tagName, restores the constructor's flag values, and calls clear() on m_sourceInfo if one was allocated.
+    {
+        attrs = 0;
+        text = 0;
+        tagName = nullAtom;
+        beginTag = true;
+        selfClosingTag = false;
+        brokenXMLStyle = false;
+        if (m_sourceInfo)
+            m_sourceInfo->clear();
+    }
+
+    void addViewSourceChar(UChar c) { if (!m_sourceInfo.get()) m_sourceInfo.set(new Vector<UChar>); m_sourceInfo->append(c); } // Allocates m_sourceInfo on first use, then appends c.
+
+    RefPtr<NamedNodeMap> attrs;
+    RefPtr<StringImpl> text;
+    AtomicString tagName;
+    bool beginTag; // Constructed and reset to true.
+    bool selfClosingTag; // Constructed and reset to false.
+    bool brokenXMLStyle; // Constructed and reset to false.
+    OwnPtr<Vector<UChar> > m_sourceInfo; // Null until the first addViewSourceChar() call.
+};
+
+enum DoctypeState { // Values held in DoctypeToken::m_state; DoctypeToken::reset() selects DoctypeBegin.
+    DoctypeBegin,
+    DoctypeBeforeName,
+    DoctypeName,
+    DoctypeAfterName,
+    DoctypeBeforePublicID,
+    DoctypePublicID,
+    DoctypeAfterPublicID,
+    DoctypeBeforeSystemID,
+    DoctypeSystemID,
+    DoctypeAfterSystemID,
+    DoctypeBogus
+};
+
+class DoctypeToken { // Holds the name, public ID, system ID and raw source buffers of a doctype, plus the current DoctypeState.
+public:
+    DoctypeToken() : m_state(DoctypeBegin), m_forceQuirks(false) { } // Initialize the scalar members so state() is well-defined even before the first reset().
+
+    void reset() // Empties every buffer, returns to DoctypeBegin and turns m_forceQuirks off.
+    {
+        m_name.clear();
+        m_publicID.clear();
+        m_systemID.clear();
+        m_state = DoctypeBegin;
+        m_source.clear();
+        m_forceQuirks = false;
+    }
+
+    DoctypeState state() const { return m_state; } // Read-only accessor; const so it can be called on const tokens.
+    void setState(DoctypeState s) { m_state = s; }
+
+    Vector<UChar> m_name;
+    Vector<UChar> m_publicID;
+    Vector<UChar> m_systemID;
+    DoctypeState m_state;
+
+    Vector<UChar> m_source;
+
+    bool m_forceQuirks; // Used by the HTML5 parser.
+};
+
+//-----------------------------------------------------------------------------
+
+// FIXME: This class does too much.  Right now it is both an HTML lexer as well
+// as handling all of the non-lexer-specific junk related to tokenizing HTML
+// (like dealing with <script> tags).  The HTML lexer bits should be pushed
+// down into a separate HTML lexer class.
+
+class HTMLDocumentParser : public DocumentParser, public CachedResourceClient {
+public:
+    HTMLDocumentParser(HTMLDocument*, bool reportErrors);
+    HTMLDocumentParser(HTMLViewSourceDocument*);
+    HTMLDocumentParser(DocumentFragment*, FragmentScriptingPermission = FragmentScriptingAllowed);
+    virtual ~HTMLDocumentParser();
+
+    virtual void write(const SegmentedString&, bool appendData);
+    virtual void finish();
+    virtual bool forceSynchronous() const { return m_state.forceSynchronous(); }
+    virtual void setForceSynchronous(bool force);
+    virtual bool isWaitingForScripts() const;
+    virtual void stopParsing();
+    virtual bool processingData() const;
+    virtual int executingScript() const { return m_executingScript; }
+
+    virtual int lineNumber() const { return m_lineNumber; }
+    virtual int columnNumber() const { return 1; } // Always reports column 1.
+
+    bool processingContentWrittenByScript() const { return m_src.excludeLineNumbers(); }
+
+    virtual void executeScriptsWaitingForStylesheets();
+
+    virtual HTMLParser* htmlParser() const { return m_parser.get(); }
+    virtual HTMLDocumentParser* asHTMLTokenizer() { return this; } // Returns this parser. NOTE(review): name still says "Tokenizer" after the rename; presumably kept to match DocumentParser::asHTMLTokenizer().
+
+private:
+    class State;
+
+    // NOTE(review): this comment used to read "Where we are in parsing a tag"; that presumably described TagState below, not begin()/end().
+    void begin();
+    void end();
+
+    void reset();
+
+    void willWriteHTML(const SegmentedString&);
+    ALWAYS_INLINE void advance(State&);
+    void didWriteHTML();
+
+    PassRefPtr<Node> processToken();
+    void processDoctypeToken();
+
+    State processListing(SegmentedString, State);
+    State parseComment(SegmentedString&, State);
+    State parseDoctype(SegmentedString&, State);
+    State parseServer(SegmentedString&, State);
+    State parseText(SegmentedString&, State);
+    State parseNonHTMLText(SegmentedString&, State);
+    State parseTag(SegmentedString&, State);
+    State parseEntity(SegmentedString&, UChar*& dest, State, unsigned& cBufferPos, bool start, bool parsingTag);
+    State parseProcessingInstruction(SegmentedString&, State);
+    State scriptHandler(State);
+    State scriptExecution(const ScriptSourceCode&, State);
+    void setSrc(const SegmentedString&);
+
+    // Check whether the buffer has enough space left for len more characters:
+    // if the used part (m_dest - m_buffer) exceeds m_bufferSize - len, enlarge it.
+    inline void checkBuffer(int len = 10)
+    {
+        if ((m_dest - m_buffer) > m_bufferSize - len)
+            enlargeBuffer(len);
+    }
+
+    inline void checkScriptBuffer(int len = 10) // Calls enlargeScriptBuffer(len) once m_scriptCodeSize + len reaches m_scriptCodeCapacity.
+    {
+        if (m_scriptCodeSize + len >= m_scriptCodeCapacity)
+            enlargeScriptBuffer(len);
+    }
+
+    void enlargeBuffer(int len);
+    void enlargeScriptBuffer(int len);
+
+    bool continueProcessing(int& processedCount, double startTime, State&);
+    void timerFired(Timer<HTMLDocumentParser>*);
+    void allDataProcessed();
+
+    // from CachedResourceClient
+    void notifyFinished(CachedResource*);
+
+    void executeExternalScriptsIfReady();
+    void executeExternalScriptsTimerFired(Timer<HTMLDocumentParser>*);
+    bool continueExecutingExternalScripts(double startTime);
+
+    // Internal buffers
+    ///////////////////
+    UChar* m_buffer;
+    int m_bufferSize;
+    UChar* m_dest;
+
+    Token m_currentToken;
+
+    // This buffer holds the raw characters we've seen between the beginning of
+    // the attribute name and the first character of the attribute value.
+    Vector<UChar, 32> m_rawAttributeBeforeValue;
+
+    // DocumentParser flags
+    //////////////////
+    // Are we inside quotes within an HTML tag?
+    enum { NoQuote, SingleQuote, DoubleQuote } tquote;
+
+    // Are we in a &... character entity description?
+    enum EntityState { // Stored in bits 4-6 of State::m_bits (see State::EntityMask and State::EntityShift).
+        NoEntity = 0,
+        SearchEntity = 1,
+        NumericSearch = 2,
+        Hexadecimal = 3,
+        Decimal = 4,
+        EntityName = 5,
+        SearchSemicolon = 6
+    };
+    unsigned EntityUnicodeValue;
+
+    enum TagState { // Stored in bits 0-3 of State::m_bits (see State::TagMask).
+        NoTag = 0,
+        TagName = 1,
+        SearchAttribute = 2,
+        AttributeName = 3,
+        SearchEqual = 4,
+        SearchValue = 5,
+        QuotedValue = 6,
+        Value = 7,
+        SearchEnd = 8
+    };
+
+    class State { // Packs a TagState, an EntityState and the boolean flags below into a single unsigned (m_bits).
+    public:
+        State() : m_bits(0) { }
+
+        TagState tagState() const { return static_cast<TagState>(m_bits & TagMask); }
+        void setTagState(TagState t) { m_bits = (m_bits & ~TagMask) | t; }
+        EntityState entityState() const { return static_cast<EntityState>((m_bits & EntityMask) >> EntityShift); }
+        void setEntityState(EntityState e) { m_bits = (m_bits & ~EntityMask) | (e << EntityShift); }
+
+        bool inScript() const { return testBit(InScript); }
+        void setInScript(bool v) { setBit(InScript, v); }
+        bool inStyle() const { return testBit(InStyle); }
+        void setInStyle(bool v) { setBit(InStyle, v); }
+        bool inXmp() const { return testBit(InXmp); }
+        void setInXmp(bool v) { setBit(InXmp, v); }
+        bool inTitle() const { return testBit(InTitle); }
+        void setInTitle(bool v) { setBit(InTitle, v); }
+        bool inIFrame() const { return testBit(InIFrame); }
+        void setInIFrame(bool v) { setBit(InIFrame, v); }
+        bool inPlainText() const { return testBit(InPlainText); }
+        void setInPlainText(bool v) { setBit(InPlainText, v); }
+        bool inProcessingInstruction() const { return testBit(InProcessingInstruction); }
+        void setInProcessingInstruction(bool v) { return setBit(InProcessingInstruction, v); } // "return" of a void expression; same effect as the other setters.
+        bool inComment() const { return testBit(InComment); }
+        void setInComment(bool v) { setBit(InComment, v); }
+        bool inDoctype() const { return testBit(InDoctype); }
+        void setInDoctype(bool v) { setBit(InDoctype, v); }
+        bool inTextArea() const { return testBit(InTextArea); }
+        void setInTextArea(bool v) { setBit(InTextArea, v); }
+        bool escaped() const { return testBit(Escaped); }
+        void setEscaped(bool v) { setBit(Escaped, v); }
+        bool inServer() const { return testBit(InServer); }
+        void setInServer(bool v) { setBit(InServer, v); }
+        bool skipLF() const { return testBit(SkipLF); }
+        void setSkipLF(bool v) { setBit(SkipLF, v); }
+        bool startTag() const { return testBit(StartTag); }
+        void setStartTag(bool v) { setBit(StartTag, v); }
+        bool discardLF() const { return testBit(DiscardLF); }
+        void setDiscardLF(bool v) { setBit(DiscardLF, v); }
+        bool allowYield() const { return testBit(AllowYield); }
+        void setAllowYield(bool v) { setBit(AllowYield, v); }
+        bool loadingExtScript() const { return testBit(LoadingExtScript); }
+        void setLoadingExtScript(bool v) { setBit(LoadingExtScript, v); }
+        bool forceSynchronous() const { return testBit(ForceSynchronous); }
+        void setForceSynchronous(bool v) { setBit(ForceSynchronous, v); }
+
+        bool inAnyNonHTMLText() const { return m_bits & (InScript | InStyle | InXmp | InTextArea | InTitle | InIFrame); }
+        bool hasTagState() const { return m_bits & TagMask; }
+        bool hasEntityState() const { return m_bits & EntityMask; }
+
+        bool needsSpecialWriteHandling() const { return m_bits & (InScript | InStyle | InXmp | InTextArea | InTitle | InIFrame | TagMask | EntityMask | InPlainText | InComment | InDoctype | InServer | InProcessingInstruction | StartTag); }
+
+    private:
+        static const int EntityShift = 4; // The EntityState field starts directly above the 4-bit TagMask.
+        enum StateBits {
+            TagMask = (1 << 4) - 1, // Bits 0-3.
+            EntityMask = (1 << 7) - (1 << 4), // Bits 4-6.
+            InScript = 1 << 7,
+            InStyle = 1 << 8,
+            // Bit 9 unused
+            InXmp = 1 << 10,
+            InTitle = 1 << 11,
+            InPlainText = 1 << 12,
+            InProcessingInstruction = 1 << 13,
+            InComment = 1 << 14,
+            InTextArea = 1 << 15,
+            Escaped = 1 << 16,
+            InServer = 1 << 17,
+            SkipLF = 1 << 18,
+            StartTag = 1 << 19,
+            DiscardLF = 1 << 20, // FIXME: should clarify difference between skip and discard
+            AllowYield = 1 << 21,
+            LoadingExtScript = 1 << 22,
+            ForceSynchronous = 1 << 23,
+            InIFrame = 1 << 24,
+            InDoctype = 1 << 25
+        };
+
+        void setBit(StateBits bit, bool value) // Sets the given bit(s) in m_bits when value is true, clears them otherwise.
+        {
+            if (value)
+                m_bits |= bit;
+            else
+                m_bits &= ~bit;
+        }
+        bool testBit(StateBits bit) const { return m_bits & bit; }
+
+        unsigned m_bits;
+    };
+
+    State m_state;
+
+    DoctypeToken m_doctypeToken;
+    int m_doctypeSearchCount;
+    int m_doctypeSecondarySearchCount;
+
+    bool m_brokenServer;
+
+    // Name of an attribute that we just scanned.
+    AtomicString m_attrName;
+
+    // Used to store the code of a scripting sequence
+    UChar* m_scriptCode;
+    // Size of the script sequence stored in m_scriptCode
+    int m_scriptCodeSize;
+    // Maximum size that can be stored in m_scriptCode
+    int m_scriptCodeCapacity;
+    // resync point of script code size
+    int m_scriptCodeResync;
+
+    // Stores characters if we are scanning for a string like "</script>"
+    UChar searchBuffer[10];
+
+    // Counts where we are in the string we are scanning for
+    int searchCount;
+    // the stopper string
+    const char* m_searchStopper;
+    int m_searchStopperLength;
+
+    // if no more data is coming, just parse what we have (including ext scripts that
+    // may be still downloading) and finish
+    bool m_noMoreData;
+    // URL to get source code of script from
+    String m_scriptTagSrcAttrValue;
+    String m_scriptTagCharsetAttrValue;
+    // the HTML code we will parse after the external script we are waiting for has loaded
+    SegmentedString m_pendingSrc;
+
+    // the HTML code we will parse after this particular script has
+    // loaded, but before all pending HTML
+    SegmentedString* m_currentPrependingSrc;
+
+    // true (non-zero) if we are executing a script while parsing a document. This causes the parsing of
+    // the output of the script to be postponed until after the script has finished executing
+    int m_executingScript;
+    Deque<CachedResourceHandle<CachedScript> > m_pendingScripts;
+    RefPtr<HTMLScriptElement> m_scriptNode;
+
+    bool m_requestingScript;
+    bool m_hasScriptsWaitingForStylesheets;
+
+    // if we found one broken comment, there are most likely others as well
+    // store a flag to get rid of the O(n^2) behaviour in such a case.
+    bool m_brokenComments;
+    // current line number
+    int m_lineNumber;
+    int m_currentScriptTagStartLineNumber;
+    int m_currentTagStartLineNumber;
+
+    double m_tokenizerTimeDelay;
+    int m_tokenizerChunkSize;
+
+    // The timer for continued processing.
+    Timer<HTMLDocumentParser> m_timer;
+
+    // The timer for continued executing external scripts.
+    Timer<HTMLDocumentParser> m_externalScriptsTimer;
+
+// This buffer can hold arbitrarily long user-defined attribute names, such as in EMBED tags.
+// So any fixed number might be too small, but rather than rewriting all usage of this buffer
+// we'll just make it large enough to handle all imaginable cases.
+#define CBUFLEN 1024
+    UChar m_cBuffer[CBUFLEN + 2];
+    unsigned int m_cBufferPos;
+
+    SegmentedString m_src;
+    Document* m_doc;
+    OwnPtr<HTMLParser> m_parser;
+    bool m_inWrite;
+    bool m_fragment;
+    FragmentScriptingPermission m_scriptingPermission;
+
+    OwnPtr<PreloadScanner> m_preloadScanner;
+};
+
+void parseHTMLDocumentFragment(const String&, DocumentFragment*, FragmentScriptingPermission = FragmentScriptingAllowed); // Defined in HTMLDocumentParser.cpp; the permission defaults to FragmentScriptingAllowed.
+
+UChar decodeNamedEntity(const char*); // Defined in HTMLDocumentParser.cpp.
+
+} // namespace WebCore
+
+#endif // HTMLTokenizer_h