// WebCore/html/HTMLTokenizer.cpp - WebKit - Git at Google

 /*
     Copyright (C) 1997 Martin Jones (mjones@kde.org)
               (C) 1997 Torben Weis (weis@kde.org)
               (C) 1998 Waldo Bastian (bastian@kde.org)
               (C) 1999 Lars Knoll (knoll@kde.org)
               (C) 1999 Antti Koivisto (koivisto@kde.org)
               (C) 2001 Dirk Mueller (mueller@kde.org)
     Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
     Copyright (C) 2005, 2006 Alexey Proskuryakov (ap@nypop.com)
     Copyright (C) 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)

     This library is free software; you can redistribute it and/or
     modify it under the terms of the GNU Library General Public
     License as published by the Free Software Foundation; either
     version 2 of the License, or (at your option) any later version.

     This library is distributed in the hope that it will be useful,
     but WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     Library General Public License for more details.

     You should have received a copy of the GNU Library General Public License
     along with this library; see the file COPYING.LIB.  If not, write to
     the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     Boston, MA 02110-1301, USA.
 */

 #include "config.h"
 #include "HTMLTokenizer.h"

 #include "CSSHelper.h"
 #include "Cache.h"
 #include "CachedScript.h"
 #include "DocLoader.h"
 #include "DocumentFragment.h"
 #include "EventNames.h"
 #include "Frame.h"
 #include "FrameLoader.h"
 #include "FrameView.h"
 #include "HTMLElement.h"
 #include "HTMLNames.h"
 #include "HTMLParser.h"
 #include "HTMLScriptElement.h"
 #include "HTMLViewSourceDocument.h"
 #include "MappedAttribute.h"
 #include "Page.h"
 #include "PreloadScanner.h"
 #include "ScriptController.h"
 #include "ScriptSourceCode.h"
 #include "ScriptValue.h"
 #include "XSSAuditor.h"
 #include <wtf/ASCIICType.h>
 #include <wtf/CurrentTime.h>

 #include "HTMLEntityNames.c"

 #define PRELOAD_SCANNER_ENABLED 1
 // #define INSTRUMENT_LAYOUT_SCHEDULING 1

 using namespace WTF;
 using namespace std;

 namespace WebCore {

 using namespace HTMLNames;

 #if MOBILE
 // A mobile device needs to stay responsive, so the tokenizer chunk size is reduced.
 // This value defines how many characters the tokenizer will process before
 // yielding control.
 static const int defaultTokenizerChunkSize = 256;
 #else
 static const int defaultTokenizerChunkSize = 4096;
 #endif

 #if MOBILE
 // As the chunks are smaller (above), the tokenizer should not yield for as long a period, otherwise
 // it will take way too long to load a page.
 static const double defaultTokenizerTimeDelay = 0.300;
 #else
 // FIXME: We would like this constant to be 200ms.
 // Yielding more aggressively results in increased responsiveness and better incremental rendering.
 // It slows down overall page-load on slower machines, though, so for now we set a value of 500ms.
 static const double defaultTokenizerTimeDelay = 0.500;
 #endif

 // Lower-case ASCII marker strings for markup the tokenizer looks for.
 // The code that consumes them is outside this part of the file; presumably the
 // "*End" strings serve as m_searchStopper values, which parseNonHTMLText()
 // compares case-insensitively via tagMatch() -- TODO confirm against the callers.
 static const char commentStart [] = "<!--";
 static const char doctypeStart [] = "<!doctype";
 static const char publicStart [] = "public";
 static const char systemStart [] = "system";
 static const char scriptEnd [] = "</script";
 static const char xmpEnd [] = "</xmp";
 static const char styleEnd [] =  "</style";
 static const char textareaEnd [] = "</textarea";
 static const char titleEnd [] = "</title";
 static const char iframeEnd [] = "</iframe";

 // Full support for MS Windows extensions to Latin-1.
 // Technically these extensions should only be activated for pages
 // marked "windows-1252" or "cp1252", but
 // in the standard Microsoft way, these extensions infect hundreds of thousands
 // of web pages.  Note that people with non-latin-1 Microsoft extensions
 // are SOL.
 //
 // See: http://www.microsoft.com/globaldev/reference/WinCP.asp
 //      http://www.bbsinc.com/iso8859.html
 //      http://www.obviously.com/
 //
 // There may be better equivalents
 
 // We only need this for entities. For non-entity text, we handle this in the text encoding.
 
 // The table is indexed by (code point - 0x80) and covers 0x80 through 0x9F;
 // fixUpChar() performs the lookup. Slots whose value equals their own index
 // (0x81, 0x8D, 0x8F, 0x90, 0x9D) are left unchanged by the remapping.
 static const UChar windowsLatin1ExtensionArray[32] = {
     0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 80-87
     0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, // 88-8F
     0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, // 90-97
     0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178  // 98-9F
 };

 // Remaps a code unit in the range 0x80-0x9F through windowsLatin1ExtensionArray;
 // any other value is returned untouched.
 static inline UChar fixUpChar(UChar c)
 {
     // Clearing the low five bits leaves exactly 0x80 for every value in 0x80..0x9F.
     const bool inExtensionRange = (c & ~0x1F) == 0x0080;
     if (inExtensionRange)
         return windowsLatin1ExtensionArray[c - 0x80];
     return c;
 }

 static inline bool tagMatch(const char* s1, const UChar* s2, unsigned length)
 {
     for (unsigned i = 0; i != length; ++i) {
         unsigned char c1 = s1[i];
         unsigned char uc1 = toASCIIUpper(static_cast<char>(c1));
         UChar c2 = s2[i];
         if (c1 != c2 && uc1 != c2)
             return false;
     }
     return true;
 }

 // Appends the attribute (attrName, attributeValue) to this token, creating the
 // attribute map on first use. An empty attrName adds nothing. In every case
 // attrName is reset to emptyAtom before returning.
 inline void Token::addAttribute(AtomicString& attrName, const AtomicString& attributeValue, bool viewSourceMode)
 {
     if (attrName.isEmpty()) {
         attrName = emptyAtom;
         return;
     }

     ASSERT(!attrName.contains('/'));
     RefPtr<MappedAttribute> attribute = MappedAttribute::create(attrName, attributeValue);

     // Lazily allocate the map the first time an attribute is seen.
     if (!attrs) {
         attrs = NamedMappedAttrMap::create();
         attrs->reserveInitialCapacity(10);
     }
     attrs->insertAttribute(attribute.release(), viewSourceMode);

     attrName = emptyAtom;
 }

 // ----------------------------------------------------------------------------

 // Constructs a tokenizer for a regular HTML document. It creates its own
 // HTMLParser for |doc| (forwarding |reportErrors|), is not in fragment mode,
 // and finishes initialization by calling begin().
 HTMLTokenizer::HTMLTokenizer(HTMLDocument* doc, bool reportErrors)
     : Tokenizer()
     , m_buffer(0)
     , m_scriptCode(0)
     , m_scriptCodeSize(0)
     , m_scriptCodeCapacity(0)
     , m_scriptCodeResync(0)
     , m_executingScript(0)
     , m_requestingScript(false)
     , m_hasScriptsWaitingForStylesheets(false)
     , m_timer(this, &HTMLTokenizer::timerFired)
     , m_doc(doc)
     , m_parser(new HTMLParser(doc, reportErrors))
     , m_inWrite(false)
     , m_fragment(false)
 {
     // begin() frees m_buffer via reset() before allocating, so m_buffer must be 0 here.
     begin();
 }

 // Constructs a tokenizer for a view-source document. No HTMLParser is created
 // (m_parser is 0), so code that dereferences m_parser must first check
 // inViewSourceMode(). The base class is constructed with 'true'
 // (presumably the view-source flag -- confirm against Tokenizer's declaration).
 HTMLTokenizer::HTMLTokenizer(HTMLViewSourceDocument* doc)
     : Tokenizer(true)
     , m_buffer(0)
     , m_scriptCode(0)
     , m_scriptCodeSize(0)
     , m_scriptCodeCapacity(0)
     , m_scriptCodeResync(0)
     , m_executingScript(0)
     , m_requestingScript(false)
     , m_hasScriptsWaitingForStylesheets(false)
     , m_timer(this, &HTMLTokenizer::timerFired)
     , m_doc(doc)
     , m_parser(0)
     , m_inWrite(false)
     , m_fragment(false)
 {
     // begin() frees m_buffer via reset() before allocating, so m_buffer must be 0 here.
     begin();
 }

 // Constructs a tokenizer that parses into a document fragment. The owning
 // document is taken from |frag|, the HTMLParser is created for the fragment,
 // and m_fragment is set so that scripts are not executed (see scriptHandler()
 // and scriptExecution()). The Tokenizer base is default-constructed.
 HTMLTokenizer::HTMLTokenizer(DocumentFragment* frag)
     : m_buffer(0)
     , m_scriptCode(0)
     , m_scriptCodeSize(0)
     , m_scriptCodeCapacity(0)
     , m_scriptCodeResync(0)
     , m_executingScript(0)
     , m_requestingScript(false)
     , m_hasScriptsWaitingForStylesheets(false)
     , m_timer(this, &HTMLTokenizer::timerFired)
     , m_doc(frag->document())
     , m_parser(new HTMLParser(frag))
     , m_inWrite(false)
     , m_fragment(true)
 {
     // begin() frees m_buffer via reset() before allocating, so m_buffer must be 0 here.
     begin();
 }

 void HTMLTokenizer::reset()
 {
     ASSERT(m_executingScript == 0);

     while (!m_pendingScripts.isEmpty()) {
         CachedScript* cs = m_pendingScripts.first().get();
         m_pendingScripts.removeFirst();
         ASSERT(cache()->disabled() || cs->accessCount() > 0);
         cs->removeClient(this);
     }

     fastFree(m_buffer);
     m_buffer = m_dest = 0;
     m_bufferSize = 0;

     fastFree(m_scriptCode);
     m_scriptCode = 0;
     m_scriptCodeSize = m_scriptCodeCapacity = m_scriptCodeResync = 0;

     m_timer.stop();
     m_state.setAllowYield(false);
     m_state.setForceSynchronous(false);

     m_currentToken.reset();
     m_doctypeToken.reset();
     m_doctypeSearchCount = 0;
     m_doctypeSecondarySearchCount = 0;
     m_hasScriptsWaitingForStylesheets = false;
 }

 // Puts the tokenizer into its initial state for a new parse: clears script
 // bookkeeping, calls reset(), allocates a fresh output buffer and picks up the
 // page's custom tokenizer time delay / chunk size when it has them.
 void HTMLTokenizer::begin()
 {
     // These are cleared before reset() because reset() asserts that no script is executing.
     m_executingScript = 0;
     m_requestingScript = false;
     m_hasScriptsWaitingForStylesheets = false;
     m_state.setLoadingExtScript(false);
     reset();

     // Fresh output buffer, with the write cursor at its start.
     const int initialBufferSize = 254;
     m_bufferSize = initialBufferSize;
     m_buffer = static_cast<UChar*>(fastMalloc(sizeof(UChar) * initialBufferSize));
     m_dest = m_buffer;

     tquote = NoQuote;
     searchCount = 0;
     m_state.setEntityState(NoEntity);
     m_scriptTagSrcAttrValue = String();
     m_pendingSrc.clear();
     m_currentPrependingSrc = 0;
     m_noMoreData = false;
     m_brokenComments = false;
     m_brokenServer = false;
     m_lineNumber = 0;
     m_currentScriptTagStartLineNumber = 0;
     m_currentTagStartLineNumber = 0;
     m_state.setForceSynchronous(false);

     // Start from the built-in defaults, then let the page override each one.
     m_tokenizerTimeDelay = defaultTokenizerTimeDelay;
     m_tokenizerChunkSize = defaultTokenizerChunkSize;
     if (Page* page = m_doc->page()) {
         if (page->hasCustomHTMLTokenizerTimeDelay())
             m_tokenizerTimeDelay = page->customHTMLTokenizerTimeDelay();
         if (page->hasCustomHTMLTokenizerChunkSize())
             m_tokenizerChunkSize = page->customHTMLTokenizerChunkSize();
     }
 }

 // Stores |force| in the tokenizer state's force-synchronous flag.
 // Note that both reset() and begin() clear this flag again.
 void HTMLTokenizer::setForceSynchronous(bool force)
 {
     m_state.setForceSynchronous(force);
 }

 // Copies the listing |list| into the output buffer as preformatted text,
 // normalizing line endings: CR, LF and CRLF each become a single '\n', and one
 // leading line break is dropped when the state asks for it (discardLF).
 // Returns the updated state.
 HTMLTokenizer::State HTMLTokenizer::processListing(SegmentedString list, State state)
 {
     // Every path through the body consumes exactly one character.
     for (; !list.isEmpty(); list.advance()) {
         const UChar current = *list;

         // Second half of a CRLF pair: swallow the LF.
         if (state.skipLF()) {
             state.setSkipLF(false);
             if (current == '\n')
                 continue;
         }

         checkBuffer();

         const bool isCarriageReturn = current == '\r';
         if (!isCarriageReturn && current != '\n') {
             state.setDiscardLF(false);
             *m_dest++ = current;
             continue;
         }

         if (state.discardLF())
             state.setDiscardLF(false); // This line break is the one being discarded.
         else
             *m_dest++ = '\n';

         // Remember a CR so that an immediately following LF is skipped.
         if (isCarriageReturn)
             state.setSkipLF(true);
     }

     return state;
 }

 // Consumes the raw-text content of exactly one of <script>, <style>, <xmp>,
 // <textarea>, <title> or <iframe>. Characters are accumulated in m_scriptCode
 // until the element's end tag (m_searchStopper) is recognized; the content is
 // then emitted (via scriptHandler() for scripts, processListing() otherwise)
 // followed by the end-tag token.
 //
 // The function runs in two phases, distinguished by m_scriptCodeResync:
 //   - 0: still collecting content and looking for the end tag;
 //   - non-zero: the end tag name has been matched and we are skipping ahead to
 //     its closing '>' (tracking quotes so a quoted '>' is ignored). The value
 //     is one past the index in m_scriptCode at which the end tag starts.
 //
 // Returns the updated state. If |src| runs out first, the partial content stays
 // in m_scriptCode and the function is expected to be called again.
 HTMLTokenizer::State HTMLTokenizer::parseNonHTMLText(SegmentedString& src, State state)
 {
     ASSERT(state.inTextArea() || state.inTitle() || state.inIFrame() || !state.hasEntityState());
     ASSERT(!state.hasTagState());
     ASSERT(state.inXmp() + state.inTextArea() + state.inTitle() + state.inStyle() + state.inScript() + state.inIFrame() == 1 );
     // Record the line of the script start only once per script element
     // (scriptHandler() resets it to 0 when the element is finished).
     if (state.inScript() && !m_currentScriptTagStartLineNumber)
         m_currentScriptTagStartLineNumber = m_lineNumber;
 
     // Resume a comment that was interrupted when the previous chunk ran out.
     if (state.inComment())
         state = parseComment(src, state);
 
     // Index in m_scriptCode at which the most recent decoded entity character
     // lands (-1 if none). Used to keep decoded characters from being treated
     // as part of a comment opener or end tag.
     int lastDecodedEntityPosition = -1;
     while (!src.isEmpty()) {
         checkScriptBuffer();
         UChar ch = *src;
 
         // "<!-" already buffered and the next character is '-': a comment starts here.
         if (!m_scriptCodeResync && !m_brokenComments &&
             !state.inXmp() && ch == '-' && m_scriptCodeSize >= 3 && !src.escaped() &&
             m_scriptCode[m_scriptCodeSize - 3] == '<' && m_scriptCode[m_scriptCodeSize - 2] == '!' && m_scriptCode[m_scriptCodeSize - 1] == '-' &&
             (lastDecodedEntityPosition < m_scriptCodeSize - 3)) {
             state.setInComment(true);
             state = parseComment(src, state);
             continue;
         }
         // Resync phase: an unquoted '>' closes the end tag, so the element is complete.
         if (m_scriptCodeResync && !tquote && ch == '>') {
             src.advancePastNonNewline();
             // Truncate the buffer so that it no longer contains the end tag.
             m_scriptCodeSize = m_scriptCodeResync - 1;
             m_scriptCodeResync = 0;
             m_scriptCode[m_scriptCodeSize] = m_scriptCode[m_scriptCodeSize + 1] = 0;
             if (state.inScript())
                 state = scriptHandler(state);
             else {
                 // Emit the collected text, then synthesize the matching end-tag token.
                 state = processListing(SegmentedString(m_scriptCode, m_scriptCodeSize), state);
                 processToken();
                 if (state.inStyle()) {
                     m_currentToken.tagName = styleTag.localName();
                     m_currentToken.beginTag = false;
                 } else if (state.inTextArea()) {
                     m_currentToken.tagName = textareaTag.localName();
                     m_currentToken.beginTag = false;
                 } else if (state.inTitle()) {
                     m_currentToken.tagName = titleTag.localName();
                     m_currentToken.beginTag = false;
                 } else if (state.inXmp()) {
                     m_currentToken.tagName = xmpTag.localName();
                     m_currentToken.beginTag = false;
                 } else if (state.inIFrame()) {
                     m_currentToken.tagName = iframeTag.localName();
                     m_currentToken.beginTag = false;
                 }
                 processToken();
                 state.setInStyle(false);
                 state.setInScript(false);
                 state.setInTextArea(false);
                 state.setInTitle(false);
                 state.setInXmp(false);
                 state.setInIFrame(false);
                 tquote = NoQuote;
                 m_scriptCodeSize = m_scriptCodeResync = 0;
             }
             return state;
         }
         // possible end of tagname, lets check.
         // The buffer must end with the end-tag name (case-insensitively) and the
         // current character must terminate a tag name. The character is not
         // consumed here; the loop re-examines it in the resync phase.
         if (!m_scriptCodeResync && !state.escaped() && !src.escaped() && (ch == '>' || ch == '/' || isASCIISpace(ch)) &&
              m_scriptCodeSize >= m_searchStopperLength &&
              tagMatch(m_searchStopper, m_scriptCode + m_scriptCodeSize - m_searchStopperLength, m_searchStopperLength) &&
              (lastDecodedEntityPosition < m_scriptCodeSize - m_searchStopperLength)) {
             m_scriptCodeResync = m_scriptCodeSize-m_searchStopperLength+1;
             tquote = NoQuote;
             continue;
         }
         // Resync phase: track quoting inside the end tag. A quote character toggles
         // its own kind and is ignored inside the other kind; a line break ends any quote.
         if (m_scriptCodeResync && !state.escaped()) {
             if (ch == '\"')
                 tquote = (tquote == NoQuote) ? DoubleQuote : ((tquote == SingleQuote) ? SingleQuote : NoQuote);
             else if (ch == '\'')
                 tquote = (tquote == NoQuote) ? SingleQuote : (tquote == DoubleQuote) ? DoubleQuote : NoQuote;
             else if (tquote != NoQuote && (ch == '\r' || ch == '\n'))
                 tquote = NoQuote;
         }
         // A backslash escapes the next character unless it is itself escaped.
         state.setEscaped(!state.escaped() && ch == '\\');
         // Entities are only decoded inside <textarea>, <title> and <iframe>.
         if (!m_scriptCodeResync && (state.inTextArea() || state.inTitle() || state.inIFrame()) && !src.escaped() && ch == '&') {
             UChar* scriptCodeDest = m_scriptCode + m_scriptCodeSize;
             src.advancePastNonNewline();
             state = parseEntity(src, scriptCodeDest, state, m_cBufferPos, true, false);
             // If parseEntity() wrote nothing to the buffer, remember the current
             // position as the decoded-entity position (parseEntity() pushes a
             // successfully decoded character back onto |src| instead of writing it);
             // otherwise account for the text it appended.
             if (scriptCodeDest == m_scriptCode + m_scriptCodeSize)
                 lastDecodedEntityPosition = m_scriptCodeSize;
             else
                 m_scriptCodeSize = scriptCodeDest - m_scriptCode;
         } else {
             m_scriptCode[m_scriptCodeSize++] = ch;
             src.advance(m_lineNumber);
         }
     }
 
     return state;
 }

 // Called when the end of a <script> element has been reached and its text is
 // in m_scriptCode. Emits the script text and the </script> token, then either
 // requests the external script named by the src attribute or executes the
 // inline script. While a script is loading or running, the remaining input
 // (m_src) is parked in m_pendingSrc or in the current "prepending" source so
 // that text produced by document.write() is parsed in the right order.
 // Returns the updated state.
 HTMLTokenizer::State HTMLTokenizer::scriptHandler(State state)
 {
     // We are inside a <script>
     bool doScriptExec = false;
     int startLine = m_currentScriptTagStartLineNumber + 1; // Script line numbers are 1 based, HTMLTokenzier line numbers are 0 based
 
     // Reset m_currentScriptTagStartLineNumber to indicate that we've finished parsing the current script element
     m_currentScriptTagStartLineNumber = 0;
 
     // (Bugzilla 3837) Scripts following a frameset element should not execute or,
     // in the case of extern scripts, even load.
     bool followingFrameset = (m_doc->body() && m_doc->body()->hasTagName(framesetTag));
 
     // Set only when an external script was successfully requested below.
     CachedScript* cs = 0;
     // don't load external scripts for standalone documents (for now)
     if (!inViewSourceMode()) {
         if (!m_scriptTagSrcAttrValue.isEmpty() && m_doc->frame()) {
             // forget what we just got; load from src url instead
             if (!m_parser->skipMode() && !followingFrameset) {
 #ifdef INSTRUMENT_LAYOUT_SCHEDULING
                 if (!m_doc->ownerElement())
                     printf("Requesting script at time %d\n", m_doc->elapsedTime());
 #endif
                 // The parser might have been stopped by for example a window.close call in an earlier script.
                 // If so, we don't want to load scripts.
                 if (!m_parserStopped && (cs = m_doc->docLoader()->requestScript(m_scriptTagSrcAttrValue, m_scriptTagCharsetAttrValue)))
                     m_pendingScripts.append(cs);
                 else
                     m_scriptNode = 0;
             } else
                 m_scriptNode = 0;
             m_scriptTagSrcAttrValue = String();
         } else {
             // Parse m_scriptCode containing <script> info
             // NOTE(review): m_scriptNode is dereferenced without a null check here;
             // confirm it is always set when this branch is reached.
             doScriptExec = m_scriptNode->shouldExecuteAsJavaScript();
 #if ENABLE(XHTMLMP)
             if (!doScriptExec)
                 m_doc->setShouldProcessNoscriptElement(true);
 #endif
             m_scriptNode = 0;
         }
     }
 
     // Emit the script text as a token, capture its text for execution, then emit </script>.
     state = processListing(SegmentedString(m_scriptCode, m_scriptCodeSize), state);
     RefPtr<Node> node = processToken();
     String scriptString = node ? node->textContent() : "";
     m_currentToken.tagName = scriptTag.localName();
     m_currentToken.beginTag = false;
     processToken();
 
     state.setInScript(false);
     m_scriptCodeSize = m_scriptCodeResync = 0;
 
     // FIXME: The script should be syntax highlighted.
     if (inViewSourceMode())
         return state;
 
     // Install a local prepending source for the duration of this call; the
     // previous one is restored before returning.
     SegmentedString* savedPrependingSrc = m_currentPrependingSrc;
     SegmentedString prependingSrc;
     m_currentPrependingSrc = &prependingSrc;
 
     if (!m_parser->skipMode() && !followingFrameset) {
         if (cs) {
             // External script: park the rest of the input until the script has run.
             if (savedPrependingSrc)
                 savedPrependingSrc->append(m_src);
             else
                 m_pendingSrc.prepend(m_src);
             setSrc(SegmentedString());
 
             // the ref() call below may call notifyFinished if the script is already in cache,
             // and that mucks with the state directly, so we must write it back to the object.
             m_state = state;
             bool savedRequestingScript = m_requestingScript;
             m_requestingScript = true;
             cs->addClient(this);
             m_requestingScript = savedRequestingScript;
             state = m_state;
             // will be 0 if script was already loaded and ref() executed it
             if (!m_pendingScripts.isEmpty())
                 state.setLoadingExtScript(true);
         } else if (!m_fragment && doScriptExec) {
             // Inline script: park the rest of the input, then run the script now.
             if (!m_executingScript)
                 m_pendingSrc.prepend(m_src);
             else
                 prependingSrc = m_src;
             setSrc(SegmentedString());
             state = scriptExecution(ScriptSourceCode(scriptString, m_doc->frame() ? m_doc->frame()->document()->url() : KURL(), startLine), state);
         }
     }
 
     if (!m_executingScript && !state.loadingExtScript()) {
         // Nothing is blocking the parser any more: resume with the parked input.
         m_src.append(m_pendingSrc);
         m_pendingSrc.clear();
     } else if (!prependingSrc.isEmpty()) {
         // restore first so that the write appends in the right place
         // (does not hurt to do it again below)
         m_currentPrependingSrc = savedPrependingSrc;
 
         // we need to do this slightly modified bit of one of the write() cases
         // because we want to prepend to m_pendingSrc rather than appending
         // if there's no previous prependingSrc
         if (!m_pendingScripts.isEmpty()) {
             if (m_currentPrependingSrc)
                 m_currentPrependingSrc->append(prependingSrc);
             else
                 m_pendingSrc.prepend(prependingSrc);
         } else {
             m_state = state;
             write(prependingSrc, false);
             state = m_state;
         }
     }
 
 #if PRELOAD_SCANNER_ENABLED
     // While blocked on an external script, run the preload scanner over the parked input.
     if (!m_pendingScripts.isEmpty() && !m_executingScript) {
         if (!m_preloadScanner)
             m_preloadScanner.set(new PreloadScanner(m_doc));
         if (!m_preloadScanner->inProgress()) {
             m_preloadScanner->begin();
             m_preloadScanner->write(m_pendingSrc);
         }
     }
 #endif
     m_currentPrependingSrc = savedPrependingSrc;
 
     return state;
 }

 // Executes |sourceCode| in the document's frame and then arranges for any text
 // collected in the local prepending source during execution to be parsed in
 // the right order. Does nothing for fragment tokenizers or documents without a
 // frame. Returns the updated state, with allowYield set once a script has run.
 HTMLTokenizer::State HTMLTokenizer::scriptExecution(const ScriptSourceCode& sourceCode, State state)
 {
     if (m_fragment || !m_doc->frame())
         return state;
     // Nesting depth of script execution; incremented here and decremented after the script has run.
     m_executingScript++;
 
     // Install a local prepending source for the duration of the script; the
     // previous one is restored before returning.
     SegmentedString* savedPrependingSrc = m_currentPrependingSrc;
     SegmentedString prependingSrc;
     m_currentPrependingSrc = &prependingSrc;
 
 #ifdef INSTRUMENT_LAYOUT_SCHEDULING
     if (!m_doc->ownerElement())
         printf("beginning script execution at %d\n", m_doc->elapsedTime());
 #endif
 
     // The state is written to m_state before the script runs and read back
     // afterwards, so changes made to m_state during execution are picked up.
     m_state = state;
     m_doc->frame()->loader()->executeScript(sourceCode);
     state = m_state;
 
     state.setAllowYield(true);
 
 #ifdef INSTRUMENT_LAYOUT_SCHEDULING
     if (!m_doc->ownerElement())
         printf("ending script execution at %d\n", m_doc->elapsedTime());
 #endif
 
     m_executingScript--;
 
     if (!m_executingScript && !state.loadingExtScript()) {
         // Outermost script finished and nothing is loading: queue the text
         // collected during execution ahead of the parked input and resume.
         m_pendingSrc.prepend(prependingSrc);
         m_src.append(m_pendingSrc);
         m_pendingSrc.clear();
     } else if (!prependingSrc.isEmpty()) {
         // restore first so that the write appends in the right place
         // (does not hurt to do it again below)
         m_currentPrependingSrc = savedPrependingSrc;
 
         // we need to do this slightly modified bit of one of the write() cases
         // because we want to prepend to m_pendingSrc rather than appending
         // if there's no previous prependingSrc
         if (!m_pendingScripts.isEmpty()) {
             if (m_currentPrependingSrc)
                 m_currentPrependingSrc->append(prependingSrc);
             else
                 m_pendingSrc.prepend(prependingSrc);
 
 #if PRELOAD_SCANNER_ENABLED
             // We are stuck waiting for another script. Lets check the source that
             // was just document.write()n for anything to load.
             PreloadScanner documentWritePreloadScanner(m_doc);
             documentWritePreloadScanner.begin();
             documentWritePreloadScanner.write(prependingSrc);
             documentWritePreloadScanner.end();
 #endif
         } else {
             m_state = state;
             write(prependingSrc, false);
             state = m_state;
         }
     }
 
     m_currentPrependingSrc = savedPrependingSrc;
 
     return state;
 }

 // Accumulates comment text in m_scriptCode until a terminator is found: "-->",
 // "--!>", or a bare '>' when broken-comment handling applies. Outside of
 // raw-text elements the comment is emitted as a begin/end comment token pair;
 // inside them the text simply stays in m_scriptCode. Returns the updated
 // state, with inComment cleared once the comment has ended.
 HTMLTokenizer::State HTMLTokenizer::parseComment(SegmentedString& src, State state)
 {
     // FIXME: Why does this code even run for comments inside <script> and <style>? This seems bogus.
     checkScriptBuffer(src.length());
     for (; !src.isEmpty(); src.advance(m_lineNumber)) {
         const UChar current = *src;
         m_scriptCode[m_scriptCodeSize++] = current;
         if (current != '>')
             continue;

         // Number of trailing characters that make up the terminator, counting the '>' itself.
         int terminatorLength = 1;
         if (m_scriptCodeSize > 2 && m_scriptCode[m_scriptCodeSize - 3] == '-' && m_scriptCode[m_scriptCodeSize - 2] == '-')
             terminatorLength = 3; // "-->"
         else if (m_scriptCodeSize > 3 && m_scriptCode[m_scriptCodeSize - 4] == '-' && m_scriptCode[m_scriptCodeSize - 3] == '-'
             && m_scriptCode[m_scriptCodeSize - 2] == '!') {
             // Other browsers accept "--!>" as a comment close even though it is
             // not technically valid.
             terminatorLength = 4;
         }

         // In broken-comment mode a bare '>' ends the comment, except in <script> and <style>.
         const bool acceptBareGreaterThan = m_brokenComments && !state.inScript() && !state.inStyle();
         if (!acceptBareGreaterThan && terminatorLength == 1)
             continue;

         src.advancePastNonNewline();

         const bool insideRawTextElement = state.inTitle() || state.inScript() || state.inXmp()
             || state.inTextArea() || state.inStyle() || state.inIFrame();
         if (!insideRawTextElement) {
             checkScriptBuffer();
             m_scriptCode[m_scriptCodeSize] = 0;
             m_scriptCode[m_scriptCodeSize + 1] = 0;

             // Opening comment token, carrying the text without its terminator.
             m_currentToken.tagName = commentAtom;
             m_currentToken.beginTag = true;
             state = processListing(SegmentedString(m_scriptCode, m_scriptCodeSize - terminatorLength), state);
             processToken();

             // Matching closing comment token.
             m_currentToken.tagName = commentAtom;
             m_currentToken.beginTag = false;
             processToken();
             m_scriptCodeSize = 0;
         }

         state.setInComment(false);
         return state; // The comment is complete.
     }

     return state;
 }

 // Skips a server-side include block: characters are buffered in m_scriptCode
 // until the closing "%>" is seen, at which point the buffered text is dropped
 // and the inServer flag is cleared. Returns the updated state.
 HTMLTokenizer::State HTMLTokenizer::parseServer(SegmentedString& src, State state)
 {
     checkScriptBuffer(src.length());
     for (; !src.isEmpty(); src.advance(m_lineNumber)) {
         const UChar current = *src;
         m_scriptCode[m_scriptCodeSize++] = current;

         const bool closesServerBlock = current == '>' && m_scriptCodeSize > 1 && m_scriptCode[m_scriptCodeSize - 2] == '%';
         if (!closesServerBlock)
             continue;

         src.advancePastNonNewline();
         state.setInServer(false);
         m_scriptCodeSize = 0;
         return state; // The server include is complete.
     }
     return state;
 }

 // Skips over a processing instruction. It ends at "?>", or at any '>' that is
 // not inside quotes. On completion the inProcessingInstruction flag is cleared
 // and discardLF is set. Returns the updated state.
 HTMLTokenizer::State HTMLTokenizer::parseProcessingInstruction(SegmentedString& src, State state)
 {
     UChar previous = 0;
     while (!src.isEmpty()) {
         const UChar current = *src;
         switch (current) {
         case '\'':
             tquote = tquote == SingleQuote ? NoQuote : SingleQuote;
             break;
         case '\"':
             tquote = tquote == DoubleQuote ? NoQuote : DoubleQuote;
             break;
         case '>':
             // Look for '?>'. Some sites omit the '?' before the '>', so an
             // unquoted '>' is accepted as well (IE compatible).
             if (!tquote || previous == '?') {
                 state.setInProcessingInstruction(false);
                 src.advancePastNonNewline();
                 state.setDiscardLF(true);
                 return state; // The processing instruction is complete.
             }
             break;
         default:
             break;
         }
         src.advance(m_lineNumber);
         previous = current;
     }

     return state;
 }

 // Copies plain text from |src| to the output buffer, converting CR and CRLF
 // line endings to a single '\n'. Returns the updated state.
 HTMLTokenizer::State HTMLTokenizer::parseText(SegmentedString& src, State state)
 {
     while (!src.isEmpty()) {
         const UChar current = *src;

         // An LF directly after a CR was already emitted as '\n': drop it.
         if (state.skipLF()) {
             state.setSkipLF(false);
             if (current == '\n') {
                 src.advancePastNewline(m_lineNumber);
                 continue;
             }
         }

         // Make sure there is room for one more character.
         checkBuffer();

         if (current != '\r')
             *m_dest++ = current;
         else {
             state.setSkipLF(true);
             *m_dest++ = '\n';
         }
         src.advance(m_lineNumber);
     }

     return state;
 }


 // Parses a character reference; the caller has already consumed the leading '&'.
 //
 // The work is resumable: progress is kept in state.entityState(), m_cBuffer /
 // cBufferPos (the raw characters seen so far) and EntityUnicodeValue, so the
 // function can be re-entered with start == false when |src| ran out mid-entity.
 //
 //   src        - input; consumed as the entity is recognized.
 //   dest       - output cursor; advanced when text is written through it.
 //   state      - tokenizer state; returned with the updated entity state.
 //   cBufferPos - number of characters currently stored in m_cBuffer.
 //   start      - true to begin a new entity, false to continue a previous one.
 //   parsingTag - true when parsing inside a tag; enables an IE-compatibility
 //                rule for named entities (see below).
 //
 // On success outside view-source mode, the decoded character is pushed back
 // onto |src| rather than written to |dest|. In view-source mode, or when the
 // entity is not valid, the original text ('&' plus the buffered characters) is
 // written to |dest| instead.
 HTMLTokenizer::State HTMLTokenizer::parseEntity(SegmentedString& src, UChar*& dest, State state, unsigned& cBufferPos, bool start, bool parsingTag)
 {
     if (start) {
         cBufferPos = 0;
         state.setEntityState(SearchEntity);
         EntityUnicodeValue = 0;
     }
 
     while(!src.isEmpty()) {
         UChar cc = *src;
         switch(state.entityState()) {
         case NoEntity:
             // Should be unreachable: this function is only entered with an active entity state.
             ASSERT(state.entityState() != NoEntity);
             return state;
 
         case SearchEntity:
             // '#' introduces a numeric reference; anything else is tried as a name.
             if (cc == '#') {
                 m_cBuffer[cBufferPos++] = cc;
                 src.advancePastNonNewline();
                 state.setEntityState(NumericSearch);
             } else
                 state.setEntityState(EntityName);
             break;
 
         case NumericSearch:
             // "&#x" / "&#X" is hexadecimal, "&#<digit>" is decimal; otherwise give up.
             if (cc == 'x' || cc == 'X') {
                 m_cBuffer[cBufferPos++] = cc;
                 src.advancePastNonNewline();
                 state.setEntityState(Hexadecimal);
             } else if (cc >= '0' && cc <= '9')
                 state.setEntityState(Decimal);
             else
                 state.setEntityState(SearchSemicolon);
             break;
 
         case Hexadecimal: {
             // m_cBuffer holds at most 10 characters here, "#x" included.
             int ll = min(src.length(), 10 - cBufferPos);
             while (ll--) {
                 cc = *src;
                 if (!((cc >= '0' && cc <= '9') || (cc >= 'a' && cc <= 'f') || (cc >= 'A' && cc <= 'F'))) {
                     state.setEntityState(SearchSemicolon);
                     break;
                 }
                 int digit;
                 if (cc < 'A')
                     digit = cc - '0';
                 else
                     digit = (cc - 'A' + 10) & 0xF; // handle both upper and lower case without a branch
                 EntityUnicodeValue = EntityUnicodeValue * 16 + digit;
                 m_cBuffer[cBufferPos++] = cc;
                 src.advancePastNonNewline();
             }
             if (cBufferPos == 10)
                 state.setEntityState(SearchSemicolon);
             break;
         }
         case Decimal:
         {
             // m_cBuffer holds at most 9 characters here, the '#' included.
             int ll = min(src.length(), 9-cBufferPos);
             while(ll--) {
                 cc = *src;
 
                 if (!(cc >= '0' && cc <= '9')) {
                     state.setEntityState(SearchSemicolon);
                     break;
                 }
 
                 EntityUnicodeValue = EntityUnicodeValue * 10 + (cc - '0');
                 m_cBuffer[cBufferPos++] = cc;
                 src.advancePastNonNewline();
             }
             if (cBufferPos == 9)
                 state.setEntityState(SearchSemicolon);
             break;
         }
         case EntityName:
         {
             // Collect up to 9 ASCII alphanumeric characters as the entity name.
             int ll = min(src.length(), 9-cBufferPos);
             while(ll--) {
                 cc = *src;
 
                 if (!((cc >= 'a' && cc <= 'z') || (cc >= '0' && cc <= '9') || (cc >= 'A' && cc <= 'Z'))) {
                     state.setEntityState(SearchSemicolon);
                     break;
                 }
 
                 m_cBuffer[cBufferPos++] = cc;
                 src.advancePastNonNewline();
             }
             if (cBufferPos == 9)
                 state.setEntityState(SearchSemicolon);
             if (state.entityState() == SearchSemicolon) {
                 if(cBufferPos > 1) {
                     // Since the maximum length of entity name is 9,
                     // so a single char array which is allocated on
                     // the stack, its length is 10, should be OK.
                     // Also if we have an illegal character, we treat it
                     // as illegal entity name.
                     unsigned testedEntityNameLen = 0;
                     char tmpEntityNameBuffer[10];
 
                     ASSERT(cBufferPos < 10);
                     for (; testedEntityNameLen < cBufferPos; ++testedEntityNameLen) {
                         if (m_cBuffer[testedEntityNameLen] > 0x7e)
                             break;
                         tmpEntityNameBuffer[testedEntityNameLen] = m_cBuffer[testedEntityNameLen];
                     }
 
                     const Entity *e;
 
                     if (testedEntityNameLen == cBufferPos)
                         e = findEntity(tmpEntityNameBuffer, cBufferPos);
                     else
                         e = 0;
 
                     if(e)
                         EntityUnicodeValue = e->code;
 
                     // be IE compatible
                     // Inside a tag, a named entity above 255 only counts when it is
                     // terminated by ';'.
                     // NOTE(review): if the loop above consumed the last character of
                     // |src| (name of exactly 9 characters), *src is evaluated here and
                     // in SearchSemicolon on an exhausted source -- confirm that
                     // SegmentedString::operator* is safe in that case.
                     if(parsingTag && EntityUnicodeValue > 255 && *src != ';')
                         EntityUnicodeValue = 0;
                 }
             }
             else
                 break;
             // Deliberate fall-through: the name is complete, so finish the entity
             // in SearchSemicolon without another trip around the loop.
         }
         case SearchSemicolon:
             // Don't allow values that are more than 21 bits.
             if (EntityUnicodeValue > 0 && EntityUnicodeValue <= 0x10FFFF) {
                 if (!inViewSourceMode()) {
                     // The terminating ';' is optional.
                     if (*src == ';')
                         src.advancePastNonNewline();
                     // The decoded character is pushed back onto |src| so that the
                     // caller processes it like ordinary input.
                     if (EntityUnicodeValue <= 0xFFFF) {
                         checkBuffer();
                         src.push(fixUpChar(EntityUnicodeValue));
                     } else {
                         // Convert to UTF-16, using surrogate code points.
                         checkBuffer(2);
                         src.push(U16_LEAD(EntityUnicodeValue));
                         src.push(U16_TRAIL(EntityUnicodeValue));
                     }
                 } else {
                     // FIXME: We should eventually colorize entities by sending them as a special token.
                     // 12 bytes required: up to 10 bytes in m_cBuffer plus the
                     // leading '&' and trailing ';'
                     // NOTE(review): parseNonHTMLText() passes a |dest| that points into
                     // m_scriptCode; checkBuffer() is not visible here -- confirm it
                     // guarantees room in the buffer that |dest| actually refers to.
                     checkBuffer(12);
                     *dest++ = '&';
                     for (unsigned i = 0; i < cBufferPos; i++)
                         dest[i] = m_cBuffer[i];
                     dest += cBufferPos;
                     if (*src == ';') {
                         *dest++ = ';';
                         src.advancePastNonNewline();
                     }
                 }
             } else {
                 // 11 bytes required: up to 10 bytes in m_cBuffer plus the
                 // leading '&'
                 checkBuffer(11);
                 // ignore the sequence, add it to the buffer as plaintext
                 *dest++ = '&';
                 for (unsigned i = 0; i < cBufferPos; i++)
                     dest[i] = m_cBuffer[i];
                 dest += cBufferPos;
             }
 
             state.setEntityState(NoEntity);
             return state;
         }
     }
 
     // |src| ran out before the entity was complete; the entity state is kept so
     // that a later call with start == false can continue.
     return state;
 }

 // Consumes the remainder of a <!DOCTYPE ...> declaration from |src|, looking
 // at one character per loop iteration. The current sub-state is kept in
 // m_doctypeToken (not in a local), and the name, public identifier and system
 // identifier are accumulated into m_doctypeToken as they are recognized.
 //
 // The loop ends when |src| is exhausted or when a '>' clears the inDoctype
 // flag on |state|; the updated |state| is returned.
 //
 // Well-formed doctypes are always handed to processDoctypeToken(). Malformed
 // ("bogus") doctypes are only handed over in view-source mode. In that mode
 // every consumed character except the closing '>' is also appended to
 // m_doctypeToken.m_source.
 HTMLTokenizer::State HTMLTokenizer::parseDoctype(SegmentedString& src, State state)
 {
     ASSERT(state.inDoctype());
     while (!src.isEmpty() && state.inDoctype()) {
         UChar c = *src;
         bool isWhitespace = c == '\r' || c == '\n' || c == '\t' || c == ' ';
         switch (m_doctypeToken.state()) {
             case DoctypeBegin: {
                 // Always move on to DoctypeBeforeName; a non-whitespace
                 // character is left in |src| and re-examined there.
                 m_doctypeToken.setState(DoctypeBeforeName);
                 if (isWhitespace) {
                     src.advance(m_lineNumber);
                     if (inViewSourceMode())
                         m_doctypeToken.m_source.append(c);
                 }
                 break;
             }
             case DoctypeBeforeName: {
                 if (c == '>') {
                     // Malformed.  Just exit.
                     src.advancePastNonNewline();
                     state.setInDoctype(false);
                     if (inViewSourceMode())
                         processDoctypeToken();
                 } else if (isWhitespace) {
                     src.advance(m_lineNumber);
                     if (inViewSourceMode())
                         m_doctypeToken.m_source.append(c);
                 } else {
                     // First character of the name; it is consumed by the
                     // DoctypeName state on the next iteration.
                     m_doctypeToken.setState(DoctypeName);
                 }
                 break;
             }
             case DoctypeName: {
                 if (c == '>') {
                     // Valid doctype. Emit it.
                     src.advancePastNonNewline();
                     state.setInDoctype(false);
                     processDoctypeToken();
                 } else if (isWhitespace) {
                     m_doctypeSearchCount = 0; // Used now to scan for PUBLIC
                     m_doctypeSecondarySearchCount = 0; // Used now to scan for SYSTEM
                     m_doctypeToken.setState(DoctypeAfterName);
                     src.advance(m_lineNumber);
                     if (inViewSourceMode())
                         m_doctypeToken.m_source.append(c);
                 } else {
                     src.advancePastNonNewline();
                     m_doctypeToken.m_name.append(c);
                     if (inViewSourceMode())
                         m_doctypeToken.m_source.append(c);
                 }
                 break;
             }
             case DoctypeAfterName: {
                 if (c == '>') {
                     // Valid doctype. Emit it.
                     src.advancePastNonNewline();
                     state.setInDoctype(false);
                     processDoctypeToken();
                 } else if (!isWhitespace) {
                     // Match case-insensitively against publicStart first and
                     // systemStart second; the two counters record how many
                     // characters of each keyword have matched so far.
                     src.advancePastNonNewline();
                     if (toASCIILower(c) == publicStart[m_doctypeSearchCount]) {
                         m_doctypeSearchCount++;
                         if (m_doctypeSearchCount == 6)
                             // Found 'PUBLIC' sequence
                             m_doctypeToken.setState(DoctypeBeforePublicID);
                     } else if (m_doctypeSearchCount > 0) {
                         // A partially matched PUBLIC keyword was broken off.
                         m_doctypeSearchCount = 0;
                         m_doctypeToken.setState(DoctypeBogus);
                     } else if (toASCIILower(c) == systemStart[m_doctypeSecondarySearchCount]) {
                         m_doctypeSecondarySearchCount++;
                         if (m_doctypeSecondarySearchCount == 6)
                             // Found 'SYSTEM' sequence
                             m_doctypeToken.setState(DoctypeBeforeSystemID);
                     } else {
                         m_doctypeSecondarySearchCount = 0;
                         m_doctypeToken.setState(DoctypeBogus);
                     }
                     if (inViewSourceMode())
                         m_doctypeToken.m_source.append(c);
                 } else {
                     src.advance(m_lineNumber); // Whitespace keeps us in the after name state.
                     if (inViewSourceMode())
                         m_doctypeToken.m_source.append(c);
                 }
                 break;
             }
             case DoctypeBeforePublicID: {
                 if (c == '\"' || c == '\'') {
                     // Remember which quote opened the identifier so that only
                     // the matching quote closes it.
                     tquote = c == '\"' ? DoubleQuote : SingleQuote;
                     m_doctypeToken.setState(DoctypePublicID);
                     src.advancePastNonNewline();
                     if (inViewSourceMode())
                         m_doctypeToken.m_source.append(c);
                 } else if (c == '>') {
                     // Considered bogus.  Don't process the doctype.
                     src.advancePastNonNewline();
                     state.setInDoctype(false);
                     if (inViewSourceMode())
                         processDoctypeToken();
                 } else if (isWhitespace) {
                     src.advance(m_lineNumber);
                     if (inViewSourceMode())
                         m_doctypeToken.m_source.append(c);
                 } else
                     m_doctypeToken.setState(DoctypeBogus);
                 break;
             }
             case DoctypePublicID: {
                 if ((c == '\"' && tquote == DoubleQuote) || (c == '\'' && tquote == SingleQuote)) {
                     src.advancePastNonNewline();
                     m_doctypeToken.setState(DoctypeAfterPublicID);
                     if (inViewSourceMode())
                         m_doctypeToken.m_source.append(c);
                 } else if (c == '>') {
                     // Considered bogus.  Don't process the doctype.
                     src.advancePastNonNewline();
                     state.setInDoctype(false);
                     if (inViewSourceMode())
                         processDoctypeToken();
                 } else {
                     m_doctypeToken.m_publicID.append(c);
                     src.advance(m_lineNumber);
                     if (inViewSourceMode())
                         m_doctypeToken.m_source.append(c);
                 }
                 break;
             }
             case DoctypeAfterPublicID:
                 if (c == '\"' || c == '\'') {
                     // A quote directly after the public identifier starts the
                     // system identifier.
                     tquote = c == '\"' ? DoubleQuote : SingleQuote;
                     m_doctypeToken.setState(DoctypeSystemID);
                     src.advancePastNonNewline();
                     if (inViewSourceMode())
                         m_doctypeToken.m_source.append(c);
                 } else if (c == '>') {
                     // Valid doctype. Emit it now.
                     src.advancePastNonNewline();
                     state.setInDoctype(false);
                     processDoctypeToken();
                 } else if (isWhitespace) {
                     src.advance(m_lineNumber);
                     if (inViewSourceMode())
                         m_doctypeToken.m_source.append(c);
                 } else
                     m_doctypeToken.setState(DoctypeBogus);
                 break;
             case DoctypeBeforeSystemID:
                 if (c == '\"' || c == '\'') {
                     tquote = c == '\"' ? DoubleQuote : SingleQuote;
                     m_doctypeToken.setState(DoctypeSystemID);
                     src.advancePastNonNewline();
                     if (inViewSourceMode())
                         m_doctypeToken.m_source.append(c);
                 } else if (c == '>') {
                     // Considered bogus.  Don't process the doctype, but still
                     // report it in view-source mode like every other bogus
                     // exit does, so the declaration is not lost from the
                     // view-source output.
                     src.advancePastNonNewline();
                     state.setInDoctype(false);
                     if (inViewSourceMode())
                         processDoctypeToken();
                 } else if (isWhitespace) {
                     src.advance(m_lineNumber);
                     if (inViewSourceMode())
                         m_doctypeToken.m_source.append(c);
                 } else
                     m_doctypeToken.setState(DoctypeBogus);
                 break;
             case DoctypeSystemID:
                 if ((c == '\"' && tquote == DoubleQuote) || (c == '\'' && tquote == SingleQuote)) {
                     src.advancePastNonNewline();
                     m_doctypeToken.setState(DoctypeAfterSystemID);
                     if (inViewSourceMode())
                         m_doctypeToken.m_source.append(c);
                 } else if (c == '>') {
                     // Considered bogus.  Don't process the doctype.
                     src.advancePastNonNewline();
                     state.setInDoctype(false);
                     if (inViewSourceMode())
                         processDoctypeToken();
                 } else {
                     m_doctypeToken.m_systemID.append(c);
                     src.advance(m_lineNumber);
                     if (inViewSourceMode())
                         m_doctypeToken.m_source.append(c);
                 }
                 break;
             case DoctypeAfterSystemID:
                 if (c == '>') {
                     // Valid doctype. Emit it now.
                     src.advancePastNonNewline();
                     state.setInDoctype(false);
                     processDoctypeToken();
                 } else if (isWhitespace) {
                     src.advance(m_lineNumber);
                     if (inViewSourceMode())
                         m_doctypeToken.m_source.append(c);
                 } else
                     m_doctypeToken.setState(DoctypeBogus);
                 break;
             case DoctypeBogus:
                 if (c == '>') {
                     // Done with the bogus doctype.
                     src.advancePastNonNewline();
                     state.setInDoctype(false);
                     if (inViewSourceMode())
                         processDoctypeToken();
                 } else {
                     src.advance(m_lineNumber); // Just keep scanning for '>'
                     if (inViewSourceMode())
                         m_doctypeToken.m_source.append(c);
                 }
                 break;
             default:
                 break;
         }
     }
     return state;
 }

 // Tokenizes the inside of a tag as a state machine driven by
 // state.tagState(). Characters are consumed from |src| until the input is
 // exhausted or the tag is finished; the updated |state| is returned.
 //
 // The tag and attribute names are assembled in m_cBuffer, attribute values
 // in m_buffer (written through m_dest). The write position in m_cBuffer is
 // kept in a local copy of m_cBufferPos that is stored back into the member
 // on every return path.
 //
 // NOTE(review): this looks like it is entered after the caller has already
 // consumed the opening '<' (the comment/doctype prefix matching starts from
 // a non-zero searchCount) -- confirm against the caller.
 HTMLTokenizer::State HTMLTokenizer::parseTag(SegmentedString& src, State state)
 {
     ASSERT(!state.hasEntityState());

     unsigned cBufferPos = m_cBufferPos;

     // Whether the last character skipped in SearchEqual was a '/'. Being a
     // local, it starts out false on every call.
     bool lastIsSlash = false;

     while (!src.isEmpty()) {
         checkBuffer();
         switch(state.tagState()) {
         case NoTag:
         {
             // Not (or no longer) inside a tag: hand control back.
             m_cBufferPos = cBufferPos;
             return state;
         }
         case TagName:
         {
             // searchCount is the number of characters of commentStart matched
             // so far; reaching 4 means the whole '<!--' prefix was seen.
             if (searchCount > 0) {
                 if (*src == commentStart[searchCount]) {
                     searchCount++;
                     if (searchCount == 2)
                         m_doctypeSearchCount++; // A '!' is also part of a doctype, so we are moving through that still as well.
                     else
                         m_doctypeSearchCount = 0;
                     if (searchCount == 4) {
                         // Found '<!--' sequence
                         src.advancePastNonNewline();
                         m_dest = m_buffer; // ignore the previous part of this tag
                         state.setInComment(true);
                         state.setTagState(NoTag);

                         // Fix bug 34302 at kde.bugs.org.  Go ahead and treat
                         // <!--> as a valid comment, since both mozilla and IE on windows
                         // can handle this case.  Only do this in quirks mode. -dwh
                         if (!src.isEmpty() && *src == '>' && m_doc->inCompatMode()) {
                             state.setInComment(false);
                             src.advancePastNonNewline();
                             // Note: the next character is copied into m_cBuffer
                             // here without being consumed from src.
                             if (!src.isEmpty())
                                 m_cBuffer[cBufferPos++] = *src;
                         } else
                           state = parseComment(src, state);

                         m_cBufferPos = cBufferPos;
                         return state; // Finished parsing tag!
                     }
                     m_cBuffer[cBufferPos++] = *src;
                     src.advancePastNonNewline();
                     break;
                 } else
                     searchCount = 0; // Stop looking for '<!--' sequence
             }

             // m_doctypeSearchCount is the number of characters of doctypeStart
             // matched so far (case-insensitively); 9 means '<!DOCTYPE'.
             if (m_doctypeSearchCount > 0) {
                 if (toASCIILower(*src) == doctypeStart[m_doctypeSearchCount]) {
                     m_doctypeSearchCount++;
                     m_cBuffer[cBufferPos++] = *src;
                     src.advancePastNonNewline();
                     if (m_doctypeSearchCount == 9) {
                         // Found '<!DOCTYPE' sequence
                         state.setInDoctype(true);
                         state.setTagState(NoTag);
                         m_doctypeToken.reset();
                         if (inViewSourceMode())
                             m_doctypeToken.m_source.append(m_cBuffer, cBufferPos);
                         state = parseDoctype(src, state);
                         m_cBufferPos = cBufferPos;
                         return state;
                     }
                     break;
                 } else
                     m_doctypeSearchCount = 0; // Stop looking for '<!DOCTYPE' sequence
             }

             // Copy tag-name characters into m_cBuffer, bounded by both the
             // available input and the space left in the buffer. Whitespace,
             // '>' and '<' end the name and are left in src.
             bool finish = false;
             unsigned int ll = min(src.length(), CBUFLEN - cBufferPos);
             while (ll--) {
                 UChar curchar = *src;
                 if (isASCIISpace(curchar) || curchar == '>' || curchar == '<') {
                     finish = true;
                     break;
                 }

                 // tolower() shows up on profiles. This is faster!
                 if (curchar >= 'A' && curchar <= 'Z' && !inViewSourceMode())
                     m_cBuffer[cBufferPos++] = curchar + ('a' - 'A');
                 else
                     m_cBuffer[cBufferPos++] = curchar;
                 src.advancePastNonNewline();
             }

             // Disadvantage: we add the possible rest of the tag
             // as attribute names. ### judge if this causes problems
             if (finish || CBUFLEN == cBufferPos) {
                 bool beginTag;
                 UChar* ptr = m_cBuffer;
                 unsigned int len = cBufferPos;
                 // NUL-terminate: AtomicString(ptr) below is given no length.
                 m_cBuffer[cBufferPos] = '\0';
                 if ((cBufferPos > 0) && (*ptr == '/')) {
                     // End Tag
                     beginTag = false;
                     ptr++;
                     len--;
                 }
                 else
                     // Start Tag
                     beginTag = true;

                 // Ignore the / in fake xml tags like <br/>.  We trim off the "/" so that we'll get "br" as the tag name and not "br/".
                 if (len > 1 && ptr[len-1] == '/' && !inViewSourceMode())
                     ptr[--len] = '\0';

                 // Now that we've shaved off any invalid / that might have followed the name), make the tag.
                 // FIXME: FireFox and WinIE turn !foo nodes into comments, we ignore comments. (fast/parser/tag-with-exclamation-point.html)
                 if (ptr[0] != '!' || inViewSourceMode()) {
                     m_currentToken.tagName = AtomicString(ptr);
                     m_currentToken.beginTag = beginTag;
                 }
                 m_dest = m_buffer;
                 state.setTagState(SearchAttribute);
                 cBufferPos = 0;
             }
             break;
         }
         case SearchAttribute:
             // Skip whitespace and stray quotes until a character that either
             // starts an attribute name or ends the tag ('<' or '>'). That
             // character is not consumed here.
             while(!src.isEmpty()) {
                 UChar curchar = *src;
                 // In this mode just ignore any quotes we encounter and treat them like spaces.
                 if (!isASCIISpace(curchar) && curchar != '\'' && curchar != '"') {
                     if (curchar == '<' || curchar == '>')
                         state.setTagState(SearchEnd);
                     else
                         state.setTagState(AttributeName);

                     cBufferPos = 0;
                     break;
                 }
                 if (inViewSourceMode())
                     m_currentToken.addViewSourceChar(curchar);
                 src.advance(m_lineNumber);
             }
             break;
         case AttributeName:
         {
             // Accumulate the attribute name in m_cBuffer, bounded by the
             // available input and the remaining buffer space.
             int ll = min(src.length(), CBUFLEN - cBufferPos);
             while (ll--) {
                 UChar curchar = *src;
                 // If we encounter a "/" when scanning an attribute name, treat it as a delimiter.  This allows the
                 // cases like <input type=checkbox checked/> to work (and accommodates XML-style syntax as per HTML5).
                 // The range test below also makes '<', '=' and '>' delimiters.
                 if (curchar <= '>' && (curchar >= '<' || isASCIISpace(curchar) || curchar == '/')) {
                     m_cBuffer[cBufferPos] = '\0';
                     m_attrName = AtomicString(m_cBuffer);
                     // Reset the value buffer and skip its first slot; values
                     // are later read starting at m_buffer + 1.
                     m_dest = m_buffer;
                     *m_dest++ = 0;
                     state.setTagState(SearchEqual);
                     // NOTE(review): 'a' is presumably a marker for an attribute
                     // name in the view-source character stream -- confirm.
                     if (inViewSourceMode())
                         m_currentToken.addViewSourceChar('a');
                     break;
                 }

                 // tolower() shows up on profiles. This is faster!
                 if (curchar >= 'A' && curchar <= 'Z' && !inViewSourceMode())
                     m_cBuffer[cBufferPos++] = curchar + ('a' - 'A');
                 else
                     m_cBuffer[cBufferPos++] = curchar;

                 src.advance(m_lineNumber);
             }
             // The buffer filled up before a delimiter was seen: cut the name
             // off here and proceed as if a delimiter had been found.
             if (cBufferPos == CBUFLEN) {
                 m_cBuffer[cBufferPos] = '\0';
                 m_attrName = AtomicString(m_cBuffer);
                 m_dest = m_buffer;
                 *m_dest++ = 0;
                 state.setTagState(SearchEqual);
                 if (inViewSourceMode())
                     m_currentToken.addViewSourceChar('a');
             }
             break;
         }
         case SearchEqual:
             // After an attribute name: look for '='. Anything else that is
             // not whitespace, a quote or a slash means the attribute has no
             // value; it is added with an empty value and that character is
             // left for SearchAttribute.
             while (!src.isEmpty()) {
                 UChar curchar = *src;

                 if (lastIsSlash && curchar == '>') {
                     // This is a quirk (with a long sad history).  We have to do this
                     // since widgets do <script src="foo.js"/> and expect the tag to close.
                     if (m_currentToken.tagName == scriptTag)
                         m_currentToken.selfClosingTag = true;
                     m_currentToken.brokenXMLStyle = true;
                 }

                 // In this mode just ignore any quotes or slashes we encounter and treat them like spaces.
                 if (!isASCIISpace(curchar) && curchar != '\'' && curchar != '"' && curchar != '/') {
                     if (curchar == '=') {
                         state.setTagState(SearchValue);
                         if (inViewSourceMode())
                             m_currentToken.addViewSourceChar(curchar);
                         src.advancePastNonNewline();
                     } else {
                         m_currentToken.addAttribute(m_attrName, emptyAtom, inViewSourceMode());
                         m_dest = m_buffer;
                         state.setTagState(SearchAttribute);
                         lastIsSlash = false;
                     }
                     break;
                 }
                 if (inViewSourceMode())
                     m_currentToken.addViewSourceChar(curchar);

                 lastIsSlash = curchar == '/';

                 src.advance(m_lineNumber);
             }
             break;
         case SearchValue:
             // After '=': skip whitespace, then decide between a quoted value
             // (the opening quote is consumed and remembered in tquote) and an
             // unquoted one (the first character is left in src).
             while (!src.isEmpty()) {
                 UChar curchar = *src;
                 if (!isASCIISpace(curchar)) {
                     if (curchar == '\'' || curchar == '\"') {
                         tquote = curchar == '\"' ? DoubleQuote : SingleQuote;
                         state.setTagState(QuotedValue);
                         if (inViewSourceMode())
                             m_currentToken.addViewSourceChar(curchar);
                         src.advancePastNonNewline();
                     } else
                         state.setTagState(Value);

                     break;
                 }
                 if (inViewSourceMode())
                     m_currentToken.addViewSourceChar(curchar);
                 src.advance(m_lineNumber);
             }
             break;
         case QuotedValue:
             // Copy the value into m_buffer until the matching quote. The
             // 'curchar <= '>'' test is a cheap pre-filter: every character
             // handled specially below ('>', '&', '\'', '"') is <= '>'.
             while (!src.isEmpty()) {
                 checkBuffer();

                 UChar curchar = *src;
                 if (curchar <= '>' && !src.escaped()) {
                     if (curchar == '>' && m_attrName.isEmpty()) {
                         // Handle a case like <img '>.  Just go ahead and be willing
                         // to close the whole tag.  Don't consume the character and
                         // just go back into SearchEnd while ignoring the whole
                         // value.
                         // FIXME: Note that this is actually not a very good solution.
                         // It doesn't handle the general case of
                         // unmatched quotes among attributes that have names. -dwh
                         while (m_dest > m_buffer + 1 && (m_dest[-1] == '\n' || m_dest[-1] == '\r'))
                             m_dest--; // remove trailing newlines
                         AtomicString attributeValue(m_buffer + 1, m_dest - m_buffer - 1);
                         if (!attributeValue.contains('/'))
                             m_attrName = attributeValue; // Just make the name/value match. (FIXME: Is this some WinIE quirk?)
                         m_currentToken.addAttribute(m_attrName, attributeValue, inViewSourceMode());
                         if (inViewSourceMode())
                             m_currentToken.addViewSourceChar('x');
                         state.setTagState(SearchAttribute);
                         m_dest = m_buffer;
                         tquote = NoQuote;
                         break;
                     }

                     if (curchar == '&') {
                         // Entity inside the value: parseEntity is handed
                         // m_dest and cBufferPos along with the source.
                         src.advancePastNonNewline();
                         state = parseEntity(src, m_dest, state, cBufferPos, true, true);
                         break;
                     }

                     if ((tquote == SingleQuote && curchar == '\'') || (tquote == DoubleQuote && curchar == '\"')) {
                         // some <input type=hidden> rely on trailing spaces. argh
                         while (m_dest > m_buffer + 1 && (m_dest[-1] == '\n' || m_dest[-1] == '\r'))
                             m_dest--; // remove trailing newlines
                         AtomicString attributeValue(m_buffer + 1, m_dest - m_buffer - 1);
                         if (m_attrName.isEmpty() && !attributeValue.contains('/')) {
                             m_attrName = attributeValue; // Make the name match the value. (FIXME: Is this a WinIE quirk?)
                             if (inViewSourceMode())
                                 m_currentToken.addViewSourceChar('x');
                         } else if (inViewSourceMode())
                             m_currentToken.addViewSourceChar('v');
                         m_currentToken.addAttribute(m_attrName, attributeValue, inViewSourceMode());
                         m_dest = m_buffer;
                         state.setTagState(SearchAttribute);
                         tquote = NoQuote;
                         if (inViewSourceMode())
                             m_currentToken.addViewSourceChar(curchar);
                         src.advancePastNonNewline();
                         break;
                     }
                 }

                 *m_dest++ = curchar;
                 src.advance(m_lineNumber);
             }
             break;
         case Value:
             // Unquoted value: copy characters into m_buffer until whitespace
             // or '>', neither of which is consumed here.
             while(!src.isEmpty()) {
                 checkBuffer();
                 UChar curchar = *src;
                 if (curchar <= '>' && !src.escaped()) {
                     // parse Entities
                     if (curchar == '&') {
                         src.advancePastNonNewline();
                         state = parseEntity(src, m_dest, state, cBufferPos, true, true);
                         break;
                     }
                     // no quotes. Every space means end of value
                     // '/' does not delimit in IE!
                     if (isASCIISpace(curchar) || curchar == '>') {
                         AtomicString attributeValue(m_buffer + 1, m_dest - m_buffer - 1);
                         m_currentToken.addAttribute(m_attrName, attributeValue, inViewSourceMode());
                         if (inViewSourceMode())
                             m_currentToken.addViewSourceChar('v');
                         m_dest = m_buffer;
                         state.setTagState(SearchAttribute);
                         break;
                     }
                 }

                 *m_dest++ = curchar;
                 src.advance(m_lineNumber);
             }
             break;
         case SearchEnd:
         {
             // Skip ahead to the '>' that ends the tag (or a '<' that starts
             // another one), noting any '/' seen on the way.
             while (!src.isEmpty()) {
                 UChar ch = *src;
                 if (ch == '>' || ch == '<')
                     break;
                 if (ch == '/')
                     m_currentToken.selfClosingTag = true;
                 if (inViewSourceMode())
                     m_currentToken.addViewSourceChar(ch);
                 src.advance(m_lineNumber);
             }
             // Ran out of input before the end of the tag: stay in SearchEnd.
             if (src.isEmpty())
                 break;

             searchCount = 0; // Stop looking for '<!--' sequence
             state.setTagState(NoTag);
             tquote = NoQuote;

             // Consume the closing '>', but leave a '<' in src.
             if (*src != '<')
                 src.advance(m_lineNumber);

             if (m_currentToken.tagName == nullAtom) { //stop if tag is unknown
                 m_cBufferPos = cBufferPos;
                 return state;
             }

             // Copy the name and flags out of m_currentToken before
             // processToken() is called below.
             AtomicString tagName = m_currentToken.tagName;

             // Handle <script src="foo"/> like Mozilla/Opera. We have to do this now for Dashboard
             // compatibility.
             bool isSelfClosingScript = m_currentToken.selfClosingTag && m_currentToken.beginTag && m_currentToken.tagName == scriptTag;
             bool beginTag = !m_currentToken.selfClosingTag && m_currentToken.beginTag;
             if (m_currentToken.beginTag && m_currentToken.tagName == scriptTag && !inViewSourceMode() && !m_parser->skipMode()) {
                 // For a <script> start tag, reset the remembered src/charset
                 // and record the completed src URL, unless the XSS auditor
                 // rejects it. The URL is only looked up when the token has
                 // attributes, we are not parsing a fragment, and the frame
                 // has scripting enabled.
                 Attribute* a = 0;
                 m_scriptTagSrcAttrValue = String();
                 m_scriptTagCharsetAttrValue = String();
                 if (m_currentToken.attrs && !m_fragment) {
                     if (m_doc->frame() && m_doc->frame()->script()->isEnabled()) {
                         if ((a = m_currentToken.attrs->getAttributeItem(srcAttr))) {
                             m_scriptTagSrcAttrValue = m_doc->completeURL(parseURL(a->value())).string();
                             if (m_XSSAuditor && !m_XSSAuditor->canLoadExternalScriptFromSrc(a->value()))
                                 m_scriptTagSrcAttrValue = String();
                         }
                     }
                 }
             }

             RefPtr<Node> n = processToken();
             m_cBufferPos = cBufferPos;
             if (n || inViewSourceMode()) {
                 // Snapshot the position so the special-content parse below
                 // can be undone if it swallows the rest of the input.
                 State savedState = state;
                 SegmentedString savedSrc = src;
                 long savedLineno = m_lineNumber;
                 // For the elements below, a start tag switches to
                 // parseNonHTMLText(), which is given the end-tag string to
                 // stop at via m_searchStopper/m_searchStopperLength.
                 // NOTE(review): the lengths are presumably the character
                 // counts of the corresponding *End strings -- confirm.
                 if ((tagName == preTag || tagName == listingTag) && !inViewSourceMode()) {
                     if (beginTag)
                         state.setDiscardLF(true); // Discard the first LF after we open a pre.
                 } else if (tagName == scriptTag) {
                     ASSERT(!m_scriptNode);
                     m_scriptNode = static_pointer_cast<HTMLScriptElement>(n);
                     if (m_scriptNode)
                         m_scriptTagCharsetAttrValue = m_scriptNode->scriptCharset();
                     if (beginTag) {
                         m_searchStopper = scriptEnd;
                         m_searchStopperLength = 8;
                         state.setInScript(true);
                         state = parseNonHTMLText(src, state);
                     } else if (isSelfClosingScript) { // Handle <script src="foo"/>
                         state.setInScript(true);
                         state = scriptHandler(state);
                     }
                 } else if (tagName == styleTag) {
                     if (beginTag) {
                         m_searchStopper = styleEnd;
                         m_searchStopperLength = 7;
                         state.setInStyle(true);
                         state = parseNonHTMLText(src, state);
                     }
                 } else if (tagName == textareaTag) {
                     if (beginTag) {
                         m_searchStopper = textareaEnd;
                         m_searchStopperLength = 10;
                         state.setInTextArea(true);
                         state = parseNonHTMLText(src, state);
                     }
                 } else if (tagName == titleTag) {
                     if (beginTag) {
                         m_searchStopper = titleEnd;
                         m_searchStopperLength = 7;
                         state.setInTitle(true);
                         state = parseNonHTMLText(src, state);
                     }
                 } else if (tagName == xmpTag) {
                     if (beginTag) {
                         m_searchStopper = xmpEnd;
                         m_searchStopperLength = 5;
                         state.setInXmp(true);
                         state = parseNonHTMLText(src, state);
                     }
                 } else if (tagName == iframeTag) {
                     if (beginTag) {
                         m_searchStopper = iframeEnd;
                         m_searchStopperLength = 8;
                         state.setInIFrame(true);
                         state = parseNonHTMLText(src, state);
                     }
                 }
                 if (src.isEmpty() && (state.inTitle() || inViewSourceMode()) && !state.inComment() && !(state.inScript() && m_currentScriptTagStartLineNumber)) {
                     // We just ate the rest of the document as the #text node under the special tag!
                     // Reset the state then retokenize without special handling.
                     // Let the parser clean up the missing close tag.
                     // FIXME: This is incorrect, because src.isEmpty() doesn't mean we're
                     // at the end of the document unless m_noMoreData is also true. We need
                     // to detect this case elsewhere, and save the state somewhere other
                     // than a local variable.
                     state = savedState;
                     src = savedSrc;
                     m_lineNumber = savedLineno;
                     m_scriptCodeSize = 0;
                 }
             }
             // <plaintext> turns plain-text mode on for a start tag and off
             // otherwise (beginTag is false for end and self-closing tags).
             if (tagName == plaintextTag)
                 state.setInPlainText(beginTag);
             return state; // Finished parsing tag!
         }
         } // end switch
     }
     // Out of input in the middle of a tag: remember the buffer position so
     // the next call can pick up in the same tag state.
     m_cBufferPos = cBufferPos;
     return state;
 }

 // Decides whether write()'s tokenizing loop may handle one more character.
 // Returns true to keep going (and counts the character in processedCount), or false after
 // scheduling m_timer so that tokenizing resumes from timerFired().
 // Any yield request stored in |state| is consumed by this call.
 inline bool HTMLTokenizer::continueProcessing(int& processedCount, double startTime, State &state)
 {
     // Take the pending yield request (if any) and clear it so it only applies once.
     const bool yieldRequested = state.allowYield();
     state.setAllowYield(false);

     // The clock is only consulted once a whole chunk of characters has been processed (or a
     // yield was explicitly requested), and never while an external script is loading, the
     // tokenizer is forced synchronous, or a script is executing.
     const bool shouldCheckClock = !state.loadingExtScript() && !state.forceSynchronous() && !m_executingScript
         && (yieldRequested || processedCount > m_tokenizerChunkSize);
     if (!shouldCheckClock) {
         ++processedCount;
         return true;
     }

     // A chunk boundary was reached: start counting the next chunk from scratch.
     processedCount = 0;
     if (!(currentTime() - startTime > m_tokenizerTimeDelay)) {
         ++processedCount;
         return true;
     }

     /* FIXME: We'd like to yield aggressively to give stylesheets the opportunity to
        load, but this hurts overall performance on slower machines.  For now turn this
        off.
     || (!m_doc->haveStylesheetsLoaded() &&
         (m_doc->documentElement()->id() != ID_HTML || m_doc->body()))) {*/

     // Out of time budget: hand control to the timer, which continues as soon as possible.
     m_timer.startOneShot(0);
 #ifdef INSTRUMENT_LAYOUT_SCHEDULING
     if (currentTime() - startTime > m_tokenizerTimeDelay)
         printf("Deferring processing of data because 500ms elapsed away from event loop.\n");
 #endif
     return false;
 }

 // Accepts a piece of source text and tokenizes as much of the buffered input as is allowed
 // right now.
 //
 // str        - the text to add to the tokenizer's input.
 // appendData - within this function it only affects (a) whether text arriving while a script
 //              is executing is deferred, and (b) whether the preload scanner is fed/ended.
 //              In this file, timerFired(), finish() and parseHTMLDocumentFragment() pass true;
 //              notifyFinished() passes false when replaying m_pendingSrc.
 //
 // The text is either queued (when scripts are pending or executing) or appended to m_src and
 // consumed character by character until m_src is empty, a location change is scheduled on the
 // frame, or continueProcessing() asks to yield. The working State is copied from m_state on
 // entry and stored back on exit. May call end() before returning, which (per the note at that
 // call) can delete this tokenizer.
 void HTMLTokenizer::write(const SegmentedString& str, bool appendData)
 {
     // No output buffer means there is nothing to tokenize into (end() frees it and nulls it).
     if (!m_buffer)
         return;

     if (m_parserStopped)
         return;

     SegmentedString source(str);
     // Text handed over while a script is executing is flagged so that it is excluded from
     // line counting (see the excludeLineNumbers() check in the newline branch below).
     if (m_executingScript)
         source.setExcludeLineNumbers();

     if ((m_executingScript && appendData) || !m_pendingScripts.isEmpty()) {
         // don't parse; we will do this later
         if (m_currentPrependingSrc)
             m_currentPrependingSrc->append(source);
         else {
             m_pendingSrc.append(source);
 #if PRELOAD_SCANNER_ENABLED
             // The tokenizer is blocked, but an active preload scanner still gets to see the data.
             if (m_preloadScanner && m_preloadScanner->inProgress() && appendData)
                 m_preloadScanner->write(source);
 #endif
         }
         return;
     }

 #if PRELOAD_SCANNER_ENABLED
     // Normal tokenizing is about to run on this data, so an in-progress preload scan is ended.
     if (m_preloadScanner && m_preloadScanner->inProgress() && appendData)
         m_preloadScanner->end();
 #endif

     // Add to whatever input is still unconsumed, or make this the input if there is none.
     if (!m_src.isEmpty())
         m_src.append(source);
     else
         setSrc(source);

     // Once a timer is set, it has control of when the tokenizer continues.
     if (m_timer.isActive())
         return;

     // Remember whether this is a nested call so m_inWrite can be restored on the way out.
     bool wasInWrite = m_inWrite;
     m_inWrite = true;

 #ifdef INSTRUMENT_LAYOUT_SCHEDULING
     if (!m_doc->ownerElement())
         printf("Beginning write at time %d\n", m_doc->elapsedTime());
 #endif

     // Inputs for continueProcessing(): characters handled in the current chunk and the time
     // at which this call started.
     int processedCount = 0;
     double startTime = currentTime();

     Frame* frame = m_doc->frame();

     // Work on a local copy of the state; it is written back to m_state after the loop.
     State state = m_state;

     while (!m_src.isEmpty() && (!frame || !frame->loader()->isScheduledLocationChangePending())) {
         // Stop here if the time budget is used up; continueProcessing() has then armed m_timer.
         if (!continueProcessing(processedCount, startTime, state))
             break;

         // do we need to enlarge the buffer?
         checkBuffer();

         UChar cc = *m_src;

         // skipLF is set after a '\r' (see below) so that the '\n' of a CRLF pair is dropped.
         // The flag only ever applies to the very next character.
         bool wasSkipLF = state.skipLF();
         if (wasSkipLF)
             state.setSkipLF(false);

         if (wasSkipLF && (cc == '\n'))
             m_src.advance();
         else if (state.needsSpecialWriteHandling()) {
             // it's important to keep needsSpecialWriteHandling with the flags this block tests
             // Each branch resumes the sub-parser that matches the construct currently open.
             if (state.hasEntityState())
                 state = parseEntity(m_src, m_dest, state, m_cBufferPos, false, state.hasTagState());
             else if (state.inPlainText())
                 state = parseText(m_src, state);
             else if (state.inAnyNonHTMLText())
                 state = parseNonHTMLText(m_src, state);
             else if (state.inComment())
                 state = parseComment(m_src, state);
             else if (state.inDoctype())
                 state = parseDoctype(m_src, state);
             else if (state.inServer())
                 state = parseServer(m_src, state);
             else if (state.inProcessingInstruction())
                 state = parseProcessingInstruction(m_src, state);
             else if (state.hasTagState())
                 state = parseTag(m_src, state);
             else if (state.startTag()) {
                 // The previous character was '<' (the branch further down set startTag);
                 // cc is the character right after it and decides what kind of markup this is.
                 state.setStartTag(false);

                 switch(cc) {
                 case '/':
                     // Falls out of the switch to the common processToken()/parseTag() path below.
                     break;
                 case '!': {
                     // <!-- comment --> or <!DOCTYPE ...>
                     searchCount = 1; // Look for '<!--' sequence to start comment or '<!DOCTYPE' sequence to start doctype
                     m_doctypeSearchCount = 1;
                     break;
                 }
                 case '?': {
                     // xml processing instruction
                     state.setInProcessingInstruction(true);
                     tquote = NoQuote;
                     state = parseProcessingInstruction(m_src, state);
                     continue;

                     // NOTE(review): this break is unreachable because of the continue above.
                     break;
                 }
                 case '%':
                     // m_brokenServer is set by finish() when a server block was never closed;
                     // in that case "<%" is no longer given special treatment.
                     if (!m_brokenServer) {
                         // <% server stuff, handle as comment %>
                         state.setInServer(true);
                         tquote = NoQuote;
                         state = parseServer(m_src, state);
                         continue;
                     }
                     // else fall through
                 default: {
                     if( ((cc >= 'a') && (cc <= 'z')) || ((cc >= 'A') && (cc <= 'Z'))) {
                         // Start of a Start-Tag
                     } else {
                         // Invalid tag
                         // Add as is
                         // Only the '<' is emitted; cc is not consumed here and is handled again
                         // on the next iteration, now that the start-tag flag has been cleared.
                         *m_dest = '<';
                         m_dest++;
                         continue;
                     }
                 }
                 }; // end case

                 // Hand over whatever has accumulated in the buffer before the tag is parsed.
                 processToken();

                 m_cBufferPos = 0;
                 state.setTagState(TagName);
                 state = parseTag(m_src, state);
             }
         } else if (cc == '&' && !m_src.escaped()) {
             // Start of an entity reference: step over the '&' and let parseEntity() take over.
             m_src.advancePastNonNewline();
             state = parseEntity(m_src, m_dest, state, m_cBufferPos, true, state.hasTagState());
         } else if (cc == '<' && !m_src.escaped()) {
             // Possible start of markup; the character after it is examined on the next
             // iteration in the startTag branch above.
             m_currentTagStartLineNumber = m_lineNumber;
             m_src.advancePastNonNewline();
             state.setStartTag(true);
             state.setDiscardLF(false);
         } else if (cc == '\n' || cc == '\r') {
             if (state.discardLF())
                 // Ignore this LF
                 state.setDiscardLF(false); // We have discarded 1 LF
             else {
                 // Process this LF
                 // Both '\r' and '\n' are written to the output as '\n'.
                 *m_dest++ = '\n';
                 // NOTE(review): only '\r' is counted here; '\n' is presumably counted by
                 // advance(m_lineNumber) below - confirm against SegmentedString.
                 if (cc == '\r' && !m_src.excludeLineNumbers())
                     m_lineNumber++;
             }

             /* Check for MS-DOS CRLF sequence */
             if (cc == '\r')
                 state.setSkipLF(true);
             m_src.advance(m_lineNumber);
         } else {
             // Ordinary character: copy it to the output buffer.
             state.setDiscardLF(false);
             *m_dest++ = cc;
             m_src.advancePastNonNewline();
         }
     }

 #ifdef INSTRUMENT_LAYOUT_SCHEDULING
     if (!m_doc->ownerElement())
         printf("Ending write at time %d\n", m_doc->elapsedTime());
 #endif

     m_inWrite = wasInWrite;

     // Publish the working state so the next call continues where this one stopped.
     m_state = state;

     // If finish() already announced the end of the data, wrap up as soon as nothing is
     // outstanding: no enclosing write(), no external script load, no running script, no timer.
     if (m_noMoreData && !m_inWrite && !state.loadingExtScript() && !m_executingScript && !m_timer.isActive())
         end(); // this actually causes us to be deleted
 }

 void HTMLTokenizer::stopParsing()
 {
     Tokenizer::stopParsing();
     m_timer.stop();

     // The part needs to know that the tokenizer has finished with its data,
     // regardless of whether it happened naturally or due to manual intervention.
     if (!m_fragment && m_doc->frame())
         m_doc->frame()->loader()->tokenizerProcessedData();
 }

 // Reports whether tokenizing is still in flight: either a continuation is scheduled on
 // m_timer, or a write() call is currently on the stack.
 bool HTMLTokenizer::processingData() const
 {
     if (m_timer.isActive())
         return true;
     return m_inWrite;
 }

 // Timer callback armed by continueProcessing(): resumes tokenizing the buffered input,
 // unless a layout is waiting, in which case the timer is simply re-armed.
 void HTMLTokenizer::timerFired(Timer<HTMLTokenizer>*)
 {
 #ifdef INSTRUMENT_LAYOUT_SCHEDULING
     if (!m_doc->ownerElement())
         printf("Beginning timer write at time %d\n", m_doc->elapsedTime());
 #endif

     const bool layoutGoesFirst = m_doc->view() && m_doc->view()->layoutPending() && !m_doc->minimumLayoutDelay();
     if (!layoutGoesFirst) {
         // Behave as if more data had arrived, with nothing new to add.
         // This might cause us to get deleted, so nothing may follow the call.
         write(SegmentedString(), true);
         return;
     }

     // Re-arm and let layout win. This is how the layout timer effectively gets a higher
     // priority than ours.
     m_timer.startOneShot(0);
 }

 // Wraps up tokenizing: flushes the last pending token, releases the working buffers and
 // notifies the parser (or the document when in view-source mode) that input is complete.
 // Callers in this file note that this call can cause the tokenizer to be deleted.
 void HTMLTokenizer::end()
 {
     ASSERT(!m_timer.isActive());
     // Redundant unless the assertion above would have fired; harmless either way.
     m_timer.stop();

     if (m_buffer) {
         // While a tag is being parsed the buffer is used for other purposes, so its
         // contents are only flushed as a token when no tag state is active.
         if (!m_state.hasTagState())
             processToken();

         fastFree(m_scriptCode);
         m_scriptCode = 0;
         m_scriptCodeSize = 0;
         m_scriptCodeCapacity = 0;
         m_scriptCodeResync = 0;

         fastFree(m_buffer);
         m_buffer = 0;
     }

     if (inViewSourceMode())
         m_doc->finishedParsing();
     else
         m_parser->finished();
 }

 void HTMLTokenizer::finish()
 {
     // do this as long as we don't find matching comment ends
     while ((m_state.inComment() || m_state.inServer()) && m_scriptCode && m_scriptCodeSize) {
         // we've found an unmatched comment start
         if (m_state.inComment())
             m_brokenComments = true;
         else
             m_brokenServer = true;
         checkScriptBuffer();
         m_scriptCode[m_scriptCodeSize] = 0;
         m_scriptCode[m_scriptCodeSize + 1] = 0;
         int pos;
         String food;
         if (m_state.inScript() || m_state.inStyle() || m_state.inTextArea())
             food = String(m_scriptCode, m_scriptCodeSize);
         else if (m_state.inServer()) {
             food = "<";
             food.append(m_scriptCode, m_scriptCodeSize);
         } else {
             pos = find(m_scriptCode, m_scriptCodeSize, '>');
             food = String(m_scriptCode + pos + 1, m_scriptCodeSize - pos - 1);
         }
         fastFree(m_scriptCode);
         m_scriptCode = 0;
         m_scriptCodeSize = m_scriptCodeCapacity = m_scriptCodeResync = 0;
         m_state.setInComment(false);
         m_state.setInServer(false);
         if (!food.isEmpty())
             write(food, true);
     }
     // this indicates we will not receive any more data... but if we are waiting on
     // an external script to load, we can't finish parsing until that is done
     m_noMoreData = true;
     if (!m_inWrite && !m_state.loadingExtScript() && !m_executingScript && !m_timer.isActive())
         end(); // this actually causes us to be deleted
 }

 // Finishes the token currently being assembled and passes it on: to the view-source
 // document in view-source mode, otherwise to the parser. Returns the node produced by the
 // parser, or null when there was nothing to emit, the parser is stopped, or the token went
 // to the view-source document. The output buffer and m_currentToken are reset afterwards.
 PassRefPtr<Node> HTMLTokenizer::processToken()
 {
     // Only non-fragment tokenizers whose document has a frame talk to a script controller.
     ScriptController* scriptController = 0;
     if (!m_fragment && m_doc->frame())
         scriptController = m_doc->frame()->script();

     if (scriptController && scriptController->isEnabled()) {
         // FIXME: Why isn't this m_currentScriptTagStartLineNumber?  I suspect this is wrong.
         // Script line numbers are 1 based.
         scriptController->setEventHandlerLineNumber(m_currentTagStartLineNumber + 1);
     }

     const bool haveBufferedText = m_dest > m_buffer;

     // Neither buffered text nor a tag name: there is no token to emit.
     if (!haveBufferedText && m_currentToken.tagName == nullAtom) {
         m_currentToken.reset();
         if (scriptController) {
             // Script line numbers are 1 based.
             scriptController->setEventHandlerLineNumber(m_lineNumber + 1);
         }
         return 0;
     }

     if (haveBufferedText) {
         // Buffered characters become the token's text; anything that is not a comment
         // token is turned into a text token.
         m_currentToken.text = StringImpl::createStrippingNullCharacters(m_buffer, m_dest - m_buffer);
         if (m_currentToken.tagName != commentAtom)
             m_currentToken.tagName = textAtom;
     }

     // The buffer contents are consumed; start writing at its beginning again.
     m_dest = m_buffer;

     RefPtr<Node> createdNode;
     if (!m_parserStopped) {
         NamedMappedAttrMap* attributes = m_currentToken.attrs.get();
         if (attributes)
             attributes->shrinkToLength();

         if (!inViewSourceMode()) {
             // pass the token over to the parser, the parser DOES NOT delete the token
             createdNode = m_parser->parseToken(&m_currentToken);
         } else
             static_cast<HTMLViewSourceDocument*>(m_doc)->addViewSourceToken(&m_currentToken);
     }

     m_currentToken.reset();
     if (scriptController)
         scriptController->setEventHandlerLineNumber(0);

     return createdNode.release();
 }

 // Delivers the collected doctype token to the parser, or to the view-source document
 // when the tokenizer runs in view-source mode.
 void HTMLTokenizer::processDoctypeToken()
 {
     if (!inViewSourceMode()) {
         m_parser->parseDoctypeToken(&m_doctypeToken);
         return;
     }

     static_cast<HTMLViewSourceDocument*>(m_doc)->addViewSourceDoctypeToken(&m_doctypeToken);
 }

 // Destroys the tokenizer. It must not be destroyed while a write() call is still on the
 // stack (m_inWrite is set for the duration of write()'s tokenizing loop).
 HTMLTokenizer::~HTMLTokenizer()
 {
     ASSERT(!m_inWrite);
     // NOTE(review): reset() is defined outside this chunk; presumably it releases the
     // tokenizer's buffers and pending state - confirm.
     reset();
 }


 // Grows the output buffer by at least |len| characters, keeping m_dest at the same
 // offset inside the (possibly moved) allocation.
 void HTMLTokenizer::enlargeBuffer(int len)
 {
     // Largest character count whose size in bytes still fits in an int.
     static const int maxSize = INT_MAX / sizeof(UChar);

     // Resize policy: grow by the larger of the request and the current size, so the
     // buffer always at least doubles.
     const int growth = len > m_bufferSize ? len : m_bufferSize;

     // Overflow is handled the same way as a fastRealloc failure for now: with CRASH.
     if (growth > maxSize - m_bufferSize)
         CRASH();

     // m_dest points into the old allocation, so carry it across as an offset.
     const int writeOffset = m_dest - m_buffer;
     const int grownSize = m_bufferSize + growth;
     m_buffer = static_cast<UChar*>(fastRealloc(m_buffer, grownSize * sizeof(UChar)));
     m_dest = m_buffer + writeOffset;
     m_bufferSize = grownSize;
 }

 // Grows the script-code buffer's capacity by at least |len| characters.
 void HTMLTokenizer::enlargeScriptBuffer(int len)
 {
     // Largest character count whose size in bytes still fits in an int.
     static const int maxSize = INT_MAX / sizeof(UChar);

     // Resize policy: grow by the larger of the request and the current capacity, so the
     // buffer always at least doubles.
     const int growth = len > m_scriptCodeCapacity ? len : m_scriptCodeCapacity;

     // Overflow is handled the same way as a fastRealloc failure for now: with CRASH.
     if (growth > maxSize - m_scriptCodeCapacity)
         CRASH();

     const int grownCapacity = m_scriptCodeCapacity + growth;
     m_scriptCode = static_cast<UChar*>(fastRealloc(m_scriptCode, grownCapacity * sizeof(UChar)));
     m_scriptCodeCapacity = grownCapacity;
 }

 // To be called once the document's stylesheets have loaded: if notifyFinished() earlier
 // postponed script execution because of missing stylesheets, run it again now.
 void HTMLTokenizer::executeScriptsWaitingForStylesheets()
 {
     ASSERT(m_doc->haveStylesheetsLoaded());

     // Nothing was postponed, so there is nothing to resume.
     if (!m_hasScriptsWaitingForStylesheets)
         return;

     notifyFinished(0);
 }

 // Runs the pending external scripts that have finished loading, in queue order, and then
 // resumes tokenizing the input that was held back while they were pending.
 //
 // The CachedResource argument is unnamed and unused; executeScriptsWaitingForStylesheets()
 // calls this with 0. NOTE(review): presumably this is also the resource-loaded callback for
 // the scripts this tokenizer registered on (it calls removeClient(this) below) - confirm.
 //
 // Execution is postponed entirely while the document's stylesheets are still loading.
 // Script execution and the write() at the end of each iteration can re-enter the tokenizer;
 // see the comments below about state that may change and about possible deletion.
 void HTMLTokenizer::notifyFinished(CachedResource*)
 {
 #ifdef INSTRUMENT_LAYOUT_SCHEDULING
     if (!m_doc->ownerElement())
         printf("script loaded at %d\n", m_doc->elapsedTime());
 #endif

     ASSERT(!m_pendingScripts.isEmpty());

     // Make external scripts wait for external stylesheets.
     // FIXME: This needs to be done for inline scripts too.
     m_hasScriptsWaitingForStylesheets = !m_doc->haveStylesheetsLoaded();
     if (m_hasScriptsWaitingForStylesheets)
         return;

     // Process scripts from the front of the queue for as long as the front one is loaded.
     bool finished = false;
     while (!finished && m_pendingScripts.first()->isLoaded()) {
         CachedScript* cs = m_pendingScripts.first().get();
         m_pendingScripts.removeFirst();
         ASSERT(cache()->disabled() || cs->accessCount() > 0);

         // Start from an empty input source before the script runs.
         setSrc(SegmentedString());

         // make sure we forget about the script before we execute the new one
         // infinite recursion might happen otherwise
         ScriptSourceCode sourceCode(cs);
         bool errorOccurred = cs->errorOccurred();
         cs->removeClient(this);

         // Take over the script element; m_scriptNode no longer holds it afterwards.
         RefPtr<Node> n = m_scriptNode.release();

 #ifdef INSTRUMENT_LAYOUT_SCHEDULING
         if (!m_doc->ownerElement())
             printf("external script beginning execution at %d\n", m_doc->elapsedTime());
 #endif

         // A failed load fires an error event on the element; otherwise the script is run
         // (when it is to be executed as JavaScript) and a load event is fired.
         if (errorOccurred)
             n->dispatchEvent(eventNames().errorEvent, true, false);
         else {
             if (static_cast<HTMLScriptElement*>(n.get())->shouldExecuteAsJavaScript())
                 m_state = scriptExecution(sourceCode, m_state);
 #if ENABLE(XHTMLMP)
             else
                 m_doc->setShouldProcessNoscriptElement(true);
 #endif
             n->dispatchEvent(eventNames().loadEvent, false, false);
         }

         // The state of m_pendingScripts.isEmpty() can change inside the scriptExecution()
         // call above, so test afterwards.
         finished = m_pendingScripts.isEmpty();
         if (finished) {
             ASSERT(!m_hasScriptsWaitingForStylesheets);
             m_state.setLoadingExtScript(false);
 #ifdef INSTRUMENT_LAYOUT_SCHEDULING
             if (!m_doc->ownerElement())
                 printf("external script finished execution at %d\n", m_doc->elapsedTime());
 #endif
         } else if (m_hasScriptsWaitingForStylesheets) {
             // m_hasScriptsWaitingForStylesheets flag might have changed during the script execution.
             // If it did we are now blocked waiting for stylesheets and should not execute more scripts until they arrive.
             finished = true;
         }

         // 'm_requestingScript' is true when we are called synchronously from
         // scriptHandler(). In that case scriptHandler() will take care
         // of m_pendingSrc.
         if (!m_requestingScript) {
             // Replay the input that write() queued in m_pendingSrc while scripts were pending.
             // appendData is false here, unlike the other write() callers in this file.
             SegmentedString rest = m_pendingSrc;
             m_pendingSrc.clear();
             write(rest, false);
             // we might be deleted at this point, do not access any members.
             // NOTE(review): when 'finished' is still false the loop condition reads
             // m_pendingScripts again after this call - confirm that deletion cannot
             // happen on that path.
         }
     }
 }

 // Returns true while the tokenizer state is flagged as loading an external script.
 // notifyFinished() clears that flag once the pending-script queue is empty.
 bool HTMLTokenizer::isWaitingForScripts() const
 {
     return m_state.loadingExtScript();
 }

 // Replaces the tokenizer's current input (m_src) with |source|; whatever m_src held
 // before is overwritten, not appended to.
 void HTMLTokenizer::setSrc(const SegmentedString& source)
 {
     m_src = source;
 }

 void parseHTMLDocumentFragment(const String& source, DocumentFragment* fragment)
 {
     HTMLTokenizer tok(fragment);
     tok.setForceSynchronous(true);
     tok.write(source, true);
     tok.finish();
     ASSERT(!tok.processingData());      // make sure we're done (see 3963151)
 }

 // Looks up the entity called |name| (a NUL-terminated string) and returns its character
 // code, or 0 when no entity with that name is found.
 UChar decodeNamedEntity(const char* name)
 {
     const Entity* entity = findEntity(name, strlen(name));
     if (!entity)
         return 0;
     return entity->code;
 }

 }