| /* |
| Copyright (C) 1997 Martin Jones (mjones@kde.org) |
| (C) 1997 Torben Weis (weis@kde.org) |
| (C) 1998 Waldo Bastian (bastian@kde.org) |
| (C) 1999 Lars Knoll (knoll@kde.org) |
| (C) 1999 Antti Koivisto (koivisto@kde.org) |
| (C) 2001 Dirk Mueller (mueller@kde.org) |
| Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. |
| Copyright (C) 2005, 2006 Alexey Proskuryakov (ap@nypop.com) |
| Copyright (C) 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/) |
| |
| This library is free software; you can redistribute it and/or |
| modify it under the terms of the GNU Library General Public |
| License as published by the Free Software Foundation; either |
| version 2 of the License, or (at your option) any later version. |
| |
| This library is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| Library General Public License for more details. |
| |
| You should have received a copy of the GNU Library General Public License |
| along with this library; see the file COPYING.LIB. If not, write to |
| the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
| Boston, MA 02110-1301, USA. |
| */ |
| |
| #include "config.h" |
| #include "HTMLTokenizer.h" |
| |
| #include "CSSHelper.h" |
| #include "Cache.h" |
| #include "CachedScript.h" |
| #include "DocLoader.h" |
| #include "DocumentFragment.h" |
| #include "EventNames.h" |
| #include "Frame.h" |
| #include "FrameLoader.h" |
| #include "FrameView.h" |
| #include "HTMLElement.h" |
| #include "HTMLNames.h" |
| #include "HTMLParser.h" |
| #include "HTMLScriptElement.h" |
| #include "HTMLViewSourceDocument.h" |
| #include "MappedAttribute.h" |
| #include "Page.h" |
| #include "PreloadScanner.h" |
| #include "ScriptController.h" |
| #include "ScriptSourceCode.h" |
| #include "ScriptValue.h" |
| #include "XSSAuditor.h" |
| #include <wtf/ASCIICType.h> |
| #include <wtf/CurrentTime.h> |
| |
| #include "HTMLEntityNames.c" |
| |
| #define PRELOAD_SCANNER_ENABLED 1 |
| // #define INSTRUMENT_LAYOUT_SCHEDULING 1 |
| |
| using namespace WTF; |
| using namespace std; |
| |
| namespace WebCore { |
| |
| using namespace HTMLNames; |
| |
// Default pacing of the tokenizer. begin() uses these values unless the Page supplies
// custom ones.
#if MOBILE
// A mobile device needs to stay responsive, so the tokenizer works in small chunks.
// This is the number of characters the tokenizer processes before yielding control.
static const int defaultTokenizerChunkSize = 256;
// Because the chunks are small, the tokenizer must not yield for as long a period;
// otherwise it would take far too long to load a page.
static const double defaultTokenizerTimeDelay = 0.300;
#else
static const int defaultTokenizerChunkSize = 4096;
// FIXME: We would like this constant to be 200ms.
// Yielding more aggressively results in increased responsiveness and better incremental rendering.
// It slows down overall page-load on slower machines, though, so for now we set a value of 500.
static const double defaultTokenizerTimeDelay = 0.500;
#endif
| |
// Lower-case markup prefixes the tokenizer searches for.
static const char commentStart[] = "<!--";
static const char doctypeStart[] = "<!doctype";
static const char publicStart[] = "public";
static const char systemStart[] = "system";

// End-tag prefixes of the elements whose content is scanned as raw text.
static const char scriptEnd[] = "</script";
static const char xmpEnd[] = "</xmp";
static const char styleEnd[] = "</style";
static const char textareaEnd[] = "</textarea";
static const char titleEnd[] = "</title";
static const char iframeEnd[] = "</iframe";
| |
| // Full support for MS Windows extensions to Latin-1. |
| // Technically these extensions should only be activated for pages |
| // marked "windows-1252" or "cp1252", but |
| // in the standard Microsoft way, these extensions infect hundreds of thousands |
| // of web pages. Note that people with non-latin-1 Microsoft extensions |
| // are SOL. |
| // |
| // See: http://www.microsoft.com/globaldev/reference/WinCP.asp |
| // http://www.bbsinc.com/iso8859.html |
| // http://www.obviously.com/ |
| // |
| // There may be better equivalents |
| |
| // We only need this for entities. For non-entity text, we handle this in the text encoding. |
| |
| static const UChar windowsLatin1ExtensionArray[32] = { |
| 0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 80-87 |
| 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, // 88-8F |
| 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, // 90-97 |
| 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178 // 98-9F |
| }; |
| |
| static inline UChar fixUpChar(UChar c) |
| { |
| if ((c & ~0x1F) != 0x0080) |
| return c; |
| return windowsLatin1ExtensionArray[c - 0x80]; |
| } |
| |
| static inline bool tagMatch(const char* s1, const UChar* s2, unsigned length) |
| { |
| for (unsigned i = 0; i != length; ++i) { |
| unsigned char c1 = s1[i]; |
| unsigned char uc1 = toASCIIUpper(static_cast<char>(c1)); |
| UChar c2 = s2[i]; |
| if (c1 != c2 && uc1 != c2) |
| return false; |
| } |
| return true; |
| } |
| |
| inline void Token::addAttribute(AtomicString& attrName, const AtomicString& attributeValue, bool viewSourceMode) |
| { |
| if (!attrName.isEmpty()) { |
| ASSERT(!attrName.contains('/')); |
| RefPtr<MappedAttribute> a = MappedAttribute::create(attrName, attributeValue); |
| if (!attrs) { |
| attrs = NamedMappedAttrMap::create(); |
| attrs->reserveInitialCapacity(10); |
| } |
| attrs->insertAttribute(a.release(), viewSourceMode); |
| } |
| |
| attrName = emptyAtom; |
| } |
| |
| // ---------------------------------------------------------------------------- |
| |
| HTMLTokenizer::HTMLTokenizer(HTMLDocument* doc, bool reportErrors) |
| : Tokenizer() |
| , m_buffer(0) |
| , m_scriptCode(0) |
| , m_scriptCodeSize(0) |
| , m_scriptCodeCapacity(0) |
| , m_scriptCodeResync(0) |
| , m_executingScript(0) |
| , m_requestingScript(false) |
| , m_hasScriptsWaitingForStylesheets(false) |
| , m_timer(this, &HTMLTokenizer::timerFired) |
| , m_doc(doc) |
| , m_parser(new HTMLParser(doc, reportErrors)) |
| , m_inWrite(false) |
| , m_fragment(false) |
| { |
| begin(); |
| } |
| |
| HTMLTokenizer::HTMLTokenizer(HTMLViewSourceDocument* doc) |
| : Tokenizer(true) |
| , m_buffer(0) |
| , m_scriptCode(0) |
| , m_scriptCodeSize(0) |
| , m_scriptCodeCapacity(0) |
| , m_scriptCodeResync(0) |
| , m_executingScript(0) |
| , m_requestingScript(false) |
| , m_hasScriptsWaitingForStylesheets(false) |
| , m_timer(this, &HTMLTokenizer::timerFired) |
| , m_doc(doc) |
| , m_parser(0) |
| , m_inWrite(false) |
| , m_fragment(false) |
| { |
| begin(); |
| } |
| |
| HTMLTokenizer::HTMLTokenizer(DocumentFragment* frag) |
| : m_buffer(0) |
| , m_scriptCode(0) |
| , m_scriptCodeSize(0) |
| , m_scriptCodeCapacity(0) |
| , m_scriptCodeResync(0) |
| , m_executingScript(0) |
| , m_requestingScript(false) |
| , m_hasScriptsWaitingForStylesheets(false) |
| , m_timer(this, &HTMLTokenizer::timerFired) |
| , m_doc(frag->document()) |
| , m_parser(new HTMLParser(frag)) |
| , m_inWrite(false) |
| , m_fragment(true) |
| { |
| begin(); |
| } |
| |
| void HTMLTokenizer::reset() |
| { |
| ASSERT(m_executingScript == 0); |
| |
| while (!m_pendingScripts.isEmpty()) { |
| CachedScript* cs = m_pendingScripts.first().get(); |
| m_pendingScripts.removeFirst(); |
| ASSERT(cache()->disabled() || cs->accessCount() > 0); |
| cs->removeClient(this); |
| } |
| |
| fastFree(m_buffer); |
| m_buffer = m_dest = 0; |
| m_bufferSize = 0; |
| |
| fastFree(m_scriptCode); |
| m_scriptCode = 0; |
| m_scriptCodeSize = m_scriptCodeCapacity = m_scriptCodeResync = 0; |
| |
| m_timer.stop(); |
| m_state.setAllowYield(false); |
| m_state.setForceSynchronous(false); |
| |
| m_currentToken.reset(); |
| m_doctypeToken.reset(); |
| m_doctypeSearchCount = 0; |
| m_doctypeSecondarySearchCount = 0; |
| m_hasScriptsWaitingForStylesheets = false; |
| } |
| |
| void HTMLTokenizer::begin() |
| { |
| m_executingScript = 0; |
| m_requestingScript = false; |
| m_hasScriptsWaitingForStylesheets = false; |
| m_state.setLoadingExtScript(false); |
| reset(); |
| m_bufferSize = 254; |
| m_buffer = static_cast<UChar*>(fastMalloc(sizeof(UChar) * 254)); |
| m_dest = m_buffer; |
| tquote = NoQuote; |
| searchCount = 0; |
| m_state.setEntityState(NoEntity); |
| m_scriptTagSrcAttrValue = String(); |
| m_pendingSrc.clear(); |
| m_currentPrependingSrc = 0; |
| m_noMoreData = false; |
| m_brokenComments = false; |
| m_brokenServer = false; |
| m_lineNumber = 0; |
| m_currentScriptTagStartLineNumber = 0; |
| m_currentTagStartLineNumber = 0; |
| m_state.setForceSynchronous(false); |
| |
| Page* page = m_doc->page(); |
| if (page && page->hasCustomHTMLTokenizerTimeDelay()) |
| m_tokenizerTimeDelay = page->customHTMLTokenizerTimeDelay(); |
| else |
| m_tokenizerTimeDelay = defaultTokenizerTimeDelay; |
| |
| if (page && page->hasCustomHTMLTokenizerChunkSize()) |
| m_tokenizerChunkSize = page->customHTMLTokenizerChunkSize(); |
| else |
| m_tokenizerChunkSize = defaultTokenizerChunkSize; |
| } |
| |
| void HTMLTokenizer::setForceSynchronous(bool force) |
| { |
| m_state.setForceSynchronous(force); |
| } |
| |
| HTMLTokenizer::State HTMLTokenizer::processListing(SegmentedString list, State state) |
| { |
| // This function adds the listing 'list' as |
| // preformatted text-tokens to the token-collection |
| while (!list.isEmpty()) { |
| if (state.skipLF()) { |
| state.setSkipLF(false); |
| if (*list == '\n') { |
| list.advance(); |
| continue; |
| } |
| } |
| |
| checkBuffer(); |
| |
| if (*list == '\n' || *list == '\r') { |
| if (state.discardLF()) |
| // Ignore this LF |
| state.setDiscardLF(false); // We have discarded 1 LF |
| else |
| *m_dest++ = '\n'; |
| |
| /* Check for MS-DOS CRLF sequence */ |
| if (*list == '\r') |
| state.setSkipLF(true); |
| |
| list.advance(); |
| } else { |
| state.setDiscardLF(false); |
| *m_dest++ = *list; |
| list.advance(); |
| } |
| } |
| |
| return state; |
| } |
| |
| /* Consumes the raw text content of an element whose body is not parsed as markup |
| (exactly one of xmp, textarea, title, style, script or iframe must be active; see the |
| third ASSERT). Characters are accumulated in m_scriptCode until the end-tag text held |
| in m_searchStopper has been seen and its closing '>' is reached; the text is then |
| passed to scriptHandler() (script) or to processListing()/processToken() (other |
| elements), the element flags are cleared and the function returns. If src runs out |
| first, the partial text stays in m_scriptCode. Returns the updated tokenizer state. */ HTMLTokenizer::State HTMLTokenizer::parseNonHTMLText(SegmentedString& src, State state) |
| { |
| ASSERT(state.inTextArea() || state.inTitle() || state.inIFrame() || !state.hasEntityState()); |
| ASSERT(!state.hasTagState()); |
| ASSERT(state.inXmp() + state.inTextArea() + state.inTitle() + state.inStyle() + state.inScript() + state.inIFrame() == 1 ); |
| if (state.inScript() && !m_currentScriptTagStartLineNumber) |
| m_currentScriptTagStartLineNumber = m_lineNumber; |
| |
| if (state.inComment()) /* already inside a comment on entry: let parseComment() consume it first */ |
| state = parseComment(src, state); |
| |
| int lastDecodedEntityPosition = -1; /* m_scriptCode offset recorded after an entity was handled (see below); -1 means none */ |
| while (!src.isEmpty()) { |
| checkScriptBuffer(); |
| UChar ch = *src; |
| |
| if (!m_scriptCodeResync && !m_brokenComments && |
| !state.inXmp() && ch == '-' && m_scriptCodeSize >= 3 && !src.escaped() && |
| m_scriptCode[m_scriptCodeSize - 3] == '<' && m_scriptCode[m_scriptCodeSize - 2] == '!' && m_scriptCode[m_scriptCodeSize - 1] == '-' && |
| (lastDecodedEntityPosition < m_scriptCodeSize - 3)) { /* the buffer ends in "<!-" and ch is the second '-': a comment starts here */ |
| state.setInComment(true); |
| state = parseComment(src, state); |
| continue; |
| } |
| if (m_scriptCodeResync && !tquote && ch == '>') { /* the end tag was already matched and this unquoted '>' closes it */ |
| src.advancePastNonNewline(); |
| m_scriptCodeSize = m_scriptCodeResync - 1; /* cut the buffer back to where the end tag began */ |
| m_scriptCodeResync = 0; |
| m_scriptCode[m_scriptCodeSize] = m_scriptCode[m_scriptCodeSize + 1] = 0; |
| if (state.inScript()) |
| state = scriptHandler(state); |
| else { |
| state = processListing(SegmentedString(m_scriptCode, m_scriptCodeSize), state); |
| processToken(); |
| if (state.inStyle()) { /* fill m_currentToken with the tag name of the element being closed, beginTag = false */ |
| m_currentToken.tagName = styleTag.localName(); |
| m_currentToken.beginTag = false; |
| } else if (state.inTextArea()) { |
| m_currentToken.tagName = textareaTag.localName(); |
| m_currentToken.beginTag = false; |
| } else if (state.inTitle()) { |
| m_currentToken.tagName = titleTag.localName(); |
| m_currentToken.beginTag = false; |
| } else if (state.inXmp()) { |
| m_currentToken.tagName = xmpTag.localName(); |
| m_currentToken.beginTag = false; |
| } else if (state.inIFrame()) { |
| m_currentToken.tagName = iframeTag.localName(); |
| m_currentToken.beginTag = false; |
| } |
| processToken(); |
| state.setInStyle(false); |
| state.setInScript(false); |
| state.setInTextArea(false); |
| state.setInTitle(false); |
| state.setInXmp(false); |
| state.setInIFrame(false); |
| tquote = NoQuote; |
| m_scriptCodeSize = m_scriptCodeResync = 0; |
| } |
| return state; |
| } |
| // Possible end of the end-tag name: check whether the buffer now ends with m_searchStopper. |
| if (!m_scriptCodeResync && !state.escaped() && !src.escaped() && (ch == '>' || ch == '/' || isASCIISpace(ch)) && |
| m_scriptCodeSize >= m_searchStopperLength && |
| tagMatch(m_searchStopper, m_scriptCode + m_scriptCodeSize - m_searchStopperLength, m_searchStopperLength) && |
| (lastDecodedEntityPosition < m_scriptCodeSize - m_searchStopperLength)) { |
| m_scriptCodeResync = m_scriptCodeSize-m_searchStopperLength+1; /* offset of the end tag plus one, so that 0 keeps meaning "no match" */ |
| tquote = NoQuote; |
| continue; |
| } |
| if (m_scriptCodeResync && !state.escaped()) { /* inside the matched end tag: track quoting, because only an unquoted '>' closes it */ |
| if (ch == '\"') |
| tquote = (tquote == NoQuote) ? DoubleQuote : ((tquote == SingleQuote) ? SingleQuote : NoQuote); |
| else if (ch == '\'') |
| tquote = (tquote == NoQuote) ? SingleQuote : (tquote == DoubleQuote) ? DoubleQuote : NoQuote; |
| else if (tquote != NoQuote && (ch == '\r' || ch == '\n')) |
| tquote = NoQuote; |
| } |
| state.setEscaped(!state.escaped() && ch == '\\'); /* a backslash sets the escaped flag unless the flag was already set */ |
| if (!m_scriptCodeResync && (state.inTextArea() || state.inTitle() || state.inIFrame()) && !src.escaped() && ch == '&') { /* entities are decoded only in textarea, title and iframe */ |
| UChar* scriptCodeDest = m_scriptCode + m_scriptCodeSize; |
| src.advancePastNonNewline(); |
| state = parseEntity(src, scriptCodeDest, state, m_cBufferPos, true, false); |
| if (scriptCodeDest == m_scriptCode + m_scriptCodeSize) /* parseEntity() wrote nothing through scriptCodeDest */ |
| lastDecodedEntityPosition = m_scriptCodeSize; /* the comment and end-tag checks above only accept matches that begin after this offset */ |
| else |
| m_scriptCodeSize = scriptCodeDest - m_scriptCode; |
| } else { |
| m_scriptCode[m_scriptCodeSize++] = ch; |
| src.advance(m_lineNumber); |
| } |
| } |
| |
| return state; |
| } |
| |
| /* Called once the complete text of a <script> element is in m_scriptCode. Requests the |
| external script named by m_scriptTagSrcAttrValue (when set and allowed) or asks the |
| script node whether the inline text should run, emits the script text and the closing |
| script token through processToken(), and then either registers as client of the |
| external script, runs the inline script via scriptExecution(), or re-queues the source |
| that was set aside. In view-source mode only the tokens are emitted. Returns the |
| updated tokenizer state. */ HTMLTokenizer::State HTMLTokenizer::scriptHandler(State state) |
| { |
| // We are inside a <script> |
| bool doScriptExec = false; |
| int startLine = m_currentScriptTagStartLineNumber + 1; // Script line numbers are 1 based, HTMLTokenizer line numbers are 0 based |
| |
| // Reset m_currentScriptTagStartLineNumber to indicate that we've finished parsing the current script element |
| m_currentScriptTagStartLineNumber = 0; |
| |
| // (Bugzilla 3837) Scripts following a frameset element should not execute or, |
| // in the case of extern scripts, even load. |
| bool followingFrameset = (m_doc->body() && m_doc->body()->hasTagName(framesetTag)); |
| |
| CachedScript* cs = 0; /* stays 0 unless an external script is requested successfully below */ |
| // don't load external scripts for standalone documents (for now) |
| if (!inViewSourceMode()) { |
| if (!m_scriptTagSrcAttrValue.isEmpty() && m_doc->frame()) { |
| // forget what we just got; load from src url instead |
| if (!m_parser->skipMode() && !followingFrameset) { |
| #ifdef INSTRUMENT_LAYOUT_SCHEDULING |
| if (!m_doc->ownerElement()) |
| printf("Requesting script at time %d\n", m_doc->elapsedTime()); |
| #endif |
| // The parser might have been stopped by for example a window.close call in an earlier script. |
| // If so, we don't want to load scripts. |
| if (!m_parserStopped && (cs = m_doc->docLoader()->requestScript(m_scriptTagSrcAttrValue, m_scriptTagCharsetAttrValue))) |
| m_pendingScripts.append(cs); |
| else |
| m_scriptNode = 0; |
| } else |
| m_scriptNode = 0; |
| m_scriptTagSrcAttrValue = String(); |
| } else { |
| // Parse m_scriptCode containing <script> info |
| doScriptExec = m_scriptNode->shouldExecuteAsJavaScript(); /* NOTE(review): m_scriptNode is dereferenced without a null check - confirm callers guarantee it is set */ |
| #if ENABLE(XHTMLMP) |
| if (!doScriptExec) |
| m_doc->setShouldProcessNoscriptElement(true); |
| #endif |
| m_scriptNode = 0; |
| } |
| } |
| |
| state = processListing(SegmentedString(m_scriptCode, m_scriptCodeSize), state); |
| RefPtr<Node> node = processToken(); |
| String scriptString = node ? node->textContent() : ""; /* text that is passed to scriptExecution() below for an inline script */ |
| m_currentToken.tagName = scriptTag.localName(); |
| m_currentToken.beginTag = false; |
| processToken(); |
| |
| state.setInScript(false); |
| m_scriptCodeSize = m_scriptCodeResync = 0; |
| |
| // FIXME: The script should be syntax highlighted. |
| if (inViewSourceMode()) |
| return state; |
| |
| SegmentedString* savedPrependingSrc = m_currentPrependingSrc; |
| SegmentedString prependingSrc; |
| m_currentPrependingSrc = &prependingSrc; /* restored to savedPrependingSrc before returning */ |
| |
| if (!m_parser->skipMode() && !followingFrameset) { |
| if (cs) { |
| if (savedPrependingSrc) |
| savedPrependingSrc->append(m_src); |
| else |
| m_pendingSrc.prepend(m_src); |
| setSrc(SegmentedString()); /* m_src was queued just above; continue with an empty source */ |
| |
| // the addClient() call below may call notifyFinished if the script is already in cache, |
| // and that mucks with the state directly, so we must write it back to the object. |
| m_state = state; |
| bool savedRequestingScript = m_requestingScript; |
| m_requestingScript = true; |
| cs->addClient(this); |
| m_requestingScript = savedRequestingScript; |
| state = m_state; |
| // m_pendingScripts will be empty if the script was already loaded and addClient() executed it |
| if (!m_pendingScripts.isEmpty()) |
| state.setLoadingExtScript(true); |
| } else if (!m_fragment && doScriptExec) { |
| if (!m_executingScript) |
| m_pendingSrc.prepend(m_src); |
| else |
| prependingSrc = m_src; |
| setSrc(SegmentedString()); |
| state = scriptExecution(ScriptSourceCode(scriptString, m_doc->frame() ? m_doc->frame()->document()->url() : KURL(), startLine), state); |
| } |
| } |
| |
| if (!m_executingScript && !state.loadingExtScript()) { /* no script running and none loading: put the queued source back into m_src */ |
| m_src.append(m_pendingSrc); |
| m_pendingSrc.clear(); |
| } else if (!prependingSrc.isEmpty()) { |
| // restore first so that the write appends in the right place |
| // (does not hurt to do it again below) |
| m_currentPrependingSrc = savedPrependingSrc; |
| |
| // we need to do this slightly modified bit of one of the write() cases |
| // because we want to prepend to m_pendingSrc rather than appending |
| // if there's no previous prependingSrc |
| if (!m_pendingScripts.isEmpty()) { |
| if (m_currentPrependingSrc) |
| m_currentPrependingSrc->append(prependingSrc); |
| else |
| m_pendingSrc.prepend(prependingSrc); |
| } else { |
| m_state = state; |
| write(prependingSrc, false); |
| state = m_state; |
| } |
| } |
| |
| #if PRELOAD_SCANNER_ENABLED |
| if (!m_pendingScripts.isEmpty() && !m_executingScript) { /* external scripts are still pending: feed the queued source to the preload scanner */ |
| if (!m_preloadScanner) |
| m_preloadScanner.set(new PreloadScanner(m_doc)); |
| if (!m_preloadScanner->inProgress()) { |
| m_preloadScanner->begin(); |
| m_preloadScanner->write(m_pendingSrc); |
| } |
| } |
| #endif |
| m_currentPrependingSrc = savedPrependingSrc; |
| |
| return state; |
| } |
| |
| /* Executes sourceCode through the frame's loader. Returns state unchanged when parsing |
| a fragment or when the document has no frame. m_executingScript is incremented for the |
| duration of the call, and the tokenizer state is written to m_state before the script |
| runs and read back afterwards. Source collected in the local prependingSrc while the |
| script ran is afterwards merged back into m_src, queued behind a pending external |
| script, or passed to write(). Returns the updated tokenizer state, with allowYield |
| set. */ HTMLTokenizer::State HTMLTokenizer::scriptExecution(const ScriptSourceCode& sourceCode, State state) |
| { |
| if (m_fragment || !m_doc->frame()) |
| return state; |
| m_executingScript++; |
| |
| SegmentedString* savedPrependingSrc = m_currentPrependingSrc; |
| SegmentedString prependingSrc; |
| m_currentPrependingSrc = &prependingSrc; /* restored to savedPrependingSrc before returning */ |
| |
| #ifdef INSTRUMENT_LAYOUT_SCHEDULING |
| if (!m_doc->ownerElement()) |
| printf("beginning script execution at %d\n", m_doc->elapsedTime()); |
| #endif |
| |
| m_state = state; /* publish the local state in the member before the script runs; it is read back right after */ |
| m_doc->frame()->loader()->executeScript(sourceCode); |
| state = m_state; |
| |
| state.setAllowYield(true); |
| |
| #ifdef INSTRUMENT_LAYOUT_SCHEDULING |
| if (!m_doc->ownerElement()) |
| printf("ending script execution at %d\n", m_doc->elapsedTime()); |
| #endif |
| |
| m_executingScript--; |
| |
| if (!m_executingScript && !state.loadingExtScript()) { /* outermost script finished and no external script loading: rebuild m_src */ |
| m_pendingSrc.prepend(prependingSrc); |
| m_src.append(m_pendingSrc); |
| m_pendingSrc.clear(); |
| } else if (!prependingSrc.isEmpty()) { |
| // restore first so that the write appends in the right place |
| // (does not hurt to do it again below) |
| m_currentPrependingSrc = savedPrependingSrc; |
| |
| // we need to do this slightly modified bit of one of the write() cases |
| // because we want to prepend to m_pendingSrc rather than appending |
| // if there's no previous prependingSrc |
| if (!m_pendingScripts.isEmpty()) { |
| if (m_currentPrependingSrc) |
| m_currentPrependingSrc->append(prependingSrc); |
| else |
| m_pendingSrc.prepend(prependingSrc); |
| |
| #if PRELOAD_SCANNER_ENABLED |
| // We are stuck waiting for another script. Lets check the source that |
| // was just document.write()n for anything to load. |
| PreloadScanner documentWritePreloadScanner(m_doc); |
| documentWritePreloadScanner.begin(); |
| documentWritePreloadScanner.write(prependingSrc); |
| documentWritePreloadScanner.end(); |
| #endif |
| } else { |
| m_state = state; |
| write(prependingSrc, false); |
| state = m_state; |
| } |
| } |
| |
| m_currentPrependingSrc = savedPrependingSrc; |
| |
| return state; |
| } |
| |
| HTMLTokenizer::State HTMLTokenizer::parseComment(SegmentedString& src, State state) |
| { |
| // FIXME: Why does this code even run for comments inside <script> and <style>? This seems bogus. |
| checkScriptBuffer(src.length()); |
| while (!src.isEmpty()) { |
| UChar ch = *src; |
| m_scriptCode[m_scriptCodeSize++] = ch; |
| if (ch == '>') { |
| bool handleBrokenComments = m_brokenComments && !(state.inScript() || state.inStyle()); |
| int endCharsCount = 1; // start off with one for the '>' character |
| if (m_scriptCodeSize > 2 && m_scriptCode[m_scriptCodeSize-3] == '-' && m_scriptCode[m_scriptCodeSize-2] == '-') { |
| endCharsCount = 3; |
| } else if (m_scriptCodeSize > 3 && m_scriptCode[m_scriptCodeSize-4] == '-' && m_scriptCode[m_scriptCodeSize-3] == '-' && |
| m_scriptCode[m_scriptCodeSize-2] == '!') { |
| // Other browsers will accept --!> as a close comment, even though it's |
| // not technically valid. |
| endCharsCount = 4; |
| } |
| if (handleBrokenComments || endCharsCount > 1) { |
| src.advancePastNonNewline(); |
| if (!(state.inTitle() || state.inScript() || state.inXmp() || state.inTextArea() || state.inStyle() || state.inIFrame())) { |
| checkScriptBuffer(); |
| m_scriptCode[m_scriptCodeSize] = 0; |
| m_scriptCode[m_scriptCodeSize + 1] = 0; |
| m_currentToken.tagName = commentAtom; |
| m_currentToken.beginTag = true; |
| state = processListing(SegmentedString(m_scriptCode, m_scriptCodeSize - endCharsCount), state); |
| processToken(); |
| m_currentToken.tagName = commentAtom; |
| m_currentToken.beginTag = false; |
| processToken(); |
| m_scriptCodeSize = 0; |
| } |
| state.setInComment(false); |
| return state; // Finished parsing comment |
| } |
| } |
| src.advance(m_lineNumber); |
| } |
| |
| return state; |
| } |
| |
| HTMLTokenizer::State HTMLTokenizer::parseServer(SegmentedString& src, State state) |
| { |
| checkScriptBuffer(src.length()); |
| while (!src.isEmpty()) { |
| UChar ch = *src; |
| m_scriptCode[m_scriptCodeSize++] = ch; |
| if (ch == '>' && m_scriptCodeSize > 1 && m_scriptCode[m_scriptCodeSize - 2] == '%') { |
| src.advancePastNonNewline(); |
| state.setInServer(false); |
| m_scriptCodeSize = 0; |
| return state; // Finished parsing server include |
| } |
| src.advance(m_lineNumber); |
| } |
| return state; |
| } |
| |
| HTMLTokenizer::State HTMLTokenizer::parseProcessingInstruction(SegmentedString& src, State state) |
| { |
| UChar oldchar = 0; |
| while (!src.isEmpty()) { |
| UChar chbegin = *src; |
| if (chbegin == '\'') |
| tquote = tquote == SingleQuote ? NoQuote : SingleQuote; |
| else if (chbegin == '\"') |
| tquote = tquote == DoubleQuote ? NoQuote : DoubleQuote; |
| // Look for '?>' |
| // Some crappy sites omit the "?" before it, so |
| // we look for an unquoted '>' instead. (IE compatible) |
| else if (chbegin == '>' && (!tquote || oldchar == '?')) { |
| // We got a '?>' sequence |
| state.setInProcessingInstruction(false); |
| src.advancePastNonNewline(); |
| state.setDiscardLF(true); |
| return state; // Finished parsing comment! |
| } |
| src.advance(m_lineNumber); |
| oldchar = chbegin; |
| } |
| |
| return state; |
| } |
| |
| HTMLTokenizer::State HTMLTokenizer::parseText(SegmentedString& src, State state) |
| { |
| while (!src.isEmpty()) { |
| UChar cc = *src; |
| |
| if (state.skipLF()) { |
| state.setSkipLF(false); |
| if (cc == '\n') { |
| src.advancePastNewline(m_lineNumber); |
| continue; |
| } |
| } |
| |
| // do we need to enlarge the buffer? |
| checkBuffer(); |
| |
| if (cc == '\r') { |
| state.setSkipLF(true); |
| *m_dest++ = '\n'; |
| } else |
| *m_dest++ = cc; |
| src.advance(m_lineNumber); |
| } |
| |
| return state; |
| } |
| |
| |
| /* Decodes one character reference; the caller has already consumed the '&'. |
| The work is a state machine kept in state.entityState(), m_cBuffer/cBufferPos (the raw |
| characters read so far) and EntityUnicodeValue, so a call can stop when src runs out |
| and a later call with start == false continues from the same point. |
| start: true to begin a new entity (resets cBufferPos, the state and the value). |
| parsingTag: enables the IE-compatibility rule in the EntityName case. |
| Result: for a valid value outside view-source mode the decoded character(s) are pushed |
| onto src; in every other finished case the raw text ('&' plus the buffered characters) |
| is written through dest, which is advanced. Returns the updated tokenizer state. */ HTMLTokenizer::State HTMLTokenizer::parseEntity(SegmentedString& src, UChar*& dest, State state, unsigned& cBufferPos, bool start, bool parsingTag) |
| { |
| if (start) { |
| cBufferPos = 0; |
| state.setEntityState(SearchEntity); |
| EntityUnicodeValue = 0; |
| } |
| |
| while(!src.isEmpty()) { |
| UChar cc = *src; |
| switch(state.entityState()) { |
| case NoEntity: |
| ASSERT(state.entityState() != NoEntity); /* always fails when reached: this function must not run in the NoEntity state */ |
| return state; |
| |
| case SearchEntity: |
| if (cc == '#') { |
| m_cBuffer[cBufferPos++] = cc; |
| src.advancePastNonNewline(); |
| state.setEntityState(NumericSearch); |
| } else |
| state.setEntityState(EntityName); /* cc is not consumed; the loop re-reads it in the new state */ |
| break; |
| |
| case NumericSearch: |
| if (cc == 'x' || cc == 'X') { |
| m_cBuffer[cBufferPos++] = cc; |
| src.advancePastNonNewline(); |
| state.setEntityState(Hexadecimal); |
| } else if (cc >= '0' && cc <= '9') |
| state.setEntityState(Decimal); |
| else |
| state.setEntityState(SearchSemicolon); |
| break; |
| |
| case Hexadecimal: { |
| int ll = min(src.length(), 10 - cBufferPos); /* never buffer more than 10 characters in total */ |
| while (ll--) { |
| cc = *src; |
| if (!((cc >= '0' && cc <= '9') || (cc >= 'a' && cc <= 'f') || (cc >= 'A' && cc <= 'F'))) { |
| state.setEntityState(SearchSemicolon); |
| break; |
| } |
| int digit; |
| if (cc < 'A') |
| digit = cc - '0'; |
| else |
| digit = (cc - 'A' + 10) & 0xF; // handle both upper and lower case without a branch |
| EntityUnicodeValue = EntityUnicodeValue * 16 + digit; |
| m_cBuffer[cBufferPos++] = cc; |
| src.advancePastNonNewline(); |
| } |
| if (cBufferPos == 10) |
| state.setEntityState(SearchSemicolon); |
| break; |
| } |
| case Decimal: |
| { |
| int ll = min(src.length(), 9-cBufferPos); /* never buffer more than 9 characters in total */ |
| while(ll--) { |
| cc = *src; |
| |
| if (!(cc >= '0' && cc <= '9')) { |
| state.setEntityState(SearchSemicolon); |
| break; |
| } |
| |
| EntityUnicodeValue = EntityUnicodeValue * 10 + (cc - '0'); |
| m_cBuffer[cBufferPos++] = cc; |
| src.advancePastNonNewline(); |
| } |
| if (cBufferPos == 9) |
| state.setEntityState(SearchSemicolon); |
| break; |
| } |
| case EntityName: |
| { |
| int ll = min(src.length(), 9-cBufferPos); |
| while(ll--) { |
| cc = *src; |
| |
| if (!((cc >= 'a' && cc <= 'z') || (cc >= '0' && cc <= '9') || (cc >= 'A' && cc <= 'Z'))) { |
| state.setEntityState(SearchSemicolon); |
| break; |
| } |
| |
| m_cBuffer[cBufferPos++] = cc; |
| src.advancePastNonNewline(); |
| } |
| if (cBufferPos == 9) |
| state.setEntityState(SearchSemicolon); |
| if (state.entityState() == SearchSemicolon) { |
| if(cBufferPos > 1) { |
| // Since the maximum length of entity name is 9, |
| // so a single char array which is allocated on |
| // the stack, its length is 10, should be OK. |
| // Also if we have an illegal character, we treat it |
| // as illegal entity name. |
| unsigned testedEntityNameLen = 0; |
| char tmpEntityNameBuffer[10]; |
| |
| ASSERT(cBufferPos < 10); |
| for (; testedEntityNameLen < cBufferPos; ++testedEntityNameLen) { |
| if (m_cBuffer[testedEntityNameLen] > 0x7e) |
| break; |
| tmpEntityNameBuffer[testedEntityNameLen] = m_cBuffer[testedEntityNameLen]; |
| } |
| |
| const Entity *e; |
| |
| if (testedEntityNameLen == cBufferPos) |
| e = findEntity(tmpEntityNameBuffer, cBufferPos); |
| else |
| e = 0; |
| |
| if(e) |
| EntityUnicodeValue = e->code; |
| |
| // be IE compatible |
| if(parsingTag && EntityUnicodeValue > 255 && *src != ';') /* NOTE(review): src may be empty here when the name filled the buffer exactly - confirm *src is safe on an empty SegmentedString */ |
| EntityUnicodeValue = 0; |
| } |
| } |
| else |
| break; /* name not finished yet: leave the switch; more input is needed */ |
| } /* no break after this block: execution continues into case SearchSemicolon */ |
| case SearchSemicolon: |
| // Don't allow values that are more than 21 bits. |
| if (EntityUnicodeValue > 0 && EntityUnicodeValue <= 0x10FFFF) { |
| if (!inViewSourceMode()) { |
| if (*src == ';') |
| src.advancePastNonNewline(); |
| if (EntityUnicodeValue <= 0xFFFF) { |
| checkBuffer(); |
| src.push(fixUpChar(EntityUnicodeValue)); /* the decoded character goes back onto src instead of being written through dest */ |
| } else { |
| // Convert to UTF-16, using surrogate code points. |
| checkBuffer(2); |
| src.push(U16_LEAD(EntityUnicodeValue)); |
| src.push(U16_TRAIL(EntityUnicodeValue)); |
| } |
| } else { |
| // FIXME: We should eventually colorize entities by sending them as a special token. |
| // 12 bytes required: up to 10 bytes in m_cBuffer plus the |
| // leading '&' and trailing ';' |
| checkBuffer(12); |
| *dest++ = '&'; |
| for (unsigned i = 0; i < cBufferPos; i++) |
| dest[i] = m_cBuffer[i]; |
| dest += cBufferPos; |
| if (*src == ';') { |
| *dest++ = ';'; |
| src.advancePastNonNewline(); |
| } |
| } |
| } else { |
| // 11 bytes required: up to 10 bytes in m_cBuffer plus the |
| // leading '&' |
| checkBuffer(11); /* NOTE(review): dest is caller-supplied (parseNonHTMLText passes a pointer into m_scriptCode) - confirm checkBuffer() guards the buffer that dest points into */ |
| // ignore the sequence, add it to the buffer as plaintext |
| *dest++ = '&'; |
| for (unsigned i = 0; i < cBufferPos; i++) |
| dest[i] = m_cBuffer[i]; |
| dest += cBufferPos; |
| } |
| |
| state.setEntityState(NoEntity); |
| return state; |
| } |
| } |
| |
| return state; |
| } |
| |
| HTMLTokenizer::State HTMLTokenizer::parseDoctype(SegmentedString& src, State state) |
| { |
| ASSERT(state.inDoctype()); |
| while (!src.isEmpty() && state.inDoctype()) { |
| UChar c = *src; |
| bool isWhitespace = c == '\r' || c == '\n' || c == '\t' || c == ' '; |
| switch (m_doctypeToken.state()) { |
| case DoctypeBegin: { |
| m_doctypeToken.setState(DoctypeBeforeName); |
| if (isWhitespace) { |
| src.advance(m_lineNumber); |
| if (inViewSourceMode()) |
| m_doctypeToken.m_source.append(c); |
| } |
| break; |
| } |
| case DoctypeBeforeName: { |
| if (c == '>') { |
| // Malformed. Just exit. |
| src.advancePastNonNewline(); |
| state.setInDoctype(false); |
| if (inViewSourceMode()) |
| processDoctypeToken(); |
| } else if (isWhitespace) { |
| src.advance(m_lineNumber); |
| if (inViewSourceMode()) |
| m_doctypeToken.m_source.append(c); |
| } else |
| m_doctypeToken.setState(DoctypeName); |
| break; |
| } |
| case DoctypeName: { |
| if (c == '>') { |
| // Valid doctype. Emit it. |
| src.advancePastNonNewline(); |
| state.setInDoctype(false); |
| processDoctypeToken(); |
| } else if (isWhitespace) { |
| m_doctypeSearchCount = 0; // Used now to scan for PUBLIC |
| m_doctypeSecondarySearchCount = 0; // Used now to scan for SYSTEM |
| m_doctypeToken.setState(DoctypeAfterName); |
| src.advance(m_lineNumber); |
| if (inViewSourceMode()) |
| m_doctypeToken.m_source.append(c); |
| } else { |
| src.advancePastNonNewline(); |
| m_doctypeToken.m_name.append(c); |
| if (inViewSourceMode()) |
| m_doctypeToken.m_source.append(c); |
| } |
| break; |
| } |
| case DoctypeAfterName: { |
| if (c == '>') { |
| // Valid doctype. Emit it. |
| src.advancePastNonNewline(); |
| state.setInDoctype(false); |
| processDoctypeToken(); |
| } else if (!isWhitespace) { |
| src.advancePastNonNewline(); |
| if (toASCIILower(c) == publicStart[m_doctypeSearchCount]) { |
| m_doctypeSearchCount++; |
| if (m_doctypeSearchCount == 6) |
| // Found 'PUBLIC' sequence |
| m_doctypeToken.setState(DoctypeBeforePublicID); |
| } else if (m_doctypeSearchCount > 0) { |
| m_doctypeSearchCount = 0; |
| m_doctypeToken.setState(DoctypeBogus); |
| } else if (toASCIILower(c) == systemStart[m_doctypeSecondarySearchCount]) { |
| m_doctypeSecondarySearchCount++; |
| if (m_doctypeSecondarySearchCount == 6) |
| // Found 'SYSTEM' sequence |
| m_doctypeToken.setState(DoctypeBeforeSystemID); |
| } else { |
| m_doctypeSecondarySearchCount = 0; |
| m_doctypeToken.setState(DoctypeBogus); |
| } |
| if (inViewSourceMode()) |
| m_doctypeToken.m_source.append(c); |
| } else { |
| src.advance(m_lineNumber); // Whitespace keeps us in the after name state. |
| if (inViewSourceMode()) |
| m_doctypeToken.m_source.append(c); |
| } |
| break; |
| } |
| case DoctypeBeforePublicID: { |
| if (c == '\"' || c == '\'') { |
| tquote = c == '\"' ? DoubleQuote : SingleQuote; |
| m_doctypeToken.setState(DoctypePublicID); |
| src.advancePastNonNewline(); |
| if (inViewSourceMode()) |
| m_doctypeToken.m_source.append(c); |
| } else if (c == '>') { |
| // Considered bogus. Don't process the doctype. |
| src.advancePastNonNewline(); |
| state.setInDoctype(false); |
| if (inViewSourceMode()) |
| processDoctypeToken(); |
| } else if (isWhitespace) { |
| src.advance(m_lineNumber); |
| if (inViewSourceMode()) |
| m_doctypeToken.m_source.append(c); |
| } else |
| m_doctypeToken.setState(DoctypeBogus); |
| break; |
| } |
| case DoctypePublicID: { |
| if ((c == '\"' && tquote == DoubleQuote) || (c == '\'' && tquote == SingleQuote)) { |
| src.advancePastNonNewline(); |
| m_doctypeToken.setState(DoctypeAfterPublicID); |
| if (inViewSourceMode()) |
| m_doctypeToken.m_source.append(c); |
| } else if (c == '>') { |
| // Considered bogus. Don't process the doctype. |
| src.advancePastNonNewline(); |
| state.setInDoctype(false); |
| if (inViewSourceMode()) |
| processDoctypeToken(); |
| } else { |
| m_doctypeToken.m_publicID.append(c); |
| src.advance(m_lineNumber); |
| if (inViewSourceMode()) |
| m_doctypeToken.m_source.append(c); |
| } |
| break; |
| } |
| case DoctypeAfterPublicID: |
| if (c == '\"' || c == '\'') { |
| tquote = c == '\"' ? DoubleQuote : SingleQuote; |
| m_doctypeToken.setState(DoctypeSystemID); |
| src.advancePastNonNewline(); |
| if (inViewSourceMode()) |
| m_doctypeToken.m_source.append(c); |
| } else if (c == '>') { |
| // Valid doctype. Emit it now. |
| src.advancePastNonNewline(); |
| state.setInDoctype(false); |
| processDoctypeToken(); |
| } else if (isWhitespace) { |
| src.advance(m_lineNumber); |
| if (inViewSourceMode()) |
| m_doctypeToken.m_source.append(c); |
| } else |
| m_doctypeToken.setState(DoctypeBogus); |
| break; |
| case DoctypeBeforeSystemID: |
| if (c == '\"' || c == '\'') { |
| tquote = c == '\"' ? DoubleQuote : SingleQuote; |
| m_doctypeToken.setState(DoctypeSystemID); |
| src.advancePastNonNewline(); |
| if (inViewSourceMode()) |
| m_doctypeToken.m_source.append(c); |
| } else if (c == '>') { |
| // Considered bogus. Don't process the doctype. |
| src.advancePastNonNewline(); |
| state.setInDoctype(false); |
| } else if (isWhitespace) { |
| src.advance(m_lineNumber); |
| if (inViewSourceMode()) |
| m_doctypeToken.m_source.append(c); |
| } else |
| m_doctypeToken.setState(DoctypeBogus); |
| break; |
| case DoctypeSystemID: |
| if ((c == '\"' && tquote == DoubleQuote) || (c == '\'' && tquote == SingleQuote)) { |
| src.advancePastNonNewline(); |
| m_doctypeToken.setState(DoctypeAfterSystemID); |
| if (inViewSourceMode()) |
| m_doctypeToken.m_source.append(c); |
| } else if (c == '>') { |
| // Considered bogus. Don't process the doctype. |
| src.advancePastNonNewline(); |
| state.setInDoctype(false); |
| if (inViewSourceMode()) |
| processDoctypeToken(); |
| } else { |
| m_doctypeToken.m_systemID.append(c); |
| src.advance(m_lineNumber); |
| if (inViewSourceMode()) |
| m_doctypeToken.m_source.append(c); |
| } |
| break; |
| case DoctypeAfterSystemID: |
| if (c == '>') { |
| // Valid doctype. Emit it now. |
| src.advancePastNonNewline(); |
| state.setInDoctype(false); |
| processDoctypeToken(); |
| } else if (isWhitespace) { |
| src.advance(m_lineNumber); |
| if (inViewSourceMode()) |
| m_doctypeToken.m_source.append(c); |
| } else |
| m_doctypeToken.setState(DoctypeBogus); |
| break; |
| case DoctypeBogus: |
| if (c == '>') { |
| // Done with the bogus doctype. |
| src.advancePastNonNewline(); |
| state.setInDoctype(false); |
| if (inViewSourceMode()) |
| processDoctypeToken(); |
| } else { |
| src.advance(m_lineNumber); // Just keep scanning for '>' |
| if (inViewSourceMode()) |
| m_doctypeToken.m_source.append(c); |
| } |
| break; |
| default: |
| break; |
| } |
| } |
| return state; |
| } |
| |
// Tokenizes the inside of a tag, i.e. what follows the opening angle bracket.
//
// Runs a state machine keyed on state.tagState() (TagName, SearchAttribute, AttributeName,
// SearchEqual, SearchValue, QuotedValue, Value, SearchEnd) and consumes characters from src,
// collecting the tag name and attributes into m_currentToken.
//
// src:   input being tokenized; characters are consumed as they are recognized.
// state: tokenizer state on entry; its tag state selects where to resume.
//
// Returns the updated state. The function returns as soon as the tag state is NoTag, after
// handing a recognized comment or doctype opener to parseComment()/parseDoctype(), after the
// finished tag has gone through processToken(), or when src is empty (leaving the tag state set
// so that a later call resumes where this one stopped). The local scratch-buffer position is
// stored back into m_cBufferPos on every one of these paths.
| HTMLTokenizer::State HTMLTokenizer::parseTag(SegmentedString& src, State state) |
| { |
| ASSERT(!state.hasEntityState()); |
| |
// Work on a local copy of the scratch-buffer index; it is written back before returning.
| unsigned cBufferPos = m_cBufferPos; |
| |
// True while the last character skipped in SearchEqual was a slash. Being a local, it starts
// out false on every call.
| bool lastIsSlash = false; |
| |
| while (!src.isEmpty()) { |
| checkBuffer(); |
| switch(state.tagState()) { |
// Not inside a tag (any more): hand control back to the caller.
| case NoTag: |
| { |
| m_cBufferPos = cBufferPos; |
| return state; |
| } |
| case TagName: |
| { |
// searchCount > 0 means the comment opener is still being matched character by character
// (write() sets the counter to 1 after seeing the exclamation mark).
| if (searchCount > 0) { |
| if (*src == commentStart[searchCount]) { |
| searchCount++; |
| if (searchCount == 2) |
| m_doctypeSearchCount++; // A '!' is also part of a doctype, so we are moving through that still as well. |
| else |
| m_doctypeSearchCount = 0; |
| if (searchCount == 4) { |
| // Found '<!--' sequence |
| src.advancePastNonNewline(); |
| m_dest = m_buffer; // ignore the previous part of this tag |
| state.setInComment(true); |
| state.setTagState(NoTag); |
| |
| // Fix bug 34302 at kde.bugs.org. Go ahead and treat |
| // <!--> as a valid comment, since both mozilla and IE on windows |
| // can handle this case. Only do this in quirks mode. -dwh |
| if (!src.isEmpty() && *src == '>' && m_doc->inCompatMode()) { |
| state.setInComment(false); |
| src.advancePastNonNewline(); |
// NOTE(review): the character following the closing bracket is copied into the scratch
// buffer here without being consumed from src - the intent is not clear from this function.
| if (!src.isEmpty()) |
| m_cBuffer[cBufferPos++] = *src; |
| } else |
| state = parseComment(src, state); |
| |
| m_cBufferPos = cBufferPos; |
| return state; // Finished parsing tag! |
| } |
| m_cBuffer[cBufferPos++] = *src; |
| src.advancePastNonNewline(); |
| break; |
| } else |
| searchCount = 0; // Stop looking for '<!--' sequence |
| } |
| |
// m_doctypeSearchCount > 0 means the doctype opener is still being matched; the comparison is
// done on the ASCII-lowercased character.
| if (m_doctypeSearchCount > 0) { |
| if (toASCIILower(*src) == doctypeStart[m_doctypeSearchCount]) { |
| m_doctypeSearchCount++; |
| m_cBuffer[cBufferPos++] = *src; |
| src.advancePastNonNewline(); |
| if (m_doctypeSearchCount == 9) { |
| // Found '<!DOCTYPE' sequence |
| state.setInDoctype(true); |
| state.setTagState(NoTag); |
| m_doctypeToken.reset(); |
| if (inViewSourceMode()) |
| m_doctypeToken.m_source.append(m_cBuffer, cBufferPos); |
| state = parseDoctype(src, state); |
| m_cBufferPos = cBufferPos; |
| return state; |
| } |
| break; |
| } else |
| m_doctypeSearchCount = 0; // Stop looking for '<!DOCTYPE' sequence |
| } |
| |
// Ordinary tag name: copy characters into m_cBuffer until whitespace or an angle bracket is
// seen, or until src or the remaining scratch-buffer space runs out.
| bool finish = false; |
| unsigned int ll = min(src.length(), CBUFLEN - cBufferPos); |
| while (ll--) { |
| UChar curchar = *src; |
| if (isASCIISpace(curchar) || curchar == '>' || curchar == '<') { |
| finish = true; |
| break; |
| } |
| |
| // tolower() shows up on profiles. This is faster! |
| if (curchar >= 'A' && curchar <= 'Z' && !inViewSourceMode()) |
| m_cBuffer[cBufferPos++] = curchar + ('a' - 'A'); |
| else |
| m_cBuffer[cBufferPos++] = curchar; |
| src.advancePastNonNewline(); |
| } |
| |
| // Disadvantage: we add the possible rest of the tag |
| // as attribute names. ### judge if this causes problems |
// Either a delimiter ended the name or the scratch buffer is full; in both cases the token
// name is built from what has been collected so far.
| if (finish || CBUFLEN == cBufferPos) { |
| bool beginTag; |
| UChar* ptr = m_cBuffer; |
| unsigned int len = cBufferPos; |
| m_cBuffer[cBufferPos] = '\0'; |
| if ((cBufferPos > 0) && (*ptr == '/')) { |
| // End Tag |
| beginTag = false; |
| ptr++; |
| len--; |
| } |
| else |
| // Start Tag |
| beginTag = true; |
| |
| // Ignore the / in fake xml tags like <br/>. We trim off the "/" so that we'll get "br" as the tag name and not "br/". |
| if (len > 1 && ptr[len-1] == '/' && !inViewSourceMode()) |
| ptr[--len] = '\0'; |
| |
| // Now that we've shaved off any invalid / that might have followed the name), make the tag. |
| // FIXME: FireFox and WinIE turn !foo nodes into comments, we ignore comments. (fast/parser/tag-with-exclamation-point.html) |
| if (ptr[0] != '!' || inViewSourceMode()) { |
| m_currentToken.tagName = AtomicString(ptr); |
| m_currentToken.beginTag = beginTag; |
| } |
| m_dest = m_buffer; |
| state.setTagState(SearchAttribute); |
| cBufferPos = 0; |
| } |
| break; |
| } |
// Skip whitespace and stray quotes. The first other character either starts an attribute name
// or, if it is an angle bracket, moves on to SearchEnd.
| case SearchAttribute: |
| while(!src.isEmpty()) { |
| UChar curchar = *src; |
| // In this mode just ignore any quotes we encounter and treat them like spaces. |
| if (!isASCIISpace(curchar) && curchar != '\'' && curchar != '"') { |
| if (curchar == '<' || curchar == '>') |
| state.setTagState(SearchEnd); |
| else |
| state.setTagState(AttributeName); |
| |
| cBufferPos = 0; |
| break; |
| } |
| if (inViewSourceMode()) |
| m_currentToken.addViewSourceChar(curchar); |
| src.advance(m_lineNumber); |
| } |
| break; |
// Collect the attribute name into m_cBuffer, lowercasing ASCII capitals unless in view-source mode.
| case AttributeName: |
| { |
| int ll = min(src.length(), CBUFLEN - cBufferPos); |
| while (ll--) { |
| UChar curchar = *src; |
| // If we encounter a "/" when scanning an attribute name, treat it as a delimiter. This allows the |
| // cases like <input type=checkbox checked/> to work (and accommodates XML-style syntax as per HTML5). |
| if (curchar <= '>' && (curchar >= '<' || isASCIISpace(curchar) || curchar == '/')) { |
| m_cBuffer[cBufferPos] = '\0'; |
| m_attrName = AtomicString(m_cBuffer); |
// Slot 0 of m_buffer is filled with a zero; the value states below read the attribute value
// starting at m_buffer + 1.
| m_dest = m_buffer; |
| *m_dest++ = 0; |
| state.setTagState(SearchEqual); |
| if (inViewSourceMode()) |
| m_currentToken.addViewSourceChar('a'); |
| break; |
| } |
| |
| // tolower() shows up on profiles. This is faster! |
| if (curchar >= 'A' && curchar <= 'Z' && !inViewSourceMode()) |
| m_cBuffer[cBufferPos++] = curchar + ('a' - 'A'); |
| else |
| m_cBuffer[cBufferPos++] = curchar; |
| |
| src.advance(m_lineNumber); |
| } |
// Scratch buffer full: the attribute name is cut off at this point.
| if (cBufferPos == CBUFLEN) { |
| m_cBuffer[cBufferPos] = '\0'; |
| m_attrName = AtomicString(m_cBuffer); |
| m_dest = m_buffer; |
| *m_dest++ = 0; |
| state.setTagState(SearchEqual); |
| if (inViewSourceMode()) |
| m_currentToken.addViewSourceChar('a'); |
| } |
| break; |
| } |
// Look for the equals sign that introduces a value. Any other significant character means the
// attribute has no value and is added with an empty one.
| case SearchEqual: |
| while (!src.isEmpty()) { |
| UChar curchar = *src; |
| |
| if (lastIsSlash && curchar == '>') { |
| // This is a quirk (with a long sad history). We have to do this |
| // since widgets do <script src="foo.js"/> and expect the tag to close. |
| if (m_currentToken.tagName == scriptTag) |
| m_currentToken.selfClosingTag = true; |
| m_currentToken.brokenXMLStyle = true; |
| } |
| |
| // In this mode just ignore any quotes or slashes we encounter and treat them like spaces. |
| if (!isASCIISpace(curchar) && curchar != '\'' && curchar != '"' && curchar != '/') { |
| if (curchar == '=') { |
| state.setTagState(SearchValue); |
| if (inViewSourceMode()) |
| m_currentToken.addViewSourceChar(curchar); |
| src.advancePastNonNewline(); |
| } else { |
| m_currentToken.addAttribute(m_attrName, emptyAtom, inViewSourceMode()); |
| m_dest = m_buffer; |
| state.setTagState(SearchAttribute); |
| lastIsSlash = false; |
| } |
| break; |
| } |
| if (inViewSourceMode()) |
| m_currentToken.addViewSourceChar(curchar); |
| |
| lastIsSlash = curchar == '/'; |
| |
| src.advance(m_lineNumber); |
| } |
| break; |
// Skip whitespace after the equals sign, then choose between a quoted and an unquoted value.
| case SearchValue: |
| while (!src.isEmpty()) { |
| UChar curchar = *src; |
| if (!isASCIISpace(curchar)) { |
| if (curchar == '\'' || curchar == '\"') { |
| tquote = curchar == '\"' ? DoubleQuote : SingleQuote; |
| state.setTagState(QuotedValue); |
| if (inViewSourceMode()) |
| m_currentToken.addViewSourceChar(curchar); |
| src.advancePastNonNewline(); |
| } else |
| state.setTagState(Value); |
| |
| break; |
| } |
| if (inViewSourceMode()) |
| m_currentToken.addViewSourceChar(curchar); |
| src.advance(m_lineNumber); |
| } |
| break; |
// Accumulate a quoted value in m_buffer until the matching quote character is found.
| case QuotedValue: |
| while (!src.isEmpty()) { |
| checkBuffer(); |
| |
| UChar curchar = *src; |
// Only unescaped characters whose code is not above that of the closing angle bracket are
// examined; everything else is copied straight into the value.
| if (curchar <= '>' && !src.escaped()) { |
| if (curchar == '>' && m_attrName.isEmpty()) { |
| // Handle a case like <img '>. Just go ahead and be willing |
| // to close the whole tag. Don't consume the character and |
| // just go back into SearchEnd while ignoring the whole |
| // value. |
| // FIXME: Note that this is actually not a very good solution. |
| // It doesn't handle the general case of |
| // unmatched quotes among attributes that have names. -dwh |
| while (m_dest > m_buffer + 1 && (m_dest[-1] == '\n' || m_dest[-1] == '\r')) |
| m_dest--; // remove trailing newlines |
| AtomicString attributeValue(m_buffer + 1, m_dest - m_buffer - 1); |
| if (!attributeValue.contains('/')) |
| m_attrName = attributeValue; // Just make the name/value match. (FIXME: Is this some WinIE quirk?) |
| m_currentToken.addAttribute(m_attrName, attributeValue, inViewSourceMode()); |
| if (inViewSourceMode()) |
| m_currentToken.addViewSourceChar('x'); |
| state.setTagState(SearchAttribute); |
| m_dest = m_buffer; |
| tquote = NoQuote; |
| break; |
| } |
| |
| if (curchar == '&') { |
| src.advancePastNonNewline(); |
| state = parseEntity(src, m_dest, state, cBufferPos, true, true); |
| break; |
| } |
| |
| if ((tquote == SingleQuote && curchar == '\'') || (tquote == DoubleQuote && curchar == '\"')) { |
| // some <input type=hidden> rely on trailing spaces. argh |
| while (m_dest > m_buffer + 1 && (m_dest[-1] == '\n' || m_dest[-1] == '\r')) |
| m_dest--; // remove trailing newlines |
| AtomicString attributeValue(m_buffer + 1, m_dest - m_buffer - 1); |
| if (m_attrName.isEmpty() && !attributeValue.contains('/')) { |
| m_attrName = attributeValue; // Make the name match the value. (FIXME: Is this a WinIE quirk?) |
| if (inViewSourceMode()) |
| m_currentToken.addViewSourceChar('x'); |
| } else if (inViewSourceMode()) |
| m_currentToken.addViewSourceChar('v'); |
| m_currentToken.addAttribute(m_attrName, attributeValue, inViewSourceMode()); |
| m_dest = m_buffer; |
| state.setTagState(SearchAttribute); |
| tquote = NoQuote; |
| if (inViewSourceMode()) |
| m_currentToken.addViewSourceChar(curchar); |
| src.advancePastNonNewline(); |
| break; |
| } |
| } |
| |
| *m_dest++ = curchar; |
| src.advance(m_lineNumber); |
| } |
| break; |
// Accumulate an unquoted value in m_buffer until whitespace or the closing angle bracket.
| case Value: |
| while(!src.isEmpty()) { |
| checkBuffer(); |
| UChar curchar = *src; |
| if (curchar <= '>' && !src.escaped()) { |
| // parse Entities |
| if (curchar == '&') { |
| src.advancePastNonNewline(); |
| state = parseEntity(src, m_dest, state, cBufferPos, true, true); |
| break; |
| } |
| // no quotes. Every space means end of value |
| // '/' does not delimit in IE! |
| if (isASCIISpace(curchar) || curchar == '>') { |
| AtomicString attributeValue(m_buffer + 1, m_dest - m_buffer - 1); |
| m_currentToken.addAttribute(m_attrName, attributeValue, inViewSourceMode()); |
| if (inViewSourceMode()) |
| m_currentToken.addViewSourceChar('v'); |
| m_dest = m_buffer; |
| state.setTagState(SearchAttribute); |
| break; |
| } |
| } |
| |
| *m_dest++ = curchar; |
| src.advance(m_lineNumber); |
| } |
| break; |
// Skip to the end of the tag, emit the token, and switch to raw-text parsing for the tags
// handled specially below.
| case SearchEnd: |
| { |
// A slash seen on the way to the closing bracket marks the tag as self-closing.
| while (!src.isEmpty()) { |
| UChar ch = *src; |
| if (ch == '>' || ch == '<') |
| break; |
| if (ch == '/') |
| m_currentToken.selfClosingTag = true; |
| if (inViewSourceMode()) |
| m_currentToken.addViewSourceChar(ch); |
| src.advance(m_lineNumber); |
| } |
// Input ran out before the tag ended; the tag state stays SearchEnd.
| if (src.isEmpty()) |
| break; |
| |
| searchCount = 0; // Stop looking for '<!--' sequence |
| state.setTagState(NoTag); |
| tquote = NoQuote; |
| |
// A closing bracket is consumed; an opening bracket is left in src (presumably so that it
// starts the next tag - confirm against write()).
| if (*src != '<') |
| src.advance(m_lineNumber); |
| |
| if (m_currentToken.tagName == nullAtom) { //stop if tag is unknown |
| m_cBufferPos = cBufferPos; |
| return state; |
| } |
| |
// processToken() resets m_currentToken, so everything needed afterwards is copied out first.
| AtomicString tagName = m_currentToken.tagName; |
| |
| // Handle <script src="foo"/> like Mozilla/Opera. We have to do this now for Dashboard |
| // compatibility. |
| bool isSelfClosingScript = m_currentToken.selfClosingTag && m_currentToken.beginTag && m_currentToken.tagName == scriptTag; |
| bool beginTag = !m_currentToken.selfClosingTag && m_currentToken.beginTag; |
// For an opening script tag, record the resolved src URL, or leave it empty when scripting is
// disabled, there is no src attribute, or the XSS auditor rejects the attribute value.
| if (m_currentToken.beginTag && m_currentToken.tagName == scriptTag && !inViewSourceMode() && !m_parser->skipMode()) { |
| Attribute* a = 0; |
| m_scriptTagSrcAttrValue = String(); |
| m_scriptTagCharsetAttrValue = String(); |
| if (m_currentToken.attrs && !m_fragment) { |
| if (m_doc->frame() && m_doc->frame()->script()->isEnabled()) { |
| if ((a = m_currentToken.attrs->getAttributeItem(srcAttr))) { |
| m_scriptTagSrcAttrValue = m_doc->completeURL(parseURL(a->value())).string(); |
| if (m_XSSAuditor && !m_XSSAuditor->canLoadExternalScriptFromSrc(a->value())) |
| m_scriptTagSrcAttrValue = String(); |
| } |
| } |
| } |
| } |
| |
| RefPtr<Node> n = processToken(); |
| m_cBufferPos = cBufferPos; |
| if (n || inViewSourceMode()) { |
// Snapshot taken so that tokenization can be rewound further down.
| State savedState = state; |
| SegmentedString savedSrc = src; |
| long savedLineno = m_lineNumber; |
| if ((tagName == preTag || tagName == listingTag) && !inViewSourceMode()) { |
| if (beginTag) |
| state.setDiscardLF(true); // Discard the first LF after we open a pre. |
| } else if (tagName == scriptTag) { |
| ASSERT(!m_scriptNode); |
| m_scriptNode = static_pointer_cast<HTMLScriptElement>(n); |
| if (m_scriptNode) |
| m_scriptTagCharsetAttrValue = m_scriptNode->scriptCharset(); |
// For script and the tags that follow, m_searchStopper and its length are set before
// parseNonHTMLText() runs (presumably the closing-tag text it scans for - confirm).
| if (beginTag) { |
| m_searchStopper = scriptEnd; |
| m_searchStopperLength = 8; |
| state.setInScript(true); |
| state = parseNonHTMLText(src, state); |
| } else if (isSelfClosingScript) { // Handle <script src="foo"/> |
| state.setInScript(true); |
| state = scriptHandler(state); |
| } |
| } else if (tagName == styleTag) { |
| if (beginTag) { |
| m_searchStopper = styleEnd; |
| m_searchStopperLength = 7; |
| state.setInStyle(true); |
| state = parseNonHTMLText(src, state); |
| } |
| } else if (tagName == textareaTag) { |
| if (beginTag) { |
| m_searchStopper = textareaEnd; |
| m_searchStopperLength = 10; |
| state.setInTextArea(true); |
| state = parseNonHTMLText(src, state); |
| } |
| } else if (tagName == titleTag) { |
| if (beginTag) { |
| m_searchStopper = titleEnd; |
| m_searchStopperLength = 7; |
| state.setInTitle(true); |
| state = parseNonHTMLText(src, state); |
| } |
| } else if (tagName == xmpTag) { |
| if (beginTag) { |
| m_searchStopper = xmpEnd; |
| m_searchStopperLength = 5; |
| state.setInXmp(true); |
| state = parseNonHTMLText(src, state); |
| } |
| } else if (tagName == iframeTag) { |
| if (beginTag) { |
| m_searchStopper = iframeEnd; |
| m_searchStopperLength = 8; |
| state.setInIFrame(true); |
| state = parseNonHTMLText(src, state); |
| } |
| } |
| if (src.isEmpty() && (state.inTitle() || inViewSourceMode()) && !state.inComment() && !(state.inScript() && m_currentScriptTagStartLineNumber)) { |
| // We just ate the rest of the document as the #text node under the special tag! |
| // Reset the state then retokenize without special handling. |
| // Let the parser clean up the missing close tag. |
| // FIXME: This is incorrect, because src.isEmpty() doesn't mean we're |
| // at the end of the document unless m_noMoreData is also true. We need |
| // to detect this case elsewhere, and save the state somewhere other |
| // than a local variable. |
| state = savedState; |
| src = savedSrc; |
| m_lineNumber = savedLineno; |
| m_scriptCodeSize = 0; |
| } |
| } |
// An opening plaintext tag switches the plain-text flag on; any other form of the tag clears it.
| if (tagName == plaintextTag) |
| state.setInPlainText(beginTag); |
| return state; // Finished parsing tag! |
| } |
| } // end switch |
| } |
// src is exhausted in the middle of a tag: save the scratch position and return with the tag
// state still set.
| m_cBufferPos = cBufferPos; |
| return state; |
| } |
| |
| inline bool HTMLTokenizer::continueProcessing(int& processedCount, double startTime, State &state) |
| { |
|     // Decides whether write() may keep tokenizing or should yield to the event loop. |
|     // Elapsed time is not examined on every character: only once more than |
|     // m_tokenizerChunkSize characters have been counted, or when the state had explicitly |
|     // allowed a yield. That permission is one-shot: it is read here and then cleared. |
|     // Returns false (after scheduling m_timer) when the caller should stop, true otherwise. |
|     const bool yieldWasAllowed = state.allowYield(); |
|     state.setAllowYield(false); |
| |
|     const bool interruptible = !state.loadingExtScript() && !state.forceSynchronous() && !m_executingScript; |
|     const bool timeToCheckClock = interruptible && (processedCount > m_tokenizerChunkSize || yieldWasAllowed); |
|     if (timeToCheckClock) { |
|         processedCount = 0; |
|         if (currentTime() - startTime > m_tokenizerTimeDelay) { |
|             // FIXME: We'd like to yield aggressively to give stylesheets the opportunity to |
|             // load, but this hurts overall performance on slower machines, so the extra |
|             // condition below stays turned off for now: |
|             //     || (!m_doc->haveStylesheetsLoaded() && |
|             //         (m_doc->documentElement()->id() != ID_HTML || m_doc->body())) |
|             // A zero-delay timer resumes processing as soon as possible. |
|             m_timer.startOneShot(0); |
| #ifdef INSTRUMENT_LAYOUT_SCHEDULING |
|             if (currentTime() - startTime > m_tokenizerTimeDelay) |
|                 printf("Deferring processing of data because 500ms elapsed away from event loop.\n"); |
| #endif |
|             return false; |
|         } |
|     } |
| |
|     processedCount++; |
|     return true; |
| } |
| |
// Entry point for feeding source text to the tokenizer.
//
// str:        text to tokenize; it is copied into a local SegmentedString first.
// appendData: when true and a script is executing, the text is queued instead of parsed; the
//             preload scanner is also only fed or ended when this is true.
//             NOTE(review): looks like this separates freshly arrived data from pending source
//             being replayed (notifyFinished() passes false) - confirm against the class declaration.
//
// Does nothing if there is no buffer or the parser has been stopped. Otherwise the text is
// appended to m_src and tokenized one construct at a time until m_src is empty, a scheduled
// location change is pending, or continueProcessing() asks to yield. If m_timer is already
// active only the append happens. May call end() once no more data is expected.
| void HTMLTokenizer::write(const SegmentedString& str, bool appendData) |
| { |
| if (!m_buffer) |
| return; |
| |
| if (m_parserStopped) |
| return; |
| |
| SegmentedString source(str); |
| if (m_executingScript) |
| source.setExcludeLineNumbers(); |
| |
// While a script is executing (and this is appended data) or external scripts are pending,
// the text is only queued.
| if ((m_executingScript && appendData) || !m_pendingScripts.isEmpty()) { |
| // don't parse; we will do this later |
| if (m_currentPrependingSrc) |
| m_currentPrependingSrc->append(source); |
| else { |
| m_pendingSrc.append(source); |
| #if PRELOAD_SCANNER_ENABLED |
| if (m_preloadScanner && m_preloadScanner->inProgress() && appendData) |
| m_preloadScanner->write(source); |
| #endif |
| } |
| return; |
| } |
| |
| #if PRELOAD_SCANNER_ENABLED |
| if (m_preloadScanner && m_preloadScanner->inProgress() && appendData) |
| m_preloadScanner->end(); |
| #endif |
| |
| if (!m_src.isEmpty()) |
| m_src.append(source); |
| else |
| setSrc(source); |
| |
| // Once a timer is set, it has control of when the tokenizer continues. |
| if (m_timer.isActive()) |
| return; |
| |
// Remember the previous value so that nested write() calls restore it correctly on exit.
| bool wasInWrite = m_inWrite; |
| m_inWrite = true; |
| |
| #ifdef INSTRUMENT_LAYOUT_SCHEDULING |
| if (!m_doc->ownerElement()) |
| printf("Beginning write at time %d\n", m_doc->elapsedTime()); |
| #endif |
| |
| int processedCount = 0; |
| double startTime = currentTime(); |
| |
| Frame* frame = m_doc->frame(); |
| |
// The loop works on a local copy of the state; it is stored back into m_state after the loop.
| State state = m_state; |
| |
| while (!m_src.isEmpty() && (!frame || !frame->loader()->isScheduledLocationChangePending())) { |
| if (!continueProcessing(processedCount, startTime, state)) |
| break; |
| |
| // do we need to enlarge the buffer? |
| checkBuffer(); |
| |
| UChar cc = *m_src; |
| |
// skipLF is set after a CR (see the newline branch below) so that the LF of a CRLF pair is dropped.
| bool wasSkipLF = state.skipLF(); |
| if (wasSkipLF) |
| state.setSkipLF(false); |
| |
| if (wasSkipLF && (cc == '\n')) |
| m_src.advance(); |
| else if (state.needsSpecialWriteHandling()) { |
| // it's important to keep needsSpecialWriteHandling with the flags this block tests |
| if (state.hasEntityState()) |
| state = parseEntity(m_src, m_dest, state, m_cBufferPos, false, state.hasTagState()); |
| else if (state.inPlainText()) |
| state = parseText(m_src, state); |
| else if (state.inAnyNonHTMLText()) |
| state = parseNonHTMLText(m_src, state); |
| else if (state.inComment()) |
| state = parseComment(m_src, state); |
| else if (state.inDoctype()) |
| state = parseDoctype(m_src, state); |
| else if (state.inServer()) |
| state = parseServer(m_src, state); |
| else if (state.inProcessingInstruction()) |
| state = parseProcessingInstruction(m_src, state); |
| else if (state.hasTagState()) |
| state = parseTag(m_src, state); |
| else if (state.startTag()) { |
// The previous character was an opening angle bracket; cc decides what kind of markup follows.
| state.setStartTag(false); |
| |
| switch(cc) { |
| case '/': |
| break; |
| case '!': { |
| // <!-- comment --> or <!DOCTYPE ...> |
| searchCount = 1; // Look for '<!--' sequence to start comment or '<!DOCTYPE' sequence to start doctype |
| m_doctypeSearchCount = 1; |
| break; |
| } |
| case '?': { |
| // xml processing instruction |
| state.setInProcessingInstruction(true); |
| tquote = NoQuote; |
| state = parseProcessingInstruction(m_src, state); |
| continue; |
| |
| break; |
| } |
| case '%': |
| if (!m_brokenServer) { |
| // <% server stuff, handle as comment %> |
| state.setInServer(true); |
| tquote = NoQuote; |
| state = parseServer(m_src, state); |
| continue; |
| } |
| // else fall through |
| default: { |
| if( ((cc >= 'a') && (cc <= 'z')) || ((cc >= 'A') && (cc <= 'Z'))) { |
| // Start of a Start-Tag |
| } else { |
| // Invalid tag |
| // Add as is |
| *m_dest = '<'; |
| m_dest++; |
| continue; |
| } |
| } |
| }; // end case |
| |
// Emit whatever text is pending in the buffer as its own token before tokenizing the tag.
| processToken(); |
| |
| m_cBufferPos = 0; |
| state.setTagState(TagName); |
| state = parseTag(m_src, state); |
| } |
| } else if (cc == '&' && !m_src.escaped()) { |
| m_src.advancePastNonNewline(); |
| state = parseEntity(m_src, m_dest, state, m_cBufferPos, true, state.hasTagState()); |
| } else if (cc == '<' && !m_src.escaped()) { |
// Remember the line the tag starts on; the next iteration handles it through the startTag branch.
| m_currentTagStartLineNumber = m_lineNumber; |
| m_src.advancePastNonNewline(); |
| state.setStartTag(true); |
| state.setDiscardLF(false); |
| } else if (cc == '\n' || cc == '\r') { |
// Both CR and LF are written to the output buffer as a single LF character.
| if (state.discardLF()) |
| // Ignore this LF |
| state.setDiscardLF(false); // We have discarded 1 LF |
| else { |
| // Process this LF |
| *m_dest++ = '\n'; |
| if (cc == '\r' && !m_src.excludeLineNumbers()) |
| m_lineNumber++; |
| } |
| |
| /* Check for MS-DOS CRLF sequence */ |
| if (cc == '\r') |
| state.setSkipLF(true); |
| m_src.advance(m_lineNumber); |
| } else { |
// Ordinary character: copy it to the output buffer.
| state.setDiscardLF(false); |
| *m_dest++ = cc; |
| m_src.advancePastNonNewline(); |
| } |
| } |
| |
| #ifdef INSTRUMENT_LAYOUT_SCHEDULING |
| if (!m_doc->ownerElement()) |
| printf("Ending write at time %d\n", m_doc->elapsedTime()); |
| #endif |
| |
| m_inWrite = wasInWrite; |
| |
| m_state = state; |
| |
// Finish up only from the outermost write(), and only when nothing else is still outstanding.
| if (m_noMoreData && !m_inWrite && !state.loadingExtScript() && !m_executingScript && !m_timer.isActive()) |
| end(); // this actually causes us to be deleted |
| } |
| |
| void HTMLTokenizer::stopParsing() |
| { |
|     // Stops the base tokenizer and cancels any scheduled continuation. |
|     Tokenizer::stopParsing(); |
|     m_timer.stop(); |
| |
|     // Whether parsing ended naturally or through manual intervention, the frame loader has to |
|     // learn that the tokenizer is finished with its data. Nothing is reported for fragment |
|     // parsing or when the document has no frame. |
|     if (m_fragment) |
|         return; |
|     if (Frame* frame = m_doc->frame()) |
|         frame->loader()->tokenizerProcessedData(); |
| } |
| |
| bool HTMLTokenizer::processingData() const |
| { |
|     // The tokenizer counts as busy while a continuation timer is pending or while a write() |
|     // call is in progress. |
|     if (m_timer.isActive()) |
|         return true; |
|     return m_inWrite; |
| } |
| |
| void HTMLTokenizer::timerFired(Timer<HTMLTokenizer>*) |
| { |
| #ifdef INSTRUMENT_LAYOUT_SCHEDULING |
|     if (!m_doc->ownerElement()) |
|         printf("Beginning timer write at time %d\n", m_doc->elapsedTime()); |
| #endif |
| |
|     // A pending layout with no minimum delay gets to run first: re-arm the timer and back off, |
|     // which in effect gives the layout timer priority over this one. |
|     FrameView* view = m_doc->view(); |
|     const bool layoutGoesFirst = view && view->layoutPending() && !m_doc->minimumLayoutDelay(); |
|     if (layoutGoesFirst) { |
|         m_timer.startOneShot(0); |
|         return; |
|     } |
| |
|     // Resume tokenizing by calling write() with empty input, as though more data had arrived. |
|     // This might cause us to get deleted. |
|     write(SegmentedString(), true); |
| } |
| |
| void HTMLTokenizer::end() |
| { |
|     // The timer should already be idle; stopping it only matters if the assertion would fire. |
|     ASSERT(!m_timer.isActive()); |
|     m_timer.stop(); |
| |
|     if (m_buffer) { |
|         // While a tag is being tokenized the buffer is used for other purposes, so the pending |
|         // token is only flushed outside of a tag. |
|         if (!m_state.hasTagState()) |
|             processToken(); |
| |
|         // Release the script accumulation buffer and zero its bookkeeping. |
|         fastFree(m_scriptCode); |
|         m_scriptCode = 0; |
|         m_scriptCodeResync = 0; |
|         m_scriptCodeCapacity = 0; |
|         m_scriptCodeSize = 0; |
| |
|         // Release the main token buffer. |
|         fastFree(m_buffer); |
|         m_buffer = 0; |
|     } |
| |
|     // Report completion: to the document in view-source mode, to the parser otherwise. |
|     if (inViewSourceMode()) |
|         m_doc->finishedParsing(); |
|     else |
|         m_parser->finished(); |
| } |
| |
| void HTMLTokenizer::finish() |
| { |
|     // Input ended inside a comment or server block that was never closed. While that is the |
|     // case and text has been accumulated, flag the construct as broken and feed the |
|     // accumulated text through write() again. |
|     while ((m_state.inComment() || m_state.inServer()) && m_scriptCode && m_scriptCodeSize) { |
|         if (!m_state.inComment()) |
|             m_brokenServer = true; |
|         else |
|             m_brokenComments = true; |
| |
|         checkScriptBuffer(); |
|         m_scriptCode[m_scriptCodeSize] = 0; |
|         m_scriptCode[m_scriptCodeSize + 1] = 0; |
| |
|         // Work out which part of the accumulated text is replayed. |
|         String replay; |
|         if (m_state.inScript() || m_state.inStyle() || m_state.inTextArea()) { |
|             replay = String(m_scriptCode, m_scriptCodeSize); |
|         } else if (m_state.inServer()) { |
|             replay = "<"; |
|             replay.append(m_scriptCode, m_scriptCodeSize); |
|         } else { |
|             // Resume after the first closing angle bracket in the accumulated text. |
|             const int bracketIndex = find(m_scriptCode, m_scriptCodeSize, '>'); |
|             replay = String(m_scriptCode + bracketIndex + 1, m_scriptCodeSize - bracketIndex - 1); |
|         } |
| |
|         // Drop the accumulation buffer and leave the comment/server states before replaying. |
|         fastFree(m_scriptCode); |
|         m_scriptCode = 0; |
|         m_scriptCodeResync = 0; |
|         m_scriptCodeCapacity = 0; |
|         m_scriptCodeSize = 0; |
|         m_state.setInComment(false); |
|         m_state.setInServer(false); |
| |
|         if (!replay.isEmpty()) |
|             write(replay, true); |
|     } |
| |
|     // No further data will arrive. If an external script is still loading (or anything else is |
|     // in flight) parsing cannot be completed yet. |
|     m_noMoreData = true; |
|     const bool stillBusy = m_inWrite || m_state.loadingExtScript() || m_executingScript || m_timer.isActive(); |
|     if (!stillBusy) |
|         end(); // this actually causes us to be deleted |
| } |
| |
| PassRefPtr<Node> HTMLTokenizer::processToken() |
| { |
|     // Emits m_currentToken (turning buffered text into a text token first) and returns the |
|     // node the parser produced for it, or null when nothing was emitted or no node resulted. |
|     ScriptController* controller = 0; |
|     if (!m_fragment && m_doc->frame()) |
|         controller = m_doc->frame()->script(); |
| |
|     if (controller && controller->isEnabled()) { |
|         // FIXME: Why isn't this m_currentScriptTagStartLineNumber? I suspect this is wrong. |
|         // Script line numbers are 1 based. |
|         controller->setEventHandlerLineNumber(m_currentTagStartLineNumber + 1); |
|     } |
| |
|     const bool hasBufferedText = m_dest > m_buffer; |
|     if (hasBufferedText) { |
|         // Buffered characters become the token text; unless this is a comment token, the |
|         // token is labelled as text. |
|         m_currentToken.text = StringImpl::createStrippingNullCharacters(m_buffer, m_dest - m_buffer); |
|         if (m_currentToken.tagName != commentAtom) |
|             m_currentToken.tagName = textAtom; |
|     } else if (m_currentToken.tagName == nullAtom) { |
|         // Neither text nor a tag name: there is nothing to emit. |
|         m_currentToken.reset(); |
|         if (controller) |
|             controller->setEventHandlerLineNumber(m_lineNumber + 1); // Script line numbers are 1 based. |
|         return 0; |
|     } |
| |
|     m_dest = m_buffer; |
| |
|     RefPtr<Node> node; |
|     if (!m_parserStopped) { |
|         NamedMappedAttrMap* attributes = m_currentToken.attrs.get(); |
|         if (attributes) |
|             attributes->shrinkToLength(); |
| |
|         if (!inViewSourceMode()) { |
|             // The parser receives the token but DOES NOT delete it. |
|             node = m_parser->parseToken(&m_currentToken); |
|         } else |
|             static_cast<HTMLViewSourceDocument*>(m_doc)->addViewSourceToken(&m_currentToken); |
|     } |
| |
|     m_currentToken.reset(); |
|     if (controller) |
|         controller->setEventHandlerLineNumber(0); |
| |
|     return node.release(); |
| } |
| |
| void HTMLTokenizer::processDoctypeToken() |
| { |
|     // Hands the collected doctype token to the parser, or to the view-source document when |
|     // in view-source mode. |
|     if (!inViewSourceMode()) { |
|         m_parser->parseDoctypeToken(&m_doctypeToken); |
|         return; |
|     } |
| |
|     static_cast<HTMLViewSourceDocument*>(m_doc)->addViewSourceDoctypeToken(&m_doctypeToken); |
| } |
| |
// Destructor. Asserts that the tokenizer is not being destroyed from inside write() (m_inWrite
// is set for the duration of a write() call), then calls reset().
// NOTE(review): reset() presumably releases the tokenizer buffers - confirm at its definition.
| HTMLTokenizer::~HTMLTokenizer() |
| { |
| ASSERT(!m_inWrite); |
| reset(); |
| } |
| |
| |
| void HTMLTokenizer::enlargeBuffer(int len) |
| { |
|     // Growth policy: grow by len or by the current size, whichever is larger, so the buffer |
|     // always at least doubles. |
|     const int growth = max(len, m_bufferSize); |
| |
|     // Guard against overflowing the size computation. For now overflow is handled exactly |
|     // like a fastRealloc failure: with CRASH. |
|     static const int maxSize = INT_MAX / sizeof(UChar); |
|     if (growth > maxSize - m_bufferSize) |
|         CRASH(); |
| |
|     // m_dest points into the old allocation, so keep it as an offset and rebase it onto the |
|     // reallocated buffer afterwards. |
|     const int writeOffset = m_dest - m_buffer; |
|     const int grownSize = m_bufferSize + growth; |
|     m_buffer = static_cast<UChar*>(fastRealloc(m_buffer, grownSize * sizeof(UChar))); |
|     m_dest = m_buffer + writeOffset; |
|     m_bufferSize = grownSize; |
| } |
| |
| void HTMLTokenizer::enlargeScriptBuffer(int len) |
| { |
|     // Growth policy: grow by len or by the current capacity, whichever is larger, so the |
|     // script buffer always at least doubles. |
|     const int growth = max(len, m_scriptCodeCapacity); |
| |
|     // Guard against overflowing the size computation. For now overflow is handled exactly |
|     // like a fastRealloc failure: with CRASH. |
|     static const int maxSize = INT_MAX / sizeof(UChar); |
|     if (growth > maxSize - m_scriptCodeCapacity) |
|         CRASH(); |
| |
|     const int grownCapacity = m_scriptCodeCapacity + growth; |
|     m_scriptCode = static_cast<UChar*>(fastRealloc(m_scriptCode, grownCapacity * sizeof(UChar))); |
|     m_scriptCodeCapacity = grownCapacity; |
| } |
| |
| void HTMLTokenizer::executeScriptsWaitingForStylesheets() |
| { |
|     // Only meant to be called once the document reports its stylesheets as loaded. |
|     ASSERT(m_doc->haveStylesheetsLoaded()); |
| |
|     // No script was held back waiting for stylesheets, so there is nothing to resume. |
|     if (!m_hasScriptsWaitingForStylesheets) |
|         return; |
| |
|     // Resume script processing; no cached resource is passed along. |
|     notifyFinished(0); |
| } |
| |
// Called when a pending external script may be ready to run; also invoked directly with a null
// resource by executeScriptsWaitingForStylesheets(). The resource argument is not used.
//
// Returns without running anything while the document's stylesheets are still loading.
// Otherwise runs, in order, each script at the front of m_pendingScripts that has finished
// loading, dispatching an error or load event on the script node, and after each one feeds
// m_pendingSrc back through write() unless m_requestingScript is set.
| void HTMLTokenizer::notifyFinished(CachedResource*) |
| { |
| #ifdef INSTRUMENT_LAYOUT_SCHEDULING |
| if (!m_doc->ownerElement()) |
| printf("script loaded at %d\n", m_doc->elapsedTime()); |
| #endif |
| |
| ASSERT(!m_pendingScripts.isEmpty()); |
| |
| // Make external scripts wait for external stylesheets. |
| // FIXME: This needs to be done for inline scripts too. |
| m_hasScriptsWaitingForStylesheets = !m_doc->haveStylesheetsLoaded(); |
| if (m_hasScriptsWaitingForStylesheets) |
| return; |
| |
// finished is a local and is tested first in the loop condition, so once it is true no member
// is touched when the loop exits.
| bool finished = false; |
| while (!finished && m_pendingScripts.first()->isLoaded()) { |
| CachedScript* cs = m_pendingScripts.first().get(); |
| m_pendingScripts.removeFirst(); |
| ASSERT(cache()->disabled() || cs->accessCount() > 0); |
| |
| setSrc(SegmentedString()); |
| |
| // make sure we forget about the script before we execute the new one |
| // infinite recursion might happen otherwise |
| ScriptSourceCode sourceCode(cs); |
| bool errorOccurred = cs->errorOccurred(); |
| cs->removeClient(this); |
| |
// Take the script node out of m_scriptNode before anything is executed.
| RefPtr<Node> n = m_scriptNode.release(); |
| |
| #ifdef INSTRUMENT_LAYOUT_SCHEDULING |
| if (!m_doc->ownerElement()) |
| printf("external script beginning execution at %d\n", m_doc->elapsedTime()); |
| #endif |
| |
// A failed load only dispatches the error event; a successful one executes the script (when
// the element says it should run as JavaScript) and then dispatches the load event.
| if (errorOccurred) |
| n->dispatchEvent(eventNames().errorEvent, true, false); |
| else { |
| if (static_cast<HTMLScriptElement*>(n.get())->shouldExecuteAsJavaScript()) |
| m_state = scriptExecution(sourceCode, m_state); |
| #if ENABLE(XHTMLMP) |
| else |
| m_doc->setShouldProcessNoscriptElement(true); |
| #endif |
| n->dispatchEvent(eventNames().loadEvent, false, false); |
| } |
| |
| // The state of m_pendingScripts.isEmpty() can change inside the scriptExecution() |
| // call above, so test afterwards. |
| finished = m_pendingScripts.isEmpty(); |
| if (finished) { |
| ASSERT(!m_hasScriptsWaitingForStylesheets); |
| m_state.setLoadingExtScript(false); |
| #ifdef INSTRUMENT_LAYOUT_SCHEDULING |
| if (!m_doc->ownerElement()) |
| printf("external script finished execution at %d\n", m_doc->elapsedTime()); |
| #endif |
| } else if (m_hasScriptsWaitingForStylesheets) { |
| // m_hasScriptsWaitingForStylesheets flag might have changed during the script execution. |
| // If it did we are now blocked waiting for stylesheets and should not execute more scripts until they arrive. |
| finished = true; |
| } |
| |
| // 'm_requestingScript' is true when we are called synchronously from |
| // scriptHandler(). In that case scriptHandler() will take care |
| // of m_pendingSrc. |
| if (!m_requestingScript) { |
// Move the queued source into a local and clear the member before replaying it through write().
| SegmentedString rest = m_pendingSrc; |
| m_pendingSrc.clear(); |
| write(rest, false); |
| // we might be deleted at this point, do not access any members. |
| } |
| } |
| } |
| |
| bool HTMLTokenizer::isWaitingForScripts() const |
| { |
| return m_state.loadingExtScript(); |
| } |
| |
| void HTMLTokenizer::setSrc(const SegmentedString& source) |
| { |
| m_src = source; |
| } |
| |
| void parseHTMLDocumentFragment(const String& source, DocumentFragment* fragment) |
| { |
| HTMLTokenizer tok(fragment); |
| tok.setForceSynchronous(true); |
| tok.write(source, true); |
| tok.finish(); |
| ASSERT(!tok.processingData()); // make sure we're done (see 3963151) |
| } |
| |
| UChar decodeNamedEntity(const char* name) |
| { |
| const Entity* e = findEntity(name, strlen(name)); |
| return e ? e->code : 0; |
| } |
| |
| } |