/*
    Copyright (C) 1997 Martin Jones (mjones@kde.org)
              (C) 1997 Torben Weis (weis@kde.org)
              (C) 1998 Waldo Bastian (bastian@kde.org)
              (C) 2001 Dirk Mueller (mueller@kde.org)
    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public License
    along with this library; see the file COPYING.LIB.  If not, write to
    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
    Boston, MA 02110-1301, USA.
*/
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 23 | |
abarth@webkit.org | a814ad6 | 2010-06-23 09:21:08 +0000 | [diff] [blame] | 24 | #ifndef LegacyHTMLDocumentParser_h |
| 25 | #define LegacyHTMLDocumentParser_h |
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 26 | |
antti@apple.com | 72e4a84 | 2008-09-05 09:28:11 +0000 | [diff] [blame] | 27 | #include "CachedResourceClient.h" |
| 28 | #include "CachedResourceHandle.h" |
abarth@webkit.org | 26dfd17 | 2010-06-22 23:43:59 +0000 | [diff] [blame] | 29 | #include "FragmentScriptingPermission.h" |
weinig@apple.com | 1c6ce05 | 2010-05-23 23:20:03 +0000 | [diff] [blame] | 30 | #include "NamedNodeMap.h" |
eric@webkit.org | e37f595 | 2010-06-28 07:56:10 +0000 | [diff] [blame] | 31 | #include "ScriptableDocumentParser.h" |
darin | d03140b | 2006-01-19 08:59:31 +0000 | [diff] [blame] | 32 | #include "SegmentedString.h" |
darin | a52f4e1 | 2006-02-02 02:51:03 +0000 | [diff] [blame] | 33 | #include "Timer.h" |
antti@apple.com | 72e4a84 | 2008-09-05 09:28:11 +0000 | [diff] [blame] | 34 | #include <wtf/Deque.h> |
hyatt | 97815a3 | 2007-05-17 06:27:28 +0000 | [diff] [blame] | 35 | #include <wtf/OwnPtr.h> |
antti@apple.com | 72e4a84 | 2008-09-05 09:28:11 +0000 | [diff] [blame] | 36 | #include <wtf/Vector.h> |
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 37 | |
darin | d03140b | 2006-01-19 08:59:31 +0000 | [diff] [blame] | 38 | namespace WebCore { |
hyatt | 3b4f6d4 | 2004-02-07 01:19:44 +0000 | [diff] [blame] | 39 | |
// Forward declarations for types that this header only uses by pointer,
// reference or as a template argument. Kept in alphabetical order.
class CachedScript;
class Document;
class DocumentFragment;
class FrameView;
class HTMLDocument;
class HTMLScriptElement;
class HTMLViewSourceDocument;
class LegacyHTMLTreeBuilder;
class LegacyPreloadScanner;
class Node;
class ScriptSourceCode;
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 51 | |
hyatt | 3b4f6d4 | 2004-02-07 01:19:44 +0000 | [diff] [blame] | 52 | /** |
| 53 | * @internal |
| 54 | * represents one HTML tag. Consists of a numerical id, and the list |
| 55 | * of attributes. Can also represent text. In this case the id = 0 and |
| 56 | * text contains the text. |
| 57 | */ |
eric@webkit.org | 683713f | 2008-11-12 01:53:37 +0000 | [diff] [blame] | 58 | struct Token { |
| 59 | Token() |
| 60 | : beginTag(true) |
| 61 | , selfClosingTag(false) |
| 62 | , brokenXMLStyle(false) |
| 63 | , m_sourceInfo(0) |
| 64 | { } |
hyatt | 97815a3 | 2007-05-17 06:27:28 +0000 | [diff] [blame] | 65 | ~Token() { } |
hyatt | 59136b7 | 2005-07-09 20:19:28 +0000 | [diff] [blame] | 66 | |
darin@apple.com | faced26 | 2009-01-12 07:44:27 +0000 | [diff] [blame] | 67 | void addAttribute(AtomicString& attrName, const AtomicString& v, bool viewSourceMode); |
hyatt | 59136b7 | 2005-07-09 20:19:28 +0000 | [diff] [blame] | 68 | |
darin | d03140b | 2006-01-19 08:59:31 +0000 | [diff] [blame] | 69 | bool isOpenTag(const QualifiedName& fullName) const { return beginTag && fullName.localName() == tagName; } |
| 70 | bool isCloseTag(const QualifiedName& fullName) const { return !beginTag && fullName.localName() == tagName; } |
hyatt | 59136b7 | 2005-07-09 20:19:28 +0000 | [diff] [blame] | 71 | |
hyatt | 3b4f6d4 | 2004-02-07 01:19:44 +0000 | [diff] [blame] | 72 | void reset() |
| 73 | { |
darin | 2a4c374 | 2005-12-27 18:26:16 +0000 | [diff] [blame] | 74 | attrs = 0; |
weinig | 54a89ffb | 2007-01-08 17:50:01 +0000 | [diff] [blame] | 75 | text = 0; |
darin | d03140b | 2006-01-19 08:59:31 +0000 | [diff] [blame] | 76 | tagName = nullAtom; |
hyatt | 59136b7 | 2005-07-09 20:19:28 +0000 | [diff] [blame] | 77 | beginTag = true; |
eric@webkit.org | 683713f | 2008-11-12 01:53:37 +0000 | [diff] [blame] | 78 | selfClosingTag = false; |
hyatt | bf85dfb | 2007-05-24 19:54:47 +0000 | [diff] [blame] | 79 | brokenXMLStyle = false; |
hyatt | 97815a3 | 2007-05-17 06:27:28 +0000 | [diff] [blame] | 80 | if (m_sourceInfo) |
| 81 | m_sourceInfo->clear(); |
hyatt | 3b4f6d4 | 2004-02-07 01:19:44 +0000 | [diff] [blame] | 82 | } |
hyatt | 59136b7 | 2005-07-09 20:19:28 +0000 | [diff] [blame] | 83 | |
hyatt | 97815a3 | 2007-05-17 06:27:28 +0000 | [diff] [blame] | 84 | void addViewSourceChar(UChar c) { if (!m_sourceInfo.get()) m_sourceInfo.set(new Vector<UChar>); m_sourceInfo->append(c); } |
| 85 | |
weinig@apple.com | 1c6ce05 | 2010-05-23 23:20:03 +0000 | [diff] [blame] | 86 | RefPtr<NamedNodeMap> attrs; |
darin | b9481ed | 2006-03-20 02:57:59 +0000 | [diff] [blame] | 87 | RefPtr<StringImpl> text; |
darin | d03140b | 2006-01-19 08:59:31 +0000 | [diff] [blame] | 88 | AtomicString tagName; |
darin | 2a4c374 | 2005-12-27 18:26:16 +0000 | [diff] [blame] | 89 | bool beginTag; |
eric@webkit.org | 683713f | 2008-11-12 01:53:37 +0000 | [diff] [blame] | 90 | bool selfClosingTag; |
hyatt | bf85dfb | 2007-05-24 19:54:47 +0000 | [diff] [blame] | 91 | bool brokenXMLStyle; |
hyatt | 97815a3 | 2007-05-17 06:27:28 +0000 | [diff] [blame] | 92 | OwnPtr<Vector<UChar> > m_sourceInfo; |
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 93 | }; |
| 94 | |
// Parse states for a doctype token. The current state is stored in
// DoctypeToken::m_state; DoctypeToken::reset() returns it to DoctypeBegin.
// The enumerators are listed in the order the parts of a declaration are
// named (name, public ID, system ID), with DoctypeBogus last.
enum DoctypeState {
    DoctypeBegin,
    DoctypeBeforeName,
    DoctypeName,
    DoctypeAfterName,
    DoctypeBeforePublicID,
    DoctypePublicID,
    DoctypeAfterPublicID,
    DoctypeBeforeSystemID,
    DoctypeSystemID,
    DoctypeAfterSystemID,
    DoctypeBogus
};
| 108 | |
| 109 | class DoctypeToken { |
| 110 | public: |
| 111 | DoctypeToken() {} |
eric@webkit.org | fa046f2 | 2010-06-13 02:16:46 +0000 | [diff] [blame] | 112 | |
hyatt@apple.com | 4258307 | 2008-02-20 22:47:57 +0000 | [diff] [blame] | 113 | void reset() |
| 114 | { |
| 115 | m_name.clear(); |
| 116 | m_publicID.clear(); |
| 117 | m_systemID.clear(); |
| 118 | m_state = DoctypeBegin; |
| 119 | m_source.clear(); |
abarth@webkit.org | 75cc4e6 | 2010-06-08 07:15:52 +0000 | [diff] [blame] | 120 | m_forceQuirks = false; |
hyatt@apple.com | 4258307 | 2008-02-20 22:47:57 +0000 | [diff] [blame] | 121 | } |
| 122 | |
| 123 | DoctypeState state() { return m_state; } |
| 124 | void setState(DoctypeState s) { m_state = s; } |
| 125 | |
| 126 | Vector<UChar> m_name; |
| 127 | Vector<UChar> m_publicID; |
| 128 | Vector<UChar> m_systemID; |
| 129 | DoctypeState m_state; |
abarth@webkit.org | 75cc4e6 | 2010-06-08 07:15:52 +0000 | [diff] [blame] | 130 | |
hyatt@apple.com | 4258307 | 2008-02-20 22:47:57 +0000 | [diff] [blame] | 131 | Vector<UChar> m_source; |
abarth@webkit.org | 75cc4e6 | 2010-06-08 07:15:52 +0000 | [diff] [blame] | 132 | |
| 133 | bool m_forceQuirks; // Used by the HTML5 parser. |
hyatt@apple.com | 4258307 | 2008-02-20 22:47:57 +0000 | [diff] [blame] | 134 | }; |
| 135 | |
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 136 | //----------------------------------------------------------------------------- |
| 137 | |
abarth@webkit.org | 1183dd6 | 2010-06-23 07:16:51 +0000 | [diff] [blame] | 138 | // FIXME: This class does too much. Right now it is both an HTML tokenizer as well |
| 139 | // as handling all of the non-tokenizer-specific junk related to tokenizing HTML |
| 140 | // (like dealing with <script> tags). The HTML tokenizer bits should be pushed |
| 141 | // down into a separate HTML tokenizer class. |
eric@webkit.org | 09879f6 | 2010-05-18 10:17:01 +0000 | [diff] [blame] | 142 | |
eric@webkit.org | e37f595 | 2010-06-28 07:56:10 +0000 | [diff] [blame] | 143 | class LegacyHTMLDocumentParser : public ScriptableDocumentParser, public CachedResourceClient { |
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 144 | public: |
abarth@webkit.org | 0091a90 | 2010-06-23 06:41:51 +0000 | [diff] [blame] | 145 | LegacyHTMLDocumentParser(HTMLDocument*, bool reportErrors); |
| 146 | LegacyHTMLDocumentParser(HTMLViewSourceDocument*); |
| 147 | LegacyHTMLDocumentParser(DocumentFragment*, FragmentScriptingPermission = FragmentScriptingAllowed); |
| 148 | virtual ~LegacyHTMLDocumentParser(); |
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 149 | |
eric@webkit.org | 71d0f46 | 2010-06-25 07:09:36 +0000 | [diff] [blame] | 150 | bool forceSynchronous() const { return m_state.forceSynchronous(); } |
| 151 | void setForceSynchronous(bool force); |
| 152 | |
eric@webkit.org | 71d0f46 | 2010-06-25 07:09:36 +0000 | [diff] [blame] | 153 | static void parseDocumentFragment(const String&, DocumentFragment*, FragmentScriptingPermission = FragmentScriptingAllowed); |
| 154 | |
| 155 | protected: |
| 156 | // Exposed for FTPDirectoryDocumentParser |
eric@webkit.org | 5cfd556 | 2010-06-25 21:37:08 +0000 | [diff] [blame] | 157 | virtual void insert(const SegmentedString&); |
darin | a3cce73 | 2004-07-22 20:50:10 +0000 | [diff] [blame] | 158 | virtual void finish(); |
eric@webkit.org | 71d0f46 | 2010-06-25 07:09:36 +0000 | [diff] [blame] | 159 | |
| 160 | private: |
eric@webkit.org | e37f595 | 2010-06-28 07:56:10 +0000 | [diff] [blame] | 161 | // ScriptableDocumentParser |
eric@webkit.org | 5cfd556 | 2010-06-25 21:37:08 +0000 | [diff] [blame] | 162 | virtual void append(const SegmentedString&); |
abarth@webkit.org | bdbc28d | 2010-06-16 21:57:34 +0000 | [diff] [blame] | 163 | virtual bool finishWasCalled(); |
darin | ed60ff2 | 2004-11-12 22:04:26 +0000 | [diff] [blame] | 164 | virtual bool isWaitingForScripts() const; |
ggaren | 33e6544 | 2005-10-22 01:41:36 +0000 | [diff] [blame] | 165 | virtual void stopParsing(); |
hyatt | 9c4ba9b | 2004-11-10 03:47:56 +0000 | [diff] [blame] | 166 | virtual bool processingData() const; |
eric@webkit.org | ed69c5f | 2010-06-24 06:14:27 +0000 | [diff] [blame] | 167 | virtual bool isExecutingScript() const { return !!m_executingScript; } |
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 168 | |
darin@apple.com | 5c1c758 | 2007-11-12 04:09:30 +0000 | [diff] [blame] | 169 | virtual int lineNumber() const { return m_lineNumber; } |
rwlbuis | 36fea0a | 2007-01-07 16:47:24 +0000 | [diff] [blame] | 170 | virtual int columnNumber() const { return 1; } |
| 171 | |
eric@webkit.org | e37f595 | 2010-06-28 07:56:10 +0000 | [diff] [blame] | 172 | virtual bool processingContentWrittenByScript() const { return m_src.excludeLineNumbers(); } |
| 173 | |
antti | 65b1518 | 2007-09-10 14:55:11 +0000 | [diff] [blame] | 174 | virtual void executeScriptsWaitingForStylesheets(); |
eric@webkit.org | fa046f2 | 2010-06-13 02:16:46 +0000 | [diff] [blame] | 175 | |
abarth@webkit.org | c59388e | 2010-06-23 09:03:47 +0000 | [diff] [blame] | 176 | virtual LegacyHTMLTreeBuilder* htmlTreeBuilder() const { return m_treeBuilder.get(); } |
hyatt | abc1d21 | 2007-05-16 08:11:05 +0000 | [diff] [blame] | 177 | |
mjs | b1c8f66 | 2005-10-18 03:15:31 +0000 | [diff] [blame] | 178 | class State; |
| 179 | |
darin | a3cce73 | 2004-07-22 20:50:10 +0000 | [diff] [blame] | 180 | void begin(); |
| 181 | void end(); |
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 182 | void reset(); |
hyatt@apple.com | 4258307 | 2008-02-20 22:47:57 +0000 | [diff] [blame] | 183 | |
eric@webkit.org | 01c1ac1 | 2010-05-19 07:19:47 +0000 | [diff] [blame] | 184 | void willWriteHTML(const SegmentedString&); |
eric@webkit.org | 5cfd556 | 2010-06-25 21:37:08 +0000 | [diff] [blame] | 185 | void write(const SegmentedString&, bool appendData); |
eric@webkit.org | 01c1ac1 | 2010-05-19 07:19:47 +0000 | [diff] [blame] | 186 | ALWAYS_INLINE void advance(State&); |
| 187 | void didWriteHTML(); |
| 188 | |
darin | b9481ed | 2006-03-20 02:57:59 +0000 | [diff] [blame] | 189 | PassRefPtr<Node> processToken(); |
hyatt@apple.com | 4258307 | 2008-02-20 22:47:57 +0000 | [diff] [blame] | 190 | void processDoctypeToken(); |
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 191 | |
hyatt | d2c53f2 | 2006-01-15 07:12:43 +0000 | [diff] [blame] | 192 | State processListing(SegmentedString, State); |
| 193 | State parseComment(SegmentedString&, State); |
hyatt@apple.com | 4258307 | 2008-02-20 22:47:57 +0000 | [diff] [blame] | 194 | State parseDoctype(SegmentedString&, State); |
hyatt | d2c53f2 | 2006-01-15 07:12:43 +0000 | [diff] [blame] | 195 | State parseServer(SegmentedString&, State); |
| 196 | State parseText(SegmentedString&, State); |
bfulgham@webkit.org | eaa216e | 2009-06-11 18:15:05 +0000 | [diff] [blame] | 197 | State parseNonHTMLText(SegmentedString&, State); |
hyatt | d2c53f2 | 2006-01-15 07:12:43 +0000 | [diff] [blame] | 198 | State parseTag(SegmentedString&, State); |
eric@webkit.org | 683713f | 2008-11-12 01:53:37 +0000 | [diff] [blame] | 199 | State parseEntity(SegmentedString&, UChar*& dest, State, unsigned& cBufferPos, bool start, bool parsingTag); |
hyatt | d2c53f2 | 2006-01-15 07:12:43 +0000 | [diff] [blame] | 200 | State parseProcessingInstruction(SegmentedString&, State); |
mjs | b1c8f66 | 2005-10-18 03:15:31 +0000 | [diff] [blame] | 201 | State scriptHandler(State); |
darin@chromium.org | 6b41247 | 2008-11-24 23:07:38 +0000 | [diff] [blame] | 202 | State scriptExecution(const ScriptSourceCode&, State); |
darin | 7ab3109 | 2006-05-10 04:59:57 +0000 | [diff] [blame] | 203 | void setSrc(const SegmentedString&); |
eric@webkit.org | fa046f2 | 2010-06-13 02:16:46 +0000 | [diff] [blame] | 204 | |
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 205 | // check if we have enough space in the buffer. |
| 206 | // if not enlarge it |
| 207 | inline void checkBuffer(int len = 10) |
| 208 | { |
eric@webkit.org | 683713f | 2008-11-12 01:53:37 +0000 | [diff] [blame] | 209 | if ((m_dest - m_buffer) > m_bufferSize - len) |
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 210 | enlargeBuffer(len); |
| 211 | } |
weinig | 54a89ffb | 2007-01-08 17:50:01 +0000 | [diff] [blame] | 212 | |
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 213 | inline void checkScriptBuffer(int len = 10) |
| 214 | { |
eric@webkit.org | 683713f | 2008-11-12 01:53:37 +0000 | [diff] [blame] | 215 | if (m_scriptCodeSize + len >= m_scriptCodeCapacity) |
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 216 | enlargeScriptBuffer(len); |
| 217 | } |
| 218 | |
| 219 | void enlargeBuffer(int len); |
| 220 | void enlargeScriptBuffer(int len); |
| 221 | |
darin | 7ab3109 | 2006-05-10 04:59:57 +0000 | [diff] [blame] | 222 | bool continueProcessing(int& processedCount, double startTime, State&); |
abarth@webkit.org | 0091a90 | 2010-06-23 06:41:51 +0000 | [diff] [blame] | 223 | void timerFired(Timer<LegacyHTMLDocumentParser>*); |
hyatt | 9c4ba9b | 2004-11-10 03:47:56 +0000 | [diff] [blame] | 224 | void allDataProcessed(); |
| 225 | |
darin | e775cf7 | 2006-07-09 22:48:56 +0000 | [diff] [blame] | 226 | // from CachedResourceClient |
eric@webkit.org | 683713f | 2008-11-12 01:53:37 +0000 | [diff] [blame] | 227 | void notifyFinished(CachedResource*); |
mjs | 9d0f55f | 2003-11-17 23:27:42 +0000 | [diff] [blame] | 228 | |
eric@webkit.org | 73eef4f | 2009-12-14 15:21:41 +0000 | [diff] [blame] | 229 | void executeExternalScriptsIfReady(); |
abarth@webkit.org | 0091a90 | 2010-06-23 06:41:51 +0000 | [diff] [blame] | 230 | void executeExternalScriptsTimerFired(Timer<LegacyHTMLDocumentParser>*); |
eric@webkit.org | 73eef4f | 2009-12-14 15:21:41 +0000 | [diff] [blame] | 231 | bool continueExecutingExternalScripts(double startTime); |
| 232 | |
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 233 | // Internal buffers |
| 234 | /////////////////// |
eric@webkit.org | 683713f | 2008-11-12 01:53:37 +0000 | [diff] [blame] | 235 | UChar* m_buffer; |
| 236 | int m_bufferSize; |
| 237 | UChar* m_dest; |
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 238 | |
eric@webkit.org | 683713f | 2008-11-12 01:53:37 +0000 | [diff] [blame] | 239 | Token m_currentToken; |
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 240 | |
abarth@webkit.org | 8268f15 | 2009-07-13 05:36:15 +0000 | [diff] [blame] | 241 | // This buffer holds the raw characters we've seen between the beginning of |
| 242 | // the attribute name and the first character of the attribute value. |
| 243 | Vector<UChar, 32> m_rawAttributeBeforeValue; |
| 244 | |
eric@webkit.org | 74d35e9 | 2010-06-12 08:10:24 +0000 | [diff] [blame] | 245 | // DocumentParser flags |
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 246 | ////////////////// |
| 247 | // are we in quotes within a html tag |
darin | 7ab3109 | 2006-05-10 04:59:57 +0000 | [diff] [blame] | 248 | enum { NoQuote, SingleQuote, DoubleQuote } tquote; |
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 249 | |
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 250 | // Are we in a &... character entity description? |
mjs | b1c8f66 | 2005-10-18 03:15:31 +0000 | [diff] [blame] | 251 | enum EntityState { |
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 252 | NoEntity = 0, |
mjs | b1c8f66 | 2005-10-18 03:15:31 +0000 | [diff] [blame] | 253 | SearchEntity = 1, |
| 254 | NumericSearch = 2, |
| 255 | Hexadecimal = 3, |
| 256 | Decimal = 4, |
| 257 | EntityName = 5, |
| 258 | SearchSemicolon = 6 |
| 259 | }; |
darin | 4526522 | 2003-05-07 16:01:49 +0000 | [diff] [blame] | 260 | unsigned EntityUnicodeValue; |
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 261 | |
mjs | b1c8f66 | 2005-10-18 03:15:31 +0000 | [diff] [blame] | 262 | enum TagState { |
| 263 | NoTag = 0, |
| 264 | TagName = 1, |
| 265 | SearchAttribute = 2, |
| 266 | AttributeName = 3, |
| 267 | SearchEqual = 4, |
| 268 | SearchValue = 5, |
| 269 | QuotedValue = 6, |
| 270 | Value = 7, |
| 271 | SearchEnd = 8 |
| 272 | }; |
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 273 | |
mjs | b1c8f66 | 2005-10-18 03:15:31 +0000 | [diff] [blame] | 274 | class State { |
| 275 | public: |
weinig | 54a89ffb | 2007-01-08 17:50:01 +0000 | [diff] [blame] | 276 | State() : m_bits(0) { } |
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 277 | |
mjs | b1c8f66 | 2005-10-18 03:15:31 +0000 | [diff] [blame] | 278 | TagState tagState() const { return static_cast<TagState>(m_bits & TagMask); } |
| 279 | void setTagState(TagState t) { m_bits = (m_bits & ~TagMask) | t; } |
| 280 | EntityState entityState() const { return static_cast<EntityState>((m_bits & EntityMask) >> EntityShift); } |
| 281 | void setEntityState(EntityState e) { m_bits = (m_bits & ~EntityMask) | (e << EntityShift); } |
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 282 | |
mjs | b1c8f66 | 2005-10-18 03:15:31 +0000 | [diff] [blame] | 283 | bool inScript() const { return testBit(InScript); } |
| 284 | void setInScript(bool v) { setBit(InScript, v); } |
| 285 | bool inStyle() const { return testBit(InStyle); } |
| 286 | void setInStyle(bool v) { setBit(InStyle, v); } |
mjs | b1c8f66 | 2005-10-18 03:15:31 +0000 | [diff] [blame] | 287 | bool inXmp() const { return testBit(InXmp); } |
| 288 | void setInXmp(bool v) { setBit(InXmp, v); } |
| 289 | bool inTitle() const { return testBit(InTitle); } |
| 290 | void setInTitle(bool v) { setBit(InTitle, v); } |
hyatt@apple.com | d647ec8 | 2008-02-10 22:30:04 +0000 | [diff] [blame] | 291 | bool inIFrame() const { return testBit(InIFrame); } |
| 292 | void setInIFrame(bool v) { setBit(InIFrame, v); } |
mjs | b1c8f66 | 2005-10-18 03:15:31 +0000 | [diff] [blame] | 293 | bool inPlainText() const { return testBit(InPlainText); } |
| 294 | void setInPlainText(bool v) { setBit(InPlainText, v); } |
| 295 | bool inProcessingInstruction() const { return testBit(InProcessingInstruction); } |
| 296 | void setInProcessingInstruction(bool v) { return setBit(InProcessingInstruction, v); } |
| 297 | bool inComment() const { return testBit(InComment); } |
| 298 | void setInComment(bool v) { setBit(InComment, v); } |
hyatt@apple.com | 4258307 | 2008-02-20 22:47:57 +0000 | [diff] [blame] | 299 | bool inDoctype() const { return testBit(InDoctype); } |
| 300 | void setInDoctype(bool v) { setBit(InDoctype, v); } |
mjs | b1c8f66 | 2005-10-18 03:15:31 +0000 | [diff] [blame] | 301 | bool inTextArea() const { return testBit(InTextArea); } |
| 302 | void setInTextArea(bool v) { setBit(InTextArea, v); } |
| 303 | bool escaped() const { return testBit(Escaped); } |
| 304 | void setEscaped(bool v) { setBit(Escaped, v); } |
| 305 | bool inServer() const { return testBit(InServer); } |
| 306 | void setInServer(bool v) { setBit(InServer, v); } |
| 307 | bool skipLF() const { return testBit(SkipLF); } |
| 308 | void setSkipLF(bool v) { setBit(SkipLF, v); } |
| 309 | bool startTag() const { return testBit(StartTag); } |
| 310 | void setStartTag(bool v) { setBit(StartTag, v); } |
| 311 | bool discardLF() const { return testBit(DiscardLF); } |
| 312 | void setDiscardLF(bool v) { setBit(DiscardLF, v); } |
| 313 | bool allowYield() const { return testBit(AllowYield); } |
| 314 | void setAllowYield(bool v) { setBit(AllowYield, v); } |
| 315 | bool loadingExtScript() const { return testBit(LoadingExtScript); } |
| 316 | void setLoadingExtScript(bool v) { setBit(LoadingExtScript, v); } |
| 317 | bool forceSynchronous() const { return testBit(ForceSynchronous); } |
| 318 | void setForceSynchronous(bool v) { setBit(ForceSynchronous, v); } |
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 319 | |
bfulgham@webkit.org | eaa216e | 2009-06-11 18:15:05 +0000 | [diff] [blame] | 320 | bool inAnyNonHTMLText() const { return m_bits & (InScript | InStyle | InXmp | InTextArea | InTitle | InIFrame); } |
mjs | b1c8f66 | 2005-10-18 03:15:31 +0000 | [diff] [blame] | 321 | bool hasTagState() const { return m_bits & TagMask; } |
| 322 | bool hasEntityState() const { return m_bits & EntityMask; } |
darin | f028f81 | 2002-06-10 20:08:04 +0000 | [diff] [blame] | 323 | |
hyatt@apple.com | 4258307 | 2008-02-20 22:47:57 +0000 | [diff] [blame] | 324 | bool needsSpecialWriteHandling() const { return m_bits & (InScript | InStyle | InXmp | InTextArea | InTitle | InIFrame | TagMask | EntityMask | InPlainText | InComment | InDoctype | InServer | InProcessingInstruction | StartTag); } |
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 325 | |
mjs | b1c8f66 | 2005-10-18 03:15:31 +0000 | [diff] [blame] | 326 | private: |
| 327 | static const int EntityShift = 4; |
| 328 | enum StateBits { |
| 329 | TagMask = (1 << 4) - 1, |
| 330 | EntityMask = (1 << 7) - (1 << 4), |
| 331 | InScript = 1 << 7, |
| 332 | InStyle = 1 << 8, |
ddkilzer | 12ea70f | 2007-01-15 00:39:47 +0000 | [diff] [blame] | 333 | // Bit 9 unused |
mjs | b1c8f66 | 2005-10-18 03:15:31 +0000 | [diff] [blame] | 334 | InXmp = 1 << 10, |
| 335 | InTitle = 1 << 11, |
| 336 | InPlainText = 1 << 12, |
| 337 | InProcessingInstruction = 1 << 13, |
| 338 | InComment = 1 << 14, |
| 339 | InTextArea = 1 << 15, |
| 340 | Escaped = 1 << 16, |
| 341 | InServer = 1 << 17, |
| 342 | SkipLF = 1 << 18, |
| 343 | StartTag = 1 << 19, |
| 344 | DiscardLF = 1 << 20, // FIXME: should clarify difference between skip and discard |
| 345 | AllowYield = 1 << 21, |
| 346 | LoadingExtScript = 1 << 22, |
hyatt@apple.com | d647ec8 | 2008-02-10 22:30:04 +0000 | [diff] [blame] | 347 | ForceSynchronous = 1 << 23, |
hyatt@apple.com | 4258307 | 2008-02-20 22:47:57 +0000 | [diff] [blame] | 348 | InIFrame = 1 << 24, |
| 349 | InDoctype = 1 << 25 |
mjs | b1c8f66 | 2005-10-18 03:15:31 +0000 | [diff] [blame] | 350 | }; |
weinig | 54a89ffb | 2007-01-08 17:50:01 +0000 | [diff] [blame] | 351 | |
| 352 | void setBit(StateBits bit, bool value) |
| 353 | { |
| 354 | if (value) |
| 355 | m_bits |= bit; |
| 356 | else |
mjs | b1c8f66 | 2005-10-18 03:15:31 +0000 | [diff] [blame] | 357 | m_bits &= ~bit; |
| 358 | } |
| 359 | bool testBit(StateBits bit) const { return m_bits & bit; } |
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 360 | |
mjs | b1c8f66 | 2005-10-18 03:15:31 +0000 | [diff] [blame] | 361 | unsigned m_bits; |
| 362 | }; |
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 363 | |
mjs | b1c8f66 | 2005-10-18 03:15:31 +0000 | [diff] [blame] | 364 | State m_state; |
eric@webkit.org | fa046f2 | 2010-06-13 02:16:46 +0000 | [diff] [blame] | 365 | |
hyatt@apple.com | 4258307 | 2008-02-20 22:47:57 +0000 | [diff] [blame] | 366 | DoctypeToken m_doctypeToken; |
| 367 | int m_doctypeSearchCount; |
| 368 | int m_doctypeSecondarySearchCount; |
mjs | 6f821c8 | 2002-03-22 00:31:57 +0000 | [diff] [blame] | 369 | |
eric@webkit.org | 683713f | 2008-11-12 01:53:37 +0000 | [diff] [blame] | 370 | bool m_brokenServer; |
mjs | 6f821c8 | 2002-03-22 00:31:57 +0000 | [diff] [blame] | 371 | |
hyatt | 2773dff | 2005-07-18 21:44:31 +0000 | [diff] [blame] | 372 | // Name of an attribute that we just scanned. |
eric@webkit.org | 683713f | 2008-11-12 01:53:37 +0000 | [diff] [blame] | 373 | AtomicString m_attrName; |
weinig | 54a89ffb | 2007-01-08 17:50:01 +0000 | [diff] [blame] | 374 | |
ddkilzer@apple.com | 7eba671 | 2008-05-27 23:38:36 +0000 | [diff] [blame] | 375 | // Used to store the code of a scripting sequence |
eric@webkit.org | 683713f | 2008-11-12 01:53:37 +0000 | [diff] [blame] | 376 | UChar* m_scriptCode; |
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 377 | // Size of the script sequenze stored in @ref #scriptCode |
eric@webkit.org | 683713f | 2008-11-12 01:53:37 +0000 | [diff] [blame] | 378 | int m_scriptCodeSize; |
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 379 | // Maximal size that can be stored in @ref #scriptCode |
eric@webkit.org | 683713f | 2008-11-12 01:53:37 +0000 | [diff] [blame] | 380 | int m_scriptCodeCapacity; |
gramps | 0aed4d6 | 2001-09-19 15:53:27 +0000 | [diff] [blame] | 381 | // resync point of script code size |
eric@webkit.org | 683713f | 2008-11-12 01:53:37 +0000 | [diff] [blame] | 382 | int m_scriptCodeResync; |
mjs | 6f821c8 | 2002-03-22 00:31:57 +0000 | [diff] [blame] | 383 | |
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 384 | // Stores characters if we are scanning for a string like "</script>" |
darin | 7ab3109 | 2006-05-10 04:59:57 +0000 | [diff] [blame] | 385 | UChar searchBuffer[10]; |
eric@webkit.org | fa046f2 | 2010-06-13 02:16:46 +0000 | [diff] [blame] | 386 | |
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 387 | // Counts where we are in the string we are scanning for |
| 388 | int searchCount; |
gramps | 0aed4d6 | 2001-09-19 15:53:27 +0000 | [diff] [blame] | 389 | // the stopper string |
eric@webkit.org | 683713f | 2008-11-12 01:53:37 +0000 | [diff] [blame] | 390 | const char* m_searchStopper; |
| 391 | int m_searchStopperLength; |
eric@webkit.org | fa046f2 | 2010-06-13 02:16:46 +0000 | [diff] [blame] | 392 | |
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 393 | // if no more data is coming, just parse what we have (including ext scripts that |
| 394 | // may be still downloading) and finish |
eric@webkit.org | 683713f | 2008-11-12 01:53:37 +0000 | [diff] [blame] | 395 | bool m_noMoreData; |
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 396 | // URL to get source code of script from |
eric@webkit.org | 683713f | 2008-11-12 01:53:37 +0000 | [diff] [blame] | 397 | String m_scriptTagSrcAttrValue; |
| 398 | String m_scriptTagCharsetAttrValue; |
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 399 | // the HTML code we will parse after the external script we are waiting for has loaded |
eric@webkit.org | 683713f | 2008-11-12 01:53:37 +0000 | [diff] [blame] | 400 | SegmentedString m_pendingSrc; |
mjs | 4ed3d11 | 2004-07-20 20:45:22 +0000 | [diff] [blame] | 401 | |
| 402 | // the HTML code we will parse after this particular script has |
| 403 | // loaded, but before all pending HTML |
eric@webkit.org | 683713f | 2008-11-12 01:53:37 +0000 | [diff] [blame] | 404 | SegmentedString* m_currentPrependingSrc; |
mjs | 4ed3d11 | 2004-07-20 20:45:22 +0000 | [diff] [blame] | 405 | |
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 406 | // true if we are executing a script while parsing a document. This causes the parsing of |
| 407 | // the output of the script to be postponed until after the script has finished executing |
| 408 | int m_executingScript; |
eric@webkit.org | 683713f | 2008-11-12 01:53:37 +0000 | [diff] [blame] | 409 | Deque<CachedResourceHandle<CachedScript> > m_pendingScripts; |
| 410 | RefPtr<HTMLScriptElement> m_scriptNode; |
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 411 | |
ddkilzer | 24652a9 | 2006-07-02 11:39:43 +0000 | [diff] [blame] | 412 | bool m_requestingScript; |
antti | 65b1518 | 2007-09-10 14:55:11 +0000 | [diff] [blame] | 413 | bool m_hasScriptsWaitingForStylesheets; |
ddkilzer | 24652a9 | 2006-07-02 11:39:43 +0000 | [diff] [blame] | 414 | |
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 415 | // if we found one broken comment, there are most likely others as well |
| 416 | // store a flag to get rid of the O(n^2) behaviour in such a case. |
eric@webkit.org | 683713f | 2008-11-12 01:53:37 +0000 | [diff] [blame] | 417 | bool m_brokenComments; |
mjs | 6f821c8 | 2002-03-22 00:31:57 +0000 | [diff] [blame] | 418 | // current line number |
darin@apple.com | 5c1c758 | 2007-11-12 04:09:30 +0000 | [diff] [blame] | 419 | int m_lineNumber; |
eric@webkit.org | 683713f | 2008-11-12 01:53:37 +0000 | [diff] [blame] | 420 | int m_currentScriptTagStartLineNumber; |
| 421 | int m_currentTagStartLineNumber; |
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 422 | |
weinig@apple.com | 4b51d00 | 2008-08-14 23:33:22 +0000 | [diff] [blame] | 423 | double m_tokenizerTimeDelay; |
| 424 | int m_tokenizerChunkSize; |
| 425 | |
hyatt | 9c4ba9b | 2004-11-10 03:47:56 +0000 | [diff] [blame] | 426 | // The timer for continued processing. |
abarth@webkit.org | 0091a90 | 2010-06-23 06:41:51 +0000 | [diff] [blame] | 427 | Timer<LegacyHTMLDocumentParser> m_timer; |
hyatt | 9c4ba9b | 2004-11-10 03:47:56 +0000 | [diff] [blame] | 428 | |
eric@webkit.org | 73eef4f | 2009-12-14 15:21:41 +0000 | [diff] [blame] | 429 | // The timer for continued executing external scripts. |
abarth@webkit.org | 0091a90 | 2010-06-23 06:41:51 +0000 | [diff] [blame] | 430 | Timer<LegacyHTMLDocumentParser> m_externalScriptsTimer; |
eric@webkit.org | 73eef4f | 2009-12-14 15:21:41 +0000 | [diff] [blame] | 431 | |
sullivan | 6062ecb | 2003-07-24 22:43:50 +0000 | [diff] [blame] | 432 | // This buffer can hold arbitrarily long user-defined attribute names, such as in EMBED tags. |
| 433 | // So any fixed number might be too small, but rather than rewriting all usage of this buffer |
| 434 | // we'll just make it large enough to handle all imaginable cases. |
| 435 | #define CBUFLEN 1024 |
eric@webkit.org | 683713f | 2008-11-12 01:53:37 +0000 | [diff] [blame] | 436 | UChar m_cBuffer[CBUFLEN + 2]; |
mjs | b1c8f66 | 2005-10-18 03:15:31 +0000 | [diff] [blame] | 437 | unsigned int m_cBufferPos; |
weinig | 54a89ffb | 2007-01-08 17:50:01 +0000 | [diff] [blame] | 438 | |
eric@webkit.org | 683713f | 2008-11-12 01:53:37 +0000 | [diff] [blame] | 439 | SegmentedString m_src; |
abarth@webkit.org | c59388e | 2010-06-23 09:03:47 +0000 | [diff] [blame] | 440 | OwnPtr<LegacyHTMLTreeBuilder> m_treeBuilder; |
eric@webkit.org | 683713f | 2008-11-12 01:53:37 +0000 | [diff] [blame] | 441 | bool m_inWrite; |
darin | e9700da | 2006-03-06 23:09:48 +0000 | [diff] [blame] | 442 | bool m_fragment; |
enrica@apple.com | 622ff66 | 2010-01-22 00:40:23 +0000 | [diff] [blame] | 443 | FragmentScriptingPermission m_scriptingPermission; |
antti@apple.com | 6c76e54 | 2008-03-13 21:20:31 +0000 | [diff] [blame] | 444 | |
abarth@webkit.org | 757e876 | 2010-06-23 08:44:01 +0000 | [diff] [blame] | 445 | OwnPtr<LegacyPreloadScanner> m_preloadScanner; |
kocienda | bb0c24b | 2001-08-24 14:24:40 +0000 | [diff] [blame] | 446 | }; |
darin | b95d6c4 | 2002-06-04 00:19:07 +0000 | [diff] [blame] | 447 | |
darin | 7ab3109 | 2006-05-10 04:59:57 +0000 | [diff] [blame] | 448 | UChar decodeNamedEntity(const char*); |
eseidel | 363bc0d | 2005-10-27 06:03:33 +0000 | [diff] [blame] | 449 | |
weinig | 54a89ffb | 2007-01-08 17:50:01 +0000 | [diff] [blame] | 450 | } // namespace WebCore |
hyatt | 3b4f6d4 | 2004-02-07 01:19:44 +0000 | [diff] [blame] | 451 | |
abarth@webkit.org | a814ad6 | 2010-06-23 09:21:08 +0000 | [diff] [blame] | 452 | #endif // LegacyHTMLDocumentParser_h |