blob: 49e6976031181d3bfaaa2f2a41489a7bae2f819d [file] [log] [blame]
/*
    Copyright (C) 1997 Martin Jones (mjones@kde.org)
              (C) 1997 Torben Weis (weis@kde.org)
              (C) 1998 Waldo Bastian (bastian@kde.org)
              (C) 2001 Dirk Mueller (mueller@kde.org)
    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public License
    along with this library; see the file COPYING.LIB.  If not, write to
    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
    Boston, MA 02110-1301, USA.
*/
kociendabb0c24b2001-08-24 14:24:40 +000023
abarth@webkit.orga814ad62010-06-23 09:21:08 +000024#ifndef LegacyHTMLDocumentParser_h
25#define LegacyHTMLDocumentParser_h
kociendabb0c24b2001-08-24 14:24:40 +000026
antti@apple.com72e4a842008-09-05 09:28:11 +000027#include "CachedResourceClient.h"
28#include "CachedResourceHandle.h"
abarth@webkit.org26dfd172010-06-22 23:43:59 +000029#include "FragmentScriptingPermission.h"
weinig@apple.com1c6ce052010-05-23 23:20:03 +000030#include "NamedNodeMap.h"
eric@webkit.orge37f5952010-06-28 07:56:10 +000031#include "ScriptableDocumentParser.h"
darind03140b2006-01-19 08:59:31 +000032#include "SegmentedString.h"
darina52f4e12006-02-02 02:51:03 +000033#include "Timer.h"
antti@apple.com72e4a842008-09-05 09:28:11 +000034#include <wtf/Deque.h>
hyatt97815a32007-05-17 06:27:28 +000035#include <wtf/OwnPtr.h>
antti@apple.com72e4a842008-09-05 09:28:11 +000036#include <wtf/Vector.h>
kociendabb0c24b2001-08-24 14:24:40 +000037
darind03140b2006-01-19 08:59:31 +000038namespace WebCore {
hyatt3b4f6d42004-02-07 01:19:44 +000039
// Forward declarations for types that this header only uses by pointer,
// reference, or as a template argument.
class CachedScript;
class Document;
class DocumentFragment;
class FrameView;
class HTMLDocument;
class HTMLScriptElement;
class HTMLViewSourceDocument;
class LegacyHTMLTreeBuilder;
class LegacyPreloadScanner;
class Node;
class ScriptSourceCode;
kociendabb0c24b2001-08-24 14:24:40 +000051
hyatt3b4f6d42004-02-07 01:19:44 +000052/**
53 * @internal
54 * represents one HTML tag. Consists of a numerical id, and the list
55 * of attributes. Can also represent text. In this case the id = 0 and
56 * text contains the text.
57 */
eric@webkit.org683713f2008-11-12 01:53:37 +000058struct Token {
59 Token()
60 : beginTag(true)
61 , selfClosingTag(false)
62 , brokenXMLStyle(false)
63 , m_sourceInfo(0)
64 { }
hyatt97815a32007-05-17 06:27:28 +000065 ~Token() { }
hyatt59136b72005-07-09 20:19:28 +000066
darin@apple.comfaced262009-01-12 07:44:27 +000067 void addAttribute(AtomicString& attrName, const AtomicString& v, bool viewSourceMode);
hyatt59136b72005-07-09 20:19:28 +000068
darind03140b2006-01-19 08:59:31 +000069 bool isOpenTag(const QualifiedName& fullName) const { return beginTag && fullName.localName() == tagName; }
70 bool isCloseTag(const QualifiedName& fullName) const { return !beginTag && fullName.localName() == tagName; }
hyatt59136b72005-07-09 20:19:28 +000071
hyatt3b4f6d42004-02-07 01:19:44 +000072 void reset()
73 {
darin2a4c3742005-12-27 18:26:16 +000074 attrs = 0;
weinig54a89ffb2007-01-08 17:50:01 +000075 text = 0;
darind03140b2006-01-19 08:59:31 +000076 tagName = nullAtom;
hyatt59136b72005-07-09 20:19:28 +000077 beginTag = true;
eric@webkit.org683713f2008-11-12 01:53:37 +000078 selfClosingTag = false;
hyattbf85dfb2007-05-24 19:54:47 +000079 brokenXMLStyle = false;
hyatt97815a32007-05-17 06:27:28 +000080 if (m_sourceInfo)
81 m_sourceInfo->clear();
hyatt3b4f6d42004-02-07 01:19:44 +000082 }
hyatt59136b72005-07-09 20:19:28 +000083
hyatt97815a32007-05-17 06:27:28 +000084 void addViewSourceChar(UChar c) { if (!m_sourceInfo.get()) m_sourceInfo.set(new Vector<UChar>); m_sourceInfo->append(c); }
85
weinig@apple.com1c6ce052010-05-23 23:20:03 +000086 RefPtr<NamedNodeMap> attrs;
darinb9481ed2006-03-20 02:57:59 +000087 RefPtr<StringImpl> text;
darind03140b2006-01-19 08:59:31 +000088 AtomicString tagName;
darin2a4c3742005-12-27 18:26:16 +000089 bool beginTag;
eric@webkit.org683713f2008-11-12 01:53:37 +000090 bool selfClosingTag;
hyattbf85dfb2007-05-24 19:54:47 +000091 bool brokenXMLStyle;
hyatt97815a32007-05-17 06:27:28 +000092 OwnPtr<Vector<UChar> > m_sourceInfo;
kociendabb0c24b2001-08-24 14:24:40 +000093};
94
// Scanner states used while parsing a doctype declaration. The value is
// stored in DoctypeToken::m_state; DoctypeToken::reset() selects DoctypeBegin.
// Enumerator order (and therefore each numeric value) is preserved.
enum DoctypeState {
    DoctypeBegin,
    DoctypeBeforeName,
    DoctypeName,
    DoctypeAfterName,
    DoctypeBeforePublicID,
    DoctypePublicID,
    DoctypeAfterPublicID,
    DoctypeBeforeSystemID,
    DoctypeSystemID,
    DoctypeAfterSystemID,
    DoctypeBogus
};
108
109class DoctypeToken {
110public:
111 DoctypeToken() {}
eric@webkit.orgfa046f22010-06-13 02:16:46 +0000112
hyatt@apple.com42583072008-02-20 22:47:57 +0000113 void reset()
114 {
115 m_name.clear();
116 m_publicID.clear();
117 m_systemID.clear();
118 m_state = DoctypeBegin;
119 m_source.clear();
abarth@webkit.org75cc4e62010-06-08 07:15:52 +0000120 m_forceQuirks = false;
hyatt@apple.com42583072008-02-20 22:47:57 +0000121 }
122
123 DoctypeState state() { return m_state; }
124 void setState(DoctypeState s) { m_state = s; }
125
126 Vector<UChar> m_name;
127 Vector<UChar> m_publicID;
128 Vector<UChar> m_systemID;
129 DoctypeState m_state;
abarth@webkit.org75cc4e62010-06-08 07:15:52 +0000130
hyatt@apple.com42583072008-02-20 22:47:57 +0000131 Vector<UChar> m_source;
abarth@webkit.org75cc4e62010-06-08 07:15:52 +0000132
133 bool m_forceQuirks; // Used by the HTML5 parser.
hyatt@apple.com42583072008-02-20 22:47:57 +0000134};
135
kociendabb0c24b2001-08-24 14:24:40 +0000136//-----------------------------------------------------------------------------
137
abarth@webkit.org1183dd62010-06-23 07:16:51 +0000138// FIXME: This class does too much. Right now it is both an HTML tokenizer as well
139// as handling all of the non-tokenizer-specific junk related to tokenizing HTML
140// (like dealing with <script> tags). The HTML tokenizer bits should be pushed
141// down into a separate HTML tokenizer class.
eric@webkit.org09879f62010-05-18 10:17:01 +0000142
eric@webkit.orge37f5952010-06-28 07:56:10 +0000143class LegacyHTMLDocumentParser : public ScriptableDocumentParser, public CachedResourceClient {
kociendabb0c24b2001-08-24 14:24:40 +0000144public:
abarth@webkit.org0091a902010-06-23 06:41:51 +0000145 LegacyHTMLDocumentParser(HTMLDocument*, bool reportErrors);
146 LegacyHTMLDocumentParser(HTMLViewSourceDocument*);
147 LegacyHTMLDocumentParser(DocumentFragment*, FragmentScriptingPermission = FragmentScriptingAllowed);
148 virtual ~LegacyHTMLDocumentParser();
kociendabb0c24b2001-08-24 14:24:40 +0000149
eric@webkit.org71d0f462010-06-25 07:09:36 +0000150 bool forceSynchronous() const { return m_state.forceSynchronous(); }
151 void setForceSynchronous(bool force);
152
eric@webkit.org71d0f462010-06-25 07:09:36 +0000153 static void parseDocumentFragment(const String&, DocumentFragment*, FragmentScriptingPermission = FragmentScriptingAllowed);
154
155protected:
156 // Exposed for FTPDirectoryDocumentParser
eric@webkit.org5cfd5562010-06-25 21:37:08 +0000157 virtual void insert(const SegmentedString&);
darina3cce732004-07-22 20:50:10 +0000158 virtual void finish();
eric@webkit.org71d0f462010-06-25 07:09:36 +0000159
160private:
eric@webkit.orge37f5952010-06-28 07:56:10 +0000161 // ScriptableDocumentParser
eric@webkit.org5cfd5562010-06-25 21:37:08 +0000162 virtual void append(const SegmentedString&);
abarth@webkit.orgbdbc28d2010-06-16 21:57:34 +0000163 virtual bool finishWasCalled();
darined60ff22004-11-12 22:04:26 +0000164 virtual bool isWaitingForScripts() const;
ggaren33e65442005-10-22 01:41:36 +0000165 virtual void stopParsing();
hyatt9c4ba9b2004-11-10 03:47:56 +0000166 virtual bool processingData() const;
eric@webkit.orged69c5f2010-06-24 06:14:27 +0000167 virtual bool isExecutingScript() const { return !!m_executingScript; }
kociendabb0c24b2001-08-24 14:24:40 +0000168
darin@apple.com5c1c7582007-11-12 04:09:30 +0000169 virtual int lineNumber() const { return m_lineNumber; }
rwlbuis36fea0a2007-01-07 16:47:24 +0000170 virtual int columnNumber() const { return 1; }
171
eric@webkit.orge37f5952010-06-28 07:56:10 +0000172 virtual bool processingContentWrittenByScript() const { return m_src.excludeLineNumbers(); }
173
antti65b15182007-09-10 14:55:11 +0000174 virtual void executeScriptsWaitingForStylesheets();
eric@webkit.orgfa046f22010-06-13 02:16:46 +0000175
abarth@webkit.orgc59388e2010-06-23 09:03:47 +0000176 virtual LegacyHTMLTreeBuilder* htmlTreeBuilder() const { return m_treeBuilder.get(); }
hyattabc1d212007-05-16 08:11:05 +0000177
mjsb1c8f662005-10-18 03:15:31 +0000178 class State;
179
darina3cce732004-07-22 20:50:10 +0000180 void begin();
181 void end();
kociendabb0c24b2001-08-24 14:24:40 +0000182 void reset();
hyatt@apple.com42583072008-02-20 22:47:57 +0000183
eric@webkit.org01c1ac12010-05-19 07:19:47 +0000184 void willWriteHTML(const SegmentedString&);
eric@webkit.org5cfd5562010-06-25 21:37:08 +0000185 void write(const SegmentedString&, bool appendData);
eric@webkit.org01c1ac12010-05-19 07:19:47 +0000186 ALWAYS_INLINE void advance(State&);
187 void didWriteHTML();
188
darinb9481ed2006-03-20 02:57:59 +0000189 PassRefPtr<Node> processToken();
hyatt@apple.com42583072008-02-20 22:47:57 +0000190 void processDoctypeToken();
kociendabb0c24b2001-08-24 14:24:40 +0000191
hyattd2c53f22006-01-15 07:12:43 +0000192 State processListing(SegmentedString, State);
193 State parseComment(SegmentedString&, State);
hyatt@apple.com42583072008-02-20 22:47:57 +0000194 State parseDoctype(SegmentedString&, State);
hyattd2c53f22006-01-15 07:12:43 +0000195 State parseServer(SegmentedString&, State);
196 State parseText(SegmentedString&, State);
bfulgham@webkit.orgeaa216e2009-06-11 18:15:05 +0000197 State parseNonHTMLText(SegmentedString&, State);
hyattd2c53f22006-01-15 07:12:43 +0000198 State parseTag(SegmentedString&, State);
eric@webkit.org683713f2008-11-12 01:53:37 +0000199 State parseEntity(SegmentedString&, UChar*& dest, State, unsigned& cBufferPos, bool start, bool parsingTag);
hyattd2c53f22006-01-15 07:12:43 +0000200 State parseProcessingInstruction(SegmentedString&, State);
mjsb1c8f662005-10-18 03:15:31 +0000201 State scriptHandler(State);
darin@chromium.org6b412472008-11-24 23:07:38 +0000202 State scriptExecution(const ScriptSourceCode&, State);
darin7ab31092006-05-10 04:59:57 +0000203 void setSrc(const SegmentedString&);
eric@webkit.orgfa046f22010-06-13 02:16:46 +0000204
kociendabb0c24b2001-08-24 14:24:40 +0000205 // check if we have enough space in the buffer.
206 // if not enlarge it
207 inline void checkBuffer(int len = 10)
208 {
eric@webkit.org683713f2008-11-12 01:53:37 +0000209 if ((m_dest - m_buffer) > m_bufferSize - len)
kociendabb0c24b2001-08-24 14:24:40 +0000210 enlargeBuffer(len);
211 }
weinig54a89ffb2007-01-08 17:50:01 +0000212
kociendabb0c24b2001-08-24 14:24:40 +0000213 inline void checkScriptBuffer(int len = 10)
214 {
eric@webkit.org683713f2008-11-12 01:53:37 +0000215 if (m_scriptCodeSize + len >= m_scriptCodeCapacity)
kociendabb0c24b2001-08-24 14:24:40 +0000216 enlargeScriptBuffer(len);
217 }
218
219 void enlargeBuffer(int len);
220 void enlargeScriptBuffer(int len);
221
darin7ab31092006-05-10 04:59:57 +0000222 bool continueProcessing(int& processedCount, double startTime, State&);
abarth@webkit.org0091a902010-06-23 06:41:51 +0000223 void timerFired(Timer<LegacyHTMLDocumentParser>*);
hyatt9c4ba9b2004-11-10 03:47:56 +0000224 void allDataProcessed();
225
darine775cf72006-07-09 22:48:56 +0000226 // from CachedResourceClient
eric@webkit.org683713f2008-11-12 01:53:37 +0000227 void notifyFinished(CachedResource*);
mjs9d0f55f2003-11-17 23:27:42 +0000228
eric@webkit.org73eef4f2009-12-14 15:21:41 +0000229 void executeExternalScriptsIfReady();
abarth@webkit.org0091a902010-06-23 06:41:51 +0000230 void executeExternalScriptsTimerFired(Timer<LegacyHTMLDocumentParser>*);
eric@webkit.org73eef4f2009-12-14 15:21:41 +0000231 bool continueExecutingExternalScripts(double startTime);
232
kociendabb0c24b2001-08-24 14:24:40 +0000233 // Internal buffers
234 ///////////////////
eric@webkit.org683713f2008-11-12 01:53:37 +0000235 UChar* m_buffer;
236 int m_bufferSize;
237 UChar* m_dest;
kociendabb0c24b2001-08-24 14:24:40 +0000238
eric@webkit.org683713f2008-11-12 01:53:37 +0000239 Token m_currentToken;
kociendabb0c24b2001-08-24 14:24:40 +0000240
abarth@webkit.org8268f152009-07-13 05:36:15 +0000241 // This buffer holds the raw characters we've seen between the beginning of
242 // the attribute name and the first character of the attribute value.
243 Vector<UChar, 32> m_rawAttributeBeforeValue;
244
eric@webkit.org74d35e92010-06-12 08:10:24 +0000245 // DocumentParser flags
kociendabb0c24b2001-08-24 14:24:40 +0000246 //////////////////
247 // are we in quotes within a html tag
darin7ab31092006-05-10 04:59:57 +0000248 enum { NoQuote, SingleQuote, DoubleQuote } tquote;
kociendabb0c24b2001-08-24 14:24:40 +0000249
kociendabb0c24b2001-08-24 14:24:40 +0000250 // Are we in a &... character entity description?
mjsb1c8f662005-10-18 03:15:31 +0000251 enum EntityState {
kociendabb0c24b2001-08-24 14:24:40 +0000252 NoEntity = 0,
mjsb1c8f662005-10-18 03:15:31 +0000253 SearchEntity = 1,
254 NumericSearch = 2,
255 Hexadecimal = 3,
256 Decimal = 4,
257 EntityName = 5,
258 SearchSemicolon = 6
259 };
darin45265222003-05-07 16:01:49 +0000260 unsigned EntityUnicodeValue;
kociendabb0c24b2001-08-24 14:24:40 +0000261
mjsb1c8f662005-10-18 03:15:31 +0000262 enum TagState {
263 NoTag = 0,
264 TagName = 1,
265 SearchAttribute = 2,
266 AttributeName = 3,
267 SearchEqual = 4,
268 SearchValue = 5,
269 QuotedValue = 6,
270 Value = 7,
271 SearchEnd = 8
272 };
kociendabb0c24b2001-08-24 14:24:40 +0000273
mjsb1c8f662005-10-18 03:15:31 +0000274 class State {
275 public:
weinig54a89ffb2007-01-08 17:50:01 +0000276 State() : m_bits(0) { }
kociendabb0c24b2001-08-24 14:24:40 +0000277
mjsb1c8f662005-10-18 03:15:31 +0000278 TagState tagState() const { return static_cast<TagState>(m_bits & TagMask); }
279 void setTagState(TagState t) { m_bits = (m_bits & ~TagMask) | t; }
280 EntityState entityState() const { return static_cast<EntityState>((m_bits & EntityMask) >> EntityShift); }
281 void setEntityState(EntityState e) { m_bits = (m_bits & ~EntityMask) | (e << EntityShift); }
kociendabb0c24b2001-08-24 14:24:40 +0000282
mjsb1c8f662005-10-18 03:15:31 +0000283 bool inScript() const { return testBit(InScript); }
284 void setInScript(bool v) { setBit(InScript, v); }
285 bool inStyle() const { return testBit(InStyle); }
286 void setInStyle(bool v) { setBit(InStyle, v); }
mjsb1c8f662005-10-18 03:15:31 +0000287 bool inXmp() const { return testBit(InXmp); }
288 void setInXmp(bool v) { setBit(InXmp, v); }
289 bool inTitle() const { return testBit(InTitle); }
290 void setInTitle(bool v) { setBit(InTitle, v); }
hyatt@apple.comd647ec82008-02-10 22:30:04 +0000291 bool inIFrame() const { return testBit(InIFrame); }
292 void setInIFrame(bool v) { setBit(InIFrame, v); }
mjsb1c8f662005-10-18 03:15:31 +0000293 bool inPlainText() const { return testBit(InPlainText); }
294 void setInPlainText(bool v) { setBit(InPlainText, v); }
295 bool inProcessingInstruction() const { return testBit(InProcessingInstruction); }
296 void setInProcessingInstruction(bool v) { return setBit(InProcessingInstruction, v); }
297 bool inComment() const { return testBit(InComment); }
298 void setInComment(bool v) { setBit(InComment, v); }
hyatt@apple.com42583072008-02-20 22:47:57 +0000299 bool inDoctype() const { return testBit(InDoctype); }
300 void setInDoctype(bool v) { setBit(InDoctype, v); }
mjsb1c8f662005-10-18 03:15:31 +0000301 bool inTextArea() const { return testBit(InTextArea); }
302 void setInTextArea(bool v) { setBit(InTextArea, v); }
303 bool escaped() const { return testBit(Escaped); }
304 void setEscaped(bool v) { setBit(Escaped, v); }
305 bool inServer() const { return testBit(InServer); }
306 void setInServer(bool v) { setBit(InServer, v); }
307 bool skipLF() const { return testBit(SkipLF); }
308 void setSkipLF(bool v) { setBit(SkipLF, v); }
309 bool startTag() const { return testBit(StartTag); }
310 void setStartTag(bool v) { setBit(StartTag, v); }
311 bool discardLF() const { return testBit(DiscardLF); }
312 void setDiscardLF(bool v) { setBit(DiscardLF, v); }
313 bool allowYield() const { return testBit(AllowYield); }
314 void setAllowYield(bool v) { setBit(AllowYield, v); }
315 bool loadingExtScript() const { return testBit(LoadingExtScript); }
316 void setLoadingExtScript(bool v) { setBit(LoadingExtScript, v); }
317 bool forceSynchronous() const { return testBit(ForceSynchronous); }
318 void setForceSynchronous(bool v) { setBit(ForceSynchronous, v); }
kociendabb0c24b2001-08-24 14:24:40 +0000319
bfulgham@webkit.orgeaa216e2009-06-11 18:15:05 +0000320 bool inAnyNonHTMLText() const { return m_bits & (InScript | InStyle | InXmp | InTextArea | InTitle | InIFrame); }
mjsb1c8f662005-10-18 03:15:31 +0000321 bool hasTagState() const { return m_bits & TagMask; }
322 bool hasEntityState() const { return m_bits & EntityMask; }
darinf028f812002-06-10 20:08:04 +0000323
hyatt@apple.com42583072008-02-20 22:47:57 +0000324 bool needsSpecialWriteHandling() const { return m_bits & (InScript | InStyle | InXmp | InTextArea | InTitle | InIFrame | TagMask | EntityMask | InPlainText | InComment | InDoctype | InServer | InProcessingInstruction | StartTag); }
kociendabb0c24b2001-08-24 14:24:40 +0000325
mjsb1c8f662005-10-18 03:15:31 +0000326 private:
327 static const int EntityShift = 4;
328 enum StateBits {
329 TagMask = (1 << 4) - 1,
330 EntityMask = (1 << 7) - (1 << 4),
331 InScript = 1 << 7,
332 InStyle = 1 << 8,
ddkilzer12ea70f2007-01-15 00:39:47 +0000333 // Bit 9 unused
mjsb1c8f662005-10-18 03:15:31 +0000334 InXmp = 1 << 10,
335 InTitle = 1 << 11,
336 InPlainText = 1 << 12,
337 InProcessingInstruction = 1 << 13,
338 InComment = 1 << 14,
339 InTextArea = 1 << 15,
340 Escaped = 1 << 16,
341 InServer = 1 << 17,
342 SkipLF = 1 << 18,
343 StartTag = 1 << 19,
344 DiscardLF = 1 << 20, // FIXME: should clarify difference between skip and discard
345 AllowYield = 1 << 21,
346 LoadingExtScript = 1 << 22,
hyatt@apple.comd647ec82008-02-10 22:30:04 +0000347 ForceSynchronous = 1 << 23,
hyatt@apple.com42583072008-02-20 22:47:57 +0000348 InIFrame = 1 << 24,
349 InDoctype = 1 << 25
mjsb1c8f662005-10-18 03:15:31 +0000350 };
weinig54a89ffb2007-01-08 17:50:01 +0000351
352 void setBit(StateBits bit, bool value)
353 {
354 if (value)
355 m_bits |= bit;
356 else
mjsb1c8f662005-10-18 03:15:31 +0000357 m_bits &= ~bit;
358 }
359 bool testBit(StateBits bit) const { return m_bits & bit; }
kociendabb0c24b2001-08-24 14:24:40 +0000360
mjsb1c8f662005-10-18 03:15:31 +0000361 unsigned m_bits;
362 };
kociendabb0c24b2001-08-24 14:24:40 +0000363
mjsb1c8f662005-10-18 03:15:31 +0000364 State m_state;
eric@webkit.orgfa046f22010-06-13 02:16:46 +0000365
hyatt@apple.com42583072008-02-20 22:47:57 +0000366 DoctypeToken m_doctypeToken;
367 int m_doctypeSearchCount;
368 int m_doctypeSecondarySearchCount;
mjs6f821c82002-03-22 00:31:57 +0000369
eric@webkit.org683713f2008-11-12 01:53:37 +0000370 bool m_brokenServer;
mjs6f821c82002-03-22 00:31:57 +0000371
hyatt2773dff2005-07-18 21:44:31 +0000372 // Name of an attribute that we just scanned.
eric@webkit.org683713f2008-11-12 01:53:37 +0000373 AtomicString m_attrName;
weinig54a89ffb2007-01-08 17:50:01 +0000374
ddkilzer@apple.com7eba6712008-05-27 23:38:36 +0000375 // Used to store the code of a scripting sequence
eric@webkit.org683713f2008-11-12 01:53:37 +0000376 UChar* m_scriptCode;
kociendabb0c24b2001-08-24 14:24:40 +0000377 // Size of the script sequenze stored in @ref #scriptCode
eric@webkit.org683713f2008-11-12 01:53:37 +0000378 int m_scriptCodeSize;
kociendabb0c24b2001-08-24 14:24:40 +0000379 // Maximal size that can be stored in @ref #scriptCode
eric@webkit.org683713f2008-11-12 01:53:37 +0000380 int m_scriptCodeCapacity;
gramps0aed4d62001-09-19 15:53:27 +0000381 // resync point of script code size
eric@webkit.org683713f2008-11-12 01:53:37 +0000382 int m_scriptCodeResync;
mjs6f821c82002-03-22 00:31:57 +0000383
kociendabb0c24b2001-08-24 14:24:40 +0000384 // Stores characters if we are scanning for a string like "</script>"
darin7ab31092006-05-10 04:59:57 +0000385 UChar searchBuffer[10];
eric@webkit.orgfa046f22010-06-13 02:16:46 +0000386
kociendabb0c24b2001-08-24 14:24:40 +0000387 // Counts where we are in the string we are scanning for
388 int searchCount;
gramps0aed4d62001-09-19 15:53:27 +0000389 // the stopper string
eric@webkit.org683713f2008-11-12 01:53:37 +0000390 const char* m_searchStopper;
391 int m_searchStopperLength;
eric@webkit.orgfa046f22010-06-13 02:16:46 +0000392
kociendabb0c24b2001-08-24 14:24:40 +0000393 // if no more data is coming, just parse what we have (including ext scripts that
394 // may be still downloading) and finish
eric@webkit.org683713f2008-11-12 01:53:37 +0000395 bool m_noMoreData;
kociendabb0c24b2001-08-24 14:24:40 +0000396 // URL to get source code of script from
eric@webkit.org683713f2008-11-12 01:53:37 +0000397 String m_scriptTagSrcAttrValue;
398 String m_scriptTagCharsetAttrValue;
kociendabb0c24b2001-08-24 14:24:40 +0000399 // the HTML code we will parse after the external script we are waiting for has loaded
eric@webkit.org683713f2008-11-12 01:53:37 +0000400 SegmentedString m_pendingSrc;
mjs4ed3d112004-07-20 20:45:22 +0000401
402 // the HTML code we will parse after this particular script has
403 // loaded, but before all pending HTML
eric@webkit.org683713f2008-11-12 01:53:37 +0000404 SegmentedString* m_currentPrependingSrc;
mjs4ed3d112004-07-20 20:45:22 +0000405
kociendabb0c24b2001-08-24 14:24:40 +0000406 // true if we are executing a script while parsing a document. This causes the parsing of
407 // the output of the script to be postponed until after the script has finished executing
408 int m_executingScript;
eric@webkit.org683713f2008-11-12 01:53:37 +0000409 Deque<CachedResourceHandle<CachedScript> > m_pendingScripts;
410 RefPtr<HTMLScriptElement> m_scriptNode;
kociendabb0c24b2001-08-24 14:24:40 +0000411
ddkilzer24652a92006-07-02 11:39:43 +0000412 bool m_requestingScript;
antti65b15182007-09-10 14:55:11 +0000413 bool m_hasScriptsWaitingForStylesheets;
ddkilzer24652a92006-07-02 11:39:43 +0000414
kociendabb0c24b2001-08-24 14:24:40 +0000415 // if we found one broken comment, there are most likely others as well
416 // store a flag to get rid of the O(n^2) behaviour in such a case.
eric@webkit.org683713f2008-11-12 01:53:37 +0000417 bool m_brokenComments;
mjs6f821c82002-03-22 00:31:57 +0000418 // current line number
darin@apple.com5c1c7582007-11-12 04:09:30 +0000419 int m_lineNumber;
eric@webkit.org683713f2008-11-12 01:53:37 +0000420 int m_currentScriptTagStartLineNumber;
421 int m_currentTagStartLineNumber;
kociendabb0c24b2001-08-24 14:24:40 +0000422
weinig@apple.com4b51d002008-08-14 23:33:22 +0000423 double m_tokenizerTimeDelay;
424 int m_tokenizerChunkSize;
425
hyatt9c4ba9b2004-11-10 03:47:56 +0000426 // The timer for continued processing.
abarth@webkit.org0091a902010-06-23 06:41:51 +0000427 Timer<LegacyHTMLDocumentParser> m_timer;
hyatt9c4ba9b2004-11-10 03:47:56 +0000428
eric@webkit.org73eef4f2009-12-14 15:21:41 +0000429 // The timer for continued executing external scripts.
abarth@webkit.org0091a902010-06-23 06:41:51 +0000430 Timer<LegacyHTMLDocumentParser> m_externalScriptsTimer;
eric@webkit.org73eef4f2009-12-14 15:21:41 +0000431
sullivan6062ecb2003-07-24 22:43:50 +0000432// This buffer can hold arbitrarily long user-defined attribute names, such as in EMBED tags.
433// So any fixed number might be too small, but rather than rewriting all usage of this buffer
434// we'll just make it large enough to handle all imaginable cases.
435#define CBUFLEN 1024
eric@webkit.org683713f2008-11-12 01:53:37 +0000436 UChar m_cBuffer[CBUFLEN + 2];
mjsb1c8f662005-10-18 03:15:31 +0000437 unsigned int m_cBufferPos;
weinig54a89ffb2007-01-08 17:50:01 +0000438
eric@webkit.org683713f2008-11-12 01:53:37 +0000439 SegmentedString m_src;
abarth@webkit.orgc59388e2010-06-23 09:03:47 +0000440 OwnPtr<LegacyHTMLTreeBuilder> m_treeBuilder;
eric@webkit.org683713f2008-11-12 01:53:37 +0000441 bool m_inWrite;
darine9700da2006-03-06 23:09:48 +0000442 bool m_fragment;
enrica@apple.com622ff662010-01-22 00:40:23 +0000443 FragmentScriptingPermission m_scriptingPermission;
antti@apple.com6c76e542008-03-13 21:20:31 +0000444
abarth@webkit.org757e8762010-06-23 08:44:01 +0000445 OwnPtr<LegacyPreloadScanner> m_preloadScanner;
kociendabb0c24b2001-08-24 14:24:40 +0000446};
darinb95d6c42002-06-04 00:19:07 +0000447
darin7ab31092006-05-10 04:59:57 +0000448UChar decodeNamedEntity(const char*);
eseidel363bc0d2005-10-27 06:03:33 +0000449
weinig54a89ffb2007-01-08 17:50:01 +0000450} // namespace WebCore
hyatt3b4f6d42004-02-07 01:19:44 +0000451
abarth@webkit.orga814ad62010-06-23 09:21:08 +0000452#endif // LegacyHTMLDocumentParser_h