| /* |
| Copyright (C) 1997 Martin Jones (mjones@kde.org) |
| (C) 1997 Torben Weis (weis@kde.org) |
| (C) 1998 Waldo Bastian (bastian@kde.org) |
| (C) 1999 Lars Knoll (knoll@kde.org) |
| Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. |
| |
| This library is free software; you can redistribute it and/or |
| modify it under the terms of the GNU Library General Public |
| License as published by the Free Software Foundation; either |
| version 2 of the License, or (at your option) any later version. |
| |
| This library is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| Library General Public License for more details. |
| |
| You should have received a copy of the GNU Library General Public License |
| along with this library; see the file COPYING.LIB. If not, write to |
| the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
| Boston, MA 02110-1301, USA. |
| */ |
| |
| #ifndef HTMLParser_h |
| #define HTMLParser_h |
| |
| #include "QualifiedName.h" |
| #include <wtf/Forward.h> |
| #include <wtf/OwnPtr.h> |
| #include <wtf/RefPtr.h> |
| #include "HTMLParserErrorCodes.h" |
| #include "MappedAttributeEntry.h" |
| |
| namespace WebCore { |
| |
| class DoctypeToken; |
| class Document; |
| class DocumentFragment; |
| class HTMLDocument; |
| class HTMLFormElement; |
| class HTMLHeadElement; |
| class HTMLMapElement; |
| class HTMLParserQuirks; |
| class Node; |
| |
| struct HTMLStackElem; |
| struct Token; |
| |
| /** |
| * The parser for HTML. It receives a stream of tokens from the HTMLTokenizer, and |
| * builds up the Document structure from it. |
| */ |
| class HTMLParser : public Noncopyable { |
| public: |
| HTMLParser(HTMLDocument*, bool reportErrors); |
| HTMLParser(DocumentFragment*, FragmentScriptingPermission = FragmentScriptingAllowed); |
| virtual ~HTMLParser(); |
| |
| /** |
| * parses one token delivered by the tokenizer |
| */ |
| PassRefPtr<Node> parseToken(Token*); |
| |
| // Parses a doctype token. |
| void parseDoctypeToken(DoctypeToken*); |
| |
| /** |
| * tokenizer says it's not going to be sending us any more tokens |
| */ |
| void finished(); |
| |
| /** |
| * resets the parser |
| */ |
| void reset(); |
| |
| bool skipMode() const { return !m_skipModeTag.isNull(); } |
| bool isHandlingResidualStyleAcrossBlocks() const { return m_handlingResidualStyleAcrossBlocks; } |
| |
| private: |
| void setCurrent(Node*); |
| void derefCurrent(); |
| void setSkipMode(const QualifiedName& qName) { m_skipModeTag = qName.localName(); } |
| |
| PassRefPtr<Node> getNode(Token*); |
| bool bodyCreateErrorCheck(Token*, RefPtr<Node>&); |
| bool canvasCreateErrorCheck(Token*, RefPtr<Node>&); |
| bool commentCreateErrorCheck(Token*, RefPtr<Node>&); |
| bool ddCreateErrorCheck(Token*, RefPtr<Node>&); |
| bool dtCreateErrorCheck(Token*, RefPtr<Node>&); |
| bool formCreateErrorCheck(Token*, RefPtr<Node>&); |
| bool framesetCreateErrorCheck(Token*, RefPtr<Node>&); |
| bool headCreateErrorCheck(Token*, RefPtr<Node>&); |
| bool iframeCreateErrorCheck(Token*, RefPtr<Node>&); |
| bool isindexCreateErrorCheck(Token*, RefPtr<Node>&); |
| bool mapCreateErrorCheck(Token*, RefPtr<Node>&); |
| bool nestedCreateErrorCheck(Token*, RefPtr<Node>&); |
| bool nestedPCloserCreateErrorCheck(Token*, RefPtr<Node>&); |
| bool nestedStyleCreateErrorCheck(Token*, RefPtr<Node>&); |
| bool noembedCreateErrorCheck(Token*, RefPtr<Node>&); |
| bool noframesCreateErrorCheck(Token*, RefPtr<Node>&); |
| bool nolayerCreateErrorCheck(Token*, RefPtr<Node>&); |
| bool noscriptCreateErrorCheck(Token*, RefPtr<Node>&); |
| bool pCloserCreateErrorCheck(Token*, RefPtr<Node>&); |
| bool pCloserStrictCreateErrorCheck(Token*, RefPtr<Node>&); |
| bool rpCreateErrorCheck(Token*, RefPtr<Node>&); |
| bool rtCreateErrorCheck(Token*, RefPtr<Node>&); |
| bool selectCreateErrorCheck(Token*, RefPtr<Node>&); |
| bool tableCellCreateErrorCheck(Token*, RefPtr<Node>&); |
| bool tableSectionCreateErrorCheck(Token*, RefPtr<Node>&); |
| bool textCreateErrorCheck(Token*, RefPtr<Node>&); |
| |
| void processCloseTag(Token*); |
| |
| void limitDepth(int tagPriority); |
| bool insertNodeAfterLimitDepth(Node*, bool flat = false); |
| bool insertNode(Node*, bool flat = false); |
| bool handleError(Node*, bool flat, const AtomicString& localName, int tagPriority); |
| |
| void pushBlock(const AtomicString& tagName, int level); |
| void popBlock(const AtomicString& tagName, bool reportErrors = false); |
| void popBlock(const QualifiedName& qName, bool reportErrors = false) { return popBlock(qName.localName(), reportErrors); } // Convenience function for readability. |
| void popOneBlock(); |
| void moveOneBlockToStack(HTMLStackElem*& head); |
| inline HTMLStackElem* popOneBlockCommon(); |
| void popInlineBlocks(); |
| |
| void freeBlock(); |
| |
| void createHead(); |
| |
| static bool isResidualStyleTag(const AtomicString& tagName); |
| static bool isAffectedByResidualStyle(const AtomicString& tagName); |
| void handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem*); |
| void reopenResidualStyleTags(HTMLStackElem*, Node* malformedTableParent); |
| |
| bool allowNestedRedundantTag(const AtomicString& tagName); |
| |
| static bool isHeadingTag(const AtomicString& tagName); |
| |
| bool isInline(Node*) const; |
| |
| void startBody(); // inserts the isindex element |
| PassRefPtr<Node> handleIsindex(Token*); |
| |
| void checkIfHasPElementInScope(); |
| bool hasPElementInScope() |
| { |
| if (m_hasPElementInScope == Unknown) |
| checkIfHasPElementInScope(); |
| return m_hasPElementInScope == InScope; |
| } |
| |
| void reportError(HTMLParserErrorCode errorCode, const AtomicString* tagName1 = 0, const AtomicString* tagName2 = 0, bool closeTags = false) |
| { if (!m_reportErrors) return; reportErrorToConsole(errorCode, tagName1, tagName2, closeTags); } |
| |
| void reportErrorToConsole(HTMLParserErrorCode, const AtomicString* tagName1, const AtomicString* tagName2, bool closeTags); |
| |
| Document* m_document; |
| |
| // The currently active element (the one new elements will be added to). Can be a document fragment, a document or an element. |
| Node* m_current; |
| // We can't ref a document, but we don't want to constantly check if a node is a document just to decide whether to deref. |
| bool m_didRefCurrent; |
| |
| HTMLStackElem* m_blockStack; |
| |
| // The number of tags with priority minBlockLevelTagPriority or higher |
| // currently in m_blockStack. The parser enforces a cap on this value by |
| // adding such new elements as siblings instead of children once it is reached. |
| size_t m_blocksInStack; |
| // Depth of the tree. |
| unsigned m_treeDepth; |
| |
| enum ElementInScopeState { NotInScope, InScope, Unknown }; |
| ElementInScopeState m_hasPElementInScope; |
| |
| RefPtr<HTMLFormElement> m_currentFormElement; // currently active form |
| RefPtr<HTMLMapElement> m_currentMapElement; // current map |
| RefPtr<HTMLHeadElement> m_head; // head element; needed for HTML which defines <base> after </head> |
| RefPtr<Node> m_isindexElement; // a possible <isindex> element in the head |
| |
| bool m_inBody; |
| bool m_haveContent; |
| bool m_haveFrameSet; |
| |
| AtomicString m_skipModeTag; // tells the parser to discard all tags until it reaches the one specified |
| |
| bool m_isParsingFragment; |
| bool m_reportErrors; |
| bool m_handlingResidualStyleAcrossBlocks; |
| int m_inStrayTableContent; |
| FragmentScriptingPermission m_scriptingPermission; |
| |
| OwnPtr<HTMLParserQuirks> m_parserQuirks; |
| }; |
| |
| #if defined(BUILDING_ON_LEOPARD) || defined(BUILDING_ON_TIGER) |
| bool shouldCreateImplicitHead(Document*); |
| #else |
| inline bool shouldCreateImplicitHead(Document*) { return true; } |
| #endif |
| |
| // Converts the specified string to a floating number. |
| // If the conversion fails, the return value is false. Take care that leading or trailing unnecessary characters make failures. This returns false for an empty string input. |
| // The double* parameter may be 0. |
| bool parseToDoubleForNumberType(const String&, double*); |
| // Converts the specified number to a string. This is an implementation of |
| // HTML5's "algorithm to convert a number to a string" for NUMBER/RANGE types. |
| String serializeForNumberType(double); |
| |
| } |
| |
| #endif // HTMLParser_h |