Reviewed by Darin. A partial fix for http://bugs.webkit.org/show_bug.cgi?id=13021 XPath can be very slow * xml/XPathExpression.cpp: (WebCore::XPathExpression::evaluate): Cache evaluationContext in a local variable. * xml/XPathExpressionNode.cpp: (WebCore::XPath::Expression::evaluationContext): * xml/XPathExpressionNode.h: (WebCore::XPath::Expression::addSubExpression): (WebCore::XPath::Expression::subExprCount): (WebCore::XPath::Expression::subExpr): * xml/XPathFunctions.cpp: * xml/XPathFunctions.h: (WebCore::XPath::Function::setName): (WebCore::XPath::Function::arg): (WebCore::XPath::Function::argCount): (WebCore::XPath::Function::name): Made one-liners critical for performance inline. * xml/XPathGrammar.y: Fully parse NodeTests, so that strings are no longer passed for what is essentially an enum. Use LocationPath accessors to add steps, instead of directly manipulating internal data members. * xml/XPathParser.cpp: (WebCore::XPath::Parser::parseStatement): (WebCore::XPath::Parser::registerNodeTest): (WebCore::XPath::Parser::deleteNodeTest): * xml/XPathParser.h: Added support methods for changes in XPathGrammar.y. * xml/XPathPath.cpp: (WebCore::XPath::Filter::evaluate): Cache evaluationContext in a local variable. Use swap() to avoid performing vector assignments. (WebCore::XPath::LocationPath::evaluate): Use swap() to avoid performing vector assignments. (WebCore::XPath::LocationPath::optimizeStepPair): This new method is called during LocationPath construction, to simplify the path as it's being built. Currently, the only optimized case is "//*" - it is a basis for important operations that cannot be efficiently written in XPath 1.0, but can be optimized with a little bit of XPath 2.0. (WebCore::XPath::LocationPath::appendStep): A new accessor that modifies m_steps and calls optimizeStepPair(). (WebCore::XPath::LocationPath::insertFirstStep): Ditto. * xml/XPathPath.h: (WebCore::XPath::LocationPath::setAbsolute): A new accessor. * xml/XPathStep.h: (WebCore::XPath::Step::NodeTest::): (WebCore::XPath::Step::NodeTest::NodeTest): (WebCore::XPath::Step::NodeTest::kind): (WebCore::XPath::Step::NodeTest::data): Step::NodeTest is a new sub-class that represents a fully parsed NodeTest. (WebCore::XPath::Step::axis): (WebCore::XPath::Step::nodeTest): (WebCore::XPath::Step::nodeTestData): (WebCore::XPath::Step::namespaceURI): (WebCore::XPath::Step::predicates): (WebCore::XPath::Step::setAxis): (WebCore::XPath::Step::setNodeTest): (WebCore::XPath::Step::setNodeTestData): (WebCore::XPath::Step::setNamespaceURI): (WebCore::XPath::Step::setPredicates): New accessors that let optimizeStepPair() manipulate Step data. * xml/XPathStep.cpp: (WebCore::XPath::Step::Step): Use the new NodeTest class. (WebCore::XPath::Step::evaluate): Cache evaluationContext in a local variable. Use swap() to avoid performing unneeded vector assignments. (WebCore::XPath::Step::nodesInAxis): Cosmetic changes. (WebCore::XPath::Step::nodeTestMatches): Use NodeTest instead of parsing the test from string each time. Added a partial implementation of XPath 2.0 element() node test. git-svn-id: http://svn.webkit.org/repository/webkit/trunk@20102 268f45cc-cd09-0410-ab3c-d52691b4dbfc

commit: 94457f194e719303a502e60389f9ecf1aac90fd4 [log] [tgz]
author: ap <ap@268f45cc-cd09-0410-ab3c-d52691b4dbfc> Sun Mar 11 08:21:13 2007 +0000
committer: ap <ap@268f45cc-cd09-0410-ab3c-d52691b4dbfc> Sun Mar 11 08:21:13 2007 +0000
tree: d8490fb0109bd7b709ab58178b65602a0c1f9eb1
parent: 25186396e44ff3e01385e5646b102bc77f93dcf6 [diff] [blame]
diff --git a/WebCore/xml/XPathStep.cpp b/WebCore/xml/XPathStep.cpp
index 81d9bb2..b33972a 100644
--- a/WebCore/xml/XPathStep.cpp
+++ b/WebCore/xml/XPathStep.cpp

@@ -38,14 +38,14 @@
 namespace WebCore {
 namespace XPath {
 
-Step::Step(Axis axis, const String& nodeTest, const Vector<Predicate*>& predicates)
+Step::Step(Axis axis, const NodeTest& nodeTest, const Vector<Predicate*>& predicates)
     : m_axis(axis)
     , m_nodeTest(nodeTest)
     , m_predicates(predicates)
 {
 }
 
-Step::Step(Axis axis, const String& nodeTest, const String& namespaceURI, const Vector<Predicate*>& predicates)
+Step::Step(Axis axis, const NodeTest& nodeTest, const String& namespaceURI, const Vector<Predicate*>& predicates)
     : m_axis(axis)
     , m_nodeTest(nodeTest)
     , m_namespaceURI(namespaceURI)
@@ -60,31 +60,32 @@
 
 NodeVector Step::evaluate(Node* context) const
 {
-    NodeVector inNodes = nodesInAxis(context), outNodes;
-    inNodes = nodeTestMatches(inNodes);
+    NodeVector nodes = nodesInAxis(context);
+    nodes = nodeTestMatches(nodes);
     
-    outNodes = inNodes;
+    EvaluationContext& evaluationContext = Expression::evaluationContext();
+    
     for (unsigned i = 0; i < m_predicates.size(); i++) {
         Predicate* predicate = m_predicates[i];
 
-        outNodes.clear();
-        Expression::evaluationContext().size = inNodes.size();
-        Expression::evaluationContext().position = 1;
-        for (unsigned j = 0; j < inNodes.size(); j++) {
-            Node* node = inNodes[j].get();
+        NodeVector newNodes;
+        evaluationContext.size = nodes.size();
+        evaluationContext.position = 1;
+        for (unsigned j = 0; j < nodes.size(); j++) {
+            Node* node = nodes[j].get();
 
             Expression::evaluationContext().node = node;
-            EvaluationContext backupCtx = Expression::evaluationContext();
+            EvaluationContext backupCtx = evaluationContext;
             if (predicate->evaluate())
-                outNodes.append(node);
+                newNodes.append(node);
 
-            Expression::evaluationContext() = backupCtx;
-            ++Expression::evaluationContext().position;
+            evaluationContext = backupCtx;
+            ++evaluationContext.position;
         }
 
-        inNodes = outNodes;
+        nodes.swap(newNodes);
     }
-    return outNodes;
+    return nodes;
 }
 
 NodeVector Step::nodesInAxis(Node* context) const
@@ -155,10 +156,9 @@
                 nodes.append (attrs->item(i));
             return nodes;
         }
-        case NamespaceAxis: {
+        case NamespaceAxis:
             // XPath namespace nodes are not implemented yet.
             return NodeVector();
-        }
         case SelfAxis:
             nodes.append(context);
             return nodes;
@@ -173,7 +173,7 @@
                 nodes.append(n);
             return nodes;
     }
-
+    ASSERT_NOT_REACHED();
     return NodeVector();
 }
 
@@ -182,86 +182,91 @@
 {
     NodeVector matches;
 
-    if (m_nodeTest == "*") {
-        for (unsigned i = 0; i < nodes.size(); i++) {
-            Node* node = nodes[i].get();
-            if (node->nodeType() == primaryNodeType(m_axis) &&
-                (m_namespaceURI.isEmpty() || m_namespaceURI == node->namespaceURI()))
-                matches.append(node);
-        }
-        return matches;
-    } else if (m_nodeTest == "text()") {
-        HashSet<Node*> nodeSet;
-        for (unsigned i = 0; i < nodes.size(); i++) {
-            Node* node = nodes[i].get();
-            if ((node->nodeType() == Node::TEXT_NODE || node->nodeType() == Node::CDATA_SECTION_NODE)) {
-                nodeSet.add(node);
-                if (!nodeSet.contains(node->previousSibling())) // See <http://www.w3.org/TR/DOM-Level-3-XPath/xpath.html#TextNodes>
-                    matches.append(node);
-            }
-        }
-        return matches;
-    } else if (m_nodeTest == "comment()") {
-        for (unsigned i = 0; i < nodes.size(); i++) {
-            Node* node = nodes[i].get();
-            if (node->nodeType() == Node::COMMENT_NODE)
-                matches.append(node);
-        }
-        return matches;
-    } else if (m_nodeTest.startsWith("processing-instruction")) {
-        String param;
-
-        const int space = m_nodeTest.find(' ');
-        if (space > -1)
-            param = m_nodeTest.substring(space + 1);
-
-        for (unsigned i = 0; i < nodes.size(); i++) {
-            Node* node = nodes[i].get();
-
-            if (node->nodeType() == Node::PROCESSING_INSTRUCTION_NODE &&
-                (param.isEmpty() || node->nodeName() == param))
-                    matches.append(node);
-        }    
-        return matches;
-    } else if (m_nodeTest == "node()")
-        return nodes;
-    else {
-        if (m_axis == AttributeAxis) {
-            // In XPath land, namespace nodes are not accessible
-            // on the attribute axis.
-            if (m_nodeTest == "xmlns")
-                return matches;
-
+    switch (m_nodeTest.kind()) {
+        case NodeTest::TextNodeTest: {
+            HashSet<Node*> nodeSet;
             for (unsigned i = 0; i < nodes.size(); i++) {
                 Node* node = nodes[i].get();
-                
-                if (node->nodeName() == m_nodeTest) {
-                    matches.append(node);
-                    break; // There can only be one.
+                if ((node->nodeType() == Node::TEXT_NODE || node->nodeType() == Node::CDATA_SECTION_NODE)) {
+                    nodeSet.add(node);
+                    if (!nodeSet.contains(node->previousSibling())) // See <http://www.w3.org/TR/DOM-Level-3-XPath/xpath.html#TextNodes>
+                        matches.append(node);
                 }
             }
-
-            return matches;
-        } else if (m_axis == NamespaceAxis) {
-            // Node test on the namespace axis is not implemented yet
-        } else {
-            for (unsigned i = 0; i < nodes.size(); i++) {
-                Node* node = nodes[i].get();
-
-                // We use tagQName here because we don't want the element name in uppercase 
-                // like we get with HTML elements.
-                // Paths without namespaces should match HTML elements in HTML documents despite those having an XHTML namespace.
-                if (node->nodeType() == Node::ELEMENT_NODE
-                    && static_cast<Element*>(node)->tagQName().localName() == m_nodeTest
-                    && ((node->isHTMLElement() && node->document()->isHTMLDocument() && m_namespaceURI.isNull()) || m_namespaceURI == node->namespaceURI()))
-                    matches.append(node);
-            }
-
             return matches;
         }
-    }
+        case NodeTest::CommentNodeTest:
+            for (unsigned i = 0; i < nodes.size(); i++) {
+                Node* node = nodes[i].get();
+                if (node->nodeType() == Node::COMMENT_NODE)
+                    matches.append(node);
+            }
+            return matches;
+        case NodeTest::ProcessingInstructionNodeTest:
+            for (unsigned i = 0; i < nodes.size(); i++) {
+                Node* node = nodes[i].get();
+                const String& name = m_nodeTest.data();
+                if (node->nodeType() == Node::PROCESSING_INSTRUCTION_NODE && (name.isEmpty() || node->nodeName() == name))
+                        matches.append(node);
+            }    
+            return matches;
+        case NodeTest::ElementNodeTest:
+            for (unsigned i = 0; i < nodes.size(); i++) {
+                Node* node = nodes[i].get();
+                if (node->isElementNode())
+                    matches.append(node);
+            }
+            return matches;
+        case NodeTest::AnyNodeTest:
+            return nodes;
+        case NodeTest::NameTest: {
+            const String& name = m_nodeTest.data();
+            if (name == "*") {
+                for (unsigned i = 0; i < nodes.size(); i++) {
+                    Node* node = nodes[i].get();
+                    if (node->nodeType() == primaryNodeType(m_axis) &&
+                        (m_namespaceURI.isEmpty() || m_namespaceURI == node->namespaceURI()))
+                        matches.append(node);
+                }
+                return matches;
+            }
+            if (m_axis == AttributeAxis) {
+                // In XPath land, namespace nodes are not accessible
+                // on the attribute axis.
+                if (name == "xmlns")
+                    return matches;
 
-    return matches;
+                for (unsigned i = 0; i < nodes.size(); i++) {
+                    Node* node = nodes[i].get();
+                    
+                    if (node->nodeName() == name) {
+                        matches.append(node);
+                        break; // There can only be one.
+                    }
+                }
+
+                return matches;
+            } else if (m_axis == NamespaceAxis) {
+                // Node test on the namespace axis is not implemented yet
+            } else {
+                for (unsigned i = 0; i < nodes.size(); i++) {
+                    Node* node = nodes[i].get();
+
+                    // We use tagQName here because we don't want the element name in uppercase 
+                    // like we get with HTML elements.
+                    // Paths without namespaces should match HTML elements in HTML documents despite those having an XHTML namespace.
+                    if (node->nodeType() == Node::ELEMENT_NODE
+                        && static_cast<Element*>(node)->tagQName().localName() == name
+                        && ((node->isHTMLElement() && node->document()->isHTMLDocument() && m_namespaceURI.isNull()) || m_namespaceURI == node->namespaceURI()))
+                        matches.append(node);
+                }
+
+                return matches;
+            }
+        }
+    }
+    ASSERT_NOT_REACHED();
+    return NodeVector();
 }
 
 Node::NodeType Step::primaryNodeType(Axis axis) const
commit	94457f194e719303a502e60389f9ecf1aac90fd4	[log] [tgz]
author	ap <ap@268f45cc-cd09-0410-ab3c-d52691b4dbfc>	Sun Mar 11 08:21:13 2007 +0000
committer	ap <ap@268f45cc-cd09-0410-ab3c-d52691b4dbfc>	Sun Mar 11 08:21:13 2007 +0000
tree	d8490fb0109bd7b709ab58178b65602a0c1f9eb1
parent	25186396e44ff3e01385e5646b102bc77f93dcf6 [diff] [blame]