Reviewed by Darin.
A partial fix for http://bugs.webkit.org/show_bug.cgi?id=13021
XPath can be very slow
* xml/XPathExpression.cpp:
(WebCore::XPathExpression::evaluate): Cache evaluationContext in a local variable.
* xml/XPathExpressionNode.cpp:
(WebCore::XPath::Expression::evaluationContext):
* xml/XPathExpressionNode.h:
(WebCore::XPath::Expression::addSubExpression):
(WebCore::XPath::Expression::subExprCount):
(WebCore::XPath::Expression::subExpr):
* xml/XPathFunctions.cpp:
* xml/XPathFunctions.h:
(WebCore::XPath::Function::setName):
(WebCore::XPath::Function::arg):
(WebCore::XPath::Function::argCount):
(WebCore::XPath::Function::name):
Made one-liners critical for performance inline.
* xml/XPathGrammar.y: Fully parse NodeTests, so that strings are no longer passed for what is
essentially an enum. Use LocationPath accessors to add steps, instead of directly manipulating
internal data members.
* xml/XPathParser.cpp:
(WebCore::XPath::Parser::parseStatement):
(WebCore::XPath::Parser::registerNodeTest):
(WebCore::XPath::Parser::deleteNodeTest):
* xml/XPathParser.h:
Added support methods for changes in XPathGrammar.y.
* xml/XPathPath.cpp:
(WebCore::XPath::Filter::evaluate): Cache evaluationContext in a local variable. Use swap() to avoid
performing vector assignments.
(WebCore::XPath::LocationPath::evaluate): Use swap() to avoid performing vector assignments.
(WebCore::XPath::LocationPath::optimizeStepPair): This new method is called during LocationPath construction,
to simplify the path as it's being built. Currently, the only optimized case is "//*" - it is a basis for
important operations that cannot be efficiently written in XPath 1.0, but can be optimized with a little bit
of XPath 2.0.
(WebCore::XPath::LocationPath::appendStep): A new accessor that modifies m_steps and calls optimizeStepPair().
(WebCore::XPath::LocationPath::insertFirstStep): Ditto.
* xml/XPathPath.h:
(WebCore::XPath::LocationPath::setAbsolute): A new accessor.
* xml/XPathStep.h:
(WebCore::XPath::Step::NodeTest::):
(WebCore::XPath::Step::NodeTest::NodeTest):
(WebCore::XPath::Step::NodeTest::kind):
(WebCore::XPath::Step::NodeTest::data):
Step::NodeTest is a new sub-class that represents a fully parsed NodeTest.
(WebCore::XPath::Step::axis):
(WebCore::XPath::Step::nodeTest):
(WebCore::XPath::Step::nodeTestData):
(WebCore::XPath::Step::namespaceURI):
(WebCore::XPath::Step::predicates):
(WebCore::XPath::Step::setAxis):
(WebCore::XPath::Step::setNodeTest):
(WebCore::XPath::Step::setNodeTestData):
(WebCore::XPath::Step::setNamespaceURI):
(WebCore::XPath::Step::setPredicates):
New accessors that let optimizeStepPair() manipulate Step data.
* xml/XPathStep.cpp:
(WebCore::XPath::Step::Step): Use the new NodeTest class.
(WebCore::XPath::Step::evaluate): Cache evaluationContext in a local variable. Use swap() to avoid
performing unneeded vector assignments.
(WebCore::XPath::Step::nodesInAxis): Cosmetic changes.
(WebCore::XPath::Step::nodeTestMatches): Use NodeTest instead of parsing the test from string each time.
Added a partial implementation of XPath 2.0 element() node test.
git-svn-id: http://svn.webkit.org/repository/webkit/trunk@20102 268f45cc-cd09-0410-ab3c-d52691b4dbfc
diff --git a/WebCore/xml/XPathStep.cpp b/WebCore/xml/XPathStep.cpp
index 81d9bb2..b33972a 100644
--- a/WebCore/xml/XPathStep.cpp
+++ b/WebCore/xml/XPathStep.cpp
@@ -38,14 +38,14 @@
namespace WebCore {
namespace XPath {
-Step::Step(Axis axis, const String& nodeTest, const Vector<Predicate*>& predicates)
+Step::Step(Axis axis, const NodeTest& nodeTest, const Vector<Predicate*>& predicates)
: m_axis(axis)
, m_nodeTest(nodeTest)
, m_predicates(predicates)
{
}
-Step::Step(Axis axis, const String& nodeTest, const String& namespaceURI, const Vector<Predicate*>& predicates)
+Step::Step(Axis axis, const NodeTest& nodeTest, const String& namespaceURI, const Vector<Predicate*>& predicates)
: m_axis(axis)
, m_nodeTest(nodeTest)
, m_namespaceURI(namespaceURI)
@@ -60,31 +60,32 @@
NodeVector Step::evaluate(Node* context) const
{
- NodeVector inNodes = nodesInAxis(context), outNodes;
- inNodes = nodeTestMatches(inNodes);
+ NodeVector nodes = nodesInAxis(context);
+ nodes = nodeTestMatches(nodes);
- outNodes = inNodes;
+ EvaluationContext& evaluationContext = Expression::evaluationContext();
+
for (unsigned i = 0; i < m_predicates.size(); i++) {
Predicate* predicate = m_predicates[i];
- outNodes.clear();
- Expression::evaluationContext().size = inNodes.size();
- Expression::evaluationContext().position = 1;
- for (unsigned j = 0; j < inNodes.size(); j++) {
- Node* node = inNodes[j].get();
+ NodeVector newNodes;
+ evaluationContext.size = nodes.size();
+ evaluationContext.position = 1;
+ for (unsigned j = 0; j < nodes.size(); j++) {
+ Node* node = nodes[j].get();
Expression::evaluationContext().node = node;
- EvaluationContext backupCtx = Expression::evaluationContext();
+ EvaluationContext backupCtx = evaluationContext;
if (predicate->evaluate())
- outNodes.append(node);
+ newNodes.append(node);
- Expression::evaluationContext() = backupCtx;
- ++Expression::evaluationContext().position;
+ evaluationContext = backupCtx;
+ ++evaluationContext.position;
}
- inNodes = outNodes;
+ nodes.swap(newNodes);
}
- return outNodes;
+ return nodes;
}
NodeVector Step::nodesInAxis(Node* context) const
@@ -155,10 +156,9 @@
nodes.append (attrs->item(i));
return nodes;
}
- case NamespaceAxis: {
+ case NamespaceAxis:
// XPath namespace nodes are not implemented yet.
return NodeVector();
- }
case SelfAxis:
nodes.append(context);
return nodes;
@@ -173,7 +173,7 @@
nodes.append(n);
return nodes;
}
-
+ ASSERT_NOT_REACHED();
return NodeVector();
}
@@ -182,86 +182,91 @@
{
NodeVector matches;
- if (m_nodeTest == "*") {
- for (unsigned i = 0; i < nodes.size(); i++) {
- Node* node = nodes[i].get();
- if (node->nodeType() == primaryNodeType(m_axis) &&
- (m_namespaceURI.isEmpty() || m_namespaceURI == node->namespaceURI()))
- matches.append(node);
- }
- return matches;
- } else if (m_nodeTest == "text()") {
- HashSet<Node*> nodeSet;
- for (unsigned i = 0; i < nodes.size(); i++) {
- Node* node = nodes[i].get();
- if ((node->nodeType() == Node::TEXT_NODE || node->nodeType() == Node::CDATA_SECTION_NODE)) {
- nodeSet.add(node);
- if (!nodeSet.contains(node->previousSibling())) // See <http://www.w3.org/TR/DOM-Level-3-XPath/xpath.html#TextNodes>
- matches.append(node);
- }
- }
- return matches;
- } else if (m_nodeTest == "comment()") {
- for (unsigned i = 0; i < nodes.size(); i++) {
- Node* node = nodes[i].get();
- if (node->nodeType() == Node::COMMENT_NODE)
- matches.append(node);
- }
- return matches;
- } else if (m_nodeTest.startsWith("processing-instruction")) {
- String param;
-
- const int space = m_nodeTest.find(' ');
- if (space > -1)
- param = m_nodeTest.substring(space + 1);
-
- for (unsigned i = 0; i < nodes.size(); i++) {
- Node* node = nodes[i].get();
-
- if (node->nodeType() == Node::PROCESSING_INSTRUCTION_NODE &&
- (param.isEmpty() || node->nodeName() == param))
- matches.append(node);
- }
- return matches;
- } else if (m_nodeTest == "node()")
- return nodes;
- else {
- if (m_axis == AttributeAxis) {
- // In XPath land, namespace nodes are not accessible
- // on the attribute axis.
- if (m_nodeTest == "xmlns")
- return matches;
-
+ switch (m_nodeTest.kind()) {
+ case NodeTest::TextNodeTest: {
+ HashSet<Node*> nodeSet;
for (unsigned i = 0; i < nodes.size(); i++) {
Node* node = nodes[i].get();
-
- if (node->nodeName() == m_nodeTest) {
- matches.append(node);
- break; // There can only be one.
+ if ((node->nodeType() == Node::TEXT_NODE || node->nodeType() == Node::CDATA_SECTION_NODE)) {
+ nodeSet.add(node);
+ if (!nodeSet.contains(node->previousSibling())) // See <http://www.w3.org/TR/DOM-Level-3-XPath/xpath.html#TextNodes>
+ matches.append(node);
}
}
-
- return matches;
- } else if (m_axis == NamespaceAxis) {
- // Node test on the namespace axis is not implemented yet
- } else {
- for (unsigned i = 0; i < nodes.size(); i++) {
- Node* node = nodes[i].get();
-
- // We use tagQName here because we don't want the element name in uppercase
- // like we get with HTML elements.
- // Paths without namespaces should match HTML elements in HTML documents despite those having an XHTML namespace.
- if (node->nodeType() == Node::ELEMENT_NODE
- && static_cast<Element*>(node)->tagQName().localName() == m_nodeTest
- && ((node->isHTMLElement() && node->document()->isHTMLDocument() && m_namespaceURI.isNull()) || m_namespaceURI == node->namespaceURI()))
- matches.append(node);
- }
-
return matches;
}
- }
+ case NodeTest::CommentNodeTest:
+ for (unsigned i = 0; i < nodes.size(); i++) {
+ Node* node = nodes[i].get();
+ if (node->nodeType() == Node::COMMENT_NODE)
+ matches.append(node);
+ }
+ return matches;
+ case NodeTest::ProcessingInstructionNodeTest:
+ for (unsigned i = 0; i < nodes.size(); i++) {
+ Node* node = nodes[i].get();
+ const String& name = m_nodeTest.data();
+ if (node->nodeType() == Node::PROCESSING_INSTRUCTION_NODE && (name.isEmpty() || node->nodeName() == name))
+ matches.append(node);
+ }
+ return matches;
+ case NodeTest::ElementNodeTest:
+ for (unsigned i = 0; i < nodes.size(); i++) {
+ Node* node = nodes[i].get();
+ if (node->isElementNode())
+ matches.append(node);
+ }
+ return matches;
+ case NodeTest::AnyNodeTest:
+ return nodes;
+ case NodeTest::NameTest: {
+ const String& name = m_nodeTest.data();
+ if (name == "*") {
+ for (unsigned i = 0; i < nodes.size(); i++) {
+ Node* node = nodes[i].get();
+ if (node->nodeType() == primaryNodeType(m_axis) &&
+ (m_namespaceURI.isEmpty() || m_namespaceURI == node->namespaceURI()))
+ matches.append(node);
+ }
+ return matches;
+ }
+ if (m_axis == AttributeAxis) {
+ // In XPath land, namespace nodes are not accessible
+ // on the attribute axis.
+ if (name == "xmlns")
+ return matches;
- return matches;
+ for (unsigned i = 0; i < nodes.size(); i++) {
+ Node* node = nodes[i].get();
+
+ if (node->nodeName() == name) {
+ matches.append(node);
+ break; // There can only be one.
+ }
+ }
+
+ return matches;
+ } else if (m_axis == NamespaceAxis) {
+ // Node test on the namespace axis is not implemented yet
+ } else {
+ for (unsigned i = 0; i < nodes.size(); i++) {
+ Node* node = nodes[i].get();
+
+ // We use tagQName here because we don't want the element name in uppercase
+ // like we get with HTML elements.
+ // Paths without namespaces should match HTML elements in HTML documents despite those having an XHTML namespace.
+ if (node->nodeType() == Node::ELEMENT_NODE
+ && static_cast<Element*>(node)->tagQName().localName() == name
+ && ((node->isHTMLElement() && node->document()->isHTMLDocument() && m_namespaceURI.isNull()) || m_namespaceURI == node->namespaceURI()))
+ matches.append(node);
+ }
+
+ return matches;
+ }
+ }
+ }
+ ASSERT_NOT_REACHED();
+ return NodeVector();
}
Node::NodeType Step::primaryNodeType(Axis axis) const