Text manipulation: first and last unit in a paragraph should not contain only excluded tokens https://bugs.webkit.org/show_bug.cgi?id=212759 Reviewed by Wenson Hsieh. Source/WebCore: In r262398, we made the text of one Node the minimum unit for text manipulation. This patch introduces a struct ManipulationUnit for that. Now a paragraph can be represented as multiple ManipulationUnits. When all tokens in a ManipulationUnit are excluded, it means the ManipulationUnit is excluded and should not be manipulated. To record ManipulationUnits in a paragraph based on our current implementation, we need to keep the excluded ManipulationUnits surrounded by non-excluded ManipulationUnits, but we can safely remove the leading and trailing excluded ManipulationUnits. In this case, we can limit the range of the paragraph further and thus do less text replacement work. Covered by existing tests. * editing/TextManipulationController.cpp: (WebCore::TextManipulationController::parse): (WebCore::TextManipulationController::addItemIfPossible): (WebCore::TextManipulationController::observeParagraphs): * editing/TextManipulationController.h: Tools: Modify the existing test for the changed behavior: leading and trailing excluded units are no longer included in a paragraph. * TestWebKitAPI/Tests/WebKitCocoa/TextManipulation.mm: (TestWebKitAPI::TEST): git-svn-id: http://svn.webkit.org/repository/webkit/trunk@262601 268f45cc-cd09-0410-ab3c-d52691b4dbfc

commit: 33d45222a2f9b9f73a48805ea6392afde83fc0cf [log] [tgz]
author: sihui_liu@apple.com <sihui_liu@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc> Fri Jun 05 06:13:30 2020 +0000
committer: sihui_liu@apple.com <sihui_liu@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc> Fri Jun 05 06:13:30 2020 +0000
tree: 5b192174a0570a8c3a420c824f986bc06e67cf36
parent: 3c81e0d2da11a53f69d583a71f5a476cd91b3eb0 [diff] [blame]
diff --git a/Source/WebCore/editing/TextManipulationController.cpp b/Source/WebCore/editing/TextManipulationController.cpp
index b3de989..2b36397 100644
--- a/Source/WebCore/editing/TextManipulationController.cpp
+++ b/Source/WebCore/editing/TextManipulationController.cpp

@@ -294,12 +294,13 @@
     return false;
 }
 
-TextManipulationController::ManipulationTokens TextManipulationController::parse(StringView text, Node* textNode)
+TextManipulationController::ManipulationUnit TextManipulationController::parse(StringView text, Node* textNode)
 {
     Vector<ManipulationToken> tokens;
     ExclusionRuleMatcher exclusionRuleMatcher(m_exclusionRules);
     size_t positionOfLastNonHTMLSpace = WTF::notFound;
     size_t startPositionOfCurrentToken = 0;
+    bool isNodeExcluded = exclusionRuleMatcher.isExcluded(textNode);
     bool containsOnlyHTMLSpace = true;
     bool containsLineBreak = false;
     bool firstTokenContainsLineBreak = false;
@@ -315,7 +316,7 @@
             containsLineBreak = true;
             if (positionOfLastNonHTMLSpace != WTF::notFound && startPositionOfCurrentToken <= positionOfLastNonHTMLSpace) {
                 auto tokenString = text.substring(startPositionOfCurrentToken, positionOfLastNonHTMLSpace + 1 - startPositionOfCurrentToken).toString();
-                tokens.append(ManipulationToken { m_tokenIdentifier.generate(), tokenString, tokenInfo(textNode), exclusionRuleMatcher.isExcluded(textNode) });
+                tokens.append(ManipulationToken { m_tokenIdentifier.generate(), tokenString, tokenInfo(textNode), isNodeExcluded });
                 startPositionOfCurrentToken = positionOfLastNonHTMLSpace + 1;
             }
 
@@ -337,11 +338,36 @@
 
     if (startPositionOfCurrentToken < text.length()) {
         auto tokenString = text.substring(startPositionOfCurrentToken, index + 1 - startPositionOfCurrentToken).toString();
-        tokens.append(ManipulationToken { m_tokenIdentifier.generate(), tokenString, tokenInfo(textNode), exclusionRuleMatcher.isExcluded(textNode) });
+        tokens.append(ManipulationToken { m_tokenIdentifier.generate(), tokenString, tokenInfo(textNode), isNodeExcluded });
         lastTokenContainsLineBreak = false;
     }
 
-    return { WTFMove(tokens), containsOnlyHTMLSpace, containsLineBreak, firstTokenContainsLineBreak, lastTokenContainsLineBreak };
+    return { WTFMove(tokens), *textNode, containsOnlyHTMLSpace || isNodeExcluded, containsLineBreak, firstTokenContainsLineBreak, lastTokenContainsLineBreak };
+}
+
+void TextManipulationController::addItemIfPossible(Vector<ManipulationUnit>&& units)
+{
+    if (units.isEmpty())
+        return;
+
+    size_t index = 0;
+    size_t end = units.size();
+    while (index < units.size() && units[index].areAllTokensExcluded)
+        ++index;
+
+    while (end > 0 && units[end - 1].areAllTokensExcluded)
+        --end;
+
+    if (index == end)
+        return;
+
+    auto startPosition = firstPositionInOrBeforeNode(units.first().node.ptr());
+    auto endPosition = positionAfterNode(units.last().node.ptr());
+    Vector<ManipulationToken> tokens;
+    for (; index < end; ++index)
+        tokens.appendVector(WTFMove(units[index].tokens));
+
+    addItem(ManipulationItemData { startPosition, endPosition, nullptr, nullQName(), WTFMove(tokens) });
 }
 
 void TextManipulationController::observeParagraphs(const Position& start, const Position& end)
@@ -351,24 +377,20 @@
 
     auto document = makeRefPtr(start.document());
     ASSERT(document);
-    ParagraphContentIterator iterator { start, end };
     // TextIterator's constructor may have updated the layout and executed arbitrary scripts.
     if (document != start.document() || document != end.document())
         return;
 
-    Vector<ManipulationToken> tokensInCurrentParagraph;
-    Position startOfCurrentParagraph;
-    Position endOfCurrentParagraph;
+    Vector<ManipulationUnit> unitsInCurrentParagraph;
     RefPtr<Element> enclosingItemBoundaryElement;
-
+    ParagraphContentIterator iterator { start, end };
     for (; !iterator.atEnd(); iterator.advance()) {
         auto content = iterator.currentContent();
         auto* contentNode = content.node.get();
         ASSERT(contentNode);
 
         if (enclosingItemBoundaryElement && !enclosingItemBoundaryElement->contains(contentNode)) {
-            if (!tokensInCurrentParagraph.isEmpty())
-                addItem(ManipulationItemData { startOfCurrentParagraph, endOfCurrentParagraph, nullptr, nullQName(), std::exchange(tokensInCurrentParagraph, { }) });
+            addItemIfPossible(std::exchange(unitsInCurrentParagraph, { }));
             enclosingItemBoundaryElement = nullptr;
         }
 
@@ -401,37 +423,28 @@
         }
 
         if (content.isReplacedContent) {
-            if (!tokensInCurrentParagraph.isEmpty()) {
-                tokensInCurrentParagraph.append(ManipulationToken { m_tokenIdentifier.generate(), "[]", tokenInfo(content.node.get()), true });
-                endOfCurrentParagraph = positionAfterNode(contentNode);
-            }
+            if (!unitsInCurrentParagraph.isEmpty())
+                unitsInCurrentParagraph.append(ManipulationUnit { { ManipulationToken { m_tokenIdentifier.generate(), "[]", tokenInfo(content.node.get()), true } }, *contentNode });
             continue;
         }
 
         if (!content.isTextContent)
             continue;
 
-        auto tokensInCurrentNode = parse(content.text, contentNode);
-        if (!tokensInCurrentParagraph.isEmpty() && tokensInCurrentNode.firstTokenContainsLineBreak)
-            addItem(ManipulationItemData { startOfCurrentParagraph, endOfCurrentParagraph, nullptr, nullQName(), std::exchange(tokensInCurrentParagraph, { }) });
+        auto unitsInCurrentNode = parse(content.text, contentNode);
+        if (unitsInCurrentNode.firstTokenContainsLineBreak)
+            addItemIfPossible(std::exchange(unitsInCurrentParagraph, { }));
 
-        if (tokensInCurrentParagraph.isEmpty()) {
-            if (tokensInCurrentNode.containsOnlyHTMLSpace)
+        if (unitsInCurrentParagraph.isEmpty() && unitsInCurrentNode.areAllTokensExcluded)
                 continue;
-            startOfCurrentParagraph = firstPositionInOrBeforeNode(contentNode);
-        }
 
-        tokensInCurrentParagraph.appendVector(tokensInCurrentNode.tokens);
-        endOfCurrentParagraph = positionAfterNode(contentNode);
+        unitsInCurrentParagraph.append(WTFMove(unitsInCurrentNode));
 
-        if (!tokensInCurrentParagraph.isEmpty() && tokensInCurrentNode.lastTokenContainsLineBreak) {
-            ASSERT(!tokensInCurrentParagraph.isEmpty());
-            addItem(ManipulationItemData { startOfCurrentParagraph, endOfCurrentParagraph, nullptr, nullQName(), std::exchange(tokensInCurrentParagraph, { }) });
-        }
+        if (unitsInCurrentNode.lastTokenContainsLineBreak)
+            addItemIfPossible(std::exchange(unitsInCurrentParagraph, { }));
     }
 
-    if (!tokensInCurrentParagraph.isEmpty())
-        addItem(ManipulationItemData { startOfCurrentParagraph, endOfCurrentParagraph, nullptr, nullQName(), WTFMove(tokensInCurrentParagraph) });
+    addItemIfPossible(std::exchange(unitsInCurrentParagraph, { }));
 }
 
 void TextManipulationController::didCreateRendererForElement(Element& element)
commit	33d45222a2f9b9f73a48805ea6392afde83fc0cf	[log] [tgz]
author	sihui_liu@apple.com <sihui_liu@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>	Fri Jun 05 06:13:30 2020 +0000
committer	sihui_liu@apple.com <sihui_liu@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>	Fri Jun 05 06:13:30 2020 +0000
tree	5b192174a0570a8c3a420c824f986bc06e67cf36
parent	3c81e0d2da11a53f69d583a71f5a476cd91b3eb0 [diff] [blame]