blob: a3bc969fbe72a7b80bdd6fb693a68e10cd33ff08 [file] [log] [blame]
/*
* Copyright (C) 2006-2020 Apple Inc. All rights reserved.
* Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "TextCheckingHelper.h"
#include "Document.h"
#include "DocumentMarkerController.h"
#include "EditorClient.h"
#include "Frame.h"
#include "FrameSelection.h"
#include "Range.h"
#include "Settings.h"
#include "TextCheckerClient.h"
#include "TextIterator.h"
#include "VisibleUnits.h"
#include <unicode/ubrk.h>
#include <wtf/text/StringView.h>
#include <wtf/text/TextBreakIterator.h>
namespace WebCore {
#if !USE(UNIFIED_TEXT_CHECKING)
// Asks |client| for grammatical errors in |text| and appends one Grammar
// TextCheckingResult per reported phrase to |results|.
//
// Each pass hands the client the not-yet-checked suffix of |text|, so the
// location it reports is relative to |checkLocation|; it is rebased to be
// relative to the start of |text| before being stored. Scanning stops when the
// client reports a zero-length phrase or when the whole text has been consumed.
static void findGrammaticalErrors(TextCheckerClient& client, StringView text, Vector<TextCheckingResult>& results)
{
    for (unsigned checkLocation = 0; checkLocation < text.length(); ) {
        int badGrammarLocation = -1;
        int badGrammarLength = 0;
        Vector<GrammarDetail> badGrammarDetails;
        client.checkGrammarOfString(text.substring(checkLocation), badGrammarDetails, &badGrammarLocation, &badGrammarLength);
        if (!badGrammarLength)
            break;

        ASSERT(badGrammarLocation >= 0);
        ASSERT(static_cast<unsigned>(badGrammarLocation) <= text.length() - checkLocation);
        ASSERT(badGrammarLength > 0);
        ASSERT(static_cast<unsigned>(badGrammarLength) <= text.length() - checkLocation - badGrammarLocation);

        // Like findMisspelledWords(), do not rely on assertions alone to reject bad
        // client output: a negative or out-of-bounds answer would otherwise produce a
        // bogus range, and a non-positive advance could keep this loop from terminating.
        if (badGrammarLocation < 0 || badGrammarLength < 0)
            break;
        unsigned remainingLength = text.length() - checkLocation;
        if (static_cast<unsigned>(badGrammarLocation) > remainingLength)
            break;
        if (static_cast<unsigned>(badGrammarLength) > remainingLength - static_cast<unsigned>(badGrammarLocation))
            break;

        TextCheckingResult badGrammar;
        badGrammar.type = TextCheckingType::Grammar;
        badGrammar.range = CharacterRange(checkLocation + badGrammarLocation, badGrammarLength);
        badGrammar.details = WTFMove(badGrammarDetails);
        // Move rather than copy: the result owns a vector of details.
        results.append(WTFMove(badGrammar));

        // Resume right after the phrase that was just reported.
        checkLocation += badGrammarLocation + badGrammarLength;
    }
}
// Walks |text| one word-break segment at a time, asks |client| to spell check
// each segment, and appends a Spelling TextCheckingResult (with the client's
// autocorrect suggestion as its replacement) for every segment in which the
// client reports a misspelling. Result ranges are relative to the start of |text|.
static void findMisspellings(TextCheckerClient& client, StringView text, Vector<TextCheckingResult>& results)
{
    UBreakIterator* wordIterator = wordBreakIterator(text);
    if (!wordIterator)
        return;

    int segmentStart = ubrk_current(wordIterator);
    while (segmentStart >= 0) {
        int segmentEnd = ubrk_next(wordIterator);
        if (segmentEnd < 0)
            break;

        int segmentLength = segmentEnd - segmentStart;
        int locationInSegment = -1;
        int lengthInSegment = 0;
        client.checkSpellingOfString(text.substring(segmentStart, segmentLength), &locationInSegment, &lengthInSegment);

        if (lengthInSegment > 0) {
            ASSERT(locationInSegment >= 0);
            ASSERT(locationInSegment <= segmentLength);
            ASSERT(lengthInSegment > 0);
            ASSERT(locationInSegment + lengthInSegment <= segmentLength);

            TextCheckingResult misspelling;
            misspelling.type = TextCheckingType::Spelling;
            // The client's location is segment-relative; rebase it onto |text|.
            misspelling.range = CharacterRange(segmentStart + locationInSegment, lengthInSegment);
            auto misspelledText = text.substring(misspelling.range.location, misspelling.range.length);
            misspelling.replacement = client.getAutoCorrectSuggestionForMisspelledWord(misspelledText.toStringWithoutCopying());
            results.append(misspelling);
        }

        segmentStart = segmentEnd;
    }
}
#endif
// Returns a range that runs from the start of the paragraph containing
// |range|.start to the end of the paragraph containing |range|.end. If either
// boundary cannot be turned into a boundary point, |range| is returned unchanged.
static SimpleRange expandToParagraphBoundary(const SimpleRange& range)
{
    auto paragraphStart = makeBoundaryPoint(startOfParagraph(makeDeprecatedLegacyPosition(range.start)));
    auto paragraphEnd = makeBoundaryPoint(endOfParagraph(makeDeprecatedLegacyPosition(range.end)));
    if (paragraphStart && paragraphEnd)
        return { *paragraphStart, *paragraphEnd };
    return range;
}
TextCheckingParagraph::TextCheckingParagraph(const SimpleRange& range)
: m_checkingRange(range)
, m_automaticReplacementRange(range)
{
}
TextCheckingParagraph::TextCheckingParagraph(const SimpleRange& checkingRange, const SimpleRange& replacementRange, const std::optional<SimpleRange>& paragraphRange)
: m_checkingRange(checkingRange)
, m_automaticReplacementRange(replacementRange)
, m_paragraphRange(paragraphRange)
{
}
// Moves the end of the paragraph range to the end of the paragraph that
// follows the one at the current paragraph start (when that end can be
// computed), then drops the cached values derived from the paragraph range.
void TextCheckingParagraph::expandRangeToNextEnd()
{
    // Called only for its side effect: guarantees m_paragraphRange holds a value
    // before it is dereferenced below.
    paragraphRange();

    auto nextParagraphStart = startOfNextParagraph(makeDeprecatedLegacyPosition(m_paragraphRange->start));
    auto nextParagraphEnd = makeBoundaryPoint(endOfParagraph(nextParagraphStart));
    if (nextParagraphEnd)
        m_paragraphRange->end = WTFMove(*nextParagraphEnd);

    invalidateParagraphRangeValues();
}
// Clears every lazily computed value that is derived from the paragraph range
// so that it is recomputed on next access. m_checkingLength is not cleared here.
void TextCheckingParagraph::invalidateParagraphRangeValues()
{
    m_text = String();
    m_offsetAsRange.reset();
    m_checkingStart.reset();
    m_automaticReplacementStart.reset();
    m_automaticReplacementLength.reset();
}
// Returns the character count of the paragraph range.
uint64_t TextCheckingParagraph::rangeLength() const
{
    const auto& wholeParagraph = paragraphRange();
    return characterCount(wholeParagraph);
}
// Returns the paragraph range, computing it from the checking range the first
// time it is needed and caching the result.
const SimpleRange& TextCheckingParagraph::paragraphRange() const
{
    if (m_paragraphRange)
        return *m_paragraphRange;

    m_paragraphRange = expandToParagraphBoundary(m_checkingRange);
    return *m_paragraphRange;
}
// Resolves |range|, a character range expressed relative to the paragraph
// range, into a SimpleRange.
SimpleRange TextCheckingParagraph::subrange(CharacterRange range) const
{
    const auto& scope = paragraphRange();
    return resolveCharacterRange(scope, range);
}
// Returns the character count from the start of the paragraph range to
// |position|, or a TypeError exception when no range can be formed between them.
ExceptionOr<uint64_t> TextCheckingParagraph::offsetTo(const Position& position) const
{
    if (auto rangeToPosition = makeSimpleRange(paragraphRange().start, position))
        return characterCount(*rangeToPosition);
    return Exception { TypeError };
}
// Returns true when there is nothing to check: either the checking range has
// no extent or the paragraph text is empty.
bool TextCheckingParagraph::isEmpty() const
{
    // Both tests are expected to agree, but both are performed to be safe.
    // Removing this redundancy still needs investigation.
    if (checkingStart() >= checkingEnd())
        return true;
    return text().isEmpty();
}
// Returns the (cached) range that spans from the start of the paragraph range
// to the start of the checking range.
const SimpleRange& TextCheckingParagraph::offsetAsRange() const
{
    if (m_offsetAsRange)
        return *m_offsetAsRange;

    m_offsetAsRange = SimpleRange { paragraphRange().start, m_checkingRange.start };
    return *m_offsetAsRange;
}
// Returns the plain text of the paragraph range. The text is extracted while
// the cached string is null and is reused afterwards.
StringView TextCheckingParagraph::text() const
{
    if (!m_text.isNull())
        return m_text;

    m_text = plainText(paragraphRange());
    return m_text;
}
// Returns the (cached) character count from the start of the paragraph range
// to the start of the checking range.
uint64_t TextCheckingParagraph::checkingStart() const
{
    if (m_checkingStart)
        return *m_checkingStart;

    m_checkingStart = characterCount(offsetAsRange());
    return *m_checkingStart;
}
// Returns the paragraph-relative character offset just past the checking
// range: checkingStart() plus checkingLength().
uint64_t TextCheckingParagraph::checkingEnd() const
{
    uint64_t start = checkingStart();
    uint64_t length = checkingLength();
    return start + length;
}
// Returns the (cached) character count of the checking range.
uint64_t TextCheckingParagraph::checkingLength() const
{
    if (m_checkingLength)
        return *m_checkingLength;

    m_checkingLength = characterCount(m_checkingRange);
    return *m_checkingLength;
}
// Returns the (cached) character count from the start of the paragraph range
// to the start of the automatic replacement range.
uint64_t TextCheckingParagraph::automaticReplacementStart() const
{
    if (m_automaticReplacementStart)
        return *m_automaticReplacementStart;

    m_automaticReplacementStart = characterCount({ paragraphRange().start, m_automaticReplacementRange.start });
    return *m_automaticReplacementStart;
}
// Returns the (cached) character count of the automatic replacement range.
uint64_t TextCheckingParagraph::automaticReplacementLength() const
{
    if (m_automaticReplacementLength)
        return *m_automaticReplacementLength;

    m_automaticReplacementLength = characterCount(m_automaticReplacementRange);
    return *m_automaticReplacementLength;
}
TextCheckingHelper::TextCheckingHelper(EditorClient& client, const SimpleRange& range)
: m_client(client)
, m_range(range)
{
}
// Spell checks m_range chunk by chunk using a WordAwareIterator.
//
// With Operation::MarkAll a Spelling marker is added for every misspelling
// found; with Operation::FindFirst the scan stops at the first one. In both
// modes the returned pair describes the first misspelling seen: the word with
// its character offset from the start of m_range, and the range it occupies.
// The pair is left default-constructed when nothing is found.
auto TextCheckingHelper::findMisspelledWords(Operation operation) const -> std::pair<MisspelledWord, std::optional<SimpleRange>>
{
    std::pair<MisspelledWord, std::optional<SimpleRange>> firstMisspelling;

    uint64_t chunkOffset = 0;
    for (WordAwareIterator iterator(m_range); !iterator.atEnd(); chunkOffset += iterator.text().length(), iterator.advance()) {
        StringView chunk = iterator.text();
        if (chunk == " "_s)
            continue;

        int locationInChunk = -1;
        int lengthInChunk = 0;
        m_client.textChecker()->checkSpellingOfString(chunk, &locationInChunk, &lengthInChunk);

        int chunkLength = chunk.length();

        // 5490627 showed a code path in which building the String below crashed. The
        // exact bad input was never identified, so besides asserting, the values are
        // validated explicitly to stay robust on release builds.
        ASSERT(lengthInChunk >= 0);
        ASSERT(locationInChunk >= -1);
        ASSERT(!lengthInChunk || locationInChunk >= 0);
        ASSERT(locationInChunk < chunkLength);
        ASSERT(lengthInChunk <= chunkLength);
        ASSERT(locationInChunk + lengthInChunk <= chunkLength);

        bool hasUsableResult = locationInChunk >= 0
            && lengthInChunk > 0
            && locationInChunk < chunkLength
            && lengthInChunk <= chunkLength
            && locationInChunk + lengthInChunk <= chunkLength;
        if (!hasUsableResult)
            continue;

        // The client's location is chunk-relative; rebase it onto m_range.
        uint64_t misspellingOffset = chunkOffset + locationInChunk;
        auto misspellingRange = resolveCharacterRange(m_range, CharacterRange(misspellingOffset, lengthInChunk));

        if (operation == Operation::MarkAll)
            addMarker(misspellingRange, DocumentMarker::Spelling);

        if (firstMisspelling.first.word.isNull()) {
            firstMisspelling = {
                { chunk.substring(locationInChunk, lengthInChunk).toString(), misspellingOffset },
                WTFMove(misspellingRange)
            };
        }

        if (operation == Operation::FindFirst)
            break;
    }

    return firstMisspelling;
}
// Returns the first misspelled word in m_range without adding any markers.
auto TextCheckingHelper::findFirstMisspelledWord() const -> MisspelledWord
{
    auto misspellings = findMisspelledWords(Operation::FindFirst);
    return WTFMove(misspellings.first);
}
// Finds the first misspelled word or, when |checkGrammar| is true, the first
// ungrammatical phrase inside m_range, using the unified text checker.
//
// The search proceeds one paragraph at a time, because the checker is handed
// whole paragraphs for context; results that fall outside m_range are ignored.
// Offsets in the returned item are character offsets from the start of m_range.
//
// Returns a default-constructed variant when the unified text checker is
// disabled, when platform-driven text checking is enabled, when the initial
// paragraph boundaries cannot be computed, or when nothing is found.
auto TextCheckingHelper::findFirstMisspelledWordOrUngrammaticalPhrase(bool checkGrammar) const -> std::variant<MisspelledWord, UngrammaticalPhrase>
{
    if (!unifiedTextCheckerEnabled())
        return { };

    if (platformDrivenTextCheckerEnabled())
        return { };

    std::variant<MisspelledWord, UngrammaticalPhrase> firstFoundItem;

    GrammarDetail grammarDetail;

    String misspelledWord;
    String badGrammarPhrase;

    // Expand the search range to encompass entire paragraphs, since text checking needs that much context.
    // Determine the character offset from the start of the paragraph to the start of the original search range,
    // since we will want to ignore results in this area.
    // Neither helper below is guaranteed to yield a value, so bail out with the
    // empty result instead of dereferencing an empty optional.
    auto rangeFromParagraphStart = makeSimpleRange(startOfParagraph(makeDeprecatedLegacyPosition(m_range.start)), m_range.end);
    if (!rangeFromParagraphStart)
        return { };
    auto paragraphRange = WTFMove(*rangeFromParagraphStart);
    auto totalRangeLength = characterCount(paragraphRange);

    auto firstParagraphEnd = makeBoundaryPoint(endOfParagraph(makeDeprecatedLegacyPosition(m_range.start)));
    if (!firstParagraphEnd)
        return { };
    paragraphRange.end = WTFMove(*firstParagraphEnd);

    auto rangeStartOffset = characterCount({ paragraphRange.start, m_range.start });
    uint64_t totalLengthProcessed = 0;

    bool firstIteration = true;
    bool lastIteration = false;
    while (totalLengthProcessed < totalRangeLength) {
        // Iterate through the search range by paragraphs, checking each one for spelling and grammar.
        auto currentLength = characterCount(paragraphRange);
        uint64_t currentStartOffset = firstIteration ? rangeStartOffset : 0;
        uint64_t currentEndOffset = currentLength;
        if (inSameParagraph(makeDeprecatedLegacyPosition(paragraphRange.start), makeDeprecatedLegacyPosition(m_range.end))) {
            // Determine the character offset from the end of the original search range to the end of the paragraph,
            // since we will want to ignore results in this area.
            currentEndOffset = characterCount({ paragraphRange.start, m_range.end });
            lastIteration = true;
        }
        if (currentStartOffset < currentEndOffset) {
            String paragraphString = plainText(paragraphRange);
            if (paragraphString.length() > 0) {
                bool foundGrammar = false;
                uint64_t spellingLocation = 0;
                uint64_t grammarPhraseLocation = 0;
                uint64_t grammarDetailLocation = 0;
                unsigned grammarDetailIndex = 0;

                Vector<TextCheckingResult> results;
                OptionSet<TextCheckingType> checkingTypes { TextCheckingType::Spelling };
                if (checkGrammar)
                    checkingTypes.add(TextCheckingType::Grammar);
                VisibleSelection currentSelection;
                if (Frame* frame = paragraphRange.start.document().frame())
                    currentSelection = frame->selection().selection();
                checkTextOfParagraph(*m_client.textChecker(), paragraphString, checkingTypes, results, currentSelection);

                for (auto& result : results) {
                    // A spelling result counts only if it lies entirely inside the search range.
                    if (result.type == TextCheckingType::Spelling && result.range.location >= currentStartOffset && result.range.location + result.range.length <= currentEndOffset) {
                        ASSERT(result.range.length > 0);
                        spellingLocation = result.range.location;
                        misspelledWord = paragraphString.substring(result.range.location, result.range.length);
                        ASSERT(misspelledWord.length());
                        break;
                    }
                    // A grammar result is considered as soon as it overlaps the search range.
                    if (checkGrammar && result.type == TextCheckingType::Grammar && result.range.location < currentEndOffset && result.range.location + result.range.length > currentStartOffset) {
                        ASSERT(result.range.length > 0);
                        // We can't stop after the first grammar result, since there might still be a spelling result after
                        // it begins but before the first detail in it, but we can stop if we find a second grammar result.
                        if (foundGrammar)
                            break;
                        // Pick the earliest detail that lies entirely inside the search range.
                        for (unsigned j = 0; j < result.details.size(); j++) {
                            const GrammarDetail* detail = &result.details[j];
                            ASSERT(detail->range.length > 0);
                            if (result.range.location + detail->range.location >= currentStartOffset && result.range.location + detail->range.location + detail->range.length <= currentEndOffset && (!foundGrammar || result.range.location + detail->range.location < grammarDetailLocation)) {
                                grammarDetailIndex = j;
                                grammarDetailLocation = result.range.location + detail->range.location;
                                foundGrammar = true;
                            }
                        }
                        if (foundGrammar) {
                            grammarPhraseLocation = result.range.location;
                            grammarDetail = result.details[grammarDetailIndex];
                            badGrammarPhrase = paragraphString.substring(result.range.location, result.range.length);
                            ASSERT(badGrammarPhrase.length());
                        }
                    }
                }

                // Prefer the misspelling unless a grammar detail starts strictly before it.
                if (!misspelledWord.isEmpty() && (!checkGrammar || badGrammarPhrase.isEmpty() || spellingLocation <= grammarDetailLocation)) {
                    uint64_t spellingOffset = spellingLocation - currentStartOffset;
                    // Past the first paragraph, add the distance from the start of the
                    // search range to this paragraph so the offset stays relative to m_range.
                    if (!firstIteration)
                        spellingOffset += characterCount({ m_range.start, paragraphRange.start });
                    firstFoundItem = MisspelledWord {
                        misspelledWord,
                        spellingOffset
                    };
                    break;
                }
                if (checkGrammar && !badGrammarPhrase.isEmpty()) {
                    uint64_t grammarPhraseOffset = grammarPhraseLocation - currentStartOffset;
                    if (!firstIteration)
                        grammarPhraseOffset += characterCount({ m_range.start, paragraphRange.start });
                    firstFoundItem = UngrammaticalPhrase {
                        badGrammarPhrase,
                        grammarPhraseOffset,
                        grammarDetail
                    };
                    break;
                }
            }
        }
        if (lastIteration || totalLengthProcessed + currentLength >= totalRangeLength)
            break;

        // Advance to the next paragraph; stop if it cannot be expressed as a range.
        auto nextStart = startOfNextParagraph(makeDeprecatedLegacyPosition(paragraphRange.end));
        auto nextParagraphRange = makeSimpleRange(nextStart, endOfParagraph(nextStart));
        if (!nextParagraphRange)
            break;

        paragraphRange = WTFMove(*nextParagraphRange);
        firstIteration = false;
        totalLengthProcessed += currentLength;
    }
    return firstFoundItem;
}
// Examines the details of one bad-grammar phrase located at paragraph offset
// |badGrammarPhraseLocation|. Only details whose start lies in
// [|startOffset|, |endOffset|) are considered. With Operation::MarkAll each
// such detail also gets a Grammar marker carrying its user description.
//
// Returns the index in |grammarDetails| of the considered detail with the
// smallest location, or -1 when no detail starts inside the search range.
int TextCheckingHelper::findUngrammaticalPhrases(Operation operation, const Vector<GrammarDetail>& grammarDetails, uint64_t badGrammarPhraseLocation, uint64_t startOffset, uint64_t endOffset) const
{
    int earliestIndex = -1;
    uint64_t earliestLocation = 0;

    for (unsigned index = 0; index < grammarDetails.size(); ++index) {
        const GrammarDetail& detail = grammarDetails[index];
        ASSERT(detail.range.length > 0);

        // Ignore details that start before or after the original search range.
        uint64_t detailStartInParagraph = badGrammarPhraseLocation + detail.range.location;
        bool startsInsideSearchRange = detailStartInParagraph >= startOffset && detailStartInParagraph < endOffset;
        if (!startsInsideSearchRange)
            continue;

        if (operation == Operation::MarkAll) {
            auto badGrammarRange = resolveCharacterRange(m_range, { badGrammarPhraseLocation - startOffset + detail.range.location, detail.range.length });
            addMarker(badGrammarRange, DocumentMarker::Grammar, detail.userDescription);
        }

        // The details come in no guaranteed order, so track the earliest explicitly.
        if (earliestIndex < 0 || detail.range.location < earliestLocation) {
            earliestIndex = index;
            earliestLocation = detail.range.location;
        }
    }

    return earliestIndex;
}
// Grammar checks the paragraph(s) around m_range and returns the first
// ungrammatical phrase that has a detail starting inside m_range, with its
// offset expressed relative to the start of m_range.
//
// With Operation::FindFirst the scan stops at that first phrase. With
// Operation::MarkAll the scan continues to the end of the checking range and
// Grammar markers are added for every qualifying detail; the returned phrase is
// still the first one found. The result is default-constructed when nothing
// qualifies.
auto TextCheckingHelper::findUngrammaticalPhrases(Operation operation) const -> UngrammaticalPhrase
{
    UngrammaticalPhrase result;

    // Expand the search range to encompass entire paragraphs, since grammar checking needs that much context.
    // Determine the character offset from the start of the paragraph to the start of the original search range,
    // since we will want to ignore results in this area.
    TextCheckingParagraph paragraph(m_range);

    // Start checking from beginning of paragraph, but skip past results that occur before the start of the original search range.
    for (uint64_t startOffset = 0; startOffset < paragraph.checkingEnd(); ) {
        Vector<GrammarDetail> grammarDetails;
        int badGrammarPhraseLocation = -1;
        int badGrammarPhraseLength = 0;
        m_client.textChecker()->checkGrammarOfString(paragraph.text().substring(startOffset), grammarDetails, &badGrammarPhraseLocation, &badGrammarPhraseLength);

        if (!badGrammarPhraseLength) {
            ASSERT(badGrammarPhraseLocation == -1);
            // No further bad grammar. Leave through the common return so that a phrase
            // already recorded during Operation::MarkAll is reported, just as it is when
            // the loop ends by reaching checkingEnd(). In FindFirst mode |result| is
            // always still empty here, so that mode is unaffected.
            break;
        }

        ASSERT(badGrammarPhraseLocation >= 0);
        // The client's location is relative to |startOffset|; rebase it onto the paragraph.
        badGrammarPhraseLocation += startOffset;

        // Found some bad grammar. Find the earliest detail range that starts in our search range (if any).
        int badGrammarIndex = findUngrammaticalPhrases(operation, grammarDetails, badGrammarPhraseLocation, paragraph.checkingStart(), paragraph.checkingEnd());
        if (badGrammarIndex >= 0 && result.phrase.isEmpty()) {
            result.offset = badGrammarPhraseLocation - paragraph.checkingStart();
            result.phrase = paragraph.text().substring(badGrammarPhraseLocation, badGrammarPhraseLength).toString();
            ASSERT(static_cast<unsigned>(badGrammarIndex) < grammarDetails.size());
            result.detail = grammarDetails[badGrammarIndex];

            // Found one. We're done now, unless we're marking each instance.
            if (operation == Operation::FindFirst)
                break;
        }

        // These results were all between the start of the paragraph and the start of the search range; look beyond this phrase.
        startOffset = badGrammarPhraseLocation + badGrammarPhraseLength;
    }

    return result;
}
// Returns the first ungrammatical phrase in m_range without adding any markers.
auto TextCheckingHelper::findFirstUngrammaticalPhrase() const -> UngrammaticalPhrase
{
    constexpr auto operation = Operation::FindFirst;
    return findUngrammaticalPhrases(operation);
}
// Treats m_range as a single candidate word or phrase and asks the unified
// text checker about it.
//
// If a spelling result matches the checking range exactly, the spelling UI is
// updated with the word and the client's guesses are returned flagged as a
// misspelling. Otherwise, when |checkGrammar| is true and a grammar result
// covers the checking range with a detail that matches it exactly, the spelling
// UI is updated with the phrase and that detail's guesses are returned flagged
// as ungrammatical. In every other case an empty result is returned.
TextCheckingGuesses TextCheckingHelper::guessesForMisspelledWordOrUngrammaticalPhrase(bool checkGrammar) const
{
    if (!unifiedTextCheckerEnabled() || platformDrivenTextCheckerEnabled() || m_range.collapsed())
        return { };

    // Expand the range to encompass entire paragraphs, since text checking needs that much context.
    TextCheckingParagraph paragraph(m_range);
    if (paragraph.isEmpty())
        return { };

    OptionSet<TextCheckingType> checkingTypes { TextCheckingType::Spelling };
    if (checkGrammar)
        checkingTypes.add(TextCheckingType::Grammar);

    VisibleSelection currentSelection;
    if (auto frame = m_range.start.document().frame())
        currentSelection = frame->selection().selection();

    Vector<TextCheckingResult> results;
    checkTextOfParagraph(*m_client.textChecker(), paragraph.text(), checkingTypes, results, currentSelection);

    // First pass: a spelling result that is exactly the checking range.
    for (auto& result : results) {
        if (result.type != TextCheckingType::Spelling || !paragraph.checkingRangeMatches(result.range))
            continue;

        String misspelledWord = paragraph.checkingSubstring().toString();
        ASSERT(misspelledWord.length());

        Vector<String> guesses;
        m_client.textChecker()->getGuessesForWord(misspelledWord, String(), currentSelection, guesses);
        m_client.updateSpellingUIWithMisspelledWord(misspelledWord);
        return { WTFMove(guesses), true, false };
    }

    if (!checkGrammar)
        return { };

    // Second pass: a grammar result covering the checking range, one of whose
    // details is exactly the checking range.
    for (auto& result : results) {
        if (result.type != TextCheckingType::Grammar || !paragraph.isCheckingRangeCoveredBy(result.range))
            continue;

        for (auto& detail : result.details) {
            ASSERT(detail.range.length > 0);
            if (!paragraph.checkingRangeMatches({ result.range.location + detail.range.location, detail.range.length }))
                continue;

            String badGrammarPhrase = paragraph.text().substring(result.range.location, result.range.length).toString();
            ASSERT(badGrammarPhrase.length());
            m_client.updateSpellingUIWithGrammarString(badGrammarPhrase, detail);
            return { WTFMove(detail.guesses), false, true };
        }
    }

    return { };
}
std::optional<SimpleRange> TextCheckingHelper::markAllMisspelledWords() const
{
return findMisspelledWords(Operation::MarkAll).second;
}
// Adds Grammar markers for the ungrammatical phrases in m_range. The phrase
// returned by the search is intentionally not used.
void TextCheckingHelper::markAllUngrammaticalPhrases() const
{
    constexpr auto operation = Operation::MarkAll;
    findUngrammaticalPhrases(operation);
}
// Returns whether the unified text checker is enabled for the frame of the
// document that contains the start of m_range.
bool TextCheckingHelper::unifiedTextCheckerEnabled() const
{
    auto* frame = m_range.start.document().frame();
    return WebCore::unifiedTextCheckerEnabled(frame);
}
// Runs the requested |checkingTypes| over |text| and stores the findings in
// |results|, replacing whatever it held.
//
// With unified text checking the whole job is delegated to |client|. Otherwise
// spelling and grammar are checked separately: grammar is only checked on the
// part of |text| that precedes the first misspelling, and grammar results are
// placed ahead of the spelling results.
void checkTextOfParagraph(TextCheckerClient& client, StringView text, OptionSet<TextCheckingType> checkingTypes, Vector<TextCheckingResult>& results, const VisibleSelection& currentSelection)
{
#if USE(UNIFIED_TEXT_CHECKING)
    results = client.checkTextOfParagraph(text, checkingTypes, currentSelection);
#else
    UNUSED_PARAM(currentSelection);

    Vector<TextCheckingResult> misspellings;
    if (checkingTypes.contains(TextCheckingType::Spelling))
        findMisspellings(client, text, misspellings);

    // Look for grammatical errors that occur before the first misspelling.
    Vector<TextCheckingResult> grammaticalErrors;
    if (checkingTypes.contains(TextCheckingType::Grammar)) {
        unsigned firstMisspellingLocation = text.length();
        for (auto& misspelling : misspellings)
            firstMisspellingLocation = std::min<unsigned>(firstMisspellingLocation, misspelling.range.location);
        findGrammaticalErrors(client, text.substring(0, firstMisspellingLocation), grammaticalErrors);
    }

    if (grammaticalErrors.isEmpty())
        results = WTFMove(misspellings);
    else {
        results = WTFMove(grammaticalErrors);
        results.appendVector(misspellings);
    }
#endif // USE(UNIFIED_TEXT_CHECKING)
}
// Returns the frame's unifiedTextCheckerEnabled setting, or false when |frame|
// is null.
bool unifiedTextCheckerEnabled(const Frame* frame)
{
    return frame && frame->settings().unifiedTextCheckerEnabled();
}
// Reports whether this build was compiled with PLATFORM_DRIVEN_TEXT_CHECKING
// enabled. The answer is fixed at compile time.
bool platformDrivenTextCheckerEnabled()
{
#if ENABLE(PLATFORM_DRIVEN_TEXT_CHECKING)
    constexpr bool enabled = true;
#else
    constexpr bool enabled = false;
#endif
    return enabled;
}
}