// Source/WebCore/rendering/BreakLines.h - WebKit - Git at Google

 /*
  * Copyright (C) 2005, 2007, 2010, 2013, 2016 Apple Inc. All rights reserved.
  * Copyright (C) 2011 Google Inc. All rights reserved.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Library General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This library is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Library General Public License for more details.
  *
  * You should have received a copy of the GNU Library General Public License
  * along with this library; see the file COPYING.LIB.  If not, write to
  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  * Boston, MA 02110-1301, USA.
  *
  */

 #pragma once

 #include <unicode/ubrk.h>
 #include <wtf/ASCIICType.h>
 #include <wtf/StdLibExtras.h>
 #include <wtf/text/TextBreakIterator.h>
 #include <wtf/unicode/CharacterNames.h>

 namespace WebCore {

 // Range of characters covered by lineBreakTable: '!' through 127 inclusive.
 static constexpr UChar lineBreakTableFirstCharacter = '!';
 static constexpr UChar lineBreakTableLastCharacter = 127;
 // Each table row is a bitmap with one bit per covered character, packed eight bits to a byte.
 static constexpr unsigned lineBreakTableColumnCount = (lineBreakTableLastCharacter - lineBreakTableFirstCharacter) / 8 + 1;

 // Indexed by (character - lineBreakTableFirstCharacter); the bit for the following character
 // tells shouldBreakAfter() whether a break is allowed between the two.
 WEBCORE_EXPORT extern const unsigned char lineBreakTable[][lineBreakTableColumnCount];

 // Selects how isBreakableSpace() and needsLineBreakIterator() treat noBreakSpace.
 enum class NonBreakingSpaceBehavior {
     // noBreakSpace is never reported as a breakable space.
     IgnoreNonBreakingSpace,
     // noBreakSpace is reported as a breakable space, like ' ', '\n' and '\t'.
     TreatNonBreakingSpaceAsBreak,
 };

 // Compile-time switch for the nextBreakablePosition() template.
 enum class CanUseShortcut {
     // shouldBreakAfter() may decide breaks; the UBreakIterator is consulted only for characters that need it.
     Yes,
     // shouldBreakAfter() is skipped and the UBreakIterator is consulted at every position.
     No
 };

 // Returns true when character is a space that always permits a line break: ' ', '\n' or '\t',
 // plus noBreakSpace when the template argument is TreatNonBreakingSpaceAsBreak.
 template<NonBreakingSpaceBehavior nonBreakingSpaceBehavior>
 static inline bool isBreakableSpace(UChar character)
 {
     if (character == ' ' || character == '\n' || character == '\t')
         return true;

     if (character == noBreakSpace)
         return nonBreakingSpaceBehavior == NonBreakingSpaceBehavior::TreatNonBreakingSpaceAsBreak;

     return false;
 }

 // Decides, without consulting ICU, whether a line break is allowed between character and nextCharacter.
 // lastCharacter is the character preceding character; it is only used for the '-' special case.
 // Returns false when the pair is not covered here, which defers the decision to the Unicode algorithm.
 inline bool shouldBreakAfter(UChar lastCharacter, UChar character, UChar nextCharacter)
 {
     // A '-' in front of a digit may be a minus sign, so keep the two together unless the '-' follows
     // an ASCII letter or digit ('ABCD-1234', '1234-5678'), which may occur in long URLs.
     if (character == '-' && isASCIIDigit(nextCharacter))
         return isASCIIAlphanumeric(lastCharacter);

     auto isInLineBreakTable = [](UChar candidate) {
         return candidate >= lineBreakTableFirstCharacter && candidate <= lineBreakTableLastCharacter;
     };

     // Pairs with a character outside the table range are left to the Unicode algorithm.
     if (!isInLineBreakTable(character) || !isInLineBreakTable(nextCharacter))
         return false;

     // Both characters are covered by lineBreakTable: use it for speed and for compatibility with
     // other browsers. The row is selected by character, the bit within the row by nextCharacter.
     unsigned rowIndex = character - lineBreakTableFirstCharacter;
     unsigned bitIndex = nextCharacter - lineBreakTableFirstCharacter;
     unsigned char packedBits = lineBreakTable[rowIndex][bitIndex / 8];
     return packedBits & (1 << (bitIndex % 8));
 }

 // Returns true when character lies beyond lineBreakTableLastCharacter and therefore cannot be
 // handled by the lookup table. With IgnoreNonBreakingSpace, noBreakSpace is exempted and returns false.
 template<NonBreakingSpaceBehavior nonBreakingSpaceBehavior>
 inline bool needsLineBreakIterator(UChar character)
 {
     if (character <= lineBreakTableLastCharacter)
         return false;

     return nonBreakingSpaceBehavior == NonBreakingSpaceBehavior::TreatNonBreakingSpaceAsBreak || character != noBreakSpace;
 }

 // Scans string from startPosition and returns the first index at which a line break is permitted,
 // or length when no break opportunity is found before the end of the string.
 //
 // A position i is reported when any of the following holds:
 //  - string[i] is a breakable space for the selected nonBreakingSpaceBehavior;
 //  - canUseShortcut is Yes and shouldBreakAfter() allows a break between the previous character and string[i];
 //  - the UBreakIterator reports a boundary at i and the previous character is not a breakable space.
 // The UBreakIterator is only consulted when the shortcut is disabled, or when the current or the
 // previous character requires it (see needsLineBreakIterator()).
 // When in non-loose mode, we can use the ASCII shortcut table.
 template<typename CharacterType, NonBreakingSpaceBehavior nonBreakingSpaceBehavior, CanUseShortcut canUseShortcut>
 inline unsigned nextBreakablePosition(LazyLineBreakIterator& lazyBreakIterator, const CharacterType* string, unsigned length, unsigned startPosition)
 {
     std::optional<unsigned> nextBreak;

     // Seed the two characters that precede startPosition. Characters located before string[0] come
     // from the iterator's prior context: lastCharacter() is the one immediately before string[0] and
     // secondToLastCharacter() is the one before that. Consequently, when startPosition is 1 the
     // character two positions back is lastCharacter(), not secondToLastCharacter().
     CharacterType lastLastCharacter;
     if (startPosition > 1)
         lastLastCharacter = string[startPosition - 2];
     else if (startPosition == 1)
         lastLastCharacter = static_cast<CharacterType>(lazyBreakIterator.lastCharacter());
     else
         lastLastCharacter = static_cast<CharacterType>(lazyBreakIterator.secondToLastCharacter());
     CharacterType lastCharacter = startPosition > 0 ? string[startPosition - 1] : static_cast<CharacterType>(lazyBreakIterator.lastCharacter());
     unsigned priorContextLength = lazyBreakIterator.priorContextLength();
     for (unsigned i = startPosition; i < length; i++) {
         CharacterType character = string[i];

         if (isBreakableSpace<nonBreakingSpaceBehavior>(character) || (canUseShortcut == CanUseShortcut::Yes && shouldBreakAfter(lastLastCharacter, lastCharacter, character)))
             return i;

         if (canUseShortcut == CanUseShortcut::No || needsLineBreakIterator<nonBreakingSpaceBehavior>(character) || needsLineBreakIterator<nonBreakingSpaceBehavior>(lastCharacter)) {
             // Only query the iterator again once the scan has moved past the boundary it last reported.
             if (!nextBreak || nextBreak.value() < i) {
                 // Don't break if positioned at start of primary context and there is no prior context.
                 if (i || priorContextLength) {
                     UBreakIterator* breakIterator = lazyBreakIterator.get(priorContextLength);
                     if (breakIterator) {
                         // Offsets passed to and returned by the iterator are shifted by priorContextLength.
                         // Searching from i - 1 allows position i itself to be returned.
                         int candidate = ubrk_following(breakIterator, i - 1 + priorContextLength);
                         if (candidate == UBRK_DONE)
                             nextBreak = std::nullopt;
                         else {
                             unsigned result = candidate;
                             ASSERT(result >= priorContextLength);
                             nextBreak = result - priorContextLength;
                         }
                     }
                 }
             }
             // A boundary directly after a breakable space is not reported here.
             if (i == nextBreak && !isBreakableSpace<nonBreakingSpaceBehavior>(lastCharacter))
                 return i;
         }

         lastLastCharacter = lastCharacter;
         lastCharacter = character;
     }

     return length;
 }

 // Returns the next break opportunity at or after startPosition when words must be kept intact:
 // the index of the first breakable space, or the index just past the first ideographicSpace,
 // whichever is met first. Returns length when neither occurs.
 template<typename CharacterType, NonBreakingSpaceBehavior nonBreakingSpaceBehavior>
 inline unsigned nextBreakablePositionKeepingAllWords(const CharacterType* string, unsigned length, unsigned startPosition)
 {
     // FIXME: Use ICU instead.
     for (unsigned position = startPosition; position < length; ++position) {
         CharacterType current = string[position];
         if (isBreakableSpace<nonBreakingSpaceBehavior>(current))
             return position;
         // The break goes after an ideographic space rather than before it.
         if (current == ideographicSpace)
             return position + 1;
     }
     return length;
 }

 // Runs the keep-all-words scan over the iterator's string view, treating noBreakSpace as breakable.
 // The LChar or UChar instantiation is chosen according to is8Bit().
 inline unsigned nextBreakablePositionKeepingAllWords(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition)
 {
     constexpr auto nbspBehavior = NonBreakingSpaceBehavior::TreatNonBreakingSpaceAsBreak;
     auto text = lazyBreakIterator.stringView();
     return text.is8Bit()
         ? nextBreakablePositionKeepingAllWords<LChar, nbspBehavior>(text.characters8(), text.length(), startPosition)
         : nextBreakablePositionKeepingAllWords<UChar, nbspBehavior>(text.characters16(), text.length(), startPosition);
 }

 // Runs the keep-all-words scan over the iterator's string view without treating noBreakSpace as breakable.
 // The LChar or UChar instantiation is chosen according to is8Bit().
 inline unsigned nextBreakablePositionKeepingAllWordsIgnoringNBSP(LazyLineBreakIterator& iterator, unsigned startPosition)
 {
     constexpr auto nbspBehavior = NonBreakingSpaceBehavior::IgnoreNonBreakingSpace;
     auto text = iterator.stringView();
     return text.is8Bit()
         ? nextBreakablePositionKeepingAllWords<LChar, nbspBehavior>(text.characters8(), text.length(), startPosition)
         : nextBreakablePositionKeepingAllWords<UChar, nbspBehavior>(text.characters16(), text.length(), startPosition);
 }

 // Finds the next break opportunity in the iterator's string view with the ASCII shortcut enabled
 // and noBreakSpace treated as breakable. The LChar or UChar instantiation is chosen by is8Bit().
 inline unsigned nextBreakablePosition(LazyLineBreakIterator& iterator, unsigned startPosition)
 {
     constexpr auto nbspBehavior = NonBreakingSpaceBehavior::TreatNonBreakingSpaceAsBreak;
     constexpr auto shortcut = CanUseShortcut::Yes;
     auto text = iterator.stringView();
     return text.is8Bit()
         ? nextBreakablePosition<LChar, nbspBehavior, shortcut>(iterator, text.characters8(), text.length(), startPosition)
         : nextBreakablePosition<UChar, nbspBehavior, shortcut>(iterator, text.characters16(), text.length(), startPosition);
 }

 // Finds the next break opportunity in the iterator's string view with the ASCII shortcut enabled
 // and noBreakSpace not treated as breakable. The LChar or UChar instantiation is chosen by is8Bit().
 inline unsigned nextBreakablePositionIgnoringNBSP(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition)
 {
     constexpr auto nbspBehavior = NonBreakingSpaceBehavior::IgnoreNonBreakingSpace;
     constexpr auto shortcut = CanUseShortcut::Yes;
     auto text = lazyBreakIterator.stringView();
     return text.is8Bit()
         ? nextBreakablePosition<LChar, nbspBehavior, shortcut>(lazyBreakIterator, text.characters8(), text.length(), startPosition)
         : nextBreakablePosition<UChar, nbspBehavior, shortcut>(lazyBreakIterator, text.characters16(), text.length(), startPosition);
 }

 // Finds the next break opportunity in the iterator's string view with the ASCII shortcut disabled
 // and noBreakSpace treated as breakable. The LChar or UChar instantiation is chosen by is8Bit().
 inline unsigned nextBreakablePositionWithoutShortcut(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition)
 {
     constexpr auto nbspBehavior = NonBreakingSpaceBehavior::TreatNonBreakingSpaceAsBreak;
     constexpr auto shortcut = CanUseShortcut::No;
     auto text = lazyBreakIterator.stringView();
     return text.is8Bit()
         ? nextBreakablePosition<LChar, nbspBehavior, shortcut>(lazyBreakIterator, text.characters8(), text.length(), startPosition)
         : nextBreakablePosition<UChar, nbspBehavior, shortcut>(lazyBreakIterator, text.characters16(), text.length(), startPosition);
 }

 // Finds the next break opportunity in the iterator's string view with the ASCII shortcut disabled
 // and noBreakSpace not treated as breakable. The LChar or UChar instantiation is chosen by is8Bit().
 inline unsigned nextBreakablePositionIgnoringNBSPWithoutShortcut(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition)
 {
     constexpr auto nbspBehavior = NonBreakingSpaceBehavior::IgnoreNonBreakingSpace;
     constexpr auto shortcut = CanUseShortcut::No;
     auto text = lazyBreakIterator.stringView();
     return text.is8Bit()
         ? nextBreakablePosition<LChar, nbspBehavior, shortcut>(lazyBreakIterator, text.characters8(), text.length(), startPosition)
         : nextBreakablePosition<UChar, nbspBehavior, shortcut>(lazyBreakIterator, text.characters16(), text.length(), startPosition);
 }

 // Returns the first boundary after startPosition reported by a NonSharedCharacterBreakIterator
 // built over the iterator's string view, or the string length when there is no further boundary.
 inline unsigned nextBreakablePositionBreakCharacter(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition)
 {
     auto stringView = lazyBreakIterator.stringView();
     ASSERT(startPosition <= stringView.length());
     // FIXME: Can/Should we implement this using a Shared Iterator (performance issue)
     // https://bugs.webkit.org/show_bug.cgi?id=197876
     NonSharedCharacterBreakIterator iterator(stringView);
     // ubrk_following() signals "no further boundary" with UBRK_DONE (a negative value). It must be
     // tested explicitly: storing it in a std::optional<unsigned> would yield an engaged optional
     // holding a huge value, so a value_or() fallback would never apply.
     int next = ubrk_following(iterator, startPosition);
     if (next == UBRK_DONE)
         return stringView.length();
     return static_cast<unsigned>(next);
 }

 // Returns true when a line break is allowed at startPosition.
 // nextBreakable caches the most recently computed break position: it is reused while it is at or
 // beyond startPosition and recomputed otherwise. The flags select the scanning function:
 // breakAnywhere takes precedence, then keepAllWords, then canUseShortcut; breakNBSP chooses
 // between the variants that do and do not treat noBreakSpace as breakable.
 inline bool isBreakable(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition, std::optional<unsigned>& nextBreakable, bool breakNBSP, bool canUseShortcut, bool keepAllWords, bool breakAnywhere)
 {
     bool cacheIsStale = !nextBreakable || nextBreakable.value() < startPosition;
     if (cacheIsStale) {
         if (breakAnywhere)
             nextBreakable = nextBreakablePositionBreakCharacter(lazyBreakIterator, startPosition);
         else if (keepAllWords) {
             nextBreakable = breakNBSP
                 ? nextBreakablePositionKeepingAllWords(lazyBreakIterator, startPosition)
                 : nextBreakablePositionKeepingAllWordsIgnoringNBSP(lazyBreakIterator, startPosition);
         } else if (canUseShortcut) {
             nextBreakable = breakNBSP
                 ? nextBreakablePosition(lazyBreakIterator, startPosition)
                 : nextBreakablePositionIgnoringNBSP(lazyBreakIterator, startPosition);
         } else {
             nextBreakable = breakNBSP
                 ? nextBreakablePositionWithoutShortcut(lazyBreakIterator, startPosition)
                 : nextBreakablePositionIgnoringNBSPWithoutShortcut(lazyBreakIterator, startPosition);
         }
     }
     return startPosition == nextBreakable;
 }

 } // namespace WebCore
	/*
	* Copyright (C) 2005, 2007, 2010, 2013, 2016 Apple Inc. All rights reserved.
	* Copyright (C) 2011 Google Inc. All rights reserved.
	*
	* This library is free software; you can redistribute it and/or
	* modify it under the terms of the GNU Library General Public
	* License as published by the Free Software Foundation; either
	* version 2 of the License, or (at your option) any later version.
	*
	* This library is distributed in the hope that it will be useful,
	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	* Library General Public License for more details.
	*
	* You should have received a copy of the GNU Library General Public License
	* along with this library; see the file COPYING.LIB. If not, write to
	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
	* Boston, MA 02110-1301, USA.
	*
	*/

	#pragma once

	#include <unicode/ubrk.h>
	#include <wtf/ASCIICType.h>
	#include <wtf/StdLibExtras.h>
	#include <wtf/text/TextBreakIterator.h>
	#include <wtf/unicode/CharacterNames.h>

	namespace WebCore {

	// Range of characters covered by lineBreakTable: '!' through 127 inclusive.
	static constexpr UChar lineBreakTableFirstCharacter = '!';
	static constexpr UChar lineBreakTableLastCharacter = 127;
	// Each table row is a bitmap with one bit per covered character, packed eight bits to a byte.
	static constexpr unsigned lineBreakTableColumnCount = (lineBreakTableLastCharacter - lineBreakTableFirstCharacter) / 8 + 1;

	// Indexed by (character - lineBreakTableFirstCharacter); the bit for the following character
	// tells shouldBreakAfter() whether a break is allowed between the two.
	WEBCORE_EXPORT extern const unsigned char lineBreakTable[][lineBreakTableColumnCount];

	// Selects how isBreakableSpace() and needsLineBreakIterator() treat noBreakSpace.
	enum class NonBreakingSpaceBehavior {
	// noBreakSpace is never reported as a breakable space.
	IgnoreNonBreakingSpace,
	// noBreakSpace is reported as a breakable space, like ' ', '\n' and '\t'.
	TreatNonBreakingSpaceAsBreak,
	};

	// Compile-time switch for the nextBreakablePosition() template.
	enum class CanUseShortcut {
	// shouldBreakAfter() may decide breaks; the UBreakIterator is consulted only for characters that need it.
	Yes,
	// shouldBreakAfter() is skipped and the UBreakIterator is consulted at every position.
	No
	};

	// Returns true when character is a space that always permits a line break: ' ', '\n' or '\t',
	// plus noBreakSpace when the template argument is TreatNonBreakingSpaceAsBreak.
	template<NonBreakingSpaceBehavior nonBreakingSpaceBehavior>
	static inline bool isBreakableSpace(UChar character)
	{
	    if (character == ' ' || character == '\n' || character == '\t')
	        return true;

	    if (character == noBreakSpace)
	        return nonBreakingSpaceBehavior == NonBreakingSpaceBehavior::TreatNonBreakingSpaceAsBreak;

	    return false;
	}

	// Decides, without consulting ICU, whether a line break is allowed between character and nextCharacter.
	// lastCharacter is the character preceding character; it is only used for the '-' special case.
	// Returns false when the pair is not covered here, which defers the decision to the Unicode algorithm.
	inline bool shouldBreakAfter(UChar lastCharacter, UChar character, UChar nextCharacter)
	{
	    // A '-' in front of a digit may be a minus sign, so keep the two together unless the '-' follows
	    // an ASCII letter or digit ('ABCD-1234', '1234-5678'), which may occur in long URLs.
	    if (character == '-' && isASCIIDigit(nextCharacter))
	        return isASCIIAlphanumeric(lastCharacter);

	    auto isInLineBreakTable = [](UChar candidate) {
	        return candidate >= lineBreakTableFirstCharacter && candidate <= lineBreakTableLastCharacter;
	    };

	    // Pairs with a character outside the table range are left to the Unicode algorithm.
	    if (!isInLineBreakTable(character) || !isInLineBreakTable(nextCharacter))
	        return false;

	    // Both characters are covered by lineBreakTable: use it for speed and for compatibility with
	    // other browsers. The row is selected by character, the bit within the row by nextCharacter.
	    unsigned rowIndex = character - lineBreakTableFirstCharacter;
	    unsigned bitIndex = nextCharacter - lineBreakTableFirstCharacter;
	    unsigned char packedBits = lineBreakTable[rowIndex][bitIndex / 8];
	    return packedBits & (1 << (bitIndex % 8));
	}

	// Returns true when character lies beyond lineBreakTableLastCharacter and therefore cannot be
	// handled by the lookup table. With IgnoreNonBreakingSpace, noBreakSpace is exempted and returns false.
	template<NonBreakingSpaceBehavior nonBreakingSpaceBehavior>
	inline bool needsLineBreakIterator(UChar character)
	{
	    if (character <= lineBreakTableLastCharacter)
	        return false;

	    return nonBreakingSpaceBehavior == NonBreakingSpaceBehavior::TreatNonBreakingSpaceAsBreak || character != noBreakSpace;
	}

	// Scans string from startPosition and returns the first index at which a line break is permitted,
	// or length when no break opportunity is found before the end of the string.
	//
	// A position i is reported when any of the following holds:
	//  - string[i] is a breakable space for the selected nonBreakingSpaceBehavior;
	//  - canUseShortcut is Yes and shouldBreakAfter() allows a break between the previous character and string[i];
	//  - the UBreakIterator reports a boundary at i and the previous character is not a breakable space.
	// The UBreakIterator is only consulted when the shortcut is disabled, or when the current or the
	// previous character requires it (see needsLineBreakIterator()).
	// When in non-loose mode, we can use the ASCII shortcut table.
	template<typename CharacterType, NonBreakingSpaceBehavior nonBreakingSpaceBehavior, CanUseShortcut canUseShortcut>
	inline unsigned nextBreakablePosition(LazyLineBreakIterator& lazyBreakIterator, const CharacterType* string, unsigned length, unsigned startPosition)
	{
	    std::optional<unsigned> nextBreak;

	    // Seed the two characters that precede startPosition. Characters located before string[0] come
	    // from the iterator's prior context: lastCharacter() is the one immediately before string[0] and
	    // secondToLastCharacter() is the one before that. Consequently, when startPosition is 1 the
	    // character two positions back is lastCharacter(), not secondToLastCharacter().
	    CharacterType lastLastCharacter;
	    if (startPosition > 1)
	        lastLastCharacter = string[startPosition - 2];
	    else if (startPosition == 1)
	        lastLastCharacter = static_cast<CharacterType>(lazyBreakIterator.lastCharacter());
	    else
	        lastLastCharacter = static_cast<CharacterType>(lazyBreakIterator.secondToLastCharacter());
	    CharacterType lastCharacter = startPosition > 0 ? string[startPosition - 1] : static_cast<CharacterType>(lazyBreakIterator.lastCharacter());
	    unsigned priorContextLength = lazyBreakIterator.priorContextLength();
	    for (unsigned i = startPosition; i < length; i++) {
	        CharacterType character = string[i];

	        if (isBreakableSpace<nonBreakingSpaceBehavior>(character) || (canUseShortcut == CanUseShortcut::Yes && shouldBreakAfter(lastLastCharacter, lastCharacter, character)))
	            return i;

	        if (canUseShortcut == CanUseShortcut::No || needsLineBreakIterator<nonBreakingSpaceBehavior>(character) || needsLineBreakIterator<nonBreakingSpaceBehavior>(lastCharacter)) {
	            // Only query the iterator again once the scan has moved past the boundary it last reported.
	            if (!nextBreak || nextBreak.value() < i) {
	                // Don't break if positioned at start of primary context and there is no prior context.
	                if (i || priorContextLength) {
	                    UBreakIterator* breakIterator = lazyBreakIterator.get(priorContextLength);
	                    if (breakIterator) {
	                        // Offsets passed to and returned by the iterator are shifted by priorContextLength.
	                        // Searching from i - 1 allows position i itself to be returned.
	                        int candidate = ubrk_following(breakIterator, i - 1 + priorContextLength);
	                        if (candidate == UBRK_DONE)
	                            nextBreak = std::nullopt;
	                        else {
	                            unsigned result = candidate;
	                            ASSERT(result >= priorContextLength);
	                            nextBreak = result - priorContextLength;
	                        }
	                    }
	                }
	            }
	            // A boundary directly after a breakable space is not reported here.
	            if (i == nextBreak && !isBreakableSpace<nonBreakingSpaceBehavior>(lastCharacter))
	                return i;
	        }

	        lastLastCharacter = lastCharacter;
	        lastCharacter = character;
	    }

	    return length;
	}

	// Returns the next break opportunity at or after startPosition when words must be kept intact:
	// the index of the first breakable space, or the index just past the first ideographicSpace,
	// whichever is met first. Returns length when neither occurs.
	template<typename CharacterType, NonBreakingSpaceBehavior nonBreakingSpaceBehavior>
	inline unsigned nextBreakablePositionKeepingAllWords(const CharacterType* string, unsigned length, unsigned startPosition)
	{
	    // FIXME: Use ICU instead.
	    for (unsigned position = startPosition; position < length; ++position) {
	        CharacterType current = string[position];
	        if (isBreakableSpace<nonBreakingSpaceBehavior>(current))
	            return position;
	        // The break goes after an ideographic space rather than before it.
	        if (current == ideographicSpace)
	            return position + 1;
	    }
	    return length;
	}

	// Runs the keep-all-words scan over the iterator's string view, treating noBreakSpace as breakable.
	// The LChar or UChar instantiation is chosen according to is8Bit().
	inline unsigned nextBreakablePositionKeepingAllWords(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition)
	{
	    constexpr auto nbspBehavior = NonBreakingSpaceBehavior::TreatNonBreakingSpaceAsBreak;
	    auto text = lazyBreakIterator.stringView();
	    return text.is8Bit()
	        ? nextBreakablePositionKeepingAllWords<LChar, nbspBehavior>(text.characters8(), text.length(), startPosition)
	        : nextBreakablePositionKeepingAllWords<UChar, nbspBehavior>(text.characters16(), text.length(), startPosition);
	}

	// Runs the keep-all-words scan over the iterator's string view without treating noBreakSpace as breakable.
	// The LChar or UChar instantiation is chosen according to is8Bit().
	inline unsigned nextBreakablePositionKeepingAllWordsIgnoringNBSP(LazyLineBreakIterator& iterator, unsigned startPosition)
	{
	    constexpr auto nbspBehavior = NonBreakingSpaceBehavior::IgnoreNonBreakingSpace;
	    auto text = iterator.stringView();
	    return text.is8Bit()
	        ? nextBreakablePositionKeepingAllWords<LChar, nbspBehavior>(text.characters8(), text.length(), startPosition)
	        : nextBreakablePositionKeepingAllWords<UChar, nbspBehavior>(text.characters16(), text.length(), startPosition);
	}

	// Finds the next break opportunity in the iterator's string view with the ASCII shortcut enabled
	// and noBreakSpace treated as breakable. The LChar or UChar instantiation is chosen by is8Bit().
	inline unsigned nextBreakablePosition(LazyLineBreakIterator& iterator, unsigned startPosition)
	{
	    constexpr auto nbspBehavior = NonBreakingSpaceBehavior::TreatNonBreakingSpaceAsBreak;
	    constexpr auto shortcut = CanUseShortcut::Yes;
	    auto text = iterator.stringView();
	    return text.is8Bit()
	        ? nextBreakablePosition<LChar, nbspBehavior, shortcut>(iterator, text.characters8(), text.length(), startPosition)
	        : nextBreakablePosition<UChar, nbspBehavior, shortcut>(iterator, text.characters16(), text.length(), startPosition);
	}

	// Finds the next break opportunity in the iterator's string view with the ASCII shortcut enabled
	// and noBreakSpace not treated as breakable. The LChar or UChar instantiation is chosen by is8Bit().
	inline unsigned nextBreakablePositionIgnoringNBSP(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition)
	{
	    constexpr auto nbspBehavior = NonBreakingSpaceBehavior::IgnoreNonBreakingSpace;
	    constexpr auto shortcut = CanUseShortcut::Yes;
	    auto text = lazyBreakIterator.stringView();
	    return text.is8Bit()
	        ? nextBreakablePosition<LChar, nbspBehavior, shortcut>(lazyBreakIterator, text.characters8(), text.length(), startPosition)
	        : nextBreakablePosition<UChar, nbspBehavior, shortcut>(lazyBreakIterator, text.characters16(), text.length(), startPosition);
	}

	// Finds the next break opportunity in the iterator's string view with the ASCII shortcut disabled
	// and noBreakSpace treated as breakable. The LChar or UChar instantiation is chosen by is8Bit().
	inline unsigned nextBreakablePositionWithoutShortcut(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition)
	{
	    constexpr auto nbspBehavior = NonBreakingSpaceBehavior::TreatNonBreakingSpaceAsBreak;
	    constexpr auto shortcut = CanUseShortcut::No;
	    auto text = lazyBreakIterator.stringView();
	    return text.is8Bit()
	        ? nextBreakablePosition<LChar, nbspBehavior, shortcut>(lazyBreakIterator, text.characters8(), text.length(), startPosition)
	        : nextBreakablePosition<UChar, nbspBehavior, shortcut>(lazyBreakIterator, text.characters16(), text.length(), startPosition);
	}

	// Finds the next break opportunity in the iterator's string view with the ASCII shortcut disabled
	// and noBreakSpace not treated as breakable. The LChar or UChar instantiation is chosen by is8Bit().
	inline unsigned nextBreakablePositionIgnoringNBSPWithoutShortcut(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition)
	{
	    constexpr auto nbspBehavior = NonBreakingSpaceBehavior::IgnoreNonBreakingSpace;
	    constexpr auto shortcut = CanUseShortcut::No;
	    auto text = lazyBreakIterator.stringView();
	    return text.is8Bit()
	        ? nextBreakablePosition<LChar, nbspBehavior, shortcut>(lazyBreakIterator, text.characters8(), text.length(), startPosition)
	        : nextBreakablePosition<UChar, nbspBehavior, shortcut>(lazyBreakIterator, text.characters16(), text.length(), startPosition);
	}

	// Returns the first boundary after startPosition reported by a NonSharedCharacterBreakIterator
	// built over the iterator's string view, or the string length when there is no further boundary.
	inline unsigned nextBreakablePositionBreakCharacter(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition)
	{
	    auto stringView = lazyBreakIterator.stringView();
	    ASSERT(startPosition <= stringView.length());
	    // FIXME: Can/Should we implement this using a Shared Iterator (performance issue)
	    // https://bugs.webkit.org/show_bug.cgi?id=197876
	    NonSharedCharacterBreakIterator iterator(stringView);
	    // ubrk_following() signals "no further boundary" with UBRK_DONE (a negative value). It must be
	    // tested explicitly: storing it in a std::optional<unsigned> would yield an engaged optional
	    // holding a huge value, so a value_or() fallback would never apply.
	    int next = ubrk_following(iterator, startPosition);
	    if (next == UBRK_DONE)
	        return stringView.length();
	    return static_cast<unsigned>(next);
	}

	// Returns true when a line break is allowed at startPosition.
	// nextBreakable caches the most recently computed break position: it is reused while it is at or
	// beyond startPosition and recomputed otherwise. The flags select the scanning function:
	// breakAnywhere takes precedence, then keepAllWords, then canUseShortcut; breakNBSP chooses
	// between the variants that do and do not treat noBreakSpace as breakable.
	inline bool isBreakable(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition, std::optional<unsigned>& nextBreakable, bool breakNBSP, bool canUseShortcut, bool keepAllWords, bool breakAnywhere)
	{
	    bool cacheIsStale = !nextBreakable || nextBreakable.value() < startPosition;
	    if (cacheIsStale) {
	        if (breakAnywhere)
	            nextBreakable = nextBreakablePositionBreakCharacter(lazyBreakIterator, startPosition);
	        else if (keepAllWords) {
	            nextBreakable = breakNBSP
	                ? nextBreakablePositionKeepingAllWords(lazyBreakIterator, startPosition)
	                : nextBreakablePositionKeepingAllWordsIgnoringNBSP(lazyBreakIterator, startPosition);
	        } else if (canUseShortcut) {
	            nextBreakable = breakNBSP
	                ? nextBreakablePosition(lazyBreakIterator, startPosition)
	                : nextBreakablePositionIgnoringNBSP(lazyBreakIterator, startPosition);
	        } else {
	            nextBreakable = breakNBSP
	                ? nextBreakablePositionWithoutShortcut(lazyBreakIterator, startPosition)
	                : nextBreakablePositionIgnoringNBSPWithoutShortcut(lazyBreakIterator, startPosition);
	        }
	    }
	    return startPosition == nextBreakable;
	}

	} // namespace WebCore