| /* |
| * Copyright (C) 2005, 2007, 2010, 2013 Apple Inc. All rights reserved. |
| * Copyright (C) 2011 Google Inc. All rights reserved. |
| * |
| * This library is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU Library General Public |
| * License as published by the Free Software Foundation; either |
| * version 2 of the License, or (at your option) any later version. |
| * |
| * This library is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * Library General Public License for more details. |
| * |
| * You should have received a copy of the GNU Library General Public License |
| * along with this library; see the file COPYING.LIB. If not, write to |
| * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
| * Boston, MA 02110-1301, USA. |
| * |
| */ |
| |
| #ifndef break_lines_h |
| #define break_lines_h |
| |
| #include "TextBreakIterator.h" |
| #include <wtf/ASCIICType.h> |
| #include <wtf/StdLibExtras.h> |
| #include <wtf/unicode/CharacterNames.h> |
| |
| namespace WebCore { |
| |
| static const UChar asciiLineBreakTableFirstChar = '!'; |
| static const UChar asciiLineBreakTableLastChar = 127; |
| static const unsigned asciiLineBreakTableColumnCount = (asciiLineBreakTableLastChar - asciiLineBreakTableFirstChar) / 8 + 1; |
| |
| WEBCORE_EXPORT extern const unsigned char asciiLineBreakTable[][asciiLineBreakTableColumnCount]; |
| |
// Compile-time switch controlling how noBreakSpace is classified by the helpers in this header.
enum class NBSPBehavior {
    // noBreakSpace is not reported as a breakable space.
    IgnoreNBSP,
    // noBreakSpace is reported as a breakable space, like ' ', '\n' and '\t'.
    TreatNBSPAsBreak,
};
| |
| template<NBSPBehavior nbspBehavior> |
| static inline bool isBreakableSpace(UChar ch) |
| { |
| switch (ch) { |
| case ' ': |
| case '\n': |
| case '\t': |
| return true; |
| case noBreakSpace: |
| return nbspBehavior == NBSPBehavior::TreatNBSPAsBreak; |
| default: |
| return false; |
| } |
| } |
| |
| inline bool shouldBreakAfter(UChar lastCh, UChar ch, UChar nextCh) |
| { |
| // Don't allow line breaking between '-' and a digit if the '-' may mean a minus sign in the context, |
| // while allow breaking in 'ABCD-1234' and '1234-5678' which may be in long URLs. |
| if (ch == '-' && isASCIIDigit(nextCh)) |
| return isASCIIAlphanumeric(lastCh); |
| |
| // If both ch and nextCh are ASCII characters, use a lookup table for enhanced speed and for compatibility |
| // with other browsers (see comments for asciiLineBreakTable for details). |
| if (ch >= asciiLineBreakTableFirstChar && ch <= asciiLineBreakTableLastChar && nextCh >= asciiLineBreakTableFirstChar && nextCh <= asciiLineBreakTableLastChar) { |
| const unsigned char* tableRow = asciiLineBreakTable[ch - asciiLineBreakTableFirstChar]; |
| int nextChIndex = nextCh - asciiLineBreakTableFirstChar; |
| return tableRow[nextChIndex / 8] & (1 << (nextChIndex % 8)); |
| } |
| // Otherwise defer to the Unicode algorithm by returning false. |
| return false; |
| } |
| |
| template<NBSPBehavior nbspBehavior> |
| inline bool needsLineBreakIterator(UChar ch) |
| { |
| if (nbspBehavior == NBSPBehavior::TreatNBSPAsBreak) |
| return ch > asciiLineBreakTableLastChar; |
| return ch > asciiLineBreakTableLastChar && ch != noBreakSpace; |
| } |
| |
| // When in non-loose mode, we can use the ASCII shortcut table. |
| template<typename CharacterType, NBSPBehavior nbspBehavior> |
| inline int nextBreakablePositionNonLoosely(LazyLineBreakIterator& lazyBreakIterator, const CharacterType* str, unsigned length, int pos) |
| { |
| int len = static_cast<int>(length); |
| int nextBreak = -1; |
| |
| CharacterType lastLastCh = pos > 1 ? str[pos - 2] : static_cast<CharacterType>(lazyBreakIterator.secondToLastCharacter()); |
| CharacterType lastCh = pos > 0 ? str[pos - 1] : static_cast<CharacterType>(lazyBreakIterator.lastCharacter()); |
| unsigned priorContextLength = lazyBreakIterator.priorContextLength(); |
| for (int i = pos; i < len; i++) { |
| CharacterType ch = str[i]; |
| |
| // Non-loose mode, so use ASCII shortcut (shouldBreakAfter) if not breakable space. |
| if (isBreakableSpace<nbspBehavior>(ch) || shouldBreakAfter(lastLastCh, lastCh, ch)) |
| return i; |
| |
| // Non-loose mode, so conditionally use break iterator. |
| if (needsLineBreakIterator<nbspBehavior>(ch) || needsLineBreakIterator<nbspBehavior>(lastCh)) { |
| if (nextBreak < i) { |
| // Don't break if positioned at start of primary context and there is no prior context. |
| if (i || priorContextLength) { |
| TextBreakIterator* breakIterator = lazyBreakIterator.get(priorContextLength); |
| if (breakIterator) { |
| nextBreak = textBreakFollowing(breakIterator, i - 1 + priorContextLength); |
| if (nextBreak >= 0) |
| nextBreak -= priorContextLength; |
| } |
| } |
| } |
| if (i == nextBreak && !isBreakableSpace<nbspBehavior>(lastCh)) |
| return i; |
| } |
| |
| lastLastCh = lastCh; |
| lastCh = ch; |
| } |
| |
| return len; |
| } |
| |
| // When in loose mode, we can't use the ASCII shortcut table since loose mode allows "$100" to break after '$' in content marked as CJK. |
| // N.B. It should be possible to combine the following with the non-loose version above by adding a LooseBehavior template parameter; |
| // however, when doing this, a 10% performance regression appeared on chromium-win (https://bugs.webkit.org/show_bug.cgi?id=89235#c112). |
| template<typename CharacterType, NBSPBehavior nbspBehavior> |
| static inline int nextBreakablePositionLoosely(LazyLineBreakIterator& lazyBreakIterator, const CharacterType* str, unsigned length, int pos) |
| { |
| int len = static_cast<int>(length); |
| int nextBreak = -1; |
| |
| CharacterType lastCh = pos > 0 ? str[pos - 1] : static_cast<CharacterType>(lazyBreakIterator.lastCharacter()); |
| unsigned priorContextLength = lazyBreakIterator.priorContextLength(); |
| for (int i = pos; i < len; i++) { |
| CharacterType ch = str[i]; |
| |
| // Always loose mode, so don't use ASCII shortcut (shouldBreakAfter). |
| if (isBreakableSpace<nbspBehavior>(ch)) |
| return i; |
| |
| // Always use line break iterator in loose mode. |
| if (nextBreak < i) { |
| // Don't break if positioned at start of primary context and there is no prior context. |
| if (i || priorContextLength) { |
| TextBreakIterator* breakIterator = lazyBreakIterator.get(priorContextLength); |
| if (breakIterator) { |
| nextBreak = textBreakFollowing(breakIterator, i - 1 + priorContextLength); |
| if (nextBreak >= 0) |
| nextBreak -= priorContextLength; |
| } |
| } |
| } |
| if (i == nextBreak && !isBreakableSpace<nbspBehavior>(lastCh)) |
| return i; |
| |
| lastCh = ch; |
| } |
| |
| return len; |
| } |
| |
| template<typename CharacterType, NBSPBehavior nbspBehavior> |
| inline unsigned nextBreakablePositionKeepingAllWords(const CharacterType* string, unsigned length, unsigned startPosition) |
| { |
| for (unsigned i = startPosition; i < length; i++) { |
| if (isBreakableSpace<nbspBehavior>(string[i])) |
| return i; |
| } |
| return length; |
| } |
| |
| inline unsigned nextBreakablePositionKeepingAllWords(LazyLineBreakIterator& lazyBreakIterator, int startPosition) |
| { |
| String string = lazyBreakIterator.string(); |
| if (string.is8Bit()) |
| return nextBreakablePositionKeepingAllWords<LChar, NBSPBehavior::TreatNBSPAsBreak>(string.characters8(), string.length(), startPosition); |
| return nextBreakablePositionKeepingAllWords<UChar, NBSPBehavior::TreatNBSPAsBreak>(string.characters16(), string.length(), startPosition); |
| } |
| |
| inline unsigned nextBreakablePositionKeepingAllWordsIgnoringNBSP(LazyLineBreakIterator& iterator, int startPosition) |
| { |
| String string = iterator.string(); |
| if (string.is8Bit()) |
| return nextBreakablePositionKeepingAllWords<LChar, NBSPBehavior::IgnoreNBSP>(string.characters8(), string.length(), startPosition); |
| return nextBreakablePositionKeepingAllWords<UChar, NBSPBehavior::IgnoreNBSP>(string.characters16(), string.length(), startPosition); |
| } |
| |
| inline int nextBreakablePosition(LazyLineBreakIterator& iterator, int pos) |
| { |
| String string = iterator.string(); |
| if (string.is8Bit()) |
| return nextBreakablePositionNonLoosely<LChar, NBSPBehavior::TreatNBSPAsBreak>(iterator, string.characters8(), string.length(), pos); |
| return nextBreakablePositionNonLoosely<UChar, NBSPBehavior::TreatNBSPAsBreak>(iterator, string.characters16(), string.length(), pos); |
| } |
| |
| inline int nextBreakablePositionIgnoringNBSP(LazyLineBreakIterator& lazyBreakIterator, int pos) |
| { |
| String string = lazyBreakIterator.string(); |
| if (string.is8Bit()) |
| return nextBreakablePositionNonLoosely<LChar, NBSPBehavior::IgnoreNBSP>(lazyBreakIterator, string.characters8(), string.length(), pos); |
| return nextBreakablePositionNonLoosely<UChar, NBSPBehavior::IgnoreNBSP>(lazyBreakIterator, string.characters16(), string.length(), pos); |
| } |
| |
| inline int nextBreakablePositionLoose(LazyLineBreakIterator& lazyBreakIterator, int pos) |
| { |
| String string = lazyBreakIterator.string(); |
| if (string.is8Bit()) |
| return nextBreakablePositionLoosely<LChar, NBSPBehavior::TreatNBSPAsBreak>(lazyBreakIterator, string.characters8(), string.length(), pos); |
| return nextBreakablePositionLoosely<UChar, NBSPBehavior::TreatNBSPAsBreak>(lazyBreakIterator, string.characters16(), string.length(), pos); |
| } |
| |
| inline int nextBreakablePositionIgnoringNBSPLoose(LazyLineBreakIterator& lazyBreakIterator, int pos) |
| { |
| String string = lazyBreakIterator.string(); |
| if (string.is8Bit()) |
| return nextBreakablePositionLoosely<LChar, NBSPBehavior::IgnoreNBSP>(lazyBreakIterator, string.characters8(), string.length(), pos); |
| return nextBreakablePositionLoosely<UChar, NBSPBehavior::IgnoreNBSP>(lazyBreakIterator, string.characters16(), string.length(), pos); |
| } |
| |
| inline bool isBreakable(LazyLineBreakIterator& lazyBreakIterator, int pos, int& nextBreakable, bool breakNBSP, bool isLooseMode, bool keepAllWords) |
| { |
| if (pos <= nextBreakable) |
| return pos == nextBreakable; |
| |
| if (keepAllWords) { |
| if (breakNBSP) |
| nextBreakable = static_cast<int>(nextBreakablePositionKeepingAllWords(lazyBreakIterator, pos)); |
| else |
| nextBreakable = static_cast<int>(nextBreakablePositionKeepingAllWordsIgnoringNBSP(lazyBreakIterator, pos)); |
| } else if (isLooseMode) { |
| if (breakNBSP) |
| nextBreakable = nextBreakablePositionLoose(lazyBreakIterator, pos); |
| else |
| nextBreakable = nextBreakablePositionIgnoringNBSPLoose(lazyBreakIterator, pos); |
| } else { |
| if (breakNBSP) |
| nextBreakable = nextBreakablePosition(lazyBreakIterator, pos); |
| else |
| nextBreakable = nextBreakablePositionIgnoringNBSP(lazyBreakIterator, pos); |
| } |
| return pos == nextBreakable; |
| } |
| |
| } // namespace WebCore |
| |
| #endif // break_lines_h |