// blob: d6987717c9ff6a4c252dda3efbe2523a2115535f [file] [log] [blame]
/*
* Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
* Copyright (C) 2007, 2011, 2012 Apple Inc. All rights reserved.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public License
* along with this library; see the file COPYING.LIB. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#ifndef TextBreakIterator_h
#define TextBreakIterator_h
#include <wtf/text/AtomicString.h>
#include <wtf/text/StringView.h>
namespace WTF {
// Opaque iterator type; only pointers to it are used in this header.
class TextBreakIterator;
// Note: A TextBreakIterator returned by one of the functions declared in this header is
// valid only until you request another iterator; iterators obtained from
// acquireLineBreakIterator are the exception.
// Mode passed to acquireLineBreakIterator / openLineBreakIterator when requesting a line break iterator.
// NOTE(review): the Loose / Normal / Strict variants presumably select differently tailored
// UAX #14 rule sets — confirm against the implementation.
enum LineBreakIteratorMode {
// Default mode used by LazyLineBreakIterator when no mode is supplied.
LineBreakIteratorModeUAX14,
// Combined with a CJK locale, this is what LazyLineBreakIterator::isLooseCJKMode() reports.
LineBreakIteratorModeUAX14Loose,
LineBreakIteratorModeUAX14Normal,
LineBreakIteratorModeUAX14Strict,
};
// This is similar to character break iterator in most cases, but is subject to
// platform UI conventions. One notable example where this can be different
// from character break iterator is Thai prepend characters, see bug 24342.
// Use this for insertion point and selection manipulations.
WTF_EXPORT_PRIVATE TextBreakIterator* cursorMovementIterator(StringView);
// NOTE(review): presumably return iterators over word and sentence boundaries of the
// given text, as the names suggest — confirm against the implementation.
WTF_EXPORT_PRIVATE TextBreakIterator* wordBreakIterator(StringView);
WTF_EXPORT_PRIVATE TextBreakIterator* sentenceBreakIterator(StringView);
// Line break iterators are handed out and returned as a pair: LazyLineBreakIterator in this
// header passes every iterator it got from acquireLineBreakIterator back to
// releaseLineBreakIterator. priorContext / priorContextLength describe characters that
// precede the StringView (LazyLineBreakIterator supplies at most two, or a null pointer
// with length 0 when there are none).
WTF_EXPORT_PRIVATE TextBreakIterator* acquireLineBreakIterator(StringView, const AtomicString& locale, const UChar* priorContext, unsigned priorContextLength, LineBreakIteratorMode, bool isCJK);
WTF_EXPORT_PRIVATE void releaseLineBreakIterator(TextBreakIterator*);
// Unlike most functions here, the open/close pair is not marked WTF_EXPORT_PRIVATE.
// NOTE(review): closeLineBreakIterator takes the pointer by reference, presumably so it can
// clear the caller's pointer — confirm against the implementation.
TextBreakIterator* openLineBreakIterator(const AtomicString& locale, LineBreakIteratorMode, bool isCJK);
void closeLineBreakIterator(TextBreakIterator*&);
// Navigation and query functions operating on an iterator obtained from the functions above.
// NOTE(review): the int results are presumably offsets into the iterated text, with
// TextBreakDone signalling that no further boundary exists — confirm against the implementation.
WTF_EXPORT_PRIVATE int textBreakFirst(TextBreakIterator*);
WTF_EXPORT_PRIVATE int textBreakLast(TextBreakIterator*);
WTF_EXPORT_PRIVATE int textBreakNext(TextBreakIterator*);
WTF_EXPORT_PRIVATE int textBreakPrevious(TextBreakIterator*);
WTF_EXPORT_PRIVATE int textBreakCurrent(TextBreakIterator*);
WTF_EXPORT_PRIVATE int textBreakPreceding(TextBreakIterator*, int);
WTF_EXPORT_PRIVATE int textBreakFollowing(TextBreakIterator*, int);
WTF_EXPORT_PRIVATE bool isTextBreak(TextBreakIterator*, int);
WTF_EXPORT_PRIVATE bool isWordTextBreak(TextBreakIterator*);
// Sentinel value (-1) associated with the textBreak* functions above.
const int TextBreakDone = -1;
// Locale predicate; LazyLineBreakIterator uses its result as the isCJK argument of
// acquireLineBreakIterator.
WTF_EXPORT_PRIVATE bool isCJKLocale(const AtomicString&);
// Holds a string, locale and mode, and acquires the matching line break iterator only when
// get() is first called. Up to two characters of "prior context" (the characters preceding
// the string) are stored alongside; slot 1 is the last character and slot 0 the one before
// it, with 0 meaning "not set".
//
// The acquired iterator is handed back through releaseLineBreakIterator when the string is
// reset or this object is destroyed.
// NOTE(review): no copy operations are declared, so the implicit ones would leave two
// objects releasing the same iterator — confirm callers never copy instances that hold one.
class LazyLineBreakIterator {
public:
    // Empty string, default locale, UAX14 mode, non-CJK, no prior context, no iterator yet.
    LazyLineBreakIterator()
        : m_iterator(nullptr)
        , m_cachedPriorContext(nullptr)
        , m_mode(LineBreakIteratorModeUAX14)
        , m_cachedPriorContextLength(0)
        , m_isCJK(false)
    {
        resetPriorContext();
    }

    // Stores the text, locale and mode; the CJK flag is derived from the locale.
    // No iterator is acquired until get() is called.
    LazyLineBreakIterator(StringView stringView, const AtomicString& locale = AtomicString(), LineBreakIteratorMode mode = LineBreakIteratorModeUAX14)
        : m_stringView(stringView)
        , m_locale(locale)
        , m_iterator(nullptr)
        , m_cachedPriorContext(nullptr)
        , m_mode(mode)
        , m_cachedPriorContextLength(0)
        , m_isCJK(isCJKLocale(locale))
    {
        resetPriorContext();
    }

    // Hands back the iterator, if one was ever acquired.
    ~LazyLineBreakIterator()
    {
        if (!m_iterator)
            return;
        releaseLineBreakIterator(m_iterator);
    }

    StringView stringView() const { return m_stringView; }

    // True only when the locale was classified as CJK and the mode is the loose one.
    bool isLooseCJKMode() const
    {
        if (!m_isCJK)
            return false;
        return m_mode == LineBreakIteratorModeUAX14Loose;
    }

    // Character immediately preceding the string (0 if unset).
    UChar lastCharacter() const
    {
        COMPILE_ASSERT(WTF_ARRAY_LENGTH(m_priorContext) == 2, TextBreakIterator_unexpected_prior_context_length);
        return m_priorContext[1];
    }

    // Character two positions before the string (0 if unset).
    UChar secondToLastCharacter() const
    {
        COMPILE_ASSERT(WTF_ARRAY_LENGTH(m_priorContext) == 2, TextBreakIterator_unexpected_prior_context_length);
        return m_priorContext[0];
    }

    // Overwrites both prior context slots.
    void setPriorContext(UChar last, UChar secondToLast)
    {
        COMPILE_ASSERT(WTF_ARRAY_LENGTH(m_priorContext) == 2, TextBreakIterator_unexpected_prior_context_length);
        m_priorContext[1] = last;
        m_priorContext[0] = secondToLast;
    }

    // Shifts the current last character into the second-to-last slot and stores the new last character.
    void updatePriorContext(UChar last)
    {
        COMPILE_ASSERT(WTF_ARRAY_LENGTH(m_priorContext) == 2, TextBreakIterator_unexpected_prior_context_length);
        // Arguments are passed by value, so the old last character is read before any slot is written.
        setPriorContext(last, m_priorContext[1]);
    }

    // Clears both prior context slots.
    void resetPriorContext()
    {
        COMPILE_ASSERT(WTF_ARRAY_LENGTH(m_priorContext) == 2, TextBreakIterator_unexpected_prior_context_length);
        setPriorContext(0, 0);
    }

    // Number of usable prior context characters: 0 when the last character is unset,
    // otherwise 1, or 2 when the second-to-last character is set as well.
    unsigned priorContextLength() const
    {
        COMPILE_ASSERT(WTF_ARRAY_LENGTH(m_priorContext) == 2, TextBreakIterator_unexpected_prior_context_length);
        if (!m_priorContext[1])
            return 0;
        return m_priorContext[0] ? 2 : 1;
    }

    // Obtain text break iterator, possibly previously cached, where this iterator is (or has been)
    // initialized to use the previously stored string as the primary breaking context and using
    // previously stored prior context if non-empty.
    // priorContextLength selects how many of the trailing stored prior context characters to
    // pass along; it must not exceed priorContextCapacity.
    TextBreakIterator* get(unsigned priorContextLength)
    {
        ASSERT(priorContextLength <= priorContextCapacity);

        // The used characters are the trailing ones of m_priorContext; no context means a null pointer.
        const UChar* priorContext = nullptr;
        if (priorContextLength)
            priorContext = &m_priorContext[priorContextCapacity - priorContextLength];

        // An iterator acquired for a different prior context window cannot be reused: drop it
        // (keeping string, locale and mode) so that it is re-acquired below.
        const bool cachedIteratorIsStale = m_iterator
            && (priorContext != m_cachedPriorContext || priorContextLength != m_cachedPriorContextLength);
        if (cachedIteratorIsStale)
            resetStringAndReleaseIterator(m_stringView, m_locale, m_mode);

        if (!m_iterator) {
            m_iterator = acquireLineBreakIterator(m_stringView, m_locale, priorContext, priorContextLength, m_mode, m_isCJK);
            m_cachedPriorContext = priorContext;
            m_cachedPriorContextLength = priorContextLength;
        }
        return m_iterator;
    }

    // Releases any held iterator and replaces the stored string, locale and mode; the CJK flag
    // is recomputed from the new locale. The stored prior context characters are left untouched.
    void resetStringAndReleaseIterator(StringView stringView, const AtomicString& locale, LineBreakIteratorMode mode)
    {
        if (m_iterator) {
            releaseLineBreakIterator(m_iterator);
            m_iterator = nullptr;
        }
        m_cachedPriorContext = nullptr;
        m_cachedPriorContextLength = 0;

        m_stringView = stringView;
        m_locale = locale;
        m_mode = mode;
        m_isCJK = isCJKLocale(locale);
    }

private:
    static const unsigned priorContextCapacity = 2;
    StringView m_stringView;
    AtomicString m_locale;
    // Acquired on demand by get(); null while nothing is held.
    TextBreakIterator* m_iterator;
    // Prior context pointer and length that m_iterator was acquired with.
    const UChar* m_cachedPriorContext;
    LineBreakIteratorMode m_mode;
    unsigned m_cachedPriorContextLength;
    // [0] is the second-to-last character, [1] the last; 0 marks an unset slot.
    UChar m_priorContext[priorContextCapacity];
    bool m_isCJK;
};
// Iterates over "extended grapheme clusters", as defined in UAX #29.
// Note that platform implementations may be less sophisticated - e.g. ICU prior to
// version 4.0 only supports "legacy grapheme clusters".
// Use this for general text processing, e.g. string truncation.
//
// Wraps a single TextBreakIterator pointer. The class is non-copyable but declares a move
// constructor; the constructor and destructor are defined out of line.
// NOTE(review): the name suggests each instance gets an iterator that is not shared with
// other users, released by the destructor — confirm against the implementation.
class NonSharedCharacterBreakIterator {
WTF_MAKE_NONCOPYABLE(NonSharedCharacterBreakIterator);
public:
WTF_EXPORT_PRIVATE NonSharedCharacterBreakIterator(StringView);
WTF_EXPORT_PRIVATE ~NonSharedCharacterBreakIterator();
// Declared without WTF_EXPORT_PRIVATE, unlike the constructor and destructor above.
NonSharedCharacterBreakIterator(NonSharedCharacterBreakIterator&&);
// Implicit conversion so an instance can be passed wherever a TextBreakIterator* is
// expected, e.g. to the textBreak* functions.
operator TextBreakIterator*() const { return m_iterator; }
private:
TextBreakIterator* m_iterator;
};
// Counts the number of grapheme clusters. A surrogate pair or a sequence
// of a non-combining character and following combining characters is
// counted as 1 grapheme cluster.
WTF_EXPORT_PRIVATE unsigned numGraphemeClusters(StringView);
// Returns the number of characters that make up at most the specified number of
// grapheme clusters, counted from the start of the string.
// NOTE(review): "characters" presumably means UTF-16 code units — confirm against the implementation.
WTF_EXPORT_PRIVATE unsigned numCharactersInGraphemeClusters(const StringView&, unsigned);
}
// Make the commonly used names available without the WTF:: qualifier.
// Of the LineBreakIteratorMode enumerators, only LineBreakIteratorModeUAX14 is listed here.
using WTF::LineBreakIteratorMode;
using WTF::LineBreakIteratorModeUAX14;
using WTF::LazyLineBreakIterator;
using WTF::NonSharedCharacterBreakIterator;
using WTF::TextBreakDone;
using WTF::TextBreakIterator;
#endif