Reviewed by Hyatt.
- http://bugs.webkit.org/show_bug.cgi?id=12175
port break_lines.cpp to be platform independent
* platform/TextBreakIterator.h: Added lineBreakIterator.
Removed ICU-specific stuff from the header.
* platform/TextBreakIteratorICU.cpp:
(WebCore::setUpIterator): Added helper to be shared by the three different
creation functions.
(WebCore::characterBreakIterator): Changed to use setUpIterator.
(WebCore::wordBreakIterator): Ditto.
(WebCore::lineBreakIterator): Added.
* platform/qt/TextBreakIteratorQt.cpp:
(WebCore::lineBreakIterator): Added a stub.
* rendering/break_lines.cpp:
(WebCore::isBreakableSpace): Added.
(WebCore::shouldBreakAfter): Added.
(WebCore::needsLineBreakIterator): Added.
(WebCore::nextBreakablePosition): Changed to use TextBreakIterator on platforms
other than Mac.
git-svn-id: http://svn.webkit.org/repository/webkit/trunk@18708 268f45cc-cd09-0410-ab3c-d52691b4dbfc
diff --git a/WebCore/ChangeLog b/WebCore/ChangeLog
index 3dddabf..fceaa1c 100644
--- a/WebCore/ChangeLog
+++ b/WebCore/ChangeLog
@@ -1,3 +1,30 @@
+2007-01-09 Darin Adler <darin@apple.com>
+
+ Reviewed by Hyatt.
+
+ - http://bugs.webkit.org/show_bug.cgi?id=12175
+ port break_lines.cpp to be platform independent
+
+ * platform/TextBreakIterator.h: Added lineBreakIterator.
+ Removed ICU-specific stuff from the header.
+
+ * platform/TextBreakIteratorICU.cpp:
+ (WebCore::setUpIterator): Added helper to be shared by the three different
+ creation functions.
+ (WebCore::characterBreakIterator): Changed to use setUpIterator.
+ (WebCore::wordBreakIterator): Ditto.
+ (WebCore::lineBreakIterator): Added.
+
+ * platform/qt/TextBreakIteratorQt.cpp:
+ (WebCore::lineBreakIterator): Added a stub.
+
+ * rendering/break_lines.cpp:
+ (WebCore::isBreakableSpace): Added.
+ (WebCore::shouldBreakAfter): Added.
+ (WebCore::needsLineBreakIterator): Added.
+ (WebCore::nextBreakablePosition): Changed to use TextBreakIterator on platforms
+ other than Mac.
+
2007-01-09 Maciej Stachowiak <mjs@apple.com>
Reviewed by Darin.
diff --git a/WebCore/platform/TextBreakIterator.h b/WebCore/platform/TextBreakIterator.h
index bcbaa4b..343ee19 100644
--- a/WebCore/platform/TextBreakIterator.h
+++ b/WebCore/platform/TextBreakIterator.h
@@ -1,7 +1,6 @@
/*
- * This file is part of the DOM implementation for KDE.
- *
* Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
+ * Copyright (C) 2007 Apple Computer, Inc. All rights reserved.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
@@ -19,30 +18,29 @@
* Boston, MA 02111-1307, USA.
*
*/
+
#ifndef TextBreakIterator_h
#define TextBreakIterator_h
-#include "wtf/unicode/Unicode.h"
+#include <wtf/unicode/Unicode.h>
-#if USE(ICU_UNICODE)
-#include <unicode/ubrk.h>
-typedef UBreakIterator TextBreakIterator;
-#elif USE(QT4_UNICODE)
namespace WebCore {
+
class TextBreakIterator;
-}
-#endif
-
-namespace WebCore {
- TextBreakIterator* wordBreakIterator(const UChar* string, int length);
- TextBreakIterator* characterBreakIterator(const UChar* string, int length);
+ // Note: The returned iterator is good only until you get another iterator.
+ TextBreakIterator* characterBreakIterator(const UChar*, int length);
+ TextBreakIterator* wordBreakIterator(const UChar*, int length);
+ TextBreakIterator* lineBreakIterator(const UChar*, int length);
int textBreakFirst(TextBreakIterator*);
int textBreakNext(TextBreakIterator*);
int textBreakCurrent(TextBreakIterator*);
int textBreakPreceding(TextBreakIterator*, int);
int textBreakFollowing(TextBreakIterator*, int);
- enum { TextBreakDone = -1 };
+
+ const int TextBreakDone = -1;
+
}
+
#endif
diff --git a/WebCore/platform/TextBreakIteratorICU.cpp b/WebCore/platform/TextBreakIteratorICU.cpp
index a65d4f0..09cc5a9 100644
--- a/WebCore/platform/TextBreakIteratorICU.cpp
+++ b/WebCore/platform/TextBreakIteratorICU.cpp
@@ -1,7 +1,6 @@
/*
- * This file is part of the DOM implementation for KDE.
- *
* Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
+ * Copyright (C) 2007 Apple Computer, Inc. All rights reserved.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
@@ -20,28 +19,34 @@
*
*/
+#include "config.h"
#include "TextBreakIterator.h"
+#include <unicode/ubrk.h>
+
namespace WebCore {
-TextBreakIterator* wordBreakIterator(const UChar* string, int length)
+static TextBreakIterator* setUpIterator(bool& createdIterator, TextBreakIterator*& iterator,
+ UBreakIteratorType type, const UChar* string, int length)
{
- // The locale is currently ignored when determining character cluster breaks.
- // This may change in the future, according to Deborah Goldsmith.
- static bool createdIterator = false;
- static UBreakIterator* iterator;
- UErrorCode status;
+ if (!string)
+ return 0;
+
if (!createdIterator) {
- status = U_ZERO_ERROR;
- iterator = ubrk_open(UBRK_WORD, "en_us", 0, 0, &status);
+ // The locale is currently ignored when determining character cluster breaks.
+ // This may change in the future, according to Deborah Goldsmith.
+ // FIXME: Presumably we do need to pass the correct locale for word and line
+ // break iterators, though!
+ UErrorCode openStatus = U_ZERO_ERROR;
+ iterator = static_cast<TextBreakIterator*>(ubrk_open(type, "en_us", 0, 0, &openStatus));
createdIterator = true;
}
if (!iterator)
return 0;
- status = U_ZERO_ERROR;
- ubrk_setText(iterator, string, length, &status);
- if (U_FAILURE(status))
+ UErrorCode setTextStatus = U_ZERO_ERROR;
+ ubrk_setText(iterator, string, length, &setTextStatus);
+ if (U_FAILURE(setTextStatus))
return 0;
return iterator;
@@ -49,28 +54,26 @@
TextBreakIterator* characterBreakIterator(const UChar* string, int length)
{
- if (!string)
- return 0;
+ static bool createdCharacterBreakIterator = false;
+ static TextBreakIterator* staticCharacterBreakIterator;
+ return setUpIterator(createdCharacterBreakIterator,
+ staticCharacterBreakIterator, UBRK_CHARACTER, string, length);
+}
- // The locale is currently ignored when determining character cluster breaks.
- // This may change in the future, according to Deborah Goldsmith.
- static bool createdIterator = false;
- static UBreakIterator* iterator;
- UErrorCode status;
- if (!createdIterator) {
- status = U_ZERO_ERROR;
- iterator = ubrk_open(UBRK_CHARACTER, "en_us", 0, 0, &status);
- createdIterator = true;
- }
- if (!iterator)
- return 0;
+TextBreakIterator* wordBreakIterator(const UChar* string, int length)
+{
+ static bool createdWordBreakIterator = false;
+ static TextBreakIterator* staticWordBreakIterator;
+ return setUpIterator(createdWordBreakIterator,
+ staticWordBreakIterator, UBRK_WORD, string, length);
+}
- status = U_ZERO_ERROR;
- ubrk_setText(iterator, reinterpret_cast<const UChar*>(string), length, &status);
- if (status != U_ZERO_ERROR)
- return 0;
-
- return iterator;
+TextBreakIterator* lineBreakIterator(const UChar* string, int length)
+{
+ static bool createdLineBreakIterator = false;
+ static TextBreakIterator* staticLineBreakIterator;
+ return setUpIterator(createdLineBreakIterator,
+ staticLineBreakIterator, UBRK_LINE, string, length);
}
int textBreakFirst(TextBreakIterator* bi)
diff --git a/WebCore/platform/qt/TextBreakIteratorQt.cpp b/WebCore/platform/qt/TextBreakIteratorQt.cpp
index fad2a13..a9020df 100644
--- a/WebCore/platform/qt/TextBreakIteratorQt.cpp
+++ b/WebCore/platform/qt/TextBreakIteratorQt.cpp
@@ -143,6 +143,12 @@
return iterator;
}
+TextBreakIterator* lineBreakIterator(const UChar*, int)
+{
+ // FIXME: Not yet implemented for Qt; this stub always returns 0 (no iterator).
+ return 0;
+}
+
int textBreakFirst(TextBreakIterator* bi)
{
return bi->first();
diff --git a/WebCore/rendering/break_lines.cpp b/WebCore/rendering/break_lines.cpp
index 8aa47ee..83d7991 100644
--- a/WebCore/rendering/break_lines.cpp
+++ b/WebCore/rendering/break_lines.cpp
@@ -1,7 +1,5 @@
/*
- * This file is part of the DOM implementation for KDE.
- *
- * Copyright (C) 2005 Apple Computer, Inc.
+ * Copyright (C) 2005, 2007 Apple Computer, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
@@ -23,7 +21,7 @@
#include "config.h"
#include "break_lines.h"
-#include "RenderText.h"
+#include "TextBreakIterator.h"
#if PLATFORM(MAC)
#include <CoreServices/CoreServices.h>
@@ -31,44 +29,88 @@
namespace WebCore {
-int nextBreakablePosition(const UChar* str, int pos, int len, bool breakNBSP)
+const UChar noBreakSpace = 0x00A0;
+const UChar softHyphen = 0x00AD;
+
+static inline bool isBreakableSpace(UChar ch, bool treatNoBreakSpaceAsBreak)
{
+ switch (ch) {
+ case ' ':
+ case '\n':
+ case '\t':
+ return true;
+ case noBreakSpace:
+ return treatNoBreakSpaceAsBreak;
+ default:
+ return false;
+ }
+}
+
+static inline bool shouldBreakAfter(UChar ch)
+{
+ // Match WinIE's breaking strategy, which is to always allow breaks after hyphens and question marks; soft hyphens are treated the same way here.
+ switch (ch) {
+ case '-':
+ case '?':
+ case softHyphen:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool needsLineBreakIterator(UChar ch)
+{
+ return ch > 0x7F && ch != noBreakSpace;
+}
+
#if PLATFORM(MAC)
- OSStatus status = 0, findStatus = -1;
- static TextBreakLocatorRef breakLocator = 0;
- int nextUCBreak = -1;
+static inline TextBreakLocatorRef lineBreakLocator()
+{
+ TextBreakLocatorRef locator = 0;
+ UCCreateTextBreakLocator(0, 0, kUCTextBreakLineMask, &locator);
+ return locator;
+}
#endif
- int i;
- unsigned short ch, lastCh;
-
- lastCh = pos > 0 ? str[pos - 1] : 0;
- for (i = pos; i < len; i++) {
- ch = str[i];
- if (ch == ' ' || ch == '\n' || ch == '\t' || (breakNBSP && ch == 0xa0))
- break;
- // Match WinIE's breaking strategy, which is to always allow breaks after hyphens and question marks.
- if (lastCh == '-' || lastCh == '?' || lastCh == SOFT_HYPHEN)
- break;
-#if PLATFORM(MAC)
- // FIXME: Rewrite break location using ICU.
- // If current character, or the previous character aren't simple latin1 then
- // use the UC line break locator. UCFindTextBreak will report false if we
- // have a sequence of 0xa0 0x20 (nbsp, sp), so we explicity check for that
- // case.
- if ((ch > 0x7f && ch != 0xa0) || (lastCh > 0x7f && lastCh != 0xa0)) {
- if (nextUCBreak < i) {
- if (!breakLocator)
- status = UCCreateTextBreakLocator(NULL, 0, kUCTextBreakLineMask, &breakLocator);
- if (status == 0)
- findStatus = UCFindTextBreak(breakLocator, kUCTextBreakLineMask, 0, (const UniChar *)str, len, i, (UniCharArrayOffset *)&nextUCBreak);
+
+int nextBreakablePosition(const UChar* str, int pos, int len, bool treatNoBreakSpaceAsBreak)
+{
+#if !PLATFORM(MAC)
+ TextBreakIterator* breakIterator = 0;
+#endif
+ int nextBreak = -1;
+
+ UChar lastCh = pos > 0 ? str[pos - 1] : 0;
+ for (int i = pos; i < len; i++) {
+ UChar ch = str[i];
+
+ if (isBreakableSpace(ch, treatNoBreakSpaceAsBreak) || shouldBreakAfter(lastCh))
+ return i;
+
+ if (needsLineBreakIterator(ch) || needsLineBreakIterator(lastCh)) {
+ if (nextBreak < i && i) {
+#if !PLATFORM(MAC)
+ if (!breakIterator)
+ breakIterator = lineBreakIterator(str, len);
+ if (breakIterator)
+ nextBreak = textBreakFollowing(breakIterator, i - 1);
+#else
+ static TextBreakLocatorRef breakLocator = lineBreakLocator();
+ if (breakLocator) {
+ UniCharArrayOffset nextUCBreak;
+ if (UCFindTextBreak(breakLocator, kUCTextBreakLineMask, 0, str, len, i, &nextUCBreak) == 0)
+ nextBreak = nextUCBreak;
+ }
+#endif
}
- if (findStatus == 0 && i == nextUCBreak && !(lastCh == ' ' || lastCh == '\n' || lastCh == '\t' || (breakNBSP && lastCh == 0xa0)))
- break;
+ if (i == nextBreak && !isBreakableSpace(lastCh, treatNoBreakSpaceAsBreak))
+ return i;
}
-#endif
+
lastCh = ch;
}
- return i;
+
+ return len;
}
} // namespace WebCore