Reviewed by Hyatt.

        - http://bugs.webkit.org/show_bug.cgi?id=12175
          port break_lines.cpp to be platform independent

        * platform/TextBreakIterator.h: Added lineBreakIterator.
        Removed ICU-specific stuff from the header.

        * platform/TextBreakIteratorICU.cpp:
        (WebCore::setUpIterator): Added helper to be shared by the three different
        creation functions.
        (WebCore::characterBreakIterator): Changed to use setUpIterator.
        (WebCore::wordBreakIterator): Ditto.
        (WebCore::lineBreakIterator): Added.

        * platform/qt/TextBreakIteratorQt.cpp:
        (WebCore::lineBreakIterator): Added a stub.

        * rendering/break_lines.cpp:
        (WebCore::isBreakableSpace): Added.
        (WebCore::shouldBreakAfter): Added.
        (WebCore::needsLineBreakIterator): Added.
        (WebCore::nextBreakablePosition): Changed to use TextBreakIterator on platforms
        other than Mac.



git-svn-id: http://svn.webkit.org/repository/webkit/trunk@18708 268f45cc-cd09-0410-ab3c-d52691b4dbfc
diff --git a/WebCore/ChangeLog b/WebCore/ChangeLog
index 3dddabf..fceaa1c 100644
--- a/WebCore/ChangeLog
+++ b/WebCore/ChangeLog
@@ -1,3 +1,30 @@
+2007-01-09  Darin Adler  <darin@apple.com>
+
+        Reviewed by Hyatt.
+
+        - http://bugs.webkit.org/show_bug.cgi?id=12175
+          port break_lines.cpp to be platform independent
+
+        * platform/TextBreakIterator.h: Added lineBreakIterator.
+        Removed ICU-specific stuff from the header.
+
+        * platform/TextBreakIteratorICU.cpp:
+        (WebCore::setUpIterator): Added helper to be shared by the three different
+        creation functions.
+        (WebCore::characterBreakIterator): Changed to use setUpIterator.
+        (WebCore::wordBreakIterator): Ditto.
+        (WebCore::lineBreakIterator): Added.
+
+        * platform/qt/TextBreakIteratorQt.cpp:
+        (WebCore::lineBreakIterator): Added a stub.
+
+        * rendering/break_lines.cpp:
+        (WebCore::isBreakableSpace): Added.
+        (WebCore::shouldBreakAfter): Added.
+        (WebCore::needsLineBreakIterator): Added.
+        (WebCore::nextBreakablePosition): Changed to use TextBreakIterator on platforms
+        other than Mac.
+
 2007-01-09  Maciej Stachowiak  <mjs@apple.com>
 
         Reviewed by Darin.
diff --git a/WebCore/platform/TextBreakIterator.h b/WebCore/platform/TextBreakIterator.h
index bcbaa4b..343ee19 100644
--- a/WebCore/platform/TextBreakIterator.h
+++ b/WebCore/platform/TextBreakIterator.h
@@ -1,7 +1,6 @@
 /*
- * This file is part of the DOM implementation for KDE.
- *
  * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
+ * Copyright (C) 2007 Apple Computer, Inc. All rights reserved.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Library General Public
@@ -19,30 +18,29 @@
  * Boston, MA 02111-1307, USA.
  *
  */
+
 #ifndef TextBreakIterator_h
 #define TextBreakIterator_h
 
-#include "wtf/unicode/Unicode.h"
+#include <wtf/unicode/Unicode.h>
 
-#if USE(ICU_UNICODE)
-#include <unicode/ubrk.h>
-typedef UBreakIterator TextBreakIterator;
-#elif USE(QT4_UNICODE)
 namespace WebCore {
+
     class TextBreakIterator;
-}
-#endif
 
-
-namespace WebCore {
-    TextBreakIterator* wordBreakIterator(const UChar* string, int length);
-    TextBreakIterator* characterBreakIterator(const UChar* string, int length);
+    // Note: The returned iterator is good only until you get another iterator.
+    TextBreakIterator* characterBreakIterator(const UChar*, int length);
+    TextBreakIterator* wordBreakIterator(const UChar*, int length);
+    TextBreakIterator* lineBreakIterator(const UChar*, int length);
 
     int textBreakFirst(TextBreakIterator*);
     int textBreakNext(TextBreakIterator*);
     int textBreakCurrent(TextBreakIterator*);
     int textBreakPreceding(TextBreakIterator*, int);
     int textBreakFollowing(TextBreakIterator*, int);
-    enum { TextBreakDone = -1 };
+
+    const int TextBreakDone = -1;
+
 }
+
 #endif
diff --git a/WebCore/platform/TextBreakIteratorICU.cpp b/WebCore/platform/TextBreakIteratorICU.cpp
index a65d4f0..09cc5a9 100644
--- a/WebCore/platform/TextBreakIteratorICU.cpp
+++ b/WebCore/platform/TextBreakIteratorICU.cpp
@@ -1,7 +1,6 @@
 /*
- * This file is part of the DOM implementation for KDE.
- *
  * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
+ * Copyright (C) 2007 Apple Computer, Inc. All rights reserved.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Library General Public
@@ -20,28 +19,34 @@
  *
  */
 
+#include "config.h"
 #include "TextBreakIterator.h"
 
+#include <unicode/ubrk.h>
+
 namespace WebCore {
 
-TextBreakIterator* wordBreakIterator(const UChar* string, int length)
+static TextBreakIterator* setUpIterator(bool& createdIterator, TextBreakIterator*& iterator,
+    UBreakIteratorType type, const UChar* string, int length)
 {
-    // The locale is currently ignored when determining character cluster breaks.
-    // This may change in the future, according to Deborah Goldsmith.
-    static bool createdIterator = false;
-    static UBreakIterator* iterator;
-    UErrorCode status;
+    if (!string)
+        return 0;
+
     if (!createdIterator) {
-        status = U_ZERO_ERROR;
-        iterator = ubrk_open(UBRK_WORD, "en_us", 0, 0, &status);
+        // The locale is currently ignored when determining character cluster breaks.
+        // This may change in the future, according to Deborah Goldsmith.
+        // FIXME: Presumably we do need to pass the correct locale for word and line
+        // break iterators, though!
+        UErrorCode openStatus = U_ZERO_ERROR;
+        iterator = static_cast<TextBreakIterator*>(ubrk_open(type, "en_us", 0, 0, &openStatus));
         createdIterator = true;
     }
     if (!iterator)
         return 0;
 
-    status = U_ZERO_ERROR;
-    ubrk_setText(iterator, string, length, &status);
-    if (U_FAILURE(status))
+    UErrorCode setTextStatus = U_ZERO_ERROR;
+    ubrk_setText(iterator, string, length, &setTextStatus);
+    if (U_FAILURE(setTextStatus))
         return 0;
 
     return iterator;
@@ -49,28 +54,26 @@
 
 TextBreakIterator* characterBreakIterator(const UChar* string, int length)
 {
-    if (!string)
-        return 0;
+    static bool createdCharacterBreakIterator = false;
+    static TextBreakIterator* staticCharacterBreakIterator;
+    return setUpIterator(createdCharacterBreakIterator,
+        staticCharacterBreakIterator, UBRK_CHARACTER, string, length);
+}
 
-    // The locale is currently ignored when determining character cluster breaks.
-    // This may change in the future, according to Deborah Goldsmith.
-    static bool createdIterator = false;
-    static UBreakIterator* iterator;
-    UErrorCode status;
-    if (!createdIterator) {
-        status = U_ZERO_ERROR;
-        iterator = ubrk_open(UBRK_CHARACTER, "en_us", 0, 0, &status);
-        createdIterator = true;
-    }
-    if (!iterator)
-        return 0;
+TextBreakIterator* wordBreakIterator(const UChar* string, int length)
+{
+    static bool createdWordBreakIterator = false;
+    static TextBreakIterator* staticWordBreakIterator;
+    return setUpIterator(createdWordBreakIterator,
+        staticWordBreakIterator, UBRK_WORD, string, length);
+}
 
-    status = U_ZERO_ERROR;
-    ubrk_setText(iterator, reinterpret_cast<const UChar*>(string), length, &status);
-    if (status != U_ZERO_ERROR)
-        return 0;
-
-    return iterator;
+TextBreakIterator* lineBreakIterator(const UChar* string, int length)
+{
+    static bool createdLineBreakIterator = false;
+    static TextBreakIterator* staticLineBreakIterator;
+    return setUpIterator(createdLineBreakIterator,
+        staticLineBreakIterator, UBRK_LINE, string, length);
 }
 
 int textBreakFirst(TextBreakIterator* bi)
diff --git a/WebCore/platform/qt/TextBreakIteratorQt.cpp b/WebCore/platform/qt/TextBreakIteratorQt.cpp
index fad2a13..a9020df 100644
--- a/WebCore/platform/qt/TextBreakIteratorQt.cpp
+++ b/WebCore/platform/qt/TextBreakIteratorQt.cpp
@@ -143,6 +143,12 @@
     return iterator;
 }
 
+TextBreakIterator* lineBreakIterator(const UChar*, int)
+{
+    // not yet implemented
+    return 0;
+}
+
 int textBreakFirst(TextBreakIterator* bi)
 {
     return bi->first();
diff --git a/WebCore/rendering/break_lines.cpp b/WebCore/rendering/break_lines.cpp
index 8aa47ee..83d7991 100644
--- a/WebCore/rendering/break_lines.cpp
+++ b/WebCore/rendering/break_lines.cpp
@@ -1,7 +1,5 @@
 /*
- * This file is part of the DOM implementation for KDE.
- *
- * Copyright (C) 2005 Apple Computer, Inc.
+ * Copyright (C) 2005, 2007 Apple Computer, Inc.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Library General Public
@@ -23,7 +21,7 @@
 #include "config.h"
 #include "break_lines.h"
 
-#include "RenderText.h"
+#include "TextBreakIterator.h"
 
 #if PLATFORM(MAC)
 #include <CoreServices/CoreServices.h>
@@ -31,44 +29,88 @@
 
 namespace WebCore {
 
-int nextBreakablePosition(const UChar* str, int pos, int len, bool breakNBSP)
+const UChar noBreakSpace = 0x00A0;
+const UChar softHyphen = 0x00AD;
+
+static inline bool isBreakableSpace(UChar ch, bool treatNoBreakSpaceAsBreak)
 {
+    switch (ch) {
+        case ' ':
+        case '\n':
+        case '\t':
+            return true;
+        case noBreakSpace:
+            return treatNoBreakSpaceAsBreak;
+        default:
+            return false;
+    }
+}
+
+static inline bool shouldBreakAfter(UChar ch)
+{
+    // Match WinIE's breaking strategy, which is to always allow breaks after hyphens and question marks.
+    switch (ch) {
+        case '-':
+        case '?':
+        case softHyphen:
+            return true;
+        default:
+            return false;
+    }
+}
+
+static inline bool needsLineBreakIterator(UChar ch)
+{
+    return ch > 0x7F && ch != noBreakSpace;
+}
+
 #if PLATFORM(MAC)
-    OSStatus status = 0, findStatus = -1;
-    static TextBreakLocatorRef breakLocator = 0;
-    int nextUCBreak = -1;
+static inline TextBreakLocatorRef lineBreakLocator()
+{
+    TextBreakLocatorRef locator = 0;
+    UCCreateTextBreakLocator(0, 0, kUCTextBreakLineMask, &locator);
+    return locator;
+}
 #endif
-    int i;
-    unsigned short ch, lastCh;
-    
-    lastCh = pos > 0 ? str[pos - 1] : 0;
-    for (i = pos; i < len; i++) {
-        ch = str[i];
-        if (ch == ' ' || ch == '\n' || ch == '\t' || (breakNBSP && ch == 0xa0))
-            break;
-        // Match WinIE's breaking strategy, which is to always allow breaks after hyphens and question marks.
-        if (lastCh == '-' || lastCh == '?' || lastCh == SOFT_HYPHEN)
-            break;
-#if PLATFORM(MAC)
-        // FIXME: Rewrite break location using ICU.
-        // If current character, or the previous character aren't simple latin1 then
-        // use the UC line break locator.  UCFindTextBreak will report false if we
-        // have a sequence of 0xa0 0x20 (nbsp, sp), so we explicity check for that
-        // case.
-        if ((ch > 0x7f && ch != 0xa0) || (lastCh > 0x7f && lastCh != 0xa0)) {
-            if (nextUCBreak < i) {
-                if (!breakLocator)
-                    status = UCCreateTextBreakLocator(NULL, 0, kUCTextBreakLineMask, &breakLocator);
-                if (status == 0)
-                    findStatus = UCFindTextBreak(breakLocator, kUCTextBreakLineMask, 0, (const UniChar *)str, len, i, (UniCharArrayOffset *)&nextUCBreak);
+
+int nextBreakablePosition(const UChar* str, int pos, int len, bool treatNoBreakSpaceAsBreak)
+{
+#if !PLATFORM(MAC)
+    TextBreakIterator* breakIterator = 0;
+#endif
+    int nextBreak = -1;
+
+    UChar lastCh = pos > 0 ? str[pos - 1] : 0;
+    for (int i = pos; i < len; i++) {
+        UChar ch = str[i];
+
+        if (isBreakableSpace(ch, treatNoBreakSpaceAsBreak) || shouldBreakAfter(lastCh))
+            return i;
+
+        if (needsLineBreakIterator(ch) || needsLineBreakIterator(lastCh)) {
+            if (nextBreak < i && i) {
+#if !PLATFORM(MAC)
+                if (!breakIterator)
+                    breakIterator = lineBreakIterator(str, len);
+                if (breakIterator)
+                    nextBreak = textBreakFollowing(breakIterator, i - 1);
+#else
+                static TextBreakLocatorRef breakLocator = lineBreakLocator();
+                if (breakLocator) {
+                    UniCharArrayOffset nextUCBreak;
+                    if (UCFindTextBreak(breakLocator, kUCTextBreakLineMask, 0, str, len, i, &nextUCBreak) == 0)
+                        nextBreak = nextUCBreak;
+                }
+#endif
             }
-            if (findStatus == 0 && i == nextUCBreak && !(lastCh == ' ' || lastCh == '\n' || lastCh == '\t' || (breakNBSP && lastCh == 0xa0)))
-                break;
+            if (i == nextBreak && !isBreakableSpace(lastCh, treatNoBreakSpaceAsBreak))
+                return i;
         }
-#endif
+
         lastCh = ch;
     }
-    return i;
+
+    return len;
 }
 
 } // namespace WebCore