WebCore/platform/StringImpl.cpp - WebKit - Git at Google

 /**
  * This file is part of the DOM implementation for KDE.
  *
  * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
  *           (C) 1999 Antti Koivisto (koivisto@kde.org)
  *           (C) 2001 Dirk Mueller ( mueller@kde.org )
  * Copyright (C) 2003, 2004, 2005, 2006 Apple Computer, Inc.
  * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net)
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Library General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This library is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Library General Public License for more details.
  *
  * You should have received a copy of the GNU Library General Public License
  * along with this library; see the file COPYING.LIB.  If not, write to
  * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  * Boston, MA 02111-1307, USA.
  *
  */

 #include "config.h"
 #include "StringImpl.h"

 #include "AtomicString.h"
 #include "Length.h"
 #include <kxmlcore/Assertions.h>
 #include <string.h>
 #include <unicode/ubrk.h>

 using namespace KXMLCore;

 namespace WebCore {

 static inline QChar* newQCharVector(unsigned n)
 {
     return static_cast<QChar*>(fastMalloc(sizeof(QChar) * n));
 }

 static inline void deleteQCharVector(QChar* p)
 {
     fastFree(p);
 }

 StringImpl* StringImpl::empty()
 {
     static WithOneRef w;
     static StringImpl e(w);
     return &e;
 }

 StringImpl::StringImpl(const DeprecatedString& str)
 {
     initWithQChar(str.unicode(), str.length());
 }

 StringImpl::StringImpl(const QChar* str, unsigned len)
 {
     initWithQChar(str, len);
 }

 StringImpl::StringImpl(const char* str)
 {
     initWithChar(str, strlen(str));
 }

 StringImpl::StringImpl(const char* str, unsigned int len)
 {
     initWithChar(str, len);
 }

 void StringImpl::initWithChar(const char* str, unsigned int len)
 {
     _hash = 0;
     _inTable = false;
     l = len;
     if (!l || !str) {
         s = 0;
         return;
     }

     s = newQCharVector(l);
     int i = l;
     QChar* ptr = s;
     while (i--)
         *ptr++ = *str++;
 }

 void StringImpl::initWithQChar(const QChar* str, unsigned int len)
 {
     _hash = 0;
     _inTable = false;
     l = len;
     if (!l || !str) {
         s = 0;
         return;
     }

     s = newQCharVector(len);
     memcpy(s, str, len * sizeof(QChar));
 }

 StringImpl::~StringImpl()
 {
     if (_inTable)
         AtomicString::remove(this);
     deleteQCharVector(s);
 }

 void StringImpl::append(const StringImpl* str)
 {
     assert(!_inTable);
     if(str && str->l != 0)
     {
         int newlen = l+str->l;
         QChar* c = newQCharVector(newlen);
         memcpy(c, s, l*sizeof(QChar));
         memcpy(c+l, str->s, str->l*sizeof(QChar));
         deleteQCharVector(s);
         s = c;
         l = newlen;
     }
 }

 void StringImpl::insert(const StringImpl* str, unsigned pos)
 {
     assert(!_inTable);
     if (pos >= l) {
         append(str);
         return;
     }
     if (str && str->l != 0) {
         int newlen = l + str->l;
         QChar* c = newQCharVector(newlen);
         memcpy(c, s, pos*sizeof(QChar));
         memcpy(c+pos, str->s, str->l*sizeof(QChar));
         memcpy(c+pos+str->l, s+pos, (l-pos)*sizeof(QChar));
         deleteQCharVector(s);
         s = c;
         l = newlen;
     }
 }

 void StringImpl::truncate(int len)
 {
     assert(!_inTable);
     if (len > (int)l)
         return;
     int nl = len < 1 ? 1 : len;
     QChar* c = newQCharVector(nl);
     memcpy(c, s, nl*sizeof(QChar));
     deleteQCharVector(s);
     s = c;
     l = len;
 }

 void StringImpl::remove(unsigned pos, int len)
 {
     assert(!_inTable);
     if(len <= 0) return;
     if(pos >= l ) return;
     if((unsigned)len > l - pos)
     len = l - pos;

     unsigned newLen = l-len;
     QChar* c = newQCharVector(newLen);
     memcpy(c, s, pos*sizeof(QChar));
     memcpy(c+pos, s+pos+len, (l-len-pos)*sizeof(QChar));
     deleteQCharVector(s);
     s = c;
     l = newLen;
 }

 StringImpl* StringImpl::split(unsigned pos)
 {
     assert(!_inTable);
     if( pos >=l ) return new StringImpl();

     unsigned newLen = l-pos;
     QChar* c = newQCharVector(newLen);
     memcpy(c, s+pos, newLen*sizeof(QChar));

     StringImpl* str = new StringImpl(s + pos, newLen);
     truncate(pos);
     return str;
 }

 bool StringImpl::containsOnlyWhitespace() const
 {
     return containsOnlyWhitespace(0, l);
 }

 bool StringImpl::containsOnlyWhitespace(unsigned int from, unsigned int len) const
 {
     if (!s)
         return true;

     for (unsigned i = from; i < len; i++) {
         QChar c = s[i];
         if (c.unicode() <= 0x7F) {
             if (!isspace(c.unicode()))
                 return false;
         }
         else
             return false;
     }
     return true;
 }

 StringImpl* StringImpl::substring(unsigned pos, unsigned len)
 {
     if (pos >= l) return
         new StringImpl;
     if (len > l - pos)
         len = l - pos;
     return new StringImpl(s + pos, len);
 }

 static Length parseLength(const QChar* s, unsigned int l)
 {
     if (l == 0)
         return Length(1, Relative);

     unsigned i = 0;
     while (i < l && s[i].isSpace())
         ++i;
     if (i < l && (s[i] == '+' || s[i] == '-'))
         ++i;
     while (i < l && s[i].isDigit())
         ++i;

     bool ok;
     int r = QConstString(s, i).string().toInt(&ok);

     /* Skip over any remaining digits, we are not that accurate (5.5% => 5%) */
     while (i < l && (s[i].isDigit() || s[i] == '.'))
         ++i;

     /* IE Quirk: Skip any whitespace (20 % => 20%) */
     while (i < l && s[i].isSpace())
         ++i;

     if (ok) {
         if (i == l) {
             return Length(r, Fixed);
         } else {
             const QChar* next = s+i;

             if (*next == '%')
                 return Length(r, Percent);

             if (*next == '*')
                 return Length(r, Relative);
         }
         return Length(r, Fixed);
     } else {
         if (i < l) {
             const QChar* next = s + i;

             if (*next == '*')
                 return Length(1, Relative);

             if (*next == '%')
                 return Length(1, Relative);
         }
     }
     return Length(0, Relative);
 }

 Length StringImpl::toLength() const
 {
     return parseLength(s, l);
 }

 Length* StringImpl::toCoordsArray(int& len) const
 {
     QChar* spacified = newQCharVector(l);
     QChar space(' ');
     for(unsigned int i=0; i < l; i++) {
         QChar cc = s[i];
         if (cc > '9' || (cc < '0' && cc != '-' && cc != '*' && cc != '.'))
             spacified[i] = space;
         else
             spacified[i] = cc;
     }
     DeprecatedString str(spacified, l);
     deleteQCharVector(spacified);

     str = str.simplifyWhiteSpace();

     len = str.contains(' ') + 1;
     Length* r = new Length[len];

     int i = 0;
     int pos = 0;
     int pos2;

     while((pos2 = str.find(' ', pos)) != -1) {
         r[i++] = parseLength(str.unicode() + pos, pos2-pos);
         pos = pos2+1;
     }
     r[i] = parseLength(str.unicode() + pos, str.length()-pos);

     return r;
 }

 Length* StringImpl::toLengthArray(int& len) const
 {
     DeprecatedString str(s, l);
     str = str.simplifyWhiteSpace();

     len = str.contains(',') + 1;
     Length* r = new Length[len];

     int i = 0;
     int pos = 0;
     int pos2;

     while((pos2 = str.find(',', pos)) != -1) {
         r[i++] = parseLength(str.unicode() + pos, pos2 - pos);
         pos = pos2+1;
     }

     /* IE Quirk: If the last comma is the last char skip it and reduce len by one */
     if (str.length()-pos > 0)
         r[i] = parseLength(str.unicode() + pos, str.length() - pos);
     else
         len--;

     return r;
 }

 bool StringImpl::isLower() const
 {
     unsigned int i;
     for (i = 0; i < l; i++)
 	if (s[i].lower() != s[i])
 	    return false;
     return true;
 }

 StringImpl* StringImpl::lower() const
 {
     StringImpl* c = new StringImpl;
     if (!l)
         return c;

     c->s = newQCharVector(l);
     c->l = l;

     for (unsigned int i = 0; i < l; i++)
 	c->s[i] = s[i].lower();

     return c;
 }

 StringImpl* StringImpl::upper() const
 {
     StringImpl* c = new StringImpl;
     if (!l)
         return c;

     c->s = newQCharVector(l);
     c->l = l;

     for (unsigned int i = 0; i < l; i++)
 	c->s[i] = s[i].upper();

     return c;
 }

 static UBreakIterator* getWordBreakIterator(const QChar* string, int length)
 {
     // The locale is currently ignored when determining character cluster breaks.
     // This may change in the future, according to Deborah Goldsmith.
     static bool createdIterator = false;
     static UBreakIterator* iterator;
     UErrorCode status;
     if (!createdIterator) {
         status = U_ZERO_ERROR;
         iterator = ubrk_open(UBRK_WORD, "en_us", 0, 0, &status);
         createdIterator = true;
     }
     if (!iterator)
         return 0;

     status = U_ZERO_ERROR;
     ubrk_setText(iterator, reinterpret_cast<const UChar*>(string), length, &status);
     if (status != U_ZERO_ERROR)
         return 0;

     return iterator;
 }

 StringImpl* StringImpl::capitalize(QChar previous) const
 {
     StringImpl* capitalizedString = new StringImpl;
     if (!l)
         return capitalizedString;

     QChar* stringWithPrevious = newQCharVector(l + 1);
     stringWithPrevious[0] = previous;
     for (unsigned i = 1; i < l + 1; i++)
         stringWithPrevious[i] = s[i - 1];

     UBreakIterator* boundary = getWordBreakIterator(stringWithPrevious, l + 1);
     if (!boundary) {
         deleteQCharVector(stringWithPrevious);
         return capitalizedString;
     }

     capitalizedString->s = newQCharVector(l);
     capitalizedString->l = l;

     int32_t endOfWord;
     int32_t startOfWord = ubrk_first(boundary);
     for (endOfWord = ubrk_next(boundary); endOfWord != UBRK_DONE; startOfWord = endOfWord, endOfWord = ubrk_next(boundary)) {
         if (startOfWord != 0) // Ignore first char of previous string
             capitalizedString->s[startOfWord - 1] = stringWithPrevious[startOfWord].upper();
         for (int i = startOfWord + 1; i < endOfWord; i++)
             capitalizedString->s[i - 1] = stringWithPrevious[i];
     }

     deleteQCharVector(stringWithPrevious);
     return capitalizedString;
 }

 int StringImpl::toInt(bool* ok) const
 {
     unsigned i = 0;

     // Allow leading spaces.
     for (; i != l; ++i) {
         if (!s[i].isSpace()) {
             break;
         }
     }

     // Allow sign.
     if (i != l) {
         if (s[i] == '+' || s[i] == '-') {
             ++i;
         }
     }

     // Allow digits.
     for (; i != l; ++i) {
         if (!s[i].isDigit()) {
             break;
         }
     }

     return QConstString(s, i).string().toInt(ok);
 }

 static bool equal(const QChar* a, const char* b, int l)
 {
     ASSERT(l >= 0);
     while (l--) {
         if (*a != *b)
             return false;
 	a++; b++;
     }
     return true;
 }

 static bool equalCaseInsensitive(const QChar* a, const char* b, int l)
 {
     ASSERT(l >= 0);
     while (l--) {
         if (tolower(a->unicode()) != tolower(*b))
             return false;
 	a++; b++;
     }
     return true;
 }

 static bool equalCaseInsensitive(const QChar* a, const QChar* b, int l)
 {
     ASSERT(l >= 0);
     while (l--) {
         if (tolower(a->unicode()) != tolower(b->unicode()))
             return false;
 	a++; b++;
     }
     return true;
 }

 // This function should be as fast as possible, every little bit helps.
 // Our usage patterns are typically small strings.  In time trials
 // this simplistic algorithm is much faster than Boyer-Moore or hash
 // based algorithms.
 // NOTE: Those time trials were done when this function was part of KWQ's DeprecatedString
 // It was copied here and changed slightly since.
 int StringImpl::find(const char* chs, int index, bool caseSensitive) const
 {
     if (!chs || index < 0)
         return -1;

     int chsLength = strlen(chs);
     int n = l - index;
     if (n < 0)
         return -1;
     n -= chsLength - 1;
     if (n <= 0)
         return -1;

     const char* chsPlusOne = chs + 1;
     int chsLengthMinusOne = chsLength - 1;

     const QChar* ptr = s + index - 1;
     if (caseSensitive) {
         QChar c = *chs;
         do {
             if (*++ptr == c && equal(ptr + 1, chsPlusOne, chsLengthMinusOne))
                 return l - chsLength - n + 1;
         } while (--n);
     } else {
         int lc = tolower((unsigned char)*chs);
         do {
             if (tolower((++ptr)->unicode()) == lc && equalCaseInsensitive(ptr + 1, chsPlusOne, chsLengthMinusOne))
                 return l - chsLength - n + 1;
         } while (--n);
     }

     return -1;
 }

 int StringImpl::find(const QChar c, int start) const
 {
     unsigned int index = start;
     if (index >= l )
         return -1;
     while(index < l) {
 	if (s[index] == c)
             return index;
 	index++;
     }
     return -1;
 }

 // This was copied from KWQ's DeprecatedString and made to work here w/ small modifications.
 // FIXME comments were from the DeprecatedString version.
 int StringImpl::find(const StringImpl* str, int index, bool caseSensitive) const
 {
     // FIXME, use the first character algorithm
     /*
       We use some weird hashing for efficiency's sake.  Instead of
       comparing strings, we compare the sum of str with that of
       a part of this DeprecatedString.  Only if that matches, we call memcmp
       or ucstrnicmp.

       The hash value of a string is the sum of the cells of its
       QChars.
     */
     ASSERT(str);
     if (index < 0)
 	index += l;
     int lstr = str->l;
     int lthis = l - index;
     if ((unsigned)lthis > l)
 	return -1;
     int delta = lthis - lstr;
     if (delta < 0)
 	return -1;

     const QChar* uthis = s + index;
     const QChar* ustr = str->s;
     unsigned hthis = 0;
     unsigned hstr = 0;
     if (caseSensitive) {
 	for (int i = 0; i < lstr; i++) {
 	    hthis += uthis[i].unicode();
 	    hstr += ustr[i].unicode();
 	}
 	int i = 0;
 	while (1) {
 	    if (hthis == hstr && memcmp(uthis + i, ustr, lstr * sizeof(QChar)) == 0)
 		return index + i;
 	    if (i == delta)
 		return -1;
 	    hthis += uthis[i + lstr].unicode();
 	    hthis -= uthis[i].unicode();
 	    i++;
 	}
     } else {
 	for (int i = 0; i < lstr; i++ ) {
 	    hthis += tolower(uthis[i].unicode());
 	    hstr += tolower(ustr[i].unicode());
 	}
 	int i = 0;
 	while (1) {
 	    if (hthis == hstr && equalCaseInsensitive(uthis + i, ustr, lstr))
 		return index + i;
 	    if (i == delta)
 		return -1;
 	    hthis += tolower(uthis[i + lstr].unicode());
 	    hthis -= tolower(uthis[i].unicode());
 	    i++;
 	}
     }
 }

 bool StringImpl::endsWith(const StringImpl* s, bool caseSensitive) const
 {
     ASSERT(s);
     int start = l - s->l;
     if (start >= 0)
         return (find(s, start, caseSensitive) == start);
     return false;
 }

 StringImpl* StringImpl::replace(QChar oldC, QChar newC)
 {
     if (oldC == newC)
         return this;
     unsigned i;
     for (i = 0; i != l; ++i)
 	if (s[i] == oldC)
             break;
     if (i == l)
         return this;

     StringImpl* c = new StringImpl;

     c->s = newQCharVector(l);
     c->l = l;

     for (i = 0; i != l; ++i) {
         QChar ch = s[i];
 	if (ch == oldC)
             ch = newC;
         c->s[i] = ch;
     }

     return c;
 }

 bool equal(const StringImpl* a, const StringImpl* b)
 {
     return StrHash<StringImpl*>::equal(a, b);
 }

 bool equal(const StringImpl* a, const char* b)
 {
     if (!a)
         return !b;
     if (!b)
         return !a;

     unsigned length = a->l;
     const QChar* as = a->s;
     for (unsigned i = 0; i != length; ++i) {
         char c = b[i];
         if (!c)
             return false;
         if (as[i] != c)
             return false;
     }

     return !b[length];
 }

 bool equal(const char* a, const StringImpl* b)
 {
     if (!a)
         return !b;
     if (!b)
         return !a;

     unsigned length = b->l;
     const QChar* bs = b->s;
     for (unsigned i = 0; i != length; ++i) {
         char c = a[i];
         if (!c)
             return false;
         if (c != bs[i])
             return false;
     }

     return !a[length];
 }

 bool equalIgnoringCase(const StringImpl* a, const StringImpl* b)
 {
     return CaseInsensitiveHash::equal(a, b);
 }

 bool equalIgnoringCase(const StringImpl* a, const char* b)
 {
     if (!a)
         return !b;
     if (!b)
         return !a;

     unsigned length = a->l;
     const QChar* as = a->s;
     for (unsigned i = 0; i != length; ++i) {
         char c = b[i];
         if (!c)
             return false;
         if (as[i].lower() != QChar(c).lower())
             return false;
     }

     return !b[length];
 }

 bool equalIgnoringCase(const char* a, const StringImpl* b)
 {
     if (!a)
         return !b;
     if (!b)
         return !a;

     unsigned length = b->l;
     const QChar* bs = b->s;
     for (unsigned i = 0; i != length; ++i) {
         char c = a[i];
         if (!c)
             return false;
         if (QChar(c).lower() != bs[i].lower())
             return false;
     }

     return !a[length];
 }

 // Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's
 // or anything like that.
 const unsigned PHI = 0x9e3779b9U;

 // Paul Hsieh's SuperFastHash
 // http://www.azillionmonkeys.com/qed/hash.html
 unsigned StringImpl::computeHash(const QChar* s, unsigned len)
 {
     unsigned l = len;
     uint32_t hash = PHI;
     uint32_t tmp;

     int rem = l & 1;
     l >>= 1;

     // Main loop
     for (; l > 0; l--) {
         hash += s[0].unicode();
         tmp = (s[1].unicode() << 11) ^ hash;
         hash = (hash << 16) ^ tmp;
         s += 2;
         hash += hash >> 11;
     }

     // Handle end case
     if (rem) {
         hash += s[0].unicode();
         hash ^= hash << 11;
         hash += hash >> 17;
     }

     // Force "avalanching" of final 127 bits
     hash ^= hash << 3;
     hash += hash >> 5;
     hash ^= hash << 2;
     hash += hash >> 15;
     hash ^= hash << 10;

     // this avoids ever returning a hash code of 0, since that is used to
     // signal "hash not computed yet", using a value that is likely to be
     // effectively the same as 0 when the low bits are masked
     if (hash == 0)
         hash = 0x80000000;

     return hash;
 }

 // Paul Hsieh's SuperFastHash
 // http://www.azillionmonkeys.com/qed/hash.html
 unsigned StringImpl::computeHash(const char* s)
 {
     // This hash is designed to work on 16-bit chunks at a time. But since the normal case
     // (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
     // were 16-bit chunks, which should give matching results

     unsigned l = strlen(s);
     uint32_t hash = PHI;
     uint32_t tmp;

     int rem = l & 1;
     l >>= 1;

     // Main loop
     for (; l > 0; l--) {
         hash += (unsigned char)s[0];
         tmp = ((unsigned char)s[1] << 11) ^ hash;
         hash = (hash << 16) ^ tmp;
         s += 2;
         hash += hash >> 11;
     }

     // Handle end case
     if (rem) {
         hash += (unsigned char)s[0];
         hash ^= hash << 11;
         hash += hash >> 17;
     }

     // Force "avalanching" of final 127 bits
     hash ^= hash << 3;
     hash += hash >> 5;
     hash ^= hash << 2;
     hash += hash >> 15;
     hash ^= hash << 10;

     // this avoids ever returning a hash code of 0, since that is used to
     // signal "hash not computed yet", using a value that is likely to be
     // effectively the same as 0 when the low bits are masked
     if (hash == 0)
         hash = 0x80000000;

     return hash;
 }

 const char* StringImpl::ascii() const
 {
     char* buffer = new char[l + 1];
     char* p = buffer;
     for (unsigned i = 0; i != l; ++i) {
         unsigned short c = s[i].unicode();
         if (c >= 0x20 && c < 0x7F)
             *p++ = c;
         else
             *p++ = '?';
     }
     *p++ = '\0';
     return buffer;
 }

 } // namespace WebCore

 namespace KXMLCore {

 const RefPtr<WebCore::StringImpl> HashTraits<RefPtr<WebCore::StringImpl> >::_deleted = new WebCore::StringImpl(static_cast<char*>(0), 0);

 }
	/**
	* This file is part of the DOM implementation for KDE.
	*
	* Copyright (C) 1999 Lars Knoll (knoll@kde.org)
	* (C) 1999 Antti Koivisto (koivisto@kde.org)
	* (C) 2001 Dirk Mueller ( mueller@kde.org )
	* Copyright (C) 2003, 2004, 2005, 2006 Apple Computer, Inc.
	* Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net)
	*
	* This library is free software; you can redistribute it and/or
	* modify it under the terms of the GNU Library General Public
	* License as published by the Free Software Foundation; either
	* version 2 of the License, or (at your option) any later version.
	*
	* This library is distributed in the hope that it will be useful,
	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	* Library General Public License for more details.
	*
	* You should have received a copy of the GNU Library General Public License
	* along with this library; see the file COPYING.LIB. If not, write to
	* the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
	* Boston, MA 02111-1307, USA.
	*
	*/

	#include "config.h"
	#include "StringImpl.h"

	#include "AtomicString.h"
	#include "Length.h"
	#include <kxmlcore/Assertions.h>
	#include <string.h>
	#include <unicode/ubrk.h>

	using namespace KXMLCore;

	namespace WebCore {

	static inline QChar* newQCharVector(unsigned n)
	{
	return static_cast<QChar>(fastMalloc(sizeof(QChar) n));
	}

	static inline void deleteQCharVector(QChar* p)
	{
	fastFree(p);
	}

	StringImpl* StringImpl::empty()
	{
	static WithOneRef w;
	static StringImpl e(w);
	return &e;
	}

	StringImpl::StringImpl(const DeprecatedString& str)
	{
	initWithQChar(str.unicode(), str.length());
	}

	StringImpl::StringImpl(const QChar* str, unsigned len)
	{
	initWithQChar(str, len);
	}

	StringImpl::StringImpl(const char* str)
	{
	initWithChar(str, strlen(str));
	}

	StringImpl::StringImpl(const char* str, unsigned int len)
	{
	initWithChar(str, len);
	}

	void StringImpl::initWithChar(const char* str, unsigned int len)
	{
	_hash = 0;
	_inTable = false;
	l = len;
	if (!l \|\| !str) {
	s = 0;
	return;
	}

	s = newQCharVector(l);
	int i = l;
	QChar* ptr = s;
	while (i--)
	ptr++ = str++;
	}

	void StringImpl::initWithQChar(const QChar* str, unsigned int len)
	{
	_hash = 0;
	_inTable = false;
	l = len;
	if (!l \|\| !str) {
	s = 0;
	return;
	}

	s = newQCharVector(len);
	memcpy(s, str, len * sizeof(QChar));
	}

	StringImpl::~StringImpl()
	{
	if (_inTable)
	AtomicString::remove(this);
	deleteQCharVector(s);
	}

	void StringImpl::append(const StringImpl* str)
	{
	assert(!_inTable);
	if(str && str->l != 0)
	{
	int newlen = l+str->l;
	QChar* c = newQCharVector(newlen);
	memcpy(c, s, l*sizeof(QChar));
	memcpy(c+l, str->s, str->l*sizeof(QChar));
	deleteQCharVector(s);
	s = c;
	l = newlen;
	}
	}

	void StringImpl::insert(const StringImpl* str, unsigned pos)
	{
	assert(!_inTable);
	if (pos >= l) {
	append(str);
	return;
	}
	if (str && str->l != 0) {
	int newlen = l + str->l;
	QChar* c = newQCharVector(newlen);
	memcpy(c, s, pos*sizeof(QChar));
	memcpy(c+pos, str->s, str->l*sizeof(QChar));
	memcpy(c+pos+str->l, s+pos, (l-pos)*sizeof(QChar));
	deleteQCharVector(s);
	s = c;
	l = newlen;
	}
	}

	void StringImpl::truncate(int len)
	{
	assert(!_inTable);
	if (len > (int)l)
	return;
	int nl = len < 1 ? 1 : len;
	QChar* c = newQCharVector(nl);
	memcpy(c, s, nl*sizeof(QChar));
	deleteQCharVector(s);
	s = c;
	l = len;
	}

	void StringImpl::remove(unsigned pos, int len)
	{
	assert(!_inTable);
	if(len <= 0) return;
	if(pos >= l ) return;
	if((unsigned)len > l - pos)
	len = l - pos;

	unsigned newLen = l-len;
	QChar* c = newQCharVector(newLen);
	memcpy(c, s, pos*sizeof(QChar));
	memcpy(c+pos, s+pos+len, (l-len-pos)*sizeof(QChar));
	deleteQCharVector(s);
	s = c;
	l = newLen;
	}

	StringImpl* StringImpl::split(unsigned pos)
	{
	assert(!_inTable);
	if( pos >=l ) return new StringImpl();

	unsigned newLen = l-pos;
	QChar* c = newQCharVector(newLen);
	memcpy(c, s+pos, newLen*sizeof(QChar));

	StringImpl* str = new StringImpl(s + pos, newLen);
	truncate(pos);
	return str;
	}

	bool StringImpl::containsOnlyWhitespace() const
	{
	return containsOnlyWhitespace(0, l);
	}

	bool StringImpl::containsOnlyWhitespace(unsigned int from, unsigned int len) const
	{
	if (!s)
	return true;

	for (unsigned i = from; i < len; i++) {
	QChar c = s[i];
	if (c.unicode() <= 0x7F) {
	if (!isspace(c.unicode()))
	return false;
	}
	else
	return false;
	}
	return true;
	}

	StringImpl* StringImpl::substring(unsigned pos, unsigned len)
	{
	if (pos >= l) return
	new StringImpl;
	if (len > l - pos)
	len = l - pos;
	return new StringImpl(s + pos, len);
	}

	static Length parseLength(const QChar* s, unsigned int l)
	{
	if (l == 0)
	return Length(1, Relative);

	unsigned i = 0;
	while (i < l && s[i].isSpace())
	++i;
	if (i < l && (s[i] == '+' \|\| s[i] == '-'))
	++i;
	while (i < l && s[i].isDigit())
	++i;

	bool ok;
	int r = QConstString(s, i).string().toInt(&ok);

	/* Skip over any remaining digits, we are not that accurate (5.5% => 5%) */
	while (i < l && (s[i].isDigit() \|\| s[i] == '.'))
	++i;

	/* IE Quirk: Skip any whitespace (20 % => 20%) */
	while (i < l && s[i].isSpace())
	++i;

	if (ok) {
	if (i == l) {
	return Length(r, Fixed);
	} else {
	const QChar* next = s+i;

	if (*next == '%')
	return Length(r, Percent);

	if (next == '')
	return Length(r, Relative);
	}
	return Length(r, Fixed);
	} else {
	if (i < l) {
	const QChar* next = s + i;

	if (next == '')
	return Length(1, Relative);

	if (*next == '%')
	return Length(1, Relative);
	}
	}
	return Length(0, Relative);
	}

	Length StringImpl::toLength() const
	{
	return parseLength(s, l);
	}

	Length* StringImpl::toCoordsArray(int& len) const
	{
	QChar* spacified = newQCharVector(l);
	QChar space(' ');
	for(unsigned int i=0; i < l; i++) {
	QChar cc = s[i];
	if (cc > '9' \|\| (cc < '0' && cc != '-' && cc != '*' && cc != '.'))
	spacified[i] = space;
	else
	spacified[i] = cc;
	}
	DeprecatedString str(spacified, l);
	deleteQCharVector(spacified);

	str = str.simplifyWhiteSpace();

	len = str.contains(' ') + 1;
	Length* r = new Length[len];

	int i = 0;
	int pos = 0;
	int pos2;

	while((pos2 = str.find(' ', pos)) != -1) {
	r[i++] = parseLength(str.unicode() + pos, pos2-pos);
	pos = pos2+1;
	}
	r[i] = parseLength(str.unicode() + pos, str.length()-pos);

	return r;
	}

	Length* StringImpl::toLengthArray(int& len) const
	{
	DeprecatedString str(s, l);
	str = str.simplifyWhiteSpace();

	len = str.contains(',') + 1;
	Length* r = new Length[len];

	int i = 0;
	int pos = 0;
	int pos2;

	while((pos2 = str.find(',', pos)) != -1) {
	r[i++] = parseLength(str.unicode() + pos, pos2 - pos);
	pos = pos2+1;
	}

	/* IE Quirk: If the last comma is the last char skip it and reduce len by one */
	if (str.length()-pos > 0)
	r[i] = parseLength(str.unicode() + pos, str.length() - pos);
	else
	len--;

	return r;
	}

	bool StringImpl::isLower() const
	{
	unsigned int i;
	for (i = 0; i < l; i++)
	if (s[i].lower() != s[i])
	return false;
	return true;
	}

	StringImpl* StringImpl::lower() const
	{
	StringImpl* c = new StringImpl;
	if (!l)
	return c;

	c->s = newQCharVector(l);
	c->l = l;

	for (unsigned int i = 0; i < l; i++)
	c->s[i] = s[i].lower();

	return c;
	}

	StringImpl* StringImpl::upper() const
	{
	StringImpl* c = new StringImpl;
	if (!l)
	return c;

	c->s = newQCharVector(l);
	c->l = l;

	for (unsigned int i = 0; i < l; i++)
	c->s[i] = s[i].upper();

	return c;
	}

	static UBreakIterator* getWordBreakIterator(const QChar* string, int length)
	{
	// The locale is currently ignored when determining character cluster breaks.
	// This may change in the future, according to Deborah Goldsmith.
	static bool createdIterator = false;
	static UBreakIterator* iterator;
	UErrorCode status;
	if (!createdIterator) {
	status = U_ZERO_ERROR;
	iterator = ubrk_open(UBRK_WORD, "en_us", 0, 0, &status);
	createdIterator = true;
	}
	if (!iterator)
	return 0;

	status = U_ZERO_ERROR;
	ubrk_setText(iterator, reinterpret_cast<const UChar*>(string), length, &status);
	if (status != U_ZERO_ERROR)
	return 0;

	return iterator;
	}

	StringImpl* StringImpl::capitalize(QChar previous) const
	{
	StringImpl* capitalizedString = new StringImpl;
	if (!l)
	return capitalizedString;

	QChar* stringWithPrevious = newQCharVector(l + 1);
	stringWithPrevious[0] = previous;
	for (unsigned i = 1; i < l + 1; i++)
	stringWithPrevious[i] = s[i - 1];

	UBreakIterator* boundary = getWordBreakIterator(stringWithPrevious, l + 1);
	if (!boundary) {
	deleteQCharVector(stringWithPrevious);
	return capitalizedString;
	}

	capitalizedString->s = newQCharVector(l);
	capitalizedString->l = l;

	int32_t endOfWord;
	int32_t startOfWord = ubrk_first(boundary);
	for (endOfWord = ubrk_next(boundary); endOfWord != UBRK_DONE; startOfWord = endOfWord, endOfWord = ubrk_next(boundary)) {
	if (startOfWord != 0) // Ignore first char of previous string
	capitalizedString->s[startOfWord - 1] = stringWithPrevious[startOfWord].upper();
	for (int i = startOfWord + 1; i < endOfWord; i++)
	capitalizedString->s[i - 1] = stringWithPrevious[i];
	}

	deleteQCharVector(stringWithPrevious);
	return capitalizedString;
	}

	int StringImpl::toInt(bool* ok) const
	{
	unsigned i = 0;

	// Allow leading spaces.
	for (; i != l; ++i) {
	if (!s[i].isSpace()) {
	break;
	}
	}

	// Allow sign.
	if (i != l) {
	if (s[i] == '+' \|\| s[i] == '-') {
	++i;
	}
	}

	// Allow digits.
	for (; i != l; ++i) {
	if (!s[i].isDigit()) {
	break;
	}
	}

	return QConstString(s, i).string().toInt(ok);
	}

	static bool equal(const QChar* a, const char* b, int l)
	{
	ASSERT(l >= 0);
	while (l--) {
	if (a != b)
	return false;
	a++; b++;
	}
	return true;
	}

	static bool equalCaseInsensitive(const QChar* a, const char* b, int l)
	{
	ASSERT(l >= 0);
	while (l--) {
	if (tolower(a->unicode()) != tolower(*b))
	return false;
	a++; b++;
	}
	return true;
	}

	static bool equalCaseInsensitive(const QChar* a, const QChar* b, int l)
	{
	ASSERT(l >= 0);
	while (l--) {
	if (tolower(a->unicode()) != tolower(b->unicode()))
	return false;
	a++; b++;
	}
	return true;
	}

	// This function should be as fast as possible, every little bit helps.
	// Our usage patterns are typically small strings. In time trials
	// this simplistic algorithm is much faster than Boyer-Moore or hash
	// based algorithms.
	// NOTE: Those time trials were done when this function was part of KWQ's DeprecatedString
	// It was copied here and changed slightly since.
	int StringImpl::find(const char* chs, int index, bool caseSensitive) const
	{
	if (!chs \|\| index < 0)
	return -1;

	int chsLength = strlen(chs);
	int n = l - index;
	if (n < 0)
	return -1;
	n -= chsLength - 1;
	if (n <= 0)
	return -1;

	const char* chsPlusOne = chs + 1;
	int chsLengthMinusOne = chsLength - 1;

	const QChar* ptr = s + index - 1;
	if (caseSensitive) {
	QChar c = *chs;
	do {
	if (*++ptr == c && equal(ptr + 1, chsPlusOne, chsLengthMinusOne))
	return l - chsLength - n + 1;
	} while (--n);
	} else {
	int lc = tolower((unsigned char)*chs);
	do {
	if (tolower((++ptr)->unicode()) == lc && equalCaseInsensitive(ptr + 1, chsPlusOne, chsLengthMinusOne))
	return l - chsLength - n + 1;
	} while (--n);
	}

	return -1;
	}

	int StringImpl::find(const QChar c, int start) const
	{
	unsigned int index = start;
	if (index >= l )
	return -1;
	while(index < l) {
	if (s[index] == c)
	return index;
	index++;
	}
	return -1;
	}

	// This was copied from KWQ's DeprecatedString and made to work here w/ small modifications.
	// FIXME comments were from the DeprecatedString version.
	int StringImpl::find(const StringImpl* str, int index, bool caseSensitive) const
	{
	// FIXME, use the first character algorithm
	/*
	We use some weird hashing for efficiency's sake. Instead of
	comparing strings, we compare the sum of str with that of
	a part of this DeprecatedString. Only if that matches, we call memcmp
	or ucstrnicmp.

	The hash value of a string is the sum of the cells of its
	QChars.
	*/
	ASSERT(str);
	if (index < 0)
	index += l;
	int lstr = str->l;
	int lthis = l - index;
	if ((unsigned)lthis > l)
	return -1;
	int delta = lthis - lstr;
	if (delta < 0)
	return -1;

	const QChar* uthis = s + index;
	const QChar* ustr = str->s;
	unsigned hthis = 0;
	unsigned hstr = 0;
	if (caseSensitive) {
	for (int i = 0; i < lstr; i++) {
	hthis += uthis[i].unicode();
	hstr += ustr[i].unicode();
	}
	int i = 0;
	while (1) {
	if (hthis == hstr && memcmp(uthis + i, ustr, lstr * sizeof(QChar)) == 0)
	return index + i;
	if (i == delta)
	return -1;
	hthis += uthis[i + lstr].unicode();
	hthis -= uthis[i].unicode();
	i++;
	}
	} else {
	for (int i = 0; i < lstr; i++ ) {
	hthis += tolower(uthis[i].unicode());
	hstr += tolower(ustr[i].unicode());
	}
	int i = 0;
	while (1) {
	if (hthis == hstr && equalCaseInsensitive(uthis + i, ustr, lstr))
	return index + i;
	if (i == delta)
	return -1;
	hthis += tolower(uthis[i + lstr].unicode());
	hthis -= tolower(uthis[i].unicode());
	i++;
	}
	}
	}

	bool StringImpl::endsWith(const StringImpl* s, bool caseSensitive) const
	{
	ASSERT(s);
	int start = l - s->l;
	if (start >= 0)
	return (find(s, start, caseSensitive) == start);
	return false;
	}

	StringImpl* StringImpl::replace(QChar oldC, QChar newC)
	{
	if (oldC == newC)
	return this;
	unsigned i;
	for (i = 0; i != l; ++i)
	if (s[i] == oldC)
	break;
	if (i == l)
	return this;

	StringImpl* c = new StringImpl;

	c->s = newQCharVector(l);
	c->l = l;

	for (i = 0; i != l; ++i) {
	QChar ch = s[i];
	if (ch == oldC)
	ch = newC;
	c->s[i] = ch;
	}

	return c;
	}

	bool equal(const StringImpl* a, const StringImpl* b)
	{
	return StrHash<StringImpl*>::equal(a, b);
	}

	bool equal(const StringImpl* a, const char* b)
	{
	if (!a)
	return !b;
	if (!b)
	return !a;

	unsigned length = a->l;
	const QChar* as = a->s;
	for (unsigned i = 0; i != length; ++i) {
	char c = b[i];
	if (!c)
	return false;
	if (as[i] != c)
	return false;
	}

	return !b[length];
	}

	bool equal(const char* a, const StringImpl* b)
	{
	if (!a)
	return !b;
	if (!b)
	return !a;

	unsigned length = b->l;
	const QChar* bs = b->s;
	for (unsigned i = 0; i != length; ++i) {
	char c = a[i];
	if (!c)
	return false;
	if (c != bs[i])
	return false;
	}

	return !a[length];
	}

	bool equalIgnoringCase(const StringImpl* a, const StringImpl* b)
	{
	return CaseInsensitiveHash::equal(a, b);
	}

	bool equalIgnoringCase(const StringImpl* a, const char* b)
	{
	if (!a)
	return !b;
	if (!b)
	return !a;

	unsigned length = a->l;
	const QChar* as = a->s;
	for (unsigned i = 0; i != length; ++i) {
	char c = b[i];
	if (!c)
	return false;
	if (as[i].lower() != QChar(c).lower())
	return false;
	}

	return !b[length];
	}

	bool equalIgnoringCase(const char* a, const StringImpl* b)
	{
	if (!a)
	return !b;
	if (!b)
	return !a;

	unsigned length = b->l;
	const QChar* bs = b->s;
	for (unsigned i = 0; i != length; ++i) {
	char c = a[i];
	if (!c)
	return false;
	if (QChar(c).lower() != bs[i].lower())
	return false;
	}

	return !a[length];
	}

	// Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's
	// or anything like that.
	const unsigned PHI = 0x9e3779b9U;

	// Paul Hsieh's SuperFastHash
	// http://www.azillionmonkeys.com/qed/hash.html
	unsigned StringImpl::computeHash(const QChar* s, unsigned len)
	{
	unsigned l = len;
	uint32_t hash = PHI;
	uint32_t tmp;

	int rem = l & 1;
	l >>= 1;

	// Main loop
	for (; l > 0; l--) {
	hash += s[0].unicode();
	tmp = (s[1].unicode() << 11) ^ hash;
	hash = (hash << 16) ^ tmp;
	s += 2;
	hash += hash >> 11;
	}

	// Handle end case
	if (rem) {
	hash += s[0].unicode();
	hash ^= hash << 11;
	hash += hash >> 17;
	}

	// Force "avalanching" of final 127 bits
	hash ^= hash << 3;
	hash += hash >> 5;
	hash ^= hash << 2;
	hash += hash >> 15;
	hash ^= hash << 10;

	// this avoids ever returning a hash code of 0, since that is used to
	// signal "hash not computed yet", using a value that is likely to be
	// effectively the same as 0 when the low bits are masked
	if (hash == 0)
	hash = 0x80000000;

	return hash;
	}

	// Paul Hsieh's SuperFastHash
	// http://www.azillionmonkeys.com/qed/hash.html
	unsigned StringImpl::computeHash(const char* s)
	{
	// This hash is designed to work on 16-bit chunks at a time. But since the normal case
	// (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
	// were 16-bit chunks, which should give matching results

	unsigned l = strlen(s);
	uint32_t hash = PHI;
	uint32_t tmp;

	int rem = l & 1;
	l >>= 1;

	// Main loop
	for (; l > 0; l--) {
	hash += (unsigned char)s[0];
	tmp = ((unsigned char)s[1] << 11) ^ hash;
	hash = (hash << 16) ^ tmp;
	s += 2;
	hash += hash >> 11;
	}

	// Handle end case
	if (rem) {
	hash += (unsigned char)s[0];
	hash ^= hash << 11;
	hash += hash >> 17;
	}

	// Force "avalanching" of final 127 bits
	hash ^= hash << 3;
	hash += hash >> 5;
	hash ^= hash << 2;
	hash += hash >> 15;
	hash ^= hash << 10;

	// this avoids ever returning a hash code of 0, since that is used to
	// signal "hash not computed yet", using a value that is likely to be
	// effectively the same as 0 when the low bits are masked
	if (hash == 0)
	hash = 0x80000000;

	return hash;
	}

	const char* StringImpl::ascii() const
	{
	char* buffer = new char[l + 1];
	char* p = buffer;
	for (unsigned i = 0; i != l; ++i) {
	unsigned short c = s[i].unicode();
	if (c >= 0x20 && c < 0x7F)
	*p++ = c;
	else
	*p++ = '?';
	}
	*p++ = '\0';
	return buffer;
	}

	} // namespace WebCore

	namespace KXMLCore {

	const RefPtr<WebCore::StringImpl> HashTraits<RefPtr<WebCore::StringImpl> >::_deleted = new WebCore::StringImpl(static_cast<char*>(0), 0);

	}