Source/WebCore/html/parser/HTMLParserIdioms.cpp - WebKit - Git at Google

 /*
  * Copyright (C) 2010 Apple Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1.  Redistributions of source code must retain the above copyright
  *     notice, this list of conditions and the following disclaimer.
  * 2.  Redistributions in binary form must reproduce the above copyright
  *     notice, this list of conditions and the following disclaimer in the
  *     documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */

 #include "config.h"
 #include "HTMLParserIdioms.h"

 #include "Decimal.h"
 #include <limits>
 #include <wtf/MathExtras.h>
 #include <wtf/dtoa.h>
 #include <wtf/text/AtomicString.h>
 #include <wtf/text/StringBuilder.h>

 namespace WebCore {

 // Returns |string| with its leading and trailing HTML space characters removed.
 // When nothing but HTML spaces remain (including the zero-length case), a null
 // input is returned unchanged and any other input yields emptyAtom's string.
 String stripLeadingAndTrailingHTMLSpaces(const String& string)
 {
     const unsigned length = string.length();
     const UChar* const characters = string.characters();

     // Index of the first character that is not an HTML space.
     unsigned start = 0;
     while (start < length && !isNotHTMLSpace(characters[start]))
         ++start;

     if (start == length) {
         if (string.isNull())
             return string;
         return emptyAtom.string();
     }

     // One past the index of the last character that is not an HTML space.
     unsigned end = length;
     while (end > 0 && !isNotHTMLSpace(characters[end - 1]))
         --end;

     // At least one non-space character exists, so the kept range is non-empty.
     ASSERT(start < end);

     return string.substring(start, end - start);
 }

 // Serializes a Decimal for a number-type value. Zero is written out by hand
 // because Decimal::toString appends an exponent to it, e.g. "0e-18".
 String serializeForNumberType(const Decimal& number)
 {
     if (!number.isZero())
         return number.toString();

     if (number.isNegative())
         return "-0";
     return "0";
 }

 // Serializes a double for a number-type value.
 // HTML5 defines "the best representation of the number n as a floating point
 // number" as the string obtained by applying ToString() to n.
 String serializeForNumberType(double number)
 {
     NumberToStringBuffer scratch;
     String serialized(numberToString(number, scratch));
     return serialized;
 }

 // Parses |string| as a Decimal for a number-type value, returning
 // |fallbackValue| whenever the string is rejected.
 // See HTML5 2.5.4.3 `Real numbers.' and parseToDoubleForNumberType.
 Decimal parseToDecimalForNumberType(const String& string, const Decimal& fallbackValue)
 {
     // Only '-', '.' or an ASCII digit may start the string; a leading '+' or
     // whitespace (which String::toDouble() would accept) is not valid here.
     const UChar leadingCharacter = string[0];
     const bool startsLikeNumber = leadingCharacter == '-' || leadingCharacter == '.' || isASCIIDigit(leadingCharacter);
     if (!startsLikeNumber)
         return fallbackValue;

     const Decimal parsed = Decimal::fromString(string);
     if (!parsed.isFinite())
         return fallbackValue;

     // Numbers are considered finite IEEE 754 single-precision floating point values.
     // See HTML5 2.5.4.3 `Real numbers.'
     // FIXME: We should use numeric_limits<double>::max for number input type.
     const Decimal upperBound = Decimal::fromDouble(std::numeric_limits<float>::max());
     const Decimal lowerBound = -upperBound;
     if (parsed < lowerBound || parsed > upperBound)
         return fallbackValue;

     // A zero of either sign is reported as +0.
     if (parsed.isZero())
         return Decimal(0);
     return parsed;
 }

 // Convenience overload: parses |string| and uses Decimal::nan() as the
 // fallback value for rejected input.
 Decimal parseToDecimalForNumberType(const String& string)
 {
     const Decimal nanFallback = Decimal::nan();
     return parseToDecimalForNumberType(string, nanFallback);
 }

 double parseToDoubleForNumberType(const String& string, double fallbackValue)
 {
     // See HTML5 2.5.4.3 `Real numbers.'

     // String::toDouble() accepts leading + and whitespace characters, which are not valid here.
     UChar firstCharacter = string[0];
     if (firstCharacter != '-' && firstCharacter != '.' && !isASCIIDigit(firstCharacter))
         return fallbackValue;

     bool valid = false;
     double value = string.toDouble(&valid);
     if (!valid)
         return fallbackValue;

     // NaN and infinity are considered valid by String::toDouble, but not valid here.
     if (!isfinite(value))
         return fallbackValue;

     // Numbers are considered finite IEEE 754 single-precision floating point values.
     // See HTML5 2.5.4.3 `Real numbers.'
     if (-std::numeric_limits<float>::max() > value || value > std::numeric_limits<float>::max())
         return fallbackValue;

     // The following expression converts -0 to +0.
     return value ? value : 0;
 }

 // Convenience overload: parses |string| and uses a quiet NaN as the fallback
 // value for rejected input.
 double parseToDoubleForNumberType(const String& string)
 {
     const double nanFallback = std::numeric_limits<double>::quiet_NaN();
     return parseToDoubleForNumberType(string, nanFallback);
 }

 // Parses |string| with parseToDoubleForNumberType and, when |decimalPlaces| is
 // non-null, also reports the number of digits after the decimal point once the
 // exponent has been applied, clamped to the range [0, 19999].
 //
 // |*decimalPlaces| is reset to 0 before parsing. Returns |fallbackValue| when
 // the string does not parse to a finite value.
 double parseToDoubleForNumberTypeWithDecimalPlaces(const String& string, unsigned *decimalPlaces, double fallbackValue)
 {
     if (decimalPlaces)
         *decimalPlaces = 0;

     double value = parseToDoubleForNumberType(string, std::numeric_limits<double>::quiet_NaN());
     if (!isfinite(value))
         return fallbackValue;

     if (!decimalPlaces)
         return value;

     size_t dotIndex = string.find('.');
     size_t eIndex = string.find('e');
     if (eIndex == notFound)
         eIndex = string.find('E');

     // Count of characters between the '.' and the exponent marker, or the end
     // of the string when there is no exponent.
     unsigned baseDecimalPlaces = 0;
     if (dotIndex != notFound) {
         if (eIndex == notFound)
             baseDecimalPlaces = string.length() - dotIndex - 1;
         else
             baseDecimalPlaces = eIndex - dotIndex - 1;
     }

     int exponent = 0;
     if (eIndex != notFound) {
         unsigned cursor = eIndex + 1, cursorSaved;
         int digit, exponentSign;
         int32_t exponent32;
         size_t length = string.length();

         // Not using String.toInt() in order to perform the same computation as dtoa() does.
         exponentSign = 0;
         switch (digit = string[cursor]) {
         case '-':
             exponentSign = 1;
             // Fall through: both signs are skipped the same way.
         case '+':
             digit = string[++cursor];
         }
         if (digit >= '0' && digit <= '9') {
             // Skip leading zeros of the exponent.
             while (cursor < length && digit == '0')
                 digit = string[++cursor];
             if (digit > '0' && digit <= '9') {
                 exponent32 = digit - '0';
                 cursorSaved = cursor;
                 while (cursor < length && (digit = string[++cursor]) >= '0' && digit <= '9') {
                     // Stop accumulating once the value is already past the clamp
                     // threshold: a long run of digits would otherwise overflow
                     // int32_t, which is undefined behavior. The outcome is the
                     // same because any such value is clamped to 19999 below.
                     if (exponent32 <= 19999)
                         exponent32 = (10 * exponent32) + digit - '0';
                 }
                 if (cursor - cursorSaved > 8 || exponent32 > 19999)
                     /* Avoid confusion from exponents
                      * so large that e might overflow.
                      */
                     exponent = 19999; /* safe for 16 bit ints */
                 else
                     exponent = static_cast<int>(exponent32);
                 if (exponentSign)
                     exponent = -exponent;
             } else
                 exponent = 0;
         }
     }

     // A positive exponent shifts digits to the left of the decimal point, a
     // negative one adds decimal places.
     int intDecimalPlaces = baseDecimalPlaces - exponent;
     if (intDecimalPlaces < 0)
         *decimalPlaces = 0;
     else if (intDecimalPlaces > 19999)
         *decimalPlaces = 19999;
     else
         *decimalPlaces = static_cast<unsigned>(intDecimalPlaces);

     return value;
 }

 // Convenience overload: same as the three-argument form with a quiet NaN as
 // the fallback value for rejected input.
 double parseToDoubleForNumberTypeWithDecimalPlaces(const String& string, unsigned *decimalPlaces)
 {
     const double nanFallback = std::numeric_limits<double>::quiet_NaN();
     return parseToDoubleForNumberTypeWithDecimalPlaces(string, decimalPlaces, nanFallback);
 }

 // http://www.whatwg.org/specs/web-apps/current-work/#rules-for-parsing-integers
 //
 // Parses a signed integer from |input|: leading HTML spaces are skipped, an
 // optional '-' or '+' is consumed, and the following run of ASCII digits is
 // converted. Characters after the digit run are ignored. Returns true and
 // stores the result in |value| on success. Returns false when there are no
 // digits (|value| untouched) or when the conversion reports failure.
 bool parseHTMLInteger(const String& input, int& value)
 {
     // Step 1
     // Step 2
     const UChar* position = input.characters();
     const UChar* end = position + input.length();

     // Step 3
     // The sign is carried as a '-' prefix on the collected digits instead of
     // being multiplied in afterwards. Converting the magnitude alone rejects
     // the most negative int, whose magnitude does not fit in an int.
     // NOTE(review): relies on charactersToIntStrict accepting a leading '-'.
     StringBuilder digits;

     // Step 4
     while (position < end) {
         if (!isHTMLSpace(*position))
             break;
         ++position;
     }

     // Step 5
     if (position == end)
         return false;
     ASSERT(position < end);

     // Step 6
     if (*position == '-') {
         digits.append('-');
         ++position;
     } else if (*position == '+')
         ++position;
     if (position == end)
         return false;
     ASSERT(position < end);

     // Step 7
     if (!isASCIIDigit(*position))
         return false;

     // Step 8
     while (position < end) {
         if (!isASCIIDigit(*position))
             break;
         digits.append(*position++);
     }

     // Step 9
     bool ok;
     value = charactersToIntStrict(digits.characters(), digits.length(), &ok);
     return ok;
 }

 // http://www.whatwg.org/specs/web-apps/current-work/#rules-for-parsing-non-negative-integers
 bool parseHTMLNonNegativeInteger(const String& input, unsigned int& value)
 {
     // Step 1
     // Step 2
     const UChar* position = input.characters();
     const UChar* end = position + input.length();

     // Step 3
     while (position < end) {
         if (!isHTMLSpace(*position))
             break;
         ++position;
     }

     // Step 4
     if (position == end)
         return false;
     ASSERT(position < end);

     // Step 5
     if (*position == '+')
         ++position;

     // Step 6
     if (position == end)
         return false;
     ASSERT(position < end);

     // Step 7
     if (!isASCIIDigit(*position))
         return false;

     // Step 8
     StringBuilder digits;
     while (position < end) {
         if (!isASCIIDigit(*position))
             break;
         digits.append(*position++);
     }

     // Step 9
     bool ok;
     value = charactersToUIntStrict(digits.characters(), digits.length(), &ok);
     return ok;
 }

 }
	/*
	* Copyright (C) 2010 Apple Inc. All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	*
	* THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
	* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
	* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
	* DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
	* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
	* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
	* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
	* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
	* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	*/

	#include "config.h"
	#include "HTMLParserIdioms.h"

	#include "Decimal.h"
	#include <limits>
	#include <wtf/MathExtras.h>
	#include <wtf/dtoa.h>
	#include <wtf/text/AtomicString.h>
	#include <wtf/text/StringBuilder.h>

	namespace WebCore {

	// Returns |string| with its leading and trailing HTML space characters removed.
	// When nothing but HTML spaces remain (including the zero-length case), a null
	// input is returned unchanged and any other input yields emptyAtom's string.
	String stripLeadingAndTrailingHTMLSpaces(const String& string)
	{
	    const unsigned length = string.length();
	    const UChar* const characters = string.characters();

	    // Index of the first character that is not an HTML space.
	    unsigned start = 0;
	    while (start < length && !isNotHTMLSpace(characters[start]))
	        ++start;

	    if (start == length) {
	        if (string.isNull())
	            return string;
	        return emptyAtom.string();
	    }

	    // One past the index of the last character that is not an HTML space.
	    unsigned end = length;
	    while (end > 0 && !isNotHTMLSpace(characters[end - 1]))
	        --end;

	    // At least one non-space character exists, so the kept range is non-empty.
	    ASSERT(start < end);

	    return string.substring(start, end - start);
	}

	// Serializes a Decimal for a number-type value. Zero is written out by hand
	// because Decimal::toString appends an exponent to it, e.g. "0e-18".
	String serializeForNumberType(const Decimal& number)
	{
	    if (!number.isZero())
	        return number.toString();

	    if (number.isNegative())
	        return "-0";
	    return "0";
	}

	// Serializes a double for a number-type value.
	// HTML5 defines "the best representation of the number n as a floating point
	// number" as the string obtained by applying ToString() to n.
	String serializeForNumberType(double number)
	{
	    NumberToStringBuffer scratch;
	    String serialized(numberToString(number, scratch));
	    return serialized;
	}

	// Parses |string| as a Decimal for a number-type value, returning
	// |fallbackValue| whenever the string is rejected.
	// See HTML5 2.5.4.3 `Real numbers.' and parseToDoubleForNumberType.
	Decimal parseToDecimalForNumberType(const String& string, const Decimal& fallbackValue)
	{
	    // String::toDouble() accepts leading + and whitespace characters, which are not valid here.
	    const UChar firstCharacter = string[0];
	    if (firstCharacter != '-' && firstCharacter != '.' && !isASCIIDigit(firstCharacter))
	        return fallbackValue;

	    const Decimal value = Decimal::fromString(string);
	    if (!value.isFinite())
	        return fallbackValue;

	    // Numbers are considered finite IEEE 754 single-precision floating point values.
	    // See HTML5 2.5.4.3 `Real numbers.'
	    // FIXME: We should use numeric_limits<double>::max for number input type.
	    const Decimal floatMax = Decimal::fromDouble(std::numeric_limits<float>::max());
	    if (value < -floatMax || value > floatMax)
	        return fallbackValue;

	    // We return +0 for -0 case.
	    return value.isZero() ? Decimal(0) : value;
	}

	// Convenience overload: parses |string| and uses Decimal::nan() as the
	// fallback value for rejected input.
	Decimal parseToDecimalForNumberType(const String& string)
	{
	    const Decimal nanFallback = Decimal::nan();
	    return parseToDecimalForNumberType(string, nanFallback);
	}

	// Parses |string| as a double for a number-type value, returning
	// |fallbackValue| whenever the string is rejected.
	// See HTML5 2.5.4.3 `Real numbers.'
	double parseToDoubleForNumberType(const String& string, double fallbackValue)
	{
	    // String::toDouble() accepts leading + and whitespace characters, which are not valid here.
	    UChar firstCharacter = string[0];
	    if (firstCharacter != '-' && firstCharacter != '.' && !isASCIIDigit(firstCharacter))
	        return fallbackValue;

	    bool valid = false;
	    double value = string.toDouble(&valid);
	    if (!valid)
	        return fallbackValue;

	    // NaN and infinity are considered valid by String::toDouble, but not valid here.
	    if (!isfinite(value))
	        return fallbackValue;

	    // Numbers are considered finite IEEE 754 single-precision floating point values.
	    // See HTML5 2.5.4.3 `Real numbers.'
	    if (-std::numeric_limits<float>::max() > value || value > std::numeric_limits<float>::max())
	        return fallbackValue;

	    // The following expression converts -0 to +0.
	    return value ? value : 0;
	}

	// Convenience overload: parses |string| and uses a quiet NaN as the fallback
	// value for rejected input.
	double parseToDoubleForNumberType(const String& string)
	{
	    const double nanFallback = std::numeric_limits<double>::quiet_NaN();
	    return parseToDoubleForNumberType(string, nanFallback);
	}

	// Parses |string| with parseToDoubleForNumberType and, when |decimalPlaces| is
	// non-null, also reports the number of digits after the decimal point once the
	// exponent has been applied, clamped to the range [0, 19999].
	//
	// |*decimalPlaces| is reset to 0 before parsing. Returns |fallbackValue| when
	// the string does not parse to a finite value.
	double parseToDoubleForNumberTypeWithDecimalPlaces(const String& string, unsigned *decimalPlaces, double fallbackValue)
	{
	    if (decimalPlaces)
	        *decimalPlaces = 0;

	    double value = parseToDoubleForNumberType(string, std::numeric_limits<double>::quiet_NaN());
	    if (!isfinite(value))
	        return fallbackValue;

	    if (!decimalPlaces)
	        return value;

	    size_t dotIndex = string.find('.');
	    size_t eIndex = string.find('e');
	    if (eIndex == notFound)
	        eIndex = string.find('E');

	    // Count of characters between the '.' and the exponent marker, or the end
	    // of the string when there is no exponent.
	    unsigned baseDecimalPlaces = 0;
	    if (dotIndex != notFound) {
	        if (eIndex == notFound)
	            baseDecimalPlaces = string.length() - dotIndex - 1;
	        else
	            baseDecimalPlaces = eIndex - dotIndex - 1;
	    }

	    int exponent = 0;
	    if (eIndex != notFound) {
	        unsigned cursor = eIndex + 1, cursorSaved;
	        int digit, exponentSign;
	        int32_t exponent32;
	        size_t length = string.length();

	        // Not using String.toInt() in order to perform the same computation as dtoa() does.
	        exponentSign = 0;
	        switch (digit = string[cursor]) {
	        case '-':
	            exponentSign = 1;
	            // Fall through: both signs are skipped the same way.
	        case '+':
	            digit = string[++cursor];
	        }
	        if (digit >= '0' && digit <= '9') {
	            // Skip leading zeros of the exponent.
	            while (cursor < length && digit == '0')
	                digit = string[++cursor];
	            if (digit > '0' && digit <= '9') {
	                exponent32 = digit - '0';
	                cursorSaved = cursor;
	                while (cursor < length && (digit = string[++cursor]) >= '0' && digit <= '9') {
	                    // Stop accumulating once the value is already past the clamp
	                    // threshold: a long run of digits would otherwise overflow
	                    // int32_t, which is undefined behavior. The outcome is the
	                    // same because any such value is clamped to 19999 below.
	                    if (exponent32 <= 19999)
	                        exponent32 = (10 * exponent32) + digit - '0';
	                }
	                if (cursor - cursorSaved > 8 || exponent32 > 19999)
	                    /* Avoid confusion from exponents
	                     * so large that e might overflow.
	                     */
	                    exponent = 19999; /* safe for 16 bit ints */
	                else
	                    exponent = static_cast<int>(exponent32);
	                if (exponentSign)
	                    exponent = -exponent;
	            } else
	                exponent = 0;
	        }
	    }

	    // A positive exponent shifts digits to the left of the decimal point, a
	    // negative one adds decimal places.
	    int intDecimalPlaces = baseDecimalPlaces - exponent;
	    if (intDecimalPlaces < 0)
	        *decimalPlaces = 0;
	    else if (intDecimalPlaces > 19999)
	        *decimalPlaces = 19999;
	    else
	        *decimalPlaces = static_cast<unsigned>(intDecimalPlaces);

	    return value;
	}

	// Convenience overload: same as the three-argument form with a quiet NaN as
	// the fallback value for rejected input.
	double parseToDoubleForNumberTypeWithDecimalPlaces(const String& string, unsigned *decimalPlaces)
	{
	    const double nanFallback = std::numeric_limits<double>::quiet_NaN();
	    return parseToDoubleForNumberTypeWithDecimalPlaces(string, decimalPlaces, nanFallback);
	}

	// http://www.whatwg.org/specs/web-apps/current-work/#rules-for-parsing-integers
	//
	// Parses a signed integer from |input|: leading HTML spaces are skipped, an
	// optional '-' or '+' is consumed, and the following run of ASCII digits is
	// converted. Characters after the digit run are ignored. Returns true and
	// stores the result in |value| on success. Returns false when there are no
	// digits (|value| untouched) or when the conversion reports failure.
	bool parseHTMLInteger(const String& input, int& value)
	{
	    // Step 1
	    // Step 2
	    const UChar* position = input.characters();
	    const UChar* end = position + input.length();

	    // Step 3
	    // The sign is carried as a '-' prefix on the collected digits instead of
	    // being multiplied in afterwards. Converting the magnitude alone rejects
	    // the most negative int, whose magnitude does not fit in an int.
	    // NOTE(review): relies on charactersToIntStrict accepting a leading '-'.
	    StringBuilder digits;

	    // Step 4
	    while (position < end) {
	        if (!isHTMLSpace(*position))
	            break;
	        ++position;
	    }

	    // Step 5
	    if (position == end)
	        return false;
	    ASSERT(position < end);

	    // Step 6
	    if (*position == '-') {
	        digits.append('-');
	        ++position;
	    } else if (*position == '+')
	        ++position;
	    if (position == end)
	        return false;
	    ASSERT(position < end);

	    // Step 7
	    if (!isASCIIDigit(*position))
	        return false;

	    // Step 8
	    while (position < end) {
	        if (!isASCIIDigit(*position))
	            break;
	        digits.append(*position++);
	    }

	    // Step 9
	    bool ok;
	    value = charactersToIntStrict(digits.characters(), digits.length(), &ok);
	    return ok;
	}

	// http://www.whatwg.org/specs/web-apps/current-work/#rules-for-parsing-non-negative-integers
	bool parseHTMLNonNegativeInteger(const String& input, unsigned int& value)
	{
	// Step 1
	// Step 2
	const UChar* position = input.characters();
	const UChar* end = position + input.length();

	// Step 3
	while (position < end) {
	if (!isHTMLSpace(*position))
	break;
	++position;
	}

	// Step 4
	if (position == end)
	return false;
	ASSERT(position < end);

	// Step 5
	if (*position == '+')
	++position;

	// Step 6
	if (position == end)
	return false;
	ASSERT(position < end);

	// Step 7
	if (!isASCIIDigit(*position))
	return false;

	// Step 8
	StringBuilder digits;
	while (position < end) {
	if (!isASCIIDigit(*position))
	break;
	digits.append(*position++);
	}

	// Step 9
	bool ok;
	value = charactersToUIntStrict(digits.characters(), digits.length(), &ok);
	return ok;
	}

	}