// WebCore/platform/LinkHash.cpp - WebKit - Git at Google

 /*
  * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
  *           (C) 1999 Antti Koivisto (koivisto@kde.org)
  *           (C) 2001 Dirk Mueller (mueller@kde.org)
  *           (C) 2006 Alexey Proskuryakov (ap@webkit.org)
  * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Library General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This library is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Library General Public License for more details.
  *
  * You should have received a copy of the GNU Library General Public License
  * along with this library; see the file COPYING.LIB.  If not, write to
  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  * Boston, MA 02110-1301, USA.
  */

 #include "config.h"
 #include "AtomicString.h"
 #include "KURL.h"
 #include "LinkHash.h"
 #include "PlatformString.h"
 #include "StringHash.h"
 #include "StringImpl.h"

 namespace WebCore {

 static inline int findSlashDotDotSlash(const UChar* characters, size_t length)
 {
     if (length < 4)
         return -1;
     unsigned loopLimit = length - 3;
     for (unsigned i = 0; i < loopLimit; ++i) {
         if (characters[i] == '/' && characters[i + 1] == '.' && characters[i + 2] == '.' && characters[i + 3] == '/')
             return i;
     }
     return -1;
 }

 // Returns the index of the first "//" found at or after |position|, or -1 if
 // there is none.
 static inline int findSlashSlash(const UChar* characters, size_t length, int position)
 {
     if (length < 2)
         return -1;

     // A pair needs two characters, so candidate starts stop before length - 1.
     const unsigned pairEnd = length - 1;
     for (unsigned cursor = position; cursor < pairEnd; ++cursor) {
         if (characters[cursor] != '/')
             continue;
         if (characters[cursor + 1] == '/')
             return cursor;
     }
     return -1;
 }

 // Returns the index of the first "/./" sequence in |characters|, or -1 when
 // none is present.
 static inline int findSlashDotSlash(const UChar* characters, size_t length)
 {
     if (length < 3)
         return -1;

     // One past the last index at which a three-character window still fits.
     const unsigned windowEnd = length - 2;
     unsigned start = 0;
     while (start < windowEnd) {
         const bool isMatch = characters[start] == '/'
             && characters[start + 1] == '.'
             && characters[start + 2] == '/';
         if (isMatch)
             return start;
         ++start;
     }
     return -1;
 }

 static inline bool containsColonSlashSlash(const UChar* characters, unsigned length)
 {
     if (length < 3)
         return false;
     unsigned loopLimit = length - 2;
     for (unsigned i = 0; i < loopLimit; ++i) {
         if (characters[i] == ':' && characters[i + 1] == '/' && characters[i + 2] == '/')
             return true;
     }
     return false;
 }

 // Normalizes |path| in place in three passes: resolves "/../" sequences,
 // collapses "//" into "/" (except directly after ':' and not past a '#'),
 // and removes "/./" sequences. Each pass scans the whole buffer, so the query
 // (and, for two of the passes, the anchor) is affected too; see the FIXMEs.
 static inline void cleanPath(Vector<UChar, 512>& path)
 {
     // FIXME: Should not do this in the query or anchor part.
     int pos;
     while ((pos = findSlashDotDotSlash(path.data(), path.size())) != -1) {
         // Locate the '/' that opens the segment preceding "/../". When the
         // sequence sits at index 0 there is nothing before it, so skip the
         // search instead of passing a start index of -1 to reverseFind.
         // NOTE(review): reverseFind may treat a negative start as "search from
         // the end"; that would yield a slash located after pos and a
         // non-positive removal length below -- confirm against its definition.
         int prev = pos > 0 ? reverseFind(path.data(), path.size(), '/', pos - 1) : -1;
         // don't remove the host, i.e. http://foo.org/../foo.html
         if (prev < 0 || (prev > 3 && path[prev - 2] == ':' && path[prev - 1] == '/'))
             path.remove(pos, 3);
         else
             path.remove(prev, pos - prev + 3);
     }

     // FIXME: Should not do this in the query part.
     // Set refPos to -2 to mean "I haven't looked for the anchor yet".
     // We don't want to waste a function call on the search for the anchor
     // in the vast majority of cases where there is no "//" in the path.
     pos = 0;
     int refPos = -2;
     while ((pos = findSlashSlash(path.data(), path.size(), pos)) != -1) {
         if (refPos == -2)
             refPos = find(path.data(), path.size(), '#');
         // Stop once the "//" lies at or beyond the anchor.
         if (refPos > 0 && pos >= refPos)
             break;

         // Keep the "//" that directly follows a ':' (as in "http://") and
         // resume the search after it; otherwise drop one of the two slashes.
         if (pos == 0 || path[pos - 1] != ':')
             path.remove(pos);
         else
             pos += 2;
     }

     // FIXME: Should not do this in the query or anchor part.
     while ((pos = findSlashDotSlash(path.data(), path.size())) != -1)
         path.remove(pos, 2);
 }


 // Returns true when |c|, with bit 0x20 forced on, equals |lowercaseLetter|.
 // For ASCII letters this makes the comparison ignore case.
 static inline bool matchLetter(UChar c, UChar lowercaseLetter)
 {
     const UChar lowercaseBit = 0x20;
     return lowercaseLetter == (c | lowercaseBit);
 }

 // Returns true when the string starts with "http:" or "https:" (letters
 // matched via matchLetter) and, after an optional leading "//", contains no
 // further '/' up to its end.
 static inline bool needsTrailingSlash(const UChar* characters, unsigned length)
 {
     if (length < 6)
         return false;

     const bool startsWithHTTP = matchLetter(characters[0], 'h')
         && matchLetter(characters[1], 't')
         && matchLetter(characters[2], 't')
         && matchLetter(characters[3], 'p');
     if (!startsWithHTTP)
         return false;

     // Position just past the scheme's ':' -- 5 for "http:", 6 for "https:".
     unsigned cursor;
     if (characters[4] == ':')
         cursor = 5;
     else if (matchLetter(characters[4], 's') && characters[5] == ':')
         cursor = 6;
     else
         return false;

     // Skip initial two slashes if present.
     if (cursor + 1 < length && characters[cursor] == '/' && characters[cursor + 1] == '/')
         cursor += 2;

     // Any remaining slash means nothing has to be appended.
     for (; cursor < length; ++cursor) {
         if (characters[cursor] == '/')
             return false;
     }
     return true;
 }

 // Hashes |length| characters of |url| with StringImpl::computeHash and passes
 // the result through AlreadyHashed::avoidDeletedValue.
 static ALWAYS_INLINE LinkHash visitedLinkHashInline(const UChar* url, unsigned length)
 {
     LinkHash hash = AlreadyHashed::avoidDeletedValue(StringImpl::computeHash(url, length));
     return hash;
 }

 // Out-of-line entry point that forwards to visitedLinkHashInline.
 LinkHash visitedLinkHash(const UChar* url, unsigned length)
 {
     LinkHash hash = visitedLinkHashInline(url, length);
     return hash;
 }

 // Appends to |buffer| the URL string derived from |attributeURL| resolved
 // against |base|. A null |attributeURL| leaves |buffer| untouched.
 static ALWAYS_INLINE void visitedURLInline(const KURL& base, const AtomicString& attributeURL, Vector<UChar, 512>& buffer)
 {
     if (attributeURL.isNull())
         return;

     const UChar* characters = attributeURL.characters();
     unsigned length = attributeURL.length();

     // This is a poor man's completeURL. Faster with less memory allocation.
     // FIXME: It's missing a lot of what completeURL does and a lot of what KURL does.
     // For example, it does not handle international domain names properly.

     // FIXME: It is wrong that we do not do further processing on strings that have "://" in them:
     //    1) The "://" could be in the query or anchor.
     //    2) The URL's path could have a "/./" or a "/../" or a "//" sequence in it.

     // FIXME: needsTrailingSlash does not properly return true for a URL that has no path, but does
     // have a query or anchor.

     if (containsColonSlashSlash(characters, length)) {
         // Strings containing "://" are copied verbatim; the only adjustment
         // is an optional trailing slash.
         const bool appendSlash = needsTrailingSlash(characters, length);
         buffer.append(characters, length);
         // FIXME: This is incorrect for URLs that have a query or anchor; the "/" needs to go at the
         // end of the path, *before* the query or anchor.
         if (appendSlash)
             buffer.append('/');
         return;
     }

     // Otherwise start from a prefix of the base URL chosen by the first
     // character of the attribute: everything for an empty attribute, up to
     // pathStart() for '/', up to pathEnd() for '#', and up to
     // pathAfterLastSlash() for anything else.
     if (!length)
         buffer.append(base.string().characters(), base.string().length());
     else if (characters[0] == '/')
         buffer.append(base.string().characters(), base.pathStart());
     else if (characters[0] == '#')
         buffer.append(base.string().characters(), base.pathEnd());
     else
         buffer.append(base.string().characters(), base.pathAfterLastSlash());

     buffer.append(characters, length);
     cleanPath(buffer);

     // FIXME: This is incorrect for URLs that have a query or anchor; the "/" needs to go at the
     // end of the path, *before* the query or anchor.
     if (needsTrailingSlash(buffer.data(), buffer.size()))
         buffer.append('/');
 }

 // Out-of-line entry point that forwards to visitedURLInline, which fills |buffer|.
 void visitedURL(const KURL& base, const AtomicString& attributeURL, Vector<UChar, 512>& buffer)
 {
     visitedURLInline(base, attributeURL, buffer);
 }

 // Builds the URL string for |attributeURL| relative to |base| and hashes it.
 // Returns 0 when the resulting string is empty.
 LinkHash visitedLinkHash(const KURL& base, const AtomicString& attributeURL)
 {
     Vector<UChar, 512> resolved;
     visitedURLInline(base, attributeURL, resolved);
     return resolved.isEmpty() ? 0 : visitedLinkHashInline(resolved.data(), resolved.size());
 }

 }  // namespace WebCore
	/*
	* Copyright (C) 1999 Lars Knoll (knoll@kde.org)
	* (C) 1999 Antti Koivisto (koivisto@kde.org)
	* (C) 2001 Dirk Mueller (mueller@kde.org)
	* (C) 2006 Alexey Proskuryakov (ap@webkit.org)
	* Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
	*
	* This library is free software; you can redistribute it and/or
	* modify it under the terms of the GNU Library General Public
	* License as published by the Free Software Foundation; either
	* version 2 of the License, or (at your option) any later version.
	*
	* This library is distributed in the hope that it will be useful,
	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	* Library General Public License for more details.
	*
	* You should have received a copy of the GNU Library General Public License
	* along with this library; see the file COPYING.LIB. If not, write to
	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
	* Boston, MA 02110-1301, USA.
	*/

	#include "config.h"
	#include "AtomicString.h"
	#include "KURL.h"
	#include "LinkHash.h"
	#include "PlatformString.h"
	#include "StringHash.h"
	#include "StringImpl.h"

	namespace WebCore {

	// Scans |characters| for the first "/../" and returns its index; returns -1
	// if the buffer has fewer than four characters or no match exists.
	static inline int findSlashDotDotSlash(const UChar* characters, size_t length)
	{
	    if (length < 4)
	        return -1;

	    const unsigned scanEnd = length - 3;
	    for (unsigned offset = 0; offset < scanEnd; ++offset) {
	        // Cheap first-character test before looking at the other three.
	        if (characters[offset] != '/')
	            continue;
	        if (characters[offset + 1] == '.' && characters[offset + 2] == '.' && characters[offset + 3] == '/')
	            return offset;
	    }
	    return -1;
	}

	// Scans forward from |position| for the first "//" and returns its index,
	// or -1 when no such pair exists.
	static inline int findSlashSlash(const UChar* characters, size_t length, int position)
	{
	    if (length < 2)
	        return -1;

	    const unsigned scanEnd = length - 1;
	    unsigned offset = position;
	    while (offset < scanEnd) {
	        const bool isPair = characters[offset] == '/' && characters[offset + 1] == '/';
	        if (isPair)
	            return offset;
	        ++offset;
	    }
	    return -1;
	}

	static inline int findSlashDotSlash(const UChar* characters, size_t length)
	{
	if (length < 3)
	return -1;
	unsigned loopLimit = length - 2;
	for (unsigned i = 0; i < loopLimit; ++i) {
	if (characters[i] == '/' && characters[i + 1] == '.' && characters[i + 2] == '/')
	return i;
	}
	return -1;
	}

	// Returns true if the three-character sequence "://" appears in the first
	// |length| characters of |characters|.
	static inline bool containsColonSlashSlash(const UChar* characters, unsigned length)
	{
	    if (length < 3)
	        return false;

	    const unsigned scanEnd = length - 2;
	    unsigned offset = 0;
	    while (offset < scanEnd) {
	        if (characters[offset] == ':' && characters[offset + 1] == '/' && characters[offset + 2] == '/')
	            return true;
	        ++offset;
	    }
	    return false;
	}

	// Normalizes |path| in place in three passes: resolves "/../" sequences,
	// collapses "//" into "/" (except directly after ':' and not past a '#'),
	// and removes "/./" sequences. Each pass scans the whole buffer, so the query
	// (and, for two of the passes, the anchor) is affected too; see the FIXMEs.
	static inline void cleanPath(Vector<UChar, 512>& path)
	{
	    // FIXME: Should not do this in the query or anchor part.
	    int pos;
	    while ((pos = findSlashDotDotSlash(path.data(), path.size())) != -1) {
	        // Locate the '/' that opens the segment preceding "/../". When the
	        // sequence sits at index 0 there is nothing before it, so skip the
	        // search instead of passing a start index of -1 to reverseFind.
	        // NOTE(review): reverseFind may treat a negative start as "search from
	        // the end"; that would yield a slash located after pos and a
	        // non-positive removal length below -- confirm against its definition.
	        int prev = pos > 0 ? reverseFind(path.data(), path.size(), '/', pos - 1) : -1;
	        // don't remove the host, i.e. http://foo.org/../foo.html
	        if (prev < 0 || (prev > 3 && path[prev - 2] == ':' && path[prev - 1] == '/'))
	            path.remove(pos, 3);
	        else
	            path.remove(prev, pos - prev + 3);
	    }

	    // FIXME: Should not do this in the query part.
	    // Set refPos to -2 to mean "I haven't looked for the anchor yet".
	    // We don't want to waste a function call on the search for the anchor
	    // in the vast majority of cases where there is no "//" in the path.
	    pos = 0;
	    int refPos = -2;
	    while ((pos = findSlashSlash(path.data(), path.size(), pos)) != -1) {
	        if (refPos == -2)
	            refPos = find(path.data(), path.size(), '#');
	        // Stop once the "//" lies at or beyond the anchor.
	        if (refPos > 0 && pos >= refPos)
	            break;

	        // Keep the "//" that directly follows a ':' (as in "http://") and
	        // resume the search after it; otherwise drop one of the two slashes.
	        if (pos == 0 || path[pos - 1] != ':')
	            path.remove(pos);
	        else
	            pos += 2;
	    }

	    // FIXME: Should not do this in the query or anchor part.
	    while ((pos = findSlashDotSlash(path.data(), path.size())) != -1)
	        path.remove(pos, 2);
	}


	// Returns true when |c|, with bit 0x20 forced on, equals |lowercaseLetter|.
	// For ASCII letters this makes the comparison ignore case.
	static inline bool matchLetter(UChar c, UChar lowercaseLetter)
	{
	    return (c | 0x20) == lowercaseLetter;
	}

	// Returns true when the string starts with "http:" or "https:" (letters
	// matched via matchLetter) and, after an optional leading "//", contains no
	// further '/' up to its end.
	static inline bool needsTrailingSlash(const UChar* characters, unsigned length)
	{
	    if (length < 6)
	        return false;
	    if (!matchLetter(characters[0], 'h')
	            || !matchLetter(characters[1], 't')
	            || !matchLetter(characters[2], 't')
	            || !matchLetter(characters[3], 'p'))
	        return false;
	    if (!(characters[4] == ':'
	            || (matchLetter(characters[4], 's') && characters[5] == ':')))
	        return false;

	    // Position just past the scheme's ':' -- 5 for "http:", 6 for "https:".
	    unsigned pos = characters[4] == ':' ? 5 : 6;

	    // Skip initial two slashes if present.
	    if (pos + 1 < length && characters[pos] == '/' && characters[pos + 1] == '/')
	        pos += 2;

	    // Find next slash.
	    while (pos < length && characters[pos] != '/')
	        ++pos;

	    // Reaching the end without meeting a slash means one must be appended.
	    return pos == length;
	}

	// Computes StringImpl::computeHash over |url| and returns that value after
	// it has been passed through AlreadyHashed::avoidDeletedValue.
	static ALWAYS_INLINE LinkHash visitedLinkHashInline(const UChar* url, unsigned length)
	{
	    LinkHash result = AlreadyHashed::avoidDeletedValue(StringImpl::computeHash(url, length));
	    return result;
	}

	// Non-inline wrapper around visitedLinkHashInline for a raw character buffer.
	LinkHash visitedLinkHash(const UChar* url, unsigned length)
	{
	    LinkHash result = visitedLinkHashInline(url, length);
	    return result;
	}

	// Appends to |buffer| the URL string derived from |attributeURL| resolved
	// against |base|. Nothing is appended when |attributeURL| is null.
	static ALWAYS_INLINE void visitedURLInline(const KURL& base, const AtomicString& attributeURL, Vector<UChar, 512>& buffer)
	{
	    if (attributeURL.isNull())
	        return;

	    const UChar* characters = attributeURL.characters();
	    unsigned length = attributeURL.length();

	    // This is a poor man's completeURL. Faster with less memory allocation.
	    // FIXME: It's missing a lot of what completeURL does and a lot of what KURL does.
	    // For example, it does not handle international domain names properly.

	    // FIXME: It is wrong that we do not do further processing on strings that have "://" in them:
	    // 1) The "://" could be in the query or anchor.
	    // 2) The URL's path could have a "/./" or a "/../" or a "//" sequence in it.

	    // FIXME: needsTrailingSlash does not properly return true for a URL that has no path, but does
	    // have a query or anchor.

	    const bool hasColonSlashSlash = containsColonSlashSlash(characters, length);
	    if (hasColonSlashSlash) {
	        // Copied as-is; only a trailing slash may be added.
	        const bool wantsSlash = needsTrailingSlash(characters, length);
	        buffer.append(characters, length);
	        if (wantsSlash) {
	            // FIXME: This is incorrect for URLs that have a query or anchor; the "/" needs to go at the
	            // end of the path, before the query or anchor.
	            buffer.append('/');
	        }
	        return;
	    }

	    // Seed the buffer with the part of the base URL that the attribute
	    // extends, chosen by the attribute's first character.
	    if (length) {
	        switch (characters[0]) {
	        case '/':
	            buffer.append(base.string().characters(), base.pathStart());
	            break;
	        case '#':
	            buffer.append(base.string().characters(), base.pathEnd());
	            break;
	        default:
	            buffer.append(base.string().characters(), base.pathAfterLastSlash());
	            break;
	        }
	    } else
	        buffer.append(base.string().characters(), base.string().length());

	    buffer.append(characters, length);
	    cleanPath(buffer);

	    // FIXME: This is incorrect for URLs that have a query or anchor; the "/" needs to go at the
	    // end of the path, before the query or anchor.
	    if (needsTrailingSlash(buffer.data(), buffer.size()))
	        buffer.append('/');
	}

	// Non-inline wrapper: fills |buffer| by delegating to visitedURLInline.
	void visitedURL(const KURL& base, const AtomicString& attributeURL, Vector<UChar, 512>& buffer)
	{
	    visitedURLInline(base, attributeURL, buffer);
	}

	// Resolves |attributeURL| against |base| into a temporary buffer and returns
	// the hash of that buffer, or 0 if the buffer ends up empty.
	LinkHash visitedLinkHash(const KURL& base, const AtomicString& attributeURL)
	{
	    Vector<UChar, 512> resolved;
	    visitedURLInline(base, attributeURL, resolved);

	    if (!resolved.isEmpty())
	        return visitedLinkHashInline(resolved.data(), resolved.size());
	    return 0;
	}

	} // namespace WebCore