| /* |
| * Copyright (C) 1999 Lars Knoll (knoll@kde.org) |
| * (C) 1999 Antti Koivisto (koivisto@kde.org) |
| * (C) 2001 Dirk Mueller (mueller@kde.org) |
| * (C) 2006 Alexey Proskuryakov (ap@webkit.org) |
| * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved. |
| * |
| * This library is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU Library General Public |
| * License as published by the Free Software Foundation; either |
| * version 2 of the License, or (at your option) any later version. |
| * |
| * This library is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * Library General Public License for more details. |
| * |
| * You should have received a copy of the GNU Library General Public License |
| * along with this library; see the file COPYING.LIB. If not, write to |
| * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
| * Boston, MA 02110-1301, USA. |
| */ |
| |
| #include "config.h" |
| #include "AtomicString.h" |
| #include "KURL.h" |
| #include "LinkHash.h" |
| #include "PlatformString.h" |
| #include "StringHash.h" |
| #include "StringImpl.h" |
| |
| namespace WebCore { |
| |
| static inline int findSlashDotDotSlash(const UChar* characters, size_t length) |
| { |
| if (length < 4) |
| return -1; |
| unsigned loopLimit = length - 3; |
| for (unsigned i = 0; i < loopLimit; ++i) { |
| if (characters[i] == '/' && characters[i + 1] == '.' && characters[i + 2] == '.' && characters[i + 3] == '/') |
| return i; |
| } |
| return -1; |
| } |
| |
| static inline int findSlashSlash(const UChar* characters, size_t length, int position) |
| { |
| if (length < 2) |
| return -1; |
| unsigned loopLimit = length - 1; |
| for (unsigned i = position; i < loopLimit; ++i) { |
| if (characters[i] == '/' && characters[i + 1] == '/') |
| return i; |
| } |
| return -1; |
| } |
| |
| static inline int findSlashDotSlash(const UChar* characters, size_t length) |
| { |
| if (length < 3) |
| return -1; |
| unsigned loopLimit = length - 2; |
| for (unsigned i = 0; i < loopLimit; ++i) { |
| if (characters[i] == '/' && characters[i + 1] == '.' && characters[i + 2] == '/') |
| return i; |
| } |
| return -1; |
| } |
| |
| static inline bool containsColonSlashSlash(const UChar* characters, unsigned length) |
| { |
| if (length < 3) |
| return false; |
| unsigned loopLimit = length - 2; |
| for (unsigned i = 0; i < loopLimit; ++i) { |
| if (characters[i] == ':' && characters[i + 1] == '/' && characters[i + 2] == '/') |
| return true; |
| } |
| return false; |
| } |
| |
| static inline void cleanPath(Vector<UChar, 512>& path) |
| { |
| // FIXME: Should not do this in the query or anchor part. |
| int pos; |
| while ((pos = findSlashDotDotSlash(path.data(), path.size())) != -1) { |
| int prev = reverseFind(path.data(), path.size(), '/', pos - 1); |
| // don't remove the host, i.e. http://foo.org/../foo.html |
| if (prev < 0 || (prev > 3 && path[prev - 2] == ':' && path[prev - 1] == '/')) |
| path.remove(pos, 3); |
| else |
| path.remove(prev, pos - prev + 3); |
| } |
| |
| // FIXME: Should not do this in the query part. |
| // Set refPos to -2 to mean "I haven't looked for the anchor yet". |
| // We don't want to waste a function call on the search for the the anchor |
| // in the vast majority of cases where there is no "//" in the path. |
| pos = 0; |
| int refPos = -2; |
| while ((pos = findSlashSlash(path.data(), path.size(), pos)) != -1) { |
| if (refPos == -2) |
| refPos = find(path.data(), path.size(), '#'); |
| if (refPos > 0 && pos >= refPos) |
| break; |
| |
| if (pos == 0 || path[pos - 1] != ':') |
| path.remove(pos); |
| else |
| pos += 2; |
| } |
| |
| // FIXME: Should not do this in the query or anchor part. |
| while ((pos = findSlashDotSlash(path.data(), path.size())) != -1) |
| path.remove(pos, 2); |
| } |
| |
| |
| static inline bool matchLetter(UChar c, UChar lowercaseLetter) |
| { |
| return (c | 0x20) == lowercaseLetter; |
| } |
| |
| static inline bool needsTrailingSlash(const UChar* characters, unsigned length) |
| { |
| if (length < 6) |
| return false; |
| if (!matchLetter(characters[0], 'h') |
| || !matchLetter(characters[1], 't') |
| || !matchLetter(characters[2], 't') |
| || !matchLetter(characters[3], 'p')) |
| return false; |
| if (!(characters[4] == ':' |
| || (matchLetter(characters[4], 's') && characters[5] == ':'))) |
| return false; |
| |
| unsigned pos = characters[4] == ':' ? 5 : 6; |
| |
| // Skip initial two slashes if present. |
| if (pos + 1 < length && characters[pos] == '/' && characters[pos + 1] == '/') |
| pos += 2; |
| |
| // Find next slash. |
| while (pos < length && characters[pos] != '/') |
| ++pos; |
| |
| return pos == length; |
| } |
| |
| static ALWAYS_INLINE LinkHash visitedLinkHashInline(const UChar* url, unsigned length) |
| { |
| return AlreadyHashed::avoidDeletedValue(StringImpl::computeHash(url, length)); |
| } |
| |
| LinkHash visitedLinkHash(const UChar* url, unsigned length) |
| { |
| return visitedLinkHashInline(url, length); |
| } |
| |
| static ALWAYS_INLINE void visitedURLInline(const KURL& base, const AtomicString& attributeURL, Vector<UChar, 512>& buffer) |
| { |
| if (attributeURL.isNull()) |
| return; |
| |
| const UChar* characters = attributeURL.characters(); |
| unsigned length = attributeURL.length(); |
| |
| // This is a poor man's completeURL. Faster with less memory allocation. |
| // FIXME: It's missing a lot of what completeURL does and a lot of what KURL does. |
| // For example, it does not handle international domain names properly. |
| |
| // FIXME: It is wrong that we do not do further processing on strings that have "://" in them: |
| // 1) The "://" could be in the query or anchor. |
| // 2) The URL's path could have a "/./" or a "/../" or a "//" sequence in it. |
| |
| // FIXME: needsTrailingSlash does not properly return true for a URL that has no path, but does |
| // have a query or anchor. |
| |
| bool hasColonSlashSlash = containsColonSlashSlash(characters, length); |
| |
| if (hasColonSlashSlash && !needsTrailingSlash(characters, length)) { |
| buffer.append(attributeURL.characters(), attributeURL.length()); |
| return; |
| } |
| |
| |
| if (hasColonSlashSlash) { |
| // FIXME: This is incorrect for URLs that have a query or anchor; the "/" needs to go at the |
| // end of the path, *before* the query or anchor. |
| buffer.append(characters, length); |
| buffer.append('/'); |
| return; |
| } |
| |
| if (!length) |
| buffer.append(base.string().characters(), base.string().length()); |
| else { |
| switch (characters[0]) { |
| case '/': |
| buffer.append(base.string().characters(), base.pathStart()); |
| break; |
| case '#': |
| buffer.append(base.string().characters(), base.pathEnd()); |
| break; |
| default: |
| buffer.append(base.string().characters(), base.pathAfterLastSlash()); |
| break; |
| } |
| } |
| buffer.append(characters, length); |
| cleanPath(buffer); |
| if (needsTrailingSlash(buffer.data(), buffer.size())) { |
| // FIXME: This is incorrect for URLs that have a query or anchor; the "/" needs to go at the |
| // end of the path, *before* the query or anchor. |
| buffer.append('/'); |
| } |
| |
| return; |
| } |
| |
| void visitedURL(const KURL& base, const AtomicString& attributeURL, Vector<UChar, 512>& buffer) |
| { |
| return visitedURLInline(base, attributeURL, buffer); |
| } |
| |
| LinkHash visitedLinkHash(const KURL& base, const AtomicString& attributeURL) |
| { |
| Vector<UChar, 512> url; |
| visitedURLInline(base, attributeURL, url); |
| if (url.isEmpty()) |
| return 0; |
| |
| return visitedLinkHashInline(url.data(), url.size()); |
| } |
| |
| } // namespace WebCore |