| /* |
| * Copyright (C) 2011 Google Inc. All rights reserved. |
| * Copyright (C) 2012 Intel Corporation. All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are |
| * met: |
| * |
| * * Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * * Redistributions in binary form must reproduce the above |
| * copyright notice, this list of conditions and the following disclaimer |
| * in the documentation and/or other materials provided with the |
| * distribution. |
| * * Neither the name of Google Inc. nor the names of its |
| * contributors may be used to endorse or promote products derived from |
| * this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| #include "config.h" |
| #include "ParsedContentType.h" |
| |
| #include "HTTPParsers.h" |
| #include <wtf/text/CString.h> |
| #include <wtf/text/StringBuilder.h> |
| |
| namespace WebCore { |
| |
| static void skipSpaces(StringView input, unsigned& startIndex) |
| { |
| while (startIndex < input.length() && isHTTPSpace(input[startIndex])) |
| ++startIndex; |
| } |
| |
| static bool isQuotedStringTokenCharacter(UChar c) |
| { |
| return (c >= ' ' && c <= '~') || (c >= 0x80 && c <= 0xFF) || c == '\t'; |
| } |
| |
| static bool isTokenCharacter(UChar c) |
| { |
| return isASCII(c) && c > ' ' && c != '"' && c != '(' && c != ')' && c != ',' && c != '/' && (c < ':' || c > '@') && (c < '[' || c > ']'); |
| } |
| |
| using CharacterMeetsCondition = bool (*)(UChar); |
| |
| static Optional<StringView> parseToken(StringView input, unsigned& startIndex, CharacterMeetsCondition characterMeetsCondition, Mode mode, bool skipTrailingWhitespace = false) |
| { |
| unsigned inputLength = input.length(); |
| unsigned tokenStart = startIndex; |
| unsigned& tokenEnd = startIndex; |
| |
| if (tokenEnd >= inputLength) |
| return WTF::nullopt; |
| |
| while (tokenEnd < inputLength && characterMeetsCondition(input[tokenEnd])) { |
| if (mode == Mode::Rfc2045 && !isTokenCharacter(input[tokenEnd])) |
| break; |
| ++tokenEnd; |
| } |
| |
| if (tokenEnd == tokenStart) |
| return WTF::nullopt; |
| if (skipTrailingWhitespace) { |
| while (input[tokenEnd - 1] == ' ') |
| --tokenEnd; |
| } |
| return input.substring(tokenStart, tokenEnd - tokenStart); |
| } |
| |
| static bool isNotQuoteOrBackslash(UChar ch) |
| { |
| return ch != '"' && ch != '\\'; |
| } |
| |
| static String collectHTTPQuotedString(StringView input, unsigned& startIndex) |
| { |
| ASSERT(input[startIndex] == '"'); |
| unsigned inputLength = input.length(); |
| unsigned& position = startIndex; |
| position++; |
| StringBuilder builder; |
| while (true) { |
| unsigned positionStart = position; |
| parseToken(input, position, isNotQuoteOrBackslash, Mode::MimeSniff); |
| builder.append(input.substring(positionStart, position - positionStart)); |
| if (position >= inputLength) |
| break; |
| UChar quoteOrBackslash = input[position++]; |
| if (quoteOrBackslash == '\\') { |
| if (position >= inputLength) { |
| builder.append(quoteOrBackslash); |
| break; |
| } |
| builder.append(input[position++]); |
| } else { |
| ASSERT(quoteOrBackslash == '"'); |
| break; |
| } |
| |
| } |
| return builder.toString(); |
| } |
| |
| static bool containsNonTokenCharacters(StringView input, Mode mode) |
| { |
| if (mode == Mode::MimeSniff) |
| return !isValidHTTPToken(input.toStringWithoutCopying()); |
| for (unsigned index = 0; index < input.length(); ++index) { |
| if (!isTokenCharacter(input[index])) |
| return true; |
| } |
| return false; |
| } |
| |
| static Optional<StringView> parseQuotedString(StringView input, unsigned& startIndex) |
| { |
| unsigned inputLength = input.length(); |
| unsigned quotedStringStart = startIndex + 1; |
| unsigned& quotedStringEnd = startIndex; |
| |
| if (quotedStringEnd >= inputLength) |
| return WTF::nullopt; |
| |
| if (input[quotedStringEnd++] != '"' || quotedStringEnd >= inputLength) |
| return WTF::nullopt; |
| |
| bool lastCharacterWasBackslash = false; |
| char currentCharacter; |
| while ((currentCharacter = input[quotedStringEnd++]) != '"' || lastCharacterWasBackslash) { |
| if (quotedStringEnd >= inputLength) |
| return WTF::nullopt; |
| if (currentCharacter == '\\' && !lastCharacterWasBackslash) { |
| lastCharacterWasBackslash = true; |
| continue; |
| } |
| if (lastCharacterWasBackslash) |
| lastCharacterWasBackslash = false; |
| } |
| if (input[quotedStringEnd - 1] == '"') |
| quotedStringEnd++; |
| return input.substring(quotedStringStart, quotedStringEnd - quotedStringStart); |
| } |
| |
| // From http://tools.ietf.org/html/rfc2045#section-5.1: |
| // |
| // content := "Content-Type" ":" type "/" subtype |
| // *(";" parameter) |
| // ; Matching of media type and subtype |
| // ; is ALWAYS case-insensitive. |
| // |
| // type := discrete-type / composite-type |
| // |
| // discrete-type := "text" / "image" / "audio" / "video" / |
| // "application" / extension-token |
| // |
| // composite-type := "message" / "multipart" / extension-token |
| // |
| // extension-token := ietf-token / x-token |
| // |
| // ietf-token := <An extension token defined by a |
| // standards-track RFC and registered |
| // with IANA.> |
| // |
| // x-token := <The two characters "X-" or "x-" followed, with |
| // no intervening white space, by any token> |
| // |
| // subtype := extension-token / iana-token |
| // |
| // iana-token := <A publicly-defined extension token. Tokens |
| // of this form must be registered with IANA |
| // as specified in RFC 2048.> |
| // |
| // parameter := attribute "=" value |
| // |
| // attribute := token |
| // ; Matching of attributes |
| // ; is ALWAYS case-insensitive. |
| // |
| // value := token / quoted-string |
| // |
| // token := 1*<any (US-ASCII) CHAR except SPACE, CTLs, |
| // or tspecials> |
| // |
| // tspecials := "(" / ")" / "<" / ">" / "@" / |
| // "," / ";" / ":" / "\" / <"> |
| // "/" / "[" / "]" / "?" / "=" |
| // ; Must be in quoted-string, |
| // ; to use within parameter values |
| |
| static bool isNotForwardSlash(UChar ch) |
| { |
| return ch != '/'; |
| } |
| |
| static bool isNotSemicolon(UChar ch) |
| { |
| return ch != ';'; |
| } |
| |
| static bool isNotSemicolonOrEqualSign(UChar ch) |
| { |
| return ch != ';' && ch != '='; |
| } |
| |
| static bool containsNewline(UChar ch) |
| { |
| return ch == '\r' || ch == '\n'; |
| } |
| |
| bool ParsedContentType::parseContentType(Mode mode) |
| { |
| if (mode == Mode::Rfc2045 && m_contentType.find(containsNewline) != notFound) |
| return false; |
| unsigned index = 0; |
| unsigned contentTypeLength = m_contentType.length(); |
| skipSpaces(m_contentType, index); |
| if (index >= contentTypeLength) { |
| LOG_ERROR("Invalid Content-Type string '%s'", m_contentType.ascii().data()); |
| return false; |
| } |
| |
| unsigned contentTypeStart = index; |
| auto typeRange = parseToken(m_contentType, index, isNotForwardSlash, mode); |
| if (!typeRange || containsNonTokenCharacters(*typeRange, mode)) { |
| LOG_ERROR("Invalid Content-Type, invalid type value."); |
| return false; |
| } |
| |
| if (index >= contentTypeLength || m_contentType[index++] != '/') { |
| LOG_ERROR("Invalid Content-Type, missing '/'."); |
| return false; |
| } |
| |
| auto subTypeRange = parseToken(m_contentType, index, isNotSemicolon, mode, mode == Mode::MimeSniff); |
| if (!subTypeRange || containsNonTokenCharacters(*subTypeRange, mode)) { |
| LOG_ERROR("Invalid Content-Type, invalid subtype value."); |
| return false; |
| } |
| |
| // There should not be any quoted strings until we reach the parameters. |
| size_t semiColonIndex = m_contentType.find(';', contentTypeStart); |
| if (semiColonIndex == notFound) { |
| setContentType(m_contentType.substring(contentTypeStart, contentTypeLength - contentTypeStart), mode); |
| return true; |
| } |
| |
| setContentType(m_contentType.substring(contentTypeStart, semiColonIndex - contentTypeStart), mode); |
| index = semiColonIndex + 1; |
| while (true) { |
| skipSpaces(m_contentType, index); |
| auto keyRange = parseToken(m_contentType, index, isNotSemicolonOrEqualSign, mode); |
| if (mode == Mode::Rfc2045 && (!keyRange || index >= contentTypeLength)) { |
| LOG_ERROR("Invalid Content-Type parameter name."); |
| return false; |
| } |
| |
| // Should we tolerate spaces here? |
| if (mode == Mode::Rfc2045) { |
| if (index >= contentTypeLength || m_contentType[index++] != '=') { |
| LOG_ERROR("Invalid Content-Type malformed parameter."); |
| return false; |
| } |
| } else { |
| if (index >= contentTypeLength) |
| break; |
| if (m_contentType[index] != '=' && m_contentType[index] != ';') { |
| LOG_ERROR("Invalid Content-Type malformed parameter."); |
| return false; |
| } |
| if (m_contentType[index++] == ';') |
| continue; |
| } |
| String parameterName = keyRange->toString(); |
| |
| // Should we tolerate spaces here? |
| String parameterValue; |
| Optional<StringView> valueRange; |
| if (index < contentTypeLength && m_contentType[index] == '"') { |
| if (mode == Mode::MimeSniff) { |
| parameterValue = collectHTTPQuotedString(m_contentType, index); |
| parseToken(m_contentType, index, isNotSemicolon, mode); |
| } else |
| valueRange = parseQuotedString(m_contentType, index); |
| } else |
| valueRange = parseToken(m_contentType, index, isNotSemicolon, mode, mode == Mode::MimeSniff); |
| |
| |
| if (parameterValue.isNull()) { |
| if (!valueRange) { |
| if (mode == Mode::MimeSniff) |
| continue; |
| LOG_ERROR("Invalid Content-Type, invalid parameter value."); |
| return false; |
| } |
| parameterValue = valueRange->toString(); |
| } |
| |
| // Should we tolerate spaces here? |
| if (mode == Mode::Rfc2045 && index < contentTypeLength && m_contentType[index++] != ';') { |
| LOG_ERROR("Invalid Content-Type, invalid character at the end of key/value parameter."); |
| return false; |
| } |
| |
| setContentTypeParameter(parameterName, parameterValue, mode); |
| |
| if (index >= contentTypeLength) |
| return true; |
| } |
| |
| return true; |
| } |
| |
| Optional<ParsedContentType> ParsedContentType::create(const String& contentType, Mode mode) |
| { |
| ParsedContentType parsedContentType(mode == Mode::Rfc2045 ? contentType : stripLeadingAndTrailingHTTPSpaces(contentType)); |
| if (!parsedContentType.parseContentType(mode)) |
| return WTF::nullopt; |
| return { WTFMove(parsedContentType) }; |
| } |
| |
| bool isValidContentType(const String& contentType, Mode mode) |
| { |
| return ParsedContentType::create(contentType, mode) != WTF::nullopt; |
| } |
| |
| ParsedContentType::ParsedContentType(const String& contentType) |
| : m_contentType(contentType) |
| { |
| } |
| |
| String ParsedContentType::charset() const |
| { |
| return parameterValueForName("charset"); |
| } |
| |
| void ParsedContentType::setCharset(String&& charset) |
| { |
| m_parameterValues.set("charset"_s, WTFMove(charset)); |
| } |
| |
| String ParsedContentType::parameterValueForName(const String& name) const |
| { |
| return m_parameterValues.get(name); |
| } |
| |
| size_t ParsedContentType::parameterCount() const |
| { |
| return m_parameterValues.size(); |
| } |
| |
| void ParsedContentType::setContentType(StringView contentRange, Mode mode) |
| { |
| m_mimeType = contentRange.toString(); |
| if (mode == Mode::MimeSniff) |
| m_mimeType = stripLeadingAndTrailingHTTPSpaces(m_mimeType).convertToASCIILowercase(); |
| else |
| m_mimeType = m_mimeType.stripWhiteSpace(); |
| } |
| |
| static bool containsNonQuoteStringTokenCharacters(const String& input) |
| { |
| for (unsigned index = 0; index < input.length(); ++index) { |
| if (!isQuotedStringTokenCharacter(input[index])) |
| return true; |
| } |
| return false; |
| } |
| |
| void ParsedContentType::setContentTypeParameter(const String& keyName, const String& keyValue, Mode mode) |
| { |
| String name = keyName; |
| if (mode == Mode::MimeSniff) { |
| if (m_parameterValues.contains(name) || !isValidHTTPToken(name) || containsNonQuoteStringTokenCharacters(keyValue)) |
| return; |
| name = name.convertToASCIILowercase(); |
| } |
| m_parameterValues.set(name, keyValue); |
| m_parameterNames.append(name); |
| } |
| |
| String ParsedContentType::serialize() const |
| { |
| StringBuilder builder; |
| builder.append(m_mimeType); |
| for (auto& name : m_parameterNames) { |
| builder.append(';'); |
| builder.append(name); |
| builder.append('='); |
| String value = m_parameterValues.get(name); |
| if (value.isEmpty() || !isValidHTTPToken(value)) { |
| builder.append('"'); |
| for (unsigned index = 0; index < value.length(); ++index) { |
| auto ch = value[index]; |
| if (ch == '\\' || ch =='"') |
| builder.append('\\'); |
| builder.append(ch); |
| } |
| builder.append('"'); |
| } else |
| builder.append(value); |
| } |
| return builder.toString(); |
| } |
| |
| } |