naiveproxy/src/net/base/mime_util.cc

// Copyright 2012 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifdef UNSAFE_BUFFERS_BUILD
// TODO(crbug.com/40284755): Remove this and spanify to fix the errors.
#pragma allow_unsafe_buffers
#endif

#include "net/base/mime_util.h"

#include <algorithm>
#include <iterator>
#include <map>
#include <string>
#include <string_view>
#include <unordered_set>

#include "base/base64.h"
#include "base/check_op.h"
#include "base/containers/span.h"
#include "base/lazy_instance.h"
#include "base/rand_util.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_split.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "build/build_config.h"
#include "net/base/platform_mime_util.h"
#include "net/http/http_util.h"

using std::string;

namespace net {

// Singleton utility class for mime types. The free functions of the same
// names later in this file forward to the single lazily-created instance.
class MimeUtil : public PlatformMimeUtil {
 public:
  // Maps the extension |ext| (without a leading dot) to a MIME type, taking
  // platform-provided mappings into account. Returns true and writes the
  // result to |mime_type| when a mapping is found.
  bool GetMimeTypeFromExtension(const base::FilePath::StringType& ext,
                                std::string* mime_type) const;

  // Looks up the MIME type for the extension of |file_path|. Returns false
  // when the path has no extension or no mapping is found.
  bool GetMimeTypeFromFile(const base::FilePath& file_path,
                           std::string* mime_type) const;

  // Like GetMimeTypeFromExtension(), but only the hardcoded mappings are
  // consulted; the platform is never asked.
  bool GetWellKnownMimeTypeFromExtension(const base::FilePath::StringType& ext,
                                         std::string* mime_type) const;

  // Finds the preferred file extension for |mime_type|. Returns true and
  // writes it to |extension| when one is known.
  bool GetPreferredExtensionForMimeType(
      const std::string& mime_type,
      base::FilePath::StringType* extension) const;

  // Returns true if |mime_type| matches |mime_type_pattern|, which may contain
  // a single '*' wildcard and may carry parameters that must also be present
  // in |mime_type|.
  bool MatchesMimeType(const std::string& mime_type_pattern,
                       const std::string& mime_type) const;

  // Splits "type/subtype" into its two components. Each output pointer may be
  // null, in which case that component is validated but not returned.
  bool ParseMimeTypeWithoutParameter(std::string_view type_string,
                                     std::string* top_level_type,
                                     std::string* subtype) const;

  // Returns true if |type_string| is one of the known top-level types or
  // begins with "x-" followed by at least one more character.
  bool IsValidTopLevelMimeType(const std::string& type_string) const;

 private:
  // Lets base::LazyInstance construct the object despite the private
  // constructor.
  friend struct base::LazyInstanceTraitsBase<MimeUtil>;

  MimeUtil();

  // Shared implementation of the two extension -> MIME type lookups.
  // |include_platform_types| selects whether the platform is consulted
  // between the primary and secondary hardcoded mappings.
  bool GetMimeTypeFromExtensionHelper(const base::FilePath::StringType& ext,
                                      bool include_platform_types,
                                      std::string* mime_type) const;
};  // class MimeUtil

// The single MimeUtil instance for this file; callers reach it through
// g_mime_util.Get().
// This variable is Leaky because we need to access it from WorkerPool threads.
static base::LazyInstance<MimeUtil>::Leaky g_mime_util =
    LAZY_INSTANCE_INITIALIZER;

// One row of a hardcoded MIME mapping table: a MIME type together with the
// file extensions associated with it.
struct MimeInfo {
  // The MIME type, e.g. "text/html".
  const char* const mime_type;

  // Comma-separated list of possible extensions for the type. The first
  // extension is considered preferred. Extensions are written without a
  // leading dot.
  const char* const extensions;
};

// How to use the MIME maps
// ------------------------
// READ THIS BEFORE MODIFYING THE MIME MAPPINGS BELOW.
//
// There are two hardcoded mappings from MIME types: kPrimaryMappings and
// kSecondaryMappings.
//
// kPrimaryMappings:
//
//   Use this for mappings that are critical to the web platform.  Mappings you
//   add to this list take priority over the underlying platform when converting
//   from file extension -> MIME type.  Thus file extensions listed here will
//   work consistently across platforms.
//
// kSecondaryMappings:
//
//   Use this for mappings that must exist, but can be overridden by user
//   preferences.
//
// The following applies to both lists:
//
// * The same extension can appear multiple times in the same list under
//   different MIME types.  Extensions that appear earlier take precedence over
//   those that appear later.
//
// * A MIME type must not appear more than once in a single list.  It is valid
//   for the same MIME type to appear in kPrimaryMappings and
//   kSecondaryMappings.
//
// The MIME maps are used for three types of lookups:
//
// 1) MIME type -> file extension.  Implemented as
//    GetPreferredExtensionForMimeType().
//
//    Sources are consulted in the following order:
//
//    a) As a special case application/octet-stream is mapped to nothing.  Web
//       sites are supposed to use this MIME type to indicate that the content
//       is opaque and shouldn't be parsed as any specific type of content.  It
//       doesn't make sense to map this to anything.
//
//    b) The underlying platform.  If the operating system has a mapping from
//       the MIME type to a file extension, then that takes priority.  The
//       platform is assumed to represent the user's preference.
//
//    c) kPrimaryMappings.  Order doesn't matter since there should only be at
//       most one entry per MIME type.
//
//    d) kSecondaryMappings.  Again, order doesn't matter.
//
// 2) File extension -> MIME type.  Implemented in GetMimeTypeFromExtension().
//
//    Sources are considered in the following order:
//
//    a) kPrimaryMappings.  Order matters here since file extensions can appear
//       multiple times on these lists.  The first mapping in order of
//       appearance in the list wins.
//
//    b) Underlying platform.
//
//    c) kSecondaryMappings.  Again, the order matters.
//
// 3) File extension -> Well known MIME type.  Implemented as
//    GetWellKnownMimeTypeFromExtension().
//
//    This is similar to 2), with the exception that b) is skipped.  I.e., only
//    the hardcoded mappings in kPrimaryMappings and kSecondaryMappings are
//    considered.

// See comments above for details on how this list is used.
//
// Each entry pairs a MIME type with a comma-separated list of extensions; the
// first extension in an entry is the preferred one for that MIME type. For
// extension -> MIME type lookups these entries are checked before the
// platform is consulted.
static const MimeInfo kPrimaryMappings[] = {
    // Must precede audio/webm .
    {"video/webm", "webm"},

    // Must precede audio/mp3
    {"audio/mpeg", "mp3"},

    {"application/wasm", "wasm"},
    {"application/x-chrome-extension", "crx"},
    {"application/xhtml+xml", "xhtml,xht,xhtm"},
    {"audio/flac", "flac"},
    {"audio/mp3", "mp3"},
    {"audio/ogg", "ogg,oga,opus"},
    {"audio/wav", "wav"},
    {"audio/webm", "webm"},
    {"audio/x-m4a", "m4a"},
    {"image/avif", "avif"},
    {"image/gif", "gif"},
    {"image/jpeg", "jpeg,jpg"},
    {"image/png", "png"},
    {"image/apng", "png,apng"},
    {"image/svg+xml", "svg,svgz"},
    {"image/webp", "webp"},
    {"multipart/related", "mht,mhtml"},
    {"text/css", "css"},
    {"text/html", "html,htm,shtml,shtm"},
    {"text/javascript", "js,mjs"},
    {"text/xml", "xml"},
    {"video/mp4", "mp4,m4v"},
    {"video/ogg", "ogv,ogm"},

    // This is a primary mapping (overrides the platform) rather than secondary
    // to work around an issue when Excel is installed on Windows. Excel
    // registers csv as application/vnd.ms-excel instead of text/csv from RFC
    // 4180. See https://crbug.com/139105.
    {"text/csv", "csv"},
};

// See comments above for details on how this list is used.
//
// Same layout as kPrimaryMappings. For extension -> MIME type lookups these
// entries are only reached when neither kPrimaryMappings nor (when it is
// consulted) the platform produced a result.
static const MimeInfo kSecondaryMappings[] = {
    // Must precede image/vnd.microsoft.icon .
    {"image/x-icon", "ico"},

    {"application/epub+zip", "epub"},
    {"application/font-woff", "woff"},
    {"application/gzip", "gz,tgz"},
    {"application/javascript", "js"},
    {"application/json", "json"},  // Per http://www.ietf.org/rfc/rfc4627.txt.
    {"application/msword", "doc,dot"},
    {"application/octet-stream", "bin,exe,com"},
    {"application/pdf", "pdf"},
    {"application/pkcs7-mime", "p7m,p7c,p7z"},
    {"application/pkcs7-signature", "p7s"},
    {"application/postscript", "ps,eps,ai"},
    {"application/rdf+xml", "rdf"},
    {"application/rss+xml", "rss"},
    {"application/rtf", "rtf"},
    {"application/vnd.android.package-archive", "apk"},
    {"application/vnd.mozilla.xul+xml", "xul"},
    {"application/vnd.ms-excel", "xls"},
    {"application/vnd.ms-powerpoint", "ppt"},
    {"application/"
     "vnd.openxmlformats-officedocument.presentationml.presentation",
     "pptx"},
    {"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
     "xlsx"},
    {"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
     "docx"},
    {"application/x-gzip", "gz,tgz"},
    {"application/x-mpegurl", "m3u8"},
    {"application/x-shockwave-flash", "swf,swl"},
    {"application/x-tar", "tar"},
    {"application/x-x509-ca-cert", "cer,crt"},
    {"application/zip", "zip"},
    // This is the platform mapping on recent versions of Windows 10.
    {"audio/webm", "weba"},
    {"image/bmp", "bmp"},
    {"image/jpeg", "jfif,pjpeg,pjp"},
    {"image/tiff", "tiff,tif"},
    {"image/vnd.microsoft.icon", "ico"},
    {"image/x-png", "png"},
    {"image/x-xbitmap", "xbm"},
    {"message/rfc822", "eml"},
    {"text/calendar", "ics"},
    {"text/html", "ehtml"},
    {"text/plain", "txt,text"},
    {"text/x-sh", "sh"},
    {"text/xml", "xsl,xbl,xslt"},
    {"video/mpeg", "mpeg,mpg"},
};

// Finds mime type of |ext| from |mappings|.
template <size_t num_mappings>
static const char* FindMimeType(const MimeInfo (&mappings)[num_mappings],
                                const std::string& ext) {
  for (const auto& mapping : mappings) {
    const char* extensions = mapping.extensions;
    for (;;) {
      size_t end_pos = strcspn(extensions, ",");
      // The length check is required to prevent the std::string_view below from
      // including uninitialized memory if ext is longer than extensions.
      if (end_pos == ext.size() &&
          base::EqualsCaseInsensitiveASCII(
              std::string_view(extensions, ext.size()), ext)) {
        return mapping.mime_type;
      }
      extensions += end_pos;
      if (!*extensions)
        break;
      extensions += 1;  // skip over comma
    }
  }
  return nullptr;
}

// Converts |string_piece| to the string type used by base::FilePath on the
// current platform: a UTF-8 -> wide conversion on Windows, a plain copy
// everywhere else.
static base::FilePath::StringType StringToFilePathStringType(
    std::string_view string_piece) {
#if !BUILDFLAG(IS_WIN)
  base::FilePath::StringType converted = std::string(string_piece);
#else
  base::FilePath::StringType converted = base::UTF8ToWide(string_piece);
#endif
  return converted;
}

// Helper used in MimeUtil::GetPreferredExtensionForMimeType() to search
// preferred extension in MimeInfo arrays.
template <size_t num_mappings>
static bool FindPreferredExtension(const MimeInfo (&mappings)[num_mappings],
                                   const std::string& mime_type,
                                   base::FilePath::StringType* result) {
  // There is no preferred extension for "application/octet-stream".
  if (mime_type == "application/octet-stream")
    return false;

  for (const auto& mapping : mappings) {
    if (mapping.mime_type == mime_type) {
      const char* extensions = mapping.extensions;
      const char* extension_end = strchr(extensions, ',');
      size_t len =
          extension_end ? extension_end - extensions : strlen(extensions);
      *result = StringToFilePathStringType(std::string_view(extensions, len));
      return true;
    }
  }
  return false;
}

// Extension -> MIME type lookup that also consults the platform's mappings.
bool MimeUtil::GetMimeTypeFromExtension(const base::FilePath::StringType& ext,
                                        string* result) const {
  constexpr bool kIncludePlatformTypes = true;
  return GetMimeTypeFromExtensionHelper(ext, kIncludePlatformTypes, result);
}

// Extension -> MIME type lookup restricted to the hardcoded mappings; the
// platform is not consulted.
bool MimeUtil::GetWellKnownMimeTypeFromExtension(
    const base::FilePath::StringType& ext,
    string* result) const {
  constexpr bool kIncludePlatformTypes = false;
  return GetMimeTypeFromExtensionHelper(ext, kIncludePlatformTypes, result);
}

// MIME type -> preferred extension. Sources are tried in order and the first
// one that produces an extension wins: the platform DB, then kPrimaryMappings,
// then kSecondaryMappings. The two hardcoded tables never yield an extension
// for "application/octet-stream".
bool MimeUtil::GetPreferredExtensionForMimeType(
    const std::string& mime_type,
    base::FilePath::StringType* extension) const {
  if (GetPlatformPreferredExtensionForMimeType(mime_type, extension)) {
    return true;
  }
  if (FindPreferredExtension(kPrimaryMappings, mime_type, extension)) {
    return true;
  }
  return FindPreferredExtension(kSecondaryMappings, mime_type, extension);
}

// Looks up the MIME type for |file_path| based on its extension. Fails when
// the path has no extension or the extension has no known mapping.
bool MimeUtil::GetMimeTypeFromFile(const base::FilePath& file_path,
                                   string* result) const {
  const base::FilePath::StringType dotted_extension = file_path.Extension();
  if (dotted_extension.empty()) {
    return false;
  }
  // The lookup requires an extension without a leading dot, so drop the first
  // character of what Extension() returned.
  const base::FilePath::StringType bare_extension = dotted_extension.substr(1);
  return GetMimeTypeFromExtension(bare_extension, result);
}

// Shared extension -> MIME type lookup. |ext| must not start with a dot.
// Returns true and fills |result| when a mapping is found. The platform is
// only consulted when |include_platform_types| is true.
bool MimeUtil::GetMimeTypeFromExtensionHelper(
    const base::FilePath::StringType& ext,
    bool include_platform_types,
    string* result) const {
  DCHECK(ext.empty() || ext[0] != '.')
      << "extension passed in must not include leading dot";

  // Avoids crash when unable to handle a long file path. See crbug.com/48733.
  const unsigned kMaxFilePathSize = 65536;
  // Refuse over-long input as well as input containing a null character.
  if (ext.length() > kMaxFilePathSize ||
      ext.find(FILE_PATH_LITERAL('\0')) != base::FilePath::StringType::npos) {
    return false;
  }

  // Same algorithm as Mozilla: a hard-coded list that cannot be overridden is
  // checked first, then the system registry, and finally a second hard-coded
  // list of types we can deduce but that the OS is allowed to override.
  const string utf8_ext = base::FilePath(ext).AsUTF8Unsafe();

  if (const char* primary = FindMimeType(kPrimaryMappings, utf8_ext)) {
    *result = primary;
    return true;
  }

  if (include_platform_types && GetPlatformMimeTypeFromExtension(ext, result)) {
    return true;
  }

  if (const char* secondary = FindMimeType(kSecondaryMappings, utf8_ext)) {
    *result = secondary;
    return true;
  }

  return false;
}

// The constructor has no work of its own to do; it is declared private in the
// class so it is defined here, out of line.
MimeUtil::MimeUtil() = default;

// Tests for MIME parameter equality. Each parameter in the |mime_type_pattern|
// must be matched by a parameter in the |mime_type|. If there are no
// parameters in the pattern, the match is a success.
//
// According rfc2045 keys of parameters are case-insensitive, while values may
// or may not be case-sensitive, but they are usually case-sensitive. So, this
// function matches values in *case-sensitive* manner, however note that this
// may produce some false negatives.
bool MatchesMimeTypeParameters(const std::string& mime_type_pattern,
                               const std::string& mime_type) {
  typedef std::map<std::string, std::string> StringPairMap;

  const std::string::size_type semicolon = mime_type_pattern.find(';');
  const std::string::size_type test_semicolon = mime_type.find(';');
  if (semicolon != std::string::npos) {
    if (test_semicolon == std::string::npos)
      return false;

    base::StringPairs pattern_parameters;
    base::SplitStringIntoKeyValuePairs(mime_type_pattern.substr(semicolon + 1),
                                       '=', ';', &pattern_parameters);
    base::StringPairs test_parameters;
    base::SplitStringIntoKeyValuePairs(mime_type.substr(test_semicolon + 1),
                                       '=', ';', &test_parameters);

    // Put the parameters to maps with the keys converted to lower case.
    StringPairMap pattern_parameter_map;
    for (const auto& pair : pattern_parameters) {
      pattern_parameter_map[base::ToLowerASCII(pair.first)] = pair.second;
    }

    StringPairMap test_parameter_map;
    for (const auto& pair : test_parameters) {
      test_parameter_map[base::ToLowerASCII(pair.first)] = pair.second;
    }

    if (pattern_parameter_map.size() > test_parameter_map.size())
      return false;

    for (const auto& parameter_pair : pattern_parameter_map) {
      const auto& test_parameter_pair_it =
          test_parameter_map.find(parameter_pair.first);
      if (test_parameter_pair_it == test_parameter_map.end())
        return false;
      if (parameter_pair.second != test_parameter_pair_it->second)
        return false;
    }
  }

  return true;
}

// Handles exact matching as well as basic wildcards. Patterns (for example
// plugin mime types) could be:
//      application/x-foo
//      application/*
//      application/*+xml
//      *
// Parameters are tested too: every parameter in the pattern must be present
// in the tested type for the match to succeed.
bool MimeUtil::MatchesMimeType(const std::string& mime_type_pattern,
                               const std::string& mime_type) const {
  if (mime_type_pattern.empty()) {
    return false;
  }

  // Compare the type/subtype portions first, with any parameters cut off.
  const std::string base_pattern =
      mime_type_pattern.substr(0, mime_type_pattern.find(';'));
  const std::string base_type = mime_type.substr(0, mime_type.find(';'));

  bool base_matches = false;
  if (base_pattern == "*" || base_pattern == "*/*") {
    base_matches = true;
  } else {
    const std::string::size_type star = base_pattern.find('*');
    if (star == std::string::npos) {
      // No wildcard: the two must be equal, ignoring ASCII case.
      base_matches = base::EqualsCaseInsensitiveASCII(base_pattern, base_type);
    } else if (base_type.length() < base_pattern.length() - 1) {
      // Too short for the text before and after the '*' to both fit without
      // overlapping.
      base_matches = false;
    } else {
      const std::string_view pattern_view(base_pattern);
      const std::string_view prefix = pattern_view.substr(0, star);
      const std::string_view suffix = pattern_view.substr(star + 1);
      base_matches =
          base::StartsWith(base_type, prefix,
                           base::CompareCase::INSENSITIVE_ASCII) &&
          (suffix.empty() ||
           base::EndsWith(base_type, suffix,
                          base::CompareCase::INSENSITIVE_ASCII));
    }
  }

  return base_matches &&
         MatchesMimeTypeParameters(mime_type_pattern, mime_type);
}

// Parses |type_str| as a media type followed by optional ";name=value"
// parameters.
//
// On success returns true, stores the type token (leading whitespace skipped,
// ending at the first whitespace, ';' or '(') in |mime_type| if it is
// non-null, and replaces the contents of |params| (if non-null) with the
// parsed name/value pairs in order of appearance. Returns false, leaving both
// outputs untouched, when |type_str| has no '/' or its first '/' lies beyond
// the end of the type token.
bool ParseMimeType(const std::string& type_str,
                   std::string* mime_type,
                   base::StringPairs* params) {
  // Trim leading and trailing whitespace from type.  We include '(' in
  // the trailing trim set to catch media-type comments, which are not at all
  // standard, but may occur in rare cases.
  size_t type_val = type_str.find_first_not_of(HTTP_LWS);
  // An all-whitespace (or empty) string yields npos; clamp to the length.
  type_val = std::min(type_val, type_str.length());
  size_t type_end = type_str.find_first_of(HTTP_LWS ";(", type_val);
  if (type_end == std::string::npos)
    type_end = type_str.length();

  // Reject a mime-type if it does not include a slash.
  size_t slash_pos = type_str.find_first_of('/');
  if (slash_pos == std::string::npos || slash_pos > type_end)
    return false;
  if (mime_type)
    *mime_type = type_str.substr(type_val, type_end - type_val);

  // Iterate over parameters. Can't split the string around semicolons
  // preemptively because quoted strings may include semicolons. Mostly matches
  // logic in https://mimesniff.spec.whatwg.org/. Main differences: Does not
  // validate characters are HTTP token code points / HTTP quoted-string token
  // code points, and ignores spaces after "=" in parameters.
  if (params)
    params->clear();
  // Invariant at the top of each iteration: |offset| is either the index of a
  // ';' or npos. npos fails the loop condition, which is how every `continue`
  // below that is reached at end of input terminates the loop.
  std::string::size_type offset = type_str.find_first_of(';', type_end);
  while (offset < type_str.size()) {
    DCHECK_EQ(';', type_str[offset]);
    // Trim off the semicolon.
    ++offset;

    // Trim off any following spaces.
    offset = type_str.find_first_not_of(HTTP_LWS, offset);
    std::string::size_type param_name_start = offset;

    // Extend parameter name until run into a semicolon or equals sign.  Per
    // spec, trailing spaces are not removed.
    offset = type_str.find_first_of(";=", offset);

    // Nothing more to do if at end of string, or if there's no parameter
    // value, since names without values aren't allowed.
    if (offset == std::string::npos || type_str[offset] == ';')
      continue;

    auto param_name = base::MakeStringPiece(type_str.begin() + param_name_start,
                                            type_str.begin() + offset);

    // Now parse the value.
    DCHECK_EQ('=', type_str[offset]);
    // Trim off the '='.
    offset++;

    // Remove leading spaces. This violates the spec, though it matches
    // pre-existing behavior.
    //
    // TODO(mmenke): Consider doing this (only?) after parsing quotes, which
    // seems to align more with the spec - not the content-type spec, but the
    // GET spec's way of getting an encoding, and the spec for handling
    // boundary values as well.
    // See https://encoding.spec.whatwg.org/#names-and-labels.
    offset = type_str.find_first_not_of(HTTP_LWS, offset);

    std::string param_value;
    if (offset == std::string::npos || type_str[offset] == ';') {
      // Nothing to do here - an unquoted string of only whitespace should be
      // skipped.
      continue;
    } else if (type_str[offset] != '"') {
      // If the first character is not a quotation mark, copy data directly.
      std::string::size_type value_start = offset;
      offset = type_str.find_first_of(';', offset);
      std::string::size_type value_end = offset;

      // Remove terminal whitespace. If ran off the end of the string, have to
      // update |value_end| first.
      if (value_end == std::string::npos)
        value_end = type_str.size();
      while (value_end > value_start &&
             HttpUtil::IsLWS(type_str[value_end - 1])) {
        --value_end;
      }

      param_value = type_str.substr(value_start, value_end - value_start);
    } else {
      // Otherwise, append data, with special handling for backslashes, until
      // a close quote.  Do not trim whitespace for quoted-string.

      // Skip open quote.
      DCHECK_EQ('"', type_str[offset]);
      ++offset;

      while (offset < type_str.size() && type_str[offset] != '"') {
        // Skip over backslash and append the next character, when not at
        // the end of the string. Otherwise, copy the next character (Which may
        // be a backslash).
        if (type_str[offset] == '\\' && offset + 1 < type_str.size()) {
          ++offset;
        }
        param_value += type_str[offset];
        ++offset;
      }

      // Anything between the closing quote (or end of input, for an
      // unterminated quoted string) and the next ';' is ignored.
      offset = type_str.find_first_of(';', offset);
    }
    if (params)
      params->emplace_back(param_name, param_value);
  }
  return true;
}

// Splits |type_string| of the form "type/subtype" into its two components.
// Succeeds only when there is exactly one '/' and both components are valid
// HTTP tokens once whitespace before the type and after the subtype has been
// removed. Either output pointer may be null.
bool MimeUtil::ParseMimeTypeWithoutParameter(std::string_view type_string,
                                             std::string* top_level_type,
                                             std::string* subtype) const {
  const std::vector<std::string_view> parts = base::SplitStringPiece(
      type_string, "/", base::KEEP_WHITESPACE, base::SPLIT_WANT_ALL);
  if (parts.size() != 2) {
    return false;
  }

  // Only the outer whitespace is removed; whitespace adjacent to the '/' is
  // kept and makes the token check below fail.
  const std::string_view top_level =
      base::TrimWhitespaceASCII(parts[0], base::TRIM_LEADING);
  const std::string_view sub =
      base::TrimWhitespaceASCII(parts[1], base::TRIM_TRAILING);
  if (!(HttpUtil::IsToken(top_level) && HttpUtil::IsToken(sub))) {
    return false;
  }

  if (top_level_type) {
    *top_level_type = std::string(top_level);
  }
  if (subtype) {
    *subtype = std::string(sub);
  }
  return true;
}

// Top-level media types accepted by MimeUtil::IsValidTopLevelMimeType(),
// written in lower case because the lookup lower-cases its input first.
// See https://www.iana.org/assignments/media-types/media-types.xhtml
static const char* const kLegalTopLevelTypes[] = {
    "application", "audio", "example",   "font", "image",
    "message",     "model", "multipart", "text", "video",
};

// Returns true when |type_string| is, ignoring ASCII case, one of
// kLegalTopLevelTypes, or when it starts with "x-" and has at least one
// character after that prefix.
bool MimeUtil::IsValidTopLevelMimeType(const std::string& type_string) const {
  const std::string lowered = base::ToLowerASCII(type_string);
  const bool is_listed = std::any_of(
      std::begin(kLegalTopLevelTypes), std::end(kLegalTopLevelTypes),
      [&lowered](const char* legal_type) { return lowered == legal_type; });
  if (is_listed) {
    return true;
  }

  // "x-" on its own is not accepted; something must follow the prefix.
  if (type_string.size() <= 2) {
    return false;
  }
  return base::StartsWith(type_string, "x-",
                          base::CompareCase::INSENSITIVE_ASCII);
}

//----------------------------------------------------------------------------
// Wrappers for the singleton
//----------------------------------------------------------------------------

// Forwards to MimeUtil::GetMimeTypeFromExtension() on the shared instance.
bool GetMimeTypeFromExtension(const base::FilePath::StringType& ext,
                              std::string* mime_type) {
  const MimeUtil& util = g_mime_util.Get();
  return util.GetMimeTypeFromExtension(ext, mime_type);
}

// Forwards to MimeUtil::GetMimeTypeFromFile() on the shared instance.
bool GetMimeTypeFromFile(const base::FilePath& file_path,
                         std::string* mime_type) {
  const MimeUtil& util = g_mime_util.Get();
  return util.GetMimeTypeFromFile(file_path, mime_type);
}

// Forwards to MimeUtil::GetWellKnownMimeTypeFromExtension() on the shared
// instance.
bool GetWellKnownMimeTypeFromExtension(const base::FilePath::StringType& ext,
                                       std::string* mime_type) {
  const MimeUtil& util = g_mime_util.Get();
  return util.GetWellKnownMimeTypeFromExtension(ext, mime_type);
}

bool GetPreferredExtensionForMimeType(const std::string& mime_type,
                                      base::FilePath::StringType* extension) {
  return g_mime_util.Get().GetPreferredExtensionForMimeType(mime_type,
                                                            extension);
}

bool MatchesMimeType(const std::string& mime_type_pattern,
                     const std::string& mime_type) {
  return g_mime_util.Get().MatchesMimeType(mime_type_pattern, mime_type);
}

bool ParseMimeTypeWithoutParameter(std::string_view type_string,
                                   std::string* top_level_type,
                                   std::string* subtype) {
  return g_mime_util.Get().ParseMimeTypeWithoutParameter(
      type_string, top_level_type, subtype);
}

bool IsValidTopLevelMimeType(const std::string& type_string) {
  return g_mime_util.Get().IsValidTopLevelMimeType(type_string);
}

namespace {

// From http://www.w3schools.com/media/media_mimeref.asp and
// http://plugindoc.mozdev.org/winmime.php
//
// Image MIME types whose platform-registered extensions are collected when
// GetExtensionsForMimeType() is asked for "image/*".
static const char* const kStandardImageTypes[] = {"image/avif",
                                                  "image/bmp",
                                                  "image/cis-cod",
                                                  "image/gif",
                                                  "image/ief",
                                                  "image/jpeg",
                                                  "image/webp",
                                                  "image/pict",
                                                  "image/pipeg",
                                                  "image/png",
                                                  "image/svg+xml",
                                                  "image/tiff",
                                                  "image/vnd.microsoft.icon",
                                                  "image/x-cmu-raster",
                                                  "image/x-cmx",
                                                  "image/x-icon",
                                                  "image/x-portable-anymap",
                                                  "image/x-portable-bitmap",
                                                  "image/x-portable-graymap",
                                                  "image/x-portable-pixmap",
                                                  "image/x-rgb",
                                                  "image/x-xbitmap",
                                                  "image/x-xpixmap",
                                                  "image/x-xwindowdump"};
// Audio MIME types whose platform-registered extensions are collected when
// GetExtensionsForMimeType() is asked for "audio/*".
static const char* const kStandardAudioTypes[] = {
  "audio/aac",
  "audio/aiff",
  "audio/amr",
  "audio/basic",
  "audio/flac",
  "audio/midi",
  "audio/mp3",
  "audio/mp4",
  "audio/mpeg",
  "audio/mpeg3",
  "audio/ogg",
  "audio/vorbis",
  "audio/wav",
  "audio/webm",
  "audio/x-m4a",
  "audio/x-ms-wma",
  "audio/vnd.rn-realaudio",
  "audio/vnd.wave"
};
// Font MIME types whose platform-registered extensions are collected when
// GetExtensionsForMimeType() is asked for "font/*".
// https://tools.ietf.org/html/rfc8081
static const char* const kStandardFontTypes[] = {
    "font/collection", "font/otf",  "font/sfnt",
    "font/ttf",        "font/woff", "font/woff2",
};
// Video MIME types whose platform-registered extensions are collected when
// GetExtensionsForMimeType() is asked for "video/*".
static const char* const kStandardVideoTypes[] = {
  "video/avi",
  "video/divx",
  "video/flc",
  "video/mp4",
  "video/mpeg",
  "video/ogg",
  "video/quicktime",
  "video/sd-video",
  "video/webm",
  "video/x-dv",
  "video/x-m4v",
  "video/x-mpeg",
  "video/x-ms-asf",
  "video/x-ms-wmv"
};

// Associates a MIME type prefix with the list of standard types under it.
struct StandardType {
  // Prefix including the trailing slash, e.g. "image/". nullptr marks the
  // fallback entry that does not correspond to any specific prefix.
  const char* const leading_mime_type;
  // The standard MIME types that share the prefix; may be empty.
  base::span<const char* const> standard_types;
};
// Lookup table from wildcard prefix to its standard types. The final
// {nullptr, {}} entry must stay last: GetExtensionsForMimeType() ends up on it
// when no earlier prefix matches, and it contributes no standard types.
static const StandardType kStandardTypes[] = {{"image/", kStandardImageTypes},
                                              {"audio/", kStandardAudioTypes},
                                              {"font/", kStandardFontTypes},
                                              {"video/", kStandardVideoTypes},
                                              {nullptr, {}}};

// Adds to |extensions| every file extension (no leading dot) listed in
// |mappings| for the MIME types selected by |mime_type|.
//
// How |mime_type| selects entries depends on |prefix_match|:
//
//  * |prefix_match = false|: |mime_type| is a complete MIME type such as
//    "text/plain" and only the entry equal to it (ignoring ASCII case) is
//    selected.
//
//  * |prefix_match = true|: |mime_type| is a prefix and every entry that
//    starts with it (ignoring ASCII case) is selected. For instance "Text/"
//    selects "text/plain".
void GetExtensionsFromHardCodedMappings(
    base::span<const MimeInfo> mappings,
    const std::string& mime_type,
    bool prefix_match,
    std::unordered_set<base::FilePath::StringType>* extensions) {
  for (const MimeInfo& entry : mappings) {
    const std::string_view entry_type(entry.mime_type);

    // An exact match is a prefix match with nothing left after the prefix.
    if (!prefix_match && entry_type.length() != mime_type.length()) {
      continue;
    }
    if (!base::StartsWith(entry_type, mime_type,
                          base::CompareCase::INSENSITIVE_ASCII)) {
      continue;
    }

    const auto entry_extensions =
        base::SplitStringPiece(entry.extensions, ",", base::TRIM_WHITESPACE,
                               base::SPLIT_WANT_ALL);
    for (const std::string_view extension : entry_extensions) {
      extensions->insert(StringToFilePathStringType(extension));
    }
  }
}

// Collects into |extensions| the extensions for a wildcard request such as
// "image/*": first whatever the platform reports for each of
// |standard_types|, then every hard-coded mapping whose MIME type starts with
// |leading_mime_type|.
void GetExtensionsHelper(
    base::span<const char* const> standard_types,
    const std::string& leading_mime_type,
    std::unordered_set<base::FilePath::StringType>* extensions) {
  auto& util = g_mime_util.Get();
  for (const char* standard_type : standard_types) {
    util.GetPlatformExtensionsForMimeType(standard_type, extensions);
  }

  // The hard-coded tables are consulted as well because some supported
  // extensions, like ogg, may not be registered in the system registry.
  constexpr bool kPrefixMatch = true;
  GetExtensionsFromHardCodedMappings(kPrimaryMappings, leading_mime_type,
                                     kPrefixMatch, extensions);
  GetExtensionsFromHardCodedMappings(kSecondaryMappings, leading_mime_type,
                                     kPrefixMatch, extensions);
}

// Appends a copy of every element of |source| to the end of |target|, in
// |source|'s iteration order (which is unspecified for an unordered_set).
// Elements already in |target| are kept, and |source| is not modified.
template <class T>
void UnorderedSetToVector(std::unordered_set<T>* source,
                          std::vector<T>* target) {
  // A single range insert copy-constructs each element directly in its final
  // slot, instead of default-constructing placeholders via resize() and then
  // assigning over them. It also no longer requires T to be
  // default-constructible.
  target->insert(target->end(), source->begin(), source->end());
}

// Characters to be used for mime multipart boundary. Only the 62 ASCII digits
// and letters are listed.
//
// TODO(rsleevi): crbug.com/575779: Follow the spec or fix the spec.
// The RFC 2046 spec says the alphanumeric characters plus the
// following characters are legal for boundaries:  '()+_,-./:=?
// However the following characters, though legal, cause some sites
// to fail: (),./:=+
constexpr std::string_view kMimeBoundaryCharacters(
    "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");

// Size of mime multipart boundary.
// NOTE(review): presumably chosen to stay within RFC 2046's 70-character
// boundary limit - confirm against GenerateMimeMultipartBoundary().
const size_t kMimeBoundarySize = 69;

}  // namespace

void GetExtensionsForMimeType(
    const std::string& unsafe_mime_type,
    std::vector<base::FilePath::StringType>* extensions) {
  if (unsafe_mime_type == "*/*" || unsafe_mime_type == "*")
    return;

  const std::string mime_type = base::ToLowerASCII(unsafe_mime_type);
  std::unordered_set<base::FilePath::StringType> unique_extensions;

  if (base::EndsWith(mime_type, "/*", base::CompareCase::INSENSITIVE_ASCII)) {
    std::string leading_mime_type = mime_type.substr(0, mime_type.length() - 1);

    // Find the matching StandardType from within kStandardTypes, or fall
    // through to the last (default) StandardType.
    const StandardType* type = nullptr;
    for (const StandardType& standard_type : kStandardTypes) {
      type = &standard_type;
      if (type->leading_mime_type &&
          leading_mime_type == type->leading_mime_type) {
        break;
      }
    }
    DCHECK(type);
    GetExtensionsHelper(type->standard_types,
                        leading_mime_type,
                        &unique_extensions);
  } else {
    g_mime_util.Get().GetPlatformExtensionsForMimeType(mime_type,
                                                       &unique_extensions);

    // Also look up the extensions from hard-coded mappings in case that some
    // supported extensions are not registered in the system registry, like ogg.
    GetExtensionsFromHardCodedMappings(kPrimaryMappings, mime_type, false,
                                       &unique_extensions);

    GetExtensionsFromHardCodedMappings(kSecondaryMappings, mime_type, false,
                                       &unique_extensions);
  }

  UnorderedSetToVector(&unique_extensions, extensions);
}

NET_EXPORT std::string GenerateMimeMultipartBoundary() {
  // Returns a boundary of kMimeBoundarySize characters: a fixed prefix,
  // randomly chosen characters from kMimeBoundaryCharacters, and a fixed
  // suffix.
  //
  // Background, from RFC 1341, section "7.2.1 Multipart: The common syntax":
  //   Because encapsulation boundaries must not appear in the body parts being
  //   encapsulated, a user agent must exercise care to choose a unique
  //   boundary. The boundary in the example above could have been the result of
  //   an algorithm designed to produce boundaries with a very low probability
  //   of already existing in the data to be encapsulated without having to
  //   prescan the data.
  //   [...]
  //   the boundary parameter [...] consists of 1 to 70 characters from a set of
  //   characters known to be very robust through email gateways, and NOT ending
  //   with white space.
  //   [...]
  //   boundary := 0*69<bchars> bcharsnospace
  //   bchars := bcharsnospace / " "
  //   bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" / "+" /
  //            "_" / "," / "-" / "." / "/" / ":" / "=" / "?"

  constexpr std::string_view kPrefix = "----MultipartBoundary--";
  constexpr std::string_view kSuffix = "----";

  std::string boundary;
  boundary.reserve(kMimeBoundarySize);
  boundary.append(kPrefix);

  // Pad with random characters, leaving exactly enough room for the suffix.
  const size_t random_part_end = kMimeBoundarySize - kSuffix.size();
  while (boundary.size() < random_part_end) {
    const auto pick =
        base::RandInt(0, kMimeBoundaryCharacters.size() - 1);
    boundary.push_back(kMimeBoundaryCharacters[pick]);
  }

  boundary.append(kSuffix);

  // Not a strict requirement - documentation only.
  DCHECK_EQ(kMimeBoundarySize, boundary.size());

  return boundary;
}

// Appends one multipart/form-data part to `*post_data`:
//
//   --<mime_boundary>\r\n
//   Content-Disposition: form-data; name="<value_name>"\r\n
//   Content-Type: <content_type>\r\n     (only if `content_type` is non-empty)
//   \r\n
//   <value>\r\n
//
// `value_name` and `content_type` are inserted verbatim; no quoting or
// escaping is applied here. `post_data` must be non-null, and none of the
// string arguments should refer to `*post_data` itself.
void AddMultipartValueForUpload(const std::string& value_name,
                                const std::string& value,
                                const std::string& mime_boundary,
                                const std::string& content_type,
                                std::string* post_data) {
  DCHECK(post_data);
  // Every piece is appended straight onto `*post_data` instead of being
  // concatenated into a temporary first, so `value` (which may be large) is
  // copied only once.

  // First line is the boundary.
  post_data->append("--").append(mime_boundary).append("\r\n");
  // Next line is the Content-disposition.
  post_data->append("Content-Disposition: form-data; name=\"")
      .append(value_name)
      .append("\"\r\n");
  if (!content_type.empty()) {
    // If Content-type is specified, the next line is that.
    post_data->append("Content-Type: ").append(content_type).append("\r\n");
  }
  // Leave an empty line and append the value.
  post_data->append("\r\n").append(value).append("\r\n");
}

// Appends one multipart/form-data part carrying a file name to `*post_data`:
//
//   --<mime_boundary>\r\n
//   Content-Disposition: form-data; name="<value_name>"; filename="<file_name>"\r\n
//   Content-Type: <content_type>\r\n     (only if `content_type` is non-empty)
//   \r\n
//   <value>\r\n
//
// `value_name`, `file_name` and `content_type` are inserted verbatim; no
// quoting or escaping is applied here. `post_data` must be non-null, and none
// of the string arguments should refer to `*post_data` itself.
void AddMultipartValueForUploadWithFileName(const std::string& value_name,
                                            const std::string& file_name,
                                            const std::string& value,
                                            const std::string& mime_boundary,
                                            const std::string& content_type,
                                            std::string* post_data) {
  DCHECK(post_data);
  // Every piece is appended straight onto `*post_data` instead of being
  // concatenated into a temporary first, so `value` (which may be large) is
  // copied only once.

  // First line is the boundary.
  post_data->append("--").append(mime_boundary).append("\r\n");
  // Next line is the Content-disposition.
  post_data->append("Content-Disposition: form-data; name=\"")
      .append(value_name)
      .append("\"; filename=\"")
      .append(file_name)
      .append("\"\r\n");
  if (!content_type.empty()) {
    // If Content-type is specified, the next line is that.
    post_data->append("Content-Type: ").append(content_type).append("\r\n");
  }
  // Leave an empty line and append the value.
  post_data->append("\r\n").append(value).append("\r\n");
}

// Appends the closing delimiter line, "--<mime_boundary>--\r\n", to
// `*post_data`. `post_data` must be non-null.
void AddMultipartFinalDelimiterForUpload(const std::string& mime_boundary,
                                         std::string* post_data) {
  DCHECK(post_data);

  // Assemble the complete line first, then hand it over in one append.
  std::string closing_line("--");
  closing_line += mime_boundary;
  closing_line += "--\r\n";
  post_data->append(closing_line);
}

// Extracts "<top-level type>/<subtype>" from a media-type string. Only the
// text before the first ';' is parsed; when `accept_comma_separated` is true
// the text is also cut at the first ',' if that comes earlier. Returns
// std::nullopt when ParseMimeTypeWithoutParameter() rejects that text.
//
// TODO(toyoshim): We may prefer to implement a strict RFC2616 media-type
// (https://tools.ietf.org/html/rfc2616#section-3.7) parser.
std::optional<std::string> ExtractMimeTypeFromMediaType(
    const std::string& type_string,
    bool accept_comma_separated) {
  // find() yields npos (the largest size_type) when the character is absent,
  // so std::min picks whichever delimiter actually occurs first, and an npos
  // cut position makes substr() keep the whole string.
  const std::string::size_type semicolon_pos = type_string.find(';');
  const std::string::size_type cut_pos =
      accept_comma_separated ? std::min(semicolon_pos, type_string.find(','))
                             : semicolon_pos;

  std::string top_level_type;
  std::string subtype;
  if (!ParseMimeTypeWithoutParameter(type_string.substr(0, cut_pos),
                                     &top_level_type, &subtype)) {
    return std::nullopt;
  }
  return top_level_type + "/" + subtype;
}

}  // namespace net