naiveproxy/url/url_util.h

// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef URL_URL_UTIL_H_
#define URL_URL_UTIL_H_

#include <string>
#include <vector>

#include "base/strings/string16.h"
#include "base/strings/string_piece.h"
#include "url/third_party/mozilla/url_parse.h"
#include "url/url_canon.h"
#include "url/url_constants.h"
#include "url/url_export.h"

namespace url {

// Init ------------------------------------------------------------------------

// Explicitly initializes the library.
//
// Calling this is NOT required: the library is implicitly initialized when
// first used. However, that implicit initialization is NOT threadsafe. If you
// are using this library in a threaded environment and don't have a consistent
// "first call" (an example might be calling Add*Scheme with your special
// application-specific schemes), call Initialize() before spawning any
// threads.
//
// It is OK to call this function more than once; subsequent calls are no-ops
// unless Shutdown() was called in the meantime. This is also a no-op if other
// calls to the library have already forced an initialization.
URL_EXPORT void Initialize();

// Cleans up the library.
//
// Cleanup is not required, except that some strings may leak without it. For
// most user applications, this is fine. If you're using this code in a library
// that may get loaded and unloaded, you'll want to call Shutdown() before
// unloading so that your library cleans up properly.
URL_EXPORT void Shutdown();

// Schemes ---------------------------------------------------------------------

// Classifies a scheme by what the authority component of its URLs is required
// to look like.
enum SchemeType {
  // URLs with this scheme may carry an authority component, and that
  // authority has a port (a default port may be left out when serializing).
  SCHEME_WITH_PORT = 0,
  // URLs with this scheme may carry an authority component, but the authority
  // never has a port.
  SCHEME_WITHOUT_PORT = 1,
  // URLs with this scheme have no authority component at all.
  SCHEME_WITHOUT_AUTHORITY = 2,
};

// A pair for representing a standard scheme name and the SchemeType for it.
struct URL_EXPORT SchemeWithType {
  // The scheme name as a C string. NOTE(review): this is a raw, non-owning
  // pointer, so the pointed-to string presumably has to outlive the struct --
  // confirm against the users of this type.
  const char* scheme;
  // The authority/port requirements for URLs using |scheme| (see SchemeType).
  SchemeType type;
};

// The following Add*Scheme functions are not threadsafe and cannot be called
// concurrently with any other url_util function. They will assert if the lists
// of schemes have been locked (see LockSchemeRegistries).

// Adds an application-defined scheme to the internal list of "standard-format"
// URL schemes. A standard-format scheme adheres to what RFC 3986 calls "generic
// URI syntax" (https://tools.ietf.org/html/rfc3986#section-3).
//
// |scheme_type| states the authority/port requirements for URLs that use
// |new_scheme| (see SchemeType).
URL_EXPORT void AddStandardScheme(const char* new_scheme,
                                  SchemeType scheme_type);

// Adds an application-defined scheme to the internal list of schemes allowed
// for referrers.
//
// |scheme_type| states the authority/port requirements for URLs that use
// |new_scheme| (see SchemeType).
URL_EXPORT void AddReferrerScheme(const char* new_scheme,
                                  SchemeType scheme_type);

// Adds an application-defined scheme to the list of schemes that do not trigger
// mixed content warnings.
URL_EXPORT void AddSecureScheme(const char* new_scheme);
// Accessor for the list of schemes that do not trigger mixed content warnings.
// NOTE(review): the result is returned by const reference; how long it stays
// valid across later Add*Scheme or Shutdown calls is not visible here.
URL_EXPORT const std::vector<std::string>& GetSecureSchemes();

// Adds an application-defined scheme to the list of schemes that normal pages
// cannot link to or access (i.e., with the same security rules as those applied
// to "file" URLs).
URL_EXPORT void AddLocalScheme(const char* new_scheme);
// Accessor for the list of schemes that normal pages cannot link to or access.
// NOTE(review): the result is returned by const reference; how long it stays
// valid across later Add*Scheme or Shutdown calls is not visible here.
URL_EXPORT const std::vector<std::string>& GetLocalSchemes();

// Adds an application-defined scheme to the list of schemes that cause pages
// loaded with them to not have access to pages loaded with any other URL
// scheme.
URL_EXPORT void AddNoAccessScheme(const char* new_scheme);
// Accessor for the list of schemes whose pages get no access to pages loaded
// with any other URL scheme.
// NOTE(review): the result is returned by const reference; how long it stays
// valid across later Add*Scheme or Shutdown calls is not visible here.
URL_EXPORT const std::vector<std::string>& GetNoAccessSchemes();

// Adds an application-defined scheme to the list of schemes that can be sent
// CORS requests.
URL_EXPORT void AddCORSEnabledScheme(const char* new_scheme);
// Accessor for the list of schemes that can be sent CORS requests.
// NOTE(review): the result is returned by const reference; how long it stays
// valid across later Add*Scheme or Shutdown calls is not visible here.
URL_EXPORT const std::vector<std::string>& GetCORSEnabledSchemes();

// Adds an application-defined scheme to the list of web schemes that can be
// used by the web to store data (e.g. cookies, local storage, ...). This
// differentiates them from schemes that can store data but are not used on
// the web (e.g. an application's internal schemes) and from schemes that are
// used on the web but cannot store data.
URL_EXPORT void AddWebStorageScheme(const char* new_scheme);
// Accessor for the list of web schemes that can be used to store data.
// NOTE(review): the result is returned by const reference; how long it stays
// valid across later Add*Scheme or Shutdown calls is not visible here.
URL_EXPORT const std::vector<std::string>& GetWebStorageSchemes();

// Adds an application-defined scheme to the list of schemes that can bypass the
// Content-Security-Policy (CSP) checks.
URL_EXPORT void AddCSPBypassingScheme(const char* new_scheme);
// Accessor for the list of schemes that can bypass the CSP checks.
// NOTE(review): the result is returned by const reference; how long it stays
// valid across later Add*Scheme or Shutdown calls is not visible here.
URL_EXPORT const std::vector<std::string>& GetCSPBypassingSchemes();

// Adds an application-defined scheme to the list of schemes that are strictly
// empty documents, allowing them to commit synchronously.
URL_EXPORT void AddEmptyDocumentScheme(const char* new_scheme);
// Accessor for the list of schemes that are strictly empty documents.
// NOTE(review): the result is returned by const reference; how long it stays
// valid across later Add*Scheme or Shutdown calls is not visible here.
URL_EXPORT const std::vector<std::string>& GetEmptyDocumentSchemes();

// Sets a flag to prevent future calls to Add*Scheme from succeeding.
//
// This is designed to help prevent errors in multithreaded applications.
// Normal usage is to call Add*Scheme for your custom schemes at the beginning
// of program initialization, and then call LockSchemeRegistries(). This
// prevents later callers from mistakenly calling Add*Scheme while the program
// is running with multiple threads, where such usage would be dangerous.
//
// We could have had Add*Scheme use a lock instead, but that would add
// some platform-specific dependencies we don't otherwise have now, and is
// overkill considering the normal usage is so simple.
URL_EXPORT void LockSchemeRegistries();

// Locates the scheme in the given string and places it into |found_scheme|,
// which may be NULL to indicate the caller does not care about the range.
//
// |str| points at the input text and |str_len| is its length. Overloads are
// provided for 8-bit (char) and 16-bit (base::char16) input; |compare| is
// always an 8-bit C string.
//
// Returns whether the given |compare| scheme matches the scheme found in the
// input (if any). The |compare| scheme must be a valid canonical scheme or
// the result of the comparison is undefined.
URL_EXPORT bool FindAndCompareScheme(const char* str,
                                     int str_len,
                                     const char* compare,
                                     Component* found_scheme);
URL_EXPORT bool FindAndCompareScheme(const base::char16* str,
                                     int str_len,
                                     const char* compare,
                                     Component* found_scheme);
// Convenience overload for std::string input: hands the string's buffer and
// its size (narrowed to int) to the pointer/length overload.
inline bool FindAndCompareScheme(const std::string& str,
                                 const char* compare,
                                 Component* found_scheme) {
  const char* const text = str.data();
  const int text_len = static_cast<int>(str.size());
  return FindAndCompareScheme(text, text_len, compare, found_scheme);
}
// Convenience overload for base::string16 input: hands the string's buffer and
// its size (narrowed to int) to the pointer/length overload.
inline bool FindAndCompareScheme(const base::string16& str,
                                 const char* compare,
                                 Component* found_scheme) {
  const auto* const text = str.data();
  const int text_len = static_cast<int>(str.size());
  return FindAndCompareScheme(text, text_len, compare, found_scheme);
}

// Returns true if the given scheme identified by |scheme| within |spec| is in
// the list of known standard-format schemes (see AddStandardScheme).
//
// |scheme| is the Component locating the scheme inside |spec|. Overloads are
// provided for 8-bit (char) and 16-bit (base::char16) specs.
URL_EXPORT bool IsStandard(const char* spec, const Component& scheme);
URL_EXPORT bool IsStandard(const base::char16* spec, const Component& scheme);

// Returns true if the given scheme identified by |scheme| within |spec| is in
// the list of allowed schemes for referrers (see AddReferrerScheme).
//
// |scheme| is the Component locating the scheme inside |spec|.
URL_EXPORT bool IsReferrerScheme(const char* spec, const Component& scheme);

// Returns true and sets |type| to the SchemeType of the given scheme
// identified by |scheme| within |spec| if the scheme is in the list of known
// standard-format schemes (see AddStandardScheme).
//
// NOTE(review): this header does not say what |type| holds when the function
// returns false; do not rely on its value in that case without checking the
// implementation.
URL_EXPORT bool GetStandardSchemeType(const char* spec,
                                      const Component& scheme,
                                      SchemeType* type);

// Hosts  ----------------------------------------------------------------------

// Returns true if the |canonical_host| matches or is in the same domain as the
// given |canonical_domain| string. For example, if the canonicalized hostname
// is "www.google.com", this will return true for "com", "google.com", and
// "www.google.com" domains.
//
// If either of the input StringPieces is empty, the return value is false. The
// input domain should match host canonicalization rules, i.e. it should be
// lowercase except for escape chars.
URL_EXPORT bool DomainIs(base::StringPiece canonical_host,
                         base::StringPiece canonical_domain);

// Returns true if the hostname |host| is an IP address. Note: this function
// isn't very cheap, as it must re-parse the host to verify.
URL_EXPORT bool HostIsIPAddress(base::StringPiece host);

// URL library wrappers --------------------------------------------------------

// Parses the given spec according to the extracted scheme type. Normal users
// should use the URL object, although this may be useful if performance is
// critical and you don't want to do the heap allocation for the std::string.
//
// |spec| / |spec_len| give the input text and its length; overloads are
// provided for 8-bit (char) and 16-bit (base::char16) input.
//
// As with the Canonicalize* functions, the charset converter can
// be NULL to use UTF-8 (it will be faster in this case).
//
// NOTE(review): |trim_path_end| is not described in this header; by its name
// it presumably controls trimming at the end of the path -- confirm against
// the implementation before relying on it.
//
// Returns true if a valid URL was produced, false if not. On failure, the
// output and parsed structures will still be filled and will be consistent,
// but they will not represent a loadable URL.
URL_EXPORT bool Canonicalize(const char* spec,
                             int spec_len,
                             bool trim_path_end,
                             CharsetConverter* charset_converter,
                             CanonOutput* output,
                             Parsed* output_parsed);
URL_EXPORT bool Canonicalize(const base::char16* spec,
                             int spec_len,
                             bool trim_path_end,
                             CharsetConverter* charset_converter,
                             CanonOutput* output,
                             Parsed* output_parsed);

// Resolves a potentially relative URL relative to the given parsed base URL.
// The base MUST be valid. The resulting canonical URL and parsed information
// will be placed into the given out variables (|output| and |output_parsed|).
//
// The base is always an 8-bit spec (|base_spec| / |base_spec_len|) together
// with its parse result |base_parsed|; the relative input may be 8-bit (char)
// or 16-bit (base::char16).
//
// The relative need not be relative. If we discover that it's absolute, this
// will produce a canonical version of that URL. See Canonicalize() for more
// about the charset_converter.
//
// Returns true if the output is valid, false if the input could not produce
// a valid URL.
URL_EXPORT bool ResolveRelative(const char* base_spec,
                                int base_spec_len,
                                const Parsed& base_parsed,
                                const char* relative,
                                int relative_length,
                                CharsetConverter* charset_converter,
                                CanonOutput* output,
                                Parsed* output_parsed);
URL_EXPORT bool ResolveRelative(const char* base_spec,
                                int base_spec_len,
                                const Parsed& base_parsed,
                                const base::char16* relative,
                                int relative_length,
                                CharsetConverter* charset_converter,
                                CanonOutput* output,
                                Parsed* output_parsed);

// Replaces components in the given VALID input URL. The new canonical URL info
// is written to output and out_parsed.
//
// The input URL is always an 8-bit spec (|spec| / |spec_len|) with its parse
// result |parsed|; the replacement values in |replacements| may be 8-bit
// (Replacements<char>) or 16-bit (Replacements<base::char16>).
//
// Returns true if the resulting URL is valid.
URL_EXPORT bool ReplaceComponents(const char* spec,
                                  int spec_len,
                                  const Parsed& parsed,
                                  const Replacements<char>& replacements,
                                  CharsetConverter* charset_converter,
                                  CanonOutput* output,
                                  Parsed* out_parsed);
URL_EXPORT bool ReplaceComponents(
    const char* spec,
    int spec_len,
    const Parsed& parsed,
    const Replacements<base::char16>& replacements,
    CharsetConverter* charset_converter,
    CanonOutput* output,
    Parsed* out_parsed);

// String helper functions -----------------------------------------------------

// Unescapes the given string (|input|, |length| characters long) using URL
// escaping rules. The result goes to |output|, which is a CanonOutputW rather
// than the CanonOutput used elsewhere in this header.
// NOTE(review): the encoding assumed for the escaped bytes is not stated
// here -- confirm against the implementation.
URL_EXPORT void DecodeURLEscapeSequences(const char* input,
                                         int length,
                                         CanonOutputW* output);

// Escapes the given string (|input|, |length| characters long) as defined by
// the JS method encodeURIComponent, placing the result in |output|. See
// https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/encodeURIComponent
URL_EXPORT void EncodeURIComponent(const char* input,
                                   int length,
                                   CanonOutput* output);

}  // namespace url

#endif  // URL_URL_UTIL_H_
Import chromium-64.0.3282.119 2018-01-28 21:32:06 +03:00			`// Copyright 2013 The Chromium Authors. All rights reserved.`
			`// Use of this source code is governed by a BSD-style license that can be`
			`// found in the LICENSE file.`

			`#ifndef URL_URL_UTIL_H_`
			`#define URL_URL_UTIL_H_`

			`#include <string>`
			`#include <vector>`

			`#include "base/strings/string16.h"`
			`#include "base/strings/string_piece.h"`
			`#include "url/third_party/mozilla/url_parse.h"`
			`#include "url/url_canon.h"`
			`#include "url/url_constants.h"`
			`#include "url/url_export.h"`

			`namespace url {`

			`// Init ------------------------------------------------------------------------`

			`// Initialization is NOT required, it will be implicitly initialized when first`
			`// used. However, this implicit initialization is NOT threadsafe. If you are`
			`// using this library in a threaded environment and don't have a consistent`
			`// "first call" (an example might be calling Add*Scheme with your special`
			`// application-specific schemes) then you will want to call initialize before`
			`// spawning any threads.`
			`//`
			`// It is OK to call this function more than once, subsequent calls will be`
			`// no-ops, unless Shutdown was called in the mean time. This will also be a`
			`// no-op if other calls to the library have forced an initialization beforehand.`
			`URL_EXPORT void Initialize();`

			`// Cleanup is not required, except some strings may leak. For most user`
			`// applications, this is fine. If you're using it in a library that may get`
			`// loaded and unloaded, you'll want to unload to properly clean up your`
			`// library.`
			`URL_EXPORT void Shutdown();`

			`// Schemes ---------------------------------------------------------------------`

			`// Types of a scheme representing the requirements on the data represented by`
			`// the authority component of a URL with the scheme.`
			`enum SchemeType {`
			`// The authority component of a URL with the scheme, if any, has the port`
			`// (the default values may be omitted in a serialization).`
			`SCHEME_WITH_PORT,`
			`// The authority component of a URL with the scheme, if any, doesn't have a`
			`// port.`
			`SCHEME_WITHOUT_PORT,`
			`// A URL with the scheme doesn't have the authority component.`
			`SCHEME_WITHOUT_AUTHORITY,`
			`};`

			`// A pair for representing a standard scheme name and the SchemeType for it.`
			`struct URL_EXPORT SchemeWithType {`
			`const char* scheme;`
			`SchemeType type;`
			`};`

			`// The following Add*Scheme method are not threadsafe and can not be called`
			`// concurrently with any other url_util function. They will assert if the lists`
			`// of schemes have been locked (see LockSchemeRegistries).`

			`// Adds an application-defined scheme to the internal list of "standard-format"`
			`// URL schemes. A standard-format scheme adheres to what RFC 3986 calls "generic`
			`// URI syntax" (https://tools.ietf.org/html/rfc3986#section-3).`

			`URL_EXPORT void AddStandardScheme(const char* new_scheme,`
			`SchemeType scheme_type);`

			`// Adds an application-defined scheme to the internal list of schemes allowed`
			`// for referrers.`
			`URL_EXPORT void AddReferrerScheme(const char* new_scheme,`
			`SchemeType scheme_type);`

			`// Adds an application-defined scheme to the list of schemes that do not trigger`
			`// mixed content warnings.`
			`URL_EXPORT void AddSecureScheme(const char* new_scheme);`
			`URL_EXPORT const std::vector<std::string>& GetSecureSchemes();`

			`// Adds an application-defined scheme to the list of schemes that normal pages`
			`// cannot link to or access (i.e., with the same security rules as those applied`
			`// to "file" URLs).`
			`URL_EXPORT void AddLocalScheme(const char* new_scheme);`
			`URL_EXPORT const std::vector<std::string>& GetLocalSchemes();`

			`// Adds an application-defined scheme to the list of schemes that cause pages`
			`// loaded with them to not have access to pages loaded with any other URL`
			`// scheme.`
			`URL_EXPORT void AddNoAccessScheme(const char* new_scheme);`
			`URL_EXPORT const std::vector<std::string>& GetNoAccessSchemes();`

			`// Adds an application-defined scheme to the list of schemes that can be sent`
			`// CORS requests.`
			`URL_EXPORT void AddCORSEnabledScheme(const char* new_scheme);`
			`URL_EXPORT const std::vector<std::string>& GetCORSEnabledSchemes();`

			`// Adds an application-defined scheme to the list of web schemes that can be`
			`// used by web to store data (e.g. cookies, local storage, ...). This is`
			`// to differentiate them from schemes that can store data but are not used on`
			`// web (e.g. application's internal schemes) or schemes that are used on web but`
			`// cannot store data.`
			`URL_EXPORT void AddWebStorageScheme(const char* new_scheme);`
			`URL_EXPORT const std::vector<std::string>& GetWebStorageSchemes();`

			`// Adds an application-defined scheme to the list of schemes that can bypass the`
			`// Content-Security-Policy(CSP) checks.`
			`URL_EXPORT void AddCSPBypassingScheme(const char* new_scheme);`
			`URL_EXPORT const std::vector<std::string>& GetCSPBypassingSchemes();`

			`// Adds an application-defined scheme to the list of schemes that are strictly`
			`// empty documents, allowing them to commit synchronously.`
			`URL_EXPORT void AddEmptyDocumentScheme(const char* new_scheme);`
			`URL_EXPORT const std::vector<std::string>& GetEmptyDocumentSchemes();`

			`// Sets a flag to prevent future calls to Add*Scheme from succeeding.`
			`//`
			`// This is designed to help prevent errors for multithreaded applications.`
			`// Normal usage would be to call Add*Scheme for your custom schemes at`
			`// the beginning of program initialization, and then LockSchemeRegistries. This`
			`// prevents future callers from mistakenly calling Add*Scheme when the`
			`// program is running with multiple threads, where such usage would be`
			`// dangerous.`
			`//`
			`// We could have had Add*Scheme use a lock instead, but that would add`
			`// some platform-specific dependencies we don't otherwise have now, and is`
			`// overkill considering the normal usage is so simple.`
			`URL_EXPORT void LockSchemeRegistries();`

			`// Locates the scheme in the given string and places it into \|found_scheme\|,`
			`// which may be NULL to indicate the caller does not care about the range.`
			`//`
			`// Returns whether the given \|compare\| scheme matches the scheme found in the`
			`// input (if any). The \|compare\| scheme must be a valid canonical scheme or`
			`// the result of the comparison is undefined.`
			`URL_EXPORT bool FindAndCompareScheme(const char* str,`
			`int str_len,`
			`const char* compare,`
			`Component* found_scheme);`
			`URL_EXPORT bool FindAndCompareScheme(const base::char16* str,`
			`int str_len,`
			`const char* compare,`
			`Component* found_scheme);`
			`inline bool FindAndCompareScheme(const std::string& str,`
			`const char* compare,`
			`Component* found_scheme) {`
			`return FindAndCompareScheme(str.data(), static_cast<int>(str.size()),`
			`compare, found_scheme);`
			`}`
			`inline bool FindAndCompareScheme(const base::string16& str,`
			`const char* compare,`
			`Component* found_scheme) {`
			`return FindAndCompareScheme(str.data(), static_cast<int>(str.size()),`
			`compare, found_scheme);`
			`}`

			`// Returns true if the given scheme identified by \|scheme\| within \|spec\| is in`
			`// the list of known standard-format schemes (see AddStandardScheme).`
			`URL_EXPORT bool IsStandard(const char* spec, const Component& scheme);`
			`URL_EXPORT bool IsStandard(const base::char16* spec, const Component& scheme);`

			`// Returns true if the given scheme identified by \|scheme\| within \|spec\| is in`
			`// the list of allowed schemes for referrers (see AddReferrerScheme).`
			`URL_EXPORT bool IsReferrerScheme(const char* spec, const Component& scheme);`

			`// Returns true and sets \|type\| to the SchemeType of the given scheme`
			`// identified by \|scheme\| within \|spec\| if the scheme is in the list of known`
			`// standard-format schemes (see AddStandardScheme).`
			`URL_EXPORT bool GetStandardSchemeType(const char* spec,`
			`const Component& scheme,`
			`SchemeType* type);`

			`// Hosts ----------------------------------------------------------------------`

			`// Returns true if the \|canonical_host\| matches or is in the same domain as the`
			`// given \|canonical_domain\| string. For example, if the canonicalized hostname`
			`// is "www.google.com", this will return true for "com", "google.com", and`
			`// "www.google.com" domains.`
			`//`
			`// If either of the input StringPieces is empty, the return value is false. The`
			`// input domain should match host canonicalization rules. i.e. it should be`
			`// lowercase except for escape chars.`
			`URL_EXPORT bool DomainIs(base::StringPiece canonical_host,`
			`base::StringPiece canonical_domain);`

			`// Returns true if the hostname is an IP address. Note: this function isn't very`
			`// cheap, as it must re-parse the host to verify.`
			`URL_EXPORT bool HostIsIPAddress(base::StringPiece host);`

			`// URL library wrappers --------------------------------------------------------`

			`// Parses the given spec according to the extracted scheme type. Normal users`
			`// should use the URL object, although this may be useful if performance is`
			`// critical and you don't want to do the heap allocation for the std::string.`
			`//`
			`// As with the Canonicalize* functions, the charset converter can`
			`// be NULL to use UTF-8 (it will be faster in this case).`
			`//`
			`// Returns true if a valid URL was produced, false if not. On failure, the`
			`// output and parsed structures will still be filled and will be consistent,`
			`// but they will not represent a loadable URL.`
			`URL_EXPORT bool Canonicalize(const char* spec,`
			`int spec_len,`
			`bool trim_path_end,`
			`CharsetConverter* charset_converter,`
			`CanonOutput* output,`
			`Parsed* output_parsed);`
			`URL_EXPORT bool Canonicalize(const base::char16* spec,`
			`int spec_len,`
			`bool trim_path_end,`
			`CharsetConverter* charset_converter,`
			`CanonOutput* output,`
			`Parsed* output_parsed);`

			`// Resolves a potentially relative URL relative to the given parsed base URL.`
			`// The base MUST be valid. The resulting canonical URL and parsed information`
			`// will be placed in to the given out variables.`
			`//`
			`// The relative need not be relative. If we discover that it's absolute, this`
			`// will produce a canonical version of that URL. See Canonicalize() for more`
			`// about the charset_converter.`
			`//`
			`// Returns true if the output is valid, false if the input could not produce`
			`// a valid URL.`
			`URL_EXPORT bool ResolveRelative(const char* base_spec,`
			`int base_spec_len,`
			`const Parsed& base_parsed,`
			`const char* relative,`
			`int relative_length,`
			`CharsetConverter* charset_converter,`
			`CanonOutput* output,`
			`Parsed* output_parsed);`
			`URL_EXPORT bool ResolveRelative(const char* base_spec,`
			`int base_spec_len,`
			`const Parsed& base_parsed,`
			`const base::char16* relative,`
			`int relative_length,`
			`CharsetConverter* charset_converter,`
			`CanonOutput* output,`
			`Parsed* output_parsed);`

			`// Replaces components in the given VALID input URL. The new canonical URL info`
			`// is written to output and out_parsed.`
			`//`
			`// Returns true if the resulting URL is valid.`
			`URL_EXPORT bool ReplaceComponents(const char* spec,`
			`int spec_len,`
			`const Parsed& parsed,`
			`const Replacements<char>& replacements,`
			`CharsetConverter* charset_converter,`
			`CanonOutput* output,`
			`Parsed* out_parsed);`
			`URL_EXPORT bool ReplaceComponents(`
			`const char* spec,`
			`int spec_len,`
			`const Parsed& parsed,`
			`const Replacements<base::char16>& replacements,`
			`CharsetConverter* charset_converter,`
			`CanonOutput* output,`
			`Parsed* out_parsed);`

			`// String helper functions -----------------------------------------------------`

			`// Unescapes the given string using URL escaping rules.`
			`URL_EXPORT void DecodeURLEscapeSequences(const char* input,`
			`int length,`
			`CanonOutputW* output);`

			`// Escapes the given string as defined by the JS method encodeURIComponent. See`
			`// https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/encodeURIComponent`
			`URL_EXPORT void EncodeURIComponent(const char* input,`
			`int length,`
			`CanonOutput* output);`

			`} // namespace url`

			`#endif // URL_URL_UTIL_H_`