naiveproxy/url/url_canon_pathurl.cc

// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Functions for canonicalizing "path" URLs. Not to be confused with the path
// of a URL, these are URLs that have no authority section, only a path. For
// example, "javascript:" and "data:".

#include "url/url_canon.h"
#include "url/url_canon_internal.h"

namespace url {

namespace {

// Canonicalize the given |component| from |source| into |output| and
// |new_component|. If |separator| is non-zero, it is pre-pended to |output|
// prior to the canonicalized component; i.e. for the '?' or '#' characters.
template<typename CHAR, typename UCHAR>
bool DoCanonicalizePathComponent(const CHAR* source,
                                 const Component& component,
                                 char separator,
                                 CanonOutput* output,
                                 Component* new_component) {
  bool success = true;
  if (component.is_valid()) {
    if (separator)
      output->push_back(separator);
    // Copy the path using path URL's more lax escaping rules (think for
    // javascript:). We convert to UTF-8 and escape non-ASCII, but leave all
    // ASCII characters alone. This helps readability of JavaStript.
    new_component->begin = output->length();
    int end = component.end();
    for (int i = component.begin; i < end; i++) {
      UCHAR uch = static_cast<UCHAR>(source[i]);
      if (uch < 0x20 || uch >= 0x80)
        success &= AppendUTF8EscapedChar(source, &i, end, output);
      else
        output->push_back(static_cast<char>(uch));
    }
    new_component->len = output->length() - new_component->begin;
  } else {
    // Empty part.
    new_component->reset();
  }
  return success;
}

template <typename CHAR, typename UCHAR>
bool DoCanonicalizePathURL(const URLComponentSource<CHAR>& source,
                           const Parsed& parsed,
                           CanonOutput* output,
                           Parsed* new_parsed) {
  // Scheme: this will append the colon.
  bool success = CanonicalizeScheme(source.scheme, parsed.scheme,
                                    output, &new_parsed->scheme);

  // We assume there's no authority for path URLs. Note that hosts should never
  // have -1 length.
  new_parsed->username.reset();
  new_parsed->password.reset();
  new_parsed->host.reset();
  new_parsed->port.reset();
  // We allow path URLs to have the path, query and fragment components, but we
  // will canonicalize each of the via the weaker path URL rules.
  success &= DoCanonicalizePathComponent<CHAR, UCHAR>(
      source.path, parsed.path, '\0', output, &new_parsed->path);
  success &= DoCanonicalizePathComponent<CHAR, UCHAR>(
      source.query, parsed.query, '?', output, &new_parsed->query);
  success &= DoCanonicalizePathComponent<CHAR, UCHAR>(
      source.ref, parsed.ref, '#', output, &new_parsed->ref);

  return success;
}

}  // namespace

bool CanonicalizePathURL(const char* spec,
                         int spec_len,
                         const Parsed& parsed,
                         CanonOutput* output,
                         Parsed* new_parsed) {
  return DoCanonicalizePathURL<char, unsigned char>(
      URLComponentSource<char>(spec), parsed, output, new_parsed);
}

bool CanonicalizePathURL(const base::char16* spec,
                         int spec_len,
                         const Parsed& parsed,
                         CanonOutput* output,
                         Parsed* new_parsed) {
  return DoCanonicalizePathURL<base::char16, base::char16>(
      URLComponentSource<base::char16>(spec), parsed, output, new_parsed);
}

bool ReplacePathURL(const char* base,
                    const Parsed& base_parsed,
                    const Replacements<char>& replacements,
                    CanonOutput* output,
                    Parsed* new_parsed) {
  URLComponentSource<char> source(base);
  Parsed parsed(base_parsed);
  SetupOverrideComponents(base, replacements, &source, &parsed);
  return DoCanonicalizePathURL<char, unsigned char>(
      source, parsed, output, new_parsed);
}

bool ReplacePathURL(const char* base,
                    const Parsed& base_parsed,
                    const Replacements<base::char16>& replacements,
                    CanonOutput* output,
                    Parsed* new_parsed) {
  RawCanonOutput<1024> utf8;
  URLComponentSource<char> source(base);
  Parsed parsed(base_parsed);
  SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
  return DoCanonicalizePathURL<char, unsigned char>(
      source, parsed, output, new_parsed);
}

}  // namespace url
Import chromium-64.0.3282.140 2018-02-02 13:49:39 +03:00			`// Copyright 2013 The Chromium Authors. All rights reserved.`
			`// Use of this source code is governed by a BSD-style license that can be`
			`// found in the LICENSE file.`

			`// Functions for canonicalizing "path" URLs. Not to be confused with the path`
			`// of a URL, these are URLs that have no authority section, only a path. For`
			`// example, "javascript:" and "data:".`

			`#include "url/url_canon.h"`
			`#include "url/url_canon_internal.h"`

			`namespace url {`

			`namespace {`

			`// Canonicalize the given \|component\| from \|source\| into \|output\| and`
			`// \|new_component\|. If \|separator\| is non-zero, it is pre-pended to \|output\|`
			`// prior to the canonicalized component; i.e. for the '?' or '#' characters.`
			`template<typename CHAR, typename UCHAR>`
			`bool DoCanonicalizePathComponent(const CHAR* source,`
			`const Component& component,`
			`char separator,`
			`CanonOutput* output,`
			`Component* new_component) {`
			`bool success = true;`
			`if (component.is_valid()) {`
			`if (separator)`
			`output->push_back(separator);`
			`// Copy the path using path URL's more lax escaping rules (think for`
			`// javascript:). We convert to UTF-8 and escape non-ASCII, but leave all`
			`// ASCII characters alone. This helps readability of JavaStript.`
			`new_component->begin = output->length();`
			`int end = component.end();`
			`for (int i = component.begin; i < end; i++) {`
			`UCHAR uch = static_cast<UCHAR>(source[i]);`
			`if (uch < 0x20 \|\| uch >= 0x80)`
			`success &= AppendUTF8EscapedChar(source, &i, end, output);`
			`else`
			`output->push_back(static_cast<char>(uch));`
			`}`
			`new_component->len = output->length() - new_component->begin;`
			`} else {`
			`// Empty part.`
			`new_component->reset();`
			`}`
			`return success;`
			`}`

			`template <typename CHAR, typename UCHAR>`
			`bool DoCanonicalizePathURL(const URLComponentSource<CHAR>& source,`
			`const Parsed& parsed,`
			`CanonOutput* output,`
			`Parsed* new_parsed) {`
			`// Scheme: this will append the colon.`
			`bool success = CanonicalizeScheme(source.scheme, parsed.scheme,`
			`output, &new_parsed->scheme);`

			`// We assume there's no authority for path URLs. Note that hosts should never`
			`// have -1 length.`
			`new_parsed->username.reset();`
			`new_parsed->password.reset();`
			`new_parsed->host.reset();`
			`new_parsed->port.reset();`
			`// We allow path URLs to have the path, query and fragment components, but we`
			`// will canonicalize each of the via the weaker path URL rules.`
			`success &= DoCanonicalizePathComponent<CHAR, UCHAR>(`
			`source.path, parsed.path, '\0', output, &new_parsed->path);`
			`success &= DoCanonicalizePathComponent<CHAR, UCHAR>(`
			`source.query, parsed.query, '?', output, &new_parsed->query);`
			`success &= DoCanonicalizePathComponent<CHAR, UCHAR>(`
			`source.ref, parsed.ref, '#', output, &new_parsed->ref);`

			`return success;`
			`}`

			`} // namespace`

			`bool CanonicalizePathURL(const char* spec,`
			`int spec_len,`
			`const Parsed& parsed,`
			`CanonOutput* output,`
			`Parsed* new_parsed) {`
			`return DoCanonicalizePathURL<char, unsigned char>(`
			`URLComponentSource<char>(spec), parsed, output, new_parsed);`
			`}`

			`bool CanonicalizePathURL(const base::char16* spec,`
			`int spec_len,`
			`const Parsed& parsed,`
			`CanonOutput* output,`
			`Parsed* new_parsed) {`
			`return DoCanonicalizePathURL<base::char16, base::char16>(`
			`URLComponentSource<base::char16>(spec), parsed, output, new_parsed);`
			`}`

			`bool ReplacePathURL(const char* base,`
			`const Parsed& base_parsed,`
			`const Replacements<char>& replacements,`
			`CanonOutput* output,`
			`Parsed* new_parsed) {`
			`URLComponentSource<char> source(base);`
			`Parsed parsed(base_parsed);`
			`SetupOverrideComponents(base, replacements, &source, &parsed);`
			`return DoCanonicalizePathURL<char, unsigned char>(`
			`source, parsed, output, new_parsed);`
			`}`

			`bool ReplacePathURL(const char* base,`
			`const Parsed& base_parsed,`
			`const Replacements<base::char16>& replacements,`
			`CanonOutput* output,`
			`Parsed* new_parsed) {`
			`RawCanonOutput<1024> utf8;`
			`URLComponentSource<char> source(base);`
			`Parsed parsed(base_parsed);`
			`SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);`
			`return DoCanonicalizePathURL<char, unsigned char>(`
			`source, parsed, output, new_parsed);`
			`}`

			`} // namespace url`