mirror of
https://github.com/klzgrad/naiveproxy.git
synced 2024-11-28 08:16:09 +03:00
376 lines
12 KiB
C++
376 lines
12 KiB
C++
|
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
|
||
|
// Use of this source code is governed by a BSD-style license that can be
|
||
|
// found in the LICENSE file.
|
||
|
|
||
|
#include "net/ftp/ftp_util.h"
|
||
|
|
||
|
#include <map>
|
||
|
#include <vector>
|
||
|
|
||
|
#include "base/i18n/case_conversion.h"
|
||
|
#include "base/i18n/char_iterator.h"
|
||
|
#include "base/i18n/unicodestring.h"
|
||
|
#include "base/logging.h"
|
||
|
#include "base/macros.h"
|
||
|
#include "base/memory/singleton.h"
|
||
|
#include "base/strings/string_number_conversions.h"
|
||
|
#include "base/strings/string_piece.h"
|
||
|
#include "base/strings/string_split.h"
|
||
|
#include "base/strings/string_tokenizer.h"
|
||
|
#include "base/strings/string_util.h"
|
||
|
#include "base/strings/utf_string_conversions.h"
|
||
|
#include "base/time/time.h"
|
||
|
#include "third_party/icu/source/common/unicode/uchar.h"
|
||
|
#include "third_party/icu/source/i18n/unicode/datefmt.h"
|
||
|
#include "third_party/icu/source/i18n/unicode/dtfmtsym.h"
|
||
|
|
||
|
using base::ASCIIToUTF16;
|
||
|
using base::StringPiece16;
|
||
|
|
||
|
// For examples of Unix<->VMS path conversions, see the unit test file. On VMS
|
||
|
// a path looks different depending on whether it's a file or directory.
|
||
|
|
||
|
namespace net {
|
||
|
|
||
|
// static
|
||
|
std::string FtpUtil::UnixFilePathToVMS(const std::string& unix_path) {
|
||
|
if (unix_path.empty())
|
||
|
return std::string();
|
||
|
|
||
|
base::StringTokenizer tokenizer(unix_path, "/");
|
||
|
std::vector<std::string> tokens;
|
||
|
while (tokenizer.GetNext())
|
||
|
tokens.push_back(tokenizer.token());
|
||
|
|
||
|
if (unix_path[0] == '/') {
|
||
|
// It's an absolute path.
|
||
|
|
||
|
if (tokens.empty()) {
|
||
|
// It's just "/" or a series of slashes, which all mean the same thing.
|
||
|
return "[]";
|
||
|
}
|
||
|
|
||
|
if (tokens.size() == 1)
|
||
|
return tokens.front(); // Return without leading slashes.
|
||
|
|
||
|
std::string result(tokens[0] + ":[");
|
||
|
if (tokens.size() == 2) {
|
||
|
// Don't ask why, it just works that way on VMS.
|
||
|
result.append("000000");
|
||
|
} else {
|
||
|
result.append(tokens[1]);
|
||
|
for (size_t i = 2; i < tokens.size() - 1; i++)
|
||
|
result.append("." + tokens[i]);
|
||
|
}
|
||
|
result.append("]" + tokens.back());
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
if (tokens.size() == 1)
|
||
|
return unix_path;
|
||
|
|
||
|
std::string result("[");
|
||
|
for (size_t i = 0; i < tokens.size() - 1; i++)
|
||
|
result.append("." + tokens[i]);
|
||
|
result.append("]" + tokens.back());
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
// static
|
||
|
std::string FtpUtil::UnixDirectoryPathToVMS(const std::string& unix_path) {
|
||
|
if (unix_path.empty())
|
||
|
return std::string();
|
||
|
|
||
|
std::string path(unix_path);
|
||
|
|
||
|
if (path.back() != '/')
|
||
|
path.append("/");
|
||
|
|
||
|
// Reuse logic from UnixFilePathToVMS by appending a fake file name to the
|
||
|
// real path and removing it after conversion.
|
||
|
path.append("x");
|
||
|
path = UnixFilePathToVMS(path);
|
||
|
return path.substr(0, path.length() - 1);
|
||
|
}
|
||
|
|
||
|
// static
|
||
|
std::string FtpUtil::VMSPathToUnix(const std::string& vms_path) {
|
||
|
if (vms_path.empty())
|
||
|
return ".";
|
||
|
|
||
|
if (vms_path[0] == '/') {
|
||
|
// This is not really a VMS path. Most likely the server is emulating UNIX.
|
||
|
// Return path as-is.
|
||
|
return vms_path;
|
||
|
}
|
||
|
|
||
|
if (vms_path == "[]")
|
||
|
return "/";
|
||
|
|
||
|
std::string result(vms_path);
|
||
|
if (vms_path[0] == '[') {
|
||
|
// It's a relative path.
|
||
|
base::ReplaceFirstSubstringAfterOffset(
|
||
|
&result, 0, "[.", base::StringPiece());
|
||
|
} else {
|
||
|
// It's an absolute path.
|
||
|
result.insert(0, "/");
|
||
|
base::ReplaceSubstringsAfterOffset(&result, 0, ":[000000]", "/");
|
||
|
base::ReplaceSubstringsAfterOffset(&result, 0, ":[", "/");
|
||
|
}
|
||
|
std::replace(result.begin(), result.end(), '.', '/');
|
||
|
std::replace(result.begin(), result.end(), ']', '/');
|
||
|
|
||
|
// Make sure the result doesn't end with a slash.
|
||
|
if (!result.empty() && result.back() == '/')
|
||
|
result = result.substr(0, result.length() - 1);
|
||
|
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
namespace {
|
||
|
|
||
|
// Lazy-initialized map of abbreviated month names.
|
||
|
class AbbreviatedMonthsMap {
|
||
|
public:
|
||
|
static AbbreviatedMonthsMap* GetInstance() {
|
||
|
return base::Singleton<AbbreviatedMonthsMap>::get();
|
||
|
}
|
||
|
|
||
|
// Converts abbreviated month name |text| to its number (in range 1-12).
|
||
|
// On success returns true and puts the number in |number|.
|
||
|
bool GetMonthNumber(const base::string16& text, int* number) {
|
||
|
// Ignore the case of the month names. The simplest way to handle that
|
||
|
// is to make everything lowercase.
|
||
|
base::string16 text_lower(base::i18n::ToLower(text));
|
||
|
|
||
|
if (map_.find(text_lower) == map_.end())
|
||
|
return false;
|
||
|
|
||
|
*number = map_[text_lower];
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
private:
|
||
|
friend struct base::DefaultSingletonTraits<AbbreviatedMonthsMap>;
|
||
|
|
||
|
// Constructor, initializes the map based on ICU data. It is much faster
|
||
|
// to do that just once.
|
||
|
AbbreviatedMonthsMap() {
|
||
|
int32_t locales_count;
|
||
|
const icu::Locale* locales =
|
||
|
icu::DateFormat::getAvailableLocales(locales_count);
|
||
|
|
||
|
for (int32_t locale = 0; locale < locales_count; locale++) {
|
||
|
UErrorCode status(U_ZERO_ERROR);
|
||
|
|
||
|
icu::DateFormatSymbols format_symbols(locales[locale], status);
|
||
|
|
||
|
// If we cannot get format symbols for some locale, it's not a fatal
|
||
|
// error. Just try another one.
|
||
|
if (U_FAILURE(status))
|
||
|
continue;
|
||
|
|
||
|
int32_t months_count;
|
||
|
const icu::UnicodeString* months =
|
||
|
format_symbols.getShortMonths(months_count);
|
||
|
|
||
|
for (int32_t month = 0; month < months_count; month++) {
|
||
|
base::string16 month_name(
|
||
|
base::i18n::UnicodeStringToString16(months[month]));
|
||
|
|
||
|
// Ignore the case of the month names. The simplest way to handle that
|
||
|
// is to make everything lowercase.
|
||
|
month_name = base::i18n::ToLower(month_name);
|
||
|
|
||
|
map_[month_name] = month + 1;
|
||
|
|
||
|
// Sometimes ICU returns longer strings, but in FTP listings a shorter
|
||
|
// abbreviation is used (for example for the Russian locale). Make sure
|
||
|
// we always have a map entry for a three-letter abbreviation.
|
||
|
map_[month_name.substr(0, 3)] = month + 1;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Fail loudly if the data returned by ICU is obviously incomplete.
|
||
|
// This is intended to catch cases like http://crbug.com/177428
|
||
|
// much earlier. Note that the issue above turned out to be non-trivial
|
||
|
// to reproduce - crash data is much better indicator of a problem
|
||
|
// than incomplete bug reports.
|
||
|
CHECK_EQ(1, map_[ASCIIToUTF16("jan")]);
|
||
|
CHECK_EQ(2, map_[ASCIIToUTF16("feb")]);
|
||
|
CHECK_EQ(3, map_[ASCIIToUTF16("mar")]);
|
||
|
CHECK_EQ(4, map_[ASCIIToUTF16("apr")]);
|
||
|
CHECK_EQ(5, map_[ASCIIToUTF16("may")]);
|
||
|
CHECK_EQ(6, map_[ASCIIToUTF16("jun")]);
|
||
|
CHECK_EQ(7, map_[ASCIIToUTF16("jul")]);
|
||
|
CHECK_EQ(8, map_[ASCIIToUTF16("aug")]);
|
||
|
CHECK_EQ(9, map_[ASCIIToUTF16("sep")]);
|
||
|
CHECK_EQ(10, map_[ASCIIToUTF16("oct")]);
|
||
|
CHECK_EQ(11, map_[ASCIIToUTF16("nov")]);
|
||
|
CHECK_EQ(12, map_[ASCIIToUTF16("dec")]);
|
||
|
}
|
||
|
|
||
|
// Maps lowercase month names to numbers in range 1-12.
|
||
|
std::map<base::string16, int> map_;
|
||
|
|
||
|
DISALLOW_COPY_AND_ASSIGN(AbbreviatedMonthsMap);
|
||
|
};
|
||
|
|
||
|
} // namespace
|
||
|
|
||
|
// static
|
||
|
bool FtpUtil::AbbreviatedMonthToNumber(const base::string16& text,
|
||
|
int* number) {
|
||
|
return AbbreviatedMonthsMap::GetInstance()->GetMonthNumber(text, number);
|
||
|
}
|
||
|
|
||
|
// static
|
||
|
bool FtpUtil::LsDateListingToTime(const base::string16& month,
|
||
|
const base::string16& day,
|
||
|
const base::string16& rest,
|
||
|
const base::Time& current_time,
|
||
|
base::Time* result) {
|
||
|
base::Time::Exploded time_exploded = { 0 };
|
||
|
|
||
|
if (!AbbreviatedMonthToNumber(month, &time_exploded.month)) {
|
||
|
// Work around garbage sent by some servers in the same column
|
||
|
// as the month. Take just last 3 characters of the string.
|
||
|
if (month.length() < 3 ||
|
||
|
!AbbreviatedMonthToNumber(month.substr(month.length() - 3),
|
||
|
&time_exploded.month)) {
|
||
|
return false;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (!base::StringToInt(day, &time_exploded.day_of_month))
|
||
|
return false;
|
||
|
if (time_exploded.day_of_month > 31)
|
||
|
return false;
|
||
|
|
||
|
if (!base::StringToInt(rest, &time_exploded.year)) {
|
||
|
// Maybe it's time. Does it look like time? Note that it can be any of
|
||
|
// "HH:MM", "H:MM", "HH:M" or maybe even "H:M".
|
||
|
if (rest.length() > 5)
|
||
|
return false;
|
||
|
|
||
|
size_t colon_pos = rest.find(':');
|
||
|
if (colon_pos == base::string16::npos)
|
||
|
return false;
|
||
|
if (colon_pos > 2)
|
||
|
return false;
|
||
|
|
||
|
if (!base::StringToInt(
|
||
|
StringPiece16(rest.begin(), rest.begin() + colon_pos),
|
||
|
&time_exploded.hour)) {
|
||
|
return false;
|
||
|
}
|
||
|
if (!base::StringToInt(
|
||
|
StringPiece16(rest.begin() + colon_pos + 1, rest.end()),
|
||
|
&time_exploded.minute)) {
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
// Guess the year.
|
||
|
base::Time::Exploded current_exploded;
|
||
|
current_time.UTCExplode(¤t_exploded);
|
||
|
|
||
|
// If it's not possible for the parsed date to be in the current year,
|
||
|
// use the previous year.
|
||
|
if (time_exploded.month > current_exploded.month ||
|
||
|
(time_exploded.month == current_exploded.month &&
|
||
|
time_exploded.day_of_month > current_exploded.day_of_month)) {
|
||
|
time_exploded.year = current_exploded.year - 1;
|
||
|
} else {
|
||
|
time_exploded.year = current_exploded.year;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// We don't know the time zone of the listing, so just use UTC.
|
||
|
return base::Time::FromUTCExploded(time_exploded, result);
|
||
|
}
|
||
|
|
||
|
// static
|
||
|
bool FtpUtil::WindowsDateListingToTime(const base::string16& date,
|
||
|
const base::string16& time,
|
||
|
base::Time* result) {
|
||
|
base::Time::Exploded time_exploded = { 0 };
|
||
|
|
||
|
// Date should be in format MM-DD-YY[YY].
|
||
|
std::vector<base::StringPiece16> date_parts =
|
||
|
base::SplitStringPiece(date, base::ASCIIToUTF16("-"),
|
||
|
base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
|
||
|
if (date_parts.size() != 3)
|
||
|
return false;
|
||
|
if (!base::StringToInt(date_parts[0], &time_exploded.month))
|
||
|
return false;
|
||
|
if (!base::StringToInt(date_parts[1], &time_exploded.day_of_month))
|
||
|
return false;
|
||
|
if (!base::StringToInt(date_parts[2], &time_exploded.year))
|
||
|
return false;
|
||
|
if (time_exploded.year < 0)
|
||
|
return false;
|
||
|
// If year has only two digits then assume that 00-79 is 2000-2079,
|
||
|
// and 80-99 is 1980-1999.
|
||
|
if (time_exploded.year < 80)
|
||
|
time_exploded.year += 2000;
|
||
|
else if (time_exploded.year < 100)
|
||
|
time_exploded.year += 1900;
|
||
|
|
||
|
// Time should be in format HH:MM[(AM|PM)]
|
||
|
if (time.length() < 5)
|
||
|
return false;
|
||
|
|
||
|
std::vector<base::StringPiece16> time_parts = base::SplitStringPiece(
|
||
|
base::StringPiece16(time).substr(0, 5), base::ASCIIToUTF16(":"),
|
||
|
base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
|
||
|
if (time_parts.size() != 2)
|
||
|
return false;
|
||
|
if (!base::StringToInt(time_parts[0], &time_exploded.hour))
|
||
|
return false;
|
||
|
if (!base::StringToInt(time_parts[1], &time_exploded.minute))
|
||
|
return false;
|
||
|
if (!time_exploded.HasValidValues())
|
||
|
return false;
|
||
|
|
||
|
if (time.length() > 5) {
|
||
|
if (time.length() != 7)
|
||
|
return false;
|
||
|
base::string16 am_or_pm(time.substr(5, 2));
|
||
|
if (base::EqualsASCII(am_or_pm, "PM")) {
|
||
|
if (time_exploded.hour < 12)
|
||
|
time_exploded.hour += 12;
|
||
|
} else if (base::EqualsASCII(am_or_pm, "AM")) {
|
||
|
if (time_exploded.hour == 12)
|
||
|
time_exploded.hour = 0;
|
||
|
} else {
|
||
|
return false;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// We don't know the time zone of the server, so just use UTC.
|
||
|
return base::Time::FromUTCExploded(time_exploded, result);
|
||
|
}
|
||
|
|
||
|
// static
|
||
|
base::string16 FtpUtil::GetStringPartAfterColumns(const base::string16& text,
|
||
|
int columns) {
|
||
|
base::i18n::UTF16CharIterator iter(&text);
|
||
|
|
||
|
for (int i = 0; i < columns; i++) {
|
||
|
// Skip the leading whitespace.
|
||
|
while (!iter.end() && u_isspace(iter.get()))
|
||
|
iter.Advance();
|
||
|
|
||
|
// Skip the actual text of i-th column.
|
||
|
while (!iter.end() && !u_isspace(iter.get()))
|
||
|
iter.Advance();
|
||
|
}
|
||
|
|
||
|
base::string16 result(text.substr(iter.array_pos()));
|
||
|
base::TrimWhitespace(result, base::TRIM_ALL, &result);
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
} // namespace net
|