// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "net/ftp/ftp_util.h" #include #include #include "base/i18n/case_conversion.h" #include "base/i18n/char_iterator.h" #include "base/i18n/unicodestring.h" #include "base/logging.h" #include "base/macros.h" #include "base/memory/singleton.h" #include "base/strings/string_number_conversions.h" #include "base/strings/string_piece.h" #include "base/strings/string_split.h" #include "base/strings/string_tokenizer.h" #include "base/strings/string_util.h" #include "base/strings/utf_string_conversions.h" #include "base/time/time.h" #include "third_party/icu/source/common/unicode/uchar.h" #include "third_party/icu/source/i18n/unicode/datefmt.h" #include "third_party/icu/source/i18n/unicode/dtfmtsym.h" using base::ASCIIToUTF16; using base::StringPiece16; // For examples of Unix<->VMS path conversions, see the unit test file. On VMS // a path looks differently depending on whether it's a file or directory. namespace net { // static std::string FtpUtil::UnixFilePathToVMS(const std::string& unix_path) { if (unix_path.empty()) return std::string(); base::StringTokenizer tokenizer(unix_path, "/"); std::vector tokens; while (tokenizer.GetNext()) tokens.push_back(tokenizer.token()); if (unix_path[0] == '/') { // It's an absolute path. if (tokens.empty()) { // It's just "/" or a series of slashes, which all mean the same thing. return "[]"; } if (tokens.size() == 1) return tokens.front(); // Return without leading slashes. std::string result(tokens[0] + ":["); if (tokens.size() == 2) { // Don't ask why, it just works that way on VMS. result.append("000000"); } else { result.append(tokens[1]); for (size_t i = 2; i < tokens.size() - 1; i++) result.append("." + tokens[i]); } result.append("]" + tokens.back()); return result; } if (tokens.size() == 1) return unix_path; std::string result("["); for (size_t i = 0; i < tokens.size() - 1; i++) result.append("." + tokens[i]); result.append("]" + tokens.back()); return result; } // static std::string FtpUtil::UnixDirectoryPathToVMS(const std::string& unix_path) { if (unix_path.empty()) return std::string(); std::string path(unix_path); if (path.back() != '/') path.append("/"); // Reuse logic from UnixFilePathToVMS by appending a fake file name to the // real path and removing it after conversion. path.append("x"); path = UnixFilePathToVMS(path); return path.substr(0, path.length() - 1); } // static std::string FtpUtil::VMSPathToUnix(const std::string& vms_path) { if (vms_path.empty()) return "."; if (vms_path[0] == '/') { // This is not really a VMS path. Most likely the server is emulating UNIX. // Return path as-is. return vms_path; } if (vms_path == "[]") return "/"; std::string result(vms_path); if (vms_path[0] == '[') { // It's a relative path. base::ReplaceFirstSubstringAfterOffset( &result, 0, "[.", base::StringPiece()); } else { // It's an absolute path. result.insert(0, "/"); base::ReplaceSubstringsAfterOffset(&result, 0, ":[000000]", "/"); base::ReplaceSubstringsAfterOffset(&result, 0, ":[", "/"); } std::replace(result.begin(), result.end(), '.', '/'); std::replace(result.begin(), result.end(), ']', '/'); // Make sure the result doesn't end with a slash. if (!result.empty() && result.back() == '/') result = result.substr(0, result.length() - 1); return result; } namespace { // Lazy-initialized map of abbreviated month names. class AbbreviatedMonthsMap { public: static AbbreviatedMonthsMap* GetInstance() { return base::Singleton::get(); } // Converts abbreviated month name |text| to its number (in range 1-12). // On success returns true and puts the number in |number|. bool GetMonthNumber(const base::string16& text, int* number) { // Ignore the case of the month names. The simplest way to handle that // is to make everything lowercase. base::string16 text_lower(base::i18n::ToLower(text)); if (map_.find(text_lower) == map_.end()) return false; *number = map_[text_lower]; return true; } private: friend struct base::DefaultSingletonTraits; // Constructor, initializes the map based on ICU data. It is much faster // to do that just once. AbbreviatedMonthsMap() { int32_t locales_count; const icu::Locale* locales = icu::DateFormat::getAvailableLocales(locales_count); for (int32_t locale = 0; locale < locales_count; locale++) { UErrorCode status(U_ZERO_ERROR); icu::DateFormatSymbols format_symbols(locales[locale], status); // If we cannot get format symbols for some locale, it's not a fatal // error. Just try another one. if (U_FAILURE(status)) continue; int32_t months_count; const icu::UnicodeString* months = format_symbols.getShortMonths(months_count); for (int32_t month = 0; month < months_count; month++) { base::string16 month_name( base::i18n::UnicodeStringToString16(months[month])); // Ignore the case of the month names. The simplest way to handle that // is to make everything lowercase. month_name = base::i18n::ToLower(month_name); map_[month_name] = month + 1; // Sometimes ICU returns longer strings, but in FTP listings a shorter // abbreviation is used (for example for the Russian locale). Make sure // we always have a map entry for a three-letter abbreviation. map_[month_name.substr(0, 3)] = month + 1; } } // Fail loudly if the data returned by ICU is obviously incomplete. // This is intended to catch cases like http://crbug.com/177428 // much earlier. Note that the issue above turned out to be non-trivial // to reproduce - crash data is much better indicator of a problem // than incomplete bug reports. CHECK_EQ(1, map_[ASCIIToUTF16("jan")]); CHECK_EQ(2, map_[ASCIIToUTF16("feb")]); CHECK_EQ(3, map_[ASCIIToUTF16("mar")]); CHECK_EQ(4, map_[ASCIIToUTF16("apr")]); CHECK_EQ(5, map_[ASCIIToUTF16("may")]); CHECK_EQ(6, map_[ASCIIToUTF16("jun")]); CHECK_EQ(7, map_[ASCIIToUTF16("jul")]); CHECK_EQ(8, map_[ASCIIToUTF16("aug")]); CHECK_EQ(9, map_[ASCIIToUTF16("sep")]); CHECK_EQ(10, map_[ASCIIToUTF16("oct")]); CHECK_EQ(11, map_[ASCIIToUTF16("nov")]); CHECK_EQ(12, map_[ASCIIToUTF16("dec")]); } // Maps lowercase month names to numbers in range 1-12. std::map map_; DISALLOW_COPY_AND_ASSIGN(AbbreviatedMonthsMap); }; } // namespace // static bool FtpUtil::AbbreviatedMonthToNumber(const base::string16& text, int* number) { return AbbreviatedMonthsMap::GetInstance()->GetMonthNumber(text, number); } // static bool FtpUtil::LsDateListingToTime(const base::string16& month, const base::string16& day, const base::string16& rest, const base::Time& current_time, base::Time* result) { base::Time::Exploded time_exploded = { 0 }; if (!AbbreviatedMonthToNumber(month, &time_exploded.month)) { // Work around garbage sent by some servers in the same column // as the month. Take just last 3 characters of the string. if (month.length() < 3 || !AbbreviatedMonthToNumber(month.substr(month.length() - 3), &time_exploded.month)) { return false; } } if (!base::StringToInt(day, &time_exploded.day_of_month)) return false; if (time_exploded.day_of_month > 31) return false; if (!base::StringToInt(rest, &time_exploded.year)) { // Maybe it's time. Does it look like time? Note that it can be any of // "HH:MM", "H:MM", "HH:M" or maybe even "H:M". if (rest.length() > 5) return false; size_t colon_pos = rest.find(':'); if (colon_pos == base::string16::npos) return false; if (colon_pos > 2) return false; if (!base::StringToInt( StringPiece16(rest.begin(), rest.begin() + colon_pos), &time_exploded.hour)) { return false; } if (!base::StringToInt( StringPiece16(rest.begin() + colon_pos + 1, rest.end()), &time_exploded.minute)) { return false; } // Guess the year. base::Time::Exploded current_exploded; current_time.UTCExplode(¤t_exploded); // If it's not possible for the parsed date to be in the current year, // use the previous year. if (time_exploded.month > current_exploded.month || (time_exploded.month == current_exploded.month && time_exploded.day_of_month > current_exploded.day_of_month)) { time_exploded.year = current_exploded.year - 1; } else { time_exploded.year = current_exploded.year; } } // We don't know the time zone of the listing, so just use UTC. return base::Time::FromUTCExploded(time_exploded, result); } // static bool FtpUtil::WindowsDateListingToTime(const base::string16& date, const base::string16& time, base::Time* result) { base::Time::Exploded time_exploded = { 0 }; // Date should be in format MM-DD-YY[YY]. std::vector date_parts = base::SplitStringPiece(date, base::ASCIIToUTF16("-"), base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL); if (date_parts.size() != 3) return false; if (!base::StringToInt(date_parts[0], &time_exploded.month)) return false; if (!base::StringToInt(date_parts[1], &time_exploded.day_of_month)) return false; if (!base::StringToInt(date_parts[2], &time_exploded.year)) return false; if (time_exploded.year < 0) return false; // If year has only two digits then assume that 00-79 is 2000-2079, // and 80-99 is 1980-1999. if (time_exploded.year < 80) time_exploded.year += 2000; else if (time_exploded.year < 100) time_exploded.year += 1900; // Time should be in format HH:MM[(AM|PM)] if (time.length() < 5) return false; std::vector time_parts = base::SplitStringPiece( base::StringPiece16(time).substr(0, 5), base::ASCIIToUTF16(":"), base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL); if (time_parts.size() != 2) return false; if (!base::StringToInt(time_parts[0], &time_exploded.hour)) return false; if (!base::StringToInt(time_parts[1], &time_exploded.minute)) return false; if (!time_exploded.HasValidValues()) return false; if (time.length() > 5) { if (time.length() != 7) return false; base::string16 am_or_pm(time.substr(5, 2)); if (base::EqualsASCII(am_or_pm, "PM")) { if (time_exploded.hour < 12) time_exploded.hour += 12; } else if (base::EqualsASCII(am_or_pm, "AM")) { if (time_exploded.hour == 12) time_exploded.hour = 0; } else { return false; } } // We don't know the time zone of the server, so just use UTC. return base::Time::FromUTCExploded(time_exploded, result); } // static base::string16 FtpUtil::GetStringPartAfterColumns(const base::string16& text, int columns) { base::i18n::UTF16CharIterator iter(&text); for (int i = 0; i < columns; i++) { // Skip the leading whitespace. while (!iter.end() && u_isspace(iter.get())) iter.Advance(); // Skip the actual text of i-th column. while (!iter.end() && !u_isspace(iter.get())) iter.Advance(); } base::string16 result(text.substr(iter.array_pos())); base::TrimWhitespace(result, base::TRIM_ALL, &result); return result; } } // namespace net