|  | // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 
|  | // Use of this source code is governed by a BSD-style license that can be | 
|  | // found in the LICENSE file. | 
|  |  | 
|  | // File utilities that use the ICU library go in this file. | 
|  |  | 
|  | #include "base/i18n/file_util_icu.h" | 
|  |  | 
|  | #include <stdint.h> | 
|  |  | 
|  | #include <memory> | 
|  |  | 
|  | #include "base/files/file_path.h" | 
|  | #include "base/i18n/icu_string_conversions.h" | 
|  | #include "base/i18n/string_compare.h" | 
|  | #include "base/logging.h" | 
|  | #include "base/macros.h" | 
|  | #include "base/memory/singleton.h" | 
|  | #include "base/strings/string_util.h" | 
|  | #include "base/strings/sys_string_conversions.h" | 
|  | #include "base/strings/utf_string_conversions.h" | 
|  | #include "build_config.h" | 
|  | #include "third_party/icu/source/common/unicode/uniset.h" | 
|  | #include "third_party/icu/source/i18n/unicode/coll.h" | 
|  |  | 
|  | namespace base { | 
|  | namespace i18n { | 
|  |  | 
|  | namespace { | 
|  |  | 
|  | class IllegalCharacters { | 
|  | public: | 
|  | static IllegalCharacters* GetInstance() { | 
|  | return Singleton<IllegalCharacters>::get(); | 
|  | } | 
|  |  | 
|  | bool DisallowedEverywhere(UChar32 ucs4) { | 
|  | return !!illegal_anywhere_->contains(ucs4); | 
|  | } | 
|  |  | 
|  | bool DisallowedLeadingOrTrailing(UChar32 ucs4) { | 
|  | return !!illegal_at_ends_->contains(ucs4); | 
|  | } | 
|  |  | 
|  | bool IsAllowedName(const string16& s) { | 
|  | return s.empty() || (!!illegal_anywhere_->containsNone( | 
|  | icu::UnicodeString(s.c_str(), s.size())) && | 
|  | !illegal_at_ends_->contains(*s.begin()) && | 
|  | !illegal_at_ends_->contains(*s.rbegin())); | 
|  | } | 
|  |  | 
|  | private: | 
|  | friend class Singleton<IllegalCharacters>; | 
|  | friend struct DefaultSingletonTraits<IllegalCharacters>; | 
|  |  | 
|  | IllegalCharacters(); | 
|  | ~IllegalCharacters() = default; | 
|  |  | 
|  | // set of characters considered invalid anywhere inside a filename. | 
|  | std::unique_ptr<icu::UnicodeSet> illegal_anywhere_; | 
|  |  | 
|  | // set of characters considered invalid at either end of a filename. | 
|  | std::unique_ptr<icu::UnicodeSet> illegal_at_ends_; | 
|  |  | 
|  | DISALLOW_COPY_AND_ASSIGN(IllegalCharacters); | 
|  | }; | 
|  |  | 
|  | IllegalCharacters::IllegalCharacters() { | 
|  | UErrorCode everywhere_status = U_ZERO_ERROR; | 
|  | UErrorCode ends_status = U_ZERO_ERROR; | 
|  | // Control characters, formatting characters, non-characters, path separators, | 
|  | // and some printable ASCII characters regarded as dangerous ('"*/:<>?\\'). | 
|  | // See  http://blogs.msdn.com/michkap/archive/2006/11/03/941420.aspx | 
|  | // and http://msdn2.microsoft.com/en-us/library/Aa365247.aspx | 
|  | // Note that code points in the "Other, Format" (Cf) category are ignored on | 
|  | // HFS+ despite the ZERO_WIDTH_JOINER and ZERO_WIDTH_NON-JOINER being | 
|  | // legitimate in Arabic and some S/SE Asian scripts. In addition tilde (~) is | 
|  | // also excluded due to the possibility of interacting poorly with short | 
|  | // filenames on VFAT. (Related to CVE-2014-9390) | 
|  | illegal_anywhere_.reset(new icu::UnicodeSet( | 
|  | UNICODE_STRING_SIMPLE("[[\"~*/:<>?\\\\|][:Cc:][:Cf:]]"), | 
|  | everywhere_status)); | 
|  | illegal_at_ends_.reset(new icu::UnicodeSet( | 
|  | UNICODE_STRING_SIMPLE("[[:WSpace:][.]]"), ends_status)); | 
|  | DCHECK(U_SUCCESS(everywhere_status)); | 
|  | DCHECK(U_SUCCESS(ends_status)); | 
|  |  | 
|  | // Add non-characters. If this becomes a performance bottleneck by | 
|  | // any chance, do not add these to |set| and change IsFilenameLegal() | 
|  | // to check |ucs4 & 0xFFFEu == 0xFFFEu|, in addiition to calling | 
|  | // IsAllowedName(). | 
|  | illegal_anywhere_->add(0xFDD0, 0xFDEF); | 
|  | for (int i = 0; i <= 0x10; ++i) { | 
|  | int plane_base = 0x10000 * i; | 
|  | illegal_anywhere_->add(plane_base + 0xFFFE, plane_base + 0xFFFF); | 
|  | } | 
|  | illegal_anywhere_->freeze(); | 
|  | illegal_at_ends_->freeze(); | 
|  | } | 
|  |  | 
|  | }  // namespace | 
|  |  | 
|  | bool IsFilenameLegal(const string16& file_name) { | 
|  | return IllegalCharacters::GetInstance()->IsAllowedName(file_name); | 
|  | } | 
|  |  | 
|  | void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name, | 
|  | char replace_char) { | 
|  | IllegalCharacters* illegal = IllegalCharacters::GetInstance(); | 
|  |  | 
|  | DCHECK(!(illegal->DisallowedEverywhere(replace_char))); | 
|  | DCHECK(!(illegal->DisallowedLeadingOrTrailing(replace_char))); | 
|  |  | 
|  | int cursor = 0;  // The ICU macros expect an int. | 
|  | while (cursor < static_cast<int>(file_name->size())) { | 
|  | int char_begin = cursor; | 
|  | uint32_t code_point; | 
|  | #if defined(OS_WIN) | 
|  | // Windows uses UTF-16 encoding for filenames. | 
|  | U16_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()), | 
|  | code_point); | 
|  | #elif defined(OS_POSIX) || defined(OS_FUCHSIA) | 
|  | // Mac and Chrome OS use UTF-8 encoding for filenames. | 
|  | // Linux doesn't actually define file system encoding. Try to parse as | 
|  | // UTF-8. | 
|  | U8_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()), | 
|  | code_point); | 
|  | #else | 
|  | #error Unsupported platform | 
|  | #endif | 
|  |  | 
|  | if (illegal->DisallowedEverywhere(code_point) || | 
|  | ((char_begin == 0 || cursor == static_cast<int>(file_name->length())) && | 
|  | illegal->DisallowedLeadingOrTrailing(code_point))) { | 
|  | file_name->replace(char_begin, cursor - char_begin, 1, replace_char); | 
|  | // We just made the potentially multi-byte/word char into one that only | 
|  | // takes one byte/word, so need to adjust the cursor to point to the next | 
|  | // character again. | 
|  | cursor = char_begin + 1; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b) { | 
|  | UErrorCode error_code = U_ZERO_ERROR; | 
|  | // Use the default collator. The default locale should have been properly | 
|  | // set by the time this constructor is called. | 
|  | std::unique_ptr<icu::Collator> collator( | 
|  | icu::Collator::createInstance(error_code)); | 
|  | DCHECK(U_SUCCESS(error_code)); | 
|  | // Make it case-sensitive. | 
|  | collator->setStrength(icu::Collator::TERTIARY); | 
|  |  | 
|  | #if defined(OS_WIN) | 
|  | return CompareString16WithCollator(*collator, WideToUTF16(a.value()), | 
|  | WideToUTF16(b.value())) == UCOL_LESS; | 
|  |  | 
|  | #elif defined(OS_POSIX) || defined(OS_FUCHSIA) | 
|  | // On linux, the file system encoding is not defined. We assume | 
|  | // SysNativeMBToWide takes care of it. | 
|  | return CompareString16WithCollator( | 
|  | *collator, WideToUTF16(SysNativeMBToWide(a.value())), | 
|  | WideToUTF16(SysNativeMBToWide(b.value()))) == UCOL_LESS; | 
|  | #endif | 
|  | } | 
|  |  | 
|  | void NormalizeFileNameEncoding(FilePath* file_name) { | 
|  | #if defined(OS_CHROMEOS) | 
|  | std::string normalized_str; | 
|  | if (ConvertToUtf8AndNormalize(file_name->BaseName().value(), kCodepageUTF8, | 
|  | &normalized_str) && | 
|  | !normalized_str.empty()) { | 
|  | *file_name = file_name->DirName().Append(FilePath(normalized_str)); | 
|  | } | 
|  | #endif | 
|  | } | 
|  |  | 
|  | }  // namespace i18n | 
|  | }  // namespace base |