// base/i18n/file_util_icu_unittest.cc - gn - Git at Google

 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include "base/i18n/file_util_icu.h"

 #include <stddef.h>

 #include "base/files/file_util.h"
 #include "base/macros.h"
 #include "base/strings/utf_string_conversions.h"
 #include "build/build_config.h"
 #include "testing/gtest/include/gtest/gtest.h"
 #include "testing/platform_test.h"

 namespace base {
 namespace i18n {

 // Fixture shared by the tests in this file. file_util winds up using
 // autoreleased objects on the Mac, so the fixture has to be a PlatformTest.
 class FileUtilICUTest : public PlatformTest {};

 #if defined(OS_POSIX) && !defined(OS_MACOSX)

 // On Linux, file paths are parsed and filtered as UTF-8. Each entry pairs an
 // input name with the name expected after illegal characters are replaced
 // by '-'.
 struct GoodBadPairLinux {
   const char* bad_name;   // Name handed to the sanitizer.
   const char* good_name;  // Expected sanitized name.
 };

 static const GoodBadPairLinux kLinuxIllegalCharacterCases[] = {
     // '*', '\\', '/', ':' and '?' are each replaced.
     {"bad*\\/file:name?.jpg", "bad---file-name-.jpg"},
     {"**********::::.txt", "--------------.txt"},
     // Bytes that do not form valid UTF-8 are expected to come back unchanged.
     {"\xe9\xf0zzzz.\xff", "\xe9\xf0zzzz.\xff"},
     // Leading and trailing spaces are replaced; a lone "." is replaced too.
     {" _ ", "-_-"},
     {".", "-"},
     {" .( ). ", "-.( ).-"},
     // Only the outermost spaces change; interior spaces are kept.
     {"     ", "-   -"},
 };

 // Feeds every Linux case through ReplaceIllegalCharactersInPath with '-' as
 // the replacement character and compares against the expected name.
 TEST_F(FileUtilICUTest, ReplaceIllegalCharactersInPathLinuxTest) {
   for (const auto& test_case : kLinuxIllegalCharacterCases) {
     std::string file_name(test_case.bad_name);
     ReplaceIllegalCharactersInPath(&file_name, '-');
     EXPECT_EQ(test_case.good_name, file_name);
   }
 }

 #endif

 // For Mac & Windows, which both do Unicode validation on filenames. The names
 // are given as wide strings since it's more convenient to specify Unicode
 // characters that way. For Mac they should be converted to UTF-8.
 struct goodbad_pair {
   const wchar_t* bad_name;   // Name handed to the sanitizer.
   const wchar_t* good_name;  // Expected name once '-' has been substituted.
 };

 static const goodbad_pair kIllegalCharacterCases[] = {
     // Punctuation such as '*', ':' and '?' is replaced.
     {L"bad*file:name?.jpg", L"bad-file-name-.jpg"},
     {L"**********::::.txt", L"--------------.txt"},
     // We can't use UCNs (universal character names) for C0/C1 characters and
     // U+007F, but \x escape is interpreted by MSVC and gcc as we intend.
     {L"bad\x0003\x0091 file\u200E\u200Fname.png", L"bad-- file--name.png"},
     {L"bad*file\\?name.jpg", L"bad-file--name.jpg"},
     {L"\t  bad*file\\name/.jpg", L"-  bad-file-name-.jpg"},
     // Names that are expected to pass through untouched.
     {L"this_file_name is okay!.mp3", L"this_file_name is okay!.mp3"},
     {L"\u4E00\uAC00.mp3", L"\u4E00\uAC00.mp3"},
     // U+200C between two letters is replaced.
     {L"\u0635\u200C\u0644.mp3", L"\u0635-\u0644.mp3"},
     // Characters outside the BMP are kept.
     {L"\U00010330\U00010331.mp3", L"\U00010330\U00010331.mp3"},
     // Unassigned codepoints are ok.
     {L"\u0378\U00040001.mp3", L"\u0378\U00040001.mp3"},
     // Non-characters are not allowed.
     {L"bad\uFFFFfile\U0010FFFEname.jpg", L"bad-file-name.jpg"},
     {L"bad\uFDD0file\uFDEFname.jpg", L"bad-file-name.jpg"},
     // CVE-2014-9390
     {L"(\u200C.\u200D.\u200E.\u200F.\u202A.\u202B.\u202C.\u202D.\u202E.\u206A."
      L"\u206B.\u206C.\u206D.\u206F.\uFEFF)",
      L"(-.-.-.-.-.-.-.-.-.-.-.-.-.-.-)"},
     // '~' is replaced as well.
     {L"config~1", L"config-1"},
     // Leading/trailing whitespace and a leading '.' are replaced, while
     // interior whitespace is preserved.
     {L" _ ", L"-_-"},
     {L" ", L"-"},
     {L"\u2008.(\u2007).\u3000", L"-.(\u2007).-"},
     {L"     ", L"-   -"},
     {L".    ", L"-   -"},
 };

 #if defined(OS_WIN) || defined(OS_MACOSX) || defined(OS_POSIX)

 // Checks that ReplaceIllegalCharactersInPath turns each bad name into its
 // expected good name, using '-' as the replacement character. Windows
 // operates on the wide strings directly; every other platform converts the
 // input and the expectation to UTF-8 first.
 TEST_F(FileUtilICUTest, ReplaceIllegalCharactersInPathTest) {
   for (const auto& test_case : kIllegalCharacterCases) {
 #if defined(OS_WIN)
     std::wstring file_name(test_case.bad_name);
     ReplaceIllegalCharactersInPath(&file_name, '-');
     EXPECT_EQ(test_case.good_name, file_name);
 #else
     std::string file_name(WideToUTF8(test_case.bad_name));
     ReplaceIllegalCharactersInPath(&file_name, '-');
     EXPECT_EQ(WideToUTF8(test_case.good_name), file_name);
 #endif
   }
 }

 #endif

 // The empty name and every expected-good name must be accepted by
 // IsFilenameLegal; every bad name that differs from its good counterpart
 // must be rejected.
 TEST_F(FileUtilICUTest, IsFilenameLegalTest) {
   EXPECT_TRUE(IsFilenameLegal(string16()));

   for (const auto& entry : kIllegalCharacterCases) {
     const string16 candidate = WideToUTF16(entry.bad_name);
     const string16 sanitized = WideToUTF16(entry.good_name);

     EXPECT_TRUE(IsFilenameLegal(sanitized)) << sanitized;

     // Some cases are already legal (bad == good); there is nothing to reject.
     if (sanitized == candidate)
       continue;
     EXPECT_FALSE(IsFilenameLegal(candidate)) << candidate;
   }
 }

 #if defined(OS_CHROMEOS)
 // Input paths and the paths expected after NormalizeFileNameEncoding.
 // "a\xcc\x88" is 'a' followed by U+0308 encoded as UTF-8; "\xc3\xa4" is the
 // single code point U+00E4 encoded as UTF-8.
 struct normalize_name_encoding_test_cases {
   const char* original_path;
   const char* normalized_path;
 };

 static const normalize_name_encoding_test_cases
     kNormalizeFileNameEncodingTestCases[] = {
         {"foo_na\xcc\x88me.foo", "foo_n\xc3\xa4me.foo"},
         // Only the final component changes; the directory part stays as is.
         {"foo_dir_na\xcc\x88me/foo_na\xcc\x88me.foo",
          "foo_dir_na\xcc\x88me/foo_n\xc3\xa4me.foo"},
         {"", ""},
         // The expected path has no trailing separator.
         {"foo_dir_na\xcc\x88me/", "foo_dir_n\xc3\xa4me"},
 };

 // Normalizes each original path in place and compares the result with the
 // expected normalized path.
 TEST_F(FileUtilICUTest, NormalizeFileNameEncoding) {
   for (const auto& test_case : kNormalizeFileNameEncodingTestCases) {
     FilePath path(test_case.original_path);
     NormalizeFileNameEncoding(&path);
     EXPECT_EQ(FilePath(test_case.normalized_path), path);
   }
 }

 #endif

 }  // namespace i18n
 }  // namespace base
	// Copyright (c) 2012 The Chromium Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#include "base/i18n/file_util_icu.h"

	#include <stddef.h>

	#include "base/files/file_util.h"
	#include "base/macros.h"
	#include "base/strings/utf_string_conversions.h"
	#include "build/build_config.h"
	#include "testing/gtest/include/gtest/gtest.h"
	#include "testing/platform_test.h"

	namespace base {
	namespace i18n {

	// Fixture shared by the tests in this file. file_util winds up using
	// autoreleased objects on the Mac, so the fixture has to be a PlatformTest.
	class FileUtilICUTest : public PlatformTest {};

	#if defined(OS_POSIX) && !defined(OS_MACOSX)

	// On Linux, file paths are parsed and filtered as UTF-8. Each entry pairs an
	// input name (bad_name) with the name expected after illegal characters are
	// replaced by '-' (good_name). Replacement is character-for-character, so
	// both strings of a pair have the same length.
	static const struct GoodBadPairLinux {
	  const char* bad_name;
	  const char* good_name;
	} kLinuxIllegalCharacterCases[] = {
	  {"bad*\\/file:name?.jpg", "bad---file-name-.jpg"},
	  {"**********::::.txt", "--------------.txt"},
	  // Bytes that do not form valid UTF-8 are expected to come back unchanged.
	  {"\xe9\xf0zzzz.\xff", "\xe9\xf0zzzz.\xff"},
	  {" _ ", "-_-"},
	  {".", "-"},
	  {" .( ). ", "-.( ).-"},
	  // Five spaces: only the first and last are replaced, the inner three stay.
	  {"     ", "-   -"},
	};

	// Feeds every Linux case through ReplaceIllegalCharactersInPath with '-' as
	// the replacement character and compares against the expected name.
	TEST_F(FileUtilICUTest, ReplaceIllegalCharactersInPathLinuxTest) {
	  for (const auto& test_case : kLinuxIllegalCharacterCases) {
	    std::string file_name(test_case.bad_name);
	    ReplaceIllegalCharactersInPath(&file_name, '-');
	    EXPECT_EQ(test_case.good_name, file_name);
	  }
	}

	#endif

	// For Mac & Windows, which both do Unicode validation on filenames. The names
	// are given as wide strings since it's more convenient to specify Unicode
	// characters that way. For Mac they should be converted to UTF-8.
	//
	// bad_name is the input; good_name is the name expected once every illegal
	// character has been replaced by '-'.
	static const struct goodbad_pair {
	  const wchar_t* bad_name;
	  const wchar_t* good_name;
	} kIllegalCharacterCases[] = {
	  {L"bad*file:name?.jpg", L"bad-file-name-.jpg"},
	  {L"**********::::.txt", L"--------------.txt"},
	  // We can't use UCNs (universal character names) for C0/C1 characters and
	  // U+007F, but \x escape is interpreted by MSVC and gcc as we intend.
	  {L"bad\x0003\x0091 file\u200E\u200Fname.png", L"bad-- file--name.png"},
	  {L"bad*file\\?name.jpg", L"bad-file--name.jpg"},
	  {L"\t  bad*file\\name/.jpg", L"-  bad-file-name-.jpg"},
	  {L"this_file_name is okay!.mp3", L"this_file_name is okay!.mp3"},
	  {L"\u4E00\uAC00.mp3", L"\u4E00\uAC00.mp3"},
	  {L"\u0635\u200C\u0644.mp3", L"\u0635-\u0644.mp3"},
	  {L"\U00010330\U00010331.mp3", L"\U00010330\U00010331.mp3"},
	  // Unassigned codepoints are ok.
	  {L"\u0378\U00040001.mp3", L"\u0378\U00040001.mp3"},
	  // Non-characters are not allowed.
	  {L"bad\uFFFFfile\U0010FFFEname.jpg", L"bad-file-name.jpg"},
	  {L"bad\uFDD0file\uFDEFname.jpg", L"bad-file-name.jpg"},
	  // CVE-2014-9390
	  {L"(\u200C.\u200D.\u200E.\u200F.\u202A.\u202B.\u202C.\u202D.\u202E.\u206A."
	   L"\u206B.\u206C.\u206D.\u206F.\uFEFF)",
	   L"(-.-.-.-.-.-.-.-.-.-.-.-.-.-.-)"},
	  {L"config~1", L"config-1"},
	  {L" _ ", L"-_-"},
	  // A single space is replaced outright.
	  {L" ", L"-"},
	  {L"\u2008.(\u2007).\u3000", L"-.(\u2007).-"},
	  // Five spaces: only the first and last are replaced.
	  {L"     ", L"-   -"},
	  // A leading '.' and the trailing space are replaced; the three spaces in
	  // between are kept.
	  {L".    ", L"-   -"}
	};

	#if defined(OS_WIN) || defined(OS_MACOSX) || defined(OS_POSIX)

	// Checks that ReplaceIllegalCharactersInPath turns each bad name into its
	// expected good name, using '-' as the replacement character. Windows
	// operates on the wide strings directly; every other platform converts the
	// input and the expectation to UTF-8 first.
	TEST_F(FileUtilICUTest, ReplaceIllegalCharactersInPathTest) {
	  for (const auto& test_case : kIllegalCharacterCases) {
	#if defined(OS_WIN)
	    std::wstring file_name(test_case.bad_name);
	    ReplaceIllegalCharactersInPath(&file_name, '-');
	    EXPECT_EQ(test_case.good_name, file_name);
	#else
	    std::string file_name(WideToUTF8(test_case.bad_name));
	    ReplaceIllegalCharactersInPath(&file_name, '-');
	    EXPECT_EQ(WideToUTF8(test_case.good_name), file_name);
	#endif
	  }
	}

	#endif

	// The empty name and every expected-good name must be accepted by
	// IsFilenameLegal; every bad name that differs from its good counterpart
	// must be rejected.
	TEST_F(FileUtilICUTest, IsFilenameLegalTest) {
	  EXPECT_TRUE(IsFilenameLegal(string16()));

	  for (const auto& entry : kIllegalCharacterCases) {
	    const string16 candidate = WideToUTF16(entry.bad_name);
	    const string16 sanitized = WideToUTF16(entry.good_name);

	    EXPECT_TRUE(IsFilenameLegal(sanitized)) << sanitized;

	    // Some cases are already legal (bad == good); there is nothing to reject.
	    if (sanitized == candidate)
	      continue;
	    EXPECT_FALSE(IsFilenameLegal(candidate)) << candidate;
	  }
	}

	#if defined(OS_CHROMEOS)
	// Input paths and the paths expected after NormalizeFileNameEncoding.
	// "a\xcc\x88" is 'a' followed by U+0308 encoded as UTF-8; "\xc3\xa4" is the
	// single code point U+00E4 encoded as UTF-8.
	struct normalize_name_encoding_test_cases {
	  const char* original_path;
	  const char* normalized_path;
	};

	static const normalize_name_encoding_test_cases
	    kNormalizeFileNameEncodingTestCases[] = {
	        {"foo_na\xcc\x88me.foo", "foo_n\xc3\xa4me.foo"},
	        // Only the final component changes; the directory part stays as is.
	        {"foo_dir_na\xcc\x88me/foo_na\xcc\x88me.foo",
	         "foo_dir_na\xcc\x88me/foo_n\xc3\xa4me.foo"},
	        {"", ""},
	        // The expected path has no trailing separator.
	        {"foo_dir_na\xcc\x88me/", "foo_dir_n\xc3\xa4me"},
	};

	// Normalizes each original path in place and compares the result with the
	// expected normalized path.
	TEST_F(FileUtilICUTest, NormalizeFileNameEncoding) {
	  for (const auto& test_case : kNormalizeFileNameEncodingTestCases) {
	    FilePath path(test_case.original_path);
	    NormalizeFileNameEncoding(&path);
	    EXPECT_EQ(FilePath(test_case.normalized_path), path);
	  }
	}

	#endif

	} // namespace i18n
	} // namespace base