base/i18n/string_search_unittest.cc - gn - Git at Google

 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include <stddef.h>

 #include <string>

 #include "base/i18n/rtl.h"
 #include "base/i18n/string_search.h"
 #include "base/strings/string16.h"
 #include "base/strings/utf_string_conversions.h"
 #include "testing/gtest/include/gtest/gtest.h"
 #include "third_party/icu/source/i18n/unicode/usearch.h"

 namespace base {
 namespace i18n {

 // Note on setting default locale for testing: The current default locale on
 // the Mac trybot is en_US_POSIX, with which primary-level collation strength
 // string search is case-sensitive, when normally it should be
 // case-insensitive. In other locales (including en_US which English speakers
 // in the U.S. use), this search would be case-insensitive as expected.

 // Exercises StringSearchIgnoringCaseAndAccents() with ASCII-only inputs. In
 // each call the first argument is the pattern and the second is the text that
 // gets searched; the last two receive the position and size of the match.
 TEST(StringSearchTest, ASCII) {
   // Per the note above, en_US_POSIX would make the search case-sensitive, so
   // that locale is swapped for en_US while the test runs.
   const std::string saved_locale(uloc_getDefault());
   const bool override_locale = (saved_locale == "en_US_POSIX");
   if (override_locale) {
     SetICUDefaultLocale("en_US");
   }

   size_t match_index = 0;
   size_t match_length = 0;

   // The pattern sits at the very beginning of the text.
   {
     const string16 pattern = ASCIIToUTF16("hello");
     const string16 text = ASCIIToUTF16("hello world");
     EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(pattern, text, &match_index,
                                                    &match_length));
     EXPECT_EQ(0U, match_index);
     EXPECT_EQ(5U, match_length);
   }

   // Four spaces after the "h" in the pattern versus three in the text: the
   // search is expected to fail.
   {
     const string16 pattern = ASCIIToUTF16("h    e l l o");
     const string16 text = ASCIIToUTF16("h   e l l o");
     EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
         pattern, text, &match_index, &match_length));
   }

   // Earlier positions in the text only partially match; the full match
   // starts at offset 4.
   {
     const string16 pattern = ASCIIToUTF16("aabaaa");
     const string16 text = ASCIIToUTF16("aaabaabaaa");
     EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(pattern, text, &match_index,
                                                    &match_length));
     EXPECT_EQ(4U, match_index);
     EXPECT_EQ(6U, match_length);
   }

   // A non-empty pattern is not found in an empty text.
   {
     const string16 pattern = ASCIIToUTF16("searching within empty string");
     const string16 empty_text;
     EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
         pattern, empty_text, &match_index, &match_length));
   }

   // An empty pattern is reported as a zero-length match at offset 0.
   {
     const string16 empty_pattern;
     const string16 text = ASCIIToUTF16("searching for empty string");
     EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
         empty_pattern, text, &match_index, &match_length));
     EXPECT_EQ(0U, match_index);
     EXPECT_EQ(0U, match_length);
   }

   // Letter case is ignored across the whole pattern.
   {
     const string16 pattern = ASCIIToUTF16("case insensitivity");
     const string16 text = ASCIIToUTF16("CaSe InSeNsItIvItY");
     EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(pattern, text, &match_index,
                                                    &match_length));
     EXPECT_EQ(0U, match_index);
     EXPECT_EQ(18U, match_length);
   }

   // Put back the locale that was active before the test.
   if (override_locale) {
     SetICUDefaultLocale(saved_locale.c_str());
   }
 }

 // Checks that StringSearchIgnoringCaseAndAccents() treats "e"/"E" with or
 // without acute/grave accents as equivalent, whether the accent is part of a
 // precomposed code point or a separate combining mark, and that a different
 // base letter ("a") still does not match. In each call the first argument is
 // the pattern and the second is the text being searched.
 TEST(StringSearchTest, UnicodeLocaleIndependent) {
   // Base characters
   const string16 e_base = WideToUTF16(L"e");
   const string16 E_base = WideToUTF16(L"E");

   // Composed characters
   const string16 e_with_acute_accent = WideToUTF16(L"\u00e9");
   const string16 E_with_acute_accent = WideToUTF16(L"\u00c9");
   const string16 e_with_grave_accent = WideToUTF16(L"\u00e8");
   const string16 E_with_grave_accent = WideToUTF16(L"\u00c8");
   const string16 a_with_acute_accent = WideToUTF16(L"\u00e1");

   // Decomposed characters
   const string16 e_with_acute_combining_mark = WideToUTF16(L"e\u0301");
   const string16 E_with_acute_combining_mark = WideToUTF16(L"E\u0301");
   const string16 e_with_grave_combining_mark = WideToUTF16(L"e\u0300");
   const string16 E_with_grave_combining_mark = WideToUTF16(L"E\u0300");
   const string16 a_with_acute_combining_mark = WideToUTF16(L"a\u0301");

   // Per the note above, en_US_POSIX would make the search case-sensitive, so
   // it is replaced by en_US for the duration of the test.
   std::string default_locale(uloc_getDefault());
   bool locale_is_posix = (default_locale == "en_US_POSIX");
   if (locale_is_posix)
     SetICUDefaultLocale("en_US");

   size_t index = 0;
   size_t length = 0;

   // In every positive case below the match starts at offset 0 and its length
   // is the full size of the searched text (the second argument).

   // Unaccented vs. accented, composed and decomposed, in both directions.
   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
       e_base, e_with_acute_accent, &index, &length));
   EXPECT_EQ(0U, index);
   EXPECT_EQ(e_with_acute_accent.size(), length);

   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
       e_with_acute_accent, e_base, &index, &length));
   EXPECT_EQ(0U, index);
   EXPECT_EQ(e_base.size(), length);

   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
       e_base, e_with_acute_combining_mark, &index, &length));
   EXPECT_EQ(0U, index);
   EXPECT_EQ(e_with_acute_combining_mark.size(), length);

   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
       e_with_acute_combining_mark, e_base, &index, &length));
   EXPECT_EQ(0U, index);
   EXPECT_EQ(e_base.size(), length);

   // Composed vs. decomposed forms of the same accented letter.
   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
       e_with_acute_combining_mark, e_with_acute_accent,
       &index, &length));
   EXPECT_EQ(0U, index);
   EXPECT_EQ(e_with_acute_accent.size(), length);

   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
       e_with_acute_accent, e_with_acute_combining_mark,
       &index, &length));
   EXPECT_EQ(0U, index);
   EXPECT_EQ(e_with_acute_combining_mark.size(), length);

   // Different accents (acute vs. grave) on the same base letter.
   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
       e_with_acute_combining_mark, e_with_grave_combining_mark,
       &index, &length));
   EXPECT_EQ(0U, index);
   EXPECT_EQ(e_with_grave_combining_mark.size(), length);

   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
       e_with_grave_combining_mark, e_with_acute_combining_mark,
       &index, &length));
   EXPECT_EQ(0U, index);
   EXPECT_EQ(e_with_acute_combining_mark.size(), length);

   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
       e_with_acute_combining_mark, e_with_grave_accent, &index, &length));
   EXPECT_EQ(0U, index);
   EXPECT_EQ(e_with_grave_accent.size(), length);

   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
       e_with_grave_accent, e_with_acute_combining_mark, &index, &length));
   EXPECT_EQ(0U, index);
   EXPECT_EQ(e_with_acute_combining_mark.size(), length);

   // Upper-case patterns against lower-case text, mixing accents and forms.
   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
       E_with_acute_accent, e_with_acute_accent, &index, &length));
   EXPECT_EQ(0U, index);
   EXPECT_EQ(e_with_acute_accent.size(), length);

   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
       E_with_grave_accent, e_with_acute_accent, &index, &length));
   EXPECT_EQ(0U, index);
   EXPECT_EQ(e_with_acute_accent.size(), length);

   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
       E_with_acute_combining_mark, e_with_grave_accent, &index, &length));
   EXPECT_EQ(0U, index);
   EXPECT_EQ(e_with_grave_accent.size(), length);

   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
       E_with_grave_combining_mark, e_with_acute_accent, &index, &length));
   EXPECT_EQ(0U, index);
   EXPECT_EQ(e_with_acute_accent.size(), length);

   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
       E_base, e_with_grave_accent, &index, &length));
   EXPECT_EQ(0U, index);
   EXPECT_EQ(e_with_grave_accent.size(), length);

   // A different base letter must not match even when the accent is the same.
   EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
       a_with_acute_accent, e_with_acute_accent, &index, &length));

   EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
       a_with_acute_combining_mark, e_with_acute_combining_mark,
       &index, &length));

   // Restore the locale that was active before the test.
   if (locale_is_posix)
     SetICUDefaultLocale(default_locale.data());
 }

 // Verifies that the outcome of StringSearchIgnoringCaseAndAccents() follows
 // the ICU default locale: the same pattern/text pair matches before the
 // locale is switched to "da" and is expected not to match afterwards.
 TEST(StringSearchTest, UnicodeLocaleDependent) {
   // U+00E5 in precomposed form, and the bare letter "a".
   const string16 ring_a = WideToUTF16(L"\u00e5");
   const string16 plain_a = WideToUTF16(L"a");

   // Under the locale the test starts in, "a" is found inside U+00E5.
   EXPECT_TRUE(
       StringSearchIgnoringCaseAndAccents(plain_a, ring_a, nullptr, nullptr));

   // Remember the current default so it can be reinstated below.
   const char* const original_locale = uloc_getDefault();
   SetICUDefaultLocale("da");

   // With "da" as the default the same search must fail (presumably because
   // Danish collation treats U+00E5 as a letter of its own -- confirm).
   EXPECT_FALSE(
       StringSearchIgnoringCaseAndAccents(plain_a, ring_a, nullptr, nullptr));

   SetICUDefaultLocale(original_locale);
 }

 // Builds one FixedPatternStringSearchIgnoringCaseAndAccents for "hello" and
 // runs it against several texts, checking that each call reports its own
 // result.
 TEST(StringSearchTest, FixedPatternMultipleSearch) {
   // Per the note above, en_US_POSIX would make the search case-sensitive, so
   // that locale is swapped for en_US while the test runs.
   const std::string saved_locale(uloc_getDefault());
   const bool override_locale = (saved_locale == "en_US_POSIX");
   if (override_locale) {
     SetICUDefaultLocale("en_US");
   }

   // Search "hello" over multiple texts.
   FixedPatternStringSearchIgnoringCaseAndAccents query(ASCIIToUTF16("hello"));

   size_t match_index = 0;
   size_t match_length = 0;

   // Pattern embedded in the middle of the text.
   const string16 embedded = ASCIIToUTF16("12hello34");
   EXPECT_TRUE(query.Search(embedded, &match_index, &match_length));
   EXPECT_EQ(2U, match_index);
   EXPECT_EQ(5U, match_length);

   // Text that does not contain the pattern at all.
   const string16 unrelated = ASCIIToUTF16("bye");
   EXPECT_FALSE(query.Search(unrelated, &match_index, &match_length));

   // Same word in mixed case; still found after the preceding miss.
   const string16 mixed_case = ASCIIToUTF16("hELLo");
   EXPECT_TRUE(query.Search(mixed_case, &match_index, &match_length));
   EXPECT_EQ(0U, match_index);
   EXPECT_EQ(5U, match_length);

   // Put back the locale that was active before the test.
   if (override_locale) {
     SetICUDefaultLocale(saved_locale.c_str());
   }
 }

 }  // namespace i18n
 }  // namespace base
	// Copyright (c) 2011 The Chromium Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#include <stddef.h>

	#include <string>

	#include "base/i18n/rtl.h"
	#include "base/i18n/string_search.h"
	#include "base/strings/string16.h"
	#include "base/strings/utf_string_conversions.h"
	#include "testing/gtest/include/gtest/gtest.h"
	#include "third_party/icu/source/i18n/unicode/usearch.h"

	namespace base {
	namespace i18n {

	// Note on setting default locale for testing: The current default locale on
	// the Mac trybot is en_US_POSIX, with which primary-level collation strength
	// string search is case-sensitive, when normally it should be
	// case-insensitive. In other locales (including en_US which English speakers
	// in the U.S. use), this search would be case-insensitive as expected.

	// Checks StringSearchIgnoringCaseAndAccents() against plain ASCII inputs. In
	// every call the first argument is the pattern and the second is the text
	// that is searched; |index| and |length| receive the match position and size.
	TEST(StringSearchTest, ASCII) {
	  // Per the note above, en_US_POSIX would make the search case-sensitive, so
	  // it is replaced by en_US for the duration of the test.
	  std::string default_locale(uloc_getDefault());
	  bool locale_is_posix = (default_locale == "en_US_POSIX");
	  if (locale_is_posix)
	    SetICUDefaultLocale("en_US");

	  size_t index = 0;
	  size_t length = 0;

	  // Pattern at the very start of the text.
	  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
	      ASCIIToUTF16("hello"), ASCIIToUTF16("hello world"), &index, &length));
	  EXPECT_EQ(0U, index);
	  EXPECT_EQ(5U, length);

	  // The pattern has four spaces after the "h" and the text only three, so the
	  // search must fail. The two literals must stay different: with identical
	  // strings the pattern would trivially be found and this check would fail.
	  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
	      ASCIIToUTF16("h    e l l o"), ASCIIToUTF16("h   e l l o"),
	      &index, &length));

	  // Earlier positions only partially match; the full match starts at 4.
	  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
	      ASCIIToUTF16("aabaaa"), ASCIIToUTF16("aaabaabaaa"), &index, &length));
	  EXPECT_EQ(4U, index);
	  EXPECT_EQ(6U, length);

	  // A non-empty pattern is not found in an empty text.
	  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
	      ASCIIToUTF16("searching within empty string"), string16(),
	      &index, &length));

	  // An empty pattern yields a zero-length match at offset 0.
	  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
	      string16(), ASCIIToUTF16("searching for empty string"), &index, &length));
	  EXPECT_EQ(0U, index);
	  EXPECT_EQ(0U, length);

	  // Letter case is ignored.
	  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
	      ASCIIToUTF16("case insensitivity"), ASCIIToUTF16("CaSe InSeNsItIvItY"),
	      &index, &length));
	  EXPECT_EQ(0U, index);
	  EXPECT_EQ(18U, length);

	  // Restore the locale that was active before the test.
	  if (locale_is_posix)
	    SetICUDefaultLocale(default_locale.data());
	}

	// Checks that StringSearchIgnoringCaseAndAccents() treats "e"/"E" with or
	// without acute/grave accents as equivalent, whether the accent is part of a
	// precomposed code point or a separate combining mark, and that a different
	// base letter ("a") still does not match. In each call the first argument is
	// the pattern and the second is the text being searched.
	TEST(StringSearchTest, UnicodeLocaleIndependent) {
	  // Base characters
	  const string16 e_base = WideToUTF16(L"e");
	  const string16 E_base = WideToUTF16(L"E");

	  // Composed characters
	  const string16 e_with_acute_accent = WideToUTF16(L"\u00e9");
	  const string16 E_with_acute_accent = WideToUTF16(L"\u00c9");
	  const string16 e_with_grave_accent = WideToUTF16(L"\u00e8");
	  const string16 E_with_grave_accent = WideToUTF16(L"\u00c8");
	  const string16 a_with_acute_accent = WideToUTF16(L"\u00e1");

	  // Decomposed characters
	  const string16 e_with_acute_combining_mark = WideToUTF16(L"e\u0301");
	  const string16 E_with_acute_combining_mark = WideToUTF16(L"E\u0301");
	  const string16 e_with_grave_combining_mark = WideToUTF16(L"e\u0300");
	  const string16 E_with_grave_combining_mark = WideToUTF16(L"E\u0300");
	  const string16 a_with_acute_combining_mark = WideToUTF16(L"a\u0301");

	  // Per the note above, en_US_POSIX would make the search case-sensitive, so
	  // it is replaced by en_US for the duration of the test.
	  std::string default_locale(uloc_getDefault());
	  bool locale_is_posix = (default_locale == "en_US_POSIX");
	  if (locale_is_posix)
	    SetICUDefaultLocale("en_US");

	  size_t index = 0;
	  size_t length = 0;

	  // In every positive case below the match starts at offset 0 and its length
	  // is the full size of the searched text (the second argument).

	  // Unaccented vs. accented, composed and decomposed, in both directions.
	  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
	      e_base, e_with_acute_accent, &index, &length));
	  EXPECT_EQ(0U, index);
	  EXPECT_EQ(e_with_acute_accent.size(), length);

	  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
	      e_with_acute_accent, e_base, &index, &length));
	  EXPECT_EQ(0U, index);
	  EXPECT_EQ(e_base.size(), length);

	  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
	      e_base, e_with_acute_combining_mark, &index, &length));
	  EXPECT_EQ(0U, index);
	  EXPECT_EQ(e_with_acute_combining_mark.size(), length);

	  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
	      e_with_acute_combining_mark, e_base, &index, &length));
	  EXPECT_EQ(0U, index);
	  EXPECT_EQ(e_base.size(), length);

	  // Composed vs. decomposed forms of the same accented letter.
	  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
	      e_with_acute_combining_mark, e_with_acute_accent,
	      &index, &length));
	  EXPECT_EQ(0U, index);
	  EXPECT_EQ(e_with_acute_accent.size(), length);

	  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
	      e_with_acute_accent, e_with_acute_combining_mark,
	      &index, &length));
	  EXPECT_EQ(0U, index);
	  EXPECT_EQ(e_with_acute_combining_mark.size(), length);

	  // Different accents (acute vs. grave) on the same base letter.
	  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
	      e_with_acute_combining_mark, e_with_grave_combining_mark,
	      &index, &length));
	  EXPECT_EQ(0U, index);
	  EXPECT_EQ(e_with_grave_combining_mark.size(), length);

	  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
	      e_with_grave_combining_mark, e_with_acute_combining_mark,
	      &index, &length));
	  EXPECT_EQ(0U, index);
	  EXPECT_EQ(e_with_acute_combining_mark.size(), length);

	  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
	      e_with_acute_combining_mark, e_with_grave_accent, &index, &length));
	  EXPECT_EQ(0U, index);
	  EXPECT_EQ(e_with_grave_accent.size(), length);

	  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
	      e_with_grave_accent, e_with_acute_combining_mark, &index, &length));
	  EXPECT_EQ(0U, index);
	  EXPECT_EQ(e_with_acute_combining_mark.size(), length);

	  // Upper-case patterns against lower-case text, mixing accents and forms.
	  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
	      E_with_acute_accent, e_with_acute_accent, &index, &length));
	  EXPECT_EQ(0U, index);
	  EXPECT_EQ(e_with_acute_accent.size(), length);

	  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
	      E_with_grave_accent, e_with_acute_accent, &index, &length));
	  EXPECT_EQ(0U, index);
	  EXPECT_EQ(e_with_acute_accent.size(), length);

	  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
	      E_with_acute_combining_mark, e_with_grave_accent, &index, &length));
	  EXPECT_EQ(0U, index);
	  EXPECT_EQ(e_with_grave_accent.size(), length);

	  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
	      E_with_grave_combining_mark, e_with_acute_accent, &index, &length));
	  EXPECT_EQ(0U, index);
	  EXPECT_EQ(e_with_acute_accent.size(), length);

	  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
	      E_base, e_with_grave_accent, &index, &length));
	  EXPECT_EQ(0U, index);
	  EXPECT_EQ(e_with_grave_accent.size(), length);

	  // A different base letter must not match even when the accent is the same.
	  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
	      a_with_acute_accent, e_with_acute_accent, &index, &length));

	  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
	      a_with_acute_combining_mark, e_with_acute_combining_mark,
	      &index, &length));

	  // Restore the locale that was active before the test.
	  if (locale_is_posix)
	    SetICUDefaultLocale(default_locale.data());
	}

	// Verifies that the outcome of StringSearchIgnoringCaseAndAccents() follows
	// the ICU default locale: the same pattern/text pair matches before the
	// locale is switched to "da" and is expected not to match afterwards.
	TEST(StringSearchTest, UnicodeLocaleDependent) {
	  // U+00E5 in precomposed form, and the bare letter "a".
	  const string16 ring_a = WideToUTF16(L"\u00e5");
	  const string16 plain_a = WideToUTF16(L"a");

	  // Under the locale the test starts in, "a" is found inside U+00E5.
	  EXPECT_TRUE(
	      StringSearchIgnoringCaseAndAccents(plain_a, ring_a, nullptr, nullptr));

	  // Remember the current default so it can be reinstated below.
	  const char* const original_locale = uloc_getDefault();
	  SetICUDefaultLocale("da");

	  // With "da" as the default the same search must fail (presumably because
	  // Danish collation treats U+00E5 as a letter of its own -- confirm).
	  EXPECT_FALSE(
	      StringSearchIgnoringCaseAndAccents(plain_a, ring_a, nullptr, nullptr));

	  SetICUDefaultLocale(original_locale);
	}

	// Builds one FixedPatternStringSearchIgnoringCaseAndAccents for "hello" and
	// runs it against several texts, checking that each call reports its own
	// result.
	TEST(StringSearchTest, FixedPatternMultipleSearch) {
	  // Per the note above, en_US_POSIX would make the search case-sensitive, so
	  // that locale is swapped for en_US while the test runs.
	  const std::string saved_locale(uloc_getDefault());
	  const bool override_locale = (saved_locale == "en_US_POSIX");
	  if (override_locale) {
	    SetICUDefaultLocale("en_US");
	  }

	  // Search "hello" over multiple texts.
	  FixedPatternStringSearchIgnoringCaseAndAccents query(ASCIIToUTF16("hello"));

	  size_t match_index = 0;
	  size_t match_length = 0;

	  // Pattern embedded in the middle of the text.
	  const string16 embedded = ASCIIToUTF16("12hello34");
	  EXPECT_TRUE(query.Search(embedded, &match_index, &match_length));
	  EXPECT_EQ(2U, match_index);
	  EXPECT_EQ(5U, match_length);

	  // Text that does not contain the pattern at all.
	  const string16 unrelated = ASCIIToUTF16("bye");
	  EXPECT_FALSE(query.Search(unrelated, &match_index, &match_length));

	  // Same word in mixed case; still found after the preceding miss.
	  const string16 mixed_case = ASCIIToUTF16("hELLo");
	  EXPECT_TRUE(query.Search(mixed_case, &match_index, &match_length));
	  EXPECT_EQ(0U, match_index);
	  EXPECT_EQ(5U, match_length);

	  // Put back the locale that was active before the test.
	  if (override_locale) {
	    SetICUDefaultLocale(saved_locale.c_str());
	  }
	}

	} // namespace i18n
	} // namespace base