| // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 
 | // Use of this source code is governed by a BSD-style license that can be | 
 | // found in the LICENSE file. | 
 |  | 
 | #include <stddef.h> | 
 |  | 
 | #include <string> | 
 |  | 
 | #include "base/i18n/rtl.h" | 
 | #include "base/i18n/string_search.h" | 
 | #include "base/strings/string16.h" | 
 | #include "base/strings/utf_string_conversions.h" | 
 | #include "testing/gtest/include/gtest/gtest.h" | 
 | #include "third_party/icu/source/i18n/unicode/usearch.h" | 
 |  | 
 | namespace base { | 
 | namespace i18n { | 
 |  | 
// Note on setting the default locale for testing: the default locale on the
// Mac trybot is en_US_POSIX, under which primary-level collation strength
// string search is case-sensitive, whereas it should normally be
// case-insensitive. In other locales (including en_US, which English speakers
// in the U.S. use), this search is case-insensitive as expected. Tests below
// that detect en_US_POSIX therefore switch the ICU default locale to en_US
// while they run and restore the original locale before returning.
 |  | 
 | TEST(StringSearchTest, ASCII) { | 
 |   std::string default_locale(uloc_getDefault()); | 
 |   bool locale_is_posix = (default_locale == "en_US_POSIX"); | 
 |   if (locale_is_posix) | 
 |     SetICUDefaultLocale("en_US"); | 
 |  | 
 |   size_t index = 0; | 
 |   size_t length = 0; | 
 |  | 
 |   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( | 
 |       ASCIIToUTF16("hello"), ASCIIToUTF16("hello world"), &index, &length)); | 
 |   EXPECT_EQ(0U, index); | 
 |   EXPECT_EQ(5U, length); | 
 |  | 
 |   EXPECT_FALSE(StringSearchIgnoringCaseAndAccents( | 
 |       ASCIIToUTF16("h    e l l o"), ASCIIToUTF16("h   e l l o"), | 
 |       &index, &length)); | 
 |  | 
 |   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( | 
 |       ASCIIToUTF16("aabaaa"), ASCIIToUTF16("aaabaabaaa"), &index, &length)); | 
 |   EXPECT_EQ(4U, index); | 
 |   EXPECT_EQ(6U, length); | 
 |  | 
 |   EXPECT_FALSE(StringSearchIgnoringCaseAndAccents( | 
 |       ASCIIToUTF16("searching within empty string"), string16(), | 
 |       &index, &length)); | 
 |  | 
 |   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( | 
 |       string16(), ASCIIToUTF16("searching for empty string"), &index, &length)); | 
 |   EXPECT_EQ(0U, index); | 
 |   EXPECT_EQ(0U, length); | 
 |  | 
 |   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( | 
 |       ASCIIToUTF16("case insensitivity"), ASCIIToUTF16("CaSe InSeNsItIvItY"), | 
 |       &index, &length)); | 
 |   EXPECT_EQ(0U, index); | 
 |   EXPECT_EQ(18U, length); | 
 |  | 
 |   if (locale_is_posix) | 
 |     SetICUDefaultLocale(default_locale.data()); | 
 | } | 
 |  | 
 | TEST(StringSearchTest, UnicodeLocaleIndependent) { | 
 |   // Base characters | 
 |   const string16 e_base = WideToUTF16(L"e"); | 
 |   const string16 E_base = WideToUTF16(L"E"); | 
 |   const string16 a_base = WideToUTF16(L"a"); | 
 |  | 
 |   // Composed characters | 
 |   const string16 e_with_acute_accent = WideToUTF16(L"\u00e9"); | 
 |   const string16 E_with_acute_accent = WideToUTF16(L"\u00c9"); | 
 |   const string16 e_with_grave_accent = WideToUTF16(L"\u00e8"); | 
 |   const string16 E_with_grave_accent = WideToUTF16(L"\u00c8"); | 
 |   const string16 a_with_acute_accent = WideToUTF16(L"\u00e1"); | 
 |  | 
 |   // Decomposed characters | 
 |   const string16 e_with_acute_combining_mark = WideToUTF16(L"e\u0301"); | 
 |   const string16 E_with_acute_combining_mark = WideToUTF16(L"E\u0301"); | 
 |   const string16 e_with_grave_combining_mark = WideToUTF16(L"e\u0300"); | 
 |   const string16 E_with_grave_combining_mark = WideToUTF16(L"E\u0300"); | 
 |   const string16 a_with_acute_combining_mark = WideToUTF16(L"a\u0301"); | 
 |  | 
 |   std::string default_locale(uloc_getDefault()); | 
 |   bool locale_is_posix = (default_locale == "en_US_POSIX"); | 
 |   if (locale_is_posix) | 
 |     SetICUDefaultLocale("en_US"); | 
 |  | 
 |   size_t index = 0; | 
 |   size_t length = 0; | 
 |  | 
 |   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( | 
 |       e_base, e_with_acute_accent, &index, &length)); | 
 |   EXPECT_EQ(0U, index); | 
 |   EXPECT_EQ(e_with_acute_accent.size(), length); | 
 |  | 
 |   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( | 
 |       e_with_acute_accent, e_base, &index, &length)); | 
 |   EXPECT_EQ(0U, index); | 
 |   EXPECT_EQ(e_base.size(), length); | 
 |  | 
 |   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( | 
 |       e_base, e_with_acute_combining_mark, &index, &length)); | 
 |   EXPECT_EQ(0U, index); | 
 |   EXPECT_EQ(e_with_acute_combining_mark.size(), length); | 
 |  | 
 |   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( | 
 |       e_with_acute_combining_mark, e_base, &index, &length)); | 
 |   EXPECT_EQ(0U, index); | 
 |   EXPECT_EQ(e_base.size(), length); | 
 |  | 
 |   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( | 
 |       e_with_acute_combining_mark, e_with_acute_accent, | 
 |       &index, &length)); | 
 |   EXPECT_EQ(0U, index); | 
 |   EXPECT_EQ(e_with_acute_accent.size(), length); | 
 |  | 
 |   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( | 
 |       e_with_acute_accent, e_with_acute_combining_mark, | 
 |       &index, &length)); | 
 |   EXPECT_EQ(0U, index); | 
 |   EXPECT_EQ(e_with_acute_combining_mark.size(), length); | 
 |  | 
 |   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( | 
 |       e_with_acute_combining_mark, e_with_grave_combining_mark, | 
 |       &index, &length)); | 
 |   EXPECT_EQ(0U, index); | 
 |   EXPECT_EQ(e_with_grave_combining_mark.size(), length); | 
 |  | 
 |   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( | 
 |       e_with_grave_combining_mark, e_with_acute_combining_mark, | 
 |       &index, &length)); | 
 |   EXPECT_EQ(0U, index); | 
 |   EXPECT_EQ(e_with_acute_combining_mark.size(), length); | 
 |  | 
 |   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( | 
 |       e_with_acute_combining_mark, e_with_grave_accent, &index, &length)); | 
 |   EXPECT_EQ(0U, index); | 
 |   EXPECT_EQ(e_with_grave_accent.size(), length); | 
 |  | 
 |   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( | 
 |       e_with_grave_accent, e_with_acute_combining_mark, &index, &length)); | 
 |   EXPECT_EQ(0U, index); | 
 |   EXPECT_EQ(e_with_acute_combining_mark.size(), length); | 
 |  | 
 |   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( | 
 |       E_with_acute_accent, e_with_acute_accent, &index, &length)); | 
 |   EXPECT_EQ(0U, index); | 
 |   EXPECT_EQ(e_with_acute_accent.size(), length); | 
 |  | 
 |   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( | 
 |       E_with_grave_accent, e_with_acute_accent, &index, &length)); | 
 |   EXPECT_EQ(0U, index); | 
 |   EXPECT_EQ(e_with_acute_accent.size(), length); | 
 |  | 
 |   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( | 
 |       E_with_acute_combining_mark, e_with_grave_accent, &index, &length)); | 
 |   EXPECT_EQ(0U, index); | 
 |   EXPECT_EQ(e_with_grave_accent.size(), length); | 
 |  | 
 |   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( | 
 |       E_with_grave_combining_mark, e_with_acute_accent, &index, &length)); | 
 |   EXPECT_EQ(0U, index); | 
 |   EXPECT_EQ(e_with_acute_accent.size(), length); | 
 |  | 
 |   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( | 
 |       E_base, e_with_grave_accent, &index, &length)); | 
 |   EXPECT_EQ(0U, index); | 
 |   EXPECT_EQ(e_with_grave_accent.size(), length); | 
 |  | 
 |   EXPECT_FALSE(StringSearchIgnoringCaseAndAccents( | 
 |       a_with_acute_accent, e_with_acute_accent, &index, &length)); | 
 |  | 
 |   EXPECT_FALSE(StringSearchIgnoringCaseAndAccents( | 
 |       a_with_acute_combining_mark, e_with_acute_combining_mark, | 
 |       &index, &length)); | 
 |  | 
 |   if (locale_is_posix) | 
 |     SetICUDefaultLocale(default_locale.data()); | 
 | } | 
 |  | 
 | TEST(StringSearchTest, UnicodeLocaleDependent) { | 
 |   // Base characters | 
 |   const string16 a_base = WideToUTF16(L"a"); | 
 |  | 
 |   // Composed characters | 
 |   const string16 a_with_ring = WideToUTF16(L"\u00e5"); | 
 |  | 
 |   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(a_base, a_with_ring, nullptr, | 
 |                                                  nullptr)); | 
 |  | 
 |   const char* default_locale = uloc_getDefault(); | 
 |   SetICUDefaultLocale("da"); | 
 |  | 
 |   EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(a_base, a_with_ring, nullptr, | 
 |                                                   nullptr)); | 
 |  | 
 |   SetICUDefaultLocale(default_locale); | 
 | } | 
 |  | 
 | TEST(StringSearchTest, FixedPatternMultipleSearch) { | 
 |   std::string default_locale(uloc_getDefault()); | 
 |   bool locale_is_posix = (default_locale == "en_US_POSIX"); | 
 |   if (locale_is_posix) | 
 |     SetICUDefaultLocale("en_US"); | 
 |  | 
 |   size_t index = 0; | 
 |   size_t length = 0; | 
 |  | 
 |   // Search "hello" over multiple texts. | 
 |   FixedPatternStringSearchIgnoringCaseAndAccents query(ASCIIToUTF16("hello")); | 
 |   EXPECT_TRUE(query.Search(ASCIIToUTF16("12hello34"), &index, &length)); | 
 |   EXPECT_EQ(2U, index); | 
 |   EXPECT_EQ(5U, length); | 
 |   EXPECT_FALSE(query.Search(ASCIIToUTF16("bye"), &index, &length)); | 
 |   EXPECT_TRUE(query.Search(ASCIIToUTF16("hELLo"), &index, &length)); | 
 |   EXPECT_EQ(0U, index); | 
 |   EXPECT_EQ(5U, length); | 
 |  | 
 |   if (locale_is_posix) | 
 |     SetICUDefaultLocale(default_locale.data()); | 
 | } | 
 |  | 
 | }  // namespace i18n | 
 | }  // namespace base |