|  | // Copyright 2015 The Chromium Authors. All rights reserved. | 
|  | // Use of this source code is governed by a BSD-style license that can be | 
|  | // found in the LICENSE file. | 
|  |  | 
|  | #include "base/strings/pattern.h" | 
|  |  | 
|  | #include "base/third_party/icu/icu_utf.h" | 
|  |  | 
|  | namespace base { | 
|  |  | 
|  | namespace { | 
|  |  | 
|  | constexpr bool IsWildcard(base_icu::UChar32 character) { | 
|  | return character == '*' || character == '?'; | 
|  | } | 
|  |  | 
|  | // Searches for the next subpattern of |pattern| in |string|, up to the given | 
|  | // |maximum_distance|. The subpattern extends from the start of |pattern| up to | 
|  | // the first wildcard character (or the end of the string). If the value of | 
|  | // |maximum_distance| is negative, the maximum distance is considered infinite. | 
|  | template <typename CHAR, typename NEXT> | 
|  | constexpr bool SearchForChars(const CHAR** pattern, | 
|  | const CHAR* pattern_end, | 
|  | const CHAR** string, | 
|  | const CHAR* string_end, | 
|  | int maximum_distance, | 
|  | NEXT next) { | 
|  | const CHAR* pattern_start = *pattern; | 
|  | const CHAR* string_start = *string; | 
|  | bool escape = false; | 
|  | while (true) { | 
|  | if (*pattern == pattern_end) { | 
|  | // If this is the end of the pattern, only accept the end of the string; | 
|  | // anything else falls through to the mismatch case. | 
|  | if (*string == string_end) | 
|  | return true; | 
|  | } else { | 
|  | // If we have found a wildcard, we're done. | 
|  | if (!escape && IsWildcard(**pattern)) | 
|  | return true; | 
|  |  | 
|  | // Check if the escape character is found. If so, skip it and move to the | 
|  | // next character. | 
|  | if (!escape && **pattern == '\\') { | 
|  | escape = true; | 
|  | next(pattern, pattern_end); | 
|  | continue; | 
|  | } | 
|  |  | 
|  | escape = false; | 
|  |  | 
|  | if (*string == string_end) | 
|  | return false; | 
|  |  | 
|  | // Check if the chars match, if so, increment the ptrs. | 
|  | const CHAR* pattern_next = *pattern; | 
|  | const CHAR* string_next = *string; | 
|  | base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end); | 
|  | if (pattern_char == next(&string_next, string_end) && | 
|  | pattern_char != CBU_SENTINEL) { | 
|  | *pattern = pattern_next; | 
|  | *string = string_next; | 
|  | continue; | 
|  | } | 
|  | } | 
|  |  | 
|  | // Mismatch. If we have reached the maximum distance, return false, | 
|  | // otherwise restart at the beginning of the pattern with the next character | 
|  | // in the string. | 
|  | // TODO(bauerb): This is a naive implementation of substring search, which | 
|  | // could be implemented with a more efficient algorithm, e.g. | 
|  | // Knuth-Morris-Pratt (at the expense of requiring preprocessing). | 
|  | if (maximum_distance == 0) | 
|  | return false; | 
|  |  | 
|  | // Because unlimited distance is represented as -1, this will never reach 0 | 
|  | // and therefore fail the match above. | 
|  | maximum_distance--; | 
|  | *pattern = pattern_start; | 
|  | next(&string_start, string_end); | 
|  | *string = string_start; | 
|  | } | 
|  | } | 
|  |  | 
|  | // Consumes consecutive wildcard characters (? or *). Returns the maximum number | 
|  | // of characters matched by the sequence of wildcards, or -1 if the wildcards | 
|  | // match an arbitrary number of characters (which is the case if it contains at | 
|  | // least one *). | 
|  | template <typename CHAR, typename NEXT> | 
|  | constexpr int EatWildcards(const CHAR** pattern, const CHAR* end, NEXT next) { | 
|  | int num_question_marks = 0; | 
|  | bool has_asterisk = false; | 
|  | while (*pattern != end) { | 
|  | if (**pattern == '?') { | 
|  | num_question_marks++; | 
|  | } else if (**pattern == '*') { | 
|  | has_asterisk = true; | 
|  | } else { | 
|  | break; | 
|  | } | 
|  |  | 
|  | next(pattern, end); | 
|  | } | 
|  | return has_asterisk ? -1 : num_question_marks; | 
|  | } | 
|  |  | 
|  | template <typename CHAR, typename NEXT> | 
|  | constexpr bool MatchPatternT(const CHAR* eval, | 
|  | const CHAR* eval_end, | 
|  | const CHAR* pattern, | 
|  | const CHAR* pattern_end, | 
|  | NEXT next) { | 
|  | do { | 
|  | int maximum_wildcard_length = EatWildcards(&pattern, pattern_end, next); | 
|  | if (!SearchForChars(&pattern, pattern_end, &eval, eval_end, | 
|  | maximum_wildcard_length, next)) { | 
|  | return false; | 
|  | } | 
|  | } while (pattern != pattern_end); | 
|  | return true; | 
|  | } | 
|  |  | 
|  | struct NextCharUTF8 { | 
|  | base_icu::UChar32 operator()(const char** p, const char* end) { | 
|  | base_icu::UChar32 c; | 
|  | int offset = 0; | 
|  | CBU8_NEXT(*p, offset, end - *p, c); | 
|  | *p += offset; | 
|  | return c; | 
|  | } | 
|  | }; | 
|  |  | 
|  | struct NextCharUTF16 { | 
|  | base_icu::UChar32 operator()(const char16** p, const char16* end) { | 
|  | base_icu::UChar32 c; | 
|  | int offset = 0; | 
|  | CBU16_NEXT(*p, offset, end - *p, c); | 
|  | *p += offset; | 
|  | return c; | 
|  | } | 
|  | }; | 
|  |  | 
|  | }  // namespace | 
|  |  | 
|  | bool MatchPattern(StringPiece eval, StringPiece pattern) { | 
|  | return MatchPatternT(eval.data(), eval.data() + eval.size(), pattern.data(), | 
|  | pattern.data() + pattern.size(), NextCharUTF8()); | 
|  | } | 
|  |  | 
|  | bool MatchPattern(StringPiece16 eval, StringPiece16 pattern) { | 
|  | return MatchPatternT(eval.data(), eval.data() + eval.size(), pattern.data(), | 
|  | pattern.data() + pattern.size(), NextCharUTF16()); | 
|  | } | 
|  |  | 
|  | }  // namespace base |