Remove base/i18n Change-Id: Ic84e5156febcca4e6813ca30fd0a245664c84648 Reviewed-on: https://gn-review.googlesource.com/1421 Reviewed-by: Brett Wilson <brettw@chromium.org> Commit-Queue: Scott Graham <scottmg@chromium.org>
diff --git a/base/i18n/OWNERS b/base/i18n/OWNERS deleted file mode 100644 index d717b8d..0000000 --- a/base/i18n/OWNERS +++ /dev/null
@@ -1 +0,0 @@ -jshin@chromium.org
diff --git a/base/i18n/base_i18n_export.h b/base/i18n/base_i18n_export.h deleted file mode 100644 index e8a2add..0000000 --- a/base/i18n/base_i18n_export.h +++ /dev/null
@@ -1,29 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef BASE_I18N_BASE_I18N_EXPORT_H_ -#define BASE_I18N_BASE_I18N_EXPORT_H_ - -#if defined(COMPONENT_BUILD) -#if defined(WIN32) - -#if defined(BASE_I18N_IMPLEMENTATION) -#define BASE_I18N_EXPORT __declspec(dllexport) -#else -#define BASE_I18N_EXPORT __declspec(dllimport) -#endif // defined(BASE_I18N_IMPLEMENTATION) - -#else // defined(WIN32) -#if defined(BASE_I18N_IMPLEMENTATION) -#define BASE_I18N_EXPORT __attribute__((visibility("default"))) -#else -#define BASE_I18N_EXPORT -#endif -#endif - -#else // defined(COMPONENT_BUILD) -#define BASE_I18N_EXPORT -#endif - -#endif // BASE_I18N_BASE_I18N_EXPORT_H_
diff --git a/base/i18n/base_i18n_switches.cc b/base/i18n/base_i18n_switches.cc deleted file mode 100644 index 103d665..0000000 --- a/base/i18n/base_i18n_switches.cc +++ /dev/null
@@ -1,21 +0,0 @@ -// Copyright 2015 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "base/i18n/base_i18n_switches.h" - -namespace switches { - -// Force the UI to a specific direction. Valid values are "ltr" (left-to-right) -// and "rtl" (right-to-left). -const char kForceUIDirection[] = "force-ui-direction"; - -// Force the text rendering to a specific direction. Valid values are "ltr" -// (left-to-right) and "rtl" (right-to-left). Only tested meaningfully with -// RTL. -const char kForceTextDirection[] = "force-text-direction"; - -const char kForceDirectionLTR[] = "ltr"; -const char kForceDirectionRTL[] = "rtl"; - -} // namespace switches
diff --git a/base/i18n/base_i18n_switches.h b/base/i18n/base_i18n_switches.h deleted file mode 100644 index d1ba690..0000000 --- a/base/i18n/base_i18n_switches.h +++ /dev/null
@@ -1,21 +0,0 @@ -// Copyright 2015 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef BASE_I18N_BASE_I18N_SWITCHES_H_ -#define BASE_I18N_BASE_I18N_SWITCHES_H_ - -#include "base/i18n/base_i18n_export.h" - -namespace switches { - -BASE_I18N_EXPORT extern const char kForceUIDirection[]; -BASE_I18N_EXPORT extern const char kForceTextDirection[]; - -// kForce*Direction choices for the switches above. -BASE_I18N_EXPORT extern const char kForceDirectionLTR[]; -BASE_I18N_EXPORT extern const char kForceDirectionRTL[]; - -} // namespace switches - -#endif // BASE_I18N_BASE_I18N_SWITCHES_H_
diff --git a/base/i18n/bidi_line_iterator.cc b/base/i18n/bidi_line_iterator.cc deleted file mode 100644 index 3f7f868..0000000 --- a/base/i18n/bidi_line_iterator.cc +++ /dev/null
@@ -1,119 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "base/i18n/bidi_line_iterator.h" - -#include "base/logging.h" - -namespace base { -namespace i18n { - -namespace { - -UBiDiLevel GetParagraphLevelForDirection(TextDirection direction) { - switch (direction) { - case UNKNOWN_DIRECTION: - return UBIDI_DEFAULT_LTR; - break; - case RIGHT_TO_LEFT: - return 1; // Highest RTL level. - break; - case LEFT_TO_RIGHT: - return 0; // Highest LTR level. - break; - default: - NOTREACHED(); - return 0; - } -} - -// Overrides the default bidi class for a given character, for the custom -// "AS_URL" behavior. Returns U_BIDI_CLASS_DEFAULT to defer to the default ICU -// behavior. -// -// Matches the C callback interface of ICU's UBiDiClassCallback type (which is -// why there is an unused argument). -UCharDirection GetURLBiDiClassCallback(const void* /*unused*/, UChar32 c) { - // Note: Use a switch statement instead of strchr() to avoid iterating over a - // string for each character (the switch allows for much better compiler - // optimization). - switch (c) { - // The set of characters that delimit URL components (separating the scheme, - // username, password, domain labels, host, path segments, query - // names/values and fragment). - case '#': - case '&': - case '.': - case '/': - case ':': - case '=': - case '?': - case '@': - // Treat all of these characters as strong LTR, which effectively - // surrounds all of the text components of a URL (e.g., the domain labels - // and path segments) in a left-to-right embedding. This ensures that the - // URL components read from left to right, regardless of any RTL - // characters. (Within each component, RTL sequences are rendered from - // right to left as expected.) - return U_LEFT_TO_RIGHT; - default: - return U_BIDI_CLASS_DEFAULT; - } -} - -} // namespace - -BiDiLineIterator::BiDiLineIterator() : bidi_(nullptr) {} - -BiDiLineIterator::~BiDiLineIterator() { - if (bidi_) { - ubidi_close(bidi_); - bidi_ = nullptr; - } -} - -bool BiDiLineIterator::Open(const string16& text, - TextDirection direction, - CustomBehavior behavior) { - DCHECK(!bidi_); - UErrorCode error = U_ZERO_ERROR; - bidi_ = ubidi_openSized(static_cast<int>(text.length()), 0, &error); - if (U_FAILURE(error)) - return false; - - if (behavior == CustomBehavior::AS_URL) { - ubidi_setClassCallback(bidi_, GetURLBiDiClassCallback, nullptr, nullptr, - nullptr, &error); - if (U_FAILURE(error)) - return false; - } - - ubidi_setPara(bidi_, text.data(), static_cast<int>(text.length()), - GetParagraphLevelForDirection(direction), nullptr, &error); - return (U_SUCCESS(error)); -} - -int BiDiLineIterator::CountRuns() const { - DCHECK(bidi_ != nullptr); - UErrorCode error = U_ZERO_ERROR; - const int runs = ubidi_countRuns(bidi_, &error); - return U_SUCCESS(error) ? runs : 0; -} - -UBiDiDirection BiDiLineIterator::GetVisualRun(int index, - int* start, - int* length) const { - DCHECK(bidi_ != nullptr); - return ubidi_getVisualRun(bidi_, index, start, length); -} - -void BiDiLineIterator::GetLogicalRun(int start, - int* end, - UBiDiLevel* level) const { - DCHECK(bidi_ != nullptr); - ubidi_getLogicalRun(bidi_, start, end, level); -} - -} // namespace i18n -} // namespace base
diff --git a/base/i18n/bidi_line_iterator.h b/base/i18n/bidi_line_iterator.h deleted file mode 100644 index d840f61..0000000 --- a/base/i18n/bidi_line_iterator.h +++ /dev/null
@@ -1,60 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef BASE_I18N_BIDI_LINE_ITERATOR_H_ -#define BASE_I18N_BIDI_LINE_ITERATOR_H_ - -#include "base/i18n/base_i18n_export.h" -#include "base/i18n/rtl.h" -#include "base/macros.h" -#include "base/strings/string16.h" -#include "third_party/icu/source/common/unicode/ubidi.h" -#include "third_party/icu/source/common/unicode/uchar.h" - -namespace base { -namespace i18n { - -// A simple wrapper class for the bidirectional iterator of ICU. -// This class uses the bidirectional iterator of ICU to split a line of -// bidirectional texts into visual runs in its display order. -class BASE_I18N_EXPORT BiDiLineIterator { - public: - // Specifies some alternative iteration behavior. - enum class CustomBehavior { - // No special behavior. - NONE, - // Treat URL delimiter characters as strong LTR. This is a special treatment - // for URLs that purposefully violates the URL Standard, as an experiment. - // It should only be used behind a flag. - AS_URL - }; - - BiDiLineIterator(); - ~BiDiLineIterator(); - - // Initializes the bidirectional iterator with the specified text. Returns - // whether initialization succeeded. - bool Open(const string16& text, - TextDirection direction, - CustomBehavior behavior); - - // Returns the number of visual runs in the text, or zero on error. - int CountRuns() const; - - // Gets the logical offset, length, and direction of the specified visual run. - UBiDiDirection GetVisualRun(int index, int* start, int* length) const; - - // Given a start position, figure out where the run ends (and the BiDiLevel). - void GetLogicalRun(int start, int* end, UBiDiLevel* level) const; - - private: - UBiDi* bidi_; - - DISALLOW_COPY_AND_ASSIGN(BiDiLineIterator); -}; - -} // namespace i18n -} // namespace base - -#endif // BASE_I18N_BIDI_LINE_ITERATOR_H_
diff --git a/base/i18n/break_iterator.cc b/base/i18n/break_iterator.cc deleted file mode 100644 index 251cd00..0000000 --- a/base/i18n/break_iterator.cc +++ /dev/null
@@ -1,191 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "base/i18n/break_iterator.h" - -#include <stdint.h> - -#include "base/logging.h" -#include "third_party/icu/source/common/unicode/ubrk.h" -#include "third_party/icu/source/common/unicode/uchar.h" -#include "third_party/icu/source/common/unicode/ustring.h" - -namespace base { -namespace i18n { - -const size_t npos = static_cast<size_t>(-1); - -BreakIterator::BreakIterator(const StringPiece16& str, BreakType break_type) - : iter_(nullptr), - string_(str), - break_type_(break_type), - prev_(npos), - pos_(0) {} - -BreakIterator::BreakIterator(const StringPiece16& str, const string16& rules) - : iter_(nullptr), - string_(str), - rules_(rules), - break_type_(RULE_BASED), - prev_(npos), - pos_(0) {} - -BreakIterator::~BreakIterator() { - if (iter_) - ubrk_close(static_cast<UBreakIterator*>(iter_)); -} - -bool BreakIterator::Init() { - UErrorCode status = U_ZERO_ERROR; - UParseError parse_error; - UBreakIteratorType break_type; - switch (break_type_) { - case BREAK_CHARACTER: - break_type = UBRK_CHARACTER; - break; - case BREAK_WORD: - break_type = UBRK_WORD; - break; - case BREAK_LINE: - case BREAK_NEWLINE: - case RULE_BASED: // (Keep compiler happy, break_type not used in this case) - break_type = UBRK_LINE; - break; - default: - NOTREACHED() << "invalid break_type_"; - return false; - } - if (break_type_ == RULE_BASED) { - iter_ = ubrk_openRules(rules_.c_str(), - static_cast<int32_t>(rules_.length()), - string_.data(), - static_cast<int32_t>(string_.size()), - &parse_error, - &status); - if (U_FAILURE(status)) { - NOTREACHED() << "ubrk_openRules failed to parse rule string at line " - << parse_error.line << ", offset " << parse_error.offset; - } - } else { - iter_ = ubrk_open(break_type, nullptr, string_.data(), - static_cast<int32_t>(string_.size()), &status); - if (U_FAILURE(status)) { - NOTREACHED() << "ubrk_open failed for type " << break_type - << " with error " << status; - } - } - - if (U_FAILURE(status)) { - return false; - } - - // Move the iterator to the beginning of the string. - ubrk_first(static_cast<UBreakIterator*>(iter_)); - return true; -} - -bool BreakIterator::Advance() { - int32_t pos; - int32_t status; - prev_ = pos_; - switch (break_type_) { - case BREAK_CHARACTER: - case BREAK_WORD: - case BREAK_LINE: - case RULE_BASED: - pos = ubrk_next(static_cast<UBreakIterator*>(iter_)); - if (pos == UBRK_DONE) { - pos_ = npos; - return false; - } - pos_ = static_cast<size_t>(pos); - return true; - case BREAK_NEWLINE: - do { - pos = ubrk_next(static_cast<UBreakIterator*>(iter_)); - if (pos == UBRK_DONE) - break; - pos_ = static_cast<size_t>(pos); - status = ubrk_getRuleStatus(static_cast<UBreakIterator*>(iter_)); - } while (status >= UBRK_LINE_SOFT && status < UBRK_LINE_SOFT_LIMIT); - if (pos == UBRK_DONE && prev_ == pos_) { - pos_ = npos; - return false; - } - return true; - default: - NOTREACHED() << "invalid break_type_"; - return false; - } -} - -bool BreakIterator::SetText(const base::char16* text, const size_t length) { - UErrorCode status = U_ZERO_ERROR; - ubrk_setText(static_cast<UBreakIterator*>(iter_), - text, length, &status); - pos_ = 0; // implicit when ubrk_setText is done - prev_ = npos; - if (U_FAILURE(status)) { - NOTREACHED() << "ubrk_setText failed"; - return false; - } - string_ = StringPiece16(text, length); - return true; -} - -bool BreakIterator::IsWord() const { - return GetWordBreakStatus() == IS_WORD_BREAK; -} - -BreakIterator::WordBreakStatus BreakIterator::GetWordBreakStatus() const { - int32_t status = ubrk_getRuleStatus(static_cast<UBreakIterator*>(iter_)); - if (break_type_ != BREAK_WORD && break_type_ != RULE_BASED) - return IS_LINE_OR_CHAR_BREAK; - // In ICU 60, trying to advance past the end of the text does not change - // |status| so that |pos_| has to be checked as well as |status|. - // See http://bugs.icu-project.org/trac/ticket/13447 . - return (status == UBRK_WORD_NONE || pos_ == npos) ? IS_SKIPPABLE_WORD - : IS_WORD_BREAK; -} - -bool BreakIterator::IsEndOfWord(size_t position) const { - if (break_type_ != BREAK_WORD && break_type_ != RULE_BASED) - return false; - - UBreakIterator* iter = static_cast<UBreakIterator*>(iter_); - UBool boundary = ubrk_isBoundary(iter, static_cast<int32_t>(position)); - int32_t status = ubrk_getRuleStatus(iter); - return (!!boundary && status != UBRK_WORD_NONE); -} - -bool BreakIterator::IsStartOfWord(size_t position) const { - if (break_type_ != BREAK_WORD && break_type_ != RULE_BASED) - return false; - - UBreakIterator* iter = static_cast<UBreakIterator*>(iter_); - UBool boundary = ubrk_isBoundary(iter, static_cast<int32_t>(position)); - ubrk_next(iter); - int32_t next_status = ubrk_getRuleStatus(iter); - return (!!boundary && next_status != UBRK_WORD_NONE); -} - -bool BreakIterator::IsGraphemeBoundary(size_t position) const { - if (break_type_ != BREAK_CHARACTER) - return false; - - UBreakIterator* iter = static_cast<UBreakIterator*>(iter_); - return !!ubrk_isBoundary(iter, static_cast<int32_t>(position)); -} - -string16 BreakIterator::GetString() const { - return GetStringPiece().as_string(); -} - -StringPiece16 BreakIterator::GetStringPiece() const { - DCHECK(prev_ != npos && pos_ != npos); - return string_.substr(prev_, pos_ - prev_); -} - -} // namespace i18n -} // namespace base
diff --git a/base/i18n/break_iterator.h b/base/i18n/break_iterator.h deleted file mode 100644 index dc30b64..0000000 --- a/base/i18n/break_iterator.h +++ /dev/null
@@ -1,182 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef BASE_I18N_BREAK_ITERATOR_H_ -#define BASE_I18N_BREAK_ITERATOR_H_ - -#include <stddef.h> - -#include "base/i18n/base_i18n_export.h" -#include "base/macros.h" -#include "base/strings/string16.h" -#include "base/strings/string_piece.h" - -// The BreakIterator class iterates through the words, word breaks, and -// line breaks in a UTF-16 string. -// -// It provides several modes, BREAK_WORD, BREAK_LINE, and BREAK_NEWLINE, -// which modify how characters are aggregated into the returned string. -// -// Under BREAK_WORD mode, once a word is encountered any non-word -// characters are not included in the returned string (e.g. in the -// UTF-16 equivalent of the string " foo bar! ", the word breaks are at -// the periods in ". .foo. .bar.!. ."). -// Note that Chinese/Japanese/Thai do not use spaces between words so that -// boundaries can fall in the middle of a continuous run of non-space / -// non-punctuation characters. -// -// Under BREAK_LINE mode, once a line breaking opportunity is encountered, -// any non-word characters are included in the returned string, breaking -// only when a space-equivalent character or a line breaking opportunity -// is encountered (e.g. in the UTF16-equivalent of the string " foo bar! ", -// the breaks are at the periods in ". .foo .bar! ."). -// -// Note that lines can be broken at any character/syllable/grapheme cluster -// boundary in Chinese/Japanese/Korean and at word boundaries in Thai -// (Thai does not use spaces between words). Therefore, this is NOT the same -// as breaking only at space-equivalent characters where its former -// name (BREAK_SPACE) implied. -// -// Under BREAK_NEWLINE mode, all characters are included in the returned -// string, breaking only when a newline-equivalent character is encountered -// (eg. in the UTF-16 equivalent of the string "foo\nbar!\n\n", the line -// breaks are at the periods in ".foo\n.bar\n.\n."). -// -// To extract the words from a string, move a BREAK_WORD BreakIterator -// through the string and test whether IsWord() is true. E.g., -// BreakIterator iter(str, BreakIterator::BREAK_WORD); -// if (!iter.Init()) -// return false; -// while (iter.Advance()) { -// if (iter.IsWord()) { -// // Region [iter.prev(), iter.pos()) contains a word. -// VLOG(1) << "word: " << iter.GetString(); -// } -// } - -namespace base { -namespace i18n { - -class BASE_I18N_EXPORT BreakIterator { - public: - enum BreakType { - BREAK_WORD, - BREAK_LINE, - // TODO(jshin): Remove this after reviewing call sites. - // If call sites really need break only on space-like characters - // implement it separately. - BREAK_SPACE = BREAK_LINE, - BREAK_NEWLINE, - BREAK_CHARACTER, - // But don't remove this one! - RULE_BASED, - }; - - enum WordBreakStatus { - // The end of text that the iterator recognizes as word characters. - // Non-word characters are things like punctuation and spaces. - IS_WORD_BREAK, - // Characters that the iterator can skip past, such as punctuation, - // whitespace, and, if using RULE_BASED mode, characters from another - // character set. - IS_SKIPPABLE_WORD, - // Only used if not in BREAK_WORD or RULE_BASED mode. This is returned for - // newlines, line breaks, and character breaks. - IS_LINE_OR_CHAR_BREAK - }; - - // Requires |str| to live as long as the BreakIterator does. - BreakIterator(const StringPiece16& str, BreakType break_type); - // Make a rule-based iterator. BreakType == RULE_BASED is implied. - // TODO(andrewhayden): This signature could easily be misinterpreted as - // "(const string16& str, const string16& locale)". We should do something - // better. - BreakIterator(const StringPiece16& str, const string16& rules); - ~BreakIterator(); - - // Init() must be called before any of the iterators are valid. - // Returns false if ICU failed to initialize. - bool Init(); - - // Advance to the next break. Returns false if we've run past the end of - // the string. (Note that the very last "break" is after the final - // character in the string, and when we advance to that position it's the - // last time Advance() returns true.) - bool Advance(); - - // Updates the text used by the iterator, resetting the iterator as if - // if Init() had been called again. Any old state is lost. Returns true - // unless there is an error setting the text. - bool SetText(const base::char16* text, const size_t length); - - // Under BREAK_WORD mode, returns true if the break we just hit is the - // end of a word. (Otherwise, the break iterator just skipped over e.g. - // whitespace or punctuation.) Under BREAK_LINE and BREAK_NEWLINE modes, - // this distinction doesn't apply and it always returns false. - bool IsWord() const; - - // Under BREAK_WORD mode: - // - Returns IS_SKIPPABLE_WORD if non-word characters, such as punctuation or - // spaces, are found. - // - Returns IS_WORD_BREAK if the break we just hit is the end of a sequence - // of word characters. - // Under RULE_BASED mode: - // - Returns IS_SKIPPABLE_WORD if characters outside the rules' character set - // or non-word characters, such as punctuation or spaces, are found. - // - Returns IS_WORD_BREAK if the break we just hit is the end of a sequence - // of word characters that are in the rules' character set. - // Not under BREAK_WORD or RULE_BASED mode: - // - Returns IS_LINE_OR_CHAR_BREAK. - BreakIterator::WordBreakStatus GetWordBreakStatus() const; - - // Under BREAK_WORD mode, returns true if |position| is at the end of word or - // at the start of word. It always returns false under BREAK_LINE and - // BREAK_NEWLINE modes. - bool IsEndOfWord(size_t position) const; - bool IsStartOfWord(size_t position) const; - - // Under BREAK_CHARACTER mode, returns whether |position| is a Unicode - // grapheme boundary. - bool IsGraphemeBoundary(size_t position) const; - - // Returns the string between prev() and pos(). - // Advance() must have been called successfully at least once for pos() to - // have advanced to somewhere useful. - string16 GetString() const; - - StringPiece16 GetStringPiece() const; - - // Returns the value of pos() returned before Advance() was last called. - size_t prev() const { return prev_; } - - // Returns the current break position within the string, - // or BreakIterator::npos when done. - size_t pos() const { return pos_; } - - private: - // ICU iterator, avoiding ICU ubrk.h dependence. - // This is actually an ICU UBreakiterator* type, which turns out to be - // a typedef for a void* in the ICU headers. Using void* directly prevents - // callers from needing access to the ICU public headers directory. - void* iter_; - - // The string we're iterating over. Can be changed with SetText(...) - StringPiece16 string_; - - // Rules for our iterator. Mutually exclusive with break_type_. - const string16 rules_; - - // The breaking style (word/space/newline). Mutually exclusive with rules_ - BreakType break_type_; - - // Previous and current iterator positions. - size_t prev_, pos_; - - DISALLOW_COPY_AND_ASSIGN(BreakIterator); -}; - -} // namespace i18n -} // namespace base - -#endif // BASE_I18N_BREAK_ITERATOR_H_
diff --git a/base/i18n/build_utf8_validator_tables.cc b/base/i18n/build_utf8_validator_tables.cc deleted file mode 100644 index 0cdcc35..0000000 --- a/base/i18n/build_utf8_validator_tables.cc +++ /dev/null
@@ -1,470 +0,0 @@ -// Copyright 2014 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Create a state machine for validating UTF-8. The algorithm in brief: -// 1. Convert the complete unicode range of code points, except for the -// surrogate code points, to an ordered array of sequences of bytes in -// UTF-8. -// 2. Convert individual bytes to ranges, starting from the right of each byte -// sequence. For each range, ensure the bytes on the left and the ranges -// on the right are the identical. -// 3. Convert the resulting list of ranges into a state machine, collapsing -// identical states. -// 4. Convert the state machine to an array of bytes. -// 5. Output as a C++ file. -// -// To use: -// $ ninja -C out/Release build_utf8_validator_tables -// $ out/Release/build_utf8_validator_tables -// --output=base/i18n/utf8_validator_tables.cc -// $ git add base/i18n/utf8_validator_tables.cc -// -// Because the table is not expected to ever change, it is checked into the -// repository rather than being regenerated at build time. -// -// This code uses type uint8_t throughout to represent bytes, to avoid -// signed/unsigned char confusion. - -#include <stddef.h> -#include <stdint.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include <algorithm> -#include <map> -#include <string> -#include <vector> - -#include "base/command_line.h" -#include "base/files/file_path.h" -#include "base/files/file_util.h" -#include "base/logging.h" -#include "base/macros.h" -#include "base/numerics/safe_conversions.h" -#include "base/strings/stringprintf.h" -#include "third_party/icu/source/common/unicode/utf8.h" - -namespace { - -const char kHelpText[] = - "Usage: build_utf8_validator_tables [ --help ] [ --output=<file> ]\n"; - -const char kProlog[] = - "// Copyright 2013 The Chromium Authors. All rights reserved.\n" - "// Use of this source code is governed by a BSD-style license that can " - "be\n" - "// found in the LICENSE file.\n" - "\n" - "// This file is auto-generated by build_utf8_validator_tables.\n" - "// DO NOT EDIT.\n" - "\n" - "#include \"base/i18n/utf8_validator_tables.h\"\n" - "\n" - "namespace base {\n" - "namespace internal {\n" - "\n" - "const uint8_t kUtf8ValidatorTables[] = {\n"; - -const char kEpilog[] = - "};\n" - "\n" - "const size_t kUtf8ValidatorTablesSize = arraysize(kUtf8ValidatorTables);\n" - "\n" - "} // namespace internal\n" - "} // namespace base\n"; - -// Ranges are inclusive at both ends--they represent [from, to] -class Range { - public: - // Ranges always start with just one byte. - explicit Range(uint8_t value) : from_(value), to_(value) {} - - // Range objects are copyable and assignable to be used in STL - // containers. Since they only contain non-pointer POD types, the default copy - // constructor, assignment operator and destructor will work. - - // Add a byte to the range. We intentionally only support adding a byte at the - // end, since that is the only operation the code needs. - void AddByte(uint8_t to) { - CHECK(to == to_ + 1); - to_ = to; - } - - uint8_t from() const { return from_; } - uint8_t to() const { return to_; } - - bool operator<(const Range& rhs) const { - return (from() < rhs.from() || (from() == rhs.from() && to() < rhs.to())); - } - - bool operator==(const Range& rhs) const { - return from() == rhs.from() && to() == rhs.to(); - } - - private: - uint8_t from_; - uint8_t to_; -}; - -// A vector of Ranges is like a simple regular expression--it corresponds to -// a set of strings of the same length that have bytes in each position in -// the appropriate range. -typedef std::vector<Range> StringSet; - -// A UTF-8 "character" is represented by a sequence of bytes. -typedef std::vector<uint8_t> Character; - -// In the second stage of the algorithm, we want to convert a large list of -// Characters into a small list of StringSets. -struct Pair { - Character character; - StringSet set; -}; - -typedef std::vector<Pair> PairVector; - -// A class to print a table of numbers in the same style as clang-format. -class TablePrinter { - public: - explicit TablePrinter(FILE* stream) - : stream_(stream), values_on_this_line_(0), current_offset_(0) {} - - void PrintValue(uint8_t value) { - if (values_on_this_line_ == 0) { - fputs(" ", stream_); - } else if (values_on_this_line_ == kMaxValuesPerLine) { - fprintf(stream_, " // 0x%02x\n ", current_offset_); - values_on_this_line_ = 0; - } - fprintf(stream_, " 0x%02x,", static_cast<int>(value)); - ++values_on_this_line_; - ++current_offset_; - } - - void NewLine() { - while (values_on_this_line_ < kMaxValuesPerLine) { - fputs(" ", stream_); - ++values_on_this_line_; - } - fprintf(stream_, " // 0x%02x\n", current_offset_); - values_on_this_line_ = 0; - } - - private: - // stdio stream. Not owned. - FILE* stream_; - - // Number of values so far printed on this line. - int values_on_this_line_; - - // Total values printed so far. - int current_offset_; - - static const int kMaxValuesPerLine = 8; - - DISALLOW_COPY_AND_ASSIGN(TablePrinter); -}; - -// Start by filling a PairVector with characters. The resulting vector goes from -// "\x00" to "\xf4\x8f\xbf\xbf". -PairVector InitializeCharacters() { - PairVector vector; - for (int i = 0; i <= 0x10FFFF; ++i) { - if (i >= 0xD800 && i < 0xE000) { - // Surrogate codepoints are not permitted. Non-character code points are - // explicitly permitted. - continue; - } - uint8_t bytes[4]; - unsigned int offset = 0; - UBool is_error = false; - U8_APPEND(bytes, offset, arraysize(bytes), i, is_error); - DCHECK(!is_error); - DCHECK_GT(offset, 0u); - DCHECK_LE(offset, arraysize(bytes)); - Pair pair = {Character(bytes, bytes + offset), StringSet()}; - vector.push_back(pair); - } - return vector; -} - -// Construct a new Pair from |character| and the concatenation of |new_range| -// and |existing_set|, and append it to |pairs|. -void ConstructPairAndAppend(const Character& character, - const Range& new_range, - const StringSet& existing_set, - PairVector* pairs) { - Pair new_pair = {character, StringSet(1, new_range)}; - new_pair.set.insert( - new_pair.set.end(), existing_set.begin(), existing_set.end()); - pairs->push_back(new_pair); -} - -// Each pass over the PairVector strips one byte off the right-hand-side of the -// characters and adds a range to the set on the right. For example, the first -// pass converts the range from "\xe0\xa0\x80" to "\xe0\xa0\xbf" to ("\xe0\xa0", -// [\x80-\xbf]), then the second pass converts the range from ("\xe0\xa0", -// [\x80-\xbf]) to ("\xe0\xbf", [\x80-\xbf]) to ("\xe0", -// [\xa0-\xbf][\x80-\xbf]). -void MoveRightMostCharToSet(PairVector* pairs) { - PairVector new_pairs; - PairVector::const_iterator it = pairs->begin(); - while (it != pairs->end() && it->character.empty()) { - new_pairs.push_back(*it); - ++it; - } - CHECK(it != pairs->end()); - Character unconverted_bytes(it->character.begin(), it->character.end() - 1); - Range new_range(it->character.back()); - StringSet converted = it->set; - ++it; - while (it != pairs->end()) { - const Pair& current_pair = *it++; - if (current_pair.character.size() == unconverted_bytes.size() + 1 && - std::equal(unconverted_bytes.begin(), - unconverted_bytes.end(), - current_pair.character.begin()) && - converted == current_pair.set) { - // The particular set of UTF-8 codepoints we are validating guarantees - // that each byte range will be contiguous. This would not necessarily be - // true for an arbitrary set of UTF-8 codepoints. - DCHECK_EQ(new_range.to() + 1, current_pair.character.back()); - new_range.AddByte(current_pair.character.back()); - continue; - } - ConstructPairAndAppend(unconverted_bytes, new_range, converted, &new_pairs); - unconverted_bytes = Character(current_pair.character.begin(), - current_pair.character.end() - 1); - new_range = Range(current_pair.character.back()); - converted = current_pair.set; - } - ConstructPairAndAppend(unconverted_bytes, new_range, converted, &new_pairs); - new_pairs.swap(*pairs); -} - -void MoveAllCharsToSets(PairVector* pairs) { - // Since each pass of the function moves one character, and UTF-8 sequences - // are at most 4 characters long, this simply runs the algorithm four times. - for (int i = 0; i < 4; ++i) { - MoveRightMostCharToSet(pairs); - } -#if DCHECK_IS_ON() - for (PairVector::const_iterator it = pairs->begin(); it != pairs->end(); - ++it) { - DCHECK(it->character.empty()); - } -#endif -} - -// Logs the generated string sets in regular-expression style, ie. [\x00-\x7f], -// [\xc2-\xdf][\x80-\xbf], etc. This can be a useful sanity-check that the -// algorithm is working. Use the command-line option -// --vmodule=build_utf8_validator_tables=1 to see this output. -void LogStringSets(const PairVector& pairs) { - for (PairVector::const_iterator pair_it = pairs.begin(); - pair_it != pairs.end(); - ++pair_it) { - std::string set_as_string; - for (StringSet::const_iterator set_it = pair_it->set.begin(); - set_it != pair_it->set.end(); - ++set_it) { - set_as_string += base::StringPrintf("[\\x%02x-\\x%02x]", - static_cast<int>(set_it->from()), - static_cast<int>(set_it->to())); - } - VLOG(1) << set_as_string; - } -} - -// A single state in the state machine is represented by a sorted vector of -// start bytes and target states. All input bytes in the range between the start -// byte and the next entry in the vector (or 0xFF) result in a transition to the -// target state. -struct StateRange { - uint8_t from; - uint8_t target_state; -}; - -typedef std::vector<StateRange> State; - -// Generates a state where all bytes go to state 1 (invalid). This is also used -// as an initialiser for other states (since bytes from outside the desired -// range are invalid). -State GenerateInvalidState() { - const StateRange range = {0, 1}; - return State(1, range); -} - -// A map from a state (ie. a set of strings which will match from this state) to -// a number (which is an index into the array of states). -typedef std::map<StringSet, uint8_t> StateMap; - -// Create a new state corresponding to |set|, add it |states| and |state_map| -// and return the index it was given in |states|. -uint8_t MakeState(const StringSet& set, - std::vector<State>* states, - StateMap* state_map) { - DCHECK(!set.empty()); - const Range& range = set.front(); - const StringSet rest(set.begin() + 1, set.end()); - const StateMap::const_iterator where = state_map->find(rest); - const uint8_t target_state = where == state_map->end() - ? MakeState(rest, states, state_map) - : where->second; - DCHECK_LT(0, range.from()); - DCHECK_LT(range.to(), 0xFF); - const StateRange new_state_initializer[] = { - {0, 1}, - {range.from(), target_state}, - {static_cast<uint8_t>(range.to() + 1), 1}}; - states->push_back( - State(new_state_initializer, - new_state_initializer + arraysize(new_state_initializer))); - const uint8_t new_state_number = - base::checked_cast<uint8_t>(states->size() - 1); - CHECK(state_map->insert(std::make_pair(set, new_state_number)).second); - return new_state_number; -} - -std::vector<State> GenerateStates(const PairVector& pairs) { - // States 0 and 1 are the initial/valid state and invalid state, respectively. - std::vector<State> states(2, GenerateInvalidState()); - StateMap state_map; - state_map.insert(std::make_pair(StringSet(), 0)); - for (PairVector::const_iterator it = pairs.begin(); it != pairs.end(); ++it) { - DCHECK(it->character.empty()); - DCHECK(!it->set.empty()); - const Range& range = it->set.front(); - const StringSet rest(it->set.begin() + 1, it->set.end()); - const StateMap::const_iterator where = state_map.find(rest); - const uint8_t target_state = where == state_map.end() - ? MakeState(rest, &states, &state_map) - : where->second; - if (states[0].back().from == range.from()) { - DCHECK_EQ(1, states[0].back().target_state); - states[0].back().target_state = target_state; - DCHECK_LT(range.to(), 0xFF); - const StateRange new_range = {static_cast<uint8_t>(range.to() + 1), 1}; - states[0].push_back(new_range); - } else { - DCHECK_LT(range.to(), 0xFF); - const StateRange new_range_initializer[] = { - {range.from(), target_state}, - {static_cast<uint8_t>(range.to() + 1), 1}}; - states[0] - .insert(states[0].end(), - new_range_initializer, - new_range_initializer + arraysize(new_range_initializer)); - } - } - return states; -} - -// Output the generated states as a C++ table. Two tricks are used to compact -// the table: each state in the table starts with a shift value which indicates -// how many bits we can discard from the right-hand-side of the byte before -// doing the table lookup. Secondly, only the state-transitions for bytes -// with the top-bit set are included in the table; bytes without the top-bit set -// are just ASCII and are handled directly by the code. -void PrintStates(const std::vector<State>& states, FILE* stream) { - // First calculate the start-offset of each state. This allows the state - // machine to jump directly to the correct offset, avoiding an extra - // indirection. State 0 starts at offset 0. - std::vector<uint8_t> state_offset(1, 0); - std::vector<uint8_t> shifts; - uint8_t pos = 0; - - for (std::vector<State>::const_iterator state_it = states.begin(); - state_it != states.end(); - ++state_it) { - // We want to set |shift| to the (0-based) index of the least-significant - // set bit in any of the ranges for this state, since this tells us how many - // bits we can discard and still determine what range a byte lies in. Sadly - // it appears that ffs() is not portable, so we do it clumsily. - uint8_t shift = 7; - for (State::const_iterator range_it = state_it->begin(); - range_it != state_it->end(); - ++range_it) { - while (shift > 0 && range_it->from % (1 << shift) != 0) { - --shift; - } - } - shifts.push_back(shift); - pos += 1 + (1 << (7 - shift)); - state_offset.push_back(pos); - } - - DCHECK_EQ(129, state_offset[1]); - - fputs(kProlog, stream); - TablePrinter table_printer(stream); - - for (uint8_t state_index = 0; state_index < states.size(); ++state_index) { - const uint8_t shift = shifts[state_index]; - uint8_t next_range = 0; - uint8_t target_state = 1; - fprintf(stream, - " // State %d, offset 0x%02x\n", - static_cast<int>(state_index), - static_cast<int>(state_offset[state_index])); - table_printer.PrintValue(shift); - for (int i = 0; i < 0x100; i += (1 << shift)) { - if (next_range < states[state_index].size() && - states[state_index][next_range].from == i) { - target_state = states[state_index][next_range].target_state; - ++next_range; - } - if (i >= 0x80) { - table_printer.PrintValue(state_offset[target_state]); - } - } - table_printer.NewLine(); - } - - fputs(kEpilog, stream); -} - -} // namespace - -int main(int argc, char* argv[]) { - base::CommandLine::Init(argc, argv); - logging::LoggingSettings settings; - settings.logging_dest = logging::LOG_TO_SYSTEM_DEBUG_LOG; - logging::InitLogging(settings); - if (base::CommandLine::ForCurrentProcess()->HasSwitch("help")) { - fwrite(kHelpText, 1, arraysize(kHelpText), stdout); - exit(EXIT_SUCCESS); - } - base::FilePath filename = - base::CommandLine::ForCurrentProcess()->GetSwitchValuePath("output"); - - FILE* output = stdout; - if (!filename.empty()) { - output = base::OpenFile(filename, "wb"); - if (!output) - PLOG(FATAL) << "Couldn't open '" << filename.AsUTF8Unsafe() - << "' for writing"; - } - - // Step 1: Enumerate the characters - PairVector pairs = InitializeCharacters(); - // Step 2: Convert to sets. - MoveAllCharsToSets(&pairs); - if (VLOG_IS_ON(1)) { - LogStringSets(pairs); - } - // Step 3: Generate states. - std::vector<State> states = GenerateStates(pairs); - // Step 4/5: Print output - PrintStates(states, output); - - if (!filename.empty()) { - if (!base::CloseFile(output)) - PLOG(FATAL) << "Couldn't finish writing '" << filename.AsUTF8Unsafe() - << "'"; - } - - return EXIT_SUCCESS; -}
diff --git a/base/i18n/case_conversion.cc b/base/i18n/case_conversion.cc deleted file mode 100644 index a4a104c..0000000 --- a/base/i18n/case_conversion.cc +++ /dev/null
@@ -1,90 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "base/i18n/case_conversion.h" - -#include <stdint.h> - -#include "base/numerics/safe_conversions.h" -#include "base/strings/string16.h" -#include "base/strings/string_util.h" -#include "third_party/icu/source/common/unicode/uchar.h" -#include "third_party/icu/source/common/unicode/unistr.h" -#include "third_party/icu/source/common/unicode/ustring.h" - -namespace base { -namespace i18n { - -namespace { - -// Provides a uniform interface for upper/lower/folding which take take -// slightly varying parameters. -typedef int32_t (*CaseMapperFunction)(UChar* dest, int32_t dest_capacity, - const UChar* src, int32_t src_length, - UErrorCode* error); - -int32_t ToUpperMapper(UChar* dest, int32_t dest_capacity, - const UChar* src, int32_t src_length, - UErrorCode* error) { - // Use default locale. - return u_strToUpper(dest, dest_capacity, src, src_length, nullptr, error); -} - -int32_t ToLowerMapper(UChar* dest, int32_t dest_capacity, - const UChar* src, int32_t src_length, - UErrorCode* error) { - // Use default locale. - return u_strToLower(dest, dest_capacity, src, src_length, nullptr, error); -} - -int32_t FoldCaseMapper(UChar* dest, int32_t dest_capacity, - const UChar* src, int32_t src_length, - UErrorCode* error) { - return u_strFoldCase(dest, dest_capacity, src, src_length, - U_FOLD_CASE_DEFAULT, error); -} - -// Provides similar functionality as UnicodeString::caseMap but on string16. -string16 CaseMap(StringPiece16 string, CaseMapperFunction case_mapper) { - string16 dest; - if (string.empty()) - return dest; - - // Provide an initial guess that the string length won't change. The typical - // strings we use will very rarely change length in this process, so don't - // optimize for that case. - dest.resize(string.size()); - - UErrorCode error; - do { - error = U_ZERO_ERROR; - - // ICU won't terminate the string if there's not enough room for the null - // terminator, but will otherwise. So we don't need to save room for that. - // Don't use WriteInto, which assumes null terminators. - int32_t new_length = case_mapper( - &dest[0], saturated_cast<int32_t>(dest.size()), - string.data(), saturated_cast<int32_t>(string.size()), - &error); - dest.resize(new_length); - } while (error == U_BUFFER_OVERFLOW_ERROR); - return dest; -} - -} // namespace - -string16 ToLower(StringPiece16 string) { - return CaseMap(string, &ToLowerMapper); -} - -string16 ToUpper(StringPiece16 string) { - return CaseMap(string, &ToUpperMapper); -} - -string16 FoldCase(StringPiece16 string) { - return CaseMap(string, &FoldCaseMapper); -} - -} // namespace i18n -} // namespace base
diff --git a/base/i18n/case_conversion.h b/base/i18n/case_conversion.h deleted file mode 100644 index 0631a80..0000000 --- a/base/i18n/case_conversion.h +++ /dev/null
@@ -1,48 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef BASE_I18N_CASE_CONVERSION_H_ -#define BASE_I18N_CASE_CONVERSION_H_ - -#include "base/i18n/base_i18n_export.h" -#include "base/strings/string16.h" -#include "base/strings/string_piece.h" - -namespace base { -namespace i18n { - -// UNICODE CASE-HANDLING ADVICE -// -// In English it's always safe to convert to upper-case or lower-case text -// and get a good answer. But some languages have rules specific to those -// locales. One example is the Turkish I: -// http://www.i18nguy.com/unicode/turkish-i18n.html -// -// ToLower/ToUpper use the current ICU locale which will take into account -// the user language preference. Use this when dealing with user typing. -// -// FoldCase canonicalizes to a standardized form independent of the current -// locale. Use this when comparing general Unicode strings that don't -// necessarily belong in the user's current locale (like commands, protocol -// names, other strings from the web) for case-insensitive equality. -// -// Note that case conversions will change the length of the string in some -// not-uncommon cases. Never assume that the output is the same length as -// the input. - -// Returns the lower case equivalent of string. Uses ICU's current locale. -BASE_I18N_EXPORT string16 ToLower(StringPiece16 string); - -// Returns the upper case equivalent of string. Uses ICU's current locale. -BASE_I18N_EXPORT string16 ToUpper(StringPiece16 string); - -// Convert the given string to a canonical case, independent of the current -// locale. For ASCII the canonical form is lower case. -// See http://unicode.org/faq/casemap_charprop.html#2 -BASE_I18N_EXPORT string16 FoldCase(StringPiece16 string); - -} // namespace i18n -} // namespace base - -#endif // BASE_I18N_CASE_CONVERSION_H_
diff --git a/base/i18n/char_iterator.cc b/base/i18n/char_iterator.cc deleted file mode 100644 index d80b8b6..0000000 --- a/base/i18n/char_iterator.cc +++ /dev/null
@@ -1,80 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "base/i18n/char_iterator.h" - -#include "third_party/icu/source/common/unicode/utf8.h" -#include "third_party/icu/source/common/unicode/utf16.h" - -namespace base { -namespace i18n { - -UTF8CharIterator::UTF8CharIterator(const std::string* str) - : str_(reinterpret_cast<const uint8_t*>(str->data())), - len_(str->size()), - array_pos_(0), - next_pos_(0), - char_pos_(0), - char_(0) { - if (len_) - U8_NEXT(str_, next_pos_, len_, char_); -} - -UTF8CharIterator::~UTF8CharIterator() = default; - -bool UTF8CharIterator::Advance() { - if (array_pos_ >= len_) - return false; - - array_pos_ = next_pos_; - char_pos_++; - if (next_pos_ < len_) - U8_NEXT(str_, next_pos_, len_, char_); - - return true; -} - -UTF16CharIterator::UTF16CharIterator(const string16* str) - : str_(reinterpret_cast<const char16*>(str->data())), - len_(str->size()), - array_pos_(0), - next_pos_(0), - char_pos_(0), - char_(0) { - if (len_) - ReadChar(); -} - -UTF16CharIterator::UTF16CharIterator(const char16* str, size_t str_len) - : str_(str), - len_(str_len), - array_pos_(0), - next_pos_(0), - char_pos_(0), - char_(0) { - if (len_) - ReadChar(); -} - -UTF16CharIterator::~UTF16CharIterator() = default; - -bool UTF16CharIterator::Advance() { - if (array_pos_ >= len_) - return false; - - array_pos_ = next_pos_; - char_pos_++; - if (next_pos_ < len_) - ReadChar(); - - return true; -} - -void UTF16CharIterator::ReadChar() { - // This is actually a huge macro, so is worth having in a separate function. - U16_NEXT(str_, next_pos_, len_, char_); -} - -} // namespace i18n -} // namespace base
diff --git a/base/i18n/char_iterator.h b/base/i18n/char_iterator.h deleted file mode 100644 index 33f2934..0000000 --- a/base/i18n/char_iterator.h +++ /dev/null
@@ -1,134 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef BASE_I18N_CHAR_ITERATOR_H_ -#define BASE_I18N_CHAR_ITERATOR_H_ - -#include <stddef.h> -#include <stdint.h> - -#include <string> - -#include "base/i18n/base_i18n_export.h" -#include "base/macros.h" -#include "base/strings/string16.h" -#include "build_config.h" - -// The CharIterator classes iterate through the characters in UTF8 and -// UTF16 strings. Example usage: -// -// UTF8CharIterator iter(&str); -// while (!iter.end()) { -// VLOG(1) << iter.get(); -// iter.Advance(); -// } - -#if defined(OS_WIN) -typedef unsigned char uint8_t; -#endif - -namespace base { -namespace i18n { - -class BASE_I18N_EXPORT UTF8CharIterator { - public: - // Requires |str| to live as long as the UTF8CharIterator does. - explicit UTF8CharIterator(const std::string* str); - ~UTF8CharIterator(); - - // Return the starting array index of the current character within the - // string. - int32_t array_pos() const { return array_pos_; } - - // Return the logical index of the current character, independent of the - // number of bytes each character takes. - int32_t char_pos() const { return char_pos_; } - - // Return the current char. - int32_t get() const { return char_; } - - // Returns true if we're at the end of the string. - bool end() const { return array_pos_ == len_; } - - // Advance to the next actual character. Returns false if we're at the - // end of the string. - bool Advance(); - - private: - // The string we're iterating over. - const uint8_t* str_; - - // The length of the encoded string. - int32_t len_; - - // Array index. - int32_t array_pos_; - - // The next array index. - int32_t next_pos_; - - // Character index. - int32_t char_pos_; - - // The current character. - int32_t char_; - - DISALLOW_COPY_AND_ASSIGN(UTF8CharIterator); -}; - -class BASE_I18N_EXPORT UTF16CharIterator { - public: - // Requires |str| to live as long as the UTF16CharIterator does. - explicit UTF16CharIterator(const string16* str); - UTF16CharIterator(const char16* str, size_t str_len); - ~UTF16CharIterator(); - - // Return the starting array index of the current character within the - // string. - int32_t array_pos() const { return array_pos_; } - - // Return the logical index of the current character, independent of the - // number of codewords each character takes. - int32_t char_pos() const { return char_pos_; } - - // Return the current char. - int32_t get() const { return char_; } - - // Returns true if we're at the end of the string. - bool end() const { return array_pos_ == len_; } - - // Advance to the next actual character. Returns false if we're at the - // end of the string. - bool Advance(); - - private: - // Fills in the current character we found and advances to the next - // character, updating all flags as necessary. - void ReadChar(); - - // The string we're iterating over. - const char16* str_; - - // The length of the encoded string. - int32_t len_; - - // Array index. - int32_t array_pos_; - - // The next array index. - int32_t next_pos_; - - // Character index. - int32_t char_pos_; - - // The current character. - int32_t char_; - - DISALLOW_COPY_AND_ASSIGN(UTF16CharIterator); -}; - -} // namespace i18n -} // namespace base - -#endif // BASE_I18N_CHAR_ITERATOR_H_
diff --git a/base/i18n/character_encoding.cc b/base/i18n/character_encoding.cc deleted file mode 100644 index a1068c3..0000000 --- a/base/i18n/character_encoding.cc +++ /dev/null
@@ -1,42 +0,0 @@ -// Copyright 2016 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "base/i18n/character_encoding.h" - -#include "base/macros.h" -#include "third_party/icu/source/common/unicode/ucnv.h" - -namespace base { -namespace { - -// An array of all supported canonical encoding names. -const char* const kCanonicalEncodingNames[] = { - "Big5", "EUC-JP", "EUC-KR", "gb18030", - "GBK", "IBM866", "ISO-2022-JP", "ISO-8859-10", - "ISO-8859-13", "ISO-8859-14", "ISO-8859-15", "ISO-8859-16", - "ISO-8859-2", "ISO-8859-3", "ISO-8859-4", "ISO-8859-5", - "ISO-8859-6", "ISO-8859-7", "ISO-8859-8", "ISO-8859-8-I", - "KOI8-R", "KOI8-U", "macintosh", "Shift_JIS", - "UTF-16LE", "UTF-8", "windows-1250", "windows-1251", - "windows-1252", "windows-1253", "windows-1254", "windows-1255", - "windows-1256", "windows-1257", "windows-1258", "windows-874"}; - -} // namespace - -std::string GetCanonicalEncodingNameByAliasName(const std::string& alias_name) { - for (auto* encoding_name : kCanonicalEncodingNames) { - if (alias_name == encoding_name) - return alias_name; - } - static const char* kStandards[3] = {"HTML", "MIME", "IANA"}; - for (auto* standard : kStandards) { - UErrorCode error_code = U_ZERO_ERROR; - const char* canonical_name = - ucnv_getStandardName(alias_name.c_str(), standard, &error_code); - if (U_SUCCESS(error_code) && canonical_name) - return canonical_name; - } - return std::string(); -} -} // namespace base
diff --git a/base/i18n/character_encoding.h b/base/i18n/character_encoding.h deleted file mode 100644 index 974cb5a..0000000 --- a/base/i18n/character_encoding.h +++ /dev/null
@@ -1,20 +0,0 @@ -// Copyright 2016 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef BASE_I18N_CHARACTER_ENCODING_H_ -#define BASE_I18N_CHARACTER_ENCODING_H_ - -#include <string> - -#include "base/i18n/base_i18n_export.h" - -namespace base { - -// Return canonical encoding name according to the encoding alias name. -BASE_I18N_EXPORT std::string GetCanonicalEncodingNameByAliasName( - const std::string& alias_name); - -} // namespace base - -#endif // BASE_I18N_CHARACTER_ENCODING_H_
diff --git a/base/i18n/encoding_detection.cc b/base/i18n/encoding_detection.cc deleted file mode 100644 index f6bbf4a..0000000 --- a/base/i18n/encoding_detection.cc +++ /dev/null
@@ -1,40 +0,0 @@ -// Copyright 2016 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "base/i18n/encoding_detection.h" - -#include "build_config.h" -#include "third_party/ced/src/compact_enc_det/compact_enc_det.h" - -// third_party/ced/src/util/encodings/encodings.h, which is included -// by the include above, undefs UNICODE because that is a macro used -// internally in ced. If we later in the same translation unit do -// anything related to Windows or Windows headers those will then use -// the ASCII versions which we do not want. To avoid that happening in -// jumbo builds, we redefine UNICODE again here. -#if defined(OS_WIN) -#define UNICODE 1 -#endif // OS_WIN - -namespace base { - -bool DetectEncoding(const std::string& text, std::string* encoding) { - int consumed_bytes; - bool is_reliable; - Encoding enc = CompactEncDet::DetectEncoding( - text.c_str(), text.length(), nullptr, nullptr, nullptr, - UNKNOWN_ENCODING, - UNKNOWN_LANGUAGE, - CompactEncDet::QUERY_CORPUS, // plain text - false, // Include 7-bit encodings - &consumed_bytes, - &is_reliable); - - if (enc == UNKNOWN_ENCODING) - return false; - - *encoding = MimeEncodingName(enc); - return true; -} -} // namespace base
diff --git a/base/i18n/encoding_detection.h b/base/i18n/encoding_detection.h deleted file mode 100644 index c8e660c..0000000 --- a/base/i18n/encoding_detection.h +++ /dev/null
@@ -1,21 +0,0 @@ -// Copyright 2016 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef BASE_I18N_ENCODING_DETECTION_H_ -#define BASE_I18N_ENCODING_DETECTION_H_ - -#include <string> - -#include "base/compiler_specific.h" -#include "base/i18n/base_i18n_export.h" - -namespace base { - -// Detect encoding of |text| and put the name of encoding in |encoding|. -// Returns true on success. -BASE_I18N_EXPORT bool DetectEncoding(const std::string& text, - std::string* encoding) WARN_UNUSED_RESULT; -} // namespace base - -#endif // BASE_I18N_ENCODING_DETECTION_H_
diff --git a/base/i18n/file_util_icu.cc b/base/i18n/file_util_icu.cc deleted file mode 100644 index 20a7d2d..0000000 --- a/base/i18n/file_util_icu.cc +++ /dev/null
@@ -1,179 +0,0 @@ -// Copyright (c) 2012 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// File utilities that use the ICU library go in this file. - -#include "base/i18n/file_util_icu.h" - -#include <stdint.h> - -#include <memory> - -#include "base/files/file_path.h" -#include "base/i18n/icu_string_conversions.h" -#include "base/i18n/string_compare.h" -#include "base/logging.h" -#include "base/macros.h" -#include "base/memory/singleton.h" -#include "base/strings/string_util.h" -#include "base/strings/sys_string_conversions.h" -#include "base/strings/utf_string_conversions.h" -#include "build_config.h" -#include "third_party/icu/source/common/unicode/uniset.h" -#include "third_party/icu/source/i18n/unicode/coll.h" - -namespace base { -namespace i18n { - -namespace { - -class IllegalCharacters { - public: - static IllegalCharacters* GetInstance() { - return Singleton<IllegalCharacters>::get(); - } - - bool DisallowedEverywhere(UChar32 ucs4) { - return !!illegal_anywhere_->contains(ucs4); - } - - bool DisallowedLeadingOrTrailing(UChar32 ucs4) { - return !!illegal_at_ends_->contains(ucs4); - } - - bool IsAllowedName(const string16& s) { - return s.empty() || (!!illegal_anywhere_->containsNone( - icu::UnicodeString(s.c_str(), s.size())) && - !illegal_at_ends_->contains(*s.begin()) && - !illegal_at_ends_->contains(*s.rbegin())); - } - - private: - friend class Singleton<IllegalCharacters>; - friend struct DefaultSingletonTraits<IllegalCharacters>; - - IllegalCharacters(); - ~IllegalCharacters() = default; - - // set of characters considered invalid anywhere inside a filename. - std::unique_ptr<icu::UnicodeSet> illegal_anywhere_; - - // set of characters considered invalid at either end of a filename. - std::unique_ptr<icu::UnicodeSet> illegal_at_ends_; - - DISALLOW_COPY_AND_ASSIGN(IllegalCharacters); -}; - -IllegalCharacters::IllegalCharacters() { - UErrorCode everywhere_status = U_ZERO_ERROR; - UErrorCode ends_status = U_ZERO_ERROR; - // Control characters, formatting characters, non-characters, path separators, - // and some printable ASCII characters regarded as dangerous ('"*/:<>?\\'). - // See http://blogs.msdn.com/michkap/archive/2006/11/03/941420.aspx - // and http://msdn2.microsoft.com/en-us/library/Aa365247.aspx - // Note that code points in the "Other, Format" (Cf) category are ignored on - // HFS+ despite the ZERO_WIDTH_JOINER and ZERO_WIDTH_NON-JOINER being - // legitimate in Arabic and some S/SE Asian scripts. In addition tilde (~) is - // also excluded due to the possibility of interacting poorly with short - // filenames on VFAT. (Related to CVE-2014-9390) - illegal_anywhere_.reset(new icu::UnicodeSet( - UNICODE_STRING_SIMPLE("[[\"~*/:<>?\\\\|][:Cc:][:Cf:]]"), - everywhere_status)); - illegal_at_ends_.reset(new icu::UnicodeSet( - UNICODE_STRING_SIMPLE("[[:WSpace:][.]]"), ends_status)); - DCHECK(U_SUCCESS(everywhere_status)); - DCHECK(U_SUCCESS(ends_status)); - - // Add non-characters. If this becomes a performance bottleneck by - // any chance, do not add these to |set| and change IsFilenameLegal() - // to check |ucs4 & 0xFFFEu == 0xFFFEu|, in addiition to calling - // IsAllowedName(). - illegal_anywhere_->add(0xFDD0, 0xFDEF); - for (int i = 0; i <= 0x10; ++i) { - int plane_base = 0x10000 * i; - illegal_anywhere_->add(plane_base + 0xFFFE, plane_base + 0xFFFF); - } - illegal_anywhere_->freeze(); - illegal_at_ends_->freeze(); -} - -} // namespace - -bool IsFilenameLegal(const string16& file_name) { - return IllegalCharacters::GetInstance()->IsAllowedName(file_name); -} - -void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name, - char replace_char) { - IllegalCharacters* illegal = IllegalCharacters::GetInstance(); - - DCHECK(!(illegal->DisallowedEverywhere(replace_char))); - DCHECK(!(illegal->DisallowedLeadingOrTrailing(replace_char))); - - int cursor = 0; // The ICU macros expect an int. - while (cursor < static_cast<int>(file_name->size())) { - int char_begin = cursor; - uint32_t code_point; -#if defined(OS_WIN) - // Windows uses UTF-16 encoding for filenames. - U16_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()), - code_point); -#elif defined(OS_POSIX) || defined(OS_FUCHSIA) - // Mac and Chrome OS use UTF-8 encoding for filenames. - // Linux doesn't actually define file system encoding. Try to parse as - // UTF-8. - U8_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()), - code_point); -#else -#error Unsupported platform -#endif - - if (illegal->DisallowedEverywhere(code_point) || - ((char_begin == 0 || cursor == static_cast<int>(file_name->length())) && - illegal->DisallowedLeadingOrTrailing(code_point))) { - file_name->replace(char_begin, cursor - char_begin, 1, replace_char); - // We just made the potentially multi-byte/word char into one that only - // takes one byte/word, so need to adjust the cursor to point to the next - // character again. - cursor = char_begin + 1; - } - } -} - -bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b) { - UErrorCode error_code = U_ZERO_ERROR; - // Use the default collator. The default locale should have been properly - // set by the time this constructor is called. - std::unique_ptr<icu::Collator> collator( - icu::Collator::createInstance(error_code)); - DCHECK(U_SUCCESS(error_code)); - // Make it case-sensitive. - collator->setStrength(icu::Collator::TERTIARY); - -#if defined(OS_WIN) - return CompareString16WithCollator(*collator, WideToUTF16(a.value()), - WideToUTF16(b.value())) == UCOL_LESS; - -#elif defined(OS_POSIX) || defined(OS_FUCHSIA) - // On linux, the file system encoding is not defined. We assume - // SysNativeMBToWide takes care of it. - return CompareString16WithCollator( - *collator, WideToUTF16(SysNativeMBToWide(a.value())), - WideToUTF16(SysNativeMBToWide(b.value()))) == UCOL_LESS; -#endif -} - -void NormalizeFileNameEncoding(FilePath* file_name) { -#if defined(OS_CHROMEOS) - std::string normalized_str; - if (ConvertToUtf8AndNormalize(file_name->BaseName().value(), kCodepageUTF8, - &normalized_str) && - !normalized_str.empty()) { - *file_name = file_name->DirName().Append(FilePath(normalized_str)); - } -#endif -} - -} // namespace i18n -} // namespace base
diff --git a/base/i18n/file_util_icu.h b/base/i18n/file_util_icu.h deleted file mode 100644 index f8bd9f4..0000000 --- a/base/i18n/file_util_icu.h +++ /dev/null
@@ -1,58 +0,0 @@ -// Copyright (c) 2012 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef BASE_I18N_FILE_UTIL_ICU_H_ -#define BASE_I18N_FILE_UTIL_ICU_H_ - -// File utilities that use the ICU library go in this file. - -#include "base/files/file_path.h" -#include "base/i18n/base_i18n_export.h" -#include "base/strings/string16.h" - -namespace base { -namespace i18n { - -// Returns true if file_name does not have any illegal character. The input -// param has the same restriction as that for ReplaceIllegalCharacters. -BASE_I18N_EXPORT bool IsFilenameLegal(const string16& file_name); - -// Replaces characters in |file_name| that are illegal for file names with -// |replace_char|. |file_name| must not be a full or relative path, but just the -// file name component (since slashes are considered illegal). Any leading or -// trailing whitespace or periods in |file_name| is also replaced with the -// |replace_char|. -// -// Example: -// "bad:file*name?.txt" will be turned into "bad_file_name_.txt" when -// |replace_char| is '_'. -// -// Warning: Do not use this function as the sole means of sanitizing a filename. -// While the resulting filename itself would be legal, it doesn't necessarily -// mean that the file will behave safely. On Windows, certain reserved names -// refer to devices rather than files (E.g. LPT1), and some filenames could be -// interpreted as shell namespace extensions (E.g. Foo.{<GUID>}). -// -// On Windows, Chrome OS and Mac, the file system encoding is already known and -// parsed as UTF-8 and UTF-16 accordingly. -// On Linux, the file name will be parsed as UTF8. -// TODO(asanka): Move full filename sanitization logic here. -BASE_I18N_EXPORT void ReplaceIllegalCharactersInPath( - FilePath::StringType* file_name, - char replace_char); - -// Compares two filenames using the current locale information. This can be -// used to sort directory listings. It behaves like "operator<" for use in -// std::sort. -BASE_I18N_EXPORT bool LocaleAwareCompareFilenames(const FilePath& a, - const FilePath& b); - -// Calculates the canonical file-system representation of |file_name| base name. -// Modifies |file_name| in place. No-op if not on ChromeOS. -BASE_I18N_EXPORT void NormalizeFileNameEncoding(FilePath* file_name); - -} // namespace i18n -} // namespace base - -#endif // BASE_I18N_FILE_UTIL_ICU_H_
diff --git a/base/i18n/i18n_constants.cc b/base/i18n/i18n_constants.cc deleted file mode 100644 index 7d2f5fc..0000000 --- a/base/i18n/i18n_constants.cc +++ /dev/null
@@ -1,13 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "base/i18n/i18n_constants.h" - -namespace base { - -const char kCodepageLatin1[] = "ISO-8859-1"; -const char kCodepageUTF8[] = "UTF-8"; - -} // namespace base -
diff --git a/base/i18n/i18n_constants.h b/base/i18n/i18n_constants.h deleted file mode 100644 index c1bd87d..0000000 --- a/base/i18n/i18n_constants.h +++ /dev/null
@@ -1,21 +0,0 @@ -// Copyright (c) 2013 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef BASE_I18N_I18N_CONSTANTS_H_ -#define BASE_I18N_I18N_CONSTANTS_H_ - -#include "base/i18n/base_i18n_export.h" - -namespace base { - -// Names of codepages (charsets) understood by icu. -BASE_I18N_EXPORT extern const char kCodepageLatin1[]; // a.k.a. ISO 8859-1 -BASE_I18N_EXPORT extern const char kCodepageUTF8[]; - -// The other possible options are UTF-16BE and UTF-16LE, but they are unused in -// Chromium as of this writing. - -} // namespace base - -#endif // BASE_I18N_I18N_CONSTANTS_H_
diff --git a/base/i18n/icu_string_conversions.cc b/base/i18n/icu_string_conversions.cc deleted file mode 100644 index 6ec9980..0000000 --- a/base/i18n/icu_string_conversions.cc +++ /dev/null
@@ -1,223 +0,0 @@ -// Copyright (c) 2012 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "base/i18n/icu_string_conversions.h" - -#include <stddef.h> -#include <stdint.h> - -#include <memory> -#include <vector> - -#include "base/logging.h" -#include "base/strings/string_util.h" -#include "base/strings/utf_string_conversions.h" -#include "third_party/icu/source/common/unicode/normalizer2.h" -#include "third_party/icu/source/common/unicode/ucnv.h" -#include "third_party/icu/source/common/unicode/ucnv_cb.h" -#include "third_party/icu/source/common/unicode/ucnv_err.h" -#include "third_party/icu/source/common/unicode/ustring.h" - -namespace base { - -namespace { -// ToUnicodeCallbackSubstitute() is based on UCNV_TO_U_CALLBACK_SUBSTITUTE -// in source/common/ucnv_err.c. - -// Copyright (c) 1995-2006 International Business Machines Corporation -// and others -// -// All rights reserved. -// - -// Permission is hereby granted, free of charge, to any person obtaining a -// copy of this software and associated documentation files (the "Software"), -// to deal in the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, and/or -// sell copies of the Software, and to permit persons to whom the Software -// is furnished to do so, provided that the above copyright notice(s) and -// this permission notice appear in all copies of the Software and that -// both the above copyright notice(s) and this permission notice appear in -// supporting documentation. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT -// OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS -// INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT -// OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS -// OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE -// OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE -// OR PERFORMANCE OF THIS SOFTWARE. -// -// Except as contained in this notice, the name of a copyright holder -// shall not be used in advertising or otherwise to promote the sale, use -// or other dealings in this Software without prior written authorization -// of the copyright holder. - -// ___________________________________________________________________________ -// -// All trademarks and registered trademarks mentioned herein are the property -// of their respective owners. - -void ToUnicodeCallbackSubstitute(const void* context, - UConverterToUnicodeArgs *to_args, - const char* code_units, - int32_t length, - UConverterCallbackReason reason, - UErrorCode * err) { - static const UChar kReplacementChar = 0xFFFD; - if (reason <= UCNV_IRREGULAR) { - if (context == nullptr || - (*(reinterpret_cast<const char*>(context)) == 'i' && - reason == UCNV_UNASSIGNED)) { - *err = U_ZERO_ERROR; - ucnv_cbToUWriteUChars(to_args, &kReplacementChar, 1, 0, err); - } - // else the caller must have set the error code accordingly. - } - // else ignore the reset, close and clone calls. -} - -bool ConvertFromUTF16(UConverter* converter, const UChar* uchar_src, - int uchar_len, OnStringConversionError::Type on_error, - std::string* encoded) { - int encoded_max_length = UCNV_GET_MAX_BYTES_FOR_STRING(uchar_len, - ucnv_getMaxCharSize(converter)); - encoded->resize(encoded_max_length); - - UErrorCode status = U_ZERO_ERROR; - - // Setup our error handler. - switch (on_error) { - case OnStringConversionError::FAIL: - ucnv_setFromUCallBack(converter, UCNV_FROM_U_CALLBACK_STOP, nullptr, - nullptr, nullptr, &status); - break; - case OnStringConversionError::SKIP: - case OnStringConversionError::SUBSTITUTE: - ucnv_setFromUCallBack(converter, UCNV_FROM_U_CALLBACK_SKIP, nullptr, - nullptr, nullptr, &status); - break; - default: - NOTREACHED(); - } - - // ucnv_fromUChars returns size not including terminating null - int actual_size = ucnv_fromUChars(converter, &(*encoded)[0], - encoded_max_length, uchar_src, uchar_len, &status); - encoded->resize(actual_size); - ucnv_close(converter); - if (U_SUCCESS(status)) - return true; - encoded->clear(); // Make sure the output is empty on error. - return false; -} - -// Set up our error handler for ToUTF-16 converters -void SetUpErrorHandlerForToUChars(OnStringConversionError::Type on_error, - UConverter* converter, UErrorCode* status) { - switch (on_error) { - case OnStringConversionError::FAIL: - ucnv_setToUCallBack(converter, UCNV_TO_U_CALLBACK_STOP, nullptr, nullptr, - nullptr, status); - break; - case OnStringConversionError::SKIP: - ucnv_setToUCallBack(converter, UCNV_TO_U_CALLBACK_SKIP, nullptr, nullptr, - nullptr, status); - break; - case OnStringConversionError::SUBSTITUTE: - ucnv_setToUCallBack(converter, ToUnicodeCallbackSubstitute, nullptr, - nullptr, nullptr, status); - break; - default: - NOTREACHED(); - } -} - -} // namespace - -// Codepage <-> Wide/UTF-16 --------------------------------------------------- - -bool UTF16ToCodepage(const string16& utf16, - const char* codepage_name, - OnStringConversionError::Type on_error, - std::string* encoded) { - encoded->clear(); - - UErrorCode status = U_ZERO_ERROR; - UConverter* converter = ucnv_open(codepage_name, &status); - if (!U_SUCCESS(status)) - return false; - - return ConvertFromUTF16(converter, utf16.c_str(), - static_cast<int>(utf16.length()), on_error, encoded); -} - -bool CodepageToUTF16(const std::string& encoded, - const char* codepage_name, - OnStringConversionError::Type on_error, - string16* utf16) { - utf16->clear(); - - UErrorCode status = U_ZERO_ERROR; - UConverter* converter = ucnv_open(codepage_name, &status); - if (!U_SUCCESS(status)) - return false; - - // Even in the worst case, the maximum length in 2-byte units of UTF-16 - // output would be at most the same as the number of bytes in input. There - // is no single-byte encoding in which a character is mapped to a - // non-BMP character requiring two 2-byte units. - // - // Moreover, non-BMP characters in legacy multibyte encodings - // (e.g. EUC-JP, GB18030) take at least 2 bytes. The only exceptions are - // BOCU and SCSU, but we don't care about them. - size_t uchar_max_length = encoded.length() + 1; - - SetUpErrorHandlerForToUChars(on_error, converter, &status); - std::unique_ptr<char16[]> buffer(new char16[uchar_max_length]); - int actual_size = ucnv_toUChars(converter, buffer.get(), - static_cast<int>(uchar_max_length), encoded.data(), - static_cast<int>(encoded.length()), &status); - ucnv_close(converter); - if (!U_SUCCESS(status)) { - utf16->clear(); // Make sure the output is empty on error. - return false; - } - - utf16->assign(buffer.get(), actual_size); - return true; -} - -bool ConvertToUtf8AndNormalize(const std::string& text, - const std::string& charset, - std::string* result) { - result->clear(); - string16 utf16; - if (!CodepageToUTF16( - text, charset.c_str(), OnStringConversionError::FAIL, &utf16)) - return false; - - UErrorCode status = U_ZERO_ERROR; - const icu::Normalizer2* normalizer = icu::Normalizer2::getNFCInstance(status); - DCHECK(U_SUCCESS(status)); - if (U_FAILURE(status)) - return false; - int32_t utf16_length = static_cast<int32_t>(utf16.length()); - icu::UnicodeString normalized(utf16.data(), utf16_length); - int32_t normalized_prefix_length = - normalizer->spanQuickCheckYes(normalized, status); - if (normalized_prefix_length < utf16_length) { - icu::UnicodeString un_normalized(normalized, normalized_prefix_length); - normalized.truncate(normalized_prefix_length); - normalizer->normalizeSecondAndAppend(normalized, un_normalized, status); - } - if (U_FAILURE(status)) - return false; - normalized.toUTF8String(*result); - return true; -} - -} // namespace base
diff --git a/base/i18n/icu_string_conversions.h b/base/i18n/icu_string_conversions.h deleted file mode 100644 index cbdcb99..0000000 --- a/base/i18n/icu_string_conversions.h +++ /dev/null
@@ -1,57 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef BASE_I18N_ICU_STRING_CONVERSIONS_H_ -#define BASE_I18N_ICU_STRING_CONVERSIONS_H_ - -#include <string> - -#include "base/i18n/base_i18n_export.h" -#include "base/i18n/i18n_constants.h" -#include "base/strings/string16.h" - -namespace base { - -// Defines the error handling modes of UTF16ToCodepage and CodepageToUTF16. -class OnStringConversionError { - public: - enum Type { - // The function will return failure. The output buffer will be empty. - FAIL, - - // The offending characters are skipped and the conversion will proceed as - // if they did not exist. - SKIP, - - // When converting to Unicode, the offending byte sequences are substituted - // by Unicode replacement character (U+FFFD). When converting from Unicode, - // this is the same as SKIP. - SUBSTITUTE, - }; - - private: - OnStringConversionError() = delete; -}; - -// Converts between UTF-16 strings and the encoding specified. If the -// encoding doesn't exist or the encoding fails (when on_error is FAIL), -// returns false. -BASE_I18N_EXPORT bool UTF16ToCodepage(const string16& utf16, - const char* codepage_name, - OnStringConversionError::Type on_error, - std::string* encoded); -BASE_I18N_EXPORT bool CodepageToUTF16(const std::string& encoded, - const char* codepage_name, - OnStringConversionError::Type on_error, - string16* utf16); - -// Converts from any codepage to UTF-8 and ensures the resulting UTF-8 is -// normalized. -BASE_I18N_EXPORT bool ConvertToUtf8AndNormalize(const std::string& text, - const std::string& charset, - std::string* result); - -} // namespace base - -#endif // BASE_I18N_ICU_STRING_CONVERSIONS_H_
diff --git a/base/i18n/icu_util.cc b/base/i18n/icu_util.cc deleted file mode 100644 index 1cbfbd6..0000000 --- a/base/i18n/icu_util.cc +++ /dev/null
@@ -1,275 +0,0 @@ -// Copyright (c) 2012 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "base/i18n/icu_util.h" - -#if defined(OS_WIN) -#include <windows.h> -#endif - -#include <string> - -#include "base/debug/alias.h" -#include "base/files/file_path.h" -#include "base/files/memory_mapped_file.h" -#include "base/logging.h" -#include "base/path_service.h" -#include "base/strings/string_util.h" -#include "base/strings/sys_string_conversions.h" -#include "build_config.h" -#include "third_party/icu/source/common/unicode/putil.h" -#include "third_party/icu/source/common/unicode/udata.h" -#if (defined(OS_LINUX) && !defined(OS_CHROMEOS)) || defined(OS_ANDROID) -#include "third_party/icu/source/i18n/unicode/timezone.h" -#endif - -#if defined(OS_ANDROID) -#include "base/android/apk_assets.h" -#include "base/android/timezone_utils.h" -#endif - -#if defined(OS_IOS) -#include "base/ios/ios_util.h" -#endif - -#if defined(OS_MACOSX) -#include "base/mac/foundation_util.h" -#endif - -#if defined(OS_FUCHSIA) -#include "base/base_paths_fuchsia.h" -#endif - -namespace base { -namespace i18n { - -#if ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_SHARED -#define ICU_UTIL_DATA_SYMBOL "icudt" U_ICU_VERSION_SHORT "_dat" -#if defined(OS_WIN) -#define ICU_UTIL_DATA_SHARED_MODULE_NAME "icudt.dll" -#endif -#endif - -namespace { -#if !defined(OS_NACL) -#if DCHECK_IS_ON() -// Assert that we are not called more than once. Even though calling this -// function isn't harmful (ICU can handle it), being called twice probably -// indicates a programming error. -bool g_check_called_once = true; -bool g_called_once = false; -#endif // DCHECK_IS_ON() - -#if ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE - -// Use an unversioned file name to simplify a icu version update down the road. -// No need to change the filename in multiple places (gyp files, windows -// build pkg configurations, etc). 'l' stands for Little Endian. -// This variable is exported through the header file. -const char kIcuDataFileName[] = "icudtl.dat"; -#if defined(OS_ANDROID) -const char kAndroidAssetsIcuDataFileName[] = "assets/icudtl.dat"; -#endif - -// File handle intentionally never closed. Not using File here because its -// Windows implementation guards against two instances owning the same -// PlatformFile (which we allow since we know it is never freed). -PlatformFile g_icudtl_pf = kInvalidPlatformFile; -MemoryMappedFile* g_icudtl_mapped_file = nullptr; -MemoryMappedFile::Region g_icudtl_region; - -void LazyInitIcuDataFile() { - if (g_icudtl_pf != kInvalidPlatformFile) { - return; - } -#if defined(OS_ANDROID) - int fd = base::android::OpenApkAsset(kAndroidAssetsIcuDataFileName, - &g_icudtl_region); - g_icudtl_pf = fd; - if (fd != -1) { - return; - } -// For unit tests, data file is located on disk, so try there as a fallback. -#endif // defined(OS_ANDROID) -#if !defined(OS_MACOSX) - FilePath data_path; - if (!PathService::Get(DIR_ASSETS, &data_path)) { - LOG(ERROR) << "Can't find " << kIcuDataFileName; - return; - } - data_path = data_path.AppendASCII(kIcuDataFileName); -#else - // Assume it is in the framework bundle's Resources directory. - ScopedCFTypeRef<CFStringRef> data_file_name( - SysUTF8ToCFStringRef(kIcuDataFileName)); - FilePath data_path = mac::PathForFrameworkBundleResource(data_file_name); -#if defined(OS_IOS) - FilePath override_data_path = base::ios::FilePathOfEmbeddedICU(); - if (!override_data_path.empty()) { - data_path = override_data_path; - } -#endif // !defined(OS_IOS) - if (data_path.empty()) { - LOG(ERROR) << kIcuDataFileName << " not found in bundle"; - return; - } -#endif // !defined(OS_MACOSX) - File file(data_path, File::FLAG_OPEN | File::FLAG_READ); - if (file.IsValid()) { - g_icudtl_pf = file.TakePlatformFile(); - g_icudtl_region = MemoryMappedFile::Region::kWholeFile; - } -} - -bool InitializeICUWithFileDescriptorInternal( - PlatformFile data_fd, - const MemoryMappedFile::Region& data_region) { - // This can be called multiple times in tests. - if (g_icudtl_mapped_file) { - return true; - } - if (data_fd == kInvalidPlatformFile) { - LOG(ERROR) << "Invalid file descriptor to ICU data received."; - return false; - } - - std::unique_ptr<MemoryMappedFile> icudtl_mapped_file(new MemoryMappedFile()); - if (!icudtl_mapped_file->Initialize(File(data_fd), data_region)) { - LOG(ERROR) << "Couldn't mmap icu data file"; - return false; - } - g_icudtl_mapped_file = icudtl_mapped_file.release(); - - UErrorCode err = U_ZERO_ERROR; - udata_setCommonData(const_cast<uint8_t*>(g_icudtl_mapped_file->data()), &err); -#if defined(OS_ANDROID) - if (err == U_ZERO_ERROR) { - // On Android, we can't leave it up to ICU to set the default timezone - // because ICU's timezone detection does not work in many timezones (e.g. - // Australia/Sydney, Asia/Seoul, Europe/Paris ). Use JNI to detect the host - // timezone and set the ICU default timezone accordingly in advance of - // actual use. See crbug.com/722821 and - // https://ssl.icu-project.org/trac/ticket/13208 . - base::string16 timezone_id = base::android::GetDefaultTimeZoneId(); - icu::TimeZone::adoptDefault(icu::TimeZone::createTimeZone( - icu::UnicodeString(FALSE, timezone_id.data(), timezone_id.length()))); - } -#endif - // Never try to load ICU data from files. - udata_setFileAccess(UDATA_ONLY_PACKAGES, &err); - return err == U_ZERO_ERROR; -} -#endif // ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE -#endif // !defined(OS_NACL) - -} // namespace - -#if !defined(OS_NACL) -#if ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE -#if defined(OS_ANDROID) -bool InitializeICUWithFileDescriptor( - PlatformFile data_fd, - const MemoryMappedFile::Region& data_region) { -#if DCHECK_IS_ON() - DCHECK(!g_check_called_once || !g_called_once); - g_called_once = true; -#endif - return InitializeICUWithFileDescriptorInternal(data_fd, data_region); -} - -PlatformFile GetIcuDataFileHandle(MemoryMappedFile::Region* out_region) { - CHECK_NE(g_icudtl_pf, kInvalidPlatformFile); - *out_region = g_icudtl_region; - return g_icudtl_pf; -} -#endif - -const uint8_t* GetRawIcuMemory() { - CHECK(g_icudtl_mapped_file); - return g_icudtl_mapped_file->data(); -} - -bool InitializeICUFromRawMemory(const uint8_t* raw_memory) { -#if !defined(COMPONENT_BUILD) -#if DCHECK_IS_ON() - DCHECK(!g_check_called_once || !g_called_once); - g_called_once = true; -#endif - - UErrorCode err = U_ZERO_ERROR; - udata_setCommonData(const_cast<uint8_t*>(raw_memory), &err); - // Never try to load ICU data from files. - udata_setFileAccess(UDATA_ONLY_PACKAGES, &err); - return err == U_ZERO_ERROR; -#else - return true; -#endif -} - -#endif // ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE - -bool InitializeICU() { -#if DCHECK_IS_ON() - DCHECK(!g_check_called_once || !g_called_once); - g_called_once = true; -#endif - - bool result; -#if (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_SHARED) - FilePath data_path; - PathService::Get(DIR_ASSETS, &data_path); - data_path = data_path.AppendASCII(ICU_UTIL_DATA_SHARED_MODULE_NAME); - - HMODULE module = LoadLibrary(data_path.value().c_str()); - if (!module) { - LOG(ERROR) << "Failed to load " << ICU_UTIL_DATA_SHARED_MODULE_NAME; - return false; - } - - FARPROC addr = GetProcAddress(module, ICU_UTIL_DATA_SYMBOL); - if (!addr) { - LOG(ERROR) << ICU_UTIL_DATA_SYMBOL << ": not found in " - << ICU_UTIL_DATA_SHARED_MODULE_NAME; - return false; - } - - UErrorCode err = U_ZERO_ERROR; - udata_setCommonData(reinterpret_cast<void*>(addr), &err); - // Never try to load ICU data from files. - udata_setFileAccess(UDATA_ONLY_PACKAGES, &err); - result = (err == U_ZERO_ERROR); -#elif (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_STATIC) - // The ICU data is statically linked. - result = true; -#elif (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE) - // If the ICU data directory is set, ICU won't actually load the data until - // it is needed. This can fail if the process is sandboxed at that time. - // Instead, we map the file in and hand off the data so the sandbox won't - // cause any problems. - LazyInitIcuDataFile(); - result = - InitializeICUWithFileDescriptorInternal(g_icudtl_pf, g_icudtl_region); -#endif - -// To respond to the timezone change properly, the default timezone -// cache in ICU has to be populated on starting up. -// TODO(jungshik): Some callers do not care about tz at all. If necessary, -// add a boolean argument to this function to init'd the default tz only -// when requested. -#if defined(OS_LINUX) && !defined(OS_CHROMEOS) - if (result) - std::unique_ptr<icu::TimeZone> zone(icu::TimeZone::createDefault()); -#endif - return result; -} -#endif // !defined(OS_NACL) - -void AllowMultipleInitializeCallsForTesting() { -#if DCHECK_IS_ON() && !defined(OS_NACL) - g_check_called_once = false; -#endif -} - -} // namespace i18n -} // namespace base
diff --git a/base/i18n/icu_util.h b/base/i18n/icu_util.h deleted file mode 100644 index 9bae8a1..0000000 --- a/base/i18n/icu_util.h +++ /dev/null
@@ -1,67 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef BASE_I18N_ICU_UTIL_H_ -#define BASE_I18N_ICU_UTIL_H_ - -#include <stdint.h> - -#include "base/files/memory_mapped_file.h" -#include "base/i18n/base_i18n_export.h" -#include "build_config.h" - -#define ICU_UTIL_DATA_FILE 0 -#define ICU_UTIL_DATA_SHARED 1 -#define ICU_UTIL_DATA_STATIC 2 - -namespace base { -namespace i18n { - -#if !defined(OS_NACL) -// Call this function to load ICU's data tables for the current process. This -// function should be called before ICU is used. -BASE_I18N_EXPORT bool InitializeICU(); - -#if ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE -#if defined(OS_ANDROID) -// Returns the PlatformFile and Region that was initialized by InitializeICU(). -// Use with InitializeICUWithFileDescriptor(). -BASE_I18N_EXPORT PlatformFile GetIcuDataFileHandle( - MemoryMappedFile::Region* out_region); - -// Android uses a file descriptor passed by browser process to initialize ICU -// in render processes. -BASE_I18N_EXPORT bool InitializeICUWithFileDescriptor( - PlatformFile data_fd, - const MemoryMappedFile::Region& data_region); -#endif - -// Returns a void pointer to the memory mapped ICU data file. -// -// There are cases on Android where we would be unsafely reusing a file -// descriptor within the same process when initializing two copies of ICU from -// different binaries in the same address space. This returns an unowned -// pointer to the memory mapped icu data file; consumers copies of base must -// not outlive the copy of base that owns the memory mapped file. -BASE_I18N_EXPORT const uint8_t* GetRawIcuMemory(); - -// Initializes ICU memory -// -// This does nothing in component builds; this initialization should only be -// done in cases where there could be two copies of base in a single process in -// non-component builds. (The big example is standalone service libraries: the -// Service Manager will have a copy of base linked in, and the majority of -// service libraries will have base linked in but in non-component builds, -// these will be separate copies of base.) -BASE_I18N_EXPORT bool InitializeICUFromRawMemory(const uint8_t* raw_memory); -#endif // ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE -#endif // !defined(OS_NACL) - -// In a test binary, the call above might occur twice. -BASE_I18N_EXPORT void AllowMultipleInitializeCallsForTesting(); - -} // namespace i18n -} // namespace base - -#endif // BASE_I18N_ICU_UTIL_H_
diff --git a/base/i18n/message_formatter.cc b/base/i18n/message_formatter.cc deleted file mode 100644 index c69dd07..0000000 --- a/base/i18n/message_formatter.cc +++ /dev/null
@@ -1,142 +0,0 @@ -// Copyright 2015 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "base/i18n/message_formatter.h" - -#include "base/i18n/unicodestring.h" -#include "base/logging.h" -#include "base/numerics/safe_conversions.h" -#include "base/time/time.h" -#include "third_party/icu/source/common/unicode/unistr.h" -#include "third_party/icu/source/common/unicode/utypes.h" -#include "third_party/icu/source/i18n/unicode/fmtable.h" -#include "third_party/icu/source/i18n/unicode/msgfmt.h" - -using icu::UnicodeString; - -namespace base { -namespace i18n { -namespace { -UnicodeString UnicodeStringFromStringPiece(StringPiece str) { - return UnicodeString::fromUTF8( - icu::StringPiece(str.data(), base::checked_cast<int32_t>(str.size()))); -} -} // anonymous namespace - -namespace internal { -MessageArg::MessageArg() : formattable(nullptr) {} - -MessageArg::MessageArg(const char* s) - : formattable(new icu::Formattable(UnicodeStringFromStringPiece(s))) {} - -MessageArg::MessageArg(StringPiece s) - : formattable(new icu::Formattable(UnicodeStringFromStringPiece(s))) {} - -MessageArg::MessageArg(const std::string& s) - : formattable(new icu::Formattable(UnicodeString::fromUTF8(s))) {} - -MessageArg::MessageArg(const string16& s) - : formattable(new icu::Formattable(UnicodeString(s.data(), s.size()))) {} - -MessageArg::MessageArg(int i) : formattable(new icu::Formattable(i)) {} - -MessageArg::MessageArg(int64_t i) : formattable(new icu::Formattable(i)) {} - -MessageArg::MessageArg(double d) : formattable(new icu::Formattable(d)) {} - -MessageArg::MessageArg(const Time& t) - : formattable(new icu::Formattable(static_cast<UDate>(t.ToJsTime()))) {} - -MessageArg::~MessageArg() = default; - -// Tests if this argument has a value, and if so increments *count. -bool MessageArg::has_value(int *count) const { - if (formattable == nullptr) - return false; - - ++*count; - return true; -} - -} // namespace internal - -string16 MessageFormatter::FormatWithNumberedArgs( - StringPiece16 msg, - const internal::MessageArg& arg0, - const internal::MessageArg& arg1, - const internal::MessageArg& arg2, - const internal::MessageArg& arg3, - const internal::MessageArg& arg4, - const internal::MessageArg& arg5, - const internal::MessageArg& arg6) { - int32_t args_count = 0; - icu::Formattable args[] = { - arg0.has_value(&args_count) ? *arg0.formattable : icu::Formattable(), - arg1.has_value(&args_count) ? *arg1.formattable : icu::Formattable(), - arg2.has_value(&args_count) ? *arg2.formattable : icu::Formattable(), - arg3.has_value(&args_count) ? *arg3.formattable : icu::Formattable(), - arg4.has_value(&args_count) ? *arg4.formattable : icu::Formattable(), - arg5.has_value(&args_count) ? *arg5.formattable : icu::Formattable(), - arg6.has_value(&args_count) ? *arg6.formattable : icu::Formattable(), - }; - - UnicodeString msg_string(msg.data(), msg.size()); - UErrorCode error = U_ZERO_ERROR; - icu::MessageFormat format(msg_string, error); - icu::UnicodeString formatted; - icu::FieldPosition ignore(icu::FieldPosition::DONT_CARE); - format.format(args, args_count, formatted, ignore, error); - if (U_FAILURE(error)) { - LOG(ERROR) << "MessageFormat(" << msg.as_string() << ") failed with " - << u_errorName(error); - return string16(); - } - return i18n::UnicodeStringToString16(formatted); -} - -string16 MessageFormatter::FormatWithNamedArgs( - StringPiece16 msg, - StringPiece name0, const internal::MessageArg& arg0, - StringPiece name1, const internal::MessageArg& arg1, - StringPiece name2, const internal::MessageArg& arg2, - StringPiece name3, const internal::MessageArg& arg3, - StringPiece name4, const internal::MessageArg& arg4, - StringPiece name5, const internal::MessageArg& arg5, - StringPiece name6, const internal::MessageArg& arg6) { - icu::UnicodeString names[] = { - UnicodeStringFromStringPiece(name0), - UnicodeStringFromStringPiece(name1), - UnicodeStringFromStringPiece(name2), - UnicodeStringFromStringPiece(name3), - UnicodeStringFromStringPiece(name4), - UnicodeStringFromStringPiece(name5), - UnicodeStringFromStringPiece(name6), - }; - int32_t args_count = 0; - icu::Formattable args[] = { - arg0.has_value(&args_count) ? *arg0.formattable : icu::Formattable(), - arg1.has_value(&args_count) ? *arg1.formattable : icu::Formattable(), - arg2.has_value(&args_count) ? *arg2.formattable : icu::Formattable(), - arg3.has_value(&args_count) ? *arg3.formattable : icu::Formattable(), - arg4.has_value(&args_count) ? *arg4.formattable : icu::Formattable(), - arg5.has_value(&args_count) ? *arg5.formattable : icu::Formattable(), - arg6.has_value(&args_count) ? *arg6.formattable : icu::Formattable(), - }; - - UnicodeString msg_string(msg.data(), msg.size()); - UErrorCode error = U_ZERO_ERROR; - icu::MessageFormat format(msg_string, error); - - icu::UnicodeString formatted; - format.format(names, args, args_count, formatted, error); - if (U_FAILURE(error)) { - LOG(ERROR) << "MessageFormat(" << msg.as_string() << ") failed with " - << u_errorName(error); - return string16(); - } - return i18n::UnicodeStringToString16(formatted); -} - -} // namespace i18n -} // namespace base
diff --git a/base/i18n/message_formatter.h b/base/i18n/message_formatter.h deleted file mode 100644 index 36a656d..0000000 --- a/base/i18n/message_formatter.h +++ /dev/null
@@ -1,128 +0,0 @@ -// Copyright 2015 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef BASE_I18N_MESSAGE_FORMATTER_H_ -#define BASE_I18N_MESSAGE_FORMATTER_H_ - -#include <stdint.h> - -#include <memory> -#include <string> - -#include "base/i18n/base_i18n_export.h" -#include "base/macros.h" -#include "base/strings/string16.h" -#include "base/strings/string_piece.h" -#include "third_party/icu/source/common/unicode/uversion.h" - -U_NAMESPACE_BEGIN -class Formattable; -U_NAMESPACE_END - -namespace base { - -class Time; - -namespace i18n { - -class MessageFormatter; - -namespace internal { - -class BASE_I18N_EXPORT MessageArg { - public: - MessageArg(const char* s); - MessageArg(StringPiece s); - MessageArg(const std::string& s); - MessageArg(const string16& s); - MessageArg(int i); - MessageArg(int64_t i); - MessageArg(double d); - MessageArg(const Time& t); - ~MessageArg(); - - private: - friend class base::i18n::MessageFormatter; - MessageArg(); - // Tests if this argument has a value, and if so increments *count. - bool has_value(int* count) const; - std::unique_ptr<icu::Formattable> formattable; - DISALLOW_COPY_AND_ASSIGN(MessageArg); -}; - -} // namespace internal - -// Message Formatter with the ICU message format syntax support. -// It can format strings (UTF-8 and UTF-16), numbers and base::Time with -// plural, gender and other 'selectors' support. This is handy if you -// have multiple parameters of differnt types and some of them require -// plural or gender/selector support. -// -// To use this API for locale-sensitive formatting, retrieve a 'message -// template' in the ICU message format from a message bundle (e.g. with -// l10n_util::GetStringUTF16()) and pass it to FormatWith{Named,Numbered}Args. -// -// MessageFormat specs: -// http://icu-project.org/apiref/icu4j/com/ibm/icu/text/MessageFormat.html -// http://icu-project.org/apiref/icu4c/classicu_1_1DecimalFormat.html#details -// Examples: -// http://userguide.icu-project.org/formatparse/messages -// message_formatter_unittest.cc -// go/plurals inside Google. -// TODO(jshin): Document this API in md format docs. -// Caveat: -// When plural/select/gender is used along with other format specifiers such -// as date or number, plural/select/gender should be at the top level. It's -// not an ICU restriction but a constraint imposed by Google's translation -// infrastructure. Message A does not work. It must be revised to Message B. -// -// A. -// Rated <ph name="RATING">{0, number,0.0}<ex>3.2</ex></ph> -// by {1, plural, =1{a user} other{# users}} -// -// B. -// {1, plural, -// =1{Rated <ph name="RATING">{0, number,0.0}<ex>3.2</ex></ph> -// by a user.} -// other{Rated <ph name="RATING">{0, number,0.0}<ex>3.2</ex></ph> -// by # users.}} - -class BASE_I18N_EXPORT MessageFormatter { - public: - static string16 FormatWithNamedArgs( - StringPiece16 msg, - StringPiece name0 = StringPiece(), - const internal::MessageArg& arg0 = internal::MessageArg(), - StringPiece name1 = StringPiece(), - const internal::MessageArg& arg1 = internal::MessageArg(), - StringPiece name2 = StringPiece(), - const internal::MessageArg& arg2 = internal::MessageArg(), - StringPiece name3 = StringPiece(), - const internal::MessageArg& arg3 = internal::MessageArg(), - StringPiece name4 = StringPiece(), - const internal::MessageArg& arg4 = internal::MessageArg(), - StringPiece name5 = StringPiece(), - const internal::MessageArg& arg5 = internal::MessageArg(), - StringPiece name6 = StringPiece(), - const internal::MessageArg& arg6 = internal::MessageArg()); - - static string16 FormatWithNumberedArgs( - StringPiece16 msg, - const internal::MessageArg& arg0 = internal::MessageArg(), - const internal::MessageArg& arg1 = internal::MessageArg(), - const internal::MessageArg& arg2 = internal::MessageArg(), - const internal::MessageArg& arg3 = internal::MessageArg(), - const internal::MessageArg& arg4 = internal::MessageArg(), - const internal::MessageArg& arg5 = internal::MessageArg(), - const internal::MessageArg& arg6 = internal::MessageArg()); - - private: - MessageFormatter() = delete; - DISALLOW_COPY_AND_ASSIGN(MessageFormatter); -}; - -} // namespace i18n -} // namespace base - -#endif // BASE_I18N_MESSAGE_FORMATTER_H_
diff --git a/base/i18n/number_formatting.cc b/base/i18n/number_formatting.cc deleted file mode 100644 index 0ab031e..0000000 --- a/base/i18n/number_formatting.cc +++ /dev/null
@@ -1,97 +0,0 @@ -// Copyright (c) 2012 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "base/i18n/number_formatting.h" - -#include <stddef.h> - -#include <memory> - -#include "base/format_macros.h" -#include "base/i18n/message_formatter.h" -#include "base/i18n/unicodestring.h" -#include "base/lazy_instance.h" -#include "base/logging.h" -#include "base/strings/string_util.h" -#include "base/strings/stringprintf.h" -#include "base/strings/utf_string_conversions.h" -#include "third_party/icu/source/common/unicode/ustring.h" -#include "third_party/icu/source/i18n/unicode/numfmt.h" - -namespace base { - -namespace { - -// A simple wrapper around icu::NumberFormat that allows for resetting it -// (as LazyInstance does not). -struct NumberFormatWrapper { - NumberFormatWrapper() { - Reset(); - } - - void Reset() { - // There's no ICU call to destroy a NumberFormat object other than - // operator delete, so use the default Delete, which calls operator delete. - // This can cause problems if a different allocator is used by this file - // than by ICU. - UErrorCode status = U_ZERO_ERROR; - number_format.reset(icu::NumberFormat::createInstance(status)); - DCHECK(U_SUCCESS(status)); - } - - std::unique_ptr<icu::NumberFormat> number_format; -}; - -LazyInstance<NumberFormatWrapper>::DestructorAtExit g_number_format_int = - LAZY_INSTANCE_INITIALIZER; -LazyInstance<NumberFormatWrapper>::DestructorAtExit g_number_format_float = - LAZY_INSTANCE_INITIALIZER; - -} // namespace - -string16 FormatNumber(int64_t number) { - icu::NumberFormat* number_format = - g_number_format_int.Get().number_format.get(); - - if (!number_format) { - // As a fallback, just return the raw number in a string. - return ASCIIToUTF16(StringPrintf("%" PRId64, number)); - } - icu::UnicodeString ustr; - number_format->format(number, ustr); - - return i18n::UnicodeStringToString16(ustr); -} - -string16 FormatDouble(double number, int fractional_digits) { - icu::NumberFormat* number_format = - g_number_format_float.Get().number_format.get(); - - if (!number_format) { - // As a fallback, just return the raw number in a string. - return ASCIIToUTF16(StringPrintf("%f", number)); - } - number_format->setMaximumFractionDigits(fractional_digits); - number_format->setMinimumFractionDigits(fractional_digits); - icu::UnicodeString ustr; - number_format->format(number, ustr); - - return i18n::UnicodeStringToString16(ustr); -} - -string16 FormatPercent(int number) { - return i18n::MessageFormatter::FormatWithNumberedArgs( - ASCIIToUTF16("{0,number,percent}"), static_cast<double>(number) / 100.0); -} - -namespace testing { - -void ResetFormatters() { - g_number_format_int.Get().Reset(); - g_number_format_float.Get().Reset(); -} - -} // namespace testing - -} // namespace base
diff --git a/base/i18n/number_formatting.h b/base/i18n/number_formatting.h deleted file mode 100644 index 9636bf4..0000000 --- a/base/i18n/number_formatting.h +++ /dev/null
@@ -1,38 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef BASE_I18N_NUMBER_FORMATTING_H_ -#define BASE_I18N_NUMBER_FORMATTING_H_ - -#include <stdint.h> - -#include "base/i18n/base_i18n_export.h" -#include "base/strings/string16.h" - -namespace base { - -// Return a number formatted with separators in the user's locale. -// Ex: FormatNumber(1234567) => "1,234,567" in English, "1.234.567" in German -BASE_I18N_EXPORT string16 FormatNumber(int64_t number); - -// Return a number formatted with separators in the user's locale. -// Ex: FormatDouble(1234567.8, 1) -// => "1,234,567.8" in English, "1.234.567,8" in German -BASE_I18N_EXPORT string16 FormatDouble(double number, int fractional_digits); - -// Return a percentage formatted with space and symbol in the user's locale. -// Ex: FormatPercent(12) => "12%" in English, "12 %" in Romanian -BASE_I18N_EXPORT string16 FormatPercent(int number); - -namespace testing { - -// Causes cached formatters to be discarded and recreated. Only useful for -// testing. -BASE_I18N_EXPORT void ResetFormatters(); - -} // namespace testing - -} // namespace base - -#endif // BASE_I18N_NUMBER_FORMATTING_H_
diff --git a/base/i18n/rtl.cc b/base/i18n/rtl.cc deleted file mode 100644 index 295968b..0000000 --- a/base/i18n/rtl.cc +++ /dev/null
@@ -1,491 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "base/i18n/rtl.h" - -#include <stddef.h> -#include <stdint.h> - -#include <algorithm> - -#include "base/command_line.h" -#include "base/files/file_path.h" -#include "base/i18n/base_i18n_switches.h" -#include "base/logging.h" -#include "base/macros.h" -#include "base/strings/string_split.h" -#include "base/strings/string_util.h" -#include "base/strings/sys_string_conversions.h" -#include "base/strings/utf_string_conversions.h" -#include "build_config.h" -#include "third_party/icu/source/common/unicode/locid.h" -#include "third_party/icu/source/common/unicode/uchar.h" -#include "third_party/icu/source/common/unicode/uscript.h" -#include "third_party/icu/source/i18n/unicode/coll.h" - -#if defined(OS_IOS) -#include "base/debug/crash_logging.h" -#include "base/ios/ios_util.h" -#endif - -namespace { - -// Extract language, country and variant, but ignore keywords. For example, -// en-US, ca@valencia, ca-ES@valencia. -std::string GetLocaleString(const icu::Locale& locale) { - const char* language = locale.getLanguage(); - const char* country = locale.getCountry(); - const char* variant = locale.getVariant(); - - std::string result = - (language != nullptr && *language != '\0') ? language : "und"; - - if (country != nullptr && *country != '\0') { - result += '-'; - result += country; - } - - if (variant != nullptr && *variant != '\0') - result += '@' + base::ToLowerASCII(variant); - - return result; -} - -// Returns LEFT_TO_RIGHT or RIGHT_TO_LEFT if |character| has strong -// directionality, returns UNKNOWN_DIRECTION if it doesn't. Please refer to -// http://unicode.org/reports/tr9/ for more information. -base::i18n::TextDirection GetCharacterDirection(UChar32 character) { - static bool has_switch = base::CommandLine::ForCurrentProcess()->HasSwitch( - switches::kForceTextDirection); - if (has_switch) { - base::CommandLine* command_line = base::CommandLine::ForCurrentProcess(); - std::string force_flag = - command_line->GetSwitchValueASCII(switches::kForceTextDirection); - - if (force_flag == switches::kForceDirectionRTL) - return base::i18n::RIGHT_TO_LEFT; - if (force_flag == switches::kForceDirectionLTR) - return base::i18n::LEFT_TO_RIGHT; - } - // Now that we have the character, we use ICU in order to query for the - // appropriate Unicode BiDi character type. - int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS); - if ((property == U_RIGHT_TO_LEFT) || - (property == U_RIGHT_TO_LEFT_ARABIC) || - (property == U_RIGHT_TO_LEFT_EMBEDDING) || - (property == U_RIGHT_TO_LEFT_OVERRIDE)) { - return base::i18n::RIGHT_TO_LEFT; - } else if ((property == U_LEFT_TO_RIGHT) || - (property == U_LEFT_TO_RIGHT_EMBEDDING) || - (property == U_LEFT_TO_RIGHT_OVERRIDE)) { - return base::i18n::LEFT_TO_RIGHT; - } - return base::i18n::UNKNOWN_DIRECTION; -} - -} // namespace - -namespace base { -namespace i18n { - -// Represents the locale-specific ICU text direction. -static TextDirection g_icu_text_direction = UNKNOWN_DIRECTION; - -// Convert the ICU default locale to a string. -std::string GetConfiguredLocale() { - return GetLocaleString(icu::Locale::getDefault()); -} - -// Convert the ICU canonicalized locale to a string. -std::string GetCanonicalLocale(const std::string& locale) { - return GetLocaleString(icu::Locale::createCanonical(locale.c_str())); -} - -// Convert Chrome locale name to ICU locale name -std::string ICULocaleName(const std::string& locale_string) { - // If not Spanish, just return it. - if (locale_string.substr(0, 2) != "es") - return locale_string; - // Expand es to es-ES. - if (LowerCaseEqualsASCII(locale_string, "es")) - return "es-ES"; - // Map es-419 (Latin American Spanish) to es-FOO depending on the system - // locale. If it's es-RR other than es-ES, map to es-RR. Otherwise, map - // to es-MX (the most populous in Spanish-speaking Latin America). - if (LowerCaseEqualsASCII(locale_string, "es-419")) { - const icu::Locale& locale = icu::Locale::getDefault(); - std::string language = locale.getLanguage(); - const char* country = locale.getCountry(); - if (LowerCaseEqualsASCII(language, "es") && - !LowerCaseEqualsASCII(country, "es")) { - language += '-'; - language += country; - return language; - } - return "es-MX"; - } - // Currently, Chrome has only "es" and "es-419", but later we may have - // more specific "es-RR". - return locale_string; -} - -void SetICUDefaultLocale(const std::string& locale_string) { -#if defined(OS_IOS) - static base::debug::CrashKeyString* crash_key_locale = - base::debug::AllocateCrashKeyString("icu_locale_input", - base::debug::CrashKeySize::Size256); - base::debug::SetCrashKeyString(crash_key_locale, locale_string); -#endif - icu::Locale locale(ICULocaleName(locale_string).c_str()); - UErrorCode error_code = U_ZERO_ERROR; - const char* lang = locale.getLanguage(); - if (lang != nullptr && *lang != '\0') { - icu::Locale::setDefault(locale, error_code); - } else { - LOG(ERROR) << "Failed to set the ICU default locale to " << locale_string - << ". Falling back to en-US."; - icu::Locale::setDefault(icu::Locale::getUS(), error_code); - } - g_icu_text_direction = UNKNOWN_DIRECTION; -} - -bool IsRTL() { - return ICUIsRTL(); -} - -bool ICUIsRTL() { - if (g_icu_text_direction == UNKNOWN_DIRECTION) { - const icu::Locale& locale = icu::Locale::getDefault(); - g_icu_text_direction = GetTextDirectionForLocaleInStartUp(locale.getName()); - } - return g_icu_text_direction == RIGHT_TO_LEFT; -} - -TextDirection GetForcedTextDirection() { -// On iOS, check for RTL forcing. -#if defined(OS_IOS) - if (base::ios::IsInForcedRTL()) - return base::i18n::RIGHT_TO_LEFT; -#endif - - base::CommandLine* command_line = base::CommandLine::ForCurrentProcess(); - if (command_line->HasSwitch(switches::kForceUIDirection)) { - std::string force_flag = - command_line->GetSwitchValueASCII(switches::kForceUIDirection); - - if (force_flag == switches::kForceDirectionLTR) - return base::i18n::LEFT_TO_RIGHT; - - if (force_flag == switches::kForceDirectionRTL) - return base::i18n::RIGHT_TO_LEFT; - } - - return base::i18n::UNKNOWN_DIRECTION; -} - -TextDirection GetTextDirectionForLocaleInStartUp(const char* locale_name) { - // Check for direction forcing. - TextDirection forced_direction = GetForcedTextDirection(); - if (forced_direction != UNKNOWN_DIRECTION) - return forced_direction; - - // This list needs to be updated in alphabetical order if we add more RTL - // locales. - static const char kRTLLanguageCodes[][3] = {"ar", "fa", "he", "iw", "ur"}; - std::vector<StringPiece> locale_split = - SplitStringPiece(locale_name, "-_", KEEP_WHITESPACE, SPLIT_WANT_ALL); - const StringPiece& language_code = locale_split[0]; - if (std::binary_search(kRTLLanguageCodes, - kRTLLanguageCodes + arraysize(kRTLLanguageCodes), - language_code)) - return RIGHT_TO_LEFT; - return LEFT_TO_RIGHT; -} - -TextDirection GetTextDirectionForLocale(const char* locale_name) { - // Check for direction forcing. - TextDirection forced_direction = GetForcedTextDirection(); - if (forced_direction != UNKNOWN_DIRECTION) - return forced_direction; - - UErrorCode status = U_ZERO_ERROR; - ULayoutType layout_dir = uloc_getCharacterOrientation(locale_name, &status); - DCHECK(U_SUCCESS(status)); - // Treat anything other than RTL as LTR. - return (layout_dir != ULOC_LAYOUT_RTL) ? LEFT_TO_RIGHT : RIGHT_TO_LEFT; -} - -TextDirection GetFirstStrongCharacterDirection(const string16& text) { - const UChar* string = text.c_str(); - size_t length = text.length(); - size_t position = 0; - while (position < length) { - UChar32 character; - size_t next_position = position; - U16_NEXT(string, next_position, length, character); - TextDirection direction = GetCharacterDirection(character); - if (direction != UNKNOWN_DIRECTION) - return direction; - position = next_position; - } - return LEFT_TO_RIGHT; -} - -TextDirection GetLastStrongCharacterDirection(const string16& text) { - const UChar* string = text.c_str(); - size_t position = text.length(); - while (position > 0) { - UChar32 character; - size_t prev_position = position; - U16_PREV(string, 0, prev_position, character); - TextDirection direction = GetCharacterDirection(character); - if (direction != UNKNOWN_DIRECTION) - return direction; - position = prev_position; - } - return LEFT_TO_RIGHT; -} - -TextDirection GetStringDirection(const string16& text) { - const UChar* string = text.c_str(); - size_t length = text.length(); - size_t position = 0; - - TextDirection result(UNKNOWN_DIRECTION); - while (position < length) { - UChar32 character; - size_t next_position = position; - U16_NEXT(string, next_position, length, character); - TextDirection direction = GetCharacterDirection(character); - if (direction != UNKNOWN_DIRECTION) { - if (result != UNKNOWN_DIRECTION && result != direction) - return UNKNOWN_DIRECTION; - result = direction; - } - position = next_position; - } - - // Handle the case of a string not containing any strong directionality - // characters defaulting to LEFT_TO_RIGHT. - if (result == UNKNOWN_DIRECTION) - return LEFT_TO_RIGHT; - - return result; -} - -#if defined(OS_WIN) -bool AdjustStringForLocaleDirection(string16* text) { - if (!IsRTL() || text->empty()) - return false; - - // Marking the string as LTR if the locale is RTL and the string does not - // contain strong RTL characters. Otherwise, mark the string as RTL. - bool has_rtl_chars = StringContainsStrongRTLChars(*text); - if (!has_rtl_chars) - WrapStringWithLTRFormatting(text); - else - WrapStringWithRTLFormatting(text); - - return true; -} - -bool UnadjustStringForLocaleDirection(string16* text) { - if (!IsRTL() || text->empty()) - return false; - - *text = StripWrappingBidiControlCharacters(*text); - return true; -} -#else -bool AdjustStringForLocaleDirection(string16* text) { - // On OS X & GTK the directionality of a label is determined by the first - // strongly directional character. - // However, we want to make sure that in an LTR-language-UI all strings are - // left aligned and vice versa. - // A problem can arise if we display a string which starts with user input. - // User input may be of the opposite directionality to the UI. So the whole - // string will be displayed in the opposite directionality, e.g. if we want to - // display in an LTR UI [such as US English]: - // - // EMAN_NOISNETXE is now installed. - // - // Since EXTENSION_NAME begins with a strong RTL char, the label's - // directionality will be set to RTL and the string will be displayed visually - // as: - // - // .is now installed EMAN_NOISNETXE - // - // In order to solve this issue, we prepend an LRM to the string. An LRM is a - // strongly directional LTR char. - // We also append an LRM at the end, which ensures that we're in an LTR - // context. - - // Unlike Windows, Linux and OS X can correctly display RTL glyphs out of the - // box so there is no issue with displaying zero-width bidi control characters - // on any system. Thus no need for the !IsRTL() check here. - if (text->empty()) - return false; - - bool ui_direction_is_rtl = IsRTL(); - - bool has_rtl_chars = StringContainsStrongRTLChars(*text); - if (!ui_direction_is_rtl && has_rtl_chars) { - WrapStringWithRTLFormatting(text); - text->insert(static_cast<size_t>(0), static_cast<size_t>(1), - kLeftToRightMark); - text->push_back(kLeftToRightMark); - } else if (ui_direction_is_rtl && has_rtl_chars) { - WrapStringWithRTLFormatting(text); - text->insert(static_cast<size_t>(0), static_cast<size_t>(1), - kRightToLeftMark); - text->push_back(kRightToLeftMark); - } else if (ui_direction_is_rtl) { - WrapStringWithLTRFormatting(text); - text->insert(static_cast<size_t>(0), static_cast<size_t>(1), - kRightToLeftMark); - text->push_back(kRightToLeftMark); - } else { - return false; - } - - return true; -} - -bool UnadjustStringForLocaleDirection(string16* text) { - if (text->empty()) - return false; - - size_t begin_index = 0; - char16 begin = text->at(begin_index); - if (begin == kLeftToRightMark || - begin == kRightToLeftMark) { - ++begin_index; - } - - size_t end_index = text->length() - 1; - char16 end = text->at(end_index); - if (end == kLeftToRightMark || - end == kRightToLeftMark) { - --end_index; - } - - string16 unmarked_text = - text->substr(begin_index, end_index - begin_index + 1); - *text = StripWrappingBidiControlCharacters(unmarked_text); - return true; -} - -#endif // !OS_WIN - -void EnsureTerminatedDirectionalFormatting(string16* text) { - int count = 0; - for (auto c : *text) { - if (c == kLeftToRightEmbeddingMark || c == kRightToLeftEmbeddingMark || - c == kLeftToRightOverride || c == kRightToLeftOverride) { - ++count; - } else if (c == kPopDirectionalFormatting && count > 0) { - --count; - } - } - for (int j = 0; j < count; j++) - text->push_back(kPopDirectionalFormatting); -} - -void SanitizeUserSuppliedString(string16* text) { - EnsureTerminatedDirectionalFormatting(text); - AdjustStringForLocaleDirection(text); -} - -bool StringContainsStrongRTLChars(const string16& text) { - const UChar* string = text.c_str(); - size_t length = text.length(); - size_t position = 0; - while (position < length) { - UChar32 character; - size_t next_position = position; - U16_NEXT(string, next_position, length, character); - - // Now that we have the character, we use ICU in order to query for the - // appropriate Unicode BiDi character type. - int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS); - if ((property == U_RIGHT_TO_LEFT) || (property == U_RIGHT_TO_LEFT_ARABIC)) - return true; - - position = next_position; - } - - return false; -} - -void WrapStringWithLTRFormatting(string16* text) { - if (text->empty()) - return; - - // Inserting an LRE (Left-To-Right Embedding) mark as the first character. - text->insert(static_cast<size_t>(0), static_cast<size_t>(1), - kLeftToRightEmbeddingMark); - - // Inserting a PDF (Pop Directional Formatting) mark as the last character. - text->push_back(kPopDirectionalFormatting); -} - -void WrapStringWithRTLFormatting(string16* text) { - if (text->empty()) - return; - - // Inserting an RLE (Right-To-Left Embedding) mark as the first character. - text->insert(static_cast<size_t>(0), static_cast<size_t>(1), - kRightToLeftEmbeddingMark); - - // Inserting a PDF (Pop Directional Formatting) mark as the last character. - text->push_back(kPopDirectionalFormatting); -} - -void WrapPathWithLTRFormatting(const FilePath& path, - string16* rtl_safe_path) { - // Wrap the overall path with LRE-PDF pair which essentialy marks the - // string as a Left-To-Right string. - // Inserting an LRE (Left-To-Right Embedding) mark as the first character. - rtl_safe_path->push_back(kLeftToRightEmbeddingMark); -#if defined(OS_MACOSX) - rtl_safe_path->append(UTF8ToUTF16(path.value())); -#elif defined(OS_WIN) - rtl_safe_path->append(path.value()); -#else // defined(OS_POSIX) && !defined(OS_MACOSX) - std::wstring wide_path = base::SysNativeMBToWide(path.value()); - rtl_safe_path->append(WideToUTF16(wide_path)); -#endif - // Inserting a PDF (Pop Directional Formatting) mark as the last character. - rtl_safe_path->push_back(kPopDirectionalFormatting); -} - -string16 GetDisplayStringInLTRDirectionality(const string16& text) { - // Always wrap the string in RTL UI (it may be appended to RTL string). - // Also wrap strings with an RTL first strong character direction in LTR UI. - if (IsRTL() || GetFirstStrongCharacterDirection(text) == RIGHT_TO_LEFT) { - string16 text_mutable(text); - WrapStringWithLTRFormatting(&text_mutable); - return text_mutable; - } - return text; -} - -string16 StripWrappingBidiControlCharacters(const string16& text) { - if (text.empty()) - return text; - size_t begin_index = 0; - char16 begin = text[begin_index]; - if (begin == kLeftToRightEmbeddingMark || - begin == kRightToLeftEmbeddingMark || - begin == kLeftToRightOverride || - begin == kRightToLeftOverride) - ++begin_index; - size_t end_index = text.length() - 1; - if (text[end_index] == kPopDirectionalFormatting) - --end_index; - return text.substr(begin_index, end_index - begin_index + 1); -} - -} // namespace i18n -} // namespace base
diff --git a/base/i18n/rtl.h b/base/i18n/rtl.h deleted file mode 100644 index d3ba29f..0000000 --- a/base/i18n/rtl.h +++ /dev/null
@@ -1,168 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef BASE_I18N_RTL_H_ -#define BASE_I18N_RTL_H_ - -#include <string> - -#include "base/compiler_specific.h" -#include "base/i18n/base_i18n_export.h" -#include "base/strings/string16.h" -#include "build_config.h" - -namespace base { - -class FilePath; - -namespace i18n { - -const char16 kRightToLeftMark = 0x200F; -const char16 kLeftToRightMark = 0x200E; -const char16 kLeftToRightEmbeddingMark = 0x202A; -const char16 kRightToLeftEmbeddingMark = 0x202B; -const char16 kPopDirectionalFormatting = 0x202C; -const char16 kLeftToRightOverride = 0x202D; -const char16 kRightToLeftOverride = 0x202E; - -// Locale.java mirrored this enum TextDirection. Please keep in sync. -enum TextDirection { - UNKNOWN_DIRECTION = 0, - RIGHT_TO_LEFT = 1, - LEFT_TO_RIGHT = 2, - TEXT_DIRECTION_MAX = LEFT_TO_RIGHT, -}; - -// Get the locale that the currently running process has been configured to use. -// The return value is of the form language[-country] (e.g., en-US) where the -// language is the 2 or 3 letter code from ISO-639. -BASE_I18N_EXPORT std::string GetConfiguredLocale(); - -// Canonicalize a string (eg. a POSIX locale string) to a Chrome locale name. -BASE_I18N_EXPORT std::string GetCanonicalLocale(const std::string& locale); - -// Sets the default locale of ICU. -// Once the application locale of Chrome in GetApplicationLocale is determined, -// the default locale of ICU need to be changed to match the application locale -// so that ICU functions work correctly in a locale-dependent manner. -// This is handy in that we don't have to call GetApplicationLocale() -// everytime we call locale-dependent ICU APIs as long as we make sure -// that this is called before any locale-dependent API is called. -BASE_I18N_EXPORT void SetICUDefaultLocale(const std::string& locale_string); - -// Returns true if the application text direction is right-to-left. -BASE_I18N_EXPORT bool IsRTL(); - -// Returns whether the text direction for the default ICU locale is RTL. This -// assumes that SetICUDefaultLocale has been called to set the default locale to -// the UI locale of Chrome. -// NOTE: Generally, you should call IsRTL() instead of this. -BASE_I18N_EXPORT bool ICUIsRTL(); - -// Gets the explicitly forced text direction for debugging. If no forcing is -// applied, returns UNKNOWN_DIRECTION. -BASE_I18N_EXPORT TextDirection GetForcedTextDirection(); - -// Returns the text direction for |locale_name|. -// As a startup optimization, this method checks the locale against a list of -// Chrome-supported RTL locales. -BASE_I18N_EXPORT TextDirection -GetTextDirectionForLocaleInStartUp(const char* locale_name); - -// Returns the text direction for |locale_name|. -BASE_I18N_EXPORT TextDirection GetTextDirectionForLocale( - const char* locale_name); - -// Given the string in |text|, returns the directionality of the first or last -// character with strong directionality in the string. If no character in the -// text has strong directionality, LEFT_TO_RIGHT is returned. The Bidi -// character types L, LRE, LRO, R, AL, RLE, and RLO are considered as strong -// directionality characters. Please refer to http://unicode.org/reports/tr9/ -// for more information. -BASE_I18N_EXPORT TextDirection GetFirstStrongCharacterDirection( - const string16& text); -BASE_I18N_EXPORT TextDirection GetLastStrongCharacterDirection( - const string16& text); - -// Given the string in |text|, returns LEFT_TO_RIGHT or RIGHT_TO_LEFT if all the -// strong directionality characters in the string are of the same -// directionality. It returns UNKNOWN_DIRECTION if the string contains a mix of -// LTR and RTL strong directionality characters. Defaults to LEFT_TO_RIGHT if -// the string does not contain directionality characters. Please refer to -// http://unicode.org/reports/tr9/ for more information. -BASE_I18N_EXPORT TextDirection GetStringDirection(const string16& text); - -// Given the string in |text|, this function modifies the string in place with -// the appropriate Unicode formatting marks that mark the string direction -// (either left-to-right or right-to-left). The function checks both the current -// locale and the contents of the string in order to determine the direction of -// the returned string. The function returns true if the string in |text| was -// properly adjusted. -// -// Certain LTR strings are not rendered correctly when the context is RTL. For -// example, the string "Foo!" will appear as "!Foo" if it is rendered as is in -// an RTL context. Calling this function will make sure the returned localized -// string is always treated as a right-to-left string. This is done by -// inserting certain Unicode formatting marks into the returned string. -// -// ** Notes about the Windows version of this function: -// TODO(idana) bug 6806: this function adjusts the string in question only -// if the current locale is right-to-left. The function does not take care of -// the opposite case (an RTL string displayed in an LTR context) since -// adjusting the string involves inserting Unicode formatting characters that -// Windows does not handle well unless right-to-left language support is -// installed. Since the English version of Windows doesn't have right-to-left -// language support installed by default, inserting the direction Unicode mark -// results in Windows displaying squares. -BASE_I18N_EXPORT bool AdjustStringForLocaleDirection(string16* text); - -// Undoes the actions of the above function (AdjustStringForLocaleDirection). -BASE_I18N_EXPORT bool UnadjustStringForLocaleDirection(string16* text); - -// Ensures |text| contains no unterminated directional formatting characters, by -// appending the appropriate pop-directional-formatting characters to the end of -// |text|. -BASE_I18N_EXPORT void EnsureTerminatedDirectionalFormatting(string16* text); - -// Sanitizes the |text| by terminating any directional override/embedding -// characters and then adjusting the string for locale direction. -BASE_I18N_EXPORT void SanitizeUserSuppliedString(string16* text); - -// Returns true if the string contains at least one character with strong right -// to left directionality; that is, a character with either R or AL Unicode -// BiDi character type. -BASE_I18N_EXPORT bool StringContainsStrongRTLChars(const string16& text); - -// Wraps a string with an LRE-PDF pair which essentialy marks the string as a -// Left-To-Right string. Doing this is useful in order to make sure LTR -// strings are rendered properly in an RTL context. -BASE_I18N_EXPORT void WrapStringWithLTRFormatting(string16* text); - -// Wraps a string with an RLE-PDF pair which essentialy marks the string as a -// Right-To-Left string. Doing this is useful in order to make sure RTL -// strings are rendered properly in an LTR context. -BASE_I18N_EXPORT void WrapStringWithRTLFormatting(string16* text); - -// Wraps file path to get it to display correctly in RTL UI. All filepaths -// should be passed through this function before display in UI for RTL locales. -BASE_I18N_EXPORT void WrapPathWithLTRFormatting(const FilePath& path, - string16* rtl_safe_path); - -// Return the string in |text| wrapped with LRE (Left-To-Right Embedding) and -// PDF (Pop Directional Formatting) marks, if needed for UI display purposes. -BASE_I18N_EXPORT string16 GetDisplayStringInLTRDirectionality( - const string16& text) WARN_UNUSED_RESULT; - -// Strip the beginning (U+202A..U+202B, U+202D..U+202E) and/or ending (U+202C) -// explicit bidi control characters from |text|, if there are any. Otherwise, -// return the text itself. Explicit bidi control characters display and have -// semantic effect. They can be deleted so they might not always appear in a -// pair. -BASE_I18N_EXPORT string16 StripWrappingBidiControlCharacters( - const string16& text) WARN_UNUSED_RESULT; - -} // namespace i18n -} // namespace base - -#endif // BASE_I18N_RTL_H_
diff --git a/base/i18n/streaming_utf8_validator.cc b/base/i18n/streaming_utf8_validator.cc deleted file mode 100644 index 19c86a3..0000000 --- a/base/i18n/streaming_utf8_validator.cc +++ /dev/null
@@ -1,59 +0,0 @@ -// Copyright 2014 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// This implementation doesn't use ICU. The ICU macros are oriented towards -// character-at-a-time processing, whereas byte-at-a-time processing is easier -// with streaming input. - -#include "base/i18n/streaming_utf8_validator.h" - -#include "base/i18n/utf8_validator_tables.h" -#include "base/logging.h" - -namespace base { -namespace { - -uint8_t StateTableLookup(uint8_t offset) { - DCHECK_LT(offset, internal::kUtf8ValidatorTablesSize); - return internal::kUtf8ValidatorTables[offset]; -} - -} // namespace - -StreamingUtf8Validator::State StreamingUtf8Validator::AddBytes(const char* data, - size_t size) { - // Copy |state_| into a local variable so that the compiler doesn't have to be - // careful of aliasing. - uint8_t state = state_; - for (const char* p = data; p != data + size; ++p) { - if ((*p & 0x80) == 0) { - if (state == 0) - continue; - state = internal::I18N_UTF8_VALIDATOR_INVALID_INDEX; - break; - } - const uint8_t shift_amount = StateTableLookup(state); - const uint8_t shifted_char = (*p & 0x7F) >> shift_amount; - state = StateTableLookup(state + shifted_char + 1); - // State may be INVALID here, but this code is optimised for the case of - // valid UTF-8 and it is more efficient (by about 2%) to not attempt an - // early loop exit unless we hit an ASCII character. - } - state_ = state; - return state == 0 ? VALID_ENDPOINT - : state == internal::I18N_UTF8_VALIDATOR_INVALID_INDEX - ? INVALID - : VALID_MIDPOINT; -} - -void StreamingUtf8Validator::Reset() { - state_ = 0u; -} - -bool StreamingUtf8Validator::Validate(const std::string& string) { - return StreamingUtf8Validator().AddBytes(string.data(), string.size()) == - VALID_ENDPOINT; -} - -} // namespace base
diff --git a/base/i18n/streaming_utf8_validator.h b/base/i18n/streaming_utf8_validator.h deleted file mode 100644 index ebf38a6..0000000 --- a/base/i18n/streaming_utf8_validator.h +++ /dev/null
@@ -1,66 +0,0 @@ -// Copyright 2014 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// A streaming validator for UTF-8. Validation is based on the definition in -// RFC-3629. In particular, it does not reject the invalid characters rejected -// by base::IsStringUTF8(). -// -// The implementation detects errors on the first possible byte. - -#ifndef BASE_I18N_STREAMING_UTF8_VALIDATOR_H_ -#define BASE_I18N_STREAMING_UTF8_VALIDATOR_H_ - -#include <stddef.h> -#include <stdint.h> - -#include <string> - -#include "base/i18n/base_i18n_export.h" -#include "base/macros.h" - -namespace base { - -class BASE_I18N_EXPORT StreamingUtf8Validator { - public: - // The validator exposes 3 states. It starts in state VALID_ENDPOINT. As it - // processes characters it alternates between VALID_ENDPOINT and - // VALID_MIDPOINT. If it encounters an invalid byte or UTF-8 sequence the - // state changes permanently to INVALID. - enum State { - VALID_ENDPOINT, - VALID_MIDPOINT, - INVALID - }; - - StreamingUtf8Validator() : state_(0u) {} - // Trivial destructor intentionally omitted. - - // Validate |size| bytes starting at |data|. If the concatenation of all calls - // to AddBytes() since this object was constructed or reset is a valid UTF-8 - // string, returns VALID_ENDPOINT. If it could be the prefix of a valid UTF-8 - // string, returns VALID_MIDPOINT. If an invalid byte or UTF-8 sequence was - // present, returns INVALID. - State AddBytes(const char* data, size_t size); - - // Return the object to a freshly-constructed state so that it can be re-used. - void Reset(); - - // Validate a complete string using the same criteria. Returns true if the - // string only contains complete, valid UTF-8 codepoints. - static bool Validate(const std::string& string); - - private: - // The current state of the validator. Value 0 is the initial/valid state. - // The state is stored as an offset into |kUtf8ValidatorTables|. The special - // state |kUtf8InvalidState| is invalid. - uint8_t state_; - - // This type could be made copyable but there is currently no use-case for - // it. - DISALLOW_COPY_AND_ASSIGN(StreamingUtf8Validator); -}; - -} // namespace base - -#endif // BASE_I18N_STREAMING_UTF8_VALIDATOR_H_
diff --git a/base/i18n/streaming_utf8_validator_perftest.cc b/base/i18n/streaming_utf8_validator_perftest.cc deleted file mode 100644 index ad328f8..0000000 --- a/base/i18n/streaming_utf8_validator_perftest.cc +++ /dev/null
@@ -1,240 +0,0 @@ -// Copyright 2014 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// All data that is passed through a WebSocket with type "Text" needs to be -// validated as UTF8. Since this is done on the IO thread, it needs to be -// reasonably fast. - -// We are only interested in the performance on valid UTF8. Invalid UTF8 will -// result in a connection failure, so is unlikely to become a source of -// performance issues. - -#include "base/i18n/streaming_utf8_validator.h" - -#include <stddef.h> - -#include <string> - -#include "base/bind.h" -#include "base/callback.h" -#include "base/macros.h" -#include "base/strings/string_util.h" -#include "base/strings/stringprintf.h" -#include "base/test/perf_time_logger.h" -#include "testing/gtest/include/gtest/gtest.h" - -namespace base { -namespace { - -// We want to test ranges of valid UTF-8 sequences. These ranges are inclusive. -// They are intended to be large enough that the validator needs to do -// meaningful work while being in some sense "realistic" (eg. control characters -// are not included). -const char kOneByteSeqRangeStart[] = " "; // U+0020 -const char kOneByteSeqRangeEnd[] = "~"; // U+007E - -const char kTwoByteSeqRangeStart[] = "\xc2\xa0"; // U+00A0 non-breaking space -const char kTwoByteSeqRangeEnd[] = "\xc9\x8f"; // U+024F small y with stroke - -const char kThreeByteSeqRangeStart[] = "\xe3\x81\x82"; // U+3042 Hiragana "a" -const char kThreeByteSeqRangeEnd[] = "\xe9\xbf\x83"; // U+9FC3 "to blink" - -const char kFourByteSeqRangeStart[] = "\xf0\xa0\x80\x8b"; // U+2000B -const char kFourByteSeqRangeEnd[] = "\xf0\xaa\x9a\xb2"; // U+2A6B2 - -// The different lengths of strings to test. -const size_t kTestLengths[] = {1, 32, 256, 32768, 1 << 20}; - -// Simplest possible byte-at-a-time validator, to provide a baseline -// for comparison. This is only tried on 1-byte UTF-8 sequences, as -// the results will not be meaningful with sequences containing -// top-bit-set bytes. -bool IsString7Bit(const std::string& s) { - for (std::string::const_iterator it = s.begin(); it != s.end(); ++it) { - if (*it & 0x80) - return false; - } - return true; -} - -// Assumes that |previous| is a valid UTF-8 sequence, and attempts to return -// the next one. Is just barely smart enough to iterate through the ranges -// defined about. -std::string NextUtf8Sequence(const std::string& previous) { - DCHECK(StreamingUtf8Validator::Validate(previous)); - std::string next = previous; - for (int i = static_cast<int>(previous.length() - 1); i >= 0; --i) { - // All bytes in a UTF-8 sequence except the first one are - // constrained to the range 0x80 to 0xbf, inclusive. When we - // increment past 0xbf, we carry into the previous byte. - if (i > 0 && next[i] == '\xbf') { - next[i] = '\x80'; - continue; // carry - } - ++next[i]; - break; // no carry - } - DCHECK(StreamingUtf8Validator::Validate(next)) - << "Result \"" << next << "\" failed validation"; - return next; -} - -typedef bool (*TestTargetType)(const std::string&); - -// Run fuction |target| over |test_string| |times| times, and report the results -// using |description|. -bool RunTest(const std::string& description, - TestTargetType target, - const std::string& test_string, - int times) { - base::PerfTimeLogger timer(description.c_str()); - bool result = true; - for (int i = 0; i < times; ++i) { - result = target(test_string) && result; - } - timer.Done(); - return result; -} - -// Construct a string by repeating |input| enough times to equal or exceed -// |length|. -std::string ConstructRepeatedTestString(const std::string& input, - size_t length) { - std::string output = input; - while (output.length() * 2 < length) { - output += output; - } - if (output.length() < length) { - output += ConstructRepeatedTestString(input, length - output.length()); - } - return output; -} - -// Construct a string by expanding the range of UTF-8 sequences -// between |input_start| and |input_end|, inclusive, and then -// repeating the resulting string until it equals or exceeds |length| -// bytes. |input_start| and |input_end| must be valid UTF-8 -// sequences. -std::string ConstructRangedTestString(const std::string& input_start, - const std::string& input_end, - size_t length) { - std::string output = input_start; - std::string input = input_start; - while (output.length() < length && input != input_end) { - input = NextUtf8Sequence(input); - output += input; - } - if (output.length() < length) { - output = ConstructRepeatedTestString(output, length); - } - return output; -} - -struct TestFunctionDescription { - TestTargetType function; - const char* function_name; -}; - -bool IsStringUTF8(const std::string& str) { - return base::IsStringUTF8(base::StringPiece(str)); -} - -// IsString7Bit is intentionally placed last so it can be excluded easily. -const TestFunctionDescription kTestFunctions[] = { - {&StreamingUtf8Validator::Validate, "StreamingUtf8Validator"}, - {&IsStringUTF8, "IsStringUTF8"}, {&IsString7Bit, "IsString7Bit"}}; - -// Construct a test string from |construct_test_string| for each of the lengths -// in |kTestLengths| in turn. For each string, run each test in |test_functions| -// for a number of iterations such that the total number of bytes validated -// is around 16MB. -void RunSomeTests( - const char format[], - base::Callback<std::string(size_t length)> construct_test_string, - const TestFunctionDescription* test_functions, - size_t test_count) { - for (size_t i = 0; i < arraysize(kTestLengths); ++i) { - const size_t length = kTestLengths[i]; - const std::string test_string = construct_test_string.Run(length); - const int real_length = static_cast<int>(test_string.length()); - const int times = (1 << 24) / real_length; - for (size_t test_index = 0; test_index < test_count; ++test_index) { - EXPECT_TRUE(RunTest(StringPrintf(format, - test_functions[test_index].function_name, - real_length, - times), - test_functions[test_index].function, - test_string, - times)); - } - } -} - -TEST(StreamingUtf8ValidatorPerfTest, OneByteRepeated) { - RunSomeTests("%s: bytes=1 repeated length=%d repeat=%d", - base::Bind(ConstructRepeatedTestString, kOneByteSeqRangeStart), - kTestFunctions, - 3); -} - -TEST(StreamingUtf8ValidatorPerfTest, OneByteRange) { - RunSomeTests("%s: bytes=1 ranged length=%d repeat=%d", - base::Bind(ConstructRangedTestString, - kOneByteSeqRangeStart, - kOneByteSeqRangeEnd), - kTestFunctions, - 3); -} - -TEST(StreamingUtf8ValidatorPerfTest, TwoByteRepeated) { - RunSomeTests("%s: bytes=2 repeated length=%d repeat=%d", - base::Bind(ConstructRepeatedTestString, kTwoByteSeqRangeStart), - kTestFunctions, - 2); -} - -TEST(StreamingUtf8ValidatorPerfTest, TwoByteRange) { - RunSomeTests("%s: bytes=2 ranged length=%d repeat=%d", - base::Bind(ConstructRangedTestString, - kTwoByteSeqRangeStart, - kTwoByteSeqRangeEnd), - kTestFunctions, - 2); -} - -TEST(StreamingUtf8ValidatorPerfTest, ThreeByteRepeated) { - RunSomeTests( - "%s: bytes=3 repeated length=%d repeat=%d", - base::Bind(ConstructRepeatedTestString, kThreeByteSeqRangeStart), - kTestFunctions, - 2); -} - -TEST(StreamingUtf8ValidatorPerfTest, ThreeByteRange) { - RunSomeTests("%s: bytes=3 ranged length=%d repeat=%d", - base::Bind(ConstructRangedTestString, - kThreeByteSeqRangeStart, - kThreeByteSeqRangeEnd), - kTestFunctions, - 2); -} - -TEST(StreamingUtf8ValidatorPerfTest, FourByteRepeated) { - RunSomeTests("%s: bytes=4 repeated length=%d repeat=%d", - base::Bind(ConstructRepeatedTestString, kFourByteSeqRangeStart), - kTestFunctions, - 2); -} - -TEST(StreamingUtf8ValidatorPerfTest, FourByteRange) { - RunSomeTests("%s: bytes=4 ranged length=%d repeat=%d", - base::Bind(ConstructRangedTestString, - kFourByteSeqRangeStart, - kFourByteSeqRangeEnd), - kTestFunctions, - 2); -} - -} // namespace -} // namespace base
diff --git a/base/i18n/string_compare.cc b/base/i18n/string_compare.cc deleted file mode 100644 index 649c281..0000000 --- a/base/i18n/string_compare.cc +++ /dev/null
@@ -1,29 +0,0 @@ -// Copyright (c) 2013 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "base/i18n/string_compare.h" - -#include "base/logging.h" -#include "base/strings/utf_string_conversions.h" -#include "third_party/icu/source/common/unicode/unistr.h" - -namespace base { -namespace i18n { - -// Compares the character data stored in two different string16 strings by -// specified Collator instance. -UCollationResult CompareString16WithCollator(const icu::Collator& collator, - const string16& lhs, - const string16& rhs) { - UErrorCode error = U_ZERO_ERROR; - UCollationResult result = collator.compare( - icu::UnicodeString(FALSE, lhs.c_str(), static_cast<int>(lhs.length())), - icu::UnicodeString(FALSE, rhs.c_str(), static_cast<int>(rhs.length())), - error); - DCHECK(U_SUCCESS(error)); - return result; -} - -} // namespace i18n -} // namespace base
diff --git a/base/i18n/string_compare.h b/base/i18n/string_compare.h deleted file mode 100644 index 5fcc5fe..0000000 --- a/base/i18n/string_compare.h +++ /dev/null
@@ -1,28 +0,0 @@ -// Copyright (c) 2013 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef BASE_I18N_STRING_COMPARE_H_ -#define BASE_I18N_STRING_COMPARE_H_ - -#include <algorithm> -#include <string> -#include <vector> - -#include "base/i18n/base_i18n_export.h" -#include "base/strings/string16.h" -#include "third_party/icu/source/i18n/unicode/coll.h" - -namespace base { -namespace i18n { - -// Compares the two strings using the specified collator. -BASE_I18N_EXPORT UCollationResult -CompareString16WithCollator(const icu::Collator& collator, - const string16& lhs, - const string16& rhs); - -} // namespace i18n -} // namespace base - -#endif // BASE_I18N_STRING_COMPARE_H_
diff --git a/base/i18n/string_search.cc b/base/i18n/string_search.cc deleted file mode 100644 index 2f6fee4..0000000 --- a/base/i18n/string_search.cc +++ /dev/null
@@ -1,81 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include <stdint.h> - -#include "base/i18n/string_search.h" -#include "base/logging.h" - -#include "third_party/icu/source/i18n/unicode/usearch.h" - -namespace base { -namespace i18n { - -FixedPatternStringSearchIgnoringCaseAndAccents:: -FixedPatternStringSearchIgnoringCaseAndAccents(const string16& find_this) - : find_this_(find_this) { - // usearch_open requires a valid string argument to be searched, even if we - // want to set it by usearch_setText afterwards. So, supplying a dummy text. - const string16& dummy = find_this_; - - UErrorCode status = U_ZERO_ERROR; - search_ = usearch_open(find_this_.data(), find_this_.size(), dummy.data(), - dummy.size(), uloc_getDefault(), - nullptr, // breakiter - &status); - if (U_SUCCESS(status)) { - UCollator* collator = usearch_getCollator(search_); - ucol_setStrength(collator, UCOL_PRIMARY); - usearch_reset(search_); - } -} - -FixedPatternStringSearchIgnoringCaseAndAccents:: -~FixedPatternStringSearchIgnoringCaseAndAccents() { - if (search_) - usearch_close(search_); -} - -bool FixedPatternStringSearchIgnoringCaseAndAccents::Search( - const string16& in_this, size_t* match_index, size_t* match_length) { - UErrorCode status = U_ZERO_ERROR; - usearch_setText(search_, in_this.data(), in_this.size(), &status); - - // Default to basic substring search if usearch fails. According to - // http://icu-project.org/apiref/icu4c/usearch_8h.html, usearch_open will fail - // if either |find_this| or |in_this| are empty. In either case basic - // substring search will give the correct return value. - if (!U_SUCCESS(status)) { - size_t index = in_this.find(find_this_); - if (index == string16::npos) { - return false; - } else { - if (match_index) - *match_index = index; - if (match_length) - *match_length = find_this_.size(); - return true; - } - } - - int32_t index = usearch_first(search_, &status); - if (!U_SUCCESS(status) || index == USEARCH_DONE) - return false; - if (match_index) - *match_index = static_cast<size_t>(index); - if (match_length) - *match_length = static_cast<size_t>(usearch_getMatchedLength(search_)); - return true; -} - -bool StringSearchIgnoringCaseAndAccents(const string16& find_this, - const string16& in_this, - size_t* match_index, - size_t* match_length) { - return FixedPatternStringSearchIgnoringCaseAndAccents(find_this).Search( - in_this, match_index, match_length); -} - -} // namespace i18n -} // namespace base
diff --git a/base/i18n/string_search.h b/base/i18n/string_search.h deleted file mode 100644 index 07a29c1..0000000 --- a/base/i18n/string_search.h +++ /dev/null
@@ -1,55 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef BASE_I18N_STRING_SEARCH_H_ -#define BASE_I18N_STRING_SEARCH_H_ - -#include <stddef.h> - -#include "base/i18n/base_i18n_export.h" -#include "base/strings/string16.h" - -struct UStringSearch; - -namespace base { -namespace i18n { - -// Returns true if |in_this| contains |find_this|. If |match_index| or -// |match_length| are non-NULL, they are assigned the start position and total -// length of the match. -// -// Only differences between base letters are taken into consideration. Case and -// accent differences are ignored. Please refer to 'primary level' in -// http://userguide.icu-project.org/collation/concepts for additional details. -BASE_I18N_EXPORT - bool StringSearchIgnoringCaseAndAccents(const string16& find_this, - const string16& in_this, - size_t* match_index, - size_t* match_length); - -// This class is for speeding up multiple StringSearchIgnoringCaseAndAccents() -// with the same |find_this| argument. |find_this| is passed as the constructor -// argument, and precomputation for searching is done only at that timing. -class BASE_I18N_EXPORT FixedPatternStringSearchIgnoringCaseAndAccents { - public: - explicit FixedPatternStringSearchIgnoringCaseAndAccents( - const string16& find_this); - ~FixedPatternStringSearchIgnoringCaseAndAccents(); - - // Returns true if |in_this| contains |find_this|. If |match_index| or - // |match_length| are non-NULL, they are assigned the start position and total - // length of the match. - bool Search(const string16& in_this, - size_t* match_index, - size_t* match_length); - - private: - string16 find_this_; - UStringSearch* search_; -}; - -} // namespace i18n -} // namespace base - -#endif // BASE_I18N_STRING_SEARCH_H_
diff --git a/base/i18n/time_formatting.cc b/base/i18n/time_formatting.cc deleted file mode 100644 index 3a5394a..0000000 --- a/base/i18n/time_formatting.cc +++ /dev/null
@@ -1,301 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "base/i18n/time_formatting.h" - -#include <stddef.h> - -#include <memory> - -#include "base/i18n/unicodestring.h" -#include "base/logging.h" -#include "base/strings/utf_string_conversions.h" -#include "base/time/time.h" -#include "third_party/icu/source/common/unicode/utypes.h" -#include "third_party/icu/source/i18n/unicode/datefmt.h" -#include "third_party/icu/source/i18n/unicode/dtitvfmt.h" -#include "third_party/icu/source/i18n/unicode/dtptngen.h" -#include "third_party/icu/source/i18n/unicode/fmtable.h" -#include "third_party/icu/source/i18n/unicode/measfmt.h" -#include "third_party/icu/source/i18n/unicode/smpdtfmt.h" - -namespace base { -namespace { - -string16 TimeFormat(const icu::DateFormat* formatter, - const Time& time) { - DCHECK(formatter); - icu::UnicodeString date_string; - - formatter->format(static_cast<UDate>(time.ToDoubleT() * 1000), date_string); - return i18n::UnicodeStringToString16(date_string); -} - -string16 TimeFormatWithoutAmPm(const icu::DateFormat* formatter, - const Time& time) { - DCHECK(formatter); - icu::UnicodeString time_string; - - icu::FieldPosition ampm_field(icu::DateFormat::kAmPmField); - formatter->format( - static_cast<UDate>(time.ToDoubleT() * 1000), time_string, ampm_field); - int ampm_length = ampm_field.getEndIndex() - ampm_field.getBeginIndex(); - if (ampm_length) { - int begin = ampm_field.getBeginIndex(); - // Doesn't include any spacing before the field. - if (begin) - begin--; - time_string.removeBetween(begin, ampm_field.getEndIndex()); - } - return i18n::UnicodeStringToString16(time_string); -} - -icu::SimpleDateFormat CreateSimpleDateFormatter(const char* pattern) { - // Generate a locale-dependent format pattern. The generator will take - // care of locale-dependent formatting issues like which separator to - // use (some locales use '.' instead of ':'), and where to put the am/pm - // marker. - UErrorCode status = U_ZERO_ERROR; - std::unique_ptr<icu::DateTimePatternGenerator> generator( - icu::DateTimePatternGenerator::createInstance(status)); - DCHECK(U_SUCCESS(status)); - icu::UnicodeString generated_pattern = - generator->getBestPattern(icu::UnicodeString(pattern), status); - DCHECK(U_SUCCESS(status)); - - // Then, format the time using the generated pattern. - icu::SimpleDateFormat formatter(generated_pattern, status); - DCHECK(U_SUCCESS(status)); - - return formatter; -} - -UMeasureFormatWidth DurationWidthToMeasureWidth(DurationFormatWidth width) { - switch (width) { - case DURATION_WIDTH_WIDE: return UMEASFMT_WIDTH_WIDE; - case DURATION_WIDTH_SHORT: return UMEASFMT_WIDTH_SHORT; - case DURATION_WIDTH_NARROW: return UMEASFMT_WIDTH_NARROW; - case DURATION_WIDTH_NUMERIC: return UMEASFMT_WIDTH_NUMERIC; - } - NOTREACHED(); - return UMEASFMT_WIDTH_COUNT; -} - -const char* DateFormatToString(DateFormat format) { - switch (format) { - case DATE_FORMAT_YEAR_MONTH: - return UDAT_YEAR_MONTH; - case DATE_FORMAT_MONTH_WEEKDAY_DAY: - return UDAT_MONTH_WEEKDAY_DAY; - } - NOTREACHED(); - return UDAT_YEAR_MONTH_DAY; -} - -} // namespace - -string16 TimeFormatTimeOfDay(const Time& time) { - // We can omit the locale parameter because the default should match - // Chrome's application locale. - std::unique_ptr<icu::DateFormat> formatter( - icu::DateFormat::createTimeInstance(icu::DateFormat::kShort)); - return TimeFormat(formatter.get(), time); -} - -string16 TimeFormatTimeOfDayWithMilliseconds(const Time& time) { - icu::SimpleDateFormat formatter = CreateSimpleDateFormatter("HmsSSS"); - return TimeFormatWithoutAmPm(&formatter, time); -} - -string16 TimeFormatTimeOfDayWithHourClockType(const Time& time, - HourClockType type, - AmPmClockType ampm) { - // Just redirect to the normal function if the default type matches the - // given type. - HourClockType default_type = GetHourClockType(); - if (default_type == type && (type == k24HourClock || ampm == kKeepAmPm)) { - return TimeFormatTimeOfDay(time); - } - - const char* base_pattern = (type == k12HourClock ? "ahm" : "Hm"); - icu::SimpleDateFormat formatter = CreateSimpleDateFormatter(base_pattern); - - if (ampm == kKeepAmPm) { - return TimeFormat(&formatter, time); - } else { - return TimeFormatWithoutAmPm(&formatter, time); - } -} - -string16 TimeFormatShortDate(const Time& time) { - std::unique_ptr<icu::DateFormat> formatter( - icu::DateFormat::createDateInstance(icu::DateFormat::kMedium)); - return TimeFormat(formatter.get(), time); -} - -string16 TimeFormatShortDateNumeric(const Time& time) { - std::unique_ptr<icu::DateFormat> formatter( - icu::DateFormat::createDateInstance(icu::DateFormat::kShort)); - return TimeFormat(formatter.get(), time); -} - -string16 TimeFormatShortDateAndTime(const Time& time) { - std::unique_ptr<icu::DateFormat> formatter( - icu::DateFormat::createDateTimeInstance(icu::DateFormat::kShort)); - return TimeFormat(formatter.get(), time); -} - -string16 TimeFormatShortDateAndTimeWithTimeZone(const Time& time) { - std::unique_ptr<icu::DateFormat> formatter( - icu::DateFormat::createDateTimeInstance(icu::DateFormat::kShort, - icu::DateFormat::kLong)); - return TimeFormat(formatter.get(), time); -} - -string16 TimeFormatMonthAndYear(const Time& time) { - icu::SimpleDateFormat formatter = - CreateSimpleDateFormatter(DateFormatToString(DATE_FORMAT_YEAR_MONTH)); - return TimeFormat(&formatter, time); -} - -string16 TimeFormatFriendlyDateAndTime(const Time& time) { - std::unique_ptr<icu::DateFormat> formatter( - icu::DateFormat::createDateTimeInstance(icu::DateFormat::kFull)); - return TimeFormat(formatter.get(), time); -} - -string16 TimeFormatFriendlyDate(const Time& time) { - std::unique_ptr<icu::DateFormat> formatter( - icu::DateFormat::createDateInstance(icu::DateFormat::kFull)); - return TimeFormat(formatter.get(), time); -} - -string16 TimeFormatWithPattern(const Time& time, const char* pattern) { - icu::SimpleDateFormat formatter = CreateSimpleDateFormatter(pattern); - return TimeFormat(&formatter, time); -} - -bool TimeDurationFormat(const TimeDelta time, - const DurationFormatWidth width, - string16* out) { - DCHECK(out); - UErrorCode status = U_ZERO_ERROR; - const int total_minutes = static_cast<int>(time.InSecondsF() / 60 + 0.5); - const int hours = total_minutes / 60; - const int minutes = total_minutes % 60; - UMeasureFormatWidth u_width = DurationWidthToMeasureWidth(width); - - // TODO(derat): Delete the |status| checks and LOG(ERROR) calls throughout - // this function once the cause of http://crbug.com/677043 is tracked down. - const icu::Measure measures[] = { - icu::Measure(hours, icu::MeasureUnit::createHour(status), status), - icu::Measure(minutes, icu::MeasureUnit::createMinute(status), status)}; - if (U_FAILURE(status)) { - LOG(ERROR) << "Creating MeasureUnit or Measure for " << hours << "h" - << minutes << "m failed: " << u_errorName(status); - return false; - } - - icu::MeasureFormat measure_format(icu::Locale::getDefault(), u_width, status); - if (U_FAILURE(status)) { - LOG(ERROR) << "Creating MeasureFormat for " - << icu::Locale::getDefault().getName() - << " failed: " << u_errorName(status); - return false; - } - - icu::UnicodeString formatted; - icu::FieldPosition ignore(icu::FieldPosition::DONT_CARE); - measure_format.formatMeasures(measures, 2, formatted, ignore, status); - if (U_FAILURE(status)) { - LOG(ERROR) << "formatMeasures failed: " << u_errorName(status); - return false; - } - - *out = i18n::UnicodeStringToString16(formatted); - return true; -} - -bool TimeDurationFormatWithSeconds(const TimeDelta time, - const DurationFormatWidth width, - string16* out) { - DCHECK(out); - UErrorCode status = U_ZERO_ERROR; - const int64_t total_seconds = static_cast<int>(time.InSecondsF() + 0.5); - const int hours = total_seconds / 3600; - const int minutes = (total_seconds - hours * 3600) / 60; - const int seconds = total_seconds % 60; - UMeasureFormatWidth u_width = DurationWidthToMeasureWidth(width); - - const icu::Measure measures[] = { - icu::Measure(hours, icu::MeasureUnit::createHour(status), status), - icu::Measure(minutes, icu::MeasureUnit::createMinute(status), status), - icu::Measure(seconds, icu::MeasureUnit::createSecond(status), status)}; - icu::MeasureFormat measure_format(icu::Locale::getDefault(), u_width, status); - icu::UnicodeString formatted; - icu::FieldPosition ignore(icu::FieldPosition::DONT_CARE); - measure_format.formatMeasures(measures, 3, formatted, ignore, status); - *out = i18n::UnicodeStringToString16(formatted); - return U_SUCCESS(status) == TRUE; -} - -string16 DateIntervalFormat(const Time& begin_time, - const Time& end_time, - DateFormat format) { - UErrorCode status = U_ZERO_ERROR; - - std::unique_ptr<icu::DateIntervalFormat> formatter( - icu::DateIntervalFormat::createInstance(DateFormatToString(format), - status)); - - icu::FieldPosition pos = 0; - UDate start_date = static_cast<UDate>(begin_time.ToDoubleT() * 1000); - UDate end_date = static_cast<UDate>(end_time.ToDoubleT() * 1000); - icu::DateInterval interval(start_date, end_date); - icu::UnicodeString formatted; - formatter->format(&interval, formatted, pos, status); - return i18n::UnicodeStringToString16(formatted); -} - -HourClockType GetHourClockType() { - // TODO(satorux,jshin): Rework this with ures_getByKeyWithFallback() - // once it becomes public. The short time format can be found at - // "calendar/gregorian/DateTimePatterns/3" in the resources. - std::unique_ptr<icu::SimpleDateFormat> formatter( - static_cast<icu::SimpleDateFormat*>( - icu::DateFormat::createTimeInstance(icu::DateFormat::kShort))); - // Retrieve the short time format. - icu::UnicodeString pattern_unicode; - formatter->toPattern(pattern_unicode); - - // Determine what hour clock type the current locale uses, by checking - // "a" (am/pm marker) in the short time format. This is reliable as "a" - // is used by all of 12-hour clock formats, but not any of 24-hour clock - // formats, as shown below. - // - // % grep -A4 DateTimePatterns third_party/icu/source/data/locales/*.txt | - // grep -B1 -- -- |grep -v -- '--' | - // perl -nle 'print $1 if /^\S+\s+"(.*)"/' |sort -u - // - // H.mm - // H:mm - // HH.mm - // HH:mm - // a h:mm - // ah:mm - // ahh:mm - // h-mm a - // h:mm a - // hh:mm a - // - // See http://userguide.icu-project.org/formatparse/datetime for details - // about the date/time format syntax. - if (pattern_unicode.indexOf('a') == -1) { - return k24HourClock; - } else { - return k12HourClock; - } -} - -} // namespace base
diff --git a/base/i18n/time_formatting.h b/base/i18n/time_formatting.h deleted file mode 100644 index 41793b3..0000000 --- a/base/i18n/time_formatting.h +++ /dev/null
@@ -1,142 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Basic time formatting methods. These methods use the current locale -// formatting for displaying the time. - -#ifndef BASE_I18N_TIME_FORMATTING_H_ -#define BASE_I18N_TIME_FORMATTING_H_ - -#include "base/compiler_specific.h" -#include "base/i18n/base_i18n_export.h" -#include "base/strings/string16.h" - -namespace base { - -class Time; -class TimeDelta; - -// Argument type used to specify the hour clock type. -enum HourClockType { - k12HourClock, // Uses 1-12. e.g., "3:07 PM" - k24HourClock, // Uses 0-23. e.g., "15:07" -}; - -// Argument type used to specify whether or not to include AM/PM sign. -enum AmPmClockType { - kDropAmPm, // Drops AM/PM sign. e.g., "3:07" - kKeepAmPm, // Keeps AM/PM sign. e.g., "3:07 PM" -}; - -// Should match UMeasureFormatWidth in measfmt.h; replicated here to avoid -// requiring third_party/icu dependencies with this file. -enum DurationFormatWidth { - DURATION_WIDTH_WIDE, // "3 hours, 7 minutes" - DURATION_WIDTH_SHORT, // "3 hr, 7 min" - DURATION_WIDTH_NARROW, // "3h 7m" - DURATION_WIDTH_NUMERIC // "3:07" -}; - -// Date formats from third_party/icu/source/i18n/unicode/udat.h. Add more as -// necessary. -enum DateFormat { - // November 2007 - DATE_FORMAT_YEAR_MONTH, - // Tuesday, 7 November - DATE_FORMAT_MONTH_WEEKDAY_DAY, -}; - -// TODO(derat@chromium.org): Update all of these functions to return boolean -// "success" values and use out-params for formatted strings: -// http://crbug.com/698802 - -// Returns the time of day, e.g., "3:07 PM". -BASE_I18N_EXPORT string16 TimeFormatTimeOfDay(const Time& time); - -// Returns the time of day in 24-hour clock format with millisecond accuracy, -// e.g., "15:07:30.568" -BASE_I18N_EXPORT string16 TimeFormatTimeOfDayWithMilliseconds(const Time& time); - -// Returns the time of day in the specified hour clock type. e.g. -// "3:07 PM" (type == k12HourClock, ampm == kKeepAmPm). -// "3:07" (type == k12HourClock, ampm == kDropAmPm). -// "15:07" (type == k24HourClock). -BASE_I18N_EXPORT string16 TimeFormatTimeOfDayWithHourClockType( - const Time& time, - HourClockType type, - AmPmClockType ampm); - -// Returns a shortened date, e.g. "Nov 7, 2007" -BASE_I18N_EXPORT string16 TimeFormatShortDate(const Time& time); - -// Returns a numeric date such as 12/13/52. -BASE_I18N_EXPORT string16 TimeFormatShortDateNumeric(const Time& time); - -// Returns a numeric date and time such as "12/13/52 2:44:30 PM". -BASE_I18N_EXPORT string16 TimeFormatShortDateAndTime(const Time& time); - -// Returns a month and year, e.g. "November 2007" -BASE_I18N_EXPORT string16 TimeFormatMonthAndYear(const Time& time); - -// Returns a numeric date and time with time zone such as -// "12/13/52 2:44:30 PM PST". -BASE_I18N_EXPORT string16 -TimeFormatShortDateAndTimeWithTimeZone(const Time& time); - -// Formats a time in a friendly sentence format, e.g. -// "Monday, March 6, 2008 2:44:30 PM". -BASE_I18N_EXPORT string16 TimeFormatFriendlyDateAndTime(const Time& time); - -// Formats a time in a friendly sentence format, e.g. -// "Monday, March 6, 2008". -BASE_I18N_EXPORT string16 TimeFormatFriendlyDate(const Time& time); - -// Formats a time using a skeleton to produce a format for different locales -// when an unusual time format is needed, e.g. "Feb. 2, 18:00". -// -// See http://userguide.icu-project.org/formatparse/datetime for details. -BASE_I18N_EXPORT string16 TimeFormatWithPattern(const Time& time, - const char* pattern); - -// Formats a time duration of hours and minutes into various formats, e.g., -// "3:07" or "3 hours, 7 minutes", and returns true on success. See -// DurationFormatWidth for details. -// -// Please don't use width = DURATION_WIDTH_NUMERIC when the time duration -// can possibly be larger than 24h, as the hour value will be cut below 24 -// after formatting. -// TODO(chengx): fix function output when width = DURATION_WIDTH_NUMERIC -// (http://crbug.com/675791) -BASE_I18N_EXPORT bool TimeDurationFormat(const TimeDelta time, - const DurationFormatWidth width, - string16* out) WARN_UNUSED_RESULT; - -// Formats a time duration of hours, minutes and seconds into various formats, -// e.g., "3:07:30" or "3 hours, 7 minutes, 30 seconds", and returns true on -// success. See DurationFormatWidth for details. -// -// Please don't use width = DURATION_WIDTH_NUMERIC when the time duration -// can possibly be larger than 24h, as the hour value will be cut below 24 -// after formatting. -// TODO(chengx): fix function output when width = DURATION_WIDTH_NUMERIC -// (http://crbug.com/675791) -BASE_I18N_EXPORT bool TimeDurationFormatWithSeconds( - const TimeDelta time, - const DurationFormatWidth width, - string16* out) WARN_UNUSED_RESULT; - -// Formats a date interval into various formats, e.g. "2 December - 4 December" -// or "March 2016 - December 2016". See DateFormat for details. -BASE_I18N_EXPORT string16 DateIntervalFormat(const Time& begin_time, - const Time& end_time, - DateFormat format); - -// Gets the hour clock type of the current locale. e.g. -// k12HourClock (en-US). -// k24HourClock (en-GB). -BASE_I18N_EXPORT HourClockType GetHourClockType(); - -} // namespace base - -#endif // BASE_I18N_TIME_FORMATTING_H_
diff --git a/base/i18n/timezone.cc b/base/i18n/timezone.cc deleted file mode 100644 index 8624e07..0000000 --- a/base/i18n/timezone.cc +++ /dev/null
@@ -1,34 +0,0 @@ -// Copyright 2013 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "base/i18n/timezone.h" - -#include <memory> -#include <string> - -#include "third_party/icu/source/common/unicode/unistr.h" -#include "third_party/icu/source/i18n/unicode/timezone.h" - -namespace base { - -std::string CountryCodeForCurrentTimezone() { - std::unique_ptr<icu::TimeZone> zone(icu::TimeZone::createDefault()); - icu::UnicodeString id; - // ICU returns '001' (world) for Etc/GMT. Preserve the old behavior - // only for Etc/GMT while returning an empty string for Etc/UTC and - // Etc/UCT because they're less likely to be chosen by mistake in UK in - // place of Europe/London (Briitish Time). - if (zone->getID(id) == UNICODE_STRING_SIMPLE("Etc/GMT")) - return "GB"; - char region_code[4]; - UErrorCode status = U_ZERO_ERROR; - int length = zone->getRegion(id, region_code, 4, status); - // Return an empty string if region_code is a 3-digit numeric code such - // as 001 (World) for Etc/UTC, Etc/UCT. - return (U_SUCCESS(status) && length == 2) - ? std::string(region_code, static_cast<size_t>(length)) - : std::string(); -} - -} // namespace base
diff --git a/base/i18n/timezone.h b/base/i18n/timezone.h deleted file mode 100644 index 7557d44..0000000 --- a/base/i18n/timezone.h +++ /dev/null
@@ -1,24 +0,0 @@ -// Copyright 2013 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef BASE_I18N_TIMEZONE_H_ -#define BASE_I18N_TIMEZONE_H_ - -#include <string> - -#include "base/i18n/base_i18n_export.h" - -namespace base { - -// Checks the system timezone and turns it into a two-character ISO 3166 country -// code. This may fail (for example, it used to always fail on Android), in -// which case it will return an empty string. It'll also return an empty string -// when the timezone is Etc/UTC or Etc/UCT, but will return 'GB" for Etc/GMT -// because people in the UK tends to select Etc/GMT by mistake instead of -// Europe/London (British Time). -BASE_I18N_EXPORT std::string CountryCodeForCurrentTimezone(); - -} // namespace base - -#endif // BASE_I18N_TIMEZONE_H_
diff --git a/base/i18n/unicodestring.h b/base/i18n/unicodestring.h deleted file mode 100644 index b62c526..0000000 --- a/base/i18n/unicodestring.h +++ /dev/null
@@ -1,32 +0,0 @@ -// Copyright (c) 2017 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef BASE_I18N_UNICODESTRING_H_ -#define BASE_I18N_UNICODESTRING_H_ - -#include "base/strings/string16.h" -#include "third_party/icu/source/common/unicode/unistr.h" -#include "third_party/icu/source/common/unicode/uvernum.h" - -#if U_ICU_VERSION_MAJOR_NUM >= 59 -#include "third_party/icu/source/common/unicode/char16ptr.h" -#endif - -namespace base { -namespace i18n { - -inline string16 UnicodeStringToString16(const icu::UnicodeString& unistr) { -#if U_ICU_VERSION_MAJOR_NUM >= 59 - return base::string16(icu::toUCharPtr(unistr.getBuffer()), - static_cast<size_t>(unistr.length())); -#else - return base::string16(unistr.getBuffer(), - static_cast<size_t>(unistr.length())); -#endif -} - -} // namespace i18n -} // namespace base - -#endif // BASE_UNICODESTRING_H_
diff --git a/base/i18n/utf8_validator_tables.cc b/base/i18n/utf8_validator_tables.cc deleted file mode 100644 index 913afc7..0000000 --- a/base/i18n/utf8_validator_tables.cc +++ /dev/null
@@ -1,55 +0,0 @@ -// Copyright 2014 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// This file is auto-generated by build_utf8_validator_tables. -// DO NOT EDIT. - -#include "base/i18n/utf8_validator_tables.h" - -namespace base { -namespace internal { - -const uint8_t kUtf8ValidatorTables[] = { - // State 0, offset 0x00 - 0x00, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, // 0x08 - 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, // 0x10 - 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, // 0x18 - 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, // 0x20 - 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, // 0x28 - 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, // 0x30 - 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, // 0x38 - 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, // 0x40 - 0x81, 0x81, 0x81, 0x83, 0x83, 0x83, 0x83, 0x83, // 0x48 - 0x83, 0x83, 0x83, 0x83, 0x83, 0x83, 0x83, 0x83, // 0x50 - 0x83, 0x83, 0x83, 0x83, 0x83, 0x83, 0x83, 0x83, // 0x58 - 0x83, 0x83, 0x83, 0x83, 0x83, 0x83, 0x83, 0x83, // 0x60 - 0x83, 0x86, 0x8b, 0x8b, 0x8b, 0x8b, 0x8b, 0x8b, // 0x68 - 0x8b, 0x8b, 0x8b, 0x8b, 0x8b, 0x8b, 0x8e, 0x8b, // 0x70 - 0x8b, 0x93, 0x9c, 0x9c, 0x9c, 0x9f, 0x81, 0x81, // 0x78 - 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, // 0x80 - 0x81, // 0x81 - // State 1, offset 0x81 - 0x07, 0x81, // 0x83 - // State 2, offset 0x83 - 0x06, 0x00, 0x81, // 0x86 - // State 3, offset 0x86 - 0x05, 0x81, 0x83, 0x81, 0x81, // 0x8b - // State 4, offset 0x8b - 0x06, 0x83, 0x81, // 0x8e - // State 5, offset 0x8e - 0x05, 0x83, 0x81, 0x81, 0x81, // 0x93 - // State 6, offset 0x93 - 0x04, 0x81, 0x8b, 0x8b, 0x8b, 0x81, 0x81, 0x81, // 0x9b - 0x81, // 0x9c - // State 7, offset 0x9c - 0x06, 0x8b, 0x81, // 0x9f - // State 8, offset 0x9f - 0x04, 0x8b, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, // 0xa7 - 0x81, // 0xa8 -}; - -const size_t kUtf8ValidatorTablesSize = arraysize(kUtf8ValidatorTables); - -} // namespace internal -} // namespace base
diff --git a/base/i18n/utf8_validator_tables.h b/base/i18n/utf8_validator_tables.h deleted file mode 100644 index 939616b..0000000 --- a/base/i18n/utf8_validator_tables.h +++ /dev/null
@@ -1,32 +0,0 @@ -// Copyright 2014 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef BASE_I18N_UTF8_VALIDATOR_TABLES_H_ -#define BASE_I18N_UTF8_VALIDATOR_TABLES_H_ - -#include <stddef.h> -#include <stdint.h> - -#include "base/macros.h" - -namespace base { -namespace internal { - -// The tables for all states; a list of entries of the form (right_shift, -// next_state, next_state, ....). The right_shifts are used to reduce the -// overall size of the table. The table only covers bytes in the range -// [0x80, 0xFF] to save space. -extern const uint8_t kUtf8ValidatorTables[]; - -extern const size_t kUtf8ValidatorTablesSize; - -// The offset of the INVALID state in kUtf8ValidatorTables. -enum { - I18N_UTF8_VALIDATOR_INVALID_INDEX = 129 -}; - -} // namespace internal -} // namespace base - -#endif // BASE_I18N_UTF8_VALIDATOR_TABLES_H_
diff --git a/base/win/i18n.cc b/base/win/i18n.cc deleted file mode 100644 index d7017e3..0000000 --- a/base/win/i18n.cc +++ /dev/null
@@ -1,171 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "base/win/i18n.h" - -#include <windows.h> - -#include "base/logging.h" -#include "base/macros.h" - -namespace { - -// Keep this enum in sync with kLanguageFunctionNames. -enum LanguageFunction { - SYSTEM_LANGUAGES, - USER_LANGUAGES, - PROCESS_LANGUAGES, - THREAD_LANGUAGES, - NUM_FUNCTIONS -}; - -const char kSystemLanguagesFunctionName[] = "GetSystemPreferredUILanguages"; -const char kUserLanguagesFunctionName[] = "GetUserPreferredUILanguages"; -const char kProcessLanguagesFunctionName[] = "GetProcessPreferredUILanguages"; -const char kThreadLanguagesFunctionName[] = "GetThreadPreferredUILanguages"; - -// Keep this array in sync with enum LanguageFunction. -const char *const kLanguageFunctionNames[] = { - &kSystemLanguagesFunctionName[0], - &kUserLanguagesFunctionName[0], - &kProcessLanguagesFunctionName[0], - &kThreadLanguagesFunctionName[0] -}; - -static_assert(NUM_FUNCTIONS == arraysize(kLanguageFunctionNames), - "LanguageFunction enum and kLanguageFunctionNames array must be " - "kept in sync"); - -// Calls one of the MUI Get*PreferredUILanguages functions, placing the result -// in |languages|. |function| identifies the function to call and |flags| is -// the function-specific flags (callers must not specify MUI_LANGUAGE_ID or -// MUI_LANGUAGE_NAME). Returns true if at least one language is placed in -// |languages|. -bool GetMUIPreferredUILanguageList(LanguageFunction function, ULONG flags, - std::vector<wchar_t>* languages) { - DCHECK(0 <= function && NUM_FUNCTIONS > function); - DCHECK_EQ(0U, (flags & (MUI_LANGUAGE_ID | MUI_LANGUAGE_NAME))); - DCHECK(languages); - - HMODULE kernel32 = GetModuleHandle(L"kernel32.dll"); - if (NULL != kernel32) { - typedef BOOL (WINAPI* GetPreferredUILanguages_Fn)( - DWORD, PULONG, PZZWSTR, PULONG); - GetPreferredUILanguages_Fn get_preferred_ui_languages = - reinterpret_cast<GetPreferredUILanguages_Fn>( - GetProcAddress(kernel32, kLanguageFunctionNames[function])); - if (NULL != get_preferred_ui_languages) { - const ULONG call_flags = flags | MUI_LANGUAGE_NAME; - ULONG language_count = 0; - ULONG buffer_length = 0; - if (get_preferred_ui_languages(call_flags, &language_count, NULL, - &buffer_length) && - 0 != buffer_length) { - languages->resize(buffer_length); - if (get_preferred_ui_languages(call_flags, &language_count, - &(*languages)[0], &buffer_length) && - 0 != language_count) { - DCHECK(languages->size() == buffer_length); - return true; - } else { - DPCHECK(0 == language_count) - << "Failed getting preferred UI languages."; - } - } else { - DPCHECK(0 == buffer_length) - << "Failed getting size of preferred UI languages."; - } - } else { - DVLOG(2) << "MUI not available."; - } - } else { - NOTREACHED() << "kernel32.dll not found."; - } - - return false; -} - -bool GetUserDefaultUILanguage(std::wstring* language, std::wstring* region) { - DCHECK(language); - - LANGID lang_id = ::GetUserDefaultUILanguage(); - if (LOCALE_CUSTOM_UI_DEFAULT != lang_id) { - const LCID locale_id = MAKELCID(lang_id, SORT_DEFAULT); - // max size for LOCALE_SISO639LANGNAME and LOCALE_SISO3166CTRYNAME is 9 - wchar_t result_buffer[9]; - int result_length = - GetLocaleInfo(locale_id, LOCALE_SISO639LANGNAME, &result_buffer[0], - arraysize(result_buffer)); - DPCHECK(0 != result_length) << "Failed getting language id"; - if (1 < result_length) { - language->assign(&result_buffer[0], result_length - 1); - region->clear(); - if (SUBLANG_NEUTRAL != SUBLANGID(lang_id)) { - result_length = - GetLocaleInfo(locale_id, LOCALE_SISO3166CTRYNAME, &result_buffer[0], - arraysize(result_buffer)); - DPCHECK(0 != result_length) << "Failed getting region id"; - if (1 < result_length) - region->assign(&result_buffer[0], result_length - 1); - } - return true; - } - } else { - // This is entirely unexpected on pre-Vista, which is the only time we - // should try GetUserDefaultUILanguage anyway. - NOTREACHED() << "Cannot determine language for a supplemental locale."; - } - return false; -} - -bool GetPreferredUILanguageList(LanguageFunction function, ULONG flags, - std::vector<std::wstring>* languages) { - std::vector<wchar_t> buffer; - std::wstring language; - std::wstring region; - - if (GetMUIPreferredUILanguageList(function, flags, &buffer)) { - std::vector<wchar_t>::const_iterator scan = buffer.begin(); - language.assign(&*scan); - while (!language.empty()) { - languages->push_back(language); - scan += language.size() + 1; - language.assign(&*scan); - } - } else if (GetUserDefaultUILanguage(&language, ®ion)) { - // Mimic the MUI behavior of putting the neutral version of the lang after - // the regional one (e.g., "fr-CA, fr"). - if (!region.empty()) - languages->push_back(std::wstring(language) - .append(1, L'-') - .append(region)); - languages->push_back(language); - } else { - return false; - } - - return true; -} - -} // namespace - -namespace base { -namespace win { -namespace i18n { - -bool GetUserPreferredUILanguageList(std::vector<std::wstring>* languages) { - DCHECK(languages); - return GetPreferredUILanguageList(USER_LANGUAGES, 0, languages); -} - -bool GetThreadPreferredUILanguageList(std::vector<std::wstring>* languages) { - DCHECK(languages); - return GetPreferredUILanguageList( - THREAD_LANGUAGES, MUI_MERGE_SYSTEM_FALLBACK | MUI_MERGE_USER_FALLBACK, - languages); -} - -} // namespace i18n -} // namespace win -} // namespace base
diff --git a/base/win/i18n.h b/base/win/i18n.h deleted file mode 100644 index 9e74d3f..0000000 --- a/base/win/i18n.h +++ /dev/null
@@ -1,33 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef BASE_WIN_I18N_H_ -#define BASE_WIN_I18N_H_ - -#include <string> -#include <vector> - -#include "base/base_export.h" - -namespace base { -namespace win { -namespace i18n { - -// Adds to |languages| the list of user preferred UI languages from MUI, if -// available, falling-back on the user default UI language otherwise. Returns -// true if at least one language is added. -BASE_EXPORT bool GetUserPreferredUILanguageList( - std::vector<std::wstring>* languages); - -// Adds to |languages| the list of thread, process, user, and system preferred -// UI languages from MUI, if available, falling-back on the user default UI -// language otherwise. Returns true if at least one language is added. -BASE_EXPORT bool GetThreadPreferredUILanguageList( - std::vector<std::wstring>* languages); - -} // namespace i18n -} // namespace win -} // namespace base - -#endif // BASE_WIN_I18N_H_
diff --git a/build/gen.py b/build/gen.py index 37592d4..9cca4e9 100755 --- a/build/gen.py +++ b/build/gen.py
@@ -756,7 +756,6 @@ 'base/win/enum_variant.cc', 'base/win/event_trace_controller.cc', 'base/win/event_trace_provider.cc', - 'base/win/i18n.cc', 'base/win/iat_patch_function.cc', 'base/win/iunknown_impl.cc', 'base/win/message_window.cc',