|  | // Copyright 2014 The Chromium Authors. All rights reserved. | 
|  | // Use of this source code is governed by a BSD-style license that can be | 
|  | // found in the LICENSE file. | 
|  |  | 
|  | // A streaming validator for UTF-8. Validation is based on the definition in | 
|  | // RFC-3629. In particular, it does not reject the invalid characters rejected | 
|  | // by base::IsStringUTF8(). | 
|  | // | 
|  | // The implementation detects errors on the first possible byte. | 
|  |  | 
|  | #ifndef BASE_I18N_STREAMING_UTF8_VALIDATOR_H_ | 
|  | #define BASE_I18N_STREAMING_UTF8_VALIDATOR_H_ | 
|  |  | 
|  | #include <stddef.h> | 
|  | #include <stdint.h> | 
|  |  | 
|  | #include <string> | 
|  |  | 
|  | #include "base/i18n/base_i18n_export.h" | 
|  | #include "base/macros.h" | 
|  |  | 
|  | namespace base { | 
|  |  | 
|  | class BASE_I18N_EXPORT StreamingUtf8Validator { | 
|  | public: | 
|  | // The validator exposes 3 states. It starts in state VALID_ENDPOINT. As it | 
|  | // processes characters it alternates between VALID_ENDPOINT and | 
|  | // VALID_MIDPOINT. If it encounters an invalid byte or UTF-8 sequence the | 
|  | // state changes permanently to INVALID. | 
|  | enum State { | 
|  | VALID_ENDPOINT, | 
|  | VALID_MIDPOINT, | 
|  | INVALID | 
|  | }; | 
|  |  | 
|  | StreamingUtf8Validator() : state_(0u) {} | 
|  | // Trivial destructor intentionally omitted. | 
|  |  | 
|  | // Validate |size| bytes starting at |data|. If the concatenation of all calls | 
|  | // to AddBytes() since this object was constructed or reset is a valid UTF-8 | 
|  | // string, returns VALID_ENDPOINT. If it could be the prefix of a valid UTF-8 | 
|  | // string, returns VALID_MIDPOINT. If an invalid byte or UTF-8 sequence was | 
|  | // present, returns INVALID. | 
|  | State AddBytes(const char* data, size_t size); | 
|  |  | 
|  | // Return the object to a freshly-constructed state so that it can be re-used. | 
|  | void Reset(); | 
|  |  | 
|  | // Validate a complete string using the same criteria. Returns true if the | 
|  | // string only contains complete, valid UTF-8 codepoints. | 
|  | static bool Validate(const std::string& string); | 
|  |  | 
|  | private: | 
|  | // The current state of the validator. Value 0 is the initial/valid state. | 
|  | // The state is stored as an offset into |kUtf8ValidatorTables|. The special | 
|  | // state |kUtf8InvalidState| is invalid. | 
|  | uint8_t state_; | 
|  |  | 
|  | // This type could be made copyable but there is currently no use-case for | 
|  | // it. | 
|  | DISALLOW_COPY_AND_ASSIGN(StreamingUtf8Validator); | 
|  | }; | 
|  |  | 
|  | }  // namespace base | 
|  |  | 
|  | #endif  // BASE_I18N_STREAMING_UTF8_VALIDATOR_H_ |