forked from chromium/chromium
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstreaming_utf8_validator.h
63 lines (49 loc) · 2.18 KB
/
streaming_utf8_validator.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// A streaming validator for UTF-8. Validation is based on the definition in
// RFC 3629. In particular, it does not reject the characters that
// base::IsStringUTF8() additionally treats as invalid.
//
// The implementation detects errors on the first possible byte.
#ifndef BASE_I18N_STREAMING_UTF8_VALIDATOR_H_
#define BASE_I18N_STREAMING_UTF8_VALIDATOR_H_
#include <string>
#include "base/basictypes.h"
#include "base/i18n/base_i18n_export.h"
namespace base {
class BASE_I18N_EXPORT StreamingUtf8Validator {
 public:
  // Externally visible validation result. A validator begins in
  // VALID_ENDPOINT and, while consuming characters, moves back and forth
  // between VALID_ENDPOINT and VALID_MIDPOINT. Once an invalid byte or an
  // invalid UTF-8 sequence has been seen, it switches to INVALID and stays
  // there.
  enum State { VALID_ENDPOINT, VALID_MIDPOINT, INVALID };

  // One-shot check of a complete string, applying the same criteria as the
  // streaming interface. Returns true only when |string| consists solely of
  // complete, valid UTF-8 codepoints.
  static bool Validate(const std::string& string);

  // Constructs a validator in its initial state (internal state value 0).
  StreamingUtf8Validator()
      : state_(0u) {
  }

  // No destructor is declared on purpose: it would be trivial.

  // Feeds |size| bytes beginning at |data| into the validator. The result
  // describes everything passed to AddBytes() since construction or the last
  // Reset(), taken as one concatenated string:
  //   VALID_ENDPOINT - the concatenation is a valid UTF-8 string.
  //   VALID_MIDPOINT - the concatenation could be the prefix of a valid
  //                    UTF-8 string.
  //   INVALID        - an invalid byte or UTF-8 sequence was present.
  State AddBytes(const char* data, size_t size);

  // Puts the validator back into the same state as a freshly-constructed
  // object, so that the instance can be re-used.
  void Reset();

 private:
  // Current validator state, kept as an offset into |kUtf8ValidatorTables|.
  // 0 is the initial/valid state; the special value |kUtf8InvalidState|
  // marks the invalid state.
  uint8 state_;

  // Copying could be supported, but no use-case for it exists at present.
  DISALLOW_COPY_AND_ASSIGN(StreamingUtf8Validator);
};
} // namespace base
#endif // BASE_I18N_STREAMING_UTF8_VALIDATOR_H_