mirror of
https://github.com/klzgrad/naiveproxy.git
synced 2024-11-28 08:16:09 +03:00
67 lines
2.2 KiB
C
67 lines
2.2 KiB
C
|
// Copyright 2014 The Chromium Authors. All rights reserved.
|
||
|
// Use of this source code is governed by a BSD-style license that can be
|
||
|
// found in the LICENSE file.
|
||
|
|
||
|
// A streaming validator for UTF-8. Validation is based on the definition in
|
||
|
// RFC-3629. In particular, it does not reject the invalid characters rejected
|
||
|
// by base::IsStringUTF8().
|
||
|
//
|
||
|
// The implementation detects errors on the first possible byte.
|
||
|
|
||
|
#ifndef BASE_I18N_STREAMING_UTF8_VALIDATOR_H_
|
||
|
#define BASE_I18N_STREAMING_UTF8_VALIDATOR_H_
|
||
|
|
||
|
#include <stddef.h>
|
||
|
#include <stdint.h>
|
||
|
|
||
|
#include <string>
|
||
|
|
||
|
#include "base/i18n/base_i18n_export.h"
|
||
|
#include "base/macros.h"
|
||
|
|
||
|
namespace base {
|
||
|
|
||
|
class BASE_I18N_EXPORT StreamingUtf8Validator {
|
||
|
public:
|
||
|
// The validator exposes 3 states. It starts in state VALID_ENDPOINT. As it
|
||
|
// processes characters it alternates between VALID_ENDPOINT and
|
||
|
// VALID_MIDPOINT. If it encounters an invalid byte or UTF-8 sequence the
|
||
|
// state changes permanently to INVALID.
|
||
|
enum State {
|
||
|
VALID_ENDPOINT,
|
||
|
VALID_MIDPOINT,
|
||
|
INVALID
|
||
|
};
|
||
|
|
||
|
StreamingUtf8Validator() : state_(0u) {}
|
||
|
// Trivial destructor intentionally omitted.
|
||
|
|
||
|
// Validate |size| bytes starting at |data|. If the concatenation of all calls
|
||
|
// to AddBytes() since this object was constructed or reset is a valid UTF-8
|
||
|
// string, returns VALID_ENDPOINT. If it could be the prefix of a valid UTF-8
|
||
|
// string, returns VALID_MIDPOINT. If an invalid byte or UTF-8 sequence was
|
||
|
// present, returns INVALID.
|
||
|
State AddBytes(const char* data, size_t size);
|
||
|
|
||
|
// Return the object to a freshly-constructed state so that it can be re-used.
|
||
|
void Reset();
|
||
|
|
||
|
// Validate a complete string using the same criteria. Returns true if the
|
||
|
// string only contains complete, valid UTF-8 codepoints.
|
||
|
static bool Validate(const std::string& string);
|
||
|
|
||
|
private:
|
||
|
// The current state of the validator. Value 0 is the initial/valid state.
|
||
|
// The state is stored as an offset into |kUtf8ValidatorTables|. The special
|
||
|
// state |kUtf8InvalidState| is invalid.
|
||
|
uint8_t state_;
|
||
|
|
||
|
// This type could be made copyable but there is currently no use-case for
|
||
|
// it.
|
||
|
DISALLOW_COPY_AND_ASSIGN(StreamingUtf8Validator);
|
||
|
};
|
||
|
|
||
|
} // namespace base
|
||
|
|
||
|
#endif // BASE_I18N_STREAMING_UTF8_VALIDATOR_H_
|