From c2e754829628d1e9b7a16b3389cfdace76950fdf Mon Sep 17 00:00:00 2001 From: misterg Date: Tue, 19 Sep 2017 16:54:40 -0400 Subject: Initial Commit --- absl/strings/internal/utf8.h | 52 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 absl/strings/internal/utf8.h (limited to 'absl/strings/internal/utf8.h') diff --git a/absl/strings/internal/utf8.h b/absl/strings/internal/utf8.h new file mode 100644 index 0000000..705eea7 --- /dev/null +++ b/absl/strings/internal/utf8.h @@ -0,0 +1,52 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// UTF8 utilities, implemented to reduce dependencies. +// +// If you need Unicode specific processing (for example being aware of +// Unicode character boundaries, or knowledge of Unicode casing rules, +// or various forms of equivalence and normalization), take a look at +// files in i18n/utf8. + +#ifndef ABSL_STRINGS_INTERNAL_UTF8_H_ +#define ABSL_STRINGS_INTERNAL_UTF8_H_ + +#include +#include + + +namespace absl { +namespace strings_internal { + +// For Unicode code points 0 through 0x10FFFF, EncodeUTF8Char writes +// out the UTF-8 encoding into buffer, and returns the number of chars +// it wrote. +// +// As described in https://tools.ietf.org/html/rfc3629#section-3 , the encodings +// are: +// 00 - 7F : 0xxxxxxx +// 80 - 7FF : 110xxxxx 10xxxxxx +// 800 - FFFF : 1110xxxx 10xxxxxx 10xxxxxx +// 10000 - 10FFFF : 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx +// +// Values greater than 0x10FFFF are not supported and may or may not write +// characters into buffer, however never will more than kMaxEncodedUTF8Size +// bytes be written, regardless of the value of utf8_char. +enum { kMaxEncodedUTF8Size = 4 }; +size_t EncodeUTF8Char(char *buffer, char32_t utf8_char); + +} // namespace strings_internal +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_UTF8_H_ -- cgit v1.2.3