From 9043add656177eeac1491a73d2f3ab92bec0013c Mon Sep 17 00:00:00 2001 From: Benjamin Barenblat Date: Sat, 29 Dec 2018 14:31:27 -0500 Subject: Imported Upstream version 8.8.2 --- clib/unicode.mli | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 clib/unicode.mli (limited to 'clib/unicode.mli') diff --git a/clib/unicode.mli b/clib/unicode.mli new file mode 100644 index 00000000..23e168cd --- /dev/null +++ b/clib/unicode.mli @@ -0,0 +1,60 @@ +(************************************************************************) +(* * The Coq Proof Assistant / The Coq Development Team *) +(* v * INRIA, CNRS and contributors - Copyright 1999-2018 *) +(* status + +(** Return [None] if a given string can be used as a (Coq) identifier. + Return [Some (b,s)] otherwise, where [s] is an explanation and [b] is severity. *) +val ident_refutation : string -> (bool * string) option + +(** Tells if a valid initial character for an identifier *) +val is_valid_ident_initial : status -> bool + +(** Tells if a valid non-initial character for an identifier *) +val is_valid_ident_trailing : status -> bool + +(** Tells if a character is unclassified *) +val is_unknown : status -> bool + +(** First char of a string, converted to lowercase + @raise Assert_failure if the input string is empty. *) +val lowercase_first_char : string -> string + +(** Split a string supposed to be an ident at the first letter; + as an optimization, return None if the first character is a letter *) +val split_at_first_letter : string -> (string * string) option + +(** Return [true] if all UTF-8 characters in the input string are just plain + ASCII characters. Returns [false] otherwise. *) +val is_basic_ascii : string -> bool + +(** [ascii_of_ident s] maps UTF-8 string to a string composed solely from ASCII + characters. The non-ASCII characters are translated to ["_UUxxxx_"] where + {i xxxx} is the Unicode index of the character in hexadecimal (from four + to six hex digits). To avoid potential name clashes, any preexisting + substring ["_UU"] is turned into ["_UUU"]. *) +val ascii_of_ident : string -> string + +(** Validate an UTF-8 string *) +val is_utf8 : string -> bool + +(** Return the length of a valid UTF-8 string. *) +val utf8_length : string -> int + +(** Variant of {!String.sub} for UTF-8 strings. *) +val utf8_sub : string -> int -> int -> string + +(** Return a "%XX"-escaped string if it contains non UTF-8 characters. *) +val escaped_if_non_utf8 : string -> string -- cgit v1.2.3