summaryrefslogtreecommitdiff
path: root/ide/utf8_convert.mll
diff options
context:
space:
mode:
Diffstat (limited to 'ide/utf8_convert.mll')
-rw-r--r--ide/utf8_convert.mll51
1 files changed, 51 insertions, 0 deletions
diff --git a/ide/utf8_convert.mll b/ide/utf8_convert.mll
new file mode 100644
index 00000000..4c88adc5
--- /dev/null
+++ b/ide/utf8_convert.mll
@@ -0,0 +1,51 @@
+(************************************************************************)
+(* v * The Coq Proof Assistant / The Coq Development Team *)
+(* <O___,, * CNRS-Ecole Polytechnique-INRIA Futurs-Universite Paris Sud *)
+(* \VV/ **************************************************************)
+(* // * This file is distributed under the terms of the *)
+(* * GNU Lesser General Public License Version 2.1 *)
+(************************************************************************)
+
+(* $Id: utf8_convert.mll,v 1.1.2.1 2004/07/16 19:30:21 herbelin Exp $ *)
+
+{
+ open Lexing
+ let b = Buffer.create 127
+
+}
+
+(* Replace all occurences of \x{iiii} and \x{iiiiiiii} by UTF-8 valid chars *)
+
+let digit = ['0'-'9''A'-'Z''a'-'z']
+let short = digit digit digit digit
+let long = short short
+
+rule entry = parse
+ | "\\x{" (short | long ) '}'
+ { let s = lexeme lexbuf in
+ let n = String.length s in
+ let code =
+ try Glib.Utf8.from_unichar
+ (int_of_string ("0x"^(String.sub s 3 (n - 4))))
+ with _ -> s
+ in
+ let c = if Glib.Utf8.validate code then code else s in
+ Buffer.add_string b c;
+ entry lexbuf
+ }
+ | _
+ { let s = lexeme lexbuf in
+ Buffer.add_string b s;
+ entry lexbuf}
+ | eof
+ {
+ let s = Buffer.contents b in Buffer.reset b ; s
+ }
+
+
+{
+ let f s =
+ let lb = from_string s in
+ Buffer.reset b;
+ entry lb
+}