aboutsummaryrefslogtreecommitdiffhomepage
path: root/ide/utf8_convert.mll
blob: 52772ad688dc4c68d364f9cf99808d4861bd89c3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
(***********************************************************************)
(*  v      *   The Coq Proof Assistant  /  The Coq Development Team    *)
(* <O___,, *        INRIA-Rocquencourt  &  LRI-CNRS-Orsay              *)
(*   \VV/  *************************************************************)
(*    //   *      This file is distributed under the terms of the      *)
(*         *       GNU Lesser General Public License Version 2.1       *)
(***********************************************************************)

(* $Id$ *)

{
  open Lexing 
  (* Shared output accumulator, created with an initial size of 127 bytes.
     The [entry] rule appends to it and resets it at end of input; [f]
     also resets it before starting a new conversion. *)
  let b = Buffer.create 127

}

(* Replace all occurrences of \x{iiii} and \x{iiiiiiii} with the corresponding valid UTF-8 characters *)

(* A single hexadecimal digit, upper or lower case.  Letters beyond F are
   deliberately excluded: they can never form a valid code point, so an
   escape containing them is not an escape at all and is copied verbatim
   by the catch-all rule. *)
let hex_digit = ['0'-'9' 'A'-'F' 'a'-'f']
(* Four hexadecimal digits, the payload of the short escape form. *)
let short = hex_digit hex_digit hex_digit hex_digit
(* Eight hexadecimal digits, the payload of the long escape form. *)
let long = short short

(* [entry lexbuf] consumes the whole input and returns it as a string in
   which every escape made of backslash, x, and a braced group of four or
   eight digits has been replaced by the UTF-8 encoding of that code point.
   An escape whose value cannot be parsed, lies outside the Unicode range,
   or does not yield a string accepted by [Glib.Utf8.validate] is copied
   unchanged.  Output is accumulated in the shared buffer [b], which is
   reset once the result has been extracted at end of input. *)
rule entry = parse
  | "\\x{" (short | long ) '}'
      { let s = lexeme lexbuf in
        (* Drop the 3-character prefix and the closing brace to keep only
           the digits. *)
        let hex = String.sub s 3 (String.length s - 4) in
        let code =
          try
            let n = int_of_string ("0x" ^ hex) in
            (* Only hand genuine Unicode code points to the converter;
               anything else is left as written. *)
            if 0 <= n && n <= 0x10FFFF then Glib.Utf8.from_unichar n else s
          with Failure _ | Invalid_argument _ ->
            (* [int_of_string] raises [Failure] on unparsable or overflowing
               input.  [Invalid_argument] is caught as well in case the
               Glib binding rejects a value -- NOTE(review): confirm which
               exceptions [Glib.Utf8.from_unichar] can raise.  Asynchronous
               exceptions are intentionally no longer swallowed. *)
            s
        in
        let c = if Glib.Utf8.validate code then code else s in
        Buffer.add_string b c;
        entry lexbuf
      }
  | _
      { (* Copy any other character through, without allocating a
           one-character string for it. *)
        Buffer.add_char b (lexeme_char lexbuf 0);
        entry lexbuf }
  | eof
      { let s = Buffer.contents b in
        Buffer.reset b;
        s
      }


{
  (* [f s] returns [s] with its escape sequences converted by the [entry]
     rule.  The shared buffer [b] is emptied first so that the result only
     contains output produced for [s]. *)
  let f s =
    Buffer.reset b;
    entry (Lexing.from_string s)
}