summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Adam Chlipala <adamc@hcoop.net>2009-12-03 11:20:13 -0500
committerGravatar Adam Chlipala <adamc@hcoop.net>2009-12-03 11:20:13 -0500
commite7b50987708729b9c3d0c8c0acb58e801142e6b9 (patch)
treea926eddc786dbf468be0d7a96f3d6824291f2156
parent68e0439b741bf84508f90b42611d681ced933874 (diff)
Escaping UTF-8 in MonoOpt
-rw-r--r--src/mono_opt.sml39
-rw-r--r--src/prim.sml15
-rw-r--r--tests/cyrillic.ur2
-rw-r--r--tests/cyrillic.urp2
-rw-r--r--tests/cyrillic.urs1
5 files changed, 49 insertions, 10 deletions
diff --git a/src/mono_opt.sml b/src/mono_opt.sml
index d4441ac7..3a5b4f4c 100644
--- a/src/mono_opt.sml
+++ b/src/mono_opt.sml
@@ -61,14 +61,37 @@ val urlifyFloat = attrifyFloat
val htmlifyInt = attrifyInt
val htmlifyFloat = attrifyFloat
-val htmlifyString = String.translate (fn ch => case ch of
- #"<" => "&lt;"
- | #"&" => "&amp;"
- | _ =>
- if Char.isPrint ch orelse Char.isSpace ch then
- str ch
- else
- "&#" ^ Int.toString (ord ch) ^ ";")
+
+fun htmlifyString s =
+ let
+ fun hs (pos, acc) =
+ if pos >= size s then
+ String.concat (rev acc)
+ else
+ case String.sub (s, pos) of
+ #"<" => hs (pos+1, "&lt;" :: acc)
+ | #"&" => hs (pos+1, "&amp;" :: acc)
+ | ch =>
+ let
+ val n = ord ch
+ fun isCont k = pos + k < size s
+ andalso ord (String.sub (s, pos + k)) div 64 = 2
+ fun unicode k = hs (pos+k+1, String.substring (s, pos, k+1) :: acc)
+ in
+ if Char.isPrint ch orelse Char.isSpace ch then
+ hs (pos+1, str ch :: acc)
+ else if n div 32 = 6 andalso isCont 1 then
+ unicode 1
+ else if n div 16 = 14 andalso isCont 1 andalso isCont 2 then
+ unicode 2
+ else if n div 8 = 30 andalso isCont 1 andalso isCont 2 andalso isCont 3 then
+ unicode 3
+ else
+ hs (pos+1, "&#" ^ Int.toString (ord ch) ^ ";" :: acc)
+ end
+ in
+ hs (0, [])
+ end
fun hexIt ch =
let
diff --git a/src/prim.sml b/src/prim.sml
index c84c557e..c4b7e839 100644
--- a/src/prim.sml
+++ b/src/prim.sml
@@ -68,12 +68,23 @@ fun toString t =
| String s => s
| Char ch => str ch
+fun pad (n, ch, s) =
+ if size s >= n then
+ s
+ else
+ str ch ^ pad (n-1, ch, s)
+
+val gccify = String.translate (fn ch => if Char.isPrint ch then
+ str ch
+ else
+ "\\" ^ pad (3, #"0", Int.fmt StringCvt.OCT (ord ch)))
+
fun p_t_GCC t =
case t of
Int n => string (int2s n)
| Float n => string (float2s n)
- | String s => box [string "\"", string (String.toString s), string "\""]
- | Char ch => box [string "'", string (String.toString (str ch)), string "'"]
+ | String s => box [string "\"", string (gccify s), string "\""]
+ | Char ch => box [string "'", string (gccify (str ch)), string "'"]
fun equal x =
case x of
diff --git a/tests/cyrillic.ur b/tests/cyrillic.ur
new file mode 100644
index 00000000..f125792f
--- /dev/null
+++ b/tests/cyrillic.ur
@@ -0,0 +1,2 @@
+fun main () = return <xml><body>одел
+Hi!</body></xml>
diff --git a/tests/cyrillic.urp b/tests/cyrillic.urp
new file mode 100644
index 00000000..279d4af5
--- /dev/null
+++ b/tests/cyrillic.urp
@@ -0,0 +1,2 @@
+
+cyrillic
diff --git a/tests/cyrillic.urs b/tests/cyrillic.urs
new file mode 100644
index 00000000..6ac44e0b
--- /dev/null
+++ b/tests/cyrillic.urs
@@ -0,0 +1 @@
+val main : unit -> transaction page