From 2304eece8f5f2b9067cd66d860a332f8721c5321 Mon Sep 17 00:00:00 2001
From: Adam Chlipala
Date: Thu, 3 Dec 2009 11:50:51 -0500
Subject: UTF-8 in dynamic escaping

---
Reviewer notes (this section is not part of the commit message):
 * attrifyString stops being `String.translate attrifyChar` and becomes an
   explicit scan over the string.  It still escapes '"' and '&', but a lead
   byte followed by the expected number of UTF-8 continuation bytes (2-, 3-
   and 4-byte forms) is copied through unchanged instead of being turned into
   one numeric character reference per byte.  Characters accepted by
   Char.isSpace are now passed through as well.
 * htmlifyString's pass-through test is narrowed from
   `Char.isPrint ch orelse Char.isSpace ch` to `Char.isPrint ch`.
 * This copy was rebuilt from a whitespace-collapsed, entity-decoded rendering:
   line breaks and indentation are reconstructed, and the "&quot;" / "&amp;"
   literals are restored.  Verify whitespace against the repository before
   applying.

 src/mono_opt.sml | 35 ++++++++++++++++++++++++++++++++---
 1 file changed, 32 insertions(+), 3 deletions(-)

(limited to 'src/mono_opt.sml')

diff --git a/src/mono_opt.sml b/src/mono_opt.sml
index 3a5b4f4c..bda4d93a 100644
--- a/src/mono_opt.sml
+++ b/src/mono_opt.sml
@@ -45,6 +45,37 @@ fun attrifyFloat n =
     else
         Real.toString n
 
+fun attrifyString s =
+    let
+        fun hs (pos, acc) =
+            if pos >= size s then
+                String.concat (rev acc)
+            else
+                case String.sub (s, pos) of
+                    #"\"" => hs (pos+1, "&quot;" :: acc)
+                  | #"&" => hs (pos+1, "&amp;" :: acc)
+                  | ch =>
+                    let
+                        val n = ord ch
+                        fun isCont k = pos + k < size s
+                                       andalso ord (String.sub (s, pos + k)) div 64 = 2
+                        fun unicode k = hs (pos+k+1, String.substring (s, pos, k+1) :: acc)
+                    in
+                        if Char.isPrint ch orelse Char.isSpace ch then
+                            hs (pos+1, str ch :: acc)
+                        else if n div 32 = 6 andalso isCont 1 then
+                            unicode 1
+                        else if n div 16 = 14 andalso isCont 1 andalso isCont 2 then
+                            unicode 2
+                        else if n div 8 = 30 andalso isCont 1 andalso isCont 2 andalso isCont 3 then
+                            unicode 3
+                        else
+                            hs (pos+1, "&#" ^ Int.toString (ord ch) ^ ";" :: acc)
+                    end
+    in
+        hs (0, [])
+    end
+
 fun attrifyChar ch =
     case ch of
         #"\"" => "&quot;"
@@ -54,8 +85,6 @@ fun attrifyChar ch =
         else
             "&#" ^ Int.toString (ord ch) ^ ";"
 
-val attrifyString = String.translate attrifyChar
-
 val urlifyInt = attrifyInt
 val urlifyFloat = attrifyFloat
 
@@ -78,7 +107,7 @@ fun htmlifyString s =
                                        andalso ord (String.sub (s, pos + k)) div 64 = 2
                         fun unicode k = hs (pos+k+1, String.substring (s, pos, k+1) :: acc)
                     in
-                        if Char.isPrint ch orelse Char.isSpace ch then
+                        if Char.isPrint ch then
                             hs (pos+1, str ch :: acc)
                         else if n div 32 = 6 andalso isCont 1 then
                             unicode 1
-- 
cgit v1.2.3