summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Adam Chlipala <adamc@hcoop.net>2009-04-30 16:25:27 -0400
committerGravatar Adam Chlipala <adamc@hcoop.net>2009-04-30 16:25:27 -0400
commit82825a3ba67f3a01d8a3658c74d8171dcd21276e (patch)
treed2d165bc47e37dc4a4ea786b7e9289ad4002da20
parentd4646bbb7f107e3c773bec5e18fd44f435ac40ca (diff)
Lexing character entities
-rw-r--r--src/urweb.lex55
-rw-r--r--tests/ent.ur3
-rw-r--r--tests/ent.urp3
-rw-r--r--tests/ent.urs1
4 files changed, 60 insertions, 2 deletions
diff --git a/src/urweb.lex b/src/urweb.lex
index 85cf3bcf..46835fa2 100644
--- a/src/urweb.lex
+++ b/src/urweb.lex
@@ -1,4 +1,6 @@
-(* Copyright (c) 2008, Adam Chlipala
+(* -*- mode: sml-lex -*- *)
+
+(* Copyright (c) 2008-2009, Adam Chlipala
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -106,6 +108,55 @@ fun initialize () = (xmlTag := [];
xmlString := false)
+fun unescape loc s =
+ let
+ fun process (s, acc) =
+ let
+ val (befor, after) = Substring.splitl (fn ch => ch <> #"&") s
+ in
+ if Substring.size after = 0 then
+ Substring.concat (rev (s :: acc))
+ else
+ let
+ val after = Substring.slice (after, 1, NONE)
+ val (befor', after') = Substring.splitl (fn ch => ch <> #";") after
+ in
+ if Substring.size after' = 0 then
+ (ErrorMsg.errorAt' loc "Missing ';' after '&'";
+ "")
+ else
+ let
+ val pre = befor
+ val code = befor'
+ val s = Substring.slice (after', 1, NONE)
+
+ val special =
+ if Substring.size code > 0 andalso Substring.sub (code, 0) = #"#"
+ andalso CharVectorSlice.all Char.isDigit (Substring.slice (code, 1, NONE)) then
+ let
+ val code = Substring.string (Substring.slice (code, 1, NONE))
+ in
+ Option.map chr (Int.fromString code)
+ end
+ else case Substring.string code of
+ "amp" => SOME #"&"
+ | "lt" => SOME #"<"
+ | "gt" => SOME #">"
+ | "quot" => SOME #"\""
+ | _ => NONE
+ in
+ case special of
+ NONE => (ErrorMsg.errorAt' loc ("Unsupported XML character entity "
+ ^ Substring.string code);
+ "")
+ | SOME ch => process (s, Substring.full (String.str ch) :: pre :: acc)
+ end
+ end
+ end
+ in
+ process (Substring.full s, [])
+ end
+
%%
%header (functor UrwebLexFn(structure Tokens : Urweb_TOKENS));
%full
@@ -231,7 +282,7 @@ notags = [^<{\n]+;
pushLevel (fn () => YYBEGIN XML);
Tokens.LBRACE (yypos, yypos + 1));
-<XML> {notags} => (Tokens.NOTAGS (yytext, yypos, yypos + size yytext));
+<XML> {notags} => (Tokens.NOTAGS (unescape (yypos, yypos + size yytext) yytext, yypos, yypos + size yytext));
<XML> . => (ErrorMsg.errorAt' (yypos, yypos)
("illegal XML character: \"" ^ yytext ^ "\"");
diff --git a/tests/ent.ur b/tests/ent.ur
new file mode 100644
index 00000000..fa01e8cf
--- /dev/null
+++ b/tests/ent.ur
@@ -0,0 +1,3 @@
+fun main () = return <xml><body>
+ &lt;Whoa-hoa!&gt; A&#66;CD!
+</body></xml>
diff --git a/tests/ent.urp b/tests/ent.urp
new file mode 100644
index 00000000..f63d1159
--- /dev/null
+++ b/tests/ent.urp
@@ -0,0 +1,3 @@
+debug
+
+ent
diff --git a/tests/ent.urs b/tests/ent.urs
new file mode 100644
index 00000000..6ac44e0b
--- /dev/null
+++ b/tests/ent.urs
@@ -0,0 +1 @@
+val main : unit -> transaction page