(* Copyright (c) 2008, Adam Chlipala * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - The names of contributors may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*)

(* Lexing info for Ur/Web programs *)

(* Overview of the lexer states used by the rules at the bottom of this file:
 *
 *   INITIAL  ordinary Ur/Web code (keywords, operators, identifiers, literals)
 *   COMMENT  inside a possibly nested comment; characters are skipped
 *   STRING   inside a string literal that ends at the character in strEnder
 *   XML      text between an opening tag and its matching closing tag
 *   XMLTAG   inside a tag, where attribute names and values are lexed
 *
 * Every rule is qualified with the state it belongs to.  Rule order within a
 * state is significant: for equally long matches the earlier rule wins, which
 * is why keywords precede the generic identifier rules and the catch-all
 * single-character rules come last. *)

(* Types shared with the parser through the Tokens structure. *)
type pos = int
type svalue = Tokens.svalue
type ('a,'b) token = ('a,'b) Tokens.token
type lexresult = (svalue,pos) Tokens.token

local
    (* Current comment nesting depth; 0 means "not inside a comment". *)
    val commentLevel = ref 0
    (* Position at which the outermost currently open comment started. *)
    val commentPos = ref 0
in
    (* Record that a comment opener was seen at pos.  The position is only
     * remembered for the outermost comment so that eof can report it. *)
    fun enterComment pos =
        (if !commentLevel = 0 then
             commentPos := pos
         else
             ();
         commentLevel := !commentLevel + 1)

    (* Record that a comment closer was seen.  Returns true when this closed
     * the outermost comment, i.e. the nesting depth dropped back to 0. *)
    fun exitComment () =
        (ignore (commentLevel := !commentLevel - 1);
         !commentLevel = 0)

    (* End-of-file action.  Reports "Unterminated comment" at the start of the
     * outermost open comment if one is still open, then returns the EOF token
     * positioned at the start of the last line. *)
    fun eof () =
        let
            val pos = ErrorMsg.lastLineStart ()
        in
            if !commentLevel > 0 then
                ErrorMsg.errorAt' (!commentPos, !commentPos) "Unterminated comment"
            else
                ();
            Tokens.EOF (pos, pos)
        end
end

(* State of the string literal currently being lexed:
 *   strEnder  the delimiter character that terminates the literal
 *   str       the characters collected so far, most recent first
 *   strStart  the position of the opening delimiter *)
val strEnder = ref #"\""
val str = ref ([] : char list)
val strStart = ref 0

local
    (* Set by initialSig; consumed by the next call to newline. *)
    val initSig = ref false
    (* Amount subtracted from raw lexer positions by pos. *)
    val offset = ref 0
in
    (* Arrange for the next newline to become the origin of reported
     * positions instead of being reported as a line break.
     * NOTE(review): presumably used to hide a synthetic first line that opens
     * with the keyword sig; confirm against the caller. *)
    fun initialSig () = initSig := true

    (* Translate a raw lexer position into a reported position. *)
    fun pos yypos = yypos - !offset

    (* Handle a newline at raw position yypos: either establish the position
     * offset requested by initialSig, or report the line break to ErrorMsg. *)
    fun newline yypos =
        if !initSig then
            (initSig := false;
             offset := yypos + 1)
        else
            ErrorMsg.newline (pos yypos)
end

(* Names of the tags opened by XML_BEGIN and not yet closed, innermost first. *)
val xmlTag = ref ([] : string list)

(* True while the string being lexed was opened inside a tag, so that the end
 * of the string returns to the XMLTAG state rather than to INITIAL.  Starts
 * out false, the same value initialize establishes, so that ordinary string
 * literals behave correctly even before initialize has been called. *)
val xmlString = ref false

(* Stack of (callback, depth) pairs, one per code fragment embedded in XML.
 * depth counts the braces currently open inside the fragment; callback is run
 * when the brace that closes the fragment is seen. *)
val braceLevels = ref ([] : ((unit -> unit) * int) list)

(* Begin tracking a new embedded code fragment whose closing brace runs s. *)
fun pushLevel s = braceLevels := (s, 1) :: (!braceLevels)

(* Note an opening brace inside the innermost embedded fragment, if any. *)
fun enterBrace () =
    case !braceLevels of
        (s, i) :: rest => braceLevels := (s, i+1) :: rest
      | _ => ()

(* Note a closing brace.  When it balances the brace that opened the innermost
 * embedded fragment, pop the fragment and run its callback. *)
fun exitBrace () =
    case !braceLevels of
        (s, i) :: rest =>
        if i = 1 then
            (braceLevels := rest;
             s ())
        else
            braceLevels := (s, i-1) :: rest
      | _ => ()

(* Reset the XML-related lexer state before lexing a new input. *)
fun initialize () = (xmlTag := [];
                     xmlString := false)

%%
%header (functor UrwebLexFn(structure Tokens : Urweb_TOKENS));
%full
%s COMMENT STRING XML XMLTAG;

id = [a-z_][A-Za-z0-9_']*;
cid = [A-Z][A-Za-z0-9_]*;
ws = [\ \t\012];
intconst = [0-9]+;
realconst = [0-9]+\.[0-9]*;
notags = [^<{\n]+;

%%

<INITIAL> \n          => (newline yypos;
                          continue ());
<COMMENT> \n          => (newline yypos;
                          continue ());
<XMLTAG> \n           => (newline yypos;
                          continue ());
<XML> \n              => (newline yypos;
                          Tokens.NOTAGS (yytext, yypos, yypos + size yytext));

<INITIAL> {ws}+       => (lex ());

<INITIAL> "(*"        => (YYBEGIN COMMENT;
                          enterComment (pos yypos);
                          continue ());
<INITIAL> "*)"        => (ErrorMsg.errorAt' (pos yypos, pos yypos) "Unbalanced comments";
                          continue ());

<COMMENT> "(*"        => (enterComment (pos yypos);
                          continue ());
<COMMENT> "*)"        => (if exitComment () then YYBEGIN INITIAL else ();
                          continue ());

<INITIAL> "\""        => (YYBEGIN STRING; strEnder := #"\""; strStart := pos yypos; str := []; continue());
<INITIAL> "'"         => (YYBEGIN STRING; strEnder := #"'"; strStart := pos yypos; str := []; continue());
<STRING> "\\\""       => (str := #"\"" :: !str; continue());
<STRING> "\\'"        => (str := #"'" :: !str; continue());
<STRING> "\n"         => (newline yypos;
                          str := #"\n" :: !str; continue());
<STRING> .            => (let
                              val ch = String.sub (yytext, 0)
                          in
                              if ch = !strEnder then
                                  (if !xmlString then
                                       (xmlString := false; YYBEGIN XMLTAG)
                                   else
                                       YYBEGIN INITIAL;
                                   Tokens.STRING (String.implode (List.rev (!str)), !strStart, pos yypos + 1))
                              else
                                  (str := ch :: !str;
                                   continue ())
                          end);

<INITIAL> "<" {id} "/>"=>(let
                              val tag = String.substring (yytext, 1, size yytext - 3)
                          in
                              Tokens.XML_BEGIN_END (tag, yypos, yypos + size yytext)
                          end);
<INITIAL> "<" {id} ">"=> (let
                              val tag = String.substring (yytext, 1, size yytext - 2)
                          in
                              YYBEGIN XML;
                              xmlTag := tag :: (!xmlTag);
                              Tokens.XML_BEGIN (tag, yypos, yypos + size yytext)
                          end);
<XML> "</" {id} ">"   => (let
                              val id = String.substring (yytext, 2, size yytext - 3)
                          in
                              case !xmlTag of
                                  id' :: rest =>
                                  if id = id' then
                                      (YYBEGIN INITIAL;
                                       xmlTag := rest;
                                       Tokens.XML_END (yypos, yypos + size yytext))
                                  else
                                      Tokens.END_TAG (id, yypos, yypos + size yytext)
                                | _ =>
                                  Tokens.END_TAG (id, yypos, yypos + size yytext)
                          end);

<XML> "<" {id}        => (YYBEGIN XMLTAG;
                          Tokens.BEGIN_TAG (String.extract (yytext, 1, NONE),
                                            yypos, yypos + size yytext));

<XMLTAG> "/"          => (Tokens.DIVIDE (yypos, yypos + size yytext));
<XMLTAG> ">"          => (YYBEGIN XML;
                          Tokens.GT (yypos, yypos + size yytext));

<XMLTAG> {ws}+        => (lex ());

<XMLTAG> {id}         => (Tokens.SYMBOL (yytext, yypos, yypos + size yytext));
<XMLTAG> "="          => (Tokens.EQ (yypos, yypos + size yytext));

<XMLTAG> {intconst}   => (case Int64.fromString yytext of
                              SOME x => Tokens.INT (x, yypos, yypos + size yytext)
                            | NONE => (ErrorMsg.errorAt' (yypos, yypos)
                                       ("Expected int, received: " ^ yytext);
                                       continue ()));
<XMLTAG> {realconst}  => (case Real.fromString yytext of
                              SOME x => Tokens.FLOAT (x, yypos, yypos + size yytext)
                            | NONE => (ErrorMsg.errorAt' (yypos, yypos)
                                       ("Expected float, received: " ^ yytext);
                                       continue ()));
<XMLTAG> "\""         => (YYBEGIN STRING;
                          xmlString := true;
                          strEnder := #"\"";
                          strStart := yypos; str := []; continue ());

<XMLTAG> "{"          => (YYBEGIN INITIAL;
                          pushLevel (fn () => YYBEGIN XMLTAG);
                          Tokens.LBRACE (yypos, yypos + 1));
<XMLTAG> "("          => (YYBEGIN INITIAL;
                          pushLevel (fn () => YYBEGIN XMLTAG);
                          Tokens.LPAREN (yypos, yypos + 1));

<XMLTAG> .            => (ErrorMsg.errorAt' (yypos, yypos)
                          ("illegal XML tag character: \"" ^ yytext ^ "\"");
                          continue ());

<XML> "{"             => (YYBEGIN INITIAL;
                          pushLevel (fn () => YYBEGIN XML);
                          Tokens.LBRACE (yypos, yypos + 1));

<XML> {notags}        => (Tokens.NOTAGS (yytext, yypos, yypos + size yytext));

<XML> .               => (ErrorMsg.errorAt' (yypos, yypos)
                          ("illegal XML character: \"" ^ yytext ^ "\"");
                          continue ());

<INITIAL> "()"        => (Tokens.UNIT (pos yypos, pos yypos + size yytext));
<INITIAL> "("         => (Tokens.LPAREN (pos yypos, pos yypos + size yytext));
<INITIAL> ")"         => (Tokens.RPAREN (pos yypos, pos yypos + size yytext));
<INITIAL> "["         => (Tokens.LBRACK (pos yypos, pos yypos + size yytext));
<INITIAL> "]"         => (Tokens.RBRACK (pos yypos, pos yypos + size yytext));
<INITIAL> "{"         => (enterBrace ();
                          Tokens.LBRACE (pos yypos, pos yypos + size yytext));
<INITIAL> "}"         => (exitBrace ();
                          Tokens.RBRACE (pos yypos, pos yypos + size yytext));

<INITIAL> "->"        => (Tokens.ARROW (pos yypos, pos yypos + size yytext));
<INITIAL> "=>"        => (Tokens.DARROW (pos yypos, pos yypos + size yytext));
<INITIAL> "++"        => (Tokens.PLUSPLUS (pos yypos, pos yypos + size yytext));
<INITIAL> "--"        => (Tokens.MINUSMINUS (pos yypos, pos yypos + size yytext));

<INITIAL> "="         => (Tokens.EQ (pos yypos, pos yypos + size yytext));
<INITIAL> "<>"        => (Tokens.NE (pos yypos, pos yypos + size yytext));
<INITIAL> "<"         => (Tokens.LT (pos yypos, pos yypos + size yytext));
<INITIAL> ">"         => (Tokens.GT (pos yypos, pos yypos + size yytext));
<INITIAL> "<="        => (Tokens.LE (pos yypos, pos yypos + size yytext));
<INITIAL> ">="        => (Tokens.GE (pos yypos, pos yypos + size yytext));
<INITIAL> ","         => (Tokens.COMMA (pos yypos, pos yypos + size yytext));
<INITIAL> ":::"       => (Tokens.TCOLON (pos yypos, pos yypos + size yytext));
<INITIAL> "::"        => (Tokens.DCOLON (pos yypos, pos yypos + size yytext));
<INITIAL> ":"         => (Tokens.COLON (pos yypos, pos yypos + size yytext));
<INITIAL> "..."       => (Tokens.DOTDOTDOT (pos yypos, pos yypos + size yytext));
<INITIAL> "."         => (Tokens.DOT (pos yypos, pos yypos + size yytext));
<INITIAL> "$"         => (Tokens.DOLLAR (pos yypos, pos yypos + size yytext));
<INITIAL> "#"         => (Tokens.HASH (pos yypos, pos yypos + size yytext));
<INITIAL> "__"        => (Tokens.UNDERUNDER (pos yypos, pos yypos + size yytext));
<INITIAL> "_"         => (Tokens.UNDER (pos yypos, pos yypos + size yytext));
<INITIAL> "~"         => (Tokens.TWIDDLE (pos yypos, pos yypos + size yytext));
<INITIAL> "|"         => (Tokens.BAR (pos yypos, pos yypos + size yytext));
<INITIAL> "*"         => (Tokens.STAR (pos yypos, pos yypos + size yytext));
<INITIAL> "<-"        => (Tokens.LARROW (pos yypos, pos yypos + size yytext));
<INITIAL> ";"         => (Tokens.SEMI (pos yypos, pos yypos + size yytext));

<INITIAL> "+"         => (Tokens.PLUS (pos yypos, pos yypos + size yytext));
<INITIAL> "-"         => (Tokens.MINUS (pos yypos, pos yypos + size yytext));
<INITIAL> "/"         => (Tokens.DIVIDE (pos yypos, pos yypos + size yytext));
<INITIAL> "%"         => (Tokens.MOD (pos yypos, pos yypos + size yytext));
<INITIAL> "@"         => (Tokens.AT (pos yypos, pos yypos + size yytext));

<INITIAL> "con"       => (Tokens.CON (pos yypos, pos yypos + size yytext));
<INITIAL> "type"      => (Tokens.LTYPE (pos yypos, pos yypos + size yytext));
<INITIAL> "datatype"  => (Tokens.DATATYPE (pos yypos, pos yypos + size yytext));
<INITIAL> "of"        => (Tokens.OF (pos yypos, pos yypos + size yytext));
<INITIAL> "val"       => (Tokens.VAL (pos yypos, pos yypos + size yytext));
<INITIAL> "rec"       => (Tokens.REC (pos yypos, pos yypos + size yytext));
<INITIAL> "and"       => (Tokens.AND (pos yypos, pos yypos + size yytext));
<INITIAL> "fun"       => (Tokens.FUN (pos yypos, pos yypos + size yytext));
<INITIAL> "fn"        => (Tokens.FN (pos yypos, pos yypos + size yytext));
<INITIAL> "fold"      => (Tokens.FOLD (pos yypos, pos yypos + size yytext));
<INITIAL> "case"      => (Tokens.CASE (pos yypos, pos yypos + size yytext));
<INITIAL> "if"        => (Tokens.IF (pos yypos, pos yypos + size yytext));
<INITIAL> "then"      => (Tokens.THEN (pos yypos, pos yypos + size yytext));
<INITIAL> "else"      => (Tokens.ELSE (pos yypos, pos yypos + size yytext));

<INITIAL> "structure" => (Tokens.STRUCTURE (pos yypos, pos yypos + size yytext));
<INITIAL> "signature" => (Tokens.SIGNATURE (pos yypos, pos yypos + size yytext));
<INITIAL> "struct"    => (Tokens.STRUCT (pos yypos, pos yypos + size yytext));
<INITIAL> "sig"       => (if yypos = 2 then initialSig () else ();
                          Tokens.SIG (pos yypos, pos yypos + size yytext));
<INITIAL> "let"       => (Tokens.LET (pos yypos, pos yypos + size yytext));
<INITIAL> "in"        => (Tokens.IN (pos yypos, pos yypos + size yytext));
<INITIAL> "end"       => (Tokens.END (pos yypos, pos yypos + size yytext));
<INITIAL> "functor"   => (Tokens.FUNCTOR (pos yypos, pos yypos + size yytext));
<INITIAL> "where"     => (Tokens.WHERE (pos yypos, pos yypos + size yytext));
<INITIAL> "extern"    => (Tokens.EXTERN (pos yypos, pos yypos + size yytext));
<INITIAL> "include"   => (Tokens.INCLUDE (pos yypos, pos yypos + size yytext));
<INITIAL> "open"      => (Tokens.OPEN (pos yypos, pos yypos + size yytext));
<INITIAL> "constraint"=> (Tokens.CONSTRAINT (pos yypos, pos yypos + size yytext));
<INITIAL> "constraints"=> (Tokens.CONSTRAINTS (pos yypos, pos yypos + size yytext));
<INITIAL> "export"    => (Tokens.EXPORT (pos yypos, pos yypos + size yytext));
<INITIAL> "table"     => (Tokens.TABLE (pos yypos, pos yypos + size yytext));
<INITIAL> "sequence"  => (Tokens.SEQUENCE (pos yypos, pos yypos + size yytext));
<INITIAL> "class"     => (Tokens.CLASS (pos yypos, pos yypos + size yytext));

<INITIAL> "Type"      => (Tokens.TYPE (pos yypos, pos yypos + size yytext));
<INITIAL> "Name"      => (Tokens.NAME (pos yypos, pos yypos + size yytext));
<INITIAL> "Unit"      => (Tokens.KUNIT (pos yypos, pos yypos + size yytext));

<INITIAL> "SELECT"    => (Tokens.SELECT (pos yypos, pos yypos + size yytext));
<INITIAL> "FROM"      => (Tokens.FROM (pos yypos, pos yypos + size yytext));
<INITIAL> "AS"        => (Tokens.AS (pos yypos, pos yypos + size yytext));
<INITIAL> "WHERE"     => (Tokens.CWHERE (pos yypos, pos yypos + size yytext));
<INITIAL> "SQL"       => (Tokens.SQL (pos yypos, pos yypos + size yytext));
<INITIAL> "GROUP"     => (Tokens.GROUP (pos yypos, pos yypos + size yytext));
<INITIAL> "ORDER"     => (Tokens.ORDER (pos yypos, pos yypos + size yytext));
<INITIAL> "BY"        => (Tokens.BY (pos yypos, pos yypos + size yytext));
<INITIAL> "HAVING"    => (Tokens.HAVING (pos yypos, pos yypos + size yytext));
<INITIAL> "LIMIT"     => (Tokens.LIMIT (pos yypos, pos yypos + size yytext));
<INITIAL> "OFFSET"    => (Tokens.OFFSET (pos yypos, pos yypos + size yytext));
<INITIAL> "ALL"       => (Tokens.ALL (pos yypos, pos yypos + size yytext));

<INITIAL> "UNION"     => (Tokens.UNION (pos yypos, pos yypos + size yytext));
<INITIAL> "INTERSECT" => (Tokens.INTERSECT (pos yypos, pos yypos + size yytext));
<INITIAL> "EXCEPT"    => (Tokens.EXCEPT (pos yypos, pos yypos + size yytext));

<INITIAL> "TRUE"      => (Tokens.TRUE (pos yypos, pos yypos + size yytext));
<INITIAL> "FALSE"     => (Tokens.FALSE (pos yypos, pos yypos + size yytext));
<INITIAL> "AND"       => (Tokens.CAND (pos yypos, pos yypos + size yytext));
<INITIAL> "OR"        => (Tokens.OR (pos yypos, pos yypos + size yytext));
<INITIAL> "NOT"       => (Tokens.NOT (pos yypos, pos yypos + size yytext));

<INITIAL> "COUNT"     => (Tokens.COUNT (pos yypos, pos yypos + size yytext));
<INITIAL> "AVG"       => (Tokens.AVG (pos yypos, pos yypos + size yytext));
<INITIAL> "SUM"       => (Tokens.SUM (pos yypos, pos yypos + size yytext));
<INITIAL> "MIN"       => (Tokens.MIN (pos yypos, pos yypos + size yytext));
<INITIAL> "MAX"       => (Tokens.MAX (pos yypos, pos yypos + size yytext));

<INITIAL> "ASC"       => (Tokens.ASC (pos yypos, pos yypos + size yytext));
<INITIAL> "DESC"      => (Tokens.DESC (pos yypos, pos yypos + size yytext));

<INITIAL> "INSERT"    => (Tokens.INSERT (pos yypos, pos yypos + size yytext));
<INITIAL> "INTO"      => (Tokens.INTO (pos yypos, pos yypos + size yytext));
<INITIAL> "VALUES"    => (Tokens.VALUES (pos yypos, pos yypos + size yytext));
<INITIAL> "UPDATE"    => (Tokens.UPDATE (pos yypos, pos yypos + size yytext));
<INITIAL> "SET"       => (Tokens.SET (pos yypos, pos yypos + size yytext));
<INITIAL> "DELETE"    => (Tokens.DELETE (pos yypos, pos yypos + size yytext));

<INITIAL> "CURRENT_TIMESTAMP" => (Tokens.CURRENT_TIMESTAMP (pos yypos, pos yypos + size yytext));

<INITIAL> {id}        => (Tokens.SYMBOL (yytext, pos yypos, pos yypos + size yytext));
<INITIAL> {cid}       => (Tokens.CSYMBOL (yytext, pos yypos, pos yypos + size yytext));

<INITIAL> {intconst}  => (case Int64.fromString yytext of
                              SOME x => Tokens.INT (x, pos yypos, pos yypos + size yytext)
                            | NONE => (ErrorMsg.errorAt' (pos yypos, pos yypos)
                                       ("Expected int, received: " ^ yytext);
                                       continue ()));
<INITIAL> {realconst} => (case Real64.fromString yytext of
                              SOME x => Tokens.FLOAT (x, pos yypos, pos yypos + size yytext)
                            | NONE => (ErrorMsg.errorAt' (pos yypos, pos yypos)
                                       ("Expected float, received: " ^ yytext);
                                       continue ()));

<COMMENT> .           => (continue());

<INITIAL> .           => (ErrorMsg.errorAt' (pos yypos, pos yypos)
                          ("illegal character: \"" ^ yytext ^ "\"");
                          continue ());