datatype RE = Lambda | Empty | Simple of string | Union of RE * RE | Concat of RE * RE | Closure of RE ; infixr 5 || ; infixr 6 ^^ ; fun a ^^ b = Concat (a, b); fun a || b = Union (a, b); datatype token = Done | Bar | Star | Hash | Zero | LeftParen | RightParen | Single of string | BadInput; exception error of string; fun first(str): string = String.extract(str, 0, SOME 1); fun rest(str): string = String.extract(str, 1, NONE); fun lexan "": token list = [ ] | lexan s = case (first s) of " " => (lexan (rest s)) (* ignore spaces *) | "#" => Hash :: (lexan (rest s)) | "0" => Zero :: (lexan (rest s)) | "|" => Bar :: (lexan (rest s)) | "*" => Star :: (lexan (rest s)) | "(" => LeftParen :: (lexan (rest s)) | ")" => RightParen :: (lexan (rest s)) | ch => if ch >= "a" andalso ch <= "z" then (Single ch) :: (lexan (rest s)) else [BadInput]; fun tok2str Done = "" | tok2str Bar = "|" | tok2str Star = "*" | tok2str Hash = "#" | tok2str Zero = "0" | tok2str LeftParen = "(" | tok2str RightParen = ")" | tok2str (Single s) = s | tok2str BadInput = ""; val lookahead = ref Done; val input = ref [Done]; fun init s = ( input := lexan s; lookahead := hd(!input); input := tl(!input) ); fun match t = (* if t is the next token in the input, reads it, otherwise fails *) if (!lookahead) = t then if null(!input) then lookahead := Done else ( lookahead := hd(!input); input := tl(!input) ) else raise error ("looking for: " ^ (tok2str t) ^ " found: " ^ (tok2str (!lookahead))); fun alt () = (* alt ::= concat moreAlt *) let val x = concat () val y = moreAlt () in case y of NONE => x | SOME z => Union(x,z) end and moreAlt () = (* moreAlt ::= bar alt moreAlt | Lambda *) case (!lookahead) of Bar => let val _ = match Bar val x = alt() val y = moreAlt () in case y of NONE => SOME x | (SOME z) => SOME(Union(x,z)) end | _ => NONE and concat () = (* concat ::= closure moreConcat *) let val x = closure () val y = moreConcat () in case y of NONE => x | SOME z => (Concat(x,z)) end and moreConcat () = (* moreConcat ::= closure moreConcat | Lambda *) if (couldBeSimple (!lookahead)) then let val x = closure() val y = moreConcat() in case y of NONE => SOME x | SOME z => SOME(Concat(x,z)) end else NONE and couldBeSimple LeftParen = true | couldBeSimple Hash = true | couldBeSimple Zero = true | couldBeSimple (Single _) = true | couldBeSimple _ = false and closure () = let val x = simple() in case !lookahead of Star => (match Star; Closure x) | _ => x end and simple () = (* simple ::= id | ( alt ) | # | 0 *) case !lookahead of Single c => let val _ = match (Single c) in Simple(c) end | LeftParen => let val _ = match LeftParen val x = alt(); val _ = match RightParen in x end | Hash => let val _ = match Hash in Lambda end | Zero => let val _ = match Zero in Empty end | x => raise error ("In simple no match: " ^ (tok2str x)); (* Top level parser *) fun parse s = let val _ = init s val ans = alt() val _ = match Done in ans end; (* Tests *) fun assert true = true | assert false = false and deny b = assert (not b); val a = Simple "a"; val b = Simple "b"; val c = Simple "c"; val p1 = parse "a(b* | c)#"; val t1 = assert (p1 = Concat (Simple "a", Concat (Union (Closure (Simple "b"), Simple "c"), Lambda) ) ); (* The nested parentehsese make this hard to read; try using the infix operators ^^ and || instead: *) val t1' = assert (p1 = a ^^ ((Closure b) || c) ^^ Lambda); val t2 = assert ((parse "a|b") = Union (Simple "a", Simple "b"));