A staged regular expression library for MetaOCaml, based on "Regular-expression derivatives re-examined"
-
Install the BER MetaOCaml compiler using OPAM:
opam switch 4.14.1+BER
eval $(opam env)
-
Add the metaocaml-opam repository:
opam remote add metaocaml git+https://github.com/metaocaml/metaocaml-opam.git
-
Install the `reex` and `reex_match` packages:
opam pin add reex reex_match
The functions in the `Reex` module construct regular expressions:
let letters = plus (range 'A' 'Z') (* [A-Z]+ *)
let keyword = str "let" <|> str "and" <|> str "in"
Alternatively, the `Reex.regex` function builds a regular expression from a string:
let letters = regex "[A-Z]+"
let keyword = regex "let|and|in"
The `Reex_match.match_` function generates OCaml code that matches one or more regular expressions. For example, the following call
.<fun i s ->
.~(match_ ~options:{default_options with match_type=`ranges} .<i>. .<s>.
[chr 'a' , (fun _ _ -> .<"a">.);
plus (chr 'b'), (fun _ _ -> .<"b">.)]) >.
generates the following OCaml code for matching "a" or "b+":
fun i s ->
let rec f ~start ~index ~len s =
match String.unsafe_get s index with
| 'c'..'\255'|'\000'..'`' -> failwith "no match"
| 'b' -> g ~start ~index:(index + 1) ~len s
| 'a' -> h ~start ~index:(index + 1) ~len s
and g ~start ~index ~len s =
match String.unsafe_get s index with
| 'c'..'\255'|'\000'..'a' -> "b"
| 'b' -> g ~start ~index:(index + 1) ~len s
and h ~start ~index ~len s = "a" in
f ~start:i ~index:i ~len:(String.length s) s