Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changed lexers to use Sedlex #392

Open
wants to merge 9 commits into
base: dev-0-1-0
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions satysfi.opam
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ depends: [
"otfed" {= "0.0.1"}
"ppx_deriving"
"re" {build}
"sedlex" {>= "3.0" & < "4.0"}
"uutf"
"yojson-with-position" {= "1.4.2+satysfi"}
"omd" {< "2.0.0~"}
Expand Down
83 changes: 83 additions & 0 deletions src/chardecoder/dataLexer.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
open DataParser

(* [int_of_hex s] converts the hexadecimal numeral [s] (written without any
   prefix) to an integer by prepending the 0x prefix and delegating to
   [int_of_string]. Raises [Failure] if the prefixed text is not a valid
   integer literal. *)
let int_of_hex s =
  let prefixed = "0x" ^ s in
  int_of_string prefixed

(* [lexeme lexbuf] is a local shorthand for [Sedlexing.Utf8.lexeme]: the text
   of the most recent match on [lexbuf], as a UTF-8 string. *)
let lexeme lexbuf = Sedlexing.Utf8.lexeme lexbuf


(* Named regular expressions used by the lexer rules of this file. *)

(* One ASCII decimal digit. *)
let digit = [%sedlex.regexp? '0'..'9']
(* One hexadecimal digit; only the upper-case letters A-F are accepted. *)
let hex = [%sedlex.regexp? digit | 'A'..'F']
(* A code point literal: four or more hexadecimal digits. *)
let cp = [%sedlex.regexp? hex, hex, hex, hex, (Star hex)]
(* A line terminator: CRLF, a lone CR, or a lone LF. *)
let break = [%sedlex.regexp? "\r\n" | '\r' | '\n']
(* One character that is neither CR nor LF. The complement has to be taken
   of the union of both characters: the union of the two separate
   complements accepts every character, because each character differs
   from at least one of CR and LF. *)
let nonbreak = [%sedlex.regexp? Compl ('\r' | '\n')]
(* One ASCII upper-case letter. *)
let upper = [%sedlex.regexp? 'A'..'Z']
(* One ASCII lower-case letter. *)
let lower = [%sedlex.regexp? 'a'..'z']
(* One ASCII letter of either case. *)
let alph = [%sedlex.regexp? upper | lower]
(* Horizontal whitespace: a space or a tab. *)
let space = [%sedlex.regexp? ' ' | '\t']


(* [expr_raw lexbuf] reads and returns the next token from [lexbuf].

   - Runs of spaces, tabs and line breaks are skipped.
   - A [#] starts a comment, which [lex_comment] consumes before lexing
     resumes.
   - A code point literal yields [CODEPOINT], or [CODEPOINTRANGE] when
     [lex_cp] finds a [..] followed by a second literal.
   - A letter followed by letters, digits or underscores yields [DATA]
     carrying the matched text.
   - End of input yields [EOI].

   Raises [Failure] when the input at the current position starts no token. *)
let rec expr_raw lexbuf =
  match%sedlex lexbuf with
  | Plus (space | break) -> expr_raw lexbuf
  | "#" -> (
      let () = lex_comment lexbuf in
      expr_raw lexbuf
    )
  | cp -> (
      let cpstr1 = lexeme lexbuf in
      let cpstr2opt = lex_cp lexbuf in
      match cpstr2opt with
      | Some(cpstr2) -> CODEPOINTRANGE(int_of_hex cpstr1, int_of_hex cpstr2)
      | None -> CODEPOINT(int_of_hex cpstr1)
    )
  | alph, Star (alph | digit | "_") -> DATA(lexeme lexbuf)
  | eof -> EOI
  (* Consume the offending character so that it shows up in the message:
     the mandatory fallback below is entered without consuming anything,
     which leaves the lexeme empty. *)
  | any -> failwith ("DataLexer: illegal token " ^ (lexeme lexbuf))
  | _ -> failwith ("DataLexer: illegal token " ^ (lexeme lexbuf))


(* [lex_comment lexbuf] discards the remainder of a comment: it consumes
   characters one at a time up to and including the next line break, or
   stops at end of input. Accepting end of input lets a comment on the last
   line of a file without a trailing newline be read instead of rejected;
   the caller then sees the end of input itself. *)
and lex_comment lexbuf =
  match%sedlex lexbuf with
  | break -> ()
  | eof -> ()
  | any -> lex_comment lexbuf
  | _ -> failwith ("DataLexer: illegal token " ^ (lexeme lexbuf))


(* [lex_cp lexbuf] reads what follows the first code point literal.

   Returns [None] when optional spaces and a [;] follow (a single code
   point), and [Some upper] when [..] follows, where [upper] is the text of
   the second literal as read by [lex_cp2].

   Raises [Failure] on anything else. *)
and lex_cp lexbuf =
  match%sedlex lexbuf with
  | (Star space), ';' -> None
  | ".." -> Some(lex_cp2 lexbuf)
  (* Consume the offending character so that the message names it; the
     fallback below is entered with nothing consumed, i.e. with an empty
     lexeme. *)
  | any -> failwith ("DataLexer: illegal token " ^ (lexeme lexbuf))
  | _ -> failwith ("DataLexer: illegal token " ^ (lexeme lexbuf))


(* [lex_cp2 lexbuf] reads the second code point literal of a range and the
   terminator checked by [lex_cp3]. Returns the text of the literal; raises
   [Failure] if the literal or the terminator is missing. *)
and lex_cp2 lexbuf =
  match%sedlex lexbuf with
  | cp ->
      (* Capture the literal before [lex_cp3] performs the next match. *)
      let upper_bound = lexeme lexbuf in
      begin
        match lex_cp3 lexbuf with
        | true -> upper_bound
        | false -> failwith ("DataLexer: illegal token " ^ (lexeme lexbuf))
      end
  | _ -> failwith ("DataLexer: illegal token " ^ (lexeme lexbuf))

(* [lex_cp3 lexbuf] tells whether the input continues with optional spaces
   followed by a [;]. Returns [true] when it does and [false] otherwise. *)
and lex_cp3 lexbuf =
  match%sedlex lexbuf with
  | Star space, ';' -> true
  | _ -> false


(* [expr] is the exported token-level entry point; it is [expr_raw] under
   its public name. *)
let expr = expr_raw


(* [parse lexbuf] runs [DataParser.main] over the tokens that [expr] reads
   from [lexbuf] and returns the parser result.

   The Menhir entry point is converted with
   [MenhirLib.Convert.Simplified.traditional2revised], so the parser pulls
   [(token, start_position, end_position)] triples from a closure. *)
let parse lexbuf =
  let lexer () =
    (* Lex first, then read the positions: [Sedlexing.lexing_positions]
       describes the most recent match, so reading it before calling [expr]
       would pair every token with the positions of the previous lexeme.
       NOTE(review): for code point tokens the most recent match is the
       trailing terminator read by [lex_cp] / [lex_cp3], so the reported
       span covers that last match rather than the whole token. *)
    let token = expr lexbuf in
    let (ante_position, post_position) =
      Sedlexing.lexing_positions lexbuf
    in
    (token, ante_position, post_position)
  in
  let parser =
    MenhirLib.Convert.Simplified.traditional2revised DataParser.main
  in
  parser lexer
2 changes: 2 additions & 0 deletions src/chardecoder/dataLexer.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
val expr : Sedlexing.lexbuf -> DataParser.token
val parse : Sedlexing.lexbuf -> (CharBasis.code_point_kind * string) list
40 changes: 0 additions & 40 deletions src/chardecoder/dataLexer.mll

This file was deleted.

2 changes: 1 addition & 1 deletion src/chardecoder/lineBreakDataMap.ml
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ let line_break_map_ref : (line_break_class BatIMap.t) ref = ref (BatIMap.empty ~

let set_from_file (abspath : abs_path) =
let channel = open_in_abs abspath in
let line_break_list = DataParser.main DataLexer.expr (Lexing.from_channel channel) in
let line_break_list = DataLexer.parse (Sedlexing.Utf8.from_channel channel) in
let line_break_map_raw = line_break_list |> CharBasis.map_of_list class_of_string in
let line_break_map =
List.fold_left (fun mapacc (cp, lbc) ->
Expand Down
4 changes: 2 additions & 2 deletions src/chardecoder/scriptDataMap.ml
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,13 @@ let script_map_ref : (script BatIMap.t) ref = ref (BatIMap.empty ~eq:(=))
let set_from_file (abspath_S : abs_path) (abspath_EAW : abs_path) =
let eaw_map =
let channel_EAW = open_in_abs abspath_EAW in
let eaw_list = DataParser.main DataLexer.expr (Lexing.from_channel channel_EAW) in
let eaw_list = DataLexer.parse (Sedlexing.Utf8.from_channel channel_EAW) in
close_in channel_EAW;
eaw_list |> CharBasis.map_of_list read_east_asian_width
in
let script_map =
let channel_S = open_in_abs abspath_S in
let script_list = DataParser.main DataLexer.expr (Lexing.from_channel channel_S) in
let script_list = DataLexer.parse (Sedlexing.Utf8.from_channel channel_S) in
close_in channel_S;
script_list |> CharBasis.map_of_list (read_script eaw_map)
in
Expand Down
9 changes: 4 additions & 5 deletions src/dune
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,15 @@
core_kernel.pairing_heap
menhirLib
otfed
sedlex
uutf
yojson-with-position
omd
ocamlgraph
)
(preprocess (pps
ppx_deriving.show
sedlex.ppx
))
)

Expand All @@ -28,14 +30,11 @@
(copy_files# text-mode/*.{ml,mli})
(copy_files# md/*.{ml,mli})
(copy_files# chardecoder/*.{ml,mli})
(copy_files chardecoder/*.{mll,mly})
(copy_files chardecoder/*.{mly})
(copy_files# frontend/*.{ml,mli})
(copy_files frontend/*.{mll,mly})
(copy_files frontend/*.{mly})
(copy_files# frontend/bytecomp/*.{ml,mli})

(ocamllex
(modules lexer dataLexer))

(menhir
(modules parser dataParser)
(flags (--table --explain)))
Expand Down
4 changes: 2 additions & 2 deletions src/frontend/fileDependencyResolver.ml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ let rec register_library_file (graph : FileDependencyGraph.t) ~prev:(vertex_prev
Logging.begin_to_parse_file abspath;
let curdir = Filename.dirname (get_abs_path_string abspath) in
let inc = open_in_abs abspath in
let (header, utsrc) = ParserInterface.process (basename_abs abspath) (Lexing.from_channel inc) in
let (header, utsrc) = ParserInterface.process (basename_abs abspath) (Sedlexing.Utf8.from_channel inc) in
close_in inc;
let lib =
match utsrc with
Expand Down Expand Up @@ -83,7 +83,7 @@ let register_document_file (graph : FileDependencyGraph.t) (abspath_in : abs_pat
let file_in = open_in_abs abspath_in in
let curdir = Filename.dirname (get_abs_path_string abspath_in) in
let (header, utsrc) =
ParserInterface.process (Filename.basename (get_abs_path_string abspath_in)) (Lexing.from_channel file_in)
ParserInterface.process (Filename.basename (get_abs_path_string abspath_in)) (Sedlexing.Utf8.from_channel file_in)
in
let utast =
match utsrc with
Expand Down
Loading