Skip to content

Implement math syntax #5

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Jun 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions src/ast.ml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ type inline_element =
| `Styled of style * inline_element with_location list
| `Reference of
reference_kind * string with_location * inline_element with_location list
| `Link of string * inline_element with_location list ]
| `Link of string * inline_element with_location list
| `Math_span of string ]
(** Inline elements are equivalent to what would be found in a [span] in HTML.
Mostly these are straightforward. The [`Reference] constructor takes a triple
whose second element is the reference itself, and the third the replacement
Expand All @@ -40,7 +41,8 @@ type nestable_block_element =
| `List of
[ `Unordered | `Ordered ]
* [ `Light | `Heavy ]
* nestable_block_element with_location list list ]
* nestable_block_element with_location list list
| `Math_block of string ]
(** Some block elements may be nested within lists or tags, but not all.
The [`List] constructor has a parameter of type [\[`Light | `Heavy\]].
This corresponds to the syntactic constructor used (see the
Expand Down
54 changes: 53 additions & 1 deletion src/lexer.mll
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,13 @@ let unescape_word : string -> string = fun s ->
scan_word 0;
Buffer.contents buffer

(* The two flavours of math markup the lexer distinguishes. *)
type math_kind = Inline | Block

(* [math_constr kind contents] wraps [contents] in the token constructor that
   corresponds to [kind]: [`Math_span] for [Inline], [`Math_block] for
   [Block]. *)
let math_constr kind contents =
  match kind with
  | Block -> `Math_block contents
  | Inline -> `Math_span contents

(* This is used for code and verbatim blocks. It can be done with a regular
expression, but the regexp gets quite ugly, so a function is easier to
Expand Down Expand Up @@ -320,6 +326,13 @@ rule token input = parse

| "{_"
{ emit input (`Begin_style `Subscript) }

| "{math" space_char
{ math Block (Buffer.create 1024) 0 (Lexing.lexeme_start lexbuf) input lexbuf }

| "{m" horizontal_space
{ math Inline (Buffer.create 1024) 0 (Lexing.lexeme_start lexbuf) input lexbuf }


| "{!modules:" ([^ '}']* as modules) '}'
{ emit input (`Modules modules) }
Expand Down Expand Up @@ -538,7 +551,46 @@ and code_span buffer nesting_level start_offset input = parse
{ Buffer.add_char buffer c;
code_span buffer nesting_level start_offset input lexbuf }


(* Lexes the body of a math element and emits a single token for it.

   - [kind] selects the token constructor through [math_constr].
   - [buffer] accumulates the text seen so far.
   - [nesting_level] counts the unescaped opening braces that have not been
     closed yet; it is 0 when the next closing brace ends the element.
   - [start_offset] is forwarded to [emit] as the start of the token.
   - [input] is forwarded to [emit] and [warning].

   The terminating closing brace is not added to the buffer. *)
and math kind buffer nesting_level start_offset input = parse
  | '}'
    { (* Structural equality: [nesting_level] is a plain int. *)
      if nesting_level = 0 then
        emit input (math_constr kind (Buffer.contents buffer)) ~start_offset
      else begin
        Buffer.add_char buffer '}';
        math kind buffer (nesting_level - 1) start_offset input lexbuf
      end
    }
  | '{'
    { Buffer.add_char buffer '{';
      math kind buffer (nesting_level + 1) start_offset input lexbuf }
  (* Backslash-escaped braces are copied verbatim and leave the nesting level
     unchanged. *)
  | ("\\{" | "\\}") as s
    { Buffer.add_string buffer s;
      math kind buffer nesting_level start_offset input lexbuf }
  (* A newline inside an inline element produces a warning and is stored as a
     single line feed; inside a block element it is stored as matched. *)
  | (newline) as s
    {
      match kind with
      | Inline ->
        warning
          input
          (Parse_error.not_allowed
            ~what:(Token.describe (`Blank_line "\n"))
            ~in_what:(Token.describe (math_constr kind "")));
        Buffer.add_char buffer '\n';
        math kind buffer nesting_level start_offset input lexbuf
      | Block ->
        Buffer.add_string buffer s;
        math kind buffer nesting_level start_offset input lexbuf
    }
  (* End of input before the closing brace: warn, then emit what was
     collected so far. *)
  | eof
    { warning
        input
        (Parse_error.not_allowed
          ~what:(Token.describe `End)
          ~in_what:(Token.describe (math_constr kind "")));
      emit input (math_constr kind (Buffer.contents buffer)) ~start_offset }
  | _ as c
    { Buffer.add_char buffer c;
      math kind buffer nesting_level start_offset input lexbuf }

and verbatim buffer last_false_terminator start_offset input = parse
| (space_char as c) "v}"
Expand Down
15 changes: 7 additions & 8 deletions src/syntax.ml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ type token_that_always_begins_an_inline_element =
| `Simple_reference of string
| `Begin_reference_with_replacement_text of string
| `Simple_link of string
| `Begin_link_with_replacement_text of string ]
| `Begin_link_with_replacement_text of string
| `Math_span of string ]

(* Check that the token constructors above actually are all in [Token.t]. *)
let _check_subset : token_that_always_begins_an_inline_element -> Token.t =
Expand Down Expand Up @@ -99,12 +100,9 @@ let rec inline_element :
| `Plus ->
junk input;
Loc.at location (`Word "+")
| `Code_span c ->
| (`Code_span _ | `Math_span _ | `Raw_markup _) as token ->
junk input;
Loc.at location (`Code_span c)
| `Raw_markup (raw_markup_target, s) ->
junk input;
Loc.at location (`Raw_markup (raw_markup_target, s))
Loc.at location token
| `Begin_style s as parent_markup ->
junk input;

Expand Down Expand Up @@ -730,10 +728,11 @@ let rec block_element_list :
let block = Loc.at location block in
let acc = block :: acc in
consume_block_elements ~parsed_a_tag `After_text acc
| { value = `Code_block (_, s) as token; location } as next_token ->
| ( { value = `Code_block (_, { value = s; _ }) as token; location }
| { value = `Math_block s as token; location } ) as next_token ->
warn_if_after_tags next_token;
warn_if_after_text next_token;
if s.value = "" then
if s = "" then
Parse_error.should_not_be_empty ~what:(Token.describe token) location
|> add_warning input;

Expand Down
4 changes: 4 additions & 0 deletions src/token.ml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ type t =
string
| `Code_span of string
| `Raw_markup of string option * string
| `Math_span of string
| `Math_block of string
| `Begin_style of style
| `Begin_paragraph_style of paragraph_style
| (* Other inline element markup. *)
Expand Down Expand Up @@ -123,6 +125,8 @@ let describe : [< t | `Comment ] -> string = function
| `Begin_style `Emphasis -> "'{e ...}' (emphasized text)"
| `Begin_style `Superscript -> "'{^...}' (superscript)"
| `Begin_style `Subscript -> "'{_...}' (subscript)"
| `Math_span _ -> "'{m ...}' (math span)"
| `Math_block _ -> "'{math ...}' (math block)"
| `Simple_reference _ -> "'{!...}' (cross-reference)"
| `Begin_reference_with_replacement_text _ ->
"'{{!...} ...}' (cross-reference)"
Expand Down
75 changes: 75 additions & 0 deletions test/test.ml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ module Ast_to_sexp = struct
| `Code_span c -> List [ Atom "code_span"; Atom c ]
| `Raw_markup (target, s) ->
List [ Atom "raw_markup"; opt str target; Atom s ]
| `Math_span s -> List [ Atom "math_span"; Atom s ]
| `Styled (s, es) ->
List [ style s; List (List.map (at.at (inline_element at)) es) ]
| `Reference (kind, r, es) ->
Expand All @@ -61,6 +62,7 @@ module Ast_to_sexp = struct
| `Paragraph es ->
List
[ Atom "paragraph"; List (List.map (at.at (inline_element at)) es) ]
| `Math_block s -> List [ Atom "math_block"; Atom s ]
| `Code_block (None, c) -> List [ Atom "code_block"; at.at str c ]
| `Code_block (Some meta, c) ->
List [ Atom "code_block"; code_block_meta at meta; at.at str c ]
Expand Down Expand Up @@ -5303,3 +5305,76 @@ let%expect_test _ =
(warnings ())) |}]
end in
()

(* Expect tests for the math markup. Each case feeds one input to [test] and
   pins the printed output together with the list of warnings. *)
let%expect_test _ =
let module Math = struct
(* A one-line math block with a nested brace group and a trailing percent
   sign is returned as a single math_block, with no warnings. *)
let block =
test "{math \\sum_{i=0}^n x^i%}";
[%expect
{|
((output (((f.ml (1 0) (1 24)) (math_block "\\sum_{i=0}^n x^i%"))))
(warnings ())) |}]

(* A multi-line math block with backslash-escaped braces, percent signs and
   nested brace groups. The expected output keeps the line breaks and reports
   no warnings. *)
let complex_block =
test
{|{math
\alpha(x)=\left\{
\begin{array}{ll} % beginning of the array
x \% 4\\ % some variable modulo 4
\frac{1}{1+e^{-kx}}\\ % something else
\frac{e^x-e^{-x}}{e^x+e^{-x}} % another action
\end{array} % end of the array
\right.
}|};
[%expect
{|
((output
(((f.ml (1 0) (9 7))
(math_block
" \\alpha(x)=\\left\\{\
\n \\begin{array}{ll} % beginning of the array\
\n x \\% 4\\\\ % some variable modulo 4\
\n \\frac{1}{1+e^{-kx}}\\\\ % something else\
\n \\frac{e^x-e^{-x}}{e^x+e^{-x}} % another action\
\n \\end{array} % end of the array\
\n \\right.\
\n "))))
(warnings ())) |}]

(* An inline math span is reported as a math_span inside a paragraph. *)
let inline =
test "{m x + 4}";
[%expect
{|
((output
(((f.ml (1 0) (1 9))
(paragraph (((f.ml (1 0) (1 9)) (math_span "x + 4")))))))
(warnings ())) |}]

(* A balanced nested brace group stays inside the inline span. *)
let inline_nested =
test "{m \\sub_{i=0}^n x^i}";
[%expect
{|
((output
(((f.ml (1 0) (1 20))
(paragraph (((f.ml (1 0) (1 20)) (math_span "\\sub_{i=0}^n x^i")))))))
(warnings ())) |}]

(* A backslash-escaped opening brace does not open a nesting level: the span
   still ends at the last closing brace of the input. *)
let inline_false_nesting =
test "{m \\{ \\mathbb{only_left}}";
[%expect
{|
((output
(((f.ml (1 0) (1 25))
(paragraph (((f.ml (1 0) (1 25)) (math_span "\\{ \\mathbb{only_left}")))))))
(warnings ())) |}]

(* A backslash-escaped closing brace does not terminate the span; it is kept
   in the span text. *)
let inline_false_terminator =
test "{m \\mathbb{only_left}\\}}";
[%expect
{|
((output
(((f.ml (1 0) (1 24))
(paragraph (((f.ml (1 0) (1 24)) (math_span "\\mathbb{only_left}\\}")))))))
(warnings ())) |}]
end in
()