Skip to content

Commit c8cb679

Browse files
authored
Merge pull request #114 from Kolaru/new_parser
New parser - simplified with a tokenizer pass
2 parents cad0b7f + 06b8643 commit c8cb679

File tree

11 files changed

+272
-363
lines changed

11 files changed

+272
-363
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "MathTeXEngine"
22
uuid = "0a4f8689-d25c-4efe-a92b-7142dfc1aa53"
33
authors = ["Benoît Richard <kolaru@hotmail.com>"]
4-
version = "0.5.7"
4+
version = "0.6.0"
55

66
[deps]
77
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ The table below contains the list of all supported LaTeX construction and their
114114
| Function | `\sin` | `:function` | `name` |
115115
| Generic symbol | `ω` | `:symbol` | `unicode_char` |
116116
| Group | `{ }` | `:group` | `elements...` |
117+
| Inline math | `$ $` | `:inline_math` | `content` |
117118
| Integral | `\int_a^b` | `:integral` | `symbol, low_bound, high_bound` |
118119
| Math fonts | `\mathrm{}` | `:font` | `font_modifier, expr` |
119120
| Punctuation | `!` | `:punctuation` |

src/MathTeXEngine.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,15 @@ import FreeTypeAbstraction:
1919
height_insensitive_boundingbox, leftinkbound, rightinkbound,
2020
topinkbound, bottominkbound
2121

22-
export TeXExpr, texparse
22+
export TeXExpr, texparse, TeXParseError, manual_texexpr
2323
export TeXElement, TeXChar, VLine, HLine, generate_tex_elements
2424
export texfont
2525
export glyph_index
2626

2727
# Reexport from LaTeXStrings
2828
export @L_str
2929

30+
include("parser/tokenizer.jl")
3031
include("parser/texexpr.jl")
3132
include("parser/commands_data.jl")
3233
include("parser/commands_registration.jl")

src/engine/layout.jl

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@ function tex_layout(expr, state)
2626
try
2727
if isleaf(expr)
2828
char = args[1]
29+
if char == ' ' && state.tex_mode == :inline_math
30+
return Space(0.0)
31+
end
2932
texchar = TeXChar(char, state, head)
3033
return texchar
3134
elseif head == :combining_accent
@@ -92,7 +95,7 @@ function tex_layout(expr, state)
9295
],
9396
scales
9497
)
95-
elseif head == :font || head == :text
98+
elseif head == :font
9699
modifier, content = args
97100
return tex_layout(content, add_font_modifier(state, modifier))
98101
elseif head == :frac
@@ -124,8 +127,12 @@ function tex_layout(expr, state)
124127
name = args[1]
125128
elements = TeXChar.(collect(name), state, Ref(:function))
126129
return horizontal_layout(elements)
127-
elseif head == :group || head == :expr
128-
elements = tex_layout.(args, state)
130+
elseif head == :group || head == :expr || head == :inline_math
131+
mode = head == :inline_math ? :inline_math : state.tex_mode
132+
elements = tex_layout.(args, change_mode(state, mode))
133+
if isempty(elements)
134+
return Space(0.0)
135+
end
129136
return horizontal_layout(elements)
130137
elseif head == :integral
131138
pad = 0.1
@@ -196,6 +203,11 @@ function tex_layout(expr, state)
196203
(rightinkbound(content), 0)
197204
]
198205
)
206+
elseif head == :text
207+
modifier, content = args
208+
new_state = add_font_modifier(state, modifier)
209+
new_state = change_mode(new_state, :text)
210+
return tex_layout(content, new_state)
199211
elseif head == :underover
200212
core, sub, super = tex_layout.(args, state)
201213

@@ -290,6 +302,7 @@ function generate_tex_elements(str, font_family=FontFamily())
290302
return unravel(layout)
291303
end
292304

305+
#=
293306
# Still hacky as hell
294307
function generate_tex_elements(str::LaTeXString, font_family=FontFamily())
295308
parts = String.(split(str, raw"$"))
@@ -302,3 +315,4 @@ function generate_tex_elements(str::LaTeXString, font_family=FontFamily())
302315
303316
return unravel(horizontal_layout(groups))
304317
end
318+
=#

src/engine/layout_context.jl

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,22 @@
11
# State passed along while laying out a TeX expression.
# - `font_family`: the font family in use.
# - `font_modifiers`: the font modifiers accumulated so far
#   (extended by `add_font_modifier`).
# - `tex_mode`: the current mode as a Symbol; the two-argument constructor
#   defaults it to `:text`, and `:inline_math` is the other value used by
#   the layout code.
struct LayoutState
22
font_family::FontFamily
33
font_modifiers::Vector{Symbol}
4+
tex_mode::Symbol
45
end
56

7+
LayoutState(font_family::FontFamily, modifiers::Vector) = LayoutState(font_family, modifiers, :text)
68
LayoutState(font_family::FontFamily) = LayoutState(font_family, Symbol[])
79
LayoutState() = LayoutState(FontFamily())
810

9-
Base.broadcastable(state::LayoutState) = [state]
11+
Base.broadcastable(state::LayoutState) = Ref(state)
12+
13+
# Return a new `LayoutState` that keeps the font family and font modifiers
# of `state` but has its `tex_mode` field set to `mode`.
# `state` itself is not modified; the modifiers vector is shared, not copied.
function change_mode(state::LayoutState, mode)
14+
LayoutState(state.font_family, state.font_modifiers, mode)
15+
end
1016

1117
function add_font_modifier(state::LayoutState, modifier)
12-
modifiers = [state.font_modifiers..., modifier]
13-
return LayoutState(state.font_family, modifiers)
18+
modifiers = vcat(state.font_modifiers, modifier)
19+
return LayoutState(state.font_family, modifiers, state.tex_mode)
1420
end
1521

1622
function get_font(state::LayoutState, char_type)

src/parser/commands_registration.jl

Lines changed: 77 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -27,100 +27,119 @@ function Base.get(d::CanonicalDict, key, default)
2727
return default
2828
end
2929

30-
31-
# Each symbol or command has a unique canonical representation
30+
# Each symbol or command has a unique canonical representation
3231
const symbol_to_canonical = CanonicalDict{Char}()
33-
const command_to_canonical = CanonicalDict{String}()
3432

3533
# Return the canonical expression for `char`.
# If `char` has an entry in `symbol_to_canonical`, that entry is returned;
# otherwise the character is wrapped in a plain `TeXExpr(:char, char)`.
function canonical_expr(char::Char)
3634
haskey(symbol_to_canonical, char) && return symbol_to_canonical[char]
3735
return TeXExpr(:char, char)
3836
end
3937

40-
canonical_expr(command::String) = get(command_to_canonical, command, nothing)
41-
42-
# Symbols missing from the REPL completion data
43-
latex_symbols[raw"\neq"] = "≠"
44-
45-
function get_symbol_char(command)
46-
if !haskey(latex_symbols, command)
47-
@warn "unknown command $command"
38+
function get_symbol_char(com_str)
39+
if !haskey(latex_symbols, com_str)
40+
@warn "unknown com_str $com_str"
4841
return '?'
4942
end
5043

51-
return first(latex_symbols[command])
52-
end
53-
54-
# Numbers
55-
for char in join(0:9)
56-
symbol_to_canonical[char] = TeXExpr(:digit, char)
44+
return first(latex_symbols[com_str])
5745
end
5846

5947
##
60-
## Special commands
48+
## Commands
6149
##
6250

63-
command_to_canonical[raw"\frac"] = TeXExpr(:argument_gatherer, [:frac, 2])
64-
command_to_canonical[raw"\sqrt"] = TeXExpr(:argument_gatherer, [:sqrt, 1])
65-
command_to_canonical[raw"\overline"] = TeXExpr(:argument_gatherer, [:overline, 1])
66-
command_to_canonical[raw"\{"] = TeXExpr(:delimiter, '{')
67-
command_to_canonical[raw"\}"] = TeXExpr(:delimiter, '}')
51+
# Build the expression for the command `com_str` applied to `args`.
# The template stored as the first element of `command_definitions[com_str]`
# is copied, and a new `TeXExpr` is returned with the template's head and
# the template's args followed by `args`.
# Indexing `command_definitions` directly means an unregistered `com_str`
# raises a `KeyError`.
function command_expr(com_str, args)
52+
template = copy(command_definitions[com_str][1])
53+
return TeXExpr(template.head, vcat(template.args, args))
54+
end
55+
required_args(com_str) = command_definitions[com_str][2]
56+
57+
# Table of known commands. Each key is the command string (backslash
# included) and each value is a tuple `(template, n)`:
# - `template` is the `TeXExpr` that `command_expr` copies and extends,
# - `n` is the number of arguments the command requires
#   (what `required_args` returns).
# Only the special commands are listed here; the registration loops that
# follow add further entries to this dictionary.
const command_definitions = Dict(
58+
raw"\frac" => (TeXExpr(:frac), 2),
59+
raw"\sqrt" => (TeXExpr(:sqrt), 1),
60+
raw"\overline" => (TeXExpr(:overline), 1),
61+
raw"\{" => (TeXExpr(:delimiter, '{'), 0),
62+
raw"\}" => (TeXExpr(:delimiter, '}'), 0),
63+
)
64+
65+
for func in underover_functions
66+
com_str = "\\" * func
67+
template = TeXExpr(:underover, Any[TeXExpr(:function, func), nothing, nothing])
68+
command_definitions[com_str] = (template, 0)
69+
end
70+
71+
for func in generic_functions
72+
com_str = "\\" * func
73+
command_definitions[com_str] = (TeXExpr(:function, func), 0)
74+
end
75+
76+
for (com_str, width) in space_commands
77+
command_definitions[com_str] = (TeXExpr(:space, width), 0)
78+
end
79+
80+
for com_str in combining_accents
81+
combining_char = get_symbol_char(com_str)
82+
template = TeXExpr(:combining_accent, TeXExpr(:symbol, combining_char))
83+
command_definitions[com_str] = (template, 1)
84+
end
85+
86+
for name in font_names
87+
com_str = "\\math$name"
88+
command_definitions[com_str] = (TeXExpr(:font, Symbol(name)), 1)
89+
com_str = "\\text$name"
90+
command_definitions[com_str] = (TeXExpr(:text, Symbol(name)), 1)
91+
end
92+
command_definitions["\\text"] = (TeXExpr(:text, :rm), 1)
6893

6994
##
70-
## Commands from the commands_data.jl file
95+
## Symbols
7196
##
7297

98+
# Symbols missing from the REPL completion data
99+
latex_symbols[raw"\neq"] = "≠"
100+
101+
# Numbers
102+
for char in join(0:9)
103+
symbol_to_canonical[char] = TeXExpr(:digit, char)
104+
end
105+
73106
for symbol in spaced_symbols
74107
symbol_expr = TeXExpr(:symbol, symbol)
75108
symbol_to_canonical[symbol] = TeXExpr(:spaced, symbol_expr)
76109
end
77110

78111
# Special case for hyphen that must be replaced by a minus sign
79-
# TODO Make sure it is not replaced outside of math mode
112+
# TODO Make sure it is not replaced outside of math mode and when starting a group
80113
symbol_to_canonical['-'] = TeXExpr(:spaced, TeXExpr(:symbol, '−'))
81114

82-
for command in spaced_commands
83-
symbol = get_symbol_char(command)
115+
for com_str in spaced_commands
116+
symbol = get_symbol_char(com_str)
84117
symbol_expr = TeXExpr(:symbol, symbol)
85-
symbol_to_canonical[symbol] = command_to_canonical[command] = TeXExpr(:spaced, symbol_expr)
118+
template = TeXExpr(:spaced, symbol_expr)
119+
symbol_to_canonical[symbol] = template
120+
command_definitions[com_str] = (template, 0)
86121
end
87122

88-
for command in underover_commands
89-
symbol = get_symbol_char(command)
123+
for com_str in underover_commands
124+
symbol = get_symbol_char(com_str)
90125
symbol_expr = TeXExpr(:symbol, symbol)
91-
symbol_to_canonical[symbol] = command_to_canonical[command] = TeXExpr(:underover, Any[symbol_expr, nothing, nothing])
92-
end
93-
94-
for func in underover_functions
95-
command = "\\" * func
96-
command_to_canonical[command] = TeXExpr(:underover, Any[TeXExpr(:function, func), nothing, nothing])
126+
template = TeXExpr(:underover, Any[symbol_expr, nothing, nothing])
127+
symbol_to_canonical[symbol] = template
128+
command_definitions[com_str] = (template, 0)
97129
end
98130

99-
for command in integral_commands
100-
symbol = get_symbol_char(command)
131+
for com_str in integral_commands
132+
symbol = get_symbol_char(com_str)
101133
symbol_expr = TeXExpr(:symbol, symbol)
102-
symbol_to_canonical[symbol] = command_to_canonical[command] = TeXExpr(:integral, Any[symbol_expr, nothing, nothing])
103-
end
104-
105-
for func in generic_functions
106-
command = "\\" * func
107-
command_to_canonical[command] = TeXExpr(:function, func)
108-
end
109-
110-
for (command, width) in space_commands
111-
command_to_canonical[command] = TeXExpr(:space, width)
134+
template = TeXExpr(:integral, Any[symbol_expr, nothing, nothing])
135+
symbol_to_canonical[symbol] = template
136+
command_definitions[com_str] = (template, 0)
112137
end
113138

114139
for (symbol, width) in space_symbols
115140
symbol_to_canonical[symbol] = TeXExpr(:space, width)
116141
end
117142

118-
for command in combining_accents
119-
combining_char = get_symbol_char(command)
120-
symbol_expr = TeXExpr(:symbol, combining_char)
121-
command_to_canonical[command] = TeXExpr(:argument_gatherer, [:combining_accent, 2, symbol_expr])
122-
end
123-
124143
for symbol in punctuation_symbols
125144
symbol = first(symbol)
126145
symbol_to_canonical[symbol] = TeXExpr(:punctuation, symbol)
@@ -131,30 +150,21 @@ for symbol in delimiter_symbols
131150
symbol_to_canonical[symbol] = TeXExpr(:delimiter, symbol)
132151
end
133152

134-
for name in font_names
135-
command = "\\math$name"
136-
command_to_canonical[command] = TeXExpr(:argument_gatherer, [:font, 2, Symbol(name)])
137-
command = "\\text$name"
138-
command_to_canonical[command] = TeXExpr(:argument_gatherer, [:text, 2, Symbol(name)])
139-
end
140-
command = "\\text"
141-
command_to_canonical[command] = TeXExpr(:argument_gatherer, [:text, 2, :rm])
142-
143153
##
144154
## Default behavior
145155
##
146156
# We put it at the end to avoid overwriting existing commands
147157

148-
for (command, symbol) in latex_symbols
158+
for (com_str, symbol) in latex_symbols
149159
symbol = first(symbol) # Convert String to Char
150-
symbol_expr = TeXExpr(:symbol, [symbol])
160+
symbol_expr = TeXExpr(:symbol, symbol)
151161

152162
if !haskey(symbol_to_canonical, symbol)
153163
symbol_to_canonical[symbol] = symbol_expr
154164
end
155165

156166
# Separate case for symbols that have multiple valid commands
157-
if !haskey(command_to_canonical, command)
158-
command_to_canonical[command] = symbol_expr
167+
if !haskey(command_definitions, com_str)
168+
command_definitions[com_str] = (symbol_expr, 0)
159169
end
160170
end

0 commit comments

Comments
 (0)