Skip to content

Commit

Permalink
Towards functioning expl3 (#1188)
Browse files Browse the repository at this point in the history
* report error for undefined token expansion; neutralize undefined tokens

* missing macro stubs

* ensure all binding command sequences have reasonable/expected meanings in expansion

* also enforce defined second token for \expandafter use

* attempt to streamline TeX-compliant expansions, back off from Alignment

* A variety of changes to get expl3 to load:
  define CharDef->equals;
  fix \unless support
  pdf version & \pdfstrcmp
  respect \endlinechar in Mouth
  more careful token comparisons for space, \relax, etc
  especially in <one optional space> after reading numbers
  \edef and friends read the body *while* expanding
  LaTeX \@onefilewithoptions

This allows texlive 2016's l3kernel to be read;
(there are still issues with 2019).
Test cases still need to be added.

* progress with bookkeeping @currnamestack, more expl3 macros

* guard roman numerals from negative ints; cover CharDefs in \meaning; add minor revision for pdf proc

* test: tricky number+tilde interaction in expl3

* Gullet::readOptionalSigns should accept aliases of space

* pdftexrevision is an expandable macro

* cleaner messages for \expandafter and \message

* strange oversight, \ExplSyntaxOn needs to be in test case

* stub aux method which isn't presently needed

* use Object::Equals for space comparison in Number reads

* finer touches

* undo Register experimental changes

* remove debug comments

* fully enforce new readXToken; ensure siunitx declared units are let relaxed when undefined

* Equals instead of explicit meaning check
  • Loading branch information
dginev authored and brucemiller committed Aug 17, 2019
1 parent 407dc04 commit f3a72b6
Show file tree
Hide file tree
Showing 23 changed files with 309 additions and 116 deletions.
5 changes: 5 additions & 0 deletions MANIFEST
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,7 @@ lib/LaTeXML/Package/elsart.cls.ltxml
lib/LaTeXML/Package/elsart.sty.ltxml
lib/LaTeXML/Package/elsart_support.sty.ltxml
lib/LaTeXML/Package/elsarticle.cls.ltxml
lib/LaTeXML/Package/expl3.sty.ltxml
lib/LaTeXML/Package/emulateapj.cls.ltxml
lib/LaTeXML/Package/emulateapj.sty.ltxml
lib/LaTeXML/Package/emulateapj5.sty.ltxml
Expand Down Expand Up @@ -710,6 +711,7 @@ t/70_parse.t
t/80_complex.t
t/81_babel.t
t/82_moderncv.t
t/83_expl3.t
t/90_latexmlpost.t
t/91_latexmlc_api.t
t/92_profiles.t
Expand Down Expand Up @@ -1120,6 +1122,9 @@ t/expansion/whichpkgb
t/expansion/whichpkgb.sty
t/expansion/whichpkgb.sty.sty
t/expansion/whichpkgc.sty
t/expl3/tilde_tricks.pdf
t/expl3/tilde_tricks.tex
t/expl3/tilde_tricks.xml
t/fonts/acc.pdf
t/fonts/acc.tex
t/fonts/acc.xml
Expand Down
6 changes: 6 additions & 0 deletions lib/LaTeXML/Core/Definition/CharDef.pm
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ package LaTeXML::Core::Definition::CharDef;
use strict;
use warnings;
use LaTeXML::Global;
use LaTeXML::Common::Object;
use LaTeXML::Common::Error;
use base qw(LaTeXML::Core::Definition::Register);

Expand Down Expand Up @@ -43,6 +44,11 @@ sub invoke {
# Tracing ?
return (defined $cs ? $stomach->invokeToken($cs) : undef); }

# Compare this definition against another one.
# Yields a true value only when $other is defined, is blessed into the same
# class as $self (identical ref() strings), and both stored {value} objects
# report numerically equal results from ->valueOf.
# Otherwise the false result of the first failing check is returned.
sub equals {
  my ($self, $other) = @_;
  # Short-circuit first, so ->valueOf is never invoked on a missing or
  # differently-typed $other.
  my $same_kind = defined($other) && (ref($self) eq ref($other));
  return $same_kind && ($$self{value}->valueOf == $$other{value}->valueOf); }
#===============================================================================
1;

Expand Down
7 changes: 3 additions & 4 deletions lib/LaTeXML/Core/Definition/Conditional.pm
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ sub invoke {
my ($self, $gullet) = @_;
# A real conditional must have conditional_type set
if (my $cond_type = $$self{conditional_type}) {
if ($cond_type eq 'if') {
if (($cond_type eq 'if') || ($cond_type eq 'unless')) {
return $self->invoke_conditional($gullet); }
elsif ($cond_type eq 'else') {
return $self->invoke_else($gullet); }
Expand Down Expand Up @@ -126,8 +126,7 @@ sub skipConditionalBody {
elsif (!--$level) { # If no more nesting, we're done.
shift(@$stack); # Done with this frame
return $t; } } # AND Return the finishing token.
elsif ($level > 1) { # Ignore \else,\or nested in the body.
}
elsif ($level > 1) { } # Ignore \else,\or nested in the body.
elsif (($cond_type eq 'or') && (++$n_ors == $nskips)) {
return $t; }
elsif (($cond_type eq 'else') && $nskips
Expand Down Expand Up @@ -188,7 +187,7 @@ sub invoke_fi {

__END__
=pod
=pod
=head1 NAME
Expand Down
2 changes: 1 addition & 1 deletion lib/LaTeXML/Core/Definition/Register.pm
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ sub invoke {

__END__
=pod
=pod
=head1 NAME
Expand Down
59 changes: 38 additions & 21 deletions lib/LaTeXML/Core/Gullet.pm
Original file line number Diff line number Diff line change
Expand Up @@ -182,14 +182,15 @@ sub show_pushback {
#**********************************************************************
# Not really 100% sure how this is supposed to work
# See TeX Ch 20, p216 regarding noexpand, \edef with token list registers, etc.
# Solution: Duplicate param tokens, stick NOTEXPANDED in front of expandable tokens.
# Solution: Duplicate param tokens, stick NOTEXPANDED in front of expandable+undefined tokens.
sub neutralizeTokens {
my ($self, @tokens) = @_;
my @result = ();
foreach my $token (@tokens) {
if ($$token[1] == CC_PARAM) { # Inline ->getCatcode!
push(@result, $token); }
elsif (defined(my $defn = LaTeXML::Core::State::lookupDefinition($STATE, $token))) {
elsif (!defined(my $meaning = LaTeXML::Core::State::lookupMeaning($STATE, $token)) ||
defined(my $defn = LaTeXML::Core::State::lookupDefinition($STATE, $token))) {
push(@result, Token('\noexpand', CC_NOTEXPANDED)); }
push(@result, $token); }
return @result; }
Expand Down Expand Up @@ -271,6 +272,9 @@ sub readXToken {
: ($r eq 'LaTeXML::Core::Tokens' ? @$_
: Fatal('misdefined', $r, undef, "Expected a Token, got " . Stringify($_))))) }
@{$r}); } }
elsif ($cc == CC_CS && !(LaTeXML::Core::State::lookupMeaning($STATE, $token))) {
Error('undefined', $token, $self, "The token " . Stringify($token) . " is not defined during expansion. Consuming it and proceeding, expect trouble...");
return; }
else {
return $token; } # just return it
}
Expand Down Expand Up @@ -299,6 +303,9 @@ sub readRawLine {
#**********************************************************************
# Mid-level readers: checking and matching tokens, strings etc.
#**********************************************************************
# General note: TeX uses different tests for Space tokens in different places
# (possibilities: catcode equality, ->equals, Equals and XEquals)

# The following higher-level parsing methods are built upon readToken & unread.
sub readNonSpace {
my ($self) = @_;
Expand All @@ -320,10 +327,12 @@ sub skipSpaces {
unshift(@{ $$self{pushback} }, $tok) if defined $tok; # Unread
return; }

# Skip one space
# if $expanded is true, it acts like <one optional space>, expanding the next token.
sub skip1Space {
my ($self) = @_;
my $token = $self->readToken();
unshift(@{ $$self{pushback} }, $token) if $token && ($$token[1] != CC_SPACE); # Inline ->getCatcode, unread
my ($self, $expanded) = @_;
my $token = ($expanded ? $self->readXToken : $self->readToken);
unshift(@{ $$self{pushback} }, $token) if $token && !Equals($token, T_SPACE);
return; }

# <filler> = <optional spaces> | <filler>\relax<optional spaces>
Expand All @@ -333,8 +342,8 @@ sub skipFiller {
my $tok = $self->readNonSpace;
return unless defined $tok;
# Should \foo work too (where \let\foo\relax) ??
if ($tok->getString ne '\relax') {
unshift(@{ $$self{pushback} }, $tok); # Unread
if (!$tok->equals(T_CS('\relax'))) {
unshift(@{ $$self{pushback} }, $tok); # Unread
return; }
}
return; }
Expand Down Expand Up @@ -372,9 +381,9 @@ sub readBalanced {
# TODO: The current implementation has a limitation where if the balancing end is in a different mouth,
# it will not be recognized.
Error('expected', "}", $self, "Gullet->readBalanced ran out of input in an unbalanced state.",
"started at $startloc");
"started at " . ToString($startloc));
}
return Tokens(@tokens); }
return (wantarray ? (Tokens(@tokens), $token) : Tokens(@tokens)); }

sub ifNext {
my ($self, $token) = @_;
Expand Down Expand Up @@ -433,7 +442,7 @@ sub readUntil {
push(@tokens, $token);
$n++;
if ($$token[1] == CC_BEGIN) { # And if it's a BEGIN, copy till balanced END
push(@tokens, $self->readBalanced->unlist, T_END); } }
push(@tokens, $self->readBalanced); } }
# Notice that IFF the arg looks like {balanced}, the outer braces are stripped
# so that delimited arguments behave more similarly to simple, undelimited arguments.
if (($n == 1) && ($tokens[0][1] == CC_BEGIN)) {
Expand Down Expand Up @@ -473,7 +482,7 @@ sub readArg {
if (!defined $token) {
return; }
elsif ($$token[1] == CC_BEGIN) { # Inline ->getCatcode!
return $self->readBalanced; }
return scalar($self->readBalanced); }
else {
return Tokens($token); } }

Expand Down Expand Up @@ -527,7 +536,7 @@ sub readTokensValue {
if (!defined $token) {
return; }
elsif ($$token[1] == CC_BEGIN) { # Inline ->getCatcode!
return $self->readBalanced; }
return scalar($self->readBalanced); }
elsif (my $defn = LaTeXML::Core::State::lookupDefinition($STATE, $token)) {
if ($defn->isRegister eq 'Tokens') {
return $defn->valueOf($defn->readArguments($self)); }
Expand All @@ -542,25 +551,29 @@ sub readTokensValue {

#======================================================================
# some helpers...
# Note that <one optional space> is kinda special:
# The following Token(s) are expanded until an unexpandable token is found;
# it is discarded if it is a space, but with an Equals() equality test!

# <optional signs> = <optional spaces> | <optional signs><plus or minus><optional spaces>
# return +1 or -1
sub readOptionalSigns {
my ($self) = @_;
my ($sign, $t) = ("+1", '');
while (defined($t = $self->readXToken(0))
&& (($t->getString eq '+') || ($t->getString eq '-') || ($t->equals(T_SPACE)))) {
&& (($t->getString eq '+') || ($t->getString eq '-') || Equals($t, T_SPACE))) {
$sign = -$sign if ($t->getString eq '-'); }
unshift(@{ $$self{pushback} }, $t) if $t; # Unread
return $sign; }

# Read digits (within $range), while expanding and if $skip, skip <one optional space> (expanded!)
sub readDigits {
my ($self, $range, $skip) = @_;
my $string = '';
my ($token, $digit);
while (($token = $self->readXToken(0)) && (($digit = $token->getString) =~ /^[$range]$/)) {
$string .= $digit; }
unshift(@{ $$self{pushback} }, $token) if $token && !($skip && $$token[1] == CC_SPACE); # Inline ->getCatcode, unread
unshift(@{ $$self{pushback} }, $token) if $token && !($skip && Equals($token, T_SPACE)); #Inline
return $string; }

# <factor> = <normal integer> | <decimal constant>
Expand Down Expand Up @@ -599,7 +612,8 @@ sub readNumber {
unshift(@{ $$self{pushback} }, $next); # Unread
Warn('expected', '<number>', $self, "Missing number, treated as zero",
"while processing " . ToString($LaTeXML::CURRENT_TOKEN),
"next token is " . ToString($next));
"next token is " . ToString($next)
, " == " . Stringify($STATE->lookupMeaning($next)));
return Number(0); } }

# <normal integer> = <internal integer> | <integer constant>
Expand All @@ -608,7 +622,7 @@ sub readNumber {
# Return a Number or undef
sub readNormalInteger {
my ($self) = @_;
my $token = $self->readXToken(0);
my $token = $self->readXToken(1); # expand more
if (!defined $token) {
return; }
elsif (($$token[1] == CC_OTHER) && ($token->getString =~ /^[0-9]$/)) { # Read decimal literal
Expand All @@ -621,6 +635,7 @@ sub readNormalInteger {
my $next = $self->readToken;
my $s = ($next && $next->getString) || '';
$s =~ s/^\\//;
$self->skip1Space(1);
return Number(ord($s)); } # Only a character token!!! NOT expanded!!!!
else {
unshift(@{ $$self{pushback} }, $token); # Unread
Expand Down Expand Up @@ -686,7 +701,7 @@ sub readDimension {
sub readUnit {
my ($self) = @_;
if (defined(my $u = $self->readKeyword('ex', 'em'))) {
$self->skip1Space;
$self->skip1Space(1);
return $STATE->convertUnit($u); }
elsif (defined($u = $self->readInternalInteger)) {
return $u->valueOf; } # These are coerced to number=>sp
Expand All @@ -699,7 +714,7 @@ sub readUnit {
my $units = $STATE->lookupValue('UNITS');
$u = $self->readKeyword(keys %$units);
if ($u) {
$self->skip1Space;
$self->skip1Space(1);
return $STATE->convertUnit($u); }
else {
return; } } }
Expand Down Expand Up @@ -735,7 +750,7 @@ sub readMuDimension {
sub readMuUnit {
my ($self) = @_;
if (my $m = $self->readKeyword('mu')) {
$self->skip1Space;
$self->skip1Space(1);
return $STATE->convertUnit($m); }
elsif ($m = $self->readInternalMuGlue) {
return $m->valueOf; }
Expand Down Expand Up @@ -919,14 +934,16 @@ Read and return the next non-space token from the input after discarding any spa
Skip the next spaces from the input.
=item C<< $gullet->skip1Space; >>
=item C<< $gullet->skip1Space($expanded); >>
Skip the next token from the input if it is a space.
If C<$expanded> is true, expands (like C<< <one optional space> >>).
=item C<< $tokens = $gullet->readBalanced; >>
Read a sequence of tokens from the input until the balancing '}' (assuming the '{' has
already been read). Returns a L<LaTeXML::Core::Tokens>.
already been read). Returns a L<LaTeXML::Core::Tokens>,
except in an array context, returns the collected tokens and the closing token.
=item C<< $boole = $gullet->ifNext($token); >>
Expand Down
7 changes: 3 additions & 4 deletions lib/LaTeXML/Core/KeyVals.pm
Original file line number Diff line number Diff line change
Expand Up @@ -337,8 +337,7 @@ sub readFrom {
while ((!defined($delim = $gullet->readMatch($punct, $until)))
&& (defined($tok = $gullet->readToken()))) { # Copy next token to args
push(@toks, $tok,
($tok->getCatcode == CC_BEGIN ? ($gullet->readBalanced->unlist, T_END) : ())); }

($tok->getCatcode == CC_BEGIN ? $gullet->readBalanced : ())); }
# reparse (and expand) the tokens representing the value
$value = Tokens(@toks);
$value = $keydef->reparse($gullet, $value) if $keydef && $value;
Expand Down Expand Up @@ -534,9 +533,9 @@ sub beDigested {
my $new = LaTeXML::Core::KeyVals->new(
$prefix, $keysets,
setAll => $setAll, setInternals => $setInternals,
skip => $skip, skipMissing => $skipMissing, hookMissing => $hookMissing,
skip => $skip, skipMissing => $skipMissing, hookMissing => $hookMissing,
was_digested => 1,
punct => $punct, assign => $assign);
punct => $punct, assign => $assign);
$new->setTuples(@newtuples);
return $new; }

Expand Down
11 changes: 9 additions & 2 deletions lib/LaTeXML/Core/Mouth.pm
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ sub getNextChar {
else { # OR ^^ followed by a SINGLE Control char type code???
my $c = $$self{chars}[$$self{colno} + 1];
my $cn = ord($c);
$ch = chr($cn + ($cn > 64 ? -64 : 64));
$ch = chr($cn + ($cn >= 64 ? -64 : 64));
splice(@{ $$self{chars} }, $$self{colno} - 1, 3, $ch);
$$self{nchars} -= 2; }
$cc = $STATE->lookupCatcode($ch) // CC_OTHER; }
Expand Down Expand Up @@ -289,7 +289,14 @@ sub readToken {
$$self{nchars} = 0;
return; }
# Remove trailing space, but NOT a control space! End with CR (not \n) since this gets tokenized!
$line =~ s/((\\ )*)\s*$/$1\r/s;
$line =~ s/((\\ )*)\s*$/$1/s;
# Then append the appropriate \endlinechar, or "\r"
if (my $eol = $STATE->lookupDefinition(T_CS('\endlinechar'))) {
# \endlinechar=-1 means what?
$eol = $eol->valueOf()->valueOf;
$line .= chr($eol) if $eol > 0; }
else {
$line .= "\r"; }
$$self{chars} = splitChars($line);
$$self{nchars} = scalar(@{ $$self{chars} });
while (($$self{colno} < $$self{nchars})
Expand Down
6 changes: 4 additions & 2 deletions lib/LaTeXML/Core/Parameter.pm
Original file line number Diff line number Diff line change
Expand Up @@ -82,14 +82,16 @@ sub read {
# (eg. \caption(...\label{badchars}}) where you really need to
# cleanup after the fact!
# Hmmm, seem to still need it...
my $startloc = $gullet->getLocator;
$self->setupCatcodes;
my $value = &{ $$self{reader} }($gullet, @{ $$self{extra} || [] });
$value = $value->neutralize(@{ $$self{semiverbatim} }) if $$self{semiverbatim} && (ref $value)
&& $value->can('neutralize');
$self->revertCatcodes;
if ((!defined $value) && !$$self{optional}) {
Error('expected', $self, $gullet,
"Missing argument " . Stringify($self) . " for " . Stringify($fordefn));
"Missing argument " . Stringify($self) . " for " . Stringify($fordefn),
"Started at " . ToString($startloc));
$value = T_OTHER('missing'); }
return $value; }

Expand Down Expand Up @@ -152,7 +154,7 @@ sub revert {

__END__
=pod
=pod
=head1 NAME
Expand Down
Loading

0 comments on commit f3a72b6

Please sign in to comment.