Skip to content

Commit

Permalink
Expansions (brucemiller#2421)
Browse files Browse the repository at this point in the history
* Adjust args to readXToken and readBalanced to support both fully and partially expanded variations

* Default state->lookupExpandable same as gullet->readXToken

* Make GeneralText, XGeneralText use gullet->skipFiller correctly; have Expanded be fully expanded, but introduce ExpandedPartially

* pdfTeX's \expanded, \pdfstrcmp should use XGeneralText parameter types

* Make Expand() expand fully; add ExpandPartially, which defers \protected and \the

* Use partial expansion for unit type arguments

* Add test case distinguishing fully vs partially expanded cases

* Clarifying comments

* Simplify Expand(),ExpandPartially(), probably more robust, if slightly less efficient

* For gullet->readArg Add optional expanded argument (0,1,2 for not; partial; fully expanded); Use that for Expanded, ExpandedPartially ParameterTypes (slight update of POD)
  • Loading branch information
brucemiller authored Sep 26, 2024
1 parent 4898ab2 commit 6167612
Show file tree
Hide file tree
Showing 10 changed files with 353 additions and 61 deletions.
3 changes: 3 additions & 0 deletions MANIFEST
Original file line number Diff line number Diff line change
Expand Up @@ -1250,6 +1250,9 @@ t/expansion/numexpr.xml
t/expansion/parindent.pdf
t/expansion/parindent.tex
t/expansion/parindent.xml
t/expansion/partial.pdf
t/expansion/partial.tex
t/expansion/partial.xml
t/expansion/pdftex_expanded.pdf
t/expansion/pdftex_expanded.tex
t/expansion/pdftex_expanded.xml
Expand Down
89 changes: 49 additions & 40 deletions lib/LaTeXML/Core/Gullet.pm
Original file line number Diff line number Diff line change
Expand Up @@ -333,18 +333,19 @@ sub unread {
# Note that most tokens pass through here, so be Fast & Clean! readToken is folded in.
# `Toplevel' processing, (if $toplevel is true), used at the toplevel processing by Stomach,
# will step to the next input stream (Mouth) if one is available,
# $toplevel is doing TWO distinct things. When true:
# $toplevel when true:
# * If a mouth is exhausted, move on to the containing mouth to continue reading
# $fully_expand when true, OR when undef but $toplevel is true
# * expand even protected defns, essentially this means expand "for execution"
# Note that, unlike readBalanced, this does NOT defer expansion of \the & friends.
# Also, \noexpand'd tokens effectively act like \relax
# For arguments to \if,\ifx, etc use $for_conditional true,
# which handles \noexpand and CS which have been \let to tokens specially.
sub readXToken {
my ($self, $toplevel, $for_conditional) = @_;
my ($self, $toplevel, $for_conditional, $fully_expand) = @_;
$toplevel = 1 unless defined $toplevel;
my $autoclose = $toplevel; # Potentially, these should have distinct controls?
my $for_evaluation = $toplevel;
my $autoclose = $toplevel; # Potentially, these should have distinct controls?
$fully_expand = $toplevel unless defined $fully_expand;
my ($token, $cc, $defn, $atoken, $atype, $ahidden);
while (1) {
while (($token = shift(@{ $$self{pushback} })) && $CATCODE_HOLD[$cc = $$token[1]]) {
Expand Down Expand Up @@ -376,7 +377,7 @@ sub readXToken {
if ((ref $defn) eq 'LaTeXML::Core::Token') { # \let to a token? Return it!
return ($for_conditional ? $defn : $token); }
elsif (!$defn->isExpandable # Not expandable or is protected
|| ($$defn{isProtected} && !$for_evaluation)) {
|| ($$defn{isProtected} && !$fully_expand)) {
return $token; }
else {
local $LaTeXML::CURRENT_TOKEN = $token;
Expand All @@ -397,7 +398,8 @@ sub readXToken {
# readBalanced approximates TeX's scan_toks (but doesn't parse \def parameter lists)
# and only optionally requires the opening "{".
# It may return comments in the token lists.
# it optionally ($expand) expands while reading, but deferring \the and related.
# If $expanded is true, it expands while reading, but deferring \the and related
# & \protected, unless $expanded is > 1.
# The $macrodef flag affects whether # parameters are "packed" for macro bodies.
# If $require_open is true, the opening T_BEGIN has not yet been read, and is required.
our $DEFERRED_COMMANDS = {
Expand All @@ -411,7 +413,8 @@ sub readBalanced {
my ($self, $expanded, $macrodef, $require_open) = @_;
$LaTeXML::ALIGN_STATE-- unless $require_open; # assume matching } [BEFORE masking ALIGN_STATE]
local $LaTeXML::ALIGN_STATE = 1000000;
my $startloc = ($$self{verbosity} > 0) && getLocator($self);
my $fully_expand = (defined $expanded) && ($expanded > 1);
my $startloc = ($$self{verbosity} > 0) && getLocator($self);
  # Do we need to expand to get the { ???
if ($require_open) {
my $token = ($expanded ? readXToken($self, 0) : readToken($self));
Expand Down Expand Up @@ -462,15 +465,15 @@ sub readBalanced {
&& defined($defn = $STATE->lookupMeaning($token))
&& ((ref $defn) ne 'LaTeXML::Core::Token') # an actual definition
&& $defn->isExpandable
&& (!$$defn{isProtected})) { # is this the right logic here? don't expand unless di
&& (!$$defn{isProtected} || $fully_expand)) { # is this the right logic here? don't expand unless di
local $LaTeXML::CURRENT_TOKEN = $token;
my $r;
no warnings 'recursion';
my $expansion = $defn->invoke($self);
next unless $expansion;
# If a special \the type command, push the expansion directly into the result
# Well, almost directly: handle any MARKER tokens now, and possibly un-pack T_PARAM
if ($$DEFERRED_COMMANDS{ $$defn{cs}[0] }) {
if (!$fully_expand && $$DEFERRED_COMMANDS{ $$defn{cs}[0] }) {
foreach my $t (@$expansion) {
my $cc = $$t[1];
if ($cc == CC_MARKER) { handleMarker($self, $t); }
Expand Down Expand Up @@ -552,16 +555,13 @@ sub skip1Space {
return; }

# <filler> = <optional spaces> | <filler>\relax<optional spaces>
# TeX Book p.276 "<left brace> can be implicit", and experimentation, indicate Expansion!!!
sub skipFiller {
my ($self) = @_;
while (1) {
my $tok = readNonSpace($self);
return unless defined $tok;
# Should \foo work too (where \let\foo\relax) ??
if (!$tok->equals(T_CS('\relax'))) {
while (my $tok = readXNonSpace($self)) {
if (!$tok->defined_as(T_CS('\relax'))) {
unread($self, $tok);
return; }
}
return; } }
return; }

sub ifNext {
Expand Down Expand Up @@ -698,14 +698,19 @@ sub readCSName {
# tokens, non-expandable tokens, args, Numbers, ...
#**********************************************************************
sub readArg {
my ($self) = @_;
my ($self, $expanded) = @_;
my $token = readNonSpace($self);
if (!defined $token) {
return; }
elsif ($$token[1] == CC_BEGIN) { # Inline ->getCatcode!
return readBalanced($self, 0); }
return readBalanced($self, $expanded, 0, 0); }
else {
return Tokens($token); } }
if ($expanded) {
return $self->readingFromMouth(LaTeXML::Core::Mouth->new(), sub {
$self->unread(T_BEGIN, $token, T_END);
return $self->readBalanced($expanded, 0, 1); }); }
else {
return Tokens($token); } } }

# Note that this returns an empty array if [] is present,
# otherwise $default or undef.
Expand Down Expand Up @@ -1129,21 +1134,27 @@ Returns an object describing the current location in the input stream.
=over 4
=item C<< $tokens = $gullet->expandTokens($tokens); >>
Return the L<LaTeXML::Core::Tokens> resulting from expanding all the tokens in C<$tokens>.
This is actually only used in a few circumstances where the arguments to
an expandable need explicit expansion; usually expansion happens at the right time.
=item C<< $token = $gullet->readToken; >>
Return the next token from the input source, or undef if there is no more input.
=item C<< $token = $gullet->readXToken($toplevel,$commentsok); >>
=item C<< $token = $gullet->readXToken($toplevel,$for_conditional, $fully_expand); >>
Return the next unexpandable token from the input source, or undef if there is no more input.
If the next token is expandable, it is expanded, and its expansion is reinserted into the input.
If C<$commentsok>, a comment read or pending will be returned.
If the next token is expandable, it is expanded, and its expansion is reinserted into the input,
and reading continues.
If C<$toplevel> is true, it will automatically close empty mouths as it reads, and will also fully expand macros (unless overridden by C<$fully_expand> being explicitly false). Full expansion expands protected macros as well as the results of C<\the> (and similar).
If C<$for_conditional> is true, handle C<\noexpand> appropriately for the arguments to C<\if>.
=item C<< $tokens = $gullet->readBalanced($expanded, $macrodef, $require_open); >>
Read a sequence of tokens from the input until the balancing '}'.
By default assumes the '{' has already been read.
No expansion takes place if C<$expanded> is 0 or undef; partial expansion (deferring protected macros and C<\the>) if C<$expanded> is 1; full expansion if it is > 1.
The C<$macrodef> flag affects whether # parameters are "packed" for macro bodies.
If C<$require_open> is true, the opening C<T_BEGIN> has not yet been read, and is required.
Returns a L<LaTeXML::Core::Tokens>.
=item C<< $gullet->unread(@tokens); >>
Expand All @@ -1159,6 +1170,11 @@ Push the C<@tokens> back into the input stream to be re-read.
Read and return the next non-space token from the input after discarding any spaces.
=item C<< $token = $gullet->readXNonSpace; >>
Read and return the next non-space token from the input after discarding any spaces,
partially expanding as it goes.
=item C<< $gullet->skipSpaces; >>
Skip the next spaces from the input.
Expand All @@ -1168,16 +1184,6 @@ Skip the next spaces from the input.
Skip the next token from the input if it is a space.
If C<$expanded> is true, expands (like C<< <one optional space> >>).
=item C<< $tokens = $gullet->readBalanced($expanded, $macrodef, $require_open); >>
Read a sequence of tokens from the input until the balancing '}'.
By default assumes the '{' has already been read.
It optionally (C<$expand>) expands while reading, but deferring \the and related.
The C<$macrodef> flag affects whether # parameters are "packed" for macro bodies.
If C<$require_open> is true, the opening C<T_BEGIN> has not yet been read, and is required.
Returns a L<LaTeXML::Core::Tokens>.
=item C<< $boole = $gullet->ifNext($token); >>
Returns true if the next token in the input matches C<$token>;
Expand Down Expand Up @@ -1206,9 +1212,12 @@ in C<@delims>. In a list context, it also returns which of the delimiters ended
=over 4
=item C<< $tokens = $gullet->readArg; >>
=item C<< $tokens = $gullet->readArg($expanded); >>
Read and return a TeX argument; the next Token or Tokens (if surrounded by braces).
Read and return a "normal" TeX argument; the next Token or Tokens (if surrounded by braces).
C<$expanded> controls expansion as if the argument were read and then expanded in isolation:
0,undef or missing gives no expansion; 1 gives partial expansion; > 1 gives full expansion.
In the case of a single unbraced expandable token, it will I<not> read any macro arguments from the following input!
=item C<< $tokens = $gullet->readOptional($default); >>
Expand Down
1 change: 1 addition & 0 deletions lib/LaTeXML/Core/State.pm
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,7 @@ sub lookupExpandable {
return unless $token;
my $defn;
my $entry;
$toplevel = 1 unless defined $toplevel; # Default, for full expansion, same as readXToken!
if ($CATCODE_ACTIVE_OR_CS[$$token[1]]
&& ($entry = $$self{meaning}{ $$token[0] })
&& ($defn = $$entry[0])
Expand Down
23 changes: 16 additions & 7 deletions lib/LaTeXML/Engine/Base_ParameterTypes.pool.ltxml
Original file line number Diff line number Diff line change
Expand Up @@ -63,16 +63,16 @@ DefParameterType('Optional', sub {

# This is a peculiar type of argument of the form
# <general text> = <filler>{<balanced text><right brace>
# however, <filler> does get expanded while searching for the initial {
# which IS required in contrast to a general argument; ie a single token is not correct.
# [Note: <filler> expands, ignoring spaces and \relax, until opening { ]
DefParameterType('GeneralText', sub {
my ($gullet) = @_;
$gullet->unread($gullet->readXToken); # Force expansion to skip <filler> before required {
$gullet->skipFiller;
return $gullet->readBalanced(0, 0, 1); });

# This is like GeneralText, but it Partially expands the argument (not \protected, nor \the)
DefParameterType('XGeneralText', sub {
my ($gullet) = @_;
$gullet->unread($gullet->readXToken); # Force expansion to skip <filler> before required {
$gullet->skipFiller;
return $gullet->readBalanced(1, 0, 1); });

DefParameterType('Until', sub {
Expand Down Expand Up @@ -148,11 +148,20 @@ DefParameterType('XUntil', sub {
push(@tokens, $token); } }
Tokens(@tokens); });

# This reads a braced tokens list, expanding as it goes,
# but expanding \the-like commands only once.
# Simulate reading a plain argument, and then fully expanding it.
# Similar to when \csname is used
DefParameterType('Expanded', sub {
my ($gullet) = @_;
$gullet->readBalanced(1, 0, 1); },
return $gullet->readArg(2); },
reversion => sub {
my ($arg) = @_;
(T_BEGIN, Revert($arg), T_END); });

# ExpandedPartially: read a plain TeX argument with partial expansion, i.e.
# \protected macros are left alone and the output of \the-like commands is
# not expanded any further (similar to what happens inside \edef).
# Contrast with Expanded, which asks readArg for full expansion.
# The reversion re-wraps the reverted argument in a brace pair.
DefParameterType('ExpandedPartially',
  sub {
    my ($gullet) = @_;
    my $expansion_level = 1;    # 1 = partial expansion; readArg treats > 1 as full
    return $gullet->readArg($expansion_level); },
  reversion => sub {
    my ($tokens) = @_;
    return (T_BEGIN, Revert($tokens), T_END); });
Expand Down
4 changes: 2 additions & 2 deletions lib/LaTeXML/Engine/pdfTeX.pool.ltxml
Original file line number Diff line number Diff line change
Expand Up @@ -272,9 +272,9 @@ DefMacro('\pdfrestore', '');
# pdfspecial modifier → direct:
# stack action → set | push | pop | current

DefMacro('\expanded Expanded', '#1');
DefMacro('\expanded XGeneralText', '#1');

DefMacro('\pdfstrcmp Expanded Expanded', sub {
DefMacro('\pdfstrcmp XGeneralText XGeneralText', sub {
my $cmp = (ToString($_[1]) cmp ToString($_[2]));
return ($cmp == 1 ? T_OTHER('1')
: ($cmp == 0 ? T_OTHER('0')
Expand Down
26 changes: 16 additions & 10 deletions lib/LaTeXML/Package.pm
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ our @EXPORT = (qw(&DefAutoload &DefExpandable
&DefLigature &DefMathLigature),

# Mid-level support for writing definitions.
qw(&Expand &Invocation &Digest &DigestText &DigestIf &DigestLiteral
qw(&Expand &ExpandPartially &Invocation &Digest &DigestText &DigestIf &DigestLiteral
&RawTeX &Let &StartSemiverbatim &EndSemiverbatim
&Tokenize &TokenizeInternal
&IsEmpty),
Expand Down Expand Up @@ -899,17 +899,23 @@ sub generateID_nextid {
#
#======================================================================

# Return $tokens with all tokens expanded
# Return $tokens with all tokens fully expanded
sub Expand {
my (@tokens) = @_;
return () unless @tokens;
return $STATE->getStomach->getGullet->readingFromMouth(LaTeXML::Core::Mouth->new(), sub {
my ($gullet) = @_;
$gullet->unread(@tokens);
my @expanded = ();
while (defined(my $t = $gullet->readXToken(0))) {
push(@expanded, $t); }
return Tokens(@expanded); }); }
my $gullet = $STATE->getStomach->getGullet;
return $gullet->readingFromMouth(LaTeXML::Core::Mouth->new(), sub {
$gullet->unread(T_BEGIN, @tokens, T_END);
return $gullet->readBalanced(2, 0, 1); }); }

# Partially expand @tokens: expansion is deferred for \protected macros
# and for the results of \the.
# Returns an empty list when called without any tokens; otherwise returns
# whatever readingFromMouth hands back (presumably the Tokens produced by
# readBalanced in the callback).
sub ExpandPartially {
  my (@tokens) = @_;
  if (!@tokens) {
    return (); }
  my $gullet = $STATE->getStomach->getGullet;
  # Wrap the tokens in a brace pair so that readBalanced (expanded=1 for partial
  # expansion, require_open=1) reads them as one balanced group.
  my $read_group = sub {
    $gullet->unread(T_BEGIN, @tokens, T_END);
    return $gullet->readBalanced(1, 0, 1); };
  # Do the reading against a newly created Mouth.
  return $gullet->readingFromMouth(LaTeXML::Core::Mouth->new(), $read_group); }

sub Invocation {
my ($token, @args) = @_;
Expand Down Expand Up @@ -4390,7 +4396,7 @@ is applied only when C<fontTest> returns true.
Predefined Ligatures combine sequences of "." or single-quotes into appropriate
Unicode characters.
=item C<DefMathLigature(I<$string>C<=>>I<$replacment>,I<%options>);>
=item C<< DefMathLigature(I<$string> => I<$replacement>, I<%options>); >>
X<DefMathLigature>
A Math Ligature typically combines a sequence of math tokens (XMTok) into a single one.
Expand Down
5 changes: 3 additions & 2 deletions lib/LaTeXML/Package/siunitx.sty.ltxml
Original file line number Diff line number Diff line change
Expand Up @@ -1126,7 +1126,7 @@ sub six_parse_literalunits {

sub six_process_units {
my ($expr) = @_;
$expr = Expand($expr);
$expr = ExpandPartially($expr); # Apparently only partially here
if (my $defns = six_convertUnits($expr)) {
return six_format_units(six_parse_units($defns)); }
else {
Expand Down Expand Up @@ -1222,8 +1222,9 @@ DefMacro('\lx@six@unitobject@arg{}{}', sub {

# Collapsing nested definitions: If the data of this unit are just more unit objects, return them,
DefMacro('\lx@six@unitobject@collapsible{}{}', sub {
#DefMacro('\lx@six@unitobject@collapsible{} Expanded', sub {
my ($gullet, $name, $data) = @_;
$data = Expand($data);
$data = ExpandPartially($data); # Apparently only partially here
my @tokens = $data->unlist;
my @result = ();
while (@tokens) {
Expand Down
Binary file added t/expansion/partial.pdf
Binary file not shown.
Loading

0 comments on commit 6167612

Please sign in to comment.