Skip to content

Commit

Permalink
Base primitive improvements (brucemiller#2369)
Browse files Browse the repository at this point in the history
* Tweak fontname recognition for cmti (text italic)

* Move initialization of UC/LC to State.pm

* Move a couple of helpers from plain to TeX_Math

* Various tweaks & new definitions: \nonscript, fix \delcode, \mathbin et al. get Digested arg, \fam should change fonts, more robust \eqno

* Define an expanded general text parameter type (XGeneralText) that expands \the-like; use it for \write, \special; improve test case

* autoopened nodes can be autoclosed

* DefMath can coerce more symbols to simpler primitives (rather than constructors)

* Better math char decoding

* Make \mathopen, etc digest the argument (but something odd with \mathop)

* Improved \left,\right handling to deal with \delimiter and others; New TeXDelimiter parameter type; Delimiter data is now keyed to the unicode; tends to more consistently preserve the various token attributes (name, stretchy, etc)

* Update test cases for more consistently preserved (though dubious) name attribute

* Add left/right \delimiter test

* TeXDelimiter parameter type reverts w/o braces

* Autoopened text nodes can autoclose, but not necessarily others
  • Loading branch information
brucemiller authored May 29, 2024
1 parent 4cd73e7 commit 1438332
Show file tree
Hide file tree
Showing 18 changed files with 265 additions and 200 deletions.
4 changes: 2 additions & 2 deletions lib/LaTeXML/Common/Font.pm
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use LaTeXML::Common::Font::Metric;
use LaTeXML::Common::Font::StandardMetrics;
use LaTeXML::Common::Color;
use List::Util qw(min max sum);
use base qw(LaTeXML::Common::Object);
use base qw(LaTeXML::Common::Object);

# Note that this has evolved way beyond just "font",
# but covers text properties (or even display properties) in general
Expand Down Expand Up @@ -62,7 +62,7 @@ my $FLAG_EMPH = 0x10;
my %font_family = (
cmr => { family => 'serif' }, cmss => { family => 'sansserif' },
cmtt => { family => 'typewriter' }, cmvtt => { family => 'typewriter' },
cmti => { family => 'typewriter', shape => 'italic' },
cmt => { family => 'serif' }, # for cmti "text italic"
cmfib => { family => 'serif' }, cmfr => { family => 'serif' },
cmdh => { family => 'serif' }, cm => { family => 'serif' },
ptm => { family => 'serif' }, ppl => { family => 'serif' },
Expand Down
4 changes: 2 additions & 2 deletions lib/LaTeXML/Core/Document.pm
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use LaTeXML::Common::XML;
use LaTeXML::Util::Radix;
use Unicode::Normalize;
use Scalar::Util qw(blessed);
use base qw(LaTeXML::Common::Object);
use base qw(LaTeXML::Common::Object);

#**********************************************************************
# These two element names are `leaks' of the document structure into
Expand Down Expand Up @@ -771,7 +771,7 @@ sub openText {
$n = $n->parentNode; }
closeToNode($self, $closeto) if $closeto ne $node; # Move to best starting point for this text.
openElement($self, $elementname, font => $font,
_fontswitch => 1, _autoopened => 1)
_fontswitch => 1, _autoopened => 1, _autoclose => 1)
if $bestdiff > 0; # Open if needed.
}
# Finally, insert the darned text.
Expand Down
11 changes: 11 additions & 0 deletions lib/LaTeXML/Core/State.pm
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,17 @@ sub new {
$$self{uccode} = {};
$$self{delcode} = {};
$$self{tracing_definitions} = {};
# Initializations that INITEX would have set.
$$self{mathcode}{'.'} = [0];
for (my $c = ord('0') ; $c <= ord('9') ; $c++) {
$$self{mathcode}{ chr($c) } = [0x7000]; }
for (my $c = ord('a') ; $c <= ord('z') ; $c++) {
my $C = $c + ord('A') - ord('a');
$$self{mathcode}{ chr($c) } = [0x7100];
$$self{mathcode}{ chr($C) } = [0x7100];
$$self{uccode}{ chr($c) } = [$C];
$$self{lccode}{ chr($C) } = [$c];
$$self{sfcode}{ chr($C) } = [999]; }
return $self; }

sub assign_internal {
Expand Down
50 changes: 32 additions & 18 deletions lib/LaTeXML/Engine/Base_ParameterTypes.pool.ltxml
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,13 @@ DefParameterType('Optional', sub {
# GeneralText: a balanced token list introduced by a required opening brace.
# TeX permits <filler> before that brace, so the upcoming token is first read
# with expansion and then pushed straight back; only afterwards is the balanced
# group read.
DefParameterType('GeneralText', sub {
    my ($gullet) = @_;
    # Read in list context and hand back exactly what was obtained.
    my @lookahead = $gullet->readXToken;
    $gullet->unread(@lookahead);
    # First argument is 0 here, whereas XGeneralText passes 1
    # (presumably the expansion flag -- confirm against Gullet::readBalanced).
    my @balanced_args = (0, 0, 1);
    return $gullet->readBalanced(@balanced_args); });

# XGeneralText: same reading procedure as GeneralText, but readBalanced is
# called with 1 as its first argument.
# NOTE(review): presumably that flag requests expansion of the contents
# (the type is used by \write and \special) -- confirm against Gullet::readBalanced.
DefParameterType('XGeneralText', sub {
    my ($gullet) = @_;
    # Expand up to the required opening brace (skipping <filler>), then put
    # the token(s) back so that readBalanced starts from them.
    my @lookahead = $gullet->readXToken;
    $gullet->unread(@lookahead);
    my @balanced_args = (1, 0, 1);
    return $gullet->readBalanced(@balanced_args); });

DefParameterType('Until', sub {
my ($gullet, $until) = @_;
$gullet->readUntil($until); },
Expand Down Expand Up @@ -371,26 +375,36 @@ DefParameterType('BalancedParen', sub {
# It is useful when the content would usually need to have been \protect'd
# in order to correctly deal with catcodes.
# BEWARE: This is NOT a shorthand for a simple digested {}!
DefParameterType('Digested', sub {
no warnings 'recursion';
my ($gullet) = @_;
$gullet->skipSpaces;
my $ismath = $STATE->lookupValue('IN_MATH');
my @list = ();
my $token;
do { $token = $gullet->readXToken(0);
} while (defined $token && (($token->getCatcode == CC_SPACE) || $token->equals(T_CS('\relax'))));
if (!defined $token) { }
elsif ($token->getCatcode == CC_BEGIN) {
Digest($token);
push(@list, $STATE->getStomach->digestNextBody()); pop(@list); } # content w/o the braces
else {
push(@list, $STATE->getStomach->invokeToken($token)); }
@list = grep { ref $_ ne 'LaTeXML::Core::Comment' } @list;
List(@list, mode => ($ismath ? 'math' : 'text')); },
# readDigested($gullet)
# Reads one argument and digests it immediately, returning a List of the result.
#  * Leading spaces, and any space or \relax tokens found while reading with
#    expansion, are skipped.
#  * If the first remaining token has catcode CC_BEGIN, that token is digested and
#    then the following body is digested; the last item of that body is discarded
#    so the content comes back without the braces.
#  * Any other token is invoked directly on the stomach.
#  * If no token is available, the List is empty.
# Comment boxes are filtered out. The List's mode ('math' or 'text') reflects
# IN_MATH as it was *before* any digestion took place.
sub readDigested {
  no warnings 'recursion';
  my ($gullet) = @_;
  $gullet->skipSpaces;
  # Capture the mode up front; digesting below may change state.
  my $mode = ($STATE->lookupValue('IN_MATH') ? 'math' : 'text');

  # Find the first expanded token that is neither a space nor \relax.
  my $token;
  while (1) {
    $token = $gullet->readXToken(0);
    last unless defined $token;
    last unless (($token->getCatcode == CC_SPACE) || $token->equals(T_CS('\relax')));
  }

  my @boxes = ();
  if (defined $token) {
    if ($token->getCatcode == CC_BEGIN) {
      Digest($token);
      @boxes = $STATE->getStomach->digestNextBody();
      pop(@boxes);    # content w/o the braces
    }
    else {
      @boxes = $STATE->getStomach->invokeToken($token);
    }
  }

  my @kept = grep { ref $_ ne 'LaTeXML::Core::Comment' } @boxes;
  return List(@kept, mode => $mode); }

# Digested: the argument is read and digested on the spot by readDigested.
# It is flagged undigested so it is not digested a second time, and it
# reverts to its content wrapped in a begin/end group pair.
DefParameterType('Digested', \&readDigested,
  undigested => 1,    # since _already_ digested.
  reversion  => sub {
    my ($digested) = @_;
    return (T_BEGIN, Revert($digested), T_END); });

# TeXDelimiter: read a delimiter.
# Strictly speaking a delimiter is a single token, or \delimiter<number>
# (or possibly \radical<number>), but no such restriction is enforced here.
# The argument is simply one Digested item, read by readDigested;
# the difference from Digested is that its reversion adds no braces.
DefParameterType('TeXDelimiter', \&readDigested,
  undigested => 1,    # since _already_ digested.
  reversion  => sub {
    my ($delimiter) = @_;
    return Revert($delimiter); });

# A variation: Digest until we encounter a given token!
DefParameterType('DigestUntil', sub {
my ($gullet, $until) = @_;
Expand Down
15 changes: 10 additions & 5 deletions lib/LaTeXML/Engine/TeX_FileIO.pool.ltxml
Original file line number Diff line number Diff line change
Expand Up @@ -106,18 +106,23 @@ DefPrimitive('\openout Number SkipSpaces SkipMatch:= SkipSpaces TeXFileName', su
# \closeout <number>: forget the file associated with an output port.
# The global value 'output_file:<port>' is reset to undef.
# When $LaTeXML::DEBUG{write} is set and a file name is registered for the port,
# the text accumulated under '<filename>_contents' is reported via Debug first.
DefPrimitive('\closeout Number', sub {
    my ($stomach, $port) = @_;
    my $port_key = 'output_file:' . ToString($port);
    # Only consult the port table when write-debugging is enabled.
    my $filename = $LaTeXML::DEBUG{write} && LookupValue($port_key);
    if ($filename) {
      my $contents = LookupValue($filename . '_contents');
      Debug("CLOSING $filename with content:\n$contents\n============================="); }
    AssignValue($port_key => undef, 'global');
    return; });

# \write: append text to the contents stored for an output port, or report it as a Note.
# The port number is stringified and used to look up 'output_file:<port>'. When a file name
# is registered there, the text (converted by UnTeX) plus a newline is appended to the
# global value '<filename>_contents'; otherwise the text is passed to Note.
DefPrimitive('\write Number {}', sub {
DefPrimitive('\write Number XGeneralText', sub {
my ($stomach, $port, $tokens) = @_;
$port = ToString($port);
if (my $filename = LookupValue('output_file:' . $port)) {
my $handle = $filename . '_contents';
my $contents = LookupValue($handle);
# With the '{}' parameter the tokens are passed through Expand here; with XGeneralText
# no Expand call is made (presumably that parameter type already expanded them -- confirm).
AssignValue($handle => $contents . UnTeX(Expand($tokens), 1) . "\n", 'global'); }
AssignValue($handle => $contents . UnTeX($tokens, 1) . "\n", 'global'); }
else {
# No file registered for this port: emit the text as a Note instead of storing it.
Note(UnTeX(Expand($tokens))); }
Note(UnTeX($tokens)); }
return; });

# Since we don't paginate, we're effectively always "shipping out",
Expand Down Expand Up @@ -145,7 +150,7 @@ DefMacro('\input TeXFileName', sub {
#----------------------------------------------------------------------
# \special c sends material to the dvi file for special processing.

DefPrimitive('\special {}', sub {
DefPrimitive('\special XGeneralText', sub {
my ($stomach, $arg) = @_;
my $special_str = ToString($arg);
# recognize one special graphics inclusion case
Expand All @@ -161,7 +166,7 @@ DefPrimitive('\special {}', sub {
$stomach->getGullet->unread(
T_CS('\ltx@special@graphics'), @kv, T_BEGIN, T_OTHER($graphic), T_END); }
else {
Info('ignored', 'special', $stomach, 'Unrecognized TeX Special', $arg); }
Info('ignored', 'special', $stomach, 'Unrecognized TeX Special' . ToString($arg)); }
return; });

# adapted from graphicx.sty.ltxml
Expand Down
Loading

0 comments on commit 1438332

Please sign in to comment.