Skip to content

Commit

Permalink
Merge branch 'dev'
Browse files Browse the repository at this point in the history
  • Loading branch information
philipru committed Dec 19, 2023
2 parents 2392b86 + 4729c19 commit c2089b8
Show file tree
Hide file tree
Showing 13 changed files with 567 additions and 364 deletions.
22 changes: 22 additions & 0 deletions LICENCE_ASCIIMATH.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
Copyright (c) 2014 Pepijn Van Eeckhoudt

MIT License

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 changes: 21 additions & 0 deletions LICENCE_KATEX.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
The MIT License (MIT)

Copyright (c) 2013-2020 Khan Academy and other contributors

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
3 changes: 1 addition & 2 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,7 @@ dependencies {
compile (
'org.slf4j:slf4j-api:1.7.21',
'org.pageseeder.xmlwriter:pso-xmlwriter:1.0.4',
'org.pageseeder.diffx:pso-diffx:1.1.1',
'de.rototor.snuggletex:snuggletex-core:1.3.0'
'org.pageseeder.diffx:pso-diffx:1.1.1'
)

compileOnly 'org.eclipse.jdt:org.eclipse.jdt.annotation:2.0.0'
Expand Down
3 changes: 2 additions & 1 deletion src/main/java/org/pageseeder/psml/process/Process.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import org.pageseeder.psml.process.config.*;
import org.pageseeder.psml.process.config.Images.ImageSrc;
import org.pageseeder.psml.process.math.AsciiMathConverter;
import org.pageseeder.psml.process.math.TexConverter;
import org.pageseeder.psml.process.util.Files;
import org.pageseeder.psml.process.util.IncludesExcludesMatcher;
import org.pageseeder.psml.process.util.XMLUtils;
Expand Down Expand Up @@ -507,7 +508,7 @@ public void process(Map<String, File> psmlFiles, File source, File destination,
IncludesExcludesMatcher xrefsMatcher = this.xrefs == null ? null : this.xrefs.buildMatcher();
for (String relPath : psmlFiles.keySet()) {
// log
this.logger.debug("Processing file "+relPath);
this.logger.debug("Processing file {}",relPath);
// create temp output file
FileOutputStream fos;
File tempOutput;
Expand Down
163 changes: 115 additions & 48 deletions src/main/java/org/pageseeder/psml/process/math/TexConverter.java
Original file line number Diff line number Diff line change
@@ -1,64 +1,131 @@
package org.pageseeder.psml.process.math;

import uk.ac.ed.ph.snuggletex.*;
import uk.ac.ed.ph.snuggletex.utilities.MessageFormatter;
import org.pageseeder.psml.process.util.WrappingReader;
import org.pageseeder.psml.util.PSCache;

import javax.script.*;
import java.io.IOException;
import java.util.List;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Collections;
import java.util.Map;

public class TexConverter {

/**
* No constructor
*/
private TexConverter() {}
private static final String JS_SCRIPT = "/org/pageseeder/psml/process/math/katex.0.16.9.min.js";

private static Invocable script = null;

private static final Map<String, String> cache = Collections.synchronizedMap(new PSCache<>(200));

/**
* Convert the provided TeX string to mathml content
*
* @param input the input
*
* @return the mathml content
*/
public static String convert(String input) {
/* Create vanilla SnuggleEngine and new SnuggleSession */
SnuggleEngine engine = new SnuggleEngine();
SnuggleSession session = engine.createSession();
// stop at first error
session.getConfiguration().setFailingFast(true);

// trim leading/trailing space
String newInput = input.trim();
// replace all non-breaking space (caused an error in snuggle tex 1.2.2)
//newInput = newInput.replaceAll("[\\u00A0]", " ");
// aligned is not supported but eqnarray is
if (newInput.startsWith("\\begin{aligned}")) {
newInput = newInput.replaceAll("\\{aligned}", "{eqnarray*}");
newInput = newInput.replaceAll("&=", "&=&");
// sanity check
if (input == null || input.trim().isEmpty()) return "";

input = input.trim();

// check cache
String result = cache.get(input);
if (result == null) {

// invoke the function named "parse" with the TeX math as the argument
try {
synchronized (TexConverter.class) {
result = script().invokeFunction("parse", input).toString();
}
// extract mathml from HTML result
result = extractMathML(result);
cache.put(input, result);
} catch (ScriptException | NoSuchMethodException | IOException ex) {
ex.printStackTrace();
throw new IllegalArgumentException("Failed to run KaTex to MathML JS script: " + ex.getMessage());
}
}
// eqnarray can't be used in math mode
if (!newInput.startsWith("\\begin{eqnarray*}"))
newInput = "$$ "+newInput+" $$";
return result;
}

/* Parse some LaTeX input */
SnuggleInput snuggleInput = new SnuggleInput(newInput);
try {
session.parseInput(snuggleInput);
} catch (IOException ex) {
ex.printStackTrace();
throw new IllegalArgumentException("The Tex \""+input+"\" could not be converted to MathML because: " + ex.getMessage());
/**
 * Look for MathML content in the string provided and return it as a
 * standalone <code>&lt;math&gt;</code> element.
 *
 * <p>Three shapes are handled:
 * <ul>
 *   <li>no <code>&lt;math</code> tag: the input is returned unchanged;</li>
 *   <li><code>&lt;math&gt;</code> containing <code>&lt;semantics&gt;</code>: only the
 *       presentation markup located between <code>&lt;semantics&gt;</code> and the TeX
 *       <code>&lt;annotation&gt;</code> (or <code>&lt;/semantics&gt;</code> when there is
 *       no annotation) is kept, re-wrapped in a namespaced <code>&lt;math&gt;</code>;</li>
 *   <li><code>&lt;math&gt;</code> without <code>&lt;semantics&gt;</code>: the
 *       <code>&lt;math&gt;...&lt;/math&gt;</code> element is returned as found.</li>
 * </ul>
 *
 * <p>If an expected closing tag is missing the input is returned unchanged
 * rather than failing with an index error.
 *
 * @param result the string from the JS script
 *
 * @return the mathml extracted, or the input itself when nothing can be extracted
 */
private static String extractMathML(String result) {
  final String semanticsOpen = "<semantics>";
  final String mathClose = "</math>";

  // indexOf returns -1 when absent; 0 is a legitimate match position
  int start = result.indexOf("<math");
  if (start < 0) return result;

  int semantics = result.indexOf(semanticsOpen, start);
  if (semantics < 0) {
    // plain <math> element: return it with its own start and end tags
    int end = result.indexOf(mathClose, start);
    return end < 0 ? result : result.substring(start, end + mathClose.length());
  }

  // keep only the presentation markup, dropping the TeX source annotation
  int contentStart = semantics + semanticsOpen.length();
  int contentEnd = result.indexOf("<annotation encoding=\"application/x-tex\">", contentStart);
  if (contentEnd < 0) {
    contentEnd = result.indexOf("</semantics>", contentStart);
  }
  if (contentEnd < 0) return result;

  return "<math xmlns=\"http://www.w3.org/1998/Math/MathML\">"
      + result.substring(contentStart, contentEnd)
      + "</math>";
}

/* Specify how we want the resulting XML */
XMLStringOutputOptions options = new XMLStringOutputOptions();
options.setSerializationMethod(SerializationMethod.XML);
options.setEncoding("UTF-8");
options.setIncludingXMLDeclaration(false);
//options.setIndenting(true);
//options.setAddingMathSourceAnnotations(true);
//options.setUsingNamedEntities(true); /* (Only used if caller has an XSLT 2.0 processor) */

/* Convert the results to an XML String, which in this case will
* be a single MathML <math>...</math> element. */
String mathml = session.buildXMLString(options);
List<InputError> errors = session.getErrors();
if (!errors.isEmpty()) {
String message2 = MessageFormatter.formatErrorAsString(errors.get(0));
// add input for non-ascii character error
String message1 = message2.contains("TTEG02") ? "\"" + input + "\" " : "";
throw new IllegalArgumentException("The Tex " + message1 + "could not be converted to MathML because: " + message2);
// Not required as the script doesn't seem to get slower over time
//public static void reset() {
// synchronized (AsciiMathConverter.class) {
// script = null;
// }
//}

/**
* Load the script from the internal resource
*
* @return the script ready to be invoked
*
* @throws ScriptException If loading the script failed
* @throws IOException If loading the script failed
*/
private static Invocable script() throws ScriptException, IOException {
synchronized (TexConverter.class) {
if (script != null) return script;
}
if (mathml.startsWith("<math ") && mathml.endsWith("</math>")) {
mathml = "<math xmlns=\"http://www.w3.org/1998/Math/MathML\">" + mathml.substring(mathml.indexOf('>')+1);

// load script
ScriptEngineManager manager = new ScriptEngineManager();
ScriptEngine engine = manager.getEngineByName("javascript");
Compilable cengine = (Compilable) engine;

// evaluate JavaScript code
try {
InputStream in = TexConverter.class.getResourceAsStream(JS_SCRIPT);
if (in != null) {
// add the Array.fill() method as it seems to be missing
String scriptPrefix = "Array.prototype.fill = function(arg) { for (var i = 0; i < this.length; i++) { this[i] = arg; } };";
String scriptSuffix = "var parse = function(str) { return katex.renderToString(str, { output: 'mathml' }); };";
CompiledScript cscript = cengine.compile(new WrappingReader(new InputStreamReader(in), scriptPrefix, scriptSuffix));
cscript.eval();
// create an Invocable object by casting the script engine object
script = (Invocable) cscript.getEngine();
return script;
} else {
throw new IllegalArgumentException("Failed to load KaTex to MathML JS script");
}
} catch (ScriptException | IOException ex) {
System.err.println("Failed to load KaTex to MathML JS script: "+ex.getMessage());
throw ex;
}
return mathml;
}

}
3 changes: 3 additions & 0 deletions src/main/java/org/pageseeder/psml/toc/FragmentNumbering.java
Original file line number Diff line number Diff line change
Expand Up @@ -439,6 +439,9 @@ public void processParagraph(Paragraph para, int level, long id, NumberingGenera
} else if (p != null && !NO_PREFIX.equals(p)) {
pref = new Prefix(p, null, adjusted_level, null);
}
if (number != null) {
number.restartNumbering(adjusted_level);
}
updateLocation(para, location);
if (pref == null) return;
// store prefix on fragment
Expand Down
22 changes: 13 additions & 9 deletions src/main/java/org/pageseeder/psml/toc/Paragraph.java
Original file line number Diff line number Diff line change
Expand Up @@ -196,19 +196,23 @@ public void toXML(XMLWriter xml, int level, @Nullable FragmentNumbering number,
if (!Paragraph.NO_BLOCK_LABEL.equals(this._blocklabel)) {
xml.attribute("block-label", this._blocklabel);
}
if (this._numbered && number != null) {
if (number != null) {
Prefix pref = number.getTranscludedPrefix(treeid, count, fragment(), this._index, true);
if (pref != null) {
xml.attribute("part-level", pref.level);
// don't output undefined prefixes
if (!"".equals(pref.value) || pref.canonical != null) {
xml.attribute("prefix", pref.value);
xml.attribute("canonical", pref.canonical);
}
}
} else {
if (!NO_PREFIX.equals(this._prefix)) {
xml.attribute("prefix", this._prefix);
if (this._numbered) {
if (pref != null) {
// don't output undefined prefixes
if (!"".equals(pref.value) || pref.canonical != null) {
xml.attribute("prefix", pref.value);
xml.attribute("canonical", pref.canonical);
}
}
} else {
if (!NO_PREFIX.equals(this._prefix)) {
xml.attribute("prefix", this._prefix);
}
}
}
xml.closeElement();
Expand Down

Large diffs are not rendered by default.

Loading

0 comments on commit c2089b8

Please sign in to comment.