From 33bd5e13b0d14d0cd5417728c77063b88454ed99 Mon Sep 17 00:00:00 2001
From: "Steven R. Loomis"
Date: Thu, 17 Aug 2023 11:42:35 -0500
Subject: [PATCH] CLDR-15034 kbd: Add a KeyboardTool, and KeyboardFlatten utility (#3117)

- Flatten tool will resolve per spec
- includes tests

Originally from (#2633)
---
 .../unicode/cldr/tool/KeyboardFlatten.java    | 332 ++++++++++++++++++
 .../org/unicode/cldr/tool/KeyboardTool.java   |  38 +++
 .../cldr/tool/TestKeyboardFlatten.java        |  54 ++++
 .../KeyboardFlatten/broken-import-missing.xml |  85 ++++++
 .../broken-import-unknownbase.xml             |  85 ++++++
 .../broken-import-unknownver.xml              |  85 ++++++
 .../broken-import-wrongparent.xml             |  86 ++++++
 7 files changed, 765 insertions(+)
 create mode 100644 tools/cldr-code/src/main/java/org/unicode/cldr/tool/KeyboardFlatten.java
 create mode 100644 tools/cldr-code/src/main/java/org/unicode/cldr/tool/KeyboardTool.java
 create mode 100644 tools/cldr-code/src/test/java/org/unicode/cldr/tool/TestKeyboardFlatten.java
 create mode 100644 tools/cldr-code/src/test/resources/org/unicode/cldr/tool/KeyboardFlatten/broken-import-missing.xml
 create mode 100644 tools/cldr-code/src/test/resources/org/unicode/cldr/tool/KeyboardFlatten/broken-import-unknownbase.xml
 create mode 100644 tools/cldr-code/src/test/resources/org/unicode/cldr/tool/KeyboardFlatten/broken-import-unknownver.xml
 create mode 100644 tools/cldr-code/src/test/resources/org/unicode/cldr/tool/KeyboardFlatten/broken-import-wrongparent.xml

diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/KeyboardFlatten.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/KeyboardFlatten.java
new file mode 100644
index 00000000000..ff75a6bfe41
--- /dev/null
+++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/KeyboardFlatten.java
@@ -0,0 +1,332 @@
+package org.unicode.cldr.tool;
+
+import java.io.File;
+import java.io.OutputStream;
+import java.net.MalformedURLException;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.transform.TransformerConfigurationException;
+import javax.xml.transform.TransformerException;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.TransformerFactoryConfigurationError;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+import javax.xml.validation.Schema;
+import javax.xml.validation.SchemaFactory;
+import org.unicode.cldr.util.CLDRConfig;
+import org.unicode.cldr.util.DtdType;
+import org.unicode.cldr.util.PathUtilities;
+import org.unicode.cldr.util.XMLValidator;
+import org.w3c.dom.Comment;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+
+/** Read a Keyboard and write it out with no import statements */
+public class KeyboardFlatten {
+    /**
+     * Flatten the keyboard file at {@code path}, writing the result to {@code stream}.
+     *
+     * @param path path of the keyboard XML file to read
+     * @param stream where the flattened XML is written
+     * @throws IllegalArgumentException if the file cannot be parsed or an import cannot be
+     *     resolved
+     */
+    public static void flatten(String path, OutputStream stream)
+            throws MalformedURLException, SAXException, TransformerConfigurationException,
+                    TransformerException, TransformerFactoryConfigurationError {
+        final String filename = PathUtilities.getNormalizedPathString(path);
+        // Force filerefs to be URI's if needed: note this is independent of any
+        // other files
+        final String docURI = XMLValidator.filenameToURL(filename);
+        flatten(new InputSource(docURI), filename, stream);
+    }
+
+    /**
+     * Flatten the keyboard read from {@code inputSource}, writing the result to {@code stream}.
+     *
+     * @param inputSource the keyboard XML to parse
+     * @param filename name used to prefix diagnostics printed to stderr
+     * @param stream where the flattened XML is written
+     * @throws IllegalArgumentException if the document cannot be parsed or an import cannot be
+     *     resolved
+     */
+    public static void flatten(InputSource inputSource, String filename, OutputStream stream)
+            throws SAXException, TransformerConfigurationException, TransformerException,
+                    TransformerFactoryConfigurationError, MalformedURLException {
+        final DocumentBuilderFactory dfactory = getKeyboardDocFactory();
+        final ErrorHandler nullHandler = getNullHandler(filename);
+        // Parse
+        Document doc = parseDocument(inputSource, filename, dfactory, nullHandler);
+
+        // do the flatten
+        flattenDoc(dfactory, doc);
+
+        // Write out
+        write(doc, stream);
+    }
+
+    /**
+     * Expand every {@code import} element present in {@code doc} and then remove it.
+     *
+     * <p>Only the imports present when this method is called are expanded; an {@code import}
+     * brought in by imported content is left in place.
+     */
+    private static void flattenDoc(final DocumentBuilderFactory dfactory, Document doc)
+            throws MalformedURLException {
+        // Now, flatten it
+        final NodeList imports = doc.getElementsByTagName("import");
+
+        if (imports.getLength() == 0) {
+            System.err.println("No imports");
+            return;
+        }
+        // The NodeList is live, and flattenImport() inserts nodes ahead of the import being
+        // expanded. Walking the live list by index would therefore revisit the same import
+        // forever if imported content contained an import of its own, so snapshot it first.
+        final Node[] pending = new Node[imports.getLength()];
+        for (int i = 0; i < pending.length; i++) {
+            pending[i] = imports.item(i);
+        }
+        for (final Node item : pending) {
+            flattenImport(dfactory, doc, item);
+            // now remove the import node
+            item.getParentNode().removeChild(item);
+        }
+    }
+
+    /**
+     * Locate the file named by one {@code import} element and splice its content in.
+     *
+     * <p>Only {@code base="cldr"} with a path starting with {@code techpreview/} is recognized;
+     * the remainder of the path is looked up under {@code keyboards/import} in the CLDR base
+     * directory.
+     *
+     * @throws IllegalArgumentException if the base or the path prefix is not recognized
+     */
+    private static void flattenImport(
+            final DocumentBuilderFactory dfactory, Document doc, Node item)
+            throws MalformedURLException {
+        final String base = getBase(item);
+        final String path = getPath(item);
+        System.err.println("Import: " + base + ":" + path);
+        if (!base.equals("cldr")) {
+            throw new IllegalArgumentException("Unknown base: " + base);
+        }
+        if (!path.startsWith("techpreview/")) {
+            throw new IllegalArgumentException("Unknown cldr base: " + path);
+        }
+        final String subpath = path.replaceFirst("techpreview/", "");
+        final File importDir =
+                new File(CLDRConfig.getInstance().getCldrBaseDirectory(), "keyboards/import");
+        final File importFile = new File(importDir, subpath);
+        applyImportFile(dfactory, doc, item, path, importFile);
+    }
+
+    /**
+     * Parse {@code importFile} and insert clones of its root's children before {@code item},
+     * bracketed by comments naming {@code path}.
+     *
+     * @throws IllegalArgumentException if the file does not exist, cannot be parsed, or its root
+     *     element's name differs from that of the import's parent element
+     */
+    private static void applyImportFile(
+            final DocumentBuilderFactory dfactory,
+            Document doc,
+            Node item,
+            final String path,
+            final File importFile)
+            throws MalformedURLException {
+        if (!importFile.exists()) {
+            throw new IllegalArgumentException("File " + importFile + " does not exist");
+        }
+        System.err.println("Importing: " + importFile.getAbsolutePath());
+        final String ifilename =
+                PathUtilities.getNormalizedPathString(importFile.getAbsolutePath());
+        // Force filerefs to be URI's if needed: note this is independent of any
+        // other files
+        final String docURI = XMLValidator.filenameToURL(ifilename);
+
+        Document importDoc =
+                parseDocument(
+                        new InputSource(docURI), ifilename, dfactory, getNullHandler(ifilename));
+        System.err.println("Parsed import OK");
+        // Now perform the import
+
+        // Validate the root element
+        final Element importedRoot = importDoc.getDocumentElement();
+        final Node importParentNode = item.getParentNode();
+        if (importParentNode.getNodeType() != Node.ELEMENT_NODE) {
+            throw new IllegalArgumentException("import parent is not an element");
+        }
+        final Element importParent = (Element) importParentNode;
+        // Elements must be same name
+        if (!importParent.getTagName().equals(importedRoot.getTagName())) {
+            throw new IllegalArgumentException(
+                    "trying to import "
+                            + importedRoot.getTagName()
+                            + " root into child of "
+                            + importParent.getTagName());
+        }
+        System.err.println("Importing into " + importParent.getTagName());
+
+        Comment preComment = doc.createComment("Begin Imports from " + path);
+        Comment postComment = doc.createComment("End Imports from " + path);
+
+        // OK here we go
+        NodeList moveChildren = importedRoot.getChildNodes();
+        importParent.insertBefore(preComment, item);
+        for (int j = 0; j < moveChildren.getLength(); j++) {
+            final Node child = moveChildren.item(j);
+            final Node clone = doc.importNode(child, true);
+            importParent.insertBefore(clone, item);
+        }
+        System.err.println("Moved " + moveChildren.getLength() + " children");
+        // Add a comment
+        importParent.insertBefore(postComment, item);
+
+        // done
+    }
+
+    /**
+     * Build a SAX error handler that prints diagnostics to stderr, prefixed by {@code filename2}.
+     * Warnings and errors are only printed; fatal errors are printed and rethrown.
+     */
+    private static ErrorHandler getNullHandler(final String filename2) {
+        ErrorHandler nullHandler =
+                new ErrorHandler() {
+                    @Override
+                    public void warning(SAXParseException e) throws SAXException {
+                        System.err.println(filename2 + ": Warning: " + e.getMessage());
+                    }
+
+                    @Override
+                    public void error(SAXParseException e) throws SAXException {
+                        int col = e.getColumnNumber();
+                        System.err.println(
+                                filename2
+                                        + ":"
+                                        + e.getLineNumber()
+                                        + (col >= 0 ? ":" + col : "")
+                                        + ": ERROR: Element "
+                                        + e.getPublicId()
+                                        + " is not valid because "
+                                        + e.getMessage());
+                    }
+
+                    @Override
+                    public void fatalError(SAXParseException e) throws SAXException {
+                        System.err.println(filename2 + ": ERROR ");
+                        throw e;
+                    }
+                };
+        return nullHandler;
+    }
+
+    /**
+     * Create a namespace-aware, validating DOM factory configured with the keyboard XSD found
+     * under the CLDR base directory.
+     */
+    private static DocumentBuilderFactory getKeyboardDocFactory() throws SAXException {
+        DocumentBuilderFactory dfactory = DocumentBuilderFactory.newInstance();
+        // Always set namespaces on
+        dfactory.setNamespaceAware(true);
+        dfactory.setValidating(true);
+        SchemaFactory sfac = SchemaFactory.newDefaultInstance();
+        Schema schema =
+                sfac.newSchema(
+                        new File(
+                                CLDRConfig.getInstance().getCldrBaseDirectory(),
+                                DtdType.keyboard.getXsdPath()));
+        dfactory.setSchema(schema);
+        // Set other attributes here as needed
+        // applyAttributes(dfactory, attributes);
+        return dfactory;
+    }
+
+    /** Return the value of the {@code base} attribute of {@code item}. */
+    private static String getBase(Node item) {
+        return getAttributeValue(item, "base");
+    }
+
+    /** Return the value of the {@code path} attribute of {@code item}. */
+    private static String getPath(Node item) {
+        return getAttributeValue(item, "path");
+    }
+
+    /**
+     * Return the text of attribute {@code attrName} on {@code item}.
+     *
+     * @throws IllegalArgumentException if the attribute is absent
+     */
+    private static String getAttributeValue(Node item, final String attrName) {
+        final Node attr = item.getAttributes().getNamedItem(attrName);
+        if (attr == null) {
+            // Fail with a message naming the attribute rather than a bare NullPointerException.
+            throw new IllegalArgumentException(
+                    "<" + item.getNodeName() + "> is missing attribute " + attrName);
+        }
+        return attr.getTextContent();
+    }
+
+    /**
+     * Parse {@code inputSource} into a DOM document.
+     *
+     * @param filename name used to prefix diagnostics printed to stderr
+     * @throws IllegalArgumentException if the parser cannot be created or the parse fails; the
+     *     underlying exception is attached as the cause
+     */
+    private static Document parseDocument(
+            InputSource inputSource,
+            String filename,
+            DocumentBuilderFactory dfactory,
+            ErrorHandler nullHandler) {
+        final Document doc;
+        try {
+            // First, attempt to parse as XML (preferred)...
+            DocumentBuilder docBuilder = dfactory.newDocumentBuilder();
+            docBuilder.setErrorHandler(nullHandler);
+            doc = docBuilder.parse(inputSource);
+        } catch (Exception se) {
+            final String message;
+            if (se instanceof SAXParseException) {
+                SAXParseException pe = (SAXParseException) se;
+                int col = pe.getColumnNumber();
+                message =
+                        filename
+                                + ":"
+                                + pe.getLineNumber()
+                                + (col >= 0 ? ":" + col : "")
+                                + ": ERROR:"
+                                + se.toString();
+            } else {
+                message = filename + ": ERROR:" + se.toString();
+            }
+            System.err.println(message);
+            // Returning null here used to surface later as a NullPointerException in the caller,
+            // after a misleading "Doc parse OK"; fail now with the real cause instead.
+            throw new IllegalArgumentException(message, se);
+        }
+        System.err.println("Doc parse OK");
+        return doc;
+    }
+
+    /**
+     * Serialize XML out to stream
+     *
+     * @param doc the document to serialize
+     * @param stream destination of the serialized XML
+     */
+    private static void write(Document doc, OutputStream stream)
+            throws TransformerConfigurationException, TransformerException,
+                    TransformerFactoryConfigurationError {
+        TransformerFactory.newInstance()
+                .newTransformer()
+                .transform(new DOMSource(doc), new StreamResult(stream));
+    }
+}
diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/KeyboardTool.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/KeyboardTool.java
new file mode 100644
index 00000000000..8f702bbfaed
--- /dev/null
+++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/KeyboardTool.java
@@ -0,0 +1,38 @@
+package org.unicode.cldr.tool;
+
+import com.ibm.icu.dev.tool.UOption;
+import org.unicode.cldr.util.CLDRTool;
+
+@CLDRTool(alias = "kbd", description = "Tool for working with CLDR Keyboard files")
+public class KeyboardTool {
+
+    private static final UOption[] options = {
+        UOption.HELP_H(),
+        UOption.HELP_QUESTION_MARK(),
+        UOption.create("flatten", 'F', UOption.REQUIRES_ARG),
+    };
+
+    /** Print usage information to stdout. */
+    public static void help() {
+        System.out.println(
+                "CLDR Keyboard Tool\n"
+                        + "----\n"
+                        + "Usage:\n"
+                        + " -h | --help | -? print this help\n"
+                        + " -F infile.xml | --flatten infile.xml > outfile.xml print a flattened xml to stdout, without imports\n"
+                        + "");
+    }
+
+    /** Command-line entry point: handles the help and flatten options. */
+    public static void main(String[] args) throws Throwable {
+        UOption.parseArgs(args, options);
+        if (options[0].doesOccur || options[1].doesOccur) {
+            help();
+        } else if (options[2].doesOccur) {
+            System.err.println("Flatten: " + options[2].value);
+            KeyboardFlatten.flatten(options[2].value, System.out);
+        } else {
+            help();
+        }
+    }
+}
diff --git a/tools/cldr-code/src/test/java/org/unicode/cldr/tool/TestKeyboardFlatten.java b/tools/cldr-code/src/test/java/org/unicode/cldr/tool/TestKeyboardFlatten.java
new file mode 100644
index 00000000000..c2b604b627b
--- /dev/null
+++ b/tools/cldr-code/src/test/java/org/unicode/cldr/tool/TestKeyboardFlatten.java
@@ -0,0 +1,54 @@
+package org.unicode.cldr.tool;
+
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import javax.xml.transform.TransformerConfigurationException;
+import javax.xml.transform.TransformerException;
+import javax.xml.transform.TransformerFactoryConfigurationError;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
+import org.unicode.cldr.util.CLDRConfig;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+public class TestKeyboardFlatten {
+    @ParameterizedTest
+    @ValueSource(
+            strings = {
+                "KeyboardFlatten/broken-import-wrongparent.xml",
+                "KeyboardFlatten/broken-import-unknownbase.xml",
+                "KeyboardFlatten/broken-import-unknownver.xml",
+                "KeyboardFlatten/broken-import-missing.xml",
+            })
+    void TestBrokenImports(final String path) throws IOException {
+        try (final InputStream input =
TestKeyboardFlatten.class.getResourceAsStream(path); ) { + final InputSource source = new InputSource(input); + // Expect failure. + assertThrows( + IllegalArgumentException.class, + () -> KeyboardFlatten.flatten(source, path, System.out)); + } + } + + @Test + void TestImportMaltese() + throws TransformerConfigurationException, SAXException, TransformerException, + TransformerFactoryConfigurationError, IOException { + final File base = CLDRConfig.getInstance().getCldrBaseDirectory(); + final File mtxml = new File(base, "keyboards/3.0/mt.xml"); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + KeyboardFlatten.flatten(mtxml.getAbsolutePath(), baos); + baos.close(); + String outstr = baos.toString("UTF-8"); + assertTrue(outstr.contains(" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tools/cldr-code/src/test/resources/org/unicode/cldr/tool/KeyboardFlatten/broken-import-unknownbase.xml b/tools/cldr-code/src/test/resources/org/unicode/cldr/tool/KeyboardFlatten/broken-import-unknownbase.xml new file mode 100644 index 00000000000..b75697565fe --- /dev/null +++ b/tools/cldr-code/src/test/resources/org/unicode/cldr/tool/KeyboardFlatten/broken-import-unknownbase.xml @@ -0,0 +1,85 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tools/cldr-code/src/test/resources/org/unicode/cldr/tool/KeyboardFlatten/broken-import-unknownver.xml b/tools/cldr-code/src/test/resources/org/unicode/cldr/tool/KeyboardFlatten/broken-import-unknownver.xml new file mode 100644 index 00000000000..3f301db3ae4 --- /dev/null +++ b/tools/cldr-code/src/test/resources/org/unicode/cldr/tool/KeyboardFlatten/broken-import-unknownver.xml @@ -0,0 +1,85 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tools/cldr-code/src/test/resources/org/unicode/cldr/tool/KeyboardFlatten/broken-import-wrongparent.xml b/tools/cldr-code/src/test/resources/org/unicode/cldr/tool/KeyboardFlatten/broken-import-wrongparent.xml new file mode 100644 index 00000000000..78ea52773d0 --- /dev/null +++ b/tools/cldr-code/src/test/resources/org/unicode/cldr/tool/KeyboardFlatten/broken-import-wrongparent.xml @@ -0,0 +1,86 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +