Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

XML Entities, individual and grouped #183

Draft
wants to merge 11 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions configure.php
Original file line number Diff line number Diff line change
Expand Up @@ -734,7 +734,6 @@ function getFileModificationHistory(): array {

globbetyglob("{$ac['basedir']}/scripts", 'make_scripts_executable');


{ # file-entities.php

$cmd = array();
Expand All @@ -756,7 +755,6 @@ function getFileModificationHistory(): array {
}
}


checking("for if we should generate a simplified file");
if ($ac["GENERATE"] != "no") {
if (!file_exists($ac["GENERATE"])) {
Expand Down
19 changes: 19 additions & 0 deletions entities/global.ent-dist
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<?xml version="1.0" encoding="utf8" ?>
<!--
This is a "global" XML Entity file.
See doc-base/script/entities.php for details.

Place here only small entities that are NOT expected to
be replaced or translated in any part of the manual.

DO NOT COPY OR TRANSLATE THIS FILE.

If you want/need to translate some entity placed here,
open an issue on doc-base or doc-en repository, so the
translatable entity is moved to manual.ent.
-->
<entities xmlns="http://docbook.org/ns/docbook" xmlns:xlink="http://www.w3.org/1999/xlink">

<!-- <entity name="ent.name">XML fragment</entity> -->

</entities>
14 changes: 14 additions & 0 deletions entities/manual.ent-dist
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="utf8" ?>
<!-- $Revision:$ -->
<!--
This is a "manual" XML Entity file.
See doc-base/script/entities.php for details.

Place here only small entities that are expected to be
translated and/or replaced in any part of the manual.
-->
<entities xmlns="http://docbook.org/ns/docbook" xmlns:xlink="http://www.w3.org/1999/xlink">

<!-- <entity name="ent.name">XML fragment</entity> -->

</entities>
20 changes: 20 additions & 0 deletions entities/remove.ent-dist
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<?xml version="1.0" encoding="utf8" ?>
<!--
This is a "remove" XML Entity file.
See doc-base/script/entities.php for details.

Place here only deprecated entities that are expected NOT
to be used in any part of the manual.

DO NOT COPY OR TRANSLATE THIS FILE.

If an entity moved here is used in a language under your control,
this means that the entity in the original file is planned to be
removed, or has already been removed. Consult the original text file
and apply changes accordingly.
-->
<entities xmlns="http://docbook.org/ns/docbook" xmlns:xlink="http://www.w3.org/1999/xlink">

<!-- <entity name="ent.name">XML fragment</entity> -->

</entities>
6 changes: 5 additions & 1 deletion manual.xml.in
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@
%language-snippets;
@TRANSLATION_ONLY_INCL_END@

<!-- Entities collected by entities.php -->
<!ENTITY % manual-entities SYSTEM "./temp/entities.ent">
%manual-entities;


<!-- Fallback to English definitions and snippets (in case of missing translation) -->
<!ENTITY % language-defs.default SYSTEM "../@EN_DIR@/language-defs.ent">
<!ENTITY % language-snippets.default SYSTEM "../@EN_DIR@/language-snippets.ent">
Expand Down Expand Up @@ -57,7 +62,6 @@
&install.cloud.index;
&install.fpm.index;
&install.pecl;
&install.composer;
&install.ini;
</book>

Expand Down
84 changes: 84 additions & 0 deletions scripts/dtdent-conv.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
<?php /*
+----------------------------------------------------------------------+
| Copyright (c) 1997-2023 The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| https://www.php.net/license/3_01.txt. |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net, so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Authors: André L F S Bacci <ae php.net> |
+----------------------------------------------------------------------+
| Description: Convert DTD Entities files into XML Entities files. |
+----------------------------------------------------------------------+

See `entities.php` for detailed rationale.

Use this for converting bundled entities files that use <!ENTITY> into
XML version used by `entities.php`.

After converting, add the generated entities to a global.ent or
manual.ent file, and delete the previous one.

After all old style .ent files are split or converted, this script can
be removed. */

ini_set( 'display_errors' , 1 );
ini_set( 'display_startup_errors' , 1 );
error_reporting( E_ALL );

// Usage: php dtdent-conv.php infile
// Reads a DTD entity file, and prints each <!ENTITY name "value"> found
// as an <entity name="...">value</entity> XML element on standard output.

if ( count( $argv ) < 2 )
    die(" Syntax: php $argv[0] infile\n" );

$infile = $argv[1];

$content = file_get_contents( $infile );
if ( $content === false )
    die( "Failed to read $infile\n" );

// Collect libxml diagnostics internally instead of emitting PHP warnings;
// they are discarded after each fragment is loaded.
libxml_use_internal_errors( true );

$pos1 = 0;
while ( true )
{
    $pos1 = strpos( $content , "<!ENTITY", $pos1 );
    if ( $pos1 === false ) break;

    // The entity value is delimited by whichever quote character appears
    // first after the declaration start.
    $posS = strpos( $content , "'" , $pos1 );
    $posD = strpos( $content , '"' , $pos1 );

    // strpos() returns false when the character is absent. Comparing false
    // against an integer (or passing it to min()) would treat the missing
    // quote as "found first", so each case is tested explicitly.
    if ( $posS === false && $posD === false )
    {
        fwrite( STDERR , "Entity declaration without quoted value at offset $pos1, stopping.\n" );
        break;
    }

    if ( $posD === false || ( $posS !== false && $posS < $posD ) )
    {
        $q = "'";
        $posQ = $posS;
    }
    else
    {
        $q = '"';
        $posQ = $posD;
    }

    $pos1 += 8;                                 // skip over "<!ENTITY"
    $pos2 = $posQ + 1;                          // first byte of the value
    $pos3 = strpos( $content , $q , $pos2 );    // closing quote

    if ( $pos3 === false )
    {
        fwrite( STDERR , "Unterminated entity value at offset $pos2, stopping.\n" );
        break;
    }

    $name = substr( $content , $pos1 , $pos2 - $pos1 - 1 );
    $text = substr( $content , $pos2 , $pos3 - $pos2 );

    // Resume the scan after the closing quote, so the value just read
    // is never searched for further declarations.
    $pos1 = $pos3 + 1;

    // Entity references inside the value (&name;) are escaped to plain text
    // before loading the fragment, and unescaped again after serialization,
    // so they pass through the DOM round trip verbatim.

    $name = trim( $name );
    $text = str_replace( "&" , "&amp;" , $text );

    $frag  = "<entities xmlns='http://docbook.org/ns/docbook' xmlns:xlink='http://www.w3.org/1999/xlink'>\n";
    $frag .= " <entity name='$name'>$text</entity>\n";
    $frag .= '</entities>';

    $dom = new DOMDocument( '1.0' , 'utf8' );
    $dom->recover = true;
    $dom->resolveExternals = false;

    $dom->loadXML( $frag , LIBXML_NSCLEAN );
    $dom->normalizeDocument();

    libxml_clear_errors();

    $text = $dom->saveXML( $dom->getElementsByTagName( "entity" )[0] );
    $text = str_replace( "&amp;" , "&" , $text );

    echo "\n$text\n";
}
123 changes: 123 additions & 0 deletions scripts/dtdent-split.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
<?php /*
+----------------------------------------------------------------------+
| Copyright (c) 1997-2023 The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| https://www.php.net/license/3_01.txt. |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net, so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Authors: André L F S Bacci <ae php.net> |
+----------------------------------------------------------------------+
| Description: Split old DTD .ent file into individual XML files. |
+----------------------------------------------------------------------+

See `entities.php` for detailed rationale.

Use this for splitting `language-snippets.ent` and possibly other DTD
entities files into individual .xml files.

After splitting, add generated files under doc-lang/entities/, and
the original file, in one go.

After all DTD .ent files are split or converted, this script can
be removed. */

ini_set( 'display_errors' , 1 );
ini_set( 'display_startup_errors' , 1 );
error_reporting( E_ALL );

// Usage: php dtdent-split.php infile outdir [hash user]
// Reads a DTD entity file and writes each <!ENTITY name "value"> found
// as an individual file "outdir/name.xml", prefixed by a revision header.

if ( count( $argv ) < 3 )
    die(" Syntax: php $argv[0] infile outdir [hash user]\n" );

$infile = $argv[1];
$outdir = $argv[2];
$hash = $argv[3] ?? "";
$user = $argv[4] ?? "_";

$content = file_get_contents( $infile );
if ( $content === false )
    die( "Failed to read $infile\n" );

$entities = [];

// Parse

$pos1 = 0;
while ( true )
{
    $pos1 = strpos( $content , "<!ENTITY", $pos1 );
    if ( $pos1 === false ) break;

    // The entity value is delimited by whichever quote character appears
    // first after the declaration start.
    $posS = strpos( $content , "'" , $pos1 );
    $posD = strpos( $content , '"' , $pos1 );

    // strpos() returns false when the character is absent. Comparing false
    // against an integer (or passing it to min()) would treat the missing
    // quote as "found first", so each case is tested explicitly.
    if ( $posS === false && $posD === false )
    {
        fwrite( STDERR , "Entity declaration without quoted value at offset $pos1, stopping.\n" );
        break;
    }

    if ( $posD === false || ( $posS !== false && $posS < $posD ) )
    {
        $q = "'";
        $posQ = $posS;
    }
    else
    {
        $q = '"';
        $posQ = $posD;
    }

    $pos1 += 8;                                 // skip over "<!ENTITY"
    $pos2 = $posQ + 1;                          // first byte of the value
    $pos3 = strpos( $content , $q , $pos2 );    // closing quote

    if ( $pos3 === false )
    {
        fwrite( STDERR , "Unterminated entity value at offset $pos2, stopping.\n" );
        break;
    }

    $name = substr( $content , $pos1 , $pos2 - $pos1 - 1 );
    $text = substr( $content , $pos2 , $pos3 - $pos2 );

    // Resume the scan after the closing quote, so the value just read
    // is never searched for further declarations.
    $pos1 = $pos3 + 1;

    $name = trim( $name );

    $entities[$name] = $text;
}

// Check

foreach( $entities as $name => $text )
{
    $file = "$outdir/$name.xml";
    if ( file_exists( $file ) )
        echo( "Entity name colision, OVERWROTE: $file\n" );
}

// Write

// The header is the same for every generated file, so it is built once.
// It is assigned, not appended: appending inside the loop would make each
// file carry one more copy of the header than the previous one.
if ( $hash === "" )
    $header = '<!-- $Revision$ -->';
else
    $header = "<!-- EN-Revision: $hash Maintainer: $user Status: ready --><!-- CREDITS: $user -->\n";

foreach( $entities as $name => $text )
{
    $file = "$outdir/$name.xml";

    if ( file_put_contents( $file , $header . $text ) === false )
        die( "Failed to write $file\n" );
}

// Test

// Each generated file is wrapped in a dummy root element and parsed, to
// detect broken XML. Errors about undefined entities are ignored, as the
// files are loaded here without any entity declarations.

$dom = new DOMDocument();
$dom->recover = true;
$dom->resolveExternals = false;
libxml_use_internal_errors( true );

foreach( $entities as $name => $text )
{
    $file = "$outdir/$name.xml";

    $text = file_get_contents( $file );
    if ( $text === false )
        die( "Failed to load $file\n" );
    $text = "<frag>$text</frag>";

    $dom->loadXML( $text );
    $err = libxml_get_errors();
    libxml_clear_errors();

    foreach( $err as $e )
    {
        $msg = trim( $e->message );
        if ( str_starts_with( $msg , "Entity '" ) && str_ends_with( $msg , "' not defined" ) )
            continue;
        die( "Failed to load $file\n" );
    }
}

$total = count( $entities );
print "Generated $total files.\n";
Loading
Loading