Skip to content

Commit 4ddcab3

Browse files
committed
Added Anolis and anolislib to the build utilities.
1 parent 454ddc9 commit 4ddcab3

File tree

12 files changed

+1649
-0
lines changed

12 files changed

+1649
-0
lines changed

utils/anolis

Lines changed: 247 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,247 @@
1+
#!/usr/bin/env python
2+
# coding=UTF-8
3+
# Copyright (c) 2008 Geoffrey Sneddon
4+
#
5+
# Permission is hereby granted, free of charge, to any person obtaining a copy
6+
# of this software and associated documentation files (the "Software"), to deal
7+
# in the Software without restriction, including without limitation the rights
8+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
# copies of the Software, and to permit persons to whom the Software is
10+
# furnished to do so, subject to the following conditions:
11+
#
12+
# The above copyright notice and this permission notice shall be included in
13+
# all copies or substantial portions of the Software.
14+
#
15+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21+
# THE SOFTWARE.
22+
"""usage: anolis [options] [input output]
23+
24+
Post-process a document, adding cross-references, table of contents,
25+
etc. If input and output are not supplied input is read from stdin and
26+
output written to stdout.
27+
"""
28+
29+
import sys
30+
31+
sys.path += ("../anolislib",)
32+
33+
from optparse import OptionParser, SUPPRESS_HELP
34+
35+
from lxml import etree
36+
37+
from anolislib import generator, utils
38+
39+
40+
def main():
    """Command-line entry point.

    Parses the command line with getOptParser(), reads a document, runs the
    generator over it and writes the result.  With two or more positional
    arguments the first is opened for reading and the second for writing
    (both in binary mode); otherwise stdin and stdout are used.

    Any IOError while opening the files, or any AnolisException, IOError or
    XMLSyntaxError while generating, is reported on stderr and the process
    exits with status 1.
    """
    opts, args = getOptParser().parse_args()

    if len(args) < 2:
        # No input/output pair given: behave as a stdin -> stdout filter.
        source = sys.stdin
        sink = sys.stdout
    else:
        try:
            source = open(args[0], "rb")
            sink = open(args[1], "wb")
        except IOError:
            problem = sys.exc_info()[1]
            sys.stderr.write(unicode(problem) + u"\n")
            sys.exit(1)

    try:
        # Every parsed option is forwarded to the generator as a keyword
        # argument.
        options = vars(opts)

        # Parse the input and run the processes over it.
        document = generator.fromFile(source, **options)
        source.close()

        # Serialize the processed tree.
        generator.toFile(document, sink, **options)
        sink.close()
    except (utils.AnolisException, IOError, etree.XMLSyntaxError):
        problem = sys.exc_info()[1]
        sys.stderr.write(unicode(problem) + u"\n")
        sys.exit(1)
73+
74+
def getOptParser():
    """Build and return the OptionParser for the anolis command line.

    The parser's usage text is the module docstring.  Every option has a
    default registered through set_defaults() except --strip-whitespace,
    which optparse leaves as None when it is not given.

    The ``processes`` value is an ordered list of process names.  --enable
    appends a name (if it is not already present) and --disable removes it;
    the relative order of the remaining names is preserved.

    The hidden --profile option is only registered when either cProfile or
    hotshot can be imported.

    Returns:
        The configured optparse.OptionParser instance.
    """
    def enable(option, opt_str, value, parser, *args, **kwargs):
        # ``processes`` defaults to a list, so set-style .add() would raise
        # AttributeError.  Work on a copy so the default list shared between
        # parse_args() calls on the same parser is never mutated.
        current = list(parser.values.processes)
        if value not in current:
            current.append(value)
        parser.values.processes = current

    def disable(option, opt_str, value, parser, *args, **kwargs):
        # Same reasoning as enable(): rebuild rather than mutate, and do
        # nothing if the name is not present.
        parser.values.processes = [name for name in parser.values.processes
                                   if name != value]

    parser = OptionParser(usage=__doc__, version="%prog 1.1dev")

    parser.add_option("", "--enable", action="callback", callback=enable,
                      type="string", dest="processes",
                      help="Enable the process given as the option value")

    parser.add_option("", "--disable", action="callback", callback=disable,
                      type="string",
                      help="Disable the process given as the option value")

    parser.add_option("", "--parser", type="choice",
                      choices=("html5lib", "lxml.html"),
                      help="Choose what parser to use. Valid options: html5lib, lxml.html")

    parser.add_option("", "--serializer", type="choice",
                      choices=("html5lib", "lxml.html"),
                      help="Choose what serializer to use. Valid options: html5lib, lxml.html")

    parser.add_option("", "--newline-char", action="store", type="string",
                      dest="newline_char",
                      help="Set the newline character/string used when creating new newlines. This should match the rest of the newlines in the document.")

    parser.add_option("", "--indent-char", action="store", type="string",
                      dest="indent_char",
                      help="Set the character/string used when creating indenting new blocks of (X)HTML. This should match the rest of the indentation in the document.")

    parser.add_option("", "--filter", action="store", dest="filter",
                      help="CSS selector that matches elements to be removed from the document before processing")

    parser.add_option("", "--annotations", action="store", dest="annotation",
                      help="Path or URI of an annotations file containing status annotations that should be added to sections")

    parser.add_option("", "--annotate-w3c-issues", action="store_true",
                      dest="annotate_w3c_issues",
                      help="Add links to W3C issue tracker in status annotations")

    parser.add_option("", "--force-html4-id", action="store_true",
                      dest="force_html4_id",
                      help="Force the ID generation algorithm to create HTML 4 compliant IDs regardless of the DOCTYPE.")

    parser.add_option("", "--min-depth", action="store", type="int",
                      dest="min_depth",
                      help="Highest ranking header to number/insert into TOC.")

    parser.add_option("", "--max-depth", action="store", type="int",
                      dest="max_depth",
                      help="Lowest ranking header to number/insert into TOC.")

    parser.add_option("", "--allow-duplicate-dfns", action="store_true",
                      dest="allow_duplicate_dfns",
                      help="Allow multiple definitions of terms when cross-referencing (the last instance of the term is used when referencing it).")

    parser.add_option("", "--w3c-compat", action="store_true",
                      dest="w3c_compat",
                      help="Behave in a (mostly) compatible way to the W3C CSS WG's Postprocessor (this implies all of the other --w3c-compat options with the exception of --w3c-compat-crazy-substitution, as that is too crazy).")

    parser.add_option("", "--w3c-compat-xref-elements", action="store_true",
                      dest="w3c_compat_xref_elements",
                      help="Uses the same list of elements to look for cross-references in as the W3C CSS WG's Postprocessor, even when the elements shouldn't semantically be used for cross-reference terms.")

    parser.add_option("", "--w3c-compat-xref-a-placement", action="store_true",
                      dest="w3c_compat_xref_a_placement",
                      help="When cross-referencing elements apart from span, put the a element inside the element instead of outside the element.")

    parser.add_option("", "--w3c-compat-xref-normalization", action="store_true",
                      dest="w3c_compat_xref_normalization",
                      help="Only use ASCII letters, numbers, and spaces in comparison of cross-reference terms.")

    parser.add_option("", "--w3c-compat-class-toc", action="store_true",
                      dest="w3c_compat_class_toc",
                      help="Add @class='toc' on every ol element in the table of contents (instead of only the root ol element).")

    parser.add_option("", "--w3c-compat-substitutions", action="store_true",
                      dest="w3c_compat_substitutions",
                      help="Do W3C specific substitutions.")

    parser.add_option("", "--w3c-compat-crazy-substitutions", action="store_true",
                      dest="w3c_compat_crazy_substitutions",
                      help="Do crazy W3C specific substitutions, which may cause unexpected behaviour (i.e., replacing random strings within the document with no special marker).")

    # Only offer --profile when a profiler is importable: cProfile first,
    # hotshot as the fallback.
    profile = True
    try:
        import cProfile
        import pstats
    except ImportError:
        try:
            import hotshot
            import hotshot.stats
        except ImportError:
            profile = False

    if profile:
        parser.add_option("", "--profile", action="store_true",
                          dest="profile", help=SUPPRESS_HELP)

    # The remaining options are hidden from --help.
    parser.add_option("", "--inject-meta-charset", action="store_true",
                      dest="inject_meta_charset", help=SUPPRESS_HELP)

    parser.add_option("", "--strip-whitespace", action="store_true",
                      dest="strip_whitespace", help=SUPPRESS_HELP)

    parser.add_option("", "--omit-optional-tags", action="store_true",
                      dest="omit_optional_tags", help=SUPPRESS_HELP)

    parser.add_option("", "--quote-attr-values", action="store_true",
                      dest="quote_attr_values", help=SUPPRESS_HELP)

    parser.add_option("", "--use-best-quote-char", action="store_true",
                      dest="use_best_quote_char", help=SUPPRESS_HELP)

    # NOTE(review): the default for minimize_boolean_attributes below is
    # already False, so this store_false flag currently changes nothing.
    # Left as-is because flipping the default would alter serialized output;
    # confirm the intended default.
    parser.add_option("", "--no-minimize-boolean-attributes",
                      action="store_false",
                      dest="minimize_boolean_attributes", help=SUPPRESS_HELP)

    parser.add_option("", "--use-trailing-solidus", action="store_true",
                      dest="use_trailing_solidus", help=SUPPRESS_HELP)

    parser.add_option("", "--space-before-trailing-solidus",
                      action="store_true",
                      dest="space_before_trailing_solidus", help=SUPPRESS_HELP)

    parser.add_option("", "--escape-lt-in-attrs", action="store_true",
                      dest="escape_lt_in_attrs", help=SUPPRESS_HELP)

    parser.add_option("", "--escape-rcdata", action="store_true",
                      dest="escape_rcdata", help=SUPPRESS_HELP)

    parser.add_option("", "--output-encoding", action="store", type=str,
                      dest="output_encoding", help="Output encoding")

    parser.set_defaults(
        # Kept as a list: the order in which processes are listed is the
        # order in which they are iterated.
        processes=["filter", "sub", "toc", "xref", "annotate"],
        parser="html5lib",
        serializer="html5lib",
        newline_char=u"\n",
        indent_char=u" ",
        filter=None,
        annotation=None,
        annotate_w3c_issues=False,
        force_html4_id=False,
        min_depth=2,
        max_depth=6,
        allow_duplicate_dfns=False,
        w3c_compat=False,
        w3c_compat_xref_elements=False,
        w3c_compat_xref_a_placement=False,
        w3c_compat_xref_normalization=False,
        w3c_compat_class_toc=False,
        w3c_compat_substitutions=False,
        w3c_compat_crazy_substitutions=False,
        profile=False,
        inject_meta_charset=False,
        omit_optional_tags=False,
        quote_attr_values=False,
        use_best_quote_char=False,
        minimize_boolean_attributes=False,
        use_trailing_solidus=False,
        space_before_trailing_solidus=False,
        escape_lt_in_attrs=False,
        escape_rcdata=False,
        output_encoding="utf-8"
    )

    return parser
245+
246+
if __name__ == "__main__":
247+
main()

utils/anolislib/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from generator import *

utils/anolislib/generator.py

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
# coding=UTF-8
2+
# Copyright (c) 2008 Geoffrey Sneddon
3+
#
4+
# Permission is hereby granted, free of charge, to any person obtaining a copy
5+
# of this software and associated documentation files (the "Software"), to deal
6+
# in the Software without restriction, including without limitation the rights
7+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8+
# copies of the Software, and to permit persons to whom the Software is
9+
# furnished to do so, subject to the following conditions:
10+
#
11+
# The above copyright notice and this permission notice shall be included in
12+
# all copies or substantial portions of the Software.
13+
#
14+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20+
# THE SOFTWARE.
21+
import sys
22+
23+
import html5lib
24+
from html5lib import treebuilders, treewalkers
25+
from html5lib.serializer import htmlserializer
26+
27+
import lxml.html
28+
from lxml import etree
29+
30+
31+
def process(tree, processes=["sub", "toc", "xref"], **kwargs):
    """Run each named process over *tree*, in the order given.

    For every name in *processes* the attribute of that name is looked up
    on the ``processes`` package (imported with the name in its fromlist).
    If that raises AttributeError, a module of that name is imported
    directly instead.  The resulting module must expose a callable with the
    same name, which is invoked as ``callable(tree, **kwargs)``.
    """
    for name in processes:
        try:
            package = __import__('processes', globals(), locals(), [name], -1)
            module = getattr(package, name)
        except AttributeError:
            # Not available via the ``processes`` package; fall back to a
            # plain import of the name itself.
            module = __import__(name, globals(), locals(), [], -1)

        runner = getattr(module, name)
        runner(tree, **kwargs)
44+
45+
46+
def fromFile(input, processes=set(["sub", "toc", "xref"]), parser="html5lib",
             profile=False, **kwargs):
    """Parse *input* into a tree, run the processes over it and return it.

    Args:
        input: File-like object to parse.  It is closed once parsing has
            finished.
        processes: Names of the processes to run; passed to process().
        parser: ``"lxml.html"`` parses with lxml.html; any other value
            parses with html5lib using its lxml tree builder.
        profile: When true, process() is run under a profiler (cProfile,
            or hotshot if cProfile cannot be imported) and the statistics,
            sorted by time, are printed.
        **kwargs: Forwarded unchanged to process().

    Returns:
        The parsed and processed tree.
    """
    # NOTE: parsing as XML through etree.parse() was disabled in the
    # original code (guarded by ``if False``), so only the two HTML parsers
    # are selectable here.
    if parser == "lxml.html":
        tree = lxml.html.parse(input)
    else:
        builder = treebuilders.getTreeBuilder("lxml", etree)
        tree = html5lib.HTMLParser(tree=builder).parse(input)

    # Close the input file
    input.close()

    if not profile:
        process(tree, processes, **kwargs)
        return tree

    import os
    import tempfile

    # mkstemp() hands back an open descriptor as well as the path; the
    # profilers reopen the file by name, so close the descriptor instead of
    # leaking it.
    fd, statfile = tempfile.mkstemp()
    os.close(fd)
    try:
        # Keep only the imports inside the ImportError guard, so that an
        # ImportError raised by the profiled code is not mistaken for a
        # missing profiler.  (The original ``except None`` never matched, so
        # the hotshot fallback could not run.)
        try:
            import cProfile
            import pstats
        except ImportError:
            cProfile = None

        if cProfile is not None:
            cProfile.runctx("process(tree, processes, **kwargs)", globals(),
                            locals(), statfile)
            stats = pstats.Stats(statfile)
        else:
            import hotshot
            import hotshot.stats
            prof = hotshot.Profile(statfile)
            prof.runcall(process, tree, processes, **kwargs)
            prof.close()
            stats = hotshot.stats.load(statfile)

        stats.strip_dirs()
        stats.sort_stats('time')
        stats.print_stats()
    finally:
        # Remove the temporary stats file even when profiling fails.
        os.remove(statfile)

    # Return the tree
    return tree
91+
92+
93+
def toString(tree, output_encoding="utf-8", serializer="html5lib", **kwargs):
    """Serialize *tree* and return the rendered result.

    With ``serializer == "lxml.html"`` the tree is rendered by
    lxml.html.tostring().  Any other value renders with html5lib's
    HTMLSerializer, which is constructed from **kwargs and fed an lxml
    tree walker over *tree*.  Both paths are given *output_encoding* as the
    encoding.
    """
    # NOTE: serializing as XML through etree.tostring() was disabled in the
    # original code (guarded by ``if False``), so it is not selectable here.
    if serializer == "lxml.html":
        return lxml.html.tostring(tree, encoding=output_encoding)

    tree_walker = treewalkers.getTreeWalker("lxml")
    html_serializer = htmlserializer.HTMLSerializer(**kwargs)
    return html_serializer.render(tree_walker(tree), encoding=output_encoding)
107+
108+
def toFile(tree, output, output_encoding="utf-8", serializer="html5lib",
           **kwargs):
    """Serialize *tree* with toString() and write the result to *output*.

    *output_encoding*, *serializer* and any extra keyword arguments are
    passed straight through to toString().  *output* only needs a
    ``write()`` method; it is not closed here.
    """
    output.write(toString(tree, output_encoding=output_encoding,
                          serializer=serializer, **kwargs))
115+
116+
117+
def fromToFile(input, output, **kwargs):
    """Parse and process *input*, then serialize the result to *output*.

    The same keyword arguments are handed to both fromFile() and toFile().
    """
    toFile(fromFile(input, **kwargs), output, **kwargs)

utils/anolislib/processes/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)