Skip to content

Commit 6788e8a

Browse files
committed
Added first implementation of microjoin.py script.
Added the first implementation of the microjoin.py algorithm. Corrected microsplit.py to handle both the RDFa spec and the HTML5 spec. The scripts are now capable of splitting and re-joining the HTML5 specification. There is a preliminary configuration for creating the RDFa module.
1 parent 38bbadc commit 6788e8a

File tree

10 files changed

+689
-490
lines changed

10 files changed

+689
-490
lines changed

bin/microjoin.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
#!/usr/bin/python
2+
#
3+
# The microjoin script takes a specification configuration file and
4+
# processes the instructions contained in the configuration file to construct
5+
# a complete specification document.
6+
7+
import os, sys, re
8+
from optparse import OptionParser
9+
10+
logToFile = False
11+
options = object()
12+
13+
##
# Logs a message to the console or to the microjoin.log file, depending on
# the module-level logToFile flag.
#
# @param str the message to log. The name shadows the builtin, but it is kept
#            unchanged so that existing callers are not affected.
def log(str):
    # logToFile is only read here, so no "global" declaration is required.
    if logToFile:
        # Open in append mode so earlier entries are preserved; the with-block
        # closes the file even if the write fails.
        with open("microjoin.log", "a") as lfile:
            lfile.write(str + '\n')
    else:
        # The parenthesised single-argument form prints the same text under
        # both Python 2 and Python 3.
        print(str)
24+
25+
##
# Processes a configuration file and performs the processing instructions
# contained therein.
#
# Each line is split on whitespace. Blank lines and lines whose first token
# starts with "#" are skipped. An "include FILE" line appends the contents of
# FILE to the output file. Any other directive is ignored.
#
# @param options The set of options containing the processing directives;
#                the configFile, outputDir and outputFile attributes are read.
def processConfiguration(options):
    outputPath = os.path.join(options.outputDir, options.outputFile)

    # The with-blocks guarantee that both files are closed (and the output
    # flushed) even if an include fails part-way through.
    with open(options.configFile, "r") as configFile:
        with open(outputPath, "w") as outputFile:
            for line in configFile:
                tokens = line.split()

                # Skip blank lines and comments
                if not tokens or tokens[0].startswith("#"):
                    continue

                # Only the include directive is currently understood
                if tokens[0] != "include":
                    continue

                # Guard against an include directive without a file name,
                # which would otherwise raise an IndexError
                if len(tokens) < 2:
                    log("ERROR: The include directive requires a file name.")
                    continue

                sfile = tokens[1]
                if os.path.exists(sfile):
                    log("INFO: Including %s" % (sfile,))
                    with open(sfile, "r") as source:
                        outputFile.write(source.read())
                else:
                    log("ERROR: Could not include the file named: %s" %
                        (sfile,))
51+
52+
##
# Sets up the option string parser for this script and parses the given
# argument list.
#
# @param argv the argument list specified on the command line. The list is
#             parsed as given, so a leading program name ends up among the
#             positional arguments.
# @return a tuple of (options, args, largs) where options holds the parsed
#         outputFile and outputDir values and both args and largs hold the
#         positional arguments.
def setup_parser(argv):
    usage = "usage: %prog [options] CONFIGURATION_FILE"
    parser = OptionParser(usage)

    parser.add_option('-o', '--output-file',
        action='store', type='string', dest='outputFile',
        default="specification.html",
        help='the file to write the processing output to '
             '[Default: specification.html]')

    parser.add_option('-d', '--output-directory',
        action='store', type='string', dest='outputDir',
        default="build",
        help='the directory to write the output file to '
             '[Default: build]')

    options, args = parser.parse_args(argv)

    # parser.largs omits any positional arguments that follow a "--"
    # separator, whereas args contains all of them. Derive largs from args so
    # that a configuration file given after "--" is not lost.
    largs = list(args)

    return (options, args, largs)
76+
77+
##
# The main entry point for the script.
#
# Parses the command line, validates the configuration file, makes sure the
# output directory exists and then processes the configuration file.
#
# @param argv the argument list passed to the program.
# @param stdout the standard output stream assigned to the program (unused).
# @param environ the execution environment for the program (unused).
def main(argv, stdout, environ):
    # Parse the options
    options, args, largs = setup_parser(argv)

    # The positional arguments include the program name, so fewer than two
    # entries means that no configuration file was given. Check this before
    # indexing into the list.
    if len(largs) < 2:
        progName = argv[0] if argv else "microjoin.py"
        log("ERROR: A configuration file was not specified.")
        log("EXAMPLE: %s configs/html5rdfa.conf" % (progName,))
        sys.exit(1)

    # The configuration file is the last positional argument
    options.configFile = largs[-1]
    if not os.path.exists(options.configFile):
        log("ERROR: The configuration file does not exist: %s" %
            (options.configFile,))
        sys.exit(1)

    # Create the output directory if it doesn't already exist.
    if not os.path.exists(options.outputDir):
        try:
            os.makedirs(options.outputDir)
        except OSError:
            log("ERROR: The output directory could not be created.")
            sys.exit(1)

    processConfiguration(options)
103+
104+
# If the program was started from the command line, run main()
105+
if __name__ == "__main__":
106+
main(sys.argv, sys.stdout, os.environ)

bin/microsplit.py

Lines changed: 90 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
def log(str):
1717
global logging
1818
if(logToFile):
19-
lfile = open("microsection.log", "a")
19+
lfile = open("microsplit.log", "a")
2020
lfile.write(str + '\n')
2121
lfile.close()
2222
else:
@@ -34,11 +34,27 @@ def dashifyText(text):
3434
rval = rval.replace("(", "-").replace(")", "-").replace("/", "-")
3535
rval = rval.replace("'", "").replace('"', "").replace(",", "")
3636
rval = rval.replace(" ", "-")
37+
rval = rval.replace("+", "-")
3738
rval = re.sub(r'-[-]+', '-', rval)
3839
rval = re.sub(r'-$', '', rval)
40+
rval = re.sub(r'^-', '', rval)
3941

4042
return rval
4143

44+
##
# Writes a given Table of Contents item to a file specified via tocItem and
# placed into options.outputDir. An existing file of the same name is
# overwritten.
#
# @param options the options object that contains the output directory.
# @param tocItem the table of contents file name to write to.
# @param text the text to write to the file.
def writeTocItem(options, tocItem, text):
    tocItemFilename = os.path.join(options.outputDir, tocItem)

    # Dump the text buffer to the given TOC item; the with-block closes the
    # file even if the write fails.
    with open(tocItemFilename, "w") as tocItemFile:
        tocItemFile.write(text)
57+
4258
##
4359
# Processes a specification file and splits the file into microsections.
4460
#
@@ -48,11 +64,34 @@ def processSpecification(options):
4864

4965
# The table of contents stack tracks where we are in the current document
5066
tocStack = ["", "", "html5", "", ""]
51-
toc = []
67+
toc = ["header",]
5268
textBuffer = ""
5369

5470
for line in specfile:
55-
m = re.match(r"^.*\<h(?P<header>.).*?\>(?P<content>.*)\<\/h.\>$", line)
71+
# Get the entire contents of each header element if a header element
72+
# is detected
73+
hm = re.match(r"^.*\<h(?P<header>[0-9]).*?\>.*$", line)
74+
if(hm):
75+
maxLines = 0
76+
# While the ending tag has not been found for the header tag,
77+
# keep searching for it
78+
while(not re.match( \
79+
r"^.*\<h(?P<header>[0-9]).*?\>(?P<content>.*)\<\/h[0-9]\>.*$",
80+
line.replace("\n", "")) and maxLines < 10):
81+
#print "LINE:", line
82+
line += specfile.next()
83+
maxLines += 1
84+
85+
# Issue a warning if there was some sort of parse error
86+
#if(maxLines == 10):
87+
#log("WARNING: Closing header tag not found for:\n%s" % (line,))
88+
89+
# Check to see if we have a header line
90+
m = re.match(
91+
r"^.*\<h(?P<header>[0-9]).*?\>(?P<content>.*)\<\/h[0-9]\>$",
92+
line.replace("\n", ""))
93+
94+
# If a complete header element is detected, process the header contents
5695
if(m):
5796
headerLevel = int(m.group('header'))
5897
# Strip the tags from the content
@@ -64,30 +103,62 @@ def processSpecification(options):
64103
# dump the text buffer to the previous TOC item.
65104
if(headerLevel < 4):
66105
tocStack[headerLevel] = dashHeader
67-
tocItem = "-".join(tocStack[2:headerLevel+1])
68-
69-
# Dump the text buffer to the previous TOC item
70-
if(len(toc) > 1):
71-
tocItemFilename = os.path.join(options.outputDir, toc[-1])
72-
tocItemFile = open(tocItemFilename, "w")
73-
tocItemFile.write(textBuffer)
74-
tocItemFile.close()
75-
76-
# Append the tocItem to the TOC
106+
tocItem = dashifyText("-".join(tocStack[2:headerLevel+1]))
107+
108+
# Write the TOC item to a file
109+
if(len(textBuffer) > 1):
110+
writeTocItem(options, toc[-1], textBuffer)
111+
else:
112+
del toc[-1]
113+
114+
# Append the tocItem to the TOC after checking to see that
115+
# duplicates do not exist.
116+
counter = 1
117+
while(tocItem in toc):
118+
tokens = tocItem.split("-")[0:-1]
119+
tokens.append("%i" % (counter,))
120+
tocItem = "-".join(tokens)
121+
counter += 1
77122
toc.append(tocItem)
78123

79124
# Reset the text buffer with the new item
80125
textBuffer = line
126+
else:
127+
textBuffer += line
81128
else:
82129
textBuffer += line
83130

131+
# Write the last TOC item to the file
132+
if(len(textBuffer) > 1):
133+
writeTocItem(options, toc[-1], textBuffer)
134+
84135
# Generate the configuration file for the microjoin script
85136
ujFilename = os.path.join(options.outputDir,
86137
options.specFile.split(os.sep)[-1]+".conf")
87138
ujFile = open(ujFilename, "w")
88-
ujFile.write("\n".join(toc))
139+
140+
# Write out the usage information for the file
141+
ujFile.write("""# This file was auto-generated using the microsplit.py tool.
142+
#
143+
# You may edit this file in a number of ways and process it using the
144+
# microjoin.py tool to create an entirely new specification. To create a new
145+
# specification, copy this file to the "configs" directory in your repository
146+
# and start modifying it.
147+
#
148+
# * To delete a section, delete any line from the list below.
149+
# * To add a section, insert a line like the folowing:
150+
# include YOUR_MICROSECTION_FILE
151+
# * To apply a patch to the final, combined file, do the following:
152+
# (NOT IMPLEMENTED YET)
153+
# * To construct a new source document, run the following command:
154+
# ./bin/microjoin.py THIS_CONFIGURATION_FILE THE_OUTPUT_FILE
155+
#
156+
""")
157+
158+
for ti in toc:
159+
ujFile.write("include %s\n" % (os.path.join(options.outputDir,ti),))
89160
ujFile.close()
90-
161+
91162
##
92163
# Sets up the option string parser for this daemon.
93164
#
@@ -97,9 +168,10 @@ def setup_parser(argv):
97168
parser = OptionParser(usage)
98169

99170
parser.add_option('-o', '--output-dir',
100-
action='store', type='string', dest='outputDir',
101-
default="microsections.cache",
102-
help='the directory in which to store the output files')
171+
action='store', type='string', dest='outputDir',
172+
default="build",
173+
help='the directory in which to store the output files ' + \
174+
'[Default: build]')
103175

104176
options, args = parser.parse_args(argv)
105177
largs = parser.largs

configs/rdfa-module.conf

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# This file was auto-generated using the microsplit.py tool.
2+
#
3+
# You may edit this file in a number of ways and process it using the
4+
# microjoin.py tool to create an entirely new specification. To create a new
5+
# specification, copy this file to the "configs" directory in your repository
6+
# and start modifying it.
7+
#
8+
# * To delete a section, delete any line from the list below.
9+
# * To add a section, insert a line like the following:
10+
# include YOUR_MICROSECTION_FILE
11+
# * To apply a patch to the final, combined file, do the following:
12+
# (NOT IMPLEMENTED YET)
13+
# * To construct a new source document, run the following command:
14+
# ./bin/microjoin.py -o THE_OUTPUT_FILE THIS_CONFIGURATION_FILE
15+
#
16+
include microsections/rdfa/rdfa
17+
include microsections/rdfa/rdfa-issues
18+
include microsections/rdfa/rdfa-introduction
19+
include microsections/rdfa/rdfa-parsing-model
20+
include microsections/rdfa/rdfa-conformance-requirements
21+
include microsections/rdfa/rdfa-modifications-to-xhtml-rdfa

0 commit comments

Comments
 (0)