-
Notifications
You must be signed in to change notification settings - Fork 0
/
get_sentences.py
31 lines (27 loc) · 962 Bytes
/
get_sentences.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
from lxml import etree
import argparse
import sys
sys.path.append("./ccg2lambda")
from ccg2jiggxml import read_abc
def main():
    """Print every sentence read from FILE on one line of standard output.

    Command-line arguments:
        FILE: path handed to ``read_abc``.
        -i / --skip-ill-formed: forwarded to ``read_abc`` as
            ``skip_ill_formed``.
        -d / --drop-text: accepted for command-line compatibility, but
            its value is never read by this script.

    The tokens of each tree are joined with single spaces and terminated
    with the marker ``.#END#``. The resulting sentences are concatenated
    with no other separator and written with a single ``print`` call.
    """
    # Leave ``prog`` at its default (the script name): the first positional
    # parameter of ArgumentParser is ``prog``, so passing an empty string
    # there blanks the program name in usage and error messages.
    parser = argparse.ArgumentParser(
        description='Print the sentences read from FILE on a single line, '
                    'each terminated by ".#END#".')
    parser.add_argument('FILE')
    parser.add_argument('-i',
                        '--skip-ill-formed',
                        action='store_true',
                        help='skip trees that contain a node whose arity > 2')
    parser.add_argument('-d',
                        '--drop-text',
                        action='store_true',
                        help='don\'t contain raw sentences in XML (useful for debugging)')
    args = parser.parse_args()

    trees = read_abc(args.FILE,
                     skip_ill_formed=args.skip_ill_formed)

    # str.join assembles the output in one pass instead of re-copying the
    # accumulated string once per tree.
    print(''.join(' '.join(t.tokens) + '.#END#' for t in trees))
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()