Skip to content

Commit 68076e7

Browse files
committed
Better handle empty lines (either exception or ignore)
1 parent 2342226 commit 68076e7

File tree

2 files changed

+27
-7
lines changed

2 files changed

+27
-7
lines changed

files2rouge/files2rouge.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,8 @@ def run(summ_path,
2828
rouge_args=None,
2929
verbose=False,
3030
saveto=None,
31-
eos="."):
31+
eos=".",
32+
ignore_empty=False):
3233

3334
if saveto is not None:
3435
saveto = open(saveto, 'w')
@@ -46,7 +47,8 @@ def run(summ_path,
4647
system_file=summ_path,
4748
model_dir=model_root,
4849
system_dir=sys_root,
49-
eos=eos)
50+
eos=eos,
51+
ignore_empty=ignore_empty)
5052
print("Running ROUGE...")
5153
log_level = logging.ERROR if not verbose else None
5254
r = pyrouge.Rouge155(rouge_dir=os.path.dirname(s.data['ROUGE_path']),
@@ -86,14 +88,16 @@ def main():
8688
parser.add_argument('-e', '--eos', dest="eos", default='.',
8789
help="""End of sentence separator (for multisentence).
8890
Default: \".\" """)
91+
parser.add_argument("-i", "--ignore_empty", action="store_true")
8992
args = parser.parse_args()
9093

9194
run(args.reference,
9295
args.summary,
9396
args.args,
9497
args.verbose,
9598
args.saveto,
96-
args.eos)
99+
args.eos,
100+
args.ignore_empty)
97101

98102

99103
if __name__ == '__main__':

files2rouge/utils.py

Lines changed: 20 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -20,30 +20,46 @@ def tee(saveto, *args, **kwargs):
2020
print(file=saveto, *args, **kwargs)
2121

2222

23-
def split_files(model_file, system_file, model_dir, system_dir, eos="."):
23+
def split_files(model_file, system_file, model_dir, system_dir,
24+
ignore_empty=False, eos="."):
2425
def outputs(line, f):
2526
split_sen = " .\n".join(line.split(" %s " % eos))
2627
print(split_sen, end="", file=f)
2728

2829
model_count = 0
30+
lines_to_ignore = []
31+
2932
with open(model_file) as fmodel:
3033
for (i, line) in enumerate(fmodel):
34+
3135
if not line:
3236
break
33-
if len(line) == 0:
34-
continue
37+
if line == "\n":
38+
if ignore_empty:
39+
lines_to_ignore.append(i)
40+
continue
41+
else:
42+
raise ValueError("Empty hypothesis at line %d."
43+
" Use `--ignore_empty` to ignore it"
44+
% (i+1))
3545

3646
model_count += 1
3747
with open("%s/m.A.%d.txt" % (model_dir, i), "w") as f:
3848
outputs(line, f)
3949

4050
system_count = 0
51+
line_to_ignore_it = iter(lines_to_ignore)
52+
line_to_ignore = next(line_to_ignore_it, -1)
53+
4154
with open(system_file) as fsystem:
4255
for (i, line) in enumerate(fsystem):
4356
if not line:
4457
break
45-
if len(line) == 0:
58+
if i == line_to_ignore:
59+
line_to_ignore = next(line_to_ignore_it, -1)
4660
continue
61+
if line == "\n":
62+
raise ValueError("Empty system at line %d" % (i+1))
4763

4864
system_count += 1
4965
with open("%s/s.%d.txt" % (system_dir, i), "w") as f:

0 commit comments

Comments
 (0)