Settled on using SRL scores, not SRL context, to compute sentence scores
PKULiuHui committed Nov 14, 2018
1 parent 41cad81 commit 6c319f1
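Only baselines/baseline2/baseline.py is reproduced below; the SRL scoring change itself lives in the other changed files. As a rough, hypothetical illustration of the decision in the commit message (none of these names come from the repository), scoring a sentence from its own SRL scores instead of from SRL-context scores might look like:

# Hypothetical sketch, not repository code: score a sentence from its own
# SRL (semantic role labeling) frame scores only.
def sentence_score(srl_scores, srl_context_scores=None, use_context=False):
    if use_context and srl_context_scores:
        # Previous idea, dropped by this commit: blend in the scores of the
        # SRL context around the sentence.
        context = sum(srl_context_scores) / len(srl_context_scores)
        return 0.5 * sum(srl_scores) / len(srl_scores) + 0.5 * context
    # Behavior settled on here: only the sentence's own SRL scores count.
    return sum(srl_scores) / len(srl_scores)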
Showing 20 changed files with 13,527 additions and 424 deletions.
79 changes: 40 additions & 39 deletions baselines/baseline2/baseline.py
@@ -1,29 +1,20 @@
-# coding=utf-8
# coding: utf-8
import sys
import os
import argparse

'''
Standard ROUGE
-guardian(L)
-UB1 Rouge-1: 0.498439 Rouge-2: 0.216667 Rouge-l: 0.324901 Rouge-SU*: 0.216997
-UB2 Rouge-1: 0.469815 Rouge-2: 0.278474 Rouge-l: 0.344528 Rouge-SU*: 0.208485
-LexRank Rouge-1: 0.210933 Rouge-2: 0.037603 Rouge-l: 0.131110 Rouge-SU*: 0.046715
-TextRank Rouge-1: 0.184086 Rouge-2: 0.029617 Rouge-l: 0.117287 Rouge-SU*: 0.037783
-ICSI Rouge-1: 0.257562 Rouge-2: 0.060022 Rouge-l: 0.157313 Rouge-SU*: 0.065799
-Luhn Rouge-1: 0.154681 Rouge-2: 0.022884 Rouge-l: 0.100451 Rouge-SU*: 0.027575
-bbc(L)
-UB1 Rouge-1: 0.464780 Rouge-2: 0.195108 Rouge-l: 0.272242 Rouge-SU4: 0.197798
-UB2 Rouge-1: 0.413318 Rouge-2: 0.227026 Rouge-l: 0.268316 Rouge-SU4: 0.193755
-LexRank Rouge-1: 0.160842 Rouge-2: 0.024327 Rouge-l: 0.097632 Rouge-SU4: 0.042892
-TextRank Rouge-1: 0.139200 Rouge-2: 0.021073 Rouge-l: 0.093124 Rouge-SU4: 0.037206
-Luhn Rouge-1: 0.141699 Rouge-2: 0.023175 Rouge-l: 0.091994 Rouge-SU4: 0.038216
-ICSI Rouge-1: 0.209584 Rouge-2: 0.046293 Rouge-l: 0.135454 Rouge-SU4: 0.063704
+Standard ROUGE (full bbc corpus, 1803 blogs in total)
+UB1 Rouge-1: 0.480407 Rouge-2: 0.204490 Rouge-l: 0.280785 Rouge-SU4: 0.208131
+UB2 Rouge-1: 0.435176 Rouge-2: 0.243138 Rouge-l: 0.280135 Rouge-SU4: 0.209980
+LexRank Rouge-1: 0.171248 Rouge-2: 0.030491 Rouge-l: 0.106553 Rouge-SU4: 0.048841
+TextRank Rouge-1: 0.145161 Rouge-2: 0.024316 Rouge-l: 0.095294 Rouge-SU4: 0.040450
+Luhn Rouge-1: 0.151129 Rouge-2: 0.026597 Rouge-l: 0.097455 Rouge-SU4: 0.042836
+ICSI Rouge-1: 0.221558 Rouge-2: 0.055385 Rouge-l: 0.137137 Rouge-SU4: 0.071310
'''
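# Editor's illustration, not part of this commit: UB1/UB2 above are
# extractive upper bounds -- an oracle greedily adds the sentence whose
# unigrams (UB1) or bigrams (UB2) best cover the reference, under the same
# word budget as the baselines. A minimal, hypothetical sketch of the
# unigram variant:
from collections import Counter

def greedy_ub1(sentences, ref_words, budget):
    need = Counter(ref_words)      # reference unigrams still uncovered
    selected, length = [], 0
    while length < budget:
        best, best_gain = None, 0
        for s in sentences:
            if s in selected:
                continue
            gain = sum(min(c, need[w]) for w, c in Counter(s.split()).items())
            if gain > best_gain:
                best, best_gain = s, gain
        if best is None:           # no remaining sentence adds new coverage
            break
        selected.append(best)
        length += len(best.split())
        need -= Counter(best.split())
    return selected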

-sys.path.append('../')
sys.path.append('../../')

from utils.data_helpers import load_data
from tqdm import tqdm
@@ -45,12 +36,14 @@
reload(sys)
sys.setdefaultencoding('utf-8')
parser = argparse.ArgumentParser(description='LiveBlogSum Baseline')
parser.add_argument('-corpus', type=str, default='bbc')
parser.add_argument('-path', type=str, default='../data/')
parser.add_argument('-corpus', type=str, default='bbc_cont_1')
parser.add_argument('-path', type=str, default='../../data/')
parser.add_argument('-sum_len', type=int, default=1)
parser.add_argument('-out', type=str, default='record.txt')

args = parser.parse_args()
-args.path = args.path + args.corpus + '/test/'
args.path = args.path + args.corpus
types = ['train', 'valid', 'test']
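# Hedged note (derived from the path arithmetic above, not verified against
# the repository): with the defaults, blogs are expected under
#   ../../data/bbc_cont_1/train/  ../../data/bbc_cont_1/valid/  ../../data/bbc_cont_1/test/
# one file per blog, each parsed by utils.data_helpers.load_data into
# (docs, refs).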


def get_summary_scores(algo, docs, refs, summary_size):
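    # (Function body collapsed in this view. From its call sites below it
    # dispatches on algo -- 'UB1', 'UB2', 'LexRank', 'TextRank', 'Luhn',
    # 'ICSI' -- builds a summary of at most summary_size words from docs,
    # and returns four ROUGE scores against refs: r1, r2, rl, rsu.)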
@@ -91,28 +84,36 @@ def get_summary_scores(algo, docs, refs, summary_size):


if __name__ == '__main__':
-    file_names = os.listdir(args.path)
+    out_file = open(args.out, 'w')
    algos = ['UB1', 'UB2', 'LexRank', 'TextRank', 'Luhn', 'ICSI']
    R1 = {'UB1': .0, 'UB2': .0, 'ICSI': .0, 'LSA': .0, 'KL': .0, 'Luhn': .0, 'LexRank': .0, 'TextRank': .0}
    R2 = {'UB1': .0, 'UB2': .0, 'ICSI': .0, 'LSA': .0, 'KL': .0, 'Luhn': .0, 'LexRank': .0, 'TextRank': .0}
    Rl = {'UB1': .0, 'UB2': .0, 'ICSI': .0, 'LSA': .0, 'KL': .0, 'Luhn': .0, 'LexRank': .0, 'TextRank': .0}
    Rsu = {'UB1': .0, 'UB2': .0, 'ICSI': .0, 'LSA': .0, 'KL': .0, 'Luhn': .0, 'LexRank': .0, 'TextRank': .0}
-    for filename in tqdm(file_names):
-        data_file = os.path.join(args.path, filename)
-        docs, refs = load_data(data_file)
-        sum_len = len(' '.join(refs[0]).split(' ')) * args.sum_len
-        print('####', filename, '####')
-        for algo in algos:
-            r1, r2, rl, rsu = get_summary_scores(algo, docs, refs, sum_len)
-            print algo, r1, r2, rl, rsu
-            R1[algo] += r1
-            R2[algo] += r2
-            Rl[algo] += rl
-            Rsu[algo] += rsu
+    blog_sum = .0
+    for t in types:
+        cur_path = args.path + '/' + t + '/'
+        file_names = os.listdir(cur_path)
+        blog_sum += len(file_names)
+        for filename in tqdm(file_names):
+            data_file = os.path.join(cur_path, filename)
+            docs, refs = load_data(data_file)
+            sum_len = len(' '.join(refs[0]).split(' ')) * args.sum_len
+            print('####', filename, '####')
+            out_file.write(filename + '\n')
+            for algo in algos:
+                r1, r2, rl, rsu = get_summary_scores(algo, docs, refs, sum_len)
+                print algo, r1, r2, rl, rsu
+                out_file.write(algo + ' ' + str(r1) + ' ' + str(r2) + ' ' + str(rl) + ' ' + str(rsu) + '\n')
+                R1[algo] += r1
+                R2[algo] += r2
+                Rl[algo] += rl
+                Rsu[algo] += rsu
+    out_file.close()
    print('Final Results')
    for algo in algos:
-        R1[algo] /= len(file_names)
-        R2[algo] /= len(file_names)
-        Rl[algo] /= len(file_names)
-        Rsu[algo] /= len(file_names)
+        R1[algo] /= blog_sum
+        R2[algo] /= blog_sum
+        Rl[algo] /= blog_sum
+        Rsu[algo] /= blog_sum
        print('%s Rouge-1: %f Rouge-2: %f Rouge-l: %f Rouge-SU4: %f' % (algo, R1[algo], R2[algo], Rl[algo], Rsu[algo]))
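After this change the script macro-averages per-blog ROUGE over every blog in all three splits (blog_sum of them) instead of over a single test directory. A hypothetical invocation with the defaults wired in above (paths assume the data layout sketched earlier):

#   cd baselines/baseline2 && python baseline.py -corpus bbc_cont_1 \
#       -path ../../data/ -sum_len 1 -out record.txt
# record.txt then holds one block per blog: the filename, followed by one
# line per algorithm with its Rouge-1, Rouge-2, Rouge-l and Rouge-SU4 scores.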
