Skip to content

Commit 738c990

Browse files
author
CogComp Dev
authored
Merge pull request #694 from qiangning/master
readme update for memory usage of Verb SRL in pipeline (close #656); ChunkerTrain bug fixed and model updated (close #685)
2 parents daf593d + ee8332f commit 738c990

File tree

15 files changed

+49
-65
lines changed

15 files changed

+49
-65
lines changed

chunker/doc/performance.txt

Lines changed: 26 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
Date: 10/20/2016
1+
Date: 09/23/2018
22
Tested by: Qiang (John) Ning
33
Contact: qning2@illinois.edu
44

5-
Chunker model version: illinois-chunker-model-3.0.77
6-
Trainset: /shared/corpora/corporaWeb/written/eng/chunking/conll2000distributions/train.txt (trained with 50 iterations.)
5+
Chunker model version: illinois-chunker-model-4.0.12
6+
Trainset: /shared/corpora/corporaWeb/written/eng/chunking/conll2000distributions/train.txt (trained with 11 iterations.)
77
Testset:
88
Gold POS: /shared/corpora/corporaWeb/written/eng/chunking/conll2000distributions/test.txt
99
No POS: /shared/corpora/corporaWeb/written/eng/chunking/conll2000distributions/test.noPOS.txt
@@ -12,39 +12,37 @@ Performance:
1212
With Gold POS
1313
Label Precision Recall F1 LCount PCount
1414
----------------------------------------------
15-
ADJP 76.633 69.635 72.967 438 398
16-
ADVP 81.862 79.215 80.516 866 838
17-
CONJP 45.455 55.556 50.000 9 11
18-
INTJ 50.000 50.000 50.000 2 2
15+
ADJP 78.000 71.233 74.463 438 400
16+
ADVP 82.262 79.792 81.008 866 840
17+
CONJP 50.000 55.556 52.632 9 10
18+
INTJ 100.000 50.000 66.667 2 1
1919
LST 0.000 0.000 0.000 5 1
20-
NP 94.106 93.962 94.034 12422 12403
21-
PP 96.770 97.776 97.270 4811 4861
22-
PRT 72.072 75.472 73.733 106 111
23-
SBAR 88.280 87.290 87.782 535 529
24-
UCP 0.000 0.000 0.000 0 5
25-
VP 93.416 93.517 93.466 4658 4663
20+
NP 94.051 94.051 94.051 12422 12422
21+
PP 96.694 97.880 97.283 4811 4870
22+
PRT 73.394 75.472 74.419 106 109
23+
SBAR 87.902 86.916 87.406 535 529
24+
VP 93.845 93.946 93.896 4658 4663
2625
----------------------------------------------
27-
O 0.000 0.000 0.000 1244 1274
26+
O 0.000 0.000 0.000 1214 1221
2827
----------------------------------------------
29-
Overall 93.510 93.393 93.451 23852 23822
30-
Accuracy 88.763 - - - 25096
28+
Overall 93.613 93.585 93.599 23852 23845
29+
Accuracy 89.053 - - - 25066
3130

3231
With NO POS
3332
Label Precision Recall F1 LCount PCount
3433
----------------------------------------------
35-
ADJP 78.608 69.635 73.850 438 388
36-
ADVP 80.427 78.291 79.345 866 843
37-
CONJP 45.455 55.556 50.000 9 11
34+
ADJP 80.051 72.374 76.019 438 396
35+
ADVP 80.806 78.753 79.766 866 844
36+
CONJP 50.000 55.556 52.632 9 10
3837
INTJ 100.000 50.000 66.667 2 1
3938
LST 0.000 0.000 0.000 5 0
40-
NP 94.193 94.019 94.106 12422 12399
41-
PP 96.656 97.942 97.295 4811 4875
42-
PRT 60.417 82.075 69.600 106 144
43-
SBAR 86.813 88.598 87.697 535 546
44-
UCP 0.000 0.000 0.000 0 4
45-
VP 94.105 94.246 94.176 4658 4665
39+
NP 94.224 94.156 94.190 12422 12413
40+
PP 96.540 98.005 97.267 4811 4884
41+
PRT 64.444 82.075 72.199 106 135
42+
SBAR 86.900 88.037 87.465 535 542
43+
VP 94.427 94.568 94.497 4658 4665
4644
----------------------------------------------
47-
O 0.000 0.000 0.000 1231 1207
45+
O 0.000 0.000 0.000 1199 1161
4846
----------------------------------------------
49-
Overall 93.529 93.623 93.576 23852 23876
50-
Accuracy 89.028 - - - 25083
47+
Overall 93.675 93.824 93.750 23852 23890
48+
Accuracy 89.334 - - - 25051

chunker/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
<dependency>
4040
<groupId>edu.illinois.cs.cogcomp</groupId>
4141
<artifactId>illinois-chunker-model</artifactId>
42-
<version>3.0.77</version>
42+
<version>4.0.12</version>
4343
</dependency>
4444
<dependency>
4545
<groupId>edu.illinois.cs.cogcomp</groupId>

chunker/scripts/mvn_demo.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#!/usr/bin/env bash
2-
TESTFILE=test/testIn.txt
3-
OUTFILE=test/testOut.txt
2+
TESTFILE=src/test/resources/testIn.txt
3+
OUTFILE=testOut.txt
44

55
mvn exec:java -Dexec.mainClass=edu.illinois.cs.cogcomp.chunker.main.ChunkerDemo -Dexec.args="$TESTFILE $OUTFILE"
66

chunker/scripts/mvn_test_conll.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#!/usr/bin/env bash
2-
TESTFILE=test/testCoNLL.txt
2+
TESTFILE=src/test/resources/testCoNLL.txt
33

44
# Use the default chunker model
55
if [ $# -eq 0 ]; then

chunker/scripts/mvn_validate.sh

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,21 @@
1-
TESTFILE=test/testIn.txt
2-
OUTFILE=test/testOut.txt
3-
REFFILE=test/testRefOut.txt
1+
#!/usr/bin/env bash
2+
TESTFILE=src/test/resources/testIn.txt
3+
OUTFILE=testOut.txt
4+
REFFILE=src/test/resources/testRefOut-demo.txt
45

56
mvn exec:java -Dexec.mainClass=edu.illinois.cs.cogcomp.chunker.main.ChunkerDemo -Dexec.args="$TESTFILE $OUTFILE"
67

7-
DIFFFILE=test/testDiff.txt
8+
DIFFFILE=testDiff.txt
89
rm -f ${DIFFFILE}
910
diff $REFFILE $OUTFILE > $DIFFFILE
1011

1112
if [ -e ${DIFFFILE} ]; then
1213
if [ -s ${DIFFFILE} ]; then
13-
echo "$0: *** TEST FAILED ***: Differences found between new output and reference output. See $DIFFFILE for details."
14+
echo "$0: *** TEST FAILED ***: Differences found between new output and reference output. See $OUTFILE and $DIFFFILE for details."
1415
else
1516
echo "$0: Test passed: no difference between new output and reference output."
1617
rm -f $DIFFFILE
18+
rm -f $OUTFILE
1719
fi
1820
else
1921
echo "$0: Error: couldn't find the diff file '$DIFFFILE'."

chunker/src/main/java/edu/illinois/cs/cogcomp/chunker/main/ChunkerTrain.java

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ public void trainModels(String trainingData, String modeldir, String modelname,
7979
*/
8080
public void trainModelsWithParser(Parser parser) {
8181
Chunker.isTraining = true;
82-
82+
chunker.forget();
8383
// Run the learner
8484
for (int i = 1; i <= iter; i++) {
8585
LinkedVector ex;
@@ -97,6 +97,7 @@ public void trainModelsWithParser(Parser parser) {
9797

9898
public void trainModelsWithParser(Parser parser, String modeldir, String modelname, double dev_ratio) {
9999
Chunker.isTraining = true;
100+
chunker.forget();
100101
double tmpF1 = 0;
101102
double bestF1 = 0;
102103
int bestIter = 0;
@@ -107,16 +108,11 @@ public void trainModelsWithParser(Parser parser, String modeldir, String modelna
107108
// Get the total number of training set
108109
int cnt = 0;
109110
LinkedVector ex;
110-
while ((ex = (LinkedVector) parser.next()) != null) {
111-
cnt++;
112-
}
111+
while (parser.next() != null) cnt++;
113112
parser.reset();
114113
// Get the boundary between train and dev
114+
dev_ratio = Math.min(1,Math.max(dev_ratio,0));
115115
long idx = Math.round(cnt*(1-dev_ratio));
116-
if( idx < 0 )
117-
idx = 0;
118-
if( idx > cnt )
119-
idx = cnt;
120116

121117
// Run the learner and save F1 for each iteration
122118
for (int i = 1; i <= iter; i++) {
@@ -125,10 +121,8 @@ public void trainModelsWithParser(Parser parser, String modeldir, String modelna
125121
for (int j = 0; j < ex.size(); j++) {
126122
chunker.learn(ex.get(j));
127123
}
128-
if(cnt>=idx)
129-
break;
130-
else
131-
cnt++;
124+
if(cnt>=idx) break;
125+
cnt++;
132126
}
133127
chunker.doneWithRound();
134128
writeModelsToDisk(modeldir,modelname);
@@ -153,6 +147,7 @@ public void trainModelsWithParser(Parser parser, String modeldir, String modelna
153147
System.out.println("Best #Iter = "+bestIter+" (F1="+bestF1+")");
154148
System.out.println("Rerun the learner using best #Iter...");
155149
// Rerun the learner
150+
chunker.forget();
156151
for (int i = 1; i <= bestIter; i++) {
157152
while ((ex = (LinkedVector) parser.next()) != null) {
158153
for (int j = 0; j < ex.size(); j++) {

chunker/src/test/java/edu/illinois/cs/cogcomp/lbj/chunk/tests/TestDiff.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
public class TestDiff {
3838
private static final String testFileName = "testIn.txt";
3939
private static String testFile;
40-
private static final String refFileName = "testRefOutput.txt";
40+
private static final String refFileName = "testRefOut.txt";
4141
private static List<String> refSentences;
4242

4343
@Before
File renamed without changes.

chunker/src/test/resources/testOut.txt

Lines changed: 0 additions & 3 deletions
This file was deleted.
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
[ADVP Arguably ] [NP both ] [VP were ] [PP on ] [NP notice ] [SBAR that ] [NP their behavior ] [VP was ] [ADVP at ] [ADJP least risky ] [NP Mr. Bush ] [VP had threatened ] [NP a veto ] [ADVP previously ] [NP The volatility ] [VP was ] [ADJP dizzying ] [PP for ] [NP traders ]
1+
[ADVP Arguably ] [NP both ] [VP were ] [PP on ] [NP notice ] [SBAR that ] [NP their behavior ] [VP was ] [ADVP at least ] [ADJP risky ] [NP Mr. Bush ] [VP had threatened ] [NP a veto ] [ADVP previously ] [NP The volatility ] [VP was ] [ADJP dizzying ] [PP for ] [NP traders ]
22
(RB Arguably) (, ,) (DT both) (VBD were) (IN on) (NN notice) (IN that) (PRP$ their) (NN behavior) (VBD was) (IN at) (JJS least) (JJ risky) (. .) (NNP Mr.) (NNP Bush) (VBD had) (VBN threatened) (DT a) (NN veto) (RB previously) (. .) (DT The) (NN volatility) (VBD was) (JJ dizzying) (IN for) (NNS traders) (. .)

0 commit comments

Comments
 (0)