Skip to content

Commit 82bf207

Browse files
committed
Add a comment with the results of a sweep over the number of heads to use in the rattn
1 parent 46dd8fc commit 82bf207

File tree

1 file changed

+23
-0
lines changed

1 file changed

+23
-0
lines changed

stanza/models/constituency_parser.py

+23
Original file line number | Diff line number | Diff line change
@@ -707,6 +707,29 @@ def build_argparse():
707707

708708
parser.add_argument('--use_rattn', default=False, action='store_true', help='Use a local attention layer')
709709
parser.add_argument('--rattn_window', default=16, type=int, help='Number of tokens to use for context in the local attention')
710+
# Ran an experiment on id_icon with in_order, peft, 200 epochs training
711+
# Equivalent experiment with no rattn had an average of 0.8922 dev
712+
# window 16, cat, dim 200, sinks 0
713+
# head dev score
714+
# 1 0.8915
715+
# 2 0.8933
716+
# 3 0.8918
717+
# 4 0.8934
718+
# 5 0.8924
719+
# 6 0.8936
720+
# 8 0.8920
721+
# 10 0.8909
722+
# 12 0.8939
723+
# 14 0.8949
724+
# 16 0.8952
725+
# 18 0.8915
726+
# 20 0.8925
727+
# 25 0.8913
728+
# 30 0.8913
729+
# 40 0.8943
730+
# 50 0.8931
731+
# 75 0.8940
732+
# The average here is 0.8928, which is a tiny bit higher...
710733
parser.add_argument('--rattn_heads', default=16, type=int, help='Number of heads to use for context in the local attention')
711734
parser.add_argument('--no_rattn_forward', default=True, action='store_false', dest='rattn_forward', help="Use or don't use the forward relative attention")
712735
parser.add_argument('--no_rattn_reverse', default=True, action='store_false', dest='rattn_reverse', help="Use or don't use the reverse relative attention")

0 commit comments

Comments (0)