We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 9064c93, commit 7d6959b. Copy full SHA for 7d6959b
src/12_transformer/train.py
@@ -51,7 +51,7 @@ def define_argparser(is_continue=False):
51
p.add_argument(
52
'--batch_size',
53
type=int,
54
- default=256,
+ default=128,
55
help='Mini batch size for gradient descent. Default=%(default)s'
56
)
57
@@ -107,7 +107,7 @@ def define_argparser(is_continue=False):
107
108
'--iteration_per_update',
109
110
- default=16,
+ default=32,
111
help='Number of feed-forward iterations for one parameter update. Default=%(default)s'
112
113
0 commit comments