-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathtrain_ensemble.sh
36 lines (29 loc) · 957 Bytes
/
train_ensemble.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#!/bin/bash
# Trains an ensemble of models by submitting one AI Platform training job per
# ensemble member. Paths below are relative, so run this from the directory
# that contains the `trainer` package and the `data` folder.

# Number of ensemble members, i.e. how many training jobs are submitted.
ensemble_size=10
REGION=europe-west1
SCALE_TIER=BASIC_GPU
# JOB_NAME and JOB_DIR must be unique and therefore updated for each run.
# The member index is appended to both when a job is submitted.
JOB_NAME=pellet_labels_63_classes1
JOB_DIR=gs://pellet_labels/${JOB_NAME}/pellet_labels_model

# Check that the trainer works properly before launching jobs in gcloud.
# Stop here if the one-epoch local run fails, so that a broken trainer does
# not get submitted as cloud jobs.
if ! python3 -m trainer.task --job-dir ./models \
  --train-files ./data/test_data.zip \
  --num-epochs 1; then
  echo "Local trainer check failed; not submitting any training jobs." >&2
  exit 1
fi
#######################################
# Submit one ensemble member as a gcloud AI Platform training job.
# Globals:
#   JOB_NAME   (read) base job name; "_<member>" is appended
#   JOB_DIR    (read) base job directory; "<member>" is appended
#   REGION     (read) passed to --region
#   SCALE_TIER (read) passed to --scale-tier
# Arguments:
#   $1 - ensemble member identifier (required, non-empty)
# Returns:
#   2 if the member identifier is missing, otherwise gcloud's exit status.
#######################################
train_model() {
  # Without a member id the job name and job dir would not be unique.
  if [[ -z "${1:-}" ]]; then
    echo "train_model: missing ensemble member identifier" >&2
    return 2
  fi
  local member=$1

  # Expansions are quoted so each value reaches gcloud as exactly one argument.
  # Everything after the bare "--" is forwarded to trainer.task.
  gcloud ai-platform jobs submit training "${JOB_NAME}_${member}" \
    --package-path trainer \
    --module-name trainer.task \
    --region "${REGION}" \
    --scale-tier "${SCALE_TIER}" \
    --python-version 3.5 \
    --runtime-version 1.13 \
    --job-dir "${JOB_DIR}${member}" \
    -- \
    --train-files gs://pellet_labels/amman_atb_data.zip gs://pellet_labels/i2a_atb_data.zip \
    --num-epochs=120 \
    --weights 1 3
}
# Submit every ensemble member in the background so the submissions run in
# parallel, remembering each PID so its result can be collected afterwards.
submit_pids=()
for ((i = 1; i <= ensemble_size; i++)); do
  # Log the same name that train_model submits: "<JOB_NAME>_<member>".
  echo "Starting training for: ${JOB_NAME}_${i}"
  train_model "${i}" &
  submit_pids+=("$!")
done

# List the background submissions that are in flight.
jobs

# Wait for every submission to finish and count the ones that failed, so the
# script's exit status reflects whether all jobs were actually submitted.
failed_count=0
for pid in "${submit_pids[@]}"; do
  wait "${pid}" || failed_count=$((failed_count + 1))
done

if ((failed_count > 0)); then
  echo "${failed_count} of ${ensemble_size} job submissions failed." >&2
  exit 1
fi