From 30f031e5e296192847a4d199d52366b227a7e9a0 Mon Sep 17 00:00:00 2001 From: Dmitry Date: Tue, 6 Mar 2018 15:09:51 -0800 Subject: [PATCH 1/3] CNN tutorial: use external code files --- docs/tutorial/cnn_classify.sh | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/docs/tutorial/cnn_classify.sh b/docs/tutorial/cnn_classify.sh index 0747cf15b8..91664ff1c4 100755 --- a/docs/tutorial/cnn_classify.sh +++ b/docs/tutorial/cnn_classify.sh @@ -1,5 +1,9 @@ #!/bin/bash +set -x + +GIT_REMOTE=https://github.com/dmpetrov/cnn_classify.git + mkdir cnn_classify cd cnn_classify git init @@ -9,7 +13,7 @@ git add . git commit -m 'Init DVC' mkdir data -cp ../deeppy/train.zip data/ +cp ../train.zip data/ dvc add data/train.zip git add data/ git status @@ -40,8 +44,8 @@ git add data/.gitignore train.dvc git commit -m 'Unzip files' mkdir code -vi code/process_files.py -vi code/conf.py +wget -P code https://s3-us-west-2.amazonaws.com/dvc-share/tutorial_cnn_code/common/process_files.py +wget -P code https://s3-us-west-2.amazonaws.com/dvc-share/tutorial_cnn_code/common/conf.py dvc run -d code/process_files.py -d data/train -o data/cats_and_dogs_small -f process.dvc python code/process_files.py git status #On branch master @@ -64,7 +68,8 @@ echo '*.pyc' >> code/.gitignore git add . git commit -m 'Process raw data' -vi code/model.py +wget -P code https://s3-us-west-2.amazonaws.com/dvc-share/tutorial_cnn_code/first-cnn/model.py + dvc run -d code/model.py -d code/conf.py -d data/cats_and_dogs_small -o data/model.h5 -o data/history.p python code/model.py git status #On branch master @@ -85,7 +90,7 @@ git status git add . git commit -m 'First model' -vi code/plot.py +wget -P code https://s3-us-west-2.amazonaws.com/dvc-share/tutorial_cnn_code/common/plot.py dvc run -d code/conf.py -d code/plot.py -d data/history.p -o data/plot_loss.jpeg -o data/plot_acc.jpeg python code/plot.py dvc run -d data/plot_loss.jpeg @@ -114,7 +119,8 @@ git tag -a v0.1-first-cnn -m 'First CNN model' ############################################ v2 -vi code/model.py +rm code/model.py +wget -P code https://s3-us-west-2.amazonaws.com/dvc-share/tutorial_cnn_code/augm-cnn/model.py # Actual changes: augmentation, Dropout=0.5, batch_size=20-->32, epochs=30-->100 dvc repro @@ -139,7 +145,9 @@ git tag -a v0.2-augm-cnn -m 'CNN model with augmentation' git checkout v0.1-first-cnn -b pre_trained dvc checkout -vi code/model.py +rm code/model.py +wget -P code https://s3-us-west-2.amazonaws.com/dvc-share/tutorial_cnn_code/vgg16_base/model.py + dvc repro git status #On branch pre_trained @@ -163,7 +171,9 @@ git tag -a v0.3-vgg16_base -, 'Pretrained VGG16 tag' git checkout v0.2-augm-cnn -b vgg16_augm dvc checkout -vi code/model.py +rm code/model.py +wget -P code https://s3-us-west-2.amazonaws.com/dvc-share/tutorial_cnn_code/vgg16_augm/model.py + dvc repro git status #On branch vgg16_augm @@ -184,7 +194,8 @@ git tag -a v0.4-vgg16_augm -m 'VGG16 with augm tag' ################################################# v5 -vi code/model.py +rm code/model.py +wget -P code https://s3-us-west-2.amazonaws.com/dvc-share/tutorial_cnn_code/vgg_augm_fine/model.py dvc repro git status #On branch vgg16_augm @@ -206,7 +217,7 @@ git tag -a v0.5-vgg_augm_fine -m 'VGG, augmentation and fine-tuning tag' git checkout master git merge vgg16_augm -git remote add origin https://github.com/dmpetrov/cnn_classify.git +git remote add origin ${GIT_REMOTE} git push -u origin master --tags git push -u origin pre_trained From ec741d87d5f90d89cd242bdad82ac5bef1d0bd12 Mon Sep 17 00:00:00 2001 From: Dmitry Date: Tue, 6 Mar 2018 15:10:28 -0800 Subject: [PATCH 2/3] Remove EC2 params from config --- dvc/config.py | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/dvc/config.py b/dvc/config.py index e9765766b1..a8ea854bb0 100644 --- a/dvc/config.py +++ b/dvc/config.py @@ -30,30 +30,10 @@ class Config(object): StoragePath = dvc/tutorial -# Default settings for AWS instances: -Type = t2.nano -Image = ami-2d39803a - -SpotPrice = -SpotTimeout = 300 - -KeyPairName = dvc-key -KeyPairDir = ~/.ssh -SecurityGroup = dvc-sg - -Region = us-east-1 -Zone = us-east-1a -SubnetId = - -Volume = my-100gb-drive-io - -Monitoring = false -EbsOptimized = false -AllDisksAsRAID0 = false - [GCP] StoragePath = ProjectName = + ''' def __init__(self, dvc_dir): From f122c19981d22dcd9dd8565a5304f37a57973a65 Mon Sep 17 00:00:00 2001 From: Dmitry Date: Tue, 6 Mar 2018 15:52:39 -0800 Subject: [PATCH 3/3] Check dir changes properly. Close #491. --- dvc/output.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/dvc/output.py b/dvc/output.py index 405c9f163e..f86070e175 100644 --- a/dvc/output.py +++ b/dvc/output.py @@ -195,14 +195,22 @@ def _changed_dir(self): if not os.path.isdir(self.path) or not os.path.isfile(self.cache): return True - dir_info = self._collect_dir() - dir_info_cached = self.load_dir_cache(self.cache) + dir_info = self._collect_dir() # slow! + dir_info_cached = self.load_dir_cache(self.cache) # slow. why? - if dir_info != dir_info_cached: + if not self.are_dir_info_equal(dir_info, dir_info_cached): return True return False + @staticmethod + def are_dir_info_equal(dir_info1, dir_info2): + return Output.dir_info_dict(dir_info1) == Output.dir_info_dict(dir_info2) + + @staticmethod + def dir_info_dict(dir_info): + return {i['relpath']: i['md5'] for i in dir_info} + def changed(self): ret = True