diff --git a/README.md b/README.md index 3912fb8..df324dc 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ usage: python3 auto_crawler.py [--skip true] [--threads 4] [--google true] [--na --naver NAVER Download from naver.com (boolean) -# Integrity Checking +# Data Imbalance Detection Detects data imblance based on number of files. diff --git a/auto_crawler.py b/auto_crawler.py index c007140..daa7827 100644 --- a/auto_crawler.py +++ b/auto_crawler.py @@ -147,10 +147,11 @@ def do_crawling(self): pool.join() print('pool join') - self.integrity_check() + self.imbalance_check() - def integrity_check(self): - print('Integrity Checking...') + print('End Program') + + def imbalance_check(self): print('Data imbalance checking...') dict_num_files = {} @@ -190,6 +191,8 @@ def integrity_check(self): print('Removed {}'.format(dir)) print('Now re-run this program to re-download removed files. (with skip_already_exist=True)') + else: + print('Data imbalance not detected.') if __name__ == '__main__':