Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

W&B: Refactor the wandb_utils.py file #4496

Merged
merged 41 commits into from
Aug 23, 2021
Merged
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
d0d89b6
Improve docstrings and run names
AyushExel Jul 27, 2021
a283de6
default wandb login prompt with timeout
AyushExel Jul 27, 2021
33ab468
Merge branch 'ultralytics:master' into master
AyushExel Jul 27, 2021
78d1ed0
return key
AyushExel Jul 27, 2021
bba130b
Merge branch 'master' of https://github.com/AyushExel/yolov5
AyushExel Jul 27, 2021
725bd4a
Update api_key check logic
AyushExel Jul 27, 2021
bb372f5
Properly support zipped dataset feature
AyushExel Jul 28, 2021
27c06f0
update docstring
AyushExel Jul 28, 2021
3cafca4
merge master
AyushExel Jul 28, 2021
e64525a
Revert tuorial change
AyushExel Jul 28, 2021
0a67b4a
extend changes to log_dataset
AyushExel Jul 28, 2021
80eb6aa
add run name
AyushExel Jul 28, 2021
3b039be
bug fix
AyushExel Jul 28, 2021
31f04a2
bug fix
AyushExel Jul 28, 2021
2482fb3
Update comment
AyushExel Jul 28, 2021
6012e3f
fix import check
AyushExel Jul 28, 2021
66e36ef
remove unused import
AyushExel Jul 28, 2021
aa58d9e
Hardcore .yaml file extension
AyushExel Jul 28, 2021
7225c19
Merge branch 'ultralytics:master' into fix_dataset_check
AyushExel Jul 28, 2021
dfacf7b
reduce code
glenn-jocher Jul 28, 2021
3593b48
Merge branch 'ultralytics:master' into fix_dataset_check
AyushExel Jul 28, 2021
ab11132
Reformat using pycharm
AyushExel Jul 28, 2021
5d61242
Remove redundant try catch
AyushExel Jul 28, 2021
0a43ca7
More refactoring and bug fixes
AyushExel Jul 28, 2021
4830a54
merge master
AyushExel Jul 28, 2021
570381d
retry
AyushExel Jul 28, 2021
28d7cba
Reformat using pycharm
AyushExel Jul 28, 2021
c72f59f
respect LOGGERS include list
AyushExel Jul 28, 2021
23ef166
Merge branch 'fix_dataset_check' of https://github.com/AyushExel/yolo…
AyushExel Jul 28, 2021
e0e41c9
Merge branch 'ultralytics:master' into fix_dataset_check
AyushExel Jul 28, 2021
2b664dd
Merge branch 'ultralytics:master' into fix_dataset_check
AyushExel Jul 30, 2021
c6c5f3e
Merge branch 'ultralytics:master' into fix_dataset_check
AyushExel Aug 1, 2021
180aed6
Merge branch 'ultralytics:master' into fix_dataset_check
AyushExel Aug 11, 2021
e0d419a
Fix
AyushExel Aug 11, 2021
61b9e1d
fix
AyushExel Aug 11, 2021
15e8331
Merge branch 'ultralytics:master' into fix_dataset_check
AyushExel Aug 19, 2021
7a12743
refactor constructor
AyushExel Aug 19, 2021
bbe1ef8
refactor
AyushExel Aug 19, 2021
70edf8b
refactor
AyushExel Aug 19, 2021
a1cf7f5
refactor
AyushExel Aug 19, 2021
3b67844
PyCharm reformat
glenn-jocher Aug 23, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 43 additions & 34 deletions utils/loggers/wandb/wandb_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,19 @@ def check_wandb_config_file(data_config_file):
return data_config_file


def check_wandb_dataset(data_file):
    """
    Load dataset info, detecting whether it references W&B artifacts.

    arguments:
    data_file (str) -- path to the dataset .yaml file

    returns:
    dict -- the parsed dataset dictionary as-is when its 'train' or 'val'
            entry points to a W&B artifact, otherwise the result of
            check_dataset(data_file) (local .yaml or .zip handling)
    """
    is_wandb_artifact = False
    if check_file(data_file) and data_file.endswith('.yaml'):
        with open(data_file, errors='ignore') as f:
            data_dict = yaml.safe_load(f)
        # 'train'/'val' may be absent or a list of paths in valid dataset
        # YAMLs; only a plain string can reference a W&B artifact, so guard
        # with isinstance to avoid KeyError/AttributeError.
        is_wandb_artifact = (isinstance(data_dict.get('train'), str) and
                             data_dict['train'].startswith(WANDB_ARTIFACT_PREFIX)) or \
                            (isinstance(data_dict.get('val'), str) and
                             data_dict['val'].startswith(WANDB_ARTIFACT_PREFIX))
    if is_wandb_artifact:
        return data_dict
    else:
        return check_dataset(data_file)


def get_run_info(run_path):
run_path = Path(remove_prefix(run_path, WANDB_ARTIFACT_PREFIX))
run_id = run_path.stem
Expand Down Expand Up @@ -104,7 +117,7 @@ def __init__(self, opt, run_id, job_type='Training'):
- Initialize WandbLogger instance
- Upload dataset if opt.upload_dataset is True
- Setup training processes if job_type is 'Training'

arguments:
opt (namespace) -- Commandline arguments for this run
run_id (str) -- Run ID of W&B run to be resumed
Expand Down Expand Up @@ -147,37 +160,35 @@ def __init__(self, opt, run_id, job_type='Training'):
allow_val_change=True) if not wandb.run else wandb.run
if self.wandb_run:
if self.job_type == 'Training':
if not opt.resume:
if opt.upload_dataset:
if opt.upload_dataset:
if not opt.resume:
self.wandb_artifact_data_dict = self.check_and_upload_dataset(opt)

elif opt.data.endswith('_wandb.yaml'): # When dataset is W&B artifact
with open(opt.data, errors='ignore') as f:
data_dict = yaml.safe_load(f)
self.data_dict = data_dict
else: # Local .yaml dataset file or .zip file
self.data_dict = check_dataset(opt.data)
if opt.resume:
# resume from artifact
if isinstance(opt.resume, str) and opt.resume.startswith(WANDB_ARTIFACT_PREFIX):
self.data_dict = dict(self.wandb_run.config.data_dict)
else: # local resume
self.data_dict = check_wandb_dataset(opt.data)
else:
self.data_dict = check_dataset(opt.data)
self.data_dict = check_wandb_dataset(opt.data)
self.wandb_artifact_data_dict = self.wandb_artifact_data_dict or self.data_dict

self.setup_training(opt)
if not self.wandb_artifact_data_dict:
self.wandb_artifact_data_dict = self.data_dict
# write data_dict to config. useful for resuming from artifacts. Do this only when not resuming.
if not opt.resume:
# write data_dict to config. useful for resuming from artifacts. Do this only when not resuming.
self.wandb_run.config.update({'data_dict': self.wandb_artifact_data_dict},
allow_val_change=True)
self.setup_training(opt)

if self.job_type == 'Dataset Creation':
self.data_dict = self.check_and_upload_dataset(opt)

def check_and_upload_dataset(self, opt):
"""
Check if the dataset format is compatible and upload it as W&B artifact

arguments:
opt (namespace)-- Commandline arguments for current run

returns:
Updated dataset info dictionary where local dataset paths are replaced by WANDB_ARTIFACT_PREFIX links.
"""
Expand All @@ -196,10 +207,10 @@ def setup_training(self, opt):
- Attempt to download model checkpoint and dataset artifacts if opt.resume starts with WANDB_ARTIFACT_PREFIX
- Update data_dict, to contain info of previous run if resumed and the paths of dataset artifact if downloaded
- Setup log_dict, initialize bbox_interval

arguments:
opt (namespace) -- commandline arguments for this run

"""
self.log_dict, self.current_epoch = {}, 0
self.bbox_interval = opt.bbox_interval
Expand All @@ -211,9 +222,7 @@ def setup_training(self, opt):
opt.weights, opt.save_period, opt.batch_size, opt.bbox_interval, opt.epochs, opt.hyp = str(
self.weights), config.save_period, config.batch_size, config.bbox_interval, config.epochs, \
config.hyp
data_dict = dict(self.wandb_run.config.data_dict) # eliminates the need for config file to resume
else:
data_dict = self.data_dict
data_dict = self.data_dict
if self.val_artifact is None: # If --upload_dataset is set, use the existing artifact, don't download
self.train_artifact_path, self.train_artifact = self.download_dataset_artifact(data_dict.get('train'),
opt.artifact_alias)
Expand Down Expand Up @@ -243,11 +252,11 @@ def setup_training(self, opt):
def download_dataset_artifact(self, path, alias):
"""
download the model checkpoint artifact if the path starts with WANDB_ARTIFACT_PREFIX

arguments:
path -- path of the dataset to be used for training
alias (str)-- alias of the artifact to be download/used for training

returns:
(str, wandb.Artifact) -- path of the downloaded dataset and its corresponding artifact object if dataset
is found otherwise returns (None, None)
Expand All @@ -263,7 +272,7 @@ def download_dataset_artifact(self, path, alias):
def download_model_artifact(self, opt):
"""
download the model checkpoint artifact if the resume path starts with WANDB_ARTIFACT_PREFIX

arguments:
opt (namespace) -- Commandline arguments for this run
"""
Expand All @@ -281,7 +290,7 @@ def download_model_artifact(self, opt):
def log_model(self, path, opt, epoch, fitness_score, best_model=False):
"""
Log the model checkpoint as W&B artifact

arguments:
path (Path) -- Path of directory containing the checkpoints
opt (namespace) -- Command line arguments for this run
Expand All @@ -305,14 +314,14 @@ def log_model(self, path, opt, epoch, fitness_score, best_model=False):
def log_dataset_artifact(self, data_file, single_cls, project, overwrite_config=False):
"""
Log the dataset as W&B artifact and return the new data file with W&B links

arguments:
data_file (str) -- the .yaml file with information about the dataset like - path, classes etc.
single_class (boolean) -- train multi-class data as single-class
project (str) -- project name. Used to construct the artifact path
overwrite_config (boolean) -- overwrites the data.yaml file if set to true otherwise creates a new
file with _wandb postfix. Eg -> data_wandb.yaml

returns:
the new .yaml file with artifact links. it can be used to start training directly from artifacts
"""
Expand Down Expand Up @@ -359,12 +368,12 @@ def map_val_table_path(self):
def create_dataset_table(self, dataset, class_to_id, name='dataset'):
"""
Create and return W&B artifact containing W&B Table of the dataset.

arguments:
dataset (LoadImagesAndLabels) -- instance of LoadImagesAndLabels class used to iterate over the data to build Table
class_to_id (dict(int, str)) -- hash map that maps class ids to labels
name (str) -- name of the artifact

returns:
dataset artifact to be logged or used
"""
Expand Down Expand Up @@ -401,7 +410,7 @@ def create_dataset_table(self, dataset, class_to_id, name='dataset'):
def log_training_progress(self, predn, path, names):
"""
Build evaluation Table. Uses reference from validation dataset table.

arguments:
predn (list): list of predictions in the native space in the format - [xmin, ymin, xmax, ymax, confidence, class]
path (str): local path of the current evaluation image
Expand Down Expand Up @@ -431,7 +440,7 @@ def log_training_progress(self, predn, path, names):
def val_one_image(self, pred, predn, path, names, im):
"""
Log validation data for one image. updates the result Table if validation dataset is uploaded and log bbox media panel

arguments:
pred (list): list of scaled predictions in the format - [xmin, ymin, xmax, ymax, confidence, class]
predn (list): list of predictions in the native space - [xmin, ymin, xmax, ymax, confidence, class]
Expand All @@ -453,7 +462,7 @@ def val_one_image(self, pred, predn, path, names, im):
def log(self, log_dict):
"""
save the metrics to the logging dictionary

arguments:
log_dict (Dict) -- metrics/media to be logged in current step
"""
Expand All @@ -464,7 +473,7 @@ def log(self, log_dict):
def end_epoch(self, best_result=False):
"""
commit the log_dict, model artifacts and Tables to W&B and flush the log_dict.

arguments:
best_result (boolean): Boolean representing if the result of this evaluation is best or not
"""
Expand Down