From b80816842d0297d134498c78c984d10007d4356f Mon Sep 17 00:00:00 2001 From: apurva-koti <51172624+apurva-koti@users.noreply.github.com> Date: Fri, 2 Aug 2019 15:41:56 -0700 Subject: [PATCH] Fix for Autologging failing some log statements (#1690) * Added try catches * Addressing review * Apply suggestions from code review Co-Authored-By: Siddharth Murching --- mlflow/keras.py | 44 +++++++++++--------------- mlflow/tensorflow.py | 52 ++++++++++++------------------- mlflow/utils/autologging_utils.py | 11 +++++++ 3 files changed, 50 insertions(+), 57 deletions(-) create mode 100644 mlflow/utils/autologging_utils.py diff --git a/mlflow/keras.py b/mlflow/keras.py index c75018f26d0a6..45ac02f9cf24f 100644 --- a/mlflow/keras.py +++ b/mlflow/keras.py @@ -14,7 +14,6 @@ import os import yaml import gorilla -import warnings import pandas as pd @@ -26,6 +25,7 @@ from mlflow.utils.environment import _mlflow_conda_env from mlflow.utils.model_utils import _get_flavor_configuration from mlflow.utils.annotations import experimental +from mlflow.utils.autologging_utils import try_mlflow_log FLAVOR_NAME = "keras" @@ -356,32 +356,26 @@ class __MLflowKerasCallback(keras.callbacks.Callback): def on_epoch_end(self, epoch, logs=None): if not logs: return - try: - mlflow.log_metrics(logs, step=epoch) - except mlflow.exceptions.MlflowException as e: - warnings.warn("Logging to MLflow failed: " + str(e)) + try_mlflow_log(mlflow.log_metrics, logs, step=epoch) def on_train_end(self, logs=None): - try: - mlflow.log_param('num_layers', len(self.model.layers)) - mlflow.log_param('optimizer_name', type(self.model.optimizer).__name__) - if hasattr(self.model.optimizer, 'lr'): - lr = self.model.optimizer.lr if \ - type(self.model.optimizer.lr) is float \ - else keras.backend.eval(self.model.optimizer.lr) - mlflow.log_param('learning_rate', lr) - if hasattr(self.model.optimizer, 'epsilon'): - epsilon = self.model.optimizer.epsilon if \ - type(self.model.optimizer.epsilon) is float \ - else keras.backend.eval(self.model.optimizer.epsilon) - mlflow.log_param('epsilon', epsilon) - sum_list = [] - self.model.summary(print_fn=sum_list.append) - summary = '\n'.join(sum_list) - mlflow.set_tag('summary', summary) - log_model(self.model, artifact_path='model') - except mlflow.exceptions.MlflowException as e: - warnings.warn("Logging to Mlflow failed: " + str(e)) + try_mlflow_log(mlflow.log_param, 'num_layers', len(self.model.layers)) + try_mlflow_log(mlflow.log_param, 'optimizer_name', type(self.model.optimizer).__name__) + if hasattr(self.model.optimizer, 'lr'): + lr = self.model.optimizer.lr if \ + type(self.model.optimizer.lr) is float \ + else keras.backend.eval(self.model.optimizer.lr) + try_mlflow_log(mlflow.log_param, 'learning_rate', lr) + if hasattr(self.model.optimizer, 'epsilon'): + epsilon = self.model.optimizer.epsilon if \ + type(self.model.optimizer.epsilon) is float \ + else keras.backend.eval(self.model.optimizer.epsilon) + try_mlflow_log(mlflow.log_param, 'epsilon', epsilon) + sum_list = [] + self.model.summary(print_fn=sum_list.append) + summary = '\n'.join(sum_list) + try_mlflow_log(mlflow.set_tag, 'summary', summary) + try_mlflow_log(log_model, self.model, artifact_path='model') @gorilla.patch(keras.Model) def fit(self, *args, **kwargs): diff --git a/mlflow/tensorflow.py b/mlflow/tensorflow.py index a507385e9e073..26d6466bdb825 100644 --- a/mlflow/tensorflow.py +++ b/mlflow/tensorflow.py @@ -36,6 +36,7 @@ from mlflow.utils.environment import _mlflow_conda_env from mlflow.utils.file_utils import _copy_file_or_tree from mlflow.utils.model_utils import _get_flavor_configuration +from mlflow.utils.autologging_utils import try_mlflow_log from mlflow.entities import Metric @@ -369,26 +370,23 @@ def on_train_end(self, logs=None): # pylint: disable=unused-argument opt = self.model.optimizer if hasattr(opt, 'optimizer'): opt = opt.optimizer - mlflow.log_param('optimizer_name', type(opt).__name__) + try_mlflow_log(mlflow.log_param, 'optimizer_name', type(opt).__name__) if hasattr(opt, '_lr'): lr = opt._lr if type(opt._lr) is float else tensorflow.keras.backend.eval(opt._lr) - mlflow.log_param('learning_rate', lr) + try_mlflow_log(mlflow.log_param('learning_rate', lr)) if hasattr(opt, '_epsilon'): epsilon = opt._epsilon if type(opt._epsilon) is float \ else tensorflow.keras.backend.eval(opt._epsilon) - mlflow.log_param('epsilon', epsilon) + try_mlflow_log(mlflow.log_param, 'epsilon', epsilon) l = [] self.model.summary(print_fn=l.append) summary = '\n'.join(l) - mlflow.set_tag('summary', summary) - mlflow.keras.log_model(self.model, artifact_path='model') + try_mlflow_log(mlflow.set_tag, 'summary', summary) + try_mlflow_log(mlflow.keras.log_model, self.model, artifact_path='model') def _log_artifacts_with_warning(**kwargs): - try: - mlflow.log_artifacts(**kwargs) - except MlflowException as e: - warnings.warn("Logging to MLflow failed: " + str(e)) + try_mlflow_log(mlflow.log_artifacts, **kwargs) def _assoc_list_to_map(lst): @@ -407,15 +405,11 @@ def _flush_queue(): Queue is divided into batches according to run id. """ global _metric_queue - try: - client = mlflow.tracking.MlflowClient() - dic = _assoc_list_to_map(_metric_queue) - for key in dic: - client.log_batch(key, metrics=dic[key], params=[], tags=[]) - except MlflowException as e: - warnings.warn("Logging to MLflow failed: " + str(e)) - finally: - _metric_queue = [] + client = mlflow.tracking.MlflowClient() + dic = _assoc_list_to_map(_metric_queue) + for key in dic: + try_mlflow_log(client.log_batch, key, metrics=dic[key], params=[], tags=[]) + _metric_queue = [] atexit.register(_flush_queue) @@ -514,13 +508,10 @@ def export_saved_model(self, *args, **kwargs): original = gorilla.get_original_attribute(tensorflow.estimator.Estimator, 'export_saved_model') serialized = original(self, *args, **kwargs) - try: - log_model(tf_saved_model_dir=serialized.decode('utf-8'), - tf_meta_graph_tags=[tag_constants.SERVING], - tf_signature_def_key='predict', - artifact_path='model') - except MlflowException as e: - warnings.warn("Logging to MLflow failed: " + str(e)) + try_mlflow_log(log_model, tf_saved_model_dir=serialized.decode('utf-8'), + tf_meta_graph_tags=[tag_constants.SERVING], + tf_signature_def_key='predict', + artifact_path='model') return serialized @gorilla.patch(tensorflow.estimator.Estimator) @@ -528,13 +519,10 @@ def export_savedmodel(self, *args, **kwargs): original = gorilla.get_original_attribute(tensorflow.estimator.Estimator, 'export_savedmodel') serialized = original(self, *args, **kwargs) - try: - log_model(tf_saved_model_dir=serialized.decode('utf-8'), - tf_meta_graph_tags=[tag_constants.SERVING], - tf_signature_def_key='predict', - artifact_path='model') - except MlflowException as e: - warnings.warn("Logging to MLflow failed: " + str(e)) + try_mlflow_log(log_model, tf_saved_model_dir=serialized.decode('utf-8'), + tf_meta_graph_tags=[tag_constants.SERVING], + tf_signature_def_key='predict', + artifact_path='model') return serialized @gorilla.patch(tensorflow.keras.Model) diff --git a/mlflow/utils/autologging_utils.py b/mlflow/utils/autologging_utils.py new file mode 100644 index 0000000000000..b581d8f0cc27c --- /dev/null +++ b/mlflow/utils/autologging_utils.py @@ -0,0 +1,11 @@ +import warnings + + +def try_mlflow_log(fn, *args, **kwargs): + """ + Catch exceptions and log a warning to avoid autolog throwing. + """ + try: + fn(*args, **kwargs) + except Exception as e: # pylint: disable=broad-except + warnings.warn("Logging to MLflow failed: " + str(e), stacklevel=2)