Commit

Fix for Autologging failing some log statements (mlflow#1690)
* Added try catches

* Addressing review

* Apply suggestions from code review

Co-Authored-By: Siddharth Murching <smurching@gmail.com>
apurva-koti and smurching authored Aug 2, 2019
1 parent 8e92237 commit b808168
Showing 3 changed files with 50 additions and 57 deletions.
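
This commit replaces the per-call try/except blocks that previously guarded each autologging statement with a single shared helper, try_mlflow_log, added in mlflow/utils/autologging_utils.py (the third file below). A minimal sketch of the pattern, with a made-up parameter name and value:

    import warnings

    import mlflow
    from mlflow.exceptions import MlflowException
    from mlflow.utils.autologging_utils import try_mlflow_log

    # Before: each logging call carried its own try/except and downgraded
    # failures to warnings.
    try:
        mlflow.log_param('learning_rate', 0.01)
    except MlflowException as e:
        warnings.warn("Logging to MLflow failed: " + str(e))

    # After: the callable and its arguments are handed to the shared helper,
    # which performs the call and converts any failure into a warning.
    try_mlflow_log(mlflow.log_param, 'learning_rate', 0.01)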
44 changes: 19 additions & 25 deletions mlflow/keras.py
@@ -14,7 +14,6 @@
 import os
 import yaml
 import gorilla
-import warnings

 import pandas as pd

@@ -26,6 +25,7 @@
 from mlflow.utils.environment import _mlflow_conda_env
 from mlflow.utils.model_utils import _get_flavor_configuration
 from mlflow.utils.annotations import experimental
+from mlflow.utils.autologging_utils import try_mlflow_log


 FLAVOR_NAME = "keras"

@@ -356,32 +356,26 @@ class __MLflowKerasCallback(keras.callbacks.Callback):
         def on_epoch_end(self, epoch, logs=None):
             if not logs:
                 return
-            try:
-                mlflow.log_metrics(logs, step=epoch)
-            except mlflow.exceptions.MlflowException as e:
-                warnings.warn("Logging to MLflow failed: " + str(e))
+            try_mlflow_log(mlflow.log_metrics, logs, step=epoch)

         def on_train_end(self, logs=None):
-            try:
-                mlflow.log_param('num_layers', len(self.model.layers))
-                mlflow.log_param('optimizer_name', type(self.model.optimizer).__name__)
-                if hasattr(self.model.optimizer, 'lr'):
-                    lr = self.model.optimizer.lr if \
-                        type(self.model.optimizer.lr) is float \
-                        else keras.backend.eval(self.model.optimizer.lr)
-                    mlflow.log_param('learning_rate', lr)
-                if hasattr(self.model.optimizer, 'epsilon'):
-                    epsilon = self.model.optimizer.epsilon if \
-                        type(self.model.optimizer.epsilon) is float \
-                        else keras.backend.eval(self.model.optimizer.epsilon)
-                    mlflow.log_param('epsilon', epsilon)
-                sum_list = []
-                self.model.summary(print_fn=sum_list.append)
-                summary = '\n'.join(sum_list)
-                mlflow.set_tag('summary', summary)
-                log_model(self.model, artifact_path='model')
-            except mlflow.exceptions.MlflowException as e:
-                warnings.warn("Logging to Mlflow failed: " + str(e))
+            try_mlflow_log(mlflow.log_param, 'num_layers', len(self.model.layers))
+            try_mlflow_log(mlflow.log_param, 'optimizer_name', type(self.model.optimizer).__name__)
+            if hasattr(self.model.optimizer, 'lr'):
+                lr = self.model.optimizer.lr if \
+                    type(self.model.optimizer.lr) is float \
+                    else keras.backend.eval(self.model.optimizer.lr)
+                try_mlflow_log(mlflow.log_param, 'learning_rate', lr)
+            if hasattr(self.model.optimizer, 'epsilon'):
+                epsilon = self.model.optimizer.epsilon if \
+                    type(self.model.optimizer.epsilon) is float \
+                    else keras.backend.eval(self.model.optimizer.epsilon)
+                try_mlflow_log(mlflow.log_param, 'epsilon', epsilon)
+            sum_list = []
+            self.model.summary(print_fn=sum_list.append)
+            summary = '\n'.join(sum_list)
+            try_mlflow_log(mlflow.set_tag, 'summary', summary)
+            try_mlflow_log(log_model, self.model, artifact_path='model')

     @gorilla.patch(keras.Model)
     def fit(self, *args, **kwargs):

52 changes: 20 additions & 32 deletions mlflow/tensorflow.py
@@ -36,6 +36,7 @@
 from mlflow.utils.environment import _mlflow_conda_env
 from mlflow.utils.file_utils import _copy_file_or_tree
 from mlflow.utils.model_utils import _get_flavor_configuration
+from mlflow.utils.autologging_utils import try_mlflow_log
 from mlflow.entities import Metric


@@ -369,26 +370,23 @@ def on_train_end(self, logs=None):  # pylint: disable=unused-argument
         opt = self.model.optimizer
         if hasattr(opt, 'optimizer'):
             opt = opt.optimizer
-        mlflow.log_param('optimizer_name', type(opt).__name__)
+        try_mlflow_log(mlflow.log_param, 'optimizer_name', type(opt).__name__)
         if hasattr(opt, '_lr'):
             lr = opt._lr if type(opt._lr) is float else tensorflow.keras.backend.eval(opt._lr)
-            mlflow.log_param('learning_rate', lr)
+            try_mlflow_log(mlflow.log_param, 'learning_rate', lr)
         if hasattr(opt, '_epsilon'):
             epsilon = opt._epsilon if type(opt._epsilon) is float \
                 else tensorflow.keras.backend.eval(opt._epsilon)
-            mlflow.log_param('epsilon', epsilon)
+            try_mlflow_log(mlflow.log_param, 'epsilon', epsilon)
         l = []
         self.model.summary(print_fn=l.append)
         summary = '\n'.join(l)
-        mlflow.set_tag('summary', summary)
-        mlflow.keras.log_model(self.model, artifact_path='model')
+        try_mlflow_log(mlflow.set_tag, 'summary', summary)
+        try_mlflow_log(mlflow.keras.log_model, self.model, artifact_path='model')


 def _log_artifacts_with_warning(**kwargs):
-    try:
-        mlflow.log_artifacts(**kwargs)
-    except MlflowException as e:
-        warnings.warn("Logging to MLflow failed: " + str(e))
+    try_mlflow_log(mlflow.log_artifacts, **kwargs)


 def _assoc_list_to_map(lst):
@@ -407,15 +405,11 @@ def _flush_queue():
     Queue is divided into batches according to run id.
     """
     global _metric_queue
-    try:
-        client = mlflow.tracking.MlflowClient()
-        dic = _assoc_list_to_map(_metric_queue)
-        for key in dic:
-            client.log_batch(key, metrics=dic[key], params=[], tags=[])
-    except MlflowException as e:
-        warnings.warn("Logging to MLflow failed: " + str(e))
-    finally:
-        _metric_queue = []
+    client = mlflow.tracking.MlflowClient()
+    dic = _assoc_list_to_map(_metric_queue)
+    for key in dic:
+        try_mlflow_log(client.log_batch, key, metrics=dic[key], params=[], tags=[])
+    _metric_queue = []


 atexit.register(_flush_queue)
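
For context, _flush_queue sends the queued metrics one run at a time through MlflowClient.log_batch, guarded by the same helper. An illustrative sketch of that call outside the callbacks (the run and the metric values are made up):

    import time

    import mlflow
    from mlflow.entities import Metric
    from mlflow.utils.autologging_utils import try_mlflow_log

    client = mlflow.tracking.MlflowClient()
    run = client.create_run(experiment_id='0')  # hypothetical run for the example
    metrics = [Metric(key='loss', value=0.42, timestamp=int(time.time() * 1000), step=3)]
    # One guarded log_batch call per run id, mirroring _flush_queue above.
    try_mlflow_log(client.log_batch, run.info.run_id, metrics=metrics, params=[], tags=[])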
@@ -514,27 +508,21 @@ def export_saved_model(self, *args, **kwargs):
         original = gorilla.get_original_attribute(tensorflow.estimator.Estimator,
                                                   'export_saved_model')
         serialized = original(self, *args, **kwargs)
-        try:
-            log_model(tf_saved_model_dir=serialized.decode('utf-8'),
-                      tf_meta_graph_tags=[tag_constants.SERVING],
-                      tf_signature_def_key='predict',
-                      artifact_path='model')
-        except MlflowException as e:
-            warnings.warn("Logging to MLflow failed: " + str(e))
+        try_mlflow_log(log_model, tf_saved_model_dir=serialized.decode('utf-8'),
+                       tf_meta_graph_tags=[tag_constants.SERVING],
+                       tf_signature_def_key='predict',
+                       artifact_path='model')
         return serialized

     @gorilla.patch(tensorflow.estimator.Estimator)
     def export_savedmodel(self, *args, **kwargs):
         original = gorilla.get_original_attribute(tensorflow.estimator.Estimator,
                                                   'export_savedmodel')
         serialized = original(self, *args, **kwargs)
-        try:
-            log_model(tf_saved_model_dir=serialized.decode('utf-8'),
-                      tf_meta_graph_tags=[tag_constants.SERVING],
-                      tf_signature_def_key='predict',
-                      artifact_path='model')
-        except MlflowException as e:
-            warnings.warn("Logging to MLflow failed: " + str(e))
+        try_mlflow_log(log_model, tf_saved_model_dir=serialized.decode('utf-8'),
+                       tf_meta_graph_tags=[tag_constants.SERVING],
+                       tf_signature_def_key='predict',
+                       artifact_path='model')
         return serialized

     @gorilla.patch(tensorflow.keras.Model)

11 changes: 11 additions & 0 deletions mlflow/utils/autologging_utils.py
@@ -0,0 +1,11 @@
+import warnings
+
+
+def try_mlflow_log(fn, *args, **kwargs):
+    """
+    Catch exceptions and log a warning to avoid autolog throwing.
+    """
+    try:
+        fn(*args, **kwargs)
+    except Exception as e:  # pylint: disable=broad-except
+        warnings.warn("Logging to MLflow failed: " + str(e), stacklevel=2)
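
A brief usage note on the helper: the logging function is passed uncalled, followed by its own positional and keyword arguments, so that the call itself happens inside the try block. An illustrative sketch with made-up values:

    import mlflow
    from mlflow.utils.autologging_utils import try_mlflow_log

    # The callable comes first; try_mlflow_log invokes it and turns any
    # exception into a warning instead of letting it propagate.
    try_mlflow_log(mlflow.log_param, 'learning_rate', 0.001)
    try_mlflow_log(mlflow.log_metrics, {'loss': 0.42}, step=3)

    # Calling the function inline would evaluate it before the helper runs,
    # so its exceptions would not be caught:
    # try_mlflow_log(mlflow.log_param('learning_rate', 0.001))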
