Skip to content
This repository was archived by the owner on Sep 3, 2022. It is now read-only.

Commit 15e74c0

Browse files
committed
Move TensorBoard and TensorFlow Events UI rendering to Python function to deprecate magic. (#163)
* Update feature slice view UI. Added Slices Overview. * Move TensorBoard and TensorFlow Events UI rendering to Python function to deprecate magic. Use matplotlib for tf events plotting so it can display well in static HTML pages (such as github). Improve TensorFlow Events list/get APIs. * Follow up on CR comments.
1 parent 9235abd commit 15e74c0

File tree

3 files changed

+111
-95
lines changed

3 files changed

+111
-95
lines changed

datalab/mlalpha/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
from ._cloud_predictor import CloudPredictor
2222
from ._job import Jobs
2323
from ._summary import Summary
24-
from ._tensorboard import TensorBoardManager
24+
from ._tensorboard import TensorBoard
2525
from ._dataset import CsvDataSet, BigQueryDataSet
2626
from ._package import Packager
2727
from ._cloud_models import CloudModels, CloudModelVersions

datalab/mlalpha/_summary.py

Lines changed: 94 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -1,107 +1,127 @@
1-
# Copyright 2016 Google Inc. All rights reserved.
2-
#
3-
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
4-
# in compliance with the License. You may obtain a copy of the License at
5-
#
6-
# http://www.apache.org/licenses/LICENSE-2.0
7-
#
8-
# Unless required by applicable law or agreed to in writing, software distributed under the License
9-
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
10-
# or implied. See the License for the specific language governing permissions and limitations under
11-
# the License.
12-
13-
"""Implements Cloud ML Summary wrapper."""
14-
151
import datetime
2+
import fnmatch
163
import glob
4+
import google.cloud.ml as ml
5+
import matplotlib.pyplot as plt
176
import os
7+
import pandas as pd
188
from tensorflow.core.util import event_pb2
199
from tensorflow.python.lib.io import tf_record
2010

21-
import datalab.storage as storage
22-
2311

2412
class Summary(object):
25-
"""Represents TensorFlow summary events from files under a directory."""
13+
"""Represents TensorFlow summary events from files under specified directories."""
2614

27-
def __init__(self, path):
15+
def __init__(self, paths):
2816
"""Initializes an instance of a Summary.
2917
3018
Args:
31-
path: the path of the directory which holds TensorFlow events files.
32-
Can be local path or GCS path.
19+
paths: a path or a list of paths to directories which hold TensorFlow events files.
20+
Can be local paths or GCS paths. Wildcards are allowed.
3321
"""
34-
self._path = path
35-
36-
def _get_events_files(self):
37-
if self._path.startswith('gs://'):
38-
storage._api.Api.verify_permitted_to_read(self._path)
39-
bucket, prefix = storage._bucket.parse_name(self._path)
40-
items = storage.Items(bucket, prefix, None)
41-
filtered_list = [item.uri for item in items if os.path.basename(item.uri).find('tfevents')]
42-
return filtered_list
43-
else:
44-
path_pattern = os.path.join(self._path, '*tfevents*')
45-
return glob.glob(path_pattern)
22+
self._paths = [paths] if isinstance(paths, basestring) else paths
23+
24+
def _glob_events_files(self, paths):
25+
event_files = []
26+
for path in paths:
27+
if path.startswith('gs://'):
28+
event_files += ml.util._file.glob_files(os.path.join(path, '*.tfevents.*'))
29+
else:
30+
dirs = ml.util._file.glob_files(path)
31+
for dir in dirs:
32+
for root, _, filenames in os.walk(dir):
33+
for filename in fnmatch.filter(filenames, '*.tfevents.*'):
34+
event_files.append(os.path.join(root, filename))
35+
return event_files
4636

4737
def list_events(self):
4838
"""List all scalar events in the directory.
4939
5040
Returns:
51-
A set of unique event tags.
41+
A dictionary. Key is the name of an event. Value is a set of dirs that contain that event.
5242
"""
53-
event_tags = set()
54-
for event_file in self._get_events_files():
43+
event_dir_dict = {}
44+
for event_file in self._glob_events_files(self._paths):
45+
dir = os.path.dirname(event_file)
5546
for record in tf_record.tf_record_iterator(event_file):
5647
event = event_pb2.Event.FromString(record)
5748
if event.summary is None or event.summary.value is None:
5849
continue
5950
for value in event.summary.value:
60-
if value.simple_value is None:
51+
if value.simple_value is None or value.tag is None:
6152
continue
62-
if value.tag is not None and value.tag not in event_tags:
63-
event_tags.add(value.tag)
64-
return event_tags
53+
if not value.tag in event_dir_dict:
54+
event_dir_dict[value.tag] = set()
55+
event_dir_dict[value.tag].add(dir)
56+
return event_dir_dict
57+
6558

66-
def get_events(self, event_name):
67-
"""Get all events of a certain tag.
59+
def get_events(self, event_names):
60+
"""Get all events as pandas DataFrames given a list of names.
6861
6962
Args:
70-
event_name: the tag of event to look for.
63+
event_names: An event name or a list of event names to get.
7164
7265
Returns:
73-
A tuple. First is a list of {time_span, event_name}. Second is a list of {step, event_name}.
74-
75-
Raises:
76-
Exception if event start time cannot be found
66+
A list with the same length as event_names. Each element is a dictionary
67+
{dir1: DataFrame1, dir2: DataFrame2, ...}.
68+
Multiple directories may contain events with the same name, but they are different
69+
events (e.g. 'loss' under train_set/, and 'loss' under eval_set/).
7770
"""
78-
events_time = []
79-
events_step = []
80-
event_start_time = None
81-
for event_file in self._get_events_files():
82-
for record in tf_record.tf_record_iterator(event_file):
83-
event = event_pb2.Event.FromString(record)
84-
if event.file_version is not None:
85-
# first event in the file.
86-
time = datetime.datetime.fromtimestamp(event.wall_time)
87-
if event_start_time is None or event_start_time > time:
88-
event_start_time = time
71+
event_names = [event_names] if isinstance(event_names, basestring) else event_names
8972

90-
if event.summary is None or event.summary.value is None:
91-
continue
92-
for value in event.summary.value:
93-
if value.simple_value is None or value.tag is None:
73+
all_events = self.list_events()
74+
dirs_to_look = set()
75+
for event, dirs in all_events.iteritems():
76+
if event in event_names:
77+
dirs_to_look.update(dirs)
78+
79+
ret_events = [dict() for i in range(len(event_names))]
80+
for dir in dirs_to_look:
81+
for event_file in self._glob_events_files([dir]):
82+
for record in tf_record.tf_record_iterator(event_file):
83+
event = event_pb2.Event.FromString(record)
84+
if event.summary is None or event.wall_time is None or event.summary.value is None:
9485
continue
95-
if value.tag == event_name:
96-
if event.wall_time is not None:
97-
time = datetime.datetime.fromtimestamp(event.wall_time)
98-
events_time.append({'time': time, event_name: value.simple_value})
99-
if event.step is not None:
100-
events_step.append({'step': event.step, event_name: value.simple_value})
101-
if event_start_time is None:
102-
raise Exception('Empty or invalid TF events file. Cannot find event start time.')
103-
for event in events_time:
104-
event['time'] = event['time'] - event_start_time # convert time to timespan
105-
events_time = sorted(events_time, key=lambda k: k['time'])
106-
events_step = sorted(events_step, key=lambda k: k['step'])
107-
return events_time, events_step
86+
87+
event_time = datetime.datetime.fromtimestamp(event.wall_time)
88+
for value in event.summary.value:
89+
if value.tag not in event_names or value.simple_value is None:
90+
continue
91+
92+
index = event_names.index(value.tag)
93+
dir_event_dict = ret_events[index]
94+
if dir not in dir_event_dict:
95+
dir_event_dict[dir] = pd.DataFrame(
96+
[[event_time, event.step, value.simple_value]],
97+
columns=['time', 'step', 'value'])
98+
else:
99+
df = dir_event_dict[dir]
100+
# Append a row.
101+
df.loc[len(df)] = [event_time, event.step, value.simple_value]
102+
103+
for dir_event_dict in ret_events:
104+
for df in dir_event_dict.values():
105+
df.sort_values(by=['time'], inplace=True)
106+
107+
return ret_events
108+
109+
def plot(self, event_names, x_axis='step'):
110+
"""Plots a list of events. Each event (a dir+event_name) is represetented as a line
111+
in the graph.
112+
113+
Args:
114+
event_names: A list of events to plot. Each event_name may correspond to multiple events,
115+
each in a different directory.
116+
x_axis: whether to use step or time as x axis.
117+
"""
118+
event_names = [event_names] if isinstance(event_names, basestring) else event_names
119+
events_list = self.get_events(event_names)
120+
for event_name, dir_event_dict in zip(event_names, events_list):
121+
for dir, df in dir_event_dict.iteritems():
122+
label = event_name + ':' + dir
123+
x_column = df['step'] if x_axis == 'step' else df['time']
124+
plt.plot(x_column, df['value'], label=label)
125+
plt.legend(loc='best')
126+
plt.show()
127+

datalab/mlalpha/_tensorboard.py

Lines changed: 16 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright 2016 Google Inc. All rights reserved.
1+
# Copyright 2017 Google Inc. All rights reserved.
22
#
33
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
44
# in compliance with the License. You may obtain a copy of the License at
@@ -11,6 +11,11 @@
1111
# the License.
1212

1313

14+
try:
15+
import IPython
16+
except ImportError:
17+
raise Exception('This module can only be loaded in ipython.')
18+
1419
import argparse
1520
import psutil
1621
import subprocess
@@ -20,15 +25,13 @@
2025
import datalab.storage
2126

2227

23-
class TensorBoardManager(object):
28+
class TensorBoard(object):
2429
"""Start, shutdown, and list TensorBoard instances.
2530
"""
2631

2732
@staticmethod
28-
def get_running_list():
33+
def list():
2934
"""List running TensorBoard instances.
30-
31-
Returns: A list of {'pid': pid, 'logdir': logdir, 'port': port}
3235
"""
3336
running_list = []
3437
parser = argparse.ArgumentParser()
@@ -41,27 +44,16 @@ def get_running_list():
4144
del cmd_args[0:2] # remove 'python' and 'tensorboard'
4245
args = parser.parse_args(cmd_args)
4346
running_list.append({'pid': p.pid, 'logdir': args.logdir, 'port': args.port})
44-
return running_list
45-
46-
@staticmethod
47-
def get_reverse_proxy_url(port):
48-
"""Get the reverse proxy url. Note that this URL only works with
49-
Datalab web server which supports reverse proxy.
50-
51-
Args:
52-
port: the port of the tensorboard instance.
53-
Returns: the reverse proxy URL.
54-
"""
55-
return '/_proxy/%d/' % port
47+
IPython.display.display(datalab.utils.commands.render_dictionary(
48+
running_list, ['pid', 'logdir', 'port']))
5649

50+
5751
@staticmethod
5852
def start(logdir):
5953
"""Start a TensorBoard instance.
6054
6155
Args:
6256
logdir: the logdir to run TensorBoard on.
63-
Returns:
64-
A tuple. First is the pid of the instance. Second is the port used.
6557
Raises:
6658
Exception if the instance cannot be started.
6759
"""
@@ -77,7 +69,11 @@ def start(logdir):
7769
retry = 5
7870
while (retry > 0):
7971
if datalab.utils.is_http_running_on(port):
80-
return p.pid, port
72+
url = '/_proxy/%d/' % port
73+
html = '<p>TensorBoard was started successfully with pid %d. ' % p.pid
74+
html += 'Click <a href="%s" target="_blank">here</a> to access it.</p>' % url
75+
IPython.display.display_html(html, raw=True)
76+
return
8177
time.sleep(1)
8278
retry -= 1
8379

0 commit comments

Comments
 (0)