-
Notifications
You must be signed in to change notification settings - Fork 1.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support choosing tensorboard version from UI #2690
Changes from 4 commits
556ace1
f9259bf
8d541fb
e0d2f4b
c89a22b
1bf624a
301ec41
07ba4fc
68ae3bf
45af753
324bc83
81dab3b
9578720
48bffbc
65ff6bb
df9c9fa
9a22416
d896c4c
13b5cf6
c05c1ee
6bfb09a
65b1d4a
e8f6644
5bed8d8
2900699
a8308c7
d5a2e15
5c5687a
0042e1f
a6374ae
e3ee9f8
8801bb2
4bf953a
34e26ed
fec3711
04048ac
f0e1790
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -71,7 +71,7 @@ if (isInCluster) { | |
k8sV1CustomObjectClient = kc.makeApiClient(Custom_objectsApi); | ||
} | ||
|
||
function getNameOfViewerResource(logdir: string): string { | ||
function getNameOfViewerResource(logdir: string, tfversion: string): string { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Some comments on what the viewer resource id is determined? E.g., the id depends on logdir and/or tfversion? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The id depends only on logdir for now. I'll remove the unused parameters. |
||
// TODO: find some hash function with shorter resulting message. | ||
return 'viewer-' + crypto.SHA1(logdir); | ||
} | ||
|
@@ -80,11 +80,11 @@ function getNameOfViewerResource(logdir: string): string { | |
* Create Tensorboard instance via CRD with the given logdir if there is no | ||
* existing Tensorboard instance. | ||
*/ | ||
export async function newTensorboardInstance(logdir: string, podTemplateSpec: Object = defaultPodTemplateSpec): Promise<void> { | ||
export async function newTensorboardInstance(logdir: string, tfversion: string, podTemplateSpec: Object = defaultPodTemplateSpec): Promise<void> { | ||
if (!k8sV1CustomObjectClient) { | ||
throw new Error('Cannot access kubernetes Custom Object API'); | ||
} | ||
const currentPod = await getTensorboardInstance(logdir); | ||
const currentPod = await getTensorboardInstance(logdir, tfversion); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What if there's a current pod with a different version? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. After some discussion with Jing, we find we are facing two choices:
Maybe we can discuss later to decide which one to choose! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 2 sgtm |
||
if (currentPod) { | ||
return; | ||
} | ||
|
@@ -93,15 +93,14 @@ export async function newTensorboardInstance(logdir: string, podTemplateSpec: Ob | |
apiVersion: viewerGroup + '/' + viewerVersion, | ||
kind: 'Viewer', | ||
metadata: { | ||
name: getNameOfViewerResource(logdir), | ||
name: getNameOfViewerResource(logdir, tfversion), | ||
namespace: namespace, | ||
}, | ||
spec: { | ||
type: 'tensorboard', | ||
tensorboardSpec: { | ||
logDir: logdir, | ||
// TODO(jingzhang36): tensorflow image version read from input textbox. | ||
tensorflowImage: 'tensorflow/tensorflow:1.13.2', | ||
tensorflowImage: 'tensorflow/tensorflow:'+tfversion | ||
}, | ||
podTemplateSpec | ||
} | ||
|
@@ -114,21 +113,22 @@ export async function newTensorboardInstance(logdir: string, podTemplateSpec: Ob | |
* Finds a running Tensorboard instance created via CRD with the given logdir | ||
* and returns its dns address. | ||
*/ | ||
export async function getTensorboardInstance(logdir: string): Promise<string> { | ||
export async function getTensorboardInstance(logdir: string, tfversion: string): Promise<string> { | ||
if (!k8sV1CustomObjectClient) { | ||
throw new Error('Cannot access kubernetes Custom Object API'); | ||
} | ||
|
||
return await (k8sV1CustomObjectClient.getNamespacedCustomObject( | ||
viewerGroup, viewerVersion, namespace, viewerPlural, | ||
getNameOfViewerResource(logdir))).then( | ||
getNameOfViewerResource(logdir, tfversion))).then( | ||
// Viewer CRD pod has tensorboard instance running at port 6006 while | ||
// viewer CRD service has tensorboard instance running at port 80. Since | ||
// we return service address here (instead of pod address), so use 80. | ||
(viewer: any) => ( | ||
viewer && viewer.body && | ||
viewer.body.spec.tensorboardSpec.logDir == logdir && | ||
viewer.body.spec.type == 'tensorboard') ? | ||
viewer.body.spec.type == 'tensorboard' && | ||
viewer.body.spec.tensorboardSpec.tensorflowImage == 'tensorflow/tensorflow:'+tfversion) ? | ||
`http://${viewer.body.metadata.name}-service.${namespace}.svc.cluster.local:80/tensorboard/${viewer.body.metadata.name}/` : '', | ||
// No existing custom object with the given name, i.e., no existing | ||
// tensorboard instance. | ||
|
@@ -140,14 +140,14 @@ export async function getTensorboardInstance(logdir: string): Promise<string> { | |
* Polls every second for a running Tensorboard instance with the given logdir, | ||
* and returns the address of one if found, or rejects if a timeout expires. | ||
*/ | ||
export function waitForTensorboardInstance(logdir: string, timeout: number): Promise<string> { | ||
export function waitForTensorboardInstance(logdir: string, tfversion: string, timeout: number): Promise<string> { | ||
const start = Date.now(); | ||
return new Promise((resolve, reject) => { | ||
setInterval(async () => { | ||
if (Date.now() - start > timeout) { | ||
reject('Timed out waiting for tensorboard'); | ||
} | ||
const tensorboardAddress = await getTensorboardInstance(logdir); | ||
const tensorboardAddress = await getTensorboardInstance(logdir, tfversion); | ||
if (tensorboardAddress) { | ||
resolve(encodeURIComponent(tensorboardAddress)); | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: you are not using any regex features, just using string.startsWith is simpler
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the review and suggestion! I will re-format the code today.