Skip to content

Commit

Permalink
Tensorboard dev improvements (#673)
Browse files Browse the repository at this point in the history
* Fix unhandled promise rejection on tensorboard-dev.js

* exit error log

* fix test

* no python gitlab ci

* Update .gitlab-ci.yml

Co-authored-by: Casper da Costa-Luis <work@cdcl.ml>

Co-authored-by: Helio Machado <0x2b3bfa0+git@googlemail.com>
Co-authored-by: Casper da Costa-Luis <work@cdcl.ml>
  • Loading branch information
3 people authored Aug 5, 2021
1 parent 5dbaae7 commit e556f20
Show file tree
Hide file tree
Showing 3 changed files with 137 additions and 74 deletions.
7 changes: 1 addition & 6 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,7 @@ test_runner:
tags:
- cml-runner-gpu
script:
- sudo update-alternatives --install /usr/bin/python python $(which python3)
10
- sudo apt-get update && sudo apt-get install -y python-pip python3-pip
- sudo pip install --upgrade pip
- sudo pip install --upgrade setuptools
- sudo pip install tensorboard
- pip install tensorboard

- npm ci
- npm run lint
Expand Down
160 changes: 92 additions & 68 deletions bin/cml-tensorboard-dev.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ const tempy = require('tempy');
const { exec, watermarkUri } = require('../src/utils');

const { TB_CREDENTIALS } = process.env;
const isCLI = require.main === module;

const closeFd = (fd) => {
try {
Expand All @@ -21,6 +22,32 @@ const closeFd = (fd) => {
}
};

/**
 * Polls a log file until a URL shows up in it and resolves with that URL,
 * optionally watermarked and/or formatted as a markdown link.
 *
 * @param {object} [opts]
 * @param {string} opts.stdout - Path of the log file scanned for the URL.
 * @param {string} opts.stderror - Path of the log file whose contents are
 *   appended to the timeout error message.
 * @param {string} [opts.title] - Markdown link text; falls back to `name`.
 * @param {string} [opts.name] - Markdown link text used when `title` is unset.
 * @param {boolean} [opts.rmWatermark] - When truthy, the URL is returned
 *   without passing it through `watermarkUri`.
 * @param {boolean} [opts.md] - When truthy, output is `[title || name](url)`.
 * @param {number} [opts.interval=5000] - Polling period in milliseconds.
 * @param {number} [opts.timeout=60000] - Milliseconds to wait before giving up.
 * @returns {Promise<string>} The first URL found in `stdout`.
 * @throws {Error} Rejects with `Tensorboard took too long. <stderr>` on
 *   timeout, or with the underlying error if `stdout` cannot be read.
 */
const tbLink = (opts = {}) => {
  const {
    stdout,
    stderror,
    title,
    name,
    rmWatermark,
    md,
    interval = 1 * 5 * 1000,
    timeout = 1 * 60 * 1000
  } = opts;

  return new Promise((resolve, reject) => {
    const timers = {};
    let settled = false;

    // Settle exactly once and always stop both timers; otherwise the poller
    // keeps reading the file after a timeout, and the pending timeout keeps
    // the event loop alive after a successful resolve.
    const settle = (done, value) => {
      if (settled) return;
      settled = true;
      clearInterval(timers.poll);
      clearTimeout(timers.deadline);
      done(value);
    };

    timers.poll = setInterval(async () => {
      try {
        const data = await fs.readFile(stdout, 'utf8');
        const [url] = data.match(/(https?:\/\/[^\s]+)/) || [];
        if (!url) return;

        let output = url;
        if (!rmWatermark) output = watermarkUri({ uri: output, type: 'tb' });
        if (md) output = `[${title || name}](${output})`;

        settle(resolve, output);
      } catch (err) {
        // A throw inside this async timer callback would otherwise surface as
        // an unhandled promise rejection instead of reaching the caller.
        settle(reject, err);
      }
    }, interval);

    timers.deadline = setTimeout(async () => {
      let error;
      try {
        error = await fs.readFile(stderror, 'utf8');
      } catch (err) {
        // Still report the timeout even if the stderr log is unreadable.
        error = err.message;
      }
      settle(reject, new Error(`Tensorboard took too long. ${error}`));
    }, timeout);
  });
};

const run = async (opts) => {
const {
md,
Expand All @@ -45,89 +72,86 @@ const run = async (opts) => {
const extraParams = extraParamsFound
? `--name "${name}" --description "${description}"`
: '';

const command = `tensorboard dev upload --logdir ${logdir} ${extraParams}`;

const stdoutPath = tempy.file({ extension: 'log' });
const stdoutFd = await fs.open(stdoutPath, 'a');
const stderrPath = tempy.file({ extension: 'log' });
const stderrFd = await fs.open(stderrPath, 'a');

const proc = spawn(command, [], {
detached: true,
shell: true,
stdio: ['ignore', stdoutFd, stderrFd]
});

proc.unref();
proc.on('exit', (code) => {
throw new Error(`Tensorboard process exited with code ${code}`);
proc.on('exit', async (code) => {
if (code) {
const error = await fs.readFile(stderrPath, 'utf8');
print(`Tensorboard failed with error: ${error}`);
}
process.exit(code);
});

// reads stdout every 5 secs to find the tb uri
setInterval(async () => {
const stdoutData = await fs.readFile(stdoutPath, 'utf8');
const regex = /(https?:\/\/[^\s]+)/;
const matches = stdoutData.match(regex);

if (matches.length) {
let output = matches[0];

if (!rmWatermark) output = watermarkUri({ uri: output, type: 'tb' });
const url = await tbLink({
stdout: stdoutPath,
stderror: stderrPath,
title,
name,
rmWatermark,
md
});
if (!file) print(url);
else await fs.appendFile(file, url);

if (md) output = `[${title || name}](${output})`;
closeFd(stdoutFd) && closeFd(stderrFd);
process.exit(0);
};

if (!file) print(output);
else await fs.appendFile(file, output);
// Parse the command line and start the upload only when this file is executed
// directly (isCLI is `require.main === module`). When the file is required
// from another module this whole block is skipped, so no argv parsing and no
// call to run() happens.
if (isCLI) {
// .strict() makes yargs reject arguments that were not declared below.
const argv = yargs
.strict()
.usage(`Usage: $0`)
.default('credentials')
.describe(
'credentials',
'TB credentials as json. Usually found at ~/.config/tensorboard/credentials/uploader-creds.json. If not specified will look for the json at the env variable TB_CREDENTIALS.'
)
.alias('credentials', 'c')
.default('logdir')
.describe('logdir', 'Directory containing the logs to process.')
.default('name')
.describe('name', 'Tensorboard experiment title. Max 100 characters.')
.default('description')
.describe(
'description',
'Tensorboard experiment description. Markdown format. Max 600 characters.'
)
.default('plugins')
.boolean('md')
.describe('md', 'Output as markdown [title || name](url).')
.default('title')
.describe(
'title',
'Markdown title, if not specified, param name will be used.'
)
.alias('title', 't')
.default('file')
.describe(
'file',
'Append the output to the given file. Create it if does not exist.'
)
.describe('rm-watermark', 'Avoid CML watermark.')
.alias('file', 'f')
.help('h').argv;

// Log any rejection from run() and exit with a non-zero status so the
// failure is visible to the calling shell/CI job.
run(argv).catch((e) => {
console.error(e);
process.exit(1);
});
}

closeFd(stdoutFd) && closeFd(stderrFd);
process.exit(0);
}
}, 1 * 5 * 1000);

// waits 1 min before dies
setTimeout(async () => {
closeFd(stdoutFd) && closeFd(stderrFd);
console.error(await fs.readFile(stderrPath, 'utf8'));
throw new Error('Tensorboard took too long! Canceled.');
}, 1 * 60 * 1000);
// Public surface of this module: only the URL-polling helper is exported.
module.exports = { tbLink };

const argv = yargs
.strict()
.usage(`Usage: $0`)
.default('credentials')
.describe(
'credentials',
'TB credentials as json. Usually found at ~/.config/tensorboard/credentials/uploader-creds.json. If not specified will look for the json at the env variable TB_CREDENTIALS.'
)
.alias('credentials', 'c')
.default('logdir')
.describe('logdir', 'Directory containing the logs to process.')
.default('name')
.describe('name', 'Tensorboard experiment title. Max 100 characters.')
.default('description')
.describe(
'description',
'Tensorboard experiment description. Markdown format. Max 600 characters.'
)
.default('plugins')
.boolean('md')
.describe('md', 'Output as markdown [title || name](url).')
.default('title')
.describe(
'title',
'Markdown title, if not specified, param name will be used.'
)
.alias('title', 't')
.default('file')
.describe(
'file',
'Append the output to the given file. Create it if does not exist.'
)
.describe('rm-watermark', 'Avoid CML watermark.')
.alias('file', 'f')
.help('h').argv;

run(argv).catch((e) => {
console.error(e);
process.exit(1);
});
44 changes: 44 additions & 0 deletions bin/cml-tensorboard-dev.test.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
jest.setTimeout(200000);

const fs = require('fs').promises;
const tempy = require('tempy');
const { exec, isProcRunning, sleep } = require('../src/utils');
const { tbLink } = require('./cml-tensorboard-dev');

const CREDENTIALS =
'{"refresh_token": "1//03FiVnGk2xhnNCgYIARAAGAMSNwF-L9IrPH8FOOVWEYUihFDToqxyLArxfnbKFmxEfhzys_KYVVzBisYlAy225w4HaX3ais5TV_Q", "token_uri": "https://oauth2.googleapis.com/token", "client_id": "373649185512-8v619h5kft38l4456nm2dj4ubeqsrvh6.apps.googleusercontent.com", "client_secret": "pOyAuU2yq2arsM98Bw5hwYtr", "scopes": ["openid", "https://www.googleapis.com/auth/userinfo.email"], "type": "authorized_user"}';

Expand All @@ -16,6 +20,38 @@ const rmTbDevExperiment = async (tbOutput) => {
await exec(`tensorboard dev delete --experiment_id ${id}`);
};

// Unit tests for tbLink, driven by throwaway log files instead of a real
// tensorboard process.
describe('tbLink', () => {
  // Creates a fresh stdout/stderr log pair pre-filled with the given text and
  // returns their paths in the shape tbLink expects.
  const writeLogs = async (stdoutText, stderrText) => {
    const stdout = tempy.file({ extension: 'log' });
    const stderror = tempy.file({ extension: 'log' });

    await fs.writeFile(stdout, stdoutText);
    await fs.writeFile(stderror, stderrText);

    return { stdout, stderror };
  };

  test('timeout without result throws exception', async () => {
    const stderrText = 'there is an error';
    const logs = await writeLogs('nothing', stderrText);

    // stdout never contains a URL, so tbLink is expected to reject; keep the
    // rejection around to inspect its message.
    let caught;
    try {
      await tbLink(logs);
    } catch (err) {
      caught = err;
    }

    expect(caught.message).toBe(`Tensorboard took too long. ${stderrText}`);
  });

  test('valid url is returned', async () => {
    const url = 'https://iterative.ai';
    const logs = await writeLogs(url, '');

    const link = await tbLink(logs);

    // No rmWatermark option is passed, so the watermark query is appended.
    expect(link).toBe(`${url}/?cml=tb`);
  });
});

describe('CML e2e', () => {
test('cml-tensorboard-dev.js -h', async () => {
const output = await exec(`node ./bin/cml-tensorboard-dev.js -h`);
Expand Down Expand Up @@ -60,4 +96,12 @@ describe('CML e2e', () => {
expect(output.startsWith(`[${title}](https://`)).toBe(true);
expect(output.includes('cml=tb')).toBe(true);
});

// Running the CLI with credentials that are not valid JSON must fail with the
// JSON decode error in the error message.
test('cml-tensorboard-dev.js invalid creds', async () => {
  // The only assertion lives in the catch branch; without this guard the test
  // would pass silently if the command unexpectedly succeeded.
  expect.assertions(1);

  try {
    await exec(`node ./bin/cml-tensorboard-dev.js --credentials 'invalid'`);
  } catch (err) {
    expect(err.message.includes('json.decoder.JSONDecodeError')).toBe(true);
  }
});
});

0 comments on commit e556f20

Please sign in to comment.