Skip to content

Commit

Permalink
Restarts Prometheus subprocess when it crashes (Jigsaw-Code#663)
Browse files Browse the repository at this point in the history
* gitignore

* Adds logic to restart Prometheus when it crashes

* Refactor Prometheus start logic

* Commit hook fixes

* Addresses review comments

* Addresses review comments
  • Loading branch information
mpmcroy authored May 28, 2020
1 parent 49b601a commit f27eb72
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 21 deletions.
31 changes: 19 additions & 12 deletions src/shadowbox/infrastructure/prometheus_scraper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,14 @@ export class PrometheusClient {
}
}

export async function runPrometheusScraper(
args: string[], configFilename: string, configJson: {},
prometheusEndpoint: string): Promise<child_process.ChildProcess> {
mkdirp.sync(path.dirname(configFilename));
export async function startPrometheus(
configFilename: string, configJson: {}, processArgs: string[], endpoint: string) {
await writePrometheusConfigToDisk(configFilename, configJson);
await spawnPrometheusSubprocess(processArgs, endpoint);
}

async function writePrometheusConfigToDisk(configFilename: string, configJson: {}) {
await mkdirp.sync(path.dirname(configFilename));
const ymlTxt = jsyaml.safeDump(configJson, {'sortKeys': true});
// Write the file asynchronously to prevent blocking the node thread.
await new Promise((resolve, reject) => {
Expand All @@ -84,29 +88,32 @@ export async function runPrometheusScraper(
}
});
});
const commandArguments = ['--config.file', configFilename];
commandArguments.push(...args);
const runProcess = child_process.spawn('/root/shadowbox/bin/prometheus', commandArguments);
}

async function spawnPrometheusSubprocess(
processArgs: string[], prometheusEndpoint: string): Promise<child_process.ChildProcess> {
logging.info(`Starting Prometheus with args [${processArgs}]`);
const runProcess = child_process.spawn('/root/shadowbox/bin/prometheus', processArgs);
runProcess.on('error', (error) => {
logging.error(`Error spawning prometheus: ${error}`);
logging.error(`Error spawning Prometheus: ${error}`);
});
// TODO(fortuna): Add restart logic.
runProcess.on('exit', (code, signal) => {
logging.info(`prometheus has exited with error. Code: ${code}, Signal: ${signal}`);
logging.error(`Prometheus has exited with error. Code: ${code}, Signal: ${signal}`);
logging.error('Restarting Prometheus...');
spawnPrometheusSubprocess(processArgs, prometheusEndpoint);
});
// TODO(fortuna): Consider saving the output and expose it through the manager service.
runProcess.stdout.pipe(process.stdout);
runProcess.stderr.pipe(process.stderr);
await waitForPrometheusReady(`${prometheusEndpoint}/api/v1/status/flags`);
logging.info('Prometheus is ready!');
return runProcess;
}

async function waitForPrometheusReady(prometheusEndpoint: string) {
logging.debug('Waiting for Prometheus to be ready...');
while (!(await isHttpEndpointHealthy(prometheusEndpoint))) {
await new Promise((resolve) => setTimeout(resolve, 1000));
}
logging.debug('Prometheus is ready');
}

function isHttpEndpointHealthy(endpoint: string): Promise<boolean> {
Expand Down
21 changes: 12 additions & 9 deletions src/shadowbox/server/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import {RealClock} from '../infrastructure/clock';
import {PortProvider} from '../infrastructure/get_port';
import * as json_config from '../infrastructure/json_config';
import * as logging from '../infrastructure/logging';
import {PrometheusClient, runPrometheusScraper} from '../infrastructure/prometheus_scraper';
import {PrometheusClient, startPrometheus} from '../infrastructure/prometheus_scraper';
import {RolloutTracker} from '../infrastructure/rollout';
import {AccessKeyId} from '../model/access_key';

Expand Down Expand Up @@ -146,15 +146,18 @@ async function main() {
if (isReplayProtectionEnabled) {
shadowsocksServer.enableReplayProtection();
}

// Start Prometheus subprocess and wait for it to be up and running.
const prometheusConfigFilename = getPersistentFilename('prometheus/config.yml');
const prometheusTsdbFilename = getPersistentFilename('prometheus/data');
const prometheusEndpoint = `http://${prometheusLocation}`;
// Wait for Prometheus to be up and running.
await runPrometheusScraper(
[
'--storage.tsdb.retention', '31d', '--storage.tsdb.path',
getPersistentFilename('prometheus/data'), '--web.listen-address', prometheusLocation,
'--log.level', verbose ? 'debug' : 'info'
],
getPersistentFilename('prometheus/config.yml'), prometheusConfigJson, prometheusEndpoint);
const prometheusArgs = [
'--config.file', prometheusConfigFilename, '--storage.tsdb.retention.time', '31d',
'--storage.tsdb.path', prometheusTsdbFilename, '--web.listen-address', prometheusLocation,
'--log.level', verbose ? 'debug' : 'info'
];
await startPrometheus(
prometheusConfigFilename, prometheusConfigJson, prometheusArgs, prometheusEndpoint);

const prometheusClient = new PrometheusClient(prometheusEndpoint);
if (!serverConfig.data().portForNewAccessKeys) {
Expand Down

0 comments on commit f27eb72

Please sign in to comment.