Skip to content

Commit

Permalink
Runner detect ACPI shutdown (#1068)
Browse files Browse the repository at this point in the history
* Runner edge cases improvements

* fix parselog

* refactor ugly return

* fix test

* idle check not needed

* fix tests

* fix gl

* multi logs parser

* feedback fixes

* process lost

Co-authored-by: Daniel Barnes <dabarnes2b@gmail.com>

* job not id

* patterns not entities

* Runner detect ACPI termination

* try connect

* on error

* package-lock

* rerun workflow

* remove console

* remove unused

* runner name exception

* review suggestions (#1083)

* revert this

* log warning verbiage

* revert this

* move acpiSock to runLocal

* connect info log

* Revert "revert this"

This reverts commit f4c0c04.

* Revert "revert this"

This reverts commit 8a77fae.

* typo

* 🙈

* lock update

Co-authored-by: Daniel Barnes <dabarnes2b@gmail.com>
  • Loading branch information
DavidGOrtega and dacbd authored Jul 4, 2022
1 parent b4c7898 commit 6cc6b9f
Show file tree
Hide file tree
Showing 7 changed files with 203 additions and 651 deletions.
61 changes: 34 additions & 27 deletions bin/cml/runner.js
Original file line number Diff line number Diff line change
@@ -1,19 +1,20 @@
const { join } = require('path');
const { homedir } = require('os');
const fs = require('fs').promises;
const { SpotNotifier } = require('ec2-spot-notification');
const net = require('net');
const kebabcaseKeys = require('kebabcase-keys');
const timestring = require('timestring');
const winston = require('winston');

const CML = require('../../src/cml').default;
const { randid, sleep } = require('../../src/utils');
const tf = require('../../src/terraform');

let cml;
let RUNNER;
let RUNNER_JOBS_RUNNING = [];
let RUNNER_SHUTTING_DOWN = false;
let RUNNER_TIMER = 0;
const RUNNER_JOBS_RUNNING = [];
const GH_5_MIN_TIMEOUT = (72 * 60 - 5) * 60 * 1000;

const shutdown = async (opts) => {
Expand Down Expand Up @@ -46,14 +47,15 @@ const shutdown = async (opts) => {

const retryWorkflows = async () => {
try {
if (!noRetry) {
if (RUNNER_JOBS_RUNNING.length > 0) {
await Promise.all(
RUNNER_JOBS_RUNNING.map(
async (job) => await cml.pipelineRestart({ jobId: job.id })
)
);
}
if (!noRetry && RUNNER_JOBS_RUNNING.length > 0) {
winston.info(`Still pending jobs, retrying workflow...`);

await Promise.all(
RUNNER_JOBS_RUNNING.map(
async (job) =>
await cml.pipelineRerun({ id: job.pipeline, jobId: job.id })
)
);
}
} catch (err) {
winston.error(err);
Expand Down Expand Up @@ -240,21 +242,36 @@ const runLocal = async (opts) => {
await tf.saveTfState({ tfstate, path });
}

// On Linux only, listen on the acpid UNIX socket so that an ACPI
// power-button event triggers the runner's shutdown routine.
if (process.platform === 'linux') {
// Connects to the local socket path /var/run/acpid.socket.
const acpiSock = net.connect('/var/run/acpid.socket');
acpiSock.on('connect', () => {
winston.info('Connected to acpid service.');
});
// A connection failure is only logged as a warning; nothing is rethrown
// from this handler.
acpiSock.on('error', (err) => {
winston.warn(
`Error connecting to ACPI socket: ${err.message}. The acpid.service helps with instance termination detection.`
);
});
acpiSock.on('data', (buf) => {
// Case-insensitive match: the message must contain both "power" and
// "button" to be treated as a shutdown request.
const data = buf.toString().toLowerCase();
if (data.includes('power') && data.includes('button')) {
// NOTE(review): the value returned by shutdown() is not awaited or
// handled here — confirm shutdown() handles its own errors.
shutdown({ ...opts, reason: 'ACPI shutdown' });
}
});
}

const dataHandler = async (data) => {
const logs = await cml.parseRunnerLog({ data });
const logs = await cml.parseRunnerLog({ data, name });
for (const log of logs) {
winston.info('runner status', log);

if (log.status === 'job_started') {
RUNNER_JOBS_RUNNING.push({ id: log.job, date: log.date });
const { job: id, pipeline, date } = log;
RUNNER_JOBS_RUNNING.push({ id, pipeline, date });
}

if (log.status === 'job_ended') {
const { job: jobId } = log;
RUNNER_JOBS_RUNNING = RUNNER_JOBS_RUNNING.filter(
(job) => job.id !== jobId
);

RUNNER_JOBS_RUNNING.pop();
if (single) await shutdown({ ...opts, reason: 'single job' });
}
}
Expand Down Expand Up @@ -295,16 +312,6 @@ const runLocal = async (opts) => {
}

if (!noRetry) {
try {
winston.info(`EC2 id ${await SpotNotifier.instanceId()}`);
SpotNotifier.on('termination', () =>
shutdown({ ...opts, reason: 'spot_termination' })
);
SpotNotifier.start();
} catch (err) {
winston.warn('SpotNotifier can not be started.');
}

if (cml.driver === 'github') {
const watcherSeventyTwo = setInterval(() => {
RUNNER_JOBS_RUNNING.forEach((job) => {
Expand Down
Loading

0 comments on commit 6cc6b9f

Please sign in to comment.