Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(host-metrics)!: fix process.cpu.* metrics #1785

Merged
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
fix(host-metrics): fix process.cpu.* metrics
  • Loading branch information
david-luna committed Nov 9, 2023
commit 05f866fff14b8ca738c6d9758dfe95fcc80e0db2
2 changes: 1 addition & 1 deletion packages/opentelemetry-host-metrics/src/BaseMetrics.ts
Original file line number Diff line number Diff line change
@@ -46,7 +46,7 @@ export abstract class BaseMetrics {
constructor(config: MetricsCollectorConfig) {
this._name = config.name || DEFAULT_NAME;
const meterProvider =
config.meterProvider! || api.metrics.getMeterProvider();
config.meterProvider || api.metrics.getMeterProvider();
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note for reviewer: I don't know why this non-null assertion was in place. It does not affect the expression, and it was also triggering a lint warning, so I've decided to remove it.

if (!config.meterProvider) {
this._logger.warn('No meter provider, using default');
}
43 changes: 31 additions & 12 deletions packages/opentelemetry-host-metrics/src/stats/common.ts
Original file line number Diff line number Diff line change
@@ -19,7 +19,7 @@ import * as os from 'os';
import { CpuUsageData, MemoryData, ProcessCpuUsageData } from '../types';

const MILLISECOND = 1 / 1e3;
let cpuUsageTime: number | undefined = undefined;
const MICROSECOND = 1 / 1e6;

/**
* We get data as soon as we load the module so the 1st collect
@@ -77,19 +77,38 @@ export function getCpuUsageData(): CpuUsageData[] {
}

/**
* It returns process cpu load delta from last time - to be used with SumObservers.
* When called first time it will return 0 and then delta will be calculated
* We get data as soon as we load the module so the 1st collect
* of the metric already has valuable data to be sent.
*/
let prevProcData: { time: number; usage: NodeJS.CpuUsage } = {
time: Date.now(),
usage: process.cpuUsage(),
};

/**
* Gets the process CPU usage and returns
* - the time spent in `user` state
* - the time spent in `system` state
* - the % of time in `user` state since last measurement
* - the % of time in `system` state since last measurement
*/
export function getProcessCpuUsageData(): ProcessCpuUsageData {
if (typeof cpuUsageTime !== 'number') {
cpuUsageTime = new Date().getTime() - process.uptime() * 1000;
}
const timeElapsed = (new Date().getTime() - cpuUsageTime) / 1000;
const cpuUsage: NodeJS.CpuUsage = process.cpuUsage();
const user = cpuUsage.user * MILLISECOND;
const system = cpuUsage.system * MILLISECOND;
const userP = user / timeElapsed;
const systemP = system / timeElapsed;
const currentTime = Date.now();
const currentUsage = process.cpuUsage();
const prevUsage = prevProcData.usage;
// According to semantic conventions we need to divide by
// - time elapsed (in microseconds to match `process.cpuUsage()` units)
// - number of CPUs
const timeElapsed = (currentTime - prevProcData.time) * 1000;
const cpusTimeElapsed = timeElapsed * prevOsData.cpus.length;

const user = currentUsage.user * MICROSECOND;
const system = currentUsage.system * MICROSECOND;
const userP = (currentUsage.user - prevUsage.user) / cpusTimeElapsed;
const systemP = (currentUsage.system - prevUsage.system) / cpusTimeElapsed;

prevProcData = { time: currentTime, usage: currentUsage };

return {
user,
system,
57 changes: 30 additions & 27 deletions packages/opentelemetry-host-metrics/test/metric.test.ts
Original file line number Diff line number Diff line change
@@ -31,6 +31,7 @@ import { METRIC_ATTRIBUTES } from '../src/enum';
import { HostMetrics } from '../src';

const cpuJson = require('./mocks/cpu.json');
const processJson = require('./mocks/process.json');
const networkJson = require('./mocks/network.json');

class TestMetricReader extends MetricReader {
@@ -75,7 +76,20 @@ const mockedOS = {
},
};

const INTERVAL = 3000;
/**
 * Stand-in implementations for the Node.js `process` functions that the
 * tests stub (`uptime`, `cpuUsage` and `memoryUsage.rss`).
 */
const mockedProcess = {
// Always reports an uptime of zero.
uptime: function () {
return 0;
},
// Number of `cpuUsage` calls made so far; used to pick the next sample.
procIdx: 0,
// Alternates between the first two entries of `processJson` on successive
// calls. It reads `this.procIdx`, so it must be invoked as
// `mockedProcess.cpuUsage()` rather than passed around unbound.
cpuUsage: function () {
return processJson[this.procIdx++ % 2];
},
memoryUsage: {
// Returns the same fixed value on every call.
rss: function () {
return 123456;
},
},
};

describe('Host Metrics', () => {
let meterProvider: MeterProvider;
@@ -113,24 +127,17 @@ describe('Host Metrics', () => {
sandbox = sinon.createSandbox();
sandbox.useFakeTimers();

sandbox.stub(os, 'freemem').callsFake(() => {
return mockedOS.freemem();
});
sandbox.stub(os, 'totalmem').returns(mockedOS.totalmem());
sandbox.stub(os, 'freemem').callsFake(mockedOS.freemem);
sandbox.stub(os, 'totalmem').callsFake(mockedOS.totalmem);
sandbox.stub(os, 'cpus').callsFake(() => mockedOS.cpus());
sandbox.stub(process, 'uptime').returns(0);
sandbox.stub(SI, 'networkStats').callsFake(() => {
return mockedSI.networkStats();
});
sandbox.stub(process, 'cpuUsage').callsFake(() => {
return {
user: 90713560,
system: 63192630,
};
});
sandbox.stub(process.memoryUsage, 'rss').callsFake(() => {
return 123456;
});
sandbox.stub(process, 'uptime').callsFake(mockedProcess.uptime);
sandbox
.stub(process, 'cpuUsage')
.callsFake(() => mockedProcess.cpuUsage());
sandbox
.stub(process.memoryUsage, 'rss')
.callsFake(mockedProcess.memoryUsage.rss);
sandbox.stub(SI, 'networkStats').callsFake(mockedSI.networkStats);

reader = new TestMetricReader();

@@ -143,13 +150,9 @@ describe('Host Metrics', () => {
});
await hostMetrics.start();

const dateStub = sandbox
.stub(Date.prototype, 'getTime')
.returns(process.uptime() * 1000 + 1);
// Drop first frame cpu metrics, see
// src/common.ts getCpuUsageData
// src/common.ts getCpuUsageData/getProcessCpuUsageData
await reader.collect();
dateStub.returns(process.uptime() * 1000 + INTERVAL);

// advance the clock for the next collection
sandbox.clock.tick(1000);
@@ -314,15 +317,15 @@ describe('Host Metrics', () => {
it('should export Process CPU time metrics', async () => {
const metric = await getRecords(reader, 'process.cpu.time');

ensureValue(metric, { state: 'user' }, 90713.56);
ensureValue(metric, { state: 'system' }, 63192.630000000005);
ensureValue(metric, { state: 'user' }, 90.71356);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note for reviewer: the values have only changed in magnitude, since according to the Node.js docs the usage is reported in microseconds, not milliseconds.

ensureValue(metric, { state: 'system' }, 63.192629999999994);
});

it('should export Process CPU utilization metrics', async () => {
const metric = await getRecords(reader, 'process.cpu.utilization');

ensureValue(metric, { state: 'user' }, 30247.935978659552);
ensureValue(metric, { state: 'system' }, 21071.23374458153);
ensureValue(metric, { state: 'user' }, 1.5);
ensureValue(metric, { state: 'system' }, 1);
});

it('should export Process Memory usage metrics', async () => {
10 changes: 10 additions & 0 deletions packages/opentelemetry-host-metrics/test/mocks/process.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[
{
"user": 87713560,
"system": 61192630
},
{
"user": 90713560,
"system": 63192630
}
]
Loading