Skip to content

Commit db910eb

Browse files
committed
Move metrics to setup and add cgroup metrics
1 parent 64b7726 commit db910eb

File tree

14 files changed

+419
-62
lines changed

14 files changed

+419
-62
lines changed

docs/setup/settings.asciidoc

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,12 @@ which may cause a delay before pages start being served.
2020
Set to `false` to disable Console. *Default: `true`*
2121

2222
| `cpu.cgroup.path.override:`
23-
| Override for cgroup cpu path when mounted in a
24-
manner that is inconsistent with `/proc/self/cgroup`.
23+
| *Deprecated.* This setting has been renamed to `ops.cGroupOverrides.cpuPath`,
24+
and the old name will no longer be supported as of 8.0.
2525

2626
| `cpuacct.cgroup.path.override:`
27-
| Override for cgroup cpuacct path when mounted
28-
in a manner that is inconsistent with `/proc/self/cgroup`.
27+
| *Deprecated.* This setting has been renamed to `ops.cGroupOverrides.cpuAcctPath`,
28+
and the old name will no longer be supported as of 8.0.
2929

3030
| `csp.rules:`
3131
| A https://w3c.github.io/webappsec-csp/[content-security-policy] template
@@ -438,6 +438,14 @@ not saved in {es}. *Default: `data`*
438438
| Set the interval in milliseconds to sample
439439
system and process performance metrics. The minimum value is 100. *Default: `5000`*
440440

441+
| `ops.cGroupOverrides.cpuPath:`
442+
| Override for cgroup cpu path when mounted in a
443+
manner that is inconsistent with `/proc/self/cgroup`.
444+
445+
| `ops.cGroupOverrides.cpuAcctPath:`
446+
| Override for cgroup cpuacct path when mounted
447+
in a manner that is inconsistent with `/proc/self/cgroup`.
448+
441449
| `server.basePath:`
442450
| Enables you to specify a path to mount {kib} at if you are
443451
running behind a proxy. Use the `server.rewriteBasePath` setting to tell {kib}

src/core/server/config/deprecation/core_deprecations.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ const mapManifestServiceUrlDeprecation: ConfigDeprecation = (settings, fromPath,
113113
return settings;
114114
};
115115

116-
export const coreDeprecationProvider: ConfigDeprecationProvider = ({ unusedFromRoot }) => [
116+
export const coreDeprecationProvider: ConfigDeprecationProvider = ({ rename, unusedFromRoot }) => [
117117
unusedFromRoot('savedObjects.indexCheckTimeout'),
118118
unusedFromRoot('server.xsrf.token'),
119119
unusedFromRoot('maps.manifestServiceUrl'),
@@ -136,6 +136,8 @@ export const coreDeprecationProvider: ConfigDeprecationProvider = ({ unusedFromR
136136
unusedFromRoot('optimize.workers'),
137137
unusedFromRoot('optimize.profile'),
138138
unusedFromRoot('optimize.validateSyntaxOfNodeModules'),
139+
rename('cpu.cgroup.path.override', 'ops.cGroupOverrides.cpuPath'),
140+
rename('cpuacct.cgroup.path.override', 'ops.cGroupOverrides.cpuAcctPath'),
139141
configPathDeprecation,
140142
dataPathDeprecation,
141143
rewriteBasePathDeprecation,
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
/*
2+
* Licensed to Elasticsearch B.V. under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch B.V. licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
import mockFs from 'mock-fs';
21+
import { OsCgroupMetricsCollector } from './cgroup';
22+
23+
describe('OsCgroupMetricsCollector', () => {
24+
afterEach(() => mockFs.restore());
25+
26+
it('returns empty object when no cgroup file present', async () => {
27+
mockFs({
28+
'/proc/self': {
29+
/** empty directory */
30+
},
31+
});
32+
33+
const collector = new OsCgroupMetricsCollector({});
34+
expect(await collector.collect()).toEqual({});
35+
});
36+
37+
it('collects default cgroup data', async () => {
38+
mockFs({
39+
'/proc/self/cgroup': `
40+
123:memory:/groupname
41+
123:cpu:/groupname
42+
123:cpuacct:/groupname
43+
`,
44+
'/sys/fs/cgroup/cpuacct/groupname/cpuacct.usage': '111',
45+
'/sys/fs/cgroup/cpu/groupname/cpu.cfs_period_us': '222',
46+
'/sys/fs/cgroup/cpu/groupname/cpu.cfs_quota_us': '333',
47+
'/sys/fs/cgroup/cpu/groupname/cpu.stat': `
48+
nr_periods 444
49+
nr_throttled 555
50+
throttled_time 666
51+
`,
52+
});
53+
54+
const collector = new OsCgroupMetricsCollector({});
55+
expect(await collector.collect()).toMatchInlineSnapshot(`
56+
Object {
57+
"cpu": Object {
58+
"cfs_period_micros": 222,
59+
"cfs_quota_micros": 333,
60+
"control_group": "/groupname",
61+
"stat": Object {
62+
"number_of_elapsed_periods": 444,
63+
"number_of_times_throttled": 555,
64+
"time_throttled_nanos": 666,
65+
},
66+
},
67+
"cpuacct": Object {
68+
"control_group": "/groupname",
69+
"usage_nanos": 111,
70+
},
71+
}
72+
`);
73+
});
74+
75+
it('collects override cgroup data', async () => {
76+
mockFs({
77+
'/proc/self/cgroup': `
78+
123:memory:/groupname
79+
123:cpu:/groupname
80+
123:cpuacct:/groupname
81+
`,
82+
'/sys/fs/cgroup/cpuacct/xxcustomcpuacctxx/cpuacct.usage': '111',
83+
'/sys/fs/cgroup/cpu/xxcustomcpuxx/cpu.cfs_period_us': '222',
84+
'/sys/fs/cgroup/cpu/xxcustomcpuxx/cpu.cfs_quota_us': '333',
85+
'/sys/fs/cgroup/cpu/xxcustomcpuxx/cpu.stat': `
86+
nr_periods 444
87+
nr_throttled 555
88+
throttled_time 666
89+
`,
90+
});
91+
92+
const collector = new OsCgroupMetricsCollector({
93+
cpuAcctPath: 'xxcustomcpuacctxx',
94+
cpuPath: 'xxcustomcpuxx',
95+
});
96+
expect(await collector.collect()).toMatchInlineSnapshot(`
97+
Object {
98+
"cpu": Object {
99+
"cfs_period_micros": 222,
100+
"cfs_quota_micros": 333,
101+
"control_group": "xxcustomcpuxx",
102+
"stat": Object {
103+
"number_of_elapsed_periods": 444,
104+
"number_of_times_throttled": 555,
105+
"time_throttled_nanos": 666,
106+
},
107+
},
108+
"cpuacct": Object {
109+
"control_group": "xxcustomcpuacctxx",
110+
"usage_nanos": 111,
111+
},
112+
}
113+
`);
114+
});
115+
});
Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
/*
2+
* Licensed to Elasticsearch B.V. under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch B.V. licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
import fs from 'fs';
21+
import { join as joinPath } from 'path';
22+
import { MetricsCollector, OpsOsMetrics } from './types';
23+
24+
/** The slice of the OS metrics produced here: the `cpu` and `cpuacct` sections. */
type OsCgroupMetrics = Pick<OpsOsMetrics, 'cpuacct' | 'cpu'>;

/** Optional control group path overrides accepted by the cgroup metrics collector. */
interface OsCgroupMetricsCollectorOptions {
  /** When non-empty, used instead of the `cpuacct` group listed in `/proc/self/cgroup`. */
  cpuAcctPath?: string;
  /** When non-empty, used instead of the `cpu` group listed in `/proc/self/cgroup`. */
  cpuPath?: string;
}
30+
31+
/**
 * Collects the `cpu` and `cpuacct` control group metrics of the current process.
 *
 * The control group paths are taken from the constructor options when provided,
 * otherwise from the entries listed in `/proc/self/cgroup`.
 */
export class OsCgroupMetricsCollector implements MetricsCollector<OsCgroupMetrics> {
  /**
   * Set once cgroup data turned out to be unavailable, so that subsequent
   * `collect()` calls return immediately instead of re-reading missing files.
   */
  private noCgroupPresent = false;

  constructor(private readonly options: OsCgroupMetricsCollectorOptions) {}

  /**
   * Reads the current cgroup CPU metrics.
   *
   * @returns the `cpu` / `cpuacct` metrics, or an empty object when no cgroup
   * information is available (missing cgroup paths or missing files).
   * @throws any file system error other than `ENOENT`.
   */
  public async collect(): Promise<OsCgroupMetrics> {
    if (this.noCgroupPresent) {
      return {};
    }

    try {
      const cgroups = await readControlGroups();
      // `||` is intentional: an empty-string override falls back to the detected group.
      const cpuPath = this.options.cpuPath || cgroups[GROUP_CPU];
      const cpuAcctPath = this.options.cpuAcctPath || cgroups[GROUP_CPUACCT];

      // Neither an override nor a detected group: nothing to read, now or later.
      if (!cpuPath || !cpuAcctPath) {
        this.noCgroupPresent = true;
        return {};
      }

      const [cpuAcctUsage, cpuFsPeriod, cpuFsQuota, cpuStat] = await Promise.all([
        readCPUAcctUsage(cpuAcctPath),
        readCPUFsPeriod(cpuPath),
        readCPUFsQuota(cpuPath),
        readCPUStat(cpuPath),
      ]);

      return {
        cpuacct: {
          control_group: cpuAcctPath,
          usage_nanos: cpuAcctUsage,
        },

        cpu: {
          control_group: cpuPath,
          cfs_period_micros: cpuFsPeriod,
          cfs_quota_micros: cpuFsQuota,
          stat: cpuStat,
        },
      };
    } catch (err) {
      if (err.code === 'ENOENT') {
        // A required file does not exist (e.g. no /proc/self/cgroup on this host).
        // Remember that, otherwise every collection would retry the same failing reads.
        this.noCgroupPresent = true;
        return {};
      }

      throw err;
    }
  }

  /** No-op: this collector keeps no metric state between collections. */
  public reset() {}
}
84+
85+
// Controller names as they appear in /proc/self/cgroup.
const GROUP_CPU = 'cpu';
const GROUP_CPUACCT = 'cpuacct';

// Location of the per-process control group listing and of the controller directories.
const PROC_SELF_CGROUP_FILE = '/proc/self/cgroup';
const PROC_CGROUP_CPU_DIR = '/sys/fs/cgroup/cpu';
const PROC_CGROUP_CPUACCT_DIR = '/sys/fs/cgroup/cpuacct';

// Metric files read from inside a control group directory.
const CPUACCT_USAGE_FILE = 'cpuacct.usage';
const CPU_FS_PERIOD_US_FILE = 'cpu.cfs_period_us';
const CPU_FS_QUOTA_US_FILE = 'cpu.cfs_quota_us';
const CPU_STATS_FILE = 'cpu.stat';

// Matches one `<digits>:<controllers>:<path>` line; group 1 holds the controller
// list, group 2 the control group path (which must start with `/`).
const CONTROL_GROUP_RE = new RegExp('\\d+:([^:]+):(/.*)');
// Separator between controller names inside group 1 (a plain string despite the name).
const CONTROLLER_SEPARATOR_RE = ',';
99+
100+
/**
 * Parses `/proc/self/cgroup` into a map from controller name to control group path.
 *
 * Lines that do not match the expected pattern are ignored. A line that lists
 * several comma-separated controllers yields one entry per controller, all
 * pointing at the same path; a later line overwrites an earlier one for the
 * same controller.
 */
async function readControlGroups() {
  const contents = (await fs.promises.readFile(PROC_SELF_CGROUP_FILE)).toString();
  const groups = {} as Record<string, string>;

  for (const line of contents.split(/\n/)) {
    const parsed = line.match(CONTROL_GROUP_RE);
    if (parsed === null) {
      continue;
    }

    for (const controller of parsed[1].split(CONTROLLER_SEPARATOR_RE)) {
      groups[controller] = parsed[2];
    }
  }

  return groups;
}
119+
120+
/**
 * Reads the file at `path` and parses its contents as a base-10 integer.
 *
 * Parsing follows `parseInt`, so the result is `NaN` when the contents do not
 * start with a number. Read errors reject the returned promise.
 */
async function fileContentsToInteger(path: string): Promise<number> {
  const raw = (await fs.promises.readFile(path)).toString();
  return parseInt(raw, 10);
}
124+
125+
/** Reads the `cpuacct.usage` value of `controlGroup` under the cpuacct controller directory. */
function readCPUAcctUsage(controlGroup: string) {
  const usageFile = joinPath(PROC_CGROUP_CPUACCT_DIR, controlGroup, CPUACCT_USAGE_FILE);
  return fileContentsToInteger(usageFile);
}
128+
129+
/** Reads the `cpu.cfs_period_us` value of `controlGroup` under the cpu controller directory. */
function readCPUFsPeriod(controlGroup: string) {
  const periodFile = joinPath(PROC_CGROUP_CPU_DIR, controlGroup, CPU_FS_PERIOD_US_FILE);
  return fileContentsToInteger(periodFile);
}
132+
133+
/** Reads the `cpu.cfs_quota_us` value of `controlGroup` under the cpu controller directory. */
function readCPUFsQuota(controlGroup: string) {
  const quotaFile = joinPath(PROC_CGROUP_CPU_DIR, controlGroup, CPU_FS_QUOTA_US_FILE);
  return fileContentsToInteger(quotaFile);
}
136+
137+
/**
 * Reads the `cpu.stat` file of `controlGroup` and extracts its throttling counters.
 *
 * Every counter starts at -1 and is overwritten only when the matching key
 * (`nr_periods`, `nr_throttled`, `throttled_time`) is the first field of a line.
 * When the file does not exist the untouched -1 values are returned; any other
 * read error is rethrown.
 */
async function readCPUStat(controlGroup: string) {
  const stat = {
    number_of_elapsed_periods: -1,
    number_of_times_throttled: -1,
    time_throttled_nanos: -1,
  };

  // Maps a key found in cpu.stat to the property that receives its value.
  const targetByKey = new Map<string, keyof typeof stat>([
    ['nr_periods', 'number_of_elapsed_periods'],
    ['nr_throttled', 'number_of_times_throttled'],
    ['throttled_time', 'time_throttled_nanos'],
  ]);

  let contents: string;
  try {
    const statFile = joinPath(PROC_CGROUP_CPU_DIR, controlGroup, CPU_STATS_FILE);
    contents = (await fs.promises.readFile(statFile)).toString();
  } catch (err) {
    if (err.code === 'ENOENT') {
      return stat;
    }

    throw err;
  }

  for (const line of contents.split(/\n/)) {
    const [key, value] = line.split(/\s+/);
    const target = targetByKey.get(key);

    if (target !== undefined) {
      stat[target] = parseInt(value, 10);
    }
  }

  return stat;
}

src/core/server/metrics/collectors/index.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,6 @@
1818
*/
1919

2020
export { OpsProcessMetrics, OpsOsMetrics, OpsServerMetrics, MetricsCollector } from './types';
21-
export { OsMetricsCollector } from './os';
21+
export { OsMetricsCollector, OpsMetricsCollectorOptions } from './os';
2222
export { ProcessMetricsCollector } from './process';
2323
export { ServerMetricsCollector } from './server';

0 commit comments

Comments
 (0)