Skip to content

Commit c15c7aa

Browse files
authored
[ML] Add system call restrictions to the ML processes (#98)
Restrict the ability of the autodetect, autoconfig, categorize and normalize programs to make system calls. Implemented for Linux (Seccomp BPF), macOS (sandbox) and Windows (Job Objects)
1 parent 943e3cf commit c15c7aa

23 files changed

+706
-13
lines changed

bin/autoconfig/Main.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
#include <config/CAutoconfigurerParams.h>
2929
#include <config/CReportWriter.h>
3030

31+
#include <seccomp/CSystemCallFilter.h>
32+
3133
#include "CCmdLineParser.h"
3234

3335
#include <boost/bind.hpp>
@@ -76,6 +78,8 @@ int main(int argc, char** argv) {
7678

7779
ml::core::CProcessPriority::reducePriority();
7880

81+
ml::seccomp::CSystemCallFilter::installSystemCallFilter();
82+
7983
if (ioMgr.initIo() == false) {
8084
LOG_FATAL(<< "Failed to initialise IO");
8185
return EXIT_FAILURE;

bin/autodetect/Main.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@
4242
#include <api/CSingleStreamSearcher.h>
4343
#include <api/CStateRestoreStreamFilter.h>
4444

45+
#include <seccomp/CSystemCallFilter.h>
46+
4547
#include "CCmdLineParser.h"
4648

4749
#include <boost/bind.hpp>
@@ -120,6 +122,8 @@ int main(int argc, char** argv) {
120122

121123
ml::core::CProcessPriority::reducePriority();
122124

125+
ml::seccomp::CSystemCallFilter::installSystemCallFilter();
126+
123127
if (ioMgr.initIo() == false) {
124128
LOG_FATAL(<< "Failed to initialise IO");
125129
return EXIT_FAILURE;

bin/categorize/Main.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@
3838
#include <api/CSingleStreamSearcher.h>
3939
#include <api/CStateRestoreStreamFilter.h>
4040

41+
#include <seccomp/CSystemCallFilter.h>
42+
4143
#include "CCmdLineParser.h"
4244

4345
#include <boost/bind.hpp>
@@ -91,6 +93,8 @@ int main(int argc, char** argv) {
9193

9294
ml::core::CProcessPriority::reducePriority();
9395

96+
ml::seccomp::CSystemCallFilter::installSystemCallFilter();
97+
9498
if (ioMgr.initIo() == false) {
9599
LOG_FATAL(<< "Failed to initialise IO");
96100
return EXIT_FAILURE;

bin/normalize/Main.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
#include <api/CLineifiedJsonOutputWriter.h>
2929
#include <api/CResultNormalizer.h>
3030

31+
#include <seccomp/CSystemCallFilter.h>
32+
3133
#include "CCmdLineParser.h"
3234

3335
#include <boost/bind.hpp>
@@ -78,6 +80,8 @@ int main(int argc, char** argv) {
7880

7981
ml::core::CProcessPriority::reducePriority();
8082

83+
ml::seccomp::CSystemCallFilter::installSystemCallFilter();
84+
8185
if (ioMgr.initIo() == false) {
8286
LOG_FATAL(<< "Failed to initialise IO");
8387
return EXIT_FAILURE;

docs/CHANGELOG.asciidoc

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
=== Deprecations
1313

14-
=== New Features
14+
=== New Features
1515

1616
=== Enhancements
1717

@@ -37,6 +37,10 @@ Reduce model memory by storing state for periodicity testing in a compressed for
3737
Forecasting of Machine Learning job time series is now supported for large jobs by temporarily storing
3838
model state on disk ({pull}89[#89])
3939

40+
Secure the ML processes by preventing system calls such as fork and exec. The Linux implementation uses
41+
Seccomp BPF to intercept system calls and is available in kernels since 3.5. On Windows Job Objects prevent
42+
new processes being created and macOS uses the sandbox functionality ({pull}98[#98])
43+
4044
=== Bug Fixes
4145

4246
Age seasonal components in proportion to the fraction of values with which they're updated ({pull}88[#88])

include/seccomp/CSystemCallFilter.h

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
#ifndef INCLUDED_ml_seccomp_CSystemCallFilter_h
7+
#define INCLUDED_ml_seccomp_CSystemCallFilter_h
8+
9+
#include <core/CNonInstantiatable.h>
10+
11+
namespace ml {
12+
namespace seccomp {
13+
14+
//! \brief
15+
//! Installs secure computing modes for Linux, macOS and Windows
16+
//!
17+
//! DESCRIPTION:\n
18+
//! ML processes require a subset of system calls to function correctly.
19+
//! These are: create a named pipe, connect to a named pipe, read and write.
20+
//! No other system calls are necessary, so they should be restricted to prevent
21+
//! malicious actions.
22+
//!
23+
//! IMPLEMENTATION DECISIONS:\n
24+
//! Implementations are platform specific; more details can be found in the
25+
//! particular .cc files.
26+
//!
27+
//! Linux:
28+
//! Seccomp BPF is used to restrict system calls on kernels since 3.5.
29+
//!
30+
//! macOS:
31+
//! The sandbox facility is used to restrict access to system resources.
32+
//!
33+
//! Windows:
34+
//! Job Objects prevent the process spawning another.
35+
//!
36+
class CSystemCallFilter : private core::CNonInstantiatable {
37+
public:
38+
//! Install the platform specific system call restrictions.
//! Nothing is returned; the Linux and macOS implementations report
//! failures through the logger.
static void installSystemCallFilter();
39+
};
40+
}
41+
}
42+
43+
#endif // INCLUDED_ml_seccomp_CSystemCallFilter_h

lib/Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ COMPONENTS= \
1818
test \
1919
api \
2020
config \
21+
seccomp \
22+
2123

2224

2325
include $(CPP_SRC_HOME)/mk/toplevel.mk

lib/seccomp/.objs/.gitignore

Whitespace-only changes.
Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
#include "seccomp/CSystemCallFilter.h"
7+
8+
#include <core/CLogger.h>
9+
10+
#include <linux/audit.h>
11+
#include <linux/filter.h>
12+
#include <sys/prctl.h>
13+
#include <sys/syscall.h>
14+
15+
#include <cerrno>
16+
#include <cstdint>
17+
#include <cstring>
18+
19+
namespace ml {
20+
namespace seccomp {
21+
22+
namespace {
23+
// The old x32 ABI always has bit 30 set in the sys call numbers.
// The x64 architecture should fail these calls
const std::uint32_t UPPER_NR_LIMIT = 0x3FFFFFFF;

// Offset to the nr field in struct seccomp_data
const std::uint32_t SECCOMP_DATA_NR_OFFSET = 0x00;
// Offset to the arch field in struct seccomp_data
const std::uint32_t SECCOMP_DATA_ARCH_OFFSET = 0x04;

// Copied from seccomp.h
// seccomp.h cannot be included as it was added in Linux kernel 3.17
// and this must build on older versions.
// TODO: remove once the minimum build kernel version supports seccomp
#define SECCOMP_MODE_FILTER 2
#define SECCOMP_RET_ERRNO 0x00050000U
#define SECCOMP_RET_ALLOW 0x7fff0000U
#define SECCOMP_RET_DATA 0x0000ffffU

// Added in Linux 3.5
#ifndef PR_SET_NO_NEW_PRIVS
#define PR_SET_NO_NEW_PRIVS 38
#endif

// The BPF program applied to every system call.
//
// Layout (0 based instruction index):
//   0      load the architecture
//   1      architecture check
//   2      load the system call number
//   3      x32 ABI check
//   4..37  one comparison per allowed system call
//   38     return "disallow" (EACCES)
//   39     return "allow"
//
// Jump offsets are relative to the instruction FOLLOWING the jump, so a
// jump at index i with offset k continues at index i + 1 + k. Whenever a
// system call is added to or removed from the list, the offsets of ALL
// preceding jumps (including the architecture and x32 checks) must be
// adjusted.
const struct sock_filter FILTER[] = {
    // Load architecture from 'seccomp_data' buffer into accumulator
    BPF_STMT(BPF_LD | BPF_W | BPF_ABS, SECCOMP_DATA_ARCH_OFFSET),
    // Jump to disallow if architecture is not X86_64
    // (1 + 1 + 36 == 38, the disallow return)
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_X86_64, 0, 36),
    // Load the system call number into accumulator
    BPF_STMT(BPF_LD | BPF_W | BPF_ABS, SECCOMP_DATA_NR_OFFSET),
    // Only applies to X86_64 arch. Jump to disallow for calls using the x32 ABI
    BPF_JUMP(BPF_JMP | BPF_JGT | BPF_K, UPPER_NR_LIMIT, 34, 0),
    // Allowed sys calls, jump to return allow on match
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_read, 34, 0),
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_write, 33, 0),
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_writev, 32, 0),
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_lseek, 31, 0),
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_lstat, 30, 0),
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_readlink, 29, 0),
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_stat, 28, 0),
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_fstat, 27, 0),
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_open, 26, 0),
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_close, 25, 0),
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_connect, 24, 0),
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_clone, 23, 0),
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_statfs, 22, 0),
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_dup2, 21, 0),
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_rmdir, 20, 0),        // for forecast temp storage
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_getdents, 19, 0),     // for forecast temp storage
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_openat, 18, 0),       // for forecast temp storage
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_tgkill, 17, 0),       // for the crash handler
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_rt_sigaction, 16, 0), // for the crash handler
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_rt_sigreturn, 15, 0),
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_futex, 14, 0),
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_madvise, 13, 0),
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_unlink, 12, 0),
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_mknod, 11, 0),
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_nanosleep, 10, 0),
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_set_robust_list, 9, 0),
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_mprotect, 8, 0),
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_munmap, 7, 0),
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_mmap, 6, 0),
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_getuid, 5, 0),
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_exit_group, 4, 0),
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_access, 3, 0),
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_brk, 2, 0),
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_exit, 1, 0),
    // Disallow call with error code EACCES
    BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | (EACCES & SECCOMP_RET_DATA)),
    // Allow call
    BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW)};
94+
95+
bool canUseSeccompBpf() {
96+
// This call is expected to fail due to the nullptr argument
97+
// but the failure mode informs us if the kernel was configured
98+
// with CONFIG_SECCOMP_FILTER
99+
// http://man7.org/linux/man-pages/man2/prctl.2.html
100+
int result = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, nullptr);
101+
int configError = errno;
102+
if (result != -1) {
103+
LOG_ERROR(<< "prctl set seccomp with null argument should have failed");
104+
return false;
105+
}
106+
107+
// If the kernel is not configured with CONFIG_SECCOMP_FILTER
108+
// or CONFIG_SECCOMP the error is EINVAL. EFAULT indicates the
109+
// seccomp filters are enabled but the 3rd argument (nullptr)
110+
// was invalid.
111+
return configError == EFAULT;
112+
}
113+
}
114+
115+
void CSystemCallFilter::installSystemCallFilter() {
116+
if (canUseSeccompBpf()) {
117+
LOG_DEBUG(<< "Seccomp BPF filters available");
118+
119+
// Ensure more permissive privileges cannot be set in future.
120+
// This must be set before installing the filter.
121+
// PR_SET_NO_NEW_PRIVS was aded in kernel 3.5
122+
if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
123+
LOG_ERROR(<< "prctl PR_SET_NO_NEW_PRIVS failed: " << std::strerror(errno));
124+
return;
125+
}
126+
127+
struct sock_fprog prog = {
128+
.len = static_cast<unsigned short>(sizeof(FILTER) / sizeof(FILTER[0])),
129+
.filter = const_cast<sock_filter*>(FILTER)};
130+
131+
// Install the filter.
132+
// prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, filter) was introduced
133+
// in kernel 3.5. This is functionally equivalent to
134+
// seccomp(SECCOMP_SET_MODE_FILTER, 0, filter) which was added in
135+
// kernel 3.17. We choose the older more compatible function.
136+
// Note this precludes the use of calling seccomp() with the
137+
// SECCOMP_FILTER_FLAG_TSYNC which is acceptable if the filter
138+
// is installed by the main thread before any other threads are
139+
// spawned.
140+
if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
141+
LOG_ERROR(<< "Unable to install Seccomp BPF: " << std::strerror(errno));
142+
} else {
143+
LOG_DEBUG(<< "Seccomp BPF installed");
144+
}
145+
146+
} else {
147+
LOG_DEBUG(<< "Seccomp BPF not available");
148+
}
149+
}
150+
}
151+
}
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
#include "seccomp/CSystemCallFilter.h"
7+
8+
#include <core/CLogger.h>
9+
10+
#include <paths.h>
11+
#include <sandbox.h>
12+
#include <unistd.h>
13+
14+
#include <cerrno>
15+
#include <cstring>
16+
17+
namespace ml {
18+
namespace seccomp {
19+
20+
namespace {
21+
// The Sandbox rules deny all actions apart from creating fifos,
22+
// opening files, reading and writing.
23+
// (allow file-write*) is required for mkfifo and that permission
24+
// cannot be set using the more granular controls.
25+
const std::string SANDBOX_RULES("\
26+
(version 1) \
27+
(deny default) \
28+
(allow file-read*) \
29+
(allow file-read-data) \
30+
(allow file-write*) \
31+
(allow file-write-data)");
32+
33+
// mkstemps will replace the Xs with random characters
34+
const std::string FILE_NAME_TEMPLATE("ml.XXXXXX.sb");
35+
// The length of the suffix '.sb'. This is passed to mkstemps as the
// suffix length, so it must be kept in step with FILE_NAME_TEMPLATE.
36+
const int FILE_NAME_TEMPLATE_SUFFIX_LEN = 3;
37+
38+
//! Get the directory in which temporary files should be created.
//!
//! The TMPDIR environment variable is preferred; _PATH_VARTMP is used
//! when TMPDIR is unset or empty.
//!
//! \return The directory path, always terminated with a '/' so that a
//! file name can be appended directly.
std::string getTempDir() {
    // Prefer to use the temporary directory set by the Elasticsearch JVM
    const char* tmpDir(::getenv("TMPDIR"));

    // An empty TMPDIR is treated like an unset one. Without this check
    // the last-character test below would index out of bounds on an
    // empty string.
    const bool useDefault = (tmpDir == nullptr || *tmpDir == '\0');
    std::string path(useDefault ? _PATH_VARTMP : tmpDir);

    // Make sure path ends with a slash so it's ready to have a file name appended
    if (path.empty() || path.back() != '/') {
        path += '/';
    }
    return path;
}
50+
51+
std::string writeTempRulesFile() {
52+
std::string profileFilename = getTempDir() + FILE_NAME_TEMPLATE;
53+
54+
// Create and open a temporary file with a random name
55+
// profileFilename is updated with the new filename.
56+
int fd = mkstemps(&profileFilename[0], FILE_NAME_TEMPLATE_SUFFIX_LEN);
57+
if (fd == -1) {
58+
LOG_ERROR(<< "Opening a temporary file with mkstemps failed: "
59+
<< std::strerror(errno));
60+
return std::string();
61+
}
62+
write(fd, SANDBOX_RULES.c_str(), SANDBOX_RULES.size());
63+
close(fd);
64+
65+
return profileFilename;
66+
}
67+
}
68+
69+
void CSystemCallFilter::installSystemCallFilter() {
70+
std::string profileFilename = writeTempRulesFile();
71+
if (profileFilename.empty()) {
72+
LOG_WARN(<< "Cannot write sandbox rules. macOS sandbox will not be initialized");
73+
return;
74+
}
75+
76+
char* errorbuf = nullptr;
77+
if (sandbox_init(profileFilename.c_str(), SANDBOX_NAMED, &errorbuf) != 0) {
78+
std::string msg("Error initializing macOS sandbox");
79+
if (errorbuf != nullptr) {
80+
msg += ": ";
81+
msg += errorbuf;
82+
sandbox_free_error(errorbuf);
83+
}
84+
LOG_ERROR(<< msg);
85+
} else {
86+
LOG_DEBUG(<< "macOS sandbox initialized");
87+
}
88+
89+
std::remove(profileFilename.c_str());
90+
}
91+
}
92+
}

0 commit comments

Comments
 (0)