From 063cbfb793a9bbf2d996c83a66d8713b0e0e3e1c Mon Sep 17 00:00:00 2001 From: Hilko Bengen Date: Tue, 24 Aug 2021 10:40:45 +0200 Subject: [PATCH] Reduce memory pressure caused by Linux process scanning (#1470) Reading memory pages from /proc/$PID/mem has the unpleasant side effect of forcing otherwise unused process memory pages into the target process VM, considerably increasing the target process' resident set size. As a typical example, many programs only make use of a subset of code from shared libraries that they are linked against. The solution consists of replicating memory mappings mappings from files if they are directly available through the filesystem. All information needed to find the file (and to determine if it is indeed identical to the file mapped by the target process) is available from /proc/$PID/maps. Only pages that may have been changed according to /proc/$PID/pagemap are then read via /proc/$PID/mem to overwrite the local mapping. Mappings whose underlying filesystem object has changed read from /proc/$PID/mem as before. Mappings that are not backed by a regular file (block devices, stack or heap sections, etc.) are assumed to behave like zeroed-out files and otherwise treated the same. --- Makefile.am | 11 +- libyara/proc/linux.c | 254 ++++++++++++++++++++++++++++++++++++------- tests/BUILD.bazel | 7 ++ tests/mapper.c | 61 +++++++++++ tests/test-rules.c | 163 +++++++++++++++------------ 5 files changed, 386 insertions(+), 110 deletions(-) create mode 100644 tests/mapper.c diff --git a/Makefile.am b/Makefile.am index ae8469bfc2..4ebf01d15e 100644 --- a/Makefile.am +++ b/Makefile.am @@ -56,6 +56,9 @@ yarac_SOURCES = \ yarac_LDADD = -Llibyara/.libs -lyara yarac_DEPENDENCIES = libyara/.libs/libyara.la +tests_mapper_SOURCES = tests/mapper.c +tests_mapper_CFLAGS = -O0 + test_alignment_SOURCES = tests/test-alignment.c tests/util.c test_alignment_LDADD = libyara/.libs/libyara.a test_arena_SOURCES = tests/test-arena.c tests/util.c @@ -64,6 +67,9 @@ test_atoms_SOURCES = tests/test-atoms.c tests/util.c test_atoms_LDADD = libyara/.libs/libyara.a test_rules_SOURCES = tests/test-rules.c tests/util.c test_rules_LDADD = libyara/.libs/libyara.a +if POSIX +EXTRA_test_rules_DEPENDENCIES = tests/mapper$(EXEEXT) +endif test_pe_SOURCES = tests/test-pe.c tests/util.c test_pe_LDADD = libyara/.libs/libyara.a test_elf_SOURCES = tests/test-elf.c tests/util.c @@ -84,7 +90,7 @@ test_async_SOURCES = tests/test-async.c tests/util.c test_async_LDADD = libyara/.libs/libyara.a TESTS = $(check_PROGRAMS) -TESTS_ENVIRONMENT = TOP_SRCDIR=$(top_srcdir) +TESTS_ENVIRONMENT = TOP_SRCDIR=$(top_srcdir) TOP_BUILDDIR=$(top_builddir) check_PROGRAMS = \ test-arena \ @@ -101,6 +107,9 @@ check_PROGRAMS = \ test-re-split \ test-async +EXTRA_PROGRAMS = tests/mapper +CLEANFILES = tests/mapper$(EXEEXT) + if POSIX # The -fsanitize=address option makes test-exception fail. Include the test # only if the option is not enabled. diff --git a/libyara/proc/linux.c b/libyara/proc/linux.c index 81871dfbad..dd82499541 100644 --- a/libyara/proc/linux.c +++ b/libyara/proc/linux.c @@ -32,6 +32,9 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include +#include +#include #include #include #include @@ -44,13 +47,23 @@ typedef struct _YR_PROC_INFO { int pid; int mem_fd; + int pagemap_fd; FILE* maps; + int page_size; + char map_path[PATH_MAX]; + uint64_t map_dmaj, map_dmin, map_ino, map_offset; } YR_PROC_INFO; +static int page_size = -1; + int _yr_process_attach(int pid, YR_PROC_ITERATOR_CTX* context) { char buffer[256]; + page_size = sysconf(_SC_PAGE_SIZE); + if (page_size < 0) + page_size = 4096; + YR_PROC_INFO* proc_info = (YR_PROC_INFO*) yr_malloc(sizeof(YR_PROC_INFO)); if (proc_info == NULL) @@ -61,38 +74,62 @@ int _yr_process_attach(int pid, YR_PROC_ITERATOR_CTX* context) proc_info->pid = pid; proc_info->maps = NULL; proc_info->mem_fd = -1; + proc_info->pagemap_fd = -1; snprintf(buffer, sizeof(buffer), "/proc/%u/maps", pid); proc_info->maps = fopen(buffer, "r"); if (proc_info->maps == NULL) - { - yr_free(proc_info); - return ERROR_COULD_NOT_ATTACH_TO_PROCESS; - } + goto err; snprintf(buffer, sizeof(buffer), "/proc/%u/mem", pid); proc_info->mem_fd = open(buffer, O_RDONLY); if (proc_info->mem_fd == -1) + goto err; + + snprintf(buffer, sizeof(buffer), "/proc/%u/pagemap", pid); + proc_info->pagemap_fd = open(buffer, O_RDONLY); + + if (proc_info->mem_fd == -1) + goto err; + + return ERROR_SUCCESS; + +err: + if (proc_info) { - fclose(proc_info->maps); - proc_info->maps = NULL; + if (proc_info->pagemap_fd != -1) + close(proc_info->pagemap_fd); - yr_free(proc_info); + if (proc_info->mem_fd != -1) + close(proc_info->mem_fd); - return ERROR_COULD_NOT_ATTACH_TO_PROCESS; + if (proc_info->maps != NULL) + fclose(proc_info->maps); + + yr_free(proc_info); } - return ERROR_SUCCESS; + return ERROR_COULD_NOT_ATTACH_TO_PROCESS; } int _yr_process_detach(YR_PROC_ITERATOR_CTX* context) { YR_PROC_INFO* proc_info = (YR_PROC_INFO*) context->proc_info; + if (proc_info) + { + fclose(proc_info->maps); + close(proc_info->mem_fd); + close(proc_info->pagemap_fd); + } - fclose(proc_info->maps); - close(proc_info->mem_fd); + if (context->buffer != NULL) + { + munmap((void*) context->buffer, context->buffer_size); + context->buffer = NULL; + context->buffer_size = 0; + } return ERROR_SUCCESS; } @@ -100,41 +137,153 @@ int _yr_process_detach(YR_PROC_ITERATOR_CTX* context) YR_API const uint8_t* yr_process_fetch_memory_block_data(YR_MEMORY_BLOCK* block) { const uint8_t* result = NULL; + uint64_t *pagemap = NULL; YR_PROC_ITERATOR_CTX* context = (YR_PROC_ITERATOR_CTX*) block->context; YR_PROC_INFO* proc_info = (YR_PROC_INFO*) context->proc_info; - if (context->buffer_size < block->size) + if (context->buffer != NULL) { - if (context->buffer != NULL) - yr_free((void*) context->buffer); - - context->buffer = (const uint8_t*) yr_malloc(block->size); + munmap((void*) context->buffer, context->buffer_size); + context->buffer = NULL; + context->buffer_size = 0; + } - if (context->buffer != NULL) + int fd = -2; /* Assume mapping not connected with a file */ + if (strlen(proc_info->map_path) > 0 && proc_info->map_dmaj != 0 && + proc_info->map_ino != 0) + { + struct stat st; + fd = open(proc_info->map_path, O_RDONLY); + if (fd < 0) + { + /* file does not exist. */ + fd = -1; + } + else if (fstat(fd, &st) < 0) { - context->buffer_size = block->size; + /* Why should stat fail after file open? Treat like missing. */ + close(fd); + fd = -1; } - else + else if ( + (major(st.st_dev) != proc_info->map_dmaj) || + (minor(st.st_dev) != proc_info->map_dmin) || + (st.st_ino != proc_info->map_ino)) { - context->buffer_size = 0; - goto _exit; // return NULL; + /* Wrong file, may have been replaced. Treat like missing. */ + close(fd); + fd = -1; + } + else if (st.st_size < proc_info->map_offset + block->size) + { + /* Mapping extends past end of file. Treat like missing. */ + close(fd); + fd = -1; + } + else if ((st.st_mode & S_IFMT) != S_IFREG) + { + /* Correct filesystem object, but not a regular file. Treat like + * uninitialized mapping. */ + close(fd); + fd = -2; } } - if (pread( - proc_info->mem_fd, - (void*) context->buffer, - block->size, - block->base) == -1) + if (fd >= 0) { - goto _exit; // return NULL; + context->buffer = mmap( + NULL, + block->size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE, + fd, + proc_info->map_offset); + close(fd); + } + else + { + context->buffer = mmap( + NULL, + block->size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + -1, + 0); + } + + if (context->buffer != NULL) + { + context->buffer_size = block->size; + } + else + { + context->buffer_size = 0; + goto _exit; + } + + /* If mapping can't be accessed through the filesystem, read everything from + * target process VM. */ + if (fd == -1) + { + if (pread( + proc_info->mem_fd, + (void*) context->buffer, + block->size, + block->base) == -1) + { + goto _exit; + } + } + else + { + pagemap = calloc(block->size / page_size, sizeof(uint64_t)); + if (pagemap == NULL) + { + goto _exit; + } + if (pread( + proc_info->pagemap_fd, + pagemap, + sizeof(uint64_t) * block->size / page_size, + sizeof(uint64_t) * block->base / page_size) == -1) + { + goto _exit; + } + + for (uint64_t i = 0; i < block->size / page_size; i++) + { + if (pagemap[i] >> 61 == 0) { + continue; + } + /* Overwrite our mapping if the page is present, file-backed, or + * swap-backed and if it differs from our mapping. */ + uint8_t buffer[page_size]; + if (pread( + proc_info->mem_fd, + buffer, + page_size, + block->base + i * page_size) == -1) + { + goto _exit; + } + if (memcmp((void*) context->buffer + i * page_size, (void*) buffer, page_size) != 0) + { + memcpy((void*) context->buffer + i * page_size, (void*) buffer, page_size); + } + } } result = context->buffer; _exit:; + if (pagemap) + { + free(pagemap); + pagemap = NULL; + } + YR_DEBUG_FPRINTF(2, stderr, "- %s() {} = %p\n", __FUNCTION__, result); return result; @@ -147,28 +296,51 @@ YR_API YR_MEMORY_BLOCK* yr_process_get_next_memory_block( YR_PROC_ITERATOR_CTX* context = (YR_PROC_ITERATOR_CTX*) iterator->context; YR_PROC_INFO* proc_info = (YR_PROC_INFO*) context->proc_info; - char buffer[256]; + char buffer[PATH_MAX]; + char perm[5]; uint64_t begin, end; + int n, len; - if (fgets(buffer, sizeof(buffer), proc_info->maps) != NULL) + while (fgets(buffer, sizeof(buffer), proc_info->maps) != NULL) + { + // If we haven't read the whole line, skip over the rest. + if (strrchr(buffer, '\n') == NULL) + { + int c; + do + { + c = fgetc(proc_info->maps); + } while (c >= 0 && c != '\n'); + } + n = sscanf( + buffer, + "%" SCNx64 "-%" SCNx64 " %4s " + "%" SCNx64 " %" SCNx64 ":%" SCNx64 " %" SCNu64 " %n", + &begin, + &end, + perm, + &(proc_info->map_offset), + &(proc_info->map_dmaj), + &(proc_info->map_dmin), + &(proc_info->map_ino), + &len); + if (n == 7) + { + if (buffer[len] == '/') + strncpy( + proc_info->map_path, buffer + len, sizeof(proc_info->map_path) - 1); + else + proc_info->map_path[0] = '\0'; + break; + } + } + if (n == 7) { - sscanf(buffer, "%" SCNx64 "-%" SCNx64, &begin, &end); - context->current_block.base = begin; context->current_block.size = end - begin; result = &context->current_block; } - // If we haven't read the whole line, skip over the rest. - if (strrchr(buffer, '\n') == NULL) - { - int c; - do - { - c = fgetc(proc_info->maps); - } while (c >= 0 && c != '\n'); - } - iterator->last_error = ERROR_SUCCESS; YR_DEBUG_FPRINTF( diff --git a/tests/BUILD.bazel b/tests/BUILD.bazel index 0b97e7713e..7f672a6310 100644 --- a/tests/BUILD.bazel +++ b/tests/BUILD.bazel @@ -78,6 +78,12 @@ cc_test( ], ) +cc_binary( + name = "mapper", + srcs = ["mapper.c"], + copts = ["-O0"], +) + cc_test( name = "test_rules", srcs = ["test-rules.c"], @@ -96,6 +102,7 @@ cc_test( ":blob", ":util", "@//:libyara", + ":mapper", ], ) diff --git a/tests/mapper.c b/tests/mapper.c new file mode 100644 index 0000000000..07883fcc85 --- /dev/null +++ b/tests/mapper.c @@ -0,0 +1,61 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +char str[] = "!dlrow ,olleH"; +int fd; + +char* map_file(char* path) +{ + if ((fd = open(path, O_RDONLY)) < 0) + { + fprintf(stderr, "open: %s: %s\n", path, strerror(errno)); + exit(1); + } + char* rv = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + if (rv == NULL) + { + fprintf(stderr, "mmap: %s: failed: %s\n", path, strerror(errno)); + exit(1); + } + close(fd); + return rv; +} + +int main(int argc, char** argv) +{ + char* buf; + if (argc < 2) + { + fprintf(stderr, "no argument\n"); + exit(1); + } + else if (strcmp(argv[1], "open") == 0) + { + if (argc < 3) + exit(1); + printf("%s: %s %s\n", argv[0], argv[1], argv[2]); + buf = map_file(argv[2]); + } + else if (strcmp(argv[1], "patch") == 0) + { + if (argc < 3) + exit(1); + printf("%s: %s %s\n", argv[0], argv[1], argv[2]); + buf = map_file(argv[2]); + for (int i = 0; i < sizeof(str) - 1; i++) buf[i] = str[sizeof(str) - i - 2]; + } + else + { + fprintf(stderr, "unknown argument <%s>\n", argv[1]); + exit(1); + } + sleep(3600); +} diff --git a/tests/test-rules.c b/tests/test-rules.c index a085c91eae..857071adff 100644 --- a/tests/test-rules.c +++ b/tests/test-rules.c @@ -27,11 +27,15 @@ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include #include #include +#include #include +#include #include #include +#include #if !defined(_WIN32) && !defined(__CYGWIN__) #include @@ -2820,39 +2824,46 @@ void test_tags() YR_DEBUG_FPRINTF(1, stderr, "} // %s()\n", __FUNCTION__); } -#if !defined(_WIN32) && !defined(__CYGWIN__) +#if !defined(_WIN32) || defined(__CYGWIN__) + +#define spawn(cmd,rest...) \ + do \ + { \ + if ((pid = fork()) == 0) \ + { \ + execl(cmd, cmd, rest, NULL); \ + fprintf(stderr, "execl: %s: %s\n", cmd, strerror(errno)); \ + exit(1); \ + } \ + if (pid <= 0) \ + { \ + perror("fork"); \ + abort(); \ + } \ + sleep(1); \ + if (waitpid(pid, NULL, WNOHANG) != 0) \ + { \ + fprintf(stderr, "%s did not live long enough\n", cmd); \ + abort(); \ + } \ + } while (0) + void test_process_scan() { YR_DEBUG_FPRINTF(1, stderr, "+ %s() {\n", __FUNCTION__); - int pid = fork(); + int pid; int status = 0; YR_RULES* rules; - int rc1, rc2; + int rc; + int fd; + char* tf; + char buf[16384]; struct COUNTERS counters; - counters.rules_not_matching = 0; - counters.rules_matching = 0; - - if (pid == 0) - { - // The string should appear somewhere in the shell's process space. - if (execl( - "/bin/sh", - "/bin/sh", - "-c", - "VAR='Hello, world!'; sleep 600; true", - NULL) == -1) - exit(1); - } - assert(pid > 0); - - // Give child process time to initialize. - sleep(1); - - status = compile_rule( - "\ + if (compile_rule( + "\ rule should_match {\ strings:\ $a = { 48 65 6c 6c 6f 2c 20 77 6f 72 6c 64 21 }\ @@ -2863,58 +2874,74 @@ void test_process_scan() condition: \ filesize < 100000000 \ }", - &rules); - - if (status != ERROR_SUCCESS) + &rules) != ERROR_SUCCESS) { perror("compile_rule"); exit(EXIT_FAILURE); } - rc1 = yr_rules_scan_proc(rules, pid, 0, count, &counters, 0); - yr_rules_destroy(rules); + spawn("/bin/sh", "-c", "VAR='Hello, world!'; sleep 600; true"); + + counters.rules_matching = 0; + counters.rules_not_matching = 0; + rc = yr_rules_scan_proc(rules, pid, 0, count, &counters, 0); kill(pid, SIGALRM); - rc2 = waitpid(pid, &status, 0); - if (rc2 == -1) - { - perror("waitpid"); - exit(EXIT_FAILURE); - } - if (status != SIGALRM) - { - fprintf( - stderr, "Scanned process exited with unexpected status %d\n", status); - exit(EXIT_FAILURE); - } + assert(rc == ERROR_SUCCESS); - switch (rc1) - { - case ERROR_SUCCESS: - if (counters.rules_matching != 1) - { - fprintf( - stderr, - "Expecting one rule matching in test_process_scan, found %d\n", - counters.rules_matching); - exit(EXIT_FAILURE); - } - if (counters.rules_not_matching != 1) - { - fprintf( - stderr, - "Expecting one rule not matching in test_process_scan, found %d\n", - counters.rules_not_matching); - exit(EXIT_FAILURE); - } - break; - case ERROR_COULD_NOT_ATTACH_TO_PROCESS: - fprintf(stderr, "Could not attach to process, ignoring this error\n"); - break; - default: - fprintf(stderr, "yr_rules_scan_proc: Got unexpected error %d\n", rc1); - exit(EXIT_FAILURE); - } + assert(waitpid(pid, &status, 0) >= 0); + assert(status == SIGALRM); + + assert(counters.rules_matching == 1); + assert(counters.rules_not_matching == 1); + + tf = strdup("./map-XXXXXX"); + fd = mkstemp(tf); + assert(fd >= 0); + + // check for string in file that gets mapped by a process + bzero(buf, sizeof(buf)); + sprintf(buf, "Hello, world!"); + write(fd, buf, sizeof(buf)); + lseek(fd, 0, SEEK_SET); + + spawn("tests/mapper", "open", tf); + + counters.rules_matching = 0; + rc = yr_rules_scan_proc(rules, pid, 0, count, &counters, 0); + kill(pid, SIGALRM); + + fprintf(stderr, "scan: %d\n", rc); + assert(rc == ERROR_SUCCESS); + + assert(waitpid(pid, &status, 0) >= 0); + assert(status == SIGALRM); + + assert(counters.rules_matching == 1); + + // check for string in blank mapping after process has overwritten + // the mapping. + bzero(buf, sizeof(buf)); + write(fd, buf, sizeof(buf)); + + spawn("./tests/mapper", "patch", tf); + + counters.rules_matching = 0; + rc = yr_rules_scan_proc(rules, pid, 0, count, &counters, 0); + kill(pid, SIGALRM); + + fprintf(stderr, "scan: %d\n", rc); + assert(rc == ERROR_SUCCESS); + + assert(waitpid(pid, &status, 0) >= 0); + assert(status == SIGALRM); + + assert(counters.rules_matching == 1); + + close(fd); + unlink(tf); + free(tf); + yr_rules_destroy(rules); YR_DEBUG_FPRINTF(1, stderr, "} // %s()\n", __FUNCTION__); }