-
Couldn't load subscription status.
- Fork 929
shmat/shmdt additions for patcher #6531
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,7 +15,7 @@ | |
| * reserved. | ||
| * Copyright (c) 2016-2017 Research Organization for Information Science | ||
| * and Technology (RIST). All rights reserved. | ||
| * Copyright (c) 2016 IBM Corporation. All rights reserved. | ||
| * Copyright (c) 2016-2019 IBM Corporation. All rights reserved. | ||
| * | ||
| * $COPYRIGHT$ | ||
| * | ||
|
|
@@ -48,6 +48,9 @@ | |
| #if defined(HAVE_LINUX_MMAN_H) | ||
| #include <linux/mman.h> | ||
| #endif | ||
| #if defined(HAVE_SYS_IPC_H) | ||
| #include <sys/ipc.h> | ||
| #endif | ||
|
|
||
| #include "memory_patcher.h" | ||
| #undef opal_memory_changed | ||
|
|
@@ -104,15 +107,7 @@ opal_memory_patcher_component_t mca_memory_patcher_component = { | |
| * data. If this can be resolved the two levels can be joined. | ||
| */ | ||
|
|
||
| /* | ||
| * The following block of code is #if 0'ed out because we do not need | ||
| * to intercept mmap() any more (mmap() only deals with memory | ||
| * protection; it does not invalidate any rcache entries for a given | ||
| * region). But if we do someday, this is the code that we'll need. | ||
| * It's a little non-trivial, so we might as well keep it (and #if 0 | ||
| * it out). | ||
| */ | ||
| #if 0 | ||
| #if defined (SYS_mmap) | ||
|
|
||
| #if defined(HAVE___MMAP) && !HAVE_DECL___MMAP | ||
| /* prototype for Apple's internal mmap function */ | ||
|
|
@@ -121,12 +116,11 @@ void *__mmap (void *start, size_t length, int prot, int flags, int fd, off_t off | |
|
|
||
| static void *(*original_mmap)(void *, size_t, int, int, int, off_t); | ||
|
|
||
| static void *intercept_mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset) | ||
| static void *_intercept_mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset) | ||
| { | ||
| OPAL_PATCHER_BEGIN; | ||
| void *result = 0; | ||
|
|
||
| if (prot == PROT_NONE) { | ||
| if ((flags & MAP_FIXED) && (start != NULL)) { | ||
| opal_mem_hooks_release_hook (start, length, true); | ||
| } | ||
|
|
||
|
|
@@ -137,19 +131,20 @@ static void *intercept_mmap(void *start, size_t length, int prot, int flags, int | |
| #else | ||
| result = (void*)(intptr_t) memory_patcher_syscall(SYS_mmap, start, length, prot, flags, fd, offset); | ||
| #endif | ||
|
|
||
| // I thought we had some issue in the past with the above line for IA32, | ||
| // like maybe syscall() wouldn't handle that many arguments. But just now | ||
| // I used gcc -m32 and it worked on a recent system. But there's a possibility | ||
| // that older ia32 systems may need some other code to make the above syscall. | ||
| } else { | ||
| result = original_mmap (start, length, prot, flags, fd, offset); | ||
| } | ||
|
|
||
| OPAL_PATCHER_END; | ||
| return result; | ||
| } | ||
|
|
||
| static void *intercept_mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset) | ||
| { | ||
| OPAL_PATCHER_BEGIN; | ||
| void *result = _intercept_mmap (start, length, prot, flags, fd, offset); | ||
| OPAL_PATCHER_END; | ||
| return result; | ||
| } | ||
| #endif | ||
|
|
||
| #if defined (SYS_munmap) | ||
|
|
@@ -256,6 +251,9 @@ static int _intercept_madvise (void *start, size_t length, int advice) | |
| int result = 0; | ||
|
|
||
| if (advice == MADV_DONTNEED || | ||
| #ifdef MADV_FREE | ||
| advice == MADV_FREE || | ||
| #endif | ||
| #ifdef MADV_REMOVE | ||
| advice == MADV_REMOVE || | ||
| #endif | ||
|
|
@@ -341,7 +339,12 @@ static int intercept_brk (void *addr) | |
|
|
||
| #endif | ||
|
|
||
| #if defined(SYS_shmdt) && defined(__linux__) | ||
| #define HAS_SHMDT (defined(SYS_shmdt) || \ | ||
| (defined(IPCOP_shmdt) && defined(SYS_ipc))) | ||
| #define HAS_SHMAT (defined(SYS_shmat) || \ | ||
| (defined(IPCOP_shmat) && defined(SYS_ipc))) | ||
|
|
||
| #if (HAS_SHMDT || HAS_SHMAT) && defined(__linux__) | ||
|
|
||
| #include <stdio.h> | ||
| #include <fcntl.h> | ||
|
|
@@ -404,6 +407,68 @@ static size_t memory_patcher_get_shm_seg_size (const void *shmaddr) | |
| return seg_size; | ||
| } | ||
|
|
||
| static size_t get_shm_size(int shmid) | ||
| { | ||
| struct shmid_ds ds; | ||
| int ret; | ||
|
|
||
| ret = shmctl(shmid, IPC_STAT, &ds); | ||
| if (ret < 0) { | ||
| return 0; | ||
| } | ||
|
|
||
| return ds.shm_segsz; | ||
| } | ||
| #endif | ||
|
|
||
| #if HAS_SHMAT && defined(__linux__) | ||
| static void *(*original_shmat)(int shmid, const void *shmaddr, int shmflg); | ||
|
|
||
| static void *_intercept_shmat(int shmid, const void *shmaddr, int shmflg) | ||
| { | ||
| void *result = 0; | ||
|
|
||
| size_t size = get_shm_size(shmid); | ||
|
|
||
| if ((shmflg & SHM_REMAP) && (shmaddr != NULL)) { | ||
| // I don't really know what REMAP combined with SHM_RND does, so I'll just | ||
| // guess it remaps all the way down to the lower attach_addr, and all the | ||
| // way up to the original shmaddr+size | ||
| uintptr_t attach_addr = (uintptr_t)shmaddr; | ||
|
|
||
| if (shmflg & SHM_RND) { | ||
| attach_addr -= ((uintptr_t)shmaddr) % SHMLBA; | ||
| size += ((uintptr_t)shmaddr) % SHMLBA; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. IMHO it should be something like this (i will probably have to fix UCX as well): There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. actually, thinking again, we can't really change the size. we got the size from shmctl() so it's the actual segment size and should be aligned to SHMLBA So we probably should not change 'size' at all, just 'attach_addr' - since the segment attach point just moves by %SHMLBA, but the segment size cannot really change. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm leaning toward agreeing. I agree the new mapping will have the original 'size' as its size, and it'll only occupy the range (attach_addr ..+original_size). And when I read the manpage for REMAP I'm sure they intended to write that it replaces any mapping that existed in the address range where the new mapping goes. But they wrote more explicitly that it replaces any mapping in (addr ..+size) eg where the new mapping would go w/o RND in play. So being logical I'd say with RND any mapping that's in (attach_addr ..+original_size) gets replaced. I'm still maybe 5% concerned that a pedantic reading of the manpage would say that they specified the exact range that gets replaced, and the above skips a little bit on the right hand side of that range. Is there any downside other than an unnecessary cache invalidation to my current code that uses the lower of the low ends and the higher of the high ends of the two interpretations of the manpage? My code would be telling you a certain virtual address had been invalidated when it really hadn't. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok, let's be pedantic |
||
| } | ||
| opal_mem_hooks_release_hook ((void*)attach_addr, size, false); | ||
| } | ||
|
|
||
| if (!original_shmat) { | ||
| #if defined(SYS_shmat) | ||
| result = memory_patcher_syscall(SYS_shmat, shmid, shmaddr, shmflg); | ||
| #else // IPCOP_shmat | ||
| unsigned long ret; | ||
| ret = memory_patcher_syscall(SYS_ipc, IPCOP_shmat, | ||
| shmid, shmflg, &shmaddr, shmaddr); | ||
| result = (ret > -(unsigned long)SHMLBA) ? (void *)ret : (void *)shmaddr; | ||
| #endif | ||
| } else { | ||
| result = original_shmat (shmid, shmaddr, shmflg); | ||
| } | ||
|
|
||
| return result; | ||
| } | ||
|
|
||
| static void* intercept_shmat (int shmid, const void * shmaddr, int shmflg) | ||
| { | ||
| OPAL_PATCHER_BEGIN; | ||
| void *result = _intercept_shmat (shmid, shmaddr, shmflg); | ||
| OPAL_PATCHER_END; | ||
| return result; | ||
| } | ||
| #endif | ||
|
|
||
| #if HAS_SHMDT && defined(__linux__) | ||
| static int (*original_shmdt) (const void *); | ||
|
|
||
| static int _intercept_shmdt (const void *shmaddr) | ||
|
|
@@ -417,7 +482,11 @@ static int _intercept_shmdt (const void *shmaddr) | |
| if (original_shmdt) { | ||
| result = original_shmdt (shmaddr); | ||
| } else { | ||
| #if defined(SYS_shmdt) | ||
| result = memory_patcher_syscall (SYS_shmdt, shmaddr); | ||
| #else // IPCOP_shmdt | ||
| result = memory_patcher_syscall(SYS_ipc, IPCOP_shmdt, 0, 0, 0, shmaddr); | ||
| #endif | ||
| } | ||
|
|
||
| return result; | ||
|
|
@@ -478,9 +547,7 @@ static int patcher_open (void) | |
| /* set memory hooks support level */ | ||
| opal_mem_hooks_set_support (OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT); | ||
|
|
||
| #if 0 | ||
| /* See above block to see why mmap() functionality is #if 0'ed | ||
| out */ | ||
| #if defined (SYS_mmap) | ||
| rc = opal_patcher->patch_symbol ("mmap", (uintptr_t) intercept_mmap, (uintptr_t *) &original_mmap); | ||
| if (OPAL_SUCCESS != rc) { | ||
| return rc; | ||
|
|
@@ -508,7 +575,14 @@ static int patcher_open (void) | |
| } | ||
| #endif | ||
|
|
||
| #if defined(SYS_shmdt) && defined(__linux__) | ||
| #if HAS_SHMAT && defined(__linux__) | ||
| rc = opal_patcher->patch_symbol ("shmat", (uintptr_t) intercept_shmat, (uintptr_t *) &original_shmat); | ||
| if (OPAL_SUCCESS != rc) { | ||
| return rc; | ||
| } | ||
| #endif | ||
|
|
||
| #if HAS_SHMDT && defined(__linux__) | ||
| rc = opal_patcher->patch_symbol ("shmdt", (uintptr_t) intercept_shmdt, (uintptr_t *) &original_shmdt); | ||
| if (OPAL_SUCCESS != rc) { | ||
| return rc; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
BTW before this change, mmap(PROT_NONE) was calling release hook, and after this change - it doesn't. Is this the desired behavior?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It should be okay because previously that entire section was #if 0 so mmap() wasn't being intercepted at all. The PROT_NONE was old code and was #if 0 after a decision a long time ago that mmap PROT_NONE didn't need to be trigger a cache deregistration. So when I re-added mmap() for this commit I just used the UCX code to decide what should be intercepted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok, i guess if flags do not have MAP+FIXED, it will not overwrite an existing mapping anyway