Skip to content

Use an .init_array function to capture argc/argv on Linux. #78854

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
209 changes: 24 additions & 185 deletions stdlib/public/CommandLineSupport/CommandLine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,198 +169,37 @@ static char **swift::getUnsafeArgvArgc(int *outArgLen) {
// Stub for this configuration: there is nothing to enumerate here, so `body`
// is accepted for interface compatibility and never invoked.
template <typename F>
static void swift::enumerateUnsafeArgv(const F &body) {
}
#elif defined(__linux__)
// On Linux, there is no easy way to get the argument vector pointer outside
// of the main() function. However, the ABI specifications dictate the layout
// of the process's initial stack, which looks something like:
//
//    stack top ----> ┌────────────────────────┐
//                    │      Unspecified       │
//                    ┊                        ┊
//                    ├┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┤
//                    │   Information block    │
//                    │   (argument strings,   │
//                    │  environment strings,  │
//                    │ auxiliary information) │
//                    ┊                        ┊
//                    ├┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┤
//                    │      Unspecified       │
//                    ┊                        ┊
//                    ├┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┤
//                    │          NULL          │
//                    ├┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┤
//                    │    Auxiliary Vector    │
//                    ┊                        ┊
//                    ├┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┤
//                    │          NULL          │
//                    ├┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┤
//                    │  Environment Pointers  │
//                    ┊                        ┊
//    environ ------> ├┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┤
//                    │          NULL          │
//                    ├┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┤
//                    │   Argument Pointers    │
//                    ┊                        ┊
//                    ├┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┤
//                    │     Argument Count     │
//                    ├┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┤
//                    ┊                        ┊
//
// See https://gitlab.com/x86-psABIs/x86-64-ABI,
// https://gitlab.com/x86-psABIs/i386-ABI
//
// The upshot is that if we can get hold of `environ` before anything has
// had a chance to change it, we can find the `argv` array and also the
// argument count, `argc`, by walking back up the stack.
//
// (Note that Linux uses this same layout for all platforms, not just x86-based
// ones. It also has a fixed layout for the data at the top of the stack, but
// we don't need to take advantage of that here and can stick to things that
// are defined in the ABI specs.)

#include <unistd.h>

#define DEBUG_ARGVGRABBER 0
#if DEBUG_ARGVGRABBER
#define ARGVDEBUG(...) fprintf(stderr, __VA_ARGS__)
#else
#define ARGVDEBUG(...)
#endif

namespace {

// Captures the process's argument vector and argument count. The public
// fields are what callers read; the private pieces are the helpers used to
// locate them.
struct ArgvGrabber {
  // Pointer to the argument pointer array.
  char **argv;
  // Number of arguments.
  int argc;

  ArgvGrabber();

private:
  // An address range describing a stack region: `base` is the lowest address
  // and `top` is one past the highest. A default-constructed value (both
  // null) means "no stack found".
  struct stack {
    void *base = nullptr;
    void *top = nullptr;

    stack() = default;
    stack(void *b, void *t) : base(b), top(t) {}
  };

  // Locates the stack region.
  stack findStack();
  // Searches the given stack region for argv/argc.
  void findArgv(stack s);
};

// Find the stack by looking at /proc/self/maps
// Locate the stack region by scanning /proc/self/maps.
//
// Returns the [base, top) range of the mapping that contains this function's
// own local buffer, extended by any mappings that follow it contiguously.
// Returns a default (null) `stack` if the maps file cannot be opened or no
// mapping contains the buffer.
ArgvGrabber::stack ArgvGrabber::findStack(void) {
  FILE *mapsFile = fopen("/proc/self/maps", "r");
  if (!mapsFile) {
    ARGVDEBUG("unable to open maps - %d\n", errno);
    return stack();
  }

  // `buffer` must remain a local array: its address is the probe used to
  // recognise which mapping is the stack.
  //
  // We deliberately do not search for the "[stack]" label, because Rosetta
  // and qemu set up a separate stack for the emulated code.
  char buffer[256];
  void *const probe = static_cast<void *>(buffer);

  void *stackLow = nullptr;
  void *stackHigh = nullptr;
  bool haveStack = false;

  while (fgets(buffer, sizeof(buffer), mapsFile)) {
    void *rangeLow;
    void *rangeHigh;
    if (sscanf(buffer, "%p-%p", &rangeLow, &rangeHigh) != 2)
      continue;

    if (probe >= rangeLow && probe < rangeHigh) {
      // This mapping contains our local buffer, so it is the stack.
      stackLow = rangeLow;
      stackHigh = rangeHigh;
      haveStack = true;
    } else if (haveStack && stackHigh == rangeLow) {
      // *Sometimes* the stack is followed by an extra, directly adjacent
      // range; fold it into the result.
      stackHigh = rangeHigh;
    }
  }

  fclose(mapsFile);

  if (haveStack)
    return stack(stackLow, stackHigh);

  ARGVDEBUG("stack not found in maps\n");
  return stack();
}

#if DEBUG_ARGVGRABBER
// Debug helper: echo the contents of /proc/self/maps to stderr, or report
// the errno value if the file cannot be opened.
void printMaps() {
  FILE *mapsFile = fopen("/proc/self/maps", "r");
  if (mapsFile == nullptr) {
    fprintf(stderr, "unable to open maps - %d\n", errno);
    return;
  }

  // Copy the file through in fixed-size chunks; long lines are simply
  // emitted across several iterations.
  char chunk[256];
  for (;;) {
    if (!fgets(chunk, sizeof(chunk), mapsFile))
      break;
    fputs(chunk, stderr);
  }

  fclose(mapsFile);
}
#endif

// Find argv by walking backwards from environ
void ArgvGrabber::findArgv(ArgvGrabber::stack stack) {
if (!stack.base) {
ARGVDEBUG("no stack\n");
return;
}

// Check that environ points to the stack
char **envp = environ;
if ((void *)envp < stack.base || (void *)envp >= stack.top) {
ARGVDEBUG("envp = %p, stack is from %p to %p\n",
envp, stack.base, stack.top);
#if DEBUG_ARGVGRABBER
printMaps();
#endif
return;
}

char **ptr = envp - 1;

// We're now pointing at the NULL that terminates argv. Keep going back
// while we're seeing pointers (values greater than envp).
while ((void *)(ptr - 1) > stack.base) {
--ptr;

// The first thing less than envp must be the argc value
if ((void *)*ptr < (void *)envp) {
argc = (int)(intptr_t)*ptr++;
argv = ptr;
return;
namespace swift {
static int capturedArgc = 0;
static char **capturedArgv = nullptr;

static void captureArgcArgv(int argc, char *argv[], char *envp[]) {
capturedArgc = argc;

// There is no guarantee that argv will remain allocated by the time we go to
// use capturedArgv. If this is a genuine concern, we can enable this code: it
// will make a defensive copy of the entire argv array.
//
// libmalloc (or whatever's in use here) should be fully initialized by the
// time the loader invokes the Swift runtime's initialization functions. See
// https://refspecs.linuxbase.org/elf/gabi4+/ch5.dynamic.html#init_fini
capturedArgv = reinterpret_cast<char **>(std::calloc(argc + 1, sizeof(char *)));
for (int i = 0; i < argc; i++) {
if (argv[i]) {
capturedArgv[i] = strdup(argv[i]);
}
}

ARGVDEBUG("didn't find argc\n");
}

ArgvGrabber::ArgvGrabber() : argv(nullptr), argc(0) {
ARGVDEBUG("***GRABBING ARGV for %d***\n", getpid());
findArgv(findStack());
#if DEBUG_ARGVGRABBER
fprintf(stderr, "ARGV is at %p with count %d\n", argv, argc);
for (int i = 0; i < argc; ++i) {
fprintf(stderr, " argv[%d] = \"%s\"\n", i, argv[i]);
}
fprintf(stderr, "***ARGV GRABBED***\n");
#endif
// Ensure we call captureArgcArgv() before main() runs by adding it to the ELF
// .init_array section.
__attribute__((__section__(".init_array.65534"), __used__))
static auto captureArgcArgvp = &captureArgcArgv;
}

ArgvGrabber argvGrabber;

} // namespace

static char **swift::getUnsafeArgvArgc(int *outArgLen) {
*outArgLen = argvGrabber.argc;
return argvGrabber.argv;
*outArgLen = swift::capturedArgc;
return swift::capturedArgv;
}

template <typename F>
Expand Down