Skip to content

Commit 12c2773

Browse files
committed
std.zig.system: use both PATH and hardcoded locations to find env
Should help systems that have the main `env` binary in a different location than the hardcoded `/usr/bin/env` **during build** (not necessarily always), like Nix/Guix, Termux, Gentoo Prefix etc. Related: https://www.github.com/ziglang/zig/issues/12156 https://www.github.com/ziglang/zig/issues/14146 https://www.github.com/ziglang/zig/issues/14577 https://www.github.com/ziglang/zig/issues/15898 Source for logic: https://www.github.com/ziglang/zig/issues/14146#issuecomment-2308984936 Signed-off-by: Eric Joldasov <bratishkaerik@landless-city.net>
1 parent f8942e8 commit 12c2773

File tree

1 file changed

+139
-108
lines changed

1 file changed

+139
-108
lines changed

lib/std/zig/system.zig

Lines changed: 139 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -949,6 +949,106 @@ fn glibcVerFromSoFile(file: fs.File) !std.SemanticVersion {
949949
return max_ver;
950950
}
951951

952+
/// Tries to open the file located at `start_path` and guesses whether it is
/// a script or an ELF file.
///
/// If the file starts with a "shebang line", it is considered a script, and the
/// logic is re-run using the interpreter referenced after the "#!" characters.
/// If the interpreter is itself a script, the chain is followed until a
/// non-script file is found or the chain becomes suspiciously long.
///
/// If the file starts with the ELF magic sequence, it is considered an ELF file
/// and is returned still open; the caller owns the handle and must close it.
///
/// Every failure (non-absolute path, file cannot be opened or read, file is
/// neither ELF nor a script, interpreter chain too long) is reported as
/// `error.UnableToFindElfFile` so that callers can simply try the next candidate.
fn resolveElfFileRecursively(cwd: fs.Dir, start_path: []const u8) error{UnableToFindElfFile}!fs.File {
    var current_path = start_path;

    // According to `man 2 execve`:
    //
    // The kernel imposes a maximum length on the text
    // that follows the "#!" characters at the start of a script;
    // characters beyond the limit are ignored.
    // Before Linux 5.1, the limit is 127 characters.
    // Since Linux 5.1, the limit is 255 characters.
    //
    // Tests show that bash and zsh consider 255 as total limit,
    // *including* "#!" characters and ignoring newline.
    // For safety, we set max length as 255 + \n (1).
    var buffer: [255 + 1]u8 = undefined;

    // Upper bound on how many files are inspected while chasing interpreters.
    // Without it, a script whose shebang (directly or indirectly) points back
    // at itself would keep this function spinning forever.
    const max_interpreter_depth = 16;

    for (0..max_interpreter_depth) |_| {
        // Interpreter path can be relative on Linux, but for simplicity only
        // absolute paths are supported. The path comes from outside of the
        // program (PATH entries, shebang lines), so a relative or empty path
        // must be a recoverable error rather than an assertion failure.
        if (!std.fs.path.isAbsolute(current_path)) return error.UnableToFindElfFile;

        // NOTE: after the first iteration `current_path` points into `buffer`,
        // so the file must be opened before `buffer` is overwritten below.
        const file = cwd.openFile(current_path, .{}) catch |err| switch (err) {
            error.NoSpaceLeft => unreachable,
            error.NameTooLong => unreachable,
            error.PathAlreadyExists => unreachable,
            error.SharingViolation => unreachable,
            error.InvalidUtf8 => unreachable, // WASI only
            error.InvalidWtf8 => unreachable, // Windows only
            error.BadPathName => unreachable,
            error.PipeBusy => unreachable,
            error.FileLocksNotSupported => unreachable,
            error.WouldBlock => unreachable,
            error.FileBusy => unreachable, // opened without write permissions
            error.AntivirusInterference => unreachable, // Windows-only error

            error.IsDir,
            error.NotDir,

            error.AccessDenied,
            error.DeviceBusy,
            error.FileTooBig,
            error.SymLinkLoop,
            error.ProcessFdQuotaExceeded,
            error.SystemFdQuotaExceeded,
            error.SystemResources,

            error.FileNotFound,
            error.NetworkNotFound,
            error.NoDevice,
            error.Unexpected,
            => return error.UnableToFindElfFile,
        };
        // Ownership of the handle is transferred to the caller only when the
        // file turns out to be an ELF file; in all other cases close it here.
        var is_elf_file = false;
        defer if (is_elf_file == false) file.close();

        // Shortest working interpreter path is "#!/i" (4)
        // (interpreter is "/i", assuming all paths are absolute, like in above comment).
        // ELF magic number length is also 4.
        //
        // If file is shorter than that, it is definitely not an ELF file
        // nor a file with a "shebang" line.
        const min_len = 4;

        const len = preadAtLeast(file, &buffer, 0, min_len) catch return error.UnableToFindElfFile;
        const content = buffer[0..len];

        if (mem.eql(u8, content[0..4], std.elf.MAGIC)) {
            // It is very likely an ELF file!
            is_elf_file = true;
            return file;
        } else if (mem.eql(u8, content[0..2], "#!")) {
            // We detected shebang, now parse its line.

            // Trim leading "#!", spaces and tabs.
            const after_marker = mem.trimLeft(u8, content[2..], &.{ ' ', '\t' });

            // `content` holds the first bytes of the file, which may span
            // several lines. Only the first line is the shebang line, so cut
            // at the first newline instead of only trimming trailing ones.
            const line_end = mem.indexOfScalar(u8, after_marker, '\n') orelse after_marker.len;

            // This line can have:
            // * Interpreter path only,
            // * Interpreter path and arguments, all separated by space, tab or NUL character.
            const path_maybe_args = after_marker[0..line_end];

            // Separate path and args.
            const path_end = mem.indexOfAny(u8, path_maybe_args, &.{ ' ', '\t', 0 }) orelse path_maybe_args.len;

            current_path = path_maybe_args[0..path_end];
            continue;
        } else {
            // Not an ELF file, not a script with a "shebang line", invalid duck.
            return error.UnableToFindElfFile;
        }
    }

    // Interpreter chain is too long (most likely cyclic); give up.
    return error.UnableToFindElfFile;
}
1051+
9521052
/// In the past, this function attempted to use the executable's own binary if it was dynamically
9531053
/// linked to answer both the C ABI question and the dynamic linker question. However, this
9541054
/// could be problematic on a system that uses a RUNPATH for the compiler binary, locking
@@ -957,11 +1057,14 @@ fn glibcVerFromSoFile(file: fs.File) !std.SemanticVersion {
9571057
/// the dynamic linker will match that of the compiler binary. Executables with these versions
9581058
/// mismatching will fail to run.
9591059
///
960-
/// Therefore, this function works the same regardless of whether the compiler binary is
961-
/// dynamically or statically linked. It inspects `/usr/bin/env` as an ELF file to find the
962-
/// answer to these questions, or if there is a shebang line, then it chases the referenced
963-
/// file recursively. If that does not provide the answer, then the function falls back to
964-
/// defaults.
1060+
/// Therefore, this function now does not inspect the executable's own binary.
1061+
/// Instead, it tries to find `env` program in PATH or in hardcoded location, and uses it
1062+
/// to find suitable ELF file. If `env` program is an executable, work is done and function starts to
1063+
/// inspect inner structure of a file. But if `env` is a script or other non-ELF file, it uses
1064+
/// interpreter path instead and tries to search ELF file again, going recursively in case interpreter
1065+
/// is also a script/non-ELF file.
1066+
///
1067+
/// If nothing was found, then the function falls back to defaults.
9651068
fn detectAbiAndDynamicLinker(
9661069
cpu: Target.Cpu,
9671070
os: Target.Os,
@@ -1029,113 +1132,44 @@ fn detectAbiAndDynamicLinker(
10291132

10301133
const ld_info_list = ld_info_list_buffer[0..ld_info_list_len];
10311134

1032-
// Best case scenario: the executable is dynamically linked, and we can iterate
1033-
// over our own shared objects and find a dynamic linker.
1034-
const elf_file = elf_file: {
1035-
// This block looks for a shebang line in /usr/bin/env,
1036-
// if it finds one, then instead of using /usr/bin/env as the ELF file to examine, it uses the file it references instead,
1037-
// doing the same logic recursively in case it finds another shebang line.
1135+
const cwd = std.fs.cwd();
1136+
1137+
// Algorithm is:
1138+
// 1a) try_path: If PATH is non-empty and `env` file was found in one of the directories, use that.
1139+
// 1b) try_path: If `env` was not found or PATH is empty, try hardcoded path below.
1140+
// 2a) try_hardcoded: If `env` was found in hardcoded location, use that.
1141+
// 2b) try_hardcoded: If `env` was not found, fall back to default ABI and dynamic linker.
1142+
// Source: https://github.com/ziglang/zig/issues/14146#issuecomment-2308984936
1143+
const elf_file = (try_path: {
1144+
const PATH = std.posix.getenv("PATH") orelse break :try_path null;
1145+
var it = mem.tokenizeScalar(u8, PATH, fs.path.delimiter);
1146+
1147+
var buf: [std.fs.max_path_bytes + 1]u8 = undefined;
1148+
var fbs: std.heap.FixedBufferAllocator = .init(&buf);
1149+
const allocator = fbs.allocator();
1150+
1151+
while (it.next()) |path| : (fbs.reset()) {
1152+
const start_path = std.fs.path.joinZ(allocator, &.{ path, "env" }) catch |err| switch (err) {
1153+
error.OutOfMemory => continue,
1154+
};
10381155

1039-
var file_name: []const u8 = switch (os.tag) {
1156+
break :try_path resolveElfFileRecursively(cwd, start_path) catch |err| switch (err) {
1157+
error.UnableToFindElfFile => continue,
1158+
};
1159+
} else break :try_path null;
1160+
} orelse try_hardcoded: {
1161+
const hardcoded_file_name = switch (os.tag) {
10401162
// Since /usr/bin/env is hard-coded into the shebang line of many portable scripts, it's a
10411163
// reasonably reliable path to start with.
10421164
else => "/usr/bin/env",
10431165
// Haiku does not have a /usr root directory.
10441166
.haiku => "/bin/env",
10451167
};
10461168

1047-
// According to `man 2 execve`:
1048-
//
1049-
// The kernel imposes a maximum length on the text
1050-
// that follows the "#!" characters at the start of a script;
1051-
// characters beyond the limit are ignored.
1052-
// Before Linux 5.1, the limit is 127 characters.
1053-
// Since Linux 5.1, the limit is 255 characters.
1054-
//
1055-
// Tests show that bash and zsh consider 255 as total limit,
1056-
// *including* "#!" characters and ignoring newline.
1057-
// For safety, we set max length as 255 + \n (1).
1058-
var buffer: [255 + 1]u8 = undefined;
1059-
while (true) {
1060-
// Interpreter path can be relative on Linux, but
1061-
// for simplicity we are asserting it is an absolute path.
1062-
const file = fs.openFileAbsolute(file_name, .{}) catch |err| switch (err) {
1063-
error.NoSpaceLeft => unreachable,
1064-
error.NameTooLong => unreachable,
1065-
error.PathAlreadyExists => unreachable,
1066-
error.SharingViolation => unreachable,
1067-
error.InvalidUtf8 => unreachable, // WASI only
1068-
error.InvalidWtf8 => unreachable, // Windows only
1069-
error.BadPathName => unreachable,
1070-
error.PipeBusy => unreachable,
1071-
error.FileLocksNotSupported => unreachable,
1072-
error.WouldBlock => unreachable,
1073-
error.FileBusy => unreachable, // opened without write permissions
1074-
error.AntivirusInterference => unreachable, // Windows-only error
1075-
1076-
error.IsDir,
1077-
error.NotDir,
1078-
error.AccessDenied,
1079-
error.NoDevice,
1080-
error.FileNotFound,
1081-
error.NetworkNotFound,
1082-
error.FileTooBig,
1083-
error.Unexpected,
1084-
=> |e| {
1085-
std.log.warn("Encountered error: {s}, falling back to default ABI and dynamic linker.", .{@errorName(e)});
1086-
return defaultAbiAndDynamicLinker(cpu, os, query);
1087-
},
1088-
1089-
else => |e| return e,
1090-
};
1091-
var is_elf_file = false;
1092-
defer if (is_elf_file == false) file.close();
1093-
1094-
// Shortest working interpreter path is "#!/i" (4)
1095-
// (interpreter is "/i", assuming all paths are absolute, like in above comment).
1096-
// ELF magic number length is also 4.
1097-
//
1098-
// If file is shorter than that, it is definitely not ELF file
1099-
// nor file with "shebang" line.
1100-
const min_len: usize = 4;
1101-
1102-
const len = preadAtLeast(file, &buffer, 0, min_len) catch |err| switch (err) {
1103-
error.UnexpectedEndOfFile,
1104-
error.UnableToReadElfFile,
1105-
error.ProcessNotFound,
1106-
=> return defaultAbiAndDynamicLinker(cpu, os, query),
1107-
1108-
else => |e| return e,
1109-
};
1110-
const content = buffer[0..len];
1111-
1112-
if (mem.eql(u8, content[0..4], std.elf.MAGIC)) {
1113-
// It is very likely ELF file!
1114-
is_elf_file = true;
1115-
break :elf_file file;
1116-
} else if (mem.eql(u8, content[0..2], "#!")) {
1117-
// We detected shebang, now parse entire line.
1118-
1119-
// Trim leading "#!", spaces and tabs.
1120-
const trimmed_line = mem.trimLeft(u8, content[2..], &.{ ' ', '\t' });
1121-
1122-
// This line can have:
1123-
// * Interpreter path only,
1124-
// * Interpreter path and arguments, all separated by space, tab or NUL character.
1125-
// And optionally newline at the end.
1126-
const path_maybe_args = mem.trimRight(u8, trimmed_line, "\n");
1127-
1128-
// Separate path and args.
1129-
const path_end = mem.indexOfAny(u8, path_maybe_args, &.{ ' ', '\t', 0 }) orelse path_maybe_args.len;
1130-
1131-
file_name = path_maybe_args[0..path_end];
1132-
continue;
1133-
} else {
1134-
// Not a ELF file, not a shell script with "shebang line", invalid duck.
1135-
return defaultAbiAndDynamicLinker(cpu, os, query);
1136-
}
1137-
}
1138-
};
1169+
break :try_hardcoded resolveElfFileRecursively(cwd, hardcoded_file_name) catch |err| switch (err) {
1170+
error.UnableToFindElfFile => null,
1171+
};
1172+
}) orelse return defaultAbiAndDynamicLinker(cpu, os, query);
11391173
defer elf_file.close();
11401174

11411175
// TODO: inline this function and combine the buffer we already read above to find
@@ -1159,10 +1193,7 @@ fn detectAbiAndDynamicLinker(
11591193
error.UnexpectedEndOfFile,
11601194
error.NameTooLong,
11611195
// Finally, we fall back on the standard path.
1162-
=> |e| {
1163-
std.log.warn("Encountered error: {s}, falling back to default ABI and dynamic linker.", .{@errorName(e)});
1164-
return defaultAbiAndDynamicLinker(cpu, os, query);
1165-
},
1196+
=> defaultAbiAndDynamicLinker(cpu, os, query),
11661197
};
11671198
}
11681199

0 commit comments

Comments
 (0)