Skip to content

use a declarative table for CLI linker argument parsing #14663

@andrewrk

Description

@andrewrk

In Zig's CLI, linker arguments are parsed here:

zig/src/main.zig

Lines 1788 to 2222 in 705e9cb

// Parse linker args.
var i: usize = 0;
while (i < linker_args.items.len) : (i += 1) {
const arg = linker_args.items[i];
if (mem.eql(u8, arg, "-soname") or
mem.eql(u8, arg, "--soname"))
{
i += 1;
if (i >= linker_args.items.len) {
fatal("expected linker arg after '{s}'", .{arg});
}
const name = linker_args.items[i];
soname = .{ .yes = name };
// Use it as --name.
// Example: libsoundio.so.2
var prefix: usize = 0;
if (mem.startsWith(u8, name, "lib")) {
prefix = 3;
}
var end: usize = name.len;
if (mem.endsWith(u8, name, ".so")) {
end -= 3;
} else {
var found_digit = false;
while (end > 0 and std.ascii.isDigit(name[end - 1])) {
found_digit = true;
end -= 1;
}
if (found_digit and end > 0 and name[end - 1] == '.') {
end -= 1;
} else {
end = name.len;
}
if (mem.endsWith(u8, name[prefix..end], ".so")) {
end -= 3;
}
}
provided_name = name[prefix..end];
} else if (mem.eql(u8, arg, "-rpath")) {
i += 1;
if (i >= linker_args.items.len) {
fatal("expected linker arg after '{s}'", .{arg});
}
try rpath_list.append(linker_args.items[i]);
} else if (mem.eql(u8, arg, "--subsystem")) {
i += 1;
if (i >= linker_args.items.len) {
fatal("expected linker arg after '{s}'", .{arg});
}
subsystem = try parseSubSystem(linker_args.items[i]);
} else if (mem.eql(u8, arg, "-I") or
mem.eql(u8, arg, "--dynamic-linker") or
mem.eql(u8, arg, "-dynamic-linker"))
{
i += 1;
if (i >= linker_args.items.len) {
fatal("expected linker arg after '{s}'", .{arg});
}
target_dynamic_linker = linker_args.items[i];
} else if (mem.eql(u8, arg, "-E") or
mem.eql(u8, arg, "--export-dynamic") or
mem.eql(u8, arg, "-export-dynamic"))
{
rdynamic = true;
} else if (mem.eql(u8, arg, "--version-script")) {
i += 1;
if (i >= linker_args.items.len) {
fatal("expected linker arg after '{s}'", .{arg});
}
version_script = linker_args.items[i];
} else if (mem.eql(u8, arg, "-O")) {
i += 1;
if (i >= linker_args.items.len) {
fatal("expected linker arg after '{s}'", .{arg});
}
linker_optimization = std.fmt.parseUnsigned(u8, linker_args.items[i], 10) catch |err| {
fatal("unable to parse optimization level '{s}': {s}", .{ linker_args.items[i], @errorName(err) });
};
} else if (mem.startsWith(u8, arg, "-O")) {
linker_optimization = std.fmt.parseUnsigned(u8, arg["-O".len..], 10) catch |err| {
fatal("unable to parse optimization level '{s}': {s}", .{ arg, @errorName(err) });
};
} else if (mem.eql(u8, arg, "-pagezero_size")) {
i += 1;
if (i >= linker_args.items.len) {
fatal("expected linker arg after '{s}'", .{arg});
}
const next_arg = linker_args.items[i];
pagezero_size = std.fmt.parseUnsigned(u64, eatIntPrefix(next_arg, 16), 16) catch |err| {
fatal("unable to parse pagezero size '{s}': {s}", .{ next_arg, @errorName(err) });
};
} else if (mem.eql(u8, arg, "-headerpad")) {
i += 1;
if (i >= linker_args.items.len) {
fatal("expected linker arg after '{s}'", .{arg});
}
const next_arg = linker_args.items[i];
headerpad_size = std.fmt.parseUnsigned(u32, eatIntPrefix(next_arg, 16), 16) catch |err| {
fatal("unable to parse headerpad size '{s}': {s}", .{ next_arg, @errorName(err) });
};
} else if (mem.eql(u8, arg, "-headerpad_max_install_names")) {
headerpad_max_install_names = true;
} else if (mem.eql(u8, arg, "-dead_strip")) {
linker_gc_sections = true;
} else if (mem.eql(u8, arg, "-dead_strip_dylibs")) {
dead_strip_dylibs = true;
} else if (mem.eql(u8, arg, "--no-undefined")) {
linker_z_defs = true;
} else if (mem.eql(u8, arg, "--gc-sections")) {
linker_gc_sections = true;
} else if (mem.eql(u8, arg, "--no-gc-sections")) {
linker_gc_sections = false;
} else if (mem.eql(u8, arg, "--print-gc-sections")) {
linker_print_gc_sections = true;
} else if (mem.eql(u8, arg, "--print-icf-sections")) {
linker_print_icf_sections = true;
} else if (mem.eql(u8, arg, "--print-map")) {
linker_print_map = true;
} else if (mem.eql(u8, arg, "--sort-section")) {
i += 1;
if (i >= linker_args.items.len) {
fatal("expected linker arg after '{s}'", .{arg});
}
const arg1 = linker_args.items[i];
linker_sort_section = std.meta.stringToEnum(link.SortSection, arg1) orelse {
fatal("expected [name|alignment] after --sort-section, found '{s}'", .{arg1});
};
} else if (mem.eql(u8, arg, "--allow-shlib-undefined") or
mem.eql(u8, arg, "-allow-shlib-undefined"))
{
linker_allow_shlib_undefined = true;
} else if (mem.eql(u8, arg, "--no-allow-shlib-undefined") or
mem.eql(u8, arg, "-no-allow-shlib-undefined"))
{
linker_allow_shlib_undefined = false;
} else if (mem.eql(u8, arg, "-Bsymbolic")) {
linker_bind_global_refs_locally = true;
} else if (mem.eql(u8, arg, "--import-memory")) {
linker_import_memory = true;
} else if (mem.eql(u8, arg, "--import-symbols")) {
linker_import_symbols = true;
} else if (mem.eql(u8, arg, "--import-table")) {
linker_import_table = true;
} else if (mem.eql(u8, arg, "--export-table")) {
linker_export_table = true;
} else if (mem.startsWith(u8, arg, "--initial-memory=")) {
linker_initial_memory = parseIntSuffix(arg, "--initial-memory=".len);
} else if (mem.startsWith(u8, arg, "--max-memory=")) {
linker_max_memory = parseIntSuffix(arg, "--max-memory=".len);
} else if (mem.startsWith(u8, arg, "--shared-memory")) {
linker_shared_memory = true;
} else if (mem.startsWith(u8, arg, "--global-base=")) {
linker_global_base = parseIntSuffix(arg, "--global-base=".len);
} else if (mem.startsWith(u8, arg, "--export=")) {
try linker_export_symbol_names.append(arg["--export=".len..]);
} else if (mem.eql(u8, arg, "--export")) {
i += 1;
if (i >= linker_args.items.len) {
fatal("expected linker arg after '{s}'", .{arg});
}
try linker_export_symbol_names.append(linker_args.items[i]);
} else if (mem.eql(u8, arg, "--compress-debug-sections")) {
i += 1;
if (i >= linker_args.items.len) {
fatal("expected linker arg after '{s}'", .{arg});
}
const arg1 = linker_args.items[i];
linker_compress_debug_sections = std.meta.stringToEnum(link.CompressDebugSections, arg1) orelse {
fatal("expected [none|zlib] after --compress-debug-sections, found '{s}'", .{arg1});
};
} else if (mem.eql(u8, arg, "-z")) {
i += 1;
if (i >= linker_args.items.len) {
fatal("expected linker extension flag after '{s}'", .{arg});
}
const z_arg = linker_args.items[i];
if (mem.eql(u8, z_arg, "nodelete")) {
linker_z_nodelete = true;
} else if (mem.eql(u8, z_arg, "notext")) {
linker_z_notext = true;
} else if (mem.eql(u8, z_arg, "defs")) {
linker_z_defs = true;
} else if (mem.eql(u8, z_arg, "undefs")) {
linker_z_defs = false;
} else if (mem.eql(u8, z_arg, "origin")) {
linker_z_origin = true;
} else if (mem.eql(u8, z_arg, "nocopyreloc")) {
linker_z_nocopyreloc = true;
} else if (mem.eql(u8, z_arg, "noexecstack")) {
// noexecstack is the default when linking with LLD
} else if (mem.eql(u8, z_arg, "now")) {
linker_z_now = true;
} else if (mem.eql(u8, z_arg, "lazy")) {
linker_z_now = false;
} else if (mem.eql(u8, z_arg, "relro")) {
linker_z_relro = true;
} else if (mem.eql(u8, z_arg, "norelro")) {
linker_z_relro = false;
} else if (mem.startsWith(u8, z_arg, "stack-size=")) {
const next_arg = z_arg["stack-size=".len..];
stack_size_override = std.fmt.parseUnsigned(u64, next_arg, 0) catch |err| {
fatal("unable to parse stack size '{s}': {s}", .{ next_arg, @errorName(err) });
};
} else if (mem.startsWith(u8, z_arg, "common-page-size=")) {
linker_z_common_page_size = parseIntSuffix(z_arg, "common-page-size=".len);
} else if (mem.startsWith(u8, z_arg, "max-page-size=")) {
linker_z_max_page_size = parseIntSuffix(z_arg, "max-page-size=".len);
} else {
warn("unsupported linker extension flag: -z {s}", .{z_arg});
}
} else if (mem.eql(u8, arg, "--major-image-version")) {
i += 1;
if (i >= linker_args.items.len) {
fatal("expected linker arg after '{s}'", .{arg});
}
version.major = std.fmt.parseUnsigned(u32, linker_args.items[i], 10) catch |err| {
fatal("unable to parse major image version '{s}': {s}", .{ linker_args.items[i], @errorName(err) });
};
have_version = true;
} else if (mem.eql(u8, arg, "--minor-image-version")) {
i += 1;
if (i >= linker_args.items.len) {
fatal("expected linker arg after '{s}'", .{arg});
}
version.minor = std.fmt.parseUnsigned(u32, linker_args.items[i], 10) catch |err| {
fatal("unable to parse minor image version '{s}': {s}", .{ linker_args.items[i], @errorName(err) });
};
have_version = true;
} else if (mem.eql(u8, arg, "-e") or mem.eql(u8, arg, "--entry")) {
i += 1;
if (i >= linker_args.items.len) {
fatal("expected linker arg after '{s}'", .{arg});
}
entry = linker_args.items[i];
} else if (mem.eql(u8, arg, "--stack") or mem.eql(u8, arg, "-stack_size")) {
i += 1;
if (i >= linker_args.items.len) {
fatal("expected linker arg after '{s}'", .{arg});
}
stack_size_override = std.fmt.parseUnsigned(u64, linker_args.items[i], 0) catch |err| {
fatal("unable to parse stack size override '{s}': {s}", .{ linker_args.items[i], @errorName(err) });
};
} else if (mem.eql(u8, arg, "--image-base")) {
i += 1;
if (i >= linker_args.items.len) {
fatal("expected linker arg after '{s}'", .{arg});
}
image_base_override = std.fmt.parseUnsigned(u64, linker_args.items[i], 0) catch |err| {
fatal("unable to parse image base override '{s}': {s}", .{ linker_args.items[i], @errorName(err) });
};
} else if (mem.eql(u8, arg, "-T") or mem.eql(u8, arg, "--script")) {
i += 1;
if (i >= linker_args.items.len) {
fatal("expected linker arg after '{s}'", .{arg});
}
linker_script = linker_args.items[i];
} else if (mem.eql(u8, arg, "--eh-frame-hdr")) {
link_eh_frame_hdr = true;
} else if (mem.eql(u8, arg, "--no-eh-frame-hdr")) {
link_eh_frame_hdr = false;
} else if (mem.eql(u8, arg, "--tsaware")) {
linker_tsaware = true;
} else if (mem.eql(u8, arg, "--nxcompat")) {
linker_nxcompat = true;
} else if (mem.eql(u8, arg, "--dynamicbase")) {
linker_dynamicbase = true;
} else if (mem.eql(u8, arg, "--high-entropy-va")) {
// This option does not do anything.
} else if (mem.eql(u8, arg, "--export-all-symbols")) {
rdynamic = true;
} else if (mem.eql(u8, arg, "-s") or mem.eql(u8, arg, "--strip-all") or
mem.eql(u8, arg, "-S") or mem.eql(u8, arg, "--strip-debug"))
{
// -s, --strip-all Strip all symbols
// -S, --strip-debug Strip debugging symbols
strip = true;
} else if (mem.eql(u8, arg, "--start-group") or
mem.eql(u8, arg, "--end-group"))
{
// We don't need to care about these because these args are
// for resolving circular dependencies but our linker takes
// care of this without explicit args.
} else if (mem.eql(u8, arg, "--major-os-version") or
mem.eql(u8, arg, "--minor-os-version"))
{
i += 1;
if (i >= linker_args.items.len) {
fatal("expected linker arg after '{s}'", .{arg});
}
// This option does not do anything.
} else if (mem.eql(u8, arg, "--major-subsystem-version")) {
i += 1;
if (i >= linker_args.items.len) {
fatal("expected linker arg after '{s}'", .{arg});
}
major_subsystem_version = std.fmt.parseUnsigned(
u32,
linker_args.items[i],
10,
) catch |err| {
fatal("unable to parse major subsystem version '{s}': {s}", .{ linker_args.items[i], @errorName(err) });
};
} else if (mem.eql(u8, arg, "--minor-subsystem-version")) {
i += 1;
if (i >= linker_args.items.len) {
fatal("expected linker arg after '{s}'", .{arg});
}
minor_subsystem_version = std.fmt.parseUnsigned(
u32,
linker_args.items[i],
10,
) catch |err| {
fatal("unable to parse minor subsystem version '{s}': {s}", .{ linker_args.items[i], @errorName(err) });
};
} else if (mem.eql(u8, arg, "-framework")) {
i += 1;
if (i >= linker_args.items.len) {
fatal("expected linker arg after '{s}'", .{arg});
}
try frameworks.put(gpa, linker_args.items[i], .{});
} else if (mem.eql(u8, arg, "-weak_framework")) {
i += 1;
if (i >= linker_args.items.len) {
fatal("expected linker arg after '{s}'", .{arg});
}
try frameworks.put(gpa, linker_args.items[i], .{ .weak = true });
} else if (mem.eql(u8, arg, "-needed_framework")) {
i += 1;
if (i >= linker_args.items.len) {
fatal("expected linker arg after '{s}'", .{arg});
}
try frameworks.put(gpa, linker_args.items[i], .{ .needed = true });
} else if (mem.eql(u8, arg, "-needed_library")) {
i += 1;
if (i >= linker_args.items.len) {
fatal("expected linker arg after '{s}'", .{arg});
}
try system_libs.put(linker_args.items[i], .{ .needed = true });
} else if (mem.startsWith(u8, arg, "-weak-l")) {
try system_libs.put(arg["-weak-l".len..], .{ .weak = true });
} else if (mem.eql(u8, arg, "-weak_library")) {
i += 1;
if (i >= linker_args.items.len) {
fatal("expected linker arg after '{s}'", .{arg});
}
try system_libs.put(linker_args.items[i], .{ .weak = true });
} else if (mem.eql(u8, arg, "-compatibility_version")) {
i += 1;
if (i >= linker_args.items.len) {
fatal("expected linker arg after '{s}'", .{arg});
}
compatibility_version = std.builtin.Version.parse(linker_args.items[i]) catch |err| {
fatal("unable to parse -compatibility_version '{s}': {s}", .{ linker_args.items[i], @errorName(err) });
};
} else if (mem.eql(u8, arg, "-current_version")) {
i += 1;
if (i >= linker_args.items.len) {
fatal("expected linker arg after '{s}'", .{arg});
}
version = std.builtin.Version.parse(linker_args.items[i]) catch |err| {
fatal("unable to parse -current_version '{s}': {s}", .{ linker_args.items[i], @errorName(err) });
};
have_version = true;
} else if (mem.eql(u8, arg, "--out-implib") or
mem.eql(u8, arg, "-implib"))
{
i += 1;
if (i >= linker_args.items.len) {
fatal("expected linker arg after '{s}'", .{arg});
}
emit_implib = .{ .yes = linker_args.items[i] };
emit_implib_arg_provided = true;
} else if (mem.eql(u8, arg, "-undefined")) {
i += 1;
if (i >= linker_args.items.len) {
fatal("expected linker arg after '{s}'", .{arg});
}
if (mem.eql(u8, "dynamic_lookup", linker_args.items[i])) {
linker_allow_shlib_undefined = true;
} else if (mem.eql(u8, "error", linker_args.items[i])) {
linker_allow_shlib_undefined = false;
} else {
fatal("unsupported -undefined option '{s}'", .{linker_args.items[i]});
}
} else if (mem.eql(u8, arg, "-install_name")) {
i += 1;
if (i >= linker_args.items.len) {
fatal("expected linker arg after '{s}'", .{arg});
}
install_name = linker_args.items[i];
} else if (mem.eql(u8, arg, "-force_load")) {
i += 1;
if (i >= linker_args.items.len) {
fatal("expected linker arg after '{s}'", .{arg});
}
try link_objects.append(.{
.path = linker_args.items[i],
.must_link = true,
});
} else if (mem.eql(u8, arg, "-hash-style") or
mem.eql(u8, arg, "--hash-style"))
{
i += 1;
if (i >= linker_args.items.len) {
fatal("expected linker arg after '{s}'", .{arg});
}
const next_arg = linker_args.items[i];
hash_style = std.meta.stringToEnum(link.HashStyle, next_arg) orelse {
fatal("expected [sysv|gnu|both] after --hash-style, found '{s}'", .{
next_arg,
});
};
} else if (mem.startsWith(u8, arg, "/subsystem:")) {
var split_it = mem.splitBackwards(u8, arg, ":");
subsystem = try parseSubSystem(split_it.first());
} else if (mem.startsWith(u8, arg, "/implib:")) {
var split_it = mem.splitBackwards(u8, arg, ":");
emit_implib = .{ .yes = split_it.first() };
emit_implib_arg_provided = true;
} else if (mem.startsWith(u8, arg, "/pdb:")) {
var split_it = mem.splitBackwards(u8, arg, ":");
pdb_out_path = split_it.first();
} else if (mem.startsWith(u8, arg, "/version:")) {
var split_it = mem.splitBackwards(u8, arg, ":");
const version_arg = split_it.first();
version = std.builtin.Version.parse(version_arg) catch |err| {
fatal("unable to parse /version '{s}': {s}", .{ arg, @errorName(err) });
};
have_version = true;
} else {
warn("unsupported linker arg: {s}", .{arg});
}

The problem is that this list is incomplete, and some aliased forms of the arguments it does handle are not supported. A declarative table of linker arguments would be easier to compare against other linker implementations. The table could also be seeded initially by examining LLD's Options.td files, for example.

Note that some linker arguments are special-cased and handled inline with other arguments, like this:

zig/src/main.zig

Lines 1606 to 1668 in 705e9cb

// Handles the `.wl` case: splits `it.only_arg` on commas and, for each piece,
// either updates a flag directly or appends the piece to `linker_args`.
.wl => {
var split_it = mem.split(u8, it.only_arg, ",");
while (split_it.next()) |linker_arg| {
// Handle nested-joined args like `-Wl,-rpath=foo`.
// Must be prefixed with 1 or 2 dashes.
// The check below requires a leading '-' and rejects a '-' at index 2;
// the character at index 1 is not inspected.
if (linker_arg.len >= 3 and
linker_arg[0] == '-' and
linker_arg[2] != '-')
{
// Split `key=value` at the first '=' and append the two halves to
// `linker_args` as separate entries, unless special-cased below.
if (mem.indexOfScalar(u8, linker_arg, '=')) |equals_pos| {
const key = linker_arg[0..equals_pos];
const value = linker_arg[equals_pos + 1 ..];
// NOTE(review): `key` always begins with '-' here (checked above), so this
// comparison with "build-id" looks like it can never match; presumably
// "--build-id" was intended — confirm.
if (mem.eql(u8, key, "build-id")) {
build_id = true;
warn("ignoring build-id style argument: '{s}'", .{value});
continue;
} else if (mem.eql(u8, key, "--sort-common")) {
// this ignores --sort-common=<anything>; ignoring plain --sort-common
// is done below.
continue;
}
try linker_args.append(key);
try linker_args.append(value);
continue;
}
}
// Pieces without '=' (or not matching the dash check above) are compared
// against the flags handled inline here; anything else is appended to
// `linker_args` unchanged.
if (mem.eql(u8, linker_arg, "--as-needed")) {
needed = false;
} else if (mem.eql(u8, linker_arg, "--no-as-needed")) {
needed = true;
} else if (mem.eql(u8, linker_arg, "-no-pie")) {
want_pie = false;
} else if (mem.eql(u8, linker_arg, "--sort-common")) {
// from ld.lld(1): --sort-common is ignored for GNU compatibility,
// this ignores plain --sort-common
} else if (mem.eql(u8, linker_arg, "--whole-archive") or
mem.eql(u8, linker_arg, "-whole-archive"))
{
must_link = true;
} else if (mem.eql(u8, linker_arg, "--no-whole-archive") or
mem.eql(u8, linker_arg, "-no-whole-archive"))
{
must_link = false;
} else if (mem.eql(u8, linker_arg, "-Bdynamic") or
mem.eql(u8, linker_arg, "-dy") or
mem.eql(u8, linker_arg, "-call_shared"))
{
force_static_libs = false;
} else if (mem.eql(u8, linker_arg, "-Bstatic") or
mem.eql(u8, linker_arg, "-dn") or
mem.eql(u8, linker_arg, "-non_shared") or
mem.eql(u8, linker_arg, "-static"))
{
force_static_libs = true;
} else if (mem.eql(u8, linker_arg, "-search_paths_first")) {
search_strategy = .paths_first;
} else if (mem.eql(u8, linker_arg, "-search_dylibs_first")) {
search_strategy = .dylibs_first;
} else {
// Not handled inline: defer to the linker-argument list.
try linker_args.append(linker_arg);
}
}
},

It is important to keep this logic as it is, but it could be integrated with a declarative linker-argument parsing system if that system were flexible enough.

I think the key here is going to be not making the API too abstract. Something akin to a "linker argument iterator" might work nicely.

This will improve the robustness of zig cc as a drop-in linker.

Metadata

Metadata

Assignees

No one assigned

    Labels

    contributor friendly: This issue is limited in scope and/or knowledge of Zig internals.
    enhancement: Solving this issue will likely involve adding new logic or components to the codebase.
    zig cc: Zig as a drop-in C compiler feature

    Type

    No type

    Projects

    No projects

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions