Skip to content

Commit 0be8e16

Browse files
committed
Auto merge of #146700 - Zalathar:quoted-args, r=nikic
cg_llvm: Move target machine command-line quoting from C++ to Rust When this code was introduced in #130446 and #131805, it was complicated by the need to maintain compatibility with earlier versions of LLVM. Now that LLVM 20 is the baseline (#145071), we can do all of the quoting in pure Rust code, and pass two flat strings to LLVM to be used as-is. --- In this PR, my priority has been to preserve the existing behaviour as much as possible, without worrying too much about what the behaviour *should* be. (Though I did avoid a leading space before the first argument.)
2 parents 5904356 + 8b0a254 commit 0be8e16

File tree

7 files changed

+92
-49
lines changed

7 files changed

+92
-49
lines changed
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
#[cfg(test)]
2+
mod tests;
3+
4+
/// Joins command-line arguments into a single space-separated string, quoting
5+
/// and escaping individual arguments as necessary.
6+
///
7+
/// The result is intended to be informational, for embedding in debug metadata,
8+
/// and might not be properly quoted/escaped for actual command-line use.
9+
pub(crate) fn quote_command_line_args(args: &[String]) -> String {
10+
// Start with a decent-sized buffer, since rustc invocations tend to be long.
11+
let mut buf = String::with_capacity(128);
12+
13+
for arg in args {
14+
if !buf.is_empty() {
15+
buf.push(' ');
16+
}
17+
18+
print_arg_quoted(&mut buf, arg);
19+
}
20+
21+
buf
22+
}
23+
24+
/// Equivalent to LLVM's `sys::printArg` with quoting always enabled
25+
/// (see llvm/lib/Support/Program.cpp).
26+
fn print_arg_quoted(buf: &mut String, arg: &str) {
27+
buf.reserve(arg.len() + 2);
28+
29+
buf.push('"');
30+
for ch in arg.chars() {
31+
if matches!(ch, '"' | '\\' | '$') {
32+
buf.push('\\');
33+
}
34+
buf.push(ch);
35+
}
36+
buf.push('"');
37+
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
#[test]
2+
fn quote_command_line_args() {
3+
use super::quote_command_line_args;
4+
5+
struct Case<'a> {
6+
args: &'a [&'a str],
7+
expected: &'a str,
8+
}
9+
10+
let cases = &[
11+
Case { args: &[], expected: "" },
12+
Case { args: &["--hello", "world"], expected: r#""--hello" "world""# },
13+
Case { args: &["--hello world"], expected: r#""--hello world""# },
14+
Case {
15+
args: &["plain", "$dollar", "spa ce", r"back\slash", r#""quote""#, "plain"],
16+
expected: r#""plain" "\$dollar" "spa ce" "back\\slash" "\"quote\"" "plain""#,
17+
},
18+
];
19+
20+
for &Case { args, expected } in cases {
21+
let args = args.iter().copied().map(str::to_owned).collect::<Vec<_>>();
22+
let actual = quote_command_line_args(&args);
23+
assert_eq!(actual, expected, "args {args:?}");
24+
}
25+
}

compiler/rustc_codegen_llvm/src/back/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
pub(crate) mod archive;
2+
mod command_line_args;
23
pub(crate) mod lto;
34
pub(crate) mod owned_target_machine;
45
mod profiling;

compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
use std::assert_matches::assert_matches;
21
use std::ffi::CStr;
32
use std::marker::PhantomData;
43
use std::ptr::NonNull;
@@ -39,13 +38,10 @@ impl OwnedTargetMachine {
3938
output_obj_file: &CStr,
4039
debug_info_compression: &CStr,
4140
use_emulated_tls: bool,
42-
args_cstr_buff: &[u8],
41+
argv0: &str,
42+
command_line_args: &str,
4343
use_wasm_eh: bool,
4444
) -> Result<Self, LlvmError<'static>> {
45-
// The argument list is passed as the concatenation of one or more C strings.
46-
// This implies that there must be a last byte, and it must be 0.
47-
assert_matches!(args_cstr_buff, [.., b'\0'], "the last byte must be a NUL terminator");
48-
4945
// SAFETY: llvm::LLVMRustCreateTargetMachine copies pointed to data
5046
let tm_ptr = unsafe {
5147
llvm::LLVMRustCreateTargetMachine(
@@ -70,8 +66,10 @@ impl OwnedTargetMachine {
7066
output_obj_file.as_ptr(),
7167
debug_info_compression.as_ptr(),
7268
use_emulated_tls,
73-
args_cstr_buff.as_ptr(),
74-
args_cstr_buff.len(),
69+
argv0.as_ptr(),
70+
argv0.len(),
71+
command_line_args.as_ptr(),
72+
command_line_args.len(),
7573
use_wasm_eh,
7674
)
7775
};

compiler/rustc_codegen_llvm/src/back/write.rs

Lines changed: 12 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ use rustc_span::{BytePos, InnerSpan, Pos, SpanData, SyntaxContext, sym};
3131
use rustc_target::spec::{CodeModel, FloatAbi, RelocModel, SanitizerSet, SplitDebuginfo, TlsModel};
3232
use tracing::{debug, trace};
3333

34+
use crate::back::command_line_args::quote_command_line_args;
3435
use crate::back::lto::ThinBuffer;
3536
use crate::back::owned_target_machine::OwnedTargetMachine;
3637
use crate::back::profiling::{
@@ -249,23 +250,15 @@ pub(crate) fn target_machine_factory(
249250

250251
let use_emulated_tls = matches!(sess.tls_model(), TlsModel::Emulated);
251252

252-
// copy the exe path, followed by path all into one buffer
253-
// null terminating them so we can use them as null terminated strings
254-
let args_cstr_buff = {
255-
let mut args_cstr_buff: Vec<u8> = Vec::new();
256-
let exe_path = std::env::current_exe().unwrap_or_default();
257-
let exe_path_str = exe_path.into_os_string().into_string().unwrap_or_default();
258-
259-
args_cstr_buff.extend_from_slice(exe_path_str.as_bytes());
260-
args_cstr_buff.push(0);
261-
262-
for arg in sess.expanded_args.iter() {
263-
args_cstr_buff.extend_from_slice(arg.as_bytes());
264-
args_cstr_buff.push(0);
265-
}
266-
267-
args_cstr_buff
268-
};
253+
// Command-line information to be included in the target machine.
254+
// This seems to only be used for embedding in PDB debuginfo files.
255+
// FIXME(Zalathar): Maybe skip this for non-PDB targets?
256+
let argv0 = std::env::current_exe()
257+
.unwrap_or_default()
258+
.into_os_string()
259+
.into_string()
260+
.unwrap_or_default();
261+
let command_line_args = quote_command_line_args(&sess.expanded_args);
269262

270263
let debuginfo_compression = sess.opts.debuginfo_compression.to_string();
271264
match sess.opts.debuginfo_compression {
@@ -323,7 +316,8 @@ pub(crate) fn target_machine_factory(
323316
&output_obj_file,
324317
&debuginfo_compression,
325318
use_emulated_tls,
326-
&args_cstr_buff,
319+
&argv0,
320+
&command_line_args,
327321
use_wasm_eh,
328322
)
329323
})

compiler/rustc_codegen_llvm/src/llvm/ffi.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2491,8 +2491,10 @@ unsafe extern "C" {
24912491
OutputObjFile: *const c_char,
24922492
DebugInfoCompression: *const c_char,
24932493
UseEmulatedTls: bool,
2494-
ArgsCstrBuff: *const c_uchar, // See "PTR_LEN_STR".
2495-
ArgsCstrBuffLen: usize,
2494+
Argv0: *const c_uchar, // See "PTR_LEN_STR".
2495+
Argv0Len: size_t,
2496+
CommandLineArgs: *const c_uchar, // See "PTR_LEN_STR".
2497+
CommandLineArgsLen: size_t,
24962498
UseWasmEH: bool,
24972499
) -> *mut TargetMachine;
24982500

compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp

Lines changed: 7 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -271,8 +271,9 @@ extern "C" LLVMTargetMachineRef LLVMRustCreateTargetMachine(
271271
bool TrapUnreachable, bool Singlethread, bool VerboseAsm,
272272
bool EmitStackSizeSection, bool RelaxELFRelocations, bool UseInitArray,
273273
const char *SplitDwarfFile, const char *OutputObjFile,
274-
const char *DebugInfoCompression, bool UseEmulatedTls,
275-
const char *ArgsCstrBuff, size_t ArgsCstrBuffLen, bool UseWasmEH) {
274+
const char *DebugInfoCompression, bool UseEmulatedTls, const char *Argv0,
275+
size_t Argv0Len, const char *CommandLineArgs, size_t CommandLineArgsLen,
276+
bool UseWasmEH) {
276277

277278
auto OptLevel = fromRust(RustOptLevel);
278279
auto RM = fromRust(RustReloc);
@@ -343,25 +344,10 @@ extern "C" LLVMTargetMachineRef LLVMRustCreateTargetMachine(
343344

344345
Options.EmitStackSizeSection = EmitStackSizeSection;
345346

346-
if (ArgsCstrBuff != nullptr) {
347-
size_t buffer_offset = 0;
348-
assert(ArgsCstrBuff[ArgsCstrBuffLen - 1] == '\0');
349-
auto Arg0 = std::string(ArgsCstrBuff);
350-
buffer_offset = Arg0.size() + 1;
351-
352-
std::string CommandlineArgs;
353-
raw_string_ostream OS(CommandlineArgs);
354-
ListSeparator LS(" ");
355-
for (StringRef Arg : split(StringRef(ArgsCstrBuff + buffer_offset,
356-
ArgsCstrBuffLen - buffer_offset),
357-
'\0')) {
358-
OS << LS;
359-
sys::printArg(OS, Arg, /*Quote=*/true);
360-
}
361-
OS.flush();
362-
Options.MCOptions.Argv0 = Arg0;
363-
Options.MCOptions.CommandlineArgs = CommandlineArgs;
364-
}
347+
if (Argv0 != nullptr)
348+
Options.MCOptions.Argv0 = {Argv0, Argv0Len};
349+
if (CommandLineArgs != nullptr)
350+
Options.MCOptions.CommandlineArgs = {CommandLineArgs, CommandLineArgsLen};
365351

366352
#if LLVM_VERSION_GE(21, 0)
367353
TargetMachine *TM = TheTarget->createTargetMachine(Trip, CPU, Feature,

0 commit comments

Comments
 (0)