@@ -9,29 +9,14 @@ mod tests;
9
9
use crate :: ffi:: OsString ;
10
10
use crate :: fmt;
11
11
use crate :: io;
12
- use crate :: marker:: PhantomData ;
13
- use crate :: num:: NonZeroU16 ;
14
12
use crate :: os:: windows:: prelude:: * ;
15
13
use crate :: path:: PathBuf ;
16
- use crate :: ptr:: NonNull ;
17
14
use crate :: sys:: c;
18
15
use crate :: sys:: process:: ensure_no_nuls;
19
16
use crate :: sys:: windows:: os:: current_exe;
17
+ use crate :: sys_common:: args:: { parse_lp_cmd_line, WStrUnits } ;
20
18
use crate :: vec;
21
19
22
- use core:: iter;
23
-
24
/// Const-context stand-in for `NonZeroU16::new(n).unwrap()`.
///
/// Panics when `n` is zero.
///
/// FIXME: Drop this helper once `Option::unwrap` is stably const.
/// See the `const_option` feature (#67441).
const fn non_zero_u16(n: u16) -> NonZeroU16 {
    if let Some(non_zero) = NonZeroU16::new(n) {
        return non_zero;
    }
    panic!("called `unwrap` on a `None` value")
}
34
-
35
20
pub fn args ( ) -> Args {
36
21
// SAFETY: `GetCommandLineW` returns a pointer to a null terminated UTF-16
37
22
// string so it's safe for `WStrUnits` to use.
@@ -45,128 +30,6 @@ pub fn args() -> Args {
45
30
}
46
31
}
47
32
48
- /// Implements the Windows command-line argument parsing algorithm.
49
- ///
50
- /// Microsoft's documentation for the Windows CLI argument format can be found at
51
- /// <https://docs.microsoft.com/en-us/cpp/cpp/main-function-command-line-args?view=msvc-160#parsing-c-command-line-arguments>
52
- ///
53
- /// A more in-depth explanation is here:
54
- /// <https://daviddeley.com/autohotkey/parameters/parameters.htm#WIN>
55
- ///
56
- /// Windows includes a function to do command line parsing in shell32.dll.
57
- /// However, this is not used for two reasons:
58
- ///
59
- /// 1. Linking with that DLL causes the process to be registered as a GUI application.
60
- /// GUI applications add a bunch of overhead, even if no windows are drawn. See
61
- /// <https://randomascii.wordpress.com/2018/12/03/a-not-called-function-can-cause-a-5x-slowdown/>.
62
- ///
63
- /// 2. It does not follow the modern C/C++ argv rules outlined in the first two links above.
64
- ///
65
- /// This function was tested for equivalence to the C/C++ parsing rules using an
66
- /// extensive test suite available at
67
- /// <https://github.com/ChrisDenton/winarg/tree/std>.
68
- fn parse_lp_cmd_line < ' a , F : Fn ( ) -> OsString > (
69
- lp_cmd_line : Option < WStrUnits < ' a > > ,
70
- exe_name : F ,
71
- ) -> Vec < OsString > {
72
- const BACKSLASH : NonZeroU16 = non_zero_u16 ( b'\\' as u16 ) ;
73
- const QUOTE : NonZeroU16 = non_zero_u16 ( b'"' as u16 ) ;
74
- const TAB : NonZeroU16 = non_zero_u16 ( b'\t' as u16 ) ;
75
- const SPACE : NonZeroU16 = non_zero_u16 ( b' ' as u16 ) ;
76
-
77
- let mut ret_val = Vec :: new ( ) ;
78
- // If the cmd line pointer is null or it points to an empty string then
79
- // return the name of the executable as argv[0].
80
- if lp_cmd_line. as_ref ( ) . and_then ( |cmd| cmd. peek ( ) ) . is_none ( ) {
81
- ret_val. push ( exe_name ( ) ) ;
82
- return ret_val;
83
- }
84
- let mut code_units = lp_cmd_line. unwrap ( ) ;
85
-
86
- // The executable name at the beginning is special.
87
- let mut in_quotes = false ;
88
- let mut cur = Vec :: new ( ) ;
89
- for w in & mut code_units {
90
- match w {
91
- // A quote mark always toggles `in_quotes` no matter what because
92
- // there are no escape characters when parsing the executable name.
93
- QUOTE => in_quotes = !in_quotes,
94
- // If not `in_quotes` then whitespace ends argv[0].
95
- SPACE | TAB if !in_quotes => break ,
96
- // In all other cases the code unit is taken literally.
97
- _ => cur. push ( w. get ( ) ) ,
98
- }
99
- }
100
- // Skip whitespace.
101
- code_units. advance_while ( |w| w == SPACE || w == TAB ) ;
102
- ret_val. push ( OsString :: from_wide ( & cur) ) ;
103
-
104
- // Parse the arguments according to these rules:
105
- // * All code units are taken literally except space, tab, quote and backslash.
106
- // * When not `in_quotes`, space and tab separate arguments. Consecutive spaces and tabs are
107
- // treated as a single separator.
108
- // * A space or tab `in_quotes` is taken literally.
109
- // * A quote toggles `in_quotes` mode unless it's escaped. An escaped quote is taken literally.
110
- // * A quote can be escaped if preceded by an odd number of backslashes.
111
- // * If any number of backslashes is immediately followed by a quote then the number of
112
- // backslashes is halved (rounding down).
113
- // * Backslashes not followed by a quote are all taken literally.
114
- // * If `in_quotes` then a quote can also be escaped using another quote
115
- // (i.e. two consecutive quotes become one literal quote).
116
- let mut cur = Vec :: new ( ) ;
117
- let mut in_quotes = false ;
118
- while let Some ( w) = code_units. next ( ) {
119
- match w {
120
- // If not `in_quotes`, a space or tab ends the argument.
121
- SPACE | TAB if !in_quotes => {
122
- ret_val. push ( OsString :: from_wide ( & cur[ ..] ) ) ;
123
- cur. truncate ( 0 ) ;
124
-
125
- // Skip whitespace.
126
- code_units. advance_while ( |w| w == SPACE || w == TAB ) ;
127
- }
128
- // Backslashes can escape quotes or backslashes but only if consecutive backslashes are followed by a quote.
129
- BACKSLASH => {
130
- let backslash_count = code_units. advance_while ( |w| w == BACKSLASH ) + 1 ;
131
- if code_units. peek ( ) == Some ( QUOTE ) {
132
- cur. extend ( iter:: repeat ( BACKSLASH . get ( ) ) . take ( backslash_count / 2 ) ) ;
133
- // The quote is escaped if there are an odd number of backslashes.
134
- if backslash_count % 2 == 1 {
135
- code_units. next ( ) ;
136
- cur. push ( QUOTE . get ( ) ) ;
137
- }
138
- } else {
139
- // If there is no quote on the end then there is no escaping.
140
- cur. extend ( iter:: repeat ( BACKSLASH . get ( ) ) . take ( backslash_count) ) ;
141
- }
142
- }
143
- // If `in_quotes` and not backslash escaped (see above) then a quote either
144
- // unsets `in_quote` or is escaped by another quote.
145
- QUOTE if in_quotes => match code_units. peek ( ) {
146
- // Two consecutive quotes when `in_quotes` produces one literal quote.
147
- Some ( QUOTE ) => {
148
- cur. push ( QUOTE . get ( ) ) ;
149
- code_units. next ( ) ;
150
- }
151
- // Otherwise set `in_quotes`.
152
- Some ( _) => in_quotes = false ,
153
- // The end of the command line.
154
- // Push `cur` even if empty, which we do by breaking while `in_quotes` is still set.
155
- None => break ,
156
- } ,
157
- // If not `in_quotes` and not BACKSLASH escaped (see above) then a quote sets `in_quote`.
158
- QUOTE => in_quotes = true ,
159
- // Everything else is always taken literally.
160
- _ => cur. push ( w. get ( ) ) ,
161
- }
162
- }
163
- // Push the final argument, if any.
164
- if !cur. is_empty ( ) || in_quotes {
165
- ret_val. push ( OsString :: from_wide ( & cur[ ..] ) ) ;
166
- }
167
- ret_val
168
- }
169
-
170
33
/// The value returned by `args()`.
pub struct Args {
    /// Owning iterator over the stored `OsString` values.
    parsed_args_list: vec::IntoIter<OsString>,
}
@@ -199,55 +62,6 @@ impl ExactSizeIterator for Args {
199
62
}
200
63
}
201
64
202
/// A safe iterator over the code units of an LPWSTR
/// (i.e. a pointer to a run of UTF-16 code units ending with a NULL).
struct WStrUnits<'a> {
    // Invariant: never null...
    lpwstr: NonNull<u16>,
    // ...and the pointed-to memory must stay valid for this lifetime.
    lifetime: PhantomData<&'a [u16]>,
}

impl WStrUnits<'_> {
    /// Builds the iterator, yielding `None` when `lpwstr` is null.
    ///
    /// SAFETY: `lpwstr` must point to a null-terminated wide string that lives
    /// at least as long as the lifetime of this struct.
    unsafe fn new(lpwstr: *const u16) -> Option<Self> {
        NonNull::new(lpwstr as *mut u16).map(|lpwstr| Self { lpwstr, lifetime: PhantomData })
    }

    /// Returns the current code unit without advancing, or `None` when the
    /// iterator is sitting on the terminating NULL.
    fn peek(&self) -> Option<NonZeroU16> {
        // SAFETY: The current item is always readable because the pointer is
        // never moved out of the array's bounds.
        let unit = unsafe { *self.lpwstr.as_ptr() };
        NonZeroU16::new(unit)
    }

    /// Advance the iterator for as long as `predicate` returns true.
    /// Returns how many items were stepped over.
    fn advance_while<P: FnMut(NonZeroU16) -> bool>(&mut self, mut predicate: P) -> usize {
        let mut advanced = 0;
        loop {
            match self.peek() {
                Some(unit) if predicate(unit) => {
                    advanced += 1;
                    self.next();
                }
                // Either the terminator was reached or the predicate said stop.
                _ => return advanced,
            }
        }
    }
}

impl Iterator for WStrUnits<'_> {
    // Zero is never yielded because it marks the end of the string.
    type Item = NonZeroU16;

    fn next(&mut self) -> Option<NonZeroU16> {
        // Bail out at the terminating NULL without moving the pointer.
        let current = self.peek()?;
        // SAFETY: We returned above if NULL was reached, so the pointer is
        // not yet at the terminator and advancing by one stays in bounds.
        self.lpwstr = unsafe { NonNull::new_unchecked(self.lpwstr.as_ptr().add(1)) };
        Some(current)
    }
}
250
-
251
65
#[ derive( Debug ) ]
252
66
pub ( crate ) enum Arg {
253
67
/// Add quotes (if needed)
0 commit comments