@@ -1862,72 +1862,117 @@ test "byteSwapAllFields" {
18621862 }, s );
18631863}
18641864
1865+ /// Deprecated: use `tokenizeAny`, `tokenizeFull`, or `tokenizeScalar`
1866+ pub const tokenize = tokenizeAny ;
1867+
18651868/// Returns an iterator that iterates over the slices of `buffer` that are not
1866- /// any of the bytes in `delimiter_bytes `.
1869+ /// any of the items in `delimiters `.
18671870///
1868- /// `tokenize (u8, " abc def ghi ", " ")` will return slices
1871+ /// `tokenizeAny (u8, " abc| def || ghi ", " | ")` will return slices
18691872/// for "abc", "def", "ghi", null, in that order.
18701873///
18711874/// If `buffer` is empty, the iterator will return null.
1872- /// If `delimiter_bytes` does not exist in buffer,
1875+ /// If none of `delimiters` exist in buffer,
1876+ /// the iterator will return `buffer`, null, in that order.
1877+ ///
1878+ /// See also: `tokenizeFull`, `tokenizeScalar`,
1879+ /// `splitFull`, `splitAny`, `splitScalar`,
1880+ /// `splitBackwardsFull`, `splitBackwardsAny`, and `splitBackwardsScalar`
1881+ pub fn tokenizeAny (comptime T : type , buffer : []const T , delimiters : []const T ) TokenIterator (T , .any ) {
1882+ return .{
1883+ .index = 0 ,
1884+ .buffer = buffer ,
1885+ .delimiter = delimiters ,
1886+ };
1887+ }
1888+
1889+ /// Returns an iterator that iterates over the slices of `buffer` that are not
1890+ /// the sequence in `delimiter`.
1891+ ///
1892+ /// `tokenizeFull(u8, "<>abc><def<><>ghi", "<>")` will return slices
1893+ /// for "abc><def", "ghi", null, in that order.
1894+ ///
1895+ /// If `buffer` is empty, the iterator will return null.
1896+ /// If `delimiter` does not exist in buffer,
18731897/// the iterator will return `buffer`, null, in that order.
1898+ /// The delimiter length must not be zero.
18741899///
1875- /// See also: `split` and `splitBackwards`.
1876- pub fn tokenize (comptime T : type , buffer : []const T , delimiter_bytes : []const T ) TokenIterator (T ) {
1900+ /// See also: `tokenizeAny`, `tokenizeScalar`,
1901+ /// `splitFull`, `splitAny`, `splitScalar`,
1902+ /// `splitBackwardsFull`, `splitBackwardsAny`, and `splitBackwardsScalar`
1903+ pub fn tokenizeFull (comptime T : type , buffer : []const T , delimiter : []const T ) TokenIterator (T , .full ) {
1904+ assert (delimiter .len != 0 );
18771905 return .{
18781906 .index = 0 ,
18791907 .buffer = buffer ,
1880- .delimiter_bytes = delimiter_bytes ,
1908+ .delimiter = delimiter ,
18811909 };
18821910}
18831911
1884- test "tokenize" {
1885- var it = tokenize (u8 , " abc def ghi " , " " );
1912+ /// Returns an iterator that iterates over the slices of `buffer` that are not
1913+ /// `delimiter`.
1914+ ///
1915+ /// `tokenizeScalar(u8, " abc def ghi ", ' ')` will return slices
1916+ /// for "abc", "def", "ghi", null, in that order.
1917+ ///
1918+ /// If `buffer` is empty, the iterator will return null.
1919+ /// If `delimiter` does not exist in buffer,
1920+ /// the iterator will return `buffer`, null, in that order.
1921+ ///
1922+ /// See also: `tokenizeAny`, `tokenizeFull`,
1923+ /// `splitFull`, `splitAny`, `splitScalar`,
1924+ /// `splitBackwardsFull`, `splitBackwardsAny`, and `splitBackwardsScalar`
1925+ pub fn tokenizeScalar (comptime T : type , buffer : []const T , delimiter : T ) TokenIterator (T , .scalar ) {
1926+ return .{
1927+ .index = 0 ,
1928+ .buffer = buffer ,
1929+ .delimiter = delimiter ,
1930+ };
1931+ }
1932+
1933+ test "tokenizeScalar" {
1934+ var it = tokenizeScalar (u8 , " abc def ghi " , ' ' );
18861935 try testing .expect (eql (u8 , it .next ().? , "abc" ));
18871936 try testing .expect (eql (u8 , it .peek ().? , "def" ));
18881937 try testing .expect (eql (u8 , it .next ().? , "def" ));
18891938 try testing .expect (eql (u8 , it .next ().? , "ghi" ));
18901939 try testing .expect (it .next () == null );
18911940
1892- it = tokenize (u8 , "..\\ bob" , " \\ " );
1941+ it = tokenizeScalar (u8 , "..\\ bob" , ' \\ ' );
18931942 try testing .expect (eql (u8 , it .next ().? , ".." ));
18941943 try testing .expect (eql (u8 , ".." , "..\\ bob" [0.. it .index ]));
18951944 try testing .expect (eql (u8 , it .next ().? , "bob" ));
18961945 try testing .expect (it .next () == null );
18971946
1898- it = tokenize (u8 , "//a/b" , "/" );
1947+ it = tokenizeScalar (u8 , "//a/b" , '/' );
18991948 try testing .expect (eql (u8 , it .next ().? , "a" ));
19001949 try testing .expect (eql (u8 , it .next ().? , "b" ));
19011950 try testing .expect (eql (u8 , "//a/b" , "//a/b" [0.. it .index ]));
19021951 try testing .expect (it .next () == null );
19031952
1904- it = tokenize (u8 , "|" , "|" );
1953+ it = tokenizeScalar (u8 , "|" , '|' );
19051954 try testing .expect (it .next () == null );
19061955 try testing .expect (it .peek () == null );
19071956
1908- it = tokenize (u8 , "" , "|" );
1957+ it = tokenizeScalar (u8 , "" , '|' );
19091958 try testing .expect (it .next () == null );
19101959 try testing .expect (it .peek () == null );
19111960
1912- it = tokenize (u8 , "hello" , "" );
1913- try testing .expect (eql (u8 , it .next ().? , "hello" ));
1914- try testing .expect (it .next () == null );
1915-
1916- it = tokenize (u8 , "hello" , " " );
1961+ it = tokenizeScalar (u8 , "hello" , ' ' );
19171962 try testing .expect (eql (u8 , it .next ().? , "hello" ));
19181963 try testing .expect (it .next () == null );
19191964
1920- var it16 = tokenize (
1965+ var it16 = tokenizeScalar (
19211966 u16 ,
19221967 std .unicode .utf8ToUtf16LeStringLiteral ("hello" ),
1923- std . unicode . utf8ToUtf16LeStringLiteral ( " " ) ,
1968+ ' ' ,
19241969 );
19251970 try testing .expect (eql (u16 , it16 .next ().? , std .unicode .utf8ToUtf16LeStringLiteral ("hello" )));
19261971 try testing .expect (it16 .next () == null );
19271972}
19281973
1929- test "tokenize (multibyte)" {
1930- var it = tokenize (u8 , "a|b,c/d e" , " /,|" );
1974+ test "tokenizeAny (multibyte)" {
1975+ var it = tokenizeAny (u8 , "a|b,c/d e" , " /,|" );
19311976 try testing .expect (eql (u8 , it .next ().? , "a" ));
19321977 try testing .expect (eql (u8 , it .peek ().? , "b" ));
19331978 try testing .expect (eql (u8 , it .next ().? , "b" ));
@@ -1937,7 +1982,11 @@ test "tokenize (multibyte)" {
19371982 try testing .expect (it .next () == null );
19381983 try testing .expect (it .peek () == null );
19391984
1940- var it16 = tokenize (
1985+ it = tokenizeAny (u8 , "hello" , "" );
1986+ try testing .expect (eql (u8 , it .next ().? , "hello" ));
1987+ try testing .expect (it .next () == null );
1988+
1989+ var it16 = tokenizeAny (
19411990 u16 ,
19421991 std .unicode .utf8ToUtf16LeStringLiteral ("a|b,c/d e" ),
19431992 std .unicode .utf8ToUtf16LeStringLiteral (" /,|" ),
@@ -1950,18 +1999,68 @@ test "tokenize (multibyte)" {
19501999 try testing .expect (it16 .next () == null );
19512000}
19522001
2002+ test "tokenizeFull" {
2003+ var it = tokenizeFull (u8 , "a<>b<><>c><>d><" , "<>" );
2004+ try testing .expectEqualStrings ("a" , it .next ().? );
2005+ try testing .expectEqualStrings ("b" , it .peek ().? );
2006+ try testing .expectEqualStrings ("b" , it .next ().? );
2007+ try testing .expectEqualStrings ("c>" , it .next ().? );
2008+ try testing .expectEqualStrings ("d><" , it .next ().? );
2009+ try testing .expect (it .next () == null );
2010+ try testing .expect (it .peek () == null );
2011+
2012+ var it16 = tokenizeFull (
2013+ u16 ,
2014+ std .unicode .utf8ToUtf16LeStringLiteral ("a<>b<><>c><>d><" ),
2015+ std .unicode .utf8ToUtf16LeStringLiteral ("<>" ),
2016+ );
2017+ try testing .expect (eql (u16 , it16 .next ().? , std .unicode .utf8ToUtf16LeStringLiteral ("a" )));
2018+ try testing .expect (eql (u16 , it16 .next ().? , std .unicode .utf8ToUtf16LeStringLiteral ("b" )));
2019+ try testing .expect (eql (u16 , it16 .next ().? , std .unicode .utf8ToUtf16LeStringLiteral ("c>" )));
2020+ try testing .expect (eql (u16 , it16 .next ().? , std .unicode .utf8ToUtf16LeStringLiteral ("d><" )));
2021+ try testing .expect (it16 .next () == null );
2022+ }
2023+
19532024test "tokenize (reset)" {
1954- var it = tokenize (u8 , " abc def ghi " , " " );
1955- try testing .expect (eql (u8 , it .next ().? , "abc" ));
1956- try testing .expect (eql (u8 , it .next ().? , "def" ));
1957- try testing .expect (eql (u8 , it .next ().? , "ghi" ));
2025+ {
2026+ var it = tokenizeAny (u8 , " abc def ghi " , " " );
2027+ try testing .expect (eql (u8 , it .next ().? , "abc" ));
2028+ try testing .expect (eql (u8 , it .next ().? , "def" ));
2029+ try testing .expect (eql (u8 , it .next ().? , "ghi" ));
19582030
1959- it .reset ();
2031+ it .reset ();
19602032
1961- try testing .expect (eql (u8 , it .next ().? , "abc" ));
1962- try testing .expect (eql (u8 , it .next ().? , "def" ));
1963- try testing .expect (eql (u8 , it .next ().? , "ghi" ));
1964- try testing .expect (it .next () == null );
2033+ try testing .expect (eql (u8 , it .next ().? , "abc" ));
2034+ try testing .expect (eql (u8 , it .next ().? , "def" ));
2035+ try testing .expect (eql (u8 , it .next ().? , "ghi" ));
2036+ try testing .expect (it .next () == null );
2037+ }
2038+ {
2039+ var it = tokenizeFull (u8 , "<><>abc<>def<><>ghi<>" , "<>" );
2040+ try testing .expect (eql (u8 , it .next ().? , "abc" ));
2041+ try testing .expect (eql (u8 , it .next ().? , "def" ));
2042+ try testing .expect (eql (u8 , it .next ().? , "ghi" ));
2043+
2044+ it .reset ();
2045+
2046+ try testing .expect (eql (u8 , it .next ().? , "abc" ));
2047+ try testing .expect (eql (u8 , it .next ().? , "def" ));
2048+ try testing .expect (eql (u8 , it .next ().? , "ghi" ));
2049+ try testing .expect (it .next () == null );
2050+ }
2051+ {
2052+ var it = tokenizeScalar (u8 , " abc def ghi " , ' ' );
2053+ try testing .expect (eql (u8 , it .next ().? , "abc" ));
2054+ try testing .expect (eql (u8 , it .next ().? , "def" ));
2055+ try testing .expect (eql (u8 , it .next ().? , "ghi" ));
2056+
2057+ it .reset ();
2058+
2059+ try testing .expect (eql (u8 , it .next ().? , "abc" ));
2060+ try testing .expect (eql (u8 , it .next ().? , "def" ));
2061+ try testing .expect (eql (u8 , it .next ().? , "ghi" ));
2062+ try testing .expect (it .next () == null );
2063+ }
19652064}
19662065
19672066/// Deprecated: use `splitFull`, `splitAny`, or `splitScalar`
@@ -1978,8 +2077,8 @@ pub const split = splitFull;
19782077/// The delimiter length must not be zero.
19792078///
19802079/// See also: `splitAny`, `splitScalar`, `splitBackwardsFull`,
1981- /// `splitBackwardsAny`,`splitBackwardsScalar`, and
1982- /// `tokenize `.
2080+ /// `splitBackwardsAny`, `splitBackwardsScalar`,
2081+ /// `tokenizeAny`, `tokenizeFull`, and `tokenizeScalar`.
19832082pub fn splitFull (comptime T : type , buffer : []const T , delimiter : []const T ) SplitIterator (T , .full ) {
19842083 assert (delimiter .len != 0 );
19852084 return .{
@@ -1999,8 +2098,8 @@ pub fn splitFull(comptime T: type, buffer: []const T, delimiter: []const T) Spli
19992098/// the iterator will return `buffer`, null, in that order.
20002099///
20012100/// See also: `splitFull`, `splitScalar`, `splitBackwardsFull`,
2002- /// `splitBackwardsAny`,`splitBackwardsScalar`, and
2003- /// `tokenize `.
2101+ /// `splitBackwardsAny`, `splitBackwardsScalar`,
2102+ /// `tokenizeAny`, `tokenizeFull`, and `tokenizeScalar`.
20042103pub fn splitAny (comptime T : type , buffer : []const T , delimiters : []const T ) SplitIterator (T , .any ) {
20052104 return .{
20062105 .index = 0 ,
@@ -2019,8 +2118,8 @@ pub fn splitAny(comptime T: type, buffer: []const T, delimiters: []const T) Spli
20192118/// the iterator will return `buffer`, null, in that order.
20202119///
20212120/// See also: `splitFull`, `splitAny`, `splitBackwardsFull`,
2022- /// `splitBackwardsAny`,`splitBackwardsScalar`, and
2023- /// `tokenize `.
2121+ /// `splitBackwardsAny`, `splitBackwardsScalar`,
2122+ /// `tokenizeAny`, `tokenizeFull`, and `tokenizeScalar`.
20242123pub fn splitScalar (comptime T : type , buffer : []const T , delimiter : T ) SplitIterator (T , .scalar ) {
20252124 return .{
20262125 .index = 0 ,
@@ -2176,8 +2275,8 @@ pub const splitBackwards = splitBackwardsFull;
21762275/// The delimiter length must not be zero.
21772276///
21782277/// See also: `splitBackwardsAny`, `splitBackwardsScalar`,
2179- /// `splitFull`, `splitAny`,`splitScalar`, and
2180- /// `tokenize `.
2278+ /// `splitFull`, `splitAny`, `splitScalar`,
2279+ /// `tokenizeAny`, `tokenizeFull`, and `tokenizeScalar`.
21812280pub fn splitBackwardsFull (comptime T : type , buffer : []const T , delimiter : []const T ) SplitBackwardsIterator (T , .full ) {
21822281 assert (delimiter .len != 0 );
21832282 return .{
@@ -2197,8 +2296,8 @@ pub fn splitBackwardsFull(comptime T: type, buffer: []const T, delimiter: []cons
21972296/// the iterator will return `buffer`, null, in that order.
21982297///
21992298/// See also: `splitBackwardsFull`, `splitBackwardsScalar`,
2200- /// `splitFull`, `splitAny`,`splitScalar`, and
2201- /// `tokenize `.
2299+ /// `splitFull`, `splitAny`, `splitScalar`,
2300+ /// `tokenizeAny`, `tokenizeFull`, and `tokenizeScalar`.
22022301pub fn splitBackwardsAny (comptime T : type , buffer : []const T , delimiters : []const T ) SplitBackwardsIterator (T , .any ) {
22032302 return .{
22042303 .index = buffer .len ,
@@ -2217,8 +2316,8 @@ pub fn splitBackwardsAny(comptime T: type, buffer: []const T, delimiters: []cons
22172316/// the iterator will return `buffer`, null, in that order.
22182317///
22192318/// See also: `splitBackwardsFull`, `splitBackwardsAny`,
2220- /// `splitFull`, `splitAny`,`splitScalar`, and
2221- /// `tokenize `.
2319+ /// `splitFull`, `splitAny`, `splitScalar`,
2320+ /// `tokenizeAny`, `tokenizeFull`, and `tokenizeScalar`.
22222321pub fn splitBackwardsScalar (comptime T : type , buffer : []const T , delimiter : T ) SplitBackwardsIterator (T , .scalar ) {
22232322 return .{
22242323 .index = buffer .len ,
@@ -2548,10 +2647,13 @@ test "endsWith" {
25482647
25492648pub const DelimiterType = enum { full , any , scalar };
25502649
2551- pub fn TokenIterator (comptime T : type ) type {
2650+ pub fn TokenIterator (comptime T : type , comptime delimiter_type : DelimiterType ) type {
25522651 return struct {
25532652 buffer : []const T ,
2554- delimiter_bytes : []const T ,
2653+ delimiter : switch (delimiter_type ) {
2654+ .full , .any = > []const T ,
2655+ .scalar = > T ,
2656+ },
25552657 index : usize ,
25562658
25572659 const Self = @This ();
@@ -2568,15 +2670,18 @@ pub fn TokenIterator(comptime T: type) type {
25682670 /// complete. Does not advance to the next token.
25692671 pub fn peek (self : * Self ) ? []const T {
25702672 // move to beginning of token
2571- while (self .index < self .buffer .len and self .isSplitByte (self .buffer [self .index ])) : (self .index += 1 ) {}
2673+ while (self .index < self .buffer .len and self .isDelimiter (self .index )) : (self .index += switch (delimiter_type ) {
2674+ .full = > self .delimiter .len ,
2675+ .any , .scalar = > 1 ,
2676+ }) {}
25722677 const start = self .index ;
25732678 if (start == self .buffer .len ) {
25742679 return null ;
25752680 }
25762681
25772682 // move to end of token
25782683 var end = start ;
2579- while (end < self .buffer .len and ! self .isSplitByte ( self . buffer [ end ] )) : (end += 1 ) {}
2684+ while (end < self .buffer .len and ! self .isDelimiter ( end )) : (end += 1 ) {}
25802685
25812686 return self .buffer [start .. end ];
25822687 }
@@ -2585,7 +2690,10 @@ pub fn TokenIterator(comptime T: type) type {
25852690 pub fn rest (self : Self ) []const T {
25862691 // move to beginning of token
25872692 var index : usize = self .index ;
2588- while (index < self .buffer .len and self .isSplitByte (self .buffer [index ])) : (index += 1 ) {}
2693+ while (index < self .buffer .len and self .isDelimiter (index )) : (index += switch (delimiter_type ) {
2694+ .full = > self .delimiter .len ,
2695+ .any , .scalar = > 1 ,
2696+ }) {}
25892697 return self .buffer [index .. ];
25902698 }
25912699
@@ -2594,13 +2702,20 @@ pub fn TokenIterator(comptime T: type) type {
25942702 self .index = 0 ;
25952703 }
25962704
2597- fn isSplitByte (self : Self , byte : T ) bool {
2598- for (self .delimiter_bytes ) | delimiter_byte | {
2599- if (byte == delimiter_byte ) {
2600- return true ;
2601- }
2705+ fn isDelimiter (self : Self , index : usize ) bool {
2706+ switch (delimiter_type ) {
2707+ .full = > return startsWith (T , self .buffer [index .. ], self .delimiter ),
2708+ .any = > {
2709+ const item = self .buffer [index ];
2710+ for (self .delimiter ) | delimiter_item | {
2711+ if (item == delimiter_item ) {
2712+ return true ;
2713+ }
2714+ }
2715+ return false ;
2716+ },
2717+ .scalar = > return self .buffer [index ] == self .delimiter ,
26022718 }
2603- return false ;
26042719 }
26052720 };
26062721}
0 commit comments