Skip to content

Commit

Permalink
Support parsing -00:00
Browse files Browse the repository at this point in the history
  • Loading branch information
pitdicker committed Jun 12, 2023
1 parent 5d07c43 commit 2c55b80
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 33 deletions.
2 changes: 1 addition & 1 deletion src/datetime/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ fn test_datetime_rfc2822_and_rfc3339() {
);
assert_eq!(
DateTime::parse_from_rfc2822("Wed, 18 Feb 2015 23:16:09 -0000"),
Ok(FixedOffset::east_opt(0).unwrap().with_ymd_and_hms(2015, 2, 18, 23, 16, 9).unwrap())
Ok(FixedOffset::OFFSET_UNKNOWN.with_ymd_and_hms(2015, 2, 18, 23, 16, 9).unwrap())
);
assert_eq!(
DateTime::parse_from_rfc3339("2015-02-18T23:16:09Z"),
Expand Down
48 changes: 28 additions & 20 deletions src/format/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use super::scan;
use super::{Fixed, InternalFixed, InternalInternal, Item, Numeric, Pad, Parsed};
use super::{ParseError, ParseErrorKind, ParseResult};
use super::{BAD_FORMAT, INVALID, NOT_ENOUGH, OUT_OF_RANGE, TOO_LONG, TOO_SHORT};
use crate::format::parsed::NO_OFFSET_INFO;
use crate::{DateTime, FixedOffset, Weekday};

fn set_weekday_with_num_days_from_sunday(p: &mut Parsed, v: i64) -> ParseResult<()> {
Expand Down Expand Up @@ -142,10 +143,8 @@ fn parse_rfc2822<'a>(parsed: &mut Parsed, mut s: &'a str) -> ParseResult<(&'a st
}

s = scan::space(s)?; // mandatory
if let Some(offset) = try_consume!(scan::timezone_offset_2822(s)) {
// only set the offset when it is definitely known (i.e. not `-0000`)
parsed.set_offset(i64::from(offset))?;
}
let offset = try_consume!(scan::timezone_offset_2822(s));
parsed.set_offset(i64::from(offset.unwrap_or(NO_OFFSET_INFO)))?;

// optional comments
while let Ok((s_out, ())) = scan::comment_2822(s) {
Expand Down Expand Up @@ -218,12 +217,12 @@ fn parse_rfc3339<'a>(parsed: &mut Parsed, mut s: &'a str) -> ParseResult<(&'a st
// But it is possible to read the offset directly from `Parsed`. We want to only successfully
// populate `Parsed` if the input is fully valid RFC 3339.
const MAX_OFFSET: i32 = 23 * 3600 + 59 * 60;
if offset < -MAX_OFFSET || offset > MAX_OFFSET {
return Err(OUT_OF_RANGE);
if offset >= Some(-MAX_OFFSET) || offset <= Some(MAX_OFFSET) {
parsed.set_offset(i64::from(offset.unwrap_or(NO_OFFSET_INFO)))?;
Ok((s, ()))
} else {
Err(OUT_OF_RANGE)
}
parsed.set_offset(i64::from(offset))?;

Ok((s, ()))
}

/// Tries to parse given string into `parsed` with given formatting items.
Expand Down Expand Up @@ -460,15 +459,19 @@ where
s.trim_start(),
scan::colon_or_space
));
parsed.set_offset(i64::from(offset)).map_err(|e| (s, e))?;
parsed
.set_offset(i64::from(offset.unwrap_or(NO_OFFSET_INFO)))
.map_err(|e| (s, e))?;
}

&TimezoneOffsetColonZ | &TimezoneOffsetZ => {
let offset = try_consume!(scan::timezone_offset_zulu(
s.trim_start(),
scan::colon_or_space
));
parsed.set_offset(i64::from(offset)).map_err(|e| (s, e))?;
parsed
.set_offset(i64::from(offset.unwrap_or(NO_OFFSET_INFO)))
.map_err(|e| (s, e))?;
}
&Internal(InternalFixed {
val: InternalInternal::TimezoneOffsetPermissive,
Expand All @@ -477,7 +480,9 @@ where
s.trim_start(),
scan::colon_or_space
));
parsed.set_offset(i64::from(offset)).map_err(|e| (s, e))?;
parsed
.set_offset(i64::from(offset.unwrap_or(NO_OFFSET_INFO)))
.map_err(|e| (s, e))?;
}

&RFC2822 => try_consume!(parse_rfc2822(parsed, s)),
Expand Down Expand Up @@ -851,8 +856,8 @@ mod tests {
check!("+1234:56", [fix!(TimezoneOffset)]; TOO_LONG);
check!("+1234:567", [fix!(TimezoneOffset)]; TOO_LONG);
check!("+00:00", [fix!(TimezoneOffset)]; offset: 0);
check!("-00:00", [fix!(TimezoneOffset)]; offset: 0);
check!("−00:00", [fix!(TimezoneOffset)]; offset: 0); // MINUS SIGN (U+2212)
check!("-00:00", [fix!(TimezoneOffset)]; offset: NO_OFFSET_INFO);
check!("−00:00", [fix!(TimezoneOffset)]; offset: NO_OFFSET_INFO); // MINUS SIGN (U+2212)
check!("+00:01", [fix!(TimezoneOffset)]; offset: 60);
check!("-00:01", [fix!(TimezoneOffset)]; offset: -60);
check!("+00:30", [fix!(TimezoneOffset)]; offset: 1_800);
Expand Down Expand Up @@ -1261,6 +1266,7 @@ mod tests {
("20 Jan 2015 17:35:20 -0800", Ok("Tue, 20 Jan 2015 17:35:20 -0800")), // no day of week
("20 JAN 2015 17:35:20 -0800", Ok("Tue, 20 Jan 2015 17:35:20 -0800")), // upper case month
("Tue, 20 Jan 2015 17:35 -0800", Ok("Tue, 20 Jan 2015 17:35:00 -0800")), // no second
("20 Jan 2015 17:35:20 -0000", Ok("Tue, 20 Jan 2015 17:35:20 -0000")), // -0000 offset
("11 Sep 2001 09:45:00 +0000", Ok("Tue, 11 Sep 2001 09:45:00 +0000")),
("11 Sep 2001 09:45:00 EST", Ok("Tue, 11 Sep 2001 09:45:00 -0500")),
("11 Sep 2001 09:45:00 GMT", Ok("Tue, 11 Sep 2001 09:45:00 +0000")),
Expand All @@ -1273,8 +1279,10 @@ mod tests {
("Tue, 20 Jan 2015 17:35:90 -0800", Err(OUT_OF_RANGE)), // bad second
("Tue, 20 Jan 2015 17:35:20 -0890", Err(OUT_OF_RANGE)), // bad offset
("6 Jun 1944 04:00:00Z", Err(INVALID)), // bad offset (zulu not allowed)
("Tue, 20 Jan 2015 17:35:20 HAS", Err(NOT_ENOUGH)), // bad named time zone
// named timezones that have specific timezone offsets
("Tue, 20 Jan 2015 17:35:20 HAS", Err(INVALID)), // bad named time zone
("20 Jan 2015 17:35:20 +0000", Ok("Tue, 20 Jan 2015 17:35:20 +0000")),
("20 Jan 2015 17:35:20 -0001", Ok("Tue, 20 Jan 2015 17:35:20 -0001")),
("Tue, 20 Jan 2015 17:35:20 -9900", Err(OUT_OF_RANGE)), // bad offset
// named timezones that have specific timezone offsets
// see https://www.rfc-editor.org/rfc/rfc2822#section-4.3
("Tue, 20 Jan 2015 17:35:20 GMT", Ok("Tue, 20 Jan 2015 17:35:20 +0000")),
("Tue, 20 Jan 2015 17:35:20 UT", Ok("Tue, 20 Jan 2015 17:35:20 +0000")),
Expand All @@ -1295,14 +1303,13 @@ mod tests {
("Tue, 20 Jan 2015 17:35:20 K", Ok("Tue, 20 Jan 2015 17:35:20 +0000")),
("Tue, 20 Jan 2015 17:35:20 k", Ok("Tue, 20 Jan 2015 17:35:20 +0000")),
// named single-letter timezone "J" is specifically not valid
("Tue, 20 Jan 2015 17:35:20 J", Err(NOT_ENOUGH)),
("Tue, 20 Jan 2015 17:35:20 J", Err(INVALID)),
("Tue, 20 Jan 2015 17:35:20 -0890", Err(OUT_OF_RANGE)), // bad offset minutes
("Tue, 20 Jan 2015 17:35:20Z", Err(INVALID)), // bad offset: zulu not allowed
("Tue, 20 Jan 2015 17:35:20 Zulu", Err(NOT_ENOUGH)), // bad offset: zulu not allowed
("Tue, 20 Jan 2015 17:35:20 ZULU", Err(NOT_ENOUGH)), // bad offset: zulu not allowed
("Tue, 20 Jan 2015 17:35:20 Zulu", Err(INVALID)), // bad offset: zulu not allowed
("Tue, 20 Jan 2015 17:35:20 ZULU", Err(INVALID)), // bad offset: zulu not allowed
("Tue, 20 Jan 2015 17:35:20 −0800", Err(INVALID)), // bad offset: timezone offset using MINUS SIGN (U+2212), not specified for RFC 2822
("Tue, 20 Jan 2015 17:35:20 0800", Err(INVALID)), // missing offset sign
("Tue, 20 Jan 2015 17:35:20 HAS", Err(NOT_ENOUGH)), // bad named timezone
("Tue, 20 Jan 2015😈17:35:20 -0800", Err(INVALID)), // bad character!
];

Expand Down Expand Up @@ -1414,6 +1421,7 @@ mod tests {
("2015-01-20T17:35:20-08:00", Ok("2015-01-20T17:35:20-08:00")), // normal case
("2015-01-20T17:35:20−08:00", Ok("2015-01-20T17:35:20-08:00")), // normal case with MINUS SIGN (U+2212)
("1944-06-06T04:04:00Z", Ok("1944-06-06T04:04:00+00:00")), // D-day
("2015-01-20T17:35:20-00:00", Ok("2015-01-20T17:35:20-00:00")), // offset -00:00
("2001-09-11T09:45:00-08:00", Ok("2001-09-11T09:45:00-08:00")),
("2015-01-20T17:35:20.001-08:00", Ok("2015-01-20T17:35:20.001-08:00")),
("2015-01-20T17:35:20.001−08:00", Ok("2015-01-20T17:35:20.001-08:00")), // with MINUS SIGN (U+2212)
Expand Down
31 changes: 19 additions & 12 deletions src/format/scan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,9 @@ pub(super) fn colon_or_space(s: &str) -> ParseResult<&str> {
///
/// The additional `colon` may be used to parse a mandatory or optional `:`
/// between hours and minutes, and should return either a new suffix or `Err` when parsing fails.
pub(super) fn timezone_offset<F>(s: &str, consume_colon: F) -> ParseResult<(&str, i32)>
///
/// May return `None` which indicates no offset data is available (i.e. `-0000`).
pub(super) fn timezone_offset<F>(s: &str, consume_colon: F) -> ParseResult<(&str, Option<i32>)>
where
F: FnMut(&str) -> ParseResult<&str>,
{
Expand All @@ -231,7 +233,7 @@ fn timezone_offset_internal<F>(
mut consume_colon: F,
allow_missing_minutes: bool,
allow_tz_minus_sign: bool,
) -> ParseResult<(&str, i32)>
) -> ParseResult<(&str, Option<i32>)>
where
F: FnMut(&str) -> ParseResult<&str>,
{
Expand Down Expand Up @@ -299,22 +301,27 @@ where
};

let seconds = hours * 3600 + minutes * 60;
Ok((s, if negative { -seconds } else { seconds }))

if seconds == 0 && negative {
return Ok((s, None));
}
Ok((s, Some(if negative { -seconds } else { seconds })))
}

/// Same as `timezone_offset` but also allows for `z`/`Z` which is the same as `+00:00`.
pub(super) fn timezone_offset_zulu<F>(s: &str, colon: F) -> ParseResult<(&str, i32)>
/// May return `None` which indicates no offset data is available (i.e. `-0000`).
pub(super) fn timezone_offset_zulu<F>(s: &str, colon: F) -> ParseResult<(&str, Option<i32>)>
where
F: FnMut(&str) -> ParseResult<&str>,
{
let bytes = s.as_bytes();
match bytes.first() {
Some(&b'z') | Some(&b'Z') => Ok((&s[1..], 0)),
Some(&b'z') | Some(&b'Z') => Ok((&s[1..], Some(0))),
Some(&b'u') | Some(&b'U') => {
if bytes.len() >= 3 {
let (b, c) = (bytes[1], bytes[2]);
match (b | 32, c | 32) {
(b't', b'c') => Ok((&s[3..], 0)),
(b't', b'c') => Ok((&s[3..], Some(0))),
_ => Err(INVALID),
}
} else {
Expand All @@ -327,18 +334,18 @@ where

/// Same as `timezone_offset` but also allows for `z`/`Z` which is the same as
/// `+00:00`, and allows missing minutes entirely.
pub(super) fn timezone_offset_permissive<F>(s: &str, colon: F) -> ParseResult<(&str, i32)>
pub(super) fn timezone_offset_permissive<F>(s: &str, colon: F) -> ParseResult<(&str, Option<i32>)>
where
F: FnMut(&str) -> ParseResult<&str>,
{
match s.as_bytes().first() {
Some(&b'z') | Some(&b'Z') => Ok((&s[1..], 0)),
Some(&b'z') | Some(&b'Z') => Ok((&s[1..], Some(0))),
_ => timezone_offset_internal(s, colon, true, true),
}
}

/// Same as `timezone_offset` but also allows for RFC 2822 legacy timezones.
/// May return `None` which indicates an insufficient offset data (i.e. `-0000`).
/// May return `None` which indicates no offset data is available (i.e. `-0000`).
/// See [RFC 2822 Section 4.3].
///
/// [RFC 2822 Section 4.3]: https://tools.ietf.org/html/rfc2822#section-4.3
Expand All @@ -365,14 +372,14 @@ pub(super) fn timezone_offset_2822(s: &str) -> ParseResult<(&str, Option<i32>)>
match name[0] {
// recommended by RFC 2822: consume but treat it as -0000
b'a'..=b'i' | b'k'..=b'z' | b'A'..=b'I' | b'K'..=b'Z' => offset_hours(0),
_ => Ok((s, None)),
_ => Err(INVALID),
}
} else {
Ok((s, None))
Err(INVALID)
}
} else {
let (s_, offset) = timezone_offset_internal(s, |s| Ok(s), false, false)?;
Ok((s_, Some(offset)))
Ok((s_, offset))
}
}

Expand Down

0 comments on commit 2c55b80

Please sign in to comment.