Skip to content

Replace regex-based date/time parsing with manual parser #520

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Sep 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
283 changes: 210 additions & 73 deletions include/sqlpp11/detail/parse_date_time.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,100 +27,237 @@
* POSSIBILITY OF SUCH DAMAGE.
*/

#include <regex>
#include <sqlpp11/chrono.h>

#include <cctype>

namespace sqlpp
{
namespace detail
{
// Parse exactly `length` decimal digits starting at `input` into `value`.
//
// Returns true and advances `input` past the digits on success.
// Returns false and leaves `input` untouched if a non-digit character
// (including the terminating NUL) is met before `length` digits were read;
// `value` then holds a partial result and must not be used.
// A non-positive `length` consumes nothing and succeeds with `value == 0`.
// No overflow check is performed; the callers visible here pass lengths of 2 and 4.
inline bool parse_unsigned(int& value, const char*& input, int length)
{
  value = 0;
  const char* cursor = input;
  // Counted loop so that a negative length cannot run past the end of the string.
  for (int remaining = length; remaining > 0; --remaining)
  {
    const char ch = *cursor++;
    // Plain range comparison instead of std::isdigit: passing a negative char
    // (any byte >= 0x80 where char is signed) to std::isdigit is undefined
    // behaviour, and the range check does not depend on the current locale.
    if (ch < '0' || ch > '9')
    {
      return false;
    }
    value = value * 10 + (ch - '0');
  }
  input = cursor;
  return true;
}

// Consume a single expected character.
//
// If the character at `input` equals `ch`, `input` is advanced by one and true
// is returned; otherwise `input` is left where it is and false is returned.
inline bool parse_character(const char*& input, char ch)
{
  const bool matched = (*input == ch);
  if (matched)
  {
    ++input;
  }
  return matched;
}

// Parse a calendar date formatted as YYYY-MM-DD from the front of `input`.
//
// On success the date is stored in `dp`, `input` is advanced past the ten
// characters consumed and true is returned. On a format mismatch false is
// returned and neither `dp` nor `input` is modified.
// Only the format is checked: the month and day values are not range-checked
// before being converted to a day_point. For how out-of-range fields are mapped see
// https://en.cppreference.com/w/cpp/chrono/year_month_day/operator_days
inline bool parse_yyyy_mm_dd(sqlpp::chrono::day_point& dp, const char*& input)
{
  const char* cursor = input;
  int year = 0;
  int month = 0;
  int day = 0;
  const bool well_formed = parse_unsigned(year, cursor, 4) && parse_character(cursor, '-') &&
                           parse_unsigned(month, cursor, 2) && parse_character(cursor, '-') &&
                           parse_unsigned(day, cursor, 2);
  if (!well_formed)
  {
    return false;
  }
  // Commit the result only after the whole pattern matched.
  dp = ::date::year{year} / month / day;
  input = cursor;
  return true;
}

// Parse a time of day formatted as HH:MM:SS from the front of `input`.
//
// On success the time is stored in `us` as a duration, `input` is advanced
// past the eight characters consumed and true is returned. On a format
// mismatch false is returned and neither `us` nor `input` is modified.
inline bool parse_hh_mm_ss(std::chrono::microseconds& us, const char*& input)
{
  const char* cursor = input;
  int hour = 0;
  int minute = 0;
  int second = 0;
  const bool well_formed = parse_unsigned(hour, cursor, 2) && parse_character(cursor, ':') &&
                           parse_unsigned(minute, cursor, 2) && parse_character(cursor, ':') &&
                           parse_unsigned(second, cursor, 2);
  if (!well_formed)
  {
    return false;
  }
  // Only the format is checked: any two-digit hour, minute and second values
  // are accepted and simply summed into the resulting duration.
  const auto since_midnight = std::chrono::hours{hour} + std::chrono::minutes{minute} + std::chrono::seconds{second};
  us = since_midnight;
  input = cursor;
  return true;
}

// Parse a fractional-seconds suffix: a '.' followed by one to six decimal digits.
//
// On success the fraction is stored in `us` scaled to microseconds (".5" yields
// 500000), `input` is advanced past the '.' and the digits consumed, and true
// is returned. At most six digits are consumed; any further digits are left in
// `input` for the caller to deal with. If `input` does not start with '.' or
// no digit follows it, false is returned and neither `us` nor `input` is modified.
inline bool parse_ss_fraction(std::chrono::microseconds& us, const char*& input)
{
  constexpr int max_digits = 6;  // microsecond resolution

  const char* cursor = input;
  if (*cursor != '.')
  {
    return false;
  }
  ++cursor;

  int value = 0;
  int digits = 0;
  // Plain range comparison instead of std::isdigit: passing a negative char
  // (any byte >= 0x80 where char is signed) to std::isdigit is undefined
  // behaviour, and the range check does not depend on the current locale.
  while (digits < max_digits && *cursor >= '0' && *cursor <= '9')
  {
    value = value * 10 + (*cursor - '0');
    ++cursor;
    ++digits;
  }
  if (digits == 0)
  {
    return false;
  }

  // Right-pad with zeros so that e.g. ".12" becomes 120000 microseconds.
  for (; digits < max_digits; ++digits)
  {
    value *= 10;
  }

  us = std::chrono::microseconds{value};
  input = cursor;
  return true;
}

// Parse a time zone offset formatted as +HH, +HH:MM or +HH:MM:SS (or with a
// leading '-') from the front of `input`.
//
// The sign and the two-digit hour are mandatory; if either is missing false is
// returned and neither `offset` nor `input` is modified. The ":MM" and ":SS"
// parts are optional: parsing stops at the first part that does not match,
// `input` is left just after the last part that did match, and true is returned.
// `offset` receives the signed sum of the parts that were parsed.
inline bool parse_tz(std::chrono::microseconds& offset, const char*& input)
{
  const char* cursor = input;

  int sign = 0;
  if (parse_character(cursor, '+'))
  {
    sign = 1;
  }
  else if (parse_character(cursor, '-'))
  {
    sign = -1;
  }
  if (sign == 0)
  {
    return false;
  }

  int hh = 0;
  if (!parse_unsigned(hh, cursor, 2))
  {
    return false;
  }
  offset = sign * std::chrono::hours{hh};
  input = cursor;

  // Optional minutes; on mismatch keep what has been committed so far.
  int mm = 0;
  const bool has_minutes = parse_character(cursor, ':') && parse_unsigned(mm, cursor, 2);
  if (!has_minutes)
  {
    return true;
  }
  offset += sign * std::chrono::minutes{mm};
  input = cursor;

  // Optional seconds; same rule as for the minutes.
  int ss = 0;
  const bool has_seconds = parse_character(cursor, ':') && parse_unsigned(ss, cursor, 2);
  if (!has_seconds)
  {
    return true;
  }
  offset += sign * std::chrono::seconds{ss};
  input = cursor;
  return true;
}

inline bool parse_hh_mm_ss_us_tz(std::chrono::microseconds& us, const char*& input)
{
if (parse_hh_mm_ss(us, input) == false)
{
return false;
}
std::chrono::microseconds fraction;
if (parse_ss_fraction(fraction, input))
{
us += fraction;
}
std::chrono::microseconds tz_offset;
if (parse_tz(tz_offset, input))
{
us -= tz_offset;
}
return true;
}

// Parse timestamp formatted as YYYY-MM-DD HH:MM:SS.U+HH:MM:SS
// The microseconds and timezone offset are optional
//
inline bool parse_string_date(::sqlpp::chrono::day_point& value, const char* date_string)
inline bool parse_timestamp(sqlpp::chrono::microsecond_point& tp, const char* date_time_string)
{
static const std::regex rx{"(\\d{4})-(\\d{2})-(\\d{2})"};
std::cmatch mr;
if (std::regex_match(date_string, mr, rx) == false)
sqlpp::chrono::day_point parsed_ymd;
std::chrono::microseconds parsed_tod;
if ((parse_yyyy_mm_dd(parsed_ymd, date_time_string) == false) ||
(parse_character(date_time_string, ' ') == false) ||
(parse_hh_mm_ss_us_tz(parsed_tod, date_time_string) == false))
{
return false;
}
if (*date_time_string)
{
return false;
}
value = ::sqlpp::chrono::day_point{
::date::year{std::atoi(date_string + mr.position(1))} / // Year
std::atoi(date_string + mr.position(2)) / // Month
std::atoi(date_string + mr.position(3)) // Day of month
};
tp = parsed_ymd + parsed_tod;
return true;
}

// Parse a date string formatted as YYYY-MM-DD HH:MM:SS.US TZ
// .US are optional fractional seconds, up to 6 digits in length
// TZ is an optional time zone offset formatted as +HH[:MM] or -HH[:MM]
// Parse date string formatted as YYYY-MM-DD
//
inline bool parse_string_date_time(::sqlpp::chrono::microsecond_point& value, const char* date_time_string)
inline bool parse_date(sqlpp::chrono::day_point& dp, const char* date_string)
{
static const std::regex rx{
"(\\d{4})-(\\d{2})-(\\d{2}) "
"(\\d{2}):(\\d{2}):(\\d{2})(?:\\.(\\d{1,6}))?"
"(?:([+-])(\\d{2})(?::(\\d{2})(?::(\\d{2}))?)?)?"
};
std::cmatch mr;
if (std::regex_match(date_time_string, mr, rx) == false)
{
return false;
}
value =
::sqlpp::chrono::day_point{
::date::year{std::atoi(date_time_string + mr.position(1))} / // Year
std::atoi(date_time_string + mr.position(2)) / // Month
std::atoi(date_time_string + mr.position(3)) // Day of month
} +
std::chrono::hours{std::atoi(date_time_string + mr.position(4))} + // Hour
std::chrono::minutes{std::atoi(date_time_string + mr.position(5))} + // Minute
std::chrono::seconds{std::atoi(date_time_string + mr.position(6))} + // Second
::std::chrono::microseconds{ // Second fraction
mr[7].matched ? std::stoi((mr[7].str() + "000000").substr(0, 6)) : 0
};
if (mr[8].matched)
{
const auto tz_sign = (date_time_string[mr.position(8)] == '+') ? 1 : -1;
const auto tz_offset =
std::chrono::hours{std::atoi(date_time_string + mr.position(9))} +
std::chrono::minutes{mr[10].matched ? std::atoi(date_time_string + mr.position(10)) : 0} +
std::chrono::seconds{mr[11].matched ? std::atoi(date_time_string + mr.position(11)) : 0};
value -= tz_sign * tz_offset;
if (parse_yyyy_mm_dd(dp, date_string) == false)
{
return false;
}
if (*date_string)
{
return false;
}
return true;
}

// Parse a time string formatted as HH:MM:SS[.US][ TZ]
// .US is up to 6 digits in length
// TZ is an optional time zone offset formatted as +HH[:MM] or -HH[:MM]
// Parse a date or timestamp string formatted as YYYY-MM-DD HH:MM:SS.U+HH:MM:SS
// The time-of-day part (everything after the date) is optional; within it the
// microseconds and timezone offset are optional as well.
// The whole string must be consumed, trailing characters make the parse fail.
//
inline bool parse_date_or_timestamp(sqlpp::chrono::microsecond_point& tp, const char* date_time_string)
{
  sqlpp::chrono::day_point date_part;
  if (!parse_yyyy_mm_dd(date_part, date_time_string))
  {
    return false;
  }

  // Date only: nothing follows the YYYY-MM-DD part.
  if (*date_time_string == '\0')
  {
    tp = date_part;
    return true;
  }

  // Otherwise a single space must separate the date from the time of day.
  if (!parse_character(date_time_string, ' '))
  {
    return false;
  }
  std::chrono::microseconds time_part;
  if (!parse_hh_mm_ss_us_tz(time_part, date_time_string))
  {
    return false;
  }

  // Reject anything left over after the time of day.
  if (*date_time_string != '\0')
  {
    return false;
  }
  tp = date_part + time_part;
  return true;
}

// Parse time of day string formatted as HH:MM:SS.U+HH:MM:SS
// The microseconds and timezone offset are optional
//
inline bool parse_string_time_of_day(::std::chrono::microseconds& value, const char* time_string)
inline bool parse_time_of_day(std::chrono::microseconds& us, const char* time_string)
{
static const std::regex rx{
"(\\d{2}):(\\d{2}):(\\d{2})(?:\\.(\\d{1,6}))?"
"(?:([+-])(\\d{2})(?::(\\d{2})(?::(\\d{2}))?)?)?"
};
std::cmatch mr;
if (std::regex_match (time_string, mr, rx) == false)
{
return false;
}
value =
std::chrono::hours{std::atoi(time_string + mr.position(1))} + // Hour
std::chrono::minutes{std::atoi(time_string + mr.position(2))} + // Minute
std::chrono::seconds{std::atoi(time_string + mr.position(3))} + // Second
::std::chrono::microseconds{ // Second fraction
mr[4].matched ? std::stoi((mr[4].str() + "000000").substr(0, 6)) : 0
};
if (mr[5].matched)
{
const auto tz_sign = (time_string[mr.position(5)] == '+') ? 1 : -1;
const auto tz_offset =
std::chrono::hours{std::atoi(time_string + mr.position(6))} +
std::chrono::minutes{mr[7].matched ? std::atoi(time_string + mr.position(7)) : 0} +
std::chrono::seconds{mr[8].matched ? std::atoi(time_string + mr.position(8)) : 0};
value -= tz_sign * tz_offset;
if (parse_hh_mm_ss_us_tz(us, time_string) == false)
{
return false;
}
if (*time_string)
{
return false;
}
return true;
}
Expand Down
4 changes: 2 additions & 2 deletions include/sqlpp11/mysql/char_result.h
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ namespace sqlpp
if (_handle->debug)
std::cerr << "MySQL debug: date string: " << date_string << std::endl;

if (::sqlpp::detail::parse_string_date(*value, date_string) == false)
if (::sqlpp::detail::parse_date(*value, date_string) == false)
{
if (_handle->debug)
std::cerr << "MySQL debug: invalid date result: " << date_string << std::endl;
Expand All @@ -175,7 +175,7 @@ namespace sqlpp
if (_handle->debug)
std::cerr << "MySQL debug: date_time string: " << date_time_string << std::endl;

if (::sqlpp::detail::parse_string_date_time(*value, date_time_string) == false)
if (::sqlpp::detail::parse_timestamp(*value, date_time_string) == false)
{
if (_handle->debug)
std::cerr << "MySQL debug: invalid date_time result: " << date_time_string << std::endl;
Expand Down
6 changes: 3 additions & 3 deletions include/sqlpp11/postgresql/bind_result.h
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ namespace sqlpp
{
std::cerr << "PostgreSQL debug: date string: " << date_string << std::endl;
}
if (::sqlpp::detail::parse_string_date(*value, date_string) == false)
if (::sqlpp::detail::parse_date(*value, date_string) == false)
{
if (_handle->debug())
{
Expand Down Expand Up @@ -269,7 +269,7 @@ namespace sqlpp
{
std::cerr << "PostgreSQL debug: got date_time string: " << date_string << std::endl;
}
if (::sqlpp::detail::parse_string_date_time(*value, date_string) == false)
if (::sqlpp::detail::parse_timestamp(*value, date_string) == false)
{
if (_handle->debug())
{
Expand Down Expand Up @@ -301,7 +301,7 @@ namespace sqlpp
std::cerr << "PostgreSQL debug: got time string: " << time_string << std::endl;
}

if (::sqlpp::detail::parse_string_time_of_day(*value, time_string) == false)
if (::sqlpp::detail::parse_time_of_day(*value, time_string) == false)
{
if (_handle->debug()) {
std::cerr << "PostgreSQL debug: got invalid time '" << time_string << "'" << std::endl;
Expand Down
Loading