Skip to content

Commit 25bca54

Browse files
Replace regex-based date/time parsing with manual parser (#520)
* Replace regex-based date/time string parsing with manually written parsing code. * Add date/time parser tests.
1 parent 1cd47c7 commit 25bca54

File tree

6 files changed

+549
-116
lines changed

6 files changed

+549
-116
lines changed

include/sqlpp11/detail/parse_date_time.h

+210-73
Original file line numberDiff line numberDiff line change
@@ -27,100 +27,237 @@
2727
* POSSIBILITY OF SUCH DAMAGE.
2828
*/
2929

30-
#include <regex>
30+
#include <sqlpp11/chrono.h>
31+
32+
#include <cctype>
3133

3234
namespace sqlpp
3335
{
3436
namespace detail
3537
{
36-
// Parse a date string formatted as YYYY-MM-DD
38+
// Parse exactly `length` decimal digits starting at `input` into `value`.
//
// Returns true and advances `input` past the digits when all `length` characters are digits.
// Returns false and leaves `input` untouched as soon as a non-digit is met; the terminating
// NUL counts as a non-digit, so the scan never runs past the end of a shorter string.
// `value` is overwritten in both cases and holds a meaningful result only on success.
inline bool parse_unsigned(int& value, const char*& input, int length)
{
  value = 0;
  auto cursor = input;
  while (length--)
  {
    // std::isdigit has undefined behavior for negative arguments (other than EOF), which is
    // what a plain char >= 0x80 becomes on platforms where char is signed. Go through
    // unsigned char before classifying the character.
    const auto ch = static_cast<unsigned char>(*cursor++);
    if (!std::isdigit(ch))
    {
      return false;
    }
    value = value * 10 + (ch - '0');
  }
  input = cursor;
  return true;
}
54+
55+
// Consume a single expected character.
//
// If the character at `input` equals `ch`, `input` is advanced by one and true is returned.
// Otherwise `input` is left where it was and false is returned.
inline bool parse_character(const char*& input, char ch)
{
  const bool matched = (*input == ch);
  if (matched)
  {
    ++input;
  }
  return matched;
}
64+
65+
// Parse a date prefix formatted as YYYY-MM-DD (4, 2 and 2 digits separated by '-').
//
// On success `dp` receives the date, `input` is advanced past the ten consumed characters and
// true is returned. On a format mismatch false is returned and neither `dp` nor `input` is
// modified.
//
// Only the format is checked here: year, month and day values are not range-validated before
// being converted to a day_point.
inline bool parse_yyyy_mm_dd(sqlpp::chrono::day_point& dp, const char*& input)
{
  auto cursor = input;
  int yyyy = 0;
  int mm = 0;
  int dd = 0;

  if (!parse_unsigned(yyyy, cursor, 4) || !parse_character(cursor, '-'))
  {
    return false;
  }
  if (!parse_unsigned(mm, cursor, 2) || !parse_character(cursor, '-'))
  {
    return false;
  }
  if (!parse_unsigned(dd, cursor, 2))
  {
    return false;
  }

  dp = ::date::year{yyyy} / mm / dd;
  input = cursor;
  return true;
}
79+
80+
inline bool parse_hh_mm_ss(std::chrono::microseconds& us, const char*& input)
81+
{
82+
auto new_input = input;
83+
int hour, minute, second;
84+
if ((parse_unsigned(hour, new_input, 2) == false) || (parse_character(new_input, ':') == false) ||
85+
(parse_unsigned(minute, new_input, 2) == false) || (parse_character(new_input, ':') == false) ||
86+
(parse_unsigned(second, new_input, 2) == false))
87+
{
88+
return false;
89+
}
90+
// Strings that have valid format but year, month and/or day values that fall outside of the
91+
// correct ranges are still mapped to day_point values. For the exact rules of the mapping see
92+
// https://en.cppreference.com/w/cpp/chrono/year_month_day/operator_days
93+
us = std::chrono::hours{hour} + std::chrono::minutes{minute} + std::chrono::seconds{second};
94+
input = new_input;
95+
return true;
96+
}
97+
98+
inline bool parse_ss_fraction(std::chrono::microseconds& us, const char*& input)
99+
{
100+
auto new_input = input;
101+
if (parse_character(new_input, '.') == false)
102+
{
103+
return false;
104+
}
105+
int value = 0;
106+
int len_max = 6;
107+
int len_actual;
108+
for (len_actual = 0; (len_actual < len_max) && std::isdigit(*new_input); ++len_actual, ++new_input)
109+
{
110+
value = value * 10 + *new_input - '0';
111+
}
112+
if (len_actual == 0)
113+
{
114+
return false;
115+
}
116+
for (; len_actual < len_max; ++len_actual)
117+
{
118+
value *= 10;
119+
}
120+
us = std::chrono::microseconds{value};
121+
input = new_input;
122+
return true;
123+
}
124+
125+
inline bool parse_tz(std::chrono::microseconds& offset, const char*& input)
126+
{
127+
auto new_input = input;
128+
int tz_sign;
129+
if (parse_character(new_input, '+'))
130+
{
131+
tz_sign = 1;
132+
}
133+
else if (parse_character(new_input, '-'))
134+
{
135+
tz_sign = -1;
136+
}
137+
else
138+
{
139+
return false;
140+
}
141+
int hour;
142+
if (parse_unsigned(hour, new_input, 2) == false)
143+
{
144+
return false;
145+
}
146+
offset = tz_sign * std::chrono::hours{hour};
147+
input = new_input;
148+
int minute;
149+
if ((parse_character(new_input, ':') == false) || (parse_unsigned(minute, new_input, 2) == false))
150+
{
151+
return true;
152+
}
153+
offset += tz_sign * std::chrono::minutes{minute};
154+
input = new_input;
155+
int second;
156+
if ((parse_character(new_input, ':') == false) || (parse_unsigned(second, new_input, 2) == false))
157+
{
158+
return true;
159+
}
160+
offset += tz_sign * std::chrono::seconds{second};
161+
input = new_input;
162+
return true;
163+
}
164+
165+
inline bool parse_hh_mm_ss_us_tz(std::chrono::microseconds& us, const char*& input)
166+
{
167+
if (parse_hh_mm_ss(us, input) == false)
168+
{
169+
return false;
170+
}
171+
std::chrono::microseconds fraction;
172+
if (parse_ss_fraction(fraction, input))
173+
{
174+
us += fraction;
175+
}
176+
std::chrono::microseconds tz_offset;
177+
if (parse_tz(tz_offset, input))
178+
{
179+
us -= tz_offset;
180+
}
181+
return true;
182+
}
183+
184+
// Parse timestamp formatted as YYYY-MM-DD HH:MM:SS.U+HH:MM:SS
185+
// The microseconds and timezone offset are optional
37186
//
38-
inline bool parse_string_date(::sqlpp::chrono::day_point& value, const char* date_string)
187+
inline bool parse_timestamp(sqlpp::chrono::microsecond_point& tp, const char* date_time_string)
39188
{
40-
static const std::regex rx{"(\\d{4})-(\\d{2})-(\\d{2})"};
41-
std::cmatch mr;
42-
if (std::regex_match(date_string, mr, rx) == false)
189+
sqlpp::chrono::day_point parsed_ymd;
190+
std::chrono::microseconds parsed_tod;
191+
if ((parse_yyyy_mm_dd(parsed_ymd, date_time_string) == false) ||
192+
(parse_character(date_time_string, ' ') == false) ||
193+
(parse_hh_mm_ss_us_tz(parsed_tod, date_time_string) == false))
194+
{
195+
return false;
196+
}
197+
if (*date_time_string)
43198
{
44199
return false;
45200
}
46-
value = ::sqlpp::chrono::day_point{
47-
::date::year{std::atoi(date_string + mr.position(1))} / // Year
48-
std::atoi(date_string + mr.position(2)) / // Month
49-
std::atoi(date_string + mr.position(3)) // Day of month
50-
};
201+
tp = parsed_ymd + parsed_tod;
51202
return true;
52203
}
53204

54-
// Parse a date string formatted as YYYY-MM-DD HH:MM:SS.US TZ
55-
// .US are optional fractional seconds, up to 6 digits in length
56-
// TZ is an optional time zone offset formatted as +HH[:MM] or -HH[:MM]
205+
// Parse date string formatted as YYYY-MM-DD
57206
//
58-
inline bool parse_string_date_time(::sqlpp::chrono::microsecond_point& value, const char* date_time_string)
207+
inline bool parse_date(sqlpp::chrono::day_point& dp, const char* date_string)
59208
{
60-
static const std::regex rx{
61-
"(\\d{4})-(\\d{2})-(\\d{2}) "
62-
"(\\d{2}):(\\d{2}):(\\d{2})(?:\\.(\\d{1,6}))?"
63-
"(?:([+-])(\\d{2})(?::(\\d{2})(?::(\\d{2}))?)?)?"
64-
};
65-
std::cmatch mr;
66-
if (std::regex_match(date_time_string, mr, rx) == false)
67-
{
68-
return false;
69-
}
70-
value =
71-
::sqlpp::chrono::day_point{
72-
::date::year{std::atoi(date_time_string + mr.position(1))} / // Year
73-
std::atoi(date_time_string + mr.position(2)) / // Month
74-
std::atoi(date_time_string + mr.position(3)) // Day of month
75-
} +
76-
std::chrono::hours{std::atoi(date_time_string + mr.position(4))} + // Hour
77-
std::chrono::minutes{std::atoi(date_time_string + mr.position(5))} + // Minute
78-
std::chrono::seconds{std::atoi(date_time_string + mr.position(6))} + // Second
79-
::std::chrono::microseconds{ // Second fraction
80-
mr[7].matched ? std::stoi((mr[7].str() + "000000").substr(0, 6)) : 0
81-
};
82-
if (mr[8].matched)
83-
{
84-
const auto tz_sign = (date_time_string[mr.position(8)] == '+') ? 1 : -1;
85-
const auto tz_offset =
86-
std::chrono::hours{std::atoi(date_time_string + mr.position(9))} +
87-
std::chrono::minutes{mr[10].matched ? std::atoi(date_time_string + mr.position(10)) : 0} +
88-
std::chrono::seconds{mr[11].matched ? std::atoi(date_time_string + mr.position(11)) : 0};
89-
value -= tz_sign * tz_offset;
209+
if (parse_yyyy_mm_dd(dp, date_string) == false)
210+
{
211+
return false;
212+
}
213+
if (*date_string)
214+
{
215+
return false;
90216
}
91217
return true;
92218
}
93219

94-
// Parse a time string formatted as HH:MM:SS[.US][ TZ]
95-
// .US is up to 6 digits in length
96-
// TZ is an optional time zone offset formatted as +HH[:MM] or -HH[:MM]
220+
// Parse date or timestamp string formatted as YYYY-MM-DD HH:MM:SS.U+HH:MM:SS
221+
// The time-of-day part is optional
222+
//
223+
inline bool parse_date_or_timestamp(sqlpp::chrono::microsecond_point& tp, const char* date_time_string)
{
  // The date part is mandatory.
  sqlpp::chrono::day_point date_part;
  if (!parse_yyyy_mm_dd(date_part, date_time_string))
  {
    return false;
  }

  // A bare date is accepted as is.
  if (*date_time_string == 0)
  {
    tp = date_part;
    return true;
  }

  // Anything after the date must be a single space followed by a time of day.
  std::chrono::microseconds time_part;
  if (!parse_character(date_time_string, ' '))
  {
    return false;
  }
  if (!parse_hh_mm_ss_us_tz(time_part, date_time_string))
  {
    return false;
  }

  // Trailing characters after the time of day make the whole string invalid.
  if (*date_time_string != 0)
  {
    return false;
  }

  tp = date_part + time_part;
  return true;
}
248+
249+
// Parse time of day string formatted as HH:MM:SS.U+HH:MM:SS
250+
// The microseconds and timezone offset are optional
97251
//
98-
inline bool parse_string_time_of_day(::std::chrono::microseconds& value, const char* time_string)
252+
inline bool parse_time_of_day(std::chrono::microseconds& us, const char* time_string)
99253
{
100-
static const std::regex rx{
101-
"(\\d{2}):(\\d{2}):(\\d{2})(?:\\.(\\d{1,6}))?"
102-
"(?:([+-])(\\d{2})(?::(\\d{2})(?::(\\d{2}))?)?)?"
103-
};
104-
std::cmatch mr;
105-
if (std::regex_match (time_string, mr, rx) == false)
106-
{
107-
return false;
108-
}
109-
value =
110-
std::chrono::hours{std::atoi(time_string + mr.position(1))} + // Hour
111-
std::chrono::minutes{std::atoi(time_string + mr.position(2))} + // Minute
112-
std::chrono::seconds{std::atoi(time_string + mr.position(3))} + // Second
113-
::std::chrono::microseconds{ // Second fraction
114-
mr[4].matched ? std::stoi((mr[4].str() + "000000").substr(0, 6)) : 0
115-
};
116-
if (mr[5].matched)
117-
{
118-
const auto tz_sign = (time_string[mr.position(5)] == '+') ? 1 : -1;
119-
const auto tz_offset =
120-
std::chrono::hours{std::atoi(time_string + mr.position(6))} +
121-
std::chrono::minutes{mr[7].matched ? std::atoi(time_string + mr.position(7)) : 0} +
122-
std::chrono::seconds{mr[8].matched ? std::atoi(time_string + mr.position(8)) : 0};
123-
value -= tz_sign * tz_offset;
254+
if (parse_hh_mm_ss_us_tz(us, time_string) == false)
255+
{
256+
return false;
257+
}
258+
if (*time_string)
259+
{
260+
return false;
124261
}
125262
return true;
126263
}

include/sqlpp11/mysql/char_result.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ namespace sqlpp
152152
if (_handle->debug)
153153
std::cerr << "MySQL debug: date string: " << date_string << std::endl;
154154

155-
if (::sqlpp::detail::parse_string_date(*value, date_string) == false)
155+
if (::sqlpp::detail::parse_date(*value, date_string) == false)
156156
{
157157
if (_handle->debug)
158158
std::cerr << "MySQL debug: invalid date result: " << date_string << std::endl;
@@ -175,7 +175,7 @@ namespace sqlpp
175175
if (_handle->debug)
176176
std::cerr << "MySQL debug: date_time string: " << date_time_string << std::endl;
177177

178-
if (::sqlpp::detail::parse_string_date_time(*value, date_time_string) == false)
178+
if (::sqlpp::detail::parse_timestamp(*value, date_time_string) == false)
179179
{
180180
if (_handle->debug)
181181
std::cerr << "MySQL debug: invalid date_time result: " << date_time_string << std::endl;

include/sqlpp11/postgresql/bind_result.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ namespace sqlpp
239239
{
240240
std::cerr << "PostgreSQL debug: date string: " << date_string << std::endl;
241241
}
242-
if (::sqlpp::detail::parse_string_date(*value, date_string) == false)
242+
if (::sqlpp::detail::parse_date(*value, date_string) == false)
243243
{
244244
if (_handle->debug())
245245
{
@@ -269,7 +269,7 @@ namespace sqlpp
269269
{
270270
std::cerr << "PostgreSQL debug: got date_time string: " << date_string << std::endl;
271271
}
272-
if (::sqlpp::detail::parse_string_date_time(*value, date_string) == false)
272+
if (::sqlpp::detail::parse_timestamp(*value, date_string) == false)
273273
{
274274
if (_handle->debug())
275275
{
@@ -301,7 +301,7 @@ namespace sqlpp
301301
std::cerr << "PostgreSQL debug: got time string: " << time_string << std::endl;
302302
}
303303

304-
if (::sqlpp::detail::parse_string_time_of_day(*value, time_string) == false)
304+
if (::sqlpp::detail::parse_time_of_day(*value, time_string) == false)
305305
{
306306
if (_handle->debug()) {
307307
std::cerr << "PostgreSQL debug: got invalid time '" << time_string << "'" << std::endl;

0 commit comments

Comments
 (0)