Skip to content

gh-124531: Fix strftime() with embedded null characters #125003

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions Lib/test/datetimetester.py
Original file line number Diff line number Diff line change
Expand Up @@ -2955,6 +2955,16 @@ def test_more_strftime(self):
except UnicodeEncodeError:
pass

def test_strftime_embedded_nul(self):
    # gh-124531: A NUL inside the format is ordinary text; it must be
    # copied to the result and must not cut the format string short.
    moment = self.theclass(2004, 12, 31, 6, 22, 33, 47)

    # Formats consisting solely of NULs are returned unchanged.
    for nul_only in ('\0', '\0'*1000):
        self.assertEqual(moment.strftime(nul_only), nul_only)

    # Expand each directive on its own so the expected text is assembled
    # from what strftime() itself produces for it.
    s1 = moment.strftime('%c')
    s2 = moment.strftime('%x')
    expectations = (
        ('\0%c\0%x', f'\0{s1}\0{s2}'),
        ('\0%c\0%x\0', f'\0{s1}\0{s2}\0'),
    )
    for fmt, expected in expectations:
        self.assertEqual(moment.strftime(fmt), expected)

def test_extract(self):
dt = self.theclass(2002, 3, 4, 18, 45, 3, 1234)
self.assertEqual(dt.date(), date(2002, 3, 4))
Expand Down Expand Up @@ -3736,6 +3746,16 @@ def test_strftime(self):
# gh-85432: The parameter was named "fmt" in the pure-Python impl.
t.strftime(format="%f")

def test_strftime_embedded_nul(self):
    # gh-124531: An embedded NUL is part of the format, not its end, so
    # it has to show up in the formatted result.
    clock = self.theclass(1, 2, 3, 4)

    # A format made only of NULs (one or many) round-trips as is.
    single_nul = '\0'
    many_nuls = '\0'*1000
    self.assertEqual(clock.strftime(single_nul), single_nul)
    self.assertEqual(clock.strftime(many_nuls), many_nuls)

    # Reference expansions of the individual directives, used to build
    # the expected output of the combined formats below.
    s1 = clock.strftime('%Z')
    s2 = clock.strftime('%X')
    without_trailing_nul = clock.strftime('\0%Z\0%X')
    self.assertEqual(without_trailing_nul, f'\0{s1}\0{s2}')
    with_trailing_nul = clock.strftime('\0%Z\0%X\0')
    self.assertEqual(with_trailing_nul, f'\0{s1}\0{s2}\0')

def test_format(self):
t = self.theclass(1, 2, 3, 4)
self.assertEqual(t.__format__(''), str(t))
Expand Down
12 changes: 10 additions & 2 deletions Lib/test/test_time.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,8 +182,16 @@ def test_strftime(self):
self.fail('conversion specifier: %r failed.' % format)

self.assertRaises(TypeError, time.strftime, b'%S', tt)
# embedded null character
self.assertRaises(ValueError, time.strftime, '%S\0', tt)

def test_strftime_embedded_nul(self):
    # gh-124531: time.strftime() must treat a NUL in the format as a
    # regular character rather than as the end of the format string.
    tt = time.gmtime(self.t)

    def render(fmt):
        # Format the fixed struct_time above with the given format.
        return time.strftime(fmt, tt)

    # NUL-only formats come back unchanged.
    for nul_only in ('\0', '\0'*1000):
        self.assertEqual(render(nul_only), nul_only)

    # Expand the directives separately to obtain the expected pieces.
    s1 = render('%c')
    s2 = render('%x')
    self.assertEqual(render('\0%c\0%x'), f'\0{s1}\0{s2}')
    self.assertEqual(render('\0%c\0%x\0'), f'\0{s1}\0{s2}\0')

def _bounds_checking(self, func):
# Make sure that strftime() checks the bounds of the various parts
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Fix :func:`time.strftime`, the :meth:`~datetime.datetime.strftime` method of
the :mod:`datetime` classes :class:`~datetime.datetime`,
:class:`~datetime.date` and :class:`~datetime.time`, and formatting of these
classes, when the format string contains embedded null characters.
22 changes: 7 additions & 15 deletions Modules/_datetimemodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -1837,6 +1837,7 @@ wrap_strftime(PyObject *object, PyObject *format, PyObject *timetuple,
PyObject *freplacement = NULL; /* py string, replacement for %f */

const char *pin; /* pointer to next char in input format */
const char *pend; /* pointer past the end of input format */
Py_ssize_t flen; /* length of input format */
char ch; /* next char in input format */

Expand Down Expand Up @@ -1886,22 +1887,15 @@ wrap_strftime(PyObject *object, PyObject *format, PyObject *timetuple,
pnew = PyBytes_AsString(newfmt);
usednew = 0;

while ((ch = *pin++) != '\0') {
if (ch != '%') {
ptoappend = pin - 1;
ntoappend = 1;
}
else if ((ch = *pin++) == '\0') {
/* Null byte follows %, copy only '%'.
*
* Back the pin up one char so that we catch the null check
* the next time through the loop.*/
pin--;
pend = pin + flen;
while (pin != pend) {
ch = *pin++;
if (ch != '%' || pin == pend) {
ptoappend = pin - 1;
ntoappend = 1;
}
/* A % has been seen and ch is the character after it. */
else if (ch == 'z') {
else if ((ch = *pin++) == 'z') {
/* %z -> +HHMM */
if (zreplacement == NULL) {
zreplacement = make_somezreplacement(object, "", tzinfoarg);
Expand Down Expand Up @@ -2035,12 +2029,10 @@ wrap_strftime(PyObject *object, PyObject *format, PyObject *timetuple,
assert(usednew <= totalnew);
} /* end while() */

if (_PyBytes_Resize(&newfmt, usednew) < 0)
goto Done;
{
PyObject *format;

format = PyUnicode_FromString(PyBytes_AS_STRING(newfmt));
format = PyUnicode_FromStringAndSize(PyBytes_AS_STRING(newfmt), usednew);
if (format != NULL) {
result = PyObject_CallFunctionObjArgs(strftime,
format, timetuple, NULL);
Expand Down
99 changes: 61 additions & 38 deletions Modules/timemodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -787,16 +787,14 @@ time_strftime(PyObject *module, PyObject *args)
PyObject *format;
#endif
PyObject *format_arg;
Py_ssize_t fmtsize;
size_t fmtlen, buflen;
time_char *outbuf = NULL;
size_t i;
size_t outsize, outpos;
PyObject *ret = NULL;

memset((void *) &buf, '\0', sizeof(buf));

/* Will always expect a unicode string to be passed as format.
Given that there's no str type anymore in py3k this seems safe.
*/
if (!PyArg_ParseTuple(args, "U|O:strftime", &format_arg, &tup))
return NULL;

Expand Down Expand Up @@ -835,7 +833,7 @@ time_strftime(PyObject *module, PyObject *args)
buf.tm_isdst = 1;

#ifdef HAVE_WCSFTIME
format = PyUnicode_AsWideCharString(format_arg, NULL);
format = PyUnicode_AsWideCharString(format_arg, &fmtsize);
if (format == NULL)
return NULL;
fmt = format;
Expand All @@ -845,35 +843,36 @@ time_strftime(PyObject *module, PyObject *args)
if (format == NULL)
return NULL;
fmt = PyBytes_AS_STRING(format);
fmtsize = PyBytes_GET_SIZE(format);
#endif

#if defined(MS_WINDOWS) && !defined(HAVE_WCSFTIME)
/* check that the format string contains only valid directives */
for (outbuf = strchr(fmt, '%');
outbuf != NULL;
outbuf = strchr(outbuf+2, '%'))
for (const time_char *f = memchr(fmt, '%', fmtsize);
f != NULL;
f = memchr(f + 2, '%', fmtsize - (f + 2 - fmt)))
{
if (outbuf[1] == '#')
++outbuf; /* not documented by python, */
if (outbuf[1] == '\0')
if (f[1] == '#')
++f; /* not documented by python, */
if (f + 1 >= fmt + fmtsize)
break;
if ((outbuf[1] == 'y') && buf.tm_year < 0) {
if ((f[1] == 'y') && buf.tm_year < 0) {
PyErr_SetString(PyExc_ValueError,
"format %y requires year >= 1900 on Windows");
Py_DECREF(format);
return NULL;
}
}
#elif (defined(_AIX) || (defined(__sun) && defined(__SVR4))) && defined(HAVE_WCSFTIME)
for (outbuf = wcschr(fmt, '%');
outbuf != NULL;
outbuf = wcschr(outbuf+2, '%'))
for (const time_char *f = wmemchr(fmt, '%', fmtsize);
f != NULL;
f = wmemchr(f + 2, '%', fmtsize - (f + 2 - fmt)))
{
if (outbuf[1] == L'\0')
if (f + 1 >= fmt + fmtsize)
break;
/* Issue #19634: On AIX, wcsftime("y", (1899, 1, 1, 0, 0, 0, 0, 0, 0))
returns "0/" instead of "99" */
if (outbuf[1] == L'y' && buf.tm_year < 0) {
if (f[1] == L'y' && buf.tm_year < 0) {
PyErr_SetString(PyExc_ValueError,
"format %y requires year >= 1900 on AIX");
PyMem_Free(format);
Expand All @@ -882,47 +881,71 @@ time_strftime(PyObject *module, PyObject *args)
}
#endif

fmtlen = time_strlen(fmt);

/* I hate these functions that presume you know how big the output
* will be ahead of time...
*/
for (i = 1024; ; i += i) {
outbuf = (time_char *)PyMem_Malloc(i*sizeof(time_char));
outsize = fmtsize + 128;
outpos = 0;
fmtlen = time_strlen(fmt);
while (1) {
outbuf = (time_char *)PyMem_Realloc(outbuf, outsize*sizeof(time_char));
if (outbuf == NULL) {
PyErr_NoMemory();
break;
}
if (fmtlen == 0) {
/* Empty format string, a leading or trailing NUL,
or consecutive NULs.
strftime() on macOS does not work well with an empty format string.
*/
if (outpos == outsize) {
outsize += outsize;
continue;
}
outbuf[outpos] = 0;
}
else {
#if defined _MSC_VER && _MSC_VER >= 1400 && defined(__STDC_SECURE_LIB__)
errno = 0;
errno = 0;
#endif
_Py_BEGIN_SUPPRESS_IPH
buflen = format_time(outbuf, i, fmt, &buf);
_Py_END_SUPPRESS_IPH
_Py_BEGIN_SUPPRESS_IPH
buflen = format_time(outbuf + outpos, outsize - outpos, fmt, &buf);
_Py_END_SUPPRESS_IPH
#if defined _MSC_VER && _MSC_VER >= 1400 && defined(__STDC_SECURE_LIB__)
/* VisualStudio .NET 2005 does this properly */
if (buflen == 0 && errno == EINVAL) {
PyErr_SetString(PyExc_ValueError, "Invalid format string");
PyMem_Free(outbuf);
break;
}
/* VisualStudio .NET 2005 does this properly */
if (buflen == 0 && errno == EINVAL) {
PyErr_SetString(PyExc_ValueError, "Invalid format string");
break;
}
#endif
if (buflen > 0 || i >= 256 * fmtlen) {
if (buflen == 0 && outsize - outpos < 256 * fmtlen) {
outsize += outsize;
continue;
}
/* If the buffer is 256 times as long as the format,
it's probably not failing for lack of room!
More likely, the format yields an empty result,
e.g. an empty format, or %Z when the timezone
is unknown. */
outpos += buflen;
}
if (fmtlen < (size_t)fmtsize) {
/* It was not the terminating NUL, but an embedded NUL.
Skip the NUL and continue. */
outpos++;
fmt += fmtlen + 1;
fmtsize -= fmtlen + 1;
fmtlen = time_strlen(fmt);
continue;
}
#ifdef HAVE_WCSFTIME
ret = PyUnicode_FromWideChar(outbuf, buflen);
ret = PyUnicode_FromWideChar(outbuf, outpos);
#else
ret = PyUnicode_DecodeLocaleAndSize(outbuf, buflen, "surrogateescape");
ret = PyUnicode_DecodeLocaleAndSize(outbuf, outpos, "surrogateescape");
#endif
PyMem_Free(outbuf);
break;
}
PyMem_Free(outbuf);
break;
}
PyMem_Free(outbuf);
#ifdef HAVE_WCSFTIME
PyMem_Free(format);
#else
Expand Down
Loading