Skip to content

Commit a4c704b

Browse files
committed
Issue #19424: Fix the warnings module to accept filenames containing surrogate
characters.
1 parent 602f7cf commit a4c704b

3 files changed

Lines changed: 70 additions & 47 deletions

File tree

Lib/test/test_warnings.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,18 @@ def test_missing_filename_main_with_argv_empty_string(self):
331331
warning_tests.__name__ = module_name
332332
sys.argv = argv
333333

334+
def test_warn_explicit_non_ascii_filename(self):
335+
with original_warnings.catch_warnings(record=True,
336+
module=self.module) as w:
337+
self.module.resetwarnings()
338+
self.module.filterwarnings("always", category=UserWarning)
339+
340+
self.module.warn_explicit("text", UserWarning, "nonascii\xe9\u20ac", 1)
341+
self.assertEqual(w[-1].filename, "nonascii\xe9\u20ac")
342+
343+
self.module.warn_explicit("text", UserWarning, "surrogate\udc80", 1)
344+
self.assertEqual(w[-1].filename, "surrogate\udc80")
345+
334346
def test_warn_explicit_type_errors(self):
335347
# warn_explicit() should error out gracefully if it is given objects
336348
# of the wrong types.

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ Core and Builtins
3131
Library
3232
-------
3333

34+
- Issue #19424: Fix the warnings module to accept filenames containing surrogate
35+
characters.
36+
3437
- Issue #19227: Remove pthread_atfork() handler. The handler was added to
3538
solve #18747 but has caused issues.
3639

Python/_warnings.c

Lines changed: 55 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ get_default_action(void)
9999

100100

101101
/* The item is a borrowed reference. */
102-
static const char *
102+
static PyObject*
103103
get_filter(PyObject *category, PyObject *text, Py_ssize_t lineno,
104104
PyObject *module, PyObject **item)
105105
{
@@ -152,13 +152,12 @@ get_filter(PyObject *category, PyObject *text, Py_ssize_t lineno,
152152
return NULL;
153153

154154
if (good_msg && is_subclass && good_mod && (ln == 0 || lineno == ln))
155-
return _PyUnicode_AsString(action);
155+
return action;
156156
}
157157

158158
action = get_default_action();
159-
if (action != NULL) {
160-
return _PyUnicode_AsString(action);
161-
}
159+
if (action != NULL)
160+
return action;
162161

163162
PyErr_SetString(PyExc_ValueError,
164163
MODULE_NAME ".defaultaction not found");
@@ -192,23 +191,26 @@ static PyObject *
192191
normalize_module(PyObject *filename)
193192
{
194193
PyObject *module;
195-
const char *mod_str;
194+
int kind;
195+
void *data;
196196
Py_ssize_t len;
197197

198-
int rc = PyObject_IsTrue(filename);
199-
if (rc == -1)
200-
return NULL;
201-
else if (rc == 0)
202-
return PyUnicode_FromString("<unknown>");
203-
204-
mod_str = _PyUnicode_AsString(filename);
205-
if (mod_str == NULL)
206-
return NULL;
207198
len = PyUnicode_GetLength(filename);
208199
if (len < 0)
209200
return NULL;
201+
202+
if (len == 0)
203+
return PyUnicode_FromString("<unknown>");
204+
205+
kind = PyUnicode_KIND(filename);
206+
data = PyUnicode_DATA(filename);
207+
208+
/* if filename.endswith(".py"): */
210209
if (len >= 3 &&
211-
strncmp(mod_str + (len - 3), ".py", 3) == 0) {
210+
PyUnicode_READ(kind, data, len-3) == '.' &&
211+
PyUnicode_READ(kind, data, len-2) == 'p' &&
212+
PyUnicode_READ(kind, data, len-1) == 'y')
213+
{
212214
module = PyUnicode_Substring(filename, 0, len-3);
213215
}
214216
else {
@@ -273,19 +275,37 @@ show_warning(PyObject *filename, int lineno, PyObject *text, PyObject
273275

274276
/* Print " source_line\n" */
275277
if (sourceline) {
276-
char *source_line_str = _PyUnicode_AsString(sourceline);
277-
if (source_line_str == NULL)
278-
return;
279-
while (*source_line_str == ' ' || *source_line_str == '\t' ||
280-
*source_line_str == '\014')
281-
source_line_str++;
282-
283-
PyFile_WriteString(source_line_str, f_stderr);
278+
int kind;
279+
void *data;
280+
Py_ssize_t i, len;
281+
Py_UCS4 ch;
282+
PyObject *truncated;
283+
284+
if (PyUnicode_READY(sourceline) < 1)
285+
goto error;
286+
287+
kind = PyUnicode_KIND(sourceline);
288+
data = PyUnicode_DATA(sourceline);
289+
len = PyUnicode_GET_LENGTH(sourceline);
290+
for (i=0; i<len; i++) {
291+
ch = PyUnicode_READ(kind, data, i);
292+
if (ch != ' ' && ch != '\t' && ch != '\014')
293+
break;
294+
}
295+
296+
truncated = PyUnicode_Substring(sourceline, i, len);
297+
if (truncated == NULL)
298+
goto error;
299+
300+
PyFile_WriteObject(sourceline, f_stderr, Py_PRINT_RAW);
301+
Py_DECREF(truncated);
284302
PyFile_WriteString("\n", f_stderr);
285303
}
286304
else {
287305
_Py_DisplaySourceLine(f_stderr, filename, lineno, 2);
288306
}
307+
308+
error:
289309
PyErr_Clear();
290310
}
291311

@@ -296,7 +316,7 @@ warn_explicit(PyObject *category, PyObject *message,
296316
{
297317
PyObject *key = NULL, *text = NULL, *result = NULL, *lineno_obj = NULL;
298318
PyObject *item = Py_None;
299-
const char *action;
319+
PyObject *action;
300320
int rc;
301321

302322
if (registry && !PyDict_Check(registry) && (registry != Py_None)) {
@@ -354,21 +374,21 @@ warn_explicit(PyObject *category, PyObject *message,
354374
if (action == NULL)
355375
goto cleanup;
356376

357-
if (strcmp(action, "error") == 0) {
377+
if (PyUnicode_CompareWithASCIIString(action, "error") == 0) {
358378
PyErr_SetObject(category, message);
359379
goto cleanup;
360380
}
361381

362382
/* Store in the registry that we've been here, *except* when the action
363383
is "always". */
364384
rc = 0;
365-
if (strcmp(action, "always") != 0) {
385+
if (PyUnicode_CompareWithASCIIString(action, "always") != 0) {
366386
if (registry != NULL && registry != Py_None &&
367387
PyDict_SetItem(registry, key, Py_True) < 0)
368388
goto cleanup;
369-
else if (strcmp(action, "ignore") == 0)
389+
else if (PyUnicode_CompareWithASCIIString(action, "ignore") == 0)
370390
goto return_none;
371-
else if (strcmp(action, "once") == 0) {
391+
else if (PyUnicode_CompareWithASCIIString(action, "once") == 0) {
372392
if (registry == NULL || registry == Py_None) {
373393
registry = get_once_registry();
374394
if (registry == NULL)
@@ -377,24 +397,15 @@ warn_explicit(PyObject *category, PyObject *message,
377397
/* _once_registry[(text, category)] = 1 */
378398
rc = update_registry(registry, text, category, 0);
379399
}
380-
else if (strcmp(action, "module") == 0) {
400+
else if (PyUnicode_CompareWithASCIIString(action, "module") == 0) {
381401
/* registry[(text, category, 0)] = 1 */
382402
if (registry != NULL && registry != Py_None)
383403
rc = update_registry(registry, text, category, 0);
384404
}
385-
else if (strcmp(action, "default") != 0) {
386-
PyObject *to_str = PyObject_Str(item);
387-
const char *err_str = "???";
388-
389-
if (to_str != NULL) {
390-
err_str = _PyUnicode_AsString(to_str);
391-
if (err_str == NULL)
392-
goto cleanup;
393-
}
405+
else if (PyUnicode_CompareWithASCIIString(action, "default") != 0) {
394406
PyErr_Format(PyExc_RuntimeError,
395-
"Unrecognized action (%s) in warnings.filters:\n %s",
396-
action, err_str);
397-
Py_XDECREF(to_str);
407+
"Unrecognized action (%R) in warnings.filters:\n %R",
408+
action, item);
398409
goto cleanup;
399410
}
400411
}
@@ -528,11 +539,8 @@ setup_context(Py_ssize_t stack_level, PyObject **filename, int *lineno,
528539
Py_INCREF(*filename);
529540
}
530541
else {
531-
const char *module_str = _PyUnicode_AsString(*module);
532542
*filename = NULL;
533-
if (module_str == NULL)
534-
goto handle_error;
535-
if (strcmp(module_str, "__main__") == 0) {
543+
if (PyUnicode_CompareWithASCIIString(*module, "__main__") == 0) {
536544
PyObject *argv = PySys_GetObject("argv");
537545
/* PyList_Check() is needed because sys.argv is set to None during
538546
Python finalization */
@@ -651,7 +659,7 @@ warnings_warn_explicit(PyObject *self, PyObject *args, PyObject *kwds)
651659
PyObject *registry = NULL;
652660
PyObject *module_globals = NULL;
653661

654-
if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOi|OOO:warn_explicit",
662+
if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOUi|OOO:warn_explicit",
655663
kwd_list, &message, &category, &filename, &lineno, &module,
656664
&registry, &module_globals))
657665
return NULL;

0 commit comments

Comments
 (0)