Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use PEP 393 new APIs #64

Merged
merged 4 commits into from
May 4, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions bench/runbench.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
"""
Runs the benchmarks
"""
from __future__ import print_function
import sys
import os
import re
Expand Down Expand Up @@ -30,13 +31,13 @@ def run_bench(name):


# Entry point of the benchmark runner: print a banner, change into
# bench_directory, call run_bench() for every name yielded by
# list_benchmarks(), then print a closing rule.
# NOTE(review): this is a diff view -- each Python 2 `print ...` statement
# below appears to be the removed line and the `print(...)` call that follows
# it the replacement; confirm against the merged file.
def main():
print '=' * 80
print 'Running benchmark for MarkupSafe'
print '-' * 80
print('=' * 80)
print('Running benchmark for MarkupSafe')
print('-' * 80)
# the benchmarks are run from inside the bench directory
os.chdir(bench_directory)
for bench in list_benchmarks():
run_bench(bench)
print '-' * 80
print('-' * 80)


if __name__ == '__main__':
Expand Down
191 changes: 188 additions & 3 deletions markupsafe/_speedups.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,24 @@
* :copyright: © 2010 by the Pallets team.
* :license: BSD, see LICENSE for more details.
*/

#include <Python.h>

/* Python 2 only: per-character lookup tables for the escaping code. */
#if PY_MAJOR_VERSION < 3
/* The tables are indexed by character code.  Of the indices visible in
 * init_constants ('"', '\'', '&', '<', '>') the largest is '>' (62), which a
 * size of 63 just covers. */
#define ESCAPED_CHARS_TABLE_SIZE 63
/* Decode the ASCII C string x into a unicode object and yield a pointer to
 * its Py_UNICODE buffer.
 * NOTE(review): the expansion itself ends with ';', the result of
 * PyUnicode_DecodeASCII is not checked for NULL, and the decoded object is
 * not released by this macro. */
#define UNICHR(x) (PyUnicode_AS_UNICODE((PyUnicodeObject*)PyUnicode_DecodeASCII(x, strlen(x), NULL)));

/* Object that escape() calls to wrap its result; presumably the Markup type
 * stored by init_constants -- the assignment is not visible here. */
static PyObject* markup;
/* escaped_chars_delta_len[c]: how many characters longer the replacement for
 * c is than c itself (init_constants stores 4 or 3 for the escaped chars). */
static Py_ssize_t escaped_chars_delta_len[ESCAPED_CHARS_TABLE_SIZE];
/* escaped_chars_repl[c]: replacement text for c, e.g. "&#34;" for '"'. */
static Py_UNICODE *escaped_chars_repl[ESCAPED_CHARS_TABLE_SIZE];
#endif

static PyObject* markup;

static int
init_constants(void)
{
PyObject *module;

#if PY_MAJOR_VERSION < 3
/* mapping of characters to replace */
escaped_chars_repl['"'] = UNICHR("&#34;");
escaped_chars_repl['\''] = UNICHR("&#39;");
Expand All @@ -34,6 +38,7 @@ init_constants(void)
escaped_chars_delta_len['"'] = escaped_chars_delta_len['\''] = \
escaped_chars_delta_len['&'] = 4;
escaped_chars_delta_len['<'] = escaped_chars_delta_len['>'] = 3;
#endif

/* import markup type so that we can mark the return value */
module = PyImport_ImportModule("markupsafe");
Expand All @@ -45,6 +50,7 @@ init_constants(void)
return 1;
}

#if PY_MAJOR_VERSION < 3
static PyObject*
escape_unicode(PyUnicodeObject *in)
{
Expand Down Expand Up @@ -105,13 +111,192 @@ escape_unicode(PyUnicodeObject *in)

return (PyObject*)out;
}
#else /* PY_MAJOR_VERSION < 3 */

/*
 * GET_DELTA(inp, inp_end, delta)
 *
 * Scan the characters in [inp, inp_end) and add to `delta` the number of
 * extra output characters needed to escape them: 4 for each double quote,
 * single quote and ampersand (5-character replacements), 3 for each '<' and
 * '>' (4-character replacements).  Works for any integer character type.
 *
 * `inp` must be a modifiable pointer lvalue; it is left equal to `inp_end`.
 * `delta` must be a modifiable integer lvalue; it is incremented, not reset.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement and can be used in an unbraced if/else.
 */
#define GET_DELTA(inp, inp_end, delta) \
    do { \
        while ((inp) < (inp_end)) { \
            switch (*(inp)++) { \
            case '"': \
            case '\'': \
            case '&': \
                (delta) += 4; \
                break; \
            case '<': \
            case '>': \
                (delta) += 3; \
                break; \
            default: \
                break; \
            } \
        } \
    } while (0)

/*
 * DO_ESCAPE(inp, inp_end, outp)
 *
 * Copy the characters in [inp, inp_end) to `outp`, replacing
 *     "  ->  &#34;     '  ->  &#39;     &  ->  &amp;
 *     <  ->  &lt;      >  ->  &gt;
 * Works for any integer character type; `inp` and `outp` must point to
 * elements of the same size because runs are moved with memcpy.
 *
 * Characters that need no escaping are not copied one by one: `ncopy` counts
 * the current run of untouched characters, and the run is flushed with a
 * single memcpy when the next escaped character (or the end) is reached.
 *
 * `inp` and `outp` must be modifiable pointer lvalues.  On completion `inp`
 * equals `inp_end`; `outp` is left at the start of the final flushed run
 * (it is not advanced past it).  The caller must provide an output buffer
 * large enough for the input length plus the delta from GET_DELTA.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement and can be used in an unbraced if/else.
 */
#define DO_ESCAPE(inp, inp_end, outp) \
    do { \
        Py_ssize_t ncopy = 0; \
        while ((inp) < (inp_end)) { \
            switch (*(inp)) { \
            case '"': \
                /* flush pending run, then emit the entity */ \
                memcpy((outp), (inp) - ncopy, sizeof(*(outp)) * ncopy); \
                (outp) += ncopy; ncopy = 0; \
                *(outp)++ = '&'; \
                *(outp)++ = '#'; \
                *(outp)++ = '3'; \
                *(outp)++ = '4'; \
                *(outp)++ = ';'; \
                break; \
            case '\'': \
                memcpy((outp), (inp) - ncopy, sizeof(*(outp)) * ncopy); \
                (outp) += ncopy; ncopy = 0; \
                *(outp)++ = '&'; \
                *(outp)++ = '#'; \
                *(outp)++ = '3'; \
                *(outp)++ = '9'; \
                *(outp)++ = ';'; \
                break; \
            case '&': \
                memcpy((outp), (inp) - ncopy, sizeof(*(outp)) * ncopy); \
                (outp) += ncopy; ncopy = 0; \
                *(outp)++ = '&'; \
                *(outp)++ = 'a'; \
                *(outp)++ = 'm'; \
                *(outp)++ = 'p'; \
                *(outp)++ = ';'; \
                break; \
            case '<': \
                memcpy((outp), (inp) - ncopy, sizeof(*(outp)) * ncopy); \
                (outp) += ncopy; ncopy = 0; \
                *(outp)++ = '&'; \
                *(outp)++ = 'l'; \
                *(outp)++ = 't'; \
                *(outp)++ = ';'; \
                break; \
            case '>': \
                memcpy((outp), (inp) - ncopy, sizeof(*(outp)) * ncopy); \
                (outp) += ncopy; ncopy = 0; \
                *(outp)++ = '&'; \
                *(outp)++ = 'g'; \
                *(outp)++ = 't'; \
                *(outp)++ = ';'; \
                break; \
            default: \
                /* extend the run of characters copied verbatim */ \
                ncopy++; \
                break; \
            } \
            (inp)++; \
        } \
        /* flush whatever run is left after the last escaped character */ \
        memcpy((outp), (inp) - ncopy, sizeof(*(outp)) * ncopy); \
    } while (0)

/*
 * Escape a string stored with 1-byte characters.
 *
 * Returns a new reference: `in` itself when it contains nothing to escape,
 * otherwise a freshly allocated 1-byte string (ASCII-only if `in` is ASCII)
 * holding the escaped text.  Returns NULL if the allocation fails.
 */
static PyObject*
escape_unicode_kind1(PyUnicodeObject *in)
{
    Py_UCS1 *src = PyUnicode_1BYTE_DATA(in);
    Py_UCS1 *src_end = src + PyUnicode_GET_LENGTH(in);
    Py_UCS1 *scan = src;
    Py_UCS1 *dst;
    PyObject *result;
    Py_ssize_t extra = 0;
    Py_UCS4 maxchar;

    /* first pass: how much longer will the escaped string be? */
    GET_DELTA(scan, src_end, extra);
    if (extra == 0) {
        /* nothing to escape, hand back the input unchanged */
        Py_INCREF(in);
        return (PyObject*)in;
    }

    /* the replacements are pure ASCII, so the input's range is kept */
    if (PyUnicode_IS_ASCII(in)) {
        maxchar = 127;
    }
    else {
        maxchar = 255;
    }

    result = PyUnicode_New(PyUnicode_GET_LENGTH(in) + extra, maxchar);
    if (result == NULL) {
        return NULL;
    }

    /* second pass: copy with replacements */
    dst = PyUnicode_1BYTE_DATA(result);
    DO_ESCAPE(src, src_end, dst);
    return result;
}

/*
 * Escape a string stored with 2-byte characters.
 *
 * Returns a new reference: `in` itself when it contains nothing to escape,
 * otherwise a freshly allocated string with maximum character 65535 holding
 * the escaped text.  Returns NULL if the allocation fails.
 */
static PyObject*
escape_unicode_kind2(PyUnicodeObject *in)
{
    Py_UCS2 *src = PyUnicode_2BYTE_DATA(in);
    Py_UCS2 *src_end = src + PyUnicode_GET_LENGTH(in);
    Py_UCS2 *scan = src;
    Py_UCS2 *dst;
    PyObject *result;
    Py_ssize_t extra = 0;

    /* first pass: how much longer will the escaped string be? */
    GET_DELTA(scan, src_end, extra);
    if (extra == 0) {
        /* nothing to escape, hand back the input unchanged */
        Py_INCREF(in);
        return (PyObject*)in;
    }

    result = PyUnicode_New(PyUnicode_GET_LENGTH(in) + extra, 65535);
    if (result == NULL) {
        return NULL;
    }

    /* second pass: copy with replacements */
    dst = PyUnicode_2BYTE_DATA(result);
    DO_ESCAPE(src, src_end, dst);
    return result;
}


/*
 * Escape a string stored with 4-byte characters.
 *
 * Returns a new reference: `in` itself when it contains nothing to escape,
 * otherwise a freshly allocated string with maximum character 1114111
 * (0x10FFFF) holding the escaped text.  Returns NULL if the allocation fails.
 */
static PyObject*
escape_unicode_kind4(PyUnicodeObject *in)
{
    Py_UCS4 *src = PyUnicode_4BYTE_DATA(in);
    Py_UCS4 *src_end = src + PyUnicode_GET_LENGTH(in);
    Py_UCS4 *scan = src;
    Py_UCS4 *dst;
    PyObject *result;
    Py_ssize_t extra = 0;

    /* first pass: how much longer will the escaped string be? */
    GET_DELTA(scan, src_end, extra);
    if (extra == 0) {
        /* nothing to escape, hand back the input unchanged */
        Py_INCREF(in);
        return (PyObject*)in;
    }

    result = PyUnicode_New(PyUnicode_GET_LENGTH(in) + extra, 1114111);
    if (result == NULL) {
        return NULL;
    }

    /* second pass: copy with replacements */
    dst = PyUnicode_4BYTE_DATA(result);
    DO_ESCAPE(src, src_end, dst);
    return result;
}

/*
 * Python 3 escape entry point for unicode objects: make sure the string is
 * in its ready form, then hand it to the helper matching its storage kind.
 *
 * Returns whatever the helper returns, or NULL when PyUnicode_READY fails.
 */
static PyObject*
escape_unicode(PyUnicodeObject *in)
{
    int kind;

    if (PyUnicode_READY(in) != 0) {
        return NULL;
    }

    kind = PyUnicode_KIND(in);
    if (kind == PyUnicode_1BYTE_KIND) {
        return escape_unicode_kind1(in);
    }
    if (kind == PyUnicode_2BYTE_KIND) {
        return escape_unicode_kind2(in);
    }
    if (kind == PyUnicode_4BYTE_KIND) {
        return escape_unicode_kind4(in);
    }

    assert(0);  /* shouldn't happen */
    return NULL;
}
#endif /* PY_MAJOR_VERSION < 3 */

static PyObject*
escape(PyObject *self, PyObject *text)
{
static PyObject *id_html;
PyObject *s = NULL, *rv = NULL, *html;

if (id_html == NULL) {
#if PY_MAJOR_VERSION < 3
id_html = PyString_InternFromString("__html__");
#else
id_html = PyUnicode_InternFromString("__html__");
#endif
if (id_html == NULL) {
return NULL;
}
}

/* we don't have to escape integers, bools or floats */
if (PyLong_CheckExact(text) ||
#if PY_MAJOR_VERSION < 3
Expand All @@ -122,7 +307,7 @@ escape(PyObject *self, PyObject *text)
return PyObject_CallFunctionObjArgs(markup, text, NULL);

/* if the object has an __html__ method that performs the escaping */
html = PyObject_GetAttrString(text, "__html__");
html = PyObject_GetAttr(text ,id_html);
if (html) {
s = PyObject_CallObject(html, NULL);
Py_DECREF(html);
Expand Down
35 changes: 35 additions & 0 deletions tests/test_escape.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# -*- coding: utf-8 -*-
import pytest

from markupsafe import Markup, _native

try:
from markupsafe import _speedups
except ImportError:
_speedups = None


# Check escape() in every available implementation: the pure-Python _native
# module always, and the C _speedups module unless it failed to import.
# Each case puts the five escaped characters (& > < ' ") in the middle, at the
# start, and at the end of the surrounding text.
@pytest.mark.parametrize('mod', (
_native,
pytest.param(_speedups, marks=pytest.mark.skipif(
_speedups is None, reason='speedups unavailable')),
))
@pytest.mark.parametrize(('value', 'expect'), (
# empty string: nothing to escape
(u'', u''),
# ascii-only surrounding text
(u'abcd&><\'"efgh', u'abcd&amp;&gt;&lt;&#39;&#34;efgh'),
(u'&><\'"efgh', u'&amp;&gt;&lt;&#39;&#34;efgh'),
(u'abcd&><\'"', u'abcd&amp;&gt;&lt;&#39;&#34;'),
# 2 byte: surrounding text is Japanese hiragana
(u'こんにちは&><\'"こんばんは',
u'こんにちは&amp;&gt;&lt;&#39;&#34;こんばんは'),
(u'&><\'"こんばんは', u'&amp;&gt;&lt;&#39;&#34;こんばんは'),
(u'こんにちは&><\'"', u'こんにちは&amp;&gt;&lt;&#39;&#34;'),
# 4 byte: surrounding text uses code points above U+FFFF
(u'\U0001F363\U0001F362&><\'"\U0001F37A xyz', u'\U0001F363\U0001F362&amp;&gt;&lt;&#39;&#34;\U0001F37A xyz'),
(u'&><\'"\U0001F37A xyz', u'&amp;&gt;&lt;&#39;&#34;\U0001F37A xyz'),
(u'\U0001F363\U0001F362&><\'"', u'\U0001F363\U0001F362&amp;&gt;&lt;&#39;&#34;'),
))
def test_escape(mod, value, expect):
# the result must compare equal to a Markup instance of the expected text
assert mod.escape(value) == Markup(expect)