|
| 1 | +#!/usr/bin/env pinpoint |
| 2 | +# Copyright (C) Sebastian Pipping <sebastian@pipping.org> |
| 3 | +# Licensed under CC-BY-SA 4.0 |
| 4 | + |
| 5 | +[rgb45347b.ppm] |
| 6 | +[fill] |
| 7 | +[text-color=white] |
| 8 | +[shading-opacity=0.0] |
| 9 | + |
| 10 | +-- |
| 11 | +* |
| 12 | + |
| 13 | + |
| 14 | + |
| 15 | +<b>Python JSON Emoji Crash Story</b> |
| 16 | + |
| 17 | +<i><span size='x-small'><tt>Sebastian Pipping &lt;sebastian@pipping.org&gt;</tt></span></i> |
| 18 | + |
| 19 | + |
| 20 | + |
| 21 | + |
| 22 | +\-- |
| 23 | +<i><span size='x-small'>Berlin, 2020-02-18, v2 |
| 24 | +Licensed under CC-BY-SA 4.0</span></i> |
| 25 | + |
| 26 | +-- |
| 27 | +<b>DISCLAIMER</b> |
| 28 | + |
| 29 | +Slides were made with GNOME pinpoint |
| 30 | +and under very tight time constraints. |
| 31 | + |
| 32 | +My apologies — better slides next time! |
| 33 | + |
| 34 | +-- |
| 35 | +1. |
| 36 | + |
| 37 | +Django in Berlin at ~170 companies |
| 38 | + |
| 39 | +https://github.com/hartwork/django-berlin#companies |
| 40 | + |
| 41 | +-- |
| 42 | +2. |
| 43 | + |
| 44 | +Who has a friend running… |
| 45 | + - Django 3 &lt;3.0.1 |
| 46 | + - Django 2 &lt;2.2.9 |
| 47 | + - Django 1 &lt;1.11.27 |
| 48 | +? |
| 49 | + |
| 50 | +-- |
| 51 | +Please consider upgrading! |
| 52 | + |
| 53 | +<tt>CVE-2019-19844</tt> |
| 54 | + |
| 55 | +Potential <b>account hijack</b> |
| 56 | +via password reset form |
| 57 | + |
| 58 | +-- |
| 59 | +https://www.djangoproject.com/weblog/2019/dec/18/security-releases/ |
| 60 | + |
| 61 | +-- |
| 62 | +3. |
| 63 | + |
| 64 | +Who has a friend running… |
| 65 | + |
| 66 | + settings.DEBUG == True |
| 67 | + |
| 68 | +accessible from the public internet? |
| 69 | + |
| 70 | +-- |
| 71 | +4. |
| 72 | + |
| 73 | +Actual talk |
| 74 | + |
| 75 | +-- |
| 76 | +<b>Python JSON Emoji Crash Story</b> |
| 77 | + |
| 78 | +-- |
| 79 | +Tell a story |
| 80 | + |
| 81 | +Point out a problem |
| 82 | + |
| 83 | +Questions + Discussion |
| 84 | + |
| 85 | +-- |
| 86 | +Environment: |
| 87 | + |
| 88 | + - Django backend… |
| 89 | + with Django REST Framework |
| 90 | + |
| 91 | + - A JavaScript frontend |
| 92 | + POST'ing JSON |
| 93 | + |
| 94 | +-- |
| 95 | +<b>Flow of data</b> |
| 96 | + |
| 97 | +1. User input |
| 98 | +2. Form / HTML DOM |
| 99 | +3. JavaScript |
| 100 | +4. JSON (= ECMA-404) |
| 101 | +5. HTTP request with body |
| 102 | +6. Django REST Framework |
| 103 | +7. <tt>rest_framework.parsers.JSONParser</tt> |
| 104 | +8. De-serialization |
| 105 | +9. <i>Some action</i> (e.g. store into database) |
| 106 | + |
| 107 | +-- |
| 108 | +<b>Unicode</b> |
| 109 | + |
| 110 | +<tt>U+0000</tt> — <tt>U+ffff</tt> |
| 111 | +<b>B</b>asic <b>M</b>ultilingual <b>P</b>lane |
| 112 | + |
| 113 | +<tt>U+10000</tt> — <tt>U+10ffff</tt> |
| 114 | +16 "astral" planes |
| 115 | + |
| 116 | +-- |
| 117 | +<b>Emoji</b> |
| 118 | + |
| 119 | +beyond <tt>U+ffff</tt> |
| 120 | + |
| 121 | +i.e. need more than 4 hex digits |
| 122 | + |
| 123 | +-- |
| 124 | +Example: |
| 125 | + |
| 126 | +Character 'GRINNING FACE' |
| 127 | + |
| 128 | +Code point: <tt>U+1F600</tt> |
| 129 | +Example glyph: 😀 |
| 130 | + |
| 131 | +-- |
| 132 | +Unicode characters in JSON |
| 133 | + |
| 134 | +a) character itself as UTF-8 |
| 135 | + (except <tt>U+0</tt> to <tt>U+1f</tt>, <tt>U+22</tt>, <tt>U+5C</tt>) |
| 136 | + |
| 137 | +b) escaped a la (regex:) \\\\u[0-9a-fA-F]{4} |
| 138 | + |
| 139 | +-- |
| 140 | +Works, Python: |
| 141 | +<tt> |
| 142 | +In [1]: import json |
| 143 | + |
| 144 | +In [2]: json.loads('"😀"') # plain UTF-8 |
| 145 | +Out[2]: '😀' |
| 146 | + |
| 147 | +In [3]: json.loads('"\\ud83d\\ude00"') |
| 148 | +Out[3]: '😀' |
| 149 | +</tt> |
| 150 | + |
| 151 | +-- |
| 152 | +1024 "high" surrogates (<tt>U+D800</tt>–<tt>U+DBFF</tt>) |
| 153 | +1024 "low" surrogates (<tt>U+DC00</tt>–<tt>U+DFFF</tt>) |
| 154 | + |
| 155 | +Pair of surrogates allows "addressing" |
| 156 | +any of the astral characters. |
| 157 | + |
| 158 | +This is the very idea behind UTF-16. |
| 159 | + |
| 160 | +(<tt>2**20 + 2**16 == 2**16 * 17</tt>) |
| 161 | + |
| 162 | +-- |
| 163 | +Length of a string |
| 164 | + |
| 165 | +-- |
| 166 | +Python:<tt> |
| 167 | +In : len('😀') |
| 168 | +Out: 1 |
| 169 | +</tt> |
| 170 | +JavaScript:<tt> |
| 171 | +&gt;&gt; '😀'.length |
| 172 | +&lt;- 2 |
| 173 | +</tt> |
| 174 | + |
| 175 | +-- |
| 176 | +JavaScript:<tt> |
| 177 | +&gt;&gt; '😀'.split('') |
| 178 | +&lt;- Array [ "\\ud83d", "\\ude00" ] |
| 179 | +</tt> |
| 180 | + |
| 181 | +-- |
| 182 | +What if buggy code italicized text like this? |
| 183 | + |
| 184 | +JavaScript:<tt> |
| 185 | +&gt;&gt; input_text.replace(/./g, '&lt;em&gt;$&amp;&lt;/em&gt;') |
| 186 | +</tt> |
| 187 | + |
| 188 | +-- |
| 189 | +We send <i>single surrogates</i> |
| 190 | + |
| 191 | +to the backend |
| 192 | + |
| 193 | +-- |
| 194 | +JavaScript:<tt> |
| 195 | +&gt;&gt; '😀'.replace(/./g, '[$&amp;]').split('') |
| 196 | +&lt;- Array(6) [ "[", "\\ud83d", "]", "[", "\\ude00", "]" ] |
| 197 | +</tt> |
| 198 | + |
| 199 | +-- |
| 200 | +How does <i>Python</i> deal with this? |
| 201 | + |
| 202 | +-- |
| 203 | +Python:<tt> |
| 204 | +In : json.loads('"[\\\\ud83d][\\\\ude00]"') |
| 205 | +Out: '[\\ud83d][\\ude00]' |
| 206 | +</tt> |
| 207 | + |
| 208 | +-- |
| 209 | +Surrogates in isolation |
| 210 | + == |
| 211 | +invalid characters |
| 212 | + |
| 213 | +-- |
| 214 | +Python:<tt> |
| 215 | +In : json.loads('"[\\\\ud83d][\\\\ude00]"').encode('utf-8') |
| 216 | +[..] |
| 217 | +UnicodeEncodeError: 'utf-8' codec can't encode character |
| 218 | + '\\ud83d' in position 1: surrogates not allowed |
| 219 | +</tt> |
| 220 | + |
| 221 | +-- |
| 222 | +Fixed for <tt>CharField</tt> |
| 223 | + |
| 224 | +in next release (3.9.4?) of |
| 225 | + |
| 226 | +Django REST Framework |
| 227 | + |
| 228 | +-- |
| 229 | +https://github.com/encode/django-rest-framework/pull/7067 |
| 230 | + |
| 231 | +https://github.com/encode/django-rest-framework/issues/7026 |
| 232 | + |
| 233 | +-- |
| 234 | +"Unfixed" in CPython's JSON decoder |
| 235 | + |
| 236 | +Considered a feature upstream |
| 237 | + |
| 238 | +Potentially for good reasons |
| 239 | + |
| 240 | + |
| 241 | +https://docs.python.org/3/library/json.html#character-encodings |
| 242 | + |
| 243 | +-- |
| 244 | +<b>Playing with surrogate characters</b> |
| 245 | + |
| 246 | +<tt># pip3 install surrogates</tt> |
| 247 | + |
| 248 | +https://github.com/hartwork/surrogates#usage |
| 249 | + |
| 250 | +-- |
| 251 | +<b>Consequences?</b> |
| 252 | + - Produce error 500 on any(?) DRF deployed today |
| 253 | + - (Read secrets if <tt>DEBUG=True</tt>) |
| 254 | + - Catch early once 👍 or late everywhere 👎 |
| 255 | + - ? |
| 256 | + |
| 257 | +<b>Coping strategies?</b> |
| 258 | + |
| 259 | + |
| 260 | +<b>Thank you!</b> |
| 261 | + |
| 262 | +<i><span size='x-small'><tt>Sebastian Pipping &lt;sebastian@pipping.org&gt;</tt></span></i> |
0 commit comments