@@ -139,3 +139,125 @@ bool aws_strutil_is_http_token(struct aws_byte_cursor token) {
139139bool aws_strutil_is_lowercase_http_token (struct aws_byte_cursor token ) {
140140 return s_is_token (token , s_http_lowercase_token_table );
141141}
142+
143+ /* clang-format off */
/**
 * Lookup table indexed by octet value: an entry is true when that octet is
 * allowed in field-content, per RFC7230 sections 3.2 and 3.2.6 and
 * RFC5234 appendix B.1:
 *
 *   field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
 *   field-vchar   = VCHAR / obs-text
 *   VCHAR         = %x21-7E ; visible (printing) characters
 *   obs-text      = %x80-FF
 *
 * Every octet not listed below (the remaining control characters and
 * DEL, 0x7F) is left zero-initialized, i.e. false.
 */
static const bool s_http_field_content_table[256] = {
    /* clang-format off */

    /* SP / HTAB */
    ['\t'] = true, [' '] = true,

    /* VCHAR = %x21-7E */
    [0x21] = true, [0x22] = true, [0x23] = true, [0x24] = true, [0x25] = true, [0x26] = true, [0x27] = true,
    [0x28] = true, [0x29] = true, [0x2A] = true, [0x2B] = true, [0x2C] = true, [0x2D] = true, [0x2E] = true, [0x2F] = true,
    [0x30] = true, [0x31] = true, [0x32] = true, [0x33] = true, [0x34] = true, [0x35] = true, [0x36] = true, [0x37] = true,
    [0x38] = true, [0x39] = true, [0x3A] = true, [0x3B] = true, [0x3C] = true, [0x3D] = true, [0x3E] = true, [0x3F] = true,
    [0x40] = true, [0x41] = true, [0x42] = true, [0x43] = true, [0x44] = true, [0x45] = true, [0x46] = true, [0x47] = true,
    [0x48] = true, [0x49] = true, [0x4A] = true, [0x4B] = true, [0x4C] = true, [0x4D] = true, [0x4E] = true, [0x4F] = true,
    [0x50] = true, [0x51] = true, [0x52] = true, [0x53] = true, [0x54] = true, [0x55] = true, [0x56] = true, [0x57] = true,
    [0x58] = true, [0x59] = true, [0x5A] = true, [0x5B] = true, [0x5C] = true, [0x5D] = true, [0x5E] = true, [0x5F] = true,
    [0x60] = true, [0x61] = true, [0x62] = true, [0x63] = true, [0x64] = true, [0x65] = true, [0x66] = true, [0x67] = true,
    [0x68] = true, [0x69] = true, [0x6A] = true, [0x6B] = true, [0x6C] = true, [0x6D] = true, [0x6E] = true, [0x6F] = true,
    [0x70] = true, [0x71] = true, [0x72] = true, [0x73] = true, [0x74] = true, [0x75] = true, [0x76] = true, [0x77] = true,
    [0x78] = true, [0x79] = true, [0x7A] = true, [0x7B] = true, [0x7C] = true, [0x7D] = true, [0x7E] = true,

    /* obs-text = %x80-FF */
    [0x80] = true, [0x81] = true, [0x82] = true, [0x83] = true, [0x84] = true, [0x85] = true, [0x86] = true, [0x87] = true,
    [0x88] = true, [0x89] = true, [0x8A] = true, [0x8B] = true, [0x8C] = true, [0x8D] = true, [0x8E] = true, [0x8F] = true,
    [0x90] = true, [0x91] = true, [0x92] = true, [0x93] = true, [0x94] = true, [0x95] = true, [0x96] = true, [0x97] = true,
    [0x98] = true, [0x99] = true, [0x9A] = true, [0x9B] = true, [0x9C] = true, [0x9D] = true, [0x9E] = true, [0x9F] = true,
    [0xA0] = true, [0xA1] = true, [0xA2] = true, [0xA3] = true, [0xA4] = true, [0xA5] = true, [0xA6] = true, [0xA7] = true,
    [0xA8] = true, [0xA9] = true, [0xAA] = true, [0xAB] = true, [0xAC] = true, [0xAD] = true, [0xAE] = true, [0xAF] = true,
    [0xB0] = true, [0xB1] = true, [0xB2] = true, [0xB3] = true, [0xB4] = true, [0xB5] = true, [0xB6] = true, [0xB7] = true,
    [0xB8] = true, [0xB9] = true, [0xBA] = true, [0xBB] = true, [0xBC] = true, [0xBD] = true, [0xBE] = true, [0xBF] = true,
    [0xC0] = true, [0xC1] = true, [0xC2] = true, [0xC3] = true, [0xC4] = true, [0xC5] = true, [0xC6] = true, [0xC7] = true,
    [0xC8] = true, [0xC9] = true, [0xCA] = true, [0xCB] = true, [0xCC] = true, [0xCD] = true, [0xCE] = true, [0xCF] = true,
    [0xD0] = true, [0xD1] = true, [0xD2] = true, [0xD3] = true, [0xD4] = true, [0xD5] = true, [0xD6] = true, [0xD7] = true,
    [0xD8] = true, [0xD9] = true, [0xDA] = true, [0xDB] = true, [0xDC] = true, [0xDD] = true, [0xDE] = true, [0xDF] = true,
    [0xE0] = true, [0xE1] = true, [0xE2] = true, [0xE3] = true, [0xE4] = true, [0xE5] = true, [0xE6] = true, [0xE7] = true,
    [0xE8] = true, [0xE9] = true, [0xEA] = true, [0xEB] = true, [0xEC] = true, [0xED] = true, [0xEE] = true, [0xEF] = true,
    [0xF0] = true, [0xF1] = true, [0xF2] = true, [0xF3] = true, [0xF4] = true, [0xF5] = true, [0xF6] = true, [0xF7] = true,
    [0xF8] = true, [0xF9] = true, [0xFA] = true, [0xFB] = true, [0xFC] = true, [0xFD] = true, [0xFE] = true, [0xFF] = true,

    /* clang-format on */
};
197+
198+ /**
199+ * From RFC7230 section 3.2:
200+ * field-value = *( field-content / obs-fold )
201+ * field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
202+ *
203+ * But we're forbidding obs-fold
204+ */
205+ bool aws_strutil_is_http_field_value (struct aws_byte_cursor cursor ) {
206+ if (cursor .len == 0 ) {
207+ return true;
208+ }
209+
210+ /* first and last char cannot be whitespace */
211+ const uint8_t first_c = cursor .ptr [0 ];
212+ const uint8_t last_c = cursor .ptr [cursor .len - 1 ];
213+ if (s_http_whitespace_table [first_c ] || s_http_whitespace_table [last_c ]) {
214+ return false;
215+ }
216+
217+ /* ensure every char is legal field-content */
218+ size_t i = 0 ;
219+ do {
220+ const uint8_t c = cursor .ptr [i ++ ];
221+ if (s_http_field_content_table [c ] == false) {
222+ return false;
223+ }
224+ } while (i < cursor .len );
225+
226+ return true;
227+ }
228+
229+ /**
230+ * From RFC7230 section 3.1.2:
231+ * reason-phrase = *( HTAB / SP / VCHAR / obs-text )
232+ * VCHAR = %x21-7E ; visible (printing) characters
233+ * obs-text = %x80-FF
234+ */
235+ bool aws_strutil_is_http_reason_phrase (struct aws_byte_cursor cursor ) {
236+ for (size_t i = 0 ; i < cursor .len ; ++ i ) {
237+ const uint8_t c = cursor .ptr [i ];
238+ /* the field-content table happens to allow the exact same characters as reason-phrase */
239+ if (s_http_field_content_table [c ] == false) {
240+ return false;
241+ }
242+ }
243+ return true;
244+ }
245+
246+ bool aws_strutil_is_http_request_target (struct aws_byte_cursor cursor ) {
247+ if (cursor .len == 0 ) {
248+ return false;
249+ }
250+
251+ /* TODO: Actually check the complete grammar as defined in RFC7230 5.3 and
252+ * RFC3986. Currently this just checks whether the sequence is blatantly illegal */
253+ size_t i = 0 ;
254+ do {
255+ const uint8_t c = cursor .ptr [i ++ ];
256+ /* everything <= ' ' is non-visible ascii*/
257+ if (c <= ' ' ) {
258+ return false;
259+ }
260+ } while (i < cursor .len );
261+
262+ return true;
263+ }
0 commit comments