Skip to content

Commit fa1692a

Browse files
authored
More validation of HTTP/1.1 messages. (#321)
Add validation for the following, according to the ABNF defined in RFC-7230. - header field-names - header field-values - request method - request-path aka URI - response reason-phrase Previously, we did an OK job validating incoming messages, but did no validation whatsoever of outgoing messages. Caveats: - URI validation is extremely basic for now. By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license.
1 parent ec42882 commit fa1692a

File tree

8 files changed

+563
-11
lines changed

8 files changed

+563
-11
lines changed

include/aws/http/private/strutil.h

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,10 +70,49 @@ AWS_HTTP_API
7070
bool aws_strutil_is_http_token(struct aws_byte_cursor token);
7171

7272
/**
73-
* Same as aws_strutil_is_http_token_valid(), but uppercase letters are forbidden.
73+
* Same as aws_strutil_is_http_token(), but uppercase letters are forbidden.
7474
*/
7575
AWS_HTTP_API
7676
bool aws_strutil_is_lowercase_http_token(struct aws_byte_cursor token);
7777

78+
/**
79+
* Return whether this ASCII/UTF-8 sequence is a valid HTTP header field-value.
80+
*
81+
* As defined in RFC7230 section 3.2 (except we are ALWAYS forbidding obs-fold):
82+
*
83+
* field-value = *( field-content / obs-fold )
84+
* field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
85+
* field-vchar = VCHAR / obs-text
86+
* VCHAR = %x21-7E ; visible (printing) characters
87+
* obs-text = %x80-FF
88+
*
89+
* Note that we ALWAYS forbid obs-fold. Section 3.2.4 explains how
90+
* obs-fold is deprecated "except within the message/http media type".
91+
*/
92+
AWS_HTTP_API
93+
bool aws_strutil_is_http_field_value(struct aws_byte_cursor cursor);
94+
95+
/**
96+
* Return whether this ASCII/UTF-8 sequence is a valid HTTP response status reason-phrase.
97+
*
98+
* As defined in RFC7230 section 3.1.2:
99+
*
100+
* reason-phrase = *( HTAB / SP / VCHAR / obs-text )
101+
* VCHAR = %x21-7E ; visible (printing) characters
102+
* obs-text = %x80-FF
103+
*/
104+
AWS_HTTP_API
105+
bool aws_strutil_is_http_reason_phrase(struct aws_byte_cursor cursor);
106+
107+
/**
108+
* Return whether this ASCII/UTF-8 sequence is a valid HTTP request-target.
109+
*
110+
* TODO: Actually check the complete grammar as defined in RFC7230 5.3 and
111+
* RFC3986. Currently this just checks whether the sequence is blatantly illegal
112+
* (ex: contains CR or LF)
113+
*/
114+
AWS_HTTP_API
115+
bool aws_strutil_is_http_request_target(struct aws_byte_cursor cursor);
116+
78117
AWS_EXTERN_C_END
79118
#endif /* AWS_HTTP_STRUTIL_H */

source/h1_decoder.c

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -426,14 +426,20 @@ static int s_linestate_header(struct aws_h1_decoder *decoder, struct aws_byte_cu
426426
}
427427

428428
struct aws_byte_cursor name = splits[0];
429-
if (name.len == 0 || !aws_strutil_is_http_token(name)) {
429+
if (!aws_strutil_is_http_token(name)) {
430430
AWS_LOGF_ERROR(AWS_LS_HTTP_STREAM, "id=%p: Invalid incoming header, bad name.", decoder->logging_id);
431431
AWS_LOGF_DEBUG(
432432
AWS_LS_HTTP_STREAM, "id=%p: Bad header is: '" PRInSTR "'", decoder->logging_id, AWS_BYTE_CURSOR_PRI(input));
433433
return aws_raise_error(AWS_ERROR_HTTP_PROTOCOL_ERROR);
434434
}
435435

436436
struct aws_byte_cursor value = aws_strutil_trim_http_whitespace(splits[1]);
437+
if (!aws_strutil_is_http_field_value(value)) {
438+
AWS_LOGF_ERROR(AWS_LS_HTTP_STREAM, "id=%p: Invalid incoming header, bad value.", decoder->logging_id);
439+
AWS_LOGF_DEBUG(
440+
AWS_LS_HTTP_STREAM, "id=%p: Bad header is: '" PRInSTR "'", decoder->logging_id, AWS_BYTE_CURSOR_PRI(input));
441+
return aws_raise_error(AWS_ERROR_HTTP_PROTOCOL_ERROR);
442+
}
437443

438444
struct aws_h1_decoded_header header;
439445
header.name = aws_http_str_to_header_name(name);
@@ -603,6 +609,26 @@ static int s_linestate_request(struct aws_h1_decoder *decoder, struct aws_byte_c
603609
struct aws_byte_cursor uri = cursors[1];
604610
struct aws_byte_cursor version = cursors[2];
605611

612+
if (!aws_strutil_is_http_token(method)) {
613+
AWS_LOGF_ERROR(AWS_LS_HTTP_STREAM, "id=%p: Incoming request has invalid method.", decoder->logging_id);
614+
AWS_LOGF_DEBUG(
615+
AWS_LS_HTTP_STREAM,
616+
"id=%p: Bad request line is: '" PRInSTR "'",
617+
decoder->logging_id,
618+
AWS_BYTE_CURSOR_PRI(input));
619+
return aws_raise_error(AWS_ERROR_HTTP_PROTOCOL_ERROR);
620+
}
621+
622+
if (!aws_strutil_is_http_request_target(uri)) {
623+
AWS_LOGF_ERROR(AWS_LS_HTTP_STREAM, "id=%p: Incoming request has invalid path.", decoder->logging_id);
624+
AWS_LOGF_DEBUG(
625+
AWS_LS_HTTP_STREAM,
626+
"id=%p: Bad request line is: '" PRInSTR "'",
627+
decoder->logging_id,
628+
AWS_BYTE_CURSOR_PRI(input));
629+
return aws_raise_error(AWS_ERROR_HTTP_PROTOCOL_ERROR);
630+
}
631+
606632
struct aws_byte_cursor version_expected = aws_http_version_to_str(AWS_HTTP_VERSION_1_1);
607633
if (!aws_byte_cursor_eq(&version, &version_expected)) {
608634
AWS_LOGF_ERROR(
@@ -645,7 +671,6 @@ static int s_linestate_response(struct aws_h1_decoder *decoder, struct aws_byte_
645671
struct aws_byte_cursor version = cursors[0];
646672
struct aws_byte_cursor code = cursors[1];
647673
struct aws_byte_cursor phrase = cursors[2];
648-
(void)phrase; /* Unused for now. */
649674

650675
struct aws_byte_cursor version_1_1_expected = aws_http_version_to_str(AWS_HTTP_VERSION_1_1);
651676
struct aws_byte_cursor version_1_0_expected = aws_http_version_to_str(AWS_HTTP_VERSION_1_0);
@@ -660,6 +685,12 @@ static int s_linestate_response(struct aws_h1_decoder *decoder, struct aws_byte_
660685
return aws_raise_error(AWS_ERROR_HTTP_PROTOCOL_ERROR);
661686
}
662687

688+
/* Validate phrase */
689+
if (!aws_strutil_is_http_reason_phrase(phrase)) {
690+
AWS_LOGF_ERROR(AWS_LS_HTTP_STREAM, "id=%p: Incoming response has invalid reason phrase.", decoder->logging_id);
691+
return aws_raise_error(AWS_ERROR_HTTP_PROTOCOL_ERROR);
692+
}
693+
663694
/* Status-code is a 3-digit integer. RFC7230 section 3.1.2 */
664695
uint64_t code_val_u64;
665696
err = aws_strutil_read_unsigned_num(code, &code_val_u64);

source/h1_encoder.c

Lines changed: 39 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,31 +37,51 @@ static int s_scan_outgoing_headers(
3737
struct aws_http_header header;
3838
aws_http_message_get_header(message, &header, i);
3939

40+
/* Validate header field-name (RFC-7230 3.2): field-name = token */
41+
if (!aws_strutil_is_http_token(header.name)) {
42+
AWS_LOGF_ERROR(AWS_LS_HTTP_STREAM, "id=static: Header name is invalid");
43+
return aws_raise_error(AWS_ERROR_HTTP_INVALID_HEADER_NAME);
44+
}
45+
46+
/* Validate header field-value.
47+
* The value itself isn't supposed to have whitespace on either side,
48+
* but we'll trim it off before validation so we don't start needlessly
49+
* failing requests that used to work before we added validation.
50+
* This should be OK because field-value can be sent with any amount
51+
* of whitespace around it, which the other side will just ignore (RFC-7230 3.2):
52+
* header-field = field-name ":" OWS field-value OWS */
53+
struct aws_byte_cursor field_value = aws_strutil_trim_http_whitespace(header.value);
54+
if (!aws_strutil_is_http_field_value(field_value)) {
55+
AWS_LOGF_ERROR(
56+
AWS_LS_HTTP_STREAM,
57+
"id=static: Header '" PRInSTR "' has invalid value",
58+
AWS_BYTE_CURSOR_PRI(header.name));
59+
return aws_raise_error(AWS_ERROR_HTTP_INVALID_HEADER_VALUE);
60+
}
61+
4062
enum aws_http_header_name name_enum = aws_http_str_to_header_name(header.name);
4163
switch (name_enum) {
4264
case AWS_HTTP_HEADER_CONNECTION: {
43-
struct aws_byte_cursor trimmed_value = aws_strutil_trim_http_whitespace(header.value);
44-
if (aws_byte_cursor_eq_c_str(&trimmed_value, "close")) {
65+
if (aws_byte_cursor_eq_c_str(&field_value, "close")) {
4566
encoder_message->has_connection_close_header = true;
4667
}
4768
} break;
4869
case AWS_HTTP_HEADER_CONTENT_LENGTH: {
4970
has_content_length_header = true;
50-
struct aws_byte_cursor trimmed_value = aws_strutil_trim_http_whitespace(header.value);
51-
if (aws_strutil_read_unsigned_num(trimmed_value, &encoder_message->content_length)) {
71+
if (aws_strutil_read_unsigned_num(field_value, &encoder_message->content_length)) {
5272
AWS_LOGF_ERROR(AWS_LS_HTTP_STREAM, "id=static: Invalid Content-Length");
5373
return aws_raise_error(AWS_ERROR_HTTP_INVALID_HEADER_VALUE);
5474
}
5575
} break;
5676
case AWS_HTTP_HEADER_TRANSFER_ENCODING: {
5777
has_transfer_encoding_header = true;
58-
if (0 == header.value.len) {
78+
if (0 == field_value.len) {
5979
AWS_LOGF_ERROR(AWS_LS_HTTP_STREAM, "id=static: Transfer-Encoding must include a valid value");
6080
return aws_raise_error(AWS_ERROR_HTTP_INVALID_HEADER_VALUE);
6181
}
6282
struct aws_byte_cursor substr;
6383
AWS_ZERO_STRUCT(substr);
64-
while (aws_byte_cursor_next_split(&header.value, ',', &substr)) {
84+
while (aws_byte_cursor_next_split(&field_value, ',', &substr)) {
6585
struct aws_byte_cursor trimmed = aws_strutil_trim_http_whitespace(substr);
6686
if (0 == trimmed.len) {
6787
AWS_LOGF_ERROR(
@@ -177,13 +197,26 @@ int aws_h1_encoder_message_init_from_request(
177197
struct aws_byte_cursor method;
178198
int err = aws_http_message_get_request_method(request, &method);
179199
if (err) {
200+
AWS_LOGF_ERROR(AWS_LS_HTTP_STREAM, "id=static: Request method not set");
201+
aws_raise_error(AWS_ERROR_HTTP_INVALID_METHOD);
202+
goto error;
203+
}
204+
/* RFC-7230 3.1.1: method = token */
205+
if (!aws_strutil_is_http_token(method)) {
206+
AWS_LOGF_ERROR(AWS_LS_HTTP_STREAM, "id=static: Request method is invalid");
180207
aws_raise_error(AWS_ERROR_HTTP_INVALID_METHOD);
181208
goto error;
182209
}
183210

184211
struct aws_byte_cursor uri;
185212
err = aws_http_message_get_request_path(request, &uri);
186213
if (err) {
214+
AWS_LOGF_ERROR(AWS_LS_HTTP_STREAM, "id=static: Request path not set");
215+
aws_raise_error(AWS_ERROR_HTTP_INVALID_PATH);
216+
goto error;
217+
}
218+
if (!aws_strutil_is_http_request_target(uri)) {
219+
AWS_LOGF_ERROR(AWS_LS_HTTP_STREAM, "id=static: Request path is invalid");
187220
aws_raise_error(AWS_ERROR_HTTP_INVALID_PATH);
188221
goto error;
189222
}

source/strutil.c

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,3 +139,125 @@ bool aws_strutil_is_http_token(struct aws_byte_cursor token) {
139139
bool aws_strutil_is_lowercase_http_token(struct aws_byte_cursor token) {
140140
return s_is_token(token, s_http_lowercase_token_table);
141141
}
142+
143+
/* clang-format off */
144+
/**
145+
* Table with true for all octets allowed in field-content,
146+
* as defined in RFC7230 section 3.2 and 3.2.6 and RFC5234 appendix-B.1:
147+
*
148+
* field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
149+
* field-vchar = VCHAR / obs-text
150+
* VCHAR = %x21-7E ; visible (printing) characters
151+
* obs-text = %x80-FF
152+
*/
153+
static const bool s_http_field_content_table[256] = {
154+
/* clang-format off */
155+
156+
/* whitespace */
157+
['\t'] = true, [' '] = true,
158+
159+
/* VCHAR = 0x21-7E */
160+
[0x21] = true, [0x22] = true, [0x23] = true, [0x24] = true, [0x25] = true, [0x26] = true, [0x27] = true,
161+
[0x28] = true, [0x29] = true, [0x2A] = true, [0x2B] = true, [0x2C] = true, [0x2D] = true, [0x2E] = true,
162+
[0x2F] = true, [0x30] = true, [0x31] = true, [0x32] = true, [0x33] = true, [0x34] = true, [0x35] = true,
163+
[0x36] = true, [0x37] = true, [0x38] = true, [0x39] = true, [0x3A] = true, [0x3B] = true, [0x3C] = true,
164+
[0x3D] = true, [0x3E] = true, [0x3F] = true, [0x40] = true, [0x41] = true, [0x42] = true, [0x43] = true,
165+
[0x44] = true, [0x45] = true, [0x46] = true, [0x47] = true, [0x48] = true, [0x49] = true, [0x4A] = true,
166+
[0x4B] = true, [0x4C] = true, [0x4D] = true, [0x4E] = true, [0x4F] = true, [0x50] = true, [0x51] = true,
167+
[0x52] = true, [0x53] = true, [0x54] = true, [0x55] = true, [0x56] = true, [0x57] = true, [0x58] = true,
168+
[0x59] = true, [0x5A] = true, [0x5B] = true, [0x5C] = true, [0x5D] = true, [0x5E] = true, [0x5F] = true,
169+
[0x60] = true, [0x61] = true, [0x62] = true, [0x63] = true, [0x64] = true, [0x65] = true, [0x66] = true,
170+
[0x67] = true, [0x68] = true, [0x69] = true, [0x6A] = true, [0x6B] = true, [0x6C] = true, [0x6D] = true,
171+
[0x6E] = true, [0x6F] = true, [0x70] = true, [0x71] = true, [0x72] = true, [0x73] = true, [0x74] = true,
172+
[0x75] = true, [0x76] = true, [0x77] = true, [0x78] = true, [0x79] = true, [0x7A] = true, [0x7B] = true,
173+
[0x7C] = true, [0x7D] = true, [0x7E] = true,
174+
175+
/* obs-text = %x80-FF */
176+
[0x80] = true, [0x81] = true, [0x82] = true, [0x83] = true, [0x84] = true, [0x85] = true, [0x86] = true,
177+
[0x87] = true, [0x88] = true, [0x89] = true, [0x8A] = true, [0x8B] = true, [0x8C] = true, [0x8D] = true,
178+
[0x8E] = true, [0x8F] = true, [0x90] = true, [0x91] = true, [0x92] = true, [0x93] = true, [0x94] = true,
179+
[0x95] = true, [0x96] = true, [0x97] = true, [0x98] = true, [0x99] = true, [0x9A] = true, [0x9B] = true,
180+
[0x9C] = true, [0x9D] = true, [0x9E] = true, [0x9F] = true, [0xA0] = true, [0xA1] = true, [0xA2] = true,
181+
[0xA3] = true, [0xA4] = true, [0xA5] = true, [0xA6] = true, [0xA7] = true, [0xA8] = true, [0xA9] = true,
182+
[0xAA] = true, [0xAB] = true, [0xAC] = true, [0xAD] = true, [0xAE] = true, [0xAF] = true, [0xB0] = true,
183+
[0xB1] = true, [0xB2] = true, [0xB3] = true, [0xB4] = true, [0xB5] = true, [0xB6] = true, [0xB7] = true,
184+
[0xB8] = true, [0xB9] = true, [0xBA] = true, [0xBB] = true, [0xBC] = true, [0xBD] = true, [0xBE] = true,
185+
[0xBF] = true, [0xC0] = true, [0xC1] = true, [0xC2] = true, [0xC3] = true, [0xC4] = true, [0xC5] = true,
186+
[0xC6] = true, [0xC7] = true, [0xC8] = true, [0xC9] = true, [0xCA] = true, [0xCB] = true, [0xCC] = true,
187+
[0xCD] = true, [0xCE] = true, [0xCF] = true, [0xD0] = true, [0xD1] = true, [0xD2] = true, [0xD3] = true,
188+
[0xD4] = true, [0xD5] = true, [0xD6] = true, [0xD7] = true, [0xD8] = true, [0xD9] = true, [0xDA] = true,
189+
[0xDB] = true, [0xDC] = true, [0xDD] = true, [0xDE] = true, [0xDF] = true, [0xE0] = true, [0xE1] = true,
190+
[0xE2] = true, [0xE3] = true, [0xE4] = true, [0xE5] = true, [0xE6] = true, [0xE7] = true, [0xE8] = true,
191+
[0xE9] = true, [0xEA] = true, [0xEB] = true, [0xEC] = true, [0xED] = true, [0xEE] = true, [0xEF] = true,
192+
[0xF0] = true, [0xF1] = true, [0xF2] = true, [0xF3] = true, [0xF4] = true, [0xF5] = true, [0xF6] = true,
193+
[0xF7] = true, [0xF8] = true, [0xF9] = true, [0xFA] = true, [0xFB] = true, [0xFC] = true, [0xFD] = true,
194+
[0xFE] = true, [0xFF] = true,
195+
/* clang-format on */
196+
};
197+
198+
/**
199+
* From RFC7230 section 3.2:
200+
* field-value = *( field-content / obs-fold )
201+
* field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
202+
*
203+
* But we're forbidding obs-fold
204+
*/
205+
bool aws_strutil_is_http_field_value(struct aws_byte_cursor cursor) {
206+
if (cursor.len == 0) {
207+
return true;
208+
}
209+
210+
/* first and last char cannot be whitespace */
211+
const uint8_t first_c = cursor.ptr[0];
212+
const uint8_t last_c = cursor.ptr[cursor.len - 1];
213+
if (s_http_whitespace_table[first_c] || s_http_whitespace_table[last_c]) {
214+
return false;
215+
}
216+
217+
/* ensure every char is legal field-content */
218+
size_t i = 0;
219+
do {
220+
const uint8_t c = cursor.ptr[i++];
221+
if (s_http_field_content_table[c] == false) {
222+
return false;
223+
}
224+
} while (i < cursor.len);
225+
226+
return true;
227+
}
228+
229+
/**
230+
* From RFC7230 section 3.1.2:
231+
* reason-phrase = *( HTAB / SP / VCHAR / obs-text )
232+
* VCHAR = %x21-7E ; visible (printing) characters
233+
* obs-text = %x80-FF
234+
*/
235+
bool aws_strutil_is_http_reason_phrase(struct aws_byte_cursor cursor) {
236+
for (size_t i = 0; i < cursor.len; ++i) {
237+
const uint8_t c = cursor.ptr[i];
238+
/* the field-content table happens to allow the exact same characters as reason-phrase */
239+
if (s_http_field_content_table[c] == false) {
240+
return false;
241+
}
242+
}
243+
return true;
244+
}
245+
246+
bool aws_strutil_is_http_request_target(struct aws_byte_cursor cursor) {
247+
if (cursor.len == 0) {
248+
return false;
249+
}
250+
251+
/* TODO: Actually check the complete grammar as defined in RFC7230 5.3 and
252+
* RFC3986. Currently this just checks whether the sequence is blatantly illegal */
253+
size_t i = 0;
254+
do {
255+
const uint8_t c = cursor.ptr[i++];
256+
/* everything <= ' ' is non-visible ascii*/
257+
if (c <= ' ') {
258+
return false;
259+
}
260+
} while (i < cursor.len);
261+
262+
return true;
263+
}

tests/CMakeLists.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,12 @@ add_test_case(h1_encoder_transfer_encoding_set_body_stream_errors)
6363
add_test_case(h1_encoder_transfer_encoding_chunked_multiple_put_request_headers)
6464
add_test_case(h1_encoder_transfer_encoding_chunked_not_final_encoding_put_request_headers)
6565
add_test_case(h1_encoder_transfer_encoding_not_ending_in_chunked_put_request_headers)
66+
add_test_case(h1_encoder_rejects_bad_method)
67+
add_test_case(h1_encoder_rejects_missing_method)
68+
add_test_case(h1_encoder_rejects_bad_path)
69+
add_test_case(h1_encoder_rejects_missing_path)
70+
add_test_case(h1_encoder_rejects_bad_header_name)
71+
add_test_case(h1_encoder_rejects_bad_header_value)
6672

6773
add_test_case(h1_client_sanity_check)
6874
add_test_case(h1_client_request_send_1liner)
@@ -135,6 +141,9 @@ add_test_case(strutil_read_unsigned_hex)
135141
add_test_case(strutil_trim_http_whitespace)
136142
add_test_case(strutil_is_http_token)
137143
add_test_case(strutil_is_lowercase_http_token)
144+
add_test_case(strutil_is_http_field_value)
145+
add_test_case(strutil_is_http_reason_phrase)
146+
add_test_case(strutil_is_http_request_target)
138147

139148
add_net_test_case(tls_download_medium_file_h1)
140149
add_net_test_case(tls_download_medium_file_h2)

0 commit comments

Comments
 (0)