Skip to content

Commit 62a03c5

Browse files
authored
Websocket: Validate UTF-8 in text payloads (#418)
Validate that the payloads of TEXT messages are well-formed UTF-8, as required by [RFC-6455 Section 5.6](https://www.rfc-editor.org/rfc/rfc6455#section-5.6)
1 parent 9bd58ca commit 62a03c5

File tree

6 files changed

+285
-8
lines changed

6 files changed

+285
-8
lines changed

include/aws/http/private/websocket_decoder.h

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@ enum aws_websocket_decoder_state {
2626
AWS_WEBSOCKET_DECODER_STATE_MASKING_KEY,
2727
AWS_WEBSOCKET_DECODER_STATE_PAYLOAD_CHECK,
2828
AWS_WEBSOCKET_DECODER_STATE_PAYLOAD,
29+
AWS_WEBSOCKET_DECODER_STATE_FRAME_END,
2930
AWS_WEBSOCKET_DECODER_STATE_DONE,
3031
};
3132

@@ -38,6 +39,11 @@ struct aws_websocket_decoder {
3839

3940
bool expecting_continuation_data_frame; /* True when the next data frame must be CONTINUATION frame */
4041

42+
/* True while processing a TEXT "message" (from the start of a TEXT frame,
43+
* until the end of the TEXT or CONTINUATION frame with the FIN bit set). */
44+
bool processing_text_message;
45+
struct aws_utf8_validator *text_message_validator;
46+
4147
void *user_data;
4248
aws_websocket_decoder_frame_fn *on_frame;
4349
aws_websocket_decoder_payload_fn *on_payload;
@@ -48,19 +54,22 @@ AWS_EXTERN_C_BEGIN
4854
AWS_HTTP_API
4955
void aws_websocket_decoder_init(
5056
struct aws_websocket_decoder *decoder,
57+
struct aws_allocator *alloc,
5158
aws_websocket_decoder_frame_fn *on_frame,
5259
aws_websocket_decoder_payload_fn *on_payload,
5360
void *user_data);
5461

62+
AWS_HTTP_API
63+
void aws_websocket_decoder_clean_up(struct aws_websocket_decoder *decoder);
64+
5565
/**
5666
* Returns when all data is processed, or a frame and its payload have completed.
5767
* `data` will be advanced to reflect the amount of data processed by this call.
5868
* `frame_complete` will be set true if this call returned due to completion of a frame.
5969
* The `on_frame` and `on_payload` callbacks may each be invoked once as a result of this call.
6070
* If an error occurs, the decoder is invalid forevermore.
6171
*/
62-
AWS_HTTP_API
63-
int aws_websocket_decoder_process(
72+
AWS_HTTP_API int aws_websocket_decoder_process(
6473
struct aws_websocket_decoder *decoder,
6574
struct aws_byte_cursor *data,
6675
bool *frame_complete);

source/websocket.c

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -304,7 +304,8 @@ struct aws_websocket *aws_websocket_handler_new(const struct aws_websocket_handl
304304

305305
aws_websocket_encoder_init(&websocket->thread_data.encoder, s_encoder_stream_outgoing_payload, websocket);
306306

307-
aws_websocket_decoder_init(&websocket->thread_data.decoder, s_decoder_on_frame, s_decoder_on_payload, websocket);
307+
aws_websocket_decoder_init(
308+
&websocket->thread_data.decoder, options->allocator, s_decoder_on_frame, s_decoder_on_payload, websocket);
308309

309310
aws_linked_list_init(&websocket->synced_data.outgoing_frame_list);
310311

@@ -346,6 +347,7 @@ static void s_handler_destroy(struct aws_channel_handler *handler) {
346347

347348
AWS_LOGF_TRACE(AWS_LS_HTTP_WEBSOCKET, "id=%p: Destroying websocket.", (void *)websocket);
348349

350+
aws_websocket_decoder_clean_up(&websocket->thread_data.decoder);
349351
aws_byte_buf_clean_up(&websocket->thread_data.incoming_ping_payload);
350352
aws_mutex_clean_up(&websocket->synced_data.lock);
351353
aws_mem_release(websocket->alloc, websocket);

source/websocket_decoder.c

Lines changed: 49 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,8 @@
55

66
#include <aws/http/private/websocket_decoder.h>
77

8+
#include <aws/common/encoding.h>
9+
810
#include <inttypes.h>
911

1012
typedef int(state_fn)(struct aws_websocket_decoder *decoder, struct aws_byte_cursor *data);
@@ -86,6 +88,10 @@ static int s_state_opcode_byte(struct aws_websocket_decoder *decoder, struct aws
8688
}
8789
}
8890

91+
if (decoder->current_frame.opcode == AWS_WEBSOCKET_OPCODE_TEXT) {
92+
decoder->processing_text_message = true;
93+
}
94+
8995
decoder->state = AWS_WEBSOCKET_DECODER_STATE_LENGTH_BYTE;
9096
return AWS_OP_SUCCESS;
9197
}
@@ -234,7 +240,7 @@ static int s_state_payload_check(struct aws_websocket_decoder *decoder, struct a
234240
decoder->state_bytes_processed = 0;
235241
decoder->state = AWS_WEBSOCKET_DECODER_STATE_PAYLOAD;
236242
} else {
237-
decoder->state = AWS_WEBSOCKET_DECODER_STATE_DONE;
243+
decoder->state = AWS_WEBSOCKET_DECODER_STATE_FRAME_END;
238244
}
239245

240246
return AWS_OP_SUCCESS;
@@ -266,9 +272,16 @@ static int s_state_payload(struct aws_websocket_decoder *decoder, struct aws_byt
266272
}
267273
}
268274

269-
/* TODO: validate utf-8 */
270275
/* TODO: validate payload of CLOSE frame */
271276

277+
/* Validate the UTF-8 for TEXT messages (a TEXT frame and any subsequent CONTINUATION frames) */
278+
if (decoder->processing_text_message && aws_websocket_is_data_frame(decoder->current_frame.opcode)) {
279+
if (aws_utf8_validator_update(decoder->text_message_validator, payload)) {
280+
AWS_LOGF_ERROR(AWS_LS_HTTP_WEBSOCKET, "id=%p: Received invalid UTF-8", (void *)decoder->user_data);
281+
return aws_raise_error(AWS_ERROR_HTTP_WEBSOCKET_PROTOCOL_ERROR);
282+
}
283+
}
284+
272285
/* Invoke on_payload() callback to inform user of payload data */
273286
int err = decoder->on_payload(payload, decoder->user_data);
274287
if (err) {
@@ -280,9 +293,34 @@ static int s_state_payload(struct aws_websocket_decoder *decoder, struct aws_byt
280293

281294
/* If all data consumed, proceed to next state. */
282295
if (decoder->state_bytes_processed == decoder->current_frame.payload_length) {
283-
decoder->state++;
296+
decoder->state = AWS_WEBSOCKET_DECODER_STATE_FRAME_END;
297+
}
298+
299+
return AWS_OP_SUCCESS;
300+
}
301+
302+
/* FRAME_END: Perform checks once we reach the end of the frame. */
303+
static int s_state_frame_end(struct aws_websocket_decoder *decoder, struct aws_byte_cursor *data) {
304+
(void)data;
305+
306+
/* If we're done processing a text message (a TEXT frame and any subsequent CONTINUATION frames),
307+
* complete the UTF-8 validation */
308+
if (decoder->processing_text_message && aws_websocket_is_data_frame(decoder->current_frame.opcode) &&
309+
decoder->current_frame.fin) {
310+
311+
if (aws_utf8_validator_finalize(decoder->text_message_validator)) {
312+
AWS_LOGF_ERROR(
313+
AWS_LS_HTTP_WEBSOCKET,
314+
"id=%p: Received invalid UTF-8 (incomplete encoding)",
315+
(void *)decoder->user_data);
316+
return aws_raise_error(AWS_ERROR_HTTP_WEBSOCKET_PROTOCOL_ERROR);
317+
}
318+
319+
decoder->processing_text_message = false;
284320
}
285321

322+
/* Done! */
323+
decoder->state = AWS_WEBSOCKET_DECODER_STATE_DONE;
286324
return AWS_OP_SUCCESS;
287325
}
288326

@@ -295,6 +333,7 @@ static state_fn *s_state_functions[AWS_WEBSOCKET_DECODER_STATE_DONE] = {
295333
s_state_masking_key,
296334
s_state_payload_check,
297335
s_state_payload,
336+
s_state_frame_end,
298337
};
299338

300339
int aws_websocket_decoder_process(
@@ -330,6 +369,7 @@ int aws_websocket_decoder_process(
330369

331370
void aws_websocket_decoder_init(
332371
struct aws_websocket_decoder *decoder,
372+
struct aws_allocator *alloc,
333373
aws_websocket_decoder_frame_fn *on_frame,
334374
aws_websocket_decoder_payload_fn *on_payload,
335375
void *user_data) {
@@ -338,4 +378,10 @@ void aws_websocket_decoder_init(
338378
decoder->user_data = user_data;
339379
decoder->on_frame = on_frame;
340380
decoder->on_payload = on_payload;
381+
decoder->text_message_validator = aws_utf8_validator_new(alloc);
382+
}
383+
384+
void aws_websocket_decoder_clean_up(struct aws_websocket_decoder *decoder) {
385+
aws_utf8_validator_destroy(decoder->text_message_validator);
386+
AWS_ZERO_STRUCT(*decoder);
341387
}

tests/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -167,6 +167,10 @@ add_test_case(websocket_decoder_fail_on_unknown_opcode)
167167
add_test_case(websocket_decoder_fragmented_message)
168168
add_test_case(websocket_decoder_fail_on_bad_fragmentation)
169169
add_test_case(websocket_decoder_control_frame_cannot_be_fragmented)
170+
add_test_case(websocket_decoder_utf8_text)
171+
add_test_case(websocket_decoder_fail_on_bad_utf8_text)
172+
add_test_case(websocket_decoder_fragmented_utf8_text)
173+
add_test_case(websocket_decoder_fail_on_fragmented_bad_utf8_text)
170174
add_test_case(websocket_decoder_on_frame_callback_can_fail_decoder)
171175
add_test_case(websocket_decoder_on_payload_callback_can_fail_decoder)
172176
add_test_case(websocket_encoder_sanity_check)

0 commit comments

Comments
 (0)