55
66#include <aws/http/private/websocket_decoder.h>
77
8+ #include <aws/common/encoding.h>
9+
810#include <inttypes.h>
911
/* Signature shared by the decoder's per-state handler functions; pointers to functions of this
 * type are stored in the s_state_functions table. The handlers visible in this file return
 * AWS_OP_SUCCESS on success, or the result of aws_raise_error() on failure.
 * NOTE(review): `data` is presumably a cursor over the not-yet-consumed input bytes - confirm. */
typedef int (state_fn )(struct aws_websocket_decoder * decoder , struct aws_byte_cursor * data );
@@ -86,6 +88,10 @@ static int s_state_opcode_byte(struct aws_websocket_decoder *decoder, struct aws
8688 }
8789 }
8890
91+ if (decoder -> current_frame .opcode == AWS_WEBSOCKET_OPCODE_TEXT ) {
92+ decoder -> processing_text_message = true;
93+ }
94+
8995 decoder -> state = AWS_WEBSOCKET_DECODER_STATE_LENGTH_BYTE ;
9096 return AWS_OP_SUCCESS ;
9197}
@@ -234,7 +240,7 @@ static int s_state_payload_check(struct aws_websocket_decoder *decoder, struct a
234240 decoder -> state_bytes_processed = 0 ;
235241 decoder -> state = AWS_WEBSOCKET_DECODER_STATE_PAYLOAD ;
236242 } else {
237- decoder -> state = AWS_WEBSOCKET_DECODER_STATE_DONE ;
243+ decoder -> state = AWS_WEBSOCKET_DECODER_STATE_FRAME_END ;
238244 }
239245
240246 return AWS_OP_SUCCESS ;
@@ -266,9 +272,16 @@ static int s_state_payload(struct aws_websocket_decoder *decoder, struct aws_byt
266272 }
267273 }
268274
269- /* TODO: validate utf-8 */
270275 /* TODO: validate payload of CLOSE frame */
271276
277+ /* Validate the UTF-8 for TEXT messages (a TEXT frame and any subsequent CONTINUATION frames) */
278+ if (decoder -> processing_text_message && aws_websocket_is_data_frame (decoder -> current_frame .opcode )) {
279+ if (aws_utf8_validator_update (decoder -> text_message_validator , payload )) {
280+ AWS_LOGF_ERROR (AWS_LS_HTTP_WEBSOCKET , "id=%p: Received invalid UTF-8" , (void * )decoder -> user_data );
281+ return aws_raise_error (AWS_ERROR_HTTP_WEBSOCKET_PROTOCOL_ERROR );
282+ }
283+ }
284+
272285 /* Invoke on_payload() callback to inform user of payload data */
273286 int err = decoder -> on_payload (payload , decoder -> user_data );
274287 if (err ) {
@@ -280,9 +293,34 @@ static int s_state_payload(struct aws_websocket_decoder *decoder, struct aws_byt
280293
281294 /* If all data consumed, proceed to next state. */
282295 if (decoder -> state_bytes_processed == decoder -> current_frame .payload_length ) {
283- decoder -> state ++ ;
296+ decoder -> state = AWS_WEBSOCKET_DECODER_STATE_FRAME_END ;
297+ }
298+
299+ return AWS_OP_SUCCESS ;
300+ }
301+
302+ /* FRAME_END: Perform checks once we reach the end of the frame. */
303+ static int s_state_frame_end (struct aws_websocket_decoder * decoder , struct aws_byte_cursor * data ) {
304+ (void )data ;
305+
306+ /* If we're done processing a text message (a TEXT frame and any subsequent CONTINUATION frames),
307+ * complete the UTF-8 validation */
308+ if (decoder -> processing_text_message && aws_websocket_is_data_frame (decoder -> current_frame .opcode ) &&
309+ decoder -> current_frame .fin ) {
310+
311+ if (aws_utf8_validator_finalize (decoder -> text_message_validator )) {
312+ AWS_LOGF_ERROR (
313+ AWS_LS_HTTP_WEBSOCKET ,
314+ "id=%p: Received invalid UTF-8 (incomplete encoding)" ,
315+ (void * )decoder -> user_data );
316+ return aws_raise_error (AWS_ERROR_HTTP_WEBSOCKET_PROTOCOL_ERROR );
317+ }
318+
319+ decoder -> processing_text_message = false;
284320 }
285321
322+ /* Done! */
323+ decoder -> state = AWS_WEBSOCKET_DECODER_STATE_DONE ;
286324 return AWS_OP_SUCCESS ;
287325}
288326
@@ -295,6 +333,7 @@ static state_fn *s_state_functions[AWS_WEBSOCKET_DECODER_STATE_DONE] = {
295333 s_state_masking_key ,
296334 s_state_payload_check ,
297335 s_state_payload ,
336+ s_state_frame_end ,
298337};
299338
300339int aws_websocket_decoder_process (
@@ -330,6 +369,7 @@ int aws_websocket_decoder_process(
330369
331370void aws_websocket_decoder_init (
332371 struct aws_websocket_decoder * decoder ,
372+ struct aws_allocator * alloc ,
333373 aws_websocket_decoder_frame_fn * on_frame ,
334374 aws_websocket_decoder_payload_fn * on_payload ,
335375 void * user_data ) {
@@ -338,4 +378,10 @@ void aws_websocket_decoder_init(
338378 decoder -> user_data = user_data ;
339379 decoder -> on_frame = on_frame ;
340380 decoder -> on_payload = on_payload ;
381+ decoder -> text_message_validator = aws_utf8_validator_new (alloc );
382+ }
383+
/* Release what the decoder owns: destroys the UTF-8 validator stored in text_message_validator
 * (created by aws_websocket_decoder_init() via aws_utf8_validator_new()), then clears the whole
 * struct with AWS_ZERO_STRUCT.
 * NOTE(review): after this call the decoder presumably must be re-initialized before reuse - confirm. */
void aws_websocket_decoder_clean_up (struct aws_websocket_decoder * decoder ) {
    /* Destroy first: clearing the struct afterwards overwrites the validator pointer. */
    aws_utf8_validator_destroy (decoder -> text_message_validator );
    AWS_ZERO_STRUCT (* decoder );
}
0 commit comments