12
12
13
13
14
14
#define CHAR_EOF -1
15
- #define CHAR_NULL 0
16
15
#define CHAR_BANG 33
17
16
#define CHAR_DOUBLE 34
18
17
#define CHAR_PERCENT 37
24
23
#define CHAR_GT 62
25
24
#define CHAR_QUESTION 63
26
25
#define CHAR_RIGHTB 93
27
- #define CHAR_TICK 96
28
26
29
27
/* prototypes */
30
28
@@ -43,7 +41,6 @@ static int h5_state_before_attribute_name(h5_state_t* hs);
43
41
static int h5_state_before_attribute_value (h5_state_t * hs );
44
42
static int h5_state_attribute_value_double_quote (h5_state_t * hs );
45
43
static int h5_state_attribute_value_single_quote (h5_state_t * hs );
46
- static int h5_state_attribute_value_back_quote (h5_state_t * hs );
47
44
static int h5_state_attribute_value_no_quote (h5_state_t * hs );
48
45
static int h5_state_after_attribute_value_quoted_state (h5_state_t * hs );
49
46
static int h5_state_comment (h5_state_t * hs );
@@ -63,28 +60,16 @@ static int h5_state_doctype(h5_state_t* hs);
63
60
/**
64
61
* public function
65
62
*/
66
- void libinjection_h5_init (h5_state_t * hs , const char * s , size_t len , enum html5_flags flags )
63
+ void libinjection_h5_init (h5_state_t * hs , const char * s , size_t len , int flags )
67
64
{
68
65
memset (hs , 0 , sizeof (h5_state_t ));
69
66
hs -> s = s ;
70
67
hs -> len = len ;
71
-
72
- switch (flags ) {
73
- case DATA_STATE :
68
+ hs -> state = h5_state_data ;
69
+ if (flags == 0 ) {
74
70
hs -> state = h5_state_data ;
75
- break ;
76
- case VALUE_NO_QUOTE :
77
- hs -> state = h5_state_before_attribute_name ;
78
- break ;
79
- case VALUE_SINGLE_QUOTE :
80
- hs -> state = h5_state_attribute_value_single_quote ;
81
- break ;
82
- case VALUE_DOUBLE_QUOTE :
83
- hs -> state = h5_state_attribute_value_double_quote ;
84
- break ;
85
- case VALUE_BACK_QUOTE :
86
- hs -> state = h5_state_attribute_value_back_quote ;
87
- break ;
71
+ } else {
72
+ assert (0 );
88
73
}
89
74
}
90
75
@@ -100,18 +85,10 @@ int libinjection_h5_next(h5_state_t* hs)
100
85
/**
101
86
* Everything below here is private
102
87
*
103
- */
104
-
88
+ */
105
89
106
90
static int h5_is_white (char ch )
107
91
{
108
- /*
109
- * \t = horizontal tab = 0x09
110
- * \n = newline = 0x0A
111
- * \v = vertical tab = 0x0B
112
- * \f = form feed = 0x0C
113
- * \r = cr = 0x0D
114
- */
115
92
return strchr (" \t\n\v\f\r" , ch ) != NULL ;
116
93
}
117
94
@@ -120,17 +97,9 @@ static int h5_skip_white(h5_state_t* hs)
120
97
char ch ;
121
98
while (hs -> pos < hs -> len ) {
122
99
ch = hs -> s [hs -> pos ];
123
- switch (ch ) {
124
- case 0x00 : /* IE only */
125
- case 0x20 :
126
- case 0x09 :
127
- case 0x0A :
128
- case 0x0B : /* IE only */
129
- case 0x0C :
130
- case 0x0D : /* IE only */
100
+ if (ch == ' ' ) {
131
101
hs -> pos += 1 ;
132
- break ;
133
- default :
102
+ } else {
134
103
return ch ;
135
104
}
136
105
}
@@ -198,9 +167,6 @@ static int h5_state_tag_open(h5_state_t* hs)
198
167
return h5_state_bogus_comment2 (hs );
199
168
} else if ((ch >= 'a' && ch <= 'z' ) || (ch >= 'A' && ch <= 'Z' )) {
200
169
return h5_state_tag_name (hs );
201
- } else if (ch == CHAR_NULL ) {
202
- /* IE-ism NULL characters are ignored */
203
- return h5_state_tag_name (hs );
204
170
} else {
205
171
/* user input mistake in configuring state */
206
172
if (hs -> pos == 0 ) {
@@ -231,9 +197,7 @@ static int h5_state_end_tag_open(h5_state_t* hs)
231
197
} else if ((ch >= 'a' && ch <= 'z' ) || (ch >= 'A' && ch <= 'Z' )) {
232
198
return h5_state_tag_name (hs );
233
199
}
234
-
235
- hs -> is_close = 0 ;
236
- return h5_state_bogus_comment (hs );
200
+ return h5_state_data (hs );
237
201
}
238
202
/*
239
203
*
@@ -267,12 +231,7 @@ static int h5_state_tag_name(h5_state_t* hs)
267
231
pos = hs -> pos ;
268
232
while (pos < hs -> len ) {
269
233
ch = hs -> s [pos ];
270
- if (ch == 0 ) {
271
- /* special non-standard case */
272
- /* allow nulls in tag name */
273
- /* some old browsers apparently allow and ignore them */
274
- pos += 1 ;
275
- } else if (h5_is_white (ch )) {
234
+ if (h5_is_white (ch )) {
276
235
hs -> token_start = hs -> s + hs -> pos ;
277
236
hs -> token_len = pos - hs -> pos ;
278
237
hs -> token_type = TAG_NAME_OPEN ;
@@ -340,7 +299,7 @@ static int h5_state_before_attribute_name(h5_state_t* hs)
340
299
default : {
341
300
return h5_state_attribute_name (hs );
342
301
}
343
- }
302
+ }
344
303
}
345
304
346
305
static int h5_state_attribute_name (h5_state_t * hs )
@@ -349,7 +308,7 @@ static int h5_state_attribute_name(h5_state_t* hs)
349
308
size_t pos ;
350
309
351
310
TRACE ();
352
- pos = hs -> pos + 1 ;
311
+ pos = hs -> pos ;
353
312
while (pos < hs -> len ) {
354
313
ch = hs -> s [pos ];
355
314
if (h5_is_white (ch )) {
@@ -399,19 +358,21 @@ static int h5_state_attribute_name(h5_state_t* hs)
399
358
static int h5_state_after_attribute_name (h5_state_t * hs )
400
359
{
401
360
int c ;
361
+ size_t pos ;
402
362
403
363
TRACE ();
364
+ pos = hs -> pos ;
404
365
c = h5_skip_white (hs );
405
366
switch (c ) {
406
367
case CHAR_EOF : {
407
368
return 0 ;
408
369
}
409
370
case CHAR_SLASH : {
410
- hs -> pos += 1 ;
371
+ hs -> pos = pos + 1 ;
411
372
return h5_state_self_closing_start_tag (hs );
412
373
}
413
374
case CHAR_EQUALS : {
414
- hs -> pos += 1 ;
375
+ hs -> pos = pos + 1 ;
415
376
return h5_state_before_attribute_value (hs );
416
377
}
417
378
case CHAR_GT : {
@@ -442,9 +403,6 @@ static int h5_state_before_attribute_value(h5_state_t* hs)
442
403
return h5_state_attribute_value_double_quote (hs );
443
404
} else if (c == CHAR_SINGLE ) {
444
405
return h5_state_attribute_value_single_quote (hs );
445
- } else if (c == CHAR_TICK ) {
446
- /* NON STANDARD IE */
447
- return h5_state_attribute_value_back_quote (hs );
448
406
} else {
449
407
return h5_state_attribute_value_no_quote (hs );
450
408
}
@@ -457,16 +415,8 @@ static int h5_state_attribute_value_quote(h5_state_t* hs, char qchar)
457
415
458
416
TRACE ();
459
417
460
- /* skip initial quote in normal case.
461
- * don't do this "if (pos == 0)" since it means we have started
462
- * in a non-data state. given an input of '><foo
463
- * we want to make 0-length attribute name
464
- */
465
- if (hs -> pos > 0 ) {
466
- hs -> pos += 1 ;
467
- }
468
-
469
-
418
+ /* skip quote */
419
+ hs -> pos += 1 ;
470
420
idx = (const char * ) memchr (hs -> s + hs -> pos , qchar , hs -> len - hs -> pos );
471
421
if (idx == NULL ) {
472
422
hs -> token_start = hs -> s + hs -> pos ;
@@ -497,13 +447,6 @@ int h5_state_attribute_value_single_quote(h5_state_t* hs)
497
447
return h5_state_attribute_value_quote (hs , CHAR_SINGLE );
498
448
}
499
449
500
- static
501
- int h5_state_attribute_value_back_quote (h5_state_t * hs )
502
- {
503
- TRACE ();
504
- return h5_state_attribute_value_quote (hs , CHAR_TICK );
505
- }
506
-
507
450
static int h5_state_attribute_value_no_quote (h5_state_t * hs )
508
451
{
509
452
char ch ;
@@ -713,13 +656,10 @@ static int h5_state_comment(h5_state_t* hs)
713
656
char ch ;
714
657
const char * idx ;
715
658
size_t pos ;
716
- size_t offset ;
717
- const char * end = hs -> s + hs -> len ;
718
659
719
660
TRACE ();
720
661
pos = hs -> pos ;
721
662
while (1 ) {
722
-
723
663
idx = (const char * ) memchr (hs -> s + pos , CHAR_DASH , hs -> len - pos );
724
664
725
665
/* did not find anything or has less than 3 chars left */
@@ -730,62 +670,21 @@ static int h5_state_comment(h5_state_t* hs)
730
670
hs -> token_type = TAG_COMMENT ;
731
671
return 1 ;
732
672
}
733
- offset = 1 ;
734
-
735
- /* skip all nulls */
736
- while (idx + offset < end && * (idx + offset ) == 0 ) {
737
- offset += 1 ;
738
- }
739
- if (idx + offset == end ) {
740
- hs -> state = h5_state_eof ;
741
- hs -> token_start = hs -> s + hs -> pos ;
742
- hs -> token_len = hs -> len - hs -> pos ;
743
- hs -> token_type = TAG_COMMENT ;
744
- return 1 ;
745
- }
746
-
747
- ch = * (idx + offset );
673
+ ch = * (idx + 1 );
748
674
if (ch != CHAR_DASH && ch != CHAR_BANG ) {
749
675
pos = (size_t )(idx - hs -> s ) + 1 ;
750
676
continue ;
751
677
}
752
-
753
- /* need to test */
754
- #if 0
755
- /* skip all nulls */
756
- while (idx + offset < end && * (idx + offset ) == 0 ) {
757
- offset += 1 ;
758
- }
759
- if (idx + offset == end ) {
760
- hs -> state = h5_state_eof ;
761
- hs -> token_start = hs -> s + hs -> pos ;
762
- hs -> token_len = hs -> len - hs -> pos ;
763
- hs -> token_type = TAG_COMMENT ;
764
- return 1 ;
765
- }
766
- #endif
767
-
768
- offset += 1 ;
769
- if (idx + offset == end ) {
770
- hs -> state = h5_state_eof ;
771
- hs -> token_start = hs -> s + hs -> pos ;
772
- hs -> token_len = hs -> len - hs -> pos ;
773
- hs -> token_type = TAG_COMMENT ;
774
- return 1 ;
775
- }
776
-
777
-
778
- ch = * (idx + offset );
678
+ ch = * (idx + 2 );
779
679
if (ch != CHAR_GT ) {
780
680
pos = (size_t )(idx - hs -> s ) + 1 ;
781
681
continue ;
782
682
}
783
- offset += 1 ;
784
683
785
684
/* ends in --> or -!> */
786
685
hs -> token_start = hs -> s + hs -> pos ;
787
686
hs -> token_len = (size_t )(idx - hs -> s ) - hs -> pos ;
788
- hs -> pos = (size_t )(idx + offset - hs -> s );
687
+ hs -> pos = (size_t )(idx - hs -> s ) + 3 ;
789
688
hs -> state = h5_state_data ;
790
689
hs -> token_type = TAG_COMMENT ;
791
690
return 1 ;
0 commit comments