Skip to content

Commit 319f5a3

Browse files
Branislav Zahradník, khwilliamson
Branislav Zahradník
authored and committed
malformed utf8 message: use symbol instead of magic number with comment
Symbols that express meaning are easier to understand and easier to grep, and, unlike a magic number paired with a comment, they cannot have their value changed while the explanatory comment is left out of date.
1 parent 6c2ae79 commit 319f5a3

File tree

7 files changed

+24
-21
lines changed

7 files changed

+24
-21
lines changed

doop.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -373,7 +373,7 @@ S_do_trans_count_invmap(pTHX_ SV * const sv, AV * const invmap)
373373
else {
374374
from = utf8_to_uvchr_buf(s, send, &s_len);
375375
if (from == 0 && *s != '\0') {
376-
_force_out_malformed_utf8_message(s, send, 0, /*die*/TRUE);
376+
_force_out_malformed_utf8_message(s, send, 0, MALFORMED_UTF8_DIE);
377377
}
378378
}
379379

@@ -492,7 +492,7 @@ S_do_trans_invmap(pTHX_ SV * const sv, AV * const invmap)
492492
else {
493493
from = utf8_to_uvchr_buf(s, send, &s_len);
494494
if (from == 0 && *s != '\0') {
495-
_force_out_malformed_utf8_message(s, send, 0, /*die*/TRUE);
495+
_force_out_malformed_utf8_message(s, send, 0, MALFORMED_UTF8_DIE);
496496
}
497497
}
498498

handy.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1436,7 +1436,7 @@ or casts
14361436
/* Likewise, this is effectively a static assert to be used to guarantee the
14371437
* parameter is a pointer
14381438
*
1439-
* NOT suitable for void*
1439+
* NOT suitable for void*
14401440
*/
14411441
#define ASSERT_IS_PTR(x) (__ASSERT_(sizeof(*(x))) (x))
14421442

@@ -2276,15 +2276,15 @@ END_EXTERN_C
22762276

22772277
#define generic_utf8_safe_(classnum, p, e, above_latin1) \
22782278
((! _utf8_safe_assert(p, e)) \
2279-
? (_force_out_malformed_utf8_message((U8 *) (p), (U8 *) (e), 0, 1), 0)\
2279+
? (_force_out_malformed_utf8_message((U8 *) (p), (U8 *) (e), 0, MALFORMED_UTF8_DIE), 0)\
22802280
: (UTF8_IS_INVARIANT(*(p))) \
22812281
? generic_isCC_(*(p), classnum) \
22822282
: (UTF8_IS_DOWNGRADEABLE_START(*(p)) \
22832283
? ((LIKELY((e) - (p) > 1 && UTF8_IS_CONTINUATION(*((p)+1)))) \
22842284
? generic_isCC_(EIGHT_BIT_UTF8_TO_NATIVE(*(p), *((p)+1 )), \
22852285
classnum) \
22862286
: (_force_out_malformed_utf8_message( \
2287-
(U8 *) (p), (U8 *) (e), 0, 1), 0)) \
2287+
(U8 *) (p), (U8 *) (e), 0, MALFORMED_UTF8_DIE), 0)) \
22882288
: above_latin1))
22892289
/* Like the above, but calls 'above_latin1(p)' to get the utf8 value.
22902290
* 'above_latin1' can be a macro */
@@ -2294,7 +2294,7 @@ END_EXTERN_C
22942294
generic_utf8_safe_(classnum, p, e, \
22952295
(UNLIKELY((e) - (p) < UTF8SKIP(p)) \
22962296
? (_force_out_malformed_utf8_message( \
2297-
(U8 *) (p), (U8 *) (e), 0, 1), 0) \
2297+
(U8 *) (p), (U8 *) (e), 0, MALFORMED_UTF8_DIE), 0) \
22982298
: above_latin1(p)))
22992299
/* Like the above, but passes classnum to _isFOO_utf8(), instead of having an
23002300
* 'above_latin1' parameter */
@@ -2384,7 +2384,7 @@ END_EXTERN_C
23842384
generic_utf8_safe_no_upper_latin1_(CC_XDIGIT_, p, e, \
23852385
(UNLIKELY((e) - (p) < UTF8SKIP(p)) \
23862386
? (_force_out_malformed_utf8_message( \
2387-
(U8 *) (p), (U8 *) (e), 0, 1), 0) \
2387+
(U8 *) (p), (U8 *) (e), 0, MALFORMED_UTF8_DIE), 0) \
23882388
: is_XDIGIT_high(p)))
23892389

23902390
#define toFOLD_utf8(p,e,s,l) toFOLD_utf8_safe(p,e,s,l)
@@ -2433,7 +2433,7 @@ END_EXTERN_C
24332433
? ((LIKELY((e) - (p) > 1 && UTF8_IS_CONTINUATION(*((p)+1)))) \
24342434
? macro(EIGHT_BIT_UTF8_TO_NATIVE(*(p), *((p)+1))) \
24352435
: (_force_out_malformed_utf8_message( \
2436-
(U8 *) (p), (U8 *) (e), 0, 1), 0)) \
2436+
(U8 *) (p), (U8 *) (e), 0, MALFORMED_UTF8_DIE), 0)) \
24372437
: above_latin1))
24382438

24392439
#define generic_LC_invlist_utf8_safe_(macro, classnum, p, e) \
@@ -2447,7 +2447,7 @@ END_EXTERN_C
24472447
generic_LC_utf8_safe_(classnum, p, e, \
24482448
(UNLIKELY((e) - (p) < UTF8SKIP(p)) \
24492449
? (_force_out_malformed_utf8_message( \
2450-
(U8 *) (p), (U8 *) (e), 0, 1), 0) \
2450+
(U8 *) (p), (U8 *) (e), 0, MALFORMED_UTF8_DIE), 0) \
24512451
: above_latin1(p)))
24522452

24532453
#define isALPHANUMERIC_LC_utf8_safe(p, e) \

pp_pack.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
#define PERL_IN_PP_PACK_C
3333
#include "perl.h"
3434

35-
/* Types used by pack/unpack */
35+
/* Types used by pack/unpack */
3636
typedef enum {
3737
e_no_len, /* no length */
3838
e_number, /* number, [] */
@@ -48,7 +48,7 @@ typedef struct tempsym {
4848
U32 flags; /* /=4, comma=2, pack=1 */
4949
/* and group modifiers */
5050
SSize_t length; /* length/repeat count */
51-
howlen_t howlen; /* how length is given */
51+
howlen_t howlen; /* how length is given */
5252
int level; /* () nesting level */
5353
STRLEN strbeg; /* offset of group start */
5454
struct tempsym *previous; /* previous group */
@@ -3167,7 +3167,7 @@ PP_wrapped(pp_pack, 0, 1)
31673167
_force_out_malformed_utf8_message(error_pos,
31683168
(U8 *) result + result_len,
31693169
0, /* no flags */
3170-
1 /* Die */
3170+
MALFORMED_UTF8_DIE
31713171
);
31723172
NOT_REACHED; /* NOTREACHED */
31733173
}

regexec.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8041,7 +8041,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
80418041
break;
80428042
}
80438043
} while(n);
8044-
8044+
80458045
if (!n) /* this means there is nothing that matched */
80468046
sayNO;
80478047
}
@@ -10921,7 +10921,7 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
1092110921
if (c_len == (STRLEN)-1) {
1092210922
_force_out_malformed_utf8_message(p, p_end,
1092310923
utf8n_flags,
10924-
1 /* 1 means die */ );
10924+
MALFORMED_UTF8_DIE);
1092510925
NOT_REACHED; /* NOTREACHED */
1092610926
}
1092710927
if ( c > 255

toke.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -972,7 +972,7 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
972972
_force_out_malformed_utf8_message(first_bad_char_loc,
973973
(U8 *) s + SvCUR(line),
974974
0,
975-
1 /* 1 means die */ );
975+
MALFORMED_UTF8_DIE);
976976
NOT_REACHED; /* NOTREACHED */
977977
}
978978
else if (ONLY_ASCII && UNLIKELY(! is_ascii_string_loc(
@@ -1589,7 +1589,7 @@ Perl_lex_next_chunk(pTHX_ U32 flags)
15891589
_force_out_malformed_utf8_message(first_bad_char_loc,
15901590
(U8 *) PL_parser->bufend,
15911591
0,
1592-
1 /* 1 means die */ );
1592+
MALFORMED_UTF8_DIE);
15931593
NOT_REACHED; /* NOTREACHED */
15941594
}
15951595
}
@@ -1679,7 +1679,7 @@ Perl_lex_peek_unichar(pTHX_ U32 flags)
16791679
_force_out_malformed_utf8_message((U8 *) s,
16801680
(U8 *) bufend,
16811681
0,
1682-
1 /* 1 means die */ );
1682+
MALFORMED_UTF8_DIE);
16831683
NOT_REACHED; /* NOTREACHED */
16841684
}
16851685
return unichar;
@@ -3058,7 +3058,7 @@ Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
30583058
_force_out_malformed_utf8_message(first_bad_char_loc,
30593059
(U8 *) PL_parser->bufend,
30603060
0,
3061-
0 /* 0 means don't die */ );
3061+
MALFORMED_UTF8_WARN);
30623062
/* diag_listed_as: Malformed UTF-8 returned by \N{%s}
30633063
immediately after '%s' */
30643064
*error_msg = Perl_form(aTHX_
@@ -9656,7 +9656,7 @@ Perl_yylex(pTHX)
96569656
_force_out_malformed_utf8_message(first_bad_char_loc,
96579657
(U8 *) PL_bufend,
96589658
0,
9659-
1 /* 1 means die */ );
9659+
MALFORMED_UTF8_DIE);
96609660
NOT_REACHED; /* NOTREACHED */
96619661
}
96629662
else if (ONLY_ASCII && UNLIKELY(! is_ascii_string_loc(

utf8.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3289,7 +3289,7 @@ S_is_utf8_common(pTHX_ const U8 *const p, const U8 * const e,
32893289
PERL_ARGS_ASSERT_IS_UTF8_COMMON;
32903290

32913291
if (cp == 0 && (p >= e || *p != '\0')) {
3292-
_force_out_malformed_utf8_message(p, e, 0, 1);
3292+
_force_out_malformed_utf8_message(p, e, 0, MALFORMED_UTF8_DIE);
32933293
NOT_REACHED; /* NOTREACHED */
32943294
}
32953295

@@ -3834,7 +3834,7 @@ S_turkic_uc(pTHX_ const U8 * const p, const U8 * const e,
38343834
STRLEN len_result; \
38353835
result = utf8n_to_uvchr(p, e - p, &len_result, UTF8_CHECK_ONLY); \
38363836
if (len_result == (STRLEN) -1) { \
3837-
_force_out_malformed_utf8_message(p, e, 0, 1 /* Die */ ); \
3837+
_force_out_malformed_utf8_message(p, e, 0, MALFORMED_UTF8_DIE ); \
38383838
}
38393839

38403840
#define CASE_CHANGE_BODY_END(locale_flags, change_macro) \

utf8.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1304,6 +1304,9 @@ point's representation.
13041304
* retained solely for backwards compatibility */
13051305
#define IS_UTF8_CHAR(p, n) (isUTF8_CHAR(p, (p) + (n)) == n)
13061306

1307+
#define MALFORMED_UTF8_DIE TRUE
1308+
#define MALFORMED_UTF8_WARN FALSE
1309+
13071310
#endif /* PERL_UTF8_H_ */
13081311

13091312
/*

0 commit comments

Comments
 (0)