Skip to content

Commit d7ec8f5

Browse files
author
Branislav Zahradník
committed
malformed utf8 message: use symbol instead of magic number with comment
Symbols that express meaning are easier to understand and easier to grep for, and they avoid the risk of a magic number's value being changed without its accompanying comment being updated.
1 parent fc734f5 commit d7ec8f5

File tree

7 files changed

+24
-21
lines changed

7 files changed

+24
-21
lines changed

doop.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -369,7 +369,7 @@ S_do_trans_count_invmap(pTHX_ SV * const sv, AV * const invmap)
369369
else {
370370
from = utf8_to_uvchr_buf(s, send, &s_len);
371371
if (from == 0 && *s != '\0') {
372-
_force_out_malformed_utf8_message(s, send, 0, /*die*/TRUE);
372+
_force_out_malformed_utf8_message(s, send, 0, MALFORMED_UTF8_DIE);
373373
}
374374
}
375375

@@ -486,7 +486,7 @@ S_do_trans_invmap(pTHX_ SV * const sv, AV * const invmap)
486486
else {
487487
from = utf8_to_uvchr_buf(s, send, &s_len);
488488
if (from == 0 && *s != '\0') {
489-
_force_out_malformed_utf8_message(s, send, 0, /*die*/TRUE);
489+
_force_out_malformed_utf8_message(s, send, 0, MALFORMED_UTF8_DIE);
490490
}
491491
}
492492

handy.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1435,7 +1435,7 @@ or casts
14351435
/* Likewise, this is effectively a static assert to be used to guarantee the
14361436
* parameter is a pointer
14371437
*
1438-
* NOT suitable for void*
1438+
* NOT suitable for void*
14391439
*/
14401440
#define ASSERT_IS_PTR(x) (__ASSERT_(sizeof(*(x))) (x))
14411441

@@ -2277,15 +2277,15 @@ END_EXTERN_C
22772277

22782278
#define generic_utf8_safe_(classnum, p, e, above_latin1) \
22792279
((! _utf8_safe_assert(p, e)) \
2280-
? (_force_out_malformed_utf8_message((U8 *) (p), (U8 *) (e), 0, 1), 0)\
2280+
? (_force_out_malformed_utf8_message((U8 *) (p), (U8 *) (e), 0, MALFORMED_UTF8_DIE), 0)\
22812281
: (UTF8_IS_INVARIANT(*(p))) \
22822282
? generic_isCC_(*(p), classnum) \
22832283
: (UTF8_IS_DOWNGRADEABLE_START(*(p)) \
22842284
? ((LIKELY((e) - (p) > 1 && UTF8_IS_CONTINUATION(*((p)+1)))) \
22852285
? generic_isCC_(EIGHT_BIT_UTF8_TO_NATIVE(*(p), *((p)+1 )), \
22862286
classnum) \
22872287
: (_force_out_malformed_utf8_message( \
2288-
(U8 *) (p), (U8 *) (e), 0, 1), 0)) \
2288+
(U8 *) (p), (U8 *) (e), 0, MALFORMED_UTF8_DIE), 0)) \
22892289
: above_latin1))
22902290
/* Like the above, but calls 'above_latin1(p)' to get the utf8 value.
22912291
* 'above_latin1' can be a macro */
@@ -2295,7 +2295,7 @@ END_EXTERN_C
22952295
generic_utf8_safe_(classnum, p, e, \
22962296
(UNLIKELY((e) - (p) < UTF8SKIP(p)) \
22972297
? (_force_out_malformed_utf8_message( \
2298-
(U8 *) (p), (U8 *) (e), 0, 1), 0) \
2298+
(U8 *) (p), (U8 *) (e), 0, MALFORMED_UTF8_DIE), 0) \
22992299
: above_latin1(p)))
23002300
/* Like the above, but passes classnum to _isFOO_utf8(), instead of having an
23012301
* 'above_latin1' parameter */
@@ -2385,7 +2385,7 @@ END_EXTERN_C
23852385
generic_utf8_safe_no_upper_latin1_(CC_XDIGIT_, p, e, \
23862386
(UNLIKELY((e) - (p) < UTF8SKIP(p)) \
23872387
? (_force_out_malformed_utf8_message( \
2388-
(U8 *) (p), (U8 *) (e), 0, 1), 0) \
2388+
(U8 *) (p), (U8 *) (e), 0, MALFORMED_UTF8_DIE), 0) \
23892389
: is_XDIGIT_high(p)))
23902390

23912391
#define toFOLD_utf8(p,e,s,l) toFOLD_utf8_safe(p,e,s,l)
@@ -2434,7 +2434,7 @@ END_EXTERN_C
24342434
? ((LIKELY((e) - (p) > 1 && UTF8_IS_CONTINUATION(*((p)+1)))) \
24352435
? macro(EIGHT_BIT_UTF8_TO_NATIVE(*(p), *((p)+1))) \
24362436
: (_force_out_malformed_utf8_message( \
2437-
(U8 *) (p), (U8 *) (e), 0, 1), 0)) \
2437+
(U8 *) (p), (U8 *) (e), 0, MALFORMED_UTF8_DIE), 0)) \
24382438
: above_latin1))
24392439

24402440
#define generic_LC_invlist_utf8_safe_(macro, classnum, p, e) \
@@ -2448,7 +2448,7 @@ END_EXTERN_C
24482448
generic_LC_utf8_safe_(classnum, p, e, \
24492449
(UNLIKELY((e) - (p) < UTF8SKIP(p)) \
24502450
? (_force_out_malformed_utf8_message( \
2451-
(U8 *) (p), (U8 *) (e), 0, 1), 0) \
2451+
(U8 *) (p), (U8 *) (e), 0, MALFORMED_UTF8_DIE), 0) \
24522452
: above_latin1(p)))
24532453

24542454
#define isALPHANUMERIC_LC_utf8_safe(p, e) \

pp_pack.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
#define PERL_IN_PP_PACK_C
3333
#include "perl.h"
3434

35-
/* Types used by pack/unpack */
35+
/* Types used by pack/unpack */
3636
typedef enum {
3737
e_no_len, /* no length */
3838
e_number, /* number, [] */
@@ -48,7 +48,7 @@ typedef struct tempsym {
4848
U32 flags; /* /=4, comma=2, pack=1 */
4949
/* and group modifiers */
5050
SSize_t length; /* length/repeat count */
51-
howlen_t howlen; /* how length is given */
51+
howlen_t howlen; /* how length is given */
5252
int level; /* () nesting level */
5353
STRLEN strbeg; /* offset of group start */
5454
struct tempsym *previous; /* previous group */
@@ -3167,7 +3167,7 @@ PP_wrapped(pp_pack, 0, 1)
31673167
_force_out_malformed_utf8_message(error_pos,
31683168
(U8 *) result + result_len,
31693169
0, /* no flags */
3170-
1 /* Die */
3170+
MALFORMED_UTF8_DIE
31713171
);
31723172
NOT_REACHED; /* NOTREACHED */
31733173
}

regexec.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8041,7 +8041,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
80418041
break;
80428042
}
80438043
} while(n);
8044-
8044+
80458045
if (!n) /* this means there is nothing that matched */
80468046
sayNO;
80478047
}
@@ -10921,7 +10921,7 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
1092110921
if (c_len == (STRLEN)-1) {
1092210922
_force_out_malformed_utf8_message(p, p_end,
1092310923
utf8n_flags,
10924-
1 /* 1 means die */ );
10924+
MALFORMED_UTF8_DIE);
1092510925
NOT_REACHED; /* NOTREACHED */
1092610926
}
1092710927
if ( c > 255

toke.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -914,7 +914,7 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
914914
_force_out_malformed_utf8_message(first_bad_char_loc,
915915
(U8 *) s + SvCUR(line),
916916
0,
917-
1 /* 1 means die */ );
917+
MALFORMED_UTF8_DIE);
918918
NOT_REACHED; /* NOTREACHED */
919919
}
920920

@@ -1551,7 +1551,7 @@ Perl_lex_next_chunk(pTHX_ U32 flags)
15511551
_force_out_malformed_utf8_message(first_bad_char_loc,
15521552
(U8 *) PL_parser->bufend,
15531553
0,
1554-
1 /* 1 means die */ );
1554+
MALFORMED_UTF8_DIE);
15551555
NOT_REACHED; /* NOTREACHED */
15561556
}
15571557
}
@@ -1634,7 +1634,7 @@ Perl_lex_peek_unichar(pTHX_ U32 flags)
16341634
_force_out_malformed_utf8_message((U8 *) s,
16351635
(U8 *) bufend,
16361636
0,
1637-
1 /* 1 means die */ );
1637+
MALFORMED_UTF8_DIE);
16381638
NOT_REACHED; /* NOTREACHED */
16391639
}
16401640
return unichar;
@@ -3013,7 +3013,7 @@ Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
30133013
_force_out_malformed_utf8_message(first_bad_char_loc,
30143014
(U8 *) PL_parser->bufend,
30153015
0,
3016-
0 /* 0 means don't die */ );
3016+
MALFORMED_UTF8_WARN);
30173017
/* diag_listed_as: Malformed UTF-8 returned by \N{%s}
30183018
immediately after '%s' */
30193019
*error_msg = Perl_form(aTHX_
@@ -9703,7 +9703,7 @@ Perl_yylex(pTHX)
97039703
_force_out_malformed_utf8_message(first_bad_char_loc,
97049704
(U8 *) PL_bufend,
97059705
0,
9706-
1 /* 1 means die */ );
9706+
MALFORMED_UTF8_DIE);
97079707
NOT_REACHED; /* NOTREACHED */
97089708
}
97099709
PL_parser->recheck_utf8_validity = FALSE;

utf8.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3308,7 +3308,7 @@ S_is_utf8_common(pTHX_ const U8 *const p, const U8 * const e,
33083308
PERL_ARGS_ASSERT_IS_UTF8_COMMON;
33093309

33103310
if (cp == 0 && (p >= e || *p != '\0')) {
3311-
_force_out_malformed_utf8_message(p, e, 0, 1);
3311+
_force_out_malformed_utf8_message(p, e, 0, MALFORMED_UTF8_DIE);
33123312
NOT_REACHED; /* NOTREACHED */
33133313
}
33143314

@@ -3853,7 +3853,7 @@ S_turkic_uc(pTHX_ const U8 * const p, const U8 * const e,
38533853
STRLEN len_result; \
38543854
result = utf8n_to_uvchr(p, e - p, &len_result, UTF8_CHECK_ONLY); \
38553855
if (len_result == (STRLEN) -1) { \
3856-
_force_out_malformed_utf8_message(p, e, 0, 1 /* Die */ ); \
3856+
_force_out_malformed_utf8_message(p, e, 0, MALFORMED_UTF8_DIE ); \
38573857
}
38583858

38593859
#define CASE_CHANGE_BODY_END(locale_flags, change_macro) \

utf8.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1312,6 +1312,9 @@ point's representation.
13121312
* retained solely for backwards compatibility */
13131313
#define IS_UTF8_CHAR(p, n) (isUTF8_CHAR(p, (p) + (n)) == n)
13141314

1315+
/* Value for the final argument of _force_out_malformed_utf8_message():
 * requests that the function die.  Replaces the bare 1 / TRUE that call sites
 * previously annotated with a "1 means die" comment. */
#define MALFORMED_UTF8_DIE TRUE
1316+
/* Value for the final argument of _force_out_malformed_utf8_message():
 * requests that the function NOT die.  Replaces the bare 0 that call sites
 * previously annotated with a "0 means don't die" comment.
 * NOTE(review): the name implies a warning is issued instead; confirm against
 * the function's implementation. */
#define MALFORMED_UTF8_WARN FALSE
1317+
13151318
#endif /* PERL_UTF8_H_ */
13161319

13171320
/*

0 commit comments

Comments
 (0)