Skip to content

Commit 3046486

Browse files
authored
Add (?aT) for updating PCRE2_EXTRA_ASCII_DIGIT in pattern (PCRE2Project#307)
This affects the definition of [:digit:] and [:xdigit:]. For consistency, (?aP) now also sets PCRE2_EXTRA_ASCII_DIGIT, so it has a similar effect on those two classes.
1 parent 0a55280 commit 3046486

File tree

8 files changed

+262
-16
lines changed

8 files changed

+262
-16
lines changed

doc/pcre2api.3

+3-2
Original file line numberDiff line numberDiff line change
@@ -1972,11 +1972,12 @@ setting.
19721972
PCRE2_EXTRA_ASCII_DIGIT
19731973
.sp
19741974
This option forces the POSIX character classes [:digit:] and [:xdigit:] to
1975-
match only ASCII digits, even when PCRE2_UCP is set.
1975+
match only ASCII digits, even when PCRE2_UCP is set. It can be changed within
1976+
a pattern by means of the (?aT) option setting.
19761977
.sp
19771978
PCRE2_EXTRA_ASCII_POSIX
19781979
.sp
1979-
This option forces the POSIX character classes to match only ASCII characters,
1980+
This option forces all the POSIX character classes to match only ASCII characters,
19801981
even when PCRE2_UCP is set. It can be changed within a pattern by means of the
19811982
(?aP) option setting.
19821983
.sp

doc/pcre2pattern.3

+1
Original file line numberDiff line numberDiff line change
@@ -1666,6 +1666,7 @@ pairs or individual letters:
16661666
aS for PCRE2_EXTRA_ASCII_BSS
16671667
aW for PCRE2_EXTRA_ASCII_BSW
16681668
aP for PCRE2_EXTRA_ASCII_POSIX
1669+
aT for PCRE2_EXTRA_ASCII_DIGIT
16691670
r for PCRE2_EXTRA_CASELESS_RESTRICT
16701671
J for PCRE2_DUPNAMES
16711672
U for PCRE2_UNGREEDY

doc/pcre2syntax.3

+2-1
Original file line numberDiff line numberDiff line change
@@ -369,7 +369,8 @@ of the group.
369369
(?aD) restrict \ed to ASCII, even in UCP mode
370370
(?aS) restrict \es to ASCII, even in UCP mode
371371
(?aW) restrict \ew to ASCII, even in UCP mode
372-
(?aP) restrict POSIX classes to ASCII even in UCP mode
372+
(?aP) restrict all POSIX classes to ASCII, even in UCP mode
373+
(?aT) restrict [:digit:] and [:xdigit:] to ASCII, even in UCP mode
373374
(?i) caseless
374375
(?J) allow duplicate named groups
375376
(?m) multiline

src/pcre2_compile.c

+10-3
Original file line numberDiff line numberDiff line change
@@ -2741,7 +2741,7 @@ the main compiling phase. */
27412741

27422742
#define PARSE_TRACKED_EXTRA_OPTIONS (PCRE2_EXTRA_CASELESS_RESTRICT| \
27432743
PCRE2_EXTRA_ASCII_BSD|PCRE2_EXTRA_ASCII_BSS|PCRE2_EXTRA_ASCII_BSW| \
2744-
PCRE2_EXTRA_ASCII_POSIX)
2744+
PCRE2_EXTRA_ASCII_DIGIT|PCRE2_EXTRA_ASCII_POSIX)
27452745

27462746
/* States used for analyzing ranges in character classes. The two OK values
27472747
must be last. */
@@ -4233,7 +4233,7 @@ while (ptr < ptrend)
42334233
}
42344234
if (*ptr == CHAR_P)
42354235
{
4236-
*xoptset |= PCRE2_EXTRA_ASCII_POSIX;
4236+
*xoptset |= (PCRE2_EXTRA_ASCII_POSIX|PCRE2_EXTRA_ASCII_DIGIT);
42374237
ptr++;
42384238
break;
42394239
}
@@ -4243,6 +4243,12 @@ while (ptr < ptrend)
42434243
ptr++;
42444244
break;
42454245
}
4246+
if (*ptr == CHAR_T)
4247+
{
4248+
*xoptset |= PCRE2_EXTRA_ASCII_DIGIT;
4249+
ptr++;
4250+
break;
4251+
}
42464252
if (*ptr == CHAR_W)
42474253
{
42484254
*xoptset |= PCRE2_EXTRA_ASCII_BSW;
@@ -4251,7 +4257,8 @@ while (ptr < ptrend)
42514257
}
42524258
}
42534259
*xoptset |= PCRE2_EXTRA_ASCII_BSD|PCRE2_EXTRA_ASCII_BSS|
4254-
PCRE2_EXTRA_ASCII_BSW|PCRE2_EXTRA_ASCII_POSIX;
4260+
PCRE2_EXTRA_ASCII_BSW|
4261+
PCRE2_EXTRA_ASCII_DIGIT|PCRE2_EXTRA_ASCII_POSIX;
42554262
break;
42564263

42574264
case CHAR_J: /* Record that it changed in the external options */

testdata/testinput5

+49-2
Original file line numberDiff line numberDiff line change
@@ -2343,11 +2343,19 @@
23432343
\= Expect no match
23442344
\x{660}\x{660}\x{660}
23452345

2346+
/\d(?-aD)\d(?aD)\d/utf,ucp,ascii_bsd
2347+
999
2348+
9\x{660}9
2349+
23462350
/\d(?a)\d(?-a)\d/utf,ucp
23472351
\x{660}9\x{660}
23482352
\= Expect no match
23492353
\x{660}\x{660}\x{660}
23502354

2355+
/\d(?-aD)\d(?aD)\d/utf,ucp,ascii_bsd
2356+
999
2357+
9\x{660}9
2358+
23512359
# SPACES
23522360

23532361
/>\s+</i,utf
@@ -2435,18 +2443,57 @@
24352443

24362444
# POSIX
24372445

2438-
/[[:digit:]]+/utf,ucp
2446+
/^[[:digit:]]+$/utf,ucp
2447+
123456
24392448
123\x{660}456
24402449

2441-
/[[:digit:]]+/utf,ucp,ascii_digit
2450+
/^[[:digit:]]+$/utf,ucp,ascii_digit
2451+
123456
2452+
\= Expect no match
24422453
123\x{660}456
24432454

24442455
/[[:digit:]]+/g,utf,ucp,ascii_digit
24452456
123\x{660}456
24462457

2458+
/(?-aT)[[:digit:]](?aT)[[:digit:]]/utf,ucp,ascii_digit
2459+
11
2460+
\x{ff11}1
2461+
\= Expect no match
2462+
1\x{ff11}
2463+
2464+
/(?-aT:[[:digit:]])[[:digit:]]/utf,ucp,ascii_digit
2465+
11
2466+
\x{ff11}1
2467+
\= Expect no match
2468+
1\x{ff11}
2469+
2470+
/(?-aT:[[:digit:]])[[:digit:]]/utf,never_ucp,ascii_digit
2471+
11
2472+
\= Expect no match
2473+
\x{ff11}1
2474+
1\x{ff11}
2475+
24472476
/[[:digit:]]+/utf,ucp,ascii_posix
24482477
123\x{660}456
24492478

2479+
/(?-aP)[[:digit:]](?aP)[[:digit:]]/utf,ucp,ascii_posix
2480+
11
2481+
\x{ff11}1
2482+
\= Expect no match
2483+
1\x{ff11}
2484+
2485+
/(?-aP:[[:digit:]])[[:digit:]]/utf,ucp,ascii_posix
2486+
11
2487+
\x{ff11}1
2488+
\= Expect no match
2489+
1\x{ff11}
2490+
2491+
/(?-a:[[:digit:]])[[:digit:]]/a,utf,ucp
2492+
11
2493+
\x{ff11}1
2494+
\= Expect no match
2495+
1\x{ff11}
2496+
24502497
/^[[:xdigit:]]+$/utf,ucp
24512498
f0
24522499
1A

testdata/testinput7

+49-2
Original file line numberDiff line numberDiff line change
@@ -2355,11 +2355,19 @@
23552355
\= Expect no match
23562356
\x{660}\x{660}\x{660}
23572357

2358+
/\d(?-aD)\d(?aD)\d/utf,ucp,ascii_bsd
2359+
999
2360+
9\x{660}9
2361+
23582362
/\d(?a)\d(?-a)\d/utf,ucp
23592363
\x{660}9\x{660}
23602364
\= Expect no match
23612365
\x{660}\x{660}\x{660}
23622366

2367+
/\d(?-aD)\d(?aD)\d/utf,ucp,ascii_bsd
2368+
999
2369+
9\x{660}9
2370+
23632371
# SPACES
23642372

23652373
/>\s+</i,utf
@@ -2432,18 +2440,57 @@
24322440

24332441
# POSIX
24342442

2435-
/[[:digit:]]+/utf,ucp
2443+
/^[[:digit:]]+$/utf,ucp
2444+
123456
24362445
123\x{660}456
24372446

2438-
/[[:digit:]]+/utf,ucp,ascii_digit
2447+
/^[[:digit:]]+$/utf,ucp,ascii_digit
2448+
123456
2449+
\= Expect no match
24392450
123\x{660}456
24402451

24412452
/[[:digit:]]+/g,utf,ucp,ascii_digit
24422453
123\x{660}456
24432454

2455+
/(?-aT)[[:digit:]](?aT)[[:digit:]]/utf,ucp,ascii_digit
2456+
11
2457+
\x{ff11}1
2458+
\= Expect no match
2459+
1\x{ff11}
2460+
2461+
/(?-aT:[[:digit:]])[[:digit:]]/utf,ucp,ascii_digit
2462+
11
2463+
\x{ff11}1
2464+
\= Expect no match
2465+
1\x{ff11}
2466+
2467+
/(?-aT:[[:digit:]])[[:digit:]]/utf,never_ucp,ascii_digit
2468+
11
2469+
\= Expect no match
2470+
\x{ff11}1
2471+
1\x{ff11}
2472+
24442473
/[[:digit:]]+/utf,ucp,ascii_posix
24452474
123\x{660}456
24462475

2476+
/(?-aP)[[:digit:]](?aP)[[:digit:]]/utf,ucp,ascii_posix
2477+
11
2478+
\x{ff11}1
2479+
\= Expect no match
2480+
1\x{ff11}
2481+
2482+
/(?-aP:[[:digit:]])[[:digit:]]/utf,ucp,ascii_posix
2483+
11
2484+
\x{ff11}1
2485+
\= Expect no match
2486+
1\x{ff11}
2487+
2488+
/(?-a:[[:digit:]])[[:digit:]]/a,utf,ucp
2489+
11
2490+
\x{ff11}1
2491+
\= Expect no match
2492+
1\x{ff11}
2493+
24472494
/>[[:space:]]+</utf,ucp
24482495
>\x{a0} \x{a0}<
24492496
>\x{a0}\x{a0}\x{a0}<

testdata/testoutput5

+74-3
Original file line numberDiff line numberDiff line change
@@ -5251,13 +5251,25 @@ No match
52515251
\x{660}\x{660}\x{660}
52525252
No match
52535253

5254+
/\d(?-aD)\d(?aD)\d/utf,ucp,ascii_bsd
5255+
999
5256+
0: 999
5257+
9\x{660}9
5258+
0: 9\x{660}9
5259+
52545260
/\d(?a)\d(?-a)\d/utf,ucp
52555261
\x{660}9\x{660}
52565262
0: \x{660}9\x{660}
52575263
\= Expect no match
52585264
\x{660}\x{660}\x{660}
52595265
No match
52605266

5267+
/\d(?-aD)\d(?aD)\d/utf,ucp,ascii_bsd
5268+
999
5269+
0: 999
5270+
9\x{660}9
5271+
0: 9\x{660}9
5272+
52615273
# SPACES
52625274

52635275
/>\s+</i,utf
@@ -5375,23 +5387,82 @@ No match
53755387

53765388
# POSIX
53775389

5378-
/[[:digit:]]+/utf,ucp
5390+
/^[[:digit:]]+$/utf,ucp
5391+
123456
5392+
0: 123456
53795393
123\x{660}456
53805394
0: 123\x{660}456
53815395

5382-
/[[:digit:]]+/utf,ucp,ascii_digit
5396+
/^[[:digit:]]+$/utf,ucp,ascii_digit
5397+
123456
5398+
0: 123456
5399+
\= Expect no match
53835400
123\x{660}456
5384-
0: 123
5401+
No match
53855402

53865403
/[[:digit:]]+/g,utf,ucp,ascii_digit
53875404
123\x{660}456
53885405
0: 123
53895406
0: 456
53905407

5408+
/(?-aT)[[:digit:]](?aT)[[:digit:]]/utf,ucp,ascii_digit
5409+
11
5410+
0: 11
5411+
\x{ff11}1
5412+
0: \x{ff11}1
5413+
\= Expect no match
5414+
1\x{ff11}
5415+
No match
5416+
5417+
/(?-aT:[[:digit:]])[[:digit:]]/utf,ucp,ascii_digit
5418+
11
5419+
0: 11
5420+
\x{ff11}1
5421+
0: \x{ff11}1
5422+
\= Expect no match
5423+
1\x{ff11}
5424+
No match
5425+
5426+
/(?-aT:[[:digit:]])[[:digit:]]/utf,never_ucp,ascii_digit
5427+
11
5428+
0: 11
5429+
\= Expect no match
5430+
\x{ff11}1
5431+
No match
5432+
1\x{ff11}
5433+
No match
5434+
53915435
/[[:digit:]]+/utf,ucp,ascii_posix
53925436
123\x{660}456
53935437
0: 123
53945438

5439+
/(?-aP)[[:digit:]](?aP)[[:digit:]]/utf,ucp,ascii_posix
5440+
11
5441+
0: 11
5442+
\x{ff11}1
5443+
0: \x{ff11}1
5444+
\= Expect no match
5445+
1\x{ff11}
5446+
No match
5447+
5448+
/(?-aP:[[:digit:]])[[:digit:]]/utf,ucp,ascii_posix
5449+
11
5450+
0: 11
5451+
\x{ff11}1
5452+
0: \x{ff11}1
5453+
\= Expect no match
5454+
1\x{ff11}
5455+
No match
5456+
5457+
/(?-a:[[:digit:]])[[:digit:]]/a,utf,ucp
5458+
11
5459+
0: 11
5460+
\x{ff11}1
5461+
0: \x{ff11}1
5462+
\= Expect no match
5463+
1\x{ff11}
5464+
No match
5465+
53955466
/^[[:xdigit:]]+$/utf,ucp
53965467
f0
53975468
0: f0

0 commit comments

Comments
 (0)