Skip to content

Commit 9baac1a

Browse files
srezictonycoz
authored andcommitted
Data::Dumper: useqq implementation for xs
Tests are mainly unchanged, just a "cheat" and a couple of TODOs were removed.
1 parent 49fb45d commit 9baac1a

File tree

3 files changed

+63
-40
lines changed

3 files changed

+63
-40
lines changed

dist/Data-Dumper/Dumper.pm

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,6 @@ sub DESTROY {}
221221
sub Dump {
222222
return &Dumpxs
223223
unless $Data::Dumper::Useperl || (ref($_[0]) && $_[0]->{useperl}) ||
224-
$Data::Dumper::Useqq || (ref($_[0]) && $_[0]->{useqq}) ||
225224
$Data::Dumper::Deparse || (ref($_[0]) && $_[0]->{deparse});
226225
return &Dumpperl;
227226
}

dist/Data-Dumper/Dumper.xs

Lines changed: 60 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,15 @@
1818

1919
static I32 num_q (const char *s, STRLEN slen);
2020
static I32 esc_q (char *dest, const char *src, STRLEN slen);
21-
static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen);
21+
static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq);
2222
static I32 needs_quote(const char *s, STRLEN len);
2323
static SV *sv_x (pTHX_ SV *sv, const char *str, STRLEN len, I32 n);
2424
static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval,
2525
HV *seenhv, AV *postav, I32 *levelp, I32 indent,
2626
SV *pad, SV *xpad, SV *apad, SV *sep, SV *pair,
2727
SV *freezer, SV *toaster,
2828
I32 purity, I32 deepcopy, I32 quotekeys, SV *bless,
29-
I32 maxdepth, SV *sortkeys, int use_sparse_seen_hash);
29+
I32 maxdepth, SV *sortkeys, int use_sparse_seen_hash, I32 useqq);
3030

3131
#ifndef HvNAME_get
3232
#define HvNAME_get HvNAME
@@ -158,8 +158,9 @@ esc_q(char *d, const char *s, STRLEN slen)
158158
return ret;
159159
}
160160

161+
/* this function is also misused for implementing $Useqq */
161162
static I32
162-
esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen)
163+
esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq)
163164
{
164165
char *r, *rstart;
165166
const char *s = src;
@@ -176,8 +177,8 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen)
176177
int increment;
177178

178179
/* this will need EBCDICification */
179-
for (s = src; s < send; s += increment) {
180-
const UV k = utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL);
180+
for (s = src; s < send; do_utf8 ? s += increment : s++) {
181+
const UV k = do_utf8 ? utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL) : *(U8*)s;
181182

182183
/* check for invalid utf8 */
183184
increment = (k == 0 && *s != '\0') ? 1 : UTF8SKIP(s);
@@ -195,6 +196,14 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen)
195196
k <= 0xFFFFFFFF ? 8 : UVSIZE * 4
196197
#endif
197198
);
199+
#ifndef EBCDIC
200+
} else if (useqq && (k <= 10 || k == 12 || k == 13 || k == 27)) {
201+
grow += 2;
202+
} else if (useqq && k <= 31) {
203+
grow += 3;
204+
} else if (useqq && k >= 127) {
205+
grow += 4;
206+
#endif
198207
} else if (k == '\\') {
199208
backslashes++;
200209
} else if (k == '\'') {
@@ -205,16 +214,17 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen)
205214
normal++;
206215
}
207216
}
208-
if (grow) {
217+
if (grow || useqq) {
209218
/* We have something needing hex. 3 is ""\0 */
210219
sv_grow(sv, cur + 3 + grow + 2*backslashes + single_quotes
211220
+ 2*qq_escapables + normal);
212221
rstart = r = SvPVX(sv) + cur;
213222

214223
*r++ = '"';
215224

216-
for (s = src; s < send; s += UTF8SKIP(s)) {
217-
const UV k = utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL);
225+
for (s = src; s < send; do_utf8 ? s += UTF8SKIP(s) : s++) {
226+
const UV k = do_utf8 ? utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL) : *(U8*)s;
227+
218228

219229
if (k == '"' || k == '\\' || k == '$' || k == '@') {
220230
*r++ = '\\';
@@ -224,6 +234,33 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen)
224234
#ifdef EBCDIC
225235
if (isprint(k) && k < 256)
226236
#else
237+
if (useqq && (k <= 31 || k == 127 || (!do_utf8 && k > 127))) {
238+
*r++ = '\\';
239+
switch (k) {
240+
case 7: *r++ = 'a'; break;
241+
case 8: *r++ = 'b'; break;
242+
case 9: *r++ = 't'; break;
243+
case 10: *r++ = 'n'; break;
244+
case 12: *r++ = 'f'; break;
245+
case 13: *r++ = 'r'; break;
246+
case 27: *r++ = 'e'; break;
247+
default:
248+
/* faster than
249+
* r = r + my_sprintf(r, "%o", k);
250+
*/
251+
if (k <= 7) {
252+
*r++ = (char)k + '0';
253+
} else if (k <= 63) {
254+
*r++ = (char)(k>>3) + '0';
255+
*r++ = (char)(k&7) + '0';
256+
} else {
257+
*r++ = (char)(k>>6) + '0';
258+
*r++ = (char)((k&63)>>3) + '0';
259+
*r++ = (char)(k&7) + '0';
260+
}
261+
}
262+
}
263+
else
227264
if (k < 0x80)
228265
#endif
229266
*r++ = (char)k;
@@ -298,7 +335,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
298335
AV *postav, I32 *levelp, I32 indent, SV *pad, SV *xpad,
299336
SV *apad, SV *sep, SV *pair, SV *freezer, SV *toaster, I32 purity,
300337
I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys,
301-
int use_sparse_seen_hash)
338+
int use_sparse_seen_hash, I32 useqq)
302339
{
303340
char tmpbuf[128];
304341
U32 i;
@@ -524,15 +561,15 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
524561
DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
525562
postav, levelp, indent, pad, xpad, apad, sep, pair,
526563
freezer, toaster, purity, deepcopy, quotekeys, bless,
527-
maxdepth, sortkeys, use_sparse_seen_hash);
564+
maxdepth, sortkeys, use_sparse_seen_hash, useqq);
528565
sv_catpvn(retval, ")}", 2);
529566
} /* plain */
530567
else {
531568
sv_catpvn(retval, "\\", 1);
532569
DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
533570
postav, levelp, indent, pad, xpad, apad, sep, pair,
534571
freezer, toaster, purity, deepcopy, quotekeys, bless,
535-
maxdepth, sortkeys, use_sparse_seen_hash);
572+
maxdepth, sortkeys, use_sparse_seen_hash, useqq);
536573
}
537574
SvREFCNT_dec(namesv);
538575
}
@@ -544,7 +581,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
544581
DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
545582
postav, levelp, indent, pad, xpad, apad, sep, pair,
546583
freezer, toaster, purity, deepcopy, quotekeys, bless,
547-
maxdepth, sortkeys, use_sparse_seen_hash);
584+
maxdepth, sortkeys, use_sparse_seen_hash, useqq);
548585
SvREFCNT_dec(namesv);
549586
}
550587
else if (realtype == SVt_PVAV) {
@@ -617,7 +654,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
617654
DD_dump(aTHX_ elem, iname, ilen, retval, seenhv, postav,
618655
levelp, indent, pad, xpad, apad, sep, pair,
619656
freezer, toaster, purity, deepcopy, quotekeys, bless,
620-
maxdepth, sortkeys, use_sparse_seen_hash);
657+
maxdepth, sortkeys, use_sparse_seen_hash, useqq);
621658
if (ix < ixmax)
622659
sv_catpvn(retval, ",", 1);
623660
}
@@ -777,9 +814,9 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
777814
The code is also smaller (22044 vs 22260) because I've been
778815
able to pull the common logic out to both sides. */
779816
if (quotekeys || needs_quote(key,keylen)) {
780-
if (do_utf8) {
817+
if (do_utf8 || useqq) {
781818
STRLEN ocur = SvCUR(retval);
782-
nlen = esc_q_utf8(aTHX_ retval, key, klen);
819+
nlen = esc_q_utf8(aTHX_ retval, key, klen, do_utf8, useqq);
783820
nkey = SvPVX(retval) + ocur;
784821
}
785822
else {
@@ -824,7 +861,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
824861
DD_dump(aTHX_ hval, SvPVX_const(sname), SvCUR(sname), retval, seenhv,
825862
postav, levelp, indent, pad, xpad, newapad, sep, pair,
826863
freezer, toaster, purity, deepcopy, quotekeys, bless,
827-
maxdepth, sortkeys, use_sparse_seen_hash);
864+
maxdepth, sortkeys, use_sparse_seen_hash, useqq);
828865
SvREFCNT_dec(sname);
829866
Safefree(nkey_buffer);
830867
if (indent >= 2)
@@ -973,7 +1010,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
9731010
r = SvPVX(retval)+SvCUR(retval);
9741011
r[0] = '*'; r[1] = '{';
9751012
SvCUR_set(retval, SvCUR(retval)+2);
976-
esc_q_utf8(aTHX_ retval, c, i);
1013+
esc_q_utf8(aTHX_ retval, c, i, 1, useqq);
9771014
sv_grow(retval, SvCUR(retval)+2);
9781015
r = SvPVX(retval)+SvCUR(retval);
9791016
r[0] = '}'; r[1] = '\0';
@@ -1033,7 +1070,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
10331070
seenhv, postav, &nlevel, indent, pad, xpad,
10341071
newapad, sep, pair, freezer, toaster, purity,
10351072
deepcopy, quotekeys, bless, maxdepth,
1036-
sortkeys, use_sparse_seen_hash);
1073+
sortkeys, use_sparse_seen_hash, useqq);
10371074
SvREFCNT_dec(e);
10381075
}
10391076
}
@@ -1062,8 +1099,8 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
10621099
else {
10631100
integer_came_from_string:
10641101
c = SvPV(val, i);
1065-
if (DO_UTF8(val))
1066-
i += esc_q_utf8(aTHX_ retval, c, i);
1102+
if (DO_UTF8(val) || useqq)
1103+
i += esc_q_utf8(aTHX_ retval, c, i, DO_UTF8(val), useqq);
10671104
else {
10681105
sv_grow(retval, SvCUR(retval)+3+2*i); /* 3: ""\0 */
10691106
r = SvPVX(retval) + SvCUR(retval);
@@ -1108,7 +1145,7 @@ Data_Dumper_Dumpxs(href, ...)
11081145
HV *seenhv = NULL;
11091146
AV *postav, *todumpav, *namesav;
11101147
I32 level = 0;
1111-
I32 indent, terse, i, imax, postlen;
1148+
I32 indent, terse, useqq, i, imax, postlen;
11121149
SV **svp;
11131150
SV *val, *name, *pad, *xpad, *apad, *sep, *pair, *varname;
11141151
SV *freezer, *toaster, *bless, *sortkeys;
@@ -1149,7 +1186,7 @@ Data_Dumper_Dumpxs(href, ...)
11491186
= freezer = toaster = bless = sortkeys = &PL_sv_undef;
11501187
name = sv_newmortal();
11511188
indent = 2;
1152-
terse = purity = deepcopy = 0;
1189+
terse = purity = deepcopy = useqq = 0;
11531190
quotekeys = 1;
11541191

11551192
retval = newSVpvn("", 0);
@@ -1173,10 +1210,8 @@ Data_Dumper_Dumpxs(href, ...)
11731210
purity = SvIV(*svp);
11741211
if ((svp = hv_fetch(hv, "terse", 5, FALSE)))
11751212
terse = SvTRUE(*svp);
1176-
#if 0 /* useqq currently unused */
11771213
if ((svp = hv_fetch(hv, "useqq", 5, FALSE)))
11781214
useqq = SvTRUE(*svp);
1179-
#endif
11801215
if ((svp = hv_fetch(hv, "pad", 3, FALSE)))
11811216
pad = *svp;
11821217
if ((svp = hv_fetch(hv, "xpad", 4, FALSE)))
@@ -1280,7 +1315,7 @@ Data_Dumper_Dumpxs(href, ...)
12801315
DD_dump(aTHX_ val, SvPVX_const(name), SvCUR(name), valstr, seenhv,
12811316
postav, &level, indent, pad, xpad, newapad, sep, pair,
12821317
freezer, toaster, purity, deepcopy, quotekeys,
1283-
bless, maxdepth, sortkeys, use_sparse_seen_hash);
1318+
bless, maxdepth, sortkeys, use_sparse_seen_hash, useqq);
12841319
SPAGAIN;
12851320

12861321
if (indent >= 2 && !terse)

dist/Data-Dumper/t/dumper.t

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -307,20 +307,9 @@ $foo = { "abc\000\'\efg" => "mno\000",
307307
{
308308
local $Data::Dumper::Useqq = 1;
309309
TEST q(Dumper($foo));
310+
TEST q(Data::Dumper::DumperX($foo)) if $XS;
310311
}
311312

312-
$WANT = <<"EOT";
313-
#\$VAR1 = {
314-
# 'abc\0\\'\efg' => 'mno\0',
315-
# 'reftest' => \\\\1
316-
#};
317-
EOT
318-
319-
{
320-
local $Data::Dumper::Useqq = 1;
321-
TEST q(Data::Dumper::DumperX($foo)) if $XS; # cheat
322-
}
323-
324313

325314

326315
#############
@@ -1461,7 +1450,7 @@ EOT
14611450
$foo = [ join "", map chr, 0..255 ];
14621451
local $Data::Dumper::Useqq = 1;
14631452
TEST q(Dumper($foo)), 'All latin1 characters';
1464-
for (1..3) { print "not ok " . (++$TNUM) . " # TODO NYI\n" if $XS } # TEST q(Data::Dumper::DumperX($foo)) if $XS;
1453+
TEST q(Data::Dumper::DumperX($foo)) if $XS;
14651454
}
14661455

14671456
############# 372
@@ -1481,7 +1470,7 @@ EOT
14811470
TEST q(Dumper($foo)),
14821471
'All latin1 characters with utf8 flag including a wide character';
14831472
}
1484-
for (1..3) { print "not ok " . (++$TNUM) . " # TODO NYI\n" if $XS } # TEST q(Data::Dumper::DumperX($foo)) if $XS;
1473+
TEST q(Data::Dumper::DumperX($foo)) if $XS;
14851474
}
14861475

14871476
############# 378

0 commit comments

Comments
 (0)