From ebdad9b4ff4f93f57aca6e2d260ff74cbc2b93e9 Mon Sep 17 00:00:00 2001 From: Pali Date: Thu, 27 Oct 2016 00:39:49 +0200 Subject: [PATCH] Fix handling of undef, ref, typeglob, UTF8, COW and magic scalar argument in all XS functions Before this patch every function XS function did it differently and not every one correctly. Now SvPV_force_nomg() is used when source argument is going to be modified. SvGETMAGIC() is called when entering into functions and then only "nomg" variants of perl functions are used to prevent processing get magic more times. SvSETMAGIC() is called after modification of source argument. This fixes bugs: https://rt.cpan.org/Public/Bug/Display.html?id=117158 https://rt.cpan.org/Public/Bug/Display.html?id=85489 https://github.com/dankogai/p5-encode/pull/66 --- Encode.xs | 256 +++++++++++++++++++++++++++------------------------- MANIFEST | 2 + t/decode.t | 54 ++++++++++- t/magic.t | 141 +++++++++++++++++++++++++++++ t/rt85489.t | 48 ++++++++++ t/utf8ref.t | 4 +- 6 files changed, 380 insertions(+), 125 deletions(-) create mode 100644 t/magic.t create mode 100644 t/rt85489.t diff --git a/Encode.xs b/Encode.xs index 8c990ea..888e785 100644 --- a/Encode.xs +++ b/Encode.xs @@ -31,6 +31,10 @@ UNIMPLEMENTED(_encoded_utf8_to_bytes, I32) UNIMPLEMENTED(_encoded_bytes_to_utf8, I32) +#ifndef SvIV_nomg +#define SvIV_nomg SvIV +#endif + #ifdef UTF8_DISALLOW_ILLEGAL_INTERCHANGE # define UTF8_ALLOW_STRICT UTF8_DISALLOW_ILLEGAL_INTERCHANGE #else @@ -76,6 +80,37 @@ call_failure(SV * routine, U8 * done, U8 * dest, U8 * orig) PERL_UNUSED_VAR(orig); } +static void +utf8_safe_downgrade(pTHX_ SV ** src, U8 ** s, STRLEN * slen, bool modify) +{ + if (!modify) { + SV *tmp = sv_2mortal(newSVpvn(*s, *slen)); + SvUTF8_on(tmp); + if (SvTAINTED(*src)) + SvTAINTED_on(tmp); + *src = tmp; + *s = SvPVX(*src); + } + if (*slen) { + if (!utf8_to_bytes(*s, slen)) + croak("Wide character"); + SvCUR_set(*src, *slen); + } + SvUTF8_off(*src); +} + +static void +utf8_safe_upgrade(pTHX_ SV ** src, U8 ** s, STRLEN * slen, bool modify) +{ + if (!modify) { + SV *tmp = sv_2mortal(newSVpvn(*s, *slen)); + if (SvTAINTED(*src)) + SvTAINTED_on(tmp); + *src = tmp; + } + sv_utf8_upgrade_nomg(*src); + *s = SvPV_nomg(*src, *slen); +} #define ERR_ENCODE_NOMAP "\"\\x{%04" UVxf "}\" does not map to %s" #define ERR_DECODE_NOMAP "%s \"\\x%02" UVXf "\" does not map to Unicode" @@ -104,12 +139,10 @@ do_fallback_cb(pTHX_ UV ch, SV *fallback_cb) } static SV * -encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src, +encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src, U8 * s, STRLEN slen, int check, STRLEN * offset, SV * term, int * retcode, SV *fallback_cb) { - STRLEN slen; - U8 *s = (U8 *) SvPV(src, slen); STRLEN tlen = slen; STRLEN ddone = 0; STRLEN sdone = 0; @@ -279,6 +312,7 @@ encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src, sv_setpvn(src, (char*)s+slen, sdone); } SvCUR_set(src, sdone); + SvSETMAGIC(src); } /* warn("check = 0x%X, code = 0x%d\n", check, code); */ @@ -392,7 +426,7 @@ process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv, } else { fallback_cb = &PL_sv_undef; - check = SvIV(check_sv); + check = SvIV_nomg(check_sv); } SvPOK_only(dst); @@ -504,10 +538,6 @@ MODULE = Encode PACKAGE = Encode::utf8 PREFIX = Method_ PROTOTYPES: DISABLE -#ifndef SvIsCOW -# define SvIsCOW(sv) (SvREADONLY(sv) && SvFAKE(sv)) -#endif - void Method_decode_xs(obj,src,check_sv = &PL_sv_no) SV * obj @@ -520,23 +550,26 @@ PREINIT: SV *dst; bool renewed = 0; int check; + bool modify; +INIT: + SvGETMAGIC(src); + SvGETMAGIC(check_sv); + check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV_nomg(check_sv); + modify = (check && !(check & ENCODE_LEAVE_SRC)); CODE: { - dSP; ENTER; SAVETMPS; - if (src == &PL_sv_undef || SvROK(src)) src = sv_2mortal(newSV(0)); - check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV(check_sv); - if (!(check & ENCODE_LEAVE_SRC) && SvIsCOW(src)) { - /* - * disassociate from any other scalars before doing - * in-place modifications - */ - sv_force_normal(src); - } - s = (U8 *) SvPV(src, slen); - e = (U8 *) SvEND(src); + if (!SvOK(src)) + XSRETURN_UNDEF; + s = modify ? SvPV_force_nomg(src, slen) : SvPV_nomg(src, slen); + if (SvUTF8(src)) + utf8_safe_downgrade(aTHX_ &src, &s, &slen, modify); + e = s+slen; + /* * PerlIO check -- we assume the object is of PerlIO if renewed */ + dSP; + ENTER; SAVETMPS; PUSHMARK(sp); XPUSHs(obj); PUTBACK; @@ -551,28 +584,17 @@ CODE: FREETMPS; LEAVE; /* end PerlIO check */ - if (SvUTF8(src)) { - s = utf8_to_bytes(s,&slen); - if (s) { - SvCUR_set(src,slen); - SvUTF8_off(src); - e = s+slen; - } - else { - croak("Cannot decode string with wide characters"); - } - } - dst = sv_2mortal(newSV(slen>0?slen:1)); /* newSV() abhors 0 -- inaba */ s = process_utf8(aTHX_ dst, s, e, check_sv, 0, strict_utf8(aTHX_ obj), renewed); /* Clear out translated part of source unless asked not to */ - if (check && !(check & ENCODE_LEAVE_SRC)){ + if (modify) { slen = e-s; if (slen) { sv_setpvn(src, (char*)s, slen); } SvCUR_set(src, slen); + SvSETMAGIC(src); } SvUTF8_on(dst); if (SvTAINTED(src)) SvTAINTED_on(dst); /* propagate taintedness */ @@ -591,12 +613,18 @@ PREINIT: U8 *e; SV *dst; int check; + bool modify; +INIT: + SvGETMAGIC(src); + SvGETMAGIC(check_sv); + check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV_nomg(check_sv); + modify = (check && !(check & ENCODE_LEAVE_SRC)); CODE: { - check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV(check_sv); - if (src == &PL_sv_undef || SvROK(src)) src = sv_2mortal(newSV(0)); - s = (U8 *) SvPV(src, slen); - e = (U8 *) SvEND(src); + if (!SvOK(src)) + XSRETURN_UNDEF; + s = modify ? SvPV_force_nomg(src, slen) : SvPV_nomg(src, slen); + e = s+slen; dst = sv_2mortal(newSV(slen>0?slen:1)); /* newSV() abhors 0 -- inaba */ if (SvUTF8(src)) { /* Already encoded */ @@ -632,12 +660,13 @@ CODE: } /* Clear out translated part of source unless asked not to */ - if (check && !(check & ENCODE_LEAVE_SRC)){ + if (modify) { slen = e-s; if (slen) { sv_setpvn(src, (char*)s, slen); } SvCUR_set(src, slen); + SvSETMAGIC(src); } SvPOK_only(dst); SvUTF8_off(dst); @@ -686,23 +715,26 @@ SV * src SV * off SV * term SV * check_sv -CODE: -{ - int check; - SV *fallback_cb = &PL_sv_undef; +INIT: + SvGETMAGIC(src); + SvGETMAGIC(check_sv); + int check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV_nomg(check_sv); + SV *fallback_cb = SvROK(check_sv) ? check_sv : &PL_sv_undef; + bool modify = (check && !(check & ENCODE_LEAVE_SRC)); encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); STRLEN offset = (STRLEN)SvIV(off); int code = 0; - if (SvUTF8(src)) { - sv_utf8_downgrade(src, FALSE); - } - if (SvROK(check_sv)){ - fallback_cb = check_sv; - check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */ - }else{ - check = SvIV(check_sv); - } - sv_catsv(dst, encode_method(aTHX_ enc, enc->t_utf8, src, check, + U8 *s; + STRLEN slen; + SV *tmp; +CODE: +{ + if (!SvOK(src)) + XSRETURN_NO; + s = modify ? SvPV_force_nomg(src, slen) : SvPV_nomg(src, slen); + if (SvUTF8(src)) + utf8_safe_downgrade(aTHX_ &src, &s, &slen, modify); + sv_catsv(dst, encode_method(aTHX_ enc, enc->t_utf8, src, s, slen, check, &offset, term, &code, fallback_cb)); SvIV_set(off, (IV)offset); if (code == ENCODE_FOUND_TERM) { @@ -718,71 +750,50 @@ Method_decode(obj,src,check_sv = &PL_sv_no) SV * obj SV * src SV * check_sv +INIT: + SvGETMAGIC(src); + SvGETMAGIC(check_sv); + int check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV_nomg(check_sv); + SV *fallback_cb = SvROK(check_sv) ? check_sv : &PL_sv_undef; + bool modify = (check && !(check & ENCODE_LEAVE_SRC)); + encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); + U8 *s; + STRLEN slen; CODE: { - int check; - SV *fallback_cb = &PL_sv_undef; - encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); - if (SvREADONLY(src) || SvSMAGICAL(src) || SvGMAGICAL(src) || !SvPOK(src)) { - SV *tmp; - tmp = sv_newmortal(); - sv_copypv(tmp, src); - src = tmp; - } - if (SvUTF8(src)) { - sv_utf8_downgrade(src, FALSE); - } - if (SvROK(check_sv)){ - fallback_cb = check_sv; - check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */ - }else{ - check = SvIV(check_sv); - } - ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check, + if (!SvOK(src)) + XSRETURN_UNDEF; + s = modify ? SvPV_force_nomg(src, slen) : SvPV_nomg(src, slen); + if (SvUTF8(src)) + utf8_safe_downgrade(aTHX_ &src, &s, &slen, modify); + ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, s, slen, check, NULL, Nullsv, NULL, fallback_cb); SvUTF8_on(ST(0)); XSRETURN(1); } - -#ifndef SvPV_force_nolen -# define SvPV_force_nolen(sv) SvPV_force_flags_nolen(sv, SV_GMAGIC) -#endif - -#ifndef SvPV_force_flags_nolen -# define SvPV_force_flags_nolen(sv, flags) \ - ((SvFLAGS(sv) & (SVf_POK|SVf_THINKFIRST)) == SVf_POK \ - ? SvPVX(sv) : sv_pvn_force_flags(sv, &PL_na, flags)) -#endif - void Method_encode(obj,src,check_sv = &PL_sv_no) SV * obj SV * src SV * check_sv +INIT: + SvGETMAGIC(src); + SvGETMAGIC(check_sv); + int check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV_nomg(check_sv); + SV *fallback_cb = SvROK(check_sv) ? check_sv : &PL_sv_undef; + bool modify = (check && !(check & ENCODE_LEAVE_SRC)); + encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); + U8 *s; + STRLEN slen; CODE: { - int check; - SV *fallback_cb = &PL_sv_undef; - encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); - if (SvREADONLY(src) || SvSMAGICAL(src) || SvGMAGICAL(src) || !SvPOK(src)) { - /* - SV *tmp; - tmp = sv_newmortal(); - sv_copypv(tmp, src); - src = tmp; - */ - src = sv_mortalcopy(src); - SvPV_force_nolen(src); - } - sv_utf8_upgrade(src); - if (SvROK(check_sv)){ - fallback_cb = check_sv; - check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */ - }else{ - check = SvIV(check_sv); - } - ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check, + if (!SvOK(src)) + XSRETURN_UNDEF; + s = modify ? SvPV_force_nomg(src, slen) : SvPV_nomg(src, slen); + if (!SvUTF8(src)) + utf8_safe_upgrade(aTHX_ &src, &s, &slen, modify); + ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, s, slen, check, NULL, Nullsv, NULL, fallback_cb); XSRETURN(1); } @@ -951,17 +962,16 @@ bool is_utf8(sv, check = 0) SV * sv int check +PREINIT: + char *str; + STRLEN len; CODE: { - if (SvGMAGICAL(sv)) /* it could be $1, for example */ - sv = newSVsv(sv); /* GMAGIG will be done */ + SvGETMAGIC(sv); /* SvGETMAGIC() can modify SvOK flag */ + str = SvOK(sv) ? SvPV_nomg(sv, len) : NULL; /* SvPV() can modify SvUTF8 flag */ RETVAL = SvUTF8(sv) ? TRUE : FALSE; - if (RETVAL && - check && - !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv))) + if (RETVAL && check && (!str || !is_utf8_string((U8 *)str, len))) RETVAL = FALSE; - if (sv != ST(0)) - SvREFCNT_dec(sv); /* it was a temp copy */ } OUTPUT: RETVAL @@ -971,13 +981,14 @@ _utf8_on(sv) SV * sv CODE: { - if (SvPOK(sv)) { - SV *rsv = newSViv(SvUTF8(sv)); - RETVAL = rsv; - if (SvIsCOW(sv)) sv_force_normal(sv); - SvUTF8_on(sv); + SvGETMAGIC(sv); + if (SvPOKp(sv)) { + if (SvTHINKFIRST(sv)) sv_force_normal(sv); + RETVAL = newSViv(SvUTF8(sv)); + SvUTF8_on(sv); + SvSETMAGIC(sv); } else { - RETVAL = &PL_sv_undef; + RETVAL = &PL_sv_undef; } } OUTPUT: @@ -988,13 +999,14 @@ _utf8_off(sv) SV * sv CODE: { - if (SvPOK(sv)) { - SV *rsv = newSViv(SvUTF8(sv)); - RETVAL = rsv; - if (SvIsCOW(sv)) sv_force_normal(sv); - SvUTF8_off(sv); + SvGETMAGIC(sv); + if (SvPOKp(sv)) { + if (SvTHINKFIRST(sv)) sv_force_normal(sv); + RETVAL = newSViv(SvUTF8(sv)); + SvUTF8_off(sv); + SvSETMAGIC(sv); } else { - RETVAL = &PL_sv_undef; + RETVAL = &PL_sv_undef; } } OUTPUT: diff --git a/MANIFEST b/MANIFEST index a74c0bf..14d8222 100644 --- a/MANIFEST +++ b/MANIFEST @@ -94,12 +94,14 @@ t/jisx0212.utf test data t/jperl.t test script t/ksc5601.enc test data t/ksc5601.utf test data +t/magic.t test script t/mime-header.t test script t/mime-name.t test script t/mime_header_iso2022jp.t test script t/perlio.t test script t/piconv.t test script t/rt.pl even more test script +t/rt85489.t test script t/taint.t test script t/unibench.pl benchmark script t/utf8ref.t test script diff --git a/t/decode.t b/t/decode.t index 6b24a8f..8aefb15 100644 --- a/t/decode.t +++ b/t/decode.t @@ -3,7 +3,7 @@ # use strict; use Encode qw(decode_utf8 FB_CROAK find_encoding decode); -use Test::More tests => 5; +use Test::More tests => 17; sub croak_ok(&) { my $code = shift; @@ -32,3 +32,55 @@ SKIP: { *a = $orig; is($latin1->decode(*a), '*main::'.$orig, '[cpan #115168] passing typeglobs to decode'); } + +$orig = "\x80"; +$orig =~ /(.)/; +is($latin1->decode($1), "\N{U+0080}", 'passing magic regex to latin1 decode'); + +$orig = "\x80"; +*a = $orig; +is($latin1->decode(*a), "*main::\N{U+0080}", 'passing typeglob to latin1 decode'); + +$orig = "\N{U+0080}"; +$orig =~ /(.)/; +is($latin1->encode($1), "\x80", 'passing magic regex to latin1 encode'); + +$orig = "\xC3\x80"; +$orig =~ /(..)/; +is(Encode::decode_utf8($1), "\N{U+C0}", 'passing magic regex to Encode::decode_utf8'); + +$orig = "\xC3\x80"; +*a = $orig; +is(Encode::decode_utf8(*a), "*main::\N{U+C0}", 'passing typeglob to Encode::decode_utf8'); + +$orig = "\N{U+C0}"; +$orig =~ /(.)/; +is(Encode::encode_utf8($1), "\xC3\x80", 'passing magic regex to Encode::encode_utf8'); + +$orig = "\xC3\x80"; +$orig =~ /(..)/; +is(Encode::decode('utf-8', $1), "\N{U+C0}", 'passing magic regex to UTF-8 decode'); + +$orig = "\xC3\x80"; +*a = $orig; +is(Encode::decode('utf-8', *a), "*main::\N{U+C0}", 'passing typeglob to UTF-8 decode'); + +$orig = "\N{U+C0}"; +$orig =~ /(.)/; +is(Encode::encode('utf-8', $1), "\xC3\x80", 'passing magic regex to UTF-8 encode'); + +SKIP: { + skip "Perl Version ($]) is older than v5.16", 3 if $] < 5.016; + + $orig = "\N{U+0080}"; + *a = $orig; + is($latin1->encode(*a), "*main::\x80", 'passing typeglob to latin1 encode'); + + $orig = "\N{U+C0}"; + *a = $orig; + is(Encode::encode_utf8(*a), "*main::\xC3\x80", 'passing typeglob to Encode::encode_utf8'); + + $orig = "\N{U+C0}"; + *a = $orig; + is(Encode::encode('utf-8', *a), "*main::\xC3\x80", 'passing typeglob to UTF-8 encode'); +} diff --git a/t/magic.t b/t/magic.t new file mode 100644 index 0000000..447e963 --- /dev/null +++ b/t/magic.t @@ -0,0 +1,141 @@ +BEGIN { + if ($ENV{'PERL_CORE'}) { + chdir 't'; + unshift @INC, '../lib'; + } + require Config; import Config; + if ($Config{'extensions'} !~ /\bEncode\b/) { + print "1..0 # Skip: Encode was not built\n"; + exit 0; + } + if (ord("A") == 193) { + print "1..0 # Skip: EBCDIC\n"; + exit 0; + } + $| = 1; +} + +use strict; +use warnings; + +use Encode qw(find_encoding encode decode encode_utf8 decode_utf8 is_utf8 _utf8_on _utf8_off FB_CROAK); + +use Test::More tests => 3*(2*(3*(4*4)+4)+4+3*3); + +my $ascii = find_encoding('ASCII'); +my $latin1 = find_encoding('Latin1'); +my $utf8 = find_encoding('UTF-8'); + +my $undef = undef; +my $ascii_str = 'ascii_str'; +my $utf8_str = 'utf8_str'; +_utf8_on($utf8_str); + +{ + foreach my $str ($undef, $ascii_str, $utf8_str) { + foreach my $croak (0, 1) { + foreach my $enc ('ASCII', 'Latin1', 'UTF-8') { + my $mod = defined $str && $croak; + my $func = "encode('" . $enc . "', " . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ($croak ? ', FB_CROAK' : '') . ')'; + tie my $input, 'TieScalarCounter', $str; + my $output = encode($enc, $input, $croak ? FB_CROAK : 0); + is(tied($input)->{fetch}, 1, "$func processes get magic only once"); + is(tied($input)->{store}, $mod ? 1 : 0, "$func " . ($mod ? 'processes set magic only once' : 'does not process set magic')); + is($input, $mod ? '' : $str, "$func " . ($mod ? 'modifies' : 'does not modify') . ' $input string'); + is($output, $str, "$func returns correct \$output string"); + } + foreach my $enc ('ASCII', 'Latin1', 'UTF-8') { + my $mod = defined $str && $croak; + my $func = "decode('" . $enc . "', " . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ($croak ? ', FB_CROAK' : '') . ')'; + tie my $input, 'TieScalarCounter', $str; + my $output = decode($enc, $input, $croak ? FB_CROAK : 0); + is(tied($input)->{fetch}, 1, "$func processes get magic only once"); + is(tied($input)->{store}, $mod ? 1 : 0, "$func " . ($mod ? 'processes set magic only once' : 'does not process set magic')); + is($input, $mod ? '' : $str, "$func " . ($mod ? 'modifies' : 'does not modify') . ' $input string'); + is($output, $str, "$func returns correct \$output string"); + } + foreach my $obj ($ascii, $latin1, $utf8) { + my $mod = defined $str && $croak; + my $func = '$' . $obj->name() . '->encode(' . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ($croak ? ', FB_CROAK' : '') . ')'; + tie my $input, 'TieScalarCounter', $str; + my $output = $obj->encode($input, $croak ? FB_CROAK : 0); + is(tied($input)->{fetch}, 1, "$func processes get magic only once"); + is(tied($input)->{store}, $mod ? 1 : 0, "$func " . ($mod ? 'processes set magic only once' : 'does not process set magic')); + is($input, $mod ? '' : $str, "$func " . ($mod ? 'modifies' : 'does not modify') . ' $input string'); + is($output, $str, "$func returns correct \$output string"); + } + foreach my $obj ($ascii, $latin1, $utf8) { + my $mod = defined $str && $croak; + my $func = '$' . $obj->name() . '->decode(' . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ($croak ? ', FB_CROAK' : '') . ')'; + tie my $input, 'TieScalarCounter', $str; + my $output = $obj->decode($input, $croak ? FB_CROAK : 0); + is(tied($input)->{fetch}, 1, "$func processes get magic only once"); + is(tied($input)->{store}, $mod ? 1 : 0, "$func " . ($mod ? 'processes set magic only once' : 'does not process set magic')); + is($input, $mod ? '' : $str, "$func " . ($mod ? 'modifies' : 'does not modify') . ' $input string'); + is($output, $str, "$func returns correct \$output string"); + } + { + my $mod = defined $str && $croak; + my $func = 'decode_utf8(' . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ($croak ? ', FB_CROAK' : '') . ')'; + tie my $input, 'TieScalarCounter', $str; + my $output = decode_utf8($input, $croak ? FB_CROAK : 0); + is(tied($input)->{fetch}, 1, "$func processes get magic only once"); + is(tied($input)->{store}, $mod ? 1 : 0, "$func " . ($mod ? 'processes set magic only once' : 'does not process set magic')); + is($input, $mod ? '' : $str, "$func " . ($mod ? 'modifies' : 'does not modify') . ' $input string'); + is($output, $str, "$func returns correct \$output string"); + } + } + { + my $func = 'encode_utf8(' . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ')'; + tie my $input, 'TieScalarCounter', $str; + my $output = encode_utf8($input); + is(tied($input)->{fetch}, 1, "$func processes get magic only once"); + is(tied($input)->{store}, 0, "$func does not process set magic"); + is($input, $str, "$func does not modify \$input string"); + is($output, $str, "$func returns correct \$output string"); + } + { + my $func = '_utf8_on(' . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ')'; + tie my $input, 'TieScalarCounter', $str; + _utf8_on($input); + is(tied($input)->{fetch}, 1, "$func processes get magic only once"); + is(tied($input)->{store}, defined $str ? 1 : 0, "$func " . (defined $str ? 'processes set magic only once' : 'does not process set magic')); + defined $str ? ok(is_utf8($input), "$func sets UTF8 status flag") : ok(!is_utf8($input), "$func does not set UTF8 status flag"); + } + { + my $func = '_utf8_off(' . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ')'; + tie my $input, 'TieScalarCounter', $str; + _utf8_off($input); + is(tied($input)->{fetch}, 1, "$func processes get magic only once"); + is(tied($input)->{store}, defined $str ? 1 : 0, "$func " . (defined $str ? 'processes set magic only once' : 'does not process set magic')); + ok(!is_utf8($input), "$func unsets UTF8 status flag"); + } + { + my $func = 'is_utf8(' . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ')'; + tie my $input, 'TieScalarCounter', $str; + my $utf8 = is_utf8($input); + is(tied($input)->{fetch}, 1, "$func processes get magic only once"); + is(tied($input)->{store}, 0, "$func does not process set magic"); + is($utf8, is_utf8($str), "$func returned correct state"); + } + } +} + +package TieScalarCounter; + +sub TIESCALAR { + my ($class, $value) = @_; + return bless { fetch => 0, store => 0, value => $value }, $class; +} + +sub FETCH { + my ($self) = @_; + $self->{fetch}++; + return $self->{value}; +} + +sub STORE { + my ($self, $value) = @_; + $self->{store}++; + $self->{value} = $value; +} diff --git a/t/rt85489.t b/t/rt85489.t new file mode 100644 index 0000000..3b28e35 --- /dev/null +++ b/t/rt85489.t @@ -0,0 +1,48 @@ +BEGIN { + if ($ENV{'PERL_CORE'}) { + chdir 't'; + unshift @INC, '../lib'; + } + require Config; import Config; + if ($Config{'extensions'} !~ /\bEncode\b/) { + print "1..0 # Skip: Encode was not built\n"; + exit 0; + } + if (ord("A") == 193) { + print "1..0 # Skip: EBCDIC\n"; + exit 0; + } + $| = 1; +} + +use strict; +use warnings; + +use Test::More tests => 8; + +use Encode; + +my $ascii = Encode::find_encoding("ascii"); +my $orig = "str"; + +my $str = $orig; +ok !Encode::is_utf8($str), "UTF8 flag is not set on input string before ascii encode"; +$ascii->encode($str); +ok !Encode::is_utf8($str), "UTF8 flag is not set on input string after ascii encode"; + +$str = $orig; +ok !Encode::is_utf8($str), "UTF8 flag is not set on input string before Encode::encode ascii"; +Encode::encode("ascii", $str); +ok !Encode::is_utf8($str), "UTF8 flag is not set on input string after Encode::encode ascii"; + +$str = $orig; +Encode::_utf8_on($str); +ok Encode::is_utf8($str), "UTF8 flag is set on input string before ascii decode"; +$ascii->decode($str); +ok Encode::is_utf8($str), "UTF8 flag is set on input string after ascii decode"; + +$str = $orig; +Encode::_utf8_on($str); +ok Encode::is_utf8($str), "UTF8 flag is set on input string before Encode::decode ascii"; +Encode::decode("ascii", $str); +ok Encode::is_utf8($str), "UTF8 flag is set on input string after Encode::decode ascii"; diff --git a/t/utf8ref.t b/t/utf8ref.t index aff098f..465fb6a 100644 --- a/t/utf8ref.t +++ b/t/utf8ref.t @@ -14,10 +14,10 @@ my $u = find_encoding('UTF-8'); my $r = []; no warnings 'uninitialized'; is encode_utf8($r), ''.$r; -is $u->encode($r), ''; +is $u->encode($r), ''.$r; $r = {}; is decode_utf8($r), ''.$r; -is $u->decode($r), ''; +is $u->decode($r), ''.$r; use warnings 'uninitialized'; is encode_utf8(undef), undef;