Skip to content

Commit e8c6a47

Browse files
committed
Implement "max waste" thresholds to avoid problems with COW and deliberately overallocated pvs
COW does not play nicely with "preallocate" algorithms. More specifically, code like sv_gets() wants to preallocate a large buffer into $_ for performance reasons. Prior to COW this was just fine. When someone assigned $_ to a less volatile variable, only the used portion of the buffer was copied, and the extended buffer would be reused by sv_gets() and all was well. With COW, however, this process is foiled. The preallocated buffer gets shared, and then when $_ is updated the buffer is dropped from $_, leaving the other SV holding ownership of the overallocated buffer, and causing sv_gets() to allocate a new buffer entirely. This process would then repeat, consuming time and lots of memory. This patch introduces a "wastage" check to COW. When deciding whether we should COW a string we look at the ratio and difference of SvCUR(sv) and SvLEN(sv), which represent the "actual string length" and the "allocated string length". When the difference exceeds a hard threshold, or when the ratio exceeds a designated factor, then we do not COW. This means that strings with large overallocations are not COWed. Exactly how this works out in practice, where SvGROW() *always* overallocates, is an open question. See: https://rt.perl.org/Ticket/Display.html?id=121796 This patch also slightly tweaks SvGROW() not to do roundup on the first allocation of the pv. Odds are good that the initial request really does want exactly what was requested. (SvGROW, contrary to what the name suggests, is used both for *extending* the size of a pv and for initializing it the first time.)
1 parent c8180b0 commit e8c6a47

File tree

2 files changed

+71
-23
lines changed

2 files changed

+71
-23
lines changed

sv.c

Lines changed: 71 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,69 @@
4646
char *gconvert(double, int, int, char *);
4747
#endif
4848

49+
#ifdef PERL_NEW_COPY_ON_WRITE
50+
# ifndef SV_COW_THRESHOLD
51+
# define SV_COW_THRESHOLD 0 /* COW iff cur >= K (0 disables this check) */
52+
# endif
53+
# ifndef SV_COWBUF_THRESHOLD
54+
# define SV_COWBUF_THRESHOLD 1250 /* COW iff cur >= K (0 disables this check) */
55+
# endif
56+
# ifndef SV_COW_MAX_WASTE_THRESHOLD
57+
# define SV_COW_MAX_WASTE_THRESHOLD 80 /* COW iff (len - cur) < K */
58+
# endif
59+
# ifndef SV_COWBUF_WASTE_THRESHOLD
60+
# define SV_COWBUF_WASTE_THRESHOLD 80 /* COW iff (len - cur) < K */
61+
# endif
62+
# ifndef SV_COW_MAX_WASTE_FACTOR_THRESHOLD
63+
# define SV_COW_MAX_WASTE_FACTOR_THRESHOLD 2 /* COW iff len < (cur * K) */
64+
# endif
65+
# ifndef SV_COWBUF_WASTE_FACTOR_THRESHOLD
66+
# define SV_COWBUF_WASTE_FACTOR_THRESHOLD 2 /* COW iff len < (cur * K) */
67+
# endif
68+
#endif
69+
/* Work around compiler warnings about unsigned >= THRESHOLD when thres-
70+
hold is 0. */
71+
#if SV_COW_THRESHOLD
72+
# define GE_COW_THRESHOLD(cur) ((cur) >= SV_COW_THRESHOLD)
73+
#else
74+
# define GE_COW_THRESHOLD(cur) 1
75+
#endif
76+
#if SV_COWBUF_THRESHOLD
77+
# define GE_COWBUF_THRESHOLD(cur) ((cur) >= SV_COWBUF_THRESHOLD)
78+
#else
79+
# define GE_COWBUF_THRESHOLD(cur) 1
80+
#endif
81+
#if SV_COW_MAX_WASTE_THRESHOLD
82+
# define GE_COW_MAX_WASTE_THRESHOLD(cur,len) (((len)-(cur)) < SV_COW_MAX_WASTE_THRESHOLD) /* NB: despite the GE_ prefix this is a strict less-than test on the unused space (len - cur) */
83+
#else
84+
# define GE_COW_MAX_WASTE_THRESHOLD(cur,len) 1
85+
#endif
86+
#if SV_COWBUF_WASTE_THRESHOLD
87+
# define GE_COWBUF_WASTE_THRESHOLD(cur,len) (((len)-(cur)) < SV_COWBUF_WASTE_THRESHOLD) /* NB: despite the GE_ prefix this is a strict less-than test on the unused space (len - cur) */
88+
#else
89+
# define GE_COWBUF_WASTE_THRESHOLD(cur,len) 1
90+
#endif
91+
#if SV_COW_MAX_WASTE_FACTOR_THRESHOLD
92+
# define GE_COW_MAX_WASTE_FACTOR_THRESHOLD(cur,len) ((len) < SV_COW_MAX_WASTE_FACTOR_THRESHOLD * (cur)) /* NB: despite the GE_ prefix this is a strict less-than test (len < K * cur); it is false whenever cur is 0 and len is non-negative */
93+
#else
94+
# define GE_COW_MAX_WASTE_FACTOR_THRESHOLD(cur,len) 1
95+
#endif
96+
#if SV_COWBUF_WASTE_FACTOR_THRESHOLD
97+
# define GE_COWBUF_WASTE_FACTOR_THRESHOLD(cur,len) ((len) < SV_COWBUF_WASTE_FACTOR_THRESHOLD * (cur)) /* NB: despite the GE_ prefix this is a strict less-than test (len < K * cur); it is false whenever cur is 0 and len is non-negative */
98+
#else
99+
# define GE_COWBUF_WASTE_FACTOR_THRESHOLD(cur,len) 1
100+
#endif
101+
102+
#define CHECK_COW_THRESHOLD(cur,len) (\
103+
GE_COW_THRESHOLD((cur)) && \
104+
GE_COW_MAX_WASTE_THRESHOLD((cur),(len)) && \
105+
GE_COW_MAX_WASTE_FACTOR_THRESHOLD((cur),(len)) \
106+
)
107+
#define CHECK_COWBUF_THRESHOLD(cur,len) (\
108+
GE_COWBUF_THRESHOLD((cur)) && \
109+
GE_COWBUF_WASTE_THRESHOLD((cur),(len)) && \
110+
GE_COWBUF_WASTE_FACTOR_THRESHOLD((cur),(len)) \
111+
)
49112
/* void Gconvert: on Linux at least, gcvt (which Gconvert gets deffed to),
50113
* has a mandatory return value, even though that value is just the same
51114
* as the buf arg */
@@ -1524,7 +1587,8 @@ Perl_sv_grow(pTHX_ SV *const sv, STRLEN newlen)
15241587
if (newlen < minlen)
15251588
newlen = minlen;
15261589
#ifndef Perl_safesysmalloc_size
1527-
newlen = PERL_STRLEN_ROUNDUP(newlen);
1590+
if (SvLEN(sv))
1591+
newlen = PERL_STRLEN_ROUNDUP(newlen);
15281592
#endif
15291593
if (SvLEN(sv) && s) {
15301594
s = (char*)saferealloc(s, newlen);
@@ -3987,18 +4051,8 @@ S_glob_assign_ref(pTHX_ SV *const dstr, SV *const sstr)
39874051
return;
39884052
}
39894053

3990-
/* Work around compiler warnings about unsigned >= THRESHOLD when thres-
3991-
hold is 0. */
3992-
#if SV_COW_THRESHOLD
3993-
# define GE_COW_THRESHOLD(len) ((len) >= SV_COW_THRESHOLD)
3994-
#else
3995-
# define GE_COW_THRESHOLD(len) 1
3996-
#endif
3997-
#if SV_COWBUF_THRESHOLD
3998-
# define GE_COWBUF_THRESHOLD(len) ((len) >= SV_COWBUF_THRESHOLD)
3999-
#else
4000-
# define GE_COWBUF_THRESHOLD(len) 1
4001-
#endif
4054+
4055+
40024056

40034057
#ifdef PERL_DEBUG_READONLY_COW
40044058
# include <sys/mman.h>
@@ -4366,7 +4420,7 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, SV* sstr, const I32 flags)
43664420
|| ((sflags & (SVs_PADTMP|SVf_READONLY|SVf_IsCOW))
43674421
== SVs_PADTMP
43684422
/* whose buffer is worth stealing */
4369-
&& GE_COWBUF_THRESHOLD(cur)
4423+
&& CHECK_COWBUF_THRESHOLD(cur,len)
43704424
)
43714425
) &&
43724426
!(sflags & SVf_OOK) && /* and not involved in OOK hack? */
@@ -4400,14 +4454,14 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, SV* sstr, const I32 flags)
44004454
#elif defined(PERL_NEW_COPY_ON_WRITE)
44014455
(sflags & SVf_IsCOW
44024456
? (!len ||
4403-
( (GE_COWBUF_THRESHOLD(cur) || SvLEN(dstr) < cur+1)
4457+
( (CHECK_COWBUF_THRESHOLD(cur,len) || SvLEN(dstr) < cur+1)
44044458
/* If this is a regular (non-hek) COW, only so
44054459
many COW "copies" are possible. */
44064460
&& CowREFCNT(sstr) != SV_COW_REFCNT_MAX ))
44074461
: ( (sflags & CAN_COW_MASK) == CAN_COW_FLAGS
44084462
&& !(SvFLAGS(dstr) & SVf_BREAK)
4409-
&& GE_COW_THRESHOLD(cur) && cur+1 < len
4410-
&& (GE_COWBUF_THRESHOLD(cur) || SvLEN(dstr) < cur+1)
4463+
&& CHECK_COW_THRESHOLD(cur,len) && cur+1 < len
4464+
&& (CHECK_COWBUF_THRESHOLD(cur,len) || SvLEN(dstr) < cur+1)
44114465
))
44124466
#else
44134467
sflags & SVf_IsCOW

sv.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1909,12 +1909,6 @@ mg.c:1024: warning: left-hand operand of comma expression has no effect
19091909
/* Note: To allow 256 COW "copies", a refcnt of 0 means 1. */
19101910
# define CowREFCNT(sv) (*(U8 *)(SvPVX(sv)+SvLEN(sv)-1))
19111911
# define SV_COW_REFCNT_MAX ((1 << sizeof(U8)*8) - 1)
1912-
# ifndef SV_COW_THRESHOLD
1913-
# define SV_COW_THRESHOLD 0 /* min string length for cow */
1914-
# endif
1915-
# ifndef SV_COWBUF_THRESHOLD
1916-
# define SV_COWBUF_THRESHOLD 1250 /* min string length for cow */
1917-
# endif /* over existing buffer */
19181912
# define CAN_COW_MASK (SVf_POK|SVf_ROK|SVp_POK|SVf_FAKE| \
19191913
SVf_OOK|SVf_BREAK|SVf_READONLY)
19201914
# endif

0 commit comments

Comments
 (0)