@@ -145,6 +145,43 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
 	BUG();
 }
 
+
+/*
+ * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells
+ * the caller if emergency pfmemalloc reserves are being used. If it is and
+ * the socket is later found to be SOCK_MEMALLOC then PFMEMALLOC reserves
+ * may be used. Otherwise, the packet data may be discarded until enough
+ * memory is free.
+ */
+#define kmalloc_reserve(size, gfp, node, pfmemalloc) \
+	__kmalloc_reserve(size, gfp, node, _RET_IP_, pfmemalloc)
+void *__kmalloc_reserve(size_t size, gfp_t flags, int node, unsigned long ip,
+			bool *pfmemalloc)
+{
+	void *obj;
+	bool ret_pfmemalloc = false;
+
+	/*
+	 * Try a regular allocation, when that fails and we're not entitled
+	 * to the reserves, fail.
+	 */
+	obj = kmalloc_node_track_caller(size,
+					flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
+					node);
+	if (obj || !(gfp_pfmemalloc_allowed(flags)))
+		goto out;
+
+	/* Try again but now we are using pfmemalloc reserves */
+	ret_pfmemalloc = true;
+	obj = kmalloc_node_track_caller(size, flags, node);
+
+out:
+	if (pfmemalloc)
+		*pfmemalloc = ret_pfmemalloc;
+
+	return obj;
+}
+
 /* Allocate a new skbuff. We do this ourselves so we can fill in a few
  * 'private' fields and also do memory statistics to find all the
  * [BEEP] leaks.
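The kmalloc_reserve() wrapper above implements a two-pass strategy: the first kmalloc_node_track_caller() call adds __GFP_NOMEMALLOC | __GFP_NOWARN so it fails quietly instead of dipping into the emergency reserves; only when that fails, and gfp_pfmemalloc_allowed() confirms the context may use the reserves, is the allocation retried with the caller's original flags and the outcome reported through *pfmemalloc. A minimal caller sketch, mirroring how __alloc_skb() consumes the result later in this patch (the skb, size and gfp_mask variables here are illustrative):

	bool pfmemalloc;
	u8 *data;

	data = kmalloc_reserve(size, gfp_mask, NUMA_NO_NODE, &pfmemalloc);
	if (data)
		/* remember the origin so the buffer can be dropped later if
		 * it reaches a socket without SOCK_MEMALLOC
		 */
		skb->pfmemalloc = pfmemalloc;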
@@ -155,8 +192,10 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
  * __alloc_skb - allocate a network buffer
  * @size: size to allocate
  * @gfp_mask: allocation mask
- * @fclone: allocate from fclone cache instead of head cache
- *	and allocate a cloned (child) skb
+ * @flags: If SKB_ALLOC_FCLONE is set, allocate from fclone cache
+ *	instead of head cache and allocate a cloned (child) skb.
+ *	If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for
+ *	allocations in case the data is required for writeback
  * @node: numa node to allocate memory on
  *
  * Allocate a new &sk_buff. The returned buffer has no headroom and a
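The @flags argument replaces the old boolean fclone parameter. SKB_ALLOC_RX marks receive-path allocations as eligible for __GFP_MEMALLOC, and skbs whose data came from the reserves carry skb->pfmemalloc so they can be restricted to sockets that are themselves servicing memory reclaim (swap over NBD or NFS). A hedged sketch of the consumer-side test this enables; the actual check lives in the socket receive/filter path elsewhere in this series, not in skbuff.c:

	/* sketch: deliver pfmemalloc skbs only to SOCK_MEMALLOC sockets,
	 * otherwise drop them so ordinary traffic cannot drain the reserves
	 */
	if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC))
		return -ENOMEM;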
@@ -167,14 +206,19 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
  * %GFP_ATOMIC.
  */
 struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
-			    int fclone, int node)
+			    int flags, int node)
 {
 	struct kmem_cache *cache;
 	struct skb_shared_info *shinfo;
 	struct sk_buff *skb;
 	u8 *data;
+	bool pfmemalloc;
 
-	cache = fclone ? skbuff_fclone_cache : skbuff_head_cache;
+	cache = (flags & SKB_ALLOC_FCLONE)
+		? skbuff_fclone_cache : skbuff_head_cache;
+
+	if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX))
+		gfp_mask |= __GFP_MEMALLOC;
 
 	/* Get the HEAD */
 	skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
@@ -189,7 +233,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	 */
 	size = SKB_DATA_ALIGN(size);
 	size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-	data = kmalloc_node_track_caller(size, gfp_mask, node);
+	data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc);
 	if (!data)
 		goto nodata;
 	/* kmalloc(size) might give us more room than requested.
@@ -207,6 +251,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	memset(skb, 0, offsetof(struct sk_buff, tail));
 	/* Account for allocated memory : skb + skb->head */
 	skb->truesize = SKB_TRUESIZE(size);
+	skb->pfmemalloc = pfmemalloc;
 	atomic_set(&skb->users, 1);
 	skb->head = data;
 	skb->data = data;
@@ -222,7 +267,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	atomic_set(&shinfo->dataref, 1);
 	kmemcheck_annotate_variable(shinfo->destructor_arg);
 
-	if (fclone) {
+	if (flags & SKB_ALLOC_FCLONE) {
 		struct sk_buff *child = skb + 1;
 		atomic_t *fclone_ref = (atomic_t *) (child + 1);
 
@@ -232,6 +277,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 		atomic_set(fclone_ref, 1);
 
 		child->fclone = SKB_FCLONE_UNAVAILABLE;
+		child->pfmemalloc = pfmemalloc;
 	}
 out:
 	return skb;
@@ -302,14 +348,7 @@ static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
 
 #define NETDEV_PAGECNT_BIAS (PAGE_SIZE / SMP_CACHE_BYTES)
 
-/**
- * netdev_alloc_frag - allocate a page fragment
- * @fragsz: fragment size
- *
- * Allocates a frag from a page for receive buffer.
- * Uses GFP_ATOMIC allocations.
- */
-void *netdev_alloc_frag(unsigned int fragsz)
+static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
 {
 	struct netdev_alloc_cache *nc;
 	void *data = NULL;
@@ -319,7 +358,7 @@ void *netdev_alloc_frag(unsigned int fragsz)
 	nc = &__get_cpu_var(netdev_alloc_cache);
 	if (unlikely(!nc->page)) {
 refill:
-		nc->page = alloc_page(GFP_ATOMIC | __GFP_COLD);
+		nc->page = alloc_page(gfp_mask);
 		if (unlikely(!nc->page))
 			goto end;
 recycle:
@@ -343,6 +382,18 @@ void *netdev_alloc_frag(unsigned int fragsz)
 	local_irq_restore(flags);
 	return data;
 }
+
+/**
+ * netdev_alloc_frag - allocate a page fragment
+ * @fragsz: fragment size
+ *
+ * Allocates a frag from a page for receive buffer.
+ * Uses GFP_ATOMIC allocations.
+ */
+void *netdev_alloc_frag(unsigned int fragsz)
+{
+	return __netdev_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
+}
 EXPORT_SYMBOL(netdev_alloc_frag);
 
 /**
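netdev_alloc_frag() keeps its documented GFP_ATOMIC | __GFP_COLD behaviour for external callers, while the per-CPU refill logic moves into the static __netdev_alloc_frag() so that only __netdev_alloc_skb() (below) can add __GFP_MEMALLOC to the mask. A hypothetical driver-side sketch of the unchanged external API (RX_BUF_SIZE is an illustrative name, not part of this patch):

	void *frag = netdev_alloc_frag(RX_BUF_SIZE);

	if (frag) {
		struct sk_buff *skb = build_skb(frag, RX_BUF_SIZE);

		/* on failure, release the fragment's page reference */
		if (unlikely(!skb))
			put_page(virt_to_head_page(frag));
	}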
@@ -366,15 +417,21 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
 			      SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
 	if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) {
-		void *data = netdev_alloc_frag(fragsz);
+		void *data;
+
+		if (sk_memalloc_socks())
+			gfp_mask |= __GFP_MEMALLOC;
+
+		data = __netdev_alloc_frag(fragsz, gfp_mask);
 
 		if (likely(data)) {
 			skb = build_skb(data, fragsz);
 			if (unlikely(!skb))
 				put_page(virt_to_head_page(data));
 		}
 	} else {
-		skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, NUMA_NO_NODE);
+		skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask,
+				  SKB_ALLOC_RX, NUMA_NO_NODE);
 	}
 	if (likely(skb)) {
 		skb_reserve(skb, NET_SKB_PAD);
@@ -656,6 +713,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 #if IS_ENABLED(CONFIG_IP_VS)
 	new->ipvs_property = old->ipvs_property;
 #endif
+	new->pfmemalloc = old->pfmemalloc;
 	new->protocol = old->protocol;
 	new->mark = old->mark;
 	new->skb_iif = old->skb_iif;
@@ -814,6 +872,9 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 		n->fclone = SKB_FCLONE_CLONE;
 		atomic_inc(fclone_ref);
 	} else {
+		if (skb_pfmemalloc(skb))
+			gfp_mask |= __GFP_MEMALLOC;
+
 		n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
 		if (!n)
 			return NULL;
@@ -850,6 +911,13 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
 }
 
+static inline int skb_alloc_rx_flag(const struct sk_buff *skb)
+{
+	if (skb_pfmemalloc(skb))
+		return SKB_ALLOC_RX;
+	return 0;
+}
+
 /**
  * skb_copy - create private copy of an sk_buff
  * @skb: buffer to copy
@@ -871,7 +939,8 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
 {
 	int headerlen = skb_headroom(skb);
 	unsigned int size = skb_end_offset(skb) + skb->data_len;
-	struct sk_buff *n = alloc_skb(size, gfp_mask);
+	struct sk_buff *n = __alloc_skb(size, gfp_mask,
+					skb_alloc_rx_flag(skb), NUMA_NO_NODE);
 
 	if (!n)
 		return NULL;
@@ -906,7 +975,8 @@ EXPORT_SYMBOL(skb_copy);
 struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask)
 {
 	unsigned int size = skb_headlen(skb) + headroom;
-	struct sk_buff *n = alloc_skb(size, gfp_mask);
+	struct sk_buff *n = __alloc_skb(size, gfp_mask,
+					skb_alloc_rx_flag(skb), NUMA_NO_NODE);
 
 	if (!n)
 		goto out;
@@ -979,8 +1049,10 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 
 	size = SKB_DATA_ALIGN(size);
 
-	data = kmalloc(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
-		       gfp_mask);
+	if (skb_pfmemalloc(skb))
+		gfp_mask |= __GFP_MEMALLOC;
+	data = kmalloc_reserve(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
+			       gfp_mask, NUMA_NO_NODE, NULL);
 	if (!data)
 		goto nodata;
 	size = SKB_WITH_OVERHEAD(ksize(data));
@@ -1092,8 +1164,9 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
 	/*
 	 *	Allocate the copy buffer
 	 */
-	struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom,
-				      gfp_mask);
+	struct sk_buff *n = __alloc_skb(newheadroom + skb->len + newtailroom,
+					gfp_mask, skb_alloc_rx_flag(skb),
+					NUMA_NO_NODE);
 	int oldheadroom = skb_headroom(skb);
 	int head_copy_len, head_copy_off;
 	int off;
@@ -2775,8 +2848,9 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
 			skb_release_head_state(nskb);
 			__skb_push(nskb, doffset);
 		} else {
-			nskb = alloc_skb(hsize + doffset + headroom,
-					 GFP_ATOMIC);
+			nskb = __alloc_skb(hsize + doffset + headroom,
+					   GFP_ATOMIC, skb_alloc_rx_flag(skb),
+					   NUMA_NO_NODE);
 
 			if (unlikely(!nskb))
 				goto err;