@@ -89,6 +89,34 @@ static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
 #ifdef CONFIG_SKB_EXTENSIONS
 static struct kmem_cache *skbuff_ext_cache __ro_after_init;
 #endif
+
+/* skb_small_head_cache and related code is only supported
+ * for CONFIG_SLAB and CONFIG_SLUB.
+ * As soon as SLOB is removed from the kernel, we can clean this up.
+ */
+#if !defined(CONFIG_SLOB)
+#define HAVE_SKB_SMALL_HEAD_CACHE 1
+#endif
+
+#ifdef HAVE_SKB_SMALL_HEAD_CACHE
+static struct kmem_cache *skb_small_head_cache __ro_after_init;
+
+#define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(MAX_TCP_HEADER)
+
+/* We want SKB_SMALL_HEAD_CACHE_SIZE to not be a power of two.
+ * This should ensure that SKB_SMALL_HEAD_HEADROOM is a unique
+ * size, and we can differentiate heads from skb_small_head_cache
+ * vs system slabs by looking at their size (skb_end_offset()).
+ */
+#define SKB_SMALL_HEAD_CACHE_SIZE				\
+	(is_power_of_2(SKB_SMALL_HEAD_SIZE) ?			\
+		(SKB_SMALL_HEAD_SIZE + L1_CACHE_BYTES) :	\
+		SKB_SMALL_HEAD_SIZE)
+
+#define SKB_SMALL_HEAD_HEADROOM					\
+	SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE)
+#endif /* HAVE_SKB_SMALL_HEAD_CACHE */
+
 int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
 EXPORT_SYMBOL(sysctl_max_skb_frags);
 
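
The power-of-two avoidance above is what makes the freeing-side check possible. Here is a minimal userspace sketch of the arithmetic (not kernel code: the cache-line size, MAX_TCP_HEADER and sizeof(struct skb_shared_info) are stand-in values, and HEAD_ALIGN mirrors what SKB_HEAD_ALIGN() does). It shows that the resulting cache size is never a power of two, so no generic kmalloc slab of this magnitude (those are power-of-two sized) can hand out a head of the same size:

/* Userspace sketch with stand-in constants; mirrors the macros above. */
#include <stdbool.h>
#include <stdio.h>

#define L1_CACHE_BYTES	64	/* assumed cache-line size */
#define SHINFO_SIZE	320	/* stand-in for sizeof(struct skb_shared_info) */
#define MAX_TCP_HDR	320	/* stand-in for MAX_TCP_HEADER */

#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))
/* Stand-in for SKB_HEAD_ALIGN(): align the data, then add aligned shinfo. */
#define HEAD_ALIGN(x)	(ALIGN_UP(x, L1_CACHE_BYTES) + \
			 ALIGN_UP(SHINFO_SIZE, L1_CACHE_BYTES))

static bool is_power_of_2(unsigned long n)
{
	return n != 0 && (n & (n - 1)) == 0;
}

int main(void)
{
	unsigned int small_head = HEAD_ALIGN(MAX_TCP_HDR);
	/* Same rule as SKB_SMALL_HEAD_CACHE_SIZE: bump off a power of two. */
	unsigned int cache_size = is_power_of_2(small_head) ?
				  small_head + L1_CACHE_BYTES : small_head;

	printf("SKB_SMALL_HEAD_SIZE=%u, cache size=%u, pow2=%d\n",
	       small_head, cache_size, is_power_of_2(cache_size));
	return 0;
}
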
@@ -478,25 +506,45 @@ EXPORT_SYMBOL(napi_build_skb);
  * may be used. Otherwise, the packet data may be discarded until enough
  * memory is free
  */
-static void *kmalloc_reserve(size_t size, gfp_t flags, int node,
+static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
 			     bool *pfmemalloc)
 {
-	void *obj;
 	bool ret_pfmemalloc = false;
+	unsigned int obj_size;
+	void *obj;
 
+	obj_size = SKB_HEAD_ALIGN(*size);
+#ifdef HAVE_SKB_SMALL_HEAD_CACHE
+	if (obj_size <= SKB_SMALL_HEAD_CACHE_SIZE &&
+	    !(flags & KMALLOC_NOT_NORMAL_BITS)) {
+
+		/* skb_small_head_cache has non power of two size,
+		 * likely forcing SLUB to use order-3 pages.
+		 * We deliberately attempt a NOMEMALLOC allocation only.
+		 */
+		obj = kmem_cache_alloc_node(skb_small_head_cache,
+				flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
+				node);
+		if (obj) {
+			*size = SKB_SMALL_HEAD_CACHE_SIZE;
+			goto out;
+		}
+	}
+#endif
+	*size = obj_size = kmalloc_size_roundup(obj_size);
 	/*
 	 * Try a regular allocation, when that fails and we're not entitled
 	 * to the reserves, fail.
 	 */
-	obj = kmalloc_node_track_caller(size,
+	obj = kmalloc_node_track_caller(obj_size,
 					flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
 					node);
 	if (obj || !(gfp_pfmemalloc_allowed(flags)))
 		goto out;
 
 	/* Try again but now we are using pfmemalloc reserves */
 	ret_pfmemalloc = true;
-	obj = kmalloc_node_track_caller(size, flags, node);
+	obj = kmalloc_node_track_caller(obj_size, flags, node);
 
 out:
 	if (pfmemalloc)
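
A userspace model of the new calling convention (stand-in names and constants, not kernel API, and the SKB_HEAD_ALIGN() step is elided for brevity): kmalloc_reserve() now takes the head size by pointer and writes back the size actually granted, either the fixed dedicated-cache size or the kmalloc_size_roundup() result, so every caller can place skb_shared_info at the true end of the allocation instead of re-deriving it:

/* Userspace model of the in/out size contract; names are stand-ins. */
#include <stdio.h>
#include <stdlib.h>

#define SMALL_CACHE_SIZE 640	/* stand-in for SKB_SMALL_HEAD_CACHE_SIZE */

static unsigned int size_roundup(unsigned int n)
{
	unsigned int p = 32;	/* stand-in for kmalloc_size_roundup() */

	while (p < n)
		p <<= 1;
	return p;
}

static void *head_alloc(unsigned int *size)
{
	if (*size <= SMALL_CACHE_SIZE) {
		/* Dedicated-cache path: fixed-size objects. */
		*size = SMALL_CACHE_SIZE;
		return malloc(*size);
	}
	/* Generic path: power-of-two rounding, as kmalloc would do. */
	*size = size_roundup(*size);
	return malloc(*size);
}

int main(void)
{
	unsigned int size = 500;
	void *head = head_alloc(&size);

	printf("asked 500, granted %u\n", size);	/* 640 via the small cache */
	free(head);
	return 0;
}
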
@@ -533,7 +581,6 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 {
 	struct kmem_cache *cache;
 	struct sk_buff *skb;
-	unsigned int osize;
 	bool pfmemalloc;
 	u8 *data;
 
@@ -558,26 +605,22 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	 * aligned memory blocks, unless SLUB/SLAB debug is enabled.
 	 * Both skb->head and skb_shared_info are cache line aligned.
 	 */
-	size = SKB_DATA_ALIGN(size);
-	size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-	osize = kmalloc_size_roundup(size);
-	data = kmalloc_reserve(osize, gfp_mask, node, &pfmemalloc);
+	data = kmalloc_reserve(&size, gfp_mask, node, &pfmemalloc);
 	if (unlikely(!data))
 		goto nodata;
 	/* kmalloc_size_roundup() might give us more room than requested.
 	 * Put skb_shared_info exactly at the end of allocated zone,
 	 * to allow max possible filling before reallocation.
 	 */
-	size = SKB_WITH_OVERHEAD(osize);
-	prefetchw(data + size);
+	prefetchw(data + SKB_WITH_OVERHEAD(size));
 
 	/*
 	 * Only clear those fields we need to clear, not those that we will
 	 * actually initialise below. Hence, don't put any more fields after
 	 * the tail pointer in struct sk_buff!
 	 */
 	memset(skb, 0, offsetof(struct sk_buff, tail));
-	__build_skb_around(skb, data, osize);
+	__build_skb_around(skb, data, size);
 	skb->pfmemalloc = pfmemalloc;
 
 	if (flags & SKB_ALLOC_FCLONE) {
@@ -632,8 +675,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
 		goto skb_success;
 	}
 
-	len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-	len = SKB_DATA_ALIGN(len);
+	len = SKB_HEAD_ALIGN(len);
 
 	if (sk_memalloc_socks())
 		gfp_mask |= __GFP_MEMALLOC;
@@ -732,8 +774,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
 		data = page_frag_alloc_1k(&nc->page_small, gfp_mask);
 		pfmemalloc = NAPI_SMALL_PAGE_PFMEMALLOC(nc->page_small);
 	} else {
-		len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-		len = SKB_DATA_ALIGN(len);
+		len = SKB_HEAD_ALIGN(len);
 
 		data = page_frag_alloc(&nc->page, len, gfp_mask);
 		pfmemalloc = nc->page.pfmemalloc;
@@ -809,6 +850,16 @@ static bool skb_pp_recycle(struct sk_buff *skb, void *data)
 	return page_pool_return_skb_page(virt_to_page(data));
 }
 
+static void skb_kfree_head(void *head, unsigned int end_offset)
+{
+#ifdef HAVE_SKB_SMALL_HEAD_CACHE
+	if (end_offset == SKB_SMALL_HEAD_HEADROOM)
+		kmem_cache_free(skb_small_head_cache, head);
+	else
+#endif
+		kfree(head);
+}
+
 static void skb_free_head(struct sk_buff *skb)
 {
 	unsigned char *head = skb->head;
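
skb_kfree_head() depends on SKB_SMALL_HEAD_HEADROOM never matching the headroom of a kmalloc-backed head. A small check, reusing the stand-in constants from the first sketch above, makes that invariant concrete: since the dedicated-cache size is forced off a power of two, subtracting the shared-info overhead from any power-of-two kmalloc size can never produce the same skb_end_offset() value:

/* Userspace check of the dispatch invariant; constants are stand-ins. */
#include <assert.h>
#include <stdio.h>

#define SHINFO_SIZE	 320	/* stand-in, as in the sketch above */
#define SMALL_CACHE_SIZE 640	/* stand-in for SKB_SMALL_HEAD_CACHE_SIZE */
#define WITH_OVERHEAD(x) ((x) - SHINFO_SIZE)	/* stand-in for SKB_WITH_OVERHEAD() */

int main(void)
{
	unsigned int small_headroom = WITH_OVERHEAD(SMALL_CACHE_SIZE);

	/* Walk the power-of-two kmalloc sizes a head could come from. */
	for (unsigned int ksize = 512; ksize <= 65536; ksize <<= 1)
		assert(WITH_OVERHEAD(ksize) != small_headroom);
	puts("no collision with any power-of-two kmalloc size");
	return 0;
}
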
@@ -818,7 +869,7 @@ static void skb_free_head(struct sk_buff *skb)
 			return;
 		skb_free_frag(head);
 	} else {
-		kfree(head);
+		skb_kfree_head(head, skb_end_offset(skb));
 	}
 }
 
@@ -1938,10 +1989,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 	if (skb_pfmemalloc(skb))
 		gfp_mask |= __GFP_MEMALLOC;
 
-	size = SKB_DATA_ALIGN(size);
-	size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-	size = kmalloc_size_roundup(size);
-	data = kmalloc_reserve(size, gfp_mask, NUMA_NO_NODE, NULL);
+	data = kmalloc_reserve(&size, gfp_mask, NUMA_NO_NODE, NULL);
 	if (!data)
 		goto nodata;
 	size = SKB_WITH_OVERHEAD(size);
@@ -2004,7 +2052,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 	return 0;
 
 nofrags:
-	kfree(data);
+	skb_kfree_head(data, size);
 nodata:
 	return -ENOMEM;
 }
@@ -4641,6 +4689,13 @@ void __init skb_init(void)
 						0,
 						SLAB_HWCACHE_ALIGN|SLAB_PANIC,
 						NULL);
+#ifdef HAVE_SKB_SMALL_HEAD_CACHE
+	skb_small_head_cache = kmem_cache_create("skbuff_small_head",
+						SKB_SMALL_HEAD_CACHE_SIZE,
+						0,
+						SLAB_HWCACHE_ALIGN | SLAB_PANIC,
+						NULL);
+#endif
 	skb_extensions_init();
 }
 
@@ -6289,10 +6344,7 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
 	if (skb_pfmemalloc(skb))
 		gfp_mask |= __GFP_MEMALLOC;
 
-	size = SKB_DATA_ALIGN(size);
-	size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-	size = kmalloc_size_roundup(size);
-	data = kmalloc_reserve(size, gfp_mask, NUMA_NO_NODE, NULL);
+	data = kmalloc_reserve(&size, gfp_mask, NUMA_NO_NODE, NULL);
 	if (!data)
 		return -ENOMEM;
 	size = SKB_WITH_OVERHEAD(size);
@@ -6308,7 +6360,7 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
 	if (skb_cloned(skb)) {
 		/* drop the old head gracefully */
 		if (skb_orphan_frags(skb, gfp_mask)) {
-			kfree(data);
+			skb_kfree_head(data, size);
 			return -ENOMEM;
 		}
 		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
@@ -6408,18 +6460,15 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
 	if (skb_pfmemalloc(skb))
 		gfp_mask |= __GFP_MEMALLOC;
 
-	size = SKB_DATA_ALIGN(size);
-	size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-	size = kmalloc_size_roundup(size);
-	data = kmalloc_reserve(size, gfp_mask, NUMA_NO_NODE, NULL);
+	data = kmalloc_reserve(&size, gfp_mask, NUMA_NO_NODE, NULL);
 	if (!data)
 		return -ENOMEM;
 	size = SKB_WITH_OVERHEAD(size);
 
 	memcpy((struct skb_shared_info *)(data + size),
 	       skb_shinfo(skb), offsetof(struct skb_shared_info, frags[0]));
 	if (skb_orphan_frags(skb, gfp_mask)) {
-		kfree(data);
+		skb_kfree_head(data, size);
 		return -ENOMEM;
 	}
 	shinfo = (struct skb_shared_info *)(data + size);
@@ -6455,7 +6504,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
 			/* skb_frag_unref() is not needed here as shinfo->nr_frags = 0. */
 			if (skb_has_frag_list(skb))
 				kfree_skb_list(skb_shinfo(skb)->frag_list);
-			kfree(data);
+			skb_kfree_head(data, size);
 			return -ENOMEM;
 		}
 		skb_release_data(skb, SKB_CONSUMED);