@@ -433,21 +433,6 @@ static void veth_set_multicast_list(struct net_device *dev)
433433{
434434}
435435
436- static struct sk_buff * veth_build_skb (void * head , int headroom , int len ,
437- int buflen )
438- {
439- struct sk_buff * skb ;
440-
441- skb = build_skb (head , buflen );
442- if (!skb )
443- return NULL ;
444-
445- skb_reserve (skb , headroom );
446- skb_put (skb , len );
447-
448- return skb ;
449- }
450-
451436static int veth_select_rxq (struct net_device * dev )
452437{
453438 return smp_processor_id () % dev -> real_num_rx_queues ;
@@ -695,72 +680,143 @@ static void veth_xdp_rcv_bulk_skb(struct veth_rq *rq, void **frames,
695680 }
696681}
697682
698- static struct sk_buff * veth_xdp_rcv_skb (struct veth_rq * rq ,
699- struct sk_buff * skb ,
700- struct veth_xdp_tx_bq * bq ,
701- struct veth_stats * stats )
683+ static void veth_xdp_get (struct xdp_buff * xdp )
702684{
703- u32 pktlen , headroom , act , metalen , frame_sz ;
704- void * orig_data , * orig_data_end ;
705- struct bpf_prog * xdp_prog ;
706- int mac_len , delta , off ;
707- struct xdp_buff xdp ;
685+ struct skb_shared_info * sinfo = xdp_get_shared_info_from_buff (xdp );
686+ int i ;
708687
709- skb_prepare_for_gro (skb );
688+ get_page (virt_to_page (xdp -> data ));
689+ if (likely (!xdp_buff_has_frags (xdp )))
690+ return ;
710691
711- rcu_read_lock ();
712- xdp_prog = rcu_dereference (rq -> xdp_prog );
713- if (unlikely (!xdp_prog )) {
714- rcu_read_unlock ();
715- goto out ;
716- }
692+ for (i = 0 ; i < sinfo -> nr_frags ; i ++ )
693+ __skb_frag_ref (& sinfo -> frags [i ]);
694+ }
717695
718- mac_len = skb -> data - skb_mac_header (skb );
719- pktlen = skb -> len + mac_len ;
720- headroom = skb_headroom (skb ) - mac_len ;
696+ static int veth_convert_skb_to_xdp_buff (struct veth_rq * rq ,
697+ struct xdp_buff * xdp ,
698+ struct sk_buff * * pskb )
699+ {
700+ struct sk_buff * skb = * pskb ;
701+ u32 frame_sz ;
721702
722703 if (skb_shared (skb ) || skb_head_is_locked (skb ) ||
723- skb_is_nonlinear (skb ) || headroom < XDP_PACKET_HEADROOM ) {
704+ skb_shinfo (skb )-> nr_frags ) {
705+ u32 size , len , max_head_size , off ;
724706 struct sk_buff * nskb ;
725- int size , head_off ;
726- void * head , * start ;
727707 struct page * page ;
708+ int i , head_off ;
728709
729- size = SKB_DATA_ALIGN (VETH_XDP_HEADROOM + pktlen ) +
730- SKB_DATA_ALIGN (sizeof (struct skb_shared_info ));
731- if (size > PAGE_SIZE )
710+ /* We need a private copy of the skb and data buffers since
711+ * the eBPF program can modify them. We segment the original skb
712+ * into order-0 pages without linearizing it.
713+ *
714+ * Make sure we have enough space for the linear and paged areas.
715+ */
716+ max_head_size = SKB_WITH_OVERHEAD (PAGE_SIZE -
717+ VETH_XDP_HEADROOM );
718+ if (skb -> len > PAGE_SIZE * MAX_SKB_FRAGS + max_head_size )
732719 goto drop ;
733720
721+ /* Allocate skb head */
734722 page = alloc_page (GFP_ATOMIC | __GFP_NOWARN );
735723 if (!page )
736724 goto drop ;
737725
738- head = page_address (page );
739- start = head + VETH_XDP_HEADROOM ;
740- if (skb_copy_bits (skb , - mac_len , start , pktlen )) {
741- page_frag_free (head );
726+ nskb = build_skb (page_address (page ), PAGE_SIZE );
727+ if (!nskb ) {
728+ put_page (page );
742729 goto drop ;
743730 }
744731
745- nskb = veth_build_skb ( head , VETH_XDP_HEADROOM + mac_len ,
746- skb -> len , PAGE_SIZE );
747- if (! nskb ) {
748- page_frag_free ( head );
732+ skb_reserve ( nskb , VETH_XDP_HEADROOM );
733+ size = min_t ( u32 , skb -> len , max_head_size );
734+ if (skb_copy_bits ( skb , 0 , nskb -> data , size ) ) {
735+ consume_skb ( nskb );
749736 goto drop ;
750737 }
738+ skb_put (nskb , size );
751739
752740 skb_copy_header (nskb , skb );
753741 head_off = skb_headroom (nskb ) - skb_headroom (skb );
754742 skb_headers_offset_update (nskb , head_off );
743+
744+ /* Allocate paged area of new skb */
745+ off = size ;
746+ len = skb -> len - off ;
747+
748+ for (i = 0 ; i < MAX_SKB_FRAGS && off < skb -> len ; i ++ ) {
749+ page = alloc_page (GFP_ATOMIC | __GFP_NOWARN );
750+ if (!page ) {
751+ consume_skb (nskb );
752+ goto drop ;
753+ }
754+
755+ size = min_t (u32 , len , PAGE_SIZE );
756+ skb_add_rx_frag (nskb , i , page , 0 , size , PAGE_SIZE );
757+ if (skb_copy_bits (skb , off , page_address (page ),
758+ size )) {
759+ consume_skb (nskb );
760+ goto drop ;
761+ }
762+
763+ len -= size ;
764+ off += size ;
765+ }
766+
755767 consume_skb (skb );
756768 skb = nskb ;
769+ } else if (skb_headroom (skb ) < XDP_PACKET_HEADROOM &&
770+ pskb_expand_head (skb , VETH_XDP_HEADROOM , 0 , GFP_ATOMIC )) {
771+ goto drop ;
757772 }
758773
759774 /* SKB "head" area always have tailroom for skb_shared_info */
760775 frame_sz = skb_end_pointer (skb ) - skb -> head ;
761776 frame_sz += SKB_DATA_ALIGN (sizeof (struct skb_shared_info ));
762- xdp_init_buff (& xdp , frame_sz , & rq -> xdp_rxq );
763- xdp_prepare_buff (& xdp , skb -> head , skb -> mac_header , pktlen , true);
777+ xdp_init_buff (xdp , frame_sz , & rq -> xdp_rxq );
778+ xdp_prepare_buff (xdp , skb -> head , skb_headroom (skb ),
779+ skb_headlen (skb ), true);
780+
781+ if (skb_is_nonlinear (skb )) {
782+ skb_shinfo (skb )-> xdp_frags_size = skb -> data_len ;
783+ xdp_buff_set_frags_flag (xdp );
784+ } else {
785+ xdp_buff_clear_frags_flag (xdp );
786+ }
787+ * pskb = skb ;
788+
789+ return 0 ;
790+ drop :
791+ consume_skb (skb );
792+ * pskb = NULL ;
793+
794+ return - ENOMEM ;
795+ }
796+
797+ static struct sk_buff * veth_xdp_rcv_skb (struct veth_rq * rq ,
798+ struct sk_buff * skb ,
799+ struct veth_xdp_tx_bq * bq ,
800+ struct veth_stats * stats )
801+ {
802+ void * orig_data , * orig_data_end ;
803+ struct bpf_prog * xdp_prog ;
804+ struct xdp_buff xdp ;
805+ u32 act , metalen ;
806+ int off ;
807+
808+ skb_prepare_for_gro (skb );
809+
810+ rcu_read_lock ();
811+ xdp_prog = rcu_dereference (rq -> xdp_prog );
812+ if (unlikely (!xdp_prog )) {
813+ rcu_read_unlock ();
814+ goto out ;
815+ }
816+
817+ __skb_push (skb , skb -> data - skb_mac_header (skb ));
818+ if (veth_convert_skb_to_xdp_buff (rq , & xdp , & skb ))
819+ goto drop ;
764820
765821 orig_data = xdp .data ;
766822 orig_data_end = xdp .data_end ;
@@ -771,7 +827,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
771827 case XDP_PASS :
772828 break ;
773829 case XDP_TX :
774- get_page ( virt_to_page ( xdp . data ) );
830+ veth_xdp_get ( & xdp );
775831 consume_skb (skb );
776832 xdp .rxq -> mem = rq -> xdp_mem ;
777833 if (unlikely (veth_xdp_tx (rq , & xdp , bq ) < 0 )) {
@@ -783,7 +839,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
783839 rcu_read_unlock ();
784840 goto xdp_xmit ;
785841 case XDP_REDIRECT :
786- get_page ( virt_to_page ( xdp . data ) );
842+ veth_xdp_get ( & xdp );
787843 consume_skb (skb );
788844 xdp .rxq -> mem = rq -> xdp_mem ;
789845 if (xdp_do_redirect (rq -> dev , & xdp , xdp_prog )) {
@@ -806,18 +862,27 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
806862 rcu_read_unlock ();
807863
808864 /* check if bpf_xdp_adjust_head was used */
809- delta = orig_data - xdp .data ;
810- off = mac_len + delta ;
865+ off = orig_data - xdp .data ;
811866 if (off > 0 )
812867 __skb_push (skb , off );
813868 else if (off < 0 )
814869 __skb_pull (skb , - off );
815- skb -> mac_header -= delta ;
870+
871+ skb_reset_mac_header (skb );
816872
817873 /* check if bpf_xdp_adjust_tail was used */
818874 off = xdp .data_end - orig_data_end ;
819875 if (off != 0 )
820876 __skb_put (skb , off ); /* positive on grow, negative on shrink */
877+
878+ /* XDP frag metadata (e.g. nr_frags) is updated by eBPF helpers
879+ * (e.g. bpf_xdp_adjust_tail), so data_len must be updated here.
880+ */
881+ if (xdp_buff_has_frags (& xdp ))
882+ skb -> data_len = skb_shinfo (skb )-> xdp_frags_size ;
883+ else
884+ skb -> data_len = 0 ;
885+
821886 skb -> protocol = eth_type_trans (skb , rq -> dev );
822887
823888 metalen = xdp .data - xdp .data_meta ;
@@ -833,7 +898,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
833898 return NULL ;
834899err_xdp :
835900 rcu_read_unlock ();
836- page_frag_free ( xdp . data );
901+ xdp_return_buff ( & xdp );
837902xdp_xmit :
838903 return NULL ;
839904}
0 commit comments