@@ -2090,18 +2090,18 @@ static unsigned int run_filter(struct sk_buff *skb,
20902090}
20912091
/*
 * packet_rcv_vnet - emit a virtio-net header for @skb into @msg.
 *
 * Shown in diff form: '-' lines are the pre-change code, '+' lines the
 * post-change code. After the change the header length is no longer the
 * fixed sizeof(struct virtio_net_hdr) but the caller-supplied @vnet_hdr_sz.
 *
 * @msg:         destination message the header bytes are copied into
 * @skb:         packet the header fields are derived from
 * @len:         in/out; remaining length, reduced by @vnet_hdr_sz on the
 *               way through
 * @vnet_hdr_sz: number of header bytes to account for and copy out
 *
 * Returns -EINVAL when *@len is smaller than @vnet_hdr_sz or when
 * virtio_net_hdr_from_skb() reports failure; otherwise the result of
 * memcpy_to_msg().
 */
20922092static int packet_rcv_vnet (struct msghdr * msg , const struct sk_buff * skb ,
2093- size_t * len )
2093+ size_t * len , int vnet_hdr_sz )
20942094{
2095- struct virtio_net_hdr vnet_hdr ;
/* The larger mrg_rxbuf layout is used as the local buffer; num_buffers is
 * explicitly zeroed since only the leading virtio_net_hdr part is passed
 * to virtio_net_hdr_from_skb() below.
 */
2095+ struct virtio_net_hdr_mrg_rxbuf vnet_hdr = { . num_buffers = 0 } ;
20962096
2097- if (* len < sizeof ( vnet_hdr ) )
2097+ if (* len < vnet_hdr_sz )
20982098 return - EINVAL ;
2099- * len -= sizeof ( vnet_hdr ) ;
2099+ * len -= vnet_hdr_sz ;
21002100
2101- if (virtio_net_hdr_from_skb (skb , & vnet_hdr , vio_le (), true, 0 ))
/* The cast hands the local buffer to the helper as a plain virtio_net_hdr. */
2101+ if (virtio_net_hdr_from_skb (skb , ( struct virtio_net_hdr * ) & vnet_hdr , vio_le (), true, 0 ))
21022102 return - EINVAL ;
21032103
2104- return memcpy_to_msg (msg , (void * )& vnet_hdr , sizeof ( vnet_hdr ) );
/* Copy out exactly vnet_hdr_sz bytes of the local header buffer. */
2104+ return memcpy_to_msg (msg , (void * )& vnet_hdr , vnet_hdr_sz );
21052105}
21062106
21072107/*
@@ -2250,7 +2250,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
22502250 __u32 ts_status ;
22512251 bool is_drop_n_account = false;
22522252 unsigned int slot_id = 0 ;
2253- bool do_vnet = false ;
2253+ int vnet_hdr_sz = 0 ;
22542254
22552255 /* struct tpacket{2,3}_hdr is aligned to a multiple of TPACKET_ALIGNMENT.
22562256 * We may add members to them until current aligned size without forcing
@@ -2308,10 +2308,9 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
23082308 netoff = TPACKET_ALIGN (po -> tp_hdrlen +
23092309 (maclen < 16 ? 16 : maclen )) +
23102310 po -> tp_reserve ;
2311- if (packet_sock_flag (po , PACKET_SOCK_HAS_VNET_HDR )) {
2312- netoff += sizeof (struct virtio_net_hdr );
2313- do_vnet = true;
2314- }
2311+ vnet_hdr_sz = READ_ONCE (po -> vnet_hdr_sz );
2312+ if (vnet_hdr_sz )
2313+ netoff += vnet_hdr_sz ;
23152314 macoff = netoff - maclen ;
23162315 }
23172316 if (netoff > USHRT_MAX ) {
@@ -2337,7 +2336,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
23372336 snaplen = po -> rx_ring .frame_size - macoff ;
23382337 if ((int )snaplen < 0 ) {
23392338 snaplen = 0 ;
2340- do_vnet = false ;
2339+ vnet_hdr_sz = 0 ;
23412340 }
23422341 }
23432342 } else if (unlikely (macoff + snaplen >
@@ -2351,7 +2350,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
23512350 if (unlikely ((int )snaplen < 0 )) {
23522351 snaplen = 0 ;
23532352 macoff = GET_PBDQC_FROM_RB (& po -> rx_ring )-> max_frame_len ;
2354- do_vnet = false ;
2353+ vnet_hdr_sz = 0 ;
23552354 }
23562355 }
23572356 spin_lock (& sk -> sk_receive_queue .lock );
@@ -2367,7 +2366,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
23672366 __set_bit (slot_id , po -> rx_ring .rx_owner_map );
23682367 }
23692368
2370- if (do_vnet &&
2369+ if (vnet_hdr_sz &&
23712370 virtio_net_hdr_from_skb (skb , h .raw + macoff -
23722371 sizeof (struct virtio_net_hdr ),
23732372 vio_le (), true, 0 )) {
@@ -2551,16 +2550,26 @@ static int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len)
25512550}
25522551
/*
 * packet_snd_vnet_parse - pull a virtio-net header off the front of @msg.
 *
 * Shown in diff form: '-' lines are the pre-change code, '+' lines the
 * post-change code. After the change the header length consumed from the
 * message is the caller-supplied @vnet_hdr_sz rather than the fixed
 * sizeof(*vnet_hdr).
 *
 * @msg:         source message; its msg_iter is consumed
 * @len:         in/out; remaining length, reduced by @vnet_hdr_sz
 * @vnet_hdr:    receives the first sizeof(*vnet_hdr) bytes of the message
 * @vnet_hdr_sz: total header bytes present in the message
 *
 * Returns -EINVAL when *@len is smaller than @vnet_hdr_sz, -EFAULT when the
 * copy from the iterator fails, a non-zero result of
 * __packet_snd_vnet_parse() if it rejects the header, and 0 otherwise.
 */
25532552static int packet_snd_vnet_parse (struct msghdr * msg , size_t * len ,
2554- struct virtio_net_hdr * vnet_hdr )
2553+ struct virtio_net_hdr * vnet_hdr , int vnet_hdr_sz )
25552554{
2556- if (* len < sizeof (* vnet_hdr ))
2555+ int ret ;
2556+
2557+ if (* len < vnet_hdr_sz )
25572558 return - EINVAL ;
2558- * len -= sizeof ( * vnet_hdr ) ;
2559+ * len -= vnet_hdr_sz ;
25592560
/* Only the basic virtio_net_hdr portion is copied into *vnet_hdr, even
 * when vnet_hdr_sz is larger; the remainder is skipped further down.
 */
25602561 if (!copy_from_iter_full (vnet_hdr , sizeof (* vnet_hdr ), & msg -> msg_iter ))
25612562 return - EFAULT ;
25622563
2563- return __packet_snd_vnet_parse (vnet_hdr , * len );
2564+ ret = __packet_snd_vnet_parse (vnet_hdr , * len );
2565+ if (ret )
2566+ return ret ;
2567+
2568+ /* move iter to point to the start of mac header */
/* The advance covers the header bytes beyond sizeof(struct virtio_net_hdr)
 * that were not consumed by the copy above.
 */
2569+ if (vnet_hdr_sz != sizeof (struct virtio_net_hdr ))
2570+ iov_iter_advance (& msg -> msg_iter , vnet_hdr_sz - sizeof (struct virtio_net_hdr ));
2571+
2572+ return 0 ;
25642573}
25652574
25662575static int tpacket_fill_skb (struct packet_sock * po , struct sk_buff * skb ,
@@ -2722,6 +2731,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
27222731 void * ph ;
27232732 DECLARE_SOCKADDR (struct sockaddr_ll * , saddr , msg -> msg_name );
27242733 bool need_wait = !(msg -> msg_flags & MSG_DONTWAIT );
2734+ int vnet_hdr_sz = READ_ONCE (po -> vnet_hdr_sz );
27252735 unsigned char * addr = NULL ;
27262736 int tp_len , size_max ;
27272737 void * data ;
@@ -2779,8 +2789,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
27792789 size_max = po -> tx_ring .frame_size
27802790 - (po -> tp_hdrlen - sizeof (struct sockaddr_ll ));
27812791
2782- if ((size_max > dev -> mtu + reserve + VLAN_HLEN ) &&
2783- !packet_sock_flag (po , PACKET_SOCK_HAS_VNET_HDR ))
2792+ if ((size_max > dev -> mtu + reserve + VLAN_HLEN ) && !vnet_hdr_sz )
27842793 size_max = dev -> mtu + reserve + VLAN_HLEN ;
27852794
27862795 reinit_completion (& po -> skb_completion );
@@ -2809,10 +2818,10 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
28092818 status = TP_STATUS_SEND_REQUEST ;
28102819 hlen = LL_RESERVED_SPACE (dev );
28112820 tlen = dev -> needed_tailroom ;
2812- if (packet_sock_flag ( po , PACKET_SOCK_HAS_VNET_HDR ) ) {
2821+ if (vnet_hdr_sz ) {
28132822 vnet_hdr = data ;
2814- data += sizeof ( * vnet_hdr ) ;
2815- tp_len -= sizeof ( * vnet_hdr ) ;
2823+ data += vnet_hdr_sz ;
2824+ tp_len -= vnet_hdr_sz ;
28162825 if (tp_len < 0 ||
28172826 __packet_snd_vnet_parse (vnet_hdr , tp_len )) {
28182827 tp_len = - EINVAL ;
@@ -2837,7 +2846,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
28372846 addr , hlen , copylen , & sockc );
28382847 if (likely (tp_len >= 0 ) &&
28392848 tp_len > dev -> mtu + reserve &&
2840- !packet_sock_flag ( po , PACKET_SOCK_HAS_VNET_HDR ) &&
2849+ !vnet_hdr_sz &&
28412850 !packet_extra_vlan_len_allowed (dev , skb ))
28422851 tp_len = - EMSGSIZE ;
28432852
@@ -2856,7 +2865,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
28562865 }
28572866 }
28582867
2859- if (packet_sock_flag ( po , PACKET_SOCK_HAS_VNET_HDR ) ) {
2868+ if (vnet_hdr_sz ) {
28602869 if (virtio_net_hdr_to_skb (skb , vnet_hdr , vio_le ())) {
28612870 tp_len = - EINVAL ;
28622871 goto tpacket_error ;
@@ -2946,7 +2955,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
29462955 struct virtio_net_hdr vnet_hdr = { 0 };
29472956 int offset = 0 ;
29482957 struct packet_sock * po = pkt_sk (sk );
2949- bool has_vnet_hdr = false ;
2958+ int vnet_hdr_sz = READ_ONCE ( po -> vnet_hdr_sz ) ;
29502959 int hlen , tlen , linear ;
29512960 int extra_len = 0 ;
29522961
@@ -2990,11 +2999,10 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
29902999
29913000 if (sock -> type == SOCK_RAW )
29923001 reserve = dev -> hard_header_len ;
2993- if (packet_sock_flag ( po , PACKET_SOCK_HAS_VNET_HDR ) ) {
2994- err = packet_snd_vnet_parse (msg , & len , & vnet_hdr );
3002+ if (vnet_hdr_sz ) {
3003+ err = packet_snd_vnet_parse (msg , & len , & vnet_hdr , vnet_hdr_sz );
29953004 if (err )
29963005 goto out_unlock ;
2997- has_vnet_hdr = true;
29983006 }
29993007
30003008 if (unlikely (sock_flag (sk , SOCK_NOFCS ))) {
@@ -3064,11 +3072,11 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
30643072
30653073 packet_parse_headers (skb , sock );
30663074
3067- if (has_vnet_hdr ) {
3075+ if (vnet_hdr_sz ) {
30683076 err = virtio_net_hdr_to_skb (skb , & vnet_hdr , vio_le ());
30693077 if (err )
30703078 goto out_free ;
3071- len += sizeof ( vnet_hdr ) ;
3079+ len += vnet_hdr_sz ;
30723080 virtio_net_hdr_set_proto (skb , & vnet_hdr );
30733081 }
30743082
@@ -3408,7 +3416,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
34083416 struct sock * sk = sock -> sk ;
34093417 struct sk_buff * skb ;
34103418 int copied , err ;
3411- int vnet_hdr_len = 0 ;
3419+ int vnet_hdr_len = READ_ONCE ( pkt_sk ( sk ) -> vnet_hdr_sz ) ;
34123420 unsigned int origlen = 0 ;
34133421
34143422 err = - EINVAL ;
@@ -3449,11 +3457,10 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
34493457
34503458 packet_rcv_try_clear_pressure (pkt_sk (sk ));
34513459
3452- if (packet_sock_flag ( pkt_sk ( sk ), PACKET_SOCK_HAS_VNET_HDR ) ) {
3453- err = packet_rcv_vnet (msg , skb , & len );
3460+ if (vnet_hdr_len ) {
3461+ err = packet_rcv_vnet (msg , skb , & len , vnet_hdr_len );
34543462 if (err )
34553463 goto out_free ;
3456- vnet_hdr_len = sizeof (struct virtio_net_hdr );
34573464 }
34583465
34593466 /* You lose any data beyond the buffer you gave. If it worries
@@ -3915,8 +3922,9 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
39153922 return 0 ;
39163923 }
39173924 case PACKET_VNET_HDR :
3925+ case PACKET_VNET_HDR_SZ :
39183926 {
3919- int val ;
3927+ int val , hdr_len ;
39203928
39213929 if (sock -> type != SOCK_RAW )
39223930 return - EINVAL ;
@@ -3925,11 +3933,19 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
39253933 if (copy_from_sockptr (& val , optval , sizeof (val )))
39263934 return - EFAULT ;
39273935
3936+ if (optname == PACKET_VNET_HDR_SZ ) {
3937+ if (val && val != sizeof (struct virtio_net_hdr ) &&
3938+ val != sizeof (struct virtio_net_hdr_mrg_rxbuf ))
3939+ return - EINVAL ;
3940+ hdr_len = val ;
3941+ } else {
3942+ hdr_len = val ? sizeof (struct virtio_net_hdr ) : 0 ;
3943+ }
39283944 lock_sock (sk );
39293945 if (po -> rx_ring .pg_vec || po -> tx_ring .pg_vec ) {
39303946 ret = - EBUSY ;
39313947 } else {
3932- packet_sock_flag_set (po , PACKET_SOCK_HAS_VNET_HDR , val );
3948+ WRITE_ONCE (po -> vnet_hdr_sz , hdr_len );
39333949 ret = 0 ;
39343950 }
39353951 release_sock (sk );
@@ -4062,7 +4078,10 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
40624078 val = packet_sock_flag (po , PACKET_SOCK_ORIGDEV );
40634079 break ;
40644080 case PACKET_VNET_HDR :
4065- val = packet_sock_flag (po , PACKET_SOCK_HAS_VNET_HDR );
4081+ val = !!READ_ONCE (po -> vnet_hdr_sz );
4082+ break ;
4083+ case PACKET_VNET_HDR_SZ :
4084+ val = READ_ONCE (po -> vnet_hdr_sz );
40664085 break ;
40674086 case PACKET_VERSION :
40684087 val = po -> tp_version ;
0 commit comments