Skip to content

Commit a28b355

Browse files
committed
lkl: introduce raw socket based netdev backend
This patch introduces a new backend for virtio net, which uses an AF_PACKET socket (a.k.a. raw socket) to bypass the host kernel network stack and use the LKL network stack instead. It is convenient since we don't have to add an additional net_device (e.g., tap) for LKL, and it is possibly faster than tuntap when the PACKET_QDISC_BYPASS socket option (available since Linux 3.14) is used. One drawback is that it requires root privilege (sudo or suid bit on) to use this. Example usage is like this: sudo LKL_HIJACK_NET_IFTYPE=raw LKL_HIJACK_NET_IFPARAMS=docker0 \ LKL_HIJACK_NET_IP=172.17.0.39 LKL_HIJACK_NET_NETMASK_LEN=24 \ ./bin/lkl-hijack.sh ping 172.17.0.2 Some benchmarks with netperf: - TCP_RR raw(QDISC_BYPASS): 9519.31 Trans/sec tap: 9486.03 Trans/sec - TCP_STREAM raw(QDISC_BYPASS): 2184.79 Mbps tap: 2130.39 Mbps - UDP_STREAM raw(QDISC_BYPASS): 3654.32 Mbps tap: 3108.10 Mbps Signed-off-by: Hajime Tazaki <thehajime@gmail.com>
1 parent f12c823 commit a28b355

File tree

7 files changed

+100
-4
lines changed

7 files changed

+100
-4
lines changed

tools/lkl/include/lkl.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,14 @@ struct lkl_netdev *lkl_netdev_dpdk_create(const char *ifname);
301301
*/
302302
struct lkl_netdev *lkl_netdev_vde_create(const char *switch_path);
303303

304+
/**
305+
* lkl_netdev_raw_create - create raw socket net_device for the virtio net
306+
* backend
307+
*
308+
* @ifname - interface name for the snoop device.
309+
*/
310+
struct lkl_netdev *lkl_netdev_raw_create(const char *ifname);
311+
304312
/*
305313
* lkl_register_dbg_handler- register a signal handler that loads a debug lib.
306314
*

tools/lkl/lib/Build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,5 +14,6 @@ lkl-y += dbg_handler.o
1414
lkl-$(CONFIG_AUTO_LKL_POSIX_HOST) += virtio_net.o
1515
lkl-$(CONFIG_AUTO_LKL_POSIX_HOST) += virtio_net_linux_fdnet.o
1616
lkl-$(CONFIG_AUTO_LKL_POSIX_HOST) += virtio_net_tap.o
17+
lkl-$(CONFIG_AUTO_LKL_POSIX_HOST) += virtio_net_raw.o
1718
lkl-$(CONFIG_AUTO_LKL_VIRTIO_NET_DPDK) += virtio_net_dpdk.o
1819
lkl-$(CONFIG_AUTO_LKL_VIRTIO_NET_VDE) += virtio_net_vde.o

tools/lkl/lib/hijack/hijack.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ HOST_CALL(socket);
158158
int socket(int domain, int type, int protocol)
159159
{
160160
CHECK_HOST_CALL(socket);
161-
if (domain == AF_UNIX)
161+
if (domain == AF_UNIX || domain == PF_PACKET)
162162
return host_socket(domain, type, protocol);
163163

164164
return lkl_call(__lkl__NR_socket, 3, domain, type, protocol);

tools/lkl/lib/hijack/init.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,8 @@ hijack_init(void)
214214
nd = lkl_netdev_dpdk_create(ifparams);
215215
else if (strcmp(iftype, "vde") == 0)
216216
nd = lkl_netdev_vde_create(ifparams);
217+
else if (strcmp(iftype, "raw") == 0)
218+
nd = lkl_netdev_raw_create(ifparams);
217219
}
218220

219221
if (nd) {

tools/lkl/lib/virtio_net_raw.c

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
/*
2+
* raw socket based virtual network interface feature for LKL
3+
* Copyright (c) 2015,2016 Ryo Nakamura, Hajime Tazaki
4+
*
5+
* Author: Ryo Nakamura <upa@wide.ad.jp>
6+
* Hajime Tazaki <thehajime@gmail.com>
7+
*
8+
* Current implementation is linux-specific.
9+
*/
10+
11+
#include <stdio.h>
12+
#include <errno.h>
13+
#include <string.h>
14+
#include <unistd.h>
15+
#include <net/if.h>
16+
#include <linux/if_ether.h>
17+
#include <linux/if_packet.h>
18+
#include <arpa/inet.h>
19+
#include <fcntl.h>
20+
21+
#include "virtio.h"
22+
#include "virtio_net_linux_fdnet.h"
23+
24+
/* since Linux 3.14 (man 7 packet) */
25+
#ifndef PACKET_QDISC_BYPASS
26+
#define PACKET_QDISC_BYPASS 20
27+
#endif
28+
29+
/**
 * lkl_netdev_raw_create - create raw socket net_device for the virtio net
 * backend
 *
 * Opens a PF_PACKET/SOCK_RAW socket for all protocols (ETH_P_ALL), binds it
 * to @ifname, tries to enable PACKET_QDISC_BYPASS (a failure is reported but
 * ignored), puts the socket in non-blocking mode and passes it to
 * lkl_register_netdev_linux_fdnet().
 *
 * @ifname - name of the host interface the socket is bound to
 * @returns the registered device, or NULL on any failure (the socket is
 * closed before returning NULL)
 */
struct lkl_netdev *lkl_netdev_raw_create(const char *ifname)
{
	struct lkl_netdev_linux_fdnet *nd;
	struct sockaddr_ll ll;
	unsigned int ifindex;
	int fd, fl_flags, val, ret;

	if (!ifname) {
		fprintf(stderr, "raw: no interface name given\n");
		return NULL;
	}

	/*
	 * if_nametoindex() returns 0 for an unknown interface; binding a
	 * packet socket with ifindex 0 would match every interface, so
	 * reject it here instead of silently snooping on all of them.
	 */
	ifindex = if_nametoindex(ifname);
	if (ifindex == 0) {
		perror("if_nametoindex");
		return NULL;
	}

	fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	if (fd < 0) {
		perror("socket");
		return NULL;
	}

	memset(&ll, 0, sizeof(ll));
	ll.sll_family = PF_PACKET;
	ll.sll_ifindex = ifindex;
	ll.sll_protocol = htons(ETH_P_ALL);
	ret = bind(fd, (struct sockaddr *)&ll, sizeof(ll));
	if (ret) {
		perror("bind");
		goto err_close;
	}

	/* best effort: older kernels do not know this option */
	val = 1;
	ret = setsockopt(fd, SOL_PACKET, PACKET_QDISC_BYPASS, &val,
			 sizeof(val));
	if (ret)
		perror("PACKET_QDISC_BYPASS, ignoring");

	/*
	 * O_NONBLOCK is a file status flag, so it has to be read with
	 * F_GETFL (not F_GETFD, which returns the descriptor flags) before
	 * being written back with F_SETFL.
	 */
	fl_flags = fcntl(fd, F_GETFL, NULL);
	if (fl_flags < 0) {
		perror("fcntl(F_GETFL)");
		goto err_close;
	}
	if (fcntl(fd, F_SETFL, fl_flags | O_NONBLOCK) < 0) {
		perror("fcntl(F_SETFL)");
		goto err_close;
	}

	nd = lkl_register_netdev_linux_fdnet(fd);
	if (!nd) {
		perror("failed to register to.");
		/*
		 * NOTE(review): assumes the register helper does not close
		 * fd itself on failure - confirm against its implementation.
		 */
		goto err_close;
	}

	return (struct lkl_netdev *)nd;

err_close:
	close(fd);
	return NULL;
}

tools/lkl/tests/net-test.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ static int test_icmp(char *str, int len)
8989
return TEST_FAILURE;
9090
}
9191

92-
ret = lkl_sys_recv(sock, buf, sizeof(buf), 0);
92+
ret = lkl_sys_recv(sock, buf, sizeof(buf), MSG_DONTWAIT);
9393
if (ret < 0) {
9494
snprintf(str, len, "recv error (%s)", strerror(errno));
9595
return TEST_FAILURE;
@@ -116,7 +116,7 @@ static int test_net_init(int argc, char **argv)
116116
struct lkl_netdev *nd = NULL;
117117

118118
if (argc < 6) {
119-
printf("usage %s <iftype: tap|dpdk> <ifname> <v4addr> <v4mask> <dstaddr> [gateway]\n", argv[0]);
119+
printf("usage %s <iftype: tap|dpdk|raw> <ifname> <v4addr> <v4mask> <dstaddr> [gateway]\n", argv[0]);
120120
exit(0);
121121
}
122122

@@ -127,14 +127,16 @@ static int test_net_init(int argc, char **argv)
127127
dst = argv[5];
128128

129129
if (argc == 7)
130-
gateway = argv[5];
130+
gateway = argv[6];
131131

132132
if (iftype && ifname && (strncmp(iftype, "tap", 3) == 0))
133133
nd = lkl_netdev_tap_create(ifname);
134134
#ifdef CONFIG_AUTO_LKL_VIRTIO_NET_DPDK
135135
else if (iftype && ifname && (strncmp(iftype, "dpdk", 4) == 0))
136136
nd = lkl_netdev_dpdk_create(ifname);
137137
#endif /* CONFIG_AUTO_LKL_VIRTIO_NET_DPDK */
138+
else if (iftype && ifname && (strncmp(iftype, "raw", 3) == 0))
139+
nd = lkl_netdev_raw_create(ifname);
138140

139141
if (!nd) {
140142
fprintf(stderr, "init netdev failed\n");

tools/lkl/tests/net.sh

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
#!/bin/bash -e
22

3+
IFNAME=`ip route |grep default | awk '{print $5}'`
4+
GW=`ip route |grep default | awk '{print $3}'`
5+
IPADDR=`echo $GW | sed -r "s/([0-9]+\.[0-9]+\.[0-9]+\.)([0-9]+)$/\1\`expr \2 + 10\`/"`
6+
PLEN=`ip rou |grep ${IFNAME} | grep "scope link" | awk '{print $1}' | sed "s/.*\/\(.*\)/\1/"`
7+
38
echo "== TAP (LKL net) tests =="
49
if [ -c /dev/net/tun ]; then
510
sudo ip link set dev lkl_ptt1 down || true
@@ -14,6 +19,15 @@ if [ -c /dev/net/tun ]; then
1419
sudo ip tuntap del dev lkl_ptt1 mode tap
1520
fi
1621

22+
echo "== RAW socket (LKL net) tests =="
23+
# currently not supported on mingw
24+
if [ -n "`printenv CONFIG_AUTO_LKL_POSIX_HOST`" ] ; then
25+
sudo ip link set dev ${IFNAME} promisc on
26+
# this won't work if IFNAME is wifi since it rewrites the src macaddr
27+
sudo ./net-test raw ${IFNAME} ${IPADDR} ${PLEN} 8.8.8.8 ${GW}
28+
sudo ip link set dev ${IFNAME} promisc off
29+
fi
30+
1731
# we disabled this DPDK test because it's unlikely possible to describe
1832
# a generic set of commands for all environments to test with DPDK. users
1933
# may customize those test commands for your host.

0 commit comments

Comments
 (0)