Skip to content

Commit 7881bd3

Browse files
author
Octavian Purdila
authored
Merge pull request torvalds#171 from libos-nuse/feature-virtio-rawsock
add raw socket (AF_PACKET) backend for virtio-net host
2 parents 6dacb1e + a28b355 commit 7881bd3

File tree

11 files changed

+408
-253
lines changed

11 files changed

+408
-253
lines changed

tools/lkl/include/lkl.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,14 @@ struct lkl_netdev *lkl_netdev_dpdk_create(const char *ifname);
301301
*/
302302
struct lkl_netdev *lkl_netdev_vde_create(const char *switch_path);
303303

304+
/**
305+
* lkl_netdev_raw_create - create raw socket net_device for the virtio net
306+
* backend
307+
*
308+
* @ifname - interface name for the snoop device.
309+
*/
310+
struct lkl_netdev *lkl_netdev_raw_create(const char *ifname);
311+
304312
/*
305313
* lkl_register_dbg_handler- register a signal handler that loads a debug lib.
306314
*

tools/lkl/lib/Build

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ lkl-y += virtio.o
1212
lkl-y += dbg.o
1313
lkl-y += dbg_handler.o
1414
lkl-$(CONFIG_AUTO_LKL_POSIX_HOST) += virtio_net.o
15+
lkl-$(CONFIG_AUTO_LKL_POSIX_HOST) += virtio_net_linux_fdnet.o
1516
lkl-$(CONFIG_AUTO_LKL_POSIX_HOST) += virtio_net_tap.o
17+
lkl-$(CONFIG_AUTO_LKL_POSIX_HOST) += virtio_net_raw.o
1618
lkl-$(CONFIG_AUTO_LKL_VIRTIO_NET_DPDK) += virtio_net_dpdk.o
1719
lkl-$(CONFIG_AUTO_LKL_VIRTIO_NET_VDE) += virtio_net_vde.o

tools/lkl/lib/hijack/hijack.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ HOST_CALL(socket);
158158
int socket(int domain, int type, int protocol)
159159
{
160160
CHECK_HOST_CALL(socket);
161-
if (domain == AF_UNIX)
161+
if (domain == AF_UNIX || domain == PF_PACKET)
162162
return host_socket(domain, type, protocol);
163163

164164
return lkl_call(__lkl__NR_socket, 3, domain, type, protocol);

tools/lkl/lib/hijack/init.c

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
#include <lkl_host.h>
2626

2727
#include "xlate.h"
28-
#include "../virtio_net_tap.h"
28+
#include "../virtio_net_linux_fdnet.h"
2929

3030
#define __USE_GNU
3131
#include <dlfcn.h>
@@ -170,11 +170,11 @@ static void mount_cmds_exec(char *_cmds, int (*callback)(char*))
170170
free(cmds);
171171
}
172172

173-
void fixup_netdev_tap_ops(void)
173+
void fixup_netdev_linux_fdnet_ops(void)
174174
{
175175
/* It's okay if this is NULL, because then netdev close will
176176
* fall back onto an uncloseable implementation. */
177-
lkl_netdev_tap_ops.eventfd = dlsym(RTLD_NEXT, "eventfd");
177+
lkl_netdev_linux_fdnet_ops.eventfd = dlsym(RTLD_NEXT, "eventfd");
178178
}
179179

180180
void __attribute__((constructor(102)))
@@ -197,7 +197,7 @@ hijack_init(void)
197197
char *arp_entries = getenv("LKL_HIJACK_NET_ARP");
198198

199199
/* Must be run before lkl_netdev_tap_create */
200-
fixup_netdev_tap_ops();
200+
fixup_netdev_linux_fdnet_ops();
201201

202202
if (tap) {
203203
fprintf(stderr,
@@ -214,6 +214,8 @@ hijack_init(void)
214214
nd = lkl_netdev_dpdk_create(ifparams);
215215
else if (strcmp(iftype, "vde") == 0)
216216
nd = lkl_netdev_vde_create(ifparams);
217+
else if (strcmp(iftype, "raw") == 0)
218+
nd = lkl_netdev_raw_create(ifparams);
217219
}
218220

219221
if (nd) {
Lines changed: 251 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,251 @@
1+
/*
2+
* Linux file descriptor based virtual network interface feature for LKL
3+
* Copyright (c) 2015,2016 Ryo Nakamura, Hajime Tazaki
4+
*
5+
* Author: Ryo Nakamura <upa@wide.ad.jp>
6+
* Hajime Tazaki <thehajime@gmail.com>
7+
* Octavian Purdila <octavian.purdila@intel.com>
8+
*
9+
* Current implementation is linux-specific.
10+
*/
11+
12+
#include <stdio.h>
13+
#include <stdlib.h>
14+
#include <errno.h>
15+
#include <string.h>
16+
#include <sys/epoll.h>
17+
#include <sys/eventfd.h>
18+
19+
#include "virtio.h"
20+
#include "virtio_net_linux_fdnet.h"
21+
22+
/*
 * Host functions that the fdnet backend calls through a pointer so that they
 * can be replaced at run time (the hijack library overwrites .eventfd).
 * A member without an initializer starts out NULL; when .eventfd is NULL,
 * lkl_register_netdev_linux_fdnet() records eventfd = -1 and the device
 * close operation becomes a no-op.
 */
struct lkl_netdev_linux_fdnet_ops lkl_netdev_linux_fdnet_ops = {
	/*
	 * /dev/net/tun is Linux specific so we know our host is some
	 * flavor of Linux, but this allows graceful support if we're
	 * on a kernel that's < 2.6.22.
	 */
#ifdef __NR_eventfd
	/* This signature was recently (9/2014) changed in glibc. */
	.eventfd = (int (*)(unsigned int, int))eventfd,
#endif /* __NR_eventfd */
};
33+
34+
static int linux_fdnet_net_tx(struct lkl_netdev *nd, void *data, int len)
35+
{
36+
int ret;
37+
struct lkl_netdev_linux_fdnet *nd_fdnet =
38+
container_of(nd, struct lkl_netdev_linux_fdnet, dev);
39+
40+
do {
41+
ret = write(nd_fdnet->fd, data, len);
42+
} while (ret == -1 && errno == EINVAL);
43+
if (ret > 0)
44+
return 0;
45+
if (ret < 0 && errno != EAGAIN)
46+
perror("write to Linux fd netdev fails");
47+
48+
return -1;
49+
}
50+
51+
static int linux_fdnet_net_rx(struct lkl_netdev *nd, void *data, int *len)
52+
{
53+
int ret;
54+
struct lkl_netdev_linux_fdnet *nd_fdnet =
55+
container_of(nd, struct lkl_netdev_linux_fdnet, dev);
56+
57+
do {
58+
ret = read(nd_fdnet->fd, data, *len);
59+
} while (ret == -1 && errno == EINVAL);
60+
if (ret > 0) {
61+
*len = ret;
62+
return 0;
63+
}
64+
if (ret < 0 && errno != EAGAIN)
65+
perror("read from fdnet device fails");
66+
67+
return -1;
68+
}
69+
70+
static int linux_fdnet_net_poll(struct lkl_netdev *nd, int events)
71+
{
72+
struct lkl_netdev_linux_fdnet *nd_fdnet =
73+
container_of(nd, struct lkl_netdev_linux_fdnet, dev);
74+
int epoll_fd = -1;
75+
struct epoll_event ev[2];
76+
int ret;
77+
const int is_rx = events & LKL_DEV_NET_POLL_RX;
78+
const int is_tx = events & LKL_DEV_NET_POLL_TX;
79+
int i;
80+
int ret_ev = 0;
81+
unsigned int event;
82+
83+
if (is_rx && is_tx) {
84+
fprintf(stderr, "both LKL_DEV_NET_POLL_RX and "
85+
"LKL_DEV_NET_POLL_TX are set\n");
86+
lkl_host_ops.panic();
87+
return -1;
88+
}
89+
if (!is_rx && !is_tx) {
90+
fprintf(stderr, "Neither LKL_DEV_NET_POLL_RX nor"
91+
" LKL_DEV_NET_POLL_TX are set.\n");
92+
lkl_host_ops.panic();
93+
return -1;
94+
}
95+
96+
if (is_rx)
97+
epoll_fd = nd_fdnet->epoll_rx_fd;
98+
else if (is_tx)
99+
epoll_fd = nd_fdnet->epoll_tx_fd;
100+
101+
do {
102+
ret = epoll_wait(epoll_fd, ev, 2, -1);
103+
} while (ret == -1 && errno == EINTR);
104+
if (ret < 0) {
105+
perror("epoll_wait");
106+
return -1;
107+
}
108+
109+
for (i = 0; i < ret; ++i) {
110+
if (ev[i].data.fd == nd_fdnet->eventfd)
111+
return -1;
112+
if (ev[i].data.fd == nd_fdnet->fd) {
113+
event = ev[i].events;
114+
if (event & (EPOLLIN | EPOLLPRI))
115+
ret_ev = LKL_DEV_NET_POLL_RX;
116+
else if (event & EPOLLOUT)
117+
ret_ev = LKL_DEV_NET_POLL_TX;
118+
else
119+
return -1;
120+
}
121+
}
122+
return ret_ev;
123+
}
124+
125+
static int linux_fdnet_net_close(struct lkl_netdev *nd)
126+
{
127+
long buf = 1;
128+
struct lkl_netdev_linux_fdnet *nd_fdnet =
129+
container_of(nd, struct lkl_netdev_linux_fdnet, dev);
130+
131+
if (nd_fdnet->eventfd == -1) {
132+
/* No eventfd support. */
133+
return 0;
134+
}
135+
136+
if (write(nd_fdnet->eventfd, &buf, sizeof(buf)) < 0) {
137+
perror("linux-fdnet: failed to close fd");
138+
/* This should never happen. */
139+
return -1;
140+
}
141+
142+
/* The order that we join in doesn't matter. */
143+
if (lkl_host_ops.thread_join(nd->rx_tid) ||
144+
lkl_host_ops.thread_join(nd->tx_tid))
145+
return -1;
146+
147+
/* nor does the order that we close */
148+
if (close(nd_fdnet->fd) || close(nd_fdnet->eventfd) ||
149+
close(nd_fdnet->epoll_rx_fd) || close(nd_fdnet->epoll_tx_fd)) {
150+
perror("linux-fdnet net_close fd");
151+
return -1;
152+
}
153+
154+
return 0;
155+
}
156+
157+
struct lkl_dev_net_ops linux_fdnet_net_ops = {
158+
.tx = linux_fdnet_net_tx,
159+
.rx = linux_fdnet_net_rx,
160+
.poll = linux_fdnet_net_poll,
161+
.close = linux_fdnet_net_close,
162+
};
163+
164+
/*
 * add_to_epoll - register @fd with @epoll_fd for the given @events mask.
 *
 * The descriptor number itself is stored as the event's user data so that
 * the poller can tell which descriptor fired.
 *
 * Returns 0 on success; prints a message and returns -1 if epoll_ctl() fails.
 */
static int add_to_epoll(int epoll_fd, int fd, unsigned int events)
{
	struct epoll_event registration;

	memset(&registration, 0, sizeof(registration));
	registration.data.fd = fd;
	registration.events = events;

	if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, fd, &registration) != 0) {
		perror("EPOLL_CTL_ADD fails");
		return -1;
	}

	return 0;
}
179+
180+
/*
 * create_epoll_fd - create an epoll instance that watches @fd for @events.
 *
 * Returns the new epoll descriptor, or -1 if the instance could not be
 * created or @fd could not be added to it (the instance is closed again in
 * the latter case).
 */
static int create_epoll_fd(int fd, unsigned int events)
{
	int epfd = epoll_create1(0);

	if (epfd < 0) {
		perror("epoll_create1");
		return -1;
	}

	if (add_to_epoll(epfd, fd, events) == 0)
		return epfd;

	close(epfd);
	return -1;
}
194+
195+
196+
struct lkl_netdev_linux_fdnet *lkl_register_netdev_linux_fdnet(int fd)
197+
{
198+
struct lkl_netdev_linux_fdnet *nd;
199+
200+
nd = (struct lkl_netdev_linux_fdnet *)
201+
malloc(sizeof(struct lkl_netdev_linux_fdnet));
202+
if (!nd) {
203+
fprintf(stderr, "fdnet: failed to allocate memory\n");
204+
/* TODO: propagate the error state, maybe use errno for that? */
205+
return NULL;
206+
}
207+
208+
nd->fd = fd;
209+
/* Making them edge-triggered to save CPU. */
210+
nd->epoll_rx_fd = create_epoll_fd(nd->fd, EPOLLIN | EPOLLPRI | EPOLLET);
211+
nd->epoll_tx_fd = create_epoll_fd(nd->fd, EPOLLOUT | EPOLLET);
212+
if (nd->epoll_rx_fd < 0 || nd->epoll_tx_fd < 0) {
213+
if (nd->epoll_rx_fd >= 0)
214+
close(nd->epoll_rx_fd);
215+
if (nd->epoll_tx_fd >= 0)
216+
close(nd->epoll_tx_fd);
217+
lkl_unregister_netdev_linux_fdnet(nd);
218+
return NULL;
219+
}
220+
221+
if (lkl_netdev_linux_fdnet_ops.eventfd) {
222+
/* eventfd is supported by the host, all is well */
223+
nd->eventfd = lkl_netdev_linux_fdnet_ops.eventfd(
224+
0, EFD_NONBLOCK | EFD_SEMAPHORE);
225+
226+
if (nd->eventfd < 0) {
227+
perror("fdnet: create eventfd");
228+
lkl_unregister_netdev_linux_fdnet(nd);
229+
return NULL;
230+
}
231+
if (add_to_epoll(nd->epoll_rx_fd, nd->eventfd, EPOLLIN) ||
232+
add_to_epoll(nd->epoll_tx_fd, nd->eventfd, EPOLLIN)) {
233+
lkl_unregister_netdev_linux_fdnet(nd);
234+
return NULL;
235+
}
236+
} else {
237+
/* no host eventfd support */
238+
nd->eventfd = -1;
239+
}
240+
241+
nd->dev.ops = &linux_fdnet_net_ops;
242+
return nd;
243+
}
244+
245+
void lkl_unregister_netdev_linux_fdnet(struct lkl_netdev_linux_fdnet *nd)
246+
{
247+
close(nd->eventfd);
248+
close(nd->epoll_rx_fd);
249+
close(nd->epoll_tx_fd);
250+
free(nd);
251+
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
#ifndef _VIRTIO_NET_LINUX_FDNET_H
2+
#define _VIRTIO_NET_LINUX_FDNET_H
3+
4+
/*
 * State of one file-descriptor backed network device. The generic
 * struct lkl_netdev is embedded so the backend can recover this structure
 * from it with container_of().
 */
struct lkl_netdev_linux_fdnet {
	struct lkl_netdev dev;
	/* file-descriptor based device; frames are read from/written to it */
	int fd;
	/*
	 * Needed to initiate shutdown: written on close to wake the pollers.
	 * -1 when the host provides no eventfd.
	 */
	int eventfd;
	/* epoll fds for rx and tx; each watches fd and, if present, eventfd */
	int epoll_rx_fd;
	int epoll_tx_fd;
};
14+
15+
/*
 * Host functions the fdnet backend calls through a pointer; the single
 * instance is defined by the fdnet backend.
 */
extern struct lkl_netdev_linux_fdnet_ops {
	/*
	 * We need this so that we can "unhijack" this function in
	 * case we decided to hijack it.
	 *
	 * May be NULL, in which case devices are registered without an
	 * eventfd.
	 */
	int (*eventfd)(unsigned int initval, int flags);
} lkl_netdev_linux_fdnet_ops;
22+
23+
/**
 * lkl_register_netdev_linux_fdnet - register a file descriptor-based network
 * device as a NIC
 *
 * @fd - a POSIX file descriptor number for input/output
 * @returns a struct lkl_netdev_linux_fdnet entry for virtio-net, or NULL if
 * allocation or epoll/eventfd setup fails
 */
struct lkl_netdev_linux_fdnet *lkl_register_netdev_linux_fdnet(int fd);
31+
32+
33+
/**
 * lkl_unregister_netdev_linux_fdnet - unregister a file descriptor-based
 * network device as a NIC
 *
 * Closes the eventfd and epoll descriptors held by @nd and frees @nd; the
 * descriptor that was passed to lkl_register_netdev_linux_fdnet() is not
 * closed.
 *
 * @nd - a struct lkl_netdev_linux_fdnet entry to be unregistered
 */
void lkl_unregister_netdev_linux_fdnet(struct lkl_netdev_linux_fdnet *nd);
40+
41+
#endif /* _VIRTIO_NET_LINUX_FDNET_H*/

0 commit comments

Comments
 (0)