Skip to content

Commit

Permalink
virtio-net: use large buffers when VIRTIO_NET_F_MRG_RXBUF is not nego…
Browse files Browse the repository at this point in the history
…tiated

Some hypervisors like firecracker do not support VIRTIO_NET_F_MRG_RXBUF feature
which allows for more efficient and more fine grain utilization of memory when receiving
incoming packets from host. When VIRTIO_NET_F_MRG_RXBUF is not negotiated
and VIRTIO_NET_F_GUEST_TSO4 or VIRTIO_NET_F_GUEST_UFO are, the VirtIO spec
mandates the guest to use large receive buffers of size of 65562 bytes.

This patch changes OSv implementation of vitio-net to detect
if large receive buffers need to be used and accordingly allocate
and free those as necessary in relevant places.

Prior to this patch attempts to fetch large enough data (for example 1MB file over http)
would fail on firecracker with message "Receiving buffer is too small to hold frame of current size".

Please note that using large buffers requires 17 times more memory (around 17MB) than
when VIRTIO_NET_F_MRG_RXBUF is ON and buffers are merged from single pages.
This obviously is not ideal and in future we might lower the number of SGs (currently 256)
in this case to lower memory used.

Signed-off-by: Waldemar Kozaczuk <jwkozaczuk@gmail.com>
  • Loading branch information
wkozaczuk committed Nov 2, 2019
1 parent 65e14b6 commit 3fc7638
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 16 deletions.
60 changes: 44 additions & 16 deletions drivers/virtio-net.cc
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,16 @@ void net::read_config()
net_i("Features: %s=%d,%s=%d", "Status", _status, "TSO_ECN", _tso_ecn);
net_i("Features: %s=%d,%s=%d", "Host TSO ECN", _host_tso_ecn, "CSUM", _csum);
net_i("Features: %s=%d,%s=%d", "Guest_csum", _guest_csum, "guest tso4", _guest_tso4);
net_i("Features: %s=%d", "host tso4", _host_tso4);
net_i("Features: %s=%d,%s=%d", "host tso4", _host_tso4, "MRG_RX_BUF", _mergeable_bufs);

// If VIRTIO_NET_F_MRG_RXBUF is not negotiated and VIRTIO_NET_F_GUEST_TSO4
// or VIRTIO_NET_F_GUEST_UFO are, the VirtIO spec mandates the guest to use
// large receive buffers
// For details please see "5.1.6.3.1 Driver Requirements: Setting Up Receive Buffers" in VirtIO spec
if (!_mergeable_bufs && (_guest_tso4 || _guest_ufo)) {
net_i("Set up to use large receive buffers");
_use_large_buffers = true;
}
}

/**
Expand Down Expand Up @@ -463,7 +472,7 @@ void net::receiver()
// truncating it.
net_hdr_mrg_rxbuf* mhdr;

while (void* page = vq->get_buf_elem(&len)) {
while (void* buffer = vq->get_buf_elem(&len)) {

vq->get_buf_finalize();

Expand All @@ -473,32 +482,31 @@ void net::receiver()
// Bad packet/buffer - discard and continue to the next one
if (len < _hdr_size + ETHER_HDR_LEN) {
rx_drops++;
memory::free_page(page);

free_buffer(buffer);
continue;
}

mhdr = static_cast<net_hdr_mrg_rxbuf*>(page);
mhdr = static_cast<net_hdr_mrg_rxbuf*>(buffer);

if (!_mergeable_bufs) {
nbufs = 1;
} else {
nbufs = mhdr->num_buffers;
}

packet.push_back({page + _hdr_size, len - _hdr_size});
packet.push_back({buffer + _hdr_size, len - _hdr_size});

// Read the fragments
// Read the fragments - only applies if _mergeable_bufs is ON
while (--nbufs > 0) {
page = vq->get_buf_elem(&len);
if (!page) {
buffer = vq->get_buf_elem(&len);
if (!buffer) {
rx_drops++;
for (auto&& v : packet) {
free_buffer(v);
}
break;
}
packet.push_back({page, len});
packet.push_back({buffer, len});
vq->get_buf_finalize();
}

Expand Down Expand Up @@ -546,7 +554,8 @@ mbuf* net::packet_to_mbuf(const std::vector<iovec>& packet)
auto refcnt = new unsigned;
m->M_dat.MH.MH_dat.MH_ext.ref_cnt = refcnt;
m_extadd(m, static_cast<char*>(packet[0].iov_base), packet[0].iov_len,
&net::free_buffer_and_refcnt, packet[0].iov_base, refcnt, M_PKTHDR, EXT_EXTREF);
_use_large_buffers ? &net::free_large_buffer_and_refcnt : &net::free_buffer_and_refcnt,
packet[0].iov_base, refcnt, M_PKTHDR, EXT_EXTREF);
m->M_dat.MH.MH_pkthdr.len = packet[0].iov_len;
m->M_dat.MH.MH_pkthdr.rcvif = _ifn;
m->M_dat.MH.MH_pkthdr.csum_flags = 0;
Expand All @@ -561,7 +570,8 @@ mbuf* net::packet_to_mbuf(const std::vector<iovec>& packet)
refcnt = new unsigned;
m->M_dat.MH.MH_dat.MH_ext.ref_cnt = refcnt;
m_extadd(m, static_cast<char*>(iov.iov_base), iov.iov_len,
&net::free_buffer_and_refcnt, iov.iov_base, refcnt, 0, EXT_EXTREF);
_use_large_buffers ? &net::free_large_buffer_and_refcnt : &net::free_buffer_and_refcnt,
iov.iov_base, refcnt, 0, EXT_EXTREF);
m->m_hdr.mh_len = iov.iov_len;
m->m_hdr.mh_next = nullptr;
m_tail->m_hdr.mh_next = m;
Expand All @@ -578,25 +588,43 @@ void net::free_buffer_and_refcnt(void* buffer, void* refcnt)
delete static_cast<unsigned*>(refcnt);
}

void net::free_large_buffer_and_refcnt(void* buffer, void* refcnt)
{
do_free_large_buffer(buffer);
delete static_cast<unsigned*>(refcnt);
}

void net::do_free_buffer(void* buffer)
{
buffer = align_down(buffer, page_size);
memory::free_page(buffer);
}

void net::do_free_large_buffer(void* buffer)
{
buffer = align_down(buffer, page_size);
memory::free_phys_contiguous_aligned(buffer);
}

void net::fill_rx_ring()
{
trace_virtio_net_fill_rx_ring(_ifn->if_index);
int added = 0;
vring* vq = _rxq.vqueue;

int size_in_pages = _use_large_buffers ? LARGE_BUFFER_SIZE_IN_PAGES : 1;
while (vq->avail_ring_not_empty()) {
auto page = memory::alloc_page();
void *buffer;
if (_use_large_buffers) {
buffer = memory::alloc_phys_contiguous_aligned(size_in_pages * memory::page_size, memory::page_size);
} else {
buffer = memory::alloc_page();
}

vq->init_sg();
vq->add_in_sg(page, memory::page_size);
if (!vq->add_buf(page)) {
memory::free_page(page);
vq->add_in_sg(buffer, size_in_pages * memory::page_size);
if (!vq->add_buf(buffer)) {
free_buffer(buffer);
break;
}
added++;
Expand Down
18 changes: 18 additions & 0 deletions drivers/virtio-net.hh
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <bsd/sys/sys/mbuf.h>

#include <osv/percpu_xmit.hh>
#include <osv/contiguous_alloc.hh>

#include "drivers/virtio.hh"
#include "drivers/pci-device.hh"
Expand Down Expand Up @@ -108,6 +109,11 @@ public:
ETH_ALEN = 14,
VIRTIO_NET_CSUM_OFFLOAD = CSUM_TCP | CSUM_UDP,
};
// If VIRTIO_NET_F_MRG_RXBUF is not negotiated, the VirtIO spec
// mandates the guest to use large receive buffers of at least 65562 bytes size
// For simplicity we are rounding this up to the number of full pages which is 17
// 65562 = 16 * 4096 + 26 = 65536 + 26 <= 17 * 4096
const int LARGE_BUFFER_SIZE_IN_PAGES = 17;

struct net_config {
/* The config defining mac address (if VIRTIO_NET_F_MAC) */
Expand Down Expand Up @@ -218,8 +224,10 @@ public:
void fill_rx_ring();
mbuf* packet_to_mbuf(const std::vector<iovec>& iovec);
static void free_buffer_and_refcnt(void* buffer, void* refcnt);
static void free_large_buffer_and_refcnt(void* buffer, void* refcnt);
static void free_buffer(iovec iov) { do_free_buffer(iov.iov_base); }
static void do_free_buffer(void* buffer);
static void do_free_large_buffer(void* buffer);

bool ack_irq();

Expand Down Expand Up @@ -266,6 +274,7 @@ private:
bool _guest_tso4 = false;
bool _host_tso4 = false;
bool _guest_ufo = false;
bool _use_large_buffers = false;

u32 _hdr_size;

Expand Down Expand Up @@ -478,6 +487,15 @@ private:
*/
void fill_qstats(const struct txq& txq, struct if_data* out_data) const;

void free_buffer(void *buffer)
{
if (_use_large_buffers) {
memory::free_phys_contiguous_aligned(buffer);
} else {
memory::free_page(buffer);
}
}

/* We currently support only a single Rx+Tx queue */
struct rxq _rxq;
struct txq _txq;
Expand Down

0 comments on commit 3fc7638

Please sign in to comment.