Skip to content

Commit

Permalink
net: Add TCP option support
Browse files Browse the repository at this point in the history
The Maximum Segment Size and Window Scale options are currently supported.
  • Loading branch information
asias authored and avikivity committed Oct 22, 2014
1 parent 5d19306 commit 6561bde
Show file tree
Hide file tree
Showing 5 changed files with 203 additions and 15 deletions.
1 change: 1 addition & 0 deletions configure.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
'net/stack.cc',
'net/ip_checksum.cc',
'net/udp.cc',
'net/tcp.cc',
]

core = [
Expand Down
2 changes: 2 additions & 0 deletions net/const.hh
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,7 @@ enum class eth_protocol_num : uint16_t {
ipv4 = 0x0800, arp = 0x0806, ipv6 = 0x86dd
};

const uint8_t ip_hdr_len_min = 20;

}
#endif
6 changes: 3 additions & 3 deletions net/packet.hh
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ public:

// prepend a header (default-initializing it)
template <typename Header>
Header* prepend_header(size_t size = sizeof(Header));
Header* prepend_header(size_t extra_size = 0);

// prepend a header (uninitialized!)
char* prepend_uninitialized_header(size_t size);
Expand Down Expand Up @@ -484,8 +484,8 @@ void packet::trim_front(size_t how_much) {

template <typename Header>
Header*
packet::prepend_header(size_t size) {
auto h = prepend_uninitialized_header(size);
packet::prepend_header(size_t extra_size) {
auto h = prepend_uninitialized_header(sizeof(Header) + extra_size);
return new (h) Header{};
}

Expand Down
108 changes: 108 additions & 0 deletions net/tcp.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
/*
* Copyright (C) 2014 Cloudius Systems, Ltd.
*/

#include "tcp.hh"
#include "core/align.hh"

namespace net {

// Parse the TCP options that follow the fixed header `th` and record what
// the remote side offered.
//
// The option area spans from sizeof(tcp_hdr) up to th->data_offset * 4 bytes
// from the start of the header. Parsing stops at the end of that area, at an
// EOL option, or at the first malformed option (a length byte that is missing,
// smaller than 2, or that runs past the end of the header). Options of an
// unknown kind, or known options carrying an unexpected length, are skipped.
void tcp_option::parse(tcp_hdr* th) {
    auto hdr = reinterpret_cast<uint8_t*>(th);
    auto beg = hdr + sizeof(tcp_hdr);
    auto end = hdr + th->data_offset * 4;
    while (beg < end) {
        auto kind = option_kind(*beg);
        // EOL and NOP are single-byte options without a length field
        if (kind == option_kind::eol) {
            return;
        }
        if (kind == option_kind::nop) {
            beg += option_len::nop;
            continue;
        }
        // Every other option is kind + length + payload; make sure the
        // length byte itself is still inside the header before reading it
        if (end - beg < 2) {
            return;
        }
        uint8_t len = beg[1];
        // The length covers the kind and length bytes, so anything below 2
        // is malformed (and 0 would loop forever); it must also fit in the
        // remaining option space
        if (len < 2 || len > end - beg) {
            return;
        }
        switch (kind) {
        case option_kind::mss:
            if (len == uint8_t(option_len::mss)) {
                _mss_received = true;
                // The 16-bit MSS value is carried in network byte order
                _remote_mss = uint16_t((uint16_t(beg[2]) << 8) | beg[3]);
            }
            break;
        case option_kind::win_scale:
            if (len == uint8_t(option_len::win_scale)) {
                _win_scale_received = true;
                // RFC 7323: a shift count larger than 14 must be treated as 14
                _remote_win_scale = beg[2] > 14 ? uint8_t(14) : beg[2];
                // We can turn on win_scale option, 7 is Linux's default win scale size
                _local_win_scale = 7;
            }
            break;
        case option_kind::sack:
            if (len == uint8_t(option_len::sack)) {
                _sack_received = true;
            }
            break;
        default:
            // Ignore options we do not understand
            break;
        }
        // Advance by the length the option declared (validated above)
        beg += len;
    }
}

// Write the TCP options for an outgoing segment directly after the fixed
// header `th`.
//
// `options_size` is the number of option bytes the caller reserved behind the
// header. Options are only emitted on SYN segments: MSS and window scale are
// written when the peer offered them, or unconditionally when the segment has
// no ACK flag. An option that would not fit into `options_size` is left out
// rather than written past the reserved space. Whatever space remains is
// padded with NOPs followed by a final EOL, so exactly `options_size` bytes
// are always initialized.
//
// Returns the number of option bytes written, which equals `options_size`.
uint8_t tcp_option::fill(tcp_hdr* th, uint8_t options_size) {
    auto hdr = reinterpret_cast<uint8_t*>(th);
    auto off = hdr + sizeof(tcp_hdr);
    uint8_t size = 0;

    if (th->f_syn) {
        if ((_mss_received || !th->f_ack)
                && size + uint8_t(option_len::mss) <= options_size) {
            auto opt = new (off) tcp_option::mss;
            opt->mss = _local_mss;
            off += opt->len;
            size += opt->len;
            // Convert the option payload to network byte order
            hton(*opt);
        }
        if ((_win_scale_received || !th->f_ack)
                && size + uint8_t(option_len::win_scale) <= options_size) {
            auto opt = new (off) tcp_option::win_scale;
            opt->shift = _local_win_scale;
            off += opt->len;
            size += opt->len;
        }
    }
    // Pad the reserved space: NOPs first, EOL in the very last byte
    while (size < options_size) {
        if (options_size - size > uint8_t(option_len::eol)) {
            new (off) tcp_option::nop;
            off += option_len::nop;
            size += option_len::nop;
        } else {
            new (off) tcp_option::eol;
            off += option_len::eol;
            size += option_len::eol;
        }
    }
    assert(size == options_size);

    return size;
}

// Compute how many bytes of TCP options to reserve behind the fixed header.
//
// Counts the MSS and window scale options if the peer offered them. When at
// least one option is present, one byte for the terminating EOL is added and
// the total is rounded up to a multiple of tcp_option::align. When no option
// is present the result is 0, so that no option space is reserved for a
// segment that carries no options.
uint8_t tcp_option::get_size() {
    uint8_t size = 0;
    if (_mss_received) {
        size += option_len::mss;
    }
    if (_win_scale_received) {
        size += option_len::win_scale;
    }
    if (size > 0) {
        size += option_len::eol;
        // Insert NOP option to align on 32-bit
        size = align_up(size, tcp_option::align);
    }
    return size;
}

}

101 changes: 89 additions & 12 deletions net/tcp.hh
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,63 @@ using namespace std::chrono_literals;

namespace net {

class tcp_hdr;

// TCP option handling: the wire layout of each supported option, plus the
// negotiation state and option values kept for one connection.
struct tcp_option {
    // The kind and len field are fixed and defined in TCP protocol
    enum class option_kind: uint8_t { mss = 2, win_scale = 3, sack = 4, timestamps = 8, nop = 1, eol = 0 };
    enum class option_len: uint8_t { mss = 4, win_scale = 3, sack = 2, timestamps = 10, nop = 1, eol = 1 };
    // Maximum segment size option: kind, len, 16-bit value
    struct mss {
        option_kind kind = option_kind::mss;
        option_len len = option_len::mss;
        packed<uint16_t> mss;
        template <typename Adjuster>
        void adjust_endianness(Adjuster a) { a(mss); }
    } __attribute__((packed));
    // Window scale option: kind, len, one-byte shift count
    struct win_scale {
        option_kind kind = option_kind::win_scale;
        option_len len = option_len::win_scale;
        uint8_t shift;
    } __attribute__((packed));
    // Option of kind 4 and length 2: kind and len only, no payload
    struct sack {
        option_kind kind = option_kind::sack;
        option_len len = option_len::sack;
    } __attribute__((packed));
    // Timestamps option: kind, len, two 32-bit values
    struct timestamps {
        option_kind kind = option_kind::timestamps;
        option_len len = option_len::timestamps;
        packed<uint32_t> t1;
        packed<uint32_t> t2;
        template <typename Adjuster>
        void adjust_endianness(Adjuster a) { a(t1, t2); }
    } __attribute__((packed));
    // Single-byte options without a length field
    struct nop {
        option_kind kind = option_kind::nop;
    } __attribute__((packed));
    struct eol {
        option_kind kind = option_kind::eol;
    } __attribute__((packed));
    // The option area is padded to a multiple of this many bytes
    static const uint8_t align = 4;

    // Read the options following `th` and record what the peer offered
    void parse(tcp_hdr* th);
    // Write options after `th` into `option_size` reserved bytes;
    // returns the number of bytes written
    uint8_t fill(tcp_hdr* th, uint8_t option_size);
    // Number of option bytes to reserve behind the fixed header
    uint8_t get_size();

    // For option negotiation
    bool _mss_received = false;
    bool _win_scale_received = false;
    bool _timestamps_received = false;
    bool _sack_received = false;

    // Option data
    uint16_t _remote_mss = 536;
    // Initialized so fill() never reads an indeterminate value; starts at the
    // same default as _remote_mss until the owner assigns the real value
    uint16_t _local_mss = 536;
    uint8_t _remote_win_scale = 0;
    uint8_t _local_win_scale = 0;
};
// Advance a byte pointer by the size of an option.
inline uint8_t*& operator+=(uint8_t*& x, tcp_option::option_len len) {
    x = x + static_cast<uint8_t>(len);
    return x;
}
// Add the size of an option to a byte counter (wraps like any uint8_t sum).
inline uint8_t& operator+=(uint8_t& x, tcp_option::option_len len) {
    x = static_cast<uint8_t>(x + static_cast<uint8_t>(len));
    return x;
}

// Wrapper type around the raw 32-bit value of a TCP sequence number.
struct tcp_seq {
uint32_t raw;
};
Expand Down Expand Up @@ -121,6 +178,8 @@ private:
tcp_seq unacknowledged;
tcp_seq next;
uint32_t window;
uint8_t window_scale;
uint16_t mss;
tcp_seq urgent;
tcp_seq wl1;
tcp_seq wl2;
Expand All @@ -133,14 +192,17 @@ private:
} _snd;
struct receive {
tcp_seq next;
uint32_t window = 20000;
uint32_t window;
uint8_t window_scale;
uint16_t mss;
tcp_seq urgent;
tcp_seq initial;
std::deque<packet> data;
std::map<tcp_seq, packet> out_of_order;
bool _user_waiting = false;
promise<> _data_received;
} _rcv;
tcp_option _option;
timer _delayed_ack;
public:
tcb(tcp& t, connid id);
Expand Down Expand Up @@ -279,7 +341,6 @@ void tcp<InetTraits>::received(packet p, ipaddr from, ipaddr to) {
return;
}
}
// FIXME: process options
p.trim_front(th->data_offset * 4);
ntoh(*th);
auto id = connid{to, from, th->dst_port, th->src_port};
Expand Down Expand Up @@ -385,10 +446,21 @@ void tcp<InetTraits>::tcb::input(tcp_hdr* th, packet p) {
_foreign_syn_received = true;
_rcv.initial = seg_seq;
_rcv.next = _rcv.initial + 1;
_rcv.window = 4500; // FIXME: what?
_rcv.urgent = _rcv.next;
_snd.wl1 = th->seq;
_snd.next = _snd.initial = get_tcp_isn();
_option.parse(th);
// Remote receive window scale factor
_snd.window_scale = _option._remote_win_scale;
// Local receive window scale factor
_rcv.window_scale = _option._local_win_scale;
// Maximum segment size remote can receive
_snd.mss = _option._remote_mss;
// Maximum segment size local can receive
_rcv.mss = _option._local_mss =
_tcp.hw_features().mtu - sizeof(tcp_hdr) - net::ip_hdr_len_min;
// Linux's default window size
_rcv.window = 29200 << _rcv.window_scale;
} else {
if (seg_seq != _rcv.initial) {
return respond_with_reset(th);
Expand Down Expand Up @@ -482,7 +554,7 @@ void tcp<InetTraits>::tcb::input(tcp_hdr* th, packet p) {
if (!_snd.window && th->window && _snd.unsent_len) {
do_output = true;
}
_snd.window = th->window;
_snd.window = th->window << _snd.window_scale;
_snd.wl1 = th->seq;
_snd.wl2 = th->ack;
}
Expand All @@ -505,9 +577,10 @@ packet tcp<InetTraits>::tcb::get_transmit_packet() {
uint32_t len;
if (_tcp.hw_features().tx_tso) {
// FIXME: No magic numbers when adding IP and TCP option support
// FIXME: Inform the tap device of the size of the split packets
len = _tcp.hw_features().max_packet_len - 20 - 20;
} else {
len = _tcp.hw_features().mtu - 20 - 20;
len = std::min(uint16_t(_tcp.hw_features().mtu - 20 - 20), _snd.mss);
}
can_send = std::min(can_send, len);
// easy case: one small packet
Expand Down Expand Up @@ -545,13 +618,17 @@ packet tcp<InetTraits>::tcb::get_transmit_packet() {

template <typename InetTraits>
void tcp<InetTraits>::tcb::output() {
uint8_t options_size = 0;
packet p = get_transmit_packet();
auto len = p.len();
if (len) {
_snd.data.push_back(p.share());
}

auto th = p.prepend_header<tcp_hdr>();
if (!_local_syn_acked) {
options_size = _option.get_size();
}
auto th = p.prepend_header<tcp_hdr>(options_size);
th->src_port = _local_port;
th->dst_port = _foreign_port;

Expand All @@ -566,8 +643,8 @@ void tcp<InetTraits>::tcb::output() {

th->seq = _snd.next;
th->ack = _rcv.next;
th->data_offset = sizeof(*th) / 4; // FIXME: options
th->window = _rcv.window;
th->data_offset = (sizeof(*th) + options_size) / 4;
th->window = _rcv.window >> _rcv.window_scale;
th->checksum = 0;

_snd.next += len;
Expand All @@ -576,10 +653,12 @@ void tcp<InetTraits>::tcb::output() {
th->f_fin = _snd.closed && _snd.unsent_len == 0 && !_local_fin_acked;
_local_fin_sent |= th->f_fin;

// Add tcp options
_option.fill(th, options_size);
hton(*th);

checksummer csum;
InetTraits::tcp_pseudo_header_checksum(csum, _local_ip, _foreign_ip, sizeof(*th) + len);
InetTraits::tcp_pseudo_header_checksum(csum, _local_ip, _foreign_ip, sizeof(*th) + options_size + len);
if (_tcp.hw_features().tx_csum_offload) {
// virtio-net's VIRTIO_NET_F_CSUM feature requires th->checksum to be
// initialized to ones' complement sum of the pseudo header.
Expand All @@ -590,10 +669,8 @@ void tcp<InetTraits>::tcb::output() {
}

offload_info oi;
// TCP protocol
oi.protocol = ip_protocol_num::tcp;
// TCP hdr len
oi.tcp_hdr_len = 20;
oi.tcp_hdr_len = sizeof(tcp_hdr) + options_size;
p.set_offload_info(oi);

_tcp.send(_local_ip, _foreign_ip, std::move(p));
Expand Down

0 comments on commit 6561bde

Please sign in to comment.