Skip to content

tcp: increase flexibility of EBPF congestion control initialization #35

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
sudo: required
language: bash
dist: bionic
services:
- docker

env:
global:
- PROJECT_NAME='libbpf'
- AUTHOR_EMAIL="$(git log -1 --pretty=\"%aE\")"
- REPO_ROOT="$TRAVIS_BUILD_DIR"
- CI_ROOT="$REPO_ROOT/travis-ci"
- VMTEST_ROOT="$CI_ROOT/vmtest"

addons:
apt:
packages:
- qemu-kvm
- zstd
- binutils-dev
- elfutils
- libcap-dev
- libelf-dev
- libdw-dev

jobs:
include:
- stage: Builds & Tests
name: Kernel LATEST + selftests
language: bash
env: KERNEL=LATEST
script: $CI_ROOT/vmtest/run_vmtest.sh || travis_terminate 1
3 changes: 2 additions & 1 deletion include/net/inet_connection_sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,8 @@ struct inet_connection_sock {
void (*icsk_clean_acked)(struct sock *sk, u32 acked_seq);
struct hlist_node icsk_listen_portaddr_node;
unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu);
__u8 icsk_ca_state:6,
__u8 icsk_ca_state:5,
icsk_ca_initialized:1,
icsk_ca_setsockopt:1,
icsk_ca_dst_locked:1;
__u8 icsk_retransmits;
Expand Down
2 changes: 1 addition & 1 deletion include/net/tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -1104,7 +1104,7 @@ void tcp_get_available_congestion_control(char *buf, size_t len);
void tcp_get_allowed_congestion_control(char *buf, size_t len);
int tcp_set_allowed_congestion_control(char *allowed);
int tcp_set_congestion_control(struct sock *sk, const char *name, bool load,
bool reinit, bool cap_net_admin);
bool cap_net_admin);
u32 tcp_slow_start(struct tcp_sock *tp, u32 acked);
void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked);

Expand Down
18 changes: 4 additions & 14 deletions net/core/filter.c
Original file line number Diff line number Diff line change
Expand Up @@ -4313,10 +4313,8 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = {
.arg1_type = ARG_PTR_TO_CTX,
};

#define SOCKOPT_CC_REINIT (1 << 0)

static int _bpf_setsockopt(struct sock *sk, int level, int optname,
char *optval, int optlen, u32 flags)
char *optval, int optlen)
{
char devname[IFNAMSIZ];
int val, valbool;
Expand Down Expand Up @@ -4449,13 +4447,11 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
sk->sk_prot->setsockopt == tcp_setsockopt) {
if (optname == TCP_CONGESTION) {
char name[TCP_CA_NAME_MAX];
bool reinit = flags & SOCKOPT_CC_REINIT;

strncpy(name, optval, min_t(long, optlen,
TCP_CA_NAME_MAX-1));
name[TCP_CA_NAME_MAX-1] = 0;
ret = tcp_set_congestion_control(sk, name, false,
reinit, true);
ret = tcp_set_congestion_control(sk, name, false, true);
} else {
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
Expand Down Expand Up @@ -4615,9 +4611,7 @@ static int _bpf_getsockopt(struct sock *sk, int level, int optname,
BPF_CALL_5(bpf_sock_addr_setsockopt, struct bpf_sock_addr_kern *, ctx,
int, level, int, optname, char *, optval, int, optlen)
{
u32 flags = 0;
return _bpf_setsockopt(ctx->sk, level, optname, optval, optlen,
flags);
return _bpf_setsockopt(ctx->sk, level, optname, optval, optlen);
}

static const struct bpf_func_proto bpf_sock_addr_setsockopt_proto = {
Expand Down Expand Up @@ -4651,11 +4645,7 @@ static const struct bpf_func_proto bpf_sock_addr_getsockopt_proto = {
BPF_CALL_5(bpf_sock_ops_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
int, level, int, optname, char *, optval, int, optlen)
{
u32 flags = 0;
if (bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN)
flags |= SOCKOPT_CC_REINIT;
return _bpf_setsockopt(bpf_sock->sk, level, optname, optval, optlen,
flags);
return _bpf_setsockopt(bpf_sock->sk, level, optname, optval, optlen);
}

static const struct bpf_func_proto bpf_sock_ops_setsockopt_proto = {
Expand Down
3 changes: 2 additions & 1 deletion net/ipv4/tcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -2698,6 +2698,7 @@ int tcp_disconnect(struct sock *sk, int flags)
if (icsk->icsk_ca_ops->release)
icsk->icsk_ca_ops->release(sk);
memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
icsk->icsk_ca_initialized = 0;
tcp_set_ca_state(sk, TCP_CA_Open);
tp->is_sack_reneg = 0;
tcp_clear_retrans(tp);
Expand Down Expand Up @@ -3049,7 +3050,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname,
name[val] = 0;

lock_sock(sk);
err = tcp_set_congestion_control(sk, name, true, true,
err = tcp_set_congestion_control(sk, name, true,
ns_capable(sock_net(sk)->user_ns,
CAP_NET_ADMIN));
release_sock(sk);
Expand Down
27 changes: 7 additions & 20 deletions net/ipv4/tcp_cong.c
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ void tcp_assign_congestion_control(struct sock *sk)

void tcp_init_congestion_control(struct sock *sk)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);

tcp_sk(sk)->prior_ssthresh = 0;
if (icsk->icsk_ca_ops->init)
Expand All @@ -185,6 +185,7 @@ void tcp_init_congestion_control(struct sock *sk)
INET_ECN_xmit(sk);
else
INET_ECN_dontxmit(sk);
icsk->icsk_ca_initialized = 1;
}

static void tcp_reinit_congestion_control(struct sock *sk,
Expand Down Expand Up @@ -340,7 +341,7 @@ int tcp_set_allowed_congestion_control(char *val)
* already initialized.
*/
int tcp_set_congestion_control(struct sock *sk, const char *name, bool load,
bool reinit, bool cap_net_admin)
bool cap_net_admin)
{
struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_congestion_ops *ca;
Expand All @@ -361,28 +362,14 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load,
goto out;
}

if (!ca) {
if (!ca)
err = -ENOENT;
} else if (!load) {
const struct tcp_congestion_ops *old_ca = icsk->icsk_ca_ops;

if (bpf_try_module_get(ca, ca->owner)) {
if (reinit) {
tcp_reinit_congestion_control(sk, ca);
} else {
icsk->icsk_ca_ops = ca;
bpf_module_put(old_ca, old_ca->owner);
}
} else {
err = -EBUSY;
}
} else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || cap_net_admin)) {
else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || cap_net_admin))
err = -EPERM;
} else if (!bpf_try_module_get(ca, ca->owner)) {
else if (!bpf_try_module_get(ca, ca->owner))
err = -EBUSY;
} else {
else
tcp_reinit_congestion_control(sk, ca);
}
out:
rcu_read_unlock();
return err;
Expand Down
4 changes: 3 additions & 1 deletion net/ipv4/tcp_input.c
Original file line number Diff line number Diff line change
Expand Up @@ -5894,8 +5894,10 @@ void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb)
tp->snd_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
tp->snd_cwnd_stamp = tcp_jiffies32;

icsk->icsk_ca_initialized = 0;
bpf_skops_established(sk, bpf_op, skb);
tcp_init_congestion_control(sk);
if (!icsk->icsk_ca_initialized)
tcp_init_congestion_control(sk);
tcp_init_buffer_space(sk);
}

Expand Down
84 changes: 84 additions & 0 deletions travis-ci/managers/debian.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#!/bin/bash

PHASES=(${@:-SETUP RUN RUN_ASAN CLEANUP})
DEBIAN_RELEASE="${DEBIAN_RELEASE:-testing}"
CONT_NAME="${CONT_NAME:-libbpf-debian-$DEBIAN_RELEASE}"
ENV_VARS="${ENV_VARS:-}"
DOCKER_RUN="${DOCKER_RUN:-docker run}"
REPO_ROOT="${REPO_ROOT:-$PWD}"
ADDITIONAL_DEPS=(clang pkg-config gcc-8)
CFLAGS="-g -O2 -Werror -Wall"

function info() {
echo -e "\033[33;1m$1\033[0m"
}

function error() {
echo -e "\033[31;1m$1\033[0m"
}

function docker_exec() {
docker exec $ENV_VARS -it $CONT_NAME "$@"
}

set -e

source "$(dirname $0)/travis_wait.bash"

for phase in "${PHASES[@]}"; do
case $phase in
SETUP)
info "Setup phase"
info "Using Debian $DEBIAN_RELEASE"

sudo apt-get -y -o Dpkg::Options::="--force-confnew" install docker-ce
docker --version

docker pull debian:$DEBIAN_RELEASE
info "Starting container $CONT_NAME"
$DOCKER_RUN -v $REPO_ROOT:/build:rw \
-w /build --privileged=true --name $CONT_NAME \
-dit --net=host debian:$DEBIAN_RELEASE /bin/bash
docker_exec bash -c "echo deb-src http://deb.debian.org/debian $DEBIAN_RELEASE main >>/etc/apt/sources.list"
docker_exec apt-get -y update
docker_exec apt-get -y build-dep libelf-dev
docker_exec apt-get -y install libelf-dev
docker_exec apt-get -y install "${ADDITIONAL_DEPS[@]}"
;;
RUN|RUN_CLANG|RUN_GCC8|RUN_ASAN|RUN_CLANG_ASAN|RUN_GCC8_ASAN)
if [[ "$phase" = *"CLANG"* ]]; then
ENV_VARS="-e CC=clang -e CXX=clang++"
CC="clang"
elif [[ "$phase" = *"GCC8"* ]]; then
ENV_VARS="-e CC=gcc-8 -e CXX=g++-8"
CC="gcc-8"
else
CFLAGS="${CFLAGS} -Wno-stringop-truncation"
fi
if [[ "$phase" = *"ASAN"* ]]; then
CFLAGS="${CFLAGS} -fsanitize=address,undefined"
fi
docker_exec mkdir build install
docker_exec ${CC:-cc} --version
info "build"
docker_exec make -j$((4*$(nproc))) CFLAGS="${CFLAGS}" -C ./src -B OBJDIR=../build
info "ldd build/libbpf.so:"
docker_exec ldd build/libbpf.so
if ! docker_exec ldd build/libbpf.so | grep -q libelf; then
error "No reference to libelf.so in libbpf.so!"
exit 1
fi
info "install"
docker_exec make -j$((4*$(nproc))) -C src OBJDIR=../build DESTDIR=../install install
docker_exec rm -rf build install
;;
CLEANUP)
info "Cleanup phase"
docker stop $CONT_NAME
docker rm -f $CONT_NAME
;;
*)
echo >&2 "Unknown phase '$phase'"
exit 1
esac
done
61 changes: 61 additions & 0 deletions travis-ci/managers/travis_wait.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# This was borrowed from https://github.com/travis-ci/travis-build/tree/master/lib/travis/build/bash
# to get around https://github.com/travis-ci/travis-ci/issues/9979. It should probably be removed
# as soon as Travis CI has started to provide an easy way to export the functions to bash scripts.

travis_jigger() {
local cmd_pid="${1}"
shift
local timeout="${1}"
shift
local count=0

echo -e "\\n"

while [[ "${count}" -lt "${timeout}" ]]; do
count="$((count + 1))"
echo -ne "Still running (${count} of ${timeout}): ${*}\\r"
sleep 60
done

echo -e "\\n${ANSI_RED}Timeout (${timeout} minutes) reached. Terminating \"${*}\"${ANSI_RESET}\\n"
kill -9 "${cmd_pid}"
}

travis_wait() {
local timeout="${1}"

if [[ "${timeout}" =~ ^[0-9]+$ ]]; then
shift
else
timeout=20
fi

local cmd=("${@}")
local log_file="travis_wait_${$}.log"

"${cmd[@]}" &>"${log_file}" &
local cmd_pid="${!}"

travis_jigger "${!}" "${timeout}" "${cmd[@]}" &
local jigger_pid="${!}"
local result

{
set +e
wait "${cmd_pid}" 2>/dev/null
result="${?}"
ps -p"${jigger_pid}" &>/dev/null && kill "${jigger_pid}"
set -e
}

if [[ "${result}" -eq 0 ]]; then
echo -e "\\n${ANSI_GREEN}The command ${cmd[*]} exited with ${result}.${ANSI_RESET}"
else
echo -e "\\n${ANSI_RED}The command ${cmd[*]} exited with ${result}.${ANSI_RESET}"
fi

echo -e "\\n${ANSI_GREEN}Log:${ANSI_RESET}\\n"
cat "${log_file}"

return "${result}"
}
27 changes: 27 additions & 0 deletions travis-ci/managers/ubuntu.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/bin/bash
set -e
set -x

RELEASE="bionic"

echo "deb-src http://archive.ubuntu.com/ubuntu/ $RELEASE main restricted universe multiverse" >>/etc/apt/sources.list

apt-get update
apt-get -y build-dep libelf-dev
apt-get install -y libelf-dev pkg-config

source "$(dirname $0)/travis_wait.bash"

cd $REPO_ROOT

CFLAGS="-g -O2 -Werror -Wall -fsanitize=address,undefined"
mkdir build install
cc --version
make -j$((4*$(nproc))) CFLAGS="${CFLAGS}" -C ./src -B OBJDIR=../build
ldd build/libbpf.so
if ! ldd build/libbpf.so | grep -q libelf; then
echo "FAIL: No reference to libelf.so in libbpf.so!"
exit 1
fi
make -j$((4*$(nproc))) -C src OBJDIR=../build DESTDIR=../install install
rm -rf build install
30 changes: 30 additions & 0 deletions travis-ci/vmtest/build_pahole.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/bin/bash

set -eu

source $(cd $(dirname $0) && pwd)/helpers.sh

travis_fold start build_pahole "Building pahole"

CWD=$(pwd)
REPO_PATH=$1
PAHOLE_ORIGIN=https://git.kernel.org/pub/scm/devel/pahole/pahole.git

mkdir -p ${REPO_PATH}
cd ${REPO_PATH}
git init
git remote add origin ${PAHOLE_ORIGIN}
git fetch origin
git checkout master

mkdir -p build
cd build
cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo -D__LIB=lib ..
make -j$((4*$(nproc))) all
sudo make install

export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-}:/usr/local/lib
ldd $(which pahole)
pahole --version

travis_fold end build_pahole
Loading