Skip to content

Commit 5a3b755

Browse files
committed
dco_linux: fix async message reception
Currently whenever we send a PEER_GET request to ovpn, we also set the CB that is supposed to parse the reply. However, due to the async nature of netlink messages, we could get an unrelated notification, sent by ovpn upon some event, after having set the CB, but before parsing the awaited reply. When this happens, the notification is then parsed with the configured CB instead of the notification parser, thus effectively rejecting the notification and losing the event. To fix this inconsistency, make ovpn_handle_msg() the default and only netlink parser CB. It is configured upon DCO initialization and is never removed. ovpn_handle_msg() will check the message type and will call the according parser. This way, no matter what message we get at what time, but we'll always parse it correctly. As a bonus we can also simplify the nl_sendmsg() API as we don't need to pass the cb and its argument anymore. The ID of the NLCTRL family is now also stored in the DCO context as we need it to check when we receive a mcast ID lookup message. Change-Id: I23ad79e14844aefde9ece34dadef0b75ff267201 Github: #793 Signed-off-by: Antonio Quartulli <antonio@mandelbit.com>
1 parent 684fdd6 commit 5a3b755

File tree

2 files changed

+107
-64
lines changed

2 files changed

+107
-64
lines changed

src/openvpn/dco_linux.c

Lines changed: 106 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -172,18 +172,14 @@ ovpn_nl_recvmsgs(dco_context_t *dco, const char *prefix)
172172
* The method will also free nl_msg
173173
* @param dco The dco context to use
174174
* @param nl_msg the message to use
175-
* @param cb An optional callback if the caller expects an answer
176-
* @param cb_arg An optional param to pass to the callback
177175
* @param prefix A prefix to report in the error message to give the user context
178176
* @return status of sending the message
179177
*/
180178
static int
181-
ovpn_nl_msg_send(dco_context_t *dco, struct nl_msg *nl_msg, ovpn_nl_cb cb,
182-
void *cb_arg, const char *prefix)
179+
ovpn_nl_msg_send(dco_context_t *dco, struct nl_msg *nl_msg, const char *prefix)
183180
{
184181
dco->status = 1;
185182

186-
nl_cb_set(dco->nl_cb, NL_CB_VALID, NL_CB_CUSTOM, cb, cb_arg);
187183
nl_send_auto(dco->nl_sock, nl_msg);
188184

189185
while (dco->status == 1)
@@ -285,7 +281,7 @@ dco_new_peer(dco_context_t *dco, unsigned int peerid, int sd,
285281
}
286282
nla_nest_end(nl_msg, attr);
287283

288-
ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__);
284+
ret = ovpn_nl_msg_send(dco, nl_msg, __func__);
289285

290286
nla_put_failure:
291287
nlmsg_free(nl_msg);
@@ -384,6 +380,29 @@ ovpn_nl_cb_error(struct sockaddr_nl (*nla) __attribute__ ((unused)),
384380
return NL_STOP;
385381
}
386382

383+
static void
384+
ovpn_dco_register(dco_context_t *dco)
385+
{
386+
msg(D_DCO_DEBUG, __func__);
387+
ovpn_get_mcast_id(dco);
388+
389+
if (dco->ovpn_dco_mcast_id < 0)
390+
{
391+
msg(M_FATAL, "cannot get mcast group: %s", nl_geterror(dco->ovpn_dco_mcast_id));
392+
}
393+
394+
/* Register for ovpn-dco specific multicast messages that the kernel may
395+
* send
396+
*/
397+
int ret = nl_socket_add_membership(dco->nl_sock, dco->ovpn_dco_mcast_id);
398+
if (ret)
399+
{
400+
msg(M_FATAL, "%s: failed to join groups: %d", __func__, ret);
401+
}
402+
}
403+
404+
static int ovpn_handle_msg(struct nl_msg *msg, void *arg);
405+
387406
static void
388407
ovpn_dco_init_netlink(dco_context_t *dco)
389408
{
@@ -420,11 +439,15 @@ ovpn_dco_init_netlink(dco_context_t *dco)
420439

421440
nl_socket_set_cb(dco->nl_sock, dco->nl_cb);
422441

442+
dco->dco_message_peer_id = -1;
423443
nl_cb_err(dco->nl_cb, NL_CB_CUSTOM, ovpn_nl_cb_error, &dco->status);
424444
nl_cb_set(dco->nl_cb, NL_CB_FINISH, NL_CB_CUSTOM, ovpn_nl_cb_finish,
425445
&dco->status);
426446
nl_cb_set(dco->nl_cb, NL_CB_ACK, NL_CB_CUSTOM, ovpn_nl_cb_finish,
427447
&dco->status);
448+
nl_cb_set(dco->nl_cb, NL_CB_VALID, NL_CB_CUSTOM, ovpn_handle_msg, dco);
449+
450+
ovpn_dco_register(dco);
428451

429452
/* The async PACKET messages confuse libnl and it will drop them with
430453
* wrong sequence numbers (NLE_SEQ_MISMATCH), so disable libnl's sequence
@@ -476,27 +499,6 @@ ovpn_dco_uninit_netlink(dco_context_t *dco)
476499
CLEAR(dco);
477500
}
478501

479-
static void
480-
ovpn_dco_register(dco_context_t *dco)
481-
{
482-
msg(D_DCO_DEBUG, __func__);
483-
ovpn_get_mcast_id(dco);
484-
485-
if (dco->ovpn_dco_mcast_id < 0)
486-
{
487-
msg(M_FATAL, "cannot get mcast group: %s", nl_geterror(dco->ovpn_dco_mcast_id));
488-
}
489-
490-
/* Register for ovpn-dco specific multicast messages that the kernel may
491-
* send
492-
*/
493-
int ret = nl_socket_add_membership(dco->nl_sock, dco->ovpn_dco_mcast_id);
494-
if (ret)
495-
{
496-
msg(M_FATAL, "%s: failed to join groups: %d", __func__, ret);
497-
}
498-
}
499-
500502
int
501503
open_tun_dco(struct tuntap *tt, openvpn_net_ctx_t *ctx, const char *dev)
502504
{
@@ -516,10 +518,6 @@ open_tun_dco(struct tuntap *tt, openvpn_net_ctx_t *ctx, const char *dev)
516518
msg(M_FATAL, "DCO: cannot retrieve ifindex for interface %s", dev);
517519
}
518520

519-
tt->dco.dco_message_peer_id = -1;
520-
521-
ovpn_dco_register(&tt->dco);
522-
523521
return 0;
524522
}
525523

@@ -548,7 +546,7 @@ dco_swap_keys(dco_context_t *dco, unsigned int peerid)
548546
NLA_PUT_U32(nl_msg, OVPN_A_KEYCONF_PEER_ID, peerid);
549547
nla_nest_end(nl_msg, attr);
550548

551-
ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__);
549+
ret = ovpn_nl_msg_send(dco, nl_msg, __func__);
552550

553551
nla_put_failure:
554552
nlmsg_free(nl_msg);
@@ -572,7 +570,7 @@ dco_del_peer(dco_context_t *dco, unsigned int peerid)
572570
NLA_PUT_U32(nl_msg, OVPN_A_PEER_ID, peerid);
573571
nla_nest_end(nl_msg, attr);
574572

575-
ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__);
573+
ret = ovpn_nl_msg_send(dco, nl_msg, __func__);
576574

577575
nla_put_failure:
578576
nlmsg_free(nl_msg);
@@ -598,7 +596,7 @@ dco_del_key(dco_context_t *dco, unsigned int peerid,
598596
NLA_PUT_U32(nl_msg, OVPN_A_KEYCONF_SLOT, slot);
599597
nla_nest_end(nl_msg, keyconf);
600598

601-
ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__);
599+
ret = ovpn_nl_msg_send(dco, nl_msg, __func__);
602600

603601
nla_put_failure:
604602
nlmsg_free(nl_msg);
@@ -657,7 +655,7 @@ dco_new_key(dco_context_t *dco, unsigned int peerid, int keyid,
657655
nla_nest_end(nl_msg, key_conf);
658656

659657

660-
ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__);
658+
ret = ovpn_nl_msg_send(dco, nl_msg, __func__);
661659

662660
nla_put_failure:
663661
nlmsg_free(nl_msg);
@@ -686,7 +684,7 @@ dco_set_peer(dco_context_t *dco, unsigned int peerid,
686684
keepalive_timeout);
687685
nla_nest_end(nl_msg, attr);
688686

689-
ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__);
687+
ret = ovpn_nl_msg_send(dco, nl_msg, __func__);
690688

691689
nla_put_failure:
692690
nlmsg_free(nl_msg);
@@ -754,20 +752,20 @@ ovpn_get_mcast_id(dco_context_t *dco)
754752

755753
/* Even though 'nlctrl' is a constant, there seem to be no library
756754
* provided define for it */
757-
int ctrlid = genl_ctrl_resolve(dco->nl_sock, "nlctrl");
755+
dco->ctrlid = genl_ctrl_resolve(dco->nl_sock, "nlctrl");
758756

759757
struct nl_msg *nl_msg = nlmsg_alloc();
760758
if (!nl_msg)
761759
{
762760
return -ENOMEM;
763761
}
764762

765-
genlmsg_put(nl_msg, 0, 0, ctrlid, 0, 0, CTRL_CMD_GETFAMILY, 0);
763+
genlmsg_put(nl_msg, 0, 0, dco->ctrlid, 0, 0, CTRL_CMD_GETFAMILY, 0);
766764

767765
int ret = -EMSGSIZE;
768766
NLA_PUT_STRING(nl_msg, CTRL_ATTR_FAMILY_NAME, OVPN_FAMILY_NAME);
769767

770-
ret = ovpn_nl_msg_send(dco, nl_msg, mcast_family_handler, dco, __func__);
768+
ret = ovpn_nl_msg_send(dco, nl_msg, __func__);
771769

772770
nla_put_failure:
773771
nlmsg_free(nl_msg);
@@ -879,31 +877,34 @@ dco_update_peer_stat(struct context_2 *c2, struct nlattr *tb[], uint32_t id)
879877
}
880878

881879
static int
882-
dco_parse_peer_multi(struct nl_msg *msg, void *arg)
880+
ovpn_handle_peer_multi(dco_context_t *dco, struct nlattr *attrs[])
883881
{
884-
struct nlattr *tb[OVPN_A_MAX + 1];
885-
struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
886-
887882
msg(D_DCO_DEBUG, "%s: parsing message...", __func__);
888883

889-
nla_parse(tb, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0),
890-
genlmsg_attrlen(gnlh, 0), NULL);
884+
/* this function assumes openvpn is running in multipeer mode as
885+
* it accesses c->multi
886+
*/
887+
if (dco->ifmode != OVPN_MODE_MP)
888+
{
889+
msg(M_WARN, "%s: can't parse 'multi-peer' message on P2P instance", __func__);
890+
return NL_SKIP;
891+
}
891892

892-
if (!tb[OVPN_A_PEER])
893+
if (!attrs[OVPN_A_PEER])
893894
{
894895
return NL_SKIP;
895896
}
896897

897898
struct nlattr *tb_peer[OVPN_A_PEER_MAX + 1];
898-
nla_parse_nested(tb_peer, OVPN_A_PEER_MAX, tb[OVPN_A_PEER], NULL);
899+
nla_parse_nested(tb_peer, OVPN_A_PEER_MAX, attrs[OVPN_A_PEER], NULL);
899900

900901
if (!tb_peer[OVPN_A_PEER_ID])
901902
{
902903
msg(M_WARN, "%s: no peer-id provided in reply", __func__);
903904
return NL_SKIP;
904905
}
905906

906-
struct multi_context *m = arg;
907+
struct multi_context *m = dco->c->multi;
907908
uint32_t peer_id = nla_get_u32(tb_peer[OVPN_A_PEER_ID]);
908909

909910
if (peer_id >= m->max_clients || !m->instances[peer_id])
@@ -919,25 +920,18 @@ dco_parse_peer_multi(struct nl_msg *msg, void *arg)
919920
}
920921

921922
static int
922-
dco_parse_peer(struct nl_msg *msg, void *arg)
923+
ovpn_handle_peer(dco_context_t *dco, struct nlattr *attrs[])
923924
{
924-
struct context *c = arg;
925-
struct nlattr *tb[OVPN_A_MAX + 1];
926-
struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
927-
928925
msg(D_DCO_DEBUG, "%s: parsing message...", __func__);
929926

930-
nla_parse(tb, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0),
931-
genlmsg_attrlen(gnlh, 0), NULL);
932-
933-
if (!tb[OVPN_A_PEER])
927+
if (!attrs[OVPN_A_PEER])
934928
{
935929
msg(D_DCO_DEBUG, "%s: malformed reply", __func__);
936930
return NL_SKIP;
937931
}
938932

939933
struct nlattr *tb_peer[OVPN_A_PEER_MAX + 1];
940-
nla_parse_nested(tb_peer, OVPN_A_PEER_MAX, tb[OVPN_A_PEER], NULL);
934+
nla_parse_nested(tb_peer, OVPN_A_PEER_MAX, attrs[OVPN_A_PEER], NULL);
941935

942936
if (!tb_peer[OVPN_A_PEER_ID])
943937
{
@@ -946,12 +940,33 @@ dco_parse_peer(struct nl_msg *msg, void *arg)
946940
}
947941

948942
uint32_t peer_id = nla_get_u32(tb_peer[OVPN_A_PEER_ID]);
949-
if (c->c2.tls_multi->dco_peer_id != peer_id)
943+
struct context_2 *c2;
944+
945+
if (dco->ifmode == OVPN_MODE_P2P)
946+
{
947+
c2 = &dco->c->c2;
948+
}
949+
else
950+
{
951+
struct multi_instance *mi = dco->c->multi->instances[peer_id];
952+
if (!mi)
953+
{
954+
msg(M_WARN, "%s: received data for a non-existing peer %u", __func__, peer_id);
955+
return NL_SKIP;
956+
}
957+
958+
c2 = &mi->context.c2;
959+
}
960+
961+
/* at this point this check should never fail for MP mode,
962+
* but it's still fully valid for P2P mode
963+
*/
964+
if (c2->tls_multi->dco_peer_id != peer_id)
950965
{
951966
return NL_SKIP;
952967
}
953968

954-
dco_update_peer_stat(&c->c2, tb_peer, peer_id);
969+
dco_update_peer_stat(c2, tb_peer, peer_id);
955970

956971
return NL_OK;
957972
}
@@ -1120,9 +1135,22 @@ ovpn_handle_msg(struct nl_msg *msg, void *arg)
11201135
{
11211136
dco_context_t *dco = arg;
11221137

1123-
struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
11241138
struct nlattr *attrs[OVPN_A_MAX + 1];
11251139
struct nlmsghdr *nlh = nlmsg_hdr(msg);
1140+
struct genlmsghdr *gnlh = genlmsg_hdr(nlh);
1141+
1142+
msg(D_DCO_DEBUG, "ovpn-dco: received netlink message type=%u cmd=%u flags=%#.4x",
1143+
nlh->nlmsg_type, gnlh->cmd, nlh->nlmsg_flags);
1144+
1145+
/* if we get a message from the NLCTRL family, it means
1146+
* this is the reply to the mcast ID resolution request
1147+
* and we parse it accordingly.
1148+
*/
1149+
if (nlh->nlmsg_type == dco->ctrlid)
1150+
{
1151+
msg(D_DCO_DEBUG, "ovpn-dco: received CTRLID message");
1152+
return mcast_family_handler(msg, dco);
1153+
}
11261154

11271155
if (!genlmsg_valid_hdr(nlh, 0))
11281156
{
@@ -1146,6 +1174,21 @@ ovpn_handle_msg(struct nl_msg *msg, void *arg)
11461174
*/
11471175
switch (gnlh->cmd)
11481176
{
1177+
case OVPN_CMD_PEER_GET:
1178+
{
1179+
/* this message is part of a peer list dump, hence triggered
1180+
* by a MP/server instance
1181+
*/
1182+
if (nlh->nlmsg_flags & NLM_F_MULTI)
1183+
{
1184+
return ovpn_handle_peer_multi(dco, attrs);
1185+
}
1186+
else
1187+
{
1188+
return ovpn_handle_peer(dco, attrs);
1189+
}
1190+
}
1191+
11491192
case OVPN_CMD_PEER_DEL_NTF:
11501193
{
11511194
return ovpn_handle_peer_del_ntf(dco, attrs);
@@ -1174,7 +1217,6 @@ int
11741217
dco_do_read(dco_context_t *dco)
11751218
{
11761219
msg(D_DCO_DEBUG, __func__);
1177-
nl_cb_set(dco->nl_cb, NL_CB_VALID, NL_CB_CUSTOM, ovpn_handle_msg, dco);
11781220

11791221
return ovpn_nl_recvmsgs(dco, __func__);
11801222
}
@@ -1189,7 +1231,7 @@ dco_get_peer_stats_multi(dco_context_t *dco, struct multi_context *m,
11891231

11901232
nlmsg_hdr(nl_msg)->nlmsg_flags |= NLM_F_DUMP;
11911233

1192-
int ret = ovpn_nl_msg_send(dco, nl_msg, dco_parse_peer_multi, m, __func__);
1234+
int ret = ovpn_nl_msg_send(dco, nl_msg, __func__);
11931235

11941236
nlmsg_free(nl_msg);
11951237

@@ -1227,7 +1269,7 @@ dco_get_peer_stats(struct context *c, const bool raise_sigusr1_on_err)
12271269
NLA_PUT_U32(nl_msg, OVPN_A_PEER_ID, peer_id);
12281270
nla_nest_end(nl_msg, attr);
12291271

1230-
ret = ovpn_nl_msg_send(dco, nl_msg, dco_parse_peer, c, __func__);
1272+
ret = ovpn_nl_msg_send(dco, nl_msg, __func__);
12311273

12321274
nla_put_failure:
12331275
nlmsg_free(nl_msg);

src/openvpn/dco_linux.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ typedef struct
6666
int status;
6767

6868
struct context *c;
69+
int ctrlid;
6970

7071
enum ovpn_mode ifmode;
7172

0 commit comments

Comments
 (0)