From fd38ddb892369547c94d86414b4830bec14cd8c1 Mon Sep 17 00:00:00 2001
From: Jeremy Lakeman
Date: Mon, 23 Nov 2015 09:47:45 +1030
Subject: [PATCH] Attempt to recover from transient network errors

---
Review note (commentary in this section, up to the file statistics, is not
part of the commit message): the line breaks and indentation of this copy
were restored by hand from a whitespace-collapsed export. The line counts of
every hunk agree with its @@ header, but leading whitespace and the exact
position of blank context lines are a best guess and may differ from the
upstream file; the patch may need "git apply --ignore-whitespace" to apply.

 overlay_interface.c | 47 +++++++++++++++++++++++++++++------------------
 1 file changed, 29 insertions(+), 18 deletions(-)

diff --git a/overlay_interface.c b/overlay_interface.c
index a068edc6..483c9689 100644
--- a/overlay_interface.c
+++ b/overlay_interface.c
@@ -59,6 +59,7 @@ struct profile_total sock_any_stats;
 static void overlay_interface_poll(struct sched_ent *alarm);
 
 static int inet_up_count=0;
+static void rescan_soon();
 
 void overlay_interface_close(overlay_interface *interface)
 {
@@ -1058,18 +1059,23 @@ int overlay_broadcast_ensemble(struct network_destination *destination, struct o
           bytes, (size_t)len, 0,
           &destination->address.addr, destination->address.addrlen);
       if (sent == -1){
-        if (errno!=EAGAIN && errno!=EWOULDBLOCK && errno!=ENOENT && errno!=ENOTDIR)
+        if (errno!=EAGAIN && errno!=EWOULDBLOCK && errno!=ENOENT && errno!=ENOTDIR){
           WHYF_perror("sendto(fd=%d,len=%zu,addr=%s) on interface %s",
             interface->alarm.poll.fd,
             (size_t)len,
             alloca_socket_address(&destination->address),
             interface->name
           );
-        // close the interface if we had any error while sending broadcast packets,
-        // unicast packets should not bring the interface down
-        // TODO mark unicast destination as failed?
-        if (destination == interface->destination)
-          overlay_interface_close(interface);
+
+          // if we had any error while sending broadcast packets,
+          // it could be because the interface is coming down
+          // or there might be some socket error that we can't fix.
+          // So bring the interface down, and scan for network changes soon
+          if (destination == interface->destination){
+            overlay_interface_close(interface);
+            rescan_soon();
+          }
+        }
         ob_free(buffer);
         return -1;
       }
@@ -1279,7 +1285,7 @@ static int netlink_socket()
   return sock;
 }
 
-static int netlink_send_get(int fd)
+static int netlink_send_get()
 {
   struct {
     struct nlmsghdr n;
@@ -1294,6 +1300,7 @@ static int netlink_send_get(int fd)
   struct rtattr *rta = (struct rtattr *)(((char *)&req) + NLMSG_ALIGN(req.n.nlmsg_len));
   rta->rta_len = RTA_LENGTH(4);
 
+  int fd = ALARM_STRUCT(netlink_poll).poll.fd;
   if (send(fd, &req, req.n.nlmsg_len, 0)<0)
     return WHYF_perror("send(%d)", fd);
 
@@ -1315,7 +1322,7 @@ static int netlink_init()
     watch(alarm);
   }
 
-  return netlink_send_get(alarm->poll.fd);
+  return 0;
 }
 
 #else
@@ -1406,6 +1413,18 @@ static void file_interface_init(const struct config_network_interface *ifconfig)
   overlay_interface_init(ifconfig->file, &addr, &netmask, &broadcast, ifconfig);
 }
 
+static void rescan_soon(){
+#ifdef HAVE_LINUX_NETLINK_H
+  // start listening for network changes & request current interface addresses
+  netlink_init();
+  netlink_send_get();
+#else
+  // re-check all interfaces periodically
+  time_ms_t now = gettime_ms();
+  RESCHEDULE(&ALARM_STRUCT(overlay_interface_discover), now, now, now);
+#endif
+}
+
 void overlay_interface_config_change()
 {
   unsigned i;
@@ -1449,16 +1468,8 @@ void overlay_interface_config_change()
     file_interface_init(ifconfig);
   }
 
-  if (real_interface){
-#ifdef HAVE_LINUX_NETLINK_H
-    // start listening for network changes & request current interface addresses
-    netlink_init();
-#else
-    // re-check all interfaces periodically
-    time_ms_t now = gettime_ms();
-    RESCHEDULE(&ALARM_STRUCT(overlay_interface_discover), now, now, now);
-#endif
-  }
+  if (real_interface)
+    rescan_soon();
 }
 
 void logServalPacket(int level, struct __sourceloc __whence, const char *message, const unsigned char *packet, size_t len) {