/* * Copyright (c)2019 ZeroTier, Inc. * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file in the project's root directory. * * Change Date: 2025-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2.0 of the Apache License. */ /****/ /* * This creates a pair of feth devices with the lower numbered device * being the ZeroTier virtual interface and the other being the device * used to actually read and write packets. The latter gets no IP config * and is only used for I/O. The behavior of feth is similar to the * veth pairs that exist on Linux. * * The feth device has only existed since MacOS Sierra, but that's fairly * long ago in Mac terms. * * I/O with feth must be done using two different sockets. The BPF socket * is used to receive packets, while an AF_NDRV (low-level network driver * access) socket must be used to inject. AF_NDRV can't read IP frames * since BSD doesn't forward packets out the NDRV tap if they've already * been handled, and while BPF can inject its MTU for injected packets * is limited to 2048. AF_NDRV packet injection is required to inject * ZeroTier's large MTU frames. * * All this stuff is basically undocumented. A lot of tracing through * the Darwin/XNU kernel source was required to figure out how to make * this actually work. * * We hope to develop a DriverKit-based driver in the near-mid future to * replace this weird hack, but it works for now through Big Sur in our * testing. * * See also: * * https://apple.stackexchange.com/questions/337715/fake-ethernet-interfaces-feth-if-fake-anyone-ever-seen-this * https://opensource.apple.com/source/xnu/xnu-4570.41.2/bsd/net/if_fake.c.auto.html * */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <stdarg.h> #include <unistd.h> #include <signal.h> #include <fcntl.h> #include <errno.h> #include <sys/signal.h> #include <sys/types.h> #include <sys/stat.h> #include <sys/ioctl.h> #include <sys/wait.h> #include <sys/select.h> #include <sys/cdefs.h> #include <sys/uio.h> #include <sys/param.h> #include <sys/ioctl.h> #include <sys/socket.h> #include <sys/sysctl.h> #include <sys/resource.h> #include <netinet/in.h> #include <arpa/inet.h> #include <net/bpf.h> #include <net/route.h> #include <net/if.h> #include <net/if_arp.h> #include <net/if_dl.h> #include <net/if_media.h> #include <net/ndrv.h> #include <netinet/in_var.h> #include <netinet/icmp6.h> #include <netinet6/in6_var.h> #include <netinet6/nd6.h> #include <ifaddrs.h> #include "../version.h" #include "MacEthernetTapAgent.h" #ifndef SIOCAUTOCONF_START #define SIOCAUTOCONF_START _IOWR('i', 132, struct in6_ifreq) /* accept rtadvd on this interface */ #endif #ifndef SIOCAUTOCONF_STOP #define SIOCAUTOCONF_STOP _IOWR('i', 133, struct in6_ifreq) /* stop accepting rtadv for this interface */ #endif #define P_IFCONFIG "/sbin/ifconfig" static unsigned char s_pktReadBuf[131072] __attribute__ ((__aligned__(16))); static unsigned char s_stdinReadBuf[131072] __attribute__ ((__aligned__(16))); static char s_deviceName[IFNAMSIZ]; static char s_peerDeviceName[IFNAMSIZ]; static int s_bpffd = -1; static int s_ndrvfd = -1; static pid_t s_parentPid; static void configureIpv6Parameters(const char *ifname,int performNUD,int acceptRouterAdverts) { struct in6_ndireq nd; struct in6_ifreq ifr; int s = socket(AF_INET6,SOCK_DGRAM,0); if (s <= 0) return; memset(&nd,0,sizeof(nd)); strncpy(nd.ifname,ifname,sizeof(nd.ifname)); if (ioctl(s,SIOCGIFINFO_IN6,&nd)) { close(s); return; } unsigned long oldFlags = (unsigned long)nd.ndi.flags; if (performNUD) nd.ndi.flags |= ND6_IFF_PERFORMNUD; else nd.ndi.flags &= ~ND6_IFF_PERFORMNUD; if (oldFlags != (unsigned long)nd.ndi.flags) { if (ioctl(s,SIOCSIFINFO_FLAGS,&nd)) { close(s); return; } } memset(&ifr,0,sizeof(ifr)); strncpy(ifr.ifr_name,ifname,sizeof(ifr.ifr_name)); if (ioctl(s,acceptRouterAdverts ? SIOCAUTOCONF_START : SIOCAUTOCONF_STOP,&ifr)) { close(s); return; } close(s); } static int run(const char *path,...) { va_list ap; char *args[16]; int argNo = 1; va_start(ap,path); args[0] = (char *)path; for(;argNo<15;++argNo) { args[argNo] = va_arg(ap,char *); if (!args[argNo]) { break; } } args[argNo++] = (char *)0; va_end(ap); pid_t pid = fork(); if (pid < 0) { return -1; } else if (pid == 0) { dup2(STDERR_FILENO,STDOUT_FILENO); execv(args[0],args); _exit(-1); } int rv = 0; waitpid(pid,&rv,0); return rv; } static void die() { if (s_ndrvfd >= 0) close(s_ndrvfd); if (s_bpffd >= 0) close(s_bpffd); if (s_peerDeviceName[0]) run("/sbin/ifconfig",s_peerDeviceName,"destroy",(char *)0); if (s_deviceName[0]) run("/sbin/ifconfig",s_deviceName,"destroy",(char *)0); } static inline void close_inherited_fds() { struct rlimit lim; getrlimit(RLIMIT_NOFILE, &lim); for (int i=3,j=(int)lim.rlim_cur;i<j;++i) close(i); } int main(int argc,char **argv) { char buf[128]; struct ifreq ifr; u_int fl; fd_set rfds,wfds,efds; struct iovec iov[2]; s_deviceName[0] = 0; s_peerDeviceName[0] = 0; s_parentPid = getppid(); atexit(&die); signal(SIGIO,SIG_IGN); signal(SIGCHLD,SIG_IGN); signal(SIGPIPE,SIG_IGN); signal(SIGUSR1,SIG_IGN); signal(SIGUSR2,SIG_IGN); signal(SIGALRM,SIG_IGN); signal(SIGQUIT,&exit); signal(SIGTERM,&exit); signal(SIGKILL,&exit); signal(SIGINT,&exit); signal(SIGPIPE,&exit); close_inherited_fds(); if (getuid() != 0) { if (setuid(0) != 0) { fprintf(stderr,"E must be run as root or with root setuid bit on executable\n"); return ZT_MACETHERNETTAPAGENT_EXIT_CODE_INVALID_REQUEST; } } if (argc < 5) { fprintf(stderr,"E invalid or missing argument(s) (usage: MacEthernetTapAgent <0-4999> <mac> <mtu> <metric>)\n"); return ZT_MACETHERNETTAPAGENT_EXIT_CODE_INVALID_REQUEST; } const int deviceNo = atoi(argv[1]); if ((deviceNo < 0)||(deviceNo > 4999)) { fprintf(stderr,"E invalid or missing argument(s) (usage: MacEthernetTapAgent <0-4999> <mac> <mtu> <metric>)\n"); return ZT_MACETHERNETTAPAGENT_EXIT_CODE_INVALID_REQUEST; } const char *mac = argv[2]; const char *mtu = argv[3]; const char *metric = argv[4]; s_ndrvfd = socket(AF_NDRV,SOCK_RAW,0); if (s_ndrvfd < 0) { fprintf(stderr,"E unable to open AF_NDRV socket\n"); return ZT_MACETHERNETTAPAGENT_EXIT_CODE_UNABLE_TO_CREATE; } snprintf(s_deviceName,sizeof(s_deviceName),"feth%d",deviceNo); snprintf(s_peerDeviceName,sizeof(s_peerDeviceName),"feth%d",deviceNo+5000); if (run(P_IFCONFIG,s_peerDeviceName,"create",(char *)0) != 0) { fprintf(stderr,"E unable to create %s\n",s_deviceName); return ZT_MACETHERNETTAPAGENT_EXIT_CODE_UNABLE_TO_CREATE; } usleep(10); if (run(P_IFCONFIG,s_deviceName,"create",(char *)0) != 0) { fprintf(stderr,"E unable to create %s\n",s_deviceName); return ZT_MACETHERNETTAPAGENT_EXIT_CODE_UNABLE_TO_CREATE; } run(P_IFCONFIG,s_deviceName,"lladdr",mac,(char *)0); usleep(10); run(P_IFCONFIG,s_peerDeviceName,"peer",s_deviceName,(char *)0); usleep(10); run(P_IFCONFIG,s_peerDeviceName,"mtu",mtu,"up",(char *)0); usleep(10); run(P_IFCONFIG,s_deviceName,"mtu",mtu,"metric",metric,"up",(char *)0); usleep(10); configureIpv6Parameters(s_deviceName,1,0); usleep(10); struct sockaddr_ndrv nd; nd.snd_len = sizeof(struct sockaddr_ndrv); nd.snd_family = AF_NDRV; memcpy(nd.snd_name,s_peerDeviceName,sizeof(nd.snd_name)); if (bind(s_ndrvfd,(struct sockaddr *)&nd,sizeof(nd)) != 0) { fprintf(stderr,"E unable to bind AF_NDRV socket\n"); return ZT_MACETHERNETTAPAGENT_EXIT_CODE_UNABLE_TO_CREATE; } if (connect(s_ndrvfd,(struct sockaddr *)&nd,sizeof(nd)) != 0) { fprintf(stderr,"E unable to connect AF_NDRV socket\n"); return ZT_MACETHERNETTAPAGENT_EXIT_CODE_UNABLE_TO_CREATE; } /* Start at /dev/bpf1 since some simple bpf-using net utilities hard-code /dev/bpf0. * Things like libpcap are smart enough to search. */ for(int bpfno=1;bpfno<5000;++bpfno) { char tmp[32]; snprintf(tmp,sizeof(tmp),"/dev/bpf%d",bpfno); s_bpffd = open(tmp,O_RDWR); if (s_bpffd >= 0) { break; } } if (s_bpffd < 0) { fprintf(stderr,"E unable to open bpf device\n"); return ZT_MACETHERNETTAPAGENT_EXIT_CODE_UNABLE_TO_CREATE; } fl = sizeof(s_pktReadBuf); if (ioctl(s_bpffd,BIOCSBLEN,&fl) != 0) { return ZT_MACETHERNETTAPAGENT_EXIT_CODE_UNABLE_TO_CREATE; } const size_t readPktSize = (size_t)fl; fl = 1; if (ioctl(s_bpffd,BIOCIMMEDIATE,&fl) != 0) { return ZT_MACETHERNETTAPAGENT_EXIT_CODE_UNABLE_TO_CREATE; } fl = 0; if (ioctl(s_bpffd,BIOCSSEESENT,&fl) != 0) { return ZT_MACETHERNETTAPAGENT_EXIT_CODE_UNABLE_TO_CREATE; } memset(&ifr,0,sizeof(ifr)); memcpy(ifr.ifr_name,s_peerDeviceName,IFNAMSIZ); if (ioctl(s_bpffd,BIOCSETIF,&ifr) != 0) { return ZT_MACETHERNETTAPAGENT_EXIT_CODE_UNABLE_TO_CREATE; } fl = 1; if (ioctl(s_bpffd,BIOCSHDRCMPLT,&fl) != 0) { return ZT_MACETHERNETTAPAGENT_EXIT_CODE_UNABLE_TO_CREATE; } fl = 1; if (ioctl(s_bpffd,BIOCPROMISC,&fl) != 0) { return ZT_MACETHERNETTAPAGENT_EXIT_CODE_UNABLE_TO_CREATE; } fprintf(stderr,"I %s %s %d.%d.%d.%d\n",s_deviceName,s_peerDeviceName,ZEROTIER_ONE_VERSION_MAJOR,ZEROTIER_ONE_VERSION_MINOR,ZEROTIER_ONE_VERSION_REVISION,ZEROTIER_ONE_VERSION_BUILD); FD_ZERO(&rfds); FD_ZERO(&wfds); FD_ZERO(&efds); long stdinReadPtr = 0; for(;;) { FD_SET(STDIN_FILENO,&rfds); FD_SET(s_bpffd,&rfds); if (select(s_bpffd+1,&rfds,&wfds,&efds,(struct timeval *)0) < 0) { if ((errno == EAGAIN)||(errno == EINTR)) { usleep(10); continue; } return ZT_MACETHERNETTAPAGENT_EXIT_CODE_READ_ERROR; } if (FD_ISSET(s_bpffd,&rfds)) { long n = (long)read(s_bpffd,s_pktReadBuf,readPktSize); if (n > 0) { for(unsigned char *p=s_pktReadBuf,*eof=p+n;p<eof;) { struct bpf_hdr *h = (struct bpf_hdr *)p; if ((h->bh_caplen > 0)&&((p + h->bh_hdrlen + h->bh_caplen) <= eof)) { uint16_t len = (uint16_t)h->bh_caplen; iov[0].iov_base = &len; iov[0].iov_len = 2; iov[1].iov_base = p + h->bh_hdrlen; iov[1].iov_len = h->bh_caplen; writev(STDOUT_FILENO,iov,2); } p += BPF_WORDALIGN(h->bh_hdrlen + h->bh_caplen); } } } if (FD_ISSET(STDIN_FILENO,&rfds)) { long n = (long)read(STDIN_FILENO,s_stdinReadBuf + stdinReadPtr,sizeof(s_stdinReadBuf) - stdinReadPtr); if (n > 0) { stdinReadPtr += n; while (stdinReadPtr >= 2) { long len = *((uint16_t *)s_stdinReadBuf); if (stdinReadPtr >= (len + 2)) { if (len > 0) { unsigned char *msg = s_stdinReadBuf + 2; switch(msg[0]) { case ZT_MACETHERNETTAPAGENT_STDIN_CMD_PACKET: if (len > 1) { if (write(s_ndrvfd,msg+1,len-1) < 0) { fprintf(stderr,"E inject failed size==%ld errno==%d\n",len-1,errno); } } break; case ZT_MACETHERNETTAPAGENT_STDIN_CMD_IFCONFIG: { char *args[16]; args[0] = P_IFCONFIG; args[1] = s_deviceName; int argNo = 2; for(int argPtr=0,k=1,l=(int)len;k<l;++k) { if (!msg[k]) { if (argPtr > 0) { argPtr = 0; ++argNo; if (argNo >= 15) { break; } } } else { if (argPtr == 0) { args[argNo] = (char *)(msg + k); } argPtr++; } } args[argNo] = (char *)0; if (argNo > 2) { pid_t pid = fork(); if (pid < 0) { return -1; } else if (pid == 0) { dup2(STDERR_FILENO,STDOUT_FILENO); execv(args[0],args); _exit(-1); } int rv = 0; waitpid(pid,&rv,0); } } break; case ZT_MACETHERNETTAPAGENT_STDIN_CMD_EXIT: return ZT_MACETHERNETTAPAGENT_EXIT_CODE_SUCCESS; default: fprintf(stderr,"E unrecognized message type over pipe from host process: %d (length: %d)\n",(int)msg[0],(int)len); break; } } if (stdinReadPtr > (len + 2)) { memmove(s_stdinReadBuf,s_stdinReadBuf + len + 2,stdinReadPtr -= (len + 2)); } else { stdinReadPtr = 0; } } else { break; } } } } } return ZT_MACETHERNETTAPAGENT_EXIT_CODE_SUCCESS; }