Fix for crazy Windows threading bug... repeatedly adding and removing a network now doesn't leave networks in limbo.

This commit is contained in:
Adam Ierymenko 2014-08-21 17:49:05 -07:00
parent 44d58ee871
commit de4e29288d
8 changed files with 166 additions and 169 deletions

View File

@ -20,7 +20,7 @@
<ROW Property="CTRLS" Value="2"/>
<ROW Property="MSIFASTINSTALL" MultiBuildValue="DefaultBuild:2"/>
<ROW Property="Manufacturer" Value="ZeroTier Networks LLC"/>
<ROW Property="ProductCode" Value="1033:{44B21E2C-C01D-4D96-AD50-9466735F2D70} " Type="16"/>
<ROW Property="ProductCode" Value="1033:{39DFC63F-B6EA-40E7-880B-DC4A5E70F75A} " Type="16"/>
<ROW Property="ProductLanguage" Value="1033"/>
<ROW Property="ProductName" Value="ZeroTier One"/>
<ROW Property="ProductVersion" Value="0.9.2" Type="32"/>

View File

@ -25,6 +25,10 @@
* LLC. Start here: http://www.zerotier.com/
*/
// Uncomment on Windows to assume -C and run in console instead of service
// Useful for Visual Studio debugging (launch VS as Administrator to run)
#define ZT_WIN_RUN_IN_CONSOLE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@ -586,12 +590,18 @@ int main(int argc,char **argv)
const char *homeDir = (const char *)0;
unsigned int udpPort = ZT_DEFAULT_UDP_PORT;
unsigned int tcpPort = 0;
#ifdef __UNIX_LIKE__
bool runAsDaemon = false;
#endif
#ifdef __WINDOWS__
#ifdef ZT_WIN_RUN_IN_CONSOLE
bool winRunFromCommandLine = true;
#else
bool winRunFromCommandLine = false;
#endif
#endif // __WINDOWS__
for(int i=1;i<argc;++i) {
if (argv[i][0] == '-') {
switch(argv[i][1]) {

View File

@ -66,10 +66,13 @@ Network::~Network()
{
Thread::join(_setupThread);
if (_tap)
_r->tapFactory->close(_tap,_destroyOnDelete);
{
Mutex::Lock _l(_lock);
if (_tap)
_r->tapFactory->close(_tap,_destroyed);
}
if (_destroyOnDelete) {
if (_destroyed) {
Utils::rm(std::string(_r->homePath + ZT_PATH_SEPARATOR_S + "networks.d" + ZT_PATH_SEPARATOR_S + idString() + ".conf"));
Utils::rm(std::string(_r->homePath + ZT_PATH_SEPARATOR_S + "networks.d" + ZT_PATH_SEPARATOR_S + idString() + ".mcerts"));
} else {
@ -80,12 +83,6 @@ Network::~Network()
SharedPtr<Network> Network::newInstance(const RuntimeEnvironment *renv,NodeConfig *nc,uint64_t id)
{
/* We construct Network via a static method to ensure that it is immediately
* wrapped in a SharedPtr<>. Otherwise if there is traffic on the Ethernet
* tap device, a SharedPtr<> wrap can occur in the Ethernet frame handler
* that then causes the Network instance to be deleted before it is finished
* being constructed. C++ edge cases, how I love thee. */
SharedPtr<Network> nw(new Network());
nw->_id = id;
nw->_nc = nc;
@ -94,7 +91,7 @@ SharedPtr<Network> Network::newInstance(const RuntimeEnvironment *renv,NodeConfi
nw->_tap = (EthernetTap *)0;
nw->_enabled = true;
nw->_lastConfigUpdate = 0;
nw->_destroyOnDelete = false;
nw->_destroyed = false;
nw->_netconfFailure = NETCONF_FAILURE_NONE;
if (nw->controller() == renv->identity.address()) // TODO: fix Switch to allow packets to self
@ -143,6 +140,9 @@ bool Network::setConfiguration(const Dictionary &conf,bool saveToDisk)
{
Mutex::Lock _l(_lock);
if (_destroyed)
return false;
try {
SharedPtr<NetworkConfig> newConfig(new NetworkConfig(conf)); // throws if invalid
if ((newConfig->networkId() == _id)&&(newConfig->issuedTo() == _r->identity.address())) {
@ -242,6 +242,10 @@ bool Network::isAllowed(const Address &peer) const
void Network::clean()
{
Mutex::Lock _l(_lock);
if (_destroyed)
return;
uint64_t now = Utils::now();
if ((_config)&&(_config->isPublic())) {
@ -277,23 +281,17 @@ void Network::clean()
Network::Status Network::status() const
{
Mutex::Lock _l(_lock);
if (_tap) {
switch(_netconfFailure) {
case NETCONF_FAILURE_ACCESS_DENIED:
return NETWORK_ACCESS_DENIED;
case NETCONF_FAILURE_NOT_FOUND:
return NETWORK_NOT_FOUND;
case NETCONF_FAILURE_NONE:
if (_lastConfigUpdate > 0)
return NETWORK_OK;
else return NETWORK_WAITING_FOR_FIRST_AUTOCONF;
case NETCONF_FAILURE_INIT_FAILED:
default:
return NETWORK_INITIALIZATION_FAILED;
}
} else if (_netconfFailure == NETCONF_FAILURE_INIT_FAILED) {
return NETWORK_INITIALIZATION_FAILED;
} else return NETWORK_INITIALIZING;
switch(_netconfFailure) {
case NETCONF_FAILURE_ACCESS_DENIED:
return NETWORK_ACCESS_DENIED;
case NETCONF_FAILURE_NOT_FOUND:
return NETWORK_NOT_FOUND;
case NETCONF_FAILURE_NONE:
return ((_lastConfigUpdate > 0) ? ((_tap) ? NETWORK_OK : NETWORK_INITIALIZING) : NETWORK_WAITING_FOR_FIRST_AUTOCONF);
//case NETCONF_FAILURE_INIT_FAILED:
default:
return NETWORK_INITIALIZATION_FAILED;
}
}
void Network::_CBhandleTapData(void *arg,const MAC &from,const MAC &to,unsigned int etherType,const Buffer<4096> &data)
@ -366,7 +364,7 @@ void Network::threadMain()
{
Mutex::Lock _l(_lock);
if (_tap) // the tap creation thread can technically be re-launched, though this isn't done right now
_r->tapFactory->close(_tap,_destroyOnDelete);
_r->tapFactory->close(_tap,false);
_tap = t;
if (t) {
if (_config)
@ -409,6 +407,20 @@ void Network::setEnabled(bool enabled)
_tap->setEnabled(enabled);
}
void Network::destroy()
{
Mutex::Lock _l(_lock);
_enabled = false;
_destroyed = true;
Thread::join(_setupThread);
if (_tap)
_r->tapFactory->close(_tap,true);
_tap = (EthernetTap *)0;
}
void Network::_restoreState()
{
Buffer<ZT_NETWORK_CERT_WRITE_BUF_SIZE> buf;

View File

@ -99,11 +99,6 @@ private:
*/
static SharedPtr<Network> newInstance(const RuntimeEnvironment *renv,NodeConfig *nc,uint64_t id);
/**
* Causes all persistent disk presence to be erased on delete, and this network won't be reloaded on next startup
*/
inline void destroyOnDelete() throw() { _destroyOnDelete = true; }
public:
/**
* Broadcast multicast group: ff:ff:ff:ff:ff:ff / 0
@ -419,6 +414,15 @@ public:
*/
void setEnabled(bool enabled);
/**
* Destroy this network
*
* This causes the network to disable itself, destroy its tap device, and on
* delete to delete all trace of itself on disk and remove any persistent tap
* device instances. Call this when a network is being removed from the system.
*/
void destroy();
private:
static void _CBhandleTapData(void *arg,const MAC &from,const MAC &to,unsigned int etherType,const Buffer<4096> &data);
@ -453,7 +457,7 @@ private:
SharedPtr<NetworkConfig> _config;
volatile uint64_t _lastConfigUpdate;
volatile bool _destroyOnDelete;
volatile bool _destroyed;
volatile enum {
NETCONF_FAILURE_NONE,

View File

@ -286,7 +286,7 @@ void NodeConfig::_doCommand(IpcConnection *ipcc,const char *commandLine)
if (nw == _networks.end()) {
ipcc->printf("404 leave %.16llx ERROR: not a member of that network"ZT_EOL_S,(unsigned long long)nwid);
} else {
nw->second->destroyOnDelete();
nw->second->destroy();
_networks.erase(nw);
}
} else {

View File

@ -31,12 +31,13 @@
#include <stdexcept>
#include "Constants.hpp"
#include "AtomicCounter.hpp"
#ifdef __WINDOWS__
#include <WinSock2.h>
#include <Windows.h>
#include <string.h>
#include "Mutex.hpp"
namespace ZeroTier {
@ -56,6 +57,7 @@ public:
throw()
{
_th = NULL;
_tid = 0;
}
template<typename C>
@ -71,8 +73,15 @@ public:
static inline void join(const Thread &t)
{
if (t._th != NULL)
WaitForSingleObject(t._th,INFINITE);
if (t._th != NULL) {
for(;;) {
DWORD ec = STILL_ACTIVE;
GetExitCodeThread(t._th,&ec);
if (ec == STILL_ACTIVE)
WaitForSingleObject(t._th,1000);
else break;
}
}
}
static inline void sleep(unsigned long ms)

View File

@ -233,7 +233,6 @@ WindowsEthernetTap::WindowsEthernetTap(
DWORD tmp = mtu;
RegSetKeyValueA(nwAdapters,mySubkeyName.c_str(),"MTU",REG_DWORD,(LPCVOID)&tmp,sizeof(tmp));
//tmp = NDIS_DEVICE_TYPE_ENDPOINT;
tmp = 0;
RegSetKeyValueA(nwAdapters,mySubkeyName.c_str(),"*NdisDeviceType",REG_DWORD,(LPCVOID)&tmp,sizeof(tmp));
tmp = IF_TYPE_ETHERNET_CSMACD;
@ -571,130 +570,102 @@ void WindowsEthernetTap::threadMain()
continue;
}
uint32_t tmpi = 1;
DWORD bytesReturned = 0;
DeviceIoControl(_tap,TAP_WIN_IOCTL_SET_MEDIA_STATUS,&tmpi,sizeof(tmpi),&tmpi,sizeof(tmpi),&bytesReturned,NULL);
{
uint32_t tmpi = 1;
DWORD bytesReturned = 0;
DeviceIoControl(_tap,TAP_WIN_IOCTL_SET_MEDIA_STATUS,&tmpi,sizeof(tmpi),&tmpi,sizeof(tmpi),&bytesReturned,NULL);
bytesReturned = 0;
DeviceIoControl(_tap,TAP_WIN_IOCTL_SET_MEDIA_STATUS,&tmpi,sizeof(tmpi),&tmpi,sizeof(tmpi),&bytesReturned,NULL);
}
{
#ifdef ZT_WINDOWS_CREATE_FAKE_DEFAULT_ROUTE
/* This inserts a fake default route and a fake ARP entry, forcing
* Windows to detect this as a "real" network and apply proper
* firewall rules.
*
* This hack is completely stupid, but Windows made me do it
* by being broken and insane.
*
* Background: Windows tries to detect its network location by
* matching it to the ARP address of the default route. Networks
* without default routes are "unidentified networks" and cannot
* have their firewall classification changed by the user (easily).
*
* Yes, you read that right.
*
* The common workaround is to set *NdisDeviceType to 1, which
* totally disables all Windows firewall functionality. This is
* the answer you'll find on most forums for things like OpenVPN.
*
* Yes, you read that right.
*
* The default route workaround is also known, but for this to
* work there must be a known default IP that resolves to a known
* ARP address. This works for an OpenVPN tunnel, but not here
* because this isn't a tunnel. It's a mesh. There is no "other
* end," or any other known always on IP.
*
* So let's make a fake one and shove it in there along with its
* fake static ARP entry. Also makes it instant-on and static.
*
* We'll have to see what DHCP does with this. In the future we
* probably will not want to do this on DHCP-enabled networks, so
* when we enable DHCP we will go in and yank this wacko hacko from
* the routing table before doing so.
*
* Like Jesse Pinkman would say: "YEEEEAAH BITCH!" */
const uint32_t fakeIp = htonl(0x19fffffe); // 25.255.255.254 -- unrouted IPv4 block
for(int i=0;i<8;++i) {
MIB_IPNET_ROW2 ipnr;
memset(&ipnr,0,sizeof(ipnr));
ipnr.Address.si_family = AF_INET;
ipnr.Address.Ipv4.sin_addr.s_addr = fakeIp;
ipnr.InterfaceLuid.Value = _deviceLuid.Value;
ipnr.PhysicalAddress[0] = _mac[0] ^ 0x10; // just make something up that's consistent and not part of this net
ipnr.PhysicalAddress[1] = 0x00;
ipnr.PhysicalAddress[2] = (UCHAR)((_deviceGuid.Data1 >> 24) & 0xff);
ipnr.PhysicalAddress[3] = (UCHAR)((_deviceGuid.Data1 >> 16) & 0xff);
ipnr.PhysicalAddress[4] = (UCHAR)((_deviceGuid.Data1 >> 8) & 0xff);
ipnr.PhysicalAddress[5] = (UCHAR)(_deviceGuid.Data1 & 0xff);
ipnr.PhysicalAddressLength = 6;
ipnr.State = NlnsPermanent;
ipnr.IsRouter = 1;
ipnr.IsUnreachable = 0;
ipnr.ReachabilityTime.LastReachable = 0x0fffffff;
ipnr.ReachabilityTime.LastUnreachable = 1;
DWORD result = CreateIpNetEntry2(&ipnr);
if (result != NO_ERROR)
Sleep(500);
else break;
}
for(int i=0;i<8;++i) {
MIB_IPFORWARD_ROW2 nr;
memset(&nr,0,sizeof(nr));
InitializeIpForwardEntry(&nr);
nr.InterfaceLuid.Value = _deviceLuid.Value;
nr.DestinationPrefix.Prefix.si_family = AF_INET; // rest is left as 0.0.0.0/0
nr.NextHop.si_family = AF_INET;
nr.NextHop.Ipv4.sin_addr.s_addr = fakeIp;
nr.Metric = 9999; // do not use as real default route
nr.Protocol = MIB_IPPROTO_NETMGMT;
DWORD result = CreateIpForwardEntry2(&nr);
if (result != NO_ERROR)
Sleep(500);
else break;
}
#endif
}
if (throwOneAway) {
throwOneAway = false;
CloseHandle(_tap);
_tap = INVALID_HANDLE_VALUE;
Sleep(250);
Sleep(1000);
continue;
} else break;
}
/* code not currently used, but keep it around cause it was hard to figure out...
CoInitializeEx(NULL,COINIT_MULTITHREADED);
CComPtr<INetworkListManager> nlm;
nlm.CoCreateInstance(CLSID_NetworkListManager);
if (nlm) {
for(int i=0;i<8;++i) { // wait up to 8s for the NLM (network awareness) to find and initialize its awareness of our new network
CComPtr<IEnumNetworks> nlmNets;
bool foundMyNet = false;
if (SUCCEEDED(nlm->GetNetworks(NLM_ENUM_NETWORK_ALL,&nlmNets))) {
DWORD dwReturn = 0;
while (!foundMyNet) {
CComPtr<INetwork> nlmNet;
HRESULT hr = nlmNets->Next(1,&nlmNet,&dwReturn);
if ((hr == S_OK)&&(dwReturn > 0)&&(nlmNet)) {
CComPtr<IEnumNetworkConnections> nlmNetConns;
if (SUCCEEDED(nlmNet->GetNetworkConnections(&nlmNetConns))) {
for(;;) {
CComPtr<INetworkConnection> nlmNetConn;
hr = nlmNetConns->Next(1,&nlmNetConn,&dwReturn);
if ((hr == S_OK)&&(dwReturn > 0)&&(nlmNetConn)) {
GUID netAdapterId;
nlmNetConn->GetAdapterId(&netAdapterId);
if (netAdapterId == _deviceGuid) {
foundMyNet = true;
printf("*** Found my net!\n");
nlmNet->SetName(L"ZeroTier One Network");
break;
}
} else break;
}
}
} else break;
}
}
if (foundMyNet)
break;
else Thread::sleep(1000);
}
}
*/
#ifdef ZT_WINDOWS_CREATE_FAKE_DEFAULT_ROUTE
/* This inserts a fake default route and a fake ARP entry, forcing
* Windows to detect this as a "real" network and apply proper
* firewall rules.
*
* This hack is completely stupid, but Windows made me do it
* by being broken and insane.
*
* Background: Windows tries to detect its network location by
* matching it to the ARP address of the default route. Networks
* without default routes are "unidentified networks" and cannot
* have their firewall classification changed by the user (easily).
*
* Yes, you read that right.
*
* The common workaround is to set *NdisDeviceType to 1, which
* totally disables all Windows firewall functionality. This is
* the answer you'll find on most forums for things like OpenVPN.
*
* Yes, you read that right.
*
* The default route workaround is also known, but for this to
* work there must be a known default IP that resolves to a known
* ARP address. This works for an OpenVPN tunnel, but not here
* because this isn't a tunnel. It's a mesh. There is no "other
* end," or any other known always on IP.
*
* So let's make a fake one and shove it in there along with its
* fake static ARP entry. Also makes it instant-on and static.
*
* We'll have to see what DHCP does with this. In the future we
* probably will not want to do this on DHCP-enabled networks, so
* when we enable DHCP we will go in and yank this wacko hacko from
* the routing table before doing so.
*
* Like Jesse Pinkman would say: "YEEEEAAH BITCH!" */
for(int i=0;i<8;++i) { // also wait up to 8s for this, though if we got the NLM part we're probably okay
MIB_IPFORWARD_ROW2 nr;
memset(&nr,0,sizeof(nr));
InitializeIpForwardEntry(&nr);
nr.InterfaceLuid.Value = _deviceLuid.Value;
nr.DestinationPrefix.Prefix.si_family = AF_INET; // rest is left as 0.0.0.0/0
nr.NextHop.si_family = AF_INET;
nr.NextHop.Ipv4.sin_addr.s_addr = htonl(0x19fffffe); // 25.255.255.254 -- unrouted IPv4 block
nr.Metric = 9999; // do not use as real default route
nr.Protocol = MIB_IPPROTO_NETMGMT;
DWORD result = CreateIpForwardEntry2(&nr);
if (result == NO_ERROR) {
MIB_IPNET_ROW2 ipnr;
memset(&ipnr,0,sizeof(ipnr));
ipnr.Address.si_family = AF_INET;
ipnr.Address.Ipv4.sin_addr.s_addr = nr.NextHop.Ipv4.sin_addr.s_addr;
ipnr.InterfaceLuid.Value = _deviceLuid.Value;
ipnr.PhysicalAddress[0] = _mac[0] ^ 0x10; // just make something up that's consistent and not part of this net
ipnr.PhysicalAddress[1] = 0x00;
ipnr.PhysicalAddress[2] = (UCHAR)((_deviceGuid.Data1 >> 24) & 0xff);
ipnr.PhysicalAddress[3] = (UCHAR)((_deviceGuid.Data1 >> 16) & 0xff);
ipnr.PhysicalAddress[4] = (UCHAR)((_deviceGuid.Data1 >> 8) & 0xff);
ipnr.PhysicalAddress[5] = (UCHAR)(_deviceGuid.Data1 & 0xff);
ipnr.PhysicalAddressLength = 6;
ipnr.State = NlnsPermanent;
ipnr.IsRouter = 1;
ipnr.IsUnreachable = 0;
ipnr.ReachabilityTime.LastReachable = 0x0fffffff;
CreateIpNetEntry2(&ipnr);
break; // stop retrying, we're done
} else Thread::sleep(1000);
}
#endif
memset(&tapOvlRead,0,sizeof(tapOvlRead));
tapOvlRead.hEvent = CreateEvent(NULL,TRUE,FALSE,NULL);
memset(&tapOvlWrite,0,sizeof(tapOvlWrite));
@ -710,7 +681,7 @@ void WindowsEthernetTap::threadMain()
for(;;) {
if (!_run) break;
DWORD r = WaitForMultipleObjectsEx(writeInProgress ? 3 : 2,wait4,FALSE,10000,TRUE);
DWORD r = WaitForMultipleObjectsEx(writeInProgress ? 3 : 2,wait4,FALSE,5000,TRUE);
if (!_run) break;
if ((r == WAIT_TIMEOUT)||(r == WAIT_FAILED))

View File

@ -75,6 +75,9 @@ EthernetTap *WindowsEthernetTapFactory::open(
void WindowsEthernetTapFactory::close(EthernetTap *tap,bool destroyPersistentDevices)
{
if (!tap)
return;
std::string instanceId(((WindowsEthernetTap *)tap)->instanceId());
Mutex::Lock _l(_devices_m);
@ -120,20 +123,8 @@ void WindowsEthernetTapFactory::destroyAllPersistentTapDevices(const char *pathT
dataLen = sizeof(data);
if (RegGetValueA(nwAdapters,subkeyName,"DeviceInstanceID",RRF_RT_ANY,&type,(PVOID)data,&dataLen) == ERROR_SUCCESS)
instanceIdPath.assign(data,dataLen);
if (instanceIdPath.length() != 0) {
if (instanceIdPath.length() != 0)
instanceIdPathsToRemove.insert(instanceIdPath);
/*
type = 0;
dataLen = sizeof(data);
if (RegGetValueA(nwAdapters,subkeyName,"_ZeroTierTapIdentifier",RRF_RT_ANY,&type,(PVOID)data,&dataLen) == ERROR_SUCCESS) {
if (dataLen <= 0) {
instanceIdPathsToRemove.insert(instanceIdPath);
} else {
instanceIdPathsToRemove.insert(instanceIdPath);
}
}
*/
}
}
}
} else break; // end of list or failure