This might be a final fix for GitHub issue #173 and possibly others: ACTIVELY detect borked port status on Windows and, if any "cable unplugged" or other wacky states are detected, whack the adapter (close and reopen). Tested adding a whole bunch of Windows networks, removing, adding more, etc., and it seems to work very well!

This commit is contained in:
Adam Ierymenko 2015-06-12 16:02:04 +02:00
parent dbf40f30f9
commit 494681a482
2 changed files with 175 additions and 177 deletions

View File

@ -92,9 +92,6 @@ static const WindowsEthernetTapEnv WINENV;
// Only create or delete devices one at a time
static Mutex _systemTapInitLock;
// Incrementing this causes everyone currently open to close and reopen
static volatile int _systemTapResetStatus = 0;
} // anonymous namespace
WindowsEthernetTap::WindowsEthernetTap(
@ -268,12 +265,6 @@ WindowsEthernetTap::WindowsEthernetTap(
}
} else break; // no more keys or error occurred
}
// When we create a new tap device from scratch, existing taps for
// some reason go into 'unplugged' state. This can be fixed by
// closing and re-opening them. Incrementing this causes all
// existing tap threads to do this.
++_systemTapResetStatus;
}
if (_netCfgInstanceId.length() > 0) {
@ -299,7 +290,6 @@ WindowsEthernetTap::WindowsEthernetTap(
throw std::runtime_error("unable to find or create tap adapter");
}
// Convert device GUID junk... blech... is there an easier way to do this?
{
char nobraces[128];
const char *nbtmp1 = _netCfgInstanceId.c_str();
@ -573,36 +563,23 @@ void WindowsEthernetTap::scanMulticastGroups(std::vector<MulticastGroup> &added,
void WindowsEthernetTap::threadMain()
throw()
{
char tapPath[256];
OVERLAPPED tapOvlRead,tapOvlWrite;
char tapReadBuf[ZT_IF_MTU + 32];
char tapPath[128];
HANDLE wait4[3];
char *tapReadBuf = (char *)0;
/* No idea why I did this. I did it a long time ago and there was only a
* snarky comment. But I'd never do crap like this without a reason, so
* I am leaving it alone with a more descriptive snarky comment. */
while (!tapReadBuf) {
tapReadBuf = (char *)::malloc(ZT_IF_MTU + 32);
if (!tapReadBuf)
Sleep(1000);
}
OVERLAPPED tapOvlRead,tapOvlWrite;
Utils::snprintf(tapPath,sizeof(tapPath),"\\\\.\\Global\\%s.tap",_netCfgInstanceId.c_str());
int prevTapResetStatus = _systemTapResetStatus;
bool throwOneAway = true; // Restart once on startup, because Windows.
bool powerCycle = true; // If true, "power cycle" the device, because Windows.
try {
while (_run) {
if (powerCycle) {
_disableTapDevice();
Sleep(500);
_enableTapDevice();
Sleep(500);
}
_tap = CreateFileA(tapPath,GENERIC_READ|GENERIC_WRITE,0,NULL,OPEN_EXISTING,FILE_ATTRIBUTE_SYSTEM|FILE_FLAG_OVERLAPPED,NULL);
if (_tap == INVALID_HANDLE_VALUE) {
fprintf(stderr,"Error opening %s -- retrying.\r\n",tapPath);
powerCycle = true;
_disableTapDevice();
_enableTapDevice();
Sleep(1000);
continue;
}
@ -612,8 +589,8 @@ void WindowsEthernetTap::threadMain()
DeviceIoControl(_tap,TAP_WIN_IOCTL_SET_MEDIA_STATUS,&tmpi,sizeof(tmpi),&tmpi,sizeof(tmpi),&bytesReturned,NULL);
}
{
#ifdef ZT_WINDOWS_CREATE_FAKE_DEFAULT_ROUTE
{
/* This inserts a fake default route and a fake ARP entry, forcing
* Windows to detect this as a "real" network and apply proper
* firewall rules.
@ -688,8 +665,8 @@ void WindowsEthernetTap::threadMain()
Sleep(500);
else break;
}
#endif
}
#endif
memset(&tapOvlRead,0,sizeof(tapOvlRead));
tapOvlRead.hEvent = CreateEvent(NULL,TRUE,FALSE,NULL);
@ -702,17 +679,39 @@ void WindowsEthernetTap::threadMain()
ReadFile(_tap,tapReadBuf,sizeof(tapReadBuf),NULL,&tapOvlRead);
bool writeInProgress = false;
ULONGLONG timeOfLastBorkCheck = GetTickCount64();
while (_run) {
if ((prevTapResetStatus != _systemTapResetStatus)||(throwOneAway)) {
powerCycle = throwOneAway;
throwOneAway = false;
prevTapResetStatus = _systemTapResetStatus;
break; // this will cause us to close and reopen the tap
}
DWORD r = WaitForMultipleObjectsEx(writeInProgress ? 3 : 2,wait4,FALSE,2500,TRUE);
DWORD waitResult = WaitForMultipleObjectsEx(writeInProgress ? 3 : 2,wait4,FALSE,2500,TRUE);
if (!_run) break; // will also break outer while(_run)
if ((r == WAIT_TIMEOUT)||(r == WAIT_FAILED))
// Check for issues with adapter and close/reopen if any are detected. This
// check fixes a whole boatload of Windows adapter 'coma' issues after
// sleep/wake and when adapters are added/removed. Basically if the tap
// device is borked, whack it.
{
ULONGLONG tc = GetTickCount64();
if ((tc - timeOfLastBorkCheck) >= 2500) {
timeOfLastBorkCheck = tc;
MIB_IF_TABLE2 *ift = NULL;
if ((GetIfTable2(&ift) == NO_ERROR)&&(ift)) {
bool isBorked = false;
for(ULONG r=0;r<ift->NumEntries;++r) {
if (ift->Table[r].InterfaceLuid.Value == _deviceLuid.Value) {
if ((ift->Table[r].InterfaceAndOperStatusFlags.NotMediaConnected)||(ift->Table[r].MediaConnectState == MediaConnectStateDisconnected))
isBorked = true;
break;
}
}
FreeMibTable(ift);
if (isBorked) {
// Close and reopen tap device if there's an issue (outer loop)
break;
}
}
}
}
if ((waitResult == WAIT_TIMEOUT)||(waitResult == WAIT_FAILED))
continue;
if (HasOverlappedIoCompleted(&tapOvlRead)) {
@ -756,8 +755,7 @@ void WindowsEthernetTap::threadMain()
// We will restart and re-open the tap unless _run == false
}
::free(tapReadBuf);
} catch ( ... ) {} // catch unexpected exceptions -- this should not happen but would prevent program crash or other weird issues since threads should not throw
}
void WindowsEthernetTap::destroyAllPersistentTapDevices(const char *pathToHelpers)

View File

@ -98,8 +98,8 @@ private:
GUID _deviceGuid;
NET_LUID _deviceLuid;
std::string _netCfgInstanceId; // NetCfgInstanceId, a GUID
std::string _deviceInstanceId; // DeviceInstanceID, another kind of "instance ID"
std::string _netCfgInstanceId;
std::string _deviceInstanceId;
std::vector<MulticastGroup> _multicastGroups;