Replace the cluster-geo subprocess with an in-memory loaded CSV of GeoIP data. This is faster, cheaper, and more reliable. We use https://db-ip.com/ but other GeoIP CSV sources would work too.

This commit is contained in:
Adam Ierymenko 2016-04-18 11:33:54 -07:00
parent 02c3727ccd
commit 587b1e05d1
4 changed files with 126 additions and 50 deletions

View File

@ -26,14 +26,17 @@
#include "../node/Constants.hpp" #include "../node/Constants.hpp"
#include "../node/Utils.hpp" #include "../node/Utils.hpp"
#include "../node/NonCopyable.hpp"
#include "../osdep/OSUtils.hpp" #include "../osdep/OSUtils.hpp"
#include "ClusterGeoIpService.hpp"
namespace ZeroTier { namespace ZeroTier {
/** /**
* Parser for cluster definition file * Parser for cluster definition file
*/ */
class ClusterDefinition class ClusterDefinition : NonCopyable
{ {
public: public:
struct MemberDefinition struct MemberDefinition
@ -45,8 +48,17 @@ public:
char name[256]; char name[256];
InetAddress clusterEndpoint; InetAddress clusterEndpoint;
std::vector<InetAddress> zeroTierEndpoints; std::vector<InetAddress> zeroTierEndpoints;
//inline operator<(const MemberDefinition &md) const { return (id < md.id); } // sort order
}; };
/**
* Load and initialize cluster definition and GeoIP data if any
*
* @param myAddress My ZeroTier address
* @param pathToClusterFile Path to cluster definition file
* @throws std::runtime_error Invalid cluster definition or unable to load data
*/
ClusterDefinition(uint64_t myAddress,const char *pathToClusterFile) ClusterDefinition(uint64_t myAddress,const char *pathToClusterFile)
{ {
std::string cf; std::string cf;
@ -62,9 +74,23 @@ public:
if ((fields.size() < 5)||(fields[0][0] == '#')||(fields[0] != myAddressStr)) if ((fields.size() < 5)||(fields[0][0] == '#')||(fields[0] != myAddressStr))
continue; continue;
// <address> geo <CSV path> <ip start column> <ip end column> <latitude column> <longitude column>
if (fields[1] == "geo") {
if ((fields.size() >= 7)&&(OSUtils::fileExists(fields[2].c_str()))) {
int ipStartColumn = Utils::strToInt(fields[3].c_str());
int ipEndColumn = Utils::strToInt(fields[4].c_str());
int latitudeColumn = Utils::strToInt(fields[5].c_str());
int longitudeColumn = Utils::strToInt(fields[6].c_str());
if (_geo.load(fields[2].c_str(),ipStartColumn,ipEndColumn,latitudeColumn,longitudeColumn) <= 0)
throw std::runtime_error(std::string("failed to load geo-ip data from ")+fields[2]);
}
continue;
}
// <address> <ID> <name> <backplane IP/port(s)> <ZT frontplane IP/port(s)> <x,y,z>
int id = Utils::strToUInt(fields[1].c_str()); int id = Utils::strToUInt(fields[1].c_str());
if ((id < 0)||(id > ZT_CLUSTER_MAX_MEMBERS)) if ((id < 0)||(id > ZT_CLUSTER_MAX_MEMBERS))
continue; throw std::runtime_error(std::string("invalid cluster member ID: ")+fields[1]);
MemberDefinition &md = _md[id]; MemberDefinition &md = _md[id];
md.id = (unsigned int)id; md.id = (unsigned int)id;
@ -92,10 +118,29 @@ public:
std::sort(_ids.begin(),_ids.end()); std::sort(_ids.begin(),_ids.end());
} }
/**
* @return All member definitions in this cluster by ID (ID is array index)
*/
inline const MemberDefinition &operator[](unsigned int id) const throw() { return _md[id]; } inline const MemberDefinition &operator[](unsigned int id) const throw() { return _md[id]; }
/**
* @return Number of members in this cluster
*/
inline unsigned int size() const throw() { return (unsigned int)_ids.size(); } inline unsigned int size() const throw() { return (unsigned int)_ids.size(); }
/**
* @return IDs of members in this cluster sorted by ID
*/
inline const std::vector<unsigned int> &ids() const throw() { return _ids; } inline const std::vector<unsigned int> &ids() const throw() { return _ids; }
/**
* @return GeoIP service for this cluster
*/
inline ClusterGeoIpService &geo() throw() { return _geo; }
/**
* @return A vector (new copy) containing all cluster members
*/
inline std::vector<MemberDefinition> members() const inline std::vector<MemberDefinition> members() const
{ {
std::vector<MemberDefinition> m; std::vector<MemberDefinition> m;
@ -107,6 +152,7 @@ public:
private: private:
MemberDefinition _md[ZT_CLUSTER_MAX_MEMBERS]; MemberDefinition _md[ZT_CLUSTER_MAX_MEMBERS];
std::vector<unsigned int> _ids; std::vector<unsigned int> _ids;
ClusterGeoIpService _geo;
}; };
} // namespace ZeroTier } // namespace ZeroTier

View File

@ -25,7 +25,6 @@
#include "ClusterGeoIpService.hpp" #include "ClusterGeoIpService.hpp"
#include "../node/Utils.hpp" #include "../node/Utils.hpp"
#include "../node/InetAddress.hpp"
#include "../osdep/OSUtils.hpp" #include "../osdep/OSUtils.hpp"
#define ZT_CLUSTERGEOIPSERVICE_FILE_MODIFICATION_CHECK_EVERY 10000 #define ZT_CLUSTERGEOIPSERVICE_FILE_MODIFICATION_CHECK_EVERY 10000
@ -69,12 +68,13 @@ bool ClusterGeoIpService::locate(const InetAddress &ip,int &x,int &y,int &z)
std::vector<_V4E>::const_iterator i(std::upper_bound(_v4db.begin(),_v4db.end(),key)); std::vector<_V4E>::const_iterator i(std::upper_bound(_v4db.begin(),_v4db.end(),key));
while (i != _v4db.begin()) { while (i != _v4db.begin()) {
--i; --i;
if ((key->start >= i->start)&&(key->start <= i->end)) { if ((key.start >= i->start)&&(key.start <= i->end)) {
x = i->x; x = i->x;
y = i->y; y = i->y;
z = i->z; z = i->z;
//printf("%s : %f,%f %d,%d,%d\n",ip.toIpString().c_str(),i->lat,i->lon,x,y,z);
return true; return true;
} else if ((key->start > i->start)&&(key->start > i->end)) } else if ((key.start > i->start)&&(key.start > i->end))
break; break;
} }
} else if ((ip.ss_family == AF_INET6)&&(_v6db.size() > 0)) { } else if ((ip.ss_family == AF_INET6)&&(_v6db.size() > 0)) {
@ -83,12 +83,13 @@ bool ClusterGeoIpService::locate(const InetAddress &ip,int &x,int &y,int &z)
std::vector<_V6E>::const_iterator i(std::upper_bound(_v6db.begin(),_v6db.end(),key)); std::vector<_V6E>::const_iterator i(std::upper_bound(_v6db.begin(),_v6db.end(),key));
while (i != _v6db.begin()) { while (i != _v6db.begin()) {
--i; --i;
const int s_vs_s = memcmp(key->start,i->start,16); const int s_vs_s = memcmp(key.start,i->start,16);
const int s_vs_e = memcmp(key->start,i->end,16); const int s_vs_e = memcmp(key.start,i->end,16);
if ((s_vs_s >= 0)&&(s_vs_e <= 0)) { if ((s_vs_s >= 0)&&(s_vs_e <= 0)) {
x = i->x; x = i->x;
y = i->y; y = i->y;
z = i->z; z = i->z;
//printf("%s : %f,%f %d,%d,%d\n",ip.toIpString().c_str(),i->lat,i->lon,x,y,z);
return true; return true;
} else if ((s_vs_s > 0)&&(s_vs_e > 0)) } else if ((s_vs_s > 0)&&(s_vs_e > 0))
break; break;
@ -98,7 +99,7 @@ bool ClusterGeoIpService::locate(const InetAddress &ip,int &x,int &y,int &z)
return false; return false;
} }
static void _parseLine(const char *line,std::vector<_V4E> &v4db,std::vector<_V6E> &v6db,int ipStartColumn,int ipEndColumn,int latitudeColumn,int longitudeColumn) void ClusterGeoIpService::_parseLine(const char *line,std::vector<_V4E> &v4db,std::vector<_V6E> &v6db,int ipStartColumn,int ipEndColumn,int latitudeColumn,int longitudeColumn)
{ {
std::vector<std::string> ls(Utils::split(line,",\t","\\","\"'")); std::vector<std::string> ls(Utils::split(line,",\t","\\","\"'"));
if ( ((ipStartColumn >= 0)&&(ipStartColumn < (int)ls.size()))&& if ( ((ipStartColumn >= 0)&&(ipStartColumn < (int)ls.size()))&&
@ -114,24 +115,30 @@ static void _parseLine(const char *line,std::vector<_V4E> &v4db,std::vector<_V6E
const double latRadians = lat * 0.01745329251994; // PI / 180 const double latRadians = lat * 0.01745329251994; // PI / 180
const double lonRadians = lon * 0.01745329251994; // PI / 180 const double lonRadians = lon * 0.01745329251994; // PI / 180
const double cosLat = cos(latRadians); const double cosLat = cos(latRadians);
const int x = (int)round((-6371.0) * cosLat * Math.cos(lonRadians)); // 6371 == Earth's approximate radius in kilometers const int x = (int)round((-6371.0) * cosLat * cos(lonRadians)); // 6371 == Earth's approximate radius in kilometers
const int y = (int)round(6371.0 * sin(latRadians)); const int y = (int)round(6371.0 * sin(latRadians));
const int z = (int)round(6371.0 * cosLat * Math.sin(lonRadians)); const int z = (int)round(6371.0 * cosLat * sin(lonRadians));
if (ipStart.ss_family == AF_INET) { if (ipStart.ss_family == AF_INET) {
v4db.push_back(_V4E()); v4db.push_back(_V4E());
v4db.back().start = Utils::ntoh((uint32_t)(reinterpret_cast<const struct sockaddr_in *>(&ipStart)->sin_addr.s_addr)); v4db.back().start = Utils::ntoh((uint32_t)(reinterpret_cast<const struct sockaddr_in *>(&ipStart)->sin_addr.s_addr));
v4db.back().end = Utils::ntoh((uint32_t)(reinterpret_cast<const struct sockaddr_in *>(&ipEnd)->sin_addr.s_addr)); v4db.back().end = Utils::ntoh((uint32_t)(reinterpret_cast<const struct sockaddr_in *>(&ipEnd)->sin_addr.s_addr));
//v4db.back().lat = (float)lat;
//v4db.back().lon = (float)lon;
v4db.back().x = x; v4db.back().x = x;
v4db.back().y = y; v4db.back().y = y;
v4db.back().z = z; v4db.back().z = z;
//printf("%s - %s : %d,%d,%d\n",ipStart.toIpString().c_str(),ipEnd.toIpString().c_str(),x,y,z);
} else if (ipStart.ss_family == AF_INET6) { } else if (ipStart.ss_family == AF_INET6) {
v6db.push_back(_V6E()); v6db.push_back(_V6E());
memcpy(v6db.back().start,reinterpret_cast<const struct sockaddr_in6 *>(&ipStart)->sin6_addr.s6_addr,16); memcpy(v6db.back().start,reinterpret_cast<const struct sockaddr_in6 *>(&ipStart)->sin6_addr.s6_addr,16);
memcpy(v6db.back().end,reinterpret_cast<const struct sockaddr_in6 *>(&ipEnd)->sin6_addr.s6_addr,16); memcpy(v6db.back().end,reinterpret_cast<const struct sockaddr_in6 *>(&ipEnd)->sin6_addr.s6_addr,16);
//v6db.back().lat = (float)lat;
//v6db.back().lon = (float)lon;
v6db.back().x = x; v6db.back().x = x;
v6db.back().y = y; v6db.back().y = y;
v6db.back().z = z; v6db.back().z = z;
//printf("%s - %s : %d,%d,%d\n",ipStart.toIpString().c_str(),ipEnd.toIpString().c_str(),x,y,z);
} }
} }
} }
@ -147,6 +154,8 @@ long ClusterGeoIpService::_load(const char *pathToCsv,int ipStartColumn,int ipEn
std::vector<_V4E> v4db; std::vector<_V4E> v4db;
std::vector<_V6E> v6db; std::vector<_V6E> v6db;
v4db.reserve(16777216);
v6db.reserve(16777216);
char buf[4096]; char buf[4096];
char linebuf[1024]; char linebuf[1024];
@ -199,3 +208,28 @@ long ClusterGeoIpService::_load(const char *pathToCsv,int ipStartColumn,int ipEn
} // namespace ZeroTier } // namespace ZeroTier
#endif // ZT_ENABLE_CLUSTER #endif // ZT_ENABLE_CLUSTER
/*
int main(int argc,char **argv)
{
char buf[1024];
ZeroTier::ClusterGeoIpService gip;
printf("loading...\n");
gip.load("/Users/api/Code/ZeroTier/Infrastructure/root-servers/zerotier-one/cluster-geoip.csv",0,1,5,6);
printf("... done!\n"); fflush(stdout);
while (gets(buf)) { // unsafe, testing only
ZeroTier::InetAddress addr(buf,0);
printf("looking up: %s\n",addr.toString().c_str()); fflush(stdout);
int x = 0,y = 0,z = 0;
if (gip.locate(addr,x,y,z)) {
//printf("%s: %d,%d,%d\n",addr.toString().c_str(),x,y,z); fflush(stdout);
} else {
printf("%s: not found!\n",addr.toString().c_str()); fflush(stdout);
}
}
return 0;
}
*/

View File

@ -32,11 +32,13 @@
#include "../node/Constants.hpp" #include "../node/Constants.hpp"
#include "../node/Mutex.hpp" #include "../node/Mutex.hpp"
#include "../node/NonCopyable.hpp"
#include "../node/InetAddress.hpp"
namespace ZeroTier { namespace ZeroTier {
/** /**
* Loads a DBIP CSV into memory for fast lookup, reloading as needed * Loads a GeoIP CSV into memory for fast lookup, reloading as needed
* *
* This was designed around the CSV from https://db-ip.com but can be used * This was designed around the CSV from https://db-ip.com but can be used
* with any similar GeoIP CSV database that is presented in the form of an * with any similar GeoIP CSV database that is presented in the form of an
@ -45,7 +47,7 @@ namespace ZeroTier {
* It loads the whole database into memory, which can be kind of large. If * It loads the whole database into memory, which can be kind of large. If
* the CSV file changes, the changes are loaded automatically. * the CSV file changes, the changes are loaded automatically.
*/ */
class ClusterGeoIpService class ClusterGeoIpService : NonCopyable
{ {
public: public:
ClusterGeoIpService(); ClusterGeoIpService();
@ -85,7 +87,37 @@ public:
*/ */
bool locate(const InetAddress &ip,int &x,int &y,int &z); bool locate(const InetAddress &ip,int &x,int &y,int &z);
/**
 * Report whether any GeoIP entries are currently loaded
 *
 * The check is made while holding _lock.
 *
 * @return True if the IPv4 or IPv6 table holds at least one entry (otherwise locate() will always be false)
 */
inline bool available() const
{
	Mutex::Lock guard(_lock);
	const bool haveV4 = !_v4db.empty();
	const bool haveV6 = !_v6db.empty();
	return (haveV4 || haveV6);
}
private: private:
// One IPv4 address range together with the x,y,z coordinates assigned to it.
struct _V4E
{
	uint32_t start; // first address of the range (filled in through Utils::ntoh() by _parseLine)
	uint32_t end;   // last address of the range (locate() compares against it with <=)
	//float lat,lon; // disabled; only the commented-out debug printf calls refer to these
	int x;
	int y;
	int z;

	// Entries are ordered by their start address only; end is not considered.
	inline bool operator<(const _V4E &rhs) const
	{
		return (rhs.start > start);
	}
};
// One IPv6 address range together with the x,y,z coordinates assigned to it.
struct _V6E
{
	uint8_t start[16]; // first address of the range, 16 raw bytes as copied from sin6_addr
	uint8_t end[16];   // last address of the range, same byte layout as start
	//float lat,lon; // disabled; only the commented-out debug printf calls refer to these
	int x;
	int y;
	int z;

	// Entries are ordered by a bytewise comparison of their start addresses only.
	inline bool operator<(const _V6E &rhs) const
	{
		const int order = memcmp(start,rhs.start,sizeof(start));
		return (order < 0);
	}
};
static void _parseLine(const char *line,std::vector<_V4E> &v4db,std::vector<_V6E> &v6db,int ipStartColumn,int ipEndColumn,int latitudeColumn,int longitudeColumn);
long _load(const char *pathToCsv,int ipStartColumn,int ipEndColumn,int latitudeColumn,int longitudeColumn); long _load(const char *pathToCsv,int ipStartColumn,int ipEndColumn,int latitudeColumn,int longitudeColumn);
std::string _pathToCsv; std::string _pathToCsv;
@ -98,24 +130,6 @@ private:
uint64_t _csvModificationTime; uint64_t _csvModificationTime;
int64_t _csvFileSize; int64_t _csvFileSize;
// One IPv4 address range together with the x,y,z coordinates assigned to it.
struct _V4E
{
	uint32_t start; // first address of the range
	uint32_t end;   // last address of the range
	int x;
	int y;
	int z;

	// Entries are ordered by their start address only; end is not considered.
	inline bool operator<(const _V4E &rhs) const
	{
		return (rhs.start > start);
	}
};
// One IPv6 address range together with the x,y,z coordinates assigned to it.
struct _V6E
{
	uint8_t start[16]; // first address of the range as 16 raw bytes
	uint8_t end[16];   // last address of the range, same byte layout as start
	int x;
	int y;
	int z;

	// Entries are ordered by a bytewise comparison of their start addresses only.
	inline bool operator<(const _V6E &rhs) const
	{
		const int order = memcmp(start,rhs.start,sizeof(start));
		return (order < 0);
	}
};
std::vector<_V4E> _v4db; std::vector<_V4E> _v4db;
std::vector<_V6E> _v6db; std::vector<_V6E> _v6db;

View File

@ -520,7 +520,6 @@ public:
// Cluster management instance if enabled // Cluster management instance if enabled
#ifdef ZT_ENABLE_CLUSTER #ifdef ZT_ENABLE_CLUSTER
PhySocket *_clusterMessageSocket; PhySocket *_clusterMessageSocket;
ClusterGeoIpService *_clusterGeoIpService;
ClusterDefinition *_clusterDefinition; ClusterDefinition *_clusterDefinition;
unsigned int _clusterMemberId; unsigned int _clusterMemberId;
#endif #endif
@ -553,7 +552,6 @@ public:
#endif #endif
#ifdef ZT_ENABLE_CLUSTER #ifdef ZT_ENABLE_CLUSTER
,_clusterMessageSocket((PhySocket *)0) ,_clusterMessageSocket((PhySocket *)0)
,_clusterGeoIpService((ClusterGeoIpService *)0)
,_clusterDefinition((ClusterDefinition *)0) ,_clusterDefinition((ClusterDefinition *)0)
,_clusterMemberId(0) ,_clusterMemberId(0)
#endif #endif
@ -633,7 +631,6 @@ public:
delete _controller; delete _controller;
#endif #endif
#ifdef ZT_ENABLE_CLUSTER #ifdef ZT_ENABLE_CLUSTER
delete _clusterGeoIpService;
delete _clusterDefinition; delete _clusterDefinition;
#endif #endif
} }
@ -750,33 +747,18 @@ public:
return _termReason; return _termReason;
} }
if (OSUtils::fileExists((_homePath + ZT_PATH_SEPARATOR_S + "cluster-geo.exe").c_str()))
_clusterGeoIpService = new ClusterGeoIpService((_homePath + ZT_PATH_SEPARATOR_S + "cluster-geo.exe").c_str());
const ClusterDefinition::MemberDefinition &me = (*_clusterDefinition)[_clusterMemberId]; const ClusterDefinition::MemberDefinition &me = (*_clusterDefinition)[_clusterMemberId];
InetAddress endpoints[255]; InetAddress endpoints[255];
unsigned int numEndpoints = 0; unsigned int numEndpoints = 0;
for(std::vector<InetAddress>::const_iterator i(me.zeroTierEndpoints.begin());i!=me.zeroTierEndpoints.end();++i) for(std::vector<InetAddress>::const_iterator i(me.zeroTierEndpoints.begin());i!=me.zeroTierEndpoints.end();++i)
endpoints[numEndpoints++] = *i; endpoints[numEndpoints++] = *i;
if (_node->clusterInit( if (_node->clusterInit(_clusterMemberId,reinterpret_cast<const struct sockaddr_storage *>(endpoints),numEndpoints,me.x,me.y,me.z,&SclusterSendFunction,this,_clusterDefinition->geo().available() ? &SclusterGeoIpFunction : 0,this) == ZT_RESULT_OK) {
_clusterMemberId,
reinterpret_cast<const struct sockaddr_storage *>(endpoints),
numEndpoints,
me.x,
me.y,
me.z,
&SclusterSendFunction,
this,
(_clusterGeoIpService) ? &SclusterGeoIpFunction : 0,
this) == ZT_RESULT_OK) {
std::vector<ClusterDefinition::MemberDefinition> members(_clusterDefinition->members()); std::vector<ClusterDefinition::MemberDefinition> members(_clusterDefinition->members());
for(std::vector<ClusterDefinition::MemberDefinition>::iterator m(members.begin());m!=members.end();++m) { for(std::vector<ClusterDefinition::MemberDefinition>::iterator m(members.begin());m!=members.end();++m) {
if (m->id != _clusterMemberId) if (m->id != _clusterMemberId)
_node->clusterAddMember(m->id); _node->clusterAddMember(m->id);
} }
} }
} else { } else {
delete _clusterDefinition; delete _clusterDefinition;
@ -1611,7 +1593,7 @@ static void SclusterSendFunction(void *uptr,unsigned int toMemberId,const void *
static int SclusterGeoIpFunction(void *uptr,const struct sockaddr_storage *addr,int *x,int *y,int *z) static int SclusterGeoIpFunction(void *uptr,const struct sockaddr_storage *addr,int *x,int *y,int *z)
{ {
OneServiceImpl *const impl = reinterpret_cast<OneServiceImpl *>(uptr); OneServiceImpl *const impl = reinterpret_cast<OneServiceImpl *>(uptr);
return (int)(impl->_clusterGeoIpService->locate(*(reinterpret_cast<const InetAddress *>(addr)),*x,*y,*z)); return (int)(impl->_clusterDefinition->geo().locate(*(reinterpret_cast<const InetAddress *>(addr)),*x,*y,*z));
} }
#endif #endif