From e61898b46eafb2f404defd6ec9fb9129a40661ea Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Mon, 15 Sep 2014 15:02:33 -0700 Subject: [PATCH] Start of GeoJSON from datamaps v2 --- geojson.c | 783 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 783 insertions(+) create mode 100644 geojson.c diff --git a/geojson.c b/geojson.c new file mode 100644 index 0000000..8198c11 --- /dev/null +++ b/geojson.c @@ -0,0 +1,783 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "util.h" + +int mapbits = 2 * (16 + 8); // zoom level 16 +int metabits = 0; +int version = 2; + +#define MAX_INPUT 2000 + +struct file { + int legs; + int level; + FILE *f; + + struct file *next; +}; + +struct pool { + char *s; + long long off; + struct pool *left; + struct pool *right; +}; + +long long poolString(char *s, struct pool **pool, FILE *extra, long long *xoff) { + while (1) { + if (*pool == NULL) { + *pool = malloc(sizeof(struct pool)); + (*pool)->s = strdup(s); + (*pool)->off = *xoff; + *xoff += fwrite(s, sizeof(char), strlen(s) + 1, extra); + (*pool)->left = NULL; + (*pool)->right = NULL; + + return (*pool)->off; + } + + int cmp = strcmp(s, (*pool)->s); + + if (cmp == 0) { + return (*pool)->off; + } else if (cmp < 0) { + pool = &((*pool)->left); + } else { + pool = &((*pool)->right); + } + } +} + +void usage(char *name) { + fprintf(stderr, "Usage: %s [-z zoom] [-m metadata-bits] -o destdir [file ...]\n", + name); +} + +char *dequote(char **cp, int *type) { + char *start = *cp; + char *out = *cp; + + if (**cp == '"') { + *type = META_STRING; + } else { + *type = META_INTEGER; + } + + int within = 0; + for (; **cp; (*cp)++) { + if (**cp == '"') { + within = !within; + } else if (**cp == '\\' && (cp[0][1] == '\\' || cp[0][1] == '\"')) { + *out++ = cp[0][1]; + (*cp)++; + } else if (**cp == '\\' && cp[0][1] == 'u') { + char hex[5] = "aaaa"; + memcpy(hex, &(cp[0][2]), 4); + *cp += 5; + + unsigned long ch = strtoul(hex, NULL, 16); + if (ch <= 0x7F) { + *out++ = ch; + } else if (ch <= 0x7FF) { + *out++ = 0xC0 | (ch >> 6); + *out++ = 0x80 | (ch & 0x3F); + } else { + *out++ = 0xE0 | (ch >> 12); + *out++ = 0x80 | ((ch >> 6) & 0x3F); + *out++ = 0x80 | (ch & 0x3F); + } + } else if (isspace(**cp) && !within) { + break; + } else { + *out++ = cp[0][0]; + } + } + + if (*cp) { + (*cp)++; + } + + *out = '\0'; + return start; +} + +void encode(char *destdir, struct file **files, int *maxn, FILE *extra, long long *xoff, struct pool **pool, + int n, double *lat, double *lon, + int m, int *metasize, char **metaname, char **meta, int *metatype); + +void read_json(FILE *f, char *destdir, struct file **files, int *maxn, FILE *extra, long long *xoff, struct pool **pool) { + json_pull *jp = json_begin_file(f); + + while (1) { + json_object *j = json_read(jp); + + if (j == NULL) { + if (jp->error != NULL) { + fprintf(stderr, "%d: %s\n", jp->line, jp->error); + } + + json_free(jp->root); + break; + } + + json_object *type = json_hash_get(j, "type"); + if (type != NULL && type->type == JSON_STRING && strcmp(type->string, "Feature") == 0) { + json_object *geometry = json_hash_get(j, "geometry"); + + if (geometry != NULL) { + json_object *geometry_type = json_hash_get(geometry, "type"); + if (geometry_type != NULL && geometry_type->type == JSON_STRING) { + int t = -1; + + if (strcmp(geometry_type->string, "Point") == 0) { + t = GEOM_POINT; + } else if (strcmp(geometry_type->string, "LineString") == 0) { + t = GEOM_LINESTRING; + } else { + fprintf(stderr, "%d: Can't handle geometry type %s\n", jp->line, geometry_type->string); + } + + if (t != -1) { + json_object *properties = json_hash_get(j, "properties"); + json_object *coordinates = json_hash_get(geometry, "coordinates"); + + if (properties != NULL && properties->type == JSON_HASH) { + int metasize[properties->length]; + char *metaname[properties->length]; + char *meta[properties->length]; + int metatype[properties->length]; + int m = 0; + + int i; + for (i = 0; i < properties->length; i++) { + if (properties->keys[i]->type == JSON_STRING) { + metasize[m] = metabits; + metaname[m] = properties->keys[i]->string; + + if (properties->values[i] != NULL && properties->values[i]->type == JSON_STRING) { + metatype[m] = META_STRING; + meta[m] = properties->values[i]->string; + m++; + } else if (properties->values[i] != NULL && properties->values[i]->type == JSON_NUMBER) { + metatype[m] = META_INTEGER; + meta[m] = properties->values[i]->string; + m++; + } else { + fprintf(stderr, "%d: Unsupported meta type\n", jp->line); + } + } + } + + if (coordinates != NULL && coordinates->type == JSON_ARRAY) { + int n; + + if (t == GEOM_POINT) { + n = 1; + } else { + n = coordinates->length; + } + + double lon[n], lat[n]; + int ok = 1; + + if (t == GEOM_POINT) { + if (coordinates->length >= 2) { + lon[0] = coordinates->array[0]->number; + lat[0] = coordinates->array[1]->number; + } else { + fprintf(stderr, "%d: not enough coordinates for a point\n", jp->line); + ok = 0; + } + } else { + for (i = 0; i < n; i++) { + if (coordinates->array[i]->type == JSON_ARRAY && + coordinates->array[i]->length >= 2) { + lon[i] = coordinates->array[i]->array[0]->number; + lat[i] = coordinates->array[i]->array[1]->number; + } else { + fprintf(stderr, "%d: not enough coordinates within the geometry\n", jp->line); + ok = 0; + } + } + } + + if (ok) { + encode(destdir, files, maxn, extra, xoff, pool, n, lat, lon, m, metasize, metaname, meta, metatype); + } + } else { + fprintf(stderr, "%d: feature geometry with no coordinates\n", jp->line); + } + } else { + fprintf(stderr, "%d: feature with no properties\n", jp->line); + } + } + } + } else { + fprintf(stderr, "%d: feature with no geometry\n", jp->line); + } + + json_free(j); + } + } + + json_end(jp); +} + +void read_file(FILE *f, char *destdir, struct file **files, int *maxn, FILE *extra, long long *xoff, struct pool **pool) { + char s[MAX_INPUT]; + double lat[MAX_INPUT], lon[MAX_INPUT]; + int metasize[MAX_INPUT]; + char *meta[MAX_INPUT]; + char *metaname[MAX_INPUT]; + int metatype[MAX_INPUT]; + unsigned long long seq = 0; + long long maxmeta = -1; + + if (version == 2) { + metabits = 40; + } + + int c = getc(f); + if (c != EOF) { + ungetc(c, f); + } + + if (c == '{' || c == '[') { + read_json(f, destdir, files, maxn, extra, xoff, pool); + return; + } + + while (fgets(s, MAX_INPUT, f)) { + char *cp = s; + int n = 0, m = 0; + + if (seq % 100000 == 0) { + fprintf(stderr, "Read %.1f million records\r", seq / 1000000.0); + } + seq++; + + while (1) { + if (sscanf(cp, "%lf,%lf", &lat[n], &lon[n]) == 2) { + n++; + while (*cp != '\0' && *cp != ' ') { + cp++; + } + while (*cp == ' ') { + cp++; + } + } else if (sscanf(cp, "%d:", &metasize[m]) == 1) { + metasize[m] = atoi(cp); + while (*cp && isdigit(*cp)) { + cp++; + } + if (*cp == ':') { + cp++; + } + + meta[m] = dequote(&cp, &metatype[m]); + + if (atoll(meta[m]) > maxmeta && atoll(meta[m]) >= (1LLU << metabits)) { + fprintf(stderr, "Warning: metadata %s too big for -m%d\n", meta[m], metabits); + maxmeta = atoll(meta[m]); + } + metaname[m] = "meta"; + m++; + while (*cp == ' ') { + cp++; + } + } else if (*cp == ':') { + metasize[m] = metabits; + cp++; + + meta[m] = dequote(&cp, &metatype[m]); + + if (atoll(meta[m]) > maxmeta && atoll(meta[m]) >= (1LLU << metabits)) { + fprintf(stderr, "Warning: metadata %s too big for -m%d\n", meta[m], metabits); + maxmeta = atoll(meta[m]); + } + metaname[m] = "meta"; + m++; + while (*cp == ' ') { + cp++; + } + } else if (*cp == '/' && cp[1] == '/') { + break; + } else if (*cp == '=' || isalpha(*cp)) { + metasize[m] = metabits; + + if (*cp == '=') { + cp++; + } + + metaname[m] = cp; + for (; *cp; cp++) { + if (*cp == '=') { + break; + } + } + + if (*cp == 0) { + fprintf(stderr, "Didn't find value for %s\n", metaname[m]); + break; + } + + *cp = '\0'; + cp++; + + meta[m] = dequote(&cp, &metatype[m]); + m++; + while (*cp == ' ') { + cp++; + } + } else { + break; + } + } + + if (n == 0) { + fprintf(stderr, "No valid points in %s", s); + continue; + } + + encode(destdir, files, maxn, extra, xoff, pool, n, lat, lon, m, metasize, metaname, meta, metatype); + } +} + +void encode(char *destdir, struct file **files, int *maxn, FILE *extra, long long *xoff, struct pool **pool, + int n, double *lat, double *lon, + int m, int *metasize, char **metaname, char **meta, int *metatype) { + unsigned int x[MAX_INPUT], y[MAX_INPUT]; + + { + // Project each point to web mercator + + int i; + for (i = 0; i < n; i++) { + if (lat[i] > 85.0511 || lat[i] < -85.0511) { + fprintf(stderr, "Can't represent latitude %f\n", lat[i]); + n = 0; + break; + } + if (lon[i] >= 180 || lon[i] <= -180) { + fprintf(stderr, "Can't represent longitude %f\n", lon[i]); + n = 0; + break; + } + + latlon2tile(lat[i], lon[i], 32, &x[i], &y[i]); + } + + if (n == 0) { + return; + } + + // If this is a polyline, find out how many leading bits in common + // all the points have. + + int common = 0; + int components = n; + + if (n > 1) { + int ok = 1; + for (common = 0; ok && common < mapbits / 2; common++) { + int x0 = x[0] & (1 << (31 - common)); + int y0 = y[0] & (1 << (31 - common)); + + for (i = 1; i < n; i++) { + if ((x[i] & (1 << (31 - common))) != x0 || + (y[i] & (1 << (31 - common))) != y0) { + ok = 0; + break; + } + } + + if (!ok) { + break; + } + } + + if (version >= 2) { + components = n; + n = 1; + *maxn = 1; + } else { + if (n > *maxn) { + *maxn = n; + } + } + } else { + if (n > *maxn) { + *maxn = n; + } + } + + int bytes = bytesfor(mapbits, metabits, n, common); + unsigned char buf[bytes]; + memset(buf, 0, bytes); + + int off = 0; + xy2buf(x[0], y[0], buf, &off, mapbits, 0); + for (i = 1; i < n; i++) { + xy2buf(x[i], y[i], buf, &off, mapbits, common); + } + + if (version >= 2) { + long long keys[m], values[m]; + + for (i = 0; i < m; i++) { + keys[i] = poolString(metaname[i], pool, extra, xoff); + + if (metatype[i] == META_STRING) { + values[i] = poolString(meta[i], pool, extra, xoff); + } + } + + meta2buf(metabits, *xoff, buf, &off, bytes * 8); + long long here = *xoff; + + if (components > 1) { + n = 0; + } + + if (components == 1) { + *xoff += writeSigned(extra, (components << GEOM_TYPE_BITS) | GEOM_POINT); + } else { // XXX additional types + *xoff += writeSigned(extra, (components << GEOM_TYPE_BITS) | GEOM_LINESTRING); + } + + int s = 32 - (mapbits / 2); + for (i = 1; i < components; i++) { + *xoff += writeSigned(extra, (long long) (x[i] >> s) - (long long) (x[i - 1] >> s)); + *xoff += writeSigned(extra, (long long) (y[i] >> s) - (long long) (y[i - 1] >> s)); + } + + *xoff += writeSigned(extra, m); + for (i = 0; i < m; i++) { + *xoff += writeSigned(extra, keys[i] - here); + *xoff += writeSigned(extra, metatype[i]); + + if (metatype[i] == META_STRING) { + // string + *xoff += writeSigned(extra, values[i] - here); + } else if (metatype[i] == META_INTEGER) { + // integer + *xoff += writeSigned(extra, atoll(meta[i])); + } else { + // XXX + fprintf(stderr, "unsupported type %d\n", metatype[i]); + exit(EXIT_FAILURE); + } + } + } else { + for (i = 0; i < m; i++) { + meta2buf(metasize[i], atoll(meta[i]), buf, &off, bytes * 8); + } + } + + struct file **fo; + + for (fo = files; *fo != NULL; fo = &((*fo)->next)) { + if ((*fo)->legs == n && (*fo)->level == common) { + break; + } + } + + if (*fo == NULL) { + *fo = malloc(sizeof(struct file)); + + if (*fo == NULL) { + perror("malloc"); + exit(EXIT_FAILURE); + } + + char fn[strlen(destdir) + 10 + 1 + 10 + 1]; + sprintf(fn, "%s/%d,%d", destdir, n, common); + + (*fo)->next = NULL; + (*fo)->legs = n; + (*fo)->level = common; + (*fo)->f = fopen(fn, "w"); + + if ((*fo)->f == NULL) { + perror(fn); + exit(EXIT_FAILURE); + } + } + + fwrite(buf, sizeof(char), bytes, (*fo)->f); + } +} + +struct merge { + long long start; + long long end; + + struct merge *next; +}; + +void insert(struct merge *m, struct merge **head, unsigned char *map, int bytes) { + while (*head != NULL && memcmp(map + m->start, map + (*head)->start, bytes) > 0) { + head = &((*head)->next); + } + + m->next = *head; + *head = m; +} + +void merge(struct merge *merges, int nmerges, unsigned char *map, FILE *f, int bytes, long long nrec) { + int i; + struct merge *head = NULL; + long long along = 0; + long long reported = -1; + + for (i = 0; i < nmerges; i++) { + if (merges[i].start < merges[i].end) { + insert(&(merges[i]), &head, map, bytes); + } + } + + while (head != NULL) { + fwrite(map + head->start, bytes, 1, f); + head->start += bytes; + + struct merge *m = head; + head = m->next; + m->next = NULL; + + if (m->start < m->end) { + insert(m, &head, map, bytes); + } + + along++; + long long report = 100 * along / nrec; + if (report != reported) { + fprintf(stderr, "Merging: %lld%%\r", report); + reported = report; + } + } +} + +int main(int argc, char **argv) { + int i; + extern int optind; + extern char *optarg; + char *destdir = NULL; + struct pool *pool = NULL; + + while ((i = getopt(argc, argv, "z:m:o:")) != -1) { + switch (i) { + case 'z': + mapbits = 2 * (atoi(optarg) + 8); + break; + + case 'm': + metabits = atoi(optarg); + break; + + case 'o': + destdir = optarg; + break; + + default: + usage(argv[0]); + exit(EXIT_FAILURE); + } + } + + if (mapbits <= 8) { + fprintf(stderr, "%s: Zoom level (-z) must be > 0\n", argv[0]); + usage(argv[0]); + exit(EXIT_FAILURE); + } + + if (mapbits % 8 != 0) { + int b = (mapbits + 7) & ~7; + fprintf(stderr, "%s: Using zoom level %d, not %d\n", argv[0], + b / 2 - 8, mapbits / 2 - 8); + mapbits = b; + } + + if (destdir == NULL) { + fprintf(stderr, "%s: Must specify a directory with -o\n", argv[0]); + usage(argv[0]); + exit(EXIT_FAILURE); + } + + if (mkdir(destdir, 0777) != 0) { + perror(destdir); + exit(EXIT_FAILURE); + } + + char s[strlen(destdir) + 6 + 1]; + sprintf(s, "%s/extra", destdir); + FILE *extra = NULL; + long long xoff = 0; + if (version >= 2) { + extra = fopen(s, "wb"); + if (extra == NULL) { + perror(s); + exit(EXIT_FAILURE); + } + putc('\0', extra); + xoff++; + } + + struct file *files = NULL; + int maxn = 0; + + if (optind == argc) { + read_file(stdin, destdir, &files, &maxn, extra, &xoff, &pool); + } else { + for (i = optind; i < argc; i++) { + FILE *f = fopen(argv[i], "r"); + if (f == NULL) { + perror(argv[i]); + exit(EXIT_FAILURE); + } + + read_file(f, destdir, &files, &maxn, extra, &xoff, &pool); + fclose(f); + } + } + + sprintf(s, "%s/meta", destdir); + FILE *f = fopen(s, "w"); + if (f == NULL) { + perror(s); + exit(EXIT_FAILURE); + } + fprintf(f, "%d\n", version); + fprintf(f, "%d %d %d\n", mapbits, metabits, maxn); + fclose(f); + + if (version >= 2) { + off_t fp = ftello(extra); + + if (fp != xoff) { + fprintf(stderr, "Thought file position was %lld but it was %lld\n", + (long long) xoff, (long long) fp); + } + + fclose(extra); + } + + for (; files != NULL; files = files->next) { + fclose(files->f); + + char fn[strlen(destdir) + 10 + 1 + 10 + 1]; + sprintf(fn, "%s/%d,%d", destdir, files->legs, files->level); + + int fd = open(fn, O_RDWR); + if (fd < 0) { + perror(fn); + exit(EXIT_FAILURE); + } + + struct stat st; + if (fstat(fd, &st) < 0) { + perror("stat"); + exit(EXIT_FAILURE); + } + + int bytes; + if (files->legs == 0) { + bytes = bytesfor(mapbits, metabits, 1, files->level); + } else { + bytes = bytesfor(mapbits, metabits, files->legs, files->level); + } + gSortBytes = bytes; + + fprintf(stderr, + "Sorting %lld shapes sized for zoom level %d\n", + (long long) st.st_size / bytes, + files->level); + + int page = sysconf(_SC_PAGESIZE); + long long unit = (50 * 1024 * 1024 / bytes) * bytes; + while (unit % page != 0) { + unit += bytes; + } + + int nmerges = (st.st_size + unit - 1) / unit; + struct merge merges[nmerges]; + + long long start; + for (start = 0; start < st.st_size; start += unit) { + long long end = start + unit; + if (end > st.st_size) { + end = st.st_size; + } + + if (nmerges != 1) { + fprintf(stderr, "Sorting part %lld of %d\r", start / unit + 1, nmerges); + } + + merges[start / unit].start = start; + merges[start / unit].end = end; + merges[start / unit].next = NULL; + + void *map = mmap(NULL, end - start, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, start); + if (map == MAP_FAILED) { + perror("mmap"); + exit(EXIT_FAILURE); + } + + qsort(map, (end - start) / bytes, bytes, bufcmp); + + // Sorting and then copying avoids the need to + // write out intermediate stages of the sort. + + void *map2 = mmap(NULL, end - start, PROT_READ | PROT_WRITE, MAP_SHARED, fd, start); + if (map2 == MAP_FAILED) { + perror("mmap (write)"); + exit(EXIT_FAILURE); + } + + memcpy(map2, map, end - start); + + munmap(map, end - start); + munmap(map2, end - start); + } + + if (nmerges != 1) { + fprintf(stderr, "\n"); + } + + void *map = mmap(NULL, st.st_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + if (map == MAP_FAILED) { + perror("mmap"); + exit(EXIT_FAILURE); + } + + if (unlink(fn) != 0) { + perror("unlink"); + exit(EXIT_FAILURE); + } + + FILE *f = fopen(fn, "w"); + if (f == NULL) { + perror(fn); + exit(EXIT_FAILURE); + } + + merge(merges, nmerges, map, f, bytes, st.st_size / bytes); + + munmap(map, st.st_size); + fclose(f); + close(fd); + } + + fprintf(stderr, "\n"); + + return 0; +}