From 55e93a5d379e08149c00864adab7be5d061eda28 Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Wed, 17 Jun 2015 16:46:36 -0700 Subject: [PATCH 01/12] Use variable-length zigzag for ints and long longs --- geojson.c | 44 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/geojson.c b/geojson.c index 60edac0..f1effa0 100644 --- a/geojson.c +++ b/geojson.c @@ -60,13 +60,25 @@ size_t fwrite_check(const void *ptr, size_t size, size_t nitems, FILE *stream, c } void serialize_int(FILE *out, int n, long long *fpos, const char *fname) { - fwrite_check(&n, sizeof(int), 1, out, fname); - *fpos += sizeof(int); + serialize_long_long(out, n, fpos, fname); } void serialize_long_long(FILE *out, long long n, long long *fpos, const char *fname) { - fwrite_check(&n, sizeof(long long), 1, out, fname); - *fpos += sizeof(long long); + unsigned long long zigzag = (n << 1) ^ (n >> 63); + + while (1) { + unsigned char b = zigzag & 0x7F; + if ((zigzag >> 7) != 0) { + b |= 0x80; + fwrite_check(&b, sizeof(unsigned char), 1, out, fname); + *fpos += 1; + zigzag >>= 7; + } else { + fwrite_check(&b, sizeof(unsigned char), 1, out, fname); + *fpos += 1; + break; + } + } } void serialize_byte(FILE *out, signed char n, long long *fpos, const char *fname) { @@ -156,13 +168,29 @@ void parse_geometry(int t, json_object *j, unsigned *bbox, long long *fpos, FILE } void deserialize_int(char **f, int *n) { - memcpy(n, *f, sizeof(int)); - *f += sizeof(int); + long long ll; + deserialize_long_long(f, &ll); + *n = ll; } void deserialize_long_long(char **f, long long *n) { - memcpy(n, *f, sizeof(long long)); - *f += sizeof(long long); + unsigned long long zigzag = 0; + int shift = 0; + + while (1) { + if ((**f & 0x80) == 0) { + zigzag |= ((unsigned long long) **f) << shift; + *f += 1; + shift += 7; + break; + } else { + zigzag |= ((unsigned long long) (**f & 0x7F)) << shift; + *f += 1; + shift += 7; + } + } + + *n = (zigzag >> 1) ^ (-(zigzag & 1)); } void 
deserialize_uint(char **f, unsigned *n) { From cde1e60603146b9f261922b6e136cf287e221d4f Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Wed, 17 Jun 2015 17:18:08 -0700 Subject: [PATCH 02/12] Use a string pool to avoid duplicating keys and values --- geojson.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++----- tile.cc | 20 ++++++++++---- tile.h | 2 +- 3 files changed, 92 insertions(+), 13 deletions(-) diff --git a/geojson.c b/geojson.c index f1effa0..d8e6483 100644 --- a/geojson.c +++ b/geojson.c @@ -214,7 +214,7 @@ struct pool_val *deserialize_string(char **f, struct pool *p, int type) { return ret; } -int traverse_zooms(int geomfd[4], off_t geom_size[4], char *metabase, unsigned *file_bbox, struct pool **file_keys, unsigned *midx, unsigned *midy, char **layernames, int maxzoom, int minzoom, sqlite3 *outdb, double droprate, int buffer, const char *fname, const char *tmpdir, double gamma, int nlayers, char *prevent) { +int traverse_zooms(int geomfd[4], off_t geom_size[4], char *metabase, char *stringpool, unsigned *file_bbox, struct pool **file_keys, unsigned *midx, unsigned *midy, char **layernames, int maxzoom, int minzoom, sqlite3 *outdb, double droprate, int buffer, const char *fname, const char *tmpdir, double gamma, int nlayers, char *prevent) { int i; for (i = 0; i <= maxzoom; i++) { long long most = 0; @@ -275,7 +275,7 @@ int traverse_zooms(int geomfd[4], off_t geom_size[4], char *metabase, unsigned * // fprintf(stderr, "%d/%u/%u\n", z, x, y); - long long len = write_tile(&geom, metabase, file_bbox, z, x, y, z == maxzoom ? full_detail : low_detail, min_detail, maxzoom, file_keys, layernames, outdb, droprate, buffer, fname, sub, minzoom, maxzoom, todo, geomstart, along, gamma, nlayers, prevent); + long long len = write_tile(&geom, metabase, stringpool, file_bbox, z, x, y, z == maxzoom ? 
full_detail : low_detail, min_detail, maxzoom, file_keys, layernames, outdb, droprate, buffer, fname, sub, minzoom, maxzoom, todo, geomstart, along, gamma, nlayers, prevent); if (len < 0) { return i - 1; @@ -381,14 +381,49 @@ static void merge(struct merge *merges, int nmerges, unsigned char *map, FILE *f } } +struct stringpool { + char *s; + struct stringpool *left; + struct stringpool *right; + long long off; +} *pooltree = NULL; + +long long addpool(FILE *poolfile, long long *poolpos, char *s) { + struct stringpool **sp = &pooltree; + + while (*sp != NULL) { + int cmp = strcmp(s, (*sp)->s); + if (cmp < 0) { + sp = &((*sp)->left); + } else if (cmp > 0) { + sp = &((*sp)->right); + } else { + return (*sp)->off; + } + } + + *sp = malloc(sizeof(struct stringpool)); + (*sp)->s = strdup(s); // XXX really should be mapped from the pool itself + (*sp)->left = NULL; + (*sp)->right = NULL; + (*sp)->off = *poolpos; + + fwrite_check(s, strlen(s) + 1, sizeof(char), poolfile, "string pool"); + *poolpos += strlen(s) + 1; + + return (*sp)->off; +} + int read_json(int argc, char **argv, char *fname, const char *layername, int maxzoom, int minzoom, sqlite3 *outdb, struct pool *exclude, struct pool *include, int exclude_all, double droprate, int buffer, const char *tmpdir, double gamma, char *prevent) { int ret = EXIT_SUCCESS; char metaname[strlen(tmpdir) + strlen("/meta.XXXXXXXX") + 1]; + char poolname[strlen(tmpdir) + strlen("/pool.XXXXXXXX") + 1]; char geomname[strlen(tmpdir) + strlen("/geom.XXXXXXXX") + 1]; char indexname[strlen(tmpdir) + strlen("/index.XXXXXXXX") + 1]; sprintf(metaname, "%s%s", tmpdir, "/meta.XXXXXXXX"); + sprintf(poolname, "%s%s", tmpdir, "/pool.XXXXXXXX"); sprintf(geomname, "%s%s", tmpdir, "/geom.XXXXXXXX"); sprintf(indexname, "%s%s", tmpdir, "/index.XXXXXXXX"); @@ -397,6 +432,11 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max perror(metaname); exit(EXIT_FAILURE); } + int poolfd = mkstemp(poolname); + if (poolfd < 0) { + 
perror(poolname); + exit(EXIT_FAILURE); + } int geomfd = mkstemp(geomname); if (geomfd < 0) { perror(geomname); @@ -413,6 +453,11 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max perror(metaname); exit(EXIT_FAILURE); } + FILE *poolfile = fopen(poolname, "wb"); + if (poolfile == NULL) { + perror(poolname); + exit(EXIT_FAILURE); + } FILE *geomfile = fopen(geomname, "wb"); if (geomfile == NULL) { perror(geomname); @@ -424,13 +469,19 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max exit(EXIT_FAILURE); } long long metapos = 0; + long long poolpos = 0; long long geompos = 0; long long indexpos = 0; unlink(metaname); + unlink(poolname); unlink(geomname); unlink(indexname); + // So we still have a legitimate map even if no metadata + fprintf(poolfile, "\n"); + poolpos++; + unsigned file_bbox[] = {UINT_MAX, UINT_MAX, 0, 0}; unsigned midx = 0, midy = 0; long long seq = 0; @@ -592,8 +643,8 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max serialize_int(metafile, m, &metapos, fname); for (i = 0; i < m; i++) { serialize_int(metafile, metatype[i], &metapos, fname); - serialize_string(metafile, metakey[i], &metapos, fname); - serialize_string(metafile, metaval[i], &metapos, fname); + serialize_long_long(metafile, addpool(poolfile, &poolpos, metakey[i]), &metapos, fname); + serialize_long_long(metafile, addpool(poolfile, &poolpos, metaval[i]), &metapos, fname); } long long geomstart = geompos; @@ -667,11 +718,13 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max } fclose(metafile); + fclose(poolfile); fclose(geomfile); fclose(indexfile); struct stat geomst; struct stat metast; + struct stat poolst; if (fstat(geomfd, &geomst) != 0) { perror("stat geom\n"); @@ -681,6 +734,10 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max perror("stat meta\n"); exit(EXIT_FAILURE); } + if (fstat(poolfd, &poolst) != 0) { + 
perror("stat pool\n"); + exit(EXIT_FAILURE); + } if (geomst.st_size == 0 || metast.st_size == 0) { fprintf(stderr, "did not read any valid geometries\n"); @@ -693,6 +750,12 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max exit(EXIT_FAILURE); } + char *stringpool = (char *) mmap(NULL, poolst.st_size, PROT_READ, MAP_PRIVATE, poolfd, 0); + if (stringpool == MAP_FAILED) { + perror("mmap stringpool"); + exit(EXIT_FAILURE); + } + struct pool file_keys1[nlayers]; struct pool *file_keys[nlayers]; int i; @@ -915,9 +978,9 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max size[j] = 0; } - fprintf(stderr, "%lld features, %lld bytes of geometry, %lld bytes of metadata\n", seq, (long long) geomst.st_size, (long long) metast.st_size); + fprintf(stderr, "%lld features, %lld bytes of geometry, %lld bytes of metadata, %lld bytes of string pool\n", seq, (long long) geomst.st_size, (long long) metast.st_size, (long long) poolst.st_size); - int written = traverse_zooms(fd, size, meta, file_bbox, file_keys, &midx, &midy, layernames, maxzoom, minzoom, outdb, droprate, buffer, fname, tmpdir, gamma, nlayers, prevent); + int written = traverse_zooms(fd, size, meta, stringpool, file_bbox, file_keys, &midx, &midy, layernames, maxzoom, minzoom, outdb, droprate, buffer, fname, tmpdir, gamma, nlayers, prevent); if (maxzoom != written) { fprintf(stderr, "\n\n\n*** NOTE TILES ONLY COMPLETE THROUGH ZOOM %d ***\n\n\n", written); @@ -928,11 +991,17 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max if (munmap(meta, metast.st_size) != 0) { perror("munmap meta"); } - if (close(metafd) < 0) { perror("close meta"); } + if (munmap(stringpool, poolst.st_size) != 0) { + perror("munmap pool"); + } + if (close(poolfd) < 0) { + perror("close pool"); + } + double minlat = 0, minlon = 0, maxlat = 0, maxlon = 0, midlat = 0, midlon = 0; tile2latlon(midx, midy, maxzoom, &maxlat, &minlon); diff --git a/tile.cc 
b/tile.cc index 059a618..1cd0691 100644 --- a/tile.cc +++ b/tile.cc @@ -186,7 +186,17 @@ int coalindexcmp(const struct coalesce *c1, const struct coalesce *c2) { return cmp; } -void decode_meta(char **meta, struct pool *keys, struct pool *values, struct pool *file_keys, std::vector *intmeta, char *only) { +struct pool_val *retrieve_string(char **f, struct pool *p, int type, char *stringpool) { + struct pool_val *ret; + long long off; + + deserialize_long_long(f, &off); + ret = pool(p, stringpool + off, type); + + return ret; +} + +void decode_meta(char **meta, char *stringpool, struct pool *keys, struct pool *values, struct pool *file_keys, std::vector *intmeta, char *only) { int m; deserialize_int(meta, &m); @@ -194,13 +204,13 @@ void decode_meta(char **meta, struct pool *keys, struct pool *values, struct poo for (i = 0; i < m; i++) { int t; deserialize_int(meta, &t); - struct pool_val *key = deserialize_string(meta, keys, VT_STRING); + struct pool_val *key = retrieve_string(meta, keys, VT_STRING, stringpool); if (only != NULL && (strcmp(key->s, only) != 0)) { deserialize_int(meta, &t); *meta += t; } else { - struct pool_val *value = deserialize_string(meta, values, t); + struct pool_val *value = retrieve_string(meta, values, t, stringpool); intmeta->push_back(key->n); intmeta->push_back(value->n); @@ -349,7 +359,7 @@ void evaluate(std::vector &features, char *metabase, struct pool *file } #endif -long long write_tile(char **geoms, char *metabase, unsigned *file_bbox, int z, unsigned tx, unsigned ty, int detail, int min_detail, int basezoom, struct pool **file_keys, char **layernames, sqlite3 *outdb, double droprate, int buffer, const char *fname, FILE *geomfile[4], int file_minzoom, int file_maxzoom, double todo, char *geomstart, long long along, double gamma, int nlayers, char *prevent) { +long long write_tile(char **geoms, char *metabase, char *stringpool, unsigned *file_bbox, int z, unsigned tx, unsigned ty, int detail, int min_detail, int basezoom, struct 
pool **file_keys, char **layernames, sqlite3 *outdb, double droprate, int buffer, const char *fname, FILE *geomfile[4], int file_minzoom, int file_maxzoom, double todo, char *geomstart, long long along, double gamma, int nlayers, char *prevent) { int line_detail; static bool evaluated = false; double oprogress = 0; @@ -614,7 +624,7 @@ long long write_tile(char **geoms, char *metabase, unsigned *file_bbox, int z, u c.metasrc = meta; c.coalesced = false; - decode_meta(&meta, keys[layer], values[layer], file_keys[layer], &c.meta, NULL); + decode_meta(&meta, stringpool, keys[layer], values[layer], file_keys[layer], &c.meta, NULL); features[layer].push_back(c); } } diff --git a/tile.h b/tile.h index 3193231..5494c20 100644 --- a/tile.h +++ b/tile.h @@ -25,4 +25,4 @@ void deserialize_uint(char **f, unsigned *n); void deserialize_byte(char **f, signed char *n); struct pool_val *deserialize_string(char **f, struct pool *p, int type); -long long write_tile(char **geom, char *metabase, unsigned *file_bbox, int z, unsigned x, unsigned y, int detail, int min_detail, int basezoom, struct pool **file_keys, char **layernames, sqlite3 *outdb, double droprate, int buffer, const char *fname, FILE *geomfile[4], int file_minzoom, int file_maxzoom, double todo, char *geomstart, long long along, double gamma, int nlayers, char *prevent); +long long write_tile(char **geom, char *metabase, char *stringpool, unsigned *file_bbox, int z, unsigned x, unsigned y, int detail, int min_detail, int basezoom, struct pool **file_keys, char **layernames, sqlite3 *outdb, double droprate, int buffer, const char *fname, FILE *geomfile[4], int file_minzoom, int file_maxzoom, double todo, char *geomstart, long long along, double gamma, int nlayers, char *prevent); From 725ea71e570287555151960c0a7d04ebb7bb57eb Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Wed, 17 Jun 2015 17:30:17 -0700 Subject: [PATCH 03/12] Fix formatting --- geojson.c | 2 +- tile.cc | 8 ++++---- 2 files changed, 5 insertions(+), 5 
deletions(-) diff --git a/geojson.c b/geojson.c index d8e6483..69d6e67 100644 --- a/geojson.c +++ b/geojson.c @@ -403,7 +403,7 @@ long long addpool(FILE *poolfile, long long *poolpos, char *s) { } *sp = malloc(sizeof(struct stringpool)); - (*sp)->s = strdup(s); // XXX really should be mapped from the pool itself + (*sp)->s = strdup(s); // XXX really should be mapped from the pool itself (*sp)->left = NULL; (*sp)->right = NULL; (*sp)->off = *poolpos; diff --git a/tile.cc b/tile.cc index 1cd0691..f01506a 100644 --- a/tile.cc +++ b/tile.cc @@ -187,13 +187,13 @@ int coalindexcmp(const struct coalesce *c1, const struct coalesce *c2) { } struct pool_val *retrieve_string(char **f, struct pool *p, int type, char *stringpool) { - struct pool_val *ret; + struct pool_val *ret; long long off; - deserialize_long_long(f, &off); - ret = pool(p, stringpool + off, type); + deserialize_long_long(f, &off); + ret = pool(p, stringpool + off, type); - return ret; + return ret; } void decode_meta(char **meta, char *stringpool, struct pool *keys, struct pool *values, struct pool *file_keys, std::vector *intmeta, char *only) { From 46626e4f0892612322e7b87a6a06cc7472e70086 Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Wed, 17 Jun 2015 17:48:29 -0700 Subject: [PATCH 04/12] Delta encoding for motion within features --- geojson.c | 13 ++++++++----- geometry.cc | 16 +++++++++++----- tile.cc | 7 +++++-- 3 files changed, 24 insertions(+), 12 deletions(-) diff --git a/geojson.c b/geojson.c index 69d6e67..d9ff6a5 100644 --- a/geojson.c +++ b/geojson.c @@ -100,7 +100,7 @@ void serialize_string(FILE *out, const char *s, long long *fpos, const char *fna *fpos += len + 1; } -void parse_geometry(int t, json_object *j, unsigned *bbox, long long *fpos, FILE *out, int op, const char *fname, json_pull *source) { +void parse_geometry(int t, json_object *j, unsigned *bbox, long long *fpos, FILE *out, int op, const char *fname, json_pull *source, long long *wx, long long *wy) { if (j == NULL || j->type != 
JSON_ARRAY) { fprintf(stderr, "%s:%d: expected array for type %d\n", fname, source->line, t); return; @@ -119,7 +119,7 @@ void parse_geometry(int t, json_object *j, unsigned *bbox, long long *fpos, FILE } } - parse_geometry(within, j->array[i], bbox, fpos, out, op, fname, source); + parse_geometry(within, j->array[i], bbox, fpos, out, op, fname, source, wx, wy); } } else { if (j->length >= 2 && j->array[0]->type == JSON_NUMBER && j->array[1]->type == JSON_NUMBER) { @@ -153,8 +153,10 @@ void parse_geometry(int t, json_object *j, unsigned *bbox, long long *fpos, FILE } serialize_byte(out, op, fpos, fname); - serialize_uint(out, x, fpos, fname); - serialize_uint(out, y, fpos, fname); + serialize_long_long(out, x - *wx, fpos, fname); + serialize_long_long(out, y - *wy, fpos, fname); + *wx = x; + *wy = y; } else { fprintf(stderr, "%s:%d: malformed point\n", fname, source->line); } @@ -652,7 +654,8 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max serialize_byte(geomfile, mb_geometry[t], &geompos, fname); serialize_byte(geomfile, n, &geompos, fname); serialize_long_long(geomfile, metastart, &geompos, fname); - parse_geometry(t, coordinates, bbox, &geompos, geomfile, VT_MOVETO, fname, jp); + long long wx = 0, wy = 0; + parse_geometry(t, coordinates, bbox, &geompos, geomfile, VT_MOVETO, fname, jp, &wx, &wy); serialize_byte(geomfile, VT_END, &geompos, fname); /* diff --git a/geometry.cc b/geometry.cc index de5a467..6a79dcf 100644 --- a/geometry.cc +++ b/geometry.cc @@ -25,6 +25,8 @@ drawvec decode_geometry(char **meta, int z, unsigned tx, unsigned ty, int detail bbox[2] = LONG_LONG_MIN; bbox[3] = LONG_LONG_MIN; + long long wx = 0, wy = 0; + while (1) { draw d; @@ -34,12 +36,16 @@ drawvec decode_geometry(char **meta, int z, unsigned tx, unsigned ty, int detail } if (d.op == VT_MOVETO || d.op == VT_LINETO) { - unsigned wx, wy; - deserialize_uint(meta, &wx); - deserialize_uint(meta, &wy); + long long dx, dy; - long long wwx = (unsigned) wx; - 
long long wwy = (unsigned) wy; + deserialize_long_long(meta, &dx); + deserialize_long_long(meta, &dy); + + wx += dx; + wy += dy; + + long long wwx = wx; + long long wwy = wy; if (z != 0) { wwx -= tx << (32 - z); diff --git a/tile.cc b/tile.cc index f01506a..7c05564 100644 --- a/tile.cc +++ b/tile.cc @@ -506,13 +506,16 @@ long long write_tile(char **geoms, char *metabase, char *stringpool, unsigned *f serialize_byte(geomfile[j], t, &geompos[j], fname); serialize_byte(geomfile[j], layer, &geompos[j], fname); serialize_long_long(geomfile[j], metastart, &geompos[j], fname); + long long wx = 0, wy = 0; for (unsigned u = 0; u < geom.size(); u++) { serialize_byte(geomfile[j], geom[u].op, &geompos[j], fname); if (geom[u].op != VT_CLOSEPATH) { - serialize_uint(geomfile[j], geom[u].x + sx, &geompos[j], fname); - serialize_uint(geomfile[j], geom[u].y + sy, &geompos[j], fname); + serialize_long_long(geomfile[j], geom[u].x + sx - wx, &geompos[j], fname); + serialize_long_long(geomfile[j], geom[u].y + sy - wy, &geompos[j], fname); + wx = geom[u].x + sx; + wy = geom[u].y + sy; } } From 1a44538bdff15fb1a6984b3d2912036926ef8016 Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Thu, 18 Jun 2015 10:50:57 -0700 Subject: [PATCH 05/12] Use the first coordinates of the first feature as the origin for deltas --- geojson.c | 19 +++++++++++++++---- geometry.cc | 2 +- tile.cc | 2 +- tile.h | 2 ++ 4 files changed, 19 insertions(+), 6 deletions(-) diff --git a/geojson.c b/geojson.c index d9ff6a5..1863c5a 100644 --- a/geojson.c +++ b/geojson.c @@ -25,6 +25,9 @@ int low_detail = 10; int full_detail = -1; int min_detail = 7; +unsigned initial_x = 0, initial_y = 0; +int initialized = 0; + #define GEOM_POINT 0 /* array of positions */ #define GEOM_MULTIPOINT 1 /* array of arrays of positions */ #define GEOM_LINESTRING 2 /* array of arrays of positions */ @@ -100,7 +103,7 @@ void serialize_string(FILE *out, const char *s, long long *fpos, const char *fna *fpos += len + 1; } -void parse_geometry(int 
t, json_object *j, unsigned *bbox, long long *fpos, FILE *out, int op, const char *fname, json_pull *source, long long *wx, long long *wy) { +void parse_geometry(int t, json_object *j, unsigned *bbox, long long *fpos, FILE *out, int op, const char *fname, json_pull *source, long long *wx, long long *wy, int *initialized) { if (j == NULL || j->type != JSON_ARRAY) { fprintf(stderr, "%s:%d: expected array for type %d\n", fname, source->line, t); return; @@ -119,7 +122,7 @@ void parse_geometry(int t, json_object *j, unsigned *bbox, long long *fpos, FILE } } - parse_geometry(within, j->array[i], bbox, fpos, out, op, fname, source, wx, wy); + parse_geometry(within, j->array[i], bbox, fpos, out, op, fname, source, wx, wy, initialized); } } else { if (j->length >= 2 && j->array[0]->type == JSON_NUMBER && j->array[1]->type == JSON_NUMBER) { @@ -152,6 +155,14 @@ void parse_geometry(int t, json_object *j, unsigned *bbox, long long *fpos, FILE } } + if (!*initialized) { + initial_x = x; + initial_y = y; + *wx = x; + *wy = y; + *initialized = 1; + } + serialize_byte(out, op, fpos, fname); serialize_long_long(out, x - *wx, fpos, fname); serialize_long_long(out, y - *wy, fpos, fname); @@ -654,8 +665,8 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max serialize_byte(geomfile, mb_geometry[t], &geompos, fname); serialize_byte(geomfile, n, &geompos, fname); serialize_long_long(geomfile, metastart, &geompos, fname); - long long wx = 0, wy = 0; - parse_geometry(t, coordinates, bbox, &geompos, geomfile, VT_MOVETO, fname, jp, &wx, &wy); + long long wx = initial_x, wy = initial_y; + parse_geometry(t, coordinates, bbox, &geompos, geomfile, VT_MOVETO, fname, jp, &wx, &wy, &initialized); serialize_byte(geomfile, VT_END, &geompos, fname); /* diff --git a/geometry.cc b/geometry.cc index 6a79dcf..14d489d 100644 --- a/geometry.cc +++ b/geometry.cc @@ -25,7 +25,7 @@ drawvec decode_geometry(char **meta, int z, unsigned tx, unsigned ty, int detail bbox[2] = 
LONG_LONG_MIN; bbox[3] = LONG_LONG_MIN; - long long wx = 0, wy = 0; + long long wx = initial_x, wy = initial_y; while (1) { draw d; diff --git a/tile.cc b/tile.cc index 7c05564..12791c3 100644 --- a/tile.cc +++ b/tile.cc @@ -506,7 +506,7 @@ long long write_tile(char **geoms, char *metabase, char *stringpool, unsigned *f serialize_byte(geomfile[j], t, &geompos[j], fname); serialize_byte(geomfile[j], layer, &geompos[j], fname); serialize_long_long(geomfile[j], metastart, &geompos[j], fname); - long long wx = 0, wy = 0; + long long wx = initial_x, wy = initial_y; for (unsigned u = 0; u < geom.size(); u++) { serialize_byte(geomfile[j], geom[u].op, &geompos[j], fname); diff --git a/tile.h b/tile.h index 5494c20..bccc145 100644 --- a/tile.h +++ b/tile.h @@ -26,3 +26,5 @@ void deserialize_byte(char **f, signed char *n); struct pool_val *deserialize_string(char **f, struct pool *p, int type); long long write_tile(char **geom, char *metabase, char *stringpool, unsigned *file_bbox, int z, unsigned x, unsigned y, int detail, int min_detail, int basezoom, struct pool **file_keys, char **layernames, sqlite3 *outdb, double droprate, int buffer, const char *fname, FILE *geomfile[4], int file_minzoom, int file_maxzoom, double todo, char *geomstart, long long along, double gamma, int nlayers, char *prevent); + +extern unsigned initial_x, initial_y; From a1d3ecf9bb5cda46591694bf4d256a15d661111b Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Thu, 18 Jun 2015 12:12:20 -0700 Subject: [PATCH 06/12] Save another byte per attribute by moving the type to the string pool --- geojson.c | 24 +++++++++++++++--------- tile.cc | 18 ++++++++---------- 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/geojson.c b/geojson.c index 1863c5a..d43b0db 100644 --- a/geojson.c +++ b/geojson.c @@ -395,17 +395,22 @@ static void merge(struct merge *merges, int nmerges, unsigned char *map, FILE *f } struct stringpool { - char *s; + char *s; // first byte is type struct stringpool *left; struct 
stringpool *right; long long off; } *pooltree = NULL; -long long addpool(FILE *poolfile, long long *poolpos, char *s) { +long long addpool(FILE *poolfile, long long *poolpos, char *s, char type) { struct stringpool **sp = &pooltree; while (*sp != NULL) { - int cmp = strcmp(s, (*sp)->s); + int cmp = strcmp(s, (*sp)->s + 1); + + if (cmp == 0) { + cmp = type - (*sp)->s[0]; + } + if (cmp < 0) { sp = &((*sp)->left); } else if (cmp > 0) { @@ -416,13 +421,15 @@ long long addpool(FILE *poolfile, long long *poolpos, char *s) { } *sp = malloc(sizeof(struct stringpool)); - (*sp)->s = strdup(s); // XXX really should be mapped from the pool itself + (*sp)->s = malloc(strlen(s) + 2); + (*sp)->s[0] = type; + strcpy((*sp)->s + 1, s); // XXX really should be mapped from the pool itself (*sp)->left = NULL; (*sp)->right = NULL; (*sp)->off = *poolpos; - fwrite_check(s, strlen(s) + 1, sizeof(char), poolfile, "string pool"); - *poolpos += strlen(s) + 1; + fwrite_check((*sp)->s, strlen(s) + 2, sizeof(char), poolfile, "string pool"); + *poolpos += strlen(s) + 2; return (*sp)->off; } @@ -655,9 +662,8 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max serialize_int(metafile, m, &metapos, fname); for (i = 0; i < m; i++) { - serialize_int(metafile, metatype[i], &metapos, fname); - serialize_long_long(metafile, addpool(poolfile, &poolpos, metakey[i]), &metapos, fname); - serialize_long_long(metafile, addpool(poolfile, &poolpos, metaval[i]), &metapos, fname); + serialize_long_long(metafile, addpool(poolfile, &poolpos, metakey[i], VT_STRING), &metapos, fname); + serialize_long_long(metafile, addpool(poolfile, &poolpos, metaval[i], metatype[i]), &metapos, fname); } long long geomstart = geompos; diff --git a/tile.cc b/tile.cc index 12791c3..2877da4 100644 --- a/tile.cc +++ b/tile.cc @@ -186,12 +186,12 @@ int coalindexcmp(const struct coalesce *c1, const struct coalesce *c2) { return cmp; } -struct pool_val *retrieve_string(char **f, struct pool *p, int type, char 
*stringpool) { +struct pool_val *retrieve_string(char **f, struct pool *p, char *stringpool) { struct pool_val *ret; long long off; deserialize_long_long(f, &off); - ret = pool(p, stringpool + off, type); + ret = pool(p, stringpool + off + 1, stringpool[off]); return ret; } @@ -202,22 +202,20 @@ void decode_meta(char **meta, char *stringpool, struct pool *keys, struct pool * int i; for (i = 0; i < m; i++) { - int t; - deserialize_int(meta, &t); - struct pool_val *key = retrieve_string(meta, keys, VT_STRING, stringpool); + struct pool_val *key = retrieve_string(meta, keys, stringpool); if (only != NULL && (strcmp(key->s, only) != 0)) { - deserialize_int(meta, &t); - *meta += t; + // XXX if evaluate ever works again, check whether this is sufficient + (void) retrieve_string(meta, values, stringpool); } else { - struct pool_val *value = retrieve_string(meta, values, t, stringpool); + struct pool_val *value = retrieve_string(meta, values, stringpool); intmeta->push_back(key->n); intmeta->push_back(value->n); - if (!is_pooled(file_keys, key->s, t)) { + if (!is_pooled(file_keys, key->s, value->type)) { // Dup to retain after munmap - pool(file_keys, strdup(key->s), t); + pool(file_keys, strdup(key->s), value->type); } } } From a185073f0abea557a9ab11a23a4219aaaad2ea0a Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Thu, 18 Jun 2015 14:16:16 -0700 Subject: [PATCH 07/12] Shrink the geometry by shaving off bits below the maxzoom tile resolution --- geojson.c | 10 +++++++--- geometry.cc | 4 ++-- tile.cc | 4 ++-- tile.h | 1 + 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/geojson.c b/geojson.c index d43b0db..c381e95 100644 --- a/geojson.c +++ b/geojson.c @@ -26,6 +26,7 @@ int full_detail = -1; int min_detail = 7; unsigned initial_x = 0, initial_y = 0; +int geometry_scale = 0; int initialized = 0; #define GEOM_POINT 0 /* array of positions */ @@ -164,8 +165,8 @@ void parse_geometry(int t, json_object *j, unsigned *bbox, long long *fpos, FILE } 
serialize_byte(out, op, fpos, fname); - serialize_long_long(out, x - *wx, fpos, fname); - serialize_long_long(out, y - *wy, fpos, fname); + serialize_long_long(out, (x >> geometry_scale) - (*wx >> geometry_scale), fpos, fname); + serialize_long_long(out, (y >> geometry_scale) - (*wy >> geometry_scale), fpos, fname); *wx = x; *wy = y; } else { @@ -494,7 +495,7 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max long long indexpos = 0; unlink(metaname); - unlink(poolname); + //unlink(poolname); unlink(geomname); unlink(indexname); @@ -1182,6 +1183,9 @@ int main(int argc, char **argv) { exit(EXIT_FAILURE); } + geometry_scale = 32 - (full_detail + maxzoom); + printf("geometry scale is %d\n", geometry_scale); + if (outdir == NULL) { fprintf(stderr, "%s: must specify -o out.mbtiles\n", argv[0]); exit(EXIT_FAILURE); diff --git a/geometry.cc b/geometry.cc index 14d489d..281a41c 100644 --- a/geometry.cc +++ b/geometry.cc @@ -41,8 +41,8 @@ drawvec decode_geometry(char **meta, int z, unsigned tx, unsigned ty, int detail deserialize_long_long(meta, &dx); deserialize_long_long(meta, &dy); - wx += dx; - wy += dy; + wx += dx << geometry_scale; + wy += dy << geometry_scale; long long wwx = wx; long long wwy = wy; diff --git a/tile.cc b/tile.cc index 2877da4..585783f 100644 --- a/tile.cc +++ b/tile.cc @@ -510,8 +510,8 @@ long long write_tile(char **geoms, char *metabase, char *stringpool, unsigned *f serialize_byte(geomfile[j], geom[u].op, &geompos[j], fname); if (geom[u].op != VT_CLOSEPATH) { - serialize_long_long(geomfile[j], geom[u].x + sx - wx, &geompos[j], fname); - serialize_long_long(geomfile[j], geom[u].y + sy - wy, &geompos[j], fname); + serialize_long_long(geomfile[j], ((geom[u].x + sx) >> geometry_scale) - (wx >> geometry_scale), &geompos[j], fname); + serialize_long_long(geomfile[j], ((geom[u].y + sy) >> geometry_scale) - (wy >> geometry_scale), &geompos[j], fname); wx = geom[u].x + sx; wy = geom[u].y + sy; } diff --git a/tile.h b/tile.h 
index bccc145..644662e 100644 --- a/tile.h +++ b/tile.h @@ -28,3 +28,4 @@ struct pool_val *deserialize_string(char **f, struct pool *p, int type); long long write_tile(char **geom, char *metabase, char *stringpool, unsigned *file_bbox, int z, unsigned x, unsigned y, int detail, int min_detail, int basezoom, struct pool **file_keys, char **layernames, sqlite3 *outdb, double droprate, int buffer, const char *fname, FILE *geomfile[4], int file_minzoom, int file_maxzoom, double todo, char *geomstart, long long along, double gamma, int nlayers, char *prevent); extern unsigned initial_x, initial_y; +extern int geometry_scale; From d96dee8dad60980f885c18c3b05a6aa52568b56e Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Thu, 18 Jun 2015 14:47:29 -0700 Subject: [PATCH 08/12] Fix formatting again --- geojson.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/geojson.c b/geojson.c index c381e95..46b8184 100644 --- a/geojson.c +++ b/geojson.c @@ -396,7 +396,7 @@ static void merge(struct merge *merges, int nmerges, unsigned char *map, FILE *f } struct stringpool { - char *s; // first byte is type + char *s; // first byte is type struct stringpool *left; struct stringpool *right; long long off; From dc3021656e21172df9cd83b7551a61eb2f597cfd Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Thu, 18 Jun 2015 16:13:37 -0700 Subject: [PATCH 09/12] Build the string pool in an appendable memory map, not the normal heap --- Makefile | 2 +- geojson.c | 108 ++++++++++++++++++++++++++++++------------------------ memfile.c | 69 ++++++++++++++++++++++++++++++++++ memfile.h | 10 +++++ 4 files changed, 141 insertions(+), 48 deletions(-) create mode 100644 memfile.c create mode 100644 memfile.h diff --git a/Makefile b/Makefile index 94c63fc..efb279f 100644 --- a/Makefile +++ b/Makefile @@ -22,7 +22,7 @@ C = $(shell find . 
'(' -name '*.c' -o -name '*.cc' ')') INCLUDES = -I/usr/local/include LIBS = -L/usr/local/lib -tippecanoe: geojson.o jsonpull.o vector_tile.pb.o tile.o clip.o pool.o mbtiles.o geometry.o projection.o +tippecanoe: geojson.o jsonpull.o vector_tile.pb.o tile.o clip.o pool.o mbtiles.o geometry.o projection.o memfile.o g++ $(PG) $(LIBS) -O3 -g -Wall -o $@ $^ -lm -lz -lprotobuf-lite -lsqlite3 enumerate: enumerate.o diff --git a/geojson.c b/geojson.c index 46b8184..1146360 100644 --- a/geojson.c +++ b/geojson.c @@ -20,6 +20,7 @@ #include "mbtiles.h" #include "projection.h" #include "version.h" +#include "memfile.h" int low_detail = 10; int full_detail = -1; @@ -396,43 +397,71 @@ static void merge(struct merge *merges, int nmerges, unsigned char *map, FILE *f } struct stringpool { - char *s; // first byte is type - struct stringpool *left; - struct stringpool *right; + long long left; + long long right; long long off; -} *pooltree = NULL; +}; +long long pooltree = 0; -long long addpool(FILE *poolfile, long long *poolpos, char *s, char type) { - struct stringpool **sp = &pooltree; +long long addpool(struct memfile *poolfile, char *s, char type) { + long long *sp = &pooltree; - while (*sp != NULL) { - int cmp = strcmp(s, (*sp)->s + 1); + while (*sp != 0) { + int cmp = strcmp(s, poolfile->map + ((struct stringpool *) (poolfile->map + *sp))->off + 1); if (cmp == 0) { - cmp = type - (*sp)->s[0]; + cmp = type - (poolfile->map + ((struct stringpool *) (poolfile->map + *sp))->off)[0]; } if (cmp < 0) { - sp = &((*sp)->left); + sp = &(((struct stringpool *) (poolfile->map + *sp))->left); } else if (cmp > 0) { - sp = &((*sp)->right); + sp = &(((struct stringpool *) (poolfile->map + *sp))->right); } else { - return (*sp)->off; + return ((struct stringpool *) (poolfile->map + *sp))->off; } } - *sp = malloc(sizeof(struct stringpool)); - (*sp)->s = malloc(strlen(s) + 2); - (*sp)->s[0] = type; - strcpy((*sp)->s + 1, s); // XXX really should be mapped from the pool itself - (*sp)->left = 
NULL; - (*sp)->right = NULL; - (*sp)->off = *poolpos; + // *sp is probably in the memory-mapped file, and will move if the file grows. + long long ssp; + if (sp == &pooltree) { + ssp = -1; + } else { + ssp = ((char *) sp) - poolfile->map; + } - fwrite_check((*sp)->s, strlen(s) + 2, sizeof(char), poolfile, "string pool"); - *poolpos += strlen(s) + 2; + long long off = poolfile->off; + if (memfile_write(poolfile, &type, 1) < 0) { + perror("memfile write"); + exit(EXIT_FAILURE); + } + if (memfile_write(poolfile, s, strlen(s) + 1) < 0) { + perror("memfile write"); + exit(EXIT_FAILURE); + } - return (*sp)->off; + struct stringpool tsp; + tsp.left = 0; + tsp.right = 0; + tsp.off = off; + + // alignment + while (poolfile->off % sizeof(long long) != 0) { + memfile_write(poolfile, "", 1); + } + + long long p = poolfile->off; + if (memfile_write(poolfile, &tsp, sizeof(struct stringpool)) < 0) { + perror("memfile write"); + exit(EXIT_FAILURE); + } + + if (ssp == -1) { + pooltree = p; + } else { + *((long long *) (poolfile->map + ssp)) = p; + } + return off; } int read_json(int argc, char **argv, char *fname, const char *layername, int maxzoom, int minzoom, sqlite3 *outdb, struct pool *exclude, struct pool *include, int exclude_all, double droprate, int buffer, const char *tmpdir, double gamma, char *prevent) { @@ -474,7 +503,7 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max perror(metaname); exit(EXIT_FAILURE); } - FILE *poolfile = fopen(poolname, "wb"); + struct memfile *poolfile = memfile_open(poolfd); if (poolfile == NULL) { perror(poolname); exit(EXIT_FAILURE); @@ -490,18 +519,16 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max exit(EXIT_FAILURE); } long long metapos = 0; - long long poolpos = 0; long long geompos = 0; long long indexpos = 0; unlink(metaname); - //unlink(poolname); + unlink(poolname); unlink(geomname); unlink(indexname); - // So we still have a legitimate map even if no metadata - 
fprintf(poolfile, "\n"); - poolpos++; + // To distinguish a null value + memfile_write(poolfile, "", 1); unsigned file_bbox[] = {UINT_MAX, UINT_MAX, 0, 0}; unsigned midx = 0, midy = 0; @@ -663,8 +690,8 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max serialize_int(metafile, m, &metapos, fname); for (i = 0; i < m; i++) { - serialize_long_long(metafile, addpool(poolfile, &poolpos, metakey[i], VT_STRING), &metapos, fname); - serialize_long_long(metafile, addpool(poolfile, &poolpos, metaval[i], metatype[i]), &metapos, fname); + serialize_long_long(metafile, addpool(poolfile, metakey[i], VT_STRING), &metapos, fname); + serialize_long_long(metafile, addpool(poolfile, metaval[i], metatype[i]), &metapos, fname); } long long geomstart = geompos; @@ -739,13 +766,11 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max } fclose(metafile); - fclose(poolfile); fclose(geomfile); fclose(indexfile); struct stat geomst; struct stat metast; - struct stat poolst; if (fstat(geomfd, &geomst) != 0) { perror("stat geom\n"); @@ -755,10 +780,6 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max perror("stat meta\n"); exit(EXIT_FAILURE); } - if (fstat(poolfd, &poolst) != 0) { - perror("stat pool\n"); - exit(EXIT_FAILURE); - } if (geomst.st_size == 0 || metast.st_size == 0) { fprintf(stderr, "did not read any valid geometries\n"); @@ -771,11 +792,7 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max exit(EXIT_FAILURE); } - char *stringpool = (char *) mmap(NULL, poolst.st_size, PROT_READ, MAP_PRIVATE, poolfd, 0); - if (stringpool == MAP_FAILED) { - perror("mmap stringpool"); - exit(EXIT_FAILURE); - } + char *stringpool = poolfile->map; struct pool file_keys1[nlayers]; struct pool *file_keys[nlayers]; @@ -856,6 +873,7 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max merges[start / unit].end = end; merges[start / unit].next = NULL; + 
// MAP_PRIVATE to avoid disk writes if it fits in memory void *map = mmap(NULL, end - start, PROT_READ | PROT_WRITE, MAP_PRIVATE, indexfd, start); if (map == MAP_FAILED) { perror("mmap"); @@ -999,7 +1017,7 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max size[j] = 0; } - fprintf(stderr, "%lld features, %lld bytes of geometry, %lld bytes of metadata, %lld bytes of string pool\n", seq, (long long) geomst.st_size, (long long) metast.st_size, (long long) poolst.st_size); + fprintf(stderr, "%lld features, %lld bytes of geometry, %lld bytes of metadata, %lld bytes of string pool\n", seq, (long long) geomst.st_size, (long long) metast.st_size, poolfile->off); int written = traverse_zooms(fd, size, meta, stringpool, file_bbox, file_keys, &midx, &midy, layernames, maxzoom, minzoom, outdb, droprate, buffer, fname, tmpdir, gamma, nlayers, prevent); @@ -1016,10 +1034,7 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max perror("close meta"); } - if (munmap(stringpool, poolst.st_size) != 0) { - perror("munmap pool"); - } - if (close(poolfd) < 0) { + if (memfile_close(poolfile) != 0) { perror("close pool"); } @@ -1184,7 +1199,6 @@ int main(int argc, char **argv) { } geometry_scale = 32 - (full_detail + maxzoom); - printf("geometry scale is %d\n", geometry_scale); if (outdir == NULL) { fprintf(stderr, "%s: must specify -o out.mbtiles\n", argv[0]); diff --git a/memfile.c b/memfile.c new file mode 100644 index 0000000..454628c --- /dev/null +++ b/memfile.c @@ -0,0 +1,69 @@ +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/mman.h> +#include "memfile.h" + +#define INCREMENT 131072 + +struct memfile *memfile_open(int fd) { + if (ftruncate(fd, INCREMENT) != 0) { + return NULL; + } + + char *map = mmap(NULL, INCREMENT, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (map == MAP_FAILED) { + return NULL; + } + + struct memfile *mf = malloc(sizeof(struct memfile)); + if (mf == NULL) { + munmap(map, INCREMENT); + return NULL; + } + + mf->fd
= fd; + mf->map = map; + mf->len = INCREMENT; + mf->off = 0; + + return mf; +} + +int memfile_close(struct memfile *file) { + if (munmap(file->map, file->len) != 0) { + return -1; + } + + if (file->fd >= 0) { + if (close(file->fd) != 0) { + return -1; + } + } + + free(file); + return 0; +} + +int memfile_write(struct memfile *file, void *s, long long len) { + if (file->off + len > file->len) { + if (munmap(file->map, file->len) != 0) { + return -1; + } + + file->len += INCREMENT; + + if (ftruncate(file->fd, file->len) != 0) { + return -1; + } + + file->map = mmap(NULL, file->len, PROT_READ | PROT_WRITE, MAP_SHARED, file->fd, 0); + if (file->map == MAP_FAILED) { + return -1; + } + } + + memcpy(file->map + file->off, s, len); + file->off += len; + return len; +} diff --git a/memfile.h b/memfile.h new file mode 100644 index 0000000..b536a6c --- /dev/null +++ b/memfile.h @@ -0,0 +1,10 @@ +struct memfile { + int fd; + char *map; + long long len; + long long off; +}; + +struct memfile *memfile_open(int fd); +int memfile_close(struct memfile *file); +int memfile_write(struct memfile *file, void *s, long long len); From 498e7235631b3700c3eafbc81215ecb849355d71 Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Thu, 18 Jun 2015 16:30:51 -0700 Subject: [PATCH 10/12] Yet another temp file, for the tree of pointers into the string pool --- geojson.c | 48 ++++++++++++++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/geojson.c b/geojson.c index 1146360..b794097 100644 --- a/geojson.c +++ b/geojson.c @@ -403,22 +403,22 @@ struct stringpool { }; long long pooltree = 0; -long long addpool(struct memfile *poolfile, char *s, char type) { +long long addpool(struct memfile *poolfile, struct memfile *treefile, char *s, char type) { long long *sp = &pooltree; while (*sp != 0) { - int cmp = strcmp(s, poolfile->map + ((struct stringpool *) (poolfile->map + *sp))->off + 1); + int cmp = strcmp(s, poolfile->map + ((struct stringpool *) 
(treefile->map + *sp))->off + 1); if (cmp == 0) { - cmp = type - (poolfile->map + ((struct stringpool *) (poolfile->map + *sp))->off)[0]; + cmp = type - (poolfile->map + ((struct stringpool *) (treefile->map + *sp))->off)[0]; } if (cmp < 0) { - sp = &(((struct stringpool *) (poolfile->map + *sp))->left); + sp = &(((struct stringpool *) (treefile->map + *sp))->left); } else if (cmp > 0) { - sp = &(((struct stringpool *) (poolfile->map + *sp))->right); + sp = &(((struct stringpool *) (treefile->map + *sp))->right); } else { - return ((struct stringpool *) (poolfile->map + *sp))->off; + return ((struct stringpool *) (treefile->map + *sp))->off; } } @@ -427,7 +427,7 @@ long long addpool(struct memfile *poolfile, char *s, char type) { if (sp == &pooltree) { ssp = -1; } else { - ssp = ((char *) sp) - poolfile->map; + ssp = ((char *) sp) - treefile->map; } long long off = poolfile->off; @@ -445,13 +445,8 @@ long long addpool(struct memfile *poolfile, char *s, char type) { tsp.right = 0; tsp.off = off; - // alignment - while (poolfile->off % sizeof(long long) != 0) { - memfile_write(poolfile, "", 1); - } - - long long p = poolfile->off; - if (memfile_write(poolfile, &tsp, sizeof(struct stringpool)) < 0) { + long long p = treefile->off; + if (memfile_write(treefile, &tsp, sizeof(struct stringpool)) < 0) { perror("memfile write"); exit(EXIT_FAILURE); } @@ -459,7 +454,7 @@ long long addpool(struct memfile *poolfile, char *s, char type) { if (ssp == -1) { pooltree = p; } else { - *((long long *) (poolfile->map + ssp)) = p; + *((long long *) (treefile->map + ssp)) = p; } return off; } @@ -469,11 +464,13 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max char metaname[strlen(tmpdir) + strlen("/meta.XXXXXXXX") + 1]; char poolname[strlen(tmpdir) + strlen("/pool.XXXXXXXX") + 1]; + char treename[strlen(tmpdir) + strlen("/tree.XXXXXXXX") + 1]; char geomname[strlen(tmpdir) + strlen("/geom.XXXXXXXX") + 1]; char indexname[strlen(tmpdir) + 
strlen("/index.XXXXXXXX") + 1]; sprintf(metaname, "%s%s", tmpdir, "/meta.XXXXXXXX"); sprintf(poolname, "%s%s", tmpdir, "/pool.XXXXXXXX"); + sprintf(treename, "%s%s", tmpdir, "/tree.XXXXXXXX"); sprintf(geomname, "%s%s", tmpdir, "/geom.XXXXXXXX"); sprintf(indexname, "%s%s", tmpdir, "/index.XXXXXXXX"); @@ -487,6 +484,11 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max perror(poolname); exit(EXIT_FAILURE); } + int treefd = mkstemp(treename); + if (treefd < 0) { + perror(treename); + exit(EXIT_FAILURE); + } int geomfd = mkstemp(geomname); if (geomfd < 0) { perror(geomname); @@ -508,6 +510,11 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max perror(poolname); exit(EXIT_FAILURE); } + struct memfile *treefile = memfile_open(treefd); + if (treefile == NULL) { + perror(treename); + exit(EXIT_FAILURE); + } FILE *geomfile = fopen(geomname, "wb"); if (geomfile == NULL) { perror(geomname); @@ -524,11 +531,15 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max unlink(metaname); unlink(poolname); + unlink(treename); unlink(geomname); unlink(indexname); // To distinguish a null value - memfile_write(poolfile, "", 1); + { + struct stringpool p; + memfile_write(treefile, &p, sizeof(struct stringpool)); + } unsigned file_bbox[] = {UINT_MAX, UINT_MAX, 0, 0}; unsigned midx = 0, midy = 0; @@ -690,8 +701,8 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max serialize_int(metafile, m, &metapos, fname); for (i = 0; i < m; i++) { - serialize_long_long(metafile, addpool(poolfile, metakey[i], VT_STRING), &metapos, fname); - serialize_long_long(metafile, addpool(poolfile, metaval[i], metatype[i]), &metapos, fname); + serialize_long_long(metafile, addpool(poolfile, treefile, metakey[i], VT_STRING), &metapos, fname); + serialize_long_long(metafile, addpool(poolfile, treefile, metaval[i], metatype[i]), &metapos, fname); } long long geomstart = geompos; @@ -768,6 
+779,7 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max fclose(metafile); fclose(geomfile); fclose(indexfile); + memfile_close(treefile); struct stat geomst; struct stat metast; From e6997b00ff5b8b4691fa5af1bd13262517f75639 Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Fri, 19 Jun 2015 15:49:51 -0700 Subject: [PATCH 11/12] Swizzle the string comparison so it's not pathological if input is presorted --- geojson.c | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/geojson.c b/geojson.c index b794097..97afa65 100644 --- a/geojson.c +++ b/geojson.c @@ -403,11 +403,47 @@ struct stringpool { }; long long pooltree = 0; +static unsigned char swizzle[256] = { + 0x2B, 0xBF, 0x18, 0xDE, 0x93, 0xC9, 0xB1, 0x5E, 0xDF, 0xBE, 0x72, 0x5A, 0xBB, 0x42, 0x64, 0xC6, + 0xD8, 0xB7, 0x15, 0x74, 0x1C, 0x8B, 0x91, 0xF5, 0x29, 0x46, 0xEC, 0x6F, 0xCA, 0x20, 0xF0, 0x06, + 0x27, 0x61, 0x87, 0xE0, 0x6E, 0x43, 0x50, 0xC5, 0x1B, 0xB4, 0x37, 0xC3, 0x69, 0xA6, 0xEE, 0x80, + 0xAF, 0x9B, 0xA1, 0x76, 0x23, 0x24, 0x53, 0xF3, 0x5B, 0x65, 0x19, 0xF4, 0xFC, 0xDD, 0x26, 0xE8, + 0x10, 0xF7, 0xCE, 0x92, 0x48, 0xF6, 0x94, 0x60, 0x07, 0xC4, 0xB9, 0x97, 0x6D, 0xA4, 0x11, 0x0D, + 0x1F, 0x4D, 0x13, 0xB0, 0x5D, 0xBA, 0x31, 0xD5, 0x8D, 0x51, 0x36, 0x00, 0x96, 0x7A, 0x03, 0x7F, + 0xDA, 0x17, 0xDB, 0xD4, 0x83, 0xE2, 0x79, 0x6A, 0xE1, 0x95, 0x38, 0xFF, 0x28, 0xB2, 0xB3, 0xA7, + 0xAE, 0xF8, 0x54, 0xCC, 0xDC, 0x9A, 0x6B, 0xFB, 0x3F, 0xD7, 0xBC, 0x21, 0xC8, 0x71, 0x09, 0x16, + 0xAC, 0x3C, 0x8A, 0x62, 0x05, 0xC2, 0x8C, 0x32, 0x4E, 0x35, 0x9C, 0x5F, 0x75, 0xCD, 0x2E, 0xA2, + 0x3E, 0x1A, 0xC1, 0x8E, 0x14, 0xA0, 0xD3, 0x7D, 0xD9, 0xEB, 0x5C, 0x70, 0xE6, 0x9E, 0x12, 0x3B, + 0xEF, 0x1E, 0x49, 0xD2, 0x98, 0x39, 0x7E, 0x44, 0x4B, 0x6C, 0x88, 0x02, 0x2C, 0xAD, 0xE5, 0x9F, + 0x40, 0x7B, 0x4A, 0x3D, 0xA9, 0xAB, 0x0B, 0xD6, 0x2F, 0x90, 0x2A, 0xB6, 0x1D, 0xC7, 0x22, 0x55, + 0x34, 0x0A, 0xD0, 0xB5, 0x68, 0xE3, 0x59, 0xFD, 0xFA, 0x57, 0x77, 
0x25, 0xA3, 0x04, 0xB8, 0x33, + 0x89, 0x78, 0x82, 0xE4, 0xC0, 0x0E, 0x8F, 0x85, 0xD1, 0x84, 0x08, 0x67, 0x47, 0x9D, 0xCB, 0x58, + 0x4C, 0xAA, 0xED, 0x52, 0xF2, 0x4F, 0xF1, 0x66, 0xCF, 0xA5, 0x56, 0xEA, 0x7C, 0xE9, 0x63, 0xE7, + 0x01, 0xF9, 0xFE, 0x0C, 0x99, 0x2D, 0x0F, 0x3A, 0x41, 0x45, 0xA8, 0x30, 0x73, 0xBD, 0x86, 0x81, +}; + +int swizzlecmp(char *a, char *b) { + while (*a || *b) { + int aa = swizzle[(unsigned char) *a]; + int bb = swizzle[(unsigned char) *b]; + + int cmp = aa - bb; + if (cmp != 0) { + return cmp; + } + + a++; + b++; + } + + return 0; +} + long long addpool(struct memfile *poolfile, struct memfile *treefile, char *s, char type) { long long *sp = &pooltree; while (*sp != 0) { - int cmp = strcmp(s, poolfile->map + ((struct stringpool *) (treefile->map + *sp))->off + 1); + int cmp = swizzlecmp(s, poolfile->map + ((struct stringpool *) (treefile->map + *sp))->off + 1); if (cmp == 0) { cmp = type - (poolfile->map + ((struct stringpool *) (treefile->map + *sp))->off)[0]; From e6c5aa9bfebfad2581d722dbbdb63f732370e97b Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Fri, 19 Jun 2015 15:53:09 -0700 Subject: [PATCH 12/12] Oops. 
Make sure that 0 stays 0 even when swizzling --- geojson.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/geojson.c b/geojson.c index 97afa65..42572f8 100644 --- a/geojson.c +++ b/geojson.c @@ -404,22 +404,22 @@ struct stringpool { long long pooltree = 0; static unsigned char swizzle[256] = { - 0x2B, 0xBF, 0x18, 0xDE, 0x93, 0xC9, 0xB1, 0x5E, 0xDF, 0xBE, 0x72, 0x5A, 0xBB, 0x42, 0x64, 0xC6, + 0x00, 0xBF, 0x18, 0xDE, 0x93, 0xC9, 0xB1, 0x5E, 0xDF, 0xBE, 0x72, 0x5A, 0xBB, 0x42, 0x64, 0xC6, 0xD8, 0xB7, 0x15, 0x74, 0x1C, 0x8B, 0x91, 0xF5, 0x29, 0x46, 0xEC, 0x6F, 0xCA, 0x20, 0xF0, 0x06, 0x27, 0x61, 0x87, 0xE0, 0x6E, 0x43, 0x50, 0xC5, 0x1B, 0xB4, 0x37, 0xC3, 0x69, 0xA6, 0xEE, 0x80, 0xAF, 0x9B, 0xA1, 0x76, 0x23, 0x24, 0x53, 0xF3, 0x5B, 0x65, 0x19, 0xF4, 0xFC, 0xDD, 0x26, 0xE8, 0x10, 0xF7, 0xCE, 0x92, 0x48, 0xF6, 0x94, 0x60, 0x07, 0xC4, 0xB9, 0x97, 0x6D, 0xA4, 0x11, 0x0D, - 0x1F, 0x4D, 0x13, 0xB0, 0x5D, 0xBA, 0x31, 0xD5, 0x8D, 0x51, 0x36, 0x00, 0x96, 0x7A, 0x03, 0x7F, - 0xDA, 0x17, 0xDB, 0xD4, 0x83, 0xE2, 0x79, 0x6A, 0xE1, 0x95, 0x38, 0xFF, 0x28, 0xB2, 0xB3, 0xA7, - 0xAE, 0xF8, 0x54, 0xCC, 0xDC, 0x9A, 0x6B, 0xFB, 0x3F, 0xD7, 0xBC, 0x21, 0xC8, 0x71, 0x09, 0x16, - 0xAC, 0x3C, 0x8A, 0x62, 0x05, 0xC2, 0x8C, 0x32, 0x4E, 0x35, 0x9C, 0x5F, 0x75, 0xCD, 0x2E, 0xA2, - 0x3E, 0x1A, 0xC1, 0x8E, 0x14, 0xA0, 0xD3, 0x7D, 0xD9, 0xEB, 0x5C, 0x70, 0xE6, 0x9E, 0x12, 0x3B, - 0xEF, 0x1E, 0x49, 0xD2, 0x98, 0x39, 0x7E, 0x44, 0x4B, 0x6C, 0x88, 0x02, 0x2C, 0xAD, 0xE5, 0x9F, - 0x40, 0x7B, 0x4A, 0x3D, 0xA9, 0xAB, 0x0B, 0xD6, 0x2F, 0x90, 0x2A, 0xB6, 0x1D, 0xC7, 0x22, 0x55, - 0x34, 0x0A, 0xD0, 0xB5, 0x68, 0xE3, 0x59, 0xFD, 0xFA, 0x57, 0x77, 0x25, 0xA3, 0x04, 0xB8, 0x33, - 0x89, 0x78, 0x82, 0xE4, 0xC0, 0x0E, 0x8F, 0x85, 0xD1, 0x84, 0x08, 0x67, 0x47, 0x9D, 0xCB, 0x58, - 0x4C, 0xAA, 0xED, 0x52, 0xF2, 0x4F, 0xF1, 0x66, 0xCF, 0xA5, 0x56, 0xEA, 0x7C, 0xE9, 0x63, 0xE7, - 0x01, 0xF9, 0xFE, 0x0C, 0x99, 0x2D, 0x0F, 0x3A, 0x41, 0x45, 0xA8, 0x30, 0x73, 0xBD, 0x86, 0x81, + 
0x1F, 0x4D, 0x13, 0xB0, 0x5D, 0xBA, 0x31, 0xD5, 0x8D, 0x51, 0x36, 0x96, 0x7A, 0x03, 0x7F, 0xDA, + 0x17, 0xDB, 0xD4, 0x83, 0xE2, 0x79, 0x6A, 0xE1, 0x95, 0x38, 0xFF, 0x28, 0xB2, 0xB3, 0xA7, 0xAE, + 0xF8, 0x54, 0xCC, 0xDC, 0x9A, 0x6B, 0xFB, 0x3F, 0xD7, 0xBC, 0x21, 0xC8, 0x71, 0x09, 0x16, 0xAC, + 0x3C, 0x8A, 0x62, 0x05, 0xC2, 0x8C, 0x32, 0x4E, 0x35, 0x9C, 0x5F, 0x75, 0xCD, 0x2E, 0xA2, 0x3E, + 0x1A, 0xC1, 0x8E, 0x14, 0xA0, 0xD3, 0x7D, 0xD9, 0xEB, 0x5C, 0x70, 0xE6, 0x9E, 0x12, 0x3B, 0xEF, + 0x1E, 0x49, 0xD2, 0x98, 0x39, 0x7E, 0x44, 0x4B, 0x6C, 0x88, 0x02, 0x2C, 0xAD, 0xE5, 0x9F, 0x40, + 0x7B, 0x4A, 0x3D, 0xA9, 0xAB, 0x0B, 0xD6, 0x2F, 0x90, 0x2A, 0xB6, 0x1D, 0xC7, 0x22, 0x55, 0x34, + 0x0A, 0xD0, 0xB5, 0x68, 0xE3, 0x59, 0xFD, 0xFA, 0x57, 0x77, 0x25, 0xA3, 0x04, 0xB8, 0x33, 0x89, + 0x78, 0x82, 0xE4, 0xC0, 0x0E, 0x8F, 0x85, 0xD1, 0x84, 0x08, 0x67, 0x47, 0x9D, 0xCB, 0x58, 0x4C, + 0xAA, 0xED, 0x52, 0xF2, 0x4F, 0xF1, 0x66, 0xCF, 0xA5, 0x56, 0xEA, 0x7C, 0xE9, 0x63, 0xE7, 0x01, + 0xF9, 0xFE, 0x0C, 0x99, 0x2D, 0x0F, 0x3A, 0x41, 0x45, 0xA8, 0x30, 0x2B, 0x73, 0xBD, 0x86, 0x81, }; int swizzlecmp(char *a, char *b) {