From 97d65e6b7dbcf2d6d903432133d688f43539324b Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Thu, 5 Mar 2015 15:15:56 -0800 Subject: [PATCH 01/10] Bring back the index: now just a file position and a point --- geojson.c | 168 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 168 insertions(+) diff --git a/geojson.c b/geojson.c index adf3f6c..e694176 100644 --- a/geojson.c +++ b/geojson.c @@ -288,12 +288,81 @@ void traverse_zooms(int geomfd[4], off_t geom_size[4], char *metabase, unsigned fprintf(stderr, "\n"); } +struct index { + long long fpos; + unsigned long long index; +}; + +int indexcmp(const void *v1, const void *v2) { + const struct index *i1 = (const struct index *) v1; + const struct index *i2 = (const struct index *) v2; + + if (i1->index < i2->index) { + return -1; + } else if (i1->index > i2->index) { + return 1; + } + + return 0; +} + +struct merge { + long long start; + long long end; + + struct merge *next; +}; + +static void insert(struct merge *m, struct merge **head, unsigned char *map, int bytes) { + while (*head != NULL && indexcmp(map + m->start, map + (*head)->start) > 0) { + head = &((*head)->next); + } + + m->next = *head; + *head = m; +} + +static void merge(struct merge *merges, int nmerges, unsigned char *map, FILE *f, int bytes, long long nrec) { + int i; + struct merge *head = NULL; + long long along = 0; + long long reported = -1; + + for (i = 0; i < nmerges; i++) { + if (merges[i].start < merges[i].end) { + insert(&(merges[i]), &head, map, bytes); + } + } + + while (head != NULL) { + fwrite(map + head->start, bytes, 1, f); + head->start += bytes; + + struct merge *m = head; + head = m->next; + m->next = NULL; + + if (m->start < m->end) { + insert(m, &head, map, bytes); + } + + along++; + long long report = 100 * along / nrec; + if (report != reported) { + fprintf(stderr, "Merging: %lld%%\r", report); + reported = report; + } + } +} + void read_json(FILE *f, const char *fname, const char *layername, int 
maxzoom, int minzoom, sqlite3 *outdb, struct pool *exclude, struct pool *include, int exclude_all, double droprate, int buffer, const char *tmpdir) { char metaname[strlen(tmpdir) + strlen("/meta.XXXXXXXX") + 1]; char geomname[strlen(tmpdir) + strlen("/geom.XXXXXXXX") + 1]; + char indexname[strlen(tmpdir) + strlen("/index.XXXXXXXX") + 1]; sprintf(metaname, "%s%s", tmpdir, "/meta.XXXXXXXX"); sprintf(geomname, "%s%s", tmpdir, "/geom.XXXXXXXX"); + sprintf(indexname, "%s%s", tmpdir, "/index.XXXXXXXX"); int metafd = mkstemp(metaname); if (metafd < 0) { @@ -305,6 +374,11 @@ void read_json(FILE *f, const char *fname, const char *layername, int maxzoom, i perror(geomname); exit(EXIT_FAILURE); } + int indexfd = mkstemp(indexname); + if (indexfd < 0) { + perror(indexname); + exit(EXIT_FAILURE); + } FILE *metafile = fopen(metaname, "wb"); if (metafile == NULL) { @@ -316,11 +390,18 @@ void read_json(FILE *f, const char *fname, const char *layername, int maxzoom, i perror(geomname); exit(EXIT_FAILURE); } + FILE *indexfile = fopen(indexname, "wb"); + if (indexfile == NULL) { + perror(indexname); + exit(EXIT_FAILURE); + } long long metapos = 0; long long geompos = 0; + long long indexpos = 0; unlink(metaname); unlink(geomname); + unlink(indexname); unsigned file_bbox[] = { UINT_MAX, UINT_MAX, 0, 0 }; unsigned midx = 0, midy = 0; @@ -456,11 +537,19 @@ void read_json(FILE *f, const char *fname, const char *layername, int maxzoom, i serialize_string(metafile, metaval[i], &metapos, fname, jp); } + long long geomstart = geompos; + serialize_int(geomfile, mb_geometry[t], &geompos, fname, jp); serialize_long_long(geomfile, metastart, &geompos, fname, jp); parse_geometry(t, coordinates, bbox, &geompos, geomfile, VT_MOVETO, fname, jp); serialize_byte(geomfile, VT_END, &geompos, fname, jp); + struct index index; + index.fpos = geomstart; + index.index = encode(bbox[0] / 2 + bbox[2] / 2, bbox[1] / 2 + bbox[3] / 2); + fwrite_check(&index, sizeof(struct index), 1, indexfile, fname, jp); + 
indexpos += sizeof(struct index); + /* * Note that minzoom for lines is the dimension * of the geometry in world coordinates, but @@ -519,6 +608,7 @@ void read_json(FILE *f, const char *fname, const char *layername, int maxzoom, i json_end(jp); fclose(metafile); fclose(geomfile); + fclose(indexfile); struct stat geomst; struct stat metast; @@ -577,6 +667,84 @@ void read_json(FILE *f, const char *fname, const char *layername, int maxzoom, i printf("using layer name %s\n", trunc); } + { + int bytes = sizeof(struct index); + + fprintf(stderr, + "Sorting %lld indices for %lld features\n", + (long long) indexpos / bytes, + seq); + + int page = sysconf(_SC_PAGESIZE); + long long unit = (50 * 1024 * 1024 / bytes) * bytes; + while (unit % page != 0) { + unit += bytes; + } + + int nmerges = (indexpos + unit - 1) / unit; + struct merge merges[nmerges]; + + long long start; + for (start = 0; start < indexpos; start += unit) { + long long end = start + unit; + if (end > indexpos) { + end = indexpos; + } + + if (nmerges != 1) { + fprintf(stderr, "Sorting part %lld of %d\r", start / unit + 1, nmerges); + } + + merges[start / unit].start = start; + merges[start / unit].end = end; + merges[start / unit].next = NULL; + + void *map = mmap(NULL, end - start, PROT_READ | PROT_WRITE, MAP_PRIVATE, indexfd, start); + if (map == MAP_FAILED) { + perror("mmap"); + exit(EXIT_FAILURE); + } + + qsort(map, (end - start) / bytes, bytes, indexcmp); + + // Sorting and then copying avoids the need to + // write out intermediate stages of the sort. 
+ + void *map2 = mmap(NULL, end - start, PROT_READ | PROT_WRITE, MAP_SHARED, indexfd, start); + if (map2 == MAP_FAILED) { + perror("mmap (write)"); + exit(EXIT_FAILURE); + } + + memcpy(map2, map, end - start); + + munmap(map, end - start); + munmap(map2, end - start); + } + + if (nmerges != 1) { + fprintf(stderr, "\n"); + } + + void *map = mmap(NULL, indexpos, PROT_READ, MAP_PRIVATE, indexfd, 0); + if (map == MAP_FAILED) { + perror("mmap"); + exit(EXIT_FAILURE); + } + + FILE *f = fopen(indexname, "w"); + if (f == NULL) { + perror(indexname); + exit(EXIT_FAILURE); + } + + merge(merges, nmerges, (unsigned char *) map, f, bytes, indexpos / bytes); + + munmap(map, indexpos); + fclose(f); + close(indexfd); + } + int fd[4]; off_t size[4]; From a8b2db8d5ac887173e950b16c20bf5b08ad30644 Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Thu, 5 Mar 2015 15:22:58 -0800 Subject: [PATCH 02/10] Fix double-close of top level geometry file --- geojson.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/geojson.c b/geojson.c index e694176..a594ab3 100644 --- a/geojson.c +++ b/geojson.c @@ -765,9 +765,9 @@ void read_json(FILE *f, const char *fname, const char *layername, int maxzoom, i perror("munmap meta"); } - close(geomfd); - close(metafd); - + if (close(metafd) < 0) { + perror("close meta"); + } double minlat = 0, minlon = 0, maxlat = 0, maxlon = 0, midlat = 0, midlon = 0; From 572df8ad39e52e7fdca06931e63a890aeea68a92 Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Thu, 5 Mar 2015 16:18:01 -0800 Subject: [PATCH 03/10] Reorder the geometries by index --- geojson.c | 108 +++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 91 insertions(+), 17 deletions(-) diff --git a/geojson.c b/geojson.c index a594ab3..fdc8321 100644 --- a/geojson.c +++ b/geojson.c @@ -289,7 +289,8 @@ void traverse_zooms(int geomfd[4], off_t geom_size[4], char *metabase, unsigned } struct index { - long long fpos; + long long start; + long long end; unsigned long long 
index; }; @@ -409,11 +410,6 @@ void read_json(FILE *f, const char *fname, const char *layername, int maxzoom, i json_pull *jp = json_begin_file(f); long long seq = 0; - /* initial tile is 0/0/0 */ - serialize_int(geomfile, 0, &geompos, fname, jp); - serialize_uint(geomfile, 0, &geompos, fname, jp); - serialize_uint(geomfile, 0, &geompos, fname, jp); - while (1) { json_object *j = json_read(jp); if (j == NULL) { @@ -544,12 +540,6 @@ void read_json(FILE *f, const char *fname, const char *layername, int maxzoom, i parse_geometry(t, coordinates, bbox, &geompos, geomfile, VT_MOVETO, fname, jp); serialize_byte(geomfile, VT_END, &geompos, fname, jp); - struct index index; - index.fpos = geomstart; - index.index = encode(bbox[0] / 2 + bbox[2] / 2, bbox[1] / 2 + bbox[3] / 2); - fwrite_check(&index, sizeof(struct index), 1, indexfile, fname, jp); - indexpos += sizeof(struct index); - /* * Note that minzoom for lines is the dimension * of the geometry in world coordinates, but @@ -580,6 +570,13 @@ void read_json(FILE *f, const char *fname, const char *layername, int maxzoom, i serialize_byte(geomfile, minzoom, &geompos, fname, jp); + struct index index; + index.start = geomstart; + index.end = geompos; + index.index = encode(bbox[0] / 2 + bbox[2] / 2, bbox[1] / 2 + bbox[3] / 2); + fwrite_check(&index, sizeof(struct index), 1, indexfile, fname, jp); + indexpos += sizeof(struct index); + for (i = 0; i < 2; i++) { if (bbox[i] < file_bbox[i]) { file_bbox[i] = bbox[i]; @@ -602,9 +599,6 @@ void read_json(FILE *f, const char *fname, const char *layername, int maxzoom, i /* XXX check for any non-features in the outer object */ } - /* end of tile */ - serialize_int(geomfile, -2, &geompos, fname, jp); - json_end(jp); fclose(metafile); fclose(geomfile); @@ -667,13 +661,15 @@ void read_json(FILE *f, const char *fname, const char *layername, int maxzoom, i printf("using layer name %s\n", trunc); } + /* Sort the index by geometry */ + { int bytes = sizeof(struct index); fprintf(stderr, - 
"Sorting %lld indices for %lld features\n", + "Sorting %lld indices for %lld features, %lld bytes of geometry\n", (long long) indexpos / bytes, - seq); + seq, (long long) geomst.st_size); int page = sysconf(_SC_PAGESIZE); long long unit = (50 * 1024 * 1024 / bytes) * bytes; @@ -745,6 +741,84 @@ void read_json(FILE *f, const char *fname, const char *layername, int maxzoom, i close(indexfd); } + /* Copy geometries to a new file in index order */ + + indexfd = open(indexname, O_RDONLY); + if (indexfd < 0) { + perror("reopen sorted index"); + exit(EXIT_FAILURE); + } + struct index *index_map = mmap(NULL, indexpos, PROT_READ, MAP_PRIVATE, indexfd, 0); + if (index_map == MAP_FAILED) { + perror("mmap index"); + exit(EXIT_FAILURE); + } + unlink(indexname); + + char *geom_map = mmap(NULL, geomst.st_size, PROT_READ, MAP_PRIVATE, geomfd, 0); + if (geom_map == MAP_FAILED) { + perror("mmap unsorted geometry"); + exit(EXIT_FAILURE); + } + if (close(geomfd) != 0) { + perror("close unsorted geometry"); + } + + sprintf(geomname, "%s%s", tmpdir, "/geom.XXXXXXXX"); + geomfd = mkstemp(geomname); + if (geomfd < 0) { + perror(geomname); + exit(EXIT_FAILURE); + } + geomfile = fopen(geomname, "wb"); + if (geomfile == NULL) { + perror(geomname); + exit(EXIT_FAILURE); + } + + { + geompos = 0; + + /* initial tile is 0/0/0 */ + serialize_int(geomfile, 0, &geompos, fname, jp); + serialize_uint(geomfile, 0, &geompos, fname, jp); + serialize_uint(geomfile, 0, &geompos, fname, jp); + + long long i; + long long sum = 0; + for (i = 0; i < indexpos / sizeof(struct index); i++) { + fwrite_check(geom_map + index_map[i].start, sizeof(char), index_map[i].end - index_map[i].start, geomfile, fname, jp); + sum += index_map[i].end - index_map[i].start; + } + + /* end of tile */ + serialize_int(geomfile, -2, &geompos, fname, jp); + fclose(geomfile); + } + + if (munmap(index_map, indexpos) != 0) { + perror("unmap sorted index"); + } + if (munmap(geom_map, geomst.st_size) != 0) { + perror("unmap unsorted 
geometry"); + } + if (close(indexfd) != 0) { + perror("close sorted index"); + } + + /* Traverse and split the geometries for each zoom level */ + + geomfd = open(geomname, O_RDONLY); + if (geomfd < 0) { + perror("reopen sorted geometry"); + exit(EXIT_FAILURE); + } + unlink(geomname); + if (fstat(geomfd, &geomst) != 0) { + perror("stat sorted geom\n"); + exit(EXIT_FAILURE); + } + int fd[4]; off_t size[4]; From 558a7a412cf8d8e1ed3dc8d3ccf016f57566bc36 Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Fri, 6 Mar 2015 10:56:02 -0800 Subject: [PATCH 04/10] Restore old code for uniform instead of random dot-dropping From ffe4c953760d8fd --- geojson.c | 13 +++++++------ tile.cc | 24 +++++++++++++++++++++--- tile.h | 2 +- 3 files changed, 29 insertions(+), 10 deletions(-) diff --git a/geojson.c b/geojson.c index fdc8321..ba11b21 100644 --- a/geojson.c +++ b/geojson.c @@ -194,7 +194,7 @@ struct pool_val *deserialize_string(char **f, struct pool *p, int type) { return ret; } -void traverse_zooms(int geomfd[4], off_t geom_size[4], char *metabase, unsigned *file_bbox, struct pool *file_keys, unsigned *midx, unsigned *midy, const char *layername, int maxzoom, int minzoom, sqlite3 *outdb, double droprate, int buffer, const char *fname, struct json_pull *jp, const char *tmpdir) { +void traverse_zooms(int geomfd[4], off_t geom_size[4], char *metabase, unsigned *file_bbox, struct pool *file_keys, unsigned *midx, unsigned *midy, const char *layername, int maxzoom, int minzoom, sqlite3 *outdb, double droprate, int buffer, const char *fname, struct json_pull *jp, const char *tmpdir, double gamma) { int i; for (i = 0; i <= maxzoom; i++) { long long most = 0; @@ -255,7 +255,7 @@ void traverse_zooms(int geomfd[4], off_t geom_size[4], char *metabase, unsigned // fprintf(stderr, "%d/%u/%u\n", z, x, y); - long long len = write_tile(&geom, metabase, file_bbox, z, x, y, z == maxzoom ? 
full_detail : low_detail, maxzoom, file_keys, layername, outdb, droprate, buffer, fname, jp, sub, minzoom, maxzoom, todo, geomstart, along); + long long len = write_tile(&geom, metabase, file_bbox, z, x, y, z == maxzoom ? full_detail : low_detail, maxzoom, file_keys, layername, outdb, droprate, buffer, fname, jp, sub, minzoom, maxzoom, todo, geomstart, along, gamma); if (z == maxzoom && len > most) { *midx = x; @@ -356,7 +356,7 @@ static void merge(struct merge *merges, int nmerges, unsigned char *map, FILE *f } } -void read_json(FILE *f, const char *fname, const char *layername, int maxzoom, int minzoom, sqlite3 *outdb, struct pool *exclude, struct pool *include, int exclude_all, double droprate, int buffer, const char *tmpdir) { +void read_json(FILE *f, const char *fname, const char *layername, int maxzoom, int minzoom, sqlite3 *outdb, struct pool *exclude, struct pool *include, int exclude_all, double droprate, int buffer, const char *tmpdir, double gamma) { char metaname[strlen(tmpdir) + strlen("/meta.XXXXXXXX") + 1]; char geomname[strlen(tmpdir) + strlen("/geom.XXXXXXXX") + 1]; char indexname[strlen(tmpdir) + strlen("/index.XXXXXXXX") + 1]; @@ -833,7 +833,7 @@ void read_json(FILE *f, const char *fname, const char *layername, int maxzoom, i fprintf(stderr, "%lld features, %lld bytes of geometry, %lld bytes of metadata\n", seq, (long long) geomst.st_size, (long long) metast.st_size); - traverse_zooms(fd, size, meta, file_bbox, &file_keys, &midx, &midy, layername, maxzoom, minzoom, outdb, droprate, buffer, fname, jp, tmpdir); + traverse_zooms(fd, size, meta, file_bbox, &file_keys, &midx, &midy, layername, maxzoom, minzoom, outdb, droprate, buffer, fname, jp, tmpdir, gamma); if (munmap(meta, metast.st_size) != 0) { perror("munmap meta"); @@ -884,6 +884,7 @@ int main(int argc, char **argv) { int minzoom = 0; int force = 0; double droprate = 2.5; + double gamma = 1; int buffer = 5; const char *tmpdir = "/tmp"; @@ -982,7 +983,7 @@ int main(int argc, char **argv) { if 
(f == NULL) { fprintf(stderr, "%s: %s: %s\n", argv[0], argv[i], strerror(errno)); } else { - read_json(f, name ? name : argv[i], layer, maxzoom, minzoom, outdb, &exclude, &include, exclude_all, droprate, buffer, tmpdir); + read_json(f, name ? name : argv[i], layer, maxzoom, minzoom, outdb, &exclude, &include, exclude_all, droprate, buffer, tmpdir, gamma); fclose(f); } } @@ -990,7 +991,7 @@ int main(int argc, char **argv) { fprintf(stderr, "%s: Only accepts one input file\n", argv[0]); exit(EXIT_FAILURE); } else { - read_json(stdin, name ? name : outdir, layer, maxzoom, minzoom, outdb, &exclude, &include, exclude_all, droprate, buffer, tmpdir); + read_json(stdin, name ? name : outdir, layer, maxzoom, minzoom, outdb, &exclude, &include, exclude_all, droprate, buffer, tmpdir, gamma); } mbtiles_close(outdb, argv); diff --git a/tile.cc b/tile.cc index 99cb323..0eb4fef 100644 --- a/tile.cc +++ b/tile.cc @@ -342,7 +342,7 @@ void evaluate(std::vector &features, char *metabase, struct pool *file pool_free(&keys); } -long long write_tile(char **geoms, char *metabase, unsigned *file_bbox, int z, unsigned tx, unsigned ty, int detail, int basezoom, struct pool *file_keys, const char *layername, sqlite3 *outdb, double droprate, int buffer, const char *fname, json_pull *jp, FILE *geomfile[4], int file_minzoom, int file_maxzoom, double todo, char *geomstart, long long along) { +long long write_tile(char **geoms, char *metabase, unsigned *file_bbox, int z, unsigned tx, unsigned ty, int detail, int basezoom, struct pool *file_keys, const char *layername, sqlite3 *outdb, double droprate, int buffer, const char *fname, json_pull *jp, FILE *geomfile[4], int file_minzoom, int file_maxzoom, double todo, char *geomstart, long long along, double gamma) { int line_detail; static bool evaluated = false; double oprogress = 0; @@ -361,6 +361,12 @@ long long write_tile(char **geoms, char *metabase, unsigned *file_bbox, int z, u //long long along = 0; double accum_area = 0; + double interval = 
0; + double seq = 0; + if (z < basezoom) { + interval = exp(log(droprate) * (basezoom - z)); + } + std::vector features; int within[4] = { 0 }; @@ -475,11 +481,23 @@ long long write_tile(char **geoms, char *metabase, unsigned *file_bbox, int z, u continue; } - if ((t == VT_LINE && z + line_detail <= feature_minzoom) || - (t == VT_POINT && z < feature_minzoom)) { + if (t == VT_LINE && z + line_detail <= feature_minzoom) { continue; } + if (t == VT_POINT && z < feature_minzoom && gamma == 0) { + continue; + } + + if (t == VT_POINT && gamma != 0) { + seq++; + if (seq >= 0) { + seq -= interval; + } else { + continue; + } + } + bool reduced = false; if (t == VT_POLYGON) { geom = reduce_tiny_poly(geom, z, line_detail, &reduced, &accum_area); diff --git a/tile.h b/tile.h index 6807f08..031bb41 100644 --- a/tile.h +++ b/tile.h @@ -26,4 +26,4 @@ void deserialize_uint(char **f, unsigned *n); void deserialize_byte(char **f, signed char *n); struct pool_val *deserialize_string(char **f, struct pool *p, int type); -long long write_tile(char **geom, char *metabase, unsigned *file_bbox, int z, unsigned x, unsigned y, int detail, int basezoom, struct pool *file_keys, const char *layername, sqlite3 *outdb, double droprate, int buffer, const char *fname, struct json_pull *jp, FILE *geomfile[4], int file_minzoom, int file_maxzoom, double todo, char *geomstart, long long along); +long long write_tile(char **geom, char *metabase, unsigned *file_bbox, int z, unsigned x, unsigned y, int detail, int basezoom, struct pool *file_keys, const char *layername, sqlite3 *outdb, double droprate, int buffer, const char *fname, struct json_pull *jp, FILE *geomfile[4], int file_minzoom, int file_maxzoom, double todo, char *geomstart, long long along, double gamma); From 41b28b2a1b95ecec630668fd680cfe31f0a1f008 Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Fri, 6 Mar 2015 13:12:32 -0800 Subject: [PATCH 05/10] Add dot gamma processing --- README.md | 1 + geojson.c | 6 +++++- tile.cc | 31 
+++++++++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index a308df8..9dbbe7d 100644 --- a/README.md +++ b/README.md @@ -37,6 +37,7 @@ Options * -f: Delete the mbtiles file if it already exists instead of giving an error * -r rate: Rate at which dots are dropped at lower zoom levels (default 2.5) * -b pixels: Buffer size where features are duplicated from adjacent tiles (default 5) + * -g gamma: Rate at which especially dense dots are dropped (default 1, for no effect). A gamma of 2 reduces the number of dots duplicating the same pixel to the square root of their original number. Example ------- diff --git a/geojson.c b/geojson.c index ba11b21..0a0dbd4 100644 --- a/geojson.c +++ b/geojson.c @@ -893,7 +893,7 @@ int main(int argc, char **argv) { pool_init(&include, 0); int exclude_all = 0; - while ((i = getopt(argc, argv, "l:n:z:Z:d:D:o:x:y:r:b:fXt:")) != -1) { + while ((i = getopt(argc, argv, "l:n:z:Z:d:D:o:x:y:r:b:fXt:g:")) != -1) { switch (i) { case 'n': name = optarg; @@ -952,6 +952,10 @@ int main(int argc, char **argv) { tmpdir = optarg; break; + case 'g': + gamma = atof(optarg); + break; + default: fprintf(stderr, "Usage: %s -o out.mbtiles [-n name] [-l layername] [-z maxzoom] [-Z minzoom] [-d detail] [-D lower-detail] [-x excluded-field ...] [-y included-field ...] 
[-X] [-r droprate] [-b buffer] [-t tmpdir] [file.json]\n", argv[0]); exit(EXIT_FAILURE); diff --git a/tile.cc b/tile.cc index 0eb4fef..a0c9b85 100644 --- a/tile.cc +++ b/tile.cc @@ -367,6 +367,10 @@ long long write_tile(char **geoms, char *metabase, unsigned *file_bbox, int z, u interval = exp(log(droprate) * (basezoom - z)); } + unsigned long long previndex = 0; + double scale = (double) (1LL << (64 - 2 * (z + 8))); + double gap = 0; + std::vector features; int within[4] = { 0 }; @@ -496,6 +500,33 @@ long long write_tile(char **geoms, char *metabase, unsigned *file_bbox, int z, u } else { continue; } + + unsigned long long index = encode(bbox[0] / 2 + bbox[2] / 2, bbox[1] / 2 + bbox[3] / 2); + if (gap > 0) { + if (index == previndex) { + continue; // Exact duplicate: can't fulfil the gap requirement + } + + if (exp(log((index - previndex) / scale) * gamma) >= gap) { + // Dot is further from the previous than the nth root of the gap, + // so produce it, and choose a new gap at the next point. + gap = 0; + } else { + continue; + } + } else { + gap = (index - previndex) / scale; + + if (gap == 0) { + continue; // Exact duplicate: skip + } else if (gap < 1) { + continue; // Narrow dot spacing: need to stretch out + } else { + gap = 0; // Wider spacing than minimum: so pass through unchanged + } + } + + previndex = index; } bool reduced = false; From 816ef2eca8cd7e924096901df1b5ea9aafcb0bee Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Fri, 6 Mar 2015 14:32:53 -0800 Subject: [PATCH 06/10] Flesh out README a little --- README.md | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 9dbbe7d..55ea74c 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,28 @@ tippecanoe ========== -Build vector tilesets from large collections of GeoJSON features. +Builds vector tilesets from large collections of GeoJSON features. 
+ +Intent +------ + +The goal of Tippecanoe is to enable making a scale-independent view of your data, +so that at any level from the entire world to a single building, you can see +the density and texture of the data rather than a simplification from dropping +supposedly unimportant features or clustering or aggregating them. + +If you give it all of OpenStreetMap and zoom out, it should give you back +something that looks like "[All Streets](http://benfry.com/allstreets/map5.html)" +rather than something that looks like an Interstate road atlas. + +If you give it all the building footprints in Los Angeles and zoom out +far enough that most individual buildings are no longer discernable, you +should still be able to see the extent and variety of development in every neighborhood, +not just the largest downtown buildings. + +If you give it a collection of years of tweet locations, you should be able to +see the shape and relative popularity of every point of interest and every +significant travel corridor. Installation ------------ @@ -56,6 +77,10 @@ For point features, it drops 1/2.5 of the dots for each zoom level above the bas I don't know why 2.5 is the appropriate number, but the densities of many different data sets fall off at about this same rate. You can use -r to specify a different rate. +You can use the gamma option to thin out especially dense clusters of points. +For any area that is denser than one point per pixel (at whatever zoom level), +a gamma of 3, for example, will reduce these clusters to 1/3 of their original density. + For line features, it drops any features that are too small to draw at all. This still leaves the lower zooms too dark (and too dense for the 500K tile limit, in some places), so I need to figure out an equitable way to throw features away. @@ -83,3 +108,8 @@ and sqlite3 (apt-get install libsqlite3-dev). 
To build: and perhaps make install + +Name +---- + +The name is [a joking reference](http://en.wikipedia.org/wiki/Tippecanoe_and_Tyler_Too) to making tiles. From 9343c5fcc1607579045ff935acbe979d084115fa Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Fri, 6 Mar 2015 14:35:39 -0800 Subject: [PATCH 07/10] Less wordy message about sorting --- geojson.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/geojson.c b/geojson.c index 0a0dbd4..37e16a2 100644 --- a/geojson.c +++ b/geojson.c @@ -665,11 +665,7 @@ void read_json(FILE *f, const char *fname, const char *layername, int maxzoom, i { int bytes = sizeof(struct index); - - fprintf(stderr, - "Sorting %lld indices for %lld features, %lld bytes of geometry\n", - (long long) indexpos / bytes, - seq, (long long) geomst.st_size); + fprintf(stderr, "Sorting %lld features\n", (long long) indexpos / bytes); int page = sysconf(_SC_PAGESIZE); long long unit = (50 * 1024 * 1024 / bytes) * bytes; From f0a8e5b1920f4095cf64575e87ac93743cffd23f Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Fri, 6 Mar 2015 15:32:52 -0800 Subject: [PATCH 08/10] Progress indicator for reordering geometry --- geojson.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/geojson.c b/geojson.c index 37e16a2..4acb77c 100644 --- a/geojson.c +++ b/geojson.c @@ -782,9 +782,16 @@ void read_json(FILE *f, const char *fname, const char *layername, int maxzoom, i long long i; long long sum = 0; + long long progress = 0; for (i = 0; i < indexpos / sizeof(struct index); i++) { fwrite_check(geom_map + index_map[i].start, sizeof(char), index_map[i].end - index_map[i].start, geomfile, fname, jp); sum += index_map[i].end - index_map[i].start; + + long long p = 1000 * i / (indexpos / sizeof(struct index)); + if (p != progress) { + fprintf(stderr, "Reordering geometry: %3.1f%%\r", p / 10.0); + progress = p; + } } /* end of tile */ From b59a251924fdbf7e223a76ff10ee4e0afe22df11 Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Fri, 6 Mar 2015 
16:33:32 -0800 Subject: [PATCH 09/10] Write metadata even when tiling fails so you can look at the partial map --- geojson.c | 27 +++++++++++++++++++++------ tile.cc | 4 ++-- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/geojson.c b/geojson.c index 4acb77c..e17912a 100644 --- a/geojson.c +++ b/geojson.c @@ -194,7 +194,7 @@ struct pool_val *deserialize_string(char **f, struct pool *p, int type) { return ret; } -void traverse_zooms(int geomfd[4], off_t geom_size[4], char *metabase, unsigned *file_bbox, struct pool *file_keys, unsigned *midx, unsigned *midy, const char *layername, int maxzoom, int minzoom, sqlite3 *outdb, double droprate, int buffer, const char *fname, struct json_pull *jp, const char *tmpdir, double gamma) { +int traverse_zooms(int geomfd[4], off_t geom_size[4], char *metabase, unsigned *file_bbox, struct pool *file_keys, unsigned *midx, unsigned *midy, const char *layername, int maxzoom, int minzoom, sqlite3 *outdb, double droprate, int buffer, const char *fname, struct json_pull *jp, const char *tmpdir, double gamma) { int i; for (i = 0; i <= maxzoom; i++) { long long most = 0; @@ -257,6 +257,10 @@ void traverse_zooms(int geomfd[4], off_t geom_size[4], char *metabase, unsigned long long len = write_tile(&geom, metabase, file_bbox, z, x, y, z == maxzoom ? 
full_detail : low_detail, maxzoom, file_keys, layername, outdb, droprate, buffer, fname, jp, sub, minzoom, maxzoom, todo, geomstart, along, gamma); + if (len < 0) { + return i - 1; + } + if (z == maxzoom && len > most) { *midx = x; *midy = y; @@ -286,6 +290,7 @@ void traverse_zooms(int geomfd[4], off_t geom_size[4], char *metabase, unsigned } fprintf(stderr, "\n"); + return maxzoom; } struct index { @@ -356,7 +361,9 @@ static void merge(struct merge *merges, int nmerges, unsigned char *map, FILE *f } } -void read_json(FILE *f, const char *fname, const char *layername, int maxzoom, int minzoom, sqlite3 *outdb, struct pool *exclude, struct pool *include, int exclude_all, double droprate, int buffer, const char *tmpdir, double gamma) { +int read_json(FILE *f, const char *fname, const char *layername, int maxzoom, int minzoom, sqlite3 *outdb, struct pool *exclude, struct pool *include, int exclude_all, double droprate, int buffer, const char *tmpdir, double gamma) { + int ret = EXIT_SUCCESS; + char metaname[strlen(tmpdir) + strlen("/meta.XXXXXXXX") + 1]; char geomname[strlen(tmpdir) + strlen("/geom.XXXXXXXX") + 1]; char indexname[strlen(tmpdir) + strlen("/index.XXXXXXXX") + 1]; @@ -836,7 +843,13 @@ void read_json(FILE *f, const char *fname, const char *layername, int maxzoom, i fprintf(stderr, "%lld features, %lld bytes of geometry, %lld bytes of metadata\n", seq, (long long) geomst.st_size, (long long) metast.st_size); - traverse_zooms(fd, size, meta, file_bbox, &file_keys, &midx, &midy, layername, maxzoom, minzoom, outdb, droprate, buffer, fname, jp, tmpdir, gamma); + int written = traverse_zooms(fd, size, meta, file_bbox, &file_keys, &midx, &midy, layername, maxzoom, minzoom, outdb, droprate, buffer, fname, jp, tmpdir, gamma); + + if (maxzoom != written) { + fprintf(stderr, "\n\n\n*** NOTE TILES ONLY COMPLETE THROUGH ZOOM %d ***\n\n\n", written); + maxzoom = written; + ret = EXIT_FAILURE; + } if (munmap(meta, metast.st_size) != 0) { perror("munmap meta"); @@ -873,6 
+886,7 @@ void read_json(FILE *f, const char *fname, const char *layername, int maxzoom, i mbtiles_write_metadata(outdb, fname, layername, minzoom, maxzoom, minlat, minlon, maxlat, maxlon, midlat, midlon, &file_keys); pool_free_strings(&file_keys); + return ret; } int main(int argc, char **argv) { @@ -982,6 +996,7 @@ int main(int argc, char **argv) { } sqlite3 *outdb = mbtiles_open(outdir, argv); + int ret = EXIT_SUCCESS; if (argc == optind + 1) { int i; @@ -990,7 +1005,7 @@ int main(int argc, char **argv) { if (f == NULL) { fprintf(stderr, "%s: %s: %s\n", argv[0], argv[i], strerror(errno)); } else { - read_json(f, name ? name : argv[i], layer, maxzoom, minzoom, outdb, &exclude, &include, exclude_all, droprate, buffer, tmpdir, gamma); + ret = read_json(f, name ? name : argv[i], layer, maxzoom, minzoom, outdb, &exclude, &include, exclude_all, droprate, buffer, tmpdir, gamma); fclose(f); } } @@ -998,9 +1013,9 @@ int main(int argc, char **argv) { fprintf(stderr, "%s: Only accepts one input file\n", argv[0]); exit(EXIT_FAILURE); } else { - read_json(stdin, name ? name : outdir, layer, maxzoom, minzoom, outdb, &exclude, &include, exclude_all, droprate, buffer, tmpdir, gamma); + ret = read_json(stdin, name ? 
name : outdir, layer, maxzoom, minzoom, outdb, &exclude, &include, exclude_all, droprate, buffer, tmpdir, gamma); } mbtiles_close(outdb, argv); - return 0; + return ret; } diff --git a/tile.cc b/tile.cc index a0c9b85..48abeca 100644 --- a/tile.cc +++ b/tile.cc @@ -621,7 +621,7 @@ long long write_tile(char **geoms, char *metabase, unsigned *file_bbox, int z, u if (features.size() > 200000) { fprintf(stderr, "tile %d/%u/%u has %lld features, >200000 \n", z, tx, ty, (long long) features.size()); fprintf(stderr, "Try using -z to set a higher base zoom level.\n"); - exit(EXIT_FAILURE); + return -1; } mapnik::vector::tile tile = create_tile(layername, line_detail, features, &count, &keys, &values); @@ -652,6 +652,6 @@ long long write_tile(char **geoms, char *metabase, unsigned *file_bbox, int z, u } fprintf(stderr, "could not make tile %d/%u/%u small enough\n", z, tx, ty); - exit(EXIT_FAILURE); + return -1; } From f3e051a610b1e638250790ad1cefeaaca9f8ee92 Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Mon, 9 Mar 2015 12:36:15 -0700 Subject: [PATCH 10/10] Clarify and correct gamma in README --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 55ea74c..113f1d3 100644 --- a/README.md +++ b/README.md @@ -58,7 +58,7 @@ Options * -f: Delete the mbtiles file if it already exists instead of giving an error * -r rate: Rate at which dots are dropped at lower zoom levels (default 2.5) * -b pixels: Buffer size where features are duplicated from adjacent tiles (default 5) - * -g gamma: Rate at which especially dense dots are dropped (default 1, for no effect). A gamma of 2 reduces the number of dots duplicating the same pixel to the square root of their original number. + * -g gamma: Rate at which especially dense dots are dropped (default 1, for no effect). A gamma of 2 reduces the number of dots less than a pixel apart to the square root of their original number. 
Example ------- @@ -78,8 +78,8 @@ I don't know why 2.5 is the appropriate number, but the densities of many differ data sets fall off at about this same rate. You can use -r to specify a different rate. You can use the gamma option to thin out especially dense clusters of points. -For any area that is denser than one point per pixel (at whatever zoom level), -a gamma of 3, for example, will reduce these clusters to 1/3 of their original density. +For any area where dots are closer together than one pixel (at whatever zoom level), +a gamma of 3, for example, will reduce these clusters to the cube root of their original density. For line features, it drops any features that are too small to draw at all. This still leaves the lower zooms too dark (and too dense for the 500K tile limit,