From e5a6f981b703cda563e8fcc7d073af7ad33c5423 Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Wed, 8 Jul 2015 16:33:22 -0700 Subject: [PATCH 01/17] Work out the sharding math for multhreading --- geojson.c | 6 ++--- tile.cc | 67 +++++++++++++++++++++++++++++++++++++++---------------- tile.h | 2 +- 3 files changed, 52 insertions(+), 23 deletions(-) diff --git a/geojson.c b/geojson.c index 4091e37..e947f88 100644 --- a/geojson.c +++ b/geojson.c @@ -960,14 +960,14 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max exit(EXIT_FAILURE); } - int fd[(1 << MAX_ZOOM_INCREMENT) * (1 << MAX_ZOOM_INCREMENT)]; - off_t size[(1 << MAX_ZOOM_INCREMENT) * (1 << MAX_ZOOM_INCREMENT)]; + int fd[TEMP_FILES]; + off_t size[TEMP_FILES]; fd[0] = geomfd; size[0] = geomst.st_size; int j; - for (j = 1; j < (1 << MAX_ZOOM_INCREMENT) * (1 << MAX_ZOOM_INCREMENT); j++) { + for (j = 1; j < TEMP_FILES; j++) { fd[j] = -1; size[j] = 0; } diff --git a/tile.cc b/tile.cc index ec27700..0f96269 100644 --- a/tile.cc +++ b/tile.cc @@ -358,7 +358,7 @@ void evaluate(std::vector &features, char *metabase, struct pool *file } #endif -void rewrite(drawvec &geom, int z, int nextzoom, int file_maxzoom, long long *bbox, unsigned tx, unsigned ty, int buffer, int line_detail, int *within, long long *geompos, FILE **geomfile, const char *fname, signed char t, signed char layer, long long metastart, signed char feature_minzoom) { +void rewrite(drawvec &geom, int z, int nextzoom, int file_maxzoom, long long *bbox, unsigned tx, unsigned ty, int buffer, int line_detail, int *within, long long *geompos, FILE **geomfile, const char *fname, signed char t, signed char layer, long long metastart, signed char feature_minzoom, int child_shards, int max_zoom_increment) { if (geom.size() > 0 && nextzoom <= file_maxzoom) { int xo, yo; int span = 1 << (nextzoom - z); @@ -394,7 +394,7 @@ void rewrite(drawvec &geom, int z, int nextzoom, int file_maxzoom, long long *bb // j is the shard that the child tile's data is being written to. // - // Be careful: We can't jump more zoom levels than MAX_ZOOM_INCREMENT + // Be careful: We can't jump more zoom levels than max_zoom_increment // because that could break the constraint that each of the children // of the current tile must have its own shard, because the data for // the child tile must be contiguous within the shard. @@ -403,9 +403,13 @@ void rewrite(drawvec &geom, int z, int nextzoom, int file_maxzoom, long long *bb // the four that would normally result from splitting one tile, // because it will go through all the shards when it does the // next zoom. + // + // If child_shards is a power of 2 but not a power of 4, this will + // shard X more widely than Y. XXX Is there a better way to do this + // without causing collisions? - int j = ((jx & ((1 << MAX_ZOOM_INCREMENT) - 1)) << MAX_ZOOM_INCREMENT) | - ((jy & ((1 << MAX_ZOOM_INCREMENT) - 1))); + int j = ((jx << max_zoom_increment) | + ((jy & ((1 << max_zoom_increment) - 1)))) & (child_shards - 1); { if (!within[j]) { @@ -447,7 +451,7 @@ void rewrite(drawvec &geom, int z, int nextzoom, int file_maxzoom, long long *bb } } -long long write_tile(char **geoms, char *metabase, char *stringpool, unsigned *file_bbox, int z, unsigned tx, unsigned ty, int detail, int min_detail, int basezoom, struct pool **file_keys, char **layernames, sqlite3 *outdb, double droprate, int buffer, const char *fname, FILE **geomfile, int file_minzoom, int file_maxzoom, double todo, char *geomstart, long long along, double gamma, int nlayers, char *prevent) { +long long write_tile(char **geoms, char *metabase, char *stringpool, unsigned *file_bbox, int z, unsigned tx, unsigned ty, int detail, int min_detail, int basezoom, struct pool **file_keys, char **layernames, sqlite3 *outdb, double droprate, int buffer, const char *fname, FILE **geomfile, int file_minzoom, int file_maxzoom, double todo, char *geomstart, long long along, double gamma, int nlayers, char *prevent, int child_shards) { int line_detail; static bool evaluated = false; double oprogress = 0; @@ -455,12 +459,23 @@ long long write_tile(char **geoms, char *metabase, char *stringpool, unsigned *f char *og = *geoms; + // XXX is there a way to do this without floating point? + int max_zoom_increment = log(child_shards) / log(4); + if (child_shards < 4 || max_zoom_increment < 1) { + fprintf(stderr, "Internal error: %d shards, max zoom increment %d\n", child_shards, max_zoom_increment); + exit(EXIT_FAILURE); + } + if ((((child_shards - 1) << 1) & child_shards) != child_shards) { + fprintf(stderr, "Internal error: %d shards not a power of 2\n", child_shards); + exit(EXIT_FAILURE); + } + int nextzoom = z + 1; if (nextzoom < file_minzoom) { - if (z + MAX_ZOOM_INCREMENT > file_minzoom) { + if (z + max_zoom_increment > file_minzoom) { nextzoom = file_minzoom; } else { - nextzoom = z + MAX_ZOOM_INCREMENT; + nextzoom = z + max_zoom_increment; } } @@ -502,8 +517,10 @@ long long write_tile(char **geoms, char *metabase, char *stringpool, unsigned *f features.push_back(std::vector()); } - int within[(1 << MAX_ZOOM_INCREMENT) * (1 << MAX_ZOOM_INCREMENT)] = {0}; - long long geompos[(1 << MAX_ZOOM_INCREMENT) * (1 << MAX_ZOOM_INCREMENT)] = {0}; + int within[child_shards]; + long long geompos[child_shards]; + memset(within, '\0', sizeof(within)); + memset(geompos, '\0', sizeof(geompos)); *geoms = og; @@ -559,7 +576,7 @@ long long write_tile(char **geoms, char *metabase, char *stringpool, unsigned *f } if (line_detail == detail && fraction == 1) { /* only write out the next zoom once, even if we retry */ - rewrite(geom, z, nextzoom, file_maxzoom, bbox, tx, ty, buffer, line_detail, within, geompos, geomfile, fname, t, layer, metastart, feature_minzoom); + rewrite(geom, z, nextzoom, file_maxzoom, bbox, tx, ty, buffer, line_detail, within, geompos, geomfile, fname, t, layer, metastart, feature_minzoom, child_shards, max_zoom_increment); } if (z < file_minzoom) { @@ -673,7 +690,7 @@ long long write_tile(char **geoms, char *metabase, char *stringpool, unsigned *f } int j; - for (j = 0; j < (1 << MAX_ZOOM_INCREMENT) * (1 << MAX_ZOOM_INCREMENT); j++) { + for (j = 0; j < child_shards; j++) { if (within[j]) { serialize_byte(geomfile[j], -2, &geompos[j], fname); within[j] = 0; @@ -783,10 +800,10 @@ int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpo for (i = 0; i <= maxzoom; i++) { long long most = 0; - FILE *sub[(1 << MAX_ZOOM_INCREMENT) * (1 << MAX_ZOOM_INCREMENT)]; - int subfd[(1 << MAX_ZOOM_INCREMENT) * (1 << MAX_ZOOM_INCREMENT)]; + FILE *sub[TEMP_FILES]; + int subfd[TEMP_FILES]; int j; - for (j = 0; j < (1 << MAX_ZOOM_INCREMENT) * (1 << MAX_ZOOM_INCREMENT); j++) { + for (j = 0; j < TEMP_FILES; j++) { char geomname[strlen(tmpdir) + strlen("/geom2.XXXXXXXX") + 1]; sprintf(geomname, "%s/geom%d.XXXXXXXX", tmpdir, j); subfd[j] = mkstemp(geomname); @@ -805,11 +822,23 @@ int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpo long long todo = 0; long long along = 0; - for (j = 0; j < (1 << MAX_ZOOM_INCREMENT) * (1 << MAX_ZOOM_INCREMENT); j++) { + for (j = 0; j < TEMP_FILES; j++) { todo += geom_size[j]; + printf("%d", geom_size[j] != 0); } + printf("\n"); - for (j = 0; j < (1 << MAX_ZOOM_INCREMENT) * (1 << MAX_ZOOM_INCREMENT); j++) { + // XXX Should be the number of temp files that have data, + // capped by the number of processor threads we can actually run + // or by the number of temp files divided by 4, or by + // some number larger than 4 if we are trying to skip zooms. + // + // Will need to be a power of 2 to make sharding come out right. + int threads = 1; + + int thread = 0; + + for (j = 0; j < TEMP_FILES; j++) { if (geomfd[j] < 0) { // only one source file for zoom level 0 continue; @@ -839,10 +868,10 @@ int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpo // fprintf(stderr, "%d/%u/%u\n", z, x, y); - long long len = write_tile(&geom, metabase, stringpool, file_bbox, z, x, y, z == maxzoom ? full_detail : low_detail, min_detail, maxzoom, file_keys, layernames, outdb, droprate, buffer, fname, sub, minzoom, maxzoom, todo, geomstart, along, gamma, nlayers, prevent); + long long len = write_tile(&geom, metabase, stringpool, file_bbox, z, x, y, z == maxzoom ? full_detail : low_detail, min_detail, maxzoom, file_keys, layernames, outdb, droprate, buffer, fname, sub + thread * (TEMP_FILES / threads), minzoom, maxzoom, todo, geomstart, along, gamma, nlayers, prevent, TEMP_FILES / threads); if (len < 0) { - return i - 1; + return z - 1; } if (z == maxzoom && len > most) { @@ -858,7 +887,7 @@ int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpo along += geom_size[j]; } - for (j = 0; j < (1 << MAX_ZOOM_INCREMENT) * (1 << MAX_ZOOM_INCREMENT); j++) { + for (j = 0; j < TEMP_FILES; j++) { close(geomfd[j]); fclose(sub[j]); diff --git a/tile.h b/tile.h index 42f9607..cb91014 100644 --- a/tile.h +++ b/tile.h @@ -32,4 +32,4 @@ int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpo extern unsigned initial_x, initial_y; extern int geometry_scale; -#define MAX_ZOOM_INCREMENT 3 +#define TEMP_FILES 64 From 3452ee92ab850e5177eda74994df83ba4aa69f80 Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Wed, 8 Jul 2015 16:35:02 -0700 Subject: [PATCH 02/17] Fix formatting --- tile.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tile.cc b/tile.cc index 0f96269..afc7d8e 100644 --- a/tile.cc +++ b/tile.cc @@ -408,8 +408,9 @@ void rewrite(drawvec &geom, int z, int nextzoom, int file_maxzoom, long long *bb // shard X more widely than Y. XXX Is there a better way to do this // without causing collisions? - int j = ((jx << max_zoom_increment) | - ((jy & ((1 << max_zoom_increment) - 1)))) & (child_shards - 1); + int j = ((jx << max_zoom_increment) | + ((jy & ((1 << max_zoom_increment) - 1)))) & + (child_shards - 1); { if (!within[j]) { From 2bdb51e995060014f622187649d17c3d3a086b64 Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Thu, 9 Jul 2015 15:24:47 -0700 Subject: [PATCH 03/17] Calculate how many threads should be run --- tile.cc | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/tile.cc b/tile.cc index 1f8fba7..1126544 100644 --- a/tile.cc +++ b/tile.cc @@ -827,21 +827,32 @@ int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpo unlink(geomname); } + int useful_threads = 0; long long todo = 0; long long along = 0; for (j = 0; j < TEMP_FILES; j++) { todo += geom_size[j]; printf("%d", geom_size[j] != 0); + if (geom_size[j] > 0) { + useful_threads++; + } } printf("\n"); - // XXX Should be the number of temp files that have data, - // capped by the number of processor threads we can actually run - // or by the number of temp files divided by 4, or by - // some number larger than 4 if we are trying to skip zooms. - // - // Will need to be a power of 2 to make sharding come out right. - int threads = 1; +#define MAX_THREADS 20 // XXX Obtain from sysctl(hw.ncpu), /proc/cpuinfo, etc. + + int threads = MAX_THREADS; + if (threads > TEMP_FILES / 4) { + threads = TEMP_FILES / 4; + } + // XXX is it useful to divide further if we know we are skipping + // some zoom levels? Is it faster to have fewer CPUs working on + // sharding, but more deeply, or fewer CPUs, less deeply? + if (threads > useful_threads) { + threads = useful_threads; + } + // Round down to a power of 2 + threads = 1 << (int)(log(threads) / log(2)); int thread = 0; From 2957f16b4bf516c564548eb766c8bbb0cb6d7f0c Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Thu, 9 Jul 2015 16:09:40 -0700 Subject: [PATCH 04/17] Assign tasks (reading temporary files) to threads --- tile.cc | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/tile.cc b/tile.cc index 1126544..3b078f2 100644 --- a/tile.cc +++ b/tile.cc @@ -854,6 +854,62 @@ int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpo // Round down to a power of 2 threads = 1 << (int)(log(threads) / log(2)); + // Assign temporary files to threads + + struct task { + int fileno; + struct task *next; + } tasks[TEMP_FILES]; + + struct dispatch { + struct task *tasks; + long long todo; + struct dispatch *next; + } dispatches[threads]; + struct dispatch *dispatch_head = &dispatches[0]; + for (j = 0; j < threads; j++) { + dispatches[j].tasks = NULL; + dispatches[j].todo = 0; + if (j + 1 < threads) { + dispatches[j].next = &dispatches[j + 1]; + } else { + dispatches[j].next = NULL; + } + } + + for (j = 0; j < TEMP_FILES; j++) { + if (geom_size[j] == 0) { + continue; + } + + tasks[j].fileno = j; + tasks[j].next = dispatch_head->tasks; + dispatch_head->tasks = &tasks[j]; + dispatch_head->todo += geom_size[j]; + + struct dispatch *here = dispatch_head; + dispatch_head = dispatch_head->next; + + dispatch **d; + for (d = &dispatch_head; *d != NULL; d = &((*d)->next)) { + if (here->todo < (*d)->todo) { + break; + } + } + + here->next = *d; + *d = here; + + + printf("to do: "); + + dispatch *a; + for (a = dispatch_head; a != NULL; a = a->next) { + printf("%lld ", a->todo); + } + printf("\n"); + } + int thread = 0; for (j = 0; j < TEMP_FILES; j++) { From d55af3b3a6be544951767b7dc8a7b7ccac986e8b Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Thu, 9 Jul 2015 16:14:24 -0700 Subject: [PATCH 05/17] Run through the thread task queues, although still sequentially --- tile.cc | 100 +++++++++++++++++++++++++++----------------------------- 1 file changed, 48 insertions(+), 52 deletions(-) diff --git a/tile.cc b/tile.cc index 3b078f2..d36cca6 100644 --- a/tile.cc +++ b/tile.cc @@ -899,66 +899,62 @@ int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpo here->next = *d; *d = here; - - - printf("to do: "); - - dispatch *a; - for (a = dispatch_head; a != NULL; a = a->next) { - printf("%lld ", a->todo); - } - printf("\n"); } - int thread = 0; + int thread; + for (thread = 0; thread < threads; thread++) { + struct task *task; - for (j = 0; j < TEMP_FILES; j++) { - if (geomfd[j] < 0) { - // only one source file for zoom level 0 - continue; - } - if (geom_size[j] == 0) { - continue; - } + for (task = dispatches[thread].tasks; task != NULL; task = task->next) { + int j = task->fileno; - // printf("%lld of geom_size\n", (long long) geom_size[j]); - - char *geom = (char *) mmap(NULL, geom_size[j], PROT_READ, MAP_PRIVATE, geomfd[j], 0); - if (geom == MAP_FAILED) { - perror("mmap geom"); - exit(EXIT_FAILURE); - } - - char *geomstart = geom; - char *end = geom + geom_size[j]; - - while (geom < end) { - int z; - unsigned x, y; - - deserialize_int(&geom, &z); - deserialize_uint(&geom, &x); - deserialize_uint(&geom, &y); - - // fprintf(stderr, "%d/%u/%u\n", z, x, y); - - long long len = write_tile(&geom, metabase, stringpool, file_bbox, z, x, y, z == maxzoom ? full_detail : low_detail, min_detail, maxzoom, file_keys, layernames, outdb, droprate, buffer, fname, sub + thread * (TEMP_FILES / threads), minzoom, maxzoom, todo, geomstart, along, gamma, nlayers, prevent, TEMP_FILES / threads); - - if (len < 0) { - return z - 1; + if (geomfd[j] < 0) { + // only one source file for zoom level 0 + continue; + } + if (geom_size[j] == 0) { + continue; } - if (z == maxzoom && len > most) { - *midx = x; - *midy = y; - most = len; - } - } + // printf("%lld of geom_size\n", (long long) geom_size[j]); - if (munmap(geomstart, geom_size[j]) != 0) { - perror("munmap geom"); + char *geom = (char *) mmap(NULL, geom_size[j], PROT_READ, MAP_PRIVATE, geomfd[j], 0); + if (geom == MAP_FAILED) { + perror("mmap geom"); + exit(EXIT_FAILURE); + } + + char *geomstart = geom; + char *end = geom + geom_size[j]; + + while (geom < end) { + int z; + unsigned x, y; + + deserialize_int(&geom, &z); + deserialize_uint(&geom, &x); + deserialize_uint(&geom, &y); + + // fprintf(stderr, "%d/%u/%u\n", z, x, y); + + long long len = write_tile(&geom, metabase, stringpool, file_bbox, z, x, y, z == maxzoom ? full_detail : low_detail, min_detail, maxzoom, file_keys, layernames, outdb, droprate, buffer, fname, sub + thread * (TEMP_FILES / threads), minzoom, maxzoom, todo, geomstart, along, gamma, nlayers, prevent, TEMP_FILES / threads); + + if (len < 0) { + return z - 1; + } + + if (z == maxzoom && len > most) { + *midx = x; + *midy = y; + most = len; + } + } + + if (munmap(geomstart, geom_size[j]) != 0) { + perror("munmap geom"); + } + along += geom_size[j]; } - along += geom_size[j]; } for (j = 0; j < TEMP_FILES; j++) { From a64913c989b843b0e643c08371968c96d412861e Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Thu, 23 Jul 2015 16:17:23 -0700 Subject: [PATCH 06/17] Move the data that threads will need into a parameter block --- tile.cc | 180 +++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 127 insertions(+), 53 deletions(-) diff --git a/tile.cc b/tile.cc index d36cca6..f4f34d8 100644 --- a/tile.cc +++ b/tile.cc @@ -802,6 +802,100 @@ long long write_tile(char **geoms, char *metabase, char *stringpool, unsigned *f return -1; } +struct task { + int fileno; + struct task *next; +} tasks[TEMP_FILES]; + +struct write_tile_args { + struct task *tasks; + char *metabase; + char *stringpool; + unsigned *file_bbox; + int min_detail; + int basezoom; + struct pool **file_keys; + char **layernames; + sqlite3 *outdb; + double droprate; + int buffer; + const char *fname; + FILE **geomfile; + int file_minzoom; + int file_maxzoom; + double todo; + long long *along; + double gamma; + int nlayers; + char *prevent; + int child_shards; + int *geomfd; + off_t *geom_size; + unsigned *midx; + unsigned *midy; + int maxzoom; + int minzoom; + int full_detail; + int low_detail; + long long *most; +}; + +void run_thread(write_tile_args *arg) { + struct task *task; + + for (task = arg->tasks; task != NULL; task = task->next) { + int j = task->fileno; + + if (arg->geomfd[j] < 0) { + // only one source file for zoom level 0 + continue; + } + if (arg->geom_size[j] == 0) { + continue; + } + + // printf("%lld of geom_size\n", (long long) geom_size[j]); + + char *geom = (char *) mmap(NULL, arg->geom_size[j], PROT_READ, MAP_PRIVATE, arg->geomfd[j], 0); + if (geom == MAP_FAILED) { + perror("mmap geom"); + exit(EXIT_FAILURE); + } + + char *geomstart = geom; + char *end = geom + arg->geom_size[j]; + + while (geom < end) { + int z; + unsigned x, y; + + deserialize_int(&geom, &z); + deserialize_uint(&geom, &x); + deserialize_uint(&geom, &y); + + // fprintf(stderr, "%d/%u/%u\n", z, x, y); + + long long len = write_tile(&geom, arg->metabase, arg->stringpool, arg->file_bbox, z, x, y, z == arg->maxzoom ? arg->full_detail : arg->low_detail, arg->min_detail, arg->maxzoom, arg->file_keys, arg->layernames, arg->outdb, arg->droprate, arg->buffer, arg->fname, arg->geomfile, arg->minzoom, arg->maxzoom, arg->todo, geomstart, *arg->along, arg->gamma, arg->nlayers, arg->prevent, arg->child_shards); + + if (len < 0) { + return; // XXX how to report errors from threads? + // return z - 1; + } + + if (z == arg->maxzoom && len > *arg->most) { + *arg->midx = x; + *arg->midy = y; + *arg->most = len; + } + } + + if (munmap(geomstart, arg->geom_size[j]) != 0) { + perror("munmap geom"); + } + *arg->along += arg->geom_size[j]; + } +} + int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpool, unsigned *file_bbox, struct pool **file_keys, unsigned *midx, unsigned *midy, char **layernames, int maxzoom, int minzoom, sqlite3 *outdb, double droprate, int buffer, const char *fname, const char *tmpdir, double gamma, int nlayers, char *prevent, int full_detail, int low_detail, int min_detail) { int i; for (i = 0; i <= maxzoom; i++) { @@ -856,11 +950,6 @@ int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpo // Assign temporary files to threads - struct task { - int fileno; - struct task *next; - } tasks[TEMP_FILES]; - struct dispatch { struct task *tasks; long long todo; @@ -903,58 +992,43 @@ int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpo int thread; for (thread = 0; thread < threads; thread++) { - struct task *task; + write_tile_args args; - for (task = dispatches[thread].tasks; task != NULL; task = task->next) { - int j = task->fileno; + args.tasks = tasks; + args.metabase = metabase; + args.stringpool = stringpool; + args.file_bbox = file_bbox; // XXX locking + args.min_detail = min_detail; + args.basezoom = maxzoom; // XXX rename? + args.file_keys = file_keys; // XXX locking + args.layernames = layernames; + args.outdb = outdb; // XXX locking + args.droprate = droprate; + args.buffer = buffer; + args.fname = fname; + args.geomfile = sub + thread * (TEMP_FILES / threads); + args.file_minzoom = minzoom; + args.file_maxzoom = maxzoom; + args.todo = todo; + args.along = &along; // XXX locking + args.gamma = gamma; + args.nlayers = nlayers; + args.prevent = prevent; + args.child_shards = TEMP_FILES / threads; - if (geomfd[j] < 0) { - // only one source file for zoom level 0 - continue; - } - if (geom_size[j] == 0) { - continue; - } + args.geomfd = geomfd; + args.geom_size = geom_size; + args.midx = midx; // XXX locking + args.midy = midy; // XXX locking + args.maxzoom = maxzoom; + args.minzoom = minzoom; + args.full_detail = full_detail; + args.low_detail = low_detail; + args.most = &most; // XXX locking - // printf("%lld of geom_size\n", (long long) geom_size[j]); + args.tasks = dispatches[thread].tasks; - char *geom = (char *) mmap(NULL, geom_size[j], PROT_READ, MAP_PRIVATE, geomfd[j], 0); - if (geom == MAP_FAILED) { - perror("mmap geom"); - exit(EXIT_FAILURE); - } - - char *geomstart = geom; - char *end = geom + geom_size[j]; - - while (geom < end) { - int z; - unsigned x, y; - - deserialize_int(&geom, &z); - deserialize_uint(&geom, &x); - deserialize_uint(&geom, &y); - - // fprintf(stderr, "%d/%u/%u\n", z, x, y); - - long long len = write_tile(&geom, metabase, stringpool, file_bbox, z, x, y, z == maxzoom ? full_detail : low_detail, min_detail, maxzoom, file_keys, layernames, outdb, droprate, buffer, fname, sub + thread * (TEMP_FILES / threads), minzoom, maxzoom, todo, geomstart, along, gamma, nlayers, prevent, TEMP_FILES / threads); - - if (len < 0) { - return z - 1; - } - - if (z == maxzoom && len > most) { - *midx = x; - *midy = y; - most = len; - } - } - - if (munmap(geomstart, geom_size[j]) != 0) { - perror("munmap geom"); - } - along += geom_size[j]; - } + run_thread(&args); } for (j = 0; j < TEMP_FILES; j++) { From b897712a10ae5f532b32ae1594c7def176fb18e5 Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Thu, 23 Jul 2015 16:24:50 -0700 Subject: [PATCH 07/17] Remove double-setting of thread task list --- tile.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/tile.cc b/tile.cc index f4f34d8..decd2a2 100644 --- a/tile.cc +++ b/tile.cc @@ -994,7 +994,6 @@ int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpo for (thread = 0; thread < threads; thread++) { write_tile_args args; - args.tasks = tasks; args.metabase = metabase; args.stringpool = stringpool; args.file_bbox = file_bbox; // XXX locking From 5d014e040f8cb1eff0765f19ee98b321644d83e4 Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Mon, 19 Oct 2015 12:32:40 -0700 Subject: [PATCH 08/17] Actually create threads and hand the tiling tasks off to them --- tile.cc | 99 +++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 64 insertions(+), 35 deletions(-) diff --git a/tile.cc b/tile.cc index bf67825..b73e867 100644 --- a/tile.cc +++ b/tile.cc @@ -15,6 +15,7 @@ #include #include #include +#include #include "vector_tile.pb.h" #include "geometry.hh" @@ -31,6 +32,8 @@ extern "C" { #define XSTRINGIFY(s) STRINGIFY(s) #define STRINGIFY(s) #s +pthread_mutex_t db_lock = PTHREAD_MUTEX_INITIALIZER; + // https://github.com/mapbox/mapnik-vector-tile/blob/master/src/vector_tile_compression.hpp static inline int compress(std::string const &input, std::string &output) { z_stream deflate_s; @@ -841,7 +844,18 @@ long long write_tile(char **geoms, char *metabase, char *stringpool, unsigned *f line_detail++; // to keep it the same when the loop decrements it } } else { + if (pthread_mutex_lock(&db_lock) != 0) { + perror("pthread_mutex_lock"); + exit(EXIT_FAILURE); + } + mbtiles_write_tile(outdb, z, tx, ty, compressed.data(), compressed.size()); + + if (pthread_mutex_unlock(&db_lock) != 0) { + perror("pthread_mutex_unlock"); + exit(EXIT_FAILURE); + } + return count; } } else { @@ -898,7 +912,8 @@ struct write_tile_args { long long *most; }; -void run_thread(write_tile_args *arg) { +void *run_thread(void *vargs) { + write_tile_args *arg = (write_tile_args *) vargs; struct task *task; for (task = arg->tasks; task != NULL; task = task->next) { @@ -936,7 +951,7 @@ void run_thread(write_tile_args *arg) { long long len = write_tile(&geom, arg->metabase, arg->stringpool, arg->file_bbox, z, x, y, z == arg->maxzoom ? arg->full_detail : arg->low_detail, arg->min_detail, arg->maxzoom, arg->file_keys, arg->layernames, arg->outdb, arg->droprate, arg->buffer, arg->fname, arg->geomfile, arg->minzoom, arg->maxzoom, arg->todo, geomstart, *arg->along, arg->gamma, arg->nlayers, arg->prevent, arg->additional, arg->child_shards); if (len < 0) { - return; // XXX how to report errors from threads? + return NULL; // XXX how to report errors from threads? // return z - 1; } @@ -952,6 +967,8 @@ void run_thread(write_tile_args *arg) { } *arg->along += arg->geom_size[j]; } + + return NULL; } int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpool, unsigned *file_bbox, struct pool **file_keys, unsigned *midx, unsigned *midy, char **layernames, int maxzoom, int minzoom, sqlite3 *outdb, double droprate, int buffer, const char *fname, const char *tmpdir, double gamma, int nlayers, char *prevent, char *additional, int full_detail, int low_detail, int min_detail) { @@ -1048,45 +1065,57 @@ int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpo *d = here; } + pthread_t pthreads[threads]; + write_tile_args args[threads]; + int thread; for (thread = 0; thread < threads; thread++) { - write_tile_args args; + args[thread].metabase = metabase; + args[thread].stringpool = stringpool; + args[thread].file_bbox = file_bbox; // XXX locking + args[thread].min_detail = min_detail; + args[thread].basezoom = maxzoom; // XXX rename? + args[thread].file_keys = file_keys; // XXX locking + args[thread].layernames = layernames; + args[thread].outdb = outdb; // XXX locking + args[thread].droprate = droprate; + args[thread].buffer = buffer; + args[thread].fname = fname; + args[thread].geomfile = sub + thread * (TEMP_FILES / threads); + args[thread].file_minzoom = minzoom; + args[thread].file_maxzoom = maxzoom; + args[thread].todo = todo; + args[thread].along = &along; // XXX locking + args[thread].gamma = gamma; + args[thread].nlayers = nlayers; + args[thread].prevent = prevent; + args[thread].additional = additional; + args[thread].child_shards = TEMP_FILES / threads; - args.metabase = metabase; - args.stringpool = stringpool; - args.file_bbox = file_bbox; // XXX locking - args.min_detail = min_detail; - args.basezoom = maxzoom; // XXX rename? - args.file_keys = file_keys; // XXX locking - args.layernames = layernames; - args.outdb = outdb; // XXX locking - args.droprate = droprate; - args.buffer = buffer; - args.fname = fname; - args.geomfile = sub + thread * (TEMP_FILES / threads); - args.file_minzoom = minzoom; - args.file_maxzoom = maxzoom; - args.todo = todo; - args.along = &along; // XXX locking - args.gamma = gamma; - args.nlayers = nlayers; - args.prevent = prevent; - args.additional = additional; - args.child_shards = TEMP_FILES / threads; + args[thread].geomfd = geomfd; + args[thread].geom_size = geom_size; + args[thread].midx = midx; // XXX locking + args[thread].midy = midy; // XXX locking + args[thread].maxzoom = maxzoom; + args[thread].minzoom = minzoom; + args[thread].full_detail = full_detail; + args[thread].low_detail = low_detail; + args[thread].most = &most; // XXX locking - args.geomfd = geomfd; - args.geom_size = geom_size; - args.midx = midx; // XXX locking - args.midy = midy; // XXX locking - args.maxzoom = maxzoom; - args.minzoom = minzoom; - args.full_detail = full_detail; - args.low_detail = low_detail; - args.most = &most; // XXX locking + args[thread].tasks = dispatches[thread].tasks; - args.tasks = dispatches[thread].tasks; + if (pthread_create(&pthreads[thread], NULL, run_thread, &args[thread]) != 0) { + perror("pthread_create"); + exit(EXIT_FAILURE); + } + } - run_thread(&args); + for (thread = 0; thread < threads; thread++) { + void *retval; + + if (pthread_join(pthreads[thread], &retval) != 0) { + perror("pthread_join"); + } } for (j = 0; j < TEMP_FILES; j++) { From 3da692250d4756453a6dd6d85650411215245a8b Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Mon, 19 Oct 2015 13:11:00 -0700 Subject: [PATCH 09/17] Remove unused argument --- geojson.c | 2 +- tile.cc | 8 +++----- tile.h | 2 +- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/geojson.c b/geojson.c index 48719a4..064c9f9 100644 --- a/geojson.c +++ b/geojson.c @@ -1089,7 +1089,7 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max fprintf(stderr, "%lld features, %lld bytes of geometry, %lld bytes of metadata, %lld bytes of string pool\n", seq, (long long) geomst.st_size, (long long) metast.st_size, poolfile->off); } - int written = traverse_zooms(fd, size, meta, stringpool, file_bbox, file_keys, &midx, &midy, layernames, maxzoom, minzoom, outdb, droprate, buffer, fname, tmpdir, gamma, nlayers, prevent, additional, full_detail, low_detail, min_detail); + int written = traverse_zooms(fd, size, meta, stringpool, file_keys, &midx, &midy, layernames, maxzoom, minzoom, outdb, droprate, buffer, fname, tmpdir, gamma, nlayers, prevent, additional, full_detail, low_detail, min_detail); if (maxzoom != written) { fprintf(stderr, "\n\n\n*** NOTE TILES ONLY COMPLETE THROUGH ZOOM %d ***\n\n\n", written); diff --git a/tile.cc b/tile.cc index b73e867..3ac1670 100644 --- a/tile.cc +++ b/tile.cc @@ -473,7 +473,7 @@ void rewrite(drawvec &geom, int z, int nextzoom, int file_maxzoom, long long *bb } } -long long write_tile(char **geoms, char *metabase, char *stringpool, unsigned *file_bbox, int z, unsigned tx, unsigned ty, int detail, int min_detail, int basezoom, struct pool **file_keys, char **layernames, sqlite3 *outdb, double droprate, int buffer, const char *fname, FILE **geomfile, int file_minzoom, int file_maxzoom, double todo, char *geomstart, long long along, double gamma, int nlayers, char *prevent, char *additional, int child_shards) { +long long write_tile(char **geoms, char *metabase, char *stringpool, int z, unsigned tx, unsigned ty, int detail, int min_detail, int basezoom, struct pool **file_keys, char **layernames, sqlite3 *outdb, double droprate, int buffer, const char *fname, FILE **geomfile, int file_minzoom, int file_maxzoom, double todo, char *geomstart, long long along, double gamma, int nlayers, char *prevent, char *additional, int child_shards) { int line_detail; static bool evaluated = false; double oprogress = 0; @@ -882,7 +882,6 @@ struct write_tile_args { struct task *tasks; char *metabase; char *stringpool; - unsigned *file_bbox; int min_detail; int basezoom; struct pool **file_keys; @@ -948,7 +947,7 @@ void *run_thread(void *vargs) { // fprintf(stderr, "%d/%u/%u\n", z, x, y); - long long len = write_tile(&geom, arg->metabase, arg->stringpool, arg->file_bbox, z, x, y, z == arg->maxzoom ? arg->full_detail : arg->low_detail, arg->min_detail, arg->maxzoom, arg->file_keys, arg->layernames, arg->outdb, arg->droprate, arg->buffer, arg->fname, arg->geomfile, arg->minzoom, arg->maxzoom, arg->todo, geomstart, *arg->along, arg->gamma, arg->nlayers, arg->prevent, arg->additional, arg->child_shards); + long long len = write_tile(&geom, arg->metabase, arg->stringpool, z, x, y, z == arg->maxzoom ? arg->full_detail : arg->low_detail, arg->min_detail, arg->maxzoom, arg->file_keys, arg->layernames, arg->outdb, arg->droprate, arg->buffer, arg->fname, arg->geomfile, arg->minzoom, arg->maxzoom, arg->todo, geomstart, *arg->along, arg->gamma, arg->nlayers, arg->prevent, arg->additional, arg->child_shards); if (len < 0) { return NULL; // XXX how to report errors from threads? @@ -971,7 +970,7 @@ void *run_thread(void *vargs) { return NULL; } -int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpool, unsigned *file_bbox, struct pool **file_keys, unsigned *midx, unsigned *midy, char **layernames, int maxzoom, int minzoom, sqlite3 *outdb, double droprate, int buffer, const char *fname, const char *tmpdir, double gamma, int nlayers, char *prevent, char *additional, int full_detail, int low_detail, int min_detail) { +int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpool, struct pool **file_keys, unsigned *midx, unsigned *midy, char **layernames, int maxzoom, int minzoom, sqlite3 *outdb, double droprate, int buffer, const char *fname, const char *tmpdir, double gamma, int nlayers, char *prevent, char *additional, int full_detail, int low_detail, int min_detail) { int i; for (i = 0; i <= maxzoom; i++) { long long most = 0; @@ -1072,7 +1071,6 @@ int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpo for (thread = 0; thread < threads; thread++) { args[thread].metabase = metabase; args[thread].stringpool = stringpool; - args[thread].file_bbox = file_bbox; // XXX locking args[thread].min_detail = min_detail; args[thread].basezoom = maxzoom; // XXX rename? args[thread].file_keys = file_keys; // XXX locking diff --git a/tile.h b/tile.h index 00d4ada..8d73566 100644 --- a/tile.h +++ b/tile.h @@ -27,7 +27,7 @@ struct pool_val *deserialize_string(char **f, struct pool *p, int type); long long write_tile(char **geom, char *metabase, char *stringpool, unsigned *file_bbox, int z, unsigned x, unsigned y, int detail, int min_detail, int basezoom, struct pool **file_keys, char **layernames, sqlite3 *outdb, double droprate, int buffer, const char *fname, FILE **geomfile, int file_minzoom, int file_maxzoom, double todo, char *geomstart, long long along, double gamma, int nlayers, char *prevent, char *additional); -int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpool, unsigned *file_bbox, struct pool **file_keys, unsigned *midx, unsigned *midy, char **layernames, int maxzoom, int minzoom, sqlite3 *outdb, double droprate, int buffer, const char *fname, const char *tmpdir, double gamma, int nlayers, char *prevent, char *additional, int full_detail, int low_detail, int min_detail); +int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpool, struct pool **file_keys, unsigned *midx, unsigned *midy, char **layernames, int maxzoom, int minzoom, sqlite3 *outdb, double droprate, int buffer, const char *fname, const char *tmpdir, double gamma, int nlayers, char *prevent, char *additional, int full_detail, int low_detail, int min_detail); extern unsigned initial_x, initial_y; extern int geometry_scale; From ef5fdf40362ce2fedfc1f7f66ab11f90784d5e23 Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Mon, 19 Oct 2015 13:26:47 -0700 Subject: [PATCH 10/17] Locking around the file keys --- pool.c | 23 +++++++------- tile.cc | 97 ++++++++++----------------------------------------------- 2 files changed, 29 insertions(+), 91 deletions(-) diff --git a/pool.c b/pool.c index 4641afc..4c46bef 100644 --- a/pool.c +++ b/pool.c @@ -34,26 +34,27 @@ struct pool_val *pool(struct pool *p, const char *s, int type) { } } - *v = malloc(sizeof(struct pool_val)); - if (*v == NULL) { + struct pool_val *nv = malloc(sizeof(struct pool_val)); + if (nv == NULL) { fprintf(stderr, "out of memory making string pool\n"); exit(EXIT_FAILURE); } - (*v)->left = NULL; - (*v)->right = NULL; - (*v)->next = NULL; - (*v)->s = s; - (*v)->type = type; - (*v)->n = p->n++; + nv->left = NULL; + nv->right = NULL; + nv->next = NULL; + nv->s = s; + nv->type = type; + nv->n = p->n++; if (p->tail != NULL) { - p->tail->next = *v; + p->tail->next = nv; } - p->tail = *v; + p->tail = nv; if (p->head == NULL) { - p->head = *v; + p->head = nv; } + *v = nv; return *v; } diff --git a/tile.cc b/tile.cc index 3ac1670..78c5666 100644 --- a/tile.cc +++ b/tile.cc @@ -33,6 +33,7 @@ extern "C" { #define STRINGIFY(s) #s pthread_mutex_t db_lock = PTHREAD_MUTEX_INITIALIZER; +pthread_mutex_t var_lock = PTHREAD_MUTEX_INITIALIZER; // https://github.com/mapbox/mapnik-vector-tile/blob/master/src/vector_tile_compression.hpp static inline int compress(std::string const &input, std::string &output) { @@ -211,26 +212,30 @@ struct pool_val *retrieve_string(char **f, struct pool *p, char *stringpool) { return ret; } -void decode_meta(char **meta, char *stringpool, struct pool *keys, struct pool *values, struct pool *file_keys, std::vector *intmeta, char *only) { +void decode_meta(char **meta, char *stringpool, struct pool *keys, struct pool *values, struct pool *file_keys, std::vector *intmeta) { int m; deserialize_int(meta, &m); int i; for (i = 0; i < m; i++) { struct pool_val *key = retrieve_string(meta, keys, stringpool); + struct pool_val *value = retrieve_string(meta, values, stringpool); - if (only != NULL && (strcmp(key->s, only) != 0)) { - // XXX if evaluate ever works again, check whether this is sufficient - (void) retrieve_string(meta, values, stringpool); - } else { - struct pool_val *value = retrieve_string(meta, values, stringpool); + intmeta->push_back(key->n); + intmeta->push_back(value->n); - intmeta->push_back(key->n); - intmeta->push_back(value->n); + if (!is_pooled(file_keys, key->s, value->type)) { + if (pthread_mutex_lock(&var_lock) != 0) { + perror("pthread_mutex_lock"); + exit(EXIT_FAILURE); + } - if (!is_pooled(file_keys, key->s, value->type)) { - // Dup to retain after munmap - pool(file_keys, strdup(key->s), value->type); + // Dup to retain after munmap + pool(file_keys, strdup(key->s), value->type); + + if (pthread_mutex_unlock(&var_lock) != 0) { + perror("pthread_mutex_unlock"); + exit(EXIT_FAILURE); } } } @@ -312,66 +317,6 @@ struct sll { } }; -#if 0 -void evaluate(std::vector &features, char *metabase, struct pool *file_keys, const char *layername, int line_detail, long long orig) { - std::vector options; - - struct pool_val *pv; - for (pv = file_keys->head; pv != NULL; pv = pv->next) { - struct pool keys, values; - pool_init(&keys, 0); - pool_init(&values, 0); - long long count = 0; - - for (unsigned i = 0; i < features.size(); i++) { - char *meta = features[i].metasrc; - - features[i].meta.resize(0); - decode_meta(&meta, &keys, &values, file_keys, &features[i].meta, pv->s); - } - - std::vector empty; - mapnik::vector::tile tile = create_tile(layername, line_detail, empty, &count, &keys, &values, 1); // XXX layer - - std::string s; - std::string compressed; - - tile.SerializeToString(&s); - compress(s, compressed); - - options.push_back(sll(pv->s, compressed.size())); - - pool_free(&values); - pool_free(&keys); - } - - std::sort(options.begin(), options.end()); - for (unsigned i = 0; i < options.size(); i++) { - if (options[i].val > 1024) { - fprintf(stderr, "using -x %s would save about %lld, for a tile size of of %lld\n", options[i].name, options[i].val, orig - options[i].val); - } - } - - struct pool keys, values; - pool_init(&keys, 0); - pool_init(&values, 0); - long long count = 0; - - std::vector empty; - mapnik::vector::tile tile = create_tile(layername, line_detail, features, &count, &keys, &values, nlayers); - - std::string s; - std::string compressed; - - tile.SerializeToString(&s); - compress(s, compressed); - fprintf(stderr, "geometry alone (-X) would be %lld\n", (long long) compressed.size()); - - pool_free(&values); - pool_free(&keys); -} -#endif - void rewrite(drawvec &geom, int z, int nextzoom, int file_maxzoom, long long *bbox, unsigned tx, unsigned ty, int buffer, int line_detail, int *within, long long *geompos, FILE **geomfile, const char *fname, signed char t, int layer, long long metastart, signed char feature_minzoom, int child_shards, int max_zoom_increment, long long seq, int tippecanoe_minzoom, int tippecanoe_maxzoom) { if (geom.size() > 0 && nextzoom <= file_maxzoom) { int xo, yo; @@ -475,7 +420,6 @@ void rewrite(drawvec &geom, int z, int nextzoom, int file_maxzoom, long long *bb long long write_tile(char **geoms, char *metabase, char *stringpool, int z, unsigned tx, unsigned ty, int detail, int min_detail, int basezoom, struct pool **file_keys, char **layernames, sqlite3 *outdb, double droprate, int buffer, const char *fname, FILE **geomfile, int file_minzoom, int file_maxzoom, double todo, char *geomstart, long long along, double gamma, int nlayers, char *prevent, char *additional, int child_shards) { int line_detail; - static bool evaluated = false; double oprogress = 0; double fraction = 1; @@ -737,7 +681,7 @@ long long write_tile(char **geoms, char *metabase, char *stringpool, int z, unsi c.coalesced = false; c.original_seq = original_seq; - decode_meta(&meta, stringpool, keys[layer], values[layer], file_keys[layer], &c.meta, NULL); + decode_meta(&meta, stringpool, keys[layer], values[layer], file_keys[layer], &c.meta); features[layer].push_back(c); } } @@ -826,13 +770,6 @@ long long write_tile(char **geoms, char *metabase, char *stringpool, int z, unsi fprintf(stderr, "tile %d/%u/%u size is %lld with detail %d, >500000 \n", z, tx, ty, (long long) compressed.size(), line_detail); } - if (line_detail == min_detail || !evaluated) { - evaluated = true; -#if 0 - evaluate(features[0], metabase, file_keys[0], layername, line_detail, compressed.size()); // XXX layer -#endif - } - if (prevent['d' & 0xFF]) { // The 95% is a guess to avoid too many retries // and probably actually varies based on how much duplicated metadata there is From 3bb87227ec440222ff3af536dd76ed392555de9c Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Mon, 19 Oct 2015 13:32:02 -0700 Subject: [PATCH 11/17] Locking for map center --- tile.cc | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/tile.cc b/tile.cc index 78c5666..58d3e3e 100644 --- a/tile.cc +++ b/tile.cc @@ -891,11 +891,21 @@ void *run_thread(void *vargs) { // return z - 1; } + if (pthread_mutex_lock(&db_lock) != 0) { + perror("pthread_mutex_lock"); + exit(EXIT_FAILURE); + } + if (z == arg->maxzoom && len > *arg->most) { *arg->midx = x; *arg->midy = y; *arg->most = len; } + + if (pthread_mutex_unlock(&db_lock) != 0) { + perror("pthread_mutex_unlock"); + exit(EXIT_FAILURE); + } } if (munmap(geomstart, arg->geom_size[j]) != 0) { @@ -1010,9 +1020,9 @@ int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpo args[thread].stringpool = stringpool; args[thread].min_detail = min_detail; args[thread].basezoom = maxzoom; // XXX rename? - args[thread].file_keys = file_keys; // XXX locking + args[thread].file_keys = file_keys; // locked with var_lock args[thread].layernames = layernames; - args[thread].outdb = outdb; // XXX locking + args[thread].outdb = outdb; // locked with db_lock args[thread].droprate = droprate; args[thread].buffer = buffer; args[thread].fname = fname; @@ -1029,13 +1039,13 @@ int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpo args[thread].geomfd = geomfd; args[thread].geom_size = geom_size; - args[thread].midx = midx; // XXX locking - args[thread].midy = midy; // XXX locking + args[thread].midx = midx; // locked with var_lock + args[thread].midy = midy; // locked with var_lock args[thread].maxzoom = maxzoom; args[thread].minzoom = minzoom; args[thread].full_detail = full_detail; args[thread].low_detail = low_detail; - args[thread].most = &most; // XXX locking + args[thread].most = &most; // locked with var_lock args[thread].tasks = dispatches[thread].tasks; From 56910fd016dfa5d6484c652f6dea6cc67bee4cef Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Mon, 19 Oct 2015 13:39:44 -0700 Subject: [PATCH 12/17] Restore the error message when a tile can't be made small enough --- tile.cc | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/tile.cc b/tile.cc index 58d3e3e..f8311ca 100644 --- a/tile.cc +++ b/tile.cc @@ -522,7 +522,7 @@ long long write_tile(char **geoms, char *metabase, char *stringpool, int z, unsi deserialize_byte(geoms, &feature_minzoom); double progress = floor((((*geoms - geomstart + along) / (double) todo) + z) / (file_maxzoom + 1) * 1000) / 10; - if (progress != oprogress) { + if (progress >= oprogress + 0.1) { if (!quiet) { fprintf(stderr, " %3.1f%% %d/%u/%u \r", progress, z, tx, ty); } @@ -887,8 +887,9 @@ void *run_thread(void *vargs) { long long len = write_tile(&geom, arg->metabase, arg->stringpool, z, x, y, z == arg->maxzoom ? arg->full_detail : arg->low_detail, arg->min_detail, arg->maxzoom, arg->file_keys, arg->layernames, arg->outdb, arg->droprate, arg->buffer, arg->fname, arg->geomfile, arg->minzoom, arg->maxzoom, arg->todo, geomstart, *arg->along, arg->gamma, arg->nlayers, arg->prevent, arg->additional, arg->child_shards); if (len < 0) { - return NULL; // XXX how to report errors from threads? - // return z - 1; + int *err = (int *) malloc(sizeof(int)); + *err = z - 1; + return err; } if (pthread_mutex_lock(&db_lock) != 0) { @@ -1055,12 +1056,18 @@ int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpo } } + int err = INT_MAX; + for (thread = 0; thread < threads; thread++) { void *retval; if (pthread_join(pthreads[thread], &retval) != 0) { perror("pthread_join"); } + + if (retval != NULL) { + err = *((int *) retval); + } } for (j = 0; j < TEMP_FILES; j++) { @@ -1076,6 +1083,10 @@ int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpo geomfd[j] = subfd[j]; geom_size[j] = geomst.st_size; } + + if (err != INT_MAX) { + return err; + } } if (!quiet) { From 965176f254ae226a17e75121c6ce65e54e76fa79 Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Mon, 19 Oct 2015 14:17:04 -0700 Subject: [PATCH 13/17] Make the progress indicator more aware of progress in other threads --- tile.cc | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/tile.cc b/tile.cc index f8311ca..71ee72c 100644 --- a/tile.cc +++ b/tile.cc @@ -418,9 +418,8 @@ void rewrite(drawvec &geom, int z, int nextzoom, int file_maxzoom, long long *bb } } -long long write_tile(char **geoms, char *metabase, char *stringpool, int z, unsigned tx, unsigned ty, int detail, int min_detail, int basezoom, struct pool **file_keys, char **layernames, sqlite3 *outdb, double droprate, int buffer, const char *fname, FILE **geomfile, int file_minzoom, int file_maxzoom, double todo, char *geomstart, long long along, double gamma, int nlayers, char *prevent, char *additional, int child_shards) { +long long write_tile(char **geoms, char *metabase, char *stringpool, int z, unsigned tx, unsigned ty, int detail, int min_detail, int basezoom, struct pool **file_keys, char **layernames, sqlite3 *outdb, double droprate, int buffer, const char *fname, FILE **geomfile, int file_minzoom, int file_maxzoom, double todo, char *geomstart, long long *along, double gamma, int nlayers, char *prevent, char *additional, int child_shards) { int line_detail; - double oprogress = 0; double fraction = 1; char *og = *geoms; @@ -460,7 +459,6 @@ long long write_tile(char **geoms, char *metabase, char *stringpool, int z, unsi } long long count = 0; - // long long along = 0; double accum_area = 0; double interval = 0; @@ -488,6 +486,8 @@ long long write_tile(char **geoms, char *metabase, char *stringpool, int z, unsi memset(within, '\0', sizeof(within)); memset(geompos, '\0', sizeof(geompos)); + double oprogress = 0; + *geoms = og; while (1) { @@ -521,7 +521,7 @@ long long write_tile(char **geoms, char *metabase, char *stringpool, int z, unsi signed char feature_minzoom; deserialize_byte(geoms, &feature_minzoom); - double progress = floor((((*geoms - geomstart + along) / (double) todo) + z) / (file_maxzoom + 1) * 1000) / 10; + double progress = floor((((*geoms - geomstart + *along) / (double) todo) + z) / (file_maxzoom + 1) * 1000) / 10; if (progress >= oprogress + 0.1) { if (!quiet) { fprintf(stderr, " %3.1f%% %d/%u/%u \r", progress, z, tx, ty); @@ -873,6 +873,7 @@ void *run_thread(void *vargs) { char *geomstart = geom; char *end = geom + arg->geom_size[j]; + char *prevgeom = geom; while (geom < end) { int z; @@ -884,7 +885,7 @@ void *run_thread(void *vargs) { // fprintf(stderr, "%d/%u/%u\n", z, x, y); - long long len = write_tile(&geom, arg->metabase, arg->stringpool, z, x, y, z == arg->maxzoom ? arg->full_detail : arg->low_detail, arg->min_detail, arg->maxzoom, arg->file_keys, arg->layernames, arg->outdb, arg->droprate, arg->buffer, arg->fname, arg->geomfile, arg->minzoom, arg->maxzoom, arg->todo, geomstart, *arg->along, arg->gamma, arg->nlayers, arg->prevent, arg->additional, arg->child_shards); + long long len = write_tile(&geom, arg->metabase, arg->stringpool, z, x, y, z == arg->maxzoom ? arg->full_detail : arg->low_detail, arg->min_detail, arg->maxzoom, arg->file_keys, arg->layernames, arg->outdb, arg->droprate, arg->buffer, arg->fname, arg->geomfile, arg->minzoom, arg->maxzoom, arg->todo, geomstart, arg->along, arg->gamma, arg->nlayers, arg->prevent, arg->additional, arg->child_shards); if (len < 0) { int *err = (int *) malloc(sizeof(int)); @@ -892,7 +893,7 @@ void *run_thread(void *vargs) { return err; } - if (pthread_mutex_lock(&db_lock) != 0) { + if (pthread_mutex_lock(&var_lock) != 0) { perror("pthread_mutex_lock"); exit(EXIT_FAILURE); } @@ -903,7 +904,10 @@ void *run_thread(void *vargs) { *arg->most = len; } - if (pthread_mutex_unlock(&db_lock) != 0) { + *arg->along += geom - prevgeom; + prevgeom = geom; + + if (pthread_mutex_unlock(&var_lock) != 0) { perror("pthread_mutex_unlock"); exit(EXIT_FAILURE); } @@ -912,7 +916,6 @@ void *run_thread(void *vargs) { if (munmap(geomstart, arg->geom_size[j]) != 0) { perror("munmap geom"); } - *arg->along += arg->geom_size[j]; } return NULL; From e4e14b207801027a4736decbdade446d8adaa2c4 Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Mon, 19 Oct 2015 15:43:06 -0700 Subject: [PATCH 14/17] Link with -lpthread --- Makefile | 2 +- tile.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index c9e0d02..0ffc05f 100644 --- a/Makefile +++ b/Makefile @@ -25,7 +25,7 @@ INCLUDES = -I/usr/local/include LIBS = -L/usr/local/lib tippecanoe: geojson.o jsonpull.o vector_tile.pb.o tile.o clip.o pool.o mbtiles.o geometry.o projection.o memfile.o clipper/clipper.o - g++ $(PG) $(LIBS) -O3 -g -Wall -o $@ $^ -lm -lz -lprotobuf-lite -lsqlite3 + g++ $(PG) $(LIBS) -O3 -g -Wall -o $@ $^ -lm -lz -lprotobuf-lite -lsqlite3 -lpthread enumerate: enumerate.o gcc $(PG) $(LIBS) -O3 -g -Wall -o $@ $^ -lsqlite3 diff --git a/tile.cc b/tile.cc index 71ee72c..811c49b 100644 --- a/tile.cc +++ b/tile.cc @@ -1034,7 +1034,7 @@ int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpo args[thread].file_minzoom = minzoom; args[thread].file_maxzoom = maxzoom; args[thread].todo = todo; - args[thread].along = &along; // XXX locking + args[thread].along = &along; // locked with var_lock args[thread].gamma = gamma; args[thread].nlayers = nlayers; args[thread].prevent = prevent; From a38668a6dab0f14cfb8dbd9364970ef6562f13bc Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Tue, 20 Oct 2015 10:15:02 -0700 Subject: [PATCH 15/17] Add volatile declaration to shared global variables --- tile.cc | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tile.cc b/tile.cc index 811c49b..23818fc 100644 --- a/tile.cc +++ b/tile.cc @@ -418,7 +418,7 @@ void rewrite(drawvec &geom, int z, int nextzoom, int file_maxzoom, long long *bb } } -long long write_tile(char **geoms, char *metabase, char *stringpool, int z, unsigned tx, unsigned ty, int detail, int min_detail, int basezoom, struct pool **file_keys, char **layernames, sqlite3 *outdb, double droprate, int buffer, const char *fname, FILE **geomfile, int file_minzoom, int file_maxzoom, double todo, char *geomstart, long long *along, double gamma, int nlayers, char *prevent, char *additional, int child_shards) { +long long write_tile(char **geoms, char *metabase, char *stringpool, int z, unsigned tx, unsigned ty, int detail, int min_detail, int basezoom, struct pool **file_keys, char **layernames, sqlite3 *outdb, double droprate, int buffer, const char *fname, FILE **geomfile, int file_minzoom, int file_maxzoom, double todo, char *geomstart, volatile long long *along, double gamma, int nlayers, char *prevent, char *additional, int child_shards) { int line_detail; double fraction = 1; @@ -831,7 +831,7 @@ struct write_tile_args { int file_minzoom; int file_maxzoom; double todo; - long long *along; + volatile long long *along; double gamma; int nlayers; char *prevent; @@ -839,13 +839,13 @@ struct write_tile_args { int child_shards; int *geomfd; off_t *geom_size; - unsigned *midx; - unsigned *midy; + volatile unsigned *midx; + volatile unsigned *midy; int maxzoom; int minzoom; int full_detail; int low_detail; - long long *most; + volatile long long *most; }; void *run_thread(void *vargs) { @@ -956,7 +956,6 @@ int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpo useful_threads++; } } - printf("\n"); #define MAX_THREADS 20 // XXX Obtain from sysctl(hw.ncpu), /proc/cpuinfo, etc. @@ -973,6 +972,8 @@ int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpo // Round down to a power of 2 threads = 1 << (int)(log(threads) / log(2)); + printf(" %d threads\n", threads); + // Assign temporary files to threads struct dispatch { From eb5c78482a2bdee9d869b1cf5f79af7f4f2018f5 Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Tue, 20 Oct 2015 12:01:10 -0700 Subject: [PATCH 16/17] Bump version number and add changelog details --- CHANGELOG.md | 13 +++++++++++++ tile.cc | 3 --- version.h | 2 +- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2dad6e9..b212f7d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,16 @@ +## 1.3.0 + +* Tile generation is multithreaded to take advantage of multiple CPUs +* More compact data representation reduces memory usage and improves speed +* Polygon clipping uses [Clipper](http://www.angusj.com/delphi/clipper/documentation/Docs/_Body.htm) + and makes sure interior and exterior rings are distinguished by winding order +* Individual GeoJSON features can specify their own minzoom and maxzoom +* New `tile-join` utility can add new properties from a CSV file to an existing tileset +* Feature coalescing, line-reversing, and reordering by attribute are now options, not defaults +* Output of `decode` utility is now in GeoJSON format +* Tile generation with a minzoom spends less time on unused lower zoom levels +* Bare geometries without a Feature wrapper are accepted + ## 1.2.0 * Switched to top-down rendering, yielding performance improvements diff --git a/tile.cc b/tile.cc index 23818fc..8a8cd83 100644 --- a/tile.cc +++ b/tile.cc @@ -951,7 +951,6 @@ int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpo long long along = 0; for (j = 0; j < TEMP_FILES; j++) { todo += geom_size[j]; - printf("%d", geom_size[j] != 0); if (geom_size[j] > 0) { useful_threads++; } @@ -972,8 +971,6 @@ int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpo // Round down to a power of 2 threads = 1 << (int)(log(threads) / log(2)); - printf(" %d threads\n", threads); - // Assign temporary files to threads struct dispatch { diff --git a/version.h b/version.h index 3dfed50..25399db 100644 --- a/version.h +++ b/version.h @@ -1 +1 @@ -#define VERSION "tippecanoe v1.2.0\n" +#define VERSION "tippecanoe v1.3.0\n" From e554a121fbac534c86a837c07cd0216641239515 Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Tue, 20 Oct 2015 12:03:49 -0700 Subject: [PATCH 17/17] Fix formatting --- decode.cc | 1 - geometry.cc | 10 +++++----- tile.cc | 18 +++++++++--------- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/decode.cc b/decode.cc index fb97780..e6062ab 100644 --- a/decode.cc +++ b/decode.cc @@ -417,6 +417,5 @@ int main(int argc, char **argv) { usage(argv); } - return 0; } diff --git a/geometry.cc b/geometry.cc index 508b308..482142d 100644 --- a/geometry.cc +++ b/geometry.cc @@ -339,11 +339,11 @@ drawvec reduce_tiny_poly(drawvec &geom, int z, int detail, bool *reduced, double if (*accum_area > pixel * pixel) { // XXX use centroid; - out.push_back(draw(VT_MOVETO, geom[i].x - pixel/2, geom[i].y - pixel/2)); - out.push_back(draw(VT_LINETO, geom[i].x + pixel/2, geom[i].y - pixel/2)); - out.push_back(draw(VT_LINETO, geom[i].x + pixel/2, geom[i].y + pixel/2)); - out.push_back(draw(VT_LINETO, geom[i].x - pixel/2, geom[i].y + pixel/2)); - out.push_back(draw(VT_LINETO, geom[i].x - pixel/2, geom[i].y - pixel/2)); + out.push_back(draw(VT_MOVETO, geom[i].x - pixel / 2, geom[i].y - pixel / 2)); + out.push_back(draw(VT_LINETO, geom[i].x + pixel / 2, geom[i].y - pixel / 2)); + out.push_back(draw(VT_LINETO, geom[i].x + pixel / 2, geom[i].y + pixel / 2)); + out.push_back(draw(VT_LINETO, geom[i].x - pixel / 2, geom[i].y + pixel / 2)); + out.push_back(draw(VT_LINETO, geom[i].x - pixel / 2, geom[i].y - pixel / 2)); *accum_area -= pixel * pixel; } diff --git a/tile.cc b/tile.cc index 8a8cd83..5e4cb44 100644 --- a/tile.cc +++ b/tile.cc @@ -956,7 +956,7 @@ int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpo } } -#define MAX_THREADS 20 // XXX Obtain from sysctl(hw.ncpu), /proc/cpuinfo, etc. +#define MAX_THREADS 20 // XXX Obtain from sysctl(hw.ncpu), /proc/cpuinfo, etc. int threads = MAX_THREADS; if (threads > TEMP_FILES / 4) { @@ -969,7 +969,7 @@ int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpo threads = useful_threads; } // Round down to a power of 2 - threads = 1 << (int)(log(threads) / log(2)); + threads = 1 << (int) (log(threads) / log(2)); // Assign temporary files to threads @@ -1021,10 +1021,10 @@ int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpo args[thread].metabase = metabase; args[thread].stringpool = stringpool; args[thread].min_detail = min_detail; - args[thread].basezoom = maxzoom; // XXX rename? - args[thread].file_keys = file_keys; // locked with var_lock + args[thread].basezoom = maxzoom; // XXX rename? + args[thread].file_keys = file_keys; // locked with var_lock args[thread].layernames = layernames; - args[thread].outdb = outdb; // locked with db_lock + args[thread].outdb = outdb; // locked with db_lock args[thread].droprate = droprate; args[thread].buffer = buffer; args[thread].fname = fname; @@ -1032,7 +1032,7 @@ int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpo args[thread].file_minzoom = minzoom; args[thread].file_maxzoom = maxzoom; args[thread].todo = todo; - args[thread].along = &along; // locked with var_lock + args[thread].along = &along; // locked with var_lock args[thread].gamma = gamma; args[thread].nlayers = nlayers; args[thread].prevent = prevent; @@ -1041,13 +1041,13 @@ int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpo args[thread].geomfd = geomfd; args[thread].geom_size = geom_size; - args[thread].midx = midx; // locked with var_lock - args[thread].midy = midy; // locked with var_lock + args[thread].midx = midx; // locked with var_lock + args[thread].midy = midy; // locked with var_lock args[thread].maxzoom = maxzoom; args[thread].minzoom = minzoom; args[thread].full_detail = full_detail; args[thread].low_detail = low_detail; - args[thread].most = &most; // locked with var_lock + args[thread].most = &most; // locked with var_lock args[thread].tasks = dispatches[thread].tasks;