Merge pull request #136 from mapbox/multithread-input2

Multithreaded input
This commit is contained in:
Eric Fischer 2016-01-04 17:29:25 -08:00
commit 32111af79d
13 changed files with 694 additions and 226 deletions

View File

@ -1,3 +1,7 @@
## 1.6.0
* Add an option for parallelized input when reading from a line-delimited JSON file
## 1.5.1
* Fix internal error when number of CPUs is not a power of 2

View File

@ -65,6 +65,11 @@ Options
* -o _file_.mbtiles: Name the output file.
* -f: Delete the mbtiles file if it already exists instead of giving an error
* -t _directory_: Put the temporary files in _directory_.
* -P: Use multiple threads to read different parts of each input file at once.
This will only work if the input is line-delimited JSON with each Feature on its
own line, because the parallel reader knows nothing of the top-level structure around the Features.
In addition, it only works if the input is a named file that can be mapped into memory
rather than a stream that can only be read sequentially.
### Zoom levels and resolution

826
geojson.c

File diff suppressed because it is too large Load Diff

View File

@ -18,7 +18,7 @@ extern "C" {
#include "projection.h"
}
drawvec decode_geometry(char **meta, int z, unsigned tx, unsigned ty, int detail, long long *bbox) {
drawvec decode_geometry(char **meta, int z, unsigned tx, unsigned ty, int detail, long long *bbox, unsigned initial_x, unsigned initial_y) {
drawvec out;
bbox[0] = LONG_LONG_MAX;

View File

@ -16,7 +16,7 @@ struct draw {
typedef std::vector<draw> drawvec;
drawvec decode_geometry(char **meta, int z, unsigned tx, unsigned ty, int detail, long long *bbox);
drawvec decode_geometry(char **meta, int z, unsigned tx, unsigned ty, int detail, long long *bbox, unsigned initial_x, unsigned initial_y);
void to_tile_scale(drawvec &geom, int z, int detail);
drawvec remove_noop(drawvec geom, int type, int shift);
drawvec clip_point(drawvec &geom, int z, int detail, long long buffer);

View File

@ -332,20 +332,17 @@ again:
/////////////////////////// Comma
if (c == ',') {
if (j->container == NULL) {
j->error = "Found comma at top level";
return NULL;
}
if (j->container != NULL) {
if (j->container->expect != JSON_COMMA) {
j->error = "Found unexpected comma";
return NULL;
}
if (j->container->expect != JSON_COMMA) {
j->error = "Found unexpected comma";
return NULL;
}
if (j->container->type == JSON_HASH) {
j->container->expect = JSON_KEY;
} else {
j->container->expect = JSON_ITEM;
if (j->container->type == JSON_HASH) {
j->container->expect = JSON_KEY;
} else {
j->container->expect = JSON_ITEM;
}
}
if (cb != NULL) {
@ -568,11 +565,19 @@ void json_free(json_object *o) {
free(o->string);
}
json_disconnect(o);
free(o);
}
void json_disconnect(json_object *o) {
// Expunge references to this as an array element
// or a hash key or value.
if (o->parent != NULL) {
if (o->parent->type == JSON_ARRAY) {
int i;
for (i = 0; i < o->parent->length; i++) {
if (o->parent->array[i] == o) {
break;
@ -586,6 +591,8 @@ void json_free(json_object *o) {
}
if (o->parent->type == JSON_HASH) {
int i;
for (i = 0; i < o->parent->length; i++) {
if (o->parent->keys[i] == o) {
o->parent->keys[i] = fabricate_object(o->parent, JSON_NULL);
@ -612,5 +619,5 @@ void json_free(json_object *o) {
}
}
free(o);
o->parent = NULL;
}

View File

@ -59,5 +59,6 @@ json_object *json_read_tree(json_pull *j);
json_object *json_read(json_pull *j);
json_object *json_read_separators(json_pull *j, json_separator_callback cb, void *state);
void json_free(json_object *j);
void json_disconnect(json_object *j);
json_object *json_hash_get(json_object *o, const char *s);

View File

@ -5,6 +5,9 @@ Builds vector tilesets
\[la]http://geojson.org/\[ra]
features. This is a tool for making maps from huge datasets
\[la]MADE_WITH.md\[ra]\&.
.PP
[Build Status](https://travis\-ci.org/mapbox/tippecanoe.svg)
\[la]https://travis-ci.org/mapbox/tippecanoe\[ra]
.SH Intent
.PP
The goal of Tippecanoe is to enable making a scale\-independent view of your data,
@ -67,6 +70,12 @@ specified, the files are all merged into the single named layer.
\-f: Delete the mbtiles file if it already exists instead of giving an error
.IP \(bu 2
\-t \fIdirectory\fP: Put the temporary files in \fIdirectory\fP\&.
.IP \(bu 2
\-P: Use multiple threads to read different parts of each input file at once.
This will only work if the input is line\-delimited JSON with each Feature on its
own line, because the parallel reader knows nothing of the top\-level structure around the Features.
In addition, it only works if the input is a named file that can be mapped into memory
rather than a stream that can only be read sequentially.
.RE
.SS Zoom levels and resolution
.RS

View File

@ -26,6 +26,7 @@ struct memfile *memfile_open(int fd) {
mf->map = map;
mf->len = INCREMENT;
mf->off = 0;
mf->tree = 0;
return mf;
}

View File

@ -3,6 +3,7 @@ struct memfile {
char *map;
long long len;
long long off;
long long tree;
};
struct memfile *memfile_open(int fd);

30
tile.cc
View File

@ -363,7 +363,7 @@ struct sll {
}
};
void rewrite(drawvec &geom, int z, int nextzoom, int maxzoom, long long *bbox, unsigned tx, unsigned ty, int buffer, int line_detail, int *within, long long *geompos, FILE **geomfile, const char *fname, signed char t, int layer, long long metastart, signed char feature_minzoom, int child_shards, int max_zoom_increment, long long seq, int tippecanoe_minzoom, int tippecanoe_maxzoom) {
void rewrite(drawvec &geom, int z, int nextzoom, int maxzoom, long long *bbox, unsigned tx, unsigned ty, int buffer, int line_detail, int *within, long long *geompos, FILE **geomfile, const char *fname, signed char t, int layer, long long metastart, signed char feature_minzoom, int child_shards, int max_zoom_increment, long long seq, int tippecanoe_minzoom, int tippecanoe_maxzoom, int segment, unsigned *initial_x, unsigned *initial_y) {
if (geom.size() > 0 && nextzoom <= maxzoom) {
int xo, yo;
int span = 1 << (nextzoom - z);
@ -442,8 +442,9 @@ void rewrite(drawvec &geom, int z, int nextzoom, int maxzoom, long long *bbox, u
if (tippecanoe_maxzoom != -1) {
serialize_int(geomfile[j], tippecanoe_maxzoom, geompos, fname);
}
serialize_int(geomfile[j], segment, &geompos[j], fname);
serialize_long_long(geomfile[j], metastart, &geompos[j], fname);
long long wx = initial_x, wy = initial_y;
long long wx = initial_x[segment], wy = initial_y[segment];
for (unsigned u = 0; u < geom.size(); u++) {
serialize_byte(geomfile[j], geom[u].op, &geompos[j], fname);
@ -464,7 +465,7 @@ void rewrite(drawvec &geom, int z, int nextzoom, int maxzoom, long long *bbox, u
}
}
long long write_tile(char **geoms, char *metabase, char *stringpool, int z, unsigned tx, unsigned ty, int detail, int min_detail, int basezoom, struct pool **file_keys, char **layernames, sqlite3 *outdb, double droprate, int buffer, const char *fname, FILE **geomfile, int minzoom, int maxzoom, double todo, char *geomstart, volatile long long *along, double gamma, int nlayers, char *prevent, char *additional, int child_shards) {
long long write_tile(char **geoms, char *metabase, char *stringpool, int z, unsigned tx, unsigned ty, int detail, int min_detail, int basezoom, struct pool **file_keys, char **layernames, sqlite3 *outdb, double droprate, int buffer, const char *fname, FILE **geomfile, int minzoom, int maxzoom, double todo, char *geomstart, volatile long long *along, double gamma, int nlayers, char *prevent, char *additional, int child_shards, long long *meta_off, long long *pool_off, unsigned *initial_x, unsigned *initial_y) {
int line_detail;
double fraction = 1;
@ -557,12 +558,15 @@ long long write_tile(char **geoms, char *metabase, char *stringpool, int z, unsi
}
layer >>= 2;
int segment;
deserialize_int(geoms, &segment);
long long metastart;
deserialize_long_long(geoms, &metastart);
char *meta = metabase + metastart;
char *meta = metabase + metastart + meta_off[segment];
long long bbox[4];
drawvec geom = decode_geometry(geoms, z, tx, ty, line_detail, bbox);
drawvec geom = decode_geometry(geoms, z, tx, ty, line_detail, bbox, initial_x[segment], initial_y[segment]);
signed char feature_minzoom;
deserialize_byte(geoms, &feature_minzoom);
@ -631,7 +635,7 @@ long long write_tile(char **geoms, char *metabase, char *stringpool, int z, unsi
}
if (line_detail == detail && fraction == 1) { /* only write out the next zoom once, even if we retry */
rewrite(geom, z, nextzoom, maxzoom, bbox, tx, ty, buffer, line_detail, within, geompos, geomfile, fname, t, layer, metastart, feature_minzoom, child_shards, max_zoom_increment, original_seq, tippecanoe_minzoom, tippecanoe_maxzoom);
rewrite(geom, z, nextzoom, maxzoom, bbox, tx, ty, buffer, line_detail, within, geompos, geomfile, fname, t, layer, metastart, feature_minzoom, child_shards, max_zoom_increment, original_seq, tippecanoe_minzoom, tippecanoe_maxzoom, segment, initial_x, initial_y);
}
if (z < minzoom) {
@ -754,7 +758,7 @@ long long write_tile(char **geoms, char *metabase, char *stringpool, int z, unsi
c.coalesced = false;
c.original_seq = original_seq;
decode_meta(&meta, stringpool, keys[layer], values[layer], file_keys[layer], &c.meta);
decode_meta(&meta, stringpool + pool_off[segment], keys[layer], values[layer], file_keys[layer], &c.meta);
features[layer].push_back(c);
}
}
@ -923,6 +927,10 @@ struct write_tile_args {
int full_detail;
int low_detail;
volatile long long *most;
long long *meta_off;
long long *pool_off;
unsigned *initial_x;
unsigned *initial_y;
};
void *run_thread(void *vargs) {
@ -962,7 +970,7 @@ void *run_thread(void *vargs) {
// fprintf(stderr, "%d/%u/%u\n", z, x, y);
long long len = write_tile(&geom, arg->metabase, arg->stringpool, z, x, y, z == arg->maxzoom ? arg->full_detail : arg->low_detail, arg->min_detail, arg->basezoom, arg->file_keys, arg->layernames, arg->outdb, arg->droprate, arg->buffer, arg->fname, arg->geomfile, arg->minzoom, arg->maxzoom, arg->todo, geomstart, arg->along, arg->gamma, arg->nlayers, arg->prevent, arg->additional, arg->child_shards);
long long len = write_tile(&geom, arg->metabase, arg->stringpool, z, x, y, z == arg->maxzoom ? arg->full_detail : arg->low_detail, arg->min_detail, arg->basezoom, arg->file_keys, arg->layernames, arg->outdb, arg->droprate, arg->buffer, arg->fname, arg->geomfile, arg->minzoom, arg->maxzoom, arg->todo, geomstart, arg->along, arg->gamma, arg->nlayers, arg->prevent, arg->additional, arg->child_shards, arg->meta_off, arg->pool_off, arg->initial_x, arg->initial_y);
if (len < 0) {
int *err = (int *) malloc(sizeof(int));
@ -998,7 +1006,7 @@ void *run_thread(void *vargs) {
return NULL;
}
int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpool, struct pool **file_keys, unsigned *midx, unsigned *midy, char **layernames, int maxzoom, int minzoom, int basezoom, sqlite3 *outdb, double droprate, int buffer, const char *fname, const char *tmpdir, double gamma, int nlayers, char *prevent, char *additional, int full_detail, int low_detail, int min_detail) {
int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpool, struct pool **file_keys, unsigned *midx, unsigned *midy, char **layernames, int maxzoom, int minzoom, int basezoom, sqlite3 *outdb, double droprate, int buffer, const char *fname, const char *tmpdir, double gamma, int nlayers, char *prevent, char *additional, int full_detail, int low_detail, int min_detail, long long *meta_off, long long *pool_off, unsigned *initial_x, unsigned *initial_y) {
int i;
for (i = 0; i <= maxzoom; i++) {
long long most = 0;
@ -1122,6 +1130,10 @@ int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpo
args[thread].full_detail = full_detail;
args[thread].low_detail = low_detail;
args[thread].most = &most; // locked with var_lock
args[thread].meta_off = meta_off;
args[thread].pool_off = pool_off;
args[thread].initial_x = initial_x;
args[thread].initial_y = initial_y;
args[thread].tasks = dispatches[thread].tasks;

2
tile.h
View File

@ -27,7 +27,7 @@ struct pool_val *deserialize_string(char **f, struct pool *p, int type);
long long write_tile(char **geom, char *metabase, char *stringpool, unsigned *file_bbox, int z, unsigned x, unsigned y, int detail, int min_detail, int basezoom, struct pool **file_keys, char **layernames, sqlite3 *outdb, double droprate, int buffer, const char *fname, FILE **geomfile, int file_minzoom, int file_maxzoom, double todo, char *geomstart, long long along, double gamma, int nlayers, char *prevent, char *additional);
int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpool, struct pool **file_keys, unsigned *midx, unsigned *midy, char **layernames, int maxzoom, int minzoom, int basezoom, sqlite3 *outdb, double droprate, int buffer, const char *fname, const char *tmpdir, double gamma, int nlayers, char *prevent, char *additional, int full_detail, int low_detail, int min_detail);
int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpool, struct pool **file_keys, unsigned *midx, unsigned *midy, char **layernames, int maxzoom, int minzoom, int basezoom, sqlite3 *outdb, double droprate, int buffer, const char *fname, const char *tmpdir, double gamma, int nlayers, char *prevent, char *additional, int full_detail, int low_detail, int min_detail, long long *meta_off, long long *pool_off, unsigned *initial_x, unsigned *initial_y);
extern unsigned initial_x, initial_y;
extern int geometry_scale;

View File

@ -1 +1 @@
#define VERSION "tippecanoe v1.5.1\n"
#define VERSION "tippecanoe v1.6.0\n"