Make parallel reading a command-line option

This commit is contained in:
Eric Fischer 2015-12-22 14:58:19 -08:00
parent ca4d1beb7c
commit fde3aa45de
3 changed files with 41 additions and 20 deletions

View File

@ -65,6 +65,11 @@ Options
* -o _file_.mbtiles: Name the output file.
* -f: Delete the mbtiles file if it already exists instead of giving an error
* -t _directory_: Put the temporary files in _directory_.
* -P: Use multiple threads to read different parts of each input file at once.
This will only work if the input is line-delimited JSON with each Feature on its
own line, because the parallel reader knows nothing of the top-level structure around the Features.
In addition, it only works if the input is a named file that can be mapped into memory
rather than a stream that can only be read sequentially.
### Zoom levels and resolution

View File

@ -823,15 +823,15 @@ struct reader {
int geomfd;
int indexfd;
FILE *metafile;
struct memfile *poolfile;
struct memfile *treefile;
FILE *geomfile;
FILE *indexfile;
FILE *metafile;
struct memfile *poolfile;
struct memfile *treefile;
FILE *geomfile;
FILE *indexfile;
long long metapos;
long long geompos;
long long indexpos;
long long metapos;
long long geompos;
long long indexpos;
long long *file_bbox;
@ -841,7 +841,7 @@ struct reader {
char *geom_map;
};
int read_json(int argc, char **argv, char *fname, const char *layername, int maxzoom, int minzoom, int basezoom, double basezoom_marker_width, sqlite3 *outdb, struct pool *exclude, struct pool *include, int exclude_all, double droprate, int buffer, const char *tmpdir, double gamma, char *prevent, char *additional) {
int read_json(int argc, char **argv, char *fname, const char *layername, int maxzoom, int minzoom, int basezoom, double basezoom_marker_width, sqlite3 *outdb, struct pool *exclude, struct pool *include, int exclude_all, double droprate, int buffer, const char *tmpdir, double gamma, char *prevent, char *additional, int read_parallel) {
int ret = EXIT_SUCCESS;
struct reader reader[CPUS];
@ -971,10 +971,12 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max
char *map = NULL;
off_t off = 0;
if (fstat(fd, &st) == 0) {
off = lseek(fd, 0, SEEK_CUR);
if (off >= 0) {
map = mmap(NULL, st.st_size - off, PROT_READ, MAP_PRIVATE, fd, off);
if (read_parallel) {
if (fstat(fd, &st) == 0) {
off = lseek(fd, 0, SEEK_CUR);
if (off >= 0) {
map = mmap(NULL, st.st_size - off, PROT_READ, MAP_PRIVATE, fd, off);
}
}
}
@ -1067,8 +1069,8 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max
}
if (reader[i].geomst.st_size == 0 || reader[i].metast.st_size == 0) {
fprintf(stderr, "did not read any valid geometries\n"); // XXX
// exit(EXIT_FAILURE);
fprintf(stderr, "did not read any valid geometries\n"); // XXX
// exit(EXIT_FAILURE);
}
}
@ -1634,7 +1636,7 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max
}
char *stringpool = NULL;
if (poolpos > 0) { // Will be 0 if -X was specified
if (poolpos > 0) { // Will be 0 if -X was specified
stringpool = (char *) mmap(NULL, poolpos, PROT_READ, MAP_PRIVATE, poolfd, 0);
if (stringpool == MAP_FAILED) {
perror("mmap string pool");
@ -1673,7 +1675,7 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max
midlat = (maxlat + minlat) / 2;
midlon = (maxlon + minlon) / 2;
long long file_bbox[4] = { UINT_MAX, UINT_MAX, 0, 0 };
long long file_bbox[4] = {UINT_MAX, UINT_MAX, 0, 0};
for (i = 0; i < CPUS; i++) {
if (reader[i].file_bbox[0] < file_bbox[0]) {
file_bbox[0] = reader[i].file_bbox[0];
@ -1763,13 +1765,14 @@ int main(int argc, char **argv) {
pool_init(&exclude, 0);
pool_init(&include, 0);
int exclude_all = 0;
int read_parallel = 0;
for (i = 0; i < 256; i++) {
prevent[i] = 0;
additional[i] = 0;
}
while ((i = getopt(argc, argv, "l:n:z:Z:d:D:m:o:x:y:r:b:fXt:g:p:vqa:B:")) != -1) {
while ((i = getopt(argc, argv, "l:n:z:Z:d:D:m:o:x:y:r:b:fXt:g:p:vqa:B:P")) != -1) {
switch (i) {
case 'n':
name = optarg;
@ -1882,8 +1885,12 @@ int main(int argc, char **argv) {
fprintf(stderr, VERSION);
exit(EXIT_FAILURE);
case 'P':
read_parallel = 1;
break;
default:
fprintf(stderr, "Usage: %s -o out.mbtiles [-n name] [-l layername] [-z maxzoom] [-Z minzoom] [-B basezoom] [-d detail] [-D lower-detail] [-m min-detail] [-x excluded-field ...] [-y included-field ...] [-X] [-r droprate] [-b buffer] [-t tmpdir] [-a rco] [-p sfkld] [-q] [file.json ...]\n", argv[0]);
fprintf(stderr, "Usage: %s -o out.mbtiles [-n name] [-l layername] [-z maxzoom] [-Z minzoom] [-B basezoom] [-d detail] [-D lower-detail] [-m min-detail] [-x excluded-field ...] [-y included-field ...] [-X] [-r droprate] [-b buffer] [-t tmpdir] [-a rco] [-p sfkld] [-q] [-P] [file.json ...]\n", argv[0]);
exit(EXIT_FAILURE);
}
}
@ -1927,7 +1934,7 @@ int main(int argc, char **argv) {
sqlite3 *outdb = mbtiles_open(outdir, argv);
int ret = EXIT_SUCCESS;
ret = read_json(argc - optind, argv + optind, name ? name : outdir, layer, maxzoom, minzoom, basezoom, basezoom_marker_width, outdb, &exclude, &include, exclude_all, droprate, buffer, tmpdir, gamma, prevent, additional);
ret = read_json(argc - optind, argv + optind, name ? name : outdir, layer, maxzoom, minzoom, basezoom, basezoom_marker_width, outdb, &exclude, &include, exclude_all, droprate, buffer, tmpdir, gamma, prevent, additional, read_parallel);
mbtiles_close(outdb, argv);

View File

@ -5,6 +5,9 @@ Builds vector tilesets
\[la]http://geojson.org/\[ra]
features. This is a tool for making maps from huge datasets
\[la]MADE_WITH.md\[ra]\&.
.PP
Build Status
\[la]https://travis-ci.org/mapbox/tippecanoe\[ra]
.SH Intent
.PP
The goal of Tippecanoe is to enable making a scale\-independent view of your data,
@ -67,6 +70,12 @@ specified, the files are all merged into the single named layer.
\-f: Delete the mbtiles file if it already exists instead of giving an error
.IP \(bu 2
\-t \fIdirectory\fP: Put the temporary files in \fIdirectory\fP\&.
.IP \(bu 2
\-P: Use multiple threads to read different parts of each input file at once.
This will only work if the input is line\-delimited JSON with each Feature on its
own line, because the parallel reader knows nothing of the top\-level structure around the Features.
In addition, it only works if the input is a named file that can be mapped into memory
rather than a stream that can only be read sequentially.
.RE
.SS Zoom levels and resolution
.RS