Merge pull request #568 from mapbox/gzip-input

Directly support gzipped GeoJSON files as input
This commit is contained in:
Eric Fischer 2018-05-15 00:08:51 +02:00 committed by GitHub
commit 6db02e8457
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 141 additions and 229 deletions

View File

@ -1,3 +1,7 @@
## 1.28.0
* Directly support gzipped GeoJSON as input files
## 1.27.16
* Fix thread safety issues related to the out-of-disk-space checker

View File

@ -85,9 +85,11 @@ SPACE = $(NULL) $(NULL)
test: tippecanoe tippecanoe-decode $(addsuffix .check,$(TESTS)) raw-tiles-test parallel-test pbf-test join-test enumerate-test decode-test join-filter-test unit json-tool-test allow-existing-test csv-test
./unit
suffixes = json json.gz
# Work around Makefile and filename punctuation limits: _ for space, @ for :, % for /
%.json.check:
./tippecanoe -a@ -f -o $@.mbtiles $(subst @,:,$(subst %,/,$(subst _, ,$(patsubst %.json.check,%,$(word 4,$(subst /, ,$@)))))) $(wildcard $(subst $(SPACE),/,$(wordlist 1,2,$(subst /, ,$@)))/*.json) < /dev/null
./tippecanoe -a@ -f -o $@.mbtiles $(subst @,:,$(subst %,/,$(subst _, ,$(patsubst %.json.check,%,$(word 4,$(subst /, ,$@)))))) $(foreach suffix,$(suffixes),$(wildcard $(subst $(SPACE),/,$(wordlist 1,2,$(subst /, ,$@)))/*.$(suffix))) < /dev/null
./tippecanoe-decode $@.mbtiles > $@.out
cmp $@.out $(patsubst %.check,%,$@)
rm $@.out $@.mbtiles
@ -101,7 +103,8 @@ fewer-tests: tippecanoe tippecanoe-decode geobuf-test raw-tiles-test parallel-te
# XXX Use proper makefile rules instead of a for loop
%.json.checkbuf:
for i in $(wildcard $(subst $(SPACE),/,$(wordlist 1,2,$(subst /, ,$@)))/*.json); do ./tippecanoe-json-tool -w $$i | ./node_modules/geobuf/bin/json2geobuf > $$i.geobuf; done
./tippecanoe -a@ -f -o $@.mbtiles $(subst @,:,$(subst %,/,$(subst _, ,$(patsubst %.json.checkbuf,%,$(word 4,$(subst /, ,$@)))))) $(addsuffix .geobuf,$(wildcard $(subst $(SPACE),/,$(wordlist 1,2,$(subst /, ,$@)))/*.json)) < /dev/null
for i in $(wildcard $(subst $(SPACE),/,$(wordlist 1,2,$(subst /, ,$@)))/*.json.gz); do gzip -dc $$i | ./tippecanoe-json-tool -w | ./node_modules/geobuf/bin/json2geobuf > $$i.geobuf; done
./tippecanoe -a@ -f -o $@.mbtiles $(subst @,:,$(subst %,/,$(subst _, ,$(patsubst %.json.checkbuf,%,$(word 4,$(subst /, ,$@)))))) $(foreach suffix,$(suffixes),$(addsuffix .geobuf,$(wildcard $(subst $(SPACE),/,$(wordlist 1,2,$(subst /, ,$@)))/*.$(suffix)))) < /dev/null
./tippecanoe-decode $@.mbtiles | sed 's/checkbuf/check/g' > $@.out
cmp $@.out $(patsubst %.checkbuf,%,$@)
rm $@.out $@.mbtiles
@ -160,7 +163,7 @@ pbf-test:
rm tests/pbf/11-328-791.3857.vector.pbf.out
enumerate-test:
./tippecanoe -z5 -f -o tests/ne_110m_admin_0_countries/out/enum.mbtiles tests/ne_110m_admin_0_countries/in.json
./tippecanoe -z5 -f -o tests/ne_110m_admin_0_countries/out/enum.mbtiles tests/ne_110m_admin_0_countries/in.json.gz
./tippecanoe-enumerate tests/ne_110m_admin_0_countries/out/enum.mbtiles > tests/ne_110m_admin_0_countries/out/enum.check
cmp tests/ne_110m_admin_0_countries/out/enum tests/ne_110m_admin_0_countries/out/enum.check
rm tests/ne_110m_admin_0_countries/out/enum.mbtiles tests/ne_110m_admin_0_countries/out/enum.check
@ -280,7 +283,7 @@ csv-test:
prep-test: $(TESTS)
tests/%.json: Makefile tippecanoe tippecanoe-decode
./tippecanoe -f -o $@.check.mbtiles $(subst @,:,$(subst %,/,$(subst _, ,$(patsubst %.json,%,$(word 4,$(subst /, ,$@)))))) $(wildcard $(subst $(SPACE),/,$(wordlist 1,2,$(subst /, ,$@)))/*.json)
./tippecanoe -f -o $@.check.mbtiles $(subst @,:,$(subst %,/,$(subst _, ,$(patsubst %.json,%,$(word 4,$(subst /, ,$@)))))) $(foreach suffix,$(suffixes),$(wildcard $(subst $(SPACE),/,$(wordlist 1,2,$(subst /, ,$@)))/*.$(suffix)))
./tippecanoe-decode $@.check.mbtiles > $@
cmp $(patsubst %.check,%,$@) $@
rm $@.check.mbtiles

View File

@ -56,7 +56,7 @@ Usage
-----
```sh
$ tippecanoe -o file.mbtiles [options] [file.json file.geobuf ...]
$ tippecanoe -o file.mbtiles [options] [file.json file.json.gz file.geobuf ...]
```
If no files are specified, it reads GeoJSON from the standard input.
@ -142,6 +142,7 @@ If your input is formatted as newline-delimited GeoJSON, use `-P` to make input
### Input files and layer names
* _name_`.json` or _name_`.geojson`: Read the named GeoJSON input file into a layer called _name_.
* _name_`.json.gz` or _name_`.geojson.gz`: Read the named gzipped GeoJSON input file into a layer called _name_.
* _name_`.geobuf`: Read the named Geobuf input file into a layer called _name_.
* _name_`.csv`: Read the named CSV input file into a layer called _name_.
* `-l` _name_ or `--layer=`_name_: Use the specified layer name instead of deriving a name from the input filename or output tileset. If there are multiple input files

151
main.cpp
View File

@ -26,6 +26,7 @@
#include <getopt.h>
#include <signal.h>
#include <sys/time.h>
#include <limits.h>
#include <zlib.h>
#include <algorithm>
#include <vector>
#include <string>
@ -476,9 +477,96 @@ void do_read_parallel(char *map, long long len, long long initial_offset, const
}
}
static ssize_t read_stream(json_pull *j, char *buffer, size_t n);
// Input abstraction over either a plain stdio stream or a gzip-compressed
// stream. Exactly one of `fp` and `gz` is expected to be non-NULL; every
// method checks `gz` first. Instances are heap-allocated (see streamfdopen
// and streamfpopen) and destroy themselves in fclose().
struct STREAM {
	FILE *fp = NULL;   // plain, uncompressed input
	gzFile gz = NULL;  // gzip-compressed input

	// Close the underlying stream and free this object.
	// Returns the underlying close status (0 / Z_OK on success).
	// NOTE: `delete this` — the object must never be used after this call.
	int fclose() {
		int ret;

		if (gz != NULL) {
			ret = gzclose(gz);
		} else {
			ret = ::fclose(fp);
		}

		delete this;
		return ret;
	}

	// Return the next byte without consuming it, or EOF at end of input.
	int peekc() {
		if (gz != NULL) {
			int c = gzgetc(gz);
			if (c != EOF) {
				gzungetc(c, gz);
			}
			return c;
		} else {
			int c = getc(fp);
			if (c != EOF) {
				ungetc(c, fp);
			}
			return c;
		}
	}

	// Read up to `count` bytes into `out`. Returns the number of bytes
	// actually read (which may be short), or 0 at end of input. Exits the
	// process on a gzip decompression error.
	size_t read(char *out, size_t count) {
		if (gz != NULL) {
			// gzread takes an unsigned length and returns an int, and
			// zlib documents failure when len does not fit in an int.
			// Clamp the request to INT_MAX; callers of read callbacks
			// already tolerate short reads, so this stays correct.
			if (count > (size_t) INT_MAX) {
				count = (size_t) INT_MAX;
			}
			int ret = gzread(gz, out, count);
			if (ret < 0) {
				fprintf(stderr, "%s: Error reading compressed data\n", *av);
				exit(EXIT_FAILURE);
			}
			return ret;
		} else {
			return ::fread(out, 1, count, fp);
		}
	}

	// Start a pull-parser whose input is drawn from this stream
	// via the read_stream callback.
	json_pull *json_begin() {
		return ::json_begin(read_stream, this);
	}
};
// json_pull read callback: forwards the request to the STREAM object
// stashed in the parser's source pointer.
static ssize_t read_stream(json_pull *j, char *buffer, size_t n) {
	STREAM *stream = (STREAM *) j->source;
	return stream->read(buffer, n);
}
// Wrap an already-open file descriptor in a heap-allocated STREAM.
// Files whose names end in ".gz" are opened for transparent gzip
// decompression; anything else is read as a plain stdio stream.
// Exits the process if the descriptor cannot be adopted either way.
STREAM *streamfdopen(int fd, const char *mode, std::string const &fname) {
	STREAM *stream = new STREAM;
	stream->fp = NULL;
	stream->gz = NULL;

	size_t len = fname.size();
	bool gzipped = len > 3 && fname.compare(len - 3, 3, ".gz") == 0;

	if (gzipped) {
		stream->gz = gzdopen(fd, mode);
		if (stream->gz == NULL) {
			fprintf(stderr, "%s: %s: Decompression error\n", *av, fname.c_str());
			exit(EXIT_FAILURE);
		}
	} else {
		stream->fp = fdopen(fd, mode);
		if (stream->fp == NULL) {
			perror(fname.c_str());
			exit(EXIT_FAILURE);
		}
	}

	return stream;
}
// Wrap an existing stdio FILE * (never gzip-compressed) in a
// heap-allocated STREAM.
STREAM *streamfpopen(FILE *fp) {
	STREAM *stream = new STREAM;
	stream->gz = NULL;
	stream->fp = fp;
	return stream;
}
struct read_parallel_arg {
int fd = 0;
FILE *fp = NULL;
STREAM *fp = NULL;
long long offset = 0;
long long len = 0;
std::atomic<int> *is_parsing = NULL;
@ -532,7 +620,7 @@ void *run_read_parallel(void *v) {
if (munmap(map, rpa->len) != 0) {
perror("munmap source file");
}
if (fclose(rpa->fp) != 0) {
if (rpa->fp->fclose() != 0) {
perror("close source file");
exit(EXIT_FAILURE);
}
@ -543,7 +631,7 @@ void *run_read_parallel(void *v) {
return NULL;
}
void start_parsing(int fd, FILE *fp, long long offset, long long len, std::atomic<int> *is_parsing, pthread_t *parallel_parser, bool &parser_created, const char *reading, std::vector<struct reader> *readers, std::atomic<long long> *progress_seq, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, json_object *filter, int basezoom, int source, std::vector<std::map<std::string, layermap_entry> > &layermaps, int *initialized, unsigned *initial_x, unsigned *initial_y, int maxzoom, std::string layername, bool uses_gamma, std::map<std::string, int> const *attribute_types, int separator, double *dist_sum, size_t *dist_count, bool want_dist, bool filters) {
void start_parsing(int fd, STREAM *fp, long long offset, long long len, std::atomic<int> *is_parsing, pthread_t *parallel_parser, bool &parser_created, const char *reading, std::vector<struct reader> *readers, std::atomic<long long> *progress_seq, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, json_object *filter, int basezoom, int source, std::vector<std::map<std::string, layermap_entry> > &layermaps, int *initialized, unsigned *initial_x, unsigned *initial_y, int maxzoom, std::string layername, bool uses_gamma, std::map<std::string, int> const *attribute_types, int separator, double *dist_sum, size_t *dist_count, bool want_dist, bool filters) {
// This has to kick off an intermediate thread to start the parser threads,
// so the main thread can get back to reading the next input stage while
// the intermediate thread waits for the completion of the parser threads.
@ -1173,30 +1261,24 @@ int read_input(std::vector<source> &sources, char *fname, int maxzoom, int minzo
}
std::string trunc = std::string(use);
std::vector<std::string> trim = {
".json",
".geojson",
".geobuf",
".mbtiles",
".gz",
};
// Trim .json or .mbtiles from the name
while (true) {
ssize_t cp;
cp = trunc.find(".json");
if (cp >= 0 && (size_t) cp + 5 == trunc.size()) {
trunc = trunc.substr(0, cp);
continue;
bool again = true;
while (again) {
again = false;
for (size_t i = 0; i < trim.size(); i++) {
if (trunc.size() > trim[i].size() && trunc.substr(trunc.size() - trim[i].size()) == trim[i]) {
trunc = trunc.substr(0, trunc.size() - trim[i].size());
again = true;
}
}
cp = trunc.find(".geojson");
if (cp >= 0 && (size_t) cp + 8 == trunc.size()) {
trunc = trunc.substr(0, cp);
continue;
}
cp = trunc.find(".geobuf");
if (cp >= 0 && (size_t) cp + 7 == trunc.size()) {
trunc = trunc.substr(0, cp);
continue;
}
cp = trunc.find(".mbtiles");
if (cp >= 0 && (size_t) cp + 8 == trunc.size()) {
trunc = trunc.substr(0, cp);
continue;
}
break;
}
// Trim out characters that can't be part of selector
@ -1394,7 +1476,7 @@ int read_input(std::vector<source> &sources, char *fname, int maxzoom, int minzo
int read_parallel_this = read_parallel ? '\n' : 0;
if (1) {
if (!(sources[source].file.size() > 3 && sources[source].file.substr(sources[source].file.size() - 3) == std::string(".gz"))) {
if (fstat(fd, &st) == 0) {
off = lseek(fd, 0, SEEK_CUR);
if (off >= 0) {
@ -1439,7 +1521,7 @@ int read_input(std::vector<source> &sources, char *fname, int maxzoom, int minzo
exit(EXIT_FAILURE);
}
} else {
FILE *fp = fdopen(fd, "r");
STREAM *fp = streamfdopen(fd, "r", sources[layer].file);
if (fp == NULL) {
perror(sources[layer].file.c_str());
if (close(fd) != 0) {
@ -1449,10 +1531,7 @@ int read_input(std::vector<source> &sources, char *fname, int maxzoom, int minzo
continue;
}
int c = getc(fp);
if (c != EOF) {
ungetc(c, fp);
}
int c = fp->peekc();
if (c == 0x1E) {
read_parallel_this = 0x1E;
}
@ -1487,7 +1566,7 @@ int read_input(std::vector<source> &sources, char *fname, int maxzoom, int minzo
char buf[READ_BUF];
int n;
while ((n = fread(buf, sizeof(char), READ_BUF, fp)) > 0) {
while ((n = fp->read(buf, READ_BUF)) > 0) {
fwrite_check(buf, sizeof(char), n, readfp, reading.c_str());
ahead += n;
@ -1506,7 +1585,7 @@ int read_input(std::vector<source> &sources, char *fname, int maxzoom, int minzo
}
fflush(readfp);
start_parsing(readfd, readfp, initial_offset, ahead, &is_parsing, &parallel_parser, parser_created, reading.c_str(), &readers, &progress_seq, exclude, include, exclude_all, filter, basezoom, layer, layermaps, initialized, initial_x, initial_y, maxzoom, sources[layer].layer, gamma != 0, attribute_types, read_parallel_this, &dist_sum, &dist_count, guess_maxzoom, prefilter != NULL || postfilter != NULL);
start_parsing(readfd, streamfpopen(readfp), initial_offset, ahead, &is_parsing, &parallel_parser, parser_created, reading.c_str(), &readers, &progress_seq, exclude, include, exclude_all, filter, basezoom, layer, layermaps, initialized, initial_x, initial_y, maxzoom, sources[layer].layer, gamma != 0, attribute_types, read_parallel_this, &dist_sum, &dist_count, guess_maxzoom, prefilter != NULL || postfilter != NULL);
initial_offset += ahead;
overall_offset += ahead;
@ -1543,7 +1622,7 @@ int read_input(std::vector<source> &sources, char *fname, int maxzoom, int minzo
fflush(readfp);
if (ahead > 0) {
start_parsing(readfd, readfp, initial_offset, ahead, &is_parsing, &parallel_parser, parser_created, reading.c_str(), &readers, &progress_seq, exclude, include, exclude_all, filter, basezoom, layer, layermaps, initialized, initial_x, initial_y, maxzoom, sources[layer].layer, gamma != 0, attribute_types, read_parallel_this, &dist_sum, &dist_count, guess_maxzoom, prefilter != NULL || postfilter != NULL);
start_parsing(readfd, streamfpopen(readfp), initial_offset, ahead, &is_parsing, &parallel_parser, parser_created, reading.c_str(), &readers, &progress_seq, exclude, include, exclude_all, filter, basezoom, layer, layermaps, initialized, initial_x, initial_y, maxzoom, sources[layer].layer, gamma != 0, attribute_types, read_parallel_this, &dist_sum, &dist_count, guess_maxzoom, prefilter != NULL || postfilter != NULL);
if (parser_created) {
if (pthread_join(parallel_parser, NULL) != 0) {
@ -1559,7 +1638,7 @@ int read_input(std::vector<source> &sources, char *fname, int maxzoom, int minzo
// Plain serial reading
std::atomic<long long> layer_seq(overall_offset);
json_pull *jp = json_begin_file(fp);
json_pull *jp = fp->json_begin();
struct serialization_state sst;
sst.fname = reading.c_str();
@ -1591,7 +1670,7 @@ int read_input(std::vector<source> &sources, char *fname, int maxzoom, int minzo
checkdisk(&readers);
}
if (fclose(fp) != 0) {
if (fp->fclose() != 0) {
perror("fclose input");
exit(EXIT_FAILURE);
}

View File

@ -54,7 +54,7 @@ compiler errors.
.PP
.RS
.nf
$ tippecanoe \-o file.mbtiles [options] [file.json file.geobuf ...]
$ tippecanoe \-o file.mbtiles [options] [file.json file.json.gz file.geobuf ...]
.fi
.RE
.PP
@ -149,6 +149,8 @@ or if metadata fields can't be set. You probably don't want to use this.
.IP \(bu 2
\fIname\fP\fB\fC\&.json\fR or \fIname\fP\fB\fC\&.geojson\fR: Read the named GeoJSON input file into a layer called \fIname\fP\&.
.IP \(bu 2
\fIname\fP\fB\fC\&.json.gz\fR or \fIname\fP\fB\fC\&.geojson.gz\fR: Read the named gzipped GeoJSON input file into a layer called \fIname\fP\&.
.IP \(bu 2
\fIname\fP\fB\fC\&.geobuf\fR: Read the named Geobuf input file into a layer called \fIname\fP\&.
.IP \(bu 2
\fIname\fP\fB\fC\&.csv\fR: Read the named CSV input file into a layer called \fIname\fP\&.

File diff suppressed because one or more lines are too long

Binary file not shown.

View File

@ -1390,7 +1390,7 @@ serial_feature next_feature(FILE *geoms, std::atomic<long long> *geompos_in, cha
// Remove nulls, now that the filter has run
for (ssize_t i = sf.keys.size() - 1; i >= 0; i--) {
for (ssize_t i = (ssize_t) sf.keys.size() - 1; i >= 0; i--) {
int type = (stringpool + pool_off[sf.segment])[sf.values[i]];
if (type == mvt_null) {
@ -2367,10 +2367,16 @@ long long write_tile(FILE *geoms, std::atomic<long long> *geompos_in, char *meta
mingap_fraction = mingap_fraction * max_tile_size / compressed.size() * 0.90;
unsigned long long mg = choose_mingap(indices, mingap_fraction);
if (mg <= mingap) {
mg = (mingap + 1) * 1.5;
double nmg = (mingap + 1) * 1.5;
if (mg <= mingap) {
if (nmg <= mingap || nmg > ULONG_MAX) {
mg = ULONG_MAX;
} else {
mg = nmg;
if (mg <= mingap) {
mg = ULONG_MAX;
}
}
}
mingap = mg;

View File

@ -1,6 +1,6 @@
#ifndef VERSION_HPP
#define VERSION_HPP
#define VERSION "tippecanoe v1.27.16\n"
#define VERSION "tippecanoe v1.28.0\n"
#endif