mirror of
https://github.com/mapbox/tippecanoe.git
synced 2025-02-23 18:40:17 +00:00
Merge pull request #568 from mapbox/gzip-input
Directly support gzipped GeoJSON files as input
This commit is contained in:
commit
6db02e8457
@ -1,3 +1,7 @@
|
||||
## 1.28.0
|
||||
|
||||
* Directly support gzipped GeoJSON as input files
|
||||
|
||||
## 1.27.16
|
||||
|
||||
* Fix thread safety issues related to the out-of-disk-space checker
|
||||
|
11
Makefile
11
Makefile
@ -85,9 +85,11 @@ SPACE = $(NULL) $(NULL)
|
||||
test: tippecanoe tippecanoe-decode $(addsuffix .check,$(TESTS)) raw-tiles-test parallel-test pbf-test join-test enumerate-test decode-test join-filter-test unit json-tool-test allow-existing-test csv-test
|
||||
./unit
|
||||
|
||||
suffixes = json json.gz
|
||||
|
||||
# Work around Makefile and filename punctuation limits: _ for space, @ for :, % for /
|
||||
%.json.check:
|
||||
./tippecanoe -a@ -f -o $@.mbtiles $(subst @,:,$(subst %,/,$(subst _, ,$(patsubst %.json.check,%,$(word 4,$(subst /, ,$@)))))) $(wildcard $(subst $(SPACE),/,$(wordlist 1,2,$(subst /, ,$@)))/*.json) < /dev/null
|
||||
./tippecanoe -a@ -f -o $@.mbtiles $(subst @,:,$(subst %,/,$(subst _, ,$(patsubst %.json.check,%,$(word 4,$(subst /, ,$@)))))) $(foreach suffix,$(suffixes),$(wildcard $(subst $(SPACE),/,$(wordlist 1,2,$(subst /, ,$@)))/*.$(suffix))) < /dev/null
|
||||
./tippecanoe-decode $@.mbtiles > $@.out
|
||||
cmp $@.out $(patsubst %.check,%,$@)
|
||||
rm $@.out $@.mbtiles
|
||||
@ -101,7 +103,8 @@ fewer-tests: tippecanoe tippecanoe-decode geobuf-test raw-tiles-test parallel-te
|
||||
# XXX Use proper makefile rules instead of a for loop
|
||||
%.json.checkbuf:
|
||||
for i in $(wildcard $(subst $(SPACE),/,$(wordlist 1,2,$(subst /, ,$@)))/*.json); do ./tippecanoe-json-tool -w $$i | ./node_modules/geobuf/bin/json2geobuf > $$i.geobuf; done
|
||||
./tippecanoe -a@ -f -o $@.mbtiles $(subst @,:,$(subst %,/,$(subst _, ,$(patsubst %.json.checkbuf,%,$(word 4,$(subst /, ,$@)))))) $(addsuffix .geobuf,$(wildcard $(subst $(SPACE),/,$(wordlist 1,2,$(subst /, ,$@)))/*.json)) < /dev/null
|
||||
for i in $(wildcard $(subst $(SPACE),/,$(wordlist 1,2,$(subst /, ,$@)))/*.json.gz); do gzip -dc $$i | ./tippecanoe-json-tool -w | ./node_modules/geobuf/bin/json2geobuf > $$i.geobuf; done
|
||||
./tippecanoe -a@ -f -o $@.mbtiles $(subst @,:,$(subst %,/,$(subst _, ,$(patsubst %.json.checkbuf,%,$(word 4,$(subst /, ,$@)))))) $(foreach suffix,$(suffixes),$(addsuffix .geobuf,$(wildcard $(subst $(SPACE),/,$(wordlist 1,2,$(subst /, ,$@)))/*.$(suffix)))) < /dev/null
|
||||
./tippecanoe-decode $@.mbtiles | sed 's/checkbuf/check/g' > $@.out
|
||||
cmp $@.out $(patsubst %.checkbuf,%,$@)
|
||||
rm $@.out $@.mbtiles
|
||||
@ -160,7 +163,7 @@ pbf-test:
|
||||
rm tests/pbf/11-328-791.3857.vector.pbf.out
|
||||
|
||||
enumerate-test:
|
||||
./tippecanoe -z5 -f -o tests/ne_110m_admin_0_countries/out/enum.mbtiles tests/ne_110m_admin_0_countries/in.json
|
||||
./tippecanoe -z5 -f -o tests/ne_110m_admin_0_countries/out/enum.mbtiles tests/ne_110m_admin_0_countries/in.json.gz
|
||||
./tippecanoe-enumerate tests/ne_110m_admin_0_countries/out/enum.mbtiles > tests/ne_110m_admin_0_countries/out/enum.check
|
||||
cmp tests/ne_110m_admin_0_countries/out/enum tests/ne_110m_admin_0_countries/out/enum.check
|
||||
rm tests/ne_110m_admin_0_countries/out/enum.mbtiles tests/ne_110m_admin_0_countries/out/enum.check
|
||||
@ -280,7 +283,7 @@ csv-test:
|
||||
prep-test: $(TESTS)
|
||||
|
||||
tests/%.json: Makefile tippecanoe tippecanoe-decode
|
||||
./tippecanoe -f -o $@.check.mbtiles $(subst @,:,$(subst %,/,$(subst _, ,$(patsubst %.json,%,$(word 4,$(subst /, ,$@)))))) $(wildcard $(subst $(SPACE),/,$(wordlist 1,2,$(subst /, ,$@)))/*.json)
|
||||
./tippecanoe -f -o $@.check.mbtiles $(subst @,:,$(subst %,/,$(subst _, ,$(patsubst %.json,%,$(word 4,$(subst /, ,$@)))))) $(foreach suffix,$(suffixes),$(wildcard $(subst $(SPACE),/,$(wordlist 1,2,$(subst /, ,$@)))/*.$(suffix)))
|
||||
./tippecanoe-decode $@.check.mbtiles > $@
|
||||
cmp $(patsubst %.check,%,$@) $@
|
||||
rm $@.check.mbtiles
|
||||
|
@ -56,7 +56,7 @@ Usage
|
||||
-----
|
||||
|
||||
```sh
|
||||
$ tippecanoe -o file.mbtiles [options] [file.json file.geobuf ...]
|
||||
$ tippecanoe -o file.mbtiles [options] [file.json file.json.gz file.geobuf ...]
|
||||
```
|
||||
|
||||
If no files are specified, it reads GeoJSON from the standard input.
|
||||
@ -142,6 +142,7 @@ If your input is formatted as newline-delimited GeoJSON, use `-P` to make input
|
||||
### Input files and layer names
|
||||
|
||||
* _name_`.json` or _name_`.geojson`: Read the named GeoJSON input file into a layer called _name_.
|
||||
* _name_`.json.gz` or _name_`.geojson.gz`: Read the named gzipped GeoJSON input file into a layer called _name_.
|
||||
* _name_`.geobuf`: Read the named Geobuf input file into a layer called _name_.
|
||||
* _name_`.csv`: Read the named CSV input file into a layer called _name_.
|
||||
* `-l` _name_ or `--layer=`_name_: Use the specified layer name instead of deriving a name from the input filename or output tileset. If there are multiple input files
|
||||
|
151
main.cpp
151
main.cpp
@ -26,6 +26,7 @@
|
||||
#include <getopt.h>
|
||||
#include <signal.h>
|
||||
#include <sys/time.h>
|
||||
#include <zlib.h>
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
@ -476,9 +477,96 @@ void do_read_parallel(char *map, long long len, long long initial_offset, const
|
||||
}
|
||||
}
|
||||
|
||||
static ssize_t read_stream(json_pull *j, char *buffer, size_t n);
|
||||
|
||||
struct STREAM {
|
||||
FILE *fp = NULL;
|
||||
gzFile gz = NULL;
|
||||
|
||||
int fclose() {
|
||||
int ret;
|
||||
|
||||
if (gz != NULL) {
|
||||
ret = gzclose(gz);
|
||||
} else {
|
||||
ret = ::fclose(fp);
|
||||
}
|
||||
|
||||
delete this;
|
||||
return ret;
|
||||
}
|
||||
|
||||
int peekc() {
|
||||
if (gz != NULL) {
|
||||
int c = gzgetc(gz);
|
||||
if (c != EOF) {
|
||||
gzungetc(c, gz);
|
||||
}
|
||||
return c;
|
||||
} else {
|
||||
int c = getc(fp);
|
||||
if (c != EOF) {
|
||||
ungetc(c, fp);
|
||||
}
|
||||
return c;
|
||||
}
|
||||
}
|
||||
|
||||
size_t read(char *out, size_t count) {
|
||||
if (gz != NULL) {
|
||||
int ret = gzread(gz, out, count);
|
||||
if (ret < 0) {
|
||||
fprintf(stderr, "%s: Error reading compressed data\n", *av);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
return ret;
|
||||
} else {
|
||||
return ::fread(out, 1, count, fp);
|
||||
}
|
||||
}
|
||||
|
||||
json_pull *json_begin() {
|
||||
return ::json_begin(read_stream, this);
|
||||
}
|
||||
};
|
||||
|
||||
// Read callback for the JSON pull parser: `j->source` holds the STREAM that
// was registered in STREAM::json_begin(), so forward the request to it.
static ssize_t read_stream(json_pull *j, char *buffer, size_t n) {
	STREAM *stream = (STREAM *) j->source;
	return stream->read(buffer, n);
}
|
||||
|
||||
// Wrap an already-open file descriptor in a heap-allocated STREAM.
//
// A name that is longer than three characters and ends in ".gz" is opened
// through zlib (gzdopen); anything else is opened as a plain stdio stream
// (fdopen). Failure to open either kind is fatal: a message is printed and
// the process exits, so the returned pointer is never NULL.
STREAM *streamfdopen(int fd, const char *mode, std::string const &fname) {
	// STREAM's members default to NULL, so only the handle in use is set.
	STREAM *stream = new STREAM;

	const bool gzipped = fname.size() > 3 && fname.compare(fname.size() - 3, 3, ".gz") == 0;

	if (!gzipped) {
		stream->fp = fdopen(fd, mode);
		if (stream->fp == NULL) {
			perror(fname.c_str());
			exit(EXIT_FAILURE);
		}
		return stream;
	}

	stream->gz = gzdopen(fd, mode);
	if (stream->gz == NULL) {
		fprintf(stderr, "%s: %s: Decompression error\n", *av, fname.c_str());
		exit(EXIT_FAILURE);
	}
	return stream;
}
|
||||
|
||||
// Wrap an existing stdio FILE in a heap-allocated, uncompressed STREAM.
// The gz handle is left NULL, so every STREAM method uses the stdio path.
STREAM *streamfpopen(FILE *fp) {
	STREAM *wrapper = new STREAM;

	wrapper->gz = NULL;
	wrapper->fp = fp;

	return wrapper;
}
|
||||
|
||||
struct read_parallel_arg {
|
||||
int fd = 0;
|
||||
FILE *fp = NULL;
|
||||
STREAM *fp = NULL;
|
||||
long long offset = 0;
|
||||
long long len = 0;
|
||||
std::atomic<int> *is_parsing = NULL;
|
||||
@ -532,7 +620,7 @@ void *run_read_parallel(void *v) {
|
||||
if (munmap(map, rpa->len) != 0) {
|
||||
perror("munmap source file");
|
||||
}
|
||||
if (fclose(rpa->fp) != 0) {
|
||||
if (rpa->fp->fclose() != 0) {
|
||||
perror("close source file");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
@ -543,7 +631,7 @@ void *run_read_parallel(void *v) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void start_parsing(int fd, FILE *fp, long long offset, long long len, std::atomic<int> *is_parsing, pthread_t *parallel_parser, bool &parser_created, const char *reading, std::vector<struct reader> *readers, std::atomic<long long> *progress_seq, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, json_object *filter, int basezoom, int source, std::vector<std::map<std::string, layermap_entry> > &layermaps, int *initialized, unsigned *initial_x, unsigned *initial_y, int maxzoom, std::string layername, bool uses_gamma, std::map<std::string, int> const *attribute_types, int separator, double *dist_sum, size_t *dist_count, bool want_dist, bool filters) {
|
||||
void start_parsing(int fd, STREAM *fp, long long offset, long long len, std::atomic<int> *is_parsing, pthread_t *parallel_parser, bool &parser_created, const char *reading, std::vector<struct reader> *readers, std::atomic<long long> *progress_seq, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, json_object *filter, int basezoom, int source, std::vector<std::map<std::string, layermap_entry> > &layermaps, int *initialized, unsigned *initial_x, unsigned *initial_y, int maxzoom, std::string layername, bool uses_gamma, std::map<std::string, int> const *attribute_types, int separator, double *dist_sum, size_t *dist_count, bool want_dist, bool filters) {
|
||||
// This has to kick off an intermediate thread to start the parser threads,
|
||||
// so the main thread can get back to reading the next input stage while
|
||||
// the intermediate thread waits for the completion of the parser threads.
|
||||
@ -1173,30 +1261,24 @@ int read_input(std::vector<source> &sources, char *fname, int maxzoom, int minzo
|
||||
}
|
||||
std::string trunc = std::string(use);
|
||||
|
||||
std::vector<std::string> trim = {
|
||||
".json",
|
||||
".geojson",
|
||||
".geobuf",
|
||||
".mbtiles",
|
||||
".gz",
|
||||
};
|
||||
|
||||
// Trim any trailing .json, .geojson, .geobuf, .mbtiles, or .gz suffixes from the name
|
||||
while (true) {
|
||||
ssize_t cp;
|
||||
cp = trunc.find(".json");
|
||||
if (cp >= 0 && (size_t) cp + 5 == trunc.size()) {
|
||||
trunc = trunc.substr(0, cp);
|
||||
continue;
|
||||
bool again = true;
|
||||
while (again) {
|
||||
again = false;
|
||||
for (size_t i = 0; i < trim.size(); i++) {
|
||||
if (trunc.size() > trim[i].size() && trunc.substr(trunc.size() - trim[i].size()) == trim[i]) {
|
||||
trunc = trunc.substr(0, trunc.size() - trim[i].size());
|
||||
again = true;
|
||||
}
|
||||
}
|
||||
cp = trunc.find(".geojson");
|
||||
if (cp >= 0 && (size_t) cp + 8 == trunc.size()) {
|
||||
trunc = trunc.substr(0, cp);
|
||||
continue;
|
||||
}
|
||||
cp = trunc.find(".geobuf");
|
||||
if (cp >= 0 && (size_t) cp + 7 == trunc.size()) {
|
||||
trunc = trunc.substr(0, cp);
|
||||
continue;
|
||||
}
|
||||
cp = trunc.find(".mbtiles");
|
||||
if (cp >= 0 && (size_t) cp + 8 == trunc.size()) {
|
||||
trunc = trunc.substr(0, cp);
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// Trim out characters that can't be part of selector
|
||||
@ -1394,7 +1476,7 @@ int read_input(std::vector<source> &sources, char *fname, int maxzoom, int minzo
|
||||
|
||||
int read_parallel_this = read_parallel ? '\n' : 0;
|
||||
|
||||
if (1) {
|
||||
if (!(sources[source].file.size() > 3 && sources[source].file.substr(sources[source].file.size() - 3) == std::string(".gz"))) {
|
||||
if (fstat(fd, &st) == 0) {
|
||||
off = lseek(fd, 0, SEEK_CUR);
|
||||
if (off >= 0) {
|
||||
@ -1439,7 +1521,7 @@ int read_input(std::vector<source> &sources, char *fname, int maxzoom, int minzo
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
} else {
|
||||
FILE *fp = fdopen(fd, "r");
|
||||
STREAM *fp = streamfdopen(fd, "r", sources[layer].file);
|
||||
if (fp == NULL) {
|
||||
perror(sources[layer].file.c_str());
|
||||
if (close(fd) != 0) {
|
||||
@ -1449,10 +1531,7 @@ int read_input(std::vector<source> &sources, char *fname, int maxzoom, int minzo
|
||||
continue;
|
||||
}
|
||||
|
||||
int c = getc(fp);
|
||||
if (c != EOF) {
|
||||
ungetc(c, fp);
|
||||
}
|
||||
int c = fp->peekc();
|
||||
if (c == 0x1E) {
|
||||
read_parallel_this = 0x1E;
|
||||
}
|
||||
@ -1487,7 +1566,7 @@ int read_input(std::vector<source> &sources, char *fname, int maxzoom, int minzo
|
||||
char buf[READ_BUF];
|
||||
int n;
|
||||
|
||||
while ((n = fread(buf, sizeof(char), READ_BUF, fp)) > 0) {
|
||||
while ((n = fp->read(buf, READ_BUF)) > 0) {
|
||||
fwrite_check(buf, sizeof(char), n, readfp, reading.c_str());
|
||||
ahead += n;
|
||||
|
||||
@ -1506,7 +1585,7 @@ int read_input(std::vector<source> &sources, char *fname, int maxzoom, int minzo
|
||||
}
|
||||
|
||||
fflush(readfp);
|
||||
start_parsing(readfd, readfp, initial_offset, ahead, &is_parsing, ¶llel_parser, parser_created, reading.c_str(), &readers, &progress_seq, exclude, include, exclude_all, filter, basezoom, layer, layermaps, initialized, initial_x, initial_y, maxzoom, sources[layer].layer, gamma != 0, attribute_types, read_parallel_this, &dist_sum, &dist_count, guess_maxzoom, prefilter != NULL || postfilter != NULL);
|
||||
start_parsing(readfd, streamfpopen(readfp), initial_offset, ahead, &is_parsing, ¶llel_parser, parser_created, reading.c_str(), &readers, &progress_seq, exclude, include, exclude_all, filter, basezoom, layer, layermaps, initialized, initial_x, initial_y, maxzoom, sources[layer].layer, gamma != 0, attribute_types, read_parallel_this, &dist_sum, &dist_count, guess_maxzoom, prefilter != NULL || postfilter != NULL);
|
||||
|
||||
initial_offset += ahead;
|
||||
overall_offset += ahead;
|
||||
@ -1543,7 +1622,7 @@ int read_input(std::vector<source> &sources, char *fname, int maxzoom, int minzo
|
||||
fflush(readfp);
|
||||
|
||||
if (ahead > 0) {
|
||||
start_parsing(readfd, readfp, initial_offset, ahead, &is_parsing, ¶llel_parser, parser_created, reading.c_str(), &readers, &progress_seq, exclude, include, exclude_all, filter, basezoom, layer, layermaps, initialized, initial_x, initial_y, maxzoom, sources[layer].layer, gamma != 0, attribute_types, read_parallel_this, &dist_sum, &dist_count, guess_maxzoom, prefilter != NULL || postfilter != NULL);
|
||||
start_parsing(readfd, streamfpopen(readfp), initial_offset, ahead, &is_parsing, ¶llel_parser, parser_created, reading.c_str(), &readers, &progress_seq, exclude, include, exclude_all, filter, basezoom, layer, layermaps, initialized, initial_x, initial_y, maxzoom, sources[layer].layer, gamma != 0, attribute_types, read_parallel_this, &dist_sum, &dist_count, guess_maxzoom, prefilter != NULL || postfilter != NULL);
|
||||
|
||||
if (parser_created) {
|
||||
if (pthread_join(parallel_parser, NULL) != 0) {
|
||||
@ -1559,7 +1638,7 @@ int read_input(std::vector<source> &sources, char *fname, int maxzoom, int minzo
|
||||
// Plain serial reading
|
||||
|
||||
std::atomic<long long> layer_seq(overall_offset);
|
||||
json_pull *jp = json_begin_file(fp);
|
||||
json_pull *jp = fp->json_begin();
|
||||
struct serialization_state sst;
|
||||
|
||||
sst.fname = reading.c_str();
|
||||
@ -1591,7 +1670,7 @@ int read_input(std::vector<source> &sources, char *fname, int maxzoom, int minzo
|
||||
checkdisk(&readers);
|
||||
}
|
||||
|
||||
if (fclose(fp) != 0) {
|
||||
if (fp->fclose() != 0) {
|
||||
perror("fclose input");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
@ -54,7 +54,7 @@ compiler errors.
|
||||
.PP
|
||||
.RS
|
||||
.nf
|
||||
$ tippecanoe \-o file.mbtiles [options] [file.json file.geobuf ...]
|
||||
$ tippecanoe \-o file.mbtiles [options] [file.json file.json.gz file.geobuf ...]
|
||||
.fi
|
||||
.RE
|
||||
.PP
|
||||
@ -149,6 +149,8 @@ or if metadata fields can't be set. You probably don't want to use this.
|
||||
.IP \(bu 2
|
||||
\fIname\fP\fB\fC\&.json\fR or \fIname\fP\fB\fC\&.geojson\fR: Read the named GeoJSON input file into a layer called \fIname\fP\&.
|
||||
.IP \(bu 2
|
||||
\fIname\fP\fB\fC\&.json.gz\fR or \fIname\fP\fB\fC\&.geojson.gz\fR: Read the named gzipped GeoJSON input file into a layer called \fIname\fP\&.
|
||||
.IP \(bu 2
|
||||
\fIname\fP\fB\fC\&.geobuf\fR: Read the named Geobuf input file into a layer called \fIname\fP\&.
|
||||
.IP \(bu 2
|
||||
\fIname\fP\fB\fC\&.csv\fR: Read the named CSV input file into a layer called \fIname\fP\&.
|
||||
|
File diff suppressed because one or more lines are too long
BIN
tests/ne_110m_admin_0_countries/in.json.gz
Normal file
BIN
tests/ne_110m_admin_0_countries/in.json.gz
Normal file
Binary file not shown.
12
tile.cpp
12
tile.cpp
@ -1390,7 +1390,7 @@ serial_feature next_feature(FILE *geoms, std::atomic<long long> *geompos_in, cha
|
||||
|
||||
// Remove nulls, now that the filter has run
|
||||
|
||||
for (ssize_t i = sf.keys.size() - 1; i >= 0; i--) {
|
||||
for (ssize_t i = (ssize_t) sf.keys.size() - 1; i >= 0; i--) {
|
||||
int type = (stringpool + pool_off[sf.segment])[sf.values[i]];
|
||||
|
||||
if (type == mvt_null) {
|
||||
@ -2367,10 +2367,16 @@ long long write_tile(FILE *geoms, std::atomic<long long> *geompos_in, char *meta
|
||||
mingap_fraction = mingap_fraction * max_tile_size / compressed.size() * 0.90;
|
||||
unsigned long long mg = choose_mingap(indices, mingap_fraction);
|
||||
if (mg <= mingap) {
|
||||
mg = (mingap + 1) * 1.5;
|
||||
double nmg = (mingap + 1) * 1.5;
|
||||
|
||||
if (mg <= mingap) {
|
||||
if (nmg <= mingap || nmg > ULONG_MAX) {
|
||||
mg = ULONG_MAX;
|
||||
} else {
|
||||
mg = nmg;
|
||||
|
||||
if (mg <= mingap) {
|
||||
mg = ULONG_MAX;
|
||||
}
|
||||
}
|
||||
}
|
||||
mingap = mg;
|
||||
|
@ -1,6 +1,6 @@
|
||||
#ifndef VERSION_HPP
|
||||
#define VERSION_HPP
|
||||
|
||||
#define VERSION "tippecanoe v1.27.16\n"
|
||||
#define VERSION "tippecanoe v1.28.0\n"
|
||||
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user