Merge pull request #425 from mapbox/better-maxzoom-guessing

Better maxzoom guessing by considering resolution within features
This commit is contained in:
Eric Fischer 2017-05-30 17:22:40 -07:00 committed by GitHub
commit b80a1d7621
10 changed files with 2440 additions and 1015 deletions

View File

@ -1,3 +1,8 @@
## 1.19.0
* Tile-join can merge and create directories, not only mbtiles
* Maxzoom guessing (-zg) takes into account resolution within each feature
## 1.18.2
* Fix crash with very long (>128K) attribute values

View File

@ -20,6 +20,7 @@
#include <sys/resource.h>
#include <pthread.h>
#include <vector>
#include <algorithm>
#include <set>
#include <map>
#include <string>
@ -184,7 +185,7 @@ long long parse_geometry(int t, json_object *j, long long *bbox, drawvec &out, i
return g;
}
int serialize_geometry(json_object *geometry, json_object *properties, json_object *id, const char *reading, int line, volatile long long *layer_seq, volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, const char *fname, int basezoom, int layer, double droprate, long long *file_bbox, json_object *tippecanoe, int segment, int *initialized, unsigned *initial_x, unsigned *initial_y, struct reader *readers, int maxzoom, json_object *feature, std::map<std::string, layermap_entry> *layermap, std::string layername, bool uses_gamma, std::map<std::string, int> const *attribute_types) {
int serialize_geometry(json_object *geometry, json_object *properties, json_object *id, const char *reading, int line, volatile long long *layer_seq, volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, const char *fname, int basezoom, int layer, double droprate, long long *file_bbox, json_object *tippecanoe, int segment, int *initialized, unsigned *initial_x, unsigned *initial_y, struct reader *readers, int maxzoom, json_object *feature, std::map<std::string, layermap_entry> *layermap, std::string layername, bool uses_gamma, std::map<std::string, int> const *attribute_types, double *dist_sum, size_t *dist_count, bool want_dist) {
json_object *geometry_type = json_hash_get(geometry, "type");
if (geometry_type == NULL) {
static int warned = 0;
@ -425,6 +426,33 @@ int serialize_geometry(json_object *geometry, json_object *properties, json_obje
dv = fix_polygon(dv);
}
if (want_dist) {
std::vector<unsigned long long> locs;
for (size_t i = 0; i < dv.size(); i++) {
if (dv[i].op == VT_MOVETO || dv[i].op == VT_LINETO) {
locs.push_back(encode(dv[i].x << geometry_scale, dv[i].y << geometry_scale));
}
}
std::sort(locs.begin(), locs.end());
size_t n = 0;
double sum = 0;
for (size_t i = 1; i < locs.size(); i++) {
if (locs[i - 1] != locs[i]) {
sum += log(locs[i] - locs[i - 1]);
n++;
}
}
if (n > 0) {
double avg = exp(sum / n);
// Convert approximately from tile units to feet
double dist_ft = sqrt(avg) / 33;
*dist_sum += log(dist_ft) * n;
*dist_count += n;
}
locs.clear();
}
bool inline_meta = true;
// Don't inline metadata for features that will span several tiles at maxzoom
if (g > 0 && (bbox[2] < bbox[0] || bbox[3] < bbox[1])) {
@ -575,7 +603,7 @@ void check_crs(json_object *j, const char *reading) {
}
}
void parse_json(json_pull *jp, const char *reading, volatile long long *layer_seq, volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, char *fname, int basezoom, int layer, double droprate, long long *file_bbox, int segment, int *initialized, unsigned *initial_x, unsigned *initial_y, struct reader *readers, int maxzoom, std::map<std::string, layermap_entry> *layermap, std::string layername, bool uses_gamma, std::map<std::string, int> const *attribute_types) {
void parse_json(json_pull *jp, const char *reading, volatile long long *layer_seq, volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, char *fname, int basezoom, int layer, double droprate, long long *file_bbox, int segment, int *initialized, unsigned *initial_x, unsigned *initial_y, struct reader *readers, int maxzoom, std::map<std::string, layermap_entry> *layermap, std::string layername, bool uses_gamma, std::map<std::string, int> const *attribute_types, double *dist_sum, size_t *dist_count, bool want_dist) {
long long found_hashes = 0;
long long found_features = 0;
long long found_geometries = 0;
@ -643,7 +671,7 @@ void parse_json(json_pull *jp, const char *reading, volatile long long *layer_se
}
found_geometries++;
serialize_geometry(j, NULL, NULL, reading, jp->line, layer_seq, progress_seq, metapos, geompos, indexpos, exclude, include, exclude_all, metafile, geomfile, indexfile, poolfile, treefile, fname, basezoom, layer, droprate, file_bbox, NULL, segment, initialized, initial_x, initial_y, readers, maxzoom, j, layermap, layername, uses_gamma, attribute_types);
serialize_geometry(j, NULL, NULL, reading, jp->line, layer_seq, progress_seq, metapos, geompos, indexpos, exclude, include, exclude_all, metafile, geomfile, indexfile, poolfile, treefile, fname, basezoom, layer, droprate, file_bbox, NULL, segment, initialized, initial_x, initial_y, readers, maxzoom, j, layermap, layername, uses_gamma, attribute_types, dist_sum, dist_count, want_dist);
json_free(j);
continue;
}
@ -686,10 +714,10 @@ void parse_json(json_pull *jp, const char *reading, volatile long long *layer_se
if (geometries != NULL) {
size_t g;
for (g = 0; g < geometries->length; g++) {
serialize_geometry(geometries->array[g], properties, id, reading, jp->line, layer_seq, progress_seq, metapos, geompos, indexpos, exclude, include, exclude_all, metafile, geomfile, indexfile, poolfile, treefile, fname, basezoom, layer, droprate, file_bbox, tippecanoe, segment, initialized, initial_x, initial_y, readers, maxzoom, j, layermap, layername, uses_gamma, attribute_types);
serialize_geometry(geometries->array[g], properties, id, reading, jp->line, layer_seq, progress_seq, metapos, geompos, indexpos, exclude, include, exclude_all, metafile, geomfile, indexfile, poolfile, treefile, fname, basezoom, layer, droprate, file_bbox, tippecanoe, segment, initialized, initial_x, initial_y, readers, maxzoom, j, layermap, layername, uses_gamma, attribute_types, dist_sum, dist_count, want_dist);
}
} else {
serialize_geometry(geometry, properties, id, reading, jp->line, layer_seq, progress_seq, metapos, geompos, indexpos, exclude, include, exclude_all, metafile, geomfile, indexfile, poolfile, treefile, fname, basezoom, layer, droprate, file_bbox, tippecanoe, segment, initialized, initial_x, initial_y, readers, maxzoom, j, layermap, layername, uses_gamma, attribute_types);
serialize_geometry(geometry, properties, id, reading, jp->line, layer_seq, progress_seq, metapos, geompos, indexpos, exclude, include, exclude_all, metafile, geomfile, indexfile, poolfile, treefile, fname, basezoom, layer, droprate, file_bbox, tippecanoe, segment, initialized, initial_x, initial_y, readers, maxzoom, j, layermap, layername, uses_gamma, attribute_types, dist_sum, dist_count, want_dist);
}
json_free(j);
@ -701,7 +729,7 @@ void parse_json(json_pull *jp, const char *reading, volatile long long *layer_se
void *run_parse_json(void *v) {
struct parse_json_args *pja = (struct parse_json_args *) v;
parse_json(pja->jp, pja->reading, pja->layer_seq, pja->progress_seq, pja->metapos, pja->geompos, pja->indexpos, pja->exclude, pja->include, pja->exclude_all, pja->metafile, pja->geomfile, pja->indexfile, pja->poolfile, pja->treefile, pja->fname, pja->basezoom, pja->layer, pja->droprate, pja->file_bbox, pja->segment, pja->initialized, pja->initial_x, pja->initial_y, pja->readers, pja->maxzoom, pja->layermap, *pja->layername, pja->uses_gamma, pja->attribute_types);
parse_json(pja->jp, pja->reading, pja->layer_seq, pja->progress_seq, pja->metapos, pja->geompos, pja->indexpos, pja->exclude, pja->include, pja->exclude_all, pja->metafile, pja->geomfile, pja->indexfile, pja->poolfile, pja->treefile, pja->fname, pja->basezoom, pja->layer, pja->droprate, pja->file_bbox, pja->segment, pja->initialized, pja->initial_x, pja->initial_y, pja->readers, pja->maxzoom, pja->layermap, *pja->layername, pja->uses_gamma, pja->attribute_types, pja->dist_sum, pja->dist_count, pja->want_dist);
return NULL;
}

View File

@ -29,10 +29,13 @@ struct parse_json_args {
std::string *layername;
bool uses_gamma;
std::map<std::string, int> const *attribute_types;
double *dist_sum;
size_t *dist_count;
bool want_dist;
};
struct json_pull *json_begin_map(char *map, long long len);
void json_end_map(struct json_pull *jp);
void parse_json(json_pull *jp, const char *reading, volatile long long *layer_seq, volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, char *fname, int basezoom, int layer, double droprate, long long *file_bbox, int segment, int *initialized, unsigned *initial_x, unsigned *initial_y, struct reader *readers, int maxzoom, std::map<std::string, layermap_entry> *layermap, std::string layername, bool uses_gamma, std::map<std::string, int> const *attribute_types);
void parse_json(json_pull *jp, const char *reading, volatile long long *layer_seq, volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, char *fname, int basezoom, int layer, double droprate, long long *file_bbox, int segment, int *initialized, unsigned *initial_x, unsigned *initial_y, struct reader *readers, int maxzoom, std::map<std::string, layermap_entry> *layermap, std::string layername, bool uses_gamma, std::map<std::string, int> const *attribute_types, double *dist_sum, size_t *dist_count, bool want_dist);
void *run_parse_json(void *v);

View File

@ -372,7 +372,7 @@ void *run_sort(void *v) {
return NULL;
}
void do_read_parallel(char *map, long long len, long long initial_offset, const char *reading, struct reader *reader, volatile long long *progress_seq, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, char *fname, int basezoom, int source, int nlayers, std::vector<std::map<std::string, layermap_entry> > *layermaps, double droprate, int *initialized, unsigned *initial_x, unsigned *initial_y, int maxzoom, std::string layername, bool uses_gamma, std::map<std::string, int> const *attribute_types, int separator) {
void do_read_parallel(char *map, long long len, long long initial_offset, const char *reading, struct reader *reader, volatile long long *progress_seq, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, char *fname, int basezoom, int source, int nlayers, std::vector<std::map<std::string, layermap_entry> > *layermaps, double droprate, int *initialized, unsigned *initial_x, unsigned *initial_y, int maxzoom, std::string layername, bool uses_gamma, std::map<std::string, int> const *attribute_types, int separator, double *dist_sum, size_t *dist_count, bool want_dist) {
long long segs[CPUS + 1];
segs[0] = 0;
segs[CPUS] = len;
@ -385,11 +385,15 @@ void do_read_parallel(char *map, long long len, long long initial_offset, const
}
}
double dist_sums[CPUS];
size_t dist_counts[CPUS];
volatile long long layer_seq[CPUS];
for (size_t i = 0; i < CPUS; i++) {
// To preserve feature ordering, unique id for each segment
// begins with that segment's offset into the input
layer_seq[i] = segs[i] + initial_offset;
dist_sums[i] = dist_counts[i] = 0;
}
struct parse_json_args pja[CPUS];
@ -431,6 +435,9 @@ void do_read_parallel(char *map, long long len, long long initial_offset, const
pja[i].layername = &layername;
pja[i].uses_gamma = uses_gamma;
pja[i].attribute_types = attribute_types;
pja[i].dist_sum = &(dist_sums[i]);
pja[i].dist_count = &(dist_counts[i]);
pja[i].want_dist = want_dist;
if (pthread_create(&pthreads[i], NULL, run_parse_json, &pja[i]) != 0) {
perror("pthread_create");
@ -445,6 +452,9 @@ void do_read_parallel(char *map, long long len, long long initial_offset, const
perror("pthread_join 370");
}
*dist_sum += dist_sums[i];
*dist_count += dist_counts[i];
json_end_map(pja[i].jp);
}
}
@ -476,6 +486,9 @@ struct read_parallel_arg {
std::string layername;
bool uses_gamma;
std::map<std::string, int> const *attribute_types;
double *dist_sum;
size_t *dist_count;
bool want_dist;
};
void *run_read_parallel(void *v) {
@ -497,7 +510,7 @@ void *run_read_parallel(void *v) {
}
madvise(map, rpa->len, MADV_RANDOM); // sequential, but from several pointers at once
do_read_parallel(map, rpa->len, rpa->offset, rpa->reading, rpa->reader, rpa->progress_seq, rpa->exclude, rpa->include, rpa->exclude_all, rpa->fname, rpa->basezoom, rpa->source, rpa->nlayers, rpa->layermaps, rpa->droprate, rpa->initialized, rpa->initial_x, rpa->initial_y, rpa->maxzoom, rpa->layername, rpa->uses_gamma, rpa->attribute_types, rpa->separator);
do_read_parallel(map, rpa->len, rpa->offset, rpa->reading, rpa->reader, rpa->progress_seq, rpa->exclude, rpa->include, rpa->exclude_all, rpa->fname, rpa->basezoom, rpa->source, rpa->nlayers, rpa->layermaps, rpa->droprate, rpa->initialized, rpa->initial_x, rpa->initial_y, rpa->maxzoom, rpa->layername, rpa->uses_gamma, rpa->attribute_types, rpa->separator, rpa->dist_sum, rpa->dist_count, rpa->want_dist);
madvise(map, rpa->len, MADV_DONTNEED);
if (munmap(map, rpa->len) != 0) {
@ -514,7 +527,7 @@ void *run_read_parallel(void *v) {
return NULL;
}
void start_parsing(int fd, FILE *fp, long long offset, long long len, volatile int *is_parsing, pthread_t *parallel_parser, bool &parser_created, const char *reading, struct reader *reader, volatile long long *progress_seq, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, char *fname, int basezoom, int source, int nlayers, std::vector<std::map<std::string, layermap_entry> > &layermaps, double droprate, int *initialized, unsigned *initial_x, unsigned *initial_y, int maxzoom, std::string layername, bool uses_gamma, std::map<std::string, int> const *attribute_types, int separator) {
void start_parsing(int fd, FILE *fp, long long offset, long long len, volatile int *is_parsing, pthread_t *parallel_parser, bool &parser_created, const char *reading, struct reader *reader, volatile long long *progress_seq, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, char *fname, int basezoom, int source, int nlayers, std::vector<std::map<std::string, layermap_entry> > &layermaps, double droprate, int *initialized, unsigned *initial_x, unsigned *initial_y, int maxzoom, std::string layername, bool uses_gamma, std::map<std::string, int> const *attribute_types, int separator, double *dist_sum, size_t *dist_count, bool want_dist) {
// This has to kick off an intermediate thread to start the parser threads,
// so the main thread can get back to reading the next input stage while
// the intermediate thread waits for the completion of the parser threads.
@ -553,6 +566,9 @@ void start_parsing(int fd, FILE *fp, long long offset, long long len, volatile i
rpa->layername = layername;
rpa->uses_gamma = uses_gamma;
rpa->attribute_types = attribute_types;
rpa->dist_sum = dist_sum;
rpa->dist_count = dist_count;
rpa->want_dist = want_dist;
if (pthread_create(parallel_parser, NULL, run_read_parallel, rpa) != 0) {
perror("pthread_create");
@ -1173,6 +1189,8 @@ int read_input(std::vector<source> &sources, char *fname, int &maxzoom, int minz
}
long overall_offset = 0;
double dist_sum = 0;
size_t dist_count = 0;
size_t nsources = sources.size();
for (size_t source = 0; source < nsources; source++) {
@ -1235,7 +1253,7 @@ int read_input(std::vector<source> &sources, char *fname, int &maxzoom, int minz
}
if (map != NULL && map != MAP_FAILED && read_parallel_this) {
do_read_parallel(map, st.st_size - off, overall_offset, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, layer, nlayers, &layermaps, droprate, initialized, initial_x, initial_y, maxzoom, sources[layer].layer, uses_gamma, attribute_types, read_parallel_this);
do_read_parallel(map, st.st_size - off, overall_offset, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, layer, nlayers, &layermaps, droprate, initialized, initial_x, initial_y, maxzoom, sources[layer].layer, uses_gamma, attribute_types, read_parallel_this, &dist_sum, &dist_count, guess_maxzoom);
overall_offset += st.st_size - off;
checkdisk(reader, CPUS);
@ -1311,7 +1329,7 @@ int read_input(std::vector<source> &sources, char *fname, int &maxzoom, int minz
}
fflush(readfp);
start_parsing(readfd, readfp, initial_offset, ahead, &is_parsing, &parallel_parser, parser_created, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, layer, nlayers, layermaps, droprate, initialized, initial_x, initial_y, maxzoom, sources[layer].layer, gamma != 0, attribute_types, read_parallel_this);
start_parsing(readfd, readfp, initial_offset, ahead, &is_parsing, &parallel_parser, parser_created, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, layer, nlayers, layermaps, droprate, initialized, initial_x, initial_y, maxzoom, sources[layer].layer, gamma != 0, attribute_types, read_parallel_this, &dist_sum, &dist_count, guess_maxzoom);
initial_offset += ahead;
overall_offset += ahead;
@ -1348,7 +1366,7 @@ int read_input(std::vector<source> &sources, char *fname, int &maxzoom, int minz
fflush(readfp);
if (ahead > 0) {
start_parsing(readfd, readfp, initial_offset, ahead, &is_parsing, &parallel_parser, parser_created, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, layer, nlayers, layermaps, droprate, initialized, initial_x, initial_y, maxzoom, sources[layer].layer, gamma != 0, attribute_types, read_parallel_this);
start_parsing(readfd, readfp, initial_offset, ahead, &is_parsing, &parallel_parser, parser_created, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, layer, nlayers, layermaps, droprate, initialized, initial_x, initial_y, maxzoom, sources[layer].layer, gamma != 0, attribute_types, read_parallel_this, &dist_sum, &dist_count, guess_maxzoom);
if (parser_created) {
if (pthread_join(parallel_parser, NULL) != 0) {
@ -1365,7 +1383,7 @@ int read_input(std::vector<source> &sources, char *fname, int &maxzoom, int minz
long long layer_seq = overall_offset;
json_pull *jp = json_begin_file(fp);
parse_json(jp, reading.c_str(), &layer_seq, &progress_seq, &reader[0].metapos, &reader[0].geompos, &reader[0].indexpos, exclude, include, exclude_all, reader[0].metafile, reader[0].geomfile, reader[0].indexfile, reader[0].poolfile, reader[0].treefile, fname, basezoom, layer, droprate, reader[0].file_bbox, 0, &initialized[0], &initial_x[0], &initial_y[0], reader, maxzoom, &layermaps[0], sources[layer].layer, uses_gamma, attribute_types);
parse_json(jp, reading.c_str(), &layer_seq, &progress_seq, &reader[0].metapos, &reader[0].geompos, &reader[0].indexpos, exclude, include, exclude_all, reader[0].metafile, reader[0].geomfile, reader[0].indexfile, reader[0].poolfile, reader[0].treefile, fname, basezoom, layer, droprate, reader[0].file_bbox, 0, &initialized[0], &initial_x[0], &initial_y[0], reader, maxzoom, &layermaps[0], sources[layer].layer, uses_gamma, attribute_types, &dist_sum, &dist_count, guess_maxzoom);
json_end(jp);
overall_offset = layer_seq;
checkdisk(reader, CPUS);
@ -1618,6 +1636,11 @@ int read_input(std::vector<source> &sources, char *fname, int &maxzoom, int minz
}
}
if (count == 0 && dist_count == 0) {
fprintf(stderr, "Can't guess maxzoom (-zg) without at least two distinct feature locations\n");
exit(EXIT_FAILURE);
}
if (count > 0) {
// Geometric mean is appropriate because distances between features
// are typically lognormally distributed
@ -1625,7 +1648,8 @@ int read_input(std::vector<source> &sources, char *fname, int &maxzoom, int minz
// Convert approximately from tile units to feet
double dist_ft = sqrt(avg) / 33;
double want = dist_ft / 250;
// Factor of 8 (3 zooms) beyond minimum required to distinguish features
double want = dist_ft / 8;
maxzoom = ceil(log(360 / (.00000274 * want)) / log(2) - full_detail);
if (maxzoom < 0) {
@ -1638,9 +1662,25 @@ int read_input(std::vector<source> &sources, char *fname, int &maxzoom, int minz
if (!quiet) {
fprintf(stderr, "Choosing a maxzoom of -z%d for features about %d feet apart\n", maxzoom, (int) ceil(dist_ft));
}
} else {
fprintf(stderr, "Can't guess maxzoom (-zg) without at least two distinct feature locations\n");
exit(EXIT_FAILURE);
}
if (dist_count != 0) {
double want2 = exp(dist_sum / dist_count) / 8;
int mz = ceil(log(360 / (.00000274 * want2)) / log(2) - full_detail);
if (mz < 0) {
mz = 0;
}
if (mz > MAX_ZOOM) {
mz = MAX_ZOOM;
}
if (mz > maxzoom || count <= 0) {
if (!quiet) {
fprintf(stderr, "Choosing a maxzoom of -z%d for resolution of about %d feet within features\n", mz, (int) exp(dist_sum / dist_count));
}
maxzoom = mz;
}
}
if (maxzoom < minzoom) {

53
tests/knox/in.json Normal file

File diff suppressed because one or more lines are too long

1144
tests/knox/out/-zg.json Normal file

File diff suppressed because it is too large Load Diff

1144
tests/knox/out/-zg_-P.json Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

View File

@ -1834,7 +1834,7 @@ long long write_tile(FILE *geoms, long long *geompos_in, char *metabase, char *s
line_detail++; // to keep it the same when the loop decrements it
continue;
} else {
fprintf(stderr, "Try using -B (and --drop-lines or --drop-polygons if needed) to set a higher base zoom level.\n");
fprintf(stderr, "Try using --drop-fraction-as-needed or --drop-densest-as-needed.\n");
return -1;
}
}

View File

@ -1 +1 @@
#define VERSION "tippecanoe v1.18.2\n"
#define VERSION "tippecanoe v1.19.0\n"