Merge pull request #202 from mapbox/radix

Restructure geometry reordering to try to reduce virtual memory thrashing
This commit is contained in:
Eric Fischer 2016-04-05 15:13:44 -07:00
commit d1456c0f66
6 changed files with 10359 additions and 354 deletions

View File

@ -1,3 +1,8 @@
## 1.9.8
* Use an on-disk radix sort for the index to control virtual memory thrashing
when the geometry and index are too large to fit in memory
## 1.9.7 ## 1.9.7
* Fix build problem (wrong spelling of long long max/min constants) * Fix build problem (wrong spelling of long long max/min constants)

917
geojson.c

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

43
tile.cc
View File

@ -212,10 +212,7 @@ struct pool_val *retrieve_string(char **f, struct pool *p, char *stringpool) {
return ret; return ret;
} }
void decode_meta(char **meta, char *stringpool, struct pool *keys, struct pool *values, struct pool *file_keys, std::vector<int> *intmeta) { void decode_meta(int m, char **meta, char *stringpool, struct pool *keys, struct pool *values, struct pool *file_keys, std::vector<int> *intmeta) {
int m;
deserialize_int(meta, &m);
int i; int i;
for (i = 0; i < m; i++) { for (i = 0; i < m; i++) {
struct pool_val *key = retrieve_string(meta, keys, stringpool); struct pool_val *key = retrieve_string(meta, keys, stringpool);
@ -364,7 +361,7 @@ struct sll {
} }
}; };
void rewrite(drawvec &geom, int z, int nextzoom, int maxzoom, long long *bbox, unsigned tx, unsigned ty, int buffer, int line_detail, int *within, long long *geompos, FILE **geomfile, const char *fname, signed char t, int layer, long long metastart, signed char feature_minzoom, int child_shards, int max_zoom_increment, long long seq, int tippecanoe_minzoom, int tippecanoe_maxzoom, int segment, unsigned *initial_x, unsigned *initial_y) { void rewrite(drawvec &geom, int z, int nextzoom, int maxzoom, long long *bbox, unsigned tx, unsigned ty, int buffer, int line_detail, int *within, long long *geompos, FILE **geomfile, const char *fname, signed char t, int layer, long long metastart, signed char feature_minzoom, int child_shards, int max_zoom_increment, long long seq, int tippecanoe_minzoom, int tippecanoe_maxzoom, int segment, unsigned *initial_x, unsigned *initial_y, int m) {
if (geom.size() > 0 && nextzoom <= maxzoom) { if (geom.size() > 0 && nextzoom <= maxzoom) {
int xo, yo; int xo, yo;
int span = 1 << (nextzoom - z); int span = 1 << (nextzoom - z);
@ -447,6 +444,7 @@ void rewrite(drawvec &geom, int z, int nextzoom, int maxzoom, long long *bbox, u
} }
serialize_int(geomfile[j], segment, &geompos[j], fname); serialize_int(geomfile[j], segment, &geompos[j], fname);
serialize_long_long(geomfile[j], metastart, &geompos[j], fname); serialize_long_long(geomfile[j], metastart, &geompos[j], fname);
serialize_int(geomfile[j], m, &geompos[j], fname);
long long wx = initial_x[segment], wy = initial_y[segment]; long long wx = initial_x[segment], wy = initial_y[segment];
for (size_t u = 0; u < geom.size(); u++) { for (size_t u = 0; u < geom.size(); u++) {
@ -471,6 +469,7 @@ void rewrite(drawvec &geom, int z, int nextzoom, int maxzoom, long long *bbox, u
struct partial { struct partial {
std::vector<drawvec> geoms; std::vector<drawvec> geoms;
long long layer; long long layer;
int m;
char *meta; char *meta;
signed char t; signed char t;
int segment; int segment;
@ -697,7 +696,9 @@ long long write_tile(char **geoms, char *metabase, char *stringpool, int z, unsi
deserialize_int(geoms, &segment); deserialize_int(geoms, &segment);
long long metastart; long long metastart;
int m;
deserialize_long_long(geoms, &metastart); deserialize_long_long(geoms, &metastart);
deserialize_int(geoms, &m);
char *meta = metabase + metastart + meta_off[segment]; char *meta = metabase + metastart + meta_off[segment];
long long bbox[4]; long long bbox[4];
@ -770,7 +771,7 @@ long long write_tile(char **geoms, char *metabase, char *stringpool, int z, unsi
} }
if (line_detail == detail && fraction == 1) { /* only write out the next zoom once, even if we retry */ if (line_detail == detail && fraction == 1) { /* only write out the next zoom once, even if we retry */
rewrite(geom, z, nextzoom, maxzoom, bbox, tx, ty, buffer, line_detail, within, geompos, geomfile, fname, t, layer, metastart, feature_minzoom, child_shards, max_zoom_increment, original_seq, tippecanoe_minzoom, tippecanoe_maxzoom, segment, initial_x, initial_y); rewrite(geom, z, nextzoom, maxzoom, bbox, tx, ty, buffer, line_detail, within, geompos, geomfile, fname, t, layer, metastart, feature_minzoom, child_shards, max_zoom_increment, original_seq, tippecanoe_minzoom, tippecanoe_maxzoom, segment, initial_x, initial_y, m);
} }
if (z < minzoom) { if (z < minzoom) {
@ -823,6 +824,7 @@ long long write_tile(char **geoms, char *metabase, char *stringpool, int z, unsi
partial p; partial p;
p.geoms.push_back(geom); p.geoms.push_back(geom);
p.layer = layer; p.layer = layer;
p.m = m;
p.meta = meta; p.meta = meta;
p.t = t; p.t = t;
p.segment = segment; p.segment = segment;
@ -892,7 +894,7 @@ long long write_tile(char **geoms, char *metabase, char *stringpool, int z, unsi
c.coalesced = false; c.coalesced = false;
c.original_seq = original_seq; c.original_seq = original_seq;
decode_meta(&meta, stringpool + pool_off[segment], keys[layer], values[layer], file_keys[layer], &c.meta); decode_meta(partials[i].m, &meta, stringpool + pool_off[segment], keys[layer], values[layer], file_keys[layer], &c.meta);
features[layer].push_back(c); features[layer].push_back(c);
} }
} }
@ -1090,6 +1092,8 @@ void *run_thread(void *vargs) {
perror("mmap geom"); perror("mmap geom");
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
madvise(geom, arg->geom_size[j], MADV_SEQUENTIAL);
madvise(geom, arg->geom_size[j], MADV_WILLNEED);
char *geomstart = geom; char *geomstart = geom;
char *end = geom + arg->geom_size[j]; char *end = geom + arg->geom_size[j];
@ -1148,6 +1152,7 @@ void *run_thread(void *vargs) {
} }
} }
madvise(geomstart, arg->geom_size[j], MADV_DONTNEED);
if (munmap(geomstart, arg->geom_size[j]) != 0) { if (munmap(geomstart, arg->geom_size[j]) != 0) {
perror("munmap geom"); perror("munmap geom");
} }
@ -1311,8 +1316,17 @@ int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpo
} }
for (j = 0; j < TEMP_FILES; j++) { for (j = 0; j < TEMP_FILES; j++) {
close(geomfd[j]); // Can be < 0 if there is only one source file, at z0
fclose(sub[j]); if (geomfd[j] >= 0) {
if (close(geomfd[j]) != 0) {
perror("close geom");
exit(EXIT_FAILURE);
}
}
if (fclose(sub[j]) != 0) {
perror("close subfile");
exit(EXIT_FAILURE);
}
struct stat geomst; struct stat geomst;
if (fstat(subfd[j], &geomst) != 0) { if (fstat(subfd[j], &geomst) != 0) {
@ -1329,6 +1343,17 @@ int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpo
} }
} }
int j;
for (j = 0; j < TEMP_FILES; j++) {
// Can be < 0 if there is only one source file, at z0
if (geomfd[j] >= 0) {
if (close(geomfd[j]) != 0) {
perror("close geom");
exit(EXIT_FAILURE);
}
}
}
if (!quiet) { if (!quiet) {
fprintf(stderr, "\n"); fprintf(stderr, "\n");
} }

2
tile.h
View File

@ -46,6 +46,8 @@ static int additional_options[] = {
A_REORDER, A_REORDER,
#define A_LINE_DROP ((int) 'l') #define A_LINE_DROP ((int) 'l')
A_LINE_DROP, A_LINE_DROP,
#define A_PREFER_RADIX_SORT ((int) 'r')
A_PREFER_RADIX_SORT,
}; };
static int prevent_options[] = { static int prevent_options[] = {

View File

@ -1 +1 @@
#define VERSION "tippecanoe v1.9.7\n" #define VERSION "tippecanoe v1.9.8\n"