From efe66dcafe0dbdbecb0aea7205c8d0e12d7c07f2 Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Mon, 11 Apr 2016 15:59:02 -0700 Subject: [PATCH] Use stdio instead of mmap for geometry while tiling to reduce thrashing --- geojson.c | 50 ++++++++++++++++++++++++++++++++++++++ geometry.cc | 8 +++---- geometry.hh | 2 +- tile.cc | 69 +++++++++++++++++++++++++++++------------------------ tile.h | 6 ++++- 5 files changed, 98 insertions(+), 37 deletions(-) diff --git a/geojson.c b/geojson.c index 145467a..d9c10b0 100644 --- a/geojson.c +++ b/geojson.c @@ -332,6 +332,56 @@ void deserialize_byte(char **f, signed char *n) { *f += sizeof(signed char); } +int deserialize_long_long_io(FILE *f, long long *n, long long *geompos) { + unsigned long long zigzag = 0; + int shift = 0; + + while (1) { + int c = getc(f); + if (c == EOF) { + return 0; + } + (*geompos)++; + + if ((c & 0x80) == 0) { + zigzag |= ((unsigned long long) c) << shift; + shift += 7; + break; + } else { + zigzag |= ((unsigned long long) (c & 0x7F)) << shift; + shift += 7; + } + } + + *n = (zigzag >> 1) ^ (-(zigzag & 1)); + return 1; +} + +int deserialize_int_io(FILE *f, int *n, long long *geompos) { + long long ll; + int ret = deserialize_long_long_io(f, &ll, geompos); + *n = ll; + return ret; +} + +int deserialize_uint_io(FILE *f, unsigned *n, long long *geompos) { + if (fread(n, sizeof(unsigned), 1, f) != 1) { + return 0; + } + *geompos += sizeof(unsigned); + return 1; +} + +int deserialize_byte_io(FILE *f, signed char *n, long long *geompos) { + int c = getc(f); + if (c == EOF) { + return 0; + } + *n = c; + (*geompos)++; + return 1; +} + struct index { long long start; long long end; diff --git a/geometry.cc b/geometry.cc index 27913be..dd36032 100644 --- a/geometry.cc +++ b/geometry.cc @@ -18,7 +18,7 @@ extern "C" { #include "projection.h" } -drawvec decode_geometry(char **meta, int z, unsigned tx, unsigned ty, int detail, long long *bbox, unsigned initial_x, unsigned initial_y) { +drawvec decode_geometry(FILE *meta, long long *geompos, int z, unsigned tx, unsigned ty, int detail, long long *bbox, unsigned initial_x, unsigned initial_y) { drawvec out; bbox[0] = LLONG_MAX; @@ -31,7 +31,7 @@ drawvec decode_geometry(char **meta, int z, unsigned tx, unsigned ty, int detail while (1) { draw d; - deserialize_byte(meta, &d.op); + deserialize_byte_io(meta, &d.op, geompos); if (d.op == VT_END) { break; } @@ -39,8 +39,8 @@ drawvec decode_geometry(char **meta, int z, unsigned tx, unsigned ty, int detail if (d.op == VT_MOVETO || d.op == VT_LINETO) { long long dx, dy; - deserialize_long_long(meta, &dx); - deserialize_long_long(meta, &dy); + deserialize_long_long_io(meta, &dx, geompos); + deserialize_long_long_io(meta, &dy, geompos); wx += dx << geometry_scale; wy += dy << geometry_scale; diff --git a/geometry.hh b/geometry.hh index 907db38..6c94822 100644 --- a/geometry.hh +++ b/geometry.hh @@ -16,7 +16,7 @@ struct draw { typedef std::vector drawvec; -drawvec decode_geometry(char **meta, int z, unsigned tx, unsigned ty, int detail, long long *bbox, unsigned initial_x, unsigned initial_y); +drawvec decode_geometry(FILE *meta, long long *geompos, int z, unsigned tx, unsigned ty, int detail, long long *bbox, unsigned initial_x, unsigned initial_y); void to_tile_scale(drawvec &geom, int z, int detail); drawvec remove_noop(drawvec geom, int type, int shift); drawvec clip_point(drawvec &geom, int z, int detail, long long buffer); diff --git a/tile.cc b/tile.cc index 6d3c8db..6fb413a 100644 --- a/tile.cc +++ b/tile.cc @@ -596,11 +596,11 @@ int manage_gap(unsigned long long index, unsigned long long *previndex, double s return 0; } -long long write_tile(char **geoms, char *metabase, char *stringpool, int z, unsigned tx, unsigned ty, int detail, int min_detail, int basezoom, struct pool **file_keys, char **layernames, sqlite3 *outdb, double droprate, int buffer, const char *fname, FILE **geomfile, int minzoom, int maxzoom, double todo, char *geomstart, volatile long long *along, double gamma, int nlayers, int *prevent, int *additional, int child_shards, long long *meta_off, long long *pool_off, unsigned *initial_x, unsigned *initial_y, volatile int *running) { +long long write_tile(FILE *geoms, long long *geompos_in, char *metabase, char *stringpool, int z, unsigned tx, unsigned ty, int detail, int min_detail, int basezoom, struct pool **file_keys, char **layernames, sqlite3 *outdb, double droprate, int buffer, const char *fname, FILE **geomfile, int minzoom, int maxzoom, double todo, volatile long long *along, double gamma, int nlayers, int *prevent, int *additional, int child_shards, long long *meta_off, long long *pool_off, unsigned *initial_x, unsigned *initial_y, volatile int *running) { int line_detail; double fraction = 1; - char *og = *geoms; + long long og = *geompos_in; // XXX is there a way to do this without floating point? int max_zoom_increment = log(child_shards) / log(4); @@ -669,45 +669,51 @@ long long write_tile(char **geoms, char *metabase, char *stringpool, int z, unsi memset(within, '\0', sizeof(within)); memset(geompos, '\0', sizeof(geompos)); - *geoms = og; + if (*geompos_in != og) { + if (fseek(geoms, og, SEEK_SET) != 0) { + perror("fseek geom"); + exit(EXIT_FAILURE); + } + *geompos_in = og; + } while (1) { signed char t; - deserialize_byte(geoms, &t); + deserialize_byte_io(geoms, &t, geompos_in); if (t < 0) { break; } long long original_seq; - deserialize_long_long(geoms, &original_seq); + deserialize_long_long_io(geoms, &original_seq, geompos_in); long long layer; - deserialize_long_long(geoms, &layer); + deserialize_long_long_io(geoms, &layer, geompos_in); int tippecanoe_minzoom = -1, tippecanoe_maxzoom = -1; if (layer & 2) { - deserialize_int(geoms, &tippecanoe_minzoom); + deserialize_int_io(geoms, &tippecanoe_minzoom, geompos_in); } if (layer & 1) { - deserialize_int(geoms, &tippecanoe_maxzoom); + deserialize_int_io(geoms, &tippecanoe_maxzoom, geompos_in); } layer >>= 2; int segment; - deserialize_int(geoms, &segment); + deserialize_int_io(geoms, &segment, geompos_in); long long metastart; int m; - deserialize_long_long(geoms, &metastart); - deserialize_int(geoms, &m); + deserialize_long_long_io(geoms, &metastart, geompos_in); + deserialize_int_io(geoms, &m, geompos_in); char *meta = metabase + metastart + meta_off[segment]; long long bbox[4]; - drawvec geom = decode_geometry(geoms, z, tx, ty, line_detail, bbox, initial_x[segment], initial_y[segment]); + drawvec geom = decode_geometry(geoms, geompos_in, z, tx, ty, line_detail, bbox, initial_x[segment], initial_y[segment]); signed char feature_minzoom; - deserialize_byte(geoms, &feature_minzoom); + deserialize_byte_io(geoms, &feature_minzoom, geompos_in); - double progress = floor((((*geoms - geomstart + *along) / (double) todo) + z) / (maxzoom + 1) * 1000) / 10; + double progress = floor((((*geompos_in - *along) / (double) todo) + z) / (maxzoom + 1) * 1000) / 10; if (progress >= oprogress + 0.1) { if (!quiet) { fprintf(stderr, " %3.1f%% %d/%u/%u \r", progress, z, tx, ty); @@ -1089,29 +1095,28 @@ void *run_thread(void *vargs) { // printf("%lld of geom_size\n", (long long) geom_size[j]); - char *geom = (char *) mmap(NULL, arg->geom_size[j], PROT_READ, MAP_PRIVATE, arg->geomfd[j], 0); - if (geom == MAP_FAILED) { + FILE *geom = fdopen(arg->geomfd[j], "rb"); + if (geom == NULL) { perror("mmap geom"); exit(EXIT_FAILURE); } - madvise(geom, arg->geom_size[j], MADV_SEQUENTIAL); - madvise(geom, arg->geom_size[j], MADV_WILLNEED); - char *geomstart = geom; - char *end = geom + arg->geom_size[j]; - char *prevgeom = geom; + long long geompos = 0; + long long prevgeom = 0; - while (geom < end) { + while (1) { int z; unsigned x, y; - deserialize_int(&geom, &z); - deserialize_uint(&geom, &x); - deserialize_uint(&geom, &y); + if (!deserialize_int_io(geom, &z, &geompos)) { + break; + } + deserialize_uint_io(geom, &x, &geompos); + deserialize_uint_io(geom, &y, &geompos); // fprintf(stderr, "%d/%u/%u\n", z, x, y); - long long len = write_tile(&geom, arg->metabase, arg->stringpool, z, x, y, z == arg->maxzoom ? arg->full_detail : arg->low_detail, arg->min_detail, arg->basezoom, arg->file_keys, arg->layernames, arg->outdb, arg->droprate, arg->buffer, arg->fname, arg->geomfile, arg->minzoom, arg->maxzoom, arg->todo, geomstart, arg->along, arg->gamma, arg->nlayers, arg->prevent, arg->additional, arg->child_shards, arg->meta_off, arg->pool_off, arg->initial_x, arg->initial_y, arg->running); + long long len = write_tile(geom, &geompos, arg->metabase, arg->stringpool, z, x, y, z == arg->maxzoom ? arg->full_detail : arg->low_detail, arg->min_detail, arg->basezoom, arg->file_keys, arg->layernames, arg->outdb, arg->droprate, arg->buffer, arg->fname, arg->geomfile, arg->minzoom, arg->maxzoom, arg->todo, arg->along, arg->gamma, arg->nlayers, arg->prevent, arg->additional, arg->child_shards, arg->meta_off, arg->pool_off, arg->initial_x, arg->initial_y, arg->running); if (len < 0) { int *err = (int *) malloc(sizeof(int)); @@ -1145,8 +1150,8 @@ void *run_thread(void *vargs) { } } - *arg->along += geom - prevgeom; - prevgeom = geom; + *arg->along += geompos - prevgeom; + prevgeom = geompos; if (pthread_mutex_unlock(&var_lock) != 0) { perror("pthread_mutex_unlock"); @@ -1154,10 +1159,12 @@ void *run_thread(void *vargs) { } } - madvise(geomstart, arg->geom_size[j], MADV_DONTNEED); - if (munmap(geomstart, arg->geom_size[j]) != 0) { - perror("munmap geom"); + if (fclose(geom) != 0) { + perror("close geom"); + exit(EXIT_FAILURE); } + // Since the fclose() has closed the underlying file descriptor + arg->geomfd[j] = -1; } arg->running--; diff --git a/tile.h b/tile.h index 3a0d3ef..904fef8 100644 --- a/tile.h +++ b/tile.h @@ -23,7 +23,11 @@ void deserialize_int(char **f, int *n); void deserialize_long_long(char **f, long long *n); void deserialize_uint(char **f, unsigned *n); void deserialize_byte(char **f, signed char *n); -struct pool_val *deserialize_string(char **f, struct pool *p, int type); + +int deserialize_int_io(FILE *f, int *n, long long *geompos); +int deserialize_long_long_io(FILE *f, long long *n, long long *geompos); +int deserialize_uint_io(FILE *f, unsigned *n, long long *geompos); +int deserialize_byte_io(FILE *f, signed char *n, long long *geompos); long long write_tile(char **geom, char *metabase, char *stringpool, unsigned *file_bbox, int z, unsigned x, unsigned y, int detail, int min_detail, int basezoom, struct pool **file_keys, char **layernames, sqlite3 *outdb, double droprate, int buffer, const char *fname, FILE **geomfile, int file_minzoom, int file_maxzoom, double todo, char *geomstart, long long along, double gamma, int nlayers, int *prevent, int *additional);