From 32aa65308282a39632d8e78dbc8f98aeed0c1b42 Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Fri, 11 Nov 2016 17:37:46 -0800 Subject: [PATCH] Don't spend geometry space on index or extent unless it is needed --- geojson.cpp | 21 +++++++++++++-------- geojson.hpp | 3 ++- main.cpp | 21 ++++++++++++--------- serial.cpp | 18 +++++++++++++++--- tile.cpp | 24 ++++++++++++++---------- 5 files changed, 56 insertions(+), 31 deletions(-) diff --git a/geojson.cpp b/geojson.cpp index 4a601ce..01bbe2a 100644 --- a/geojson.cpp +++ b/geojson.cpp @@ -167,7 +167,7 @@ long long parse_geometry(int t, json_object *j, long long *bbox, drawvec &out, i return g; } -int serialize_geometry(json_object *geometry, json_object *properties, json_object *id, const char *reading, int line, volatile long long *layer_seq, volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set *exclude, std::set *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, const char *fname, int basezoom, int layer, double droprate, long long *file_bbox, json_object *tippecanoe, int segment, int *initialized, unsigned *initial_x, unsigned *initial_y, struct reader *readers, int maxzoom, json_object *feature, std::map *layermap, std::string const &layername) { +int serialize_geometry(json_object *geometry, json_object *properties, json_object *id, const char *reading, int line, volatile long long *layer_seq, volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set *exclude, std::set *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, const char *fname, int basezoom, int layer, double droprate, long long *file_bbox, json_object *tippecanoe, int segment, int *initialized, unsigned *initial_x, unsigned *initial_y, struct reader *readers, int maxzoom, json_object *feature, std::map 
*layermap, std::string const &layername, bool uses_gamma) { json_object *geometry_type = json_hash_get(geometry, "type"); if (geometry_type == NULL) { static int warned = 0; @@ -392,6 +392,7 @@ int serialize_geometry(json_object *geometry, json_object *properties, json_obje } long long geomstart = *geompos; + long long bbox_index; serial_feature sf; sf.layer = layer; @@ -413,7 +414,11 @@ int serialize_geometry(json_object *geometry, json_object *properties, json_obje // and then mask to bring it back into the addressable area long long midx = (bbox[0] / 2 + bbox[2] / 2) & ((1LL << 32) - 1); long long midy = (bbox[1] / 2 + bbox[3] / 2) & ((1LL << 32) - 1); - sf.index = encode(midx, midy); + bbox_index = encode(midx, midy); + + if (additional[A_INCREASE_SPACING_AS_NEEDED] || additional[A_CALCULATE_FEATURE_DENSITY] || additional[A_INCREASE_GAMMA_AS_NEEDED] || uses_gamma) { + sf.index = bbox_index; + } if (inline_meta) { sf.metapos = -1; @@ -437,7 +442,7 @@ int serialize_geometry(json_object *geometry, json_object *properties, json_obje index.segment = segment; index.seq = *layer_seq; index.t = sf.t; - index.index = sf.index; + index.index = bbox_index; fwrite_check(&index, sizeof(struct index), 1, indexfile, fname); *indexpos += sizeof(struct index); @@ -480,7 +485,7 @@ void check_crs(json_object *j, const char *reading) { } } -void parse_json(json_pull *jp, const char *reading, volatile long long *layer_seq, volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set *exclude, std::set *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, char *fname, int basezoom, int layer, double droprate, long long *file_bbox, int segment, int *initialized, unsigned *initial_x, unsigned *initial_y, struct reader *readers, int maxzoom, std::map *layermap, std::string layername) { +void parse_json(json_pull *jp, const char *reading, volatile long long *layer_seq, 
volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set *exclude, std::set *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, char *fname, int basezoom, int layer, double droprate, long long *file_bbox, int segment, int *initialized, unsigned *initial_x, unsigned *initial_y, struct reader *readers, int maxzoom, std::map *layermap, std::string layername, bool uses_gamma) { long long found_hashes = 0; long long found_features = 0; long long found_geometries = 0; @@ -548,7 +553,7 @@ void parse_json(json_pull *jp, const char *reading, volatile long long *layer_se } found_geometries++; - serialize_geometry(j, NULL, NULL, reading, jp->line, layer_seq, progress_seq, metapos, geompos, indexpos, exclude, include, exclude_all, metafile, geomfile, indexfile, poolfile, treefile, fname, basezoom, layer, droprate, file_bbox, NULL, segment, initialized, initial_x, initial_y, readers, maxzoom, j, layermap, layername); + serialize_geometry(j, NULL, NULL, reading, jp->line, layer_seq, progress_seq, metapos, geompos, indexpos, exclude, include, exclude_all, metafile, geomfile, indexfile, poolfile, treefile, fname, basezoom, layer, droprate, file_bbox, NULL, segment, initialized, initial_x, initial_y, readers, maxzoom, j, layermap, layername, uses_gamma); json_free(j); continue; } @@ -590,10 +595,10 @@ void parse_json(json_pull *jp, const char *reading, volatile long long *layer_se if (geometries != NULL) { size_t g; for (g = 0; g < geometries->length; g++) { - serialize_geometry(geometries->array[g], properties, id, reading, jp->line, layer_seq, progress_seq, metapos, geompos, indexpos, exclude, include, exclude_all, metafile, geomfile, indexfile, poolfile, treefile, fname, basezoom, layer, droprate, file_bbox, tippecanoe, segment, initialized, initial_x, initial_y, readers, maxzoom, j, layermap, layername); + serialize_geometry(geometries->array[g], 
properties, id, reading, jp->line, layer_seq, progress_seq, metapos, geompos, indexpos, exclude, include, exclude_all, metafile, geomfile, indexfile, poolfile, treefile, fname, basezoom, layer, droprate, file_bbox, tippecanoe, segment, initialized, initial_x, initial_y, readers, maxzoom, j, layermap, layername, uses_gamma); } } else { - serialize_geometry(geometry, properties, id, reading, jp->line, layer_seq, progress_seq, metapos, geompos, indexpos, exclude, include, exclude_all, metafile, geomfile, indexfile, poolfile, treefile, fname, basezoom, layer, droprate, file_bbox, tippecanoe, segment, initialized, initial_x, initial_y, readers, maxzoom, j, layermap, layername); + serialize_geometry(geometry, properties, id, reading, jp->line, layer_seq, progress_seq, metapos, geompos, indexpos, exclude, include, exclude_all, metafile, geomfile, indexfile, poolfile, treefile, fname, basezoom, layer, droprate, file_bbox, tippecanoe, segment, initialized, initial_x, initial_y, readers, maxzoom, j, layermap, layername, uses_gamma); } json_free(j); @@ -605,7 +610,7 @@ void parse_json(json_pull *jp, const char *reading, volatile long long *layer_se void *run_parse_json(void *v) { struct parse_json_args *pja = (struct parse_json_args *) v; - parse_json(pja->jp, pja->reading, pja->layer_seq, pja->progress_seq, pja->metapos, pja->geompos, pja->indexpos, pja->exclude, pja->include, pja->exclude_all, pja->metafile, pja->geomfile, pja->indexfile, pja->poolfile, pja->treefile, pja->fname, pja->basezoom, pja->layer, pja->droprate, pja->file_bbox, pja->segment, pja->initialized, pja->initial_x, pja->initial_y, pja->readers, pja->maxzoom, pja->layermap, *pja->layername); + parse_json(pja->jp, pja->reading, pja->layer_seq, pja->progress_seq, pja->metapos, pja->geompos, pja->indexpos, pja->exclude, pja->include, pja->exclude_all, pja->metafile, pja->geomfile, pja->indexfile, pja->poolfile, pja->treefile, pja->fname, pja->basezoom, pja->layer, pja->droprate, pja->file_bbox, pja->segment, 
pja->initialized, pja->initial_x, pja->initial_y, pja->readers, pja->maxzoom, pja->layermap, *pja->layername, pja->uses_gamma); return NULL; } diff --git a/geojson.hpp b/geojson.hpp index 4065af1..db6116f 100644 --- a/geojson.hpp +++ b/geojson.hpp @@ -27,10 +27,11 @@ struct parse_json_args { int maxzoom; std::map *layermap; std::string *layername; + bool uses_gamma; }; struct json_pull *json_begin_map(char *map, long long len); void json_end_map(struct json_pull *jp); -void parse_json(json_pull *jp, const char *reading, volatile long long *layer_seq, volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set *exclude, std::set *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, char *fname, int basezoom, int layer, double droprate, long long *file_bbox, int segment, int *initialized, unsigned *initial_x, unsigned *initial_y, struct reader *readers, int maxzoom, std::map *layermap, std::string layername); +void parse_json(json_pull *jp, const char *reading, volatile long long *layer_seq, volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set *exclude, std::set *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, char *fname, int basezoom, int layer, double droprate, long long *file_bbox, int segment, int *initialized, unsigned *initial_x, unsigned *initial_y, struct reader *readers, int maxzoom, std::map *layermap, std::string layername, bool uses_gamma); void *run_parse_json(void *v); diff --git a/main.cpp b/main.cpp index 77ee1fd..562004e 100644 --- a/main.cpp +++ b/main.cpp @@ -363,7 +363,7 @@ void *run_sort(void *v) { return NULL; } -void do_read_parallel(char *map, long long len, long long initial_offset, const char *reading, struct reader *reader, volatile long long *progress_seq, std::set *exclude, std::set *include, 
int exclude_all, char *fname, int basezoom, int source, int nlayers, std::vector > *layermaps, double droprate, int *initialized, unsigned *initial_x, unsigned *initial_y, int maxzoom, std::string layername) { +void do_read_parallel(char *map, long long len, long long initial_offset, const char *reading, struct reader *reader, volatile long long *progress_seq, std::set *exclude, std::set *include, int exclude_all, char *fname, int basezoom, int source, int nlayers, std::vector > *layermaps, double droprate, int *initialized, unsigned *initial_x, unsigned *initial_y, int maxzoom, std::string layername, bool uses_gamma) { long long segs[CPUS + 1]; segs[0] = 0; segs[CPUS] = len; @@ -420,6 +420,7 @@ void do_read_parallel(char *map, long long len, long long initial_offset, const pja[i].maxzoom = maxzoom; pja[i].layermap = &(*layermaps)[i]; pja[i].layername = &layername; + pja[i].uses_gamma = uses_gamma; if (pthread_create(&pthreads[i], NULL, run_parse_json, &pja[i]) != 0) { perror("pthread_create"); @@ -462,6 +463,7 @@ struct read_parallel_arg { unsigned *initial_x; unsigned *initial_y; std::string layername; + bool uses_gamma; }; void *run_read_parallel(void *v) { @@ -483,7 +485,7 @@ void *run_read_parallel(void *v) { } madvise(map, rpa->len, MADV_RANDOM); // sequential, but from several pointers at once - do_read_parallel(map, rpa->len, rpa->offset, rpa->reading, rpa->reader, rpa->progress_seq, rpa->exclude, rpa->include, rpa->exclude_all, rpa->fname, rpa->basezoom, rpa->source, rpa->nlayers, rpa->layermaps, rpa->droprate, rpa->initialized, rpa->initial_x, rpa->initial_y, rpa->maxzoom, rpa->layername); + do_read_parallel(map, rpa->len, rpa->offset, rpa->reading, rpa->reader, rpa->progress_seq, rpa->exclude, rpa->include, rpa->exclude_all, rpa->fname, rpa->basezoom, rpa->source, rpa->nlayers, rpa->layermaps, rpa->droprate, rpa->initialized, rpa->initial_x, rpa->initial_y, rpa->maxzoom, rpa->layername, rpa->uses_gamma); madvise(map, rpa->len, MADV_DONTNEED); if 
(munmap(map, rpa->len) != 0) { @@ -500,7 +502,7 @@ void *run_read_parallel(void *v) { return NULL; } -void start_parsing(int fd, FILE *fp, long long offset, long long len, volatile int *is_parsing, pthread_t *parallel_parser, bool &parser_created, const char *reading, struct reader *reader, volatile long long *progress_seq, std::set *exclude, std::set *include, int exclude_all, char *fname, int basezoom, int source, int nlayers, std::vector > &layermaps, double droprate, int *initialized, unsigned *initial_x, unsigned *initial_y, int maxzoom, std::string layername) { +void start_parsing(int fd, FILE *fp, long long offset, long long len, volatile int *is_parsing, pthread_t *parallel_parser, bool &parser_created, const char *reading, struct reader *reader, volatile long long *progress_seq, std::set *exclude, std::set *include, int exclude_all, char *fname, int basezoom, int source, int nlayers, std::vector > &layermaps, double droprate, int *initialized, unsigned *initial_x, unsigned *initial_y, int maxzoom, std::string layername, bool uses_gamma) { // This has to kick off an intermediate thread to start the parser threads, // so the main thread can get back to reading the next input stage while // the intermediate thread waits for the completion of the parser threads. 
@@ -536,6 +538,7 @@ void start_parsing(int fd, FILE *fp, long long offset, long long len, volatile i rpa->initial_y = initial_y; rpa->maxzoom = maxzoom; rpa->layername = layername; + rpa->uses_gamma = uses_gamma; if (pthread_create(parallel_parser, NULL, run_read_parallel, rpa) != 0) { perror("pthread_create"); @@ -940,7 +943,7 @@ void radix(struct reader *reader, int nreaders, FILE *geomfile, int geomfd, FILE } } -int read_input(std::vector &sources, char *fname, const char *layername, int maxzoom, int minzoom, int basezoom, double basezoom_marker_width, sqlite3 *outdb, std::set *exclude, std::set *include, int exclude_all, double droprate, int buffer, const char *tmpdir, double gamma, int read_parallel, int forcetable, const char *attribution) { +int read_input(std::vector &sources, char *fname, const char *layername, int maxzoom, int minzoom, int basezoom, double basezoom_marker_width, sqlite3 *outdb, std::set *exclude, std::set *include, int exclude_all, double droprate, int buffer, const char *tmpdir, double gamma, int read_parallel, int forcetable, const char *attribution, bool uses_gamma) { int ret = EXIT_SUCCESS; struct reader reader[CPUS]; @@ -1156,7 +1159,7 @@ int read_input(std::vector &sources, char *fname, const char *layername, } if (map != NULL && map != MAP_FAILED) { - do_read_parallel(map, st.st_size - off, overall_offset, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, source, nlayers, &layermaps, droprate, initialized, initial_x, initial_y, maxzoom, layernames[source < nlayers ? source : 0]); + do_read_parallel(map, st.st_size - off, overall_offset, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, source, nlayers, &layermaps, droprate, initialized, initial_x, initial_y, maxzoom, layernames[source < nlayers ? 
source : 0], uses_gamma); overall_offset += st.st_size - off; checkdisk(reader, CPUS); @@ -1224,7 +1227,7 @@ int read_input(std::vector &sources, char *fname, const char *layername, } fflush(readfp); - start_parsing(readfd, readfp, initial_offset, ahead, &is_parsing, &parallel_parser, parser_created, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, source, nlayers, layermaps, droprate, initialized, initial_x, initial_y, maxzoom, layernames[source < nlayers ? source : 0]); + start_parsing(readfd, readfp, initial_offset, ahead, &is_parsing, &parallel_parser, parser_created, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, source, nlayers, layermaps, droprate, initialized, initial_x, initial_y, maxzoom, layernames[source < nlayers ? source : 0], gamma != 0); initial_offset += ahead; overall_offset += ahead; @@ -1261,7 +1264,7 @@ int read_input(std::vector &sources, char *fname, const char *layername, fflush(readfp); if (ahead > 0) { - start_parsing(readfd, readfp, initial_offset, ahead, &is_parsing, &parallel_parser, parser_created, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, source, nlayers, layermaps, droprate, initialized, initial_x, initial_y, maxzoom, layernames[source < nlayers ? source : 0]); + start_parsing(readfd, readfp, initial_offset, ahead, &is_parsing, &parallel_parser, parser_created, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, source, nlayers, layermaps, droprate, initialized, initial_x, initial_y, maxzoom, layernames[source < nlayers ? 
source : 0], gamma != 0); if (parser_created) { if (pthread_join(parallel_parser, NULL) != 0) { @@ -1278,7 +1281,7 @@ int read_input(std::vector &sources, char *fname, const char *layername, long long layer_seq = overall_offset; json_pull *jp = json_begin_file(fp); - parse_json(jp, reading.c_str(), &layer_seq, &progress_seq, &reader[0].metapos, &reader[0].geompos, &reader[0].indexpos, exclude, include, exclude_all, reader[0].metafile, reader[0].geomfile, reader[0].indexfile, reader[0].poolfile, reader[0].treefile, fname, basezoom, source < nlayers ? source : 0, droprate, reader[0].file_bbox, 0, &initialized[0], &initial_x[0], &initial_y[0], reader, maxzoom, &layermaps[0], layernames[source < nlayers ? source : 0]); + parse_json(jp, reading.c_str(), &layer_seq, &progress_seq, &reader[0].metapos, &reader[0].geompos, &reader[0].indexpos, exclude, include, exclude_all, reader[0].metafile, reader[0].geomfile, reader[0].indexfile, reader[0].poolfile, reader[0].treefile, fname, basezoom, source < nlayers ? source : 0, droprate, reader[0].file_bbox, 0, &initialized[0], &initial_x[0], &initial_y[0], reader, maxzoom, &layermaps[0], layernames[source < nlayers ? source : 0], uses_gamma); json_end(jp); overall_offset = layer_seq; checkdisk(reader, CPUS); @@ -2226,7 +2229,7 @@ int main(int argc, char **argv) { sources.push_back(src); } - ret = read_input(sources, name ? name : outdir, layer, maxzoom, minzoom, basezoom, basezoom_marker_width, outdb, &exclude, &include, exclude_all, droprate, buffer, tmpdir, gamma, read_parallel, forcetable, attribution); + ret = read_input(sources, name ? 
name : outdir, layer, maxzoom, minzoom, basezoom, basezoom_marker_width, outdb, &exclude, &include, exclude_all, droprate, buffer, tmpdir, gamma, read_parallel, forcetable, attribution, gamma != 0); mbtiles_close(outdb, argv); diff --git a/serial.cpp b/serial.cpp index 7db9b42..c23882c 100644 --- a/serial.cpp +++ b/serial.cpp @@ -178,7 +178,15 @@ void serialize_feature(FILE *geomfile, serial_feature *sf, long long *geompos, c serialize_byte(geomfile, sf->t, geompos, fname); serialize_long_long(geomfile, sf->seq, geompos, fname); - serialize_long_long(geomfile, (sf->layer << 3) | (sf->has_id ? 4 : 0) | (sf->has_tippecanoe_minzoom ? 2 : 0) | (sf->has_tippecanoe_maxzoom ? 1 : 0), geompos, fname); + long long layer = 0; + layer |= sf->layer << 5; + layer |= (sf->index != 0) << 4; + layer |= (sf->extent != 0) << 3; + layer |= sf->has_id << 2; + layer |= sf->has_tippecanoe_minzoom << 1; + layer |= sf->has_tippecanoe_maxzoom << 0; + + serialize_long_long(geomfile, layer, geompos, fname); if (sf->has_tippecanoe_minzoom) { serialize_int(geomfile, sf->tippecanoe_minzoom, geompos, fname); } @@ -193,8 +201,12 @@ void serialize_feature(FILE *geomfile, serial_feature *sf, long long *geompos, c write_geometry(sf->geometry, geompos, geomfile, fname, wx, wy); serialize_byte(geomfile, VT_END, geompos, fname); - serialize_ulong_long(geomfile, sf->index, geompos, fname); - serialize_long_long(geomfile, sf->extent, geompos, fname); + if (sf->index != 0) { + serialize_ulong_long(geomfile, sf->index, geompos, fname); + } + if (sf->extent != 0) { + serialize_long_long(geomfile, sf->extent, geompos, fname); + } serialize_int(geomfile, sf->m, geompos, fname); serialize_long_long(geomfile, sf->metapos, geompos, fname); diff --git a/tile.cpp b/tile.cpp index 8e8f298..8c70a1f 100644 --- a/tile.cpp +++ b/tile.cpp @@ -1302,33 +1302,37 @@ long long write_tile(FILE *geoms, long long *geompos_in, char *metabase, char *s long long original_seq; deserialize_long_long_io(geoms, &original_seq, 
geompos_in); - long long layer; - deserialize_long_long_io(geoms, &layer, geompos_in); + long long xlayer; + deserialize_long_long_io(geoms, &xlayer, geompos_in); int tippecanoe_minzoom = -1, tippecanoe_maxzoom = -1; unsigned long long id = 0; bool has_id = false; - if (layer & 2) { + if (xlayer & (1 << 1)) { deserialize_int_io(geoms, &tippecanoe_minzoom, geompos_in); } - if (layer & 1) { + if (xlayer & (1 << 0)) { deserialize_int_io(geoms, &tippecanoe_maxzoom, geompos_in); } - if (layer & 4) { + if (xlayer & (1 << 2)) { has_id = true; deserialize_ulong_long_io(geoms, &id, geompos_in); } - layer >>= 3; + long long layer = xlayer >> 5; int segment; deserialize_int_io(geoms, &segment, geompos_in); long long bbox[4]; - unsigned long long index; - long long extent; + unsigned long long index = 0; + long long extent = 0; drawvec geom = decode_geometry(geoms, geompos_in, z, tx, ty, line_detail, bbox, initial_x[segment], initial_y[segment]); - deserialize_ulong_long_io(geoms, &index, geompos_in); - deserialize_long_long_io(geoms, &extent, geompos_in); + if (xlayer & (1 << 4)) { + deserialize_ulong_long_io(geoms, &index, geompos_in); + } + if (xlayer & (1 << 3)) { + deserialize_long_long_io(geoms, &extent, geompos_in); + } long long metastart; int m;