Don't spend geometry space on index or extent unless it is needed

This commit is contained in:
Eric Fischer 2016-11-11 17:37:46 -08:00
parent 013e6512b4
commit 32aa653082
5 changed files with 56 additions and 31 deletions

@ -167,7 +167,7 @@ long long parse_geometry(int t, json_object *j, long long *bbox, drawvec &out, i
return g;
}
int serialize_geometry(json_object *geometry, json_object *properties, json_object *id, const char *reading, int line, volatile long long *layer_seq, volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, const char *fname, int basezoom, int layer, double droprate, long long *file_bbox, json_object *tippecanoe, int segment, int *initialized, unsigned *initial_x, unsigned *initial_y, struct reader *readers, int maxzoom, json_object *feature, std::map<std::string, layermap_entry> *layermap, std::string const &layername) {
int serialize_geometry(json_object *geometry, json_object *properties, json_object *id, const char *reading, int line, volatile long long *layer_seq, volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, const char *fname, int basezoom, int layer, double droprate, long long *file_bbox, json_object *tippecanoe, int segment, int *initialized, unsigned *initial_x, unsigned *initial_y, struct reader *readers, int maxzoom, json_object *feature, std::map<std::string, layermap_entry> *layermap, std::string const &layername, bool uses_gamma) {
json_object *geometry_type = json_hash_get(geometry, "type");
if (geometry_type == NULL) {
static int warned = 0;
@ -392,6 +392,7 @@ int serialize_geometry(json_object *geometry, json_object *properties, json_obje
}
long long geomstart = *geompos;
long long bbox_index;
serial_feature sf;
sf.layer = layer;
@ -413,7 +414,11 @@ int serialize_geometry(json_object *geometry, json_object *properties, json_obje
// and then mask to bring it back into the addressable area
long long midx = (bbox[0] / 2 + bbox[2] / 2) & ((1LL << 32) - 1);
long long midy = (bbox[1] / 2 + bbox[3] / 2) & ((1LL << 32) - 1);
sf.index = encode(midx, midy);
bbox_index = encode(midx, midy);
if (additional[A_INCREASE_SPACING_AS_NEEDED] || additional[A_CALCULATE_FEATURE_DENSITY] || additional[A_INCREASE_GAMMA_AS_NEEDED] || uses_gamma) {
sf.index = bbox_index;
}
if (inline_meta) {
sf.metapos = -1;
@ -437,7 +442,7 @@ int serialize_geometry(json_object *geometry, json_object *properties, json_obje
index.segment = segment;
index.seq = *layer_seq;
index.t = sf.t;
index.index = sf.index;
index.index = bbox_index;
fwrite_check(&index, sizeof(struct index), 1, indexfile, fname);
*indexpos += sizeof(struct index);
@ -480,7 +485,7 @@ void check_crs(json_object *j, const char *reading) {
}
}
void parse_json(json_pull *jp, const char *reading, volatile long long *layer_seq, volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, char *fname, int basezoom, int layer, double droprate, long long *file_bbox, int segment, int *initialized, unsigned *initial_x, unsigned *initial_y, struct reader *readers, int maxzoom, std::map<std::string, layermap_entry> *layermap, std::string layername) {
void parse_json(json_pull *jp, const char *reading, volatile long long *layer_seq, volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, char *fname, int basezoom, int layer, double droprate, long long *file_bbox, int segment, int *initialized, unsigned *initial_x, unsigned *initial_y, struct reader *readers, int maxzoom, std::map<std::string, layermap_entry> *layermap, std::string layername, bool uses_gamma) {
long long found_hashes = 0;
long long found_features = 0;
long long found_geometries = 0;
@ -548,7 +553,7 @@ void parse_json(json_pull *jp, const char *reading, volatile long long *layer_se
}
found_geometries++;
serialize_geometry(j, NULL, NULL, reading, jp->line, layer_seq, progress_seq, metapos, geompos, indexpos, exclude, include, exclude_all, metafile, geomfile, indexfile, poolfile, treefile, fname, basezoom, layer, droprate, file_bbox, NULL, segment, initialized, initial_x, initial_y, readers, maxzoom, j, layermap, layername);
serialize_geometry(j, NULL, NULL, reading, jp->line, layer_seq, progress_seq, metapos, geompos, indexpos, exclude, include, exclude_all, metafile, geomfile, indexfile, poolfile, treefile, fname, basezoom, layer, droprate, file_bbox, NULL, segment, initialized, initial_x, initial_y, readers, maxzoom, j, layermap, layername, uses_gamma);
json_free(j);
continue;
}
@ -590,10 +595,10 @@ void parse_json(json_pull *jp, const char *reading, volatile long long *layer_se
if (geometries != NULL) {
size_t g;
for (g = 0; g < geometries->length; g++) {
serialize_geometry(geometries->array[g], properties, id, reading, jp->line, layer_seq, progress_seq, metapos, geompos, indexpos, exclude, include, exclude_all, metafile, geomfile, indexfile, poolfile, treefile, fname, basezoom, layer, droprate, file_bbox, tippecanoe, segment, initialized, initial_x, initial_y, readers, maxzoom, j, layermap, layername);
serialize_geometry(geometries->array[g], properties, id, reading, jp->line, layer_seq, progress_seq, metapos, geompos, indexpos, exclude, include, exclude_all, metafile, geomfile, indexfile, poolfile, treefile, fname, basezoom, layer, droprate, file_bbox, tippecanoe, segment, initialized, initial_x, initial_y, readers, maxzoom, j, layermap, layername, uses_gamma);
}
} else {
serialize_geometry(geometry, properties, id, reading, jp->line, layer_seq, progress_seq, metapos, geompos, indexpos, exclude, include, exclude_all, metafile, geomfile, indexfile, poolfile, treefile, fname, basezoom, layer, droprate, file_bbox, tippecanoe, segment, initialized, initial_x, initial_y, readers, maxzoom, j, layermap, layername);
serialize_geometry(geometry, properties, id, reading, jp->line, layer_seq, progress_seq, metapos, geompos, indexpos, exclude, include, exclude_all, metafile, geomfile, indexfile, poolfile, treefile, fname, basezoom, layer, droprate, file_bbox, tippecanoe, segment, initialized, initial_x, initial_y, readers, maxzoom, j, layermap, layername, uses_gamma);
}
json_free(j);
@ -605,7 +610,7 @@ void parse_json(json_pull *jp, const char *reading, volatile long long *layer_se
// Thread entry point wrapping parse_json(): `v` is cast to a
// `struct parse_json_args *`, its fields are forwarded positionally to
// parse_json(), and NULL is always returned.
//
// NOTE(review): this listing appears to be a diff rendering with the +/-
// markers stripped. The two parse_json() calls below look like the removed
// and the added version of the same line (the second additionally forwards
// pja->uses_gamma) -- confirm against the real file; only one should remain.
void *run_parse_json(void *v) {
struct parse_json_args *pja = (struct parse_json_args *) v;
parse_json(pja->jp, pja->reading, pja->layer_seq, pja->progress_seq, pja->metapos, pja->geompos, pja->indexpos, pja->exclude, pja->include, pja->exclude_all, pja->metafile, pja->geomfile, pja->indexfile, pja->poolfile, pja->treefile, pja->fname, pja->basezoom, pja->layer, pja->droprate, pja->file_bbox, pja->segment, pja->initialized, pja->initial_x, pja->initial_y, pja->readers, pja->maxzoom, pja->layermap, *pja->layername);
parse_json(pja->jp, pja->reading, pja->layer_seq, pja->progress_seq, pja->metapos, pja->geompos, pja->indexpos, pja->exclude, pja->include, pja->exclude_all, pja->metafile, pja->geomfile, pja->indexfile, pja->poolfile, pja->treefile, pja->fname, pja->basezoom, pja->layer, pja->droprate, pja->file_bbox, pja->segment, pja->initialized, pja->initial_x, pja->initial_y, pja->readers, pja->maxzoom, pja->layermap, *pja->layername, pja->uses_gamma);
return NULL;
}

@ -27,10 +27,11 @@ struct parse_json_args {
int maxzoom;
std::map<std::string, layermap_entry> *layermap;
std::string *layername;
bool uses_gamma;
};
struct json_pull *json_begin_map(char *map, long long len);
void json_end_map(struct json_pull *jp);
void parse_json(json_pull *jp, const char *reading, volatile long long *layer_seq, volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, char *fname, int basezoom, int layer, double droprate, long long *file_bbox, int segment, int *initialized, unsigned *initial_x, unsigned *initial_y, struct reader *readers, int maxzoom, std::map<std::string, layermap_entry> *layermap, std::string layername);
void parse_json(json_pull *jp, const char *reading, volatile long long *layer_seq, volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, char *fname, int basezoom, int layer, double droprate, long long *file_bbox, int segment, int *initialized, unsigned *initial_x, unsigned *initial_y, struct reader *readers, int maxzoom, std::map<std::string, layermap_entry> *layermap, std::string layername, bool uses_gamma);
void *run_parse_json(void *v);

@ -363,7 +363,7 @@ void *run_sort(void *v) {
return NULL;
}
void do_read_parallel(char *map, long long len, long long initial_offset, const char *reading, struct reader *reader, volatile long long *progress_seq, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, char *fname, int basezoom, int source, int nlayers, std::vector<std::map<std::string, layermap_entry> > *layermaps, double droprate, int *initialized, unsigned *initial_x, unsigned *initial_y, int maxzoom, std::string layername) {
void do_read_parallel(char *map, long long len, long long initial_offset, const char *reading, struct reader *reader, volatile long long *progress_seq, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, char *fname, int basezoom, int source, int nlayers, std::vector<std::map<std::string, layermap_entry> > *layermaps, double droprate, int *initialized, unsigned *initial_x, unsigned *initial_y, int maxzoom, std::string layername, bool uses_gamma) {
long long segs[CPUS + 1];
segs[0] = 0;
segs[CPUS] = len;
@ -420,6 +420,7 @@ void do_read_parallel(char *map, long long len, long long initial_offset, const
pja[i].maxzoom = maxzoom;
pja[i].layermap = &(*layermaps)[i];
pja[i].layername = &layername;
pja[i].uses_gamma = uses_gamma;
if (pthread_create(&pthreads[i], NULL, run_parse_json, &pja[i]) != 0) {
perror("pthread_create");
@ -462,6 +463,7 @@ struct read_parallel_arg {
unsigned *initial_x;
unsigned *initial_y;
std::string layername;
bool uses_gamma;
};
void *run_read_parallel(void *v) {
@ -483,7 +485,7 @@ void *run_read_parallel(void *v) {
}
madvise(map, rpa->len, MADV_RANDOM); // sequential, but from several pointers at once
do_read_parallel(map, rpa->len, rpa->offset, rpa->reading, rpa->reader, rpa->progress_seq, rpa->exclude, rpa->include, rpa->exclude_all, rpa->fname, rpa->basezoom, rpa->source, rpa->nlayers, rpa->layermaps, rpa->droprate, rpa->initialized, rpa->initial_x, rpa->initial_y, rpa->maxzoom, rpa->layername);
do_read_parallel(map, rpa->len, rpa->offset, rpa->reading, rpa->reader, rpa->progress_seq, rpa->exclude, rpa->include, rpa->exclude_all, rpa->fname, rpa->basezoom, rpa->source, rpa->nlayers, rpa->layermaps, rpa->droprate, rpa->initialized, rpa->initial_x, rpa->initial_y, rpa->maxzoom, rpa->layername, rpa->uses_gamma);
madvise(map, rpa->len, MADV_DONTNEED);
if (munmap(map, rpa->len) != 0) {
@ -500,7 +502,7 @@ void *run_read_parallel(void *v) {
return NULL;
}
void start_parsing(int fd, FILE *fp, long long offset, long long len, volatile int *is_parsing, pthread_t *parallel_parser, bool &parser_created, const char *reading, struct reader *reader, volatile long long *progress_seq, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, char *fname, int basezoom, int source, int nlayers, std::vector<std::map<std::string, layermap_entry> > &layermaps, double droprate, int *initialized, unsigned *initial_x, unsigned *initial_y, int maxzoom, std::string layername) {
void start_parsing(int fd, FILE *fp, long long offset, long long len, volatile int *is_parsing, pthread_t *parallel_parser, bool &parser_created, const char *reading, struct reader *reader, volatile long long *progress_seq, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, char *fname, int basezoom, int source, int nlayers, std::vector<std::map<std::string, layermap_entry> > &layermaps, double droprate, int *initialized, unsigned *initial_x, unsigned *initial_y, int maxzoom, std::string layername, bool uses_gamma) {
// This has to kick off an intermediate thread to start the parser threads,
// so the main thread can get back to reading the next input stage while
// the intermediate thread waits for the completion of the parser threads.
@ -536,6 +538,7 @@ void start_parsing(int fd, FILE *fp, long long offset, long long len, volatile i
rpa->initial_y = initial_y;
rpa->maxzoom = maxzoom;
rpa->layername = layername;
rpa->uses_gamma = uses_gamma;
if (pthread_create(parallel_parser, NULL, run_read_parallel, rpa) != 0) {
perror("pthread_create");
@ -940,7 +943,7 @@ void radix(struct reader *reader, int nreaders, FILE *geomfile, int geomfd, FILE
}
}
int read_input(std::vector<source> &sources, char *fname, const char *layername, int maxzoom, int minzoom, int basezoom, double basezoom_marker_width, sqlite3 *outdb, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, double droprate, int buffer, const char *tmpdir, double gamma, int read_parallel, int forcetable, const char *attribution) {
int read_input(std::vector<source> &sources, char *fname, const char *layername, int maxzoom, int minzoom, int basezoom, double basezoom_marker_width, sqlite3 *outdb, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, double droprate, int buffer, const char *tmpdir, double gamma, int read_parallel, int forcetable, const char *attribution, bool uses_gamma) {
int ret = EXIT_SUCCESS;
struct reader reader[CPUS];
@ -1156,7 +1159,7 @@ int read_input(std::vector<source> &sources, char *fname, const char *layername,
}
if (map != NULL && map != MAP_FAILED) {
do_read_parallel(map, st.st_size - off, overall_offset, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, source, nlayers, &layermaps, droprate, initialized, initial_x, initial_y, maxzoom, layernames[source < nlayers ? source : 0]);
do_read_parallel(map, st.st_size - off, overall_offset, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, source, nlayers, &layermaps, droprate, initialized, initial_x, initial_y, maxzoom, layernames[source < nlayers ? source : 0], uses_gamma);
overall_offset += st.st_size - off;
checkdisk(reader, CPUS);
@ -1224,7 +1227,7 @@ int read_input(std::vector<source> &sources, char *fname, const char *layername,
}
fflush(readfp);
start_parsing(readfd, readfp, initial_offset, ahead, &is_parsing, &parallel_parser, parser_created, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, source, nlayers, layermaps, droprate, initialized, initial_x, initial_y, maxzoom, layernames[source < nlayers ? source : 0]);
start_parsing(readfd, readfp, initial_offset, ahead, &is_parsing, &parallel_parser, parser_created, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, source, nlayers, layermaps, droprate, initialized, initial_x, initial_y, maxzoom, layernames[source < nlayers ? source : 0], gamma != 0);
initial_offset += ahead;
overall_offset += ahead;
@ -1261,7 +1264,7 @@ int read_input(std::vector<source> &sources, char *fname, const char *layername,
fflush(readfp);
if (ahead > 0) {
start_parsing(readfd, readfp, initial_offset, ahead, &is_parsing, &parallel_parser, parser_created, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, source, nlayers, layermaps, droprate, initialized, initial_x, initial_y, maxzoom, layernames[source < nlayers ? source : 0]);
start_parsing(readfd, readfp, initial_offset, ahead, &is_parsing, &parallel_parser, parser_created, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, source, nlayers, layermaps, droprate, initialized, initial_x, initial_y, maxzoom, layernames[source < nlayers ? source : 0], gamma != 0);
if (parser_created) {
if (pthread_join(parallel_parser, NULL) != 0) {
@ -1278,7 +1281,7 @@ int read_input(std::vector<source> &sources, char *fname, const char *layername,
long long layer_seq = overall_offset;
json_pull *jp = json_begin_file(fp);
parse_json(jp, reading.c_str(), &layer_seq, &progress_seq, &reader[0].metapos, &reader[0].geompos, &reader[0].indexpos, exclude, include, exclude_all, reader[0].metafile, reader[0].geomfile, reader[0].indexfile, reader[0].poolfile, reader[0].treefile, fname, basezoom, source < nlayers ? source : 0, droprate, reader[0].file_bbox, 0, &initialized[0], &initial_x[0], &initial_y[0], reader, maxzoom, &layermaps[0], layernames[source < nlayers ? source : 0]);
parse_json(jp, reading.c_str(), &layer_seq, &progress_seq, &reader[0].metapos, &reader[0].geompos, &reader[0].indexpos, exclude, include, exclude_all, reader[0].metafile, reader[0].geomfile, reader[0].indexfile, reader[0].poolfile, reader[0].treefile, fname, basezoom, source < nlayers ? source : 0, droprate, reader[0].file_bbox, 0, &initialized[0], &initial_x[0], &initial_y[0], reader, maxzoom, &layermaps[0], layernames[source < nlayers ? source : 0], uses_gamma);
json_end(jp);
overall_offset = layer_seq;
checkdisk(reader, CPUS);
@ -2226,7 +2229,7 @@ int main(int argc, char **argv) {
sources.push_back(src);
}
ret = read_input(sources, name ? name : outdir, layer, maxzoom, minzoom, basezoom, basezoom_marker_width, outdb, &exclude, &include, exclude_all, droprate, buffer, tmpdir, gamma, read_parallel, forcetable, attribution);
ret = read_input(sources, name ? name : outdir, layer, maxzoom, minzoom, basezoom, basezoom_marker_width, outdb, &exclude, &include, exclude_all, droprate, buffer, tmpdir, gamma, read_parallel, forcetable, attribution, gamma != 0);
mbtiles_close(outdb, argv);

@ -178,7 +178,15 @@ void serialize_feature(FILE *geomfile, serial_feature *sf, long long *geompos, c
serialize_byte(geomfile, sf->t, geompos, fname);
serialize_long_long(geomfile, sf->seq, geompos, fname);
serialize_long_long(geomfile, (sf->layer << 3) | (sf->has_id ? 4 : 0) | (sf->has_tippecanoe_minzoom ? 2 : 0) | (sf->has_tippecanoe_maxzoom ? 1 : 0), geompos, fname);
long long layer = 0;
layer |= sf->layer << 5;
layer |= (sf->index != 0) << 4;
layer |= (sf->extent != 0) << 3;
layer |= sf->has_id << 2;
layer |= sf->has_tippecanoe_minzoom << 1;
layer |= sf->has_tippecanoe_maxzoom << 0;
serialize_long_long(geomfile, layer, geompos, fname);
if (sf->has_tippecanoe_minzoom) {
serialize_int(geomfile, sf->tippecanoe_minzoom, geompos, fname);
}
@ -193,8 +201,12 @@ void serialize_feature(FILE *geomfile, serial_feature *sf, long long *geompos, c
write_geometry(sf->geometry, geompos, geomfile, fname, wx, wy);
serialize_byte(geomfile, VT_END, geompos, fname);
serialize_ulong_long(geomfile, sf->index, geompos, fname);
serialize_long_long(geomfile, sf->extent, geompos, fname);
if (sf->index != 0) {
serialize_ulong_long(geomfile, sf->index, geompos, fname);
}
if (sf->extent != 0) {
serialize_long_long(geomfile, sf->extent, geompos, fname);
}
serialize_int(geomfile, sf->m, geompos, fname);
serialize_long_long(geomfile, sf->metapos, geompos, fname);

@ -1302,33 +1302,37 @@ long long write_tile(FILE *geoms, long long *geompos_in, char *metabase, char *s
long long original_seq;
deserialize_long_long_io(geoms, &original_seq, geompos_in);
long long layer;
deserialize_long_long_io(geoms, &layer, geompos_in);
long long xlayer;
deserialize_long_long_io(geoms, &xlayer, geompos_in);
int tippecanoe_minzoom = -1, tippecanoe_maxzoom = -1;
unsigned long long id = 0;
bool has_id = false;
if (layer & 2) {
if (xlayer & (1 << 1)) {
deserialize_int_io(geoms, &tippecanoe_minzoom, geompos_in);
}
if (layer & 1) {
if (xlayer & (1 << 0)) {
deserialize_int_io(geoms, &tippecanoe_maxzoom, geompos_in);
}
if (layer & 4) {
if (xlayer & (1 << 2)) {
has_id = true;
deserialize_ulong_long_io(geoms, &id, geompos_in);
}
layer >>= 3;
long long layer = xlayer >> 5;
int segment;
deserialize_int_io(geoms, &segment, geompos_in);
long long bbox[4];
unsigned long long index;
long long extent;
unsigned long long index = 0;
long long extent = 0;
drawvec geom = decode_geometry(geoms, geompos_in, z, tx, ty, line_detail, bbox, initial_x[segment], initial_y[segment]);
deserialize_ulong_long_io(geoms, &index, geompos_in);
deserialize_long_long_io(geoms, &extent, geompos_in);
if (xlayer & (1 << 4)) {
deserialize_ulong_long_io(geoms, &index, geompos_in);
}
if (xlayer & (1 << 3)) {
deserialize_long_long_io(geoms, &extent, geompos_in);
}
long long metastart;
int m;