From f7e64dca5f1101674585e977dcbb8450a1a79dc2 Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Tue, 23 Aug 2016 15:33:53 -0700 Subject: [PATCH] Work in progress on being able to specify per-feature layer names --- geojson.cpp | 33 +++++++++++--- geojson.hpp | 3 +- main.cpp | 125 +++++++++++++++++++++++++++++----------------------- 3 files changed, 98 insertions(+), 63 deletions(-) diff --git a/geojson.cpp b/geojson.cpp index 3f071e2..aaf0143 100644 --- a/geojson.cpp +++ b/geojson.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include extern "C" { @@ -166,7 +167,7 @@ long long parse_geometry(int t, json_object *j, long long *bbox, drawvec &out, i return g; } -int serialize_geometry(json_object *geometry, json_object *properties, json_object *id, const char *reading, int line, volatile long long *layer_seq, volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set *exclude, std::set *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, const char *fname, int basezoom, int layer, double droprate, long long *file_bbox, json_object *tippecanoe, int segment, int *initialized, unsigned *initial_x, unsigned *initial_y, struct reader *readers, std::set *file_keys, int maxzoom, json_object *feature) { +int serialize_geometry(json_object *geometry, json_object *properties, json_object *id, const char *reading, int line, volatile long long *layer_seq, volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set *exclude, std::set *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, const char *fname, int basezoom, int layer, double droprate, long long *file_bbox, json_object *tippecanoe, int segment, int *initialized, unsigned *initial_x, unsigned *initial_y, struct reader *readers, std::set *file_keys, int maxzoom, json_object 
*feature, std::map *layermap) { json_object *geometry_type = json_hash_get(geometry, "type"); if (geometry_type == NULL) { static int warned = 0; @@ -206,6 +207,7 @@ int serialize_geometry(json_object *geometry, json_object *properties, json_obje int tippecanoe_minzoom = -1; int tippecanoe_maxzoom = -1; + std::string tippecanoe_layername; if (tippecanoe != NULL) { json_object *min = json_hash_get(tippecanoe, "minzoom"); @@ -223,6 +225,11 @@ int serialize_geometry(json_object *geometry, json_object *properties, json_obje if (max != NULL && max->type == JSON_STRING) { tippecanoe_maxzoom = atoi(max->string); } + + json_object *ln = json_hash_get(tippecanoe, "layername"); + if (ln != NULL && (ln->type == JSON_STRING || ln->type == JSON_NUMBER)) { + tippecanoe_layername = std::string(ln->string); + } } bool has_id = false; @@ -364,6 +371,20 @@ int serialize_geometry(json_object *geometry, json_object *properties, json_obje feature_minzoom = basezoom - floor(log(r) / -log(droprate)); } + if (tippecanoe_layername.size() != 0) { + if (layermap->count(tippecanoe_layername) == 0) { + layermap->insert(std::pair(tippecanoe_layername, layermap->size())); + } + + auto ai = layermap->find(tippecanoe_layername); + if (ai != layermap->end()) { + layer = ai->second; + } else { + fprintf(stderr, "Internal error: can't find layer name %s\n", tippecanoe_layername.c_str()); + exit(EXIT_FAILURE); + } + } + long long geomstart = *geompos; serial_feature sf; @@ -450,7 +471,7 @@ void check_crs(json_object *j, const char *reading) { } } -void parse_json(json_pull *jp, const char *reading, volatile long long *layer_seq, volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set *exclude, std::set *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, char *fname, int basezoom, int layer, double droprate, long long *file_bbox, int segment, int *initialized, unsigned
*initial_x, unsigned *initial_y, struct reader *readers, std::set *file_keys, int maxzoom) { +void parse_json(json_pull *jp, const char *reading, volatile long long *layer_seq, volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set *exclude, std::set *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, char *fname, int basezoom, int layer, double droprate, long long *file_bbox, int segment, int *initialized, unsigned *initial_x, unsigned *initial_y, struct reader *readers, std::set *file_keys, int maxzoom, std::map *layermap) { long long found_hashes = 0; long long found_features = 0; long long found_geometries = 0; @@ -518,7 +539,7 @@ void parse_json(json_pull *jp, const char *reading, volatile long long *layer_se } found_geometries++; - serialize_geometry(j, NULL, NULL, reading, jp->line, layer_seq, progress_seq, metapos, geompos, indexpos, exclude, include, exclude_all, metafile, geomfile, indexfile, poolfile, treefile, fname, basezoom, layer, droprate, file_bbox, NULL, segment, initialized, initial_x, initial_y, readers, file_keys, maxzoom, j); + serialize_geometry(j, NULL, NULL, reading, jp->line, layer_seq, progress_seq, metapos, geompos, indexpos, exclude, include, exclude_all, metafile, geomfile, indexfile, poolfile, treefile, fname, basezoom, layer, droprate, file_bbox, NULL, segment, initialized, initial_x, initial_y, readers, file_keys, maxzoom, j, layermap); json_free(j); continue; } @@ -560,10 +581,10 @@ void parse_json(json_pull *jp, const char *reading, volatile long long *layer_se if (geometries != NULL) { size_t g; for (g = 0; g < geometries->length; g++) { - serialize_geometry(geometries->array[g], properties, id, reading, jp->line, layer_seq, progress_seq, metapos, geompos, indexpos, exclude, include, exclude_all, metafile, geomfile, indexfile, poolfile, treefile, fname, basezoom, layer, droprate, file_bbox, tippecanoe, 
segment, initialized, initial_x, initial_y, readers, file_keys, maxzoom, j); + serialize_geometry(geometries->array[g], properties, id, reading, jp->line, layer_seq, progress_seq, metapos, geompos, indexpos, exclude, include, exclude_all, metafile, geomfile, indexfile, poolfile, treefile, fname, basezoom, layer, droprate, file_bbox, tippecanoe, segment, initialized, initial_x, initial_y, readers, file_keys, maxzoom, j, layermap); } } else { - serialize_geometry(geometry, properties, id, reading, jp->line, layer_seq, progress_seq, metapos, geompos, indexpos, exclude, include, exclude_all, metafile, geomfile, indexfile, poolfile, treefile, fname, basezoom, layer, droprate, file_bbox, tippecanoe, segment, initialized, initial_x, initial_y, readers, file_keys, maxzoom, j); + serialize_geometry(geometry, properties, id, reading, jp->line, layer_seq, progress_seq, metapos, geompos, indexpos, exclude, include, exclude_all, metafile, geomfile, indexfile, poolfile, treefile, fname, basezoom, layer, droprate, file_bbox, tippecanoe, segment, initialized, initial_x, initial_y, readers, file_keys, maxzoom, j, layermap); } json_free(j); @@ -575,7 +596,7 @@ void parse_json(json_pull *jp, const char *reading, volatile long long *layer_se void *run_parse_json(void *v) { struct parse_json_args *pja = (struct parse_json_args *) v; - parse_json(pja->jp, pja->reading, pja->layer_seq, pja->progress_seq, pja->metapos, pja->geompos, pja->indexpos, pja->exclude, pja->include, pja->exclude_all, pja->metafile, pja->geomfile, pja->indexfile, pja->poolfile, pja->treefile, pja->fname, pja->basezoom, pja->layer, pja->droprate, pja->file_bbox, pja->segment, pja->initialized, pja->initial_x, pja->initial_y, pja->readers, pja->file_keys, pja->maxzoom); + parse_json(pja->jp, pja->reading, pja->layer_seq, pja->progress_seq, pja->metapos, pja->geompos, pja->indexpos, pja->exclude, pja->include, pja->exclude_all, pja->metafile, pja->geomfile, pja->indexfile, pja->poolfile, pja->treefile, pja->fname, 
pja->basezoom, pja->layer, pja->droprate, pja->file_bbox, pja->segment, pja->initialized, pja->initial_x, pja->initial_y, pja->readers, pja->file_keys, pja->maxzoom, pja->layermap); return NULL; } diff --git a/geojson.hpp b/geojson.hpp index 5c48c23..83ebc47 100644 --- a/geojson.hpp +++ b/geojson.hpp @@ -26,8 +26,9 @@ struct parse_json_args { struct reader *readers; std::set *file_keys; int maxzoom; + std::map *layermap; }; struct json_pull *json_begin_map(char *map, long long len); -void parse_json(json_pull *jp, const char *reading, volatile long long *layer_seq, volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set *exclude, std::set *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, char *fname, int basezoom, int layer, double droprate, long long *file_bbox, int segment, int *initialized, unsigned *initial_x, unsigned *initial_y, struct reader *readers, std::set *file_keys, int maxzoom); +void parse_json(json_pull *jp, const char *reading, volatile long long *layer_seq, volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set *exclude, std::set *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, char *fname, int basezoom, int layer, double droprate, long long *file_bbox, int segment, int *initialized, unsigned *initial_x, unsigned *initial_y, struct reader *readers, std::set *file_keys, int maxzoom, std::map *layermap); void *run_parse_json(void *v); diff --git a/main.cpp b/main.cpp index 402215e..66289b4 100644 --- a/main.cpp +++ b/main.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #ifdef __APPLE__ #include @@ -299,7 +300,7 @@ void *run_sort(void *v) { return NULL; } -void do_read_parallel(char *map, long long len, long long initial_offset, const char *reading, struct reader *reader, volatile long long 
*progress_seq, std::set *exclude, std::set *include, int exclude_all, char *fname, int basezoom, int source, int nlayers, double droprate, int *initialized, unsigned *initial_x, unsigned *initial_y, std::set *file_keys, int maxzoom) { +void do_read_parallel(char *map, long long len, long long initial_offset, const char *reading, struct reader *reader, volatile long long *progress_seq, std::set *exclude, std::set *include, int exclude_all, char *fname, int basezoom, int source, int nlayers, std::vector > *layermaps, double droprate, int *initialized, unsigned *initial_x, unsigned *initial_y, std::set *file_keys, int maxzoom) { long long segs[CPUS + 1]; segs[0] = 0; segs[CPUS] = len; @@ -356,6 +357,7 @@ void do_read_parallel(char *map, long long len, long long initial_offset, const pja[i].readers = reader; pja[i].file_keys = &file_subkeys[i]; pja[i].maxzoom = maxzoom; + pja[i].layermap = &(*layermaps)[i]; if (pthread_create(&pthreads[i], NULL, run_parse_json, &pja[i]) != 0) { perror("pthread_create"); @@ -398,6 +400,7 @@ struct read_parallel_arg { int basezoom; int source; int nlayers; + std::vector > *layermaps; double droprate; int *initialized; unsigned *initial_x; @@ -424,7 +427,7 @@ void *run_read_parallel(void *v) { } madvise(map, a->len, MADV_RANDOM); // sequential, but from several pointers at once - do_read_parallel(map, a->len, a->offset, a->reading, a->reader, a->progress_seq, a->exclude, a->include, a->exclude_all, a->fname, a->basezoom, a->source, a->nlayers, a->droprate, a->initialized, a->initial_x, a->initial_y, a->file_keys, a->maxzoom); + do_read_parallel(map, a->len, a->offset, a->reading, a->reader, a->progress_seq, a->exclude, a->include, a->exclude_all, a->fname, a->basezoom, a->source, a->nlayers, a->layermaps, a->droprate, a->initialized, a->initial_x, a->initial_y, a->file_keys, a->maxzoom); madvise(map, a->len, MADV_DONTNEED); if (munmap(map, a->len) != 0) { @@ -441,7 +444,7 @@ void *run_read_parallel(void *v) { return NULL; } -void 
start_parsing(int fd, FILE *fp, long long offset, long long len, volatile int *is_parsing, pthread_t *parallel_parser, bool &parser_created, const char *reading, struct reader *reader, volatile long long *progress_seq, std::set *exclude, std::set *include, int exclude_all, char *fname, int basezoom, int source, int nlayers, double droprate, int *initialized, unsigned *initial_x, unsigned *initial_y, std::set *file_keys, int maxzoom) { +void start_parsing(int fd, FILE *fp, long long offset, long long len, volatile int *is_parsing, pthread_t *parallel_parser, bool &parser_created, const char *reading, struct reader *reader, volatile long long *progress_seq, std::set *exclude, std::set *include, int exclude_all, char *fname, int basezoom, int source, int nlayers, std::vector > &layermaps, double droprate, int *initialized, unsigned *initial_x, unsigned *initial_y, std::set *file_keys, int maxzoom) { // This has to kick off an intermediate thread to start the parser threads, // so the main thread can get back to reading the next input stage while // the intermediate thread waits for the completion of the parser threads. 
@@ -470,6 +473,7 @@ void start_parsing(int fd, FILE *fp, long long offset, long long len, volatile i rpa->basezoom = basezoom; rpa->source = source; rpa->nlayers = nlayers; + rpa->layermaps = &layermaps; rpa->droprate = droprate; rpa->initialized = initialized; rpa->initial_x = initial_x; @@ -976,6 +980,64 @@ int read_input(std::vector &sources, char *fname, const char *layername, } } + std::vector layernames; + for (size_t l = 0; l < nlayers; l++) { + if (layername != NULL) { + layernames.push_back(std::string(layername)); + } else { + const char *src; + if (sources.size() < 1) { + src = fname; + } else if (sources[l].layer.size() != 0) { + src = sources[l].layer.c_str(); + } else { + src = sources[l].file.c_str(); + } + + // Find the last component of the pathname + const char *ocp, *use = src; + for (ocp = src; *ocp; ocp++) { + if (*ocp == '/' && ocp[1] != '\0') { + use = ocp + 1; + } + } + std::string trunc = std::string(use); + + // Trim .json or .mbtiles from the name + ssize_t cp; + cp = trunc.find(".json"); + if (cp >= 0) { + trunc = trunc.substr(0, cp); + } + cp = trunc.find(".mbtiles"); + if (cp >= 0) { + trunc = trunc.substr(0, cp); + } + + // Trim out characters that can't be part of selector + std::string out; + for (size_t p = 0; p < trunc.size(); p++) { + if (isalpha(trunc[p]) || isdigit(trunc[p]) || trunc[p] == '_') { + out.append(trunc, p, 1); + } + } + layernames.push_back(out); + + if (!quiet) { + fprintf(stderr, "For layer %d, using name \"%s\"\n", (int) l, out.c_str()); + } + } + } + + std::map layermap; + for (size_t l = 0; l < nlayers; l++) { + layermap.insert(std::pair(layernames[l], l)); + } + std::vector > layermaps; + for (size_t l = 0; l < CPUS; l++) { + layermaps.push_back(layermap); + } + size_t nsources = sources.size(); if (nsources == 0) { nsources = 1; @@ -1026,7 +1088,7 @@ int read_input(std::vector &sources, char *fname, const char *layername, } if (map != NULL && map != MAP_FAILED) { - do_read_parallel(map, st.st_size - off, 
overall_offset, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, source, nlayers, droprate, initialized, initial_x, initial_y, &file_keys[source < nlayers ? source : 0], maxzoom); + do_read_parallel(map, st.st_size - off, overall_offset, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, source, nlayers, &layermaps, droprate, initialized, initial_x, initial_y, &file_keys[source < nlayers ? source : 0], maxzoom); overall_offset += st.st_size - off; checkdisk(reader, CPUS); @@ -1094,7 +1156,7 @@ int read_input(std::vector &sources, char *fname, const char *layername, } fflush(readfp); - start_parsing(readfd, readfp, initial_offset, ahead, &is_parsing, ¶llel_parser, parser_created, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, source, nlayers, droprate, initialized, initial_x, initial_y, &file_keys[source < nlayers ? source : 0], maxzoom); + start_parsing(readfd, readfp, initial_offset, ahead, &is_parsing, ¶llel_parser, parser_created, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, source, nlayers, layermaps, droprate, initialized, initial_x, initial_y, &file_keys[source < nlayers ? source : 0], maxzoom); initial_offset += ahead; overall_offset += ahead; @@ -1131,7 +1193,7 @@ int read_input(std::vector &sources, char *fname, const char *layername, fflush(readfp); if (ahead > 0) { - start_parsing(readfd, readfp, initial_offset, ahead, &is_parsing, ¶llel_parser, parser_created, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, source, nlayers, droprate, initialized, initial_x, initial_y, &file_keys[source < nlayers ? 
source : 0], maxzoom); + start_parsing(readfd, readfp, initial_offset, ahead, &is_parsing, ¶llel_parser, parser_created, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, source, nlayers, layermaps, droprate, initialized, initial_x, initial_y, &file_keys[source < nlayers ? source : 0], maxzoom); if (parser_created) { if (pthread_join(parallel_parser, NULL) != 0) { @@ -1148,7 +1210,7 @@ int read_input(std::vector &sources, char *fname, const char *layername, long long layer_seq = overall_offset; json_pull *jp = json_begin_file(fp); - parse_json(jp, reading.c_str(), &layer_seq, &progress_seq, &reader[0].metapos, &reader[0].geompos, &reader[0].indexpos, exclude, include, exclude_all, reader[0].metafile, reader[0].geomfile, reader[0].indexfile, reader[0].poolfile, reader[0].treefile, fname, basezoom, source < nlayers ? source : 0, droprate, reader[0].file_bbox, 0, &initialized[0], &initial_x[0], &initial_y[0], reader, &file_keys[source < nlayers ? source : 0], maxzoom); + parse_json(jp, reading.c_str(), &layer_seq, &progress_seq, &reader[0].metapos, &reader[0].geompos, &reader[0].indexpos, exclude, include, exclude_all, reader[0].metafile, reader[0].geomfile, reader[0].indexfile, reader[0].poolfile, reader[0].treefile, fname, basezoom, source < nlayers ? source : 0, droprate, reader[0].file_bbox, 0, &initialized[0], &initial_x[0], &initial_y[0], reader, &file_keys[source < nlayers ? 
source : 0], maxzoom, &layermaps[0]); json_end(jp); overall_offset = layer_seq; checkdisk(reader, CPUS); @@ -1191,55 +1253,6 @@ int read_input(std::vector &sources, char *fname, const char *layername, } } - std::vector layernames; - for (size_t l = 0; l < nlayers; l++) { - if (layername != NULL) { - layernames.push_back(std::string(layername)); - } else { - const char *src; - if (sources.size() < 1) { - src = fname; - } else if (sources[l].layer.size() != 0) { - src = sources[l].layer.c_str(); - } else { - src = sources[l].file.c_str(); - } - - // Find the last component of the pathname - const char *ocp, *use = src; - for (ocp = src; *ocp; ocp++) { - if (*ocp == '/' && ocp[1] != '\0') { - use = ocp + 1; - } - } - std::string trunc = std::string(use); - - // Trim .json or .mbtiles from the name - ssize_t cp; - cp = trunc.find(".json"); - if (cp >= 0) { - trunc = trunc.substr(0, cp); - } - cp = trunc.find(".mbtiles"); - if (cp >= 0) { - trunc = trunc.substr(0, cp); - } - - // Trim out characters that can't be part of selector - std::string out; - for (size_t p = 0; p < trunc.size(); p++) { - if (isalpha(trunc[p]) || isdigit(trunc[p]) || trunc[p] == '_') { - out.append(trunc, p, 1); - } - } - layernames.push_back(out); - - if (!quiet) { - fprintf(stderr, "For layer %d, using name \"%s\"\n", (int) l, out.c_str()); - } - } - } - // Create a combined string pool and a combined metadata file // but keep track of the offsets into it since we still need // segment+offset to find the data.