diff --git a/Makefile b/Makefile
index d6033e6..9638d85 100644
--- a/Makefile
+++ b/Makefile
@@ -81,7 +81,7 @@ test: tippecanoe tippecanoe-decode $(addsuffix .check,$(TESTS)) parallel-test pb
 
 parallel-test:
 	mkdir -p tests/parallel
-	perl -e 'for ($$i = 0; $$i < 20; $$i++) { $$lon = rand(360) - 180; $$lat = rand(180) - 90; print "{ \"type\": \"Feature\", \"properties\": { }, \"geometry\": { \"type\": \"Point\", \"coordinates\": [ $$lon, $$lat ] } }\n"; }' > tests/parallel/in1.json
+	perl -e 'for ($$i = 0; $$i < 20; $$i++) { $$lon = rand(360) - 180; $$lat = rand(180) - 90; print "{ \"type\": \"Feature\", \"properties\": { \"yes\": \"no\", \"who\": 1 }, \"geometry\": { \"type\": \"Point\", \"coordinates\": [ $$lon, $$lat ] } }\n"; }' > tests/parallel/in1.json
 	perl -e 'for ($$i = 0; $$i < 300000; $$i++) { $$lon = rand(360) - 180; $$lat = rand(180) - 90; print "{ \"type\": \"Feature\", \"properties\": { }, \"geometry\": { \"type\": \"Point\", \"coordinates\": [ $$lon, $$lat ] } }\n"; }' > tests/parallel/in2.json
 	perl -e 'for ($$i = 0; $$i < 20; $$i++) { $$lon = rand(360) - 180; $$lat = rand(180) - 90; print "{ \"type\": \"Feature\", \"properties\": { }, \"geometry\": { \"type\": \"Point\", \"coordinates\": [ $$lon, $$lat ] } }\n"; }' > tests/parallel/in3.json
 	./tippecanoe -z5 -f -pi -l test -n test -o tests/parallel/linear-file.mbtiles tests/parallel/in[123].json
diff --git a/geojson.cpp b/geojson.cpp
index ec42043..ce1ea08 100644
--- a/geojson.cpp
+++ b/geojson.cpp
@@ -28,12 +28,12 @@ extern "C" {
 }
 
 #include "pool.hpp"
-#include "mbtiles.hpp"
 #include "projection.hpp"
 #include "version.hpp"
 #include "memfile.hpp"
 #include "serial.hpp"
 #include "main.hpp"
+#include "mbtiles.hpp"
 #include "geojson.hpp"
 #include "geometry.hpp"
 
@@ -153,7 +153,7 @@ void parse_geometry(int t, json_object *j, long long *bbox, long long *fpos, FIL
 	}
 }
 
-int serialize_geometry(json_object *geometry, json_object *properties, const char *reading, int line, volatile long long *layer_seq, volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, const char *fname, int basezoom, int layer, double droprate, long long *file_bbox, json_object *tippecanoe, int segment, int *initialized, unsigned *initial_x, unsigned *initial_y, struct reader *readers) {
+int serialize_geometry(json_object *geometry, json_object *properties, const char *reading, int line, volatile long long *layer_seq, volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, const char *fname, int basezoom, int layer, double droprate, long long *file_bbox, json_object *tippecanoe, int segment, int *initialized, unsigned *initial_x, unsigned *initial_y, struct reader *readers, std::set<type_and_string> *file_keys) {
 	json_object *geometry_type = json_hash_get(geometry, "type");
 	if (geometry_type == NULL) {
 		static int warned = 0;
@@ -225,37 +225,47 @@ int serialize_geometry(json_object *geometry, json_object *properties, const cha
 	int i;
 	for (i = 0; i < nprop; i++) {
 		if (properties->keys[i]->type == JSON_STRING) {
+			std::string s(properties->keys[i]->string);
+
 			if (exclude_all) {
-				if (include->count(std::string(properties->keys[i]->string)) == 0) {
+				if (include->count(s) == 0) {
 					continue;
 				}
-			} else if (exclude->count(std::string(properties->keys[i]->string)) != 0) {
+			} else if (exclude->count(s) != 0) {
 				continue;
 			}
 
+			type_and_string tas;
+			tas.string = s;
+			tas.type = -1;
+
 			metakey[m] = properties->keys[i]->string;
 			mustfree[m] = 0;
 
 			if (properties->values[i] != NULL && properties->values[i]->type == JSON_STRING) {
-				metatype[m] = VT_STRING;
+				tas.type = metatype[m] = VT_STRING;
 				metaval[m] = properties->values[i]->string;
 				m++;
 			} else if (properties->values[i] != NULL && properties->values[i]->type == JSON_NUMBER) {
-				metatype[m] = VT_NUMBER;
+				tas.type = metatype[m] = VT_NUMBER;
 				metaval[m] = properties->values[i]->string;
 				m++;
 			} else if (properties->values[i] != NULL && (properties->values[i]->type == JSON_TRUE || properties->values[i]->type == JSON_FALSE)) {
-				metatype[m] = VT_BOOLEAN;
+				tas.type = metatype[m] = VT_BOOLEAN;
 				metaval[m] = properties->values[i]->type == JSON_TRUE ? "true" : "false";
 				m++;
 			} else if (properties->values[i] != NULL && (properties->values[i]->type == JSON_NULL)) {
 				;
 			} else {
-				metatype[m] = VT_STRING;
+				tas.type = metatype[m] = VT_STRING;
 				metaval[m] = json_stringify(properties->values[i]);
 				mustfree[m] = 1;
 				m++;
 			}
+
+			if (tas.type >= 0) {
+				file_keys->insert(tas);
+			}
 		}
 	}
 
@@ -356,7 +366,7 @@ int serialize_geometry(json_object *geometry, json_object *properties, const cha
 	return 1;
 }
 
-void parse_json(json_pull *jp, const char *reading, volatile long long *layer_seq, volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, char *fname, int basezoom, int layer, double droprate, long long *file_bbox, int segment, int *initialized, unsigned *initial_x, unsigned *initial_y, struct reader *readers) {
+void parse_json(json_pull *jp, const char *reading, volatile long long *layer_seq, volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, char *fname, int basezoom, int layer, double droprate, long long *file_bbox, int segment, int *initialized, unsigned *initial_x, unsigned *initial_y, struct reader *readers, std::set<type_and_string> *file_keys) {
 	long long found_hashes = 0;
 	long long found_features = 0;
 	long long found_geometries = 0;
@@ -421,7 +431,7 @@ void parse_json(json_pull *jp, const char *reading, volatile long long *layer_se
 			}
 
 			found_geometries++;
-			serialize_geometry(j, NULL, reading, jp->line, layer_seq, progress_seq, metapos, geompos, indexpos, exclude, include, exclude_all, metafile, geomfile, indexfile, poolfile, treefile, fname, basezoom, layer, droprate, file_bbox, NULL, segment, initialized, initial_x, initial_y, readers);
+			serialize_geometry(j, NULL, reading, jp->line, layer_seq, progress_seq, metapos, geompos, indexpos, exclude, include, exclude_all, metafile, geomfile, indexfile, poolfile, treefile, fname, basezoom, layer, droprate, file_bbox, NULL, segment, initialized, initial_x, initial_y, readers, file_keys);
 			json_free(j);
 			continue;
 		}
@@ -456,10 +466,10 @@ void parse_json(json_pull *jp, const char *reading, volatile long long *layer_se
 		if (geometries != NULL) {
 			size_t g;
 			for (g = 0; g < geometries->length; g++) {
-				serialize_geometry(geometries->array[g], properties, reading, jp->line, layer_seq, progress_seq, metapos, geompos, indexpos, exclude, include, exclude_all, metafile, geomfile, indexfile, poolfile, treefile, fname, basezoom, layer, droprate, file_bbox, tippecanoe, segment, initialized, initial_x, initial_y, readers);
+				serialize_geometry(geometries->array[g], properties, reading, jp->line, layer_seq, progress_seq, metapos, geompos, indexpos, exclude, include, exclude_all, metafile, geomfile, indexfile, poolfile, treefile, fname, basezoom, layer, droprate, file_bbox, tippecanoe, segment, initialized, initial_x, initial_y, readers, file_keys);
 			}
 		} else {
-			serialize_geometry(geometry, properties, reading, jp->line, layer_seq, progress_seq, metapos, geompos, indexpos, exclude, include, exclude_all, metafile, geomfile, indexfile, poolfile, treefile, fname, basezoom, layer, droprate, file_bbox, tippecanoe, segment, initialized, initial_x, initial_y, readers);
+			serialize_geometry(geometry, properties, reading, jp->line, layer_seq, progress_seq, metapos, geompos, indexpos, exclude, include, exclude_all, metafile, geomfile, indexfile, poolfile, treefile, fname, basezoom, layer, droprate, file_bbox, tippecanoe, segment, initialized, initial_x, initial_y, readers, file_keys);
 		}
 
 		json_free(j);
@@ -471,7 +481,7 @@ void parse_json(json_pull *jp, const char *reading, volatile long long *layer_se
 void *run_parse_json(void *v) {
 	struct parse_json_args *pja = (struct parse_json_args *) v;
 
-	parse_json(pja->jp, pja->reading, pja->layer_seq, pja->progress_seq, pja->metapos, pja->geompos, pja->indexpos, pja->exclude, pja->include, pja->exclude_all, pja->metafile, pja->geomfile, pja->indexfile, pja->poolfile, pja->treefile, pja->fname, pja->basezoom, pja->layer, pja->droprate, pja->file_bbox, pja->segment, pja->initialized, pja->initial_x, pja->initial_y, pja->readers);
+	parse_json(pja->jp, pja->reading, pja->layer_seq, pja->progress_seq, pja->metapos, pja->geompos, pja->indexpos, pja->exclude, pja->include, pja->exclude_all, pja->metafile, pja->geomfile, pja->indexfile, pja->poolfile, pja->treefile, pja->fname, pja->basezoom, pja->layer, pja->droprate, pja->file_bbox, pja->segment, pja->initialized, pja->initial_x, pja->initial_y, pja->readers, pja->file_keys);
 	return NULL;
 }
diff --git a/geojson.hpp b/geojson.hpp
index d493e41..cec9820 100644
--- a/geojson.hpp
+++ b/geojson.hpp
@@ -24,8 +24,9 @@ struct parse_json_args {
 	unsigned *initial_x;
 	unsigned *initial_y;
 	struct reader *readers;
+	std::set<type_and_string> *file_keys;
 };
 
 struct json_pull *json_begin_map(char *map, long long len);
-void parse_json(json_pull *jp, const char *reading, volatile long long *layer_seq, volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, char *fname, int basezoom, int layer, double droprate, long long *file_bbox, int segment, int *initialized, unsigned *initial_x, unsigned *initial_y, struct reader *readers);
+void parse_json(json_pull *jp, const char *reading, volatile long long *layer_seq, volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, char *fname, int basezoom, int layer, double droprate, long long *file_bbox, int segment, int *initialized, unsigned *initial_x, unsigned *initial_y, struct reader *readers, std::set<type_and_string> *file_keys);
 void *run_parse_json(void *v);
diff --git a/main.cpp b/main.cpp
index ca82ec6..46b2f54 100644
--- a/main.cpp
+++ b/main.cpp
@@ -39,11 +39,11 @@ extern "C" {
 
 #include "tile.hpp"
 #include "pool.hpp"
-#include "mbtiles.hpp"
 #include "projection.hpp"
 #include "version.hpp"
 #include "memfile.hpp"
 #include "serial.hpp"
+#include "mbtiles.hpp"
 #include "main.hpp"
 #include "geojson.hpp"
 #include "geometry.hpp"
@@ -298,7 +298,7 @@ void *run_sort(void *v) {
 	return NULL;
 }
 
-void do_read_parallel(char *map, long long len, long long initial_offset, const char *reading, struct reader *reader, volatile long long *progress_seq, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, char *fname, int basezoom, int source, int nlayers, double droprate, int *initialized, unsigned *initial_x, unsigned *initial_y) {
+void do_read_parallel(char *map, long long len, long long initial_offset, const char *reading, struct reader *reader, volatile long long *progress_seq, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, char *fname, int basezoom, int source, int nlayers, double droprate, int *initialized, unsigned *initial_x, unsigned *initial_y, std::set<type_and_string> *file_keys) {
 	long long segs[CPUS + 1];
 	segs[0] = 0;
 	segs[CPUS] = len;
@@ -321,6 +321,11 @@ void do_read_parallel(char *map, long long len, long long initial_offset, const
 
 	struct parse_json_args pja[CPUS];
 	pthread_t pthreads[CPUS];
+	std::vector<std::set<type_and_string> > file_subkeys;
+
+	for (i = 0; i < CPUS; i++) {
+		file_subkeys.push_back(std::set<type_and_string>());
+	}
 
 	for (i = 0; i < CPUS; i++) {
 		pja[i].jp = json_begin_map(map + segs[i], segs[i + 1] - segs[i]);
@@ -348,6 +353,7 @@ void do_read_parallel(char *map, long long len, long long initial_offset, const
 		pja[i].initial_x = &initial_x[i];
 		pja[i].initial_y = &initial_y[i];
 		pja[i].readers = reader;
+		pja[i].file_keys = &file_subkeys[i];
 
 		if (pthread_create(&pthreads[i], NULL, run_parse_json, &pja[i]) != 0) {
 			perror("pthread_create");
@@ -362,6 +368,11 @@ void do_read_parallel(char *map, long long len, long long initial_offset, const
 			perror("pthread_join");
 		}
 
+		std::set<type_and_string>::iterator j;
+		for (j = file_subkeys[i].begin(); j != file_subkeys[i].end(); ++j) {
+			file_keys->insert(*j);
+		}
+
 		free(pja[i].jp->source);
 		json_end(pja[i].jp);
 	}
@@ -389,6 +400,7 @@ struct read_parallel_arg {
 	int *initialized;
 	unsigned *initial_x;
 	unsigned *initial_y;
+	std::set<type_and_string> *file_keys;
 };
 
 void *run_read_parallel(void *v) {
@@ -410,7 +422,7 @@ void *run_read_parallel(void *v) {
 	}
 	madvise(map, a->len, MADV_RANDOM);  // sequential, but from several pointers at once
 
-	do_read_parallel(map, a->len, a->offset, a->reading, a->reader, a->progress_seq, a->exclude, a->include, a->exclude_all, a->fname, a->basezoom, a->source, a->nlayers, a->droprate, a->initialized, a->initial_x, a->initial_y);
+	do_read_parallel(map, a->len, a->offset, a->reading, a->reader, a->progress_seq, a->exclude, a->include, a->exclude_all, a->fname, a->basezoom, a->source, a->nlayers, a->droprate, a->initialized, a->initial_x, a->initial_y, a->file_keys);
 
 	madvise(map, a->len, MADV_DONTNEED);
 	if (munmap(map, a->len) != 0) {
@@ -427,7 +439,7 @@ void *run_read_parallel(void *v) {
 	return NULL;
 }
 
-void start_parsing(int fd, FILE *fp, long long offset, long long len, volatile int *is_parsing, pthread_t *parallel_parser, const char *reading, struct reader *reader, volatile long long *progress_seq, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, char *fname, int basezoom, int source, int nlayers, double droprate, int *initialized, unsigned *initial_x, unsigned *initial_y) {
+void start_parsing(int fd, FILE *fp, long long offset, long long len, volatile int *is_parsing, pthread_t *parallel_parser, const char *reading, struct reader *reader, volatile long long *progress_seq, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, char *fname, int basezoom, int source, int nlayers, double droprate, int *initialized, unsigned *initial_x, unsigned *initial_y, std::set<type_and_string> *file_keys) {
 	// This has to kick off an intermediate thread to start the parser threads,
 	// so the main thread can get back to reading the next input stage while
 	// the intermediate thread waits for the completion of the parser threads.
@@ -460,6 +472,7 @@ void start_parsing(int fd, FILE *fp, long long offset, long long len, volatile i
 	rpa->initialized = initialized;
 	rpa->initial_x = initial_x;
 	rpa->initial_y = initial_y;
+	rpa->file_keys = file_keys;
 
 	if (pthread_create(parallel_parser, NULL, run_read_parallel, rpa) != 0) {
 		perror("pthread_create");
@@ -966,10 +979,12 @@ int read_input(std::vector<source> &sources, char *fname, const char *layername,
 		nsources = 1;
 	}
 
+	std::vector<std::set<type_and_string> > file_keys;
 	long overall_offset = 0;
 
 	int source;
 	for (source = 0; source < nsources; source++) {
+		file_keys.push_back(std::set<type_and_string>());
 		std::string reading;
 		int fd;
@@ -1003,7 +1018,7 @@ int read_input(std::vector<source> &sources, char *fname, const char *layername,
 		}
 
 		if (map != NULL && map != MAP_FAILED) {
-			do_read_parallel(map, st.st_size - off, overall_offset, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, source, nlayers, droprate, initialized, initial_x, initial_y);
+			do_read_parallel(map, st.st_size - off, overall_offset, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, source, nlayers, droprate, initialized, initial_x, initial_y, &file_keys[source < nlayers ? source : 0]);
 			overall_offset += st.st_size - off;
 			checkdisk(reader, CPUS);
@@ -1069,7 +1084,7 @@ int read_input(std::vector<source> &sources, char *fname, const char *layername,
 					}
 
 					fflush(readfp);
-					start_parsing(readfd, readfp, initial_offset, ahead, &is_parsing, &parallel_parser, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, source, nlayers, droprate, initialized, initial_x, initial_y);
+					start_parsing(readfd, readfp, initial_offset, ahead, &is_parsing, &parallel_parser, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, source, nlayers, droprate, initialized, initial_x, initial_y, &file_keys[source < nlayers ? source : 0]);
 
 					initial_offset += ahead;
 					overall_offset += ahead;
@@ -1105,7 +1120,7 @@ int read_input(std::vector<source> &sources, char *fname, const char *layername,
 			fflush(readfp);
 
 			if (ahead > 0) {
-				start_parsing(readfd, readfp, initial_offset, ahead, &is_parsing, &parallel_parser, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, source, nlayers, droprate, initialized, initial_x, initial_y);
+				start_parsing(readfd, readfp, initial_offset, ahead, &is_parsing, &parallel_parser, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, source, nlayers, droprate, initialized, initial_x, initial_y, &file_keys[source < nlayers ? source : 0]);
 
 				if (pthread_join(parallel_parser, NULL) != 0) {
 					perror("pthread_join");
@@ -1119,7 +1134,7 @@ int read_input(std::vector<source> &sources, char *fname, const char *layername,
 			long long layer_seq = overall_offset;
 			json_pull *jp = json_begin_file(fp);
 
-			parse_json(jp, reading.c_str(), &layer_seq, &progress_seq, &reader[0].metapos, &reader[0].geompos, &reader[0].indexpos, exclude, include, exclude_all, reader[0].metafile, reader[0].geomfile, reader[0].indexfile, reader[0].poolfile, reader[0].treefile, fname, basezoom, source < nlayers ? source : 0, droprate, reader[0].file_bbox, 0, &initialized[0], &initial_x[0], &initial_y[0], reader);
+			parse_json(jp, reading.c_str(), &layer_seq, &progress_seq, &reader[0].metapos, &reader[0].geompos, &reader[0].indexpos, exclude, include, exclude_all, reader[0].metafile, reader[0].geomfile, reader[0].indexfile, reader[0].poolfile, reader[0].treefile, fname, basezoom, source < nlayers ? source : 0, droprate, reader[0].file_bbox, 0, &initialized[0], &initial_x[0], &initial_y[0], reader, &file_keys[source < nlayers ? source : 0]);
 			json_end(jp);
 			overall_offset = layer_seq;
 			checkdisk(reader, CPUS);
@@ -1162,13 +1177,6 @@ int read_input(std::vector<source> &sources, char *fname, const char *layername,
 		}
 	}
 
-	struct pool file_keys1[nlayers];
-	struct pool *file_keys[nlayers];
-	for (i = 0; i < nlayers; i++) {
-		pool_init(&file_keys1[i], 0);
-		file_keys[i] = &file_keys1[i];
-	}
-
 	char *layernames[nlayers];
 	for (i = 0; i < nlayers; i++) {
 		if (layername != NULL) {
@@ -1598,7 +1606,7 @@ int read_input(std::vector<source> &sources, char *fname, const char *layername,
 	}
 
 	unsigned midx = 0, midy = 0;
-	int written = traverse_zooms(fd, size, meta, stringpool, file_keys, &midx, &midy, layernames, maxzoom, minzoom, basezoom, outdb, droprate, buffer, fname, tmpdir, gamma, nlayers, prevent, additional, full_detail, low_detail, min_detail, meta_off, pool_off, initial_x, initial_y);
+	int written = traverse_zooms(fd, size, meta, stringpool, &midx, &midy, layernames, maxzoom, minzoom, basezoom, outdb, droprate, buffer, fname, tmpdir, gamma, nlayers, prevent, additional, full_detail, low_detail, min_detail, meta_off, pool_off, initial_x, initial_y);
 
 	if (maxzoom != written) {
 		fprintf(stderr, "\n\n\n*** NOTE TILES ONLY COMPLETE THROUGH ZOOM %d ***\n\n\n", written);
@@ -1615,7 +1623,7 @@ int read_input(std::vector<source> &sources, char *fname, const char *layername,
 	}
 
 	if (poolpos > 0) {
-		madvise((void *) pool, poolpos, MADV_DONTNEED);
+		madvise((void *) stringpool, poolpos, MADV_DONTNEED);
 		if (munmap(stringpool, poolpos) != 0) {
 			perror("munmap stringpool");
 		}
@@ -1685,7 +1693,6 @@ int read_input(std::vector<source> &sources, char *fname, const char *layername,
 	mbtiles_write_metadata(outdb, fname, layernames, minzoom, maxzoom, minlat, minlon, maxlat, maxlon, midlat, midlon, file_keys, nlayers, forcetable, attribution);
 
 	for (i = 0; i < nlayers; i++) {
-		pool_free_strings(&file_keys1[i]);
 		free(layernames[i]);
 	}
diff --git a/mbtiles.cpp b/mbtiles.cpp
index ca45de1..97318f5 100644
--- a/mbtiles.cpp
+++ b/mbtiles.cpp
@@ -9,6 +9,8 @@
 #include
 #include
 #include
+#include
+#include "main.hpp"
 #include "pool.hpp"
 #include "mbtiles.hpp"
 #include "geometry.hpp"
@@ -120,19 +122,17 @@ static void aprintf(std::string *buf, const char *format, ...) {
 	free(tmp);
 }
 
-static int pvcmp(const void *v1, const void *v2) {
-	const struct pool_val *const *pv1 = (const struct pool_val *const *) v1;
-	const struct pool_val *const *pv2 = (const struct pool_val *const *) v2;
-
-	int n = strcmp((*pv1)->s, (*pv2)->s);
-	if (n != 0) {
-		return n;
+bool type_and_string::operator<(const type_and_string &o) const {
+	if (string < o.string) {
+		return true;
 	}
-
-	return (*pv1)->type - (*pv2)->type;
+	if (string == o.string && type < o.type) {
+		return true;
+	}
+	return false;
 }
 
-void mbtiles_write_metadata(sqlite3 *outdb, const char *fname, char **layername, int minzoom, int maxzoom, double minlat, double minlon, double maxlat, double maxlon, double midlat, double midlon, struct pool **file_keys, int nlayers, int forcetable, const char *attribution) {
+void mbtiles_write_metadata(sqlite3 *outdb, const char *fname, char **layername, int minzoom, int maxzoom, double minlat, double minlon, double maxlat, double maxlon, double midlat, double midlon, std::vector<std::set<type_and_string> > &file_keys, int nlayers, int forcetable, const char *attribution) {
 	char *sql, *err;
 
 	sql = sqlite3_mprintf("INSERT INTO metadata (name, value) VALUES ('name', %Q);", fname);
@@ -240,37 +240,25 @@ void mbtiles_write_metadata(sqlite3 *outdb, const char *fname, char **layername,
 		quote(&buf, layername[i]);
 		aprintf(&buf, "\", \"description\": \"\", \"minzoom\": %d, \"maxzoom\": %d, \"fields\": {", minzoom, maxzoom);
 
-		int n = 0;
-		struct pool_val *pv;
-		for (pv = file_keys[i]->head; pv != NULL; pv = pv->next) {
-			n++;
-		}
+		std::set<type_and_string>::iterator j;
+		bool first = true;
+		for (j = file_keys[i].begin(); j != file_keys[i].end(); ++j) {
+			if (first) {
+				first = false;
+			} else {
+				aprintf(&buf, ", ");
+			}
 
-		struct pool_val *vals[n];
-		n = 0;
-		for (pv = file_keys[i]->head; pv != NULL; pv = pv->next) {
-			vals[n++] = pv;
-		}
-
-		qsort(vals, n, sizeof(struct pool_val *), pvcmp);
-
-		int j;
-		for (j = 0; j < n; j++) {
-			pv = vals[j];
 			aprintf(&buf, "\"");
-			quote(&buf, pv->s);
+			quote(&buf, j->string.c_str());
 
-			if (pv->type == VT_NUMBER) {
+			if (j->type == VT_NUMBER) {
 				aprintf(&buf, "\": \"Number\"");
-			} else if (pv->type == VT_BOOLEAN) {
+			} else if (j->type == VT_BOOLEAN) {
 				aprintf(&buf, "\": \"Boolean\"");
 			} else {
 				aprintf(&buf, "\": \"String\"");
 			}
-
-			if (j + 1 < n) {
-				aprintf(&buf, ", ");
-			}
 		}
 
 		aprintf(&buf, "} }");
diff --git a/mbtiles.hpp b/mbtiles.hpp
index 50f72cf..3f275bb 100644
--- a/mbtiles.hpp
+++ b/mbtiles.hpp
@@ -1,7 +1,14 @@
+struct type_and_string {
+	int type;
+	std::string string;
+
+	bool operator<(const type_and_string &o) const;
+};
+
 sqlite3 *mbtiles_open(char *dbname, char **argv, int forcetable);
 
 void mbtiles_write_tile(sqlite3 *outdb, int z, int tx, int ty, const char *data, int size);
 
-void mbtiles_write_metadata(sqlite3 *outdb, const char *fname, char **layername, int minzoom, int maxzoom, double minlat, double minlon, double maxlat, double maxlon, double midlat, double midlon, struct pool **file_keys, int nlayers, int forcetable, const char *attribution);
+void mbtiles_write_metadata(sqlite3 *outdb, const char *fname, char **layername, int minzoom, int maxzoom, double minlat, double minlon, double maxlat, double maxlon, double midlat, double midlon, std::vector<std::set<type_and_string> > &file_keys, int nlayers, int forcetable, const char *attribution);
 
 void mbtiles_close(sqlite3 *outdb, char **argv);
diff --git a/pool.cpp b/pool.cpp
index e169fc4..364e527 100644
--- a/pool.cpp
+++ b/pool.cpp
@@ -5,120 +5,6 @@
 #include "memfile.hpp"
 #include "pool.hpp"
 
-#define POOL_WIDTH 256
-
-static int hash(const char *s) {
-	unsigned h = 0;
-	for (; *s; s++) {
-		h = (h * 37LL + (*s & 0xFF)) & ULONG_MAX;
-	}
-	return h & 0xFF;
-}
-
-struct pool_val *pool(struct pool *p, const char *s, int type) {
-	int h = hash(s);
-	struct pool_val **v = &(p->vals[h]);
-
-	while (*v != NULL) {
-		int cmp = strcmp(s, (*v)->s);
-
-		if (cmp == 0) {
-			cmp = type - (*v)->type;
-		}
-
-		if (cmp == 0) {
-			return *v;
-		} else if (cmp < 0) {
-			v = &((*v)->left);
-		} else {
-			v = &((*v)->right);
-		}
-	}
-
-	struct pool_val *nv = (struct pool_val *) malloc(sizeof(struct pool_val));
-	if (nv == NULL) {
-		fprintf(stderr, "out of memory making string pool\n");
-		exit(EXIT_FAILURE);
-	}
-	nv->left = NULL;
-	nv->right = NULL;
-	nv->next = NULL;
-	nv->s = s;
-	nv->type = type;
-	nv->n = p->n++;
-
-	if (p->tail != NULL) {
-		p->tail->next = nv;
-	}
-	p->tail = nv;
-	if (p->head == NULL) {
-		p->head = nv;
-	}
-
-	*v = nv;
-	return *v;
-}
-
-int is_pooled(struct pool *p, const char *s, int type) {
-	int h = hash(s);
-	struct pool_val **v = &(p->vals[h]);
-
-	while (*v != NULL) {
-		int cmp = strcmp(s, (*v)->s);
-
-		if (cmp == 0) {
-			cmp = type - (*v)->type;
-		}
-
-		if (cmp == 0) {
-			return 1;
-		} else if (cmp < 0) {
-			v = &((*v)->left);
-		} else {
-			v = &((*v)->right);
-		}
-	}
-
-	return 0;
-}
-
-void pool_free1(struct pool *p, void (*func)(void *)) {
-	while (p->head != NULL) {
-		if (func != NULL) {
-			func((void *) p->head->s);
-		}
-
-		struct pool_val *next = p->head->next;
-		free(p->head);
-		p->head = next;
-	}
-
-	p->head = NULL;
-	p->tail = NULL;
-
-	free(p->vals);
-	p->vals = NULL;
-}
-
-void pool_free(struct pool *p) {
-	pool_free1(p, NULL);
-}
-
-void pool_free_strings(struct pool *p) {
-	pool_free1(p, free);
-}
-
-void pool_init(struct pool *p, int n) {
-	p->n = n;
-	p->vals = (struct pool_val **) calloc(POOL_WIDTH, sizeof(struct pool_val *));
-	if (p->vals == NULL) {
-		fprintf(stderr, "out of memory creating string pool\n");
-		exit(EXIT_FAILURE);
-	}
-	p->head = NULL;
-	p->tail = NULL;
-}
-
 static unsigned char swizzle[256] = {
 	0x00, 0xBF, 0x18, 0xDE, 0x93, 0xC9, 0xB1, 0x5E, 0xDF, 0xBE, 0x72, 0x5A, 0xBB, 0x42, 0x64, 0xC6,
 	0xD8, 0xB7, 0x15, 0x74, 0x1C, 0x8B, 0x91, 0xF5, 0x29, 0x46, 0xEC, 0x6F, 0xCA, 0x20, 0xF0, 0x06,
diff --git a/pool.hpp b/pool.hpp
index c064524..b3abd3c 100644
--- a/pool.hpp
+++ b/pool.hpp
@@ -1,28 +1,3 @@
-struct pool_val {
-	const char *s;
-	int type;
-	int n;
-
-	struct pool_val *left;
-	struct pool_val *right;
-
-	struct pool_val *next;
-};
-
-struct pool {
-	struct pool_val **vals;
-
-	struct pool_val *head;
-	struct pool_val *tail;
-	int n;
-};
-
-struct pool_val *pool(struct pool *p, const char *s, int type);
-void pool_free(struct pool *p);
-void pool_free_strings(struct pool *p);
-void pool_init(struct pool *p, int n);
-int is_pooled(struct pool *p, const char *s, int type);
-
 struct stringpool {
 	long long left;
 	long long right;
diff --git a/tile-join.cpp b/tile-join.cpp
index acccbb3..596d087 100644
--- a/tile-join.cpp
+++ b/tile-join.cpp
@@ -7,6 +7,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include "mvt.hpp"
@@ -24,7 +25,7 @@ struct stats {
 	double minlat, minlon, maxlat, maxlon;
 };
 
-void handle(std::string message, int z, unsigned x, unsigned y, struct pool **file_keys, char ***layernames, int *nlayers, sqlite3 *outdb, std::vector<std::string> &header, std::map<std::string, std::vector<std::string> > &mapping, struct pool *exclude, int ifmatched) {
+void handle(std::string message, int z, unsigned x, unsigned y, std::vector<std::set<type_and_string> > &file_keys, char ***layernames, int *nlayers, sqlite3 *outdb, std::vector<std::string> &header, std::map<std::string, std::vector<std::string> > &mapping, std::set<std::string> &exclude, int ifmatched) {
 	mvt_tile tile;
 	mvt_tile outtile;
 	int features_added = 0;
@@ -51,19 +52,14 @@ void handle(std::string message, int z, unsigned x, unsigned y, struct pool **fi
 			}
 		}
 		if (ll == *nlayers) {
-			*file_keys = (struct pool *) realloc(*file_keys, (ll + 1) * sizeof(struct pool));
+			file_keys.push_back(std::set<type_and_string>());
 			*layernames = (char **) realloc(*layernames, (ll + 1) * sizeof(char *));
 
-			if (*file_keys == NULL) {
-				perror("realloc file_keys");
-				exit(EXIT_FAILURE);
-			}
 			if (*layernames == NULL) {
 				perror("realloc layernames");
 				exit(EXIT_FAILURE);
 			}
 
-			pool_init(&((*file_keys)[ll]), 0);
 			(*layernames)[ll] = strdup(ln);
 			if ((*layernames)[ll] == NULL) {
 				perror("Out of memory");
@@ -122,16 +118,11 @@ void handle(std::string message, int z, unsigned x, unsigned y, struct pool **fi
 				continue;
 			}
 
-			if (!is_pooled(exclude, key, VT_STRING)) {
-				if (!is_pooled(&((*file_keys)[ll]), key, type)) {
-					char *copy = strdup(key);
-					if (copy == NULL) {
-						perror("Out of memory");
-						exit(EXIT_FAILURE);
-					}
-					pool(&((*file_keys)[ll]), copy, type);
-				}
-
+			if (exclude.count(std::string(key)) == 0) {
+				type_and_string tas;
+				tas.string = std::string(key);
+				tas.type = type;
+				file_keys[ll].insert(tas);
 				outlayer.tag(outfeature, layer.keys[feat.tags[t]], val);
 			}
 
@@ -157,15 +148,12 @@ void handle(std::string message, int z, unsigned x, unsigned y, struct pool **fi
 
 					const char *sjoinkey = joinkey.c_str();
 
-					if (!is_pooled(exclude, sjoinkey, VT_STRING)) {
-						if (!is_pooled(&((*file_keys)[ll]), sjoinkey, type)) {
-							char *copy = strdup(sjoinkey);
-							if (copy == NULL) {
-								perror("Out of memory");
-								exit(EXIT_FAILURE);
-							}
-							pool(&((*file_keys)[ll]), copy, type);
-						}
+					if (exclude.count(joinkey) == 0) {
+						type_and_string tas;
+						tas.string = std::string(sjoinkey);
+						tas.type = type;
+						file_keys[ll].insert(tas);
+
 						outlayer.tag(outfeature, layer.keys[feat.tags[t]], val);
 						mvt_value outval;
 						if (type == VT_STRING) {
@@ -227,7 +215,7 @@ double max(double a, double b) {
 	}
 }
 
-void decode(char *fname, char *map, struct pool **file_keys, char ***layernames, int *nlayers, sqlite3 *outdb, struct stats *st, std::vector<std::string> &header, std::map<std::string, std::vector<std::string> > &mapping, struct pool *exclude, int ifmatched, char **attribution) {
+void decode(char *fname, char *map, std::vector<std::set<type_and_string> > &file_keys, char ***layernames, int *nlayers, sqlite3 *outdb, struct stats *st, std::vector<std::string> &header, std::map<std::string, std::vector<std::string> > &mapping, std::set<std::string> &exclude, int ifmatched, char **attribution) {
 	sqlite3 *db;
 
 	if (sqlite3_open(fname, &db) != SQLITE_OK) {
@@ -392,8 +380,7 @@ int main(int argc, char **argv) {
 	std::vector<std::string> header;
 	std::map<std::string, std::vector<std::string> > mapping;
 
-	struct pool exclude;
-	pool_init(&exclude, 0);
+	std::set<std::string> exclude;
 
 	extern int optind;
 	extern char *optarg;
@@ -424,7 +411,7 @@ int main(int argc, char **argv) {
 			break;
 
 		case 'x':
-			pool(&exclude, optarg, VT_STRING);
+			exclude.insert(std::string(optarg));
 			break;
 
 		default:
@@ -446,21 +433,16 @@ int main(int argc, char **argv) {
 	st.minzoom = st.minlat = st.minlon = INT_MAX;
 	st.maxzoom = st.maxlat = st.maxlon = INT_MIN;
 
-	struct pool *file_keys = NULL;
+	std::vector<std::set<type_and_string> > file_keys;
 	char **layernames = NULL;
 	int nlayers = 0;
 	char *attribution = NULL;
 
 	for (i = optind; i < argc; i++) {
-		decode(argv[i], csv, &file_keys, &layernames, &nlayers, outdb, &st, header, mapping, &exclude, ifmatched, &attribution);
+		decode(argv[i], csv, file_keys, &layernames, &nlayers, outdb, &st, header, mapping, exclude, ifmatched, &attribution);
 	}
 
-	struct pool *fk[nlayers];
-	for (i = 0; i < nlayers; i++) {
-		fk[i] = &(file_keys[i]);
-	}
-
-	mbtiles_write_metadata(outdb, outfile, layernames, st.minzoom, st.maxzoom, st.minlat, st.minlon, st.maxlat, st.maxlon, st.midlat, st.midlon, fk, nlayers, 0, attribution);
+	mbtiles_write_metadata(outdb, outfile, layernames, st.minzoom, st.maxzoom, st.minlat, st.minlon, st.maxlat, st.maxlon, st.midlat, st.midlon, file_keys, nlayers, 0, attribution);
 
 	mbtiles_close(outdb, argv);
 	return 0;
diff --git a/tile.cpp b/tile.cpp
index 4488783..1fa4332 100644
--- a/tile.cpp
+++ b/tile.cpp
@@ -23,11 +23,11 @@
 #include "geometry.hpp"
 #include "tile.hpp"
 #include "pool.hpp"
-#include "mbtiles.hpp"
 #include "projection.hpp"
 #include "serial.hpp"
 #include "options.hpp"
 #include "main.hpp"
+#include "mbtiles.hpp"
 
 #define CMD_BITS 3
 
@@ -160,7 +160,7 @@ mvt_value retrieve_string(char **f, char *stringpool, int *otype) {
 	return tv;
 }
 
-void decode_meta(int m, char **meta, char *stringpool, mvt_layer &layer, mvt_feature &feature, struct pool *file_keys) {
+void decode_meta(int m, char **meta, char *stringpool, mvt_layer &layer, mvt_feature &feature) {
 	int i;
 	for (i = 0; i < m; i++) {
 		int otype;
@@ -168,26 +168,6 @@ void decode_meta(int m, char **meta, char *stringpool, mvt_layer &layer, mvt_fea
 		mvt_value value = retrieve_string(meta, stringpool, &otype);
 
 		layer.tag(feature, key.string_value, value);
-
-		if (!is_pooled(file_keys, key.string_value.c_str(), otype)) {
-			if (pthread_mutex_lock(&var_lock) != 0) {
-				perror("pthread_mutex_lock");
-				exit(EXIT_FAILURE);
-			}
-
-			// Dup to retain after munmap
-			char *copy = strdup(key.string_value.c_str());
-			if (copy == NULL) {
-				perror("Out of memory");
-				exit(EXIT_FAILURE);
-			}
-			pool(file_keys, copy, otype);
-
-			if (pthread_mutex_unlock(&var_lock) != 0) {
-				perror("pthread_mutex_unlock");
-				exit(EXIT_FAILURE);
-			}
-		}
 	}
 }
 
@@ -526,7 +506,7 @@ int manage_gap(unsigned long long index, unsigned long long *previndex, double s
 	return 0;
 }
 
-long long write_tile(FILE *geoms, long long *geompos_in, char *metabase, char *stringpool, int z, unsigned tx, unsigned ty, int detail, int min_detail, int basezoom, struct pool **file_keys, char **layernames, sqlite3 *outdb, double droprate, int buffer, const char *fname, FILE **geomfile, int minzoom, int maxzoom, double todo, volatile long long *along, double gamma, int nlayers, int *prevent, int *additional, int child_shards, long long *meta_off, long long *pool_off, unsigned *initial_x, unsigned *initial_y, volatile int *running) {
+long long write_tile(FILE *geoms, long long *geompos_in, char *metabase, char *stringpool, int z, unsigned tx, unsigned ty, int detail, int min_detail, int basezoom, char **layernames, sqlite3 *outdb, double droprate, int buffer, const char *fname, FILE **geomfile, int minzoom, int maxzoom, double todo, volatile long long *along, double gamma, int nlayers, int *prevent, int *additional, int child_shards, long long *meta_off, long long *pool_off, unsigned *initial_x, unsigned *initial_y, volatile int *running) {
 	int line_detail;
 	double fraction = 1;
 
@@ -936,7 +916,7 @@ long long write_tile(FILE *geoms, long long *geompos_in, char *metabase, char *s
 				feature.geometry = to_feature(features[j][x].geom);
 				count += features[j][x].geom.size();
 
-				decode_meta(features[j][x].m, &features[j][x].meta, features[j][x].stringpool, layer, feature, file_keys[j]);
+				decode_meta(features[j][x].m, &features[j][x].meta, features[j][x].stringpool, layer, feature);
 				layer.features.push_back(feature);
 			}
 
@@ -1014,7 +994,6 @@ struct write_tile_args {
 	char *stringpool;
 	int min_detail;
 	int basezoom;
-	struct pool **file_keys;  // locked with var_lock
 	char **layernames;
 	sqlite3 *outdb;  // locked with db_lock
 	double droprate;
@@ -1082,7 +1061,7 @@ void *run_thread(void *vargs) {
 
 			// fprintf(stderr, "%d/%u/%u\n", z, x, y);
 
-			long long len = write_tile(geom, &geompos, arg->metabase, arg->stringpool, z, x, y, z == arg->maxzoom ? arg->full_detail : arg->low_detail, arg->min_detail, arg->basezoom, arg->file_keys, arg->layernames, arg->outdb, arg->droprate, arg->buffer, arg->fname, arg->geomfile, arg->minzoom, arg->maxzoom, arg->todo, arg->along, arg->gamma, arg->nlayers, arg->prevent, arg->additional, arg->child_shards, arg->meta_off, arg->pool_off, arg->initial_x, arg->initial_y, arg->running);
+			long long len = write_tile(geom, &geompos, arg->metabase, arg->stringpool, z, x, y, z == arg->maxzoom ? arg->full_detail : arg->low_detail, arg->min_detail, arg->basezoom, arg->layernames, arg->outdb, arg->droprate, arg->buffer, arg->fname, arg->geomfile, arg->minzoom, arg->maxzoom, arg->todo, arg->along, arg->gamma, arg->nlayers, arg->prevent, arg->additional, arg->child_shards, arg->meta_off, arg->pool_off, arg->initial_x, arg->initial_y, arg->running);
 
 			if (len < 0) {
 				int *err = (int *) malloc(sizeof(int));
@@ -1137,7 +1116,7 @@ void *run_thread(void *vargs) {
 	return NULL;
 }
 
-int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpool, struct pool **file_keys, unsigned *midx, unsigned *midy, char **layernames, int maxzoom, int minzoom, int basezoom, sqlite3 *outdb, double droprate, int buffer, const char *fname, const char *tmpdir, double gamma, int nlayers, int *prevent, int *additional, int full_detail, int low_detail, int min_detail, long long *meta_off, long long *pool_off, unsigned *initial_x, unsigned *initial_y) {
+int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpool, unsigned *midx, unsigned *midy, char **layernames, int maxzoom, int minzoom, int basezoom, sqlite3 *outdb, double droprate, int buffer, const char *fname, const char *tmpdir, double gamma, int nlayers, int *prevent, int *additional, int full_detail, int low_detail, int min_detail, long long *meta_off, long long *pool_off, unsigned *initial_x, unsigned *initial_y) {
 	int i;
 	for (i = 0; i <= maxzoom; i++) {
 		long long most = 0;
@@ -1247,7 +1226,6 @@ int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpo
 			args[thread].stringpool = stringpool;
 			args[thread].min_detail = min_detail;
 			args[thread].basezoom = basezoom;
-			args[thread].file_keys = file_keys;  // locked with var_lock
 			args[thread].layernames = layernames;
 			args[thread].outdb = outdb;  // locked with db_lock
 			args[thread].droprate = droprate;
diff --git a/tile.hpp b/tile.hpp
index e37a02a..c7bddc7 100644
--- a/tile.hpp
+++ b/tile.hpp
@@ -1,5 +1,5 @@
-long long write_tile(char **geom, char *metabase, char *stringpool, unsigned *file_bbox, int z, unsigned x, unsigned y, int detail, int min_detail, int basezoom, struct pool **file_keys, char **layernames, sqlite3 *outdb, double droprate, int buffer, const char *fname, FILE **geomfile, int file_minzoom, int file_maxzoom, double todo, char *geomstart, long long along, double gamma, int nlayers, int *prevent, int *additional);
+long long write_tile(char **geom, char *metabase, char *stringpool, unsigned *file_bbox, int z, unsigned x, unsigned y, int detail, int min_detail, int basezoom, char **layernames, sqlite3 *outdb, double droprate, int buffer, const char *fname, FILE **geomfile, int file_minzoom, int file_maxzoom, double todo, char *geomstart, long long along, double gamma, int nlayers, int *prevent, int *additional);
 
-int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpool, struct pool **file_keys, unsigned *midx, unsigned *midy, char **layernames, int maxzoom, int minzoom, int basezoom, sqlite3 *outdb, double droprate, int buffer, const char *fname, const char *tmpdir, double gamma, int nlayers, int *prevent, int *additional, int full_detail, int low_detail, int min_detail, long long *meta_off, long long *pool_off, unsigned *initial_x, unsigned *initial_y);
+int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpool, unsigned *midx, unsigned *midy, char **layernames, int maxzoom, int minzoom, int basezoom, sqlite3 *outdb, double droprate, int buffer, const char *fname, const char *tmpdir, double gamma, int nlayers, int *prevent, int *additional, int full_detail, int low_detail, int min_detail, long long *meta_off, long long *pool_off, unsigned *initial_x, unsigned *initial_y);
 
 int manage_gap(unsigned long long index, unsigned long long *previndex, double scale, double gamma, double *gap);
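
Note on the change above: the per-layer attribute bookkeeping that used to live in struct pool is now a std::set<type_and_string>, built one set per parser thread, merged after the threads join, and finally handed to mbtiles_write_metadata() to emit each layer's "fields" object. The sketch below is not part of the diff; it only illustrates, using stand-in VT_* constants and a hypothetical merge_subkeys() helper, how the operator< defined in mbtiles.cpp makes the set deduplicate key/type pairs and keep them sorted.

#include <cstdio>
#include <set>
#include <string>
#include <vector>

// Stand-in attribute type codes; the real constants come from the project's headers.
#define VT_STRING 1
#define VT_NUMBER 2

struct type_and_string {
	int type;
	std::string string;

	// Same ordering as in mbtiles.cpp: sort by key name, then by type.
	bool operator<(const type_and_string &o) const {
		if (string < o.string) {
			return true;
		}
		if (string == o.string && type < o.type) {
			return true;
		}
		return false;
	}
};

// Hypothetical helper mirroring the merge loop in do_read_parallel():
// union the per-thread sets into the file-wide key set.
static void merge_subkeys(std::vector<std::set<type_and_string> > &subkeys, std::set<type_and_string> &file_keys) {
	for (size_t i = 0; i < subkeys.size(); i++) {
		file_keys.insert(subkeys[i].begin(), subkeys[i].end());
	}
}

int main() {
	std::vector<std::set<type_and_string> > subkeys(2);

	type_and_string tas;
	tas.type = VT_STRING;
	tas.string = "name";
	subkeys[0].insert(tas);
	subkeys[1].insert(tas);  // the same key seen by another thread collapses after merging

	tas.type = VT_NUMBER;
	tas.string = "population";
	subkeys[1].insert(tas);

	std::set<type_and_string> file_keys;
	merge_subkeys(subkeys, file_keys);

	// Prints each key once, in sorted order, with its type code, which is
	// the shape of the data mbtiles_write_metadata() walks to build "fields".
	for (std::set<type_and_string>::iterator j = file_keys.begin(); j != file_keys.end(); ++j) {
		printf("%s -> %d\n", j->string.c_str(), j->type);
	}
	return 0;
}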