Use std::set to track the layer-wide feature attribute types.

Track them during parsing, not tiling.  Remove the old string pool code.
Eric Fischer 2016-04-28 14:43:04 -07:00
parent 87b90a5033
commit adc70341ad
11 changed files with 108 additions and 274 deletions
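In essence, the diff below replaces the per-layer string pool of attribute keys with a std::set<type_and_string> keyed on attribute name and type, filled in while the GeoJSON is parsed rather than while tiles are written. A minimal standalone sketch of that structure and of how duplicate keys collapse (the VT_* values and the demo main are illustrative, not taken from this commit):

#include <cstdio>
#include <set>
#include <string>

// Illustrative stand-ins for the VT_* attribute-type constants used in the diff.
#define VT_STRING 1
#define VT_NUMBER 2
#define VT_BOOLEAN 7

struct type_and_string {
	int type;
	std::string string;

	// Order by attribute name first, then by type; equivalent to the
	// operator< added in mbtiles.cpp below.
	bool operator<(const type_and_string &o) const {
		return string < o.string || (string == o.string && type < o.type);
	}
};

int main() {
	std::set<type_and_string> file_keys;  // one of these per layer

	// Parsing inserts one entry per property key it encounters.
	file_keys.insert(type_and_string{VT_STRING, "name"});
	file_keys.insert(type_and_string{VT_NUMBER, "population"});
	file_keys.insert(type_and_string{VT_STRING, "name"});  // duplicate: the set ignores it

	printf("%zu distinct attributes\n", file_keys.size());  // prints 2
	return 0;
}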

@ -81,7 +81,7 @@ test: tippecanoe tippecanoe-decode $(addsuffix .check,$(TESTS)) parallel-test pb
parallel-test:
mkdir -p tests/parallel
perl -e 'for ($$i = 0; $$i < 20; $$i++) { $$lon = rand(360) - 180; $$lat = rand(180) - 90; print "{ \"type\": \"Feature\", \"properties\": { }, \"geometry\": { \"type\": \"Point\", \"coordinates\": [ $$lon, $$lat ] } }\n"; }' > tests/parallel/in1.json
perl -e 'for ($$i = 0; $$i < 20; $$i++) { $$lon = rand(360) - 180; $$lat = rand(180) - 90; print "{ \"type\": \"Feature\", \"properties\": { \"yes\": \"no\", \"who\": 1 }, \"geometry\": { \"type\": \"Point\", \"coordinates\": [ $$lon, $$lat ] } }\n"; }' > tests/parallel/in1.json
perl -e 'for ($$i = 0; $$i < 300000; $$i++) { $$lon = rand(360) - 180; $$lat = rand(180) - 90; print "{ \"type\": \"Feature\", \"properties\": { }, \"geometry\": { \"type\": \"Point\", \"coordinates\": [ $$lon, $$lat ] } }\n"; }' > tests/parallel/in2.json
perl -e 'for ($$i = 0; $$i < 20; $$i++) { $$lon = rand(360) - 180; $$lat = rand(180) - 90; print "{ \"type\": \"Feature\", \"properties\": { }, \"geometry\": { \"type\": \"Point\", \"coordinates\": [ $$lon, $$lat ] } }\n"; }' > tests/parallel/in3.json
./tippecanoe -z5 -f -pi -l test -n test -o tests/parallel/linear-file.mbtiles tests/parallel/in[123].json

@ -28,12 +28,12 @@ extern "C" {
}
#include "pool.hpp"
#include "mbtiles.hpp"
#include "projection.hpp"
#include "version.hpp"
#include "memfile.hpp"
#include "serial.hpp"
#include "main.hpp"
#include "mbtiles.hpp"
#include "geojson.hpp"
#include "geometry.hpp"
@ -153,7 +153,7 @@ void parse_geometry(int t, json_object *j, long long *bbox, long long *fpos, FIL
}
}
int serialize_geometry(json_object *geometry, json_object *properties, const char *reading, int line, volatile long long *layer_seq, volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, const char *fname, int basezoom, int layer, double droprate, long long *file_bbox, json_object *tippecanoe, int segment, int *initialized, unsigned *initial_x, unsigned *initial_y, struct reader *readers) {
int serialize_geometry(json_object *geometry, json_object *properties, const char *reading, int line, volatile long long *layer_seq, volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, const char *fname, int basezoom, int layer, double droprate, long long *file_bbox, json_object *tippecanoe, int segment, int *initialized, unsigned *initial_x, unsigned *initial_y, struct reader *readers, std::set<type_and_string> *file_keys) {
json_object *geometry_type = json_hash_get(geometry, "type");
if (geometry_type == NULL) {
static int warned = 0;
@ -225,37 +225,47 @@ int serialize_geometry(json_object *geometry, json_object *properties, const cha
int i;
for (i = 0; i < nprop; i++) {
if (properties->keys[i]->type == JSON_STRING) {
std::string s(properties->keys[i]->string);
if (exclude_all) {
if (include->count(std::string(properties->keys[i]->string)) == 0) {
if (include->count(s) == 0) {
continue;
}
} else if (exclude->count(std::string(properties->keys[i]->string)) != 0) {
} else if (exclude->count(s) != 0) {
continue;
}
type_and_string tas;
tas.string = s;
tas.type = -1;
metakey[m] = properties->keys[i]->string;
mustfree[m] = 0;
if (properties->values[i] != NULL && properties->values[i]->type == JSON_STRING) {
metatype[m] = VT_STRING;
tas.type = metatype[m] = VT_STRING;
metaval[m] = properties->values[i]->string;
m++;
} else if (properties->values[i] != NULL && properties->values[i]->type == JSON_NUMBER) {
metatype[m] = VT_NUMBER;
tas.type = metatype[m] = VT_NUMBER;
metaval[m] = properties->values[i]->string;
m++;
} else if (properties->values[i] != NULL && (properties->values[i]->type == JSON_TRUE || properties->values[i]->type == JSON_FALSE)) {
metatype[m] = VT_BOOLEAN;
tas.type = metatype[m] = VT_BOOLEAN;
metaval[m] = properties->values[i]->type == JSON_TRUE ? "true" : "false";
m++;
} else if (properties->values[i] != NULL && (properties->values[i]->type == JSON_NULL)) {
;
} else {
metatype[m] = VT_STRING;
tas.type = metatype[m] = VT_STRING;
metaval[m] = json_stringify(properties->values[i]);
mustfree[m] = 1;
m++;
}
if (tas.type >= 0) {
file_keys->insert(tas);
}
}
}
@ -356,7 +366,7 @@ int serialize_geometry(json_object *geometry, json_object *properties, const cha
return 1;
}
void parse_json(json_pull *jp, const char *reading, volatile long long *layer_seq, volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, char *fname, int basezoom, int layer, double droprate, long long *file_bbox, int segment, int *initialized, unsigned *initial_x, unsigned *initial_y, struct reader *readers) {
void parse_json(json_pull *jp, const char *reading, volatile long long *layer_seq, volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, char *fname, int basezoom, int layer, double droprate, long long *file_bbox, int segment, int *initialized, unsigned *initial_x, unsigned *initial_y, struct reader *readers, std::set<type_and_string> *file_keys) {
long long found_hashes = 0;
long long found_features = 0;
long long found_geometries = 0;
@ -421,7 +431,7 @@ void parse_json(json_pull *jp, const char *reading, volatile long long *layer_se
}
found_geometries++;
serialize_geometry(j, NULL, reading, jp->line, layer_seq, progress_seq, metapos, geompos, indexpos, exclude, include, exclude_all, metafile, geomfile, indexfile, poolfile, treefile, fname, basezoom, layer, droprate, file_bbox, NULL, segment, initialized, initial_x, initial_y, readers);
serialize_geometry(j, NULL, reading, jp->line, layer_seq, progress_seq, metapos, geompos, indexpos, exclude, include, exclude_all, metafile, geomfile, indexfile, poolfile, treefile, fname, basezoom, layer, droprate, file_bbox, NULL, segment, initialized, initial_x, initial_y, readers, file_keys);
json_free(j);
continue;
}
@ -456,10 +466,10 @@ void parse_json(json_pull *jp, const char *reading, volatile long long *layer_se
if (geometries != NULL) {
size_t g;
for (g = 0; g < geometries->length; g++) {
serialize_geometry(geometries->array[g], properties, reading, jp->line, layer_seq, progress_seq, metapos, geompos, indexpos, exclude, include, exclude_all, metafile, geomfile, indexfile, poolfile, treefile, fname, basezoom, layer, droprate, file_bbox, tippecanoe, segment, initialized, initial_x, initial_y, readers);
serialize_geometry(geometries->array[g], properties, reading, jp->line, layer_seq, progress_seq, metapos, geompos, indexpos, exclude, include, exclude_all, metafile, geomfile, indexfile, poolfile, treefile, fname, basezoom, layer, droprate, file_bbox, tippecanoe, segment, initialized, initial_x, initial_y, readers, file_keys);
}
} else {
serialize_geometry(geometry, properties, reading, jp->line, layer_seq, progress_seq, metapos, geompos, indexpos, exclude, include, exclude_all, metafile, geomfile, indexfile, poolfile, treefile, fname, basezoom, layer, droprate, file_bbox, tippecanoe, segment, initialized, initial_x, initial_y, readers);
serialize_geometry(geometry, properties, reading, jp->line, layer_seq, progress_seq, metapos, geompos, indexpos, exclude, include, exclude_all, metafile, geomfile, indexfile, poolfile, treefile, fname, basezoom, layer, droprate, file_bbox, tippecanoe, segment, initialized, initial_x, initial_y, readers, file_keys);
}
json_free(j);
@ -471,7 +481,7 @@ void parse_json(json_pull *jp, const char *reading, volatile long long *layer_se
void *run_parse_json(void *v) {
struct parse_json_args *pja = (struct parse_json_args *) v;
parse_json(pja->jp, pja->reading, pja->layer_seq, pja->progress_seq, pja->metapos, pja->geompos, pja->indexpos, pja->exclude, pja->include, pja->exclude_all, pja->metafile, pja->geomfile, pja->indexfile, pja->poolfile, pja->treefile, pja->fname, pja->basezoom, pja->layer, pja->droprate, pja->file_bbox, pja->segment, pja->initialized, pja->initial_x, pja->initial_y, pja->readers);
parse_json(pja->jp, pja->reading, pja->layer_seq, pja->progress_seq, pja->metapos, pja->geompos, pja->indexpos, pja->exclude, pja->include, pja->exclude_all, pja->metafile, pja->geomfile, pja->indexfile, pja->poolfile, pja->treefile, pja->fname, pja->basezoom, pja->layer, pja->droprate, pja->file_bbox, pja->segment, pja->initialized, pja->initial_x, pja->initial_y, pja->readers, pja->file_keys);
return NULL;
}

@ -24,8 +24,9 @@ struct parse_json_args {
unsigned *initial_x;
unsigned *initial_y;
struct reader *readers;
std::set<type_and_string> *file_keys;
};
struct json_pull *json_begin_map(char *map, long long len);
void parse_json(json_pull *jp, const char *reading, volatile long long *layer_seq, volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, char *fname, int basezoom, int layer, double droprate, long long *file_bbox, int segment, int *initialized, unsigned *initial_x, unsigned *initial_y, struct reader *readers);
void parse_json(json_pull *jp, const char *reading, volatile long long *layer_seq, volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, char *fname, int basezoom, int layer, double droprate, long long *file_bbox, int segment, int *initialized, unsigned *initial_x, unsigned *initial_y, struct reader *readers, std::set<type_and_string> *file_keys);
void *run_parse_json(void *v);

@ -39,11 +39,11 @@ extern "C" {
#include "tile.hpp"
#include "pool.hpp"
#include "mbtiles.hpp"
#include "projection.hpp"
#include "version.hpp"
#include "memfile.hpp"
#include "serial.hpp"
#include "mbtiles.hpp"
#include "main.hpp"
#include "geojson.hpp"
#include "geometry.hpp"
@ -298,7 +298,7 @@ void *run_sort(void *v) {
return NULL;
}
void do_read_parallel(char *map, long long len, long long initial_offset, const char *reading, struct reader *reader, volatile long long *progress_seq, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, char *fname, int basezoom, int source, int nlayers, double droprate, int *initialized, unsigned *initial_x, unsigned *initial_y) {
void do_read_parallel(char *map, long long len, long long initial_offset, const char *reading, struct reader *reader, volatile long long *progress_seq, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, char *fname, int basezoom, int source, int nlayers, double droprate, int *initialized, unsigned *initial_x, unsigned *initial_y, std::set<type_and_string> *file_keys) {
long long segs[CPUS + 1];
segs[0] = 0;
segs[CPUS] = len;
@ -321,6 +321,11 @@ void do_read_parallel(char *map, long long len, long long initial_offset, const
struct parse_json_args pja[CPUS];
pthread_t pthreads[CPUS];
std::vector<std::set<type_and_string> > file_subkeys;
for (i = 0; i < CPUS; i++) {
file_subkeys.push_back(std::set<type_and_string>());
}
for (i = 0; i < CPUS; i++) {
pja[i].jp = json_begin_map(map + segs[i], segs[i + 1] - segs[i]);
@ -348,6 +353,7 @@ void do_read_parallel(char *map, long long len, long long initial_offset, const
pja[i].initial_x = &initial_x[i];
pja[i].initial_y = &initial_y[i];
pja[i].readers = reader;
pja[i].file_keys = &file_subkeys[i];
if (pthread_create(&pthreads[i], NULL, run_parse_json, &pja[i]) != 0) {
perror("pthread_create");
@ -362,6 +368,11 @@ void do_read_parallel(char *map, long long len, long long initial_offset, const
perror("pthread_join");
}
std::set<type_and_string>::iterator j;
for (j = file_subkeys[i].begin(); j != file_subkeys[i].end(); ++j) {
file_keys->insert(*j);
}
free(pja[i].jp->source);
json_end(pja[i].jp);
}
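In do_read_parallel, each parser thread gets its own set (file_subkeys[i]), so inserting attribute keys needs no lock; only after pthread_join are the per-thread sets folded into the layer-wide set. A compact sketch of that merge pattern, using std::thread in place of the pthread plumbing in the diff:

#include <cstdio>
#include <set>
#include <string>
#include <thread>
#include <vector>

struct type_and_string {
	int type;
	std::string string;
	bool operator<(const type_and_string &o) const {
		return string < o.string || (string == o.string && type < o.type);
	}
};

int main() {
	const size_t CPUS = 4;  // stand-in for the CPUS count in the diff
	std::set<type_and_string> file_keys;                         // layer-wide result
	std::vector<std::set<type_and_string> > file_subkeys(CPUS);  // one private set per thread

	std::vector<std::thread> threads;
	for (size_t i = 0; i < CPUS; i++) {
		threads.emplace_back([i, &file_subkeys]() {
			// Each thread writes only to its own set: no mutex required.
			file_subkeys[i].insert(type_and_string{1 /* VT_STRING */, "key" + std::to_string(i)});
		});
	}

	for (size_t i = 0; i < CPUS; i++) {
		threads[i].join();
		// Equivalent to the element-by-element insert loop in the diff.
		file_keys.insert(file_subkeys[i].begin(), file_subkeys[i].end());
	}

	printf("%zu distinct keys across threads\n", file_keys.size());
	return 0;
}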
@ -389,6 +400,7 @@ struct read_parallel_arg {
int *initialized;
unsigned *initial_x;
unsigned *initial_y;
std::set<type_and_string> *file_keys;
};
void *run_read_parallel(void *v) {
@ -410,7 +422,7 @@ void *run_read_parallel(void *v) {
}
madvise(map, a->len, MADV_RANDOM); // sequential, but from several pointers at once
do_read_parallel(map, a->len, a->offset, a->reading, a->reader, a->progress_seq, a->exclude, a->include, a->exclude_all, a->fname, a->basezoom, a->source, a->nlayers, a->droprate, a->initialized, a->initial_x, a->initial_y);
do_read_parallel(map, a->len, a->offset, a->reading, a->reader, a->progress_seq, a->exclude, a->include, a->exclude_all, a->fname, a->basezoom, a->source, a->nlayers, a->droprate, a->initialized, a->initial_x, a->initial_y, a->file_keys);
madvise(map, a->len, MADV_DONTNEED);
if (munmap(map, a->len) != 0) {
@ -427,7 +439,7 @@ void *run_read_parallel(void *v) {
return NULL;
}
void start_parsing(int fd, FILE *fp, long long offset, long long len, volatile int *is_parsing, pthread_t *parallel_parser, const char *reading, struct reader *reader, volatile long long *progress_seq, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, char *fname, int basezoom, int source, int nlayers, double droprate, int *initialized, unsigned *initial_x, unsigned *initial_y) {
void start_parsing(int fd, FILE *fp, long long offset, long long len, volatile int *is_parsing, pthread_t *parallel_parser, const char *reading, struct reader *reader, volatile long long *progress_seq, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, char *fname, int basezoom, int source, int nlayers, double droprate, int *initialized, unsigned *initial_x, unsigned *initial_y, std::set<type_and_string> *file_keys) {
// This has to kick off an intermediate thread to start the parser threads,
// so the main thread can get back to reading the next input stage while
// the intermediate thread waits for the completion of the parser threads.
@ -460,6 +472,7 @@ void start_parsing(int fd, FILE *fp, long long offset, long long len, volatile i
rpa->initialized = initialized;
rpa->initial_x = initial_x;
rpa->initial_y = initial_y;
rpa->file_keys = file_keys;
if (pthread_create(parallel_parser, NULL, run_read_parallel, rpa) != 0) {
perror("pthread_create");
@ -966,10 +979,12 @@ int read_input(std::vector<source> &sources, char *fname, const char *layername,
nsources = 1;
}
std::vector<std::set<type_and_string> >file_keys;
long overall_offset = 0;
int source;
for (source = 0; source < nsources; source++) {
file_keys.push_back(std::set<type_and_string>());
std::string reading;
int fd;
@ -1003,7 +1018,7 @@ int read_input(std::vector<source> &sources, char *fname, const char *layername,
}
if (map != NULL && map != MAP_FAILED) {
do_read_parallel(map, st.st_size - off, overall_offset, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, source, nlayers, droprate, initialized, initial_x, initial_y);
do_read_parallel(map, st.st_size - off, overall_offset, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, source, nlayers, droprate, initialized, initial_x, initial_y, &file_keys[source < nlayers ? source : 0]);
overall_offset += st.st_size - off;
checkdisk(reader, CPUS);
@ -1069,7 +1084,7 @@ int read_input(std::vector<source> &sources, char *fname, const char *layername,
}
fflush(readfp);
start_parsing(readfd, readfp, initial_offset, ahead, &is_parsing, &parallel_parser, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, source, nlayers, droprate, initialized, initial_x, initial_y);
start_parsing(readfd, readfp, initial_offset, ahead, &is_parsing, &parallel_parser, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, source, nlayers, droprate, initialized, initial_x, initial_y, &file_keys[source < nlayers ? source : 0]);
initial_offset += ahead;
overall_offset += ahead;
@ -1105,7 +1120,7 @@ int read_input(std::vector<source> &sources, char *fname, const char *layername,
fflush(readfp);
if (ahead > 0) {
start_parsing(readfd, readfp, initial_offset, ahead, &is_parsing, &parallel_parser, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, source, nlayers, droprate, initialized, initial_x, initial_y);
start_parsing(readfd, readfp, initial_offset, ahead, &is_parsing, &parallel_parser, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, source, nlayers, droprate, initialized, initial_x, initial_y, &file_keys[source < nlayers ? source : 0]);
if (pthread_join(parallel_parser, NULL) != 0) {
perror("pthread_join");
@ -1119,7 +1134,7 @@ int read_input(std::vector<source> &sources, char *fname, const char *layername,
long long layer_seq = overall_offset;
json_pull *jp = json_begin_file(fp);
parse_json(jp, reading.c_str(), &layer_seq, &progress_seq, &reader[0].metapos, &reader[0].geompos, &reader[0].indexpos, exclude, include, exclude_all, reader[0].metafile, reader[0].geomfile, reader[0].indexfile, reader[0].poolfile, reader[0].treefile, fname, basezoom, source < nlayers ? source : 0, droprate, reader[0].file_bbox, 0, &initialized[0], &initial_x[0], &initial_y[0], reader);
parse_json(jp, reading.c_str(), &layer_seq, &progress_seq, &reader[0].metapos, &reader[0].geompos, &reader[0].indexpos, exclude, include, exclude_all, reader[0].metafile, reader[0].geomfile, reader[0].indexfile, reader[0].poolfile, reader[0].treefile, fname, basezoom, source < nlayers ? source : 0, droprate, reader[0].file_bbox, 0, &initialized[0], &initial_x[0], &initial_y[0], reader, &file_keys[source < nlayers ? source : 0]);
json_end(jp);
overall_offset = layer_seq;
checkdisk(reader, CPUS);
@ -1162,13 +1177,6 @@ int read_input(std::vector<source> &sources, char *fname, const char *layername,
}
}
struct pool file_keys1[nlayers];
struct pool *file_keys[nlayers];
for (i = 0; i < nlayers; i++) {
pool_init(&file_keys1[i], 0);
file_keys[i] = &file_keys1[i];
}
char *layernames[nlayers];
for (i = 0; i < nlayers; i++) {
if (layername != NULL) {
@ -1598,7 +1606,7 @@ int read_input(std::vector<source> &sources, char *fname, const char *layername,
}
unsigned midx = 0, midy = 0;
int written = traverse_zooms(fd, size, meta, stringpool, file_keys, &midx, &midy, layernames, maxzoom, minzoom, basezoom, outdb, droprate, buffer, fname, tmpdir, gamma, nlayers, prevent, additional, full_detail, low_detail, min_detail, meta_off, pool_off, initial_x, initial_y);
int written = traverse_zooms(fd, size, meta, stringpool, &midx, &midy, layernames, maxzoom, minzoom, basezoom, outdb, droprate, buffer, fname, tmpdir, gamma, nlayers, prevent, additional, full_detail, low_detail, min_detail, meta_off, pool_off, initial_x, initial_y);
if (maxzoom != written) {
fprintf(stderr, "\n\n\n*** NOTE TILES ONLY COMPLETE THROUGH ZOOM %d ***\n\n\n", written);
@ -1615,7 +1623,7 @@ int read_input(std::vector<source> &sources, char *fname, const char *layername,
}
if (poolpos > 0) {
madvise((void *) pool, poolpos, MADV_DONTNEED);
madvise((void *) stringpool, poolpos, MADV_DONTNEED);
if (munmap(stringpool, poolpos) != 0) {
perror("munmap stringpool");
}
@ -1685,7 +1693,6 @@ int read_input(std::vector<source> &sources, char *fname, const char *layername,
mbtiles_write_metadata(outdb, fname, layernames, minzoom, maxzoom, minlat, minlon, maxlat, maxlon, midlat, midlon, file_keys, nlayers, forcetable, attribution);
for (i = 0; i < nlayers; i++) {
pool_free_strings(&file_keys1[i]);
free(layernames[i]);
}

@ -9,6 +9,8 @@
#include <sqlite3.h>
#include <vector>
#include <string>
#include <set>
#include "main.hpp"
#include "pool.hpp"
#include "mbtiles.hpp"
#include "geometry.hpp"
@ -120,19 +122,17 @@ static void aprintf(std::string *buf, const char *format, ...) {
free(tmp);
}
static int pvcmp(const void *v1, const void *v2) {
const struct pool_val *const *pv1 = (const struct pool_val *const *) v1;
const struct pool_val *const *pv2 = (const struct pool_val *const *) v2;
int n = strcmp((*pv1)->s, (*pv2)->s);
if (n != 0) {
return n;
bool type_and_string::operator<(const type_and_string &o) const {
if (string < o.string) {
return true;
}
return (*pv1)->type - (*pv2)->type;
if (string == o.string && type < o.type) {
return true;
}
return false;
}
void mbtiles_write_metadata(sqlite3 *outdb, const char *fname, char **layername, int minzoom, int maxzoom, double minlat, double minlon, double maxlat, double maxlon, double midlat, double midlon, struct pool **file_keys, int nlayers, int forcetable, const char *attribution) {
void mbtiles_write_metadata(sqlite3 *outdb, const char *fname, char **layername, int minzoom, int maxzoom, double minlat, double minlon, double maxlat, double maxlon, double midlat, double midlon, std::vector<std::set<type_and_string> > &file_keys, int nlayers, int forcetable, const char *attribution) {
char *sql, *err;
sql = sqlite3_mprintf("INSERT INTO metadata (name, value) VALUES ('name', %Q);", fname);
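The comparator gives std::set a strict weak ordering over (string, type), so two entries compare equal only when both the attribute name and its type match. The same ordering could also be written with std::tie (a common idiom, not what the diff uses):

#include <string>
#include <tuple>

struct type_and_string {
	int type;
	std::string string;
	bool operator<(const type_and_string &o) const;
};

bool type_and_string::operator<(const type_and_string &o) const {
	// Lexicographic comparison over (string, type), equivalent to the
	// two-branch version above.
	return std::tie(string, type) < std::tie(o.string, o.type);
}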
@ -240,37 +240,25 @@ void mbtiles_write_metadata(sqlite3 *outdb, const char *fname, char **layername,
quote(&buf, layername[i]);
aprintf(&buf, "\", \"description\": \"\", \"minzoom\": %d, \"maxzoom\": %d, \"fields\": {", minzoom, maxzoom);
int n = 0;
struct pool_val *pv;
for (pv = file_keys[i]->head; pv != NULL; pv = pv->next) {
n++;
}
std::set<type_and_string>::iterator j;
bool first = true;
for (j = file_keys[i].begin(); j != file_keys[i].end(); ++j) {
if (first) {
first = false;
} else {
aprintf(&buf, ", ");
}
struct pool_val *vals[n];
n = 0;
for (pv = file_keys[i]->head; pv != NULL; pv = pv->next) {
vals[n++] = pv;
}
qsort(vals, n, sizeof(struct pool_val *), pvcmp);
int j;
for (j = 0; j < n; j++) {
pv = vals[j];
aprintf(&buf, "\"");
quote(&buf, pv->s);
quote(&buf, j->string.c_str());
if (pv->type == VT_NUMBER) {
if (j->type == VT_NUMBER) {
aprintf(&buf, "\": \"Number\"");
} else if (pv->type == VT_BOOLEAN) {
} else if (j->type == VT_BOOLEAN) {
aprintf(&buf, "\": \"Boolean\"");
} else {
aprintf(&buf, "\": \"String\"");
}
if (j + 1 < n) {
aprintf(&buf, ", ");
}
}
aprintf(&buf, "} }");
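The loop above walks each layer's set in its sorted order and renders it as the fields object of the tileset metadata, mapping every attribute name to a type name. A simplified standalone approximation of that rendering (the real code goes through aprintf and quote; the VT_* values are placeholders):

#include <cstdio>
#include <set>
#include <string>

#define VT_STRING 1
#define VT_NUMBER 2
#define VT_BOOLEAN 7

struct type_and_string {
	int type;
	std::string string;
	bool operator<(const type_and_string &o) const {
		return string < o.string || (string == o.string && type < o.type);
	}
};

int main() {
	std::set<type_and_string> keys = {{VT_STRING, "name"}, {VT_NUMBER, "population"}};

	std::string buf = "\"fields\": {";
	bool first = true;
	for (std::set<type_and_string>::iterator j = keys.begin(); j != keys.end(); ++j) {
		if (!first) {
			buf += ", ";
		}
		first = false;

		buf += "\"" + j->string + "\": \"";
		if (j->type == VT_NUMBER) {
			buf += "Number";
		} else if (j->type == VT_BOOLEAN) {
			buf += "Boolean";
		} else {
			buf += "String";
		}
		buf += "\"";
	}
	buf += "}";

	// Prints: "fields": {"name": "String", "population": "Number"}
	printf("%s\n", buf.c_str());
	return 0;
}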

@ -1,7 +1,14 @@
struct type_and_string {
int type;
std::string string;
bool operator<(const type_and_string &o) const;
};
sqlite3 *mbtiles_open(char *dbname, char **argv, int forcetable);
void mbtiles_write_tile(sqlite3 *outdb, int z, int tx, int ty, const char *data, int size);
void mbtiles_write_metadata(sqlite3 *outdb, const char *fname, char **layername, int minzoom, int maxzoom, double minlat, double minlon, double maxlat, double maxlon, double midlat, double midlon, struct pool **file_keys, int nlayers, int forcetable, const char *attribution);
void mbtiles_write_metadata(sqlite3 *outdb, const char *fname, char **layername, int minzoom, int maxzoom, double minlat, double minlon, double maxlat, double maxlon, double midlat, double midlon, std::vector<std::set<type_and_string> > &file_keys, int nlayers, int forcetable, const char *attribution);
void mbtiles_close(sqlite3 *outdb, char **argv);

pool.cpp

@ -5,120 +5,6 @@
#include "memfile.hpp"
#include "pool.hpp"
#define POOL_WIDTH 256
static int hash(const char *s) {
unsigned h = 0;
for (; *s; s++) {
h = (h * 37LL + (*s & 0xFF)) & ULONG_MAX;
}
return h & 0xFF;
}
struct pool_val *pool(struct pool *p, const char *s, int type) {
int h = hash(s);
struct pool_val **v = &(p->vals[h]);
while (*v != NULL) {
int cmp = strcmp(s, (*v)->s);
if (cmp == 0) {
cmp = type - (*v)->type;
}
if (cmp == 0) {
return *v;
} else if (cmp < 0) {
v = &((*v)->left);
} else {
v = &((*v)->right);
}
}
struct pool_val *nv = (struct pool_val *) malloc(sizeof(struct pool_val));
if (nv == NULL) {
fprintf(stderr, "out of memory making string pool\n");
exit(EXIT_FAILURE);
}
nv->left = NULL;
nv->right = NULL;
nv->next = NULL;
nv->s = s;
nv->type = type;
nv->n = p->n++;
if (p->tail != NULL) {
p->tail->next = nv;
}
p->tail = nv;
if (p->head == NULL) {
p->head = nv;
}
*v = nv;
return *v;
}
int is_pooled(struct pool *p, const char *s, int type) {
int h = hash(s);
struct pool_val **v = &(p->vals[h]);
while (*v != NULL) {
int cmp = strcmp(s, (*v)->s);
if (cmp == 0) {
cmp = type - (*v)->type;
}
if (cmp == 0) {
return 1;
} else if (cmp < 0) {
v = &((*v)->left);
} else {
v = &((*v)->right);
}
}
return 0;
}
void pool_free1(struct pool *p, void (*func)(void *)) {
while (p->head != NULL) {
if (func != NULL) {
func((void *) p->head->s);
}
struct pool_val *next = p->head->next;
free(p->head);
p->head = next;
}
p->head = NULL;
p->tail = NULL;
free(p->vals);
p->vals = NULL;
}
void pool_free(struct pool *p) {
pool_free1(p, NULL);
}
void pool_free_strings(struct pool *p) {
pool_free1(p, free);
}
void pool_init(struct pool *p, int n) {
p->n = n;
p->vals = (struct pool_val **) calloc(POOL_WIDTH, sizeof(struct pool_val *));
if (p->vals == NULL) {
fprintf(stderr, "out of memory creating string pool\n");
exit(EXIT_FAILURE);
}
p->head = NULL;
p->tail = NULL;
}
static unsigned char swizzle[256] = {
0x00, 0xBF, 0x18, 0xDE, 0x93, 0xC9, 0xB1, 0x5E, 0xDF, 0xBE, 0x72, 0x5A, 0xBB, 0x42, 0x64, 0xC6,
0xD8, 0xB7, 0x15, 0x74, 0x1C, 0x8B, 0x91, 0xF5, 0x29, 0x46, 0xEC, 0x6F, 0xCA, 0x20, 0xF0, 0x06,

@ -1,28 +1,3 @@
struct pool_val {
const char *s;
int type;
int n;
struct pool_val *left;
struct pool_val *right;
struct pool_val *next;
};
struct pool {
struct pool_val **vals;
struct pool_val *head;
struct pool_val *tail;
int n;
};
struct pool_val *pool(struct pool *p, const char *s, int type);
void pool_free(struct pool *p);
void pool_free_strings(struct pool *p);
void pool_init(struct pool *p, int n);
int is_pooled(struct pool *p, const char *s, int type);
struct stringpool {
long long left;
long long right;

@ -7,6 +7,7 @@
#include <vector>
#include <string>
#include <map>
#include <set>
#include <zlib.h>
#include <math.h>
#include "mvt.hpp"
@ -24,7 +25,7 @@ struct stats {
double minlat, minlon, maxlat, maxlon;
};
void handle(std::string message, int z, unsigned x, unsigned y, struct pool **file_keys, char ***layernames, int *nlayers, sqlite3 *outdb, std::vector<std::string> &header, std::map<std::string, std::vector<std::string> > &mapping, struct pool *exclude, int ifmatched) {
void handle(std::string message, int z, unsigned x, unsigned y, std::vector<std::set<type_and_string> > &file_keys, char ***layernames, int *nlayers, sqlite3 *outdb, std::vector<std::string> &header, std::map<std::string, std::vector<std::string> > &mapping, std::set<std::string> &exclude, int ifmatched) {
mvt_tile tile;
mvt_tile outtile;
int features_added = 0;
@ -51,19 +52,14 @@ void handle(std::string message, int z, unsigned x, unsigned y, struct pool **fi
}
}
if (ll == *nlayers) {
*file_keys = (struct pool *) realloc(*file_keys, (ll + 1) * sizeof(struct pool));
file_keys.push_back(std::set<type_and_string>());
*layernames = (char **) realloc(*layernames, (ll + 1) * sizeof(char *));
if (*file_keys == NULL) {
perror("realloc file_keys");
exit(EXIT_FAILURE);
}
if (*layernames == NULL) {
perror("realloc layernames");
exit(EXIT_FAILURE);
}
pool_init(&((*file_keys)[ll]), 0);
(*layernames)[ll] = strdup(ln);
if ((*layernames)[ll] == NULL) {
perror("Out of memory");
@ -122,16 +118,11 @@ void handle(std::string message, int z, unsigned x, unsigned y, struct pool **fi
continue;
}
if (!is_pooled(exclude, key, VT_STRING)) {
if (!is_pooled(&((*file_keys)[ll]), key, type)) {
char *copy = strdup(key);
if (copy == NULL) {
perror("Out of memory");
exit(EXIT_FAILURE);
}
pool(&((*file_keys)[ll]), copy, type);
}
if (exclude.count(std::string(key)) == 0) {
type_and_string tas;
tas.string = std::string(key);
tas.type = type;
file_keys[ll].insert(tas);
outlayer.tag(outfeature, layer.keys[feat.tags[t]], val);
}
@ -157,15 +148,12 @@ void handle(std::string message, int z, unsigned x, unsigned y, struct pool **fi
const char *sjoinkey = joinkey.c_str();
if (!is_pooled(exclude, sjoinkey, VT_STRING)) {
if (!is_pooled(&((*file_keys)[ll]), sjoinkey, type)) {
char *copy = strdup(sjoinkey);
if (copy == NULL) {
perror("Out of memory");
exit(EXIT_FAILURE);
}
pool(&((*file_keys)[ll]), copy, type);
}
if (exclude.count(joinkey) == 0) {
type_and_string tas;
tas.string = std::string(sjoinkey);
tas.type = type;
file_keys[ll].insert(tas);
outlayer.tag(outfeature, layer.keys[feat.tags[t]], val);
mvt_value outval;
if (type == VT_STRING) {
@ -227,7 +215,7 @@ double max(double a, double b) {
}
}
void decode(char *fname, char *map, struct pool **file_keys, char ***layernames, int *nlayers, sqlite3 *outdb, struct stats *st, std::vector<std::string> &header, std::map<std::string, std::vector<std::string> > &mapping, struct pool *exclude, int ifmatched, char **attribution) {
void decode(char *fname, char *map, std::vector<std::set<type_and_string> > &file_keys, char ***layernames, int *nlayers, sqlite3 *outdb, struct stats *st, std::vector<std::string> &header, std::map<std::string, std::vector<std::string> > &mapping, std::set<std::string> &exclude, int ifmatched, char **attribution) {
sqlite3 *db;
if (sqlite3_open(fname, &db) != SQLITE_OK) {
@ -392,8 +380,7 @@ int main(int argc, char **argv) {
std::vector<std::string> header;
std::map<std::string, std::vector<std::string> > mapping;
struct pool exclude;
pool_init(&exclude, 0);
std::set<std::string> exclude;
extern int optind;
extern char *optarg;
@ -424,7 +411,7 @@ int main(int argc, char **argv) {
break;
case 'x':
pool(&exclude, optarg, VT_STRING);
exclude.insert(std::string(optarg));
break;
default:
@ -446,21 +433,16 @@ int main(int argc, char **argv) {
st.minzoom = st.minlat = st.minlon = INT_MAX;
st.maxzoom = st.maxlat = st.maxlon = INT_MIN;
struct pool *file_keys = NULL;
std::vector<std::set<type_and_string> > file_keys;
char **layernames = NULL;
int nlayers = 0;
char *attribution = NULL;
for (i = optind; i < argc; i++) {
decode(argv[i], csv, &file_keys, &layernames, &nlayers, outdb, &st, header, mapping, &exclude, ifmatched, &attribution);
decode(argv[i], csv, file_keys, &layernames, &nlayers, outdb, &st, header, mapping, exclude, ifmatched, &attribution);
}
struct pool *fk[nlayers];
for (i = 0; i < nlayers; i++) {
fk[i] = &(file_keys[i]);
}
mbtiles_write_metadata(outdb, outfile, layernames, st.minzoom, st.maxzoom, st.minlat, st.minlon, st.maxlat, st.maxlon, st.midlat, st.midlon, fk, nlayers, 0, attribution);
mbtiles_write_metadata(outdb, outfile, layernames, st.minzoom, st.maxzoom, st.minlat, st.minlon, st.maxlat, st.maxlon, st.midlat, st.midlon, file_keys, nlayers, 0, attribution);
mbtiles_close(outdb, argv);
return 0;
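tile-join's -x handling follows the same pattern: the pooled exclude list becomes a plain std::set<std::string>, filled with insert() while parsing options and consulted with count() while tagging features. A minimal sketch of that usage (the key name here is hypothetical):

#include <cstdio>
#include <set>
#include <string>

int main(int argc, char **argv) {
	std::set<std::string> exclude;

	// What `case 'x':` does in the diff: remember each excluded attribute name.
	for (int i = 1; i < argc; i++) {
		exclude.insert(std::string(argv[i]));
	}

	// What the tagging loop does: keep an attribute only if it was not excluded.
	const char *key = "population";  // hypothetical attribute name
	if (exclude.count(std::string(key)) == 0) {
		printf("keeping attribute %s\n", key);
	} else {
		printf("dropping attribute %s\n", key);
	}
	return 0;
}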

@ -23,11 +23,11 @@
#include "geometry.hpp"
#include "tile.hpp"
#include "pool.hpp"
#include "mbtiles.hpp"
#include "projection.hpp"
#include "serial.hpp"
#include "options.hpp"
#include "main.hpp"
#include "mbtiles.hpp"
#define CMD_BITS 3
@ -160,7 +160,7 @@ mvt_value retrieve_string(char **f, char *stringpool, int *otype) {
return tv;
}
void decode_meta(int m, char **meta, char *stringpool, mvt_layer &layer, mvt_feature &feature, struct pool *file_keys) {
void decode_meta(int m, char **meta, char *stringpool, mvt_layer &layer, mvt_feature &feature) {
int i;
for (i = 0; i < m; i++) {
int otype;
@ -168,26 +168,6 @@ void decode_meta(int m, char **meta, char *stringpool, mvt_layer &layer, mvt_fea
mvt_value value = retrieve_string(meta, stringpool, &otype);
layer.tag(feature, key.string_value, value);
if (!is_pooled(file_keys, key.string_value.c_str(), otype)) {
if (pthread_mutex_lock(&var_lock) != 0) {
perror("pthread_mutex_lock");
exit(EXIT_FAILURE);
}
// Dup to retain after munmap
char *copy = strdup(key.string_value.c_str());
if (copy == NULL) {
perror("Out of memory");
exit(EXIT_FAILURE);
}
pool(file_keys, copy, otype);
if (pthread_mutex_unlock(&var_lock) != 0) {
perror("pthread_mutex_unlock");
exit(EXIT_FAILURE);
}
}
}
}
@ -526,7 +506,7 @@ int manage_gap(unsigned long long index, unsigned long long *previndex, double s
return 0;
}
long long write_tile(FILE *geoms, long long *geompos_in, char *metabase, char *stringpool, int z, unsigned tx, unsigned ty, int detail, int min_detail, int basezoom, struct pool **file_keys, char **layernames, sqlite3 *outdb, double droprate, int buffer, const char *fname, FILE **geomfile, int minzoom, int maxzoom, double todo, volatile long long *along, double gamma, int nlayers, int *prevent, int *additional, int child_shards, long long *meta_off, long long *pool_off, unsigned *initial_x, unsigned *initial_y, volatile int *running) {
long long write_tile(FILE *geoms, long long *geompos_in, char *metabase, char *stringpool, int z, unsigned tx, unsigned ty, int detail, int min_detail, int basezoom, char **layernames, sqlite3 *outdb, double droprate, int buffer, const char *fname, FILE **geomfile, int minzoom, int maxzoom, double todo, volatile long long *along, double gamma, int nlayers, int *prevent, int *additional, int child_shards, long long *meta_off, long long *pool_off, unsigned *initial_x, unsigned *initial_y, volatile int *running) {
int line_detail;
double fraction = 1;
@ -936,7 +916,7 @@ long long write_tile(FILE *geoms, long long *geompos_in, char *metabase, char *s
feature.geometry = to_feature(features[j][x].geom);
count += features[j][x].geom.size();
decode_meta(features[j][x].m, &features[j][x].meta, features[j][x].stringpool, layer, feature, file_keys[j]);
decode_meta(features[j][x].m, &features[j][x].meta, features[j][x].stringpool, layer, feature);
layer.features.push_back(feature);
}
@ -1014,7 +994,6 @@ struct write_tile_args {
char *stringpool;
int min_detail;
int basezoom;
struct pool **file_keys;
char **layernames;
sqlite3 *outdb;
double droprate;
@ -1082,7 +1061,7 @@ void *run_thread(void *vargs) {
// fprintf(stderr, "%d/%u/%u\n", z, x, y);
long long len = write_tile(geom, &geompos, arg->metabase, arg->stringpool, z, x, y, z == arg->maxzoom ? arg->full_detail : arg->low_detail, arg->min_detail, arg->basezoom, arg->file_keys, arg->layernames, arg->outdb, arg->droprate, arg->buffer, arg->fname, arg->geomfile, arg->minzoom, arg->maxzoom, arg->todo, arg->along, arg->gamma, arg->nlayers, arg->prevent, arg->additional, arg->child_shards, arg->meta_off, arg->pool_off, arg->initial_x, arg->initial_y, arg->running);
long long len = write_tile(geom, &geompos, arg->metabase, arg->stringpool, z, x, y, z == arg->maxzoom ? arg->full_detail : arg->low_detail, arg->min_detail, arg->basezoom, arg->layernames, arg->outdb, arg->droprate, arg->buffer, arg->fname, arg->geomfile, arg->minzoom, arg->maxzoom, arg->todo, arg->along, arg->gamma, arg->nlayers, arg->prevent, arg->additional, arg->child_shards, arg->meta_off, arg->pool_off, arg->initial_x, arg->initial_y, arg->running);
if (len < 0) {
int *err = (int *) malloc(sizeof(int));
@ -1137,7 +1116,7 @@ void *run_thread(void *vargs) {
return NULL;
}
int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpool, struct pool **file_keys, unsigned *midx, unsigned *midy, char **layernames, int maxzoom, int minzoom, int basezoom, sqlite3 *outdb, double droprate, int buffer, const char *fname, const char *tmpdir, double gamma, int nlayers, int *prevent, int *additional, int full_detail, int low_detail, int min_detail, long long *meta_off, long long *pool_off, unsigned *initial_x, unsigned *initial_y) {
int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpool, unsigned *midx, unsigned *midy, char **layernames, int maxzoom, int minzoom, int basezoom, sqlite3 *outdb, double droprate, int buffer, const char *fname, const char *tmpdir, double gamma, int nlayers, int *prevent, int *additional, int full_detail, int low_detail, int min_detail, long long *meta_off, long long *pool_off, unsigned *initial_x, unsigned *initial_y) {
int i;
for (i = 0; i <= maxzoom; i++) {
long long most = 0;
@ -1247,7 +1226,6 @@ int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpo
args[thread].stringpool = stringpool;
args[thread].min_detail = min_detail;
args[thread].basezoom = basezoom;
args[thread].file_keys = file_keys; // locked with var_lock
args[thread].layernames = layernames;
args[thread].outdb = outdb; // locked with db_lock
args[thread].droprate = droprate;

@ -1,5 +1,5 @@
long long write_tile(char **geom, char *metabase, char *stringpool, unsigned *file_bbox, int z, unsigned x, unsigned y, int detail, int min_detail, int basezoom, struct pool **file_keys, char **layernames, sqlite3 *outdb, double droprate, int buffer, const char *fname, FILE **geomfile, int file_minzoom, int file_maxzoom, double todo, char *geomstart, long long along, double gamma, int nlayers, int *prevent, int *additional);
long long write_tile(char **geom, char *metabase, char *stringpool, unsigned *file_bbox, int z, unsigned x, unsigned y, int detail, int min_detail, int basezoom, char **layernames, sqlite3 *outdb, double droprate, int buffer, const char *fname, FILE **geomfile, int file_minzoom, int file_maxzoom, double todo, char *geomstart, long long along, double gamma, int nlayers, int *prevent, int *additional);
int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpool, struct pool **file_keys, unsigned *midx, unsigned *midy, char **layernames, int maxzoom, int minzoom, int basezoom, sqlite3 *outdb, double droprate, int buffer, const char *fname, const char *tmpdir, double gamma, int nlayers, int *prevent, int *additional, int full_detail, int low_detail, int min_detail, long long *meta_off, long long *pool_off, unsigned *initial_x, unsigned *initial_y);
int traverse_zooms(int *geomfd, off_t *geom_size, char *metabase, char *stringpool, unsigned *midx, unsigned *midy, char **layernames, int maxzoom, int minzoom, int basezoom, sqlite3 *outdb, double droprate, int buffer, const char *fname, const char *tmpdir, double gamma, int nlayers, int *prevent, int *additional, int full_detail, int low_detail, int min_detail, long long *meta_off, long long *pool_off, unsigned *initial_x, unsigned *initial_y);
int manage_gap(unsigned long long index, unsigned long long *previndex, double scale, double gamma, double *gap);