mirror of
https://github.com/mapbox/tippecanoe.git
synced 2025-04-17 23:46:13 +00:00
Use a string pool to avoid duplicating keys and values
This commit is contained in:
parent
55e93a5d37
commit
cde1e60603
83
geojson.c
83
geojson.c
@ -214,7 +214,7 @@ struct pool_val *deserialize_string(char **f, struct pool *p, int type) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
int traverse_zooms(int geomfd[4], off_t geom_size[4], char *metabase, unsigned *file_bbox, struct pool **file_keys, unsigned *midx, unsigned *midy, char **layernames, int maxzoom, int minzoom, sqlite3 *outdb, double droprate, int buffer, const char *fname, const char *tmpdir, double gamma, int nlayers, char *prevent) {
|
||||
int traverse_zooms(int geomfd[4], off_t geom_size[4], char *metabase, char *stringpool, unsigned *file_bbox, struct pool **file_keys, unsigned *midx, unsigned *midy, char **layernames, int maxzoom, int minzoom, sqlite3 *outdb, double droprate, int buffer, const char *fname, const char *tmpdir, double gamma, int nlayers, char *prevent) {
|
||||
int i;
|
||||
for (i = 0; i <= maxzoom; i++) {
|
||||
long long most = 0;
|
||||
@ -275,7 +275,7 @@ int traverse_zooms(int geomfd[4], off_t geom_size[4], char *metabase, unsigned *
|
||||
|
||||
// fprintf(stderr, "%d/%u/%u\n", z, x, y);
|
||||
|
||||
long long len = write_tile(&geom, metabase, file_bbox, z, x, y, z == maxzoom ? full_detail : low_detail, min_detail, maxzoom, file_keys, layernames, outdb, droprate, buffer, fname, sub, minzoom, maxzoom, todo, geomstart, along, gamma, nlayers, prevent);
|
||||
long long len = write_tile(&geom, metabase, stringpool, file_bbox, z, x, y, z == maxzoom ? full_detail : low_detail, min_detail, maxzoom, file_keys, layernames, outdb, droprate, buffer, fname, sub, minzoom, maxzoom, todo, geomstart, along, gamma, nlayers, prevent);
|
||||
|
||||
if (len < 0) {
|
||||
return i - 1;
|
||||
@ -381,14 +381,49 @@ static void merge(struct merge *merges, int nmerges, unsigned char *map, FILE *f
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Node of the in-memory binary search tree that remembers which strings
 * have already been written to the string pool file.
 *
 *   s     - private copy of the string, used as the comparison key
 *   left  - subtree of strings that compare less than s (strcmp order)
 *   right - subtree of strings that compare greater than s
 *   off   - value of *poolpos at the time s was written to the pool file
 *
 * pooltree is the root of that tree; it is NULL until the first string
 * is added.
 */
struct stringpool {
	char *s;
	struct stringpool *left;
	struct stringpool *right;
	long long off;
} *pooltree = NULL;

/*
 * Return the string pool offset of s, writing s to the pool on first use.
 *
 * poolfile - stream the pool is written to
 * poolpos  - caller-maintained running byte position in poolfile; advanced
 *            by strlen(s) + 1 when (and only when) s is newly written
 * s        - NUL-terminated string to look up or add
 *
 * If s was added before, its recorded offset is returned and nothing is
 * written. Otherwise s (including its terminating NUL) is appended to
 * poolfile, a tree node is created for it, and the offset it was written
 * at is returned. Exits the process if memory cannot be allocated.
 */
long long addpool(FILE *poolfile, long long *poolpos, char *s) {
	struct stringpool **sp = &pooltree;

	/* Walk down to either the matching node or the empty slot where it belongs. */
	while (*sp != NULL) {
		int cmp = strcmp(s, (*sp)->s);

		if (cmp < 0) {
			sp = &((*sp)->left);
		} else if (cmp > 0) {
			sp = &((*sp)->right);
		} else {
			return (*sp)->off;
		}
	}

	struct stringpool *node = malloc(sizeof *node);
	if (node == NULL) {
		perror("malloc string pool");
		exit(EXIT_FAILURE);
	}

	node->s = strdup(s);  // XXX really should be mapped from the pool itself
	if (node->s == NULL) {
		perror("strdup string pool");
		exit(EXIT_FAILURE);
	}

	node->left = NULL;
	node->right = NULL;
	node->off = *poolpos;

	/* Only link the node once it is fully initialized. */
	*sp = node;

	/* The terminating NUL is written too, so strings in the pool are self-delimiting. */
	size_t len = strlen(s) + 1;
	fwrite_check(s, len, sizeof(char), poolfile, "string pool");
	*poolpos += len;

	return node->off;
}
|
||||
|
||||
int read_json(int argc, char **argv, char *fname, const char *layername, int maxzoom, int minzoom, sqlite3 *outdb, struct pool *exclude, struct pool *include, int exclude_all, double droprate, int buffer, const char *tmpdir, double gamma, char *prevent) {
|
||||
int ret = EXIT_SUCCESS;
|
||||
|
||||
char metaname[strlen(tmpdir) + strlen("/meta.XXXXXXXX") + 1];
|
||||
char poolname[strlen(tmpdir) + strlen("/pool.XXXXXXXX") + 1];
|
||||
char geomname[strlen(tmpdir) + strlen("/geom.XXXXXXXX") + 1];
|
||||
char indexname[strlen(tmpdir) + strlen("/index.XXXXXXXX") + 1];
|
||||
|
||||
sprintf(metaname, "%s%s", tmpdir, "/meta.XXXXXXXX");
|
||||
sprintf(poolname, "%s%s", tmpdir, "/pool.XXXXXXXX");
|
||||
sprintf(geomname, "%s%s", tmpdir, "/geom.XXXXXXXX");
|
||||
sprintf(indexname, "%s%s", tmpdir, "/index.XXXXXXXX");
|
||||
|
||||
@ -397,6 +432,11 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max
|
||||
perror(metaname);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
int poolfd = mkstemp(poolname);
|
||||
if (poolfd < 0) {
|
||||
perror(poolname);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
int geomfd = mkstemp(geomname);
|
||||
if (geomfd < 0) {
|
||||
perror(geomname);
|
||||
@ -413,6 +453,11 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max
|
||||
perror(metaname);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
FILE *poolfile = fopen(poolname, "wb");
|
||||
if (poolfile == NULL) {
|
||||
perror(poolname);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
FILE *geomfile = fopen(geomname, "wb");
|
||||
if (geomfile == NULL) {
|
||||
perror(geomname);
|
||||
@ -424,13 +469,19 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
long long metapos = 0;
|
||||
long long poolpos = 0;
|
||||
long long geompos = 0;
|
||||
long long indexpos = 0;
|
||||
|
||||
unlink(metaname);
|
||||
unlink(poolname);
|
||||
unlink(geomname);
|
||||
unlink(indexname);
|
||||
|
||||
// So we still have a legitimate map even if no metadata
|
||||
fprintf(poolfile, "\n");
|
||||
poolpos++;
|
||||
|
||||
unsigned file_bbox[] = {UINT_MAX, UINT_MAX, 0, 0};
|
||||
unsigned midx = 0, midy = 0;
|
||||
long long seq = 0;
|
||||
@ -592,8 +643,8 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max
|
||||
serialize_int(metafile, m, &metapos, fname);
|
||||
for (i = 0; i < m; i++) {
|
||||
serialize_int(metafile, metatype[i], &metapos, fname);
|
||||
serialize_string(metafile, metakey[i], &metapos, fname);
|
||||
serialize_string(metafile, metaval[i], &metapos, fname);
|
||||
serialize_long_long(metafile, addpool(poolfile, &poolpos, metakey[i]), &metapos, fname);
|
||||
serialize_long_long(metafile, addpool(poolfile, &poolpos, metaval[i]), &metapos, fname);
|
||||
}
|
||||
|
||||
long long geomstart = geompos;
|
||||
@ -667,11 +718,13 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max
|
||||
}
|
||||
|
||||
fclose(metafile);
|
||||
fclose(poolfile);
|
||||
fclose(geomfile);
|
||||
fclose(indexfile);
|
||||
|
||||
struct stat geomst;
|
||||
struct stat metast;
|
||||
struct stat poolst;
|
||||
|
||||
if (fstat(geomfd, &geomst) != 0) {
|
||||
perror("stat geom\n");
|
||||
@ -681,6 +734,10 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max
|
||||
perror("stat meta\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
if (fstat(poolfd, &poolst) != 0) {
|
||||
perror("stat pool\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if (geomst.st_size == 0 || metast.st_size == 0) {
|
||||
fprintf(stderr, "did not read any valid geometries\n");
|
||||
@ -693,6 +750,12 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
char *stringpool = (char *) mmap(NULL, poolst.st_size, PROT_READ, MAP_PRIVATE, poolfd, 0);
|
||||
if (stringpool == MAP_FAILED) {
|
||||
perror("mmap stringpool");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
struct pool file_keys1[nlayers];
|
||||
struct pool *file_keys[nlayers];
|
||||
int i;
|
||||
@ -915,9 +978,9 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max
|
||||
size[j] = 0;
|
||||
}
|
||||
|
||||
fprintf(stderr, "%lld features, %lld bytes of geometry, %lld bytes of metadata\n", seq, (long long) geomst.st_size, (long long) metast.st_size);
|
||||
fprintf(stderr, "%lld features, %lld bytes of geometry, %lld bytes of metadata, %lld bytes of string pool\n", seq, (long long) geomst.st_size, (long long) metast.st_size, (long long) poolst.st_size);
|
||||
|
||||
int written = traverse_zooms(fd, size, meta, file_bbox, file_keys, &midx, &midy, layernames, maxzoom, minzoom, outdb, droprate, buffer, fname, tmpdir, gamma, nlayers, prevent);
|
||||
int written = traverse_zooms(fd, size, meta, stringpool, file_bbox, file_keys, &midx, &midy, layernames, maxzoom, minzoom, outdb, droprate, buffer, fname, tmpdir, gamma, nlayers, prevent);
|
||||
|
||||
if (maxzoom != written) {
|
||||
fprintf(stderr, "\n\n\n*** NOTE TILES ONLY COMPLETE THROUGH ZOOM %d ***\n\n\n", written);
|
||||
@ -928,11 +991,17 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max
|
||||
if (munmap(meta, metast.st_size) != 0) {
|
||||
perror("munmap meta");
|
||||
}
|
||||
|
||||
if (close(metafd) < 0) {
|
||||
perror("close meta");
|
||||
}
|
||||
|
||||
if (munmap(stringpool, poolst.st_size) != 0) {
|
||||
perror("munmap pool");
|
||||
}
|
||||
if (close(poolfd) < 0) {
|
||||
perror("close pool");
|
||||
}
|
||||
|
||||
double minlat = 0, minlon = 0, maxlat = 0, maxlon = 0, midlat = 0, midlon = 0;
|
||||
|
||||
tile2latlon(midx, midy, maxzoom, &maxlat, &minlon);
|
||||
|
20
tile.cc
20
tile.cc
@ -186,7 +186,17 @@ int coalindexcmp(const struct coalesce *c1, const struct coalesce *c2) {
|
||||
return cmp;
|
||||
}
|
||||
|
||||
void decode_meta(char **meta, struct pool *keys, struct pool *values, struct pool *file_keys, std::vector<int> *intmeta, char *only) {
|
||||
// Read a long long string-pool offset from the serialized stream at *f and
// intern the string found at that offset within stringpool.
//
//   f          - cursor into the serialized metadata; handed to
//                deserialize_long_long (presumably advanced past the
//                offset it reads -- confirm against that helper)
//   p          - pool the string is looked up in / added to via pool()
//   type       - value type tag passed through to pool()
//   stringpool - base address of the string pool the offset is relative to
//
// Returns whatever pool() returns for (p, stringpool + offset, type).
struct pool_val *retrieve_string(char **f, struct pool *p, int type, char *stringpool) {
	long long offset;
	deserialize_long_long(f, &offset);

	return pool(p, stringpool + offset, type);
}
|
||||
|
||||
void decode_meta(char **meta, char *stringpool, struct pool *keys, struct pool *values, struct pool *file_keys, std::vector<int> *intmeta, char *only) {
|
||||
int m;
|
||||
deserialize_int(meta, &m);
|
||||
|
||||
@ -194,13 +204,13 @@ void decode_meta(char **meta, struct pool *keys, struct pool *values, struct poo
|
||||
for (i = 0; i < m; i++) {
|
||||
int t;
|
||||
deserialize_int(meta, &t);
|
||||
struct pool_val *key = deserialize_string(meta, keys, VT_STRING);
|
||||
struct pool_val *key = retrieve_string(meta, keys, VT_STRING, stringpool);
|
||||
|
||||
if (only != NULL && (strcmp(key->s, only) != 0)) {
|
||||
deserialize_int(meta, &t);
|
||||
*meta += t;
|
||||
} else {
|
||||
struct pool_val *value = deserialize_string(meta, values, t);
|
||||
struct pool_val *value = retrieve_string(meta, values, t, stringpool);
|
||||
|
||||
intmeta->push_back(key->n);
|
||||
intmeta->push_back(value->n);
|
||||
@ -349,7 +359,7 @@ void evaluate(std::vector<coalesce> &features, char *metabase, struct pool *file
|
||||
}
|
||||
#endif
|
||||
|
||||
long long write_tile(char **geoms, char *metabase, unsigned *file_bbox, int z, unsigned tx, unsigned ty, int detail, int min_detail, int basezoom, struct pool **file_keys, char **layernames, sqlite3 *outdb, double droprate, int buffer, const char *fname, FILE *geomfile[4], int file_minzoom, int file_maxzoom, double todo, char *geomstart, long long along, double gamma, int nlayers, char *prevent) {
|
||||
long long write_tile(char **geoms, char *metabase, char *stringpool, unsigned *file_bbox, int z, unsigned tx, unsigned ty, int detail, int min_detail, int basezoom, struct pool **file_keys, char **layernames, sqlite3 *outdb, double droprate, int buffer, const char *fname, FILE *geomfile[4], int file_minzoom, int file_maxzoom, double todo, char *geomstart, long long along, double gamma, int nlayers, char *prevent) {
|
||||
int line_detail;
|
||||
static bool evaluated = false;
|
||||
double oprogress = 0;
|
||||
@ -614,7 +624,7 @@ long long write_tile(char **geoms, char *metabase, unsigned *file_bbox, int z, u
|
||||
c.metasrc = meta;
|
||||
c.coalesced = false;
|
||||
|
||||
decode_meta(&meta, keys[layer], values[layer], file_keys[layer], &c.meta, NULL);
|
||||
decode_meta(&meta, stringpool, keys[layer], values[layer], file_keys[layer], &c.meta, NULL);
|
||||
features[layer].push_back(c);
|
||||
}
|
||||
}
|
||||
|
2
tile.h
2
tile.h
@ -25,4 +25,4 @@ void deserialize_uint(char **f, unsigned *n);
|
||||
void deserialize_byte(char **f, signed char *n);
|
||||
struct pool_val *deserialize_string(char **f, struct pool *p, int type);
|
||||
|
||||
long long write_tile(char **geom, char *metabase, unsigned *file_bbox, int z, unsigned x, unsigned y, int detail, int min_detail, int basezoom, struct pool **file_keys, char **layernames, sqlite3 *outdb, double droprate, int buffer, const char *fname, FILE *geomfile[4], int file_minzoom, int file_maxzoom, double todo, char *geomstart, long long along, double gamma, int nlayers, char *prevent);
|
||||
long long write_tile(char **geom, char *metabase, char *stringpool, unsigned *file_bbox, int z, unsigned x, unsigned y, int detail, int min_detail, int basezoom, struct pool **file_keys, char **layernames, sqlite3 *outdb, double droprate, int buffer, const char *fname, FILE *geomfile[4], int file_minzoom, int file_maxzoom, double todo, char *geomstart, long long along, double gamma, int nlayers, char *prevent);
|
||||
|
Loading…
x
Reference in New Issue
Block a user