Use a string pool to avoid duplicating keys and values

This commit is contained in:
Eric Fischer 2015-06-17 17:18:08 -07:00
parent 55e93a5d37
commit cde1e60603
3 changed files with 92 additions and 13 deletions

View File

@ -214,7 +214,7 @@ struct pool_val *deserialize_string(char **f, struct pool *p, int type) {
return ret;
}
int traverse_zooms(int geomfd[4], off_t geom_size[4], char *metabase, unsigned *file_bbox, struct pool **file_keys, unsigned *midx, unsigned *midy, char **layernames, int maxzoom, int minzoom, sqlite3 *outdb, double droprate, int buffer, const char *fname, const char *tmpdir, double gamma, int nlayers, char *prevent) {
int traverse_zooms(int geomfd[4], off_t geom_size[4], char *metabase, char *stringpool, unsigned *file_bbox, struct pool **file_keys, unsigned *midx, unsigned *midy, char **layernames, int maxzoom, int minzoom, sqlite3 *outdb, double droprate, int buffer, const char *fname, const char *tmpdir, double gamma, int nlayers, char *prevent) {
int i;
for (i = 0; i <= maxzoom; i++) {
long long most = 0;
@ -275,7 +275,7 @@ int traverse_zooms(int geomfd[4], off_t geom_size[4], char *metabase, unsigned *
// fprintf(stderr, "%d/%u/%u\n", z, x, y);
long long len = write_tile(&geom, metabase, file_bbox, z, x, y, z == maxzoom ? full_detail : low_detail, min_detail, maxzoom, file_keys, layernames, outdb, droprate, buffer, fname, sub, minzoom, maxzoom, todo, geomstart, along, gamma, nlayers, prevent);
long long len = write_tile(&geom, metabase, stringpool, file_bbox, z, x, y, z == maxzoom ? full_detail : low_detail, min_detail, maxzoom, file_keys, layernames, outdb, droprate, buffer, fname, sub, minzoom, maxzoom, todo, geomstart, along, gamma, nlayers, prevent);
if (len < 0) {
return i - 1;
@ -381,14 +381,49 @@ static void merge(struct merge *merges, int nmerges, unsigned char *map, FILE *f
}
}
/*
 * One node of the in-memory binary search tree used to de-duplicate
 * strings before they are written to the string pool file.
 */
struct stringpool {
	char *s;                  /* the string this node stands for (heap copy) */
	struct stringpool *left;  /* subtree of strings that strcmp() orders before s */
	struct stringpool *right; /* subtree of strings that strcmp() orders after s */
	long long off;            /* pool offset recorded when s was first added */
};

/* Root of the de-duplication tree; NULL until the first string is added. */
struct stringpool *pooltree = NULL;
/*
 * Intern the string s in the string pool and return its offset.
 *
 * The global tree rooted at pooltree (ordered by strcmp) remembers every
 * string that has already been added. If s is found there, the offset
 * recorded for it is returned and nothing is written. Otherwise a new
 * tree node is created, s together with its terminating NUL is handed to
 * fwrite_check() for poolfile, and *poolpos is advanced by that many bytes.
 *
 * poolfile: stream the pool strings are written to
 * poolpos:  in/out running byte count for the pool; presumably kept equal
 *           to the write position of poolfile by the caller
 * s:        NUL-terminated string to intern
 *
 * Returns the value *poolpos held when s was first added.
 * Exits with EXIT_FAILURE if memory for the new node or its string copy
 * cannot be allocated.
 */
long long addpool(FILE *poolfile, long long *poolpos, char *s) {
	struct stringpool **sp = &pooltree;

	/* Walk down to the matching node, or to the empty link where s belongs. */
	while (*sp != NULL) {
		int cmp = strcmp(s, (*sp)->s);

		if (cmp < 0) {
			sp = &((*sp)->left);
		} else if (cmp > 0) {
			sp = &((*sp)->right);
		} else {
			return (*sp)->off;
		}
	}

	struct stringpool *node = malloc(sizeof(*node));
	if (node == NULL) {
		perror("malloc");
		exit(EXIT_FAILURE);
	}

	node->s = strdup(s); // XXX really should be mapped from the pool itself
	if (node->s == NULL) {
		perror("strdup");
		exit(EXIT_FAILURE);
	}
	node->left = NULL;
	node->right = NULL;
	node->off = *poolpos;
	*sp = node;

	/* Length includes the NUL so strings in the pool are self-terminating. */
	size_t len = strlen(s) + 1;
	fwrite_check(s, len, sizeof(char), poolfile, "string pool");
	*poolpos += len;

	return node->off;
}
int read_json(int argc, char **argv, char *fname, const char *layername, int maxzoom, int minzoom, sqlite3 *outdb, struct pool *exclude, struct pool *include, int exclude_all, double droprate, int buffer, const char *tmpdir, double gamma, char *prevent) {
int ret = EXIT_SUCCESS;
char metaname[strlen(tmpdir) + strlen("/meta.XXXXXXXX") + 1];
char poolname[strlen(tmpdir) + strlen("/pool.XXXXXXXX") + 1];
char geomname[strlen(tmpdir) + strlen("/geom.XXXXXXXX") + 1];
char indexname[strlen(tmpdir) + strlen("/index.XXXXXXXX") + 1];
sprintf(metaname, "%s%s", tmpdir, "/meta.XXXXXXXX");
sprintf(poolname, "%s%s", tmpdir, "/pool.XXXXXXXX");
sprintf(geomname, "%s%s", tmpdir, "/geom.XXXXXXXX");
sprintf(indexname, "%s%s", tmpdir, "/index.XXXXXXXX");
@ -397,6 +432,11 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max
perror(metaname);
exit(EXIT_FAILURE);
}
int poolfd = mkstemp(poolname);
if (poolfd < 0) {
perror(poolname);
exit(EXIT_FAILURE);
}
int geomfd = mkstemp(geomname);
if (geomfd < 0) {
perror(geomname);
@ -413,6 +453,11 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max
perror(metaname);
exit(EXIT_FAILURE);
}
FILE *poolfile = fopen(poolname, "wb");
if (poolfile == NULL) {
perror(poolname);
exit(EXIT_FAILURE);
}
FILE *geomfile = fopen(geomname, "wb");
if (geomfile == NULL) {
perror(geomname);
@ -424,13 +469,19 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max
exit(EXIT_FAILURE);
}
long long metapos = 0;
long long poolpos = 0;
long long geompos = 0;
long long indexpos = 0;
unlink(metaname);
unlink(poolname);
unlink(geomname);
unlink(indexname);
// Write one byte so the pool file is never empty and can still be mmapped even if there is no metadata
fprintf(poolfile, "\n");
poolpos++;
unsigned file_bbox[] = {UINT_MAX, UINT_MAX, 0, 0};
unsigned midx = 0, midy = 0;
long long seq = 0;
@ -592,8 +643,8 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max
serialize_int(metafile, m, &metapos, fname);
for (i = 0; i < m; i++) {
serialize_int(metafile, metatype[i], &metapos, fname);
serialize_string(metafile, metakey[i], &metapos, fname);
serialize_string(metafile, metaval[i], &metapos, fname);
serialize_long_long(metafile, addpool(poolfile, &poolpos, metakey[i]), &metapos, fname);
serialize_long_long(metafile, addpool(poolfile, &poolpos, metaval[i]), &metapos, fname);
}
long long geomstart = geompos;
@ -667,11 +718,13 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max
}
fclose(metafile);
fclose(poolfile);
fclose(geomfile);
fclose(indexfile);
struct stat geomst;
struct stat metast;
struct stat poolst;
if (fstat(geomfd, &geomst) != 0) {
perror("stat geom\n");
@ -681,6 +734,10 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max
perror("stat meta\n");
exit(EXIT_FAILURE);
}
if (fstat(poolfd, &poolst) != 0) {
perror("stat pool\n");
exit(EXIT_FAILURE);
}
if (geomst.st_size == 0 || metast.st_size == 0) {
fprintf(stderr, "did not read any valid geometries\n");
@ -693,6 +750,12 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max
exit(EXIT_FAILURE);
}
char *stringpool = (char *) mmap(NULL, poolst.st_size, PROT_READ, MAP_PRIVATE, poolfd, 0);
if (stringpool == MAP_FAILED) {
perror("mmap stringpool");
exit(EXIT_FAILURE);
}
struct pool file_keys1[nlayers];
struct pool *file_keys[nlayers];
int i;
@ -915,9 +978,9 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max
size[j] = 0;
}
fprintf(stderr, "%lld features, %lld bytes of geometry, %lld bytes of metadata\n", seq, (long long) geomst.st_size, (long long) metast.st_size);
fprintf(stderr, "%lld features, %lld bytes of geometry, %lld bytes of metadata, %lld bytes of string pool\n", seq, (long long) geomst.st_size, (long long) metast.st_size, (long long) poolst.st_size);
int written = traverse_zooms(fd, size, meta, file_bbox, file_keys, &midx, &midy, layernames, maxzoom, minzoom, outdb, droprate, buffer, fname, tmpdir, gamma, nlayers, prevent);
int written = traverse_zooms(fd, size, meta, stringpool, file_bbox, file_keys, &midx, &midy, layernames, maxzoom, minzoom, outdb, droprate, buffer, fname, tmpdir, gamma, nlayers, prevent);
if (maxzoom != written) {
fprintf(stderr, "\n\n\n*** NOTE TILES ONLY COMPLETE THROUGH ZOOM %d ***\n\n\n", written);
@ -928,11 +991,17 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max
if (munmap(meta, metast.st_size) != 0) {
perror("munmap meta");
}
if (close(metafd) < 0) {
perror("close meta");
}
if (munmap(stringpool, poolst.st_size) != 0) {
perror("munmap pool");
}
if (close(poolfd) < 0) {
perror("close pool");
}
double minlat = 0, minlon = 0, maxlat = 0, maxlon = 0, midlat = 0, midlon = 0;
tile2latlon(midx, midy, maxzoom, &maxlat, &minlon);

20
tile.cc
View File

@ -186,7 +186,17 @@ int coalindexcmp(const struct coalesce *c1, const struct coalesce *c2) {
return cmp;
}
void decode_meta(char **meta, struct pool *keys, struct pool *values, struct pool *file_keys, std::vector<int> *intmeta, char *only) {
// Reads a long long from *f via deserialize_long_long(), treats it as a byte
// offset into stringpool, and returns the result of pool() for the string at
// that offset in pool p with the given type.
// NOTE(review): the offset is not range-checked here; it is presumably one
// produced when the string pool was written -- confirm against the writer.
struct pool_val *retrieve_string(char **f, struct pool *p, int type, char *stringpool) {
	long long offset;
	deserialize_long_long(f, &offset);

	char *str = stringpool + offset;
	return pool(p, str, type);
}
void decode_meta(char **meta, char *stringpool, struct pool *keys, struct pool *values, struct pool *file_keys, std::vector<int> *intmeta, char *only) {
int m;
deserialize_int(meta, &m);
@ -194,13 +204,13 @@ void decode_meta(char **meta, struct pool *keys, struct pool *values, struct poo
for (i = 0; i < m; i++) {
int t;
deserialize_int(meta, &t);
struct pool_val *key = deserialize_string(meta, keys, VT_STRING);
struct pool_val *key = retrieve_string(meta, keys, VT_STRING, stringpool);
if (only != NULL && (strcmp(key->s, only) != 0)) {
deserialize_int(meta, &t);
*meta += t;
} else {
struct pool_val *value = deserialize_string(meta, values, t);
struct pool_val *value = retrieve_string(meta, values, t, stringpool);
intmeta->push_back(key->n);
intmeta->push_back(value->n);
@ -349,7 +359,7 @@ void evaluate(std::vector<coalesce> &features, char *metabase, struct pool *file
}
#endif
long long write_tile(char **geoms, char *metabase, unsigned *file_bbox, int z, unsigned tx, unsigned ty, int detail, int min_detail, int basezoom, struct pool **file_keys, char **layernames, sqlite3 *outdb, double droprate, int buffer, const char *fname, FILE *geomfile[4], int file_minzoom, int file_maxzoom, double todo, char *geomstart, long long along, double gamma, int nlayers, char *prevent) {
long long write_tile(char **geoms, char *metabase, char *stringpool, unsigned *file_bbox, int z, unsigned tx, unsigned ty, int detail, int min_detail, int basezoom, struct pool **file_keys, char **layernames, sqlite3 *outdb, double droprate, int buffer, const char *fname, FILE *geomfile[4], int file_minzoom, int file_maxzoom, double todo, char *geomstart, long long along, double gamma, int nlayers, char *prevent) {
int line_detail;
static bool evaluated = false;
double oprogress = 0;
@ -614,7 +624,7 @@ long long write_tile(char **geoms, char *metabase, unsigned *file_bbox, int z, u
c.metasrc = meta;
c.coalesced = false;
decode_meta(&meta, keys[layer], values[layer], file_keys[layer], &c.meta, NULL);
decode_meta(&meta, stringpool, keys[layer], values[layer], file_keys[layer], &c.meta, NULL);
features[layer].push_back(c);
}
}

2
tile.h
View File

@ -25,4 +25,4 @@ void deserialize_uint(char **f, unsigned *n);
void deserialize_byte(char **f, signed char *n);
struct pool_val *deserialize_string(char **f, struct pool *p, int type);
long long write_tile(char **geom, char *metabase, unsigned *file_bbox, int z, unsigned x, unsigned y, int detail, int min_detail, int basezoom, struct pool **file_keys, char **layernames, sqlite3 *outdb, double droprate, int buffer, const char *fname, FILE *geomfile[4], int file_minzoom, int file_maxzoom, double todo, char *geomstart, long long along, double gamma, int nlayers, char *prevent);
long long write_tile(char **geom, char *metabase, char *stringpool, unsigned *file_bbox, int z, unsigned x, unsigned y, int detail, int min_detail, int basezoom, struct pool **file_keys, char **layernames, sqlite3 *outdb, double droprate, int buffer, const char *fname, FILE *geomfile[4], int file_minzoom, int file_maxzoom, double todo, char *geomstart, long long along, double gamma, int nlayers, char *prevent);