Build the string pool in an appendable memory map, not the normal heap

This commit is contained in:
Eric Fischer 2015-06-18 16:13:37 -07:00
parent d96dee8dad
commit dc3021656e
4 changed files with 141 additions and 48 deletions

View File

@ -22,7 +22,7 @@ C = $(shell find . '(' -name '*.c' -o -name '*.cc' ')')
INCLUDES = -I/usr/local/include
LIBS = -L/usr/local/lib
tippecanoe: geojson.o jsonpull.o vector_tile.pb.o tile.o clip.o pool.o mbtiles.o geometry.o projection.o
tippecanoe: geojson.o jsonpull.o vector_tile.pb.o tile.o clip.o pool.o mbtiles.o geometry.o projection.o memfile.o
g++ $(PG) $(LIBS) -O3 -g -Wall -o $@ $^ -lm -lz -lprotobuf-lite -lsqlite3
enumerate: enumerate.o

108
geojson.c
View File

@ -20,6 +20,7 @@
#include "mbtiles.h"
#include "projection.h"
#include "version.h"
#include "memfile.h"
int low_detail = 10;
int full_detail = -1;
@ -396,43 +397,71 @@ static void merge(struct merge *merges, int nmerges, unsigned char *map, FILE *f
}
struct stringpool {
char *s; // first byte is type
struct stringpool *left;
struct stringpool *right;
long long left;
long long right;
long long off;
} *pooltree = NULL;
};
long long pooltree = 0;
long long addpool(FILE *poolfile, long long *poolpos, char *s, char type) {
struct stringpool **sp = &pooltree;
long long addpool(struct memfile *poolfile, char *s, char type) {
long long *sp = &pooltree;
while (*sp != NULL) {
int cmp = strcmp(s, (*sp)->s + 1);
while (*sp != 0) {
int cmp = strcmp(s, poolfile->map + ((struct stringpool *) (poolfile->map + *sp))->off + 1);
if (cmp == 0) {
cmp = type - (*sp)->s[0];
cmp = type - (poolfile->map + ((struct stringpool *) (poolfile->map + *sp))->off)[0];
}
if (cmp < 0) {
sp = &((*sp)->left);
sp = &(((struct stringpool *) (poolfile->map + *sp))->left);
} else if (cmp > 0) {
sp = &((*sp)->right);
sp = &(((struct stringpool *) (poolfile->map + *sp))->right);
} else {
return (*sp)->off;
return ((struct stringpool *) (poolfile->map + *sp))->off;
}
}
*sp = malloc(sizeof(struct stringpool));
(*sp)->s = malloc(strlen(s) + 2);
(*sp)->s[0] = type;
strcpy((*sp)->s + 1, s); // XXX really should be mapped from the pool itself
(*sp)->left = NULL;
(*sp)->right = NULL;
(*sp)->off = *poolpos;
// *sp is probably in the memory-mapped file, and will move if the file grows.
long long ssp;
if (sp == &pooltree) {
ssp = -1;
} else {
ssp = ((char *) sp) - poolfile->map;
}
fwrite_check((*sp)->s, strlen(s) + 2, sizeof(char), poolfile, "string pool");
*poolpos += strlen(s) + 2;
long long off = poolfile->off;
if (memfile_write(poolfile, &type, 1) < 0) {
perror("memfile write");
exit(EXIT_FAILURE);
}
if (memfile_write(poolfile, s, strlen(s) + 1) < 0) {
perror("memfile write");
exit(EXIT_FAILURE);
}
return (*sp)->off;
struct stringpool tsp;
tsp.left = 0;
tsp.right = 0;
tsp.off = off;
// alignment
while (poolfile->off % sizeof(long long) != 0) {
memfile_write(poolfile, "", 1);
}
long long p = poolfile->off;
if (memfile_write(poolfile, &tsp, sizeof(struct stringpool)) < 0) {
perror("memfile write");
exit(EXIT_FAILURE);
}
if (ssp == -1) {
pooltree = p;
} else {
*((long long *) (poolfile->map + ssp)) = p;
}
return off;
}
int read_json(int argc, char **argv, char *fname, const char *layername, int maxzoom, int minzoom, sqlite3 *outdb, struct pool *exclude, struct pool *include, int exclude_all, double droprate, int buffer, const char *tmpdir, double gamma, char *prevent) {
@ -474,7 +503,7 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max
perror(metaname);
exit(EXIT_FAILURE);
}
FILE *poolfile = fopen(poolname, "wb");
struct memfile *poolfile = memfile_open(poolfd);
if (poolfile == NULL) {
perror(poolname);
exit(EXIT_FAILURE);
@ -490,18 +519,16 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max
exit(EXIT_FAILURE);
}
long long metapos = 0;
long long poolpos = 0;
long long geompos = 0;
long long indexpos = 0;
unlink(metaname);
//unlink(poolname);
unlink(poolname);
unlink(geomname);
unlink(indexname);
// So we still have a legitimate map even if no metadata
fprintf(poolfile, "\n");
poolpos++;
// To distinguish a null value
memfile_write(poolfile, "", 1);
unsigned file_bbox[] = {UINT_MAX, UINT_MAX, 0, 0};
unsigned midx = 0, midy = 0;
@ -663,8 +690,8 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max
serialize_int(metafile, m, &metapos, fname);
for (i = 0; i < m; i++) {
serialize_long_long(metafile, addpool(poolfile, &poolpos, metakey[i], VT_STRING), &metapos, fname);
serialize_long_long(metafile, addpool(poolfile, &poolpos, metaval[i], metatype[i]), &metapos, fname);
serialize_long_long(metafile, addpool(poolfile, metakey[i], VT_STRING), &metapos, fname);
serialize_long_long(metafile, addpool(poolfile, metaval[i], metatype[i]), &metapos, fname);
}
long long geomstart = geompos;
@ -739,13 +766,11 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max
}
fclose(metafile);
fclose(poolfile);
fclose(geomfile);
fclose(indexfile);
struct stat geomst;
struct stat metast;
struct stat poolst;
if (fstat(geomfd, &geomst) != 0) {
perror("stat geom\n");
@ -755,10 +780,6 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max
perror("stat meta\n");
exit(EXIT_FAILURE);
}
if (fstat(poolfd, &poolst) != 0) {
perror("stat pool\n");
exit(EXIT_FAILURE);
}
if (geomst.st_size == 0 || metast.st_size == 0) {
fprintf(stderr, "did not read any valid geometries\n");
@ -771,11 +792,7 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max
exit(EXIT_FAILURE);
}
char *stringpool = (char *) mmap(NULL, poolst.st_size, PROT_READ, MAP_PRIVATE, poolfd, 0);
if (stringpool == MAP_FAILED) {
perror("mmap stringpool");
exit(EXIT_FAILURE);
}
char *stringpool = poolfile->map;
struct pool file_keys1[nlayers];
struct pool *file_keys[nlayers];
@ -856,6 +873,7 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max
merges[start / unit].end = end;
merges[start / unit].next = NULL;
// MAP_PRIVATE to avoid disk writes if it fits in memory
void *map = mmap(NULL, end - start, PROT_READ | PROT_WRITE, MAP_PRIVATE, indexfd, start);
if (map == MAP_FAILED) {
perror("mmap");
@ -999,7 +1017,7 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max
size[j] = 0;
}
fprintf(stderr, "%lld features, %lld bytes of geometry, %lld bytes of metadata, %lld bytes of string pool\n", seq, (long long) geomst.st_size, (long long) metast.st_size, (long long) poolst.st_size);
fprintf(stderr, "%lld features, %lld bytes of geometry, %lld bytes of metadata, %lld bytes of string pool\n", seq, (long long) geomst.st_size, (long long) metast.st_size, poolfile->off);
int written = traverse_zooms(fd, size, meta, stringpool, file_bbox, file_keys, &midx, &midy, layernames, maxzoom, minzoom, outdb, droprate, buffer, fname, tmpdir, gamma, nlayers, prevent);
@ -1016,10 +1034,7 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max
perror("close meta");
}
if (munmap(stringpool, poolst.st_size) != 0) {
perror("munmap pool");
}
if (close(poolfd) < 0) {
if (memfile_close(poolfile) != 0) {
perror("close pool");
}
@ -1184,7 +1199,6 @@ int main(int argc, char **argv) {
}
geometry_scale = 32 - (full_detail + maxzoom);
printf("geometry scale is %d\n", geometry_scale);
if (outdir == NULL) {
fprintf(stderr, "%s: must specify -o out.mbtiles\n", argv[0]);

69
memfile.c Normal file
View File

@ -0,0 +1,69 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include "memfile.h"
#define INCREMENT 131072
struct memfile *memfile_open(int fd) {
if (ftruncate(fd, INCREMENT) != 0) {
return NULL;
}
char *map = mmap(NULL, INCREMENT, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if (map == MAP_FAILED) {
return NULL;
}
struct memfile *mf = malloc(sizeof(struct memfile));
if (mf == NULL) {
munmap(map, INCREMENT);
return NULL;
}
mf->fd = fd;
mf->map = map;
mf->len = INCREMENT;
mf->off = 0;
return mf;
}
int memfile_close(struct memfile *file) {
if (munmap(file->map, file->len) != 0) {
return -1;
}
if (file->fd >= 0) {
if (close(file->fd) != 0) {
return -1;
}
}
free(file);
return 0;
}
int memfile_write(struct memfile *file, void *s, long long len) {
if (file->off + len > file->len) {
if (munmap(file->map, file->len) != 0) {
return -1;
}
file->len += INCREMENT;
if (ftruncate(file->fd, file->len) != 0) {
return -1;
}
file->map = mmap(NULL, file->len, PROT_READ | PROT_WRITE, MAP_SHARED, file->fd, 0);
if (file->map == MAP_FAILED) {
return -1;
}
}
memcpy(file->map + file->off, s, len);
file->off += len;
return len;
}

10
memfile.h Normal file
View File

@ -0,0 +1,10 @@
struct memfile {
int fd;
char *map;
long long len;
long long off;
};
struct memfile *memfile_open(int fd);
int memfile_close(struct memfile *file);
int memfile_write(struct memfile *file, void *s, long long len);