diff --git a/Makefile b/Makefile index 94c63fc..efb279f 100644 --- a/Makefile +++ b/Makefile @@ -22,7 +22,7 @@ C = $(shell find . '(' -name '*.c' -o -name '*.cc' ')') INCLUDES = -I/usr/local/include LIBS = -L/usr/local/lib -tippecanoe: geojson.o jsonpull.o vector_tile.pb.o tile.o clip.o pool.o mbtiles.o geometry.o projection.o +tippecanoe: geojson.o jsonpull.o vector_tile.pb.o tile.o clip.o pool.o mbtiles.o geometry.o projection.o memfile.o g++ $(PG) $(LIBS) -O3 -g -Wall -o $@ $^ -lm -lz -lprotobuf-lite -lsqlite3 enumerate: enumerate.o diff --git a/geojson.c b/geojson.c index 46b8184..1146360 100644 --- a/geojson.c +++ b/geojson.c @@ -20,6 +20,7 @@ #include "mbtiles.h" #include "projection.h" #include "version.h" +#include "memfile.h" int low_detail = 10; int full_detail = -1; @@ -396,43 +397,71 @@ static void merge(struct merge *merges, int nmerges, unsigned char *map, FILE *f } struct stringpool { - char *s; // first byte is type - struct stringpool *left; - struct stringpool *right; + long long left; + long long right; long long off; -} *pooltree = NULL; +}; +long long pooltree = 0; -long long addpool(FILE *poolfile, long long *poolpos, char *s, char type) { - struct stringpool **sp = &pooltree; +long long addpool(struct memfile *poolfile, char *s, char type) { + long long *sp = &pooltree; - while (*sp != NULL) { - int cmp = strcmp(s, (*sp)->s + 1); + while (*sp != 0) { + int cmp = strcmp(s, poolfile->map + ((struct stringpool *) (poolfile->map + *sp))->off + 1); if (cmp == 0) { - cmp = type - (*sp)->s[0]; + cmp = type - (poolfile->map + ((struct stringpool *) (poolfile->map + *sp))->off)[0]; } if (cmp < 0) { - sp = &((*sp)->left); + sp = &(((struct stringpool *) (poolfile->map + *sp))->left); } else if (cmp > 0) { - sp = &((*sp)->right); + sp = &(((struct stringpool *) (poolfile->map + *sp))->right); } else { - return (*sp)->off; + return ((struct stringpool *) (poolfile->map + *sp))->off; } } - *sp = malloc(sizeof(struct stringpool)); - (*sp)->s = malloc(strlen(s) + 2); - (*sp)->s[0] = type; - strcpy((*sp)->s + 1, s); // XXX really should be mapped from the pool itself - (*sp)->left = NULL; - (*sp)->right = NULL; - (*sp)->off = *poolpos; + // *sp is probably in the memory-mapped file, and will move if the file grows. + long long ssp; + if (sp == &pooltree) { + ssp = -1; + } else { + ssp = ((char *) sp) - poolfile->map; + } - fwrite_check((*sp)->s, strlen(s) + 2, sizeof(char), poolfile, "string pool"); - *poolpos += strlen(s) + 2; + long long off = poolfile->off; + if (memfile_write(poolfile, &type, 1) < 0) { + perror("memfile write"); + exit(EXIT_FAILURE); + } + if (memfile_write(poolfile, s, strlen(s) + 1) < 0) { + perror("memfile write"); + exit(EXIT_FAILURE); + } - return (*sp)->off; + struct stringpool tsp; + tsp.left = 0; + tsp.right = 0; + tsp.off = off; + + // alignment + while (poolfile->off % sizeof(long long) != 0) { + memfile_write(poolfile, "", 1); + } + + long long p = poolfile->off; + if (memfile_write(poolfile, &tsp, sizeof(struct stringpool)) < 0) { + perror("memfile write"); + exit(EXIT_FAILURE); + } + + if (ssp == -1) { + pooltree = p; + } else { + *((long long *) (poolfile->map + ssp)) = p; + } + return off; } int read_json(int argc, char **argv, char *fname, const char *layername, int maxzoom, int minzoom, sqlite3 *outdb, struct pool *exclude, struct pool *include, int exclude_all, double droprate, int buffer, const char *tmpdir, double gamma, char *prevent) { @@ -474,7 +503,7 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max perror(metaname); exit(EXIT_FAILURE); } - FILE *poolfile = fopen(poolname, "wb"); + struct memfile *poolfile = memfile_open(poolfd); if (poolfile == NULL) { perror(poolname); exit(EXIT_FAILURE); @@ -490,18 +519,16 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max exit(EXIT_FAILURE); } long long metapos = 0; - long long poolpos = 0; long long geompos = 0; long long indexpos = 0; unlink(metaname); - //unlink(poolname); + unlink(poolname); unlink(geomname); unlink(indexname); - // So we still have a legitimate map even if no metadata - fprintf(poolfile, "\n"); - poolpos++; + // To distinguish a null value + memfile_write(poolfile, "", 1); unsigned file_bbox[] = {UINT_MAX, UINT_MAX, 0, 0}; unsigned midx = 0, midy = 0; @@ -663,8 +690,8 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max serialize_int(metafile, m, &metapos, fname); for (i = 0; i < m; i++) { - serialize_long_long(metafile, addpool(poolfile, &poolpos, metakey[i], VT_STRING), &metapos, fname); - serialize_long_long(metafile, addpool(poolfile, &poolpos, metaval[i], metatype[i]), &metapos, fname); + serialize_long_long(metafile, addpool(poolfile, metakey[i], VT_STRING), &metapos, fname); + serialize_long_long(metafile, addpool(poolfile, metaval[i], metatype[i]), &metapos, fname); } long long geomstart = geompos; @@ -739,13 +766,11 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max } fclose(metafile); - fclose(poolfile); fclose(geomfile); fclose(indexfile); struct stat geomst; struct stat metast; - struct stat poolst; if (fstat(geomfd, &geomst) != 0) { perror("stat geom\n"); @@ -755,10 +780,6 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max perror("stat meta\n"); exit(EXIT_FAILURE); } - if (fstat(poolfd, &poolst) != 0) { - perror("stat pool\n"); - exit(EXIT_FAILURE); - } if (geomst.st_size == 0 || metast.st_size == 0) { fprintf(stderr, "did not read any valid geometries\n"); @@ -771,11 +792,7 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max exit(EXIT_FAILURE); } - char *stringpool = (char *) mmap(NULL, poolst.st_size, PROT_READ, MAP_PRIVATE, poolfd, 0); - if (stringpool == MAP_FAILED) { - perror("mmap stringpool"); - exit(EXIT_FAILURE); - } + char *stringpool = poolfile->map; struct pool file_keys1[nlayers]; struct pool *file_keys[nlayers]; @@ -856,6 +873,7 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max merges[start / unit].end = end; merges[start / unit].next = NULL; + // MAP_PRIVATE to avoid disk writes if it fits in memory void *map = mmap(NULL, end - start, PROT_READ | PROT_WRITE, MAP_PRIVATE, indexfd, start); if (map == MAP_FAILED) { perror("mmap"); @@ -999,7 +1017,7 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max size[j] = 0; } - fprintf(stderr, "%lld features, %lld bytes of geometry, %lld bytes of metadata, %lld bytes of string pool\n", seq, (long long) geomst.st_size, (long long) metast.st_size, (long long) poolst.st_size); + fprintf(stderr, "%lld features, %lld bytes of geometry, %lld bytes of metadata, %lld bytes of string pool\n", seq, (long long) geomst.st_size, (long long) metast.st_size, poolfile->off); int written = traverse_zooms(fd, size, meta, stringpool, file_bbox, file_keys, &midx, &midy, layernames, maxzoom, minzoom, outdb, droprate, buffer, fname, tmpdir, gamma, nlayers, prevent); @@ -1016,10 +1034,7 @@ int read_json(int argc, char **argv, char *fname, const char *layername, int max perror("close meta"); } - if (munmap(stringpool, poolst.st_size) != 0) { - perror("munmap pool"); - } - if (close(poolfd) < 0) { + if (memfile_close(poolfile) != 0) { perror("close pool"); } @@ -1184,7 +1199,6 @@ int main(int argc, char **argv) { } geometry_scale = 32 - (full_detail + maxzoom); - printf("geometry scale is %d\n", geometry_scale); if (outdir == NULL) { fprintf(stderr, "%s: must specify -o out.mbtiles\n", argv[0]); diff --git a/memfile.c b/memfile.c new file mode 100644 index 0000000..454628c --- /dev/null +++ b/memfile.c @@ -0,0 +1,69 @@ +#include +#include +#include +#include +#include "memfile.h" + +#define INCREMENT 131072 + +struct memfile *memfile_open(int fd) { + if (ftruncate(fd, INCREMENT) != 0) { + return NULL; + } + + char *map = mmap(NULL, INCREMENT, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (map == MAP_FAILED) { + return NULL; + } + + struct memfile *mf = malloc(sizeof(struct memfile)); + if (mf == NULL) { + munmap(map, INCREMENT); + return NULL; + } + + mf->fd = fd; + mf->map = map; + mf->len = INCREMENT; + mf->off = 0; + + return mf; +} + +int memfile_close(struct memfile *file) { + if (munmap(file->map, file->len) != 0) { + return -1; + } + + if (file->fd >= 0) { + if (close(file->fd) != 0) { + return -1; + } + } + + free(file); + return 0; +} + +int memfile_write(struct memfile *file, void *s, long long len) { + if (file->off + len > file->len) { + if (munmap(file->map, file->len) != 0) { + return -1; + } + + file->len += INCREMENT; + + if (ftruncate(file->fd, file->len) != 0) { + return -1; + } + + file->map = mmap(NULL, file->len, PROT_READ | PROT_WRITE, MAP_SHARED, file->fd, 0); + if (file->map == MAP_FAILED) { + return -1; + } + } + + memcpy(file->map + file->off, s, len); + file->off += len; + return len; +} diff --git a/memfile.h b/memfile.h new file mode 100644 index 0000000..b536a6c --- /dev/null +++ b/memfile.h @@ -0,0 +1,10 @@ +struct memfile { + int fd; + char *map; + long long len; + long long off; +}; + +struct memfile *memfile_open(int fd); +int memfile_close(struct memfile *file); +int memfile_write(struct memfile *file, void *s, long long len);