Trying to follow Vlad's suggestion and work from the top down.

Doesn't quite work yet, but seems like the right thing to do.
Working from the bottom up, indexing is a mess because of the buffering,
and it includes unneeded areas in the bounding boxes of big features.
This commit is contained in:
Eric Fischer 2014-12-03 16:18:43 -08:00
parent eaeb55bf71
commit 08ff40e42f
6 changed files with 258 additions and 125 deletions

View File

@ -24,7 +24,7 @@ void enumerate(char *fname) {
long long x = sqlite3_column_int(stmt, 1);
long long y = sqlite3_column_int(stmt, 2);
y = (1LL << zoom) - y;
y = (1LL << zoom) - 1 - y;
printf("%s %lld %lld %lld\n", fname, zoom, x, y);
}

199
geojson.c
View File

@ -97,6 +97,11 @@ void serialize_int(FILE *out, int n, long long *fpos, const char *fname, json_pu
*fpos += sizeof(int);
}
/*
 * Write the raw bytes of one long long to `out` via fwrite_check, then
 * advance the caller's running byte offset `*fpos` by the size written.
 * `fname` and `source` are passed straight through to fwrite_check.
 */
void serialize_long_long(FILE *out, long long n, long long *fpos, const char *fname, json_pull *source) {
	const size_t width = sizeof n;

	fwrite_check(&n, width, 1, out, fname, source);
	*fpos += width;
}
void serialize_byte(FILE *out, signed char n, long long *fpos, const char *fname, json_pull *source) {
fwrite_check(&n, sizeof(signed char), 1, out, fname, source);
*fpos += sizeof(signed char);
@ -179,6 +184,11 @@ void deserialize_int(char **f, int *n) {
*f += sizeof(int);
}
/*
 * Copy one long long (native byte order, no alignment requirement) from the
 * read cursor `*f` into `*n`, then move the cursor past the bytes consumed.
 */
void deserialize_long_long(char **f, long long *n) {
	const size_t width = sizeof *n;
	char *cursor = *f;

	memcpy(n, cursor, width);
	*f = cursor + width;
}
void deserialize_uint(char **f, unsigned *n) {
memcpy(n, *f, sizeof(unsigned));
*f += sizeof(unsigned);
@ -200,57 +210,87 @@ struct pool_val *deserialize_string(char **f, struct pool *p, int type) {
return ret;
}
void check(struct index *ix, long long n, char *metabase, unsigned *file_bbox, struct pool *file_keys, unsigned *midx, unsigned *midy, const char *layername, int maxzoom, int minzoom, sqlite3 *outdb, double droprate, int buffer) {
void check(int geomfd, off_t geom_size, char *metabase, unsigned *file_bbox, struct pool *file_keys, unsigned *midx, unsigned *midy, const char *layername, int maxzoom, int minzoom, sqlite3 *outdb, double droprate, int buffer, const char *fname, struct json_pull *jp, const char *tmpdir) {
int i;
for (i = 0; i < maxzoom; i++) {
fprintf(stderr, "\n");
long long most = 0;
int z;
for (z = maxzoom; z >= minzoom; z--) {
struct index *i, *j = NULL;
for (i = ix; i < ix + n && i != NULL; i = j) {
if (i > ix && indexcmp(i - 1, i) > 0) {
fprintf(stderr, "index out of order\n");
printf("%lld of geom_size\n", (long long) geom_size);
char *geom = mmap(NULL, geom_size, PROT_READ, MAP_PRIVATE, geomfd, 0);
if (geom == MAP_FAILED) {
perror("mmap geom");
exit(EXIT_FAILURE);
}
unsigned wx, wy;
decode(i->index, &wx, &wy);
unsigned tx = 0, ty = 0;
if (z != 0) {
tx = wx >> (32 - z);
ty = wy >> (32 - z);
char *end = geom + geom_size;
char geomname[strlen(tmpdir) + strlen("/geom2.XXXXXXXX") + 1];
sprintf(geomname, "%s%s", tmpdir, "/geom2.XXXXXXXX");
int geomfd2 = mkstemp(geomname);
printf("%s\n", geomname);
if (geomfd2 < 0) {
perror(geomname);
exit(EXIT_FAILURE);
}
FILE *geomfile = fopen(geomname, "wb");
if (geomfile == NULL) {
perror(geomname);
exit(EXIT_FAILURE);
}
unlink(geomname);
while (geom < end) {
int zz;
unsigned xx, yy;
deserialize_int(&geom, &zz);
deserialize_uint(&geom, &xx);
deserialize_uint(&geom, &yy);
int z = zz + 1;
int dim = 2;
if (z == 0) {
dim = 1; // only one tile at z0
}
// printf("%lld in %lld\n", (long long)(i - ix), (long long)n);
char *ogeom = geom;
for (j = i + 1; j < ix + n; j++) {
unsigned wx2, wy2;
decode(j->index, &wx2, &wy2);
unsigned x, y;
for (x = xx * 2; x < xx * 2 + dim; x++) {
for (y = yy * 2; y < yy * 2 + dim; y++) {
fprintf(stderr, "%d/%u/%u\n", z, x, y);
geom = ogeom;
unsigned tx2 = 0, ty2 = 0;
if (z != 0) {
tx2 = wx2 >> (32 - z);
ty2 = wy2 >> (32 - z);
}
// fprintf(stderr, " %3.1f%% %d/%u/%u \r", (((i - ix) + (j - ix)) / 2.0 / n + (maxzoom - z)) / (maxzoom - minzoom + 1) * 100, z, tx, ty);
if (tx2 != tx || ty2 != ty) {
break;
}
}
fprintf(stderr, " %3.1f%% %d/%u/%u \r", (((i - ix) + (j - ix)) / 2.0 / n + (maxzoom - z)) / (maxzoom - minzoom + 1) * 100, z, tx, ty);
long long len = write_tile(i, j, metabase, file_bbox, z, tx, ty, z == maxzoom ? full_detail : low_detail, maxzoom, file_keys, layername, outdb, droprate, buffer);
long long len = write_tile(&geom, metabase, file_bbox, z, x, y, z == maxzoom ? full_detail : low_detail, maxzoom, file_keys, layername, outdb, droprate, buffer, fname, jp, geomfile);
if (z == maxzoom && len > most) {
*midx = tx;
*midy = ty;
*midx = x;
*midy = y;
most = len;
}
}
}
fprintf(stderr, "\n");
}
munmap(geom, geom_size);
close(geomfd);
fclose(geomfile);
struct stat geomst;
if (fstat(geomfd2, &geomst) != 0) {
perror("stat geom\n");
exit(EXIT_FAILURE);
}
geomfd = geomfd2;
geom_size = geomst.st_size;
}
}
struct merge {
@ -260,6 +300,7 @@ struct merge {
struct merge *next;
};
#if 0
static void insert(struct merge *m, struct merge **head, unsigned char *map, int bytes) {
while (*head != NULL && indexcmp(map + m->start, map + (*head)->start) > 0) {
head = &((*head)->next);
@ -301,22 +342,24 @@ static void merge(struct merge *merges, int nmerges, unsigned char *map, FILE *f
}
}
}
#endif
void read_json(FILE *f, const char *fname, const char *layername, int maxzoom, int minzoom, sqlite3 *outdb, struct pool *exclude, struct pool *include, int exclude_all, double droprate, int buffer, const char *tmpdir) {
char metaname[strlen(tmpdir) + strlen("/meta.XXXXXXXX") + 1];
char indexname[strlen(tmpdir) + strlen("/index.XXXXXXXX") + 1];
char geomname[strlen(tmpdir) + strlen("/geom.XXXXXXXX") + 1];
sprintf(metaname, "%s%s", tmpdir, "/meta.XXXXXXXX");
sprintf(indexname, "%s%s", tmpdir, "/index.XXXXXXXX");
sprintf(geomname, "%s%s", tmpdir, "/geom.XXXXXXXX");
int metafd = mkstemp(metaname);
if (metafd < 0) {
perror(metaname);
exit(EXIT_FAILURE);
}
int indexfd = mkstemp(indexname);
if (indexfd < 0) {
perror(indexname);
int geomfd = mkstemp(geomname);
printf("%s\n", geomname);
if (geomfd < 0) {
perror(geomname);
exit(EXIT_FAILURE);
}
@ -325,15 +368,16 @@ void read_json(FILE *f, const char *fname, const char *layername, int maxzoom, i
perror(metaname);
exit(EXIT_FAILURE);
}
FILE *indexfile = fopen(indexname, "wb");
if (indexfile == NULL) {
perror(indexname);
FILE *geomfile = fopen(geomname, "wb");
if (geomfile == NULL) {
perror(geomname);
exit(EXIT_FAILURE);
}
long long fpos = 0;
long long metapos = 0;
long long geompos = 0;
unlink(metaname);
unlink(indexname);
unlink(geomname);
unsigned file_bbox[] = { UINT_MAX, UINT_MAX, 0, 0 };
unsigned midx = 0, midy = 0;
@ -341,6 +385,11 @@ void read_json(FILE *f, const char *fname, const char *layername, int maxzoom, i
json_pull *jp = json_begin_file(f);
long long seq = 0;
/* zoom, x, y of enclosing tile, so 0/0/0's parent is -1/0/0 */
serialize_int(geomfile, -1, &geompos, fname, jp);
serialize_uint(geomfile, 0, &geompos, fname, jp);
serialize_uint(geomfile, 0, &geompos, fname, jp);
while (1) {
json_object *j = json_read(jp);
if (j == NULL) {
@ -398,13 +447,9 @@ void read_json(FILE *f, const char *fname, const char *layername, int maxzoom, i
}
{
long long start = fpos;
unsigned bbox[] = { UINT_MAX, UINT_MAX, 0, 0 };
parse_geometry(t, coordinates, bbox, &fpos, metafile, VT_MOVETO, fname, jp);
serialize_byte(metafile, VT_END, &fpos, fname, jp);
long long metastart = metapos;
char *metakey[properties->length];
char *metaval[properties->length];
int metatype[properties->length];
@ -445,17 +490,21 @@ void read_json(FILE *f, const char *fname, const char *layername, int maxzoom, i
}
}
serialize_int(metafile, m, &fpos, fname, jp);
serialize_int(metafile, m, &metapos, fname, jp);
for (i = 0; i < m; i++) {
serialize_int(metafile, metatype[i], &fpos, fname, jp);
serialize_string(metafile, metakey[i], &fpos, fname, jp);
serialize_string(metafile, metaval[i], &fpos, fname, jp);
serialize_int(metafile, metatype[i], &metapos, fname, jp);
serialize_string(metafile, metakey[i], &metapos, fname, jp);
serialize_string(metafile, metaval[i], &metapos, fname, jp);
}
int z = maxzoom;
serialize_int(geomfile, mb_geometry[t], &geompos, fname, jp);
serialize_long_long(geomfile, metastart, &geompos, fname, jp);
parse_geometry(t, coordinates, bbox, &geompos, geomfile, VT_MOVETO, fname, jp);
serialize_byte(geomfile, VT_END, &geompos, fname, jp);
unsigned cx = bbox[0] / 2 + bbox[2] / 2;
unsigned cy = bbox[1] / 2 + bbox[3] / 2;
#if 0
int z = maxzoom;
#endif
/*
* Note that minzoom for lines is the dimension
@ -485,6 +534,12 @@ void read_json(FILE *f, const char *fname, const char *layername, int maxzoom, i
minzoom = maxzoom - floor(log(r) / - log(droprate));
}
serialize_byte(geomfile, minzoom, &geompos, fname, jp);
#if 0
unsigned cx = bbox[0] / 2 + bbox[2] / 2;
unsigned cy = bbox[1] / 2 + bbox[3] / 2;
/* XXX do proper overlap instead of whole bounding box */
if (z == 0) {
struct index ix;
@ -546,6 +601,7 @@ void read_json(FILE *f, const char *fname, const char *layername, int maxzoom, i
}
}
}
#endif
for (i = 0; i < 2; i++) {
if (bbox[i] < file_bbox[i]) {
@ -569,17 +625,20 @@ void read_json(FILE *f, const char *fname, const char *layername, int maxzoom, i
/* XXX check for any non-features in the outer object */
}
/* end of tile */
serialize_int(geomfile, -2, &geompos, fname, jp);
json_end(jp);
fclose(metafile);
fclose(indexfile);
fclose(geomfile);
printf("bbox: %x %x %x %x\n", file_bbox[0], file_bbox[1], file_bbox[2], file_bbox[3]);
struct stat indexst;
struct stat geomst;
struct stat metast;
if (fstat(indexfd, &indexst) != 0) {
perror("stat index\n");
if (fstat(geomfd, &geomst) != 0) {
perror("stat geom\n");
exit(EXIT_FAILURE);
}
if (fstat(metafd, &metast) != 0) {
@ -587,7 +646,7 @@ void read_json(FILE *f, const char *fname, const char *layername, int maxzoom, i
exit(EXIT_FAILURE);
}
if (indexst.st_size == 0 || metast.st_size == 0) {
if (geomst.st_size == 0 || metast.st_size == 0) {
fprintf(stderr, "%s: did not read any valid geometries\n", fname);
exit(EXIT_FAILURE);
}
@ -632,6 +691,7 @@ void read_json(FILE *f, const char *fname, const char *layername, int maxzoom, i
printf("using layer name %s\n", trunc);
}
#if 0
{
int bytes = sizeof(struct index);
@ -709,29 +769,14 @@ void read_json(FILE *f, const char *fname, const char *layername, int maxzoom, i
fclose(f);
close(indexfd);
}
#endif
indexfd = open(indexname, O_RDONLY);
if (indexfd < 0) {
perror(indexname);
exit(EXIT_FAILURE);
}
if (unlink(indexname) != 0) {
perror(indexname);
exit(EXIT_FAILURE);
}
struct index *index = (struct index *) mmap(NULL, indexst.st_size, PROT_READ, MAP_PRIVATE, indexfd, 0);
if (index == MAP_FAILED) {
perror("mmap index");
exit(EXIT_FAILURE);
}
check(geomfd, geomst.st_size, meta, file_bbox, &file_keys, &midx, &midy, layername, maxzoom, minzoom, outdb, droprate, buffer, fname, jp, tmpdir);
check(index, indexst.st_size / sizeof(struct index), meta, file_bbox, &file_keys, &midx, &midy, layername, maxzoom, minzoom, outdb, droprate, buffer);
munmap(index, indexst.st_size);
munmap(meta, metast.st_size);
close(indexfd);
close(geomfd);
close(metafd);

View File

@ -382,19 +382,10 @@ drawvec reduce_tiny_poly(drawvec &geom, int z, int detail, bool *reduced, double
return out;
}
drawvec clip_lines(drawvec &geom, int z, int detail, long long buffer) {
drawvec clip_point(drawvec &geom, int z, int detail, long long buffer) {
drawvec out;
unsigned i;
for (i = 0; i < geom.size(); i++) {
if (i > 0 && (geom[i - 1].op == VT_MOVETO || geom[i - 1].op == VT_LINETO) && geom[i].op == VT_LINETO) {
double x1 = geom[i - 1].x;
double y1 = geom[i - 1].y;
double x2 = geom[i - 0].x;
double y2 = geom[i - 0].y;
long long min = 0;
long long area = 0xFFFFFFFF;
if (z != 0) {
@ -404,6 +395,36 @@ drawvec clip_lines(drawvec &geom, int z, int detail, long long buffer) {
area += buffer * area / 256;
}
for (i = 0; i < geom.size(); i++) {
if (geom[i].x >= min && geom[i].y >= min && geom[i].x <= area && geom[i].y <= area) {
out.push_back(geom[i]);
}
}
return out;
}
drawvec clip_lines(drawvec &geom, int z, int detail, long long buffer) {
drawvec out;
unsigned i;
long long min = 0;
long long area = 0xFFFFFFFF;
if (z != 0) {
area = 1LL << (32 - z);
min -= buffer * area / 256;
area += buffer * area / 256;
}
for (i = 0; i < geom.size(); i++) {
if (i > 0 && (geom[i - 1].op == VT_MOVETO || geom[i - 1].op == VT_LINETO) && geom[i].op == VT_LINETO) {
double x1 = geom[i - 1].x;
double y1 = geom[i - 1].y;
double x2 = geom[i - 0].x;
double y2 = geom[i - 0].y;
int c = clip(&x1, &y1, &x2, &y2, min, min, area, area);
if (c > 1) { // clipped

View File

@ -18,6 +18,7 @@ typedef std::vector<draw> drawvec;
drawvec decode_geometry(char **meta, int z, unsigned tx, unsigned ty, int detail);
void to_tile_scale(drawvec &geom, int z, int detail);
drawvec remove_noop(drawvec geom, int type);
drawvec clip_point(drawvec &geom, int z, int detail, long long buffer);
drawvec clip_poly(drawvec &geom, int z, int detail, int buffer);
drawvec reduce_tiny_poly(drawvec &geom, int z, int detail, bool *reduced, double *accum_area);
drawvec clip_lines(drawvec &geom, int z, int detail, long long buffer);

102
tile.cc
View File

@ -342,10 +342,12 @@ void evaluate(std::vector<coalesce> &features, char *metabase, struct pool *file
pool_free(&keys);
}
long long write_tile(struct index *start, struct index *end, char *metabase, unsigned *file_bbox, int z, unsigned tx, unsigned ty, int detail, int basezoom, struct pool *file_keys, const char *layername, sqlite3 *outdb, double droprate, int buffer) {
long long write_tile(char **geoms, char *metabase, unsigned *file_bbox, int z, unsigned tx, unsigned ty, int detail, int basezoom, struct pool *file_keys, const char *layername, sqlite3 *outdb, double droprate, int buffer, const char *fname, json_pull *jp, FILE *geomfile) {
int line_detail;
static bool evaluated = false;
char *og = *geoms;
for (line_detail = detail; line_detail >= MIN_DETAIL || line_detail == detail; line_detail--) {
GOOGLE_PROTOBUF_VERIFY_VERSION;
@ -360,41 +362,92 @@ long long write_tile(struct index *start, struct index *end, char *metabase, uns
std::vector<coalesce> features;
struct index *i;
for (i = start; i < end; i++) {
int t = i->type;
int within = 0;
long long geompos = 0;
if (z > i->maxzoom) {
continue;
}
if ((t == VT_LINE && z + line_detail <= i->minzoom) ||
(t == VT_POINT && z < i->minzoom)) {
continue;
*geoms = og;
while (1) {
int t;
deserialize_int(geoms, &t);
printf("geometry %d\n", t);
if (t < 0) {
break;
}
#if 0
if (i->candup) {
if (dup.count(i->fpos) != 0) {
continue;
}
dup.insert(i->fpos);
}
#endif
char *meta = metabase + i->fpos;
drawvec geom = decode_geometry(&meta, z, tx, ty, line_detail);
long long metastart;
deserialize_long_long(geoms, &metastart);
char *meta = metabase + metastart;
drawvec geom = decode_geometry(geoms, z, tx, ty, line_detail);
signed char minzoom;
deserialize_byte(geoms, &minzoom);
#if 0
if (z > i->maxzoom) {
continue;
}
#endif
if (t == VT_LINE) {
geom = clip_lines(geom, z, line_detail, buffer);
}
if (t == VT_POLYGON) {
geom = clip_poly(geom, z, line_detail, buffer);
}
if (t == VT_POINT) {
geom = clip_point(geom, z, line_detail, buffer);
}
geom = remove_noop(geom, t);
if (line_detail == detail) { /* only write out the next zoom once, even if we retry */
if (geom.size() > 0) {
if (!within) {
printf("writing %d/%u/%u\n", z, tx, ty);
serialize_int(geomfile, z, &geompos, fname, jp);
serialize_uint(geomfile, tx, &geompos, fname, jp);
serialize_uint(geomfile, ty, &geompos, fname, jp);
within = 1;
}
//printf("type %d, meta %lld\n", t, metastart);
serialize_int(geomfile, t, &geompos, fname, jp);
serialize_long_long(geomfile, metastart, &geompos, fname, jp);
for (unsigned u = 0; u < geom.size(); u++) {
serialize_byte(geomfile, geom[u].op, &geompos, fname, jp);
if (geom[u].op != VT_CLOSEPATH) {
serialize_uint(geomfile, geom[u].x, &geompos, fname, jp);
serialize_uint(geomfile, geom[u].y, &geompos, fname, jp);
}
}
serialize_byte(geomfile, VT_END, &geompos, fname, jp);
serialize_byte(geomfile, minzoom, &geompos, fname, jp);
}
}
if ((t == VT_LINE && z + line_detail <= minzoom) ||
(t == VT_POINT && z < minzoom)) {
continue;
}
bool reduced = false;
if (t == VT_POLYGON) {
geom = reduce_tiny_poly(geom, z, line_detail, &reduced, &accum_area);
}
if (t == VT_LINE) {
geom = clip_lines(geom, z, line_detail, buffer);
}
if (t == VT_POLYGON) {
geom = clip_poly(geom, z, line_detail, buffer);
}
if (t == VT_LINE || t == VT_POLYGON) {
if (!reduced) {
geom = simplify_lines(geom, z, line_detail);
@ -428,8 +481,8 @@ long long write_tile(struct index *start, struct index *end, char *metabase, uns
c.index2 = ~0LL;
}
} else {
c.index = i->index;
c.index2 = i->index;
c.index = 0;
c.index2 = 0;
}
c.geom = geom;
c.metasrc = meta;
@ -440,6 +493,11 @@ long long write_tile(struct index *start, struct index *end, char *metabase, uns
}
}
if (within) {
serialize_int(geomfile, -2, &geompos, fname, jp);
within = 0;
}
std::sort(features.begin(), features.end());
std::vector<coalesce> out;

10
tile.h
View File

@ -12,8 +12,16 @@
#define VT_BOOLEAN 7
struct pool;
struct json_pull;
/*
 * Binary serialization helpers (defined in geojson.c).
 *
 * The fixed-width writers write the raw bytes of one value to `out` and add
 * the number of bytes written to the running offset `*fpos`; `fname` and
 * `source` are forwarded to the underlying checked write.
 * NOTE(review): serialize_uint and serialize_string are assumed to follow the
 * same contract -- their bodies are not visible here; confirm in geojson.c.
 */
void serialize_int(FILE *out, int n, long long *fpos, const char *fname, struct json_pull *source);
void serialize_long_long(FILE *out, long long n, long long *fpos, const char *fname, struct json_pull *source);
void serialize_byte(FILE *out, signed char n, long long *fpos, const char *fname, struct json_pull *source);
void serialize_uint(FILE *out, unsigned n, long long *fpos, const char *fname, struct json_pull *source);
void serialize_string(FILE *out, const char *s, long long *fpos, const char *fname, struct json_pull *source);
/*
 * Matching readers: each fixed-width reader copies one value from the buffer
 * at `*f` into `*n` and advances `*f` past the bytes consumed.
 * NOTE(review): deserialize_byte is assumed to behave the same way; its body
 * is not visible here.
 */
void deserialize_int(char **f, int *n);
void deserialize_long_long(char **f, long long *n);
void deserialize_uint(char **f, unsigned *n);
void deserialize_byte(char **f, signed char *n);
/* Reads a string from `*f` and returns a pool entry for it (see geojson.c for details). */
struct pool_val *deserialize_string(char **f, struct pool *p, int type);
@ -28,4 +36,4 @@ struct index {
int candup : 1;
};
long long write_tile(struct index *start, struct index *end, char *metabase, unsigned *file_bbox, int z, unsigned x, unsigned y, int detail, int basezoom, struct pool *file_keys, const char *layername, sqlite3 *outdb, double droprate, int buffer);
long long write_tile(char **geom, char *metabase, unsigned *file_bbox, int z, unsigned x, unsigned y, int detail, int basezoom, struct pool *file_keys, const char *layername, sqlite3 *outdb, double droprate, int buffer, const char *fname, struct json_pull *jp, FILE *geomfile);