From 6cea2d5db6b4aa2470130f7058a1547705a69891 Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Tue, 22 Aug 2017 18:10:52 -0700 Subject: [PATCH] Progress on factoring out serialization state into a single object --- geojson.cpp | 110 ++++++++++++++++++++++++++-------------------------- main.cpp | 2 +- serial.hpp | 32 +++++++++++++++ 3 files changed, 88 insertions(+), 56 deletions(-) diff --git a/geojson.cpp b/geojson.cpp index dd325f4..355379d 100644 --- a/geojson.cpp +++ b/geojson.cpp @@ -38,10 +38,10 @@ #include "read_json.hpp" #include "mvt.hpp" -int serialize_feature(FILE *geomfile, FILE *metafile, FILE *indexfile, memfile *treefile, memfile *poolfile, serial_feature &sf, long long *geompos, long long *metapos, long long *indexpos, const char *fname, int line, unsigned *initial_x, unsigned *initial_y, bool want_dist, bool filters, long long *file_bbox, double *dist_sum, size_t *dist_count, int maxzoom, bool uses_gamma, size_t segment, struct reader *readers, volatile long long *layer_seq, volatile long long *progress_seq); +int serialize_feature(struct serialization_state *sst, serial_feature &sf, bool want_dist, bool filters, int maxzoom, bool uses_gamma); -static long long parse_geometry1(int t, json_object *j, long long *bbox, drawvec &geom, int op, const char *fname, int line, int *initialized, unsigned *initial_x, unsigned *initial_y, json_object *feature, long long &prev, long long &offset, bool &has_prev) { - parse_geometry(t, j, geom, op, fname, line, feature); +static long long parse_geometry1(struct serialization_state *sst, int t, json_object *j, long long *bbox, drawvec &geom, int op, json_object *feature, long long &prev, long long &offset, bool &has_prev) { + parse_geometry(t, j, geom, op, sst->fname, sst->line, feature); for (size_t i = 0; i < geom.size(); i++) { if (geom[i].op == VT_MOVETO || geom[i].op == VT_LINETO) { @@ -77,16 +77,16 @@ static long long parse_geometry1(int t, json_object *j, long long *bbox, drawvec bbox[3] = y; } - if (!*initialized) { + if (!*(sst->initialized)) { if (x < 0 || x >= (1LL << 32) || y < 0 || y >= (1LL < 32)) { - *initial_x = 1LL << 31; - *initial_y = 1LL << 31; + *(sst->initial_x) = 1LL << 31; + *(sst->initial_y) = 1LL << 31; } else { - *initial_x = (x >> geometry_scale) << geometry_scale; - *initial_y = (y >> geometry_scale) << geometry_scale; + *(sst->initial_x) = (x >> geometry_scale) << geometry_scale; + *(sst->initial_y) = (y >> geometry_scale) << geometry_scale; } - *initialized = 1; + *(sst->initialized) = 1; } geom[i].x = x >> geometry_scale; @@ -97,12 +97,12 @@ static long long parse_geometry1(int t, json_object *j, long long *bbox, drawvec return geom.size(); } -int serialize_geometry(json_object *geometry, json_object *properties, json_object *id, const char *reading, int line, volatile long long *layer_seq, volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set *exclude, std::set *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, const char *fname, int basezoom, int layer, double droprate, long long *file_bbox, json_object *tippecanoe, int segment, int *initialized, unsigned *initial_x, unsigned *initial_y, struct reader *readers, int maxzoom, json_object *feature, std::map *layermap, std::string layername, bool uses_gamma, std::map const *attribute_types, double *dist_sum, size_t *dist_count, bool want_dist, bool filters) { +int serialize_geometry(struct serialization_state *sst, json_object *geometry, json_object *properties, json_object *id, std::set *exclude, std::set *include, int exclude_all, int basezoom, int layer, double droprate, json_object *tippecanoe, int maxzoom, json_object *feature, std::map *layermap, std::string layername, bool uses_gamma, std::map const *attribute_types, bool want_dist, bool filters) { json_object *geometry_type = json_hash_get(geometry, "type"); if (geometry_type == NULL) { static int warned = 0; if (!warned) { - fprintf(stderr, "%s:%d: null geometry (additional not reported)\n", reading, line); + fprintf(stderr, "%s:%d: null geometry (additional not reported)\n", sst->fname, sst->line); json_context(feature); warned = 1; } @@ -111,14 +111,14 @@ int serialize_geometry(json_object *geometry, json_object *properties, json_obje } if (geometry_type->type != JSON_STRING) { - fprintf(stderr, "%s:%d: geometry type is not a string\n", reading, line); + fprintf(stderr, "%s:%d: geometry type is not a string\n", sst->fname, sst->line); json_context(feature); return 0; } json_object *coordinates = json_hash_get(geometry, "coordinates"); if (coordinates == NULL || coordinates->type != JSON_ARRAY) { - fprintf(stderr, "%s:%d: feature without coordinates array\n", reading, line); + fprintf(stderr, "%s:%d: feature without coordinates array\n", sst->fname, sst->line); json_context(feature); return 0; } @@ -130,7 +130,7 @@ int serialize_geometry(json_object *geometry, json_object *properties, json_obje } } if (t >= GEOM_TYPES) { - fprintf(stderr, "%s:%d: Can't handle geometry type %s\n", reading, line, geometry_type->string); + fprintf(stderr, "%s:%d: Can't handle geometry type %s\n", sst->fname, sst->line, geometry_type->string); json_context(feature); return 0; } @@ -263,7 +263,7 @@ int serialize_geometry(json_object *geometry, json_object *properties, json_obje int type = -1; std::string val; - stringify_value(properties->values[i], type, val, reading, line, feature, properties->keys[i]->string, attribute_types); + stringify_value(properties->values[i], type, val, sst->fname, sst->line, feature, properties->keys[i]->string, attribute_types); if (type >= 0) { metakey[m] = properties->keys[i]->string; @@ -288,14 +288,14 @@ int serialize_geometry(json_object *geometry, json_object *properties, json_obje long long offset = 0; drawvec dv; - parse_geometry1(t, coordinates, bbox, dv, VT_MOVETO, fname, line, initialized, initial_x, initial_y, feature, prev, offset, has_prev); + parse_geometry1(sst, t, coordinates, bbox, dv, VT_MOVETO, feature, prev, offset, has_prev); if (mb_geometry[t] == VT_POLYGON) { dv = fix_polygon(dv); } serial_feature sf; sf.layer = layer; - sf.segment = segment; + sf.segment = sst->segment; sf.t = mb_geometry[t]; sf.has_id = has_id; sf.id = id_value; @@ -306,7 +306,7 @@ int serialize_geometry(json_object *geometry, json_object *properties, json_obje sf.geometry = dv; sf.m = m; sf.feature_minzoom = 0; // Will be filled in during index merging - sf.seq = *layer_seq; + sf.seq = *(sst->layer_seq); for (size_t i = 0; i < 4; i++) { sf.bbox[i] = bbox[i]; @@ -322,10 +322,10 @@ int serialize_geometry(json_object *geometry, json_object *properties, json_obje sf.full_values.push_back(sv); } - return serialize_feature(geomfile, metafile, indexfile, treefile, poolfile, sf, geompos, metapos, indexpos, fname, line, initial_x, initial_y, want_dist, filters, file_bbox, dist_sum, dist_count, maxzoom, uses_gamma, segment, readers, layer_seq, progress_seq); + return serialize_feature(sst, sf, want_dist, filters, maxzoom, uses_gamma); } -int serialize_feature(FILE *geomfile, FILE *metafile, FILE *indexfile, memfile *treefile, memfile *poolfile, serial_feature &sf, long long *geompos, long long *metapos, long long *indexpos, const char *fname, int line, unsigned *initial_x, unsigned *initial_y, bool want_dist, bool filters, long long *file_bbox, double *dist_sum, size_t *dist_count, int maxzoom, bool uses_gamma, size_t segment, struct reader *readers, volatile long long *layer_seq, volatile long long *progress_seq) { +int serialize_feature(struct serialization_state *sst, serial_feature &sf, bool want_dist, bool filters, int maxzoom, bool uses_gamma) { if (want_dist) { std::vector locs; for (size_t i = 0; i < sf.geometry.size(); i++) { @@ -347,8 +347,8 @@ int serialize_feature(FILE *geomfile, FILE *metafile, FILE *indexfile, memfile * // Convert approximately from tile units to feet double dist_ft = sqrt(avg) / 33; - *dist_sum += log(dist_ft) * n; - *dist_count += n; + *(sst->dist_sum) += log(dist_ft) * n; + *(sst->dist_count) += n; } locs.clear(); } @@ -365,7 +365,7 @@ int serialize_feature(FILE *geomfile, FILE *metafile, FILE *indexfile, memfile * static volatile long long warned = 0; long long extent = ((sf.bbox[2] - sf.bbox[0]) / ((1LL << (32 - maxzoom)) + 1)) * ((sf.bbox[3] - sf.bbox[1]) / ((1LL << (32 - maxzoom)) + 1)); if (extent > warned) { - fprintf(stderr, "Warning: %s:%d: Large unclipped (-pc) feature may be duplicated across %lld tiles\n", fname, line, extent); + fprintf(stderr, "Warning: %s:%d: Large unclipped (-pc) feature may be duplicated across %lld tiles\n", sst->fname, sst->line, extent); warned = extent; if (extent > 10000) { @@ -426,50 +426,50 @@ int serialize_feature(FILE *geomfile, FILE *metafile, FILE *indexfile, memfile * if (inline_meta) { sf.metapos = -1; for (size_t i = 0; i < sf.full_keys.size(); i++) { - sf.keys.push_back(addpool(poolfile, treefile, sf.full_keys[i].c_str(), mvt_string)); - sf.values.push_back(addpool(poolfile, treefile, sf.full_values[i].s.c_str(), sf.full_values[i].type)); + sf.keys.push_back(addpool(sst->poolfile, sst->treefile, sf.full_keys[i].c_str(), mvt_string)); + sf.values.push_back(addpool(sst->poolfile, sst->treefile, sf.full_values[i].s.c_str(), sf.full_values[i].type)); } } else { - sf.metapos = *metapos; + sf.metapos = *(sst->metapos); for (size_t i = 0; i < sf.full_keys.size(); i++) { - serialize_long_long(metafile, addpool(poolfile, treefile, sf.full_keys[i].c_str(), mvt_string), metapos, fname); - serialize_long_long(metafile, addpool(poolfile, treefile, sf.full_values[i].s.c_str(), sf.full_values[i].type), metapos, fname); + serialize_long_long(sst->metafile, addpool(sst->poolfile, sst->treefile, sf.full_keys[i].c_str(), mvt_string), sst->metapos, sst->fname); + serialize_long_long(sst->metafile, addpool(sst->poolfile, sst->treefile, sf.full_values[i].s.c_str(), sf.full_values[i].type), sst->metapos, sst->fname); } } - long long geomstart = *geompos; - serialize_feature(geomfile, &sf, geompos, fname, *initial_x >> geometry_scale, *initial_y >> geometry_scale, false); + long long geomstart = *(sst->geompos); + serialize_feature(sst->geomfile, &sf, sst->geompos, sst->fname, *(sst->initial_x) >> geometry_scale, *(sst->initial_y) >> geometry_scale, false); struct index index; index.start = geomstart; - index.end = *geompos; - index.segment = segment; - index.seq = *layer_seq; + index.end = *(sst->geompos); + index.segment = sst->segment; + index.seq = *(sst->layer_seq); index.t = sf.t; index.index = bbox_index; - fwrite_check(&index, sizeof(struct index), 1, indexfile, fname); - *indexpos += sizeof(struct index); + fwrite_check(&index, sizeof(struct index), 1, sst->indexfile, sst->fname); + *(sst->indexpos) += sizeof(struct index); for (size_t i = 0; i < 2; i++) { - if (sf.bbox[i] < file_bbox[i]) { - file_bbox[i] = sf.bbox[i]; + if (sf.bbox[i] < sst->file_bbox[i]) { + sst->file_bbox[i] = sf.bbox[i]; } } for (size_t i = 2; i < 4; i++) { - if (sf.bbox[i] > file_bbox[i]) { - file_bbox[i] = sf.bbox[i]; + if (sf.bbox[i] > sst->file_bbox[i]) { + sst->file_bbox[i] = sf.bbox[i]; } } - if (*progress_seq % 10000 == 0) { - checkdisk(readers, CPUS); + if (*(sst->progress_seq) % 10000 == 0) { + checkdisk(sst->readers, CPUS); if (!quiet) { - fprintf(stderr, "Read %.2f million features\r", *progress_seq / 1000000.0); + fprintf(stderr, "Read %.2f million features\r", *sst->progress_seq / 1000000.0); } } - (*progress_seq)++; - (*layer_seq)++; + (*(sst->progress_seq))++; + (*(sst->layer_seq))++; return 1; } @@ -490,7 +490,7 @@ void check_crs(json_object *j, const char *reading) { } } -void parse_json(json_pull *jp, const char *reading, volatile long long *layer_seq, volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set *exclude, std::set *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, char *fname, int basezoom, int layer, double droprate, long long *file_bbox, int segment, int *initialized, unsigned *initial_x, unsigned *initial_y, struct reader *readers, int maxzoom, std::map *layermap, std::string layername, bool uses_gamma, std::map const *attribute_types, double *dist_sum, size_t *dist_count, bool want_dist, bool filters) { +void parse_json(struct serialization_state *sst, json_pull *jp, std::set *exclude, std::set *include, int exclude_all, int basezoom, int layer, double droprate, int maxzoom, std::map *layermap, std::string layername, bool uses_gamma, std::map const *attribute_types, bool want_dist, bool filters) { long long found_hashes = 0; long long found_features = 0; long long found_geometries = 0; @@ -499,7 +499,7 @@ void parse_json(json_pull *jp, const char *reading, volatile long long *layer_se json_object *j = json_read(jp); if (j == NULL) { if (jp->error != NULL) { - fprintf(stderr, "%s:%d: %s\n", reading, jp->line, jp->error); + fprintf(stderr, "%s:%d: %s\n", sst->fname, jp->line, jp->error); if (jp->root != NULL) { json_context(jp->root); } @@ -513,7 +513,7 @@ void parse_json(json_pull *jp, const char *reading, volatile long long *layer_se found_hashes++; if (found_hashes == 50 && found_features == 0 && found_geometries == 0) { - fprintf(stderr, "%s:%d: Warning: not finding any GeoJSON features or geometries in input yet after 50 objects.\n", reading, jp->line); + fprintf(stderr, "%s:%d: Warning: not finding any GeoJSON features or geometries in input yet after 50 objects.\n", sst->fname, jp->line); } } @@ -554,11 +554,11 @@ void parse_json(json_pull *jp, const char *reading, volatile long long *layer_se if (is_geometry) { if (found_features != 0 && found_geometries == 0) { - fprintf(stderr, "%s:%d: Warning: found a mixture of features and bare geometries\n", reading, jp->line); + fprintf(stderr, "%s:%d: Warning: found a mixture of features and bare geometries\n", sst->fname, jp->line); } found_geometries++; - serialize_geometry(j, NULL, NULL, reading, jp->line, layer_seq, progress_seq, metapos, geompos, indexpos, exclude, include, exclude_all, metafile, geomfile, indexfile, poolfile, treefile, fname, basezoom, layer, droprate, file_bbox, NULL, segment, initialized, initial_x, initial_y, readers, maxzoom, j, layermap, layername, uses_gamma, attribute_types, dist_sum, dist_count, want_dist, filters); + serialize_geometry(sst, j, NULL, NULL, exclude, include, exclude_all, basezoom, layer, droprate, NULL, maxzoom, j, layermap, layername, uses_gamma, attribute_types, want_dist, filters); json_free(j); continue; } @@ -566,7 +566,7 @@ void parse_json(json_pull *jp, const char *reading, volatile long long *layer_se if (strcmp(type->string, "Feature") != 0) { if (strcmp(type->string, "FeatureCollection") == 0) { - check_crs(j, reading); + check_crs(j, sst->fname); json_free(j); } @@ -574,13 +574,13 @@ void parse_json(json_pull *jp, const char *reading, volatile long long *layer_se } if (found_features == 0 && found_geometries != 0) { - fprintf(stderr, "%s:%d: Warning: found a mixture of features and bare geometries\n", reading, jp->line); + fprintf(stderr, "%s:%d: Warning: found a mixture of features and bare geometries\n", sst->fname, jp->line); } found_features++; json_object *geometry = json_hash_get(j, "geometry"); if (geometry == NULL) { - fprintf(stderr, "%s:%d: feature with no geometry\n", reading, jp->line); + fprintf(stderr, "%s:%d: feature with no geometry\n", sst->fname, jp->line); json_context(j); json_free(j); continue; @@ -588,7 +588,7 @@ void parse_json(json_pull *jp, const char *reading, volatile long long *layer_se json_object *properties = json_hash_get(j, "properties"); if (properties == NULL || (properties->type != JSON_HASH && properties->type != JSON_NULL)) { - fprintf(stderr, "%s:%d: feature without properties hash\n", reading, jp->line); + fprintf(stderr, "%s:%d: feature without properties hash\n", sst->fname, jp->line); json_context(j); json_free(j); continue; @@ -601,10 +601,10 @@ void parse_json(json_pull *jp, const char *reading, volatile long long *layer_se if (geometries != NULL) { size_t g; for (g = 0; g < geometries->length; g++) { - serialize_geometry(geometries->array[g], properties, id, reading, jp->line, layer_seq, progress_seq, metapos, geompos, indexpos, exclude, include, exclude_all, metafile, geomfile, indexfile, poolfile, treefile, fname, basezoom, layer, droprate, file_bbox, tippecanoe, segment, initialized, initial_x, initial_y, readers, maxzoom, j, layermap, layername, uses_gamma, attribute_types, dist_sum, dist_count, want_dist, filters); + serialize_geometry(sst, geometries->array[g], properties, id, exclude, include, exclude_all, basezoom, layer, droprate, tippecanoe, maxzoom, j, layermap, layername, uses_gamma, attribute_types, want_dist, filters); } } else { - serialize_geometry(geometry, properties, id, reading, jp->line, layer_seq, progress_seq, metapos, geompos, indexpos, exclude, include, exclude_all, metafile, geomfile, indexfile, poolfile, treefile, fname, basezoom, layer, droprate, file_bbox, tippecanoe, segment, initialized, initial_x, initial_y, readers, maxzoom, j, layermap, layername, uses_gamma, attribute_types, dist_sum, dist_count, want_dist, filters); + serialize_geometry(sst, geometry, properties, id, exclude, include, exclude_all, basezoom, layer, droprate, tippecanoe, maxzoom, j, layermap, layername, uses_gamma, attribute_types, want_dist, filters); } json_free(j); diff --git a/main.cpp b/main.cpp index d12debf..95fa40e 100644 --- a/main.cpp +++ b/main.cpp @@ -1386,7 +1386,7 @@ int read_input(std::vector &sources, char *fname, int maxzoom, int minzo overall_offset += ahead; checkdisk(reader, CPUS); } - } else if (c == '{' || c == 0xEF || c == ' ') { // XXX + } else if (c == '{' || c == 0xEF || c == ' ') { // XXX // Plain serial reading long long layer_seq = overall_offset; diff --git a/serial.hpp b/serial.hpp index bcc2dcc..83f95ce 100644 --- a/serial.hpp +++ b/serial.hpp @@ -67,4 +67,36 @@ struct serial_feature { void serialize_feature(FILE *geomfile, serial_feature *sf, long long *geompos, const char *fname, long long wx, long long wy, bool include_minzoom); serial_feature deserialize_feature(FILE *geoms, long long *geompos_in, char *metabase, long long *meta_off, unsigned z, unsigned tx, unsigned ty, unsigned *initial_x, unsigned *initial_y); +struct serialization_state { + const char *fname; // source file name + int line; // user-oriented location within source for error reports + + volatile long long *layer_seq; // sequence within current layer + volatile long long *progress_seq; // overall sequence for progress indicator + + struct reader *readers; // array of data for each input thread + int segment; // the current input thread + + FILE *geomfile; // main feature serialization; references metafile or poolfile + long long *geompos; + + FILE *metafile; // feature metadata; references poolfile + long long *metapos; + + FILE *indexfile; // quadkey index into geomfile + long long *indexpos; + + struct memfile *poolfile; // string pool for keys and values + struct memfile *treefile; // index into poolfile + + long long file_bbox[4]; // global bounding box + + unsigned *initial_x; // relative offset of all geometries + unsigned *initial_y; + int *initialized; + + double *dist_sum; // running tally for calculation of resolution within features + size_t *dist_count; +}; + #endif