Merge pull request #627 from mapbox/geojson-loop

Factor out the GeoJSON parsing loop from feature serialization
This commit is contained in:
Eric Fischer 2018-08-08 15:14:14 -07:00 committed by GitHub
commit f0517d090b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 295 additions and 181 deletions

View File

@ -1,3 +1,8 @@
## 1.31.2
* Don't accept anything inside another JSON object's properties as a
feature or geometry of its own.
## 1.31.1
* Add --exclude-all to tile-join

View File

@ -47,7 +47,7 @@ C = $(wildcard *.c) $(wildcard *.cpp)
INCLUDES = -I/usr/local/include -I.
LIBS = -L/usr/local/lib
tippecanoe: geojson.o jsonpull/jsonpull.o tile.o pool.o mbtiles.o geometry.o projection.o memfile.o mvt.o serial.o main.o text.o dirtiles.o plugin.o read_json.o write_json.o geobuf.o evaluator.o geocsv.o csv.o
tippecanoe: geojson.o jsonpull/jsonpull.o tile.o pool.o mbtiles.o geometry.o projection.o memfile.o mvt.o serial.o main.o text.o dirtiles.o plugin.o read_json.o write_json.o geobuf.o evaluator.o geocsv.o csv.o geojson-loop.o
$(CXX) $(PG) $(LIBS) $(FINAL_FLAGS) $(CXXFLAGS) -o $@ $^ $(LDFLAGS) -lm -lz -lsqlite3 -lpthread
tippecanoe-enumerate: enumerate.o
@ -59,7 +59,7 @@ tippecanoe-decode: decode.o projection.o mvt.o write_json.o text.o jsonpull/json
tile-join: tile-join.o projection.o pool.o mbtiles.o mvt.o memfile.o dirtiles.o jsonpull/jsonpull.o text.o evaluator.o csv.o write_json.o
$(CXX) $(PG) $(LIBS) $(FINAL_FLAGS) $(CXXFLAGS) -o $@ $^ $(LDFLAGS) -lm -lz -lsqlite3 -lpthread
tippecanoe-json-tool: jsontool.o jsonpull/jsonpull.o csv.o text.o
tippecanoe-json-tool: jsontool.o jsonpull/jsonpull.o csv.o text.o geojson-loop.o
$(CXX) $(PG) $(LIBS) $(FINAL_FLAGS) $(CXXFLAGS) -o $@ $^ $(LDFLAGS) -lm -lz -lsqlite3 -lpthread
unit: unit.o text.o

View File

@ -94,7 +94,7 @@ void handle(std::string message, int z, unsigned x, unsigned y, std::set<std::st
fprintf(stderr, "Couldn't parse tile %d/%u/%u\n", z, x, y);
exit(EXIT_FAILURE);
}
} catch (std::exception const& e) {
} catch (std::exception const &e) {
fprintf(stderr, "PBF decoding error in tile %d/%u/%u\n", z, x, y);
exit(EXIT_FAILURE);
}

184
geojson-loop.cpp Normal file
View File

@ -0,0 +1,184 @@
#ifdef MTRACE
#include <mcheck.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <ctype.h>
#include <errno.h>
#include "geojson-loop.hpp"
#include "jsonpull/jsonpull.h"
// XXX duplicated
// Names of the GeoJSON geometry types that parse_json() compares an object's
// "type" string against when looking for bare geometries. GEOM_TYPES is the
// number of entries in the table.
#define GEOM_TYPES 6
static const char *geometry_names[GEOM_TYPES] = {
	"Point",
	"MultiPoint",
	"LineString",
	"MultiLineString",
	"Polygon",
	"MultiPolygon",
};
// XXX duplicated
// Prints the stringified form of `j` to stderr as context for a diagnostic
// that was just reported. Text of 500 characters or more is cut to exactly
// 500 characters, the last three of which are replaced by "...".
static void json_context(json_object *j) {
	const size_t limit = 500;
	char *text = json_stringify(j);

	if (strlen(text) >= limit) {
		// The string is at least `limit` characters long, so indices
		// limit-3 .. limit all lie inside its buffer (terminator included).
		text[limit - 3] = '.';
		text[limit - 2] = '.';
		text[limit - 1] = '.';
		text[limit] = '\0';
	}

	fprintf(stderr, "In JSON object %s\n", text);
	free(text);  // buffer came from json_stringify
}
void parse_json(json_feature_action *jfa, json_pull *jp) {
long long found_hashes = 0;
long long found_features = 0;
long long found_geometries = 0;
while (1) {
json_object *j = json_read(jp);
if (j == NULL) {
if (jp->error != NULL) {
fprintf(stderr, "%s:%d: %s\n", jfa->fname.c_str(), jp->line, jp->error);
if (jp->root != NULL) {
json_context(jp->root);
}
}
json_free(jp->root);
break;
}
if (j->type == JSON_HASH) {
found_hashes++;
if (found_hashes == 50 && found_features == 0 && found_geometries == 0) {
fprintf(stderr, "%s:%d: Warning: not finding any GeoJSON features or geometries in input yet after 50 objects.\n", jfa->fname.c_str(), jp->line);
}
}
json_object *type = json_hash_get(j, "type");
if (type == NULL || type->type != JSON_STRING) {
continue;
}
if (found_features == 0) {
int i;
int is_geometry = 0;
for (i = 0; i < GEOM_TYPES; i++) {
if (strcmp(type->string, geometry_names[i]) == 0) {
is_geometry = 1;
break;
}
}
if (is_geometry) {
if (j->parent != NULL) {
if (j->parent->type == JSON_ARRAY && j->parent->parent != NULL) {
if (j->parent->parent->type == JSON_HASH) {
json_object *geometries = json_hash_get(j->parent->parent, "geometries");
if (geometries != NULL) {
// Parent of Parent must be a GeometryCollection
is_geometry = 0;
}
}
} else if (j->parent->type == JSON_HASH) {
json_object *geometry = json_hash_get(j->parent, "geometry");
if (geometry != NULL) {
// Parent must be a Feature
is_geometry = 0;
}
}
}
}
if (is_geometry) {
json_object *jo = j;
while (jo != NULL) {
if (jo->parent != NULL && jo->parent->type == JSON_HASH) {
if (json_hash_get(jo->parent, "properties") == jo) {
// Ancestor is the value corresponding to a properties key
is_geometry = 0;
break;
}
}
jo = jo->parent;
}
}
if (is_geometry) {
if (found_features != 0 && found_geometries == 0) {
fprintf(stderr, "%s:%d: Warning: found a mixture of features and bare geometries\n", jfa->fname.c_str(), jp->line);
}
found_geometries++;
jfa->add_feature(j, false, NULL, NULL, NULL, j);
json_free(j);
continue;
}
}
if (strcmp(type->string, "Feature") != 0) {
if (strcmp(type->string, "FeatureCollection") == 0) {
jfa->check_crs(j);
json_free(j);
}
continue;
}
if (found_features == 0 && found_geometries != 0) {
fprintf(stderr, "%s:%d: Warning: found a mixture of features and bare geometries\n", jfa->fname.c_str(), jp->line);
}
found_features++;
json_object *geometry = json_hash_get(j, "geometry");
if (geometry == NULL) {
fprintf(stderr, "%s:%d: feature with no geometry\n", jfa->fname.c_str(), jp->line);
json_context(j);
json_free(j);
continue;
}
json_object *properties = json_hash_get(j, "properties");
if (properties == NULL || (properties->type != JSON_HASH && properties->type != JSON_NULL)) {
fprintf(stderr, "%s:%d: feature without properties hash\n", jfa->fname.c_str(), jp->line);
json_context(j);
json_free(j);
continue;
}
bool is_feature = true;
{
json_object *jo = j;
while (jo != NULL) {
if (jo->parent != NULL && jo->parent->type == JSON_HASH) {
if (json_hash_get(jo->parent, "properties") == jo) {
// Ancestor is the value corresponding to a properties key
is_feature = false;
break;
}
}
jo = jo->parent;
}
}
if (!is_feature) {
continue;
}
json_object *tippecanoe = json_hash_get(j, "tippecanoe");
json_object *id = json_hash_get(j, "id");
json_object *geometries = json_hash_get(geometry, "geometries");
if (geometries != NULL && geometries->type == JSON_ARRAY) {
jfa->add_feature(geometries, true, properties, id, tippecanoe, j);
} else {
jfa->add_feature(geometry, false, properties, id, tippecanoe, j);
}
json_free(j);
/* XXX check for any non-features in the outer object */
}
}

11
geojson-loop.hpp Normal file
View File

@ -0,0 +1,11 @@
#ifndef GEOJSON_LOOP_HPP
#define GEOJSON_LOOP_HPP

#include <string>
#include "jsonpull/jsonpull.h"

// Callback interface used by parse_json(): the parsing loop finds GeoJSON
// objects in the input and hands them to an implementation of this struct.
struct json_feature_action {
	// Name of the input being parsed; parse_json() prefixes its
	// diagnostics with it.
	std::string fname;

	// Called for each feature or bare geometry that parse_json() accepts.
	//   geometry            the geometry object, or the "geometries" array
	//                       when geometrycollection is true
	//   geometrycollection  true when `geometry` is an array of geometries
	//   properties, id, tippecanoe
	//                       the feature's members of those names; all NULL
	//                       for a bare geometry, and id/tippecanoe may be
	//                       NULL when the feature lacks them
	//   feature             the enclosing Feature object, or the geometry
	//                       itself for a bare geometry
	// parse_json() does not look at the return value.
	virtual int add_feature(json_object *geometry, bool geometrycollection, json_object *properties, json_object *id, json_object *tippecanoe, json_object *feature) = 0;

	// Called with each "FeatureCollection" object that parse_json() reads.
	virtual void check_crs(json_object *j) = 0;

	// Polymorphic base: keep destruction through a base pointer well-defined.
	virtual ~json_feature_action() {
	}
};

// Reads JSON objects from `jp` until the input ends or fails to parse, and
// reports the GeoJSON features and geometries it finds to `action`.
void parse_json(json_feature_action *action, json_pull *jp);

#endif

View File

@ -37,6 +37,7 @@
#include "text.hpp"
#include "read_json.hpp"
#include "mvt.hpp"
#include "geojson-loop.hpp"
int serialize_geojson_feature(struct serialization_state *sst, json_object *geometry, json_object *properties, json_object *id, int layer, json_object *tippecanoe, json_object *feature, std::string layername) {
json_object *geometry_type = json_hash_get(geometry, "type");
@ -227,127 +228,35 @@ void check_crs(json_object *j, const char *reading) {
}
}
void parse_json(struct serialization_state *sst, json_pull *jp, int layer, std::string layername) {
long long found_hashes = 0;
long long found_features = 0;
long long found_geometries = 0;
struct json_serialize_action : json_feature_action {
serialization_state *sst;
int layer;
std::string layername;
while (1) {
json_object *j = json_read(jp);
if (j == NULL) {
if (jp->error != NULL) {
fprintf(stderr, "%s:%d: %s\n", sst->fname, jp->line, jp->error);
if (jp->root != NULL) {
json_context(jp->root);
}
}
json_free(jp->root);
break;
}
if (j->type == JSON_HASH) {
found_hashes++;
if (found_hashes == 50 && found_features == 0 && found_geometries == 0) {
fprintf(stderr, "%s:%d: Warning: not finding any GeoJSON features or geometries in input yet after 50 objects.\n", sst->fname, jp->line);
}
}
json_object *type = json_hash_get(j, "type");
if (type == NULL || type->type != JSON_STRING) {
continue;
}
if (found_features == 0) {
int i;
int is_geometry = 0;
for (i = 0; i < GEOM_TYPES; i++) {
if (strcmp(type->string, geometry_names[i]) == 0) {
is_geometry = 1;
break;
}
}
if (is_geometry) {
if (j->parent != NULL) {
if (j->parent->type == JSON_ARRAY && j->parent->parent != NULL) {
if (j->parent->parent->type == JSON_HASH) {
json_object *geometries = json_hash_get(j->parent->parent, "geometries");
if (geometries != NULL) {
// Parent of Parent must be a GeometryCollection
is_geometry = 0;
}
}
} else if (j->parent->type == JSON_HASH) {
json_object *geometry = json_hash_get(j->parent, "geometry");
if (geometry != NULL) {
// Parent must be a Feature
is_geometry = 0;
}
}
}
}
if (is_geometry) {
if (found_features != 0 && found_geometries == 0) {
fprintf(stderr, "%s:%d: Warning: found a mixture of features and bare geometries\n", sst->fname, jp->line);
}
found_geometries++;
serialize_geojson_feature(sst, j, NULL, NULL, layer, NULL, j, layername);
json_free(j);
continue;
}
}
if (strcmp(type->string, "Feature") != 0) {
if (strcmp(type->string, "FeatureCollection") == 0) {
check_crs(j, sst->fname);
json_free(j);
}
continue;
}
if (found_features == 0 && found_geometries != 0) {
fprintf(stderr, "%s:%d: Warning: found a mixture of features and bare geometries\n", sst->fname, jp->line);
}
found_features++;
json_object *geometry = json_hash_get(j, "geometry");
if (geometry == NULL) {
fprintf(stderr, "%s:%d: feature with no geometry\n", sst->fname, jp->line);
json_context(j);
json_free(j);
continue;
}
json_object *properties = json_hash_get(j, "properties");
if (properties == NULL || (properties->type != JSON_HASH && properties->type != JSON_NULL)) {
fprintf(stderr, "%s:%d: feature without properties hash\n", sst->fname, jp->line);
json_context(j);
json_free(j);
continue;
}
json_object *tippecanoe = json_hash_get(j, "tippecanoe");
json_object *id = json_hash_get(j, "id");
json_object *geometries = json_hash_get(geometry, "geometries");
if (geometries != NULL && geometries->type == JSON_ARRAY) {
size_t g;
for (g = 0; g < geometries->length; g++) {
serialize_geojson_feature(sst, geometries->array[g], properties, id, layer, tippecanoe, j, layername);
int add_feature(json_object *geometry, bool geometrycollection, json_object *properties, json_object *id, json_object *tippecanoe, json_object *feature) {
if (geometrycollection) {
int ret = 1;
for (size_t g = 0; g < geometry->length; g++) {
ret &= serialize_geojson_feature(sst, geometry->array[g], properties, id, layer, tippecanoe, feature, layername);
}
return ret;
} else {
serialize_geojson_feature(sst, geometry, properties, id, layer, tippecanoe, j, layername);
return serialize_geojson_feature(sst, geometry, properties, id, layer, tippecanoe, feature, layername);
}
json_free(j);
/* XXX check for any non-features in the outer object */
}
void check_crs(json_object *j) {
::check_crs(j, fname.c_str());
}
};
void parse_json(struct serialization_state *sst, json_pull *jp, int layer, std::string layername) {
json_serialize_action jsa;
jsa.sst = sst;
jsa.layer = layer;
jsa.layername = layername;
parse_json(&jsa, jp);
}
void *run_parse_json(void *v) {

View File

@ -10,6 +10,7 @@
#include "jsonpull/jsonpull.h"
#include "csv.hpp"
#include "text.hpp"
#include "geojson-loop.hpp"
int fail = EXIT_SUCCESS;
bool wrap = false;
@ -362,70 +363,35 @@ void join_csv(json_object *j) {
}
}
// Feature action that writes each parsed feature or bare geometry through
// out() as stringified JSON.
struct json_join_action : json_feature_action {
	// A bare geometry arrives with `feature == geometry`; it is emitted with
	// code 2 and no properties. A real feature is first passed to join_csv()
	// when a CSV file is configured, then emitted with code 1 together with
	// its "properties" member. Always returns 1.
	// NOTE(review): the meaning of the 1/2 codes is defined by out(), which
	// is not visible here -- confirm against its definition.
	int add_feature(json_object *geometry, bool, json_object *, json_object *, json_object *, json_object *feature) {
		if (feature == geometry) {
			char *text = json_stringify(geometry);
			out(text, 2, NULL);
			free(text);
			return 1;
		}

		if (csvfile != NULL) {
			join_csv(feature);
		}

		char *text = json_stringify(feature);
		out(text, 1, json_hash_get(feature, "properties"));
		free(text);
		return 1;
	}

	// Nothing is done with FeatureCollection objects here.
	void check_crs(json_object *) {
	}
};
void process(FILE *fp, const char *fname) {
json_pull *jp = json_begin_file(fp);
while (1) {
json_object *j = json_read(jp);
if (j == NULL) {
if (jp->error != NULL) {
fprintf(stderr, "%s:%d: %s\n", fname, jp->line, jp->error);
}
json_free(jp->root);
break;
}
json_object *type = json_hash_get(j, "type");
if (type == NULL || type->type != JSON_STRING) {
continue;
}
if (strcmp(type->string, "Feature") == 0) {
if (csvfile != NULL) {
join_csv(j);
}
char *s = json_stringify(j);
out(s, 1, json_hash_get(j, "properties"));
free(s);
json_free(j);
} else if (strcmp(type->string, "Point") == 0 ||
strcmp(type->string, "MultiPoint") == 0 ||
strcmp(type->string, "LineString") == 0 ||
strcmp(type->string, "MultiLineString") == 0 ||
strcmp(type->string, "MultiPolygon") == 0) {
int is_geometry = 1;
if (j->parent != NULL) {
if (j->parent->type == JSON_ARRAY && j->parent->parent != NULL) {
if (j->parent->parent->type == JSON_HASH) {
json_object *geometries = json_hash_get(j->parent->parent, "geometries");
if (geometries != NULL) {
// Parent of Parent must be a GeometryCollection
is_geometry = 0;
}
}
} else if (j->parent->type == JSON_HASH) {
json_object *geometry = json_hash_get(j->parent, "geometry");
if (geometry != NULL) {
// Parent must be a Feature
is_geometry = 0;
}
}
}
if (is_geometry) {
char *s = json_stringify(j);
out(s, 2, NULL);
free(s);
json_free(j);
}
} else if (strcmp(type->string, "FeatureCollection") == 0) {
json_free(j);
}
}
json_join_action jja;
jja.fname = fname;
parse_json(&jja, jp);
json_end(jp);
}

9
tests/nested/in.json Normal file
View File

@ -0,0 +1,9 @@
{"type":"Feature","properties":{"nested2":{"type":"Point","coordinates":[2,2]}},"geometry":{"type":"LineString","coordinates":[[0,0],[1,1]]}}
{"type":"Feature","properties":{"deeper2":[{"type":"Point","coordinates":[2,2]}]},"geometry":{"type":"LineString","coordinates":[[0,0],[1,1]]}}
{"type":"Feature","properties":{"more2":{"something":{"type":"Point","coordinates":[2,2]}}},"geometry":{"type":"LineString","coordinates":[[0,0],[1,1]]}}
{"type":"Feature","properties":{"nested":{"type":"Feature","properties":{},"geometry":{"type":"Point","coordinates":[2,2]}}},"geometry":{"type":"LineString","coordinates":[[0,0],[1,1]]}}
{"type":"Feature","properties":{"deeper":[{"type":"Feature","properties":{},"geometry":{"type":"Point","coordinates":[2,2]}}]},"geometry":{"type":"LineString","coordinates":[[0,0],[1,1]]}}
{"type":"Feature","properties":{"more":{"something":{"type":"Feature","properties":{},"geometry":{"type":"Point","coordinates":[2,2]}}}},"geometry":{"type":"LineString","coordinates":[[0,0],[1,1]]}}
{"type":"Feature","properties":{},"geometry":{"type":"LineString","coordinates":[[0,0],[1,1]]}}

View File

@ -0,0 +1,30 @@
{ "type": "FeatureCollection", "properties": {
"bounds": "0.000000,0.000000,1.000000,1.000000",
"center": "0.000000,0.000000,0",
"description": "tests/nested/out/-z0_--preserve-input-order.json.check.mbtiles",
"format": "pbf",
"json": "{\"vector_layers\": [ { \"id\": \"in\", \"description\": \"\", \"minzoom\": 0, \"maxzoom\": 0, \"fields\": {\"deeper\": \"String\", \"deeper2\": \"String\", \"more\": \"String\", \"more2\": \"String\", \"nested\": \"String\", \"nested2\": \"String\"} } ],\"tilestats\": {\"layerCount\": 1,\"layers\": [{\"layer\": \"in\",\"count\": 7,\"geometry\": \"LineString\",\"attributeCount\": 6,\"attributes\": [{\"attribute\": \"deeper\",\"count\": 1,\"type\": \"string\",\"values\": [\"[{\\\"type\\\":\\\"Feature\\\",\\\"properties\\\":{},\\\"geometry\\\":{\\\"type\\\":\\\"Point\\\",\\\"coordinates\\\":[2,2]}}]\"]},{\"attribute\": \"deeper2\",\"count\": 1,\"type\": \"string\",\"values\": [\"[{\\\"type\\\":\\\"Point\\\",\\\"coordinates\\\":[2,2]}]\"]},{\"attribute\": \"more\",\"count\": 1,\"type\": \"string\",\"values\": [\"{\\\"something\\\":{\\\"type\\\":\\\"Feature\\\",\\\"properties\\\":{},\\\"geometry\\\":{\\\"type\\\":\\\"Point\\\",\\\"coordinates\\\":[2,2]}}}\"]},{\"attribute\": \"more2\",\"count\": 1,\"type\": \"string\",\"values\": [\"{\\\"something\\\":{\\\"type\\\":\\\"Point\\\",\\\"coordinates\\\":[2,2]}}\"]},{\"attribute\": \"nested\",\"count\": 1,\"type\": \"string\",\"values\": [\"{\\\"type\\\":\\\"Feature\\\",\\\"properties\\\":{},\\\"geometry\\\":{\\\"type\\\":\\\"Point\\\",\\\"coordinates\\\":[2,2]}}\"]},{\"attribute\": \"nested2\",\"count\": 1,\"type\": \"string\",\"values\": [\"{\\\"type\\\":\\\"Point\\\",\\\"coordinates\\\":[2,2]}\"]}]}]}}",
"maxzoom": "0",
"minzoom": "0",
"name": "tests/nested/out/-z0_--preserve-input-order.json.check.mbtiles",
"type": "overlay",
"version": "2"
}, "features": [
{ "type": "FeatureCollection", "properties": { "zoom": 0, "x": 0, "y": 0 }, "features": [
{ "type": "FeatureCollection", "properties": { "layer": "in", "version": 2, "extent": 4096 }, "features": [
{ "type": "Feature", "properties": { "nested2": "{\"type\":\"Point\",\"coordinates\":[2,2]}" }, "geometry": { "type": "LineString", "coordinates": [ [ 0.000000, 0.000000 ], [ 0.966797, 1.054628 ] ] } }
,
{ "type": "Feature", "properties": { "deeper2": "[{\"type\":\"Point\",\"coordinates\":[2,2]}]" }, "geometry": { "type": "LineString", "coordinates": [ [ 0.000000, 0.000000 ], [ 0.966797, 1.054628 ] ] } }
,
{ "type": "Feature", "properties": { "more2": "{\"something\":{\"type\":\"Point\",\"coordinates\":[2,2]}}" }, "geometry": { "type": "LineString", "coordinates": [ [ 0.000000, 0.000000 ], [ 0.966797, 1.054628 ] ] } }
,
{ "type": "Feature", "properties": { "nested": "{\"type\":\"Feature\",\"properties\":{},\"geometry\":{\"type\":\"Point\",\"coordinates\":[2,2]}}" }, "geometry": { "type": "LineString", "coordinates": [ [ 0.000000, 0.000000 ], [ 0.966797, 1.054628 ] ] } }
,
{ "type": "Feature", "properties": { "deeper": "[{\"type\":\"Feature\",\"properties\":{},\"geometry\":{\"type\":\"Point\",\"coordinates\":[2,2]}}]" }, "geometry": { "type": "LineString", "coordinates": [ [ 0.000000, 0.000000 ], [ 0.966797, 1.054628 ] ] } }
,
{ "type": "Feature", "properties": { "more": "{\"something\":{\"type\":\"Feature\",\"properties\":{},\"geometry\":{\"type\":\"Point\",\"coordinates\":[2,2]}}}" }, "geometry": { "type": "LineString", "coordinates": [ [ 0.000000, 0.000000 ], [ 0.966797, 1.054628 ] ] } }
,
{ "type": "Feature", "properties": { }, "geometry": { "type": "LineString", "coordinates": [ [ 0.000000, 0.000000 ], [ 0.966797, 1.054628 ] ] } }
] }
] }
] }

View File

@ -1,6 +1,6 @@
#ifndef VERSION_HPP
#define VERSION_HPP
#define VERSION "v1.31.1"
#define VERSION "v1.31.2"
#endif