From ef38318a6d42dcf0c5d60a0a5542c7bb44fc7382 Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Tue, 4 Oct 2016 16:43:03 -0700 Subject: [PATCH] Enforce that string feature attributes must be encoded as UTF-8 --- CHANGELOG.md | 4 ++++ geojson.cpp | 41 ++++++++++++++++++++++++++++++++++++++++- version.hpp | 2 +- 3 files changed, 45 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bbdfc7f..9ecf25a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.14.2 + +* Enforce that string feature attributes must be encoded as UTF-8 + ## 1.14.1 * Whitespace after commas in tile-join .csv input is no longer significant diff --git a/geojson.cpp b/geojson.cpp index 60781dc..b5c2f8b 100644 --- a/geojson.cpp +++ b/geojson.cpp @@ -167,6 +167,45 @@ long long parse_geometry(int t, json_object *j, long long *bbox, drawvec &out, i return g; } +std::string check_utf8(std::string s, json_object *feature, const char *reading, int line) { + for (size_t i = 0; i < s.size(); i++) { + int fail = 0; + + if ((s[i] & 0x80) == 0x80) { + if ((s[i] & 0xE0) == 0xC0) { + if (i + 1 >= s.size() || (s[i + 1] & 0xC0) != 0x80) { + fail = 2; + } + i += 1; + } else if ((s[i] & 0xF0) == 0xE0) { + if (i + 2 >= s.size() || (s[i + 1] & 0xC0) != 0x80 || (s[i + 2] & 0xC0) != 0x80) { + fail = 3; + } + i += 2; + } else if ((s[i] & 0xF8) == 0xF0) { + if (i + 3 >= s.size() || (s[i + 1] & 0xC0) != 0x80 || (s[i + 2] & 0xC0) != 0x80 || (s[i + 3] & 0xC0) != 0x80) { + fail = 4; + } + i += 3; + } else { + fail = 1; + } + } + + if (fail != 0) { + fprintf(stderr, "%s:%d: \"%s\" is not valid UTF-8 (%d byte:", reading, line, s.c_str(), fail); + for (size_t j = 0; j < fail && j + i < s.size(); j++) { + fprintf(stderr, " 0x%02X", s[i + j] & 0xFF); + } + fprintf(stderr, ")\n"); + json_context(feature); + exit(EXIT_FAILURE); + } + } + + return s; +} + int serialize_geometry(json_object *geometry, json_object *properties, json_object *id, const char *reading, int line, volatile long long *layer_seq, volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set *exclude, std::set *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, const char *fname, int basezoom, int layer, double droprate, long long *file_bbox, json_object *tippecanoe, int segment, int *initialized, unsigned *initial_x, unsigned *initial_y, struct reader *readers, int maxzoom, json_object *feature, std::map *layermap, std::string const &layername) { json_object *geometry_type = json_hash_get(geometry, "type"); if (geometry_type == NULL) { @@ -288,7 +327,7 @@ int serialize_geometry(json_object *geometry, json_object *properties, json_obje if (properties->values[i] != NULL && properties->values[i]->type == JSON_STRING) { tas.type = metatype[m] = VT_STRING; - metaval[m] = std::string(properties->values[i]->string); + metaval[m] = check_utf8(std::string(properties->values[i]->string), feature, reading, line); m++; } else if (properties->values[i] != NULL && properties->values[i]->type == JSON_NUMBER) { tas.type = metatype[m] = VT_NUMBER; diff --git a/version.hpp b/version.hpp index 99bed7e..08c7cec 100644 --- a/version.hpp +++ b/version.hpp @@ -1 +1 @@ -#define VERSION "tippecanoe v1.14.1\n" +#define VERSION "tippecanoe v1.14.2\n"