Merge pull request #304 from mapbox/utf8-check

Enforce that string feature attributes must be encoded as UTF-8
This commit is contained in:
Eric Fischer 2016-10-05 15:54:58 -07:00 committed by GitHub
commit 04157e7728
9 changed files with 10633 additions and 4 deletions

View File

@ -1,3 +1,7 @@
## 1.14.2
* Enforce that string feature attributes must be encoded as UTF-8
## 1.14.1
* Whitespace after commas in tile-join .csv input is no longer significant

View File

@ -19,7 +19,7 @@ else
FINAL_FLAGS := -g $(WARNING_FLAGS) $(DEBUG_FLAGS)
endif
all: tippecanoe tippecanoe-enumerate tippecanoe-decode tile-join
all: tippecanoe tippecanoe-enumerate tippecanoe-decode tile-join unit
docs: man/tippecanoe.1
@ -44,7 +44,7 @@ C = $(wildcard *.c) $(wildcard *.cpp)
INCLUDES = -I/usr/local/include -I.
LIBS = -L/usr/local/lib
tippecanoe: geojson.o jsonpull/jsonpull.o tile.o pool.o mbtiles.o geometry.o projection.o memfile.o clipper/clipper.o mvt.o serial.o main.o
tippecanoe: geojson.o jsonpull/jsonpull.o tile.o pool.o mbtiles.o geometry.o projection.o memfile.o clipper/clipper.o mvt.o serial.o main.o text.o
$(CXX) $(PG) $(LIBS) $(FINAL_FLAGS) $(CXXFLAGS) -o $@ $^ $(LDFLAGS) -lm -lz -lsqlite3 -lpthread
tippecanoe-enumerate: enumerate.o
@ -56,6 +56,9 @@ tippecanoe-decode: decode.o projection.o mvt.o
tile-join: tile-join.o projection.o pool.o mbtiles.o mvt.o memfile.o
$(CXX) $(PG) $(LIBS) $(FINAL_FLAGS) $(CXXFLAGS) -o $@ $^ $(LDFLAGS) -lm -lz -lsqlite3 -lpthread
unit: unit.o text.o
$(CXX) $(PG) $(LIBS) $(FINAL_FLAGS) $(CXXFLAGS) -o $@ $^ $(LDFLAGS) -lm -lz -lsqlite3 -lpthread
%.o: %.c $(ALL_H)
$(CC) $(PG) $(INCLUDES) $(FINAL_FLAGS) $(CFLAGS) -c -o $@ $<
@ -71,7 +74,8 @@ indent:
TESTS = $(wildcard tests/*/out/*.json)
SPACE = $(NULL) $(NULL)
test: tippecanoe tippecanoe-decode $(addsuffix .check,$(TESTS)) parallel-test pbf-test join-test
test: tippecanoe tippecanoe-decode $(addsuffix .check,$(TESTS)) parallel-test pbf-test join-test unit
./unit
# Work around Makefile and filename punctuation limits: _ for space, @ for :, % for /
%.json.check:

23
catch/LICENSE_1_0.txt Normal file
View File

@ -0,0 +1,23 @@
Boost Software License - Version 1.0 - August 17th, 2003
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

10524
catch/catch.hpp Normal file

File diff suppressed because it is too large Load Diff

View File

@ -38,6 +38,7 @@ extern "C" {
#include "geometry.hpp"
#include "options.hpp"
#include "serial.hpp"
#include "text.hpp"
#define GEOM_POINT 0 /* array of positions */
#define GEOM_MULTIPOINT 1 /* array of arrays of positions */
@ -289,6 +290,12 @@ int serialize_geometry(json_object *geometry, json_object *properties, json_obje
if (properties->values[i] != NULL && properties->values[i]->type == JSON_STRING) {
tas.type = metatype[m] = VT_STRING;
metaval[m] = std::string(properties->values[i]->string);
std::string err = check_utf8(metaval[m]);
if (err != "") {
fprintf(stderr, "%s:%d: %s\n", reading, line, err.c_str());
json_context(feature);
exit(EXIT_FAILURE);
}
m++;
} else if (properties->values[i] != NULL && properties->values[i]->type == JSON_NUMBER) {
tas.type = metatype[m] = VT_NUMBER;

52
text.cpp Normal file
View File

@ -0,0 +1,52 @@
#include "text.hpp"
#include <stdio.h>
/**
 * Checks that `s` is well-formed UTF-8.
 *
 * Returns an empty string if `s` is valid UTF-8; otherwise returns an
 * error message that quotes `s` and lists, in hex, the bytes of the
 * first offending sequence that are actually present in `s`.
 *
 * A sequence is rejected when
 *  - its first byte is a stray continuation byte (0x80..0xBF) or 0xF8..0xFF,
 *  - it is truncated, or a trailing byte is not a continuation byte,
 *  - it is an overlong encoding (a code point written with more bytes
 *    than necessary, which includes the lead bytes 0xC0 and 0xC1),
 *  - it encodes a UTF-16 surrogate (U+D800..U+DFFF), or
 *  - it encodes a value above U+10FFFF.
 */
std::string check_utf8(std::string s) {
	for (size_t i = 0; i < s.size(); i++) {
		const unsigned lead = s[i] & 0xFF;
		if (lead < 0x80) {
			continue;  // plain ASCII, always valid
		}

		size_t len = 0;              // total bytes in the sequence; 0 = invalid lead byte
		unsigned long cp = 0;        // code point accumulated from the payload bits
		unsigned long smallest = 0;  // smallest code point that legitimately needs `len` bytes

		if ((lead & 0xE0) == 0xC0) {
			len = 2;
			cp = lead & 0x1F;
			smallest = 0x80;
		} else if ((lead & 0xF0) == 0xE0) {
			len = 3;
			cp = lead & 0x0F;
			smallest = 0x800;
		} else if ((lead & 0xF8) == 0xF0) {
			len = 4;
			cp = lead & 0x07;
			smallest = 0x10000;
		}

		// Number of bytes to report in the error message; 0 means the sequence is fine.
		size_t fail = 0;

		if (len == 0) {
			fail = 1;
		} else {
			// The whole sequence has to fit inside the string.
			bool ok = i + len <= s.size();

			for (size_t k = 1; ok && k < len; k++) {
				const unsigned c = s[i + k] & 0xFF;
				if ((c & 0xC0) != 0x80) {
					ok = false;
				} else {
					cp = (cp << 6) | (c & 0x3F);
				}
			}

			// Structurally fine, but the decoded value must also be a
			// shortest-form Unicode scalar value.
			if (ok && (cp < smallest || cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF))) {
				ok = false;
			}

			if (ok) {
				i += len - 1;  // the loop increment steps over the lead byte
			} else {
				fail = len;
			}
		}

		if (fail != 0) {
			std::string out = "\"" + s + "\" is not valid UTF-8 (";
			// A truncated sequence has fewer than `fail` bytes available.
			for (size_t j = 0; j < fail && i + j < s.size(); j++) {
				if (j != 0) {
					out += " ";
				}
				char tmp[6];  // "0xNN" plus the terminating NUL
				snprintf(tmp, sizeof(tmp), "0x%02X", static_cast<unsigned>(s[i + j] & 0xFF));
				out += tmp;
			}
			out += ")";
			return out;
		}
	}

	return "";
}

3
text.hpp Normal file
View File

@ -0,0 +1,3 @@
#ifndef TEXT_HPP
#define TEXT_HPP

#include <string>

/**
 * Returns an empty string if `text` is valid UTF-8;
 * otherwise returns a human-readable error message.
 */
std::string check_utf8(std::string text);

#endif

12
unit.cpp Normal file
View File

@ -0,0 +1,12 @@
#define CATCH_CONFIG_MAIN
#include "catch/catch.hpp"
#include "text.hpp"
TEST_CASE("UTF-8 enforcement", "[utf8]") {
	// check_utf8 reports success as an empty string.
	const std::string no_error("");

	// Well-formed inputs: empty, ASCII, and 2-, 3- and 4-byte sequences.
	const char *well_formed[] = {
		"",
		"hello world",
		"Καλημέρα κόσμε",
		"こんにちは 世界",
		"👋🌏",
	};
	const size_t count = sizeof(well_formed) / sizeof(well_formed[0]);
	for (size_t k = 0; k < count; k++) {
		REQUIRE(check_utf8(well_formed[k]) == no_error);
	}

	// A Latin-1 encoded o-acute (0xF3) looks like the start of a 4-byte
	// sequence that is cut short by the end of the string.
	const std::string expected("\"Hola m\xF3n\" is not valid UTF-8 (0xF3 0x6E)");
	REQUIRE(check_utf8("Hola m\xF3n") == expected);
}

View File

@ -1 +1 @@
#define VERSION "tippecanoe v1.14.1\n"
#define VERSION "tippecanoe v1.14.2\n"