Make UTF-8 checking into a unit test with Catch

This commit is contained in:
Eric Fischer 2016-10-05 14:54:28 -07:00
parent ef38318a6d
commit 9806db3c0a
7 changed files with 10628 additions and 43 deletions

View File

@ -19,7 +19,7 @@ else
FINAL_FLAGS := -g $(WARNING_FLAGS) $(DEBUG_FLAGS)
endif
all: tippecanoe tippecanoe-enumerate tippecanoe-decode tile-join
all: tippecanoe tippecanoe-enumerate tippecanoe-decode tile-join unit
docs: man/tippecanoe.1
@ -44,7 +44,7 @@ C = $(wildcard *.c) $(wildcard *.cpp)
INCLUDES = -I/usr/local/include -I.
LIBS = -L/usr/local/lib
tippecanoe: geojson.o jsonpull/jsonpull.o tile.o pool.o mbtiles.o geometry.o projection.o memfile.o clipper/clipper.o mvt.o serial.o main.o
tippecanoe: geojson.o jsonpull/jsonpull.o tile.o pool.o mbtiles.o geometry.o projection.o memfile.o clipper/clipper.o mvt.o serial.o main.o text.o
$(CXX) $(PG) $(LIBS) $(FINAL_FLAGS) $(CXXFLAGS) -o $@ $^ $(LDFLAGS) -lm -lz -lsqlite3 -lpthread
tippecanoe-enumerate: enumerate.o
@ -56,6 +56,9 @@ tippecanoe-decode: decode.o projection.o mvt.o
tile-join: tile-join.o projection.o pool.o mbtiles.o mvt.o memfile.o
$(CXX) $(PG) $(LIBS) $(FINAL_FLAGS) $(CXXFLAGS) -o $@ $^ $(LDFLAGS) -lm -lz -lsqlite3 -lpthread
unit: unit.o text.o
$(CXX) $(PG) $(LIBS) $(FINAL_FLAGS) $(CXXFLAGS) -o $@ $^ $(LDFLAGS) -lm -lz -lsqlite3 -lpthread
%.o: %.c $(ALL_H)
$(CC) $(PG) $(INCLUDES) $(FINAL_FLAGS) $(CFLAGS) -c -o $@ $<
@ -71,7 +74,8 @@ indent:
TESTS = $(wildcard tests/*/out/*.json)
SPACE = $(NULL) $(NULL)
test: tippecanoe tippecanoe-decode $(addsuffix .check,$(TESTS)) parallel-test pbf-test join-test
test: tippecanoe tippecanoe-decode $(addsuffix .check,$(TESTS)) parallel-test pbf-test join-test unit
./unit
# Work around Makefile and filename punctuation limits: _ for space, @ for :, % for /
%.json.check:

23
catch/LICENSE_1_0.txt Normal file
View File

@ -0,0 +1,23 @@
Boost Software License - Version 1.0 - August 17th, 2003
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

10524
catch/catch.hpp Normal file

File diff suppressed because it is too large Load Diff

View File

@ -38,6 +38,7 @@ extern "C" {
#include "geometry.hpp"
#include "options.hpp"
#include "serial.hpp"
#include "text.hpp"
#define GEOM_POINT 0 /* array of positions */
#define GEOM_MULTIPOINT 1 /* array of arrays of positions */
@ -167,45 +168,6 @@ long long parse_geometry(int t, json_object *j, long long *bbox, drawvec &out, i
return g;
}
/*
 * Verifies that `s` has the byte structure of UTF-8 and returns it unchanged.
 *
 * Every byte with the high bit set must be a lead byte announcing a 2-, 3- or
 * 4-byte sequence and must be followed by the matching number of continuation
 * bytes (10xxxxxx). Only this byte pattern is checked.
 *
 * On the first malformed sequence a diagnostic prefixed with `reading:line`
 * is written to stderr, json_context(feature) is called, and the process
 * exits with EXIT_FAILURE.
 *
 * s        the bytes to validate
 * feature  passed to json_context() when validation fails
 * reading  name printed at the start of the diagnostic
 * line     line number printed at the start of the diagnostic
 */
std::string check_utf8(std::string s, json_object *feature, const char *reading, int line) {
	for (size_t i = 0; i < s.size(); i++) {
		// Expected length of the malformed sequence at i, or 0 if it is fine.
		int fail = 0;

		if ((s[i] & 0x80) == 0x80) {
			// Advance past the continuation bytes only when the sequence is
			// valid, so that on failure `i` still points at the lead byte and
			// the hex dump below shows the offending bytes.
			if ((s[i] & 0xE0) == 0xC0) {
				if (i + 1 >= s.size() || (s[i + 1] & 0xC0) != 0x80) {
					fail = 2;
				} else {
					i += 1;
				}
			} else if ((s[i] & 0xF0) == 0xE0) {
				if (i + 2 >= s.size() || (s[i + 1] & 0xC0) != 0x80 || (s[i + 2] & 0xC0) != 0x80) {
					fail = 3;
				} else {
					i += 2;
				}
			} else if ((s[i] & 0xF8) == 0xF0) {
				if (i + 3 >= s.size() || (s[i + 1] & 0xC0) != 0x80 || (s[i + 2] & 0xC0) != 0x80 || (s[i + 3] & 0xC0) != 0x80) {
					fail = 4;
				} else {
					i += 3;
				}
			} else {
				// Stray continuation byte, or a lead byte of 0xF8 and above.
				fail = 1;
			}
		}

		if (fail != 0) {
			fprintf(stderr, "%s:%d: \"%s\" is not valid UTF-8 (%d byte:", reading, line, s.c_str(), fail);
			// Dump up to `fail` bytes, stopping at the end of the string.
			for (size_t j = 0; j < static_cast<size_t>(fail) && i + j < s.size(); j++) {
				fprintf(stderr, " 0x%02X", s[i + j] & 0xFF);
			}
			fprintf(stderr, ")\n");
			json_context(feature);
			exit(EXIT_FAILURE);
		}
	}

	return s;
}
int serialize_geometry(json_object *geometry, json_object *properties, json_object *id, const char *reading, int line, volatile long long *layer_seq, volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, const char *fname, int basezoom, int layer, double droprate, long long *file_bbox, json_object *tippecanoe, int segment, int *initialized, unsigned *initial_x, unsigned *initial_y, struct reader *readers, int maxzoom, json_object *feature, std::map<std::string, layermap_entry> *layermap, std::string const &layername) {
json_object *geometry_type = json_hash_get(geometry, "type");
if (geometry_type == NULL) {
@ -327,7 +289,13 @@ int serialize_geometry(json_object *geometry, json_object *properties, json_obje
if (properties->values[i] != NULL && properties->values[i]->type == JSON_STRING) {
tas.type = metatype[m] = VT_STRING;
metaval[m] = check_utf8(std::string(properties->values[i]->string), feature, reading, line);
metaval[m] = std::string(properties->values[i]->string);
std::string err = check_utf8(metaval[m]);
if (err != "") {
fprintf(stderr, "%s:%d: %s\n", reading, line, err.c_str());
json_context(feature);
exit(EXIT_FAILURE);
}
m++;
} else if (properties->values[i] != NULL && properties->values[i]->type == JSON_NUMBER) {
tas.type = metatype[m] = VT_NUMBER;

52
text.cpp Normal file
View File

@ -0,0 +1,52 @@
#include "text.hpp"
#include <stdio.h>
/**
 * Checks that `s` has the byte structure of UTF-8.
 *
 * Every byte with the high bit set must be a lead byte announcing a 2-, 3- or
 * 4-byte sequence and must be followed by the matching number of continuation
 * bytes (10xxxxxx). Only this byte pattern is checked; decoded code point
 * values are not examined.
 *
 * @param s  the bytes to validate
 * @return   an empty string if `s` passes the check; otherwise a message of
 *           the form `"<s>" is not valid UTF-8 (0xNN 0xNN ...)`, listing up to
 *           the expected sequence length in bytes, starting at the offending
 *           lead byte and stopping at the end of `s`
 */
std::string check_utf8(std::string s) {
	for (size_t i = 0; i < s.size(); i++) {
		// Expected length of the malformed sequence at i, or 0 if it is fine.
		// Kept as size_t so it compares cleanly with the byte counter below.
		size_t fail = 0;

		if ((s[i] & 0x80) == 0x80) {
			// `i` is advanced past the continuation bytes only on success, so
			// on failure it still points at the lead byte being reported.
			if ((s[i] & 0xE0) == 0xC0) {
				if (i + 1 >= s.size() || (s[i + 1] & 0xC0) != 0x80) {
					fail = 2;
				} else {
					i += 1;
				}
			} else if ((s[i] & 0xF0) == 0xE0) {
				if (i + 2 >= s.size() || (s[i + 1] & 0xC0) != 0x80 || (s[i + 2] & 0xC0) != 0x80) {
					fail = 3;
				} else {
					i += 2;
				}
			} else if ((s[i] & 0xF8) == 0xF0) {
				if (i + 3 >= s.size() || (s[i + 1] & 0xC0) != 0x80 || (s[i + 2] & 0xC0) != 0x80 || (s[i + 3] & 0xC0) != 0x80) {
					fail = 4;
				} else {
					i += 3;
				}
			} else {
				// Stray continuation byte, or a lead byte of 0xF8 and above.
				fail = 1;
			}
		}

		if (fail != 0) {
			std::string out = "\"" + s + "\" is not valid UTF-8 (";

			for (size_t j = 0; j < fail && i + j < s.size(); j++) {
				if (j != 0) {
					out += " ";
				}

				// "0x" + two hex digits + NUL fits in 5 bytes; snprintf keeps
				// the write bounded regardless.
				char tmp[6];
				snprintf(tmp, sizeof(tmp), "0x%02X", static_cast<unsigned>(s[i + j] & 0xFF));
				out += tmp;
			}

			out += ")";
			return out;
		}
	}

	return "";
}

3
text.hpp Normal file
View File

@ -0,0 +1,3 @@
#ifndef TEXT_HPP
#define TEXT_HPP

#include <string>

/**
 * Checks `text` for malformed UTF-8 byte sequences.
 *
 * @return an empty string if `text` is valid UTF-8; otherwise an error
 *         message describing the first malformed sequence.
 */
std::string check_utf8(std::string text);

#endif

11
unit.cpp Normal file
View File

@ -0,0 +1,11 @@
#define CATCH_CONFIG_MAIN
#include "catch/catch.hpp"
#include "text.hpp"
// Exercises check_utf8() with well-formed input (empty, ASCII, Greek,
// Japanese) and with one string containing a malformed sequence.
TEST_CASE("UTF-8 enforcement", "[utf8]") {
	// check_utf8() reports success as an empty error message.
	const std::string no_error = "";

	REQUIRE(check_utf8("") == no_error);
	REQUIRE(check_utf8("hello world") == no_error);
	REQUIRE(check_utf8("Καλημέρα κόσμε") == no_error);
	REQUIRE(check_utf8("こんにちは 世界") == no_error);

	// 0xF3 announces a 4-byte sequence, but only 'n' (0x6E) follows it, so
	// the message lists the two bytes that are actually present.
	const std::string expected_error = "\"Hola m\xF3n\" is not valid UTF-8 (0xF3 0x6E)";
	REQUIRE(check_utf8("Hola m\xF3n") == expected_error);
}