mirror of
https://github.com/mapbox/tippecanoe.git
synced 2025-02-01 08:47:57 +00:00
Make UTF-8 checking into a unit test with Catch
This commit is contained in:
parent
ef38318a6d
commit
9806db3c0a
10
Makefile
10
Makefile
@ -19,7 +19,7 @@ else
|
||||
FINAL_FLAGS := -g $(WARNING_FLAGS) $(DEBUG_FLAGS)
|
||||
endif
|
||||
|
||||
all: tippecanoe tippecanoe-enumerate tippecanoe-decode tile-join
|
||||
all: tippecanoe tippecanoe-enumerate tippecanoe-decode tile-join unit
|
||||
|
||||
docs: man/tippecanoe.1
|
||||
|
||||
@ -44,7 +44,7 @@ C = $(wildcard *.c) $(wildcard *.cpp)
|
||||
INCLUDES = -I/usr/local/include -I.
|
||||
LIBS = -L/usr/local/lib
|
||||
|
||||
tippecanoe: geojson.o jsonpull/jsonpull.o tile.o pool.o mbtiles.o geometry.o projection.o memfile.o clipper/clipper.o mvt.o serial.o main.o
|
||||
tippecanoe: geojson.o jsonpull/jsonpull.o tile.o pool.o mbtiles.o geometry.o projection.o memfile.o clipper/clipper.o mvt.o serial.o main.o text.o
|
||||
$(CXX) $(PG) $(LIBS) $(FINAL_FLAGS) $(CXXFLAGS) -o $@ $^ $(LDFLAGS) -lm -lz -lsqlite3 -lpthread
|
||||
|
||||
tippecanoe-enumerate: enumerate.o
|
||||
@ -56,6 +56,9 @@ tippecanoe-decode: decode.o projection.o mvt.o
|
||||
tile-join: tile-join.o projection.o pool.o mbtiles.o mvt.o memfile.o
|
||||
$(CXX) $(PG) $(LIBS) $(FINAL_FLAGS) $(CXXFLAGS) -o $@ $^ $(LDFLAGS) -lm -lz -lsqlite3 -lpthread
|
||||
|
||||
unit: unit.o text.o
|
||||
$(CXX) $(PG) $(LIBS) $(FINAL_FLAGS) $(CXXFLAGS) -o $@ $^ $(LDFLAGS) -lm -lz -lsqlite3 -lpthread
|
||||
|
||||
%.o: %.c $(ALL_H)
|
||||
$(CC) $(PG) $(INCLUDES) $(FINAL_FLAGS) $(CFLAGS) -c -o $@ $<
|
||||
|
||||
@ -71,7 +74,8 @@ indent:
|
||||
TESTS = $(wildcard tests/*/out/*.json)
|
||||
SPACE = $(NULL) $(NULL)
|
||||
|
||||
test: tippecanoe tippecanoe-decode $(addsuffix .check,$(TESTS)) parallel-test pbf-test join-test
|
||||
test: tippecanoe tippecanoe-decode $(addsuffix .check,$(TESTS)) parallel-test pbf-test join-test unit
|
||||
./unit
|
||||
|
||||
# Work around Makefile and filename punctuation limits: _ for space, @ for :, % for /
|
||||
%.json.check:
|
||||
|
23
catch/LICENSE_1_0.txt
Normal file
23
catch/LICENSE_1_0.txt
Normal file
@ -0,0 +1,23 @@
|
||||
Boost Software License - Version 1.0 - August 17th, 2003
|
||||
|
||||
Permission is hereby granted, free of charge, to any person or organization
|
||||
obtaining a copy of the software and accompanying documentation covered by
|
||||
this license (the "Software") to use, reproduce, display, distribute,
|
||||
execute, and transmit the Software, and to prepare derivative works of the
|
||||
Software, and to permit third-parties to whom the Software is furnished to
|
||||
do so, all subject to the following:
|
||||
|
||||
The copyright notices in the Software and this entire statement, including
|
||||
the above license grant, this restriction and the following disclaimer,
|
||||
must be included in all copies of the Software, in whole or in part, and
|
||||
all derivative works of the Software, unless such copies or derivative
|
||||
works are solely in the form of machine-executable object code generated by
|
||||
a source language processor.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
|
||||
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
|
||||
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
|
||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
10524
catch/catch.hpp
Normal file
10524
catch/catch.hpp
Normal file
File diff suppressed because it is too large
Load Diff
48
geojson.cpp
48
geojson.cpp
@ -38,6 +38,7 @@ extern "C" {
|
||||
#include "geometry.hpp"
|
||||
#include "options.hpp"
|
||||
#include "serial.hpp"
|
||||
#include "text.hpp"
|
||||
|
||||
#define GEOM_POINT 0 /* array of positions */
|
||||
#define GEOM_MULTIPOINT 1 /* array of arrays of positions */
|
||||
@ -167,45 +168,6 @@ long long parse_geometry(int t, json_object *j, long long *bbox, drawvec &out, i
|
||||
return g;
|
||||
}
|
||||
|
||||
std::string check_utf8(std::string s, json_object *feature, const char *reading, int line) {
|
||||
for (size_t i = 0; i < s.size(); i++) {
|
||||
int fail = 0;
|
||||
|
||||
if ((s[i] & 0x80) == 0x80) {
|
||||
if ((s[i] & 0xE0) == 0xC0) {
|
||||
if (i + 1 >= s.size() || (s[i + 1] & 0xC0) != 0x80) {
|
||||
fail = 2;
|
||||
}
|
||||
i += 1;
|
||||
} else if ((s[i] & 0xF0) == 0xE0) {
|
||||
if (i + 2 >= s.size() || (s[i + 1] & 0xC0) != 0x80 || (s[i + 2] & 0xC0) != 0x80) {
|
||||
fail = 3;
|
||||
}
|
||||
i += 2;
|
||||
} else if ((s[i] & 0xF8) == 0xF0) {
|
||||
if (i + 3 >= s.size() || (s[i + 1] & 0xC0) != 0x80 || (s[i + 2] & 0xC0) != 0x80 || (s[i + 3] & 0xC0) != 0x80) {
|
||||
fail = 4;
|
||||
}
|
||||
i += 3;
|
||||
} else {
|
||||
fail = 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (fail != 0) {
|
||||
fprintf(stderr, "%s:%d: \"%s\" is not valid UTF-8 (%d byte:", reading, line, s.c_str(), fail);
|
||||
for (size_t j = 0; j < fail && j + i < s.size(); j++) {
|
||||
fprintf(stderr, " 0x%02X", s[i + j] & 0xFF);
|
||||
}
|
||||
fprintf(stderr, ")\n");
|
||||
json_context(feature);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
int serialize_geometry(json_object *geometry, json_object *properties, json_object *id, const char *reading, int line, volatile long long *layer_seq, volatile long long *progress_seq, long long *metapos, long long *geompos, long long *indexpos, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, FILE *metafile, FILE *geomfile, FILE *indexfile, struct memfile *poolfile, struct memfile *treefile, const char *fname, int basezoom, int layer, double droprate, long long *file_bbox, json_object *tippecanoe, int segment, int *initialized, unsigned *initial_x, unsigned *initial_y, struct reader *readers, int maxzoom, json_object *feature, std::map<std::string, layermap_entry> *layermap, std::string const &layername) {
|
||||
json_object *geometry_type = json_hash_get(geometry, "type");
|
||||
if (geometry_type == NULL) {
|
||||
@ -327,7 +289,13 @@ int serialize_geometry(json_object *geometry, json_object *properties, json_obje
|
||||
|
||||
if (properties->values[i] != NULL && properties->values[i]->type == JSON_STRING) {
|
||||
tas.type = metatype[m] = VT_STRING;
|
||||
metaval[m] = check_utf8(std::string(properties->values[i]->string), feature, reading, line);
|
||||
metaval[m] = std::string(properties->values[i]->string);
|
||||
std::string err = check_utf8(metaval[m]);
|
||||
if (err != "") {
|
||||
fprintf(stderr, "%s:%d: %s\n", reading, line, err.c_str());
|
||||
json_context(feature);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
m++;
|
||||
} else if (properties->values[i] != NULL && properties->values[i]->type == JSON_NUMBER) {
|
||||
tas.type = metatype[m] = VT_NUMBER;
|
||||
|
52
text.cpp
Normal file
52
text.cpp
Normal file
@ -0,0 +1,52 @@
|
||||
#include "text.hpp"
|
||||
#include <stdio.h>
|
||||
|
||||
/**
 * Structural UTF-8 check.
 *
 * Walks `s` one sequence at a time. A byte below 0x80 stands alone; any
 * other byte must be a 2-, 3- or 4-byte lead byte followed by that many
 * minus one continuation bytes (10xxxxxx).
 *
 * Returns an empty string when every sequence is well-formed. Otherwise
 * returns a message that quotes `s` and lists, in hex, the bytes of the
 * first rejected sequence that are actually present in the string.
 */
std::string check_utf8(std::string s) {
	static const char hex_digits[] = "0123456789ABCDEF";
	const size_t len = s.size();

	size_t pos = 0;
	while (pos < len) {
		const unsigned char lead = static_cast<unsigned char>(s[pos]);

		// Single-byte sequence: nothing further to verify.
		if (lead < 0x80) {
			pos++;
			continue;
		}

		// Sequence length announced by the lead byte. A value of 1 means the
		// byte cannot start a sequence at all (stray continuation byte or a
		// lead byte of 0xF8 and above).
		size_t expected = 1;
		if ((lead & 0xE0) == 0xC0) {
			expected = 2;
		} else if ((lead & 0xF0) == 0xE0) {
			expected = 3;
		} else if ((lead & 0xF8) == 0xF0) {
			expected = 4;
		}

		// The whole sequence must fit in the string and every trailing byte
		// must carry the continuation marker.
		bool well_formed = expected > 1 && pos + expected <= len;
		for (size_t k = 1; well_formed && k < expected; k++) {
			well_formed = (s[pos + k] & 0xC0) == 0x80;
		}

		if (well_formed) {
			pos += expected;
			continue;
		}

		// Report the first bad sequence and stop.
		std::string message = "\"" + s + "\" is not valid UTF-8 (";
		for (size_t k = 0; k < expected && pos + k < len; k++) {
			if (k != 0) {
				message += " ";
			}
			const unsigned char byte = static_cast<unsigned char>(s[pos + k]);
			message += "0x";
			message += hex_digits[byte >> 4];
			message += hex_digits[byte & 0x0F];
		}
		message += ")";
		return message;
	}

	return "";
}
|
3
text.hpp
Normal file
3
text.hpp
Normal file
@ -0,0 +1,3 @@
|
||||
#include <string>
|
||||
|
||||
std::string check_utf8(std::string text);
|
11
unit.cpp
Normal file
11
unit.cpp
Normal file
@ -0,0 +1,11 @@
|
||||
#define CATCH_CONFIG_MAIN
|
||||
#include "catch/catch.hpp"
|
||||
#include "text.hpp"
|
||||
|
||||
// Exercises check_utf8(): well-formed input must produce an empty result,
// and malformed input must produce the exact diagnostic text.
TEST_CASE("UTF-8 enforcement", "[utf8]") {
	const std::string no_error = std::string("");

	// Well-formed input: empty, plain ASCII, and multi-byte text.
	REQUIRE(check_utf8("") == no_error);
	REQUIRE(check_utf8("hello world") == no_error);
	REQUIRE(check_utf8("Καλημέρα κόσμε") == no_error);
	REQUIRE(check_utf8("こんにちは 世界") == no_error);

	// 0xF3 has the bit pattern of a four-byte lead, but only 'n' and the end
	// of the string follow it, so the sequence is rejected and the two bytes
	// that are present get listed.
	const std::string expected_error = std::string("\"Hola m\xF3n\" is not valid UTF-8 (0xF3 0x6E)");
	REQUIRE(check_utf8("Hola m\xF3n") == expected_error);
}
|
Loading…
x
Reference in New Issue
Block a user