From afb5cece960bcdc728465849b29362f195198e9f Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Wed, 6 Dec 2017 13:32:44 -0800 Subject: [PATCH] Verify that CSV input is encoded as UTF-8 --- Makefile | 2 +- csv.cpp | 13 +++++++++++++ geocsv.cpp | 12 ++++++++++++ jsontool.cpp | 13 +++++++++++++ 4 files changed, 39 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6838cca..f24ba91 100644 --- a/Makefile +++ b/Makefile @@ -59,7 +59,7 @@ tippecanoe-decode: decode.o projection.o mvt.o write_json.o text.o jsonpull/json tile-join: tile-join.o projection.o pool.o mbtiles.o mvt.o memfile.o dirtiles.o jsonpull/jsonpull.o text.o evaluator.o csv.o $(CXX) $(PG) $(LIBS) $(FINAL_FLAGS) $(CXXFLAGS) -o $@ $^ $(LDFLAGS) -lm -lz -lsqlite3 -lpthread -tippecanoe-json-tool: jsontool.o jsonpull/jsonpull.o csv.o +tippecanoe-json-tool: jsontool.o jsonpull/jsonpull.o csv.o text.o $(CXX) $(PG) $(LIBS) $(FINAL_FLAGS) $(CXXFLAGS) -o $@ $^ $(LDFLAGS) -lm -lz -lsqlite3 -lpthread unit: unit.o text.o diff --git a/csv.cpp b/csv.cpp index 9710cad..97c6f8f 100644 --- a/csv.cpp +++ b/csv.cpp @@ -1,4 +1,5 @@ #include "csv.hpp" +#include "text.hpp" std::vector csv_split(const char *s) { std::vector ret; @@ -67,6 +68,12 @@ void readcsv(const char *fn, std::vector &header, std::map 0) { + std::string err = check_utf8(s); + if (err != "") { + fprintf(stderr, "%s: %s\n", fn, err.c_str()); + exit(EXIT_FAILURE); + } + header = csv_split(s.c_str()); for (size_t i = 0; i < header.size(); i++) { @@ -74,6 +81,12 @@ void readcsv(const char *fn, std::vector &header, std::map 0) { + std::string err = check_utf8(s); + if (err != "") { + fprintf(stderr, "%s: %s\n", fn, err.c_str()); + exit(EXIT_FAILURE); + } + std::vector line = csv_split(s.c_str()); if (line.size() > 0) { line[0] = csv_dequote(line[0]); diff --git a/geocsv.cpp b/geocsv.cpp index 982138c..cc17ca5 100644 --- a/geocsv.cpp +++ b/geocsv.cpp @@ -20,6 +20,12 @@ void parse_geocsv(std::vector &sst, std::string fnam ssize_t latcol = -1, loncol = -1; if ((s = csv_getline(f)).size() > 0) { + std::string err = check_utf8(s); + if (err != "") { + fprintf(stderr, "%s: %s\n", fname.c_str(), err.c_str()); + exit(EXIT_FAILURE); + } + header = csv_split(s.c_str()); for (size_t i = 0; i < header.size(); i++) { @@ -41,6 +47,12 @@ void parse_geocsv(std::vector &sst, std::string fnam size_t seq = 0; while ((s = csv_getline(f)).size() > 0) { + std::string err = check_utf8(s); + if (err != "") { + fprintf(stderr, "%s: %s\n", fname.c_str(), err.c_str()); + exit(EXIT_FAILURE); + } + seq++; std::vector line = csv_split(s.c_str()); diff --git a/jsontool.cpp b/jsontool.cpp index 70ca2e0..f1a1231 100644 --- a/jsontool.cpp +++ b/jsontool.cpp @@ -9,6 +9,7 @@ #include #include "jsonpull/jsonpull.h" #include "csv.hpp" +#include "text.hpp" int fail = EXIT_SUCCESS; bool wrap = false; @@ -208,6 +209,12 @@ void join_csv(json_object *j) { exit(EXIT_FAILURE); } + std::string err = check_utf8(s); + if (err != "") { + fprintf(stderr, "%s\n", err.c_str()); + exit(EXIT_FAILURE); + } + header = csv_split(s.c_str()); for (size_t i = 0; i < header.size(); i++) { @@ -264,6 +271,12 @@ void join_csv(json_object *j) { break; } + std::string err = check_utf8(s); + if (err != "") { + fprintf(stderr, "%s\n", err.c_str()); + exit(EXIT_FAILURE); + } + fields = csv_split(s.c_str()); for (size_t i = 0; i < fields.size(); i++) {