Verify that CSV input is encoded as UTF-8

This commit is contained in:
Eric Fischer 2017-12-06 13:32:44 -08:00
parent f3e6afa4e9
commit afb5cece96
4 changed files with 39 additions and 1 deletions

View File

@@ -59,7 +59,7 @@ tippecanoe-decode: decode.o projection.o mvt.o write_json.o text.o jsonpull/json
tile-join: tile-join.o projection.o pool.o mbtiles.o mvt.o memfile.o dirtiles.o jsonpull/jsonpull.o text.o evaluator.o csv.o
$(CXX) $(PG) $(LIBS) $(FINAL_FLAGS) $(CXXFLAGS) -o $@ $^ $(LDFLAGS) -lm -lz -lsqlite3 -lpthread
tippecanoe-json-tool: jsontool.o jsonpull/jsonpull.o csv.o
tippecanoe-json-tool: jsontool.o jsonpull/jsonpull.o csv.o text.o
$(CXX) $(PG) $(LIBS) $(FINAL_FLAGS) $(CXXFLAGS) -o $@ $^ $(LDFLAGS) -lm -lz -lsqlite3 -lpthread
unit: unit.o text.o

13
csv.cpp
View File

@@ -1,4 +1,5 @@
#include "csv.hpp"
#include "text.hpp"
std::vector<std::string> csv_split(const char *s) {
std::vector<std::string> ret;
@@ -67,6 +68,12 @@ void readcsv(const char *fn, std::vector<std::string> &header, std::map<std::str
std::string s;
if ((s = csv_getline(f)).size() > 0) {
std::string err = check_utf8(s);
if (err != "") {
fprintf(stderr, "%s: %s\n", fn, err.c_str());
exit(EXIT_FAILURE);
}
header = csv_split(s.c_str());
for (size_t i = 0; i < header.size(); i++) {
@@ -74,6 +81,12 @@ void readcsv(const char *fn, std::vector<std::string> &header, std::map<std::str
}
}
while ((s = csv_getline(f)).size() > 0) {
std::string err = check_utf8(s);
if (err != "") {
fprintf(stderr, "%s: %s\n", fn, err.c_str());
exit(EXIT_FAILURE);
}
std::vector<std::string> line = csv_split(s.c_str());
if (line.size() > 0) {
line[0] = csv_dequote(line[0]);

View File

@@ -20,6 +20,12 @@ void parse_geocsv(std::vector<struct serialization_state> &sst, std::string fnam
ssize_t latcol = -1, loncol = -1;
if ((s = csv_getline(f)).size() > 0) {
std::string err = check_utf8(s);
if (err != "") {
fprintf(stderr, "%s: %s\n", fname.c_str(), err.c_str());
exit(EXIT_FAILURE);
}
header = csv_split(s.c_str());
for (size_t i = 0; i < header.size(); i++) {
@@ -41,6 +47,12 @@ void parse_geocsv(std::vector<struct serialization_state> &sst, std::string fnam
size_t seq = 0;
while ((s = csv_getline(f)).size() > 0) {
std::string err = check_utf8(s);
if (err != "") {
fprintf(stderr, "%s: %s\n", fname.c_str(), err.c_str());
exit(EXIT_FAILURE);
}
seq++;
std::vector<std::string> line = csv_split(s.c_str());

View File

@@ -9,6 +9,7 @@
#include <vector>
#include "jsonpull/jsonpull.h"
#include "csv.hpp"
#include "text.hpp"
int fail = EXIT_SUCCESS;
bool wrap = false;
@@ -208,6 +209,12 @@ void join_csv(json_object *j) {
exit(EXIT_FAILURE);
}
std::string err = check_utf8(s);
if (err != "") {
fprintf(stderr, "%s\n", err.c_str());
exit(EXIT_FAILURE);
}
header = csv_split(s.c_str());
for (size_t i = 0; i < header.size(); i++) {
@@ -264,6 +271,12 @@ void join_csv(json_object *j) {
break;
}
std::string err = check_utf8(s);
if (err != "") {
fprintf(stderr, "%s\n", err.c_str());
exit(EXIT_FAILURE);
}
fields = csv_split(s.c_str());
for (size_t i = 0; i < fields.size(); i++) {