From 21042a7308dd0ea20b09cf9965ae6e1ccf048efa Mon Sep 17 00:00:00 2001 From: Eric Fischer Date: Tue, 10 Oct 2017 14:57:38 -0700 Subject: [PATCH] Move CSV code into its own file --- Makefile | 2 +- csv.cpp | 83 +++++++++++++++++++++++++++++++++++++++++++++++++ csv.hpp | 14 +++++++++ tile-join.cpp | 86 ++------------------------------------------------- 4 files changed, 100 insertions(+), 85 deletions(-) create mode 100644 csv.cpp create mode 100644 csv.hpp diff --git a/Makefile b/Makefile index 54feee3..d5c5f11 100644 --- a/Makefile +++ b/Makefile @@ -55,7 +55,7 @@ tippecanoe-enumerate: enumerate.o tippecanoe-decode: decode.o projection.o mvt.o write_json.o text.o $(CXX) $(PG) $(LIBS) $(FINAL_FLAGS) $(CXXFLAGS) -o $@ $^ $(LDFLAGS) -lm -lz -lsqlite3 -tile-join: tile-join.o projection.o pool.o mbtiles.o mvt.o memfile.o dirtiles.o jsonpull/jsonpull.o text.o evaluator.o +tile-join: tile-join.o projection.o pool.o mbtiles.o mvt.o memfile.o dirtiles.o jsonpull/jsonpull.o text.o evaluator.o csv.o $(CXX) $(PG) $(LIBS) $(FINAL_FLAGS) $(CXXFLAGS) -o $@ $^ $(LDFLAGS) -lm -lz -lsqlite3 -lpthread geojson2nd: geojson2nd.o jsonpull/jsonpull.o diff --git a/csv.cpp b/csv.cpp new file mode 100644 index 0000000..5fc0b0d --- /dev/null +++ b/csv.cpp @@ -0,0 +1,83 @@ +#include "csv.hpp" + +#define MAXLINE 10000 /* XXX */ + +std::vector<std::string> csv_split(char *s) { + std::vector<std::string> ret; + + while (*s && *s != '\n' && *s != '\r') { + char *start = s; + int within = 0; + + for (; *s && *s != '\n' && *s != '\r'; s++) { + if (*s == '"') { + within = !within; + } + + if (*s == ',' && !within) { + break; + } + } + + std::string v = std::string(start, s - start); + ret.push_back(v); + + if (*s == ',') { + s++; + + while (*s && isspace(*s)) { + s++; + } + } + } + + return ret; +} + +std::string csv_dequote(std::string s) { + std::string out; + for (size_t i = 0; i < s.size(); i++) { + if (s[i] == '"') { + if (i + 1 < s.size() && s[i + 1] == '"') { + out.push_back('"'); + } + } else { + out.push_back(s[i]); 
+ } + } + return out; +} + +void readcsv(char *fn, std::vector<std::string> &header, std::map<std::string, std::vector<std::string>> &mapping) { + FILE *f = fopen(fn, "r"); + if (f == NULL) { + perror(fn); + exit(EXIT_FAILURE); + } + + char s[MAXLINE]; + if (fgets(s, MAXLINE, f)) { + header = csv_split(s); + + for (size_t i = 0; i < header.size(); i++) { + header[i] = csv_dequote(header[i]); + } + } + while (fgets(s, MAXLINE, f)) { + std::vector<std::string> line = csv_split(s); + if (line.size() > 0) { + line[0] = csv_dequote(line[0]); + } + + for (size_t i = 0; i < line.size() && i < header.size(); i++) { + // printf("putting %s\n", line[0].c_str()); + mapping.insert(std::pair<std::string, std::vector<std::string>>(line[0], line)); + } + } + + if (fclose(f) != 0) { + perror("fclose"); + exit(EXIT_FAILURE); + } +} + diff --git a/csv.hpp b/csv.hpp new file mode 100644 index 0000000..b9fd8c9 --- /dev/null +++ b/csv.hpp @@ -0,0 +1,14 @@ +#ifndef CSV_HPP +#define CSV_HPP + +#include +#include +#include +#include +#include + +std::vector<std::string> csv_split(char *s); +std::string csv_dequote(std::string s); +void readcsv(char *fn, std::vector<std::string> &header, std::map<std::string, std::vector<std::string>> &mapping); + +#endif diff --git a/tile-join.cpp b/tile-join.cpp index f44ac60..235ac29 100644 --- a/tile-join.cpp +++ b/tile-join.cpp @@ -23,6 +23,7 @@ #include "geometry.hpp" #include "dirtiles.hpp" #include "evaluator.hpp" +#include "csv.hpp" #include #include #include @@ -30,8 +31,6 @@ #include "jsonpull/jsonpull.h" #include "milo/dtoa_milo.h" -std::string dequote(std::string s); - int pk = false; int pC = false; int pg = false; @@ -213,7 +212,7 @@ void handle(std::string message, int z, unsigned x, unsigned y, std::map 0) { if (joinval[0] == '"') { - joinval = dequote(joinval); + joinval = csv_dequote(joinval); } else if ((joinval[0] >= '0' && joinval[0] <= '9') || joinval[0] == '-') { attr_type = mvt_double; } @@ -979,87 +978,6 @@ void usage(char **argv) { exit(EXIT_FAILURE); } -#define MAXLINE 10000 /* XXX */ - -std::vector<std::string> split(char *s) { - std::vector<std::string> ret; - - while (*s && *s != '\n' && *s != '\r') { - char 
*start = s; - int within = 0; - - for (; *s && *s != '\n' && *s != '\r'; s++) { - if (*s == '"') { - within = !within; - } - - if (*s == ',' && !within) { - break; - } - } - - std::string v = std::string(start, s - start); - ret.push_back(v); - - if (*s == ',') { - s++; - - while (*s && isspace(*s)) { - s++; - } - } - } - - return ret; -} - -std::string dequote(std::string s) { - std::string out; - for (size_t i = 0; i < s.size(); i++) { - if (s[i] == '"') { - if (i + 1 < s.size() && s[i + 1] == '"') { - out.push_back('"'); - } - } else { - out.push_back(s[i]); - } - } - return out; -} - -void readcsv(char *fn, std::vector<std::string> &header, std::map<std::string, std::vector<std::string>> &mapping) { - FILE *f = fopen(fn, "r"); - if (f == NULL) { - perror(fn); - exit(EXIT_FAILURE); - } - - char s[MAXLINE]; - if (fgets(s, MAXLINE, f)) { - header = split(s); - - for (size_t i = 0; i < header.size(); i++) { - header[i] = dequote(header[i]); - } - } - while (fgets(s, MAXLINE, f)) { - std::vector<std::string> line = split(s); - if (line.size() > 0) { - line[0] = dequote(line[0]); - } - - for (size_t i = 0; i < line.size() && i < header.size(); i++) { - // printf("putting %s\n", line[0].c_str()); - mapping.insert(std::pair<std::string, std::vector<std::string>>(line[0], line)); - } - } - - if (fclose(f) != 0) { - perror("fclose"); - exit(EXIT_FAILURE); - } -} - int main(int argc, char **argv) { char *out_mbtiles = NULL; char *out_dir = NULL;