From f3e6afa4e9c2522d6013b956c5ceeacce71625ae Mon Sep 17 00:00:00 2001
From: Eric Fischer
Date: Tue, 5 Dec 2017 17:18:19 -0800
Subject: [PATCH] Basic geographic CSV parsing

---
Review note: geocsv.cpp below differs from commit f3e6afa (data cells are
dequoted, coordinates are validated, diagnostics count the header line), so
its index hash is stale. Text inside angle brackets (system header names,
template arguments) was lost in extraction and has been reconstructed;
verify it against the repository before applying.

 Makefile   |   2 +-
 geocsv.cpp | 137 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 geocsv.hpp |  13 +++++
 main.cpp   |  46 ++++++++++++++++++
 4 files changed, 197 insertions(+), 1 deletion(-)
 create mode 100644 geocsv.cpp
 create mode 100644 geocsv.hpp

diff --git a/Makefile b/Makefile
index 6c87a13..6838cca 100644
--- a/Makefile
+++ b/Makefile
@@ -47,7 +47,7 @@ C = $(wildcard *.c) $(wildcard *.cpp)
 INCLUDES = -I/usr/local/include -I.
 LIBS = -L/usr/local/lib
 
-tippecanoe: geojson.o jsonpull/jsonpull.o tile.o pool.o mbtiles.o geometry.o projection.o memfile.o mvt.o serial.o main.o text.o dirtiles.o plugin.o read_json.o write_json.o geobuf.o evaluator.o
+tippecanoe: geojson.o jsonpull/jsonpull.o tile.o pool.o mbtiles.o geometry.o projection.o memfile.o mvt.o serial.o main.o text.o dirtiles.o plugin.o read_json.o write_json.o geobuf.o evaluator.o geocsv.o csv.o
 	$(CXX) $(PG) $(LIBS) $(FINAL_FLAGS) $(CXXFLAGS) -o $@ $^ $(LDFLAGS) -lm -lz -lsqlite3 -lpthread
 
 tippecanoe-enumerate: enumerate.o
diff --git a/geocsv.cpp b/geocsv.cpp
new file mode 100644
index 0000000..982138c
--- /dev/null
+++ b/geocsv.cpp
@@ -0,0 +1,137 @@
+#include <stdlib.h>
+#include "geocsv.hpp"
+#include "mvt.hpp"
+#include "serial.hpp"
+#include "projection.hpp"
+#include "main.hpp"
+#include "text.hpp"
+#include "csv.hpp"
+#include "milo/dtoa_milo.h"
+
+// Parse `text` as a number into *out.
+// Returns false when strtod() consumes nothing (empty or non-numeric text),
+// a case atof() cannot report because it just returns 0.
+static bool parse_coordinate(std::string const &text, double *out) {
+	const char *begin = text.c_str();
+	char *end = NULL;
+	*out = strtod(begin, &end);
+	return end != begin;
+}
+
+// Read the CSV file `fname` and serialize one point feature per data row
+// into sst[0], tagged with `layer` and `layername`.
+//
+// The first line is the header. It must name a latitude column ("lat" or
+// "latitude") and a longitude column ("lon", "longitude" or "long"); every
+// other column becomes a string attribute of the feature.
+//
+// Exits the process if the file cannot be opened or closed, if the header
+// lacks either coordinate column, or if a row's column count differs from
+// the header's. Rows whose coordinates are not numeric are skipped, with a
+// warning for the first such row only.
+void parse_geocsv(std::vector<struct serialization_state> &sst, std::string fname, int layer, std::string layername) {
+	FILE *f = fopen(fname.c_str(), "r");
+	if (f == NULL) {
+		perror(fname.c_str());
+		exit(EXIT_FAILURE);
+	}
+
+	std::string s;
+	std::vector<std::string> header;
+	ssize_t latcol = -1, loncol = -1;
+
+	if ((s = csv_getline(f)).size() > 0) {
+		header = csv_split(s.c_str());
+
+		for (size_t i = 0; i < header.size(); i++) {
+			header[i] = csv_dequote(header[i]);
+
+			if (header[i] == "lat" || header[i] == "latitude") {
+				latcol = i;
+			}
+			if (header[i] == "lon" || header[i] == "longitude" || header[i] == "long") {
+				loncol = i;
+			}
+		}
+	}
+
+	if (latcol < 0 || loncol < 0) {
+		fprintf(stderr, "%s: Can't find \"lat\" and \"lon\" columns\n", fname.c_str());
+		exit(EXIT_FAILURE);
+	}
+
+	size_t seq = 0;
+	bool warned_bad_coordinates = false;
+	while ((s = csv_getline(f)).size() > 0) {
+		seq++;
+		// The header was the first record read, so data row `seq` is record
+		// seq + 1 of the file; report that number in diagnostics.
+		size_t lineno = seq + 1;
+		std::vector<std::string> line = csv_split(s.c_str());
+
+		if (line.size() != header.size()) {
+			fprintf(stderr, "%s:%zu: Mismatched column count: %zu in line, %zu in header\n", fname.c_str(), lineno, line.size(), header.size());
+			exit(EXIT_FAILURE);
+		}
+
+		// The header names needed csv_dequote() after csv_split(); give the
+		// data cells the same treatment so that quoted coordinates parse and
+		// attribute values do not keep their quotation marks.
+		for (size_t i = 0; i < line.size(); i++) {
+			line[i] = csv_dequote(line[i]);
+		}
+
+		double lon, lat;
+		if (!parse_coordinate(line[loncol], &lon) || !parse_coordinate(line[latcol], &lat)) {
+			if (!warned_bad_coordinates) {
+				fprintf(stderr, "%s:%zu: Skipping row with unparseable latitude/longitude (additional not reported)\n", fname.c_str(), lineno);
+				warned_bad_coordinates = true;
+			}
+			continue;
+		}
+
+		long long x, y;
+		projection->project(lon, lat, 32, &x, &y);
+		drawvec dv;
+		dv.push_back(draw(VT_MOVETO, x, y));
+
+		std::vector<std::string> full_keys;
+		std::vector<serial_val> full_values;
+
+		for (size_t i = 0; i < line.size(); i++) {
+			if (i != (size_t) latcol && i != (size_t) loncol) {
+				full_keys.push_back(header[i]);
+
+				serial_val sv;
+				sv.type = mvt_string;
+				sv.s = line[i];
+
+				full_values.push_back(sv);
+			}
+		}
+
+		serial_feature sf;
+
+		sf.layer = layer;
+		sf.layername = layername;
+		sf.segment = sst[0].segment;
+		sf.has_id = false;
+		sf.id = 0;
+		sf.has_tippecanoe_minzoom = false;
+		sf.has_tippecanoe_maxzoom = false;
+		sf.feature_minzoom = false;
+		sf.seq = *(sst[0].layer_seq);
+		sf.geometry = dv;
+		sf.t = 1;  // POINT
+		sf.full_keys = full_keys;
+		sf.full_values = full_values;
+		sf.m = sf.full_values.size();
+
+		serialize_feature(&sst[0], sf);
+	}
+
+	if (fclose(f) != 0) {
+		perror("fclose");
+		exit(EXIT_FAILURE);
+	}
+}
diff --git a/geocsv.hpp b/geocsv.hpp
new file mode 100644
index 0000000..6550f4d
--- /dev/null
+++ b/geocsv.hpp
@@ -0,0 +1,13 @@
+#ifndef GEOCSV_HPP
+#define GEOCSV_HPP
+
+#include <stdio.h>
+#include <set>
+#include <map>
+#include <string>
+#include "mbtiles.hpp"
+#include "serial.hpp"
+
+void parse_geocsv(std::vector<struct serialization_state> &sst, std::string fname, int layer, std::string layername);
+
+#endif
diff --git a/main.cpp b/main.cpp
index a06a8b9..d267d7c 100644
--- a/main.cpp
+++ b/main.cpp
@@ -51,6 +51,7 @@
 #include "main.hpp"
 #include "geojson.hpp"
 #include "geobuf.hpp"
+#include "geocsv.hpp"
 #include "geometry.hpp"
 #include "serial.hpp"
 #include "options.hpp"
@@ -1290,6 +1291,51 @@ int read_input(std::vector<source> &sources, char *fname, int maxzoom, int minzo
 			continue;
 		}
 
+		if (sources[source].file.size() > 4 && sources[source].file.substr(sources[source].file.size() - 4) == std::string(".csv")) {
+			long long layer_seq[CPUS];
+			double dist_sums[CPUS];
+			size_t dist_counts[CPUS];
+
+			std::vector<struct serialization_state> sst;
+			sst.resize(CPUS);
+
+			// XXX factor out this duplicated setup
+			for (size_t i = 0; i < CPUS; i++) {
+				layer_seq[i] = overall_offset;
+				dist_sums[i] = 0;
+				dist_counts[i] = 0;
+
+				sst[i].fname = reading.c_str();
+				sst[i].line = 0;
+				sst[i].layer_seq = &layer_seq[i];
+				sst[i].progress_seq = &progress_seq;
+				sst[i].readers = &readers;
+				sst[i].segment = i;
+				sst[i].initial_x = &initial_x[i];
+				sst[i].initial_y = &initial_y[i];
+				sst[i].initialized = &initialized[i];
+				sst[i].dist_sum = &dist_sums[i];
+				sst[i].dist_count = &dist_counts[i];
+				sst[i].want_dist = guess_maxzoom;
+				sst[i].maxzoom = maxzoom;
+				sst[i].filters = prefilter != NULL || postfilter != NULL;
+				sst[i].uses_gamma = uses_gamma;
+				sst[i].layermap = &layermaps[i];
+				sst[i].exclude = exclude;
+				sst[i].include = include;
+				sst[i].exclude_all = exclude_all;
+				sst[i].filter = filter;
+				sst[i].basezoom = basezoom;
+				sst[i].attribute_types = attribute_types;
+			}
+
+			parse_geocsv(sst, sources[source].file, layer, sources[layer].layer);
+
+			overall_offset = layer_seq[0];
+			checkdisk(&readers);
+			continue;
+		}
+
 		struct stat st;
 		char *map = NULL;
 		off_t off = 0;