Basic geographic CSV parsing

This commit is contained in:
Eric Fischer 2017-12-05 17:18:19 -08:00
parent f2f76082df
commit f3e6afa4e9
4 changed files with 159 additions and 1 deletions

View File

@ -47,7 +47,7 @@ C = $(wildcard *.c) $(wildcard *.cpp)
INCLUDES = -I/usr/local/include -I.
LIBS = -L/usr/local/lib
tippecanoe: geojson.o jsonpull/jsonpull.o tile.o pool.o mbtiles.o geometry.o projection.o memfile.o mvt.o serial.o main.o text.o dirtiles.o plugin.o read_json.o write_json.o geobuf.o evaluator.o
tippecanoe: geojson.o jsonpull/jsonpull.o tile.o pool.o mbtiles.o geometry.o projection.o memfile.o mvt.o serial.o main.o text.o dirtiles.o plugin.o read_json.o write_json.o geobuf.o evaluator.o geocsv.o csv.o
$(CXX) $(PG) $(LIBS) $(FINAL_FLAGS) $(CXXFLAGS) -o $@ $^ $(LDFLAGS) -lm -lz -lsqlite3 -lpthread
tippecanoe-enumerate: enumerate.o

99
geocsv.cpp Normal file
View File

@ -0,0 +1,99 @@
#include <stdlib.h>
#include "geocsv.hpp"
#include "mvt.hpp"
#include "serial.hpp"
#include "projection.hpp"
#include "main.hpp"
#include "text.hpp"
#include "csv.hpp"
#include "milo/dtoa_milo.h"
void parse_geocsv(std::vector<struct serialization_state> &sst, std::string fname, int layer, std::string layername) {
FILE *f = fopen(fname.c_str(), "r");
if (f == NULL) {
perror(fname.c_str());
exit(EXIT_FAILURE);
}
std::string s;
std::vector<std::string> header;
ssize_t latcol = -1, loncol = -1;
if ((s = csv_getline(f)).size() > 0) {
header = csv_split(s.c_str());
for (size_t i = 0; i < header.size(); i++) {
header[i] = csv_dequote(header[i]);
if (header[i] == "lat" || header[i] == "latitude") {
latcol = i;
}
if (header[i] == "lon" || header[i] == "longitude" || header[i] == "long") {
loncol = i;
}
}
}
if (latcol < 0 || loncol < 0) {
fprintf(stderr, "%s: Can't find \"lat\" and \"lon\" columns\n", fname.c_str());
exit(EXIT_FAILURE);
}
size_t seq = 0;
while ((s = csv_getline(f)).size() > 0) {
seq++;
std::vector<std::string> line = csv_split(s.c_str());
if (line.size() != header.size()) {
fprintf(stderr, "%s:%zu: Mismatched column count: %zu in line, %zu in header\n", fname.c_str(), seq, line.size(), header.size());
exit(EXIT_FAILURE);
}
double lon = atof(line[loncol].c_str());
double lat = atof(line[latcol].c_str());
long long x, y;
projection->project(lon, lat, 32, &x, &y);
drawvec dv;
dv.push_back(draw(VT_MOVETO, x, y));
std::vector<std::string> full_keys;
std::vector<serial_val> full_values;
for (size_t i = 0; i < line.size(); i++) {
if (i != (size_t) latcol && i != (size_t) loncol) {
full_keys.push_back(header[i]);
serial_val sv;
sv.type = mvt_string;
sv.s = line[i];
full_values.push_back(sv);
}
}
serial_feature sf;
sf.layer = layer;
sf.layername = layername;
sf.segment = sst[0].segment;
sf.has_id = false;
sf.id = 0;
sf.has_tippecanoe_minzoom = false;
sf.has_tippecanoe_maxzoom = false;
sf.feature_minzoom = false;
sf.seq = *(sst[0].layer_seq);
sf.geometry = dv;
sf.t = 1; // POINT
sf.full_keys = full_keys;
sf.full_values = full_values;
sf.m = sf.full_values.size();
serialize_feature(&sst[0], sf);
}
if (fclose(f) != 0) {
perror("fclose");
exit(EXIT_FAILURE);
}
}

13
geocsv.hpp Normal file
View File

@ -0,0 +1,13 @@
#ifndef GEOCSV_HPP
#define GEOCSV_HPP
#include <stdio.h>
#include <set>
#include <map>
#include <string>
#include "mbtiles.hpp"
#include "serial.hpp"
// Parse the CSV file named by fname and serialize one point feature per data
// row through sst[0], tagged with the given layer index and layer name.
// The header row must name a latitude column ("lat" or "latitude") and a
// longitude column ("lon", "longitude" or "long"); all remaining columns are
// attached to each feature as string attributes. On an unreadable file, a
// missing coordinate column, or a row whose column count differs from the
// header's, an error is printed to stderr and the process exits.
void parse_geocsv(std::vector<struct serialization_state> &sst, std::string fname, int layer, std::string layername);
#endif

View File

@ -51,6 +51,7 @@
#include "main.hpp"
#include "geojson.hpp"
#include "geobuf.hpp"
#include "geocsv.hpp"
#include "geometry.hpp"
#include "serial.hpp"
#include "options.hpp"
@ -1290,6 +1291,51 @@ int read_input(std::vector<source> &sources, char *fname, int maxzoom, int minzo
continue;
}
// Inputs whose file name is longer than four characters and ends in ".csv"
// are handed to parse_geocsv(); the rest of this iteration is then skipped.
if (sources[source].file.size() > 4 && sources[source].file.substr(sources[source].file.size() - 4) == std::string(".csv")) {
// Per-slot counters that the serialization states below point into.
long long layer_seq[CPUS];
double dist_sums[CPUS];
size_t dist_counts[CPUS];
std::vector<struct serialization_state> sst;
sst.resize(CPUS);
// XXX factor out this duplicated setup
for (size_t i = 0; i < CPUS; i++) {
// Every slot starts its sequence counter at the running overall offset.
layer_seq[i] = overall_offset;
dist_sums[i] = 0;
dist_counts[i] = 0;
sst[i].fname = reading.c_str();
sst[i].line = 0;
sst[i].layer_seq = &layer_seq[i];
sst[i].progress_seq = &progress_seq;
sst[i].readers = &readers;
sst[i].segment = i;
sst[i].initial_x = &initial_x[i];
sst[i].initial_y = &initial_y[i];
sst[i].initialized = &initialized[i];
sst[i].dist_sum = &dist_sums[i];
sst[i].dist_count = &dist_counts[i];
sst[i].want_dist = guess_maxzoom;
sst[i].maxzoom = maxzoom;
// Filtering is flagged as active when either a prefilter or a postfilter is set.
sst[i].filters = prefilter != NULL || postfilter != NULL;
sst[i].uses_gamma = uses_gamma;
sst[i].layermap = &layermaps[i];
sst[i].exclude = exclude;
sst[i].include = include;
sst[i].exclude_all = exclude_all;
sst[i].filter = filter;
sst[i].basezoom = basezoom;
sst[i].attribute_types = attribute_types;
}
// parse_geocsv() serializes every feature through sst[0].
parse_geocsv(sst, sources[source].file, layer, sources[layer].layer);
// Carry slot 0's sequence counter forward as the new overall offset.
overall_offset = layer_seq[0];
checkdisk(&readers);
continue;
}
struct stat st;
char *map = NULL;
off_t off = 0;