Read Shapefile and DBF headers and skim through the files

This commit is contained in:
Eric Fischer 2017-09-20 15:38:37 -07:00
parent 4c2d80ea17
commit c2093329b1
4 changed files with 201 additions and 1 deletions

View File

@ -46,7 +46,7 @@ C = $(wildcard *.c) $(wildcard *.cpp)
INCLUDES = -I/usr/local/include -I.
LIBS = -L/usr/local/lib
tippecanoe: geojson.o jsonpull/jsonpull.o tile.o pool.o mbtiles.o geometry.o projection.o memfile.o mvt.o serial.o main.o text.o dirtiles.o plugin.o read_json.o write_json.o geobuf.o evaluator.o
tippecanoe: geojson.o jsonpull/jsonpull.o tile.o pool.o mbtiles.o geometry.o projection.o memfile.o mvt.o serial.o main.o text.o dirtiles.o plugin.o read_json.o write_json.o geobuf.o shapefile.o evaluator.o
$(CXX) $(PG) $(LIBS) $(FINAL_FLAGS) $(CXXFLAGS) -o $@ $^ $(LDFLAGS) -lm -lz -lsqlite3 -lpthread
tippecanoe-enumerate: enumerate.o

View File

@ -51,6 +51,7 @@
#include "main.hpp"
#include "geojson.hpp"
#include "geobuf.hpp"
#include "shapefile.hpp"
#include "geometry.hpp"
#include "serial.hpp"
#include "options.hpp"
@ -1267,6 +1268,49 @@ int read_input(std::vector<source> &sources, char *fname, int maxzoom, int minzo
continue;
}
if (sources[source].file.size() > 4 && sources[source].file.substr(sources[source].file.size() - 4) == std::string(".shp")) {
long long layer_seq[CPUS];
double dist_sums[CPUS];
size_t dist_counts[CPUS];
struct serialization_state sst[CPUS];
// XXX factor out this duplicated setup
for (size_t i = 0; i < CPUS; i++) {
layer_seq[i] = overall_offset;
dist_sums[i] = 0;
dist_counts[i] = 0;
sst[i].fname = reading.c_str();
sst[i].line = 0;
sst[i].layer_seq = &layer_seq[i];
sst[i].progress_seq = &progress_seq;
sst[i].readers = readers;
sst[i].segment = i;
sst[i].initial_x = &initial_x[i];
sst[i].initial_y = &initial_y[i];
sst[i].initialized = &initialized[i];
sst[i].dist_sum = &dist_sums[i];
sst[i].dist_count = &dist_counts[i];
sst[i].want_dist = guess_maxzoom;
sst[i].maxzoom = maxzoom;
sst[i].filters = prefilter != NULL || postfilter != NULL;
sst[i].uses_gamma = uses_gamma;
sst[i].layermap = &layermaps[i];
sst[i].exclude = exclude;
sst[i].include = include;
sst[i].exclude_all = exclude_all;
sst[i].filter = filter;
sst[i].basezoom = basezoom;
sst[i].attribute_types = attribute_types;
}
parse_shapefile(sst, sources[source].file, layer, sources[layer].layer);
overall_offset = layer_seq[0];
checkdisk(readers, CPUS);
continue;
}
struct stat st;
char *map = NULL;
off_t off = 0;

143
shapefile.cpp Normal file
View File

@ -0,0 +1,143 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <string>
#include <vector>
#include "shapefile.hpp"
// Decode a 32-bit little-endian unsigned integer from the four bytes at ba.
//
// Each byte is widened to unsigned int before shifting. Without that, the
// operands are promoted to (signed) int, and a top byte >= 0x80 would be
// shifted into the sign bit.
static unsigned int read32le(unsigned char *ba) {
	return ((unsigned int) (ba[0] & 0xFF)) |
	       ((unsigned int) (ba[1] & 0xFF) << 8) |
	       ((unsigned int) (ba[2] & 0xFF) << 16) |
	       ((unsigned int) (ba[3] & 0xFF) << 24);
}
// Decode a 16-bit little-endian unsigned integer from the two bytes at ba.
// The result is always in the range 0..0xFFFF.
static unsigned int read16le(unsigned char *ba) {
	unsigned int low = ba[0] & 0xFF;
	unsigned int high = ba[1] & 0xFF;
	return (high << 8) | low;
}
// Decode a 64-bit little-endian unsigned integer from the eight bytes at ba.
//
// The value is accumulated entirely in unsigned long long, starting from the
// most significant byte (ba[7]), so no intermediate ever passes through a
// signed type and the top bit can be set without overflow.
static unsigned long long read64le(unsigned char *ba) {
	unsigned long long out = 0;
	for (int i = 7; i >= 0; i--) {
		out = (out << 8) | (unsigned long long) (ba[i] & 0xFF);
	}
	return out;
}
// Decode a 32-bit big-endian unsigned integer from the four bytes at ba.
//
// Each byte is widened to unsigned int before shifting. Without that, the
// operands are promoted to (signed) int, and a leading byte >= 0x80 would be
// shifted into the sign bit.
static unsigned int read32be(unsigned char *ba) {
	return ((unsigned int) (ba[0] & 0xFF) << 24) |
	       ((unsigned int) (ba[1] & 0xFF) << 16) |
	       ((unsigned int) (ba[2] & 0xFF) << 8) |
	       ((unsigned int) (ba[3] & 0xFF));
}
// Decode an 8-byte floating point value from the bytes at ba.
//
// The bytes are interpreted in little-endian order, like the other *le
// readers in this file, so the result does not depend on the host byte
// order. The bit pattern is copied into the double with memcpy rather than
// by dereferencing a cast pointer: ba may point anywhere inside a byte
// buffer, so it is not guaranteed to be aligned for double, and reading a
// double through an unsigned char buffer violates the aliasing rules.
//
// Exits the process if double is not 8 bytes wide on this platform.
static double toDouble(unsigned char *ba) {
	if (sizeof(double) != 8) {
		fprintf(stderr, "Internal error: wrong floating point size\n");
		exit(EXIT_FAILURE);
	}

	unsigned long long bits = 0;
	for (int i = 7; i >= 0; i--) {
		bits = (bits << 8) | (unsigned long long) (ba[i] & 0xFF);
	}

	double out;
	memcpy(&out, &bits, sizeof(out));
	return out;
}
// Skim through a shapefile and its companion DBF attribute table.
//
// fname is the path of the .shp file; the attribute table is expected at the
// same path with the last three characters replaced by "dbf". The function
//   - checks the .shp header magic number (9994) and version (1000),
//   - reads the DBF header and its column descriptors (name, type, width),
//   - then reads one DBF record and one .shp record at a time, checking that
//     the .shp record numbers count up from 1 and that the number of records
//     read matches the count declared in the DBF header.
//
// Nothing is emitted yet: sst, layer and layername are accepted but not used,
// and the column descriptors, attribute bytes and geometry bytes are read and
// then discarded.
//
// Any I/O failure or format inconsistency prints a message to stderr and
// terminates the process with EXIT_FAILURE.
void parse_shapefile(struct serialization_state *sst, std::string fname, int layer, std::string layername) {
	std::string dbfname = fname.substr(0, fname.size() - 3) + "dbf";

	FILE *shp = fopen(fname.c_str(), "rb");
	if (shp == NULL) {
		perror(fname.c_str());
		exit(EXIT_FAILURE);
	}

	FILE *dbf = fopen(dbfname.c_str(), "rb");
	if (dbf == NULL) {
		perror(dbfname.c_str());
		exit(EXIT_FAILURE);
	}

	unsigned char shpheader[100];
	if (fread(shpheader, 1, 100, shp) != 100) {
		perror("read shapefile header");
		exit(EXIT_FAILURE);
	}

	// The magic number is stored big-endian, the version little-endian.
	unsigned int magic = read32be(shpheader);
	unsigned int version = read32le(shpheader + 28);

	if (magic != 9994 || version != 1000) {
		fprintf(stderr, "%s: not a shapefile (%u %u)\n", fname.c_str(), magic, version);
		exit(EXIT_FAILURE);
	}

	unsigned char dbfheader[32];
	if (fread(dbfheader, 1, 32, dbf) != 32) {
		perror("read dbf header");
		exit(EXIT_FAILURE);
	}

	unsigned int dbnrec = read32le(dbfheader + 4);
	unsigned int dbheaderlen = read16le(dbfheader + 8);
	unsigned int dbreclen = read16le(dbfheader + 10);

	if (dbheaderlen <= 32) {
		fprintf(stderr, "Impossible length for DBF column header %u\n", dbheaderlen);
		exit(EXIT_FAILURE);
	}

	// Everything in the DBF header after the fixed 32 bytes: the 32-byte
	// column descriptors followed by a terminator. Heap-allocated because
	// the size comes from the file.
	unsigned int dbcol_len = dbheaderlen - 32;
	std::vector<unsigned char> dbcolumns(dbcol_len);
	if (fread(dbcolumns.data(), 1, dbcol_len, dbf) != dbcol_len) {
		perror("read dbf column header");
		exit(EXIT_FAILURE);
	}

	std::vector<std::string> columns;
	std::vector<int> column_widths;
	std::vector<int> column_types;

	// Only look at complete 32-byte descriptors, so that the name, type
	// (offset 11) and width (offset 16) reads below always stay inside
	// dbcolumns even if the declared header length is not 32 * n + 1.
	for (size_t i = 0; i + 32 <= dbcol_len; i += 32) {
		// 0x0D in place of a column name marks the end of the descriptors;
		// anything after it is not a column.
		if (dbcolumns[i] == 0x0D) {
			break;
		}

		// The name is NUL-padded and at most 10 characters long.
		size_t j;
		for (j = i; j < i + 10; j++) {
			if (dbcolumns[j] == '\0') {
				break;
			}
		}

		columns.push_back(std::string((char *) &dbcolumns[i], j - i));
		column_widths.push_back(dbcolumns[i + 16]);
		column_types.push_back(dbcolumns[i + 11]);
	}

	// Record buffers live on the heap: their sizes come from the files, and
	// a single geometry record can be far larger than the stack.
	std::vector<unsigned char> db(dbreclen);
	std::vector<unsigned char> geom_buf;
	unsigned seq = 0;

	while (fread(db.data(), dbreclen, 1, dbf) == 1) {
		// Each .shp record starts with an 8-byte big-endian header:
		// record number, then content length.
		unsigned char shlen[8];
		if (fread(shlen, 8, 1, shp) != 1) {
			fprintf(stderr, "Attributes with no shape\n");
			exit(EXIT_FAILURE);
		}

		seq++;
		unsigned fileseq = read32be(shlen);

		if (fileseq != seq) {
			fprintf(stderr, "Shapefile out of sequence: found %u for record %u\n", fileseq, seq);
			exit(EXIT_FAILURE);
		}

		// The stored length is doubled to get bytes (it counts 2-byte
		// units); widen first so the doubling cannot wrap in 32 bits.
		size_t geom_len = (size_t) read32be(shlen + 4) * 2;
		geom_buf.resize(geom_len);
		if (fread(geom_buf.data(), 1, geom_len, shp) != geom_len) {
			fprintf(stderr, "End of file reading geometry\n");
			exit(EXIT_FAILURE);
		}
	}

	if (seq != dbnrec) {
		fprintf(stderr, "Unexpected number of attributes: %u instead of %u\n", seq, dbnrec);
		exit(EXIT_FAILURE);
	}

	if (fclose(shp) != 0) {
		perror("fclose");
		exit(EXIT_FAILURE);
	}

	if (fclose(dbf) != 0) {
		perror("fclose");
		exit(EXIT_FAILURE);
	}
}

13
shapefile.hpp Normal file
View File

@ -0,0 +1,13 @@
#ifndef SHAPEFILE_HPP
#define SHAPEFILE_HPP
#include <stdio.h>
#include <set>
#include <map>
#include <string>
#include "mbtiles.hpp"
#include "serial.hpp"
// Read the shapefile at fname together with its attribute table, which is
// looked up at the same path with the last three characters replaced by
// "dbf". The current implementation validates both headers and steps through
// the records of both files in parallel; it does not use sst, layer or
// layername yet. Any read error or inconsistency prints a message to stderr
// and exits the process with EXIT_FAILURE.
void parse_shapefile(struct serialization_state *sst, std::string fname, int layer, std::string layername);
#endif