Merge branch 'master' into plugins

2025-04-15 22:56:43 +00:00 · 2017-05-01 11:43:05 -07:00 · 2017-05-01 11:43:05 -07:00 · a0234923b7
commit a0234923b7
parent 5a8bfa23a4 1669d96510
9 changed files with 4189 additions and 124 deletions
--- a/.gitignore
+++ b/.gitignore
@ -4,6 +4,9 @@
 *.o
 *.obj

+# Autogenerated dependencies
+*.d
+
 # Precompiled Headers
 *.gch
 *.pch
@ -27,6 +30,14 @@
 *.exe
 *.out
 *.app
+tippecanoe
+tile-join
+tippecanoe-decode
+tippecanoe-enumerate
+unit
+
+# Vim
+*.swp

 # Mac 
-.DS_Store
+.DS_Store
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -1,3 +1,7 @@
+## 1.17.3
+
+* Support RFC 8142 GeoJSON text sequences
+
 ## 1.17.2

 * Organize usage output the same way as in the README
--- a/Makefile
+++ b/Makefile
@ -100,15 +100,18 @@ parallel-test:
 	./tippecanoe -z5 -f -pi -l test -n test -P -o tests/parallel/parallel-file.mbtiles tests/parallel/in[1234].json tests/parallel/empty[12].json
 	cat tests/parallel/in[1234].json | ./tippecanoe -z5 -f -pi -l test -n test -o tests/parallel/linear-pipe.mbtiles
 	cat tests/parallel/in[1234].json | ./tippecanoe -z5 -f -pi -l test -n test -P -o tests/parallel/parallel-pipe.mbtiles
+	cat tests/parallel/in[1234].json | sed 's/^/@/' | tr '@' '\036' | ./tippecanoe -z5 -f -pi -l test -n test -o tests/parallel/implicit-pipe.mbtiles
 	./tippecanoe -z5 -f -pi -l test -n test -P -o tests/parallel/parallel-pipes.mbtiles <(cat tests/parallel/in1.json) <(cat tests/parallel/empty1.json) <(cat tests/parallel/empty2.json) <(cat tests/parallel/in2.json) /dev/null <(cat tests/parallel/in3.json) <(cat tests/parallel/in4.json)
 	./tippecanoe-decode tests/parallel/linear-file.mbtiles > tests/parallel/linear-file.json
 	./tippecanoe-decode tests/parallel/parallel-file.mbtiles > tests/parallel/parallel-file.json
 	./tippecanoe-decode tests/parallel/linear-pipe.mbtiles > tests/parallel/linear-pipe.json
 	./tippecanoe-decode tests/parallel/parallel-pipe.mbtiles > tests/parallel/parallel-pipe.json
+	./tippecanoe-decode tests/parallel/implicit-pipe.mbtiles > tests/parallel/implicit-pipe.json
 	./tippecanoe-decode tests/parallel/parallel-pipes.mbtiles > tests/parallel/parallel-pipes.json
 	cmp tests/parallel/linear-file.json tests/parallel/parallel-file.json
 	cmp tests/parallel/linear-file.json tests/parallel/linear-pipe.json
 	cmp tests/parallel/linear-file.json tests/parallel/parallel-pipe.json
+	cmp tests/parallel/linear-file.json tests/parallel/implicit-pipe.json
 	cmp tests/parallel/linear-file.json tests/parallel/parallel-pipes.json
 	rm tests/parallel/*.mbtiles tests/parallel/*.json

--- a/README.md
+++ b/README.md
@ -104,6 +104,10 @@ If your input is formatted as newline-delimited GeoJSON, use `-P` to make input
   Performance will be better if the input is a named file that can be mapped into memory
   rather than a stream that can only be read sequentially.

+If the input file begins with the [RFC 8142](https://tools.ietf.org/html/rfc8142) record separator,
+parallel processing of input will be invoked automatically, splitting at record separators rather
+than at all newlines.
+
 ### Projection of input

 * `-s` _projection_ or `--projection=`_projection_: Specify the projection of the input data. Currently supported are `EPSG:4326` (WGS84, the default) and `EPSG:3857` (Web Mercator). In general you should use WGS84 for your input files if at all possible.
--- a/jsonpull/jsonpull.c
+++ b/jsonpull/jsonpull.c
@ -295,7 +295,7 @@ again:

 			return NULL;
 		}
-	} while (c == ' ' || c == '\t' || c == '\r' || c == '\n');
+	} while (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == 0x1E);

 	/////////////////////////// Arrays

--- a/main.cpp
+++ b/main.cpp
@ -377,7 +377,7 @@ void *run_sort(void *v) {
 	return NULL;
 }

-void do_read_parallel(char *map, long long len, long long initial_offset, const char *reading, struct reader *reader, volatile long long *progress_seq, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, char *fname, int basezoom, int source, int nlayers, std::vector<std::map<std::string, layermap_entry> > *layermaps, double droprate, int *initialized, unsigned *initial_x, unsigned *initial_y, int maxzoom, std::string layername, bool uses_gamma, std::map<std::string, int> const *attribute_types) {
+void do_read_parallel(char *map, long long len, long long initial_offset, const char *reading, struct reader *reader, volatile long long *progress_seq, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, char *fname, int basezoom, int source, int nlayers, std::vector<std::map<std::string, layermap_entry> > *layermaps, double droprate, int *initialized, unsigned *initial_x, unsigned *initial_y, int maxzoom, std::string layername, bool uses_gamma, std::map<std::string, int> const *attribute_types, int separator) {
 	long long segs[CPUS + 1];
 	segs[0] = 0;
 	segs[CPUS] = len;
@ -385,7 +385,7 @@ void do_read_parallel(char *map, long long len, long long initial_offset, const
 	for (size_t i = 1; i < CPUS; i++) {
 		segs[i] = len * i / CPUS;

-		while (segs[i] < len && map[segs[i]] != '\n') {
+		while (segs[i] < len && map[segs[i]] != separator) {
 			segs[i]++;
 		}
 	}
@ -460,6 +460,7 @@ struct read_parallel_arg {
 	long long offset;
 	long long len;
 	volatile int *is_parsing;
+	int separator;

 	const char *reading;
 	struct reader *reader;
@ -501,7 +502,7 @@ void *run_read_parallel(void *v) {
 	}
 	madvise(map, rpa->len, MADV_RANDOM);  // sequential, but from several pointers at once

-	do_read_parallel(map, rpa->len, rpa->offset, rpa->reading, rpa->reader, rpa->progress_seq, rpa->exclude, rpa->include, rpa->exclude_all, rpa->fname, rpa->basezoom, rpa->source, rpa->nlayers, rpa->layermaps, rpa->droprate, rpa->initialized, rpa->initial_x, rpa->initial_y, rpa->maxzoom, rpa->layername, rpa->uses_gamma, rpa->attribute_types);
+	do_read_parallel(map, rpa->len, rpa->offset, rpa->reading, rpa->reader, rpa->progress_seq, rpa->exclude, rpa->include, rpa->exclude_all, rpa->fname, rpa->basezoom, rpa->source, rpa->nlayers, rpa->layermaps, rpa->droprate, rpa->initialized, rpa->initial_x, rpa->initial_y, rpa->maxzoom, rpa->layername, rpa->uses_gamma, rpa->attribute_types, rpa->separator);

 	madvise(map, rpa->len, MADV_DONTNEED);
 	if (munmap(map, rpa->len) != 0) {
@ -518,7 +519,7 @@ void *run_read_parallel(void *v) {
 	return NULL;
 }

-void start_parsing(int fd, FILE *fp, long long offset, long long len, volatile int *is_parsing, pthread_t *parallel_parser, bool &parser_created, const char *reading, struct reader *reader, volatile long long *progress_seq, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, char *fname, int basezoom, int source, int nlayers, std::vector<std::map<std::string, layermap_entry> > &layermaps, double droprate, int *initialized, unsigned *initial_x, unsigned *initial_y, int maxzoom, std::string layername, bool uses_gamma, std::map<std::string, int> const *attribute_types) {
+void start_parsing(int fd, FILE *fp, long long offset, long long len, volatile int *is_parsing, pthread_t *parallel_parser, bool &parser_created, const char *reading, struct reader *reader, volatile long long *progress_seq, std::set<std::string> *exclude, std::set<std::string> *include, int exclude_all, char *fname, int basezoom, int source, int nlayers, std::vector<std::map<std::string, layermap_entry> > &layermaps, double droprate, int *initialized, unsigned *initial_x, unsigned *initial_y, int maxzoom, std::string layername, bool uses_gamma, std::map<std::string, int> const *attribute_types, int separator) {
 	// This has to kick off an intermediate thread to start the parser threads,
 	// so the main thread can get back to reading the next input stage while
 	// the intermediate thread waits for the completion of the parser threads.
@ -536,6 +537,7 @@ void start_parsing(int fd, FILE *fp, long long offset, long long len, volatile i
 	rpa->offset = offset;
 	rpa->len = len;
 	rpa->is_parsing = is_parsing;
+	rpa->separator = separator;

 	rpa->reading = reading;
 	rpa->reader = reader;
@ -1206,7 +1208,9 @@ int read_input(std::vector<source> &sources, char *fname, int maxzoom, int minzo
 		char *map = NULL;
 		off_t off = 0;

-		if (read_parallel) {
+		int read_parallel_this = read_parallel ? '\n' : 0;  // record separator byte to split on; 0 = read serially unless the input turns out to start with RS (0x1E)
+
+		if (1) {
 			if (fstat(fd, &st) == 0) {
 				off = lseek(fd, 0, SEEK_CUR);
 				if (off >= 0) {
@ -1219,14 +1223,31 @@ int read_input(std::vector<source> &sources, char *fname, int maxzoom, int minzo
 			}
 		}

+		if (map != NULL && map != MAP_FAILED && st.st_size - off > 0) {
+			if (map[0] == 0x1E) {
+				read_parallel_this = 0x1E;
+			}
+
+			if (!read_parallel_this) {
+				// Not a GeoJSON text sequence, so unmap and read serially
+
+				if (munmap(map, st.st_size - off) != 0) {
+					perror("munmap source file");
+					exit(EXIT_FAILURE);
+				}
+
+				map = NULL;
+			}
+		}
+
 		if (map != NULL && map != MAP_FAILED) {
-			do_read_parallel(map, st.st_size - off, overall_offset, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, layer, nlayers, &layermaps, droprate, initialized, initial_x, initial_y, maxzoom, sources[layer].layer, uses_gamma, attribute_types);
+			do_read_parallel(map, st.st_size - off, overall_offset, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, layer, nlayers, &layermaps, droprate, initialized, initial_x, initial_y, maxzoom, sources[layer].layer, uses_gamma, attribute_types, read_parallel_this);
 			overall_offset += st.st_size - off;
 			checkdisk(reader, CPUS);

 			if (munmap(map, st.st_size - off) != 0) {
-				madvise(map, st.st_size, MADV_DONTNEED);
 				perror("munmap source file");
+				exit(EXIT_FAILURE);
 			}
 		} else {
 			FILE *fp = fdopen(fd, "r");
@ -1239,7 +1260,15 @@ int read_input(std::vector<source> &sources, char *fname, int maxzoom, int minzo
 				continue;
 			}

-			if (read_parallel) {
+			int c = getc(fp);
+			if (c != EOF) {
+				ungetc(c, fp);
+			}
+			if (c == 0x1E) {
+				read_parallel_this = 0x1E;
+			}
+
+			if (read_parallel_this) {
 				// Serial reading of chunks that are then parsed in parallel

 				char readname[strlen(tmpdir) + strlen("/read.XXXXXXXX") + 1];
@ -1273,7 +1302,7 @@ int read_input(std::vector<source> &sources, char *fname, int maxzoom, int minzo
 					fwrite_check(buf, sizeof(char), n, readfp, reading.c_str());
 					ahead += n;

-					if (buf[n - 1] == '\n' && ahead > PARSE_MIN) {
+					if (buf[n - 1] == read_parallel_this && ahead > PARSE_MIN) {
 						// Don't let the streaming reader get too far ahead of the parsers.
 						// If the buffered input gets huge, even if the parsers are still running,
 						// wait for the parser thread instead of continuing to stream input.
@ -1288,7 +1317,7 @@ int read_input(std::vector<source> &sources, char *fname, int maxzoom, int minzo
 							}

 							fflush(readfp);
-							start_parsing(readfd, readfp, initial_offset, ahead, &is_parsing, &parallel_parser, parser_created, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, layer, nlayers, layermaps, droprate, initialized, initial_x, initial_y, maxzoom, sources[layer].layer, gamma != 0, attribute_types);
+							start_parsing(readfd, readfp, initial_offset, ahead, &is_parsing, &parallel_parser, parser_created, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, layer, nlayers, layermaps, droprate, initialized, initial_x, initial_y, maxzoom, sources[layer].layer, gamma != 0, attribute_types, read_parallel_this);

 							initial_offset += ahead;
 							overall_offset += ahead;
@ -1325,7 +1354,7 @@ int read_input(std::vector<source> &sources, char *fname, int maxzoom, int minzo
 				fflush(readfp);

 				if (ahead > 0) {
-					start_parsing(readfd, readfp, initial_offset, ahead, &is_parsing, &parallel_parser, parser_created, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, layer, nlayers, layermaps, droprate, initialized, initial_x, initial_y, maxzoom, sources[layer].layer, gamma != 0, attribute_types);
+					start_parsing(readfd, readfp, initial_offset, ahead, &is_parsing, &parallel_parser, parser_created, reading.c_str(), reader, &progress_seq, exclude, include, exclude_all, fname, basezoom, layer, nlayers, layermaps, droprate, initialized, initial_x, initial_y, maxzoom, sources[layer].layer, gamma != 0, attribute_types, read_parallel_this);

 					if (parser_created) {
 						if (pthread_join(parallel_parser, NULL) != 0) {
--- a/man/tippecanoe.1
+++ b/man/tippecanoe.1
@ -109,6 +109,10 @@ messages may result otherwise.
 Performance will be better if the input is a named file that can be mapped into memory
 rather than a stream that can only be read sequentially.
 .RE
+.PP
+If the input file begins with the RFC 8142 \[la]https://tools.ietf.org/html/rfc8142\[ra] record separator,
+parallel processing of input will be invoked automatically, splitting at record separators rather
+than at all newlines.
 .SS Projection of input
 .RS
 .IP \(bu 2
--- a/tests/ne_110m_admin_1_states_provinces_lines/in.json
+++ b/tests/ne_110m_admin_1_states_provinces_lines/in.json
--- a/version.hpp
+++ b/version.hpp
@ -1 +1 @@
-#define VERSION "tippecanoe v1.17.2\n"
+#define VERSION "tippecanoe v1.17.3\n"