Merge pull request #212 from mapbox/limitfiles

Speculatively open files to avoid overrunning the system limits
This commit is contained in:
Eric Fischer 2016-04-14 10:32:19 -07:00
commit 2a6e41126f
4 changed files with 96 additions and 30 deletions

View File

@ -1,3 +1,7 @@
## 1.9.13
* Don't trust the OS so much about how many files can be open
## 1.9.12
* Limit the size of the parallel parsing streaming input buffer

118
geojson.c
View File

@ -81,6 +81,7 @@ struct source {
int CPUS;
int TEMP_FILES;
long long MAX_FILES;
static long long diskfree;
#define MAX_ZOOM 24
@ -146,16 +147,50 @@ void init_cpus() {
// Round down to a power of 2
CPUS = 1 << (int) (log(CPUS) / log(2));
TEMP_FILES = 64;
struct rlimit rl;
if (getrlimit(RLIMIT_NOFILE, &rl) != 0) {
perror("getrlimit");
exit(EXIT_FAILURE);
} else {
TEMP_FILES = rl.rlim_cur / 3;
if (TEMP_FILES > CPUS * 4) {
TEMP_FILES = CPUS * 4;
MAX_FILES = rl.rlim_cur;
}
// Don't really want too many temporary files, because the file system
// will start to bog down eventually
if (MAX_FILES > 2000) {
MAX_FILES = 2000;
}
// MacOS can run out of system file descriptors
// even if we stay under the rlimit, so try to
// find out the real limit.
long long fds[MAX_FILES];
long long i;
for (i = 0; i < MAX_FILES; i++) {
fds[i] = open("/dev/null", O_RDONLY);
if (fds[i] < 0) {
break;
}
}
long long j;
for (j = 0; j < i; j++) {
if (close(fds[j]) < 0) {
perror("close");
exit(EXIT_FAILURE);
}
}
// Scale down because we really don't want to run the system out of files
MAX_FILES = i * 3 / 4;
if (MAX_FILES < 32) {
fprintf(stderr, "Can't open a useful number of files: %lld\n", MAX_FILES);
exit(EXIT_FAILURE);
}
TEMP_FILES = (MAX_FILES - 10) / 2;
if (TEMP_FILES > CPUS * 4) {
TEMP_FILES = CPUS * 4;
}
}
size_t fwrite_check(const void *ptr, size_t size, size_t nitems, FILE *stream, const char *fname) {
@ -1166,7 +1201,7 @@ void start_parsing(int fd, FILE *fp, long long offset, long long len, volatile i
}
}
void radix1(int *geomfds_in, int *indexfds_in, int inputs, int prefix, int splits, long long mem, const char *tmpdir, int availfiles, FILE *geomfile, FILE *indexfile, long long *geompos_out, long long *progress, long long *progress_max, long long *progress_reported) {
void radix1(int *geomfds_in, int *indexfds_in, int inputs, int prefix, int splits, long long mem, const char *tmpdir, long long *availfiles, FILE *geomfile, FILE *indexfile, long long *geompos_out, long long *progress, long long *progress_max, long long *progress_reported) {
// Arranged as bits to facilitate subdividing again if a subdivided file is still huge
int splitbits = log(splits) / log(2);
splits = 1 << splitbits;
@ -1208,7 +1243,7 @@ void radix1(int *geomfds_in, int *indexfds_in, int inputs, int prefix, int split
exit(EXIT_FAILURE);
}
availfiles -= 4;
*availfiles -= 4;
unlink(geomname);
unlink(indexname);
@ -1285,7 +1320,7 @@ void radix1(int *geomfds_in, int *indexfds_in, int inputs, int prefix, int split
exit(EXIT_FAILURE);
}
availfiles += 2;
*availfiles += 2;
}
for (i = 0; i < splits; i++) {
@ -1298,7 +1333,7 @@ void radix1(int *geomfds_in, int *indexfds_in, int inputs, int prefix, int split
exit(EXIT_FAILURE);
}
availfiles += 2;
*availfiles += 2;
}
for (i = 0; i < splits; i++) {
@ -1447,7 +1482,7 @@ void radix1(int *geomfds_in, int *indexfds_in, int inputs, int prefix, int split
// counter backward but will be an honest estimate of the work remaining.
*progress_max += geomst.st_size / 4;
radix1(&geomfds[i], &indexfds[i], 1, prefix + splitbits, availfiles / 4, mem, tmpdir, availfiles, geomfile, indexfile, geompos_out, progress, progress_max, progress_reported);
radix1(&geomfds[i], &indexfds[i], 1, prefix + splitbits, *availfiles / 4, mem, tmpdir, availfiles, geomfile, indexfile, geompos_out, progress, progress_max, progress_reported);
already_closed = 1;
}
}
@ -1461,9 +1496,9 @@ void radix1(int *geomfds_in, int *indexfds_in, int inputs, int prefix, int split
perror("close index");
exit(EXIT_FAILURE);
}
}
availfiles += 2;
*availfiles += 2;
}
}
}
@ -1477,13 +1512,6 @@ void radix(struct reader *reader, int nreaders, FILE *geomfile, int geomfd, FILE
// Then concatenate each of the sub-outputs into a final output.
struct rlimit rl;
if (getrlimit(RLIMIT_NOFILE, &rl) != 0) {
perror("getrlimit");
exit(EXIT_FAILURE);
}
long long mem;
#ifdef __APPLE__
@ -1511,15 +1539,10 @@ void radix(struct reader *reader, int nreaders, FILE *geomfile, int geomfd, FILE
mem = 8192;
}
// Don't use huge numbers of files that will trouble the file system
if (rl.rlim_cur > 5000) {
rl.rlim_cur = 5000;
}
long long availfiles = rl.rlim_cur - 2 * nreaders // each reader has a geom and an index
- 4 // pool, meta, mbtiles, mbtiles journal
- 4 // top-level geom and index output, both FILE and fd
- 3; // stdin, stdout, stderr
long long availfiles = MAX_FILES - 2 * nreaders // each reader has a geom and an index
- 4 // pool, meta, mbtiles, mbtiles journal
- 4 // top-level geom and index output, both FILE and fd
- 3; // stdin, stdout, stderr
// 4 because for each we have output and input FILE and fd for geom and index
int splits = availfiles / 4;
@ -1545,7 +1568,13 @@ void radix(struct reader *reader, int nreaders, FILE *geomfile, int geomfd, FILE
}
long long progress = 0, progress_max = geom_total, progress_reported = -1;
radix1(geomfds, indexfds, nreaders, 0, splits, mem, tmpdir, availfiles, geomfile, indexfile, geompos, &progress, &progress_max, &progress_reported);
long long availfiles_before = availfiles;
radix1(geomfds, indexfds, nreaders, 0, splits, mem, tmpdir, &availfiles, geomfile, indexfile, geompos, &progress, &progress_max, &progress_reported);
if (availfiles - 2 * nreaders != availfiles_before) {
fprintf(stderr, "Internal error: miscounted available file descriptors: %lld vs %lld\n", availfiles - 2 * nreaders, availfiles);
exit(EXIT_FAILURE);
}
}
int read_json(int argc, struct source **sourcelist, char *fname, const char *layername, int maxzoom, int minzoom, int basezoom, double basezoom_marker_width, sqlite3 *outdb, struct pool *exclude, struct pool *include, int exclude_all, double droprate, int buffer, const char *tmpdir, double gamma, int *prevent, int *additional, int read_parallel, int forcetable, const char *attribution) {
@ -2453,12 +2482,35 @@ int main(int argc, char **argv) {
pool_init(&include, 0);
int exclude_all = 0;
int read_parallel = 0;
int files_open_at_start;
for (i = 0; i < 256; i++) {
prevent[i] = 0;
additional[i] = 0;
}
{
char dup[256];
memset(dup, 0, sizeof(dup));
for (i = 0; i < sizeof(additional_options) / sizeof(additional_options[0]); i++) {
if (dup[additional_options[i]]) {
fprintf(stderr, "Internal error: reused -a%c\n", additional_options[i]);
exit(EXIT_FAILURE);
}
dup[additional_options[i]] = 1;
}
memset(dup, 0, sizeof(dup));
for (i = 0; i < sizeof(prevent_options) / sizeof(prevent_options[0]); i++) {
if (dup[prevent_options[i]]) {
fprintf(stderr, "Internal error: reused -p%c\n", prevent_options[i]);
exit(EXIT_FAILURE);
}
dup[prevent_options[i]] = 1;
}
}
static struct option long_options[] = {
{"name", required_argument, 0, 'n'},
{"layer", required_argument, 0, 'l'},
@ -2690,6 +2742,9 @@ int main(int argc, char **argv) {
}
}
files_open_at_start = open("/dev/null", O_RDONLY);
close(files_open_at_start);
if (maxzoom > MAX_ZOOM) {
maxzoom = MAX_ZOOM;
fprintf(stderr, "Highest supported zoom is %d\n", maxzoom);
@ -2766,5 +2821,12 @@ int main(int argc, char **argv) {
muntrace();
#endif
i = open("/dev/null", O_RDONLY);
// i < files_open_at_start is not an error, because reading from a pipe closes stdin
if (i > files_open_at_start) {
fprintf(stderr, "Internal error: did not close all files: %d\n", i);
exit(EXIT_FAILURE);
}
return ret;
}

2
tile.h
View File

@ -52,7 +52,7 @@ static int additional_options[] = {
A_LINE_DROP,
#define A_POLYGON_DROP ((int) 'p')
A_POLYGON_DROP,
#define A_PREFER_RADIX_SORT ((int) 'r')
#define A_PREFER_RADIX_SORT ((int) 'R')
A_PREFER_RADIX_SORT,
};

View File

@ -1 +1 @@
#define VERSION "tippecanoe v1.9.12\n"
#define VERSION "tippecanoe v1.9.13\n"