2015-07-27 23:20:20 +00:00
|
|
|
#include <stdio.h>
|
2014-09-29 19:17:35 +00:00
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
2016-04-27 23:41:41 +00:00
|
|
|
#include <limits.h>
|
2017-09-06 20:06:08 +00:00
|
|
|
#include <math.h>
|
2016-04-27 21:59:20 +00:00
|
|
|
#include "memfile.hpp"
|
2016-04-27 21:00:14 +00:00
|
|
|
#include "pool.hpp"
|
2014-09-29 19:17:35 +00:00
|
|
|
|
2016-04-27 21:59:20 +00:00
|
|
|
// Compare two NUL-terminated strings byte by byte, starting from their last
// characters and working toward the front. Bytes are compared as unsigned
// values. When one string runs out before the other, the exhausted string
// is treated as supplying '\0' for the remaining positions.
//
// Returns 0 when the strings are identical; otherwise the difference
// (byte from a) - (byte from b) at the first mismatching position found
// while scanning backward.
int swizzlecmp(const char *a, const char *b) {
	// Number of bytes of each string that have not been compared yet.
	size_t a_left = strlen(a);
	size_t b_left = strlen(b);

	while (a_left > 0 || b_left > 0) {
		unsigned char a_ch = '\0';
		unsigned char b_ch = '\0';

		if (a_left > 0) {
			a_ch = a[--a_left];
		}
		if (b_left > 0) {
			b_ch = b[--b_left];
		}

		if (a_ch != b_ch) {
			return (int) a_ch - (int) b_ch;
		}
	}

	return 0;
}
|
|
|
|
|
|
|
|
long long addpool(struct memfile *poolfile, struct memfile *treefile, const char *s, char type) {
|
|
|
|
long long *sp = &treefile->tree;
|
2017-09-06 20:06:08 +00:00
|
|
|
size_t depth = 0;
|
|
|
|
|
|
|
|
// In typical data, traversal depth generally stays under 2.5x
|
|
|
|
size_t max = 3 * log(treefile->off / sizeof(struct stringpool)) / log(2);
|
|
|
|
if (max < 30) {
|
|
|
|
max = 30;
|
|
|
|
}
|
2016-04-27 21:59:20 +00:00
|
|
|
|
|
|
|
while (*sp != 0) {
|
|
|
|
int cmp = swizzlecmp(s, poolfile->map + ((struct stringpool *) (treefile->map + *sp))->off + 1);
|
|
|
|
|
|
|
|
if (cmp == 0) {
|
|
|
|
cmp = type - (poolfile->map + ((struct stringpool *) (treefile->map + *sp))->off)[0];
|
|
|
|
}
|
|
|
|
|
|
|
|
if (cmp < 0) {
|
|
|
|
sp = &(((struct stringpool *) (treefile->map + *sp))->left);
|
|
|
|
} else if (cmp > 0) {
|
|
|
|
sp = &(((struct stringpool *) (treefile->map + *sp))->right);
|
|
|
|
} else {
|
|
|
|
return ((struct stringpool *) (treefile->map + *sp))->off;
|
|
|
|
}
|
2017-09-06 20:06:08 +00:00
|
|
|
|
|
|
|
depth++;
|
|
|
|
if (depth > max) {
|
|
|
|
// Search is very deep, so string is probably unique.
|
|
|
|
// Add it to the pool without adding it to the search tree.
|
|
|
|
|
|
|
|
long long off = poolfile->off;
|
|
|
|
if (memfile_write(poolfile, &type, 1) < 0) {
|
|
|
|
perror("memfile write");
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
}
|
|
|
|
if (memfile_write(poolfile, (void *) s, strlen(s) + 1) < 0) {
|
|
|
|
perror("memfile write");
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
}
|
|
|
|
return off;
|
|
|
|
}
|
2016-04-27 21:59:20 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// *sp is probably in the memory-mapped file, and will move if the file grows.
|
|
|
|
long long ssp;
|
|
|
|
if (sp == &treefile->tree) {
|
|
|
|
ssp = -1;
|
|
|
|
} else {
|
|
|
|
ssp = ((char *) sp) - treefile->map;
|
|
|
|
}
|
|
|
|
|
|
|
|
long long off = poolfile->off;
|
|
|
|
if (memfile_write(poolfile, &type, 1) < 0) {
|
|
|
|
perror("memfile write");
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
}
|
|
|
|
if (memfile_write(poolfile, (void *) s, strlen(s) + 1) < 0) {
|
|
|
|
perror("memfile write");
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
}
|
|
|
|
|
|
|
|
struct stringpool tsp;
|
|
|
|
tsp.left = 0;
|
|
|
|
tsp.right = 0;
|
|
|
|
tsp.off = off;
|
|
|
|
|
|
|
|
long long p = treefile->off;
|
|
|
|
if (memfile_write(treefile, &tsp, sizeof(struct stringpool)) < 0) {
|
|
|
|
perror("memfile write");
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ssp == -1) {
|
|
|
|
treefile->tree = p;
|
|
|
|
} else {
|
|
|
|
*((long long *) (treefile->map + ssp)) = p;
|
|
|
|
}
|
|
|
|
return off;
|
|
|
|
}
|