Clear up some confusion about attribute count and external references

Now the count is always adjacent to whereever the key/value pair is
stored, and is not kept in the serial feature object other than as
the length of the vectors of keys and values.
This commit is contained in:
Eric Fischer 2018-04-05 15:40:14 -07:00
parent d01d8177a0
commit 1b26becad9
7 changed files with 36 additions and 55 deletions

View File

@ -387,7 +387,6 @@ void readFeature(protozero::pbf_reader &pbf, size_t dim, double e, std::vector<s
sf.t = dv[i].type; sf.t = dv[i].type;
sf.full_keys = full_keys; sf.full_keys = full_keys;
sf.full_values = full_values; sf.full_values = full_values;
sf.m = sf.full_values.size();
auto tip = other.find("tippecanoe"); auto tip = other.find("tippecanoe");
if (tip != other.end()) { if (tip != other.end()) {
@ -512,7 +511,6 @@ void outBareGeometry(drawvec const &dv, int type, struct serialization_state *ss
sf.seq = (*sst->layer_seq); sf.seq = (*sst->layer_seq);
sf.geometry = dv; sf.geometry = dv;
sf.t = type; sf.t = type;
sf.m = 0;
serialize_feature(sst, sf); serialize_feature(sst, sf);
} }

View File

@ -108,7 +108,6 @@ void parse_geocsv(std::vector<struct serialization_state> &sst, std::string fnam
sf.t = 1; // POINT sf.t = 1; // POINT
sf.full_keys = full_keys; sf.full_keys = full_keys;
sf.full_values = full_values; sf.full_values = full_values;
sf.m = sf.full_values.size();
serialize_feature(&sst[0], sf); serialize_feature(&sst[0], sf);
} }

View File

@ -193,7 +193,6 @@ int serialize_geojson_feature(struct serialization_state *sst, json_object *geom
sf.has_tippecanoe_maxzoom = (tippecanoe_maxzoom != -1); sf.has_tippecanoe_maxzoom = (tippecanoe_maxzoom != -1);
sf.tippecanoe_maxzoom = tippecanoe_maxzoom; sf.tippecanoe_maxzoom = tippecanoe_maxzoom;
sf.geometry = dv; sf.geometry = dv;
sf.m = m;
sf.feature_minzoom = 0; // Will be filled in during index merging sf.feature_minzoom = 0; // Will be filled in during index merging
sf.seq = *(sst->layer_seq); sf.seq = *(sst->layer_seq);

View File

@ -393,7 +393,6 @@ serial_feature parse_feature(json_pull *jp, int z, unsigned x, unsigned y, std::
sf.bbox[0] = sf.bbox[1] = LLONG_MAX; sf.bbox[0] = sf.bbox[1] = LLONG_MAX;
sf.bbox[2] = sf.bbox[3] = LLONG_MIN; sf.bbox[2] = sf.bbox[3] = LLONG_MIN;
sf.extent = 0; sf.extent = 0;
sf.m = 0;
sf.metapos = 0; sf.metapos = 0;
sf.has_id = false; sf.has_id = false;

View File

@ -219,19 +219,15 @@ void serialize_feature(FILE *geomfile, serial_feature *sf, long long *geompos, c
serialize_long_long(geomfile, sf->extent, geompos, fname); serialize_long_long(geomfile, sf->extent, geompos, fname);
} }
serialize_int(geomfile, sf->m, geompos, fname); serialize_long_long(geomfile, sf->metapos, geompos, fname);
if (sf->m != 0) {
serialize_long_long(geomfile, sf->metapos, geompos, fname);
}
if (sf->metapos < 0 && sf->m != sf->keys.size()) { if (sf->metapos < 0) {
fprintf(stderr, "Internal error: feature said to have %lld attributes but only %lld found\n", (long long) sf->m, (long long) sf->keys.size()); serialize_long_long(geomfile, sf->keys.size(), geompos, fname);
exit(EXIT_FAILURE);
}
for (size_t i = 0; i < sf->keys.size(); i++) { for (size_t i = 0; i < sf->keys.size(); i++) {
serialize_long_long(geomfile, sf->keys[i], geompos, fname); serialize_long_long(geomfile, sf->keys[i], geompos, fname);
serialize_long_long(geomfile, sf->values[i], geompos, fname); serialize_long_long(geomfile, sf->values[i], geompos, fname);
}
} }
if (include_minzoom) { if (include_minzoom) {
@ -285,19 +281,14 @@ serial_feature deserialize_feature(FILE *geoms, long long *geompos_in, char *met
sf.layer >>= 6; sf.layer >>= 6;
sf.metapos = 0; sf.metapos = 0;
{ deserialize_long_long_io(geoms, &sf.metapos, geompos_in);
int m;
deserialize_int_io(geoms, &m, geompos_in);
sf.m = m;
}
if (sf.m != 0) {
deserialize_long_long_io(geoms, &sf.metapos, geompos_in);
}
if (sf.metapos >= 0) { if (sf.metapos >= 0) {
char *meta = metabase + sf.metapos + meta_off[sf.segment]; char *meta = metabase + sf.metapos + meta_off[sf.segment];
long long count;
deserialize_long_long(&meta, &count);
for (size_t i = 0; i < sf.m; i++) { for (long long i = 0; i < count; i++) {
long long k, v; long long k, v;
deserialize_long_long(&meta, &k); deserialize_long_long(&meta, &k);
deserialize_long_long(&meta, &v); deserialize_long_long(&meta, &v);
@ -305,7 +296,10 @@ serial_feature deserialize_feature(FILE *geoms, long long *geompos_in, char *met
sf.values.push_back(v); sf.values.push_back(v);
} }
} else { } else {
for (size_t i = 0; i < sf.m; i++) { long long count;
deserialize_long_long_io(geoms, &count, geompos_in);
for (long long i = 0; i < count; i++) {
long long k, v; long long k, v;
deserialize_long_long_io(geoms, &k, geompos_in); deserialize_long_long_io(geoms, &k, geompos_in);
deserialize_long_long_io(geoms, &v, geompos_in); deserialize_long_long_io(geoms, &v, geompos_in);
@ -524,13 +518,11 @@ int serialize_feature(struct serialization_state *sst, serial_feature &sf) {
if (sst->include->count(sf.full_keys[i]) == 0) { if (sst->include->count(sf.full_keys[i]) == 0) {
sf.full_keys.erase(sf.full_keys.begin() + i); sf.full_keys.erase(sf.full_keys.begin() + i);
sf.full_values.erase(sf.full_values.begin() + i); sf.full_values.erase(sf.full_values.begin() + i);
sf.m--;
continue; continue;
} }
} else if (sst->exclude->count(sf.full_keys[i]) != 0) { } else if (sst->exclude->count(sf.full_keys[i]) != 0) {
sf.full_keys.erase(sf.full_keys.begin() + i); sf.full_keys.erase(sf.full_keys.begin() + i);
sf.full_values.erase(sf.full_values.begin() + i); sf.full_values.erase(sf.full_values.begin() + i);
sf.m--;
continue; continue;
} }
@ -556,6 +548,7 @@ int serialize_feature(struct serialization_state *sst, serial_feature &sf) {
} }
} else { } else {
sf.metapos = r->metapos; sf.metapos = r->metapos;
serialize_long_long(r->metafile, sf.full_keys.size(), &r->metapos, sst->fname);
for (size_t i = 0; i < sf.full_keys.size(); i++) { for (size_t i = 0; i < sf.full_keys.size(); i++) {
serialize_long_long(r->metafile, addpool(r->poolfile, r->treefile, sf.full_keys[i].c_str(), mvt_string), &r->metapos, sst->fname); serialize_long_long(r->metafile, addpool(r->poolfile, r->treefile, sf.full_keys[i].c_str(), mvt_string), &r->metapos, sst->fname);
serialize_long_long(r->metafile, addpool(r->poolfile, r->treefile, sf.full_values[i].s.c_str(), sf.full_values[i].type), &r->metapos, sst->fname); serialize_long_long(r->metafile, addpool(r->poolfile, r->treefile, sf.full_values[i].s.c_str(), sf.full_values[i].type), &r->metapos, sst->fname);

View File

@ -57,9 +57,9 @@ struct serial_feature {
unsigned long long index = 0; unsigned long long index = 0;
long long extent = 0; long long extent = 0;
size_t m = 0;
std::vector<long long> keys{}; std::vector<long long> keys{};
std::vector<long long> values{}; std::vector<long long> values{};
// If >= 0, metadata is external
long long metapos = 0; long long metapos = 0;
// XXX This isn't serialized. Should it be here? // XXX This isn't serialized. Should it be here?

View File

@ -74,7 +74,7 @@ bool draws_something(drawvec &geom) {
return false; return false;
} }
static int metacmp(int m1, const std::vector<long long> &keys1, const std::vector<long long> &values1, char *stringpool1, int m2, const std::vector<long long> &keys2, const std::vector<long long> &values2, char *stringpool2); static int metacmp(const std::vector<long long> &keys1, const std::vector<long long> &values1, char *stringpool1, const std::vector<long long> &keys2, const std::vector<long long> &values2, char *stringpool2);
int coalindexcmp(const struct coalesce *c1, const struct coalesce *c2); int coalindexcmp(const struct coalesce *c1, const struct coalesce *c2);
struct coalesce { struct coalesce {
@ -87,7 +87,6 @@ struct coalesce {
unsigned long long index = 0; unsigned long long index = 0;
long long original_seq = 0; long long original_seq = 0;
int type = 0; int type = 0;
int m = 0;
bool coalesced = false; bool coalesced = false;
double spacing = 0; double spacing = 0;
bool has_id = false; bool has_id = false;
@ -131,7 +130,7 @@ int coalcmp(const void *v1, const void *v2) {
} }
} }
cmp = metacmp(c1->m, c1->keys, c1->values, c1->stringpool, c2->m, c2->keys, c2->values, c2->stringpool); cmp = metacmp(c1->keys, c1->values, c1->stringpool, c2->keys, c2->values, c2->stringpool);
if (cmp != 0) { if (cmp != 0) {
return cmp; return cmp;
} }
@ -196,9 +195,9 @@ mvt_value retrieve_string(long long off, char *stringpool, int *otype) {
return stringified_to_mvt_value(type, s); return stringified_to_mvt_value(type, s);
} }
void decode_meta(int m, std::vector<long long> const &metakeys, std::vector<long long> const &metavals, char *stringpool, mvt_layer &layer, mvt_feature &feature) { void decode_meta(std::vector<long long> const &metakeys, std::vector<long long> const &metavals, char *stringpool, mvt_layer &layer, mvt_feature &feature) {
int i; size_t i;
for (i = 0; i < m; i++) { for (i = 0; i < metakeys.size(); i++) {
int otype; int otype;
mvt_value key = retrieve_string(metakeys[i], stringpool, NULL); mvt_value key = retrieve_string(metakeys[i], stringpool, NULL);
mvt_value value = retrieve_string(metavals[i], stringpool, &otype); mvt_value value = retrieve_string(metavals[i], stringpool, &otype);
@ -207,9 +206,9 @@ void decode_meta(int m, std::vector<long long> const &metakeys, std::vector<long
} }
} }
static int metacmp(int m1, const std::vector<long long> &keys1, const std::vector<long long> &values1, char *stringpool1, int m2, const std::vector<long long> &keys2, const std::vector<long long> &values2, char *stringpool2) { static int metacmp(const std::vector<long long> &keys1, const std::vector<long long> &values1, char *stringpool1, const std::vector<long long> &keys2, const std::vector<long long> &values2, char *stringpool2) {
int i; size_t i;
for (i = 0; i < m1 && i < m2; i++) { for (i = 0; i < keys1.size() && i < keys2.size(); i++) {
mvt_value key1 = retrieve_string(keys1[i], stringpool1, NULL); mvt_value key1 = retrieve_string(keys1[i], stringpool1, NULL);
mvt_value key2 = retrieve_string(keys2[i], stringpool2, NULL); mvt_value key2 = retrieve_string(keys2[i], stringpool2, NULL);
@ -236,16 +235,16 @@ static int metacmp(int m1, const std::vector<long long> &keys1, const std::vecto
} }
} }
if (m1 < m2) { if (keys1.size() < keys2.size()) {
return -1; return -1;
} else if (m1 > m2) { } else if (keys1.size() > keys2.size()) {
return 1; return 1;
} else { } else {
return 0; return 0;
} }
} }
void rewrite(drawvec &geom, int z, int nextzoom, int maxzoom, long long *bbox, unsigned tx, unsigned ty, int buffer, int *within, long long *geompos, FILE **geomfile, const char *fname, signed char t, int layer, long long metastart, signed char feature_minzoom, int child_shards, int max_zoom_increment, long long seq, int tippecanoe_minzoom, int tippecanoe_maxzoom, int segment, unsigned *initial_x, unsigned *initial_y, int m, std::vector<long long> &metakeys, std::vector<long long> &metavals, bool has_id, unsigned long long id, unsigned long long index, long long extent) { void rewrite(drawvec &geom, int z, int nextzoom, int maxzoom, long long *bbox, unsigned tx, unsigned ty, int buffer, int *within, long long *geompos, FILE **geomfile, const char *fname, signed char t, int layer, long long metastart, signed char feature_minzoom, int child_shards, int max_zoom_increment, long long seq, int tippecanoe_minzoom, int tippecanoe_maxzoom, int segment, unsigned *initial_x, unsigned *initial_y, std::vector<long long> &metakeys, std::vector<long long> &metavals, bool has_id, unsigned long long id, unsigned long long index, long long extent) {
if (geom.size() > 0 && (nextzoom <= maxzoom || additional[A_EXTEND_ZOOMS])) { if (geom.size() > 0 && (nextzoom <= maxzoom || additional[A_EXTEND_ZOOMS])) {
int xo, yo; int xo, yo;
int span = 1 << (nextzoom - z); int span = 1 << (nextzoom - z);
@ -336,11 +335,10 @@ void rewrite(drawvec &geom, int z, int nextzoom, int maxzoom, long long *bbox, u
sf.geometry = geom2; sf.geometry = geom2;
sf.index = index; sf.index = index;
sf.extent = extent; sf.extent = extent;
sf.m = m;
sf.feature_minzoom = feature_minzoom; sf.feature_minzoom = feature_minzoom;
if (metastart < 0) { if (metastart < 0) {
for (int i = 0; i < m; i++) { for (size_t i = 0; i < metakeys.size(); i++) {
sf.keys.push_back(metakeys[i]); sf.keys.push_back(metakeys[i]);
sf.values.push_back(metavals[i]); sf.values.push_back(metavals[i]);
} }
@ -363,7 +361,6 @@ struct partial {
long long layer = 0; long long layer = 0;
long long original_seq = 0; long long original_seq = 0;
unsigned long long index = 0; unsigned long long index = 0;
int m = 0;
int segment = 0; int segment = 0;
bool reduced = 0; bool reduced = 0;
int z = 0; int z = 0;
@ -1318,7 +1315,7 @@ serial_feature next_feature(FILE *geoms, long long *geompos_in, char *metabase,
if (*first_time && pass == 1) { /* only write out the next zoom once, even if we retry */ if (*first_time && pass == 1) { /* only write out the next zoom once, even if we retry */
if (sf.tippecanoe_maxzoom == -1 || sf.tippecanoe_maxzoom >= nextzoom) { if (sf.tippecanoe_maxzoom == -1 || sf.tippecanoe_maxzoom >= nextzoom) {
rewrite(sf.geometry, z, nextzoom, maxzoom, sf.bbox, tx, ty, buffer, within, geompos, geomfile, fname, sf.t, sf.layer, sf.metapos, sf.feature_minzoom, child_shards, max_zoom_increment, sf.seq, sf.tippecanoe_minzoom, sf.tippecanoe_maxzoom, sf.segment, initial_x, initial_y, sf.m, sf.keys, sf.values, sf.has_id, sf.id, sf.index, sf.extent); rewrite(sf.geometry, z, nextzoom, maxzoom, sf.bbox, tx, ty, buffer, within, geompos, geomfile, fname, sf.t, sf.layer, sf.metapos, sf.feature_minzoom, child_shards, max_zoom_increment, sf.seq, sf.tippecanoe_minzoom, sf.tippecanoe_maxzoom, sf.segment, initial_x, initial_y, sf.keys, sf.values, sf.has_id, sf.id, sf.index, sf.extent);
} }
} }
@ -1337,7 +1334,7 @@ serial_feature next_feature(FILE *geoms, long long *geompos_in, char *metabase,
std::map<std::string, mvt_value> attributes; std::map<std::string, mvt_value> attributes;
std::string layername = (*layer_unmaps)[sf.segment][sf.layer]; std::string layername = (*layer_unmaps)[sf.segment][sf.layer];
for (size_t i = 0; i < sf.m; i++) { for (size_t i = 0; i < sf.keys.size(); i++) {
std::string key = stringpool + pool_off[sf.segment] + sf.keys[i] + 1; std::string key = stringpool + pool_off[sf.segment] + sf.keys[i] + 1;
serial_val sv; serial_val sv;
@ -1393,13 +1390,12 @@ serial_feature next_feature(FILE *geoms, long long *geompos_in, char *metabase,
// Remove nulls, now that the filter has run // Remove nulls, now that the filter has run
for (ssize_t i = sf.m - 1; i >= 0; i--) { for (ssize_t i = sf.keys.size() - 1; i >= 0; i--) {
int type = (stringpool + pool_off[sf.segment])[sf.values[i]]; int type = (stringpool + pool_off[sf.segment])[sf.values[i]];
if (type == mvt_null) { if (type == mvt_null) {
sf.keys.erase(sf.keys.begin() + i); sf.keys.erase(sf.keys.begin() + i);
sf.values.erase(sf.values.begin() + i); sf.values.erase(sf.values.begin() + i);
sf.m--;
} }
} }
@ -1486,7 +1482,7 @@ void *run_prefilter(void *v) {
tmp_feature.geometry[i].y += sy; tmp_feature.geometry[i].y += sy;
} }
decode_meta(sf.m, sf.keys, sf.values, rpa->stringpool + rpa->pool_off[sf.segment], tmp_layer, tmp_feature); decode_meta(sf.keys, sf.values, rpa->stringpool + rpa->pool_off[sf.segment], tmp_layer, tmp_feature);
tmp_layer.features.push_back(tmp_feature); tmp_layer.features.push_back(tmp_feature);
layer_to_geojson(tmp_layer, 0, 0, 0, false, true, false, true, sf.index, sf.seq, sf.extent, true, state); layer_to_geojson(tmp_layer, 0, 0, 0, false, true, false, true, sf.index, sf.seq, sf.extent, true, state);
@ -1548,7 +1544,7 @@ void preserve_attribute(attribute_op op, std::map<std::string, accum_state> &att
// If the feature being merged into has this key as a metadata reference, // If the feature being merged into has this key as a metadata reference,
// promote it to a full_key so it can be modified // promote it to a full_key so it can be modified
for (int i = 0; i < p.m; i++) { for (size_t i = 0; i < p.keys.size(); i++) {
if (strcmp(key.c_str(), stringpool + pool_off[p.segment] + p.keys[i] + 1) == 0) { if (strcmp(key.c_str(), stringpool + pool_off[p.segment] + p.keys[i] + 1) == 0) {
serial_val sv; serial_val sv;
sv.s = stringpool + pool_off[p.segment] + p.values[i] + 1; sv.s = stringpool + pool_off[p.segment] + p.values[i] + 1;
@ -1559,7 +1555,6 @@ void preserve_attribute(attribute_op op, std::map<std::string, accum_state> &att
p.keys.erase(p.keys.begin() + i); p.keys.erase(p.keys.begin() + i);
p.values.erase(p.values.begin() + i); p.values.erase(p.values.begin() + i);
p.m--;
break; break;
} }
@ -1631,7 +1626,7 @@ void preserve_attribute(attribute_op op, std::map<std::string, accum_state> &att
} }
void preserve_attributes(std::map<std::string, attribute_op> const *attribute_accum, std::map<std::string, accum_state> &attribute_accum_state, serial_feature &sf, char *stringpool, long long *pool_off, partial &p) { void preserve_attributes(std::map<std::string, attribute_op> const *attribute_accum, std::map<std::string, accum_state> &attribute_accum_state, serial_feature &sf, char *stringpool, long long *pool_off, partial &p) {
for (size_t i = 0; i < sf.m; i++) { for (size_t i = 0; i < sf.keys.size(); i++) {
std::string key = stringpool + pool_off[sf.segment] + sf.keys[i] + 1; std::string key = stringpool + pool_off[sf.segment] + sf.keys[i] + 1;
serial_val sv; serial_val sv;
@ -1914,7 +1909,6 @@ long long write_tile(FILE *geoms, long long *geompos_in, char *metabase, char *s
partial p; partial p;
p.geoms.push_back(sf.geometry); p.geoms.push_back(sf.geometry);
p.layer = sf.layer; p.layer = sf.layer;
p.m = sf.m;
p.t = sf.t; p.t = sf.t;
p.segment = sf.segment; p.segment = sf.segment;
p.original_seq = sf.seq; p.original_seq = sf.seq;
@ -2061,7 +2055,6 @@ long long write_tile(FILE *geoms, long long *geompos_in, char *metabase, char *s
pgeoms[j].clear(); pgeoms[j].clear();
c.coalesced = false; c.coalesced = false;
c.original_seq = original_seq; c.original_seq = original_seq;
c.m = partials[i].m;
c.stringpool = stringpool + pool_off[partials[i].segment]; c.stringpool = stringpool + pool_off[partials[i].segment];
c.keys = partials[i].keys; c.keys = partials[i].keys;
c.values = partials[i].values; c.values = partials[i].values;
@ -2188,7 +2181,7 @@ long long write_tile(FILE *geoms, long long *geompos_in, char *metabase, char *s
feature.id = layer_features[x].id; feature.id = layer_features[x].id;
feature.has_id = layer_features[x].has_id; feature.has_id = layer_features[x].has_id;
decode_meta(layer_features[x].m, layer_features[x].keys, layer_features[x].values, layer_features[x].stringpool, layer, feature); decode_meta(layer_features[x].keys, layer_features[x].values, layer_features[x].stringpool, layer, feature);
for (size_t a = 0; a < layer_features[x].full_keys.size(); a++) { for (size_t a = 0; a < layer_features[x].full_keys.size(); a++) {
serial_val sv = layer_features[x].full_values[a]; serial_val sv = layer_features[x].full_values[a];
mvt_value v = stringified_to_mvt_value(sv.type, sv.s.c_str()); mvt_value v = stringified_to_mvt_value(sv.type, sv.s.c_str());