mirror of
https://github.com/mapbox/tippecanoe.git
synced 2025-01-21 03:55:00 +00:00
Handle surrogate pairs in JSON strings
This commit is contained in:
parent
cc734c8709
commit
621cf72e5a
@ -569,12 +569,84 @@ again:
|
|||||||
struct string val;
|
struct string val;
|
||||||
string_init(&val);
|
string_init(&val);
|
||||||
|
|
||||||
|
int surrogate = -1;
|
||||||
while ((c = read_wrap(j)) != EOF) {
|
while ((c = read_wrap(j)) != EOF) {
|
||||||
if (c == '"') {
|
if (c == '"') {
|
||||||
|
if (surrogate >= 0) {
|
||||||
|
string_append(&val, 0xE0 | (surrogate >> 12));
|
||||||
|
string_append(&val, 0x80 | ((surrogate >> 6) & 0x3F));
|
||||||
|
string_append(&val, 0x80 | (surrogate & 0x3F));
|
||||||
|
surrogate = -1;
|
||||||
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
} else if (c == '\\') {
|
} else if (c == '\\') {
|
||||||
c = read_wrap(j);
|
c = read_wrap(j);
|
||||||
|
|
||||||
|
if (c == 'u') {
|
||||||
|
char hex[5] = "aaaa";
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < 4; i++) {
|
||||||
|
hex[i] = read_wrap(j);
|
||||||
|
if (hex[i] < '0' || (hex[i] > '9' && hex[i] < 'A') || (hex[i] > 'F' && hex[i] < 'a') || hex[i] > 'f') {
|
||||||
|
j->error = "Invalid \\u hex character";
|
||||||
|
string_free(&val);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned long ch = strtoul(hex, NULL, 16);
|
||||||
|
if (ch >= 0xd800 && ch <= 0xdbff) {
|
||||||
|
if (surrogate < 0) {
|
||||||
|
surrogate = ch;
|
||||||
|
} else {
|
||||||
|
// Two high surrogates in a row cannot form a pair, so output the pending one by itself
|
||||||
|
// and keep the new one, which might still be the first half of a legitimate pair.
|
||||||
|
string_append(&val, 0xE0 | (surrogate >> 12));
|
||||||
|
string_append(&val, 0x80 | ((surrogate >> 6) & 0x3F));
|
||||||
|
string_append(&val, 0x80 | (surrogate & 0x3F));
|
||||||
|
surrogate = ch;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
} else if (ch >= 0xdc00 && c <= 0xdfff) {
|
||||||
|
if (surrogate >= 0) {
|
||||||
|
long c1 = surrogate - 0xd800;
|
||||||
|
long c2 = ch - 0xdc00;
|
||||||
|
ch = ((c1 << 10) | c2) + 0x010000;
|
||||||
|
surrogate = -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (surrogate >= 0) {
|
||||||
|
string_append(&val, 0xE0 | (surrogate >> 12));
|
||||||
|
string_append(&val, 0x80 | ((surrogate >> 6) & 0x3F));
|
||||||
|
string_append(&val, 0x80 | (surrogate & 0x3F));
|
||||||
|
surrogate = -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ch <= 0x7F) {
|
||||||
|
string_append(&val, ch);
|
||||||
|
} else if (ch <= 0x7FF) {
|
||||||
|
string_append(&val, 0xC0 | (ch >> 6));
|
||||||
|
string_append(&val, 0x80 | (ch & 0x3F));
|
||||||
|
} else if (ch < 0xFFFF) {
|
||||||
|
string_append(&val, 0xE0 | (ch >> 12));
|
||||||
|
string_append(&val, 0x80 | ((ch >> 6) & 0x3F));
|
||||||
|
string_append(&val, 0x80 | (ch & 0x3F));
|
||||||
|
} else {
|
||||||
|
string_append(&val, 0xF0 | (ch >> 18));
|
||||||
|
string_append(&val, 0x80 | ((ch >> 12) & 0x3F));
|
||||||
|
string_append(&val, 0x80 | ((ch >> 6) & 0x3F));
|
||||||
|
string_append(&val, 0x80 | (ch & 0x3F));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (surrogate >= 0) {
|
||||||
|
string_append(&val, 0xE0 | (surrogate >> 12));
|
||||||
|
string_append(&val, 0x80 | ((surrogate >> 6) & 0x3F));
|
||||||
|
string_append(&val, 0x80 | (surrogate & 0x3F));
|
||||||
|
surrogate = -1;
|
||||||
|
}
|
||||||
|
|
||||||
if (c == '"') {
|
if (c == '"') {
|
||||||
string_append(&val, '"');
|
string_append(&val, '"');
|
||||||
} else if (c == '\\') {
|
} else if (c == '\\') {
|
||||||
@ -591,38 +663,24 @@ again:
|
|||||||
string_append(&val, '\r');
|
string_append(&val, '\r');
|
||||||
} else if (c == 't') {
|
} else if (c == 't') {
|
||||||
string_append(&val, '\t');
|
string_append(&val, '\t');
|
||||||
} else if (c == 'u') {
|
|
||||||
char hex[5] = "aaaa";
|
|
||||||
int i;
|
|
||||||
for (i = 0; i < 4; i++) {
|
|
||||||
hex[i] = read_wrap(j);
|
|
||||||
if (hex[i] < '0' || (hex[i] > '9' && hex[i] < 'A') || (hex[i] > 'F' && hex[i] < 'a') || hex[i] > 'f') {
|
|
||||||
j->error = "Invalid \\u hex character";
|
|
||||||
string_free(&val);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
unsigned long ch = strtoul(hex, NULL, 16);
|
|
||||||
if (ch <= 0x7F) {
|
|
||||||
string_append(&val, ch);
|
|
||||||
} else if (ch <= 0x7FF) {
|
|
||||||
string_append(&val, 0xC0 | (ch >> 6));
|
|
||||||
string_append(&val, 0x80 | (ch & 0x3F));
|
|
||||||
} else {
|
|
||||||
string_append(&val, 0xE0 | (ch >> 12));
|
|
||||||
string_append(&val, 0x80 | ((ch >> 6) & 0x3F));
|
|
||||||
string_append(&val, 0x80 | (ch & 0x3F));
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
j->error = "Found backslash followed by unknown character";
|
j->error = "Found backslash followed by unknown character";
|
||||||
string_free(&val);
|
string_free(&val);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
} else if (c < ' ') {
|
} else if (c < ' ') {
|
||||||
j->error = "Found control character in string";
|
j->error = "Found control character in string";
|
||||||
string_free(&val);
|
string_free(&val);
|
||||||
return NULL;
|
return NULL;
|
||||||
} else {
|
} else {
|
||||||
|
if (surrogate >= 0) {
|
||||||
|
string_append(&val, 0xE0 | (surrogate >> 12));
|
||||||
|
string_append(&val, 0x80 | ((surrogate >> 6) & 0x3F));
|
||||||
|
string_append(&val, 0x80 | (surrogate & 0x3F));
|
||||||
|
surrogate = -1;
|
||||||
|
}
|
||||||
|
|
||||||
string_append(&val, c);
|
string_append(&val, c);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user