mirror of
https://github.com/mapbox/tippecanoe.git
synced 2025-04-17 23:46:13 +00:00
Guess that any non-UTF-8 attributes are encoded as ISO-8859-1
This commit is contained in:
parent
d75258cc43
commit
27139eb295
@ -4,6 +4,7 @@
|
||||
#include "serial.hpp"
|
||||
#include "projection.hpp"
|
||||
#include "main.hpp"
|
||||
#include "text.hpp"
|
||||
#include "milo/dtoa_milo.h"
|
||||
|
||||
static void check(size_t bits, void *p, void *end) {
|
||||
@ -41,6 +42,30 @@ static double toDouble(unsigned char *ba) {
|
||||
return *((double *) ba);
|
||||
}
|
||||
|
||||
std::string forceutf8(std::string const &s) {
|
||||
if (check_utf8(s).size() == 0) {
|
||||
return s;
|
||||
}
|
||||
|
||||
std::string out;
|
||||
for (size_t i = 0; i < s.size(); i++) {
|
||||
to_utf8(s[i] & 0xFF, out);
|
||||
}
|
||||
|
||||
static bool warned = false;
|
||||
if (!warned) {
|
||||
std::string trimmed = out;
|
||||
while (trimmed.size() > 0 && trimmed[trimmed.size() - 1] == ' ') {
|
||||
trimmed.pop_back();
|
||||
}
|
||||
|
||||
fprintf(stderr, "Warning: string \"%s\" is not UTF-8; assuming ISO-8859-1\n", trimmed.c_str());
|
||||
warned = true;
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
drawvec decode_geometry(unsigned char *data, size_t len, int *type) {
|
||||
drawvec dv;
|
||||
|
||||
@ -235,7 +260,7 @@ void parse_shapefile(struct serialization_state *sst, std::string fname, int lay
|
||||
}
|
||||
}
|
||||
|
||||
columns.push_back(std::string((char *) dbcolumns + i, j - i));
|
||||
columns.push_back(forceutf8(std::string((char *) dbcolumns + i, j - i)));
|
||||
column_widths.push_back(dbcolumns[i + 16]);
|
||||
column_types.push_back(dbcolumns[i + 11]);
|
||||
}
|
||||
@ -274,7 +299,7 @@ void parse_shapefile(struct serialization_state *sst, std::string fname, int lay
|
||||
|
||||
size_t dbp = 1;
|
||||
for (size_t i = 0; i < columns.size(); i++) {
|
||||
std::string s = std::string((char *) (db + dbp), column_widths[i]);
|
||||
std::string s = forceutf8(std::string((char *) (db + dbp), column_widths[i]));
|
||||
dbp += column_widths[i];
|
||||
|
||||
while (s.size() > 0 && s[s.size() - 1] == ' ') {
|
||||
|
20
text.cpp
20
text.cpp
@ -5,7 +5,7 @@
|
||||
* Returns an empty string if `s` is valid utf8;
|
||||
* otherwise returns an error message.
|
||||
*/
|
||||
std::string check_utf8(std::string s) {
|
||||
std::string check_utf8(std::string const &s) {
|
||||
for (size_t i = 0; i < s.size(); i++) {
|
||||
size_t fail = 0;
|
||||
|
||||
@ -122,3 +122,21 @@ std::string truncate16(std::string const &s, size_t runes) {
|
||||
|
||||
return std::string(s, 0, lastgood - start);
|
||||
}
|
||||
|
||||
// Appends the UTF-8 encoding of code point `ch` to `s`.
//
//   ch <= 0x7F    -> 1 byte
//   ch <= 0x7FF   -> 2 bytes
//   ch <= 0xFFFF  -> 3 bytes
//   otherwise     -> 4 bytes
//
// The three-byte range is inclusive of 0xFFFF: encoding U+FFFF with the
// four-byte form would produce an overlong (invalid) sequence.
// No validation is performed here: surrogate values and values above
// 0x10FFFF are encoded as given.
void to_utf8(unsigned ch, std::string &s) {
	if (ch <= 0x7F) {
		s.push_back(static_cast<char>(ch));
	} else if (ch <= 0x7FF) {
		s.push_back(static_cast<char>(0xC0 | (ch >> 6)));
		s.push_back(static_cast<char>(0x80 | (ch & 0x3F)));
	} else if (ch <= 0xFFFF) {
		s.push_back(static_cast<char>(0xE0 | (ch >> 12)));
		s.push_back(static_cast<char>(0x80 | ((ch >> 6) & 0x3F)));
		s.push_back(static_cast<char>(0x80 | (ch & 0x3F)));
	} else {
		s.push_back(static_cast<char>(0xF0 | (ch >> 18)));
		s.push_back(static_cast<char>(0x80 | ((ch >> 12) & 0x3F)));
		s.push_back(static_cast<char>(0x80 | ((ch >> 6) & 0x3F)));
		s.push_back(static_cast<char>(0x80 | (ch & 0x3F)));
	}
}
|
||||
|
3
text.hpp
3
text.hpp
@ -3,8 +3,9 @@
|
||||
|
||||
#include <string>
|
||||
|
||||
std::string check_utf8(std::string text);
|
||||
std::string check_utf8(std::string const &text);
|
||||
const char *utf8_next(const char *s, long *c);
|
||||
std::string truncate16(std::string const &s, size_t runes);
|
||||
void to_utf8(unsigned c, std::string &s);
|
||||
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user