Ensure Unicode strings are correctly handled

This commit is contained in:
Jeremy Lakeman 2015-06-29 14:30:26 +09:30
parent 653039e685
commit 4891348684
5 changed files with 53 additions and 18 deletions

View File

@ -182,7 +182,7 @@ public class ServalDClient implements ServalDHttpConnectionFactory
}
conn.setAllowUserInteraction(false);
try {
conn.addRequestProperty("Authorization", "Basic " + Base64.encode((restfulUsername + ":" + restfulPassword).getBytes("US-ASCII")));
conn.addRequestProperty("Authorization", "Basic " + Base64.encode((restfulUsername + ":" + restfulPassword).getBytes("UTF-8")));
}
catch (UnsupportedEncodingException e) {
throw new ServalDInterfaceException("invalid RESTful password", e);

View File

@ -47,14 +47,14 @@ public class MeshMSCommon
if (!"application/json".equals(conn.getContentType()))
throw new ServalDInterfaceException("unexpected HTTP Content-Type: " + conn.getContentType());
if (conn.getResponseCode() == HttpURLConnection.HTTP_FORBIDDEN) {
JSONTokeniser json = new JSONTokeniser(new InputStreamReader(conn.getErrorStream(), "US-ASCII"));
JSONTokeniser json = new JSONTokeniser(new InputStreamReader(conn.getErrorStream(), "UTF-8"));
Status status = decodeRestfulStatus(json);
throwRestfulResponseExceptions(status, conn.getURL());
throw new ServalDInterfaceException("unexpected MeshMS status = " + status.meshms_status_code + ", \"" + status.meshms_status_message + "\"");
}
for (int code: expected_response_codes) {
if (conn.getResponseCode() == code) {
JSONTokeniser json = new JSONTokeniser(new InputStreamReader(conn.getInputStream(), "US-ASCII"));
JSONTokeniser json = new JSONTokeniser(new InputStreamReader(conn.getInputStream(), "UTF-8"));
return json;
}
}
@ -121,7 +121,7 @@ public class MeshMSCommon
conn.setRequestProperty("Content-Type", "multipart/form-data; boundary=" + boundary);
conn.connect();
OutputStream ost = conn.getOutputStream();
PrintStream wr = new PrintStream(ost, false, "US-ASCII");
PrintStream wr = new PrintStream(ost, false, "UTF-8");
wr.print("--" + boundary + "\r\n");
wr.print("Content-Disposition: form-data; name=\"message\"\r\n");
wr.print("Content-Type: text/plain; charset=utf-8\r\n");

View File

@ -90,7 +90,7 @@ public class RhizomeCommon
if (!conn.getContentType().equals("application/json"))
throw new ServalDInterfaceException("unexpected HTTP Content-Type: " + conn.getContentType());
if (status.http_status_code >= 300) {
JSONTokeniser json = new JSONTokeniser(new InputStreamReader(conn.getErrorStream(), "US-ASCII"));
JSONTokeniser json = new JSONTokeniser(new InputStreamReader(conn.getErrorStream(), "UTF-8"));
decodeRestfulStatus(status, json);
}
if (status.http_status_code == HttpURLConnection.HTTP_FORBIDDEN)
@ -123,7 +123,7 @@ public class RhizomeCommon
Status status = receiveResponse(conn, expected_response_codes);
if (!conn.getContentType().equals("application/json"))
throw new ServalDInterfaceException("unexpected HTTP Content-Type: " + conn.getContentType());
return new JSONTokeniser(new InputStreamReader(status.input_stream, "US-ASCII"));
return new JSONTokeniser(new InputStreamReader(status.input_stream, "UTF-8"));
}
protected static void decodeHeaderBundleStatus(Status status, HttpURLConnection conn) throws ServalDInterfaceException
@ -382,7 +382,7 @@ public class RhizomeCommon
conn.setRequestProperty("Content-Type", "multipart/form-data; boundary=" + boundary);
conn.connect();
OutputStream ost = conn.getOutputStream();
PrintStream wr = new PrintStream(ost, false, "US-ASCII");
PrintStream wr = new PrintStream(ost, false, "UTF-8");
wr.print(new Object(){}.getClass().getEnclosingClass().getName());
if (author != null) {
wr.print("\r\n--" + boundary + "\r\n");

View File

@ -82,7 +82,7 @@ public class RhizomeIncompleteManifest {
*/
public void toTextFormat(OutputStream os) throws IOException
{
OutputStreamWriter osw = new OutputStreamWriter(os, "US-ASCII");
OutputStreamWriter osw = new OutputStreamWriter(os, "UTF-8");
if (id != null)
osw.write("id=" + id.toHex() + "\n");
if (version != null)
@ -161,7 +161,7 @@ public class RhizomeIncompleteManifest {
public void parseTextFormat(InputStream in) throws IOException, RhizomeManifestParseException
{
try {
InputStreamReader inr = new InputStreamReader(in, "US-ASCII");
InputStreamReader inr = new InputStreamReader(in, "UTF-8");
int pos = 0;
int lnum = 1;
int eq = -1;

View File

@ -663,7 +663,35 @@ strbuf strbuf_json_boolean(strbuf sb, int boolean)
return sb;
}
static void _json_char(strbuf sb, char c)
/*
 * Bias to subtract after the raw bytes of a UTF-8 sequence have been
 * accumulated six bits at a time, indexed by (sequence length - 1).
 * Each entry is the sum of the marker bits carried by the lead byte and the
 * continuation bytes, e.g. for two bytes: (0xC0 << 6) + 0x80 == 0x3080.
 */
static const uint32_t offsetsFromUTF8[6] = {
  0x00000000UL, 0x00003080UL, 0x000E2080UL,
  0x03C82080UL, 0xFA082080UL, 0x82082080UL
};

/*
 * Returns non-zero when c can start a UTF-8 sequence, i.e. when it is not a
 * continuation byte of the form 10xxxxxx.
 */
static uint8_t isutf(char c)
{
  return (c & 0xC0) != 0x80;
}

/*
 * Decodes the code point that starts at s[*i] in a NUL-terminated string and
 * advances *i past the bytes consumed.
 *
 * Returns 0 without advancing when s[*i] is the terminating NUL.
 *
 * No validation of the encoding is performed: the first byte is taken
 * unconditionally and every following continuation byte is absorbed.  The
 * number of bytes absorbed is capped at the size of offsetsFromUTF8 so that a
 * malformed run of continuation bytes cannot index past the end of the table.
 */
static uint32_t u8_nextchar(const char *s, unsigned *i)
{
  if (!s[*i])
    return 0;

  const int max_len = (int)(sizeof offsetsFromUTF8 / sizeof offsetsFromUTF8[0]);
  uint32_t ch = 0;
  int sz = 0;
  do {
    ch <<= 6;
    ch += (unsigned char)s[(*i)++];
    sz++;
  } while (sz < max_len && s[*i] && !isutf(s[*i]));

  /* Strip the accumulated lead/continuation marker bits. */
  ch -= offsetsFromUTF8[sz-1];
  return ch;
}
static void _json_char(strbuf sb, uint32_t c)
{
if (c == '"' || c == '\\') {
strbuf_putc(sb, '\\');
@ -679,8 +707,8 @@ static void _json_char(strbuf sb, char c)
strbuf_puts(sb, "\\r");
else if (c == '\t')
strbuf_puts(sb, "\\t");
else if (iscntrl(c))
strbuf_sprintf(sb, "\\u%04X", (unsigned char) c);
else if (c>0x7f || iscntrl(c))
strbuf_sprintf(sb, "\\u%04X", c);
else
strbuf_putc(sb, c);
}
@ -689,8 +717,10 @@ strbuf strbuf_json_string(strbuf sb, const char *str)
{
if (str) {
strbuf_putc(sb, '"');
for (; *str; ++str)
_json_char(sb, *str);
unsigned pos=0;
uint32_t c;
while((c = u8_nextchar(str, &pos)))
_json_char(sb, c);
strbuf_putc(sb, '"');
} else
strbuf_json_null(sb);
@ -699,10 +729,15 @@ strbuf strbuf_json_string(strbuf sb, const char *str)
/*
 * Decodes one code point from the length-delimited buffer s[0..len), starting
 * at s[*pos], and advances *pos past the bytes consumed.  The caller must
 * ensure *pos < len on entry.
 *
 * Never reads at or beyond s[len].  A byte that is not a valid lead byte is
 * returned as-is; a sequence cut short by the end of the buffer or by a
 * non-continuation byte yields whatever bits were gathered so far.
 */
static uint32_t _json_next_codepoint(const char *s, size_t len, size_t *pos)
{
  uint32_t cp = (unsigned char)s[(*pos)++];
  unsigned extra;

  if (cp < 0x80)
    return cp;
  if ((cp & 0xE0) == 0xC0) {
    cp &= 0x1F;
    extra = 1;
  } else if ((cp & 0xF0) == 0xE0) {
    cp &= 0x0F;
    extra = 2;
  } else if ((cp & 0xF8) == 0xF0) {
    cp &= 0x07;
    extra = 3;
  } else
    return cp; /* stray continuation byte or invalid lead byte */

  for (; extra && *pos < len && ((unsigned char)s[*pos] & 0xC0) == 0x80; --extra)
    cp = (cp << 6) | ((unsigned char)s[(*pos)++] & 0x3F);
  return cp;
}

/*
 * Appends the first strlen bytes of str to sb as a double-quoted JSON string,
 * passing each decoded code point to _json_char() for escaping.
 *
 * A NULL str is written via strbuf_json_null().  A non-NULL str with
 * strlen == 0 is written as an empty pair of quotes, matching what
 * strbuf_json_string() does for an empty NUL-terminated string.
 *
 * The buffer is treated as length-delimited: it need not be NUL-terminated,
 * and an embedded NUL byte is handed to _json_char() like any other character
 * rather than ending the output early.
 *
 * Returns sb.
 */
strbuf strbuf_json_string_len(strbuf sb, const char *str, size_t strlen)
{
  if (str) {
    strbuf_putc(sb, '"');
    size_t pos = 0;
    while (pos < strlen)
      _json_char(sb, _json_next_codepoint(str, strlen, &pos));
    strbuf_putc(sb, '"');
  } else
    strbuf_json_null(sb);
  return sb;
}