mirror of
https://github.com/corda/corda.git
synced 2025-01-04 20:24:17 +00:00
fix ArrayIndexOutOfBoundsException when decoding a UTF-8 stream
This commit is contained in:
parent
852d77d0b5
commit
c63668c1ce
@ -50,9 +50,18 @@ public class Utf8 {
|
||||
while (i < offset+length) {
|
||||
int x = s8[i++];
|
||||
if ((x & 0x080) == 0x0) { // 1 byte char
|
||||
if (x == 0) ++i; // 2 byte null char
|
||||
if (x == 0) { // 2 byte null char
|
||||
if (i == offset + length) {
|
||||
return null;
|
||||
}
|
||||
++ i;
|
||||
}
|
||||
cram(buf, j++, x);
|
||||
} else if ((x & 0x0e0) == 0x0c0) { // 2 byte char
|
||||
if (i == offset + length) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (!isMultiByte) {
|
||||
buf = widen(buf, j, length-1);
|
||||
isMultiByte = true;
|
||||
@ -60,6 +69,10 @@ public class Utf8 {
|
||||
int y = s8[i++];
|
||||
cram(buf, j++, ((x & 0x1f) << 6) | (y & 0x3f));
|
||||
} else if ((x & 0x0f0) == 0x0e0) { // 3 byte char
|
||||
if (i + 1 >= offset + length) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (!isMultiByte) {
|
||||
buf = widen(buf, j, length-2);
|
||||
isMultiByte = true;
|
||||
@ -74,8 +87,13 @@ public class Utf8 {
|
||||
|
||||
public static char[] decode16(byte[] s8, int offset, int length) {
|
||||
Object decoded = decode(s8, offset, length);
|
||||
if (decoded instanceof char[]) return (char[])decoded;
|
||||
return (char[])widen(decoded, length, length);
|
||||
if (decoded == null) {
|
||||
return null;
|
||||
} else if (decoded instanceof char[]) {
|
||||
return (char[])decoded;
|
||||
} else {
|
||||
return (char[])widen(decoded, length, length);
|
||||
}
|
||||
}
|
||||
|
||||
private static void cram(Object data, int index, int val) {
|
||||
|
@ -13,6 +13,8 @@ package java.io;
|
||||
import avian.Utf8;
|
||||
|
||||
public class InputStreamReader extends Reader {
|
||||
private static final int MultibytePadding = 4;
|
||||
|
||||
private final InputStream in;
|
||||
|
||||
public InputStreamReader(InputStream in) {
|
||||
@ -28,19 +30,60 @@ public class InputStreamReader extends Reader {
|
||||
throw new UnsupportedEncodingException(encoding);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public int read(char[] b, int offset, int length) throws IOException {
|
||||
byte[] buffer = new byte[length];
|
||||
int c = in.read(buffer);
|
||||
if (length == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (c <= 0) return c;
|
||||
byte[] buffer = new byte[length + MultibytePadding];
|
||||
int bufferLength = length;
|
||||
int bufferOffset = 0;
|
||||
while (true) {
|
||||
int c = in.read(buffer, bufferOffset, bufferLength);
|
||||
|
||||
char[] buffer16 = Utf8.decode16(buffer, 0, c);
|
||||
if (c <= 0) {
|
||||
if (bufferOffset > 0) {
|
||||
// if we've reached the end of the stream while trying to
|
||||
// read a multibyte character, we still need to return any
|
||||
// competely-decoded characters, plus \ufffd to indicate an
|
||||
// unknown character
|
||||
c = 1;
|
||||
while (bufferOffset > 0) {
|
||||
char[] buffer16 = Utf8.decode16(buffer, 0, bufferOffset);
|
||||
|
||||
System.arraycopy(buffer16, 0, b, offset, buffer16.length);
|
||||
if (buffer16 != null) {
|
||||
System.arraycopy(buffer16, 0, b, offset, buffer16.length);
|
||||
|
||||
c = buffer16.length + 1;
|
||||
break;
|
||||
} else {
|
||||
-- bufferOffset;
|
||||
}
|
||||
}
|
||||
|
||||
return buffer16.length;
|
||||
b[offset + c - 1] = '\ufffd';
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
bufferOffset += c;
|
||||
|
||||
char[] buffer16 = Utf8.decode16(buffer, 0, bufferOffset);
|
||||
|
||||
if (buffer16 != null) {
|
||||
bufferOffset = 0;
|
||||
|
||||
System.arraycopy(buffer16, 0, b, offset, buffer16.length);
|
||||
|
||||
return buffer16.length;
|
||||
} else {
|
||||
// the buffer ended in an incomplete multibyte character, so
|
||||
// we try to read a another byte at a time until it's complete
|
||||
bufferLength = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
|
@ -21,7 +21,66 @@ public class Strings {
|
||||
return true;
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
private static void testDecode(final boolean prematureEOS) throws Exception {
|
||||
java.io.Reader r = new java.io.InputStreamReader
|
||||
(new java.io.InputStream() {
|
||||
int state = 0;
|
||||
|
||||
public int read() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
public int read(byte[] b, int offset, int length) {
|
||||
if (length == 0) return 0;
|
||||
|
||||
switch (state) {
|
||||
case 0:
|
||||
b[offset] = (byte) 0xc2;
|
||||
state = 1;
|
||||
return 1;
|
||||
|
||||
case 1:
|
||||
b[offset] = (byte) 0xae;
|
||||
state = 2;
|
||||
return 1;
|
||||
|
||||
case 2:
|
||||
b[offset] = (byte) 0xea;
|
||||
state = 3;
|
||||
return 1;
|
||||
|
||||
case 3:
|
||||
b[offset] = (byte) 0xba;
|
||||
state = prematureEOS ? 5 : 4;
|
||||
return 1;
|
||||
|
||||
case 4:
|
||||
b[offset] = (byte) 0xaf;
|
||||
state = 5;
|
||||
return 1;
|
||||
|
||||
case 5:
|
||||
return -1;
|
||||
|
||||
default:
|
||||
throw new RuntimeException();
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
char[] buffer = new char[2];
|
||||
int offset = 0;
|
||||
while (offset < buffer.length) {
|
||||
int c = r.read(buffer, offset, buffer.length - offset);
|
||||
if (c == -1) break;
|
||||
offset += c;
|
||||
}
|
||||
|
||||
expect(new String(buffer, 0, offset).equals
|
||||
(prematureEOS ? "\u00ae\ufffd" : "\u00ae\uaeaf"));
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
expect(new String(new byte[] { 99, 111, 109, 46, 101, 99, 111, 118, 97,
|
||||
116, 101, 46, 110, 97, 116, 46, 98, 117,
|
||||
115, 46, 83, 121, 109, 98, 111, 108 })
|
||||
@ -77,5 +136,8 @@ public class Strings {
|
||||
expect(Character.forDigit(Character.digit('b', 16), 16) == 'b');
|
||||
expect(Character.forDigit(Character.digit('f', 16), 16) == 'f');
|
||||
expect(Character.forDigit(Character.digit('z', 36), 36) == 'z');
|
||||
|
||||
testDecode(false);
|
||||
testDecode(true);
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user