mirror of
https://github.com/corda/corda.git
synced 2025-01-22 20:38:05 +00:00
fix ArrayIndexOutOfBoundsException when decoding a UTF-8 stream
This commit is contained in:
parent
852d77d0b5
commit
c63668c1ce
@ -50,9 +50,18 @@ public class Utf8 {
|
|||||||
while (i < offset+length) {
|
while (i < offset+length) {
|
||||||
int x = s8[i++];
|
int x = s8[i++];
|
||||||
if ((x & 0x080) == 0x0) { // 1 byte char
|
if ((x & 0x080) == 0x0) { // 1 byte char
|
||||||
if (x == 0) ++i; // 2 byte null char
|
if (x == 0) { // 2 byte null char
|
||||||
|
if (i == offset + length) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
++ i;
|
||||||
|
}
|
||||||
cram(buf, j++, x);
|
cram(buf, j++, x);
|
||||||
} else if ((x & 0x0e0) == 0x0c0) { // 2 byte char
|
} else if ((x & 0x0e0) == 0x0c0) { // 2 byte char
|
||||||
|
if (i == offset + length) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
if (!isMultiByte) {
|
if (!isMultiByte) {
|
||||||
buf = widen(buf, j, length-1);
|
buf = widen(buf, j, length-1);
|
||||||
isMultiByte = true;
|
isMultiByte = true;
|
||||||
@ -60,6 +69,10 @@ public class Utf8 {
|
|||||||
int y = s8[i++];
|
int y = s8[i++];
|
||||||
cram(buf, j++, ((x & 0x1f) << 6) | (y & 0x3f));
|
cram(buf, j++, ((x & 0x1f) << 6) | (y & 0x3f));
|
||||||
} else if ((x & 0x0f0) == 0x0e0) { // 3 byte char
|
} else if ((x & 0x0f0) == 0x0e0) { // 3 byte char
|
||||||
|
if (i + 1 >= offset + length) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
if (!isMultiByte) {
|
if (!isMultiByte) {
|
||||||
buf = widen(buf, j, length-2);
|
buf = widen(buf, j, length-2);
|
||||||
isMultiByte = true;
|
isMultiByte = true;
|
||||||
@ -74,9 +87,14 @@ public class Utf8 {
|
|||||||
|
|
||||||
public static char[] decode16(byte[] s8, int offset, int length) {
|
public static char[] decode16(byte[] s8, int offset, int length) {
|
||||||
Object decoded = decode(s8, offset, length);
|
Object decoded = decode(s8, offset, length);
|
||||||
if (decoded instanceof char[]) return (char[])decoded;
|
if (decoded == null) {
|
||||||
|
return null;
|
||||||
|
} else if (decoded instanceof char[]) {
|
||||||
|
return (char[])decoded;
|
||||||
|
} else {
|
||||||
return (char[])widen(decoded, length, length);
|
return (char[])widen(decoded, length, length);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private static void cram(Object data, int index, int val) {
|
private static void cram(Object data, int index, int val) {
|
||||||
if (data instanceof byte[]) ((byte[])data)[index] = (byte)val;
|
if (data instanceof byte[]) ((byte[])data)[index] = (byte)val;
|
||||||
|
@ -13,6 +13,8 @@ package java.io;
|
|||||||
import avian.Utf8;
|
import avian.Utf8;
|
||||||
|
|
||||||
public class InputStreamReader extends Reader {
|
public class InputStreamReader extends Reader {
|
||||||
|
private static final int MultibytePadding = 4;
|
||||||
|
|
||||||
private final InputStream in;
|
private final InputStream in;
|
||||||
|
|
||||||
public InputStreamReader(InputStream in) {
|
public InputStreamReader(InputStream in) {
|
||||||
@ -29,18 +31,59 @@ public class InputStreamReader extends Reader {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public int read(char[] b, int offset, int length) throws IOException {
|
public int read(char[] b, int offset, int length) throws IOException {
|
||||||
byte[] buffer = new byte[length];
|
if (length == 0) {
|
||||||
int c = in.read(buffer);
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
if (c <= 0) return c;
|
byte[] buffer = new byte[length + MultibytePadding];
|
||||||
|
int bufferLength = length;
|
||||||
|
int bufferOffset = 0;
|
||||||
|
while (true) {
|
||||||
|
int c = in.read(buffer, bufferOffset, bufferLength);
|
||||||
|
|
||||||
char[] buffer16 = Utf8.decode16(buffer, 0, c);
|
if (c <= 0) {
|
||||||
|
if (bufferOffset > 0) {
|
||||||
|
// if we've reached the end of the stream while trying to
|
||||||
|
// read a multibyte character, we still need to return any
|
||||||
|
// completely-decoded characters, plus \ufffd to indicate an
|
||||||
|
// unknown character
|
||||||
|
c = 1;
|
||||||
|
while (bufferOffset > 0) {
|
||||||
|
char[] buffer16 = Utf8.decode16(buffer, 0, bufferOffset);
|
||||||
|
|
||||||
|
if (buffer16 != null) {
|
||||||
|
System.arraycopy(buffer16, 0, b, offset, buffer16.length);
|
||||||
|
|
||||||
|
c = buffer16.length + 1;
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
-- bufferOffset;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
b[offset + c - 1] = '\ufffd';
|
||||||
|
}
|
||||||
|
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
|
||||||
|
bufferOffset += c;
|
||||||
|
|
||||||
|
char[] buffer16 = Utf8.decode16(buffer, 0, bufferOffset);
|
||||||
|
|
||||||
|
if (buffer16 != null) {
|
||||||
|
bufferOffset = 0;
|
||||||
|
|
||||||
System.arraycopy(buffer16, 0, b, offset, buffer16.length);
|
System.arraycopy(buffer16, 0, b, offset, buffer16.length);
|
||||||
|
|
||||||
return buffer16.length;
|
return buffer16.length;
|
||||||
|
} else {
|
||||||
|
// the buffer ended in an incomplete multibyte character, so
|
||||||
|
// we try to read another byte at a time until it's complete
|
||||||
|
bufferLength = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
|
@ -21,7 +21,66 @@ public class Strings {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void main(String[] args) {
|
private static void testDecode(final boolean prematureEOS) throws Exception {
|
||||||
|
java.io.Reader r = new java.io.InputStreamReader
|
||||||
|
(new java.io.InputStream() {
|
||||||
|
int state = 0;
|
||||||
|
|
||||||
|
public int read() {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
public int read(byte[] b, int offset, int length) {
|
||||||
|
if (length == 0) return 0;
|
||||||
|
|
||||||
|
switch (state) {
|
||||||
|
case 0:
|
||||||
|
b[offset] = (byte) 0xc2;
|
||||||
|
state = 1;
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
case 1:
|
||||||
|
b[offset] = (byte) 0xae;
|
||||||
|
state = 2;
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
case 2:
|
||||||
|
b[offset] = (byte) 0xea;
|
||||||
|
state = 3;
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
case 3:
|
||||||
|
b[offset] = (byte) 0xba;
|
||||||
|
state = prematureEOS ? 5 : 4;
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
case 4:
|
||||||
|
b[offset] = (byte) 0xaf;
|
||||||
|
state = 5;
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
case 5:
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
default:
|
||||||
|
throw new RuntimeException();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
char[] buffer = new char[2];
|
||||||
|
int offset = 0;
|
||||||
|
while (offset < buffer.length) {
|
||||||
|
int c = r.read(buffer, offset, buffer.length - offset);
|
||||||
|
if (c == -1) break;
|
||||||
|
offset += c;
|
||||||
|
}
|
||||||
|
|
||||||
|
expect(new String(buffer, 0, offset).equals
|
||||||
|
(prematureEOS ? "\u00ae\ufffd" : "\u00ae\uaeaf"));
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
expect(new String(new byte[] { 99, 111, 109, 46, 101, 99, 111, 118, 97,
|
expect(new String(new byte[] { 99, 111, 109, 46, 101, 99, 111, 118, 97,
|
||||||
116, 101, 46, 110, 97, 116, 46, 98, 117,
|
116, 101, 46, 110, 97, 116, 46, 98, 117,
|
||||||
115, 46, 83, 121, 109, 98, 111, 108 })
|
115, 46, 83, 121, 109, 98, 111, 108 })
|
||||||
@ -77,5 +136,8 @@ public class Strings {
|
|||||||
expect(Character.forDigit(Character.digit('b', 16), 16) == 'b');
|
expect(Character.forDigit(Character.digit('b', 16), 16) == 'b');
|
||||||
expect(Character.forDigit(Character.digit('f', 16), 16) == 'f');
|
expect(Character.forDigit(Character.digit('f', 16), 16) == 'f');
|
||||||
expect(Character.forDigit(Character.digit('z', 36), 36) == 'z');
|
expect(Character.forDigit(Character.digit('z', 36), 36) == 'z');
|
||||||
|
|
||||||
|
testDecode(false);
|
||||||
|
testDecode(true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user