From 1f7f9319c377faf6830e6aea575d64e13724faca Mon Sep 17 00:00:00 2001 From: "J. Treadwell" Date: Tue, 11 Nov 2008 17:43:11 -0700 Subject: [PATCH 1/4] Added UTF-8 support (still absent with string literals)! --- classpath/java/lang/String.java | 107 +++++++++++++++++++++++++++++--- 1 file changed, 99 insertions(+), 8 deletions(-) diff --git a/classpath/java/lang/String.java b/classpath/java/lang/String.java index 74f05c4d55..89b33ad39e 100644 --- a/classpath/java/lang/String.java +++ b/classpath/java/lang/String.java @@ -12,11 +12,12 @@ package java.lang; import java.io.UnsupportedEncodingException; import java.util.regex.Pattern; +import java.io.ByteArrayOutputStream; public final class String implements Comparable, CharSequence { - private Object data; - private int offset; - private int length; + private final Object data; + private final int offset; + private final int length; private int hashCode; public String(char[] data, int offset, int length, boolean copy) { @@ -43,6 +44,10 @@ public final class String implements Comparable, CharSequence { this(data, 0, data.length); } + public String(String s) { + this(s.toCharArray()); + } + public String(byte[] data, String charset) throws UnsupportedEncodingException { @@ -65,16 +70,20 @@ public final class String implements Comparable, CharSequence { (offset + " < 0 or " + offset + " + " + length + " > " + l); } + if(!copy && isUTF8(data)) copy = true; + if (copy) { Object c; if (data instanceof char[]) { c = new char[length]; + System.arraycopy(data, offset, c, 0, length); } else { - c = new byte[length]; + c = decodeUTF8((byte[])data, offset, length); + if(c instanceof char[]) length = ((char[])c).length; } - System.arraycopy(data, offset, c, 0, length); this.data = c; + this.offset = 0; this.length = length; } else { this.data = data; @@ -83,6 +92,85 @@ public final class String implements Comparable, CharSequence { } } + private static boolean isUTF8(Object data) { + if(!(data instanceof byte[])) return false; + byte[] b = (byte[])data; + for(int i = 0; i < b.length; ++i) { + if(((int)b[i] & 0x080) != 0) return true; + } + return false; + } + + private static byte[] encodeUTF8(char[] s16, int offset, int length) { + ByteArrayOutputStream buf = new ByteArrayOutputStream(); + for(int i = offset; i < offset+length; ++i) { + char c = s16[i]; + if(c == '\u0000') { // null char + buf.write(0); + buf.write(0); + } else if(c < 0x080) { // 1 byte char + buf.write(c); + } else if(c < 0x0800) { // 2 byte char + buf.write(0x0c0 | (c >>> 6)); + buf.write(0x080 | (c & 0x03f)); + } else { // 3 byte char + buf.write(0x0e0 | ((c >>> 12) & 0x0f)); + buf.write(0x080 | ((c >>> 6) & 0x03f)); + buf.write(0x080 | (c & 0x03f)); + } + } + return buf.toByteArray(); + } + + private static void decodeUTF8_insert(Object data, int index, int val) { + if(data instanceof byte[]) ((byte[])data)[index] = (byte)val; + else ((char[])data)[index] = (char)val; + } + + private static Object decodeUTF8_widen(Object data, int length, int capacity) { + byte[] src = (byte[])data; + char[] result = new char[capacity]; + for(int i = 0; i < length; ++i) result[i] = (char)((int)src[i] & 0x0ff); + return result; + } + + private static Object decodeUTF8_trim(Object data, int length) { + if(data instanceof byte[]) return data; + if(((char[])data).length == length) return data; + char[] result = new char[length]; + System.arraycopy(data, 0, result, 0, length); + return result; + } + + private static Object decodeUTF8(byte[] s8, int offset, int length) { + Object buf = new byte[s8.length]; + boolean isMultiByte = false; + int i=offset, j=0; + while(i < offset+length) { + int x = s8[i++]; + if((x & 0x080) == 0x0) { // 1 byte char + if(x == 0) ++i; // 2 byte null char + decodeUTF8_insert(buf, j++, x); + } else if((x & 0x0e0) == 0x0c0) { // 2 byte char + if(!isMultiByte) { + buf = decodeUTF8_widen(buf, j, s8.length-1); + isMultiByte = true; + } + int y = s8[i++]; + decodeUTF8_insert(buf, j++, ((x & 0x1f) << 6) | (y & 0x3f)); + } else if((x & 0x0f0) == 0x0e0) { // 3 byte char + if(!isMultiByte) { + buf = decodeUTF8_widen(buf, j, s8.length-2); + isMultiByte = true; + } + int y = s8[i++]; int z = s8[i++]; + decodeUTF8_insert(buf, j++, ((x & 0xf) << 12) | ((y & 0x3f) << 6) | (z & 0x3f)); + } + } + + return decodeUTF8_trim(buf, j); + } + public String toString() { return this; } @@ -341,9 +429,12 @@ public final class String implements Comparable, CharSequence { } public byte[] getBytes() { - byte[] b = new byte[length]; - getBytes(0, length, b, 0); - return b; + if(data instanceof byte[]) { + byte[] b = new byte[length]; + getBytes(0, length, b, 0); + return b; + } + return encodeUTF8((char[])data, offset, length); } public byte[] getBytes(String format) From 3001c2067c895c55e1ff4e5e7296ebb8e0478aff Mon Sep 17 00:00:00 2001 From: "J. Treadwell" Date: Wed, 12 Nov 2008 10:19:21 -0700 Subject: [PATCH 2/4] changed String(byte[] bytes, String charsetName) to support only UTF-8 --- classpath/java/lang/String.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/classpath/java/lang/String.java b/classpath/java/lang/String.java index 89b33ad39e..e2295cdc89 100644 --- a/classpath/java/lang/String.java +++ b/classpath/java/lang/String.java @@ -52,7 +52,7 @@ public final class String implements Comparable, CharSequence { throws UnsupportedEncodingException { this(data); - if (! charset.equals("US-ASCII")) { + if (! charset.equals("UTF-8")) { throw new UnsupportedEncodingException(charset); } } From dc2700d91378ec4af0fcefe570609ce589ea8e43 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Sat, 15 Nov 2008 17:28:45 -0700 Subject: [PATCH 3/4] ensure that the saved exception in a finally block is visited during GC when the jsr instruction is used --- src/compile.cpp | 80 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 78 insertions(+), 2 deletions(-) diff --git a/src/compile.cpp b/src/compile.cpp index 44e2d0f2a4..3fe7873a51 100644 --- a/src/compile.cpp +++ b/src/compile.cpp @@ -1764,6 +1764,67 @@ handleExit(MyThread* t, Frame* frame) (t, frame, getThunk(t, releaseMonitorForObjectThunk)); } +int +exceptionIndex(MyThread* t, object code, unsigned jsrIp, unsigned dstIp) +{ + object table = codeExceptionHandlerTable(t, code); + unsigned length = exceptionHandlerTableLength(t, table); + for (unsigned i = 0; i < length; ++i) { + ExceptionHandler* eh = exceptionHandlerTableBody(t, table, i); + if (exceptionHandlerCatchType(eh) == 0) { + unsigned ip = exceptionHandlerIp(eh); + unsigned index; + switch (codeBody(t, code, ip++)) { + case astore: + index = codeBody(t, code, ip++); + break; + + case astore_0: + index = 0; + break; + + case astore_1: + index = 1; + break; + + case astore_2: + index = 2; + break; + + case astore_3: + index = 3; + break; + + default: abort(t); + } + + if (ip == jsrIp) { + return -1; + } + + switch (codeBody(t, code, ip++)) { + case jsr: { + uint32_t offset = codeReadInt16(t, code, ip); + if ((ip - 3) + offset == dstIp) { + return index; + } + } break; + + case jsr_w: { + uint32_t offset = codeReadInt32(t, code, ip); + if ((ip - 5) + offset == dstIp) { + return index; + } + } break; + + default: break; + } + } + } + + abort(t); +} + void compile(MyThread* t, Frame* initialFrame, unsigned ip, bool exceptionHandler = false) @@ -2783,18 +2844,33 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, case jsr: case jsr_w: { + uint32_t thisIp; uint32_t newIp; if (instruction == jsr) { uint32_t offset = codeReadInt16(t, code, ip); - newIp = (ip - 3) + offset; + thisIp = ip - 3; + newIp = thisIp + offset; } else { uint32_t offset = codeReadInt32(t, code, ip); - newIp = (ip - 5) + offset; + thisIp = ip - 5; + newIp = thisIp + offset; } assert(t, newIp < codeLength(t, code)); + int index = exceptionIndex(t, code, thisIp, newIp); + if (index >= 0) { + // store a null pointer at the same index the exception would + // be stored in the finally block so we can safely treat that + // location as a GC root. Of course, this assumes there + // wasn't already a live value there, which is something we + // should verify once we have complete data flow information + // (todo). + c->storeLocal(BytesPerWord, c->constant(0), index); + frame->storedObject(index); + } + c->saveStack(); frame->pushAddress(frame->machineIp(ip)); From 92a8a4d83b567470e1e354df8beb3aee85156f59 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Sat, 15 Nov 2008 18:03:43 -0700 Subject: [PATCH 4/4] clean up subroutine code in wake of merge from master branch --- src/compile.cpp | 2 +- src/compiler.cpp | 8 -------- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/src/compile.cpp b/src/compile.cpp index a49d3e045e..e5707abf27 100644 --- a/src/compile.cpp +++ b/src/compile.cpp @@ -2911,7 +2911,7 @@ compile(MyThread* t, Frame* initialFrame, unsigned ip, // wasn't already a live value there, which is something we // should verify once we have complete data flow information // (todo). - c->storeLocal(BytesPerWord, c->constant(0), index); + c->storeLocal(1, c->constant(0), index); frame->storedObject(index); } diff --git a/src/compiler.cpp b/src/compiler.cpp index 0b3cf4c8f6..f0bd87d442 100644 --- a/src/compiler.cpp +++ b/src/compiler.cpp @@ -4029,16 +4029,8 @@ class MyCompiler: public Compiler { virtual void endSubroutine(Subroutine* subroutine) { MySubroutine* sr = static_cast(subroutine); if (sr->forkState) { - fprintf(stderr, "restore sr forkstate\n"); - Local* locals = c.locals; ::restoreState(&c, sr->forkState); - for (int i = c.localFootprint - 1; i >= 0; --i) { - if (locals[i].value and c.locals[i].value == 0) { - storeLocal(locals[i].footprint, locals[i].value, i); - } - } } else { - fprintf(stderr, "save sr forkstate\n"); sr->forkState = ::saveState(&c); } }