Improved (should now be complete) Unicode support (UTF-8 for *nix and UTF-16 for Windows).

This commit is contained in:
JET 2010-04-20 10:03:07 -06:00
parent b6936fb597
commit c666ab58e3
4 changed files with 154 additions and 64 deletions

View File

@ -28,10 +28,11 @@
# define CLOSE _close # define CLOSE _close
# define READ _read # define READ _read
# define WRITE _write # define WRITE _write
# define STAT _stat # define STAT _wstat
# define STRUCT_STAT struct _stat # define STRUCT_STAT struct _stat
# define MKDIR(path, mode) _mkdir(path) # define MKDIR(path, mode) _wmkdir(path)
# define UNLINK _unlink # define UNLINK _wunlink
# define RENAME _wrename
# define OPEN_MASK O_BINARY # define OPEN_MASK O_BINARY
# ifdef _MSC_VER # ifdef _MSC_VER
@ -40,10 +41,15 @@
# define S_IRUSR _S_IREAD # define S_IRUSR _S_IREAD
# define S_IWUSR _S_IWRITE # define S_IWUSR _S_IWRITE
# else # else
# define OPEN _open # define OPEN _wopen
# define CREAT _creat # define CREAT _wcreat
# endif # endif
# define GET_CHARS GetStringChars
# define RELEASE_CHARS(path, chars) ReleaseStringChars(path, reinterpret_cast<const jchar*>(chars))
typedef wchar_t char_t;
#else // not PLATFORM_WINDOWS #else // not PLATFORM_WINDOWS
# include <dirent.h> # include <dirent.h>
@ -59,20 +65,28 @@
# define MKDIR mkdir # define MKDIR mkdir
# define CREAT creat # define CREAT creat
# define UNLINK unlink # define UNLINK unlink
# define RENAME rename
# define OPEN_MASK 0 # define OPEN_MASK 0
# define GET_CHARS GetStringUTFChars
# define RELEASE_CHARS ReleaseStringUTFChars
typedef char char_t;
#endif // not PLATFORM_WINDOWS #endif // not PLATFORM_WINDOWS
inline void* operator new(size_t, void* p) throw() { return p; } inline void* operator new(size_t, void* p) throw() { return p; }
typedef const char_t* string_t;
namespace { namespace {
#ifdef _MSC_VER #ifdef _MSC_VER
inline int inline int
OPEN(const char* path, int mask, int mode) OPEN(string_t path, int mask, int mode)
{ {
int fd; int fd;
if (_sopen_s(&fd, path, mask, _SH_DENYNO, mode) == 0) { if (_wsopen_s(&fd, path, mask, _SH_DENYNO, mode) == 0) {
return fd; return fd;
} else { } else {
return -1; return -1;
@ -80,21 +94,21 @@ OPEN(const char* path, int mask, int mode)
} }
inline int inline int
CREAT(const char* path, int mode) CREAT(string_t path, int mode)
{ {
return OPEN(path, _O_CREAT, mode); return OPEN(path, _O_CREAT, mode);
} }
#endif #endif
inline bool inline bool
exists(const char* path) exists(string_t path)
{ {
STRUCT_STAT s; STRUCT_STAT s;
return STAT(path, &s) == 0; return STAT(path, &s) == 0;
} }
inline int inline int
doOpen(JNIEnv* e, const char* path, int mask) doOpen(JNIEnv* e, string_t path, int mask)
{ {
int fd = OPEN(path, mask | OPEN_MASK, S_IRUSR | S_IWUSR); int fd = OPEN(path, mask | OPEN_MASK, S_IRUSR | S_IWUSR);
if (fd == -1) { if (fd == -1) {
@ -157,11 +171,11 @@ class Mapping {
}; };
inline Mapping* inline Mapping*
map(JNIEnv* e, const char* path) map(JNIEnv* e, string_t path)
{ {
Mapping* result = 0; Mapping* result = 0;
HANDLE file = CreateFile(path, FILE_READ_DATA, FILE_SHARE_READ, 0, HANDLE file = CreateFileW(path, FILE_READ_DATA, FILE_SHARE_READ, 0,
OPEN_EXISTING, 0, 0); OPEN_EXISTING, 0, 0);
if (file != INVALID_HANDLE_VALUE) { if (file != INVALID_HANDLE_VALUE) {
unsigned size = GetFileSize(file, 0); unsigned size = GetFileSize(file, 0);
if (size != INVALID_FILE_SIZE) { if (size != INVALID_FILE_SIZE) {
@ -205,10 +219,10 @@ class Directory {
public: public:
Directory(): handle(0), findNext(false) { } Directory(): handle(0), findNext(false) { }
virtual const char* next() { virtual string_t next() {
if (handle and handle != INVALID_HANDLE_VALUE) { if (handle and handle != INVALID_HANDLE_VALUE) {
if (findNext) { if (findNext) {
if (FindNextFile(handle, &data)) { if (FindNextFileW(handle, &data)) {
return data.cFileName; return data.cFileName;
} }
} else { } else {
@ -227,7 +241,7 @@ class Directory {
} }
HANDLE handle; HANDLE handle;
WIN32_FIND_DATA data; WIN32_FIND_DATAW data;
bool findNext; bool findNext;
}; };
@ -245,7 +259,7 @@ class Mapping {
}; };
inline Mapping* inline Mapping*
map(JNIEnv* e, const char* path) map(JNIEnv* e, string_t path)
{ {
Mapping* result = 0; Mapping* result = 0;
int fd = open(path, O_RDONLY); int fd = open(path, O_RDONLY);
@ -280,6 +294,14 @@ unmap(JNIEnv*, Mapping* mapping)
} // namespace } // namespace
// Fetch the characters of `path` from the JVM in the platform's path
// encoding: GET_CHARS expands to GetStringChars on Windows (string_t is
// const wchar_t*) and to GetStringUTFChars elsewhere (string_t is
// const char*).  The result may be null; callers check it before use
// and hand it back through releaseChars().
inline string_t
getChars(JNIEnv* e, jstring path)
{
  string_t chars = reinterpret_cast<string_t>(e->GET_CHARS(path, 0));
  return chars;
}
// Give back a character buffer previously obtained for `path`.
// RELEASE_CHARS expands to the platform's JNI release call
// (ReleaseStringChars with a jchar cast on Windows,
// ReleaseStringUTFChars elsewhere).
inline void
releaseChars(JNIEnv* e, jstring path, string_t chars)
{
  e->RELEASE_CHARS(path, chars);
}
extern "C" JNIEXPORT jstring JNICALL extern "C" JNIEXPORT jstring JNICALL
Java_java_io_File_toCanonicalPath(JNIEnv* /*e*/, jclass, jstring path) Java_java_io_File_toCanonicalPath(JNIEnv* /*e*/, jclass, jstring path)
{ {
@ -297,14 +319,14 @@ Java_java_io_File_toAbsolutePath(JNIEnv* /*e*/, jclass, jstring path)
extern "C" JNIEXPORT jlong JNICALL extern "C" JNIEXPORT jlong JNICALL
Java_java_io_File_length(JNIEnv* e, jclass, jstring path) Java_java_io_File_length(JNIEnv* e, jclass, jstring path)
{ {
const char* chars = e->GetStringUTFChars(path, 0); string_t chars = getChars(e, path);
if (chars) { if (chars) {
STRUCT_STAT s; STRUCT_STAT s;
int r = STAT(chars, &s); int r = STAT(chars, &s);
if (r == 0) { if (r == 0) {
return s.st_size; return s.st_size;
} }
e->ReleaseStringUTFChars(path, chars); releaseChars(e, path, chars);
} }
return -1; return -1;
@ -313,7 +335,7 @@ Java_java_io_File_length(JNIEnv* e, jclass, jstring path)
extern "C" JNIEXPORT void JNICALL extern "C" JNIEXPORT void JNICALL
Java_java_io_File_mkdir(JNIEnv* e, jclass, jstring path) Java_java_io_File_mkdir(JNIEnv* e, jclass, jstring path)
{ {
const char* chars = e->GetStringUTFChars(path, 0); string_t chars = getChars(e, path);
if (chars) { if (chars) {
if (not exists(chars)) { if (not exists(chars)) {
int r = ::MKDIR(chars, 0700); int r = ::MKDIR(chars, 0700);
@ -321,14 +343,14 @@ Java_java_io_File_mkdir(JNIEnv* e, jclass, jstring path)
throwNewErrno(e, "java/io/IOException"); throwNewErrno(e, "java/io/IOException");
} }
} }
e->ReleaseStringUTFChars(path, chars); releaseChars(e, path, chars);
} }
} }
extern "C" JNIEXPORT void JNICALL extern "C" JNIEXPORT void JNICALL
Java_java_io_File_createNewFile(JNIEnv* e, jclass, jstring path) Java_java_io_File_createNewFile(JNIEnv* e, jclass, jstring path)
{ {
const char* chars = e->GetStringUTFChars(path, 0); string_t chars = getChars(e, path);
if (chars) { if (chars) {
if (not exists(chars)) { if (not exists(chars)) {
int fd = CREAT(chars, 0600); int fd = CREAT(chars, 0600);
@ -338,38 +360,38 @@ Java_java_io_File_createNewFile(JNIEnv* e, jclass, jstring path)
doClose(e, fd); doClose(e, fd);
} }
} }
e->ReleaseStringUTFChars(path, chars); releaseChars(e, path, chars);
} }
} }
extern "C" JNIEXPORT void JNICALL extern "C" JNIEXPORT void JNICALL
Java_java_io_File_delete(JNIEnv* e, jclass, jstring path) Java_java_io_File_delete(JNIEnv* e, jclass, jstring path)
{ {
const char* chars = e->GetStringUTFChars(path, 0); string_t chars = getChars(e, path);
if (chars) { if (chars) {
int r = UNLINK(chars); int r = UNLINK(chars);
if (r != 0) { if (r != 0) {
throwNewErrno(e, "java/io/IOException"); throwNewErrno(e, "java/io/IOException");
} }
e->ReleaseStringUTFChars(path, chars); releaseChars(e, path, chars);
} }
} }
extern "C" JNIEXPORT jboolean JNICALL extern "C" JNIEXPORT jboolean JNICALL
Java_java_io_File_rename(JNIEnv* e, jclass, jstring old, jstring new_) Java_java_io_File_rename(JNIEnv* e, jclass, jstring old, jstring new_)
{ {
const char* oldChars = e->GetStringUTFChars(old, 0); string_t oldChars = getChars(e, old);
const char* newChars = e->GetStringUTFChars(new_, 0); string_t newChars = getChars(e, new_);
if (oldChars) { if (oldChars) {
bool v; bool v;
if (newChars) { if (newChars) {
v = rename(oldChars, newChars) == 0; v = RENAME(oldChars, newChars) == 0;
e->ReleaseStringUTFChars(new_, newChars); releaseChars(e, new_, newChars);
} else { } else {
v = false; v = false;
} }
e->ReleaseStringUTFChars(old, oldChars); releaseChars(e, old, oldChars);
return v; return v;
} else { } else {
return false; return false;
@ -379,12 +401,12 @@ Java_java_io_File_rename(JNIEnv* e, jclass, jstring old, jstring new_)
extern "C" JNIEXPORT jboolean JNICALL extern "C" JNIEXPORT jboolean JNICALL
Java_java_io_File_isDirectory(JNIEnv* e, jclass, jstring path) Java_java_io_File_isDirectory(JNIEnv* e, jclass, jstring path)
{ {
const char* chars = e->GetStringUTFChars(path, 0); string_t chars = getChars(e, path);
if (chars) { if (chars) {
STRUCT_STAT s; STRUCT_STAT s;
int r = STAT(chars, &s); int r = STAT(chars, &s);
bool v = (r == 0 and S_ISDIR(s.st_mode)); bool v = (r == 0 and S_ISDIR(s.st_mode));
e->ReleaseStringUTFChars(path, chars); releaseChars(e, path, chars);
return v; return v;
} else { } else {
return false; return false;
@ -394,12 +416,12 @@ Java_java_io_File_isDirectory(JNIEnv* e, jclass, jstring path)
extern "C" JNIEXPORT jboolean JNICALL extern "C" JNIEXPORT jboolean JNICALL
Java_java_io_File_isFile(JNIEnv* e, jclass, jstring path) Java_java_io_File_isFile(JNIEnv* e, jclass, jstring path)
{ {
const char* chars = e->GetStringUTFChars(path, 0); string_t chars = getChars(e, path);
if (chars) { if (chars) {
STRUCT_STAT s; STRUCT_STAT s;
int r = STAT(chars, &s); int r = STAT(chars, &s);
bool v = (r == 0 and S_ISREG(s.st_mode)); bool v = (r == 0 and S_ISREG(s.st_mode));
e->ReleaseStringUTFChars(path, chars); releaseChars(e, path, chars);
return v; return v;
} else { } else {
return false; return false;
@ -409,10 +431,10 @@ Java_java_io_File_isFile(JNIEnv* e, jclass, jstring path)
extern "C" JNIEXPORT jboolean JNICALL extern "C" JNIEXPORT jboolean JNICALL
Java_java_io_File_exists(JNIEnv* e, jclass, jstring path) Java_java_io_File_exists(JNIEnv* e, jclass, jstring path)
{ {
const char* chars = e->GetStringUTFChars(path, 0); string_t chars = getChars(e, path);
if (chars) { if (chars) {
bool v = exists(chars); bool v = exists(chars);
e->ReleaseStringUTFChars(path, chars); releaseChars(e, path, chars);
return v; return v;
} else { } else {
return false; return false;
@ -424,18 +446,19 @@ Java_java_io_File_exists(JNIEnv* e, jclass, jstring path)
extern "C" JNIEXPORT jlong JNICALL extern "C" JNIEXPORT jlong JNICALL
Java_java_io_File_openDir(JNIEnv* e, jclass, jstring path) Java_java_io_File_openDir(JNIEnv* e, jclass, jstring path)
{ {
const char* chars = e->GetStringUTFChars(path, 0); string_t chars = getChars(e, path);
if (chars) { if (chars) {
unsigned length = strlen(chars); unsigned length = wcslen(chars);
unsigned size = length * sizeof(char_t);
RUNTIME_ARRAY(char, buffer, length + 3); RUNTIME_ARRAY(char_t, buffer, length + 3);
memcpy(RUNTIME_ARRAY_BODY(buffer), chars, length); memcpy(RUNTIME_ARRAY_BODY(buffer), chars, size);
memcpy(RUNTIME_ARRAY_BODY(buffer) + length, "\\*", 3); memcpy(RUNTIME_ARRAY_BODY(buffer) + length, L"\\*", 6);
e->ReleaseStringUTFChars(path, chars); releaseChars(e, path, chars);
Directory* d = new (malloc(sizeof(Directory))) Directory; Directory* d = new (malloc(sizeof(Directory))) Directory;
d->handle = FindFirstFile(RUNTIME_ARRAY_BODY(buffer), &(d->data)); d->handle = FindFirstFileW(RUNTIME_ARRAY_BODY(buffer), &(d->data));
if (d->handle == INVALID_HANDLE_VALUE) { if (d->handle == INVALID_HANDLE_VALUE) {
d->dispose(); d->dispose();
d = 0; d = 0;
@ -453,12 +476,12 @@ Java_java_io_File_readDir(JNIEnv* e, jclass, jlong handle)
Directory* d = reinterpret_cast<Directory*>(handle); Directory* d = reinterpret_cast<Directory*>(handle);
while (true) { while (true) {
const char* s = d->next(); string_t s = d->next();
if (s) { if (s) {
if (strcmp(s, ".") == 0 || strcmp(s, "..") == 0) { if (wcscmp(s, L".") == 0 || wcscmp(s, L"..") == 0) {
// skip . or .. and try again // skip . or .. and try again
} else { } else {
return e->NewStringUTF(s); return e->NewString(reinterpret_cast<const jchar*>(s), wcslen(s));
} }
} else { } else {
return 0; return 0;
@ -477,10 +500,10 @@ Java_java_io_File_closeDir(JNIEnv* , jclass, jlong handle)
extern "C" JNIEXPORT jlong JNICALL extern "C" JNIEXPORT jlong JNICALL
Java_java_io_File_openDir(JNIEnv* e, jclass, jstring path) Java_java_io_File_openDir(JNIEnv* e, jclass, jstring path)
{ {
const char* chars = e->GetStringUTFChars(path, 0); string_t chars = getChars(e, path);
if (chars) { if (chars) {
jlong handle = reinterpret_cast<jlong>(opendir(chars)); jlong handle = reinterpret_cast<jlong>(opendir(chars));
e->ReleaseStringUTFChars(path, chars); releaseChars(e, path, chars);
return handle; return handle;
} else { } else {
return 0; return 0;
@ -522,10 +545,10 @@ Java_java_io_File_closeDir(JNIEnv* , jclass, jlong handle)
extern "C" JNIEXPORT jint JNICALL extern "C" JNIEXPORT jint JNICALL
Java_java_io_FileInputStream_open(JNIEnv* e, jclass, jstring path) Java_java_io_FileInputStream_open(JNIEnv* e, jclass, jstring path)
{ {
const char* chars = e->GetStringUTFChars(path, 0); string_t chars = getChars(e, path);
if (chars) { if (chars) {
int fd = doOpen(e, chars, O_RDONLY); int fd = doOpen(e, chars, O_RDONLY);
e->ReleaseStringUTFChars(path, chars); releaseChars(e, path, chars);
return fd; return fd;
} else { } else {
return -1; return -1;
@ -572,10 +595,10 @@ Java_java_io_FileInputStream_close(JNIEnv* e, jclass, jint fd)
extern "C" JNIEXPORT jint JNICALL extern "C" JNIEXPORT jint JNICALL
Java_java_io_FileOutputStream_open(JNIEnv* e, jclass, jstring path) Java_java_io_FileOutputStream_open(JNIEnv* e, jclass, jstring path)
{ {
const char* chars = e->GetStringUTFChars(path, 0); string_t chars = getChars(e, path);
if (chars) { if (chars) {
int fd = doOpen(e, chars, O_WRONLY | O_CREAT | O_TRUNC); int fd = doOpen(e, chars, O_WRONLY | O_CREAT | O_TRUNC);
e->ReleaseStringUTFChars(path, chars); releaseChars(e, path, chars);
return fd; return fd;
} else { } else {
return -1; return -1;
@ -618,7 +641,7 @@ extern "C" JNIEXPORT void JNICALL
Java_java_io_RandomAccessFile_open(JNIEnv* e, jclass, jstring path, Java_java_io_RandomAccessFile_open(JNIEnv* e, jclass, jstring path,
jlongArray result) jlongArray result)
{ {
const char* chars = e->GetStringUTFChars(path, 0); string_t chars = getChars(e, path);
if (chars) { if (chars) {
Mapping* mapping = map(e, chars); Mapping* mapping = map(e, chars);
@ -628,7 +651,7 @@ Java_java_io_RandomAccessFile_open(JNIEnv* e, jclass, jstring path,
jlong length = (mapping ? mapping->length : 0); jlong length = (mapping ? mapping->length : 0);
e->SetLongArrayRegion(result, 1, 1, &length); e->SetLongArrayRegion(result, 1, 1, &length);
e->ReleaseStringUTFChars(path, chars); releaseChars(e, path, chars);
} }
} }

View File

@ -159,7 +159,7 @@ GetStringUTFLength(Thread* t, jstring s)
{ {
ENTER(t, Thread::ActiveState); ENTER(t, Thread::ActiveState);
return stringLength(t, *s); return stringUTFLength(t, *s);
} }
const char* JNICALL const char* JNICALL
@ -167,9 +167,10 @@ GetStringUTFChars(Thread* t, jstring s, jboolean* isCopy)
{ {
ENTER(t, Thread::ActiveState); ENTER(t, Thread::ActiveState);
int length = stringUTFLength(t, *s);
char* chars = static_cast<char*> char* chars = static_cast<char*>
(t->m->heap->allocate(stringLength(t, *s) + 1)); (t->m->heap->allocate(length + 1));
stringChars(t, *s, chars); stringUTFChars(t, *s, chars, length);
if (isCopy) *isCopy = true; if (isCopy) *isCopy = true;
return chars; return chars;

View File

@ -2690,6 +2690,30 @@ makeString(Thread* t, const char* format, ...)
return makeString(t, s, 0, byteArrayLength(t, s) - 1, 0); return makeString(t, s, 0, byteArrayLength(t, s) - 1, 0);
} }
// Number of bytes needed to hold `string` in the UTF-8 form produced by
// stringUTFChars(), not counting the terminating 0.
//
// Strings backed by a byte array count one byte per element.  Strings
// backed by a char array are sized per 16-bit unit: one byte below
// 0x80 (this includes the null char, which is counted as a single byte
// rather than the two bytes Java uses), two bytes below 0x800, and
// three bytes otherwise.
int
stringUTFLength(Thread* t, object string)
{
  const unsigned count = stringLength(t, string);
  if (count == 0) {
    return 0;
  }

  object data = stringData(t, string);
  if (objectClass(t, data)
      == arrayBody(t, t->m->types, Machine::ByteArrayType))
  {
    return count;
  }

  const unsigned offset = stringOffset(t, string);
  int total = 0;
  for (unsigned index = 0; index < count; ++index) {
    const uint16_t unit = charArrayBody(t, data, offset + index);
    total += (unit < 0x80) ? 1 : ((unit < 0x800) ? 2 : 3);
  }
  return total;
}
void void
stringChars(Thread* t, object string, char* chars) stringChars(Thread* t, object string, char* chars)
{ {
@ -2730,6 +2754,42 @@ stringChars(Thread* t, object string, uint16_t* chars)
chars[stringLength(t, string)] = 0; chars[stringLength(t, string)] = 0;
} }
// Encode `string` into `chars` as UTF-8 and terminate it with a 0 byte.
//
// t      - current thread (used by the accessors and the assertion)
// string - the string object to encode
// chars  - destination buffer; must have room for `length` + 1 bytes
// length - expected encoded size; must equal stringUTFLength(t, string)
//          (only checked by the assertion, hence UNUSED)
//
// Byte-array-backed strings are copied verbatim.  Char-array-backed
// strings are encoded one 16-bit unit at a time into one, two or three
// bytes; a null char is emitted as a single 0 byte, matching what
// stringUTFLength() counts.
void
stringUTFChars(Thread* t, object string, char* chars, unsigned length UNUSED)
{
  assert(t, static_cast<unsigned>(stringUTFLength(t, string)) == length);

  const unsigned count = stringLength(t, string);
  if (count == 0) {
    // An empty string must still be terminated: GetStringUTFChars
    // allocates length + 1 bytes and returns the buffer as a C string,
    // so leaving chars[0] unwritten would expose uninitialised memory.
    chars[0] = 0;
    return;
  }

  object data = stringData(t, string);
  if (objectClass(t, data)
      == arrayBody(t, t->m->types, Machine::ByteArrayType))
  {
    memcpy(chars,
           &byteArrayBody(t, data, stringOffset(t, string)),
           count);
    chars[count] = 0;
  } else {
    int j = 0;
    for (unsigned i = 0; i < count; ++i) {
      uint16_t c = charArrayBody(t, data, stringOffset(t, string) + i);
      if (!c) {                // null char
        chars[j++] = 0;
      } else if (c < 0x80) {   // ASCII char
        chars[j++] = static_cast<char>(c);
      } else if (c < 0x800) {  // two-byte char
        chars[j++] = static_cast<char>(0x0c0 | (c >> 6));
        chars[j++] = static_cast<char>(0x080 | (c & 0x03f));
      } else {                 // three-byte char
        chars[j++] = static_cast<char>(0x0e0 | ((c >> 12) & 0x0f));
        chars[j++] = static_cast<char>(0x080 | ((c >> 6) & 0x03f));
        chars[j++] = static_cast<char>(0x080 | (c & 0x03f));
      }
    }
    chars[j] = 0;
  }
}
bool bool
isAssignableFrom(Thread* t, object a, object b) isAssignableFrom(Thread* t, object a, object b)
{ {

View File

@ -1901,12 +1901,18 @@ makeByteArray(Thread* t, const char* format, ...);
object object
makeString(Thread* t, const char* format, ...); makeString(Thread* t, const char* format, ...);
// Returns the number of bytes `string` occupies when encoded by
// stringUTFChars(), excluding the terminating 0: one byte per element
// for byte-array-backed strings, otherwise one to three bytes per
// 16-bit char.
int
stringUTFLength(Thread* t, object string);
void void
stringChars(Thread* t, object string, char* chars); stringChars(Thread* t, object string, char* chars);
void void
stringChars(Thread* t, object string, uint16_t* chars); stringChars(Thread* t, object string, uint16_t* chars);
// Encodes `string` into `chars` as UTF-8.  `length` must equal
// stringUTFLength(t, string) (this is asserted), and `chars` must have
// room for `length` + 1 bytes (the encoded bytes plus a terminating 0).
void
stringUTFChars(Thread* t, object string, char* chars, unsigned length);
bool bool
isAssignableFrom(Thread* t, object a, object b); isAssignableFrom(Thread* t, object a, object b);