From b88438d2fd5f2234c4a30ed2f3f47f4b4c169008 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Sun, 16 Sep 2007 18:13:36 -0600 Subject: [PATCH] sketch of JAR support in Finder --- makefile | 2 +- src/builtin.cpp | 16 +- src/common.h | 36 ++++ src/finder.cpp | 464 +++++++++++++++++++++++++++++++++++++++--------- src/finder.h | 10 +- src/machine.cpp | 8 +- src/machine.h | 20 --- src/system.cpp | 66 +++++++ src/system.h | 17 ++ 9 files changed, 518 insertions(+), 121 deletions(-) diff --git a/makefile b/makefile index 02440476e0..df4a56c4bd 100644 --- a/makefile +++ b/makefile @@ -39,7 +39,7 @@ thread-lflags = -lpthread cflags = $(warnings) -fPIC -fno-rtti -fno-exceptions -fvisibility=hidden \ -I$(src) -I$(bld) $(thread-cflags) -D__STDC_LIMIT_MACROS -lflags = $(thread-lflags) -ldl -lm +lflags = $(thread-lflags) -ldl -lm -lz ifeq ($(mode),debug) cflags += -O0 -g3 diff --git a/src/builtin.cpp b/src/builtin.cpp index 6020b759ce..482da77721 100644 --- a/src/builtin.cpp +++ b/src/builtin.cpp @@ -546,11 +546,11 @@ ResourceInputStream_open(Thread* t, jclass, jstring path) jint JNICALL ResourceInputStream_read(Thread*, jclass, jlong peer, jint position) { - Finder::Data* d = reinterpret_cast<Finder::Data*>(peer); - if (position >= static_cast<jint>(d->length())) { + System::Region* region = reinterpret_cast<System::Region*>(peer); + if (position >= static_cast<jint>(region->length())) { return -1; } else { - return d->start()[position]; + return region->start()[position]; } } @@ -560,14 +560,14 @@ ResourceInputStream_read2(Thread* t, jclass, jlong peer, jint position, { if (length == 0) return 0; - Finder::Data* d = reinterpret_cast<Finder::Data*>(peer); - if (length > static_cast<jint>(d->length()) - position) { - length = static_cast<jint>(d->length()) - position; + System::Region* region = reinterpret_cast<System::Region*>(peer); + if (length > static_cast<jint>(region->length()) - position) { + length = static_cast<jint>(region->length()) - position; } if (length <= 0) { return -1; } else { - memcpy(&byteArrayBody(t, *b, offset), d->start() + position, length); + 
memcpy(&byteArrayBody(t, *b, offset), region->start() + position, length); return length; } } @@ -575,7 +575,7 @@ ResourceInputStream_read2(Thread* t, jclass, jlong peer, jint position, void JNICALL ResourceInputStream_close(Thread*, jclass, jlong peer) { - reinterpret_cast<Finder::Data*>(peer)->dispose(); + reinterpret_cast<System::Region*>(peer)->dispose(); } } // namespace diff --git a/src/common.h b/src/common.h index c8ef37489b..3f0ba59894 100644 --- a/src/common.h +++ b/src/common.h @@ -128,6 +128,42 @@ mask(T* p) return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(p) & PointerMask); } +inline uint32_t +hash(const char* s) +{ + uint32_t h = 0; + for (unsigned i = 0; s[i]; ++i) { + h = (h * 31) + static_cast<uint8_t>(s[i]); /* agree with hash(const uint8_t*, unsigned) even where char is signed */ + } + return h; +} + +inline uint32_t +hash(const uint8_t* s, unsigned length) +{ + uint32_t h = 0; + for (unsigned i = 0; i < length; ++i) { + h = (h * 31) + s[i]; + } + return h; +} + +inline uint32_t +hash(const int8_t* s, unsigned length) +{ + return hash(reinterpret_cast<const uint8_t*>(s), length); +} + +inline uint32_t +hash(const uint16_t* s, unsigned length) +{ + uint32_t h = 0; + for (unsigned i = 0; i < length; ++i) { + h = (h * 31) + s[i]; + } + return h; +} + } // namespace vm #endif//COMMON_H diff --git a/src/finder.cpp b/src/finder.cpp index ea4e980779..6078a21b3b 100644 --- a/src/finder.cpp +++ b/src/finder.cpp @@ -1,8 +1,4 @@ -#include "sys/mman.h" -#include "sys/types.h" -#include "sys/stat.h" -#include "fcntl.h" - +#include "zlib.h" #include "system.h" #include "finder.h" @@ -32,7 +28,344 @@ copy(System* s, const char* a) return p; } -const char** +bool +equal(const void* a, unsigned al, const void* b, unsigned bl) +{ + if (al == bl) { + return memcmp(a, b, al) == 0; + } else { + return false; + } +} + +class Element { + public: + Element(): next(0) { } + virtual ~Element() { } + virtual System::Region* find(const char* name) = 0; + virtual bool exists(const char* name) = 0; + virtual void dispose() = 0; + + Element* next; +}; + +class DirectoryElement: public Element { + public: + 
DirectoryElement(System* s, const char* name): + s(s), name(name) + { } + + virtual System::Region* find(const char* name) { + const char* file = append(s, this->name, "/", name); + System::Region* region; + System::Status status = s->map(&region, file); + s->free(file); + + if (s->success(status)) { + return region; + } else { + return 0; + } + } + + virtual bool exists(const char* name) { + const char* file = append(s, this->name, "/", name); + System::FileType type = s->identify(file); + s->free(file); + return type != System::DoesNotExist; + } + + virtual void dispose() { + s->free(name); + s->free(this); + } + + System* s; + const char* name; +}; + +class PointerRegion: public System::Region { + public: + PointerRegion(System* s, const uint8_t* start, size_t length): + s(s), + start_(start), + length_(length) + { } + + virtual const uint8_t* start() { + return start_; + } + + virtual size_t length() { + return length_; + } + + virtual void dispose() { + s->free(this); + } + + System* s; + const uint8_t* start_; + size_t length_; +}; + +class DataRegion: public System::Region { + public: + DataRegion(System* s, size_t length): + s(s), + length_(length) + { } + + virtual const uint8_t* start() { + return data; + } + + virtual size_t length() { + return length_; + } + + virtual void dispose() { + s->free(this); + } + + System* s; + size_t length_; + uint8_t data[0]; +}; + +class JarIndex { + public: + static const unsigned HeaderSize = 30; + + enum CompressionMethod { + Stored = 0, + Deflated = 8 + }; + + class Node { + public: + Node(uint32_t hash, const uint8_t* entry, Node* next): + hash(hash), entry(entry), next(next) + { } + + uint32_t hash; + const uint8_t* entry; + Node* next; + }; + + JarIndex(System* s, unsigned capacity): + s(s), + capacity(capacity), + position(0), + nodes(static_cast<Node*>(s->allocate(sizeof(Node) * capacity))), + zStream(0) + { + memset(table, 0, sizeof(Node*) * capacity); + } + + static uint16_t get2(const uint8_t* p) { + return + 
(static_cast<uint16_t>(p[1]) << 8) | + (static_cast<uint16_t>(p[0]) ); + } + + static uint32_t get4(const uint8_t* p) { + return + (static_cast<uint32_t>(p[3]) << 24) | + (static_cast<uint32_t>(p[2]) << 16) | + (static_cast<uint32_t>(p[1]) << 8) | + (static_cast<uint32_t>(p[0]) ); + } + + static uint32_t signature(const uint8_t* p) { + return get4(p); + } + + static uint32_t compressionMethod(const uint8_t* p) { + return get2(p + 8); /* the method field is two bytes wide; the bytes after it are the modification time */ + } + + static uint32_t compressedSize(const uint8_t* p) { + return get4(p + 18); + } + + static uint32_t uncompressedSize(const uint8_t* p) { + return get4(p + 22); + } + + static uint16_t fileNameLength(const uint8_t* p) { + return get2(p + 26); + } + + static uint16_t extraFieldLength(const uint8_t* p) { + return get2(p + 28); + } + + static const uint8_t* fileName(const uint8_t* p) { + return p + 30; + } + + static JarIndex* make(System* s, unsigned capacity) { + return new + (s->allocate(sizeof(JarIndex) + (sizeof(Node*) * capacity))) + JarIndex(s, capacity); + } + + static JarIndex* open(System* s, System::Region* region) { + JarIndex* index = make(s, 32); + + const uint8_t* p = region->start(); + const uint8_t* end = p + region->length(); + while (p < end) { + if (signature(p) == 0x04034b50) { + index = index->add(hash(fileName(p), fileNameLength(p)), p); + + p += HeaderSize + + fileNameLength(p) + + extraFieldLength(p) + + compressedSize(p); + } else { + break; + } + } + + return index; + } + + JarIndex* add(uint32_t hash, const uint8_t* entry) { + if (position < capacity) { + unsigned i = hash & (capacity - 1); + table[i] = new (nodes + (position++)) Node(hash, entry, table[i]); + return this; + } else { + JarIndex* index = make(s, capacity * 2); + for (unsigned i = 0; i < capacity; ++i) { + index->add(nodes[i].hash, nodes[i].entry); + } + index->add(hash, entry); + dispose(); + return index; + } + } + + Node* findNode(const char* name) { + unsigned length = strlen(name); + unsigned i = hash(name) & (capacity - 1); + for (Node* n = table[i]; n; n = n->next) { + const uint8_t* p = 
n->entry; + if (equal(name, length, fileName(p), fileNameLength(p))) { + return n; + } + } + return 0; + } + + System::Region* find(const char* name) { + Node* n = findNode(name); + if (n) { + const uint8_t* p = n->entry; + switch (compressionMethod(p)) { + case Stored: { /* entry data starts after the fixed header, the name and the extra field */ + return new (s->allocate(sizeof(PointerRegion))) + PointerRegion(s, p + HeaderSize + fileNameLength(p) + extraFieldLength(p), + compressedSize(p)); + } break; + + case Deflated: { + DataRegion* region = new + (s->allocate(sizeof(DataRegion) + uncompressedSize(p))) + DataRegion(s, uncompressedSize(p)); + + if (zStream == 0) { + zStream = static_cast<z_stream*>(s->allocate(sizeof(z_stream))); + memset(zStream, 0, sizeof(z_stream)); + int r = inflateInit2(zStream, -MAX_WBITS); /* negative window bits: entries are raw deflate data with no zlib header */ + assert(s, r == Z_OK); + } + + zStream->next_in = const_cast<uint8_t*> + (p + HeaderSize + fileNameLength(p) + extraFieldLength(p)); + zStream->avail_in = compressedSize(p); + zStream->next_out = region->data; + zStream->avail_out = region->length(); + + int r = inflate(zStream, Z_SYNC_FLUSH); + assert(s, r == Z_STREAM_END); + r = inflateReset(zStream); /* the stream is shared by every lookup, so rewind it for the next entry */ + assert(s, r == Z_OK); + return region; + } break; + + default: + abort(s); + } + } + return 0; + } + + bool exists(const char* name) { + return findNode(name) != 0; + } + + void dispose() { + if (zStream) { + inflateEnd(zStream); + s->free(zStream); + } + s->free(nodes); + s->free(this); + } + + System* s; + unsigned capacity; + unsigned position; + Node* nodes; + z_stream* zStream; + Node* table[0]; +}; + +class JarElement: public Element { + public: + JarElement(System* s, const char* name): + s(s), name(name), region(0), index(0) /* init() and dispose() test index before anything assigns it */ + { } + + void init() { + if (index == 0) { + System::Region* r; + if (s->success(s->map(&r, this->name))) { + region = r; + index = JarIndex::open(s, r); + } + } + } + + virtual System::Region* find(const char* name) { + init(); + return (index ? index->find(name) : 0); + } + + virtual bool exists(const char* name) { + init(); + return (index ? 
index->exists(name) : false); + } + + virtual void dispose() { + s->free(name); + if (index) { + index->dispose(); + region->dispose(); + } + s->free(this); + } + + System* s; + const char* name; + System::Region* region; + JarIndex* index; +}; + +Element* parsePath(System* s, const char* path) { class Tokenizer { @@ -62,24 +395,43 @@ parsePath(System* s, const char* path) char delimiter; }; - unsigned count = 0; - for (Tokenizer t(path, ':'); t.hasMore(); t.next()) ++ count; - - const char** v = static_cast<const char**> - (s->allocate((count + 1) * sizeof(const char*))); - - unsigned i = 0; - for (Tokenizer t(path, ':'); t.hasMore(); ++i) { + Element* first = 0; + Element* prev = 0; + for (Tokenizer t(path, ':'); t.hasMore();) { Tokenizer::Token token(t.next()); - char* p = static_cast<char*>(s->allocate(token.length + 1)); - memcpy(p, token.s, token.length); - p[token.length] = 0; - v[i] = p; + char* name = static_cast<char*>(s->allocate(token.length + 1)); + memcpy(name, token.s, token.length); + name[token.length] = 0; + + Element* e; + switch (s->identify(name)) { + case System::File: { + e = new (s->allocate(sizeof(JarElement))) + JarElement(s, name); + } break; + + case System::Directory: { + e = new (s->allocate(sizeof(DirectoryElement))) + DirectoryElement(s, name); + } break; + + default: { + s->free(name); + e = 0; + } break; + } + + if (e) { + if (prev) { + prev->next = e; + } else { + first = e; + } + prev = e; + } } - v[i] = 0; - - return v; + return first; } class MyFinder: public Finder { @@ -90,67 +442,20 @@ class MyFinder: public Finder { pathString(copy(system, path)) { } - class Data: public Finder::Data { - public: - Data(System* system, uint8_t* start, size_t length): - system(system), - start_(start), - length_(length) - { } - - virtual const uint8_t* start() { - return start_; - } - - virtual size_t length() { - return length_; - } - - virtual void dispose() { - if (start_) { - munmap(start_, length_); - } - system->free(this); - } - - System* system; - uint8_t* start_; - 
size_t length_; - }; - - virtual Data* find(const char* name) { - Data* d = new (system->allocate(sizeof(Data))) Data(system, 0, 0); - - for (const char** p = path_; *p; ++p) { - const char* file = append(system, *p, "/", name); - int fd = open(file, O_RDONLY); - system->free(file); - - if (fd != -1) { - struct stat s; - int r = fstat(fd, &s); - if (r != -1) { - void* data = mmap(0, s.st_size, PROT_READ, MAP_PRIVATE, fd, 0); - if (data) { - d->start_ = static_cast<uint8_t*>(data); - d->length_ = s.st_size; - return d; - } - } + virtual System::Region* find(const char* name) { + for (Element* e = path_; e; e = e->next) { + System::Region* r = e->find(name); + if (r) { + return r; } } - system->free(d); return 0; } virtual bool exists(const char* name) { - for (const char** p = path_; *p; ++p) { - const char* file = append(system, *p, "/", name); - struct stat s; - int r = stat(file, &s); - system->free(file); - if (r == 0) { + for (Element* e = path_; e; e = e->next) { + if (e->exists(name)) { return true; } } @@ -163,16 +468,17 @@ class MyFinder: public Finder { } virtual void dispose() { - for (const char** p = path_; *p; ++p) { - system->free(*p); + for (Element* e = path_; e;) { + Element* t = e; + e = e->next; + t->dispose(); } - system->free(path_); system->free(pathString); system->free(this); } System* system; - const char** path_; + Element* path_; const char* pathString; }; diff --git a/src/finder.h b/src/finder.h index 1caf1aa08b..8edf56d0ff 100644 --- a/src/finder.h +++ b/src/finder.h @@ -8,16 +8,8 @@ namespace vm { class Finder { public: - class Data { - public: - virtual ~Data() { } - virtual const uint8_t* start() = 0; - virtual size_t length() = 0; - virtual void dispose() = 0; - }; - virtual ~Finder() { } - virtual Data* find(const char* name) = 0; + virtual System::Region* find(const char* name) = 0; virtual bool exists(const char* name) = 0; virtual const char* path() = 0; virtual void dispose() = 0; diff --git a/src/machine.cpp b/src/machine.cpp index 
66a7313eab..62dd0235aa 100644 --- a/src/machine.cpp +++ b/src/machine.cpp @@ -2285,16 +2285,16 @@ resolveClass(Thread* t, object spec) memcpy(file, &byteArrayBody(t, spec, 0), byteArrayLength(t, spec) - 1); memcpy(file + byteArrayLength(t, spec) - 1, ".class", 7); - Finder::Data* data = t->vm->finder->find(file); + System::Region* region = t->vm->finder->find(file); - if (data) { + if (region) { if (Verbose) { fprintf(stderr, "parsing %s\n", &byteArrayBody(t, spec, 0)); } // parse class file - class_ = parseClass(t, data->start(), data->length()); - data->dispose(); + class_ = parseClass(t, region->start(), region->length()); + region->dispose(); if (LIKELY(t->exception == 0)) { if (Verbose) { diff --git a/src/machine.h b/src/machine.h index 068b47017d..34a660378d 100644 --- a/src/machine.h +++ b/src/machine.h @@ -1858,26 +1858,6 @@ makeTrace(Thread* t) return makeTrace(t, t->frame); } -inline uint32_t -hash(const int8_t* s, unsigned length) -{ - uint32_t h = 0; - for (unsigned i = 0; i < length; ++i) { - h = (h * 31) + static_cast<unsigned>(s[i]); - } - return h; -} - -inline uint32_t -hash(const uint16_t* s, unsigned length) -{ - uint32_t h = 0; - for (unsigned i = 0; i < length; ++i) { - h = (h * 31) + s[i]; - } - return h; -} - inline unsigned baseSize(Thread* t, object o, object class_) { diff --git a/src/system.cpp b/src/system.cpp index 420dfe94bd..c6286fc836 100644 --- a/src/system.cpp +++ b/src/system.cpp @@ -6,6 +6,7 @@ #include "fcntl.h" #include "dlfcn.h" #include "errno.h" +#include "unistd.h" #include "pthread.h" #include "signal.h" #include "stdint.h" @@ -376,6 +377,34 @@ class MySystem: public System { pthread_key_t key; }; + class Region: public System::Region { + public: + Region(System* system, uint8_t* start, size_t length): + system(system), + start_(start), + length_(length) + { } + + virtual const uint8_t* start() { + return start_; + } + + virtual size_t length() { + return length_; + } + + virtual void dispose() { + if (start_) { + munmap(start_, 
length_); + } + system->free(this); + } + + System* system; + uint8_t* start_; + size_t length_; + }; + class Library: public System::Library { public: Library(System* s, void* p, const char* name, bool mapName, @@ -517,6 +546,43 @@ class MySystem: public System { return dynamicCall(function, arguments, types, count, size, returnType); } + virtual Status map(System::Region** region, const char* name) { + Status status = 1; + + int fd = open(name, O_RDONLY); + if (fd != -1) { + struct stat s; + int r = fstat(fd, &s); + if (r != -1) { + void* data = mmap(0, s.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + if (data != MAP_FAILED) { /* mmap() signals failure with MAP_FAILED, not a null pointer */ + *region = new (allocate(sizeof(Region))) + Region(this, static_cast<uint8_t*>(data), s.st_size); + status = 0; + } + } + close(fd); + } + + return status; + } + + virtual FileType identify(const char* name) { + struct stat s; + int r = stat(name, &s); + if (r == 0) { /* stat() returns 0 on success; only then is s.st_mode meaningful */ + if (S_ISREG(s.st_mode)) { + return File; + } else if (S_ISDIR(s.st_mode)) { + return Directory; + } else { + return Unknown; + } + } else { + return DoesNotExist; + } + } + virtual Status load(System::Library** lib, const char* name, bool mapName, diff --git a/src/system.h b/src/system.h index 9002d772b4..1094dab57b 100644 --- a/src/system.h +++ b/src/system.h @@ -17,6 +17,13 @@ class System: public Allocator { public: typedef intptr_t Status; + enum FileType { + Unknown, + DoesNotExist, + File, + Directory + }; + class Thread { public: virtual ~Thread() { } @@ -55,6 +62,14 @@ class System: public Allocator { virtual void dispose() = 0; }; + class Region { + public: + virtual ~Region() { } + virtual const uint8_t* start() = 0; + virtual size_t length() = 0; + virtual void dispose() = 0; + }; + class Library { public: virtual ~Library() { } @@ -74,6 +89,8 @@ class System: public Allocator { virtual uint64_t call(void* function, uintptr_t* arguments, uint8_t* types, unsigned count, unsigned size, unsigned returnType) = 0; + virtual Status map(Region**, const char* name) = 0; + virtual FileType identify(const 
char* name) = 0; virtual Status load(Library**, const char* name, bool mapName, Library* next) = 0; virtual void exit(int code) = 0;