From 5d9f7b2bc33a2a10ef6e773b9b2584970b405585 Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Sat, 2 Jun 2012 09:06:22 -0600 Subject: [PATCH] add optional LZMA support for compressing embedded JARs, boot images, and shared objects --- makefile | 94 +++++++++++++++++--- readme.txt | 24 +++++- src/bootimage-fields.cpp | 2 + src/bootimage.cpp | 44 ++++++++-- src/compile.cpp | 8 +- src/compiler.cpp | 23 +++++ src/finder.cpp | 36 +++++++- src/lzma-decode.cpp | 49 +++++++++++ src/lzma-encode.cpp | 71 +++++++++++++++ src/lzma-util.h | 53 ++++++++++++ src/lzma.h | 29 +++++++ src/lzma/load.cpp | 115 +++++++++++++++++++++++++ src/lzma/main.cpp | 182 +++++++++++++++++++++++++++++++++++++++ src/machine.cpp | 62 ++++++++++++- src/machine.h | 2 + 15 files changed, 764 insertions(+), 30 deletions(-) create mode 100644 src/lzma-decode.cpp create mode 100644 src/lzma-encode.cpp create mode 100644 src/lzma-util.h create mode 100644 src/lzma.h create mode 100644 src/lzma/load.cpp create mode 100644 src/lzma/main.cpp diff --git a/makefile b/makefile index e8626126dd..b0af8f8ae8 100755 --- a/makefile +++ b/makefile @@ -33,6 +33,9 @@ endif ifneq ($(mode),fast) options := $(options)-$(mode) endif +ifneq ($(lzma),) + options := $(options)-lzma +endif ifeq ($(bootimage),true) options := $(options)-bootimage endif @@ -532,6 +535,7 @@ ifdef msvc strip = : endif +c-objects = $(foreach x,$(1),$(patsubst $(2)/%.c,$(3)/%.o,$(x))) cpp-objects = $(foreach x,$(1),$(patsubst $(2)/%.cpp,$(3)/%.o,$(x))) asm-objects = $(foreach x,$(1),$(patsubst $(2)/%.S,$(3)/%-asm.o,$(x))) java-classes = $(foreach x,$(1),$(patsubst $(2)/%.java,$(3)/%.class,$(x))) @@ -594,7 +598,10 @@ ifeq ($(continuations),true) asmflags += -DAVIAN_CONTINUATIONS endif -bootimage-generator-sources = $(src)/bootimage.cpp +bootimage-generator-sources = $(src)/bootimage.cpp +ifneq ($(lzma),) + bootimage-generator-sources += $(src)/lzma-encode.cpp +endif bootimage-generator-objects = \ $(call 
cpp-objects,$(bootimage-generator-sources),$(src),$(build)) bootimage-generator = $(build)/bootimage-generator @@ -629,17 +636,62 @@ generator-sources = \ $(src)/type-generator.cpp \ $(src)/$(build-system).cpp \ $(src)/finder.cpp + +ifneq ($(lzma),) + common-cflags += -I$(lzma)/C -DAVIAN_USE_LZMA -D_7ZIP_ST + + vm-sources += \ + $(src)/lzma-decode.cpp + + generator-sources += \ + $(src)/lzma-decode.cpp + + lzma-decode-sources = \ + $(lzma)/C/LzmaDec.c + + lzma-decode-objects = \ + $(call c-objects,$(lzma-decode-sources),$(lzma)/C,$(build)) + + lzma-encode-sources = \ + $(lzma)/C/LzmaEnc.c \ + $(lzma)/C/LzFind.c + + lzma-encode-objects = \ + $(call c-objects,$(lzma-encode-sources),$(lzma)/C,$(build)) + + lzma-encoder = $(build)/lzma/lzma + + lzma-encoder-cflags = -D__STDC_CONSTANT_MACROS -fno-rtti -fno-exceptions \ + -I$(lzma)/C + + lzma-encoder-sources = \ + $(src)/lzma/main.cpp + + lzma-encoder-objects = \ + $(call cpp-objects,$(lzma-encoder-sources),$(src),$(build)) + + lzma-encoder-lzma-sources = $(lzma-encode-sources) $(lzma-decode-sources) + + lzma-encoder-lzma-objects = \ + $(call c-objects,$(lzma-encoder-lzma-sources),$(lzma)/C,$(build)) + + lzma-loader = $(build)/lzma/load.o +endif + generator-cpp-objects = \ $(foreach x,$(1),$(patsubst $(2)/%.cpp,$(3)/%-build.o,$(x))) +generator-c-objects = \ + $(foreach x,$(1),$(patsubst $(2)/%.c,$(3)/%-build.o,$(x))) generator-objects = \ $(call generator-cpp-objects,$(generator-sources),$(src),$(build)) +generator-lzma-objects = \ + $(call generator-c-objects,$(lzma-decode-sources),$(lzma)/C,$(build)) generator = $(build)/generator converter-depends = \ $(src)/binaryToObject/tools.h \ $(src)/binaryToObject/endianness.h - converter-sources = \ $(src)/binaryToObject/tools.cpp \ $(src)/binaryToObject/elf.cpp \ @@ -760,7 +812,7 @@ test-flags = -cp $(build)/test test-args = $(test-flags) $(input) .PHONY: build -build: $(static-library) $(executable) $(dynamic-library) \ +build: $(static-library) $(executable) 
$(dynamic-library) $(lzma-loader) \ $(executable-dynamic) $(classpath-dep) $(test-dep) $(test-extra-dep) $(test-dep): $(classpath-dep) @@ -863,6 +915,9 @@ endef $(vm-cpp-objects): $(build)/%.o: $(src)/%.cpp $(vm-depends) $(compile-object) +$(build)/%.o: $(lzma)/C/%.c + $(compile-object) + $(vm-asm-objects): $(build)/%-asm.o: $(src)/%.S $(compile-asm-object) @@ -894,6 +949,16 @@ $(converter-objects) $(converter-tool-objects): $(build)/binaryToObject/%.o: $(s $(converter): $(converter-objects) $(converter-tool-objects) $(build-cc) $(^) -g -o $(@) +$(lzma-encoder-objects): $(build)/lzma/%.o: $(src)/lzma/%.cpp + @mkdir -p $(dir $(@)) + $(build-cxx) $(lzma-encoder-cflags) -c $(<) -o $(@) + +$(lzma-encoder): $(lzma-encoder-objects) $(lzma-encoder-lzma-objects) + $(build-cc) $(^) -g -o $(@) + +$(lzma-loader): $(src)/lzma/load.cpp + $(compile-object) + $(build)/classpath.jar: $(classpath-dep) $(classpath-jar-dep) @echo "creating $(@)" (wd=$$(pwd) && \ @@ -916,18 +981,25 @@ $(javahome-object): $(build)/javahome.jar $(converter) $(converter) $(<) $(@) _binary_javahome_jar_start \ _binary_javahome_jar_end $(platform) $(arch) -$(generator-objects): $(generator-depends) -$(generator-objects): $(build)/%-build.o: $(src)/%.cpp +define compile-generator-object @echo "compiling $(@)" @mkdir -p $(dir $(@)) $(build-cxx) -DPOINTER_SIZE=$(pointer-size) -O0 -g3 $(build-cflags) \ -c $(<) -o $(@) +endef + +$(generator-objects): $(generator-depends) +$(generator-objects): $(build)/%-build.o: $(src)/%.cpp + $(compile-generator-object) + +$(generator-lzma-objects): $(build)/%-build.o: $(lzma)/C/%.c + $(compile-generator-object) $(jni-objects): $(build)/%.o: $(classpath-src)/%.cpp $(compile-object) $(static-library): $(vm-objects) $(classpath-objects) $(vm-heapwalk-objects) \ - $(javahome-object) $(boot-javahome-object) + $(javahome-object) $(boot-javahome-object) $(lzma-decode-objects) @echo "creating $(@)" rm -rf $(@) $(ar) cru $(@) $(^) @@ -940,7 +1012,7 @@ $(bootimage-object) 
$(codeimage-object): $(bootimage-generator) executable-objects = $(vm-objects) $(classpath-objects) $(driver-object) \ $(vm-heapwalk-objects) $(boot-object) $(vm-classpath-objects) \ - $(javahome-object) $(boot-javahome-object) + $(javahome-object) $(boot-javahome-object) $(lzma-decode-objects) $(executable): $(executable-objects) @echo "linking $(@)" @@ -976,7 +1048,8 @@ $(bootimage-generator): $(bootimage-generator-objects) $(build-bootimage-generator): \ $(vm-objects) $(classpath-object) $(classpath-objects) \ - $(heapwalk-objects) $(bootimage-generator-objects) $(converter-objects) + $(heapwalk-objects) $(bootimage-generator-objects) $(converter-objects) \ + $(lzma-decode-objects) $(lzma-encode-objects) @echo "linking $(@)" ifeq ($(platform),windows) ifdef msvc @@ -994,7 +1067,8 @@ endif $(dynamic-library): $(vm-objects) $(dynamic-object) $(classpath-objects) \ $(vm-heapwalk-objects) $(boot-object) $(vm-classpath-objects) \ - $(classpath-libraries) $(javahome-object) $(boot-javahome-object) + $(classpath-libraries) $(javahome-object) $(boot-javahome-object) \ + $(lzma-decode-objects) @echo "linking $(@)" ifdef msvc $(ld) $(shared) $(lflags) $(^) -out:$(@) -PDB:$(@).pdb \ @@ -1020,7 +1094,7 @@ else endif $(strip) $(strip-all) $(@) -$(generator): $(generator-objects) +$(generator): $(generator-objects) $(generator-lzma-objects) @echo "linking $(@)" $(build-ld) $(^) $(build-lflags) -o $(@) diff --git a/readme.txt b/readme.txt index ac54ca700f..ca43b1fa9e 100644 --- a/readme.txt +++ b/readme.txt @@ -79,6 +79,7 @@ certain flags described below, all of which are optional. arch={i386,x86_64,powerpc,arm} \ process={compile,interpret} \ mode={debug,debug-fast,fast,small} \ + lzma= \ ios={true,false} \ bootimage={true,false} \ heapdump={true,false} \ @@ -104,6 +105,13 @@ certain flags described below, all of which are optional. assertions default: fast + * lzma - if set, support use of LZMA to compress embedded JARs and + boot images. 
The value of this option should be a directory + containing a recent LZMA SDK (available at + http://www.7-zip.org/sdk.html). Currently, only version 9.20 of + the SDK has been tested, but other versions might work. + default: not set + * ios - if true, cross-compile for iOS on OS X. Note that non-jailbroken iOS devices do not allow JIT compilation, so only process=interpret or bootimage=true builds will run on such @@ -366,8 +374,20 @@ EOF Step 3: Make an object file out of the jar. - $ ../build/${platform}-${arch}/binaryToObject/binaryToObject boot.jar boot-jar.o \ - _binary_boot_jar_start _binary_boot_jar_end ${platform} ${arch} + $ ../build/${platform}-${arch}/binaryToObject/binaryToObject boot.jar \ + boot-jar.o _binary_boot_jar_start _binary_boot_jar_end ${platform} ${arch} + +If you've built Avian using the lzma option, you may optionally +compress the jar before generating the object: + + $ ../build/${platform}-${arch}-lzma/lzma/lzma encode boot.jar boot.jar.lzma \ + && ../build/${platform}-${arch}-lzma/binaryToObject/binaryToObject \ + boot.jar.lzma boot-jar.o _binary_boot_jar_start _binary_boot_jar_end \ + ${platform} ${arch} + +Note that you'll need to specify "-Xbootclasspath:[lzma:bootJar]" +instead of "-Xbootclasspath:[bootJar]" in the next step if you've used +LZMA to compress the jar. Step 4: Write a driver which starts the VM and runs the desired main method.
Note the bootJar function, which will be called by the VM to diff --git a/src/bootimage-fields.cpp b/src/bootimage-fields.cpp index 7638830585..a090268de2 100644 --- a/src/bootimage-fields.cpp +++ b/src/bootimage-fields.cpp @@ -5,6 +5,8 @@ FIELD(magic) +FIELD(initialized) + FIELD(heapSize) FIELD(codeSize) diff --git a/src/bootimage.cpp b/src/bootimage.cpp index a4555a9f6e..2affc13f6f 100644 --- a/src/bootimage.cpp +++ b/src/bootimage.cpp @@ -17,6 +17,7 @@ #include "assembler.h" #include "target.h" #include "binaryToObject/tools.h" +#include "lzma.h" // since we aren't linking against libstdc++, we must implement this // ourselves: @@ -1275,7 +1276,8 @@ writeBootImage2(Thread* t, OutputStream* bootimageOutput, OutputStream* codeOutp BootImage* image, uint8_t* code, const char* className, const char* methodName, const char* methodSpec, const char* bootimageStart, const char* bootimageEnd, - const char* codeimageStart, const char* codeimageEnd) + const char* codeimageStart, const char* codeimageEnd, + bool useLZMA) { setRoot(t, Machine::OutOfMemoryError, make(t, type(t, Machine::OutOfMemoryErrorType))); @@ -1594,6 +1596,7 @@ writeBootImage2(Thread* t, OutputStream* bootimageOutput, OutputStream* codeOutp heapWalker->dispose(); image->magic = BootImage::Magic; + image->initialized = 0; fprintf(stderr, "class count %d string count %d call count %d\n" "heap size %d code size %d\n", @@ -1656,7 +1659,27 @@ writeBootImage2(Thread* t, OutputStream* bootimageOutput, OutputStream* codeOutp SymbolInfo(bootimageData.length, bootimageEnd) }; - platform->writeObject(bootimageOutput, Slice(bootimageSymbols, 2), Slice(bootimageData.data, bootimageData.length), Platform::Writable, TargetBytesPerWord); + uint8_t* bootimage; + unsigned bootimageLength; + if (useLZMA) { +#ifdef AVIAN_USE_LZMA + bootimage = encodeLZMA(t->m->system, t->m->heap, bootimageData.data, + bootimageData.length, &bootimageLength); + + fprintf(stderr, "compressed heap size %d\n", bootimageLength); +#else + 
abort(t); +#endif + } else { + bootimage = bootimageData.data; + bootimageLength = bootimageData.length; + } + + platform->writeObject(bootimageOutput, Slice(bootimageSymbols, 2), Slice(bootimage, bootimageLength), Platform::Writable, TargetBytesPerWord); + + if (useLZMA) { + t->m->heap->free(bootimage, bootimageLength); + } compilationHandler.symbols.add(SymbolInfo(0, codeimageStart)); compilationHandler.symbols.add(SymbolInfo(image->codeSize, codeimageEnd)); @@ -1684,10 +1707,12 @@ writeBootImage(Thread* t, uintptr_t* arguments) const char* bootimageEnd = reinterpret_cast(arguments[8]); const char* codeimageStart = reinterpret_cast(arguments[9]); const char* codeimageEnd = reinterpret_cast(arguments[10]); + bool useLZMA = arguments[11]; writeBootImage2 (t, bootimageOutput, codeOutput, image, code, className, methodName, - methodSpec, bootimageStart, bootimageEnd, codeimageStart, codeimageEnd); + methodSpec, bootimageStart, bootimageEnd, codeimageStart, codeimageEnd, + useLZMA); return 1; } @@ -1746,7 +1771,11 @@ bool ArgParser::parse(int ac, const char** av) { } for(Arg* arg = first; arg; arg = arg->next) { if(strcmp(arg->name, &av[i][1]) == 0) { - state = arg; + if (arg->desc == 0) { + arg->value = "true"; + } else { + state = arg; + } } } if(!state) { @@ -1801,6 +1830,8 @@ public: char* codeimageStart; char* codeimageEnd; + bool useLZMA; + bool maybeSplit(const char* src, char*& destA, char*& destB) { if(src) { const char* split = strchr(src, ':'); @@ -1830,6 +1861,7 @@ public: Arg entry(parser, false, "entry", "[.[]]"); Arg bootimageSymbols(parser, false, "bootimage-symbols", ":"); Arg codeimageSymbols(parser, false, "codeimage-symbols", ":"); + Arg useLZMA(parser, false, "use-lzma", 0); if(!parser.parse(ac, av)) { parser.printUsage(av[0]); @@ -1839,6 +1871,7 @@ public: this->classpath = classpath.value; this->bootimage = bootimage.value; this->codeimage = codeimage.value; + this->useLZMA = useLZMA.value != 0; if(entry.value) { if(const char* entryClassEnd = 
strchr(entry.value, '.')) { @@ -1988,7 +2021,8 @@ main(int ac, const char** av) reinterpret_cast(args.bootimageStart), reinterpret_cast(args.bootimageEnd), reinterpret_cast(args.codeimageStart), - reinterpret_cast(args.codeimageEnd) + reinterpret_cast(args.codeimageEnd), + reinterpret_cast(args.useLZMA) }; run(t, writeBootImage, arguments); diff --git a/src/compile.cpp b/src/compile.cpp index 157f1fd24b..86edcb033e 100644 --- a/src/compile.cpp +++ b/src/compile.cpp @@ -9631,9 +9631,7 @@ boot(MyThread* t, BootImage* image, uint8_t* code) // fprintf(stderr, "code from %p to %p\n", // code, code + image->codeSize); - static bool fixed = false; - - if (not fixed) { + if (not image->initialized) { fixupHeap(t, heapMap, heapMapSizeInWords, heap); } @@ -9680,7 +9678,7 @@ boot(MyThread* t, BootImage* image, uint8_t* code) findThunks(t, image, code); - if (fixed) { + if (image->initialized) { resetRuntimeState (t, classLoaderMap(t, root(t, Machine::BootLoader)), heap, image->heapSize); @@ -9703,7 +9701,7 @@ boot(MyThread* t, BootImage* image, uint8_t* code) (t, classLoaderMap(t, root(t, Machine::AppLoader)), image, code); } - fixed = true; + image->initialized = true; setRoot(t, Machine::BootstrapClassMap, makeHashMap(t, 0, 0)); } diff --git a/src/compiler.cpp b/src/compiler.cpp index 812a38a9e9..213c6e81ae 100644 --- a/src/compiler.cpp +++ b/src/compiler.cpp @@ -971,6 +971,29 @@ valid(Read* r) return r and r->valid(); } +#ifndef NDEBUG + +bool +hasBuddy(Context* c, Value* a, Value* b) +{ + if (a == b) { + return true; + } + + int i = 0; + for (Value* p = a->buddy; p != a; p = p->buddy) { + if (p == b) { + return true; + } + if (++i > 1000) { + abort(c); + } + } + return false; +} + +#endif // not NDEBUG + Read* live(Context* c UNUSED, Value* v) { diff --git a/src/finder.cpp b/src/finder.cpp index 1a08e65008..ba8aef75f4 100644 --- a/src/finder.cpp +++ b/src/finder.cpp @@ -12,6 +12,7 @@ #include "system.h" #include "tokenizer.h" #include "finder.h" +#include "lzma.h" using 
namespace vm; @@ -173,11 +174,12 @@ class DirectoryElement: public Element { class PointerRegion: public System::Region { public: PointerRegion(System* s, Allocator* allocator, const uint8_t* start, - size_t length): + size_t length, bool freePointer = false): s(s), allocator(allocator), start_(start), - length_(length) + length_(length), + freePointer(freePointer) { } virtual const uint8_t* start() { @@ -189,6 +191,9 @@ class PointerRegion: public System::Region { } virtual void dispose() { + if (freePointer) { + allocator->free(start_, length_); + } allocator->free(this, sizeof(*this)); } @@ -196,6 +201,7 @@ class PointerRegion: public System::Region { Allocator* allocator; const uint8_t* start_; size_t length_; + bool freePointer; }; class DataRegion: public System::Region { @@ -556,7 +562,10 @@ class BuiltinElement: public JarElement { virtual void init() { if (index == 0) { if (s->success(s->load(&library, libraryName))) { - void* p = library->resolve(name); + bool lzma = strncmp("lzma:", name, 5) == 0; + const char* symbolName = lzma ? 
name + 5 : name; + + void* p = library->resolve(symbolName); if (p) { uint8_t* (*function)(unsigned*); memcpy(&function, &p, BytesPerWord); @@ -564,10 +573,29 @@ class BuiltinElement: public JarElement { unsigned size; uint8_t* data = function(&size); if (data) { + bool freePointer; + if (lzma) { +#ifdef AVIAN_USE_LZMA + unsigned outSize; + data = decodeLZMA(s, allocator, data, size, &outSize); + size = outSize; + freePointer = true; +#else + abort(s); +#endif + } else { + freePointer = false; + } region = new (allocator->allocate(sizeof(PointerRegion))) - PointerRegion(s, allocator, data, size); + PointerRegion(s, allocator, data, size, freePointer); index = JarIndex::open(s, allocator, region); + } else if (DebugFind) { + fprintf(stderr, "%s in %s returned null\n", symbolName, + libraryName); } + } else if (DebugFind) { + fprintf(stderr, "unable to find %s in %s\n", symbolName, + libraryName); } } } diff --git a/src/lzma-decode.cpp b/src/lzma-decode.cpp new file mode 100644 index 0000000000..7710934ddd --- /dev/null +++ b/src/lzma-decode.cpp @@ -0,0 +1,49 @@ +/* Copyright (c) 2012, Avian Contributors + + Permission to use, copy, modify, and/or distribute this software + for any purpose with or without fee is hereby granted, provided + that the above copyright notice and this permission notice appear + in all copies. + + There is NO WARRANTY for this software. See license.txt for + details. 
*/
+
+#include "lzma-util.h"
+#include "LzmaDec.h"
+
+using namespace vm;
+
+namespace vm {
+
+// Decodes a buffer laid out as 5 LZMA property bytes, a 4-byte
+// uncompressed size, 4 bytes that are not read here, then the
+// payload.  Returns a buffer of *outSize bytes allocated from 'a'.
+uint8_t*
+decodeLZMA(System* s, Allocator* a, uint8_t* in, unsigned inSize,
+           unsigned* outSize)
+{
+  const unsigned PropHeaderSize = 5;
+  const unsigned HeaderSize = 13;
+  expect(s, inSize >= HeaderSize);
+  int32_t outSize32;
+  memcpy(&outSize32, in + PropHeaderSize, 4);
+  expect(s, outSize32 >= 0);
+  SizeT outSizeT = outSize32;
+  uint8_t* out = static_cast<uint8_t*>(a->allocate(outSize32));
+
+  // The payload starts after the header, so only inSize - HeaderSize
+  // bytes are available to the decoder.
+  SizeT inSizeT = inSize - HeaderSize;
+  LzmaAllocator allocator(a);
+  ELzmaStatus status;
+  int result = LzmaDecode
+    (out, &outSizeT, in + HeaderSize, &inSizeT, in, PropHeaderSize,
+     LZMA_FINISH_END, &status, &(allocator.allocator));
+  expect(s, result == SZ_OK);
+  expect(s, status == LZMA_STATUS_FINISHED_WITH_MARK);
+  *outSize = outSize32;
+  return out;
+}
+
+} // namespace vm
+
diff --git a/src/lzma-encode.cpp b/src/lzma-encode.cpp
new file mode 100644
index 0000000000..d1b002476a
--- /dev/null
+++ b/src/lzma-encode.cpp
@@ -0,0 +1,71 @@
+/* Copyright (c) 2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software. See license.txt for
+   details.
*/
+
+#include "lzma-util.h"
+#include "LzmaEnc.h"
+
+using namespace vm;
+
+namespace {
+
+// Progress callback that unconditionally reports SZ_OK.
+SRes
+myProgress(void*, UInt64, UInt64)
+{
+  return SZ_OK;
+}
+
+} // namespace
+
+namespace vm {
+
+// Compresses 'in' and returns a buffer of *outSize bytes allocated
+// from 'a': 5 LZMA property bytes, the 4-byte uncompressed size, 4
+// zero bytes, then the compressed payload (written with an end mark).
+uint8_t*
+encodeLZMA(System* s, Allocator* a, uint8_t* in, unsigned inSize,
+           unsigned* outSize)
+{
+  const unsigned PropHeaderSize = 5;
+  const unsigned HeaderSize = 13;
+
+  // Reserve the header in addition to the payload capacity so the
+  // encoder is never told it may write past the end of the buffer.
+  const unsigned payloadCapacity = inSize * 2;
+  const unsigned bufferSize = HeaderSize + payloadCapacity;
+  uint8_t* buffer = static_cast<uint8_t*>(a->allocate(bufferSize));
+
+  LzmaAllocator allocator(a);
+  CLzmaEncProps props;
+  LzmaEncProps_Init(&props);
+  props.level = 9;
+  props.writeEndMark = 1;
+  ICompressProgress progress = { myProgress };
+  SizeT propsSize = PropHeaderSize;
+
+  // Bytes 5-8 hold the size; bytes 9-12 are zeroed for determinism.
+  int32_t inSize32 = inSize;
+  memset(buffer + PropHeaderSize, 0, HeaderSize - PropHeaderSize);
+  memcpy(buffer + PropHeaderSize, &inSize32, 4);
+  SizeT outSizeT = payloadCapacity;
+  int result = LzmaEncode
+    (buffer + HeaderSize, &outSizeT, in, inSize, &props, buffer,
+     &propsSize, 1, &progress, &(allocator.allocator), &(allocator.allocator));
+  expect(s, result == SZ_OK);
+
+  *outSize = outSizeT + HeaderSize;
+  uint8_t* out = static_cast<uint8_t*>(a->allocate(*outSize));
+  memcpy(out, buffer, *outSize);
+  a->free(buffer, bufferSize);
+  return out;
+}
+
+} // namespace vm
+
diff --git a/src/lzma-util.h b/src/lzma-util.h
new file mode 100644
index 0000000000..105a9e4077
--- /dev/null
+++ b/src/lzma-util.h
@@ -0,0 +1,53 @@
+/* Copyright (c) 2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software. See license.txt for
+   details.
*/
+
+#ifndef LZMA_UTIL_H
+#define LZMA_UTIL_H
+
+#include "lzma.h"
+#include "Types.h"
+#include "system.h"
+#include "allocator.h"
+
+namespace vm {
+
+const unsigned Padding = 16; // bytes in front of each block for its size
+
+// Adapts an Allocator to the LZMA SDK's ISzAlloc.  Keep 'allocator' first:
+// the callbacks cast their ISzAlloc* argument back to LzmaAllocator*.
+class LzmaAllocator {
+ public:
+  LzmaAllocator(Allocator* a): a(a) {
+    allocator.Alloc = allocate;
+    allocator.Free = free;
+  }
+  ISzAlloc allocator;
+  Allocator* a;
+
+  static void* allocate(void* allocator, size_t size) {
+    uint8_t* p = static_cast<uint8_t*>
+      (static_cast<LzmaAllocator*>(allocator)->a->allocate(size + Padding));
+    memcpy(p, &size, sizeof(size)); // whole size_t: no 32-bit truncation
+    return p + Padding;
+  }
+
+  static void free(void* allocator, void* address) {
+    if (address) {
+      void* p = static_cast<uint8_t*>(address) - Padding;
+      size_t size;
+      memcpy(&size, p, sizeof(size));
+      static_cast<LzmaAllocator*>(allocator)->a->free(p, size + Padding);
+    }
+  }
+};
+
+} // namespace vm
+
+#endif // LZMA_UTIL_H
diff --git a/src/lzma.h b/src/lzma.h
new file mode 100644
index 0000000000..5e6ba35a82
--- /dev/null
+++ b/src/lzma.h
@@ -0,0 +1,29 @@
+/* Copyright (c) 2012, Avian Contributors
+
+   Permission to use, copy, modify, and/or distribute this software
+   for any purpose with or without fee is hereby granted, provided
+   that the above copyright notice and this permission notice appear
+   in all copies.
+
+   There is NO WARRANTY for this software. See license.txt for
+   details.
*/ + +#ifndef LZMA_H +#define LZMA_H + +#include "system.h" +#include "allocator.h" + +namespace vm { + +uint8_t* +decodeLZMA(System* s, Allocator* a, uint8_t* in, unsigned inSize, + unsigned* outSize); + +uint8_t* +encodeLZMA(System* s, Allocator* a, uint8_t* in, unsigned inSize, + unsigned* outSize); + +} // namespace vm + +#endif // LZMA_H diff --git a/src/lzma/load.cpp b/src/lzma/load.cpp new file mode 100644 index 0000000000..f4fc57965c --- /dev/null +++ b/src/lzma/load.cpp @@ -0,0 +1,115 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "LzmaDec.h" + +#ifdef __MINGW32__ +# define EXPORT __declspec(dllexport) +#else +# define EXPORT __attribute__ ((visibility("default"))) +#endif + +#if defined __MINGW32__ && ! defined __x86_64__ +# define SYMBOL(x) binary_exe_##x +#else +# define SYMBOL(x) _binary_exe_##x +#endif + +extern "C" { + + extern const uint8_t SYMBOL(start)[]; + extern const uint8_t SYMBOL(end)[]; + +} // extern "C" + +namespace { + +void* +myAllocate(void*, size_t size) +{ + return malloc(size); +} + +void +myFree(void*, void* address) +{ + free(address); +} + +} // namespace + +int +main(int ac, const char** av) +{ + const unsigned PropHeaderSize = 5; + const unsigned HeaderSize = 13; + + SizeT inSize = SYMBOL(end) - SYMBOL(start); + + int32_t outSize32; + memcpy(&outSize32, SYMBOL(start) + PropHeaderSize, 4); + SizeT outSize = outSize32; + + uint8_t* out = static_cast(malloc(outSize)); + if (out) { + ISzAlloc allocator = { myAllocate, myFree }; + ELzmaStatus status = LZMA_STATUS_NOT_SPECIFIED; + + if (SZ_OK == LzmaDecode + (out, &outSize, SYMBOL(start) + HeaderSize, &inSize, SYMBOL(start), + PropHeaderSize, LZMA_FINISH_END, &status, &allocator)) + { + char name[L_tmpnam]; + if (tmpnam(name)) { + int file = open(name, O_CREAT | O_EXCL | O_WRONLY, S_IRWXU); + if (file != -1) { + SizeT result = write(file, out, outSize); + free(out); + + if (close(file) == 0 and outSize == result) { + 
void* library = dlopen(name, RTLD_LAZY | RTLD_GLOBAL); + unlink(name); + + if (library) { + void* main = dlsym(library, "main"); + if (main) { + int (*mainFunction)(int, const char**); + memcpy(&mainFunction, &main, sizeof(void*)); + return mainFunction(ac, av); + } else { + fprintf(stderr, "unable to find main in %s", name); + } + } else { + fprintf(stderr, "unable to dlopen %s: %s\n", name, dlerror()); + } + } else { + unlink(name); + + fprintf(stderr, "close or write failed; tried %d, got %d; %s\n", + static_cast(outSize), static_cast(result), + strerror(errno)); + } + } else { + fprintf(stderr, "unable to open %s\n", name); + } + } else { + fprintf(stderr, "unable to make temporary file name\n"); + } + } else { + fprintf(stderr, "unable to decode LZMA data\n"); + } + } else { + fprintf(stderr, "unable to allocate buffer of size %d\n", + static_cast(outSize)); + } + + return -1; +} diff --git a/src/lzma/main.cpp b/src/lzma/main.cpp new file mode 100644 index 0000000000..a9a558d897 --- /dev/null +++ b/src/lzma/main.cpp @@ -0,0 +1,182 @@ +#include +#include +#include +#include +#include +#include +#ifdef WIN32 +#include +#else +#include +#include +#endif +#include + +#include "LzmaEnc.h" +#include "LzmaDec.h" + +namespace { + +void* +myAllocate(void*, size_t size) +{ + return malloc(size); +} + +void +myFree(void*, void* address) +{ + free(address); +} + +SRes +myProgress(void*, UInt64, UInt64) +{ + return SZ_OK; +} + +void +usageAndExit(const char* program) +{ + fprintf(stderr, + "usage: %s {encode|decode} " + "[]", program); + exit(-1); +} + +} // namespace + +int +main(int argc, const char** argv) +{ + if (argc < 4 or argc > 5) { + usageAndExit(argv[0]); + } + + bool encode = strcmp(argv[1], "encode") == 0; + + uint8_t* data = 0; + unsigned size; + int fd = open(argv[2], O_RDONLY); + if (fd != -1) { + struct stat s; + int r = fstat(fd, &s); + if (r != -1) { +#ifdef WIN32 + HANDLE fm; + HANDLE h = (HANDLE) _get_osfhandle (fd); + + fm = CreateFileMapping( + h, + 
NULL, + PAGE_READONLY, + 0, + 0, + NULL); + data = static_cast(MapViewOfFile( + fm, + FILE_MAP_READ, + 0, + 0, + s.st_size)); + + CloseHandle(fm); +#else + data = static_cast + (mmap(0, s.st_size, PROT_READ, MAP_PRIVATE, fd, 0)); +#endif + size = s.st_size; + } + close(fd); + } + + bool success = false; + + if (data) { + const unsigned PropHeaderSize = 5; + const unsigned HeaderSize = 13; + + SizeT outSize; + if (encode) { + outSize = size * 2; + } else { + int32_t outSize32; + memcpy(&outSize32, data + PropHeaderSize, 4); + if (outSize32 >= 0) { + outSize = outSize32; + } else if (argc == 5) { + outSize = atoi(argv[4]); + } else { + outSize = -1; + } + } + + if (outSize >= 0) { + uint8_t* out = static_cast(malloc(outSize)); + if (out) { + SizeT inSize = size; + ISzAlloc allocator = { myAllocate, myFree }; + ELzmaStatus status = LZMA_STATUS_NOT_SPECIFIED; + int result; + if (encode) { + CLzmaEncProps props; + LzmaEncProps_Init(&props); + props.level = 9; + props.writeEndMark = 1; + + ICompressProgress progress = { myProgress }; + + SizeT propsSize = PropHeaderSize; + + int32_t inSize32 = inSize; + memcpy(out + PropHeaderSize, &inSize32, 4); + + result = LzmaEncode + (out + HeaderSize, &outSize, data, inSize, &props, out, + &propsSize, 1, &progress, &allocator, &allocator); + + outSize += HeaderSize; + } else { + result = LzmaDecode + (out, &outSize, data + HeaderSize, &inSize, data, PropHeaderSize, + LZMA_FINISH_END, &status, &allocator); + } + + if (result == SZ_OK) { + FILE* outFile = fopen(argv[3], "wb"); + + if (outFile) { + if (fwrite(out, outSize, 1, outFile) == 1) { + success = true; + } else { + fprintf(stderr, "unable to write to %s\n", argv[3]); + } + + fclose(outFile); + } else { + fprintf(stderr, "unable to open %s\n", argv[3]); + } + } else { + fprintf(stderr, "unable to %s data: result %d status %d\n", + encode ? 
"encode" : "decode", result, status); + } + + free(out); + } else { + fprintf(stderr, "unable to allocate output buffer\n"); + } + } else { + fprintf(stderr, "unable to determine uncompressed size\n"); + } + +#ifdef WIN32 + UnmapViewOfFile(data); +#else + munmap(data, size); +#endif + } else { + perror(argv[0]); + } + + return (success ? 0 : -1); +} diff --git a/src/machine.cpp b/src/machine.cpp index 53b6a0f66d..6e242dd8b8 100644 --- a/src/machine.cpp +++ b/src/machine.cpp @@ -15,6 +15,7 @@ #include "constants.h" #include "processor.h" #include "arch.h" +#include "lzma.h" using namespace vm; @@ -48,6 +49,40 @@ join(Thread* t, Thread* o) } } +#ifndef NDEBUG + +bool +find(Thread* t, Thread* o) +{ + return (t == o) + or (t->peer and find(t->peer, o)) + or (t->child and find(t->child, o)); +} + +unsigned +count(Thread* t, Thread* o) +{ + unsigned c = 0; + + if (t != o) ++ c; + if (t->peer) c += count(t->peer, o); + if (t->child) c += count(t->child, o); + + return c; +} + +Thread** +fill(Thread* t, Thread* o, Thread** array) +{ + if (t != o) *(array++) = t; + if (t->peer) array = fill(t->peer, o, array); + if (t->child) array = fill(t->child, o, array); + + return array; +} + +#endif // not NDEBUG + void dispose(Thread* t, Thread* o, bool remove) { @@ -203,8 +238,8 @@ turnOffTheLights(Thread* t) Finder* af = m->appFinder; c->dispose(); - m->dispose(); h->disposeFixies(); + m->dispose(); p->dispose(); bf->dispose(); af->dispose(); @@ -2945,6 +2980,10 @@ Machine::dispose() heap->free(heapPool[i], ThreadHeapSizeInBytes); } + if (bootimage) { + heap->free(bootimage, bootimageSize); + } + heap->free(arguments, sizeof(const char*) * argumentCount); heap->free(properties, sizeof(const char*) * propertyCount); @@ -3000,13 +3039,28 @@ Thread::init() uint8_t* code = 0; const char* imageFunctionName = findProperty(m, "avian.bootimage"); if (imageFunctionName) { - void* imagep = m->libraries->resolve(imageFunctionName); + bool lzma = strncmp("lzma:", imageFunctionName, 5) == 0; + 
const char* symbolName + = lzma ? imageFunctionName + 5 : imageFunctionName; + + void* imagep = m->libraries->resolve(symbolName); if (imagep) { - BootImage* (*imageFunction)(unsigned*); + uint8_t* (*imageFunction)(unsigned*); memcpy(&imageFunction, &imagep, BytesPerWord); unsigned size; - image = imageFunction(&size); + uint8_t* imageBytes = imageFunction(&size); + if (lzma) { +#ifdef AVIAN_USE_LZMA + m->bootimage = image = reinterpret_cast + (decodeLZMA + (m->system, m->heap, imageBytes, size, &(m->bootimageSize))); +#else + abort(this); +#endif + } else { + image = reinterpret_cast(imageBytes); + } const char* codeFunctionName = findProperty(m, "avian.codeimage"); if (codeFunctionName) { diff --git a/src/machine.h b/src/machine.h index 05bc01b1e6..bb3b97a43c 100644 --- a/src/machine.h +++ b/src/machine.h @@ -1316,6 +1316,7 @@ class Machine { System::Monitor* shutdownLock; System::Library* libraries; FILE* errorLog; + BootImage* bootimage; object types; object roots; object finalizers; @@ -1332,6 +1333,7 @@ class Machine { JNIEnvVTable jniEnvVTable; uintptr_t* heapPool[ThreadHeapPoolSize]; unsigned heapPoolIndex; + unsigned bootimageSize; }; void