From ef4a8430ad853785003bf6b1679323a7a4c728ba Mon Sep 17 00:00:00 2001 From: Joel Dice Date: Tue, 13 Oct 2009 19:00:16 -0600 Subject: [PATCH] add binaryToObject utility We use this utility instead of objcopy to embed data into object files because it offers more control over e.g. section alignment, which is important for bootimage builds. --- makefile | 90 ++--- readme.txt | 44 +-- src/binaryToMacho.cpp | 189 ---------- .../elf.cpp} | 245 ++++++++++--- src/binaryToObject/mach-o.cpp | 331 ++++++++++++++++++ src/binaryToObject/main.cpp | 182 ++++++++++ src/binaryToObject/pe.cpp | 229 ++++++++++++ 7 files changed, 978 insertions(+), 332 deletions(-) delete mode 100644 src/binaryToMacho.cpp rename src/{binaryToElf.cpp => binaryToObject/elf.cpp} (59%) create mode 100644 src/binaryToObject/mach-o.cpp create mode 100644 src/binaryToObject/main.cpp create mode 100644 src/binaryToObject/pe.cpp diff --git a/makefile b/makefile index 58847aa29c..5523fc80e5 100644 --- a/makefile +++ b/makefile @@ -82,7 +82,6 @@ cc = $(build-cc) ar = ar ranlib = ranlib dlltool = dlltool -objcopy = objcopy vg = nice valgrind --num-callers=32 --db-attach=yes --freelist-vol=100000000 vg += --leak-check=full --suppressions=valgrind.supp db = gdb --args @@ -118,8 +117,6 @@ lflags = $(common-lflags) -lpthread -ldl system = posix asm = x86 -object-arch = i386:x86-64 -object-format = elf64-x86-64 pointer-size = 8 so-prefix = lib @@ -129,17 +126,11 @@ shared = -shared native-path = echo -binaryToElf = $(native-build)/binaryToElf - ifeq ($(arch),i386) - object-arch = i386 - object-format = elf32-i386 pointer-size = 4 endif ifeq ($(arch),powerpc) asm = powerpc - object-arch = powerpc - object-format = elf32-powerpc pointer-size = 4 endif @@ -151,20 +142,15 @@ ifeq ($(platform),darwin) endif rdynamic = strip-all = -S -x - binaryToMacho = $(native-build)/binaryToMacho - binaryToElf = so-suffix = .jnilib shared = -dynamiclib endif ifeq ($(platform),windows) - binaryToElf = - inc = "$(root)/win32/include" lib 
= "$(root)/win32/lib" system = windows - object-format = pe-i386 so-prefix = so-suffix = .dll @@ -179,7 +165,6 @@ ifeq ($(platform),windows) dlltool = i586-mingw32msvc-dlltool ar = i586-mingw32msvc-ar ranlib = i586-mingw32msvc-ranlib - objcopy = i586-mingw32msvc-objcopy strip = i586-mingw32msvc-strip else common-cflags += "-I$(JAVA_HOME)/include/win32" @@ -199,12 +184,9 @@ ifeq ($(platform),windows) dlltool = x86_64-pc-mingw32-dlltool ar = x86_64-pc-mingw32-ar ranlib = x86_64-pc-mingw32-ranlib - objcopy = x86_64-pc-mingw32-objcopy strip = x86_64-pc-mingw32-strip inc = "$(root)/win64/include" lib = "$(root)/win64/lib" - pointer-size = 8 - object-format = pe-x86-64 endif endif @@ -272,6 +254,7 @@ ifdef msvc strip = : endif +cpp-program = $(patsubst $(2)/%.cpp,$(3)/%,$(1)) cpp-objects = $(foreach x,$(1),$(patsubst $(2)/%.cpp,$(3)/%.o,$(x))) asm-objects = $(foreach x,$(1),$(patsubst $(2)/%.S,$(3)/%-asm.o,$(x))) java-classes = $(foreach x,$(1),$(patsubst $(2)/%.java,$(3)/%.class,$(x))) @@ -390,6 +373,15 @@ generator-objects = \ $(call cpp-objects,$(generator-sources),$(src),$(native-build)) generator = $(native-build)/generator +converter-objects = \ + $(native-build)/binaryToObject-main.o \ + $(native-build)/binaryToObject-elf64.o \ + $(native-build)/binaryToObject-elf32.o \ + $(native-build)/binaryToObject-mach-o64.o \ + $(native-build)/binaryToObject-mach-o32.o \ + $(native-build)/binaryToObject-pe.o +converter = $(native-build)/binaryToObject + static-library = $(native-build)/lib$(name).a executable = $(native-build)/$(name)${exe-suffix} dynamic-library = $(native-build)/$(so-prefix)$(name)$(so-suffix) @@ -597,20 +589,31 @@ $(build)/classpath.jar: $(classpath-dep) cd $(classpath-build) && \ $(jar) c0f "$$($(native-path) "$${wd}/$(@)")" .) 
-$(binaryToMacho): $(src)/binaryToMacho.cpp - $(cxx) $(^) $(call output,$(@)) +$(native-build)/binaryToObject-main.o: $(src)/binaryToObject/main.cpp + $(build-cxx) -c $(^) -o $(@) -$(classpath-object): $(build)/classpath.jar $(binaryToMacho) +$(native-build)/binaryToObject-elf64.o: $(src)/binaryToObject/elf.cpp + $(build-cxx) -DBITS_PER_WORD=64 -c $(^) -o $(@) + +$(native-build)/binaryToObject-elf32.o: $(src)/binaryToObject/elf.cpp + $(build-cxx) -DBITS_PER_WORD=32 -c $(^) -o $(@) + +$(native-build)/binaryToObject-mach-o64.o: $(src)/binaryToObject/mach-o.cpp + $(build-cxx) -DBITS_PER_WORD=64 -c $(^) -o $(@) + +$(native-build)/binaryToObject-mach-o32.o: $(src)/binaryToObject/mach-o.cpp + $(build-cxx) -DBITS_PER_WORD=32 -c $(^) -o $(@) + +$(native-build)/binaryToObject-pe.o: $(src)/binaryToObject/pe.cpp + $(build-cxx) -c $(^) -o $(@) + +$(converter): $(converter-objects) + $(build-cxx) $(^) -o $(@) + +$(classpath-object): $(build)/classpath.jar $(converter) @echo "creating $(@)" -ifeq ($(platform),darwin) - $(binaryToMacho) $(asm) $(build)/classpath.jar __TEXT __text \ - __binary_classpath_jar_start __binary_classpath_jar_end > $(@) -else - (wd=$$(pwd) && \ - cd $(build) && \ - $(objcopy) -I binary classpath.jar \ - -O $(object-format) -B $(object-arch) "$${wd}/$(@)") -endif + $(converter) $(<) $(@) _binary_classpath_jar_start \ + _binary_classpath_jar_end $(platform) $(arch) $(generator-objects): $(native-build)/%.o: $(src)/%.cpp @echo "compiling $(@)" @@ -631,30 +634,11 @@ $(static-library): $(vm-objects) $(jni-objects) $(vm-heapwalk-objects) $(bootimage-bin): $(bootimage-generator) $(<) $(classpath-build) $(@) -$(binaryToElf): $(src)/binaryToElf.cpp - $(cxx) $(^) $(call output,$(@)) - -# we would always use objcopy here except (1) it's not supported on -# Darwin, and (2) it won't let us specify per-section alignment -# requirements -$(bootimage-object): $(bootimage-bin) $(binaryToMacho) $(binaryToElf) +$(bootimage-object): $(bootimage-bin) $(converter) @echo 
"creating $(@)" -ifeq ($(platform),darwin) - $(binaryToMacho) $(asm) $(<) __BOOT __boot \ - __binary_bootimage_bin_start __binary_bootimage_bin_end > $(@) -else -ifeq ($(platform),linux) - $(binaryToElf) $(<) .boot \ - _binary_bootimage_bin_start _binary_bootimage_bin_end > $(@) -else - (wd=$$(pwd) && \ - cd $(native-build) && \ - $(objcopy) --rename-section=.data=.boot -I binary bootimage.bin \ - -O $(object-format) -B $(object-arch) "$${wd}/$(@).tmp" && \ - $(objcopy) --set-section-flags .boot=alloc,load,code "$${wd}/$(@).tmp" \ - "$${wd}/$(@)") -endif -endif + $(converter) $(<) $(@) _binary_bootimage_bin_start \ + _binary_bootimage_bin_end $(platform) $(arch) $(pointer-size) \ + writable executable $(gnu-object-dep): $(gnu-libraries) @mkdir -p $(build)/gnu-objects diff --git a/readme.txt b/readme.txt index e2ecba8fc5..65e86cbd5a 100644 --- a/readme.txt +++ b/readme.txt @@ -64,7 +64,6 @@ Build requirements include: * GNU make 3.80 or later * GCC 3.4 or later * JDK 1.5 or later - * GNU binutils 2.17 or later (not needed on OS X) * MinGW 3.4 or later (only if compiling for Windows) * zlib 1.2.3 or later @@ -196,29 +195,8 @@ EOF Step 3: Make an object file out of the jar. 
-for linux-i386: - - $ objcopy -I binary boot.jar -O elf32-i386 -B i386 boot-jar.o - -for linux-x86_64: - - $ objcopy -I binary boot.jar -O elf64-x86-64 -B i386:x86-64 boot-jar.o - -for windows-i386: - - $ objcopy -I binary boot.jar -O pe-i386 -B i386 boot-jar.o - -for darwin-i386: (objcopy is not currently supported on this platform, -so we use the binaryToMacho utility instead) - - $ ../build/darwin-i386/binaryToMacho x86 boot.jar \ - __TEXT __text __binary_boot_jar_start __binary_boot_jar_end > boot-jar.o - -for darwin-powerpc: - - $ ../build/darwin-i386/binaryToMacho powerpc boot.jar \ - __TEXT __text __binary_boot_jar_start __binary_boot_jar_end > boot-jar.o - + $ ../build/${platform}-${arch}/binaryToObject boot.jar boot-jar.o \ + _binary_boot_jar_start _binary_boot_jar_end ${platform} ${arch} Step 4: Write a driver which starts the VM and runs the desired main method. Note the bootJar function, which will be called by the VM to @@ -412,21 +390,11 @@ Step 6: Build the boot image. Step 7: Make an object file out of the boot image. -for linux-i386: - $ objcopy --rename-section=.data=.boot -I binary bootimage.bin \ - -O elf32-i386 -B i386 bootimage.tmp - $ objcopy --set-section-flags .boot=alloc,load,code bootimage.tmp \ - bootimage.o - -for darwin-i386: - - $ ../build/darwin-i386/binaryToMacho x86 bootimage.bin \ - __BOOT __boot __binary_bootimage_bin_start __binary_bootimage_bin_end \ - > bootimage.o - -for other platforms: See the previous example for -architecture-specific parameters. + $ ../build/${platform}-${arch}/binaryToObject \ + bootimage.bin bootimage-bin.o \ + _binary_bootimage_bin_start _binary_bootimage_bin_end \ + ${platform} ${arch} 8 writable executable Step 8: Write a driver which starts the VM and runs the desired main method. 
Note the bootimageBin function, which will be called by the diff --git a/src/binaryToMacho.cpp b/src/binaryToMacho.cpp deleted file mode 100644 index a7d1cb279f..0000000000 --- a/src/binaryToMacho.cpp +++ /dev/null @@ -1,189 +0,0 @@ -/* Copyright (c) 2008-2009, Avian Contributors - - Permission to use, copy, modify, and/or distribute this software - for any purpose with or without fee is hereby granted, provided - that the above copyright notice and this permission notice appear - in all copies. - - There is NO WARRANTY for this software. See license.txt for - details. */ - -#include "stdint.h" -#include "stdio.h" -#include "string.h" - -#include "sys/stat.h" -#include "sys/mman.h" -#include "fcntl.h" -#include "unistd.h" - -#include "mach-o/loader.h" -#include "mach-o/nlist.h" - -namespace { - -inline unsigned -pad(unsigned n) -{ - return (n + (4 - 1)) & ~(4 - 1); -} - -void -writeObject(const char* architecture, - FILE* out, const uint8_t* data, unsigned size, - const char* segmentName, const char* sectionName, - const char* startName, const char* endName) -{ - unsigned startNameLength = strlen(startName) + 1; - unsigned endNameLength = strlen(endName) + 1; - - cpu_type_t cpuType; - cpu_subtype_t cpuSubtype; - if (strcmp(architecture, "x86") == 0) { - cpuType = CPU_TYPE_I386; - cpuSubtype = CPU_SUBTYPE_I386_ALL; - } else if (strcmp(architecture, "powerpc") == 0) { - cpuType = CPU_TYPE_POWERPC; - cpuSubtype = CPU_SUBTYPE_POWERPC_ALL; - } - - mach_header header = { - MH_MAGIC, // magic - cpuType, - cpuSubtype, - MH_OBJECT, // filetype, - 2, // ncmds - sizeof(segment_command) - + sizeof(section) - + sizeof(symtab_command), // sizeofcmds - 0 // flags - }; - - segment_command segment = { - LC_SEGMENT, // cmd - sizeof(segment_command) + sizeof(section), // cmdsize - "", // segname - 0, // vmaddr - pad(size), // vmsize - sizeof(mach_header) - + sizeof(segment_command) - + sizeof(section) - + sizeof(symtab_command), // fileoff - pad(size), // filesize - 7, // maxprot - 
7, // initprot - 1, // nsects - 0 // flags - }; - - strncpy(segment.segname, segmentName, sizeof(segment.segname)); - - section sect = { - "", // sectname - "", // segname - 0, // addr - pad(size), // size - sizeof(mach_header) - + sizeof(segment_command) - + sizeof(section) - + sizeof(symtab_command), // offset - 0, // align - 0, // reloff - 0, // nreloc - S_REGULAR, // flags - 0, // reserved1 - 0, // reserved2 - }; - - strncpy(sect.segname, segmentName, sizeof(sect.segname)); - strncpy(sect.sectname, sectionName, sizeof(sect.sectname)); - - symtab_command symbolTable = { - LC_SYMTAB, // cmd - sizeof(symtab_command), // cmdsize - sizeof(mach_header) - + sizeof(segment_command) - + sizeof(section) - + sizeof(symtab_command) - + pad(size), // symoff - 2, // nsyms - sizeof(mach_header) - + sizeof(segment_command) - + sizeof(section) - + sizeof(symtab_command) - + pad(size) - + (sizeof(struct nlist) * 2), // stroff - 1 + startNameLength + endNameLength, // strsize - }; - - struct nlist symbolList[] = { - { - reinterpret_cast(1), // n_un - N_SECT | N_EXT, // n_type - 1, // n_sect - 0, // n_desc - 0 // n_value - }, - { - reinterpret_cast(1 + startNameLength), // n_un - N_SECT | N_EXT, // n_type - 1, // n_sect - 0, // n_desc - size // n_value - } - }; - - fwrite(&header, 1, sizeof(header), out); - fwrite(&segment, 1, sizeof(segment), out); - fwrite(&sect, 1, sizeof(sect), out); - fwrite(&symbolTable, 1, sizeof(symbolTable), out); - - fwrite(data, 1, size, out); - for (unsigned i = 0; i < pad(size) - size; ++i) fputc(0, out); - - fwrite(&symbolList, 1, sizeof(symbolList), out); - - fputc(0, out); - fwrite(startName, 1, startNameLength, out); - fwrite(endName, 1, endNameLength, out); -} - -} // namespace - -int -main(int argc, const char** argv) -{ - if (argc != 7) { - fprintf(stderr, - "usage: %s " - "
\n", - argv[0]); - return -1; - } - - uint8_t* data = 0; - unsigned size; - int fd = open(argv[2], O_RDONLY); - if (fd != -1) { - struct stat s; - int r = fstat(fd, &s); - if (r != -1) { - data = static_cast - (mmap(0, s.st_size, PROT_READ, MAP_PRIVATE, fd, 0)); - size = s.st_size; - } - close(fd); - } - - if (data) { - writeObject - (argv[1], stdout, data, size, argv[3], argv[4], argv[5], argv[6]); - - munmap(data, size); - - return 0; - } else { - perror(argv[0]); - return -1; - } -} diff --git a/src/binaryToElf.cpp b/src/binaryToObject/elf.cpp similarity index 59% rename from src/binaryToElf.cpp rename to src/binaryToObject/elf.cpp index aeb1459689..81d90616eb 100644 --- a/src/binaryToElf.cpp +++ b/src/binaryToObject/elf.cpp @@ -12,38 +12,172 @@ #include "stdio.h" #include "string.h" -#include "sys/stat.h" -#include "sys/mman.h" -#include "fcntl.h" -#include "unistd.h" +#define EI_NIDENT 16 -#include "elf.h" +#define EI_MAG0 0 +#define EI_MAG1 1 +#define EI_MAG2 2 +#define EI_MAG3 3 +#define EI_CLASS 4 +#define EI_DATA 5 +#define EI_VERSION 6 +#define EI_OSABI 7 +#define EI_ABIVERSION 8 -#ifdef __x86_64__ +#define ELFMAG0 0x7f +#define ELFMAG1 'E' +#define ELFMAG2 'L' +#define ELFMAG3 'F' + +#define ELFCLASS64 2 +#define ELFCLASS32 1 + +#define EV_CURRENT 1 + +#define ELFDATA2LSB 1 + +#define ELFOSABI_SYSV 0 + +#define ET_REL 1 + +#define EM_386 3 +#define EM_X86_64 62 + +#define SHT_PROGBITS 1 +#define SHT_SYMTAB 2 +#define SHT_STRTAB 3 + +#define SHF_WRITE (1 << 0) +#define SHF_ALLOC (1 << 1) +#define SHF_EXECINSTR (1 << 2) + +#define STB_GLOBAL 1 + +#define STT_NOTYPE 0 + +#define STV_DEFAULT 0 + +#define ELF64_ST_INFO(bind, type) (((bind) << 4) + ((type) & 0xf)) +#define ELF32_ST_INFO(bind, type) ELF64_ST_INFO((bind), (type)) + +#if (BITS_PER_WORD == 64) # define FileHeader Elf64_Ehdr # define SectionHeader Elf64_Shdr # define Symbol Elf64_Sym # define Class ELFCLASS64 -# define Machine EM_X86_64 # define SYMBOL_INFO ELF64_ST_INFO -#else // not __x86_64__ 
+#elif (BITS_PER_WORD == 32) # define FileHeader Elf32_Ehdr # define SectionHeader Elf32_Shdr # define Symbol Elf32_Sym # define Class ELFCLASS32 -# define Machine EM_386 # define SYMBOL_INFO ELF32_ST_INFO -#endif // not __x86_64__ +#else +# error +#endif #define Data ELFDATA2LSB #define OSABI ELFOSABI_SYSV namespace { +typedef uint16_t Elf64_Half; +typedef uint32_t Elf64_Word; +typedef uint64_t Elf64_Addr; +typedef uint64_t Elf64_Xword; +typedef uint16_t Elf64_Section; +typedef uint64_t Elf64_Off; + +struct Elf64_Ehdr { + unsigned char e_ident[EI_NIDENT]; + Elf64_Half e_type; + Elf64_Half e_machine; + Elf64_Word e_version; + Elf64_Addr e_entry; + Elf64_Off e_phoff; + Elf64_Off e_shoff; + Elf64_Word e_flags; + Elf64_Half e_ehsize; + Elf64_Half e_phentsize; + Elf64_Half e_phnum; + Elf64_Half e_shentsize; + Elf64_Half e_shnum; + Elf64_Half e_shstrndx; +}; + +struct Elf64_Shdr { + Elf64_Word sh_name; + Elf64_Word sh_type; + Elf64_Xword sh_flags; + Elf64_Addr sh_addr; + Elf64_Off sh_offset; + Elf64_Xword sh_size; + Elf64_Word sh_link; + Elf64_Word sh_info; + Elf64_Xword sh_addralign; + Elf64_Xword sh_entsize; +}; + +struct Elf64_Sym { + Elf64_Word st_name; + unsigned char st_info; + unsigned char st_other; + Elf64_Section st_shndx; + Elf64_Addr st_value; + Elf64_Xword st_size; +}; + +typedef uint16_t Elf32_Half; +typedef uint32_t Elf32_Word; +typedef uint32_t Elf32_Addr; +typedef uint64_t Elf32_Xword; +typedef uint16_t Elf32_Section; +typedef uint32_t Elf32_Off; + +struct Elf32_Ehdr { + unsigned char e_ident[EI_NIDENT]; + Elf32_Half e_type; + Elf32_Half e_machine; + Elf32_Word e_version; + Elf32_Addr e_entry; + Elf32_Off e_phoff; + Elf32_Off e_shoff; + Elf32_Word e_flags; + Elf32_Half e_ehsize; + Elf32_Half e_phentsize; + Elf32_Half e_phnum; + Elf32_Half e_shentsize; + Elf32_Half e_shnum; + Elf32_Half e_shstrndx; +}; + +struct Elf32_Shdr { + Elf32_Word sh_name; + Elf32_Word sh_type; + Elf32_Word sh_flags; + Elf32_Addr sh_addr; + Elf32_Off sh_offset; + Elf32_Word 
sh_size; + Elf32_Word sh_link; + Elf32_Word sh_info; + Elf32_Word sh_addralign; + Elf32_Word sh_entsize; +}; + +struct Elf32_Sym { + Elf32_Word st_name; + Elf32_Addr st_value; + Elf32_Word st_size; + unsigned char st_info; + unsigned char st_other; + Elf32_Section st_shndx; +}; + void -writeObject(FILE* out, const uint8_t* data, unsigned size, - const char* sectionName, const char* startName, - const char* endName) +writeObject(const uint8_t* data, unsigned size, FILE* out, + const char* startName, const char* endName, + const char* sectionName, unsigned sectionFlags, + unsigned alignment, int machine) { const unsigned sectionCount = 5; const unsigned symbolCount = 2; @@ -92,7 +226,7 @@ writeObject(FILE* out, const uint8_t* data, unsigned size, fileHeader.e_ident[EI_OSABI] = OSABI; fileHeader.e_ident[EI_ABIVERSION] = 0; fileHeader.e_type = ET_REL; - fileHeader.e_machine = Machine; + fileHeader.e_machine = machine; fileHeader.e_version = EV_CURRENT; fileHeader.e_entry = 0; fileHeader.e_phoff = 0; @@ -111,14 +245,14 @@ writeObject(FILE* out, const uint8_t* data, unsigned size, SectionHeader bodySection; bodySection.sh_name = sectionNameOffset; bodySection.sh_type = SHT_PROGBITS; - bodySection.sh_flags = SHF_WRITE | SHF_ALLOC | SHF_EXECINSTR; + bodySection.sh_flags = sectionFlags; bodySection.sh_addr = 0; bodySection.sh_offset = sizeof(FileHeader) + (sizeof(SectionHeader) * sectionCount); bodySection.sh_size = size; bodySection.sh_link = 0; bodySection.sh_info = 0; - bodySection.sh_addralign = sizeof(void*); + bodySection.sh_addralign = alignment; bodySection.sh_entsize = 0; SectionHeader sectionStringTableSection; @@ -157,7 +291,7 @@ writeObject(FILE* out, const uint8_t* data, unsigned size, symbolTableSection.sh_size = sizeof(Symbol) * symbolCount; symbolTableSection.sh_link = stringTableSectionNumber; symbolTableSection.sh_info = 0; - symbolTableSection.sh_addralign = sizeof(void*); + symbolTableSection.sh_addralign = BITS_PER_WORD / 8; 
symbolTableSection.sh_entsize = sizeof(Symbol); Symbol startSymbol; @@ -170,7 +304,7 @@ writeObject(FILE* out, const uint8_t* data, unsigned size, Symbol endSymbol; endSymbol.st_name = endNameOffset; - endSymbol.st_value = 0; + endSymbol.st_value = size; endSymbol.st_size = 0; endSymbol.st_info = SYMBOL_INFO(STB_GLOBAL, STT_NOTYPE); endSymbol.st_other = STV_DEFAULT; @@ -183,9 +317,9 @@ writeObject(FILE* out, const uint8_t* data, unsigned size, out); fwrite(&stringTableSection, 1, sizeof(stringTableSection), out); fwrite(&symbolTableSection, 1, sizeof(symbolTableSection), out); - + fwrite(data, 1, size, out); - + fputc(0, out); fwrite(sectionStringTableName, 1, sectionStringTableNameLength, out); fwrite(stringTableName, 1, stringTableNameLength, out); @@ -202,39 +336,46 @@ writeObject(FILE* out, const uint8_t* data, unsigned size, } // namespace -int -main(int argc, const char** argv) +#define MACRO_MAKE_NAME(a, b, c) a##b##c +#define MAKE_NAME(a, b, c) MACRO_MAKE_NAME(a, b, c) + +namespace binaryToObject { + +bool +MAKE_NAME(writeElf, BITS_PER_WORD, Object) + (uint8_t* data, unsigned size, FILE* out, const char* startName, + const char* endName, const char* architecture, unsigned alignment, + bool writable, bool executable) { - if (argc != 5) { - fprintf(stderr, - "usage: %s
" - "\n", - argv[0]); - return -1; - } - - uint8_t* data = 0; - unsigned size; - int fd = open(argv[1], O_RDONLY); - if (fd != -1) { - struct stat s; - int r = fstat(fd, &s); - if (r != -1) { - data = static_cast - (mmap(0, s.st_size, PROT_READ, MAP_PRIVATE, fd, 0)); - size = s.st_size; - } - close(fd); - } - - if (data) { - writeObject(stdout, data, size, argv[2], argv[3], argv[4]); - - munmap(data, size); - - return 0; + int machine; + if (strcmp(architecture, "x86_64") == 0) { + machine = EM_X86_64; + } else if (strcmp(architecture, "i386") == 0) { + machine = EM_386; } else { - perror(argv[0]); - return -1; + fprintf(stderr, "unsupported architecture: %s\n", architecture); + return false; } + + const char* sectionName; + unsigned sectionFlags = SHF_ALLOC; + if (writable and executable) { + sectionName = ".rwx"; + sectionFlags |= SHF_WRITE | SHF_EXECINSTR; + } else if (writable) { + sectionName = ".data"; + sectionFlags |= SHF_WRITE; + } else if (executable) { + sectionName = ".text"; + sectionFlags |= SHF_EXECINSTR; + } else { + sectionName = ".rodata"; + } + + writeObject(data, size, out, startName, endName, sectionName, sectionFlags, + alignment, machine); + + return true; } + +} // namespace binaryToObject diff --git a/src/binaryToObject/mach-o.cpp b/src/binaryToObject/mach-o.cpp new file mode 100644 index 0000000000..3921cb2f9d --- /dev/null +++ b/src/binaryToObject/mach-o.cpp @@ -0,0 +1,331 @@ +/* Copyright (c) 2008-2009, Avian Contributors + + Permission to use, copy, modify, and/or distribute this software + for any purpose with or without fee is hereby granted, provided + that the above copyright notice and this permission notice appear + in all copies. + + There is NO WARRANTY for this software. See license.txt for + details. 
*/ + +#include "stdint.h" +#include "stdio.h" +#include "string.h" + +#define MH_MAGIC 0xfeedface + +#define MH_OBJECT 1 + +#define LC_SEGMENT 1 +#define LC_SYMTAB 2 + +#define S_REGULAR 0 + +#define N_SECT 0xe +#define N_EXT 0x1 + +#define CPU_ARCH_ABI64 0x01000000 + +#define CPU_TYPE_I386 7 +#define CPU_TYPE_X86_64 (CPU_TYPE_I386 | CPU_ARCH_ABI64) +#define CPU_TYPE_POWERPC 18 + +#define CPU_SUBTYPE_I386_ALL 3 +#define CPU_SUBTYPE_X86_64_ALL CPU_SUBTYPE_I386_ALL +#define CPU_SUBTYPE_POWERPC_ALL 0 + +#if (BITS_PER_WORD == 64) +# define FileHeader mach_header_64 +# define SegmentCommand segment_command_64 +# define Section section_64 +#elif (BITS_PER_WORD == 32) +# define FileHeader mach_header +# define SegmentCommand segment_command +# define Section section +#else +# error +#endif + +namespace { + +typedef int cpu_type_t; +typedef int cpu_subtype_t; +typedef int vm_prot_t; + +struct mach_header_64 { + uint32_t magic; + cpu_type_t cputype; + cpu_subtype_t cpusubtype; + uint32_t filetype; + uint32_t ncmds; + uint32_t sizeofcmds; + uint32_t flags; + uint32_t reserved; +}; + +struct segment_command_64 { + uint32_t cmd; + uint32_t cmdsize; + char segname[16]; + uint64_t vmaddr; + uint64_t vmsize; + uint64_t fileoff; + uint64_t filesize; + vm_prot_t maxprot; + vm_prot_t initprot; + uint32_t nsects; + uint32_t flags; +}; + +struct section_64 { + char sectname[16]; + char segname[16]; + uint64_t addr; + uint64_t size; + uint32_t offset; + uint32_t align; + uint32_t reloff; + uint32_t nreloc; + uint32_t flags; + uint32_t reserved1; + uint32_t reserved2; + uint32_t reserved3; +}; + +struct mach_header { + uint32_t magic; + cpu_type_t cputype; + cpu_subtype_t cpusubtype; + uint32_t filetype; + uint32_t ncmds; + uint32_t sizeofcmds; + uint32_t flags; +}; + +struct segment_command { + uint32_t cmd; + uint32_t cmdsize; + char segname[16]; + uint32_t vmaddr; + uint32_t vmsize; + uint32_t fileoff; + uint32_t filesize; + vm_prot_t maxprot; + vm_prot_t initprot; + uint32_t nsects; 
+ uint32_t flags; +}; + +struct section { + char sectname[16]; + char segname[16]; + uint32_t addr; + uint32_t size; + uint32_t offset; + uint32_t align; + uint32_t reloff; + uint32_t nreloc; + uint32_t flags; + uint32_t reserved1; + uint32_t reserved2; +}; + +struct symtab_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t symoff; + uint32_t nsyms; + uint32_t stroff; + uint32_t strsize; +}; + +struct nlist { + union { + int32_t n_strx; + } n_un; + uint8_t n_type; + uint8_t n_sect; + int16_t n_desc; + uint32_t n_value; +}; + +inline unsigned +pad(unsigned n) +{ + return (n + ((BITS_PER_WORD / 8) - 1)) & ~((BITS_PER_WORD / 8) - 1); +} + +inline unsigned +log(unsigned n) +{ + unsigned r = 0; + for (unsigned i = 1; i < n; ++r) i <<= 1; + return r; +} + +void +writeObject(const uint8_t* data, unsigned size, FILE* out, + const char* startName, const char* endName, + const char* segmentName, const char* sectionName, + unsigned alignment, cpu_type_t cpuType, cpu_subtype_t cpuSubType) +{ + unsigned startNameLength = strlen(startName) + 1; + unsigned endNameLength = strlen(endName) + 1; + + FileHeader header = { + MH_MAGIC, // magic + cpuType, + cpuSubType, + MH_OBJECT, // filetype, + 2, // ncmds + sizeof(SegmentCommand) + + sizeof(Section) + + sizeof(symtab_command), // sizeofcmds + 0 // flags + }; + + SegmentCommand segment = { + LC_SEGMENT, // cmd + sizeof(SegmentCommand) + sizeof(Section), // cmdsize + "", // segname + 0, // vmaddr + pad(size), // vmsize + sizeof(FileHeader) + + sizeof(SegmentCommand) + + sizeof(Section) + + sizeof(symtab_command), // fileoff + pad(size), // filesize + 7, // maxprot + 7, // initprot + 1, // nsects + 0 // flags + }; + + strncpy(segment.segname, segmentName, sizeof(segment.segname)); + + Section sect = { + "", // sectname + "", // segname + 0, // addr + pad(size), // size + sizeof(FileHeader) + + sizeof(SegmentCommand) + + sizeof(Section) + + sizeof(symtab_command), // offset + log(alignment), // align + 0, // reloff + 0, // nreloc 
+ S_REGULAR, // flags + 0, // reserved1 + 0, // reserved2 + }; + + strncpy(sect.segname, segmentName, sizeof(sect.segname)); + strncpy(sect.sectname, sectionName, sizeof(sect.sectname)); + + symtab_command symbolTable = { + LC_SYMTAB, // cmd + sizeof(symtab_command), // cmdsize + sizeof(FileHeader) + + sizeof(SegmentCommand) + + sizeof(Section) + + sizeof(symtab_command) + + pad(size), // symoff + 2, // nsyms + sizeof(FileHeader) + + sizeof(SegmentCommand) + + sizeof(Section) + + sizeof(symtab_command) + + pad(size) + + (sizeof(struct nlist) * 2), // stroff + 1 + startNameLength + endNameLength, // strsize + }; + + struct nlist symbolList[] = { + { + 1, // n_un + N_SECT | N_EXT, // n_type + 1, // n_sect + 0, // n_desc + 0 // n_value + }, + { + 1 + startNameLength, // n_un + N_SECT | N_EXT, // n_type + 1, // n_sect + 0, // n_desc + size // n_value + } + }; + + fwrite(&header, 1, sizeof(header), out); + fwrite(&segment, 1, sizeof(segment), out); + fwrite(&sect, 1, sizeof(sect), out); + fwrite(&symbolTable, 1, sizeof(symbolTable), out); + + fwrite(data, 1, size, out); + for (unsigned i = 0; i < pad(size) - size; ++i) fputc(0, out); + + fwrite(&symbolList, 1, sizeof(symbolList), out); + + fputc(0, out); + fwrite(startName, 1, startNameLength, out); + fwrite(endName, 1, endNameLength, out); +} + +} // namespace + +#define MACRO_MAKE_NAME(a, b, c) a##b##c +#define MAKE_NAME(a, b, c) MACRO_MAKE_NAME(a, b, c) + +namespace binaryToObject { + +bool +MAKE_NAME(writeMachO, BITS_PER_WORD, Object) + (uint8_t* data, unsigned size, FILE* out, const char* startName, + const char* endName, const char* architecture, unsigned alignment, + bool, bool executable) +{ + cpu_type_t cpuType; + cpu_subtype_t cpuSubType; + if (strcmp(architecture, "x86_64") == 0) { + cpuType = CPU_TYPE_X86_64; + cpuSubType = CPU_SUBTYPE_X86_64_ALL; + } else if (strcmp(architecture, "i386") == 0) { + cpuType = CPU_TYPE_I386; + cpuSubType = CPU_SUBTYPE_I386_ALL; + } else if (strcmp(architecture, "powerpc") == 0) {
+ cpuType = CPU_TYPE_POWERPC; + cpuSubType = CPU_SUBTYPE_POWERPC_ALL; + } else { + fprintf(stderr, "unsupported architecture: %s\n", architecture); + return false; + } + + const char* segmentName; + const char* sectionName; + if (executable) { + segmentName = "__RWX"; + sectionName = "__rwx"; + } else { + segmentName = "__TEXT"; + sectionName = "__text"; + } + + unsigned startNameLength = strlen(startName); + char myStartName[startNameLength + 2]; + myStartName[0] = '_'; + memcpy(myStartName + 1, startName, startNameLength + 1); + + unsigned endNameLength = strlen(endName); + char myEndName[endNameLength + 2]; + myEndName[0] = '_'; + memcpy(myEndName + 1, endName, endNameLength + 1); + + writeObject(data, size, out, myStartName, myEndName, segmentName, + sectionName, alignment, cpuType, cpuSubType); + + return true; +} + +} // namespace binaryToObject diff --git a/src/binaryToObject/main.cpp b/src/binaryToObject/main.cpp new file mode 100644 index 0000000000..351ecb1b2c --- /dev/null +++ b/src/binaryToObject/main.cpp @@ -0,0 +1,182 @@ +/* Copyright (c) 2008-2009, Avian Contributors + + Permission to use, copy, modify, and/or distribute this software + for any purpose with or without fee is hereby granted, provided + that the above copyright notice and this permission notice appear + in all copies. + + There is NO WARRANTY for this software. See license.txt for + details. 
*/ + +#include "stdint.h" +#include "stdio.h" +#include "stdlib.h" +#include "string.h" + +#include "sys/stat.h" +#include "sys/mman.h" +#include "fcntl.h" +#include "unistd.h" + +namespace binaryToObject { + +bool +writeElf64Object(uint8_t* data, unsigned size, FILE* out, + const char* startName, const char* endName, + const char* architecture, unsigned alignment, bool writable, + bool executable); + +bool +writeElf32Object(uint8_t* data, unsigned size, FILE* out, + const char* startName, const char* endName, + const char* architecture, unsigned alignment, bool writable, + bool executable); + +bool +writeMachO64Object(uint8_t* data, unsigned size, FILE* out, + const char* startName, const char* endName, + const char* architecture, unsigned alignment, bool writable, + bool executable); + +bool +writeMachO32Object(uint8_t* data, unsigned size, FILE* out, + const char* startName, const char* endName, + const char* architecture, unsigned alignment, bool writable, + bool executable); + +bool +writePEObject(uint8_t* data, unsigned size, FILE* out, const char* startName, + const char* endName, const char* architecture, + unsigned alignment, bool writable, bool executable); + +} // namespace binaryToObject + +namespace { + +bool +writeObject(uint8_t* data, unsigned size, FILE* out, const char* startName, + const char* endName, const char* platform, + const char* architecture, unsigned alignment, bool writable, + bool executable) +{ + using namespace binaryToObject; + + bool found = false; + bool success = false; + if (strcmp("linux", platform) == 0) { + if (strcmp("x86_64", architecture) == 0) { + found = true; + success = writeElf64Object + (data, size, out, startName, endName, architecture, alignment, + writable, executable); + } else if (strcmp("i386", architecture) == 0) { + found = true; + success = writeElf32Object + (data, size, out, startName, endName, architecture, alignment, + writable, executable); + } + } else if (strcmp("darwin", platform) == 0) { + if 
(strcmp("x86_64", architecture) == 0) { + found = true; + success = writeMachO64Object + (data, size, out, startName, endName, architecture, alignment, + writable, executable); + } else if (strcmp("i386", architecture) == 0 + or strcmp("powerpc", architecture) == 0) + { + found = true; + success = writeMachO32Object + (data, size, out, startName, endName, architecture, alignment, + writable, executable); + } + } else if (strcmp("windows", platform) == 0 + and ((strcmp("x86_64", architecture) == 0 + or strcmp("i386", architecture) == 0))) + { + found = true; + success = writePEObject + (data, size, out, startName, endName, architecture, alignment, writable, + executable); + } + + if (not found) { + fprintf(stderr, "unsupported platform: %s/%s\n", platform, architecture); + return false; + } + + return success; +} + +void +usageAndExit(const char* name) +{ + fprintf(stderr, + "usage: %s <input file> <output file> <start symbol name> " + "<end symbol name> <platform> <architecture> " + "[<alignment> [{writable|executable}...]]\n", + name); + exit(-1); +} + +} // namespace + +int +main(int argc, const char** argv) +{ + if (argc < 7 or argc > 10) { + usageAndExit(argv[0]); + } + + unsigned alignment = 1; + if (argc > 7) { + alignment = atoi(argv[7]); + } + + bool writable = false; + bool executable = false; + + for (int i = 8; i < argc; ++i) { + if (strcmp("writable", argv[i]) == 0) { + writable = true; + } else if (strcmp("executable", argv[i]) == 0) { + executable = true; + } else { + usageAndExit(argv[0]); + } + } + + uint8_t* data = 0; + unsigned size; + int fd = open(argv[1], O_RDONLY); + if (fd != -1) { + struct stat s; + int r = fstat(fd, &s); + if (r != -1) { + data = static_cast<uint8_t*> + (mmap(0, s.st_size, PROT_READ, MAP_PRIVATE, fd, 0)); + size = s.st_size; + } + close(fd); + } + + bool success = false; + + if (data) { + FILE* out = fopen(argv[2], "wb"); + if (out) { + success = writeObject + (data, size, out, argv[3], argv[4], argv[5], argv[6], alignment, + writable, executable); + + fclose(out); + } else { + fprintf(stderr, "unable to open %d\n", argv[2]); + } +
+ munmap(data, size); + } else { + perror(argv[0]); + } + + return (success ? 0 : -1); +} diff --git a/src/binaryToObject/pe.cpp b/src/binaryToObject/pe.cpp new file mode 100644 index 0000000000..49bf772a03 --- /dev/null +++ b/src/binaryToObject/pe.cpp @@ -0,0 +1,229 @@ +/* Copyright (c) 2009, Avian Contributors + + Permission to use, copy, modify, and/or distribute this software + for any purpose with or without fee is hereby granted, provided + that the above copyright notice and this permission notice appear + in all copies. + + There is NO WARRANTY for this software. See license.txt for + details. */ + +#include "stdint.h" +#include "stdio.h" +#include "string.h" + +#define IMAGE_SIZEOF_SHORT_NAME 8 + +#define IMAGE_FILE_RELOCS_STRIPPED 1 +#define IMAGE_FILE_LINE_NUMS_STRIPPED 4 +#define IMAGE_FILE_MACHINE_AMD64 0x8664 +#define IMAGE_FILE_MACHINE_I386 0x014c +#define IMAGE_FILE_32BIT_MACHINE 256 + +#define IMAGE_SCN_ALIGN_1BYTES 0x100000 +#define IMAGE_SCN_ALIGN_2BYTES 0x200000 +#define IMAGE_SCN_ALIGN_4BYTES 0x300000 +#define IMAGE_SCN_ALIGN_8BYTES 0x400000 +#define IMAGE_SCN_MEM_EXECUTE 0x20000000 +#define IMAGE_SCN_MEM_READ 0x40000000 +#define IMAGE_SCN_MEM_WRITE 0x80000000 +#define IMAGE_SCN_CNT_CODE 32 + +namespace { + +struct IMAGE_FILE_HEADER { + uint16_t Machine; + uint16_t NumberOfSections; + uint32_t TimeDateStamp; + uint32_t PointerToSymbolTable; + uint32_t NumberOfSymbols; + uint16_t SizeOfOptionalHeader; + uint16_t Characteristics; +} __attribute__((packed)); + +struct IMAGE_SECTION_HEADER { + uint8_t Name[IMAGE_SIZEOF_SHORT_NAME]; + union { + uint32_t PhysicalAddress; + uint32_t VirtualSize; + } Misc; + uint32_t VirtualAddress; + uint32_t SizeOfRawData; + uint32_t PointerToRawData; + uint32_t PointerToRelocations; + uint32_t PointerToLinenumbers; + uint16_t NumberOfRelocations; + uint16_t NumberOfLinenumbers; + uint32_t Characteristics; +} __attribute__((packed)); + +struct IMAGE_SYMBOL { + union { + struct { + uint32_t Short; + uint32_t Long; + } 
Name; + } N; + uint32_t Value; + int16_t SectionNumber; + uint16_t Type; + uint8_t StorageClass; + uint8_t NumberOfAuxSymbols; +} __attribute__((packed)); + +inline unsigned +pad(unsigned n) +{ + return (n + (4 - 1)) & ~(4 - 1); +} + +void +writeObject(const uint8_t* data, unsigned size, FILE* out, + const char* startName, const char* endName, + const char* sectionName, int machine, int machineMask, + int sectionMask) +{ + const unsigned sectionCount = 1; + const unsigned symbolCount = 2; + + const unsigned sectionNumber = 1; + + const unsigned startNameLength = strlen(startName) + 1; + const unsigned endNameLength = strlen(endName) + 1; + + const unsigned startNameOffset = 4; + const unsigned endNameOffset = startNameOffset + startNameLength; + + IMAGE_FILE_HEADER fileHeader = { + machine, // Machine + sectionCount, // NumberOfSections + 0, // TimeDateStamp + sizeof(IMAGE_FILE_HEADER) + + sizeof(IMAGE_SECTION_HEADER) + + pad(size), // PointerToSymbolTable + symbolCount, // NumberOfSymbols + 0, // SizeOfOptionalHeader + IMAGE_FILE_RELOCS_STRIPPED + | IMAGE_FILE_LINE_NUMS_STRIPPED + | machineMask // Characteristics + }; + + IMAGE_SECTION_HEADER sectionHeader = { + "", // Name + 0, // PhysicalAddress + 0, // VirtualAddress + pad(size), // SizeOfRawData + sizeof(IMAGE_FILE_HEADER) + + sizeof(IMAGE_SECTION_HEADER), // PointerToRawData + 0, // PointerToRelocations + 0, // PointerToLinenumbers + 0, // NumberOfRelocations + 0, // NumberOfLinenumbers + sectionMask // Characteristics + }; + + strncpy(reinterpret_cast(sectionHeader.Name), sectionName, + sizeof(sectionHeader.Name)); + + IMAGE_SYMBOL startSymbol = { + { 0 }, // Name + 0, // Value + sectionNumber, // SectionNumber + 0, // Type + 2, // StorageClass + 0, // NumberOfAuxSymbols + }; + startSymbol.N.Name.Long = startNameOffset; + + IMAGE_SYMBOL endSymbol = { + { 0 }, // Name + size, // Value + sectionNumber, // SectionNumber + 0, // Type + 2, // StorageClass + 0, // NumberOfAuxSymbols + }; + endSymbol.N.Name.Long = 
endNameOffset; + + fwrite(&fileHeader, 1, sizeof(fileHeader), out); + fwrite(§ionHeader, 1, sizeof(sectionHeader), out); + + fwrite(data, 1, size, out); + for (unsigned i = 0; i < pad(size) - size; ++i) fputc(0, out); + + fwrite(&startSymbol, 1, sizeof(startSymbol), out); + fwrite(&endSymbol, 1, sizeof(endSymbol), out); + + uint32_t symbolTableSize = endNameOffset + endNameLength; + fwrite(&symbolTableSize, 1, 4, out); + + fwrite(startName, 1, startNameLength, out); + fwrite(endName, 1, endNameLength, out); +} + +} // namespace + +namespace binaryToObject { + +bool +writePEObject +(uint8_t* data, unsigned size, FILE* out, const char* startName, + const char* endName, const char* architecture, unsigned alignment, + bool writable, bool executable) +{ + int machine; + int machineMask; + if (strcmp(architecture, "x86_64") == 0) { + machine = IMAGE_FILE_MACHINE_AMD64; + machineMask = 0; + } else if (strcmp(architecture, "i386") == 0) { + machine = IMAGE_FILE_MACHINE_I386; + machineMask = IMAGE_FILE_32BIT_MACHINE; + } else { + fprintf(stderr, "unsupported architecture: %s\n", architecture); + return false; + } + + int sectionMask; + switch (alignment) { + case 0: + case 1: + sectionMask = IMAGE_SCN_ALIGN_1BYTES; + break; + case 2: + sectionMask = IMAGE_SCN_ALIGN_2BYTES; + break; + case 4: + sectionMask = IMAGE_SCN_ALIGN_4BYTES; + break; + case 8: + sectionMask = IMAGE_SCN_ALIGN_8BYTES; + break; + default: + fprintf(stderr, "unsupported alignment: %d\n", alignment); + return false; + } + + sectionMask |= IMAGE_SCN_MEM_READ; + + const char* sectionName; + if (writable and executable) { + sectionName = ".rwx"; + sectionMask |= IMAGE_SCN_MEM_WRITE + | IMAGE_SCN_MEM_EXECUTE + | IMAGE_SCN_CNT_CODE; + } else if (executable) { + sectionName = ".text"; + sectionMask |= IMAGE_SCN_MEM_EXECUTE | IMAGE_SCN_CNT_CODE; + } else { + sectionName = ".data"; + sectionMask |= IMAGE_SCN_MEM_WRITE; + } + + writeObject(data, size, out, startName, endName, sectionName, machine, + machineMask, 
sectionMask); + + return true; +} + +} // namespace binaryToObject