From 661f6c28a8be920a5118f6bb32d3a964ee68d1d8 Mon Sep 17 00:00:00 2001 From: Joshua Warner Date: Fri, 27 Apr 2012 12:08:44 -0600 Subject: [PATCH] refactor binaryToObject to allow more flexibilty (in particular, allowing arbitrary symbols per object) --- makefile | 19 +- src/binaryToObject/elf.cpp | 422 +++++++++++++++----------------- src/binaryToObject/endianness.h | 25 +- src/binaryToObject/mach-o.cpp | 339 +++++++++++-------------- src/binaryToObject/main.cpp | 20 +- src/binaryToObject/pe.cpp | 323 +++++++++++++----------- src/binaryToObject/tools.cpp | 32 +++ src/binaryToObject/tools.h | 69 +++++- 8 files changed, 640 insertions(+), 609 deletions(-) diff --git a/makefile b/makefile index 1705b53986..ff6cf9855b 100755 --- a/makefile +++ b/makefile @@ -236,7 +236,6 @@ ifeq ($(arch),powerpc) ifneq ($(platform),darwin) ifneq ($(arch),$(build-arch)) - converter-cflags += -DOPPOSITE_ENDIAN cxx = powerpc-linux-gnu-g++ cc = powerpc-linux-gnu-gcc ar = powerpc-linux-gnu-ar @@ -342,10 +341,6 @@ ifeq ($(platform),darwin) lflags += $(flags) endif - ifeq ($(build-arch),powerpc) - converter-cflags += -DBIG_ENDIAN - endif - ifeq ($(arch),powerpc) openjdk-extra-cflags += -arch ppc -mmacosx-version-min=${OSX_SDK_VERSION} cflags += -arch ppc -mmacosx-version-min=${OSX_SDK_VERSION} @@ -428,6 +423,7 @@ endif ifeq ($(mode),debug) optimization-cflags = -O0 -g3 + converter-cflags += -O0 -g3 strip = : endif ifeq ($(mode),debug-fast) @@ -606,9 +602,6 @@ driver-object = $(build)/main.o driver-dynamic-objects = \ $(build)/main-dynamic.o -gdb-plugin-object = $(build)/gdb-plugin.o -gdb-plugin-source = $(src)/gdb-plugin.cpp - boot-source = $(src)/boot.cpp boot-object = $(build)/boot.o @@ -623,7 +616,10 @@ generator-objects = \ $(call generator-cpp-objects,$(generator-sources),$(src),$(build)) generator = $(build)/generator -converter-depends = $(src)/binaryToObject/tools.h +converter-depends = \ + $(src)/binaryToObject/tools.h \ + $(src)/binaryToObject/endianness.h + converter-sources = \ $(src)/binaryToObject/main.cpp \ @@ -826,9 +822,6 @@ $(heapwalk-objects): $(build)/%.o: $(src)/%.cpp $(vm-depends) $(driver-object): $(driver-source) $(compile-object) -$(gdb-plugin-object): $(gdb-plugin-source) - $(compile-object) - $(build)/main-dynamic.o: $(driver-source) @echo "compiling $(@)" @mkdir -p $(dir $(@)) @@ -846,7 +839,7 @@ $(converter-objects): $(build)/binaryToObject/%.o: $(src)/binaryToObject/%.cpp $ $(build-cxx) $(converter-cflags) -c $(<) -o $(@) $(converter): $(converter-objects) - $(build-cc) $(^) -o $(@) + $(build-cc) $(^) -g -o $(@) $(build)/classpath.jar: $(classpath-dep) $(classpath-jar-dep) @echo "creating $(@)" diff --git a/src/binaryToObject/elf.cpp b/src/binaryToObject/elf.cpp index 4e752104a1..3548ac8431 100644 --- a/src/binaryToObject/elf.cpp +++ b/src/binaryToObject/elf.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include "endianness.h" @@ -119,6 +120,39 @@ using avian::endian::Endianness; #define V4 Endianness::v4 #define VANY Endianness::vAny + +unsigned getElfPlatform(PlatformInfo::Architecture arch) { + switch(arch) { + case PlatformInfo::x86_64: + return EM_X86_64; + case PlatformInfo::x86: + return EM_386; + case PlatformInfo::Arm: + return EM_ARM; + case PlatformInfo::PowerPC: + return EM_PPC; + } + return ~0; +} + +const char* getSectionName(unsigned accessFlags, unsigned& sectionFlags) { + sectionFlags = SHF_ALLOC; + if (accessFlags & Platform::Writable) { + if (accessFlags & Platform::Executable) { + sectionFlags |= SHF_WRITE | SHF_EXECINSTR; + return ".rwx"; + } else { + sectionFlags |= SHF_WRITE; + return ".data"; + } + } else if (accessFlags & Platform::Executable) { + sectionFlags |= SHF_EXECINSTR; + return ".text"; + } else { + return ".rodata"; + } +} + template class ElfPlatform : public Platform { public: @@ -158,240 +192,182 @@ public: typedef Symbol_Ty Symbol; - class ElfObjectWriter : public ObjectWriter { + static const unsigned Encoding = TargetLittleEndian ? ELFDATA2LSB : ELFDATA2MSB; + + const unsigned machine; + + ElfPlatform(PlatformInfo::Architecture arch): + Platform(PlatformInfo(PlatformInfo::Linux, arch)), + machine(getElfPlatform(arch)) {} + + class FileWriter { public: + unsigned sectionCount; + unsigned sectionStringTableSectionNumber; - PlatformInfo::Architecture arch; - OutputStream* out; + AddrTy dataOffset; - ElfObjectWriter(PlatformInfo::Architecture arch, OutputStream* out): - arch(arch), - out(out) {} + FileHeader header; + StringTable strings; - void writeObject(const uint8_t* data, unsigned size, - const char* startName, const char* endName, - const char* sectionName, unsigned sectionFlags, - unsigned alignment, int machine, int encoding) + FileWriter(unsigned machine): + sectionCount(0), + dataOffset(sizeof(FileHeader)) { - const unsigned sectionCount = 5; - const unsigned symbolCount = 2; - - const unsigned sectionNameLength = strlen(sectionName) + 1; - const unsigned startNameLength = strlen(startName) + 1; - const unsigned endNameLength = strlen(endName) + 1; - - const char* const sectionStringTableName = ".shstrtab"; - const char* const stringTableName = ".strtab"; - const char* const symbolTableName = ".symtab"; - - const unsigned sectionStringTableNameLength - = strlen(sectionStringTableName) + 1; - const unsigned stringTableNameLength = strlen(stringTableName) + 1; - const unsigned symbolTableNameLength = strlen(symbolTableName) + 1; - - const unsigned nullStringOffset = 0; - - const unsigned sectionStringTableNameOffset = nullStringOffset + 1; - const unsigned stringTableNameOffset - = sectionStringTableNameOffset + sectionStringTableNameLength; - const unsigned symbolTableNameOffset - = stringTableNameOffset + stringTableNameLength; - const unsigned sectionNameOffset - = symbolTableNameOffset + symbolTableNameLength; - const unsigned sectionStringTableLength - = sectionNameOffset + sectionNameLength; - - const unsigned startNameOffset = nullStringOffset + 1; - const unsigned endNameOffset = startNameOffset + startNameLength; - const unsigned stringTableLength = endNameOffset + endNameLength; - - const unsigned bodySectionNumber = 1; - const unsigned sectionStringTableSectionNumber = 2; - const unsigned stringTableSectionNumber = 3; - - FileHeader fileHeader; - memset(&fileHeader, 0, sizeof(FileHeader)); - fileHeader.e_ident[EI_MAG0] = V1(ELFMAG0); - fileHeader.e_ident[EI_MAG1] = V1(ELFMAG1); - fileHeader.e_ident[EI_MAG2] = V1(ELFMAG2); - fileHeader.e_ident[EI_MAG3] = V1(ELFMAG3); - fileHeader.e_ident[EI_CLASS] = V1(Class); - fileHeader.e_ident[EI_DATA] = V1(encoding); - fileHeader.e_ident[EI_VERSION] = V1(EV_CURRENT); - fileHeader.e_ident[EI_OSABI] = V1(OSABI); - fileHeader.e_ident[EI_ABIVERSION] = V1(0); - fileHeader.e_type = V2(ET_REL); - fileHeader.e_machine = V2(machine); - fileHeader.e_version = V4(EV_CURRENT); - fileHeader.e_entry = VANY(static_cast(0)); - fileHeader.e_phoff = VANY(static_cast(0)); - fileHeader.e_shoff = VANY(static_cast(sizeof(FileHeader))); - fileHeader.e_flags = V4(machine == EM_ARM ? 0x04000000 : 0); - fileHeader.e_ehsize = V2(sizeof(FileHeader)); - fileHeader.e_phentsize = V2(0); - fileHeader.e_phnum = V2(0); - fileHeader.e_shentsize = V2(sizeof(SectionHeader)); - fileHeader.e_shnum = V2(sectionCount); - fileHeader.e_shstrndx = V2(sectionStringTableSectionNumber); - - SectionHeader nullSection; - memset(&nullSection, 0, sizeof(SectionHeader)); - - SectionHeader bodySection; - bodySection.sh_name = V4(sectionNameOffset); - bodySection.sh_type = V4(SHT_PROGBITS); - bodySection.sh_flags = VANY(static_cast(sectionFlags)); - bodySection.sh_addr = VANY(static_cast(0)); - unsigned bodySectionOffset - = sizeof(FileHeader) + (sizeof(SectionHeader) * sectionCount); - bodySection.sh_offset = VANY(static_cast(bodySectionOffset)); - unsigned bodySectionSize = size; - bodySection.sh_size = VANY(static_cast(bodySectionSize)); - bodySection.sh_link = V4(0); - bodySection.sh_info = V4(0); - bodySection.sh_addralign = VANY(static_cast(alignment)); - bodySection.sh_entsize = VANY(static_cast(0)); - - SectionHeader sectionStringTableSection; - sectionStringTableSection.sh_name = V4(sectionStringTableNameOffset); - sectionStringTableSection.sh_type = V4(SHT_STRTAB); - sectionStringTableSection.sh_flags = VANY(static_cast(0)); - sectionStringTableSection.sh_addr = VANY(static_cast(0)); - unsigned sectionStringTableSectionOffset - = bodySectionOffset + bodySectionSize; - sectionStringTableSection.sh_offset = VANY(static_cast(sectionStringTableSectionOffset)); - unsigned sectionStringTableSectionSize = sectionStringTableLength; - sectionStringTableSection.sh_size = VANY(static_cast(sectionStringTableSectionSize)); - sectionStringTableSection.sh_link = V4(0); - sectionStringTableSection.sh_info = V4(0); - sectionStringTableSection.sh_addralign = VANY(static_cast(1)); - sectionStringTableSection.sh_entsize = VANY(static_cast(0)); - - SectionHeader stringTableSection; - stringTableSection.sh_name = V4(stringTableNameOffset); - stringTableSection.sh_type = V4(SHT_STRTAB); - stringTableSection.sh_flags = VANY(static_cast(0)); - stringTableSection.sh_addr = VANY(static_cast(0)); - unsigned stringTableSectionOffset - = sectionStringTableSectionOffset + sectionStringTableSectionSize; - stringTableSection.sh_offset = VANY(static_cast(stringTableSectionOffset)); - unsigned stringTableSectionSize = stringTableLength; - stringTableSection.sh_size = VANY(static_cast(stringTableSectionSize)); - stringTableSection.sh_link = V4(0); - stringTableSection.sh_info = V4(0); - stringTableSection.sh_addralign = VANY(static_cast(1)); - stringTableSection.sh_entsize = VANY(static_cast(0)); - - SectionHeader symbolTableSection; - symbolTableSection.sh_name = V4(symbolTableNameOffset); - symbolTableSection.sh_type = V4(SHT_SYMTAB); - symbolTableSection.sh_flags = VANY(static_cast(0)); - symbolTableSection.sh_addr = VANY(static_cast(0)); - unsigned symbolTableSectionOffset - = stringTableSectionOffset + stringTableSectionSize; - symbolTableSection.sh_offset = VANY(static_cast(symbolTableSectionOffset)); - unsigned symbolTableSectionSize = sizeof(Symbol) * symbolCount; - symbolTableSection.sh_size = VANY(static_cast(symbolTableSectionSize)); - symbolTableSection.sh_link = V4(stringTableSectionNumber); - symbolTableSection.sh_info = V4(0); - symbolTableSection.sh_addralign = VANY(static_cast(Elf::BytesPerWord)); - symbolTableSection.sh_entsize = VANY(static_cast(sizeof(Symbol))); - - Symbol startSymbol; - startSymbol.st_name = V4(startNameOffset); - startSymbol.st_value = VANY(static_cast(0)); - startSymbol.st_size = VANY(static_cast(0)); - startSymbol.st_info = V1(SYMBOL_INFO(STB_GLOBAL, STT_NOTYPE)); - startSymbol.st_other = V1(STV_DEFAULT); - startSymbol.st_shndx = V2(bodySectionNumber); - - Symbol endSymbol; - endSymbol.st_name = V4(endNameOffset); - endSymbol.st_value = VANY(static_cast(size)); - endSymbol.st_size = VANY(static_cast(0)); - endSymbol.st_info = V1(SYMBOL_INFO(STB_GLOBAL, STT_NOTYPE)); - endSymbol.st_other = V1(STV_DEFAULT); - endSymbol.st_shndx = V2(bodySectionNumber); - - out->writeChunk(&fileHeader, sizeof(fileHeader)); - out->writeChunk(&nullSection, sizeof(nullSection)); - out->writeChunk(&bodySection, sizeof(bodySection)); - out->writeChunk(§ionStringTableSection, sizeof(sectionStringTableSection)); - out->writeChunk(&stringTableSection, sizeof(stringTableSection)); - out->writeChunk(&symbolTableSection, sizeof(symbolTableSection)); - - out->writeChunk(data, size); - - out->write(0); - out->writeChunk(sectionStringTableName, sectionStringTableNameLength); - out->writeChunk(stringTableName, stringTableNameLength); - out->writeChunk(symbolTableName, symbolTableNameLength); - out->writeChunk(sectionName, sectionNameLength); - - out->write(0); - out->writeChunk(startName, startNameLength); - out->writeChunk(endName, endNameLength); - - out->writeChunk(&startSymbol, sizeof(startSymbol)); - out->writeChunk(&endSymbol, sizeof(endSymbol)); + memset(&header, 0, sizeof(FileHeader)); + header.e_ident[EI_MAG0] = V1(ELFMAG0); + header.e_ident[EI_MAG1] = V1(ELFMAG1); + header.e_ident[EI_MAG2] = V1(ELFMAG2); + header.e_ident[EI_MAG3] = V1(ELFMAG3); + header.e_ident[EI_CLASS] = V1(Class); + header.e_ident[EI_DATA] = V1(Encoding); + header.e_ident[EI_VERSION] = V1(EV_CURRENT); + header.e_ident[EI_OSABI] = V1(OSABI); + header.e_ident[EI_ABIVERSION] = V1(0); + header.e_type = V2(ET_REL); + header.e_machine = V2(machine); + header.e_version = V4(EV_CURRENT); + header.e_entry = VANY(static_cast(0)); + header.e_phoff = VANY(static_cast(0)); + header.e_shoff = VANY(static_cast(sizeof(FileHeader))); + header.e_flags = V4(machine == EM_ARM ? 0x04000000 : 0); + header.e_ehsize = V2(sizeof(FileHeader)); + header.e_phentsize = V2(0); + header.e_phnum = V2(0); + header.e_shentsize = V2(sizeof(SectionHeader)); } - virtual bool write(uint8_t* data, size_t size, - const char* startName, const char* endName, - unsigned alignment, unsigned accessFlags) - { - int machine; - int encoding; - if (arch == PlatformInfo::x86_64) { - machine = EM_X86_64; - encoding = ELFDATA2LSB; - } else if (arch == PlatformInfo::x86) { - machine = EM_386; - encoding = ELFDATA2LSB; - } else if (arch == PlatformInfo::Arm) { - machine = EM_ARM; - encoding = ELFDATA2LSB; - } else if (arch == PlatformInfo::PowerPC) { - machine = EM_PPC; - encoding = ELFDATA2MSB; - } else { - fprintf(stderr, "unsupported architecture: %s\n", arch); - return false; - } - - const char* sectionName; - unsigned sectionFlags = SHF_ALLOC; - if (accessFlags & Writable) { - if (accessFlags & Executable) { - sectionName = ".rwx"; - sectionFlags |= SHF_WRITE | SHF_EXECINSTR; - } else { - sectionName = ".data"; - sectionFlags |= SHF_WRITE; - } - } else if (accessFlags & Executable) { - sectionName = ".text"; - sectionFlags |= SHF_EXECINSTR; - } else { - sectionName = ".rodata"; - } - - writeObject(data, size, startName, endName, sectionName, sectionFlags, - alignment, machine, encoding); - - return true; - } - - virtual void dispose() { - delete this; + void writeHeader(OutputStream* out) { + header.e_shnum = V2(sectionCount); + header.e_shstrndx = V2(sectionStringTableSectionNumber); + out->writeChunk(&header, sizeof(FileHeader)); } }; - ElfPlatform(PlatformInfo::Architecture arch): - Platform(PlatformInfo(PlatformInfo::Linux, arch)) {} + class SectionWriter { + public: + FileWriter& file; + String name; + SectionHeader header; + const size_t* dataSize; + const uint8_t* const* data; - virtual ObjectWriter* makeObjectWriter(OutputStream* out) { - return new ElfObjectWriter(info.arch, out); + SectionWriter(FileWriter& file): + file(file), + name(""), + data(0), + dataSize(0) + { + memset(&header, 0, sizeof(SectionHeader)); + file.sectionCount++; + file.dataOffset += sizeof(SectionHeader); + size_t nameOffset = file.strings.add(name); + header.sh_name = V4(nameOffset); + } + + SectionWriter( + FileWriter& file, + const char* chname, + unsigned type, + AddrTy flags, + unsigned alignment, + AddrTy addr, + const uint8_t* const* data, + size_t* dataSize, + size_t entsize = 0, + unsigned link = 0): + + file(file), + name(chname), + data(data), + dataSize(dataSize) + { + if(strcmp(chname, ".shstrtab") == 0) { + file.sectionStringTableSectionNumber = file.sectionCount; + } + file.sectionCount++; + file.dataOffset += sizeof(SectionHeader); + size_t nameOffset = file.strings.add(name); + + header.sh_name = V4(nameOffset); + header.sh_type = V4(type); + header.sh_flags = VANY(flags); + header.sh_addr = VANY(addr); + // header.sh_offset = VANY(static_cast(bodySectionOffset)); + // header.sh_size = VANY(static_cast(*dataSize)); + header.sh_link = V4(link); + header.sh_info = V4(0); + header.sh_addralign = VANY(static_cast(alignment)); + header.sh_entsize = VANY(static_cast(entsize)); + } + + void writeHeader(OutputStream* out) { + if(dataSize) { + header.sh_offset = VANY(file.dataOffset); + header.sh_size = VANY(static_cast(*dataSize)); + file.dataOffset += *dataSize; + } + + out->writeChunk(&header, sizeof(SectionHeader)); + } + + void writeData(OutputStream* out) { + if(data) { + out->writeChunk(*data, *dataSize); + } + } + + + }; + + virtual bool writeObject(OutputStream* out, Slice symbols, Slice data, unsigned accessFlags, unsigned alignment) { + + unsigned sectionFlags; + const char* sectionName = getSectionName(accessFlags, sectionFlags); + + StringTable symbolStringTable; + Buffer symbolTable; + + FileWriter file(machine); + + const int bodySectionNumber = 1; + const int stringTableSectionNumber = 3; + + SectionWriter sections[] = { + SectionWriter(file), // null section + SectionWriter(file, sectionName, SHT_PROGBITS, sectionFlags, alignment, 0, &data.items, &data.count), // body section + SectionWriter(file, ".shstrtab", SHT_STRTAB, 0, 1, 0, &file.strings.data, &file.strings.length), + SectionWriter(file, ".strtab", SHT_STRTAB, 0, 1, 0, &symbolStringTable.data, &symbolStringTable.length), + SectionWriter(file, ".symtab", SHT_SYMTAB, 0, 8, 0, &symbolTable.data, &symbolTable.length, sizeof(Symbol), stringTableSectionNumber) + }; + + // for some reason, string tables require a null first element... + symbolStringTable.add(""); + + for(SymbolInfo* sym = symbols.begin(); sym != symbols.end(); sym++) { + size_t nameOffset = symbolStringTable.add(sym->name); + + Symbol symbolStruct; + symbolStruct.st_name = V4(nameOffset); + symbolStruct.st_value = VANY(static_cast(sym->addr)); + symbolStruct.st_size = VANY(static_cast(0)); + symbolStruct.st_info = V1(SYMBOL_INFO(STB_GLOBAL, STT_NOTYPE)); + symbolStruct.st_other = V1(STV_DEFAULT); + symbolStruct.st_shndx = V2(bodySectionNumber); + symbolTable.write(&symbolStruct, sizeof(Symbol)); + } + + file.writeHeader(out); + + for(int i = 0; i < file.sectionCount; i++) { + sections[i].writeHeader(out); + } + + for(int i = 0; i < file.sectionCount; i++) { + sections[i].writeData(out); + } + + return true; } }; diff --git a/src/binaryToObject/endianness.h b/src/binaryToObject/endianness.h index 36c5675bac..0537fb8bfe 100644 --- a/src/binaryToObject/endianness.h +++ b/src/binaryToObject/endianness.h @@ -15,11 +15,12 @@ namespace avian { namespace endian { -#ifndef BIG_ENDIAN -const bool LittleEndian = true; -#else -const bool LittleEndian = false; -#endif +static union { + uint32_t i; + char c[4]; +} _DetectEndianness = {1}; + +const bool LittleEndian = _DetectEndianness.c[0] == 1; template class Endianness { @@ -57,13 +58,13 @@ public: return v; } else { return - ((static_cast(v) >> 56) & UINT64_C(0x00000000000000FF)) | - ((static_cast(v) >> 40) & UINT64_C(0x000000000000FF00)) | - ((static_cast(v) >> 24) & UINT64_C(0x0000000000FF0000)) | - ((static_cast(v) >> 8) & UINT64_C(0x00000000FF000000)) | - ((static_cast(v) << 8) & UINT64_C(0x000000FF00000000)) | - ((static_cast(v) << 24) & UINT64_C(0x0000FF0000000000)) | - ((static_cast(v) << 40) & UINT64_C(0x00FF000000000000)) | + ((static_cast(v) >> 56) & (static_cast(0xff) << 0)) | + ((static_cast(v) >> 40) & (static_cast(0xff) << 8)) | + ((static_cast(v) >> 24) & (static_cast(0xff) << 16)) | + ((static_cast(v) >> 8) & (static_cast(0xff) << 24)) | + ((static_cast(v) << 8) & (static_cast(0xff) << 32)) | + ((static_cast(v) << 24) & (static_cast(0xff) << 40)) | + ((static_cast(v) << 40) & (static_cast(0xff) << 48)) | ((static_cast(v) << 56)); } } diff --git a/src/binaryToObject/mach-o.cpp b/src/binaryToObject/mach-o.cpp index 5899c652ff..246940329f 100644 --- a/src/binaryToObject/mach-o.cpp +++ b/src/binaryToObject/mach-o.cpp @@ -56,6 +56,14 @@ using avian::endian::Endianness; #define V4 Endianness::v4 #define VANY Endianness::vAny +inline unsigned +log(unsigned n) +{ + unsigned r = 0; + for (unsigned i = 1; i < n; ++r) i <<= 1; + return r; +} + template class MachOPlatform : public Platform { public: @@ -132,200 +140,151 @@ public: return (n + (BytesPerWord - 1)) & ~(BytesPerWord - 1); } - static inline unsigned - log(unsigned n) - { - unsigned r = 0; - for (unsigned i = 1; i < n; ++r) i <<= 1; - return r; + virtual bool writeObject(OutputStream* out, Slice symbols, Slice data, unsigned accessFlags, unsigned alignment) { + cpu_type_t cpuType; + cpu_subtype_t cpuSubType; + switch(info.arch) { + case PlatformInfo::x86_64: + cpuType = CPU_TYPE_X86_64; + cpuSubType = CPU_SUBTYPE_X86_64_ALL; + break; + case PlatformInfo::x86: + cpuType = CPU_TYPE_I386; + cpuSubType = CPU_SUBTYPE_I386_ALL; + break; + case PlatformInfo::PowerPC: + cpuType = CPU_TYPE_POWERPC; + cpuSubType = CPU_SUBTYPE_POWERPC_ALL; + break; + case PlatformInfo::Arm: + cpuType = CPU_TYPE_ARM; + cpuSubType = CPU_SUBTYPE_ARM_V7; + default: + // should never happen (see MachOPlatform declarations at bottom) + fprintf(stderr, "unsupported architecture: %d\n", info.arch); + return false; + } + + const char* segmentName; + const char* sectionName; + if (accessFlags & Writable) { + if (accessFlags & Executable) { + segmentName = "__RWX"; + sectionName = "__rwx"; + } else { + segmentName = "__DATA"; + sectionName = "__data"; + } + } else { + segmentName = "__TEXT"; + sectionName = "__text"; + } + + FileHeader header = { + V4(Magic), // magic + V4(cpuType), + V4(cpuSubType), + V4(MH_OBJECT), // filetype, + V4(2), // ncmds + V4(sizeof(SegmentCommand) + + sizeof(Section) + + sizeof(SymtabCommand)), // sizeofcmds + V4(0) // flags + }; + + AddrTy finalSize = pad(data.count); + + SegmentCommand segment = { + V4(Segment), // cmd + V4(sizeof(SegmentCommand) + sizeof(Section)), // cmdsize + "", // segname + VANY(static_cast(0)), // vmaddr + VANY(static_cast(finalSize)), // vmsize + VANY(static_cast(sizeof(FileHeader) + + sizeof(SegmentCommand) + + sizeof(Section) + + sizeof(SymtabCommand))), // fileoff + VANY(static_cast(finalSize)), // filesize + V4(7), // maxprot + V4(7), // initprot + V4(1), // nsects + V4(0) // flags + }; + + strncpy(segment.segname, segmentName, sizeof(segment.segname)); + + Section sect = { + "", // sectname + "", // segname + VANY(static_cast(0)), // addr + VANY(static_cast(finalSize)), // size + V4(sizeof(FileHeader) + + sizeof(SegmentCommand) + + sizeof(Section) + + sizeof(SymtabCommand)), // offset + V4(log(alignment)), // align + V4(0), // reloff + V4(0), // nreloc + V4(S_REGULAR), // flags + V4(0), // reserved1 + V4(0), // reserved2 + }; + + strncpy(sect.segname, segmentName, sizeof(sect.segname)); + strncpy(sect.sectname, sectionName, sizeof(sect.sectname)); + + StringTable strings; + strings.add(""); + Buffer symbolList; + + for(SymbolInfo* sym = symbols.begin(); sym != symbols.end(); sym++) { + unsigned offset = strings.length; + strings.write("_", 1); + strings.add(sym->name); + NList symbol = { + V4(offset), // n_un + V1(N_SECT | N_EXT), // n_type + V1(1), // n_sect + V2(0), // n_desc + VANY(static_cast(sym->addr)) // n_value + }; + symbolList.write(&symbol, sizeof(NList)); + } + + SymtabCommand symbolTable = { + V4(LC_SYMTAB), // cmd + V4(sizeof(SymtabCommand)), // cmdsize + V4(sizeof(FileHeader) + + sizeof(SegmentCommand) + + sizeof(Section) + + sizeof(SymtabCommand) + + finalSize), // symoff + V4(symbols.count), // nsyms + V4(sizeof(FileHeader) + + sizeof(SegmentCommand) + + sizeof(Section) + + sizeof(SymtabCommand) + + finalSize + + (sizeof(NList) * symbols.count)), // stroff + V4(strings.length), // strsize + }; + + out->writeChunk(&header, sizeof(header)); + out->writeChunk(&segment, sizeof(segment)); + out->writeChunk(§, sizeof(sect)); + out->writeChunk(&symbolTable, sizeof(symbolTable)); + + out->writeChunk(data.items, data.count); + out->writeRepeat(0, finalSize - data.count); + + out->writeChunk(symbolList.data, symbolList.length); + + out->writeChunk(strings.data, strings.length); } - - class MachOObjectWriter : public ObjectWriter { - public: - - PlatformInfo::Architecture arch; - OutputStream* out; - - MachOObjectWriter(PlatformInfo::Architecture arch, OutputStream* out): - arch(arch), - out(out) {} - - void writeObject(const uint8_t* data, unsigned size, - const char* startName, const char* endName, - const char* segmentName, const char* sectionName, - unsigned alignment, cpu_type_t cpuType, cpu_subtype_t cpuSubType) - { - unsigned startNameLength = strlen(startName) + 1; - unsigned endNameLength = strlen(endName) + 1; - - FileHeader header = { - V4(Magic), // magic - V4(cpuType), - V4(cpuSubType), - V4(MH_OBJECT), // filetype, - V4(2), // ncmds - V4(sizeof(SegmentCommand) - + sizeof(Section) - + sizeof(SymtabCommand)), // sizeofcmds - V4(0) // flags - }; - - SegmentCommand segment = { - V4(Segment), // cmd - V4(sizeof(SegmentCommand) + sizeof(Section)), // cmdsize - "", // segname - VANY(static_cast(0)), // vmaddr - VANY(static_cast(pad(size))), // vmsize - VANY(static_cast(sizeof(FileHeader) - + sizeof(SegmentCommand) - + sizeof(Section) - + sizeof(SymtabCommand))), // fileoff - VANY(static_cast(pad(size))), // filesize - V4(7), // maxprot - V4(7), // initprot - V4(1), // nsects - V4(0) // flags - }; - - strncpy(segment.segname, segmentName, sizeof(segment.segname)); - - Section sect = { - "", // sectname - "", // segname - VANY(static_cast(0)), // addr - VANY(static_cast(pad(size))), // size - V4(sizeof(FileHeader) - + sizeof(SegmentCommand) - + sizeof(Section) - + sizeof(SymtabCommand)), // offset - V4(log(alignment)), // align - V4(0), // reloff - V4(0), // nreloc - V4(S_REGULAR), // flags - V4(0), // reserved1 - V4(0), // reserved2 - }; - - strncpy(sect.segname, segmentName, sizeof(sect.segname)); - strncpy(sect.sectname, sectionName, sizeof(sect.sectname)); - - SymtabCommand symbolTable = { - V4(LC_SYMTAB), // cmd - V4(sizeof(SymtabCommand)), // cmdsize - V4(sizeof(FileHeader) - + sizeof(SegmentCommand) - + sizeof(Section) - + sizeof(SymtabCommand) - + pad(size)), // symoff - V4(2), // nsyms - V4(sizeof(FileHeader) - + sizeof(SegmentCommand) - + sizeof(Section) - + sizeof(SymtabCommand) - + pad(size) - + (sizeof(NList) * 2)), // stroff - V4(1 + startNameLength + endNameLength), // strsize - }; - - NList symbolList[] = { - { - V4(1), // n_un - V1(N_SECT | N_EXT), // n_type - V1(1), // n_sect - V2(0), // n_desc - VANY(static_cast(0)) // n_value - }, - { - V4(1 + startNameLength), // n_un - V1(N_SECT | N_EXT), // n_type - V1(1), // n_sect - V2(0), // n_desc - VANY(static_cast(size)) // n_value - } - }; - - out->writeChunk(&header, sizeof(header)); - out->writeChunk(&segment, sizeof(segment)); - out->writeChunk(§, sizeof(sect)); - out->writeChunk(&symbolTable, sizeof(symbolTable)); - - out->writeChunk(data, size); - out->writeRepeat(0, pad(size) - size); - - out->writeChunk(&symbolList, sizeof(symbolList)); - - out->write(0); - - out->writeChunk(startName, startNameLength); - out->writeChunk(endName, endNameLength); - } - - virtual bool write(uint8_t* data, size_t size, - const char* startName, const char* endName, - unsigned alignment, unsigned accessFlags) - { - - cpu_type_t cpuType; - cpu_subtype_t cpuSubType; - if (arch == PlatformInfo::x86_64) { - cpuType = CPU_TYPE_X86_64; - cpuSubType = CPU_SUBTYPE_X86_64_ALL; - } else if (arch == PlatformInfo::x86) { - cpuType = CPU_TYPE_I386; - cpuSubType = CPU_SUBTYPE_I386_ALL; - } else if (arch == PlatformInfo::PowerPC) { - cpuType = CPU_TYPE_POWERPC; - cpuSubType = CPU_SUBTYPE_POWERPC_ALL; - } else if (arch == PlatformInfo::Arm) { - cpuType = CPU_TYPE_ARM; - cpuSubType = CPU_SUBTYPE_ARM_V7; - } else { - fprintf(stderr, "unsupported architecture: %d\n", arch); - return false; - } - - const char* segmentName; - const char* sectionName; - if (accessFlags & Writable) { - if (accessFlags & Executable) { - segmentName = "__RWX"; - sectionName = "__rwx"; - } else { - segmentName = "__DATA"; - sectionName = "__data"; - } - } else { - segmentName = "__TEXT"; - sectionName = "__text"; - } - - unsigned startNameLength = strlen(startName); - char myStartName[startNameLength + 2]; - myStartName[0] = '_'; - memcpy(myStartName + 1, startName, startNameLength + 1); - - unsigned endNameLength = strlen(endName); - char myEndName[endNameLength + 2]; - myEndName[0] = '_'; - memcpy(myEndName + 1, endName, endNameLength + 1); - - writeObject(data, size, myStartName, myEndName, segmentName, - sectionName, alignment, cpuType, cpuSubType); - - return true; - } - - virtual void dispose() { - delete this; - } - }; MachOPlatform(PlatformInfo::Architecture arch): Platform(PlatformInfo(PlatformInfo::Darwin, arch)) {} - - virtual ObjectWriter* makeObjectWriter(OutputStream* out) { - return new MachOObjectWriter(info.arch, out); - } + }; MachOPlatform darwinx86Platform(PlatformInfo::x86); diff --git a/src/binaryToObject/main.cpp b/src/binaryToObject/main.cpp index c9a3fb8d75..e9bc749abc 100644 --- a/src/binaryToObject/main.cpp +++ b/src/binaryToObject/main.cpp @@ -33,18 +33,14 @@ void* operator new(size_t size) { return malloc(size); } -void operator delete(void* mem) { - if(mem) { - free(mem); - } -} +void operator delete(void* mem) { abort(); } namespace { using namespace avian::tools; bool -writeObject(uint8_t* data, unsigned size, OutputStream* out, const char* startName, +writeObject(uint8_t* data, size_t size, OutputStream* out, const char* startName, const char* endName, const char* os, const char* architecture, unsigned alignment, bool writable, bool executable) @@ -56,14 +52,16 @@ writeObject(uint8_t* data, unsigned size, OutputStream* out, const char* startNa return false; } - ObjectWriter* writer = platform->makeObjectWriter(out); + SymbolInfo symbols[2]; + symbols[0].name = startName; + symbols[0].addr = 0; + symbols[1].name = endName; + symbols[1].addr = size; - bool success = writer->write(data, size, startName, endName, alignment, - ObjectWriter::Readable | (writable ? ObjectWriter::Writable : 0) | (executable ? ObjectWriter::Executable : 0)); + unsigned accessFlags = (writable ? Platform::Writable : 0) | (executable ? Platform::Executable : 0); - writer->dispose(); + return platform->writeObject(out, Slice(symbols, 2), Slice(data, size), accessFlags, alignment); - return success; } void diff --git a/src/binaryToObject/pe.cpp b/src/binaryToObject/pe.cpp index 37bf643a95..aab60feb90 100644 --- a/src/binaryToObject/pe.cpp +++ b/src/binaryToObject/pe.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include "tools.h" @@ -81,169 +82,193 @@ pad(unsigned n) using namespace avian::tools; -void -writeObject(const uint8_t* data, unsigned size, OutputStream* out, - const char* startName, const char* endName, - const char* sectionName, int machine, int machineMask, - int sectionMask) -{ - const unsigned sectionCount = 1; - const unsigned symbolCount = 2; - - const unsigned sectionNumber = 1; - - const unsigned startNameLength = strlen(startName) + 1; - const unsigned endNameLength = strlen(endName) + 1; - - const unsigned startNameOffset = 4; - const unsigned endNameOffset = startNameOffset + startNameLength; - - IMAGE_FILE_HEADER fileHeader = { - machine, // Machine - sectionCount, // NumberOfSections - 0, // TimeDateStamp - sizeof(IMAGE_FILE_HEADER) - + sizeof(IMAGE_SECTION_HEADER) - + pad(size), // PointerToSymbolTable - symbolCount, // NumberOfSymbols - 0, // SizeOfOptionalHeader - IMAGE_FILE_RELOCS_STRIPPED - | IMAGE_FILE_LINE_NUMS_STRIPPED - | machineMask // Characteristics - }; - - IMAGE_SECTION_HEADER sectionHeader = { - "", // Name - 0, // PhysicalAddress - 0, // VirtualAddress - pad(size), // SizeOfRawData - sizeof(IMAGE_FILE_HEADER) - + sizeof(IMAGE_SECTION_HEADER), // PointerToRawData - 0, // PointerToRelocations - 0, // PointerToLinenumbers - 0, // NumberOfRelocations - 0, // NumberOfLinenumbers - sectionMask // Characteristics - }; - - strncpy(reinterpret_cast(sectionHeader.Name), sectionName, - sizeof(sectionHeader.Name)); - - IMAGE_SYMBOL startSymbol = { - { 0 }, // Name - 0, // Value - sectionNumber, // SectionNumber - 0, // Type - 2, // StorageClass - 0, // NumberOfAuxSymbols - }; - startSymbol.N.Name.Long = startNameOffset; - - IMAGE_SYMBOL endSymbol = { - { 0 }, // Name - size, // Value - sectionNumber, // SectionNumber - 0, // Type - 2, // StorageClass - 0, // NumberOfAuxSymbols - }; - endSymbol.N.Name.Long = endNameOffset; - - out->writeChunk(&fileHeader, sizeof(fileHeader)); - out->writeChunk(§ionHeader, sizeof(sectionHeader)); - - out->writeChunk(data, size); - out->writeRepeat(0, pad(size) - size); - - out->writeChunk(&startSymbol, sizeof(startSymbol)); - out->writeChunk(&endSymbol, sizeof(endSymbol)); - - uint32_t symbolTableSize = endNameOffset + endNameLength; - out->writeChunk(&symbolTableSize, 4); - - out->writeChunk(startName, startNameLength); - out->writeChunk(endName, endNameLength); -} - template class WindowsPlatform : public Platform { public: - class PEObjectWriter : public ObjectWriter { + + class FileWriter { public: + unsigned sectionCount; + unsigned symbolCount; + unsigned dataStart; + unsigned dataOffset; - OutputStream* out; + IMAGE_FILE_HEADER header; - PEObjectWriter(OutputStream* out): - out(out) {} + StringTable strings; + Buffer symbols; - virtual bool write(uint8_t* data, size_t size, - const char* startName, const char* endName, - unsigned alignment, unsigned accessFlags) + FileWriter(unsigned machine, unsigned machineMask, unsigned symbolCount): + sectionCount(0), + symbolCount(symbolCount), + dataStart(sizeof(IMAGE_FILE_HEADER)), + dataOffset(0) { - int machine; - int machineMask; - - if (BytesPerWord == 8) { - machine = IMAGE_FILE_MACHINE_AMD64; - machineMask = 0; - } else { // if (BytesPerWord == 8) - machine = IMAGE_FILE_MACHINE_I386; - machineMask = IMAGE_FILE_32BIT_MACHINE; - } - - int sectionMask; - switch (alignment) { - case 0: - case 1: - sectionMask = IMAGE_SCN_ALIGN_1BYTES; - break; - case 2: - sectionMask = IMAGE_SCN_ALIGN_2BYTES; - break; - case 4: - sectionMask = IMAGE_SCN_ALIGN_4BYTES; - break; - case 8: - sectionMask = IMAGE_SCN_ALIGN_8BYTES; - break; - default: - fprintf(stderr, "unsupported alignment: %d\n", alignment); - return false; - } - - sectionMask |= IMAGE_SCN_MEM_READ; - - const char* sectionName; - if (accessFlags & ObjectWriter::Writable) { - if (accessFlags & ObjectWriter::Executable) { - sectionName = ".rwx"; - sectionMask |= IMAGE_SCN_MEM_WRITE - | IMAGE_SCN_MEM_EXECUTE - | IMAGE_SCN_CNT_CODE; - } else { - sectionName = ".data"; - sectionMask |= IMAGE_SCN_MEM_WRITE; - } - } else { - sectionName = ".text"; - sectionMask |= IMAGE_SCN_MEM_EXECUTE | IMAGE_SCN_CNT_CODE; - } - - writeObject(data, size, out, startName, endName, sectionName, machine, - machineMask, sectionMask); - - return true; + header.Machine = machine; + // header.NumberOfSections = sectionCount; + header.TimeDateStamp = 0; + // header.PointerToSymbolTable = sizeof(IMAGE_FILE_HEADER) + // + sizeof(IMAGE_SECTION_HEADER) + // + pad(size); + // header.NumberOfSymbols = symbolCount; + header.SizeOfOptionalHeader = 0; + header.Characteristics = IMAGE_FILE_RELOCS_STRIPPED + | IMAGE_FILE_LINE_NUMS_STRIPPED + | machineMask; } - virtual void dispose() { - delete this; + void writeHeader(OutputStream* out) { + header.NumberOfSections = sectionCount; + header.PointerToSymbolTable = dataStart + dataOffset; + printf("symbol table start: 0x%x\n", header.PointerToSymbolTable); + dataOffset = pad(dataOffset + symbolCount * sizeof(IMAGE_SYMBOL)); + printf("string table start: 0x%x\n", dataStart + dataOffset); + header.NumberOfSymbols = symbolCount; + out->writeChunk(&header, sizeof(IMAGE_FILE_HEADER)); } + void addSymbol(String name, unsigned addr, unsigned sectionNumber, unsigned type, unsigned storageClass) { + unsigned nameOffset = strings.add(name); + IMAGE_SYMBOL symbol = { + { 0 }, // Name + addr, // Value + sectionNumber, // SectionNumber + type, // Type + storageClass, // StorageClass + 0, // NumberOfAuxSymbols + }; + symbol.N.Name.Long = nameOffset+4; + symbols.write(&symbol, sizeof(IMAGE_SYMBOL)); + } + + void writeData(OutputStream* out) { + out->writeChunk(symbols.data, symbols.length); + uint32_t size = strings.length + 4; + out->writeChunk(&size, 4); + out->writeChunk(strings.data, strings.length); + } + }; + + class SectionWriter { + public: + FileWriter& file; + IMAGE_SECTION_HEADER header; + size_t dataSize; + size_t finalSize; + const uint8_t* data; + unsigned dataOffset; + + SectionWriter( + FileWriter& file, + const char* name, + unsigned sectionMask, + const uint8_t* data, + size_t dataSize): + + file(file), + data(data), + dataSize(dataSize), + finalSize(pad(dataSize)) + { + file.sectionCount++; + file.dataStart += sizeof(IMAGE_SECTION_HEADER); + strcpy(reinterpret_cast(header.Name), name); + header.Misc.VirtualSize = 0; + header.SizeOfRawData = finalSize; + // header.PointerToRawData = file.dataOffset; + dataOffset = file.dataOffset; + file.dataOffset += finalSize; + header.PointerToRelocations = 0; + header.PointerToLinenumbers = 0; + header.NumberOfRelocations = 0; + header.NumberOfLinenumbers = 0; + header.Characteristics = sectionMask; + } + + void writeHeader(OutputStream* out) { + header.PointerToRawData = dataOffset + file.dataStart; + printf("section %s: data at 0x%x, ending at 0x%x\n", header.Name, header.PointerToRawData, header.PointerToRawData + header.SizeOfRawData); + out->writeChunk(&header, sizeof(IMAGE_SECTION_HEADER)); + } + + void writeData(OutputStream* out) { + out->writeChunk(data, dataSize); + out->writeRepeat(0, finalSize - dataSize); + } + + }; - virtual ObjectWriter* makeObjectWriter(OutputStream* out) { - return new PEObjectWriter(out); + virtual bool writeObject(OutputStream* out, Slice symbols, Slice data, unsigned accessFlags, unsigned alignment) { + + int machine; + int machineMask; + + if (BytesPerWord == 8) { + machine = IMAGE_FILE_MACHINE_AMD64; + machineMask = 0; + } else { // if (BytesPerWord == 8) + machine = IMAGE_FILE_MACHINE_I386; + machineMask = IMAGE_FILE_32BIT_MACHINE; + } + + int sectionMask; + switch (alignment) { + case 0: + case 1: + sectionMask = IMAGE_SCN_ALIGN_1BYTES; + break; + case 2: + sectionMask = IMAGE_SCN_ALIGN_2BYTES; + break; + case 4: + sectionMask = IMAGE_SCN_ALIGN_4BYTES; + break; + case 8: + sectionMask = IMAGE_SCN_ALIGN_8BYTES; + break; + default: + fprintf(stderr, "unsupported alignment: %d\n", alignment); + return false; + } + + sectionMask |= IMAGE_SCN_MEM_READ; + + const char* sectionName; + if (accessFlags & Platform::Writable) { + if (accessFlags & Platform::Executable) { + sectionName = ".rwx"; + sectionMask |= IMAGE_SCN_MEM_WRITE + | IMAGE_SCN_MEM_EXECUTE + | IMAGE_SCN_CNT_CODE; + } else { + sectionName = ".data"; + sectionMask |= IMAGE_SCN_MEM_WRITE; + } + } else { + sectionName = ".text"; + sectionMask |= IMAGE_SCN_MEM_EXECUTE | IMAGE_SCN_CNT_CODE; + } + + FileWriter file(machine, machineMask, symbols.count); + + SectionWriter section(file, sectionName, sectionMask, data.items, data.count); + + file.writeHeader(out); + + for(SymbolInfo* sym = symbols.begin(); sym != symbols.end(); sym++) { + file.addSymbol(sym->name, sym->addr, 1, 0, 2); + } + + section.writeHeader(out); + + section.writeData(out); + + file.writeData(out); + + return true; + } WindowsPlatform(): @@ -253,4 +278,4 @@ public: WindowsPlatform<4> windows32Platform; WindowsPlatform<8> windows64Platform; -} +} // namespace diff --git a/src/binaryToObject/tools.cpp b/src/binaryToObject/tools.cpp index 0768e8a24b..4f8b8170d0 100644 --- a/src/binaryToObject/tools.cpp +++ b/src/binaryToObject/tools.cpp @@ -19,6 +19,38 @@ namespace avian { namespace tools { +String::String(const char* text): + text(text), + length(strlen(text)) {} + +Buffer::Buffer(): + capacity(100), + length(0), + data((uint8_t*)malloc(capacity)) {} + +Buffer::~Buffer() { + free(data); +} + +void Buffer::ensure(size_t more) { + if(length + more > capacity) { + capacity = capacity * 2 + more; + data = (uint8_t*)realloc(data, capacity); + } +} + +void Buffer::write(const void* d, size_t size) { + ensure(size); + memcpy(data + length, d, size); + length += size; +} + +unsigned StringTable::add(String str) { + unsigned offset = Buffer::length; + Buffer::write(str.text, str.length + 1); + return offset; +} + void OutputStream::write(uint8_t byte) { writeChunk(&byte, 1); } diff --git a/src/binaryToObject/tools.h b/src/binaryToObject/tools.h index 9ba285dd66..bc3d1f4f19 100644 --- a/src/binaryToObject/tools.h +++ b/src/binaryToObject/tools.h @@ -35,20 +35,62 @@ public: virtual void write(uint8_t byte); }; -class ObjectWriter { +class String { public: + const char* text; + size_t length; - enum AccessFlags { - Readable = 1 << 0, - Writable = 1 << 1, - Executable = 1 << 2 - }; + String(const char* text); +}; - virtual bool write(uint8_t* data, size_t size, - const char* startName, const char* endName, - unsigned alignment, unsigned accessFlags) = 0; +class SymbolInfo { +public: + unsigned addr; + String name; - virtual void dispose() = 0; + inline SymbolInfo(uint64_t addr, const char* name): + addr(addr), + name(name) {} + + inline SymbolInfo(): + name("") {} +}; + +class Buffer { +public: + size_t capacity; + size_t length; + uint8_t* data; + + Buffer(); + ~Buffer(); + + void ensure(size_t more); + void write(const void* d, size_t size); +}; + +class StringTable : public Buffer { +public: + unsigned add(String str); +}; + +template +class Slice { +public: + T* items; + size_t count; + + inline Slice(T* items, size_t count): + items(items), + count(count) {} + + inline T* begin() { + return items; + } + + inline T* end() { + return items + count; + } }; class PlatformInfo { @@ -98,7 +140,12 @@ public: first = this; } - virtual ObjectWriter* makeObjectWriter(OutputStream* out) = 0; + enum AccessFlags { + Writable = 1 << 0, + Executable = 1 << 1 + }; + + virtual bool writeObject(OutputStream* out, Slice symbols, Slice data, unsigned accessFlags, unsigned alignment) = 0; static Platform* getPlatform(PlatformInfo info); };