improve afl_maybe_log tcg call generation + merge elfload diffs

Andrea Fioraldi
2019-06-05 11:48:36 +02:00
parent bcc0347eef
commit b59d71546b
7 changed files with 218 additions and 231 deletions

View File

@@ -133,7 +133,6 @@ patch -p1 <../patches/cpu-exec.diff || exit 1
 patch -p1 <../patches/syscall.diff || exit 1
 patch -p1 <../patches/translate-all.diff || exit 1
 patch -p1 <../patches/tcg.diff || exit 1
-patch -p1 <../patches/elfload2.diff || exit 1
 echo "[+] Patching done."

View File

@@ -9,6 +9,8 @@
    TCG instrumentation and block chaining support by Andrea Biondo
                                       <andrea.biondo965@gmail.com>
+   QEMU 3.1.0 port and thread-safety by Andrea Fioraldi
+                                      <andreafioraldi@gmail.com>
 
    Copyright 2015, 2016, 2017 Google Inc. All rights reserved.
@@ -19,7 +21,7 @@
      http://www.apache.org/licenses/LICENSE-2.0
 
    This code is a shim patched into the separately-distributed source
-   code of QEMU 2.10.0. It leverages the built-in QEMU tracing functionality
+   code of QEMU 3.1.0. It leverages the built-in QEMU tracing functionality
    to implement AFL-style instrumentation and to take care of the remaining
    parts of the AFL fork server logic.

View File

@@ -0,0 +1,165 @@
/*
   american fuzzy lop - high-performance binary-only instrumentation
   -----------------------------------------------------------------

   Written by Andrew Griffiths <agriffiths@google.com> and
              Michal Zalewski <lcamtuf@google.com>

   Idea & design very much by Andrew Griffiths.

   TCG instrumentation and block chaining support by Andrea Biondo
                                      <andrea.biondo965@gmail.com>

   QEMU 3.1.0 port and thread-safety by Andrea Fioraldi
                                      <andreafioraldi@gmail.com>

   Copyright 2015, 2016, 2017 Google Inc. All rights reserved.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at:

     http://www.apache.org/licenses/LICENSE-2.0

   This code is a shim patched into the separately-distributed source
   code of QEMU 3.1.0. It leverages the built-in QEMU tracing functionality
   to implement AFL-style instrumentation and to take care of the remaining
   parts of the AFL fork server logic.

   The resulting QEMU binary is essentially a standalone instrumentation
   tool; for an example of how to leverage it for other purposes, you can
   have a look at afl-showmap.c.

 */

void afl_maybe_log(void* cur_loc);

/* Note: we convert the 64 bit args to 32 bit and do some alignment
   and endian swap. Maybe it would be better to do the alignment
   and endian swap in tcg_reg_alloc_call(). */
void tcg_gen_afl_maybe_log_call(target_ulong cur_loc)
{
    int real_args, pi;
    unsigned sizemask, flags;
    TCGOp *op;

    TCGTemp *arg = tcgv_ptr_temp( tcg_const_tl(cur_loc) );

    flags = 0;
    sizemask = dh_sizemask(void, 0) | dh_sizemask(ptr, 1);

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* We have 64-bit values in one register, but need to pass as two
       separate parameters. Split them. */
    int orig_sizemask = sizemask;
    TCGv_i64 retl, reth;
    TCGTemp *split_args[MAX_OPC_PARAM];

    retl = NULL;
    reth = NULL;
    if (sizemask != 0) {
        real_args = 0;
        int is_64bit = sizemask & (1 << 2);
        if (is_64bit) {
            TCGv_i64 orig = temp_tcgv_i64(arg);
            TCGv_i32 h = tcg_temp_new_i32();
            TCGv_i32 l = tcg_temp_new_i32();
            tcg_gen_extr_i64_i32(l, h, orig);
            split_args[real_args++] = tcgv_i32_temp(h);
            split_args[real_args++] = tcgv_i32_temp(l);
        } else {
            split_args[real_args++] = arg;
        }
        nargs = real_args;
        args = split_args;
        sizemask = 0;
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    int is_64bit = sizemask & (1 << 2);
    int is_signed = sizemask & (2 << 2);
    if (!is_64bit) {
        TCGv_i64 temp = tcg_temp_new_i64();
        TCGv_i64 orig = temp_tcgv_i64(arg);
        if (is_signed) {
            tcg_gen_ext32s_i64(temp, orig);
        } else {
            tcg_gen_ext32u_i64(temp, orig);
        }
        arg = tcgv_i64_temp(temp);
    }
#endif /* TCG_TARGET_EXTEND_ARGS */

    op = tcg_emit_op(INDEX_op_call);

    pi = 0;
    TCGOP_CALLO(op) = 0;

    real_args = 0;
    int is_64bit = sizemask & (1 << 2);
    if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
        /* some targets want aligned 64 bit args */
        if (real_args & 1) {
            op->args[pi++] = TCG_CALL_DUMMY_ARG;
            real_args++;
        }
#endif
        /* If stack grows up, then we will be placing successive
           arguments at lower addresses, which means we need to
           reverse the order compared to how we would normally
           treat either big or little-endian. For those arguments
           that will wind up in registers, this still works for
           HPPA (the only current STACK_GROWSUP target) since the
           argument registers are *also* allocated in decreasing
           order. If another such target is added, this logic may
           have to get more complicated to differentiate between
           stack arguments and register arguments. */
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
        op->args[pi++] = temp_arg(arg + 1);
        op->args[pi++] = temp_arg(arg);
#else
        op->args[pi++] = temp_arg(arg);
        op->args[pi++] = temp_arg(arg + 1);
#endif
        real_args += 2;
    }

    op->args[pi++] = temp_arg(arg);
    real_args++;

    op->args[pi++] = (uintptr_t)&afl_maybe_log;
    op->args[pi++] = flags;
    TCGOP_CALLI(op) = real_args;

    /* Make sure the fields didn't overflow. */
    tcg_debug_assert(TCGOP_CALLI(op) == real_args);
    tcg_debug_assert(pi <= ARRAY_SIZE(op->args));

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* Free all of the parts we allocated above. */
    real_args = 0;
    int is_64bit = orig_sizemask & (1 << 2);
    if (is_64bit) {
        tcg_temp_free_internal(args[real_args++]);
        tcg_temp_free_internal(args[real_args++]);
    } else {
        real_args++;
    }
    if (orig_sizemask & 1) {
        /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them.
           Note that describing these as TCGv_i64 eliminates an unnecessary
           zero-extension that tcg_gen_concat_i32_i64 would create. */
        tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
        tcg_temp_free_i64(retl);
        tcg_temp_free_i64(reth);
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    int is_64bit = sizemask & (1 << 2);
    if (!is_64bit) {
        tcg_temp_free_internal(arg);
    }
#endif /* TCG_TARGET_EXTEND_ARGS */
}
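
On a typical 64-bit host neither preprocessor branch above is taken, so the emitted call op ends up with a small fixed layout: one input argument (the cur_loc temp), the helper address, the flags word, zero outputs and one input. The following standalone C sketch is illustrative only — it uses a hypothetical mock_call_op struct instead of the real TCGOp, and a stub in place of afl_maybe_log — to show that layout and the translate-time/run-time split:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for the call op filled in above. */
struct mock_call_op {
    uintptr_t args[4];   /* [0] = argument temp, [1] = helper address, [2] = flags */
    int callo;           /* number of output (return) values: 0, the helper is void */
    int calli;           /* number of input arguments: 1, the cur_loc temp */
};

static void afl_maybe_log_stub(void *cur_loc) {
    printf("helper called with %p\n", cur_loc);
}

int main(void) {
    uintptr_t cur_loc_temp = 0x1234;          /* stands in for the TCG temp value */
    struct mock_call_op op = {0};
    int pi = 0, real_args = 0;

    /* "Translation time": record argument, callee and flags, as the code above does. */
    op.args[pi++] = cur_loc_temp;             real_args++;
    op.args[pi++] = (uintptr_t)&afl_maybe_log_stub;
    op.args[pi++] = 0;                        /* flags */
    op.callo = 0;
    op.calli = real_args;

    /* "Execution time": invoke the recorded helper with the recorded argument. */
    ((void (*)(void *))op.args[1])((void *)op.args[0]);
    return 0;
}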

View File

@@ -9,6 +9,8 @@
    TCG instrumentation and block chaining support by Andrea Biondo
                                       <andrea.biondo965@gmail.com>
+   QEMU 3.1.0 port and thread-safety by Andrea Fioraldi
+                                      <andreafioraldi@gmail.com>
 
    Copyright 2015, 2016, 2017 Google Inc. All rights reserved.
@@ -19,7 +21,7 @@
      http://www.apache.org/licenses/LICENSE-2.0
 
    This code is a shim patched into the separately-distributed source
-   code of QEMU 2.10.0. It leverages the built-in QEMU tracing functionality
+   code of QEMU 3.1.0. It leverages the built-in QEMU tracing functionality
    to implement AFL-style instrumentation and to take care of the remaining
    parts of the AFL fork server logic.
@@ -37,19 +39,17 @@ extern unsigned char *afl_area_ptr;
 extern unsigned int afl_inst_rms;
 extern abi_ulong afl_start_code, afl_end_code;
 
-void tcg_gen_afl_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args);
+void tcg_gen_afl_maybe_log_call(target_ulong cur_loc);
 
-void afl_maybe_log(abi_ulong cur_loc) {
+void afl_maybe_log(void* cur_loc) {
 
   static __thread abi_ulong prev_loc;
 
-  afl_area_ptr[cur_loc ^ prev_loc]++;
-  prev_loc = cur_loc >> 1;
+  afl_area_ptr[(abi_ulong)cur_loc ^ prev_loc]++;
+  prev_loc = (abi_ulong)cur_loc >> 1;
 
 }
 
 /* Generates TCG code for AFL's tracing instrumentation. */
 static void afl_gen_trace(target_ulong cur_loc) {
@@ -71,7 +71,6 @@ static void afl_gen_trace(target_ulong cur_loc) {
 
   if (cur_loc >= afl_inst_rms) return;
 
-  TCGTemp *args[1] = { tcgv_i64_temp( tcg_const_tl(cur_loc) ) };
-  tcg_gen_afl_callN(afl_maybe_log, NULL, 1, args);
+  tcg_gen_afl_maybe_log_call(cur_loc);
 
 }
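
afl_maybe_log implements AFL's usual edge-coverage update: the index into the shared map is the XOR of the current and the (shifted) previous block location, so each entry counts a specific edge, and the right shift keeps the A->B edge distinct from B->A and from tight self-loops. A minimal standalone sketch of that update, assuming a local array in place of the shared afl_area_ptr map and cur_loc values already reduced to map indices (as afl_gen_trace does before emitting the call):

#include <stdint.h>
#include <stdio.h>

#define MAP_SIZE 65536              /* AFL's default coverage map size (1 << 16) */

static uint8_t area[MAP_SIZE];      /* stands in for the shared afl_area_ptr map */
static uint32_t prev_loc;           /* per-thread (__thread) in the real code */

/* Same update as afl_maybe_log above: bump the hit counter of the edge
   (previous block -> current block), then remember a shifted cur_loc. */
static void log_edge(uint32_t cur_loc) {
    area[(cur_loc ^ prev_loc) % MAP_SIZE]++;
    prev_loc = (cur_loc >> 1) % MAP_SIZE;
}

int main(void) {
    /* A made-up trace of two blocks executed twice in a row. */
    uint32_t trace[] = { 0x0041, 0x0103, 0x0041, 0x0103 };
    for (unsigned i = 0; i < sizeof(trace) / sizeof(trace[0]); i++)
        log_edge(trace[i]);

    for (unsigned i = 0; i < MAP_SIZE; i++)
        if (area[i]) printf("edge %#06x hit %u time(s)\n", i, (unsigned)area[i]);
    return 0;
}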

View File

@@ -1,5 +1,5 @@
 diff --git a/linux-user/elfload.c b/linux-user/elfload.c
-index 5bccd2e2..94e928a4 100644
+index 5bccd2e2..fd7460b3 100644
 --- a/linux-user/elfload.c
 +++ b/linux-user/elfload.c
 @@ -20,6 +20,8 @@
@@ -11,16 +11,29 @@ index 5bccd2e2..94e928a4 100644
  /* from personality.h */
 
  /*
-@@ -2301,6 +2303,8 @@ static void load_elf_image(const char *image_name, int image_fd,
+@@ -2301,6 +2303,21 @@ static void load_elf_image(const char *image_name, int image_fd,
      info->brk = 0;
      info->elf_flags = ehdr->e_flags;
 
-+    if (!afl_entry_point) afl_entry_point = info->entry;
++    if (!afl_entry_point) {
++      char *ptr;
++      if ((ptr = getenv("AFL_ENTRYPOINT")) != NULL) {
++        afl_entry_point = strtoul(ptr, NULL, 16);
++      } else {
++        afl_entry_point = info->entry;
++      }
++#ifdef TARGET_ARM
++      /* The least significant bit indicates Thumb mode. */
++      afl_entry_point = afl_entry_point & ~(target_ulong)1;
++#endif
++    }
++    if (getenv("AFL_DEBUG") != NULL)
++      fprintf(stderr, "AFL forkserver entrypoint: %p\n", (void*)afl_entry_point);
 +
      for (i = 0; i < ehdr->e_phnum; i++) {
          struct elf_phdr *eppnt = phdr + i;
          if (eppnt->p_type == PT_LOAD) {
-@@ -2335,9 +2339,11 @@ static void load_elf_image(const char *image_name, int image_fd,
+@@ -2335,9 +2352,11 @@ static void load_elf_image(const char *image_name, int image_fd,
      if (elf_prot & PROT_EXEC) {
          if (vaddr < info->start_code) {
              info->start_code = vaddr;
@@ -32,3 +45,26 @@ index 5bccd2e2..94e928a4 100644
          }
      }
      if (elf_prot & PROT_WRITE) {
+@@ -2662,6 +2681,22 @@ int load_elf_binary(struct linux_binprm *bprm, struct image_info *info)
+        change some of these later */
+     bprm->p = setup_arg_pages(bprm, info);
+
++    // On PowerPC64 the entry point is the _function descriptor_
++    // of the entry function. For AFL to properly initialize,
++    // afl_entry_point needs to be set to the actual first instruction
++    // executed by the target program, as opposed to where the
++    // function's descriptor sits in memory.
++    // copied from PPC init_thread
++#if defined(TARGET_PPC64) && !defined(TARGET_ABI32)
++    if (get_ppc64_abi(infop) < 2) {
++      uint64_t val;
++      get_user_u64(val, infop->entry + 8);
++      _regs->gpr[2] = val + infop->load_bias;
++      get_user_u64(val, infop->entry);
++      infop->entry = val + infop->load_bias;
++    }
++#endif
++
+     scratch = g_new0(char, TARGET_PAGE_SIZE);
+     if (STACK_GROWS_DOWN) {
+         bprm->p = copy_elf_strings(1, &bprm->filename, scratch,

View File

@@ -1,47 +0,0 @@
--- a/linux-user/elfload.c 2019-06-03 13:06:40.755755923 +0200
+++ b/linux-user/elfload.c 2019-06-03 13:33:01.315709801 +0200
@@ -2303,7 +2303,20 @@
info->brk = 0;
info->elf_flags = ehdr->e_flags;
- if (!afl_entry_point) afl_entry_point = info->entry;
+ if (!afl_entry_point) {
+ char *ptr;
+ if ((ptr = getenv("AFL_ENTRYPOINT")) != NULL) {
+ afl_entry_point = strtoul(ptr, NULL, 16);
+ } else {
+ afl_entry_point = info->entry;
+ }
+#ifdef TARGET_ARM
+ /* The least significant bit indicates Thumb mode. */
+ afl_entry_point = afl_entry_point & ~(target_ulong)1;
+#endif
+ }
+ if (getenv("AFL_DEBUG") != NULL)
+ fprintf(stderr, "AFL forkserver entrypoint: %p\n", (void*)afl_entry_point);
for (i = 0; i < ehdr->e_phnum; i++) {
struct elf_phdr *eppnt = phdr + i;
@@ -2668,6 +2681,22 @@
change some of these later */
bprm->p = setup_arg_pages(bprm, info);
+ // On PowerPC64 the entry point is the _function descriptor_
+ // of the entry function. For AFL to properly initialize,
+ // afl_entry_point needs to be set to the actual first instruction
+ // executed by the target program, as opposed to where the
+ // function's descriptor sits in memory.
+ // copied from PPC init_thread
+#if defined(TARGET_PPC64) && !defined(TARGET_ABI32)
+ if (get_ppc64_abi(infop) < 2) {
+ uint64_t val;
+ get_user_u64(val, infop->entry + 8);
+ _regs->gpr[2] = val + infop->load_bias;
+ get_user_u64(val, infop->entry);
+ infop->entry = val + infop->load_bias;
+ }
+#endif
+
scratch = g_new0(char, TARGET_PAGE_SIZE);
if (STACK_GROWS_DOWN) {
bprm->p = copy_elf_strings(1, &bprm->filename, scratch,

View File

@@ -2,179 +2,12 @@ diff --git a/tcg/tcg.c b/tcg/tcg.c
 index e85133ef..54b9b390 100644
 --- a/tcg/tcg.c
 +++ b/tcg/tcg.c
-@@ -1612,6 +1612,176 @@ bool tcg_op_supported(TCGOpcode op)
+@@ -1612,6 +1612,9 @@ bool tcg_op_supported(TCGOpcode op)
     }
 }
 +
++#include "../../patches/afl-qemu-tcg-inl.h"
-+/* Call the instrumentation function from the TCG IR */
+void tcg_gen_afl_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
+{
+ int i, real_args, nb_rets, pi;
+ unsigned sizemask, flags;
+ TCGOp *op;
+
+ flags = 0;
+ sizemask = 0;
+
+#if defined(__sparc__) && !defined(__arch64__) \
+ && !defined(CONFIG_TCG_INTERPRETER)
+ /* We have 64-bit values in one register, but need to pass as two
+ separate parameters. Split them. */
+ int orig_sizemask = sizemask;
+ int orig_nargs = nargs;
+ TCGv_i64 retl, reth;
+ TCGTemp *split_args[MAX_OPC_PARAM];
+
+ retl = NULL;
+ reth = NULL;
+ if (sizemask != 0) {
+ for (i = real_args = 0; i < nargs; ++i) {
+ int is_64bit = sizemask & (1 << (i+1)*2);
+ if (is_64bit) {
+ TCGv_i64 orig = temp_tcgv_i64(args[i]);
+ TCGv_i32 h = tcg_temp_new_i32();
+ TCGv_i32 l = tcg_temp_new_i32();
+ tcg_gen_extr_i64_i32(l, h, orig);
+ split_args[real_args++] = tcgv_i32_temp(h);
+ split_args[real_args++] = tcgv_i32_temp(l);
+ } else {
+ split_args[real_args++] = args[i];
+ }
+ }
+ nargs = real_args;
+ args = split_args;
+ sizemask = 0;
+ }
+#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
+ for (i = 0; i < nargs; ++i) {
+ int is_64bit = sizemask & (1 << (i+1)*2);
+ int is_signed = sizemask & (2 << (i+1)*2);
+ if (!is_64bit) {
+ TCGv_i64 temp = tcg_temp_new_i64();
+ TCGv_i64 orig = temp_tcgv_i64(args[i]);
+ if (is_signed) {
+ tcg_gen_ext32s_i64(temp, orig);
+ } else {
+ tcg_gen_ext32u_i64(temp, orig);
+ }
+ args[i] = tcgv_i64_temp(temp);
+ }
+ }
+#endif /* TCG_TARGET_EXTEND_ARGS */
+
+ op = tcg_emit_op(INDEX_op_call);
+
+ pi = 0;
+ if (ret != NULL) {
+#if defined(__sparc__) && !defined(__arch64__) \
+ && !defined(CONFIG_TCG_INTERPRETER)
+ if (orig_sizemask & 1) {
+ /* The 32-bit ABI is going to return the 64-bit value in
+ the %o0/%o1 register pair. Prepare for this by using
+ two return temporaries, and reassemble below. */
+ retl = tcg_temp_new_i64();
+ reth = tcg_temp_new_i64();
+ op->args[pi++] = tcgv_i64_arg(reth);
+ op->args[pi++] = tcgv_i64_arg(retl);
+ nb_rets = 2;
+ } else {
+ op->args[pi++] = temp_arg(ret);
+ nb_rets = 1;
+ }
+#else
+ if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
+#ifdef HOST_WORDS_BIGENDIAN
+ op->args[pi++] = temp_arg(ret + 1);
+ op->args[pi++] = temp_arg(ret);
+#else
+ op->args[pi++] = temp_arg(ret);
+ op->args[pi++] = temp_arg(ret + 1);
+#endif
+ nb_rets = 2;
+ } else {
+ op->args[pi++] = temp_arg(ret);
+ nb_rets = 1;
+ }
+#endif
+ } else {
+ nb_rets = 0;
+ }
+ TCGOP_CALLO(op) = nb_rets;
+
+ real_args = 0;
+ for (i = 0; i < nargs; i++) {
+ int is_64bit = sizemask & (1 << (i+1)*2);
+ if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
+#ifdef TCG_TARGET_CALL_ALIGN_ARGS
+ /* some targets want aligned 64 bit args */
+ if (real_args & 1) {
+ op->args[pi++] = TCG_CALL_DUMMY_ARG;
+ real_args++;
+ }
+#endif
+ /* If stack grows up, then we will be placing successive
+ arguments at lower addresses, which means we need to
+ reverse the order compared to how we would normally
+ treat either big or little-endian. For those arguments
+ that will wind up in registers, this still works for
+ HPPA (the only current STACK_GROWSUP target) since the
+ argument registers are *also* allocated in decreasing
+ order. If another such target is added, this logic may
+ have to get more complicated to differentiate between
+ stack arguments and register arguments. */
+#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
+ op->args[pi++] = temp_arg(args[i] + 1);
+ op->args[pi++] = temp_arg(args[i]);
+#else
+ op->args[pi++] = temp_arg(args[i]);
+ op->args[pi++] = temp_arg(args[i] + 1);
+#endif
+ real_args += 2;
+ continue;
+ }
+
+ op->args[pi++] = temp_arg(args[i]);
+ real_args++;
+ }
+ op->args[pi++] = (uintptr_t)func;
+ op->args[pi++] = flags;
+ TCGOP_CALLI(op) = real_args;
+
+ /* Make sure the fields didn't overflow. */
+ tcg_debug_assert(TCGOP_CALLI(op) == real_args);
+ tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
+
+#if defined(__sparc__) && !defined(__arch64__) \
+ && !defined(CONFIG_TCG_INTERPRETER)
+ /* Free all of the parts we allocated above. */
+ for (i = real_args = 0; i < orig_nargs; ++i) {
+ int is_64bit = orig_sizemask & (1 << (i+1)*2);
+ if (is_64bit) {
+ tcg_temp_free_internal(args[real_args++]);
+ tcg_temp_free_internal(args[real_args++]);
+ } else {
+ real_args++;
+ }
+ }
+ if (orig_sizemask & 1) {
+ /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them.
+ Note that describing these as TCGv_i64 eliminates an unnecessary
+ zero-extension that tcg_gen_concat_i32_i64 would create. */
+ tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
+ tcg_temp_free_i64(retl);
+ tcg_temp_free_i64(reth);
+ }
+#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
+ for (i = 0; i < nargs; ++i) {
+ int is_64bit = sizemask & (1 << (i+1)*2);
+ if (!is_64bit) {
+ tcg_temp_free_internal(args[i]);
+ }
+ }
+#endif /* TCG_TARGET_EXTEND_ARGS */
+}
+
 +
  /* Note: we convert the 64 bit args to 32 bit and do some alignment
     and endian swap. Maybe it would be better to do the alignment