mirror of
https://github.com/AFLplusplus/AFLplusplus.git
synced 2025-06-22 06:18:04 +00:00
Dynamic instrumentation filtering for LLVM native (#1971)
* Add two dynamic instrumentation filter methods to runtime * Always use pc-table with native pcguard * Add make_symbol_list.py and README
This commit is contained in:
committed by
GitHub
parent
2f9eeef60c
commit
58b80b68bc
@ -22,6 +22,10 @@
|
|||||||
#define __USE_GNU
|
#define __USE_GNU
|
||||||
#endif
|
#endif
|
||||||
#include <dlfcn.h>
|
#include <dlfcn.h>
|
||||||
|
|
||||||
|
__attribute__((weak)) void __sanitizer_symbolize_pc(void *, const char *fmt,
|
||||||
|
char *out_buf,
|
||||||
|
size_t out_buf_size);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __ANDROID__
|
#ifdef __ANDROID__
|
||||||
@ -124,8 +128,8 @@ struct afl_module_info_t {
|
|||||||
uintptr_t base_address;
|
uintptr_t base_address;
|
||||||
|
|
||||||
// PC Guard start/stop
|
// PC Guard start/stop
|
||||||
u32 start;
|
u32 *start;
|
||||||
u32 stop;
|
u32 *stop;
|
||||||
|
|
||||||
// PC Table begin/end
|
// PC Table begin/end
|
||||||
const uintptr_t *pcs_beg;
|
const uintptr_t *pcs_beg;
|
||||||
@ -147,6 +151,18 @@ afl_module_info_t *__afl_module_info = NULL;
|
|||||||
|
|
||||||
u32 __afl_pcmap_size = 0;
|
u32 __afl_pcmap_size = 0;
|
||||||
uintptr_t *__afl_pcmap_ptr = NULL;
|
uintptr_t *__afl_pcmap_ptr = NULL;
|
||||||
|
|
||||||
|
// One address range loaded from the AFL_PC_FILTER_FILE list. Entries are
// stored in the __afl_filter_pcs array and looked up by locate_in_pcs().
typedef struct {
|
||||||
|
|
||||||
|
// First address of the range. It is compared against PCs that have had the
// module base address subtracted, so it is an offset inside the module.
uintptr_t start;
|
||||||
|
// Size of the range: a PC matches when start <= PC < start + len.
u32 len;
|
||||||
|
|
||||||
|
} FilterPCEntry;
|
||||||
|
|
||||||
|
// Number of entries in __afl_filter_pcs (counted by afl_read_pc_filter_file).
u32 __afl_filter_pcs_size = 0;
|
||||||
|
// Heap-allocated array of filter ranges; NULL until a filter file was loaded.
// locate_in_pcs() runs a binary search over it, so it must be sorted by start.
FilterPCEntry *__afl_filter_pcs = NULL;
|
||||||
|
// Module name read from the filter file; the filter is only applied to modules
// whose name contains this string (strstr match in __sanitizer_cov_pcs_init).
u8 *__afl_filter_pcs_module = NULL;
|
||||||
|
|
||||||
#endif // __AFL_CODE_COVERAGE
|
#endif // __AFL_CODE_COVERAGE
|
||||||
|
|
||||||
/* 1 if we are running in afl, and the forkserver was started, else 0 */
|
/* 1 if we are running in afl, and the forkserver was started, else 0 */
|
||||||
@ -1587,15 +1603,116 @@ void __sanitizer_cov_trace_pc_guard(uint32_t *guard) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __AFL_CODE_COVERAGE
|
#ifdef __AFL_CODE_COVERAGE
|
||||||
void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg,
|
void afl_read_pc_filter_file(const char *filter_file) {
|
||||||
const uintptr_t *pcs_end) {
|
|
||||||
|
|
||||||
if (__afl_debug) {
|
FILE *file;
|
||||||
|
char ch;
|
||||||
|
|
||||||
fprintf(stderr, "DEBUG: __sanitizer_cov_pcs_init called\n");
|
file = fopen(filter_file, "r");
|
||||||
|
if (file == NULL) {
|
||||||
|
|
||||||
|
perror("Error opening file");
|
||||||
|
return;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check how many PCs we expect to read
|
||||||
|
while ((ch = fgetc(file)) != EOF) {
|
||||||
|
|
||||||
|
if (ch == '\n') { __afl_filter_pcs_size++; }
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Rewind to actually read the PCs
|
||||||
|
fseek(file, 0, SEEK_SET);
|
||||||
|
|
||||||
|
__afl_filter_pcs = malloc(__afl_filter_pcs_size * sizeof(FilterPCEntry));
|
||||||
|
if (!__afl_filter_pcs) {
|
||||||
|
|
||||||
|
perror("Error allocating PC array");
|
||||||
|
return;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t i = 0; i < __afl_filter_pcs_size; i++) {
|
||||||
|
|
||||||
|
fscanf(file, "%lx", &(__afl_filter_pcs[i].start));
|
||||||
|
ch = fgetc(file); // Read tab
|
||||||
|
fscanf(file, "%u", &(__afl_filter_pcs[i].len));
|
||||||
|
ch = fgetc(file); // Read tab
|
||||||
|
|
||||||
|
if (!__afl_filter_pcs_module) {
|
||||||
|
|
||||||
|
// Read the module name and store it.
|
||||||
|
// TODO: We only support one module here right now although
|
||||||
|
// there is technically no reason to support multiple modules
|
||||||
|
// in one go.
|
||||||
|
size_t max_module_len = 255;
|
||||||
|
size_t i = 0;
|
||||||
|
__afl_filter_pcs_module = malloc(max_module_len);
|
||||||
|
while (i < max_module_len - 1 &&
|
||||||
|
(__afl_filter_pcs_module[i] = fgetc(file)) != '\t') {
|
||||||
|
|
||||||
|
++i;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
__afl_filter_pcs_module[i] = '\0';
|
||||||
|
fprintf(stderr, "DEBUGXXX: Read module name %s\n",
|
||||||
|
__afl_filter_pcs_module);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
while ((ch = fgetc(file)) != '\n' && ch != EOF)
|
||||||
|
;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
fclose(file);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Binary search for `needle` in the sorted __afl_filter_pcs table.

   An entry matches when start <= needle < start + len. On a match the
   entry's position is written to `*index` and 1 is returned; otherwise 0 is
   returned and `*index` is left untouched. */
u32 locate_in_pcs(uintptr_t needle, u32 *index) {

  const size_t count = __afl_filter_pcs_size;

  // Nothing loaded, nothing to find.
  if (!count) { return 0; }

  // Closed search interval [lo, hi].
  size_t lo = 0;
  size_t hi = count - 1;

  while (lo < count && lo <= hi) {

    const size_t mid = lo + (hi - lo) / 2;

    if (__afl_filter_pcs[mid].start > needle) {

      // Range starts after the needle: continue in the lower half. At
      // position 0 there is no lower half left (and mid - 1 would wrap).
      if (mid == 0) { break; }
      hi = mid - 1;
      continue;

    }

    if (__afl_filter_pcs[mid].start + __afl_filter_pcs[mid].len > needle) {

      // Hit
      *index = mid;
      return 1;

    }

    // Range ends at or before the needle: continue in the upper half.
    lo = mid + 1;

  }

  return 0;

}
|
||||||
|
|
||||||
|
void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg,
|
||||||
|
const uintptr_t *pcs_end) {
|
||||||
|
|
||||||
// If for whatever reason, we cannot get dlinfo here, then pc_guard_init also
|
// If for whatever reason, we cannot get dlinfo here, then pc_guard_init also
|
||||||
// couldn't get it and we'd end up attributing to the wrong module.
|
// couldn't get it and we'd end up attributing to the wrong module.
|
||||||
Dl_info dlinfo;
|
Dl_info dlinfo;
|
||||||
@ -1608,6 +1725,16 @@ void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg,
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (__afl_debug) {
|
||||||
|
|
||||||
|
fprintf(
|
||||||
|
stderr,
|
||||||
|
"DEBUG: (%u) __sanitizer_cov_pcs_init called for module %s with %ld "
|
||||||
|
"PCs\n",
|
||||||
|
getpid(), dlinfo.dli_fname, pcs_end - pcs_beg);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
afl_module_info_t *last_module_info = __afl_module_info;
|
afl_module_info_t *last_module_info = __afl_module_info;
|
||||||
while (last_module_info && last_module_info->next) {
|
while (last_module_info && last_module_info->next) {
|
||||||
|
|
||||||
@ -1623,34 +1750,78 @@ void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg,
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (strcmp(dlinfo.dli_fname, last_module_info->name)) {
|
||||||
|
|
||||||
|
// This can happen with modules being loaded after the forkserver
|
||||||
|
// where we decide to not track the module. In that case we must
|
||||||
|
// not track it here either.
|
||||||
|
fprintf(
|
||||||
|
stderr,
|
||||||
|
"WARNING: __sanitizer_cov_pcs_init module info mismatch: %s vs %s\n",
|
||||||
|
dlinfo.dli_fname, last_module_info->name);
|
||||||
|
return;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
last_module_info->pcs_beg = pcs_beg;
|
last_module_info->pcs_beg = pcs_beg;
|
||||||
last_module_info->pcs_end = pcs_end;
|
last_module_info->pcs_end = pcs_end;
|
||||||
|
|
||||||
|
// This is a direct filter based on symbolizing inside the runtime.
|
||||||
|
// It should only be used with smaller binaries to avoid long startup
|
||||||
|
// times. Currently, this only supports a single token to scan for.
|
||||||
|
const char *pc_filter = getenv("AFL_PC_FILTER");
|
||||||
|
|
||||||
|
// This is a much faster PC filter based on pre-symbolized input data
|
||||||
|
// that is sorted for fast lookup through binary search. This method
|
||||||
|
// of filtering is suitable even for very large binaries.
|
||||||
|
const char *pc_filter_file = getenv("AFL_PC_FILTER_FILE");
|
||||||
|
if (pc_filter_file && !__afl_filter_pcs) {
|
||||||
|
|
||||||
|
afl_read_pc_filter_file(pc_filter_file);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
// Now update the pcmap. If this is the last module coming in, after all
|
// Now update the pcmap. If this is the last module coming in, after all
|
||||||
// pre-loaded code, then this will also map all of our delayed previous
|
// pre-loaded code, then this will also map all of our delayed previous
|
||||||
// modules.
|
// modules.
|
||||||
|
//
|
||||||
if (!__afl_pcmap_ptr) { return; }
|
|
||||||
|
|
||||||
for (afl_module_info_t *mod_info = __afl_module_info; mod_info;
|
for (afl_module_info_t *mod_info = __afl_module_info; mod_info;
|
||||||
mod_info = mod_info->next) {
|
mod_info = mod_info->next) {
|
||||||
|
|
||||||
if (mod_info->mapped) { continue; }
|
if (mod_info->mapped) { continue; }
|
||||||
|
|
||||||
|
if (!mod_info->start) {
|
||||||
|
|
||||||
|
fprintf(stderr,
|
||||||
|
"ERROR: __sanitizer_cov_pcs_init called with mod_info->start == "
|
||||||
|
"NULL (%s)\n",
|
||||||
|
mod_info->name);
|
||||||
|
abort();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
PCTableEntry *start = (PCTableEntry *)(mod_info->pcs_beg);
|
PCTableEntry *start = (PCTableEntry *)(mod_info->pcs_beg);
|
||||||
PCTableEntry *end = (PCTableEntry *)(mod_info->pcs_end);
|
PCTableEntry *end = (PCTableEntry *)(mod_info->pcs_end);
|
||||||
|
|
||||||
|
if (!*mod_info->stop) { continue; }
|
||||||
|
|
||||||
u32 in_module_index = 0;
|
u32 in_module_index = 0;
|
||||||
|
|
||||||
while (start < end) {
|
while (start < end) {
|
||||||
|
|
||||||
if (mod_info->start + in_module_index >= __afl_map_size) {
|
if (*mod_info->start + in_module_index >= __afl_map_size) {
|
||||||
|
|
||||||
fprintf(stderr, "ERROR: __sanitizer_cov_pcs_init out of bounds?!\n");
|
fprintf(stderr,
|
||||||
|
"ERROR: __sanitizer_cov_pcs_init out of bounds?! Start: %u "
|
||||||
|
"Stop: %u Map Size: %u (%s)\n",
|
||||||
|
*mod_info->start, *mod_info->stop, __afl_map_size,
|
||||||
|
mod_info->name);
|
||||||
abort();
|
abort();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
u32 orig_start_index = *mod_info->start;
|
||||||
|
|
||||||
uintptr_t PC = start->PC;
|
uintptr_t PC = start->PC;
|
||||||
|
|
||||||
// This is what `GetPreviousInstructionPc` in sanitizer runtime does
|
// This is what `GetPreviousInstructionPc` in sanitizer runtime does
|
||||||
@ -1660,7 +1831,58 @@ void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg,
|
|||||||
// Calculate relative offset in module
|
// Calculate relative offset in module
|
||||||
PC = PC - mod_info->base_address;
|
PC = PC - mod_info->base_address;
|
||||||
|
|
||||||
__afl_pcmap_ptr[mod_info->start + in_module_index] = PC;
|
if (__afl_pcmap_ptr) {
|
||||||
|
|
||||||
|
__afl_pcmap_ptr[orig_start_index + in_module_index] = PC;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pc_filter) {
|
||||||
|
|
||||||
|
char PcDescr[1024];
|
||||||
|
// This function is a part of the sanitizer run-time.
|
||||||
|
// To use it, link with AddressSanitizer or other sanitizer.
|
||||||
|
__sanitizer_symbolize_pc((void *)start->PC, "%p %F %L", PcDescr,
|
||||||
|
sizeof(PcDescr));
|
||||||
|
|
||||||
|
if (strstr(PcDescr, pc_filter)) {
|
||||||
|
|
||||||
|
if (__afl_debug)
|
||||||
|
fprintf(
|
||||||
|
stderr,
|
||||||
|
"DEBUG: Selective instrumentation match: %s (PC %p Index %u)\n",
|
||||||
|
PcDescr, (void *)start->PC,
|
||||||
|
*(mod_info->start + in_module_index));
|
||||||
|
// No change to guard needed
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
// Null out the guard to disable this edge
|
||||||
|
*(mod_info->start + in_module_index) = 0;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if (__afl_filter_pcs && strstr(mod_info->name, __afl_filter_pcs_module)) {
|
||||||
|
|
||||||
|
u32 result_index;
|
||||||
|
if (locate_in_pcs(PC, &result_index)) {
|
||||||
|
|
||||||
|
if (__afl_debug)
|
||||||
|
fprintf(stderr,
|
||||||
|
"DEBUG: Selective instrumentation match: (PC %lx File "
|
||||||
|
"Index %u PC Index %u)\n",
|
||||||
|
PC, result_index, in_module_index);
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
// Null out the guard to disable this edge
|
||||||
|
*(mod_info->start + in_module_index) = 0;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
start++;
|
start++;
|
||||||
in_module_index++;
|
in_module_index++;
|
||||||
@ -1671,8 +1893,10 @@ void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg,
|
|||||||
|
|
||||||
if (__afl_debug) {
|
if (__afl_debug) {
|
||||||
|
|
||||||
fprintf(stderr, "DEBUG: __sanitizer_cov_pcs_init initialized %u PCs\n",
|
fprintf(stderr,
|
||||||
in_module_index);
|
"DEBUG: __sanitizer_cov_pcs_init successfully mapped %s with %u "
|
||||||
|
"PCs\n",
|
||||||
|
mod_info->name, in_module_index);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1706,9 +1930,9 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) {
|
|||||||
fprintf(
|
fprintf(
|
||||||
stderr,
|
stderr,
|
||||||
"DEBUG: Running __sanitizer_cov_trace_pc_guard_init: %p-%p (%lu edges) "
|
"DEBUG: Running __sanitizer_cov_trace_pc_guard_init: %p-%p (%lu edges) "
|
||||||
"after_fs=%u\n",
|
"after_fs=%u *start=%u\n",
|
||||||
start, stop, (unsigned long)(stop - start),
|
start, stop, (unsigned long)(stop - start),
|
||||||
__afl_already_initialized_forkserver);
|
__afl_already_initialized_forkserver, *start);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1740,8 +1964,8 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) {
|
|||||||
mod_info->id = last_module_info ? last_module_info->id + 1 : 0;
|
mod_info->id = last_module_info ? last_module_info->id + 1 : 0;
|
||||||
mod_info->name = strdup(dlinfo.dli_fname);
|
mod_info->name = strdup(dlinfo.dli_fname);
|
||||||
mod_info->base_address = (uintptr_t)dlinfo.dli_fbase;
|
mod_info->base_address = (uintptr_t)dlinfo.dli_fbase;
|
||||||
mod_info->start = 0;
|
mod_info->start = NULL;
|
||||||
mod_info->stop = 0;
|
mod_info->stop = NULL;
|
||||||
mod_info->pcs_beg = NULL;
|
mod_info->pcs_beg = NULL;
|
||||||
mod_info->pcs_end = NULL;
|
mod_info->pcs_end = NULL;
|
||||||
mod_info->mapped = 0;
|
mod_info->mapped = 0;
|
||||||
@ -1757,8 +1981,12 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fprintf(stderr, "[pcmap] Module: %s Base Address: %p\n", dlinfo.dli_fname,
|
if (__afl_debug) {
|
||||||
dlinfo.dli_fbase);
|
|
||||||
|
fprintf(stderr, "[pcmap] Module: %s Base Address: %p\n",
|
||||||
|
dlinfo.dli_fname, dlinfo.dli_fbase);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1861,12 +2089,17 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) {
|
|||||||
#ifdef __AFL_CODE_COVERAGE
|
#ifdef __AFL_CODE_COVERAGE
|
||||||
if (mod_info) {
|
if (mod_info) {
|
||||||
|
|
||||||
mod_info->start = *orig_start;
|
if (!mod_info->start) {
|
||||||
mod_info->stop = *(stop - 1);
|
|
||||||
|
mod_info->start = orig_start;
|
||||||
|
mod_info->stop = stop - 1;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
if (__afl_debug) {
|
if (__afl_debug) {
|
||||||
|
|
||||||
fprintf(stderr, "DEBUG: [pcmap] Start Index: %u Stop Index: %u\n",
|
fprintf(stderr, "DEBUG: [pcmap] Start Index: %u Stop Index: %u\n",
|
||||||
mod_info->start, mod_info->stop);
|
*(mod_info->start), *(mod_info->stop));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
26
src/afl-cc.c
26
src/afl-cc.c
@ -1920,35 +1920,27 @@ void add_native_pcguard(aflcc_state_t *aflcc) {
|
|||||||
/* If llvm-config doesn't figure out LLVM_MAJOR, just
|
/* If llvm-config doesn't figure out LLVM_MAJOR, just
|
||||||
go on anyway and let compiler complain if doesn't work. */
|
go on anyway and let compiler complain if doesn't work. */
|
||||||
|
|
||||||
if (aflcc->instrument_opt_mode & INSTRUMENT_OPT_CODECOV) {
|
|
||||||
|
|
||||||
#if LLVM_MAJOR > 0 && LLVM_MAJOR < 6
|
#if LLVM_MAJOR > 0 && LLVM_MAJOR < 6
|
||||||
FATAL("pcguard instrumentation with pc-table requires LLVM 6.0.1+");
|
FATAL("pcguard instrumentation with pc-table requires LLVM 6.0.1+");
|
||||||
#else
|
#else
|
||||||
#if LLVM_MAJOR == 0
|
#if LLVM_MAJOR == 0
|
||||||
WARNF(
|
WARNF(
|
||||||
"pcguard instrumentation with pc-table requires LLVM 6.0.1+"
|
"pcguard instrumentation with pc-table requires LLVM 6.0.1+"
|
||||||
" otherwise the compiler will fail");
|
" otherwise the compiler will fail");
|
||||||
#endif
|
#endif
|
||||||
|
if (aflcc->instrument_opt_mode & INSTRUMENT_OPT_CODECOV) {
|
||||||
|
|
||||||
insert_param(aflcc,
|
insert_param(aflcc,
|
||||||
"-fsanitize-coverage=trace-pc-guard,bb,no-prune,pc-table");
|
"-fsanitize-coverage=trace-pc-guard,bb,no-prune,pc-table");
|
||||||
#endif
|
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
#if LLVM_MAJOR > 0 && LLVM_MAJOR < 4
|
insert_param(aflcc, "-fsanitize-coverage=trace-pc-guard,pc-table");
|
||||||
FATAL("pcguard instrumentation requires LLVM 4.0.1+");
|
|
||||||
#else
|
|
||||||
#if LLVM_MAJOR == 0
|
|
||||||
WARNF(
|
|
||||||
"pcguard instrumentation requires LLVM 4.0.1+"
|
|
||||||
" otherwise the compiler will fail");
|
|
||||||
#endif
|
|
||||||
insert_param(aflcc, "-fsanitize-coverage=trace-pc-guard");
|
|
||||||
#endif
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void add_optimized_pcguard(aflcc_state_t *aflcc) {
|
void add_optimized_pcguard(aflcc_state_t *aflcc) {
|
||||||
|
55
utils/dynamic_covfilter/README.md
Normal file
55
utils/dynamic_covfilter/README.md
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
# Dynamic Instrumentation Filter
|
||||||
|
|
||||||
|
Sometimes it can be beneficial to limit the instrumentation feedback to
|
||||||
|
specific code locations. It is possible to do so at compile-time by simply
|
||||||
|
not instrumenting any undesired locations. However, there are situations
|
||||||
|
where doing this dynamically without requiring a new build can be beneficial.
|
||||||
|
Especially when dealing with larger builds, it is much more convenient to
|
||||||
|
select the target code locations at runtime instead of doing so at build time.
|
||||||
|
|
||||||
|
There are two ways of doing this in AFL++:
|
||||||
|
|
||||||
|
## Simple Selection with `AFL_PC_FILTER`
|
||||||
|
|
||||||
|
This approach requires a build with `AFL_INSTRUMENTATION=llvmnative` or
|
||||||
|
`llvmcodecov` as well as an AddressSanitizer build with debug information.
|
||||||
|
|
||||||
|
By setting the environment variable `AFL_PC_FILTER` to a string, the runtime
|
||||||
|
symbolizer is enabled in the AFL++ runtime. At startup, the runtime will call
|
||||||
|
the `__sanitizer_symbolize_pc` API to resolve every PC in every loaded module.
|
||||||
|
The runtime then matches the result using `strstr` and disables the PC guard
|
||||||
|
if the symbolized PC does not contain the specified string.
|
||||||
|
|
||||||
|
This approach has the benefit of being very easy to use. The downside is that
|
||||||
|
it causes significant startup delays with large binaries and that it requires
|
||||||
|
an AddressSanitizer build.
|
||||||
|
|
||||||
|
This method has no additional runtime overhead after startup.
|
||||||
|
|
||||||
|
## Selection using pre-symbolized data file with `AFL_PC_FILTER_FILE`
|
||||||
|
|
||||||
|
To avoid large startup time delays, a specific module can be pre-symbolized
|
||||||
|
using the `make_symbol_list.py` script. This script outputs a sorted list of
|
||||||
|
functions with their respective relative offsets and lengths in the target
|
||||||
|
binary:
|
||||||
|
|
||||||
|
`python3 make_symbol_list.py libxul.so > libxul.symbols.txt`
|
||||||
|
|
||||||
|
The resulting list can be filtered, e.g. using grep:
|
||||||
|
|
||||||
|
`grep -i "webgl" libxul.symbols.txt > libxul.webgl.symbols.txt`
|
||||||
|
|
||||||
|
Finally, you can run with `AFL_PC_FILTER_FILE=libxul.webgl.symbols.txt` to
|
||||||
|
restrict instrumentation feedback to the given locations. This approach only
|
||||||
|
has a minimal startup time delay due to the implementation only using binary
|
||||||
|
search on the given file per PC rather than reading debug information for every
|
||||||
|
PC. It also works well with Nyx, where symbolizing is usually disabled for the
|
||||||
|
target process to avoid delays with frequent crashes.
|
||||||
|
|
||||||
|
Similar to the previous method, this approach requires a build with
|
||||||
|
`AFL_INSTRUMENTATION=llvmnative` or `llvmcodecov` as well as debug information.
|
||||||
|
However, it does not require the ASan runtime as it doesn't do the symbolizing
|
||||||
|
in process. Due to the way it maps PCs to symbols, it is less accurate when it
|
||||||
|
comes to includes and inlines (it assumes all PCs within a function belong to
|
||||||
|
that function and originate from the same file). For most purposes, this should
|
||||||
|
be a reasonable simplification to quickly process even the largest binaries.
|
73
utils/dynamic_covfilter/make_symbol_list.py
Normal file
73
utils/dynamic_covfilter/make_symbol_list.py
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||||
|
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||||
|
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
#
|
||||||
|
# Written by Christian Holler <decoder at mozilla dot com>
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
def parse_objdump_symbols(objdump_output):
    """Extract symbol start addresses and sizes from `objdump -t` output.

    Returns a dict mapping each start address (int) to its size in bytes
    (int). Lines that do not begin with a hexadecimal address (headers,
    blank lines) are ignored. Lines with an address but no recognizable
    size column are reported on stderr and skipped, so they can neither
    pollute the symbol list on stdout nor produce an unparsable length.
    If several symbols share one address, the last one listed wins.
    """
    addr2len = {}

    for line in objdump_output.splitlines():
        components = line.split()
        if not components:
            continue

        addr_field = components[0]
        try:
            start_addr = int(addr_field, 16)
        except ValueError:
            continue

        # objdump prints the size with the same width as the address:
        # 16 hex digits for 64-bit objects, 8 for 32-bit ones.
        width = len(addr_field) if len(addr_field) in (8, 16) else 16

        # Length has variable position in objdump output
        length = None
        for comp in components[1:]:
            if len(comp) != width:
                continue
            try:
                length = int(comp, 16)
                break
            except ValueError:
                continue

        if length is None:
            print(
                "ERROR: Couldn't determine function section length: %s" % line,
                file=sys.stderr,
            )
            continue

        addr2len[start_addr] = length

    return addr2len


def format_symbol_lines(addr2line_output, addr2len):
    """Turn llvm-addr2line JSON output into the filter file's lines.

    `addr2line_output` holds one JSON document per line. A tab-separated
    line "address, length, module basename, file name, function name" is
    produced for every record that carries at least one symbol and whose
    address is a key of `addr2len`. Records are emitted in input order.
    """
    lines = []

    for raw in addr2line_output.strip().splitlines():
        if not raw.strip():
            continue

        record = json.loads(raw)

        try:
            address = int(record["Address"], 16)
        except (KeyError, TypeError, ValueError):
            continue

        symbols = record.get("Symbol")
        if not symbols or address not in addr2len:
            continue

        lines.append("\t".join([
            record["Address"],
            str(addr2len[address]),
            os.path.basename(record["ModuleName"]),
            symbols[0]["FileName"],
            symbols[0]["FunctionName"],
        ]))

    return lines


def main(argv):
    """Print the sorted symbol list for the binary named in `argv[1]`."""
    if len(argv) != 2:
        print("Usage: %s binfile" % os.path.basename(argv[0]), file=sys.stderr)
        return 1

    binfile = argv[1]

    objdump_output = subprocess.check_output(
        ["objdump", "-t", binfile]).decode("utf-8")
    addr2len = parse_objdump_symbols(objdump_output)

    # The search implementation in the AFL runtime expects everything sorted.
    # Iterating over the dict keys also drops duplicate addresses.
    addrs = [hex(addr) for addr in sorted(addr2len)]

    # We symbolize in one go to speed things up with large binaries.
    addr2line_output = subprocess.check_output([
        "llvm-addr2line",
        "--output-style=JSON",
        "-f", "-C", "-a", "-e",
        binfile],
        input="\n".join(addrs).encode("utf-8")).decode("utf-8")

    for out_line in format_symbol_lines(addr2line_output, addr2len):
        print(out_line)

    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
|
Reference in New Issue
Block a user