You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
558 lines
16 KiB
558 lines
16 KiB
2 months ago
|
/*
|
||
|
Copyright 2013 Google LLC All rights reserved.
|
||
|
|
||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
you may not use this file except in compliance with the License.
|
||
|
You may obtain a copy of the License at:
|
||
|
|
||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||
|
|
||
|
Unless required by applicable law or agreed to in writing, software
|
||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
See the License for the specific language governing permissions and
|
||
|
limitations under the License.
|
||
|
*/
|
||
|
|
||
|
/*
|
||
|
american fuzzy lop - wrapper for GNU as
|
||
|
---------------------------------------
|
||
|
|
||
|
Written and maintained by Michal Zalewski <lcamtuf@google.com>
|
||
|
|
||
|
The sole purpose of this wrapper is to preprocess assembly files generated
|
||
|
by GCC / clang and inject the instrumentation bits included from afl-as.h. It
|
||
|
is automatically invoked by the toolchain when compiling programs using
|
||
|
afl-gcc / afl-clang.
|
||
|
|
||
|
Note that it's an explicit non-goal to instrument hand-written assembly,
|
||
|
be it in separate .s files or in __asm__ blocks. The only aspiration this
|
||
|
utility has right now is to be able to skip them gracefully and allow the
|
||
|
compilation process to continue.
|
||
|
|
||
|
That said, see experimental/clang_asm_normalize/ for a solution that may
|
||
|
allow clang users to make things work even with hand-crafted assembly. Just
|
||
|
note that there is no equivalent for GCC.
|
||
|
|
||
|
*/
|
||
|
|
||
|
#define AFL_MAIN
|
||
|
|
||
|
#include "config.h"
|
||
|
#include "types.h"
|
||
|
#include "debug.h"
|
||
|
#include "alloc-inl.h"
|
||
|
|
||
|
#include "afl-as.h"
|
||
|
|
||
|
#include <stdio.h>
|
||
|
#include <unistd.h>
|
||
|
#include <stdlib.h>
|
||
|
#include <string.h>
|
||
|
#include <time.h>
|
||
|
#include <ctype.h>
|
||
|
#include <fcntl.h>
|
||
|
|
||
|
#include <sys/wait.h>
|
||
|
#include <sys/time.h>
|
||
|
|
||
|
static u8** as_params; /* Parameters passed to the real 'as' */
|
||
|
|
||
|
static u8* input_file; /* Originally specified input file */
|
||
|
static u8* modified_file; /* Instrumented file for the real 'as' */
|
||
|
|
||
|
static u8 be_quiet, /* Quiet mode (no stderr output) */
|
||
|
clang_mode, /* Running in clang mode? */
|
||
|
pass_thru, /* Just pass data through? */
|
||
|
just_version, /* Just show version? */
|
||
|
sanitizer; /* Using ASAN / MSAN */
|
||
|
|
||
|
static u32 inst_ratio = 100, /* Instrumentation probability (%) */
|
||
|
as_par_cnt = 1; /* Number of params to 'as' */
|
||
|
|
||
|
/* If we don't find --32 or --64 in the command line, default to
|
||
|
instrumentation for whichever mode we were compiled with. This is not
|
||
|
perfect, but should do the trick for almost all use cases. */
|
||
|
|
||
|
#ifdef WORD_SIZE_64
|
||
|
|
||
|
static u8 use_64bit = 1;
|
||
|
|
||
|
#else
|
||
|
|
||
|
static u8 use_64bit = 0;
|
||
|
|
||
|
#ifdef __APPLE__
|
||
|
# error "Sorry, 32-bit Apple platforms are not supported."
|
||
|
#endif /* __APPLE__ */
|
||
|
|
||
|
#endif /* ^WORD_SIZE_64 */
|
||
|
|
||
|
|
||
|
/* Examine and modify parameters to pass to 'as'. Note that the file name
|
||
|
is always the last parameter passed by GCC, so we exploit this property
|
||
|
to keep the code simple. */
|
||
|
|
||
|
static void edit_params(int argc, char** argv) {
|
||
|
|
||
|
u8 *tmp_dir = getenv("TMPDIR"), *afl_as = getenv("AFL_AS");
|
||
|
u32 i;
|
||
|
|
||
|
#ifdef __APPLE__
|
||
|
|
||
|
u8 use_clang_as = 0;
|
||
|
|
||
|
/* On MacOS X, the Xcode cctool 'as' driver is a bit stale and does not work
|
||
|
with the code generated by newer versions of clang that are hand-built
|
||
|
by the user. See the thread here: http://goo.gl/HBWDtn.
|
||
|
|
||
|
To work around this, when using clang and running without AFL_AS
|
||
|
specified, we will actually call 'clang -c' instead of 'as -q' to
|
||
|
compile the assembly file.
|
||
|
|
||
|
The tools aren't cmdline-compatible, but at least for now, we can
|
||
|
seemingly get away with this by making only very minor tweaks. Thanks
|
||
|
to Nico Weber for the idea. */
|
||
|
|
||
|
if (clang_mode && !afl_as) {
|
||
|
|
||
|
use_clang_as = 1;
|
||
|
|
||
|
afl_as = getenv("AFL_CC");
|
||
|
if (!afl_as) afl_as = getenv("AFL_CXX");
|
||
|
if (!afl_as) afl_as = "clang";
|
||
|
|
||
|
}
|
||
|
|
||
|
#endif /* __APPLE__ */
|
||
|
|
||
|
/* Although this is not documented, GCC also uses TEMP and TMP when TMPDIR
|
||
|
is not set. We need to check these non-standard variables to properly
|
||
|
handle the pass_thru logic later on. */
|
||
|
|
||
|
if (!tmp_dir) tmp_dir = getenv("TEMP");
|
||
|
if (!tmp_dir) tmp_dir = getenv("TMP");
|
||
|
if (!tmp_dir) tmp_dir = "/tmp";
|
||
|
|
||
|
as_params = ck_alloc((argc + 32) * sizeof(u8*));
|
||
|
|
||
|
as_params[0] = afl_as ? afl_as : (u8*)"as";
|
||
|
|
||
|
as_params[argc] = 0;
|
||
|
|
||
|
for (i = 1; i < argc - 1; i++) {
|
||
|
|
||
|
if (!strcmp(argv[i], "--64")) use_64bit = 1;
|
||
|
else if (!strcmp(argv[i], "--32")) use_64bit = 0;
|
||
|
|
||
|
#ifdef __APPLE__
|
||
|
|
||
|
/* The Apple case is a bit different... */
|
||
|
|
||
|
if (!strcmp(argv[i], "-arch") && i + 1 < argc) {
|
||
|
|
||
|
if (!strcmp(argv[i + 1], "x86_64")) use_64bit = 1;
|
||
|
else if (!strcmp(argv[i + 1], "i386"))
|
||
|
FATAL("Sorry, 32-bit Apple platforms are not supported.");
|
||
|
|
||
|
}
|
||
|
|
||
|
/* Strip options that set the preference for a particular upstream
|
||
|
assembler in Xcode. */
|
||
|
|
||
|
if (clang_mode && (!strcmp(argv[i], "-q") || !strcmp(argv[i], "-Q")))
|
||
|
continue;
|
||
|
|
||
|
#endif /* __APPLE__ */
|
||
|
|
||
|
as_params[as_par_cnt++] = argv[i];
|
||
|
|
||
|
}
|
||
|
|
||
|
#ifdef __APPLE__
|
||
|
|
||
|
/* When calling clang as the upstream assembler, append -c -x assembler
|
||
|
and hope for the best. */
|
||
|
|
||
|
if (use_clang_as) {
|
||
|
|
||
|
as_params[as_par_cnt++] = "-c";
|
||
|
as_params[as_par_cnt++] = "-x";
|
||
|
as_params[as_par_cnt++] = "assembler";
|
||
|
|
||
|
}
|
||
|
|
||
|
#endif /* __APPLE__ */
|
||
|
|
||
|
input_file = argv[argc - 1];
|
||
|
|
||
|
if (input_file[0] == '-') {
|
||
|
|
||
|
if (!strcmp(input_file + 1, "-version")) {
|
||
|
just_version = 1;
|
||
|
modified_file = input_file;
|
||
|
goto wrap_things_up;
|
||
|
}
|
||
|
|
||
|
if (input_file[1]) FATAL("Incorrect use (not called through afl-gcc?)");
|
||
|
else input_file = NULL;
|
||
|
|
||
|
} else {
|
||
|
|
||
|
/* Check if this looks like a standard invocation as a part of an attempt
|
||
|
to compile a program, rather than using gcc on an ad-hoc .s file in
|
||
|
a format we may not understand. This works around an issue compiling
|
||
|
NSS. */
|
||
|
|
||
|
if (strncmp(input_file, tmp_dir, strlen(tmp_dir)) &&
|
||
|
strncmp(input_file, "/var/tmp/", 9) &&
|
||
|
strncmp(input_file, "/tmp/", 5)) pass_thru = 1;
|
||
|
|
||
|
}
|
||
|
|
||
|
modified_file = alloc_printf("%s/.afl-%u-%u.s", tmp_dir, getpid(),
|
||
|
(u32)time(NULL));
|
||
|
|
||
|
wrap_things_up:
|
||
|
|
||
|
as_params[as_par_cnt++] = modified_file;
|
||
|
as_params[as_par_cnt] = NULL;
|
||
|
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Process input file, generate modified_file. Insert instrumentation in all
|
||
|
the appropriate places. */
|
||
|
|
||
|
static void add_instrumentation(void) {
|
||
|
|
||
|
static u8 line[MAX_LINE];
|
||
|
|
||
|
FILE* inf;
|
||
|
FILE* outf;
|
||
|
s32 outfd;
|
||
|
u32 ins_lines = 0;
|
||
|
|
||
|
u8 instr_ok = 0, skip_csect = 0, skip_next_label = 0,
|
||
|
skip_intel = 0, skip_app = 0, instrument_next = 0;
|
||
|
|
||
|
#ifdef __APPLE__
|
||
|
|
||
|
u8* colon_pos;
|
||
|
|
||
|
#endif /* __APPLE__ */
|
||
|
|
||
|
if (input_file) {
|
||
|
|
||
|
inf = fopen(input_file, "r");
|
||
|
if (!inf) PFATAL("Unable to read '%s'", input_file);
|
||
|
|
||
|
} else inf = stdin;
|
||
|
|
||
|
outfd = open(modified_file, O_WRONLY | O_EXCL | O_CREAT, 0600);
|
||
|
|
||
|
if (outfd < 0) PFATAL("Unable to write to '%s'", modified_file);
|
||
|
|
||
|
outf = fdopen(outfd, "w");
|
||
|
|
||
|
if (!outf) PFATAL("fdopen() failed");
|
||
|
|
||
|
while (fgets(line, MAX_LINE, inf)) {
|
||
|
|
||
|
/* In some cases, we want to defer writing the instrumentation trampoline
|
||
|
until after all the labels, macros, comments, etc. If we're in this
|
||
|
mode, and if the line starts with a tab followed by a character, dump
|
||
|
the trampoline now. */
|
||
|
|
||
|
if (!pass_thru && !skip_intel && !skip_app && !skip_csect && instr_ok &&
|
||
|
instrument_next && line[0] == '\t' && isalpha(line[1])) {
|
||
|
|
||
|
fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
|
||
|
R(MAP_SIZE));
|
||
|
|
||
|
instrument_next = 0;
|
||
|
ins_lines++;
|
||
|
|
||
|
}
|
||
|
|
||
|
/* Output the actual line, call it a day in pass-thru mode. */
|
||
|
|
||
|
fputs(line, outf);
|
||
|
|
||
|
if (pass_thru) continue;
|
||
|
|
||
|
/* All right, this is where the actual fun begins. For one, we only want to
|
||
|
instrument the .text section. So, let's keep track of that in processed
|
||
|
files - and let's set instr_ok accordingly. */
|
||
|
|
||
|
if (line[0] == '\t' && line[1] == '.') {
|
||
|
|
||
|
/* OpenBSD puts jump tables directly inline with the code, which is
|
||
|
a bit annoying. They use a specific format of p2align directives
|
||
|
around them, so we use that as a signal. */
|
||
|
|
||
|
if (!clang_mode && instr_ok && !strncmp(line + 2, "p2align ", 8) &&
|
||
|
isdigit(line[10]) && line[11] == '\n') skip_next_label = 1;
|
||
|
|
||
|
if (!strncmp(line + 2, "text\n", 5) ||
|
||
|
!strncmp(line + 2, "section\t.text", 13) ||
|
||
|
!strncmp(line + 2, "section\t__TEXT,__text", 21) ||
|
||
|
!strncmp(line + 2, "section __TEXT,__text", 21)) {
|
||
|
instr_ok = 1;
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
if (!strncmp(line + 2, "section\t", 8) ||
|
||
|
!strncmp(line + 2, "section ", 8) ||
|
||
|
!strncmp(line + 2, "bss\n", 4) ||
|
||
|
!strncmp(line + 2, "data\n", 5)) {
|
||
|
instr_ok = 0;
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
}
|
||
|
|
||
|
/* Detect off-flavor assembly (rare, happens in gdb). When this is
|
||
|
encountered, we set skip_csect until the opposite directive is
|
||
|
seen, and we do not instrument. */
|
||
|
|
||
|
if (strstr(line, ".code")) {
|
||
|
|
||
|
if (strstr(line, ".code32")) skip_csect = use_64bit;
|
||
|
if (strstr(line, ".code64")) skip_csect = !use_64bit;
|
||
|
|
||
|
}
|
||
|
|
||
|
/* Detect syntax changes, as could happen with hand-written assembly.
|
||
|
Skip Intel blocks, resume instrumentation when back to AT&T. */
|
||
|
|
||
|
if (strstr(line, ".intel_syntax")) skip_intel = 1;
|
||
|
if (strstr(line, ".att_syntax")) skip_intel = 0;
|
||
|
|
||
|
/* Detect and skip ad-hoc __asm__ blocks, likewise skipping them. */
|
||
|
|
||
|
if (line[0] == '#' || line[1] == '#') {
|
||
|
|
||
|
if (strstr(line, "#APP")) skip_app = 1;
|
||
|
if (strstr(line, "#NO_APP")) skip_app = 0;
|
||
|
|
||
|
}
|
||
|
|
||
|
/* If we're in the right mood for instrumenting, check for function
|
||
|
names or conditional labels. This is a bit messy, but in essence,
|
||
|
we want to catch:
|
||
|
|
||
|
^main: - function entry point (always instrumented)
|
||
|
^.L0: - GCC branch label
|
||
|
^.LBB0_0: - clang branch label (but only in clang mode)
|
||
|
^\tjnz foo - conditional branches
|
||
|
|
||
|
...but not:
|
||
|
|
||
|
^# BB#0: - clang comments
|
||
|
^ # BB#0: - ditto
|
||
|
^.Ltmp0: - clang non-branch labels
|
||
|
^.LC0 - GCC non-branch labels
|
||
|
^.LBB0_0: - ditto (when in GCC mode)
|
||
|
^\tjmp foo - non-conditional jumps
|
||
|
|
||
|
Additionally, clang and GCC on MacOS X follow a different convention
|
||
|
with no leading dots on labels, hence the weird maze of #ifdefs
|
||
|
later on.
|
||
|
|
||
|
*/
|
||
|
|
||
|
if (skip_intel || skip_app || skip_csect || !instr_ok ||
|
||
|
line[0] == '#' || line[0] == ' ') continue;
|
||
|
|
||
|
/* Conditional branch instruction (jnz, etc). We append the instrumentation
|
||
|
right after the branch (to instrument the not-taken path) and at the
|
||
|
branch destination label (handled later on). */
|
||
|
|
||
|
if (line[0] == '\t') {
|
||
|
|
||
|
if (line[1] == 'j' && line[2] != 'm' && R(100) < inst_ratio) {
|
||
|
|
||
|
fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
|
||
|
R(MAP_SIZE));
|
||
|
|
||
|
ins_lines++;
|
||
|
|
||
|
}
|
||
|
|
||
|
continue;
|
||
|
|
||
|
}
|
||
|
|
||
|
/* Label of some sort. This may be a branch destination, but we need to
|
||
|
tread carefully and account for several different formatting
|
||
|
conventions. */
|
||
|
|
||
|
#ifdef __APPLE__
|
||
|
|
||
|
/* Apple: L<whatever><digit>: */
|
||
|
|
||
|
if ((colon_pos = strstr(line, ":"))) {
|
||
|
|
||
|
if (line[0] == 'L' && isdigit(*(colon_pos - 1))) {
|
||
|
|
||
|
#else
|
||
|
|
||
|
/* Everybody else: .L<whatever>: */
|
||
|
|
||
|
if (strstr(line, ":")) {
|
||
|
|
||
|
if (line[0] == '.') {
|
||
|
|
||
|
#endif /* __APPLE__ */
|
||
|
|
||
|
/* .L0: or LBB0_0: style jump destination */
|
||
|
|
||
|
#ifdef __APPLE__
|
||
|
|
||
|
/* Apple: L<num> / LBB<num> */
|
||
|
|
||
|
if ((isdigit(line[1]) || (clang_mode && !strncmp(line, "LBB", 3)))
|
||
|
&& R(100) < inst_ratio) {
|
||
|
|
||
|
#else
|
||
|
|
||
|
/* Apple: .L<num> / .LBB<num> */
|
||
|
|
||
|
if ((isdigit(line[2]) || (clang_mode && !strncmp(line + 1, "LBB", 3)))
|
||
|
&& R(100) < inst_ratio) {
|
||
|
|
||
|
#endif /* __APPLE__ */
|
||
|
|
||
|
/* An optimization is possible here by adding the code only if the
|
||
|
label is mentioned in the code in contexts other than call / jmp.
|
||
|
That said, this complicates the code by requiring two-pass
|
||
|
processing (messy with stdin), and results in a speed gain
|
||
|
typically under 10%, because compilers are generally pretty good
|
||
|
about not generating spurious intra-function jumps.
|
||
|
|
||
|
We use deferred output chiefly to avoid disrupting
|
||
|
.Lfunc_begin0-style exception handling calculations (a problem on
|
||
|
MacOS X). */
|
||
|
|
||
|
if (!skip_next_label) instrument_next = 1; else skip_next_label = 0;
|
||
|
|
||
|
}
|
||
|
|
||
|
} else {
|
||
|
|
||
|
/* Function label (always instrumented, deferred mode). */
|
||
|
|
||
|
instrument_next = 1;
|
||
|
|
||
|
}
|
||
|
|
||
|
}
|
||
|
|
||
|
}
|
||
|
|
||
|
if (ins_lines)
|
||
|
fputs(use_64bit ? main_payload_64 : main_payload_32, outf);
|
||
|
|
||
|
if (input_file) fclose(inf);
|
||
|
fclose(outf);
|
||
|
|
||
|
if (!be_quiet) {
|
||
|
|
||
|
if (!ins_lines) WARNF("No instrumentation targets found%s.",
|
||
|
pass_thru ? " (pass-thru mode)" : "");
|
||
|
else OKF("Instrumented %u locations (%s-bit, %s mode, ratio %u%%).",
|
||
|
ins_lines, use_64bit ? "64" : "32",
|
||
|
getenv("AFL_HARDEN") ? "hardened" :
|
||
|
(sanitizer ? "ASAN/MSAN" : "non-hardened"),
|
||
|
inst_ratio);
|
||
|
|
||
|
}
|
||
|
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Main entry point */
|
||
|
|
||
|
int main(int argc, char** argv) {
|
||
|
|
||
|
s32 pid;
|
||
|
u32 rand_seed;
|
||
|
int status;
|
||
|
u8* inst_ratio_str = getenv("AFL_INST_RATIO");
|
||
|
|
||
|
struct timeval tv;
|
||
|
struct timezone tz;
|
||
|
|
||
|
clang_mode = !!getenv(CLANG_ENV_VAR);
|
||
|
|
||
|
if (isatty(2) && !getenv("AFL_QUIET")) {
|
||
|
|
||
|
SAYF(cCYA "afl-as " cBRI VERSION cRST " by <lcamtuf@google.com>\n");
|
||
|
|
||
|
} else be_quiet = 1;
|
||
|
|
||
|
if (argc < 2) {
|
||
|
|
||
|
SAYF("\n"
|
||
|
"This is a helper application for afl-fuzz. It is a wrapper around GNU 'as',\n"
|
||
|
"executed by the toolchain whenever using afl-gcc or afl-clang. You probably\n"
|
||
|
"don't want to run this program directly.\n\n"
|
||
|
|
||
|
"Rarely, when dealing with extremely complex projects, it may be advisable to\n"
|
||
|
"set AFL_INST_RATIO to a value less than 100 in order to reduce the odds of\n"
|
||
|
"instrumenting every discovered branch.\n\n");
|
||
|
|
||
|
exit(1);
|
||
|
|
||
|
}
|
||
|
|
||
|
gettimeofday(&tv, &tz);
|
||
|
|
||
|
rand_seed = tv.tv_sec ^ tv.tv_usec ^ getpid();
|
||
|
|
||
|
srandom(rand_seed);
|
||
|
|
||
|
edit_params(argc, argv);
|
||
|
|
||
|
if (inst_ratio_str) {
|
||
|
|
||
|
if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || inst_ratio > 100)
|
||
|
FATAL("Bad value of AFL_INST_RATIO (must be between 0 and 100)");
|
||
|
|
||
|
}
|
||
|
|
||
|
if (getenv(AS_LOOP_ENV_VAR))
|
||
|
FATAL("Endless loop when calling 'as' (remove '.' from your PATH)");
|
||
|
|
||
|
setenv(AS_LOOP_ENV_VAR, "1", 1);
|
||
|
|
||
|
/* When compiling with ASAN, we don't have a particularly elegant way to skip
|
||
|
ASAN-specific branches. But we can probabilistically compensate for
|
||
|
that... */
|
||
|
|
||
|
if (getenv("AFL_USE_ASAN") || getenv("AFL_USE_MSAN")) {
|
||
|
sanitizer = 1;
|
||
|
inst_ratio /= 3;
|
||
|
}
|
||
|
|
||
|
if (!just_version) add_instrumentation();
|
||
|
|
||
|
if (!(pid = fork())) {
|
||
|
|
||
|
execvp(as_params[0], (char**)as_params);
|
||
|
FATAL("Oops, failed to execute '%s' - check your PATH", as_params[0]);
|
||
|
|
||
|
}
|
||
|
|
||
|
if (pid < 0) PFATAL("fork() failed");
|
||
|
|
||
|
if (waitpid(pid, &status, 0) <= 0) PFATAL("waitpid() failed");
|
||
|
|
||
|
if (!getenv("AFL_KEEP_ASSEMBLY")) unlink(modified_file);
|
||
|
|
||
|
exit(WEXITSTATUS(status));
|
||
|
|
||
|
}
|
||
|
|