@ -0,0 +1,20 @@
|
||||
# Binaries produced by "make".
|
||||
afl-analyze
|
||||
afl-as
|
||||
afl-clang
|
||||
afl-clang++
|
||||
afl-fuzz
|
||||
afl-g++
|
||||
afl-gcc
|
||||
afl-gotcpu
|
||||
afl-showmap
|
||||
afl-tmin
|
||||
as
|
||||
|
||||
# Binaries produced by "make -C llvm_mode"
|
||||
afl-clang-fast
|
||||
afl-clang-fast++
|
||||
afl-llvm-pass.so
|
||||
afl-llvm-rt-32.o
|
||||
afl-llvm-rt-64.o
|
||||
afl-llvm-rt.o
|
@ -0,0 +1,60 @@
|
||||
language: c
|
||||
|
||||
env:
|
||||
- AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1 AFL_NO_UI=1 AFL_STOP_MANUALLY=1
|
||||
- AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1 AFL_NO_UI=1 AFL_EXIT_WHEN_DONE=1
|
||||
# TODO: test AFL_BENCH_UNTIL_CRASH once we have a target that crashes
|
||||
- AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1 AFL_NO_UI=1 AFL_BENCH_JUST_ONE=1
|
||||
|
||||
before_install:
|
||||
- sudo apt update
|
||||
- sudo apt install -y libtool libtool-bin automake bison libglib2.0
|
||||
|
||||
# TODO: Look into splitting off some builds using a build matrix.
|
||||
# TODO: Move this all into a bash script so we don't need to write bash in yaml.
|
||||
script:
|
||||
- make
|
||||
- ./afl-gcc ./test-instr.c -o test-instr-gcc
|
||||
- mkdir seeds
|
||||
- echo "" > seeds/nil_seed
|
||||
- if [ -z "$AFL_STOP_MANUALLY" ];
|
||||
then ./afl-fuzz -i seeds -o out/ -- ./test-instr-gcc;
|
||||
else timeout --preserve-status 5s ./afl-fuzz -i seeds -o out/ -- ./test-instr-gcc;
|
||||
fi
|
||||
- .travis/check_fuzzer_stats.sh -o out -k peak_rss_mb -v 1 -p 3
|
||||
- rm -r out/*
|
||||
- ./afl-clang ./test-instr.c -o test-instr-clang
|
||||
- if [ -z "$AFL_STOP_MANUALLY" ];
|
||||
then ./afl-fuzz -i seeds -o out/ -- ./test-instr-clang;
|
||||
else timeout --preserve-status 5s ./afl-fuzz -i seeds -o out/ -- ./test-instr-clang;
|
||||
fi
|
||||
- .travis/check_fuzzer_stats.sh -o out -k peak_rss_mb -v 1 -p 2
|
||||
- make clean
|
||||
- CC=clang CXX=clang++ make
|
||||
- cd llvm_mode
|
||||
# TODO: Build with different versions of clang/LLVM since LLVM passes don't
|
||||
# have a stable API.
|
||||
- CC=clang CXX=clang++ LLVM_CONFIG=llvm-config make
|
||||
- cd ..
|
||||
- rm -r out/*
|
||||
- ./afl-clang-fast ./test-instr.c -o test-instr-clang-fast
|
||||
- if [ -z "$AFL_STOP_MANUALLY" ];
|
||||
then ./afl-fuzz -i seeds -o out/ -- ./test-instr-clang-fast;
|
||||
else timeout --preserve-status 5s ./afl-fuzz -i seeds -o out/ -- ./test-instr-clang-fast;
|
||||
fi
|
||||
- .travis/check_fuzzer_stats.sh -o out -k peak_rss_mb -v 1 -p 3
|
||||
# Test fuzzing libFuzzer targets and trace-pc-guard instrumentation.
|
||||
- clang -g -fsanitize-coverage=trace-pc-guard ./test-libfuzzer-target.c -c
|
||||
- clang -c -w llvm_mode/afl-llvm-rt.o.c
|
||||
- wget https://raw.githubusercontent.com/llvm/llvm-project/main/compiler-rt/lib/fuzzer/afl/afl_driver.cpp
|
||||
- clang++ afl_driver.cpp afl-llvm-rt.o.o test-libfuzzer-target.o -o test-libfuzzer-target
|
||||
- timeout --preserve-status 5s ./afl-fuzz -i seeds -o out/ -- ./test-libfuzzer-target
|
||||
- cd qemu_mode
|
||||
- ./build_qemu_support.sh
|
||||
- cd ..
|
||||
- gcc ./test-instr.c -o test-no-instr
|
||||
- if [ -z "$AFL_STOP_MANUALLY" ];
|
||||
then ./afl-fuzz -Q -i seeds -o out/ -- ./test-no-instr;
|
||||
else timeout --preserve-status 5s ./afl-fuzz -Q -i seeds -o out/ -- ./test-no-instr;
|
||||
fi
|
||||
- .travis/check_fuzzer_stats.sh -o out -k peak_rss_mb -v 12 -p 9
|
@ -1,60 +1,60 @@
|
||||
#!/bin/bash
|
||||
# Print the command-line help on stderr, then terminate the script with
# exit status 1.
usage() {
  {
    printf '%s\n' "Usage: $0 -o <out_dir> -k <key> -v <value> [-p <precision>]"
    printf '%s\n' " "
    printf '%s\n' "Checks if a key:value appears in the fuzzer_stats report"
    printf '%s\n' " "
    # No newline here: the sentence is completed by the next printf.
    printf '%s' "If \"value\" is numeric and \"precision\" is defined, checks if the stat "
    printf '%s\n' "printed by afl is value+/-precision."
  } 1>&2
  exit 1
}
|
||||
|
||||
# Collect the command-line options into the globals o, k, v and p (each one
# takes an argument). Any other option prints the help text and exits.
while getopts "o:k:v:p:" opt; do
  case "$opt" in
    o) o=$OPTARG ;;
    k) k=$OPTARG ;;
    v) v=$OPTARG ;;
    p) p=$OPTARG ;;
    *) usage ;;
  esac
done
|
||||
|
||||
# The output directory, the key and the expected value are mandatory. The
# expansions are quoted so that each argument is tested as a single word.
if [ -z "$o" ] || [ -z "$k" ] || [ -z "$v" ]; then usage; fi

# xargs to trim the surrounding whitespaces
stat_v=$( grep -- "$k" "$o"/fuzzer_stats | cut -d ":" -f 2 | xargs )
v=$( echo "$v" | xargs )

# Test the variable's value (not the literal word "stat_v"), so that a key
# which is absent from fuzzer_stats is reported instead of silently passing.
if [ -z "$stat_v" ];
  then echo "ERROR: key $k not found in fuzzer_stats." 1>&2
  exit 1
fi

re_percent='^[0-9]+([.][0-9]+)?\%$'
# if the argument is a number in percentage, get rid of the %
if [[ "$v" =~ $re_percent ]]; then v=${v%\%}; fi
if [[ "$stat_v" =~ $re_percent ]]; then stat_v=${stat_v%\%}; fi

re_numeric='^[0-9]+([.][0-9]+)?$'
# if either side is not a number, we check for strict equality
if ! [[ "$v" =~ $re_numeric ]] || ! [[ "$stat_v" =~ $re_numeric ]];
  then if [ "$v" != "$stat_v" ];
    then echo "ERROR: \"$k:$stat_v\" (should be $v)." 1>&2
    exit 2;
  fi
# checks if the stat reported by afl is in the range [v - p, v + p].
# awk performs the comparison because re_numeric admits decimal values, which
# the shell's integer-only arithmetic and "[ -lt ]" cannot handle. A missing
# -p is treated as a precision of 0.
elif ! awk -v s="$stat_v" -v v="$v" -v p="${p:-0}" \
        'BEGIN { exit !(s >= v - p && s <= v + p) }';
  then echo "ERROR: key $k:$stat_v is out of correct range." 1>&2
  exit 3;
fi
echo "OK: key $k:$stat_v" 1>&2
|
||||
|
||||
#!/bin/bash
|
||||
# Print the command-line help on stderr, then terminate the script with
# exit status 1.
usage() {
  {
    printf '%s\n' "Usage: $0 -o <out_dir> -k <key> -v <value> [-p <precision>]"
    printf '%s\n' " "
    printf '%s\n' "Checks if a key:value appears in the fuzzer_stats report"
    printf '%s\n' " "
    # No newline here: the sentence is completed by the next printf.
    printf '%s' "If \"value\" is numeric and \"precision\" is defined, checks if the stat "
    printf '%s\n' "printed by afl is value+/-precision."
  } 1>&2
  exit 1
}
|
||||
|
||||
# Collect the command-line options into the globals o, k, v and p (each one
# takes an argument). Any other option prints the help text and exits.
while getopts "o:k:v:p:" opt; do
  case "$opt" in
    o) o=$OPTARG ;;
    k) k=$OPTARG ;;
    v) v=$OPTARG ;;
    p) p=$OPTARG ;;
    *) usage ;;
  esac
done
|
||||
|
||||
# The output directory, the key and the expected value are mandatory. The
# expansions are quoted so that each argument is tested as a single word.
if [ -z "$o" ] || [ -z "$k" ] || [ -z "$v" ]; then usage; fi

# xargs to trim the surrounding whitespaces
stat_v=$( grep -- "$k" "$o"/fuzzer_stats | cut -d ":" -f 2 | xargs )
v=$( echo "$v" | xargs )

# Test the variable's value (not the literal word "stat_v"), so that a key
# which is absent from fuzzer_stats is reported instead of silently passing.
if [ -z "$stat_v" ];
  then echo "ERROR: key $k not found in fuzzer_stats." 1>&2
  exit 1
fi

re_percent='^[0-9]+([.][0-9]+)?\%$'
# if the argument is a number in percentage, get rid of the %
if [[ "$v" =~ $re_percent ]]; then v=${v%\%}; fi
if [[ "$stat_v" =~ $re_percent ]]; then stat_v=${stat_v%\%}; fi

re_numeric='^[0-9]+([.][0-9]+)?$'
# if either side is not a number, we check for strict equality
if ! [[ "$v" =~ $re_numeric ]] || ! [[ "$stat_v" =~ $re_numeric ]];
  then if [ "$v" != "$stat_v" ];
    then echo "ERROR: \"$k:$stat_v\" (should be $v)." 1>&2
    exit 2;
  fi
# checks if the stat reported by afl is in the range [v - p, v + p].
# awk performs the comparison because re_numeric admits decimal values, which
# the shell's integer-only arithmetic and "[ -lt ]" cannot handle. A missing
# -p is treated as a precision of 0.
elif ! awk -v s="$stat_v" -v v="$v" -v p="${p:-0}" \
        'BEGIN { exit !(s >= v - p && s <= v + p) }';
  then echo "ERROR: key $k:$stat_v is out of correct range." 1>&2
  exit 3;
fi
echo "OK: key $k:$stat_v" 1>&2
|
||||
|
@ -1,493 +0,0 @@
|
||||
# american fuzzy lop
|
||||
|
||||
[![Build Status](https://travis-ci.org/google/AFL.svg?branch=master)](https://travis-ci.org/google/AFL)
|
||||
|
||||
Originally developed by Michal Zalewski <lcamtuf@google.com>.
|
||||
|
||||
See [QuickStartGuide.txt](docs/QuickStartGuide.txt) if you don't have time to read
|
||||
this file.
|
||||
|
||||
## 1) Challenges of guided fuzzing
|
||||
|
||||
Fuzzing is one of the most powerful and proven strategies for identifying
|
||||
security issues in real-world software; it is responsible for the vast
|
||||
majority of remote code execution and privilege escalation bugs found to date
|
||||
in security-critical software.
|
||||
|
||||
Unfortunately, fuzzing is also relatively shallow; blind, random mutations
|
||||
make it very unlikely to reach certain code paths in the tested code, leaving
|
||||
some vulnerabilities firmly outside the reach of this technique.
|
||||
|
||||
There have been numerous attempts to solve this problem. One of the early
|
||||
approaches - pioneered by Tavis Ormandy - is corpus distillation. The method
|
||||
relies on coverage signals to select a subset of interesting seeds from a
|
||||
massive, high-quality corpus of candidate files, and then fuzz them by
|
||||
traditional means. The approach works exceptionally well, but requires such
|
||||
a corpus to be readily available. In addition, block coverage measurements
|
||||
provide only a very simplistic understanding of program state, and are less
|
||||
useful for guiding the fuzzing effort in the long haul.
|
||||
|
||||
Other, more sophisticated research has focused on techniques such as program
|
||||
flow analysis ("concolic execution"), symbolic execution, or static analysis.
|
||||
All these methods are extremely promising in experimental settings, but tend
|
||||
to suffer from reliability and performance problems in practical uses - and
|
||||
currently do not offer a viable alternative to "dumb" fuzzing techniques.
|
||||
|
||||
## 2) The afl-fuzz approach
|
||||
|
||||
American Fuzzy Lop is a brute-force fuzzer coupled with an exceedingly simple
|
||||
but rock-solid instrumentation-guided genetic algorithm. It uses a modified
|
||||
form of edge coverage to effortlessly pick up subtle, local-scale changes to
|
||||
program control flow.
|
||||
|
||||
Simplifying a bit, the overall algorithm can be summed up as:
|
||||
|
||||
1) Load user-supplied initial test cases into the queue,
|
||||
|
||||
2) Take next input file from the queue,
|
||||
|
||||
3) Attempt to trim the test case to the smallest size that doesn't alter
|
||||
the measured behavior of the program,
|
||||
|
||||
4) Repeatedly mutate the file using a balanced and well-researched variety
|
||||
of traditional fuzzing strategies,
|
||||
|
||||
5) If any of the generated mutations resulted in a new state transition
|
||||
recorded by the instrumentation, add mutated output as a new entry in the
|
||||
queue.
|
||||
|
||||
6) Go to 2.
|
||||
|
||||
The discovered test cases are also periodically culled to eliminate ones that
|
||||
have been obsoleted by newer, higher-coverage finds; and undergo several other
|
||||
instrumentation-driven effort minimization steps.
|
||||
|
||||
As a side result of the fuzzing process, the tool creates a small,
|
||||
self-contained corpus of interesting test cases. These are extremely useful
|
||||
for seeding other, labor- or resource-intensive testing regimes - for example,
|
||||
for stress-testing browsers, office applications, graphics suites, or
|
||||
closed-source tools.
|
||||
|
||||
The fuzzer is thoroughly tested to deliver out-of-the-box performance far
|
||||
superior to blind fuzzing or coverage-only tools.
|
||||
|
||||
## 3) Instrumenting programs for use with AFL
|
||||
|
||||
When source code is available, instrumentation can be injected by a companion
|
||||
tool that works as a drop-in replacement for gcc or clang in any standard build
|
||||
process for third-party code.
|
||||
|
||||
The instrumentation has a fairly modest performance impact; in conjunction with
|
||||
other optimizations implemented by afl-fuzz, most programs can be fuzzed as fast
|
||||
or even faster than possible with traditional tools.
|
||||
|
||||
The correct way to recompile the target program may vary depending on the
|
||||
specifics of the build process, but a nearly-universal approach would be:
|
||||
|
||||
```shell
|
||||
$ CC=/path/to/afl/afl-gcc ./configure
|
||||
$ make clean all
|
||||
```
|
||||
|
||||
For C++ programs, you'd also want to set `CXX=/path/to/afl/afl-g++`.
|
||||
|
||||
The clang wrappers (afl-clang and afl-clang++) can be used in the same way;
|
||||
clang users may also opt to leverage a higher-performance instrumentation mode,
|
||||
as described in llvm_mode/README.llvm.
|
||||
|
||||
When testing libraries, you need to find or write a simple program that reads
|
||||
data from stdin or from a file and passes it to the tested library. In such a
|
||||
case, it is essential to link this executable against a static version of the
|
||||
instrumented library, or to make sure that the correct .so file is loaded at
|
||||
runtime (usually by setting `LD_LIBRARY_PATH`). The simplest option is a static
|
||||
build, usually possible via:
|
||||
|
||||
```shell
|
||||
$ CC=/path/to/afl/afl-gcc ./configure --disable-shared
|
||||
```
|
||||
|
||||
Setting `AFL_HARDEN=1` when calling 'make' will cause the CC wrapper to
|
||||
automatically enable code hardening options that make it easier to detect
|
||||
simple memory bugs. Libdislocator, a helper library included with AFL (see
|
||||
libdislocator/README.dislocator) can help uncover heap corruption issues, too.
|
||||
|
||||
PS. ASAN users are advised to review the [notes_for_asan.txt](docs/notes_for_asan.txt) file for important
|
||||
caveats.
|
||||
|
||||
## 4) Instrumenting binary-only apps
|
||||
|
||||
When source code is *NOT* available, the fuzzer offers experimental support for
|
||||
fast, on-the-fly instrumentation of black-box binaries. This is accomplished
|
||||
with a version of QEMU running in the lesser-known "user space emulation" mode.
|
||||
|
||||
QEMU is a project separate from AFL, but you can conveniently build the
|
||||
feature by doing:
|
||||
|
||||
```shell
|
||||
$ cd qemu_mode
|
||||
$ ./build_qemu_support.sh
|
||||
```
|
||||
|
||||
For additional instructions and caveats, see qemu_mode/README.qemu.
|
||||
|
||||
The mode is approximately 2-5x slower than compile-time instrumentation, is
|
||||
less conducive to parallelization, and may have some other quirks.
|
||||
|
||||
## 5) Choosing initial test cases
|
||||
|
||||
To operate correctly, the fuzzer requires one or more starting files that
|
||||
contain a good example of the input data normally expected by the targeted
|
||||
application. There are two basic rules:
|
||||
|
||||
- Keep the files small. Under 1 kB is ideal, although not strictly necessary.
|
||||
For a discussion of why size matters, see [perf_tips.txt](docs/perf_tips.txt).
|
||||
|
||||
- Use multiple test cases only if they are functionally different from
|
||||
each other. There is no point in using fifty different vacation photos
|
||||
to fuzz an image library.
|
||||
|
||||
You can find many good examples of starting files in the testcases/ subdirectory
|
||||
that comes with this tool.
|
||||
|
||||
PS. If a large corpus of data is available for screening, you may want to use
|
||||
the afl-cmin utility to identify a subset of functionally distinct files that
|
||||
exercise different code paths in the target binary.
|
||||
|
||||
## 6) Fuzzing binaries
|
||||
|
||||
The fuzzing process itself is carried out by the afl-fuzz utility. This program
|
||||
requires a read-only directory with initial test cases, a separate place to
|
||||
store its findings, plus a path to the binary to test.
|
||||
|
||||
For target binaries that accept input directly from stdin, the usual syntax is:
|
||||
|
||||
```shell
|
||||
$ ./afl-fuzz -i testcase_dir -o findings_dir /path/to/program [...params...]
|
||||
```
|
||||
|
||||
For programs that take input from a file, use '@@' to mark the location in
|
||||
the target's command line where the input file name should be placed. The
|
||||
fuzzer will substitute this for you:
|
||||
|
||||
```shell
|
||||
$ ./afl-fuzz -i testcase_dir -o findings_dir /path/to/program @@
|
||||
```
|
||||
|
||||
You can also use the -f option to have the mutated data written to a specific
|
||||
file. This is useful if the program expects a particular file extension or so.
|
||||
|
||||
Non-instrumented binaries can be fuzzed in the QEMU mode (add -Q in the command
|
||||
line) or in a traditional, blind-fuzzer mode (specify -n).
|
||||
|
||||
You can use -t and -m to override the default timeout and memory limit for the
|
||||
executed process; rare examples of targets that may need these settings touched
|
||||
include compilers and video decoders.
|
||||
|
||||
Tips for optimizing fuzzing performance are discussed in [perf_tips.txt](docs/perf_tips.txt).
|
||||
|
||||
Note that afl-fuzz starts by performing an array of deterministic fuzzing
|
||||
steps, which can take several days, but tend to produce neat test cases. If you
|
||||
want quick & dirty results right away - akin to zzuf and other traditional
|
||||
fuzzers - add the -d option to the command line.
|
||||
|
||||
## 7) Interpreting output
|
||||
|
||||
See the [status_screen.txt](docs/status_screen.txt) file for information on
|
||||
how to interpret the displayed stats and monitor the health of the process.
|
||||
Be sure to consult this file especially if any UI elements are highlighted in
|
||||
red.
|
||||
|
||||
The fuzzing process will continue until you press Ctrl-C. At minimum, you want
|
||||
to allow the fuzzer to complete one queue cycle, which may take anywhere from a
|
||||
couple of hours to a week or so.
|
||||
|
||||
There are three subdirectories created within the output directory and updated
|
||||
in real time:
|
||||
|
||||
- queue/ - test cases for every distinctive execution path, plus all the
|
||||
starting files given by the user. This is the synthesized corpus
|
||||
mentioned in section 2.
|
||||
Before using this corpus for any other purposes, you can shrink
|
||||
it to a smaller size using the afl-cmin tool. The tool will find
|
||||
a smaller subset of files offering equivalent edge coverage.
|
||||
|
||||
- crashes/ - unique test cases that cause the tested program to receive a
|
||||
fatal signal (e.g., SIGSEGV, SIGILL, SIGABRT). The entries are
|
||||
grouped by the received signal.
|
||||
|
||||
- hangs/ - unique test cases that cause the tested program to time out. The
|
||||
default time limit before something is classified as a hang is
|
||||
the larger of 1 second and the value of the -t parameter.
|
||||
The value can be fine-tuned by setting AFL_HANG_TMOUT, but this
|
||||
is rarely necessary.
|
||||
|
||||
Crashes and hangs are considered "unique" if the associated execution paths
|
||||
involve any state transitions not seen in previously-recorded faults. If a
|
||||
single bug can be reached in multiple ways, there will be some count inflation
|
||||
early in the process, but this should quickly taper off.
|
||||
|
||||
The file names for crashes and hangs are correlated with parent, non-faulting
|
||||
queue entries. This should help with debugging.
|
||||
|
||||
When you can't reproduce a crash found by afl-fuzz, the most likely cause is
|
||||
that you are not setting the same memory limit as used by the tool. Try:
|
||||
|
||||
```shell
|
||||
$ LIMIT_MB=50
|
||||
$ ( ulimit -Sv $[LIMIT_MB << 10]; /path/to/tested_binary ... )
|
||||
```
|
||||
|
||||
Change LIMIT_MB to match the -m parameter passed to afl-fuzz. On OpenBSD,
|
||||
also change -Sv to -Sd.
|
||||
|
||||
Any existing output directory can be also used to resume aborted jobs; try:
|
||||
|
||||
```shell
|
||||
$ ./afl-fuzz -i- -o existing_output_dir [...etc...]
|
||||
```
|
||||
|
||||
If you have gnuplot installed, you can also generate some pretty graphs for any
|
||||
active fuzzing task using afl-plot. For an example of what this looks like,
|
||||
see [http://lcamtuf.coredump.cx/afl/plot/](http://lcamtuf.coredump.cx/afl/plot/).
|
||||
|
||||
## 8) Parallelized fuzzing
|
||||
|
||||
Every instance of afl-fuzz takes up roughly one core. This means that on
|
||||
multi-core systems, parallelization is necessary to fully utilize the hardware.
|
||||
For tips on how to fuzz a common target on multiple cores or multiple networked
|
||||
machines, please refer to [parallel_fuzzing.txt](docs/parallel_fuzzing.txt).
|
||||
|
||||
The parallel fuzzing mode also offers a simple way for interfacing AFL to other
|
||||
fuzzers, to symbolic or concolic execution engines, and so forth; again, see the
|
||||
last section of [parallel_fuzzing.txt](docs/parallel_fuzzing.txt) for tips.
|
||||
|
||||
## 9) Fuzzer dictionaries
|
||||
|
||||
By default, the afl-fuzz mutation engine is optimized for compact data formats -
|
||||
say, images, multimedia, compressed data, regular expression syntax, or shell
|
||||
scripts. It is somewhat less suited for languages with particularly verbose and
|
||||
redundant verbiage - notably including HTML, SQL, or JavaScript.
|
||||
|
||||
To avoid the hassle of building syntax-aware tools, afl-fuzz provides a way to
|
||||
seed the fuzzing process with an optional dictionary of language keywords,
|
||||
magic headers, or other special tokens associated with the targeted data type
|
||||
-- and use that to reconstruct the underlying grammar on the go:
|
||||
|
||||
[http://lcamtuf.blogspot.com/2015/01/afl-fuzz-making-up-grammar-with.html](http://lcamtuf.blogspot.com/2015/01/afl-fuzz-making-up-grammar-with.html)
|
||||
|
||||
To use this feature, you first need to create a dictionary in one of the two
|
||||
formats discussed in dictionaries/README.dictionaries; and then point the fuzzer
|
||||
to it via the -x option in the command line.
|
||||
|
||||
(Several common dictionaries are already provided in that subdirectory, too.)
|
||||
|
||||
There is no way to provide more structured descriptions of the underlying
|
||||
syntax, but the fuzzer will likely figure out some of this based on the
|
||||
instrumentation feedback alone. This actually works in practice, say:
|
||||
|
||||
[http://lcamtuf.blogspot.com/2015/04/finding-bugs-in-sqlite-easy-way.html](http://lcamtuf.blogspot.com/2015/04/finding-bugs-in-sqlite-easy-way.html)
|
||||
|
||||
PS. Even when no explicit dictionary is given, afl-fuzz will try to extract
|
||||
existing syntax tokens in the input corpus by watching the instrumentation
|
||||
very closely during deterministic byte flips. This works for some types of
|
||||
parsers and grammars, but isn't nearly as good as the -x mode.
|
||||
|
||||
If a dictionary is really hard to come by, another option is to let AFL run
|
||||
for a while, and then use the token capture library that comes as a companion
|
||||
utility with AFL. For that, see libtokencap/README.tokencap.
|
||||
|
||||
## 10) Crash triage
|
||||
|
||||
The coverage-based grouping of crashes usually produces a small data set that
|
||||
can be quickly triaged manually or with a very simple GDB or Valgrind script.
|
||||
Every crash is also traceable to its parent non-crashing test case in the
|
||||
queue, making it easier to diagnose faults.
|
||||
|
||||
Having said that, it's important to acknowledge that some fuzzing crashes can be
|
||||
difficult to quickly evaluate for exploitability without a lot of debugging and
|
||||
code analysis work. To assist with this task, afl-fuzz supports a very unique
|
||||
"crash exploration" mode enabled with the -C flag.
|
||||
|
||||
In this mode, the fuzzer takes one or more crashing test cases as the input,
|
||||
and uses its feedback-driven fuzzing strategies to very quickly enumerate all
|
||||
code paths that can be reached in the program while keeping it in the
|
||||
crashing state.
|
||||
|
||||
Mutations that do not result in a crash are rejected; so are any changes that
|
||||
do not affect the execution path.
|
||||
|
||||
The output is a small corpus of files that can be very rapidly examined to see
|
||||
what degree of control the attacker has over the faulting address, or whether
|
||||
it is possible to get past an initial out-of-bounds read - and see what lies
|
||||
beneath.
|
||||
|
||||
Oh, one more thing: for test case minimization, give afl-tmin a try. The tool
|
||||
can be operated in a very simple way:
|
||||
|
||||
```shell
|
||||
$ ./afl-tmin -i test_case -o minimized_result -- /path/to/program [...]
|
||||
```
|
||||
|
||||
The tool works with crashing and non-crashing test cases alike. In the crash
|
||||
mode, it will happily accept instrumented and non-instrumented binaries. In the
|
||||
non-crashing mode, the minimizer relies on standard AFL instrumentation to make
|
||||
the file simpler without altering the execution path.
|
||||
|
||||
The minimizer accepts the -m, -t, -f and @@ syntax in a manner compatible with
|
||||
afl-fuzz.
|
||||
|
||||
Another recent addition to AFL is the afl-analyze tool. It takes an input
|
||||
file, attempts to sequentially flip bytes, and observes the behavior of the
|
||||
tested program. It then color-codes the input based on which sections appear to
|
||||
be critical, and which are not; while not bulletproof, it can often offer quick
|
||||
insights into complex file formats. More info about its operation can be found
|
||||
near the end of [technical_details.txt](docs/technical_details.txt).
|
||||
|
||||
## 11) Going beyond crashes
|
||||
|
||||
Fuzzing is a wonderful and underutilized technique for discovering non-crashing
|
||||
design and implementation errors, too. Quite a few interesting bugs have been
|
||||
found by modifying the target programs to call abort() when, say:
|
||||
|
||||
- Two bignum libraries produce different outputs when given the same
|
||||
fuzzer-generated input,
|
||||
|
||||
- An image library produces different outputs when asked to decode the same
|
||||
input image several times in a row,
|
||||
|
||||
- A serialization / deserialization library fails to produce stable outputs
|
||||
when iteratively serializing and deserializing fuzzer-supplied data,
|
||||
|
||||
- A compression library produces an output inconsistent with the input file
|
||||
when asked to compress and then decompress a particular blob.
|
||||
|
||||
Implementing these or similar sanity checks usually takes very little time;
|
||||
if you are the maintainer of a particular package, you can make this code
|
||||
conditional with `#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION` (a flag also
|
||||
shared with libfuzzer) or `#ifdef __AFL_COMPILER` (this one is just for AFL).
|
||||
|
||||
## 12) Common-sense risks
|
||||
|
||||
Please keep in mind that, similarly to many other computationally-intensive
|
||||
tasks, fuzzing may put strain on your hardware and on the OS. In particular:
|
||||
|
||||
- Your CPU will run hot and will need adequate cooling. In most cases, if
|
||||
cooling is insufficient or stops working properly, CPU speeds will be
|
||||
automatically throttled. That said, especially when fuzzing on less
|
||||
suitable hardware (laptops, smartphones, etc), it's not entirely impossible
|
||||
for something to blow up.
|
||||
|
||||
- Targeted programs may end up erratically grabbing gigabytes of memory or
|
||||
filling up disk space with junk files. AFL tries to enforce basic memory
|
||||
limits, but can't prevent each and every possible mishap. The bottom line
|
||||
is that you shouldn't be fuzzing on systems where the prospect of data loss
|
||||
is not an acceptable risk.
|
||||
|
||||
- Fuzzing involves billions of reads and writes to the filesystem. On modern
|
||||
systems, this will be usually heavily cached, resulting in fairly modest
|
||||
"physical" I/O - but there are many factors that may alter this equation.
|
||||
It is your responsibility to monitor for potential trouble; with very heavy
|
||||
I/O, the lifespan of many HDDs and SSDs may be reduced.
|
||||
|
||||
A good way to monitor disk I/O on Linux is the 'iostat' command:
|
||||
|
||||
```shell
|
||||
$ iostat -d 3 -x -k [...optional disk ID...]
|
||||
```
|
||||
|
||||
## 13) Known limitations & areas for improvement
|
||||
|
||||
Here are some of the most important caveats for AFL:
|
||||
|
||||
- AFL detects faults by checking for the first spawned process dying due to
|
||||
a signal (SIGSEGV, SIGABRT, etc). Programs that install custom handlers for
|
||||
these signals may need to have the relevant code commented out. In the same
|
||||
vein, faults in child processes spawned by the fuzzed target may evade
|
||||
detection unless you manually add some code to catch that.
|
||||
|
||||
- As with any other brute-force tool, the fuzzer offers limited coverage if
|
||||
encryption, checksums, cryptographic signatures, or compression are used to
|
||||
wholly wrap the actual data format to be tested.
|
||||
|
||||
To work around this, you can comment out the relevant checks (see
|
||||
experimental/libpng_no_checksum/ for inspiration); if this is not possible,
|
||||
you can also write a postprocessor, as explained in
|
||||
experimental/post_library/.
|
||||
|
||||
- There are some unfortunate trade-offs with ASAN and 64-bit binaries. This
|
||||
isn't due to any specific fault of afl-fuzz; see [notes_for_asan.txt](docs/notes_for_asan.txt)
|
||||
for tips.
|
||||
|
||||
- There is no direct support for fuzzing network services, background
|
||||
daemons, or interactive apps that require UI interaction to work. You may
|
||||
need to make simple code changes to make them behave in a more traditional
|
||||
way. Preeny may offer a relatively simple option, too - see:
|
||||
https://github.com/zardus/preeny
|
||||
|
||||
Some useful tips for modifying network-based services can be also found at:
|
||||
https://www.fastly.com/blog/how-to-fuzz-server-american-fuzzy-lop
|
||||
|
||||
- AFL doesn't output human-readable coverage data. If you want to monitor
|
||||
coverage, use afl-cov from Michael Rash: https://github.com/mrash/afl-cov
|
||||
|
||||
- Occasionally, sentient machines rise against their creators. If this
|
||||
happens to you, please consult http://lcamtuf.coredump.cx/prep/.
|
||||
|
||||
Beyond this, see INSTALL for platform-specific tips.
|
||||
|
||||
## 14) Special thanks
|
||||
|
||||
Many of the improvements to afl-fuzz wouldn't be possible without feedback,
|
||||
bug reports, or patches from:
|
||||
|
||||
```
|
||||
Jann Horn Hanno Boeck
|
||||
Felix Groebert Jakub Wilk
|
||||
Richard W. M. Jones Alexander Cherepanov
|
||||
Tom Ritter Hovik Manucharyan
|
||||
Sebastian Roschke Eberhard Mattes
|
||||
Padraig Brady Ben Laurie
|
||||
@dronesec Luca Barbato
|
||||
Tobias Ospelt Thomas Jarosch
|
||||
Martin Carpenter Mudge Zatko
|
||||
Joe Zbiciak Ryan Govostes
|
||||
Michael Rash William Robinet
|
||||
Jonathan Gray Filipe Cabecinhas
|
||||
Nico Weber Jodie Cunningham
|
||||
Andrew Griffiths Parker Thompson
|
||||
Jonathan Neuschfer Tyler Nighswander
|
||||
Ben Nagy Samir Aguiar
|
||||
Aidan Thornton Aleksandar Nikolich
|
||||
Sam Hakim Laszlo Szekeres
|
||||
David A. Wheeler Turo Lamminen
|
||||
Andreas Stieger Richard Godbee
|
||||
Louis Dassy teor2345
|
||||
Alex Moneger Dmitry Vyukov
|
||||
Keegan McAllister Kostya Serebryany
|
||||
Richo Healey Martijn Bogaard
|
||||
rc0r Jonathan Foote
|
||||
Christian Holler Dominique Pelle
|
||||
Jacek Wielemborek Leo Barnes
|
||||
Jeremy Barnes Jeff Trull
|
||||
Guillaume Endignoux ilovezfs
|
||||
Daniel Godas-Lopez Franjo Ivancic
|
||||
Austin Seipp Daniel Komaromy
|
||||
Daniel Binderman Jonathan Metzman
|
||||
Vegard Nossum Jan Kneschke
|
||||
Kurt Roeckx Marcel Bohme
|
||||
Van-Thuan Pham Abhik Roychoudhury
|
||||
Joshua J. Drake Toby Hutton
|
||||
Rene Freingruber Sergey Davidoff
|
||||
Sami Liedes Craig Young
|
||||
Andrzej Jackowski Daniel Hodson
|
||||
```
|
||||
|
||||
Thank you!
|
||||
|
||||
## 15) Contact
|
||||
|
||||
Questions? Concerns? Bug reports? Please use GitHub.
|
||||
|
||||
There is also a mailing list for the project; to join, send a mail to
|
||||
<afl-users+subscribe@googlegroups.com>. Or, if you prefer to browse
|
||||
archives first, try: [https://groups.google.com/group/afl-users](https://groups.google.com/group/afl-users).
|
@ -1,75 +0,0 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# american fuzzy lop - clang assembly normalizer
|
||||
# ----------------------------------------------
|
||||
#
|
||||
# Written and maintained by Michal Zalewski <lcamtuf@google.com>
|
||||
# The idea for this wrapper comes from Ryan Govostes.
|
||||
#
|
||||
# Copyright 2013, 2014 Google LLC All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at:
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# This 'as' wrapper should allow you to instrument unruly, hand-written
|
||||
# assembly with afl-as.
|
||||
#
|
||||
# Usage:
|
||||
#
|
||||
# export AFL_REAL_PATH=/path/to/directory/with/afl-as/
|
||||
# AFL_PATH=/path/to/this/directory/ make clean all
|
||||
|
||||
if [ "$#" -lt "2" ]; then
|
||||
echo "[-] Error: this utility can't be called directly." 1>&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ "$AFL_REAL_PATH" = "" ]; then
|
||||
echo "[-] Error: AFL_REAL_PATH not set!" 1>&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ ! -x "$AFL_REAL_PATH/afl-as" ]; then
|
||||
echo "[-] Error: AFL_REAL_PATH does not contain the 'afl-as' binary." 1>&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
unset __AFL_AS_CMDLINE __AFL_FNAME
|
||||
|
||||
while [ ! "$#" = "0" ]; do
|
||||
|
||||
if [ "$#" = "1" ]; then
|
||||
__AFL_FNAME="$1"
|
||||
else
|
||||
__AFL_AS_CMDLINE="${__AFL_AS_CMDLINE} $1"
|
||||
fi
|
||||
|
||||
shift
|
||||
|
||||
done
|
||||
|
||||
test "$TMPDIR" = "" && TMPDIR=/tmp
|
||||
|
||||
# Create the scratch file for the normalized assembly. The template is quoted
# so that a TMPDIR containing whitespace or glob characters reaches mktemp as
# a single argument instead of being word-split by the shell.
TMPFILE=`mktemp "$TMPDIR/.afl-XXXXXXXXXX.s"`
|
||||
|
||||
test "$TMPFILE" = "" && exit 1
|
||||
|
||||
clang -cc1as -filetype asm -output-asm-variant 0 "${__AFL_FNAME}" >"$TMPFILE"
|
||||
|
||||
ERR="$?"
|
||||
|
||||
if [ ! "$ERR" = "0" ]; then
|
||||
rm -f "$TMPFILE"
|
||||
exit $ERR
|
||||
fi
|
||||
|
||||
# Hand the normalized assembly to the real afl-as. __AFL_AS_CMDLINE is left
# unquoted so the shell word-splits it back into the separate options that
# were concatenated (space-separated) by the argument loop earlier in this
# script; note that any option containing whitespace is split as well.
"$AFL_REAL_PATH/afl-as" ${__AFL_AS_CMDLINE} "$TMPFILE"
|
||||
|
||||
ERR="$?"
|
||||
|
||||
rm -f "$TMPFILE"
|
||||
|
||||
exit "$ERR"
|
@ -0,0 +1,43 @@
|
||||
================
|
||||
AFL dictionaries
|
||||
================
|
||||
|
||||
(See ../docs/README for the general instruction manual.)
|
||||
|
||||
This subdirectory contains a set of dictionaries that can be used in
|
||||
conjunction with the -x option to allow the fuzzer to effortlessly explore the
|
||||
grammar of some of the more verbose data formats or languages. The basic
|
||||
principle behind the operation of fuzzer dictionaries is outlined in section 9
|
||||
of the "main" README for the project.
|
||||
|
||||
Custom dictionaries can be added at will. They should consist of a
|
||||
reasonably-sized set of rudimentary syntax units that the fuzzer will then try
|
||||
to cobble together in various ways. Snippets between 2 and 16 bytes are usually
|
||||
the sweet spot.
|
||||
|
||||
Custom dictionaries can be created in two ways:
|
||||
|
||||
- By creating a new directory and placing each token in a separate file, in
|
||||
which case, there is no need to escape or otherwise format the data.
|
||||
|
||||
- By creating a flat text file where tokens are listed one per line in the
|
||||
format of name="value". The alphanumeric name is ignored and can be omitted,
|
||||
although it is a convenient way to document the meaning of a particular
|
||||
token. The value must appear in quotes, with hex escaping (\xNN) applied to
|
||||
all non-printable, high-bit, or otherwise problematic characters (\\ and \"
|
||||
shorthands are recognized, too).
|
||||
|
||||
The fuzzer auto-selects the appropriate mode depending on whether the -x
|
||||
parameter is a file or a directory.
|
||||
|
||||
In the file mode, every name field can be optionally followed by @<num>, e.g.:
|
||||
|
||||
keyword_foo@1 = "foo"
|
||||
|
||||
Such entries will be loaded only if the requested dictionary level is equal or
|
||||
higher than this number. The default level is zero; a higher value can be set
|
||||
by appending @<num> to the dictionary file name, like so:
|
||||
|
||||
-x path/to/dictionary.dct@2
|
||||
|
||||
Good examples of dictionaries can be found in xml.dict and png.dict.
|
@ -0,0 +1,18 @@
|
||||
#
|
||||
# AFL dictionary for GIF images
|
||||
# -----------------------------
|
||||
#
|
||||
# Created by Michal Zalewski <lcamtuf@google.com>
|
||||
#
|
||||
|
||||
header_87a="87a"
|
||||
header_89a="89a"
|
||||
header_gif="GIF"
|
||||
|
||||
marker_2c=","
|
||||
marker_3b=";"
|
||||
|
||||
section_2101="!\x01\x12"
|
||||
section_21f9="!\xf9\x04"
|
||||
section_21fe="!\xfe"
|
||||
section_21ff="!\xff\x11"
|
@ -0,0 +1,160 @@
|
||||
#
|
||||
# AFL dictionary for HTML parsers (tags only)
|
||||
# -------------------------------------------
|
||||
#
|
||||
# A basic collection of HTML tags likely to matter to HTML parsers. Does *not*
|
||||
# include any attributes or attribute values.
|
||||
#
|
||||
# Created by Michal Zalewski <lcamtuf@google.com>
|
||||
#
|
||||
|
||||
tag_a="<a>"
|
||||
tag_abbr="<abbr>"
|
||||
tag_acronym="<acronym>"
|
||||
tag_address="<address>"
|
||||
tag_annotation_xml="<annotation-xml>"
|
||||
tag_applet="<applet>"
|
||||
tag_area="<area>"
|
||||
tag_article="<article>"
|
||||
tag_aside="<aside>"
|
||||
tag_audio="<audio>"
|
||||
tag_b="<b>"
|
||||
tag_base="<base>"
|
||||
tag_basefont="<basefont>"
|
||||
tag_bdi="<bdi>"
|
||||
tag_bdo="<bdo>"
|
||||
tag_bgsound="<bgsound>"
|
||||
tag_big="<big>"
|
||||
tag_blink="<blink>"
|
||||
tag_blockquote="<blockquote>"
|
||||
tag_body="<body>"
|
||||
tag_br="<br>"
|
||||
tag_button="<button>"
|
||||
tag_canvas="<canvas>"
|
||||
tag_caption="<caption>"
|
||||
tag_center="<center>"
|
||||
tag_cite="<cite>"
|
||||
tag_code="<code>"
|
||||
tag_col="<col>"
|
||||
tag_colgroup="<colgroup>"
|
||||
tag_data="<data>"
|
||||
tag_datalist="<datalist>"
|
||||
tag_dd="<dd>"
|
||||
tag_del="<del>"
|
||||
tag_desc="<desc>"
|
||||
tag_details="<details>"
|
||||
tag_dfn="<dfn>"
|
||||
tag_dir="<dir>"
|
||||
tag_div="<div>"
|
||||
tag_dl="<dl>"
|
||||
tag_dt="<dt>"
|
||||
tag_em="<em>"
|
||||
tag_embed="<embed>"
|
||||
tag_fieldset="<fieldset>"
|
||||
tag_figcaption="<figcaption>"
|
||||
tag_figure="<figure>"
|
||||
tag_font="<font>"
|
||||
tag_footer="<footer>"
|
||||
tag_foreignobject="<foreignobject>"
|
||||
tag_form="<form>"
|
||||
tag_frame="<frame>"
|
||||
tag_frameset="<frameset>"
|
||||
tag_h1="<h1>"
|
||||
tag_h2="<h2>"
|
||||
tag_h3="<h3>"
|
||||
tag_h4="<h4>"
|
||||
tag_h5="<h5>"
|
||||
tag_h6="<h6>"
|
||||
tag_head="<head>"
|
||||
tag_header="<header>"
|
||||
tag_hgroup="<hgroup>"
|
||||
tag_hr="<hr>"
|
||||
tag_html="<html>"
|
||||
tag_i="<i>"
|
||||
tag_iframe="<iframe>"
|
||||
tag_image="<image>"
|
||||
tag_img="<img>"
|
||||
tag_input="<input>"
|
||||
tag_ins="<ins>"
|
||||
tag_isindex="<isindex>"
|
||||
tag_kbd="<kbd>"
|
||||
tag_keygen="<keygen>"
|
||||
tag_label="<label>"
|
||||
tag_legend="<legend>"
|
||||
tag_li="<li>"
|
||||
tag_link="<link>"
|
||||
tag_listing="<listing>"
|
||||
tag_main="<main>"
|
||||
tag_malignmark="<malignmark>"
|
||||
tag_map="<map>"
|
||||
tag_mark="<mark>"
|
||||
tag_marquee="<marquee>"
|
||||
tag_math="<math>"
|
||||
tag_menu="<menu>"
|
||||
tag_menuitem="<menuitem>"
|
||||
tag_meta="<meta>"
|
||||
tag_meter="<meter>"
|
||||
tag_mglyph="<mglyph>"
|
||||
tag_mi="<mi>"
|
||||
tag_mn="<mn>"
|
||||
tag_mo="<mo>"
|
||||
tag_ms="<ms>"
|
||||
tag_mtext="<mtext>"
|
||||
tag_multicol="<multicol>"
|
||||
tag_nav="<nav>"
|
||||
tag_nextid="<nextid>"
|
||||
tag_nobr="<nobr>"
|
||||
tag_noembed="<noembed>"
|
||||
tag_noframes="<noframes>"
|
||||
tag_noscript="<noscript>"
|
||||
tag_object="<object>"
|
||||
tag_ol="<ol>"
|
||||
tag_optgroup="<optgroup>"
|
||||
tag_option="<option>"
|
||||
tag_output="<output>"
|
||||
tag_p="<p>"
|
||||
tag_param="<param>"
|
||||
tag_plaintext="<plaintext>"
|
||||
tag_pre="<pre>"
|
||||
tag_progress="<progress>"
|
||||
tag_q="<q>"
|
||||
tag_rb="<rb>"
|
||||
tag_rp="<rp>"
|
||||
tag_rt="<rt>"
|
||||
tag_rtc="<rtc>"
|
||||
tag_ruby="<ruby>"
|
||||
tag_s="<s>"
|
||||
tag_samp="<samp>"
|
||||
tag_script="<script>"
|
||||
tag_section="<section>"
|
||||
tag_select="<select>"
|
||||
tag_small="<small>"
|
||||
tag_source="<source>"
|
||||
tag_spacer="<spacer>"
|
||||
tag_span="<span>"
|
||||
tag_strike="<strike>"
|
||||
tag_strong="<strong>"
|
||||
tag_style="<style>"
|
||||
tag_sub="<sub>"
|
||||
tag_summary="<summary>"
|
||||
tag_sup="<sup>"
|
||||
tag_svg="<svg>"
|
||||
tag_table="<table>"
|
||||
tag_tbody="<tbody>"
|
||||
tag_td="<td>"
|
||||
tag_template="<template>"
|
||||
tag_textarea="<textarea>"
|
||||
tag_tfoot="<tfoot>"
|
||||
tag_th="<th>"
|
||||
tag_thead="<thead>"
|
||||
tag_time="<time>"
|
||||
tag_title="<title>"
|
||||
tag_tr="<tr>"
|
||||
tag_track="<track>"
|
||||
tag_tt="<tt>"
|
||||
tag_u="<u>"
|
||||
tag_ul="<ul>"
|
||||
tag_var="<var>"
|
||||
tag_video="<video>"
|
||||
tag_wbr="<wbr>"
|
||||
tag_xmp="<xmp>"
|
@ -0,0 +1,22 @@
|
||||
#
|
||||
# AFL dictionary for JPEG images
|
||||
# ------------------------------
|
||||
#
|
||||
# Created by Michal Zalewski <lcamtuf@google.com>
|
||||
#
|
||||
|
||||
header_jfif="JFIF\x00"
|
||||
header_jfxx="JFXX\x00"
|
||||
|
||||
section_ffc0="\xff\xc0"
|
||||
section_ffc2="\xff\xc2"
|
||||
section_ffc4="\xff\xc4"
|
||||
section_ffd0="\xff\xd0"
|
||||
section_ffd8="\xff\xd8"
|
||||
section_ffd9="\xff\xd9"
|
||||
section_ffda="\xff\xda"
|
||||
section_ffdb="\xff\xdb"
|
||||
section_ffdd="\xff\xdd"
|
||||
section_ffe0="\xff\xe0"
|
||||
section_ffe1="\xff\xe1"
|
||||
section_fffe="\xff\xfe"
|
@ -0,0 +1,107 @@
|
||||
#
|
||||
# AFL dictionary for JavaScript
|
||||
# -----------------------------
|
||||
#
|
||||
# Contains basic reserved keywords and syntax building blocks.
|
||||
#
|
||||
# Created by Michal Zalewski <lcamtuf@google.com>
|
||||
#
|
||||
|
||||
keyword_arguments="arguments"
|
||||
keyword_break="break"
|
||||
keyword_case="case"
|
||||
keyword_catch="catch"
|
||||
keyword_const="const"
|
||||
keyword_continue="continue"
|
||||
keyword_debugger="debugger"
|
||||
keyword_decodeURI="decodeURI"
|
||||
keyword_default="default"
|
||||
keyword_delete="delete"
|
||||
keyword_do="do"
|
||||
keyword_else="else"
|
||||
keyword_escape="escape"
|
||||
keyword_eval="eval"
|
||||
keyword_export="export"
|
||||
keyword_finally="finally"
|
||||
keyword_for="for (a=0;a<2;a++)"
|
||||
keyword_function="function"
|
||||
keyword_if="if"
|
||||
keyword_in="in"
|
||||
keyword_instanceof="instanceof"
|
||||
keyword_isNaN="isNaN"
|
||||
keyword_let="let"
|
||||
keyword_new="new"
|
||||
keyword_parseInt="parseInt"
|
||||
keyword_return="return"
|
||||
keyword_switch="switch"
|
||||
keyword_this="this"
|
||||
keyword_throw="throw"
|
||||
keyword_try="try"
|
||||
keyword_typeof="typeof"
|
||||
keyword_var="var"
|
||||
keyword_void="void"
|
||||
keyword_while="while"
|
||||
keyword_with="with"
|
||||
|
||||
misc_1=" 1"
|
||||
misc_a="a"
|
||||
misc_array=" [1]"
|
||||
misc_assign=" a=1"
|
||||
misc_code_block=" {1}"
|
||||
misc_colon_num=" 1:"
|
||||
misc_colon_string=" 'a':"
|
||||
misc_comma=" ,"
|
||||
misc_comment_block=" /* */"
|
||||
misc_comment_line=" //"
|
||||
misc_cond=" 1?2:3"
|
||||
misc_dec=" --"
|
||||
misc_div=" /"
|
||||
misc_equals=" ="
|
||||
misc_fn=" a()"
|
||||
misc_identical=" ==="
|
||||
misc_inc=" ++"
|
||||
misc_minus=" -"
|
||||
misc_modulo=" %"
|
||||
misc_parentheses=" ()"
|
||||
misc_parentheses_1=" (1)"
|
||||
misc_parentheses_1x4=" (1,1,1,1)"
|
||||
misc_parentheses_a=" (a)"
|
||||
misc_period="."
|
||||
misc_plus=" +"
|
||||
misc_plus_assign=" +="
|
||||
misc_regex=" /a/g"
|
||||
misc_rol=" <<<"
|
||||
misc_semicolon=" ;"
|
||||
misc_serialized_object=" {'a': 1}"
|
||||
misc_string=" 'a'"
|
||||
misc_unicode=" '\\u0001'"
|
||||
|
||||
object_Array=" Array"
|
||||
object_Boolean=" Boolean"
|
||||
object_Date=" Date"
|
||||
object_Function=" Function"
|
||||
object_Infinity=" Infinity"
|
||||
object_Int8Array=" Int8Array"
|
||||
object_Math=" Math"
|
||||
object_NaN=" NaN"
|
||||
object_Number=" Number"
|
||||
object_Object=" Object"
|
||||
object_RegExp=" RegExp"
|
||||
object_String=" String"
|
||||
object_Symbol=" Symbol"
|
||||
object_false=" false"
|
||||
object_null=" null"
|
||||
object_true=" true"
|
||||
|
||||
prop_charAt=".charAt"
|
||||
prop_concat=".concat"
|
||||
prop_constructor=".constructor"
|
||||
prop_destructor=".destructor"
|
||||
prop_length=".length"
|
||||
prop_match=".match"
|
||||
prop_proto=".__proto__"
|
||||
prop_prototype=".prototype"
|
||||
prop_slice=".slice"
|
||||
prop_toCode=".toCode"
|
||||
prop_toString=".toString"
|
||||
prop_valueOf=".valueOf"
|
@ -0,0 +1,52 @@
|
||||
#
|
||||
# AFL dictionary for JSON
|
||||
# -----------------------
|
||||
#
|
||||
# Just the very basics.
|
||||
#
|
||||
# Inspired by a dictionary by Jakub Wilk <jwilk@jwilk.net>
|
||||
#
|
||||
|
||||
"0"
|
||||
",0"
|
||||
":0"
|
||||
"0:"
|
||||
"-1.2e+3"
|
||||
|
||||
"true"
|
||||
"false"
|
||||
"null"
|
||||
|
||||
"\"\""
|
||||
",\"\""
|
||||
":\"\""
|
||||
"\"\":"
|
||||
|
||||
"{}"
|
||||
",{}"
|
||||
":{}"
|
||||
"{\"\":0}"
|
||||
"{{}}"
|
||||
|
||||
"[]"
|
||||
",[]"
|
||||
":[]"
|
||||
"[0]"
|
||||
"[[]]"
|
||||
|
||||
"''"
|
||||
"\\"
|
||||
"\\b"
|
||||
"\\f"
|
||||
"\\n"
|
||||
"\\r"
|
||||
"\\t"
|
||||
"\\u0000"
|
||||
"\\x00"
|
||||
"\\0"
|
||||
"\\uD800\\uDC00"
|
||||
"\\uDBFF\\uDFFF"
|
||||
|
||||
"\"\":0"
|
||||
"//"
|
||||
"/**/"
|
@ -0,0 +1,16 @@
|
||||
|
||||
#
|
||||
# AFL dictionary for fuzzing Perl
|
||||
# --------------------------------
|
||||
#
|
||||
# Created by @RandomDhiraj
|
||||
#
|
||||
|
||||
"<:crlf"
|
||||
"fwrite()"
|
||||
"fread()"
|
||||
":raw:utf8"
|
||||
":raw:eol(LF)"
|
||||
"Perl_invert()"
|
||||
":raw:eol(CRLF)"
|
||||
"Perl_PerlIO_eof()"
|
@ -0,0 +1,38 @@
|
||||
#
|
||||
# AFL dictionary for PNG images
|
||||
# -----------------------------
|
||||
#
|
||||
# Just the basic, standard-originating sections; does not include vendor
|
||||
# extensions.
|
||||
#
|
||||
# Created by Michal Zalewski <lcamtuf@google.com>
|
||||
#
|
||||
|
||||
header_png="\x89PNG\x0d\x0a\x1a\x0a"
|
||||
|
||||
section_IDAT="IDAT"
|
||||
section_IEND="IEND"
|
||||
section_IHDR="IHDR"
|
||||
section_PLTE="PLTE"
|
||||
section_bKGD="bKGD"
|
||||
section_cHRM="cHRM"
|
||||
section_fRAc="fRAc"
|
||||
section_gAMA="gAMA"
|
||||
section_gIFg="gIFg"
|
||||
section_gIFt="gIFt"
|
||||
section_gIFx="gIFx"
|
||||
section_hIST="hIST"
|
||||
section_iCCP="iCCP"
|
||||
section_iTXt="iTXt"
|
||||
section_oFFs="oFFs"
|
||||
section_pCAL="pCAL"
|
||||
section_pHYs="pHYs"
|
||||
section_sBIT="sBIT"
|
||||
section_sCAL="sCAL"
|
||||
section_sPLT="sPLT"
|
||||
section_sRGB="sRGB"
|
||||
section_sTER="sTER"
|
||||
section_tEXt="tEXt"
|
||||
section_tIME="tIME"
|
||||
section_tRNS="tRNS"
|
||||
section_zTXt="zTXt"
|
@ -0,0 +1,254 @@
|
||||
#
|
||||
# AFL dictionary for JS regex
|
||||
# ---------------------------
|
||||
#
|
||||
# Contains various regular expressions.
|
||||
#
|
||||
# Created by Yang Guo <yangguo@chromium.org>
|
||||
#
|
||||
"?"
|
||||
"abc"
|
||||
"()"
|
||||
"[]"
|
||||
"abc|def"
|
||||
"abc|def|ghi"
|
||||
"^xxx$"
|
||||
"ab\\b\\d\\bcd"
|
||||
"\\w|\\d"
|
||||
"a*?"
|
||||
"abc+"
|
||||
"abc+?"
|
||||
"xyz?"
|
||||
"xyz??"
|
||||
"xyz{0,1}"
|
||||
"xyz{0,1}?"
|
||||
"xyz{93}"
|
||||
"xyz{1,32}"
|
||||
"xyz{1,32}?"
|
||||
"xyz{1,}"
|
||||
"xyz{1,}?"
|
||||
"a\\fb\\nc\\rd\\te\\vf"
|
||||
"a\\nb\\bc"
|
||||
"(?:foo)"
|
||||
"(?: foo )"
|
||||
"foo|(bar|baz)|quux"
|
||||
"foo(?=bar)baz"
|
||||
"foo(?!bar)baz"
|
||||
"foo(?<=bar)baz"
|
||||
"foo(?<!bar)baz"
|
||||
"()"
|
||||
"(?=)"
|
||||
"[]"
|
||||
"[x]"
|
||||
"[xyz]"
|
||||
"[a-zA-Z0-9]"
|
||||
"[-123]"
|
||||
"[^123]"
|
||||
"]"
|
||||
"}"
|
||||
"[a-b-c]"
|
||||
"[x\\dz]"
|
||||
"[\\d-z]"
|
||||
"[\\d-\\d]"
|
||||
"[z-\\d]"
|
||||
"\\cj\\cJ\\ci\\cI\\ck\\cK"
|
||||
"\\c!"
|
||||
"\\c_"
|
||||
"\\c~"
|
||||
"[\\c!]"
|
||||
"[\\c_]"
|
||||
"[\\c~]"
|
||||
"[\\ca]"
|
||||
"[\\cz]"
|
||||
"[\\cA]"
|
||||
"[\\cZ]"
|
||||
"[\\c1]"
|
||||
"\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ "
|
||||
"[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]"
|
||||
"\\8"
|
||||
"\\9"
|
||||
"\\11"
|
||||
"\\11a"
|
||||
"\\011"
|
||||
"\\118"
|
||||
"\\111"
|
||||
"\\1111"
|
||||
"(x)(x)(x)\\1"
|
||||
"(x)(x)(x)\\2"
|
||||
"(x)(x)(x)\\3"
|
||||
"(x)(x)(x)\\4"
|
||||
"(x)(x)(x)\\1*"
|
||||
"(x)(x)(x)\\3*"
|
||||
"(x)(x)(x)\\4*"
|
||||
"(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10"
|
||||
"(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11"
|
||||
"(a)\\1"
|
||||
"(a\\1)"
|
||||
"(\\1a)"
|
||||
"(\\2)(\\1)"
|
||||
"(?=a){0,10}a"
|
||||
"(?=a){1,10}a"
|
||||
"(?=a){9,10}a"
|
||||
"(?!a)?a"
|
||||
"\\1(a)"
|
||||
"(?!(a))\\1"
|
||||
"(?!\\1(a\\1)\\1)\\1"
|
||||
"\\1\\2(a(?:\\1(b\\1\\2))\\2)\\1"
|
||||
"[\\0]"
|
||||
"[\\11]"
|
||||
"[\\11a]"
|
||||
"[\\011]"
|
||||
"[\\00011]"
|
||||
"[\\118]"
|
||||
"[\\111]"
|
||||
"[\\1111]"
|
||||
"\\x60"
|
||||
"\\x3z"
|
||||
"\\c"
|
||||
"\\u0034"
|
||||
"\\u003z"
|
||||
"foo[z]*"
|
||||
"\\u{12345}"
|
||||
"\\u{12345}\\u{23456}"
|
||||
"\\u{12345}{3}"
|
||||
"\\u{12345}*"
|
||||
"\\ud808\\udf45*"
|
||||
"[\\ud808\\udf45-\\ud809\\udccc]"
|
||||
"a"
|
||||
"a|b"
|
||||
"a\\n"
|
||||
"a$"
|
||||
"a\\b!"
|
||||
"a\\Bb"
|
||||
"a*?"
|
||||
"a?"
|
||||
"a??"
|
||||
"a{0,1}?"
|
||||
"a{1,2}?"
|
||||
"a+?"
|
||||
"(a)"
|
||||
"(a)\\1"
|
||||
"(\\1a)"
|
||||
"\\1(a)"
|
||||
"a\\s"
|
||||
"a\\S"
|
||||
"a\\D"
|
||||
"a\\w"
|
||||
"a\\W"
|
||||
"a."
|
||||
"a\\q"
|
||||
"a[a]"
|
||||
"a[^a]"
|
||||
"a[a-z]"
|
||||
"a(?:b)"
|
||||
"a(?=b)"
|
||||
"a(?!b)"
|
||||
"\\x60"
|
||||
"\\u0060"
|
||||
"\\cA"
|
||||
"\\q"
|
||||
"\\1112"
|
||||
"(a)\\1"
|
||||
"(?!a)?a\\1"
|
||||
"(?:(?=a))a\\1"
|
||||
"a{}"
|
||||
"a{,}"
|
||||
"a{"
|
||||
"a{z}"
|
||||
"a{12z}"
|
||||
"a{12,"
|
||||
"a{12,3b"
|
||||
"{}"
|
||||
"{,}"
|
||||
"{"
|
||||
"{z}"
|
||||
"{1z}"
|
||||
"{12,"
|
||||
"{12,3b"
|
||||
"a"
|
||||
"abc"
|
||||
"a[bc]d"
|
||||
"a|bc"
|
||||
"ab|c"
|
||||
"a||bc"
|
||||
"(?:ab)"
|
||||
"(?:ab|cde)"
|
||||
"(?:ab)|cde"
|
||||
"(ab)"
|
||||
"(ab|cde)"
|
||||
"(ab)\\1"
|
||||
"(ab|cde)\\1"
|
||||
"(?:ab)?"
|
||||
"(?:ab)+"
|
||||
"a?"
|
||||
"a+"
|
||||
"a??"
|
||||
"a*?"
|
||||
"a+?"
|
||||
"(?:a?)?"
|
||||
"(?:a+)?"
|
||||
"(?:a?)+"
|
||||
"(?:a*)+"
|
||||
"(?:a+)+"
|
||||
"(?:a?)*"
|
||||
"(?:a*)*"
|
||||
"(?:a+)*"
|
||||
"a{0}"
|
||||
"(?:a+){0,0}"
|
||||
"a*b"
|
||||
"a+b"
|
||||
"a*b|c"
|
||||
"a+b|c"
|
||||
"(?:a{5,1000000}){3,1000000}"
|
||||
"(?:ab){4,7}"
|
||||
"a\\bc"
|
||||
"a\\sc"
|
||||
"a\\Sc"
|
||||
"a(?=b)c"
|
||||
"a(?=bbb|bb)c"
|
||||
"a(?!bbb|bb)c"
|
||||
"\xe2\x81\xa3"
|
||||
"[\xe2\x81\xa3]"
|
||||
"\xed\xb0\x80"
|
||||
"\xed\xa0\x80"
|
||||
"(\xed\xb0\x80)\x01"
|
||||
"((\xed\xa0\x80))\x02"
|
||||
"\xf0\x9f\x92\xa9"
|
||||
"\x01"
|
||||
"\x0f"
|
||||
"[-\xf0\x9f\x92\xa9]+"
|
||||
"[\xf0\x9f\x92\xa9-\xf4\x8f\xbf\xbf]"
|
||||
"(?<=)"
|
||||
"(?<=a)"
|
||||
"(?<!)"
|
||||
"(?<!a)"
|
||||
"(?<a>)"
|
||||
"(?<a>.)"
|
||||
"(?<a>.)\\k<a>"
|
||||
"\\p{Script=Greek}"
|
||||
"\\P{sc=Greek}"
|
||||
"\\p{Script_Extensions=Greek}"
|
||||
"\\P{scx=Greek}"
|
||||
"\\p{General_Category=Decimal_Number}"
|
||||
"\\P{gc=Decimal_Number}"
|
||||
"\\p{gc=Nd}"
|
||||
"\\P{Decimal_Number}"
|
||||
"\\p{Nd}"
|
||||
"\\P{Any}"
|
||||
"\\p{Changes_When_NFKC_Casefolded}"
|
||||
"(?:a?)??"
|
||||
"a?)"xyz{93}"
|
||||
"{93}"
|
||||
"a{12za?)?"
|
||||
"[\x8f]"
|
||||
"[\xf0\x9f\x92\xa9-\xf4\x8f\xbf\x92\xa9-\xf4\x8f\xbf\xbf]"
|
||||
"[\x92\xa9-\xf4\x8f\xbf\xbf]"
|
||||
"\\1\\2(b\\1\\2))\\2)\\1"
|
||||
"\\1\\2(a(?:\\1\\2))\\2)\\1"
|
||||
"?:\\1"
|
||||
"\\1(b\\1\\2))\\2)\\1"
|
||||
"\\1\\2(a(?:\\1(b\\1\\2))\\2)\\1"
|
||||
"foo(?=bar)bar)baz"
|
||||
"fo(?o(?o(?o(?=bar)baz"
|
||||
"foo(?=bar)baz"
|
||||
"foo(?=bar)bar)az"
|
@ -0,0 +1,282 @@
|
||||
#
|
||||
# AFL dictionary for SQL
|
||||
# ----------------------
|
||||
#
|
||||
# Modeled based on SQLite documentation, contains some number of SQLite
|
||||
# extensions. Other dialects of SQL may benefit from customized dictionaries.
|
||||
#
|
||||
# If you append @1 to the file name when loading this dictionary, afl-fuzz
|
||||
# will also additionally load a selection of pragma keywords that are very
|
||||
# specific to SQLite (and are probably less interesting from the security
|
||||
# standpoint, because they are usually not allowed in non-privileged
|
||||
# contexts).
|
||||
#
|
||||
# Created by Michal Zalewski <lcamtuf@google.com>
|
||||
#
|
||||
|
||||
function_abs=" abs(1)"
|
||||
function_avg=" avg(1)"
|
||||
function_changes=" changes()"
|
||||
function_char=" char(1)"
|
||||
function_coalesce=" coalesce(1,1)"
|
||||
function_count=" count(1)"
|
||||
function_date=" date(1,1,1)"
|
||||
function_datetime=" datetime(1,1,1)"
|
||||
function_decimal=" decimal(1,1)"
|
||||
function_glob=" glob(1,1)"
|
||||
function_group_concat=" group_concat(1,1)"
|
||||
function_hex=" hex(1)"
|
||||
function_ifnull=" ifnull(1,1)"
|
||||
function_instr=" instr(1,1)"
|
||||
function_julianday=" julianday(1,1,1)"
|
||||
function_last_insert_rowid=" last_insert_rowid()"
|
||||
function_length=" length(1)"
|
||||
function_like=" like(1,1)"
|
||||
function_likelihood=" likelihood(1,1)"
|
||||
function_likely=" likely(1)"
|
||||
function_load_extension=" load_extension(1,1)"
|
||||
function_lower=" lower(1)"
|
||||
function_ltrim=" ltrim(1,1)"
|
||||
function_max=" max(1,1)"
|
||||
function_min=" min(1,1)"
|
||||
function_nullif=" nullif(1,1)"
|
||||
function_printf=" printf(1,1)"
|
||||
function_quote=" quote(1)"
|
||||
function_random=" random()"
|
||||
function_randomblob=" randomblob(1)"
|
||||
function_replace=" replace(1,1,1)"
|
||||
function_round=" round(1,1)"
|
||||
function_rtrim=" rtrim(1,1)"
|
||||
function_soundex=" soundex(1)"
|
||||
function_sqlite_compileoption_get=" sqlite_compileoption_get(1)"
|
||||
function_sqlite_compileoption_used=" sqlite_compileoption_used(1)"
|
||||
function_sqlite_source_id=" sqlite_source_id()"
|
||||
function_sqlite_version=" sqlite_version()"
|
||||
function_strftime=" strftime(1,1,1,1)"
|
||||
function_substr=" substr(1,1,1)"
|
||||
function_sum=" sum(1)"
|
||||
function_time=" time(1,1,1)"
|
||||
function_total=" total(1)"
|
||||
function_total_changes=" total_changes()"
|
||||
function_trim=" trim(1,1)"
|
||||
function_typeof=" typeof(1)"
|
||||
function_unicode=" unicode(1)"
|
||||
function_unlikely=" unlikely(1)"
|
||||
function_upper=" upper(1)"
|
||||
function_varchar=" varchar(1)"
|
||||
function_zeroblob=" zeroblob(1)"
|
||||
|
||||
keyword_ABORT="ABORT"
|
||||
keyword_ACTION="ACTION"
|
||||
keyword_ADD="ADD"
|
||||
keyword_AFTER="AFTER"
|
||||
keyword_ALL="ALL"
|
||||
keyword_ALTER="ALTER"
|
||||
keyword_ANALYZE="ANALYZE"
|
||||
keyword_AND="AND"
|
||||
keyword_AS="AS"
|
||||
keyword_ASC="ASC"
|
||||
keyword_ATTACH="ATTACH"
|
||||
keyword_AUTOINCREMENT="AUTOINCREMENT"
|
||||
keyword_BEFORE="BEFORE"
|
||||
keyword_BEGIN="BEGIN"
|
||||
keyword_BETWEEN="BETWEEN"
|
||||
keyword_BY="BY"
|
||||
keyword_CASCADE="CASCADE"
|
||||
keyword_CASE="CASE"
|
||||
keyword_CAST="CAST"
|
||||
keyword_CHECK="CHECK"
|
||||
keyword_COLLATE="COLLATE"
|
||||
keyword_COLUMN="COLUMN"
|
||||
keyword_COMMIT="COMMIT"
|
||||
keyword_CONFLICT="CONFLICT"
|
||||
keyword_CONSTRAINT="CONSTRAINT"
|
||||
keyword_CREATE="CREATE"
|
||||
keyword_CROSS="CROSS"
|
||||
keyword_CURRENT_DATE="CURRENT_DATE"
|
||||
keyword_CURRENT_TIME="CURRENT_TIME"
|
||||
keyword_CURRENT_TIMESTAMP="CURRENT_TIMESTAMP"
|
||||
keyword_DATABASE="DATABASE"
|
||||
keyword_DEFAULT="DEFAULT"
|
||||
keyword_DEFERRABLE="DEFERRABLE"
|
||||
keyword_DEFERRED="DEFERRED"
|
||||
keyword_DELETE="DELETE"
|
||||
keyword_DESC="DESC"
|
||||
keyword_DETACH="DETACH"
|
||||
keyword_DISTINCT="DISTINCT"
|
||||
keyword_DROP="DROP"
|
||||
keyword_EACH="EACH"
|
||||
keyword_ELSE="ELSE"
|
||||
keyword_END="END"
|
||||
keyword_ESCAPE="ESCAPE"
|
||||
keyword_EXCEPT="EXCEPT"
|
||||
keyword_EXCLUSIVE="EXCLUSIVE"
|
||||
keyword_EXISTS="EXISTS"
|
||||
keyword_EXPLAIN="EXPLAIN"
|
||||
keyword_FAIL="FAIL"
|
||||
keyword_FOR="FOR"
|
||||
keyword_FOREIGN="FOREIGN"
|
||||
keyword_FROM="FROM"
|
||||
keyword_FULL="FULL"
|
||||
keyword_GLOB="GLOB"
|
||||
keyword_GROUP="GROUP"
|
||||
keyword_HAVING="HAVING"
|
||||
keyword_IF="IF"
|
||||
keyword_IGNORE="IGNORE"
|
||||
keyword_IMMEDIATE="IMMEDIATE"
|
||||
keyword_IN="IN"
|
||||
keyword_INDEX="INDEX"
|
||||
keyword_INDEXED="INDEXED"
|
||||
keyword_INITIALLY="INITIALLY"
|
||||
keyword_INNER="INNER"
|
||||
keyword_INSERT="INSERT"
|
||||
keyword_INSTEAD="INSTEAD"
|
||||
keyword_INTERSECT="INTERSECT"
|
||||
keyword_INTO="INTO"
|
||||
keyword_IS="IS"
|
||||
keyword_ISNULL="ISNULL"
|
||||
keyword_JOIN="JOIN"
|
||||
keyword_KEY="KEY"
|
||||
keyword_LEFT="LEFT"
|
||||
keyword_LIKE="LIKE"
|
||||
keyword_LIMIT="LIMIT"
|
||||
keyword_MATCH="MATCH"
|
||||
keyword_NATURAL="NATURAL"
|
||||
keyword_NO="NO"
|
||||
keyword_NOT="NOT"
|
||||
keyword_NOTNULL="NOTNULL"
|
||||
keyword_NULL="NULL"
|
||||
keyword_OF="OF"
|
||||
keyword_OFFSET="OFFSET"
|
||||
keyword_ON="ON"
|
||||
keyword_OR="OR"
|
||||
keyword_ORDER="ORDER"
|
||||
keyword_OUTER="OUTER"
|
||||
keyword_PLAN="PLAN"
|
||||
keyword_PRAGMA="PRAGMA"
|
||||
keyword_PRIMARY="PRIMARY"
|
||||
keyword_QUERY="QUERY"
|
||||
keyword_RAISE="RAISE"
|
||||
keyword_RECURSIVE="RECURSIVE"
|
||||
keyword_REFERENCES="REFERENCES"
|
||||
keyword_REGEXP="REGEXP"
|
||||
keyword_REINDEX="REINDEX"
|
||||
keyword_RELEASE="RELEASE"
|
||||
keyword_RENAME="RENAME"
|
||||
keyword_REPLACE="REPLACE"
|
||||
keyword_RESTRICT="RESTRICT"
|
||||
keyword_RIGHT="RIGHT"
|
||||
keyword_ROLLBACK="ROLLBACK"
|
||||
keyword_ROW="ROW"
|
||||
keyword_SAVEPOINT="SAVEPOINT"
|
||||
keyword_SELECT="SELECT"
|
||||
keyword_SET="SET"
|
||||
keyword_TABLE="TABLE"
|
||||
keyword_TEMP="TEMP"
|
||||
keyword_TEMPORARY="TEMPORARY"
|
||||
keyword_THEN="THEN"
|
||||
keyword_TO="TO"
|
||||
keyword_TRANSACTION="TRANSACTION"
|
||||
keyword_TRIGGER="TRIGGER"
|
||||
keyword_UNION="UNION"
|
||||
keyword_UNIQUE="UNIQUE"
|
||||
keyword_UPDATE="UPDATE"
|
||||
keyword_USING="USING"
|
||||
keyword_VACUUM="VACUUM"
|
||||
keyword_VALUES="VALUES"
|
||||
keyword_VIEW="VIEW"
|
||||
keyword_VIRTUAL="VIRTUAL"
|
||||
keyword_WHEN="WHEN"
|
||||
keyword_WHERE="WHERE"
|
||||
keyword_WITH="WITH"
|
||||
keyword_WITHOUT="WITHOUT"
|
||||
|
||||
operator_concat=" || "
|
||||
operator_above_eq=" >="
|
||||
|
||||
snippet_1eq1=" 1=1"
|
||||
snippet_at=" @1"
|
||||
snippet_backticks=" `a`"
|
||||
snippet_blob=" blob"
|
||||
snippet_brackets=" [a]"
|
||||
snippet_colon=" :1"
|
||||
snippet_comment=" /* */"
|
||||
snippet_date="2001-01-01"
|
||||
snippet_dollar=" $1"
|
||||
snippet_dotref=" a.b"
|
||||
snippet_fmtY="%Y"
|
||||
snippet_int=" int"
|
||||
snippet_neg1=" -1"
|
||||
snippet_pair=" a,b"
|
||||
snippet_parentheses=" (1)"
|
||||
snippet_plus2days="+2 days"
|
||||
snippet_qmark=" ?1"
|
||||
snippet_semicolon=" ;"
|
||||
snippet_star=" *"
|
||||
snippet_string_pair=" \"a\",\"b\""
|
||||
|
||||
string_dbl_q=" \"a\""
|
||||
string_escaped_q=" 'a''b'"
|
||||
string_single_q=" 'a'"
|
||||
|
||||
pragma_application_id@1=" application_id"
|
||||
pragma_auto_vacuum@1=" auto_vacuum"
|
||||
pragma_automatic_index@1=" automatic_index"
|
||||
pragma_busy_timeout@1=" busy_timeout"
|
||||
pragma_cache_size@1=" cache_size"
|
||||
pragma_cache_spill@1=" cache_spill"
|
||||
pragma_case_sensitive_like@1=" case_sensitive_like"
|
||||
pragma_checkpoint_fullfsync@1=" checkpoint_fullfsync"
|
||||
pragma_collation_list@1=" collation_list"
|
||||
pragma_compile_options@1=" compile_options"
|
||||
pragma_count_changes@1=" count_changes"
|
||||
pragma_data_store_directory@1=" data_store_directory"
|
||||
pragma_database_list@1=" database_list"
|
||||
pragma_default_cache_size@1=" default_cache_size"
|
||||
pragma_defer_foreign_keys@1=" defer_foreign_keys"
|
||||
pragma_empty_result_callbacks@1=" empty_result_callbacks"
|
||||
pragma_encoding@1=" encoding"
|
||||
pragma_foreign_key_check@1=" foreign_key_check"
|
||||
pragma_foreign_key_list@1=" foreign_key_list"
|
||||
pragma_foreign_keys@1=" foreign_keys"
|
||||
pragma_freelist_count@1=" freelist_count"
|
||||
pragma_full_column_names@1=" full_column_names"
|
||||
pragma_fullfsync@1=" fullfsync"
|
||||
pragma_ignore_check_constraints@1=" ignore_check_constraints"
|
||||
pragma_incremental_vacuum@1=" incremental_vacuum"
|
||||
pragma_index_info@1=" index_info"
|
||||
pragma_index_list@1=" index_list"
|
||||
pragma_integrity_check@1=" integrity_check"
|
||||
pragma_journal_mode@1=" journal_mode"
|
||||
pragma_journal_size_limit@1=" journal_size_limit"
|
||||
pragma_legacy_file_format@1=" legacy_file_format"
|
||||
pragma_locking_mode@1=" locking_mode"
|
||||
pragma_max_page_count@1=" max_page_count"
|
||||
pragma_mmap_size@1=" mmap_size"
|
||||
pragma_page_count@1=" page_count"
|
||||
pragma_page_size@1=" page_size"
|
||||
pragma_parser_trace@1=" parser_trace"
|
||||
pragma_query_only@1=" query_only"
|
||||
pragma_quick_check@1=" quick_check"
|
||||
pragma_read_uncommitted@1=" read_uncommitted"
|
||||
pragma_recursive_triggers@1=" recursive_triggers"
|
||||
pragma_reverse_unordered_selects@1=" reverse_unordered_selects"
|
||||
pragma_schema_version@1=" schema_version"
|
||||
pragma_secure_delete@1=" secure_delete"
|
||||
pragma_short_column_names@1=" short_column_names"
|
||||
pragma_shrink_memory@1=" shrink_memory"
|
||||
pragma_soft_heap_limit@1=" soft_heap_limit"
|
||||
pragma_stats@1=" stats"
|
||||
pragma_synchronous@1=" synchronous"
|
||||
pragma_table_info@1=" table_info"
|
||||
pragma_temp_store@1=" temp_store"
|
||||
pragma_temp_store_directory@1=" temp_store_directory"
|
||||
pragma_threads@1=" threads"
|
||||
pragma_user_version@1=" user_version"
|
||||
pragma_vdbe_addoptrace@1=" vdbe_addoptrace"
|
||||
pragma_vdbe_debug@1=" vdbe_debug"
|
||||
pragma_vdbe_listing@1=" vdbe_listing"
|
||||
pragma_vdbe_trace@1=" vdbe_trace"
|
||||
pragma_wal_autocheckpoint@1=" wal_autocheckpoint"
|
||||
pragma_wal_checkpoint@1=" wal_checkpoint"
|
||||
pragma_writable_schema@1=" writable_schema"
|
@ -0,0 +1,51 @@
|
||||
#
|
||||
# AFL dictionary for TIFF images
|
||||
# ------------------------------
|
||||
#
|
||||
# Just the basic, standard-originating sections; does not include vendor
|
||||
# extensions.
|
||||
#
|
||||
# Created by Michal Zalewski <lcamtuf@google.com>
|
||||
#
|
||||
|
||||
header_ii="II*\x00"
|
||||
header_mm="MM\x00*"
|
||||
|
||||
section_100="\x00\x01"
|
||||
section_101="\x01\x01"
|
||||
section_102="\x02\x01"
|
||||
section_103="\x03\x01"
|
||||
section_106="\x06\x01"
|
||||
section_107="\x07\x01"
|
||||
section_10D="\x0d\x01"
|
||||
section_10E="\x0e\x01"
|
||||
section_10F="\x0f\x01"
|
||||
section_110="\x10\x01"
|
||||
section_111="\x11\x01"
|
||||
section_112="\x12\x01"
|
||||
section_115="\x15\x01"
|
||||
section_116="\x16\x01"
|
||||
section_117="\x17\x01"
|
||||
section_11A="\x1a\x01"
|
||||
section_11B="\x1b\x01"
|
||||
section_11C="\x1c\x01"
|
||||
section_11D="\x1d\x01"
|
||||
section_11E="\x1e\x01"
|
||||
section_11F="\x1f\x01"
|
||||
section_122="\"\x01"
|
||||
section_123="#\x01"
|
||||
section_124="$\x01"
|
||||
section_125="%\x01"
|
||||
section_128="(\x01"
|
||||
section_129=")\x01"
|
||||
section_12D="-\x01"
|
||||
section_131="1\x01"
|
||||
section_132="2\x01"
|
||||
section_13B=";\x01"
|
||||
section_13C="<\x01"
|
||||
section_13D="=\x01"
|
||||
section_13E=">\x01"
|
||||
section_13F="?\x01"
|
||||
section_140="@\x01"
|
||||
section_FE="\xfe\x00"
|
||||
section_FF="\xff\x00"
|
@ -0,0 +1,20 @@
|
||||
#
|
||||
# AFL dictionary for WebP images
|
||||
# ------------------------------
|
||||
#
|
||||
# Created by Michal Zalewski <lcamtuf@google.com>
|
||||
#
|
||||
|
||||
header_RIFF="RIFF"
|
||||
header_WEBP="WEBP"
|
||||
|
||||
section_ALPH="ALPH"
|
||||
section_ANIM="ANIM"
|
||||
section_ANMF="ANMF"
|
||||
section_EXIF="EXIF"
|
||||
section_FRGM="FRGM"
|
||||
section_ICCP="ICCP"
|
||||
section_VP8="VP8 "
|
||||
section_VP8L="VP8L"
|
||||
section_VP8X="VP8X"
|
||||
section_XMP="XMP "
|
@ -0,0 +1,72 @@
|
||||
#
|
||||
# AFL dictionary for XML
|
||||
# ----------------------
|
||||
#
|
||||
# Several basic syntax elements and attributes, modeled on libxml2.
|
||||
#
|
||||
# Created by Michal Zalewski <lcamtuf@google.com>
|
||||
#
|
||||
|
||||
attr_encoding=" encoding=\"1\""
|
||||
attr_generic=" a=\"1\""
|
||||
attr_href=" href=\"1\""
|
||||
attr_standalone=" standalone=\"no\""
|
||||
attr_version=" version=\"1\""
|
||||
attr_xml_base=" xml:base=\"1\""
|
||||
attr_xml_id=" xml:id=\"1\""
|
||||
attr_xml_lang=" xml:lang=\"1\""
|
||||
attr_xml_space=" xml:space=\"1\""
|
||||
attr_xmlns=" xmlns=\"1\""
|
||||
|
||||
entity_builtin="&lt;"
|
||||
entity_decimal="&#1;"
|
||||
entity_external="&a;"
|
||||
entity_hex="&#x1;"
|
||||
|
||||
string_any="ANY"
|
||||
string_brackets="[]"
|
||||
string_cdata="CDATA"
|
||||
string_col_fallback=":fallback"
|
||||
string_col_generic=":a"
|
||||
string_col_include=":include"
|
||||
string_dashes="--"
|
||||
string_empty="EMPTY"
|
||||
string_empty_dblquotes="\"\""
|
||||
string_empty_quotes="''"
|
||||
string_entities="ENTITIES"
|
||||
string_entity="ENTITY"
|
||||
string_fixed="#FIXED"
|
||||
string_id="ID"
|
||||
string_idref="IDREF"
|
||||
string_idrefs="IDREFS"
|
||||
string_implied="#IMPLIED"
|
||||
string_nmtoken="NMTOKEN"
|
||||
string_nmtokens="NMTOKENS"
|
||||
string_notation="NOTATION"
|
||||
string_parentheses="()"
|
||||
string_pcdata="#PCDATA"
|
||||
string_percent="%a"
|
||||
string_public="PUBLIC"
|
||||
string_required="#REQUIRED"
|
||||
string_schema=":schema"
|
||||
string_system="SYSTEM"
|
||||
string_ucs4="UCS-4"
|
||||
string_utf16="UTF-16"
|
||||
string_utf8="UTF-8"
|
||||
string_xmlns="xmlns:"
|
||||
|
||||
tag_attlist="<!ATTLIST"
|
||||
tag_cdata="<![CDATA["
|
||||
tag_close="</a>"
|
||||
tag_doctype="<!DOCTYPE"
|
||||
tag_element="<!ELEMENT"
|
||||
tag_entity="<!ENTITY"
|
||||
tag_ignore="<![IGNORE["
|
||||
tag_include="<![INCLUDE["
|
||||
tag_notation="<!NOTATION"
|
||||
tag_open="<a>"
|
||||
tag_open_close="<a />"
|
||||
tag_open_exclamation="<!"
|
||||
tag_open_q="<?"
|
||||
tag_sq2_close="]]>"
|
||||
tag_xml_q="<?xml?>"
|
@ -0,0 +1,202 @@
|
||||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
@ -0,0 +1,183 @@
|
||||
=========================
|
||||
Installation instructions
|
||||
=========================
|
||||
|
||||
This document provides basic installation instructions and discusses known
|
||||
issues for a variety of platforms. See README for the general instruction
|
||||
manual.
|
||||
|
||||
1) Linux on x86
|
||||
---------------
|
||||
|
||||
This platform is expected to work well. Compile the program with:
|
||||
|
||||
$ make
|
||||
|
||||
You can start using the fuzzer without installation, but it is also possible to
|
||||
install it with:
|
||||
|
||||
# make install
|
||||
|
||||
There are no special dependencies to speak of; you will need GNU make and a
|
||||
working compiler (gcc or clang). Some of the optional scripts bundled with the
|
||||
program may depend on bash, gdb, and similar basic tools.
|
||||
|
||||
If you are using clang, please review llvm_mode/README.llvm; the LLVM
|
||||
integration mode can offer substantial performance gains compared to the
|
||||
traditional approach.
|
||||
|
||||
You may have to change several settings to get optimal results (most notably,
|
||||
disable crash reporting utilities and switch to a different CPU governor), but
|
||||
afl-fuzz will guide you through that if necessary.
|
||||
|
||||
2) OpenBSD, FreeBSD, NetBSD on x86
|
||||
----------------------------------
|
||||
|
||||
Similarly to Linux, these platforms are expected to work well and are
|
||||
regularly tested. Compile everything with GNU make:
|
||||
|
||||
$ gmake
|
||||
|
||||
Note that BSD make will *not* work; if you do not have gmake on your system,
|
||||
please install it first. As on Linux, you can use the fuzzer itself without
|
||||
installation, or install it with:
|
||||
|
||||
# gmake install
|
||||
|
||||
Keep in mind that if you are using csh as your shell, the syntax of some of the
|
||||
shell commands given in the README and other docs will be different.
|
||||
|
||||
The llvm_mode requires a dynamically linked, fully-operational installation of
|
||||
clang. At least on FreeBSD, the clang binaries are static and do not include
|
||||
some of the essential tools, so if you want to make it work, you may need to
|
||||
follow the instructions in llvm_mode/README.llvm.
|
||||
|
||||
Beyond that, everything should work as advertised.
|
||||
|
||||
The QEMU mode is currently supported only on Linux. I think it's just a QEMU
|
||||
problem; I couldn't get a vanilla copy of user-mode emulation support working
|
||||
correctly on BSD at all.
|
||||
|
||||
3) MacOS X on x86
|
||||
-----------------
|
||||
|
||||
MacOS X should work, but there are some gotchas due to the idiosyncrasies of
|
||||
the platform. On top of this, I have limited release testing capabilities
|
||||
and depend mostly on user feedback.
|
||||
|
||||
To build AFL, install Xcode and follow the general instructions for Linux.
|
||||
|
||||
The Xcode 'gcc' tool is just a wrapper for clang, so be sure to use afl-clang
|
||||
to compile any instrumented binaries; afl-gcc will fail unless you have GCC
|
||||
installed from another source (in which case, please specify AFL_CC and
|
||||
AFL_CXX to point to the "real" GCC binaries).
|
||||
|
||||
Only 64-bit compilation will work on the platform; porting the 32-bit
|
||||
instrumentation would require a fair amount of work due to the way OS X
|
||||
handles relocations, and today, virtually all MacOS X boxes are 64-bit.
|
||||
|
||||
The crash reporting daemon that comes by default with MacOS X will cause
|
||||
problems with fuzzing. You need to turn it off by following the instructions
|
||||
provided here: http://goo.gl/CCcd5u
|
||||
|
||||
The fork() semantics on OS X are a bit unusual compared to other unix systems
|
||||
and definitely don't look POSIX-compliant. This means two things:
|
||||
|
||||
- Fuzzing will probably be slower than on Linux. In fact, some folks report
|
||||
considerable performance gains by running the jobs inside a Linux VM on
|
||||
MacOS X.
|
||||
|
||||
- Some non-portable, platform-specific code may be incompatible with the
|
||||
AFL forkserver. If you run into any problems, set AFL_NO_FORKSRV=1 in the
|
||||
environment before starting afl-fuzz.
|
||||
|
||||
User emulation mode of QEMU does not appear to be supported on MacOS X, so
|
||||
black-box instrumentation mode (-Q) will not work.
|
||||
|
||||
The llvm_mode requires a fully-operational installation of clang. The one that
|
||||
comes with Xcode is missing some of the essential headers and helper tools.
|
||||
See llvm_mode/README.llvm for advice on how to build the compiler from scratch.
|
||||
|
||||
4) Linux or *BSD on non-x86 systems
|
||||
-----------------------------------
|
||||
|
||||
Standard build will fail on non-x86 systems, but you should be able to
|
||||
leverage two other options:
|
||||
|
||||
- The LLVM mode (see llvm_mode/README.llvm), which does not rely on
|
||||
x86-specific assembly shims. It's fast and robust, but requires a
|
||||
complete installation of clang.
|
||||
|
||||
- The QEMU mode (see qemu_mode/README.qemu), which can also be used for
|
||||
fuzzing cross-platform binaries. It's slower and more fragile, but
|
||||
can be used even when you don't have the source for the tested app.
|
||||
|
||||
If you're not sure what you need, you need the LLVM mode. To get it, try:
|
||||
|
||||
$ AFL_NO_X86=1 gmake && gmake -C llvm_mode
|
||||
|
||||
...and compile your target program with afl-clang-fast or afl-clang-fast++
|
||||
instead of the traditional afl-gcc or afl-clang wrappers.
|
||||
|
||||
5) Solaris on x86
|
||||
-----------------
|
||||
|
||||
The fuzzer reportedly works on Solaris, but I have not tested this first-hand,
|
||||
and the user base is fairly small, so I don't have a lot of feedback.
|
||||
|
||||
To get the ball rolling, you will need to use GNU make and GCC or clang. I'm
|
||||
being told that the stock version of GCC that comes with the platform does not
|
||||
work properly due to its reliance on a hardcoded location for 'as' (completely
|
||||
ignoring the -B parameter or $PATH).
|
||||
|
||||
To fix this, you may want to build stock GCC from the source, like so:
|
||||
|
||||
$ ./configure --prefix=$HOME/gcc --with-gnu-as --with-gnu-ld \
|
||||
--with-gmp-include=/usr/include/gmp --with-mpfr-include=/usr/include/mpfr
|
||||
$ make
|
||||
$ sudo make install
|
||||
|
||||
Do *not* specify --with-as=/usr/gnu/bin/as - this will produce a GCC binary that
|
||||
ignores the -B flag and you will be back to square one.
|
||||
|
||||
Note that Solaris reportedly comes with crash reporting enabled, which causes
|
||||
problems with crashes being misinterpreted as hangs, similarly to the gotchas
|
||||
for Linux and MacOS X. AFL does not auto-detect crash reporting on this
|
||||
particular platform, but you may need to run the following command:
|
||||
|
||||
$ coreadm -d global -d global-setid -d process -d proc-setid \
|
||||
-d kzone -d log
|
||||
|
||||
User emulation mode of QEMU is not available on Solaris, so black-box
|
||||
instrumentation mode (-Q) will not work.
|
||||
|
||||
6) Everything else
|
||||
------------------
|
||||
|
||||
You're on your own. On POSIX-compliant systems, you may be able to compile and
|
||||
run the fuzzer; and the LLVM mode may offer a way to instrument non-x86 code.
|
||||
|
||||
The fuzzer will not run on Windows. It will also not work under Cygwin. It
|
||||
could be ported to the latter platform fairly easily, but it's a pretty bad
|
||||
idea, because Cygwin is extremely slow. It makes much more sense to use
|
||||
VirtualBox or so to run a hardware-accelerated Linux VM; it will run around
|
||||
20x faster or so. If you have a *really* compelling use case for Cygwin, let
|
||||
me know.
|
||||
|
||||
Although Android on x86 should theoretically work, the stock kernel may have
|
||||
SHM support compiled out, and if so, you may have to address that issue first.
|
||||
It's possible that all you need is this workaround:
|
||||
|
||||
https://github.com/pelya/android-shmem
|
||||
|
||||
Joshua J. Drake notes that the Android linker adds a shim that automatically
|
||||
intercepts SIGSEGV and related signals. To fix this issue and be able to see
|
||||
crashes, you need to put this at the beginning of the fuzzed program:
|
||||
|
||||
signal(SIGILL, SIG_DFL);
|
||||
signal(SIGABRT, SIG_DFL);
|
||||
signal(SIGBUS, SIG_DFL);
|
||||
signal(SIGFPE, SIG_DFL);
|
||||
signal(SIGSEGV, SIG_DFL);
|
||||
|
||||
You may need to #include <signal.h> first.
|
@ -0,0 +1,49 @@
|
||||
=====================
|
||||
AFL quick start guide
|
||||
=====================
|
||||
|
||||
You should read docs/README. It's pretty short. If you really can't, here's
|
||||
how to hit the ground running:
|
||||
|
||||
1) Compile AFL with 'make'. If the build fails, see docs/INSTALL for tips.
|
||||
|
||||
2) Find or write a reasonably fast and simple program that takes data from
|
||||
a file or stdin, processes it in a test-worthy way, then exits cleanly.
|
||||
If testing a network service, modify it to run in the foreground and read
|
||||
from stdin. When fuzzing a format that uses checksums, comment out the
|
||||
checksum verification code, too.
|
||||
|
||||
The program must crash properly when a fault is encountered. Watch out for
|
||||
custom SIGSEGV or SIGABRT handlers and background processes. For tips on
|
||||
detecting non-crashing flaws, see section 11 in docs/README.
|
||||
|
||||
3) Compile the program / library to be fuzzed using afl-gcc. A common way to
|
||||
do this would be:
|
||||
|
||||
CC=/path/to/afl-gcc CXX=/path/to/afl-g++ ./configure --disable-shared
|
||||
make clean all
|
||||
|
||||
If the program build fails, ping <afl-users@googlegroups.com>.
|
||||
|
||||
4) Get a small but valid input file that makes sense to the program. When
|
||||
fuzzing verbose syntax (SQL, HTTP, etc), create a dictionary as described in
|
||||
dictionaries/README.dictionaries, too.
|
||||
|
||||
5) If the program reads from stdin, run 'afl-fuzz' like so:
|
||||
|
||||
./afl-fuzz -i testcase_dir -o findings_dir -- \
|
||||
/path/to/tested/program [...program's cmdline...]
|
||||
|
||||
If the program takes input from a file, you can put @@ in the program's
|
||||
command line; AFL will put an auto-generated file name in there for you.
|
||||
|
||||
6) Investigate anything shown in red in the fuzzer UI by promptly consulting
|
||||
docs/status_screen.txt.
|
||||
|
||||
That's it. Sit back, relax, and - time permitting - try to skim through the
|
||||
following files:
|
||||
|
||||
- docs/README - A general introduction to AFL,
|
||||
- docs/perf_tips.txt - Simple tips on how to fuzz more quickly,
|
||||
- docs/status_screen.txt - An explanation of the tidbits shown in the UI,
|
||||
- docs/parallel_fuzzing.txt - Advice on running AFL on multiple cores.
|
@ -0,0 +1,281 @@
|
||||
=======================
|
||||
Environmental variables
|
||||
=======================
|
||||
|
||||
This document discusses the environment variables used by American Fuzzy Lop
|
||||
to expose various exotic functions that may be (rarely) useful for power
|
||||
users or for some types of custom fuzzing setups. See README for the general
|
||||
instruction manual.
|
||||
|
||||
1) Settings for afl-gcc, afl-clang, and afl-as
|
||||
----------------------------------------------
|
||||
|
||||
Because they can't directly accept command-line options, the compile-time
|
||||
tools make fairly broad use of environmental variables:
|
||||
|
||||
- Setting AFL_HARDEN automatically adds code hardening options when invoking
|
||||
the downstream compiler. This currently includes -D_FORTIFY_SOURCE=2 and
|
||||
-fstack-protector-all. The setting is useful for catching non-crashing
|
||||
memory bugs at the expense of a very slight (sub-5%) performance loss.
|
||||
|
||||
- By default, the wrapper appends -O3 to optimize builds. Very rarely, this
|
||||
will cause problems in programs built with -Werror, simply because -O3
|
||||
enables more thorough code analysis and can spew out additional warnings.
|
||||
To disable optimizations, set AFL_DONT_OPTIMIZE.
|
||||
|
||||
- Setting AFL_USE_ASAN automatically enables ASAN, provided that your
|
||||
compiler supports that. Note that fuzzing with ASAN is mildly challenging
|
||||
- see notes_for_asan.txt.
|
||||
|
||||
(You can also enable MSAN via AFL_USE_MSAN; ASAN and MSAN come with the
|
||||
same gotchas; the modes are mutually exclusive. UBSAN and other exotic
|
||||
sanitizers are not officially supported yet, but are easy to get to work
|
||||
by hand.)
|
||||
|
||||
- Setting AFL_CC, AFL_CXX, and AFL_AS lets you use alternate downstream
|
||||
compilation tools, rather than the default 'clang', 'gcc', or 'as' binaries
|
||||
in your $PATH.
|
||||
|
||||
- AFL_PATH can be used to point afl-gcc to an alternate location of afl-as.
|
||||
One possible use of this is experimental/clang_asm_normalize/, which lets
|
||||
you instrument hand-written assembly when compiling clang code by plugging
|
||||
a normalizer into the chain. (There is no equivalent feature for GCC.)
|
||||
|
||||
- Setting AFL_INST_RATIO to a percentage between 0 and 100% controls the
|
||||
probability of instrumenting every branch. This is (very rarely) useful
|
||||
when dealing with exceptionally complex programs that saturate the output
|
||||
bitmap. Examples include v8, ffmpeg, and perl.
|
||||
|
||||
(If this ever happens, afl-fuzz will warn you ahead of time by
|
||||
displaying the "bitmap density" field in fiery red.)
|
||||
|
||||
Setting AFL_INST_RATIO to 0 is a valid choice. This will instrument only
|
||||
the transitions between function entry points, but not individual branches.
|
||||
|
||||
- AFL_NO_BUILTIN causes the compiler to generate code suitable for use with
|
||||
libtokencap.so (but perhaps running a bit slower than without the flag).
|
||||
|
||||
- TMPDIR is used by afl-as for temporary files; if this variable is not set,
|
||||
the tool defaults to /tmp.
|
||||
|
||||
- Setting AFL_KEEP_ASSEMBLY prevents afl-as from deleting instrumented
|
||||
assembly files. Useful for troubleshooting problems or understanding how
|
||||
the tool works. To get them in a predictable place, try something like:
|
||||
|
||||
mkdir assembly_here
|
||||
TMPDIR=$PWD/assembly_here AFL_KEEP_ASSEMBLY=1 make clean all
|
||||
|
||||
- Setting AFL_QUIET will prevent afl-cc and afl-as banners from being
|
||||
displayed during compilation, in case you find them distracting.
|
||||
|
||||
2) Settings for afl-clang-fast
|
||||
------------------------------
|
||||
|
||||
The native LLVM instrumentation helper accepts a subset of the settings
|
||||
discussed in section #1, with the exception of:
|
||||
|
||||
- AFL_AS, since this toolchain does not directly invoke GNU as.
|
||||
|
||||
- TMPDIR and AFL_KEEP_ASSEMBLY, since no temporary assembly files are
|
||||
created.
|
||||
|
||||
Note that AFL_INST_RATIO will behave a bit differently than for afl-gcc,
|
||||
because functions are *not* instrumented unconditionally - so low values
|
||||
will have a more striking effect. For this tool, 0 is not a valid choice.
|
||||
|
||||
3) Settings for afl-fuzz
|
||||
------------------------
|
||||
|
||||
The main fuzzer binary accepts several options that disable a couple of sanity
|
||||
checks or alter some of the more exotic semantics of the tool:
|
||||
|
||||
- Setting AFL_SKIP_CPUFREQ skips the check for CPU scaling policy. This is
|
||||
useful if you can't change the defaults (e.g., no root access to the
|
||||
system) and are OK with some performance loss.
|
||||
|
||||
- Setting AFL_NO_FORKSRV disables the forkserver optimization, reverting to
|
||||
fork + execve() call for every tested input. This is useful mostly when
|
||||
working with unruly libraries that create threads or do other crazy
|
||||
things when initializing (before the instrumentation has a chance to run).
|
||||
|
||||
Note that this setting inhibits some of the user-friendly diagnostics
|
||||
normally done when starting up the forkserver and causes a pretty
|
||||
significant performance drop.
|
||||
|
||||
- AFL_EXIT_WHEN_DONE causes afl-fuzz to terminate when all existing paths
|
||||
have been fuzzed and there were no new finds for a while. This would be
|
||||
normally indicated by the cycle counter in the UI turning green. May be
|
||||
convenient for some types of automated jobs.
|
||||
|
||||
- Setting AFL_NO_AFFINITY disables attempts to bind to a specific CPU core
|
||||
on Linux systems. This slows things down, but lets you run more instances
|
||||
of afl-fuzz than would be prudent (if you really want to).
|
||||
|
||||
- AFL_SKIP_CRASHES causes AFL to tolerate crashing files in the input
|
||||
queue. This can help with rare situations where a program crashes only
|
||||
intermittently, but it's not really recommended under normal operating
|
||||
conditions.
|
||||
|
||||
- Setting AFL_HANG_TMOUT allows you to specify a different timeout for
|
||||
deciding if a particular test case is a "hang". The default is 1 second
|
||||
or the value of the -t parameter, whichever is larger. Dialing the value
|
||||
down can be useful if you are very concerned about slow inputs, or if you
|
||||
don't want AFL to spend too much time classifying that stuff and just
|
||||
rapidly put all timeouts in that bin.
|
||||
|
||||
- AFL_NO_ARITH causes AFL to skip most of the deterministic arithmetics.
|
||||
This can be useful to speed up the fuzzing of text-based file formats.
|
||||
|
||||
- AFL_SHUFFLE_QUEUE randomly reorders the input queue on startup. Requested
|
||||
by some users for unorthodox parallelized fuzzing setups, but not
|
||||
advisable otherwise.
|
||||
|
||||
- When developing custom instrumentation on top of afl-fuzz, you can use
|
||||
AFL_SKIP_BIN_CHECK to inhibit the checks for non-instrumented binaries
|
||||
and shell scripts; and AFL_DUMB_FORKSRV in conjunction with the -n
|
||||
setting to instruct afl-fuzz to still follow the fork server protocol
|
||||
without expecting any instrumentation data in return.
|
||||
|
||||
- When running in the -M or -S mode, setting AFL_IMPORT_FIRST causes the
|
||||
fuzzer to import test cases from other instances before doing anything
|
||||
else. This makes the "own finds" counter in the UI more accurate.
|
||||
Beyond counter aesthetics, not much else should change.
|
||||
|
||||
- Setting AFL_POST_LIBRARY allows you to configure a postprocessor for
|
||||
mutated files - say, to fix up checksums. See experimental/post_library/
|
||||
for more.
|
||||
|
||||
- AFL_FAST_CAL makes the calibration stage about 2.5x faster (albeit less
|
||||
precise), which can help when starting a session against a slow target.
|
||||
|
||||
- The CPU widget shown at the bottom of the screen is fairly simplistic and
|
||||
may complain of high load prematurely, especially on systems with low core
|
||||
counts. To avoid the alarming red color, you can set AFL_NO_CPU_RED.
|
||||
|
||||
- In QEMU mode (-Q), AFL_PATH will be searched for afl-qemu-trace.
|
||||
|
||||
- Setting AFL_PRELOAD causes AFL to set LD_PRELOAD for the target binary
|
||||
without disrupting the afl-fuzz process itself. This is useful, among other
|
||||
things, for bootstrapping libdislocator.so.
|
||||
|
||||
- Setting AFL_NO_UI inhibits the UI altogether, and just periodically prints
|
||||
some basic stats. This behavior is also automatically triggered when the
|
||||
output from afl-fuzz is redirected to a file or to a pipe.
|
||||
|
||||
- If you are Jakub, you may need AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES.
|
||||
Others need not apply.
|
||||
|
||||
- Benchmarking only: AFL_BENCH_JUST_ONE causes the fuzzer to exit after
|
||||
processing the first queue entry; and AFL_BENCH_UNTIL_CRASH causes it to
|
||||
exit soon after the first crash is found.
|
||||
|
||||
4) Settings for afl-qemu-trace
|
||||
------------------------------
|
||||
|
||||
The QEMU wrapper used to instrument binary-only code supports several settings:
|
||||
|
||||
- It is possible to set AFL_INST_RATIO to skip the instrumentation on some
|
||||
of the basic blocks, which can be useful when dealing with very complex
|
||||
binaries.
|
||||
|
||||
- Setting AFL_INST_LIBS causes the translator to also instrument the code
|
||||
inside any dynamically linked libraries (notably including glibc).
|
||||
|
||||
- The underlying QEMU binary will recognize any standard "user space
|
||||
emulation" variables (e.g., QEMU_STACK_SIZE), but there should be no
|
||||
reason to touch them.
|
||||
|
||||
5) Settings for afl-cmin
|
||||
------------------------
|
||||
|
||||
The corpus minimization script offers very little customization:
|
||||
|
||||
- Setting AFL_PATH offers a way to specify the location of afl-showmap
|
||||
and afl-qemu-trace (the latter only in -Q mode).
|
||||
|
||||
- AFL_KEEP_TRACES makes the tool keep traces and other metadata used for
|
||||
minimization and normally deleted at exit. The files can be found in the
|
||||
<out_dir>/.traces/*.
|
||||
|
||||
- AFL_ALLOW_TMP permits this and some other scripts to run in /tmp. This is
|
||||
a modest security risk on multi-user systems with rogue users, but should
|
||||
be safe on dedicated fuzzing boxes.
|
||||
|
||||
6) Settings for afl-tmin
|
||||
------------------------
|
||||
|
||||
Virtually nothing to play with. Well, in QEMU mode (-Q), AFL_PATH will be
|
||||
searched for afl-qemu-trace. In addition to this, TMPDIR may be used if a
|
||||
temporary file can't be created in the current working directory.
|
||||
|
||||
You can specify AFL_TMIN_EXACT if you want afl-tmin to require execution paths
|
||||
to match when minimizing crashes. This will make minimization less useful, but
|
||||
may prevent the tool from "jumping" from one crashing condition to another in
|
||||
very buggy software. You probably want to combine it with the -e flag.
|
||||
|
||||
7) Settings for afl-analyze
|
||||
---------------------------
|
||||
|
||||
You can set AFL_ANALYZE_HEX to get file offsets printed as hexadecimal instead
|
||||
of decimal.
|
||||
|
||||
8) Settings for libdislocator.so
|
||||
--------------------------------
|
||||
|
||||
The library honors four environmental variables:
|
||||
|
||||
- AFL_LD_LIMIT_MB caps the size of the maximum heap usage permitted by the
|
||||
library, in megabytes. The default value is 1 GB. Once this is exceeded,
|
||||
allocations will return NULL.
|
||||
|
||||
- AFL_LD_HARD_FAIL alters the behavior by calling abort() on excessive
|
||||
allocations, thus causing what AFL would perceive as a crash. Useful for
|
||||
programs that are supposed to maintain a specific memory footprint.
|
||||
|
||||
- AFL_LD_VERBOSE causes the library to output some diagnostic messages
|
||||
that may be useful for pinpointing the cause of any observed issues.
|
||||
|
||||
- AFL_LD_NO_CALLOC_OVER inhibits abort() on calloc() overflows. Most
|
||||
of the common allocators check for that internally and return NULL, so
|
||||
it's a security risk only in more exotic setups.
|
||||
|
||||
9) Settings for libtokencap.so
|
||||
------------------------------
|
||||
|
||||
This library accepts AFL_TOKEN_FILE to indicate the location to which the
|
||||
discovered tokens should be written.
|
||||
|
||||
10) Third-party variables set by afl-fuzz & other tools
|
||||
-------------------------------------------------------
|
||||
|
||||
Several variables are not directly interpreted by afl-fuzz, but are set to
|
||||
optimal values if not already present in the environment:
|
||||
|
||||
- By default, LD_BIND_NOW is set to speed up fuzzing by forcing the
|
||||
linker to do all the work before the fork server kicks in. You can
|
||||
override this by setting LD_BIND_LAZY beforehand, but it is almost
|
||||
certainly pointless.
|
||||
|
||||
- By default, ASAN_OPTIONS are set to:
|
||||
|
||||
abort_on_error=1
|
||||
detect_leaks=0
|
||||
symbolize=0
|
||||
allocator_may_return_null=1
|
||||
|
||||
If you want to set your own options, be sure to include abort_on_error=1 -
|
||||
otherwise, the fuzzer will not be able to detect crashes in the tested
|
||||
app. Similarly, include symbolize=0, since without it, AFL may have
|
||||
difficulty telling crashes and hangs apart.
|
||||
|
||||
- In the same vein, by default, MSAN_OPTIONS are set to:
|
||||
|
||||
exit_code=86 (required for legacy reasons)
|
||||
abort_on_error=1
|
||||
symbolize=0
|
||||
msan_track_origins=0
|
||||
allocator_may_return_null=1
|
||||
|
||||
Be sure to include the first one when customizing anything, since some
|
||||
MSAN versions don't call abort() on error, and we need a way to detect
|
||||
faults.
|
@ -0,0 +1,147 @@
|
||||
================
|
||||
Historical notes
|
||||
================
|
||||
|
||||
This doc talks about the rationale of some of the high-level design decisions
|
||||
for American Fuzzy Lop. It's adapted from a discussion with Rob Graham.
|
||||
See README for the general instruction manual, and technical_details.txt for
|
||||
additional implementation-level insights.
|
||||
|
||||
1) Influences
|
||||
-------------
|
||||
|
||||
In short, afl-fuzz is inspired chiefly by the work done by Tavis Ormandy back
|
||||
in 2007. Tavis did some very persuasive experiments using gcov block coverage
|
||||
to select optimal test cases out of a large corpus of data, and then using
|
||||
them as a starting point for traditional fuzzing workflows.
|
||||
|
||||
(By "persuasive", I mean: netting a significant number of interesting
|
||||
vulnerabilities.)
|
||||
|
||||
In parallel to this, both Tavis and I were interested in evolutionary fuzzing.
|
||||
Tavis had his experiments, and I was working on a tool called bunny-the-fuzzer,
|
||||
released somewhere in 2007.
|
||||
|
||||
Bunny used a generational algorithm not much different from afl-fuzz, but
|
||||
also tried to reason about the relationship between various input bits and
|
||||
the internal state of the program, with hopes of deriving some additional value
|
||||
from that. The reasoning / correlation part was probably in part inspired by
|
||||
other projects done around the same time by Will Drewry and Chris Evans.
|
||||
|
||||
The state correlation approach sounded very sexy on paper, but ultimately, made
|
||||
the fuzzer complicated, brittle, and cumbersome to use; every other target
|
||||
program would require a tweak or two. Because Bunny didn't fare a whole lot
|
||||
better than less sophisticated brute-force tools, I eventually decided to write
|
||||
it off. You can still find its original documentation at:
|
||||
|
||||
https://code.google.com/p/bunny-the-fuzzer/wiki/BunnyDoc
|
||||
|
||||
There has been a fair amount of independent work, too. Most notably, a few
|
||||
weeks earlier that year, Jared DeMott had a Defcon presentation about a
|
||||
coverage-driven fuzzer that relied on coverage as a fitness function.
|
||||
|
||||
Jared's approach was by no means identical to what afl-fuzz does, but it was in
|
||||
the same ballpark. His fuzzer tried to explicitly solve for the maximum coverage
|
||||
with a single input file; in comparison, afl simply selects for cases that do
|
||||
something new (which yields better results - see technical_details.txt).
|
||||
|
||||
A few years later, Gabriel Campana released fuzzgrind, a tool that relied purely
|
||||
on Valgrind and a constraint solver to maximize coverage without any brute-force
|
||||
bits; and Microsoft Research folks talked extensively about their still
|
||||
non-public, solver-based SAGE framework.
|
||||
|
||||
In the past six years or so, I've also seen a fair number of academic papers
|
||||
that dealt with smart fuzzing (focusing chiefly on symbolic execution) and a
|
||||
couple papers that discussed proof-of-concept applications of genetic
|
||||
algorithms with the same goals in mind. I'm unconvinced how practical most of
|
||||
these experiments were; I suspect that many of them suffer from the
|
||||
bunny-the-fuzzer's curse of being cool on paper and in carefully designed
|
||||
experiments, but failing the ultimate test of being able to find new,
|
||||
worthwhile security bugs in otherwise well-fuzzed, real-world software.
|
||||
|
||||
In some ways, the baseline that the "cool" solutions have to compete against is
|
||||
a lot more impressive than it may seem, making it difficult for competitors to
|
||||
stand out. For a singular example, check out the work by Gynvael and Mateusz
|
||||
Jurczyk, applying "dumb" fuzzing to ffmpeg, a prominent and security-critical
|
||||
component of modern browsers and media players:
|
||||
|
||||
http://googleonlinesecurity.blogspot.com/2014/01/ffmpeg-and-thousand-fixes.html
|
||||
|
||||
Effortlessly getting comparable results with state-of-the-art symbolic execution
|
||||
in equally complex software still seems fairly unlikely, and hasn't been
|
||||
demonstrated in practice so far.
|
||||
|
||||
But I digress; ultimately, attribution is hard, and glorifying the fundamental
|
||||
concepts behind AFL is probably a waste of time. The devil is very much in the
|
||||
often-overlooked details, which brings us to...
|
||||
|
||||
2) Design goals for afl-fuzz
|
||||
----------------------------
|
||||
|
||||
In short, I believe that the current implementation of afl-fuzz takes care of
|
||||
several itches that seemed impossible to scratch with other tools:
|
||||
|
||||
1) Speed. It's genuinely hard to compete with brute force when your "smart"
|
||||
approach is resource-intensive. If your instrumentation makes it 10x more
|
||||
likely to find a bug, but runs 100x slower, your users are getting a bad
|
||||
deal.
|
||||
|
||||
To avoid starting with a handicap, afl-fuzz is meant to let you fuzz most of
|
||||
the intended targets at roughly their native speed - so even if it doesn't
|
||||
add value, you do not lose much.
|
||||
|
||||
On top of this, the tool leverages instrumentation to actually reduce the
|
||||
amount of work in a couple of ways: for example, by carefully trimming the
|
||||
corpus or skipping non-functional but non-trimmable regions in the input
|
||||
files.
|
||||
|
||||
2) Rock-solid reliability. It's hard to compete with brute force if your
|
||||
approach is brittle and fails unexpectedly. Automated testing is attractive
|
||||
because it's simple to use and scalable; anything that goes against these
|
||||
principles is an unwelcome trade-off and means that your tool will be used
|
||||
less often and with less consistent results.
|
||||
|
||||
Most of the approaches based on symbolic execution, taint tracking, or
|
||||
complex syntax-aware instrumentation are currently fairly unreliable with
|
||||
real-world targets. Perhaps more importantly, their failure modes can render
|
||||
them strictly worse than "dumb" tools, and such degradation can be difficult
|
||||
for less experienced users to notice and correct.
|
||||
|
||||
In contrast, afl-fuzz is designed to be rock solid, chiefly by keeping it
|
||||
simple. In fact, at its core, it's designed to be just a very good
|
||||
traditional fuzzer with a wide range of interesting, well-researched
|
||||
strategies to go by. The fancy parts just help it focus the effort in
|
||||
places where it matters the most.
|
||||
|
||||
3) Simplicity. The author of a testing framework is probably the only person
|
||||
who truly understands the impact of all the settings offered by the tool -
|
||||
and who can dial them in just right. Yet, even the most rudimentary fuzzer
|
||||
frameworks often come with countless knobs and fuzzing ratios that need to
|
||||
be guessed by the operator ahead of time. This can do more harm than
|
||||
good.
|
||||
|
||||
AFL is designed to avoid this as much as possible. The three knobs you
|
||||
can play with are the output file, the memory limit, and the ability to
|
||||
override the default, auto-calibrated timeout. The rest is just supposed to
|
||||
work. When it doesn't, user-friendly error messages outline the probable
|
||||
causes and workarounds, and get you back on track right away.
|
||||
|
||||
4) Chainability. Most general-purpose fuzzers can't be easily employed
|
||||
against resource-hungry or interaction-heavy tools, necessitating the
|
||||
creation of custom in-process fuzzers or the investment of massive CPU
|
||||
power (most of which is wasted on tasks not directly related to the code
|
||||
we actually want to test).
|
||||
|
||||
AFL tries to scratch this itch by allowing users to use more lightweight
|
||||
targets (e.g., standalone image parsing libraries) to create small
|
||||
corpora of interesting test cases that can be fed into a manual testing
|
||||
process or a UI harness later on.
|
||||
|
||||
As mentioned in technical_details.txt, AFL does all this not by systematically
|
||||
applying a single overarching CS concept, but by experimenting with a variety
|
||||
of small, complementary methods that were shown to reliably yield results
|
||||
better than chance. The use of instrumentation is a part of that toolkit, but is
|
||||
far from being the most important one.
|
||||
|
||||
Ultimately, what matters is that afl-fuzz is designed to find cool bugs - and
|
||||
has a pretty robust track record of doing just that.
|
@ -0,0 +1,128 @@
|
||||
# ===================
|
||||
# AFL "Life Pro Tips"
|
||||
# ===================
|
||||
#
|
||||
# Bite-sized advice for those who understand the basics, but can't be bothered
|
||||
# to read or memorize every other piece of documentation for AFL.
|
||||
#
|
||||
|
||||
%
|
||||
|
||||
Get more bang for your buck by using fuzzing dictionaries.
|
||||
See dictionaries/README.dictionaries to learn how.
|
||||
|
||||
%
|
||||
|
||||
You can get the most out of your hardware by parallelizing AFL jobs.
|
||||
See docs/parallel_fuzzing.txt for step-by-step tips.
|
||||
|
||||
%
|
||||
|
||||
Improve the odds of spotting memory corruption bugs with libdislocator.so!
|
||||
It's easy. Consult libdislocator/README.dislocator for usage tips.
|
||||
|
||||
%
|
||||
|
||||
Want to understand how your target parses a particular input file?
|
||||
Try the bundled afl-analyze tool; it's got colors and all!
|
||||
|
||||
%
|
||||
|
||||
You can visually monitor the progress of your fuzzing jobs.
|
||||
Run the bundled afl-plot utility to generate browser-friendly graphs.
|
||||
|
||||
%
|
||||
|
||||
Need to monitor AFL jobs programmatically? Check out the fuzzer_stats file
|
||||
in the AFL output dir or try afl-whatsup.
|
||||
|
||||
%
|
||||
|
||||
Puzzled by something showing up in red or purple in the AFL UI?
|
||||
It could be important - consult docs/status_screen.txt right away!
|
||||
|
||||
%
|
||||
|
||||
Know your target? Convert it to persistent mode for a huge performance gain!
|
||||
Consult section #5 in llvm_mode/README.llvm for tips.
|
||||
|
||||
%
|
||||
|
||||
Using clang? Check out llvm_mode/ for a faster alternative to afl-gcc!
|
||||
|
||||
%
|
||||
|
||||
Did you know that AFL can fuzz closed-source or cross-platform binaries?
|
||||
Check out qemu_mode/README.qemu for more.
|
||||
|
||||
%
|
||||
|
||||
Did you know that afl-fuzz can minimize any test case for you?
|
||||
Try the bundled afl-tmin tool - and get small repro files fast!
|
||||
|
||||
%
|
||||
|
||||
Not sure if a crash is exploitable? AFL can help you figure it out. Specify
|
||||
-C to enable the peruvian were-rabbit mode. See section #10 in README for more.
|
||||
|
||||
%
|
||||
|
||||
Trouble dealing with a machine uprising? Relax, we've all been there.
|
||||
Find essential survival tips at http://lcamtuf.coredump.cx/prep/.
|
||||
|
||||
%
|
||||
|
||||
AFL-generated corpora can be used to power other testing processes.
|
||||
See section #2 in README for inspiration - it tends to pay off!
|
||||
|
||||
%
|
||||
|
||||
Want to automatically spot non-crashing memory handling bugs?
|
||||
Try running an AFL-generated corpus through ASAN, MSAN, or Valgrind.
|
||||
|
||||
%
|
||||
|
||||
Good selection of input files is critical to a successful fuzzing job.
|
||||
See section #5 in README (or docs/perf_tips.txt) for pro tips.
|
||||
|
||||
%
|
||||
|
||||
You can improve the odds of automatically spotting stack corruption issues.
|
||||
Specify AFL_HARDEN=1 in the environment to enable hardening flags.
|
||||
|
||||
%
|
||||
|
||||
Bumping into problems with non-reproducible crashes? It happens, but usually
|
||||
isn't hard to diagnose. See section #7 in README for tips.
|
||||
|
||||
%
|
||||
|
||||
Fuzzing is not just about memory corruption issues in the codebase. Add some
|
||||
sanity-checking assert() / abort() statements to effortlessly catch logic bugs.
|
||||
|
||||
%
|
||||
|
||||
Hey kid... pssst... want to figure out how AFL really works?
|
||||
Check out docs/technical_details.txt for all the gory details in one place!
|
||||
|
||||
%
|
||||
|
||||
There's a ton of third-party helper tools designed to work with AFL!
|
||||
Be sure to check out docs/sister_projects.txt before writing your own.
|
||||
|
||||
%
|
||||
|
||||
Need to fuzz the command-line arguments of a particular program?
|
||||
You can find a simple solution in experimental/argv_fuzzing.
|
||||
|
||||
%
|
||||
|
||||
Attacking a format that uses checksums? Remove the checksum-checking code or
|
||||
use a postprocessor! See experimental/post_library/ for more.
|
||||
|
||||
%
|
||||
|
||||
Dealing with a very slow target or hoping for instant results? Specify -d
|
||||
when calling afl-fuzz!
|
||||
|
||||
%
|
@ -0,0 +1,143 @@
|
||||
==================================
|
||||
Notes for using ASAN with afl-fuzz
|
||||
==================================
|
||||
|
||||
This file discusses some of the caveats for fuzzing under ASAN, and suggests
|
||||
a handful of alternatives. See README for the general instruction manual.
|
||||
|
||||
1) Short version
|
||||
----------------
|
||||
|
||||
ASAN on 64-bit systems requests a lot of memory in a way that can't be easily
|
||||
distinguished from a misbehaving program bent on crashing your system.
|
||||
|
||||
Because of this, fuzzing with ASAN is recommended only in four scenarios:
|
||||
|
||||
- On 32-bit systems, where we can always enforce a reasonable memory limit
|
||||
(-m 800 or so is a good starting point),
|
||||
|
||||
- On 64-bit systems only if you can do one of the following:
|
||||
|
||||
- Compile the binary in 32-bit mode (gcc -m32),
|
||||
|
||||
- Precisely gauge memory needs using http://jwilk.net/software/recidivm .
|
||||
|
||||
- Limit the memory available to process using cgroups on Linux (see
|
||||
experimental/asan_cgroups).
|
||||
|
||||
To compile with ASAN, set AFL_USE_ASAN=1 before calling 'make clean all'. The
|
||||
afl-gcc / afl-clang wrappers will pick that up and add the appropriate flags.
|
||||
Note that ASAN is incompatible with -static, so be mindful of that.
|
||||
|
||||
(You can also use AFL_USE_MSAN=1 to enable MSAN instead.)
|
||||
|
||||
There is also the option of generating a corpus using a non-ASAN binary, and
|
||||
then feeding it to an ASAN-instrumented one to check for bugs. This is faster,
|
||||
and can give you somewhat comparable results. You can also try using
|
||||
libdislocator (see libdislocator/README.dislocator in the parent directory) as a
|
||||
lightweight and hassle-free (but less thorough) alternative.
|
||||
|
||||
2) Long version
|
||||
---------------
|
||||
|
||||
ASAN allocates a huge region of virtual address space for bookkeeping purposes.
|
||||
Most of this is never actually accessed, so the OS never has to allocate any
|
||||
real pages of memory for the process, and the VM grabbed by ASAN is essentially
|
||||
"free" - but the mapping counts against the standard OS-enforced limit
|
||||
(RLIMIT_AS, aka ulimit -v).
|
||||
|
||||
On our end, afl-fuzz tries to protect you from processes that go off-rails
|
||||
and start consuming all the available memory in a vain attempt to parse a
|
||||
malformed input file. This happens surprisingly often, so enforcing such a limit
|
||||
is important for almost any fuzzer: the alternative is for the kernel OOM
|
||||
handler to step in and start killing random processes to free up resources.
|
||||
Needless to say, that's not a very nice prospect to live with.
|
||||
|
||||
Unfortunately, un*x systems offer no portable way to limit the amount of
|
||||
pages actually given to a process in a way that distinguishes between that
|
||||
and the harmless "land grab" done by ASAN. In principle, there are three standard
|
||||
ways to limit the size of the heap:
|
||||
|
||||
- The RLIMIT_AS mechanism (ulimit -v) caps the size of the virtual space -
|
||||
but as noted, this pays no attention to the number of pages actually
|
||||
in use by the process, and doesn't help us here.
|
||||
|
||||
- The RLIMIT_DATA mechanism (ulimit -d) seems like a good fit, but it applies
|
||||
only to the traditional sbrk() / brk() methods of requesting heap space;
|
||||
modern allocators, including the one in glibc, routinely rely on mmap()
|
||||
instead, and circumvent this limit completely.
|
||||
|
||||
- Finally, the RLIMIT_RSS limit (ulimit -m) sounds like what we need, but
|
||||
doesn't work on Linux - mostly because nobody felt like implementing it.
|
||||
|
||||
There are also cgroups, but they are Linux-specific, not universally available
|
||||
even on Linux systems, and they require root permissions to set up; I'm a bit
|
||||
hesitant to make afl-fuzz require root permissions just for that. That said,
|
||||
if you are on Linux and want to use cgroups, check out the contributed script
|
||||
that ships in experimental/asan_cgroups/.
|
||||
|
||||
In settings where cgroups aren't available, we have no nice, portable way to
|
||||
avoid counting the ASAN allocation toward the limit. On 32-bit systems, or for
|
||||
binaries compiled in 32-bit mode (-m32), this is not a big deal: ASAN needs
|
||||
around 600-800 MB or so, depending on the compiler - so all you need to do is
|
||||
to specify -m that is a bit higher than that.
|
||||
|
||||
On 64-bit systems, the situation is more murky, because the ASAN allocation
|
||||
is completely outlandish - around 17.5 TB in older versions, and closer to
|
||||
20 TB with newest ones. The actual amount of memory on your system is
|
||||
(probably!) just a tiny fraction of that - so unless you dial the limit
|
||||
with surgical precision, you will get no protection from OOM bugs.
|
||||
|
||||
On my system, the amount of memory grabbed by ASAN with a slightly older
|
||||
version of gcc is around 17,825,850 MB; for newest clang, it's 20,971,600.
|
||||
But there is no guarantee that these numbers are stable, and if you get them
|
||||
wrong by "just" a couple gigs or so, you will be at risk.
|
||||
|
||||
To get the precise number, you can use the recidivm tool developed by Jakub
|
||||
Wilk (http://jwilk.net/software/recidivm). In the absence of this, ASAN is *not*
|
||||
recommended when fuzzing 64-bit binaries, unless you are confident that they
|
||||
are robust and enforce reasonable memory limits (in which case, you can
|
||||
specify '-m none' when calling afl-fuzz).
|
||||
|
||||
Using recidivm or running with no limits aside, there are two other decent
|
||||
alternatives: build a corpus of test cases using a non-ASAN binary, and then
|
||||
examine them with ASAN, Valgrind, or other heavy-duty tools in a more
|
||||
controlled setting; or compile the target program with -m32 (32-bit mode)
|
||||
if your system supports that.
|
||||
|
||||
3) Interactions with the QEMU mode
|
||||
----------------------------------
|
||||
|
||||
ASAN, MSAN, and other sanitizers appear to be incompatible with QEMU user
|
||||
emulation, so please do not try to use them with the -Q option; QEMU doesn't
|
||||
seem to appreciate the shadow VM trick used by these tools, and will likely
|
||||
just allocate all your physical memory, then crash.
|
||||
|
||||
4) ASAN and OOM crashes
|
||||
-----------------------
|
||||
|
||||
By default, ASAN treats memory allocation failures as fatal errors, immediately
|
||||
causing the program to crash. Since this is a departure from normal POSIX
|
||||
semantics (and creates the appearance of security issues in otherwise
|
||||
properly-behaving programs), we try to disable this by specifying
|
||||
allocator_may_return_null=1 in ASAN_OPTIONS.
|
||||
|
||||
Unfortunately, it's been reported that this setting still causes ASAN to
|
||||
trigger phantom crashes in situations where the standard allocator would
|
||||
simply return NULL. If this is interfering with your fuzzing jobs, you may
|
||||
want to cc: yourself on this bug:
|
||||
|
||||
https://bugs.llvm.org/show_bug.cgi?id=22026
|
||||
|
||||
5) What about UBSAN?
|
||||
--------------------
|
||||
|
||||
Some folks expressed interest in fuzzing with UBSAN. This isn't officially
|
||||
supported, because many installations of UBSAN don't offer a consistent way
|
||||
to abort() on fault conditions or to terminate with a distinctive exit code.
|
||||
|
||||
That said, some versions of the library can be binary-patched to address this
|
||||
issue, while newer releases support explicit compile-time flags - see this
|
||||
mailing list thread for tips:
|
||||
|
||||
https://groups.google.com/forum/#!topic/afl-users/GyeSBJt4M38
|
@ -0,0 +1,216 @@
|
||||
=========================
|
||||
Tips for parallel fuzzing
|
||||
=========================
|
||||
|
||||
This document talks about synchronizing afl-fuzz jobs on a single machine
|
||||
or across a fleet of systems. See README for the general instruction manual.
|
||||
|
||||
1) Introduction
|
||||
---------------
|
||||
|
||||
Every copy of afl-fuzz will take up one CPU core. This means that on an
|
||||
n-core system, you can almost always run around n concurrent fuzzing jobs with
|
||||
virtually no performance hit (you can use the afl-gotcpu tool to make sure).
|
||||
|
||||
In fact, if you rely on just a single job on a multi-core system, you will
|
||||
be underutilizing the hardware. So, parallelization is usually the right
|
||||
way to go.
|
||||
|
||||
When targeting multiple unrelated binaries or using the tool in "dumb" (-n)
|
||||
mode, it is perfectly fine to just start up several fully separate instances
|
||||
of afl-fuzz. The picture gets more complicated when you want to have multiple
|
||||
fuzzers hammering a common target: if a hard-to-hit but interesting test case
|
||||
is synthesized by one fuzzer, the remaining instances will not be able to use
|
||||
that input to guide their work.
|
||||
|
||||
To help with this problem, afl-fuzz offers a simple way to synchronize test
|
||||
cases on the fly.
|
||||
|
||||
2) Single-system parallelization
|
||||
--------------------------------
|
||||
|
||||
If you wish to parallelize a single job across multiple cores on a local
|
||||
system, simply create a new, empty output directory ("sync dir") that will be
|
||||
shared by all the instances of afl-fuzz; and then come up with a naming scheme
|
||||
for every instance - say, "fuzzer01", "fuzzer02", etc.
|
||||
|
||||
Run the first one ("master", -M) like this:
|
||||
|
||||
$ ./afl-fuzz -i testcase_dir -o sync_dir -M fuzzer01 [...other stuff...]
|
||||
|
||||
...and then, start up secondary (-S) instances like this:
|
||||
|
||||
$ ./afl-fuzz -i testcase_dir -o sync_dir -S fuzzer02 [...other stuff...]
|
||||
$ ./afl-fuzz -i testcase_dir -o sync_dir -S fuzzer03 [...other stuff...]
|
||||
|
||||
Each fuzzer will keep its state in a separate subdirectory, like so:
|
||||
|
||||
/path/to/sync_dir/fuzzer01/
|
||||
|
||||
Each instance will also periodically rescan the top-level sync directory
|
||||
for any test cases found by other fuzzers - and will incorporate them into
|
||||
its own fuzzing when they are deemed interesting enough.
|
||||
|
||||
The difference between the -M and -S modes is that the master instance will
|
||||
still perform deterministic checks; while the secondary instances will
|
||||
proceed straight to random tweaks. If you don't want to do deterministic
|
||||
fuzzing at all, it's OK to run all instances with -S. With very slow or complex
|
||||
targets, or when running heavily parallelized jobs, this is usually a good plan.
|
||||
|
||||
Note that running multiple -M instances is wasteful, although there is
|
||||
experimental support for parallelizing the deterministic checks. To leverage
|
||||
that, you need to create -M instances like so:
|
||||
|
||||
$ ./afl-fuzz -i testcase_dir -o sync_dir -M masterA:1/3 [...]
|
||||
$ ./afl-fuzz -i testcase_dir -o sync_dir -M masterB:2/3 [...]
|
||||
$ ./afl-fuzz -i testcase_dir -o sync_dir -M masterC:3/3 [...]
|
||||
|
||||
...where the first value after ':' is the sequential ID of a particular master
|
||||
instance (starting at 1), and the second value is the total number of fuzzers to
|
||||
distribute the deterministic fuzzing across. Note that if you boot up fewer
|
||||
fuzzers than indicated by the second number passed to -M, you may end up with
|
||||
poor coverage.
|
||||
|
||||
You can also monitor the progress of your jobs from the command line with the
|
||||
provided afl-whatsup tool. When the instances are no longer finding new paths,
|
||||
it's probably time to stop.
|
||||
|
||||
WARNING: Exercise caution when explicitly specifying the -f option. Each fuzzer
|
||||
must use a separate temporary file; otherwise, things will go south. One safe
|
||||
example may be:
|
||||
|
||||
$ ./afl-fuzz [...] -S fuzzer10 -f file10.txt ./fuzzed/binary @@
|
||||
$ ./afl-fuzz [...] -S fuzzer11 -f file11.txt ./fuzzed/binary @@
|
||||
$ ./afl-fuzz [...] -S fuzzer12 -f file12.txt ./fuzzed/binary @@
|
||||
|
||||
This is not a concern if you use @@ without -f and let afl-fuzz come up with the
|
||||
file name.
|
||||
|
||||
3) Multi-system parallelization
|
||||
-------------------------------
|
||||
|
||||
The basic operating principle for multi-system parallelization is similar to
|
||||
the mechanism explained in section 2. The key difference is that you need to
|
||||
write a simple script that performs two actions:
|
||||
|
||||
- Uses SSH with authorized_keys to connect to every machine and retrieve
|
||||
a tar archive of the /path/to/sync_dir/<fuzzer_id>/queue/ directories for
|
||||
every <fuzzer_id> local to the machine. It's best to use a naming scheme
|
||||
that includes host name in the fuzzer ID, so that you can do something
|
||||
like:
|
||||
|
||||
for s in {1..10}; do
|
||||
ssh user@host${s} "tar -czf - sync/host${s}_fuzzid*/[qf]*" >host${s}.tgz
|
||||
done
|
||||
|
||||
- Distributes and unpacks these files on all the remaining machines, e.g.:
|
||||
|
||||
for s in {1..10}; do
|
||||
for d in {1..10}; do
|
||||
test "$s" = "$d" && continue
|
||||
ssh user@host${d} 'tar -kxzf -' <host${s}.tgz
|
||||
done
|
||||
done
|
||||
|
||||
There is an example of such a script in experimental/distributed_fuzzing/;
|
||||
you can also find a more featured, experimental tool developed by
|
||||
Martijn Bogaard at:
|
||||
|
||||
https://github.com/MartijnB/disfuzz-afl
|
||||
|
||||
Another client-server implementation from Richo Healey is:
|
||||
|
||||
https://github.com/richo/roving
|
||||
|
||||
Note that these third-party tools are unsafe to run on systems exposed to the
|
||||
Internet or to untrusted users.
|
||||
|
||||
When developing custom test case sync code, there are several optimizations
|
||||
to keep in mind:
|
||||
|
||||
- The synchronization does not have to happen very often; running the
|
||||
task every 30 minutes or so may be perfectly fine.
|
||||
|
||||
- There is no need to synchronize crashes/ or hangs/; you only need to
|
||||
copy over queue/* (and ideally, also fuzzer_stats).
|
||||
|
||||
- It is not necessary (and not advisable!) to overwrite existing files;
|
||||
the -k option in tar is a good way to avoid that.
|
||||
|
||||
- There is no need to fetch directories for fuzzers that are not running
|
||||
locally on a particular machine, and were simply copied over onto that
|
||||
system during earlier runs.
|
||||
|
||||
- For large fleets, you will want to consolidate tarballs for each host,
|
||||
as this will let you use n SSH connections for sync, rather than n*(n-1).
|
||||
|
||||
You may also want to implement staged synchronization. For example, you
|
||||
could have 10 groups of systems, with group 1 pushing test cases only
|
||||
to group 2; group 2 pushing them only to group 3; and so on, with group
|
||||
10 eventually feeding back to group 1.
|
||||
|
||||
This arrangement would allow interesting test cases to propagate across
|
||||
the fleet without having to copy every fuzzer queue to every single host.
|
||||
|
||||
- You do not want a "master" instance of afl-fuzz on every system; you should
|
||||
run them all with -S, and just designate a single process somewhere within
|
||||
the fleet to run with -M.
|
||||
|
||||
It is *not* advisable to skip the synchronization script and run the fuzzers
|
||||
directly on a network filesystem; unexpected latency and unkillable processes
|
||||
in I/O wait state can mess things up.
|
||||
|
||||
4) Remote monitoring and data collection
|
||||
----------------------------------------
|
||||
|
||||
You can use screen, nohup, tmux, or something equivalent to run remote
|
||||
instances of afl-fuzz. If you redirect the program's output to a file, it will
|
||||
automatically switch from a fancy UI to more limited status reports. There is
|
||||
also basic machine-readable information always written to the fuzzer_stats file
|
||||
in the output directory. Locally, that information can be interpreted with
|
||||
afl-whatsup.
|
||||
|
||||
In principle, you can use the status screen of the master (-M) instance to
|
||||
monitor the overall fuzzing progress and decide when to stop. In this
|
||||
mode, the most important signal is just that no new paths are being found
|
||||
for a longer while. If you do not have a master instance, just pick any
|
||||
single secondary instance to watch and go by that.
|
||||
|
||||
You can also rely on that instance's output directory to collect the
|
||||
synthesized corpus that covers all the noteworthy paths discovered anywhere
|
||||
within the fleet. Secondary (-S) instances do not require any special
|
||||
monitoring, other than just making sure that they are up.
|
||||
|
||||
Keep in mind that crashing inputs are *not* automatically propagated to the
|
||||
master instance, so you may still want to monitor for crashes fleet-wide
|
||||
from within your synchronization or health checking scripts (see afl-whatsup).
|
||||
|
||||
5) Asymmetric setups
|
||||
--------------------
|
||||
|
||||
It is perhaps worth noting that all of the following is permitted:
|
||||
|
||||
- Running afl-fuzz in conjunction with other guided tools that can extend
|
||||
coverage (e.g., via concolic execution). Third-party tools simply need to
|
||||
follow the protocol described above for pulling new test cases from
|
||||
out_dir/<fuzzer_id>/queue/* and writing their own finds to sequentially
|
||||
numbered id:nnnnnn files in out_dir/<ext_tool_id>/queue/*.
|
||||
|
||||
- Running some of the synchronized fuzzers with different (but related)
|
||||
target binaries. For example, simultaneously stress-testing several
|
||||
different JPEG parsers (say, IJG jpeg and libjpeg-turbo) while sharing
|
||||
the discovered test cases can have synergistic effects and improve the
|
||||
overall coverage.
|
||||
|
||||
(In this case, running one -M instance per binary is a good plan.)
|
||||
|
||||
- Having some of the fuzzers invoke the binary in different ways.
|
||||
For example, 'djpeg' supports several DCT modes, configurable with
|
||||
a command-line flag, while 'dwebp' supports incremental and one-shot
|
||||
decoding. In some scenarios, going after multiple distinct modes and then
|
||||
pooling test cases will improve coverage.
|
||||
|
||||
- Much less convincingly, running the synchronized fuzzers with different
|
||||
starting test cases (e.g., progressive and standard JPEG) or dictionaries.
|
||||
The synchronization mechanism ensures that the test sets will get fairly
|
||||
homogeneous over time, but it introduces some initial variability.
|
@ -0,0 +1,201 @@
|
||||
=================================
|
||||
Tips for performance optimization
|
||||
=================================
|
||||
|
||||
This file provides tips for troubleshooting slow or wasteful fuzzing jobs.
|
||||
See README for the general instruction manual.
|
||||
|
||||
1) Keep your test cases small
|
||||
-----------------------------
|
||||
|
||||
This is probably the single most important step to take! Large test cases do
|
||||
not merely take more time and memory to be parsed by the tested binary, but
|
||||
also make the fuzzing process dramatically less efficient in several other
|
||||
ways.
|
||||
|
||||
To illustrate, let's say that you're randomly flipping bits in a file, one bit
|
||||
at a time. Let's assume that if you flip bit #47, you will hit a security bug;
|
||||
flipping any other bit just results in an invalid document.
|
||||
|
||||
Now, if your starting test case is 100 bytes long, you will have a 71% chance of
|
||||
triggering the bug within the first 1,000 execs - not bad! But if the test case
|
||||
is 1 kB long, the probability that we will randomly hit the right pattern in
|
||||
the same timeframe goes down to 11%. And if it has 10 kB of non-essential
|
||||
cruft, the odds plunge to 1%.
|
||||
|
||||
On top of that, with larger inputs, the binary may be now running 5-10x
|
||||
slower than before - so the overall drop in fuzzing efficiency may be easily
|
||||
as high as 500x or so.
|
||||
|
||||
In practice, this means that you shouldn't fuzz image parsers with your
|
||||
vacation photos. Generate a tiny 16x16 picture instead, and run it through
|
||||
jpegtran or pngcrunch for good measure. The same goes for most other types
|
||||
of documents.
|
||||
|
||||
There are plenty of small starting test cases in ../testcases/* - try them out
|
||||
or submit new ones!
|
||||
|
||||
If you want to start with a larger, third-party corpus, run afl-cmin with an
|
||||
aggressive timeout on that data set first.
|
||||
|
||||
2) Use a simpler target
|
||||
-----------------------
|
||||
|
||||
Consider using a simpler target binary in your fuzzing work. For example, for
|
||||
image formats, bundled utilities such as djpeg, readpng, or gifhisto are
|
||||
considerably (10-20x) faster than the convert tool from ImageMagick - all while
|
||||
exercising roughly the same library-level image parsing code.
|
||||
|
||||
Even if you don't have a lightweight harness for a particular target, remember
|
||||
that you can always use another, related library to generate a corpus that will
|
||||
be then manually fed to a more resource-hungry program later on.
|
||||
|
||||
3) Use LLVM instrumentation
|
||||
---------------------------
|
||||
|
||||
When fuzzing slow targets, you can gain a 2x performance improvement by using
|
||||
the LLVM-based instrumentation mode described in llvm_mode/README.llvm. Note
|
||||
that this mode requires the use of clang and will not work with GCC.
|
||||
|
||||
The LLVM mode also offers a "persistent", in-process fuzzing mode that can
|
||||
work well for certain types of self-contained libraries, and for fast targets,
|
||||
can offer performance gains up to 5-10x; and a "deferred fork server" mode
|
||||
that can offer huge benefits for programs with high startup overhead. Both
|
||||
modes require you to edit the source code of the fuzzed program, but the
|
||||
changes often amount to just strategically placing a single line or two.
|
||||
|
||||
4) Profile and optimize the binary
|
||||
----------------------------------
|
||||
|
||||
Check for any parameters or settings that obviously improve performance. For
|
||||
example, the djpeg utility that comes with IJG jpeg and libjpeg-turbo can be
|
||||
called with:
|
||||
|
||||
-dct fast -nosmooth -onepass -dither none -scale 1/4
|
||||
|
||||
...and that will speed things up. There is a corresponding drop in the quality
|
||||
of decoded images, but it's probably not something you care about.
|
||||
|
||||
In some programs, it is possible to disable output altogether, or at least use
|
||||
an output format that is computationally inexpensive. For example, with image
|
||||
transcoding tools, converting to a BMP file will be a lot faster than to PNG.
|
||||
|
||||
With some laid-back parsers, enabling "strict" mode (i.e., bailing out after
|
||||
first error) may result in smaller files and improved run time without
|
||||
sacrificing coverage; for example, for sqlite, you may want to specify -bail.
|
||||
|
||||
If the program is still too slow, you can use strace -tt or an equivalent
|
||||
profiling tool to see if the targeted binary is doing anything silly.
|
||||
Sometimes, you can speed things up simply by specifying /dev/null as the
|
||||
config file, or disabling some compile-time features that aren't really needed
|
||||
for the job (try ./configure --help). One of the notoriously resource-consuming
|
||||
things would be calling other utilities via exec*(), popen(), system(), or
|
||||
equivalent calls; for example, tar can invoke external decompression tools
|
||||
when it decides that the input file is a compressed archive.
|
||||
|
||||
Some programs may also intentionally call sleep(), usleep(), or nanosleep();
|
||||
vim is a good example of that. Other programs may attempt fsync() and so on.
|
||||
There are third-party libraries that make it easy to get rid of such code,
|
||||
e.g.:
|
||||
|
||||
https://launchpad.net/libeatmydata
|
||||
|
||||
In programs that are slow due to unavoidable initialization overhead, you may
|
||||
want to try the LLVM deferred forkserver mode (see llvm_mode/README.llvm),
|
||||
which can give you speed gains up to 10x, as mentioned above.
|
||||
|
||||
Last but not least, if you are using ASAN and the performance is unacceptable,
|
||||
consider turning it off for now, and manually examining the generated corpus
|
||||
with an ASAN-enabled binary later on.
|
||||
|
||||
5) Instrument just what you need
|
||||
--------------------------------
|
||||
|
||||
Instrument just the libraries you actually want to stress-test right now, one
|
||||
at a time. Let the program use system-wide, non-instrumented libraries for
|
||||
any functionality you don't actually want to fuzz. For example, in most
|
||||
cases, it doesn't make sense to instrument libgmp just because you're testing a
|
||||
crypto app that relies on it for bignum math.
|
||||
|
||||
Beware of programs that come with oddball third-party libraries bundled with
|
||||
their source code (Spidermonkey is a good example of this). Check ./configure
|
||||
options to use non-instrumented system-wide copies instead.
|
||||
|
||||
6) Parallelize your fuzzers
|
||||
---------------------------
|
||||
|
||||
The fuzzer is designed to need ~1 core per job. This means that on a, say,
|
||||
4-core system, you can easily run four parallel fuzzing jobs with relatively
|
||||
little performance hit. For tips on how to do that, see parallel_fuzzing.txt.
|
||||
|
||||
The afl-gotcpu utility can help you understand if you still have idle CPU
|
||||
capacity on your system. (It won't tell you about memory bandwidth, cache
|
||||
misses, or similar factors, but they are less likely to be a concern.)
|
||||
|
||||
7) Keep memory use and timeouts in check
|
||||
----------------------------------------
|
||||
|
||||
If you have increased the -m or -t limits more than truly necessary, consider
|
||||
dialing them back down.
|
||||
|
||||
For programs that are nominally very fast, but get sluggish for some inputs,
|
||||
you can also try setting -t values that are more punishing than what afl-fuzz
|
||||
dares to use on its own. On fast and idle machines, going down to -t 5 may be
|
||||
a viable plan.
|
||||
|
||||
The -m parameter is worth looking at, too. Some programs can end up spending
|
||||
a fair amount of time allocating and initializing megabytes of memory when
|
||||
presented with pathological inputs. Low -m values can make them give up sooner
|
||||
and not waste CPU time.
|
||||
|
||||
8) Check OS configuration
|
||||
-------------------------
|
||||
|
||||
There are several OS-level factors that may affect fuzzing speed:
|
||||
|
||||
- High system load. Use idle machines where possible. Kill any non-essential
|
||||
CPU hogs (idle browser windows, media players, complex screensavers, etc).
|
||||
|
||||
- Network filesystems, either used for fuzzer input / output, or accessed by
|
||||
the fuzzed binary to read configuration files (pay special attention to the
|
||||
home directory - many programs search it for dot-files).
|
||||
|
||||
- On-demand CPU scaling. The Linux 'ondemand' governor performs its analysis
|
||||
on a particular schedule and is known to underestimate the needs of
|
||||
short-lived processes spawned by afl-fuzz (or any other fuzzer). On Linux,
|
||||
this can be fixed with:
|
||||
|
||||
cd /sys/devices/system/cpu
|
||||
echo performance | tee cpu*/cpufreq/scaling_governor
|
||||
|
||||
On other systems, the impact of CPU scaling will be different; when fuzzing,
|
||||
use OS-specific tools to find out if all cores are running at full speed.
|
||||
|
||||
- Transparent huge pages. Some allocators, such as jemalloc, can incur a
|
||||
heavy fuzzing penalty when transparent huge pages (THP) are enabled in the
|
||||
kernel. You can disable this via:
|
||||
|
||||
echo never > /sys/kernel/mm/transparent_hugepage/enabled
|
||||
|
||||
- Suboptimal scheduling strategies. The significance of this will vary from
|
||||
one target to another, but on Linux, you may want to make sure that the
|
||||
following options are set:
|
||||
|
||||
echo 1 >/proc/sys/kernel/sched_child_runs_first
|
||||
echo 1 >/proc/sys/kernel/sched_autogroup_enabled
|
||||
|
||||
Setting a different scheduling policy for the fuzzer process - say
|
||||
SCHED_RR - can usually speed things up, too, but needs to be done with
|
||||
care.
|
||||
|
||||
9) If all other options fail, use -d
|
||||
------------------------------------
|
||||
|
||||
For programs that are genuinely slow, in cases where you really can't escape
|
||||
using huge input files, or when you simply want to get quick and dirty results
|
||||
early on, you can always resort to the -d mode.
|
||||
|
||||
The mode causes afl-fuzz to skip all the deterministic fuzzing steps, which
|
||||
makes output a lot less neat and can ultimately make the testing a bit less
|
||||
in-depth, but it will give you an experience more familiar from other fuzzing
|
||||
tools.
|
@ -0,0 +1,354 @@
|
||||
===============
|
||||
Sister projects
|
||||
===============
|
||||
|
||||
This doc lists some of the projects that are inspired by, derived from,
|
||||
designed for, or meant to integrate with AFL. See README for the general
|
||||
instruction manual.
|
||||
|
||||
-------------------------------------------
|
||||
Support for other languages / environments:
|
||||
-------------------------------------------
|
||||
|
||||
Python AFL (Jakub Wilk)
|
||||
-----------------------
|
||||
|
||||
Allows fuzz-testing of Python programs. Uses custom instrumentation and its
|
||||
own forkserver.
|
||||
|
||||
http://jwilk.net/software/python-afl
|
||||
|
||||
Go-fuzz (Dmitry Vyukov)
|
||||
-----------------------
|
||||
|
||||
AFL-inspired guided fuzzing approach for Go targets:
|
||||
|
||||
https://github.com/dvyukov/go-fuzz
|
||||
|
||||
afl.rs (Keegan McAllister)
|
||||
--------------------------
|
||||
|
||||
Allows Rust features to be easily fuzzed with AFL (using the LLVM mode).
|
||||
|
||||
https://github.com/kmcallister/afl.rs
|
||||
|
||||
OCaml support (KC Sivaramakrishnan)
|
||||
-----------------------------------
|
||||
|
||||
Adds AFL-compatible instrumentation to OCaml programs.
|
||||
|
||||
https://github.com/ocamllabs/opam-repo-dev/pull/23
|
||||
http://canopy.mirage.io/Posts/Fuzzing
|
||||
|
||||
AFL for GCJ Java and other GCC frontends (-)
|
||||
--------------------------------------------
|
||||
|
||||
GCC Java programs are actually supported out of the box - simply rename
|
||||
afl-gcc to afl-gcj. Unfortunately, by default, unhandled exceptions in GCJ do
|
||||
not result in abort() being called, so you will need to manually add a
|
||||
top-level exception handler that exits with SIGABRT or something equivalent.
|
||||
|
||||
Other GCC-supported languages should be fairly easy to get working, but may
|
||||
face similar problems. See https://gcc.gnu.org/frontends.html for a list of
|
||||
options.
|
||||
|
||||
AFL-style in-process fuzzer for LLVM (Kostya Serebryany)
|
||||
--------------------------------------------------------
|
||||
|
||||
Provides an evolutionary instrumentation-guided fuzzing harness that allows
|
||||
some programs to be fuzzed without the fork / execve overhead. (Similar
|
||||
functionality is now available as the "persistent" feature described in
|
||||
../llvm_mode/README.llvm.)
|
||||
|
||||
http://llvm.org/docs/LibFuzzer.html
|
||||
|
||||
AFL fixup shim (Ben Nagy)
|
||||
-------------------------
|
||||
|
||||
Allows AFL_POST_LIBRARY postprocessors to be written in arbitrary languages
|
||||
that don't have C / .so bindings. Includes examples in Go.
|
||||
|
||||
https://github.com/bnagy/aflfix
|
||||
|
||||
TriforceAFL (Tim Newsham and Jesse Hertz)
|
||||
-----------------------------------------
|
||||
|
||||
Leverages QEMU full system emulation mode to allow AFL to target operating
|
||||
systems and other alien worlds:
|
||||
|
||||
https://www.nccgroup.trust/us/about-us/newsroom-and-events/blog/2016/june/project-triforce-run-afl-on-everything/
|
||||
|
||||
WinAFL (Ivan Fratric)
|
||||
---------------------
|
||||
|
||||
As the name implies, allows you to fuzz Windows binaries (using DynamoRio).
|
||||
|
||||
https://github.com/ivanfratric/winafl
|
||||
|
||||
Another Windows alternative may be:
|
||||
|
||||
https://github.com/carlosgprado/BrundleFuzz/
|
||||
|
||||
----------------
|
||||
Network fuzzing:
|
||||
----------------
|
||||
|
||||
Preeny (Yan Shoshitaishvili)
|
||||
----------------------------
|
||||
|
||||
Provides a fairly simple way to convince dynamically linked network-centric
|
||||
programs to read from a file or not fork. Not AFL-specific, but described as
|
||||
useful by many users. Some assembly required.
|
||||
|
||||
https://github.com/zardus/preeny
|
||||
|
||||
-------------------------------------------
|
||||
Distributed fuzzing and related automation:
|
||||
-------------------------------------------
|
||||
|
||||
roving (Richo Healey)
|
||||
---------------------
|
||||
|
||||
A client-server architecture for effortlessly orchestrating AFL runs across
|
||||
a fleet of machines. You don't want to use this on systems that face the
|
||||
Internet or live in other untrusted environments.
|
||||
|
||||
https://github.com/richo/roving
|
||||
|
||||
Distfuzz-AFL (Martijn Bogaard)
|
||||
------------------------------
|
||||
|
||||
Simplifies the management of afl-fuzz instances on remote machines. The
|
||||
author notes that the current implementation isn't secure and should not
|
||||
be exposed on the Internet.
|
||||
|
||||
https://github.com/MartijnB/disfuzz-afl
|
||||
|
||||
AFLDFF (quantumvm)
|
||||
------------------
|
||||
|
||||
A nice GUI for managing AFL jobs.
|
||||
|
||||
https://github.com/quantumvm/AFLDFF
|
||||
|
||||
afl-launch (Ben Nagy)
|
||||
---------------------
|
||||
|
||||
Batch AFL launcher utility with a simple CLI.
|
||||
|
||||
https://github.com/bnagy/afl-launch
|
||||
|
||||
AFL Utils (rc0r)
|
||||
----------------
|
||||
|
||||
Simplifies the triage of discovered crashes, starting parallel instances, etc.
|
||||
|
||||
https://github.com/rc0r/afl-utils
|
||||
|
||||
Another crash triage tool:
|
||||
|
||||
https://github.com/floyd-fuh/afl-crash-analyzer
|
||||
|
||||
afl-fuzzing-scripts (Tobias Ospelt)
|
||||
-----------------------------------
|
||||
|
||||
Simplifies starting up multiple parallel AFL jobs.
|
||||
|
||||
https://github.com/floyd-fuh/afl-fuzzing-scripts/
|
||||
|
||||
afl-sid (Jacek Wielemborek)
|
||||
---------------------------
|
||||
|
||||
Allows users to more conveniently build and deploy AFL via Docker.
|
||||
|
||||
https://github.com/d33tah/afl-sid
|
||||
|
||||
Another Docker-related project:
|
||||
|
||||
https://github.com/ozzyjohnson/docker-afl
|
||||
|
||||
afl-monitor (Paul S. Ziegler)
|
||||
-----------------------------
|
||||
|
||||
Provides more detailed and versatile statistics about your running AFL jobs.
|
||||
|
||||
https://github.com/reflare/afl-monitor
|
||||
|
||||
-----------------------------------------------------------
|
||||
Crash triage, coverage analysis, and other companion tools:
|
||||
-----------------------------------------------------------
|
||||
|
||||
afl-crash-analyzer (Tobias Ospelt)
|
||||
----------------------------------
|
||||
|
||||
Makes it easier to navigate and annotate crashing test cases.
|
||||
|
||||
https://github.com/floyd-fuh/afl-crash-analyzer/
|
||||
|
||||
Crashwalk (Ben Nagy)
|
||||
--------------------
|
||||
|
||||
AFL-aware tool to annotate and sort through crashing test cases.
|
||||
|
||||
https://github.com/bnagy/crashwalk
|
||||
|
||||
afl-cov (Michael Rash)
|
||||
----------------------
|
||||
|
||||
Produces human-readable coverage data based on the output queue of afl-fuzz.
|
||||
|
||||
https://github.com/mrash/afl-cov
|
||||
|
||||
afl-sancov (Bhargava Shastry)
|
||||
-----------------------------
|
||||
|
||||
Similar to afl-cov, but uses clang sanitizer instrumentation.
|
||||
|
||||
https://github.com/bshastry/afl-sancov
|
||||
|
||||
RecidiVM (Jakub Wilk)
|
||||
---------------------
|
||||
|
||||
Makes it easy to estimate memory usage limits when fuzzing with ASAN or MSAN.
|
||||
|
||||
http://jwilk.net/software/recidivm
|
||||
|
||||
aflize (Jacek Wielemborek)
|
||||
--------------------------
|
||||
|
||||
Automatically builds AFL-enabled versions of Debian packages.
|
||||
|
||||
https://github.com/d33tah/aflize
|
||||
|
||||
afl-ddmin-mod (Markus Teufelberger)
|
||||
-----------------------------------
|
||||
|
||||
A variant of afl-tmin that uses a more sophisticated (but slower)
|
||||
minimization algorithm.
|
||||
|
||||
https://github.com/MarkusTeufelberger/afl-ddmin-mod
|
||||
|
||||
afl-kit (Kuang-che Wu)
|
||||
----------------------
|
||||
|
||||
Replacements for afl-cmin and afl-tmin with additional features, such
|
||||
as the ability to filter crashes based on stderr patterns.
|
||||
|
||||
https://github.com/kcwu/afl-kit
|
||||
|
||||
-------------------------------
|
||||
Narrow-purpose or experimental:
|
||||
-------------------------------
|
||||
|
||||
Cygwin support (Ali Rizvi-Santiago)
|
||||
-----------------------------------
|
||||
|
||||
Pretty self-explanatory. As per the author, this "mostly" ports AFL to
|
||||
Windows. Field reports welcome!
|
||||
|
||||
https://github.com/arizvisa/afl-cygwin
|
||||
|
||||
Pause and resume scripts (Ben Nagy)
|
||||
-----------------------------------
|
||||
|
||||
Simple automation to suspend and resume groups of fuzzing jobs.
|
||||
|
||||
https://github.com/bnagy/afl-trivia
|
||||
|
||||
Static binary-only instrumentation (Aleksandar Nikolich)
|
||||
--------------------------------------------------------
|
||||
|
||||
Allows black-box binaries to be instrumented statically (i.e., by modifying
|
||||
the binary ahead of time, rather than translating it on the fly). Author
|
||||
reports better performance compared to QEMU, but occasional translation
|
||||
errors with stripped binaries.
|
||||
|
||||
https://github.com/vrtadmin/moflow/tree/master/afl-dyninst
|
||||
|
||||
AFL PIN (Parker Thompson)
|
||||
-------------------------
|
||||
|
||||
Early-stage Intel PIN instrumentation support (from before we settled on
|
||||
faster-running QEMU).
|
||||
|
||||
https://github.com/mothran/aflpin
|
||||
|
||||
AFL-style instrumentation in llvm (Kostya Serebryany)
|
||||
-----------------------------------------------------
|
||||
|
||||
Allows AFL-equivalent instrumentation to be injected at compiler level.
|
||||
This is currently not supported by AFL as-is, but may be useful in other
|
||||
projects.
|
||||
|
||||
https://code.google.com/p/address-sanitizer/wiki/AsanCoverage#Coverage_counters
|
||||
|
||||
AFL JS (Han Choongwoo)
|
||||
----------------------
|
||||
|
||||
One-off optimizations to speed up the fuzzing of JavaScriptCore (now likely
|
||||
superseded by LLVM deferred forkserver init - see llvm_mode/README.llvm).
|
||||
|
||||
https://github.com/tunz/afl-fuzz-js
|
||||
|
||||
AFL harness for fwknop (Michael Rash)
|
||||
-------------------------------------
|
||||
|
||||
An example of a fairly involved integration with AFL.
|
||||
|
||||
https://github.com/mrash/fwknop/tree/master/test/afl
|
||||
|
||||
Building harnesses for DNS servers (Jonathan Foote, Ron Bowes)
|
||||
--------------------------------------------------------------
|
||||
|
||||
Two articles outlining the general principles and showing some example code.
|
||||
|
||||
https://www.fastly.com/blog/how-to-fuzz-server-american-fuzzy-lop
|
||||
https://goo.gl/j9EgFf
|
||||
|
||||
Fuzzer shell for SQLite (Richard Hipp)
|
||||
--------------------------------------
|
||||
|
||||
A simple SQL shell designed specifically for fuzzing the underlying library.
|
||||
|
||||
http://www.sqlite.org/src/artifact/9e7e273da2030371
|
||||
|
||||
Support for Python mutation modules (Christian Holler)
|
||||
------------------------------------------------------
|
||||
|
||||
https://github.com/choller/afl/blob/master/docs/mozilla/python_modules.txt
|
||||
|
||||
Support for selective instrumentation (Christian Holler)
|
||||
--------------------------------------------------------
|
||||
|
||||
https://github.com/choller/afl/blob/master/docs/mozilla/partial_instrumentation.txt
|
||||
|
||||
Kernel fuzzing (Dmitry Vyukov)
|
||||
------------------------------
|
||||
|
||||
A similar guided approach as applied to fuzzing syscalls:
|
||||
|
||||
https://github.com/google/syzkaller/wiki/Found-Bugs
|
||||
https://github.com/dvyukov/linux/commit/33787098ffaaa83b8a7ccf519913ac5fd6125931
|
||||
http://events.linuxfoundation.org/sites/events/files/slides/AFL%20filesystem%20fuzzing%2C%20Vault%202016_0.pdf
|
||||
|
||||
Android support (ele7enxxh)
|
||||
---------------------------
|
||||
|
||||
Based on a somewhat dated version of AFL:
|
||||
|
||||
https://github.com/ele7enxxh/android-afl
|
||||
|
||||
CGI wrapper (floyd)
|
||||
-------------------
|
||||
|
||||
Facilitates the testing of CGI scripts.
|
||||
|
||||
https://github.com/floyd-fuh/afl-cgi-wrapper
|
||||
|
||||
Fuzzing difficulty estimation (Marcel Boehme)
|
||||
---------------------------------------------
|
||||
|
||||
A fork of AFL that tries to quantify the likelihood of finding additional
|
||||
paths or crashes at any point in a fuzzing job.
|
||||
|
||||
https://github.com/mboehme/pythia
|
@ -0,0 +1,408 @@
|
||||
===============================
|
||||
Understanding the status screen
|
||||
===============================
|
||||
|
||||
This document provides an overview of the status screen - plus tips for
|
||||
troubleshooting any warnings and red text shown in the UI. See README for
|
||||
the general instruction manual.
|
||||
|
||||
0) A note about colors
|
||||
----------------------
|
||||
|
||||
The status screen and error messages use colors to keep things readable and
|
||||
attract your attention to the most important details. For example, red almost
|
||||
always means "consult this doc" :-)
|
||||
|
||||
Unfortunately, the UI will render correctly only if your terminal is using
|
||||
a traditional un*x palette (white text on black background) or something close
|
||||
to that.
|
||||
|
||||
If you are using inverse video, you may want to change your settings, say:
|
||||
|
||||
- For GNOME Terminal, go to Edit > Profile preferences, select the "colors"
|
||||
tab, and from the list of built-in schemes, choose "white on black".
|
||||
|
||||
- For the MacOS X Terminal app, open a new window using the "Pro" scheme via
|
||||
the Shell > New Window menu (or make "Pro" your default).
|
||||
|
||||
Alternatively, if you really like your current colors, you can edit config.h
|
||||
to comment out USE_COLORS, then do 'make clean all'.
|
||||
|
||||
I'm not aware of any other simple way to make this work without causing
|
||||
other side effects - sorry about that.
|
||||
|
||||
With that out of the way, let's talk about what's actually on the screen...
|
||||
|
||||
1) Process timing
|
||||
-----------------
|
||||
|
||||
+----------------------------------------------------+
|
||||
| run time : 0 days, 8 hrs, 32 min, 43 sec |
|
||||
| last new path : 0 days, 0 hrs, 6 min, 40 sec |
|
||||
| last uniq crash : none seen yet |
|
||||
| last uniq hang : 0 days, 1 hrs, 24 min, 32 sec |
|
||||
+----------------------------------------------------+
|
||||
|
||||
This section is fairly self-explanatory: it tells you how long the fuzzer has
|
||||
been running and how much time has elapsed since its most recent finds. This is
|
||||
broken down into "paths" (a shorthand for test cases that trigger new execution
|
||||
patterns), crashes, and hangs.
|
||||
|
||||
When it comes to timing: there is no hard rule, but most fuzzing jobs should be
|
||||
expected to run for days or weeks; in fact, for a moderately complex project, the
|
||||
first pass will probably take a day or so. Every now and then, some jobs
|
||||
will be allowed to run for months.
|
||||
|
||||
There's one important thing to watch out for: if the tool is not finding new
|
||||
paths within several minutes of starting, you're probably not invoking the
|
||||
target binary correctly and it never gets to parse the input files we're
|
||||
throwing at it; other possible explanations are that the default memory limit
|
||||
(-m) is too restrictive, and the program exits after failing to allocate a
|
||||
buffer very early on; or that the input files are patently invalid and always
|
||||
fail a basic header check.
|
||||
|
||||
If there are no new paths showing up for a while, you will eventually see a big
|
||||
red warning in this section, too :-)
|
||||
|
||||
2) Overall results
|
||||
------------------
|
||||
|
||||
+-----------------------+
|
||||
| cycles done : 0 |
|
||||
| total paths : 2095 |
|
||||
| uniq crashes : 0 |
|
||||
| uniq hangs : 19 |
|
||||
+-----------------------+
|
||||
|
||||
The first field in this section gives you the count of queue passes done so far
|
||||
- that is, the number of times the fuzzer went over all the interesting test
|
||||
cases discovered so far, fuzzed them, and looped back to the very beginning.
|
||||
Every fuzzing session should be allowed to complete at least one cycle; and
|
||||
ideally, should run much longer than that.
|
||||
|
||||
As noted earlier, the first pass can take a day or longer, so sit back and
|
||||
relax. If you want to get broader but more shallow coverage right away, try
|
||||
the -d option - it gives you a more familiar experience by skipping the
|
||||
deterministic fuzzing steps. It is, however, inferior to the standard mode in
|
||||
a couple of subtle ways.
|
||||
|
||||
To help make the call on when to hit Ctrl-C, the cycle counter is color-coded.
|
||||
It is shown in magenta during the first pass, progresses to yellow if new finds
|
||||
are still being made in subsequent rounds, then blue when that ends - and
|
||||
finally, turns green after the fuzzer hasn't been seeing any action for a
|
||||
longer while.
|
||||
|
||||
The remaining fields in this part of the screen should be pretty obvious:
|
||||
there's the number of test cases ("paths") discovered so far, and the number of
|
||||
unique faults. The test cases, crashes, and hangs can be explored in real-time
|
||||
by browsing the output directory, as discussed in the README.
|
||||
|
||||
3) Cycle progress
|
||||
-----------------
|
||||
|
||||
+-------------------------------------+
|
||||
| now processing : 1296 (61.86%) |
|
||||
| paths timed out : 0 (0.00%) |
|
||||
+-------------------------------------+
|
||||
|
||||
This box tells you how far along the fuzzer is with the current queue cycle: it
|
||||
shows the ID of the test case it is currently working on, plus the number of
|
||||
inputs it decided to ditch because they were persistently timing out.
|
||||
|
||||
The "*" suffix sometimes shown in the first line means that the currently
|
||||
processed path is not "favored" (a property discussed later on, in section 6).
|
||||
|
||||
If you feel that the fuzzer is progressing too slowly, see the note about the
|
||||
-d option in section 2 of this doc.
|
||||
|
||||
4) Map coverage
|
||||
---------------
|
||||
|
||||
+--------------------------------------+
|
||||
| map density : 10.15% / 29.07% |
|
||||
| count coverage : 4.03 bits/tuple |
|
||||
+--------------------------------------+
|
||||
|
||||
This section provides some trivia about the coverage observed by the
|
||||
instrumentation embedded in the target binary.
|
||||
|
||||
The first line in the box tells you how many branch tuples we have already
|
||||
hit, in proportion to how much the bitmap can hold. The number on the left
|
||||
describes the current input; the one on the right is the value for the entire
|
||||
input corpus.
|
||||
|
||||
Be wary of extremes:
|
||||
|
||||
- Absolute numbers below 200 or so suggest one of three things: that the
|
||||
program is extremely simple; that it is not instrumented properly (e.g.,
|
||||
due to being linked against a non-instrumented copy of the target
|
||||
library); or that it is bailing out prematurely on your input test cases.
|
||||
The fuzzer will try to mark this in pink, just to make you aware.
|
||||
|
||||
- Percentages over 70% may very rarely happen with very complex programs
|
||||
that make heavy use of template-generated code.
|
||||
|
||||
Because high bitmap density makes it harder for the fuzzer to reliably
|
||||
discern new program states, I recommend recompiling the binary with
|
||||
AFL_INST_RATIO=10 or so and trying again (see env_variables.txt).
|
||||
|
||||
The fuzzer will flag high percentages in red. Chances are, you will never
|
||||
see that unless you're fuzzing extremely hairy software (say, v8, perl,
|
||||
ffmpeg).
|
||||
|
||||
The other line deals with the variability in tuple hit counts seen in the
|
||||
binary. In essence, if every taken branch is always taken a fixed number of
|
||||
times for all the inputs we have tried, this will read "1.00". As we manage
|
||||
to trigger other hit counts for every branch, the needle will start to move
|
||||
toward "8.00" (every bit in the 8-bit map hit), but will probably never
|
||||
reach that extreme.
|
||||
|
||||
Together, the values can be useful for comparing the coverage of several
|
||||
different fuzzing jobs that rely on the same instrumented binary.
|
||||
|
||||
5) Stage progress
|
||||
-----------------
|
||||
|
||||
+-------------------------------------+
|
||||
| now trying : interest 32/8 |
|
||||
| stage execs : 3996/34.4k (11.62%) |
|
||||
| total execs : 27.4M |
|
||||
| exec speed : 891.7/sec |
|
||||
+-------------------------------------+
|
||||
|
||||
This part gives you an in-depth peek at what the fuzzer is actually doing right
|
||||
now. It tells you about the current stage, which can be any of:
|
||||
|
||||
- calibration - a pre-fuzzing stage where the execution path is examined
|
||||
to detect anomalies, establish baseline execution speed, and so on. Executed
|
||||
very briefly whenever a new find is being made.
|
||||
|
||||
- trim L/S - another pre-fuzzing stage where the test case is trimmed to the
|
||||
shortest form that still produces the same execution path. The length (L)
|
||||
and stepover (S) are chosen in general relationship to file size.
|
||||
|
||||
- bitflip L/S - deterministic bit flips. There are L bits toggled at any given
|
||||
time, walking the input file with S-bit increments. The current L/S variants
|
||||
are: 1/1, 2/1, 4/1, 8/8, 16/8, 32/8.
|
||||
|
||||
- arith L/8 - deterministic arithmetics. The fuzzer tries to subtract or add
|
||||
small integers to 8-, 16-, and 32-bit values. The stepover is always 8 bits.
|
||||
|
||||
- interest L/8 - deterministic value overwrite. The fuzzer has a list of known
|
||||
"interesting" 8-, 16-, and 32-bit values to try. The stepover is 8 bits.
|
||||
|
||||
- extras - deterministic injection of dictionary terms. This can be shown as
|
||||
"user" or "auto", depending on whether the fuzzer is using a user-supplied
|
||||
dictionary (-x) or an auto-created one. You will also see "over" or "insert",
|
||||
depending on whether the dictionary words overwrite existing data or are
|
||||
inserted by offsetting the remaining data to accommodate their length.
|
||||
|
||||
- havoc - a sort-of-fixed-length cycle with stacked random tweaks. The
|
||||
operations attempted during this stage include bit flips, overwrites with
|
||||
random and "interesting" integers, block deletion, block duplication, plus
|
||||
assorted dictionary-related operations (if a dictionary is supplied in the
|
||||
first place).
|
||||
|
||||
- splice - a last-resort strategy that kicks in after the first full queue
|
||||
cycle with no new paths. It is equivalent to 'havoc', except that it first
|
||||
splices together two random inputs from the queue at some arbitrarily
|
||||
selected midpoint.
|
||||
|
||||
- sync - a stage used only when -M or -S is set (see parallel_fuzzing.txt).
|
||||
No real fuzzing is involved, but the tool scans the output from other
|
||||
fuzzers and imports test cases as necessary. The first time this is done,
|
||||
it may take several minutes or so.
|
||||
|
||||
The remaining fields should be fairly self-evident: there's the exec count
|
||||
progress indicator for the current stage, a global exec counter, and a
|
||||
benchmark for the current program execution speed. This may fluctuate from
|
||||
one test case to another, but the benchmark should ideally be over 500 execs/sec
|
||||
most of the time - and if it stays below 100, the job will probably take very
|
||||
long.
|
||||
|
||||
The fuzzer will explicitly warn you about slow targets, too. If this happens,
|
||||
see the perf_tips.txt file included with the fuzzer for ideas on how to speed
|
||||
things up.
|
||||
|
||||
6) Findings in depth
|
||||
--------------------
|
||||
|
||||
+--------------------------------------+
|
||||
| favored paths : 879 (41.96%) |
|
||||
| new edges on : 423 (20.19%) |
|
||||
| total crashes : 0 (0 unique) |
|
||||
| total tmouts : 24 (19 unique) |
|
||||
+--------------------------------------+
|
||||
|
||||
This gives you several metrics that are of interest mostly to complete nerds.
|
||||
The section includes the number of paths that the fuzzer likes the most based
|
||||
on a minimization algorithm baked into the code (these will get considerably
|
||||
more air time), and the number of test cases that actually resulted in better
|
||||
edge coverage (versus just pushing the branch hit counters up). There are also
|
||||
additional, more detailed counters for crashes and timeouts.
|
||||
|
||||
Note that the timeout counter is somewhat different from the hang counter; this
|
||||
one includes all test cases that exceeded the timeout, even if they did not
|
||||
exceed it by a margin sufficient to be classified as hangs.
|
||||
|
||||
7) Fuzzing strategy yields
|
||||
--------------------------
|
||||
|
||||
+-----------------------------------------------------+
|
||||
| bit flips : 57/289k, 18/289k, 18/288k |
|
||||
| byte flips : 0/36.2k, 4/35.7k, 7/34.6k |
|
||||
| arithmetics : 53/2.54M, 0/537k, 0/55.2k |
|
||||
| known ints : 8/322k, 12/1.32M, 10/1.70M |
|
||||
| dictionary : 9/52k, 1/53k, 1/24k |
|
||||
| havoc : 1903/20.0M, 0/0 |
|
||||
| trim : 20.31%/9201, 17.05% |
|
||||
+-----------------------------------------------------+
|
||||
|
||||
This is just another nerd-targeted section keeping track of how many paths we
|
||||
have netted, in proportion to the number of execs attempted, for each of the
|
||||
fuzzing strategies discussed earlier on. This serves to convincingly validate
|
||||
assumptions about the usefulness of the various approaches taken by afl-fuzz.
|
||||
|
||||
The trim strategy stats in this section are a bit different than the rest.
|
||||
The first number in this line shows the ratio of bytes removed from the input
|
||||
files; the second one corresponds to the number of execs needed to achieve this
|
||||
goal. Finally, the third number shows the proportion of bytes that, although
|
||||
not possible to remove, were deemed to have no effect and were excluded from
|
||||
some of the more expensive deterministic fuzzing steps.
|
||||
|
||||
8) Path geometry
|
||||
----------------
|
||||
|
||||
+---------------------+
|
||||
| levels : 5 |
|
||||
| pending : 1570 |
|
||||
| pend fav : 583 |
|
||||
| own finds : 0 |
|
||||
| imported : 0 |
|
||||
| stability : 100.00% |
|
||||
+---------------------+
|
||||
|
||||
The first field in this section tracks the path depth reached through the
|
||||
guided fuzzing process. In essence: the initial test cases supplied by the
|
||||
user are considered "level 1". The test cases that can be derived from that
|
||||
through traditional fuzzing are considered "level 2"; the ones derived by
|
||||
using these as inputs to subsequent fuzzing rounds are "level 3"; and so forth.
|
||||
The maximum depth is therefore a rough proxy for how much value you're getting
|
||||
out of the instrumentation-guided approach taken by afl-fuzz.
|
||||
|
||||
The next field shows you the number of inputs that have not gone through any
|
||||
fuzzing yet. The same stat is also given for "favored" entries that the fuzzer
|
||||
really wants to get to in this queue cycle (the non-favored entries may have to
|
||||
wait a couple of cycles to get their chance).
|
||||
|
||||
Next, we have the number of new paths found during this fuzzing session and
|
||||
imported from other fuzzer instances when doing parallelized fuzzing; and the
|
||||
extent to which identical inputs appear to sometimes produce variable behavior
|
||||
in the tested binary.
|
||||
|
||||
That last bit is actually fairly interesting: it measures the consistency of
|
||||
observed traces. If a program always behaves the same for the same input data,
|
||||
it will earn a score of 100%. When the value is lower but still shown in purple,
|
||||
the fuzzing process is unlikely to be negatively affected. If it goes into red,
|
||||
you may be in trouble, since AFL will have difficulty discerning between
|
||||
meaningful and "phantom" effects of tweaking the input file.
|
||||
|
||||
Now, most targets will just get a 100% score, but when you see lower figures,
|
||||
there are several things to look at:
|
||||
|
||||
- The use of uninitialized memory in conjunction with some intrinsic sources
|
||||
of entropy in the tested binary. Harmless to AFL, but could be indicative
|
||||
of a security bug.
|
||||
|
||||
- Attempts to manipulate persistent resources, such as leftover temporary
|
||||
files or shared memory objects. This is usually harmless, but you may want
|
||||
to double-check to make sure the program isn't bailing out prematurely.
|
||||
Running out of disk space, SHM handles, or other global resources can
|
||||
trigger this, too.
|
||||
|
||||
- Hitting some functionality that is actually designed to behave randomly.
|
||||
Generally harmless. For example, when fuzzing sqlite, an input like
|
||||
'select random();' will trigger a variable execution path.
|
||||
|
||||
- Multiple threads executing at once in semi-random order. This is harmless
|
||||
when the 'stability' metric stays over 90% or so, but can become an issue
|
||||
if not. Here's what to try:
|
||||
|
||||
- Use afl-clang-fast from llvm_mode/ - it uses a thread-local tracking
|
||||
model that is less prone to concurrency issues,
|
||||
|
||||
- See if the target can be compiled or run without threads. Common
|
||||
./configure options include --without-threads, --disable-pthreads, or
|
||||
--disable-openmp.
|
||||
|
||||
- Replace pthreads with GNU Pth (https://www.gnu.org/software/pth/), which
|
||||
allows you to use a deterministic scheduler.
|
||||
|
||||
- In persistent mode, minor drops in the "stability" metric can be normal,
|
||||
because not all the code behaves identically when re-entered; but major
|
||||
dips may signify that the code within __AFL_LOOP() is not behaving
|
||||
correctly on subsequent iterations (e.g., due to incomplete clean-up or
|
||||
reinitialization of the state) and that most of the fuzzing effort goes
|
||||
to waste.
|
||||
|
||||
The paths where variable behavior is detected are marked with a matching entry
|
||||
in the <out_dir>/queue/.state/variable_behavior/ directory, so you can look
|
||||
them up easily.
|
||||
|
||||
9) CPU load
|
||||
-----------
|
||||
|
||||
[cpu: 25%]
|
||||
|
||||
This tiny widget shows the apparent CPU utilization on the local system. It is
|
||||
calculated by taking the number of processes in the "runnable" state, and then
|
||||
comparing it to the number of logical cores on the system.
|
||||
|
||||
If the value is shown in green, you are using fewer CPU cores than available on
|
||||
your system and can probably parallelize to improve performance; for tips on
|
||||
how to do that, see parallel_fuzzing.txt.
|
||||
|
||||
If the value is shown in red, your CPU is *possibly* oversubscribed, and
|
||||
running additional fuzzers may not give you any benefits.
|
||||
|
||||
Of course, this benchmark is very simplistic; it tells you how many processes
|
||||
are ready to run, but not how resource-hungry they may be. It also doesn't
|
||||
distinguish between physical cores, logical cores, and virtualized CPUs; the
|
||||
performance characteristics of each of these will differ quite a bit.
|
||||
|
||||
If you want a more accurate measurement, you can run the afl-gotcpu utility
|
||||
from the command line.
|
||||
|
||||
10) Addendum: status and plot files
|
||||
-----------------------------------
|
||||
|
||||
For unattended operation, some of the key status screen information can also be
|
||||
found in a machine-readable format in the fuzzer_stats file in the output
|
||||
directory. This includes:
|
||||
|
||||
- start_time - unix time indicating the start time of afl-fuzz
|
||||
- last_update - unix time corresponding to the last update of this file
|
||||
- fuzzer_pid - PID of the fuzzer process
|
||||
- cycles_done - queue cycles completed so far
|
||||
- execs_done - number of execve() calls attempted
|
||||
- execs_per_sec - current number of execs per second
|
||||
- paths_total - total number of entries in the queue
|
||||
- paths_found - number of entries discovered through local fuzzing
|
||||
- paths_imported - number of entries imported from other instances
|
||||
- max_depth - number of levels in the generated data set
|
||||
- cur_path - currently processed entry number
|
||||
- pending_favs - number of favored entries still waiting to be fuzzed
|
||||
- pending_total - number of all entries waiting to be fuzzed
|
||||
- stability - percentage of bitmap bytes that behave consistently
|
||||
- variable_paths - number of test cases showing variable behavior
|
||||
- unique_crashes - number of unique crashes recorded
|
||||
- unique_hangs - number of unique hangs encountered
|
||||
- command_line - full command line used for the fuzzing session
|
||||
- slowest_exec_ms - real time of the slowest execution in ms
|
||||
- peak_rss_mb - max rss usage reached during fuzzing in mb
|
||||
|
||||
Most of these map directly to the UI elements discussed earlier on.
|
||||
|
||||
On top of that, you can also find an entry called 'plot_data', containing a
|
||||
plottable history for most of these fields. If you have gnuplot installed, you
|
||||
can turn this into a nice progress report with the included 'afl-plot' tool.
|
@ -0,0 +1,563 @@
|
||||
===================================
|
||||
Technical "whitepaper" for afl-fuzz
|
||||
===================================
|
||||
|
||||
This document provides a quick overview of the guts of American Fuzzy Lop.
|
||||
See README for the general instruction manual; and for a discussion of
|
||||
motivations and design goals behind AFL, see historical_notes.txt.
|
||||
|
||||
0) Design statement
|
||||
-------------------
|
||||
|
||||
American Fuzzy Lop does its best not to focus on any singular principle of
|
||||
operation and not be a proof-of-concept for any specific theory. The tool can
|
||||
be thought of as a collection of hacks that have been tested in practice,
|
||||
found to be surprisingly effective, and have been implemented in the simplest,
|
||||
most robust way I could think of at the time.
|
||||
|
||||
Many of the resulting features are made possible thanks to the availability of
|
||||
lightweight instrumentation that served as a foundation for the tool, but this
|
||||
mechanism should be thought of merely as a means to an end. The only true
|
||||
governing principles are speed, reliability, and ease of use.
|
||||
|
||||
1) Coverage measurements
|
||||
------------------------
|
||||
|
||||
The instrumentation injected into compiled programs captures branch (edge)
|
||||
coverage, along with coarse branch-taken hit counts. The code injected at
|
||||
branch points is essentially equivalent to:
|
||||
|
||||
cur_location = <COMPILE_TIME_RANDOM>;
|
||||
shared_mem[cur_location ^ prev_location]++;
|
||||
prev_location = cur_location >> 1;
|
||||
|
||||
The cur_location value is generated randomly to simplify the process of
|
||||
linking complex projects and keep the XOR output distributed uniformly.
|
||||
|
||||
The shared_mem[] array is a 64 kB SHM region passed to the instrumented binary
|
||||
by the caller. Every byte set in the output map can be thought of as a hit for
|
||||
a particular (branch_src, branch_dst) tuple in the instrumented code.
|
||||
|
||||
The size of the map is chosen so that collisions are sporadic with almost all
|
||||
of the intended targets, which usually sport between 2k and 10k discoverable
|
||||
branch points:
|
||||
|
||||
Branch cnt | Colliding tuples | Example targets
|
||||
------------+------------------+-----------------
|
||||
1,000 | 0.75% | giflib, lzo
|
||||
2,000 | 1.5% | zlib, tar, xz
|
||||
5,000 | 3.5% | libpng, libwebp
|
||||
10,000 | 7% | libxml
|
||||
20,000 | 14% | sqlite
|
||||
50,000 | 30% | -
|
||||
|
||||
At the same time, its size is small enough to allow the map to be analyzed
|
||||
in a matter of microseconds on the receiving end, and to effortlessly fit
|
||||
within L2 cache.
|
||||
|
||||
This form of coverage provides considerably more insight into the execution
|
||||
path of the program than simple block coverage. In particular, it trivially
|
||||
distinguishes between the following execution traces:
|
||||
|
||||
A -> B -> C -> D -> E (tuples: AB, BC, CD, DE)
|
||||
A -> B -> D -> C -> E (tuples: AB, BD, DC, CE)
|
||||
|
||||
This aids the discovery of subtle fault conditions in the underlying code,
|
||||
because security vulnerabilities are more often associated with unexpected
|
||||
or incorrect state transitions than with merely reaching a new basic block.
|
||||
|
||||
The reason for the shift operation in the last line of the pseudocode shown
|
||||
earlier in this section is to preserve the directionality of tuples (without
|
||||
this, A ^ B would be indistinguishable from B ^ A) and to retain the identity
|
||||
of tight loops (otherwise, A ^ A would be obviously equal to B ^ B).
|
||||
|
||||
The absence of simple saturating arithmetic opcodes on Intel CPUs means that
|
||||
the hit counters can sometimes wrap around to zero. Since this is a fairly
|
||||
unlikely and localized event, it's seen as an acceptable performance trade-off.
|
||||
|
||||
2) Detecting new behaviors
|
||||
--------------------------
|
||||
|
||||
The fuzzer maintains a global map of tuples seen in previous executions; this
|
||||
data can be rapidly compared with individual traces and updated in just a couple
|
||||
of dword- or qword-wide instructions and a simple loop.
|
||||
|
||||
When a mutated input produces an execution trace containing new tuples, the
|
||||
corresponding input file is preserved and routed for additional processing
|
||||
later on (see section #3). Inputs that do not trigger new local-scale state
|
||||
transitions in the execution trace (i.e., produce no new tuples) are discarded,
|
||||
even if their overall control flow sequence is unique.
|
||||
|
||||
This approach allows for a very fine-grained and long-term exploration of
|
||||
program state while not having to perform any computationally intensive and
|
||||
fragile global comparisons of complex execution traces, and while avoiding the
|
||||
scourge of path explosion.
|
||||
|
||||
To illustrate the properties of the algorithm, consider that the second trace
|
||||
shown below would be considered substantially new because of the presence of
|
||||
new tuples (CA, AE):
|
||||
|
||||
#1: A -> B -> C -> D -> E
|
||||
#2: A -> B -> C -> A -> E
|
||||
|
||||
At the same time, with #2 processed, the following pattern will not be seen
|
||||
as unique, despite having a markedly different overall execution path:
|
||||
|
||||
#3: A -> B -> C -> A -> B -> C -> A -> B -> C -> D -> E
|
||||
|
||||
In addition to detecting new tuples, the fuzzer also considers coarse tuple
|
||||
hit counts. These are divided into several buckets:
|
||||
|
||||
1, 2, 3, 4-7, 8-15, 16-31, 32-127, 128+
|
||||
|
||||
To some extent, the number of buckets is an implementation artifact: it allows
|
||||
an in-place mapping of an 8-bit counter generated by the instrumentation to
|
||||
an 8-position bitmap relied on by the fuzzer executable to keep track of the
|
||||
already-seen execution counts for each tuple.
|
||||
|
||||
Changes within the range of a single bucket are ignored; transition from one
|
||||
bucket to another is flagged as an interesting change in program control flow,
|
||||
and is routed to the evolutionary process outlined in the section below.
|
||||
|
||||
The hit count behavior provides a way to distinguish between potentially
|
||||
interesting control flow changes, such as a block of code being executed
|
||||
twice when it was normally hit only once. At the same time, it is fairly
|
||||
insensitive to empirically less notable changes, such as a loop going from
|
||||
47 cycles to 48. The counters also provide some degree of "accidental"
|
||||
immunity against tuple collisions in dense trace maps.
|
||||
|
||||
The execution is policed fairly heavily through memory and execution time
|
||||
limits; by default, the timeout is set at 5x the initially-calibrated
|
||||
execution speed, rounded up to 20 ms. The aggressive timeouts are meant to
|
||||
prevent dramatic fuzzer performance degradation by descending into tarpits
|
||||
that, say, improve coverage by 1% while being 100x slower; we pragmatically
|
||||
reject them and hope that the fuzzer will find a less expensive way to reach
|
||||
the same code. Empirical testing strongly suggests that more generous time
|
||||
limits are not worth the cost.
|
||||
|
||||
3) Evolving the input queue
|
||||
---------------------------
|
||||
|
||||
Mutated test cases that produced new state transitions within the program are
|
||||
added to the input queue and used as a starting point for future rounds of
|
||||
fuzzing. They supplement, but do not automatically replace, existing finds.
|
||||
|
||||
In contrast to more greedy genetic algorithms, this approach allows the tool
|
||||
to progressively explore various disjoint and possibly mutually incompatible
|
||||
features of the underlying data format, as shown in this image:
|
||||
|
||||
http://lcamtuf.coredump.cx/afl/afl_gzip.png
|
||||
|
||||
Several practical examples of the results of this algorithm are discussed
|
||||
here:
|
||||
|
||||
http://lcamtuf.blogspot.com/2014/11/pulling-jpegs-out-of-thin-air.html
|
||||
http://lcamtuf.blogspot.com/2014/11/afl-fuzz-nobody-expects-cdata-sections.html
|
||||
|
||||
The synthetic corpus produced by this process is essentially a compact
|
||||
collection of "hmm, this does something new!" input files, and can be used to
|
||||
seed any other testing processes down the line (for example, to manually
|
||||
stress-test resource-intensive desktop apps).
|
||||
|
||||
With this approach, the queue for most targets grows to somewhere between 1k
|
||||
and 10k entries; approximately 10-30% of this is attributable to the discovery
|
||||
of new tuples, and the remainder is associated with changes in hit counts.
|
||||
|
||||
The following table compares the relative ability to discover file syntax and
|
||||
explore program states when using several different approaches to guided
|
||||
fuzzing. The instrumented target was GNU patch 2.7.3 compiled with -O3 and
|
||||
seeded with a dummy text file; the session consisted of a single pass over the
|
||||
input queue with afl-fuzz:
|
||||
|
||||
Fuzzer guidance | Blocks | Edges | Edge hit | Highest-coverage
|
||||
strategy used | reached | reached | cnt var | test case generated
|
||||
------------------+---------+---------+----------+---------------------------
|
||||
(Initial file) | 156 | 163 | 1.00 | (none)
|
||||
| | | |
|
||||
Blind fuzzing S | 182 | 205 | 2.23 | First 2 B of RCS diff
|
||||
Blind fuzzing L | 228 | 265 | 2.23 | First 4 B of -c mode diff
|
||||
Block coverage | 855 | 1,130 | 1.57 | Almost-valid RCS diff
|
||||
Edge coverage | 1,452 | 2,070 | 2.18 | One-chunk -c mode diff
|
||||
AFL model | 1,765 | 2,597 | 4.99 | Four-chunk -c mode diff
|
||||
|
||||
The first entry for blind fuzzing ("S") corresponds to executing just a single
|
||||
round of testing; the second set of figures ("L") shows the fuzzer running in a
|
||||
loop for a number of execution cycles comparable with that of the instrumented
|
||||
runs, which required more time to fully process the growing queue.
|
||||
|
||||
Roughly similar results have been obtained in a separate experiment where the
|
||||
fuzzer was modified to compile out all the random fuzzing stages and leave just
|
||||
a series of rudimentary, sequential operations such as walking bit flips.
|
||||
Because this mode would be incapable of altering the size of the input file,
|
||||
the sessions were seeded with a valid unified diff:
|
||||
|
||||
Queue extension | Blocks | Edges | Edge hit | Number of unique
|
||||
strategy used | reached | reached | cnt var | crashes found
|
||||
------------------+---------+---------+----------+------------------
|
||||
(Initial file) | 624 | 717 | 1.00 | -
|
||||
| | | |
|
||||
Blind fuzzing | 1,101 | 1,409 | 1.60 | 0
|
||||
Block coverage | 1,255 | 1,649 | 1.48 | 0
|
||||
Edge coverage | 1,259 | 1,734 | 1.72 | 0
|
||||
AFL model | 1,452 | 2,040 | 3.16 | 1
|
||||
|
||||
As noted earlier on, some of the prior work on genetic fuzzing relied on
|
||||
maintaining a single test case and evolving it to maximize coverage. At least
|
||||
in the tests described above, this "greedy" approach appears to confer no
|
||||
substantial benefits over blind fuzzing strategies.
|
||||
|
||||
4) Culling the corpus
|
||||
---------------------
|
||||
|
||||
The progressive state exploration approach outlined above means that some of
|
||||
the test cases synthesized later on in the game may have edge coverage that
|
||||
is a strict superset of the coverage provided by their ancestors.
|
||||
|
||||
To optimize the fuzzing effort, AFL periodically re-evaluates the queue using a
|
||||
fast algorithm that selects a smaller subset of test cases that still cover
|
||||
every tuple seen so far, and whose characteristics make them particularly
|
||||
favorable to the tool.
|
||||
|
||||
The algorithm works by assigning every queue entry a score proportional to its
|
||||
execution latency and file size; and then selecting lowest-scoring candidates
|
||||
for each tuple.
|
||||
|
||||
The tuples are then processed sequentially using a simple workflow:
|
||||
|
||||
1) Find next tuple not yet in the temporary working set,
|
||||
|
||||
2) Locate the winning queue entry for this tuple,
|
||||
|
||||
3) Register *all* tuples present in that entry's trace in the working set,
|
||||
|
||||
4) Go to #1 if there are any missing tuples in the set.
|
||||
|
||||
The generated corpus of "favored" entries is usually 5-10x smaller than the
|
||||
starting data set. Non-favored entries are not discarded, but they are skipped
|
||||
with varying probabilities when encountered in the queue:
|
||||
|
||||
- If there are new, yet-to-be-fuzzed favorites present in the queue, 99%
|
||||
of non-favored entries will be skipped to get to the favored ones.
|
||||
|
||||
- If there are no new favorites:
|
||||
|
||||
- If the current non-favored entry was fuzzed before, it will be skipped
|
||||
95% of the time.
|
||||
|
||||
- If it hasn't gone through any fuzzing rounds yet, the odds of skipping
|
||||
drop down to 75%.
|
||||
|
||||
Based on empirical testing, this provides a reasonable balance between queue
|
||||
cycling speed and test case diversity.
|
||||
|
||||
Slightly more sophisticated but much slower culling can be performed on input
|
||||
or output corpora with afl-cmin. This tool permanently discards the redundant
|
||||
entries and produces a smaller corpus suitable for use with afl-fuzz or
|
||||
external tools.
|
||||
|
||||
5) Trimming input files
|
||||
-----------------------
|
||||
|
||||
File size has a dramatic impact on fuzzing performance, both because large
|
||||
files make the target binary slower, and because they reduce the likelihood
|
||||
that a mutation would touch important format control structures, rather than
|
||||
redundant data blocks. This is discussed in more detail in perf_tips.txt.
|
||||
|
||||
The possibility that the user will provide a low-quality starting corpus aside,
|
||||
some types of mutations can have the effect of iteratively increasing the size
|
||||
of the generated files, so it is important to counter this trend.
|
||||
|
||||
Luckily, the instrumentation feedback provides a simple way to automatically
|
||||
trim down input files while ensuring that the changes made to the files have no
|
||||
impact on the execution path.
|
||||
|
||||
The built-in trimmer in afl-fuzz attempts to sequentially remove blocks of data
|
||||
with variable length and stepover; any deletion that doesn't affect the checksum
|
||||
of the trace map is committed to disk. The trimmer is not designed to be
|
||||
particularly thorough; instead, it tries to strike a balance between precision
|
||||
and the number of execve() calls spent on the process, selecting the block size
|
||||
and stepover to match. The average per-file gains are around 5-20%.
|
||||
|
||||
The standalone afl-tmin tool uses a more exhaustive, iterative algorithm, and
|
||||
also attempts to perform alphabet normalization on the trimmed files. The
|
||||
operation of afl-tmin is as follows.
|
||||
|
||||
First, the tool automatically selects the operating mode. If the initial input
|
||||
crashes the target binary, afl-tmin will run in non-instrumented mode, simply
|
||||
keeping any tweaks that produce a simpler file but still crash the target. If
|
||||
the target is non-crashing, the tool uses an instrumented mode and keeps only
|
||||
the tweaks that produce exactly the same execution path.
|
||||
|
||||
The actual minimization algorithm is:
|
||||
|
||||
1) Attempt to zero large blocks of data with large stepovers. Empirically,
|
||||
this is shown to reduce the number of execs by preempting finer-grained
|
||||
efforts later on.
|
||||
|
||||
2) Perform a block deletion pass with decreasing block sizes and stepovers,
|
||||
binary-search-style.
|
||||
|
||||
3) Perform alphabet normalization by counting unique characters and trying
|
||||
to bulk-replace each with a zero value.
|
||||
|
||||
4) As a last resort, perform byte-by-byte normalization on non-zero bytes.
|
||||
|
||||
Instead of zeroing with a 0x00 byte, afl-tmin uses the ASCII digit '0'. This
|
||||
is done because such a modification is much less likely to interfere with
|
||||
text parsing, so it is more likely to result in successful minimization of
|
||||
text files.
|
||||
|
||||
The algorithm used here is less involved than some other test case
|
||||
minimization approaches proposed in academic work, but requires far fewer
|
||||
executions and tends to produce comparable results in most real-world
|
||||
applications.
|
||||
|
||||
6) Fuzzing strategies
|
||||
---------------------
|
||||
|
||||
The feedback provided by the instrumentation makes it easy to understand the
|
||||
value of various fuzzing strategies and optimize their parameters so that they
|
||||
work equally well across a wide range of file types. The strategies used by
|
||||
afl-fuzz are generally format-agnostic and are discussed in more detail here:
|
||||
|
||||
http://lcamtuf.blogspot.com/2014/08/binary-fuzzing-strategies-what-works.html
|
||||
|
||||
It is somewhat notable that especially early on, most of the work done by
|
||||
afl-fuzz is actually highly deterministic, and progresses to random stacked
|
||||
modifications and test case splicing only at a later stage. The deterministic
|
||||
strategies include:
|
||||
|
||||
- Sequential bit flips with varying lengths and stepovers,
|
||||
|
||||
- Sequential addition and subtraction of small integers,
|
||||
|
||||
- Sequential insertion of known interesting integers (0, 1, INT_MAX, etc.).
|
||||
|
||||
The purpose of opening with deterministic steps is related to their tendency to
|
||||
produce compact test cases and small diffs between the non-crashing and crashing
|
||||
inputs.
|
||||
|
||||
With deterministic fuzzing out of the way, the non-deterministic steps include
|
||||
stacked bit flips, insertions, deletions, arithmetics, and splicing of different
|
||||
test cases.
|
||||
|
||||
The relative yields and execve() costs of all these strategies have been
|
||||
investigated and are discussed in the aforementioned blog post.
|
||||
|
||||
For the reasons discussed in historical_notes.txt (chiefly, performance,
|
||||
simplicity, and reliability), AFL generally does not try to reason about the
|
||||
relationship between specific mutations and program states; the fuzzing steps
|
||||
are nominally blind, and are guided only by the evolutionary design of the
|
||||
input queue.
|
||||
|
||||
That said, there is one (trivial) exception to this rule: when a new queue
|
||||
entry goes through the initial set of deterministic fuzzing steps, and tweaks to
|
||||
some regions in the file are observed to have no effect on the checksum of the
|
||||
execution path, they may be excluded from the remaining phases of
|
||||
deterministic fuzzing - and the fuzzer may proceed straight to random tweaks.
|
||||
Especially for verbose, human-readable data formats, this can reduce the number
|
||||
of execs by 10-40% or so without an appreciable drop in coverage. In extreme
|
||||
cases, such as normally block-aligned tar archives, the gains can be as high as
|
||||
90%.
|
||||
|
||||
Because the underlying "effector maps" are local to every queue entry and remain
|
||||
in force only during deterministic stages that do not alter the size or the
|
||||
general layout of the underlying file, this mechanism appears to work very
|
||||
reliably and proved to be simple to implement.
|
||||
|
||||
7) Dictionaries
|
||||
---------------
|
||||
|
||||
The feedback provided by the instrumentation makes it easy to automatically
|
||||
identify syntax tokens in some types of input files, and to detect that certain
|
||||
combinations of predefined or auto-detected dictionary terms constitute a
|
||||
valid grammar for the tested parser.
|
||||
|
||||
A discussion of how these features are implemented within afl-fuzz can be found
|
||||
here:
|
||||
|
||||
http://lcamtuf.blogspot.com/2015/01/afl-fuzz-making-up-grammar-with.html
|
||||
|
||||
In essence, when basic, typically easily-obtained syntax tokens are combined
|
||||
together in a purely random manner, the instrumentation and the evolutionary
|
||||
design of the queue together provide a feedback mechanism to differentiate
|
||||
between meaningless mutations and ones that trigger new behaviors in the
|
||||
instrumented code - and to incrementally build more complex syntax on top of
|
||||
this discovery.
|
||||
|
||||
The dictionaries have been shown to enable the fuzzer to rapidly reconstruct
|
||||
the grammar of highly verbose and complex languages such as JavaScript, SQL,
|
||||
or XML; several examples of generated SQL statements are given in the blog
|
||||
post mentioned above.
|
||||
|
||||
Interestingly, the AFL instrumentation also allows the fuzzer to automatically
|
||||
isolate syntax tokens already present in an input file. It can do so by looking
|
||||
for runs of bytes that, when flipped, produce a consistent change to the
|
||||
program's execution path; this is suggestive of an underlying atomic comparison
|
||||
to a predefined value baked into the code. The fuzzer relies on this signal
|
||||
to build compact "auto dictionaries" that are then used in conjunction with
|
||||
other fuzzing strategies.
|
||||
|
||||
8) De-duping crashes
|
||||
--------------------
|
||||
|
||||
De-duplication of crashes is one of the more important problems for any
|
||||
competent fuzzing tool. Many of the naive approaches run into problems; in
|
||||
particular, looking just at the faulting address may lead to completely
|
||||
unrelated issues being clustered together if the fault happens in a common
|
||||
library function (say, strcmp, strcpy); while checksumming call stack
|
||||
backtraces can lead to extreme crash count inflation if the fault can be
|
||||
reached through a number of different, possibly recursive code paths.
|
||||
|
||||
The solution implemented in afl-fuzz considers a crash unique if either of two
|
||||
conditions is met:
|
||||
|
||||
- The crash trace includes a tuple not seen in any of the previous crashes,
|
||||
|
||||
- The crash trace is missing a tuple that was always present in earlier
|
||||
faults.
|
||||
|
||||
The approach is vulnerable to some path count inflation early on, but exhibits
|
||||
a very strong self-limiting effect, similar to the execution path analysis
|
||||
logic that is the cornerstone of afl-fuzz.
|
||||
|
||||
9) Investigating crashes
|
||||
------------------------
|
||||
|
||||
The exploitability of many types of crashes can be ambiguous; afl-fuzz tries
|
||||
to address this by providing a crash exploration mode where a known-faulting
|
||||
test case is fuzzed in a manner very similar to the normal operation of the
|
||||
fuzzer, but with a constraint that causes any non-crashing mutations to be
|
||||
thrown away.
|
||||
|
||||
A detailed discussion of the value of this approach can be found here:
|
||||
|
||||
http://lcamtuf.blogspot.com/2014/11/afl-fuzz-crash-exploration-mode.html
|
||||
|
||||
The method uses instrumentation feedback to explore the state of the crashing
|
||||
program to get past the ambiguous faulting condition and then isolate the
|
||||
newly-found inputs for human review.
|
||||
|
||||
On the subject of crashes, it is worth noting that in contrast to normal
|
||||
queue entries, crashing inputs are *not* trimmed; they are kept exactly as
|
||||
discovered to make it easier to compare them to the parent, non-crashing entry
|
||||
in the queue. That said, afl-tmin can be used to shrink them at will.
|
||||
|
||||
10) The fork server
|
||||
-------------------
|
||||
|
||||
To improve performance, afl-fuzz uses a "fork server", where the fuzzed process
|
||||
goes through execve(), linking, and libc initialization only once, and is then
|
||||
cloned from a stopped process image by leveraging copy-on-write. The
|
||||
implementation is described in more detail here:
|
||||
|
||||
http://lcamtuf.blogspot.com/2014/10/fuzzing-binaries-without-execve.html
|
||||
|
||||
The fork server is an integral aspect of the injected instrumentation and
|
||||
simply stops at the first instrumented function to await commands from
|
||||
afl-fuzz.
|
||||
|
||||
With fast targets, the fork server can offer considerable performance gains,
|
||||
usually between 1.5x and 2x. It is also possible to:
|
||||
|
||||
- Use the fork server in manual ("deferred") mode, skipping over larger,
|
||||
user-selected chunks of initialization code. It requires very modest
|
||||
code changes to the targeted program, and with some targets, can
|
||||
produce 10x+ performance gains.
|
||||
|
||||
- Enable "persistent" mode, where a single process is used to try out
|
||||
multiple inputs, greatly limiting the overhead of repetitive fork()
|
||||
calls. This generally requires some code changes to the targeted program,
|
||||
but can improve the performance of fast targets by a factor of 5 or more
|
||||
- approximating the benefits of in-process fuzzing jobs while still
|
||||
maintaining very robust isolation between the fuzzer process and the
|
||||
targeted binary.
|
||||
|
||||
11) Parallelization
|
||||
-------------------
|
||||
|
||||
The parallelization mechanism relies on periodically examining the queues
|
||||
produced by independently-running instances on other CPU cores or on remote
|
||||
machines, and then selectively pulling in the test cases that, when tried
|
||||
out locally, produce behaviors not yet seen by the fuzzer at hand.
|
||||
|
||||
This allows for extreme flexibility in fuzzer setup, including running synced
|
||||
instances against different parsers of a common data format, often with
|
||||
synergistic effects.
|
||||
|
||||
For more information about this design, see parallel_fuzzing.txt.
|
||||
|
||||
12) Binary-only instrumentation
|
||||
-------------------------------
|
||||
|
||||
Instrumentation of black-box, binary-only targets is accomplished with the
|
||||
help of a separately-built version of QEMU in "user emulation" mode. This also
|
||||
allows the execution of cross-architecture code - say, ARM binaries on x86.
|
||||
|
||||
QEMU uses basic blocks as translation units; the instrumentation is implemented
|
||||
on top of this and uses a model roughly analogous to the compile-time hooks:
|
||||
|
||||
if (block_address > elf_text_start && block_address < elf_text_end) {
|
||||
|
||||
cur_location = (block_address >> 4) ^ (block_address << 8);
|
||||
shared_mem[cur_location ^ prev_location]++;
|
||||
prev_location = cur_location >> 1;
|
||||
|
||||
}
|
||||
|
||||
The shift-and-XOR-based scrambling in the second line is used to mask the
|
||||
effects of instruction alignment.
|
||||
|
||||
The start-up of binary translators such as QEMU, DynamoRIO, and PIN is fairly
|
||||
slow; to counter this, the QEMU mode leverages a fork server similar to that
|
||||
used for compiler-instrumented code, effectively spawning copies of an
|
||||
already-initialized process paused at _start.
|
||||
|
||||
First-time translation of a new basic block also incurs substantial latency. To
|
||||
eliminate this problem, the AFL fork server is extended by providing a channel
|
||||
between the running emulator and the parent process. The channel is used
|
||||
to notify the parent about the addresses of any newly-encountered blocks and to
|
||||
add them to the translation cache that will be replicated for future child
|
||||
processes.
|
||||
|
||||
As a result of these two optimizations, the overhead of the QEMU mode is
|
||||
roughly 2-5x, compared to 100x+ for PIN.
|
||||
|
||||
13) The afl-analyze tool
|
||||
------------------------
|
||||
|
||||
The file format analyzer is a simple extension of the minimization algorithm
|
||||
discussed earlier on; instead of attempting to remove no-op blocks, the tool
|
||||
performs a series of walking byte flips and then annotates runs of bytes
|
||||
in the input file.
|
||||
|
||||
It uses the following classification scheme:
|
||||
|
||||
- "No-op blocks" - segments where bit flips cause no apparent changes to
|
||||
control flow. Common examples may be comment sections, pixel data within
|
||||
a bitmap file, etc.
|
||||
|
||||
- "Superficial content" - segments where some, but not all, bit flips
|
||||
produce some control flow changes. Examples may include strings in rich
|
||||
documents (e.g., XML, RTF).
|
||||
|
||||
- "Critical stream" - a sequence of bytes where all bit flips alter control
|
||||
flow in different but correlated ways. This may be compressed data,
|
||||
non-atomically compared keywords or magic values, etc.
|
||||
|
||||
- "Suspected length field" - small, atomic integer that, when touched in
|
||||
any way, causes a consistent change to program control flow, suggestive
|
||||
of a failed length check.
|
||||
|
||||
- "Suspected cksum or magic int" - an integer that behaves similarly to a
|
||||
length field, but has a numerical value that makes the length explanation
|
||||
unlikely. This is suggestive of a checksum or other "magic" integer.
|
||||
|
||||
- "Suspected checksummed block" - a long block of data where any change
|
||||
always triggers the same new execution path. Likely caused by failing
|
||||
a checksum or a similar integrity check before any subsequent parsing
|
||||
takes place.
|
||||
|
||||
- "Magic value section" - a generic token where changes cause the type
|
||||
of binary behavior outlined earlier, but that doesn't meet any of the
|
||||
other criteria. May be an atomically compared keyword or so.
|
After Width: | Height: | Size: 581 KiB |
@ -0,0 +1 @@
|
||||
() { _; } >_[$($())] { id; }
|
@ -0,0 +1 @@
|
||||
() { x() { _; }; x() { _; } <<a; }
|
After Width: | Height: | Size: 892 B |
After Width: | Height: | Size: 1.7 KiB |
After Width: | Height: | Size: 38 B |
After Width: | Height: | Size: 179 B |
After Width: | Height: | Size: 642 B |
After Width: | Height: | Size: 595 B |
@ -0,0 +1,3 @@
|
||||
<!DOCTYPEd[<!ENTITY
|
||||
S ""><!ENTITY %
|
||||
N "<!ELEMENT<![INCLUDE0"<!ENTITYL%N;
|
After Width: | Height: | Size: 876 B |
After Width: | Height: | Size: 293 B |
After Width: | Height: | Size: 434 B |
After Width: | Height: | Size: 996 B |
@ -0,0 +1,2 @@
|
||||
create table t0(o CHar(0)CHECK(0&O>O));insert into t0
|
||||
select randomblob(0)-trim(0);
|
@ -0,0 +1 @@
|
||||
SELECT 0 UNION SELECT 0 ORDER BY 1 COLLATE"""""""";
|
@ -0,0 +1 @@
|
||||
PRAGMA foreign_keys=1;CREATE TABLE t1("""0"PRIMARY KEy REFERENCES t1 ON DELETE SET NULL);REPLACE INTO t1 SELECT(0);
|
@ -0,0 +1,2 @@
|
||||
DROP TABLE IF EXISTS t;CREATE VIRTUAL TABLE t0 USING fts4();insert into t0 select zeroblob(0);SAVEPOINT O;insert into t0
|
||||
select(0);SAVEPOINT E;insert into t0 SELECT 0 UNION SELECT 0'x'ORDER BY x;
|
@ -0,0 +1 @@
|
||||
SELECT*from(select"",zeroblob(0),zeroblob(1E9),zeroblob(0),zeroblob(150000000),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(1E9),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0)),(select"",zeroblob(1E9),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(1E9),(0),zeroblob(150000000),(0),zeroblob(0),(0)EXCEPT select zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0),zeroblob(0));
|
@ -0,0 +1,2 @@
|
||||
create table t0(t);insert into t0
|
||||
select strftime();
|
@ -0,0 +1 @@
|
||||
SELECT fts3_tokenizer(@0());
|
@ -0,0 +1 @@
|
||||
select''like''like''like#0;
|
@ -0,0 +1 @@
|
||||
PRAGMA e;select lower(0);select lower(0)"a",""GROUP BY a ORDER BY a;
|
@ -0,0 +1 @@
|
||||
WITH x AS(SELECT*FROM t)SELECT""EXCEPT SELECT 0 ORDER BY 0 COLLATE"";
|
@ -0,0 +1 @@
|
||||
CREATE VIRTUAL TABLE x USING fts4();VALUES(0,0),(0,0),(0,0),(0,0);PRAGMA writable_schema=ON;UPDATE sqlite_master SET sql=''WHERE name='';UPDATE sqlite_master SET sql='CREATE table t(d CHECK(T(#0)';SAVEPOINT K;SAVEPOINT T;SAVEPOINT T;ANALYZE;ROLLBACK;SAVEPOINT E;DROP TABLE IF EXISTS t;
|
@ -0,0 +1 @@
|
||||
CREATE VIRTUAL TABLE t4 USING fts4(0,b,c,notindexed=0);INSERT INTO t4 VALUES('','','0');BEGIN;INSERT INTO t4 VALUES('','','0');INSERT INTO t4(t4)VALUES('integrity-check');
|
@ -0,0 +1 @@
|
||||
DETACH(select group_concat(q));
|
@ -0,0 +1 @@
|
||||
select(select strftime());
|
@ -0,0 +1 @@
|
||||
select n()AND+#00;
|
@ -0,0 +1 @@
|
||||
select e.*,0 from(s,(L))e;
|
@ -0,0 +1 @@
|
||||
PRAGMA encoding='UTF16';CREATE VIRTUAL TABLE È USING s;
|
@ -0,0 +1 @@
|
||||
CREATE VIRTUAL TABLE t USING fts4(tokenize=);
|
@ -0,0 +1 @@
|
||||
CREATE TABLE p(a UNIQUE,PRIMARY KEY('a'))WITHOUT rowid;
|
@ -0,0 +1 @@
|
||||
CREATE TABLE t0(z);WITH d(x)AS(SELECT*UNION SELECT 0)INSERT INTO t0 SELECT 0 FROM d;
|
@ -0,0 +1 @@
|
||||
create table t0(‰ DEFAULT(0=0)NOT/**/NULL);REPLACE into t0 select'';
|
@ -0,0 +1,6 @@
|
||||
CREATE VIRTUAL TABLE t0 USING fts4(x,order=DESC);
|
||||
INSERT INTO t0(docid,x)VALUES(-1E0,'0(o');
|
||||
INSERT INTO t0 VALUES('');
|
||||
INSERT INTO t0 VALUES('');
|
||||
INSeRT INTO t0 VALUES('o');
|
||||
SELECT docid FROM t0 WHERE t0 MATCH'"0*o"';
|
@ -0,0 +1 @@
|
||||
SELECT printf('%*.*f',90000||006000000&6600000000,00000000000000000909000000000000.0000000000000000)""WHERE"">"";
|
@ -0,0 +1 @@
|
||||
CREATE VIRTUAL TABLE t0 USING fts4(content=t0);
|
@ -0,0 +1 @@
|
||||
REATE VIRTUAL TABLE t0 USING fts4(prefix=0);INSERT INTO t0 VALUES(0);
|
@ -0,0 +1 @@
|
||||
create table t(s);PRAGMA writable_schema=ON;UPDATE sqlite_master SET sql='ANALYZE;CREATE VIRTUAL TABLE t USING fts3;DROP TABLE t;DROP TABLE EXISTS t';PRAGMA r;SAVEPOINT T;ANALYZE;ROLLBACK;SAVEPOINT E;DROP TABLE IF EXISTS t;
|
@ -0,0 +1,3 @@
|
||||
$$@$$$@$o
|
||||
S…Ôo
|
||||
S…Ô
|